diff options
Diffstat (limited to 'third_party/aom/av1/encoder/pickrst.c')
-rw-r--r-- | third_party/aom/av1/encoder/pickrst.c | 1184 |
1 files changed, 703 insertions, 481 deletions
diff --git a/third_party/aom/av1/encoder/pickrst.c b/third_party/aom/av1/encoder/pickrst.c index fec68377a..a2262b6fc 100644 --- a/third_party/aom/av1/encoder/pickrst.c +++ b/third_party/aom/av1/encoder/pickrst.c @@ -29,13 +29,13 @@ #include "av1/encoder/av1_quantize.h" #include "av1/encoder/encoder.h" +#include "av1/encoder/mathutils.h" #include "av1/encoder/picklpf.h" #include "av1/encoder/pickrst.h" -#include "av1/encoder/mathutils.h" // When set to RESTORE_WIENER or RESTORE_SGRPROJ only those are allowed. -// When set to RESTORE_NONE (0) we allow switchable. -const RestorationType force_restore_type = RESTORE_NONE; +// When set to RESTORE_TYPES we allow switchable. +static const RestorationType force_restore_type = RESTORE_TYPES; // Number of Wiener iterations #define NUM_WIENER_ITERS 5 @@ -44,7 +44,7 @@ typedef double (*search_restore_type)(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, int partial_frame, int plane, RestorationInfo *info, RestorationType *rest_level, - double *best_tile_cost, + int64_t *best_tile_cost, YV12_BUFFER_CONFIG *dst_frame); const int frame_level_restore_bits[RESTORE_TYPES] = { 2, 2, 2, 2 }; @@ -124,13 +124,11 @@ static int64_t sse_restoration_frame(AV1_COMMON *const cm, static int64_t try_restoration_tile(const YV12_BUFFER_CONFIG *src, AV1_COMP *const cpi, RestorationInfo *rsi, int components_pattern, int partial_frame, - int tile_idx, int subtile_idx, - int subtile_bits, + int tile_idx, YV12_BUFFER_CONFIG *dst_frame) { AV1_COMMON *const cm = &cpi->common; int64_t filt_err; int tile_width, tile_height, nhtiles, nvtiles; - int h_start, h_end, v_start, v_end; int ntiles, width, height; // Y and UV components cannot be mixed @@ -151,11 +149,16 @@ static int64_t try_restoration_tile(const YV12_BUFFER_CONFIG *src, av1_loop_restoration_frame(cm->frame_to_show, cm, rsi, components_pattern, partial_frame, dst_frame); - av1_get_rest_tile_limits(tile_idx, subtile_idx, subtile_bits, nhtiles, - nvtiles, tile_width, tile_height, width, height, 0, - 0, &h_start, &h_end, &v_start, &v_end); - filt_err = sse_restoration_tile(src, dst_frame, cm, h_start, h_end - h_start, - v_start, v_end - v_start, components_pattern); + RestorationTileLimits limits = av1_get_rest_tile_limits( + tile_idx, nhtiles, nvtiles, tile_width, tile_height, width, +#if CONFIG_STRIPED_LOOP_RESTORATION + height, components_pattern > 1 ? cm->subsampling_y : 0); +#else + height); +#endif + filt_err = sse_restoration_tile( + src, dst_frame, cm, limits.h_start, limits.h_end - limits.h_start, + limits.v_start, limits.v_end - limits.v_start, components_pattern); return filt_err; } @@ -172,16 +175,16 @@ static int64_t try_restoration_frame(const YV12_BUFFER_CONFIG *src, return filt_err; } -static int64_t get_pixel_proj_error(uint8_t *src8, int width, int height, - int src_stride, uint8_t *dat8, - int dat_stride, int bit_depth, +static int64_t get_pixel_proj_error(const uint8_t *src8, int width, int height, + int src_stride, const uint8_t *dat8, + int dat_stride, int use_highbitdepth, int32_t *flt1, int flt1_stride, int32_t *flt2, int flt2_stride, int *xqd) { int i, j; int64_t err = 0; int xq[2]; decode_xq(xqd, xq); - if (bit_depth == 8) { + if (!use_highbitdepth) { const uint8_t *src = src8; const uint8_t *dat = dat8; for (i = 0; i < height; ++i) { @@ -219,12 +222,12 @@ static int64_t get_pixel_proj_error(uint8_t *src8, int width, int height, #define USE_SGRPROJ_REFINEMENT_SEARCH 1 static int64_t finer_search_pixel_proj_error( - uint8_t *src8, int width, int height, int src_stride, uint8_t *dat8, - int dat_stride, int bit_depth, int32_t *flt1, int flt1_stride, - int32_t *flt2, int flt2_stride, int start_step, int *xqd) { + const uint8_t *src8, int width, int height, int src_stride, + const uint8_t *dat8, int dat_stride, int use_highbitdepth, int32_t *flt1, + int flt1_stride, int32_t *flt2, int flt2_stride, int start_step, int *xqd) { int64_t err = get_pixel_proj_error(src8, width, height, src_stride, dat8, - dat_stride, bit_depth, flt1, flt1_stride, - flt2, flt2_stride, xqd); + dat_stride, use_highbitdepth, flt1, + flt1_stride, flt2, flt2_stride, xqd); (void)start_step; #if USE_SGRPROJ_REFINEMENT_SEARCH int64_t err2; @@ -237,8 +240,8 @@ static int64_t finer_search_pixel_proj_error( if (xqd[p] - s >= tap_min[p]) { xqd[p] -= s; err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8, - dat_stride, bit_depth, flt1, flt1_stride, - flt2, flt2_stride, xqd); + dat_stride, use_highbitdepth, flt1, + flt1_stride, flt2, flt2_stride, xqd); if (err2 > err) { xqd[p] += s; } else { @@ -255,8 +258,8 @@ static int64_t finer_search_pixel_proj_error( if (xqd[p] + s <= tap_max[p]) { xqd[p] += s; err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8, - dat_stride, bit_depth, flt1, flt1_stride, - flt2, flt2_stride, xqd); + dat_stride, use_highbitdepth, flt1, + flt1_stride, flt2, flt2_stride, xqd); if (err2 > err) { xqd[p] -= s; } else { @@ -273,10 +276,11 @@ static int64_t finer_search_pixel_proj_error( return err; } -static void get_proj_subspace(uint8_t *src8, int width, int height, +static void get_proj_subspace(const uint8_t *src8, int width, int height, int src_stride, uint8_t *dat8, int dat_stride, - int bit_depth, int32_t *flt1, int flt1_stride, - int32_t *flt2, int flt2_stride, int *xq) { + int use_highbitdepth, int32_t *flt1, + int flt1_stride, int32_t *flt2, int flt2_stride, + int *xq) { int i, j; double H[2][2] = { { 0, 0 }, { 0, 0 } }; double C[2] = { 0, 0 }; @@ -289,7 +293,7 @@ static void get_proj_subspace(uint8_t *src8, int width, int height, // Default xq[0] = 0; xq[1] = 0; - if (bit_depth == 8) { + if (!use_highbitdepth) { const uint8_t *src = src8; const uint8_t *dat = dat8; for (i = 0; i < height; ++i) { @@ -346,54 +350,83 @@ void encode_xq(int *xq, int *xqd) { } static void search_selfguided_restoration(uint8_t *dat8, int width, int height, - int dat_stride, uint8_t *src8, - int src_stride, int bit_depth, - int *eps, int *xqd, int32_t *rstbuf) { + int dat_stride, const uint8_t *src8, + int src_stride, int use_highbitdepth, + int bit_depth, int pu_width, + int pu_height, int *eps, int *xqd, + int32_t *rstbuf) { int32_t *flt1 = rstbuf; int32_t *flt2 = flt1 + RESTORATION_TILEPELS_MAX; - int32_t *tmpbuf2 = flt2 + RESTORATION_TILEPELS_MAX; int ep, bestep = 0; int64_t err, besterr = -1; int exqd[2], bestxqd[2] = { 0, 0 }; + int flt1_stride = ((width + 7) & ~7) + 8; + int flt2_stride = ((width + 7) & ~7) + 8; + assert(pu_width == (RESTORATION_PROC_UNIT_SIZE >> 1) || + pu_width == RESTORATION_PROC_UNIT_SIZE); + assert(pu_height == (RESTORATION_PROC_UNIT_SIZE >> 1) || + pu_height == RESTORATION_PROC_UNIT_SIZE); +#if !CONFIG_HIGHBITDEPTH + (void)bit_depth; +#endif for (ep = 0; ep < SGRPROJ_PARAMS; ep++) { int exq[2]; #if CONFIG_HIGHBITDEPTH - if (bit_depth > 8) { + if (use_highbitdepth) { uint16_t *dat = CONVERT_TO_SHORTPTR(dat8); + for (int i = 0; i < height; i += pu_height) + for (int j = 0; j < width; j += pu_width) { + const int w = AOMMIN(pu_width, width - j); + const int h = AOMMIN(pu_height, height - i); + uint16_t *dat_p = dat + i * dat_stride + j; + int32_t *flt1_p = flt1 + i * flt1_stride + j; + int32_t *flt2_p = flt2 + i * flt2_stride + j; #if USE_HIGHPASS_IN_SGRPROJ - av1_highpass_filter_highbd(dat, width, height, dat_stride, flt1, width, - sgr_params[ep].corner, sgr_params[ep].edge); + av1_highpass_filter_highbd(dat_p, w, h, dat_stride, flt1_p, + flt1_stride, sgr_params[ep].corner, + sgr_params[ep].edge); #else - av1_selfguided_restoration_highbd(dat, width, height, dat_stride, flt1, - width, bit_depth, sgr_params[ep].r1, - sgr_params[ep].e1, tmpbuf2); + av1_selfguided_restoration_highbd( + dat_p, w, h, dat_stride, flt1_p, flt1_stride, bit_depth, + sgr_params[ep].r1, sgr_params[ep].e1); #endif // USE_HIGHPASS_IN_SGRPROJ - av1_selfguided_restoration_highbd(dat, width, height, dat_stride, flt2, - width, bit_depth, sgr_params[ep].r2, - sgr_params[ep].e2, tmpbuf2); + av1_selfguided_restoration_highbd( + dat_p, w, h, dat_stride, flt2_p, flt2_stride, bit_depth, + sgr_params[ep].r2, sgr_params[ep].e2); + } } else { #endif + for (int i = 0; i < height; i += pu_height) + for (int j = 0; j < width; j += pu_width) { + const int w = AOMMIN(pu_width, width - j); + const int h = AOMMIN(pu_height, height - i); + uint8_t *dat_p = dat8 + i * dat_stride + j; + int32_t *flt1_p = flt1 + i * flt1_stride + j; + int32_t *flt2_p = flt2 + i * flt2_stride + j; #if USE_HIGHPASS_IN_SGRPROJ - av1_highpass_filter(dat8, width, height, dat_stride, flt1, width, - sgr_params[ep].corner, sgr_params[ep].edge); + av1_highpass_filter(dat_p, w, h, dat_stride, flt1_p, flt1_stride, + sgr_params[ep].corner, sgr_params[ep].edge); #else - av1_selfguided_restoration(dat8, width, height, dat_stride, flt1, width, - sgr_params[ep].r1, sgr_params[ep].e1, tmpbuf2); + av1_selfguided_restoration(dat_p, w, h, dat_stride, flt1_p, flt1_stride, + sgr_params[ep].r1, sgr_params[ep].e1); #endif // USE_HIGHPASS_IN_SGRPROJ - av1_selfguided_restoration(dat8, width, height, dat_stride, flt2, width, - sgr_params[ep].r2, sgr_params[ep].e2, tmpbuf2); + av1_selfguided_restoration(dat_p, w, h, dat_stride, flt2_p, + flt2_stride, sgr_params[ep].r2, + sgr_params[ep].e2); + } #if CONFIG_HIGHBITDEPTH } #endif aom_clear_system_state(); get_proj_subspace(src8, width, height, src_stride, dat8, dat_stride, - bit_depth, flt1, width, flt2, width, exq); + use_highbitdepth, flt1, flt1_stride, flt2, flt2_stride, + exq); aom_clear_system_state(); encode_xq(exq, exqd); - err = finer_search_pixel_proj_error(src8, width, height, src_stride, dat8, - dat_stride, bit_depth, flt1, width, - flt2, width, 2, exqd); + err = finer_search_pixel_proj_error( + src8, width, height, src_stride, dat8, dat_stride, use_highbitdepth, + flt1, flt1_stride, flt2, flt2_stride, 2, exqd); if (besterr == -1 || err < besterr) { bestep = ep; besterr = err; @@ -420,124 +453,258 @@ static int count_sgrproj_bits(SgrprojInfo *sgrproj_info, return bits; } -static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, - int partial_frame, int plane, - RestorationInfo *info, RestorationType *type, - double *best_tile_cost, - YV12_BUFFER_CONFIG *dst_frame) { - SgrprojInfo *sgrproj_info = info->sgrproj_info; - double err, cost_norestore, cost_sgrproj; - int bits; - MACROBLOCK *x = &cpi->td.mb; +struct rest_search_ctxt { + const YV12_BUFFER_CONFIG *src; + AV1_COMP *cpi; + uint8_t *dgd_buffer; + const uint8_t *src_buffer; + int dgd_stride; + int src_stride; + int partial_frame; + RestorationInfo *info; + RestorationType *type; + int64_t *best_tile_cost; + int plane; + int plane_width; + int plane_height; + int nrtiles_x; + int nrtiles_y; + YV12_BUFFER_CONFIG *dst_frame; +}; + +// Fill in ctxt. Returns the number of restoration tiles for this plane +static INLINE int init_rest_search_ctxt( + const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, int partial_frame, int plane, + RestorationInfo *info, RestorationType *type, int64_t *best_tile_cost, + YV12_BUFFER_CONFIG *dst_frame, struct rest_search_ctxt *ctxt) { AV1_COMMON *const cm = &cpi->common; + ctxt->src = src; + ctxt->cpi = cpi; + ctxt->partial_frame = partial_frame; + ctxt->info = info; + ctxt->type = type; + ctxt->best_tile_cost = best_tile_cost; + ctxt->plane = plane; + ctxt->dst_frame = dst_frame; + const YV12_BUFFER_CONFIG *dgd = cm->frame_to_show; - RestorationInfo *rsi = &cpi->rst_search[0]; - int tile_idx, tile_width, tile_height, nhtiles, nvtiles; - int h_start, h_end, v_start, v_end; - int width, height, src_stride, dgd_stride; - uint8_t *dgd_buffer, *src_buffer; if (plane == AOM_PLANE_Y) { - width = src->y_crop_width; - height = src->y_crop_height; - src_buffer = src->y_buffer; - src_stride = src->y_stride; - dgd_buffer = dgd->y_buffer; - dgd_stride = dgd->y_stride; - assert(width == dgd->y_crop_width); - assert(height == dgd->y_crop_height); - assert(width == src->y_crop_width); - assert(height == src->y_crop_height); + ctxt->plane_width = src->y_crop_width; + ctxt->plane_height = src->y_crop_height; + ctxt->src_buffer = src->y_buffer; + ctxt->src_stride = src->y_stride; + ctxt->dgd_buffer = dgd->y_buffer; + ctxt->dgd_stride = dgd->y_stride; + assert(ctxt->plane_width == dgd->y_crop_width); + assert(ctxt->plane_height == dgd->y_crop_height); + assert(ctxt->plane_width == src->y_crop_width); + assert(ctxt->plane_height == src->y_crop_height); } else { - width = src->uv_crop_width; - height = src->uv_crop_height; - src_stride = src->uv_stride; - dgd_stride = dgd->uv_stride; - src_buffer = plane == AOM_PLANE_U ? src->u_buffer : src->v_buffer; - dgd_buffer = plane == AOM_PLANE_U ? dgd->u_buffer : dgd->v_buffer; - assert(width == dgd->uv_crop_width); - assert(height == dgd->uv_crop_height); + ctxt->plane_width = src->uv_crop_width; + ctxt->plane_height = src->uv_crop_height; + ctxt->src_stride = src->uv_stride; + ctxt->dgd_stride = dgd->uv_stride; + ctxt->src_buffer = plane == AOM_PLANE_U ? src->u_buffer : src->v_buffer; + ctxt->dgd_buffer = plane == AOM_PLANE_U ? dgd->u_buffer : dgd->v_buffer; + assert(ctxt->plane_width == dgd->uv_crop_width); + assert(ctxt->plane_height == dgd->uv_crop_height); } - const int ntiles = - av1_get_rest_ntiles(width, height, cm->rst_info[0].restoration_tilesize, - &tile_width, &tile_height, &nhtiles, &nvtiles); - SgrprojInfo ref_sgrproj_info; - set_default_sgrproj(&ref_sgrproj_info); - rsi[plane].frame_restoration_type = RESTORE_SGRPROJ; + return av1_get_rest_ntiles(ctxt->plane_width, ctxt->plane_height, + cm->rst_info[plane].restoration_tilesize, NULL, + NULL, &ctxt->nrtiles_x, &ctxt->nrtiles_y); +} - for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { - rsi[plane].restoration_type[tile_idx] = RESTORE_NONE; +typedef void (*rtile_visitor_t)(const struct rest_search_ctxt *search_ctxt, + int rtile_idx, + const RestorationTileLimits *limits, void *arg); + +static void foreach_rtile_in_tile(const struct rest_search_ctxt *ctxt, + int tile_row, int tile_col, + rtile_visitor_t fun, void *arg) { + const AV1_COMMON *const cm = &ctxt->cpi->common; + const RestorationInfo *rsi = ctxt->cpi->rst_search; + TileInfo tile_info; + + av1_tile_set_row(&tile_info, cm, tile_row); + av1_tile_set_col(&tile_info, cm, tile_col); + + int tile_col_start = tile_info.mi_col_start * MI_SIZE; + int tile_col_end = tile_info.mi_col_end * MI_SIZE; + int tile_row_start = tile_info.mi_row_start * MI_SIZE; + int tile_row_end = tile_info.mi_row_end * MI_SIZE; + if (ctxt->plane > 0) { + tile_col_start = ROUND_POWER_OF_TWO(tile_col_start, cm->subsampling_x); + tile_col_end = ROUND_POWER_OF_TWO(tile_col_end, cm->subsampling_x); + tile_row_start = ROUND_POWER_OF_TWO(tile_row_start, cm->subsampling_y); + tile_row_end = ROUND_POWER_OF_TWO(tile_row_end, cm->subsampling_y); } - // Compute best Sgrproj filters for each tile - for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { - av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width, - tile_height, width, height, 0, 0, &h_start, &h_end, - &v_start, &v_end); - err = sse_restoration_tile(src, cm->frame_to_show, cm, h_start, - h_end - h_start, v_start, v_end - v_start, - (1 << plane)); - // #bits when a tile is not restored - bits = av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, 0); - cost_norestore = RDCOST_DBL(x->rdmult, (bits >> 4), err); - best_tile_cost[tile_idx] = DBL_MAX; - search_selfguided_restoration( - dgd_buffer + v_start * dgd_stride + h_start, h_end - h_start, - v_end - v_start, dgd_stride, - src_buffer + v_start * src_stride + h_start, src_stride, + +#if CONFIG_FRAME_SUPERRES + // If upscaling is enabled, the tile limits need scaling to match the + // upscaled frame where the restoration tiles live. To do this, scale up the + // top-left and bottom-right of the tile. + if (!av1_superres_unscaled(cm)) { + av1_calculate_unscaled_superres_size(&tile_col_start, &tile_row_start, + cm->superres_scale_denominator); + av1_calculate_unscaled_superres_size(&tile_col_end, &tile_row_end, + cm->superres_scale_denominator); + // Make sure we don't fall off the bottom-right of the frame. + tile_col_end = AOMMIN(tile_col_end, ctxt->plane_width); + tile_row_end = AOMMIN(tile_row_end, ctxt->plane_height); + } +#endif // CONFIG_FRAME_SUPERRES + + const int rtile_size = rsi->restoration_tilesize; + const int rtile_col0 = (tile_col_start + rtile_size - 1) / rtile_size; + const int rtile_col1 = + AOMMIN((tile_col_end + rtile_size - 1) / rtile_size, ctxt->nrtiles_x); + const int rtile_row0 = (tile_row_start + rtile_size - 1) / rtile_size; + const int rtile_row1 = + AOMMIN((tile_row_end + rtile_size - 1) / rtile_size, ctxt->nrtiles_y); + + const int rtile_width = AOMMIN(tile_col_end - tile_col_start, rtile_size); + const int rtile_height = AOMMIN(tile_row_end - tile_row_start, rtile_size); + + for (int rtile_row = rtile_row0; rtile_row < rtile_row1; ++rtile_row) { + for (int rtile_col = rtile_col0; rtile_col < rtile_col1; ++rtile_col) { + const int rtile_idx = rtile_row * ctxt->nrtiles_x + rtile_col; + RestorationTileLimits limits = av1_get_rest_tile_limits( + rtile_idx, ctxt->nrtiles_x, ctxt->nrtiles_y, rtile_width, + rtile_height, ctxt->plane_width, +#if CONFIG_STRIPED_LOOP_RESTORATION + ctxt->plane_height, ctxt->plane > 0 ? cm->subsampling_y : 0); +#else + ctxt->plane_height); +#endif + fun(ctxt, rtile_idx, &limits, arg); + } + } +} + +static void search_sgrproj_for_rtile(const struct rest_search_ctxt *ctxt, + int rtile_idx, + const RestorationTileLimits *limits, + void *arg) { + const MACROBLOCK *const x = &ctxt->cpi->td.mb; + const AV1_COMMON *const cm = &ctxt->cpi->common; + RestorationInfo *rsi = ctxt->cpi->rst_search; + SgrprojInfo *sgrproj_info = ctxt->info->sgrproj_info; + + SgrprojInfo *ref_sgrproj_info = (SgrprojInfo *)arg; + + int64_t err = + sse_restoration_tile(ctxt->src, cm->frame_to_show, cm, limits->h_start, + limits->h_end - limits->h_start, limits->v_start, + limits->v_end - limits->v_start, (1 << ctxt->plane)); + // #bits when a tile is not restored + int bits = av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, 0); + double cost_norestore = RDCOST_DBL(x->rdmult, (bits >> 4), err); + ctxt->best_tile_cost[rtile_idx] = INT64_MAX; + + RestorationInfo *plane_rsi = &rsi[ctxt->plane]; + SgrprojInfo *rtile_sgrproj_info = &plane_rsi->sgrproj_info[rtile_idx]; + uint8_t *dgd_start = + ctxt->dgd_buffer + limits->v_start * ctxt->dgd_stride + limits->h_start; + const uint8_t *src_start = + ctxt->src_buffer + limits->v_start * ctxt->src_stride + limits->h_start; + + search_selfguided_restoration( + dgd_start, limits->h_end - limits->h_start, + limits->v_end - limits->v_start, ctxt->dgd_stride, src_start, + ctxt->src_stride, #if CONFIG_HIGHBITDEPTH - cm->bit_depth, + cm->use_highbitdepth, cm->bit_depth, #else - 8, + 0, 8, #endif // CONFIG_HIGHBITDEPTH - &rsi[plane].sgrproj_info[tile_idx].ep, - rsi[plane].sgrproj_info[tile_idx].xqd, cm->rst_internal.tmpbuf); - rsi[plane].restoration_type[tile_idx] = RESTORE_SGRPROJ; - err = try_restoration_tile(src, cpi, rsi, (1 << plane), partial_frame, - tile_idx, 0, 0, dst_frame); - bits = count_sgrproj_bits(&rsi[plane].sgrproj_info[tile_idx], - &ref_sgrproj_info) - << AV1_PROB_COST_SHIFT; - bits += av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, 1); - cost_sgrproj = RDCOST_DBL(x->rdmult, (bits >> 4), err); - if (cost_sgrproj >= cost_norestore) { - type[tile_idx] = RESTORE_NONE; - } else { - type[tile_idx] = RESTORE_SGRPROJ; - memcpy(&sgrproj_info[tile_idx], &rsi[plane].sgrproj_info[tile_idx], - sizeof(sgrproj_info[tile_idx])); - memcpy(&ref_sgrproj_info, &sgrproj_info[tile_idx], - sizeof(ref_sgrproj_info)); - best_tile_cost[tile_idx] = err; + rsi[ctxt->plane].procunit_width, rsi[ctxt->plane].procunit_height, + &rtile_sgrproj_info->ep, rtile_sgrproj_info->xqd, + cm->rst_internal.tmpbuf); + plane_rsi->restoration_type[rtile_idx] = RESTORE_SGRPROJ; + err = try_restoration_tile(ctxt->src, ctxt->cpi, rsi, (1 << ctxt->plane), + ctxt->partial_frame, rtile_idx, ctxt->dst_frame); + bits = + count_sgrproj_bits(&plane_rsi->sgrproj_info[rtile_idx], ref_sgrproj_info) + << AV1_PROB_COST_SHIFT; + bits += av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, 1); + double cost_sgrproj = RDCOST_DBL(x->rdmult, (bits >> 4), err); + if (cost_sgrproj >= cost_norestore) { + ctxt->type[rtile_idx] = RESTORE_NONE; + } else { + ctxt->type[rtile_idx] = RESTORE_SGRPROJ; + *ref_sgrproj_info = sgrproj_info[rtile_idx] = + plane_rsi->sgrproj_info[rtile_idx]; + ctxt->best_tile_cost[rtile_idx] = err; + } + plane_rsi->restoration_type[rtile_idx] = RESTORE_NONE; +} + +static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, + int partial_frame, int plane, + RestorationInfo *info, RestorationType *type, + int64_t *best_tile_cost, + YV12_BUFFER_CONFIG *dst_frame) { + struct rest_search_ctxt ctxt; + const int nrtiles = + init_rest_search_ctxt(src, cpi, partial_frame, plane, info, type, + best_tile_cost, dst_frame, &ctxt); + + RestorationInfo *plane_rsi = &cpi->rst_search[plane]; + plane_rsi->frame_restoration_type = RESTORE_SGRPROJ; + for (int rtile_idx = 0; rtile_idx < nrtiles; ++rtile_idx) { + plane_rsi->restoration_type[rtile_idx] = RESTORE_NONE; + } + + // Compute best Sgrproj filters for each rtile, one (encoder/decoder) + // tile at a time. + const AV1_COMMON *const cm = &cpi->common; +#if CONFIG_HIGHBITDEPTH + if (cm->use_highbitdepth) + extend_frame_highbd(CONVERT_TO_SHORTPTR(ctxt.dgd_buffer), ctxt.plane_width, + ctxt.plane_height, ctxt.dgd_stride, SGRPROJ_BORDER_HORZ, + SGRPROJ_BORDER_VERT); + else +#endif + extend_frame(ctxt.dgd_buffer, ctxt.plane_width, ctxt.plane_height, + ctxt.dgd_stride, SGRPROJ_BORDER_HORZ, SGRPROJ_BORDER_VERT); + + for (int tile_row = 0; tile_row < cm->tile_rows; ++tile_row) { + for (int tile_col = 0; tile_col < cm->tile_cols; ++tile_col) { + SgrprojInfo ref_sgrproj_info; + set_default_sgrproj(&ref_sgrproj_info); + foreach_rtile_in_tile(&ctxt, tile_row, tile_col, search_sgrproj_for_rtile, + &ref_sgrproj_info); } - rsi[plane].restoration_type[tile_idx] = RESTORE_NONE; } + // Cost for Sgrproj filtering + SgrprojInfo ref_sgrproj_info; set_default_sgrproj(&ref_sgrproj_info); - bits = frame_level_restore_bits[rsi[plane].frame_restoration_type] - << AV1_PROB_COST_SHIFT; - for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { - bits += - av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, type[tile_idx] != RESTORE_NONE); - memcpy(&rsi[plane].sgrproj_info[tile_idx], &sgrproj_info[tile_idx], - sizeof(sgrproj_info[tile_idx])); - if (type[tile_idx] == RESTORE_SGRPROJ) { - bits += count_sgrproj_bits(&rsi[plane].sgrproj_info[tile_idx], + SgrprojInfo *sgrproj_info = info->sgrproj_info; + + int bits = frame_level_restore_bits[plane_rsi->frame_restoration_type] + << AV1_PROB_COST_SHIFT; + for (int rtile_idx = 0; rtile_idx < nrtiles; ++rtile_idx) { + bits += av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, + type[rtile_idx] != RESTORE_NONE); + plane_rsi->sgrproj_info[rtile_idx] = sgrproj_info[rtile_idx]; + if (type[rtile_idx] == RESTORE_SGRPROJ) { + bits += count_sgrproj_bits(&plane_rsi->sgrproj_info[rtile_idx], &ref_sgrproj_info) << AV1_PROB_COST_SHIFT; - memcpy(&ref_sgrproj_info, &rsi[plane].sgrproj_info[tile_idx], - sizeof(ref_sgrproj_info)); + ref_sgrproj_info = plane_rsi->sgrproj_info[rtile_idx]; } - rsi[plane].restoration_type[tile_idx] = type[tile_idx]; + plane_rsi->restoration_type[rtile_idx] = type[rtile_idx]; } - err = try_restoration_frame(src, cpi, rsi, (1 << plane), partial_frame, - dst_frame); - cost_sgrproj = RDCOST_DBL(x->rdmult, (bits >> 4), err); - + int64_t err = try_restoration_frame(src, cpi, cpi->rst_search, (1 << plane), + partial_frame, dst_frame); + double cost_sgrproj = RDCOST_DBL(cpi->td.mb.rdmult, (bits >> 4), err); return cost_sgrproj; } -static double find_average(uint8_t *src, int h_start, int h_end, int v_start, - int v_end, int stride) { +static double find_average(const uint8_t *src, int h_start, int h_end, + int v_start, int v_end, int stride) { uint64_t sum = 0; double avg = 0; int i, j; @@ -548,47 +715,51 @@ static double find_average(uint8_t *src, int h_start, int h_end, int v_start, return avg; } -static void compute_stats(uint8_t *dgd, uint8_t *src, int h_start, int h_end, +static void compute_stats(int wiener_win, const uint8_t *dgd, + const uint8_t *src, int h_start, int h_end, int v_start, int v_end, int dgd_stride, int src_stride, double *M, double *H) { int i, j, k, l; double Y[WIENER_WIN2]; + const int wiener_win2 = wiener_win * wiener_win; + const int wiener_halfwin = (wiener_win >> 1); const double avg = find_average(dgd, h_start, h_end, v_start, v_end, dgd_stride); - memset(M, 0, sizeof(*M) * WIENER_WIN2); - memset(H, 0, sizeof(*H) * WIENER_WIN2 * WIENER_WIN2); + memset(M, 0, sizeof(*M) * wiener_win2); + memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2); for (i = v_start; i < v_end; i++) { for (j = h_start; j < h_end; j++) { const double X = (double)src[i * src_stride + j] - avg; int idx = 0; - for (k = -WIENER_HALFWIN; k <= WIENER_HALFWIN; k++) { - for (l = -WIENER_HALFWIN; l <= WIENER_HALFWIN; l++) { + for (k = -wiener_halfwin; k <= wiener_halfwin; k++) { + for (l = -wiener_halfwin; l <= wiener_halfwin; l++) { Y[idx] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg; idx++; } } - for (k = 0; k < WIENER_WIN2; ++k) { + assert(idx == wiener_win2); + for (k = 0; k < wiener_win2; ++k) { M[k] += Y[k] * X; - H[k * WIENER_WIN2 + k] += Y[k] * Y[k]; - for (l = k + 1; l < WIENER_WIN2; ++l) { + H[k * wiener_win2 + k] += Y[k] * Y[k]; + for (l = k + 1; l < wiener_win2; ++l) { // H is a symmetric matrix, so we only need to fill out the upper // triangle here. We can copy it down to the lower triangle outside // the (i, j) loops. - H[k * WIENER_WIN2 + l] += Y[k] * Y[l]; + H[k * wiener_win2 + l] += Y[k] * Y[l]; } } } } - for (k = 0; k < WIENER_WIN2; ++k) { - for (l = k + 1; l < WIENER_WIN2; ++l) { - H[l * WIENER_WIN2 + k] = H[k * WIENER_WIN2 + l]; + for (k = 0; k < wiener_win2; ++k) { + for (l = k + 1; l < wiener_win2; ++l) { + H[l * wiener_win2 + k] = H[k * wiener_win2 + l]; } } } #if CONFIG_HIGHBITDEPTH -static double find_average_highbd(uint16_t *src, int h_start, int h_end, +static double find_average_highbd(const uint16_t *src, int h_start, int h_end, int v_start, int v_end, int stride) { uint64_t sum = 0; double avg = 0; @@ -600,168 +771,184 @@ static double find_average_highbd(uint16_t *src, int h_start, int h_end, return avg; } -static void compute_stats_highbd(uint8_t *dgd8, uint8_t *src8, int h_start, - int h_end, int v_start, int v_end, - int dgd_stride, int src_stride, double *M, - double *H) { +static void compute_stats_highbd(int wiener_win, const uint8_t *dgd8, + const uint8_t *src8, int h_start, int h_end, + int v_start, int v_end, int dgd_stride, + int src_stride, double *M, double *H) { int i, j, k, l; double Y[WIENER_WIN2]; - uint16_t *src = CONVERT_TO_SHORTPTR(src8); - uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8); + const int wiener_win2 = wiener_win * wiener_win; + const int wiener_halfwin = (wiener_win >> 1); + const uint16_t *src = CONVERT_TO_SHORTPTR(src8); + const uint16_t *dgd = CONVERT_TO_SHORTPTR(dgd8); const double avg = find_average_highbd(dgd, h_start, h_end, v_start, v_end, dgd_stride); - memset(M, 0, sizeof(*M) * WIENER_WIN2); - memset(H, 0, sizeof(*H) * WIENER_WIN2 * WIENER_WIN2); + memset(M, 0, sizeof(*M) * wiener_win2); + memset(H, 0, sizeof(*H) * wiener_win2 * wiener_win2); for (i = v_start; i < v_end; i++) { for (j = h_start; j < h_end; j++) { const double X = (double)src[i * src_stride + j] - avg; int idx = 0; - for (k = -WIENER_HALFWIN; k <= WIENER_HALFWIN; k++) { - for (l = -WIENER_HALFWIN; l <= WIENER_HALFWIN; l++) { + for (k = -wiener_halfwin; k <= wiener_halfwin; k++) { + for (l = -wiener_halfwin; l <= wiener_halfwin; l++) { Y[idx] = (double)dgd[(i + l) * dgd_stride + (j + k)] - avg; idx++; } } - for (k = 0; k < WIENER_WIN2; ++k) { + assert(idx == wiener_win2); + for (k = 0; k < wiener_win2; ++k) { M[k] += Y[k] * X; - H[k * WIENER_WIN2 + k] += Y[k] * Y[k]; - for (l = k + 1; l < WIENER_WIN2; ++l) { + H[k * wiener_win2 + k] += Y[k] * Y[k]; + for (l = k + 1; l < wiener_win2; ++l) { // H is a symmetric matrix, so we only need to fill out the upper // triangle here. We can copy it down to the lower triangle outside // the (i, j) loops. - H[k * WIENER_WIN2 + l] += Y[k] * Y[l]; + H[k * wiener_win2 + l] += Y[k] * Y[l]; } } } } - for (k = 0; k < WIENER_WIN2; ++k) { - for (l = k + 1; l < WIENER_WIN2; ++l) { - H[l * WIENER_WIN2 + k] = H[k * WIENER_WIN2 + l]; + for (k = 0; k < wiener_win2; ++k) { + for (l = k + 1; l < wiener_win2; ++l) { + H[l * wiener_win2 + k] = H[k * wiener_win2 + l]; } } } #endif // CONFIG_HIGHBITDEPTH -static INLINE int wrap_index(int i) { - return (i >= WIENER_HALFWIN1 ? WIENER_WIN - 1 - i : i); +static INLINE int wrap_index(int i, int wiener_win) { + const int wiener_halfwin1 = (wiener_win >> 1) + 1; + return (i >= wiener_halfwin1 ? wiener_win - 1 - i : i); } // Fix vector b, update vector a -static void update_a_sep_sym(double **Mc, double **Hc, double *a, double *b) { +static void update_a_sep_sym(int wiener_win, double **Mc, double **Hc, + double *a, double *b) { int i, j; double S[WIENER_WIN]; double A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1]; - int w, w2; + const int wiener_win2 = wiener_win * wiener_win; + const int wiener_halfwin1 = (wiener_win >> 1) + 1; memset(A, 0, sizeof(A)); memset(B, 0, sizeof(B)); - for (i = 0; i < WIENER_WIN; i++) { - for (j = 0; j < WIENER_WIN; ++j) { - const int jj = wrap_index(j); + for (i = 0; i < wiener_win; i++) { + for (j = 0; j < wiener_win; ++j) { + const int jj = wrap_index(j, wiener_win); A[jj] += Mc[i][j] * b[i]; } } - for (i = 0; i < WIENER_WIN; i++) { - for (j = 0; j < WIENER_WIN; j++) { + for (i = 0; i < wiener_win; i++) { + for (j = 0; j < wiener_win; j++) { int k, l; - for (k = 0; k < WIENER_WIN; ++k) - for (l = 0; l < WIENER_WIN; ++l) { - const int kk = wrap_index(k); - const int ll = wrap_index(l); - B[ll * WIENER_HALFWIN1 + kk] += - Hc[j * WIENER_WIN + i][k * WIENER_WIN2 + l] * b[i] * b[j]; + for (k = 0; k < wiener_win; ++k) + for (l = 0; l < wiener_win; ++l) { + const int kk = wrap_index(k, wiener_win); + const int ll = wrap_index(l, wiener_win); + B[ll * wiener_halfwin1 + kk] += + Hc[j * wiener_win + i][k * wiener_win2 + l] * b[i] * b[j]; } } } // Normalization enforcement in the system of equations itself - w = WIENER_WIN; - w2 = (w >> 1) + 1; - for (i = 0; i < w2 - 1; ++i) + for (i = 0; i < wiener_halfwin1 - 1; ++i) A[i] -= - A[w2 - 1] * 2 + B[i * w2 + w2 - 1] - 2 * B[(w2 - 1) * w2 + (w2 - 1)]; - for (i = 0; i < w2 - 1; ++i) - for (j = 0; j < w2 - 1; ++j) - B[i * w2 + j] -= 2 * (B[i * w2 + (w2 - 1)] + B[(w2 - 1) * w2 + j] - - 2 * B[(w2 - 1) * w2 + (w2 - 1)]); - if (linsolve(w2 - 1, B, w2, A, S)) { - S[w2 - 1] = 1.0; - for (i = w2; i < w; ++i) { - S[i] = S[w - 1 - i]; - S[w2 - 1] -= 2 * S[i]; + A[wiener_halfwin1 - 1] * 2 + + B[i * wiener_halfwin1 + wiener_halfwin1 - 1] - + 2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 + (wiener_halfwin1 - 1)]; + for (i = 0; i < wiener_halfwin1 - 1; ++i) + for (j = 0; j < wiener_halfwin1 - 1; ++j) + B[i * wiener_halfwin1 + j] -= + 2 * (B[i * wiener_halfwin1 + (wiener_halfwin1 - 1)] + + B[(wiener_halfwin1 - 1) * wiener_halfwin1 + j] - + 2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 + + (wiener_halfwin1 - 1)]); + if (linsolve(wiener_halfwin1 - 1, B, wiener_halfwin1, A, S)) { + S[wiener_halfwin1 - 1] = 1.0; + for (i = wiener_halfwin1; i < wiener_win; ++i) { + S[i] = S[wiener_win - 1 - i]; + S[wiener_halfwin1 - 1] -= 2 * S[i]; } - memcpy(a, S, w * sizeof(*a)); + memcpy(a, S, wiener_win * sizeof(*a)); } } // Fix vector a, update vector b -static void update_b_sep_sym(double **Mc, double **Hc, double *a, double *b) { +static void update_b_sep_sym(int wiener_win, double **Mc, double **Hc, + double *a, double *b) { int i, j; double S[WIENER_WIN]; double A[WIENER_HALFWIN1], B[WIENER_HALFWIN1 * WIENER_HALFWIN1]; - int w, w2; + const int wiener_win2 = wiener_win * wiener_win; + const int wiener_halfwin1 = (wiener_win >> 1) + 1; memset(A, 0, sizeof(A)); memset(B, 0, sizeof(B)); - for (i = 0; i < WIENER_WIN; i++) { - const int ii = wrap_index(i); - for (j = 0; j < WIENER_WIN; j++) A[ii] += Mc[i][j] * a[j]; + for (i = 0; i < wiener_win; i++) { + const int ii = wrap_index(i, wiener_win); + for (j = 0; j < wiener_win; j++) A[ii] += Mc[i][j] * a[j]; } - for (i = 0; i < WIENER_WIN; i++) { - for (j = 0; j < WIENER_WIN; j++) { - const int ii = wrap_index(i); - const int jj = wrap_index(j); + for (i = 0; i < wiener_win; i++) { + for (j = 0; j < wiener_win; j++) { + const int ii = wrap_index(i, wiener_win); + const int jj = wrap_index(j, wiener_win); int k, l; - for (k = 0; k < WIENER_WIN; ++k) - for (l = 0; l < WIENER_WIN; ++l) - B[jj * WIENER_HALFWIN1 + ii] += - Hc[i * WIENER_WIN + j][k * WIENER_WIN2 + l] * a[k] * a[l]; + for (k = 0; k < wiener_win; ++k) + for (l = 0; l < wiener_win; ++l) + B[jj * wiener_halfwin1 + ii] += + Hc[i * wiener_win + j][k * wiener_win2 + l] * a[k] * a[l]; } } // Normalization enforcement in the system of equations itself - w = WIENER_WIN; - w2 = WIENER_HALFWIN1; - for (i = 0; i < w2 - 1; ++i) + for (i = 0; i < wiener_halfwin1 - 1; ++i) A[i] -= - A[w2 - 1] * 2 + B[i * w2 + w2 - 1] - 2 * B[(w2 - 1) * w2 + (w2 - 1)]; - for (i = 0; i < w2 - 1; ++i) - for (j = 0; j < w2 - 1; ++j) - B[i * w2 + j] -= 2 * (B[i * w2 + (w2 - 1)] + B[(w2 - 1) * w2 + j] - - 2 * B[(w2 - 1) * w2 + (w2 - 1)]); - if (linsolve(w2 - 1, B, w2, A, S)) { - S[w2 - 1] = 1.0; - for (i = w2; i < w; ++i) { - S[i] = S[w - 1 - i]; - S[w2 - 1] -= 2 * S[i]; + A[wiener_halfwin1 - 1] * 2 + + B[i * wiener_halfwin1 + wiener_halfwin1 - 1] - + 2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 + (wiener_halfwin1 - 1)]; + for (i = 0; i < wiener_halfwin1 - 1; ++i) + for (j = 0; j < wiener_halfwin1 - 1; ++j) + B[i * wiener_halfwin1 + j] -= + 2 * (B[i * wiener_halfwin1 + (wiener_halfwin1 - 1)] + + B[(wiener_halfwin1 - 1) * wiener_halfwin1 + j] - + 2 * B[(wiener_halfwin1 - 1) * wiener_halfwin1 + + (wiener_halfwin1 - 1)]); + if (linsolve(wiener_halfwin1 - 1, B, wiener_halfwin1, A, S)) { + S[wiener_halfwin1 - 1] = 1.0; + for (i = wiener_halfwin1; i < wiener_win; ++i) { + S[i] = S[wiener_win - 1 - i]; + S[wiener_halfwin1 - 1] -= 2 * S[i]; } - memcpy(b, S, w * sizeof(*b)); + memcpy(b, S, wiener_win * sizeof(*b)); } } -static int wiener_decompose_sep_sym(double *M, double *H, double *a, - double *b) { +static int wiener_decompose_sep_sym(int wiener_win, double *M, double *H, + double *a, double *b) { static const int init_filt[WIENER_WIN] = { WIENER_FILT_TAP0_MIDV, WIENER_FILT_TAP1_MIDV, WIENER_FILT_TAP2_MIDV, WIENER_FILT_TAP3_MIDV, WIENER_FILT_TAP2_MIDV, WIENER_FILT_TAP1_MIDV, WIENER_FILT_TAP0_MIDV, }; - int i, j, iter; double *Hc[WIENER_WIN2]; double *Mc[WIENER_WIN]; - for (i = 0; i < WIENER_WIN; i++) { - Mc[i] = M + i * WIENER_WIN; - for (j = 0; j < WIENER_WIN; j++) { - Hc[i * WIENER_WIN + j] = - H + i * WIENER_WIN * WIENER_WIN2 + j * WIENER_WIN; - } + int i, j, iter; + const int plane_off = (WIENER_WIN - wiener_win) >> 1; + const int wiener_win2 = wiener_win * wiener_win; + for (i = 0; i < wiener_win; i++) { + a[i] = b[i] = (double)init_filt[i + plane_off] / WIENER_FILT_STEP; } - for (i = 0; i < WIENER_WIN; i++) { - a[i] = b[i] = (double)init_filt[i] / WIENER_FILT_STEP; + for (i = 0; i < wiener_win; i++) { + Mc[i] = M + i * wiener_win; + for (j = 0; j < wiener_win; j++) { + Hc[i * wiener_win + j] = + H + i * wiener_win * wiener_win2 + j * wiener_win; + } } iter = 1; while (iter < NUM_WIENER_ITERS) { - update_a_sep_sym(Mc, Hc, a, b); - update_b_sep_sym(Mc, Hc, a, b); + update_a_sep_sym(wiener_win, Mc, Hc, a, b); + update_b_sep_sym(wiener_win, Mc, Hc, a, b); iter++; } return 1; @@ -770,14 +957,16 @@ static int wiener_decompose_sep_sym(double *M, double *H, double *a, // Computes the function x'*H*x - x'*M for the learned 2D filter x, and compares // against identity filters; Final score is defined as the difference between // the function values -static double compute_score(double *M, double *H, InterpKernel vfilt, - InterpKernel hfilt) { +static double compute_score(int wiener_win, double *M, double *H, + InterpKernel vfilt, InterpKernel hfilt) { double ab[WIENER_WIN * WIENER_WIN]; int i, k, l; double P = 0, Q = 0; double iP = 0, iQ = 0; double Score, iScore; double a[WIENER_WIN], b[WIENER_WIN]; + const int plane_off = (WIENER_WIN - wiener_win) >> 1; + const int wiener_win2 = wiener_win * wiener_win; aom_clear_system_state(); @@ -788,32 +977,41 @@ static double compute_score(double *M, double *H, InterpKernel vfilt, a[WIENER_HALFWIN] -= 2 * a[i]; b[WIENER_HALFWIN] -= 2 * b[i]; } - for (k = 0; k < WIENER_WIN; ++k) { - for (l = 0; l < WIENER_WIN; ++l) ab[k * WIENER_WIN + l] = a[l] * b[k]; + memset(ab, 0, sizeof(ab)); + for (k = 0; k < wiener_win; ++k) { + for (l = 0; l < wiener_win; ++l) + ab[k * wiener_win + l] = a[l + plane_off] * b[k + plane_off]; } - for (k = 0; k < WIENER_WIN2; ++k) { + for (k = 0; k < wiener_win2; ++k) { P += ab[k] * M[k]; - for (l = 0; l < WIENER_WIN2; ++l) - Q += ab[k] * H[k * WIENER_WIN2 + l] * ab[l]; + for (l = 0; l < wiener_win2; ++l) + Q += ab[k] * H[k * wiener_win2 + l] * ab[l]; } Score = Q - 2 * P; - iP = M[WIENER_WIN2 >> 1]; - iQ = H[(WIENER_WIN2 >> 1) * WIENER_WIN2 + (WIENER_WIN2 >> 1)]; + iP = M[wiener_win2 >> 1]; + iQ = H[(wiener_win2 >> 1) * wiener_win2 + (wiener_win2 >> 1)]; iScore = iQ - 2 * iP; return Score - iScore; } -static void quantize_sym_filter(double *f, InterpKernel fi) { +static void quantize_sym_filter(int wiener_win, double *f, InterpKernel fi) { int i; - for (i = 0; i < WIENER_HALFWIN; ++i) { + const int wiener_halfwin = (wiener_win >> 1); + for (i = 0; i < wiener_halfwin; ++i) { fi[i] = RINT(f[i] * WIENER_FILT_STEP); } // Specialize for 7-tap filter - fi[0] = CLIP(fi[0], WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP0_MAXV); - fi[1] = CLIP(fi[1], WIENER_FILT_TAP1_MINV, WIENER_FILT_TAP1_MAXV); - fi[2] = CLIP(fi[2], WIENER_FILT_TAP2_MINV, WIENER_FILT_TAP2_MAXV); + if (wiener_win == WIENER_WIN) { + fi[0] = CLIP(fi[0], WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP0_MAXV); + fi[1] = CLIP(fi[1], WIENER_FILT_TAP1_MINV, WIENER_FILT_TAP1_MAXV); + fi[2] = CLIP(fi[2], WIENER_FILT_TAP2_MINV, WIENER_FILT_TAP2_MAXV); + } else { + fi[2] = CLIP(fi[1], WIENER_FILT_TAP2_MINV, WIENER_FILT_TAP2_MAXV); + fi[1] = CLIP(fi[0], WIENER_FILT_TAP1_MINV, WIENER_FILT_TAP1_MAXV); + fi[0] = 0; + } // Satisfy filter constraints fi[WIENER_WIN - 1] = fi[0]; fi[WIENER_WIN - 2] = fi[1]; @@ -822,14 +1020,15 @@ static void quantize_sym_filter(double *f, InterpKernel fi) { fi[3] = -2 * (fi[0] + fi[1] + fi[2]); } -static int count_wiener_bits(WienerInfo *wiener_info, +static int count_wiener_bits(int wiener_win, WienerInfo *wiener_info, WienerInfo *ref_wiener_info) { int bits = 0; - bits += aom_count_primitive_refsubexpfin( - WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1, - WIENER_FILT_TAP0_SUBEXP_K, - ref_wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV, - wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV); + if (wiener_win == WIENER_WIN) + bits += aom_count_primitive_refsubexpfin( + WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1, + WIENER_FILT_TAP0_SUBEXP_K, + ref_wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV, + wiener_info->vfilter[0] - WIENER_FILT_TAP0_MINV); bits += aom_count_primitive_refsubexpfin( WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1, WIENER_FILT_TAP1_SUBEXP_K, @@ -840,11 +1039,12 @@ static int count_wiener_bits(WienerInfo *wiener_info, WIENER_FILT_TAP2_SUBEXP_K, ref_wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV, wiener_info->vfilter[2] - WIENER_FILT_TAP2_MINV); - bits += aom_count_primitive_refsubexpfin( - WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1, - WIENER_FILT_TAP0_SUBEXP_K, - ref_wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV, - wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV); + if (wiener_win == WIENER_WIN) + bits += aom_count_primitive_refsubexpfin( + WIENER_FILT_TAP0_MAXV - WIENER_FILT_TAP0_MINV + 1, + WIENER_FILT_TAP0_SUBEXP_K, + ref_wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV, + wiener_info->hfilter[0] - WIENER_FILT_TAP0_MINV); bits += aom_count_primitive_refsubexpfin( WIENER_FILT_TAP1_MAXV - WIENER_FILT_TAP1_MINV + 1, WIENER_FILT_TAP1_SUBEXP_K, @@ -861,11 +1061,13 @@ static int count_wiener_bits(WienerInfo *wiener_info, #define USE_WIENER_REFINEMENT_SEARCH 1 static int64_t finer_tile_search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, RestorationInfo *rsi, - int start_step, int plane, int tile_idx, + int start_step, int plane, + int wiener_win, int tile_idx, int partial_frame, YV12_BUFFER_CONFIG *dst_frame) { + const int plane_off = (WIENER_WIN - wiener_win) >> 1; int64_t err = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame, - tile_idx, 0, 0, dst_frame); + tile_idx, dst_frame); (void)start_step; #if USE_WIENER_REFINEMENT_SEARCH int64_t err2; @@ -875,7 +1077,7 @@ static int64_t finer_tile_search_wiener(const YV12_BUFFER_CONFIG *src, WIENER_FILT_TAP2_MAXV }; // printf("err pre = %"PRId64"\n", err); for (int s = start_step; s >= 1; s >>= 1) { - for (int p = 0; p < WIENER_HALFWIN; ++p) { + for (int p = plane_off; p < WIENER_HALFWIN; ++p) { int skip = 0; do { if (rsi[plane].wiener_info[tile_idx].hfilter[p] - s >= tap_min[p]) { @@ -883,7 +1085,7 @@ static int64_t finer_tile_search_wiener(const YV12_BUFFER_CONFIG *src, rsi[plane].wiener_info[tile_idx].hfilter[WIENER_WIN - p - 1] -= s; rsi[plane].wiener_info[tile_idx].hfilter[WIENER_HALFWIN] += 2 * s; err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame, - tile_idx, 0, 0, dst_frame); + tile_idx, dst_frame); if (err2 > err) { rsi[plane].wiener_info[tile_idx].hfilter[p] += s; rsi[plane].wiener_info[tile_idx].hfilter[WIENER_WIN - p - 1] += s; @@ -904,7 +1106,7 @@ static int64_t finer_tile_search_wiener(const YV12_BUFFER_CONFIG *src, rsi[plane].wiener_info[tile_idx].hfilter[WIENER_WIN - p - 1] += s; rsi[plane].wiener_info[tile_idx].hfilter[WIENER_HALFWIN] -= 2 * s; err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame, - tile_idx, 0, 0, dst_frame); + tile_idx, dst_frame); if (err2 > err) { rsi[plane].wiener_info[tile_idx].hfilter[p] -= s; rsi[plane].wiener_info[tile_idx].hfilter[WIENER_WIN - p - 1] -= s; @@ -918,7 +1120,7 @@ static int64_t finer_tile_search_wiener(const YV12_BUFFER_CONFIG *src, break; } while (1); } - for (int p = 0; p < WIENER_HALFWIN; ++p) { + for (int p = plane_off; p < WIENER_HALFWIN; ++p) { int skip = 0; do { if (rsi[plane].wiener_info[tile_idx].vfilter[p] - s >= tap_min[p]) { @@ -926,7 +1128,7 @@ static int64_t finer_tile_search_wiener(const YV12_BUFFER_CONFIG *src, rsi[plane].wiener_info[tile_idx].vfilter[WIENER_WIN - p - 1] -= s; rsi[plane].wiener_info[tile_idx].vfilter[WIENER_HALFWIN] += 2 * s; err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame, - tile_idx, 0, 0, dst_frame); + tile_idx, dst_frame); if (err2 > err) { rsi[plane].wiener_info[tile_idx].vfilter[p] += s; rsi[plane].wiener_info[tile_idx].vfilter[WIENER_WIN - p - 1] += s; @@ -947,7 +1149,7 @@ static int64_t finer_tile_search_wiener(const YV12_BUFFER_CONFIG *src, rsi[plane].wiener_info[tile_idx].vfilter[WIENER_WIN - p - 1] += s; rsi[plane].wiener_info[tile_idx].vfilter[WIENER_HALFWIN] -= 2 * s; err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame, - tile_idx, 0, 0, dst_frame); + tile_idx, dst_frame); if (err2 > err) { rsi[plane].wiener_info[tile_idx].vfilter[p] -= s; rsi[plane].wiener_info[tile_idx].vfilter[WIENER_WIN - p - 1] -= s; @@ -967,154 +1169,157 @@ static int64_t finer_tile_search_wiener(const YV12_BUFFER_CONFIG *src, return err; } -static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, - int partial_frame, int plane, RestorationInfo *info, - RestorationType *type, double *best_tile_cost, - YV12_BUFFER_CONFIG *dst_frame) { - WienerInfo *wiener_info = info->wiener_info; - AV1_COMMON *const cm = &cpi->common; - RestorationInfo *rsi = cpi->rst_search; - int64_t err; - int bits; - double cost_wiener, cost_norestore; - MACROBLOCK *x = &cpi->td.mb; +static void search_wiener_for_rtile(const struct rest_search_ctxt *ctxt, + int rtile_idx, + const RestorationTileLimits *limits, + void *arg) { + const MACROBLOCK *const x = &ctxt->cpi->td.mb; + const AV1_COMMON *const cm = &ctxt->cpi->common; + RestorationInfo *rsi = ctxt->cpi->rst_search; + + const int wiener_win = + (ctxt->plane == AOM_PLANE_Y) ? WIENER_WIN : WIENER_WIN_CHROMA; + double M[WIENER_WIN2]; double H[WIENER_WIN2 * WIENER_WIN2]; double vfilterd[WIENER_WIN], hfilterd[WIENER_WIN]; - const YV12_BUFFER_CONFIG *dgd = cm->frame_to_show; - int width, height, src_stride, dgd_stride; - uint8_t *dgd_buffer, *src_buffer; - if (plane == AOM_PLANE_Y) { - width = src->y_crop_width; - height = src->y_crop_height; - src_buffer = src->y_buffer; - src_stride = src->y_stride; - dgd_buffer = dgd->y_buffer; - dgd_stride = dgd->y_stride; - assert(width == dgd->y_crop_width); - assert(height == dgd->y_crop_height); - assert(width == src->y_crop_width); - assert(height == src->y_crop_height); - } else { - width = src->uv_crop_width; - height = src->uv_crop_height; - src_stride = src->uv_stride; - dgd_stride = dgd->uv_stride; - src_buffer = plane == AOM_PLANE_U ? src->u_buffer : src->v_buffer; - dgd_buffer = plane == AOM_PLANE_U ? dgd->u_buffer : dgd->v_buffer; - assert(width == dgd->uv_crop_width); - assert(height == dgd->uv_crop_height); - } - double score; - int tile_idx, tile_width, tile_height, nhtiles, nvtiles; - int h_start, h_end, v_start, v_end; - const int ntiles = av1_get_rest_ntiles( - width, height, cm->rst_info[plane].restoration_tilesize, &tile_width, - &tile_height, &nhtiles, &nvtiles); - WienerInfo ref_wiener_info; - set_default_wiener(&ref_wiener_info); - rsi[plane].frame_restoration_type = RESTORE_WIENER; + WienerInfo *ref_wiener_info = (WienerInfo *)arg; - for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { - rsi[plane].restoration_type[tile_idx] = RESTORE_NONE; - } + int64_t err = + sse_restoration_tile(ctxt->src, cm->frame_to_show, cm, limits->h_start, + limits->h_end - limits->h_start, limits->v_start, + limits->v_end - limits->v_start, (1 << ctxt->plane)); + // #bits when a tile is not restored + int bits = av1_cost_bit(RESTORE_NONE_WIENER_PROB, 0); + double cost_norestore = RDCOST_DBL(x->rdmult, (bits >> 4), err); + ctxt->best_tile_cost[rtile_idx] = INT64_MAX; -// Construct a (WIENER_HALFWIN)-pixel border around the frame #if CONFIG_HIGHBITDEPTH if (cm->use_highbitdepth) - extend_frame_highbd(CONVERT_TO_SHORTPTR(dgd_buffer), width, height, - dgd_stride); + compute_stats_highbd(wiener_win, ctxt->dgd_buffer, ctxt->src_buffer, + limits->h_start, limits->h_end, limits->v_start, + limits->v_end, ctxt->dgd_stride, ctxt->src_stride, M, + H); else -#endif - extend_frame(dgd_buffer, width, height, dgd_stride); - - // Compute best Wiener filters for each tile - for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { - av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width, - tile_height, width, height, 0, 0, &h_start, &h_end, - &v_start, &v_end); - err = sse_restoration_tile(src, cm->frame_to_show, cm, h_start, - h_end - h_start, v_start, v_end - v_start, - (1 << plane)); - // #bits when a tile is not restored - bits = av1_cost_bit(RESTORE_NONE_WIENER_PROB, 0); - cost_norestore = RDCOST_DBL(x->rdmult, (bits >> 4), err); - best_tile_cost[tile_idx] = DBL_MAX; - - av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width, - tile_height, width, height, 0, 0, &h_start, &h_end, - &v_start, &v_end); -#if CONFIG_HIGHBITDEPTH - if (cm->use_highbitdepth) - compute_stats_highbd(dgd_buffer, src_buffer, h_start, h_end, v_start, - v_end, dgd_stride, src_stride, M, H); - else #endif // CONFIG_HIGHBITDEPTH - compute_stats(dgd_buffer, src_buffer, h_start, h_end, v_start, v_end, - dgd_stride, src_stride, M, H); + compute_stats(wiener_win, ctxt->dgd_buffer, ctxt->src_buffer, + limits->h_start, limits->h_end, limits->v_start, + limits->v_end, ctxt->dgd_stride, ctxt->src_stride, M, H); - type[tile_idx] = RESTORE_WIENER; + ctxt->type[rtile_idx] = RESTORE_WIENER; - if (!wiener_decompose_sep_sym(M, H, vfilterd, hfilterd)) { - type[tile_idx] = RESTORE_NONE; - continue; - } - quantize_sym_filter(vfilterd, rsi[plane].wiener_info[tile_idx].vfilter); - quantize_sym_filter(hfilterd, rsi[plane].wiener_info[tile_idx].hfilter); - - // Filter score computes the value of the function x'*A*x - x'*b for the - // learned filter and compares it against identity filer. If there is no - // reduction in the function, the filter is reverted back to identity - score = compute_score(M, H, rsi[plane].wiener_info[tile_idx].vfilter, - rsi[plane].wiener_info[tile_idx].hfilter); - if (score > 0.0) { - type[tile_idx] = RESTORE_NONE; - continue; - } - aom_clear_system_state(); + if (!wiener_decompose_sep_sym(wiener_win, M, H, vfilterd, hfilterd)) { + ctxt->type[rtile_idx] = RESTORE_NONE; + return; + } - rsi[plane].restoration_type[tile_idx] = RESTORE_WIENER; - err = finer_tile_search_wiener(src, cpi, rsi, 4, plane, tile_idx, - partial_frame, dst_frame); - bits = - count_wiener_bits(&rsi[plane].wiener_info[tile_idx], &ref_wiener_info) - << AV1_PROB_COST_SHIFT; - bits += av1_cost_bit(RESTORE_NONE_WIENER_PROB, 1); - cost_wiener = RDCOST_DBL(x->rdmult, (bits >> 4), err); - if (cost_wiener >= cost_norestore) { - type[tile_idx] = RESTORE_NONE; - } else { - type[tile_idx] = RESTORE_WIENER; - memcpy(&wiener_info[tile_idx], &rsi[plane].wiener_info[tile_idx], - sizeof(wiener_info[tile_idx])); - memcpy(&ref_wiener_info, &rsi[plane].wiener_info[tile_idx], - sizeof(ref_wiener_info)); - best_tile_cost[tile_idx] = err; + RestorationInfo *plane_rsi = &rsi[ctxt->plane]; + WienerInfo *rtile_wiener_info = &plane_rsi->wiener_info[rtile_idx]; + quantize_sym_filter(wiener_win, vfilterd, rtile_wiener_info->vfilter); + quantize_sym_filter(wiener_win, hfilterd, rtile_wiener_info->hfilter); + + // Filter score computes the value of the function x'*A*x - x'*b for the + // learned filter and compares it against identity filer. If there is no + // reduction in the function, the filter is reverted back to identity + double score = compute_score(wiener_win, M, H, rtile_wiener_info->vfilter, + rtile_wiener_info->hfilter); + if (score > 0.0) { + ctxt->type[rtile_idx] = RESTORE_NONE; + return; + } + aom_clear_system_state(); + + plane_rsi->restoration_type[rtile_idx] = RESTORE_WIENER; + err = finer_tile_search_wiener(ctxt->src, ctxt->cpi, rsi, 4, ctxt->plane, + wiener_win, rtile_idx, ctxt->partial_frame, + ctxt->dst_frame); + if (wiener_win != WIENER_WIN) { + assert(rtile_wiener_info->vfilter[0] == 0 && + rtile_wiener_info->vfilter[WIENER_WIN - 1] == 0); + assert(rtile_wiener_info->hfilter[0] == 0 && + rtile_wiener_info->hfilter[WIENER_WIN - 1] == 0); + } + bits = count_wiener_bits(wiener_win, rtile_wiener_info, ref_wiener_info) + << AV1_PROB_COST_SHIFT; + bits += av1_cost_bit(RESTORE_NONE_WIENER_PROB, 1); + double cost_wiener = RDCOST_DBL(x->rdmult, (bits >> 4), err); + if (cost_wiener >= cost_norestore) { + ctxt->type[rtile_idx] = RESTORE_NONE; + } else { + ctxt->type[rtile_idx] = RESTORE_WIENER; + *ref_wiener_info = ctxt->info->wiener_info[rtile_idx] = *rtile_wiener_info; + ctxt->best_tile_cost[rtile_idx] = err; + } + plane_rsi->restoration_type[rtile_idx] = RESTORE_NONE; +} + +static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, + int partial_frame, int plane, RestorationInfo *info, + RestorationType *type, int64_t *best_tile_cost, + YV12_BUFFER_CONFIG *dst_frame) { + struct rest_search_ctxt ctxt; + const int nrtiles = + init_rest_search_ctxt(src, cpi, partial_frame, plane, info, type, + best_tile_cost, dst_frame, &ctxt); + + RestorationInfo *plane_rsi = &cpi->rst_search[plane]; + plane_rsi->frame_restoration_type = RESTORE_WIENER; + for (int tile_idx = 0; tile_idx < nrtiles; ++tile_idx) { + plane_rsi->restoration_type[tile_idx] = RESTORE_NONE; + } + + AV1_COMMON *const cm = &cpi->common; +// Construct a (WIENER_HALFWIN)-pixel border around the frame +// Note use this border to gather stats even though the actual filter +// may use less border on the top/bottom of a processing unit. +#if CONFIG_HIGHBITDEPTH + if (cm->use_highbitdepth) + extend_frame_highbd(CONVERT_TO_SHORTPTR(ctxt.dgd_buffer), ctxt.plane_width, + ctxt.plane_height, ctxt.dgd_stride, WIENER_HALFWIN, + WIENER_HALFWIN); + else +#endif + extend_frame(ctxt.dgd_buffer, ctxt.plane_width, ctxt.plane_height, + ctxt.dgd_stride, WIENER_HALFWIN, WIENER_HALFWIN); + + // Compute best Wiener filters for each rtile, one (encoder/decoder) + // tile at a time. + for (int tile_row = 0; tile_row < cm->tile_rows; ++tile_row) { + for (int tile_col = 0; tile_col < cm->tile_cols; ++tile_col) { + WienerInfo ref_wiener_info; + set_default_wiener(&ref_wiener_info); + + foreach_rtile_in_tile(&ctxt, tile_row, tile_col, search_wiener_for_rtile, + &ref_wiener_info); } - rsi[plane].restoration_type[tile_idx] = RESTORE_NONE; } - // Cost for Wiener filtering + + // cost for Wiener filtering + WienerInfo ref_wiener_info; set_default_wiener(&ref_wiener_info); - bits = frame_level_restore_bits[rsi[plane].frame_restoration_type] - << AV1_PROB_COST_SHIFT; - for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { + int bits = frame_level_restore_bits[plane_rsi->frame_restoration_type] + << AV1_PROB_COST_SHIFT; + WienerInfo *wiener_info = info->wiener_info; + const int wiener_win = + (plane == AOM_PLANE_Y) ? WIENER_WIN : WIENER_WIN_CHROMA; + + for (int tile_idx = 0; tile_idx < nrtiles; ++tile_idx) { bits += av1_cost_bit(RESTORE_NONE_WIENER_PROB, type[tile_idx] != RESTORE_NONE); - memcpy(&rsi[plane].wiener_info[tile_idx], &wiener_info[tile_idx], - sizeof(wiener_info[tile_idx])); + plane_rsi->wiener_info[tile_idx] = wiener_info[tile_idx]; + if (type[tile_idx] == RESTORE_WIENER) { - bits += - count_wiener_bits(&rsi[plane].wiener_info[tile_idx], &ref_wiener_info) - << AV1_PROB_COST_SHIFT; - memcpy(&ref_wiener_info, &rsi[plane].wiener_info[tile_idx], - sizeof(ref_wiener_info)); + bits += count_wiener_bits(wiener_win, &plane_rsi->wiener_info[tile_idx], + &ref_wiener_info) + << AV1_PROB_COST_SHIFT; + ref_wiener_info = plane_rsi->wiener_info[tile_idx]; } - rsi[plane].restoration_type[tile_idx] = type[tile_idx]; + plane_rsi->restoration_type[tile_idx] = type[tile_idx]; } - err = try_restoration_frame(src, cpi, rsi, 1 << plane, partial_frame, - dst_frame); - cost_wiener = RDCOST_DBL(x->rdmult, (bits >> 4), err); + int64_t err = try_restoration_frame(src, cpi, cpi->rst_search, 1 << plane, + partial_frame, dst_frame); + double cost_wiener = RDCOST_DBL(cpi->td.mb.rdmult, (bits >> 4), err); return cost_wiener; } @@ -1122,7 +1327,7 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, static double search_norestore(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, int partial_frame, int plane, RestorationInfo *info, RestorationType *type, - double *best_tile_cost, + int64_t *best_tile_cost, YV12_BUFFER_CONFIG *dst_frame) { int64_t err; double cost_norestore; @@ -1130,7 +1335,6 @@ static double search_norestore(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, MACROBLOCK *x = &cpi->td.mb; AV1_COMMON *const cm = &cpi->common; int tile_idx, tile_width, tile_height, nhtiles, nvtiles; - int h_start, h_end, v_start, v_end; int width, height; if (plane == AOM_PLANE_Y) { width = src->y_crop_width; @@ -1148,12 +1352,16 @@ static double search_norestore(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, info->frame_restoration_type = RESTORE_NONE; for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { - av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width, - tile_height, width, height, 0, 0, &h_start, &h_end, - &v_start, &v_end); - err = sse_restoration_tile(src, cm->frame_to_show, cm, h_start, - h_end - h_start, v_start, v_end - v_start, - 1 << plane); + RestorationTileLimits limits = av1_get_rest_tile_limits( + tile_idx, nhtiles, nvtiles, tile_width, tile_height, width, +#if CONFIG_STRIPED_LOOP_RESTORATION + height, plane != AOM_PLANE_Y ? cm->subsampling_y : 0); +#else + height); +#endif + err = sse_restoration_tile(src, cm->frame_to_show, cm, limits.h_start, + limits.h_end - limits.h_start, limits.v_start, + limits.v_end - limits.v_start, 1 << plane); type[tile_idx] = RESTORE_NONE; best_tile_cost[tile_idx] = err; } @@ -1164,74 +1372,88 @@ static double search_norestore(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, return cost_norestore; } +struct switchable_rest_search_ctxt { + SgrprojInfo sgrproj_info; + WienerInfo wiener_info; + RestorationType *const *restore_types; + int64_t *const *tile_cost; + double cost_switchable; +}; + +static void search_switchable_for_rtile(const struct rest_search_ctxt *ctxt, + int rtile_idx, + const RestorationTileLimits *limits, + void *arg) { + const MACROBLOCK *x = &ctxt->cpi->td.mb; + RestorationInfo *rsi = &ctxt->cpi->common.rst_info[ctxt->plane]; + struct switchable_rest_search_ctxt *swctxt = + (struct switchable_rest_search_ctxt *)arg; + + (void)limits; + + double best_cost = + RDCOST_DBL(x->rdmult, (x->switchable_restore_cost[RESTORE_NONE] >> 4), + swctxt->tile_cost[RESTORE_NONE][rtile_idx]); + rsi->restoration_type[rtile_idx] = RESTORE_NONE; + for (RestorationType r = 1; r < RESTORE_SWITCHABLE_TYPES; r++) { + if (force_restore_type != RESTORE_TYPES) + if (r != force_restore_type) continue; + int tilebits = 0; + if (swctxt->restore_types[r][rtile_idx] != r) continue; + if (r == RESTORE_WIENER) + tilebits += count_wiener_bits( + (ctxt->plane == AOM_PLANE_Y ? WIENER_WIN : WIENER_WIN - 2), + &rsi->wiener_info[rtile_idx], &swctxt->wiener_info); + else if (r == RESTORE_SGRPROJ) + tilebits += count_sgrproj_bits(&rsi->sgrproj_info[rtile_idx], + &swctxt->sgrproj_info); + tilebits <<= AV1_PROB_COST_SHIFT; + tilebits += x->switchable_restore_cost[r]; + double cost = + RDCOST_DBL(x->rdmult, tilebits >> 4, swctxt->tile_cost[r][rtile_idx]); + + if (cost < best_cost) { + rsi->restoration_type[rtile_idx] = r; + best_cost = cost; + } + } + if (rsi->restoration_type[rtile_idx] == RESTORE_WIENER) + swctxt->wiener_info = rsi->wiener_info[rtile_idx]; + else if (rsi->restoration_type[rtile_idx] == RESTORE_SGRPROJ) + swctxt->sgrproj_info = rsi->sgrproj_info[rtile_idx]; + if (force_restore_type != RESTORE_TYPES) + assert(rsi->restoration_type[rtile_idx] == force_restore_type || + rsi->restoration_type[rtile_idx] == RESTORE_NONE); + swctxt->cost_switchable += best_cost; +} + static double search_switchable_restoration( const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, int partial_frame, int plane, RestorationType *const restore_types[RESTORE_SWITCHABLE_TYPES], - double *const tile_cost[RESTORE_SWITCHABLE_TYPES], RestorationInfo *rsi) { - AV1_COMMON *const cm = &cpi->common; - MACROBLOCK *x = &cpi->td.mb; - double cost_switchable = 0; - int bits, tile_idx; - RestorationType r; - int width, height; - if (plane == AOM_PLANE_Y) { - width = src->y_crop_width; - height = src->y_crop_height; - } else { - width = src->uv_crop_width; - height = src->uv_crop_height; - } - const int ntiles = av1_get_rest_ntiles( - width, height, cm->rst_info[plane].restoration_tilesize, NULL, NULL, NULL, - NULL); - SgrprojInfo ref_sgrproj_info; - set_default_sgrproj(&ref_sgrproj_info); - WienerInfo ref_wiener_info; - set_default_wiener(&ref_wiener_info); - (void)partial_frame; + int64_t *const tile_cost[RESTORE_SWITCHABLE_TYPES], RestorationInfo *rsi) { + const AV1_COMMON *const cm = &cpi->common; + struct rest_search_ctxt ctxt; + init_rest_search_ctxt(src, cpi, partial_frame, plane, NULL, NULL, NULL, NULL, + &ctxt); + struct switchable_rest_search_ctxt swctxt; + swctxt.restore_types = restore_types; + swctxt.tile_cost = tile_cost; rsi->frame_restoration_type = RESTORE_SWITCHABLE; - bits = frame_level_restore_bits[rsi->frame_restoration_type] - << AV1_PROB_COST_SHIFT; - cost_switchable = RDCOST_DBL(x->rdmult, bits >> 4, 0); - for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { - double best_cost = - RDCOST_DBL(x->rdmult, (cpi->switchable_restore_cost[RESTORE_NONE] >> 4), - tile_cost[RESTORE_NONE][tile_idx]); - rsi->restoration_type[tile_idx] = RESTORE_NONE; - for (r = 1; r < RESTORE_SWITCHABLE_TYPES; r++) { - if (force_restore_type != 0) - if (r != force_restore_type) continue; - int tilebits = 0; - if (restore_types[r][tile_idx] != r) continue; - if (r == RESTORE_WIENER) - tilebits += - count_wiener_bits(&rsi->wiener_info[tile_idx], &ref_wiener_info); - else if (r == RESTORE_SGRPROJ) - tilebits += - count_sgrproj_bits(&rsi->sgrproj_info[tile_idx], &ref_sgrproj_info); - tilebits <<= AV1_PROB_COST_SHIFT; - tilebits += cpi->switchable_restore_cost[r]; - double cost = - RDCOST_DBL(x->rdmult, tilebits >> 4, tile_cost[r][tile_idx]); - - if (cost < best_cost) { - rsi->restoration_type[tile_idx] = r; - best_cost = cost; - } + int bits = frame_level_restore_bits[rsi->frame_restoration_type] + << AV1_PROB_COST_SHIFT; + swctxt.cost_switchable = RDCOST_DBL(cpi->td.mb.rdmult, bits >> 4, 0); + + for (int tile_row = 0; tile_row < cm->tile_rows; ++tile_row) { + for (int tile_col = 0; tile_col < cm->tile_cols; ++tile_col) { + set_default_sgrproj(&swctxt.sgrproj_info); + set_default_wiener(&swctxt.wiener_info); + foreach_rtile_in_tile(&ctxt, tile_row, tile_col, + search_switchable_for_rtile, &swctxt); } - if (rsi->restoration_type[tile_idx] == RESTORE_WIENER) - memcpy(&ref_wiener_info, &rsi->wiener_info[tile_idx], - sizeof(ref_wiener_info)); - else if (rsi->restoration_type[tile_idx] == RESTORE_SGRPROJ) - memcpy(&ref_sgrproj_info, &rsi->sgrproj_info[tile_idx], - sizeof(ref_sgrproj_info)); - if (force_restore_type != 0) - assert(rsi->restoration_type[tile_idx] == force_restore_type || - rsi->restoration_type[tile_idx] == RESTORE_NONE); - cost_switchable += best_cost; } - return cost_switchable; + + return swctxt.cost_switchable; } void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, @@ -1241,7 +1463,7 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, }; AV1_COMMON *const cm = &cpi->common; double cost_restore[RESTORE_TYPES]; - double *tile_cost[RESTORE_SWITCHABLE_TYPES]; + int64_t *tile_cost[RESTORE_SWITCHABLE_TYPES]; RestorationType *restore_types[RESTORE_SWITCHABLE_TYPES]; double best_cost_restore; RestorationType r, best_restore; @@ -1259,7 +1481,7 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, // Assume ntiles_uv is never larger that ntiles_y and so the same arrays work. for (r = 0; r < RESTORE_SWITCHABLE_TYPES; r++) { - tile_cost[r] = (double *)aom_malloc(sizeof(*tile_cost[0]) * ntiles_y); + tile_cost[r] = (int64_t *)aom_malloc(sizeof(*tile_cost[0]) * ntiles_y); restore_types[r] = (RestorationType *)aom_malloc(sizeof(*restore_types[0]) * ntiles_y); } @@ -1267,7 +1489,7 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, for (int plane = AOM_PLANE_Y; plane <= AOM_PLANE_V; ++plane) { for (r = 0; r < RESTORE_SWITCHABLE_TYPES; ++r) { cost_restore[r] = DBL_MAX; - if (force_restore_type != 0) + if (force_restore_type != RESTORE_TYPES) if (r != RESTORE_NONE && r != force_restore_type) continue; cost_restore[r] = search_restore_fun[r](src, cpi, method == LPF_PICK_FROM_SUBIMAGE, @@ -1283,7 +1505,7 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, best_cost_restore = DBL_MAX; best_restore = 0; for (r = 0; r < RESTORE_TYPES; ++r) { - if (force_restore_type != 0) + if (force_restore_type != RESTORE_TYPES) if (r != RESTORE_NONE && r != force_restore_type) continue; if (cost_restore[r] < best_cost_restore) { best_restore = r; @@ -1291,7 +1513,7 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, } } cm->rst_info[plane].frame_restoration_type = best_restore; - if (force_restore_type != 0) + if (force_restore_type != RESTORE_TYPES) assert(best_restore == force_restore_type || best_restore == RESTORE_NONE); if (best_restore != RESTORE_SWITCHABLE) { |