diff options
Diffstat (limited to 'third_party/aom/av1/encoder/pickrst.c')
-rw-r--r-- | third_party/aom/av1/encoder/pickrst.c | 692 |
1 files changed, 368 insertions, 324 deletions
diff --git a/third_party/aom/av1/encoder/pickrst.c b/third_party/aom/av1/encoder/pickrst.c index 21410e0af..4a446d24e 100644 --- a/third_party/aom/av1/encoder/pickrst.c +++ b/third_party/aom/av1/encoder/pickrst.c @@ -31,17 +31,18 @@ #include "av1/encoder/encoder.h" #include "av1/encoder/picklpf.h" #include "av1/encoder/pickrst.h" +#include "av1/encoder/mathutils.h" // When set to RESTORE_WIENER or RESTORE_SGRPROJ only those are allowed. // When set to RESTORE_NONE (0) we allow switchable. const RestorationType force_restore_type = RESTORE_NONE; // Number of Wiener iterations -#define NUM_WIENER_ITERS 10 +#define NUM_WIENER_ITERS 5 typedef double (*search_restore_type)(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, int partial_frame, - RestorationInfo *info, + int plane, RestorationInfo *info, RestorationType *rest_level, double *best_tile_cost, YV12_BUFFER_CONFIG *dst_frame); @@ -216,6 +217,62 @@ static int64_t get_pixel_proj_error(uint8_t *src8, int width, int height, return err; } +#define USE_SGRPROJ_REFINEMENT_SEARCH 1 +static int64_t finer_search_pixel_proj_error( + uint8_t *src8, int width, int height, int src_stride, uint8_t *dat8, + int dat_stride, int bit_depth, int32_t *flt1, int flt1_stride, + int32_t *flt2, int flt2_stride, int start_step, int *xqd) { + int64_t err = get_pixel_proj_error(src8, width, height, src_stride, dat8, + dat_stride, bit_depth, flt1, flt1_stride, + flt2, flt2_stride, xqd); + (void)start_step; +#if USE_SGRPROJ_REFINEMENT_SEARCH + int64_t err2; + int tap_min[] = { SGRPROJ_PRJ_MIN0, SGRPROJ_PRJ_MIN1 }; + int tap_max[] = { SGRPROJ_PRJ_MAX0, SGRPROJ_PRJ_MAX1 }; + for (int s = start_step; s >= 1; s >>= 1) { + for (int p = 0; p < 2; ++p) { + int skip = 0; + do { + if (xqd[p] - s >= tap_min[p]) { + xqd[p] -= s; + err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8, + dat_stride, bit_depth, flt1, flt1_stride, + flt2, flt2_stride, xqd); + if (err2 > err) { + xqd[p] += s; + } else { + err = err2; + skip = 1; + // At the highest step size continue moving in the same direction + if (s == start_step) continue; + } + } + break; + } while (1); + if (skip) break; + do { + if (xqd[p] + s <= tap_max[p]) { + xqd[p] += s; + err2 = get_pixel_proj_error(src8, width, height, src_stride, dat8, + dat_stride, bit_depth, flt1, flt1_stride, + flt2, flt2_stride, xqd); + if (err2 > err) { + xqd[p] -= s; + } else { + err = err2; + // At the highest step size continue moving in the same direction + if (s == start_step) continue; + } + } + break; + } while (1); + } + } +#endif // USE_SGRPROJ_REFINEMENT_SEARCH + return err; +} + static void get_proj_subspace(uint8_t *src8, int width, int height, int src_stride, uint8_t *dat8, int dat_stride, int bit_depth, int32_t *flt1, int flt1_stride, @@ -329,12 +386,14 @@ static void search_selfguided_restoration(uint8_t *dat8, int width, int height, #if CONFIG_HIGHBITDEPTH } #endif + aom_clear_system_state(); get_proj_subspace(src8, width, height, src_stride, dat8, dat_stride, bit_depth, flt1, width, flt2, width, exq); + aom_clear_system_state(); encode_xq(exq, exqd); - err = - get_pixel_proj_error(src8, width, height, src_stride, dat8, dat_stride, - bit_depth, flt1, width, flt2, width, exqd); + err = finer_search_pixel_proj_error(src8, width, height, src_stride, dat8, + dat_stride, bit_depth, flt1, width, + flt2, width, 2, exqd); if (besterr == -1 || err < besterr) { bestep = ep; besterr = err; @@ -362,8 +421,9 @@ static int count_sgrproj_bits(SgrprojInfo *sgrproj_info, } static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, - int partial_frame, RestorationInfo *info, - RestorationType *type, double *best_tile_cost, + int partial_frame, int plane, + RestorationInfo *info, RestorationType *type, + double *best_tile_cost, YV12_BUFFER_CONFIG *dst_frame) { SgrprojInfo *sgrproj_info = info->sgrproj_info; double err, cost_norestore, cost_sgrproj; @@ -374,44 +434,68 @@ static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, RestorationInfo *rsi = &cpi->rst_search[0]; int tile_idx, tile_width, tile_height, nhtiles, nvtiles; int h_start, h_end, v_start, v_end; - // Allocate for the src buffer at high precision - const int ntiles = av1_get_rest_ntiles( - cm->width, cm->height, cm->rst_info[0].restoration_tilesize, &tile_width, - &tile_height, &nhtiles, &nvtiles); + int width, height, src_stride, dgd_stride; + uint8_t *dgd_buffer, *src_buffer; + if (plane == AOM_PLANE_Y) { + width = cm->width; + height = cm->height; + src_buffer = src->y_buffer; + src_stride = src->y_stride; + dgd_buffer = dgd->y_buffer; + dgd_stride = dgd->y_stride; + assert(width == dgd->y_crop_width); + assert(height == dgd->y_crop_height); + assert(width == src->y_crop_width); + assert(height == src->y_crop_height); + } else { + width = src->uv_crop_width; + height = src->uv_crop_height; + src_stride = src->uv_stride; + dgd_stride = dgd->uv_stride; + src_buffer = plane == AOM_PLANE_U ? src->u_buffer : src->v_buffer; + dgd_buffer = plane == AOM_PLANE_U ? dgd->u_buffer : dgd->v_buffer; + assert(width == dgd->uv_crop_width); + assert(height == dgd->uv_crop_height); + } + const int ntiles = + av1_get_rest_ntiles(width, height, cm->rst_info[0].restoration_tilesize, + &tile_width, &tile_height, &nhtiles, &nvtiles); SgrprojInfo ref_sgrproj_info; set_default_sgrproj(&ref_sgrproj_info); - rsi->frame_restoration_type = RESTORE_SGRPROJ; + rsi[plane].frame_restoration_type = RESTORE_SGRPROJ; for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { - rsi->restoration_type[tile_idx] = RESTORE_NONE; + rsi[plane].restoration_type[tile_idx] = RESTORE_NONE; } // Compute best Sgrproj filters for each tile for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width, - tile_height, cm->width, cm->height, 0, 0, &h_start, - &h_end, &v_start, &v_end); + tile_height, width, height, 0, 0, &h_start, &h_end, + &v_start, &v_end); err = sse_restoration_tile(src, cm->frame_to_show, cm, h_start, - h_end - h_start, v_start, v_end - v_start, 1); + h_end - h_start, v_start, v_end - v_start, + (1 << plane)); // #bits when a tile is not restored bits = av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, 0); cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); best_tile_cost[tile_idx] = DBL_MAX; search_selfguided_restoration( - dgd->y_buffer + v_start * dgd->y_stride + h_start, h_end - h_start, - v_end - v_start, dgd->y_stride, - src->y_buffer + v_start * src->y_stride + h_start, src->y_stride, + dgd_buffer + v_start * dgd_stride + h_start, h_end - h_start, + v_end - v_start, dgd_stride, + src_buffer + v_start * src_stride + h_start, src_stride, #if CONFIG_HIGHBITDEPTH cm->bit_depth, #else 8, #endif // CONFIG_HIGHBITDEPTH - &rsi->sgrproj_info[tile_idx].ep, rsi->sgrproj_info[tile_idx].xqd, - cm->rst_internal.tmpbuf); - rsi->restoration_type[tile_idx] = RESTORE_SGRPROJ; - err = try_restoration_tile(src, cpi, rsi, 1, partial_frame, tile_idx, 0, 0, - dst_frame); - bits = count_sgrproj_bits(&rsi->sgrproj_info[tile_idx], &ref_sgrproj_info) + &rsi[plane].sgrproj_info[tile_idx].ep, + rsi[plane].sgrproj_info[tile_idx].xqd, cm->rst_internal.tmpbuf); + rsi[plane].restoration_type[tile_idx] = RESTORE_SGRPROJ; + err = try_restoration_tile(src, cpi, rsi, (1 << plane), partial_frame, + tile_idx, 0, 0, dst_frame); + bits = count_sgrproj_bits(&rsi[plane].sgrproj_info[tile_idx], + &ref_sgrproj_info) << AV1_PROB_COST_SHIFT; bits += av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, 1); cost_sgrproj = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); @@ -419,35 +503,34 @@ static double search_sgrproj(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, type[tile_idx] = RESTORE_NONE; } else { type[tile_idx] = RESTORE_SGRPROJ; - memcpy(&sgrproj_info[tile_idx], &rsi->sgrproj_info[tile_idx], + memcpy(&sgrproj_info[tile_idx], &rsi[plane].sgrproj_info[tile_idx], sizeof(sgrproj_info[tile_idx])); - bits = count_sgrproj_bits(&rsi->sgrproj_info[tile_idx], &ref_sgrproj_info) - << AV1_PROB_COST_SHIFT; memcpy(&ref_sgrproj_info, &sgrproj_info[tile_idx], sizeof(ref_sgrproj_info)); best_tile_cost[tile_idx] = err; } - rsi->restoration_type[tile_idx] = RESTORE_NONE; + rsi[plane].restoration_type[tile_idx] = RESTORE_NONE; } // Cost for Sgrproj filtering set_default_sgrproj(&ref_sgrproj_info); - bits = frame_level_restore_bits[rsi->frame_restoration_type] + bits = frame_level_restore_bits[rsi[plane].frame_restoration_type] << AV1_PROB_COST_SHIFT; for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { bits += av1_cost_bit(RESTORE_NONE_SGRPROJ_PROB, type[tile_idx] != RESTORE_NONE); - memcpy(&rsi->sgrproj_info[tile_idx], &sgrproj_info[tile_idx], + memcpy(&rsi[plane].sgrproj_info[tile_idx], &sgrproj_info[tile_idx], sizeof(sgrproj_info[tile_idx])); if (type[tile_idx] == RESTORE_SGRPROJ) { - bits += - count_sgrproj_bits(&rsi->sgrproj_info[tile_idx], &ref_sgrproj_info) - << AV1_PROB_COST_SHIFT; - memcpy(&ref_sgrproj_info, &rsi->sgrproj_info[tile_idx], + bits += count_sgrproj_bits(&rsi[plane].sgrproj_info[tile_idx], + &ref_sgrproj_info) + << AV1_PROB_COST_SHIFT; + memcpy(&ref_sgrproj_info, &rsi[plane].sgrproj_info[tile_idx], sizeof(ref_sgrproj_info)); } - rsi->restoration_type[tile_idx] = type[tile_idx]; + rsi[plane].restoration_type[tile_idx] = type[tile_idx]; } - err = try_restoration_frame(src, cpi, rsi, 1, partial_frame, dst_frame); + err = try_restoration_frame(src, cpi, rsi, (1 << plane), partial_frame, + dst_frame); cost_sgrproj = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); return cost_sgrproj; @@ -560,46 +643,6 @@ static void compute_stats_highbd(uint8_t *dgd8, uint8_t *src8, int h_start, } #endif // CONFIG_HIGHBITDEPTH -// Solves Ax = b, where x and b are column vectors -static int linsolve(int n, double *A, int stride, double *b, double *x) { - int i, j, k; - double c; - - aom_clear_system_state(); - - // Forward elimination - for (k = 0; k < n - 1; k++) { - // Bring the largest magitude to the diagonal position - for (i = n - 1; i > k; i--) { - if (fabs(A[(i - 1) * stride + k]) < fabs(A[i * stride + k])) { - for (j = 0; j < n; j++) { - c = A[i * stride + j]; - A[i * stride + j] = A[(i - 1) * stride + j]; - A[(i - 1) * stride + j] = c; - } - c = b[i]; - b[i] = b[i - 1]; - b[i - 1] = c; - } - } - for (i = k; i < n - 1; i++) { - if (fabs(A[k * stride + k]) < 1e-10) return 0; - c = A[(i + 1) * stride + k] / A[k * stride + k]; - for (j = 0; j < n; j++) A[(i + 1) * stride + j] -= c * A[k * stride + j]; - b[i + 1] -= c * b[k]; - } - } - // Backward substitution - for (i = n - 1; i >= 0; i--) { - if (fabs(A[i * stride + i]) < 1e-10) return 0; - c = 0; - for (j = i + 1; j <= n - 1; j++) c += A[i * stride + j] * x[j]; - x[i] = (b[i] - c) / A[i * stride + i]; - } - - return 1; -} - static INLINE int wrap_index(int i) { return (i >= WIENER_HALFWIN1 ? WIENER_WIN - 1 - i : i); } @@ -696,8 +739,10 @@ static void update_b_sep_sym(double **Mc, double **Hc, double *a, double *b) { static int wiener_decompose_sep_sym(double *M, double *H, double *a, double *b) { - static const double init_filt[WIENER_WIN] = { - 0.035623, -0.127154, 0.211436, 0.760190, 0.211436, -0.127154, 0.035623, + static const int init_filt[WIENER_WIN] = { + WIENER_FILT_TAP0_MIDV, WIENER_FILT_TAP1_MIDV, WIENER_FILT_TAP2_MIDV, + WIENER_FILT_TAP3_MIDV, WIENER_FILT_TAP2_MIDV, WIENER_FILT_TAP1_MIDV, + WIENER_FILT_TAP0_MIDV, }; int i, j, iter; double *Hc[WIENER_WIN2]; @@ -709,8 +754,9 @@ static int wiener_decompose_sep_sym(double *M, double *H, double *a, H + i * WIENER_WIN * WIENER_WIN2 + j * WIENER_WIN; } } - memcpy(a, init_filt, sizeof(*a) * WIENER_WIN); - memcpy(b, init_filt, sizeof(*b) * WIENER_WIN); + for (i = 0; i < WIENER_WIN; i++) { + a[i] = b[i] = (double)init_filt[i] / WIENER_FILT_STEP; + } iter = 1; while (iter < NUM_WIENER_ITERS) { @@ -812,158 +858,117 @@ static int count_wiener_bits(WienerInfo *wiener_info, return bits; } -static double search_wiener_uv(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, - int partial_frame, int plane, - RestorationInfo *info, RestorationType *type, - YV12_BUFFER_CONFIG *dst_frame) { - WienerInfo *wiener_info = info->wiener_info; - AV1_COMMON *const cm = &cpi->common; - RestorationInfo *rsi = cpi->rst_search; - int64_t err; - int bits; - double cost_wiener, cost_norestore, cost_wiener_frame, cost_norestore_frame; - MACROBLOCK *x = &cpi->td.mb; - double M[WIENER_WIN2]; - double H[WIENER_WIN2 * WIENER_WIN2]; - double vfilterd[WIENER_WIN], hfilterd[WIENER_WIN]; - const YV12_BUFFER_CONFIG *dgd = cm->frame_to_show; - const int width = src->uv_crop_width; - const int height = src->uv_crop_height; - const int src_stride = src->uv_stride; - const int dgd_stride = dgd->uv_stride; - double score; - int tile_idx, tile_width, tile_height, nhtiles, nvtiles; - int h_start, h_end, v_start, v_end; - const int ntiles = - av1_get_rest_ntiles(width, height, cm->rst_info[1].restoration_tilesize, - &tile_width, &tile_height, &nhtiles, &nvtiles); - WienerInfo ref_wiener_info; - set_default_wiener(&ref_wiener_info); - assert(width == dgd->uv_crop_width); - assert(height == dgd->uv_crop_height); - - rsi[plane].frame_restoration_type = RESTORE_NONE; - err = sse_restoration_frame(cm, src, cm->frame_to_show, (1 << plane)); - bits = 0; - cost_norestore_frame = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); - - rsi[plane].frame_restoration_type = RESTORE_WIENER; - - for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { - rsi[plane].restoration_type[tile_idx] = RESTORE_NONE; - } - - // Compute best Wiener filters for each tile - for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { - av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width, - tile_height, width, height, 0, 0, &h_start, &h_end, - &v_start, &v_end); - err = sse_restoration_tile(src, cm->frame_to_show, cm, h_start, - h_end - h_start, v_start, v_end - v_start, - 1 << plane); - // #bits when a tile is not restored - bits = av1_cost_bit(RESTORE_NONE_WIENER_PROB, 0); - cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); - // best_tile_cost[tile_idx] = DBL_MAX; - - av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width, - tile_height, width, height, WIENER_HALFWIN, - WIENER_HALFWIN, &h_start, &h_end, &v_start, - &v_end); - if (plane == AOM_PLANE_U) { -#if CONFIG_HIGHBITDEPTH - if (cm->use_highbitdepth) - compute_stats_highbd(dgd->u_buffer, src->u_buffer, h_start, h_end, - v_start, v_end, dgd_stride, src_stride, M, H); - else -#endif // CONFIG_HIGHBITDEPTH - compute_stats(dgd->u_buffer, src->u_buffer, h_start, h_end, v_start, - v_end, dgd_stride, src_stride, M, H); - } else if (plane == AOM_PLANE_V) { -#if CONFIG_HIGHBITDEPTH - if (cm->use_highbitdepth) - compute_stats_highbd(dgd->v_buffer, src->v_buffer, h_start, h_end, - v_start, v_end, dgd_stride, src_stride, M, H); - else -#endif // CONFIG_HIGHBITDEPTH - compute_stats(dgd->v_buffer, src->v_buffer, h_start, h_end, v_start, - v_end, dgd_stride, src_stride, M, H); - } else { - assert(0); - } - - type[tile_idx] = RESTORE_WIENER; - - if (!wiener_decompose_sep_sym(M, H, vfilterd, hfilterd)) { - type[tile_idx] = RESTORE_NONE; - continue; - } - quantize_sym_filter(vfilterd, rsi[plane].wiener_info[tile_idx].vfilter); - quantize_sym_filter(hfilterd, rsi[plane].wiener_info[tile_idx].hfilter); - - // Filter score computes the value of the function x'*A*x - x'*b for the - // learned filter and compares it against identity filer. If there is no - // reduction in the function, the filter is reverted back to identity - score = compute_score(M, H, rsi[plane].wiener_info[tile_idx].vfilter, - rsi[plane].wiener_info[tile_idx].hfilter); - if (score > 0.0) { - type[tile_idx] = RESTORE_NONE; - continue; - } - - rsi[plane].restoration_type[tile_idx] = RESTORE_WIENER; - err = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame, - tile_idx, 0, 0, dst_frame); - bits = - count_wiener_bits(&rsi[plane].wiener_info[tile_idx], &ref_wiener_info) - << AV1_PROB_COST_SHIFT; - // bits = WIENER_FILT_BITS << AV1_PROB_COST_SHIFT; - bits += av1_cost_bit(RESTORE_NONE_WIENER_PROB, 1); - cost_wiener = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); - if (cost_wiener >= cost_norestore) { - type[tile_idx] = RESTORE_NONE; - } else { - type[tile_idx] = RESTORE_WIENER; - memcpy(&wiener_info[tile_idx], &rsi[plane].wiener_info[tile_idx], - sizeof(wiener_info[tile_idx])); - memcpy(&ref_wiener_info, &rsi[plane].wiener_info[tile_idx], - sizeof(ref_wiener_info)); +#define USE_WIENER_REFINEMENT_SEARCH 1 +static int64_t finer_tile_search_wiener(const YV12_BUFFER_CONFIG *src, + AV1_COMP *cpi, RestorationInfo *rsi, + int start_step, int plane, int tile_idx, + int partial_frame, + YV12_BUFFER_CONFIG *dst_frame) { + int64_t err = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame, + tile_idx, 0, 0, dst_frame); + (void)start_step; +#if USE_WIENER_REFINEMENT_SEARCH + int64_t err2; + int tap_min[] = { WIENER_FILT_TAP0_MINV, WIENER_FILT_TAP1_MINV, + WIENER_FILT_TAP2_MINV }; + int tap_max[] = { WIENER_FILT_TAP0_MAXV, WIENER_FILT_TAP1_MAXV, + WIENER_FILT_TAP2_MAXV }; + // printf("err pre = %"PRId64"\n", err); + for (int s = start_step; s >= 1; s >>= 1) { + for (int p = 0; p < WIENER_HALFWIN; ++p) { + int skip = 0; + do { + if (rsi[plane].wiener_info[tile_idx].hfilter[p] - s >= tap_min[p]) { + rsi[plane].wiener_info[tile_idx].hfilter[p] -= s; + rsi[plane].wiener_info[tile_idx].hfilter[WIENER_WIN - p - 1] -= s; + rsi[plane].wiener_info[tile_idx].hfilter[WIENER_HALFWIN] += 2 * s; + err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame, + tile_idx, 0, 0, dst_frame); + if (err2 > err) { + rsi[plane].wiener_info[tile_idx].hfilter[p] += s; + rsi[plane].wiener_info[tile_idx].hfilter[WIENER_WIN - p - 1] += s; + rsi[plane].wiener_info[tile_idx].hfilter[WIENER_HALFWIN] -= 2 * s; + } else { + err = err2; + skip = 1; + // At the highest step size continue moving in the same direction + if (s == start_step) continue; + } + } + break; + } while (1); + if (skip) break; + do { + if (rsi[plane].wiener_info[tile_idx].hfilter[p] + s <= tap_max[p]) { + rsi[plane].wiener_info[tile_idx].hfilter[p] += s; + rsi[plane].wiener_info[tile_idx].hfilter[WIENER_WIN - p - 1] += s; + rsi[plane].wiener_info[tile_idx].hfilter[WIENER_HALFWIN] -= 2 * s; + err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame, + tile_idx, 0, 0, dst_frame); + if (err2 > err) { + rsi[plane].wiener_info[tile_idx].hfilter[p] -= s; + rsi[plane].wiener_info[tile_idx].hfilter[WIENER_WIN - p - 1] -= s; + rsi[plane].wiener_info[tile_idx].hfilter[WIENER_HALFWIN] += 2 * s; + } else { + err = err2; + // At the highest step size continue moving in the same direction + if (s == start_step) continue; + } + } + break; + } while (1); } - rsi[plane].restoration_type[tile_idx] = RESTORE_NONE; - } - // Cost for Wiener filtering - set_default_wiener(&ref_wiener_info); - bits = 0; - for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { - bits += - av1_cost_bit(RESTORE_NONE_WIENER_PROB, type[tile_idx] != RESTORE_NONE); - memcpy(&rsi[plane].wiener_info[tile_idx], &wiener_info[tile_idx], - sizeof(wiener_info[tile_idx])); - if (type[tile_idx] == RESTORE_WIENER) { - bits += - count_wiener_bits(&rsi[plane].wiener_info[tile_idx], &ref_wiener_info) - << AV1_PROB_COST_SHIFT; - memcpy(&ref_wiener_info, &rsi[plane].wiener_info[tile_idx], - sizeof(ref_wiener_info)); + for (int p = 0; p < WIENER_HALFWIN; ++p) { + int skip = 0; + do { + if (rsi[plane].wiener_info[tile_idx].vfilter[p] - s >= tap_min[p]) { + rsi[plane].wiener_info[tile_idx].vfilter[p] -= s; + rsi[plane].wiener_info[tile_idx].vfilter[WIENER_WIN - p - 1] -= s; + rsi[plane].wiener_info[tile_idx].vfilter[WIENER_HALFWIN] += 2 * s; + err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame, + tile_idx, 0, 0, dst_frame); + if (err2 > err) { + rsi[plane].wiener_info[tile_idx].vfilter[p] += s; + rsi[plane].wiener_info[tile_idx].vfilter[WIENER_WIN - p - 1] += s; + rsi[plane].wiener_info[tile_idx].vfilter[WIENER_HALFWIN] -= 2 * s; + } else { + err = err2; + skip = 1; + // At the highest step size continue moving in the same direction + if (s == start_step) continue; + } + } + break; + } while (1); + if (skip) break; + do { + if (rsi[plane].wiener_info[tile_idx].vfilter[p] + s <= tap_max[p]) { + rsi[plane].wiener_info[tile_idx].vfilter[p] += s; + rsi[plane].wiener_info[tile_idx].vfilter[WIENER_WIN - p - 1] += s; + rsi[plane].wiener_info[tile_idx].vfilter[WIENER_HALFWIN] -= 2 * s; + err2 = try_restoration_tile(src, cpi, rsi, 1 << plane, partial_frame, + tile_idx, 0, 0, dst_frame); + if (err2 > err) { + rsi[plane].wiener_info[tile_idx].vfilter[p] -= s; + rsi[plane].wiener_info[tile_idx].vfilter[WIENER_WIN - p - 1] -= s; + rsi[plane].wiener_info[tile_idx].vfilter[WIENER_HALFWIN] += 2 * s; + } else { + err = err2; + // At the highest step size continue moving in the same direction + if (s == start_step) continue; + } + } + break; + } while (1); } - rsi[plane].restoration_type[tile_idx] = type[tile_idx]; - } - err = try_restoration_frame(src, cpi, rsi, 1 << plane, partial_frame, - dst_frame); - cost_wiener_frame = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); - - if (cost_wiener_frame < cost_norestore_frame) { - info->frame_restoration_type = RESTORE_WIENER; - } else { - info->frame_restoration_type = RESTORE_NONE; } - - return info->frame_restoration_type == RESTORE_WIENER ? cost_wiener_frame - : cost_norestore_frame; +// printf("err post = %"PRId64"\n", err); +#endif // USE_WIENER_REFINEMENT_SEARCH + return err; } static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, - int partial_frame, RestorationInfo *info, + int partial_frame, int plane, RestorationInfo *info, RestorationType *type, double *best_tile_cost, YV12_BUFFER_CONFIG *dst_frame) { WienerInfo *wiener_info = info->wiener_info; @@ -977,38 +982,52 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, double H[WIENER_WIN2 * WIENER_WIN2]; double vfilterd[WIENER_WIN], hfilterd[WIENER_WIN]; const YV12_BUFFER_CONFIG *dgd = cm->frame_to_show; - const int width = cm->width; - const int height = cm->height; - const int src_stride = src->y_stride; - const int dgd_stride = dgd->y_stride; + int width, height, src_stride, dgd_stride; + uint8_t *dgd_buffer, *src_buffer; + if (plane == AOM_PLANE_Y) { + width = cm->width; + height = cm->height; + src_buffer = src->y_buffer; + src_stride = src->y_stride; + dgd_buffer = dgd->y_buffer; + dgd_stride = dgd->y_stride; + assert(width == dgd->y_crop_width); + assert(height == dgd->y_crop_height); + assert(width == src->y_crop_width); + assert(height == src->y_crop_height); + } else { + width = src->uv_crop_width; + height = src->uv_crop_height; + src_stride = src->uv_stride; + dgd_stride = dgd->uv_stride; + src_buffer = plane == AOM_PLANE_U ? src->u_buffer : src->v_buffer; + dgd_buffer = plane == AOM_PLANE_U ? dgd->u_buffer : dgd->v_buffer; + assert(width == dgd->uv_crop_width); + assert(height == dgd->uv_crop_height); + } double score; int tile_idx, tile_width, tile_height, nhtiles, nvtiles; int h_start, h_end, v_start, v_end; - const int ntiles = - av1_get_rest_ntiles(width, height, cm->rst_info[0].restoration_tilesize, - &tile_width, &tile_height, &nhtiles, &nvtiles); + const int ntiles = av1_get_rest_ntiles( + width, height, cm->rst_info[plane].restoration_tilesize, &tile_width, + &tile_height, &nhtiles, &nvtiles); WienerInfo ref_wiener_info; set_default_wiener(&ref_wiener_info); - assert(width == dgd->y_crop_width); - assert(height == dgd->y_crop_height); - assert(width == src->y_crop_width); - assert(height == src->y_crop_height); - - rsi->frame_restoration_type = RESTORE_WIENER; + rsi[plane].frame_restoration_type = RESTORE_WIENER; for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { - rsi->restoration_type[tile_idx] = RESTORE_NONE; + rsi[plane].restoration_type[tile_idx] = RESTORE_NONE; } // Construct a (WIENER_HALFWIN)-pixel border around the frame #if CONFIG_HIGHBITDEPTH if (cm->use_highbitdepth) - extend_frame_highbd(CONVERT_TO_SHORTPTR(dgd->y_buffer), width, height, + extend_frame_highbd(CONVERT_TO_SHORTPTR(dgd_buffer), width, height, dgd_stride); else #endif - extend_frame(dgd->y_buffer, width, height, dgd_stride); + extend_frame(dgd_buffer, width, height, dgd_stride); // Compute best Wiener filters for each tile for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { @@ -1016,7 +1035,8 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, tile_height, width, height, 0, 0, &h_start, &h_end, &v_start, &v_end); err = sse_restoration_tile(src, cm->frame_to_show, cm, h_start, - h_end - h_start, v_start, v_end - v_start, 1); + h_end - h_start, v_start, v_end - v_start, + (1 << plane)); // #bits when a tile is not restored bits = av1_cost_bit(RESTORE_NONE_WIENER_PROB, 0); cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); @@ -1027,12 +1047,12 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, &v_start, &v_end); #if CONFIG_HIGHBITDEPTH if (cm->use_highbitdepth) - compute_stats_highbd(dgd->y_buffer, src->y_buffer, h_start, h_end, - v_start, v_end, dgd_stride, src_stride, M, H); + compute_stats_highbd(dgd_buffer, src_buffer, h_start, h_end, v_start, + v_end, dgd_stride, src_stride, M, H); else #endif // CONFIG_HIGHBITDEPTH - compute_stats(dgd->y_buffer, src->y_buffer, h_start, h_end, v_start, - v_end, dgd_stride, src_stride, M, H); + compute_stats(dgd_buffer, src_buffer, h_start, h_end, v_start, v_end, + dgd_stride, src_stride, M, H); type[tile_idx] = RESTORE_WIENER; @@ -1040,108 +1060,129 @@ static double search_wiener(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, type[tile_idx] = RESTORE_NONE; continue; } - quantize_sym_filter(vfilterd, rsi->wiener_info[tile_idx].vfilter); - quantize_sym_filter(hfilterd, rsi->wiener_info[tile_idx].hfilter); + quantize_sym_filter(vfilterd, rsi[plane].wiener_info[tile_idx].vfilter); + quantize_sym_filter(hfilterd, rsi[plane].wiener_info[tile_idx].hfilter); // Filter score computes the value of the function x'*A*x - x'*b for the // learned filter and compares it against identity filer. If there is no // reduction in the function, the filter is reverted back to identity - score = compute_score(M, H, rsi->wiener_info[tile_idx].vfilter, - rsi->wiener_info[tile_idx].hfilter); + score = compute_score(M, H, rsi[plane].wiener_info[tile_idx].vfilter, + rsi[plane].wiener_info[tile_idx].hfilter); if (score > 0.0) { type[tile_idx] = RESTORE_NONE; continue; } + aom_clear_system_state(); - rsi->restoration_type[tile_idx] = RESTORE_WIENER; - err = try_restoration_tile(src, cpi, rsi, 1, partial_frame, tile_idx, 0, 0, - dst_frame); - bits = count_wiener_bits(&rsi->wiener_info[tile_idx], &ref_wiener_info) - << AV1_PROB_COST_SHIFT; + rsi[plane].restoration_type[tile_idx] = RESTORE_WIENER; + err = finer_tile_search_wiener(src, cpi, rsi, 4, plane, tile_idx, + partial_frame, dst_frame); + bits = + count_wiener_bits(&rsi[plane].wiener_info[tile_idx], &ref_wiener_info) + << AV1_PROB_COST_SHIFT; bits += av1_cost_bit(RESTORE_NONE_WIENER_PROB, 1); cost_wiener = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); if (cost_wiener >= cost_norestore) { type[tile_idx] = RESTORE_NONE; } else { type[tile_idx] = RESTORE_WIENER; - memcpy(&wiener_info[tile_idx], &rsi->wiener_info[tile_idx], + memcpy(&wiener_info[tile_idx], &rsi[plane].wiener_info[tile_idx], sizeof(wiener_info[tile_idx])); - memcpy(&ref_wiener_info, &rsi->wiener_info[tile_idx], + memcpy(&ref_wiener_info, &rsi[plane].wiener_info[tile_idx], sizeof(ref_wiener_info)); - bits = count_wiener_bits(&wiener_info[tile_idx], &ref_wiener_info) - << AV1_PROB_COST_SHIFT; best_tile_cost[tile_idx] = err; } - rsi->restoration_type[tile_idx] = RESTORE_NONE; + rsi[plane].restoration_type[tile_idx] = RESTORE_NONE; } // Cost for Wiener filtering set_default_wiener(&ref_wiener_info); - bits = frame_level_restore_bits[rsi->frame_restoration_type] + bits = frame_level_restore_bits[rsi[plane].frame_restoration_type] << AV1_PROB_COST_SHIFT; for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { bits += av1_cost_bit(RESTORE_NONE_WIENER_PROB, type[tile_idx] != RESTORE_NONE); - memcpy(&rsi->wiener_info[tile_idx], &wiener_info[tile_idx], + memcpy(&rsi[plane].wiener_info[tile_idx], &wiener_info[tile_idx], sizeof(wiener_info[tile_idx])); if (type[tile_idx] == RESTORE_WIENER) { - bits += count_wiener_bits(&rsi->wiener_info[tile_idx], &ref_wiener_info) - << AV1_PROB_COST_SHIFT; - memcpy(&ref_wiener_info, &rsi->wiener_info[tile_idx], + bits += + count_wiener_bits(&rsi[plane].wiener_info[tile_idx], &ref_wiener_info) + << AV1_PROB_COST_SHIFT; + memcpy(&ref_wiener_info, &rsi[plane].wiener_info[tile_idx], sizeof(ref_wiener_info)); } - rsi->restoration_type[tile_idx] = type[tile_idx]; + rsi[plane].restoration_type[tile_idx] = type[tile_idx]; } - err = try_restoration_frame(src, cpi, rsi, 1, partial_frame, dst_frame); + err = try_restoration_frame(src, cpi, rsi, 1 << plane, partial_frame, + dst_frame); cost_wiener = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); return cost_wiener; } static double search_norestore(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, - int partial_frame, RestorationInfo *info, - RestorationType *type, double *best_tile_cost, + int partial_frame, int plane, + RestorationInfo *info, RestorationType *type, + double *best_tile_cost, YV12_BUFFER_CONFIG *dst_frame) { - double err, cost_norestore; + int64_t err; + double cost_norestore; int bits; MACROBLOCK *x = &cpi->td.mb; AV1_COMMON *const cm = &cpi->common; int tile_idx, tile_width, tile_height, nhtiles, nvtiles; int h_start, h_end, v_start, v_end; + int width, height; + if (plane == AOM_PLANE_Y) { + width = cm->width; + height = cm->height; + } else { + width = src->uv_crop_width; + height = src->uv_crop_height; + } const int ntiles = av1_get_rest_ntiles( - cm->width, cm->height, cm->rst_info[0].restoration_tilesize, &tile_width, + width, height, cm->rst_info[plane].restoration_tilesize, &tile_width, &tile_height, &nhtiles, &nvtiles); (void)info; (void)dst_frame; (void)partial_frame; + info->frame_restoration_type = RESTORE_NONE; for (tile_idx = 0; tile_idx < ntiles; ++tile_idx) { av1_get_rest_tile_limits(tile_idx, 0, 0, nhtiles, nvtiles, tile_width, - tile_height, cm->width, cm->height, 0, 0, &h_start, - &h_end, &v_start, &v_end); + tile_height, width, height, 0, 0, &h_start, &h_end, + &v_start, &v_end); err = sse_restoration_tile(src, cm->frame_to_show, cm, h_start, - h_end - h_start, v_start, v_end - v_start, 1); + h_end - h_start, v_start, v_end - v_start, + 1 << plane); type[tile_idx] = RESTORE_NONE; best_tile_cost[tile_idx] = err; } // RD cost associated with no restoration - err = sse_restoration_tile(src, cm->frame_to_show, cm, 0, cm->width, 0, - cm->height, 1); + err = sse_restoration_frame(cm, src, cm->frame_to_show, (1 << plane)); bits = frame_level_restore_bits[RESTORE_NONE] << AV1_PROB_COST_SHIFT; cost_norestore = RDCOST_DBL(x->rdmult, x->rddiv, (bits >> 4), err); return cost_norestore; } static double search_switchable_restoration( - AV1_COMP *cpi, int partial_frame, RestorationInfo *rsi, + AV1_COMP *cpi, int partial_frame, int plane, RestorationInfo *rsi, double *tile_cost[RESTORE_SWITCHABLE_TYPES]) { AV1_COMMON *const cm = &cpi->common; MACROBLOCK *x = &cpi->td.mb; double cost_switchable = 0; int bits, tile_idx; RestorationType r; - const int ntiles = av1_get_rest_ntiles(cm->width, cm->height, - cm->rst_info[0].restoration_tilesize, - NULL, NULL, NULL, NULL); + int width, height; + if (plane == AOM_PLANE_Y) { + width = cm->width; + height = cm->height; + } else { + width = ROUND_POWER_OF_TWO(cm->width, cm->subsampling_x); + height = ROUND_POWER_OF_TWO(cm->height, cm->subsampling_y); + } + const int ntiles = av1_get_rest_ntiles( + width, height, cm->rst_info[plane].restoration_tilesize, NULL, NULL, NULL, + NULL); SgrprojInfo ref_sgrproj_info; set_default_sgrproj(&ref_sgrproj_info); WienerInfo ref_wiener_info; @@ -1203,57 +1244,60 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi, double best_cost_restore; RestorationType r, best_restore; - const int ntiles = av1_get_rest_ntiles(cm->width, cm->height, - cm->rst_info[0].restoration_tilesize, - NULL, NULL, NULL, NULL); + const int ntiles_y = av1_get_rest_ntiles(cm->width, cm->height, + cm->rst_info[0].restoration_tilesize, + NULL, NULL, NULL, NULL); + const int ntiles_uv = av1_get_rest_ntiles( + ROUND_POWER_OF_TWO(cm->width, cm->subsampling_x), + ROUND_POWER_OF_TWO(cm->height, cm->subsampling_y), + cm->rst_info[1].restoration_tilesize, NULL, NULL, NULL, NULL); + // Assume ntiles_uv is never larger that ntiles_y and so the same arrays work. for (r = 0; r < RESTORE_SWITCHABLE_TYPES; r++) { - tile_cost[r] = (double *)aom_malloc(sizeof(*tile_cost[0]) * ntiles); + tile_cost[r] = (double *)aom_malloc(sizeof(*tile_cost[0]) * ntiles_y); restore_types[r] = - (RestorationType *)aom_malloc(sizeof(*restore_types[0]) * ntiles); - } - - for (r = 0; r < RESTORE_SWITCHABLE_TYPES; ++r) { - if (force_restore_type != 0) - if (r != RESTORE_NONE && r != force_restore_type) continue; - cost_restore[r] = search_restore_fun[r]( - src, cpi, method == LPF_PICK_FROM_SUBIMAGE, &cm->rst_info[0], - restore_types[r], tile_cost[r], &cpi->trial_frame_rst); + (RestorationType *)aom_malloc(sizeof(*restore_types[0]) * ntiles_y); } - cost_restore[RESTORE_SWITCHABLE] = search_switchable_restoration( - cpi, method == LPF_PICK_FROM_SUBIMAGE, &cm->rst_info[0], tile_cost); - best_cost_restore = DBL_MAX; - best_restore = 0; - for (r = 0; r < RESTORE_TYPES; ++r) { + for (int plane = AOM_PLANE_Y; plane <= AOM_PLANE_V; ++plane) { + for (r = 0; r < RESTORE_SWITCHABLE_TYPES; ++r) { + cost_restore[r] = DBL_MAX; + if (force_restore_type != 0) + if (r != RESTORE_NONE && r != force_restore_type) continue; + cost_restore[r] = + search_restore_fun[r](src, cpi, method == LPF_PICK_FROM_SUBIMAGE, + plane, &cm->rst_info[plane], restore_types[r], + tile_cost[r], &cpi->trial_frame_rst); + } + if (plane == AOM_PLANE_Y) + cost_restore[RESTORE_SWITCHABLE] = + search_switchable_restoration(cpi, method == LPF_PICK_FROM_SUBIMAGE, + plane, &cm->rst_info[plane], tile_cost); + else + cost_restore[RESTORE_SWITCHABLE] = DBL_MAX; + best_cost_restore = DBL_MAX; + best_restore = 0; + for (r = 0; r < RESTORE_TYPES; ++r) { + if (force_restore_type != 0) + if (r != RESTORE_NONE && r != force_restore_type) continue; + if (cost_restore[r] < best_cost_restore) { + best_restore = r; + best_cost_restore = cost_restore[r]; + } + } + cm->rst_info[plane].frame_restoration_type = best_restore; if (force_restore_type != 0) - if (r != RESTORE_NONE && r != force_restore_type) continue; - if (cost_restore[r] < best_cost_restore) { - best_restore = r; - best_cost_restore = cost_restore[r]; + assert(best_restore == force_restore_type || + best_restore == RESTORE_NONE); + if (best_restore != RESTORE_SWITCHABLE) { + const int nt = (plane == AOM_PLANE_Y ? ntiles_y : ntiles_uv); + memcpy(cm->rst_info[plane].restoration_type, restore_types[best_restore], + nt * sizeof(restore_types[best_restore][0])); } } - cm->rst_info[0].frame_restoration_type = best_restore; - if (force_restore_type != 0) - assert(best_restore == force_restore_type || best_restore == RESTORE_NONE); - if (best_restore != RESTORE_SWITCHABLE) { - memcpy(cm->rst_info[0].restoration_type, restore_types[best_restore], - ntiles * sizeof(restore_types[best_restore][0])); - } - - // Color components - search_wiener_uv(src, cpi, method == LPF_PICK_FROM_SUBIMAGE, AOM_PLANE_U, - &cm->rst_info[AOM_PLANE_U], - cm->rst_info[AOM_PLANE_U].restoration_type, - &cpi->trial_frame_rst); - search_wiener_uv(src, cpi, method == LPF_PICK_FROM_SUBIMAGE, AOM_PLANE_V, - &cm->rst_info[AOM_PLANE_V], - cm->rst_info[AOM_PLANE_V].restoration_type, - &cpi->trial_frame_rst); /* - printf("Frame %d/%d restore types: %d %d %d\n", - cm->current_video_frame, cm->show_frame, - cm->rst_info[0].frame_restoration_type, + printf("Frame %d/%d restore types: %d %d %d\n", cm->current_video_frame, + cm->show_frame, cm->rst_info[0].frame_restoration_type, cm->rst_info[1].frame_restoration_type, cm->rst_info[2].frame_restoration_type); printf("Frame %d/%d frame_restore_type %d : %f %f %f %f\n", |