summaryrefslogtreecommitdiffstats
path: root/third_party/aom/av1/encoder/pickrst.c
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/aom/av1/encoder/pickrst.c')
-rw-r--r--third_party/aom/av1/encoder/pickrst.c128
1 files changed, 88 insertions, 40 deletions
diff --git a/third_party/aom/av1/encoder/pickrst.c b/third_party/aom/av1/encoder/pickrst.c
index 28b693b08..e7804f6b4 100644
--- a/third_party/aom/av1/encoder/pickrst.c
+++ b/third_party/aom/av1/encoder/pickrst.c
@@ -15,6 +15,7 @@
#include <math.h>
#include "config/aom_scale_rtcd.h"
+#include "config/av1_rtcd.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/binary_codes_writer.h"
@@ -22,7 +23,6 @@
#include "aom_mem/aom_mem.h"
#include "aom_ports/mem.h"
#include "aom_ports/system_state.h"
-
#include "av1/common/onyxc_int.h"
#include "av1/common/quant_common.h"
#include "av1/common/restoration.h"
@@ -181,6 +181,77 @@ static int64_t try_restoration_unit(const RestSearchCtxt *rsc,
return sse_restoration_unit(limits, rsc->src, rsc->dst, plane, highbd);
}
+int64_t av1_lowbd_pixel_proj_error_c(const uint8_t *src8, int width, int height,
+ int src_stride, const uint8_t *dat8,
+ int dat_stride, int32_t *flt0,
+ int flt0_stride, int32_t *flt1,
+ int flt1_stride, int xq[2],
+ const sgr_params_type *params) {
+ int i, j;
+ const uint8_t *src = src8;
+ const uint8_t *dat = dat8;
+ int64_t err = 0;
+ if (params->r[0] > 0 && params->r[1] > 0) {
+ for (i = 0; i < height; ++i) {
+ for (j = 0; j < width; ++j) {
+ assert(flt1[j] < (1 << 15) && flt1[j] > -(1 << 15));
+ assert(flt0[j] < (1 << 15) && flt0[j] > -(1 << 15));
+ const int32_t u = (int32_t)(dat[j] << SGRPROJ_RST_BITS);
+ int32_t v = u << SGRPROJ_PRJ_BITS;
+ v += xq[0] * (flt0[j] - u) + xq[1] * (flt1[j] - u);
+ const int32_t e =
+ ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) - src[j];
+ err += e * e;
+ }
+ dat += dat_stride;
+ src += src_stride;
+ flt0 += flt0_stride;
+ flt1 += flt1_stride;
+ }
+ } else if (params->r[0] > 0) {
+ for (i = 0; i < height; ++i) {
+ for (j = 0; j < width; ++j) {
+ assert(flt0[j] < (1 << 15) && flt0[j] > -(1 << 15));
+ const int32_t u = (int32_t)(dat[j] << SGRPROJ_RST_BITS);
+ int32_t v = u << SGRPROJ_PRJ_BITS;
+ v += xq[0] * (flt0[j] - u);
+ const int32_t e =
+ ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) - src[j];
+ err += e * e;
+ }
+ dat += dat_stride;
+ src += src_stride;
+ flt0 += flt0_stride;
+ }
+ } else if (params->r[1] > 0) {
+ for (i = 0; i < height; ++i) {
+ for (j = 0; j < width; ++j) {
+ assert(flt1[j] < (1 << 15) && flt1[j] > -(1 << 15));
+ const int32_t u = (int32_t)(dat[j] << SGRPROJ_RST_BITS);
+ int32_t v = u << SGRPROJ_PRJ_BITS;
+ v += xq[1] * (flt1[j] - u);
+ const int32_t e =
+ ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) - src[j];
+ err += e * e;
+ }
+ dat += dat_stride;
+ src += src_stride;
+ flt1 += flt1_stride;
+ }
+ } else {
+ for (i = 0; i < height; ++i) {
+ for (j = 0; j < width; ++j) {
+ const int32_t e = (int32_t)(dat[j]) - src[j];
+ err += e * e;
+ }
+ dat += dat_stride;
+ src += src_stride;
+ }
+ }
+
+ return err;
+}
+
static int64_t get_pixel_proj_error(const uint8_t *src8, int width, int height,
int src_stride, const uint8_t *dat8,
int dat_stride, int use_highbitdepth,
@@ -192,21 +263,9 @@ static int64_t get_pixel_proj_error(const uint8_t *src8, int width, int height,
int xq[2];
decode_xq(xqd, xq, params);
if (!use_highbitdepth) {
- const uint8_t *src = src8;
- const uint8_t *dat = dat8;
- for (i = 0; i < height; ++i) {
- for (j = 0; j < width; ++j) {
- const int32_t u =
- (int32_t)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
- int32_t v = u << SGRPROJ_PRJ_BITS;
- if (params->r[0] > 0) v += xq[0] * (flt0[i * flt0_stride + j] - u);
- if (params->r[1] > 0) v += xq[1] * (flt1[i * flt1_stride + j] - u);
- const int32_t e =
- ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) -
- src[i * src_stride + j];
- err += e * e;
- }
- }
+ err = av1_lowbd_pixel_proj_error(src8, width, height, src_stride, dat8,
+ dat_stride, flt0, flt0_stride, flt1,
+ flt1_stride, xq, params);
} else {
const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
const uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
@@ -463,9 +522,11 @@ static void apply_sgr(int sgr_params_idx, const uint8_t *dat8, int width,
// Iterate over the stripe in blocks of width pu_width
for (int j = 0; j < width; j += pu_width) {
const int w = AOMMIN(pu_width, width - j);
- av1_selfguided_restoration(dat8_row + j, w, h, dat_stride, flt0_row + j,
- flt1_row + j, flt_stride, sgr_params_idx,
- bit_depth, use_highbd);
+ const int ret = av1_selfguided_restoration(
+ dat8_row + j, w, h, dat_stride, flt0_row + j, flt1_row + j,
+ flt_stride, sgr_params_idx, bit_depth, use_highbd);
+ (void)ret;
+ assert(!ret);
}
}
}
@@ -588,22 +649,9 @@ static void search_sgrproj(const RestorationTileLimits *limits,
if (cost_sgr < cost_none) rsc->sgrproj = rusi->sgrproj;
}
-static double find_average(const uint8_t *src, int h_start, int h_end,
- int v_start, int v_end, int stride) {
- uint64_t sum = 0;
- double avg = 0;
- int i, j;
- aom_clear_system_state();
- for (i = v_start; i < v_end; i++)
- for (j = h_start; j < h_end; j++) sum += src[i * stride + j];
- avg = (double)sum / ((v_end - v_start) * (h_end - h_start));
- return avg;
-}
-
-static void compute_stats(int wiener_win, const uint8_t *dgd,
- const uint8_t *src, int h_start, int h_end,
- int v_start, int v_end, int dgd_stride,
- int src_stride, double *M, double *H) {
+void av1_compute_stats_c(int wiener_win, const uint8_t *dgd, const uint8_t *src,
+ int h_start, int h_end, int v_start, int v_end,
+ int dgd_stride, int src_stride, double *M, double *H) {
int i, j, k, l;
double Y[WIENER_WIN2];
const int wiener_win2 = wiener_win * wiener_win;
@@ -626,8 +674,7 @@ static void compute_stats(int wiener_win, const uint8_t *dgd,
assert(idx == wiener_win2);
for (k = 0; k < wiener_win2; ++k) {
M[k] += Y[k] * X;
- H[k * wiener_win2 + k] += Y[k] * Y[k];
- for (l = k + 1; l < wiener_win2; ++l) {
+ for (l = k; l < wiener_win2; ++l) {
// H is a symmetric matrix, so we only need to fill out the upper
// triangle here. We can copy it down to the lower triangle outside
// the (i, j) loops.
@@ -1073,9 +1120,9 @@ static void search_wiener(const RestorationTileLimits *limits,
limits->h_start, limits->h_end, limits->v_start,
limits->v_end, rsc->dgd_stride, rsc->src_stride, M, H);
} else {
- compute_stats(wiener_win, rsc->dgd_buffer, rsc->src_buffer, limits->h_start,
- limits->h_end, limits->v_start, limits->v_end,
- rsc->dgd_stride, rsc->src_stride, M, H);
+ av1_compute_stats(wiener_win, rsc->dgd_buffer, rsc->src_buffer,
+ limits->h_start, limits->h_end, limits->v_start,
+ limits->v_end, rsc->dgd_stride, rsc->src_stride, M, H);
}
const MACROBLOCK *const x = rsc->x;
@@ -1266,6 +1313,7 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi) {
// problem, as these elements are ignored later, but in order to quiet
// Valgrind's warnings we initialise the array below.
memset(rusi, 0, sizeof(*rusi) * ntiles[0]);
+ cpi->td.mb.rdmult = cpi->rd.RDMULT;
RestSearchCtxt rsc;
const int plane_start = AOM_PLANE_Y;