1 files changed, 88 insertions, 40 deletions
diff --git a/third_party/aom/av1/encoder/pickrst.c b/third_party/aom/av1/encoder/pickrst.c
index 28b693b08..e7804f6b4 100644
--- a/third_party/aom/av1/encoder/pickrst.c
+++ b/third_party/aom/av1/encoder/pickrst.c
@@ -15,6 +15,7 @@
 #include <math.h>
 
 #include "config/aom_scale_rtcd.h"
+#include "config/av1_rtcd.h"
 
 #include "aom_dsp/aom_dsp_common.h"
 #include "aom_dsp/binary_codes_writer.h"
@@ -22,7 +23,6 @@
 #include "aom_mem/aom_mem.h"
 #include "aom_ports/mem.h"
 #include "aom_ports/system_state.h"
-
 #include "av1/common/onyxc_int.h"
 #include "av1/common/quant_common.h"
 #include "av1/common/restoration.h"
@@ -181,6 +181,77 @@ static int64_t try_restoration_unit(const RestSearchCtxt *rsc,
   return sse_restoration_unit(limits, rsc->src, rsc->dst, plane, highbd);
 }
 
+int64_t av1_lowbd_pixel_proj_error_c(const uint8_t *src8, int width, int height,
+                                     int src_stride, const uint8_t *dat8,
+                                     int dat_stride, int32_t *flt0,
+                                     int flt0_stride, int32_t *flt1,
+                                     int flt1_stride, int xq[2],
+                                     const sgr_params_type *params) {
+  int i, j;
+  const uint8_t *src = src8;
+  const uint8_t *dat = dat8;
+  int64_t err = 0;
+  if (params->r[0] > 0 && params->r[1] > 0) {
+    for (i = 0; i < height; ++i) {
+      for (j = 0; j < width; ++j) {
+        assert(flt1[j] < (1 << 15) && flt1[j] > -(1 << 15));
+        assert(flt0[j] < (1 << 15) && flt0[j] > -(1 << 15));
+        const int32_t u = (int32_t)(dat[j] << SGRPROJ_RST_BITS);
+        int32_t v = u << SGRPROJ_PRJ_BITS;
+        v += xq[0] * (flt0[j] - u) + xq[1] * (flt1[j] - u);
+        const int32_t e =
+            ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) - src[j];
+        err += e * e;
+      }
+      dat += dat_stride;
+      src += src_stride;
+      flt0 += flt0_stride;
+      flt1 += flt1_stride;
+    }
+  } else if (params->r[0] > 0) {
+    for (i = 0; i < height; ++i) {
+      for (j = 0; j < width; ++j) {
+        assert(flt0[j] < (1 << 15) && flt0[j] > -(1 << 15));
+        const int32_t u = (int32_t)(dat[j] << SGRPROJ_RST_BITS);
+        int32_t v = u << SGRPROJ_PRJ_BITS;
+        v += xq[0] * (flt0[j] - u);
+        const int32_t e =
+            ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) - src[j];
+        err += e * e;
+      }
+      dat += dat_stride;
+      src += src_stride;
+      flt0 += flt0_stride;
+    }
+  } else if (params->r[1] > 0) {
+    for (i = 0; i < height; ++i) {
+      for (j = 0; j < width; ++j) {
+        assert(flt1[j] < (1 << 15) && flt1[j] > -(1 << 15));
+        const int32_t u = (int32_t)(dat[j] << SGRPROJ_RST_BITS);
+        int32_t v = u << SGRPROJ_PRJ_BITS;
+        v += xq[1] * (flt1[j] - u);
+        const int32_t e =
+            ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) - src[j];
+        err += e * e;
+      }
+      dat += dat_stride;
+      src += src_stride;
+      flt1 += flt1_stride;
+    }
+  } else {
+    for (i = 0; i < height; ++i) {
+      for (j = 0; j < width; ++j) {
+        const int32_t e = (int32_t)(dat[j]) - src[j];
+        err += e * e;
+      }
+      dat += dat_stride;
+      src += src_stride;
+    }
+  }
+
+  return err;
+}
+
 static int64_t get_pixel_proj_error(const uint8_t *src8, int width, int height,
                                     int src_stride, const uint8_t *dat8,
                                     int dat_stride, int use_highbitdepth,
@@ -192,21 +263,9 @@ static int64_t get_pixel_proj_error(const uint8_t *src8, int width, int height,
   int xq[2];
   decode_xq(xqd, xq, params);
   if (!use_highbitdepth) {
-    const uint8_t *src = src8;
-    const uint8_t *dat = dat8;
-    for (i = 0; i < height; ++i) {
-      for (j = 0; j < width; ++j) {
-        const int32_t u =
-            (int32_t)(dat[i * dat_stride + j] << SGRPROJ_RST_BITS);
-        int32_t v = u << SGRPROJ_PRJ_BITS;
-        if (params->r[0] > 0) v += xq[0] * (flt0[i * flt0_stride + j] - u);
-        if (params->r[1] > 0) v += xq[1] * (flt1[i * flt1_stride + j] - u);
-        const int32_t e =
-            ROUND_POWER_OF_TWO(v, SGRPROJ_RST_BITS + SGRPROJ_PRJ_BITS) -
-            src[i * src_stride + j];
-        err += e * e;
-      }
-    }
+    err = av1_lowbd_pixel_proj_error(src8, width, height, src_stride, dat8,
+                                     dat_stride, flt0, flt0_stride, flt1,
+                                     flt1_stride, xq, params);
   } else {
     const uint16_t *src = CONVERT_TO_SHORTPTR(src8);
     const uint16_t *dat = CONVERT_TO_SHORTPTR(dat8);
@@ -463,9 +522,11 @@ static void apply_sgr(int sgr_params_idx, const uint8_t *dat8, int width,
     // Iterate over the stripe in blocks of width pu_width
     for (int j = 0; j < width; j += pu_width) {
       const int w = AOMMIN(pu_width, width - j);
-      av1_selfguided_restoration(dat8_row + j, w, h, dat_stride, flt0_row + j,
-                                 flt1_row + j, flt_stride, sgr_params_idx,
-                                 bit_depth, use_highbd);
+      const int ret = av1_selfguided_restoration(
+          dat8_row + j, w, h, dat_stride, flt0_row + j, flt1_row + j,
+          flt_stride, sgr_params_idx, bit_depth, use_highbd);
+      (void)ret;
+      assert(!ret);
     }
   }
 }
@@ -588,22 +649,9 @@ static void search_sgrproj(const RestorationTileLimits *limits,
   if (cost_sgr < cost_none) rsc->sgrproj = rusi->sgrproj;
 }
 
-static double find_average(const uint8_t *src, int h_start, int h_end,
-                           int v_start, int v_end, int stride) {
-  uint64_t sum = 0;
-  double avg = 0;
-  int i, j;
-  aom_clear_system_state();
-  for (i = v_start; i < v_end; i++)
-    for (j = h_start; j < h_end; j++) sum += src[i * stride + j];
-  avg = (double)sum / ((v_end - v_start) * (h_end - h_start));
-  return avg;
-}
-
-static void compute_stats(int wiener_win, const uint8_t *dgd,
-                          const uint8_t *src, int h_start, int h_end,
-                          int v_start, int v_end, int dgd_stride,
-                          int src_stride, double *M, double *H) {
+void av1_compute_stats_c(int wiener_win, const uint8_t *dgd, const uint8_t *src,
+                         int h_start, int h_end, int v_start, int v_end,
+                         int dgd_stride, int src_stride, double *M, double *H) {
   int i, j, k, l;
   double Y[WIENER_WIN2];
   const int wiener_win2 = wiener_win * wiener_win;
@@ -626,8 +674,7 @@ static void compute_stats(int wiener_win, const uint8_t *dgd,
       assert(idx == wiener_win2);
       for (k = 0; k < wiener_win2; ++k) {
         M[k] += Y[k] * X;
-        H[k * wiener_win2 + k] += Y[k] * Y[k];
-        for (l = k + 1; l < wiener_win2; ++l) {
+        for (l = k; l < wiener_win2; ++l) {
           // H is a symmetric matrix, so we only need to fill out the upper
           // triangle here. We can copy it down to the lower triangle outside
           // the (i, j) loops.
@@ -1073,9 +1120,9 @@ static void search_wiener(const RestorationTileLimits *limits,
                          limits->h_start, limits->h_end, limits->v_start,
                          limits->v_end, rsc->dgd_stride, rsc->src_stride, M, H);
   } else {
-    compute_stats(wiener_win, rsc->dgd_buffer, rsc->src_buffer, limits->h_start,
-                  limits->h_end, limits->v_start, limits->v_end,
-                  rsc->dgd_stride, rsc->src_stride, M, H);
+    av1_compute_stats(wiener_win, rsc->dgd_buffer, rsc->src_buffer,
+                      limits->h_start, limits->h_end, limits->v_start,
+                      limits->v_end, rsc->dgd_stride, rsc->src_stride, M, H);
   }
 
   const MACROBLOCK *const x = rsc->x;
@@ -1266,6 +1313,7 @@ void av1_pick_filter_restoration(const YV12_BUFFER_CONFIG *src, AV1_COMP *cpi) {
   // problem, as these elements are ignored later, but in order to quiet
   // Valgrind's warnings we initialise the array below.
   memset(rusi, 0, sizeof(*rusi) * ntiles[0]);
+  cpi->td.mb.rdmult = cpi->rd.RDMULT;
 
   RestSearchCtxt rsc;
   const int plane_start = AOM_PLANE_Y;