summary refs log tree commit diff stats
path: root/third_party/aom/av1/common/restoration.c
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/aom/av1/common/restoration.c')
-rw-r--r-- third_party/aom/av1/common/restoration.c 131
1 files changed, 41 insertions, 90 deletions
diff --git a/third_party/aom/av1/common/restoration.c b/third_party/aom/av1/common/restoration.c
index 632967957..d276a915b 100644
--- a/third_party/aom/av1/common/restoration.c
+++ b/third_party/aom/av1/common/restoration.c
@@ -661,9 +661,10 @@ const int32_t one_by_x[MAX_NELEM] = {
293, 273, 256, 241, 228, 216, 205, 195, 186, 178, 171, 164,
};
-static void selfguided_restoration_fast_internal(
- int32_t *dgd, int width, int height, int dgd_stride, int32_t *dst,
- int dst_stride, int bit_depth, int sgr_params_idx, int radius_idx) {
+static void calculate_intermediate_result(int32_t *dgd, int width, int height,
+ int dgd_stride, int bit_depth,
+ int sgr_params_idx, int radius_idx,
+ int pass, int32_t *A, int32_t *B) {
const sgr_params_type *const params = &sgr_params[sgr_params_idx];
const int r = params->r[radius_idx];
const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
@@ -673,10 +674,7 @@ static void selfguided_restoration_fast_internal(
// We also align the stride to a multiple of 16 bytes, for consistency
// with the SIMD version of this function.
int buf_stride = ((width_ext + 3) & ~3) + 16;
- int32_t A_[RESTORATION_PROC_UNIT_PELS];
- int32_t B_[RESTORATION_PROC_UNIT_PELS];
- int32_t *A = A_;
- int32_t *B = B_;
+ const int step = pass == 0 ? 1 : 2;
int i, j;
assert(r <= MAX_RADIUS && "Need MAX_RADIUS >= r");
@@ -691,7 +689,7 @@ static void selfguided_restoration_fast_internal(
B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
// Calculate the eventual A[] and B[] arrays. Include a 1-pixel border - ie,
// for a 64x64 processing unit, we calculate 66x66 pixels of A[] and B[].
- for (i = -1; i < height + 1; i += 2) {
+ for (i = -1; i < height + 1; i += step) {
for (j = -1; j < width + 1; ++j) {
const int k = i * buf_stride + j;
const int n = (2 * r + 1) * (2 * r + 1);
@@ -754,7 +752,31 @@ static void selfguided_restoration_fast_internal(
SGRPROJ_RECIP_BITS);
}
}
+}
+
+static void selfguided_restoration_fast_internal(
+ int32_t *dgd, int width, int height, int dgd_stride, int32_t *dst,
+ int dst_stride, int bit_depth, int sgr_params_idx, int radius_idx) {
+ const sgr_params_type *const params = &sgr_params[sgr_params_idx];
+ const int r = params->r[radius_idx];
+ const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
+ // Adjusting the stride of A and B here appears to avoid bad cache effects,
+ // leading to a significant speed improvement.
+ // We also align the stride to a multiple of 16 bytes, for consistency
+ // with the SIMD version of this function.
+ int buf_stride = ((width_ext + 3) & ~3) + 16;
+ int32_t A_[RESTORATION_PROC_UNIT_PELS];
+ int32_t B_[RESTORATION_PROC_UNIT_PELS];
+ int32_t *A = A_;
+ int32_t *B = B_;
+ int i, j;
+ calculate_intermediate_result(dgd, width, height, dgd_stride, bit_depth,
+ sgr_params_idx, radius_idx, 1, A, B);
+ A += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
+ B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
+
// Use the A[] and B[] arrays to calculate the filtered image
+ (void)r;
assert(r == 2);
for (i = 0; i < height; ++i) {
if (!(i & 1)) { // even row
@@ -796,10 +818,7 @@ static void selfguided_restoration_internal(int32_t *dgd, int width, int height,
int dst_stride, int bit_depth,
int sgr_params_idx,
int radius_idx) {
- const sgr_params_type *const params = &sgr_params[sgr_params_idx];
- const int r = params->r[radius_idx];
const int width_ext = width + 2 * SGRPROJ_BORDER_HORZ;
- const int height_ext = height + 2 * SGRPROJ_BORDER_VERT;
// Adjusting the stride of A and B here appears to avoid bad cache effects,
// leading to a significant speed improvement.
// We also align the stride to a multiple of 16 bytes, for consistency
@@ -810,82 +829,11 @@ static void selfguided_restoration_internal(int32_t *dgd, int width, int height,
int32_t *A = A_;
int32_t *B = B_;
int i, j;
-
- assert(r <= MAX_RADIUS && "Need MAX_RADIUS >= r");
- assert(r <= SGRPROJ_BORDER_VERT - 1 && r <= SGRPROJ_BORDER_HORZ - 1 &&
- "Need SGRPROJ_BORDER_* >= r+1");
-
- boxsum(dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ,
- width_ext, height_ext, dgd_stride, r, 0, B, buf_stride);
- boxsum(dgd - dgd_stride * SGRPROJ_BORDER_VERT - SGRPROJ_BORDER_HORZ,
- width_ext, height_ext, dgd_stride, r, 1, A, buf_stride);
+ calculate_intermediate_result(dgd, width, height, dgd_stride, bit_depth,
+ sgr_params_idx, radius_idx, 0, A, B);
A += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
B += SGRPROJ_BORDER_VERT * buf_stride + SGRPROJ_BORDER_HORZ;
- // Calculate the eventual A[] and B[] arrays. Include a 1-pixel border - ie,
- // for a 64x64 processing unit, we calculate 66x66 pixels of A[] and B[].
- for (i = -1; i < height + 1; ++i) {
- for (j = -1; j < width + 1; ++j) {
- const int k = i * buf_stride + j;
- const int n = (2 * r + 1) * (2 * r + 1);
-
- // a < 2^16 * n < 2^22 regardless of bit depth
- uint32_t a = ROUND_POWER_OF_TWO(A[k], 2 * (bit_depth - 8));
- // b < 2^8 * n < 2^14 regardless of bit depth
- uint32_t b = ROUND_POWER_OF_TWO(B[k], bit_depth - 8);
-
- // Each term in calculating p = a * n - b * b is < 2^16 * n^2 < 2^28,
- // and p itself satisfies p < 2^14 * n^2 < 2^26.
- // This bound on p is due to:
- // https://en.wikipedia.org/wiki/Popoviciu's_inequality_on_variances
- //
- // Note: Sometimes, in high bit depth, we can end up with a*n < b*b.
- // This is an artefact of rounding, and can only happen if all pixels
- // are (almost) identical, so in this case we saturate to p=0.
- uint32_t p = (a * n < b * b) ? 0 : a * n - b * b;
-
- const uint32_t s = params->s[radius_idx];
-
- // p * s < (2^14 * n^2) * round(2^20 / n^2 eps) < 2^34 / eps < 2^32
- // as long as eps >= 4. So p * s fits into a uint32_t, and z < 2^12
- // (this holds even after accounting for the rounding in s)
- const uint32_t z = ROUND_POWER_OF_TWO(p * s, SGRPROJ_MTABLE_BITS);
-
- // Note: We have to be quite careful about the value of A[k].
- // This is used as a blend factor between individual pixel values and the
- // local mean. So it logically has a range of [0, 256], including both
- // endpoints.
- //
- // This is a pain for hardware, as we'd like something which can be stored
- // in exactly 8 bits.
- // Further, in the calculation of B[k] below, if z == 0 and r == 2,
- // then A[k] "should be" 0. But then we can end up setting B[k] to a value
- // slightly above 2^(8 + bit depth), due to rounding in the value of
- // one_by_x[25-1].
- //
- // Thus we saturate so that, when z == 0, A[k] is set to 1 instead of 0.
- // This fixes the above issues (256 - A[k] fits in a uint8, and we can't
- // overflow), without significantly affecting the final result: z == 0
- // implies that the image is essentially "flat", so the local mean and
- // individual pixel values are very similar.
- //
- // Note that saturating on the other side, ie. requring A[k] <= 255,
- // would be a bad idea, as that corresponds to the case where the image
- // is very variable, when we want to preserve the local pixel value as
- // much as possible.
- A[k] = x_by_xplus1[AOMMIN(z, 255)]; // in range [1, 256]
- // SGRPROJ_SGR - A[k] < 2^8 (from above), B[k] < 2^(bit_depth) * n,
- // one_by_x[n - 1] = round(2^12 / n)
- // => the product here is < 2^(20 + bit_depth) <= 2^32,
- // and B[k] is set to a value < 2^(8 + bit depth)
- // This holds even with the rounding in one_by_x and in the overall
- // result, as long as SGRPROJ_SGR - A[k] is strictly less than 2^8.
- B[k] = (int32_t)ROUND_POWER_OF_TWO((uint32_t)(SGRPROJ_SGR - A[k]) *
- (uint32_t)B[k] *
- (uint32_t)one_by_x[n - 1],
- SGRPROJ_RECIP_BITS);
- }
- }
// Use the A[] and B[] arrays to calculate the filtered image
for (i = 0; i < height; ++i) {
for (j = 0; j < width; ++j) {
@@ -911,10 +859,10 @@ static void selfguided_restoration_internal(int32_t *dgd, int width, int height,
}
}
-void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
- int dgd_stride, int32_t *flt0, int32_t *flt1,
- int flt_stride, int sgr_params_idx,
- int bit_depth, int highbd) {
+int av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
+ int dgd_stride, int32_t *flt0, int32_t *flt1,
+ int flt_stride, int sgr_params_idx,
+ int bit_depth, int highbd) {
int32_t dgd32_[RESTORATION_PROC_UNIT_PELS];
const int dgd32_stride = width + 2 * SGRPROJ_BORDER_HORZ;
int32_t *dgd32 =
@@ -948,6 +896,7 @@ void av1_selfguided_restoration_c(const uint8_t *dgd8, int width, int height,
if (params->r[1] > 0)
selfguided_restoration_internal(dgd32, width, height, dgd32_stride, flt1,
flt_stride, bit_depth, sgr_params_idx, 1);
+ return 0;
}
void apply_selfguided_restoration_c(const uint8_t *dat8, int width, int height,
@@ -959,8 +908,10 @@ void apply_selfguided_restoration_c(const uint8_t *dat8, int width, int height,
int32_t *flt1 = flt0 + RESTORATION_UNITPELS_MAX;
assert(width * height <= RESTORATION_UNITPELS_MAX);
- av1_selfguided_restoration_c(dat8, width, height, stride, flt0, flt1, width,
- eps, bit_depth, highbd);
+ const int ret = av1_selfguided_restoration_c(
+ dat8, width, height, stride, flt0, flt1, width, eps, bit_depth, highbd);
+ (void)ret;
+ assert(!ret);
const sgr_params_type *const params = &sgr_params[eps];
int xq[2];
decode_xq(xqd, xq, params);