summary | refs | log | tree | commit | diff | stats
path: root/third_party/aom/aom_dsp/intrapred.c
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/aom/aom_dsp/intrapred.c')
-rw-r--r-- third_party/aom/aom_dsp/intrapred.c 816
1 files changed, 268 insertions, 548 deletions
diff --git a/third_party/aom/aom_dsp/intrapred.c b/third_party/aom/aom_dsp/intrapred.c
index 6d2ac37d9..c6aa6b207 100644
--- a/third_party/aom/aom_dsp/intrapred.c
+++ b/third_party/aom/aom_dsp/intrapred.c
@@ -12,152 +12,14 @@
#include <assert.h>
#include <math.h>
-#include "./aom_config.h"
-#include "./aom_dsp_rtcd.h"
+#include "config/aom_config.h"
+#include "config/aom_dsp_rtcd.h"
#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/intrapred_common.h"
#include "aom_mem/aom_mem.h"
#include "aom_ports/bitops.h"
-#define DST(x, y) dst[(x) + (y)*stride]
-#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
-#define AVG2(a, b) (((a) + (b) + 1) >> 1)
-
-static INLINE void d207e_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
- int bh, const uint8_t *above,
- const uint8_t *left) {
- int r, c;
- (void)above;
-
- for (r = 0; r < bh; ++r) {
- for (c = 0; c < bw; ++c) {
- dst[c] = c & 1 ? AVG3(left[(c >> 1) + r], left[(c >> 1) + r + 1],
- left[(c >> 1) + r + 2])
- : AVG2(left[(c >> 1) + r], left[(c >> 1) + r + 1]);
- }
- dst += stride;
- }
-}
-
-static INLINE void d63e_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
- int bh, const uint8_t *above,
- const uint8_t *left) {
- int r, c;
- (void)left;
- for (r = 0; r < bh; ++r) {
- for (c = 0; c < bw; ++c) {
- dst[c] = r & 1 ? AVG3(above[(r >> 1) + c], above[(r >> 1) + c + 1],
- above[(r >> 1) + c + 2])
- : AVG2(above[(r >> 1) + c], above[(r >> 1) + c + 1]);
- }
- dst += stride;
- }
-}
-
-static INLINE void d45e_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
- int bh, const uint8_t *above,
- const uint8_t *left) {
- int r, c;
- (void)left;
- for (r = 0; r < bh; ++r) {
- for (c = 0; c < bw; ++c) {
- dst[c] = AVG3(above[r + c], above[r + c + 1],
- above[r + c + 1 + (r + c + 2 < bw + bh)]);
- }
- dst += stride;
- }
-}
-
-static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
- int bh, const uint8_t *above,
- const uint8_t *left) {
- int r, c;
-
- // first row
- for (c = 0; c < bw; c++) dst[c] = AVG2(above[c - 1], above[c]);
- dst += stride;
-
- // second row
- dst[0] = AVG3(left[0], above[-1], above[0]);
- for (c = 1; c < bw; c++) dst[c] = AVG3(above[c - 2], above[c - 1], above[c]);
- dst += stride;
-
- // the rest of first col
- dst[0] = AVG3(above[-1], left[0], left[1]);
- for (r = 3; r < bh; ++r)
- dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]);
-
- // the rest of the block
- for (r = 2; r < bh; ++r) {
- for (c = 1; c < bw; c++) dst[c] = dst[-2 * stride + c - 1];
- dst += stride;
- }
-}
-
-static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
- int bh, const uint8_t *above,
- const uint8_t *left) {
- int i;
-#if CONFIG_TX64X64
-#if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ > 7
- // silence a spurious -Warray-bounds warning, possibly related to:
- // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56273
- uint8_t border[133];
-#else
- uint8_t border[64 + 64 - 1]; // outer border from bottom-left to top-right
-#endif
-#else
-#if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ > 7
- // silence a spurious -Warray-bounds warning, possibly related to:
- // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56273
- uint8_t border[69];
-#else
- uint8_t border[32 + 32 - 1]; // outer border from bottom-left to top-right
-#endif
-#endif // CONFIG_TX64X64
-
- // dst(bh, bh - 2)[0], i.e., border starting at bottom-left
- for (i = 0; i < bh - 2; ++i) {
- border[i] = AVG3(left[bh - 3 - i], left[bh - 2 - i], left[bh - 1 - i]);
- }
- border[bh - 2] = AVG3(above[-1], left[0], left[1]);
- border[bh - 1] = AVG3(left[0], above[-1], above[0]);
- border[bh - 0] = AVG3(above[-1], above[0], above[1]);
- // dst[0][2, size), i.e., remaining top border ascending
- for (i = 0; i < bw - 2; ++i) {
- border[bh + 1 + i] = AVG3(above[i], above[i + 1], above[i + 2]);
- }
-
- for (i = 0; i < bh; ++i) {
- memcpy(dst + i * stride, border + bh - 1 - i, bw);
- }
-}
-
-static INLINE void d153_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
- int bh, const uint8_t *above,
- const uint8_t *left) {
- int r, c;
- dst[0] = AVG2(above[-1], left[0]);
- for (r = 1; r < bh; r++) dst[r * stride] = AVG2(left[r - 1], left[r]);
- dst++;
-
- dst[0] = AVG3(left[0], above[-1], above[0]);
- dst[stride] = AVG3(above[-1], left[0], left[1]);
- for (r = 2; r < bh; r++)
- dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]);
- dst++;
-
- for (c = 0; c < bw - 2; c++)
- dst[c] = AVG3(above[c - 1], above[c], above[c + 1]);
- dst += stride;
-
- for (r = 1; r < bh; ++r) {
- for (c = 0; c < bw - 2; c++) dst[c] = dst[-stride + c - 2];
- dst += stride;
- }
-}
-
static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
const uint8_t *above, const uint8_t *left) {
int r;
@@ -244,13 +106,12 @@ static INLINE void smooth_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
for (i = 0; i < 4; ++i) {
this_pred += weights[i] * pixels[i];
}
- dst[c] = clip_pixel(divide_round(this_pred, log2_scale));
+ dst[c] = divide_round(this_pred, log2_scale);
}
dst += stride;
}
}
-#if CONFIG_SMOOTH_HV
static INLINE void smooth_v_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
int bh, const uint8_t *above,
const uint8_t *left) {
@@ -274,7 +135,7 @@ static INLINE void smooth_v_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
for (i = 0; i < 2; ++i) {
this_pred += weights[i] * pixels[i];
}
- dst[c] = clip_pixel(divide_round(this_pred, log2_scale));
+ dst[c] = divide_round(this_pred, log2_scale);
}
dst += stride;
}
@@ -303,12 +164,11 @@ static INLINE void smooth_h_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
for (i = 0; i < 2; ++i) {
this_pred += weights[i] * pixels[i];
}
- dst[c] = clip_pixel(divide_round(this_pred, log2_scale));
+ dst[c] = divide_round(this_pred, log2_scale);
}
dst += stride;
}
}
-#endif // CONFIG_SMOOTH_HV
static INLINE void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bw,
int bh, const uint8_t *above,
@@ -373,267 +233,133 @@ static INLINE void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh,
}
}
-void aom_d45e_predictor_2x2_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const int A = above[0];
- const int B = above[1];
- const int C = above[2];
- const int D = above[3];
- (void)stride;
- (void)left;
-
- DST(0, 0) = AVG3(A, B, C);
- DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
- DST(1, 1) = AVG3(C, D, D);
+static INLINE int divide_using_multiply_shift(int num, int shift1,
+ int multiplier, int shift2) {
+ const int interm = num >> shift1;
+ return interm * multiplier >> shift2;
}
-void aom_d117_predictor_2x2_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const int I = left[0];
- const int X = above[-1];
- const int A = above[0];
- const int B = above[1];
- DST(0, 0) = AVG2(X, A);
- DST(1, 0) = AVG2(A, B);
- DST(0, 1) = AVG3(I, X, A);
- DST(1, 1) = AVG3(X, A, B);
-}
+ // The constants (multiplier and shifts) for a given block size are obtained
+ // as follows:
+ // - Let sum_w_h = block width + block height.
+ // - Shift 'sum_w_h' right until we reach an odd number. Let the number of
+ // shifts for that block size be called 'shift1' (see the parameter in
+ // dc_predictor_rect() function), and let the odd number be 'd'. [d has only 2
+ // possible values: d = 3 for a 1:2 rect block and d = 5 for a 1:4 rect
+ // block].
+ // - Find multipliers for (i) dividing by 3, and (ii) dividing by 5,
+ // using the "Algorithm 1" in:
+ // http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=1467632
+ // by ensuring that m + n = 16 (in that algorithm). This ensures that our 2nd
+ // shift will be 16, regardless of the block size.
-void aom_d135_predictor_2x2_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const int I = left[0];
- const int J = left[1];
- const int X = above[-1];
- const int A = above[0];
- const int B = above[1];
- (void)stride;
- DST(0, 1) = AVG3(X, I, J);
- DST(1, 1) = DST(0, 0) = AVG3(A, X, I);
- DST(1, 0) = AVG3(B, A, X);
-}
+ // Note: For low bitdepth, assembly code may be optimized by using smaller
+ // constants for smaller block sizes, where the range of the 'sum' is
+ // restricted to fewer bits.
-void aom_d153_predictor_2x2_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const int I = left[0];
- const int J = left[1];
- const int X = above[-1];
- const int A = above[0];
-
- DST(0, 0) = AVG2(I, X);
- DST(0, 1) = AVG2(J, I);
- DST(1, 0) = AVG3(I, X, A);
- DST(1, 1) = AVG3(J, I, X);
-}
+#define DC_MULTIPLIER_1X2 0x5556
+#define DC_MULTIPLIER_1X4 0x3334
-void aom_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const int A = above[0];
- const int B = above[1];
- const int C = above[2];
- const int D = above[3];
- const int E = above[4];
- const int F = above[5];
- const int G = above[6];
- const int H = above[7];
- (void)stride;
- (void)left;
- DST(0, 0) = AVG3(A, B, C);
- DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
- DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E);
- DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F);
- DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G);
- DST(3, 2) = DST(2, 3) = AVG3(F, G, H);
- DST(3, 3) = AVG3(G, H, H);
-}
+#define DC_SHIFT2 16
-void aom_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const int I = left[0];
- const int J = left[1];
- const int K = left[2];
- const int X = above[-1];
- const int A = above[0];
- const int B = above[1];
- const int C = above[2];
- const int D = above[3];
- DST(0, 0) = DST(1, 2) = AVG2(X, A);
- DST(1, 0) = DST(2, 2) = AVG2(A, B);
- DST(2, 0) = DST(3, 2) = AVG2(B, C);
- DST(3, 0) = AVG2(C, D);
-
- DST(0, 3) = AVG3(K, J, I);
- DST(0, 2) = AVG3(J, I, X);
- DST(0, 1) = DST(1, 3) = AVG3(I, X, A);
- DST(1, 1) = DST(2, 3) = AVG3(X, A, B);
- DST(2, 1) = DST(3, 3) = AVG3(A, B, C);
- DST(3, 1) = AVG3(B, C, D);
-}
+static INLINE void dc_predictor_rect(uint8_t *dst, ptrdiff_t stride, int bw,
+ int bh, const uint8_t *above,
+ const uint8_t *left, int shift1,
+ int multiplier) {
+ int sum = 0;
-void aom_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const int I = left[0];
- const int J = left[1];
- const int K = left[2];
- const int L = left[3];
- const int X = above[-1];
- const int A = above[0];
- const int B = above[1];
- const int C = above[2];
- const int D = above[3];
- (void)stride;
- DST(0, 3) = AVG3(J, K, L);
- DST(1, 3) = DST(0, 2) = AVG3(I, J, K);
- DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J);
- DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I);
- DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X);
- DST(3, 1) = DST(2, 0) = AVG3(C, B, A);
- DST(3, 0) = AVG3(D, C, B);
+ for (int i = 0; i < bw; i++) {
+ sum += above[i];
+ }
+ for (int i = 0; i < bh; i++) {
+ sum += left[i];
+ }
+
+ const int expected_dc = divide_using_multiply_shift(
+ sum + ((bw + bh) >> 1), shift1, multiplier, DC_SHIFT2);
+ assert(expected_dc < (1 << 8));
+
+ for (int r = 0; r < bh; r++) {
+ memset(dst, expected_dc, bw);
+ dst += stride;
+ }
}
-void aom_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride,
- const uint8_t *above, const uint8_t *left) {
- const int I = left[0];
- const int J = left[1];
- const int K = left[2];
- const int L = left[3];
- const int X = above[-1];
- const int A = above[0];
- const int B = above[1];
- const int C = above[2];
-
- DST(0, 0) = DST(2, 1) = AVG2(I, X);
- DST(0, 1) = DST(2, 2) = AVG2(J, I);
- DST(0, 2) = DST(2, 3) = AVG2(K, J);
- DST(0, 3) = AVG2(L, K);
-
- DST(3, 0) = AVG3(A, B, C);
- DST(2, 0) = AVG3(X, A, B);
- DST(1, 0) = DST(3, 1) = AVG3(I, X, A);
- DST(1, 1) = DST(3, 2) = AVG3(J, I, X);
- DST(1, 2) = DST(3, 3) = AVG3(K, J, I);
- DST(1, 3) = AVG3(L, K, J);
+#undef DC_SHIFT2
+
+void aom_dc_predictor_4x8_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ dc_predictor_rect(dst, stride, 4, 8, above, left, 2, DC_MULTIPLIER_1X2);
}
-#if CONFIG_HIGHBITDEPTH
-static INLINE void highbd_d207e_predictor(uint16_t *dst, ptrdiff_t stride,
- int bw, int bh, const uint16_t *above,
- const uint16_t *left, int bd) {
- int r, c;
- (void)above;
- (void)bd;
+void aom_dc_predictor_8x4_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ dc_predictor_rect(dst, stride, 8, 4, above, left, 2, DC_MULTIPLIER_1X2);
+}
- for (r = 0; r < bh; ++r) {
- for (c = 0; c < bw; ++c) {
- dst[c] = c & 1 ? AVG3(left[(c >> 1) + r], left[(c >> 1) + r + 1],
- left[(c >> 1) + r + 2])
- : AVG2(left[(c >> 1) + r], left[(c >> 1) + r + 1]);
- }
- dst += stride;
- }
+void aom_dc_predictor_4x16_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ dc_predictor_rect(dst, stride, 4, 16, above, left, 2, DC_MULTIPLIER_1X4);
}
-static INLINE void highbd_d63e_predictor(uint16_t *dst, ptrdiff_t stride,
- int bw, int bh, const uint16_t *above,
- const uint16_t *left, int bd) {
- int r, c;
- (void)left;
- (void)bd;
- for (r = 0; r < bh; ++r) {
- for (c = 0; c < bw; ++c) {
- dst[c] = r & 1 ? AVG3(above[(r >> 1) + c], above[(r >> 1) + c + 1],
- above[(r >> 1) + c + 2])
- : AVG2(above[(r >> 1) + c], above[(r >> 1) + c + 1]);
- }
- dst += stride;
- }
+void aom_dc_predictor_16x4_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ dc_predictor_rect(dst, stride, 16, 4, above, left, 2, DC_MULTIPLIER_1X4);
}
-static INLINE void highbd_d45e_predictor(uint16_t *dst, ptrdiff_t stride,
- int bw, int bh, const uint16_t *above,
- const uint16_t *left, int bd) {
- int r, c;
- (void)left;
- (void)bd;
- for (r = 0; r < bh; ++r) {
- for (c = 0; c < bw; ++c) {
- dst[c] = AVG3(above[r + c], above[r + c + 1],
- above[r + c + 1 + (r + c + 2 < bw + bh)]);
- }
- dst += stride;
- }
+void aom_dc_predictor_8x16_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ dc_predictor_rect(dst, stride, 8, 16, above, left, 3, DC_MULTIPLIER_1X2);
}
-static INLINE void highbd_d117_predictor(uint16_t *dst, ptrdiff_t stride,
- int bw, int bh, const uint16_t *above,
- const uint16_t *left, int bd) {
- int r, c;
- (void)bd;
+void aom_dc_predictor_16x8_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ dc_predictor_rect(dst, stride, 16, 8, above, left, 3, DC_MULTIPLIER_1X2);
+}
- // first row
- for (c = 0; c < bw; c++) dst[c] = AVG2(above[c - 1], above[c]);
- dst += stride;
+void aom_dc_predictor_8x32_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ dc_predictor_rect(dst, stride, 8, 32, above, left, 3, DC_MULTIPLIER_1X4);
+}
- // second row
- dst[0] = AVG3(left[0], above[-1], above[0]);
- for (c = 1; c < bw; c++) dst[c] = AVG3(above[c - 2], above[c - 1], above[c]);
- dst += stride;
+void aom_dc_predictor_32x8_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ dc_predictor_rect(dst, stride, 32, 8, above, left, 3, DC_MULTIPLIER_1X4);
+}
- // the rest of first col
- dst[0] = AVG3(above[-1], left[0], left[1]);
- for (r = 3; r < bh; ++r)
- dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]);
+void aom_dc_predictor_16x32_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ dc_predictor_rect(dst, stride, 16, 32, above, left, 4, DC_MULTIPLIER_1X2);
+}
- // the rest of the block
- for (r = 2; r < bh; ++r) {
- for (c = 1; c < bw; c++) dst[c] = dst[-2 * stride + c - 1];
- dst += stride;
- }
+void aom_dc_predictor_32x16_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ dc_predictor_rect(dst, stride, 32, 16, above, left, 4, DC_MULTIPLIER_1X2);
}
-static INLINE void highbd_d135_predictor(uint16_t *dst, ptrdiff_t stride,
- int bw, int bh, const uint16_t *above,
- const uint16_t *left, int bd) {
- int r, c;
- (void)bd;
- dst[0] = AVG3(left[0], above[-1], above[0]);
- for (c = 1; c < bw; c++) dst[c] = AVG3(above[c - 2], above[c - 1], above[c]);
+void aom_dc_predictor_16x64_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ dc_predictor_rect(dst, stride, 16, 64, above, left, 4, DC_MULTIPLIER_1X4);
+}
- dst[stride] = AVG3(above[-1], left[0], left[1]);
- for (r = 2; r < bh; ++r)
- dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]);
+void aom_dc_predictor_64x16_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ dc_predictor_rect(dst, stride, 64, 16, above, left, 4, DC_MULTIPLIER_1X4);
+}
- dst += stride;
- for (r = 1; r < bh; ++r) {
- for (c = 1; c < bw; c++) dst[c] = dst[-stride + c - 1];
- dst += stride;
- }
+void aom_dc_predictor_32x64_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ dc_predictor_rect(dst, stride, 32, 64, above, left, 5, DC_MULTIPLIER_1X2);
}
-static INLINE void highbd_d153_predictor(uint16_t *dst, ptrdiff_t stride,
- int bw, int bh, const uint16_t *above,
- const uint16_t *left, int bd) {
- int r, c;
- (void)bd;
- dst[0] = AVG2(above[-1], left[0]);
- for (r = 1; r < bh; r++) dst[r * stride] = AVG2(left[r - 1], left[r]);
- dst++;
-
- dst[0] = AVG3(left[0], above[-1], above[0]);
- dst[stride] = AVG3(above[-1], left[0], left[1]);
- for (r = 2; r < bh; r++)
- dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]);
- dst++;
-
- for (c = 0; c < bw - 2; c++)
- dst[c] = AVG3(above[c - 1], above[c], above[c + 1]);
- dst += stride;
-
- for (r = 1; r < bh; ++r) {
- for (c = 0; c < bw - 2; c++) dst[c] = dst[-stride + c - 2];
- dst += stride;
- }
+void aom_dc_predictor_64x32_c(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {
+ dc_predictor_rect(dst, stride, 64, 32, above, left, 5, DC_MULTIPLIER_1X2);
}
+#undef DC_MULTIPLIER_1X2
+#undef DC_MULTIPLIER_1X4
+
static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
int bh, const uint16_t *above,
const uint16_t *left, int bd) {
@@ -658,93 +384,6 @@ static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
}
}
-void aom_highbd_d207_predictor_2x2_c(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const int I = left[0];
- const int J = left[1];
- const int K = left[2];
- const int L = left[3];
- (void)above;
- (void)bd;
- DST(0, 0) = AVG2(I, J);
- DST(0, 1) = AVG2(J, K);
- DST(1, 0) = AVG3(I, J, K);
- DST(1, 1) = AVG3(J, K, L);
-}
-
-void aom_highbd_d63_predictor_2x2_c(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above, const uint16_t *left,
- int bd) {
- const int A = above[0];
- const int B = above[1];
- const int C = above[2];
- const int D = above[3];
- (void)left;
- (void)bd;
- DST(0, 0) = AVG2(A, B);
- DST(1, 0) = AVG2(B, C);
- DST(0, 1) = AVG3(A, B, C);
- DST(1, 1) = AVG3(B, C, D);
-}
-
-void aom_highbd_d45e_predictor_2x2_c(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const int A = above[0];
- const int B = above[1];
- const int C = above[2];
- const int D = above[3];
- (void)stride;
- (void)left;
- (void)bd;
- DST(0, 0) = AVG3(A, B, C);
- DST(1, 0) = DST(0, 1) = AVG3(B, C, D);
- DST(1, 1) = AVG3(C, D, D);
-}
-
-void aom_highbd_d117_predictor_2x2_c(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const int I = left[0];
- const int X = above[-1];
- const int A = above[0];
- const int B = above[1];
- (void)bd;
- DST(0, 0) = AVG2(X, A);
- DST(1, 0) = AVG2(A, B);
- DST(0, 1) = AVG3(I, X, A);
- DST(1, 1) = AVG3(X, A, B);
-}
-
-void aom_highbd_d135_predictor_2x2_c(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const int I = left[0];
- const int J = left[1];
- const int X = above[-1];
- const int A = above[0];
- const int B = above[1];
- (void)bd;
- DST(0, 1) = AVG3(X, I, J);
- DST(1, 1) = DST(0, 0) = AVG3(A, X, I);
- DST(1, 0) = AVG3(B, A, X);
-}
-
-void aom_highbd_d153_predictor_2x2_c(uint16_t *dst, ptrdiff_t stride,
- const uint16_t *above,
- const uint16_t *left, int bd) {
- const int I = left[0];
- const int J = left[1];
- const int X = above[-1];
- const int A = above[0];
- (void)bd;
- DST(0, 0) = AVG2(I, X);
- DST(0, 1) = AVG2(J, I);
- DST(1, 0) = AVG3(I, X, A);
- DST(1, 1) = AVG3(J, I, X);
-}
-
static INLINE void highbd_paeth_predictor(uint16_t *dst, ptrdiff_t stride,
int bw, int bh, const uint16_t *above,
const uint16_t *left, int bd) {
@@ -763,6 +402,7 @@ static INLINE void highbd_smooth_predictor(uint16_t *dst, ptrdiff_t stride,
int bw, int bh,
const uint16_t *above,
const uint16_t *left, int bd) {
+ (void)bd;
const uint16_t below_pred = left[bh - 1]; // estimated by bottom-left pixel
const uint16_t right_pred = above[bw - 1]; // estimated by top-right pixel
const uint8_t *const sm_weights_w = sm_weight_arrays + bw;
@@ -785,17 +425,17 @@ static INLINE void highbd_smooth_predictor(uint16_t *dst, ptrdiff_t stride,
for (i = 0; i < 4; ++i) {
this_pred += weights[i] * pixels[i];
}
- dst[c] = clip_pixel_highbd(divide_round(this_pred, log2_scale), bd);
+ dst[c] = divide_round(this_pred, log2_scale);
}
dst += stride;
}
}
-#if CONFIG_SMOOTH_HV
static INLINE void highbd_smooth_v_predictor(uint16_t *dst, ptrdiff_t stride,
int bw, int bh,
const uint16_t *above,
const uint16_t *left, int bd) {
+ (void)bd;
const uint16_t below_pred = left[bh - 1]; // estimated by bottom-left pixel
const uint8_t *const sm_weights = sm_weight_arrays + bh;
// scale = 2^sm_weight_log2_scale
@@ -816,7 +456,7 @@ static INLINE void highbd_smooth_v_predictor(uint16_t *dst, ptrdiff_t stride,
for (i = 0; i < 2; ++i) {
this_pred += weights[i] * pixels[i];
}
- dst[c] = clip_pixel_highbd(divide_round(this_pred, log2_scale), bd);
+ dst[c] = divide_round(this_pred, log2_scale);
}
dst += stride;
}
@@ -826,6 +466,7 @@ static INLINE void highbd_smooth_h_predictor(uint16_t *dst, ptrdiff_t stride,
int bw, int bh,
const uint16_t *above,
const uint16_t *left, int bd) {
+ (void)bd;
const uint16_t right_pred = above[bw - 1]; // estimated by top-right pixel
const uint8_t *const sm_weights = sm_weight_arrays + bw;
// scale = 2^sm_weight_log2_scale
@@ -846,12 +487,11 @@ static INLINE void highbd_smooth_h_predictor(uint16_t *dst, ptrdiff_t stride,
for (i = 0; i < 2; ++i) {
this_pred += weights[i] * pixels[i];
}
- dst[c] = clip_pixel_highbd(divide_round(this_pred, log2_scale), bd);
+ dst[c] = divide_round(this_pred, log2_scale);
}
dst += stride;
}
}
-#endif // CONFIG_SMOOTH_HV
static INLINE void highbd_dc_128_predictor(uint16_t *dst, ptrdiff_t stride,
int bw, int bh,
@@ -922,7 +562,148 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
dst += stride;
}
}
-#endif // CONFIG_HIGHBITDEPTH
+
+// Obtained similarly as DC_MULTIPLIER_1X2 and DC_MULTIPLIER_1X4 above, but
+// assume 2nd shift of 17 bits instead of 16.
+// Note: Strictly speaking, 2nd shift needs to be 17 only when:
+// - bit depth == 12, and
+// - bw + bh is divisible by 5 (as opposed to divisible by 3).
+// All other cases can use half the multipliers with a shift of 16 instead.
+// This special optimization can be used when writing assembly code.
+#define HIGHBD_DC_MULTIPLIER_1X2 0xAAAB
+// Note: This constant is odd, but a smaller even constant (0x199a) with the
+// appropriate shift should work for neon in 8/10-bit.
+#define HIGHBD_DC_MULTIPLIER_1X4 0x6667
+
+#define HIGHBD_DC_SHIFT2 17
+
+static INLINE void highbd_dc_predictor_rect(uint16_t *dst, ptrdiff_t stride,
+ int bw, int bh,
+ const uint16_t *above,
+ const uint16_t *left, int bd,
+ int shift1, uint32_t multiplier) {
+ int sum = 0;
+ (void)bd;
+
+ for (int i = 0; i < bw; i++) {
+ sum += above[i];
+ }
+ for (int i = 0; i < bh; i++) {
+ sum += left[i];
+ }
+
+ const int expected_dc = divide_using_multiply_shift(
+ sum + ((bw + bh) >> 1), shift1, multiplier, HIGHBD_DC_SHIFT2);
+ assert(expected_dc < (1 << bd));
+
+ for (int r = 0; r < bh; r++) {
+ aom_memset16(dst, expected_dc, bw);
+ dst += stride;
+ }
+}
+
+#undef HIGHBD_DC_SHIFT2
+
+void aom_highbd_dc_predictor_4x8_c(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above, const uint16_t *left,
+ int bd) {
+ highbd_dc_predictor_rect(dst, stride, 4, 8, above, left, bd, 2,
+ HIGHBD_DC_MULTIPLIER_1X2);
+}
+
+void aom_highbd_dc_predictor_8x4_c(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above, const uint16_t *left,
+ int bd) {
+ highbd_dc_predictor_rect(dst, stride, 8, 4, above, left, bd, 2,
+ HIGHBD_DC_MULTIPLIER_1X2);
+}
+
+void aom_highbd_dc_predictor_4x16_c(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above, const uint16_t *left,
+ int bd) {
+ highbd_dc_predictor_rect(dst, stride, 4, 16, above, left, bd, 2,
+ HIGHBD_DC_MULTIPLIER_1X4);
+}
+
+void aom_highbd_dc_predictor_16x4_c(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above, const uint16_t *left,
+ int bd) {
+ highbd_dc_predictor_rect(dst, stride, 16, 4, above, left, bd, 2,
+ HIGHBD_DC_MULTIPLIER_1X4);
+}
+
+void aom_highbd_dc_predictor_8x16_c(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above, const uint16_t *left,
+ int bd) {
+ highbd_dc_predictor_rect(dst, stride, 8, 16, above, left, bd, 3,
+ HIGHBD_DC_MULTIPLIER_1X2);
+}
+
+void aom_highbd_dc_predictor_16x8_c(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above, const uint16_t *left,
+ int bd) {
+ highbd_dc_predictor_rect(dst, stride, 16, 8, above, left, bd, 3,
+ HIGHBD_DC_MULTIPLIER_1X2);
+}
+
+void aom_highbd_dc_predictor_8x32_c(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above, const uint16_t *left,
+ int bd) {
+ highbd_dc_predictor_rect(dst, stride, 8, 32, above, left, bd, 3,
+ HIGHBD_DC_MULTIPLIER_1X4);
+}
+
+void aom_highbd_dc_predictor_32x8_c(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above, const uint16_t *left,
+ int bd) {
+ highbd_dc_predictor_rect(dst, stride, 32, 8, above, left, bd, 3,
+ HIGHBD_DC_MULTIPLIER_1X4);
+}
+
+void aom_highbd_dc_predictor_16x32_c(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_dc_predictor_rect(dst, stride, 16, 32, above, left, bd, 4,
+ HIGHBD_DC_MULTIPLIER_1X2);
+}
+
+void aom_highbd_dc_predictor_32x16_c(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_dc_predictor_rect(dst, stride, 32, 16, above, left, bd, 4,
+ HIGHBD_DC_MULTIPLIER_1X2);
+}
+
+void aom_highbd_dc_predictor_16x64_c(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_dc_predictor_rect(dst, stride, 16, 64, above, left, bd, 4,
+ HIGHBD_DC_MULTIPLIER_1X4);
+}
+
+void aom_highbd_dc_predictor_64x16_c(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_dc_predictor_rect(dst, stride, 64, 16, above, left, bd, 4,
+ HIGHBD_DC_MULTIPLIER_1X4);
+}
+
+void aom_highbd_dc_predictor_32x64_c(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_dc_predictor_rect(dst, stride, 32, 64, above, left, bd, 5,
+ HIGHBD_DC_MULTIPLIER_1X2);
+}
+
+void aom_highbd_dc_predictor_64x32_c(uint16_t *dst, ptrdiff_t stride,
+ const uint16_t *above,
+ const uint16_t *left, int bd) {
+ highbd_dc_predictor_rect(dst, stride, 64, 32, above, left, bd, 5,
+ HIGHBD_DC_MULTIPLIER_1X2);
+}
+
+#undef HIGHBD_DC_MULTIPLIER_1X2
+#undef HIGHBD_DC_MULTIPLIER_1X4
// This serves as a wrapper function, so that all the prediction functions
// can be unified and accessed as a pointer array. Note that the boundary
@@ -934,7 +715,6 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
type##_predictor(dst, stride, width, height, above, left); \
}
-#if CONFIG_HIGHBITDEPTH
#define intra_pred_highbd_sized(type, width, height) \
void aom_highbd_##type##_predictor_##width##x##height##_c( \
uint16_t *dst, ptrdiff_t stride, const uint16_t *above, \
@@ -943,7 +723,6 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
}
/* clang-format off */
-#if CONFIG_TX64X64
#define intra_pred_rectangular(type) \
intra_pred_sized(type, 4, 8) \
intra_pred_sized(type, 8, 4) \
@@ -953,6 +732,12 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
intra_pred_sized(type, 32, 16) \
intra_pred_sized(type, 32, 64) \
intra_pred_sized(type, 64, 32) \
+ intra_pred_sized(type, 4, 16) \
+ intra_pred_sized(type, 16, 4) \
+ intra_pred_sized(type, 8, 32) \
+ intra_pred_sized(type, 32, 8) \
+ intra_pred_sized(type, 16, 64) \
+ intra_pred_sized(type, 64, 16) \
intra_pred_highbd_sized(type, 4, 8) \
intra_pred_highbd_sized(type, 8, 4) \
intra_pred_highbd_sized(type, 8, 16) \
@@ -960,7 +745,13 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
intra_pred_highbd_sized(type, 16, 32) \
intra_pred_highbd_sized(type, 32, 16) \
intra_pred_highbd_sized(type, 32, 64) \
- intra_pred_highbd_sized(type, 64, 32)
+ intra_pred_highbd_sized(type, 64, 32) \
+ intra_pred_highbd_sized(type, 4, 16) \
+ intra_pred_highbd_sized(type, 16, 4) \
+ intra_pred_highbd_sized(type, 8, 32) \
+ intra_pred_highbd_sized(type, 32, 8) \
+ intra_pred_highbd_sized(type, 16, 64) \
+ intra_pred_highbd_sized(type, 64, 16)
#define intra_pred_above_4x4(type) \
intra_pred_sized(type, 8, 8) \
intra_pred_sized(type, 16, 16) \
@@ -973,100 +764,29 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw,
intra_pred_highbd_sized(type, 64, 64) \
intra_pred_rectangular(type)
#define intra_pred_allsizes(type) \
- intra_pred_sized(type, 2, 2) \
intra_pred_sized(type, 4, 4) \
- intra_pred_highbd_sized(type, 2, 2) \
intra_pred_above_4x4(type)
-#else // CONFIG_TX64X64
-#define intra_pred_rectangular(type) \
- intra_pred_sized(type, 4, 8) \
- intra_pred_sized(type, 8, 4) \
- intra_pred_sized(type, 8, 16) \
- intra_pred_sized(type, 16, 8) \
- intra_pred_sized(type, 16, 32) \
- intra_pred_sized(type, 32, 16) \
- intra_pred_highbd_sized(type, 4, 8) \
- intra_pred_highbd_sized(type, 8, 4) \
- intra_pred_highbd_sized(type, 8, 16) \
- intra_pred_highbd_sized(type, 16, 8) \
- intra_pred_highbd_sized(type, 16, 32) \
- intra_pred_highbd_sized(type, 32, 16)
-#define intra_pred_above_4x4(type) \
+#define intra_pred_square(type) \
+ intra_pred_sized(type, 4, 4) \
intra_pred_sized(type, 8, 8) \
intra_pred_sized(type, 16, 16) \
intra_pred_sized(type, 32, 32) \
+ intra_pred_sized(type, 64, 64) \
intra_pred_highbd_sized(type, 4, 4) \
intra_pred_highbd_sized(type, 8, 8) \
intra_pred_highbd_sized(type, 16, 16) \
intra_pred_highbd_sized(type, 32, 32) \
- intra_pred_rectangular(type)
-#define intra_pred_allsizes(type) \
- intra_pred_sized(type, 2, 2) \
- intra_pred_sized(type, 4, 4) \
- intra_pred_highbd_sized(type, 2, 2) \
- intra_pred_above_4x4(type)
-#endif // CONFIG_TX64X64
-
-#else
-
-#if CONFIG_TX64X64
-#define intra_pred_rectangular(type) \
- intra_pred_sized(type, 4, 8) \
- intra_pred_sized(type, 8, 4) \
- intra_pred_sized(type, 8, 16) \
- intra_pred_sized(type, 16, 8) \
- intra_pred_sized(type, 16, 32) \
- intra_pred_sized(type, 32, 16) \
- intra_pred_sized(type, 32, 64) \
- intra_pred_sized(type, 64, 32)
-#define intra_pred_above_4x4(type) \
- intra_pred_sized(type, 8, 8) \
- intra_pred_sized(type, 16, 16) \
- intra_pred_sized(type, 32, 32) \
- intra_pred_sized(type, 64, 64) \
- intra_pred_rectangular(type)
-#define intra_pred_allsizes(type) \
- intra_pred_sized(type, 2, 2) \
- intra_pred_sized(type, 4, 4) \
- intra_pred_above_4x4(type)
-#else // CONFIG_TX64X64
-#define intra_pred_rectangular(type) \
- intra_pred_sized(type, 4, 8) \
- intra_pred_sized(type, 8, 4) \
- intra_pred_sized(type, 8, 16) \
- intra_pred_sized(type, 16, 8) \
- intra_pred_sized(type, 16, 32) \
- intra_pred_sized(type, 32, 16)
-#define intra_pred_above_4x4(type) \
- intra_pred_sized(type, 8, 8) \
- intra_pred_sized(type, 16, 16) \
- intra_pred_sized(type, 32, 32) \
- intra_pred_rectangular(type)
-#define intra_pred_allsizes(type) \
- intra_pred_sized(type, 2, 2) \
- intra_pred_sized(type, 4, 4) \
- intra_pred_above_4x4(type)
-#endif // CONFIG_TX64X64
-
-#endif // CONFIG_HIGHBITDEPTH
+ intra_pred_highbd_sized(type, 64, 64)
-intra_pred_allsizes(d207e)
-intra_pred_allsizes(d63e)
-intra_pred_above_4x4(d45e)
-intra_pred_above_4x4(d117)
-intra_pred_above_4x4(d135)
-intra_pred_above_4x4(d153)
intra_pred_allsizes(v)
intra_pred_allsizes(h)
intra_pred_allsizes(smooth)
-#if CONFIG_SMOOTH_HV
intra_pred_allsizes(smooth_v)
intra_pred_allsizes(smooth_h)
-#endif // CONFIG_SMOOTH_HV
intra_pred_allsizes(paeth)
intra_pred_allsizes(dc_128)
intra_pred_allsizes(dc_left)
intra_pred_allsizes(dc_top)
-intra_pred_allsizes(dc)
+intra_pred_square(dc)
/* clang-format on */
#undef intra_pred_allsizes