diff options
Diffstat (limited to 'third_party/aom/aom_dsp/intrapred.c')
-rw-r--r-- | third_party/aom/aom_dsp/intrapred.c | 816 |
1 files changed, 268 insertions, 548 deletions
diff --git a/third_party/aom/aom_dsp/intrapred.c b/third_party/aom/aom_dsp/intrapred.c index 6d2ac37d9..c6aa6b207 100644 --- a/third_party/aom/aom_dsp/intrapred.c +++ b/third_party/aom/aom_dsp/intrapred.c @@ -12,152 +12,14 @@ #include <assert.h> #include <math.h> -#include "./aom_config.h" -#include "./aom_dsp_rtcd.h" +#include "config/aom_config.h" +#include "config/aom_dsp_rtcd.h" #include "aom_dsp/aom_dsp_common.h" #include "aom_dsp/intrapred_common.h" #include "aom_mem/aom_mem.h" #include "aom_ports/bitops.h" -#define DST(x, y) dst[(x) + (y)*stride] -#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) -#define AVG2(a, b) (((a) + (b) + 1) >> 1) - -static INLINE void d207e_predictor(uint8_t *dst, ptrdiff_t stride, int bw, - int bh, const uint8_t *above, - const uint8_t *left) { - int r, c; - (void)above; - - for (r = 0; r < bh; ++r) { - for (c = 0; c < bw; ++c) { - dst[c] = c & 1 ? AVG3(left[(c >> 1) + r], left[(c >> 1) + r + 1], - left[(c >> 1) + r + 2]) - : AVG2(left[(c >> 1) + r], left[(c >> 1) + r + 1]); - } - dst += stride; - } -} - -static INLINE void d63e_predictor(uint8_t *dst, ptrdiff_t stride, int bw, - int bh, const uint8_t *above, - const uint8_t *left) { - int r, c; - (void)left; - for (r = 0; r < bh; ++r) { - for (c = 0; c < bw; ++c) { - dst[c] = r & 1 ? AVG3(above[(r >> 1) + c], above[(r >> 1) + c + 1], - above[(r >> 1) + c + 2]) - : AVG2(above[(r >> 1) + c], above[(r >> 1) + c + 1]); - } - dst += stride; - } -} - -static INLINE void d45e_predictor(uint8_t *dst, ptrdiff_t stride, int bw, - int bh, const uint8_t *above, - const uint8_t *left) { - int r, c; - (void)left; - for (r = 0; r < bh; ++r) { - for (c = 0; c < bw; ++c) { - dst[c] = AVG3(above[r + c], above[r + c + 1], - above[r + c + 1 + (r + c + 2 < bw + bh)]); - } - dst += stride; - } -} - -static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bw, - int bh, const uint8_t *above, - const uint8_t *left) { - int r, c; - - // first row - for (c = 0; c < bw; c++) dst[c] = AVG2(above[c - 1], above[c]); - dst += stride; - - // second row - dst[0] = AVG3(left[0], above[-1], above[0]); - for (c = 1; c < bw; c++) dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); - dst += stride; - - // the rest of first col - dst[0] = AVG3(above[-1], left[0], left[1]); - for (r = 3; r < bh; ++r) - dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]); - - // the rest of the block - for (r = 2; r < bh; ++r) { - for (c = 1; c < bw; c++) dst[c] = dst[-2 * stride + c - 1]; - dst += stride; - } -} - -static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bw, - int bh, const uint8_t *above, - const uint8_t *left) { - int i; -#if CONFIG_TX64X64 -#if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ > 7 - // silence a spurious -Warray-bounds warning, possibly related to: - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56273 - uint8_t border[133]; -#else - uint8_t border[64 + 64 - 1]; // outer border from bottom-left to top-right -#endif -#else -#if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ > 7 - // silence a spurious -Warray-bounds warning, possibly related to: - // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56273 - uint8_t border[69]; -#else - uint8_t border[32 + 32 - 1]; // outer border from bottom-left to top-right -#endif -#endif // CONFIG_TX64X64 - - // dst(bh, bh - 2)[0], i.e., border starting at bottom-left - for (i = 0; i < bh - 2; ++i) { - border[i] = AVG3(left[bh - 3 - i], left[bh - 2 - i], left[bh - 1 - i]); - } - border[bh - 2] = AVG3(above[-1], left[0], left[1]); - border[bh - 1] = AVG3(left[0], above[-1], above[0]); - border[bh - 0] = AVG3(above[-1], above[0], above[1]); - // dst[0][2, size), i.e., remaining top border ascending - for (i = 0; i < bw - 2; ++i) { - border[bh + 1 + i] = AVG3(above[i], above[i + 1], above[i + 2]); - } - - for (i = 0; i < bh; ++i) { - memcpy(dst + i * stride, border + bh - 1 - i, bw); - } -} - -static INLINE void d153_predictor(uint8_t *dst, ptrdiff_t stride, int bw, - int bh, const uint8_t *above, - const uint8_t *left) { - int r, c; - dst[0] = AVG2(above[-1], left[0]); - for (r = 1; r < bh; r++) dst[r * stride] = AVG2(left[r - 1], left[r]); - dst++; - - dst[0] = AVG3(left[0], above[-1], above[0]); - dst[stride] = AVG3(above[-1], left[0], left[1]); - for (r = 2; r < bh; r++) - dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); - dst++; - - for (c = 0; c < bw - 2; c++) - dst[c] = AVG3(above[c - 1], above[c], above[c + 1]); - dst += stride; - - for (r = 1; r < bh; ++r) { - for (c = 0; c < bw - 2; c++) dst[c] = dst[-stride + c - 2]; - dst += stride; - } -} - static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left) { int r; @@ -244,13 +106,12 @@ static INLINE void smooth_predictor(uint8_t *dst, ptrdiff_t stride, int bw, for (i = 0; i < 4; ++i) { this_pred += weights[i] * pixels[i]; } - dst[c] = clip_pixel(divide_round(this_pred, log2_scale)); + dst[c] = divide_round(this_pred, log2_scale); } dst += stride; } } -#if CONFIG_SMOOTH_HV static INLINE void smooth_v_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, const uint8_t *left) { @@ -274,7 +135,7 @@ static INLINE void smooth_v_predictor(uint8_t *dst, ptrdiff_t stride, int bw, for (i = 0; i < 2; ++i) { this_pred += weights[i] * pixels[i]; } - dst[c] = clip_pixel(divide_round(this_pred, log2_scale)); + dst[c] = divide_round(this_pred, log2_scale); } dst += stride; } @@ -303,12 +164,11 @@ static INLINE void smooth_h_predictor(uint8_t *dst, ptrdiff_t stride, int bw, for (i = 0; i < 2; ++i) { this_pred += weights[i] * pixels[i]; } - dst[c] = clip_pixel(divide_round(this_pred, log2_scale)); + dst[c] = divide_round(this_pred, log2_scale); } dst += stride; } } -#endif // CONFIG_SMOOTH_HV static INLINE void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh, const uint8_t *above, @@ -373,267 +233,133 @@ static INLINE void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bw, int bh, } } -void aom_d45e_predictor_2x2_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - (void)stride; - (void)left; - - DST(0, 0) = AVG3(A, B, C); - DST(1, 0) = DST(0, 1) = AVG3(B, C, D); - DST(1, 1) = AVG3(C, D, D); +static INLINE int divide_using_multiply_shift(int num, int shift1, + int multiplier, int shift2) { + const int interm = num >> shift1; + return interm * multiplier >> shift2; } -void aom_d117_predictor_2x2_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int I = left[0]; - const int X = above[-1]; - const int A = above[0]; - const int B = above[1]; - DST(0, 0) = AVG2(X, A); - DST(1, 0) = AVG2(A, B); - DST(0, 1) = AVG3(I, X, A); - DST(1, 1) = AVG3(X, A, B); -} + // The constants (multiplier and shifts) for a given block size are obtained + // as follows: + // - Let sum_w_h = block width + block height. + // - Shift 'sum_w_h' right until we reach an odd number. Let the number of + // shifts for that block size be called 'shift1' (see the parameter in + // dc_predictor_rect() function), and let the odd number be 'd'. [d has only 2 + // possible values: d = 3 for a 1:2 rect block and d = 5 for a 1:4 rect + // block]. + // - Find multipliers for (i) dividing by 3, and (ii) dividing by 5, + // using the "Algorithm 1" in: + // http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=1467632 + // by ensuring that m + n = 16 (in that algorithm). This ensures that our 2nd + // shift will be 16, regardless of the block size. -void aom_d135_predictor_2x2_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int I = left[0]; - const int J = left[1]; - const int X = above[-1]; - const int A = above[0]; - const int B = above[1]; - (void)stride; - DST(0, 1) = AVG3(X, I, J); - DST(1, 1) = DST(0, 0) = AVG3(A, X, I); - DST(1, 0) = AVG3(B, A, X); -} + // Note: For low bitdepth, assembly code may be optimized by using smaller + // constants for smaller block sizes, where the range of the 'sum' is + // restricted to fewer bits. -void aom_d153_predictor_2x2_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int I = left[0]; - const int J = left[1]; - const int X = above[-1]; - const int A = above[0]; - - DST(0, 0) = AVG2(I, X); - DST(0, 1) = AVG2(J, I); - DST(1, 0) = AVG3(I, X, A); - DST(1, 1) = AVG3(J, I, X); -} +#define DC_MULTIPLIER_1X2 0x5556 +#define DC_MULTIPLIER_1X4 0x3334 -void aom_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - const int E = above[4]; - const int F = above[5]; - const int G = above[6]; - const int H = above[7]; - (void)stride; - (void)left; - DST(0, 0) = AVG3(A, B, C); - DST(1, 0) = DST(0, 1) = AVG3(B, C, D); - DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); - DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); - DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); - DST(3, 2) = DST(2, 3) = AVG3(F, G, H); - DST(3, 3) = AVG3(G, H, H); -} +#define DC_SHIFT2 16 -void aom_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int I = left[0]; - const int J = left[1]; - const int K = left[2]; - const int X = above[-1]; - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - DST(0, 0) = DST(1, 2) = AVG2(X, A); - DST(1, 0) = DST(2, 2) = AVG2(A, B); - DST(2, 0) = DST(3, 2) = AVG2(B, C); - DST(3, 0) = AVG2(C, D); - - DST(0, 3) = AVG3(K, J, I); - DST(0, 2) = AVG3(J, I, X); - DST(0, 1) = DST(1, 3) = AVG3(I, X, A); - DST(1, 1) = DST(2, 3) = AVG3(X, A, B); - DST(2, 1) = DST(3, 3) = AVG3(A, B, C); - DST(3, 1) = AVG3(B, C, D); -} +static INLINE void dc_predictor_rect(uint8_t *dst, ptrdiff_t stride, int bw, + int bh, const uint8_t *above, + const uint8_t *left, int shift1, + int multiplier) { + int sum = 0; -void aom_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int I = left[0]; - const int J = left[1]; - const int K = left[2]; - const int L = left[3]; - const int X = above[-1]; - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - (void)stride; - DST(0, 3) = AVG3(J, K, L); - DST(1, 3) = DST(0, 2) = AVG3(I, J, K); - DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J); - DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I); - DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X); - DST(3, 1) = DST(2, 0) = AVG3(C, B, A); - DST(3, 0) = AVG3(D, C, B); + for (int i = 0; i < bw; i++) { + sum += above[i]; + } + for (int i = 0; i < bh; i++) { + sum += left[i]; + } + + const int expected_dc = divide_using_multiply_shift( + sum + ((bw + bh) >> 1), shift1, multiplier, DC_SHIFT2); + assert(expected_dc < (1 << 8)); + + for (int r = 0; r < bh; r++) { + memset(dst, expected_dc, bw); + dst += stride; + } } -void aom_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, - const uint8_t *above, const uint8_t *left) { - const int I = left[0]; - const int J = left[1]; - const int K = left[2]; - const int L = left[3]; - const int X = above[-1]; - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - - DST(0, 0) = DST(2, 1) = AVG2(I, X); - DST(0, 1) = DST(2, 2) = AVG2(J, I); - DST(0, 2) = DST(2, 3) = AVG2(K, J); - DST(0, 3) = AVG2(L, K); - - DST(3, 0) = AVG3(A, B, C); - DST(2, 0) = AVG3(X, A, B); - DST(1, 0) = DST(3, 1) = AVG3(I, X, A); - DST(1, 1) = DST(3, 2) = AVG3(J, I, X); - DST(1, 2) = DST(3, 3) = AVG3(K, J, I); - DST(1, 3) = AVG3(L, K, J); +#undef DC_SHIFT2 + +void aom_dc_predictor_4x8_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_predictor_rect(dst, stride, 4, 8, above, left, 2, DC_MULTIPLIER_1X2); } -#if CONFIG_HIGHBITDEPTH -static INLINE void highbd_d207e_predictor(uint16_t *dst, ptrdiff_t stride, - int bw, int bh, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void)above; - (void)bd; +void aom_dc_predictor_8x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_predictor_rect(dst, stride, 8, 4, above, left, 2, DC_MULTIPLIER_1X2); +} - for (r = 0; r < bh; ++r) { - for (c = 0; c < bw; ++c) { - dst[c] = c & 1 ? AVG3(left[(c >> 1) + r], left[(c >> 1) + r + 1], - left[(c >> 1) + r + 2]) - : AVG2(left[(c >> 1) + r], left[(c >> 1) + r + 1]); - } - dst += stride; - } +void aom_dc_predictor_4x16_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_predictor_rect(dst, stride, 4, 16, above, left, 2, DC_MULTIPLIER_1X4); } -static INLINE void highbd_d63e_predictor(uint16_t *dst, ptrdiff_t stride, - int bw, int bh, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void)left; - (void)bd; - for (r = 0; r < bh; ++r) { - for (c = 0; c < bw; ++c) { - dst[c] = r & 1 ? AVG3(above[(r >> 1) + c], above[(r >> 1) + c + 1], - above[(r >> 1) + c + 2]) - : AVG2(above[(r >> 1) + c], above[(r >> 1) + c + 1]); - } - dst += stride; - } +void aom_dc_predictor_16x4_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_predictor_rect(dst, stride, 16, 4, above, left, 2, DC_MULTIPLIER_1X4); } -static INLINE void highbd_d45e_predictor(uint16_t *dst, ptrdiff_t stride, - int bw, int bh, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void)left; - (void)bd; - for (r = 0; r < bh; ++r) { - for (c = 0; c < bw; ++c) { - dst[c] = AVG3(above[r + c], above[r + c + 1], - above[r + c + 1 + (r + c + 2 < bw + bh)]); - } - dst += stride; - } +void aom_dc_predictor_8x16_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_predictor_rect(dst, stride, 8, 16, above, left, 3, DC_MULTIPLIER_1X2); } -static INLINE void highbd_d117_predictor(uint16_t *dst, ptrdiff_t stride, - int bw, int bh, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void)bd; +void aom_dc_predictor_16x8_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_predictor_rect(dst, stride, 16, 8, above, left, 3, DC_MULTIPLIER_1X2); +} - // first row - for (c = 0; c < bw; c++) dst[c] = AVG2(above[c - 1], above[c]); - dst += stride; +void aom_dc_predictor_8x32_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_predictor_rect(dst, stride, 8, 32, above, left, 3, DC_MULTIPLIER_1X4); +} - // second row - dst[0] = AVG3(left[0], above[-1], above[0]); - for (c = 1; c < bw; c++) dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); - dst += stride; +void aom_dc_predictor_32x8_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_predictor_rect(dst, stride, 32, 8, above, left, 3, DC_MULTIPLIER_1X4); +} - // the rest of first col - dst[0] = AVG3(above[-1], left[0], left[1]); - for (r = 3; r < bh; ++r) - dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]); +void aom_dc_predictor_16x32_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_predictor_rect(dst, stride, 16, 32, above, left, 4, DC_MULTIPLIER_1X2); +} - // the rest of the block - for (r = 2; r < bh; ++r) { - for (c = 1; c < bw; c++) dst[c] = dst[-2 * stride + c - 1]; - dst += stride; - } +void aom_dc_predictor_32x16_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_predictor_rect(dst, stride, 32, 16, above, left, 4, DC_MULTIPLIER_1X2); } -static INLINE void highbd_d135_predictor(uint16_t *dst, ptrdiff_t stride, - int bw, int bh, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void)bd; - dst[0] = AVG3(left[0], above[-1], above[0]); - for (c = 1; c < bw; c++) dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); +void aom_dc_predictor_16x64_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_predictor_rect(dst, stride, 16, 64, above, left, 4, DC_MULTIPLIER_1X4); +} - dst[stride] = AVG3(above[-1], left[0], left[1]); - for (r = 2; r < bh; ++r) - dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); +void aom_dc_predictor_64x16_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_predictor_rect(dst, stride, 64, 16, above, left, 4, DC_MULTIPLIER_1X4); +} - dst += stride; - for (r = 1; r < bh; ++r) { - for (c = 1; c < bw; c++) dst[c] = dst[-stride + c - 1]; - dst += stride; - } +void aom_dc_predictor_32x64_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_predictor_rect(dst, stride, 32, 64, above, left, 5, DC_MULTIPLIER_1X2); } -static INLINE void highbd_d153_predictor(uint16_t *dst, ptrdiff_t stride, - int bw, int bh, const uint16_t *above, - const uint16_t *left, int bd) { - int r, c; - (void)bd; - dst[0] = AVG2(above[-1], left[0]); - for (r = 1; r < bh; r++) dst[r * stride] = AVG2(left[r - 1], left[r]); - dst++; - - dst[0] = AVG3(left[0], above[-1], above[0]); - dst[stride] = AVG3(above[-1], left[0], left[1]); - for (r = 2; r < bh; r++) - dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); - dst++; - - for (c = 0; c < bw - 2; c++) - dst[c] = AVG3(above[c - 1], above[c], above[c + 1]); - dst += stride; - - for (r = 1; r < bh; ++r) { - for (c = 0; c < bw - 2; c++) dst[c] = dst[-stride + c - 2]; - dst += stride; - } +void aom_dc_predictor_64x32_c(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left) { + dc_predictor_rect(dst, stride, 64, 32, above, left, 5, DC_MULTIPLIER_1X2); } +#undef DC_MULTIPLIER_1X2 +#undef DC_MULTIPLIER_1X4 + static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int bd) { @@ -658,93 +384,6 @@ static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, int bw, } } -void aom_highbd_d207_predictor_2x2_c(uint16_t *dst, ptrdiff_t stride, - const uint16_t *above, - const uint16_t *left, int bd) { - const int I = left[0]; - const int J = left[1]; - const int K = left[2]; - const int L = left[3]; - (void)above; - (void)bd; - DST(0, 0) = AVG2(I, J); - DST(0, 1) = AVG2(J, K); - DST(1, 0) = AVG3(I, J, K); - DST(1, 1) = AVG3(J, K, L); -} - -void aom_highbd_d63_predictor_2x2_c(uint16_t *dst, ptrdiff_t stride, - const uint16_t *above, const uint16_t *left, - int bd) { - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - (void)left; - (void)bd; - DST(0, 0) = AVG2(A, B); - DST(1, 0) = AVG2(B, C); - DST(0, 1) = AVG3(A, B, C); - DST(1, 1) = AVG3(B, C, D); -} - -void aom_highbd_d45e_predictor_2x2_c(uint16_t *dst, ptrdiff_t stride, - const uint16_t *above, - const uint16_t *left, int bd) { - const int A = above[0]; - const int B = above[1]; - const int C = above[2]; - const int D = above[3]; - (void)stride; - (void)left; - (void)bd; - DST(0, 0) = AVG3(A, B, C); - DST(1, 0) = DST(0, 1) = AVG3(B, C, D); - DST(1, 1) = AVG3(C, D, D); -} - -void aom_highbd_d117_predictor_2x2_c(uint16_t *dst, ptrdiff_t stride, - const uint16_t *above, - const uint16_t *left, int bd) { - const int I = left[0]; - const int X = above[-1]; - const int A = above[0]; - const int B = above[1]; - (void)bd; - DST(0, 0) = AVG2(X, A); - DST(1, 0) = AVG2(A, B); - DST(0, 1) = AVG3(I, X, A); - DST(1, 1) = AVG3(X, A, B); -} - -void aom_highbd_d135_predictor_2x2_c(uint16_t *dst, ptrdiff_t stride, - const uint16_t *above, - const uint16_t *left, int bd) { - const int I = left[0]; - const int J = left[1]; - const int X = above[-1]; - const int A = above[0]; - const int B = above[1]; - (void)bd; - DST(0, 1) = AVG3(X, I, J); - DST(1, 1) = DST(0, 0) = AVG3(A, X, I); - DST(1, 0) = AVG3(B, A, X); -} - -void aom_highbd_d153_predictor_2x2_c(uint16_t *dst, ptrdiff_t stride, - const uint16_t *above, - const uint16_t *left, int bd) { - const int I = left[0]; - const int J = left[1]; - const int X = above[-1]; - const int A = above[0]; - (void)bd; - DST(0, 0) = AVG2(I, X); - DST(0, 1) = AVG2(J, I); - DST(1, 0) = AVG3(I, X, A); - DST(1, 1) = AVG3(J, I, X); -} - static INLINE void highbd_paeth_predictor(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int bd) { @@ -763,6 +402,7 @@ static INLINE void highbd_smooth_predictor(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int bd) { + (void)bd; const uint16_t below_pred = left[bh - 1]; // estimated by bottom-left pixel const uint16_t right_pred = above[bw - 1]; // estimated by top-right pixel const uint8_t *const sm_weights_w = sm_weight_arrays + bw; @@ -785,17 +425,17 @@ static INLINE void highbd_smooth_predictor(uint16_t *dst, ptrdiff_t stride, for (i = 0; i < 4; ++i) { this_pred += weights[i] * pixels[i]; } - dst[c] = clip_pixel_highbd(divide_round(this_pred, log2_scale), bd); + dst[c] = divide_round(this_pred, log2_scale); } dst += stride; } } -#if CONFIG_SMOOTH_HV static INLINE void highbd_smooth_v_predictor(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int bd) { + (void)bd; const uint16_t below_pred = left[bh - 1]; // estimated by bottom-left pixel const uint8_t *const sm_weights = sm_weight_arrays + bh; // scale = 2^sm_weight_log2_scale @@ -816,7 +456,7 @@ static INLINE void highbd_smooth_v_predictor(uint16_t *dst, ptrdiff_t stride, for (i = 0; i < 2; ++i) { this_pred += weights[i] * pixels[i]; } - dst[c] = clip_pixel_highbd(divide_round(this_pred, log2_scale), bd); + dst[c] = divide_round(this_pred, log2_scale); } dst += stride; } @@ -826,6 +466,7 @@ static INLINE void highbd_smooth_h_predictor(uint16_t *dst, ptrdiff_t stride, int bw, int bh, const uint16_t *above, const uint16_t *left, int bd) { + (void)bd; const uint16_t right_pred = above[bw - 1]; // estimated by top-right pixel const uint8_t *const sm_weights = sm_weight_arrays + bw; // scale = 2^sm_weight_log2_scale @@ -846,12 +487,11 @@ static INLINE void highbd_smooth_h_predictor(uint16_t *dst, ptrdiff_t stride, for (i = 0; i < 2; ++i) { this_pred += weights[i] * pixels[i]; } - dst[c] = clip_pixel_highbd(divide_round(this_pred, log2_scale), bd); + dst[c] = divide_round(this_pred, log2_scale); } dst += stride; } } -#endif // CONFIG_SMOOTH_HV static INLINE void highbd_dc_128_predictor(uint16_t *dst, ptrdiff_t stride, int bw, int bh, @@ -922,7 +562,148 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw, dst += stride; } } -#endif // CONFIG_HIGHBITDEPTH + +// Obtained similarly as DC_MULTIPLIER_1X2 and DC_MULTIPLIER_1X4 above, but +// assume 2nd shift of 17 bits instead of 16. +// Note: Strictly speaking, 2nd shift needs to be 17 only when: +// - bit depth == 12, and +// - bw + bh is divisible by 5 (as opposed to divisible by 3). +// All other cases can use half the multipliers with a shift of 16 instead. +// This special optimization can be used when writing assembly code. +#define HIGHBD_DC_MULTIPLIER_1X2 0xAAAB +// Note: This constant is odd, but a smaller even constant (0x199a) with the +// appropriate shift should work for neon in 8/10-bit. +#define HIGHBD_DC_MULTIPLIER_1X4 0x6667 + +#define HIGHBD_DC_SHIFT2 17 + +static INLINE void highbd_dc_predictor_rect(uint16_t *dst, ptrdiff_t stride, + int bw, int bh, + const uint16_t *above, + const uint16_t *left, int bd, + int shift1, uint32_t multiplier) { + int sum = 0; + (void)bd; + + for (int i = 0; i < bw; i++) { + sum += above[i]; + } + for (int i = 0; i < bh; i++) { + sum += left[i]; + } + + const int expected_dc = divide_using_multiply_shift( + sum + ((bw + bh) >> 1), shift1, multiplier, HIGHBD_DC_SHIFT2); + assert(expected_dc < (1 << bd)); + + for (int r = 0; r < bh; r++) { + aom_memset16(dst, expected_dc, bw); + dst += stride; + } +} + +#undef HIGHBD_DC_SHIFT2 + +void aom_highbd_dc_predictor_4x8_c(uint16_t *dst, ptrdiff_t stride, + const uint16_t *above, const uint16_t *left, + int bd) { + highbd_dc_predictor_rect(dst, stride, 4, 8, above, left, bd, 2, + HIGHBD_DC_MULTIPLIER_1X2); +} + +void aom_highbd_dc_predictor_8x4_c(uint16_t *dst, ptrdiff_t stride, + const uint16_t *above, const uint16_t *left, + int bd) { + highbd_dc_predictor_rect(dst, stride, 8, 4, above, left, bd, 2, + HIGHBD_DC_MULTIPLIER_1X2); +} + +void aom_highbd_dc_predictor_4x16_c(uint16_t *dst, ptrdiff_t stride, + const uint16_t *above, const uint16_t *left, + int bd) { + highbd_dc_predictor_rect(dst, stride, 4, 16, above, left, bd, 2, + HIGHBD_DC_MULTIPLIER_1X4); +} + +void aom_highbd_dc_predictor_16x4_c(uint16_t *dst, ptrdiff_t stride, + const uint16_t *above, const uint16_t *left, + int bd) { + highbd_dc_predictor_rect(dst, stride, 16, 4, above, left, bd, 2, + HIGHBD_DC_MULTIPLIER_1X4); +} + +void aom_highbd_dc_predictor_8x16_c(uint16_t *dst, ptrdiff_t stride, + const uint16_t *above, const uint16_t *left, + int bd) { + highbd_dc_predictor_rect(dst, stride, 8, 16, above, left, bd, 3, + HIGHBD_DC_MULTIPLIER_1X2); +} + +void aom_highbd_dc_predictor_16x8_c(uint16_t *dst, ptrdiff_t stride, + const uint16_t *above, const uint16_t *left, + int bd) { + highbd_dc_predictor_rect(dst, stride, 16, 8, above, left, bd, 3, + HIGHBD_DC_MULTIPLIER_1X2); +} + +void aom_highbd_dc_predictor_8x32_c(uint16_t *dst, ptrdiff_t stride, + const uint16_t *above, const uint16_t *left, + int bd) { + highbd_dc_predictor_rect(dst, stride, 8, 32, above, left, bd, 3, + HIGHBD_DC_MULTIPLIER_1X4); +} + +void aom_highbd_dc_predictor_32x8_c(uint16_t *dst, ptrdiff_t stride, + const uint16_t *above, const uint16_t *left, + int bd) { + highbd_dc_predictor_rect(dst, stride, 32, 8, above, left, bd, 3, + HIGHBD_DC_MULTIPLIER_1X4); +} + +void aom_highbd_dc_predictor_16x32_c(uint16_t *dst, ptrdiff_t stride, + const uint16_t *above, + const uint16_t *left, int bd) { + highbd_dc_predictor_rect(dst, stride, 16, 32, above, left, bd, 4, + HIGHBD_DC_MULTIPLIER_1X2); +} + +void aom_highbd_dc_predictor_32x16_c(uint16_t *dst, ptrdiff_t stride, + const uint16_t *above, + const uint16_t *left, int bd) { + highbd_dc_predictor_rect(dst, stride, 32, 16, above, left, bd, 4, + HIGHBD_DC_MULTIPLIER_1X2); +} + +void aom_highbd_dc_predictor_16x64_c(uint16_t *dst, ptrdiff_t stride, + const uint16_t *above, + const uint16_t *left, int bd) { + highbd_dc_predictor_rect(dst, stride, 16, 64, above, left, bd, 4, + HIGHBD_DC_MULTIPLIER_1X4); +} + +void aom_highbd_dc_predictor_64x16_c(uint16_t *dst, ptrdiff_t stride, + const uint16_t *above, + const uint16_t *left, int bd) { + highbd_dc_predictor_rect(dst, stride, 64, 16, above, left, bd, 4, + HIGHBD_DC_MULTIPLIER_1X4); +} + +void aom_highbd_dc_predictor_32x64_c(uint16_t *dst, ptrdiff_t stride, + const uint16_t *above, + const uint16_t *left, int bd) { + highbd_dc_predictor_rect(dst, stride, 32, 64, above, left, bd, 5, + HIGHBD_DC_MULTIPLIER_1X2); +} + +void aom_highbd_dc_predictor_64x32_c(uint16_t *dst, ptrdiff_t stride, + const uint16_t *above, + const uint16_t *left, int bd) { + highbd_dc_predictor_rect(dst, stride, 64, 32, above, left, bd, 5, + HIGHBD_DC_MULTIPLIER_1X2); +} + +#undef HIGHBD_DC_MULTIPLIER_1X2 +#undef HIGHBD_DC_MULTIPLIER_1X4 // This serves as a wrapper function, so that all the prediction functions // can be unified and accessed as a pointer array. Note that the boundary @@ -934,7 +715,6 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw, type##_predictor(dst, stride, width, height, above, left); \ } -#if CONFIG_HIGHBITDEPTH #define intra_pred_highbd_sized(type, width, height) \ void aom_highbd_##type##_predictor_##width##x##height##_c( \ uint16_t *dst, ptrdiff_t stride, const uint16_t *above, \ @@ -943,7 +723,6 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw, } /* clang-format off */ -#if CONFIG_TX64X64 #define intra_pred_rectangular(type) \ intra_pred_sized(type, 4, 8) \ intra_pred_sized(type, 8, 4) \ @@ -953,6 +732,12 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw, intra_pred_sized(type, 32, 16) \ intra_pred_sized(type, 32, 64) \ intra_pred_sized(type, 64, 32) \ + intra_pred_sized(type, 4, 16) \ + intra_pred_sized(type, 16, 4) \ + intra_pred_sized(type, 8, 32) \ + intra_pred_sized(type, 32, 8) \ + intra_pred_sized(type, 16, 64) \ + intra_pred_sized(type, 64, 16) \ intra_pred_highbd_sized(type, 4, 8) \ intra_pred_highbd_sized(type, 8, 4) \ intra_pred_highbd_sized(type, 8, 16) \ @@ -960,7 +745,13 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw, intra_pred_highbd_sized(type, 16, 32) \ intra_pred_highbd_sized(type, 32, 16) \ intra_pred_highbd_sized(type, 32, 64) \ - intra_pred_highbd_sized(type, 64, 32) + intra_pred_highbd_sized(type, 64, 32) \ + intra_pred_highbd_sized(type, 4, 16) \ + intra_pred_highbd_sized(type, 16, 4) \ + intra_pred_highbd_sized(type, 8, 32) \ + intra_pred_highbd_sized(type, 32, 8) \ + intra_pred_highbd_sized(type, 16, 64) \ + intra_pred_highbd_sized(type, 64, 16) #define intra_pred_above_4x4(type) \ intra_pred_sized(type, 8, 8) \ intra_pred_sized(type, 16, 16) \ @@ -973,100 +764,29 @@ static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bw, intra_pred_highbd_sized(type, 64, 64) \ intra_pred_rectangular(type) #define intra_pred_allsizes(type) \ - intra_pred_sized(type, 2, 2) \ intra_pred_sized(type, 4, 4) \ - intra_pred_highbd_sized(type, 2, 2) \ intra_pred_above_4x4(type) -#else // CONFIG_TX64X64 -#define intra_pred_rectangular(type) \ - intra_pred_sized(type, 4, 8) \ - intra_pred_sized(type, 8, 4) \ - intra_pred_sized(type, 8, 16) \ - intra_pred_sized(type, 16, 8) \ - intra_pred_sized(type, 16, 32) \ - intra_pred_sized(type, 32, 16) \ - intra_pred_highbd_sized(type, 4, 8) \ - intra_pred_highbd_sized(type, 8, 4) \ - intra_pred_highbd_sized(type, 8, 16) \ - intra_pred_highbd_sized(type, 16, 8) \ - intra_pred_highbd_sized(type, 16, 32) \ - intra_pred_highbd_sized(type, 32, 16) -#define intra_pred_above_4x4(type) \ +#define intra_pred_square(type) \ + intra_pred_sized(type, 4, 4) \ intra_pred_sized(type, 8, 8) \ intra_pred_sized(type, 16, 16) \ intra_pred_sized(type, 32, 32) \ + intra_pred_sized(type, 64, 64) \ intra_pred_highbd_sized(type, 4, 4) \ intra_pred_highbd_sized(type, 8, 8) \ intra_pred_highbd_sized(type, 16, 16) \ intra_pred_highbd_sized(type, 32, 32) \ - intra_pred_rectangular(type) -#define intra_pred_allsizes(type) \ - intra_pred_sized(type, 2, 2) \ - intra_pred_sized(type, 4, 4) \ - intra_pred_highbd_sized(type, 2, 2) \ - intra_pred_above_4x4(type) -#endif // CONFIG_TX64X64 - -#else - -#if CONFIG_TX64X64 -#define intra_pred_rectangular(type) \ - intra_pred_sized(type, 4, 8) \ - intra_pred_sized(type, 8, 4) \ - intra_pred_sized(type, 8, 16) \ - intra_pred_sized(type, 16, 8) \ - intra_pred_sized(type, 16, 32) \ - intra_pred_sized(type, 32, 16) \ - intra_pred_sized(type, 32, 64) \ - intra_pred_sized(type, 64, 32) -#define intra_pred_above_4x4(type) \ - intra_pred_sized(type, 8, 8) \ - intra_pred_sized(type, 16, 16) \ - intra_pred_sized(type, 32, 32) \ - intra_pred_sized(type, 64, 64) \ - intra_pred_rectangular(type) -#define intra_pred_allsizes(type) \ - intra_pred_sized(type, 2, 2) \ - intra_pred_sized(type, 4, 4) \ - intra_pred_above_4x4(type) -#else // CONFIG_TX64X64 -#define intra_pred_rectangular(type) \ - intra_pred_sized(type, 4, 8) \ - intra_pred_sized(type, 8, 4) \ - intra_pred_sized(type, 8, 16) \ - intra_pred_sized(type, 16, 8) \ - intra_pred_sized(type, 16, 32) \ - intra_pred_sized(type, 32, 16) -#define intra_pred_above_4x4(type) \ - intra_pred_sized(type, 8, 8) \ - intra_pred_sized(type, 16, 16) \ - intra_pred_sized(type, 32, 32) \ - intra_pred_rectangular(type) -#define intra_pred_allsizes(type) \ - intra_pred_sized(type, 2, 2) \ - intra_pred_sized(type, 4, 4) \ - intra_pred_above_4x4(type) -#endif // CONFIG_TX64X64 - -#endif // CONFIG_HIGHBITDEPTH + intra_pred_highbd_sized(type, 64, 64) -intra_pred_allsizes(d207e) -intra_pred_allsizes(d63e) -intra_pred_above_4x4(d45e) -intra_pred_above_4x4(d117) -intra_pred_above_4x4(d135) -intra_pred_above_4x4(d153) intra_pred_allsizes(v) intra_pred_allsizes(h) intra_pred_allsizes(smooth) -#if CONFIG_SMOOTH_HV intra_pred_allsizes(smooth_v) intra_pred_allsizes(smooth_h) -#endif // CONFIG_SMOOTH_HV intra_pred_allsizes(paeth) intra_pred_allsizes(dc_128) intra_pred_allsizes(dc_left) intra_pred_allsizes(dc_top) -intra_pred_allsizes(dc) +intra_pred_square(dc) /* clang-format on */ #undef intra_pred_allsizes |