diff options
Diffstat (limited to 'third_party/aom/av1/common/daala_tx.c')
-rw-r--r-- | third_party/aom/av1/common/daala_tx.c | 591 |
1 files changed, 591 insertions, 0 deletions
diff --git a/third_party/aom/av1/common/daala_tx.c b/third_party/aom/av1/common/daala_tx.c new file mode 100644 index 000000000..31f03de53 --- /dev/null +++ b/third_party/aom/av1/common/daala_tx.c @@ -0,0 +1,591 @@ +#include "av1/common/daala_tx.h" +#include "av1/common/odintrin.h" + +/* clang-format off */ + +# define OD_DCT_RSHIFT(_a, _b) OD_UNBIASED_RSHIFT32(_a, _b) + +/* TODO: Daala DCT overflow checks need to be ported as a later test */ +# if defined(OD_DCT_CHECK_OVERFLOW) +# else +# define OD_DCT_OVERFLOW_CHECK(val, scale, offset, idx) +# endif + +#define OD_FDCT_2(p0, p1) \ + /* Embedded 2-point orthonormal Type-II fDCT. */ \ + do { \ + /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \ + OD_DCT_OVERFLOW_CHECK(p1, 13573, 16384, 100); \ + p0 -= (p1*13573 + 16384) >> 15; \ + /* 5793/8192 ~= Sin[pi/4] ~= 0.707106781186547 */ \ + OD_DCT_OVERFLOW_CHECK(p0, 5793, 4096, 101); \ + p1 += (p0*5793 + 4096) >> 13; \ + /* 3393/8192 ~= Tan[pi/8] ~= 0.414213562373095 */ \ + OD_DCT_OVERFLOW_CHECK(p1, 3393, 4096, 102); \ + p0 -= (p1*3393 + 4096) >> 13; \ + } \ + while (0) + +#define OD_IDCT_2(p0, p1) \ + /* Embedded 2-point orthonormal Type-II iDCT. */ \ + do { \ + /* 3393/8192 ~= Tan[pi/8] ~= 0.414213562373095 */ \ + p0 += (p1*3393 + 4096) >> 13; \ + /* 5793/8192 ~= Sin[pi/4] ~= 0.707106781186547 */ \ + p1 -= (p0*5793 + 4096) >> 13; \ + /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \ + p0 += (p1*13573 + 16384) >> 15; \ + } \ + while (0) + +#define OD_FDCT_2_ASYM(p0, p1, p1h) \ + /* Embedded 2-point asymmetric Type-II fDCT. */ \ + do { \ + p0 += p1h; \ + p1 = p0 - p1; \ + } \ + while (0) + +#define OD_IDCT_2_ASYM(p0, p1, p1h) \ + /* Embedded 2-point asymmetric Type-II iDCT. */ \ + do { \ + p1 = p0 - p1; \ + p1h = OD_DCT_RSHIFT(p1, 1); \ + p0 -= p1h; \ + } \ + while (0) + +#define OD_FDST_2(p0, p1) \ + /* Embedded 2-point orthonormal Type-IV fDST. */ \ + do { \ + /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ + OD_DCT_OVERFLOW_CHECK(p1, 10947, 8192, 103); \ + p0 -= (p1*10947 + 8192) >> 14; \ + /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + OD_DCT_OVERFLOW_CHECK(p0, 473, 256, 104); \ + p1 += (p0*473 + 256) >> 9; \ + /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ + OD_DCT_OVERFLOW_CHECK(p1, 10947, 8192, 105); \ + p0 -= (p1*10947 + 8192) >> 14; \ + } \ + while (0) + +#define OD_IDST_2(p0, p1) \ + /* Embedded 2-point orthonormal Type-IV iDST. */ \ + do { \ + /* 10947/16384 ~= Tan[3*Pi/16]) ~= 0.668178637919299 */ \ + p0 += (p1*10947 + 8192) >> 14; \ + /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + p1 -= (p0*473 + 256) >> 9; \ + /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ + p0 += (p1*10947 + 8192) >> 14; \ + } \ + while (0) + +#define OD_FDST_2_ASYM(p0, p1) \ + /* Embedded 2-point asymmetric Type-IV fDST. */ \ + do { \ + /* 11507/16384 ~= 4*Sin[Pi/8] - 2*Tan[Pi/8] ~= 0.702306604714169 */ \ + OD_DCT_OVERFLOW_CHECK(p1, 11507, 8192, 187); \ + p0 -= (p1*11507 + 8192) >> 14; \ + /* 669/1024 ~= Cos[Pi/8]/Sqrt[2] ~= 0.653281482438188 */ \ + OD_DCT_OVERFLOW_CHECK(p0, 669, 512, 188); \ + p1 += (p0*669 + 512) >> 10; \ + /* 4573/4096 ~= 4*Sin[Pi/8] - Tan[Pi/8] ~= 1.11652016708726 */ \ + OD_DCT_OVERFLOW_CHECK(p1, 4573, 2048, 189); \ + p0 -= (p1*4573 + 2048) >> 12; \ + } \ + while (0) + +#define OD_IDST_2_ASYM(p0, p1) \ + /* Embedded 2-point asymmetric Type-IV iDST. */ \ + do { \ + /* 4573/4096 ~= 4*Sin[Pi/8] - Tan[Pi/8] ~= 1.11652016708726 */ \ + p0 += (p1*4573 + 2048) >> 12; \ + /* 669/1024 ~= Cos[Pi/8]/Sqrt[2] ~= 0.653281482438188 */ \ + p1 -= (p0*669 + 512) >> 10; \ + /* 11507/16384 ~= 4*Sin[Pi/8] - 2*Tan[Pi/8] ~= 0.702306604714169 */ \ + p0 += (p1*11507 + 8192) >> 14; \ + } \ + while (0) + +#define OD_FDCT_4(q0, q2, q1, q3) \ + /* Embedded 4-point orthonormal Type-II fDCT. */ \ + do { \ + int q2h; \ + int q3h; \ + q3 = q0 - q3; \ + q3h = OD_DCT_RSHIFT(q3, 1); \ + q0 -= q3h; \ + q2 += q1; \ + q2h = OD_DCT_RSHIFT(q2, 1); \ + q1 = q2h - q1; \ + OD_FDCT_2_ASYM(q0, q2, q2h); \ + OD_FDST_2_ASYM(q3, q1); \ + } \ + while (0) + +#define OD_IDCT_4(q0, q2, q1, q3) \ + /* Embedded 4-point orthonormal Type-II iDCT. */ \ + do { \ + int q1h; \ + int q3h; \ + OD_IDST_2_ASYM(q3, q2); \ + OD_IDCT_2_ASYM(q0, q1, q1h); \ + q3h = OD_DCT_RSHIFT(q3, 1); \ + q0 += q3h; \ + q3 = q0 - q3; \ + q2 = q1h - q2; \ + q1 -= q2; \ + } \ + while (0) + +#define OD_FDCT_4_ASYM(q0, q2, q2h, q1, q3, q3h) \ + /* Embedded 4-point asymmetric Type-II fDCT. */ \ + do { \ + q0 += q3h; \ + q3 = q0 - q3; \ + q1 = q2h - q1; \ + q2 = q1 - q2; \ + OD_FDCT_2(q0, q2); \ + OD_FDST_2(q3, q1); \ + } \ + while (0) + +#define OD_IDCT_4_ASYM(q0, q2, q1, q1h, q3, q3h) \ + /* Embedded 4-point asymmetric Type-II iDCT. */ \ + do { \ + OD_IDST_2(q3, q2); \ + OD_IDCT_2(q0, q1); \ + q1 = q2 - q1; \ + q1h = OD_DCT_RSHIFT(q1, 1); \ + q2 = q1h - q2; \ + q3 = q0 - q3; \ + q3h = OD_DCT_RSHIFT(q3, 1); \ + q0 -= q3h; \ + } \ + while (0) + +#define OD_FDST_4_ASYM(t0, t0h, t2, t1, t3) \ + /* Embedded 4-point asymmetric Type-IV fDST. */ \ + do { \ + /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \ + OD_DCT_OVERFLOW_CHECK(t1, 7489, 4096, 106); \ + t2 -= (t1*7489 + 4096) >> 13; \ + /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \ + OD_DCT_OVERFLOW_CHECK(t1, 11585, 8192, 107); \ + t1 += (t2*11585 + 8192) >> 14; \ + /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \ + OD_DCT_OVERFLOW_CHECK(t1, 19195, 16384, 108); \ + t2 += (t1*19195 + 16384) >> 15; \ + t3 += OD_DCT_RSHIFT(t2, 1); \ + t2 -= t3; \ + t1 = t0h - t1; \ + t0 -= t1; \ + /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ + OD_DCT_OVERFLOW_CHECK(t0, 6723, 4096, 109); \ + t3 += (t0*6723 + 4096) >> 13; \ + /* 8035/8192 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \ + OD_DCT_OVERFLOW_CHECK(t3, 8035, 4096, 110); \ + t0 -= (t3*8035 + 4096) >> 13; \ + /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ + OD_DCT_OVERFLOW_CHECK(t0, 6723, 4096, 111); \ + t3 += (t0*6723 + 4096) >> 13; \ + /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \ + OD_DCT_OVERFLOW_CHECK(t1, 8757, 8192, 112); \ + t2 += (t1*8757 + 8192) >> 14; \ + /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \ + OD_DCT_OVERFLOW_CHECK(t2, 6811, 4096, 113); \ + t1 -= (t2*6811 + 4096) >> 13; \ + /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \ + OD_DCT_OVERFLOW_CHECK(t1, 8757, 8192, 114); \ + t2 += (t1*8757 + 8192) >> 14; \ + } \ + while (0) + +#define OD_IDST_4_ASYM(t0, t0h, t2, t1, t3) \ + /* Embedded 4-point asymmetric Type-IV iDST. */ \ + do { \ + /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \ + t1 -= (t2*8757 + 8192) >> 14; \ + /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \ + t2 += (t1*6811 + 4096) >> 13; \ + /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \ + t1 -= (t2*8757 + 8192) >> 14; \ + /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ + t3 -= (t0*6723 + 4096) >> 13; \ + /* 8035/8192 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \ + t0 += (t3*8035 + 4096) >> 13; \ + /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ + t3 -= (t0*6723 + 4096) >> 13; \ + t0 += t2; \ + t0h = OD_DCT_RSHIFT(t0, 1); \ + t2 = t0h - t2; \ + t1 += t3; \ + t3 -= OD_DCT_RSHIFT(t1, 1); \ + /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \ + t1 -= (t2*19195 + 16384) >> 15; \ + /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \ + t2 -= (t1*11585 + 8192) >> 14; \ + /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \ + t1 += (t2*7489 + 4096) >> 13; \ + } \ + while (0) + +#define OD_FDCT_8(r0, r4, r2, r6, r1, r5, r3, r7) \ + /* Embedded 8-point orthonormal Type-II fDCT. */ \ + do { \ + int r4h; \ + int r5h; \ + int r6h; \ + int r7h; \ + r7 = r0 - r7; \ + r7h = OD_DCT_RSHIFT(r7, 1); \ + r0 -= r7h; \ + r6 += r1; \ + r6h = OD_DCT_RSHIFT(r6, 1); \ + r1 = r6h - r1; \ + r5 = r2 - r5; \ + r5h = OD_DCT_RSHIFT(r5, 1); \ + r2 -= r5h; \ + r4 += r3; \ + r4h = OD_DCT_RSHIFT(r4, 1); \ + r3 = r4h - r3; \ + OD_FDCT_4_ASYM(r0, r4, r4h, r2, r6, r6h); \ + OD_FDST_4_ASYM(r7, r7h, r3, r5, r1); \ + } \ + while (0) + +#define OD_IDCT_8(r0, r4, r2, r6, r1, r5, r3, r7) \ + /* Embedded 8-point orthonormal Type-II iDCT. */ \ + do { \ + int r1h; \ + int r3h; \ + int r5h; \ + int r7h; \ + OD_IDST_4_ASYM(r7, r7h, r5, r6, r4); \ + OD_IDCT_4_ASYM(r0, r2, r1, r1h, r3, r3h); \ + r0 += r7h; \ + r7 = r0 - r7; \ + r6 = r1h - r6; \ + r1 -= r6; \ + r5h = OD_DCT_RSHIFT(r5, 1); \ + r2 += r5h; \ + r5 = r2 - r5; \ + r4 = r3h - r4; \ + r3 -= r4; \ + } \ + while (0) + +#define OD_FDST_8(t0, t4, t2, t6, t1, t5, t3, t7) \ + /* Embedded 8-point orthonormal Type-IV fDST. */ \ + do { \ + int t0h; \ + int t2h; \ + int t5h; \ + int t7h; \ + /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ + OD_DCT_OVERFLOW_CHECK(t1, 13573, 16384, 115); \ + t6 -= (t1*13573 + 16384) >> 15; \ + /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \ + OD_DCT_OVERFLOW_CHECK(t6, 11585, 8192, 116); \ + t1 += (t6*11585 + 8192) >> 14; \ + /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ + OD_DCT_OVERFLOW_CHECK(t1, 13573, 16384, 117); \ + t6 -= (t1*13573 + 16384) >> 15; \ + /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ + OD_DCT_OVERFLOW_CHECK(t2, 21895, 16384, 118); \ + t5 -= (t2*21895 + 16384) >> 15; \ + /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + OD_DCT_OVERFLOW_CHECK(t5, 15137, 8192, 119); \ + t2 += (t5*15137 + 8192) >> 14; \ + /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ + OD_DCT_OVERFLOW_CHECK(t2, 10947, 8192, 120); \ + t5 -= (t2*10947 + 8192) >> 14; \ + /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ + OD_DCT_OVERFLOW_CHECK(t3, 3259, 8192, 121); \ + t4 -= (t3*3259 + 8192) >> 14; \ + /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \ + OD_DCT_OVERFLOW_CHECK(t4, 3135, 4096, 122); \ + t3 += (t4*3135 + 4096) >> 13; \ + /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ + OD_DCT_OVERFLOW_CHECK(t3, 3259, 8192, 123); \ + t4 -= (t3*3259 + 8192) >> 14; \ + t7 += t1; \ + t7h = OD_DCT_RSHIFT(t7, 1); \ + t1 -= t7h; \ + t2 = t3 - t2; \ + t2h = OD_DCT_RSHIFT(t2, 1); \ + t3 -= t2h; \ + t0 -= t6; \ + t0h = OD_DCT_RSHIFT(t0, 1); \ + t6 += t0h; \ + t5 = t4 - t5; \ + t5h = OD_DCT_RSHIFT(t5, 1); \ + t4 -= t5h; \ + t1 += t5h; \ + t5 = t1 - t5; \ + t4 += t0h; \ + t0 -= t4; \ + t6 -= t2h; \ + t2 += t6; \ + t3 -= t7h; \ + t7 += t3; \ + /* TODO: Can we move this into another operation */ \ + t7 = -t7; \ + /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \ + OD_DCT_OVERFLOW_CHECK(t7, 7425, 4096, 124); \ + t0 -= (t7*7425 + 4096) >> 13; \ + /* 8153/8192 ~= Sin[15*Pi/32] ~= 0.995184726672197 */ \ + OD_DCT_OVERFLOW_CHECK(t0, 8153, 4096, 125); \ + t7 += (t0*8153 + 4096) >> 13; \ + /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \ + OD_DCT_OVERFLOW_CHECK(t7, 7425, 4096, 126); \ + t0 -= (t7*7425 + 4096) >> 13; \ + /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \ + OD_DCT_OVERFLOW_CHECK(t1, 4861, 16384, 127); \ + t6 -= (t1*4861 + 16384) >> 15; \ + /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.290284677254462 */ \ + OD_DCT_OVERFLOW_CHECK(t6, 1189, 2048, 128); \ + t1 += (t6*1189 + 2048) >> 12; \ + /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \ + OD_DCT_OVERFLOW_CHECK(t1, 4861, 16384, 129); \ + t6 -= (t1*4861 + 16384) >> 15; \ + /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \ + OD_DCT_OVERFLOW_CHECK(t5, 2455, 2048, 130); \ + t2 -= (t5*2455 + 2048) >> 12; \ + /* 7225/8192 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \ + OD_DCT_OVERFLOW_CHECK(t2, 7225, 4096, 131); \ + t5 += (t2*7225 + 4096) >> 13; \ + /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \ + OD_DCT_OVERFLOW_CHECK(t5, 2455, 2048, 132); \ + t2 -= (t5*2455 + 2048) >> 12; \ + /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \ + OD_DCT_OVERFLOW_CHECK(t3, 11725, 16384, 133); \ + t4 -= (t3*11725 + 16384) >> 15; \ + /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.634393284163645 */ \ + OD_DCT_OVERFLOW_CHECK(t4, 5197, 4096, 134); \ + t3 += (t4*5197 + 4096) >> 13; \ + /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \ + OD_DCT_OVERFLOW_CHECK(t3, 11725, 16384, 135); \ + t4 -= (t3*11725 + 16384) >> 15; \ + } \ + while (0) + +#define OD_IDST_8(t0, t4, t2, t6, t1, t5, t3, t7) \ + /* Embedded 8-point orthonormal Type-IV iDST. */ \ + do { \ + int t0h; \ + int t2h; \ + int t5h_; \ + int t7h_; \ + /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \ + t1 += (t6*11725 + 16384) >> 15; \ + /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.634393284163645 */ \ + t6 -= (t1*5197 + 4096) >> 13; \ + /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.357805721314524 */ \ + t1 += (t6*11725 + 16384) >> 15; \ + /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \ + t2 += (t5*2455 + 2048) >> 12; \ + /* 7225/8192 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \ + t5 -= (t2*7225 + 4096) >> 13; \ + /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.599376933681924 */ \ + t2 += (t5*2455 + 2048) >> 12; \ + /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \ + t3 += (t4*4861 + 16384) >> 15; \ + /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.290284677254462 */ \ + t4 -= (t3*1189 + 2048) >> 12; \ + /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.148335987538347 */ \ + t3 += (t4*4861 + 16384) >> 15; \ + /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \ + t0 += (t7*7425 + 4096) >> 13; \ + /* 8153/8192 ~= Sin[15*Pi/32] ~= 0.995184726672197 */ \ + t7 -= (t0*8153 + 4096) >> 13; \ + /* 7425/8192 ~= Tan[15*Pi/64] ~= 0.906347169019147 */ \ + t0 += (t7*7425 + 4096) >> 13; \ + /* TODO: Can we move this into another operation */ \ + t7 = -t7; \ + t7 -= t6; \ + t7h_ = OD_DCT_RSHIFT(t7, 1); \ + t6 += t7h_; \ + t2 -= t3; \ + t2h = OD_DCT_RSHIFT(t2, 1); \ + t3 += t2h; \ + t0 += t1; \ + t0h = OD_DCT_RSHIFT(t0, 1); \ + t1 -= t0h; \ + t5 = t4 - t5; \ + t5h_ = OD_DCT_RSHIFT(t5, 1); \ + t4 -= t5h_; \ + t1 += t5h_; \ + t5 = t1 - t5; \ + t3 -= t0h; \ + t0 += t3; \ + t6 += t2h; \ + t2 = t6 - t2; \ + t4 += t7h_; \ + t7 -= t4; \ + /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ + t1 += (t6*3259 + 8192) >> 14; \ + /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \ + t6 -= (t1*3135 + 4096) >> 13; \ + /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ + t1 += (t6*3259 + 8192) >> 14; \ + /* 10947/16384 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ + t5 += (t2*10947 + 8192) >> 14; \ + /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + t2 -= (t5*15137 + 8192) >> 14; \ + /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ + t5 += (t2*21895 + 16384) >> 15; \ + /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ + t3 += (t4*13573 + 16384) >> 15; \ + /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \ + t4 -= (t3*11585 + 8192) >> 14; \ + /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ + t3 += (t4*13573 + 16384) >> 15; \ + } \ + while (0) + +void od_bin_fdct4(od_coeff y[4], const od_coeff *x, int xstride) { + int q0; + int q1; + int q2; + int q3; + q0 = x[0*xstride]; + q2 = x[1*xstride]; + q1 = x[2*xstride]; + q3 = x[3*xstride]; + OD_FDCT_4(q0, q2, q1, q3); + y[0] = (od_coeff)q0; + y[1] = (od_coeff)q1; + y[2] = (od_coeff)q2; + y[3] = (od_coeff)q3; +} + +void od_bin_idct4(od_coeff *x, int xstride, const od_coeff y[4]) { + int q0; + int q1; + int q2; + int q3; + q0 = y[0]; + q2 = y[1]; + q1 = y[2]; + q3 = y[3]; + OD_IDCT_4(q0, q2, q1, q3); + x[0*xstride] = q0; + x[1*xstride] = q1; + x[2*xstride] = q2; + x[3*xstride] = q3; +} + +void od_bin_fdct8(od_coeff y[8], const od_coeff *x, int xstride) { + int r0; + int r1; + int r2; + int r3; + int r4; + int r5; + int r6; + int r7; + r0 = x[0*xstride]; + r4 = x[1*xstride]; + r2 = x[2*xstride]; + r6 = x[3*xstride]; + r1 = x[4*xstride]; + r5 = x[5*xstride]; + r3 = x[6*xstride]; + r7 = x[7*xstride]; + OD_FDCT_8(r0, r4, r2, r6, r1, r5, r3, r7); + y[0] = (od_coeff)r0; + y[1] = (od_coeff)r1; + y[2] = (od_coeff)r2; + y[3] = (od_coeff)r3; + y[4] = (od_coeff)r4; + y[5] = (od_coeff)r5; + y[6] = (od_coeff)r6; + y[7] = (od_coeff)r7; +} + +void od_bin_idct8(od_coeff *x, int xstride, const od_coeff y[8]) { + int r0; + int r1; + int r2; + int r3; + int r4; + int r5; + int r6; + int r7; + r0 = y[0]; + r4 = y[1]; + r2 = y[2]; + r6 = y[3]; + r1 = y[4]; + r5 = y[5]; + r3 = y[6]; + r7 = y[7]; + OD_IDCT_8(r0, r4, r2, r6, r1, r5, r3, r7); + x[0*xstride] = (od_coeff)r0; + x[1*xstride] = (od_coeff)r1; + x[2*xstride] = (od_coeff)r2; + x[3*xstride] = (od_coeff)r3; + x[4*xstride] = (od_coeff)r4; + x[5*xstride] = (od_coeff)r5; + x[6*xstride] = (od_coeff)r6; + x[7*xstride] = (od_coeff)r7; +} + +void od_bin_fdst8(od_coeff y[8], const od_coeff *x, int xstride) { + int r0; + int r1; + int r2; + int r3; + int r4; + int r5; + int r6; + int r7; + r0 = x[0*xstride]; + r4 = x[1*xstride]; + r2 = x[2*xstride]; + r6 = x[3*xstride]; + r1 = x[4*xstride]; + r5 = x[5*xstride]; + r3 = x[6*xstride]; + r7 = x[7*xstride]; + OD_FDST_8(r0, r4, r2, r6, r1, r5, r3, r7); + y[0] = (od_coeff)r0; + y[1] = (od_coeff)r1; + y[2] = (od_coeff)r2; + y[3] = (od_coeff)r3; + y[4] = (od_coeff)r4; + y[5] = (od_coeff)r5; + y[6] = (od_coeff)r6; + y[7] = (od_coeff)r7; +} + +void od_bin_idst8(od_coeff *x, int xstride, const od_coeff y[8]) { + int r0; + int r1; + int r2; + int r3; + int r4; + int r5; + int r6; + int r7; + r0 = y[0]; + r4 = y[1]; + r2 = y[2]; + r6 = y[3]; + r1 = y[4]; + r5 = y[5]; + r3 = y[6]; + r7 = y[7]; + OD_IDST_8(r0, r4, r2, r6, r1, r5, r3, r7); + x[0*xstride] = (od_coeff)r0; + x[1*xstride] = (od_coeff)r1; + x[2*xstride] = (od_coeff)r2; + x[3*xstride] = (od_coeff)r3; + x[4*xstride] = (od_coeff)r4; + x[5*xstride] = (od_coeff)r5; + x[6*xstride] = (od_coeff)r6; + x[7*xstride] = (od_coeff)r7; +} |