diff options
Diffstat (limited to 'third_party/aom/av1/common/daala_tx.c')
-rw-r--r-- | third_party/aom/av1/common/daala_tx.c | 3742 |
1 files changed, 3741 insertions, 1 deletions
diff --git a/third_party/aom/av1/common/daala_tx.c b/third_party/aom/av1/common/daala_tx.c index 31f03de53..e5b2372e3 100644 --- a/third_party/aom/av1/common/daala_tx.c +++ b/third_party/aom/av1/common/daala_tx.c @@ -166,6 +166,87 @@ } \ while (0) +#define OD_FDST_4(q0, q2, q1, q3) \ + /* Embedded 4-point orthonormal Type-IV fDST. */ \ + do { \ + int q0h; \ + int q1h; \ + /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ + OD_DCT_OVERFLOW_CHECK(q1, 13573, 16384, 190); \ + q2 += (q1*13573 + 16384) >> 15; \ + /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \ + OD_DCT_OVERFLOW_CHECK(q2, 5793, 4096, 191); \ + q1 -= (q2*5793 + 4096) >> 13; \ + /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ + OD_DCT_OVERFLOW_CHECK(q1, 3393, 4096, 192); \ + q2 += (q1*3393 + 4096) >> 13; \ + q0 += q2; \ + q0h = OD_DCT_RSHIFT(q0, 1); \ + q2 = q0h - q2; \ + q1 += q3; \ + q1h = OD_DCT_RSHIFT(q1, 1); \ + q3 -= q1h; \ + /* 537/1024 ~= (1/Sqrt[2] - Cos[3*Pi/16]/2)/Sin[3*Pi/16] ~= + 0.524455699240090 */ \ + OD_DCT_OVERFLOW_CHECK(q1, 537, 512, 193); \ + q2 -= (q1*537 + 512) >> 10; \ + /* 1609/2048 ~= Sqrt[2]*Sin[3*Pi/16] ~= 0.785694958387102 */ \ + OD_DCT_OVERFLOW_CHECK(q2, 1609, 1024, 194); \ + q1 += (q2*1609 + 1024) >> 11; \ + /* 7335/32768 ~= (1/Sqrt[2] - Cos[3*Pi/16])/Sin[3*Pi/16] ~= + 0.223847182092655 */ \ + OD_DCT_OVERFLOW_CHECK(q1, 7335, 16384, 195); \ + q2 += (q1*7335 + 16384) >> 15; \ + /* 5091/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16]/2)/Sin[7*Pi/16] ~= + 0.6215036383171189 */ \ + OD_DCT_OVERFLOW_CHECK(q0, 5091, 4096, 196); \ + q3 += (q0*5091 + 4096) >> 13; \ + /* 5681/4096 ~= Sqrt[2]*Sin[7*Pi/16] ~= 1.38703984532215 */ \ + OD_DCT_OVERFLOW_CHECK(q3, 5681, 2048, 197); \ + q0 -= (q3*5681 + 2048) >> 12; \ + /* 4277/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16])/Sin[7*Pi/16] ~= + 0.52204745462729 */ \ + OD_DCT_OVERFLOW_CHECK(q0, 4277, 4096, 198); \ + q3 += (q0*4277 + 4096) >> 13; \ + } \ + while (0) + +#define OD_IDST_4(q0, q2, q1, q3) \ + /* Embedded 4-point orthonormal Type-IV iDST. */ \ + do { \ + int q0h; \ + int q2h; \ + /* 4277/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16])/Sin[7*Pi/16] ~= + 0.52204745462729 */ \ + q3 -= (q0*4277 + 4096) >> 13; \ + /* 5681/4096 ~= Sqrt[2]*Sin[7*Pi/16] ~= 1.38703984532215 */ \ + q0 += (q3*5681 + 2048) >> 12; \ + /* 5091/8192 ~= (1/Sqrt[2] - Cos[7*Pi/16]/2)/Sin[7*Pi/16] ~= + 0.6215036383171189 */ \ + q3 -= (q0*5091 + 4096) >> 13; \ + /* 7335/32768 ~= (1/Sqrt[2] - Cos[3*Pi/16])/Sin[3*Pi/16] ~= + 0.223847182092655 */ \ + q1 -= (q2*7335 + 16384) >> 15; \ + /* 1609/2048 ~= Sqrt[2]*Sin[3*Pi/16] ~= 0.785694958387102 */ \ + q2 -= (q1*1609 + 1024) >> 11; \ + /* 537/1024 ~= (1/Sqrt[2] - Cos[3*Pi/16]/2)/Sin[3*Pi/16] ~= + 0.524455699240090 */ \ + q1 += (q2*537 + 512) >> 10; \ + q2h = OD_DCT_RSHIFT(q2, 1); \ + q3 += q2h; \ + q2 -= q3; \ + q0h = OD_DCT_RSHIFT(q0, 1); \ + q1 = q0h - q1; \ + q0 -= q1; \ + /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ + q1 -= (q2*3393 + 4096) >> 13; \ + /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \ + q2 += (q1*5793 + 4096) >> 13; \ + /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ + q1 -= (q2*13573 + 16384) >> 15; \ + } \ + while (0) + #define OD_FDST_4_ASYM(t0, t0h, t2, t1, t3) \ /* Embedded 4-point asymmetric Type-IV fDST. */ \ do { \ @@ -277,7 +358,43 @@ } \ while (0) -#define OD_FDST_8(t0, t4, t2, t6, t1, t5, t3, t7) \ +#define OD_FDCT_8_ASYM(r0, r4, r4h, r2, r6, r6h, r1, r5, r5h, r3, r7, r7h) \ + /* Embedded 8-point asymmetric Type-II fDCT. */ \ + do { \ + r0 += r7h; \ + r7 = r0 - r7; \ + r1 = r6h - r1; \ + r6 -= r1; \ + r2 += r5h; \ + r5 = r2 - r5; \ + r3 = r4h - r3; \ + r4 -= r3; \ + OD_FDCT_4(r0, r4, r2, r6); \ + OD_FDST_4(r7, r3, r5, r1); \ + } \ + while (0) + +#define OD_IDCT_8_ASYM(r0, r4, r2, r6, r1, r1h, r5, r5h, r3, r3h, r7, r7h) \ + /* Embedded 8-point asymmetric Type-II iDCT. */ \ + do { \ + OD_IDST_4(r7, r5, r6, r4); \ + OD_IDCT_4(r0, r2, r1, r3); \ + r7 = r0 - r7; \ + r7h = OD_DCT_RSHIFT(r7, 1); \ + r0 -= r7h; \ + r1 += r6; \ + r1h = OD_DCT_RSHIFT(r1, 1); \ + r6 = r1h - r6; \ + r5 = r2 - r5; \ + r5h = OD_DCT_RSHIFT(r5, 1); \ + r2 -= r5h; \ + r3 += r4; \ + r3h = OD_DCT_RSHIFT(r3, 1); \ + r4 = r3h - r4; \ + } \ + while (0) + +#define OD_FDST_8(t0, t4, t2, t6, t1, t5, t3, t7) \ /* Embedded 8-point orthonormal Type-IV fDST. */ \ do { \ int t0h; \ @@ -446,6 +563,2561 @@ } \ while (0) +/* Rewrite this so that t0h can be passed in. */ +#define OD_FDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7) \ + /* Embedded 8-point asymmetric Type-IV fDST. */ \ + do { \ + int t0h; \ + int t2h; \ + int t5h; \ + int t7h; \ + /* 1035/2048 ~= (Sqrt[2] - Cos[7*Pi/32])/(2*Sin[7*Pi/32]) */ \ + OD_DCT_OVERFLOW_CHECK(t1, 1035, 1024, 199); \ + t6 += (t1*1035 + 1024) >> 11; \ + /* 3675/4096 ~= Sqrt[2]*Sin[7*Pi/32] */ \ + OD_DCT_OVERFLOW_CHECK(t6, 3675, 2048, 200); \ + t1 -= (t6*3675 + 2048) >> 12; \ + /* 851/8192 ~= (Cos[7*Pi/32] - 1/Sqrt[2])/Sin[7*Pi/32] */ \ + OD_DCT_OVERFLOW_CHECK(t1, 851, 4096, 201); \ + t6 -= (t1*851 + 4096) >> 13; \ + /* 4379/8192 ~= (Sqrt[2] - Sin[5*Pi/32])/(2*Cos[5*Pi/32]) */ \ + OD_DCT_OVERFLOW_CHECK(t2, 4379, 4096, 202); \ + t5 += (t2*4379 + 4096) >> 13; \ + /* 10217/8192 ~= Sqrt[2]*Cos[5*Pi/32] */ \ + OD_DCT_OVERFLOW_CHECK(t5, 10217, 4096, 203); \ + t2 -= (t5*10217 + 4096) >> 13; \ + /* 4379/16384 ~= (1/Sqrt[2] - Sin[5*Pi/32])/Cos[5*Pi/32] */ \ + OD_DCT_OVERFLOW_CHECK(t2, 4379, 8192, 204); \ + t5 += (t2*4379 + 8192) >> 14; \ + /* 12905/16384 ~= (Sqrt[2] - Cos[3*Pi/32])/(2*Sin[3*Pi/32]) */ \ + OD_DCT_OVERFLOW_CHECK(t3, 12905, 8192, 205); \ + t4 += (t3*12905 + 8192) >> 14; \ + /* 3363/8192 ~= Sqrt[2]*Sin[3*Pi/32] */ \ + OD_DCT_OVERFLOW_CHECK(t4, 3363, 4096, 206); \ + t3 -= (t4*3363 + 4096) >> 13; \ + /* 3525/4096 ~= (Cos[3*Pi/32] - 1/Sqrt[2])/Sin[3*Pi/32] */ \ + OD_DCT_OVERFLOW_CHECK(t3, 3525, 2048, 207); \ + t4 -= (t3*3525 + 2048) >> 12; \ + /* 5417/8192 ~= (Sqrt[2] - Sin[Pi/32])/(2*Cos[Pi/32]) */ \ + OD_DCT_OVERFLOW_CHECK(t0, 5417, 4096, 208); \ + t7 += (t0*5417 + 4096) >> 13; \ + /* 5765/4096 ~= Sqrt[2]*Cos[Pi/32] */ \ + OD_DCT_OVERFLOW_CHECK(t7, 5765, 2048, 209); \ + t0 -= (t7*5765 + 2048) >> 12; \ + /* 2507/4096 ~= (1/Sqrt[2] - Sin[Pi/32])/Cos[Pi/32] */ \ + OD_DCT_OVERFLOW_CHECK(t0, 2507, 2048, 210); \ + t7 += (t0*2507 + 2048) >> 12; \ + t0 += t1; \ + t0h = OD_DCT_RSHIFT(t0, 1); \ + t1 -= t0h; \ + t2 -= t3; \ + t2h = OD_DCT_RSHIFT(t2, 1); \ + t3 += t2h; \ + t5 -= t4; \ + t5h = OD_DCT_RSHIFT(t5, 1); \ + t4 += t5h; \ + t7 += t6; \ + t7h = OD_DCT_RSHIFT(t7, 1); \ + t6 = t7h - t6; \ + t4 = t7h - t4; \ + t7 -= t4; \ + t1 += t5h; \ + t5 = t1 - t5; \ + t6 += t2h; \ + t2 = t6 - t2; \ + t3 -= t0h; \ + t0 += t3; \ + /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ + OD_DCT_OVERFLOW_CHECK(t6, 3259, 8192, 211); \ + t1 += (t6*3259 + 8192) >> 14; \ + /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \ + OD_DCT_OVERFLOW_CHECK(t1, 3135, 4096, 212); \ + t6 -= (t1*3135 + 4096) >> 13; \ + /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ + OD_DCT_OVERFLOW_CHECK(t6, 3259, 8192, 213); \ + t1 += (t6*3259 + 8192) >> 14; \ + /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ + OD_DCT_OVERFLOW_CHECK(t2, 2737, 2048, 214); \ + t5 += (t2*2737 + 2048) >> 12; \ + /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + OD_DCT_OVERFLOW_CHECK(t5, 473, 256, 215); \ + t2 -= (t5*473 + 256) >> 9; \ + /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ + OD_DCT_OVERFLOW_CHECK(t2, 2737, 2048, 216); \ + t5 += (t2*2737 + 2048) >> 12; \ + /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ + OD_DCT_OVERFLOW_CHECK(t4, 3393, 4096, 217); \ + t3 += (t4*3393 + 4096) >> 13; \ + /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \ + OD_DCT_OVERFLOW_CHECK(t3, 5793, 4096, 218); \ + t4 -= (t3*5793 + 4096) >> 13; \ + /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ + OD_DCT_OVERFLOW_CHECK(t4, 3393, 4096, 219); \ + t3 += (t4*3393 + 4096) >> 13; \ + } \ + while (0) + +#define OD_IDST_8_ASYM(t0, t4, t2, t6, t1, t5, t3, t7) \ + /* Embedded 8-point asymmetric Type-IV iDST. */ \ + do { \ + int t0h; \ + int t2h; \ + int t5h__; \ + int t7h__; \ + /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ + t6 -= (t1*3393 + 4096) >> 13; \ + /* 5793/8192 ~= Sin[Pi/4] ~= 0.707106781186547 */ \ + t1 += (t6*5793 + 4096) >> 13; \ + /* 3393/8192 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ + t6 -= (t1*3393 + 4096) >> 13; \ + /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ + t5 -= (t2*2737 + 2048) >> 12; \ + /* 473/512 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + t2 += (t5*473 + 256) >> 9; \ + /* 2737/4096 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ + t5 -= (t2*2737 + 2048) >> 12; \ + /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ + t4 -= (t3*3259 + 8192) >> 14; \ + /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \ + t3 += (t4*3135 + 4096) >> 13; \ + /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ + t4 -= (t3*3259 + 8192) >> 14; \ + t0 -= t6; \ + t0h = OD_DCT_RSHIFT(t0, 1); \ + t6 += t0h; \ + t2 = t3 - t2; \ + t2h = OD_DCT_RSHIFT(t2, 1); \ + t3 -= t2h; \ + t5 = t4 - t5; \ + t5h__ = OD_DCT_RSHIFT(t5, 1); \ + t4 -= t5h__; \ + t7 += t1; \ + t7h__ = OD_DCT_RSHIFT(t7, 1); \ + t1 = t7h__ - t1; \ + t3 = t7h__ - t3; \ + t7 -= t3; \ + t1 -= t5h__; \ + t5 += t1; \ + t6 -= t2h; \ + t2 += t6; \ + t4 += t0h; \ + t0 -= t4; \ + /* 2507/4096 ~= (1/Sqrt[2] - Sin[Pi/32])/Cos[Pi/32] */ \ + t7 -= (t0*2507 + 2048) >> 12; \ + /* 5765/4096 ~= Sqrt[2]*Cos[Pi/32] */ \ + t0 += (t7*5765 + 2048) >> 12; \ + /* 5417/8192 ~= (Sqrt[2] - Sin[Pi/32])/(2*Cos[Pi/32]) */ \ + t7 -= (t0*5417 + 4096) >> 13; \ + /* 3525/4096 ~= (Cos[3*Pi/32] - 1/Sqrt[2])/Sin[3*Pi/32] */ \ + t1 += (t6*3525 + 2048) >> 12; \ + /* 3363/8192 ~= Sqrt[2]*Sin[3*Pi/32] */ \ + t6 += (t1*3363 + 4096) >> 13; \ + /* 12905/16384 ~= (1/Sqrt[2] - Cos[3*Pi/32]/1)/Sin[3*Pi/32] */ \ + t1 -= (t6*12905 + 8192) >> 14; \ + /* 4379/16384 ~= (1/Sqrt[2] - Sin[5*Pi/32])/Cos[5*Pi/32] */ \ + t5 -= (t2*4379 + 8192) >> 14; \ + /* 10217/8192 ~= Sqrt[2]*Cos[5*Pi/32] */ \ + t2 += (t5*10217 + 4096) >> 13; \ + /* 4379/8192 ~= (Sqrt[2] - Sin[5*Pi/32])/(2*Cos[5*Pi/32]) */ \ + t5 -= (t2*4379 + 4096) >> 13; \ + /* 851/8192 ~= (Cos[7*Pi/32] - 1/Sqrt[2])/Sin[7*Pi/32] */ \ + t3 += (t4*851 + 4096) >> 13; \ + /* 3675/4096 ~= Sqrt[2]*Sin[7*Pi/32] */ \ + t4 += (t3*3675 + 2048) >> 12; \ + /* 1035/2048 ~= (Sqrt[2] - Cos[7*Pi/32])/(2*Sin[7*Pi/32]) */ \ + t3 -= (t4*1035 + 1024) >> 11; \ + } \ + while (0) + +#define OD_FDCT_16(s0, s8, s4, sc, s2, sa, s6, se, \ + s1, s9, s5, sd, s3, sb, s7, sf) \ + /* Embedded 16-point orthonormal Type-II fDCT. */ \ + do { \ + int s8h; \ + int sah; \ + int sch; \ + int seh; \ + int sfh; \ + sf = s0 - sf; \ + sfh = OD_DCT_RSHIFT(sf, 1); \ + s0 -= sfh; \ + se += s1; \ + seh = OD_DCT_RSHIFT(se, 1); \ + s1 = seh - s1; \ + sd = s2 - sd; \ + s2 -= OD_DCT_RSHIFT(sd, 1); \ + sc += s3; \ + sch = OD_DCT_RSHIFT(sc, 1); \ + s3 = sch - s3; \ + sb = s4 - sb; \ + s4 -= OD_DCT_RSHIFT(sb, 1); \ + sa += s5; \ + sah = OD_DCT_RSHIFT(sa, 1); \ + s5 = sah - s5; \ + s9 = s6 - s9; \ + s6 -= OD_DCT_RSHIFT(s9, 1); \ + s8 += s7; \ + s8h = OD_DCT_RSHIFT(s8, 1); \ + s7 = s8h - s7; \ + OD_FDCT_8_ASYM(s0, s8, s8h, s4, sc, sch, s2, sa, sah, s6, se, seh); \ + OD_FDST_8_ASYM(sf, s7, sb, s3, sd, s5, s9, s1); \ + } \ + while (0) + +#define OD_IDCT_16(s0, s8, s4, sc, s2, sa, s6, se, \ + s1, s9, s5, sd, s3, sb, s7, sf) \ + /* Embedded 16-point orthonormal Type-II iDCT. */ \ + do { \ + int s1h; \ + int s3h; \ + int s5h; \ + int s7h; \ + int sfh; \ + OD_IDST_8_ASYM(sf, sb, sd, s9, se, sa, sc, s8); \ + OD_IDCT_8_ASYM(s0, s4, s2, s6, s1, s1h, s5, s5h, s3, s3h, s7, s7h); \ + sfh = OD_DCT_RSHIFT(sf, 1); \ + s0 += sfh; \ + sf = s0 - sf; \ + se = s1h - se; \ + s1 -= se; \ + s2 += OD_DCT_RSHIFT(sd, 1); \ + sd = s2 - sd; \ + sc = s3h - sc; \ + s3 -= sc; \ + s4 += OD_DCT_RSHIFT(sb, 1); \ + sb = s4 - sb; \ + sa = s5h - sa; \ + s5 -= sa; \ + s6 += OD_DCT_RSHIFT(s9, 1); \ + s9 = s6 - s9; \ + s8 = s7h - s8; \ + s7 -= s8; \ + } \ + while (0) + +#define OD_FDCT_16_ASYM(t0, t8, t8h, t4, tc, tch, t2, ta, tah, t6, te, teh, \ + t1, t9, t9h, t5, td, tdh, t3, tb, tbh, t7, tf, tfh) \ + /* Embedded 16-point asymmetric Type-II fDCT. */ \ + do { \ + t0 += tfh; \ + tf = t0 - tf; \ + t1 -= teh; \ + te += t1; \ + t2 += tdh; \ + td = t2 - td; \ + t3 -= tch; \ + tc += t3; \ + t4 += tbh; \ + tb = t4 - tb; \ + t5 -= tah; \ + ta += t5; \ + t6 += t9h; \ + t9 = t6 - t9; \ + t7 -= t8h; \ + t8 += t7; \ + OD_FDCT_8(t0, t8, t4, tc, t2, ta, t6, te); \ + OD_FDST_8(tf, t7, tb, t3, td, t5, t9, t1); \ + } \ + while (0) + +#define OD_IDCT_16_ASYM(t0, t8, t4, tc, t2, ta, t6, te, \ + t1, t1h, t9, t9h, t5, t5h, td, tdh, t3, t3h, tb, tbh, t7, t7h, tf, tfh) \ + /* Embedded 16-point asymmetric Type-II iDCT. */ \ + do { \ + OD_IDST_8(tf, tb, td, t9, te, ta, tc, t8); \ + OD_IDCT_8(t0, t4, t2, t6, t1, t5, t3, t7); \ + t1 -= te; \ + t1h = OD_DCT_RSHIFT(t1, 1); \ + te += t1h; \ + t9 = t6 - t9; \ + t9h = OD_DCT_RSHIFT(t9, 1); \ + t6 -= t9h; \ + t5 -= ta; \ + t5h = OD_DCT_RSHIFT(t5, 1); \ + ta += t5h; \ + td = t2 - td; \ + tdh = OD_DCT_RSHIFT(td, 1); \ + t2 -= tdh; \ + t3 -= tc; \ + t3h = OD_DCT_RSHIFT(t3, 1); \ + tc += t3h; \ + tb = t4 - tb; \ + tbh = OD_DCT_RSHIFT(tb, 1); \ + t4 -= tbh; \ + t7 -= t8; \ + t7h = OD_DCT_RSHIFT(t7, 1); \ + t8 += t7h; \ + tf = t0 - tf; \ + tfh = OD_DCT_RSHIFT(tf, 1); \ + t0 -= tfh; \ + } \ + while (0) + +#define OD_FDST_16(s0, s8, s4, sc, s2, sa, s6, se, \ + s1, s9, s5, sd, s3, sb, s7, sf) \ + /* Embedded 16-point orthonormal Type-IV fDST. */ \ + do { \ + int s0h; \ + int s2h; \ + int sdh; \ + int sfh; \ + /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ + OD_DCT_OVERFLOW_CHECK(s3, 13573, 16384, 220); \ + s1 += (se*13573 + 16384) >> 15; \ + /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \ + OD_DCT_OVERFLOW_CHECK(s1, 11585, 8192, 221); \ + se -= (s1*11585 + 8192) >> 14; \ + /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ + OD_DCT_OVERFLOW_CHECK(s3, 13573, 16384, 222); \ + s1 += (se*13573 + 16384) >> 15; \ + /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ + OD_DCT_OVERFLOW_CHECK(s2, 21895, 16384, 223); \ + sd += (s2*21895 + 16384) >> 15; \ + /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + OD_DCT_OVERFLOW_CHECK(sd, 15137, 16384, 224); \ + s2 -= (sd*15137 + 8192) >> 14; \ + /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ + OD_DCT_OVERFLOW_CHECK(s2, 21895, 16384, 225); \ + sd += (s2*21895 + 16384) >> 15; \ + /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ + OD_DCT_OVERFLOW_CHECK(s3, 3259, 8192, 226); \ + sc += (s3*3259 + 8192) >> 14; \ + /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \ + OD_DCT_OVERFLOW_CHECK(sc, 3135, 4096, 227); \ + s3 -= (sc*3135 + 4096) >> 13; \ + /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ + OD_DCT_OVERFLOW_CHECK(s3, 3259, 8192, 228); \ + sc += (s3*3259 + 8192) >> 14; \ + /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ + OD_DCT_OVERFLOW_CHECK(s5, 13573, 16384, 229); \ + sa += (s5*13573 + 16384) >> 15; \ + /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186547 */ \ + OD_DCT_OVERFLOW_CHECK(sa, 11585, 8192, 230); \ + s5 -= (sa*11585 + 8192) >> 14; \ + /* 13573/32768 ~= Tan[Pi/8] ~= 0.414213562373095 */ \ + OD_DCT_OVERFLOW_CHECK(s5, 13573, 16384, 231); \ + sa += (s5*13573 + 16384) >> 15; \ + /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \ + OD_DCT_OVERFLOW_CHECK(s9, 13573, 16384, 232); \ + s6 += (s9*13573 + 16384) >> 15; \ + /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \ + OD_DCT_OVERFLOW_CHECK(s6, 11585, 8192, 233); \ + s9 -= (s6*11585 + 8192) >> 14; \ + /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \ + OD_DCT_OVERFLOW_CHECK(s9, 13573, 16384, 234); \ + s6 += (s9*13573 + 16384) >> 15; \ + sf += se; \ + sfh = OD_DCT_RSHIFT(sf, 1); \ + se = sfh - se; \ + s0 += s1; \ + s0h = OD_DCT_RSHIFT(s0, 1); \ + s1 = s0h - s1; \ + s2 = s3 - s2; \ + s2h = OD_DCT_RSHIFT(s2, 1); \ + s3 -= s2h; \ + sd -= sc; \ + sdh = OD_DCT_RSHIFT(sd, 1); \ + sc += sdh; \ + sa = s4 - sa; \ + s4 -= OD_DCT_RSHIFT(sa, 1); \ + s5 += sb; \ + sb = OD_DCT_RSHIFT(s5, 1) - sb; \ + s8 += s6; \ + s6 -= OD_DCT_RSHIFT(s8, 1); \ + s7 = s9 - s7; \ + s9 -= OD_DCT_RSHIFT(s7, 1); \ + /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ + OD_DCT_OVERFLOW_CHECK(sb, 6723, 4096, 235); \ + s4 += (sb*6723 + 4096) >> 13; \ + /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \ + OD_DCT_OVERFLOW_CHECK(s4, 16069, 8192, 236); \ + sb -= (s4*16069 + 8192) >> 14; \ + /* 6723/8192 ~= Tan[7*Pi/32]) ~= 0.820678790828660 */ \ + OD_DCT_OVERFLOW_CHECK(sb, 6723, 4096, 237); \ + s4 += (sb*6723 + 4096) >> 13; \ + /* 8757/16384 ~= Tan[5*Pi/32]) ~= 0.534511135950792 */ \ + OD_DCT_OVERFLOW_CHECK(s5, 8757, 8192, 238); \ + sa += (s5*8757 + 8192) >> 14; \ + /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \ + OD_DCT_OVERFLOW_CHECK(sa, 6811, 4096, 239); \ + s5 -= (sa*6811 + 4096) >> 13; \ + /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \ + OD_DCT_OVERFLOW_CHECK(s5, 8757, 8192, 240); \ + sa += (s5*8757 + 8192) >> 14; \ + /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ + OD_DCT_OVERFLOW_CHECK(s9, 2485, 4096, 241); \ + s6 += (s9*2485 + 4096) >> 13; \ + /* 4551/8192 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \ + OD_DCT_OVERFLOW_CHECK(s6, 4551, 4096, 242); \ + s9 -= (s6*4551 + 4096) >> 13; \ + /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ + OD_DCT_OVERFLOW_CHECK(s9, 2485, 4096, 243); \ + s6 += (s9*2485 + 4096) >> 13; \ + /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \ + OD_DCT_OVERFLOW_CHECK(s8, 3227, 16384, 244); \ + s7 += (s8*3227 + 16384) >> 15; \ + /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \ + OD_DCT_OVERFLOW_CHECK(s7, 6393, 16384, 245); \ + s8 -= (s7*6393 + 16384) >> 15; \ + /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \ + OD_DCT_OVERFLOW_CHECK(s8, 3227, 16384, 246); \ + s7 += (s8*3227 + 16384) >> 15; \ + s1 -= s2h; \ + s2 += s1; \ + se += sdh; \ + sd = se - sd; \ + s3 += sfh; \ + sf -= s3; \ + sc = s0h - sc; \ + s0 -= sc; \ + sb += OD_DCT_RSHIFT(s8, 1); \ + s8 = sb - s8; \ + s4 += OD_DCT_RSHIFT(s7, 1); \ + s7 -= s4; \ + s6 += OD_DCT_RSHIFT(s5, 1); \ + s5 = s6 - s5; \ + s9 -= OD_DCT_RSHIFT(sa, 1); \ + sa += s9; \ + s8 += s0; \ + s0 -= OD_DCT_RSHIFT(s8, 1); \ + sf += s7; \ + s7 = OD_DCT_RSHIFT(sf, 1) - s7; \ + s1 -= s6; \ + s6 += OD_DCT_RSHIFT(s1, 1); \ + s9 += se; \ + se = OD_DCT_RSHIFT(s9, 1) - se; \ + s2 += sa; \ + sa = OD_DCT_RSHIFT(s2, 1) - sa; \ + s5 += sd; \ + sd -= OD_DCT_RSHIFT(s5, 1); \ + s4 = sc - s4; \ + sc -= OD_DCT_RSHIFT(s4, 1); \ + s3 -= sb; \ + sb += OD_DCT_RSHIFT(s3, 1); \ + /* 2799/4096 ~= (1/Sqrt[2] - Cos[31*Pi/64]/2)/Sin[31*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(sf, 2799, 2048, 247); \ + s0 -= (sf*2799 + 2048) >> 12; \ + /* 2893/2048 ~= Sqrt[2]*Sin[31*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(s0, 2893, 1024, 248); \ + sf += (s0*2893 + 1024) >> 11; \ + /* 5397/8192 ~= (Cos[Pi/4] - Cos[31*Pi/64])/Sin[31*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(sf, 5397, 4096, 249); \ + s0 -= (sf*5397 + 4096) >> 13; \ + /* 41/64 ~= (1/Sqrt[2] - Cos[29*Pi/64]/2)/Sin[29*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(s1, 41, 32, 250); \ + se += (s1*41 + 32) >> 6; \ + /* 2865/2048 ~= Sqrt[2]*Sin[29*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(se, 2865, 1024, 251); \ + s1 -= (se*2865 + 1024) >> 11; \ + /* 4641/8192 ~= (1/Sqrt[2] - Cos[29*Pi/64])/Sin[29*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(s1, 4641, 4096, 252); \ + se += (s1*4641 + 4096) >> 13; \ + /* 2473/4096 ~= (1/Sqrt[2] - Cos[27*Pi/64]/2)/Sin[27*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(s2, 2473, 2048, 253); \ + sd += (s2*2473 + 2048) >> 12; \ + /* 5619/4096 ~= Sqrt[2]*Sin[27*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(sd, 5619, 2048, 254); \ + s2 -= (sd*5619 + 2048) >> 12; \ + /* 7839/16384 ~= (1/Sqrt[2] - Cos[27*Pi/64])/Sin[27*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(s2, 7839, 8192, 255); \ + sd += (s2*7839 + 8192) >> 14; \ + /* 5747/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64]/2)/Sin[7*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(s3, 5747, 4096, 256); \ + sc -= (s3*5747 + 4096) >> 13; \ + /* 3903/8192 ~= Sqrt[2]*Sin[7*Pi/64] ~= */ \ + OD_DCT_OVERFLOW_CHECK(sc, 3903, 4096, 257); \ + s3 += (sc*3903 + 4096) >> 13; \ + /* 5701/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64])/Sin[7*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(s3, 5701, 4096, 258); \ + sc += (s3*5701 + 4096) >> 13; \ + /* 4471/8192 ~= (1/Sqrt[2] - Cos[23*Pi/64]/2)/Sin[23*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(s4, 4471, 4096, 259); \ + sb += (s4*4471 + 4096) >> 13; \ + /* 1309/1024 ~= Sqrt[2]*Sin[23*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(sb, 1309, 512, 260); \ + s4 -= (sb*1309 + 512) >> 10; \ + /* 5067/16384 ~= (1/Sqrt[2] - Cos[23*Pi/64])/Sin[23*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(s4, 5067, 8192, 261); \ + sb += (s4*5067 + 8192) >> 14; \ + /* 2217/4096 ~= (1/Sqrt[2] - Cos[11*Pi/64]/2)/Sin[11*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(s5, 2217, 2048, 262); \ + sa -= (s5*2217 + 2048) >> 12; \ + /* 1489/2048 ~= Sqrt[2]*Sin[11*Pi/64] ~= 0.72705107329128 */ \ + OD_DCT_OVERFLOW_CHECK(sa, 1489, 1024, 263); \ + s5 += (sa*1489 + 1024) >> 11; \ + /* 75/256 ~= (1/Sqrt[2] - Cos[11*Pi/64])/Sin[11*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(s5, 75, 128, 264); \ + sa += (s5*75 + 128) >> 8; \ + /* 2087/4096 ~= (1/Sqrt[2] - Cos[19*Pi/64]/2)/Sin[19*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(s9, 2087, 2048, 265); \ + s6 -= (s9*2087 + 2048) >> 12; \ + /* 4653/4096 ~= Sqrt[2]*Sin[19*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(s6, 4653, 2048, 266); \ + s9 += (s6*4653 + 2048) >> 12; \ + /* 4545/32768 ~= (1/Sqrt[2] - Cos[19*Pi/64])/Sin[19*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(s9, 4545, 16384, 267); \ + s6 -= (s9*4545 + 16384) >> 15; \ + /* 2053/4096 ~= (1/Sqrt[2] - Cos[15*Pi/64]/2)/Sin[15*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(s8, 2053, 2048, 268); \ + s7 += (s8*2053 + 2048) >> 12; \ + /* 1945/2048 ~= Sqrt[2]*Sin[15*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(s7, 1945, 1024, 269); \ + s8 -= (s7*1945 + 1024) >> 11; \ + /* 1651/32768 ~= (1/Sqrt[2] - Cos[15*Pi/64])/Sin[15*Pi/64] */ \ + OD_DCT_OVERFLOW_CHECK(s8, 1651, 16384, 270); \ + s7 -= (s8*1651 + 16384) >> 15; \ + } \ + while (0) + +#define OD_IDST_16(s0, s8, s4, sc, s2, sa, s6, se, \ + s1, s9, s5, sd, s3, sb, s7, sf) \ + /* Embedded 16-point orthonormal Type-IV iDST. */ \ + do { \ + int s0h; \ + int s4h; \ + int sbh; \ + int sfh; \ + /* 1651/32768 ~= (1/Sqrt[2] - Cos[15*Pi/64])/Sin[15*Pi/64] */ \ + se += (s1*1651 + 16384) >> 15; \ + /* 1945/2048 ~= Sqrt[2]*Sin[15*Pi/64] */ \ + s1 += (se*1945 + 1024) >> 11; \ + /* 2053/4096 ~= (1/Sqrt[2] - Cos[15*Pi/64]/2)/Sin[15*Pi/64] */ \ + se -= (s1*2053 + 2048) >> 12; \ + /* 4545/32768 ~= (1/Sqrt[2] - Cos[19*Pi/64])/Sin[19*Pi/64] */ \ + s6 += (s9*4545 + 16384) >> 15; \ + /* 4653/32768 ~= Sqrt[2]*Sin[19*Pi/64] */ \ + s9 -= (s6*4653 + 2048) >> 12; \ + /* 2087/4096 ~= (1/Sqrt[2] - Cos[19*Pi/64]/2)/Sin[19*Pi/64] */ \ + s6 += (s9*2087 + 2048) >> 12; \ + /* 75/256 ~= (1/Sqrt[2] - Cos[11*Pi/64])/Sin[11*Pi/64] */ \ + s5 -= (sa*75 + 128) >> 8; \ + /* 1489/2048 ~= Sqrt[2]*Sin[11*Pi/64] */ \ + sa -= (s5*1489 + 1024) >> 11; \ + /* 2217/4096 ~= (1/Sqrt[2] - Cos[11*Pi/64]/2)/Sin[11*Pi/64] */ \ + s5 += (sa*2217 + 2048) >> 12; \ + /* 5067/16384 ~= (1/Sqrt[2] - Cos[23*Pi/64])/Sin[23*Pi/64] */ \ + sd -= (s2*5067 + 8192) >> 14; \ + /* 1309/1024 ~= Sqrt[2]*Sin[23*Pi/64] */ \ + s2 += (sd*1309 + 512) >> 10; \ + /* 4471/8192 ~= (1/Sqrt[2] - Cos[23*Pi/64]/2)/Sin[23*Pi/64] */ \ + sd -= (s2*4471 + 4096) >> 13; \ + /* 5701/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64])/Sin[7*Pi/64] */ \ + s3 -= (sc*5701 + 4096) >> 13; \ + /* 3903/8192 ~= Sqrt[2]*Sin[7*Pi/64] */ \ + sc -= (s3*3903 + 4096) >> 13; \ + /* 5747/8192 ~= (1/Sqrt[2] - Cos[7*Pi/64]/2)/Sin[7*Pi/64] */ \ + s3 += (sc*5747 + 4096) >> 13; \ + /* 7839/16384 ~= (1/Sqrt[2] - Cos[27*Pi/64])/Sin[27*Pi/64] */ \ + sb -= (s4*7839 + 8192) >> 14; \ + /* 5619/4096 ~= Sqrt[2]*Sin[27*Pi/64] */ \ + s4 += (sb*5619 + 2048) >> 12; \ + /* 2473/4096 ~= (1/Sqrt[2] - Cos[27*Pi/64]/2)/Sin[27*Pi/64] */ \ + sb -= (s4*2473 + 2048) >> 12; \ + /* 4641/8192 ~= (1/Sqrt[2] - Cos[29*Pi/64])/Sin[29*Pi/64] */ \ + s7 -= (s8*4641 + 4096) >> 13; \ + /* 2865/2048 ~= Sqrt[2]*Sin[29*Pi/64] */ \ + s8 += (s7*2865 + 1024) >> 11; \ + /* 41/64 ~= (1/Sqrt[2] - Cos[29*Pi/64]/2)/Sin[29*Pi/64] */ \ + s7 -= (s8*41 + 32) >> 6; \ + /* 5397/8192 ~= (Cos[Pi/4] - Cos[31*Pi/64])/Sin[31*Pi/64] */ \ + s0 += (sf*5397 + 4096) >> 13; \ + /* 2893/2048 ~= Sqrt[2]*Sin[31*Pi/64] */ \ + sf -= (s0*2893 + 1024) >> 11; \ + /* 2799/4096 ~= (1/Sqrt[2] - Cos[31*Pi/64]/2)/Sin[31*Pi/64] */ \ + s0 += (sf*2799 + 2048) >> 12; \ + sd -= OD_DCT_RSHIFT(sc, 1); \ + sc += sd; \ + s3 += OD_DCT_RSHIFT(s2, 1); \ + s2 = s3 - s2; \ + sb += OD_DCT_RSHIFT(sa, 1); \ + sa -= sb; \ + s5 = OD_DCT_RSHIFT(s4, 1) - s5; \ + s4 -= s5; \ + s7 = OD_DCT_RSHIFT(s9, 1) - s7; \ + s9 -= s7; \ + s6 -= OD_DCT_RSHIFT(s8, 1); \ + s8 += s6; \ + se = OD_DCT_RSHIFT(sf, 1) - se; \ + sf -= se; \ + s0 += OD_DCT_RSHIFT(s1, 1); \ + s1 -= s0; \ + s5 -= s9; \ + s9 += OD_DCT_RSHIFT(s5, 1); \ + sa = s6 - sa; \ + s6 -= OD_DCT_RSHIFT(sa, 1); \ + se += s2; \ + s2 -= OD_DCT_RSHIFT(se, 1); \ + s1 = sd - s1; \ + sd -= OD_DCT_RSHIFT(s1, 1); \ + s0 += s3; \ + s0h = OD_DCT_RSHIFT(s0, 1); \ + s3 = s0h - s3; \ + sf += sc; \ + sfh = OD_DCT_RSHIFT(sf, 1); \ + sc -= sfh; \ + sb = s7 - sb; \ + sbh = OD_DCT_RSHIFT(sb, 1); \ + s7 -= sbh; \ + s4 -= s8; \ + s4h = OD_DCT_RSHIFT(s4, 1); \ + s8 += s4h; \ + /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \ + se -= (s1*3227 + 16384) >> 15; \ + /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \ + s1 += (se*6393 + 16384) >> 15; \ + /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \ + se -= (s1*3227 + 16384) >> 15; \ + /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ + s6 -= (s9*2485 + 4096) >> 13; \ + /* 4551/8192 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \ + s9 += (s6*4551 + 4096) >> 13; \ + /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ + s6 -= (s9*2485 + 4096) >> 13; \ + /* 8757/16384 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \ + s5 -= (sa*8757 + 8192) >> 14; \ + /* 6811/8192 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \ + sa += (s5*6811 + 4096) >> 13; \ + /* 8757/16384 ~= Tan[5*Pi/32]) ~= 0.534511135950792 */ \ + s5 -= (sa*8757 + 8192) >> 14; \ + /* 6723/8192 ~= Tan[7*Pi/32]) ~= 0.820678790828660 */ \ + s2 -= (sd*6723 + 4096) >> 13; \ + /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \ + sd += (s2*16069 + 8192) >> 14; \ + /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ + s2 -= (sd*6723 + 4096) >> 13; \ + s9 += OD_DCT_RSHIFT(se, 1); \ + se = s9 - se; \ + s6 += OD_DCT_RSHIFT(s1, 1); \ + s1 -= s6; \ + sd = OD_DCT_RSHIFT(sa, 1) - sd; \ + sa -= sd; \ + s2 += OD_DCT_RSHIFT(s5, 1); \ + s5 = s2 - s5; \ + s3 -= sbh; \ + sb += s3; \ + sc += s4h; \ + s4 = sc - s4; \ + s8 = s0h - s8; \ + s0 -= s8; \ + s7 = sfh - s7; \ + sf -= s7; \ + /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \ + s6 -= (s9*13573 + 16384) >> 15; \ + /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \ + s9 += (s6*11585 + 8192) >> 14; \ + /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \ + s6 -= (s9*13573 + 16384) >> 15; \ + /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \ + s5 -= (sa*13573 + 16384) >> 15; \ + /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \ + sa += (s5*11585 + 8192) >> 14; \ + /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \ + s5 -= (sa*13573 + 16384) >> 15; \ + /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ + s3 -= (sc*3259 + 8192) >> 14; \ + /* 3135/8192 ~= Sin[Pi/8] ~= 0.382683432365090 */ \ + sc += (s3*3135 + 4096) >> 13; \ + /* 3259/16384 ~= Tan[Pi/16] ~= 0.198912367379658 */ \ + s3 -= (sc*3259 + 8192) >> 14; \ + /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ + sb -= (s4*21895 + 16384) >> 15; \ + /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + s4 += (sb*15137 + 8192) >> 14; \ + /* 21895/32768 ~= Tan[3*Pi/16] ~= 0.668178637919299 */ \ + sb -= (s4*21895 + 16384) >> 15; \ + /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \ + s8 -= (s7*13573 + 16384) >> 15; \ + /* 11585/16384 ~= Sin[pi/4] ~= 0.707106781186547 */ \ + s7 += (s8*11585 + 8192) >> 14; \ + /* 13573/32768 ~= Tan[pi/8] ~= 0.414213562373095 */ \ + s8 -= (s7*13573 + 16384) >> 15; \ + } \ + while (0) + +/* TODO: rewrite this to match OD_FDST_16. */ +#define OD_FDST_16_ASYM(t0, t0h, t8, t4, t4h, tc, t2, ta, t6, te, \ + t1, t9, t5, td, t3, tb, t7, t7h, tf) \ + /* Embedded 16-point asymmetric Type-IV fDST. */ \ + do { \ + int t2h; \ + int t3h; \ + int t6h; \ + int t8h; \ + int t9h; \ + int tch; \ + int tdh; \ + /* TODO: Can we move these into another operation */ \ + t8 = -t8; \ + t9 = -t9; \ + ta = -ta; \ + tb = -tb; \ + td = -td; \ + /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ + OD_DCT_OVERFLOW_CHECK(te, 13573, 8192, 136); \ + t1 -= (te*13573 + 8192) >> 14; \ + /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \ + OD_DCT_OVERFLOW_CHECK(t1, 11585, 16384, 137); \ + te += (t1*11585 + 16384) >> 15; \ + /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ + OD_DCT_OVERFLOW_CHECK(te, 13573, 8192, 138); \ + t1 -= (te*13573 + 8192) >> 14; \ + /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ + OD_DCT_OVERFLOW_CHECK(td, 4161, 8192, 139); \ + t2 += (td*4161 + 8192) >> 14; \ + /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + OD_DCT_OVERFLOW_CHECK(t2, 15137, 8192, 140); \ + td -= (t2*15137 + 8192) >> 14; \ + /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ + OD_DCT_OVERFLOW_CHECK(td, 14341, 8192, 141); \ + t2 += (td*14341 + 8192) >> 14; \ + /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ + OD_DCT_OVERFLOW_CHECK(t3, 14341, 8192, 142); \ + tc -= (t3*14341 + 8192) >> 14; \ + /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + OD_DCT_OVERFLOW_CHECK(tc, 15137, 8192, 143); \ + t3 += (tc*15137 + 8192) >> 14; \ + /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ + OD_DCT_OVERFLOW_CHECK(t3, 4161, 8192, 144); \ + tc -= (t3*4161 + 8192) >> 14; \ + te = t0h - te; \ + t0 -= te; \ + tf = OD_DCT_RSHIFT(t1, 1) - tf; \ + t1 -= tf; \ + /* TODO: Can we move this into another operation */ \ + tc = -tc; \ + t2 = OD_DCT_RSHIFT(tc, 1) - t2; \ + tc -= t2; \ + t3 = OD_DCT_RSHIFT(td, 1) - t3; \ + td = t3 - td; \ + /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \ + OD_DCT_OVERFLOW_CHECK(t6, 7489, 4096, 145); \ + t9 -= (t6*7489 + 4096) >> 13; \ + /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \ + OD_DCT_OVERFLOW_CHECK(t9, 11585, 8192, 146); \ + t6 += (t9*11585 + 8192) >> 14; \ + /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \ + OD_DCT_OVERFLOW_CHECK(t6, 19195, 16384, 147); \ + t9 += (t6*19195 + 16384) >> 15; \ + t8 += OD_DCT_RSHIFT(t9, 1); \ + t9 -= t8; \ + t6 = t7h - t6; \ + t7 -= t6; \ + /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ + OD_DCT_OVERFLOW_CHECK(t7, 6723, 4096, 148); \ + t8 += (t7*6723 + 4096) >> 13; \ + /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \ + OD_DCT_OVERFLOW_CHECK(t8, 16069, 8192, 149); \ + t7 -= (t8*16069 + 8192) >> 14; \ + /* 6723/8192 ~= Tan[7*Pi/32]) ~= 0.820678790828660 */ \ + OD_DCT_OVERFLOW_CHECK(t7, 6723, 4096, 150); \ + t8 += (t7*6723 + 4096) >> 13; \ + /* 17515/32768 ~= Tan[5*Pi/32]) ~= 0.534511135950792 */ \ + OD_DCT_OVERFLOW_CHECK(t6, 17515, 16384, 151); \ + t9 += (t6*17515 + 16384) >> 15; \ + /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \ + OD_DCT_OVERFLOW_CHECK(t9, 13623, 8192, 152); \ + t6 -= (t9*13623 + 8192) >> 14; \ + /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \ + OD_DCT_OVERFLOW_CHECK(t6, 17515, 16384, 153); \ + t9 += (t6*17515 + 16384) >> 15; \ + /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ + OD_DCT_OVERFLOW_CHECK(ta, 13573, 8192, 154); \ + t5 += (ta*13573 + 8192) >> 14; \ + /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \ + OD_DCT_OVERFLOW_CHECK(t5, 11585, 16384, 155); \ + ta -= (t5*11585 + 16384) >> 15; \ + /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ + OD_DCT_OVERFLOW_CHECK(ta, 13573, 8192, 156); \ + t5 += (ta*13573 + 8192) >> 14; \ + tb += OD_DCT_RSHIFT(t5, 1); \ + t5 = tb - t5; \ + ta += t4h; \ + t4 -= ta; \ + /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ + OD_DCT_OVERFLOW_CHECK(t5, 2485, 4096, 157); \ + ta += (t5*2485 + 4096) >> 13; \ + /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \ + OD_DCT_OVERFLOW_CHECK(ta, 18205, 16384, 158); \ + t5 -= (ta*18205 + 16384) >> 15; \ + /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ + OD_DCT_OVERFLOW_CHECK(t5, 2485, 4096, 159); \ + ta += (t5*2485 + 4096) >> 13; \ + /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ + OD_DCT_OVERFLOW_CHECK(t4, 6723, 4096, 160); \ + tb -= (t4*6723 + 4096) >> 13; \ + /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \ + OD_DCT_OVERFLOW_CHECK(tb, 16069, 8192, 161); \ + t4 += (tb*16069 + 8192) >> 14; \ + /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ + OD_DCT_OVERFLOW_CHECK(t4, 6723, 4096, 162); \ + tb -= (t4*6723 + 4096) >> 13; \ + /* TODO: Can we move this into another operation */ \ + t5 = -t5; \ + tc -= tf; \ + tch = OD_DCT_RSHIFT(tc, 1); \ + tf += tch; \ + t3 += t0; \ + t3h = OD_DCT_RSHIFT(t3, 1); \ + t0 -= t3h; \ + td -= t1; \ + tdh = OD_DCT_RSHIFT(td, 1); \ + t1 += tdh; \ + t2 += te; \ + t2h = OD_DCT_RSHIFT(t2, 1); \ + te -= t2h; \ + t8 += t4; \ + t8h = OD_DCT_RSHIFT(t8, 1); \ + t4 = t8h - t4; \ + t7 = tb - t7; \ + t7h = OD_DCT_RSHIFT(t7, 1); \ + tb = t7h - tb; \ + t6 -= ta; \ + t6h = OD_DCT_RSHIFT(t6, 1); \ + ta += t6h; \ + t9 = t5 - t9; \ + t9h = OD_DCT_RSHIFT(t9, 1); \ + t5 -= t9h; \ + t0 -= t7h; \ + t7 += t0; \ + tf += t8h; \ + t8 -= tf; \ + te -= t6h; \ + t6 += te; \ + t1 += t9h; \ + t9 -= t1; \ + tb -= tch; \ + tc += tb; \ + t4 += t3h; \ + t3 -= t4; \ + ta -= tdh; \ + td += ta; \ + t5 = t2h - t5; \ + t2 -= t5; \ + /* TODO: Can we move these into another operation */ \ + t8 = -t8; \ + t9 = -t9; \ + ta = -ta; \ + tb = -tb; \ + tc = -tc; \ + td = -td; \ + tf = -tf; \ + /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \ + OD_DCT_OVERFLOW_CHECK(tf, 7799, 4096, 163); \ + t0 -= (tf*7799 + 4096) >> 13; \ + /* 4091/4096 ~= Sin[31*Pi/64] ~= 0.998795456205172 */ \ + OD_DCT_OVERFLOW_CHECK(t0, 4091, 2048, 164); \ + tf += (t0*4091 + 2048) >> 12; \ + /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \ + OD_DCT_OVERFLOW_CHECK(tf, 7799, 4096, 165); \ + t0 -= (tf*7799 + 4096) >> 13; \ + /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \ + OD_DCT_OVERFLOW_CHECK(te, 2417, 16384, 166); \ + t1 += (te*2417 + 16384) >> 15; \ + /* 601/4096 ~= Sin[3*Pi/64] ~= 0.146730474455362 */ \ + OD_DCT_OVERFLOW_CHECK(t1, 601, 2048, 167); \ + te -= (t1*601 + 2048) >> 12; \ + /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \ + OD_DCT_OVERFLOW_CHECK(te, 2417, 16384, 168); \ + t1 += (te*2417 + 16384) >> 15; \ + /* 14525/32768 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \ + OD_DCT_OVERFLOW_CHECK(t8, 14525, 16384, 169); \ + t7 -= (t8*14525 + 16384) >> 15; \ + /* 3035/4096 ~= Sin[17*Pi/64] ~= 0.740951125354959 */ \ + OD_DCT_OVERFLOW_CHECK(t7, 3035, 2048, 170); \ + t8 += (t7*3035 + 2048) >> 12; \ + /* 7263/16384 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \ + OD_DCT_OVERFLOW_CHECK(t8, 7263, 8192, 171); \ + t7 -= (t8*7263 + 8192) >> 14; \ + /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \ + OD_DCT_OVERFLOW_CHECK(td, 6393, 4096, 172); \ + t2 -= (td*6393 + 4096) >> 13; \ + /* 3973/4096 ~= Sin[27*Pi/64] ~= 0.970031253194544 */ \ + OD_DCT_OVERFLOW_CHECK(t2, 3973, 2048, 173); \ + td += (t2*3973 + 2048) >> 12; \ + /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \ + OD_DCT_OVERFLOW_CHECK(td, 6393, 4096, 174); \ + t2 -= (td*6393 + 4096) >> 13; \ + /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \ + OD_DCT_OVERFLOW_CHECK(ta, 9281, 8192, 175); \ + t5 -= (ta*9281 + 8192) >> 14; \ + /* 7027/8192 ~= Sin[21*Pi/64] ~= 0.857728610000272 */ \ + OD_DCT_OVERFLOW_CHECK(t5, 7027, 4096, 176); \ + ta += (t5*7027 + 4096) >> 13; \ + /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \ + OD_DCT_OVERFLOW_CHECK(ta, 9281, 8192, 177); \ + t5 -= (ta*9281 + 8192) >> 14; \ + /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \ + OD_DCT_OVERFLOW_CHECK(tc, 11539, 8192, 178); \ + t3 -= (tc*11539 + 8192) >> 14; \ + /* 7713/8192 ~= Sin[25*Pi/64] ~= 0.941544065183021 */ \ + OD_DCT_OVERFLOW_CHECK(t3, 7713, 4096, 179); \ + tc += (t3*7713 + 4096) >> 13; \ + /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \ + OD_DCT_OVERFLOW_CHECK(tc, 11539, 8192, 180); \ + t3 -= (tc*11539 + 8192) >> 14; \ + /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \ + OD_DCT_OVERFLOW_CHECK(tb, 10375, 8192, 181); \ + t4 -= (tb*10375 + 8192) >> 14; \ + /* 7405/8192 ~= Sin[23*Pi/64] ~= 0.903989293123443 */ \ + OD_DCT_OVERFLOW_CHECK(t4, 7405, 4096, 182); \ + tb += (t4*7405 + 4096) >> 13; \ + /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \ + OD_DCT_OVERFLOW_CHECK(tb, 10375, 8192, 183); \ + t4 -= (tb*10375 + 8192) >> 14; \ + /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \ + OD_DCT_OVERFLOW_CHECK(t9, 8247, 8192, 184); \ + t6 -= (t9*8247 + 8192) >> 14; \ + /* 1645/2048 ~= Sin[19*Pi/64] ~= 0.803207531480645 */ \ + OD_DCT_OVERFLOW_CHECK(t6, 1645, 1024, 185); \ + t9 += (t6*1645 + 1024) >> 11; \ + /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \ + OD_DCT_OVERFLOW_CHECK(t9, 8247, 8192, 186); \ + t6 -= (t9*8247 + 8192) >> 14; \ + } \ + while (0) + +#define OD_IDST_16_ASYM(t0, t0h, t8, t4, tc, t2, t2h, ta, t6, te, teh, \ + t1, t9, t5, td, t3, tb, t7, tf) \ + /* Embedded 16-point asymmetric Type-IV iDST. */ \ + do { \ + int t1h_; \ + int t3h_; \ + int t4h; \ + int t6h; \ + int t9h_; \ + int tbh_; \ + int tch; \ + /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \ + t6 += (t9*8247 + 8192) >> 14; \ + /* 1645/2048 ~= Sin[19*Pi/64] ~= 0.803207531480645 */ \ + t9 -= (t6*1645 + 1024) >> 11; \ + /* 8247/16384 ~= Tan[19*Pi/128] ~= 0.503357699799294 */ \ + t6 += (t9*8247 + 8192) >> 14; \ + /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \ + t2 += (td*10375 + 8192) >> 14; \ + /* 7405/8192 ~= Sin[23*Pi/64] ~= 0.903989293123443 */ \ + td -= (t2*7405 + 4096) >> 13; \ + /* 10375/16384 ~= Tan[23*Pi/128] ~= 0.633243016177569 */ \ + t2 += (td*10375 + 8192) >> 14; \ + /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \ + tc += (t3*11539 + 8192) >> 14; \ + /* 7713/8192 ~= Sin[25*Pi/64] ~= 0.941544065183021 */ \ + t3 -= (tc*7713 + 4096) >> 13; \ + /* 11539/16384 ~= Tan[25*Pi/128] ~= 0.704279460865044 */ \ + tc += (t3*11539 + 8192) >> 14; \ + /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \ + ta += (t5*9281 + 8192) >> 14; \ + /* 7027/8192 ~= Sin[21*Pi/64] ~= 0.857728610000272 */ \ + t5 -= (ta*7027 + 4096) >> 13; \ + /* 9281/16384 ~= Tan[21*Pi/128] ~= 0.566493002730344 */ \ + ta += (t5*9281 + 8192) >> 14; \ + /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \ + t4 += (tb*6393 + 4096) >> 13; \ + /* 3973/4096 ~= Sin[27*Pi/64] ~= 0.970031253194544 */ \ + tb -= (t4*3973 + 2048) >> 12; \ + /* 6393/8192 ~= Tan[27*Pi/128] ~= 0.780407659653944 */ \ + t4 += (tb*6393 + 4096) >> 13; \ + /* 7263/16384 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \ + te += (t1*7263 + 8192) >> 14; \ + /* 3035/4096 ~= Sin[17*Pi/64] ~= 0.740951125354959 */ \ + t1 -= (te*3035 + 2048) >> 12; \ + /* 14525/32768 ~= Tan[17*Pi/128] ~= 0.443269513890864 */ \ + te += (t1*14525 + 16384) >> 15; \ + /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \ + t8 -= (t7*2417 + 16384) >> 15; \ + /* 601/4096 ~= Sin[3*Pi/64] ~= 0.146730474455362 */ \ + t7 += (t8*601 + 2048) >> 12; \ + /* 2417/32768 ~= Tan[3*Pi/128] ~= 0.0737644315224493 */ \ + t8 -= (t7*2417 + 16384) >> 15; \ + /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \ + t0 += (tf*7799 + 4096) >> 13; \ + /* 4091/4096 ~= Sin[31*Pi/64] ~= 0.998795456205172 */ \ + tf -= (t0*4091 + 2048) >> 12; \ + /* 7799/8192 ~= Tan[31*Pi/128] ~= 0.952079146700925 */ \ + t0 += (tf*7799 + 4096) >> 13; \ + /* TODO: Can we move these into another operation */ \ + t1 = -t1; \ + t3 = -t3; \ + t5 = -t5; \ + t9 = -t9; \ + tb = -tb; \ + td = -td; \ + tf = -tf; \ + t4 += ta; \ + t4h = OD_DCT_RSHIFT(t4, 1); \ + ta = t4h - ta; \ + tb -= t5; \ + tbh_ = OD_DCT_RSHIFT(tb, 1); \ + t5 += tbh_; \ + tc += t2; \ + tch = OD_DCT_RSHIFT(tc, 1); \ + t2 -= tch; \ + t3 -= td; \ + t3h_ = OD_DCT_RSHIFT(t3, 1); \ + td += t3h_; \ + t9 += t8; \ + t9h_ = OD_DCT_RSHIFT(t9, 1); \ + t8 -= t9h_; \ + t6 -= t7; \ + t6h = OD_DCT_RSHIFT(t6, 1); \ + t7 += t6h; \ + t1 += tf; \ + t1h_ = OD_DCT_RSHIFT(t1, 1); \ + tf -= t1h_; \ + te -= t0; \ + teh = OD_DCT_RSHIFT(te, 1); \ + t0 += teh; \ + ta += t9h_; \ + t9 = ta - t9; \ + t5 -= t6h; \ + t6 += t5; \ + td = teh - td; \ + te = td - te; \ + t2 = t1h_ - t2; \ + t1 -= t2; \ + t7 += t4h; \ + t4 -= t7; \ + t8 -= tbh_; \ + tb += t8; \ + t0 += tch; \ + tc -= t0; \ + tf -= t3h_; \ + t3 += tf; \ + /* TODO: Can we move this into another operation */ \ + ta = -ta; \ + /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ + td += (t2*6723 + 4096) >> 13; \ + /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \ + t2 -= (td*16069 + 8192) >> 14; \ + /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.820678790828660 */ \ + td += (t2*6723 + 4096) >> 13; \ + /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ + t5 -= (ta*2485 + 4096) >> 13; \ + /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \ + ta += (t5*18205 + 16384) >> 15; \ + /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ + t5 -= (ta*2485 + 4096) >> 13; \ + t2 += t5; \ + t2h = OD_DCT_RSHIFT(t2, 1); \ + t5 -= t2h; \ + ta = td - ta; \ + td -= OD_DCT_RSHIFT(ta, 1); \ + /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ + ta -= (t5*13573 + 8192) >> 14; \ + /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \ + t5 += (ta*11585 + 16384) >> 15; \ + /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ + ta -= (t5*13573 + 8192) >> 14; \ + /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.534511135950792 */ \ + t9 -= (t6*17515 + 16384) >> 15; \ + /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.831469612302545 */ \ + t6 += (t9*13623 + 8192) >> 14; \ + /* 17515/32768 ~= Tan[5*Pi/32]) ~= 0.534511135950792 */ \ + t9 -= (t6*17515 + 16384) >> 15; \ + /* 6723/8192 ~= Tan[7*Pi/32]) ~= 0.820678790828660 */ \ + t1 -= (te*6723 + 4096) >> 13; \ + /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.980785280403230 */ \ + te += (t1*16069 + 8192) >> 14; \ + /* 6723/8192 ~= Tan[7*Pi/32]) ~= 0.820678790828660 */ \ + t1 -= (te*6723 + 4096) >> 13; \ + te += t6; \ + teh = OD_DCT_RSHIFT(te, 1); \ + t6 = teh - t6; \ + t9 += t1; \ + t1 -= OD_DCT_RSHIFT(t9, 1); \ + /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \ + t9 -= (t6*19195 + 16384) >> 15; \ + /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \ + t6 -= (t9*11585 + 8192) >> 14; \ + /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \ + t9 += (t6*7489 + 4096) >> 13; \ + tb = tc - tb; \ + tc = OD_DCT_RSHIFT(tb, 1) - tc; \ + t3 += t4; \ + t4 = OD_DCT_RSHIFT(t3, 1) - t4; \ + /* TODO: Can we move this into another operation */ \ + t3 = -t3; \ + t8 += tf; \ + tf = OD_DCT_RSHIFT(t8, 1) - tf; \ + t0 += t7; \ + t0h = OD_DCT_RSHIFT(t0, 1); \ + t7 = t0h - t7; \ + /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ + t3 += (tc*4161 + 8192) >> 14; \ + /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + tc -= (t3*15137 + 8192) >> 14; \ + /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ + t3 += (tc*14341 + 8192) >> 14; \ + /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ + t4 -= (tb*14341 + 8192) >> 14; \ + /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + tb += (t4*15137 + 8192) >> 14; \ + /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ + t4 -= (tb*4161 + 8192) >> 14; \ + /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ + t8 += (t7*13573 + 8192) >> 14; \ + /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \ + t7 -= (t8*11585 + 16384) >> 15; \ + /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ + t8 += (t7*13573 + 8192) >> 14; \ + /* TODO: Can we move these into another operation */ \ + t1 = -t1; \ + t5 = -t5; \ + t9 = -t9; \ + tb = -tb; \ + td = -td; \ + } \ + while (0) + +#define OD_FDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, \ + te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \ + /* Embedded 32-point orthonormal Type-II fDCT. */ \ + do { \ + int tgh; \ + int thh; \ + int tih; \ + int tkh; \ + int tmh; \ + int tnh; \ + int toh; \ + int tqh; \ + int tsh; \ + int tuh; \ + int tvh; \ + tv = t0 - tv; \ + tvh = OD_DCT_RSHIFT(tv, 1); \ + t0 -= tvh; \ + tu += t1; \ + tuh = OD_DCT_RSHIFT(tu, 1); \ + t1 = tuh - t1; \ + tt = t2 - tt; \ + t2 -= OD_DCT_RSHIFT(tt, 1); \ + ts += t3; \ + tsh = OD_DCT_RSHIFT(ts, 1); \ + t3 = tsh - t3; \ + tr = t4 - tr; \ + t4 -= OD_DCT_RSHIFT(tr, 1); \ + tq += t5; \ + tqh = OD_DCT_RSHIFT(tq, 1); \ + t5 = tqh - t5; \ + tp = t6 - tp; \ + t6 -= OD_DCT_RSHIFT(tp, 1); \ + to += t7; \ + toh = OD_DCT_RSHIFT(to, 1); \ + t7 = toh - t7; \ + tn = t8 - tn; \ + tnh = OD_DCT_RSHIFT(tn, 1); \ + t8 -= tnh; \ + tm += t9; \ + tmh = OD_DCT_RSHIFT(tm, 1); \ + t9 = tmh - t9; \ + tl = ta - tl; \ + ta -= OD_DCT_RSHIFT(tl, 1); \ + tk += tb; \ + tkh = OD_DCT_RSHIFT(tk, 1); \ + tb = tkh - tb; \ + tj = tc - tj; \ + tc -= OD_DCT_RSHIFT(tj, 1); \ + ti += td; \ + tih = OD_DCT_RSHIFT(ti, 1); \ + td = tih - td; \ + th = te - th; \ + thh = OD_DCT_RSHIFT(th, 1); \ + te -= thh; \ + tg += tf; \ + tgh = OD_DCT_RSHIFT(tg, 1); \ + tf = tgh - tf; \ + OD_FDCT_16_ASYM(t0, tg, tgh, t8, to, toh, t4, tk, tkh, tc, ts, tsh, \ + t2, ti, tih, ta, tq, tqh, t6, tm, tmh, te, tu, tuh); \ + OD_FDST_16_ASYM(tv, tvh, tf, tn, tnh, t7, tr, tb, tj, t3, \ + tt, td, tl, t5, tp, t9, th, thh, t1); \ + } \ + while (0) + +#define OD_IDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, \ + te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \ + /* Embedded 32-point orthonormal Type-II iDCT. */ \ + do { \ + int t1h; \ + int t3h; \ + int t5h; \ + int t7h; \ + int t9h; \ + int tbh; \ + int tdh; \ + int tfh; \ + int thh; \ + int tth; \ + int tvh; \ + OD_IDST_16_ASYM(tv, tvh, tn, tr, tj, tt, tth, tl, tp, th, thh, \ + tu, tm, tq, ti, ts, tk, to, tg); \ + OD_IDCT_16_ASYM(t0, t8, t4, tc, t2, ta, t6, te, \ + t1, t1h, t9, t9h, t5, t5h, td, tdh, t3, t3h, tb, tbh, t7, t7h, tf, tfh); \ + tu = t1h - tu; \ + t1 -= tu; \ + te += thh; \ + th = te - th; \ + tm = t9h - tm; \ + t9 -= tm; \ + t6 += OD_DCT_RSHIFT(tp, 1); \ + tp = t6 - tp; \ + tq = t5h - tq; \ + t5 -= tq; \ + ta += OD_DCT_RSHIFT(tl, 1); \ + tl = ta - tl; \ + ti = tdh - ti; \ + td -= ti; \ + t2 += tth; \ + tt = t2 - tt; \ + ts = t3h - ts; \ + t3 -= ts; \ + tc += OD_DCT_RSHIFT(tj, 1); \ + tj = tc - tj; \ + tk = tbh - tk; \ + tb -= tk; \ + t4 += OD_DCT_RSHIFT(tr, 1); \ + tr = t4 - tr; \ + to = t7h - to; \ + t7 -= to; \ + t8 += OD_DCT_RSHIFT(tn, 1); \ + tn = t8 - tn; \ + tg = tfh - tg; \ + tf -= tg; \ + t0 += tvh; \ + tv = t0 - tv; \ + } \ + while (0) + +#if CONFIG_TX64X64 +#define OD_FDCT_32_ASYM(t0, tg, tgh, t8, to, toh, t4, tk, tkh, tc, ts, tsh, \ + t2, ti, tih, ta, tq, tqh, t6, tm, tmh, te, tu, tuh, t1, th, thh, \ + t9, tp, tph, t5, tl, tlh, td, tt, tth, t3, tj, tjh, tb, tr, trh, \ + t7, tn, tnh, tf, tv, tvh) \ + /* Embedded 32-point asymmetric Type-II fDCT. */ \ + do { \ + t0 += tvh; \ + tv = t0 - tv; \ + t1 = tuh - t1; \ + tu -= t1; \ + t2 += tth; \ + tt = t2 - tt; \ + t3 = tsh - t3; \ + ts -= t3; \ + t4 += trh; \ + tr = t4 - tr; \ + t5 = tqh - t5; \ + tq -= t5; \ + t6 += tph; \ + tp = t6 - tp; \ + t7 = toh - t7; \ + to -= t7; \ + t8 += tnh; \ + tn = t8 - tn; \ + t9 = tmh - t9; \ + tm -= t9; \ + ta += tlh; \ + tl = ta - tl; \ + tb = tkh - tb; \ + tk -= tb; \ + tc += tjh; \ + tj = tc - tj; \ + td = tih - td; \ + ti -= td; \ + te += thh; \ + th = te - th; \ + tf = tgh - tf; \ + tg -= tf; \ + OD_FDCT_16(t0, tg, t8, to, t4, tk, tc, ts, \ + t2, ti, ta, tq, t6, tm, te, tu); \ + OD_FDST_16(tv, tf, tn, t7, tr, tb, tj, t3, \ + tt, td, tl, t5, tp, t9, th, t1); \ + } \ + while (0) + +#define OD_IDCT_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, \ + t6, tm, te, tu, t1, t1h, th, thh, t9, t9h, tp, tph, t5, t5h, tl, tlh, \ + td, tdh, tt, tth, t3, t3h, tj, tjh, tb, tbh, tr, trh, t7, t7h, tn, tnh, \ + tf, tfh, tv, tvh) \ + /* Embedded 32-point asymmetric Type-II iDCT. */ \ + do { \ + OD_IDST_16(tv, tn, tr, tj, tt, tl, tp, th, \ + tu, tm, tq, ti, ts, tk, to, tg); \ + OD_IDCT_16(t0, t8, t4, tc, t2, ta, t6, te, \ + t1, t9, t5, td, t3, tb, t7, tf); \ + tv = t0 - tv; \ + tvh = OD_DCT_RSHIFT(tv, 1); \ + t0 -= tvh; \ + t1 += tu; \ + t1h = OD_DCT_RSHIFT(t1, 1); \ + tu = t1h - tu; \ + tt = t2 - tt; \ + tth = OD_DCT_RSHIFT(tt, 1); \ + t2 -= tth; \ + t3 += ts; \ + t3h = OD_DCT_RSHIFT(t3, 1); \ + ts = t3h - ts; \ + tr = t4 - tr; \ + trh = OD_DCT_RSHIFT(tr, 1); \ + t4 -= trh; \ + t5 += tq; \ + t5h = OD_DCT_RSHIFT(t5, 1); \ + tq = t5h - tq; \ + tp = t6 - tp; \ + tph = OD_DCT_RSHIFT(tp, 1); \ + t6 -= tph; \ + t7 += to; \ + t7h = OD_DCT_RSHIFT(t7, 1); \ + to = t7h - to; \ + tn = t8 - tn; \ + tnh = OD_DCT_RSHIFT(tn, 1); \ + t8 -= tnh; \ + t9 += tm; \ + t9h = OD_DCT_RSHIFT(t9, 1); \ + tm = t9h - tm; \ + tl = ta - tl; \ + tlh = OD_DCT_RSHIFT(tl, 1); \ + ta -= tlh; \ + tb += tk; \ + tbh = OD_DCT_RSHIFT(tb, 1); \ + tk = tbh - tk; \ + tj = tc - tj; \ + tjh = OD_DCT_RSHIFT(tj, 1); \ + tc -= tjh; \ + td += ti; \ + tdh = OD_DCT_RSHIFT(td, 1); \ + ti = tdh - ti; \ + th = te - th; \ + thh = OD_DCT_RSHIFT(th, 1); \ + te -= thh; \ + tf += tg; \ + tfh = OD_DCT_RSHIFT(tf, 1); \ + tg = tfh - tg; \ + } \ + while (0) + +#define OD_FDST_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, \ + tm, te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \ + /* Embedded 32-point asymmetric Type-IV fDST. */ \ + do { \ + int t0h; \ + int t1h; \ + int t4h; \ + int t5h; \ + int tqh; \ + int trh; \ + int tuh; \ + int tvh; \ + \ + tu = -tu; \ + \ + /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ + OD_DCT_OVERFLOW_CHECK(tq, 13573, 8192, 271); \ + t5 -= (tq*13573 + 8192) >> 14; \ + /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \ + OD_DCT_OVERFLOW_CHECK(t5, 11585, 16384, 272); \ + tq += (t5*11585 + 16384) >> 15; \ + /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ + OD_DCT_OVERFLOW_CHECK(tq, 13573, 8192, 273); \ + t5 -= (tq*13573 + 8192) >> 14; \ + /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \ + OD_DCT_OVERFLOW_CHECK(t6, 29957, 16384, 274); \ + tp += (t6*29957 + 16384) >> 15; \ + /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \ + OD_DCT_OVERFLOW_CHECK(tp, 11585, 8192, 275); \ + t6 -= (tp*11585 + 8192) >> 14; \ + /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \ + OD_DCT_OVERFLOW_CHECK(t6, 19195, 16384, 276); \ + tp -= (t6*19195 + 16384) >> 15; \ + /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \ + OD_DCT_OVERFLOW_CHECK(t1, 29957, 16384, 277); \ + tu += (t1*29957 + 16384) >> 15; \ + /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \ + OD_DCT_OVERFLOW_CHECK(tu, 11585, 8192, 278); \ + t1 -= (tu*11585 + 8192) >> 14; \ + /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \ + OD_DCT_OVERFLOW_CHECK(t1, 19195, 16384, 279); \ + tu -= (t1*19195 + 16384) >> 15; \ + /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ + OD_DCT_OVERFLOW_CHECK(t2, 28681, 16384, 280); \ + tt += (t2*28681 + 16384) >> 15; \ + /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + OD_DCT_OVERFLOW_CHECK(tt, 15137, 8192, 281); \ + t2 -= (tt*15137 + 8192) >> 14; \ + /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ + OD_DCT_OVERFLOW_CHECK(t2, 4161, 8192, 282); \ + tt += (t2*4161 + 8192) >> 14; \ + /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ + OD_DCT_OVERFLOW_CHECK(ts, 4161, 8192, 283); \ + t3 += (ts*4161 + 8192) >> 14; \ + /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + OD_DCT_OVERFLOW_CHECK(t3, 15137, 8192, 284); \ + ts -= (t3*15137 + 8192) >> 14; \ + /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ + OD_DCT_OVERFLOW_CHECK(ts, 14341, 8192, 285); \ + t3 += (ts*14341 + 8192) >> 14; \ + /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \ + OD_DCT_OVERFLOW_CHECK(tm, 19195, 16384, 286); \ + t9 -= (tm*19195 + 16384) >> 15; \ + /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \ + OD_DCT_OVERFLOW_CHECK(t9, 11585, 8192, 287); \ + tm -= (t9*11585 + 8192) >> 14; \ + /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \ + OD_DCT_OVERFLOW_CHECK(tm, 7489, 4096, 288); \ + t9 += (tm*7489 + 4096) >> 13; \ + /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \ + OD_DCT_OVERFLOW_CHECK(tl, 3259, 4096, 289); \ + ta += (tl*3259 + 4096) >> 13; \ + /* 3135/16384 ~= Sin[Pi/8]/2 ~= 0.1913417161825449 */ \ + OD_DCT_OVERFLOW_CHECK(ta, 3135, 8192, 290); \ + tl -= (ta*3135 + 8192) >> 14; \ + /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \ + OD_DCT_OVERFLOW_CHECK(tl, 3259, 4096, 291); \ + ta += (tl*3259 + 4096) >> 13; \ + /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ + OD_DCT_OVERFLOW_CHECK(tk, 4161, 8192, 292); \ + tb += (tk*4161 + 8192) >> 14; \ + /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + OD_DCT_OVERFLOW_CHECK(tb, 15137, 8192, 293); \ + tk -= (tb*15137 + 8192) >> 14; \ + /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ + OD_DCT_OVERFLOW_CHECK(tk, 14341, 8192, 294); \ + tb += (tk*14341 + 8192) >> 14; \ + /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \ + OD_DCT_OVERFLOW_CHECK(te, 29957, 16384, 295); \ + th += (te*29957 + 16384) >> 15; \ + /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \ + OD_DCT_OVERFLOW_CHECK(th, 11585, 8192, 296); \ + te -= (th*11585 + 8192) >> 14; \ + /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \ + OD_DCT_OVERFLOW_CHECK(te, 19195, 16384, 297); \ + th -= (te*19195 + 16384) >> 15; \ + /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ + OD_DCT_OVERFLOW_CHECK(tc, 28681, 16384, 298); \ + tj += (tc*28681 + 16384) >> 15; \ + /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + OD_DCT_OVERFLOW_CHECK(tj, 15137, 8192, 299); \ + tc -= (tj*15137 + 8192) >> 14; \ + /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ + OD_DCT_OVERFLOW_CHECK(tc, 4161, 8192, 300); \ + tj += (tc*4161 + 8192) >> 14; \ + /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ + OD_DCT_OVERFLOW_CHECK(ti, 4161, 8192, 301); \ + td += (ti*4161 + 8192) >> 14; \ + /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + OD_DCT_OVERFLOW_CHECK(td, 15137, 8192, 302); \ + ti -= (td*15137 + 8192) >> 14; \ + /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ + OD_DCT_OVERFLOW_CHECK(ti, 14341, 8192, 303); \ + td += (ti*14341 + 8192) >> 14; \ + \ + t1 = -t1; \ + t2 = -t2; \ + t3 = -t3; \ + td = -td; \ + tg = -tg; \ + to = -to; \ + ts = -ts; \ + \ + tr -= OD_DCT_RSHIFT(t5, 1); \ + t5 += tr; \ + tq -= OD_DCT_RSHIFT(t4, 1); /* pass */ \ + t4 += tq; \ + t6 -= OD_DCT_RSHIFT(t7, 1); \ + t7 += t6; \ + to -= OD_DCT_RSHIFT(tp, 1); /* pass */ \ + tp += to; \ + t1 += OD_DCT_RSHIFT(t0, 1); /* pass */ \ + t0 -= t1; \ + tv -= OD_DCT_RSHIFT(tu, 1); \ + tu += tv; \ + t3 -= OD_DCT_RSHIFT(tt, 1); \ + tt += t3; \ + t2 += OD_DCT_RSHIFT(ts, 1); \ + ts -= t2; \ + t9 -= OD_DCT_RSHIFT(t8, 1); /* pass */ \ + t8 += t9; \ + tn += OD_DCT_RSHIFT(tm, 1); \ + tm -= tn; \ + tb += OD_DCT_RSHIFT(ta, 1); \ + ta -= tb; \ + tl -= OD_DCT_RSHIFT(tk, 1); \ + tk += tl; \ + te -= OD_DCT_RSHIFT(tf, 1); /* pass */ \ + tf += te; \ + tg -= OD_DCT_RSHIFT(th, 1); \ + th += tg; \ + tc -= OD_DCT_RSHIFT(ti, 1); \ + ti += tc; \ + td += OD_DCT_RSHIFT(tj, 1); \ + tj -= td; \ + \ + t4 = -t4; \ + \ + /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \ + OD_DCT_OVERFLOW_CHECK(tr, 6723, 4096, 304); \ + t4 += (tr*6723 + 4096) >> 13; \ + /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.9807852804032304 */ \ + OD_DCT_OVERFLOW_CHECK(t4, 16069, 8192, 305); \ + tr -= (t4*16069 + 8192) >> 14; \ + /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \ + OD_DCT_OVERFLOW_CHECK(tr, 6723, 4096, 306); \ + t4 += (tr*6723 + 4096) >> 13; \ + /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \ + OD_DCT_OVERFLOW_CHECK(tq, 17515, 16384, 307); \ + t5 += (tq*17515 + 16384) >> 15; \ + /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.8314696123025452 */ \ + OD_DCT_OVERFLOW_CHECK(t5, 13623, 8192, 308); \ + tq -= (t5*13623 + 8192) >> 14; \ + /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \ + OD_DCT_OVERFLOW_CHECK(tq, 17515, 16384, 309); \ + t5 += (tq*17515 + 16384) >> 15; \ + /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \ + OD_DCT_OVERFLOW_CHECK(to, 3227, 16384, 310); \ + t7 += (to*3227 + 16384) >> 15; \ + /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \ + OD_DCT_OVERFLOW_CHECK(t7, 6393, 16384, 311); \ + to -= (t7*6393 + 16384) >> 15; \ + /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \ + OD_DCT_OVERFLOW_CHECK(to, 3227, 16384, 312); \ + t7 += (to*3227 + 16384) >> 15; \ + /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ + OD_DCT_OVERFLOW_CHECK(tp, 2485, 4096, 313); \ + t6 += (tp*2485 + 4096) >> 13; \ + /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \ + OD_DCT_OVERFLOW_CHECK(t6, 18205, 16384, 314); \ + tp -= (t6*18205 + 16384) >> 15; \ + /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ + OD_DCT_OVERFLOW_CHECK(tp, 2485, 4096, 315); \ + t6 += (tp*2485 + 4096) >> 13; \ + \ + t5 = -t5; \ + \ + tr += to; \ + trh = OD_DCT_RSHIFT(tr, 1); \ + to -= trh; \ + t4 += t7; \ + t4h = OD_DCT_RSHIFT(t4, 1); \ + t7 -= t4h; \ + t5 += tp; \ + t5h = OD_DCT_RSHIFT(t5, 1); \ + tp -= t5h; \ + tq += t6; \ + tqh = OD_DCT_RSHIFT(tq, 1); \ + t6 -= tqh; \ + t0 -= t3; \ + t0h = OD_DCT_RSHIFT(t0, 1); \ + t3 += t0h; \ + tv -= ts; \ + tvh = OD_DCT_RSHIFT(tv, 1); \ + ts += tvh; \ + tu += tt; \ + tuh = OD_DCT_RSHIFT(tu, 1); \ + tt -= tuh; \ + t1 -= t2; \ + t1h = OD_DCT_RSHIFT(t1, 1); \ + t2 += t1h; \ + t8 += tb; \ + tb -= OD_DCT_RSHIFT(t8, 1); \ + tn += tk; \ + tk -= OD_DCT_RSHIFT(tn, 1); \ + t9 += tl; \ + tl -= OD_DCT_RSHIFT(t9, 1); \ + tm -= ta; \ + ta += OD_DCT_RSHIFT(tm, 1); \ + tc -= tf; \ + tf += OD_DCT_RSHIFT(tc, 1); \ + tj += tg; \ + tg -= OD_DCT_RSHIFT(tj, 1); \ + td -= te; \ + te += OD_DCT_RSHIFT(td, 1); \ + ti += th; \ + th -= OD_DCT_RSHIFT(ti, 1); \ + \ + t9 = -t9; \ + tl = -tl; \ + \ + /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \ + OD_DCT_OVERFLOW_CHECK(tn, 805, 8192, 316); \ + t8 += (tn*805 + 8192) >> 14; \ + /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \ + OD_DCT_OVERFLOW_CHECK(t8, 803, 4096, 317); \ + tn -= (t8*803 + 4096) >> 13; \ + /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \ + OD_DCT_OVERFLOW_CHECK(tn, 805, 8192, 318); \ + t8 += (tn*805 + 8192) >> 14; \ + /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \ + OD_DCT_OVERFLOW_CHECK(tb, 11725, 16384, 319); \ + tk += (tb*11725 + 16384) >> 15; \ + /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \ + OD_DCT_OVERFLOW_CHECK(tk, 5197, 4096, 320); \ + tb -= (tk*5197 + 4096) >> 13; \ + /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \ + OD_DCT_OVERFLOW_CHECK(tb, 11725, 16384, 321); \ + tk += (tb*11725 + 16384) >> 15; \ + /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \ + OD_DCT_OVERFLOW_CHECK(tl, 2455, 2048, 322); \ + ta += (tl*2455 + 2048) >> 12; \ + /* 14449/16384 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \ + OD_DCT_OVERFLOW_CHECK(ta, 14449, 8192, 323); \ + tl -= (ta*14449 + 8192) >> 14; \ + /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \ + OD_DCT_OVERFLOW_CHECK(tl, 2455, 2048, 324); \ + ta += (tl*2455 + 2048) >> 12; \ + /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \ + OD_DCT_OVERFLOW_CHECK(tm, 4861, 16384, 325); \ + t9 += (tm*4861 + 16384) >> 15; \ + /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \ + OD_DCT_OVERFLOW_CHECK(t9, 1189, 2048, 326); \ + tm -= (t9*1189 + 2048) >> 12; \ + /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \ + OD_DCT_OVERFLOW_CHECK(tm, 4861, 16384, 327); \ + t9 += (tm*4861 + 16384) >> 15; \ + /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \ + OD_DCT_OVERFLOW_CHECK(tg, 805, 8192, 328); \ + tf += (tg*805 + 8192) >> 14; \ + /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \ + OD_DCT_OVERFLOW_CHECK(tf, 803, 4096, 329); \ + tg -= (tf*803 + 4096) >> 13; \ + /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \ + OD_DCT_OVERFLOW_CHECK(tg, 805, 8192, 330); \ + tf += (tg*805 + 8192) >> 14; \ + /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \ + OD_DCT_OVERFLOW_CHECK(tj, 2931, 4096, 331); \ + tc += (tj*2931 + 4096) >> 13; \ + /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \ + OD_DCT_OVERFLOW_CHECK(tc, 5197, 4096, 332); \ + tj -= (tc*5197 + 4096) >> 13; \ + /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \ + OD_DCT_OVERFLOW_CHECK(tj, 2931, 4096, 333); \ + tc += (tj*2931 + 4096) >> 13; \ + /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \ + OD_DCT_OVERFLOW_CHECK(ti, 513, 1024, 334); \ + td += (ti*513 + 1024) >> 11; \ + /* 7723/16384 ~= Sin[5*Pi/32] ~= 0.47139673682599764 */ \ + OD_DCT_OVERFLOW_CHECK(td, 7723, 8192, 335); \ + ti -= (td*7723 + 8192) >> 14; \ + /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \ + OD_DCT_OVERFLOW_CHECK(ti, 513, 1024, 336); \ + td += (ti*513 + 1024) >> 11; \ + /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \ + OD_DCT_OVERFLOW_CHECK(th, 4861, 16384, 337); \ + te += (th*4861 + 16384) >> 15; \ + /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \ + OD_DCT_OVERFLOW_CHECK(te, 1189, 2048, 338); \ + th -= (te*1189 + 2048) >> 12; \ + /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \ + OD_DCT_OVERFLOW_CHECK(th, 4861, 16384, 339); \ + te += (th*4861 + 16384) >> 15; \ + \ + ta = -ta; \ + tb = -tb; \ + \ + tt += t5h; \ + t5 -= tt; \ + t2 -= tqh; \ + tq += t2; \ + tp += t1h; \ + t1 -= tp; \ + t6 -= tuh; \ + tu += t6; \ + t7 += tvh; \ + tv -= t7; \ + to += t0h; \ + t0 -= to; \ + t3 -= t4h; \ + t4 += t3; \ + ts += trh; \ + tr -= ts; \ + tf -= OD_DCT_RSHIFT(tn, 1); \ + tn += tf; \ + tg -= OD_DCT_RSHIFT(t8, 1); \ + t8 += tg; \ + tk += OD_DCT_RSHIFT(tc, 1); \ + tc -= tk; \ + tb += OD_DCT_RSHIFT(tj, 1); \ + tj -= tb; \ + ta += OD_DCT_RSHIFT(ti, 1); \ + ti -= ta; \ + tl += OD_DCT_RSHIFT(td, 1); \ + td -= tl; \ + te -= OD_DCT_RSHIFT(tm, 1); \ + tm += te; \ + th -= OD_DCT_RSHIFT(t9, 1); \ + t9 += th; \ + ta -= t5; \ + t5 += OD_DCT_RSHIFT(ta, 1); \ + tq -= tl; \ + tl += OD_DCT_RSHIFT(tq, 1); \ + t2 -= ti; \ + ti += OD_DCT_RSHIFT(t2, 1); \ + td -= tt; \ + tt += OD_DCT_RSHIFT(td, 1); \ + tm += tp; \ + tp -= OD_DCT_RSHIFT(tm, 1); \ + t6 += t9; \ + t9 -= OD_DCT_RSHIFT(t6, 1); \ + te -= tu; \ + tu += OD_DCT_RSHIFT(te, 1); \ + t1 -= th; \ + th += OD_DCT_RSHIFT(t1, 1); \ + t0 -= tg; \ + tg += OD_DCT_RSHIFT(t0, 1); \ + tf += tv; \ + tv -= OD_DCT_RSHIFT(tf, 1); \ + t8 -= t7; \ + t7 += OD_DCT_RSHIFT(t8, 1); \ + to -= tn; \ + tn += OD_DCT_RSHIFT(to, 1); \ + t4 -= tk; \ + tk += OD_DCT_RSHIFT(t4, 1); \ + tb -= tr; \ + tr += OD_DCT_RSHIFT(tb, 1); \ + t3 -= tj; \ + tj += OD_DCT_RSHIFT(t3, 1); \ + tc -= ts; \ + ts += OD_DCT_RSHIFT(tc, 1); \ + \ + tr = -tr; \ + ts = -ts; \ + tt = -tt; \ + tu = -tu; \ + \ + /* 2847/4096 ~= (1/Sqrt[2] - Cos[63*Pi/128]/2)/Sin[63*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(t0, 2847, 2048, 340); \ + tv += (t0*2847 + 2048) >> 12; \ + /* 5791/4096 ~= Sqrt[2]*Sin[63*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(tv, 5791, 2048, 341); \ + t0 -= (tv*5791 + 2048) >> 12; \ + /* 5593/8192 ~= (1/Sqrt[2] - Cos[63*Pi/128])/Sin[63*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(t0, 5593, 4096, 342); \ + tv += (t0*5593 + 4096) >> 13; \ + /* 4099/8192 ~= (1/Sqrt[2] - Cos[31*Pi/128]/2)/Sin[31*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(tf, 4099, 4096, 343); \ + tg -= (tf*4099 + 4096) >> 13; \ + /* 1997/2048 ~= Sqrt[2]*Sin[31*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(tg, 1997, 1024, 344); \ + tf += (tg*1997 + 1024) >> 11; \ + /* -815/32768 ~= (1/Sqrt[2] - Cos[31*Pi/128])/Sin[31*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(tf, 815, 16384, 345); \ + tg += (tf*815 + 16384) >> 15; \ + /* 2527/4096 ~= (1/Sqrt[2] - Cos[17*Pi/128]/2)/Sin[17*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(t8, 2527, 2048, 346); \ + tn -= (t8*2527 + 2048) >> 12; \ + /* 4695/8192 ~= Sqrt[2]*Sin[17*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(tn, 4695, 4096, 347); \ + t8 += (tn*4695 + 4096) >> 13; \ + /* -4187/8192 ~= (1/Sqrt[2] - Cos[17*Pi/128])/Sin[17*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(t8, 4187, 4096, 348); \ + tn += (t8*4187 + 4096) >> 13; \ + /* 5477/8192 ~= (1/Sqrt[2] - Cos[15*Pi/128]/2)/Sin[15*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(to, 5477, 4096, 349); \ + t7 += (to*5477 + 4096) >> 13; \ + /* 4169/8192 ~= Sqrt[2]*Sin[15*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(t7, 4169, 4096, 350); \ + to -= (t7*4169 + 4096) >> 13; \ + /* -2571/4096 ~= (1/Sqrt[2] - Cos[15*Pi/128])/Sin[15*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(to, 2571, 2048, 351); \ + t7 -= (to*2571 + 2048) >> 12; \ + /* 5331/8192 ~= (1/Sqrt[2] - Cos[59*Pi/128]/2)/Sin[59*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(t2, 5331, 4096, 352); \ + tt += (t2*5331 + 4096) >> 13; \ + /* 5749/4096 ~= Sqrt[2]*Sin[59*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(tt, 5749, 2048, 353); \ + t2 -= (tt*5749 + 2048) >> 12; \ + /* 2413/4096 ~= (1/Sqrt[2] - Cos[59*Pi/128])/Sin[59*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(t2, 2413, 2048, 354); \ + tt += (t2*2413 + 2048) >> 12; \ + /* 4167/8192 ~= (1/Sqrt[2] - Cos[27*Pi/128]/2)/Sin[27*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(td, 4167, 4096, 355); \ + ti -= (td*4167 + 4096) >> 13; \ + /* 891/1024 ~= Sqrt[2]*Sin[27*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(ti, 891, 512, 356); \ + td += (ti*891 + 512) >> 10; \ + /* -4327/32768 ~= (1/Sqrt[2] - Cos[27*Pi/128])/Sin[27*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(td, 4327, 16384, 357); \ + ti += (td*4327 + 16384) >> 15; \ + /* 2261/4096 ~= (1/Sqrt[2] - Cos[21*Pi/128]/2)/Sin[21*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(ta, 2261, 2048, 358); \ + tl -= (ta*2261 + 2048) >> 12; \ + /* 2855/4096 ~= Sqrt[2]*Sin[21*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(tl, 2855, 2048, 359); \ + ta += (tl*2855 + 2048) >> 12; \ + /* -5417/16384 ~= (1/Sqrt[2] - Cos[21*Pi/128])/Sin[21*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(ta, 5417, 8192, 360); \ + tl += (ta*5417 + 8192) >> 14; \ + /* 3459/4096 ~= (1/Sqrt[2] - Cos[11*Pi/128]/2)/Sin[11*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(tq, 3459, 2048, 361); \ + t5 += (tq*3459 + 2048) >> 12; \ + /* 1545/4096 ~= Sqrt[2]*Sin[11*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(t5, 1545, 2048, 362); \ + tq -= (t5*1545 + 2048) >> 12; \ + /* -1971/2048 ~= (1/Sqrt[2] - Cos[11*Pi/128])/Sin[11*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(tq, 1971, 1024, 363); \ + t5 -= (tq*1971 + 1024) >> 11; \ + /* 323/512 ~= (1/Sqrt[2] - Cos[57*Pi/128]/2)/Sin[57*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(t3, 323, 256, 364); \ + ts += (t3*323 + 256) >> 9; \ + /* 5707/4096 ~= Sqrt[2]*Sin[57*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(ts, 5707, 2048, 365); \ + t3 -= (ts*5707 + 2048) >> 12; \ + /* 2229/4096 ~= (1/Sqrt[2] - Cos[57*Pi/128])/Sin[57*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(t3, 2229, 2048, 366); \ + ts += (t3*2229 + 2048) >> 12; \ + /* 1061/2048 ~= (1/Sqrt[2] - Cos[25*Pi/128]/2)/Sin[25*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(tc, 1061, 1024, 367); \ + tj -= (tc*1061 + 1024) >> 11; \ + /* 6671/8192 ~= Sqrt[2]*Sin[25*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(tj, 6671, 4096, 368); \ + tc += (tj*6671 + 4096) >> 13; \ + /* -6287/32768 ~= (1/Sqrt[2] - Cos[25*Pi/128])/Sin[25*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(tc, 6287, 16384, 369); \ + tj += (tc*6287 + 16384) >> 15; \ + /* 4359/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128]/2)/Sin[23*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(tb, 4359, 4096, 370); \ + tk -= (tb*4359 + 4096) >> 13; \ + /* 3099/4096 ~= Sqrt[2]*Sin[23*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(tk, 3099, 2048, 371); \ + tb += (tk*3099 + 2048) >> 12; \ + /* -2109/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128])/Sin[23*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(tb, 2109, 4096, 372); \ + tk += (tb*2109 + 4096) >> 13; \ + /* 5017/8192 ~= (1/Sqrt[2] - Cos[55*Pi/128]/2)/Sin[55*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(t4, 5017, 4096, 373); \ + tr += (t4*5017 + 4096) >> 13; \ + /* 1413/1024 ~= Sqrt[2]*Sin[55*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(tr, 1413, 512, 374); \ + t4 -= (tr*1413 + 512) >> 10; \ + /* 8195/16384 ~= (1/Sqrt[2] - Cos[55*Pi/128])/Sin[55*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(t4, 8195, 8192, 375); \ + tr += (t4*8195 + 8192) >> 14; \ + /* 2373/4096 ~= (1/Sqrt[2] - Cos[19*Pi/128]/2)/Sin[19*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(tm, 2373, 2048, 376); \ + t9 += (tm*2373 + 2048) >> 12; \ + /* 5209/8192 ~= Sqrt[2]*Sin[19*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(t9, 5209, 4096, 377); \ + tm -= (t9*5209 + 4096) >> 13; \ + /* -3391/8192 ~= (1/Sqrt[2] - Cos[19*Pi/128])/Sin[19*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(tm, 3391, 4096, 378); \ + t9 -= (tm*3391 + 4096) >> 13; \ + /* 1517/2048 ~= (1/Sqrt[2] - Cos[13*Pi/128]/2)/Sin[13*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(t6, 1517, 1024, 379); \ + tp -= (t6*1517 + 1024) >> 11; \ + /* 1817/4096 ~= Sqrt[2]*Sin[13*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(tp, 1817, 2048, 380); \ + t6 += (tp*1817 + 2048) >> 12; \ + /* -6331/8192 ~= (1/Sqrt[2] - Cos[13*Pi/128])/Sin[13*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(t6, 6331, 4096, 381); \ + tp += (t6*6331 + 4096) >> 13; \ + /* 515/1024 ~= (1/Sqrt[2] - Cos[29*Pi/128]/2)/Sin[29*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(te, 515, 512, 382); \ + th -= (te*515 + 512) >> 10; \ + /* 7567/8192 ~= Sqrt[2]*Sin[29*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(th, 7567, 4096, 383); \ + te += (th*7567 + 4096) >> 13; \ + /* -2513/32768 ~= (1/Sqrt[2] - Cos[29*Pi/128])/Sin[29*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(te, 2513, 16384, 384); \ + th += (te*2513 + 16384) >> 15; \ + /* 2753/4096 ~= (1/Sqrt[2] - Cos[61*Pi/128]/2)/Sin[61*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(t1, 2753, 2048, 385); \ + tu += (t1*2753 + 2048) >> 12; \ + /* 5777/4096 ~= Sqrt[2]*Sin[61*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(tu, 5777, 2048, 386); \ + t1 -= (tu*5777 + 2048) >> 12; \ + /* 1301/2048 ~= (1/Sqrt[2] - Cos[61*Pi/128])/Sin[61*Pi/128] */ \ + OD_DCT_OVERFLOW_CHECK(t1, 1301, 1024, 387); \ + tu += (t1*1301 + 1024) >> 11; \ + } \ + while (0) + +#define OD_IDST_32_ASYM(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, \ + tm, te, tu, t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv) \ + /* Embedded 32-point asymmetric Type-IV iDST. */ \ + do { \ + int t0h; \ + int t4h; \ + int tbh; \ + int tfh; \ + int tgh; \ + int tkh; \ + int trh; \ + int tvh; \ + /* 1301/2048 ~= (1/Sqrt[2] - Cos[61*Pi/128])/Sin[61*Pi/128] */ \ + tf -= (tg*1301 + 1024) >> 11; \ + /* 5777/4096 ~= Sqrt[2]*Sin[61*Pi/128] */ \ + tg += (tf*5777 + 2048) >> 12; \ + /* 2753/4096 ~= (1/Sqrt[2] - Cos[61*Pi/128]/2)/Sin[61*Pi/128] */ \ + tf -= (tg*2753 + 2048) >> 12; \ + /* -2513/32768 ~= (1/Sqrt[2] - Cos[29*Pi/128])/Sin[29*Pi/128] */ \ + th -= (te*2513 + 16384) >> 15; \ + /* 7567/8192 ~= Sqrt[2]*Sin[29*Pi/128] */ \ + te -= (th*7567 + 4096) >> 13; \ + /* 515/1024 ~= (1/Sqrt[2] - Cos[29*Pi/128]/2)/Sin[29*Pi/128] */ \ + th += (te*515 + 512) >> 10; \ + /* -6331/8192 ~= (1/Sqrt[2] - Cos[13*Pi/128])/Sin[13*Pi/128] */ \ + tj -= (tc*6331 + 4096) >> 13; \ + /* 1817/4096 ~= Sqrt[2]*Sin[13*Pi/128] */ \ + tc -= (tj*1817 + 2048) >> 12; \ + /* 1517/2048 ~= (1/Sqrt[2] - Cos[13*Pi/128]/2)/Sin[13*Pi/128] */ \ + tj += (tc*1517 + 1024) >> 11; \ + /* -3391/8192 ~= (1/Sqrt[2] - Cos[19*Pi/128])/Sin[19*Pi/128] */ \ + ti += (td*3391 + 4096) >> 13; \ + /* 5209/8192 ~= Sqrt[2]*Sin[19*Pi/128] */ \ + td += (ti*5209 + 4096) >> 13; \ + /* 2373/4096 ~= (1/Sqrt[2] - Cos[19*Pi/128]/2)/Sin[19*Pi/128] */ \ + ti -= (td*2373 + 2048) >> 12; \ + /* 8195/16384 ~= (1/Sqrt[2] - Cos[55*Pi/128])/Sin[55*Pi/128] */ \ + tr -= (t4*8195 + 8192) >> 14; \ + /* 1413/1024 ~= Sqrt[2]*Sin[55*Pi/128] */ \ + t4 += (tr*1413 + 512) >> 10; \ + /* 5017/8192 ~= (1/Sqrt[2] - Cos[55*Pi/128]/2)/Sin[55*Pi/128] */ \ + tr -= (t4*5017 + 4096) >> 13; \ + /* -2109/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128])/Sin[23*Pi/128] */ \ + t5 -= (tq*2109 + 4096) >> 13; \ + /* 3099/4096 ~= Sqrt[2]*Sin[23*Pi/128] */ \ + tq -= (t5*3099 + 2048) >> 12; \ + /* 4359/8192 ~= (1/Sqrt[2] - Cos[23*Pi/128]/2)/Sin[23*Pi/128] */ \ + t5 += (tq*4359 + 4096) >> 13; \ + /* -6287/32768 ~= (1/Sqrt[2] - Cos[25*Pi/128])/Sin[25*Pi/128] */ \ + tp -= (t6*6287 + 16384) >> 15; \ + /* 6671/8192 ~= Sqrt[2]*Sin[25*Pi/128] */ \ + t6 -= (tp*6671 + 4096) >> 13; \ + /* 1061/2048 ~= (1/Sqrt[2] - Cos[25*Pi/128]/2)/Sin[25*Pi/128] */ \ + tp += (t6*1061 + 1024) >> 11; \ + /* 2229/4096 ~= (1/Sqrt[2] - Cos[57*Pi/128])/Sin[57*Pi/128] */ \ + t7 -= (to*2229 + 2048) >> 12; \ + /* 5707/4096 ~= Sqrt[2]*Sin[57*Pi/128] */ \ + to += (t7*5707 + 2048) >> 12; \ + /* 323/512 ~= (1/Sqrt[2] - Cos[57*Pi/128]/2)/Sin[57*Pi/128] */ \ + t7 -= (to*323 + 256) >> 9; \ + /* -1971/2048 ~= (1/Sqrt[2] - Cos[11*Pi/128])/Sin[11*Pi/128] */ \ + tk += (tb*1971 + 1024) >> 11; \ + /* 1545/4096 ~= Sqrt[2]*Sin[11*Pi/128] */ \ + tb += (tk*1545 + 2048) >> 12; \ + /* 3459/4096 ~= (1/Sqrt[2] - Cos[11*Pi/128]/2)/Sin[11*Pi/128] */ \ + tk -= (tb*3459 + 2048) >> 12; \ + /* -5417/16384 ~= (1/Sqrt[2] - Cos[21*Pi/128])/Sin[21*Pi/128] */ \ + tl -= (ta*5417 + 8192) >> 14; \ + /* 2855/4096 ~= Sqrt[2]*Sin[21*Pi/128] */ \ + ta -= (tl*2855 + 2048) >> 12; \ + /* 2261/4096 ~= (1/Sqrt[2] - Cos[21*Pi/128]/2)/Sin[21*Pi/128] */ \ + tl += (ta*2261 + 2048) >> 12; \ + /* -4327/32768 ~= (1/Sqrt[2] - Cos[27*Pi/128])/Sin[27*Pi/128] */ \ + t9 -= (tm*4327 + 16384) >> 15; \ + /* 891/1024 ~= Sqrt[2]*Sin[27*Pi/128] */ \ + tm -= (t9*891 + 512) >> 10; \ + /* 4167/8192 ~= (1/Sqrt[2] - Cos[27*Pi/128]/2)/Sin[27*Pi/128] */ \ + t9 += (tm*4167 + 4096) >> 13; \ + /* 2413/4096 ~= (1/Sqrt[2] - Cos[59*Pi/128])/Sin[59*Pi/128] */ \ + tn -= (t8*2413 + 2048) >> 12; \ + /* 5749/4096 ~= Sqrt[2]*Sin[59*Pi/128] */ \ + t8 += (tn*5749 + 2048) >> 12; \ + /* 5331/8192 ~= (1/Sqrt[2] - Cos[59*Pi/128]/2)/Sin[59*Pi/128] */ \ + tn -= (t8*5331 + 4096) >> 13; \ + /* -2571/4096 ~= (1/Sqrt[2] - Cos[15*Pi/128])/Sin[15*Pi/128] */ \ + ts += (t3*2571 + 2048) >> 12; \ + /* 4169/8192 ~= Sqrt[2]*Sin[15*Pi/128] */ \ + t3 += (ts*4169 + 4096) >> 13; \ + /* 5477/8192 ~= (1/Sqrt[2] - Cos[15*Pi/128]/2)/Sin[15*Pi/128] */ \ + ts -= (t3*5477 + 4096) >> 13; \ + /* -4187/8192 ~= (1/Sqrt[2] - Cos[17*Pi/128])/Sin[17*Pi/128] */ \ + tt -= (t2*4187 + 4096) >> 13; \ + /* 4695/8192 ~= Sqrt[2]*Sin[17*Pi/128] */ \ + t2 -= (tt*4695 + 4096) >> 13; \ + /* 2527/4096 ~= (1/Sqrt[2] - Cos[17*Pi/128]/2)/Sin[17*Pi/128] */ \ + tt += (t2*2527 + 2048) >> 12; \ + /* -815/32768 ~= (1/Sqrt[2] - Cos[31*Pi/128])/Sin[31*Pi/128] */ \ + t1 -= (tu*815 + 16384) >> 15; \ + /* 1997/2048 ~= Sqrt[2]*Sin[31*Pi/128] */ \ + tu -= (t1*1997 + 1024) >> 11; \ + /* 4099/8192 ~= (1/Sqrt[2] - Cos[31*Pi/128]/2)/Sin[31*Pi/128] */ \ + t1 += (tu*4099 + 4096) >> 13; \ + /* 5593/8192 ~= (1/Sqrt[2] - Cos[63*Pi/128])/Sin[63*Pi/128] */ \ + tv -= (t0*5593 + 4096) >> 13; \ + /* 5791/4096 ~= Sqrt[2]*Sin[63*Pi/128] */ \ + t0 += (tv*5791 + 2048) >> 12; \ + /* 2847/4096 ~= (1/Sqrt[2] - Cos[63*Pi/128]/2)/Sin[63*Pi/128] */ \ + tv -= (t0*2847 + 2048) >> 12; \ + \ + t7 = -t7; \ + tf = -tf; \ + tn = -tn; \ + tr = -tr; \ + \ + t7 -= OD_DCT_RSHIFT(t6, 1); \ + t6 += t7; \ + tp -= OD_DCT_RSHIFT(to, 1); \ + to += tp; \ + tr -= OD_DCT_RSHIFT(tq, 1); \ + tq += tr; \ + t5 -= OD_DCT_RSHIFT(t4, 1); \ + t4 += t5; \ + tt -= OD_DCT_RSHIFT(t3, 1); \ + t3 += tt; \ + ts -= OD_DCT_RSHIFT(t2, 1); \ + t2 += ts; \ + tv += OD_DCT_RSHIFT(tu, 1); \ + tu -= tv; \ + t1 -= OD_DCT_RSHIFT(t0, 1); \ + t0 += t1; \ + th -= OD_DCT_RSHIFT(tg, 1); \ + tg += th; \ + tf -= OD_DCT_RSHIFT(te, 1); \ + te += tf; \ + ti += OD_DCT_RSHIFT(tc, 1); \ + tc -= ti; \ + tj += OD_DCT_RSHIFT(td, 1); \ + td -= tj; \ + tn -= OD_DCT_RSHIFT(tm, 1); \ + tm += tn; \ + t9 -= OD_DCT_RSHIFT(t8, 1); \ + t8 += t9; \ + tl -= OD_DCT_RSHIFT(tb, 1); \ + tb += tl; \ + tk -= OD_DCT_RSHIFT(ta, 1); \ + ta += tk; \ + \ + ti -= th; \ + th += OD_DCT_RSHIFT(ti, 1); \ + td -= te; \ + te += OD_DCT_RSHIFT(td, 1); \ + tm += tl; \ + tl -= OD_DCT_RSHIFT(tm, 1); \ + t9 += ta; \ + ta -= OD_DCT_RSHIFT(t9, 1); \ + tp += tq; \ + tq -= OD_DCT_RSHIFT(tp, 1); \ + t6 += t5; \ + t5 -= OD_DCT_RSHIFT(t6, 1); \ + t2 -= t1; \ + t1 += OD_DCT_RSHIFT(t2, 1); \ + tt -= tu; \ + tu += OD_DCT_RSHIFT(tt, 1); \ + tr += t7; \ + trh = OD_DCT_RSHIFT(tr, 1); \ + t7 -= trh; \ + t4 -= to; \ + t4h = OD_DCT_RSHIFT(t4, 1); \ + to += t4h; \ + t0 += t3; \ + t0h = OD_DCT_RSHIFT(t0, 1); \ + t3 -= t0h; \ + tv += ts; \ + tvh = OD_DCT_RSHIFT(tv, 1); \ + ts -= tvh; \ + tf -= tc; \ + tfh = OD_DCT_RSHIFT(tf, 1); \ + tc += tfh; \ + tg += tj; \ + tgh = OD_DCT_RSHIFT(tg, 1); \ + tj -= tgh; \ + tb -= t8; \ + tbh = OD_DCT_RSHIFT(tb, 1); \ + t8 += tbh; \ + tk += tn; \ + tkh = OD_DCT_RSHIFT(tk, 1); \ + tn -= tkh; \ + \ + ta = -ta; \ + tq = -tq; \ + \ + /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \ + te -= (th*4861 + 16384) >> 15; \ + /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \ + th += (te*1189 + 2048) >> 12; \ + /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \ + te -= (th*4861 + 16384) >> 15; \ + /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \ + tm -= (t9*513 + 1024) >> 11; \ + /* 7723/16384 ~= Sin[5*Pi/32] ~= 0.47139673682599764 */ \ + t9 += (tm*7723 + 8192) >> 14; \ + /* 513/2048 ~= Tan[5*Pi/64] ~= 0.25048696019130545 */ \ + tm -= (t9*513 + 1024) >> 11; \ + /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \ + t6 -= (tp*2931 + 4096) >> 13; \ + /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \ + tp += (t6*5197 + 4096) >> 13; \ + /* 2931/8192 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \ + t6 -= (tp*2931 + 4096) >> 13; \ + /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \ + tu -= (t1*805 + 8192) >> 14; \ + /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \ + t1 += (tu*803 + 4096) >> 13; \ + /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \ + tu -= (t1*805 + 8192) >> 14; \ + /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \ + ti -= (td*4861 + 16384) >> 15; \ + /* 1189/4096 ~= Sin[3*Pi/32] ~= 0.29028467725446233 */ \ + td += (ti*1189 + 2048) >> 12; \ + /* 4861/32768 ~= Tan[3*Pi/64] ~= 0.14833598753834742 */ \ + ti -= (td*4861 + 16384) >> 15; \ + /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \ + ta -= (tl*2455 + 2048) >> 12; \ + /* 14449/16384 ~= Sin[11*Pi/32] ~= 0.881921264348355 */ \ + tl += (ta*14449 + 8192) >> 14; \ + /* 2455/4096 ~= Tan[11*Pi/64] ~= 0.5993769336819237 */ \ + ta -= (tl*2455 + 2048) >> 12; \ + /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \ + t5 -= (tq*11725 + 16384) >> 15; \ + /* 5197/8192 ~= Sin[7*Pi/32] ~= 0.6343932841636455 */ \ + tq += (t5*5197 + 4096) >> 13; \ + /* 11725/32768 ~= Tan[7*Pi/64] ~= 0.3578057213145241 */ \ + t5 -= (tq*11725 + 16384) >> 15; \ + /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \ + t2 -= (tt*805 + 8192) >> 14; \ + /* 803/8192 ~= Sin[Pi/32] ~= 0.0980171403295606 */ \ + tt += (t2*803 + 4096) >> 13; \ + /* 805/16384 ~= Tan[Pi/64] ~= 0.04912684976946793 */ \ + t2 -= (tt*805 + 8192) >> 14; \ + \ + tl = -tl; \ + ti = -ti; \ + \ + th += OD_DCT_RSHIFT(t9, 1); \ + t9 -= th; \ + te -= OD_DCT_RSHIFT(tm, 1); \ + tm += te; \ + t1 += OD_DCT_RSHIFT(tp, 1); \ + tp -= t1; \ + tu -= OD_DCT_RSHIFT(t6, 1); \ + t6 += tu; \ + ta -= OD_DCT_RSHIFT(td, 1); \ + td += ta; \ + tl += OD_DCT_RSHIFT(ti, 1); \ + ti -= tl; \ + t5 += OD_DCT_RSHIFT(tt, 1); \ + tt -= t5; \ + tq += OD_DCT_RSHIFT(t2, 1); \ + t2 -= tq; \ + \ + t8 -= tgh; \ + tg += t8; \ + tn += tfh; \ + tf -= tn; \ + t7 -= tvh; \ + tv += t7; \ + to -= t0h; \ + t0 += to; \ + tc += tbh; \ + tb -= tc; \ + tj += tkh; \ + tk -= tj; \ + ts += t4h; \ + t4 -= ts; \ + t3 += trh; \ + tr -= t3; \ + \ + tk = -tk; \ + \ + /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ + tc -= (tj*2485 + 4096) >> 13; \ + /* 18205/32768 ~= Sin[3*Pi/16] ~= 0.555570233019602 */ \ + tj += (tc*18205 + 16384) >> 15; \ + /* 2485/8192 ~= Tan[3*Pi/32] ~= 0.303346683607342 */ \ + tc -= (tj*2485 + 4096) >> 13; \ + /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \ + ts -= (t3*3227 + 16384) >> 15; \ + /* 6393/32768 ~= Sin[Pi/16] ~= 0.19509032201612825 */ \ + t3 += (ts*6393 + 16384) >> 15; \ + /* 3227/32768 ~= Tan[Pi/32] ~= 0.09849140335716425 */ \ + ts -= (t3*3227 + 16384) >> 15; \ + /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \ + tk -= (tb*17515 + 16384) >> 15; \ + /* 13623/16384 ~= Sin[5*Pi/16] ~= 0.8314696123025452 */ \ + tb += (tk*13623 + 8192) >> 14; \ + /* 17515/32768 ~= Tan[5*Pi/32] ~= 0.5345111359507916 */ \ + tk -= (tb*17515 + 16384) >> 15; \ + /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \ + t4 -= (tr*6723 + 4096) >> 13; \ + /* 16069/16384 ~= Sin[7*Pi/16] ~= 0.9807852804032304 */ \ + tr += (t4*16069 + 8192) >> 14; \ + /* 6723/8192 ~= Tan[7*Pi/32] ~= 0.8206787908286602 */ \ + t4 -= (tr*6723 + 4096) >> 13; \ + \ + t4 = -t4; \ + \ + tp += tm; \ + tm -= OD_DCT_RSHIFT(tp, 1); \ + t9 -= t6; \ + t6 += OD_DCT_RSHIFT(t9, 1); \ + th -= t1; \ + t1 += OD_DCT_RSHIFT(th, 1); \ + tu -= te; \ + te += OD_DCT_RSHIFT(tu, 1); /* pass */ \ + t5 -= tl; \ + tl += OD_DCT_RSHIFT(t5, 1); \ + ta += tq; \ + tq -= OD_DCT_RSHIFT(ta, 1); \ + td += tt; \ + tt -= OD_DCT_RSHIFT(td, 1); \ + t2 -= ti; \ + ti += OD_DCT_RSHIFT(t2, 1); /* pass */ \ + t7 += t8; \ + t8 -= OD_DCT_RSHIFT(t7, 1); \ + tn -= to; \ + to += OD_DCT_RSHIFT(tn, 1); \ + tf -= tv; \ + tv += OD_DCT_RSHIFT(tf, 1); \ + t0 += tg; \ + tg -= OD_DCT_RSHIFT(t0, 1); /* pass */ \ + tj -= t3; \ + t3 += OD_DCT_RSHIFT(tj, 1); /* pass */ \ + ts -= tc; \ + tc += OD_DCT_RSHIFT(ts, 1); \ + t4 -= tb; \ + tb += OD_DCT_RSHIFT(t4, 1); /* pass */ \ + tk -= tr; \ + tr += OD_DCT_RSHIFT(tk, 1); \ + \ + t1 = -t1; \ + t3 = -t3; \ + t7 = -t7; \ + t8 = -t8; \ + tg = -tg; \ + tm = -tm; \ + to = -to; \ + \ + /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ + tm -= (t9*14341 + 8192) >> 14; \ + /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + t9 += (tm*15137 + 8192) >> 14; \ + /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ + tm -= (t9*4161 + 8192) >> 14; \ + /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ + tp -= (t6*4161 + 8192) >> 14; \ + /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + t6 += (tp*15137 + 8192) >> 14; \ + /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ + tp -= (t6*28681 + 16384) >> 15; \ + /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \ + th += (te*19195 + 16384) >> 15; \ + /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \ + te += (th*11585 + 8192) >> 14; \ + /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \ + th -= (te*29957 + 16384) >> 15; \ + /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ + tq -= (t5*14341 + 8192) >> 14; \ + /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + t5 += (tq*15137 + 8192) >> 14; \ + /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ + tq -= (t5*4161 + 8192) >> 14; \ + /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \ + ta -= (tl*3259 + 4096) >> 13; \ + /* 3135/16384 ~= Sin[Pi/8]/2 ~= 0.1913417161825449 */ \ + tl += (ta*3135 + 8192) >> 14; \ + /* 3259/8192 ~= 2*Tan[Pi/16] ~= 0.397824734759316 */ \ + ta -= (tl*3259 + 4096) >> 13; \ + /* 7489/8192 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \ + ti -= (td*7489 + 4096) >> 13; \ + /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \ + td += (ti*11585 + 8192) >> 14; \ + /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \ + ti += (td*19195 + 16384) >> 15; \ + /* 14341/16384 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ + to -= (t7*14341 + 8192) >> 14; \ + /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + t7 += (to*15137 + 8192) >> 14; \ + /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ + to -= (t7*4161 + 8192) >> 14; \ + /* 4161/16384 ~= Tan[3*Pi/16] - Tan[Pi/8] ~= 0.253965075546204 */ \ + tn -= (t8*4161 + 8192) >> 14; \ + /* 15137/16384 ~= Sin[3*Pi/8] ~= 0.923879532511287 */ \ + t8 += (tn*15137 + 8192) >> 14; \ + /* 28681/32768 ~= Tan[3*Pi/16] + Tan[Pi/8]/2 ~= 0.875285419105846 */ \ + tn -= (t8*28681 + 16384) >> 15; \ + /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \ + tf += (tg*19195 + 16384) >> 15; \ + /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \ + tg += (tf*11585 + 8192) >> 14; \ + /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \ + tf -= (tg*29957 + 16384) >> 15; \ + /* -19195/32768 ~= Tan[Pi/8] - Tan[Pi/4] ~= -0.585786437626905 */ \ + tj += (tc*19195 + 16384) >> 15; \ + /* 11585/16384 ~= Sin[Pi/4] ~= 0.707106781186548 */ \ + tc += (tj*11585 + 8192) >> 14; \ + /* 29957/32768 ~= Tan[Pi/8] + Tan[Pi/4]/2 ~= 0.914213562373095 */ \ + tj -= (tc*29957 + 16384) >> 15; \ + /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ + tk += (tb*13573 + 8192) >> 14; \ + /* 11585/32768 ~= Sin[Pi/4]/2 ~= 0.353553390593274 */ \ + tb -= (tk*11585 + 16384) >> 15; \ + /* 13573/16384 ~= 2*Tan[Pi/8] ~= 0.828427124746190 */ \ + tk += (tb*13573 + 8192) >> 14; \ + \ + tf = -tf; \ + \ + } \ + while (0) + +#define OD_FDCT_64(u0, uw, ug, uM, u8, uE, uo, uU, u4, uA, uk, uQ, uc, uI, \ + us, uY, u2, uy, ui, uO, ua, uG, uq, uW, u6, uC, um, uS, ue, uK, uu, u_, u1, \ + ux, uh, uN, u9, uF, up, uV, u5, uB, ul, uR, ud, uJ, ut, uZ, u3, uz, uj, uP, \ + ub, uH, ur, uX, u7, uD, un, uT, uf, uL, uv, u) \ + /* Embedded 64-point orthonormal Type-II fDCT. */ \ + do { \ + int uwh; \ + int uxh; \ + int uyh; \ + int uzh; \ + int uAh; \ + int uBh; \ + int uCh; \ + int uDh; \ + int uEh; \ + int uFh; \ + int uGh; \ + int uHh; \ + int uIh; \ + int uJh; \ + int uKh; \ + int uLh; \ + int uMh; \ + int uNh; \ + int uOh; \ + int uPh; \ + int uQh; \ + int uRh; \ + int uSh; \ + int uTh; \ + int uUh; \ + int uVh; \ + int uWh; \ + int uXh; \ + int uYh; \ + int uZh; \ + int u_h; \ + int uh_; \ + u = u0 - u; \ + uh_ = OD_DCT_RSHIFT(u, 1); \ + u0 -= uh_; \ + u_ += u1; \ + u_h = OD_DCT_RSHIFT(u_, 1); \ + u1 = u_h - u1; \ + uZ = u2 - uZ; \ + uZh = OD_DCT_RSHIFT(uZ, 1); \ + u2 -= uZh; \ + uY += u3; \ + uYh = OD_DCT_RSHIFT(uY, 1); \ + u3 = uYh - u3; \ + uX = u4 - uX; \ + uXh = OD_DCT_RSHIFT(uX, 1); \ + u4 -= uXh; \ + uW += u5; \ + uWh = OD_DCT_RSHIFT(uW, 1); \ + u5 = uWh - u5; \ + uV = u6 - uV; \ + uVh = OD_DCT_RSHIFT(uV, 1); \ + u6 -= uVh; \ + uU += u7; \ + uUh = OD_DCT_RSHIFT(uU, 1); \ + u7 = uUh - u7; \ + uT = u8 - uT; \ + uTh = OD_DCT_RSHIFT(uT, 1); \ + u8 -= uTh; \ + uS += u9; \ + uSh = OD_DCT_RSHIFT(uS, 1); \ + u9 = uSh - u9; \ + uR = ua - uR; \ + uRh = OD_DCT_RSHIFT(uR, 1); \ + ua -= uRh; \ + uQ += ub; \ + uQh = OD_DCT_RSHIFT(uQ, 1); \ + ub = uQh - ub; \ + uP = uc - uP; \ + uPh = OD_DCT_RSHIFT(uP, 1); \ + uc -= uPh; \ + uO += ud; \ + uOh = OD_DCT_RSHIFT(uO, 1); \ + ud = uOh - ud; \ + uN = ue - uN; \ + uNh = OD_DCT_RSHIFT(uN, 1); \ + ue -= uNh; \ + uM += uf; \ + uMh = OD_DCT_RSHIFT(uM, 1); \ + uf = uMh - uf; \ + uL = ug - uL; \ + uLh = OD_DCT_RSHIFT(uL, 1); \ + ug -= uLh; \ + uK += uh; \ + uKh = OD_DCT_RSHIFT(uK, 1); \ + uh = uKh - uh; \ + uJ = ui - uJ; \ + uJh = OD_DCT_RSHIFT(uJ, 1); \ + ui -= uJh; \ + uI += uj; \ + uIh = OD_DCT_RSHIFT(uI, 1); \ + uj = uIh - uj; \ + uH = uk - uH; \ + uHh = OD_DCT_RSHIFT(uH, 1); \ + uk -= uHh; \ + uG += ul; \ + uGh = OD_DCT_RSHIFT(uG, 1); \ + ul = uGh - ul; \ + uF = um - uF; \ + uFh = OD_DCT_RSHIFT(uF, 1); \ + um -= uFh; \ + uE += un; \ + uEh = OD_DCT_RSHIFT(uE, 1); \ + un = uEh - un; \ + uD = uo - uD; \ + uDh = OD_DCT_RSHIFT(uD, 1); \ + uo -= uDh; \ + uC += up; \ + uCh = OD_DCT_RSHIFT(uC, 1); \ + up = uCh - up; \ + uB = uq - uB; \ + uBh = OD_DCT_RSHIFT(uB, 1); \ + uq -= uBh; \ + uA += ur; \ + uAh = OD_DCT_RSHIFT(uA, 1); \ + ur = uAh - ur; \ + uz = us - uz; \ + uzh = OD_DCT_RSHIFT(uz, 1); \ + us -= uzh; \ + uy += ut; \ + uyh = OD_DCT_RSHIFT(uy, 1); \ + ut = uyh - ut; \ + ux = uu - ux; \ + uxh = OD_DCT_RSHIFT(ux, 1); \ + uu -= uxh; \ + uw += uv; \ + uwh = OD_DCT_RSHIFT(uw, 1); \ + uv = uwh - uv; \ + OD_FDCT_32_ASYM(u0, uw, uwh, ug, uM, uMh, u8, uE, uEh, uo, uU, uUh, \ + u4, uA, uAh, uk, uQ, uQh, uc, uI, uIh, us, uY, uYh, u2, uy, uyh, \ + ui, uO, uOh, ua, uG, uGh, uq, uW, uWh, u6, uC, uCh, um, uS, uSh, \ + ue, uK, uKh, uu, u_, u_h); \ + OD_FDST_32_ASYM(u, uv, uL, uf, uT, un, uD, u7, uX, ur, uH, ub, uP, uj, \ + uz, u3, uZ, ut, uJ, ud, uR, ul, uB, u5, uV, up, uF, u9, uN, uh, ux, u1); \ + } \ + while (0) + +#define OD_IDCT_64(u0, uw, ug, uM, u8, uE, uo, uU, u4, uA, uk, uQ, uc, uI, \ + us, uY, u2, uy, ui, uO, ua, uG, uq, uW, u6, uC, um, uS, ue, uK, uu, u_, u1, \ + ux, uh, uN, u9, uF, up, uV, u5, uB, ul, uR, ud, uJ, ut, uZ, u3, uz, uj, uP, \ + ub, uH, ur, uX, u7, uD, un, uT, uf, uL, uv, u) \ + /* Embedded 64-point orthonormal Type-II fDCT. */ \ + do { \ + int u1h; \ + int u3h; \ + int u5h; \ + int u7h; \ + int u9h; \ + int ubh; \ + int udh; \ + int ufh; \ + int uhh; \ + int ujh; \ + int ulh; \ + int unh; \ + int uph; \ + int urh; \ + int uth; \ + int uvh; \ + int uxh; \ + int uzh; \ + int uBh; \ + int uDh; \ + int uFh; \ + int uHh; \ + int uJh; \ + int uLh; \ + int uNh; \ + int uPh; \ + int uRh; \ + int uTh; \ + int uVh; \ + int uXh; \ + int uZh; \ + int uh_; \ + OD_IDST_32_ASYM(u, uL, uT, uD, uX, uH, uP, uz, uZ, uJ, uR, uB, uV, uF, \ + uN, ux, u_, uK, uS, uC, uW, uG, uO, uy, uY, uI, uQ, uA, uU, uE, uM, uw); \ + OD_IDCT_32_ASYM(u0, ug, u8, uo, u4, uk, uc, us, u2, ui, ua, uq, u6, um, \ + ue, uu, u1, u1h, uh, uhh, u9, u9h, up, uph, u5, u5h, ul, ulh, ud, udh, \ + ut, uth, u3, u3h, uj, ujh, ub, ubh, ur, urh, u7, u7h, un, unh, uf, ufh, \ + uv, uvh); \ + uh_ = OD_DCT_RSHIFT(u, 1); \ + u0 += uh_; \ + u = u0 - u; \ + u_ = u1h - u_; \ + u1 -= u_; \ + uZh = OD_DCT_RSHIFT(uZ, 1); \ + u2 += uZh; \ + uZ = u2 - uZ; \ + uY = u3h - uY; \ + u3 -= uY; \ + uXh = OD_DCT_RSHIFT(uX, 1); \ + u4 += uXh; \ + uX = u4 - uX; \ + uW = u5h - uW; \ + u5 -= uW; \ + uVh = OD_DCT_RSHIFT(uV, 1); \ + u6 += uVh; \ + uV = u6 - uV; \ + uU = u7h - uU; \ + u7 -= uU; \ + uTh = OD_DCT_RSHIFT(uT, 1); \ + u8 += uTh; \ + uT = u8 - uT; \ + uS = u9h - uS; \ + u9 -= uS; \ + uRh = OD_DCT_RSHIFT(uR, 1); \ + ua += uRh; \ + uR = ua - uR; \ + uQ = ubh - uQ; \ + ub -= uQ; \ + uPh = OD_DCT_RSHIFT(uP, 1); \ + uc += uPh; \ + uP = uc - uP; \ + uO = udh - uO; \ + ud -= uO; \ + uNh = OD_DCT_RSHIFT(uN, 1); \ + ue += uNh; \ + uN = ue - uN; \ + uM = ufh - uM; \ + uf -= uM; \ + uLh = OD_DCT_RSHIFT(uL, 1); \ + ug += uLh; \ + uL = ug - uL; \ + uK = uhh - uK; \ + uh -= uK; \ + uJh = OD_DCT_RSHIFT(uJ, 1); \ + ui += uJh; \ + uJ = ui - uJ; \ + uI = ujh - uI; \ + uj -= uI; \ + uHh = OD_DCT_RSHIFT(uH, 1); \ + uk += uHh; \ + uH = uk - uH; \ + uG = ulh - uG; \ + ul -= uG; \ + uFh = OD_DCT_RSHIFT(uF, 1); \ + um += uFh; \ + uF = um - uF; \ + uE = unh - uE; \ + un -= uE; \ + uDh = OD_DCT_RSHIFT(uD, 1); \ + uo += uDh; \ + uD = uo - uD; \ + uC = uph - uC; \ + up -= uC; \ + uBh = OD_DCT_RSHIFT(uB, 1); \ + uq += uBh; \ + uB = uq - uB; \ + uA = urh - uA; \ + ur -= uA; \ + uzh = OD_DCT_RSHIFT(uz, 1); \ + us += uzh; \ + uz = us - uz; \ + uy = uth - uy; \ + ut -= uy; \ + uxh = OD_DCT_RSHIFT(ux, 1); \ + uu += uxh; \ + ux = uu - ux; \ + uw = uvh - uw; \ + uv -= uw; \ + } while (0) +#endif + void od_bin_fdct4(od_coeff y[4], const od_coeff *x, int xstride) { int q0; int q1; @@ -478,6 +3150,38 @@ void od_bin_idct4(od_coeff *x, int xstride, const od_coeff y[4]) { x[3*xstride] = q3; } +void od_bin_fdst4(od_coeff y[4], const od_coeff *x, int xstride) { + int q0; + int q1; + int q2; + int q3; + q0 = x[3*xstride]; + q2 = x[2*xstride]; + q1 = x[1*xstride]; + q3 = x[0*xstride]; + OD_FDST_4(q0, q2, q1, q3); + y[0] = (od_coeff)q3; + y[1] = (od_coeff)q2; + y[2] = (od_coeff)q1; + y[3] = (od_coeff)q0; +} + +void od_bin_idst4(od_coeff *x, int xstride, const od_coeff y[4]) { + int q0; + int q1; + int q2; + int q3; + q0 = y[3]; + q2 = y[2]; + q1 = y[1]; + q3 = y[0]; + OD_IDST_4(q0, q2, q1, q3); + x[0*xstride] = q3; + x[1*xstride] = q2; + x[2*xstride] = q1; + x[3*xstride] = q0; +} + void od_bin_fdct8(od_coeff y[8], const od_coeff *x, int xstride) { int r0; int r1; @@ -589,3 +3293,1039 @@ void od_bin_idst8(od_coeff *x, int xstride, const od_coeff y[8]) { x[6*xstride] = (od_coeff)r6; x[7*xstride] = (od_coeff)r7; } + +void od_bin_fdct16(od_coeff y[16], const od_coeff *x, int xstride) { + int s0; + int s1; + int s2; + int s3; + int s4; + int s5; + int s6; + int s7; + int s8; + int s9; + int sa; + int sb; + int sc; + int sd; + int se; + int sf; + s0 = x[0*xstride]; + s8 = x[1*xstride]; + s4 = x[2*xstride]; + sc = x[3*xstride]; + s2 = x[4*xstride]; + sa = x[5*xstride]; + s6 = x[6*xstride]; + se = x[7*xstride]; + s1 = x[8*xstride]; + s9 = x[9*xstride]; + s5 = x[10*xstride]; + sd = x[11*xstride]; + s3 = x[12*xstride]; + sb = x[13*xstride]; + s7 = x[14*xstride]; + sf = x[15*xstride]; + OD_FDCT_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf); + y[0] = (od_coeff)s0; + y[1] = (od_coeff)s1; + y[2] = (od_coeff)s2; + y[3] = (od_coeff)s3; + y[4] = (od_coeff)s4; + y[5] = (od_coeff)s5; + y[6] = (od_coeff)s6; + y[7] = (od_coeff)s7; + y[8] = (od_coeff)s8; + y[9] = (od_coeff)s9; + y[10] = (od_coeff)sa; + y[11] = (od_coeff)sb; + y[12] = (od_coeff)sc; + y[13] = (od_coeff)sd; + y[14] = (od_coeff)se; + y[15] = (od_coeff)sf; +} + +void od_bin_idct16(od_coeff *x, int xstride, const od_coeff y[16]) { + int s0; + int s1; + int s2; + int s3; + int s4; + int s5; + int s6; + int s7; + int s8; + int s9; + int sa; + int sb; + int sc; + int sd; + int se; + int sf; + s0 = y[0]; + s8 = y[1]; + s4 = y[2]; + sc = y[3]; + s2 = y[4]; + sa = y[5]; + s6 = y[6]; + se = y[7]; + s1 = y[8]; + s9 = y[9]; + s5 = y[10]; + sd = y[11]; + s3 = y[12]; + sb = y[13]; + s7 = y[14]; + sf = y[15]; + OD_IDCT_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf); + x[0*xstride] = (od_coeff)s0; + x[1*xstride] = (od_coeff)s1; + x[2*xstride] = (od_coeff)s2; + x[3*xstride] = (od_coeff)s3; + x[4*xstride] = (od_coeff)s4; + x[5*xstride] = (od_coeff)s5; + x[6*xstride] = (od_coeff)s6; + x[7*xstride] = (od_coeff)s7; + x[8*xstride] = (od_coeff)s8; + x[9*xstride] = (od_coeff)s9; + x[10*xstride] = (od_coeff)sa; + x[11*xstride] = (od_coeff)sb; + x[12*xstride] = (od_coeff)sc; + x[13*xstride] = (od_coeff)sd; + x[14*xstride] = (od_coeff)se; + x[15*xstride] = (od_coeff)sf; +} + +void od_bin_fdst16(od_coeff y[16], const od_coeff *x, int xstride) { + int s0; + int s1; + int s2; + int s3; + int s4; + int s5; + int s6; + int s7; + int s8; + int s9; + int sa; + int sb; + int sc; + int sd; + int se; + int sf; + s0 = x[15*xstride]; + s8 = x[14*xstride]; + s4 = x[13*xstride]; + sc = x[12*xstride]; + s2 = x[11*xstride]; + sa = x[10*xstride]; + s6 = x[9*xstride]; + se = x[8*xstride]; + s1 = x[7*xstride]; + s9 = x[6*xstride]; + s5 = x[5*xstride]; + sd = x[4*xstride]; + s3 = x[3*xstride]; + sb = x[2*xstride]; + s7 = x[1*xstride]; + sf = x[0*xstride]; + OD_FDST_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf); + y[0] = (od_coeff)sf; + y[1] = (od_coeff)se; + y[2] = (od_coeff)sd; + y[3] = (od_coeff)sc; + y[4] = (od_coeff)sb; + y[5] = (od_coeff)sa; + y[6] = (od_coeff)s9; + y[7] = (od_coeff)s8; + y[8] = (od_coeff)s7; + y[9] = (od_coeff)s6; + y[10] = (od_coeff)s5; + y[11] = (od_coeff)s4; + y[12] = (od_coeff)s3; + y[13] = (od_coeff)s2; + y[14] = (od_coeff)s1; + y[15] = (od_coeff)s0; +} + +void od_bin_idst16(od_coeff *x, int xstride, const od_coeff y[16]) { + int s0; + int s1; + int s2; + int s3; + int s4; + int s5; + int s6; + int s7; + int s8; + int s9; + int sa; + int sb; + int sc; + int sd; + int se; + int sf; + s0 = y[15]; + s8 = y[14]; + s4 = y[13]; + sc = y[12]; + s2 = y[11]; + sa = y[10]; + s6 = y[9]; + se = y[8]; + s1 = y[7]; + s9 = y[6]; + s5 = y[5]; + sd = y[4]; + s3 = y[3]; + sb = y[2]; + s7 = y[1]; + sf = y[0]; + OD_IDST_16(s0, s8, s4, sc, s2, sa, s6, se, s1, s9, s5, sd, s3, sb, s7, sf); + x[0*xstride] = (od_coeff)sf; + x[1*xstride] = (od_coeff)se; + x[2*xstride] = (od_coeff)sd; + x[3*xstride] = (od_coeff)sc; + x[4*xstride] = (od_coeff)sb; + x[5*xstride] = (od_coeff)sa; + x[6*xstride] = (od_coeff)s9; + x[7*xstride] = (od_coeff)s8; + x[8*xstride] = (od_coeff)s7; + x[9*xstride] = (od_coeff)s6; + x[10*xstride] = (od_coeff)s5; + x[11*xstride] = (od_coeff)s4; + x[12*xstride] = (od_coeff)s3; + x[13*xstride] = (od_coeff)s2; + x[14*xstride] = (od_coeff)s1; + x[15*xstride] = (od_coeff)s0; +} + +void od_bin_fdct32(od_coeff y[32], const od_coeff *x, int xstride) { + /*215 adds, 38 shifts, 87 "muls".*/ + int t0; + int t1; + int t2; + int t3; + int t4; + int t5; + int t6; + int t7; + int t8; + int t9; + int ta; + int tb; + int tc; + int td; + int te; + int tf; + int tg; + int th; + int ti; + int tj; + int tk; + int tl; + int tm; + int tn; + int to; + int tp; + int tq; + int tr; + int ts; + int tt; + int tu; + int tv; + t0 = x[0*xstride]; + tg = x[1*xstride]; + t8 = x[2*xstride]; + to = x[3*xstride]; + t4 = x[4*xstride]; + tk = x[5*xstride]; + tc = x[6*xstride]; + ts = x[7*xstride]; + t2 = x[8*xstride]; + ti = x[9*xstride]; + ta = x[10*xstride]; + tq = x[11*xstride]; + t6 = x[12*xstride]; + tm = x[13*xstride]; + te = x[14*xstride]; + tu = x[15*xstride]; + t1 = x[16*xstride]; + th = x[17*xstride]; + t9 = x[18*xstride]; + tp = x[19*xstride]; + t5 = x[20*xstride]; + tl = x[21*xstride]; + td = x[22*xstride]; + tt = x[23*xstride]; + t3 = x[24*xstride]; + tj = x[25*xstride]; + tb = x[26*xstride]; + tr = x[27*xstride]; + t7 = x[28*xstride]; + tn = x[29*xstride]; + tf = x[30*xstride]; + tv = x[31*xstride]; + OD_FDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, te, tu, + t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv); + y[0] = (od_coeff)t0; + y[1] = (od_coeff)t1; + y[2] = (od_coeff)t2; + y[3] = (od_coeff)t3; + y[4] = (od_coeff)t4; + y[5] = (od_coeff)t5; + y[6] = (od_coeff)t6; + y[7] = (od_coeff)t7; + y[8] = (od_coeff)t8; + y[9] = (od_coeff)t9; + y[10] = (od_coeff)ta; + y[11] = (od_coeff)tb; + y[12] = (od_coeff)tc; + y[13] = (od_coeff)td; + y[14] = (od_coeff)te; + y[15] = (od_coeff)tf; + y[16] = (od_coeff)tg; + y[17] = (od_coeff)th; + y[18] = (od_coeff)ti; + y[19] = (od_coeff)tj; + y[20] = (od_coeff)tk; + y[21] = (od_coeff)tl; + y[22] = (od_coeff)tm; + y[23] = (od_coeff)tn; + y[24] = (od_coeff)to; + y[25] = (od_coeff)tp; + y[26] = (od_coeff)tq; + y[27] = (od_coeff)tr; + y[28] = (od_coeff)ts; + y[29] = (od_coeff)tt; + y[30] = (od_coeff)tu; + y[31] = (od_coeff)tv; +} + +void od_bin_idct32(od_coeff *x, int xstride, const od_coeff y[32]) { + int t0; + int t1; + int t2; + int t3; + int t4; + int t5; + int t6; + int t7; + int t8; + int t9; + int ta; + int tb; + int tc; + int td; + int te; + int tf; + int tg; + int th; + int ti; + int tj; + int tk; + int tl; + int tm; + int tn; + int to; + int tp; + int tq; + int tr; + int ts; + int tt; + int tu; + int tv; + t0 = y[0]; + tg = y[1]; + t8 = y[2]; + to = y[3]; + t4 = y[4]; + tk = y[5]; + tc = y[6]; + ts = y[7]; + t2 = y[8]; + ti = y[9]; + ta = y[10]; + tq = y[11]; + t6 = y[12]; + tm = y[13]; + te = y[14]; + tu = y[15]; + t1 = y[16]; + th = y[17]; + t9 = y[18]; + tp = y[19]; + t5 = y[20]; + tl = y[21]; + td = y[22]; + tt = y[23]; + t3 = y[24]; + tj = y[25]; + tb = y[26]; + tr = y[27]; + t7 = y[28]; + tn = y[29]; + tf = y[30]; + tv = y[31]; + OD_IDCT_32(t0, tg, t8, to, t4, tk, tc, ts, t2, ti, ta, tq, t6, tm, te, tu, + t1, th, t9, tp, t5, tl, td, tt, t3, tj, tb, tr, t7, tn, tf, tv); + x[0*xstride] = (od_coeff)t0; + x[1*xstride] = (od_coeff)t1; + x[2*xstride] = (od_coeff)t2; + x[3*xstride] = (od_coeff)t3; + x[4*xstride] = (od_coeff)t4; + x[5*xstride] = (od_coeff)t5; + x[6*xstride] = (od_coeff)t6; + x[7*xstride] = (od_coeff)t7; + x[8*xstride] = (od_coeff)t8; + x[9*xstride] = (od_coeff)t9; + x[10*xstride] = (od_coeff)ta; + x[11*xstride] = (od_coeff)tb; + x[12*xstride] = (od_coeff)tc; + x[13*xstride] = (od_coeff)td; + x[14*xstride] = (od_coeff)te; + x[15*xstride] = (od_coeff)tf; + x[16*xstride] = (od_coeff)tg; + x[17*xstride] = (od_coeff)th; + x[18*xstride] = (od_coeff)ti; + x[19*xstride] = (od_coeff)tj; + x[20*xstride] = (od_coeff)tk; + x[21*xstride] = (od_coeff)tl; + x[22*xstride] = (od_coeff)tm; + x[23*xstride] = (od_coeff)tn; + x[24*xstride] = (od_coeff)to; + x[25*xstride] = (od_coeff)tp; + x[26*xstride] = (od_coeff)tq; + x[27*xstride] = (od_coeff)tr; + x[28*xstride] = (od_coeff)ts; + x[29*xstride] = (od_coeff)tt; + x[30*xstride] = (od_coeff)tu; + x[31*xstride] = (od_coeff)tv; +} + +#if CONFIG_TX64X64 +void od_bin_fdct64(od_coeff y[64], const od_coeff *x, int xstride) { + int t0; + int t1; + int t2; + int t3; + int t4; + int t5; + int t6; + int t7; + int t8; + int t9; + int ta; + int tb; + int tc; + int td; + int te; + int tf; + int tg; + int th; + int ti; + int tj; + int tk; + int tl; + int tm; + int tn; + int to; + int tp; + int tq; + int tr; + int ts; + int tt; + int tu; + int tv; + int tw; + int tx; + int ty; + int tz; + int tA; + int tB; + int tC; + int tD; + int tE; + int tF; + int tG; + int tH; + int tI; + int tJ; + int tK; + int tL; + int tM; + int tN; + int tO; + int tP; + int tQ; + int tR; + int tS; + int tT; + int tU; + int tV; + int tW; + int tX; + int tY; + int tZ; + int t_; + int t; + t0 = x[0*xstride]; + tw = x[1*xstride]; + tg = x[2*xstride]; + tM = x[3*xstride]; + t8 = x[4*xstride]; + tE = x[5*xstride]; + to = x[6*xstride]; + tU = x[7*xstride]; + t4 = x[8*xstride]; + tA = x[9*xstride]; + tk = x[10*xstride]; + tQ = x[11*xstride]; + tc = x[12*xstride]; + tI = x[13*xstride]; + ts = x[14*xstride]; + tY = x[15*xstride]; + t2 = x[16*xstride]; + ty = x[17*xstride]; + ti = x[18*xstride]; + tO = x[19*xstride]; + ta = x[20*xstride]; + tG = x[21*xstride]; + tq = x[22*xstride]; + tW = x[23*xstride]; + t6 = x[24*xstride]; + tC = x[25*xstride]; + tm = x[26*xstride]; + tS = x[27*xstride]; + te = x[28*xstride]; + tK = x[29*xstride]; + tu = x[30*xstride]; + t_ = x[31*xstride]; + t1 = x[32*xstride]; + tx = x[33*xstride]; + th = x[34*xstride]; + tN = x[35*xstride]; + t9 = x[36*xstride]; + tF = x[37*xstride]; + tp = x[38*xstride]; + tV = x[39*xstride]; + t5 = x[40*xstride]; + tB = x[41*xstride]; + tl = x[42*xstride]; + tR = x[43*xstride]; + td = x[44*xstride]; + tJ = x[45*xstride]; + tt = x[46*xstride]; + tZ = x[47*xstride]; + t3 = x[48*xstride]; + tz = x[49*xstride]; + tj = x[50*xstride]; + tP = x[51*xstride]; + tb = x[52*xstride]; + tH = x[53*xstride]; + tr = x[54*xstride]; + tX = x[55*xstride]; + t7 = x[56*xstride]; + tD = x[57*xstride]; + tn = x[58*xstride]; + tT = x[59*xstride]; + tf = x[60*xstride]; + tL = x[61*xstride]; + tv = x[62*xstride]; + t = x[63*xstride]; + OD_FDCT_64(t0, tw, tg, tM, t8, tE, to, tU, t4, tA, tk, tQ, tc, tI, ts, tY, + t2, ty, ti, tO, ta, tG, tq, tW, t6, tC, tm, tS, te, tK, tu, t_, t1, tx, + th, tN, t9, tF, tp, tV, t5, tB, tl, tR, td, tJ, tt, tZ, t3, tz, tj, tP, + tb, tH, tr, tX, t7, tD, tn, tT, tf, tL, tv, t); + y[0] = (od_coeff)t0; + y[1] = (od_coeff)t1; + y[2] = (od_coeff)t2; + y[3] = (od_coeff)t3; + y[4] = (od_coeff)t4; + y[5] = (od_coeff)t5; + y[6] = (od_coeff)t6; + y[7] = (od_coeff)t7; + y[8] = (od_coeff)t8; + y[9] = (od_coeff)t9; + y[10] = (od_coeff)ta; + y[11] = (od_coeff)tb; + y[12] = (od_coeff)tc; + y[13] = (od_coeff)td; + y[14] = (od_coeff)te; + y[15] = (od_coeff)tf; + y[16] = (od_coeff)tg; + y[17] = (od_coeff)th; + y[18] = (od_coeff)ti; + y[19] = (od_coeff)tj; + y[20] = (od_coeff)tk; + y[21] = (od_coeff)tl; + y[22] = (od_coeff)tm; + y[23] = (od_coeff)tn; + y[24] = (od_coeff)to; + y[25] = (od_coeff)tp; + y[26] = (od_coeff)tq; + y[27] = (od_coeff)tr; + y[28] = (od_coeff)ts; + y[29] = (od_coeff)tt; + y[30] = (od_coeff)tu; + y[31] = (od_coeff)tv; + y[32] = (od_coeff)tw; + y[33] = (od_coeff)tx; + y[34] = (od_coeff)ty; + y[35] = (od_coeff)tz; + y[36] = (od_coeff)tA; + y[37] = (od_coeff)tB; + y[38] = (od_coeff)tC; + y[39] = (od_coeff)tD; + y[40] = (od_coeff)tE; + y[41] = (od_coeff)tF; + y[41] = (od_coeff)tF; + y[42] = (od_coeff)tG; + y[43] = (od_coeff)tH; + y[44] = (od_coeff)tI; + y[45] = (od_coeff)tJ; + y[46] = (od_coeff)tK; + y[47] = (od_coeff)tL; + y[48] = (od_coeff)tM; + y[49] = (od_coeff)tN; + y[50] = (od_coeff)tO; + y[51] = (od_coeff)tP; + y[52] = (od_coeff)tQ; + y[53] = (od_coeff)tR; + y[54] = (od_coeff)tS; + y[55] = (od_coeff)tT; + y[56] = (od_coeff)tU; + y[57] = (od_coeff)tV; + y[58] = (od_coeff)tW; + y[59] = (od_coeff)tX; + y[60] = (od_coeff)tY; + y[61] = (od_coeff)tZ; + y[62] = (od_coeff)t_; + y[63] = (od_coeff)t; +} + +void od_bin_idct64(od_coeff *x, int xstride, const od_coeff y[64]) { + int t0; + int t1; + int t2; + int t3; + int t4; + int t5; + int t6; + int t7; + int t8; + int t9; + int ta; + int tb; + int tc; + int td; + int te; + int tf; + int tg; + int th; + int ti; + int tj; + int tk; + int tl; + int tm; + int tn; + int to; + int tp; + int tq; + int tr; + int ts; + int tt; + int tu; + int tv; + int tw; + int tx; + int ty; + int tz; + int tA; + int tB; + int tC; + int tD; + int tE; + int tF; + int tG; + int tH; + int tI; + int tJ; + int tK; + int tL; + int tM; + int tN; + int tO; + int tP; + int tQ; + int tR; + int tS; + int tT; + int tU; + int tV; + int tW; + int tX; + int tY; + int tZ; + int t_; + int t; + t0 = y[0]; + tw = y[1]; + tg = y[2]; + tM = y[3]; + t8 = y[4]; + tE = y[5]; + to = y[6]; + tU = y[7]; + t4 = y[8]; + tA = y[9]; + tk = y[10]; + tQ = y[11]; + tc = y[12]; + tI = y[13]; + ts = y[14]; + tY = y[15]; + t2 = y[16]; + ty = y[17]; + ti = y[18]; + tO = y[19]; + ta = y[20]; + tG = y[21]; + tq = y[22]; + tW = y[23]; + t6 = y[24]; + tC = y[25]; + tm = y[26]; + tS = y[27]; + te = y[28]; + tK = y[29]; + tu = y[30]; + t_ = y[31]; + t1 = y[32]; + tx = y[33]; + th = y[34]; + tN = y[35]; + t9 = y[36]; + tF = y[37]; + tp = y[38]; + tV = y[39]; + t5 = y[40]; + tB = y[41]; + tl = y[42]; + tR = y[43]; + td = y[44]; + tJ = y[45]; + tt = y[46]; + tZ = y[47]; + t3 = y[48]; + tz = y[49]; + tj = y[50]; + tP = y[51]; + tb = y[52]; + tH = y[53]; + tr = y[54]; + tX = y[55]; + t7 = y[56]; + tD = y[57]; + tn = y[58]; + tT = y[59]; + tf = y[60]; + tL = y[61]; + tv = y[62]; + t = y[63]; + OD_IDCT_64(t0, tw, tg, tM, t8, tE, to, tU, t4, tA, tk, tQ, tc, tI, ts, tY, + t2, ty, ti, tO, ta, tG, tq, tW, t6, tC, tm, tS, te, tK, tu, t_, t1, tx, + th, tN, t9, tF, tp, tV, t5, tB, tl, tR, td, tJ, tt, tZ, t3, tz, tj, tP, + tb, tH, tr, tX, t7, tD, tn, tT, tf, tL, tv, t); + x[0*xstride] = (od_coeff)t0; + x[1*xstride] = (od_coeff)t1; + x[2*xstride] = (od_coeff)t2; + x[3*xstride] = (od_coeff)t3; + x[4*xstride] = (od_coeff)t4; + x[5*xstride] = (od_coeff)t5; + x[6*xstride] = (od_coeff)t6; + x[7*xstride] = (od_coeff)t7; + x[8*xstride] = (od_coeff)t8; + x[9*xstride] = (od_coeff)t9; + x[10*xstride] = (od_coeff)ta; + x[11*xstride] = (od_coeff)tb; + x[12*xstride] = (od_coeff)tc; + x[13*xstride] = (od_coeff)td; + x[14*xstride] = (od_coeff)te; + x[15*xstride] = (od_coeff)tf; + x[16*xstride] = (od_coeff)tg; + x[17*xstride] = (od_coeff)th; + x[18*xstride] = (od_coeff)ti; + x[19*xstride] = (od_coeff)tj; + x[20*xstride] = (od_coeff)tk; + x[21*xstride] = (od_coeff)tl; + x[22*xstride] = (od_coeff)tm; + x[23*xstride] = (od_coeff)tn; + x[24*xstride] = (od_coeff)to; + x[25*xstride] = (od_coeff)tp; + x[26*xstride] = (od_coeff)tq; + x[27*xstride] = (od_coeff)tr; + x[28*xstride] = (od_coeff)ts; + x[29*xstride] = (od_coeff)tt; + x[30*xstride] = (od_coeff)tu; + x[31*xstride] = (od_coeff)tv; + x[32*xstride] = (od_coeff)tw; + x[33*xstride] = (od_coeff)tx; + x[34*xstride] = (od_coeff)ty; + x[35*xstride] = (od_coeff)tz; + x[36*xstride] = (od_coeff)tA; + x[37*xstride] = (od_coeff)tB; + x[38*xstride] = (od_coeff)tC; + x[39*xstride] = (od_coeff)tD; + x[40*xstride] = (od_coeff)tE; + x[41*xstride] = (od_coeff)tF; + x[41*xstride] = (od_coeff)tF; + x[42*xstride] = (od_coeff)tG; + x[43*xstride] = (od_coeff)tH; + x[44*xstride] = (od_coeff)tI; + x[45*xstride] = (od_coeff)tJ; + x[46*xstride] = (od_coeff)tK; + x[47*xstride] = (od_coeff)tL; + x[48*xstride] = (od_coeff)tM; + x[49*xstride] = (od_coeff)tN; + x[50*xstride] = (od_coeff)tO; + x[51*xstride] = (od_coeff)tP; + x[52*xstride] = (od_coeff)tQ; + x[53*xstride] = (od_coeff)tR; + x[54*xstride] = (od_coeff)tS; + x[55*xstride] = (od_coeff)tT; + x[56*xstride] = (od_coeff)tU; + x[57*xstride] = (od_coeff)tV; + x[58*xstride] = (od_coeff)tW; + x[59*xstride] = (od_coeff)tX; + x[60*xstride] = (od_coeff)tY; + x[61*xstride] = (od_coeff)tZ; + x[62*xstride] = (od_coeff)t_; + x[63*xstride] = (od_coeff)t; +} +#endif + +void daala_fdct4(const tran_low_t *input, tran_low_t *output) { + int i; + od_coeff x[4]; + od_coeff y[4]; + for (i = 0; i < 4; i++) x[i] = (od_coeff)input[i]; + od_bin_fdct4(y, x, 1); + for (i = 0; i < 4; i++) output[i] = (tran_low_t)y[i]; +} + +void daala_idct4(const tran_low_t *input, tran_low_t *output) { + int i; + od_coeff x[4]; + od_coeff y[4]; + for (i = 0; i < 4; i++) y[i] = input[i]; + od_bin_idct4(x, 1, y); + for (i = 0; i < 4; i++) output[i] = (tran_low_t)x[i]; +} + +void daala_fdst4(const tran_low_t *input, tran_low_t *output) { + int i; + od_coeff x[4]; + od_coeff y[4]; + for (i = 0; i < 4; i++) x[i] = (od_coeff)input[i]; + od_bin_fdst4(y, x, 1); + for (i = 0; i < 4; i++) output[i] = (tran_low_t)y[i]; +} + +void daala_idst4(const tran_low_t *input, tran_low_t *output) { + int i; + od_coeff x[4]; + od_coeff y[4]; + for (i = 0; i < 4; i++) y[i] = input[i]; + od_bin_idst4(x, 1, y); + for (i = 0; i < 4; i++) output[i] = (tran_low_t)x[i]; +} + +void daala_idtx4(const tran_low_t *input, tran_low_t *output) { + int i; + for (i = 0; i < 4; i++) output[i] = input[i]; +} + +void daala_fdct8(const tran_low_t *input, tran_low_t *output) { + int i; + od_coeff x[8]; + od_coeff y[8]; + for (i = 0; i < 8; i++) x[i] = (od_coeff)input[i]; + od_bin_fdct8(y, x, 1); + for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i]; +} + +void daala_idct8(const tran_low_t *input, tran_low_t *output) { + int i; + od_coeff x[8]; + od_coeff y[8]; + for (i = 0; i < 8; i++) y[i] = (od_coeff)input[i]; + od_bin_idct8(x, 1, y); + for (i = 0; i < 8; i++) output[i] = (tran_low_t)x[i]; +} + +void daala_fdst8(const tran_low_t *input, tran_low_t *output) { + int i; + od_coeff x[8]; + od_coeff y[8]; + for (i = 0; i < 8; i++) x[i] = (od_coeff)input[i]; + od_bin_fdst8(y, x, 1); + for (i = 0; i < 8; i++) output[i] = (tran_low_t)y[i]; +} + +void daala_idst8(const tran_low_t *input, tran_low_t *output) { + int i; + od_coeff x[8]; + od_coeff y[8]; + for (i = 0; i < 8; i++) y[i] = (od_coeff)input[i]; + od_bin_idst8(x, 1, y); + for (i = 0; i < 8; i++) output[i] = (tran_low_t)x[i]; +} + +void daala_idtx8(const tran_low_t *input, tran_low_t *output) { + int i; + for (i = 0; i < 8; i++) output[i] = input[i]; +} + +void daala_fdct16(const tran_low_t *input, tran_low_t *output) { + int i; + od_coeff x[16]; + od_coeff y[16]; + for (i = 0; i < 16; i++) x[i] = (od_coeff)input[i]; + od_bin_fdct16(y, x, 1); + for (i = 0; i < 16; i++) output[i] = (tran_low_t)y[i]; +} + +void daala_idct16(const tran_low_t *input, tran_low_t *output) { + int i; + od_coeff x[16]; + od_coeff y[16]; + for (i = 0; i < 16; i++) y[i] = (od_coeff)input[i]; + od_bin_idct16(x, 1, y); + for (i = 0; i < 16; i++) output[i] = (tran_low_t)x[i]; +} + +void daala_fdst16(const tran_low_t *input, tran_low_t *output) { + int i; + od_coeff x[16]; + od_coeff y[16]; + for (i = 0; i < 16; i++) x[i] = (od_coeff)input[i]; + od_bin_fdst16(y, x, 1); + for (i = 0; i < 16; i++) output[i] = (tran_low_t)y[i]; +} + +void daala_idst16(const tran_low_t *input, tran_low_t *output) { + int i; + od_coeff x[16]; + od_coeff y[16]; + for (i = 0; i < 16; i++) y[i] = (od_coeff)input[i]; + od_bin_idst16(x, 1, y); + for (i = 0; i < 16; i++) output[i] = (tran_low_t)x[i]; +} + +void daala_idtx16(const tran_low_t *input, tran_low_t *output) { + int i; + for (i = 0; i < 16; i++) output[i] = input[i]; +} + +void daala_fdct32(const tran_low_t *input, tran_low_t *output) { + int i; + od_coeff x[32]; + od_coeff y[32]; + for (i = 0; i < 32; i++) x[i] = (od_coeff)input[i]; + od_bin_fdct32(y, x, 1); + for (i = 0; i < 32; i++) output[i] = (tran_low_t)y[i]; +} + +void daala_idct32(const tran_low_t *input, tran_low_t *output) { + int i; + od_coeff x[32]; + od_coeff y[32]; + for (i = 0; i < 32; i++) y[i] = (od_coeff)input[i]; + od_bin_idct32(x, 1, y); + for (i = 0; i < 32; i++) output[i] = (tran_low_t)x[i]; +} + +/* Preserve the "half-right" transform behavior. */ +void daala_fdst32(const tran_low_t *input, tran_low_t *output) { + int i; + tran_low_t inputhalf[16]; + for (i = 0; i < 16; ++i) { + output[16 + i] = input[i]; + } + for (i = 0; i < 16; ++i) { + inputhalf[i] = input[i + 16]; + } + daala_fdct16(inputhalf, output); +} + +/* Preserve the "half-right" transform behavior. */ +void daala_idst32(const tran_low_t *input, tran_low_t *output) { + int i; + tran_low_t inputhalf[16]; + for (i = 0; i < 16; ++i) { + inputhalf[i] = input[i]; + } + for (i = 0; i < 16; ++i) { + output[i] = input[16 + i]; + } + daala_idct16(inputhalf, output + 16); +} + +void daala_idtx32(const tran_low_t *input, tran_low_t *output) { + int i; + for (i = 0; i < 32; i++) output[i] = input[i]; +} + +#if CONFIG_TX64X64 +void daala_fdct64(const tran_low_t *input, tran_low_t *output) { + int i; + od_coeff x[64]; + od_coeff y[64]; + for (i = 0; i < 64; i++) x[i] = (od_coeff)input[i]; + od_bin_fdct64(y, x, 1); + for (i = 0; i < 64; i++) output[i] = (tran_low_t)y[i]; +} + +void daala_idct64(const tran_low_t *input, tran_low_t *output) { + int i; + od_coeff x[64]; + od_coeff y[64]; + for (i = 0; i < 64; i++) y[i] = (od_coeff)input[i]; + od_bin_idct64(x, 1, y); + for (i = 0; i < 64; i++) output[i] = (tran_low_t)x[i]; +} + +/* Preserve the "half-right" transform behavior. */ +void daala_fdst64(const tran_low_t *input, tran_low_t *output) { + int i; + tran_low_t inputhalf[32]; + for (i = 0; i < 32; ++i) { + output[32 + i] = input[i]; + } + for (i = 0; i < 32; ++i) { + inputhalf[i] = input[i + 32]; + } + daala_fdct32(inputhalf, output); +} + +/* Preserve the "half-right" transform behavior. */ +void daala_idst64(const tran_low_t *input, tran_low_t *output) { + int i; + tran_low_t inputhalf[32]; + for (i = 0; i < 32; ++i) { + inputhalf[i] = input[i]; + } + for (i = 0; i < 32; ++i) { + output[i] = input[32 + i]; + } + daala_idct32(inputhalf, output + 32); +} + +void daala_idtx64(const tran_low_t *input, tran_low_t *output) { + int i; + for (i = 0; i < 64; i++) output[i] = input[i]; +} +#endif |