summary | refs | log | tree | commit | diff | stats
path: root/third_party/aom/av1/common/x86/hybrid_inv_txfm_avx2.c
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/aom/av1/common/x86/hybrid_inv_txfm_avx2.c')
-rw-r--r-- third_party/aom/av1/common/x86/hybrid_inv_txfm_avx2.c 74
1 files changed, 8 insertions, 66 deletions
diff --git a/third_party/aom/av1/common/x86/hybrid_inv_txfm_avx2.c b/third_party/aom/av1/common/x86/hybrid_inv_txfm_avx2.c
index efc8d1e24..c69614e42 100644
--- a/third_party/aom/av1/common/x86/hybrid_inv_txfm_avx2.c
+++ b/third_party/aom/av1/common/x86/hybrid_inv_txfm_avx2.c
@@ -14,67 +14,9 @@
#include "./aom_config.h"
#include "./av1_rtcd.h"
-#include "aom_dsp/x86/txfm_common_avx2.h"
-
-static INLINE void load_coeff(const tran_low_t *coeff, __m256i *in) {
-#if CONFIG_HIGHBITDEPTH
- *in = _mm256_setr_epi16(
- (int16_t)coeff[0], (int16_t)coeff[1], (int16_t)coeff[2],
- (int16_t)coeff[3], (int16_t)coeff[4], (int16_t)coeff[5],
- (int16_t)coeff[6], (int16_t)coeff[7], (int16_t)coeff[8],
- (int16_t)coeff[9], (int16_t)coeff[10], (int16_t)coeff[11],
- (int16_t)coeff[12], (int16_t)coeff[13], (int16_t)coeff[14],
- (int16_t)coeff[15]);
-#else
- *in = _mm256_loadu_si256((const __m256i *)coeff);
-#endif
-}
-
-static void load_buffer_16x16(const tran_low_t *coeff, __m256i *in) {
- int i = 0;
- while (i < 16) {
- load_coeff(coeff + (i << 4), &in[i]);
- i += 1;
- }
-}
-
-static void recon_and_store(const __m256i *res, uint8_t *output) {
- const __m128i zero = _mm_setzero_si128();
- __m128i x = _mm_loadu_si128((__m128i const *)output);
- __m128i p0 = _mm_unpacklo_epi8(x, zero);
- __m128i p1 = _mm_unpackhi_epi8(x, zero);
-
- p0 = _mm_add_epi16(p0, _mm256_castsi256_si128(*res));
- p1 = _mm_add_epi16(p1, _mm256_extractf128_si256(*res, 1));
- x = _mm_packus_epi16(p0, p1);
- _mm_storeu_si128((__m128i *)output, x);
-}
-
-#define IDCT_ROUNDING_POS (6)
-
-static void write_buffer_16x16(__m256i *in, const int stride, uint8_t *output) {
- const __m256i rounding = _mm256_set1_epi16(1 << (IDCT_ROUNDING_POS - 1));
- int i = 0;
-
- while (i < 16) {
- in[i] = _mm256_add_epi16(in[i], rounding);
- in[i] = _mm256_srai_epi16(in[i], IDCT_ROUNDING_POS);
- recon_and_store(&in[i], output + i * stride);
- i += 1;
- }
-}
-
-static INLINE void unpack_butter_fly(const __m256i *a0, const __m256i *a1,
- const __m256i *c0, const __m256i *c1,
- __m256i *b0, __m256i *b1) {
- __m256i x0, x1;
- x0 = _mm256_unpacklo_epi16(*a0, *a1);
- x1 = _mm256_unpackhi_epi16(*a0, *a1);
- *b0 = butter_fly(x0, x1, *c0);
- *b1 = butter_fly(x0, x1, *c1);
-}
+#include "aom_dsp/x86/inv_txfm_common_avx2.h"
-static void idct16_avx2(__m256i *in) {
+void av1_idct16_avx2(__m256i *in) {
const __m256i cospi_p30_m02 = pair256_set_epi16(cospi_30_64, -cospi_2_64);
const __m256i cospi_p02_p30 = pair256_set_epi16(cospi_2_64, cospi_30_64);
const __m256i cospi_p14_m18 = pair256_set_epi16(cospi_14_64, -cospi_18_64);
@@ -216,8 +158,8 @@ static void idct16_avx2(__m256i *in) {
}
static void idct16(__m256i *in) {
- mm256_transpose_16x16(in);
- idct16_avx2(in);
+ mm256_transpose_16x16(in, in);
+ av1_idct16_avx2(in);
}
static INLINE void butterfly_32b(const __m256i *a0, const __m256i *a1,
@@ -398,7 +340,7 @@ static void iadst16_avx2(__m256i *in) {
}
static void iadst16(__m256i *in) {
- mm256_transpose_16x16(in);
+ mm256_transpose_16x16(in, in);
iadst16_avx2(in);
}
@@ -416,8 +358,8 @@ static void flip_col(uint8_t **dest, int *stride, int rows) {
}
static void iidtx16(__m256i *in) {
- mm256_transpose_16x16(in);
- txfm_scaling16_avx2(Sqrt2, in);
+ mm256_transpose_16x16(in, in);
+ txfm_scaling16_avx2((int16_t)Sqrt2, in);
}
#endif
@@ -503,5 +445,5 @@ void av1_iht16x16_256_add_avx2(const tran_low_t *input, uint8_t *dest,
#endif // CONFIG_EXT_TX
default: assert(0); break;
}
- write_buffer_16x16(in, stride, dest);
+ store_buffer_16xN(in, stride, dest, 16);
}