/* * Copyright (c) 2016, Alliance for Open Media. All rights reserved * * This source code is subject to the terms of the BSD 2 Clause License and * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License * was not distributed with this source code in the LICENSE file, you can * obtain it at www.aomedia.org/license/software. If the Alliance for Open * Media Patent License 1.0 was not distributed with this source code in the * PATENTS file, you can obtain it at www.aomedia.org/license/patent. */ #ifndef AOM_DSP_X86_TXFM_COMMON_AVX2_H #define AOM_DSP_X86_TXFM_COMMON_AVX2_H #include #include "aom_dsp/txfm_common.h" #include "aom_dsp/x86/common_avx2.h" #define pair256_set_epi16(a, b) \ _mm256_set_epi16((int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \ (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \ (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \ (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a)) #define pair256_set_epi32(a, b) \ _mm256_set_epi32((int)(b), (int)(a), (int)(b), (int)(a), (int)(b), (int)(a), \ (int)(b), (int)(a)) static INLINE void mm256_reverse_epi16(__m256i *u) { const __m256i control = _mm256_set_epi16( 0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0B0A, 0x0D0C, 0x0F0E, 0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0B0A, 0x0D0C, 0x0F0E); __m256i v = _mm256_shuffle_epi8(*u, control); *u = _mm256_permute2x128_si256(v, v, 1); } static INLINE __m256i butter_fly(const __m256i *a0, const __m256i *a1, const __m256i *cospi) { const __m256i dct_rounding = _mm256_set1_epi32(DCT_CONST_ROUNDING); __m256i y0 = _mm256_madd_epi16(*a0, *cospi); __m256i y1 = _mm256_madd_epi16(*a1, *cospi); y0 = _mm256_add_epi32(y0, dct_rounding); y1 = _mm256_add_epi32(y1, dct_rounding); y0 = _mm256_srai_epi32(y0, DCT_CONST_BITS); y1 = _mm256_srai_epi32(y1, DCT_CONST_BITS); return _mm256_packs_epi32(y0, y1); } static INLINE void txfm_scaling16_avx2(const int16_t c, __m256i *in) { const __m256i zero = _mm256_setzero_si256(); const __m256i sqrt2_epi16 = _mm256_set1_epi16(c); const __m256i dct_const_rounding = _mm256_set1_epi32(DCT_CONST_ROUNDING); __m256i u0, u1; int i = 0; while (i < 16) { in[i] = _mm256_slli_epi16(in[i], 1); u0 = _mm256_unpacklo_epi16(zero, in[i]); u1 = _mm256_unpackhi_epi16(zero, in[i]); u0 = _mm256_madd_epi16(u0, sqrt2_epi16); u1 = _mm256_madd_epi16(u1, sqrt2_epi16); u0 = _mm256_add_epi32(u0, dct_const_rounding); u1 = _mm256_add_epi32(u1, dct_const_rounding); u0 = _mm256_srai_epi32(u0, DCT_CONST_BITS); u1 = _mm256_srai_epi32(u1, DCT_CONST_BITS); in[i] = _mm256_packs_epi32(u0, u1); i++; } } #endif // AOM_DSP_X86_TXFM_COMMON_AVX2_H