Diffstat (limited to 'third_party/aom/test/simd_cmp_impl.h'):

 third_party/aom/test/simd_cmp_impl.h | 522 ++++++++++++++++++++++++++++++++-
 1 file changed, 521 insertions(+), 1 deletion(-)
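The patch below extends the SIMD comparison harness to the 256-bit (v256) vector types: each machine-tuned intrinsic is run against its plain-C reference (the c_v256_* functions) on the same inputs, and the stored results are compared byte for byte. As a minimal, self-contained sketch of that comparison pattern, not the actual aom harness, with simplified stand-in types and names:

// Sketch (not the aom API) of what CompareSimd1Arg-style drivers do:
// run the tuned implementation and the plain-C reference on the same
// input, then fail on any byte difference between the two results.
#include <cstdint>
#include <cstdio>
#include <cstring>

typedef uint32_t (*fn_u32)(uint32_t);

static uint32_t tuned_impl(uint32_t x) { return x << 1; }  // stand-in "SIMD" path
static uint32_t c_ref_impl(uint32_t x) { return x * 2; }   // stand-in C reference

// Returns nonzero on mismatch, mirroring the error flag in the harness.
static int compare_1arg(fn_u32 simd, fn_u32 ref, uint32_t input) {
  uint32_t d = simd(input);     // result of the tuned code
  uint32_t ref_d = ref(input);  // result of the reference code
  return memcmp(&d, &ref_d, sizeof(d)) != 0;
}

int main() {
  for (uint32_t i = 0; i < 100000; i++) {
    if (compare_1arg(tuned_impl, c_ref_impl, i)) {
      printf("mismatch at input %u\n", i);
      return 1;
    }
  }
  printf("all inputs match\n");
  return 0;
}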
diff --git a/third_party/aom/test/simd_cmp_impl.h b/third_party/aom/test/simd_cmp_impl.h
index 28bd64a5b..4a9c1f7be 100644
--- a/third_party/aom/test/simd_cmp_impl.h
+++ b/third_party/aom/test/simd_cmp_impl.h
@@ -16,7 +16,7 @@
 #include "aom_dsp/aom_simd.h"
 #undef SIMD_INLINE
 #define SIMD_INLINE static  // Don't enforce inlining
-#include "aom_dsp/simd/v128_intrinsics_c.h"
+#include "aom_dsp/simd/v256_intrinsics_c.h"
 
 // Machine tuned code goes into this file. This file is included from
 // simd_cmp_sse2.cc, simd_cmp_ssse3.cc etc which define the macros
@@ -224,6 +224,104 @@ c_v128 c_imm_v128_align(c_v128 a, c_v128 b) {
   return c_v128_align(a, b, shift);
 }
 
+template <int shift>
+v256 imm_v256_shl_n_byte(v256 a) {
+  return v256_shl_n_byte(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_byte(v256 a) {
+  return v256_shr_n_byte(a, shift);
+}
+template <int shift>
+v256 imm_v256_shl_n_8(v256 a) {
+  return v256_shl_n_8(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_u8(v256 a) {
+  return v256_shr_n_u8(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_s8(v256 a) {
+  return v256_shr_n_s8(a, shift);
+}
+template <int shift>
+v256 imm_v256_shl_n_16(v256 a) {
+  return v256_shl_n_16(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_u16(v256 a) {
+  return v256_shr_n_u16(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_s16(v256 a) {
+  return v256_shr_n_s16(a, shift);
+}
+template <int shift>
+v256 imm_v256_shl_n_32(v256 a) {
+  return v256_shl_n_32(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_u32(v256 a) {
+  return v256_shr_n_u32(a, shift);
+}
+template <int shift>
+v256 imm_v256_shr_n_s32(v256 a) {
+  return v256_shr_n_s32(a, shift);
+}
+template <int shift>
+v256 imm_v256_align(v256 a, v256 b) {
+  return v256_align(a, b, shift);
+}
+
+template <int shift>
+c_v256 c_imm_v256_shl_n_byte(c_v256 a) {
+  return c_v256_shl_n_byte(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_byte(c_v256 a) {
+  return c_v256_shr_n_byte(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shl_n_8(c_v256 a) {
+  return c_v256_shl_n_8(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_u8(c_v256 a) {
+  return c_v256_shr_n_u8(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_s8(c_v256 a) {
+  return c_v256_shr_n_s8(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shl_n_16(c_v256 a) {
+  return c_v256_shl_n_16(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_u16(c_v256 a) {
+  return c_v256_shr_n_u16(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_s16(c_v256 a) {
+  return c_v256_shr_n_s16(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shl_n_32(c_v256 a) {
+  return c_v256_shl_n_32(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_u32(c_v256 a) {
+  return c_v256_shr_n_u32(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_shr_n_s32(c_v256 a) {
+  return c_v256_shr_n_s32(a, shift);
+}
+template <int shift>
+c_v256 c_imm_v256_align(c_v256 a, c_v256 b) {
+  return c_v256_align(a, b, shift);
+}
+
 // Wrappers around the SAD and SSD functions
 uint32_t v64_sad_u8(v64 a, v64 b) {
   return v64_sad_u8_sum(::v64_sad_u8(v64_sad_u8_init(), a, b));
@@ -250,6 +348,18 @@ uint32_t c_v128_sad_u8(c_v128 a, c_v128 b) {
   return c_v128_sad_u8_sum(::c_v128_sad_u8(c_v128_sad_u8_init(), a, b));
 }
 uint32_t c_v128_ssd_u8(c_v128 a, c_v128 b) {
   return c_v128_ssd_u8_sum(::c_v128_ssd_u8(c_v128_ssd_u8_init(), a, b));
 }
+uint32_t v256_sad_u8(v256 a, v256 b) {
+  return v256_sad_u8_sum(::v256_sad_u8(v256_sad_u8_init(), a, b));
+}
+uint32_t v256_ssd_u8(v256 a, v256 b) {
+  return v256_ssd_u8_sum(::v256_ssd_u8(v256_ssd_u8_init(), a, b));
+}
+uint32_t c_v256_sad_u8(c_v256 a, c_v256 b) {
+  return c_v256_sad_u8_sum(::c_v256_sad_u8(c_v256_sad_u8_init(), a, b));
+}
+uint32_t c_v256_ssd_u8(c_v256 a, c_v256 b) {
+  return c_v256_ssd_u8_sum(::c_v256_ssd_u8(c_v256_ssd_u8_init(), a, b));
+}
 
 namespace {
 
@@ -657,6 +767,265 @@ const mapping m[] = {
   MAP(v64_sad_u8),
   MAP(v64_store_unaligned),
   MAP(v128_load_unaligned),
   MAP(v128_store_unaligned),
+  MAP(v256_sad_u8),
+  MAP(v256_ssd_u8),
+  MAP(v256_hadd_u8),
+  MAP(v256_dotp_s16),
+  MAP(v256_add_8),
+  MAP(v256_add_16),
+  MAP(v256_sadd_s16),
+  MAP(v256_add_32),
+  MAP(v256_sub_8),
+  MAP(v256_ssub_u8),
+  MAP(v256_ssub_s8),
+  MAP(v256_sub_16),
+  MAP(v256_ssub_u16),
+  MAP(v256_ssub_s16),
+  MAP(v256_sub_32),
+  MAP(v256_ziplo_8),
+  MAP(v256_ziphi_8),
+  MAP(v256_ziplo_16),
+  MAP(v256_ziphi_16),
+  MAP(v256_ziplo_32),
+  MAP(v256_ziphi_32),
+  MAP(v256_ziplo_64),
+  MAP(v256_ziphi_64),
+  MAP(v256_unziphi_8),
+  MAP(v256_unziplo_8),
+  MAP(v256_unziphi_16),
+  MAP(v256_unziplo_16),
+  MAP(v256_unziphi_32),
+  MAP(v256_unziplo_32),
+  MAP(v256_pack_s32_s16),
+  MAP(v256_pack_s16_u8),
+  MAP(v256_pack_s16_s8),
+  MAP(v256_or),
+  MAP(v256_xor),
+  MAP(v256_and),
+  MAP(v256_andn),
+  MAP(v256_mullo_s16),
+  MAP(v256_mulhi_s16),
+  MAP(v256_mullo_s32),
+  MAP(v256_madd_s16),
+  MAP(v256_madd_us8),
+  MAP(v256_avg_u8),
+  MAP(v256_rdavg_u8),
+  MAP(v256_avg_u16),
+  MAP(v256_min_u8),
+  MAP(v256_max_u8),
+  MAP(v256_min_s8),
+  MAP(v256_max_s8),
+  MAP(v256_min_s16),
+  MAP(v256_max_s16),
+  MAP(v256_cmpgt_s8),
+  MAP(v256_cmplt_s8),
+  MAP(v256_cmpeq_8),
+  MAP(v256_cmpgt_s16),
+  MAP(v256_cmplt_s16),
+  MAP(v256_cmpeq_16),
+  MAP(v256_shuffle_8),
+  MAP(v256_pshuffle_8),
+  MAP(imm_v256_align<1>),
+  MAP(imm_v256_align<2>),
+  MAP(imm_v256_align<3>),
+  MAP(imm_v256_align<4>),
+  MAP(imm_v256_align<5>),
+  MAP(imm_v256_align<6>),
+  MAP(imm_v256_align<7>),
+  MAP(imm_v256_align<8>),
+  MAP(imm_v256_align<9>),
+  MAP(imm_v256_align<10>),
+  MAP(imm_v256_align<11>),
+  MAP(imm_v256_align<12>),
+  MAP(imm_v256_align<13>),
+  MAP(imm_v256_align<14>),
+  MAP(imm_v256_align<15>),
+  MAP(imm_v256_align<16>),
+  MAP(imm_v256_align<17>),
+  MAP(imm_v256_align<18>),
+  MAP(imm_v256_align<19>),
+  MAP(imm_v256_align<20>),
+  MAP(imm_v256_align<21>),
+  MAP(imm_v256_align<22>),
+  MAP(imm_v256_align<23>),
+  MAP(imm_v256_align<24>),
+  MAP(imm_v256_align<25>),
+  MAP(imm_v256_align<26>),
+  MAP(imm_v256_align<27>),
+  MAP(imm_v256_align<28>),
+  MAP(imm_v256_align<29>),
+  MAP(imm_v256_align<30>),
+  MAP(imm_v256_align<31>),
+  MAP(v256_from_v128),
+  MAP(v256_zip_8),
+  MAP(v256_zip_16),
+  MAP(v256_zip_32),
+  MAP(v256_mul_s16),
+  MAP(v256_unpack_u8_s16),
+  MAP(v256_unpack_s8_s16),
+  MAP(v256_unpack_u16_s32),
+  MAP(v256_unpack_s16_s32),
+  MAP(v256_shl_8),
+  MAP(v256_shr_u8),
+  MAP(v256_shr_s8),
+  MAP(v256_shl_16),
+  MAP(v256_shr_u16),
+  MAP(v256_shr_s16),
+  MAP(v256_shl_32),
+  MAP(v256_shr_u32),
+  MAP(v256_shr_s32),
+  MAP(v256_abs_s8),
+  MAP(v256_abs_s16),
+  MAP(v256_padd_s16),
+  MAP(v256_unpacklo_u16_s32),
+  MAP(v256_unpacklo_s16_s32),
+  MAP(v256_unpackhi_u16_s32),
+  MAP(v256_unpackhi_s16_s32),
+  MAP(imm_v256_shr_n_byte<1>),
+  MAP(imm_v256_shr_n_byte<2>),
+  MAP(imm_v256_shr_n_byte<3>),
+  MAP(imm_v256_shr_n_byte<4>),
+  MAP(imm_v256_shr_n_byte<5>),
+  MAP(imm_v256_shr_n_byte<6>),
+  MAP(imm_v256_shr_n_byte<7>),
+  MAP(imm_v256_shr_n_byte<8>),
+  MAP(imm_v256_shr_n_byte<9>),
+  MAP(imm_v256_shr_n_byte<10>),
+  MAP(imm_v256_shr_n_byte<11>),
+  MAP(imm_v256_shr_n_byte<12>),
+  MAP(imm_v256_shr_n_byte<13>),
+  MAP(imm_v256_shr_n_byte<14>),
+  MAP(imm_v256_shr_n_byte<15>),
+  MAP(imm_v256_shr_n_byte<16>),
+  MAP(imm_v256_shr_n_byte<17>),
+  MAP(imm_v256_shr_n_byte<18>),
+  MAP(imm_v256_shr_n_byte<19>),
+  MAP(imm_v256_shr_n_byte<20>),
+  MAP(imm_v256_shr_n_byte<21>),
+  MAP(imm_v256_shr_n_byte<22>),
+  MAP(imm_v256_shr_n_byte<23>),
+  MAP(imm_v256_shr_n_byte<24>),
+  MAP(imm_v256_shr_n_byte<25>),
+  MAP(imm_v256_shr_n_byte<26>),
+  MAP(imm_v256_shr_n_byte<27>),
+  MAP(imm_v256_shr_n_byte<28>),
+  MAP(imm_v256_shr_n_byte<29>),
+  MAP(imm_v256_shr_n_byte<30>),
+  MAP(imm_v256_shr_n_byte<31>),
+  MAP(imm_v256_shl_n_byte<1>),
+  MAP(imm_v256_shl_n_byte<2>),
+  MAP(imm_v256_shl_n_byte<3>),
+  MAP(imm_v256_shl_n_byte<4>),
+  MAP(imm_v256_shl_n_byte<5>),
+  MAP(imm_v256_shl_n_byte<6>),
+  MAP(imm_v256_shl_n_byte<7>),
+  MAP(imm_v256_shl_n_byte<8>),
+  MAP(imm_v256_shl_n_byte<9>),
+  MAP(imm_v256_shl_n_byte<10>),
+  MAP(imm_v256_shl_n_byte<11>),
+  MAP(imm_v256_shl_n_byte<12>),
+  MAP(imm_v256_shl_n_byte<13>),
+  MAP(imm_v256_shl_n_byte<14>),
+  MAP(imm_v256_shl_n_byte<15>),
+  MAP(imm_v256_shl_n_byte<16>),
+  MAP(imm_v256_shl_n_byte<17>),
+  MAP(imm_v256_shl_n_byte<18>),
+  MAP(imm_v256_shl_n_byte<19>),
+  MAP(imm_v256_shl_n_byte<20>),
+  MAP(imm_v256_shl_n_byte<21>),
+  MAP(imm_v256_shl_n_byte<22>),
+  MAP(imm_v256_shl_n_byte<23>),
+  MAP(imm_v256_shl_n_byte<24>),
+  MAP(imm_v256_shl_n_byte<25>),
+  MAP(imm_v256_shl_n_byte<26>),
+  MAP(imm_v256_shl_n_byte<27>),
+  MAP(imm_v256_shl_n_byte<28>),
+  MAP(imm_v256_shl_n_byte<29>),
+  MAP(imm_v256_shl_n_byte<30>),
+  MAP(imm_v256_shl_n_byte<31>),
+  MAP(imm_v256_shl_n_8<1>),
+  MAP(imm_v256_shl_n_8<2>),
+  MAP(imm_v256_shl_n_8<3>),
+  MAP(imm_v256_shl_n_8<4>),
+  MAP(imm_v256_shl_n_8<5>),
+  MAP(imm_v256_shl_n_8<6>),
+  MAP(imm_v256_shl_n_8<7>),
+  MAP(imm_v256_shr_n_u8<1>),
+  MAP(imm_v256_shr_n_u8<2>),
+  MAP(imm_v256_shr_n_u8<3>),
+  MAP(imm_v256_shr_n_u8<4>),
+  MAP(imm_v256_shr_n_u8<5>),
+  MAP(imm_v256_shr_n_u8<6>),
+  MAP(imm_v256_shr_n_u8<7>),
+  MAP(imm_v256_shr_n_s8<1>),
+  MAP(imm_v256_shr_n_s8<2>),
+  MAP(imm_v256_shr_n_s8<3>),
+  MAP(imm_v256_shr_n_s8<4>),
+  MAP(imm_v256_shr_n_s8<5>),
+  MAP(imm_v256_shr_n_s8<6>),
+  MAP(imm_v256_shr_n_s8<7>),
+  MAP(imm_v256_shl_n_16<1>),
+  MAP(imm_v256_shl_n_16<2>),
+  MAP(imm_v256_shl_n_16<4>),
+  MAP(imm_v256_shl_n_16<6>),
+  MAP(imm_v256_shl_n_16<8>),
+  MAP(imm_v256_shl_n_16<10>),
+  MAP(imm_v256_shl_n_16<12>),
+  MAP(imm_v256_shl_n_16<14>),
+  MAP(imm_v256_shr_n_u16<1>),
+  MAP(imm_v256_shr_n_u16<2>),
+  MAP(imm_v256_shr_n_u16<4>),
+  MAP(imm_v256_shr_n_u16<6>),
+  MAP(imm_v256_shr_n_u16<8>),
+  MAP(imm_v256_shr_n_u16<10>),
+  MAP(imm_v256_shr_n_u16<12>),
+  MAP(imm_v256_shr_n_u16<14>),
+  MAP(imm_v256_shr_n_s16<1>),
+  MAP(imm_v256_shr_n_s16<2>),
+  MAP(imm_v256_shr_n_s16<4>),
+  MAP(imm_v256_shr_n_s16<6>),
+  MAP(imm_v256_shr_n_s16<8>),
+  MAP(imm_v256_shr_n_s16<10>),
+  MAP(imm_v256_shr_n_s16<12>),
+  MAP(imm_v256_shr_n_s16<14>),
+  MAP(imm_v256_shl_n_32<1>),
+  MAP(imm_v256_shl_n_32<4>),
+  MAP(imm_v256_shl_n_32<8>),
+  MAP(imm_v256_shl_n_32<12>),
+  MAP(imm_v256_shl_n_32<16>),
+  MAP(imm_v256_shl_n_32<20>),
+  MAP(imm_v256_shl_n_32<24>),
+  MAP(imm_v256_shl_n_32<28>),
+  MAP(imm_v256_shr_n_u32<1>),
+  MAP(imm_v256_shr_n_u32<4>),
+  MAP(imm_v256_shr_n_u32<8>),
+  MAP(imm_v256_shr_n_u32<12>),
+  MAP(imm_v256_shr_n_u32<16>),
+  MAP(imm_v256_shr_n_u32<20>),
+  MAP(imm_v256_shr_n_u32<24>),
+  MAP(imm_v256_shr_n_u32<28>),
+  MAP(imm_v256_shr_n_s32<1>),
+  MAP(imm_v256_shr_n_s32<4>),
+  MAP(imm_v256_shr_n_s32<8>),
+  MAP(imm_v256_shr_n_s32<12>),
+  MAP(imm_v256_shr_n_s32<16>),
+  MAP(imm_v256_shr_n_s32<20>),
+  MAP(imm_v256_shr_n_s32<24>),
+  MAP(imm_v256_shr_n_s32<28>),
+  MAP(v256_zero),
+  MAP(v256_dup_8),
+  MAP(v256_dup_16),
+  MAP(v256_dup_32),
+  MAP(v256_low_u32),
+  MAP(v256_low_v64),
+  MAP(v256_from_64),
+  MAP(v256_from_v64),
+  MAP(v256_ziplo_128),
+  MAP(v256_ziphi_128),
+  MAP(v256_unpacklo_u8_s16),
+  MAP(v256_unpackhi_u8_s16),
+  MAP(v256_unpacklo_s8_s16),
+  MAP(v256_unpackhi_s8_s16),
   { NULL, NULL, NULL } };
 #undef MAP
@@ -922,6 +1291,14 @@ void TestSimd1Arg(uint32_t iterations, uint32_t mask, uint32_t maskwidth,
         reinterpret_cast<fptr>(v128_load_aligned), simd, d,
         reinterpret_cast<fptr>(c_u64_store_aligned),
         reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
+  } else if (typeid(CRet) == typeid(uint64_t) &&
+             typeid(CArg) == typeid(c_v256)) {
+    // U64_V256
+    error = CompareSimd1Arg<uint64_t, v256, CRet, CArg>(
+        reinterpret_cast<fptr>(u64_store_aligned),
+        reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+        reinterpret_cast<fptr>(c_u64_store_aligned),
+        reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
   } else if (typeid(CRet) == typeid(c_v64) &&
              typeid(CArg) == typeid(c_v128)) {
     // V64_V128
@@ -970,6 +1347,62 @@
         reinterpret_cast<fptr>(u32_load_aligned), simd, d,
         reinterpret_cast<fptr>(c_v128_store_aligned),
         reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
+  } else if (typeid(CRet) == typeid(c_v256) &&
+             typeid(CArg) == typeid(c_v256)) {
+    // V256_V256
+    error = CompareSimd1Arg<v256, v256, CRet, CArg>(
+        reinterpret_cast<fptr>(v256_store_aligned),
+        reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+        reinterpret_cast<fptr>(c_v256_store_aligned),
+        reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
+  } else if (typeid(CRet) == typeid(c_v256) &&
+             typeid(CArg) == typeid(c_v128)) {
+    // V256_V128
+    error = CompareSimd1Arg<v256, v128, CRet, CArg>(
+        reinterpret_cast<fptr>(v256_store_aligned),
+        reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+        reinterpret_cast<fptr>(c_v256_store_aligned),
+        reinterpret_cast<fptr>(c_v128_load_aligned), ref_simd, ref_d, s);
+  } else if (typeid(CRet) == typeid(c_v256) &&
+             typeid(CArg) == typeid(uint8_t)) {
+    // V256_U8
+    error = CompareSimd1Arg<v256, uint8_t, CRet, CArg>(
+        reinterpret_cast<fptr>(v256_store_aligned),
+        reinterpret_cast<fptr>(u8_load_aligned), simd, d,
+        reinterpret_cast<fptr>(c_v256_store_aligned),
+        reinterpret_cast<fptr>(c_u8_load_aligned), ref_simd, ref_d, s);
+  } else if (typeid(CRet) == typeid(c_v256) &&
+             typeid(CArg) == typeid(uint16_t)) {
+    // V256_U16
+    error = CompareSimd1Arg<v256, uint16_t, CRet, CArg>(
+        reinterpret_cast<fptr>(v256_store_aligned),
+        reinterpret_cast<fptr>(u16_load_aligned), simd, d,
+        reinterpret_cast<fptr>(c_v256_store_aligned),
+        reinterpret_cast<fptr>(c_u16_load_aligned), ref_simd, ref_d, s);
+  } else if (typeid(CRet) == typeid(c_v256) &&
+             typeid(CArg) == typeid(uint32_t)) {
+    // V256_U32
+    error = CompareSimd1Arg<v256, uint32_t, CRet, CArg>(
+        reinterpret_cast<fptr>(v256_store_aligned),
+        reinterpret_cast<fptr>(u32_load_aligned), simd, d,
+        reinterpret_cast<fptr>(c_v256_store_aligned),
+        reinterpret_cast<fptr>(c_u32_load_aligned), ref_simd, ref_d, s);
+  } else if (typeid(CRet) == typeid(uint32_t) &&
+             typeid(CArg) == typeid(c_v256)) {
+    // U32_V256
+    error = CompareSimd1Arg<uint32_t, v256, CRet, CArg>(
+        reinterpret_cast<fptr>(u32_store_aligned),
+        reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+        reinterpret_cast<fptr>(c_u32_store_aligned),
+        reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
+  } else if (typeid(CRet) == typeid(c_v64) &&
+             typeid(CArg) == typeid(c_v256)) {
+    // V64_V256
+    error = CompareSimd1Arg<v64, v256, CRet, CArg>(
+        reinterpret_cast<fptr>(v64_store_aligned),
+        reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+        reinterpret_cast<fptr>(c_v64_store_aligned),
+        reinterpret_cast<fptr>(c_v256_load_aligned), ref_simd, ref_d, s);
   } else {
     FAIL() << "Internal error: Unknown intrinsic function "
            << typeid(CRet).name() << " " << name << "(" << typeid(CArg).name()
@@ -1140,6 +1573,67 @@
         reinterpret_cast<fptr>(c_v128_load_aligned),
         reinterpret_cast<fptr>(c_u32_load_aligned),
         reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+  } else if (typeid(CRet) == typeid(c_v256) &&
+             typeid(CArg1) == typeid(c_v256) &&
+             typeid(CArg2) == typeid(c_v256)) {
+    // V256_V256V256
+    error = CompareSimd2Args<v256, v256, v256, CRet, CArg1, CArg2>(
+        reinterpret_cast<fptr>(v256_store_aligned),
+        reinterpret_cast<fptr>(v256_load_aligned),
+        reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+        reinterpret_cast<fptr>(c_v256_store_aligned),
+        reinterpret_cast<fptr>(c_v256_load_aligned),
+        reinterpret_cast<fptr>(c_v256_load_aligned),
+        reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+  } else if (typeid(CRet) == typeid(int64_t) &&
+             typeid(CArg1) == typeid(c_v256) &&
+             typeid(CArg2) == typeid(c_v256)) {
+    // S64_V256V256
+    error = CompareSimd2Args<int64_t, v256, v256, CRet, CArg1, CArg2>(
+        reinterpret_cast<fptr>(u64_store_aligned),
+        reinterpret_cast<fptr>(v256_load_aligned),
+        reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+        reinterpret_cast<fptr>(c_u64_store_aligned),
+        reinterpret_cast<fptr>(c_v256_load_aligned),
+        reinterpret_cast<fptr>(c_v256_load_aligned),
+        reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+  } else if (typeid(CRet) == typeid(uint32_t) &&
+             typeid(CArg1) == typeid(c_v256) &&
+             typeid(CArg2) == typeid(c_v256)) {
+    // U32_V256V256
+    error = CompareSimd2Args<uint32_t, v256, v256, CRet, CArg1, CArg2>(
+        reinterpret_cast<fptr>(u32_store_aligned),
+        reinterpret_cast<fptr>(v256_load_aligned),
+        reinterpret_cast<fptr>(v256_load_aligned), simd, d,
+        reinterpret_cast<fptr>(c_u32_store_aligned),
+        reinterpret_cast<fptr>(c_v256_load_aligned),
+        reinterpret_cast<fptr>(c_v256_load_aligned),
+        reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+  } else if (typeid(CRet) == typeid(c_v256) &&
+             typeid(CArg1) == typeid(c_v128) &&
+             typeid(CArg2) == typeid(c_v128)) {
+    // V256_V128V128
+    error = CompareSimd2Args<v256, v128, v128, CRet, CArg1, CArg2>(
+        reinterpret_cast<fptr>(v256_store_aligned),
+        reinterpret_cast<fptr>(v128_load_aligned),
+        reinterpret_cast<fptr>(v128_load_aligned), simd, d,
+        reinterpret_cast<fptr>(c_v256_store_aligned),
+        reinterpret_cast<fptr>(c_v128_load_aligned),
+        reinterpret_cast<fptr>(c_v128_load_aligned),
+        reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
+  } else if (typeid(CRet) == typeid(c_v256) &&
+             typeid(CArg1) == typeid(c_v256) &&
+             typeid(CArg2) == typeid(uint32_t)) {
+    // V256_V256U32
+    error = CompareSimd2Args<v256, v256, uint32_t, CRet, CArg1, CArg2>(
+        reinterpret_cast<fptr>(v256_store_aligned),
+        reinterpret_cast<fptr>(v256_load_aligned),
+        reinterpret_cast<fptr>(u32_load_aligned), simd, d,
+        reinterpret_cast<fptr>(c_v256_store_aligned),
+        reinterpret_cast<fptr>(c_v256_load_aligned),
+        reinterpret_cast<fptr>(c_u32_load_aligned),
+        reinterpret_cast<fptr>(ref_simd), ref_d, s1, s2);
   } else {
     FAIL() << "Internal error: Unknown intrinsic function "
            << typeid(CRet).name() << " " << name << "("
@@ -1208,5 +1702,31 @@ template void TestSimd2Args<int64_t, c_v128, c_v128>(uint32_t, uint32_t,
                                                      uint32_t, const char *);
 template void TestSimd2Args<uint32_t, c_v128, c_v128>(uint32_t, uint32_t,
                                                       uint32_t, const char *);
+template void TestSimd1Arg<c_v256, c_v128>(uint32_t, uint32_t, uint32_t,
+                                           const char *);
+template void TestSimd1Arg<c_v256, c_v256>(uint32_t, uint32_t, uint32_t,
+                                           const char *);
+template void TestSimd1Arg<uint64_t, c_v256>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<c_v256, uint8_t>(uint32_t, uint32_t, uint32_t,
+                                            const char *);
+template void TestSimd1Arg<c_v256, uint16_t>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<c_v256, uint32_t>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<uint32_t, c_v256>(uint32_t, uint32_t, uint32_t,
+                                             const char *);
+template void TestSimd1Arg<c_v64, c_v256>(uint32_t, uint32_t, uint32_t,
+                                          const char *);
+template void TestSimd2Args<c_v256, c_v128, c_v128>(uint32_t, uint32_t,
+                                                    uint32_t, const char *);
+template void TestSimd2Args<c_v256, c_v256, c_v256>(uint32_t, uint32_t,
+                                                    uint32_t, const char *);
+template void TestSimd2Args<c_v256, c_v256, uint32_t>(uint32_t, uint32_t,
+                                                      uint32_t, const char *);
+template void TestSimd2Args<int64_t, c_v256, c_v256>(uint32_t, uint32_t,
+                                                     uint32_t, const char *);
+template void TestSimd2Args<uint32_t, c_v256, c_v256>(uint32_t, uint32_t,
+                                                      uint32_t, const char *);
 
 }  // namespace SIMD_NAMESPACE
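A note on the imm_v256_* templates added above: intrinsics such as v256_shr_n_u16() take their shift count as a compile-time immediate, so they cannot be stored directly in the m[] function-pointer table. Wrapping each one in a template <int shift> function materializes one addressable function per legal shift amount, which is why the table lists every instantiation separately (imm_v256_align<1> through <31>, and so on). A minimal sketch of the same trick, with simplified stand-in types rather than the real aom definitions:

// Sketch of the template-immediate trick: the shift must be a constant
// expression, so each shift amount becomes its own instantiation that can
// then sit in a function-pointer table like m[] above. The v256 struct and
// the lane math below are illustrative stand-ins, not the aom headers.
#include <cstdint>
#include <cstdio>

struct v256 {  // stand-in for the real 256-bit vector type
  uint64_t u64[4];
};

// Stand-in intrinsic: logical right shift of each 16-bit lane, with the
// shift supplied as a template parameter instead of a runtime argument.
template <int shift>
v256 imm_v256_shr_n_u16(v256 a) {
  v256 r;
  for (int i = 0; i < 4; i++) {
    uint64_t out = 0;
    for (int j = 0; j < 4; j++) {
      uint16_t lane = (uint16_t)(a.u64[i] >> (16 * j));
      out |= (uint64_t)(uint16_t)(lane >> shift) << (16 * j);
    }
    r.u64[i] = out;
  }
  return r;
}

typedef v256 (*v256_unary_fn)(v256);

// One table entry per legal immediate, mirroring the patch's MAP() list.
struct mapping {
  const char *name;
  v256_unary_fn fn;
};
static const mapping table[] = {
  { "imm_v256_shr_n_u16<1>", imm_v256_shr_n_u16<1> },
  { "imm_v256_shr_n_u16<2>", imm_v256_shr_n_u16<2> },
  { "imm_v256_shr_n_u16<4>", imm_v256_shr_n_u16<4> },
  { nullptr, nullptr }
};

int main() {
  v256 x = { { 0x0010001000100010ULL, 0, 0, 0 } };
  for (const mapping *p = table; p->name; p++)
    printf("%s -> low 64 bits 0x%016llx\n", p->name,
           (unsigned long long)p->fn(x).u64[0]);
  return 0;
}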