summaryrefslogtreecommitdiffstats
path: root/third_party/aom/aom_dsp/simd/v64_intrinsics_x86.h
diff options
context:
space:
mode:
authortrav90 <travawine@palemoon.org>2018-10-19 21:52:15 -0500
committertrav90 <travawine@palemoon.org>2018-10-19 21:52:20 -0500
commitbbcc64772580c8a979288791afa02d30bc476d2e (patch)
tree437ce94c3fdd7497508e5b55de06c6d011678597 /third_party/aom/aom_dsp/simd/v64_intrinsics_x86.h
parent14805f6ddbfb173c327768fff9f81f40ce5e81b0 (diff)
downloadUXP-bbcc64772580c8a979288791afa02d30bc476d2e.tar
UXP-bbcc64772580c8a979288791afa02d30bc476d2e.tar.gz
UXP-bbcc64772580c8a979288791afa02d30bc476d2e.tar.lz
UXP-bbcc64772580c8a979288791afa02d30bc476d2e.tar.xz
UXP-bbcc64772580c8a979288791afa02d30bc476d2e.zip
Update aom to v1.0.0
Update aom to commit id d14c5bb4f336ef1842046089849dee4a301fbbf0.
Diffstat (limited to 'third_party/aom/aom_dsp/simd/v64_intrinsics_x86.h')
-rw-r--r--third_party/aom/aom_dsp/simd/v64_intrinsics_x86.h41
1 files changed, 31 insertions, 10 deletions
diff --git a/third_party/aom/aom_dsp/simd/v64_intrinsics_x86.h b/third_party/aom/aom_dsp/simd/v64_intrinsics_x86.h
index 8dcc9f6fc..130052ee1 100644
--- a/third_party/aom/aom_dsp/simd/v64_intrinsics_x86.h
+++ b/third_party/aom/aom_dsp/simd/v64_intrinsics_x86.h
@@ -90,8 +90,7 @@ SIMD_INLINE void v64_store_unaligned(void *p, v64 a) {
_mm_storel_epi64((__m128i *)p, a);
}
-// The following function requires an immediate.
-#if defined(__OPTIMIZE__) && __OPTIMIZE__
+#if defined(__OPTIMIZE__) && __OPTIMIZE__ && !defined(__clang__)
#define v64_align(a, b, c) \
((c) ? _mm_srli_si128(_mm_unpacklo_epi64(b, a), (c)) : b)
#else
@@ -112,6 +111,10 @@ SIMD_INLINE v64 v64_add_8(v64 a, v64 b) { return _mm_add_epi8(a, b); }
SIMD_INLINE v64 v64_add_16(v64 a, v64 b) { return _mm_add_epi16(a, b); }
+SIMD_INLINE v64 v64_sadd_u8(v64 a, v64 b) { return _mm_adds_epu8(a, b); }
+
+SIMD_INLINE v64 v64_sadd_s8(v64 a, v64 b) { return _mm_adds_epi8(a, b); }
+
SIMD_INLINE v64 v64_sadd_s16(v64 a, v64 b) { return _mm_adds_epi16(a, b); }
SIMD_INLINE v64 v64_add_32(v64 a, v64 b) { return _mm_add_epi32(a, b); }
@@ -170,6 +173,22 @@ SIMD_INLINE v64 v64_pack_s32_s16(v64 a, v64 b) {
return _mm_packs_epi32(t, t);
}
+SIMD_INLINE v64 v64_pack_s32_u16(v64 a, v64 b) {
+#if defined(__SSE4_1__)
+ __m128i t = _mm_unpacklo_epi64(b, a);
+ return _mm_packus_epi32(t, t);
+#else
+ int32_t ah = v64_high_u32(a);
+ int32_t al = v64_low_u32(a);
+ int32_t bh = v64_high_u32(b);
+ int32_t bl = v64_low_u32(b);
+ return v64_from_16(ah > 65535 ? 65535 : ah < 0 ? 0 : ah,
+ al > 65535 ? 65535 : al < 0 ? 0 : al,
+ bh > 65535 ? 65535 : bh < 0 ? 0 : bh,
+ bl > 65535 ? 65535 : bl < 0 ? 0 : bl);
+#endif
+}
+
SIMD_INLINE v64 v64_pack_s16_u8(v64 a, v64 b) {
__m128i t = _mm_unpacklo_epi64(b, a);
return _mm_packus_epi16(t, t);
@@ -272,14 +291,11 @@ SIMD_INLINE v64 v64_shuffle_8(v64 x, v64 pattern) {
}
SIMD_INLINE int64_t v64_dotp_su8(v64 a, v64 b) {
- __m128i r, r1, r2, z;
- z = _mm_setzero_si128();
- r1 = _mm_madd_epi16(_mm_slli_epi16(_mm_unpacklo_epi8(a, z), 8),
- _mm_unpacklo_epi8(b, z));
- r2 = _mm_srli_si128(r1, 8);
- r = _mm_add_epi32(r1, r2);
- r = _mm_add_epi32(r, _mm_srli_si128(r, 4));
- return ((int32_t)v64_low_u32(r)) >> 8;
+ __m128i t = _mm_madd_epi16(_mm_srai_epi16(_mm_unpacklo_epi8(a, a), 8),
+ _mm_unpacklo_epi8(b, _mm_setzero_si128()));
+ t = _mm_add_epi32(t, _mm_srli_si128(t, 8));
+ t = _mm_add_epi32(t, _mm_srli_si128(t, 4));
+ return (int32_t)v64_low_u32(t);
}
SIMD_INLINE int64_t v64_dotp_s16(v64 a, v64 b) {
@@ -371,6 +387,11 @@ SIMD_INLINE v64 v64_rdavg_u8(v64 a, v64 b) {
_mm_and_si128(_mm_xor_si128(a, b), v64_dup_8(1)));
}
+SIMD_INLINE v64 v64_rdavg_u16(v64 a, v64 b) {
+ return _mm_sub_epi16(_mm_avg_epu16(a, b),
+ _mm_and_si128(_mm_xor_si128(a, b), v64_dup_16(1)));
+}
+
SIMD_INLINE v64 v64_avg_u16(v64 a, v64 b) { return _mm_avg_epu16(a, b); }
SIMD_INLINE v64 v64_min_u8(v64 a, v64 b) { return _mm_min_epu8(a, b); }