summaryrefslogtreecommitdiffstats
path: root/intl/uconv/nsUTF8ToUnicodeSSE2.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/uconv/nsUTF8ToUnicodeSSE2.cpp')
-rw-r--r--intl/uconv/nsUTF8ToUnicodeSSE2.cpp96
1 files changed, 96 insertions, 0 deletions
diff --git a/intl/uconv/nsUTF8ToUnicodeSSE2.cpp b/intl/uconv/nsUTF8ToUnicodeSSE2.cpp
new file mode 100644
index 000000000..58e92be9b
--- /dev/null
+++ b/intl/uconv/nsUTF8ToUnicodeSSE2.cpp
@@ -0,0 +1,96 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This file should only be compiled if you're on x86 or x86_64. Additionally,
+// you'll need to compile this file with -msse2 if you're using gcc.
+
+#include <emmintrin.h>
+#include "nscore.h"
+
+namespace mozilla {
+namespace SSE2 {
+
+void
+Convert_ascii_run(const char *&src,
+ char16_t *&dst,
+ int32_t len)
+{
+ if (len > 15) {
+ __m128i in, out1, out2;
+ __m128d *outp1, *outp2;
+ __m128i zeroes;
+ uint32_t offset;
+
+ // align input to 16 bytes
+ while ((NS_PTR_TO_UINT32(src) & 15) && len > 0) {
+ if (*src & 0x80U)
+ return;
+ *dst++ = (char16_t) *src++;
+ len--;
+ }
+
+ zeroes = _mm_setzero_si128();
+
+ offset = NS_PTR_TO_UINT32(dst) & 15;
+
+ // Note: all these inner loops have to break, not return; we need
+ // to let the single-char loop below catch any leftover
+ // byte-at-a-time ASCII chars, since this function must consume
+ // all available ASCII chars before it returns
+
+ if (offset == 0) {
+ while (len > 15) {
+ in = _mm_load_si128((__m128i *) src);
+ if (_mm_movemask_epi8(in))
+ break;
+ out1 = _mm_unpacklo_epi8(in, zeroes);
+ out2 = _mm_unpackhi_epi8(in, zeroes);
+ _mm_stream_si128((__m128i *) dst, out1);
+ _mm_stream_si128((__m128i *) (dst + 8), out2);
+ dst += 16;
+ src += 16;
+ len -= 16;
+ }
+ } else if (offset == 8) {
+ outp1 = (__m128d *) &out1;
+ outp2 = (__m128d *) &out2;
+ while (len > 15) {
+ in = _mm_load_si128((__m128i *) src);
+ if (_mm_movemask_epi8(in))
+ break;
+ out1 = _mm_unpacklo_epi8(in, zeroes);
+ out2 = _mm_unpackhi_epi8(in, zeroes);
+ _mm_storel_epi64((__m128i *) dst, out1);
+ _mm_storel_epi64((__m128i *) (dst + 8), out2);
+ _mm_storeh_pd((double *) (dst + 4), *outp1);
+ _mm_storeh_pd((double *) (dst + 12), *outp2);
+ src += 16;
+ dst += 16;
+ len -= 16;
+ }
+ } else {
+ while (len > 15) {
+ in = _mm_load_si128((__m128i *) src);
+ if (_mm_movemask_epi8(in))
+ break;
+ out1 = _mm_unpacklo_epi8(in, zeroes);
+ out2 = _mm_unpackhi_epi8(in, zeroes);
+ _mm_storeu_si128((__m128i *) dst, out1);
+ _mm_storeu_si128((__m128i *) (dst + 8), out2);
+ src += 16;
+ dst += 16;
+ len -= 16;
+ }
+ }
+ }
+
+ // finish off a byte at a time
+
+ while (len-- > 0 && (*src & 0x80U) == 0) {
+ *dst++ = (char16_t) *src++;
+ }
+}
+
+} // namespace SSE2
+} // namespace mozilla