From 5f8de423f190bbb79a62f804151bc24824fa32d8 Mon Sep 17 00:00:00 2001 From: "Matt A. Tobin" Date: Fri, 2 Feb 2018 04:16:08 -0500 Subject: Add m-esr52 at 52.6.0 --- intl/uconv/nsUTF8ToUnicodeSSE2.cpp | 96 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 intl/uconv/nsUTF8ToUnicodeSSE2.cpp (limited to 'intl/uconv/nsUTF8ToUnicodeSSE2.cpp') diff --git a/intl/uconv/nsUTF8ToUnicodeSSE2.cpp b/intl/uconv/nsUTF8ToUnicodeSSE2.cpp new file mode 100644 index 000000000..58e92be9b --- /dev/null +++ b/intl/uconv/nsUTF8ToUnicodeSSE2.cpp @@ -0,0 +1,96 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// This file should only be compiled if you're on x86 or x86_64. Additionally, +// you'll need to compile this file with -msse2 if you're using gcc. + +#include +#include "nscore.h" + +namespace mozilla { +namespace SSE2 { + +void +Convert_ascii_run(const char *&src, + char16_t *&dst, + int32_t len) +{ + if (len > 15) { + __m128i in, out1, out2; + __m128d *outp1, *outp2; + __m128i zeroes; + uint32_t offset; + + // align input to 16 bytes + while ((NS_PTR_TO_UINT32(src) & 15) && len > 0) { + if (*src & 0x80U) + return; + *dst++ = (char16_t) *src++; + len--; + } + + zeroes = _mm_setzero_si128(); + + offset = NS_PTR_TO_UINT32(dst) & 15; + + // Note: all these inner loops have to break, not return; we need + // to let the single-char loop below catch any leftover + // byte-at-a-time ASCII chars, since this function must consume + // all available ASCII chars before it returns + + if (offset == 0) { + while (len > 15) { + in = _mm_load_si128((__m128i *) src); + if (_mm_movemask_epi8(in)) + break; + out1 = _mm_unpacklo_epi8(in, zeroes); + out2 = _mm_unpackhi_epi8(in, zeroes); + _mm_stream_si128((__m128i *) dst, out1); + _mm_stream_si128((__m128i *) (dst + 8), out2); + dst += 16; + src += 16; + len -= 16; + } + } else if (offset == 8) { + outp1 = (__m128d *) &out1; + outp2 = (__m128d *) &out2; + while (len > 15) { + in = _mm_load_si128((__m128i *) src); + if (_mm_movemask_epi8(in)) + break; + out1 = _mm_unpacklo_epi8(in, zeroes); + out2 = _mm_unpackhi_epi8(in, zeroes); + _mm_storel_epi64((__m128i *) dst, out1); + _mm_storel_epi64((__m128i *) (dst + 8), out2); + _mm_storeh_pd((double *) (dst + 4), *outp1); + _mm_storeh_pd((double *) (dst + 12), *outp2); + src += 16; + dst += 16; + len -= 16; + } + } else { + while (len > 15) { + in = _mm_load_si128((__m128i *) src); + if (_mm_movemask_epi8(in)) + break; + out1 = _mm_unpacklo_epi8(in, zeroes); + out2 = _mm_unpackhi_epi8(in, zeroes); + _mm_storeu_si128((__m128i *) dst, out1); + _mm_storeu_si128((__m128i *) (dst + 8), out2); + src += 16; + dst += 16; + len -= 16; + } + } + } + + // finish off a byte at a time + + while (len-- > 0 && (*src & 0x80U) == 0) { + *dst++ = (char16_t) *src++; + } +} + +} // namespace SSE2 +} // namespace mozilla -- cgit v1.2.3