summaryrefslogtreecommitdiffstats
path: root/gfx/ycbcr/yuv_convert_sse2.cpp
diff options
context:
space:
mode:
authorMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
committerMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
commit5f8de423f190bbb79a62f804151bc24824fa32d8 (patch)
tree10027f336435511475e392454359edea8e25895d /gfx/ycbcr/yuv_convert_sse2.cpp
parent49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff)
downloadUXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip
Add m-esr52 at 52.6.0
Diffstat (limited to 'gfx/ycbcr/yuv_convert_sse2.cpp')
-rw-r--r--gfx/ycbcr/yuv_convert_sse2.cpp47
1 files changed, 47 insertions, 0 deletions
diff --git a/gfx/ycbcr/yuv_convert_sse2.cpp b/gfx/ycbcr/yuv_convert_sse2.cpp
new file mode 100644
index 000000000..25fe20639
--- /dev/null
+++ b/gfx/ycbcr/yuv_convert_sse2.cpp
@@ -0,0 +1,47 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <emmintrin.h>
+#include "yuv_row.h"
+
+namespace mozilla {
+namespace gfx {
+
+// FilterRows combines two rows of the image using linear interpolation.
+// SSE2 version does 16 pixels at a time.
+void FilterRows_SSE2(uint8* ybuf, const uint8* y0_ptr, const uint8* y1_ptr,
+ int source_width, int source_y_fraction) {
+ __m128i zero = _mm_setzero_si128();
+ __m128i y1_fraction = _mm_set1_epi16(source_y_fraction);
+ __m128i y0_fraction = _mm_set1_epi16(256 - source_y_fraction);
+
+ const __m128i* y0_ptr128 = reinterpret_cast<const __m128i*>(y0_ptr);
+ const __m128i* y1_ptr128 = reinterpret_cast<const __m128i*>(y1_ptr);
+ __m128i* dest128 = reinterpret_cast<__m128i*>(ybuf);
+ __m128i* end128 = reinterpret_cast<__m128i*>(ybuf + source_width);
+
+ do {
+ __m128i y0 = _mm_loadu_si128(y0_ptr128);
+ __m128i y1 = _mm_loadu_si128(y1_ptr128);
+ __m128i y2 = _mm_unpackhi_epi8(y0, zero);
+ __m128i y3 = _mm_unpackhi_epi8(y1, zero);
+ y0 = _mm_unpacklo_epi8(y0, zero);
+ y1 = _mm_unpacklo_epi8(y1, zero);
+ y0 = _mm_mullo_epi16(y0, y0_fraction);
+ y1 = _mm_mullo_epi16(y1, y1_fraction);
+ y2 = _mm_mullo_epi16(y2, y0_fraction);
+ y3 = _mm_mullo_epi16(y3, y1_fraction);
+ y0 = _mm_add_epi16(y0, y1);
+ y2 = _mm_add_epi16(y2, y3);
+ y0 = _mm_srli_epi16(y0, 8);
+ y2 = _mm_srli_epi16(y2, 8);
+ y0 = _mm_packus_epi16(y0, y2);
+ *dest128++ = y0;
+ ++y0_ptr128;
+ ++y1_ptr128;
+ } while (dest128 < end128);
+}
+
+} // namespace gfx
+} // namespace mozilla