summary | refs | log | tree | commit | diff | stats
path: root/gfx/ycbcr/win64.patch
diff options
context:
space:
mode:
author: Matt A. Tobin <mattatobin@localhost.localdomain>, 2018-02-02 04:16:08 -0500
committer: Matt A. Tobin <mattatobin@localhost.localdomain>, 2018-02-02 04:16:08 -0500
commit: 5f8de423f190bbb79a62f804151bc24824fa32d8 (patch)
tree: 10027f336435511475e392454359edea8e25895d /gfx/ycbcr/win64.patch
parent: 49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff)
downloadUXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip
Add m-esr52 at 52.6.0
Diffstat (limited to 'gfx/ycbcr/win64.patch')
-rw-r--r-- gfx/ycbcr/win64.patch | 210
1 files changed, 210 insertions, 0 deletions
diff --git a/gfx/ycbcr/win64.patch b/gfx/ycbcr/win64.patch
new file mode 100644
index 000000000..bdccf2784
--- /dev/null
+++ b/gfx/ycbcr/win64.patch
@@ -0,0 +1,210 @@
+diff --git a/gfx/ycbcr/yuv_row_win64.cpp b/gfx/ycbcr/yuv_row_win64.cpp
+new file mode 100644
+--- /dev/null
++++ b/gfx/ycbcr/yuv_row_win64.cpp
+@@ -0,0 +1,205 @@
++// Copyright (c) 2010 The Chromium Authors. All rights reserved.
++// Use of this source code is governed by a BSD-style license that can be
++// found in the LICENSE file.
++
++#include "yuv_row.h"
++
++extern "C" {
++
++// The x64 compiler supports neither MMX nor inline assembly, so SSE2 intrinsics are used instead.
++// The U and V coefficient tables are addressed at byte offsets 2048 and 4096 from the start of kCoefficientsRgbY.
++#define kCoefficientsRgbU (reinterpret_cast<uint8*>(kCoefficientsRgbY) + 2048)
++#define kCoefficientsRgbV (reinterpret_cast<uint8*>(kCoefficientsRgbY) + 4096)
++
++#include <emmintrin.h>
++// Converts one row of Y/U/V samples to 4-byte output pixels through the kCoefficientsRgb* lookup tables, two pixels per loop pass.
++static void FastConvertYUVToRGB32Row_SSE2(const uint8* y_buf,  // Y samples; one is consumed per output pixel
++ const uint8* u_buf,  // U samples; one is shared by each pair of output pixels
++ const uint8* v_buf,  // V samples; one is shared by each pair of output pixels
++ uint8* rgb_buf,  // destination; 4 bytes are written per output pixel
++ int width) {  // number of output pixels to produce
++ __m128i xmm0, xmmY1, xmmY2;
++ __m128 xmmY;
++ // Main loop: each pass emits two adjacent pixels that share one U/V pair.
++ while (width >= 2) {
++ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * *u_buf++)),
++ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * *v_buf++)));
++ // xmm0 = saturating 16-bit sum of the 8-byte U and V table entries; it is reused for both pixels of the pair.
++ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf++));
++ xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
++ // Second pixel of the pair: the next Y sample combined with the same U/V sum.
++ xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf++));
++ xmmY2 = _mm_adds_epi16(xmmY2, xmm0);
++ // Place the low 64 bits of xmmY1 and xmmY2 side by side (shuffle 0x44), shift each 16-bit lane right by 6, then saturate to unsigned bytes.
++ xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),
++ 0x44);
++ xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);
++ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
++ // The low 8 bytes of the packed result are the two finished pixels.
++ _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);
++ rgb_buf += 8;
++ width -= 2;
++ }
++ // Odd width: convert the one remaining pixel and store only its 4 bytes.
++ if (width) {
++ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * *u_buf)),
++ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * *v_buf)));
++ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf));
++ xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
++ xmmY1 = _mm_srai_epi16(xmmY1, 6);
++ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
++ *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);
++ }
++}
++// Nearest-sample horizontal scaling: walks the source row in fixed-point steps of source_dx and converts each picked sample to a 4-byte pixel.
++static void ScaleYUVToRGB32Row_SSE2(const uint8* y_buf,  // source Y samples, indexed by x >> 16
++ const uint8* u_buf,  // source U samples, indexed by x >> 17
++ const uint8* v_buf,  // source V samples, indexed by x >> 17
++ uint8* rgb_buf,  // destination; 4 bytes are written per output pixel
++ int width,  // number of output pixels to produce
++ int source_dx) {  // amount added to the position x per output pixel (16 fractional bits, given the x >> 16 index)
++ __m128i xmm0, xmmY1, xmmY2;
++ __m128 xmmY;
++ uint8 u, v, y;
++ int x = 0;
++ // Main loop: two output pixels per pass; U/V are sampled once, at the position of the first pixel.
++ while (width >= 2) {
++ u = u_buf[x >> 17];
++ v = v_buf[x >> 17];
++ y = y_buf[x >> 16];
++ x += source_dx;
++ // Saturating 16-bit sum of the U and V table entries, then added to the first pixel's Y entry.
++ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
++ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
++ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
++ xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
++ // Second pixel: only Y is re-sampled, at the advanced position.
++ y = y_buf[x >> 16];
++ x += source_dx;
++
++ xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
++ xmmY2 = _mm_adds_epi16(xmmY2, xmm0);
++ // Merge the low 64 bits of both pixels, shift each 16-bit lane right by 6, saturate to unsigned bytes.
++ xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),
++ 0x44);
++ xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);
++ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
++ // Store the two finished pixels (8 bytes).
++ _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);
++ rgb_buf += 8;
++ width -= 2;
++ }
++ // Odd width: one last pixel, sampled at the current position.
++ if (width) {
++ u = u_buf[x >> 17];
++ v = v_buf[x >> 17];
++ y = y_buf[x >> 16];
++
++ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
++ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
++ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
++ xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
++ xmmY1 = _mm_srai_epi16(xmmY1, 6);
++ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
++ *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);
++ }
++}
++// Linearly interpolating horizontal scaling: blends neighbouring source samples by the fractional part of the fixed-point position before the table lookup.
++static void LinearScaleYUVToRGB32Row_SSE2(const uint8* y_buf,  // source Y samples, indexed by x >> 16 and (x >> 16) + 1
++ const uint8* u_buf,  // source U samples, indexed by x >> 17 and (x >> 17) + 1
++ const uint8* v_buf,  // source V samples, indexed by x >> 17 and (x >> 17) + 1
++ uint8* rgb_buf,  // destination; 4 bytes are written per output pixel
++ int width,  // number of output pixels to produce
++ int source_dx) {  // amount added to the position x per output pixel (16 fractional bits, given the x >> 16 index)
++ __m128i xmm0, xmmY1, xmmY2;
++ __m128 xmmY;
++ uint8 u0, u1, v0, v1, y0, y1;
++ uint32 uv_frac, y_frac, u, v, y;
++ int x = 0;
++ // For steps of at least 0x20000 (two Y samples per output pixel) start half a Y sample (32768) into the row.
++ if (source_dx >= 0x20000) {
++ x = 32768;
++ }
++ // Main loop: two output pixels per pass; the interpolated U/V pair is computed once and reused for both.
++ while(width >= 2) {
++ u0 = u_buf[x >> 17];
++ u1 = u_buf[(x >> 17) + 1];  // NOTE(review): the "+ 1" reads touch one element past the sampled index; assumes the caller keeps it readable - confirm
++ v0 = v_buf[x >> 17];
++ v1 = v_buf[(x >> 17) + 1];
++ y0 = y_buf[x >> 16];
++ y1 = y_buf[(x >> 16) + 1];
++ uv_frac = (x & 0x1fffe);  // low 17 bits of x with bit 0 cleared
++ y_frac = (x & 0xffff);  // low 16 bits of x
++ u = (uv_frac * u1 + (uv_frac ^ 0x1fffe) * u0) >> 17;  // weights are uv_frac and 0x1fffe - uv_frac
++ v = (uv_frac * v1 + (uv_frac ^ 0x1fffe) * v0) >> 17;
++ y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16;  // weights are y_frac and 0xffff - y_frac
++ x += source_dx;
++ // The interpolated values index the tables; the saturating U+V sum is then added to the first pixel's Y entry.
++ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
++ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
++ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
++ xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
++ // Second pixel: interpolate Y again at the advanced position; the U/V sum is reused.
++ y0 = y_buf[x >> 16];
++ y1 = y_buf[(x >> 16) + 1];
++ y_frac = (x & 0xffff);
++ y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16;
++ x += source_dx;
++
++ xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
++ xmmY2 = _mm_adds_epi16(xmmY2, xmm0);
++ // Merge the low 64 bits of both pixels, shift each 16-bit lane right by 6, saturate to unsigned bytes.
++ xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2),
++ 0x44);
++ xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6);
++ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
++ // Store the two finished pixels (8 bytes).
++ _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1);
++ rgb_buf += 8;
++ width -= 2;
++ }
++ // Odd width: the last pixel uses the samples at the current position directly, without interpolation.
++ if (width) {
++ u = u_buf[x >> 17];
++ v = v_buf[x >> 17];
++ y = y_buf[x >> 16];
++
++ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)),
++ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v)));
++ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y));
++
++ xmmY1 = _mm_adds_epi16(xmmY1, xmm0);
++ xmmY1 = _mm_srai_epi16(xmmY1, 6);
++ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1);
++ *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1);
++ }
++}
++// Non-static entry point inside the extern "C" block; forwards all arguments unchanged to FastConvertYUVToRGB32Row_SSE2.
++void FastConvertYUVToRGB32Row(const uint8* y_buf,
++ const uint8* u_buf,
++ const uint8* v_buf,
++ uint8* rgb_buf,
++ int width) {
++ FastConvertYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width);
++}
++// Non-static entry point inside the extern "C" block; forwards all arguments unchanged to ScaleYUVToRGB32Row_SSE2.
++void ScaleYUVToRGB32Row(const uint8* y_buf,
++ const uint8* u_buf,
++ const uint8* v_buf,
++ uint8* rgb_buf,
++ int width,
++ int source_dx) {
++ ScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
++}
++// Non-static entry point inside the extern "C" block; forwards all arguments unchanged to LinearScaleYUVToRGB32Row_SSE2.
++void LinearScaleYUVToRGB32Row(const uint8* y_buf,
++ const uint8* u_buf,
++ const uint8* v_buf,
++ uint8* rgb_buf,
++ int width,
++ int source_dx) {
++ LinearScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width,
++ source_dx);
++}
++
++} // extern "C"