diff options
author | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
---|---|---|
committer | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
commit | 5f8de423f190bbb79a62f804151bc24824fa32d8 (patch) | |
tree | 10027f336435511475e392454359edea8e25895d /gfx/ycbcr/win64.patch | |
parent | 49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff) | |
download | UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip |
Add m-esr52 at 52.6.0
Diffstat (limited to 'gfx/ycbcr/win64.patch')
-rw-r--r-- | gfx/ycbcr/win64.patch | 210 |
1 files changed, 210 insertions, 0 deletions
diff --git a/gfx/ycbcr/win64.patch b/gfx/ycbcr/win64.patch new file mode 100644 index 000000000..bdccf2784 --- /dev/null +++ b/gfx/ycbcr/win64.patch @@ -0,0 +1,210 @@ +diff --git a/gfx/ycbcr/yuv_row_win64.cpp b/gfx/ycbcr/yuv_row_win64.cpp +new file mode 100644 +--- /dev/null ++++ b/gfx/ycbcr/yuv_row_win64.cpp +@@ -0,0 +1,205 @@ ++// Copyright (c) 2010 The Chromium Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style license that can be ++// found in the LICENSE file. ++ ++#include "yuv_row.h" ++ ++extern "C" { ++ ++// x64 compiler doesn't support MMX and inline assembler. Use SSE2 intrinsics. ++ ++#define kCoefficientsRgbU (reinterpret_cast<uint8*>(kCoefficientsRgbY) + 2048) ++#define kCoefficientsRgbV (reinterpret_cast<uint8*>(kCoefficientsRgbY) + 4096) ++ ++#include <emmintrin.h> ++ ++static void FastConvertYUVToRGB32Row_SSE2(const uint8* y_buf, ++ const uint8* u_buf, ++ const uint8* v_buf, ++ uint8* rgb_buf, ++ int width) { ++ __m128i xmm0, xmmY1, xmmY2; ++ __m128 xmmY; ++ ++ while (width >= 2) { ++ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * *u_buf++)), ++ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * *v_buf++))); ++ ++ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf++)); ++ xmmY1 = _mm_adds_epi16(xmmY1, xmm0); ++ ++ xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf++)); ++ xmmY2 = _mm_adds_epi16(xmmY2, xmm0); ++ ++ xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2), ++ 0x44); ++ xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6); ++ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1); ++ ++ _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1); ++ rgb_buf += 8; ++ width -= 2; ++ } ++ ++ if (width) { ++ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * *u_buf)), ++ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * *v_buf))); ++ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * *y_buf)); ++ xmmY1 = _mm_adds_epi16(xmmY1, xmm0); ++ xmmY1 = _mm_srai_epi16(xmmY1, 6); ++ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1); ++ *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1); ++ } ++} ++ ++static void ScaleYUVToRGB32Row_SSE2(const uint8* y_buf, ++ const uint8* u_buf, ++ const uint8* v_buf, ++ uint8* rgb_buf, ++ int width, ++ int source_dx) { ++ __m128i xmm0, xmmY1, xmmY2; ++ __m128 xmmY; ++ uint8 u, v, y; ++ int x = 0; ++ ++ while (width >= 2) { ++ u = u_buf[x >> 17]; ++ v = v_buf[x >> 17]; ++ y = y_buf[x >> 16]; ++ x += source_dx; ++ ++ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)), ++ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v))); ++ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y)); ++ xmmY1 = _mm_adds_epi16(xmmY1, xmm0); ++ ++ y = y_buf[x >> 16]; ++ x += source_dx; ++ ++ xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y)); ++ xmmY2 = _mm_adds_epi16(xmmY2, xmm0); ++ ++ xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2), ++ 0x44); ++ xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6); ++ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1); ++ ++ _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1); ++ rgb_buf += 8; ++ width -= 2; ++ } ++ ++ if (width) { ++ u = u_buf[x >> 17]; ++ v = v_buf[x >> 17]; ++ y = y_buf[x >> 16]; ++ ++ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)), ++ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v))); ++ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y)); ++ xmmY1 = _mm_adds_epi16(xmmY1, xmm0); ++ xmmY1 = _mm_srai_epi16(xmmY1, 6); ++ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1); ++ *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1); ++ } ++} ++ ++static void LinearScaleYUVToRGB32Row_SSE2(const uint8* y_buf, ++ const uint8* u_buf, ++ const uint8* v_buf, ++ uint8* rgb_buf, ++ int width, ++ int source_dx) { ++ __m128i xmm0, xmmY1, xmmY2; ++ __m128 xmmY; ++ uint8 u0, u1, v0, v1, y0, y1; ++ uint32 uv_frac, y_frac, u, v, y; ++ int x = 0; ++ ++ if (source_dx >= 0x20000) { ++ x = 32768; ++ } ++ ++ while(width >= 2) { ++ u0 = u_buf[x >> 17]; ++ u1 = u_buf[(x >> 17) + 1]; ++ v0 = v_buf[x >> 17]; ++ v1 = v_buf[(x >> 17) + 1]; ++ y0 = y_buf[x >> 16]; ++ y1 = y_buf[(x >> 16) + 1]; ++ uv_frac = (x & 0x1fffe); ++ y_frac = (x & 0xffff); ++ u = (uv_frac * u1 + (uv_frac ^ 0x1fffe) * u0) >> 17; ++ v = (uv_frac * v1 + (uv_frac ^ 0x1fffe) * v0) >> 17; ++ y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16; ++ x += source_dx; ++ ++ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)), ++ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v))); ++ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y)); ++ xmmY1 = _mm_adds_epi16(xmmY1, xmm0); ++ ++ y0 = y_buf[x >> 16]; ++ y1 = y_buf[(x >> 16) + 1]; ++ y_frac = (x & 0xffff); ++ y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16; ++ x += source_dx; ++ ++ xmmY2 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y)); ++ xmmY2 = _mm_adds_epi16(xmmY2, xmm0); ++ ++ xmmY = _mm_shuffle_ps(_mm_castsi128_ps(xmmY1), _mm_castsi128_ps(xmmY2), ++ 0x44); ++ xmmY1 = _mm_srai_epi16(_mm_castps_si128(xmmY), 6); ++ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1); ++ ++ _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), xmmY1); ++ rgb_buf += 8; ++ width -= 2; ++ } ++ ++ if (width) { ++ u = u_buf[x >> 17]; ++ v = v_buf[x >> 17]; ++ y = y_buf[x >> 16]; ++ ++ xmm0 = _mm_adds_epi16(_mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbU + 8 * u)), ++ _mm_loadl_epi64(reinterpret_cast<__m128i*>(kCoefficientsRgbV + 8 * v))); ++ xmmY1 = _mm_loadl_epi64(reinterpret_cast<__m128i*>(reinterpret_cast<uint8*>(kCoefficientsRgbY) + 8 * y)); ++ ++ xmmY1 = _mm_adds_epi16(xmmY1, xmm0); ++ xmmY1 = _mm_srai_epi16(xmmY1, 6); ++ xmmY1 = _mm_packus_epi16(xmmY1, xmmY1); ++ *reinterpret_cast<uint32*>(rgb_buf) = _mm_cvtsi128_si32(xmmY1); ++ } ++} ++ ++void FastConvertYUVToRGB32Row(const uint8* y_buf, ++ const uint8* u_buf, ++ const uint8* v_buf, ++ uint8* rgb_buf, ++ int width) { ++ FastConvertYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width); ++} ++ ++void ScaleYUVToRGB32Row(const uint8* y_buf, ++ const uint8* u_buf, ++ const uint8* v_buf, ++ uint8* rgb_buf, ++ int width, ++ int source_dx) { ++ ScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width, source_dx); ++} ++ ++void LinearScaleYUVToRGB32Row(const uint8* y_buf, ++ const uint8* u_buf, ++ const uint8* v_buf, ++ uint8* rgb_buf, ++ int width, ++ int source_dx) { ++ LinearScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width, ++ source_dx); ++} ++ ++} // extern "C" |