diff options
author | Moonchild <mcwerewolf@gmail.com> | 2018-10-01 14:34:26 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-10-01 14:34:26 +0200 |
commit | 79b00fc33b5cb6d56d29b50efac6d62ce3a89018 (patch) | |
tree | 77d8179bb3d9d76d9ec4fcfe396a35afcc8f73cd /media/ffvpx/libavcodec/x86 | |
parent | ab881a3bf513e591b6cc2966560cdab2b63a0f2a (diff) | |
parent | 7d1ee0e5e4958175ccde5d153b025f97a17caeb2 (diff) | |
download | UXP-79b00fc33b5cb6d56d29b50efac6d62ce3a89018.tar UXP-79b00fc33b5cb6d56d29b50efac6d62ce3a89018.tar.gz UXP-79b00fc33b5cb6d56d29b50efac6d62ce3a89018.tar.lz UXP-79b00fc33b5cb6d56d29b50efac6d62ce3a89018.tar.xz UXP-79b00fc33b5cb6d56d29b50efac6d62ce3a89018.zip |
Merge pull request #801 from trav90/update-from-upstream
Update ffvpx code to 4.0.2
Diffstat (limited to 'media/ffvpx/libavcodec/x86')
-rw-r--r-- | media/ffvpx/libavcodec/x86/constants.c | 23 | ||||
-rw-r--r-- | media/ffvpx/libavcodec/x86/constants.h | 2 | ||||
-rw-r--r-- | media/ffvpx/libavcodec/x86/vp8dsp.asm | 143 |
3 files changed, 79 insertions, 89 deletions
diff --git a/media/ffvpx/libavcodec/x86/constants.c b/media/ffvpx/libavcodec/x86/constants.c index 11002ee61..4bfb78cc3 100644 --- a/media/ffvpx/libavcodec/x86/constants.c +++ b/media/ffvpx/libavcodec/x86/constants.c @@ -26,23 +26,23 @@ DECLARE_ALIGNED(32, const ymm_reg, ff_pw_1) = { 0x0001000100010001ULL, 0x000 0x0001000100010001ULL, 0x0001000100010001ULL }; DECLARE_ALIGNED(32, const ymm_reg, ff_pw_2) = { 0x0002000200020002ULL, 0x0002000200020002ULL, 0x0002000200020002ULL, 0x0002000200020002ULL }; -DECLARE_ALIGNED(16, const xmm_reg, ff_pw_3) = { 0x0003000300030003ULL, 0x0003000300030003ULL }; -DECLARE_ALIGNED(32, const ymm_reg, ff_pw_4) = { 0x0004000400040004ULL, 0x0004000400040004ULL, +DECLARE_ASM_ALIGNED(16, const xmm_reg, ff_pw_3) = { 0x0003000300030003ULL, 0x0003000300030003ULL }; +DECLARE_ASM_ALIGNED(32, const ymm_reg, ff_pw_4) = { 0x0004000400040004ULL, 0x0004000400040004ULL, 0x0004000400040004ULL, 0x0004000400040004ULL }; -DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5) = { 0x0005000500050005ULL, 0x0005000500050005ULL }; +DECLARE_ASM_ALIGNED(16, const xmm_reg, ff_pw_5) = { 0x0005000500050005ULL, 0x0005000500050005ULL }; DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8) = { 0x0008000800080008ULL, 0x0008000800080008ULL }; -DECLARE_ALIGNED(16, const xmm_reg, ff_pw_9) = { 0x0009000900090009ULL, 0x0009000900090009ULL }; +DECLARE_ASM_ALIGNED(16, const xmm_reg, ff_pw_9) = { 0x0009000900090009ULL, 0x0009000900090009ULL }; DECLARE_ALIGNED(8, const uint64_t, ff_pw_15) = 0x000F000F000F000FULL; DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16) = { 0x0010001000100010ULL, 0x0010001000100010ULL }; DECLARE_ALIGNED(16, const xmm_reg, ff_pw_17) = { 0x0011001100110011ULL, 0x0011001100110011ULL }; -DECLARE_ALIGNED(16, const xmm_reg, ff_pw_18) = { 0x0012001200120012ULL, 0x0012001200120012ULL }; +DECLARE_ASM_ALIGNED(16, const xmm_reg, ff_pw_18) = { 0x0012001200120012ULL, 0x0012001200120012ULL }; DECLARE_ALIGNED(16, const xmm_reg, ff_pw_20) = { 0x0014001400140014ULL, 0x0014001400140014ULL }; DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32) = { 0x0020002000200020ULL, 0x0020002000200020ULL }; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_42) = 0x002A002A002A002AULL; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_53) = 0x0035003500350035ULL; -DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64) = { 0x0040004000400040ULL, 0x0040004000400040ULL }; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_96) = 0x0060006000600060ULL; -DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL; +DECLARE_ASM_ALIGNED(8, const uint64_t, ff_pw_42) = 0x002A002A002A002AULL; +DECLARE_ASM_ALIGNED(8, const uint64_t, ff_pw_53) = 0x0035003500350035ULL; +DECLARE_ASM_ALIGNED(16, const xmm_reg, ff_pw_64) = { 0x0040004000400040ULL, 0x0040004000400040ULL }; +DECLARE_ASM_ALIGNED(8, const uint64_t, ff_pw_96) = 0x0060006000600060ULL; +DECLARE_ASM_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL; DECLARE_ALIGNED(32, const ymm_reg, ff_pw_255) = { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL }; DECLARE_ALIGNED(32, const ymm_reg, ff_pw_256) = { 0x0100010001000100ULL, 0x0100010001000100ULL, @@ -74,7 +74,8 @@ DECLARE_ALIGNED(32, const ymm_reg, ff_pb_2) = { 0x0202020202020202ULL, 0x020 DECLARE_ALIGNED(32, const ymm_reg, ff_pb_3) = { 0x0303030303030303ULL, 0x0303030303030303ULL, 0x0303030303030303ULL, 0x0303030303030303ULL }; DECLARE_ALIGNED(32, const xmm_reg, ff_pb_15) = { 0x0F0F0F0F0F0F0F0FULL, 0x0F0F0F0F0F0F0F0FULL }; -DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80) = { 0x8080808080808080ULL, 0x8080808080808080ULL }; +DECLARE_ALIGNED(32, const ymm_reg, ff_pb_80) = { 0x8080808080808080ULL, 0x8080808080808080ULL, + 0x8080808080808080ULL, 0x8080808080808080ULL }; DECLARE_ALIGNED(32, const ymm_reg, ff_pb_FE) = { 0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL }; DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC) = 0xFCFCFCFCFCFCFCFCULL; diff --git a/media/ffvpx/libavcodec/x86/constants.h b/media/ffvpx/libavcodec/x86/constants.h index bbb0ef844..85da38b7b 100644 --- a/media/ffvpx/libavcodec/x86/constants.h +++ b/media/ffvpx/libavcodec/x86/constants.h @@ -57,7 +57,7 @@ extern const ymm_reg ff_pb_0; extern const ymm_reg ff_pb_1; extern const ymm_reg ff_pb_2; extern const ymm_reg ff_pb_3; -extern const xmm_reg ff_pb_80; +extern const ymm_reg ff_pb_80; extern const ymm_reg ff_pb_FE; extern const uint64_t ff_pb_FC; diff --git a/media/ffvpx/libavcodec/x86/vp8dsp.asm b/media/ffvpx/libavcodec/x86/vp8dsp.asm index e303b8029..75de5690a 100644 --- a/media/ffvpx/libavcodec/x86/vp8dsp.asm +++ b/media/ffvpx/libavcodec/x86/vp8dsp.asm @@ -664,6 +664,37 @@ INIT_XMM sse2 FILTER_V 8 %macro FILTER_BILINEAR 1 +%if cpuflag(ssse3) +cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my + shl myd, 4 +%ifdef PIC + lea picregq, [bilinear_filter_vb_m] +%endif + pxor m4, m4 + mova m3, [bilinear_filter_vb+myq-16] +.nextrow: + movh m0, [srcq+srcstrideq*0] + movh m1, [srcq+srcstrideq*1] + movh m2, [srcq+srcstrideq*2] + punpcklbw m0, m1 + punpcklbw m1, m2 + pmaddubsw m0, m3 + pmaddubsw m1, m3 + psraw m0, 2 + psraw m1, 2 + pavgw m0, m4 + pavgw m1, m4 +%if mmsize==8 + packuswb m0, m0 + packuswb m1, m1 + movh [dstq+dststrideq*0], m0 + movh [dstq+dststrideq*1], m1 +%else + packuswb m0, m1 + movh [dstq+dststrideq*0], m0 + movhps [dstq+dststrideq*1], m0 +%endif +%else ; cpuflag(ssse3) cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, picreg, my shl myd, 4 %ifdef PIC @@ -701,6 +732,7 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p movh [dstq+dststrideq*0], m0 movhps [dstq+dststrideq*1], m0 %endif +%endif ; cpuflag(ssse3) lea dstq, [dstq+dststrideq*2] lea srcq, [srcq+srcstrideq*2] @@ -708,6 +740,37 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p jg .nextrow REP_RET +%if cpuflag(ssse3) +cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg + shl mxd, 4 +%ifdef PIC + lea picregq, [bilinear_filter_vb_m] +%endif + pxor m4, m4 + mova m2, [filter_h2_shuf] + mova m3, [bilinear_filter_vb+mxq-16] +.nextrow: + movu m0, [srcq+srcstrideq*0] + movu m1, [srcq+srcstrideq*1] + pshufb m0, m2 + pshufb m1, m2 + pmaddubsw m0, m3 + pmaddubsw m1, m3 + psraw m0, 2 + psraw m1, 2 + pavgw m0, m4 + pavgw m1, m4 +%if mmsize==8 + packuswb m0, m0 + packuswb m1, m1 + movh [dstq+dststrideq*0], m0 + movh [dstq+dststrideq*1], m1 +%else + packuswb m0, m1 + movh [dstq+dststrideq*0], m0 + movhps [dstq+dststrideq*1], m0 +%endif +%else ; cpuflag(ssse3) cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg shl mxd, 4 %ifdef PIC @@ -746,6 +809,7 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride movh [dstq+dststrideq*0], m0 movhps [dstq+dststrideq*1], m0 %endif +%endif ; cpuflag(ssse3) lea dstq, [dstq+dststrideq*2] lea srcq, [srcq+srcstrideq*2] @@ -758,85 +822,10 @@ INIT_MMX mmxext FILTER_BILINEAR 4 INIT_XMM sse2 FILTER_BILINEAR 8 - -%macro FILTER_BILINEAR_SSSE3 1 -cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my - shl myd, 4 -%ifdef PIC - lea picregq, [bilinear_filter_vb_m] -%endif - pxor m4, m4 - mova m3, [bilinear_filter_vb+myq-16] -.nextrow: - movh m0, [srcq+srcstrideq*0] - movh m1, [srcq+srcstrideq*1] - movh m2, [srcq+srcstrideq*2] - punpcklbw m0, m1 - punpcklbw m1, m2 - pmaddubsw m0, m3 - pmaddubsw m1, m3 - psraw m0, 2 - psraw m1, 2 - pavgw m0, m4 - pavgw m1, m4 -%if mmsize==8 - packuswb m0, m0 - packuswb m1, m1 - movh [dstq+dststrideq*0], m0 - movh [dstq+dststrideq*1], m1 -%else - packuswb m0, m1 - movh [dstq+dststrideq*0], m0 - movhps [dstq+dststrideq*1], m0 -%endif - - lea dstq, [dstq+dststrideq*2] - lea srcq, [srcq+srcstrideq*2] - sub heightd, 2 - jg .nextrow - REP_RET - -cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg - shl mxd, 4 -%ifdef PIC - lea picregq, [bilinear_filter_vb_m] -%endif - pxor m4, m4 - mova m2, [filter_h2_shuf] - mova m3, [bilinear_filter_vb+mxq-16] -.nextrow: - movu m0, [srcq+srcstrideq*0] - movu m1, [srcq+srcstrideq*1] - pshufb m0, m2 - pshufb m1, m2 - pmaddubsw m0, m3 - pmaddubsw m1, m3 - psraw m0, 2 - psraw m1, 2 - pavgw m0, m4 - pavgw m1, m4 -%if mmsize==8 - packuswb m0, m0 - packuswb m1, m1 - movh [dstq+dststrideq*0], m0 - movh [dstq+dststrideq*1], m1 -%else - packuswb m0, m1 - movh [dstq+dststrideq*0], m0 - movhps [dstq+dststrideq*1], m0 -%endif - - lea dstq, [dstq+dststrideq*2] - lea srcq, [srcq+srcstrideq*2] - sub heightd, 2 - jg .nextrow - REP_RET -%endmacro - INIT_MMX ssse3 -FILTER_BILINEAR_SSSE3 4 +FILTER_BILINEAR 4 INIT_XMM ssse3 -FILTER_BILINEAR_SSSE3 8 +FILTER_BILINEAR 8 INIT_MMX mmx cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height |