summaryrefslogtreecommitdiffstats
path: root/media/ffvpx/libavcodec/x86/vp8dsp.asm
diff options
context:
space:
mode:
authorMoonchild <mcwerewolf@gmail.com>2018-10-01 14:34:26 +0200
committerGitHub <noreply@github.com>2018-10-01 14:34:26 +0200
commit79b00fc33b5cb6d56d29b50efac6d62ce3a89018 (patch)
tree77d8179bb3d9d76d9ec4fcfe396a35afcc8f73cd /media/ffvpx/libavcodec/x86/vp8dsp.asm
parentab881a3bf513e591b6cc2966560cdab2b63a0f2a (diff)
parent7d1ee0e5e4958175ccde5d153b025f97a17caeb2 (diff)
downloadUXP-79b00fc33b5cb6d56d29b50efac6d62ce3a89018.tar
UXP-79b00fc33b5cb6d56d29b50efac6d62ce3a89018.tar.gz
UXP-79b00fc33b5cb6d56d29b50efac6d62ce3a89018.tar.lz
UXP-79b00fc33b5cb6d56d29b50efac6d62ce3a89018.tar.xz
UXP-79b00fc33b5cb6d56d29b50efac6d62ce3a89018.zip
Merge pull request #801 from trav90/update-from-upstream
Update ffvpx code to 4.0.2
Diffstat (limited to 'media/ffvpx/libavcodec/x86/vp8dsp.asm')
-rw-r--r--media/ffvpx/libavcodec/x86/vp8dsp.asm143
1 files changed, 66 insertions, 77 deletions
diff --git a/media/ffvpx/libavcodec/x86/vp8dsp.asm b/media/ffvpx/libavcodec/x86/vp8dsp.asm
index e303b8029..75de5690a 100644
--- a/media/ffvpx/libavcodec/x86/vp8dsp.asm
+++ b/media/ffvpx/libavcodec/x86/vp8dsp.asm
@@ -664,6 +664,37 @@ INIT_XMM sse2
FILTER_V 8
%macro FILTER_BILINEAR 1
+%if cpuflag(ssse3)
+cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my
+ shl myd, 4
+%ifdef PIC
+ lea picregq, [bilinear_filter_vb_m]
+%endif
+ pxor m4, m4
+ mova m3, [bilinear_filter_vb+myq-16]
+.nextrow:
+ movh m0, [srcq+srcstrideq*0]
+ movh m1, [srcq+srcstrideq*1]
+ movh m2, [srcq+srcstrideq*2]
+ punpcklbw m0, m1
+ punpcklbw m1, m2
+ pmaddubsw m0, m3
+ pmaddubsw m1, m3
+ psraw m0, 2
+ psraw m1, 2
+ pavgw m0, m4
+ pavgw m1, m4
+%if mmsize==8
+ packuswb m0, m0
+ packuswb m1, m1
+ movh [dstq+dststrideq*0], m0
+ movh [dstq+dststrideq*1], m1
+%else
+ packuswb m0, m1
+ movh [dstq+dststrideq*0], m0
+ movhps [dstq+dststrideq*1], m0
+%endif
+%else ; cpuflag(ssse3)
cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, picreg, my
shl myd, 4
%ifdef PIC
@@ -701,6 +732,7 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
movh [dstq+dststrideq*0], m0
movhps [dstq+dststrideq*1], m0
%endif
+%endif ; cpuflag(ssse3)
lea dstq, [dstq+dststrideq*2]
lea srcq, [srcq+srcstrideq*2]
@@ -708,6 +740,37 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
jg .nextrow
REP_RET
+%if cpuflag(ssse3)
+cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
+ shl mxd, 4
+%ifdef PIC
+ lea picregq, [bilinear_filter_vb_m]
+%endif
+ pxor m4, m4
+ mova m2, [filter_h2_shuf]
+ mova m3, [bilinear_filter_vb+mxq-16]
+.nextrow:
+ movu m0, [srcq+srcstrideq*0]
+ movu m1, [srcq+srcstrideq*1]
+ pshufb m0, m2
+ pshufb m1, m2
+ pmaddubsw m0, m3
+ pmaddubsw m1, m3
+ psraw m0, 2
+ psraw m1, 2
+ pavgw m0, m4
+ pavgw m1, m4
+%if mmsize==8
+ packuswb m0, m0
+ packuswb m1, m1
+ movh [dstq+dststrideq*0], m0
+ movh [dstq+dststrideq*1], m1
+%else
+ packuswb m0, m1
+ movh [dstq+dststrideq*0], m0
+ movhps [dstq+dststrideq*1], m0
+%endif
+%else ; cpuflag(ssse3)
cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg
shl mxd, 4
%ifdef PIC
@@ -746,6 +809,7 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride
movh [dstq+dststrideq*0], m0
movhps [dstq+dststrideq*1], m0
%endif
+%endif ; cpuflag(ssse3)
lea dstq, [dstq+dststrideq*2]
lea srcq, [srcq+srcstrideq*2]
@@ -758,85 +822,10 @@ INIT_MMX mmxext
FILTER_BILINEAR 4
INIT_XMM sse2
FILTER_BILINEAR 8
-
-%macro FILTER_BILINEAR_SSSE3 1
-cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my
- shl myd, 4
-%ifdef PIC
- lea picregq, [bilinear_filter_vb_m]
-%endif
- pxor m4, m4
- mova m3, [bilinear_filter_vb+myq-16]
-.nextrow:
- movh m0, [srcq+srcstrideq*0]
- movh m1, [srcq+srcstrideq*1]
- movh m2, [srcq+srcstrideq*2]
- punpcklbw m0, m1
- punpcklbw m1, m2
- pmaddubsw m0, m3
- pmaddubsw m1, m3
- psraw m0, 2
- psraw m1, 2
- pavgw m0, m4
- pavgw m1, m4
-%if mmsize==8
- packuswb m0, m0
- packuswb m1, m1
- movh [dstq+dststrideq*0], m0
- movh [dstq+dststrideq*1], m1
-%else
- packuswb m0, m1
- movh [dstq+dststrideq*0], m0
- movhps [dstq+dststrideq*1], m0
-%endif
-
- lea dstq, [dstq+dststrideq*2]
- lea srcq, [srcq+srcstrideq*2]
- sub heightd, 2
- jg .nextrow
- REP_RET
-
-cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
- shl mxd, 4
-%ifdef PIC
- lea picregq, [bilinear_filter_vb_m]
-%endif
- pxor m4, m4
- mova m2, [filter_h2_shuf]
- mova m3, [bilinear_filter_vb+mxq-16]
-.nextrow:
- movu m0, [srcq+srcstrideq*0]
- movu m1, [srcq+srcstrideq*1]
- pshufb m0, m2
- pshufb m1, m2
- pmaddubsw m0, m3
- pmaddubsw m1, m3
- psraw m0, 2
- psraw m1, 2
- pavgw m0, m4
- pavgw m1, m4
-%if mmsize==8
- packuswb m0, m0
- packuswb m1, m1
- movh [dstq+dststrideq*0], m0
- movh [dstq+dststrideq*1], m1
-%else
- packuswb m0, m1
- movh [dstq+dststrideq*0], m0
- movhps [dstq+dststrideq*1], m0
-%endif
-
- lea dstq, [dstq+dststrideq*2]
- lea srcq, [srcq+srcstrideq*2]
- sub heightd, 2
- jg .nextrow
- REP_RET
-%endmacro
-
INIT_MMX ssse3
-FILTER_BILINEAR_SSSE3 4
+FILTER_BILINEAR 4
INIT_XMM ssse3
-FILTER_BILINEAR_SSSE3 8
+FILTER_BILINEAR 8
INIT_MMX mmx
cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height