summaryrefslogtreecommitdiffstats
path: root/media/ffvpx/libavcodec/x86/vp8dsp.asm
diff options
context:
space:
mode:
Diffstat (limited to 'media/ffvpx/libavcodec/x86/vp8dsp.asm')
-rw-r--r--media/ffvpx/libavcodec/x86/vp8dsp.asm143
1 files changed, 77 insertions, 66 deletions
diff --git a/media/ffvpx/libavcodec/x86/vp8dsp.asm b/media/ffvpx/libavcodec/x86/vp8dsp.asm
index 75de5690a..e303b8029 100644
--- a/media/ffvpx/libavcodec/x86/vp8dsp.asm
+++ b/media/ffvpx/libavcodec/x86/vp8dsp.asm
@@ -664,37 +664,6 @@ INIT_XMM sse2
FILTER_V 8
%macro FILTER_BILINEAR 1
-%if cpuflag(ssse3)
-cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my
- shl myd, 4
-%ifdef PIC
- lea picregq, [bilinear_filter_vb_m]
-%endif
- pxor m4, m4
- mova m3, [bilinear_filter_vb+myq-16]
-.nextrow:
- movh m0, [srcq+srcstrideq*0]
- movh m1, [srcq+srcstrideq*1]
- movh m2, [srcq+srcstrideq*2]
- punpcklbw m0, m1
- punpcklbw m1, m2
- pmaddubsw m0, m3
- pmaddubsw m1, m3
- psraw m0, 2
- psraw m1, 2
- pavgw m0, m4
- pavgw m1, m4
-%if mmsize==8
- packuswb m0, m0
- packuswb m1, m1
- movh [dstq+dststrideq*0], m0
- movh [dstq+dststrideq*1], m1
-%else
- packuswb m0, m1
- movh [dstq+dststrideq*0], m0
- movhps [dstq+dststrideq*1], m0
-%endif
-%else ; cpuflag(ssse3)
cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, picreg, my
shl myd, 4
%ifdef PIC
@@ -732,7 +701,6 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
movh [dstq+dststrideq*0], m0
movhps [dstq+dststrideq*1], m0
%endif
-%endif ; cpuflag(ssse3)
lea dstq, [dstq+dststrideq*2]
lea srcq, [srcq+srcstrideq*2]
@@ -740,37 +708,6 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
jg .nextrow
REP_RET
-%if cpuflag(ssse3)
-cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
- shl mxd, 4
-%ifdef PIC
- lea picregq, [bilinear_filter_vb_m]
-%endif
- pxor m4, m4
- mova m2, [filter_h2_shuf]
- mova m3, [bilinear_filter_vb+mxq-16]
-.nextrow:
- movu m0, [srcq+srcstrideq*0]
- movu m1, [srcq+srcstrideq*1]
- pshufb m0, m2
- pshufb m1, m2
- pmaddubsw m0, m3
- pmaddubsw m1, m3
- psraw m0, 2
- psraw m1, 2
- pavgw m0, m4
- pavgw m1, m4
-%if mmsize==8
- packuswb m0, m0
- packuswb m1, m1
- movh [dstq+dststrideq*0], m0
- movh [dstq+dststrideq*1], m1
-%else
- packuswb m0, m1
- movh [dstq+dststrideq*0], m0
- movhps [dstq+dststrideq*1], m0
-%endif
-%else ; cpuflag(ssse3)
cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg
shl mxd, 4
%ifdef PIC
@@ -809,7 +746,6 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride
movh [dstq+dststrideq*0], m0
movhps [dstq+dststrideq*1], m0
%endif
-%endif ; cpuflag(ssse3)
lea dstq, [dstq+dststrideq*2]
lea srcq, [srcq+srcstrideq*2]
@@ -822,10 +758,85 @@ INIT_MMX mmxext
FILTER_BILINEAR 4
INIT_XMM sse2
FILTER_BILINEAR 8
+
+%macro FILTER_BILINEAR_SSSE3 1
+cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my
+ shl myd, 4
+%ifdef PIC
+ lea picregq, [bilinear_filter_vb_m]
+%endif
+ pxor m4, m4
+ mova m3, [bilinear_filter_vb+myq-16]
+.nextrow:
+ movh m0, [srcq+srcstrideq*0]
+ movh m1, [srcq+srcstrideq*1]
+ movh m2, [srcq+srcstrideq*2]
+ punpcklbw m0, m1
+ punpcklbw m1, m2
+ pmaddubsw m0, m3
+ pmaddubsw m1, m3
+ psraw m0, 2
+ psraw m1, 2
+ pavgw m0, m4
+ pavgw m1, m4
+%if mmsize==8
+ packuswb m0, m0
+ packuswb m1, m1
+ movh [dstq+dststrideq*0], m0
+ movh [dstq+dststrideq*1], m1
+%else
+ packuswb m0, m1
+ movh [dstq+dststrideq*0], m0
+ movhps [dstq+dststrideq*1], m0
+%endif
+
+ lea dstq, [dstq+dststrideq*2]
+ lea srcq, [srcq+srcstrideq*2]
+ sub heightd, 2
+ jg .nextrow
+ REP_RET
+
+cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
+ shl mxd, 4
+%ifdef PIC
+ lea picregq, [bilinear_filter_vb_m]
+%endif
+ pxor m4, m4
+ mova m2, [filter_h2_shuf]
+ mova m3, [bilinear_filter_vb+mxq-16]
+.nextrow:
+ movu m0, [srcq+srcstrideq*0]
+ movu m1, [srcq+srcstrideq*1]
+ pshufb m0, m2
+ pshufb m1, m2
+ pmaddubsw m0, m3
+ pmaddubsw m1, m3
+ psraw m0, 2
+ psraw m1, 2
+ pavgw m0, m4
+ pavgw m1, m4
+%if mmsize==8
+ packuswb m0, m0
+ packuswb m1, m1
+ movh [dstq+dststrideq*0], m0
+ movh [dstq+dststrideq*1], m1
+%else
+ packuswb m0, m1
+ movh [dstq+dststrideq*0], m0
+ movhps [dstq+dststrideq*1], m0
+%endif
+
+ lea dstq, [dstq+dststrideq*2]
+ lea srcq, [srcq+srcstrideq*2]
+ sub heightd, 2
+ jg .nextrow
+ REP_RET
+%endmacro
+
INIT_MMX ssse3
-FILTER_BILINEAR 4
+FILTER_BILINEAR_SSSE3 4
INIT_XMM ssse3
-FILTER_BILINEAR 8
+FILTER_BILINEAR_SSSE3 8
INIT_MMX mmx
cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height