summaryrefslogtreecommitdiffstats
path: root/media/ffvpx/libavcodec/x86
diff options
context:
space:
mode:
Diffstat (limited to 'media/ffvpx/libavcodec/x86')
-rw-r--r--media/ffvpx/libavcodec/x86/constants.c23
-rw-r--r--media/ffvpx/libavcodec/x86/constants.h2
-rw-r--r--media/ffvpx/libavcodec/x86/vp8dsp.asm143
3 files changed, 89 insertions, 79 deletions
diff --git a/media/ffvpx/libavcodec/x86/constants.c b/media/ffvpx/libavcodec/x86/constants.c
index 4bfb78cc3..11002ee61 100644
--- a/media/ffvpx/libavcodec/x86/constants.c
+++ b/media/ffvpx/libavcodec/x86/constants.c
@@ -26,23 +26,23 @@ DECLARE_ALIGNED(32, const ymm_reg, ff_pw_1) = { 0x0001000100010001ULL, 0x000
0x0001000100010001ULL, 0x0001000100010001ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_2) = { 0x0002000200020002ULL, 0x0002000200020002ULL,
0x0002000200020002ULL, 0x0002000200020002ULL };
-DECLARE_ASM_ALIGNED(16, const xmm_reg, ff_pw_3) = { 0x0003000300030003ULL, 0x0003000300030003ULL };
-DECLARE_ASM_ALIGNED(32, const ymm_reg, ff_pw_4) = { 0x0004000400040004ULL, 0x0004000400040004ULL,
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_3) = { 0x0003000300030003ULL, 0x0003000300030003ULL };
+DECLARE_ALIGNED(32, const ymm_reg, ff_pw_4) = { 0x0004000400040004ULL, 0x0004000400040004ULL,
0x0004000400040004ULL, 0x0004000400040004ULL };
-DECLARE_ASM_ALIGNED(16, const xmm_reg, ff_pw_5) = { 0x0005000500050005ULL, 0x0005000500050005ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_5) = { 0x0005000500050005ULL, 0x0005000500050005ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8) = { 0x0008000800080008ULL, 0x0008000800080008ULL };
-DECLARE_ASM_ALIGNED(16, const xmm_reg, ff_pw_9) = { 0x0009000900090009ULL, 0x0009000900090009ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_9) = { 0x0009000900090009ULL, 0x0009000900090009ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pw_15) = 0x000F000F000F000FULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_16) = { 0x0010001000100010ULL, 0x0010001000100010ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_17) = { 0x0011001100110011ULL, 0x0011001100110011ULL };
-DECLARE_ASM_ALIGNED(16, const xmm_reg, ff_pw_18) = { 0x0012001200120012ULL, 0x0012001200120012ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_18) = { 0x0012001200120012ULL, 0x0012001200120012ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_20) = { 0x0014001400140014ULL, 0x0014001400140014ULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pw_32) = { 0x0020002000200020ULL, 0x0020002000200020ULL };
-DECLARE_ASM_ALIGNED(8, const uint64_t, ff_pw_42) = 0x002A002A002A002AULL;
-DECLARE_ASM_ALIGNED(8, const uint64_t, ff_pw_53) = 0x0035003500350035ULL;
-DECLARE_ASM_ALIGNED(16, const xmm_reg, ff_pw_64) = { 0x0040004000400040ULL, 0x0040004000400040ULL };
-DECLARE_ASM_ALIGNED(8, const uint64_t, ff_pw_96) = 0x0060006000600060ULL;
-DECLARE_ASM_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_42) = 0x002A002A002A002AULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_53) = 0x0035003500350035ULL;
+DECLARE_ALIGNED(16, const xmm_reg, ff_pw_64) = { 0x0040004000400040ULL, 0x0040004000400040ULL };
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_96) = 0x0060006000600060ULL;
+DECLARE_ALIGNED(8, const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_255) = { 0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL,
0x00ff00ff00ff00ffULL, 0x00ff00ff00ff00ffULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_256) = { 0x0100010001000100ULL, 0x0100010001000100ULL,
@@ -74,8 +74,7 @@ DECLARE_ALIGNED(32, const ymm_reg, ff_pb_2) = { 0x0202020202020202ULL, 0x020
DECLARE_ALIGNED(32, const ymm_reg, ff_pb_3) = { 0x0303030303030303ULL, 0x0303030303030303ULL,
0x0303030303030303ULL, 0x0303030303030303ULL };
DECLARE_ALIGNED(32, const xmm_reg, ff_pb_15) = { 0x0F0F0F0F0F0F0F0FULL, 0x0F0F0F0F0F0F0F0FULL };
-DECLARE_ALIGNED(32, const ymm_reg, ff_pb_80) = { 0x8080808080808080ULL, 0x8080808080808080ULL,
- 0x8080808080808080ULL, 0x8080808080808080ULL };
+DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80) = { 0x8080808080808080ULL, 0x8080808080808080ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pb_FE) = { 0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL,
0xFEFEFEFEFEFEFEFEULL, 0xFEFEFEFEFEFEFEFEULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC) = 0xFCFCFCFCFCFCFCFCULL;
diff --git a/media/ffvpx/libavcodec/x86/constants.h b/media/ffvpx/libavcodec/x86/constants.h
index 85da38b7b..bbb0ef844 100644
--- a/media/ffvpx/libavcodec/x86/constants.h
+++ b/media/ffvpx/libavcodec/x86/constants.h
@@ -57,7 +57,7 @@ extern const ymm_reg ff_pb_0;
extern const ymm_reg ff_pb_1;
extern const ymm_reg ff_pb_2;
extern const ymm_reg ff_pb_3;
-extern const ymm_reg ff_pb_80;
+extern const xmm_reg ff_pb_80;
extern const ymm_reg ff_pb_FE;
extern const uint64_t ff_pb_FC;
diff --git a/media/ffvpx/libavcodec/x86/vp8dsp.asm b/media/ffvpx/libavcodec/x86/vp8dsp.asm
index 75de5690a..e303b8029 100644
--- a/media/ffvpx/libavcodec/x86/vp8dsp.asm
+++ b/media/ffvpx/libavcodec/x86/vp8dsp.asm
@@ -664,37 +664,6 @@ INIT_XMM sse2
FILTER_V 8
%macro FILTER_BILINEAR 1
-%if cpuflag(ssse3)
-cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my
- shl myd, 4
-%ifdef PIC
- lea picregq, [bilinear_filter_vb_m]
-%endif
- pxor m4, m4
- mova m3, [bilinear_filter_vb+myq-16]
-.nextrow:
- movh m0, [srcq+srcstrideq*0]
- movh m1, [srcq+srcstrideq*1]
- movh m2, [srcq+srcstrideq*2]
- punpcklbw m0, m1
- punpcklbw m1, m2
- pmaddubsw m0, m3
- pmaddubsw m1, m3
- psraw m0, 2
- psraw m1, 2
- pavgw m0, m4
- pavgw m1, m4
-%if mmsize==8
- packuswb m0, m0
- packuswb m1, m1
- movh [dstq+dststrideq*0], m0
- movh [dstq+dststrideq*1], m1
-%else
- packuswb m0, m1
- movh [dstq+dststrideq*0], m0
- movhps [dstq+dststrideq*1], m0
-%endif
-%else ; cpuflag(ssse3)
cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, picreg, my
shl myd, 4
%ifdef PIC
@@ -732,7 +701,6 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
movh [dstq+dststrideq*0], m0
movhps [dstq+dststrideq*1], m0
%endif
-%endif ; cpuflag(ssse3)
lea dstq, [dstq+dststrideq*2]
lea srcq, [srcq+srcstrideq*2]
@@ -740,37 +708,6 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
jg .nextrow
REP_RET
-%if cpuflag(ssse3)
-cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
- shl mxd, 4
-%ifdef PIC
- lea picregq, [bilinear_filter_vb_m]
-%endif
- pxor m4, m4
- mova m2, [filter_h2_shuf]
- mova m3, [bilinear_filter_vb+mxq-16]
-.nextrow:
- movu m0, [srcq+srcstrideq*0]
- movu m1, [srcq+srcstrideq*1]
- pshufb m0, m2
- pshufb m1, m2
- pmaddubsw m0, m3
- pmaddubsw m1, m3
- psraw m0, 2
- psraw m1, 2
- pavgw m0, m4
- pavgw m1, m4
-%if mmsize==8
- packuswb m0, m0
- packuswb m1, m1
- movh [dstq+dststrideq*0], m0
- movh [dstq+dststrideq*1], m1
-%else
- packuswb m0, m1
- movh [dstq+dststrideq*0], m0
- movhps [dstq+dststrideq*1], m0
-%endif
-%else ; cpuflag(ssse3)
cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg
shl mxd, 4
%ifdef PIC
@@ -809,7 +746,6 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride
movh [dstq+dststrideq*0], m0
movhps [dstq+dststrideq*1], m0
%endif
-%endif ; cpuflag(ssse3)
lea dstq, [dstq+dststrideq*2]
lea srcq, [srcq+srcstrideq*2]
@@ -822,10 +758,85 @@ INIT_MMX mmxext
FILTER_BILINEAR 4
INIT_XMM sse2
FILTER_BILINEAR 8
+
+%macro FILTER_BILINEAR_SSSE3 1
+cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my
+ shl myd, 4
+%ifdef PIC
+ lea picregq, [bilinear_filter_vb_m]
+%endif
+ pxor m4, m4
+ mova m3, [bilinear_filter_vb+myq-16]
+.nextrow:
+ movh m0, [srcq+srcstrideq*0]
+ movh m1, [srcq+srcstrideq*1]
+ movh m2, [srcq+srcstrideq*2]
+ punpcklbw m0, m1
+ punpcklbw m1, m2
+ pmaddubsw m0, m3
+ pmaddubsw m1, m3
+ psraw m0, 2
+ psraw m1, 2
+ pavgw m0, m4
+ pavgw m1, m4
+%if mmsize==8
+ packuswb m0, m0
+ packuswb m1, m1
+ movh [dstq+dststrideq*0], m0
+ movh [dstq+dststrideq*1], m1
+%else
+ packuswb m0, m1
+ movh [dstq+dststrideq*0], m0
+ movhps [dstq+dststrideq*1], m0
+%endif
+
+ lea dstq, [dstq+dststrideq*2]
+ lea srcq, [srcq+srcstrideq*2]
+ sub heightd, 2
+ jg .nextrow
+ REP_RET
+
+cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
+ shl mxd, 4
+%ifdef PIC
+ lea picregq, [bilinear_filter_vb_m]
+%endif
+ pxor m4, m4
+ mova m2, [filter_h2_shuf]
+ mova m3, [bilinear_filter_vb+mxq-16]
+.nextrow:
+ movu m0, [srcq+srcstrideq*0]
+ movu m1, [srcq+srcstrideq*1]
+ pshufb m0, m2
+ pshufb m1, m2
+ pmaddubsw m0, m3
+ pmaddubsw m1, m3
+ psraw m0, 2
+ psraw m1, 2
+ pavgw m0, m4
+ pavgw m1, m4
+%if mmsize==8
+ packuswb m0, m0
+ packuswb m1, m1
+ movh [dstq+dststrideq*0], m0
+ movh [dstq+dststrideq*1], m1
+%else
+ packuswb m0, m1
+ movh [dstq+dststrideq*0], m0
+ movhps [dstq+dststrideq*1], m0
+%endif
+
+ lea dstq, [dstq+dststrideq*2]
+ lea srcq, [srcq+srcstrideq*2]
+ sub heightd, 2
+ jg .nextrow
+ REP_RET
+%endmacro
+
INIT_MMX ssse3
-FILTER_BILINEAR 4
+FILTER_BILINEAR_SSSE3 4
INIT_XMM ssse3
-FILTER_BILINEAR 8
+FILTER_BILINEAR_SSSE3 8
INIT_MMX mmx
cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height