diff options
author | Moonchild <mcwerewolf@gmail.com> | 2018-10-01 15:25:44 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-10-01 15:25:44 +0200 |
commit | 8e8fcee4a55de171303ebe526d3cf051522111bf (patch) | |
tree | fef75d382fc6216a093eeaf80560473dff19d883 /media/ffvpx/libavcodec/x86/vp8dsp.asm | |
parent | 79b00fc33b5cb6d56d29b50efac6d62ce3a89018 (diff) | |
parent | 45c24f05d023a2cd8289ed40a13708392ce2e6a4 (diff) | |
download | UXP-8e8fcee4a55de171303ebe526d3cf051522111bf.tar UXP-8e8fcee4a55de171303ebe526d3cf051522111bf.tar.gz UXP-8e8fcee4a55de171303ebe526d3cf051522111bf.tar.lz UXP-8e8fcee4a55de171303ebe526d3cf051522111bf.tar.xz UXP-8e8fcee4a55de171303ebe526d3cf051522111bf.zip |
Merge pull request #805 from MoonchildProductions/revert-801-update-from-upstream
Revert "Update ffvpx code to 4.0.2"
Diffstat (limited to 'media/ffvpx/libavcodec/x86/vp8dsp.asm')
-rw-r--r-- | media/ffvpx/libavcodec/x86/vp8dsp.asm | 143 |
1 files changed, 77 insertions, 66 deletions
diff --git a/media/ffvpx/libavcodec/x86/vp8dsp.asm b/media/ffvpx/libavcodec/x86/vp8dsp.asm index 75de5690a..e303b8029 100644 --- a/media/ffvpx/libavcodec/x86/vp8dsp.asm +++ b/media/ffvpx/libavcodec/x86/vp8dsp.asm @@ -664,37 +664,6 @@ INIT_XMM sse2 FILTER_V 8 %macro FILTER_BILINEAR 1 -%if cpuflag(ssse3) -cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my - shl myd, 4 -%ifdef PIC - lea picregq, [bilinear_filter_vb_m] -%endif - pxor m4, m4 - mova m3, [bilinear_filter_vb+myq-16] -.nextrow: - movh m0, [srcq+srcstrideq*0] - movh m1, [srcq+srcstrideq*1] - movh m2, [srcq+srcstrideq*2] - punpcklbw m0, m1 - punpcklbw m1, m2 - pmaddubsw m0, m3 - pmaddubsw m1, m3 - psraw m0, 2 - psraw m1, 2 - pavgw m0, m4 - pavgw m1, m4 -%if mmsize==8 - packuswb m0, m0 - packuswb m1, m1 - movh [dstq+dststrideq*0], m0 - movh [dstq+dststrideq*1], m1 -%else - packuswb m0, m1 - movh [dstq+dststrideq*0], m0 - movhps [dstq+dststrideq*1], m0 -%endif -%else ; cpuflag(ssse3) cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, picreg, my shl myd, 4 %ifdef PIC @@ -732,7 +701,6 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p movh [dstq+dststrideq*0], m0 movhps [dstq+dststrideq*1], m0 %endif -%endif ; cpuflag(ssse3) lea dstq, [dstq+dststrideq*2] lea srcq, [srcq+srcstrideq*2] @@ -740,37 +708,6 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p jg .nextrow REP_RET -%if cpuflag(ssse3) -cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg - shl mxd, 4 -%ifdef PIC - lea picregq, [bilinear_filter_vb_m] -%endif - pxor m4, m4 - mova m2, [filter_h2_shuf] - mova m3, [bilinear_filter_vb+mxq-16] -.nextrow: - movu m0, [srcq+srcstrideq*0] - movu m1, [srcq+srcstrideq*1] - pshufb m0, m2 - pshufb m1, m2 - pmaddubsw m0, m3 - pmaddubsw m1, m3 - psraw m0, 2 - psraw m1, 2 - pavgw m0, m4 - pavgw m1, m4 -%if mmsize==8 - packuswb m0, m0 - packuswb m1, m1 - movh [dstq+dststrideq*0], m0 - movh [dstq+dststrideq*1], m1 -%else - packuswb m0, m1 - movh [dstq+dststrideq*0], m0 - movhps [dstq+dststrideq*1], m0 -%endif -%else ; cpuflag(ssse3) cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg shl mxd, 4 %ifdef PIC @@ -809,7 +746,6 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride movh [dstq+dststrideq*0], m0 movhps [dstq+dststrideq*1], m0 %endif -%endif ; cpuflag(ssse3) lea dstq, [dstq+dststrideq*2] lea srcq, [srcq+srcstrideq*2] @@ -822,10 +758,85 @@ INIT_MMX mmxext FILTER_BILINEAR 4 INIT_XMM sse2 FILTER_BILINEAR 8 + +%macro FILTER_BILINEAR_SSSE3 1 +cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my + shl myd, 4 +%ifdef PIC + lea picregq, [bilinear_filter_vb_m] +%endif + pxor m4, m4 + mova m3, [bilinear_filter_vb+myq-16] +.nextrow: + movh m0, [srcq+srcstrideq*0] + movh m1, [srcq+srcstrideq*1] + movh m2, [srcq+srcstrideq*2] + punpcklbw m0, m1 + punpcklbw m1, m2 + pmaddubsw m0, m3 + pmaddubsw m1, m3 + psraw m0, 2 + psraw m1, 2 + pavgw m0, m4 + pavgw m1, m4 +%if mmsize==8 + packuswb m0, m0 + packuswb m1, m1 + movh [dstq+dststrideq*0], m0 + movh [dstq+dststrideq*1], m1 +%else + packuswb m0, m1 + movh [dstq+dststrideq*0], m0 + movhps [dstq+dststrideq*1], m0 +%endif + + lea dstq, [dstq+dststrideq*2] + lea srcq, [srcq+srcstrideq*2] + sub heightd, 2 + jg .nextrow + REP_RET + +cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg + shl mxd, 4 +%ifdef PIC + lea picregq, [bilinear_filter_vb_m] +%endif + pxor m4, m4 + mova m2, [filter_h2_shuf] + mova m3, [bilinear_filter_vb+mxq-16] +.nextrow: + movu m0, [srcq+srcstrideq*0] + movu m1, [srcq+srcstrideq*1] + pshufb m0, m2 + pshufb m1, m2 + pmaddubsw m0, m3 + pmaddubsw m1, m3 + psraw m0, 2 + psraw m1, 2 + pavgw m0, m4 + pavgw m1, m4 +%if mmsize==8 + packuswb m0, m0 + packuswb m1, m1 + movh [dstq+dststrideq*0], m0 + movh [dstq+dststrideq*1], m1 +%else + packuswb m0, m1 + movh [dstq+dststrideq*0], m0 + movhps [dstq+dststrideq*1], m0 +%endif + + lea dstq, [dstq+dststrideq*2] + lea srcq, [srcq+srcstrideq*2] + sub heightd, 2 + jg .nextrow + REP_RET +%endmacro + INIT_MMX ssse3 -FILTER_BILINEAR 4 +FILTER_BILINEAR_SSSE3 4 INIT_XMM ssse3 -FILTER_BILINEAR 8 +FILTER_BILINEAR_SSSE3 8 INIT_MMX mmx cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height |