diff options
author | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
---|---|---|
committer | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
commit | 5f8de423f190bbb79a62f804151bc24824fa32d8 (patch) | |
tree | 10027f336435511475e392454359edea8e25895d /media/libvpx/vp8/common/x86/iwalsh_mmx.asm | |
parent | 49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff) | |
download | UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip |
Add m-esr52 at 52.6.0
Diffstat (limited to 'media/libvpx/vp8/common/x86/iwalsh_mmx.asm')
-rw-r--r-- | media/libvpx/vp8/common/x86/iwalsh_mmx.asm | 140 |
1 files changed, 140 insertions, 0 deletions
diff --git a/media/libvpx/vp8/common/x86/iwalsh_mmx.asm b/media/libvpx/vp8/common/x86/iwalsh_mmx.asm new file mode 100644 index 000000000..158c3b745 --- /dev/null +++ b/media/libvpx/vp8/common/x86/iwalsh_mmx.asm @@ -0,0 +1,140 @@ +; +; Copyright (c) 2010 The WebM project authors. All Rights Reserved. +; +; Use of this source code is governed by a BSD-style license +; that can be found in the LICENSE file in the root of the source +; tree. An additional intellectual property rights grant can be found +; in the file PATENTS. All contributing project authors may +; be found in the AUTHORS file in the root of the source tree. +; + + +%include "vpx_ports/x86_abi_support.asm" + +;void vp8_short_inv_walsh4x4_mmx(short *input, short *output) +global sym(vp8_short_inv_walsh4x4_mmx) PRIVATE +sym(vp8_short_inv_walsh4x4_mmx): + push rbp + mov rbp, rsp + SHADOW_ARGS_TO_STACK 2 + ; end prolog + + mov rdx, arg(0) + mov rax, 30003h + + movq mm0, [rdx + 0] ;ip[0] + movq mm1, [rdx + 8] ;ip[4] + movq mm7, rax + + movq mm2, [rdx + 16] ;ip[8] + movq mm3, [rdx + 24] ;ip[12] + punpcklwd mm7, mm7 ;0003000300030003h + mov rdx, arg(1) + + movq mm4, mm0 + movq mm5, mm1 + + paddw mm4, mm3 ;ip[0] + ip[12] aka al + paddw mm5, mm2 ;ip[4] + ip[8] aka bl + + movq mm6, mm4 ;temp al + paddw mm4, mm5 ;al + bl + psubw mm6, mm5 ;al - bl + + psubw mm0, mm3 ;ip[0] - ip[12] aka d1 + psubw mm1, mm2 ;ip[4] - ip[8] aka c1 + + movq mm5, mm0 ;temp dl + paddw mm0, mm1 ;dl + cl + psubw mm5, mm1 ;dl - cl + + ; 03 02 01 00 + ; 13 12 11 10 + ; 23 22 21 20 + ; 33 32 31 30 + + movq mm3, mm4 ; 03 02 01 00 + punpcklwd mm4, mm0 ; 11 01 10 00 + punpckhwd mm3, mm0 ; 13 03 12 02 + + movq mm1, mm6 ; 23 22 21 20 + punpcklwd mm6, mm5 ; 31 21 30 20 + punpckhwd mm1, mm5 ; 33 23 32 22 + + movq mm0, mm4 ; 11 01 10 00 + movq mm2, mm3 ; 13 03 12 02 + + punpckldq mm0, mm6 ; 30 20 10 00 aka ip[0] + punpckhdq mm4, mm6 ; 31 21 11 01 aka ip[4] + + punpckldq mm2, mm1 ; 32 22 12 02 aka ip[8] + punpckhdq mm3, mm1 ; 33 23 13 03 aka ip[12] +;~~~~~~~~~~~~~~~~~~~~~ + movq mm1, mm0 + movq mm5, mm4 + paddw mm1, mm3 ;ip[0] + ip[12] aka al + paddw mm5, mm2 ;ip[4] + ip[8] aka bl + + movq mm6, mm1 ;temp al + paddw mm1, mm5 ;al + bl + psubw mm6, mm5 ;al - bl + paddw mm1, mm7 + paddw mm6, mm7 + psraw mm1, 3 + psraw mm6, 3 + + psubw mm0, mm3 ;ip[0] - ip[12] aka d1 + psubw mm4, mm2 ;ip[4] - ip[8] aka c1 + + movq mm5, mm0 ;temp dl + paddw mm0, mm4 ;dl + cl + psubw mm5, mm4 ;dl - cl + paddw mm0, mm7 + paddw mm5, mm7 + psraw mm0, 3 + psraw mm5, 3 +;~~~~~~~~~~~~~~~~~~~~~ + + movd eax, mm1 + movd ecx, mm0 + psrlq mm0, 32 + psrlq mm1, 32 + mov word ptr[rdx+32*0], ax + mov word ptr[rdx+32*1], cx + shr eax, 16 + shr ecx, 16 + mov word ptr[rdx+32*4], ax + mov word ptr[rdx+32*5], cx + movd eax, mm1 + movd ecx, mm0 + mov word ptr[rdx+32*8], ax + mov word ptr[rdx+32*9], cx + shr eax, 16 + shr ecx, 16 + mov word ptr[rdx+32*12], ax + mov word ptr[rdx+32*13], cx + + movd eax, mm6 + movd ecx, mm5 + psrlq mm5, 32 + psrlq mm6, 32 + mov word ptr[rdx+32*2], ax + mov word ptr[rdx+32*3], cx + shr eax, 16 + shr ecx, 16 + mov word ptr[rdx+32*6], ax + mov word ptr[rdx+32*7], cx + movd eax, mm6 + movd ecx, mm5 + mov word ptr[rdx+32*10], ax + mov word ptr[rdx+32*11], cx + shr eax, 16 + shr ecx, 16 + mov word ptr[rdx+32*14], ax + mov word ptr[rdx+32*15], cx + + ; begin epilog + UNSHADOW_ARGS + pop rbp + ret + |