diff options
Diffstat (limited to 'third_party/aom/aom_dsp/x86/sad_ssse3.asm')
-rw-r--r-- | third_party/aom/aom_dsp/x86/sad_ssse3.asm | 373 |
1 files changed, 0 insertions, 373 deletions
diff --git a/third_party/aom/aom_dsp/x86/sad_ssse3.asm b/third_party/aom/aom_dsp/x86/sad_ssse3.asm deleted file mode 100644 index 96b64b040..000000000 --- a/third_party/aom/aom_dsp/x86/sad_ssse3.asm +++ /dev/null @@ -1,373 +0,0 @@ -; -; Copyright (c) 2016, Alliance for Open Media. All rights reserved -; -; This source code is subject to the terms of the BSD 2 Clause License and -; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License -; was not distributed with this source code in the LICENSE file, you can -; obtain it at www.aomedia.org/license/software. If the Alliance for Open -; Media Patent License 1.0 was not distributed with this source code in the -; PATENTS file, you can obtain it at www.aomedia.org/license/patent. -; - -; - - -%include "aom_ports/x86_abi_support.asm" - -%macro PROCESS_16X2X3 1 -%if %1 - movdqa xmm0, XMMWORD PTR [rsi] - lddqu xmm5, XMMWORD PTR [rdi] - lddqu xmm6, XMMWORD PTR [rdi+1] - lddqu xmm7, XMMWORD PTR [rdi+2] - - psadbw xmm5, xmm0 - psadbw xmm6, xmm0 - psadbw xmm7, xmm0 -%else - movdqa xmm0, XMMWORD PTR [rsi] - lddqu xmm1, XMMWORD PTR [rdi] - lddqu xmm2, XMMWORD PTR [rdi+1] - lddqu xmm3, XMMWORD PTR [rdi+2] - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 -%endif - movdqa xmm0, XMMWORD PTR [rsi+rax] - lddqu xmm1, XMMWORD PTR [rdi+rdx] - lddqu xmm2, XMMWORD PTR [rdi+rdx+1] - lddqu xmm3, XMMWORD PTR [rdi+rdx+2] - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 -%endmacro - -%macro PROCESS_16X2X3_OFFSET 2 -%if %1 - movdqa xmm0, XMMWORD PTR [rsi] - movdqa xmm4, XMMWORD PTR [rdi] - movdqa xmm7, XMMWORD PTR [rdi+16] - - movdqa xmm5, xmm7 - palignr xmm5, xmm4, %2 - - movdqa xmm6, xmm7 - palignr xmm6, xmm4, (%2+1) - - palignr xmm7, xmm4, (%2+2) - - psadbw xmm5, xmm0 - psadbw xmm6, xmm0 - psadbw xmm7, xmm0 -%else - movdqa xmm0, XMMWORD PTR [rsi] - movdqa xmm4, XMMWORD PTR [rdi] - movdqa xmm3, XMMWORD PTR [rdi+16] - - movdqa xmm1, xmm3 - palignr xmm1, xmm4, %2 - - movdqa xmm2, xmm3 - palignr xmm2, xmm4, (%2+1) - - palignr xmm3, xmm4, (%2+2) - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 -%endif - movdqa xmm0, XMMWORD PTR [rsi+rax] - movdqa xmm4, XMMWORD PTR [rdi+rdx] - movdqa xmm3, XMMWORD PTR [rdi+rdx+16] - - movdqa xmm1, xmm3 - palignr xmm1, xmm4, %2 - - movdqa xmm2, xmm3 - palignr xmm2, xmm4, (%2+1) - - palignr xmm3, xmm4, (%2+2) - - lea rsi, [rsi+rax*2] - lea rdi, [rdi+rdx*2] - - psadbw xmm1, xmm0 - psadbw xmm2, xmm0 - psadbw xmm3, xmm0 - - paddw xmm5, xmm1 - paddw xmm6, xmm2 - paddw xmm7, xmm3 -%endmacro - -%macro PROCESS_16X16X3_OFFSET 2 -%2_aligned_by_%1: - - sub rdi, %1 - - PROCESS_16X2X3_OFFSET 1, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - - jmp %2_store_off - -%endmacro - -%macro PROCESS_16X8X3_OFFSET 2 -%2_aligned_by_%1: - - sub rdi, %1 - - PROCESS_16X2X3_OFFSET 1, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - PROCESS_16X2X3_OFFSET 0, %1 - - jmp %2_store_off - -%endmacro - -;void int aom_sad16x16x3_ssse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(aom_sad16x16x3_ssse3) PRIVATE -sym(aom_sad16x16x3_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - push rsi - push rdi - push rcx - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - mov rdx, 0xf - and rdx, rdi - - jmp .aom_sad16x16x3_ssse3_skiptable -.aom_sad16x16x3_ssse3_jumptable: - dd .aom_sad16x16x3_ssse3_aligned_by_0 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_1 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_2 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_3 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_4 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_5 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_6 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_7 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_8 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_9 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_10 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_11 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_12 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_13 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_14 - .aom_sad16x16x3_ssse3_do_jump - dd .aom_sad16x16x3_ssse3_aligned_by_15 - .aom_sad16x16x3_ssse3_do_jump -.aom_sad16x16x3_ssse3_skiptable: - - call .aom_sad16x16x3_ssse3_do_jump -.aom_sad16x16x3_ssse3_do_jump: - pop rcx ; get the address of do_jump - mov rax, .aom_sad16x16x3_ssse3_jumptable - .aom_sad16x16x3_ssse3_do_jump - add rax, rcx ; get the absolute address of aom_sad16x16x3_ssse3_jumptable - - movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable - add rcx, rax - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - jmp rcx - - PROCESS_16X16X3_OFFSET 0, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 1, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 2, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 3, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 4, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 5, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 6, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 7, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 8, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 9, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 10, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 11, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 12, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 13, .aom_sad16x16x3_ssse3 - PROCESS_16X16X3_OFFSET 14, .aom_sad16x16x3_ssse3 - -.aom_sad16x16x3_ssse3_aligned_by_15: - PROCESS_16X2X3 1 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - -.aom_sad16x16x3_ssse3_store_off: - mov rdi, arg(4) ;Results - - movq xmm0, xmm5 - psrldq xmm5, 8 - - paddw xmm0, xmm5 - movd [rdi], xmm0 -;- - movq xmm0, xmm6 - psrldq xmm6, 8 - - paddw xmm0, xmm6 - movd [rdi+4], xmm0 -;- - movq xmm0, xmm7 - psrldq xmm7, 8 - - paddw xmm0, xmm7 - movd [rdi+8], xmm0 - - ; begin epilog - pop rcx - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret - -;void int aom_sad16x8x3_ssse3( -; unsigned char *src_ptr, -; int src_stride, -; unsigned char *ref_ptr, -; int ref_stride, -; int *results) -global sym(aom_sad16x8x3_ssse3) PRIVATE -sym(aom_sad16x8x3_ssse3): - push rbp - mov rbp, rsp - SHADOW_ARGS_TO_STACK 5 - SAVE_XMM 7 - push rsi - push rdi - push rcx - ; end prolog - - mov rsi, arg(0) ;src_ptr - mov rdi, arg(2) ;ref_ptr - - mov rdx, 0xf - and rdx, rdi - - jmp .aom_sad16x8x3_ssse3_skiptable -.aom_sad16x8x3_ssse3_jumptable: - dd .aom_sad16x8x3_ssse3_aligned_by_0 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_1 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_2 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_3 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_4 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_5 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_6 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_7 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_8 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_9 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_10 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_11 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_12 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_13 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_14 - .aom_sad16x8x3_ssse3_do_jump - dd .aom_sad16x8x3_ssse3_aligned_by_15 - .aom_sad16x8x3_ssse3_do_jump -.aom_sad16x8x3_ssse3_skiptable: - - call .aom_sad16x8x3_ssse3_do_jump -.aom_sad16x8x3_ssse3_do_jump: - pop rcx ; get the address of do_jump - mov rax, .aom_sad16x8x3_ssse3_jumptable - .aom_sad16x8x3_ssse3_do_jump - add rax, rcx ; get the absolute address of aom_sad16x8x3_ssse3_jumptable - - movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable - add rcx, rax - - movsxd rax, dword ptr arg(1) ;src_stride - movsxd rdx, dword ptr arg(3) ;ref_stride - - jmp rcx - - PROCESS_16X8X3_OFFSET 0, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 1, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 2, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 3, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 4, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 5, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 6, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 7, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 8, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 9, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 10, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 11, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 12, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 13, .aom_sad16x8x3_ssse3 - PROCESS_16X8X3_OFFSET 14, .aom_sad16x8x3_ssse3 - -.aom_sad16x8x3_ssse3_aligned_by_15: - - PROCESS_16X2X3 1 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - PROCESS_16X2X3 0 - -.aom_sad16x8x3_ssse3_store_off: - mov rdi, arg(4) ;Results - - movq xmm0, xmm5 - psrldq xmm5, 8 - - paddw xmm0, xmm5 - movd [rdi], xmm0 -;- - movq xmm0, xmm6 - psrldq xmm6, 8 - - paddw xmm0, xmm6 - movd [rdi+4], xmm0 -;- - movq xmm0, xmm7 - psrldq xmm7, 8 - - paddw xmm0, xmm7 - movd [rdi+8], xmm0 - - ; begin epilog - pop rcx - pop rdi - pop rsi - RESTORE_XMM - UNSHADOW_ARGS - pop rbp - ret |