diff options
Diffstat (limited to 'third_party/aom/aom_dsp/x86/aom_convolve_copy_sse2.asm')
-rw-r--r-- | third_party/aom/aom_dsp/x86/aom_convolve_copy_sse2.asm | 297 |
1 files changed, 0 insertions, 297 deletions
;
; Copyright (c) 2016, Alliance for Open Media. All rights reserved
;
; This source code is subject to the terms of the BSD 2 Clause License and
; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
; was not distributed with this source code in the LICENSE file, you can
; obtain it at www.aomedia.org/license/software. If the Alliance for Open
; Media Patent License 1.0 was not distributed with this source code in the
; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
;

;

%include "third_party/x86inc/x86inc.asm"

SECTION .text

;-----------------------------------------------------------------------------
; copy_or_avg_64 op, offset [, advance_src]
;
; Transfers the 64 bytes at [srcq+offset] to [dstq+offset] through m0-m3.
;   op          - "avg" averages the loaded data with the current destination
;                 contents (using whatever `pavg` is defined as at expansion
;                 time) before storing; any other value stores it unchanged.
;   offset      - constant byte offset applied to both srcq and dstq.
;   advance_src - when the literal token `advance_src` is given, srcq is
;                 stepped by src_strideq right after the four loads, i.e.
;                 before the destination is read or written.
;
; Source loads use movu; destination stores use mova and, for "avg", the
; destination is a direct pavg memory operand.
; NOTE(review): that presumably requires 16-byte-aligned destination rows -
; confirm against the callers.
; Clobbers: m0-m3 (and srcq when advance_src is requested).
;-----------------------------------------------------------------------------
%macro copy_or_avg_64 2-3
  movu        m0, [srcq+%2]
  movu        m1, [srcq+%2+16]
  movu        m2, [srcq+%2+32]
  movu        m3, [srcq+%2+48]
%ifidn %3, advance_src
  add         srcq, src_strideq
%endif
%ifidn %1, avg
  pavg        m0, [dstq+%2]
  pavg        m1, [dstq+%2+16]
  pavg        m2, [dstq+%2+32]
  pavg        m3, [dstq+%2+48]
%endif
  mova        [dstq+%2], m0
  mova        [dstq+%2+16], m1
  mova        [dstq+%2+32], m2
  mova        [dstq+%2+48], m3
%endmacro

;-----------------------------------------------------------------------------
; copy_or_avg_4rows_narrow op, mov_insn
;
; Handles four consecutive rows of a block narrower than one XMM register.
;   op       - "avg" loads the four destination rows into m4-m7 and averages
;              them into m0-m3 before storing; any other value is a plain copy.
;   mov_insn - move mnemonic used for every load and store (the callers in
;              this file pass movh or movd).
;
; Expects r5q = src_strideq*3 and r6q = dst_strideq*3 on entry.
; Leaves srcq and dstq advanced by four rows.
; Clobbers: m0-m3, plus m4-m7 when op is "avg".
;-----------------------------------------------------------------------------
%macro copy_or_avg_4rows_narrow 2
  %2          m0, [srcq]
  %2          m1, [srcq+src_strideq]
  %2          m2, [srcq+src_strideq*2]
  %2          m3, [srcq+r5q]
  lea         srcq, [srcq+src_strideq*4]
%ifidn %1, avg
  %2          m4, [dstq]
  %2          m5, [dstq+dst_strideq]
  %2          m6, [dstq+dst_strideq*2]
  %2          m7, [dstq+r6q]
  pavg        m0, m4
  pavg        m1, m5
  pavg        m2, m6
  pavg        m3, m7
%endif
  %2          [dstq], m0
  %2          [dstq+dst_strideq], m1
  %2          [dstq+dst_strideq*2], m2
  %2          [dstq+r6q], m3
  lea         dstq, [dstq+dst_strideq*4]
%endmacro

;-----------------------------------------------------------------------------
; convolve_fn op [, highbd]
;
; Emits one block copy / block average function.
;   op     - "copy" writes the source block to dst; "avg" averages the source
;            block with what dst already holds (pavgb, or pavgw for highbd).
;   highbd - optional. When present the function is named highbd_convolve_<op>,
;            takes an extra trailing `bd` argument, and doubles w, src, dst and
;            both strides before use.
;            NOTE(review): the doubling presumably converts 16-bit sample
;            counts / halved pointers into byte units - confirm with callers.
;
; Named arguments: src, src_stride, dst, dst_stride, fx, fxs, fy, fys, w, h
; (and bd for highbd). fx, fxs, fy, fys and bd are never referenced by the
; generated code.
;
; Register use inside the function:
;   r4d      - row width in bytes while dispatching, then reloaded with h and
;              used as the remaining-row counter.
;   r5q, r6q - src_strideq*3 / dst_strideq*3 in the four-rows-per-pass loops.
;   m0-m3    - data in flight; m4-m7 - destination rows ("avg", narrow widths).
;
; Dispatch is on byte width: 4 (non-highbd only), 8, 16, 32, 64, and for
; highbd additionally 128, with everything else falling into the widest loop
; (128 bytes for non-highbd, 256 bytes for highbd).
;
; Every loop ends with `sub r4d, N` / `jnz`, so it only terminates when the
; counter reaches exactly zero: h must be a non-zero multiple of N, where
; N = 1 for widths >= 64 bytes, 2 for 32 bytes and 4 for 16/8/4 bytes.
;-----------------------------------------------------------------------------
%macro convolve_fn 1-2
%ifidn %1, avg
%define AUX_XMM_REGS 4
%else
%define AUX_XMM_REGS 0
%endif
%ifidn %2, highbd
%define pavg pavgw
cglobal %2_convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \
                                              dst, dst_stride, \
                                              fx, fxs, fy, fys, w, h, bd
%else
%define pavg pavgb
cglobal convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \
                                           dst, dst_stride, \
                                           fx, fxs, fy, fys, w, h
%endif
  mov         r4d, dword wm             ; r4d = w
%ifidn %2, highbd
  ; Scale width, both pointers and both strides by two.
  shl         r4d, 1
  shl         srcq, 1
  shl         src_strideq, 1
  shl         dstq, 1
  shl         dst_strideq, 1
%else
  ; A 4-byte row only exists for the non-highbd variants.
  cmp         r4d, 4
  je .w4
%endif
  cmp         r4d, 8
  je .w8
  cmp         r4d, 16
  je .w16
  cmp         r4d, 32
  je .w32

  cmp         r4d, 64
  je .w64
%ifidn %2, highbd
  cmp         r4d, 128
  je .w128

  ; highbd only: anything not matched above is treated as a 256-byte row.
.w256:
  mov         r4d, dword hm             ; r4d = rows remaining
.loop256:
  copy_or_avg_64 %1, 0
  copy_or_avg_64 %1, 64
  copy_or_avg_64 %1, 128
  copy_or_avg_64 %1, 192, advance_src
  add         dstq, dst_strideq
  sub         r4d, 1
  jnz .loop256
  RET
%endif

  ; 128-byte rows, one row per pass. For the non-highbd variants this is also
  ; where every width not matched by the dispatch chain ends up.
.w128:
  mov         r4d, dword hm             ; r4d = rows remaining
.loop128:
  copy_or_avg_64 %1, 0
  copy_or_avg_64 %1, 64, advance_src
  add         dstq, dst_strideq
  sub         r4d, 1
  jnz .loop128
  RET

  ; 64-byte rows, one row per pass.
.w64:
  mov         r4d, dword hm             ; r4d = rows remaining
.loop64:
  copy_or_avg_64 %1, 0, advance_src
  add         dstq, dst_strideq
  sub         r4d, 1
  jnz .loop64
  RET

  ; 32-byte rows, two rows per pass: m0/m1 hold row 0, m2/m3 hold row 1.
.w32:
  mov         r4d, dword hm             ; r4d = rows remaining
.loop32:
  movu        m0, [srcq]
  movu        m1, [srcq+16]
  movu        m2, [srcq+src_strideq]
  movu        m3, [srcq+src_strideq+16]
  lea         srcq, [srcq+src_strideq*2]
%ifidn %1, avg
  pavg        m0, [dstq]
  pavg        m1, [dstq+16]
  pavg        m2, [dstq+dst_strideq]
  pavg        m3, [dstq+dst_strideq+16]
%endif
  mova        [dstq], m0
  mova        [dstq+16], m1
  mova        [dstq+dst_strideq], m2
  mova        [dstq+dst_strideq+16], m3
  lea         dstq, [dstq+dst_strideq*2]
  sub         r4d, 2
  jnz .loop32
  RET

  ; 16-byte rows, four rows per pass: one XMM register per row.
.w16:
  mov         r4d, dword hm             ; r4d = rows remaining
  lea         r5q, [src_strideq*3]      ; offset of the 4th source row
  lea         r6q, [dst_strideq*3]      ; offset of the 4th destination row
.loop16:
  movu        m0, [srcq]
  movu        m1, [srcq+src_strideq]
  movu        m2, [srcq+src_strideq*2]
  movu        m3, [srcq+r5q]
  lea         srcq, [srcq+src_strideq*4]
%ifidn %1, avg
  pavg        m0, [dstq]
  pavg        m1, [dstq+dst_strideq]
  pavg        m2, [dstq+dst_strideq*2]
  pavg        m3, [dstq+r6q]
%endif
  mova        [dstq], m0
  mova        [dstq+dst_strideq], m1
  mova        [dstq+dst_strideq*2], m2
  mova        [dstq+r6q], m3
  lea         dstq, [dstq+dst_strideq*4]
  sub         r4d, 4
  jnz .loop16
  RET

  ; 8-byte rows, four rows per pass, moved with movh.
.w8:
  mov         r4d, dword hm             ; r4d = rows remaining
  lea         r5q, [src_strideq*3]      ; offset of the 4th source row
  lea         r6q, [dst_strideq*3]      ; offset of the 4th destination row
.loop8:
  copy_or_avg_4rows_narrow %1, movh
  sub         r4d, 4
  jnz .loop8
  RET

%ifnidn %2, highbd
  ; 4-byte rows, four rows per pass, moved with movd. Not emitted for highbd.
.w4:
  mov         r4d, dword hm             ; r4d = rows remaining
  lea         r5q, [src_strideq*3]      ; offset of the 4th source row
  lea         r6q, [dst_strideq*3]      ; offset of the 4th destination row
.loop4:
  copy_or_avg_4rows_narrow %1, movd
  sub         r4d, 4
  jnz .loop4
  RET
%endif
%endmacro

INIT_XMM sse2
convolve_fn copy
convolve_fn avg
convolve_fn copy, highbd