summary | refs | log | tree | commit | diff | stats
path: root/third_party/aom/aom_dsp/x86/subtract_sse2.asm
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/aom/aom_dsp/x86/subtract_sse2.asm')
-rw-r--r-- third_party/aom/aom_dsp/x86/subtract_sse2.asm 146
1 files changed, 0 insertions, 146 deletions
diff --git a/third_party/aom/aom_dsp/x86/subtract_sse2.asm b/third_party/aom/aom_dsp/x86/subtract_sse2.asm
deleted file mode 100644
index 1a75a234f..000000000
--- a/third_party/aom/aom_dsp/x86/subtract_sse2.asm
+++ /dev/null
@@ -1,146 +0,0 @@
-;
-; Copyright (c) 2016, Alliance for Open Media. All rights reserved
-;
-; This source code is subject to the terms of the BSD 2 Clause License and
-; the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
-; was not distributed with this source code in the LICENSE file, you can
-; obtain it at www.aomedia.org/license/software. If the Alliance for Open
-; Media Patent License 1.0 was not distributed with this source code in the
-; PATENTS file, you can obtain it at www.aomedia.org/license/patent.
-;
-
-;
-
-%include "third_party/x86inc/x86inc.asm"
-
-SECTION .text
-
-; void aom_subtract_block(int rows, int cols,
-; int16_t *diff, ptrdiff_t diff_stride,
-; const uint8_t *src, ptrdiff_t src_stride,
-; const uint8_t *pred, ptrdiff_t pred_stride)
-
-INIT_XMM sse2 ; x86inc mode selection: the m0..m7 names below are used with this XMM/SSE2 setup
-cglobal subtract_block, 7, 7, 8, \
- rows, cols, diff, diff_stride, src, src_stride, \
- pred, pred_stride
-%define pred_str colsq
- pxor m7, m7 ; dedicated zero register: second operand of every unpack, so bytes widen to zero-extended words
- cmp colsd, 4 ; dispatch on block width; cols is only read here, its register is reused as pred_str afterwards
- je .case_4 ; cols == 4
- cmp colsd, 8
- je .case_8 ; cols == 8
- cmp colsd, 16
- je .case_16 ; cols == 16
- cmp colsd, 32
- je .case_32 ; cols == 32
- cmp colsd, 64
- je .case_64 ; cols == 64; every other width falls through to the .loop_128 path
-
-%macro loop16 6 ; diff = src - pred for two vectors; %1,%2 = src offsets, %3,%4 = pred offsets, %5,%6 = diff offsets
- mova m0, [srcq+%1] ; src vector #1 (mova is x86inc's aligned move; presumably needs aligned addresses - confirm callers)
- mova m4, [srcq+%2] ; src vector #2
- mova m1, [predq+%3] ; pred vector #1
- mova m5, [predq+%4] ; pred vector #2
- punpckhbw m2, m0, m7 ; m2 = high-half bytes of src #1 interleaved with zero (m7), i.e. widened to words
- punpckhbw m3, m1, m7 ; m3 = high-half bytes of pred #1 widened to words
- punpcklbw m0, m7 ; m0 = low-half bytes of src #1 widened to words
- punpcklbw m1, m7 ; m1 = low-half bytes of pred #1 widened to words
- psubw m2, m3 ; m2 = src - pred, high half of vector #1
- psubw m0, m1 ; m0 = src - pred, low half of vector #1
- punpckhbw m1, m4, m7 ; m1 is free again: high-half bytes of src #2 widened to words
- punpckhbw m3, m5, m7 ; m3 = high-half bytes of pred #2 widened to words
- punpcklbw m4, m7 ; m4 = low-half bytes of src #2 widened to words
- punpcklbw m5, m7 ; m5 = low-half bytes of pred #2 widened to words
- psubw m1, m3 ; m1 = src - pred, high half of vector #2
- psubw m4, m5 ; m4 = src - pred, low half of vector #2
- mova [diffq+mmsize*0+%5], m0 ; vector #1 result, low half, at diff offset %5
- mova [diffq+mmsize*1+%5], m2 ; vector #1 result, high half, one register width further
- mova [diffq+mmsize*0+%6], m4 ; vector #2 result, low half, at diff offset %6
- mova [diffq+mmsize*1+%6], m1 ; vector #2 result, high half
-%endmacro
-
- mov pred_str, pred_stridemp ; load pred_stride into the register that held cols (pred_str aliases colsq); presumably the 'mp' form because only 7 args are register-loaded - confirm in x86inc
-.loop_128: ; one row per iteration: 4 loop16 expansions cover src/pred offsets 0 .. 8*mmsize
- loop16 0*mmsize, 1*mmsize, 0*mmsize, 1*mmsize, 0*mmsize, 2*mmsize ; input vectors 0-1 -> diff offsets 0 and 2*mmsize
- loop16 2*mmsize, 3*mmsize, 2*mmsize, 3*mmsize, 4*mmsize, 6*mmsize ; input vectors 2-3 -> diff offsets 4*mmsize and 6*mmsize
- loop16 4*mmsize, 5*mmsize, 4*mmsize, 5*mmsize, 8*mmsize, 10*mmsize ; input vectors 4-5 -> diff offsets 8*mmsize and 10*mmsize
- loop16 6*mmsize, 7*mmsize, 6*mmsize, 7*mmsize, 12*mmsize, 14*mmsize ; input vectors 6-7 -> diff offsets 12*mmsize and 14*mmsize
- lea diffq, [diffq+diff_strideq*2] ; next diff row; stride is scaled by 2 because diff holds int16_t elements
- add predq, pred_str ; next pred row
- add srcq, src_strideq ; next src row
- sub rowsd, 1 ; one row finished
- jnz .loop_128 ; NOTE(review): exits only when rows hits exactly 0, whereas the other width paths use jg
- RET
-
-.case_64: ; cols == 64
- mov pred_str, pred_stridemp ; pred_str (the former cols register) = pred_stride
-.loop_64: ; one row per iteration: 2 loop16 expansions cover src/pred offsets 0 .. 4*mmsize
- loop16 0*mmsize, 1*mmsize, 0*mmsize, 1*mmsize, 0*mmsize, 2*mmsize ; input vectors 0-1 -> diff offsets 0 and 2*mmsize
- loop16 2*mmsize, 3*mmsize, 2*mmsize, 3*mmsize, 4*mmsize, 6*mmsize ; input vectors 2-3 -> diff offsets 4*mmsize and 6*mmsize
- lea diffq, [diffq+diff_strideq*2] ; next diff row; stride is scaled by 2 because diff holds int16_t elements
- add predq, pred_str ; next pred row
- add srcq, src_strideq ; next src row
- dec rowsd ; one row finished
- jg .loop_64 ; continue while rows > 0 (signed compare)
- RET
-
-.case_32: ; cols == 32
- mov pred_str, pred_stridemp ; pred_str (the former cols register) = pred_stride
-.loop_32: ; one row per iteration: a single loop16 expansion covers src/pred offsets 0 .. 2*mmsize
- loop16 0, mmsize, 0, mmsize, 0, 2*mmsize ; both vectors come from the same row; results at diff offsets 0 and 2*mmsize
- lea diffq, [diffq+diff_strideq*2] ; next diff row; stride is scaled by 2 because diff holds int16_t elements
- add predq, pred_str ; next pred row
- add srcq, src_strideq ; next src row
- dec rowsd ; one row finished
- jg .loop_32 ; continue while rows > 0 (signed compare)
- RET
-
-.case_16: ; cols == 16
- mov pred_str, pred_stridemp ; pred_str (the former cols register) = pred_stride
-.loop_16: ; two rows per iteration: loop16's second vector is taken one stride below the first
- loop16 0, src_strideq, 0, pred_str, 0, diff_strideq*2 ; vector #1 = current row, vector #2 = next row of src/pred/diff
- lea diffq, [diffq+diff_strideq*4] ; advance diff by two rows (2 rows * 2 bytes per int16_t element)
- lea predq, [predq+pred_str*2] ; advance pred by two rows
- lea srcq, [srcq+src_strideq*2] ; advance src by two rows
- sub rowsd, 2 ; two rows finished; NOTE(review): an odd rows value would touch one extra row - confirm callers pass even heights
- jg .loop_16 ; continue while rows > 0 (signed compare)
- RET
-
-%macro loop_h 0 ; diff = src - pred for two rows using half-register loads; expanded in .loop_8 and, after INIT_MMX, in .loop_4
- movh m0, [srcq] ; src, current row (movh: x86inc half-register load; presumably 8 bytes under XMM, 4 under MMX - confirm)
- movh m2, [srcq+src_strideq] ; src, next row
- movh m1, [predq] ; pred, current row
- movh m3, [predq+pred_str] ; pred, next row
- punpcklbw m0, m7 ; widen src bytes (current row) to words by interleaving with zero (m7)
- punpcklbw m1, m7 ; widen pred bytes (current row) to words
- punpcklbw m2, m7 ; widen src bytes (next row) to words
- punpcklbw m3, m7 ; widen pred bytes (next row) to words
- psubw m0, m1 ; current row: src - pred
- psubw m2, m3 ; next row: src - pred
- mova [diffq], m0 ; store current-row result (one full register of words)
- mova [diffq+diff_strideq*2], m2 ; store next-row result one diff row below (stride * 2 bytes per int16_t element)
-%endmacro
-
-.case_8: ; cols == 8
- mov pred_str, pred_stridemp ; pred_str (the former cols register) = pred_stride
-.loop_8: ; two rows per iteration via loop_h
- loop_h ; expanded under the XMM setup selected at the top of the function
- lea diffq, [diffq+diff_strideq*4] ; advance diff by two rows (2 rows * 2 bytes per int16_t element)
- lea srcq, [srcq+src_strideq*2] ; advance src by two rows
- lea predq, [predq+pred_str*2] ; advance pred by two rows
- sub rowsd, 2 ; two rows finished; NOTE(review): an odd rows value would touch one extra row - confirm callers pass even heights
- jg .loop_8 ; continue while rows > 0 (signed compare)
- RET
-
-INIT_MMX ; x86inc mode switch: from here on the m0..m7 names presumably map to MMX registers - confirm in x86inc
-.case_4: ; cols == 4
- mov pred_str, pred_stridemp ; pred_str (the former cols register) = pred_stride
-.loop_4: ; two rows per iteration via loop_h
- loop_h ; same macro as .loop_8, but expanded after INIT_MMX
- lea diffq, [diffq+diff_strideq*4] ; advance diff by two rows (2 rows * 2 bytes per int16_t element)
- lea srcq, [srcq+src_strideq*2] ; advance src by two rows
- lea predq, [predq+pred_str*2] ; advance pred by two rows
- sub rowsd, 2 ; two rows finished; NOTE(review): an odd rows value would touch one extra row - confirm callers pass even heights
- jg .loop_4 ; continue while rows > 0 (signed compare)
- RET ; NOTE(review): no emms is visible on this MMX path; confirm x86 FPU/MMX state is cleared by RET or by the caller