diff options
author | trav90 <travawine@palemoon.org> | 2018-10-19 23:05:00 -0500 |
---|---|---|
committer | trav90 <travawine@palemoon.org> | 2018-10-19 23:05:03 -0500 |
commit | d2499ead93dc4298c0882fe98902acb1b5209f99 (patch) | |
tree | cb0b942aed59e5108f9a3e9d64e7b77854383421 /third_party/aom/aom_dsp/x86/quantize_avx_x86_64.asm | |
parent | 41fbdea457bf50c0a43e1c27c5cbf7f0a3a9eb33 (diff) | |
download | UXP-d2499ead93dc4298c0882fe98902acb1b5209f99.tar UXP-d2499ead93dc4298c0882fe98902acb1b5209f99.tar.gz UXP-d2499ead93dc4298c0882fe98902acb1b5209f99.tar.lz UXP-d2499ead93dc4298c0882fe98902acb1b5209f99.tar.xz UXP-d2499ead93dc4298c0882fe98902acb1b5209f99.zip |
Update libaom to commit ID 1e227d41f0616de9548a673a83a21ef990b62591
Diffstat (limited to 'third_party/aom/aom_dsp/x86/quantize_avx_x86_64.asm')
-rw-r--r-- | third_party/aom/aom_dsp/x86/quantize_avx_x86_64.asm | 67 |
1 file changed, 16 insertions, 51 deletions
diff --git a/third_party/aom/aom_dsp/x86/quantize_avx_x86_64.asm b/third_party/aom/aom_dsp/x86/quantize_avx_x86_64.asm index e6b40262d..216a0bd8f 100644 --- a/third_party/aom/aom_dsp/x86/quantize_avx_x86_64.asm +++ b/third_party/aom/aom_dsp/x86/quantize_avx_x86_64.asm @@ -16,16 +16,12 @@ SECTION .text %macro QUANTIZE_FN 2 -cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ +cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, zbin, round, quant, \ shift, qcoeff, dqcoeff, dequant, \ eob, scan, iscan vzeroupper - ; If we can skip this block, then just zero the output - cmp skipmp, 0 - jne .blank - %ifnidn %1, b_32x32 ; Special case for ncoeff == 16, as it is frequent and we can save on @@ -83,14 +79,14 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ .single_nonzero: ; Actual quantization of size 16 block - setup pointers, rounders, etc. - movifnidn r4, roundmp - movifnidn r5, quantmp - mov r3, dequantmp - mov r6, shiftmp - mova m1, [r4] ; m1 = round - mova m2, [r5] ; m2 = quant - mova m3, [r3] ; m3 = dequant - mova m4, [r6] ; m4 = shift + movifnidn r3, roundmp + movifnidn r4, quantmp + mov r6, dequantmp + mov r5, shiftmp + mova m1, [r3] ; m1 = round + mova m2, [r4] ; m2 = quant + mova m3, [r6] ; m3 = dequant + mova m4, [r5] ; m4 = shift mov r3, iscanmp @@ -174,20 +170,20 @@ cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, zbin, round, quant, \ %endif ; %ifnidn %1, b_32x32 -DEFINE_ARGS coeff, ncoeff, skip, zbin, round, quant, shift, \ +DEFINE_ARGS coeff, ncoeff, zbin, round, quant, shift, \ qcoeff, dqcoeff, dequant, eob, scan, iscan ; Actual quantization loop - setup pointers, rounders, etc. 
movifnidn coeffq, coeffmp movifnidn ncoeffq, ncoeffmp - mov r2, dequantmp movifnidn zbinq, zbinmp movifnidn roundq, roundmp movifnidn quantq, quantmp + movifnidn dequantq, dequantmp mova m0, [zbinq] ; m0 = zbin mova m1, [roundq] ; m1 = round mova m2, [quantq] ; m2 = quant - mova m3, [r2] ; m3 = dequant + mova m3, [dequantq] ; m3 = dequant pcmpeqw m4, m4 ; All lanes -1 %ifidn %1, b_32x32 psubw m0, m4 @@ -199,7 +195,7 @@ DEFINE_ARGS coeff, ncoeff, skip, zbin, round, quant, shift, \ mov r2, shiftmp mov r3, qcoeffmp - mova m4, [r2] ; m4 = shift + mova m4, [r2] ; m4 = shift mov r4, dqcoeffmp mov r5, iscanmp %ifidn %1, b_32x32 @@ -207,7 +203,7 @@ DEFINE_ARGS coeff, ncoeff, skip, zbin, round, quant, shift, \ %endif pxor m5, m5 ; m5 = dedicated zero - DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, d5, eob + DEFINE_ARGS coeff, ncoeff, d1, qcoeff, dqcoeff, iscan, d2, d3, d4, eob lea coeffq, [ coeffq+ncoeffq*4] @@ -432,39 +428,8 @@ DEFINE_ARGS coeff, ncoeff, skip, zbin, round, quant, shift, \ mov [r2], ax vzeroupper RET - - ; Skip-block, i.e. just write all zeroes -.blank: - -DEFINE_ARGS coeff, ncoeff, skip, zbin, round, quant, shift, \ - qcoeff, dqcoeff, dequant, eob, scan, iscan - - mov r0, dqcoeffmp - movifnidn ncoeffq, ncoeffmp - mov r2, qcoeffmp - mov r3, eobmp - -DEFINE_ARGS dqcoeff, ncoeff, qcoeff, eob - - lea dqcoeffq, [dqcoeffq+ncoeffq*4] - lea qcoeffq, [ qcoeffq+ncoeffq*4] - neg ncoeffq - pxor m7, m7 - -.blank_loop: - mova [dqcoeffq+ncoeffq*4+ 0], ymm7 - mova [dqcoeffq+ncoeffq*4+32], ymm7 - mova [qcoeffq+ncoeffq*4+ 0], ymm7 - mova [qcoeffq+ncoeffq*4+32], ymm7 - add ncoeffq, mmsize - jl .blank_loop - - mov [eobq], word 0 - - vzeroupper - RET %endmacro INIT_XMM avx -QUANTIZE_FN b, 7 -QUANTIZE_FN b_32x32, 7 +QUANTIZE_FN b, 9 +QUANTIZE_FN b_32x32, 9 |