summaryrefslogtreecommitdiffstats
path: root/other-licenses/7zstub/src/Asm/x86/AesOpt.asm
diff options
context:
space:
mode:
authorwolfbeast <mcwerewolf@wolfbeast.com>2019-03-29 16:04:01 +0100
committerwolfbeast <mcwerewolf@wolfbeast.com>2019-03-29 16:04:01 +0100
commit88083f8c683c18f4de68a20c863a82a9da65db8f (patch)
tree926656892d9d80260da02ea8ea71031b140c51df /other-licenses/7zstub/src/Asm/x86/AesOpt.asm
parentf999f544aad04069b03704d994a99352263f600b (diff)
parent843e4ceffd6ce21a6e6db37419335eafdc543e18 (diff)
downloadUXP-88083f8c683c18f4de68a20c863a82a9da65db8f.tar
UXP-88083f8c683c18f4de68a20c863a82a9da65db8f.tar.gz
UXP-88083f8c683c18f4de68a20c863a82a9da65db8f.tar.lz
UXP-88083f8c683c18f4de68a20c863a82a9da65db8f.tar.xz
UXP-88083f8c683c18f4de68a20c863a82a9da65db8f.zip
Merge branch 'master' into Sync-weave
Diffstat (limited to 'other-licenses/7zstub/src/Asm/x86/AesOpt.asm')
-rw-r--r--other-licenses/7zstub/src/Asm/x86/AesOpt.asm237
1 files changed, 237 insertions, 0 deletions
diff --git a/other-licenses/7zstub/src/Asm/x86/AesOpt.asm b/other-licenses/7zstub/src/Asm/x86/AesOpt.asm
new file mode 100644
index 000000000..c32e48f88
--- /dev/null
+++ b/other-licenses/7zstub/src/Asm/x86/AesOpt.asm
@@ -0,0 +1,237 @@
+; AesOpt.asm -- Intel's AES.
+; 2009-12-12 : Igor Pavlov : Public domain
+
+include 7zAsm.asm
+
+MY_ASM_START
+
+ifndef x64
+ .xmm
+endif
+
+ifdef x64
+ num equ r8
+else
+ num equ [r4 + REG_SIZE * 4]
+endif
+
+rD equ r2
+rN equ r0
+
+MY_PROLOG macro reg:req
+ ifdef x64
+ movdqa [r4 + 8], xmm6
+ movdqa [r4 + 8 + 16], xmm7
+ endif
+
+ push r3
+ push r5
+ push r6
+
+ mov rN, num
+ mov x6, [r1 + 16]
+ shl x6, 5
+
+ movdqa reg, [r1]
+ add r1, 32
+endm
+
+MY_EPILOG macro
+ pop r6
+ pop r5
+ pop r3
+
+ ifdef x64
+ movdqa xmm6, [r4 + 8]
+ movdqa xmm7, [r4 + 8 + 16]
+ endif
+
+ MY_ENDP
+endm
+
+ways equ 4
+ways16 equ (ways * 16)
+
+OP_W macro op, op2
+ i = 0
+ rept ways
+ op @CatStr(xmm,%i), op2
+ i = i + 1
+ endm
+endm
+
+LOAD_OP macro op:req, offs:req
+ op xmm0, [r1 + r3 offs]
+endm
+
+LOAD_OP_W macro op:req, offs:req
+ movdqa xmm7, [r1 + r3 offs]
+ OP_W op, xmm7
+endm
+
+
+; ---------- AES-CBC Decode ----------
+
+CBC_DEC_UPDATE macro reg, offs
+ pxor reg, xmm6
+ movdqa xmm6, [rD + offs]
+ movdqa [rD + offs], reg
+endm
+
+DECODE macro op:req
+ op aesdec, +16
+ @@:
+ op aesdec, +0
+ op aesdec, -16
+ sub x3, 32
+ jnz @B
+ op aesdeclast, +0
+endm
+
+MY_PROC AesCbc_Decode_Intel, 3
+ MY_PROLOG xmm6
+
+ sub x6, 32
+
+ jmp check2
+
+ align 16
+ nextBlocks2:
+ mov x3, x6
+ OP_W movdqa, [rD + i * 16]
+ LOAD_OP_W pxor, +32
+ DECODE LOAD_OP_W
+ OP_W CBC_DEC_UPDATE, i * 16
+ add rD, ways16
+ check2:
+ sub rN, ways
+ jnc nextBlocks2
+
+ add rN, ways
+ jmp check
+
+ nextBlock:
+ mov x3, x6
+ movdqa xmm1, [rD]
+ LOAD_OP movdqa, +32
+ pxor xmm0, xmm1
+ DECODE LOAD_OP
+ pxor xmm0, xmm6
+ movdqa [rD], xmm0
+ movdqa xmm6, xmm1
+ add rD, 16
+ check:
+ sub rN, 1
+ jnc nextBlock
+
+ movdqa [r1 - 32], xmm6
+ MY_EPILOG
+
+
+; ---------- AES-CBC Encode ----------
+
+ENCODE macro op:req
+ op aesenc, -16
+ @@:
+ op aesenc, +0
+ op aesenc, +16
+ add r3, 32
+ jnz @B
+ op aesenclast, +0
+endm
+
+MY_PROC AesCbc_Encode_Intel, 3
+ MY_PROLOG xmm0
+
+ add r1, r6
+ neg r6
+ add r6, 32
+
+ jmp check_e
+
+ align 16
+ nextBlock_e:
+ mov r3, r6
+ pxor xmm0, [rD]
+ pxor xmm0, [r1 + r3 - 32]
+ ENCODE LOAD_OP
+ movdqa [rD], xmm0
+ add rD, 16
+ check_e:
+ sub rN, 1
+ jnc nextBlock_e
+
+ movdqa [r1 + r6 - 64], xmm0
+ MY_EPILOG
+
+
+; ---------- AES-CTR ----------
+
+XOR_UPD_1 macro reg, offs
+ pxor reg, [rD + offs]
+endm
+
+XOR_UPD_2 macro reg, offs
+ movdqa [rD + offs], reg
+endm
+
+MY_PROC AesCtr_Code_Intel, 3
+ MY_PROLOG xmm6
+
+ mov r5, r4
+ shr r5, 4
+ dec r5
+ shl r5, 4
+
+ mov DWORD PTR [r5], 1
+ mov DWORD PTR [r5 + 4], 0
+ mov DWORD PTR [r5 + 8], 0
+ mov DWORD PTR [r5 + 12], 0
+
+ add r1, r6
+ neg r6
+ add r6, 32
+
+ jmp check2_c
+
+ align 16
+ nextBlocks2_c:
+ movdqa xmm7, [r5]
+
+ i = 0
+ rept ways
+ paddq xmm6, xmm7
+ movdqa @CatStr(xmm,%i), xmm6
+ i = i + 1
+ endm
+
+ mov r3, r6
+ LOAD_OP_W pxor, -32
+ ENCODE LOAD_OP_W
+ OP_W XOR_UPD_1, i * 16
+ OP_W XOR_UPD_2, i * 16
+ add rD, ways16
+ check2_c:
+ sub rN, ways
+ jnc nextBlocks2_c
+
+ add rN, ways
+ jmp check_c
+
+ nextBlock_c:
+ paddq xmm6, [r5]
+ mov r3, r6
+ movdqa xmm0, [r1 + r3 - 32]
+ pxor xmm0, xmm6
+ ENCODE LOAD_OP
+ XOR_UPD_1 xmm0, 0
+ XOR_UPD_2 xmm0, 0
+ add rD, 16
+ check_c:
+ sub rN, 1
+ jnc nextBlock_c
+
+ movdqa [r1 + r6 - 64], xmm6
+ MY_EPILOG
+
+end