From b87b5210445fd8dab9c4456e691191f128cdb2fc Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Mon, 15 Apr 2024 15:57:47 +1000 Subject: [PATCH] AES XTS asm x64 MSVC Use assembly code for AES-XTS with MSVC for x64. --- IDE/ECLIPSE/MICRIUM/README.md | 2 +- IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj | 16 +- IDE/WIN10/user_settings.h | 4 + IDE/WIN10/wolfssl-fips.vcxproj | 14 + IDE/WORKBENCH/README.md | 1 + wolfcrypt/src/aes_xts_asm.asm | 1477 +++++++++++++++++++ wolfcrypt/src/include.am | 1 + wolfssl.vcxproj | 14 + 8 files changed, 1527 insertions(+), 2 deletions(-) create mode 100644 wolfcrypt/src/aes_xts_asm.asm diff --git a/IDE/ECLIPSE/MICRIUM/README.md b/IDE/ECLIPSE/MICRIUM/README.md index bd0c8bc9f5..517dfdc4fc 100644 --- a/IDE/ECLIPSE/MICRIUM/README.md +++ b/IDE/ECLIPSE/MICRIUM/README.md @@ -40,7 +40,7 @@ The folder hierarchy is the same as the wolfSSL folders with an exception of the 4. Right click on each folders, add or link all the source code in the corresponding folder in wolfSSL. -5. Remove non-C platform dependent files from your build. At the moment, only aes_asm.asm, aes_gcm_asm.asm and aes_asm.s must be removed from your wolfssl/wolfcrypt/src folder. +5. Remove non-C platform dependent files from your build. At the moment, only aes_asm.asm, aes_gcm_asm.asm, aes_xts_asm.asm and aes_asm.s must be removed from your wolfssl/wolfcrypt/src folder. 6. In your C/C++ compiler preprocessor settings, add the wolfSSL directories to your include paths. Here's an example of the paths that must be added. diff --git a/IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj b/IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj index 88980d7ee5..dfe4877e18 100644 --- a/IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj +++ b/IDE/WIN-SRTP-KDF-140-3/wolfssl-fips.vcxproj @@ -347,6 +347,20 @@ $(IntDir)%(Filename).obj $(IntDir)%(Filename).obj + + false + false + ml64.exe /DHAVE_FIPS /DHAVE_FIPS_VERSION=5 /DHAVE_FIPS_VERSION_MINOR=1 /c /Zi /Fo"$(IntDir)%(Filename).obj" %(Identity) + ml64.exe /DHAVE_FIPS /DHAVE_FIPS_VERSION=5 /DHAVE_FIPS_VERSION_MINOR=1 /c /Zi /Fo"$(IntDir)%(Filename).obj" %(Identity) + $(IntDir)%(Filename).obj + $(IntDir)%(Filename).obj + false + false + ml64.exe /DHAVE_FIPS /DHAVE_FIPS_VERSION=5 /DHAVE_FIPS_VERSION_MINOR=1 /c /Zi /Fo"$(IntDir)%(Filename).obj" %(Identity) + ml64.exe /DHAVE_FIPS /DHAVE_FIPS_VERSION=5 /DHAVE_FIPS_VERSION_MINOR=1 /c /Zi /Fo"$(IntDir)%(Filename).obj" %(Identity) + $(IntDir)%(Filename).obj + $(IntDir)%(Filename).obj + @@ -374,4 +388,4 @@ - \ No newline at end of file + diff --git a/IDE/WIN10/user_settings.h b/IDE/WIN10/user_settings.h index cc1c34edeb..1fcb317318 100644 --- a/IDE/WIN10/user_settings.h +++ b/IDE/WIN10/user_settings.h @@ -119,6 +119,9 @@ #define WOLFSSL_AES_OFB #define FP_MAX_BITS 16384 #endif /* FIPS v5 */ + #if defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 6) + #define WOLFSSL_AES_XTS + #endif #else /* Enables blinding mode, to prevent timing attacks */ #define WC_RSA_BLINDING @@ -133,6 +136,7 @@ #define HAVE_SECURE_RENEGOTIATION #define HAVE_AESGCM + #define WOLFSSL_AES_XTS #define WOLFSSL_SHA384 #define WOLFSSL_SHA512 diff --git a/IDE/WIN10/wolfssl-fips.vcxproj b/IDE/WIN10/wolfssl-fips.vcxproj index 41d268a878..2736bc4448 100644 --- a/IDE/WIN10/wolfssl-fips.vcxproj +++ b/IDE/WIN10/wolfssl-fips.vcxproj @@ -347,6 +347,20 @@ $(IntDir)%(Filename).obj $(IntDir)%(Filename).obj + + false + false + ml64.exe /DHAVE_FIPS /DHAVE_FIPS_VERSION=5 /DHAVE_FIPS_VERSION_MINOR=1 /c /Zi /Fo"$(IntDir)%(Filename).obj" %(Identity) + ml64.exe /DHAVE_FIPS /DHAVE_FIPS_VERSION=5 
/DHAVE_FIPS_VERSION_MINOR=1 /c /Zi /Fo"$(IntDir)%(Filename).obj" %(Identity) + $(IntDir)%(Filename).obj + $(IntDir)%(Filename).obj + false + false + ml64.exe /DHAVE_FIPS /DHAVE_FIPS_VERSION=5 /DHAVE_FIPS_VERSION_MINOR=1 /c /Zi /Fo"$(IntDir)%(Filename).obj" %(Identity) + ml64.exe /DHAVE_FIPS /DHAVE_FIPS_VERSION=5 /DHAVE_FIPS_VERSION_MINOR=1 /c /Zi /Fo"$(IntDir)%(Filename).obj" %(Identity) + $(IntDir)%(Filename).obj + $(IntDir)%(Filename).obj + diff --git a/IDE/WORKBENCH/README.md b/IDE/WORKBENCH/README.md index 6020d6ac2c..6565045790 100644 --- a/IDE/WORKBENCH/README.md +++ b/IDE/WORKBENCH/README.md @@ -10,6 +10,7 @@ src and wolfcrypt directories. Uncheck the following: ``` wolfcrypt/src/aes_asm.asm wolfcrypt/src/aes_gcm_asm.asm + wolfcrypt/src/aes_xts_asm.asm wolfcrypt/src/aes_asm.s examples/echoclient/ examples/echoserver/ diff --git a/wolfcrypt/src/aes_xts_asm.asm b/wolfcrypt/src/aes_xts_asm.asm new file mode 100644 index 0000000000..3185ec224a --- /dev/null +++ b/wolfcrypt/src/aes_xts_asm.asm @@ -0,0 +1,1477 @@ +; /* aes_xts_asm.asm */ +; /* +; * Copyright (C) 2006-2024 wolfSSL Inc. +; * +; * This file is part of wolfSSL. +; * +; * wolfSSL is free software; you can redistribute it and/or modify +; * it under the terms of the GNU General Public License as published by +; * the Free Software Foundation; either version 2 of the License, or +; * (at your option) any later version. +; * +; * wolfSSL is distributed in the hope that it will be useful, +; * but WITHOUT ANY WARRANTY; without even the implied warranty of +; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +; * GNU General Public License for more details. +; * +; * You should have received a copy of the GNU General Public License +; * along with this program; if not, write to the Free Software +; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA +; */ +IF @Version LT 1200 +; AVX2 instructions not recognized by old versions of MASM +IFNDEF NO_AVX2_SUPPORT +NO_AVX2_SUPPORT = 1 +ENDIF +; MOVBE instruction not recognized by old versions of MASM +IFNDEF NO_MOVBE_SUPPORT +NO_MOVBE_SUPPORT = 1 +ENDIF +ENDIF + +IFNDEF HAVE_INTEL_AVX1 +HAVE_INTEL_AVX1 = 1 +ENDIF +IFNDEF NO_AVX2_SUPPORT +HAVE_INTEL_AVX2 = 1 +ENDIF + +IFNDEF _WIN64 +_WIN64 = 1 +ENDIF + +_DATA SEGMENT +ALIGN 16 +L_aes_xts_gc_xts DWORD 135,1,1,1 +ptr_L_aes_xts_gc_xts QWORD L_aes_xts_gc_xts +_DATA ENDS +_text SEGMENT READONLY PARA +AES_XTS_encrypt_aesni PROC + push rdi + push rsi + push r12 + push r13 + mov rdi, rcx + mov rsi, rdx + mov rax, r8 + mov r12, r9 + mov r8, QWORD PTR [rsp+72] + mov r9, QWORD PTR [rsp+80] + mov r10d, DWORD PTR [rsp+88] + sub rsp, 176 + movdqu OWORD PTR [rsp+64], xmm6 + movdqu OWORD PTR [rsp+80], xmm7 + movdqu OWORD PTR [rsp+96], xmm8 + movdqu OWORD PTR [rsp+112], xmm9 + movdqu OWORD PTR [rsp+128], xmm10 + movdqu OWORD PTR [rsp+144], xmm11 + movdqu OWORD PTR [rsp+160], xmm12 + movdqu xmm12, OWORD PTR L_aes_xts_gc_xts + movdqu xmm0, OWORD PTR [r12] + ; aes_enc_block + pxor xmm0, [r9] + movdqu xmm5, OWORD PTR [r9+16] + aesenc xmm0, xmm5 + movdqu xmm5, OWORD PTR [r9+32] + aesenc xmm0, xmm5 + movdqu xmm5, OWORD PTR [r9+48] + aesenc xmm0, xmm5 + movdqu xmm5, OWORD PTR [r9+64] + aesenc xmm0, xmm5 + movdqu xmm5, OWORD PTR [r9+80] + aesenc xmm0, xmm5 + movdqu xmm5, OWORD PTR [r9+96] + aesenc xmm0, xmm5 + movdqu xmm5, OWORD PTR [r9+112] + aesenc xmm0, xmm5 + movdqu xmm5, OWORD PTR [r9+128] + aesenc xmm0, xmm5 + movdqu xmm5, OWORD PTR [r9+144] + aesenc xmm0, xmm5 + cmp r10d, 11 + movdqu xmm5, OWORD PTR [r9+160] + jl 
L_AES_XTS_encrypt_aesni_tweak_aes_enc_block_last + aesenc xmm0, xmm5 + movdqu xmm6, OWORD PTR [r9+176] + aesenc xmm0, xmm6 + cmp r10d, 13 + movdqu xmm5, OWORD PTR [r9+192] + jl L_AES_XTS_encrypt_aesni_tweak_aes_enc_block_last + aesenc xmm0, xmm5 + movdqu xmm6, OWORD PTR [r9+208] + aesenc xmm0, xmm6 + movdqu xmm5, OWORD PTR [r9+224] +L_AES_XTS_encrypt_aesni_tweak_aes_enc_block_last: + aesenclast xmm0, xmm5 + xor r13d, r13d + cmp eax, 64 + mov r11d, eax + jl L_AES_XTS_encrypt_aesni_done_64 + and r11d, 4294967232 +L_AES_XTS_encrypt_aesni_enc_64: + ; 64 bytes of input + ; aes_enc_64 + lea rcx, QWORD PTR [rdi+r13] + lea rdx, QWORD PTR [rsi+r13] + movdqu xmm8, OWORD PTR [rcx] + movdqu xmm9, OWORD PTR [rcx+16] + movdqu xmm10, OWORD PTR [rcx+32] + movdqu xmm11, OWORD PTR [rcx+48] + movdqa xmm4, xmm0 + movdqa xmm1, xmm0 + psrad xmm4, 31 + pslld xmm1, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm1, xmm4 + movdqa xmm4, xmm1 + movdqa xmm2, xmm1 + psrad xmm4, 31 + pslld xmm2, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm2, xmm4 + movdqa xmm4, xmm2 + movdqa xmm3, xmm2 + psrad xmm4, 31 + pslld xmm3, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm3, xmm4 + pxor xmm8, xmm0 + pxor xmm9, xmm1 + pxor xmm10, xmm2 + pxor xmm11, xmm3 + ; aes_enc_block + movdqu xmm4, OWORD PTR [r8] + pxor xmm8, xmm4 + pxor xmm9, xmm4 + pxor xmm10, xmm4 + pxor xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+16] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+32] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+48] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+64] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+80] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+96] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+112] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+128] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+144] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + cmp r10d, 11 + movdqu xmm4, OWORD PTR [r8+160] + jl L_AES_XTS_encrypt_aesni_aes_enc_64_aes_enc_block_last + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+176] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + cmp r10d, 13 + movdqu xmm4, OWORD PTR [r8+192] + jl L_AES_XTS_encrypt_aesni_aes_enc_64_aes_enc_block_last + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+208] + aesenc xmm8, xmm4 + aesenc xmm9, xmm4 + aesenc xmm10, xmm4 + aesenc xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+224] +L_AES_XTS_encrypt_aesni_aes_enc_64_aes_enc_block_last: + aesenclast xmm8, xmm4 + aesenclast xmm9, xmm4 + aesenclast xmm10, xmm4 + aesenclast xmm11, xmm4 + pxor xmm8, xmm0 + pxor xmm9, xmm1 + pxor xmm10, xmm2 + pxor xmm11, xmm3 + movdqu OWORD PTR [rdx], xmm8 + movdqu OWORD PTR [rdx+16], xmm9 + movdqu OWORD PTR [rdx+32], xmm10 + movdqu OWORD PTR [rdx+48], xmm11 + movdqa xmm4, xmm3 + movdqa xmm0, xmm3 + psrad xmm4, 31 + pslld xmm0, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor 
xmm0, xmm4 + add r13d, 64 + cmp r13d, r11d + jl L_AES_XTS_encrypt_aesni_enc_64 +L_AES_XTS_encrypt_aesni_done_64: + cmp r13d, eax + mov r11d, eax + je L_AES_XTS_encrypt_aesni_done_enc + sub r11d, r13d + cmp r11d, 16 + mov r11d, eax + jl L_AES_XTS_encrypt_aesni_last_15 + and r11d, 4294967280 + ; 16 bytes of input +L_AES_XTS_encrypt_aesni_enc_16: + lea rcx, QWORD PTR [rdi+r13] + movdqu xmm8, OWORD PTR [rcx] + pxor xmm8, xmm0 + ; aes_enc_block + pxor xmm8, [r8] + movdqu xmm5, OWORD PTR [r8+16] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+32] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+48] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+64] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+80] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+96] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+112] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+128] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+144] + aesenc xmm8, xmm5 + cmp r10d, 11 + movdqu xmm5, OWORD PTR [r8+160] + jl L_AES_XTS_encrypt_aesni_aes_enc_block_last + aesenc xmm8, xmm5 + movdqu xmm6, OWORD PTR [r8+176] + aesenc xmm8, xmm6 + cmp r10d, 13 + movdqu xmm5, OWORD PTR [r8+192] + jl L_AES_XTS_encrypt_aesni_aes_enc_block_last + aesenc xmm8, xmm5 + movdqu xmm6, OWORD PTR [r8+208] + aesenc xmm8, xmm6 + movdqu xmm5, OWORD PTR [r8+224] +L_AES_XTS_encrypt_aesni_aes_enc_block_last: + aesenclast xmm8, xmm5 + pxor xmm8, xmm0 + lea rcx, QWORD PTR [rsi+r13] + movdqu OWORD PTR [rcx], xmm8 + movdqa xmm4, xmm0 + psrad xmm4, 31 + pslld xmm0, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm0, xmm4 + add r13d, 16 + cmp r13d, r11d + jl L_AES_XTS_encrypt_aesni_enc_16 + cmp r13d, eax + je L_AES_XTS_encrypt_aesni_done_enc +L_AES_XTS_encrypt_aesni_last_15: + sub r13, 16 + lea rcx, QWORD PTR [rsi+r13] + movdqu xmm8, OWORD PTR [rcx] + add r13, 16 + movdqu OWORD PTR [rsp], xmm8 + xor rdx, rdx +L_AES_XTS_encrypt_aesni_last_15_byte_loop: + mov r11b, BYTE PTR [rsp+rdx] + mov cl, BYTE PTR [rdi+r13] + mov BYTE PTR [rsi+r13], r11b + mov BYTE PTR [rsp+rdx], cl + inc r13d + inc edx + cmp r13d, eax + jl L_AES_XTS_encrypt_aesni_last_15_byte_loop + sub r13, rdx + movdqu xmm8, OWORD PTR [rsp] + sub r13, 16 + pxor xmm8, xmm0 + ; aes_enc_block + pxor xmm8, [r8] + movdqu xmm5, OWORD PTR [r8+16] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+32] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+48] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+64] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+80] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+96] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+112] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+128] + aesenc xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+144] + aesenc xmm8, xmm5 + cmp r10d, 11 + movdqu xmm5, OWORD PTR [r8+160] + jl L_AES_XTS_encrypt_aesni_last_15_aes_enc_block_last + aesenc xmm8, xmm5 + movdqu xmm6, OWORD PTR [r8+176] + aesenc xmm8, xmm6 + cmp r10d, 13 + movdqu xmm5, OWORD PTR [r8+192] + jl L_AES_XTS_encrypt_aesni_last_15_aes_enc_block_last + aesenc xmm8, xmm5 + movdqu xmm6, OWORD PTR [r8+208] + aesenc xmm8, xmm6 + movdqu xmm5, OWORD PTR [r8+224] +L_AES_XTS_encrypt_aesni_last_15_aes_enc_block_last: + aesenclast xmm8, xmm5 + pxor xmm8, xmm0 + lea rcx, QWORD PTR [rsi+r13] + movdqu OWORD PTR [rcx], xmm8 +L_AES_XTS_encrypt_aesni_done_enc: + movdqu xmm6, OWORD PTR [rsp+64] + movdqu xmm7, OWORD PTR [rsp+80] + movdqu xmm8, OWORD PTR [rsp+96] + movdqu xmm9, OWORD PTR [rsp+112] + movdqu xmm10, OWORD PTR [rsp+128] + movdqu xmm11, OWORD PTR [rsp+144] + movdqu xmm12, OWORD PTR [rsp+160] + add 
rsp, 176 + pop r13 + pop r12 + pop rsi + pop rdi + ret +AES_XTS_encrypt_aesni ENDP +_text ENDS +_text SEGMENT READONLY PARA +AES_XTS_decrypt_aesni PROC + push rdi + push rsi + push r12 + push r13 + mov rdi, rcx + mov rsi, rdx + mov rax, r8 + mov r12, r9 + mov r8, QWORD PTR [rsp+72] + mov r9, QWORD PTR [rsp+80] + mov r10d, DWORD PTR [rsp+88] + sub rsp, 128 + movdqu OWORD PTR [rsp+16], xmm6 + movdqu OWORD PTR [rsp+32], xmm7 + movdqu OWORD PTR [rsp+48], xmm8 + movdqu OWORD PTR [rsp+64], xmm9 + movdqu OWORD PTR [rsp+80], xmm10 + movdqu OWORD PTR [rsp+96], xmm11 + movdqu OWORD PTR [rsp+112], xmm12 + movdqu xmm12, OWORD PTR L_aes_xts_gc_xts + movdqu xmm0, OWORD PTR [r12] + ; aes_enc_block + pxor xmm0, [r9] + movdqu xmm5, OWORD PTR [r9+16] + aesenc xmm0, xmm5 + movdqu xmm5, OWORD PTR [r9+32] + aesenc xmm0, xmm5 + movdqu xmm5, OWORD PTR [r9+48] + aesenc xmm0, xmm5 + movdqu xmm5, OWORD PTR [r9+64] + aesenc xmm0, xmm5 + movdqu xmm5, OWORD PTR [r9+80] + aesenc xmm0, xmm5 + movdqu xmm5, OWORD PTR [r9+96] + aesenc xmm0, xmm5 + movdqu xmm5, OWORD PTR [r9+112] + aesenc xmm0, xmm5 + movdqu xmm5, OWORD PTR [r9+128] + aesenc xmm0, xmm5 + movdqu xmm5, OWORD PTR [r9+144] + aesenc xmm0, xmm5 + cmp r10d, 11 + movdqu xmm5, OWORD PTR [r9+160] + jl L_AES_XTS_decrypt_aesni_tweak_aes_enc_block_last + aesenc xmm0, xmm5 + movdqu xmm6, OWORD PTR [r9+176] + aesenc xmm0, xmm6 + cmp r10d, 13 + movdqu xmm5, OWORD PTR [r9+192] + jl L_AES_XTS_decrypt_aesni_tweak_aes_enc_block_last + aesenc xmm0, xmm5 + movdqu xmm6, OWORD PTR [r9+208] + aesenc xmm0, xmm6 + movdqu xmm5, OWORD PTR [r9+224] +L_AES_XTS_decrypt_aesni_tweak_aes_enc_block_last: + aesenclast xmm0, xmm5 + xor r13d, r13d + mov r11d, eax + and r11d, 4294967280 + cmp r11d, eax + je L_AES_XTS_decrypt_aesni_mul16_64 + sub r11d, 16 + cmp r11d, 16 + jl L_AES_XTS_decrypt_aesni_last_31_start +L_AES_XTS_decrypt_aesni_mul16_64: + cmp r11d, 64 + jl L_AES_XTS_decrypt_aesni_done_64 + and r11d, 4294967232 +L_AES_XTS_decrypt_aesni_dec_64: + ; 64 bytes of input + ; aes_dec_64 + lea rcx, QWORD PTR [rdi+r13] + lea rdx, QWORD PTR [rsi+r13] + movdqu xmm8, OWORD PTR [rcx] + movdqu xmm9, OWORD PTR [rcx+16] + movdqu xmm10, OWORD PTR [rcx+32] + movdqu xmm11, OWORD PTR [rcx+48] + movdqa xmm4, xmm0 + movdqa xmm1, xmm0 + psrad xmm4, 31 + pslld xmm1, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm1, xmm4 + movdqa xmm4, xmm1 + movdqa xmm2, xmm1 + psrad xmm4, 31 + pslld xmm2, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm2, xmm4 + movdqa xmm4, xmm2 + movdqa xmm3, xmm2 + psrad xmm4, 31 + pslld xmm3, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm3, xmm4 + pxor xmm8, xmm0 + pxor xmm9, xmm1 + pxor xmm10, xmm2 + pxor xmm11, xmm3 + ; aes_dec_block + movdqu xmm4, OWORD PTR [r8] + pxor xmm8, xmm4 + pxor xmm9, xmm4 + pxor xmm10, xmm4 + pxor xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+16] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+32] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+48] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+64] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+80] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+96] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+112] + aesdec xmm8, 
xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+128] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+144] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + cmp r10d, 11 + movdqu xmm4, OWORD PTR [r8+160] + jl L_AES_XTS_decrypt_aesni_aes_dec_64_aes_dec_block_last + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+176] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + cmp r10d, 13 + movdqu xmm4, OWORD PTR [r8+192] + jl L_AES_XTS_decrypt_aesni_aes_dec_64_aes_dec_block_last + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+208] + aesdec xmm8, xmm4 + aesdec xmm9, xmm4 + aesdec xmm10, xmm4 + aesdec xmm11, xmm4 + movdqu xmm4, OWORD PTR [r8+224] +L_AES_XTS_decrypt_aesni_aes_dec_64_aes_dec_block_last: + aesdeclast xmm8, xmm4 + aesdeclast xmm9, xmm4 + aesdeclast xmm10, xmm4 + aesdeclast xmm11, xmm4 + pxor xmm8, xmm0 + pxor xmm9, xmm1 + pxor xmm10, xmm2 + pxor xmm11, xmm3 + movdqu OWORD PTR [rdx], xmm8 + movdqu OWORD PTR [rdx+16], xmm9 + movdqu OWORD PTR [rdx+32], xmm10 + movdqu OWORD PTR [rdx+48], xmm11 + movdqa xmm4, xmm3 + movdqa xmm0, xmm3 + psrad xmm4, 31 + pslld xmm0, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm0, xmm4 + add r13d, 64 + cmp r13d, r11d + jl L_AES_XTS_decrypt_aesni_dec_64 +L_AES_XTS_decrypt_aesni_done_64: + cmp r13d, eax + mov r11d, eax + je L_AES_XTS_decrypt_aesni_done_dec + and r11d, 4294967280 + cmp r11d, eax + je L_AES_XTS_decrypt_aesni_mul16 + sub r11d, 16 + sub r11d, r13d + cmp r11d, 16 + jl L_AES_XTS_decrypt_aesni_last_31_start + add r11d, r13d +L_AES_XTS_decrypt_aesni_mul16: +L_AES_XTS_decrypt_aesni_dec_16: + ; 16 bytes of input + lea rcx, QWORD PTR [rdi+r13] + movdqu xmm8, OWORD PTR [rcx] + pxor xmm8, xmm0 + ; aes_dec_block + pxor xmm8, [r8] + movdqu xmm5, OWORD PTR [r8+16] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+32] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+48] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+64] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+80] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+96] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+112] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+128] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+144] + aesdec xmm8, xmm5 + cmp r10d, 11 + movdqu xmm5, OWORD PTR [r8+160] + jl L_AES_XTS_decrypt_aesni_aes_dec_block_last + aesdec xmm8, xmm5 + movdqu xmm6, OWORD PTR [r8+176] + aesdec xmm8, xmm6 + cmp r10d, 13 + movdqu xmm5, OWORD PTR [r8+192] + jl L_AES_XTS_decrypt_aesni_aes_dec_block_last + aesdec xmm8, xmm5 + movdqu xmm6, OWORD PTR [r8+208] + aesdec xmm8, xmm6 + movdqu xmm5, OWORD PTR [r8+224] +L_AES_XTS_decrypt_aesni_aes_dec_block_last: + aesdeclast xmm8, xmm5 + pxor xmm8, xmm0 + lea rcx, QWORD PTR [rsi+r13] + movdqu OWORD PTR [rcx], xmm8 + movdqa xmm4, xmm0 + psrad xmm4, 31 + pslld xmm0, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm0, xmm4 + add r13d, 16 + cmp r13d, r11d + jl L_AES_XTS_decrypt_aesni_dec_16 + cmp r13d, eax + je L_AES_XTS_decrypt_aesni_done_dec +L_AES_XTS_decrypt_aesni_last_31_start: + movdqa xmm4, xmm0 + movdqa xmm7, xmm0 + psrad xmm4, 31 + pslld xmm7, 1 + pshufd xmm4, xmm4, 147 + pand xmm4, xmm12 + pxor xmm7, xmm4 + lea rcx, QWORD PTR [rdi+r13] + movdqu xmm8, OWORD PTR [rcx] + pxor xmm8, xmm7 + ; aes_dec_block + pxor xmm8, [r8] + movdqu xmm5, OWORD 
PTR [r8+16] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+32] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+48] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+64] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+80] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+96] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+112] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+128] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+144] + aesdec xmm8, xmm5 + cmp r10d, 11 + movdqu xmm5, OWORD PTR [r8+160] + jl L_AES_XTS_decrypt_aesni_last_31_aes_dec_block_last + aesdec xmm8, xmm5 + movdqu xmm6, OWORD PTR [r8+176] + aesdec xmm8, xmm6 + cmp r10d, 13 + movdqu xmm5, OWORD PTR [r8+192] + jl L_AES_XTS_decrypt_aesni_last_31_aes_dec_block_last + aesdec xmm8, xmm5 + movdqu xmm6, OWORD PTR [r8+208] + aesdec xmm8, xmm6 + movdqu xmm5, OWORD PTR [r8+224] +L_AES_XTS_decrypt_aesni_last_31_aes_dec_block_last: + aesdeclast xmm8, xmm5 + pxor xmm8, xmm7 + movdqu OWORD PTR [rsp], xmm8 + add r13, 16 + xor rdx, rdx +L_AES_XTS_decrypt_aesni_last_31_byte_loop: + mov r11b, BYTE PTR [rsp+rdx] + mov cl, BYTE PTR [rdi+r13] + mov BYTE PTR [rsi+r13], r11b + mov BYTE PTR [rsp+rdx], cl + inc r13d + inc edx + cmp r13d, eax + jl L_AES_XTS_decrypt_aesni_last_31_byte_loop + sub r13, rdx + movdqu xmm8, OWORD PTR [rsp] + pxor xmm8, xmm0 + ; aes_dec_block + pxor xmm8, [r8] + movdqu xmm5, OWORD PTR [r8+16] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+32] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+48] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+64] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+80] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+96] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+112] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+128] + aesdec xmm8, xmm5 + movdqu xmm5, OWORD PTR [r8+144] + aesdec xmm8, xmm5 + cmp r10d, 11 + movdqu xmm5, OWORD PTR [r8+160] + jl L_AES_XTS_decrypt_aesni_last_31_2_aes_dec_block_last + aesdec xmm8, xmm5 + movdqu xmm6, OWORD PTR [r8+176] + aesdec xmm8, xmm6 + cmp r10d, 13 + movdqu xmm5, OWORD PTR [r8+192] + jl L_AES_XTS_decrypt_aesni_last_31_2_aes_dec_block_last + aesdec xmm8, xmm5 + movdqu xmm6, OWORD PTR [r8+208] + aesdec xmm8, xmm6 + movdqu xmm5, OWORD PTR [r8+224] +L_AES_XTS_decrypt_aesni_last_31_2_aes_dec_block_last: + aesdeclast xmm8, xmm5 + pxor xmm8, xmm0 + sub r13, 16 + lea rcx, QWORD PTR [rsi+r13] + movdqu OWORD PTR [rcx], xmm8 +L_AES_XTS_decrypt_aesni_done_dec: + movdqu xmm6, OWORD PTR [rsp+16] + movdqu xmm7, OWORD PTR [rsp+32] + movdqu xmm8, OWORD PTR [rsp+48] + movdqu xmm9, OWORD PTR [rsp+64] + movdqu xmm10, OWORD PTR [rsp+80] + movdqu xmm11, OWORD PTR [rsp+96] + movdqu xmm12, OWORD PTR [rsp+112] + add rsp, 128 + pop r13 + pop r12 + pop rsi + pop rdi + ret +AES_XTS_decrypt_aesni ENDP +_text ENDS +IFDEF HAVE_INTEL_AVX1 +_DATA SEGMENT +ALIGN 16 +L_avx1_aes_xts_gc_xts DWORD 135,1,1,1 +ptr_L_avx1_aes_xts_gc_xts QWORD L_avx1_aes_xts_gc_xts +_DATA ENDS +_text SEGMENT READONLY PARA +AES_XTS_encrypt_avx1 PROC + push rdi + push rsi + push r12 + push r13 + mov rdi, rcx + mov rsi, rdx + mov rax, r8 + mov r12, r9 + mov r8, QWORD PTR [rsp+72] + mov r9, QWORD PTR [rsp+80] + mov r10d, DWORD PTR [rsp+88] + sub rsp, 176 + vmovdqu OWORD PTR [rsp+64], xmm6 + vmovdqu OWORD PTR [rsp+80], xmm7 + vmovdqu OWORD PTR [rsp+96], xmm8 + vmovdqu OWORD PTR [rsp+112], xmm9 + vmovdqu OWORD PTR [rsp+128], xmm10 + vmovdqu OWORD PTR [rsp+144], xmm11 + vmovdqu OWORD PTR [rsp+160], xmm12 + vmovdqu xmm12, OWORD PTR L_avx1_aes_xts_gc_xts + vmovdqu xmm0, OWORD PTR [r12] + ; aes_enc_block + 
vpxor xmm0, xmm0, [r9] + vmovdqu xmm5, OWORD PTR [r9+16] + vaesenc xmm0, xmm0, xmm5 + vmovdqu xmm5, OWORD PTR [r9+32] + vaesenc xmm0, xmm0, xmm5 + vmovdqu xmm5, OWORD PTR [r9+48] + vaesenc xmm0, xmm0, xmm5 + vmovdqu xmm5, OWORD PTR [r9+64] + vaesenc xmm0, xmm0, xmm5 + vmovdqu xmm5, OWORD PTR [r9+80] + vaesenc xmm0, xmm0, xmm5 + vmovdqu xmm5, OWORD PTR [r9+96] + vaesenc xmm0, xmm0, xmm5 + vmovdqu xmm5, OWORD PTR [r9+112] + vaesenc xmm0, xmm0, xmm5 + vmovdqu xmm5, OWORD PTR [r9+128] + vaesenc xmm0, xmm0, xmm5 + vmovdqu xmm5, OWORD PTR [r9+144] + vaesenc xmm0, xmm0, xmm5 + cmp r10d, 11 + vmovdqu xmm5, OWORD PTR [r9+160] + jl L_AES_XTS_encrypt_avx1_tweak_aes_enc_block_last + vaesenc xmm0, xmm0, xmm5 + vmovdqu xmm6, OWORD PTR [r9+176] + vaesenc xmm0, xmm0, xmm6 + cmp r10d, 13 + vmovdqu xmm5, OWORD PTR [r9+192] + jl L_AES_XTS_encrypt_avx1_tweak_aes_enc_block_last + vaesenc xmm0, xmm0, xmm5 + vmovdqu xmm6, OWORD PTR [r9+208] + vaesenc xmm0, xmm0, xmm6 + vmovdqu xmm5, OWORD PTR [r9+224] +L_AES_XTS_encrypt_avx1_tweak_aes_enc_block_last: + vaesenclast xmm0, xmm0, xmm5 + xor r13d, r13d + cmp eax, 64 + mov r11d, eax + jl L_AES_XTS_encrypt_avx1_done_64 + and r11d, 4294967232 +L_AES_XTS_encrypt_avx1_enc_64: + ; 64 bytes of input + ; aes_enc_64 + lea rcx, QWORD PTR [rdi+r13] + lea rdx, QWORD PTR [rsi+r13] + vmovdqu xmm8, OWORD PTR [rcx] + vmovdqu xmm9, OWORD PTR [rcx+16] + vmovdqu xmm10, OWORD PTR [rcx+32] + vmovdqu xmm11, OWORD PTR [rcx+48] + vpsrad xmm4, xmm0, 31 + vpslld xmm1, xmm0, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm1, xmm1, xmm4 + vpsrad xmm4, xmm1, 31 + vpslld xmm2, xmm1, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm2, xmm2, xmm4 + vpsrad xmm4, xmm2, 31 + vpslld xmm3, xmm2, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm3, xmm3, xmm4 + vpxor xmm8, xmm8, xmm0 + vpxor xmm9, xmm9, xmm1 + vpxor xmm10, xmm10, xmm2 + vpxor xmm11, xmm11, xmm3 + ; aes_enc_block + vmovdqu xmm4, OWORD PTR [r8] + vpxor xmm8, xmm8, xmm4 + vpxor xmm9, xmm9, xmm4 + vpxor xmm10, xmm10, xmm4 + vpxor xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+16] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+32] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+48] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+64] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+80] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+96] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+112] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+128] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+144] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + cmp r10d, 11 + vmovdqu xmm4, OWORD PTR [r8+160] + jl L_AES_XTS_encrypt_avx1_aes_enc_64_aes_enc_block_last + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, 
xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+176] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + cmp r10d, 13 + vmovdqu xmm4, OWORD PTR [r8+192] + jl L_AES_XTS_encrypt_avx1_aes_enc_64_aes_enc_block_last + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+208] + vaesenc xmm8, xmm8, xmm4 + vaesenc xmm9, xmm9, xmm4 + vaesenc xmm10, xmm10, xmm4 + vaesenc xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+224] +L_AES_XTS_encrypt_avx1_aes_enc_64_aes_enc_block_last: + vaesenclast xmm8, xmm8, xmm4 + vaesenclast xmm9, xmm9, xmm4 + vaesenclast xmm10, xmm10, xmm4 + vaesenclast xmm11, xmm11, xmm4 + vpxor xmm8, xmm8, xmm0 + vpxor xmm9, xmm9, xmm1 + vpxor xmm10, xmm10, xmm2 + vpxor xmm11, xmm11, xmm3 + vmovdqu OWORD PTR [rdx], xmm8 + vmovdqu OWORD PTR [rdx+16], xmm9 + vmovdqu OWORD PTR [rdx+32], xmm10 + vmovdqu OWORD PTR [rdx+48], xmm11 + vpsrad xmm4, xmm3, 31 + vpslld xmm0, xmm3, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm0, xmm0, xmm4 + add r13d, 64 + cmp r13d, r11d + jl L_AES_XTS_encrypt_avx1_enc_64 +L_AES_XTS_encrypt_avx1_done_64: + cmp r13d, eax + mov r11d, eax + je L_AES_XTS_encrypt_avx1_done_enc + sub r11d, r13d + cmp r11d, 16 + mov r11d, eax + jl L_AES_XTS_encrypt_avx1_last_15 + and r11d, 4294967280 + ; 16 bytes of input +L_AES_XTS_encrypt_avx1_enc_16: + lea rcx, QWORD PTR [rdi+r13] + vmovdqu xmm8, OWORD PTR [rcx] + vpxor xmm8, xmm8, xmm0 + ; aes_enc_block + vpxor xmm8, xmm8, [r8] + vmovdqu xmm5, OWORD PTR [r8+16] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+32] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+48] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+64] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+80] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+96] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+112] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+128] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+144] + vaesenc xmm8, xmm8, xmm5 + cmp r10d, 11 + vmovdqu xmm5, OWORD PTR [r8+160] + jl L_AES_XTS_encrypt_avx1_aes_enc_block_last + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r8+176] + vaesenc xmm8, xmm8, xmm6 + cmp r10d, 13 + vmovdqu xmm5, OWORD PTR [r8+192] + jl L_AES_XTS_encrypt_avx1_aes_enc_block_last + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r8+208] + vaesenc xmm8, xmm8, xmm6 + vmovdqu xmm5, OWORD PTR [r8+224] +L_AES_XTS_encrypt_avx1_aes_enc_block_last: + vaesenclast xmm8, xmm8, xmm5 + vpxor xmm8, xmm8, xmm0 + lea rcx, QWORD PTR [rsi+r13] + vmovdqu OWORD PTR [rcx], xmm8 + vpsrad xmm4, xmm0, 31 + vpslld xmm0, xmm0, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm0, xmm0, xmm4 + add r13d, 16 + cmp r13d, r11d + jl L_AES_XTS_encrypt_avx1_enc_16 + cmp r13d, eax + je L_AES_XTS_encrypt_avx1_done_enc +L_AES_XTS_encrypt_avx1_last_15: + sub r13, 16 + lea rcx, QWORD PTR [rsi+r13] + vmovdqu xmm8, OWORD PTR [rcx] + add r13, 16 + vmovdqu OWORD PTR [rsp], xmm8 + xor rdx, rdx +L_AES_XTS_encrypt_avx1_last_15_byte_loop: + mov r11b, BYTE PTR [rsp+rdx] + mov cl, BYTE PTR [rdi+r13] + mov BYTE PTR [rsi+r13], r11b + mov BYTE PTR [rsp+rdx], cl + inc r13d + inc edx + cmp r13d, eax + jl L_AES_XTS_encrypt_avx1_last_15_byte_loop + sub r13, rdx + vmovdqu xmm8, OWORD PTR [rsp] + sub r13, 16 + vpxor xmm8, xmm8, xmm0 + ; aes_enc_block + vpxor xmm8, xmm8, [r8] + vmovdqu xmm5, 
OWORD PTR [r8+16] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+32] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+48] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+64] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+80] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+96] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+112] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+128] + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+144] + vaesenc xmm8, xmm8, xmm5 + cmp r10d, 11 + vmovdqu xmm5, OWORD PTR [r8+160] + jl L_AES_XTS_encrypt_avx1_last_15_aes_enc_block_last + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r8+176] + vaesenc xmm8, xmm8, xmm6 + cmp r10d, 13 + vmovdqu xmm5, OWORD PTR [r8+192] + jl L_AES_XTS_encrypt_avx1_last_15_aes_enc_block_last + vaesenc xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r8+208] + vaesenc xmm8, xmm8, xmm6 + vmovdqu xmm5, OWORD PTR [r8+224] +L_AES_XTS_encrypt_avx1_last_15_aes_enc_block_last: + vaesenclast xmm8, xmm8, xmm5 + vpxor xmm8, xmm8, xmm0 + lea rcx, QWORD PTR [rsi+r13] + vmovdqu OWORD PTR [rcx], xmm8 +L_AES_XTS_encrypt_avx1_done_enc: + vzeroupper + vmovdqu xmm6, OWORD PTR [rsp+64] + vmovdqu xmm7, OWORD PTR [rsp+80] + vmovdqu xmm8, OWORD PTR [rsp+96] + vmovdqu xmm9, OWORD PTR [rsp+112] + vmovdqu xmm10, OWORD PTR [rsp+128] + vmovdqu xmm11, OWORD PTR [rsp+144] + vmovdqu xmm12, OWORD PTR [rsp+160] + add rsp, 176 + pop r13 + pop r12 + pop rsi + pop rdi + ret +AES_XTS_encrypt_avx1 ENDP +_text ENDS +_text SEGMENT READONLY PARA +AES_XTS_decrypt_avx1 PROC + push rdi + push rsi + push r12 + push r13 + mov rdi, rcx + mov rsi, rdx + mov rax, r8 + mov r12, r9 + mov r8, QWORD PTR [rsp+72] + mov r9, QWORD PTR [rsp+80] + mov r10d, DWORD PTR [rsp+88] + sub rsp, 128 + vmovdqu OWORD PTR [rsp+16], xmm6 + vmovdqu OWORD PTR [rsp+32], xmm7 + vmovdqu OWORD PTR [rsp+48], xmm8 + vmovdqu OWORD PTR [rsp+64], xmm9 + vmovdqu OWORD PTR [rsp+80], xmm10 + vmovdqu OWORD PTR [rsp+96], xmm11 + vmovdqu OWORD PTR [rsp+112], xmm12 + vmovdqu xmm12, OWORD PTR L_avx1_aes_xts_gc_xts + vmovdqu xmm0, OWORD PTR [r12] + ; aes_enc_block + vpxor xmm0, xmm0, [r9] + vmovdqu xmm5, OWORD PTR [r9+16] + vaesenc xmm0, xmm0, xmm5 + vmovdqu xmm5, OWORD PTR [r9+32] + vaesenc xmm0, xmm0, xmm5 + vmovdqu xmm5, OWORD PTR [r9+48] + vaesenc xmm0, xmm0, xmm5 + vmovdqu xmm5, OWORD PTR [r9+64] + vaesenc xmm0, xmm0, xmm5 + vmovdqu xmm5, OWORD PTR [r9+80] + vaesenc xmm0, xmm0, xmm5 + vmovdqu xmm5, OWORD PTR [r9+96] + vaesenc xmm0, xmm0, xmm5 + vmovdqu xmm5, OWORD PTR [r9+112] + vaesenc xmm0, xmm0, xmm5 + vmovdqu xmm5, OWORD PTR [r9+128] + vaesenc xmm0, xmm0, xmm5 + vmovdqu xmm5, OWORD PTR [r9+144] + vaesenc xmm0, xmm0, xmm5 + cmp r10d, 11 + vmovdqu xmm5, OWORD PTR [r9+160] + jl L_AES_XTS_decrypt_avx1_tweak_aes_enc_block_last + vaesenc xmm0, xmm0, xmm5 + vmovdqu xmm6, OWORD PTR [r9+176] + vaesenc xmm0, xmm0, xmm6 + cmp r10d, 13 + vmovdqu xmm5, OWORD PTR [r9+192] + jl L_AES_XTS_decrypt_avx1_tweak_aes_enc_block_last + vaesenc xmm0, xmm0, xmm5 + vmovdqu xmm6, OWORD PTR [r9+208] + vaesenc xmm0, xmm0, xmm6 + vmovdqu xmm5, OWORD PTR [r9+224] +L_AES_XTS_decrypt_avx1_tweak_aes_enc_block_last: + vaesenclast xmm0, xmm0, xmm5 + xor r13d, r13d + mov r11d, eax + and r11d, 4294967280 + cmp r11d, eax + je L_AES_XTS_decrypt_avx1_mul16_64 + sub r11d, 16 + cmp r11d, 16 + jl L_AES_XTS_decrypt_avx1_last_31_start +L_AES_XTS_decrypt_avx1_mul16_64: + cmp r11d, 64 + jl L_AES_XTS_decrypt_avx1_done_64 + and r11d, 4294967232 +L_AES_XTS_decrypt_avx1_dec_64: + ; 64 bytes of 
input + ; aes_dec_64 + lea rcx, QWORD PTR [rdi+r13] + lea rdx, QWORD PTR [rsi+r13] + vmovdqu xmm8, OWORD PTR [rcx] + vmovdqu xmm9, OWORD PTR [rcx+16] + vmovdqu xmm10, OWORD PTR [rcx+32] + vmovdqu xmm11, OWORD PTR [rcx+48] + vpsrad xmm4, xmm0, 31 + vpslld xmm1, xmm0, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm1, xmm1, xmm4 + vpsrad xmm4, xmm1, 31 + vpslld xmm2, xmm1, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm2, xmm2, xmm4 + vpsrad xmm4, xmm2, 31 + vpslld xmm3, xmm2, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm3, xmm3, xmm4 + vpxor xmm8, xmm8, xmm0 + vpxor xmm9, xmm9, xmm1 + vpxor xmm10, xmm10, xmm2 + vpxor xmm11, xmm11, xmm3 + ; aes_dec_block + vmovdqu xmm4, OWORD PTR [r8] + vpxor xmm8, xmm8, xmm4 + vpxor xmm9, xmm9, xmm4 + vpxor xmm10, xmm10, xmm4 + vpxor xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+16] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+32] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+48] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+64] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+80] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+96] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+112] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+128] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+144] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + cmp r10d, 11 + vmovdqu xmm4, OWORD PTR [r8+160] + jl L_AES_XTS_decrypt_avx1_aes_dec_64_aes_dec_block_last + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+176] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + cmp r10d, 13 + vmovdqu xmm4, OWORD PTR [r8+192] + jl L_AES_XTS_decrypt_avx1_aes_dec_64_aes_dec_block_last + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+208] + vaesdec xmm8, xmm8, xmm4 + vaesdec xmm9, xmm9, xmm4 + vaesdec xmm10, xmm10, xmm4 + vaesdec xmm11, xmm11, xmm4 + vmovdqu xmm4, OWORD PTR [r8+224] +L_AES_XTS_decrypt_avx1_aes_dec_64_aes_dec_block_last: + vaesdeclast xmm8, xmm8, xmm4 + vaesdeclast xmm9, xmm9, xmm4 + vaesdeclast xmm10, xmm10, xmm4 + vaesdeclast xmm11, xmm11, xmm4 + vpxor xmm8, xmm8, xmm0 + vpxor xmm9, xmm9, xmm1 + vpxor xmm10, xmm10, xmm2 + vpxor xmm11, xmm11, xmm3 + vmovdqu OWORD PTR [rdx], xmm8 + vmovdqu OWORD PTR [rdx+16], xmm9 + vmovdqu OWORD PTR [rdx+32], xmm10 + vmovdqu OWORD PTR [rdx+48], xmm11 + vpsrad xmm4, xmm3, 31 + vpslld xmm0, xmm3, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm0, xmm0, xmm4 + add r13d, 64 + cmp r13d, r11d + jl L_AES_XTS_decrypt_avx1_dec_64 
+L_AES_XTS_decrypt_avx1_done_64: + cmp r13d, eax + mov r11d, eax + je L_AES_XTS_decrypt_avx1_done_dec + and r11d, 4294967280 + cmp r11d, eax + je L_AES_XTS_decrypt_avx1_mul16 + sub r11d, 16 + sub r11d, r13d + cmp r11d, 16 + jl L_AES_XTS_decrypt_avx1_last_31_start + add r11d, r13d +L_AES_XTS_decrypt_avx1_mul16: +L_AES_XTS_decrypt_avx1_dec_16: + ; 16 bytes of input + lea rcx, QWORD PTR [rdi+r13] + vmovdqu xmm8, OWORD PTR [rcx] + vpxor xmm8, xmm8, xmm0 + ; aes_dec_block + vpxor xmm8, xmm8, [r8] + vmovdqu xmm5, OWORD PTR [r8+16] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+32] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+48] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+64] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+80] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+96] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+112] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+128] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+144] + vaesdec xmm8, xmm8, xmm5 + cmp r10d, 11 + vmovdqu xmm5, OWORD PTR [r8+160] + jl L_AES_XTS_decrypt_avx1_aes_dec_block_last + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r8+176] + vaesdec xmm8, xmm8, xmm6 + cmp r10d, 13 + vmovdqu xmm5, OWORD PTR [r8+192] + jl L_AES_XTS_decrypt_avx1_aes_dec_block_last + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r8+208] + vaesdec xmm8, xmm8, xmm6 + vmovdqu xmm5, OWORD PTR [r8+224] +L_AES_XTS_decrypt_avx1_aes_dec_block_last: + vaesdeclast xmm8, xmm8, xmm5 + vpxor xmm8, xmm8, xmm0 + lea rcx, QWORD PTR [rsi+r13] + vmovdqu OWORD PTR [rcx], xmm8 + vpsrad xmm4, xmm0, 31 + vpslld xmm0, xmm0, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm0, xmm0, xmm4 + add r13d, 16 + cmp r13d, r11d + jl L_AES_XTS_decrypt_avx1_dec_16 + cmp r13d, eax + je L_AES_XTS_decrypt_avx1_done_dec +L_AES_XTS_decrypt_avx1_last_31_start: + vpsrad xmm4, xmm0, 31 + vpslld xmm7, xmm0, 1 + vpshufd xmm4, xmm4, 147 + vpand xmm4, xmm4, xmm12 + vpxor xmm7, xmm7, xmm4 + lea rcx, QWORD PTR [rdi+r13] + vmovdqu xmm8, OWORD PTR [rcx] + vpxor xmm8, xmm8, xmm7 + ; aes_dec_block + vpxor xmm8, xmm8, [r8] + vmovdqu xmm5, OWORD PTR [r8+16] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+32] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+48] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+64] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+80] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+96] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+112] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+128] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+144] + vaesdec xmm8, xmm8, xmm5 + cmp r10d, 11 + vmovdqu xmm5, OWORD PTR [r8+160] + jl L_AES_XTS_decrypt_avx1_last_31_aes_dec_block_last + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r8+176] + vaesdec xmm8, xmm8, xmm6 + cmp r10d, 13 + vmovdqu xmm5, OWORD PTR [r8+192] + jl L_AES_XTS_decrypt_avx1_last_31_aes_dec_block_last + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r8+208] + vaesdec xmm8, xmm8, xmm6 + vmovdqu xmm5, OWORD PTR [r8+224] +L_AES_XTS_decrypt_avx1_last_31_aes_dec_block_last: + vaesdeclast xmm8, xmm8, xmm5 + vpxor xmm8, xmm8, xmm7 + vmovdqu OWORD PTR [rsp], xmm8 + add r13, 16 + xor rdx, rdx +L_AES_XTS_decrypt_avx1_last_31_byte_loop: + mov r11b, BYTE PTR [rsp+rdx] + mov cl, BYTE PTR [rdi+r13] + mov BYTE PTR [rsi+r13], r11b + mov BYTE PTR [rsp+rdx], cl + inc r13d + inc edx + cmp r13d, eax + jl L_AES_XTS_decrypt_avx1_last_31_byte_loop + sub 
r13, rdx + vmovdqu xmm8, OWORD PTR [rsp] + vpxor xmm8, xmm8, xmm0 + ; aes_dec_block + vpxor xmm8, xmm8, [r8] + vmovdqu xmm5, OWORD PTR [r8+16] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+32] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+48] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+64] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+80] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+96] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+112] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+128] + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm5, OWORD PTR [r8+144] + vaesdec xmm8, xmm8, xmm5 + cmp r10d, 11 + vmovdqu xmm5, OWORD PTR [r8+160] + jl L_AES_XTS_decrypt_avx1_last_31_2_aes_dec_block_last + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r8+176] + vaesdec xmm8, xmm8, xmm6 + cmp r10d, 13 + vmovdqu xmm5, OWORD PTR [r8+192] + jl L_AES_XTS_decrypt_avx1_last_31_2_aes_dec_block_last + vaesdec xmm8, xmm8, xmm5 + vmovdqu xmm6, OWORD PTR [r8+208] + vaesdec xmm8, xmm8, xmm6 + vmovdqu xmm5, OWORD PTR [r8+224] +L_AES_XTS_decrypt_avx1_last_31_2_aes_dec_block_last: + vaesdeclast xmm8, xmm8, xmm5 + vpxor xmm8, xmm8, xmm0 + sub r13, 16 + lea rcx, QWORD PTR [rsi+r13] + vmovdqu OWORD PTR [rcx], xmm8 +L_AES_XTS_decrypt_avx1_done_dec: + vzeroupper + vmovdqu xmm6, OWORD PTR [rsp+16] + vmovdqu xmm7, OWORD PTR [rsp+32] + vmovdqu xmm8, OWORD PTR [rsp+48] + vmovdqu xmm9, OWORD PTR [rsp+64] + vmovdqu xmm10, OWORD PTR [rsp+80] + vmovdqu xmm11, OWORD PTR [rsp+96] + vmovdqu xmm12, OWORD PTR [rsp+112] + add rsp, 128 + pop r13 + pop r12 + pop rsi + pop rdi + ret +AES_XTS_decrypt_avx1 ENDP +_text ENDS +ENDIF +END diff --git a/wolfcrypt/src/include.am b/wolfcrypt/src/include.am index 6c9b6bcc5c..3d82782536 100644 --- a/wolfcrypt/src/include.am +++ b/wolfcrypt/src/include.am @@ -15,6 +15,7 @@ EXTRA_DIST += wolfcrypt/src/evp.c EXTRA_DIST += wolfcrypt/src/asm.c EXTRA_DIST += wolfcrypt/src/aes_asm.asm EXTRA_DIST += wolfcrypt/src/aes_gcm_asm.asm +EXTRA_DIST += wolfcrypt/src/aes_xts_asm.asm EXTRA_DIST += wolfcrypt/src/chacha_asm.asm EXTRA_DIST += wolfcrypt/src/poly1305_asm.asm EXTRA_DIST += wolfcrypt/src/wc_dsp.c diff --git a/wolfssl.vcxproj b/wolfssl.vcxproj index 662922b488..58025da783 100644 --- a/wolfssl.vcxproj +++ b/wolfssl.vcxproj @@ -384,6 +384,20 @@ $(OutDir)%(Filename).obj $(IntDir)%(Filename).obj + + false + false + ml64.exe /c /Zi /Fo"$(OutDir)%(Filename).obj" %(Identity) + ml64.exe /c /Zi /Fo"$(IntDir)%(Filename).obj" %(Identity) + $(OutDir)%(Filename).obj + $(IntDir)%(Filename).obj + false + false + ml64.exe /c /Zi /Fo"$(OutDir)%(Filename).obj" %(Identity) + ml64.exe /c /Zi /Fo"$(IntDir)%(Filename).obj" %(Identity) + $(OutDir)%(Filename).obj + $(IntDir)%(Filename).obj + false false
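
For reference, the new AES_XTS_encrypt_aesni / AES_XTS_decrypt_aesni (and _avx1) entry points follow the Microsoft x64 calling convention: the first four arguments arrive in rcx, rdx, r8 and r9, and the remaining three are loaded from rsp+72, rsp+80 and rsp+88 once the four register pushes are accounted for. xmm6-xmm12 are callee-saved on Win64, hence the movdqu/vmovdqu spills to the local frame. The cmp r10d, 11 and cmp r10d, 13 branches skip the extra rounds for AES-128 and AES-192 (nr is 10, 12 or 14). A sketch of the C-side prototypes this layout implies follows; the authoritative declarations live in wolfcrypt/src/aes.c, so treat these as an assumption:

```
/* Assumed prototypes matching the Win64 argument layout used above:
 * rcx = in, rdx = out, r8 = sz, r9 = tweak, then key, key2 and the
 * round count on the stack. Check wolfcrypt/src/aes.c for the real ones. */
typedef unsigned int word32;  /* as in wolfssl/wolfcrypt/types.h */

void AES_XTS_encrypt_aesni(const unsigned char* in, unsigned char* out,
                           word32 sz,
                           const unsigned char* i,    /* 16-byte tweak/sector */
                           const unsigned char* key,  /* expanded data key    */
                           const unsigned char* key2, /* expanded tweak key   */
                           int nr);                   /* 10, 12 or 14         */
void AES_XTS_decrypt_aesni(const unsigned char* in, unsigned char* out,
                           word32 sz, const unsigned char* i,
                           const unsigned char* key, const unsigned char* key2,
                           int nr);
```

Note that both directions encrypt the tweak with key2 (aesenc against [r9]); only the data blocks switch to aesdec on decrypt, since the XTS tweak is always produced by AES encryption.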
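The repeated psrad/pslld/pshufd/pand/pxor pattern is the XTS tweak update: multiplication by x in GF(2^128) with reduction polynomial x^128 + x^7 + x^2 + x + 1, which is why L_aes_xts_gc_xts holds DWORD 135 (0x87), 1, 1, 1. A scalar C sketch of the same operation, for comparison:

```
#include <stdint.h>

/* Multiply the 128-bit tweak by x in GF(2^128), little-endian byte order.
 * A bit shifted out of the top is reduced with 0x87 -- the DWORD 135 the
 * assembly loads into xmm12 from L_aes_xts_gc_xts. */
static void xts_tweak_mul_x(uint8_t t[16])
{
    uint8_t carry = 0;
    int j;
    for (j = 0; j < 16; j++) {
        uint8_t hi = (uint8_t)(t[j] >> 7);      /* bit carried to next byte */
        t[j] = (uint8_t)((t[j] << 1) | carry);
        carry = hi;
    }
    if (carry)
        t[0] ^= 0x87;  /* x^128 == x^7 + x^2 + x + 1 (mod the polynomial) */
}
```

The vector code gets the same effect without a byte-serial carry chain: pslld shifts every dword left by one, psrad xmm4, 31 materializes each dword's carried-out bit as an all-ones mask, pshufd with 147 rotates those masks up one lane (the top lane wrapping to lane 0), and pand with {135,1,1,1} turns them into the 0x87 reduction for lane 0 and a single carry bit for the other lanes before the final pxor. The 64-byte main loops derive four consecutive tweaks (xmm0-xmm3) this way so four blocks can be kept in flight per iteration.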
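The last_15 and last_31 paths implement XTS ciphertext stealing for lengths that are not a multiple of 16 (the masks 4294967232 and 4294967280 are 0xFFFFFFC0 and 0xFFFFFFF0, rounding the byte count down to multiples of 64 and 16 first). A hedged C sketch of the encrypt-side tail, assuming sz = 16*n + m with 0 < m < 16 and all full blocks already written to out; enc_block() is a hypothetical placeholder for one tweaked AES block encryption as performed by the assembly:

```
#include <string.h>
#include <stddef.h>

/* Sketch of the *_last_15 encrypt path: the trailing m bytes swap places
 * with the leading bytes of the previous ciphertext block, and the
 * stitched block is re-encrypted in its place. */
static void xts_encrypt_steal_tail(unsigned char* out, const unsigned char* in,
                                   size_t sz, const unsigned char tweak[16],
                                   void (*enc_block)(unsigned char blk[16],
                                                     const unsigned char twk[16]))
{
    size_t m    = sz & 15;       /* trailing partial-block bytes  */
    size_t last = sz - m - 16;   /* offset of the last full block */
    unsigned char buf[16];
    size_t j;

    memcpy(buf, out + last, 16); /* read back previous ciphertext     */
    for (j = 0; j < m; j++) {
        out[last + 16 + j] = buf[j];  /* steal its leading bytes      */
        buf[j] = in[last + 16 + j];   /* splice in the plaintext tail */
    }
    enc_block(buf, tweak);       /* encrypt the stitched block        */
    memcpy(out + last, buf, 16); /* overwrite the stolen block        */
}
```

Decrypt performs the same byte swap but must consume the tweaks in the opposite order, which is why L_AES_XTS_decrypt_*_last_31_start computes the next tweak (xmm7) for the final full input block before reusing the earlier one (xmm0) on the stitched block.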