// SPDX-License-Identifier: Apache-2.0
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__x86_64__) && defined(HAVE_AVX) && \
defined(HAVE_VAES) && defined(HAVE_VPCLMULQDQ)

#define _ASM
#include <sys/asm_linkage.h>

/* Windows userland links with OpenSSL */
#if !defined (_WIN32) || defined (_KERNEL)

.section .rodata
.balign 16

.Lbswap_mask:
.quad 0x08090a0b0c0d0e0f, 0x0001020304050607

.Lgfpoly:
.quad 1, 0xc200000000000000

.Lgfpoly_and_internal_carrybit:
.quad 1, 0xc200000000000001

.balign 32

.Lctr_pattern:
.quad 0, 0
.quad 1, 0
.Linc_2blocks:
.quad 2, 0
.quad 2, 0

ENTRY_ALIGN(gcm_init_vpclmulqdq_avx2, 32)
.cfi_startproc

ENDBR

vmovdqu (%rsi),%xmm3
// KCF/ICP stores H in network byte order with the hi qword first
// so we need to swap all bytes, not the 2 qwords.
vmovdqu .Lbswap_mask(%rip),%xmm4
vpshufb %xmm4,%xmm3,%xmm3

vpshufd $0xd3,%xmm3,%xmm0
vpsrad $31,%xmm0,%xmm0
vpaddq %xmm3,%xmm3,%xmm3
vpand .Lgfpoly_and_internal_carrybit(%rip),%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3

vbroadcasti128 .Lgfpoly(%rip),%ymm6

vpclmulqdq $0x00,%xmm3,%xmm3,%xmm0
vpclmulqdq $0x11,%xmm3,%xmm3,%xmm5
vpclmulqdq $0x01,%xmm0,%xmm6,%xmm1
vpshufd $0x4e,%xmm0,%xmm0
vpxor %xmm0,%xmm1,%xmm1
vpclmulqdq $0x01,%xmm1,%xmm6,%xmm0
vpshufd $0x4e,%xmm1,%xmm1
vpxor %xmm1,%xmm5,%xmm5
vpxor %xmm0,%xmm5,%xmm5

vinserti128 $1,%xmm3,%ymm5,%ymm3
vinserti128 $1,%xmm5,%ymm5,%ymm5

vpclmulqdq $0x00,%ymm5,%ymm3,%ymm0
vpclmulqdq $0x01,%ymm5,%ymm3,%ymm1
vpclmulqdq $0x10,%ymm5,%ymm3,%ymm2
vpxor %ymm2,%ymm1,%ymm1
vpclmulqdq $0x01,%ymm0,%ymm6,%ymm2
vpshufd $0x4e,%ymm0,%ymm0
vpxor %ymm0,%ymm1,%ymm1
vpxor %ymm2,%ymm1,%ymm1
vpclmulqdq $0x11,%ymm5,%ymm3,%ymm4
vpclmulqdq $0x01,%ymm1,%ymm6,%ymm0
vpshufd $0x4e,%ymm1,%ymm1
vpxor %ymm1,%ymm4,%ymm4
vpxor %ymm0,%ymm4,%ymm4

vmovdqu %ymm3,96(%rdi)
vmovdqu %ymm4,64(%rdi)

vpunpcklqdq %ymm3,%ymm4,%ymm0
vpunpckhqdq %ymm3,%ymm4,%ymm1
vpxor %ymm1,%ymm0,%ymm0
vmovdqu %ymm0,128+32(%rdi)

vpclmulqdq $0x00,%ymm5,%ymm4,%ymm0
vpclmulqdq $0x01,%ymm5,%ymm4,%ymm1
vpclmulqdq $0x10,%ymm5,%ymm4,%ymm2
vpxor %ymm2,%ymm1,%ymm1
vpclmulqdq $0x01,%ymm0,%ymm6,%ymm2
vpshufd $0x4e,%ymm0,%ymm0
vpxor %ymm0,%ymm1,%ymm1
vpxor %ymm2,%ymm1,%ymm1
vpclmulqdq $0x11,%ymm5,%ymm4,%ymm3
vpclmulqdq $0x01,%ymm1,%ymm6,%ymm0
vpshufd $0x4e,%ymm1,%ymm1
vpxor %ymm1,%ymm3,%ymm3
vpxor %ymm0,%ymm3,%ymm3

vpclmulqdq $0x00,%ymm5,%ymm3,%ymm0
vpclmulqdq $0x01,%ymm5,%ymm3,%ymm1
vpclmulqdq $0x10,%ymm5,%ymm3,%ymm2
vpxor %ymm2,%ymm1,%ymm1
vpclmulqdq $0x01,%ymm0,%ymm6,%ymm2
vpshufd $0x4e,%ymm0,%ymm0
vpxor %ymm0,%ymm1,%ymm1
vpxor %ymm2,%ymm1,%ymm1
vpclmulqdq $0x11,%ymm5,%ymm3,%ymm4
vpclmulqdq $0x01,%ymm1,%ymm6,%ymm0
vpshufd $0x4e,%ymm1,%ymm1
vpxor %ymm1,%ymm4,%ymm4
vpxor %ymm0,%ymm4,%ymm4

vmovdqu %ymm3,32(%rdi)
vmovdqu %ymm4,0(%rdi)

vpunpcklqdq %ymm3,%ymm4,%ymm0
vpunpckhqdq %ymm3,%ymm4,%ymm1
vpxor %ymm1,%ymm0,%ymm0
vmovdqu %ymm0,128(%rdi)

vzeroupper
RET

.cfi_endproc
SET_SIZE(gcm_init_vpclmulqdq_avx2)
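// Editor's note, not generator output: a rough sketch of the field
// arithmetic used throughout this file. GHASH multiplies in GF(2^128)
// modulo x^128 + x^7 + x^2 + x + 1 in a bit-reflected representation,
// so each 256-bit carryless product is folded back to 128 bits in two
// steps using the constant in .Lgfpoly (0xc200000000000000); that is
// what each vpclmulqdq-by-.Lgfpoly / vpshufd $0x4e / vpxor pair above
// and below is doing. gcm_init_vpclmulqdq_avx2 above appears to
// precompute powers of the hash key H, two 128-bit blocks per YMM
// register, plus Karatsuba-style xor'd halves at offset 128 of the
// table at (%rdi), which the GHASH and enc/dec routines below consume.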
ENTRY_ALIGN(gcm_gmult_vpclmulqdq_avx2, 32)
.cfi_startproc

ENDBR

vmovdqu (%rdi),%xmm0
vmovdqu .Lbswap_mask(%rip),%xmm1
vmovdqu 128-16(%rsi),%xmm2
vmovdqu .Lgfpoly(%rip),%xmm3
vpshufb %xmm1,%xmm0,%xmm0

vpclmulqdq $0x00,%xmm2,%xmm0,%xmm4
vpclmulqdq $0x01,%xmm2,%xmm0,%xmm5
vpclmulqdq $0x10,%xmm2,%xmm0,%xmm6
vpxor %xmm6,%xmm5,%xmm5
vpclmulqdq $0x01,%xmm4,%xmm3,%xmm6
vpshufd $0x4e,%xmm4,%xmm4
vpxor %xmm4,%xmm5,%xmm5
vpxor %xmm6,%xmm5,%xmm5
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm0
vpclmulqdq $0x01,%xmm5,%xmm3,%xmm4
vpshufd $0x4e,%xmm5,%xmm5
vpxor %xmm5,%xmm0,%xmm0
vpxor %xmm4,%xmm0,%xmm0

vpshufb %xmm1,%xmm0,%xmm0
vmovdqu %xmm0,(%rdi)

RET

.cfi_endproc
SET_SIZE(gcm_gmult_vpclmulqdq_avx2)
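// Editor's note, not generator output: as the code below uses them,
// gcm_ghash_vpclmulqdq_avx2 takes Xi in (%rdi), the Htable in (%rsi),
// the data pointer in %rdx and the byte length in %rcx. It hashes 128
// bytes per iteration in .Lghash_loop_4x, falls back to 32-byte vectors
// in .Lghash_loop_1x, and finishes a trailing 16-byte block in
// .Lghash_lastblock.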
ENTRY_ALIGN(gcm_ghash_vpclmulqdq_avx2, 32)
.cfi_startproc

ENDBR

vmovdqu .Lbswap_mask(%rip),%xmm6
vmovdqu .Lgfpoly(%rip),%xmm7

vmovdqu (%rdi),%xmm5
vpshufb %xmm6,%xmm5,%xmm5

cmpq $32,%rcx
jb .Lghash_lastblock

vinserti128 $1,%xmm6,%ymm6,%ymm6
vinserti128 $1,%xmm7,%ymm7,%ymm7

cmpq $127,%rcx
jbe .Lghash_loop_1x

vmovdqu 128(%rsi),%ymm8
vmovdqu 128+32(%rsi),%ymm9
.Lghash_loop_4x:

vmovdqu 0(%rdx),%ymm1
vpshufb %ymm6,%ymm1,%ymm1
vmovdqu 0(%rsi),%ymm2
vpxor %ymm5,%ymm1,%ymm1
vpclmulqdq $0x00,%ymm2,%ymm1,%ymm3
vpclmulqdq $0x11,%ymm2,%ymm1,%ymm5
vpunpckhqdq %ymm1,%ymm1,%ymm0
vpxor %ymm1,%ymm0,%ymm0
vpclmulqdq $0x00,%ymm8,%ymm0,%ymm4

vmovdqu 32(%rdx),%ymm1
vpshufb %ymm6,%ymm1,%ymm1
vmovdqu 32(%rsi),%ymm2
vpclmulqdq $0x00,%ymm2,%ymm1,%ymm0
vpxor %ymm0,%ymm3,%ymm3
vpclmulqdq $0x11,%ymm2,%ymm1,%ymm0
vpxor %ymm0,%ymm5,%ymm5
vpunpckhqdq %ymm1,%ymm1,%ymm0
vpxor %ymm1,%ymm0,%ymm0
vpclmulqdq $0x10,%ymm8,%ymm0,%ymm0
vpxor %ymm0,%ymm4,%ymm4

vmovdqu 64(%rdx),%ymm1
vpshufb %ymm6,%ymm1,%ymm1
vmovdqu 64(%rsi),%ymm2
vpclmulqdq $0x00,%ymm2,%ymm1,%ymm0
vpxor %ymm0,%ymm3,%ymm3
vpclmulqdq $0x11,%ymm2,%ymm1,%ymm0
vpxor %ymm0,%ymm5,%ymm5
vpunpckhqdq %ymm1,%ymm1,%ymm0
vpxor %ymm1,%ymm0,%ymm0
vpclmulqdq $0x00,%ymm9,%ymm0,%ymm0
vpxor %ymm0,%ymm4,%ymm4

vmovdqu 96(%rdx),%ymm1
vpshufb %ymm6,%ymm1,%ymm1
vmovdqu 96(%rsi),%ymm2
vpclmulqdq $0x00,%ymm2,%ymm1,%ymm0
vpxor %ymm0,%ymm3,%ymm3
vpclmulqdq $0x11,%ymm2,%ymm1,%ymm0
vpxor %ymm0,%ymm5,%ymm5
vpunpckhqdq %ymm1,%ymm1,%ymm0
vpxor %ymm1,%ymm0,%ymm0
vpclmulqdq $0x10,%ymm9,%ymm0,%ymm0
vpxor %ymm0,%ymm4,%ymm4

vpxor %ymm3,%ymm4,%ymm4
vpxor %ymm5,%ymm4,%ymm4

vbroadcasti128 .Lgfpoly(%rip),%ymm2
vpclmulqdq $0x01,%ymm3,%ymm2,%ymm0
vpshufd $0x4e,%ymm3,%ymm3
vpxor %ymm3,%ymm4,%ymm4
vpxor %ymm0,%ymm4,%ymm4

vpclmulqdq $0x01,%ymm4,%ymm2,%ymm0
vpshufd $0x4e,%ymm4,%ymm4
vpxor %ymm4,%ymm5,%ymm5
vpxor %ymm0,%ymm5,%ymm5
vextracti128 $1,%ymm5,%xmm0
vpxor %xmm0,%xmm5,%xmm5

subq $-128,%rdx
addq $-128,%rcx
cmpq $127,%rcx
ja .Lghash_loop_4x

cmpq $32,%rcx
jb .Lghash_loop_1x_done
.Lghash_loop_1x:
vmovdqu (%rdx),%ymm0
vpshufb %ymm6,%ymm0,%ymm0
vpxor %ymm0,%ymm5,%ymm5
vmovdqu 128-32(%rsi),%ymm0
vpclmulqdq $0x00,%ymm0,%ymm5,%ymm1
vpclmulqdq $0x01,%ymm0,%ymm5,%ymm2
vpclmulqdq $0x10,%ymm0,%ymm5,%ymm3
vpxor %ymm3,%ymm2,%ymm2
vpclmulqdq $0x01,%ymm1,%ymm7,%ymm3
vpshufd $0x4e,%ymm1,%ymm1
vpxor %ymm1,%ymm2,%ymm2
vpxor %ymm3,%ymm2,%ymm2
vpclmulqdq $0x11,%ymm0,%ymm5,%ymm5
vpclmulqdq $0x01,%ymm2,%ymm7,%ymm1
vpshufd $0x4e,%ymm2,%ymm2
vpxor %ymm2,%ymm5,%ymm5
vpxor %ymm1,%ymm5,%ymm5

vextracti128 $1,%ymm5,%xmm0
vpxor %xmm0,%xmm5,%xmm5
addq $32,%rdx
subq $32,%rcx
cmpq $32,%rcx
jae .Lghash_loop_1x
.Lghash_loop_1x_done:

.Lghash_lastblock:
testq %rcx,%rcx
jz .Lghash_done
vmovdqu (%rdx),%xmm0
vpshufb %xmm6,%xmm0,%xmm0
vpxor %xmm0,%xmm5,%xmm5
vmovdqu 128-16(%rsi),%xmm0
vpclmulqdq $0x00,%xmm0,%xmm5,%xmm1
vpclmulqdq $0x01,%xmm0,%xmm5,%xmm2
vpclmulqdq $0x10,%xmm0,%xmm5,%xmm3
vpxor %xmm3,%xmm2,%xmm2
vpclmulqdq $0x01,%xmm1,%xmm7,%xmm3
vpshufd $0x4e,%xmm1,%xmm1
vpxor %xmm1,%xmm2,%xmm2
vpxor %xmm3,%xmm2,%xmm2
vpclmulqdq $0x11,%xmm0,%xmm5,%xmm5
vpclmulqdq $0x01,%xmm2,%xmm7,%xmm1
vpshufd $0x4e,%xmm2,%xmm2
vpxor %xmm2,%xmm5,%xmm5
vpxor %xmm1,%xmm5,%xmm5

.Lghash_done:

vpshufb %xmm6,%xmm5,%xmm5
vmovdqu %xmm5,(%rdi)

vzeroupper
RET

.cfi_endproc
SET_SIZE(gcm_ghash_vpclmulqdq_avx2)
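// Editor's note, not generator output: judging from the register use
// below, aes_gcm_enc_update_vaes_avx2 takes in=%rdi, out=%rsi,
// len=%rdx (bytes), the expanded AES key in %rcx, the counter block in
// (%r8), the Htable in %r9, and a pointer to the GHASH accumulator Xi
// at 16(%rsp) (saved into %r12). Encryption and GHASH are fused: each
// main-loop iteration encrypts 128 bytes while hashing the 128 bytes
// of ciphertext produced by the previous iteration.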
ENTRY_ALIGN(aes_gcm_enc_update_vaes_avx2, 32)
.cfi_startproc

ENDBR
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-16

movq 16(%rsp),%r12
#ifdef BORINGSSL_DISPATCH_TEST
.extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
movb $1,BORINGSSL_function_hit+6(%rip)
#endif
vbroadcasti128 .Lbswap_mask(%rip),%ymm0

vmovdqu (%r12),%xmm1
vpshufb %xmm0,%xmm1,%xmm1
vbroadcasti128 (%r8),%ymm11
vpshufb %ymm0,%ymm11,%ymm11

movl 504(%rcx),%r10d // ICP has a larger offset for rounds.
leal -24(,%r10,4),%r10d // ICP uses 10,12,14 not 9,11,13 for rounds.

leaq 96(%rcx,%r10,4),%r11
vbroadcasti128 (%rcx),%ymm9
vbroadcasti128 (%r11),%ymm10

vpaddd .Lctr_pattern(%rip),%ymm11,%ymm11

cmpq $127,%rdx
jbe .Lcrypt_loop_4x_done__func1

vmovdqu 128(%r9),%ymm7
vmovdqu 128+32(%r9),%ymm8

vmovdqu .Linc_2blocks(%rip),%ymm2
vpshufb %ymm0,%ymm11,%ymm12
vpaddd %ymm2,%ymm11,%ymm11
vpshufb %ymm0,%ymm11,%ymm13
vpaddd %ymm2,%ymm11,%ymm11
vpshufb %ymm0,%ymm11,%ymm14
vpaddd %ymm2,%ymm11,%ymm11
vpshufb %ymm0,%ymm11,%ymm15
vpaddd %ymm2,%ymm11,%ymm11

vpxor %ymm9,%ymm12,%ymm12
vpxor %ymm9,%ymm13,%ymm13
vpxor %ymm9,%ymm14,%ymm14
vpxor %ymm9,%ymm15,%ymm15

leaq 16(%rcx),%rax
.Lvaesenc_loop_first_4_vecs__func1:
vbroadcasti128 (%rax),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

addq $16,%rax
cmpq %rax,%r11
jne .Lvaesenc_loop_first_4_vecs__func1
vpxor 0(%rdi),%ymm10,%ymm2
vpxor 32(%rdi),%ymm10,%ymm3
vpxor 64(%rdi),%ymm10,%ymm5
vpxor 96(%rdi),%ymm10,%ymm6
vaesenclast %ymm2,%ymm12,%ymm12
vaesenclast %ymm3,%ymm13,%ymm13
vaesenclast %ymm5,%ymm14,%ymm14
vaesenclast %ymm6,%ymm15,%ymm15
vmovdqu %ymm12,0(%rsi)
vmovdqu %ymm13,32(%rsi)
vmovdqu %ymm14,64(%rsi)
vmovdqu %ymm15,96(%rsi)

subq $-128,%rdi
addq $-128,%rdx
cmpq $127,%rdx
jbe .Lghash_last_ciphertext_4x__func1
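// Editor's note, not generator output: the first 128 bytes are
// encrypted above before any hashing so the main loop always has a
// full 128 bytes of ciphertext to hash. In the loop, cmpl $24,%r10d
// selects the key size: %r10d holds 4*(rounds-6), i.e. 16, 24 or 32
// for AES-128/192/256, so jl/je skip the extra round-key pairs that
// only the larger keys use. %r11 points at the last round key (also
// broadcast into %ymm10 and folded into the plaintext before
// vaesenclast), so the negative offsets -208(%r11)..-16(%r11) address
// the remaining middle rounds for every key size.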
.balign 16
.Lcrypt_loop_4x__func1:

vmovdqu .Linc_2blocks(%rip),%ymm2
vpshufb %ymm0,%ymm11,%ymm12
vpaddd %ymm2,%ymm11,%ymm11
vpshufb %ymm0,%ymm11,%ymm13
vpaddd %ymm2,%ymm11,%ymm11
vpshufb %ymm0,%ymm11,%ymm14
vpaddd %ymm2,%ymm11,%ymm11
vpshufb %ymm0,%ymm11,%ymm15
vpaddd %ymm2,%ymm11,%ymm11

vpxor %ymm9,%ymm12,%ymm12
vpxor %ymm9,%ymm13,%ymm13
vpxor %ymm9,%ymm14,%ymm14
vpxor %ymm9,%ymm15,%ymm15

cmpl $24,%r10d
jl .Laes128__func1
je .Laes192__func1

vbroadcasti128 -208(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vbroadcasti128 -192(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

.Laes192__func1:
vbroadcasti128 -176(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vbroadcasti128 -160(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

.Laes128__func1:
prefetcht0 512(%rdi)
prefetcht0 512+64(%rdi)

vmovdqu 0(%rsi),%ymm3
vpshufb %ymm0,%ymm3,%ymm3
vmovdqu 0(%r9),%ymm4
vpxor %ymm1,%ymm3,%ymm3
vpclmulqdq $0x00,%ymm4,%ymm3,%ymm5
vpclmulqdq $0x11,%ymm4,%ymm3,%ymm1
vpunpckhqdq %ymm3,%ymm3,%ymm2
vpxor %ymm3,%ymm2,%ymm2
vpclmulqdq $0x00,%ymm7,%ymm2,%ymm6

vbroadcasti128 -144(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vbroadcasti128 -128(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vmovdqu 32(%rsi),%ymm3
vpshufb %ymm0,%ymm3,%ymm3
vmovdqu 32(%r9),%ymm4
vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2
vpxor %ymm2,%ymm5,%ymm5
vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2
vpxor %ymm2,%ymm1,%ymm1
vpunpckhqdq %ymm3,%ymm3,%ymm2
vpxor %ymm3,%ymm2,%ymm2
vpclmulqdq $0x10,%ymm7,%ymm2,%ymm2
vpxor %ymm2,%ymm6,%ymm6

vbroadcasti128 -112(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vmovdqu 64(%rsi),%ymm3
vpshufb %ymm0,%ymm3,%ymm3
vmovdqu 64(%r9),%ymm4

vbroadcasti128 -96(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2
vpxor %ymm2,%ymm5,%ymm5
vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2
vpxor %ymm2,%ymm1,%ymm1

vbroadcasti128 -80(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vpunpckhqdq %ymm3,%ymm3,%ymm2
vpxor %ymm3,%ymm2,%ymm2
vpclmulqdq $0x00,%ymm8,%ymm2,%ymm2
vpxor %ymm2,%ymm6,%ymm6

vmovdqu 96(%rsi),%ymm3
vpshufb %ymm0,%ymm3,%ymm3

vbroadcasti128 -64(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vmovdqu 96(%r9),%ymm4
vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2
vpxor %ymm2,%ymm5,%ymm5
vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2
vpxor %ymm2,%ymm1,%ymm1
vpunpckhqdq %ymm3,%ymm3,%ymm2
vpxor %ymm3,%ymm2,%ymm2
vpclmulqdq $0x10,%ymm8,%ymm2,%ymm2
vpxor %ymm2,%ymm6,%ymm6

vbroadcasti128 -48(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vpxor %ymm5,%ymm6,%ymm6
vpxor %ymm1,%ymm6,%ymm6

vbroadcasti128 .Lgfpoly(%rip),%ymm4
vpclmulqdq $0x01,%ymm5,%ymm4,%ymm2
vpshufd $0x4e,%ymm5,%ymm5
vpxor %ymm5,%ymm6,%ymm6
vpxor %ymm2,%ymm6,%ymm6

vbroadcasti128 -32(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vpclmulqdq $0x01,%ymm6,%ymm4,%ymm2
vpshufd $0x4e,%ymm6,%ymm6
vpxor %ymm6,%ymm1,%ymm1
vpxor %ymm2,%ymm1,%ymm1

vbroadcasti128 -16(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vextracti128 $1,%ymm1,%xmm2
vpxor %xmm2,%xmm1,%xmm1

subq $-128,%rsi
vpxor 0(%rdi),%ymm10,%ymm2
vpxor 32(%rdi),%ymm10,%ymm3
vpxor 64(%rdi),%ymm10,%ymm5
vpxor 96(%rdi),%ymm10,%ymm6
vaesenclast %ymm2,%ymm12,%ymm12
vaesenclast %ymm3,%ymm13,%ymm13
vaesenclast %ymm5,%ymm14,%ymm14
vaesenclast %ymm6,%ymm15,%ymm15
vmovdqu %ymm12,0(%rsi)
vmovdqu %ymm13,32(%rsi)
vmovdqu %ymm14,64(%rsi)
vmovdqu %ymm15,96(%rsi)

subq $-128,%rdi

addq $-128,%rdx
cmpq $127,%rdx
ja .Lcrypt_loop_4x__func1
.Lghash_last_ciphertext_4x__func1:

vmovdqu 0(%rsi),%ymm3
vpshufb %ymm0,%ymm3,%ymm3
vmovdqu 0(%r9),%ymm4
vpxor %ymm1,%ymm3,%ymm3
vpclmulqdq $0x00,%ymm4,%ymm3,%ymm5
vpclmulqdq $0x11,%ymm4,%ymm3,%ymm1
vpunpckhqdq %ymm3,%ymm3,%ymm2
vpxor %ymm3,%ymm2,%ymm2
vpclmulqdq $0x00,%ymm7,%ymm2,%ymm6

vmovdqu 32(%rsi),%ymm3
vpshufb %ymm0,%ymm3,%ymm3
vmovdqu 32(%r9),%ymm4
vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2
vpxor %ymm2,%ymm5,%ymm5
vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2
vpxor %ymm2,%ymm1,%ymm1
vpunpckhqdq %ymm3,%ymm3,%ymm2
vpxor %ymm3,%ymm2,%ymm2
vpclmulqdq $0x10,%ymm7,%ymm2,%ymm2
vpxor %ymm2,%ymm6,%ymm6

vmovdqu 64(%rsi),%ymm3
vpshufb %ymm0,%ymm3,%ymm3
vmovdqu 64(%r9),%ymm4
vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2
vpxor %ymm2,%ymm5,%ymm5
vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2
vpxor %ymm2,%ymm1,%ymm1
vpunpckhqdq %ymm3,%ymm3,%ymm2
vpxor %ymm3,%ymm2,%ymm2
vpclmulqdq $0x00,%ymm8,%ymm2,%ymm2
vpxor %ymm2,%ymm6,%ymm6

vmovdqu 96(%rsi),%ymm3
vpshufb %ymm0,%ymm3,%ymm3
vmovdqu 96(%r9),%ymm4
vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2
vpxor %ymm2,%ymm5,%ymm5
vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2
vpxor %ymm2,%ymm1,%ymm1
vpunpckhqdq %ymm3,%ymm3,%ymm2
vpxor %ymm3,%ymm2,%ymm2
vpclmulqdq $0x10,%ymm8,%ymm2,%ymm2
vpxor %ymm2,%ymm6,%ymm6

vpxor %ymm5,%ymm6,%ymm6
vpxor %ymm1,%ymm6,%ymm6

vbroadcasti128 .Lgfpoly(%rip),%ymm4
vpclmulqdq $0x01,%ymm5,%ymm4,%ymm2
vpshufd $0x4e,%ymm5,%ymm5
vpxor %ymm5,%ymm6,%ymm6
vpxor %ymm2,%ymm6,%ymm6

vpclmulqdq $0x01,%ymm6,%ymm4,%ymm2
vpshufd $0x4e,%ymm6,%ymm6
vpxor %ymm6,%ymm1,%ymm1
vpxor %ymm2,%ymm1,%ymm1
vextracti128 $1,%ymm1,%xmm2
vpxor %xmm2,%xmm1,%xmm1

subq $-128,%rsi
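// Editor's note, not generator output: the code below handles whatever
// remains after the 4-vector loop (1 to 127 bytes). %r8 is pointed at
// Htable+128-len, apparently so each remaining block is multiplied by
// the matching power of H; the partial products are accumulated
// unreduced in %ymm5-%ymm7 and reduced once at .Lreduce__func1. A
// 64-byte chunk is handled first when present, then
// .Llessthan64bytes__func1 covers the final one, two or three 16-byte
// blocks.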
.Lcrypt_loop_4x_done__func1:

testq %rdx,%rdx
jz .Ldone__func1

leaq 128(%r9),%r8
subq %rdx,%r8

vpxor %xmm5,%xmm5,%xmm5
vpxor %xmm6,%xmm6,%xmm6
vpxor %xmm7,%xmm7,%xmm7

cmpq $64,%rdx
jb .Llessthan64bytes__func1

vpshufb %ymm0,%ymm11,%ymm12
vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11
vpshufb %ymm0,%ymm11,%ymm13
vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11
vpxor %ymm9,%ymm12,%ymm12
vpxor %ymm9,%ymm13,%ymm13
leaq 16(%rcx),%rax
.Lvaesenc_loop_tail_1__func1:
vbroadcasti128 (%rax),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
addq $16,%rax
cmpq %rax,%r11
jne .Lvaesenc_loop_tail_1__func1
vaesenclast %ymm10,%ymm12,%ymm12
vaesenclast %ymm10,%ymm13,%ymm13

vmovdqu 0(%rdi),%ymm2
vmovdqu 32(%rdi),%ymm3
vpxor %ymm2,%ymm12,%ymm12
vpxor %ymm3,%ymm13,%ymm13
vmovdqu %ymm12,0(%rsi)
vmovdqu %ymm13,32(%rsi)

vpshufb %ymm0,%ymm12,%ymm12
vpshufb %ymm0,%ymm13,%ymm13
vpxor %ymm1,%ymm12,%ymm12
vmovdqu (%r8),%ymm2
vmovdqu 32(%r8),%ymm3
vpclmulqdq $0x00,%ymm2,%ymm12,%ymm5
vpclmulqdq $0x01,%ymm2,%ymm12,%ymm6
vpclmulqdq $0x10,%ymm2,%ymm12,%ymm4
vpxor %ymm4,%ymm6,%ymm6
vpclmulqdq $0x11,%ymm2,%ymm12,%ymm7
vpclmulqdq $0x00,%ymm3,%ymm13,%ymm4
vpxor %ymm4,%ymm5,%ymm5
vpclmulqdq $0x01,%ymm3,%ymm13,%ymm4
vpxor %ymm4,%ymm6,%ymm6
vpclmulqdq $0x10,%ymm3,%ymm13,%ymm4
vpxor %ymm4,%ymm6,%ymm6
vpclmulqdq $0x11,%ymm3,%ymm13,%ymm4
vpxor %ymm4,%ymm7,%ymm7

addq $64,%r8
addq $64,%rdi
addq $64,%rsi
subq $64,%rdx
jz .Lreduce__func1

vpxor %xmm1,%xmm1,%xmm1

.Llessthan64bytes__func1:
vpshufb %ymm0,%ymm11,%ymm12
vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11
vpshufb %ymm0,%ymm11,%ymm13
vpxor %ymm9,%ymm12,%ymm12
vpxor %ymm9,%ymm13,%ymm13
leaq 16(%rcx),%rax
.Lvaesenc_loop_tail_2__func1:
vbroadcasti128 (%rax),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
addq $16,%rax
cmpq %rax,%r11
jne .Lvaesenc_loop_tail_2__func1
vaesenclast %ymm10,%ymm12,%ymm12
vaesenclast %ymm10,%ymm13,%ymm13

cmpq $32,%rdx
jb .Lxor_one_block__func1
je .Lxor_two_blocks__func1

.Lxor_three_blocks__func1:
vmovdqu 0(%rdi),%ymm2
vmovdqu 32(%rdi),%xmm3
vpxor %ymm2,%ymm12,%ymm12
vpxor %xmm3,%xmm13,%xmm13
vmovdqu %ymm12,0(%rsi)
vmovdqu %xmm13,32(%rsi)

vpshufb %ymm0,%ymm12,%ymm12
vpshufb %xmm0,%xmm13,%xmm13
vpxor %ymm1,%ymm12,%ymm12
vmovdqu (%r8),%ymm2
vmovdqu 32(%r8),%xmm3
vpclmulqdq $0x00,%xmm3,%xmm13,%xmm4
vpxor %ymm4,%ymm5,%ymm5
vpclmulqdq $0x01,%xmm3,%xmm13,%xmm4
vpxor %ymm4,%ymm6,%ymm6
vpclmulqdq $0x10,%xmm3,%xmm13,%xmm4
vpxor %ymm4,%ymm6,%ymm6
vpclmulqdq $0x11,%xmm3,%xmm13,%xmm4
vpxor %ymm4,%ymm7,%ymm7
jmp .Lghash_mul_one_vec_unreduced__func1

.Lxor_two_blocks__func1:
vmovdqu (%rdi),%ymm2
vpxor %ymm2,%ymm12,%ymm12
vmovdqu %ymm12,(%rsi)
vpshufb %ymm0,%ymm12,%ymm12
vpxor %ymm1,%ymm12,%ymm12
vmovdqu (%r8),%ymm2
jmp .Lghash_mul_one_vec_unreduced__func1

.Lxor_one_block__func1:
vmovdqu (%rdi),%xmm2
vpxor %xmm2,%xmm12,%xmm12
vmovdqu %xmm12,(%rsi)
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm1,%xmm12,%xmm12
vmovdqu (%r8),%xmm2

.Lghash_mul_one_vec_unreduced__func1:
vpclmulqdq $0x00,%ymm2,%ymm12,%ymm4
vpxor %ymm4,%ymm5,%ymm5
vpclmulqdq $0x01,%ymm2,%ymm12,%ymm4
vpxor %ymm4,%ymm6,%ymm6
vpclmulqdq $0x10,%ymm2,%ymm12,%ymm4
vpxor %ymm4,%ymm6,%ymm6
vpclmulqdq $0x11,%ymm2,%ymm12,%ymm4
vpxor %ymm4,%ymm7,%ymm7

.Lreduce__func1:

vbroadcasti128 .Lgfpoly(%rip),%ymm2
vpclmulqdq $0x01,%ymm5,%ymm2,%ymm3
vpshufd $0x4e,%ymm5,%ymm5
vpxor %ymm5,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpclmulqdq $0x01,%ymm6,%ymm2,%ymm3
vpshufd $0x4e,%ymm6,%ymm6
vpxor %ymm6,%ymm7,%ymm7
vpxor %ymm3,%ymm7,%ymm7
vextracti128 $1,%ymm7,%xmm1
vpxor %xmm7,%xmm1,%xmm1

.Ldone__func1:

vpshufb %xmm0,%xmm1,%xmm1
vmovdqu %xmm1,(%r12)

vzeroupper
popq %r12
.cfi_adjust_cfa_offset -8
.cfi_restore %r12
RET

.cfi_endproc
SET_SIZE(aes_gcm_enc_update_vaes_avx2)
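// Editor's note, not generator output: aes_gcm_dec_update_vaes_avx2
// mirrors the encrypt path above with the same argument layout. The
// difference is that GHASH is computed over the incoming ciphertext
// (loaded from %rdi before it is decrypted), so the main loop can hash
// and decrypt the same 128 bytes in one iteration and no trailing
// last-ciphertext pass is needed.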
ENTRY_ALIGN(aes_gcm_dec_update_vaes_avx2, 32)
.cfi_startproc

ENDBR
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-16

movq 16(%rsp),%r12
vbroadcasti128 .Lbswap_mask(%rip),%ymm0

vmovdqu (%r12),%xmm1
vpshufb %xmm0,%xmm1,%xmm1
vbroadcasti128 (%r8),%ymm11
vpshufb %ymm0,%ymm11,%ymm11

movl 504(%rcx),%r10d // ICP has a larger offset for rounds.
leal -24(,%r10,4),%r10d // ICP uses 10,12,14 not 9,11,13 for rounds.

leaq 96(%rcx,%r10,4),%r11
vbroadcasti128 (%rcx),%ymm9
vbroadcasti128 (%r11),%ymm10

vpaddd .Lctr_pattern(%rip),%ymm11,%ymm11

cmpq $127,%rdx
jbe .Lcrypt_loop_4x_done__func2

vmovdqu 128(%r9),%ymm7
vmovdqu 128+32(%r9),%ymm8
.balign 16
.Lcrypt_loop_4x__func2:

vmovdqu .Linc_2blocks(%rip),%ymm2
vpshufb %ymm0,%ymm11,%ymm12
vpaddd %ymm2,%ymm11,%ymm11
vpshufb %ymm0,%ymm11,%ymm13
vpaddd %ymm2,%ymm11,%ymm11
vpshufb %ymm0,%ymm11,%ymm14
vpaddd %ymm2,%ymm11,%ymm11
vpshufb %ymm0,%ymm11,%ymm15
vpaddd %ymm2,%ymm11,%ymm11

vpxor %ymm9,%ymm12,%ymm12
vpxor %ymm9,%ymm13,%ymm13
vpxor %ymm9,%ymm14,%ymm14
vpxor %ymm9,%ymm15,%ymm15

cmpl $24,%r10d
jl .Laes128__func2
je .Laes192__func2

vbroadcasti128 -208(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vbroadcasti128 -192(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

.Laes192__func2:
vbroadcasti128 -176(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vbroadcasti128 -160(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

.Laes128__func2:
prefetcht0 512(%rdi)
prefetcht0 512+64(%rdi)

vmovdqu 0(%rdi),%ymm3
vpshufb %ymm0,%ymm3,%ymm3
vmovdqu 0(%r9),%ymm4
vpxor %ymm1,%ymm3,%ymm3
vpclmulqdq $0x00,%ymm4,%ymm3,%ymm5
vpclmulqdq $0x11,%ymm4,%ymm3,%ymm1
vpunpckhqdq %ymm3,%ymm3,%ymm2
vpxor %ymm3,%ymm2,%ymm2
vpclmulqdq $0x00,%ymm7,%ymm2,%ymm6

vbroadcasti128 -144(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vbroadcasti128 -128(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vmovdqu 32(%rdi),%ymm3
vpshufb %ymm0,%ymm3,%ymm3
vmovdqu 32(%r9),%ymm4
vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2
vpxor %ymm2,%ymm5,%ymm5
vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2
vpxor %ymm2,%ymm1,%ymm1
vpunpckhqdq %ymm3,%ymm3,%ymm2
vpxor %ymm3,%ymm2,%ymm2
vpclmulqdq $0x10,%ymm7,%ymm2,%ymm2
vpxor %ymm2,%ymm6,%ymm6

vbroadcasti128 -112(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vmovdqu 64(%rdi),%ymm3
vpshufb %ymm0,%ymm3,%ymm3
vmovdqu 64(%r9),%ymm4

vbroadcasti128 -96(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2
vpxor %ymm2,%ymm5,%ymm5
vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2
vpxor %ymm2,%ymm1,%ymm1

vbroadcasti128 -80(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vpunpckhqdq %ymm3,%ymm3,%ymm2
vpxor %ymm3,%ymm2,%ymm2
vpclmulqdq $0x00,%ymm8,%ymm2,%ymm2
vpxor %ymm2,%ymm6,%ymm6

vmovdqu 96(%rdi),%ymm3
vpshufb %ymm0,%ymm3,%ymm3

vbroadcasti128 -64(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vmovdqu 96(%r9),%ymm4
vpclmulqdq $0x00,%ymm4,%ymm3,%ymm2
vpxor %ymm2,%ymm5,%ymm5
vpclmulqdq $0x11,%ymm4,%ymm3,%ymm2
vpxor %ymm2,%ymm1,%ymm1
vpunpckhqdq %ymm3,%ymm3,%ymm2
vpxor %ymm3,%ymm2,%ymm2
vpclmulqdq $0x10,%ymm8,%ymm2,%ymm2
vpxor %ymm2,%ymm6,%ymm6

vbroadcasti128 -48(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vpxor %ymm5,%ymm6,%ymm6
vpxor %ymm1,%ymm6,%ymm6

vbroadcasti128 .Lgfpoly(%rip),%ymm4
vpclmulqdq $0x01,%ymm5,%ymm4,%ymm2
vpshufd $0x4e,%ymm5,%ymm5
vpxor %ymm5,%ymm6,%ymm6
vpxor %ymm2,%ymm6,%ymm6

vbroadcasti128 -32(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vpclmulqdq $0x01,%ymm6,%ymm4,%ymm2
vpshufd $0x4e,%ymm6,%ymm6
vpxor %ymm6,%ymm1,%ymm1
vpxor %ymm2,%ymm1,%ymm1

vbroadcasti128 -16(%r11),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
vaesenc %ymm2,%ymm14,%ymm14
vaesenc %ymm2,%ymm15,%ymm15

vextracti128 $1,%ymm1,%xmm2
vpxor %xmm2,%xmm1,%xmm1

vpxor 0(%rdi),%ymm10,%ymm2
vpxor 32(%rdi),%ymm10,%ymm3
vpxor 64(%rdi),%ymm10,%ymm5
vpxor 96(%rdi),%ymm10,%ymm6
vaesenclast %ymm2,%ymm12,%ymm12
vaesenclast %ymm3,%ymm13,%ymm13
vaesenclast %ymm5,%ymm14,%ymm14
vaesenclast %ymm6,%ymm15,%ymm15
vmovdqu %ymm12,0(%rsi)
vmovdqu %ymm13,32(%rsi)
vmovdqu %ymm14,64(%rsi)
vmovdqu %ymm15,96(%rsi)

subq $-128,%rdi
subq $-128,%rsi
addq $-128,%rdx
cmpq $127,%rdx
ja .Lcrypt_loop_4x__func2
.Lcrypt_loop_4x_done__func2:

testq %rdx,%rdx
jz .Ldone__func2

leaq 128(%r9),%r8
subq %rdx,%r8

vpxor %xmm5,%xmm5,%xmm5
vpxor %xmm6,%xmm6,%xmm6
vpxor %xmm7,%xmm7,%xmm7

cmpq $64,%rdx
jb .Llessthan64bytes__func2

vpshufb %ymm0,%ymm11,%ymm12
vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11
vpshufb %ymm0,%ymm11,%ymm13
vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11
vpxor %ymm9,%ymm12,%ymm12
vpxor %ymm9,%ymm13,%ymm13
leaq 16(%rcx),%rax
.Lvaesenc_loop_tail_1__func2:
vbroadcasti128 (%rax),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
addq $16,%rax
cmpq %rax,%r11
jne .Lvaesenc_loop_tail_1__func2
vaesenclast %ymm10,%ymm12,%ymm12
vaesenclast %ymm10,%ymm13,%ymm13

vmovdqu 0(%rdi),%ymm2
vmovdqu 32(%rdi),%ymm3
vpxor %ymm2,%ymm12,%ymm12
vpxor %ymm3,%ymm13,%ymm13
vmovdqu %ymm12,0(%rsi)
vmovdqu %ymm13,32(%rsi)

vpshufb %ymm0,%ymm2,%ymm12
vpshufb %ymm0,%ymm3,%ymm13
vpxor %ymm1,%ymm12,%ymm12
vmovdqu (%r8),%ymm2
vmovdqu 32(%r8),%ymm3
vpclmulqdq $0x00,%ymm2,%ymm12,%ymm5
vpclmulqdq $0x01,%ymm2,%ymm12,%ymm6
vpclmulqdq $0x10,%ymm2,%ymm12,%ymm4
vpxor %ymm4,%ymm6,%ymm6
vpclmulqdq $0x11,%ymm2,%ymm12,%ymm7
vpclmulqdq $0x00,%ymm3,%ymm13,%ymm4
vpxor %ymm4,%ymm5,%ymm5
vpclmulqdq $0x01,%ymm3,%ymm13,%ymm4
vpxor %ymm4,%ymm6,%ymm6
vpclmulqdq $0x10,%ymm3,%ymm13,%ymm4
vpxor %ymm4,%ymm6,%ymm6
vpclmulqdq $0x11,%ymm3,%ymm13,%ymm4
vpxor %ymm4,%ymm7,%ymm7

addq $64,%r8
addq $64,%rdi
addq $64,%rsi
subq $64,%rdx
jz .Lreduce__func2

vpxor %xmm1,%xmm1,%xmm1

.Llessthan64bytes__func2:
vpshufb %ymm0,%ymm11,%ymm12
vpaddd .Linc_2blocks(%rip),%ymm11,%ymm11
vpshufb %ymm0,%ymm11,%ymm13
vpxor %ymm9,%ymm12,%ymm12
vpxor %ymm9,%ymm13,%ymm13
leaq 16(%rcx),%rax
.Lvaesenc_loop_tail_2__func2:
vbroadcasti128 (%rax),%ymm2
vaesenc %ymm2,%ymm12,%ymm12
vaesenc %ymm2,%ymm13,%ymm13
addq $16,%rax
cmpq %rax,%r11
jne .Lvaesenc_loop_tail_2__func2
vaesenclast %ymm10,%ymm12,%ymm12
vaesenclast %ymm10,%ymm13,%ymm13

cmpq $32,%rdx
jb .Lxor_one_block__func2
je .Lxor_two_blocks__func2

.Lxor_three_blocks__func2:
vmovdqu 0(%rdi),%ymm2
vmovdqu 32(%rdi),%xmm3
vpxor %ymm2,%ymm12,%ymm12
vpxor %xmm3,%xmm13,%xmm13
vmovdqu %ymm12,0(%rsi)
vmovdqu %xmm13,32(%rsi)

vpshufb %ymm0,%ymm2,%ymm12
vpshufb %xmm0,%xmm3,%xmm13
vpxor %ymm1,%ymm12,%ymm12
vmovdqu (%r8),%ymm2
vmovdqu 32(%r8),%xmm3
vpclmulqdq $0x00,%xmm3,%xmm13,%xmm4
vpxor %ymm4,%ymm5,%ymm5
vpclmulqdq $0x01,%xmm3,%xmm13,%xmm4
vpxor %ymm4,%ymm6,%ymm6
vpclmulqdq $0x10,%xmm3,%xmm13,%xmm4
vpxor %ymm4,%ymm6,%ymm6
vpclmulqdq $0x11,%xmm3,%xmm13,%xmm4
vpxor %ymm4,%ymm7,%ymm7
jmp .Lghash_mul_one_vec_unreduced__func2

.Lxor_two_blocks__func2:
vmovdqu (%rdi),%ymm2
vpxor %ymm2,%ymm12,%ymm12
vmovdqu %ymm12,(%rsi)
vpshufb %ymm0,%ymm2,%ymm12
vpxor %ymm1,%ymm12,%ymm12
vmovdqu (%r8),%ymm2
jmp .Lghash_mul_one_vec_unreduced__func2

.Lxor_one_block__func2:
vmovdqu (%rdi),%xmm2
vpxor %xmm2,%xmm12,%xmm12
vmovdqu %xmm12,(%rsi)
vpshufb %xmm0,%xmm2,%xmm12
vpxor %xmm1,%xmm12,%xmm12
vmovdqu (%r8),%xmm2

.Lghash_mul_one_vec_unreduced__func2:
vpclmulqdq $0x00,%ymm2,%ymm12,%ymm4
vpxor %ymm4,%ymm5,%ymm5
vpclmulqdq $0x01,%ymm2,%ymm12,%ymm4
vpxor %ymm4,%ymm6,%ymm6
vpclmulqdq $0x10,%ymm2,%ymm12,%ymm4
vpxor %ymm4,%ymm6,%ymm6
vpclmulqdq $0x11,%ymm2,%ymm12,%ymm4
vpxor %ymm4,%ymm7,%ymm7
.Lreduce__func2:

vbroadcasti128 .Lgfpoly(%rip),%ymm2
vpclmulqdq $0x01,%ymm5,%ymm2,%ymm3
vpshufd $0x4e,%ymm5,%ymm5
vpxor %ymm5,%ymm6,%ymm6
vpxor %ymm3,%ymm6,%ymm6
vpclmulqdq $0x01,%ymm6,%ymm2,%ymm3
vpshufd $0x4e,%ymm6,%ymm6
vpxor %ymm6,%ymm7,%ymm7
vpxor %ymm3,%ymm7,%ymm7
vextracti128 $1,%ymm7,%xmm1
vpxor %xmm7,%xmm1,%xmm1

.Ldone__func2:

vpshufb %xmm0,%xmm1,%xmm1
vmovdqu %xmm1,(%r12)

vzeroupper
popq %r12
.cfi_adjust_cfa_offset -8
.cfi_restore %r12
RET

.cfi_endproc
SET_SIZE(aes_gcm_dec_update_vaes_avx2)

#endif /* !_WIN32 || _KERNEL */

/* Mark the stack non-executable. */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

#endif /* defined(__x86_64__) && defined(HAVE_AVX) && defined(HAVE_VAES) ... */