/* Path: blob/main/sys/crypto/openssl/amd64/rsaz-2k-avx512.S */
/* 39482 views */
/* Do not modify. This file is auto-generated from rsaz-2k-avx512.pl. */

/*
 * Syntax: GNU as, AT&T operand order (source first, destination last).
 * ABI:    arguments are read from %rdi, %rsi, %rdx, %rcx, %r8 (SysV AMD64
 *         order); %rbx, %rbp, %r12-%r15 are saved and restored by the two
 *         multiplication routines.
 *
 * The comments in this file were added by hand to the generated output.
 * The instruction stream itself is unchanged; regenerating the file from
 * rsaz-2k-avx512.pl will drop them.
 */

/*
 * ossl_rsaz_avx512ifma_eligible
 *
 * Loads the 32-bit word at OPENSSL_ia32cap_P+8 and tests it against the mask
 * 2149777408 (0x80230000, i.e. bits 16, 17, 21 and 31).
 *
 * Out: %eax = 0x80230000 (non-zero) when all four bits are set, else 0.
 * Clobbers: %ecx, flags.
 *
 * NOTE(review): in OpenSSL's capability vector these bits presumably
 * correspond to AVX512F, AVX512DQ, AVX512IFMA and AVX512VL - confirm against
 * the OPENSSL_ia32cap documentation.
 */
.globl	ossl_rsaz_avx512ifma_eligible
.type	ossl_rsaz_avx512ifma_eligible,@function
.align	32
ossl_rsaz_avx512ifma_eligible:
	movl	OPENSSL_ia32cap_P+8(%rip),%ecx
	xorl	%eax,%eax
	andl	$2149777408,%ecx
	cmpl	$2149777408,%ecx
	cmovel	%ecx,%eax
	.byte	0xf3,0xc3		/* rep ret */
.size	ossl_rsaz_avx512ifma_eligible, .-ossl_rsaz_avx512ifma_eligible
.text

/*
 * ossl_rsaz_amm52x20_x1_ifma256
 *
 * One multiply-and-reduce over 20-digit operands whose digits are stored one
 * per 64-bit word and masked to 52 bits (constant 0xfffffffffffff in %rax).
 *
 * In (operand names are reviewer labels - NOTE(review): confirm against the
 * C prototype used by the callers):
 *   %rdi  result buffer; 5 x 32 bytes (20 qwords) are written
 *   %rsi  operand "a"; read as 5 x 32-byte vectors and as scalar a[0]
 *   %rdx  operand "b"; read one qword at a time (pointer is moved to %r11)
 *   %rcx  operand "m"; read as 5 x 32-byte vectors and as scalar m[0]
 *   %r8   "k0", used by value
 *
 * Register roles inside the loop:
 *   %ymm3,%ymm16-%ymm19  20-lane accumulator (4 qword lanes per register)
 *   %ymm0                all-zero, shifted into the top lane by valignq
 *   %ymm1                broadcast of the current b digit
 *   %ymm2                broadcast of y = (k0 * acc0) & (2^52 - 1)
 *   %r9                  scalar copy of accumulator lane 0
 *   %r10,%r12,%r13       scratch;  %ebx = loop counter (5 passes x 4 digits)
 *
 * Clobbers: %rax, %rcx, %rdx, %r8-%r11, %k1-%k5, %ymm0-%ymm3, %ymm16-%ymm19,
 *           %ymm25, %ymm26, flags.  vzeroupper is executed before returning.
 */
.globl	ossl_rsaz_amm52x20_x1_ifma256
.type	ossl_rsaz_amm52x20_x1_ifma256,@function
.align	32
ossl_rsaz_amm52x20_x1_ifma256:
.cfi_startproc
.byte	243,15,30,250			/* endbr64 */
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lossl_rsaz_amm52x20_x1_ifma256_body:

	/* Clear the zero register and the five accumulator registers. */
	vpxord	%ymm0,%ymm0,%ymm0
	vmovdqa64	%ymm0,%ymm3
	vmovdqa64	%ymm0,%ymm16
	vmovdqa64	%ymm0,%ymm17
	vmovdqa64	%ymm0,%ymm18
	vmovdqa64	%ymm0,%ymm19

	xorl	%r9d,%r9d		/* scalar accumulator lane 0 = 0 */

	movq	%rdx,%r11		/* %rdx is needed by mulx; keep b in %r11 */
	movq	$0xfffffffffffff,%rax	/* 52-bit digit mask */

	movl	$5,%ebx			/* 5 passes, 4 digits of b per pass */

.align	32
.Lloop5:
	/* ---- digit b[0] of this pass ---- */
	movq	0(%r11),%r13

	/* Scalar lane 0: %r10:%r9 = %r9 + a[0] * b[i] (full 64x64 product). */
	vpbroadcastq	%r13,%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	/* y = (k0 * acc0) mod 2^52 */
	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	/* %r10:%r9 += m[0] * y */
	vpbroadcastq	%r13,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	/* %r9 = (%r10:%r9) >> 52 : drop the low digit, keep the carry. */
	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	/* acc += low 52 bits of a[j] * b[i], all 20 lanes. */
	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	vpmadd52luq	32(%rsi),%ymm1,%ymm16
	vpmadd52luq	64(%rsi),%ymm1,%ymm17
	vpmadd52luq	96(%rsi),%ymm1,%ymm18
	vpmadd52luq	128(%rsi),%ymm1,%ymm19

	/* acc += low 52 bits of m[j] * y. */
	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	vpmadd52luq	32(%rcx),%ymm2,%ymm16
	vpmadd52luq	64(%rcx),%ymm2,%ymm17
	vpmadd52luq	96(%rcx),%ymm2,%ymm18
	vpmadd52luq	128(%rcx),%ymm2,%ymm19

	/* Shift the accumulator down by one lane; zero enters the top lane. */
	valignq	$1,%ymm3,%ymm16,%ymm3
	valignq	$1,%ymm16,%ymm17,%ymm16
	valignq	$1,%ymm17,%ymm18,%ymm17
	valignq	$1,%ymm18,%ymm19,%ymm18
	valignq	$1,%ymm19,%ymm0,%ymm19

	/* Fold the new lane 0 into the scalar accumulator. */
	vmovq	%xmm3,%r13
	addq	%r13,%r9

	/* acc += high 52 bits of a[j] * b[i] (added after the lane shift). */
	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	vpmadd52huq	32(%rsi),%ymm1,%ymm16
	vpmadd52huq	64(%rsi),%ymm1,%ymm17
	vpmadd52huq	96(%rsi),%ymm1,%ymm18
	vpmadd52huq	128(%rsi),%ymm1,%ymm19

	/* acc += high 52 bits of m[j] * y. */
	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	vpmadd52huq	32(%rcx),%ymm2,%ymm16
	vpmadd52huq	64(%rcx),%ymm2,%ymm17
	vpmadd52huq	96(%rcx),%ymm2,%ymm18
	vpmadd52huq	128(%rcx),%ymm2,%ymm19

	/* ---- digit b[1] of this pass (same step, b offset 8) ---- */
	movq	8(%r11),%r13

	vpbroadcastq	%r13,%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	vpbroadcastq	%r13,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	vpmadd52luq	32(%rsi),%ymm1,%ymm16
	vpmadd52luq	64(%rsi),%ymm1,%ymm17
	vpmadd52luq	96(%rsi),%ymm1,%ymm18
	vpmadd52luq	128(%rsi),%ymm1,%ymm19

	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	vpmadd52luq	32(%rcx),%ymm2,%ymm16
	vpmadd52luq	64(%rcx),%ymm2,%ymm17
	vpmadd52luq	96(%rcx),%ymm2,%ymm18
	vpmadd52luq	128(%rcx),%ymm2,%ymm19


	valignq	$1,%ymm3,%ymm16,%ymm3
	valignq	$1,%ymm16,%ymm17,%ymm16
	valignq	$1,%ymm17,%ymm18,%ymm17
	valignq	$1,%ymm18,%ymm19,%ymm18
	valignq	$1,%ymm19,%ymm0,%ymm19

	vmovq	%xmm3,%r13
	addq	%r13,%r9

	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	vpmadd52huq	32(%rsi),%ymm1,%ymm16
	vpmadd52huq	64(%rsi),%ymm1,%ymm17
	vpmadd52huq	96(%rsi),%ymm1,%ymm18
	vpmadd52huq	128(%rsi),%ymm1,%ymm19

	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	vpmadd52huq	32(%rcx),%ymm2,%ymm16
	vpmadd52huq	64(%rcx),%ymm2,%ymm17
	vpmadd52huq	96(%rcx),%ymm2,%ymm18
	vpmadd52huq	128(%rcx),%ymm2,%ymm19

	/* ---- digit b[2] of this pass (b offset 16) ---- */
	movq	16(%r11),%r13

	vpbroadcastq	%r13,%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	vpbroadcastq	%r13,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	vpmadd52luq	32(%rsi),%ymm1,%ymm16
	vpmadd52luq	64(%rsi),%ymm1,%ymm17
	vpmadd52luq	96(%rsi),%ymm1,%ymm18
	vpmadd52luq	128(%rsi),%ymm1,%ymm19

	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	vpmadd52luq	32(%rcx),%ymm2,%ymm16
	vpmadd52luq	64(%rcx),%ymm2,%ymm17
	vpmadd52luq	96(%rcx),%ymm2,%ymm18
	vpmadd52luq	128(%rcx),%ymm2,%ymm19


	valignq	$1,%ymm3,%ymm16,%ymm3
	valignq	$1,%ymm16,%ymm17,%ymm16
	valignq	$1,%ymm17,%ymm18,%ymm17
	valignq	$1,%ymm18,%ymm19,%ymm18
	valignq	$1,%ymm19,%ymm0,%ymm19

	vmovq	%xmm3,%r13
	addq	%r13,%r9

	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	vpmadd52huq	32(%rsi),%ymm1,%ymm16
	vpmadd52huq	64(%rsi),%ymm1,%ymm17
	vpmadd52huq	96(%rsi),%ymm1,%ymm18
	vpmadd52huq	128(%rsi),%ymm1,%ymm19

	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	vpmadd52huq	32(%rcx),%ymm2,%ymm16
	vpmadd52huq	64(%rcx),%ymm2,%ymm17
	vpmadd52huq	96(%rcx),%ymm2,%ymm18
	vpmadd52huq	128(%rcx),%ymm2,%ymm19

	/* ---- digit b[3] of this pass (b offset 24) ---- */
	movq	24(%r11),%r13

	vpbroadcastq	%r13,%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	vpbroadcastq	%r13,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	vpmadd52luq	32(%rsi),%ymm1,%ymm16
	vpmadd52luq	64(%rsi),%ymm1,%ymm17
	vpmadd52luq	96(%rsi),%ymm1,%ymm18
	vpmadd52luq	128(%rsi),%ymm1,%ymm19

	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	vpmadd52luq	32(%rcx),%ymm2,%ymm16
	vpmadd52luq	64(%rcx),%ymm2,%ymm17
	vpmadd52luq	96(%rcx),%ymm2,%ymm18
	vpmadd52luq	128(%rcx),%ymm2,%ymm19


	valignq	$1,%ymm3,%ymm16,%ymm3
	valignq	$1,%ymm16,%ymm17,%ymm16
	valignq	$1,%ymm17,%ymm18,%ymm17
	valignq	$1,%ymm18,%ymm19,%ymm18
	valignq	$1,%ymm19,%ymm0,%ymm19

	vmovq	%xmm3,%r13
	addq	%r13,%r9

	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	vpmadd52huq	32(%rsi),%ymm1,%ymm16
	vpmadd52huq	64(%rsi),%ymm1,%ymm17
	vpmadd52huq	96(%rsi),%ymm1,%ymm18
	vpmadd52huq	128(%rsi),%ymm1,%ymm19

	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	vpmadd52huq	32(%rcx),%ymm2,%ymm16
	vpmadd52huq	64(%rcx),%ymm2,%ymm17
	vpmadd52huq	96(%rcx),%ymm2,%ymm18
	vpmadd52huq	128(%rcx),%ymm2,%ymm19
	leaq	32(%r11),%r11		/* advance b by the 4 digits consumed */
	decl	%ebx
	jne	.Lloop5

	/* Put the scalar lane-0 value back into qword lane 0 of %ymm3. */
	vpbroadcastq	%r9,%ymm0
	vpblendd	$3,%ymm0,%ymm3,%ymm3

	/*
	 * Normalise every lane to 52 bits.
	 * Step 1: carries = lane >> 52.
	 */
	vpsrlq	$52,%ymm3,%ymm0
	vpsrlq	$52,%ymm16,%ymm1
	vpsrlq	$52,%ymm17,%ymm2
	vpsrlq	$52,%ymm18,%ymm25
	vpsrlq	$52,%ymm19,%ymm26

	/* Step 2: move each carry up one lane (zero enters lane 0). */
	valignq	$3,%ymm25,%ymm26,%ymm26
	valignq	$3,%ymm2,%ymm25,%ymm25
	valignq	$3,%ymm1,%ymm2,%ymm2
	valignq	$3,%ymm0,%ymm1,%ymm1
	valignq	$3,.Lzeros(%rip),%ymm0,%ymm0

	/* Step 3: keep the low 52 bits of every lane ... */
	vpandq	.Lmask52x4(%rip),%ymm3,%ymm3
	vpandq	.Lmask52x4(%rip),%ymm16,%ymm16
	vpandq	.Lmask52x4(%rip),%ymm17,%ymm17
	vpandq	.Lmask52x4(%rip),%ymm18,%ymm18
	vpandq	.Lmask52x4(%rip),%ymm19,%ymm19

	/* ... and add the carry that arrived from the lane below. */
	vpaddq	%ymm0,%ymm3,%ymm3
	vpaddq	%ymm1,%ymm16,%ymm16
	vpaddq	%ymm2,%ymm17,%ymm17
	vpaddq	%ymm25,%ymm18,%ymm18
	vpaddq	%ymm26,%ymm19,%ymm19

	/*
	 * Step 4: a lane may now exceed 2^52 - 1 by a single carry.
	 * Predicate 6 (unsigned greater-than) marks lanes that generate a
	 * carry; the per-register 4-bit masks go to general registers.
	 */
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm3,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm16,%k2
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm17,%k3
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm18,%k4
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm19,%k5
	kmovb	%k1,%r14d
	kmovb	%k2,%r13d
	kmovb	%k3,%r12d
	kmovb	%k4,%r11d
	kmovb	%k5,%r10d

	/* Predicate 0 (equal) marks lanes that would pass a carry through. */
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm3,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm16,%k2
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm17,%k3
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm18,%k4
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm19,%k5
	kmovb	%k1,%r9d
	kmovb	%k2,%r8d
	kmovb	%k3,%ebx
	kmovb	%k4,%ecx
	kmovb	%k5,%edx

	/*
	 * Pack the 20 "greater" bits into %r14b (lanes 0-7), %r12b (8-15)
	 * and %r10b (16-19), then shift the whole bit string left by one so
	 * that each carry lines up with the lane that receives it.
	 */
	shlb	$4,%r13b
	orb	%r13b,%r14b
	shlb	$4,%r11b
	orb	%r11b,%r12b

	addb	%r14b,%r14b
	adcb	%r12b,%r12b
	adcb	%r10b,%r10b

	/* Pack the "equal" bits the same way into %r9b, %bl, %dl. */
	shlb	$4,%r8b
	orb	%r8b,%r9b
	shlb	$4,%cl
	orb	%cl,%bl

	/*
	 * carry + equal, with the add/adc chain rippling a carry through any
	 * run of "equal" lanes; xor with the "equal" bits then leaves a 1
	 * exactly in the lanes that must absorb a carry.
	 */
	addb	%r9b,%r14b
	adcb	%bl,%r12b
	adcb	%dl,%r10b

	xorb	%r9b,%r14b
	xorb	%bl,%r12b
	xorb	%dl,%r10b

	/* Unpack back into one 4-bit write mask per accumulator register. */
	kmovb	%r14d,%k1
	shrb	$4,%r14b
	kmovb	%r14d,%k2
	kmovb	%r12d,%k3
	shrb	$4,%r12b
	kmovb	%r12d,%k4
	kmovb	%r10d,%k5

	/*
	 * In the selected lanes subtract 2^52 - 1; together with the final
	 * 52-bit mask this is (lane + 1) mod 2^52.
	 */
	vpsubq	.Lmask52x4(%rip),%ymm3,%ymm3{%k1}
	vpsubq	.Lmask52x4(%rip),%ymm16,%ymm16{%k2}
	vpsubq	.Lmask52x4(%rip),%ymm17,%ymm17{%k3}
	vpsubq	.Lmask52x4(%rip),%ymm18,%ymm18{%k4}
	vpsubq	.Lmask52x4(%rip),%ymm19,%ymm19{%k5}

	vpandq	.Lmask52x4(%rip),%ymm3,%ymm3
	vpandq	.Lmask52x4(%rip),%ymm16,%ymm16
	vpandq	.Lmask52x4(%rip),%ymm17,%ymm17
	vpandq	.Lmask52x4(%rip),%ymm18,%ymm18
	vpandq	.Lmask52x4(%rip),%ymm19,%ymm19

	/* Store the 20 result digits. */
	vmovdqu64	%ymm3,0(%rdi)
	vmovdqu64	%ymm16,32(%rdi)
	vmovdqu64	%ymm17,64(%rdi)
	vmovdqu64	%ymm18,96(%rdi)
	vmovdqu64	%ymm19,128(%rdi)

	vzeroupper
	/* Reload the callee-saved registers, then drop the 48-byte save area. */
	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbp
.cfi_restore	%rbp
	movq	40(%rsp),%rbx
.cfi_restore	%rbx
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lossl_rsaz_amm52x20_x1_ifma256_epilogue:
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	ossl_rsaz_amm52x20_x1_ifma256, .-ossl_rsaz_amm52x20_x1_ifma256
.section	.rodata
.align	32
/* 2^52 - 1 in each of the four qword lanes. */
.Lmask52x4:
.quad	0xfffffffffffff
.quad	0xfffffffffffff
.quad	0xfffffffffffff
.quad	0xfffffffffffff
.text

/*
 * ossl_rsaz_amm52x20_x2_ifma256
 *
 * Two independent instances of the x1 multiply-and-reduce, interleaved in
 * one loop.  The second instance uses the data at byte offset 160 of every
 * operand and of the result.
 *
 * In (operand names are reviewer labels - NOTE(review): confirm against the
 * C prototype used by the callers):
 *   %rdi  result buffer; 10 x 32 bytes are written (offsets 0 and 160)
 *   %rsi  operand "a"; instance 0 at offset 0, instance 1 at offset 160
 *   %rdx  operand "b"; same layout (pointer is moved to %r11)
 *   %rcx  operand "m"; same layout
 *   %r8   pointer to two qwords: k0 for instance 0 at (%r8), for
 *         instance 1 at 8(%r8)
 *
 * Register roles inside the loop:
 *   %ymm3,%ymm16-%ymm19  accumulator, instance 0;  %r9  = its scalar lane 0
 *   %ymm4,%ymm20-%ymm23  accumulator, instance 1;  %r15 = its scalar lane 0
 *   %ymm0 zero, %ymm1 broadcast b digit, %ymm2 broadcast y
 *   %ebx  loop counter (20 passes, one b digit per instance per pass)
 *
 * Clobbers: %rax, %rcx, %rdx, %r8-%r11, %k1-%k5, %ymm0-%ymm4,
 *           %ymm16-%ymm23, %ymm25, %ymm26, flags.  vzeroupper is executed
 *           before returning.
 */
.globl	ossl_rsaz_amm52x20_x2_ifma256
.type	ossl_rsaz_amm52x20_x2_ifma256,@function
.align	32
ossl_rsaz_amm52x20_x2_ifma256:
.cfi_startproc
.byte	243,15,30,250			/* endbr64 */
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lossl_rsaz_amm52x20_x2_ifma256_body:

	/* Clear the zero register and both accumulator sets. */
	vpxord	%ymm0,%ymm0,%ymm0
	vmovdqa64	%ymm0,%ymm3
	vmovdqa64	%ymm0,%ymm16
	vmovdqa64	%ymm0,%ymm17
	vmovdqa64	%ymm0,%ymm18
	vmovdqa64	%ymm0,%ymm19
	vmovdqa64	%ymm0,%ymm4
	vmovdqa64	%ymm0,%ymm20
	vmovdqa64	%ymm0,%ymm21
	vmovdqa64	%ymm0,%ymm22
	vmovdqa64	%ymm0,%ymm23

	xorl	%r9d,%r9d		/* scalar lane 0, instance 0 */
	xorl	%r15d,%r15d		/* scalar lane 0, instance 1 */

	movq	%rdx,%r11		/* %rdx is needed by mulx; keep b in %r11 */
	movq	$0xfffffffffffff,%rax	/* 52-bit digit mask */

	movl	$20,%ebx

.align	32
.Lloop20:
	/* ---- instance 0: same step as in the x1 routine ---- */
	movq	0(%r11),%r13

	vpbroadcastq	%r13,%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	movq	(%r8),%r13		/* k0 for instance 0, read from memory */
	imulq	%r9,%r13
	andq	%rax,%r13

	vpbroadcastq	%r13,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	vpmadd52luq	32(%rsi),%ymm1,%ymm16
	vpmadd52luq	64(%rsi),%ymm1,%ymm17
	vpmadd52luq	96(%rsi),%ymm1,%ymm18
	vpmadd52luq	128(%rsi),%ymm1,%ymm19

	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	vpmadd52luq	32(%rcx),%ymm2,%ymm16
	vpmadd52luq	64(%rcx),%ymm2,%ymm17
	vpmadd52luq	96(%rcx),%ymm2,%ymm18
	vpmadd52luq	128(%rcx),%ymm2,%ymm19


	valignq	$1,%ymm3,%ymm16,%ymm3
	valignq	$1,%ymm16,%ymm17,%ymm16
	valignq	$1,%ymm17,%ymm18,%ymm17
	valignq	$1,%ymm18,%ymm19,%ymm18
	valignq	$1,%ymm19,%ymm0,%ymm19

	vmovq	%xmm3,%r13
	addq	%r13,%r9

	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	vpmadd52huq	32(%rsi),%ymm1,%ymm16
	vpmadd52huq	64(%rsi),%ymm1,%ymm17
	vpmadd52huq	96(%rsi),%ymm1,%ymm18
	vpmadd52huq	128(%rsi),%ymm1,%ymm19

	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	vpmadd52huq	32(%rcx),%ymm2,%ymm16
	vpmadd52huq	64(%rcx),%ymm2,%ymm17
	vpmadd52huq	96(%rcx),%ymm2,%ymm18
	vpmadd52huq	128(%rcx),%ymm2,%ymm19

	/* ---- instance 1: operands at byte offset 160, state in %r15 ---- */
	movq	160(%r11),%r13

	vpbroadcastq	%r13,%ymm1
	movq	160(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r15
	movq	%r12,%r10
	adcq	$0,%r10

	movq	8(%r8),%r13		/* k0 for instance 1 */
	imulq	%r15,%r13
	andq	%rax,%r13

	vpbroadcastq	%r13,%ymm2
	movq	160(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r15
	adcq	%r12,%r10

	shrq	$52,%r15
	salq	$12,%r10
	orq	%r10,%r15

	vpmadd52luq	160(%rsi),%ymm1,%ymm4
	vpmadd52luq	192(%rsi),%ymm1,%ymm20
	vpmadd52luq	224(%rsi),%ymm1,%ymm21
	vpmadd52luq	256(%rsi),%ymm1,%ymm22
	vpmadd52luq	288(%rsi),%ymm1,%ymm23

	vpmadd52luq	160(%rcx),%ymm2,%ymm4
	vpmadd52luq	192(%rcx),%ymm2,%ymm20
	vpmadd52luq	224(%rcx),%ymm2,%ymm21
	vpmadd52luq	256(%rcx),%ymm2,%ymm22
	vpmadd52luq	288(%rcx),%ymm2,%ymm23


	valignq	$1,%ymm4,%ymm20,%ymm4
	valignq	$1,%ymm20,%ymm21,%ymm20
	valignq	$1,%ymm21,%ymm22,%ymm21
	valignq	$1,%ymm22,%ymm23,%ymm22
	valignq	$1,%ymm23,%ymm0,%ymm23

	vmovq	%xmm4,%r13
	addq	%r13,%r15

	vpmadd52huq	160(%rsi),%ymm1,%ymm4
	vpmadd52huq	192(%rsi),%ymm1,%ymm20
	vpmadd52huq	224(%rsi),%ymm1,%ymm21
	vpmadd52huq	256(%rsi),%ymm1,%ymm22
	vpmadd52huq	288(%rsi),%ymm1,%ymm23

	vpmadd52huq	160(%rcx),%ymm2,%ymm4
	vpmadd52huq	192(%rcx),%ymm2,%ymm20
	vpmadd52huq	224(%rcx),%ymm2,%ymm21
	vpmadd52huq	256(%rcx),%ymm2,%ymm22
	vpmadd52huq	288(%rcx),%ymm2,%ymm23
	leaq	8(%r11),%r11		/* next b digit for both instances */
	decl	%ebx
	jne	.Lloop20

	/*
	 * Normalise instance 0 (%ymm3,%ymm16-%ymm19); the sequence is the one
	 * commented step by step in ossl_rsaz_amm52x20_x1_ifma256 above.
	 */
	vpbroadcastq	%r9,%ymm0
	vpblendd	$3,%ymm0,%ymm3,%ymm3



	vpsrlq	$52,%ymm3,%ymm0
	vpsrlq	$52,%ymm16,%ymm1
	vpsrlq	$52,%ymm17,%ymm2
	vpsrlq	$52,%ymm18,%ymm25
	vpsrlq	$52,%ymm19,%ymm26


	valignq	$3,%ymm25,%ymm26,%ymm26
	valignq	$3,%ymm2,%ymm25,%ymm25
	valignq	$3,%ymm1,%ymm2,%ymm2
	valignq	$3,%ymm0,%ymm1,%ymm1
	valignq	$3,.Lzeros(%rip),%ymm0,%ymm0


	vpandq	.Lmask52x4(%rip),%ymm3,%ymm3
	vpandq	.Lmask52x4(%rip),%ymm16,%ymm16
	vpandq	.Lmask52x4(%rip),%ymm17,%ymm17
	vpandq	.Lmask52x4(%rip),%ymm18,%ymm18
	vpandq	.Lmask52x4(%rip),%ymm19,%ymm19


	vpaddq	%ymm0,%ymm3,%ymm3
	vpaddq	%ymm1,%ymm16,%ymm16
	vpaddq	%ymm2,%ymm17,%ymm17
	vpaddq	%ymm25,%ymm18,%ymm18
	vpaddq	%ymm26,%ymm19,%ymm19



	vpcmpuq	$6,.Lmask52x4(%rip),%ymm3,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm16,%k2
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm17,%k3
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm18,%k4
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm19,%k5
	kmovb	%k1,%r14d
	kmovb	%k2,%r13d
	kmovb	%k3,%r12d
	kmovb	%k4,%r11d
	kmovb	%k5,%r10d


	vpcmpuq	$0,.Lmask52x4(%rip),%ymm3,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm16,%k2
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm17,%k3
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm18,%k4
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm19,%k5
	kmovb	%k1,%r9d
	kmovb	%k2,%r8d
	kmovb	%k3,%ebx
	kmovb	%k4,%ecx
	kmovb	%k5,%edx



	shlb	$4,%r13b
	orb	%r13b,%r14b
	shlb	$4,%r11b
	orb	%r11b,%r12b

	addb	%r14b,%r14b
	adcb	%r12b,%r12b
	adcb	%r10b,%r10b

	shlb	$4,%r8b
	orb	%r8b,%r9b
	shlb	$4,%cl
	orb	%cl,%bl

	addb	%r9b,%r14b
	adcb	%bl,%r12b
	adcb	%dl,%r10b

	xorb	%r9b,%r14b
	xorb	%bl,%r12b
	xorb	%dl,%r10b

	kmovb	%r14d,%k1
	shrb	$4,%r14b
	kmovb	%r14d,%k2
	kmovb	%r12d,%k3
	shrb	$4,%r12b
	kmovb	%r12d,%k4
	kmovb	%r10d,%k5


	vpsubq	.Lmask52x4(%rip),%ymm3,%ymm3{%k1}
	vpsubq	.Lmask52x4(%rip),%ymm16,%ymm16{%k2}
	vpsubq	.Lmask52x4(%rip),%ymm17,%ymm17{%k3}
	vpsubq	.Lmask52x4(%rip),%ymm18,%ymm18{%k4}
	vpsubq	.Lmask52x4(%rip),%ymm19,%ymm19{%k5}

	vpandq	.Lmask52x4(%rip),%ymm3,%ymm3
	vpandq	.Lmask52x4(%rip),%ymm16,%ymm16
	vpandq	.Lmask52x4(%rip),%ymm17,%ymm17
	vpandq	.Lmask52x4(%rip),%ymm18,%ymm18
	vpandq	.Lmask52x4(%rip),%ymm19,%ymm19

	/* Normalise instance 1 (%ymm4,%ymm20-%ymm23) in the same way. */
	vpbroadcastq	%r15,%ymm0
	vpblendd	$3,%ymm0,%ymm4,%ymm4



	vpsrlq	$52,%ymm4,%ymm0
	vpsrlq	$52,%ymm20,%ymm1
	vpsrlq	$52,%ymm21,%ymm2
	vpsrlq	$52,%ymm22,%ymm25
	vpsrlq	$52,%ymm23,%ymm26


	valignq	$3,%ymm25,%ymm26,%ymm26
	valignq	$3,%ymm2,%ymm25,%ymm25
	valignq	$3,%ymm1,%ymm2,%ymm2
	valignq	$3,%ymm0,%ymm1,%ymm1
	valignq	$3,.Lzeros(%rip),%ymm0,%ymm0


	vpandq	.Lmask52x4(%rip),%ymm4,%ymm4
	vpandq	.Lmask52x4(%rip),%ymm20,%ymm20
	vpandq	.Lmask52x4(%rip),%ymm21,%ymm21
	vpandq	.Lmask52x4(%rip),%ymm22,%ymm22
	vpandq	.Lmask52x4(%rip),%ymm23,%ymm23


	vpaddq	%ymm0,%ymm4,%ymm4
	vpaddq	%ymm1,%ymm20,%ymm20
	vpaddq	%ymm2,%ymm21,%ymm21
	vpaddq	%ymm25,%ymm22,%ymm22
	vpaddq	%ymm26,%ymm23,%ymm23



	vpcmpuq	$6,.Lmask52x4(%rip),%ymm4,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm20,%k2
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm21,%k3
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm22,%k4
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm23,%k5
	kmovb	%k1,%r14d
	kmovb	%k2,%r13d
	kmovb	%k3,%r12d
	kmovb	%k4,%r11d
	kmovb	%k5,%r10d


	vpcmpuq	$0,.Lmask52x4(%rip),%ymm4,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm20,%k2
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm21,%k3
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm22,%k4
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm23,%k5
	kmovb	%k1,%r9d
	kmovb	%k2,%r8d
	kmovb	%k3,%ebx
	kmovb	%k4,%ecx
	kmovb	%k5,%edx



	shlb	$4,%r13b
	orb	%r13b,%r14b
	shlb	$4,%r11b
	orb	%r11b,%r12b

	addb	%r14b,%r14b
	adcb	%r12b,%r12b
	adcb	%r10b,%r10b

	shlb	$4,%r8b
	orb	%r8b,%r9b
	shlb	$4,%cl
	orb	%cl,%bl

	addb	%r9b,%r14b
	adcb	%bl,%r12b
	adcb	%dl,%r10b

	xorb	%r9b,%r14b
	xorb	%bl,%r12b
	xorb	%dl,%r10b

	kmovb	%r14d,%k1
	shrb	$4,%r14b
	kmovb	%r14d,%k2
	kmovb	%r12d,%k3
	shrb	$4,%r12b
	kmovb	%r12d,%k4
	kmovb	%r10d,%k5


	vpsubq	.Lmask52x4(%rip),%ymm4,%ymm4{%k1}
	vpsubq	.Lmask52x4(%rip),%ymm20,%ymm20{%k2}
	vpsubq	.Lmask52x4(%rip),%ymm21,%ymm21{%k3}
	vpsubq	.Lmask52x4(%rip),%ymm22,%ymm22{%k4}
	vpsubq	.Lmask52x4(%rip),%ymm23,%ymm23{%k5}

	vpandq	.Lmask52x4(%rip),%ymm4,%ymm4
	vpandq	.Lmask52x4(%rip),%ymm20,%ymm20
	vpandq	.Lmask52x4(%rip),%ymm21,%ymm21
	vpandq	.Lmask52x4(%rip),%ymm22,%ymm22
	vpandq	.Lmask52x4(%rip),%ymm23,%ymm23

	/* Store instance 0 at offset 0 and instance 1 at offset 160. */
	vmovdqu64	%ymm3,0(%rdi)
	vmovdqu64	%ymm16,32(%rdi)
	vmovdqu64	%ymm17,64(%rdi)
	vmovdqu64	%ymm18,96(%rdi)
	vmovdqu64	%ymm19,128(%rdi)

	vmovdqu64	%ymm4,160(%rdi)
	vmovdqu64	%ymm20,192(%rdi)
	vmovdqu64	%ymm21,224(%rdi)
	vmovdqu64	%ymm22,256(%rdi)
	vmovdqu64	%ymm23,288(%rdi)

	vzeroupper
	/* Reload the callee-saved registers, then drop the 48-byte save area. */
	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbp
.cfi_restore	%rbp
	movq	40(%rsp),%rbx
.cfi_restore	%rbx
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lossl_rsaz_amm52x20_x2_ifma256_epilogue:
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	ossl_rsaz_amm52x20_x2_ifma256, .-ossl_rsaz_amm52x20_x2_ifma256
.text

/*
 * ossl_extract_multiplier_2x20_win5
 *
 * Copies one 160-byte first half and one 160-byte second half out of a
 * table of 32 entries of 320 bytes each (10240 bytes in total).  Every entry
 * is loaded on every call; the wanted halves are picked with mask blends, so
 * the sequence of memory accesses does not depend on the two indices.
 *
 * In:
 *   %rdi  output buffer, 320 bytes written
 *   %rsi  table base (advanced by 320 per iteration)
 *   %rdx  index of the entry whose bytes 0..159 are kept
 *   %rcx  index of the entry whose bytes 160..319 are kept
 *
 * Register roles:
 *   %ymm21  running entry counter (all lanes), stepped by .Lones
 *   %ymm22 / %ymm23  broadcast of %rdx / %rcx;  %k1 / %k2 = counter match
 *   %ymm0-%ymm4   selected first half;  %ymm5,%ymm16-%ymm19 selected second
 *   %ymm20  scratch for the current table row;  %rax = end-of-table pointer
 *
 * Clobbers: %rax, %rsi, %k1, %k2, %ymm0-%ymm5, %ymm16-%ymm24, flags.
 *
 * NOTE(review): unlike the two amm52 routines this one returns without
 * vzeroupper although %ymm0-%ymm5 are written at 256-bit width; check
 * whether the generator omits it on purpose.
 */
.align	32
.globl	ossl_extract_multiplier_2x20_win5
.type	ossl_extract_multiplier_2x20_win5,@function
ossl_extract_multiplier_2x20_win5:
.cfi_startproc
.byte	243,15,30,250			/* endbr64 */
	vmovdqa64	.Lones(%rip),%ymm24
	vpbroadcastq	%rdx,%ymm22
	vpbroadcastq	%rcx,%ymm23
	leaq	10240(%rsi),%rax	/* table end = base + 32 * 320 */

	/* Counter and all ten output registers start at zero. */
	vpxor	%xmm0,%xmm0,%xmm0
	vmovdqa64	%ymm0,%ymm21
	vmovdqa64	%ymm0,%ymm1
	vmovdqa64	%ymm0,%ymm2
	vmovdqa64	%ymm0,%ymm3
	vmovdqa64	%ymm0,%ymm4
	vmovdqa64	%ymm0,%ymm5
	vmovdqa64	%ymm0,%ymm16
	vmovdqa64	%ymm0,%ymm17
	vmovdqa64	%ymm0,%ymm18
	vmovdqa64	%ymm0,%ymm19

.align	32
.Lloop:
	/* %k1 / %k2 are all-ones only on the iteration that matches. */
	vpcmpq	$0,%ymm21,%ymm22,%k1
	vpcmpq	$0,%ymm21,%ymm23,%k2
	/* First half of the entry: keep the loaded row where %k1 is set. */
	vmovdqu64	0(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm0,%ymm0{%k1}
	vmovdqu64	32(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm1,%ymm1{%k1}
	vmovdqu64	64(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm2,%ymm2{%k1}
	vmovdqu64	96(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm3,%ymm3{%k1}
	vmovdqu64	128(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm4,%ymm4{%k1}
	/* Second half of the entry: same, under %k2. */
	vmovdqu64	160(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm5,%ymm5{%k2}
	vmovdqu64	192(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm16,%ymm16{%k2}
	vmovdqu64	224(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm17,%ymm17{%k2}
	vmovdqu64	256(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm18,%ymm18{%k2}
	vmovdqu64	288(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm19,%ymm19{%k2}
	vpaddq	%ymm24,%ymm21,%ymm21	/* counter += 1 */
	addq	$320,%rsi		/* next table entry */
	cmpq	%rsi,%rax
	jne	.Lloop
	vmovdqu64	%ymm0,0(%rdi)
	vmovdqu64	%ymm1,32(%rdi)
	vmovdqu64	%ymm2,64(%rdi)
	vmovdqu64	%ymm3,96(%rdi)
	vmovdqu64	%ymm4,128(%rdi)
	vmovdqu64	%ymm5,160(%rdi)
	vmovdqu64	%ymm16,192(%rdi)
	vmovdqu64	%ymm17,224(%rdi)
	vmovdqu64	%ymm18,256(%rdi)
	vmovdqu64	%ymm19,288(%rdi)
	.byte	0xf3,0xc3		/* rep ret */
.cfi_endproc
.size	ossl_extract_multiplier_2x20_win5, .-ossl_extract_multiplier_2x20_win5
.section	.rodata
.align	32
/* Per-lane increment for the entry counter. */
.Lones:
.quad	1,1,1,1
/* All-zero vector shifted into lane 0 during carry propagation. */
.Lzeros:
.quad	0,0,0,0
	/* Note section: owner name "GNU", property type 0xc0000002, value 3. */
	.section ".note.gnu.property", "a"
	.p2align 3
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	# "GNU" encoded with .byte, since .asciz isn't supported
	# on Solaris.
	.byte 0x47
	.byte 0x4e
	.byte 0x55
	.byte 0
1:
	.p2align 3
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 3
4: