Path: blob/main/sys/crypto/openssl/amd64/rsaz-2k-avxifma.S
39482 views
/* Do not modify. This file is auto-generated from rsaz-2k-avxifma.pl. */
/*
 * x86-64, GNU as (AT&T) syntax.  Arguments are taken from rdi, rsi, rdx,
 * rcx, r8 (System V AMD64 register order).
 *
 * Raw byte sequences used below:
 *   .byte 243,15,30,250  = f3 0f 1e fa = endbr64
 *   .byte 0xf3,0xc3      = rep ret
 *
 * Digits are handled as 52-bit values held in 64-bit lanes (see .Lmask52x4).
 * NOTE(review): judging by the symbol names, the amm52x20 routines are
 * presumably "almost Montgomery multiplication" on 20 such digits - confirm
 * against rsaz-2k-avxifma.pl before relying on that description.
 */
.text

/*
 * ossl_rsaz_avxifma_eligible
 * Out:   eax = 0x800000 when bit 23 of the 32-bit word at
 *        OPENSSL_ia32cap_P+20 is set, 0 otherwise.
 * Clobb: ecx, flags.
 * NOTE(review): bit 23 of that word is presumably the AVX-IFMA feature
 * flag - confirm against the OPENSSL_ia32cap documentation.
 */
.globl	ossl_rsaz_avxifma_eligible
.type	ossl_rsaz_avxifma_eligible,@function
.align	32
ossl_rsaz_avxifma_eligible:
	movl	OPENSSL_ia32cap_P+20(%rip),%ecx
	xorl	%eax,%eax
	andl	$8388608,%ecx
	cmpl	$8388608,%ecx
	cmovel	%ecx,%eax
	.byte	0xf3,0xc3
.size	ossl_rsaz_avxifma_eligible, .-ossl_rsaz_avxifma_eligible
.text

/*
 * ossl_rsaz_amm52x20_x1_avxifma256
 * In:    rdi = destination, 160 bytes are written (5 x 32-byte stores)
 *        rsi = first vector operand, 160 bytes read
 *        rdx = scalar operand, 20 qwords read (5 loop passes x 4 qwords)
 *        rcx = second vector operand, 160 bytes read
 *        r8  = 64-bit multiplier, used by value
 * Stack: six callee-saved registers are pushed; inside the loop rsp is
 *        lowered by 168 bytes for a scratch area that is used to shift the
 *        accumulator by one qword through memory.
 * NOTE(review): presumably rsi/rdx are the multiplicands, rcx the modulus
 * and r8 the Montgomery constant k0 - confirm against the C prototype.
 */
.globl	ossl_rsaz_amm52x20_x1_avxifma256
.type	ossl_rsaz_amm52x20_x1_avxifma256,@function
.align	32
ossl_rsaz_amm52x20_x1_avxifma256:
.cfi_startproc
	.byte	243,15,30,250
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lossl_rsaz_amm52x20_x1_avxifma256_body:

	/* Zero the accumulator vectors ymm3, ymm5..ymm8 and the scalar accumulator r9. */
	vpxor	%ymm0,%ymm0,%ymm0
	vmovapd	%ymm0,%ymm3
	vmovapd	%ymm0,%ymm5
	vmovapd	%ymm0,%ymm6
	vmovapd	%ymm0,%ymm7
	vmovapd	%ymm0,%ymm8

	xorl	%r9d,%r9d

	/* rdx is needed as the implicit mulx source, so the pointer moves to r11. */
	movq	%rdx,%r11
	/* rax = 52-bit digit mask. */
	movq	$0xfffffffffffff,%rax

	/* 5 passes, each consuming 4 qwords from r11 (unrolled below). */
	movl	$5,%ebx

.align	32
.Lloop5:
	/* ---- qword 0 of this pass ---- */
	movq	0(%r11),%r13

	vpbroadcastq	0(%r11),%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	/* r13 = (r8 * r9) & mask52, broadcast into ymm2. */
	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	vmovq	%r13,%xmm2
	vpbroadcastq	%xmm2,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	/* r9 = (r10:r9) >> 52 */
	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	leaq	-168(%rsp),%rsp
	{vex}	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	{vex}	vpmadd52luq	32(%rsi),%ymm1,%ymm5
	{vex}	vpmadd52luq	64(%rsi),%ymm1,%ymm6
	{vex}	vpmadd52luq	96(%rsi),%ymm1,%ymm7
	{vex}	vpmadd52luq	128(%rsi),%ymm1,%ymm8

	{vex}	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	{vex}	vpmadd52luq	32(%rcx),%ymm2,%ymm5
	{vex}	vpmadd52luq	64(%rcx),%ymm2,%ymm6
	{vex}	vpmadd52luq	96(%rcx),%ymm2,%ymm7
	{vex}	vpmadd52luq	128(%rcx),%ymm2,%ymm8

	/* Spill, append a zero qword, reload at +8: shifts the accumulator down one qword. */
	vmovdqu	%ymm3,0(%rsp)
	vmovdqu	%ymm5,32(%rsp)
	vmovdqu	%ymm6,64(%rsp)
	vmovdqu	%ymm7,96(%rsp)
	vmovdqu	%ymm8,128(%rsp)
	movq	$0,160(%rsp)

	vmovdqu	8(%rsp),%ymm3
	vmovdqu	40(%rsp),%ymm5
	vmovdqu	72(%rsp),%ymm6
	vmovdqu	104(%rsp),%ymm7
	vmovdqu	136(%rsp),%ymm8

	addq	8(%rsp),%r9

	{vex}	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	{vex}	vpmadd52huq	32(%rsi),%ymm1,%ymm5
	{vex}	vpmadd52huq	64(%rsi),%ymm1,%ymm6
	{vex}	vpmadd52huq	96(%rsi),%ymm1,%ymm7
	{vex}	vpmadd52huq	128(%rsi),%ymm1,%ymm8

	{vex}	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	{vex}	vpmadd52huq	32(%rcx),%ymm2,%ymm5
	{vex}	vpmadd52huq	64(%rcx),%ymm2,%ymm6
	{vex}	vpmadd52huq	96(%rcx),%ymm2,%ymm7
	{vex}	vpmadd52huq	128(%rcx),%ymm2,%ymm8
	leaq	168(%rsp),%rsp
	/* ---- qword 1 of this pass ---- */
	movq	8(%r11),%r13

	vpbroadcastq	8(%r11),%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	vmovq	%r13,%xmm2
	vpbroadcastq	%xmm2,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	leaq	-168(%rsp),%rsp
	{vex}	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	{vex}	vpmadd52luq	32(%rsi),%ymm1,%ymm5
	{vex}	vpmadd52luq	64(%rsi),%ymm1,%ymm6
	{vex}	vpmadd52luq	96(%rsi),%ymm1,%ymm7
	{vex}	vpmadd52luq	128(%rsi),%ymm1,%ymm8

	{vex}	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	{vex}	vpmadd52luq	32(%rcx),%ymm2,%ymm5
	{vex}	vpmadd52luq	64(%rcx),%ymm2,%ymm6
	{vex}	vpmadd52luq	96(%rcx),%ymm2,%ymm7
	{vex}	vpmadd52luq	128(%rcx),%ymm2,%ymm8


	vmovdqu	%ymm3,0(%rsp)
	vmovdqu	%ymm5,32(%rsp)
	vmovdqu	%ymm6,64(%rsp)
	vmovdqu	%ymm7,96(%rsp)
	vmovdqu	%ymm8,128(%rsp)
	movq	$0,160(%rsp)

	vmovdqu	8(%rsp),%ymm3
	vmovdqu	40(%rsp),%ymm5
	vmovdqu	72(%rsp),%ymm6
	vmovdqu	104(%rsp),%ymm7
	vmovdqu	136(%rsp),%ymm8

	addq	8(%rsp),%r9

	{vex}	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	{vex}	vpmadd52huq	32(%rsi),%ymm1,%ymm5
	{vex}	vpmadd52huq	64(%rsi),%ymm1,%ymm6
	{vex}	vpmadd52huq	96(%rsi),%ymm1,%ymm7
	{vex}	vpmadd52huq	128(%rsi),%ymm1,%ymm8

	{vex}	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	{vex}	vpmadd52huq	32(%rcx),%ymm2,%ymm5
	{vex}	vpmadd52huq	64(%rcx),%ymm2,%ymm6
	{vex}	vpmadd52huq	96(%rcx),%ymm2,%ymm7
	{vex}	vpmadd52huq	128(%rcx),%ymm2,%ymm8
	leaq	168(%rsp),%rsp
	/* ---- qword 2 of this pass ---- */
	movq	16(%r11),%r13

	vpbroadcastq	16(%r11),%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	vmovq	%r13,%xmm2
	vpbroadcastq	%xmm2,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	leaq	-168(%rsp),%rsp
	{vex}	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	{vex}	vpmadd52luq	32(%rsi),%ymm1,%ymm5
	{vex}	vpmadd52luq	64(%rsi),%ymm1,%ymm6
	{vex}	vpmadd52luq	96(%rsi),%ymm1,%ymm7
	{vex}	vpmadd52luq	128(%rsi),%ymm1,%ymm8

	{vex}	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	{vex}	vpmadd52luq	32(%rcx),%ymm2,%ymm5
	{vex}	vpmadd52luq	64(%rcx),%ymm2,%ymm6
	{vex}	vpmadd52luq	96(%rcx),%ymm2,%ymm7
	{vex}	vpmadd52luq	128(%rcx),%ymm2,%ymm8


	vmovdqu	%ymm3,0(%rsp)
	vmovdqu	%ymm5,32(%rsp)
	vmovdqu	%ymm6,64(%rsp)
	vmovdqu	%ymm7,96(%rsp)
	vmovdqu	%ymm8,128(%rsp)
	movq	$0,160(%rsp)

	vmovdqu	8(%rsp),%ymm3
	vmovdqu	40(%rsp),%ymm5
	vmovdqu	72(%rsp),%ymm6
	vmovdqu	104(%rsp),%ymm7
	vmovdqu	136(%rsp),%ymm8

	addq	8(%rsp),%r9

	{vex}	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	{vex}	vpmadd52huq	32(%rsi),%ymm1,%ymm5
	{vex}	vpmadd52huq	64(%rsi),%ymm1,%ymm6
	{vex}	vpmadd52huq	96(%rsi),%ymm1,%ymm7
	{vex}	vpmadd52huq	128(%rsi),%ymm1,%ymm8

	{vex}	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	{vex}	vpmadd52huq	32(%rcx),%ymm2,%ymm5
	{vex}	vpmadd52huq	64(%rcx),%ymm2,%ymm6
	{vex}	vpmadd52huq	96(%rcx),%ymm2,%ymm7
	{vex}	vpmadd52huq	128(%rcx),%ymm2,%ymm8
	leaq	168(%rsp),%rsp
	/* ---- qword 3 of this pass ---- */
	movq	24(%r11),%r13

	vpbroadcastq	24(%r11),%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	vmovq	%r13,%xmm2
	vpbroadcastq	%xmm2,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	leaq	-168(%rsp),%rsp
	{vex}	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	{vex}	vpmadd52luq	32(%rsi),%ymm1,%ymm5
	{vex}	vpmadd52luq	64(%rsi),%ymm1,%ymm6
	{vex}	vpmadd52luq	96(%rsi),%ymm1,%ymm7
	{vex}	vpmadd52luq	128(%rsi),%ymm1,%ymm8

	{vex}	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	{vex}	vpmadd52luq	32(%rcx),%ymm2,%ymm5
	{vex}	vpmadd52luq	64(%rcx),%ymm2,%ymm6
	{vex}	vpmadd52luq	96(%rcx),%ymm2,%ymm7
	{vex}	vpmadd52luq	128(%rcx),%ymm2,%ymm8


	vmovdqu	%ymm3,0(%rsp)
	vmovdqu	%ymm5,32(%rsp)
	vmovdqu	%ymm6,64(%rsp)
	vmovdqu	%ymm7,96(%rsp)
	vmovdqu	%ymm8,128(%rsp)
	movq	$0,160(%rsp)

	vmovdqu	8(%rsp),%ymm3
	vmovdqu	40(%rsp),%ymm5
	vmovdqu	72(%rsp),%ymm6
	vmovdqu	104(%rsp),%ymm7
	vmovdqu	136(%rsp),%ymm8

	addq	8(%rsp),%r9

	{vex}	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	{vex}	vpmadd52huq	32(%rsi),%ymm1,%ymm5
	{vex}	vpmadd52huq	64(%rsi),%ymm1,%ymm6
	{vex}	vpmadd52huq	96(%rsi),%ymm1,%ymm7
	{vex}	vpmadd52huq	128(%rsi),%ymm1,%ymm8

	{vex}	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	{vex}	vpmadd52huq	32(%rcx),%ymm2,%ymm5
	{vex}	vpmadd52huq	64(%rcx),%ymm2,%ymm6
	{vex}	vpmadd52huq	96(%rcx),%ymm2,%ymm7
	{vex}	vpmadd52huq	128(%rcx),%ymm2,%ymm8
	leaq	168(%rsp),%rsp
	leaq	32(%r11),%r11
	decl	%ebx
	jne	.Lloop5

	/* Put the scalar accumulator r9 into the lowest qword of ymm3. */
	vmovq	%r9,%xmm0
	vpbroadcastq	%xmm0,%ymm0
	vpblendd	$3,%ymm0,%ymm3,%ymm3

	/* Extract the bits above bit 51 of every lane (the per-digit carries). */
	vpsrlq	$52,%ymm3,%ymm0
	vpsrlq	$52,%ymm5,%ymm1
	vpsrlq	$52,%ymm6,%ymm2
	vpsrlq	$52,%ymm7,%ymm13
	vpsrlq	$52,%ymm8,%ymm14

	/* Move each carry up by one lane, crossing from one vector into the next. */
	vpermq	$144,%ymm14,%ymm14
	vpermq	$3,%ymm13,%ymm15
	vblendpd	$1,%ymm15,%ymm14,%ymm14

	vpermq	$144,%ymm13,%ymm13
	vpermq	$3,%ymm2,%ymm15
	vblendpd	$1,%ymm15,%ymm13,%ymm13

	vpermq	$144,%ymm2,%ymm2
	vpermq	$3,%ymm1,%ymm15
	vblendpd	$1,%ymm15,%ymm2,%ymm2

	vpermq	$144,%ymm1,%ymm1
	vpermq	$3,%ymm0,%ymm15
	vblendpd	$1,%ymm15,%ymm1,%ymm1

	/* Nothing carries into the lowest lane. */
	vpermq	$144,%ymm0,%ymm0
	vpand	.Lhigh64x3(%rip),%ymm0,%ymm0

	/* Keep the low 52 bits of every digit ... */
	vpand	.Lmask52x4(%rip),%ymm3,%ymm3
	vpand	.Lmask52x4(%rip),%ymm5,%ymm5
	vpand	.Lmask52x4(%rip),%ymm6,%ymm6
	vpand	.Lmask52x4(%rip),%ymm7,%ymm7
	vpand	.Lmask52x4(%rip),%ymm8,%ymm8

	/* ... and add the shifted carries. */
	vpaddq	%ymm0,%ymm3,%ymm3
	vpaddq	%ymm1,%ymm5,%ymm5
	vpaddq	%ymm2,%ymm6,%ymm6
	vpaddq	%ymm13,%ymm7,%ymm7
	vpaddq	%ymm14,%ymm8,%ymm8

	/* Per-lane bit masks of digits that are > mask52 (r14,r13,r12,r11,r10). */
	vpcmpgtq	.Lmask52x4(%rip),%ymm3,%ymm0
	vpcmpgtq	.Lmask52x4(%rip),%ymm5,%ymm1
	vpcmpgtq	.Lmask52x4(%rip),%ymm6,%ymm2
	vpcmpgtq	.Lmask52x4(%rip),%ymm7,%ymm13
	vpcmpgtq	.Lmask52x4(%rip),%ymm8,%ymm14
	vmovmskpd	%ymm0,%r14d
	vmovmskpd	%ymm1,%r13d
	vmovmskpd	%ymm2,%r12d
	vmovmskpd	%ymm13,%r11d
	vmovmskpd	%ymm14,%r10d

	/* Per-lane bit masks of digits that are == mask52 (r9,r8,ebx,ecx,edx). */
	vpcmpeqq	.Lmask52x4(%rip),%ymm3,%ymm0
	vpcmpeqq	.Lmask52x4(%rip),%ymm5,%ymm1
	vpcmpeqq	.Lmask52x4(%rip),%ymm6,%ymm2
	vpcmpeqq	.Lmask52x4(%rip),%ymm7,%ymm13
	vpcmpeqq	.Lmask52x4(%rip),%ymm8,%ymm14
	vmovmskpd	%ymm0,%r9d
	vmovmskpd	%ymm1,%r8d
	vmovmskpd	%ymm2,%ebx
	vmovmskpd	%ymm13,%ecx
	vmovmskpd	%ymm14,%edx

	/* Pack the 4-bit masks pairwise into bytes: one bit per digit. */
	shlb	$4,%r13b
	orb	%r13b,%r14b
	shlb	$4,%r11b
	orb	%r11b,%r12b

	/* Shift the "greater" mask left by one digit across the three bytes. */
	addb	%r14b,%r14b
	adcb	%r12b,%r12b
	adcb	%r10b,%r10b

	shlb	$4,%r8b
	orb	%r8b,%r9b
	shlb	$4,%cl
	orb	%cl,%bl

	/* Adding the "equal" mask lets a carry ripple through runs of all-ones digits. */
	addb	%r9b,%r14b
	adcb	%bl,%r12b
	adcb	%dl,%r10b

	/* XOR with the "equal" mask leaves a 1 for every digit that receives a carry. */
	xorb	%r9b,%r14b
	xorb	%bl,%r12b
	xorb	%dl,%r10b

	leaq	.Lkmasklut(%rip),%rdx

	/*
	 * For each vector: expand its 4-bit carry mask into a lane mask through
	 * the 32-byte-per-entry table, and in the selected lanes replace the digit
	 * with (digit - mask52), which equals digit + 1 once masked to 52 bits.
	 */
	movb	%r14b,%r13b
	andq	$0xf,%r14
	vpsubq	.Lmask52x4(%rip),%ymm3,%ymm0
	shlq	$5,%r14
	vmovapd	(%rdx,%r14,1),%ymm2
	vblendvpd	%ymm2,%ymm0,%ymm3,%ymm3

	shrb	$4,%r13b
	andq	$0xf,%r13
	vpsubq	.Lmask52x4(%rip),%ymm5,%ymm0
	shlq	$5,%r13
	vmovapd	(%rdx,%r13,1),%ymm2
	vblendvpd	%ymm2,%ymm0,%ymm5,%ymm5

	movb	%r12b,%r11b
	andq	$0xf,%r12
	vpsubq	.Lmask52x4(%rip),%ymm6,%ymm0
	shlq	$5,%r12
	vmovapd	(%rdx,%r12,1),%ymm2
	vblendvpd	%ymm2,%ymm0,%ymm6,%ymm6

	shrb	$4,%r11b
	andq	$0xf,%r11
	vpsubq	.Lmask52x4(%rip),%ymm7,%ymm0
	shlq	$5,%r11
	vmovapd	(%rdx,%r11,1),%ymm2
	vblendvpd	%ymm2,%ymm0,%ymm7,%ymm7

	andq	$0xf,%r10
	vpsubq	.Lmask52x4(%rip),%ymm8,%ymm0
	shlq	$5,%r10
	vmovapd	(%rdx,%r10,1),%ymm2
	vblendvpd	%ymm2,%ymm0,%ymm8,%ymm8

	vpand	.Lmask52x4(%rip),%ymm3,%ymm3
	vpand	.Lmask52x4(%rip),%ymm5,%ymm5
	vpand	.Lmask52x4(%rip),%ymm6,%ymm6
	vpand	.Lmask52x4(%rip),%ymm7,%ymm7
	vpand	.Lmask52x4(%rip),%ymm8,%ymm8

	/* Write the 20 result digits. */
	vmovdqu	%ymm3,0(%rdi)
	vmovdqu	%ymm5,32(%rdi)
	vmovdqu	%ymm6,64(%rdi)
	vmovdqu	%ymm7,96(%rdi)
	vmovdqu	%ymm8,128(%rdi)

	vzeroupper
	/* Restore the callee-saved registers pushed in the prologue. */
	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbp
.cfi_restore	%rbp
	movq	40(%rsp),%rbx
.cfi_restore	%rbx
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lossl_rsaz_amm52x20_x1_avxifma256_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	ossl_rsaz_amm52x20_x1_avxifma256, .-ossl_rsaz_amm52x20_x1_avxifma256
.section	.rodata
.align	32
/* Four copies of the 52-bit digit mask (2^52 - 1). */
.Lmask52x4:
.quad	0xfffffffffffff
.quad	0xfffffffffffff
.quad	0xfffffffffffff
.quad	0xfffffffffffff
/* Lane mask that clears qword 0 and keeps qwords 1..3. */
.Lhigh64x3:
.quad	0x0
.quad	0xffffffffffffffff
.quad	0xffffffffffffffff
.quad	0xffffffffffffffff
/*
 * 16 entries of 4 qwords: entry i has qword j set to all-ones exactly when
 * bit j of i is set.  Indexed with (4-bit mask << 5) and loaded with
 * vmovapd, so it relies on the 32-byte alignment established above.
 */
.Lkmasklut:

.quad	0x0
.quad	0x0
.quad	0x0
.quad	0x0

.quad	0xffffffffffffffff
.quad	0x0
.quad	0x0
.quad	0x0

.quad	0x0
.quad	0xffffffffffffffff
.quad	0x0
.quad	0x0

.quad	0xffffffffffffffff
.quad	0xffffffffffffffff
.quad	0x0
.quad	0x0

.quad	0x0
.quad	0x0
.quad	0xffffffffffffffff
.quad	0x0

.quad	0xffffffffffffffff
.quad	0x0
.quad	0xffffffffffffffff
.quad	0x0

.quad	0x0
.quad	0xffffffffffffffff
.quad	0xffffffffffffffff
.quad	0x0

.quad	0xffffffffffffffff
.quad	0xffffffffffffffff
.quad	0xffffffffffffffff
.quad	0x0

.quad	0x0
.quad	0x0
.quad	0x0
.quad	0xffffffffffffffff

.quad	0xffffffffffffffff
.quad	0x0
.quad	0x0
.quad	0xffffffffffffffff

.quad	0x0
.quad	0xffffffffffffffff
.quad	0x0
.quad	0xffffffffffffffff

.quad	0xffffffffffffffff
.quad	0xffffffffffffffff
.quad	0x0
.quad	0xffffffffffffffff

.quad	0x0
.quad	0x0
.quad	0xffffffffffffffff
.quad	0xffffffffffffffff

.quad	0xffffffffffffffff
.quad	0x0
.quad	0xffffffffffffffff
.quad	0xffffffffffffffff

.quad	0x0
.quad	0xffffffffffffffff
.quad	0xffffffffffffffff
.quad	0xffffffffffffffff

.quad	0xffffffffffffffff
.quad	0xffffffffffffffff
.quad	0xffffffffffffffff
.quad	0xffffffffffffffff
.text

/*
 * ossl_rsaz_amm52x20_x2_avxifma256
 * Two independent instances of the x1 computation, interleaved in one loop.
 * In:    rdi = destination, 320 bytes written (instance 0 at +0, instance 1 at +160)
 *        rsi = first vector operands, 2 x 160 bytes read
 *        rdx = scalar operands, 20 qwords read at +0 and 20 at +160
 *        rcx = second vector operands, 2 x 160 bytes read
 *        r8  = pointer to two 64-bit multipliers (read at 0(%r8) and 8(%r8))
 * Accumulators: instance 0 uses r9 and ymm3,ymm5..ymm8; instance 1 uses r15
 *        and ymm4,ymm9..ymm12.
 * Stack: same prologue/epilogue and 168-byte scratch area as the x1 routine.
 */
.globl	ossl_rsaz_amm52x20_x2_avxifma256
.type	ossl_rsaz_amm52x20_x2_avxifma256,@function
.align	32
ossl_rsaz_amm52x20_x2_avxifma256:
.cfi_startproc
	.byte	243,15,30,250
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lossl_rsaz_amm52x20_x2_avxifma256_body:

	/* Zero both sets of accumulator vectors and both scalar accumulators. */
	vpxor	%ymm0,%ymm0,%ymm0
	vmovapd	%ymm0,%ymm3
	vmovapd	%ymm0,%ymm5
	vmovapd	%ymm0,%ymm6
	vmovapd	%ymm0,%ymm7
	vmovapd	%ymm0,%ymm8
	vmovapd	%ymm0,%ymm4
	vmovapd	%ymm0,%ymm9
	vmovapd	%ymm0,%ymm10
	vmovapd	%ymm0,%ymm11
	vmovapd	%ymm0,%ymm12

	xorl	%r9d,%r9d
	xorl	%r15d,%r15d

	movq	%rdx,%r11
	movq	$0xfffffffffffff,%rax

	/* 20 passes, one qword per instance per pass. */
	movl	$20,%ebx

.align	32
.Lloop20:
	/* ---- instance 0 ---- */
	movq	0(%r11),%r13

	vpbroadcastq	0(%r11),%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	movq	(%r8),%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	vmovq	%r13,%xmm2
	vpbroadcastq	%xmm2,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	leaq	-168(%rsp),%rsp
	{vex}	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	{vex}	vpmadd52luq	32(%rsi),%ymm1,%ymm5
	{vex}	vpmadd52luq	64(%rsi),%ymm1,%ymm6
	{vex}	vpmadd52luq	96(%rsi),%ymm1,%ymm7
	{vex}	vpmadd52luq	128(%rsi),%ymm1,%ymm8

	{vex}	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	{vex}	vpmadd52luq	32(%rcx),%ymm2,%ymm5
	{vex}	vpmadd52luq	64(%rcx),%ymm2,%ymm6
	{vex}	vpmadd52luq	96(%rcx),%ymm2,%ymm7
	{vex}	vpmadd52luq	128(%rcx),%ymm2,%ymm8


	vmovdqu	%ymm3,0(%rsp)
	vmovdqu	%ymm5,32(%rsp)
	vmovdqu	%ymm6,64(%rsp)
	vmovdqu	%ymm7,96(%rsp)
	vmovdqu	%ymm8,128(%rsp)
	movq	$0,160(%rsp)

	vmovdqu	8(%rsp),%ymm3
	vmovdqu	40(%rsp),%ymm5
	vmovdqu	72(%rsp),%ymm6
	vmovdqu	104(%rsp),%ymm7
	vmovdqu	136(%rsp),%ymm8

	addq	8(%rsp),%r9

	{vex}	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	{vex}	vpmadd52huq	32(%rsi),%ymm1,%ymm5
	{vex}	vpmadd52huq	64(%rsi),%ymm1,%ymm6
	{vex}	vpmadd52huq	96(%rsi),%ymm1,%ymm7
	{vex}	vpmadd52huq	128(%rsi),%ymm1,%ymm8

	{vex}	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	{vex}	vpmadd52huq	32(%rcx),%ymm2,%ymm5
	{vex}	vpmadd52huq	64(%rcx),%ymm2,%ymm6
	{vex}	vpmadd52huq	96(%rcx),%ymm2,%ymm7
	{vex}	vpmadd52huq	128(%rcx),%ymm2,%ymm8
	leaq	168(%rsp),%rsp
	/* ---- instance 1 (all operands at +160) ---- */
	movq	160(%r11),%r13

	vpbroadcastq	160(%r11),%ymm1
	movq	160(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r15
	movq	%r12,%r10
	adcq	$0,%r10

	movq	8(%r8),%r13
	imulq	%r15,%r13
	andq	%rax,%r13

	vmovq	%r13,%xmm2
	vpbroadcastq	%xmm2,%ymm2
	movq	160(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r15
	adcq	%r12,%r10

	shrq	$52,%r15
	salq	$12,%r10
	orq	%r10,%r15

	leaq	-168(%rsp),%rsp
	{vex}	vpmadd52luq	160(%rsi),%ymm1,%ymm4
	{vex}	vpmadd52luq	192(%rsi),%ymm1,%ymm9
	{vex}	vpmadd52luq	224(%rsi),%ymm1,%ymm10
	{vex}	vpmadd52luq	256(%rsi),%ymm1,%ymm11
	{vex}	vpmadd52luq	288(%rsi),%ymm1,%ymm12

	{vex}	vpmadd52luq	160(%rcx),%ymm2,%ymm4
	{vex}	vpmadd52luq	192(%rcx),%ymm2,%ymm9
	{vex}	vpmadd52luq	224(%rcx),%ymm2,%ymm10
	{vex}	vpmadd52luq	256(%rcx),%ymm2,%ymm11
	{vex}	vpmadd52luq	288(%rcx),%ymm2,%ymm12


	vmovdqu	%ymm4,0(%rsp)
	vmovdqu	%ymm9,32(%rsp)
	vmovdqu	%ymm10,64(%rsp)
	vmovdqu	%ymm11,96(%rsp)
	vmovdqu	%ymm12,128(%rsp)
	movq	$0,160(%rsp)

	vmovdqu	8(%rsp),%ymm4
	vmovdqu	40(%rsp),%ymm9
	vmovdqu	72(%rsp),%ymm10
	vmovdqu	104(%rsp),%ymm11
	vmovdqu	136(%rsp),%ymm12

	addq	8(%rsp),%r15

	{vex}	vpmadd52huq	160(%rsi),%ymm1,%ymm4
	{vex}	vpmadd52huq	192(%rsi),%ymm1,%ymm9
	{vex}	vpmadd52huq	224(%rsi),%ymm1,%ymm10
	{vex}	vpmadd52huq	256(%rsi),%ymm1,%ymm11
	{vex}	vpmadd52huq	288(%rsi),%ymm1,%ymm12

	{vex}	vpmadd52huq	160(%rcx),%ymm2,%ymm4
	{vex}	vpmadd52huq	192(%rcx),%ymm2,%ymm9
	{vex}	vpmadd52huq	224(%rcx),%ymm2,%ymm10
	{vex}	vpmadd52huq	256(%rcx),%ymm2,%ymm11
	{vex}	vpmadd52huq	288(%rcx),%ymm2,%ymm12
	leaq	168(%rsp),%rsp
	leaq	8(%r11),%r11
	decl	%ebx
	jne	.Lloop20

	/*
	 * Carry normalization of instance 0 (r9, ymm3, ymm5..ymm8); same
	 * sequence as in the x1 routine.  r15 and ymm4, ymm9..ymm12 are not
	 * touched here, so instance 1 stays live.
	 */
	vmovq	%r9,%xmm0
	vpbroadcastq	%xmm0,%ymm0
	vpblendd	$3,%ymm0,%ymm3,%ymm3



	vpsrlq	$52,%ymm3,%ymm0
	vpsrlq	$52,%ymm5,%ymm1
	vpsrlq	$52,%ymm6,%ymm2
	vpsrlq	$52,%ymm7,%ymm13
	vpsrlq	$52,%ymm8,%ymm14


	vpermq	$144,%ymm14,%ymm14
	vpermq	$3,%ymm13,%ymm15
	vblendpd	$1,%ymm15,%ymm14,%ymm14

	vpermq	$144,%ymm13,%ymm13
	vpermq	$3,%ymm2,%ymm15
	vblendpd	$1,%ymm15,%ymm13,%ymm13

	vpermq	$144,%ymm2,%ymm2
	vpermq	$3,%ymm1,%ymm15
	vblendpd	$1,%ymm15,%ymm2,%ymm2

	vpermq	$144,%ymm1,%ymm1
	vpermq	$3,%ymm0,%ymm15
	vblendpd	$1,%ymm15,%ymm1,%ymm1

	vpermq	$144,%ymm0,%ymm0
	vpand	.Lhigh64x3(%rip),%ymm0,%ymm0


	vpand	.Lmask52x4(%rip),%ymm3,%ymm3
	vpand	.Lmask52x4(%rip),%ymm5,%ymm5
	vpand	.Lmask52x4(%rip),%ymm6,%ymm6
	vpand	.Lmask52x4(%rip),%ymm7,%ymm7
	vpand	.Lmask52x4(%rip),%ymm8,%ymm8


	vpaddq	%ymm0,%ymm3,%ymm3
	vpaddq	%ymm1,%ymm5,%ymm5
	vpaddq	%ymm2,%ymm6,%ymm6
	vpaddq	%ymm13,%ymm7,%ymm7
	vpaddq	%ymm14,%ymm8,%ymm8



	vpcmpgtq	.Lmask52x4(%rip),%ymm3,%ymm0
	vpcmpgtq	.Lmask52x4(%rip),%ymm5,%ymm1
	vpcmpgtq	.Lmask52x4(%rip),%ymm6,%ymm2
	vpcmpgtq	.Lmask52x4(%rip),%ymm7,%ymm13
	vpcmpgtq	.Lmask52x4(%rip),%ymm8,%ymm14
	vmovmskpd	%ymm0,%r14d
	vmovmskpd	%ymm1,%r13d
	vmovmskpd	%ymm2,%r12d
	vmovmskpd	%ymm13,%r11d
	vmovmskpd	%ymm14,%r10d


	vpcmpeqq	.Lmask52x4(%rip),%ymm3,%ymm0
	vpcmpeqq	.Lmask52x4(%rip),%ymm5,%ymm1
	vpcmpeqq	.Lmask52x4(%rip),%ymm6,%ymm2
	vpcmpeqq	.Lmask52x4(%rip),%ymm7,%ymm13
	vpcmpeqq	.Lmask52x4(%rip),%ymm8,%ymm14
	vmovmskpd	%ymm0,%r9d
	vmovmskpd	%ymm1,%r8d
	vmovmskpd	%ymm2,%ebx
	vmovmskpd	%ymm13,%ecx
	vmovmskpd	%ymm14,%edx



	shlb	$4,%r13b
	orb	%r13b,%r14b
	shlb	$4,%r11b
	orb	%r11b,%r12b

	addb	%r14b,%r14b
	adcb	%r12b,%r12b
	adcb	%r10b,%r10b

	shlb	$4,%r8b
	orb	%r8b,%r9b
	shlb	$4,%cl
	orb	%cl,%bl

	addb	%r9b,%r14b
	adcb	%bl,%r12b
	adcb	%dl,%r10b

	xorb	%r9b,%r14b
	xorb	%bl,%r12b
	xorb	%dl,%r10b

	leaq	.Lkmasklut(%rip),%rdx

	movb	%r14b,%r13b
	andq	$0xf,%r14
	vpsubq	.Lmask52x4(%rip),%ymm3,%ymm0
	shlq	$5,%r14
	vmovapd	(%rdx,%r14,1),%ymm2
	vblendvpd	%ymm2,%ymm0,%ymm3,%ymm3

	shrb	$4,%r13b
	andq	$0xf,%r13
	vpsubq	.Lmask52x4(%rip),%ymm5,%ymm0
	shlq	$5,%r13
	vmovapd	(%rdx,%r13,1),%ymm2
	vblendvpd	%ymm2,%ymm0,%ymm5,%ymm5

	movb	%r12b,%r11b
	andq	$0xf,%r12
	vpsubq	.Lmask52x4(%rip),%ymm6,%ymm0
	shlq	$5,%r12
	vmovapd	(%rdx,%r12,1),%ymm2
	vblendvpd	%ymm2,%ymm0,%ymm6,%ymm6

	shrb	$4,%r11b
	andq	$0xf,%r11
	vpsubq	.Lmask52x4(%rip),%ymm7,%ymm0
	shlq	$5,%r11
	vmovapd	(%rdx,%r11,1),%ymm2
	vblendvpd	%ymm2,%ymm0,%ymm7,%ymm7

	andq	$0xf,%r10
	vpsubq	.Lmask52x4(%rip),%ymm8,%ymm0
	shlq	$5,%r10
	vmovapd	(%rdx,%r10,1),%ymm2
	vblendvpd	%ymm2,%ymm0,%ymm8,%ymm8


	vpand	.Lmask52x4(%rip),%ymm3,%ymm3
	vpand	.Lmask52x4(%rip),%ymm5,%ymm5
	vpand	.Lmask52x4(%rip),%ymm6,%ymm6
	vpand	.Lmask52x4(%rip),%ymm7,%ymm7
	vpand	.Lmask52x4(%rip),%ymm8,%ymm8

	/* Carry normalization of instance 1 (r15, ymm4, ymm9..ymm12). */
	vmovq	%r15,%xmm0
	vpbroadcastq	%xmm0,%ymm0
	vpblendd	$3,%ymm0,%ymm4,%ymm4



	vpsrlq	$52,%ymm4,%ymm0
	vpsrlq	$52,%ymm9,%ymm1
	vpsrlq	$52,%ymm10,%ymm2
	vpsrlq	$52,%ymm11,%ymm13
	vpsrlq	$52,%ymm12,%ymm14


	vpermq	$144,%ymm14,%ymm14
	vpermq	$3,%ymm13,%ymm15
	vblendpd	$1,%ymm15,%ymm14,%ymm14

	vpermq	$144,%ymm13,%ymm13
	vpermq	$3,%ymm2,%ymm15
	vblendpd	$1,%ymm15,%ymm13,%ymm13

	vpermq	$144,%ymm2,%ymm2
	vpermq	$3,%ymm1,%ymm15
	vblendpd	$1,%ymm15,%ymm2,%ymm2

	vpermq	$144,%ymm1,%ymm1
	vpermq	$3,%ymm0,%ymm15
	vblendpd	$1,%ymm15,%ymm1,%ymm1

	vpermq	$144,%ymm0,%ymm0
	vpand	.Lhigh64x3(%rip),%ymm0,%ymm0


	vpand	.Lmask52x4(%rip),%ymm4,%ymm4
	vpand	.Lmask52x4(%rip),%ymm9,%ymm9
	vpand	.Lmask52x4(%rip),%ymm10,%ymm10
	vpand	.Lmask52x4(%rip),%ymm11,%ymm11
	vpand	.Lmask52x4(%rip),%ymm12,%ymm12


	vpaddq	%ymm0,%ymm4,%ymm4
	vpaddq	%ymm1,%ymm9,%ymm9
	vpaddq	%ymm2,%ymm10,%ymm10
	vpaddq	%ymm13,%ymm11,%ymm11
	vpaddq	%ymm14,%ymm12,%ymm12



	vpcmpgtq	.Lmask52x4(%rip),%ymm4,%ymm0
	vpcmpgtq	.Lmask52x4(%rip),%ymm9,%ymm1
	vpcmpgtq	.Lmask52x4(%rip),%ymm10,%ymm2
	vpcmpgtq	.Lmask52x4(%rip),%ymm11,%ymm13
	vpcmpgtq	.Lmask52x4(%rip),%ymm12,%ymm14
	vmovmskpd	%ymm0,%r14d
	vmovmskpd	%ymm1,%r13d
	vmovmskpd	%ymm2,%r12d
	vmovmskpd	%ymm13,%r11d
	vmovmskpd	%ymm14,%r10d


	vpcmpeqq	.Lmask52x4(%rip),%ymm4,%ymm0
	vpcmpeqq	.Lmask52x4(%rip),%ymm9,%ymm1
	vpcmpeqq	.Lmask52x4(%rip),%ymm10,%ymm2
	vpcmpeqq	.Lmask52x4(%rip),%ymm11,%ymm13
	vpcmpeqq	.Lmask52x4(%rip),%ymm12,%ymm14
	vmovmskpd	%ymm0,%r9d
	vmovmskpd	%ymm1,%r8d
	vmovmskpd	%ymm2,%ebx
	vmovmskpd	%ymm13,%ecx
	vmovmskpd	%ymm14,%edx



	shlb	$4,%r13b
	orb	%r13b,%r14b
	shlb	$4,%r11b
	orb	%r11b,%r12b

	addb	%r14b,%r14b
	adcb	%r12b,%r12b
	adcb	%r10b,%r10b

	shlb	$4,%r8b
	orb	%r8b,%r9b
	shlb	$4,%cl
	orb	%cl,%bl

	addb	%r9b,%r14b
	adcb	%bl,%r12b
	adcb	%dl,%r10b

	xorb	%r9b,%r14b
	xorb	%bl,%r12b
	xorb	%dl,%r10b

	leaq	.Lkmasklut(%rip),%rdx

	movb	%r14b,%r13b
	andq	$0xf,%r14
	vpsubq	.Lmask52x4(%rip),%ymm4,%ymm0
	shlq	$5,%r14
	vmovapd	(%rdx,%r14,1),%ymm2
	vblendvpd	%ymm2,%ymm0,%ymm4,%ymm4

	shrb	$4,%r13b
	andq	$0xf,%r13
	vpsubq	.Lmask52x4(%rip),%ymm9,%ymm0
	shlq	$5,%r13
	vmovapd	(%rdx,%r13,1),%ymm2
	vblendvpd	%ymm2,%ymm0,%ymm9,%ymm9

	movb	%r12b,%r11b
	andq	$0xf,%r12
	vpsubq	.Lmask52x4(%rip),%ymm10,%ymm0
	shlq	$5,%r12
	vmovapd	(%rdx,%r12,1),%ymm2
	vblendvpd	%ymm2,%ymm0,%ymm10,%ymm10

	shrb	$4,%r11b
	andq	$0xf,%r11
	vpsubq	.Lmask52x4(%rip),%ymm11,%ymm0
	shlq	$5,%r11
	vmovapd	(%rdx,%r11,1),%ymm2
	vblendvpd	%ymm2,%ymm0,%ymm11,%ymm11

	andq	$0xf,%r10
	vpsubq	.Lmask52x4(%rip),%ymm12,%ymm0
	shlq	$5,%r10
	vmovapd	(%rdx,%r10,1),%ymm2
	vblendvpd	%ymm2,%ymm0,%ymm12,%ymm12


	vpand	.Lmask52x4(%rip),%ymm4,%ymm4
	vpand	.Lmask52x4(%rip),%ymm9,%ymm9
	vpand	.Lmask52x4(%rip),%ymm10,%ymm10
	vpand	.Lmask52x4(%rip),%ymm11,%ymm11
	vpand	.Lmask52x4(%rip),%ymm12,%ymm12

	/* Write instance 0 at +0 and instance 1 at +160. */
	vmovdqu	%ymm3,0(%rdi)
	vmovdqu	%ymm5,32(%rdi)
	vmovdqu	%ymm6,64(%rdi)
	vmovdqu	%ymm7,96(%rdi)
	vmovdqu	%ymm8,128(%rdi)

	vmovdqu	%ymm4,160(%rdi)
	vmovdqu	%ymm9,192(%rdi)
	vmovdqu	%ymm10,224(%rdi)
	vmovdqu	%ymm11,256(%rdi)
	vmovdqu	%ymm12,288(%rdi)

	vzeroupper
	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbp
.cfi_restore	%rbp
	movq	40(%rsp),%rbx
.cfi_restore	%rbx
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lossl_rsaz_amm52x20_x2_avxifma256_epilogue:
	.byte	0xf3,0xc3
.cfi_endproc
.size	ossl_rsaz_amm52x20_x2_avxifma256, .-ossl_rsaz_amm52x20_x2_avxifma256
.text

/*
 * ossl_extract_multiplier_2x20_win5_avx
 * In:    rdi = destination, 320 bytes written
 *        rsi = table of 32 entries of 320 bytes each (10240 bytes scanned)
 *        rdx = entry index selecting the first 160 bytes of the output
 *        rcx = entry index selecting the last 160 bytes of the output
 * Every table entry is read regardless of the indices; the wanted halves
 * are picked with compare + blend against a running counter (ymm11).
 * If an index matches no entry, the corresponding output half stays zero.
 * Clobb: rax, rsi, ymm0..ymm15, flags.  No stack use.
 * NOTE(review): the full-table scan presumably keeps the memory access
 * pattern independent of the (secret) indices - confirm with the generator.
 */
.align	32
.globl	ossl_extract_multiplier_2x20_win5_avx
.type	ossl_extract_multiplier_2x20_win5_avx,@function
ossl_extract_multiplier_2x20_win5_avx:
.cfi_startproc
	.byte	243,15,30,250
	vmovapd	.Lones(%rip),%ymm14
	vmovq	%rdx,%xmm10
	vpbroadcastq	%xmm10,%ymm12
	vmovq	%rcx,%xmm10
	vpbroadcastq	%xmm10,%ymm13
	/* rax = end of table. */
	leaq	10240(%rsi),%rax

	/* ymm11 = entry counter (starts at 0); ymm0..ymm9 = output accumulators. */
	vpxor	%xmm0,%xmm0,%xmm0
	vmovapd	%ymm0,%ymm11
	vmovapd	%ymm0,%ymm1
	vmovapd	%ymm0,%ymm2
	vmovapd	%ymm0,%ymm3
	vmovapd	%ymm0,%ymm4
	vmovapd	%ymm0,%ymm5
	vmovapd	%ymm0,%ymm6
	vmovapd	%ymm0,%ymm7
	vmovapd	%ymm0,%ymm8
	vmovapd	%ymm0,%ymm9

.align	32
.Lloop:
	/* ymm15 = all-ones when counter == first index. */
	vpcmpeqq	%ymm11,%ymm12,%ymm15
	vmovdqu	0(%rsi),%ymm10
	vblendvpd	%ymm15,%ymm10,%ymm0,%ymm0
	vmovdqu	32(%rsi),%ymm10
	vblendvpd	%ymm15,%ymm10,%ymm1,%ymm1
	vmovdqu	64(%rsi),%ymm10
	vblendvpd	%ymm15,%ymm10,%ymm2,%ymm2
	vmovdqu	96(%rsi),%ymm10
	vblendvpd	%ymm15,%ymm10,%ymm3,%ymm3
	vmovdqu	128(%rsi),%ymm10
	vblendvpd	%ymm15,%ymm10,%ymm4,%ymm4
	/* ymm15 = all-ones when counter == second index. */
	vpcmpeqq	%ymm11,%ymm13,%ymm15
	vmovdqu	160(%rsi),%ymm10
	vblendvpd	%ymm15,%ymm10,%ymm5,%ymm5
	vmovdqu	192(%rsi),%ymm10
	vblendvpd	%ymm15,%ymm10,%ymm6,%ymm6
	vmovdqu	224(%rsi),%ymm10
	vblendvpd	%ymm15,%ymm10,%ymm7,%ymm7
	vmovdqu	256(%rsi),%ymm10
	vblendvpd	%ymm15,%ymm10,%ymm8,%ymm8
	vmovdqu	288(%rsi),%ymm10
	vblendvpd	%ymm15,%ymm10,%ymm9,%ymm9
	/* Next entry: counter += 1, rsi += 320. */
	vpaddq	%ymm14,%ymm11,%ymm11
	addq	$320,%rsi
	cmpq	%rsi,%rax
	jne	.Lloop
	vmovdqu	%ymm0,0(%rdi)
	vmovdqu	%ymm1,32(%rdi)
	vmovdqu	%ymm2,64(%rdi)
	vmovdqu	%ymm3,96(%rdi)
	vmovdqu	%ymm4,128(%rdi)
	vmovdqu	%ymm5,160(%rdi)
	vmovdqu	%ymm6,192(%rdi)
	vmovdqu	%ymm7,224(%rdi)
	vmovdqu	%ymm8,256(%rdi)
	vmovdqu	%ymm9,288(%rdi)
	.byte	0xf3,0xc3
.cfi_endproc
.size	ossl_extract_multiplier_2x20_win5_avx, .-ossl_extract_multiplier_2x20_win5_avx
.section	.rodata
.align	32
.Lones:
.quad	1,1,1,1
.Lzeros:
.quad	0,0,0,0
	.section ".note.gnu.property", "a"
	.p2align 3
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	# "GNU" encoded with .byte, since .asciz isn't supported
	# on Solaris.
	.byte 0x47
	.byte 0x4e
	.byte 0x55
	.byte 0
1:
	.p2align 3
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 3
4: