Path: blob/main/sys/crypto/openssl/amd64/rsaz-4k-avxifma.S
39483 views
/* Do not modify. This file is auto-generated from rsaz-4k-avxifma.pl. */
/*
 * NOTE(review): this listing was recovered from a web rendering in which
 * the viewer's line numbers were fused into the statements and many
 * statements were folded onto a few long lines.  The instruction and
 * directive sequence below is unchanged; only the stray numbers were
 * removed, one statement per line was restored, and comments were added.
 */
.text

/*
 * ossl_rsaz_amm52x40_x1_avxifma256
 *
 * Register use as read from the code below (AT&T operand order):
 *   %rdi  destination; ten 32-byte vectors are stored at 0..288(%rdi)
 *   %rsi  first vector operand, read as 40 qwords at 0..319(%rsi)
 *   %rdx  pointer copied to %r11 and consumed one qword per step,
 *         4 steps x 10 loop iterations = 40 qwords
 *   %rcx  second vector operand, read as 40 qwords at 0..319(%rcx)
 *   %r8   scalar that is multiplied with the running accumulator %r9
 *
 * NOTE(review): judging by the name this is presumably a 40-digit,
 * radix-2^52 "almost Montgomery multiplication" with %rsi = a, %rdx = b,
 * %rcx = m and %r8 = k0 -- confirm against the C prototype.
 *
 * Working state:
 *   %ymm3..%ymm12  40 qword accumulators (4 per register)
 *   %r9            scalar copy of accumulator digit 0; it is blended into
 *                  lane 0 of %ymm3 after the loop
 *   %ymm1 / %ymm2  broadcast multipliers for the (%rsi) and (%rcx) operands
 *   %rax           constant 2^52 - 1 during the loop
 *
 * %rbx, %rbp, %r12-%r15 are pushed on entry and reloaded before return.
 * Scratch space is taken below %rsp (328 bytes inside the loop, 640 bytes
 * and two pushes afterwards); the routine makes no calls.
 */
.globl	ossl_rsaz_amm52x40_x1_avxifma256
.type	ossl_rsaz_amm52x40_x1_avxifma256,@function
.align	32
ossl_rsaz_amm52x40_x1_avxifma256:
.cfi_startproc
.byte	243,15,30,250			/* F3 0F 1E FA = endbr64 */
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	/* Clear the ten vector accumulators and the scalar digit-0 accumulator. */
	vpxor	%ymm0,%ymm0,%ymm0
	vmovapd	%ymm0,%ymm3
	vmovapd	%ymm0,%ymm4
	vmovapd	%ymm0,%ymm5
	vmovapd	%ymm0,%ymm6
	vmovapd	%ymm0,%ymm7
	vmovapd	%ymm0,%ymm8
	vmovapd	%ymm0,%ymm9
	vmovapd	%ymm0,%ymm10
	vmovapd	%ymm0,%ymm11
	vmovapd	%ymm0,%ymm12

	xorl	%r9d,%r9d

	movq	%rdx,%r11			/* %rdx is needed as the implicit mulx source */
	movq	$0xfffffffffffff,%rax		/* 2^52 - 1 */


	movl	$10,%ebx			/* 10 iterations, 4 unrolled steps each */

.align	32
.Lloop10:
	/* ---- step for the qword at 0(%r11) ---- */
	movq	0(%r11),%r13

	vpbroadcastq	0(%r11),%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12			/* %r12:%r13 = 0(%rsi) * 0(%r11) */
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10				/* %r10:%r9 = accumulator + product */

	/* Second multiplier: (%r8 * %r9) reduced to its low 52 bits. */
	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	vmovq	%r13,%xmm2
	vpbroadcastq	%xmm2,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12			/* %r12:%r13 = 0(%rcx) * second multiplier */
	addq	%r13,%r9
	adcq	%r12,%r10

	/* %r9 = low 64 bits of (%r10:%r9) >> 52 */
	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	leaq	-328(%rsp),%rsp			/* 10*32 bytes of spill + one zero qword */

	/* Accumulate the low 52-bit halves of (%rsi)[j] * %ymm1. */
	{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3
	{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4
	{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5
	{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6
	{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7
	{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8
	{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9
	{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10
	{vex} vpmadd52luq 256(%rsi),%ymm1,%ymm11
	{vex} vpmadd52luq 288(%rsi),%ymm1,%ymm12

	/* Accumulate the low 52-bit halves of (%rcx)[j] * %ymm2. */
	{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3
	{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4
	{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5
	{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6
	{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7
	{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8
	{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9
	{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10
	{vex} vpmadd52luq 256(%rcx),%ymm2,%ymm11
	{vex} vpmadd52luq 288(%rcx),%ymm2,%ymm12
	/*
	 * Shift the 40-qword accumulator down by one qword: spill it, append a
	 * zero qword, and reload from an 8-byte offset.
	 */
	vmovdqu	%ymm3,0(%rsp)
	vmovdqu	%ymm4,32(%rsp)
	vmovdqu	%ymm5,64(%rsp)
	vmovdqu	%ymm6,96(%rsp)
	vmovdqu	%ymm7,128(%rsp)
	vmovdqu	%ymm8,160(%rsp)
	vmovdqu	%ymm9,192(%rsp)
	vmovdqu	%ymm10,224(%rsp)
	vmovdqu	%ymm11,256(%rsp)
	vmovdqu	%ymm12,288(%rsp)
	movq	$0,320(%rsp)

	vmovdqu	8(%rsp),%ymm3
	vmovdqu	40(%rsp),%ymm4
	vmovdqu	72(%rsp),%ymm5
	vmovdqu	104(%rsp),%ymm6
	vmovdqu	136(%rsp),%ymm7
	vmovdqu	168(%rsp),%ymm8
	vmovdqu	200(%rsp),%ymm9
	vmovdqu	232(%rsp),%ymm10
	vmovdqu	264(%rsp),%ymm11
	vmovdqu	296(%rsp),%ymm12

	addq	8(%rsp),%r9			/* fold the new digit 0 into the scalar accumulator */

	/* Accumulate the high 52-bit halves of the same products. */
	{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3
	{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4
	{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5
	{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6
	{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7
	{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8
	{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9
	{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10
	{vex} vpmadd52huq 256(%rsi),%ymm1,%ymm11
	{vex} vpmadd52huq 288(%rsi),%ymm1,%ymm12

	{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3
	{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4
	{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5
	{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6
	{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7
	{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8
	{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9
	{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10
	{vex} vpmadd52huq 256(%rcx),%ymm2,%ymm11
	{vex} vpmadd52huq 288(%rcx),%ymm2,%ymm12
	leaq	328(%rsp),%rsp

	/* ---- step for the qword at 8(%r11): same sequence as above ---- */
	movq	8(%r11),%r13

	vpbroadcastq	8(%r11),%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	vmovq	%r13,%xmm2
	vpbroadcastq	%xmm2,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	leaq	-328(%rsp),%rsp

	{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3
	{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4
	{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5
	{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6
	{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7
	{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8
	{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9
	{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10
	{vex} vpmadd52luq 256(%rsi),%ymm1,%ymm11
	{vex} vpmadd52luq 288(%rsi),%ymm1,%ymm12

	{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3
	{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4
	{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5
	{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6
	{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7
	{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8
	{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9
	{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10
	{vex} vpmadd52luq 256(%rcx),%ymm2,%ymm11
	{vex} vpmadd52luq 288(%rcx),%ymm2,%ymm12
	vmovdqu	%ymm3,0(%rsp)
	vmovdqu	%ymm4,32(%rsp)
	vmovdqu	%ymm5,64(%rsp)
	vmovdqu	%ymm6,96(%rsp)
	vmovdqu	%ymm7,128(%rsp)
	vmovdqu	%ymm8,160(%rsp)
	vmovdqu	%ymm9,192(%rsp)
	vmovdqu	%ymm10,224(%rsp)
	vmovdqu	%ymm11,256(%rsp)
	vmovdqu	%ymm12,288(%rsp)
	movq	$0,320(%rsp)

	vmovdqu	8(%rsp),%ymm3
	vmovdqu	40(%rsp),%ymm4
	vmovdqu	72(%rsp),%ymm5
	vmovdqu	104(%rsp),%ymm6
	vmovdqu	136(%rsp),%ymm7
	vmovdqu	168(%rsp),%ymm8
	vmovdqu	200(%rsp),%ymm9
	vmovdqu	232(%rsp),%ymm10
	vmovdqu	264(%rsp),%ymm11
	vmovdqu	296(%rsp),%ymm12

	addq	8(%rsp),%r9

	{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3
	{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4
	{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5
	{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6
	{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7
	{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8
	{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9
	{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10
	{vex} vpmadd52huq 256(%rsi),%ymm1,%ymm11
	{vex} vpmadd52huq 288(%rsi),%ymm1,%ymm12

	{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3
	{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4
	{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5
	{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6
	{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7
	{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8
	{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9
	{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10
	{vex} vpmadd52huq 256(%rcx),%ymm2,%ymm11
	{vex} vpmadd52huq 288(%rcx),%ymm2,%ymm12
	leaq	328(%rsp),%rsp

	/* ---- step for the qword at 16(%r11): same sequence as above ---- */
	movq	16(%r11),%r13

	vpbroadcastq	16(%r11),%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	vmovq	%r13,%xmm2
	vpbroadcastq	%xmm2,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	leaq	-328(%rsp),%rsp

	{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3
	{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4
	{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5
	{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6
	{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7
	{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8
	{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9
	{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10
	{vex} vpmadd52luq 256(%rsi),%ymm1,%ymm11
	{vex} vpmadd52luq 288(%rsi),%ymm1,%ymm12

	{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3
	{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4
	{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5
	{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6
	{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7
	{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8
	{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9
	{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10
	{vex} vpmadd52luq 256(%rcx),%ymm2,%ymm11
	{vex} vpmadd52luq 288(%rcx),%ymm2,%ymm12
	vmovdqu	%ymm3,0(%rsp)
	vmovdqu	%ymm4,32(%rsp)
	vmovdqu	%ymm5,64(%rsp)
	vmovdqu	%ymm6,96(%rsp)
	vmovdqu	%ymm7,128(%rsp)
	vmovdqu	%ymm8,160(%rsp)
	vmovdqu	%ymm9,192(%rsp)
	vmovdqu	%ymm10,224(%rsp)
	vmovdqu	%ymm11,256(%rsp)
	vmovdqu	%ymm12,288(%rsp)
	movq	$0,320(%rsp)

	vmovdqu	8(%rsp),%ymm3
	vmovdqu	40(%rsp),%ymm4
	vmovdqu	72(%rsp),%ymm5
	vmovdqu	104(%rsp),%ymm6
	vmovdqu	136(%rsp),%ymm7
	vmovdqu	168(%rsp),%ymm8
	vmovdqu	200(%rsp),%ymm9
	vmovdqu	232(%rsp),%ymm10
	vmovdqu	264(%rsp),%ymm11
	vmovdqu	296(%rsp),%ymm12

	addq	8(%rsp),%r9

	{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3
	{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4
	{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5
	{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6
	{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7
	{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8
	{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9
	{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10
	{vex} vpmadd52huq 256(%rsi),%ymm1,%ymm11
	{vex} vpmadd52huq 288(%rsi),%ymm1,%ymm12

	{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3
	{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4
	{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5
	{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6
	{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7
	{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8
	{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9
	{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10
	{vex} vpmadd52huq 256(%rcx),%ymm2,%ymm11
	{vex} vpmadd52huq 288(%rcx),%ymm2,%ymm12
	leaq	328(%rsp),%rsp

	/* ---- step for the qword at 24(%r11): same sequence as above ---- */
	movq	24(%r11),%r13

	vpbroadcastq	24(%r11),%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	vmovq	%r13,%xmm2
	vpbroadcastq	%xmm2,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	leaq	-328(%rsp),%rsp

	{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3
	{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4
	{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5
	{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6
	{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7
	{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8
	{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9
	{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10
	{vex} vpmadd52luq 256(%rsi),%ymm1,%ymm11
	{vex} vpmadd52luq 288(%rsi),%ymm1,%ymm12

	{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3
	{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4
	{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5
	{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6
	{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7
	{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8
	{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9
	{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10
	{vex} vpmadd52luq 256(%rcx),%ymm2,%ymm11
	{vex} vpmadd52luq 288(%rcx),%ymm2,%ymm12
	vmovdqu	%ymm3,0(%rsp)
	vmovdqu	%ymm4,32(%rsp)
	vmovdqu	%ymm5,64(%rsp)
	vmovdqu	%ymm6,96(%rsp)
	vmovdqu	%ymm7,128(%rsp)
	vmovdqu	%ymm8,160(%rsp)
	vmovdqu	%ymm9,192(%rsp)
	vmovdqu	%ymm10,224(%rsp)
	vmovdqu	%ymm11,256(%rsp)
	vmovdqu	%ymm12,288(%rsp)
	movq	$0,320(%rsp)

	vmovdqu	8(%rsp),%ymm3
	vmovdqu	40(%rsp),%ymm4
	vmovdqu	72(%rsp),%ymm5
	vmovdqu	104(%rsp),%ymm6
	vmovdqu	136(%rsp),%ymm7
	vmovdqu	168(%rsp),%ymm8
	vmovdqu	200(%rsp),%ymm9
	vmovdqu	232(%rsp),%ymm10
	vmovdqu	264(%rsp),%ymm11
	vmovdqu	296(%rsp),%ymm12

	addq	8(%rsp),%r9

	{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3
	{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4
	{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5
	{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6
	{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7
	{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8
	{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9
	{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10
	{vex} vpmadd52huq 256(%rsi),%ymm1,%ymm11
	{vex} vpmadd52huq 288(%rsi),%ymm1,%ymm12

	{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3
	{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4
	{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5
	{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6
	{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7
	{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8
	{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9
	{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10
	{vex} vpmadd52huq 256(%rcx),%ymm2,%ymm11
	{vex} vpmadd52huq 288(%rcx),%ymm2,%ymm12
	leaq	328(%rsp),%rsp
	leaq	32(%r11),%r11			/* four qwords consumed */
	decl	%ebx
	jne	.Lloop10

	/* Put the scalar digit-0 accumulator into lane 0 of %ymm3. */
	vmovq	%r9,%xmm0
	vpbroadcastq	%xmm0,%ymm0
	vpblendd	$3,%ymm0,%ymm3,%ymm3

	/*
	 * Normalisation, part 1: split every qword into its low 52 bits and
	 * the bits above, and add the upper bits into the next-higher qword.
	 * 0..319(%rsp) keeps the raw digits, 320..639(%rsp) the shifted carries.
	 */
	leaq	-640(%rsp),%rsp
	vmovupd	%ymm3,0(%rsp)
	vmovupd	%ymm4,32(%rsp)
	vmovupd	%ymm5,64(%rsp)
	vmovupd	%ymm6,96(%rsp)
	vmovupd	%ymm7,128(%rsp)
	vmovupd	%ymm8,160(%rsp)
	vmovupd	%ymm9,192(%rsp)
	vmovupd	%ymm10,224(%rsp)
	vmovupd	%ymm11,256(%rsp)
	vmovupd	%ymm12,288(%rsp)



	/* carries = digit >> 52 */
	vpsrlq	$52,%ymm3,%ymm3
	vpsrlq	$52,%ymm4,%ymm4
	vpsrlq	$52,%ymm5,%ymm5
	vpsrlq	$52,%ymm6,%ymm6
	vpsrlq	$52,%ymm7,%ymm7
	vpsrlq	$52,%ymm8,%ymm8
	vpsrlq	$52,%ymm9,%ymm9
	vpsrlq	$52,%ymm10,%ymm10
	vpsrlq	$52,%ymm11,%ymm11
	vpsrlq	$52,%ymm12,%ymm12


	/*
	 * Move every carry up by one lane across the ten registers:
	 * vpermq $144 yields lanes {0,0,1,2}; vpermq $3 brings lane 3 of the
	 * register below into lane 0; vblendpd $1 takes only that lane 0.
	 */
	vpermq	$144,%ymm12,%ymm12
	vpermq	$3,%ymm11,%ymm13
	vblendpd	$1,%ymm13,%ymm12,%ymm12

	vpermq	$144,%ymm11,%ymm11
	vpermq	$3,%ymm10,%ymm13
	vblendpd	$1,%ymm13,%ymm11,%ymm11

	vpermq	$144,%ymm10,%ymm10
	vpermq	$3,%ymm9,%ymm13
	vblendpd	$1,%ymm13,%ymm10,%ymm10

	vpermq	$144,%ymm9,%ymm9
	vpermq	$3,%ymm8,%ymm13
	vblendpd	$1,%ymm13,%ymm9,%ymm9

	vpermq	$144,%ymm8,%ymm8
	vpermq	$3,%ymm7,%ymm13
	vblendpd	$1,%ymm13,%ymm8,%ymm8

	vpermq	$144,%ymm7,%ymm7
	vpermq	$3,%ymm6,%ymm13
	vblendpd	$1,%ymm13,%ymm7,%ymm7

	vpermq	$144,%ymm6,%ymm6
	vpermq	$3,%ymm5,%ymm13
	vblendpd	$1,%ymm13,%ymm6,%ymm6

	vpermq	$144,%ymm5,%ymm5
	vpermq	$3,%ymm4,%ymm13
	vblendpd	$1,%ymm13,%ymm5,%ymm5

	vpermq	$144,%ymm4,%ymm4
	vpermq	$3,%ymm3,%ymm13
	vblendpd	$1,%ymm13,%ymm4,%ymm4

	vpermq	$144,%ymm3,%ymm3
	vpand	.Lhigh64x3(%rip),%ymm3,%ymm3	/* nothing carries into digit 0 */

	vmovupd	%ymm3,320(%rsp)
	vmovupd	%ymm4,352(%rsp)
	vmovupd	%ymm5,384(%rsp)
	vmovupd	%ymm6,416(%rsp)
	vmovupd	%ymm7,448(%rsp)
	vmovupd	%ymm8,480(%rsp)
	vmovupd	%ymm9,512(%rsp)
	vmovupd	%ymm10,544(%rsp)
	vmovupd	%ymm11,576(%rsp)
	vmovupd	%ymm12,608(%rsp)

	vmovupd	0(%rsp),%ymm3
	vmovupd	32(%rsp),%ymm4
	vmovupd	64(%rsp),%ymm5
	vmovupd	96(%rsp),%ymm6
	vmovupd	128(%rsp),%ymm7
	vmovupd	160(%rsp),%ymm8
	vmovupd	192(%rsp),%ymm9
	vmovupd	224(%rsp),%ymm10
	vmovupd	256(%rsp),%ymm11
	vmovupd	288(%rsp),%ymm12


	/* digits &= 2^52 - 1 */
	vpand	.Lmask52x4(%rip),%ymm3,%ymm3
	vpand	.Lmask52x4(%rip),%ymm4,%ymm4
	vpand	.Lmask52x4(%rip),%ymm5,%ymm5
	vpand	.Lmask52x4(%rip),%ymm6,%ymm6
	vpand	.Lmask52x4(%rip),%ymm7,%ymm7
	vpand	.Lmask52x4(%rip),%ymm8,%ymm8
	vpand	.Lmask52x4(%rip),%ymm9,%ymm9
	vpand	.Lmask52x4(%rip),%ymm10,%ymm10
	vpand	.Lmask52x4(%rip),%ymm11,%ymm11
	vpand	.Lmask52x4(%rip),%ymm12,%ymm12


	/* digits += carries from the digit below */
	vpaddq	320(%rsp),%ymm3,%ymm3
	vpaddq	352(%rsp),%ymm4,%ymm4
	vpaddq	384(%rsp),%ymm5,%ymm5
	vpaddq	416(%rsp),%ymm6,%ymm6
	vpaddq	448(%rsp),%ymm7,%ymm7
	vpaddq	480(%rsp),%ymm8,%ymm8
	vpaddq	512(%rsp),%ymm9,%ymm9
	vpaddq	544(%rsp),%ymm10,%ymm10
	vpaddq	576(%rsp),%ymm11,%ymm11
	vpaddq	608(%rsp),%ymm12,%ymm12

	leaq	640(%rsp),%rsp



	/*
	 * Normalisation, part 2: build a 40-bit mask, one bit per digit, of
	 * digits that exceed 2^52 - 1.  Each byte register holds 8 bits
	 * (two vectors): %r14b = digits 0-7 ... %r10b = digits 32-39.
	 */
	vpcmpgtq	.Lmask52x4(%rip),%ymm3,%ymm13
	vmovmskpd	%ymm13,%r14d
	vpcmpgtq	.Lmask52x4(%rip),%ymm4,%ymm13
	vmovmskpd	%ymm13,%r13d
	shlb	$4,%r13b
	orb	%r13b,%r14b

	vpcmpgtq	.Lmask52x4(%rip),%ymm5,%ymm13
	vmovmskpd	%ymm13,%r13d
	vpcmpgtq	.Lmask52x4(%rip),%ymm6,%ymm13
	vmovmskpd	%ymm13,%r12d
	shlb	$4,%r12b
	orb	%r12b,%r13b

	vpcmpgtq	.Lmask52x4(%rip),%ymm7,%ymm13
	vmovmskpd	%ymm13,%r12d
	vpcmpgtq	.Lmask52x4(%rip),%ymm8,%ymm13
	vmovmskpd	%ymm13,%r11d
	shlb	$4,%r11b
	orb	%r11b,%r12b

	vpcmpgtq	.Lmask52x4(%rip),%ymm9,%ymm13
	vmovmskpd	%ymm13,%r11d
	vpcmpgtq	.Lmask52x4(%rip),%ymm10,%ymm13
	vmovmskpd	%ymm13,%r10d
	shlb	$4,%r10b
	orb	%r10b,%r11b

	vpcmpgtq	.Lmask52x4(%rip),%ymm11,%ymm13
	vmovmskpd	%ymm13,%r10d
	vpcmpgtq	.Lmask52x4(%rip),%ymm12,%ymm13
	vmovmskpd	%ymm13,%r9d
	shlb	$4,%r9b
	orb	%r9b,%r10b

	/* Shift the 40-bit "greater" mask left by one bit across the five bytes. */
	addb	%r14b,%r14b
	adcb	%r13b,%r13b
	adcb	%r12b,%r12b
	adcb	%r11b,%r11b
	adcb	%r10b,%r10b


	/*
	 * Same layout for the mask of digits equal to 2^52 - 1:
	 * %r9b, %r8b, %dl, %cl, %bl (low to high).
	 */
	vpcmpeqq	.Lmask52x4(%rip),%ymm3,%ymm13
	vmovmskpd	%ymm13,%r9d
	vpcmpeqq	.Lmask52x4(%rip),%ymm4,%ymm13
	vmovmskpd	%ymm13,%r8d
	shlb	$4,%r8b
	orb	%r8b,%r9b

	vpcmpeqq	.Lmask52x4(%rip),%ymm5,%ymm13
	vmovmskpd	%ymm13,%r8d
	vpcmpeqq	.Lmask52x4(%rip),%ymm6,%ymm13
	vmovmskpd	%ymm13,%edx
	shlb	$4,%dl
	orb	%dl,%r8b

	vpcmpeqq	.Lmask52x4(%rip),%ymm7,%ymm13
	vmovmskpd	%ymm13,%edx
	vpcmpeqq	.Lmask52x4(%rip),%ymm8,%ymm13
	vmovmskpd	%ymm13,%ecx
	shlb	$4,%cl
	orb	%cl,%dl

	vpcmpeqq	.Lmask52x4(%rip),%ymm9,%ymm13
	vmovmskpd	%ymm13,%ecx
	vpcmpeqq	.Lmask52x4(%rip),%ymm10,%ymm13
	vmovmskpd	%ymm13,%ebx
	shlb	$4,%bl
	orb	%bl,%cl

	vpcmpeqq	.Lmask52x4(%rip),%ymm11,%ymm13
	vmovmskpd	%ymm13,%ebx
	vpcmpeqq	.Lmask52x4(%rip),%ymm12,%ymm13
	vmovmskpd	%ymm13,%eax
	shlb	$4,%al
	orb	%al,%bl

	/*
	 * mask = ((greater << 1) + equal) ^ equal, computed bytewise with the
	 * carry chained through adcb.  The set bits select the digits that are
	 * adjusted below (presumably those receiving a carry-in).
	 */
	addb	%r9b,%r14b
	adcb	%r8b,%r13b
	adcb	%dl,%r12b
	adcb	%cl,%r11b
	adcb	%bl,%r10b

	xorb	%r9b,%r14b
	xorb	%r8b,%r13b
	xorb	%dl,%r12b
	xorb	%cl,%r11b
	xorb	%bl,%r10b

	pushq	%r9
	pushq	%r8

	leaq	.Lkmasklut(%rip),%r8

	/*
	 * For each vector: take a 4-bit nibble of the mask, scale it by 32 to
	 * index .Lkmasklut, and use the loaded lane mask to replace the
	 * selected digits with (digit - (2^52 - 1)).  %r9b carries the byte
	 * whose high nibble serves the odd-numbered vector of each pair.
	 */
	movb	%r14b,%r9b
	andq	$0xf,%r14
	vpsubq	.Lmask52x4(%rip),%ymm3,%ymm13
	shlq	$5,%r14
	vmovapd	(%r8,%r14,1),%ymm14
	vblendvpd	%ymm14,%ymm13,%ymm3,%ymm3

	shrb	$4,%r9b
	andq	$0xf,%r9
	vpsubq	.Lmask52x4(%rip),%ymm4,%ymm13
	shlq	$5,%r9
	vmovapd	(%r8,%r9,1),%ymm14
	vblendvpd	%ymm14,%ymm13,%ymm4,%ymm4

	movb	%r13b,%r9b
	andq	$0xf,%r13
	vpsubq	.Lmask52x4(%rip),%ymm5,%ymm13
	shlq	$5,%r13
	vmovapd	(%r8,%r13,1),%ymm14
	vblendvpd	%ymm14,%ymm13,%ymm5,%ymm5

	shrb	$4,%r9b
	andq	$0xf,%r9
	vpsubq	.Lmask52x4(%rip),%ymm6,%ymm13
	shlq	$5,%r9
	vmovapd	(%r8,%r9,1),%ymm14
	vblendvpd	%ymm14,%ymm13,%ymm6,%ymm6

	movb	%r12b,%r9b
	andq	$0xf,%r12
	vpsubq	.Lmask52x4(%rip),%ymm7,%ymm13
	shlq	$5,%r12
	vmovapd	(%r8,%r12,1),%ymm14
	vblendvpd	%ymm14,%ymm13,%ymm7,%ymm7

	shrb	$4,%r9b
	andq	$0xf,%r9
	vpsubq	.Lmask52x4(%rip),%ymm8,%ymm13
	shlq	$5,%r9
	vmovapd	(%r8,%r9,1),%ymm14
	vblendvpd	%ymm14,%ymm13,%ymm8,%ymm8

	movb	%r11b,%r9b
	andq	$0xf,%r11
	vpsubq	.Lmask52x4(%rip),%ymm9,%ymm13
	shlq	$5,%r11
	vmovapd	(%r8,%r11,1),%ymm14
	vblendvpd	%ymm14,%ymm13,%ymm9,%ymm9

	shrb	$4,%r9b
	andq	$0xf,%r9
	vpsubq	.Lmask52x4(%rip),%ymm10,%ymm13
	shlq	$5,%r9
	vmovapd	(%r8,%r9,1),%ymm14
	vblendvpd	%ymm14,%ymm13,%ymm10,%ymm10

	movb	%r10b,%r9b
	andq	$0xf,%r10
	vpsubq	.Lmask52x4(%rip),%ymm11,%ymm13
	shlq	$5,%r10
	vmovapd	(%r8,%r10,1),%ymm14
	vblendvpd	%ymm14,%ymm13,%ymm11,%ymm11

	shrb	$4,%r9b
	andq	$0xf,%r9
	vpsubq	.Lmask52x4(%rip),%ymm12,%ymm13
	shlq	$5,%r9
	vmovapd	(%r8,%r9,1),%ymm14
	vblendvpd	%ymm14,%ymm13,%ymm12,%ymm12

	popq	%r8
	popq	%r9

	/* Reduce every digit to 52 bits again after the adjustment. */
	vpand	.Lmask52x4(%rip),%ymm3,%ymm3
	vpand	.Lmask52x4(%rip),%ymm4,%ymm4
	vpand	.Lmask52x4(%rip),%ymm5,%ymm5
	vpand	.Lmask52x4(%rip),%ymm6,%ymm6
	vpand	.Lmask52x4(%rip),%ymm7,%ymm7
	vpand	.Lmask52x4(%rip),%ymm8,%ymm8
	vpand	.Lmask52x4(%rip),%ymm9,%ymm9

	vpand	.Lmask52x4(%rip),%ymm10,%ymm10
	vpand	.Lmask52x4(%rip),%ymm11,%ymm11
	vpand	.Lmask52x4(%rip),%ymm12,%ymm12

	/* Store the 40 result qwords. */
	vmovdqu	%ymm3,0(%rdi)
	vmovdqu	%ymm4,32(%rdi)
	vmovdqu	%ymm5,64(%rdi)
	vmovdqu	%ymm6,96(%rdi)
	vmovdqu	%ymm7,128(%rdi)
	vmovdqu	%ymm8,160(%rdi)
	vmovdqu	%ymm9,192(%rdi)
	vmovdqu	%ymm10,224(%rdi)
	vmovdqu	%ymm11,256(%rdi)
	vmovdqu	%ymm12,288(%rdi)

	vzeroupper
	/* Reload the six registers pushed on entry and release their slots. */
	leaq	(%rsp),%rax
.cfi_def_cfa_register	%rax
	movq	0(%rax),%r15
.cfi_restore	%r15
	movq	8(%rax),%r14
.cfi_restore	%r14
	movq	16(%rax),%r13
.cfi_restore	%r13
	movq	24(%rax),%r12
.cfi_restore	%r12
	movq	32(%rax),%rbp
.cfi_restore	%rbp
	movq	40(%rax),%rbx
.cfi_restore	%rbx
	leaq	48(%rax),%rsp
.cfi_def_cfa	%rsp,8
.Lossl_rsaz_amm52x40_x1_avxifma256_epilogue:

	.byte	0xf3,0xc3			/* F3 C3 = rep ret */
.cfi_endproc
.size	ossl_rsaz_amm52x40_x1_avxifma256, .-ossl_rsaz_amm52x40_x1_avxifma256
.section	.rodata
.align	32
/* Four lanes of 2^52 - 1. */
.Lmask52x4:
.quad	0xfffffffffffff
.quad	0xfffffffffffff
.quad	0xfffffffffffff
.quad	0xfffffffffffff
/* Lane 0 clear, lanes 1-3 all ones. */
.Lhigh64x3:
.quad	0x0
.quad	0xffffffffffffffff
.quad	0xffffffffffffffff
.quad	0xffffffffffffffff
/*
 * Lane-mask table indexed by a 4-bit value (32 bytes per entry): in the
 * entries listed here, lane j is all ones exactly when bit j of the index
 * is set.  Loaded with vmovapd, so the 32-byte alignment above matters.
 */
.Lkmasklut:

.quad	0x0
.quad	0x0
.quad	0x0
.quad	0x0

.quad	0xffffffffffffffff
.quad	0x0
.quad	0x0
.quad	0x0

.quad	0x0
.quad	0xffffffffffffffff
.quad	0x0
.quad	0x0

.quad	0xffffffffffffffff
.quad	0xffffffffffffffff
.quad	0x0
.quad	0x0

.quad	0x0
.quad	0x0
.quad	0xffffffffffffffff
.quad	0x0

.quad	0xffffffffffffffff
.quad	0x0
.quad	0xffffffffffffffff
.quad	0x0

.quad	0x0
.quad	0xffffffffffffffff
/*
 * NOTE(review): the recovered listing wraps here in the middle of a
 * directive; the operand of the following .quad and the remainder of the
 * table are on the next source line.
 */
.quad
0xffffffffffffffff808.quad 0x0809810.quad 0xffffffffffffffff811.quad 0xffffffffffffffff812.quad 0xffffffffffffffff813.quad 0x0814815.quad 0x0816.quad 0x0817.quad 0x0818.quad 0xffffffffffffffff819820.quad 0xffffffffffffffff821.quad 0x0822.quad 0x0823.quad 0xffffffffffffffff824825.quad 0x0826.quad 0xffffffffffffffff827.quad 0x0828.quad 0xffffffffffffffff829830.quad 0xffffffffffffffff831.quad 0xffffffffffffffff832.quad 0x0833.quad 0xffffffffffffffff834835.quad 0x0836.quad 0x0837.quad 0xffffffffffffffff838.quad 0xffffffffffffffff839840.quad 0xffffffffffffffff841.quad 0x0842.quad 0xffffffffffffffff843.quad 0xffffffffffffffff844845.quad 0x0846.quad 0xffffffffffffffff847.quad 0xffffffffffffffff848.quad 0xffffffffffffffff849850.quad 0xffffffffffffffff851.quad 0xffffffffffffffff852.quad 0xffffffffffffffff853.quad 0xffffffffffffffff854.text855856.globl ossl_rsaz_amm52x40_x2_avxifma256857.type ossl_rsaz_amm52x40_x2_avxifma256,@function858.align 32859ossl_rsaz_amm52x40_x2_avxifma256:860.cfi_startproc861.byte 243,15,30,250862pushq %rbx863.cfi_adjust_cfa_offset 8864.cfi_offset %rbx,-16865pushq %rbp866.cfi_adjust_cfa_offset 8867.cfi_offset %rbp,-24868pushq %r12869.cfi_adjust_cfa_offset 8870.cfi_offset %r12,-32871pushq %r13872.cfi_adjust_cfa_offset 8873.cfi_offset %r13,-40874pushq %r14875.cfi_adjust_cfa_offset 8876.cfi_offset %r14,-48877pushq %r15878.cfi_adjust_cfa_offset 8879.cfi_offset %r15,-56880881vpxor %ymm0,%ymm0,%ymm0882vmovapd %ymm0,%ymm3883vmovapd %ymm0,%ymm4884vmovapd %ymm0,%ymm5885vmovapd %ymm0,%ymm6886vmovapd %ymm0,%ymm7887vmovapd %ymm0,%ymm8888vmovapd %ymm0,%ymm9889vmovapd %ymm0,%ymm10890vmovapd %ymm0,%ymm11891vmovapd %ymm0,%ymm12892893xorl %r9d,%r9d894895movq %rdx,%r11896movq $0xfffffffffffff,%rax897898movl $40,%ebx899900.align 32901.Lloop40:902movq 0(%r11),%r13903904vpbroadcastq 0(%r11),%ymm1905movq 0(%rsi),%rdx906mulxq %r13,%r13,%r12907addq %r13,%r9908movq %r12,%r10909adcq $0,%r10910911movq (%r8),%r13912imulq %r9,%r13913andq %rax,%r13914915vmovq 
%r13,%xmm2916vpbroadcastq %xmm2,%ymm2917movq 0(%rcx),%rdx918mulxq %r13,%r13,%r12919addq %r13,%r9920adcq %r12,%r10921922shrq $52,%r9923salq $12,%r10924orq %r10,%r9925926leaq -328(%rsp),%rsp927928{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3929{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4930{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5931{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6932{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7933{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8934{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9935{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10936{vex} vpmadd52luq 256(%rsi),%ymm1,%ymm11937{vex} vpmadd52luq 288(%rsi),%ymm1,%ymm12938939{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3940{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4941{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5942{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6943{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7944{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8945{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9946{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10947{vex} vpmadd52luq 256(%rcx),%ymm2,%ymm11948{vex} vpmadd52luq 288(%rcx),%ymm2,%ymm12949vmovdqu %ymm3,0(%rsp)950vmovdqu %ymm4,32(%rsp)951vmovdqu %ymm5,64(%rsp)952vmovdqu %ymm6,96(%rsp)953vmovdqu %ymm7,128(%rsp)954vmovdqu %ymm8,160(%rsp)955vmovdqu %ymm9,192(%rsp)956vmovdqu %ymm10,224(%rsp)957vmovdqu %ymm11,256(%rsp)958vmovdqu %ymm12,288(%rsp)959movq $0,320(%rsp)960961vmovdqu 8(%rsp),%ymm3962vmovdqu 40(%rsp),%ymm4963vmovdqu 72(%rsp),%ymm5964vmovdqu 104(%rsp),%ymm6965vmovdqu 136(%rsp),%ymm7966vmovdqu 168(%rsp),%ymm8967vmovdqu 200(%rsp),%ymm9968vmovdqu 232(%rsp),%ymm10969vmovdqu 264(%rsp),%ymm11970vmovdqu 296(%rsp),%ymm12971972addq 8(%rsp),%r9973974{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm3975{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm4976{vex} vpmadd52huq 64(%rsi),%ymm1,%ymm5977{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm6978{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm7979{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm8980{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm9981{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm10982{vex} vpmadd52huq 256(%rsi),%ymm1,%ymm11983{vex} vpmadd52huq 
288(%rsi),%ymm1,%ymm12984985{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm3986{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm4987{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm5988{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm6989{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm7990{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm8991{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm9992{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm10993{vex} vpmadd52huq 256(%rcx),%ymm2,%ymm11994{vex} vpmadd52huq 288(%rcx),%ymm2,%ymm12995leaq 328(%rsp),%rsp996leaq 8(%r11),%r11997decl %ebx998jne .Lloop409991000pushq %r111001pushq %rsi1002pushq %rcx1003pushq %r810041005vmovq %r9,%xmm01006vpbroadcastq %xmm0,%ymm01007vpblendd $3,%ymm0,%ymm3,%ymm310081009leaq -640(%rsp),%rsp1010vmovupd %ymm3,0(%rsp)1011vmovupd %ymm4,32(%rsp)1012vmovupd %ymm5,64(%rsp)1013vmovupd %ymm6,96(%rsp)1014vmovupd %ymm7,128(%rsp)1015vmovupd %ymm8,160(%rsp)1016vmovupd %ymm9,192(%rsp)1017vmovupd %ymm10,224(%rsp)1018vmovupd %ymm11,256(%rsp)1019vmovupd %ymm12,288(%rsp)1020102110221023vpsrlq $52,%ymm3,%ymm31024vpsrlq $52,%ymm4,%ymm41025vpsrlq $52,%ymm5,%ymm51026vpsrlq $52,%ymm6,%ymm61027vpsrlq $52,%ymm7,%ymm71028vpsrlq $52,%ymm8,%ymm81029vpsrlq $52,%ymm9,%ymm91030vpsrlq $52,%ymm10,%ymm101031vpsrlq $52,%ymm11,%ymm111032vpsrlq $52,%ymm12,%ymm12103310341035vpermq $144,%ymm12,%ymm121036vpermq $3,%ymm11,%ymm131037vblendpd $1,%ymm13,%ymm12,%ymm1210381039vpermq $144,%ymm11,%ymm111040vpermq $3,%ymm10,%ymm131041vblendpd $1,%ymm13,%ymm11,%ymm1110421043vpermq $144,%ymm10,%ymm101044vpermq $3,%ymm9,%ymm131045vblendpd $1,%ymm13,%ymm10,%ymm1010461047vpermq $144,%ymm9,%ymm91048vpermq $3,%ymm8,%ymm131049vblendpd $1,%ymm13,%ymm9,%ymm910501051vpermq $144,%ymm8,%ymm81052vpermq $3,%ymm7,%ymm131053vblendpd $1,%ymm13,%ymm8,%ymm810541055vpermq $144,%ymm7,%ymm71056vpermq $3,%ymm6,%ymm131057vblendpd $1,%ymm13,%ymm7,%ymm710581059vpermq $144,%ymm6,%ymm61060vpermq $3,%ymm5,%ymm131061vblendpd $1,%ymm13,%ymm6,%ymm610621063vpermq $144,%ymm5,%ymm51064vpermq $3,%ymm4,%ymm131065vblendpd $1,%ymm13,%ymm5,%ymm510661067vpermq 
$144,%ymm4,%ymm41068vpermq $3,%ymm3,%ymm131069vblendpd $1,%ymm13,%ymm4,%ymm410701071vpermq $144,%ymm3,%ymm31072vpand .Lhigh64x3(%rip),%ymm3,%ymm310731074vmovupd %ymm3,320(%rsp)1075vmovupd %ymm4,352(%rsp)1076vmovupd %ymm5,384(%rsp)1077vmovupd %ymm6,416(%rsp)1078vmovupd %ymm7,448(%rsp)1079vmovupd %ymm8,480(%rsp)1080vmovupd %ymm9,512(%rsp)1081vmovupd %ymm10,544(%rsp)1082vmovupd %ymm11,576(%rsp)1083vmovupd %ymm12,608(%rsp)10841085vmovupd 0(%rsp),%ymm31086vmovupd 32(%rsp),%ymm41087vmovupd 64(%rsp),%ymm51088vmovupd 96(%rsp),%ymm61089vmovupd 128(%rsp),%ymm71090vmovupd 160(%rsp),%ymm81091vmovupd 192(%rsp),%ymm91092vmovupd 224(%rsp),%ymm101093vmovupd 256(%rsp),%ymm111094vmovupd 288(%rsp),%ymm12109510961097vpand .Lmask52x4(%rip),%ymm3,%ymm31098vpand .Lmask52x4(%rip),%ymm4,%ymm41099vpand .Lmask52x4(%rip),%ymm5,%ymm51100vpand .Lmask52x4(%rip),%ymm6,%ymm61101vpand .Lmask52x4(%rip),%ymm7,%ymm71102vpand .Lmask52x4(%rip),%ymm8,%ymm81103vpand .Lmask52x4(%rip),%ymm9,%ymm91104vpand .Lmask52x4(%rip),%ymm10,%ymm101105vpand .Lmask52x4(%rip),%ymm11,%ymm111106vpand .Lmask52x4(%rip),%ymm12,%ymm12110711081109vpaddq 320(%rsp),%ymm3,%ymm31110vpaddq 352(%rsp),%ymm4,%ymm41111vpaddq 384(%rsp),%ymm5,%ymm51112vpaddq 416(%rsp),%ymm6,%ymm61113vpaddq 448(%rsp),%ymm7,%ymm71114vpaddq 480(%rsp),%ymm8,%ymm81115vpaddq 512(%rsp),%ymm9,%ymm91116vpaddq 544(%rsp),%ymm10,%ymm101117vpaddq 576(%rsp),%ymm11,%ymm111118vpaddq 608(%rsp),%ymm12,%ymm1211191120leaq 640(%rsp),%rsp1121112211231124vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm131125vmovmskpd %ymm13,%r14d1126vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm131127vmovmskpd %ymm13,%r13d1128shlb $4,%r13b1129orb %r13b,%r14b11301131vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm131132vmovmskpd %ymm13,%r13d1133vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm131134vmovmskpd %ymm13,%r12d1135shlb $4,%r12b1136orb %r12b,%r13b11371138vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm131139vmovmskpd %ymm13,%r12d1140vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm131141vmovmskpd %ymm13,%r11d1142shlb $4,%r11b1143orb 
%r11b,%r12b11441145vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm131146vmovmskpd %ymm13,%r11d1147vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm131148vmovmskpd %ymm13,%r10d1149shlb $4,%r10b1150orb %r10b,%r11b11511152vpcmpgtq .Lmask52x4(%rip),%ymm11,%ymm131153vmovmskpd %ymm13,%r10d1154vpcmpgtq .Lmask52x4(%rip),%ymm12,%ymm131155vmovmskpd %ymm13,%r9d1156shlb $4,%r9b1157orb %r9b,%r10b11581159addb %r14b,%r14b1160adcb %r13b,%r13b1161adcb %r12b,%r12b1162adcb %r11b,%r11b1163adcb %r10b,%r10b116411651166vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm131167vmovmskpd %ymm13,%r9d1168vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm131169vmovmskpd %ymm13,%r8d1170shlb $4,%r8b1171orb %r8b,%r9b11721173vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm131174vmovmskpd %ymm13,%r8d1175vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm131176vmovmskpd %ymm13,%edx1177shlb $4,%dl1178orb %dl,%r8b11791180vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm131181vmovmskpd %ymm13,%edx1182vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm131183vmovmskpd %ymm13,%ecx1184shlb $4,%cl1185orb %cl,%dl11861187vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm131188vmovmskpd %ymm13,%ecx1189vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm131190vmovmskpd %ymm13,%ebx1191shlb $4,%bl1192orb %bl,%cl11931194vpcmpeqq .Lmask52x4(%rip),%ymm11,%ymm131195vmovmskpd %ymm13,%ebx1196vpcmpeqq .Lmask52x4(%rip),%ymm12,%ymm131197vmovmskpd %ymm13,%eax1198shlb $4,%al1199orb %al,%bl12001201addb %r9b,%r14b1202adcb %r8b,%r13b1203adcb %dl,%r12b1204adcb %cl,%r11b1205adcb %bl,%r10b12061207xorb %r9b,%r14b1208xorb %r8b,%r13b1209xorb %dl,%r12b1210xorb %cl,%r11b1211xorb %bl,%r10b12121213pushq %r91214pushq %r812151216leaq .Lkmasklut(%rip),%r812171218movb %r14b,%r9b1219andq $0xf,%r141220vpsubq .Lmask52x4(%rip),%ymm3,%ymm131221shlq $5,%r141222vmovapd (%r8,%r14,1),%ymm141223vblendvpd %ymm14,%ymm13,%ymm3,%ymm312241225shrb $4,%r9b1226andq $0xf,%r91227vpsubq .Lmask52x4(%rip),%ymm4,%ymm131228shlq $5,%r91229vmovapd (%r8,%r9,1),%ymm141230vblendvpd %ymm14,%ymm13,%ymm4,%ymm412311232movb %r13b,%r9b1233andq $0xf,%r131234vpsubq .Lmask52x4(%rip),%ymm5,%ymm131235shlq 
$5,%r131236vmovapd (%r8,%r13,1),%ymm141237vblendvpd %ymm14,%ymm13,%ymm5,%ymm512381239shrb $4,%r9b1240andq $0xf,%r91241vpsubq .Lmask52x4(%rip),%ymm6,%ymm131242shlq $5,%r91243vmovapd (%r8,%r9,1),%ymm141244vblendvpd %ymm14,%ymm13,%ymm6,%ymm612451246movb %r12b,%r9b1247andq $0xf,%r121248vpsubq .Lmask52x4(%rip),%ymm7,%ymm131249shlq $5,%r121250vmovapd (%r8,%r12,1),%ymm141251vblendvpd %ymm14,%ymm13,%ymm7,%ymm712521253shrb $4,%r9b1254andq $0xf,%r91255vpsubq .Lmask52x4(%rip),%ymm8,%ymm131256shlq $5,%r91257vmovapd (%r8,%r9,1),%ymm141258vblendvpd %ymm14,%ymm13,%ymm8,%ymm812591260movb %r11b,%r9b1261andq $0xf,%r111262vpsubq .Lmask52x4(%rip),%ymm9,%ymm131263shlq $5,%r111264vmovapd (%r8,%r11,1),%ymm141265vblendvpd %ymm14,%ymm13,%ymm9,%ymm912661267shrb $4,%r9b1268andq $0xf,%r91269vpsubq .Lmask52x4(%rip),%ymm10,%ymm131270shlq $5,%r91271vmovapd (%r8,%r9,1),%ymm141272vblendvpd %ymm14,%ymm13,%ymm10,%ymm1012731274movb %r10b,%r9b1275andq $0xf,%r101276vpsubq .Lmask52x4(%rip),%ymm11,%ymm131277shlq $5,%r101278vmovapd (%r8,%r10,1),%ymm141279vblendvpd %ymm14,%ymm13,%ymm11,%ymm1112801281shrb $4,%r9b1282andq $0xf,%r91283vpsubq .Lmask52x4(%rip),%ymm12,%ymm131284shlq $5,%r91285vmovapd (%r8,%r9,1),%ymm141286vblendvpd %ymm14,%ymm13,%ymm12,%ymm1212871288popq %r81289popq %r912901291vpand .Lmask52x4(%rip),%ymm3,%ymm31292vpand .Lmask52x4(%rip),%ymm4,%ymm41293vpand .Lmask52x4(%rip),%ymm5,%ymm51294vpand .Lmask52x4(%rip),%ymm6,%ymm61295vpand .Lmask52x4(%rip),%ymm7,%ymm71296vpand .Lmask52x4(%rip),%ymm8,%ymm81297vpand .Lmask52x4(%rip),%ymm9,%ymm912981299vpand .Lmask52x4(%rip),%ymm10,%ymm101300vpand .Lmask52x4(%rip),%ymm11,%ymm111301vpand .Lmask52x4(%rip),%ymm12,%ymm1213021303popq %r81304popq %rcx1305popq %rsi1306popq %r1113071308vmovdqu %ymm3,0(%rdi)1309vmovdqu %ymm4,32(%rdi)1310vmovdqu %ymm5,64(%rdi)1311vmovdqu %ymm6,96(%rdi)1312vmovdqu %ymm7,128(%rdi)1313vmovdqu %ymm8,160(%rdi)1314vmovdqu %ymm9,192(%rdi)1315vmovdqu %ymm10,224(%rdi)1316vmovdqu %ymm11,256(%rdi)1317vmovdqu %ymm12,288(%rdi)13181319xorl 
%r15d,%r15d13201321movq $0xfffffffffffff,%rax13221323movl $40,%ebx13241325vpxor %ymm0,%ymm0,%ymm01326vmovapd %ymm0,%ymm31327vmovapd %ymm0,%ymm41328vmovapd %ymm0,%ymm51329vmovapd %ymm0,%ymm61330vmovapd %ymm0,%ymm71331vmovapd %ymm0,%ymm81332vmovapd %ymm0,%ymm91333vmovapd %ymm0,%ymm101334vmovapd %ymm0,%ymm111335vmovapd %ymm0,%ymm121336.align 321337.Lloop40_1:1338movq 0(%r11),%r1313391340vpbroadcastq 0(%r11),%ymm11341movq 320(%rsi),%rdx1342mulxq %r13,%r13,%r121343addq %r13,%r91344movq %r12,%r101345adcq $0,%r1013461347movq 8(%r8),%r131348imulq %r9,%r131349andq %rax,%r1313501351vmovq %r13,%xmm21352vpbroadcastq %xmm2,%ymm21353movq 320(%rcx),%rdx1354mulxq %r13,%r13,%r121355addq %r13,%r91356adcq %r12,%r1013571358shrq $52,%r91359salq $12,%r101360orq %r10,%r913611362leaq -328(%rsp),%rsp13631364{vex} vpmadd52luq 320(%rsi),%ymm1,%ymm31365{vex} vpmadd52luq 352(%rsi),%ymm1,%ymm41366{vex} vpmadd52luq 384(%rsi),%ymm1,%ymm51367{vex} vpmadd52luq 416(%rsi),%ymm1,%ymm61368{vex} vpmadd52luq 448(%rsi),%ymm1,%ymm71369{vex} vpmadd52luq 480(%rsi),%ymm1,%ymm81370{vex} vpmadd52luq 512(%rsi),%ymm1,%ymm91371{vex} vpmadd52luq 544(%rsi),%ymm1,%ymm101372{vex} vpmadd52luq 576(%rsi),%ymm1,%ymm111373{vex} vpmadd52luq 608(%rsi),%ymm1,%ymm1213741375{vex} vpmadd52luq 320(%rcx),%ymm2,%ymm31376{vex} vpmadd52luq 352(%rcx),%ymm2,%ymm41377{vex} vpmadd52luq 384(%rcx),%ymm2,%ymm51378{vex} vpmadd52luq 416(%rcx),%ymm2,%ymm61379{vex} vpmadd52luq 448(%rcx),%ymm2,%ymm71380{vex} vpmadd52luq 480(%rcx),%ymm2,%ymm81381{vex} vpmadd52luq 512(%rcx),%ymm2,%ymm91382{vex} vpmadd52luq 544(%rcx),%ymm2,%ymm101383{vex} vpmadd52luq 576(%rcx),%ymm2,%ymm111384{vex} vpmadd52luq 608(%rcx),%ymm2,%ymm121385vmovdqu %ymm3,0(%rsp)1386vmovdqu %ymm4,32(%rsp)1387vmovdqu %ymm5,64(%rsp)1388vmovdqu %ymm6,96(%rsp)1389vmovdqu %ymm7,128(%rsp)1390vmovdqu %ymm8,160(%rsp)1391vmovdqu %ymm9,192(%rsp)1392vmovdqu %ymm10,224(%rsp)1393vmovdqu %ymm11,256(%rsp)1394vmovdqu %ymm12,288(%rsp)1395movq $0,320(%rsp)13961397vmovdqu 8(%rsp),%ymm31398vmovdqu 
40(%rsp),%ymm41399vmovdqu 72(%rsp),%ymm51400vmovdqu 104(%rsp),%ymm61401vmovdqu 136(%rsp),%ymm71402vmovdqu 168(%rsp),%ymm81403vmovdqu 200(%rsp),%ymm91404vmovdqu 232(%rsp),%ymm101405vmovdqu 264(%rsp),%ymm111406vmovdqu 296(%rsp),%ymm1214071408addq 8(%rsp),%r914091410{vex} vpmadd52huq 320(%rsi),%ymm1,%ymm31411{vex} vpmadd52huq 352(%rsi),%ymm1,%ymm41412{vex} vpmadd52huq 384(%rsi),%ymm1,%ymm51413{vex} vpmadd52huq 416(%rsi),%ymm1,%ymm61414{vex} vpmadd52huq 448(%rsi),%ymm1,%ymm71415{vex} vpmadd52huq 480(%rsi),%ymm1,%ymm81416{vex} vpmadd52huq 512(%rsi),%ymm1,%ymm91417{vex} vpmadd52huq 544(%rsi),%ymm1,%ymm101418{vex} vpmadd52huq 576(%rsi),%ymm1,%ymm111419{vex} vpmadd52huq 608(%rsi),%ymm1,%ymm1214201421{vex} vpmadd52huq 320(%rcx),%ymm2,%ymm31422{vex} vpmadd52huq 352(%rcx),%ymm2,%ymm41423{vex} vpmadd52huq 384(%rcx),%ymm2,%ymm51424{vex} vpmadd52huq 416(%rcx),%ymm2,%ymm61425{vex} vpmadd52huq 448(%rcx),%ymm2,%ymm71426{vex} vpmadd52huq 480(%rcx),%ymm2,%ymm81427{vex} vpmadd52huq 512(%rcx),%ymm2,%ymm91428{vex} vpmadd52huq 544(%rcx),%ymm2,%ymm101429{vex} vpmadd52huq 576(%rcx),%ymm2,%ymm111430{vex} vpmadd52huq 608(%rcx),%ymm2,%ymm121431leaq 328(%rsp),%rsp1432leaq 8(%r11),%r111433decl %ebx1434jne .Lloop40_114351436vmovq %r9,%xmm01437vpbroadcastq %xmm0,%ymm01438vpblendd $3,%ymm0,%ymm3,%ymm314391440leaq -640(%rsp),%rsp1441vmovupd %ymm3,0(%rsp)1442vmovupd %ymm4,32(%rsp)1443vmovupd %ymm5,64(%rsp)1444vmovupd %ymm6,96(%rsp)1445vmovupd %ymm7,128(%rsp)1446vmovupd %ymm8,160(%rsp)1447vmovupd %ymm9,192(%rsp)1448vmovupd %ymm10,224(%rsp)1449vmovupd %ymm11,256(%rsp)1450vmovupd %ymm12,288(%rsp)1451145214531454vpsrlq $52,%ymm3,%ymm31455vpsrlq $52,%ymm4,%ymm41456vpsrlq $52,%ymm5,%ymm51457vpsrlq $52,%ymm6,%ymm61458vpsrlq $52,%ymm7,%ymm71459vpsrlq $52,%ymm8,%ymm81460vpsrlq $52,%ymm9,%ymm91461vpsrlq $52,%ymm10,%ymm101462vpsrlq $52,%ymm11,%ymm111463vpsrlq $52,%ymm12,%ymm12146414651466vpermq $144,%ymm12,%ymm121467vpermq $3,%ymm11,%ymm131468vblendpd $1,%ymm13,%ymm12,%ymm1214691470vpermq 
$144,%ymm11,%ymm111471vpermq $3,%ymm10,%ymm131472vblendpd $1,%ymm13,%ymm11,%ymm1114731474vpermq $144,%ymm10,%ymm101475vpermq $3,%ymm9,%ymm131476vblendpd $1,%ymm13,%ymm10,%ymm1014771478vpermq $144,%ymm9,%ymm91479vpermq $3,%ymm8,%ymm131480vblendpd $1,%ymm13,%ymm9,%ymm914811482vpermq $144,%ymm8,%ymm81483vpermq $3,%ymm7,%ymm131484vblendpd $1,%ymm13,%ymm8,%ymm814851486vpermq $144,%ymm7,%ymm71487vpermq $3,%ymm6,%ymm131488vblendpd $1,%ymm13,%ymm7,%ymm714891490vpermq $144,%ymm6,%ymm61491vpermq $3,%ymm5,%ymm131492vblendpd $1,%ymm13,%ymm6,%ymm614931494vpermq $144,%ymm5,%ymm51495vpermq $3,%ymm4,%ymm131496vblendpd $1,%ymm13,%ymm5,%ymm514971498vpermq $144,%ymm4,%ymm41499vpermq $3,%ymm3,%ymm131500vblendpd $1,%ymm13,%ymm4,%ymm415011502vpermq $144,%ymm3,%ymm31503vpand .Lhigh64x3(%rip),%ymm3,%ymm315041505vmovupd %ymm3,320(%rsp)1506vmovupd %ymm4,352(%rsp)1507vmovupd %ymm5,384(%rsp)1508vmovupd %ymm6,416(%rsp)1509vmovupd %ymm7,448(%rsp)1510vmovupd %ymm8,480(%rsp)1511vmovupd %ymm9,512(%rsp)1512vmovupd %ymm10,544(%rsp)1513vmovupd %ymm11,576(%rsp)1514vmovupd %ymm12,608(%rsp)15151516vmovupd 0(%rsp),%ymm31517vmovupd 32(%rsp),%ymm41518vmovupd 64(%rsp),%ymm51519vmovupd 96(%rsp),%ymm61520vmovupd 128(%rsp),%ymm71521vmovupd 160(%rsp),%ymm81522vmovupd 192(%rsp),%ymm91523vmovupd 224(%rsp),%ymm101524vmovupd 256(%rsp),%ymm111525vmovupd 288(%rsp),%ymm12152615271528vpand .Lmask52x4(%rip),%ymm3,%ymm31529vpand .Lmask52x4(%rip),%ymm4,%ymm41530vpand .Lmask52x4(%rip),%ymm5,%ymm51531vpand .Lmask52x4(%rip),%ymm6,%ymm61532vpand .Lmask52x4(%rip),%ymm7,%ymm71533vpand .Lmask52x4(%rip),%ymm8,%ymm81534vpand .Lmask52x4(%rip),%ymm9,%ymm91535vpand .Lmask52x4(%rip),%ymm10,%ymm101536vpand .Lmask52x4(%rip),%ymm11,%ymm111537vpand .Lmask52x4(%rip),%ymm12,%ymm12153815391540vpaddq 320(%rsp),%ymm3,%ymm31541vpaddq 352(%rsp),%ymm4,%ymm41542vpaddq 384(%rsp),%ymm5,%ymm51543vpaddq 416(%rsp),%ymm6,%ymm61544vpaddq 448(%rsp),%ymm7,%ymm71545vpaddq 480(%rsp),%ymm8,%ymm81546vpaddq 512(%rsp),%ymm9,%ymm91547vpaddq 
544(%rsp),%ymm10,%ymm101548vpaddq 576(%rsp),%ymm11,%ymm111549vpaddq 608(%rsp),%ymm12,%ymm1215501551leaq 640(%rsp),%rsp1552155315541555vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm131556vmovmskpd %ymm13,%r14d1557vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm131558vmovmskpd %ymm13,%r13d1559shlb $4,%r13b1560orb %r13b,%r14b15611562vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm131563vmovmskpd %ymm13,%r13d1564vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm131565vmovmskpd %ymm13,%r12d1566shlb $4,%r12b1567orb %r12b,%r13b15681569vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm131570vmovmskpd %ymm13,%r12d1571vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm131572vmovmskpd %ymm13,%r11d1573shlb $4,%r11b1574orb %r11b,%r12b15751576vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm131577vmovmskpd %ymm13,%r11d1578vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm131579vmovmskpd %ymm13,%r10d1580shlb $4,%r10b1581orb %r10b,%r11b15821583vpcmpgtq .Lmask52x4(%rip),%ymm11,%ymm131584vmovmskpd %ymm13,%r10d1585vpcmpgtq .Lmask52x4(%rip),%ymm12,%ymm131586vmovmskpd %ymm13,%r9d1587shlb $4,%r9b1588orb %r9b,%r10b15891590addb %r14b,%r14b1591adcb %r13b,%r13b1592adcb %r12b,%r12b1593adcb %r11b,%r11b1594adcb %r10b,%r10b159515961597vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm131598vmovmskpd %ymm13,%r9d1599vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm131600vmovmskpd %ymm13,%r8d1601shlb $4,%r8b1602orb %r8b,%r9b16031604vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm131605vmovmskpd %ymm13,%r8d1606vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm131607vmovmskpd %ymm13,%edx1608shlb $4,%dl1609orb %dl,%r8b16101611vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm131612vmovmskpd %ymm13,%edx1613vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm131614vmovmskpd %ymm13,%ecx1615shlb $4,%cl1616orb %cl,%dl16171618vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm131619vmovmskpd %ymm13,%ecx1620vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm131621vmovmskpd %ymm13,%ebx1622shlb $4,%bl1623orb %bl,%cl16241625vpcmpeqq .Lmask52x4(%rip),%ymm11,%ymm131626vmovmskpd %ymm13,%ebx1627vpcmpeqq .Lmask52x4(%rip),%ymm12,%ymm131628vmovmskpd %ymm13,%eax1629shlb $4,%al1630orb %al,%bl16311632addb %r9b,%r14b1633adcb 
%r8b,%r13b1634adcb %dl,%r12b1635adcb %cl,%r11b1636adcb %bl,%r10b16371638xorb %r9b,%r14b1639xorb %r8b,%r13b1640xorb %dl,%r12b1641xorb %cl,%r11b1642xorb %bl,%r10b16431644pushq %r91645pushq %r816461647leaq .Lkmasklut(%rip),%r816481649movb %r14b,%r9b1650andq $0xf,%r141651vpsubq .Lmask52x4(%rip),%ymm3,%ymm131652shlq $5,%r141653vmovapd (%r8,%r14,1),%ymm141654vblendvpd %ymm14,%ymm13,%ymm3,%ymm316551656shrb $4,%r9b1657andq $0xf,%r91658vpsubq .Lmask52x4(%rip),%ymm4,%ymm131659shlq $5,%r91660vmovapd (%r8,%r9,1),%ymm141661vblendvpd %ymm14,%ymm13,%ymm4,%ymm416621663movb %r13b,%r9b1664andq $0xf,%r131665vpsubq .Lmask52x4(%rip),%ymm5,%ymm131666shlq $5,%r131667vmovapd (%r8,%r13,1),%ymm141668vblendvpd %ymm14,%ymm13,%ymm5,%ymm516691670shrb $4,%r9b1671andq $0xf,%r91672vpsubq .Lmask52x4(%rip),%ymm6,%ymm131673shlq $5,%r91674vmovapd (%r8,%r9,1),%ymm141675vblendvpd %ymm14,%ymm13,%ymm6,%ymm616761677movb %r12b,%r9b1678andq $0xf,%r121679vpsubq .Lmask52x4(%rip),%ymm7,%ymm131680shlq $5,%r121681vmovapd (%r8,%r12,1),%ymm141682vblendvpd %ymm14,%ymm13,%ymm7,%ymm716831684shrb $4,%r9b1685andq $0xf,%r91686vpsubq .Lmask52x4(%rip),%ymm8,%ymm131687shlq $5,%r91688vmovapd (%r8,%r9,1),%ymm141689vblendvpd %ymm14,%ymm13,%ymm8,%ymm816901691movb %r11b,%r9b1692andq $0xf,%r111693vpsubq .Lmask52x4(%rip),%ymm9,%ymm131694shlq $5,%r111695vmovapd (%r8,%r11,1),%ymm141696vblendvpd %ymm14,%ymm13,%ymm9,%ymm916971698shrb $4,%r9b1699andq $0xf,%r91700vpsubq .Lmask52x4(%rip),%ymm10,%ymm131701shlq $5,%r91702vmovapd (%r8,%r9,1),%ymm141703vblendvpd %ymm14,%ymm13,%ymm10,%ymm1017041705movb %r10b,%r9b1706andq $0xf,%r101707vpsubq .Lmask52x4(%rip),%ymm11,%ymm131708shlq $5,%r101709vmovapd (%r8,%r10,1),%ymm141710vblendvpd %ymm14,%ymm13,%ymm11,%ymm1117111712shrb $4,%r9b1713andq $0xf,%r91714vpsubq .Lmask52x4(%rip),%ymm12,%ymm131715shlq $5,%r91716vmovapd (%r8,%r9,1),%ymm141717vblendvpd %ymm14,%ymm13,%ymm12,%ymm1217181719popq %r81720popq %r917211722vpand .Lmask52x4(%rip),%ymm3,%ymm31723vpand .Lmask52x4(%rip),%ymm4,%ymm41724vpand 
.Lmask52x4(%rip),%ymm5,%ymm5            # operand tail of the vpand opened at the end of the previous line
        vpand        .Lmask52x4(%rip),%ymm6,%ymm6
        vpand        .Lmask52x4(%rip),%ymm7,%ymm7
        vpand        .Lmask52x4(%rip),%ymm8,%ymm8
        vpand        .Lmask52x4(%rip),%ymm9,%ymm9

        vpand        .Lmask52x4(%rip),%ymm10,%ymm10
        vpand        .Lmask52x4(%rip),%ymm11,%ymm11
        vpand        .Lmask52x4(%rip),%ymm12,%ymm12

# Write %ymm3..%ymm12 to bytes 320..639 of the buffer at %rdi.
        vmovdqu      %ymm3,320(%rdi)
        vmovdqu      %ymm4,352(%rdi)
        vmovdqu      %ymm5,384(%rdi)
        vmovdqu      %ymm6,416(%rdi)
        vmovdqu      %ymm7,448(%rdi)
        vmovdqu      %ymm8,480(%rdi)
        vmovdqu      %ymm9,512(%rdi)
        vmovdqu      %ymm10,544(%rdi)
        vmovdqu      %ymm11,576(%rdi)
        vmovdqu      %ymm12,608(%rdi)

# Epilogue: reload %r15,%r14,%r13,%r12,%rbp,%rbx from 0..40(%rsp), then
# release those 48 bytes. %rax serves as the CFA register while %rsp moves.
        vzeroupper
        leaq         (%rsp),%rax
.cfi_def_cfa_register   %rax
        movq         0(%rax),%r15
.cfi_restore    %r15
        movq         8(%rax),%r14
.cfi_restore    %r14
        movq         16(%rax),%r13
.cfi_restore    %r13
        movq         24(%rax),%r12
.cfi_restore    %r12
        movq         32(%rax),%rbp
.cfi_restore    %rbp
        movq         40(%rax),%rbx
.cfi_restore    %rbx
        leaq         48(%rax),%rsp
.cfi_def_cfa    %rsp,8
.Lossl_rsaz_amm52x40_x2_avxifma256_epilogue:
.byte   0xf3,0xc3                       # rep ret
.cfi_endproc
.size   ossl_rsaz_amm52x40_x2_avxifma256, .-ossl_rsaz_amm52x40_x2_avxifma256
.text

# ----------------------------------------------------------------------
# ossl_extract_multiplier_2x40_win5_avx
#
# Copies one 640-byte record out of a 32-record table, choosing the low
# and the high 320-byte halves independently.
#
# Registers read on entry:
#   %rdi  destination; bytes 0..639 are written
#   %rsi  table base; 32 records of 640 bytes (20480 bytes) are read
#   %rdx  record number used for bytes 0..319 of the result
#   %rcx  record number used for bytes 320..639 of the result
#
# Each pass loads every record and keeps the wanted one through a blend
# mask (record counter == index), so the addresses touched and the
# branches taken do not depend on %rdx or %rcx.
#
# Clobbers: %rax, %rsi, %r10, %ymm0-%ymm15, flags.  The stack is not used.
#
# NOTE(review): unlike the routine above, this one returns without
# vzeroupper.  Left exactly as generated -- confirm against the perlasm
# source before changing.
# ----------------------------------------------------------------------
.align  32
.globl  ossl_extract_multiplier_2x40_win5_avx
.type   ossl_extract_multiplier_2x40_win5_avx,@function
ossl_extract_multiplier_2x40_win5_avx:
.cfi_startproc
.byte   243,15,30,250                   # endbr64
        vmovapd      .Lones(%rip),%ymm14        # per-lane increment for the record counter
        vmovq        %rdx,%xmm10
        vpbroadcastq %xmm10,%ymm12              # ymm12 = first index in every lane
        vmovq        %rcx,%xmm10
        vpbroadcastq %xmm10,%ymm13              # ymm13 = second index in every lane
        leaq         20480(%rsi),%rax           # rax = end of table (32 * 640 bytes)

        movq         %rsi,%r10                  # keep the table base for the second pass

# Clear the ten accumulators (ymm0..ymm9) and the record counter (ymm11).
        vpxor        %xmm0,%xmm0,%xmm0
        vmovapd      %ymm0,%ymm1
        vmovapd      %ymm0,%ymm2
        vmovapd      %ymm0,%ymm3
        vmovapd      %ymm0,%ymm4
        vmovapd      %ymm0,%ymm5
        vmovapd      %ymm0,%ymm6
        vmovapd      %ymm0,%ymm7
        vmovapd      %ymm0,%ymm8
        vmovapd      %ymm0,%ymm9
        vpxor        %ymm11,%ymm11,%ymm11

# Pass 1: bytes 0..319 of every record; ymm15 is all-ones only for the
# record whose number equals the first index.
.align  32
.Lloop_0:
        vpcmpeqq     %ymm11,%ymm12,%ymm15
        vmovdqu      0(%rsi),%ymm10

        vblendvpd    %ymm15,%ymm10,%ymm0,%ymm0
        vmovdqu      32(%rsi),%ymm10

        vblendvpd    %ymm15,%ymm10,%ymm1,%ymm1
        vmovdqu      64(%rsi),%ymm10

        vblendvpd    %ymm15,%ymm10,%ymm2,%ymm2
        vmovdqu      96(%rsi),%ymm10

        vblendvpd    %ymm15,%ymm10,%ymm3,%ymm3
        vmovdqu      128(%rsi),%ymm10

        vblendvpd    %ymm15,%ymm10,%ymm4,%ymm4
        vmovdqu      160(%rsi),%ymm10

        vblendvpd    %ymm15,%ymm10,%ymm5,%ymm5
        vmovdqu      192(%rsi),%ymm10

        vblendvpd    %ymm15,%ymm10,%ymm6,%ymm6
        vmovdqu      224(%rsi),%ymm10

        vblendvpd    %ymm15,%ymm10,%ymm7,%ymm7
        vmovdqu      256(%rsi),%ymm10

        vblendvpd    %ymm15,%ymm10,%ymm8,%ymm8
        vmovdqu      288(%rsi),%ymm10

        vblendvpd    %ymm15,%ymm10,%ymm9,%ymm9
        vpaddq       %ymm14,%ymm11,%ymm11       # counter += 1
        addq         $640,%rsi                  # next record
        cmpq         %rsi,%rax
        jne          .Lloop_0
        vmovdqu      %ymm0,0(%rdi)
        vmovdqu      %ymm1,32(%rdi)
        vmovdqu      %ymm2,64(%rdi)
        vmovdqu      %ymm3,96(%rdi)
        vmovdqu      %ymm4,128(%rdi)
        vmovdqu      %ymm5,160(%rdi)
        vmovdqu      %ymm6,192(%rdi)
        vmovdqu      %ymm7,224(%rdi)
        vmovdqu      %ymm8,256(%rdi)
        vmovdqu      %ymm9,288(%rdi)

# Pass 2: rewind to the table base and restart the counter, then scan
# bytes 320..639 of every record against the second index.
# ymm0..ymm9 are not cleared here; they still hold the pass-1 selection
# until the counter (0..31) matches %rcx and the blend replaces them.
        movq         %r10,%rsi
        vpxor        %ymm11,%ymm11,%ymm11
.align  32
.Lloop_320:
        vpcmpeqq     %ymm11,%ymm13,%ymm15
        vmovdqu      320(%rsi),%ymm10

        vblendvpd    %ymm15,%ymm10,%ymm0,%ymm0
        vmovdqu      352(%rsi),%ymm10

        vblendvpd    %ymm15,%ymm10,%ymm1,%ymm1
        vmovdqu      384(%rsi),%ymm10

        vblendvpd    %ymm15,%ymm10,%ymm2,%ymm2
        vmovdqu      416(%rsi),%ymm10

        vblendvpd    %ymm15,%ymm10,%ymm3,%ymm3
        vmovdqu      448(%rsi),%ymm10

        vblendvpd    %ymm15,%ymm10,%ymm4,%ymm4
        vmovdqu      480(%rsi),%ymm10

        vblendvpd    %ymm15,%ymm10,%ymm5,%ymm5
        vmovdqu      512(%rsi),%ymm10

        vblendvpd    %ymm15,%ymm10,%ymm6,%ymm6
        vmovdqu      544(%rsi),%ymm10

        vblendvpd    %ymm15,%ymm10,%ymm7,%ymm7
        vmovdqu      576(%rsi),%ymm10

        vblendvpd    %ymm15,%ymm10,%ymm8,%ymm8
        vmovdqu      608(%rsi),%ymm10

        vblendvpd    %ymm15,%ymm10,%ymm9,%ymm9
        vpaddq       %ymm14,%ymm11,%ymm11       # counter += 1
        addq         $640,%rsi                  # next record
        cmpq         %rsi,%rax
        jne          .Lloop_320
        vmovdqu      %ymm0,320(%rdi)
        vmovdqu      %ymm1,352(%rdi)
        vmovdqu      %ymm2,384(%rdi)
        vmovdqu      %ymm3,416(%rdi)
        vmovdqu      %ymm4,448(%rdi)
        vmovdqu      %ymm5,480(%rdi)
        vmovdqu      %ymm6,512(%rdi)
        vmovdqu      %ymm7,544(%rdi)
        vmovdqu      %ymm8,576(%rdi)
        vmovdqu      %ymm9,608(%rdi)

.byte   0xf3,0xc3                       # rep ret
.cfi_endproc
.size   ossl_extract_multiplier_2x40_win5_avx, .-ossl_extract_multiplier_2x40_win5_avx

# Read-only vector constants, 32-byte aligned so vmovapd may load them.
.section .rodata
.align  32
.Lones:
.quad   1,1,1,1
.Lzeros:
.quad   0,0,0,0

# ELF property note: name size, descriptor size, note type 5
# (NT_GNU_PROPERTY_TYPE_0), then the name and one property record.
.section ".note.gnu.property", "a"
.p2align 3
.long   1f - 0f
.long   4f - 1f
.long   5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte   0x47
.byte   0x4e
.byte   0x55
.byte   0
1:
.p2align 3
.long   0xc0000002                      # GNU_PROPERTY_X86_FEATURE_1_AND
.long   3f - 2f                         # size of the property data
2:
.long   3                               # feature bits 0 and 1 (IBT, SHSTK)
3:
.p2align 3
4: