/* Path: blob/main/sys/crypto/openssl/amd64/rsaz-4k-avx512.S */
/* 39483 views */
/* Do not modify. This file is auto-generated from rsaz-4k-avx512.pl. */
/*
 * Syntax: GNU as, AT&T operand order (source first, destination last).
 * Target: x86-64 ELF using AVX-512 IFMA (vpmadd52luq/vpmadd52huq) on
 * 256-bit registers.
 *
 * The three entry points read their arguments from %rdi, %rsi, %rdx,
 * %rcx and %r8, i.e. the System V AMD64 integer-argument order.
 *
 * Numbers are held as 52-bit digits, one per 64-bit lane; .Lmask52x4 is
 * the per-lane digit mask 2^52 - 1.
 */
.text

/*
 * ossl_rsaz_amm52x40_x1_ifma256
 *
 * NOTE(review): the name and the k0/modulus data flow match the "almost
 * Montgomery multiplication" routine of upstream OpenSSL, presumably
 *   void f(BN_ULONG *res, const BN_ULONG *a, const BN_ULONG *b,
 *          const BN_ULONG *m, BN_ULONG k0);
 * confirm against the C caller before relying on this prototype.
 *
 * In:
 *   %rdi  result, 40 qwords (320 bytes) are written
 *   %rsi  a, 40 qwords are read
 *   %rdx  b, 40 qwords, one consumed per multiply step
 *   %rcx  m, 40 qwords are read
 *   %r8   k0, used by value
 * Saved and restored: %rbx, %rbp, %r12-%r15.
 * Overwritten: %rax, %rcx, %rdx, %r8-%r11, flags, %k1-%k7,
 *   %ymm0-%ymm12, %ymm23-%ymm29.
 *
 * Working registers inside the loop:
 *   %ymm3-%ymm12  40-lane accumulator
 *   %ymm0         all-zero, shifted in at the top of the accumulator
 *   %ymm1         current b digit, broadcast
 *   %ymm2         (k0 * low accumulator word) & (2^52 - 1), broadcast
 *   %r9           scalar low accumulator word, %r10 its high part
 *   %r11          cursor into b, %ebx loop counter, %rax = 2^52 - 1
 */
.globl	ossl_rsaz_amm52x40_x1_ifma256
.type	ossl_rsaz_amm52x40_x1_ifma256,@function
.align	32
ossl_rsaz_amm52x40_x1_ifma256:
	.cfi_startproc
	.byte	243,15,30,250			/* endbr64 */
	pushq	%rbx
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%rbx,-16
	pushq	%rbp
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%rbp,-24
	pushq	%r12
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r12,-32
	pushq	%r13
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r13,-40
	pushq	%r14
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r14,-48
	pushq	%r15
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r15,-56

	/* Clear the accumulator and the zero register. */
	vpxord	%ymm0,%ymm0,%ymm0
	vmovdqa64	%ymm0,%ymm3
	vmovdqa64	%ymm0,%ymm4
	vmovdqa64	%ymm0,%ymm5
	vmovdqa64	%ymm0,%ymm6
	vmovdqa64	%ymm0,%ymm7
	vmovdqa64	%ymm0,%ymm8
	vmovdqa64	%ymm0,%ymm9
	vmovdqa64	%ymm0,%ymm10
	vmovdqa64	%ymm0,%ymm11
	vmovdqa64	%ymm0,%ymm12

	xorl	%r9d,%r9d

	/* %rdx is needed as the implicit mulx operand, so b moves to %r11. */
	movq	%rdx,%r11
	movq	$0xfffffffffffff,%rax

	/* 10 iterations, each unrolled over 4 digits of b = 40 steps. */
	movl	$10,%ebx

.align	32
.Lloop10:
	/* ---- step 1 of 4: digit at 0(%r11) ---- */
	movq	0(%r11),%r13

	/* %r10:%r9 = %r9 + a[0] * b_digit */
	vpbroadcastq	%r13,%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	/* %r13 = (k0 * %r9) & (2^52 - 1) */
	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	/* %r10:%r9 += m[0] * %r13 */
	vpbroadcastq	%r13,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	/* %r9 = (%r10:%r9) >> 52 */
	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	/* Low 52 bits of the lane products. */
	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	vpmadd52luq	32(%rsi),%ymm1,%ymm4
	vpmadd52luq	64(%rsi),%ymm1,%ymm5
	vpmadd52luq	96(%rsi),%ymm1,%ymm6
	vpmadd52luq	128(%rsi),%ymm1,%ymm7
	vpmadd52luq	160(%rsi),%ymm1,%ymm8
	vpmadd52luq	192(%rsi),%ymm1,%ymm9
	vpmadd52luq	224(%rsi),%ymm1,%ymm10
	vpmadd52luq	256(%rsi),%ymm1,%ymm11
	vpmadd52luq	288(%rsi),%ymm1,%ymm12

	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	vpmadd52luq	32(%rcx),%ymm2,%ymm4
	vpmadd52luq	64(%rcx),%ymm2,%ymm5
	vpmadd52luq	96(%rcx),%ymm2,%ymm6
	vpmadd52luq	128(%rcx),%ymm2,%ymm7
	vpmadd52luq	160(%rcx),%ymm2,%ymm8
	vpmadd52luq	192(%rcx),%ymm2,%ymm9
	vpmadd52luq	224(%rcx),%ymm2,%ymm10
	vpmadd52luq	256(%rcx),%ymm2,%ymm11
	vpmadd52luq	288(%rcx),%ymm2,%ymm12

	/* Shift the accumulator down one lane; zero enters from %ymm0. */
	valignq	$1,%ymm3,%ymm4,%ymm3
	valignq	$1,%ymm4,%ymm5,%ymm4
	valignq	$1,%ymm5,%ymm6,%ymm5
	valignq	$1,%ymm6,%ymm7,%ymm6
	valignq	$1,%ymm7,%ymm8,%ymm7
	valignq	$1,%ymm8,%ymm9,%ymm8
	valignq	$1,%ymm9,%ymm10,%ymm9
	valignq	$1,%ymm10,%ymm11,%ymm10
	valignq	$1,%ymm11,%ymm12,%ymm11
	valignq	$1,%ymm12,%ymm0,%ymm12

	/* Fold the new lowest lane into the scalar accumulator word. */
	vmovq	%xmm3,%r13
	addq	%r13,%r9

	/* High 52 bits of the same products land one lane lower. */
	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	vpmadd52huq	32(%rsi),%ymm1,%ymm4
	vpmadd52huq	64(%rsi),%ymm1,%ymm5
	vpmadd52huq	96(%rsi),%ymm1,%ymm6
	vpmadd52huq	128(%rsi),%ymm1,%ymm7
	vpmadd52huq	160(%rsi),%ymm1,%ymm8
	vpmadd52huq	192(%rsi),%ymm1,%ymm9
	vpmadd52huq	224(%rsi),%ymm1,%ymm10
	vpmadd52huq	256(%rsi),%ymm1,%ymm11
	vpmadd52huq	288(%rsi),%ymm1,%ymm12

	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	vpmadd52huq	32(%rcx),%ymm2,%ymm4
	vpmadd52huq	64(%rcx),%ymm2,%ymm5
	vpmadd52huq	96(%rcx),%ymm2,%ymm6
	vpmadd52huq	128(%rcx),%ymm2,%ymm7
	vpmadd52huq	160(%rcx),%ymm2,%ymm8
	vpmadd52huq	192(%rcx),%ymm2,%ymm9
	vpmadd52huq	224(%rcx),%ymm2,%ymm10
	vpmadd52huq	256(%rcx),%ymm2,%ymm11
	vpmadd52huq	288(%rcx),%ymm2,%ymm12
	/* ---- step 2 of 4: digit at 8(%r11), same sequence ---- */
	movq	8(%r11),%r13

	vpbroadcastq	%r13,%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	vpbroadcastq	%r13,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	vpmadd52luq	32(%rsi),%ymm1,%ymm4
	vpmadd52luq	64(%rsi),%ymm1,%ymm5
	vpmadd52luq	96(%rsi),%ymm1,%ymm6
	vpmadd52luq	128(%rsi),%ymm1,%ymm7
	vpmadd52luq	160(%rsi),%ymm1,%ymm8
	vpmadd52luq	192(%rsi),%ymm1,%ymm9
	vpmadd52luq	224(%rsi),%ymm1,%ymm10
	vpmadd52luq	256(%rsi),%ymm1,%ymm11
	vpmadd52luq	288(%rsi),%ymm1,%ymm12

	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	vpmadd52luq	32(%rcx),%ymm2,%ymm4
	vpmadd52luq	64(%rcx),%ymm2,%ymm5
	vpmadd52luq	96(%rcx),%ymm2,%ymm6
	vpmadd52luq	128(%rcx),%ymm2,%ymm7
	vpmadd52luq	160(%rcx),%ymm2,%ymm8
	vpmadd52luq	192(%rcx),%ymm2,%ymm9
	vpmadd52luq	224(%rcx),%ymm2,%ymm10
	vpmadd52luq	256(%rcx),%ymm2,%ymm11
	vpmadd52luq	288(%rcx),%ymm2,%ymm12

	valignq	$1,%ymm3,%ymm4,%ymm3
	valignq	$1,%ymm4,%ymm5,%ymm4
	valignq	$1,%ymm5,%ymm6,%ymm5
	valignq	$1,%ymm6,%ymm7,%ymm6
	valignq	$1,%ymm7,%ymm8,%ymm7
	valignq	$1,%ymm8,%ymm9,%ymm8
	valignq	$1,%ymm9,%ymm10,%ymm9
	valignq	$1,%ymm10,%ymm11,%ymm10
	valignq	$1,%ymm11,%ymm12,%ymm11
	valignq	$1,%ymm12,%ymm0,%ymm12

	vmovq	%xmm3,%r13
	addq	%r13,%r9

	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	vpmadd52huq	32(%rsi),%ymm1,%ymm4
	vpmadd52huq	64(%rsi),%ymm1,%ymm5
	vpmadd52huq	96(%rsi),%ymm1,%ymm6
	vpmadd52huq	128(%rsi),%ymm1,%ymm7
	vpmadd52huq	160(%rsi),%ymm1,%ymm8
	vpmadd52huq	192(%rsi),%ymm1,%ymm9
	vpmadd52huq	224(%rsi),%ymm1,%ymm10
	vpmadd52huq	256(%rsi),%ymm1,%ymm11
	vpmadd52huq	288(%rsi),%ymm1,%ymm12

	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	vpmadd52huq	32(%rcx),%ymm2,%ymm4
	vpmadd52huq	64(%rcx),%ymm2,%ymm5
	vpmadd52huq	96(%rcx),%ymm2,%ymm6
	vpmadd52huq	128(%rcx),%ymm2,%ymm7
	vpmadd52huq	160(%rcx),%ymm2,%ymm8
	vpmadd52huq	192(%rcx),%ymm2,%ymm9
	vpmadd52huq	224(%rcx),%ymm2,%ymm10
	vpmadd52huq	256(%rcx),%ymm2,%ymm11
	vpmadd52huq	288(%rcx),%ymm2,%ymm12
	/* ---- step 3 of 4: digit at 16(%r11), same sequence ---- */
	movq	16(%r11),%r13

	vpbroadcastq	%r13,%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	vpbroadcastq	%r13,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	vpmadd52luq	32(%rsi),%ymm1,%ymm4
	vpmadd52luq	64(%rsi),%ymm1,%ymm5
	vpmadd52luq	96(%rsi),%ymm1,%ymm6
	vpmadd52luq	128(%rsi),%ymm1,%ymm7
	vpmadd52luq	160(%rsi),%ymm1,%ymm8
	vpmadd52luq	192(%rsi),%ymm1,%ymm9
	vpmadd52luq	224(%rsi),%ymm1,%ymm10
	vpmadd52luq	256(%rsi),%ymm1,%ymm11
	vpmadd52luq	288(%rsi),%ymm1,%ymm12

	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	vpmadd52luq	32(%rcx),%ymm2,%ymm4
	vpmadd52luq	64(%rcx),%ymm2,%ymm5
	vpmadd52luq	96(%rcx),%ymm2,%ymm6
	vpmadd52luq	128(%rcx),%ymm2,%ymm7
	vpmadd52luq	160(%rcx),%ymm2,%ymm8
	vpmadd52luq	192(%rcx),%ymm2,%ymm9
	vpmadd52luq	224(%rcx),%ymm2,%ymm10
	vpmadd52luq	256(%rcx),%ymm2,%ymm11
	vpmadd52luq	288(%rcx),%ymm2,%ymm12

	valignq	$1,%ymm3,%ymm4,%ymm3
	valignq	$1,%ymm4,%ymm5,%ymm4
	valignq	$1,%ymm5,%ymm6,%ymm5
	valignq	$1,%ymm6,%ymm7,%ymm6
	valignq	$1,%ymm7,%ymm8,%ymm7
	valignq	$1,%ymm8,%ymm9,%ymm8
	valignq	$1,%ymm9,%ymm10,%ymm9
	valignq	$1,%ymm10,%ymm11,%ymm10
	valignq	$1,%ymm11,%ymm12,%ymm11
	valignq	$1,%ymm12,%ymm0,%ymm12

	vmovq	%xmm3,%r13
	addq	%r13,%r9

	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	vpmadd52huq	32(%rsi),%ymm1,%ymm4
	vpmadd52huq	64(%rsi),%ymm1,%ymm5
	vpmadd52huq	96(%rsi),%ymm1,%ymm6
	vpmadd52huq	128(%rsi),%ymm1,%ymm7
	vpmadd52huq	160(%rsi),%ymm1,%ymm8
	vpmadd52huq	192(%rsi),%ymm1,%ymm9
	vpmadd52huq	224(%rsi),%ymm1,%ymm10
	vpmadd52huq	256(%rsi),%ymm1,%ymm11
	vpmadd52huq	288(%rsi),%ymm1,%ymm12

	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	vpmadd52huq	32(%rcx),%ymm2,%ymm4
	vpmadd52huq	64(%rcx),%ymm2,%ymm5
	vpmadd52huq	96(%rcx),%ymm2,%ymm6
	vpmadd52huq	128(%rcx),%ymm2,%ymm7
	vpmadd52huq	160(%rcx),%ymm2,%ymm8
	vpmadd52huq	192(%rcx),%ymm2,%ymm9
	vpmadd52huq	224(%rcx),%ymm2,%ymm10
	vpmadd52huq	256(%rcx),%ymm2,%ymm11
	vpmadd52huq	288(%rcx),%ymm2,%ymm12
	/* ---- step 4 of 4: digit at 24(%r11), same sequence ---- */
	movq	24(%r11),%r13

	vpbroadcastq	%r13,%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	vpbroadcastq	%r13,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	vpmadd52luq	32(%rsi),%ymm1,%ymm4
	vpmadd52luq	64(%rsi),%ymm1,%ymm5
	vpmadd52luq	96(%rsi),%ymm1,%ymm6
	vpmadd52luq	128(%rsi),%ymm1,%ymm7
	vpmadd52luq	160(%rsi),%ymm1,%ymm8
	vpmadd52luq	192(%rsi),%ymm1,%ymm9
	vpmadd52luq	224(%rsi),%ymm1,%ymm10
	vpmadd52luq	256(%rsi),%ymm1,%ymm11
	vpmadd52luq	288(%rsi),%ymm1,%ymm12

	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	vpmadd52luq	32(%rcx),%ymm2,%ymm4
	vpmadd52luq	64(%rcx),%ymm2,%ymm5
	vpmadd52luq	96(%rcx),%ymm2,%ymm6
	vpmadd52luq	128(%rcx),%ymm2,%ymm7
	vpmadd52luq	160(%rcx),%ymm2,%ymm8
	vpmadd52luq	192(%rcx),%ymm2,%ymm9
	vpmadd52luq	224(%rcx),%ymm2,%ymm10
	vpmadd52luq	256(%rcx),%ymm2,%ymm11
	vpmadd52luq	288(%rcx),%ymm2,%ymm12

	valignq	$1,%ymm3,%ymm4,%ymm3
	valignq	$1,%ymm4,%ymm5,%ymm4
	valignq	$1,%ymm5,%ymm6,%ymm5
	valignq	$1,%ymm6,%ymm7,%ymm6
	valignq	$1,%ymm7,%ymm8,%ymm7
	valignq	$1,%ymm8,%ymm9,%ymm8
	valignq	$1,%ymm9,%ymm10,%ymm9
	valignq	$1,%ymm10,%ymm11,%ymm10
	valignq	$1,%ymm11,%ymm12,%ymm11
	valignq	$1,%ymm12,%ymm0,%ymm12

	vmovq	%xmm3,%r13
	addq	%r13,%r9

	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	vpmadd52huq	32(%rsi),%ymm1,%ymm4
	vpmadd52huq	64(%rsi),%ymm1,%ymm5
	vpmadd52huq	96(%rsi),%ymm1,%ymm6
	vpmadd52huq	128(%rsi),%ymm1,%ymm7
	vpmadd52huq	160(%rsi),%ymm1,%ymm8
	vpmadd52huq	192(%rsi),%ymm1,%ymm9
	vpmadd52huq	224(%rsi),%ymm1,%ymm10
	vpmadd52huq	256(%rsi),%ymm1,%ymm11
	vpmadd52huq	288(%rsi),%ymm1,%ymm12

	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	vpmadd52huq	32(%rcx),%ymm2,%ymm4
	vpmadd52huq	64(%rcx),%ymm2,%ymm5
	vpmadd52huq	96(%rcx),%ymm2,%ymm6
	vpmadd52huq	128(%rcx),%ymm2,%ymm7
	vpmadd52huq	160(%rcx),%ymm2,%ymm8
	vpmadd52huq	192(%rcx),%ymm2,%ymm9
	vpmadd52huq	224(%rcx),%ymm2,%ymm10
	vpmadd52huq	256(%rcx),%ymm2,%ymm11
	vpmadd52huq	288(%rcx),%ymm2,%ymm12
	leaq	32(%r11),%r11
	decl	%ebx
	jne	.Lloop10

	/* Put the scalar accumulator word back into lane 0. */
	vpbroadcastq	%r9,%ymm0
	vpblendd	$3,%ymm0,%ymm3,%ymm3

	/*
	 * Normalise every lane to 52 bits.
	 * 1) Take the bits above bit 51 of each lane as that lane's carry.
	 */
	vpsrlq	$52,%ymm3,%ymm0
	vpsrlq	$52,%ymm4,%ymm1
	vpsrlq	$52,%ymm5,%ymm2
	vpsrlq	$52,%ymm6,%ymm23
	vpsrlq	$52,%ymm7,%ymm24
	vpsrlq	$52,%ymm8,%ymm25
	vpsrlq	$52,%ymm9,%ymm26
	vpsrlq	$52,%ymm10,%ymm27
	vpsrlq	$52,%ymm11,%ymm28
	vpsrlq	$52,%ymm12,%ymm29

	/* 2) Move each carry up one lane; zero enters the lowest lane. */
	valignq	$3,%ymm28,%ymm29,%ymm29
	valignq	$3,%ymm27,%ymm28,%ymm28
	valignq	$3,%ymm26,%ymm27,%ymm27
	valignq	$3,%ymm25,%ymm26,%ymm26
	valignq	$3,%ymm24,%ymm25,%ymm25
	valignq	$3,%ymm23,%ymm24,%ymm24
	valignq	$3,%ymm2,%ymm23,%ymm23
	valignq	$3,%ymm1,%ymm2,%ymm2
	valignq	$3,%ymm0,%ymm1,%ymm1
	valignq	$3,.Lzeros(%rip),%ymm0,%ymm0

	/* 3) Keep the low 52 bits of each lane ... */
	vpandq	.Lmask52x4(%rip),%ymm3,%ymm3
	vpandq	.Lmask52x4(%rip),%ymm4,%ymm4
	vpandq	.Lmask52x4(%rip),%ymm5,%ymm5
	vpandq	.Lmask52x4(%rip),%ymm6,%ymm6
	vpandq	.Lmask52x4(%rip),%ymm7,%ymm7
	vpandq	.Lmask52x4(%rip),%ymm8,%ymm8
	vpandq	.Lmask52x4(%rip),%ymm9,%ymm9
	vpandq	.Lmask52x4(%rip),%ymm10,%ymm10
	vpandq	.Lmask52x4(%rip),%ymm11,%ymm11
	vpandq	.Lmask52x4(%rip),%ymm12,%ymm12

	/* ... and add the carry that came from the lane below. */
	vpaddq	%ymm0,%ymm3,%ymm3
	vpaddq	%ymm1,%ymm4,%ymm4
	vpaddq	%ymm2,%ymm5,%ymm5
	vpaddq	%ymm23,%ymm6,%ymm6
	vpaddq	%ymm24,%ymm7,%ymm7
	vpaddq	%ymm25,%ymm8,%ymm8
	vpaddq	%ymm26,%ymm9,%ymm9
	vpaddq	%ymm27,%ymm10,%ymm10
	vpaddq	%ymm28,%ymm11,%ymm11
	vpaddq	%ymm29,%ymm12,%ymm12

	/*
	 * 4) One bit per lane, 8 lanes per byte: lanes that now exceed
	 *    2^52 - 1 (predicate 6, unsigned greater-than).
	 */
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm3,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm4,%k2
	kmovb	%k1,%r14d
	kmovb	%k2,%r13d
	shlb	$4,%r13b
	orb	%r13b,%r14b

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm5,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm6,%k2
	kmovb	%k1,%r13d
	kmovb	%k2,%r12d
	shlb	$4,%r12b
	orb	%r12b,%r13b

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm7,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm8,%k2
	kmovb	%k1,%r12d
	kmovb	%k2,%r11d
	shlb	$4,%r11b
	orb	%r11b,%r12b

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm9,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm10,%k2
	kmovb	%k1,%r11d
	kmovb	%k2,%r10d
	shlb	$4,%r10b
	orb	%r10b,%r11b

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm11,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm12,%k2
	kmovb	%k1,%r10d
	kmovb	%k2,%r9d
	shlb	$4,%r9b
	orb	%r9b,%r10b

	/* Shift the 40-bit overflow map left by one lane. */
	addb	%r14b,%r14b
	adcb	%r13b,%r13b
	adcb	%r12b,%r12b
	adcb	%r11b,%r11b
	adcb	%r10b,%r10b

	/* 5) Lanes equal to 2^52 - 1 (predicate 0) pass a carry along. */
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm3,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm4,%k2
	kmovb	%k1,%r9d
	kmovb	%k2,%r8d
	shlb	$4,%r8b
	orb	%r8b,%r9b

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm5,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm6,%k2
	kmovb	%k1,%r8d
	kmovb	%k2,%edx
	shlb	$4,%dl
	orb	%dl,%r8b

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm7,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm8,%k2
	kmovb	%k1,%edx
	kmovb	%k2,%ecx
	shlb	$4,%cl
	orb	%cl,%dl

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm9,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm10,%k2
	kmovb	%k1,%ecx
	kmovb	%k2,%ebx
	shlb	$4,%bl
	orb	%bl,%cl

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm11,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm12,%k2
	kmovb	%k1,%ebx
	kmovb	%k2,%eax
	shlb	$4,%al
	orb	%al,%bl

	/* 6) Ripple the carries through the maps with add/adc, then xor. */
	addb	%r9b,%r14b
	adcb	%r8b,%r13b
	adcb	%dl,%r12b
	adcb	%cl,%r11b
	adcb	%bl,%r10b

	xorb	%r9b,%r14b
	xorb	%r8b,%r13b
	xorb	%dl,%r12b
	xorb	%cl,%r11b
	xorb	%bl,%r10b

	/* 7) Split each map byte into two 4-lane write masks. */
	kmovb	%r14d,%k1
	shrb	$4,%r14b
	kmovb	%r14d,%k2
	kmovb	%r13d,%k3
	shrb	$4,%r13b
	kmovb	%r13d,%k4
	kmovb	%r12d,%k5
	shrb	$4,%r12b
	kmovb	%r12d,%k6
	kmovb	%r11d,%k7

	/* Subtracting 2^52 - 1 then masking equals adding 1 mod 2^52. */
	vpsubq	.Lmask52x4(%rip),%ymm3,%ymm3{%k1}
	vpsubq	.Lmask52x4(%rip),%ymm4,%ymm4{%k2}
	vpsubq	.Lmask52x4(%rip),%ymm5,%ymm5{%k3}
	vpsubq	.Lmask52x4(%rip),%ymm6,%ymm6{%k4}
	vpsubq	.Lmask52x4(%rip),%ymm7,%ymm7{%k5}
	vpsubq	.Lmask52x4(%rip),%ymm8,%ymm8{%k6}
	vpsubq	.Lmask52x4(%rip),%ymm9,%ymm9{%k7}

	vpandq	.Lmask52x4(%rip),%ymm3,%ymm3
	vpandq	.Lmask52x4(%rip),%ymm4,%ymm4
	vpandq	.Lmask52x4(%rip),%ymm5,%ymm5
	vpandq	.Lmask52x4(%rip),%ymm6,%ymm6
	vpandq	.Lmask52x4(%rip),%ymm7,%ymm7
	vpandq	.Lmask52x4(%rip),%ymm8,%ymm8
	vpandq	.Lmask52x4(%rip),%ymm9,%ymm9

	/* Only %k1-%k7 are usable as write masks, so reuse %k1-%k3. */
	shrb	$4,%r11b
	kmovb	%r11d,%k1
	kmovb	%r10d,%k2
	shrb	$4,%r10b
	kmovb	%r10d,%k3

	vpsubq	.Lmask52x4(%rip),%ymm10,%ymm10{%k1}
	vpsubq	.Lmask52x4(%rip),%ymm11,%ymm11{%k2}
	vpsubq	.Lmask52x4(%rip),%ymm12,%ymm12{%k3}

	vpandq	.Lmask52x4(%rip),%ymm10,%ymm10
	vpandq	.Lmask52x4(%rip),%ymm11,%ymm11
	vpandq	.Lmask52x4(%rip),%ymm12,%ymm12

	/* Store the 40 result digits. */
	vmovdqu64	%ymm3,0(%rdi)
	vmovdqu64	%ymm4,32(%rdi)
	vmovdqu64	%ymm5,64(%rdi)
	vmovdqu64	%ymm6,96(%rdi)
	vmovdqu64	%ymm7,128(%rdi)
	vmovdqu64	%ymm8,160(%rdi)
	vmovdqu64	%ymm9,192(%rdi)
	vmovdqu64	%ymm10,224(%rdi)
	vmovdqu64	%ymm11,256(%rdi)
	vmovdqu64	%ymm12,288(%rdi)

	vzeroupper
	/* Reload the six saved registers through %rax, then drop the frame. */
	leaq	(%rsp),%rax
	.cfi_def_cfa_register	%rax
	movq	0(%rax),%r15
	.cfi_restore	%r15
	movq	8(%rax),%r14
	.cfi_restore	%r14
	movq	16(%rax),%r13
	.cfi_restore	%r13
	movq	24(%rax),%r12
	.cfi_restore	%r12
	movq	32(%rax),%rbp
	.cfi_restore	%rbp
	movq	40(%rax),%rbx
	.cfi_restore	%rbx
	leaq	48(%rax),%rsp
	.cfi_def_cfa	%rsp,8
.Lossl_rsaz_amm52x40_x1_ifma256_epilogue:

	.byte	0xf3,0xc3			/* rep ret */
	.cfi_endproc
.size	ossl_rsaz_amm52x40_x1_ifma256, .-ossl_rsaz_amm52x40_x1_ifma256
.section	.rodata
.align	32
/* 2^52 - 1 in each of the four 64-bit lanes. */
.Lmask52x4:
.quad	0xfffffffffffff
.quad	0xfffffffffffff
.quad	0xfffffffffffff
.quad	0xfffffffffffff
.text

/*
 * ossl_rsaz_amm52x40_x2_ifma256
 *
 * Two independent 40-digit multiplications of the x1 form, interleaved
 * in a single 40-iteration loop. The second set of operands sits 320
 * bytes after the first in every buffer.
 *
 * In:
 *   %rdi  result, 80 qwords (640 bytes) are written
 *   %rsi  a, 2 x 40 qwords are read
 *   %rdx  b, 2 x 40 qwords, one digit of each half per iteration
 *   %rcx  m, 2 x 40 qwords are read
 *   %r8   pointer to two qwords: k0 for the first half at 0(%r8),
 *         k0 for the second half at 8(%r8)
 * Saved and restored: %rbx, %rbp, %r12-%r15.
 * Overwritten: %rax, %rcx, %rdx, %r8-%r11, flags, %k1-%k7,
 *   %ymm0-%ymm29.
 *
 * Working registers inside the loop:
 *   %ymm3-%ymm12 / %r9    accumulator and low word, first half
 *   %ymm13-%ymm22 / %r15  accumulator and low word, second half
 *   %ymm0 zero, %ymm1/%ymm2 broadcast multipliers, %r10 high part,
 *   %r11 cursor into b, %ebx loop counter, %rax = 2^52 - 1
 */
.globl	ossl_rsaz_amm52x40_x2_ifma256
.type	ossl_rsaz_amm52x40_x2_ifma256,@function
.align	32
ossl_rsaz_amm52x40_x2_ifma256:
	.cfi_startproc
	.byte	243,15,30,250			/* endbr64 */
	pushq	%rbx
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%rbx,-16
	pushq	%rbp
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%rbp,-24
	pushq	%r12
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r12,-32
	pushq	%r13
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r13,-40
	pushq	%r14
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r14,-48
	pushq	%r15
	.cfi_adjust_cfa_offset	8
	.cfi_offset	%r15,-56

	/* Clear both accumulators and the zero register. */
	vpxord	%ymm0,%ymm0,%ymm0
	vmovdqa64	%ymm0,%ymm3
	vmovdqa64	%ymm0,%ymm4
	vmovdqa64	%ymm0,%ymm5
	vmovdqa64	%ymm0,%ymm6
	vmovdqa64	%ymm0,%ymm7
	vmovdqa64	%ymm0,%ymm8
	vmovdqa64	%ymm0,%ymm9
	vmovdqa64	%ymm0,%ymm10
	vmovdqa64	%ymm0,%ymm11
	vmovdqa64	%ymm0,%ymm12

	vmovdqa64	%ymm0,%ymm13
	vmovdqa64	%ymm0,%ymm14
	vmovdqa64	%ymm0,%ymm15
	vmovdqa64	%ymm0,%ymm16
	vmovdqa64	%ymm0,%ymm17
	vmovdqa64	%ymm0,%ymm18
	vmovdqa64	%ymm0,%ymm19
	vmovdqa64	%ymm0,%ymm20
	vmovdqa64	%ymm0,%ymm21
	vmovdqa64	%ymm0,%ymm22

	xorl	%r9d,%r9d
	xorl	%r15d,%r15d

	/* %rdx is needed as the implicit mulx operand, so b moves to %r11. */
	movq	%rdx,%r11
	movq	$0xfffffffffffff,%rax

	movl	$40,%ebx

.align	32
.Lloop40:
	/* ---- first half: digit at 0(%r11), k0 at 0(%r8) ---- */
	movq	0(%r11),%r13

	vpbroadcastq	%r13,%ymm1
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10

	movq	(%r8),%r13
	imulq	%r9,%r13
	andq	%rax,%r13

	vpbroadcastq	%r13,%ymm2
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r9
	adcq	%r12,%r10

	shrq	$52,%r9
	salq	$12,%r10
	orq	%r10,%r9

	vpmadd52luq	0(%rsi),%ymm1,%ymm3
	vpmadd52luq	32(%rsi),%ymm1,%ymm4
	vpmadd52luq	64(%rsi),%ymm1,%ymm5
	vpmadd52luq	96(%rsi),%ymm1,%ymm6
	vpmadd52luq	128(%rsi),%ymm1,%ymm7
	vpmadd52luq	160(%rsi),%ymm1,%ymm8
	vpmadd52luq	192(%rsi),%ymm1,%ymm9
	vpmadd52luq	224(%rsi),%ymm1,%ymm10
	vpmadd52luq	256(%rsi),%ymm1,%ymm11
	vpmadd52luq	288(%rsi),%ymm1,%ymm12

	vpmadd52luq	0(%rcx),%ymm2,%ymm3
	vpmadd52luq	32(%rcx),%ymm2,%ymm4
	vpmadd52luq	64(%rcx),%ymm2,%ymm5
	vpmadd52luq	96(%rcx),%ymm2,%ymm6
	vpmadd52luq	128(%rcx),%ymm2,%ymm7
	vpmadd52luq	160(%rcx),%ymm2,%ymm8
	vpmadd52luq	192(%rcx),%ymm2,%ymm9
	vpmadd52luq	224(%rcx),%ymm2,%ymm10
	vpmadd52luq	256(%rcx),%ymm2,%ymm11
	vpmadd52luq	288(%rcx),%ymm2,%ymm12

	valignq	$1,%ymm3,%ymm4,%ymm3
	valignq	$1,%ymm4,%ymm5,%ymm4
	valignq	$1,%ymm5,%ymm6,%ymm5
	valignq	$1,%ymm6,%ymm7,%ymm6
	valignq	$1,%ymm7,%ymm8,%ymm7
	valignq	$1,%ymm8,%ymm9,%ymm8
	valignq	$1,%ymm9,%ymm10,%ymm9
	valignq	$1,%ymm10,%ymm11,%ymm10
	valignq	$1,%ymm11,%ymm12,%ymm11
	valignq	$1,%ymm12,%ymm0,%ymm12

	vmovq	%xmm3,%r13
	addq	%r13,%r9

	vpmadd52huq	0(%rsi),%ymm1,%ymm3
	vpmadd52huq	32(%rsi),%ymm1,%ymm4
	vpmadd52huq	64(%rsi),%ymm1,%ymm5
	vpmadd52huq	96(%rsi),%ymm1,%ymm6
	vpmadd52huq	128(%rsi),%ymm1,%ymm7
	vpmadd52huq	160(%rsi),%ymm1,%ymm8
	vpmadd52huq	192(%rsi),%ymm1,%ymm9
	vpmadd52huq	224(%rsi),%ymm1,%ymm10
	vpmadd52huq	256(%rsi),%ymm1,%ymm11
	vpmadd52huq	288(%rsi),%ymm1,%ymm12

	vpmadd52huq	0(%rcx),%ymm2,%ymm3
	vpmadd52huq	32(%rcx),%ymm2,%ymm4
	vpmadd52huq	64(%rcx),%ymm2,%ymm5
	vpmadd52huq	96(%rcx),%ymm2,%ymm6
	vpmadd52huq	128(%rcx),%ymm2,%ymm7
	vpmadd52huq	160(%rcx),%ymm2,%ymm8
	vpmadd52huq	192(%rcx),%ymm2,%ymm9
	vpmadd52huq	224(%rcx),%ymm2,%ymm10
	vpmadd52huq	256(%rcx),%ymm2,%ymm11
	vpmadd52huq	288(%rcx),%ymm2,%ymm12
	/* ---- second half: digit at 320(%r11), k0 at 8(%r8) ---- */
	movq	320(%r11),%r13

	vpbroadcastq	%r13,%ymm1
	movq	320(%rsi),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r15
	movq	%r12,%r10
	adcq	$0,%r10

	movq	8(%r8),%r13
	imulq	%r15,%r13
	andq	%rax,%r13

	vpbroadcastq	%r13,%ymm2
	movq	320(%rcx),%rdx
	mulxq	%r13,%r13,%r12
	addq	%r13,%r15
	adcq	%r12,%r10

	shrq	$52,%r15
	salq	$12,%r10
	orq	%r10,%r15

	vpmadd52luq	320(%rsi),%ymm1,%ymm13
	vpmadd52luq	352(%rsi),%ymm1,%ymm14
	vpmadd52luq	384(%rsi),%ymm1,%ymm15
	vpmadd52luq	416(%rsi),%ymm1,%ymm16
	vpmadd52luq	448(%rsi),%ymm1,%ymm17
	vpmadd52luq	480(%rsi),%ymm1,%ymm18
	vpmadd52luq	512(%rsi),%ymm1,%ymm19
	vpmadd52luq	544(%rsi),%ymm1,%ymm20
	vpmadd52luq	576(%rsi),%ymm1,%ymm21
	vpmadd52luq	608(%rsi),%ymm1,%ymm22

	vpmadd52luq	320(%rcx),%ymm2,%ymm13
	vpmadd52luq	352(%rcx),%ymm2,%ymm14
	vpmadd52luq	384(%rcx),%ymm2,%ymm15
	vpmadd52luq	416(%rcx),%ymm2,%ymm16
	vpmadd52luq	448(%rcx),%ymm2,%ymm17
	vpmadd52luq	480(%rcx),%ymm2,%ymm18
	vpmadd52luq	512(%rcx),%ymm2,%ymm19
	vpmadd52luq	544(%rcx),%ymm2,%ymm20
	vpmadd52luq	576(%rcx),%ymm2,%ymm21
	vpmadd52luq	608(%rcx),%ymm2,%ymm22

	valignq	$1,%ymm13,%ymm14,%ymm13
	valignq	$1,%ymm14,%ymm15,%ymm14
	valignq	$1,%ymm15,%ymm16,%ymm15
	valignq	$1,%ymm16,%ymm17,%ymm16
	valignq	$1,%ymm17,%ymm18,%ymm17
	valignq	$1,%ymm18,%ymm19,%ymm18
	valignq	$1,%ymm19,%ymm20,%ymm19
	valignq	$1,%ymm20,%ymm21,%ymm20
	valignq	$1,%ymm21,%ymm22,%ymm21
	valignq	$1,%ymm22,%ymm0,%ymm22

	vmovq	%xmm13,%r13
	addq	%r13,%r15

	vpmadd52huq	320(%rsi),%ymm1,%ymm13
	vpmadd52huq	352(%rsi),%ymm1,%ymm14
	vpmadd52huq	384(%rsi),%ymm1,%ymm15
	vpmadd52huq	416(%rsi),%ymm1,%ymm16
	vpmadd52huq	448(%rsi),%ymm1,%ymm17
	vpmadd52huq	480(%rsi),%ymm1,%ymm18
	vpmadd52huq	512(%rsi),%ymm1,%ymm19
	vpmadd52huq	544(%rsi),%ymm1,%ymm20
	vpmadd52huq	576(%rsi),%ymm1,%ymm21
	vpmadd52huq	608(%rsi),%ymm1,%ymm22

	vpmadd52huq	320(%rcx),%ymm2,%ymm13
	vpmadd52huq	352(%rcx),%ymm2,%ymm14
	vpmadd52huq	384(%rcx),%ymm2,%ymm15
	vpmadd52huq	416(%rcx),%ymm2,%ymm16
	vpmadd52huq	448(%rcx),%ymm2,%ymm17
	vpmadd52huq	480(%rcx),%ymm2,%ymm18
	vpmadd52huq	512(%rcx),%ymm2,%ymm19
	vpmadd52huq	544(%rcx),%ymm2,%ymm20
	vpmadd52huq	576(%rcx),%ymm2,%ymm21
	vpmadd52huq	608(%rcx),%ymm2,%ymm22
	leaq	8(%r11),%r11
	decl	%ebx
	jne	.Lloop40

	/*
	 * Normalise the first half (%ymm3-%ymm12, low word %r9) to 52-bit
	 * lanes: extract carries, move them up a lane, add, then ripple
	 * any remaining carries using per-lane bit maps.
	 */
	vpbroadcastq	%r9,%ymm0
	vpblendd	$3,%ymm0,%ymm3,%ymm3

	vpsrlq	$52,%ymm3,%ymm0
	vpsrlq	$52,%ymm4,%ymm1
	vpsrlq	$52,%ymm5,%ymm2
	vpsrlq	$52,%ymm6,%ymm23
	vpsrlq	$52,%ymm7,%ymm24
	vpsrlq	$52,%ymm8,%ymm25
	vpsrlq	$52,%ymm9,%ymm26
	vpsrlq	$52,%ymm10,%ymm27
	vpsrlq	$52,%ymm11,%ymm28
	vpsrlq	$52,%ymm12,%ymm29

	valignq	$3,%ymm28,%ymm29,%ymm29
	valignq	$3,%ymm27,%ymm28,%ymm28
	valignq	$3,%ymm26,%ymm27,%ymm27
	valignq	$3,%ymm25,%ymm26,%ymm26
	valignq	$3,%ymm24,%ymm25,%ymm25
	valignq	$3,%ymm23,%ymm24,%ymm24
	valignq	$3,%ymm2,%ymm23,%ymm23
	valignq	$3,%ymm1,%ymm2,%ymm2
	valignq	$3,%ymm0,%ymm1,%ymm1
	valignq	$3,.Lzeros(%rip),%ymm0,%ymm0

	vpandq	.Lmask52x4(%rip),%ymm3,%ymm3
	vpandq	.Lmask52x4(%rip),%ymm4,%ymm4
	vpandq	.Lmask52x4(%rip),%ymm5,%ymm5
	vpandq	.Lmask52x4(%rip),%ymm6,%ymm6
	vpandq	.Lmask52x4(%rip),%ymm7,%ymm7
	vpandq	.Lmask52x4(%rip),%ymm8,%ymm8
	vpandq	.Lmask52x4(%rip),%ymm9,%ymm9
	vpandq	.Lmask52x4(%rip),%ymm10,%ymm10
	vpandq	.Lmask52x4(%rip),%ymm11,%ymm11
	vpandq	.Lmask52x4(%rip),%ymm12,%ymm12

	vpaddq	%ymm0,%ymm3,%ymm3
	vpaddq	%ymm1,%ymm4,%ymm4
	vpaddq	%ymm2,%ymm5,%ymm5
	vpaddq	%ymm23,%ymm6,%ymm6
	vpaddq	%ymm24,%ymm7,%ymm7
	vpaddq	%ymm25,%ymm8,%ymm8
	vpaddq	%ymm26,%ymm9,%ymm9
	vpaddq	%ymm27,%ymm10,%ymm10
	vpaddq	%ymm28,%ymm11,%ymm11
	vpaddq	%ymm29,%ymm12,%ymm12

	/* Lanes greater than 2^52 - 1 (predicate 6). */
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm3,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm4,%k2
	kmovb	%k1,%r14d
	kmovb	%k2,%r13d
	shlb	$4,%r13b
	orb	%r13b,%r14b

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm5,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm6,%k2
	kmovb	%k1,%r13d
	kmovb	%k2,%r12d
	shlb	$4,%r12b
	orb	%r12b,%r13b

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm7,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm8,%k2
	kmovb	%k1,%r12d
	kmovb	%k2,%r11d
	shlb	$4,%r11b
	orb	%r11b,%r12b

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm9,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm10,%k2
	kmovb	%k1,%r11d
	kmovb	%k2,%r10d
	shlb	$4,%r10b
	orb	%r10b,%r11b

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm11,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm12,%k2
	kmovb	%k1,%r10d
	kmovb	%k2,%r9d
	shlb	$4,%r9b
	orb	%r9b,%r10b

	addb	%r14b,%r14b
	adcb	%r13b,%r13b
	adcb	%r12b,%r12b
	adcb	%r11b,%r11b
	adcb	%r10b,%r10b

	/* Lanes equal to 2^52 - 1 (predicate 0). */
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm3,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm4,%k2
	kmovb	%k1,%r9d
	kmovb	%k2,%r8d
	shlb	$4,%r8b
	orb	%r8b,%r9b

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm5,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm6,%k2
	kmovb	%k1,%r8d
	kmovb	%k2,%edx
	shlb	$4,%dl
	orb	%dl,%r8b

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm7,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm8,%k2
	kmovb	%k1,%edx
	kmovb	%k2,%ecx
	shlb	$4,%cl
	orb	%cl,%dl

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm9,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm10,%k2
	kmovb	%k1,%ecx
	kmovb	%k2,%ebx
	shlb	$4,%bl
	orb	%bl,%cl

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm11,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm12,%k2
	kmovb	%k1,%ebx
	kmovb	%k2,%eax
	shlb	$4,%al
	orb	%al,%bl

	addb	%r9b,%r14b
	adcb	%r8b,%r13b
	adcb	%dl,%r12b
	adcb	%cl,%r11b
	adcb	%bl,%r10b

	xorb	%r9b,%r14b
	xorb	%r8b,%r13b
	xorb	%dl,%r12b
	xorb	%cl,%r11b
	xorb	%bl,%r10b

	kmovb	%r14d,%k1
	shrb	$4,%r14b
	kmovb	%r14d,%k2
	kmovb	%r13d,%k3
	shrb	$4,%r13b
	kmovb	%r13d,%k4
	kmovb	%r12d,%k5
	shrb	$4,%r12b
	kmovb	%r12d,%k6
	kmovb	%r11d,%k7

	vpsubq	.Lmask52x4(%rip),%ymm3,%ymm3{%k1}
	vpsubq	.Lmask52x4(%rip),%ymm4,%ymm4{%k2}
	vpsubq	.Lmask52x4(%rip),%ymm5,%ymm5{%k3}
	vpsubq	.Lmask52x4(%rip),%ymm6,%ymm6{%k4}
	vpsubq	.Lmask52x4(%rip),%ymm7,%ymm7{%k5}
	vpsubq	.Lmask52x4(%rip),%ymm8,%ymm8{%k6}
	vpsubq	.Lmask52x4(%rip),%ymm9,%ymm9{%k7}

	vpandq	.Lmask52x4(%rip),%ymm3,%ymm3
	vpandq	.Lmask52x4(%rip),%ymm4,%ymm4
	vpandq	.Lmask52x4(%rip),%ymm5,%ymm5
	vpandq	.Lmask52x4(%rip),%ymm6,%ymm6
	vpandq	.Lmask52x4(%rip),%ymm7,%ymm7
	vpandq	.Lmask52x4(%rip),%ymm8,%ymm8
	vpandq	.Lmask52x4(%rip),%ymm9,%ymm9

	shrb	$4,%r11b
	kmovb	%r11d,%k1
	kmovb	%r10d,%k2
	shrb	$4,%r10b
	kmovb	%r10d,%k3

	vpsubq	.Lmask52x4(%rip),%ymm10,%ymm10{%k1}
	vpsubq	.Lmask52x4(%rip),%ymm11,%ymm11{%k2}
	vpsubq	.Lmask52x4(%rip),%ymm12,%ymm12{%k3}

	vpandq	.Lmask52x4(%rip),%ymm10,%ymm10
	vpandq	.Lmask52x4(%rip),%ymm11,%ymm11
	vpandq	.Lmask52x4(%rip),%ymm12,%ymm12

	/*
	 * Normalise the second half (%ymm13-%ymm22, low word %r15) with
	 * the same sequence. %r15 is not touched by the block above.
	 */
	vpbroadcastq	%r15,%ymm0
	vpblendd	$3,%ymm0,%ymm13,%ymm13

	vpsrlq	$52,%ymm13,%ymm0
	vpsrlq	$52,%ymm14,%ymm1
	vpsrlq	$52,%ymm15,%ymm2
	vpsrlq	$52,%ymm16,%ymm23
	vpsrlq	$52,%ymm17,%ymm24
	vpsrlq	$52,%ymm18,%ymm25
	vpsrlq	$52,%ymm19,%ymm26
	vpsrlq	$52,%ymm20,%ymm27
	vpsrlq	$52,%ymm21,%ymm28
	vpsrlq	$52,%ymm22,%ymm29

	valignq	$3,%ymm28,%ymm29,%ymm29
	valignq	$3,%ymm27,%ymm28,%ymm28
	valignq	$3,%ymm26,%ymm27,%ymm27
	valignq	$3,%ymm25,%ymm26,%ymm26
	valignq	$3,%ymm24,%ymm25,%ymm25
	valignq	$3,%ymm23,%ymm24,%ymm24
	valignq	$3,%ymm2,%ymm23,%ymm23
	valignq	$3,%ymm1,%ymm2,%ymm2
	valignq	$3,%ymm0,%ymm1,%ymm1
	valignq	$3,.Lzeros(%rip),%ymm0,%ymm0

	vpandq	.Lmask52x4(%rip),%ymm13,%ymm13
	vpandq	.Lmask52x4(%rip),%ymm14,%ymm14
	vpandq	.Lmask52x4(%rip),%ymm15,%ymm15
	vpandq	.Lmask52x4(%rip),%ymm16,%ymm16
	vpandq	.Lmask52x4(%rip),%ymm17,%ymm17
	vpandq	.Lmask52x4(%rip),%ymm18,%ymm18
	vpandq	.Lmask52x4(%rip),%ymm19,%ymm19
	vpandq	.Lmask52x4(%rip),%ymm20,%ymm20
	vpandq	.Lmask52x4(%rip),%ymm21,%ymm21
	vpandq	.Lmask52x4(%rip),%ymm22,%ymm22

	vpaddq	%ymm0,%ymm13,%ymm13
	vpaddq	%ymm1,%ymm14,%ymm14
	vpaddq	%ymm2,%ymm15,%ymm15
	vpaddq	%ymm23,%ymm16,%ymm16
	vpaddq	%ymm24,%ymm17,%ymm17
	vpaddq	%ymm25,%ymm18,%ymm18
	vpaddq	%ymm26,%ymm19,%ymm19
	vpaddq	%ymm27,%ymm20,%ymm20
	vpaddq	%ymm28,%ymm21,%ymm21
	vpaddq	%ymm29,%ymm22,%ymm22

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm13,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm14,%k2
	kmovb	%k1,%r14d
	kmovb	%k2,%r13d
	shlb	$4,%r13b
	orb	%r13b,%r14b

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm15,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm16,%k2
	kmovb	%k1,%r13d
	kmovb	%k2,%r12d
	shlb	$4,%r12b
	orb	%r12b,%r13b

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm17,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm18,%k2
	kmovb	%k1,%r12d
	kmovb	%k2,%r11d
	shlb	$4,%r11b
	orb	%r11b,%r12b

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm19,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm20,%k2
	kmovb	%k1,%r11d
	kmovb	%k2,%r10d
	shlb	$4,%r10b
	orb	%r10b,%r11b

	vpcmpuq	$6,.Lmask52x4(%rip),%ymm21,%k1
	vpcmpuq	$6,.Lmask52x4(%rip),%ymm22,%k2
	kmovb	%k1,%r10d
	kmovb	%k2,%r9d
	shlb	$4,%r9b
	orb	%r9b,%r10b

	addb	%r14b,%r14b
	adcb	%r13b,%r13b
	adcb	%r12b,%r12b
	adcb	%r11b,%r11b
	adcb	%r10b,%r10b

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm13,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm14,%k2
	kmovb	%k1,%r9d
	kmovb	%k2,%r8d
	shlb	$4,%r8b
	orb	%r8b,%r9b

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm15,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm16,%k2
	kmovb	%k1,%r8d
	kmovb	%k2,%edx
	shlb	$4,%dl
	orb	%dl,%r8b

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm17,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm18,%k2
	kmovb	%k1,%edx
	kmovb	%k2,%ecx
	shlb	$4,%cl
	orb	%cl,%dl

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm19,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm20,%k2
	kmovb	%k1,%ecx
	kmovb	%k2,%ebx
	shlb	$4,%bl
	orb	%bl,%cl

	vpcmpuq	$0,.Lmask52x4(%rip),%ymm21,%k1
	vpcmpuq	$0,.Lmask52x4(%rip),%ymm22,%k2
	kmovb	%k1,%ebx
	kmovb	%k2,%eax
	shlb	$4,%al
	orb	%al,%bl

	addb	%r9b,%r14b
	adcb	%r8b,%r13b
	adcb	%dl,%r12b
	adcb	%cl,%r11b
	adcb	%bl,%r10b

	xorb	%r9b,%r14b
	xorb	%r8b,%r13b
	xorb	%dl,%r12b
	xorb	%cl,%r11b
	xorb	%bl,%r10b

	kmovb	%r14d,%k1
	shrb	$4,%r14b
	kmovb	%r14d,%k2
	kmovb	%r13d,%k3
	shrb	$4,%r13b
	kmovb	%r13d,%k4
	kmovb	%r12d,%k5
	shrb	$4,%r12b
	kmovb	%r12d,%k6
	kmovb	%r11d,%k7

	vpsubq	.Lmask52x4(%rip),%ymm13,%ymm13{%k1}
	vpsubq	.Lmask52x4(%rip),%ymm14,%ymm14{%k2}
	vpsubq	.Lmask52x4(%rip),%ymm15,%ymm15{%k3}
	vpsubq	.Lmask52x4(%rip),%ymm16,%ymm16{%k4}
	vpsubq	.Lmask52x4(%rip),%ymm17,%ymm17{%k5}
	vpsubq	.Lmask52x4(%rip),%ymm18,%ymm18{%k6}
	vpsubq	.Lmask52x4(%rip),%ymm19,%ymm19{%k7}

	vpandq	.Lmask52x4(%rip),%ymm13,%ymm13
	vpandq	.Lmask52x4(%rip),%ymm14,%ymm14
	vpandq	.Lmask52x4(%rip),%ymm15,%ymm15
	vpandq	.Lmask52x4(%rip),%ymm16,%ymm16
	vpandq	.Lmask52x4(%rip),%ymm17,%ymm17
	vpandq	.Lmask52x4(%rip),%ymm18,%ymm18
	vpandq	.Lmask52x4(%rip),%ymm19,%ymm19

	shrb	$4,%r11b
	kmovb	%r11d,%k1
	kmovb	%r10d,%k2
	shrb	$4,%r10b
	kmovb	%r10d,%k3

	vpsubq	.Lmask52x4(%rip),%ymm20,%ymm20{%k1}
	vpsubq	.Lmask52x4(%rip),%ymm21,%ymm21{%k2}
	vpsubq	.Lmask52x4(%rip),%ymm22,%ymm22{%k3}

	vpandq	.Lmask52x4(%rip),%ymm20,%ymm20
	vpandq	.Lmask52x4(%rip),%ymm21,%ymm21
	vpandq	.Lmask52x4(%rip),%ymm22,%ymm22

	/* Store both results: first half at 0, second half at 320. */
	vmovdqu64	%ymm3,0(%rdi)
	vmovdqu64	%ymm4,32(%rdi)
	vmovdqu64	%ymm5,64(%rdi)
	vmovdqu64	%ymm6,96(%rdi)
	vmovdqu64	%ymm7,128(%rdi)
	vmovdqu64	%ymm8,160(%rdi)
	vmovdqu64	%ymm9,192(%rdi)
	vmovdqu64	%ymm10,224(%rdi)
	vmovdqu64	%ymm11,256(%rdi)
	vmovdqu64	%ymm12,288(%rdi)

	vmovdqu64	%ymm13,320(%rdi)
	vmovdqu64	%ymm14,352(%rdi)
	vmovdqu64	%ymm15,384(%rdi)
	vmovdqu64	%ymm16,416(%rdi)
	vmovdqu64	%ymm17,448(%rdi)
	vmovdqu64	%ymm18,480(%rdi)
	vmovdqu64	%ymm19,512(%rdi)
	vmovdqu64	%ymm20,544(%rdi)
	vmovdqu64	%ymm21,576(%rdi)
	vmovdqu64	%ymm22,608(%rdi)

	vzeroupper
	/* Reload the six saved registers through %rax, then drop the frame. */
	leaq	(%rsp),%rax
	.cfi_def_cfa_register	%rax
	movq	0(%rax),%r15
	.cfi_restore	%r15
	movq	8(%rax),%r14
	.cfi_restore	%r14
	movq	16(%rax),%r13
	.cfi_restore	%r13
	movq	24(%rax),%r12
	.cfi_restore	%r12
	movq	32(%rax),%rbp
	.cfi_restore	%rbp
	movq	40(%rax),%rbx
	.cfi_restore	%rbx
	leaq	48(%rax),%rsp
	.cfi_def_cfa	%rsp,8
.Lossl_rsaz_amm52x40_x2_ifma256_epilogue:
	.byte	0xf3,0xc3			/* rep ret */
	.cfi_endproc
.size	ossl_rsaz_amm52x40_x2_ifma256, .-ossl_rsaz_amm52x40_x2_ifma256
.text

/*
 * ossl_extract_multiplier_2x40_win5
 *
 * Copies one 320-byte half-entry for each of two indices out of a
 * 32-entry table whose entries are 640 bytes apart (20480 bytes in
 * total). Every entry is loaded on every call; the wanted one is kept
 * by a masked blend where the running counter equals the index.
 * NOTE(review): presumably this keeps the memory access pattern
 * independent of the indices - confirm against the generator.
 *
 * In:
 *   %rdi  destination, 640 bytes are written
 *   %rsi  table base
 *   %rdx  index selecting bytes 0..319 of an entry   -> 0(%rdi)
 *   %rcx  index selecting bytes 320..639 of an entry -> 320(%rdi)
 * Overwritten: %rax, %rsi, %r10, flags, %k1, %ymm0-%ymm5,
 *   %ymm16-%ymm24.
 *
 * The blend registers are cleared only before the first pass. If %rcx
 * matches no entry, the second 320 bytes therefore repeat the
 * first-pass selection instead of being zero.
 * No vzeroupper is issued before returning.
 */
.align	32
.globl	ossl_extract_multiplier_2x40_win5
.type	ossl_extract_multiplier_2x40_win5,@function
ossl_extract_multiplier_2x40_win5:
	.cfi_startproc
	.byte	243,15,30,250			/* endbr64 */
	vmovdqa64	.Lones(%rip),%ymm24
	vpbroadcastq	%rdx,%ymm22
	vpbroadcastq	%rcx,%ymm23
	leaq	20480(%rsi),%rax		/* one past the last entry */

	movq	%rsi,%r10			/* keep the base for pass two */

	vpxor	%xmm0,%xmm0,%xmm0
	vmovdqa64	%ymm0,%ymm1
	vmovdqa64	%ymm0,%ymm2
	vmovdqa64	%ymm0,%ymm3
	vmovdqa64	%ymm0,%ymm4
	vmovdqa64	%ymm0,%ymm5
	vmovdqa64	%ymm0,%ymm16
	vmovdqa64	%ymm0,%ymm17
	vmovdqa64	%ymm0,%ymm18
	vmovdqa64	%ymm0,%ymm19
	vpxorq	%ymm21,%ymm21,%ymm21		/* entry counter = 0 */
.align	32
.Lloop_0:
	/* %k1 is all ones only when the counter equals the %rdx index. */
	vpcmpq	$0,%ymm21,%ymm22,%k1
	vmovdqu64	0(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm0,%ymm0{%k1}
	vmovdqu64	32(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm1,%ymm1{%k1}
	vmovdqu64	64(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm2,%ymm2{%k1}
	vmovdqu64	96(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm3,%ymm3{%k1}
	vmovdqu64	128(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm4,%ymm4{%k1}
	vmovdqu64	160(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm5,%ymm5{%k1}
	vmovdqu64	192(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm16,%ymm16{%k1}
	vmovdqu64	224(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm17,%ymm17{%k1}
	vmovdqu64	256(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm18,%ymm18{%k1}
	vmovdqu64	288(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm19,%ymm19{%k1}
	vpaddq	%ymm24,%ymm21,%ymm21
	addq	$640,%rsi
	cmpq	%rsi,%rax
	jne	.Lloop_0
	vmovdqu64	%ymm0,0(%rdi)
	vmovdqu64	%ymm1,32(%rdi)
	vmovdqu64	%ymm2,64(%rdi)
	vmovdqu64	%ymm3,96(%rdi)
	vmovdqu64	%ymm4,128(%rdi)
	vmovdqu64	%ymm5,160(%rdi)
	vmovdqu64	%ymm16,192(%rdi)
	vmovdqu64	%ymm17,224(%rdi)
	vmovdqu64	%ymm18,256(%rdi)
	vmovdqu64	%ymm19,288(%rdi)
	/* Second pass: rewind the table and restart the counter. */
	movq	%r10,%rsi
	vpxorq	%ymm21,%ymm21,%ymm21
.align	32
.Lloop_320:
	/* %k1 is all ones only when the counter equals the %rcx index. */
	vpcmpq	$0,%ymm21,%ymm23,%k1
	vmovdqu64	320(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm0,%ymm0{%k1}
	vmovdqu64	352(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm1,%ymm1{%k1}
	vmovdqu64	384(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm2,%ymm2{%k1}
	vmovdqu64	416(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm3,%ymm3{%k1}
	vmovdqu64	448(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm4,%ymm4{%k1}
	vmovdqu64	480(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm5,%ymm5{%k1}
	vmovdqu64	512(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm16,%ymm16{%k1}
	vmovdqu64	544(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm17,%ymm17{%k1}
	vmovdqu64	576(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm18,%ymm18{%k1}
	vmovdqu64	608(%rsi),%ymm20
	vpblendmq	%ymm20,%ymm19,%ymm19{%k1}
	vpaddq	%ymm24,%ymm21,%ymm21
	addq	$640,%rsi
	cmpq	%rsi,%rax
	jne	.Lloop_320
	vmovdqu64	%ymm0,320(%rdi)
	vmovdqu64	%ymm1,352(%rdi)
	vmovdqu64	%ymm2,384(%rdi)
	vmovdqu64	%ymm3,416(%rdi)
	vmovdqu64	%ymm4,448(%rdi)
	vmovdqu64	%ymm5,480(%rdi)
	vmovdqu64	%ymm16,512(%rdi)
	vmovdqu64	%ymm17,544(%rdi)
	vmovdqu64	%ymm18,576(%rdi)
	vmovdqu64	%ymm19,608(%rdi)

	.byte	0xf3,0xc3			/* rep ret */
	.cfi_endproc
.size	ossl_extract_multiplier_2x40_win5, .-ossl_extract_multiplier_2x40_win5
.section	.rodata
.align	32
/* Per-lane counter increment used by the table scan. */
.Lones:
.quad	1,1,1,1
/* All-zero vector shifted in at the bottom of the carry chain. */
.Lzeros:
.quad	0,0,0,0
/* ELF property note: type 0xc0000002 with a 4-byte value of 3. */
.section	".note.gnu.property", "a"
	.p2align	3
	.long	1f - 0f
	.long	4f - 1f
	.long	5
0:
	# "GNU" encoded with .byte, since .asciz isn't supported
	# on Solaris.
	.byte	0x47
	.byte	0x4e
	.byte	0x55
	.byte	0
1:
	.p2align	3
	.long	0xc0000002
	.long	3f - 2f
2:
	.long	3
3:
	.p2align	3
4: