Path: blob/main/sys/crypto/openssl/amd64/rsaz-3k-avxifma.S
39483 views
/* Do not modify. This file is auto-generated from rsaz-3k-avxifma.pl. */

/*
 * ossl_rsaz_amm52x30_x1_avxifma256
 *
 * Syntax: GNU as, AT&T operand order.
 *
 * Register use, as read from the code below:
 *   %rdi  destination; eight 32-byte vectors are stored at 0..224(%rdi)
 *   %rsi  first vector operand, read as eight 32-byte rows at 0..224(%rsi)
 *   %rdx  copied to %r11 on entry and then consumed one qword per step
 *         (30 steps: 7 loop passes of 4, plus 2 trailing steps)
 *   %rcx  second vector operand, read as eight 32-byte rows at 0..224(%rcx)
 *   %r8   scalar multiplied into the running accumulator every step
 *   %rax  constant 0xfffffffffffff (52 one-bits)
 *   %r9   scalar accumulator, zeroed on entry
 *   %ymm3..%ymm10  vector accumulators, zeroed on entry
 *
 * NOTE(review): judging by the symbol name this is presumably the 30-limb,
 * radix-2^52 "almost Montgomery multiplication" (res, a, b, m, k0 in the
 * five argument registers) -- confirm against rsaz-3k-avxifma.pl.
 *
 * Stack: six callee-saved registers are pushed; each step temporarily
 * drops %rsp by 264 bytes for a spill area, and the normalisation drops it
 * by a further 32 bytes to park %ymm3.
 *
 * Clobbers: %rax %rcx %rdx %r8-%r11, %ymm0-%ymm15, flags.
 * %rbx and %r12-%r14 are written and restored; %rbp and %r15 are only
 * saved and restored.
 *
 * The constants .Lhigh64x3 and .Lkmasklut referenced here are defined in
 * the .rodata section that follows the function.
 */

/* Apply {vex}-encoded \op to the eight 32-byte rows at \base with the
 * broadcast multiplier \mul, accumulating into %ymm3..%ymm10. */
.macro	AMM52X30_ROWS op, base, mul
	{vex}	\op	0(\base),\mul,%ymm3
	{vex}	\op	32(\base),\mul,%ymm4
	{vex}	\op	64(\base),\mul,%ymm5
	{vex}	\op	96(\base),\mul,%ymm6
	{vex}	\op	128(\base),\mul,%ymm7
	{vex}	\op	160(\base),\mul,%ymm8
	{vex}	\op	192(\base),\mul,%ymm9
	{vex}	\op	224(\base),\mul,%ymm10
.endm

/* One multiply-accumulate step using the qword at \boff(%r11). */
.macro	AMM52X30_STEP boff
	movq	\boff(%r11),%r13		/* scalar copy of the current digit */
	vpbroadcastq	\boff(%r11),%ymm1	/* same digit in every lane */
	movq	0(%rsi),%rdx
	mulxq	%r13,%r13,%r12			/* %r12:%r13 = (%rsi)[0] * digit */
	addq	%r13,%r9
	movq	%r12,%r10
	adcq	$0,%r10				/* %r10:%r9 = running 128-bit sum */

	movq	%r8,%r13
	imulq	%r9,%r13
	andq	%rax,%r13			/* low 52 bits of %r8 * %r9 */

	vmovq	%r13,%xmm2
	vpbroadcastq	%xmm2,%ymm2		/* second multiplier in every lane */
	movq	0(%rcx),%rdx
	mulxq	%r13,%r13,%r12			/* %r12:%r13 = (%rcx)[0] * multiplier */
	addq	%r13,%r9
	adcq	%r12,%r10

	shrq	$52,%r9				/* %r9 = low 64 bits of */
	salq	$12,%r10			/*       (%r10:%r9) >> 52 */
	orq	%r10,%r9

	leaq	-264(%rsp),%rsp			/* 8*32 bytes of spill + one zero qword */

	AMM52X30_ROWS vpmadd52luq,%rsi,%ymm1
	AMM52X30_ROWS vpmadd52luq,%rcx,%ymm2

	/* Spill the accumulators, append a zero qword, and reload from
	 * offset 8: every 64-bit lane moves down by one position across
	 * the eight registers and a zero enters at the top. */
	vmovdqu	%ymm3,0(%rsp)
	vmovdqu	%ymm4,32(%rsp)
	vmovdqu	%ymm5,64(%rsp)
	vmovdqu	%ymm6,96(%rsp)
	vmovdqu	%ymm7,128(%rsp)
	vmovdqu	%ymm8,160(%rsp)
	vmovdqu	%ymm9,192(%rsp)
	vmovdqu	%ymm10,224(%rsp)
	movq	$0,256(%rsp)

	vmovdqu	8(%rsp),%ymm3
	vmovdqu	40(%rsp),%ymm4
	vmovdqu	72(%rsp),%ymm5
	vmovdqu	104(%rsp),%ymm6
	vmovdqu	136(%rsp),%ymm7
	vmovdqu	168(%rsp),%ymm8
	vmovdqu	200(%rsp),%ymm9
	vmovdqu	232(%rsp),%ymm10

	addq	8(%rsp),%r9			/* fold the new lowest lane into %r9 */

	AMM52X30_ROWS vpmadd52huq,%rsi,%ymm1
	AMM52X30_ROWS vpmadd52huq,%rcx,%ymm2

	leaq	264(%rsp),%rsp
.endm

/* \hi = \hi moved up one 64-bit lane, with the top lane of \lo inserted
 * as lane 0.  Uses %ymm3 as scratch. */
.macro	AMM52X30_CARRY_UP hi, lo
	vpermq	$144,\hi,\hi
	vpermq	$3,\lo,%ymm3
	vblendpd	$1,%ymm3,\hi,\hi
.endm

/* Compare \va and \vb against .Lmask52x4 with \cmp and pack the two 4-bit
 * lane masks into one byte: \lo8 = mask(\va) | mask(\vb) << 4. */
.macro	AMM52X30_PACK_MASK cmp, va, vb, ma, mb, lo32, hi32, lo8, hi8
	\cmp	.Lmask52x4(%rip),\va,\ma
	\cmp	.Lmask52x4(%rip),\vb,\mb
	vmovmskpd	\ma,\lo32
	vmovmskpd	\mb,\hi32
	shlb	$4,\hi8
	orb	\hi8,\lo8
.endm

/* For each lane of \vec selected by the low four bits of \idx, replace the
 * lane with (lane - 0xfffffffffffff).  %rdx must hold &.Lkmasklut; \idx is
 * destroyed, %ymm0 and %ymm2 are scratch. */
.macro	AMM52X30_APPLY_CARRY idx, vec
	andq	$0xf,\idx
	vpsubq	.Lmask52x4(%rip),\vec,%ymm0
	shlq	$5,\idx				/* 32 bytes per table entry */
	vmovapd	(%rdx,\idx,1),%ymm2
	vblendvpd	%ymm2,%ymm0,\vec,\vec
.endm

.text

.globl	ossl_rsaz_amm52x30_x1_avxifma256
.type	ossl_rsaz_amm52x30_x1_avxifma256,@function
.align	32
ossl_rsaz_amm52x30_x1_avxifma256:
.cfi_startproc
.byte	243,15,30,250				/* endbr64 */
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	/* Clear the vector accumulators and the scalar accumulator. */
	vpxor	%ymm0,%ymm0,%ymm0
.irp	acc,3,4,5,6,7,8,9,10
	vmovapd	%ymm0,%ymm\acc
.endr

	xorl	%r9d,%r9d

	movq	%rdx,%r11			/* %rdx is needed by mulx */
	movq	$0xfffffffffffff,%rax

	movl	$7,%ebx				/* 7 passes x 4 steps = 28 steps */

.align	32
.Lloop7:
.irp	boff,0,8,16,24
	AMM52X30_STEP \boff
.endr
	leaq	32(%r11),%r11
	decl	%ebx
	jne	.Lloop7

	/* Two trailing steps bring the total to 30. */
.irp	boff,0,8
	AMM52X30_STEP \boff
.endr

	/* Put the scalar accumulator back into lane 0 of %ymm3. */
	vmovq	%r9,%xmm0
	vpbroadcastq	%xmm0,%ymm0
	vpblendd	$3,%ymm0,%ymm3,%ymm3

	/* Normalisation, part 1: take the bits above bit 51 of every lane ... */
	vpsrlq	$52,%ymm3,%ymm0
	vpsrlq	$52,%ymm4,%ymm1
	vpsrlq	$52,%ymm5,%ymm2
	vpsrlq	$52,%ymm6,%ymm11
	vpsrlq	$52,%ymm7,%ymm12
	vpsrlq	$52,%ymm8,%ymm13
	vpsrlq	$52,%ymm9,%ymm14
	vpsrlq	$52,%ymm10,%ymm15

	leaq	-32(%rsp),%rsp
	vmovupd	%ymm3,(%rsp)			/* %ymm3 is scratch in CARRY_UP */

	/* ... and move them up one lane, working from the top register down. */
	AMM52X30_CARRY_UP %ymm15,%ymm14
	AMM52X30_CARRY_UP %ymm14,%ymm13
	AMM52X30_CARRY_UP %ymm13,%ymm12
	AMM52X30_CARRY_UP %ymm12,%ymm11
	AMM52X30_CARRY_UP %ymm11,%ymm2
	AMM52X30_CARRY_UP %ymm2,%ymm1
	AMM52X30_CARRY_UP %ymm1,%ymm0

	vpermq	$144,%ymm0,%ymm0
	vpand	.Lhigh64x3(%rip),%ymm0,%ymm0	/* nothing carries into lane 0 */

	vmovupd	(%rsp),%ymm3
	leaq	32(%rsp),%rsp

	/* Keep the low 52 bits of every lane, then add the shifted-in carries. */
.irp	acc,3,4,5,6,7,8,9,10
	vpand	.Lmask52x4(%rip),%ymm\acc,%ymm\acc
.endr

	vpaddq	%ymm0,%ymm3,%ymm3
	vpaddq	%ymm1,%ymm4,%ymm4
	vpaddq	%ymm2,%ymm5,%ymm5
	vpaddq	%ymm11,%ymm6,%ymm6
	vpaddq	%ymm12,%ymm7,%ymm7
	vpaddq	%ymm13,%ymm8,%ymm8
	vpaddq	%ymm14,%ymm9,%ymm9
	vpaddq	%ymm15,%ymm10,%ymm10

	/* Part 2: one bit per lane, "lane > 0xfffffffffffff", packed into
	 * %r14b (lowest lanes), %r13b, %r12b, %r11b (highest lanes). */
	AMM52X30_PACK_MASK vpcmpgtq,%ymm3,%ymm4,%ymm0,%ymm1,%r14d,%r13d,%r14b,%r13b
	AMM52X30_PACK_MASK vpcmpgtq,%ymm5,%ymm6,%ymm2,%ymm11,%r13d,%r12d,%r13b,%r12b
	AMM52X30_PACK_MASK vpcmpgtq,%ymm7,%ymm8,%ymm12,%ymm13,%r12d,%r11d,%r12b,%r11b
	AMM52X30_PACK_MASK vpcmpgtq,%ymm9,%ymm10,%ymm14,%ymm15,%r11d,%r10d,%r11b,%r10b

	/* Shift the 32-bit mask left by one bit through the byte chain. */
	addb	%r14b,%r14b
	adcb	%r13b,%r13b
	adcb	%r12b,%r12b
	adcb	%r11b,%r11b

	/* One bit per lane, "lane == 0xfffffffffffff", packed into
	 * %r9b, %r8b, %dl, %cl in the same lane order. */
	AMM52X30_PACK_MASK vpcmpeqq,%ymm3,%ymm4,%ymm0,%ymm1,%r9d,%r8d,%r9b,%r8b
	AMM52X30_PACK_MASK vpcmpeqq,%ymm5,%ymm6,%ymm2,%ymm11,%r8d,%edx,%r8b,%dl
	AMM52X30_PACK_MASK vpcmpeqq,%ymm7,%ymm8,%ymm12,%ymm13,%edx,%ecx,%dl,%cl
	AMM52X30_PACK_MASK vpcmpeqq,%ymm9,%ymm10,%ymm14,%ymm15,%ecx,%ebx,%cl,%bl

	/* Add the "equal" mask with carry across the bytes, then XOR it back
	 * out: the surviving bits pick the lanes adjusted in part 3. */
	addb	%r9b,%r14b
	adcb	%r8b,%r13b
	adcb	%dl,%r12b
	adcb	%cl,%r11b

	xorb	%r9b,%r14b
	xorb	%r8b,%r13b
	xorb	%dl,%r12b
	xorb	%cl,%r11b

	leaq	.Lkmasklut(%rip),%rdx

	/* Part 3: each mask byte covers two registers -- low nibble first,
	 * then the high nibble through %r10. */
	movb	%r14b,%r10b
	AMM52X30_APPLY_CARRY %r14,%ymm3
	shrb	$4,%r10b
	AMM52X30_APPLY_CARRY %r10,%ymm4

	movb	%r13b,%r10b
	AMM52X30_APPLY_CARRY %r13,%ymm5
	shrb	$4,%r10b
	AMM52X30_APPLY_CARRY %r10,%ymm6

	movb	%r12b,%r10b
	AMM52X30_APPLY_CARRY %r12,%ymm7
	shrb	$4,%r10b
	AMM52X30_APPLY_CARRY %r10,%ymm8

	movb	%r11b,%r10b
	AMM52X30_APPLY_CARRY %r11,%ymm9
	shrb	$4,%r10b
	AMM52X30_APPLY_CARRY %r10,%ymm10

	/* Final trim of every lane to 52 bits. */
.irp	acc,3,4,5,6,7,8,9,10
	vpand	.Lmask52x4(%rip),%ymm\acc,%ymm\acc
.endr

	/* Write the eight result vectors. */
	vmovdqu	%ymm3,0(%rdi)
	vmovdqu	%ymm4,32(%rdi)
	vmovdqu	%ymm5,64(%rdi)
	vmovdqu	%ymm6,96(%rdi)
	vmovdqu	%ymm7,128(%rdi)
	vmovdqu	%ymm8,160(%rdi)
	vmovdqu	%ymm9,192(%rdi)
	vmovdqu	%ymm10,224(%rdi)

	vzeroupper
	/* Restore callee-saved registers in reverse push order. */
	leaq	(%rsp),%rax
.cfi_def_cfa_register	%rax
	movq	0(%rax),%r15
.cfi_restore	%r15
	movq	8(%rax),%r14
.cfi_restore	%r14
	movq	16(%rax),%r13
.cfi_restore	%r13
	movq	24(%rax),%r12
.cfi_restore	%r12
	movq	32(%rax),%rbp
.cfi_restore	%rbp
	movq	40(%rax),%rbx
.cfi_restore	%rbx
	leaq	48(%rax),%rsp
.cfi_def_cfa	%rsp,8
.Lossl_rsaz_amm52x30_x1_avxifma256_epilogue:
.byte	0xf3,0xc3				/* rep ret */
.cfi_endproc
.size	ossl_rsaz_amm52x30_x1_avxifma256, .-ossl_rsaz_amm52x30_x1_avxifma256

/* The helper macros are private to the function above. */
.purgem	AMM52X30_APPLY_CARRY
.purgem	AMM52X30_PACK_MASK
.purgem	AMM52X30_CARRY_UP
.purgem	AMM52X30_STEP
.purgem	AMM52X30_ROWS

.section	.rodata
.align	32
/* Four lanes of 0xfffffffffffff (52 one-bits). */
.Lmask52x4:
.quad	0xfffffffffffff
.quad	0xfffffffffffff
.quad	0xfffffffffffff
/* Fourth lane: the operand of this .quad is the first token of the next source line. */
.quad 
0xfffffffffffff814.Lhigh64x3:815.quad 0x0816.quad 0xffffffffffffffff817.quad 0xffffffffffffffff818.quad 0xffffffffffffffff819.Lkmasklut:820821.quad 0x0822.quad 0x0823.quad 0x0824.quad 0x0825826.quad 0xffffffffffffffff827.quad 0x0828.quad 0x0829.quad 0x0830831.quad 0x0832.quad 0xffffffffffffffff833.quad 0x0834.quad 0x0835836.quad 0xffffffffffffffff837.quad 0xffffffffffffffff838.quad 0x0839.quad 0x0840841.quad 0x0842.quad 0x0843.quad 0xffffffffffffffff844.quad 0x0845846.quad 0xffffffffffffffff847.quad 0x0848.quad 0xffffffffffffffff849.quad 0x0850851.quad 0x0852.quad 0xffffffffffffffff853.quad 0xffffffffffffffff854.quad 0x0855856.quad 0xffffffffffffffff857.quad 0xffffffffffffffff858.quad 0xffffffffffffffff859.quad 0x0860861.quad 0x0862.quad 0x0863.quad 0x0864.quad 0xffffffffffffffff865866.quad 0xffffffffffffffff867.quad 0x0868.quad 0x0869.quad 0xffffffffffffffff870871.quad 0x0872.quad 0xffffffffffffffff873.quad 0x0874.quad 0xffffffffffffffff875876.quad 0xffffffffffffffff877.quad 0xffffffffffffffff878.quad 0x0879.quad 0xffffffffffffffff880881.quad 0x0882.quad 0x0883.quad 0xffffffffffffffff884.quad 0xffffffffffffffff885886.quad 0xffffffffffffffff887.quad 0x0888.quad 0xffffffffffffffff889.quad 0xffffffffffffffff890891.quad 0x0892.quad 0xffffffffffffffff893.quad 0xffffffffffffffff894.quad 0xffffffffffffffff895896.quad 0xffffffffffffffff897.quad 0xffffffffffffffff898.quad 0xffffffffffffffff899.quad 0xffffffffffffffff900.text901902.globl ossl_rsaz_amm52x30_x2_avxifma256903.type ossl_rsaz_amm52x30_x2_avxifma256,@function904.align 32905ossl_rsaz_amm52x30_x2_avxifma256:906.cfi_startproc907.byte 243,15,30,250908pushq %rbx909.cfi_adjust_cfa_offset 8910.cfi_offset %rbx,-16911pushq %rbp912.cfi_adjust_cfa_offset 8913.cfi_offset %rbp,-24914pushq %r12915.cfi_adjust_cfa_offset 8916.cfi_offset %r12,-32917pushq %r13918.cfi_adjust_cfa_offset 8919.cfi_offset %r13,-40920pushq %r14921.cfi_adjust_cfa_offset 8922.cfi_offset %r14,-48923pushq %r15924.cfi_adjust_cfa_offset 8925.cfi_offset 
%r15,-56926927vpxor %ymm0,%ymm0,%ymm0928vmovapd %ymm0,%ymm3929vmovapd %ymm0,%ymm4930vmovapd %ymm0,%ymm5931vmovapd %ymm0,%ymm6932vmovapd %ymm0,%ymm7933vmovapd %ymm0,%ymm8934vmovapd %ymm0,%ymm9935vmovapd %ymm0,%ymm10936937xorl %r9d,%r9d938939movq %rdx,%r11940movq $0xfffffffffffff,%rax941942movl $30,%ebx943944.align 32945.Lloop30:946movq 0(%r11),%r13947948vpbroadcastq 0(%r11),%ymm1949movq 0(%rsi),%rdx950mulxq %r13,%r13,%r12951addq %r13,%r9952movq %r12,%r10953adcq $0,%r10954955movq (%r8),%r13956imulq %r9,%r13957andq %rax,%r13958959vmovq %r13,%xmm2960vpbroadcastq %xmm2,%ymm2961movq 0(%rcx),%rdx962mulxq %r13,%r13,%r12963addq %r13,%r9964adcq %r12,%r10965966shrq $52,%r9967salq $12,%r10968orq %r10,%r9969970leaq -264(%rsp),%rsp971972{vex} vpmadd52luq 0(%rsi),%ymm1,%ymm3973{vex} vpmadd52luq 32(%rsi),%ymm1,%ymm4974{vex} vpmadd52luq 64(%rsi),%ymm1,%ymm5975{vex} vpmadd52luq 96(%rsi),%ymm1,%ymm6976{vex} vpmadd52luq 128(%rsi),%ymm1,%ymm7977{vex} vpmadd52luq 160(%rsi),%ymm1,%ymm8978{vex} vpmadd52luq 192(%rsi),%ymm1,%ymm9979{vex} vpmadd52luq 224(%rsi),%ymm1,%ymm10980981{vex} vpmadd52luq 0(%rcx),%ymm2,%ymm3982{vex} vpmadd52luq 32(%rcx),%ymm2,%ymm4983{vex} vpmadd52luq 64(%rcx),%ymm2,%ymm5984{vex} vpmadd52luq 96(%rcx),%ymm2,%ymm6985{vex} vpmadd52luq 128(%rcx),%ymm2,%ymm7986{vex} vpmadd52luq 160(%rcx),%ymm2,%ymm8987{vex} vpmadd52luq 192(%rcx),%ymm2,%ymm9988{vex} vpmadd52luq 224(%rcx),%ymm2,%ymm10989990991vmovdqu %ymm3,0(%rsp)992vmovdqu %ymm4,32(%rsp)993vmovdqu %ymm5,64(%rsp)994vmovdqu %ymm6,96(%rsp)995vmovdqu %ymm7,128(%rsp)996vmovdqu %ymm8,160(%rsp)997vmovdqu %ymm9,192(%rsp)998vmovdqu %ymm10,224(%rsp)999movq $0,256(%rsp)10001001vmovdqu 8(%rsp),%ymm31002vmovdqu 40(%rsp),%ymm41003vmovdqu 72(%rsp),%ymm51004vmovdqu 104(%rsp),%ymm61005vmovdqu 136(%rsp),%ymm71006vmovdqu 168(%rsp),%ymm81007vmovdqu 200(%rsp),%ymm91008vmovdqu 232(%rsp),%ymm1010091010addq 8(%rsp),%r910111012{vex} vpmadd52huq 0(%rsi),%ymm1,%ymm31013{vex} vpmadd52huq 32(%rsi),%ymm1,%ymm41014{vex} vpmadd52huq 
64(%rsi),%ymm1,%ymm51015{vex} vpmadd52huq 96(%rsi),%ymm1,%ymm61016{vex} vpmadd52huq 128(%rsi),%ymm1,%ymm71017{vex} vpmadd52huq 160(%rsi),%ymm1,%ymm81018{vex} vpmadd52huq 192(%rsi),%ymm1,%ymm91019{vex} vpmadd52huq 224(%rsi),%ymm1,%ymm1010201021{vex} vpmadd52huq 0(%rcx),%ymm2,%ymm31022{vex} vpmadd52huq 32(%rcx),%ymm2,%ymm41023{vex} vpmadd52huq 64(%rcx),%ymm2,%ymm51024{vex} vpmadd52huq 96(%rcx),%ymm2,%ymm61025{vex} vpmadd52huq 128(%rcx),%ymm2,%ymm71026{vex} vpmadd52huq 160(%rcx),%ymm2,%ymm81027{vex} vpmadd52huq 192(%rcx),%ymm2,%ymm91028{vex} vpmadd52huq 224(%rcx),%ymm2,%ymm1010291030leaq 264(%rsp),%rsp1031leaq 8(%r11),%r111032decl %ebx1033jne .Lloop3010341035pushq %r111036pushq %rsi1037pushq %rcx1038pushq %r810391040vmovq %r9,%xmm01041vpbroadcastq %xmm0,%ymm01042vpblendd $3,%ymm0,%ymm3,%ymm31043104410451046vpsrlq $52,%ymm3,%ymm01047vpsrlq $52,%ymm4,%ymm11048vpsrlq $52,%ymm5,%ymm21049vpsrlq $52,%ymm6,%ymm111050vpsrlq $52,%ymm7,%ymm121051vpsrlq $52,%ymm8,%ymm131052vpsrlq $52,%ymm9,%ymm141053vpsrlq $52,%ymm10,%ymm1510541055leaq -32(%rsp),%rsp1056vmovupd %ymm3,(%rsp)105710581059vpermq $144,%ymm15,%ymm151060vpermq $3,%ymm14,%ymm31061vblendpd $1,%ymm3,%ymm15,%ymm1510621063vpermq $144,%ymm14,%ymm141064vpermq $3,%ymm13,%ymm31065vblendpd $1,%ymm3,%ymm14,%ymm1410661067vpermq $144,%ymm13,%ymm131068vpermq $3,%ymm12,%ymm31069vblendpd $1,%ymm3,%ymm13,%ymm1310701071vpermq $144,%ymm12,%ymm121072vpermq $3,%ymm11,%ymm31073vblendpd $1,%ymm3,%ymm12,%ymm1210741075vpermq $144,%ymm11,%ymm111076vpermq $3,%ymm2,%ymm31077vblendpd $1,%ymm3,%ymm11,%ymm1110781079vpermq $144,%ymm2,%ymm21080vpermq $3,%ymm1,%ymm31081vblendpd $1,%ymm3,%ymm2,%ymm210821083vpermq $144,%ymm1,%ymm11084vpermq $3,%ymm0,%ymm31085vblendpd $1,%ymm3,%ymm1,%ymm110861087vpermq $144,%ymm0,%ymm01088vpand .Lhigh64x3(%rip),%ymm0,%ymm010891090vmovupd (%rsp),%ymm31091leaq 32(%rsp),%rsp109210931094vpand .Lmask52x4(%rip),%ymm3,%ymm31095vpand .Lmask52x4(%rip),%ymm4,%ymm41096vpand .Lmask52x4(%rip),%ymm5,%ymm51097vpand 
.Lmask52x4(%rip),%ymm6,%ymm61098vpand .Lmask52x4(%rip),%ymm7,%ymm71099vpand .Lmask52x4(%rip),%ymm8,%ymm81100vpand .Lmask52x4(%rip),%ymm9,%ymm91101vpand .Lmask52x4(%rip),%ymm10,%ymm10110211031104vpaddq %ymm0,%ymm3,%ymm31105vpaddq %ymm1,%ymm4,%ymm41106vpaddq %ymm2,%ymm5,%ymm51107vpaddq %ymm11,%ymm6,%ymm61108vpaddq %ymm12,%ymm7,%ymm71109vpaddq %ymm13,%ymm8,%ymm81110vpaddq %ymm14,%ymm9,%ymm91111vpaddq %ymm15,%ymm10,%ymm101112111311141115vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm01116vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm11117vmovmskpd %ymm0,%r14d1118vmovmskpd %ymm1,%r13d1119shlb $4,%r13b1120orb %r13b,%r14b11211122vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm21123vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm111124vmovmskpd %ymm2,%r13d1125vmovmskpd %ymm11,%r12d1126shlb $4,%r12b1127orb %r12b,%r13b11281129vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm121130vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm131131vmovmskpd %ymm12,%r12d1132vmovmskpd %ymm13,%r11d1133shlb $4,%r11b1134orb %r11b,%r12b11351136vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm141137vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm151138vmovmskpd %ymm14,%r11d1139vmovmskpd %ymm15,%r10d1140shlb $4,%r10b1141orb %r10b,%r11b11421143addb %r14b,%r14b1144adcb %r13b,%r13b1145adcb %r12b,%r12b1146adcb %r11b,%r11b114711481149vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm01150vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm11151vmovmskpd %ymm0,%r9d1152vmovmskpd %ymm1,%r8d1153shlb $4,%r8b1154orb %r8b,%r9b11551156vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm21157vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm111158vmovmskpd %ymm2,%r8d1159vmovmskpd %ymm11,%edx1160shlb $4,%dl1161orb %dl,%r8b11621163vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm121164vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm131165vmovmskpd %ymm12,%edx1166vmovmskpd %ymm13,%ecx1167shlb $4,%cl1168orb %cl,%dl11691170vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm141171vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm151172vmovmskpd %ymm14,%ecx1173vmovmskpd %ymm15,%ebx1174shlb $4,%bl1175orb %bl,%cl11761177addb %r9b,%r14b1178adcb %r8b,%r13b1179adcb %dl,%r12b1180adcb %cl,%r11b11811182xorb %r9b,%r14b1183xorb 
%r8b,%r13b1184xorb %dl,%r12b1185xorb %cl,%r11b11861187leaq .Lkmasklut(%rip),%rdx11881189movb %r14b,%r10b1190andq $0xf,%r141191vpsubq .Lmask52x4(%rip),%ymm3,%ymm01192shlq $5,%r141193vmovapd (%rdx,%r14,1),%ymm21194vblendvpd %ymm2,%ymm0,%ymm3,%ymm311951196shrb $4,%r10b1197andq $0xf,%r101198vpsubq .Lmask52x4(%rip),%ymm4,%ymm01199shlq $5,%r101200vmovapd (%rdx,%r10,1),%ymm21201vblendvpd %ymm2,%ymm0,%ymm4,%ymm412021203movb %r13b,%r10b1204andq $0xf,%r131205vpsubq .Lmask52x4(%rip),%ymm5,%ymm01206shlq $5,%r131207vmovapd (%rdx,%r13,1),%ymm21208vblendvpd %ymm2,%ymm0,%ymm5,%ymm512091210shrb $4,%r10b1211andq $0xf,%r101212vpsubq .Lmask52x4(%rip),%ymm6,%ymm01213shlq $5,%r101214vmovapd (%rdx,%r10,1),%ymm21215vblendvpd %ymm2,%ymm0,%ymm6,%ymm612161217movb %r12b,%r10b1218andq $0xf,%r121219vpsubq .Lmask52x4(%rip),%ymm7,%ymm01220shlq $5,%r121221vmovapd (%rdx,%r12,1),%ymm21222vblendvpd %ymm2,%ymm0,%ymm7,%ymm712231224shrb $4,%r10b1225andq $0xf,%r101226vpsubq .Lmask52x4(%rip),%ymm8,%ymm01227shlq $5,%r101228vmovapd (%rdx,%r10,1),%ymm21229vblendvpd %ymm2,%ymm0,%ymm8,%ymm812301231movb %r11b,%r10b1232andq $0xf,%r111233vpsubq .Lmask52x4(%rip),%ymm9,%ymm01234shlq $5,%r111235vmovapd (%rdx,%r11,1),%ymm21236vblendvpd %ymm2,%ymm0,%ymm9,%ymm912371238shrb $4,%r10b1239andq $0xf,%r101240vpsubq .Lmask52x4(%rip),%ymm10,%ymm01241shlq $5,%r101242vmovapd (%rdx,%r10,1),%ymm21243vblendvpd %ymm2,%ymm0,%ymm10,%ymm1012441245vpand .Lmask52x4(%rip),%ymm3,%ymm31246vpand .Lmask52x4(%rip),%ymm4,%ymm41247vpand .Lmask52x4(%rip),%ymm5,%ymm51248vpand .Lmask52x4(%rip),%ymm6,%ymm61249vpand .Lmask52x4(%rip),%ymm7,%ymm71250vpand .Lmask52x4(%rip),%ymm8,%ymm81251vpand .Lmask52x4(%rip),%ymm9,%ymm912521253vpand .Lmask52x4(%rip),%ymm10,%ymm101254popq %r81255popq %rcx1256popq %rsi1257popq %r1112581259vmovdqu %ymm3,0(%rdi)1260vmovdqu %ymm4,32(%rdi)1261vmovdqu %ymm5,64(%rdi)1262vmovdqu %ymm6,96(%rdi)1263vmovdqu %ymm7,128(%rdi)1264vmovdqu %ymm8,160(%rdi)1265vmovdqu %ymm9,192(%rdi)1266vmovdqu %ymm10,224(%rdi)12671268xorl 
%r15d,%r15d12691270leaq 16(%r11),%r111271movq $0xfffffffffffff,%rax12721273movl $30,%ebx12741275vpxor %ymm0,%ymm0,%ymm01276vmovapd %ymm0,%ymm31277vmovapd %ymm0,%ymm41278vmovapd %ymm0,%ymm51279vmovapd %ymm0,%ymm61280vmovapd %ymm0,%ymm71281vmovapd %ymm0,%ymm81282vmovapd %ymm0,%ymm91283vmovapd %ymm0,%ymm101284.align 321285.Lloop40:1286movq 0(%r11),%r1312871288vpbroadcastq 0(%r11),%ymm11289movq 256(%rsi),%rdx1290mulxq %r13,%r13,%r121291addq %r13,%r91292movq %r12,%r101293adcq $0,%r1012941295movq 8(%r8),%r131296imulq %r9,%r131297andq %rax,%r1312981299vmovq %r13,%xmm21300vpbroadcastq %xmm2,%ymm21301movq 256(%rcx),%rdx1302mulxq %r13,%r13,%r121303addq %r13,%r91304adcq %r12,%r1013051306shrq $52,%r91307salq $12,%r101308orq %r10,%r913091310leaq -264(%rsp),%rsp13111312{vex} vpmadd52luq 256(%rsi),%ymm1,%ymm31313{vex} vpmadd52luq 288(%rsi),%ymm1,%ymm41314{vex} vpmadd52luq 320(%rsi),%ymm1,%ymm51315{vex} vpmadd52luq 352(%rsi),%ymm1,%ymm61316{vex} vpmadd52luq 384(%rsi),%ymm1,%ymm71317{vex} vpmadd52luq 416(%rsi),%ymm1,%ymm81318{vex} vpmadd52luq 448(%rsi),%ymm1,%ymm91319{vex} vpmadd52luq 480(%rsi),%ymm1,%ymm1013201321{vex} vpmadd52luq 256(%rcx),%ymm2,%ymm31322{vex} vpmadd52luq 288(%rcx),%ymm2,%ymm41323{vex} vpmadd52luq 320(%rcx),%ymm2,%ymm51324{vex} vpmadd52luq 352(%rcx),%ymm2,%ymm61325{vex} vpmadd52luq 384(%rcx),%ymm2,%ymm71326{vex} vpmadd52luq 416(%rcx),%ymm2,%ymm81327{vex} vpmadd52luq 448(%rcx),%ymm2,%ymm91328{vex} vpmadd52luq 480(%rcx),%ymm2,%ymm10132913301331vmovdqu %ymm3,0(%rsp)1332vmovdqu %ymm4,32(%rsp)1333vmovdqu %ymm5,64(%rsp)1334vmovdqu %ymm6,96(%rsp)1335vmovdqu %ymm7,128(%rsp)1336vmovdqu %ymm8,160(%rsp)1337vmovdqu %ymm9,192(%rsp)1338vmovdqu %ymm10,224(%rsp)1339movq $0,256(%rsp)13401341vmovdqu 8(%rsp),%ymm31342vmovdqu 40(%rsp),%ymm41343vmovdqu 72(%rsp),%ymm51344vmovdqu 104(%rsp),%ymm61345vmovdqu 136(%rsp),%ymm71346vmovdqu 168(%rsp),%ymm81347vmovdqu 200(%rsp),%ymm91348vmovdqu 232(%rsp),%ymm1013491350addq 8(%rsp),%r913511352{vex} vpmadd52huq 256(%rsi),%ymm1,%ymm31353{vex} 
vpmadd52huq 288(%rsi),%ymm1,%ymm41354{vex} vpmadd52huq 320(%rsi),%ymm1,%ymm51355{vex} vpmadd52huq 352(%rsi),%ymm1,%ymm61356{vex} vpmadd52huq 384(%rsi),%ymm1,%ymm71357{vex} vpmadd52huq 416(%rsi),%ymm1,%ymm81358{vex} vpmadd52huq 448(%rsi),%ymm1,%ymm91359{vex} vpmadd52huq 480(%rsi),%ymm1,%ymm1013601361{vex} vpmadd52huq 256(%rcx),%ymm2,%ymm31362{vex} vpmadd52huq 288(%rcx),%ymm2,%ymm41363{vex} vpmadd52huq 320(%rcx),%ymm2,%ymm51364{vex} vpmadd52huq 352(%rcx),%ymm2,%ymm61365{vex} vpmadd52huq 384(%rcx),%ymm2,%ymm71366{vex} vpmadd52huq 416(%rcx),%ymm2,%ymm81367{vex} vpmadd52huq 448(%rcx),%ymm2,%ymm91368{vex} vpmadd52huq 480(%rcx),%ymm2,%ymm1013691370leaq 264(%rsp),%rsp1371leaq 8(%r11),%r111372decl %ebx1373jne .Lloop4013741375vmovq %r9,%xmm01376vpbroadcastq %xmm0,%ymm01377vpblendd $3,%ymm0,%ymm3,%ymm31378137913801381vpsrlq $52,%ymm3,%ymm01382vpsrlq $52,%ymm4,%ymm11383vpsrlq $52,%ymm5,%ymm21384vpsrlq $52,%ymm6,%ymm111385vpsrlq $52,%ymm7,%ymm121386vpsrlq $52,%ymm8,%ymm131387vpsrlq $52,%ymm9,%ymm141388vpsrlq $52,%ymm10,%ymm1513891390leaq -32(%rsp),%rsp1391vmovupd %ymm3,(%rsp)139213931394vpermq $144,%ymm15,%ymm151395vpermq $3,%ymm14,%ymm31396vblendpd $1,%ymm3,%ymm15,%ymm1513971398vpermq $144,%ymm14,%ymm141399vpermq $3,%ymm13,%ymm31400vblendpd $1,%ymm3,%ymm14,%ymm1414011402vpermq $144,%ymm13,%ymm131403vpermq $3,%ymm12,%ymm31404vblendpd $1,%ymm3,%ymm13,%ymm1314051406vpermq $144,%ymm12,%ymm121407vpermq $3,%ymm11,%ymm31408vblendpd $1,%ymm3,%ymm12,%ymm1214091410vpermq $144,%ymm11,%ymm111411vpermq $3,%ymm2,%ymm31412vblendpd $1,%ymm3,%ymm11,%ymm1114131414vpermq $144,%ymm2,%ymm21415vpermq $3,%ymm1,%ymm31416vblendpd $1,%ymm3,%ymm2,%ymm214171418vpermq $144,%ymm1,%ymm11419vpermq $3,%ymm0,%ymm31420vblendpd $1,%ymm3,%ymm1,%ymm114211422vpermq $144,%ymm0,%ymm01423vpand .Lhigh64x3(%rip),%ymm0,%ymm014241425vmovupd (%rsp),%ymm31426leaq 32(%rsp),%rsp142714281429vpand .Lmask52x4(%rip),%ymm3,%ymm31430vpand .Lmask52x4(%rip),%ymm4,%ymm41431vpand .Lmask52x4(%rip),%ymm5,%ymm51432vpand 
.Lmask52x4(%rip),%ymm6,%ymm61433vpand .Lmask52x4(%rip),%ymm7,%ymm71434vpand .Lmask52x4(%rip),%ymm8,%ymm81435vpand .Lmask52x4(%rip),%ymm9,%ymm91436vpand .Lmask52x4(%rip),%ymm10,%ymm10143714381439vpaddq %ymm0,%ymm3,%ymm31440vpaddq %ymm1,%ymm4,%ymm41441vpaddq %ymm2,%ymm5,%ymm51442vpaddq %ymm11,%ymm6,%ymm61443vpaddq %ymm12,%ymm7,%ymm71444vpaddq %ymm13,%ymm8,%ymm81445vpaddq %ymm14,%ymm9,%ymm91446vpaddq %ymm15,%ymm10,%ymm101447144814491450vpcmpgtq .Lmask52x4(%rip),%ymm3,%ymm01451vpcmpgtq .Lmask52x4(%rip),%ymm4,%ymm11452vmovmskpd %ymm0,%r14d1453vmovmskpd %ymm1,%r13d1454shlb $4,%r13b1455orb %r13b,%r14b14561457vpcmpgtq .Lmask52x4(%rip),%ymm5,%ymm21458vpcmpgtq .Lmask52x4(%rip),%ymm6,%ymm111459vmovmskpd %ymm2,%r13d1460vmovmskpd %ymm11,%r12d1461shlb $4,%r12b1462orb %r12b,%r13b14631464vpcmpgtq .Lmask52x4(%rip),%ymm7,%ymm121465vpcmpgtq .Lmask52x4(%rip),%ymm8,%ymm131466vmovmskpd %ymm12,%r12d1467vmovmskpd %ymm13,%r11d1468shlb $4,%r11b1469orb %r11b,%r12b14701471vpcmpgtq .Lmask52x4(%rip),%ymm9,%ymm141472vpcmpgtq .Lmask52x4(%rip),%ymm10,%ymm151473vmovmskpd %ymm14,%r11d1474vmovmskpd %ymm15,%r10d1475shlb $4,%r10b1476orb %r10b,%r11b14771478addb %r14b,%r14b1479adcb %r13b,%r13b1480adcb %r12b,%r12b1481adcb %r11b,%r11b148214831484vpcmpeqq .Lmask52x4(%rip),%ymm3,%ymm01485vpcmpeqq .Lmask52x4(%rip),%ymm4,%ymm11486vmovmskpd %ymm0,%r9d1487vmovmskpd %ymm1,%r8d1488shlb $4,%r8b1489orb %r8b,%r9b14901491vpcmpeqq .Lmask52x4(%rip),%ymm5,%ymm21492vpcmpeqq .Lmask52x4(%rip),%ymm6,%ymm111493vmovmskpd %ymm2,%r8d1494vmovmskpd %ymm11,%edx1495shlb $4,%dl1496orb %dl,%r8b14971498vpcmpeqq .Lmask52x4(%rip),%ymm7,%ymm121499vpcmpeqq .Lmask52x4(%rip),%ymm8,%ymm131500vmovmskpd %ymm12,%edx1501vmovmskpd %ymm13,%ecx1502shlb $4,%cl1503orb %cl,%dl15041505vpcmpeqq .Lmask52x4(%rip),%ymm9,%ymm141506vpcmpeqq .Lmask52x4(%rip),%ymm10,%ymm151507vmovmskpd %ymm14,%ecx1508vmovmskpd %ymm15,%ebx1509shlb $4,%bl1510orb %bl,%cl15111512addb %r9b,%r14b1513adcb %r8b,%r13b1514adcb %dl,%r12b1515adcb %cl,%r11b15161517xorb %r9b,%r14b1518xorb 
%r8b,%r13b1519xorb %dl,%r12b1520xorb %cl,%r11b15211522leaq .Lkmasklut(%rip),%rdx15231524movb %r14b,%r10b1525andq $0xf,%r141526vpsubq .Lmask52x4(%rip),%ymm3,%ymm01527shlq $5,%r141528vmovapd (%rdx,%r14,1),%ymm21529vblendvpd %ymm2,%ymm0,%ymm3,%ymm315301531shrb $4,%r10b1532andq $0xf,%r101533vpsubq .Lmask52x4(%rip),%ymm4,%ymm01534shlq $5,%r101535vmovapd (%rdx,%r10,1),%ymm21536vblendvpd %ymm2,%ymm0,%ymm4,%ymm415371538movb %r13b,%r10b1539andq $0xf,%r131540vpsubq .Lmask52x4(%rip),%ymm5,%ymm01541shlq $5,%r131542vmovapd (%rdx,%r13,1),%ymm21543vblendvpd %ymm2,%ymm0,%ymm5,%ymm515441545shrb $4,%r10b1546andq $0xf,%r101547vpsubq .Lmask52x4(%rip),%ymm6,%ymm01548shlq $5,%r101549vmovapd (%rdx,%r10,1),%ymm21550vblendvpd %ymm2,%ymm0,%ymm6,%ymm615511552movb %r12b,%r10b1553andq $0xf,%r121554vpsubq .Lmask52x4(%rip),%ymm7,%ymm01555shlq $5,%r121556vmovapd (%rdx,%r12,1),%ymm21557vblendvpd %ymm2,%ymm0,%ymm7,%ymm715581559shrb $4,%r10b1560andq $0xf,%r101561vpsubq .Lmask52x4(%rip),%ymm8,%ymm01562shlq $5,%r101563vmovapd (%rdx,%r10,1),%ymm21564vblendvpd %ymm2,%ymm0,%ymm8,%ymm815651566movb %r11b,%r10b1567andq $0xf,%r111568vpsubq .Lmask52x4(%rip),%ymm9,%ymm01569shlq $5,%r111570vmovapd (%rdx,%r11,1),%ymm21571vblendvpd %ymm2,%ymm0,%ymm9,%ymm915721573shrb $4,%r10b1574andq $0xf,%r101575vpsubq .Lmask52x4(%rip),%ymm10,%ymm01576shlq $5,%r101577vmovapd (%rdx,%r10,1),%ymm21578vblendvpd %ymm2,%ymm0,%ymm10,%ymm1015791580vpand .Lmask52x4(%rip),%ymm3,%ymm31581vpand .Lmask52x4(%rip),%ymm4,%ymm41582vpand .Lmask52x4(%rip),%ymm5,%ymm51583vpand .Lmask52x4(%rip),%ymm6,%ymm61584vpand .Lmask52x4(%rip),%ymm7,%ymm71585vpand .Lmask52x4(%rip),%ymm8,%ymm81586vpand .Lmask52x4(%rip),%ymm9,%ymm915871588vpand .Lmask52x4(%rip),%ymm10,%ymm1015891590vmovdqu %ymm3,256(%rdi)1591vmovdqu %ymm4,288(%rdi)1592vmovdqu %ymm5,320(%rdi)1593vmovdqu %ymm6,352(%rdi)1594vmovdqu %ymm7,384(%rdi)1595vmovdqu %ymm8,416(%rdi)1596vmovdqu %ymm9,448(%rdi)1597vmovdqu %ymm10,480(%rdi)15981599vzeroupper1600leaq (%rsp),%rax1601.cfi_def_cfa_register 
%rax
	/*
	 * Tail of ossl_rsaz_amm52x30_x2_avxifma256 (unchanged): reload the six
	 * saved registers from the frame addressed by %rax, then release the
	 * 48 bytes they occupy.
	 */
	movq	0(%rax),%r15
.cfi_restore	%r15
	movq	8(%rax),%r14
.cfi_restore	%r14
	movq	16(%rax),%r13
.cfi_restore	%r13
	movq	24(%rax),%r12
.cfi_restore	%r12
	movq	32(%rax),%rbp
.cfi_restore	%rbp
	movq	40(%rax),%rbx
.cfi_restore	%rbx
	leaq	48(%rax),%rsp
.cfi_def_cfa	%rsp,8
.Lossl_rsaz_amm52x30_x2_avxifma256_epilogue:
	.byte	0xf3,0xc3			/* rep ret */
.cfi_endproc
.size	ossl_rsaz_amm52x30_x2_avxifma256, .-ossl_rsaz_amm52x30_x2_avxifma256
.text

/*
 * ossl_extract_multiplier_2x30_win5_avx
 *
 * Writes one 512-byte record to the output, assembled from two halves
 * that are selected independently out of a 32-entry table.
 *
 * Inputs, as consumed by the code below:
 *   %rdi  output buffer; 512 bytes are stored (unaligned stores)
 *   %rsi  table base; 16384 bytes are read as 32 entries of 512 bytes
 *   %rdx  index choosing the entry whose bytes 0..255 go to
 *         output bytes 0..255
 *   %rcx  index choosing the entry whose bytes 256..511 go to
 *         output bytes 256..511
 *
 * Each index is broadcast as a full 64-bit value and compared for
 * equality against a counter that runs 0..31.  A value outside that range
 * (including one with stray upper bits) matches no entry, and the
 * corresponding output half is written as zeros.
 *
 * All 32 entries are loaded on every call; the wanted one is kept with
 * vblendvpd under the compare mask, so neither a load address nor a
 * branch depends on the index values.
 *
 * Clobbers: %rax, %rsi, %ymm0-%ymm13, flags.  The final vzeroupper also
 * clears bits 128 and up of every ymm register.  No stack is used.
 *
 * NOTE: this file is generated from rsaz-3k-avxifma.pl (see the header of
 * the file).  The vzeroupper before the return and the removal of the
 * no-op "vmovapd %ymm0,%ymm0" must be mirrored in the generator, or the
 * next regeneration will drop them.
 */
.align	32
.globl	ossl_extract_multiplier_2x30_win5_avx
.type	ossl_extract_multiplier_2x30_win5_avx,@function
ossl_extract_multiplier_2x30_win5_avx:
.cfi_startproc
	.byte	243,15,30,250			/* endbr64 */
	vmovapd	.Lones(%rip),%ymm12		/* ymm12 = {1,1,1,1}, counter step */
	vmovq	%rdx,%xmm8
	vpbroadcastq	%xmm8,%ymm10		/* ymm10 = first index in every lane */
	vmovq	%rcx,%xmm8
	vpbroadcastq	%xmm8,%ymm11		/* ymm11 = second index in every lane */
	leaq	16384(%rsi),%rax		/* rax = one past the last table entry */

	/* Pass 1: zero the entry counter (ymm9) and accumulators ymm0..ymm7. */
	vpxor	%xmm0,%xmm0,%xmm0
	vmovapd	%ymm0,%ymm9
	vmovapd	%ymm0,%ymm1
	vmovapd	%ymm0,%ymm2
	vmovapd	%ymm0,%ymm3
	vmovapd	%ymm0,%ymm4
	vmovapd	%ymm0,%ymm5
	vmovapd	%ymm0,%ymm6
	vmovapd	%ymm0,%ymm7

.align	32
.Lloop:
	/* ymm13 = all-ones in every lane when counter == first index. */
	vpcmpeqq	%ymm9,%ymm10,%ymm13

	vmovdqu	0(%rsi),%ymm8
	vblendvpd	%ymm13,%ymm8,%ymm0,%ymm0
	vmovdqu	32(%rsi),%ymm8
	vblendvpd	%ymm13,%ymm8,%ymm1,%ymm1
	vmovdqu	64(%rsi),%ymm8
	vblendvpd	%ymm13,%ymm8,%ymm2,%ymm2
	vmovdqu	96(%rsi),%ymm8
	vblendvpd	%ymm13,%ymm8,%ymm3,%ymm3
	vmovdqu	128(%rsi),%ymm8
	vblendvpd	%ymm13,%ymm8,%ymm4,%ymm4
	vmovdqu	160(%rsi),%ymm8
	vblendvpd	%ymm13,%ymm8,%ymm5,%ymm5
	vmovdqu	192(%rsi),%ymm8
	vblendvpd	%ymm13,%ymm8,%ymm6,%ymm6
	vmovdqu	224(%rsi),%ymm8
	vblendvpd	%ymm13,%ymm8,%ymm7,%ymm7

	vpaddq	%ymm12,%ymm9,%ymm9		/* counter += 1 */
	addq	$512,%rsi			/* advance to the next entry */
	cmpq	%rsi,%rax
	jne	.Lloop

	/* Store the first half of the result. */
	vmovdqu	%ymm0,0(%rdi)
	vmovdqu	%ymm1,32(%rdi)
	vmovdqu	%ymm2,64(%rdi)
	vmovdqu	%ymm3,96(%rdi)
	vmovdqu	%ymm4,128(%rdi)
	vmovdqu	%ymm5,160(%rdi)
	vmovdqu	%ymm6,192(%rdi)
	vmovdqu	%ymm7,224(%rdi)

	/* Pass 2: rewind to the table base and reset counter + accumulators. */
	leaq	-16384(%rax),%rsi
	vpxor	%xmm0,%xmm0,%xmm0
	vmovapd	%ymm0,%ymm9
	vmovapd	%ymm0,%ymm1
	vmovapd	%ymm0,%ymm2
	vmovapd	%ymm0,%ymm3
	vmovapd	%ymm0,%ymm4
	vmovapd	%ymm0,%ymm5
	vmovapd	%ymm0,%ymm6
	vmovapd	%ymm0,%ymm7

.align	32
.Lloop_8_15:
	/* ymm13 = all-ones in every lane when counter == second index. */
	vpcmpeqq	%ymm9,%ymm11,%ymm13

	vmovdqu	256(%rsi),%ymm8
	vblendvpd	%ymm13,%ymm8,%ymm0,%ymm0
	vmovdqu	288(%rsi),%ymm8
	vblendvpd	%ymm13,%ymm8,%ymm1,%ymm1
	vmovdqu	320(%rsi),%ymm8
	vblendvpd	%ymm13,%ymm8,%ymm2,%ymm2
	vmovdqu	352(%rsi),%ymm8
	vblendvpd	%ymm13,%ymm8,%ymm3,%ymm3
	vmovdqu	384(%rsi),%ymm8
	vblendvpd	%ymm13,%ymm8,%ymm4,%ymm4
	vmovdqu	416(%rsi),%ymm8
	vblendvpd	%ymm13,%ymm8,%ymm5,%ymm5
	vmovdqu	448(%rsi),%ymm8
	vblendvpd	%ymm13,%ymm8,%ymm6,%ymm6
	vmovdqu	480(%rsi),%ymm8
	vblendvpd	%ymm13,%ymm8,%ymm7,%ymm7

	vpaddq	%ymm12,%ymm9,%ymm9		/* counter += 1 */
	addq	$512,%rsi			/* advance to the next entry */
	cmpq	%rsi,%rax
	jne	.Lloop_8_15

	/* Store the second half of the result. */
	vmovdqu	%ymm0,256(%rdi)
	vmovdqu	%ymm1,288(%rdi)
	vmovdqu	%ymm2,320(%rdi)
	vmovdqu	%ymm3,352(%rdi)
	vmovdqu	%ymm4,384(%rdi)
	vmovdqu	%ymm5,416(%rdi)
	vmovdqu	%ymm6,448(%rdi)
	vmovdqu	%ymm7,480(%rdi)

	/*
	 * Leave the upper ymm halves clean so a caller that runs legacy-SSE
	 * code does not pay the AVX/SSE transition cost.  This matches the
	 * vzeroupper that ossl_rsaz_amm52x30_x2_avxifma256 issues before its
	 * own epilogue.
	 */
	vzeroupper
	.byte	0xf3,0xc3			/* rep ret */
.cfi_endproc
.size	ossl_extract_multiplier_2x30_win5_avx, .-ossl_extract_multiplier_2x30_win5_avx

.section	.rodata
.align	32
/* Per-iteration increment for the entry counter; loaded with vmovapd, hence the 32-byte alignment. */
.Lones:
.quad	1,1,1,1
.Lzeros:
.quad	0,0,0,0

.section	".note.gnu.property", "a"
.p2align	3
.long	1f - 0f
.long	4f - 1f
.long	5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte	0x47
.byte	0x4e
.byte	0x55
.byte	0
1:
.p2align	3
.long	0xc0000002
.long	3f - 2f
2:
.long	3
3:
.p2align	3
4: