/* Source file: sys/crypto/openssl/amd64/poly1305-x86_64.S */
/* Do not modify. This file is auto-generated from poly1305-x86_64.pl. */
/*
 * Poly1305 MAC, x86-64, AT&T/GAS syntax, SysV AMD64 ABI.
 *
 * Context layout at (%rdi) as established by poly1305_init:
 *   0/8/16(%rdi)  - accumulator h (base 2^64 in the scalar path)
 *   24/32(%rdi)   - clamped key r (r0, r1)
 *   20(%rdi)      - "h is in base 2^26" flag used by the AVX paths
 *
 * NOTE(review): recovered from a scrape that fused the line-number gutter
 * into the code; instructions restored verbatim, only comments added.
 */
.text



.globl	poly1305_init
.hidden	poly1305_init
.globl	poly1305_blocks
.hidden	poly1305_blocks
.globl	poly1305_emit
.hidden	poly1305_emit

# int poly1305_init(void *ctx = %rdi, const u8 key[32] = %rsi, void *func[2] = %rdx)
# Zeroes the accumulator, clamps and stores r, and publishes pointers to the
# best available blocks/emit implementation (scalar, AVX or AVX2, selected
# from OPENSSL_ia32cap_P feature bits).  Returns 1 in %eax if a key was
# supplied, 0 otherwise.
.type	poly1305_init,@function
.align	32
poly1305_init:
.cfi_startproc
	xorq	%rax,%rax
	movq	%rax,0(%rdi)		# h0 = 0
	movq	%rax,8(%rdi)		# h1 = 0
	movq	%rax,16(%rdi)		# h2 = 0

	cmpq	$0,%rsi			# NULL key: just clear state, return 0
	je	.Lno_key

	leaq	poly1305_blocks(%rip),%r10
	leaq	poly1305_emit(%rip),%r11
	movq	OPENSSL_ia32cap_P+4(%rip),%r9
	leaq	poly1305_blocks_avx(%rip),%rax
	leaq	poly1305_emit_avx(%rip),%rcx
	btq	$28,%r9			# AVX feature bit
	cmovcq	%rax,%r10
	cmovcq	%rcx,%r11
	leaq	poly1305_blocks_avx2(%rip),%rax
	btq	$37,%r9			# AVX2 feature bit
	cmovcq	%rax,%r10
	movq	$0x0ffffffc0fffffff,%rax	# RFC 8439 clamp masks for r
	movq	$0x0ffffffc0ffffffc,%rcx
	andq	0(%rsi),%rax		# r0
	andq	8(%rsi),%rcx		# r1
	movq	%rax,24(%rdi)
	movq	%rcx,32(%rdi)
	movq	%r10,0(%rdx)		# export chosen blocks()
	movq	%r11,8(%rdx)		# export chosen emit()
	movl	$1,%eax
.Lno_key:
	.byte	0xf3,0xc3		# repz ret
.cfi_endproc
.size	poly1305_init,.-poly1305_init

# void poly1305_blocks(void *ctx = %rdi, const u8 *inp = %rsi,
#                      size_t len = %rdx, u32 padbit = %rcx)
# Scalar base-2^64 loop: per 16-byte block, h += block (plus padbit<<128),
# then h = h*r mod 2^130-5 with lazy carry handling.
# Roles in .Loop: h = %r14:%rbx:%rbp, r0 = %r11, r1 = %r12,
# s1 = r1 + (r1>>2) = %r13, partial products in %r8:%r9:%r10.
.type	poly1305_blocks,@function
.align	32
poly1305_blocks:
.cfi_startproc
.byte	243,15,30,250			# endbr64
.Lblocks:
	shrq	$4,%rdx			# len/16 = number of blocks
	jz	.Lno_data

	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lblocks_body:

	movq	%rdx,%r15		# block counter

	movq	24(%rdi),%r11		# r0
	movq	32(%rdi),%r13		# r1

	movq	0(%rdi),%r14		# h0
	movq	8(%rdi),%rbx		# h1
	movq	16(%rdi),%rbp		# h2

	movq	%r13,%r12
	shrq	$2,%r13
	movq	%r12,%rax		# %rax = r1 for the first mulq
	addq	%r12,%r13		# s1 = r1 + (r1 >> 2)
	jmp	.Loop

.align	32
.Loop:
	addq	0(%rsi),%r14		# h += next 16-byte block
	adcq	8(%rsi),%rbx
	leaq	16(%rsi),%rsi
	adcq	%rcx,%rbp		# + padbit<<128
	mulq	%r14			# h0*r1
	movq	%rax,%r9
	movq	%r11,%rax
	movq	%rdx,%r10

	mulq	%r14			# h0*r0
	movq	%rax,%r14
	movq	%r11,%rax
	movq	%rdx,%r8

	mulq	%rbx			# h1*r0
	addq	%rax,%r9
	movq	%r13,%rax
	adcq	%rdx,%r10

	mulq	%rbx			# h1*s1
	movq	%rbp,%rbx
	addq	%rax,%r14
	adcq	%rdx,%r8

	imulq	%r13,%rbx		# h2*s1
	addq	%rbx,%r9
	movq	%r8,%rbx
	adcq	$0,%r10

	imulq	%r11,%rbp		# h2*r0
	addq	%r9,%rbx
	movq	$-4,%rax		# mask of bits at/above 2^130
	adcq	%rbp,%r10

	andq	%r10,%rax		# lazy reduction: fold excess back as *5/4
	movq	%r10,%rbp
	shrq	$2,%r10
	andq	$3,%rbp
	addq	%r10,%rax
	addq	%rax,%r14
	adcq	$0,%rbx
	adcq	$0,%rbp
	movq	%r12,%rax		# reload r1 for the next iteration
	decq	%r15
	jnz	.Loop

	movq	%r14,0(%rdi)		# store h back
	movq	%rbx,8(%rdi)
	movq	%rbp,16(%rdi)

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbp
.cfi_restore	%rbp
	movq	40(%rsp),%rbx
.cfi_restore	%rbx
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lno_data:
.Lblocks_epilogue:
	.byte	0xf3,0xc3		# repz ret
.cfi_endproc
.size	poly1305_blocks,.-poly1305_blocks

# void poly1305_emit(void *ctx = %rdi, u8 mac[16] = %rsi, const u32 nonce[4] = %rdx)
# Final reduction of base-2^64 h: compute h+5 and keep it iff the sum
# carried into bit 130 (i.e. h >= 2^130-5), then add the nonce and store
# the 16-byte tag.
.type	poly1305_emit,@function
.align	32
poly1305_emit:
.cfi_startproc
.byte	243,15,30,250			# endbr64
.Lemit:
	movq	0(%rdi),%r8		# h0
	movq	8(%rdi),%r9		# h1
	movq	16(%rdi),%r10		# h2

	movq	%r8,%rax
	addq	$5,%r8			# h + 5
	movq	%r9,%rcx
	adcq	$0,%r9
	adcq	$0,%r10
	shrq	$2,%r10			# carried into bit 130?
	cmovnzq	%r8,%rax		# if so, take h+5 (mod 2^128)
	cmovnzq	%r9,%rcx

	addq	0(%rdx),%rax		# tag = (h + nonce) mod 2^128
	adcq	8(%rdx),%rcx
	movq	%rax,0(%rsi)
	movq	%rcx,8(%rsi)

	.byte	0xf3,0xc3		# repz ret
.cfi_endproc
.size	poly1305_emit,.-poly1305_emit

# Internal: one h = h*r mod 2^130-5 round, without the input add.
# In/out: h = %r14:%rbx:%rbp; r0 = %r11, s1 = %r13, %rax = r1 on entry.
# Clobbers %rax, %rdx, %r8-%r10.
.type	__poly1305_block,@function
.align	32
__poly1305_block:
.cfi_startproc
	mulq	%r14			# h0*r1
	movq	%rax,%r9
	movq	%r11,%rax
	movq	%rdx,%r10

	mulq	%r14			# h0*r0
	movq	%rax,%r14
	movq	%r11,%rax
	movq	%rdx,%r8

	mulq	%rbx			# h1*r0
	addq	%rax,%r9
	movq	%r13,%rax
	adcq	%rdx,%r10

	mulq	%rbx			# h1*s1
	movq	%rbp,%rbx
	addq	%rax,%r14
	adcq	%rdx,%r8

	imulq	%r13,%rbx		# h2*s1
	addq	%rbx,%r9
	movq	%r8,%rbx
	adcq	$0,%r10

	imulq	%r11,%rbp		# h2*r0
	addq	%r9,%rbx
	movq	$-4,%rax
	adcq	%rbp,%r10

	andq	%r10,%rax		# lazy reduction, as in poly1305_blocks
	movq	%r10,%rbp
	shrq	$2,%r10
	andq	$3,%rbp
	addq	%r10,%rax
	addq	%rax,%r14
	adcq	$0,%rbx
	adcq	$0,%rbp
	.byte	0xf3,0xc3		# repz ret
.cfi_endproc
.size	__poly1305_block,.-__poly1305_block

.type	__poly1305_init_avx,@function
# Internal: build the base-2^26 power table for the vector paths.
# On entry %r11:%r12 = clamped r, %rdi = ctx.  Computes r^2, r^3, r^4 via
# __poly1305_block and stores all powers as 26-bit limbs, each next to its
# *5 multiple (the leal/leaq (%reg,%reg,4) forms), into the table starting
# 48+64 bytes into the context.  Restores %rdi before returning.
.align	32
__poly1305_init_avx:
.cfi_startproc
	movq	%r11,%r14		# h = r, so __poly1305_block yields r^2
	movq	%r12,%rbx
	xorq	%rbp,%rbp

	leaq	48+64(%rdi),%rdi	# point into the middle of the table

	movq	%r12,%rax
	call	__poly1305_block	# r^2

	movl	$0x3ffffff,%eax		# split r^2 (and r) into 26-bit limbs
	movl	$0x3ffffff,%edx
	movq	%r14,%r8
	andl	%r14d,%eax
	movq	%r11,%r9
	andl	%r11d,%edx
	movl	%eax,-64(%rdi)		# limb 0
	shrq	$26,%r8
	movl	%edx,-60(%rdi)
	shrq	$26,%r9

	movl	$0x3ffffff,%eax
	movl	$0x3ffffff,%edx
	andl	%r8d,%eax
	andl	%r9d,%edx
	movl	%eax,-48(%rdi)		# limb 1
	leal	(%rax,%rax,4),%eax	# limb*5
	movl	%edx,-44(%rdi)
	leal	(%rdx,%rdx,4),%edx
	movl	%eax,-32(%rdi)
	shrq	$26,%r8
	movl	%edx,-28(%rdi)
	shrq	$26,%r9

	movq	%rbx,%rax
	movq	%r12,%rdx
	shlq	$12,%rax
	shlq	$12,%rdx
	orq	%r8,%rax
	orq	%r9,%rdx
	andl	$0x3ffffff,%eax
	andl	$0x3ffffff,%edx
	movl	%eax,-16(%rdi)		# limb 2
	leal	(%rax,%rax,4),%eax
	movl	%edx,-12(%rdi)
	leal	(%rdx,%rdx,4),%edx
	movl	%eax,0(%rdi)
	movq	%rbx,%r8
	movl	%edx,4(%rdi)
	movq	%r12,%r9

	movl	$0x3ffffff,%eax
	movl	$0x3ffffff,%edx
	shrq	$14,%r8
	shrq	$14,%r9
	andl	%r8d,%eax
	andl	%r9d,%edx
	movl	%eax,16(%rdi)		# limb 3
	leal	(%rax,%rax,4),%eax
	movl	%edx,20(%rdi)
	leal	(%rdx,%rdx,4),%edx
	movl	%eax,32(%rdi)
	shrq	$26,%r8
	movl	%edx,36(%rdi)
	shrq	$26,%r9

	movq	%rbp,%rax
	shlq	$24,%rax
	orq	%rax,%r8
	movl	%r8d,48(%rdi)		# limb 4
	leaq	(%r8,%r8,4),%r8
	movl	%r9d,52(%rdi)
	leaq	(%r9,%r9,4),%r9
	movl	%r8d,64(%rdi)
	movl	%r9d,68(%rdi)

	movq	%r12,%rax
	call	__poly1305_block	# r^3

	movl	$0x3ffffff,%eax		# split r^3 into 26-bit limbs
	movq	%r14,%r8
	andl	%r14d,%eax
	shrq	$26,%r8
	movl	%eax,-52(%rdi)

	movl	$0x3ffffff,%edx
	andl	%r8d,%edx
	movl	%edx,-36(%rdi)
	leal	(%rdx,%rdx,4),%edx
	shrq	$26,%r8
	movl	%edx,-20(%rdi)

	movq	%rbx,%rax
	shlq	$12,%rax
	orq	%r8,%rax
	andl	$0x3ffffff,%eax
	movl	%eax,-4(%rdi)
	leal	(%rax,%rax,4),%eax
	movq	%rbx,%r8
	movl	%eax,12(%rdi)

	movl	$0x3ffffff,%edx
	shrq	$14,%r8
	andl	%r8d,%edx
	movl	%edx,28(%rdi)
	leal	(%rdx,%rdx,4),%edx
	shrq	$26,%r8
	movl	%edx,44(%rdi)

	movq	%rbp,%rax
	shlq	$24,%rax
	orq	%rax,%r8
	movl	%r8d,60(%rdi)
	leaq	(%r8,%r8,4),%r8
	movl	%r8d,76(%rdi)

	movq	%r12,%rax
	call	__poly1305_block	# r^4

	movl	$0x3ffffff,%eax		# split r^4 into 26-bit limbs
	movq	%r14,%r8
	andl	%r14d,%eax
	shrq	$26,%r8
	movl	%eax,-56(%rdi)

	movl	$0x3ffffff,%edx
	andl	%r8d,%edx
	movl	%edx,-40(%rdi)
	leal	(%rdx,%rdx,4),%edx
	shrq	$26,%r8
	movl	%edx,-24(%rdi)

	movq	%rbx,%rax
	shlq	$12,%rax
	orq	%r8,%rax
	andl	$0x3ffffff,%eax
	movl	%eax,-8(%rdi)
	leal	(%rax,%rax,4),%eax
	movq	%rbx,%r8
	movl	%eax,8(%rdi)

	movl	$0x3ffffff,%edx
	shrq	$14,%r8
	andl	%r8d,%edx
	movl	%edx,24(%rdi)
	leal	(%rdx,%rdx,4),%edx
	shrq	$26,%r8
	movl	%edx,40(%rdi)

	movq	%rbp,%rax
	shlq	$24,%rax
	orq	%rax,%r8
	movl	%r8d,56(%rdi)
	leaq	(%r8,%r8,4),%r8
	movl	%r8d,72(%rdi)

	leaq	-48-64(%rdi),%rdi	# restore ctx pointer
	.byte	0xf3,0xc3		# repz ret
.cfi_endproc
.size	__poly1305_init_avx,.-__poly1305_init_avx

# void poly1305_blocks_avx(ctx = %rdi, inp = %rsi, len = %rdx, padbit = %rcx)
# AVX path.  Inputs under 128 bytes with h still in base 2^64 are punted to
# the scalar .Lblocks.  Otherwise: if h is already base 2^26 and the length
# is not a multiple of 32, absorb one odd 16-byte block in scalar form
# first; if h is base 2^64, convert it and build the power table
# (.Lbase2_64_avx); then run the 2-lane SIMD loop (.Ldo_avx).
.type	poly1305_blocks_avx,@function
.align	32
poly1305_blocks_avx:
.cfi_startproc
.byte	243,15,30,250			# endbr64
	movl	20(%rdi),%r8d		# is-base-2^26 flag
	cmpq	$128,%rdx
	jae	.Lblocks_avx
	testl	%r8d,%r8d
	jz	.Lblocks		# short and still base 2^64: scalar path

.Lblocks_avx:
	andq	$-16,%rdx		# round len down to whole blocks
	jz	.Lno_data_avx

	vzeroupper

	testl	%r8d,%r8d
	jz	.Lbase2_64_avx

	testq	$31,%rdx
	jz	.Leven_avx

	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lblocks_avx_body:

	movq	%rdx,%r15

	movq	0(%rdi),%r8		# h as five packed 26-bit limbs
	movq	8(%rdi),%r9
	movl	16(%rdi),%ebp

	movq	24(%rdi),%r11		# r0
	movq	32(%rdi),%r13		# r1

	movl	%r8d,%r14d		# recombine 2^26 limbs into base-2^64 h
	andq	$-2147483648,%r8
	movq	%r9,%r12
	movl	%r9d,%ebx
	andq	$-2147483648,%r9

	shrq	$6,%r8
	shlq	$52,%r12
	addq	%r8,%r14
	shrq	$12,%rbx
	shrq	$18,%r9
	addq	%r12,%r14
	adcq	%r9,%rbx
	movq	%rbp,%r8		# finish the 2^26 -> 2^64 conversion
	shlq	$40,%r8			# (limbs may carry beyond 26 bits)
	shrq	$24,%rbp
	addq	%r8,%rbx
	adcq	$0,%rbp

	movq	$-4,%r9			# reduce bits at/above 2^130 as *5/4
	movq	%rbp,%r8
	andq	%rbp,%r9
	shrq	$2,%r8
	andq	$3,%rbp
	addq	%r9,%r8
	addq	%r8,%r14
	adcq	$0,%rbx
	adcq	$0,%rbp

	movq	%r13,%r12
	movq	%r13,%rax
	shrq	$2,%r13
	addq	%r12,%r13		# s1 = r1 + (r1 >> 2)

	addq	0(%rsi),%r14		# absorb the odd 16-byte block
	adcq	8(%rsi),%rbx
	leaq	16(%rsi),%rsi
	adcq	%rcx,%rbp

	call	__poly1305_block

	testq	%rcx,%rcx		# padbit == 0 means final (short) block:
	jz	.Lstore_base2_64_avx	# store h in base 2^64 and return

	movq	%r14,%rax		# split h back into base-2^26 limbs
	movq	%r14,%rdx
	shrq	$52,%r14
	movq	%rbx,%r11
	movq	%rbx,%r12
	shrq	$26,%rdx
	andq	$0x3ffffff,%rax		# h limb 0
	shlq	$12,%r11
	andq	$0x3ffffff,%rdx		# h limb 1
	shrq	$14,%rbx
	orq	%r11,%r14
	shlq	$24,%rbp
	andq	$0x3ffffff,%r14		# h limb 2
	shrq	$40,%r12
	andq	$0x3ffffff,%rbx		# h limb 3
	orq	%r12,%rbp		# h limb 4

	subq	$16,%r15
	jz	.Lstore_base2_26_avx

	vmovd	%eax,%xmm0		# h limbs into vector registers
	vmovd	%edx,%xmm1
	vmovd	%r14d,%xmm2
	vmovd	%ebx,%xmm3
	vmovd	%ebp,%xmm4
	jmp	.Lproceed_avx

.align	32
.Lstore_base2_64_avx:
	movq	%r14,0(%rdi)
	movq	%rbx,8(%rdi)
	movq	%rbp,16(%rdi)
	jmp	.Ldone_avx

.align	16
.Lstore_base2_26_avx:
	movl	%eax,0(%rdi)
	movl	%edx,4(%rdi)
	movl	%r14d,8(%rdi)
	movl	%ebx,12(%rdi)
	movl	%ebp,16(%rdi)
.align	16
.Ldone_avx:
	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbp
.cfi_restore	%rbp
	movq	40(%rsp),%rbx
.cfi_restore	%rbx
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lno_data_avx:
.Lblocks_avx_epilogue:
	.byte	0xf3,0xc3		# repz ret
.cfi_endproc

# First AVX entry with h still in base 2^64: absorb one odd block if the
# length demands it, build the r-power table, then fall into .Ldo_avx.
.align	32
.Lbase2_64_avx:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
.Lbase2_64_avx_body:

	movq	%rdx,%r15

	movq	24(%rdi),%r11		# r0
	movq	32(%rdi),%r13		# r1

	movq	0(%rdi),%r14		# base-2^64 h
	movq	8(%rdi),%rbx
	movl	16(%rdi),%ebp

	movq	%r13,%r12
	movq	%r13,%rax
	shrq	$2,%r13
	addq	%r12,%r13		# s1

	testq	$31,%rdx
	jz	.Linit_avx

	addq	0(%rsi),%r14		# absorb odd block in scalar form
	adcq	8(%rsi),%rbx
	leaq	16(%rsi),%rsi
	adcq	%rcx,%rbp
	subq	$16,%r15

	call	__poly1305_block

.Linit_avx:

	movq	%r14,%rax		# split h into base-2^26 limbs
	movq	%r14,%rdx
	shrq	$52,%r14
	movq	%rbx,%r8
	movq	%rbx,%r9
	shrq	$26,%rdx
	andq	$0x3ffffff,%rax
	shlq	$12,%r8
	andq	$0x3ffffff,%rdx
	shrq	$14,%rbx
	orq	%r8,%r14
	shlq	$24,%rbp
	andq	$0x3ffffff,%r14
	shrq	$40,%r9
	andq	$0x3ffffff,%rbx
	orq	%r9,%rbp

	vmovd	%eax,%xmm0
	vmovd	%edx,%xmm1
	vmovd	%r14d,%xmm2
	vmovd	%ebx,%xmm3
	vmovd	%ebp,%xmm4
	movl	$1,20(%rdi)		# h is base 2^26 from here on

	call	__poly1305_init_avx

.Lproceed_avx:
	movq	%r15,%rdx

	movq	0(%rsp),%r15
.cfi_restore	%r15
	movq	8(%rsp),%r14
.cfi_restore	%r14
	movq	16(%rsp),%r13
.cfi_restore	%r13
	movq	24(%rsp),%r12
.cfi_restore	%r12
	movq	32(%rsp),%rbp
.cfi_restore	%rbp
	movq	40(%rsp),%rbx
.cfi_restore	%rbx
	leaq	48(%rsp),%rax
	leaq	48(%rsp),%rsp
.cfi_adjust_cfa_offset	-48
.Lbase2_64_avx_epilogue:
	jmp	.Ldo_avx
.cfi_endproc

# Fast entry: h already in base 2^26 and length a multiple of 32 bytes.
.align	32
.Leven_avx:
.cfi_startproc
	vmovd	0(%rdi),%xmm0		# load h limbs
	vmovd	4(%rdi),%xmm1
	vmovd	8(%rdi),%xmm2
	vmovd	12(%rdi),%xmm3
	vmovd	16(%rdi),%xmm4

.Ldo_avx:
	leaq	-88(%rsp),%r11		# %r11 anchors one spill area
.cfi_def_cfa	%r11,0x60
	subq	$0x178,%rsp
	subq	$64,%rdx
	leaq	-32(%rsi),%rax		# rewound pointer for a short tail
	cmovcq	%rax,%rsi

	vmovdqu	48(%rdi),%xmm14		# first power-table entry (r^2/r limb 0)
	leaq	112(%rdi),%rdi		# point into the power table
	leaq	.Lconst(%rip),%rcx	# mask/pad constants

	vmovdqu	32(%rsi),%xmm5		# load the second 32 bytes of input
	vmovdqu	48(%rsi),%xmm6
	vmovdqa	64(%rcx),%xmm15		# 26-bit limb mask (from .Lconst)

	vpsrldq	$6,%xmm5,%xmm7		# split input into 26-bit limbs
	vpsrldq	$6,%xmm6,%xmm8
	vpunpckhqdq	%xmm6,%xmm5,%xmm9
	vpunpcklqdq	%xmm6,%xmm5,%xmm5
	vpunpcklqdq	%xmm8,%xmm7,%xmm8

	vpsrlq	$40,%xmm9,%xmm9
	vpsrlq	$26,%xmm5,%xmm6
	vpand	%xmm15,%xmm5,%xmm5
	vpsrlq	$4,%xmm8,%xmm7
	vpand	%xmm15,%xmm6,%xmm6
	vpsrlq	$30,%xmm8,%xmm8
	vpand	%xmm15,%xmm7,%xmm7	# finish limb split of the input
	vpand	%xmm15,%xmm8,%xmm8
	vpor	32(%rcx),%xmm9,%xmm9	# merge the 2^128 pad bit into limb 4

	jbe	.Lskip_loop_avx		# < 64 bytes remain: skip the main loop

	# Spill r^1..r^4 power limbs (and their *5 multiples) from the ctx
	# table to the stack: vpshufd $0x44/$0xEE splat low/high halves so
	# each vector holds a matched pair of powers; even set at (%rsp),
	# odd set at negative offsets from %r11.
	vmovdqu	-48(%rdi),%xmm11
	vmovdqu	-32(%rdi),%xmm12
	vpshufd	$0xEE,%xmm14,%xmm13
	vpshufd	$0x44,%xmm14,%xmm10
	vmovdqa	%xmm13,-144(%r11)
	vmovdqa	%xmm10,0(%rsp)
	vpshufd	$0xEE,%xmm11,%xmm14
	vmovdqu	-16(%rdi),%xmm10
	vpshufd	$0x44,%xmm11,%xmm11
	vmovdqa	%xmm14,-128(%r11)
	vmovdqa	%xmm11,16(%rsp)
	vpshufd	$0xEE,%xmm12,%xmm13
	vmovdqu	0(%rdi),%xmm11
	vpshufd	$0x44,%xmm12,%xmm12
	vmovdqa	%xmm13,-112(%r11)
	vmovdqa	%xmm12,32(%rsp)
	vpshufd	$0xEE,%xmm10,%xmm14
	vmovdqu	16(%rdi),%xmm12
	vpshufd	$0x44,%xmm10,%xmm10
	vmovdqa	%xmm14,-96(%r11)
	vmovdqa	%xmm10,48(%rsp)
	vpshufd	$0xEE,%xmm11,%xmm13
	vmovdqu	32(%rdi),%xmm10
	vpshufd	$0x44,%xmm11,%xmm11
	vmovdqa	%xmm13,-80(%r11)
	vmovdqa	%xmm11,64(%rsp)
	vpshufd	$0xEE,%xmm12,%xmm14
	vmovdqu	48(%rdi),%xmm11
	vpshufd	$0x44,%xmm12,%xmm12
	vmovdqa	%xmm14,-64(%r11)
	vmovdqa	%xmm12,80(%rsp)
	vpshufd	$0xEE,%xmm10,%xmm13
	vmovdqu	64(%rdi),%xmm12
	vpshufd	$0x44,%xmm10,%xmm10
	vmovdqa	%xmm13,-48(%r11)
	vmovdqa	%xmm10,96(%rsp)
	vpshufd	$0xEE,%xmm11,%xmm14
	vpshufd	$0x44,%xmm11,%xmm11
	vmovdqa	%xmm14,-32(%r11)
	vmovdqa	%xmm11,112(%rsp)
	vpshufd	$0xEE,%xmm12,%xmm13
	vmovdqa	0(%rsp),%xmm14
	vpshufd	$0x44,%xmm12,%xmm12
	vmovdqa	%xmm13,-16(%r11)
	vmovdqa	%xmm12,128(%rsp)

	jmp	.Loop_avx

# Main 2-lane loop, 4 blocks per iteration.  %xmm5-%xmm9 hold the current
# input limbs, %xmm0-%xmm4 the running h limbs, %xmm10-%xmm14 accumulate
# the product limbs d0-d4; the next 64 bytes of input are fetched and
# limb-split in the shadow of the multiplications.
.align	32
.Loop_avx:
	vpmuludq	%xmm5,%xmm14,%xmm10
	vpmuludq	%xmm6,%xmm14,%xmm11
	vmovdqa	%xmm2,32(%r11)		# stash h while its registers are reused
	vpmuludq	%xmm7,%xmm14,%xmm12
	vmovdqa	16(%rsp),%xmm2
	vpmuludq	%xmm8,%xmm14,%xmm13
	vpmuludq	%xmm9,%xmm14,%xmm14

	vmovdqa	%xmm0,0(%r11)
	vpmuludq	32(%rsp),%xmm9,%xmm0
	vmovdqa	%xmm1,16(%r11)
	vpmuludq	%xmm8,%xmm2,%xmm1
	vpaddq	%xmm0,%xmm10,%xmm10
	vpaddq	%xmm1,%xmm14,%xmm14
	vmovdqa	%xmm3,48(%r11)
	vpmuludq	%xmm7,%xmm2,%xmm0
	vpmuludq	%xmm6,%xmm2,%xmm1
	vpaddq	%xmm0,%xmm13,%xmm13
	vmovdqa	48(%rsp),%xmm3
	vpaddq	%xmm1,%xmm12,%xmm12
	vmovdqa	%xmm4,64(%r11)
	vpmuludq	%xmm5,%xmm2,%xmm2
	vpmuludq	%xmm7,%xmm3,%xmm0
	vpaddq	%xmm2,%xmm11,%xmm11

	vmovdqa	64(%rsp),%xmm4
	vpaddq	%xmm0,%xmm14,%xmm14
	vpmuludq	%xmm6,%xmm3,%xmm1
	vpmuludq	%xmm5,%xmm3,%xmm3
	vpaddq	%xmm1,%xmm13,%xmm13
	vmovdqa	80(%rsp),%xmm2
	vpaddq	%xmm3,%xmm12,%xmm12
	vpmuludq	%xmm9,%xmm4,%xmm0
	vpmuludq	%xmm8,%xmm4,%xmm4
	vpaddq	%xmm0,%xmm11,%xmm11
	vmovdqa	96(%rsp),%xmm3
	vpaddq	%xmm4,%xmm10,%xmm10

	vmovdqa	128(%rsp),%xmm4
	vpmuludq	%xmm6,%xmm2,%xmm1
	vpmuludq	%xmm5,%xmm2,%xmm2
	vpaddq	%xmm1,%xmm14,%xmm14
	vpaddq	%xmm2,%xmm13,%xmm13
	vpmuludq	%xmm9,%xmm3,%xmm0
	vpmuludq	%xmm8,%xmm3,%xmm1
	vpaddq	%xmm0,%xmm12,%xmm12
	vmovdqu	0(%rsi),%xmm0		# start loading the next 64 bytes
	vpaddq	%xmm1,%xmm11,%xmm11
	vpmuludq	%xmm7,%xmm3,%xmm3
	vpmuludq	%xmm7,%xmm4,%xmm7
	vpaddq	%xmm3,%xmm10,%xmm10

	vmovdqu	16(%rsi),%xmm1
	vpaddq	%xmm7,%xmm11,%xmm11
	vpmuludq	%xmm8,%xmm4,%xmm8
	vpmuludq	%xmm9,%xmm4,%xmm9
	vpsrldq	$6,%xmm0,%xmm2
	vpaddq	%xmm8,%xmm12,%xmm12
	vpaddq	%xmm9,%xmm13,%xmm13
	vpsrldq	$6,%xmm1,%xmm3
	vpmuludq	112(%rsp),%xmm5,%xmm9
	vpmuludq	%xmm6,%xmm4,%xmm5
	vpunpckhqdq	%xmm1,%xmm0,%xmm4
	vpaddq	%xmm9,%xmm14,%xmm14
	vmovdqa	-144(%r11),%xmm9
	vpaddq	%xmm5,%xmm10,%xmm10

	vpunpcklqdq	%xmm1,%xmm0,%xmm0
	vpunpcklqdq	%xmm3,%xmm2,%xmm3

	vpsrldq	$5,%xmm4,%xmm4		# limb-split the new input
	vpsrlq	$26,%xmm0,%xmm1
	vpand	%xmm15,%xmm0,%xmm0
	vpsrlq	$4,%xmm3,%xmm2
	vpand	%xmm15,%xmm1,%xmm1
	vpand	0(%rcx),%xmm4,%xmm4
	vpsrlq	$30,%xmm3,%xmm3
	vpand	%xmm15,%xmm2,%xmm2
	vpand	%xmm15,%xmm3,%xmm3
	vpor	32(%rcx),%xmm4,%xmm4	# pad bit

	vpaddq	0(%r11),%xmm0,%xmm0	# h += new input
	vpaddq	16(%r11),%xmm1,%xmm1
	vpaddq	32(%r11),%xmm2,%xmm2
	vpaddq	48(%r11),%xmm3,%xmm3
	vpaddq	64(%r11),%xmm4,%xmm4

	leaq	32(%rsi),%rax
	leaq	64(%rsi),%rsi
	subq	$64,%rdx
	cmovcq	%rax,%rsi		# rewind for a short final pass

	# Second half: multiply (h + input) by the other power set and fold
	# into the d0-d4 accumulators.
	vpmuludq	%xmm0,%xmm9,%xmm5
	vpmuludq	%xmm1,%xmm9,%xmm6
	vpaddq	%xmm5,%xmm10,%xmm10
	vpaddq	%xmm6,%xmm11,%xmm11
	vmovdqa	-128(%r11),%xmm7
	vpmuludq	%xmm2,%xmm9,%xmm5
	vpmuludq	%xmm3,%xmm9,%xmm6
	vpaddq	%xmm5,%xmm12,%xmm12
	vpaddq	%xmm6,%xmm13,%xmm13
	vpmuludq	%xmm4,%xmm9,%xmm9
	vpmuludq	-112(%r11),%xmm4,%xmm5
	vpaddq	%xmm9,%xmm14,%xmm14

	vpaddq	%xmm5,%xmm10,%xmm10
	vpmuludq	%xmm2,%xmm7,%xmm6
	vpmuludq	%xmm3,%xmm7,%xmm5
	vpaddq	%xmm6,%xmm13,%xmm13
	vmovdqa	-96(%r11),%xmm8
	vpaddq	%xmm5,%xmm14,%xmm14
	vpmuludq	%xmm1,%xmm7,%xmm6
	vpmuludq	%xmm0,%xmm7,%xmm7
	vpaddq	%xmm6,%xmm12,%xmm12
	vpaddq	%xmm7,%xmm11,%xmm11

	vmovdqa	-80(%r11),%xmm9
	vpmuludq	%xmm2,%xmm8,%xmm5
	vpmuludq	%xmm1,%xmm8,%xmm6
	vpaddq	%xmm5,%xmm14,%xmm14
	vpaddq	%xmm6,%xmm13,%xmm13
	vmovdqa	-64(%r11),%xmm7
	vpmuludq	%xmm0,%xmm8,%xmm8
	vpmuludq	%xmm4,%xmm9,%xmm5
	vpaddq	%xmm8,%xmm12,%xmm12
	vpaddq	%xmm5,%xmm11,%xmm11
	vmovdqa	-48(%r11),%xmm8
	vpmuludq	%xmm3,%xmm9,%xmm9
	vpmuludq	%xmm1,%xmm7,%xmm6
	vpaddq	%xmm9,%xmm10,%xmm10

	vmovdqa	-16(%r11),%xmm9
	vpaddq	%xmm6,%xmm14,%xmm14
	vpmuludq	%xmm0,%xmm7,%xmm7
	vpmuludq	%xmm4,%xmm8,%xmm5
	vpaddq	%xmm7,%xmm13,%xmm13
	vpaddq	%xmm5,%xmm12,%xmm12
	vmovdqu	32(%rsi),%xmm5		# load input for the next iteration
	vpmuludq	%xmm3,%xmm8,%xmm7
	vpmuludq	%xmm2,%xmm8,%xmm8
	vpaddq	%xmm7,%xmm11,%xmm11
	vmovdqu	48(%rsi),%xmm6
	vpaddq	%xmm8,%xmm10,%xmm10

	vpmuludq	%xmm2,%xmm9,%xmm2
	vpmuludq	%xmm3,%xmm9,%xmm3
	vpsrldq	$6,%xmm5,%xmm7
	vpaddq	%xmm2,%xmm11,%xmm11
	vpmuludq	%xmm4,%xmm9,%xmm4
	vpsrldq	$6,%xmm6,%xmm8
	vpaddq	%xmm3,%xmm12,%xmm2	# rotate d back into the h registers
	vpaddq	%xmm4,%xmm13,%xmm3
	vpmuludq	-32(%r11),%xmm0,%xmm4
	vpmuludq	%xmm1,%xmm9,%xmm0
	vpunpckhqdq	%xmm6,%xmm5,%xmm9
	vpaddq	%xmm4,%xmm14,%xmm4
	vpaddq	%xmm0,%xmm10,%xmm0

	vpunpcklqdq	%xmm6,%xmm5,%xmm5
	vpunpcklqdq	%xmm8,%xmm7,%xmm8

	vpsrldq	$5,%xmm9,%xmm9		# limb-split the newly loaded input
	vpsrlq	$26,%xmm5,%xmm6
	vmovdqa	0(%rsp),%xmm14
	vpand	%xmm15,%xmm5,%xmm5
	vpsrlq	$4,%xmm8,%xmm7
	vpand	%xmm15,%xmm6,%xmm6
	vpand	0(%rcx),%xmm9,%xmm9
	vpsrlq	$30,%xmm8,%xmm8
	vpand	%xmm15,%xmm7,%xmm7
	vpand	%xmm15,%xmm8,%xmm8
	vpor	32(%rcx),%xmm9,%xmm9
	# Lazy reduction: propagate 26-bit carries across h0..h4, folding the
	# carry out of h4 back into h0 as *5 (2^130 == 5 mod 2^130-5).
	vpsrlq	$26,%xmm3,%xmm13
	vpand	%xmm15,%xmm3,%xmm3
	vpaddq	%xmm13,%xmm4,%xmm4

	vpsrlq	$26,%xmm0,%xmm10
	vpand	%xmm15,%xmm0,%xmm0
	vpaddq	%xmm10,%xmm11,%xmm1

	vpsrlq	$26,%xmm4,%xmm10
	vpand	%xmm15,%xmm4,%xmm4

	vpsrlq	$26,%xmm1,%xmm11
	vpand	%xmm15,%xmm1,%xmm1
	vpaddq	%xmm11,%xmm2,%xmm2

	vpaddq	%xmm10,%xmm0,%xmm0
	vpsllq	$2,%xmm10,%xmm10	# carry + carry*4 = carry*5
	vpaddq	%xmm10,%xmm0,%xmm0

	vpsrlq	$26,%xmm2,%xmm12
	vpand	%xmm15,%xmm2,%xmm2
	vpaddq	%xmm12,%xmm3,%xmm3

	vpsrlq	$26,%xmm0,%xmm10
	vpand	%xmm15,%xmm0,%xmm0
	vpaddq	%xmm10,%xmm1,%xmm1

	vpsrlq	$26,%xmm3,%xmm13
	vpand	%xmm15,%xmm3,%xmm3
	vpaddq	%xmm13,%xmm4,%xmm4

	ja	.Loop_avx		# flags still from subq $64,%rdx above

.Lskip_loop_avx:
	# Tail: multiply the accumulated lanes by r^2 (and by r for a final
	# odd 32 bytes), then combine horizontally.
	vpshufd	$0x10,%xmm14,%xmm14
	addq	$32,%rdx
	jnz	.Long_tail_avx

	vpaddq	%xmm2,%xmm7,%xmm7	# exactly 32 bytes left: merge h into
	vpaddq	%xmm0,%xmm5,%xmm5	# the pending input limbs
	vpaddq	%xmm1,%xmm6,%xmm6
	vpaddq	%xmm3,%xmm8,%xmm8
	vpaddq	%xmm4,%xmm9,%xmm9

.Long_tail_avx:
	vmovdqa	%xmm2,32(%r11)
	vmovdqa	%xmm0,0(%r11)
	vmovdqa	%xmm1,16(%r11)
	vmovdqa	%xmm3,48(%r11)
	vmovdqa	%xmm4,64(%r11)

	# d = h*r^2 (vpshufd $0x10 re-interleaves the stored power limbs)
	vpmuludq	%xmm7,%xmm14,%xmm12
	vpmuludq	%xmm5,%xmm14,%xmm10
	vpshufd	$0x10,-48(%rdi),%xmm2
	vpmuludq	%xmm6,%xmm14,%xmm11
	vpmuludq	%xmm8,%xmm14,%xmm13
	vpmuludq	%xmm9,%xmm14,%xmm14

	vpmuludq	%xmm8,%xmm2,%xmm0
	vpaddq	%xmm0,%xmm14,%xmm14
	vpshufd	$0x10,-32(%rdi),%xmm3
	vpmuludq	%xmm7,%xmm2,%xmm1
	vpaddq	%xmm1,%xmm13,%xmm13
	vpshufd	$0x10,-16(%rdi),%xmm4
	vpmuludq	%xmm6,%xmm2,%xmm0
	vpaddq	%xmm0,%xmm12,%xmm12
	vpmuludq	%xmm5,%xmm2,%xmm2
	vpaddq	%xmm2,%xmm11,%xmm11
	vpmuludq	%xmm9,%xmm3,%xmm3
	vpaddq	%xmm3,%xmm10,%xmm10

	vpshufd	$0x10,0(%rdi),%xmm2
	vpmuludq	%xmm7,%xmm4,%xmm1
	vpaddq	%xmm1,%xmm14,%xmm14
	vpmuludq	%xmm6,%xmm4,%xmm0
	vpaddq	%xmm0,%xmm13,%xmm13
	vpshufd	$0x10,16(%rdi),%xmm3
	vpmuludq	%xmm5,%xmm4,%xmm4
	vpaddq	%xmm4,%xmm12,%xmm12
	vpmuludq	%xmm9,%xmm2,%xmm1
	vpaddq	%xmm1,%xmm11,%xmm11
	vpshufd	$0x10,32(%rdi),%xmm4
	vpmuludq	%xmm8,%xmm2,%xmm2
	vpaddq	%xmm2,%xmm10,%xmm10

	vpmuludq	%xmm6,%xmm3,%xmm0
	vpaddq	%xmm0,%xmm14,%xmm14
	vpmuludq	%xmm5,%xmm3,%xmm3
	vpaddq	%xmm3,%xmm13,%xmm13
	vpshufd	$0x10,48(%rdi),%xmm2
	vpmuludq	%xmm9,%xmm4,%xmm1
	vpaddq	%xmm1,%xmm12,%xmm12
	vpshufd	$0x10,64(%rdi),%xmm3
	vpmuludq	%xmm8,%xmm4,%xmm0
	vpaddq	%xmm0,%xmm11,%xmm11
	vpmuludq	%xmm7,%xmm4,%xmm4
	vpaddq	%xmm4,%xmm10,%xmm10

	vpmuludq	%xmm5,%xmm2,%xmm2
	vpaddq	%xmm2,%xmm14,%xmm14
	vpmuludq	%xmm9,%xmm3,%xmm1
	vpaddq	%xmm1,%xmm13,%xmm13
	vpmuludq	%xmm8,%xmm3,%xmm0
	vpaddq	%xmm0,%xmm12,%xmm12
	vpmuludq	%xmm7,%xmm3,%xmm1
	vpaddq	%xmm1,%xmm11,%xmm11
	vpmuludq	%xmm6,%xmm3,%xmm3
	vpaddq	%xmm3,%xmm10,%xmm10

	jz	.Lshort_tail_avx	# no further input: combine and store

	vmovdqu	0(%rsi),%xmm0		# absorb the final 32 bytes
	vmovdqu	16(%rsi),%xmm1

	vpsrldq	$6,%xmm0,%xmm2
	vpsrldq	$6,%xmm1,%xmm3
	vpunpckhqdq	%xmm1,%xmm0,%xmm4
	vpunpcklqdq	%xmm1,%xmm0,%xmm0
	vpunpcklqdq	%xmm3,%xmm2,%xmm3

	vpsrlq	$40,%xmm4,%xmm4
	vpsrlq	$26,%xmm0,%xmm1
	vpand	%xmm15,%xmm0,%xmm0
	vpsrlq	$4,%xmm3,%xmm2
	vpand	%xmm15,%xmm1,%xmm1
	vpsrlq	$30,%xmm3,%xmm3
	vpand	%xmm15,%xmm2,%xmm2
	vpand	%xmm15,%xmm3,%xmm3
	vpor	32(%rcx),%xmm4,%xmm4

	vpshufd	$0x32,-64(%rdi),%xmm9
	vpaddq	0(%r11),%xmm0,%xmm0
	vpaddq	16(%r11),%xmm1,%xmm1
	vpaddq	32(%r11),%xmm2,%xmm2
	vpaddq	48(%r11),%xmm3,%xmm3
	vpaddq	64(%r11),%xmm4,%xmm4

	# d += (h + last input)*r  (vpshufd $0x32 selects the r^1 limbs)
	vpmuludq	%xmm0,%xmm9,%xmm5
	vpaddq	%xmm5,%xmm10,%xmm10
	vpmuludq	%xmm1,%xmm9,%xmm6
	vpaddq	%xmm6,%xmm11,%xmm11
	vpmuludq	%xmm2,%xmm9,%xmm5
	vpaddq	%xmm5,%xmm12,%xmm12
	vpshufd	$0x32,-48(%rdi),%xmm7
	vpmuludq	%xmm3,%xmm9,%xmm6
	vpaddq	%xmm6,%xmm13,%xmm13
	vpmuludq	%xmm4,%xmm9,%xmm9
	vpaddq	%xmm9,%xmm14,%xmm14

	vpmuludq	%xmm3,%xmm7,%xmm5
	vpaddq	%xmm5,%xmm14,%xmm14
	vpshufd	$0x32,-32(%rdi),%xmm8
	vpmuludq	%xmm2,%xmm7,%xmm6
	vpaddq	%xmm6,%xmm13,%xmm13
	vpshufd	$0x32,-16(%rdi),%xmm9
	vpmuludq	%xmm1,%xmm7,%xmm5
	vpaddq	%xmm5,%xmm12,%xmm12
	vpmuludq	%xmm0,%xmm7,%xmm7
	vpaddq	%xmm7,%xmm11,%xmm11
	vpmuludq	%xmm4,%xmm8,%xmm8
	vpaddq	%xmm8,%xmm10,%xmm10

	vpshufd	$0x32,0(%rdi),%xmm7
	vpmuludq	%xmm2,%xmm9,%xmm6
	vpaddq	%xmm6,%xmm14,%xmm14
	vpmuludq	%xmm1,%xmm9,%xmm5
	vpaddq	%xmm5,%xmm13,%xmm13
	vpshufd	$0x32,16(%rdi),%xmm8
	vpmuludq	%xmm0,%xmm9,%xmm9
	vpaddq	%xmm9,%xmm12,%xmm12
	vpmuludq	%xmm4,%xmm7,%xmm6
	vpaddq	%xmm6,%xmm11,%xmm11
	vpshufd	$0x32,32(%rdi),%xmm9
	vpmuludq	%xmm3,%xmm7,%xmm7
	vpaddq	%xmm7,%xmm10,%xmm10

	vpmuludq	%xmm1,%xmm8,%xmm5
	vpaddq	%xmm5,%xmm14,%xmm14
	vpmuludq	%xmm0,%xmm8,%xmm8
	vpaddq	%xmm8,%xmm13,%xmm13
	vpshufd	$0x32,48(%rdi),%xmm7
	vpmuludq	%xmm4,%xmm9,%xmm6
	vpaddq	%xmm6,%xmm12,%xmm12
	vpshufd	$0x32,64(%rdi),%xmm8
	vpmuludq	%xmm3,%xmm9,%xmm5
	vpaddq	%xmm5,%xmm11,%xmm11
	vpmuludq	%xmm2,%xmm9,%xmm9
	vpaddq	%xmm9,%xmm10,%xmm10

	vpmuludq	%xmm0,%xmm7,%xmm7
	vpaddq	%xmm7,%xmm14,%xmm14
	vpmuludq	%xmm4,%xmm8,%xmm6
	vpaddq	%xmm6,%xmm13,%xmm13
	vpmuludq	%xmm3,%xmm8,%xmm5
	vpaddq	%xmm5,%xmm12,%xmm12
	vpmuludq	%xmm2,%xmm8,%xmm6
	vpaddq	%xmm6,%xmm11,%xmm11
	vpmuludq	%xmm1,%xmm8,%xmm8
	vpaddq	%xmm8,%xmm10,%xmm10

.Lshort_tail_avx:
	# Horizontal add: fold the two 64-bit lanes of each d limb together.
	vpsrldq	$8,%xmm14,%xmm9
	vpsrldq	$8,%xmm13,%xmm8
	vpsrldq	$8,%xmm11,%xmm6
	vpsrldq	$8,%xmm10,%xmm5
	vpsrldq	$8,%xmm12,%xmm7
	vpaddq	%xmm8,%xmm13,%xmm13
	vpaddq	%xmm9,%xmm14,%xmm14
	vpaddq	%xmm5,%xmm10,%xmm10
	vpaddq	%xmm6,%xmm11,%xmm11
	vpaddq	%xmm7,%xmm12,%xmm12

	# Final lazy reduction of the combined limbs.
	vpsrlq	$26,%xmm13,%xmm3
	vpand	%xmm15,%xmm13,%xmm13
	vpaddq	%xmm3,%xmm14,%xmm14

	vpsrlq	$26,%xmm10,%xmm0
	vpand	%xmm15,%xmm10,%xmm10
	vpaddq	%xmm0,%xmm11,%xmm11

	vpsrlq	$26,%xmm14,%xmm4
	vpand	%xmm15,%xmm14,%xmm14

	vpsrlq	$26,%xmm11,%xmm1
	vpand	%xmm15,%xmm11,%xmm11
	vpaddq	%xmm1,%xmm12,%xmm12

	vpaddq	%xmm4,%xmm10,%xmm10
	vpsllq	$2,%xmm4,%xmm4		# carry*5 fold-back
	vpaddq	%xmm4,%xmm10,%xmm10	# finish folding the top carry (*5)

	vpsrlq	$26,%xmm12,%xmm2	# last carry propagation
	vpand	%xmm15,%xmm12,%xmm12
	vpaddq	%xmm2,%xmm13,%xmm13

	vpsrlq	$26,%xmm10,%xmm0
	vpand	%xmm15,%xmm10,%xmm10
	vpaddq	%xmm0,%xmm11,%xmm11

	vpsrlq	$26,%xmm13,%xmm3
	vpand	%xmm15,%xmm13,%xmm13
	vpaddq	%xmm3,%xmm14,%xmm14

	vmovd	%xmm10,-112(%rdi)	# store base-2^26 h back to ctx
	vmovd	%xmm11,-108(%rdi)	# (%rdi still points 112 into ctx)
	vmovd	%xmm12,-104(%rdi)
	vmovd	%xmm13,-100(%rdi)
	vmovd	%xmm14,-96(%rdi)
	leaq	88(%r11),%rsp		# unwind the .Ldo_avx frame
.cfi_def_cfa	%rsp,8
	vzeroupper			# clear upper YMM state before returning
	.byte	0xf3,0xc3		# repz ret
.cfi_endproc
.size	poly1305_blocks_avx,.-poly1305_blocks_avx

# void poly1305_emit_avx(ctx = %rdi, mac = %rsi, nonce = %rdx)
# If h is still base 2^64 (flag at 20(%rdi) clear), defer to the scalar
# .Lemit.  Otherwise recombine the five 26-bit limbs into 130 bits,
# reduce, do the conditional subtraction of 2^130-5, add the nonce and
# store the 16-byte tag.
.type	poly1305_emit_avx,@function
.align	32
poly1305_emit_avx:
.cfi_startproc
.byte	243,15,30,250			# endbr64
	cmpl	$0,20(%rdi)
	je	.Lemit

	movl	0(%rdi),%eax		# five base-2^26 limbs
	movl	4(%rdi),%ecx
	movl	8(%rdi),%r8d
	movl	12(%rdi),%r11d
	movl	16(%rdi),%r10d

	shlq	$26,%rcx		# recombine into %r8:%r9:%r10
	movq	%r8,%r9
	shlq	$52,%r8
	addq	%rcx,%rax
	shrq	$12,%r9
	addq	%rax,%r8
	adcq	$0,%r9

	shlq	$14,%r11
	movq	%r10,%rax
	shrq	$24,%r10
	addq	%r11,%r9
	shlq	$40,%rax
	addq	%rax,%r9
	adcq	$0,%r10

	movq	%r10,%rax		# reduce bits at/above 2^130 as *5/4
	movq	%r10,%rcx
	andq	$3,%r10
	shrq	$2,%rax
	andq	$-4,%rcx
	addq	%rcx,%rax
	addq	%rax,%r8
	adcq	$0,%r9
	adcq	$0,%r10

	movq	%r8,%rax		# conditional subtract of 2^130-5,
	addq	$5,%r8			# same trick as scalar poly1305_emit
	movq	%r9,%rcx
	adcq	$0,%r9
	adcq	$0,%r10
	shrq	$2,%r10
	cmovnzq	%r8,%rax
	cmovnzq	%r9,%rcx

	addq	0(%rdx),%rax		# tag = (h + nonce) mod 2^128
	adcq	8(%rdx),%rcx
	movq	%rax,0(%rsi)
	movq	%rcx,8(%rsi)

	.byte	0xf3,0xc3		# repz ret
.cfi_endproc
.size	poly1305_emit_avx,.-poly1305_emit_avx
# void poly1305_blocks_avx2(ctx, inp, len, padbit) -- AVX2 path; its entry
# dispatch mirrors poly1305_blocks_avx.  NOTE(review): the function body
# continues past the end of this excerpt.
.type	poly1305_blocks_avx2,@function
.align	32
poly1305_blocks_avx2:
.cfi_startproc
.byte	243,15,30,250			# endbr64
	movl	20(%rdi),%r8d		# is-base-2^26 flag
	cmpq	$128,%rdx
	jae	.Lblocks_avx2
	testl	%r8d,%r8d
	jz	.Lblocks		# short and base 2^64: scalar path

.Lblocks_avx2:
	andq	$-16,%rdx
	jz	.Lno_data_avx2

	vzeroupper

	testl	%r8d,%r8d
	jz	.Lbase2_64_avx2

	testq	$63,%rdx
	jz	.Leven_avx2

	pushq	%rbx
.cfi_adjust_cfa_offset	8
81320.cfi_offset %rbx,-161321pushq %rbp1322.cfi_adjust_cfa_offset 81323.cfi_offset %rbp,-241324pushq %r121325.cfi_adjust_cfa_offset 81326.cfi_offset %r12,-321327pushq %r131328.cfi_adjust_cfa_offset 81329.cfi_offset %r13,-401330pushq %r141331.cfi_adjust_cfa_offset 81332.cfi_offset %r14,-481333pushq %r151334.cfi_adjust_cfa_offset 81335.cfi_offset %r15,-561336.Lblocks_avx2_body:13371338movq %rdx,%r1513391340movq 0(%rdi),%r81341movq 8(%rdi),%r91342movl 16(%rdi),%ebp13431344movq 24(%rdi),%r111345movq 32(%rdi),%r13134613471348movl %r8d,%r14d1349andq $-2147483648,%r81350movq %r9,%r121351movl %r9d,%ebx1352andq $-2147483648,%r913531354shrq $6,%r81355shlq $52,%r121356addq %r8,%r141357shrq $12,%rbx1358shrq $18,%r91359addq %r12,%r141360adcq %r9,%rbx13611362movq %rbp,%r81363shlq $40,%r81364shrq $24,%rbp1365addq %r8,%rbx1366adcq $0,%rbp13671368movq $-4,%r91369movq %rbp,%r81370andq %rbp,%r91371shrq $2,%r81372andq $3,%rbp1373addq %r9,%r81374addq %r8,%r141375adcq $0,%rbx1376adcq $0,%rbp13771378movq %r13,%r121379movq %r13,%rax1380shrq $2,%r131381addq %r12,%r1313821383.Lbase2_26_pre_avx2:1384addq 0(%rsi),%r141385adcq 8(%rsi),%rbx1386leaq 16(%rsi),%rsi1387adcq %rcx,%rbp1388subq $16,%r1513891390call __poly1305_block1391movq %r12,%rax13921393testq $63,%r151394jnz .Lbase2_26_pre_avx213951396testq %rcx,%rcx1397jz .Lstore_base2_64_avx2139813991400movq %r14,%rax1401movq %r14,%rdx1402shrq $52,%r141403movq %rbx,%r111404movq %rbx,%r121405shrq $26,%rdx1406andq $0x3ffffff,%rax1407shlq $12,%r111408andq $0x3ffffff,%rdx1409shrq $14,%rbx1410orq %r11,%r141411shlq $24,%rbp1412andq $0x3ffffff,%r141413shrq $40,%r121414andq $0x3ffffff,%rbx1415orq %r12,%rbp14161417testq %r15,%r151418jz .Lstore_base2_26_avx214191420vmovd %eax,%xmm01421vmovd %edx,%xmm11422vmovd %r14d,%xmm21423vmovd %ebx,%xmm31424vmovd %ebp,%xmm41425jmp .Lproceed_avx214261427.align 321428.Lstore_base2_64_avx2:1429movq %r14,0(%rdi)1430movq %rbx,8(%rdi)1431movq %rbp,16(%rdi)1432jmp .Ldone_avx214331434.align 161435.Lstore_base2_26_avx2:1436movl 
%eax,0(%rdi)1437movl %edx,4(%rdi)1438movl %r14d,8(%rdi)1439movl %ebx,12(%rdi)1440movl %ebp,16(%rdi)1441.align 161442.Ldone_avx2:1443movq 0(%rsp),%r151444.cfi_restore %r151445movq 8(%rsp),%r141446.cfi_restore %r141447movq 16(%rsp),%r131448.cfi_restore %r131449movq 24(%rsp),%r121450.cfi_restore %r121451movq 32(%rsp),%rbp1452.cfi_restore %rbp1453movq 40(%rsp),%rbx1454.cfi_restore %rbx1455leaq 48(%rsp),%rsp1456.cfi_adjust_cfa_offset -481457.Lno_data_avx2:1458.Lblocks_avx2_epilogue:1459.byte 0xf3,0xc31460.cfi_endproc14611462.align 321463.Lbase2_64_avx2:1464.cfi_startproc1465pushq %rbx1466.cfi_adjust_cfa_offset 81467.cfi_offset %rbx,-161468pushq %rbp1469.cfi_adjust_cfa_offset 81470.cfi_offset %rbp,-241471pushq %r121472.cfi_adjust_cfa_offset 81473.cfi_offset %r12,-321474pushq %r131475.cfi_adjust_cfa_offset 81476.cfi_offset %r13,-401477pushq %r141478.cfi_adjust_cfa_offset 81479.cfi_offset %r14,-481480pushq %r151481.cfi_adjust_cfa_offset 81482.cfi_offset %r15,-561483.Lbase2_64_avx2_body:14841485movq %rdx,%r1514861487movq 24(%rdi),%r111488movq 32(%rdi),%r1314891490movq 0(%rdi),%r141491movq 8(%rdi),%rbx1492movl 16(%rdi),%ebp14931494movq %r13,%r121495movq %r13,%rax1496shrq $2,%r131497addq %r12,%r1314981499testq $63,%rdx1500jz .Linit_avx215011502.Lbase2_64_pre_avx2:1503addq 0(%rsi),%r141504adcq 8(%rsi),%rbx1505leaq 16(%rsi),%rsi1506adcq %rcx,%rbp1507subq $16,%r1515081509call __poly1305_block1510movq %r12,%rax15111512testq $63,%r151513jnz .Lbase2_64_pre_avx215141515.Linit_avx2:15161517movq %r14,%rax1518movq %r14,%rdx1519shrq $52,%r141520movq %rbx,%r81521movq %rbx,%r91522shrq $26,%rdx1523andq $0x3ffffff,%rax1524shlq $12,%r81525andq $0x3ffffff,%rdx1526shrq $14,%rbx1527orq %r8,%r141528shlq $24,%rbp1529andq $0x3ffffff,%r141530shrq $40,%r91531andq $0x3ffffff,%rbx1532orq %r9,%rbp15331534vmovd %eax,%xmm01535vmovd %edx,%xmm11536vmovd %r14d,%xmm21537vmovd %ebx,%xmm31538vmovd %ebp,%xmm41539movl $1,20(%rdi)15401541call __poly1305_init_avx15421543.Lproceed_avx2:1544movq %r15,%rdx1545movl 
OPENSSL_ia32cap_P+8(%rip),%r10d1546movl $3221291008,%r11d15471548movq 0(%rsp),%r151549.cfi_restore %r151550movq 8(%rsp),%r141551.cfi_restore %r141552movq 16(%rsp),%r131553.cfi_restore %r131554movq 24(%rsp),%r121555.cfi_restore %r121556movq 32(%rsp),%rbp1557.cfi_restore %rbp1558movq 40(%rsp),%rbx1559.cfi_restore %rbx1560leaq 48(%rsp),%rax1561leaq 48(%rsp),%rsp1562.cfi_adjust_cfa_offset -481563.Lbase2_64_avx2_epilogue:1564jmp .Ldo_avx21565.cfi_endproc15661567.align 321568.Leven_avx2:1569.cfi_startproc1570movl OPENSSL_ia32cap_P+8(%rip),%r10d1571vmovd 0(%rdi),%xmm01572vmovd 4(%rdi),%xmm11573vmovd 8(%rdi),%xmm21574vmovd 12(%rdi),%xmm31575vmovd 16(%rdi),%xmm415761577.Ldo_avx2:1578leaq -8(%rsp),%r111579.cfi_def_cfa %r11,161580subq $0x128,%rsp1581leaq .Lconst(%rip),%rcx1582leaq 48+64(%rdi),%rdi1583vmovdqa 96(%rcx),%ymm7158415851586vmovdqu -64(%rdi),%xmm91587andq $-512,%rsp1588vmovdqu -48(%rdi),%xmm101589vmovdqu -32(%rdi),%xmm61590vmovdqu -16(%rdi),%xmm111591vmovdqu 0(%rdi),%xmm121592vmovdqu 16(%rdi),%xmm131593leaq 144(%rsp),%rax1594vmovdqu 32(%rdi),%xmm141595vpermd %ymm9,%ymm7,%ymm91596vmovdqu 48(%rdi),%xmm151597vpermd %ymm10,%ymm7,%ymm101598vmovdqu 64(%rdi),%xmm51599vpermd %ymm6,%ymm7,%ymm61600vmovdqa %ymm9,0(%rsp)1601vpermd %ymm11,%ymm7,%ymm111602vmovdqa %ymm10,32-144(%rax)1603vpermd %ymm12,%ymm7,%ymm121604vmovdqa %ymm6,64-144(%rax)1605vpermd %ymm13,%ymm7,%ymm131606vmovdqa %ymm11,96-144(%rax)1607vpermd %ymm14,%ymm7,%ymm141608vmovdqa %ymm12,128-144(%rax)1609vpermd %ymm15,%ymm7,%ymm151610vmovdqa %ymm13,160-144(%rax)1611vpermd %ymm5,%ymm7,%ymm51612vmovdqa %ymm14,192-144(%rax)1613vmovdqa %ymm15,224-144(%rax)1614vmovdqa %ymm5,256-144(%rax)1615vmovdqa 64(%rcx),%ymm51616161716181619vmovdqu 0(%rsi),%xmm71620vmovdqu 16(%rsi),%xmm81621vinserti128 $1,32(%rsi),%ymm7,%ymm71622vinserti128 $1,48(%rsi),%ymm8,%ymm81623leaq 64(%rsi),%rsi16241625vpsrldq $6,%ymm7,%ymm91626vpsrldq $6,%ymm8,%ymm101627vpunpckhqdq %ymm8,%ymm7,%ymm61628vpunpcklqdq %ymm10,%ymm9,%ymm91629vpunpcklqdq 
%ymm8,%ymm7,%ymm716301631vpsrlq $30,%ymm9,%ymm101632vpsrlq $4,%ymm9,%ymm91633vpsrlq $26,%ymm7,%ymm81634vpsrlq $40,%ymm6,%ymm61635vpand %ymm5,%ymm9,%ymm91636vpand %ymm5,%ymm7,%ymm71637vpand %ymm5,%ymm8,%ymm81638vpand %ymm5,%ymm10,%ymm101639vpor 32(%rcx),%ymm6,%ymm616401641vpaddq %ymm2,%ymm9,%ymm21642subq $64,%rdx1643jz .Ltail_avx21644jmp .Loop_avx216451646.align 321647.Loop_avx2:164816491650165116521653165416551656vpaddq %ymm0,%ymm7,%ymm01657vmovdqa 0(%rsp),%ymm71658vpaddq %ymm1,%ymm8,%ymm11659vmovdqa 32(%rsp),%ymm81660vpaddq %ymm3,%ymm10,%ymm31661vmovdqa 96(%rsp),%ymm91662vpaddq %ymm4,%ymm6,%ymm41663vmovdqa 48(%rax),%ymm101664vmovdqa 112(%rax),%ymm516651666166716681669167016711672167316741675167616771678167916801681vpmuludq %ymm2,%ymm7,%ymm131682vpmuludq %ymm2,%ymm8,%ymm141683vpmuludq %ymm2,%ymm9,%ymm151684vpmuludq %ymm2,%ymm10,%ymm111685vpmuludq %ymm2,%ymm5,%ymm1216861687vpmuludq %ymm0,%ymm8,%ymm61688vpmuludq %ymm1,%ymm8,%ymm21689vpaddq %ymm6,%ymm12,%ymm121690vpaddq %ymm2,%ymm13,%ymm131691vpmuludq %ymm3,%ymm8,%ymm61692vpmuludq 64(%rsp),%ymm4,%ymm21693vpaddq %ymm6,%ymm15,%ymm151694vpaddq %ymm2,%ymm11,%ymm111695vmovdqa -16(%rax),%ymm816961697vpmuludq %ymm0,%ymm7,%ymm61698vpmuludq %ymm1,%ymm7,%ymm21699vpaddq %ymm6,%ymm11,%ymm111700vpaddq %ymm2,%ymm12,%ymm121701vpmuludq %ymm3,%ymm7,%ymm61702vpmuludq %ymm4,%ymm7,%ymm21703vmovdqu 0(%rsi),%xmm71704vpaddq %ymm6,%ymm14,%ymm141705vpaddq %ymm2,%ymm15,%ymm151706vinserti128 $1,32(%rsi),%ymm7,%ymm717071708vpmuludq %ymm3,%ymm8,%ymm61709vpmuludq %ymm4,%ymm8,%ymm21710vmovdqu 16(%rsi),%xmm81711vpaddq %ymm6,%ymm11,%ymm111712vpaddq %ymm2,%ymm12,%ymm121713vmovdqa 16(%rax),%ymm21714vpmuludq %ymm1,%ymm9,%ymm61715vpmuludq %ymm0,%ymm9,%ymm91716vpaddq %ymm6,%ymm14,%ymm141717vpaddq %ymm9,%ymm13,%ymm131718vinserti128 $1,48(%rsi),%ymm8,%ymm81719leaq 64(%rsi),%rsi17201721vpmuludq %ymm1,%ymm2,%ymm61722vpmuludq %ymm0,%ymm2,%ymm21723vpsrldq $6,%ymm7,%ymm91724vpaddq %ymm6,%ymm15,%ymm151725vpaddq %ymm2,%ymm14,%ymm141726vpmuludq 
%ymm3,%ymm10,%ymm61727vpmuludq %ymm4,%ymm10,%ymm21728vpsrldq $6,%ymm8,%ymm101729vpaddq %ymm6,%ymm12,%ymm121730vpaddq %ymm2,%ymm13,%ymm131731vpunpckhqdq %ymm8,%ymm7,%ymm617321733vpmuludq %ymm3,%ymm5,%ymm31734vpmuludq %ymm4,%ymm5,%ymm41735vpunpcklqdq %ymm8,%ymm7,%ymm71736vpaddq %ymm3,%ymm13,%ymm21737vpaddq %ymm4,%ymm14,%ymm31738vpunpcklqdq %ymm10,%ymm9,%ymm101739vpmuludq 80(%rax),%ymm0,%ymm41740vpmuludq %ymm1,%ymm5,%ymm01741vmovdqa 64(%rcx),%ymm51742vpaddq %ymm4,%ymm15,%ymm41743vpaddq %ymm0,%ymm11,%ymm017441745174617471748vpsrlq $26,%ymm3,%ymm141749vpand %ymm5,%ymm3,%ymm31750vpaddq %ymm14,%ymm4,%ymm417511752vpsrlq $26,%ymm0,%ymm111753vpand %ymm5,%ymm0,%ymm01754vpaddq %ymm11,%ymm12,%ymm117551756vpsrlq $26,%ymm4,%ymm151757vpand %ymm5,%ymm4,%ymm417581759vpsrlq $4,%ymm10,%ymm917601761vpsrlq $26,%ymm1,%ymm121762vpand %ymm5,%ymm1,%ymm11763vpaddq %ymm12,%ymm2,%ymm217641765vpaddq %ymm15,%ymm0,%ymm01766vpsllq $2,%ymm15,%ymm151767vpaddq %ymm15,%ymm0,%ymm017681769vpand %ymm5,%ymm9,%ymm91770vpsrlq $26,%ymm7,%ymm817711772vpsrlq $26,%ymm2,%ymm131773vpand %ymm5,%ymm2,%ymm21774vpaddq %ymm13,%ymm3,%ymm317751776vpaddq %ymm9,%ymm2,%ymm21777vpsrlq $30,%ymm10,%ymm1017781779vpsrlq $26,%ymm0,%ymm111780vpand %ymm5,%ymm0,%ymm01781vpaddq %ymm11,%ymm1,%ymm117821783vpsrlq $40,%ymm6,%ymm617841785vpsrlq $26,%ymm3,%ymm141786vpand %ymm5,%ymm3,%ymm31787vpaddq %ymm14,%ymm4,%ymm417881789vpand %ymm5,%ymm7,%ymm71790vpand %ymm5,%ymm8,%ymm81791vpand %ymm5,%ymm10,%ymm101792vpor 32(%rcx),%ymm6,%ymm617931794subq $64,%rdx1795jnz .Loop_avx217961797.byte 0x66,0x901798.Ltail_avx2:17991800180118021803180418051806vpaddq %ymm0,%ymm7,%ymm01807vmovdqu 4(%rsp),%ymm71808vpaddq %ymm1,%ymm8,%ymm11809vmovdqu 36(%rsp),%ymm81810vpaddq %ymm3,%ymm10,%ymm31811vmovdqu 100(%rsp),%ymm91812vpaddq %ymm4,%ymm6,%ymm41813vmovdqu 52(%rax),%ymm101814vmovdqu 116(%rax),%ymm518151816vpmuludq %ymm2,%ymm7,%ymm131817vpmuludq %ymm2,%ymm8,%ymm141818vpmuludq %ymm2,%ymm9,%ymm151819vpmuludq %ymm2,%ymm10,%ymm111820vpmuludq 
%ymm2,%ymm5,%ymm1218211822vpmuludq %ymm0,%ymm8,%ymm61823vpmuludq %ymm1,%ymm8,%ymm21824vpaddq %ymm6,%ymm12,%ymm121825vpaddq %ymm2,%ymm13,%ymm131826vpmuludq %ymm3,%ymm8,%ymm61827vpmuludq 68(%rsp),%ymm4,%ymm21828vpaddq %ymm6,%ymm15,%ymm151829vpaddq %ymm2,%ymm11,%ymm1118301831vpmuludq %ymm0,%ymm7,%ymm61832vpmuludq %ymm1,%ymm7,%ymm21833vpaddq %ymm6,%ymm11,%ymm111834vmovdqu -12(%rax),%ymm81835vpaddq %ymm2,%ymm12,%ymm121836vpmuludq %ymm3,%ymm7,%ymm61837vpmuludq %ymm4,%ymm7,%ymm21838vpaddq %ymm6,%ymm14,%ymm141839vpaddq %ymm2,%ymm15,%ymm1518401841vpmuludq %ymm3,%ymm8,%ymm61842vpmuludq %ymm4,%ymm8,%ymm21843vpaddq %ymm6,%ymm11,%ymm111844vpaddq %ymm2,%ymm12,%ymm121845vmovdqu 20(%rax),%ymm21846vpmuludq %ymm1,%ymm9,%ymm61847vpmuludq %ymm0,%ymm9,%ymm91848vpaddq %ymm6,%ymm14,%ymm141849vpaddq %ymm9,%ymm13,%ymm1318501851vpmuludq %ymm1,%ymm2,%ymm61852vpmuludq %ymm0,%ymm2,%ymm21853vpaddq %ymm6,%ymm15,%ymm151854vpaddq %ymm2,%ymm14,%ymm141855vpmuludq %ymm3,%ymm10,%ymm61856vpmuludq %ymm4,%ymm10,%ymm21857vpaddq %ymm6,%ymm12,%ymm121858vpaddq %ymm2,%ymm13,%ymm1318591860vpmuludq %ymm3,%ymm5,%ymm31861vpmuludq %ymm4,%ymm5,%ymm41862vpaddq %ymm3,%ymm13,%ymm21863vpaddq %ymm4,%ymm14,%ymm31864vpmuludq 84(%rax),%ymm0,%ymm41865vpmuludq %ymm1,%ymm5,%ymm01866vmovdqa 64(%rcx),%ymm51867vpaddq %ymm4,%ymm15,%ymm41868vpaddq %ymm0,%ymm11,%ymm018691870187118721873vpsrldq $8,%ymm12,%ymm81874vpsrldq $8,%ymm2,%ymm91875vpsrldq $8,%ymm3,%ymm101876vpsrldq $8,%ymm4,%ymm61877vpsrldq $8,%ymm0,%ymm71878vpaddq %ymm8,%ymm12,%ymm121879vpaddq %ymm9,%ymm2,%ymm21880vpaddq %ymm10,%ymm3,%ymm31881vpaddq %ymm6,%ymm4,%ymm41882vpaddq %ymm7,%ymm0,%ymm018831884vpermq $0x2,%ymm3,%ymm101885vpermq $0x2,%ymm4,%ymm61886vpermq $0x2,%ymm0,%ymm71887vpermq $0x2,%ymm12,%ymm81888vpermq $0x2,%ymm2,%ymm91889vpaddq %ymm10,%ymm3,%ymm31890vpaddq %ymm6,%ymm4,%ymm41891vpaddq %ymm7,%ymm0,%ymm01892vpaddq %ymm8,%ymm12,%ymm121893vpaddq %ymm9,%ymm2,%ymm218941895189618971898vpsrlq $26,%ymm3,%ymm141899vpand %ymm5,%ymm3,%ymm31900vpaddq 
%ymm14,%ymm4,%ymm4	# (operands of the vpaddq split at the chunk edge) h4 += carry(h3)

# --- final lazy reduction of poly1305_blocks_avx2 ---
# The accumulator limbs live in ymm0..ymm4 (base 2^26); ymm5 holds the
# 26-bit mask loaded from 64(%rcx) == .Lmask26 earlier in .Ldo_avx2.
# Each triple below is: extract carry, mask limb to 26 bits, add carry
# into the next limb.
vpsrlq $26,%ymm0,%ymm11
vpand %ymm5,%ymm0,%ymm0
vpaddq %ymm11,%ymm12,%ymm1	# h1 = d1 + carry(h0)

vpsrlq $26,%ymm4,%ymm15
vpand %ymm5,%ymm4,%ymm4

vpsrlq $26,%ymm1,%ymm12
vpand %ymm5,%ymm1,%ymm1
vpaddq %ymm12,%ymm2,%ymm2	# h2 += carry(h1)

# carry out of the top limb wraps to h0 multiplied by 5 (mod 2^130-5):
# h0 += c + 4*c
vpaddq %ymm15,%ymm0,%ymm0
vpsllq $2,%ymm15,%ymm15
vpaddq %ymm15,%ymm0,%ymm0

vpsrlq $26,%ymm2,%ymm13
vpand %ymm5,%ymm2,%ymm2
vpaddq %ymm13,%ymm3,%ymm3	# h3 += carry(h2)

vpsrlq $26,%ymm0,%ymm11
vpand %ymm5,%ymm0,%ymm0
vpaddq %ymm11,%ymm1,%ymm1	# h1 += carry(h0)

vpsrlq $26,%ymm3,%ymm14
vpand %ymm5,%ymm3,%ymm3
vpaddq %ymm14,%ymm4,%ymm4	# h4 += carry(h3)

# Store the five 26-bit limbs back into the context.  rdi was advanced
# by 48+64 in .Ldo_avx2, so -112(%rdi)..-96(%rdi) are offsets 0,4,8,12,16
# of the original context pointer.
vmovd %xmm0,-112(%rdi)
vmovd %xmm1,-108(%rdi)
vmovd %xmm2,-104(%rdi)
vmovd %xmm3,-100(%rdi)
vmovd %xmm4,-96(%rdi)
# Epilogue: r11 was set to rsp-8 before the stack was aligned down to a
# 512-byte boundary, so 8(%r11) is the original rsp.
leaq 8(%r11),%rsp
.cfi_def_cfa %rsp,8
vzeroupper			# leave AVX state clean before returning to SSE/C code
.byte 0xf3,0xc3			# rep ret
.cfi_endproc
.size poly1305_blocks_avx2,.-poly1305_blocks_avx2
# --- constant pool shared by the vectorized code paths ---
.section .rodata
.align 64
.Lconst:
.Lmask24:
# 2^24-1 in each even dword lane (24-bit mask)
.long 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0
.L129:
# 2^24 in each even dword lane (the "1 << 128" padding bit, base 2^26)
.long 16777216,0,16777216,0,16777216,0,16777216,0
.Lmask26:
# 2^26-1 in each even dword lane (limb mask for base-2^26 arithmetic)
.long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0
.Lpermd_avx2:
# vpermd lane-shuffle pattern used when loading the key powers (AVX2 path)
.long 2,2,2,3,2,0,2,1
.Lpermd_avx512:
# vpermd lane-shuffle pattern for the AVX-512 path (defined elsewhere in the file)
.long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7

# The .L2_44_* / .Lx_mask* tables below serve the base-2^44 code paths
# (users are not visible in this chunk of the file).
.L2_44_inp_permd:
.long 0,1,1,2,2,3,7,7
.L2_44_inp_shift:
.quad 0,12,24,64
.L2_44_mask:
# limb masks: 2^44-1, 2^44-1, 2^42-1, all-ones
.quad 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff
.L2_44_shift_rgt:
.quad 44,44,42,64
.L2_44_shift_lft:
.quad 8,8,10,64

.align 64
.Lx_mask44:
# 2^44-1 broadcast across 8 qword lanes
.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
.quad 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff
.Lx_mask42:
# 2^42-1 broadcast across 8 qword lanes
.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
.quad 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff
.previous
.byte 
80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	# ASCII: "Poly1305 for x86_64, CRYPTOGAMS by <appro@openssl.org>" + NUL
.align 16
# xor128_encrypt_n_pad:
#   In:  rdi = output buffer, rsi = input buffer, rdx = pad/scratch buffer
#        (also serves as the running offset after the two subq rebases),
#        rcx = length in bytes.
#   Does: out[i] = in[i] ^ pad[i] for each byte, stores the XOR result
#        back into the pad buffer as well, then zero-fills the pad buffer
#        up to the next 16-byte boundary.
#   Out: rax = end pointer (pad buffer advanced past the padding).
#   The pad buffer must be 16-byte aligned: the bulk loop uses
#   pxor (%rdx) and movdqa to (%rdx).
#   NOTE(review): a zero length would enter .Ltail_enc with r10 == 0 and
#   the decq/jnz byte-loop counter would wrap — callers presumably
#   guarantee len > 0; confirm against call sites.
.globl xor128_encrypt_n_pad
.type xor128_encrypt_n_pad,@function
.align 16
xor128_encrypt_n_pad:
.cfi_startproc
# Rebase in/out pointers relative to rdx so a single cursor (rdx) walks
# all three buffers: (%rsi,%rdx,1) == original_in + offset, etc.
subq %rdx,%rsi
subq %rdx,%rdi
movq %rcx,%r10			# r10 = total length (tail size computed from it)
shrq $4,%rcx			# rcx = number of whole 16-byte blocks
jz .Ltail_enc
nop
.Loop_enc_xmm:
movdqu (%rsi,%rdx,1),%xmm0	# xmm0 = 16 input bytes
pxor (%rdx),%xmm0		# xmm0 ^= 16 pad bytes (aligned load)
movdqu %xmm0,(%rdi,%rdx,1)	# store result to output
movdqa %xmm0,(%rdx)		# and back into the pad buffer (aligned)
leaq 16(%rdx),%rdx
decq %rcx
jnz .Loop_enc_xmm

andq $15,%r10			# r10 = leftover bytes (< 16)
jz .Ldone_enc			# exact multiple of 16: no tail, no padding

.Ltail_enc:
movq $16,%rcx
subq %r10,%rcx			# rcx = pad bytes needed to reach 16-byte boundary
xorl %eax,%eax
.Loop_enc_byte:
movb (%rsi,%rdx,1),%al
xorb (%rdx),%al			# al = in ^ pad, one byte at a time
movb %al,(%rdi,%rdx,1)
movb %al,(%rdx)			# mirror result into the pad buffer
leaq 1(%rdx),%rdx
decq %r10
jnz .Loop_enc_byte

xorl %eax,%eax
.Loop_enc_pad:
movb %al,(%rdx)			# zero-fill pad buffer to the 16-byte boundary
leaq 1(%rdx),%rdx
decq %rcx
jnz .Loop_enc_pad

.Ldone_enc:
movq %rdx,%rax			# return final cursor
.byte 0xf3,0xc3			# rep ret
.cfi_endproc
.size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad

# xor128_decrypt_n_pad: same contract as xor128_encrypt_n_pad, but the
# ORIGINAL input bytes (the ciphertext) are what get stored back into the
# pad buffer, while out[i] = in[i] ^ pad[i] goes to the output.  The pad
# buffer is likewise zero-filled to a 16-byte boundary; rax returns the
# advanced cursor.  Same 16-byte-alignment requirement on rdx, and the
# same len > 0 assumption as above.
.globl xor128_decrypt_n_pad
.type xor128_decrypt_n_pad,@function
.align 16
xor128_decrypt_n_pad:
.cfi_startproc
subq %rdx,%rsi			# rebase pointers as in the encrypt variant
subq %rdx,%rdi
movq %rcx,%r10
shrq $4,%rcx
jz .Ltail_dec
nop
.Loop_dec_xmm:
movdqu (%rsi,%rdx,1),%xmm0	# xmm0 = ciphertext block
movdqa (%rdx),%xmm1		# xmm1 = pad block (aligned)
pxor %xmm0,%xmm1		# xmm1 = plaintext
movdqu %xmm1,(%rdi,%rdx,1)	# store plaintext to output
movdqa %xmm0,(%rdx)		# store CIPHERTEXT into the pad buffer
leaq 16(%rdx),%rdx
decq %rcx
jnz .Loop_dec_xmm

pxor %xmm1,%xmm1		# presumably scrubs plaintext from xmm1 — NOTE(review): hygiene, confirm intent
andq $15,%r10
jz .Ldone_dec

.Ltail_dec:
movq $16,%rcx
subq %r10,%rcx
xorl %eax,%eax
xorq %r11,%r11
.Loop_dec_byte:
movb (%rsi,%rdx,1),%r11b	# r11b = ciphertext byte
movb (%rdx),%al			# al = pad byte
xorb %r11b,%al			# al = plaintext byte
movb %al,(%rdi,%rdx,1)
movb %r11b,(%rdx)		# ciphertext byte into the pad buffer
leaq 1(%rdx),%rdx
decq %r10
jnz .Loop_dec_byte

xorl %eax,%eax
.Loop_dec_pad:
movb %al,(%rdx)			# zero-fill to the 16-byte boundary
leaq 1(%rdx),%rdx
decq %rcx
jnz .Loop_dec_pad

.Ldone_dec:
movq %rdx,%rax
.byte 0xf3,0xc3			# rep ret
.cfi_endproc
.size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad
# ELF GNU property note: namesz=4 ("GNU\0"), descsz, type 5 (NT_GNU_PROPERTY_TYPE_0).
# Property type 0xc0000002 appears to be GNU_PROPERTY_X86_FEATURE_1_AND with
# value 3 (IBT | SHSTK) — advertises CET compatibility to the linker.
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4:

