Path: blob/main/sys/crypto/openssl/amd64/chacha-x86_64.S
39482 views
/* Do not modify. This file is auto-generated from chacha-x86_64.pl. */1.text2345.section .rodata6.align 647.Lzero:8.long 0,0,0,09.Lone:10.long 1,0,0,011.Linc:12.long 0,1,2,313.Lfour:14.long 4,4,4,415.Lincy:16.long 0,2,4,6,1,3,5,717.Leight:18.long 8,8,8,8,8,8,8,819.Lrot16:20.byte 0x2,0x3,0x0,0x1, 0x6,0x7,0x4,0x5, 0xa,0xb,0x8,0x9, 0xe,0xf,0xc,0xd21.Lrot24:22.byte 0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe23.Ltwoy:24.long 2,0,0,0, 2,0,0,025.align 6426.Lzeroz:27.long 0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,028.Lfourz:29.long 4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,030.Lincz:31.long 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,1532.Lsixteen:33.long 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,1634.Lsigma:35.byte 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,036.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,037.previous38.globl ChaCha20_ctr3239.type ChaCha20_ctr32,@function40.align 6441ChaCha20_ctr32:42.cfi_startproc43cmpq $0,%rdx44je .Lno_data45movq OPENSSL_ia32cap_P+4(%rip),%r1046testl $512,%r10d47jnz .LChaCha20_ssse34849pushq %rbx50.cfi_adjust_cfa_offset 851.cfi_offset %rbx,-1652pushq %rbp53.cfi_adjust_cfa_offset 854.cfi_offset %rbp,-2455pushq %r1256.cfi_adjust_cfa_offset 857.cfi_offset %r12,-3258pushq %r1359.cfi_adjust_cfa_offset 860.cfi_offset %r13,-4061pushq %r1462.cfi_adjust_cfa_offset 863.cfi_offset %r14,-4864pushq %r1565.cfi_adjust_cfa_offset 866.cfi_offset %r15,-5667subq $64+24,%rsp68.cfi_adjust_cfa_offset 64+2469.Lctr32_body:707172movdqu (%rcx),%xmm173movdqu 16(%rcx),%xmm274movdqu (%r8),%xmm375movdqa .Lone(%rip),%xmm4767778movdqa %xmm1,16(%rsp)79movdqa %xmm2,32(%rsp)80movdqa %xmm3,48(%rsp)81movq %rdx,%rbp82jmp .Loop_outer8384.align 3285.Loop_outer:86movl $0x61707865,%eax87movl $0x3320646e,%ebx88movl $0x79622d32,%ecx89movl $0x6b206574,%edx90movl 16(%rsp),%r8d91movl 20(%rsp),%r9d92movl 24(%rsp),%r10d93movl 
28(%rsp),%r11d94movd %xmm3,%r12d95movl 52(%rsp),%r13d96movl 56(%rsp),%r14d97movl 60(%rsp),%r15d9899movq %rbp,64+0(%rsp)100movl $10,%ebp101movq %rsi,64+8(%rsp)102.byte 102,72,15,126,214103movq %rdi,64+16(%rsp)104movq %rsi,%rdi105shrq $32,%rdi106jmp .Loop107108.align 32109.Loop:110addl %r8d,%eax111xorl %eax,%r12d112roll $16,%r12d113addl %r9d,%ebx114xorl %ebx,%r13d115roll $16,%r13d116addl %r12d,%esi117xorl %esi,%r8d118roll $12,%r8d119addl %r13d,%edi120xorl %edi,%r9d121roll $12,%r9d122addl %r8d,%eax123xorl %eax,%r12d124roll $8,%r12d125addl %r9d,%ebx126xorl %ebx,%r13d127roll $8,%r13d128addl %r12d,%esi129xorl %esi,%r8d130roll $7,%r8d131addl %r13d,%edi132xorl %edi,%r9d133roll $7,%r9d134movl %esi,32(%rsp)135movl %edi,36(%rsp)136movl 40(%rsp),%esi137movl 44(%rsp),%edi138addl %r10d,%ecx139xorl %ecx,%r14d140roll $16,%r14d141addl %r11d,%edx142xorl %edx,%r15d143roll $16,%r15d144addl %r14d,%esi145xorl %esi,%r10d146roll $12,%r10d147addl %r15d,%edi148xorl %edi,%r11d149roll $12,%r11d150addl %r10d,%ecx151xorl %ecx,%r14d152roll $8,%r14d153addl %r11d,%edx154xorl %edx,%r15d155roll $8,%r15d156addl %r14d,%esi157xorl %esi,%r10d158roll $7,%r10d159addl %r15d,%edi160xorl %edi,%r11d161roll $7,%r11d162addl %r9d,%eax163xorl %eax,%r15d164roll $16,%r15d165addl %r10d,%ebx166xorl %ebx,%r12d167roll $16,%r12d168addl %r15d,%esi169xorl %esi,%r9d170roll $12,%r9d171addl %r12d,%edi172xorl %edi,%r10d173roll $12,%r10d174addl %r9d,%eax175xorl %eax,%r15d176roll $8,%r15d177addl %r10d,%ebx178xorl %ebx,%r12d179roll $8,%r12d180addl %r15d,%esi181xorl %esi,%r9d182roll $7,%r9d183addl %r12d,%edi184xorl %edi,%r10d185roll $7,%r10d186movl %esi,40(%rsp)187movl %edi,44(%rsp)188movl 32(%rsp),%esi189movl 36(%rsp),%edi190addl %r11d,%ecx191xorl %ecx,%r13d192roll $16,%r13d193addl %r8d,%edx194xorl %edx,%r14d195roll $16,%r14d196addl %r13d,%esi197xorl %esi,%r11d198roll $12,%r11d199addl %r14d,%edi200xorl %edi,%r8d201roll $12,%r8d202addl %r11d,%ecx203xorl %ecx,%r13d204roll $8,%r13d205addl %r8d,%edx206xorl %edx,%r14d207roll 
$8,%r14d208addl %r13d,%esi209xorl %esi,%r11d210roll $7,%r11d211addl %r14d,%edi212xorl %edi,%r8d213roll $7,%r8d214decl %ebp215jnz .Loop216movl %edi,36(%rsp)217movl %esi,32(%rsp)218movq 64(%rsp),%rbp219movdqa %xmm2,%xmm1220movq 64+8(%rsp),%rsi221paddd %xmm4,%xmm3222movq 64+16(%rsp),%rdi223224addl $0x61707865,%eax225addl $0x3320646e,%ebx226addl $0x79622d32,%ecx227addl $0x6b206574,%edx228addl 16(%rsp),%r8d229addl 20(%rsp),%r9d230addl 24(%rsp),%r10d231addl 28(%rsp),%r11d232addl 48(%rsp),%r12d233addl 52(%rsp),%r13d234addl 56(%rsp),%r14d235addl 60(%rsp),%r15d236paddd 32(%rsp),%xmm1237238cmpq $64,%rbp239jb .Ltail240241xorl 0(%rsi),%eax242xorl 4(%rsi),%ebx243xorl 8(%rsi),%ecx244xorl 12(%rsi),%edx245xorl 16(%rsi),%r8d246xorl 20(%rsi),%r9d247xorl 24(%rsi),%r10d248xorl 28(%rsi),%r11d249movdqu 32(%rsi),%xmm0250xorl 48(%rsi),%r12d251xorl 52(%rsi),%r13d252xorl 56(%rsi),%r14d253xorl 60(%rsi),%r15d254leaq 64(%rsi),%rsi255pxor %xmm1,%xmm0256257movdqa %xmm2,32(%rsp)258movd %xmm3,48(%rsp)259260movl %eax,0(%rdi)261movl %ebx,4(%rdi)262movl %ecx,8(%rdi)263movl %edx,12(%rdi)264movl %r8d,16(%rdi)265movl %r9d,20(%rdi)266movl %r10d,24(%rdi)267movl %r11d,28(%rdi)268movdqu %xmm0,32(%rdi)269movl %r12d,48(%rdi)270movl %r13d,52(%rdi)271movl %r14d,56(%rdi)272movl %r15d,60(%rdi)273leaq 64(%rdi),%rdi274275subq $64,%rbp276jnz .Loop_outer277278jmp .Ldone279280.align 16281.Ltail:282movl %eax,0(%rsp)283movl %ebx,4(%rsp)284xorq %rbx,%rbx285movl %ecx,8(%rsp)286movl %edx,12(%rsp)287movl %r8d,16(%rsp)288movl %r9d,20(%rsp)289movl %r10d,24(%rsp)290movl %r11d,28(%rsp)291movdqa %xmm1,32(%rsp)292movl %r12d,48(%rsp)293movl %r13d,52(%rsp)294movl %r14d,56(%rsp)295movl %r15d,60(%rsp)296297.Loop_tail:298movzbl (%rsi,%rbx,1),%eax299movzbl (%rsp,%rbx,1),%edx300leaq 1(%rbx),%rbx301xorl %edx,%eax302movb %al,-1(%rdi,%rbx,1)303decq %rbp304jnz .Loop_tail305306.Ldone:307leaq 64+24+48(%rsp),%rsi308.cfi_def_cfa %rsi,8309movq -48(%rsi),%r15310.cfi_restore %r15311movq -40(%rsi),%r14312.cfi_restore %r14313movq 
-32(%rsi),%r13314.cfi_restore %r13315movq -24(%rsi),%r12316.cfi_restore %r12317movq -16(%rsi),%rbp318.cfi_restore %rbp319movq -8(%rsi),%rbx320.cfi_restore %rbx321leaq (%rsi),%rsp322.cfi_def_cfa_register %rsp323.Lno_data:324.byte 0xf3,0xc3325.cfi_endproc326.size ChaCha20_ctr32,.-ChaCha20_ctr32327.type ChaCha20_ssse3,@function328.align 32329ChaCha20_ssse3:330.cfi_startproc331.LChaCha20_ssse3:332movq %rsp,%r9333.cfi_def_cfa_register %r9334testl $2048,%r10d335jnz .LChaCha20_4xop336cmpq $128,%rdx337je .LChaCha20_128338ja .LChaCha20_4x339340.Ldo_sse3_after_all:341subq $64+8,%rsp342movdqa .Lsigma(%rip),%xmm0343movdqu (%rcx),%xmm1344movdqu 16(%rcx),%xmm2345movdqu (%r8),%xmm3346movdqa .Lrot16(%rip),%xmm6347movdqa .Lrot24(%rip),%xmm7348349movdqa %xmm0,0(%rsp)350movdqa %xmm1,16(%rsp)351movdqa %xmm2,32(%rsp)352movdqa %xmm3,48(%rsp)353movq $10,%r8354jmp .Loop_ssse3355356.align 32357.Loop_outer_ssse3:358movdqa .Lone(%rip),%xmm3359movdqa 0(%rsp),%xmm0360movdqa 16(%rsp),%xmm1361movdqa 32(%rsp),%xmm2362paddd 48(%rsp),%xmm3363movq $10,%r8364movdqa %xmm3,48(%rsp)365jmp .Loop_ssse3366367.align 32368.Loop_ssse3:369paddd %xmm1,%xmm0370pxor %xmm0,%xmm3371.byte 102,15,56,0,222372paddd %xmm3,%xmm2373pxor %xmm2,%xmm1374movdqa %xmm1,%xmm4375psrld $20,%xmm1376pslld $12,%xmm4377por %xmm4,%xmm1378paddd %xmm1,%xmm0379pxor %xmm0,%xmm3380.byte 102,15,56,0,223381paddd %xmm3,%xmm2382pxor %xmm2,%xmm1383movdqa %xmm1,%xmm4384psrld $25,%xmm1385pslld $7,%xmm4386por %xmm4,%xmm1387pshufd $78,%xmm2,%xmm2388pshufd $57,%xmm1,%xmm1389pshufd $147,%xmm3,%xmm3390nop391paddd %xmm1,%xmm0392pxor %xmm0,%xmm3393.byte 102,15,56,0,222394paddd %xmm3,%xmm2395pxor %xmm2,%xmm1396movdqa %xmm1,%xmm4397psrld $20,%xmm1398pslld $12,%xmm4399por %xmm4,%xmm1400paddd %xmm1,%xmm0401pxor %xmm0,%xmm3402.byte 102,15,56,0,223403paddd %xmm3,%xmm2404pxor %xmm2,%xmm1405movdqa %xmm1,%xmm4406psrld $25,%xmm1407pslld $7,%xmm4408por %xmm4,%xmm1409pshufd $78,%xmm2,%xmm2410pshufd $147,%xmm1,%xmm1411pshufd $57,%xmm3,%xmm3412decq %r8413jnz 
.Loop_ssse3414paddd 0(%rsp),%xmm0415paddd 16(%rsp),%xmm1416paddd 32(%rsp),%xmm2417paddd 48(%rsp),%xmm3418419cmpq $64,%rdx420jb .Ltail_ssse3421422movdqu 0(%rsi),%xmm4423movdqu 16(%rsi),%xmm5424pxor %xmm4,%xmm0425movdqu 32(%rsi),%xmm4426pxor %xmm5,%xmm1427movdqu 48(%rsi),%xmm5428leaq 64(%rsi),%rsi429pxor %xmm4,%xmm2430pxor %xmm5,%xmm3431432movdqu %xmm0,0(%rdi)433movdqu %xmm1,16(%rdi)434movdqu %xmm2,32(%rdi)435movdqu %xmm3,48(%rdi)436leaq 64(%rdi),%rdi437438subq $64,%rdx439jnz .Loop_outer_ssse3440441jmp .Ldone_ssse3442443.align 16444.Ltail_ssse3:445movdqa %xmm0,0(%rsp)446movdqa %xmm1,16(%rsp)447movdqa %xmm2,32(%rsp)448movdqa %xmm3,48(%rsp)449xorq %r8,%r8450451.Loop_tail_ssse3:452movzbl (%rsi,%r8,1),%eax453movzbl (%rsp,%r8,1),%ecx454leaq 1(%r8),%r8455xorl %ecx,%eax456movb %al,-1(%rdi,%r8,1)457decq %rdx458jnz .Loop_tail_ssse3459460.Ldone_ssse3:461leaq (%r9),%rsp462.cfi_def_cfa_register %rsp463.Lssse3_epilogue:464.byte 0xf3,0xc3465.cfi_endproc466.size ChaCha20_ssse3,.-ChaCha20_ssse3467.type ChaCha20_128,@function468.align 32469ChaCha20_128:470.cfi_startproc471.LChaCha20_128:472movq %rsp,%r9473.cfi_def_cfa_register %r9474subq $64+8,%rsp475movdqa .Lsigma(%rip),%xmm8476movdqu (%rcx),%xmm9477movdqu 16(%rcx),%xmm2478movdqu (%r8),%xmm3479movdqa .Lone(%rip),%xmm1480movdqa .Lrot16(%rip),%xmm6481movdqa .Lrot24(%rip),%xmm7482483movdqa %xmm8,%xmm10484movdqa %xmm8,0(%rsp)485movdqa %xmm9,%xmm11486movdqa %xmm9,16(%rsp)487movdqa %xmm2,%xmm0488movdqa %xmm2,32(%rsp)489paddd %xmm3,%xmm1490movdqa %xmm3,48(%rsp)491movq $10,%r8492jmp .Loop_128493494.align 32495.Loop_128:496paddd %xmm9,%xmm8497pxor %xmm8,%xmm3498paddd %xmm11,%xmm10499pxor %xmm10,%xmm1500.byte 102,15,56,0,222501.byte 102,15,56,0,206502paddd %xmm3,%xmm2503paddd %xmm1,%xmm0504pxor %xmm2,%xmm9505pxor %xmm0,%xmm11506movdqa %xmm9,%xmm4507psrld $20,%xmm9508movdqa %xmm11,%xmm5509pslld $12,%xmm4510psrld $20,%xmm11511por %xmm4,%xmm9512pslld $12,%xmm5513por %xmm5,%xmm11514paddd %xmm9,%xmm8515pxor %xmm8,%xmm3516paddd %xmm11,%xmm10517pxor 
%xmm10,%xmm1518.byte 102,15,56,0,223519.byte 102,15,56,0,207520paddd %xmm3,%xmm2521paddd %xmm1,%xmm0522pxor %xmm2,%xmm9523pxor %xmm0,%xmm11524movdqa %xmm9,%xmm4525psrld $25,%xmm9526movdqa %xmm11,%xmm5527pslld $7,%xmm4528psrld $25,%xmm11529por %xmm4,%xmm9530pslld $7,%xmm5531por %xmm5,%xmm11532pshufd $78,%xmm2,%xmm2533pshufd $57,%xmm9,%xmm9534pshufd $147,%xmm3,%xmm3535pshufd $78,%xmm0,%xmm0536pshufd $57,%xmm11,%xmm11537pshufd $147,%xmm1,%xmm1538paddd %xmm9,%xmm8539pxor %xmm8,%xmm3540paddd %xmm11,%xmm10541pxor %xmm10,%xmm1542.byte 102,15,56,0,222543.byte 102,15,56,0,206544paddd %xmm3,%xmm2545paddd %xmm1,%xmm0546pxor %xmm2,%xmm9547pxor %xmm0,%xmm11548movdqa %xmm9,%xmm4549psrld $20,%xmm9550movdqa %xmm11,%xmm5551pslld $12,%xmm4552psrld $20,%xmm11553por %xmm4,%xmm9554pslld $12,%xmm5555por %xmm5,%xmm11556paddd %xmm9,%xmm8557pxor %xmm8,%xmm3558paddd %xmm11,%xmm10559pxor %xmm10,%xmm1560.byte 102,15,56,0,223561.byte 102,15,56,0,207562paddd %xmm3,%xmm2563paddd %xmm1,%xmm0564pxor %xmm2,%xmm9565pxor %xmm0,%xmm11566movdqa %xmm9,%xmm4567psrld $25,%xmm9568movdqa %xmm11,%xmm5569pslld $7,%xmm4570psrld $25,%xmm11571por %xmm4,%xmm9572pslld $7,%xmm5573por %xmm5,%xmm11574pshufd $78,%xmm2,%xmm2575pshufd $147,%xmm9,%xmm9576pshufd $57,%xmm3,%xmm3577pshufd $78,%xmm0,%xmm0578pshufd $147,%xmm11,%xmm11579pshufd $57,%xmm1,%xmm1580decq %r8581jnz .Loop_128582paddd 0(%rsp),%xmm8583paddd 16(%rsp),%xmm9584paddd 32(%rsp),%xmm2585paddd 48(%rsp),%xmm3586paddd .Lone(%rip),%xmm1587paddd 0(%rsp),%xmm10588paddd 16(%rsp),%xmm11589paddd 32(%rsp),%xmm0590paddd 48(%rsp),%xmm1591592movdqu 0(%rsi),%xmm4593movdqu 16(%rsi),%xmm5594pxor %xmm4,%xmm8595movdqu 32(%rsi),%xmm4596pxor %xmm5,%xmm9597movdqu 48(%rsi),%xmm5598pxor %xmm4,%xmm2599movdqu 64(%rsi),%xmm4600pxor %xmm5,%xmm3601movdqu 80(%rsi),%xmm5602pxor %xmm4,%xmm10603movdqu 96(%rsi),%xmm4604pxor %xmm5,%xmm11605movdqu 112(%rsi),%xmm5606pxor %xmm4,%xmm0607pxor %xmm5,%xmm1608609movdqu %xmm8,0(%rdi)610movdqu %xmm9,16(%rdi)611movdqu %xmm2,32(%rdi)612movdqu 
%xmm3,48(%rdi)613movdqu %xmm10,64(%rdi)614movdqu %xmm11,80(%rdi)615movdqu %xmm0,96(%rdi)616movdqu %xmm1,112(%rdi)617leaq (%r9),%rsp618.cfi_def_cfa_register %rsp619.L128_epilogue:620.byte 0xf3,0xc3621.cfi_endproc622.size ChaCha20_128,.-ChaCha20_128623.type ChaCha20_4x,@function624.align 32625ChaCha20_4x:626.cfi_startproc627.LChaCha20_4x:628movq %rsp,%r9629.cfi_def_cfa_register %r9630movq %r10,%r11631shrq $32,%r10632testq $32,%r10633jnz .LChaCha20_8x634cmpq $192,%rdx635ja .Lproceed4x636637andq $71303168,%r11638cmpq $4194304,%r11639je .Ldo_sse3_after_all640641.Lproceed4x:642subq $0x140+8,%rsp643movdqa .Lsigma(%rip),%xmm11644movdqu (%rcx),%xmm15645movdqu 16(%rcx),%xmm7646movdqu (%r8),%xmm3647leaq 256(%rsp),%rcx648leaq .Lrot16(%rip),%r10649leaq .Lrot24(%rip),%r11650651pshufd $0x00,%xmm11,%xmm8652pshufd $0x55,%xmm11,%xmm9653movdqa %xmm8,64(%rsp)654pshufd $0xaa,%xmm11,%xmm10655movdqa %xmm9,80(%rsp)656pshufd $0xff,%xmm11,%xmm11657movdqa %xmm10,96(%rsp)658movdqa %xmm11,112(%rsp)659660pshufd $0x00,%xmm15,%xmm12661pshufd $0x55,%xmm15,%xmm13662movdqa %xmm12,128-256(%rcx)663pshufd $0xaa,%xmm15,%xmm14664movdqa %xmm13,144-256(%rcx)665pshufd $0xff,%xmm15,%xmm15666movdqa %xmm14,160-256(%rcx)667movdqa %xmm15,176-256(%rcx)668669pshufd $0x00,%xmm7,%xmm4670pshufd $0x55,%xmm7,%xmm5671movdqa %xmm4,192-256(%rcx)672pshufd $0xaa,%xmm7,%xmm6673movdqa %xmm5,208-256(%rcx)674pshufd $0xff,%xmm7,%xmm7675movdqa %xmm6,224-256(%rcx)676movdqa %xmm7,240-256(%rcx)677678pshufd $0x00,%xmm3,%xmm0679pshufd $0x55,%xmm3,%xmm1680paddd .Linc(%rip),%xmm0681pshufd $0xaa,%xmm3,%xmm2682movdqa %xmm1,272-256(%rcx)683pshufd $0xff,%xmm3,%xmm3684movdqa %xmm2,288-256(%rcx)685movdqa %xmm3,304-256(%rcx)686687jmp .Loop_enter4x688689.align 32690.Loop_outer4x:691movdqa 64(%rsp),%xmm8692movdqa 80(%rsp),%xmm9693movdqa 96(%rsp),%xmm10694movdqa 112(%rsp),%xmm11695movdqa 128-256(%rcx),%xmm12696movdqa 144-256(%rcx),%xmm13697movdqa 160-256(%rcx),%xmm14698movdqa 176-256(%rcx),%xmm15699movdqa 192-256(%rcx),%xmm4700movdqa 
208-256(%rcx),%xmm5701movdqa 224-256(%rcx),%xmm6702movdqa 240-256(%rcx),%xmm7703movdqa 256-256(%rcx),%xmm0704movdqa 272-256(%rcx),%xmm1705movdqa 288-256(%rcx),%xmm2706movdqa 304-256(%rcx),%xmm3707paddd .Lfour(%rip),%xmm0708709.Loop_enter4x:710movdqa %xmm6,32(%rsp)711movdqa %xmm7,48(%rsp)712movdqa (%r10),%xmm7713movl $10,%eax714movdqa %xmm0,256-256(%rcx)715jmp .Loop4x716717.align 32718.Loop4x:719paddd %xmm12,%xmm8720paddd %xmm13,%xmm9721pxor %xmm8,%xmm0722pxor %xmm9,%xmm1723.byte 102,15,56,0,199724.byte 102,15,56,0,207725paddd %xmm0,%xmm4726paddd %xmm1,%xmm5727pxor %xmm4,%xmm12728pxor %xmm5,%xmm13729movdqa %xmm12,%xmm6730pslld $12,%xmm12731psrld $20,%xmm6732movdqa %xmm13,%xmm7733pslld $12,%xmm13734por %xmm6,%xmm12735psrld $20,%xmm7736movdqa (%r11),%xmm6737por %xmm7,%xmm13738paddd %xmm12,%xmm8739paddd %xmm13,%xmm9740pxor %xmm8,%xmm0741pxor %xmm9,%xmm1742.byte 102,15,56,0,198743.byte 102,15,56,0,206744paddd %xmm0,%xmm4745paddd %xmm1,%xmm5746pxor %xmm4,%xmm12747pxor %xmm5,%xmm13748movdqa %xmm12,%xmm7749pslld $7,%xmm12750psrld $25,%xmm7751movdqa %xmm13,%xmm6752pslld $7,%xmm13753por %xmm7,%xmm12754psrld $25,%xmm6755movdqa (%r10),%xmm7756por %xmm6,%xmm13757movdqa %xmm4,0(%rsp)758movdqa %xmm5,16(%rsp)759movdqa 32(%rsp),%xmm4760movdqa 48(%rsp),%xmm5761paddd %xmm14,%xmm10762paddd %xmm15,%xmm11763pxor %xmm10,%xmm2764pxor %xmm11,%xmm3765.byte 102,15,56,0,215766.byte 102,15,56,0,223767paddd %xmm2,%xmm4768paddd %xmm3,%xmm5769pxor %xmm4,%xmm14770pxor %xmm5,%xmm15771movdqa %xmm14,%xmm6772pslld $12,%xmm14773psrld $20,%xmm6774movdqa %xmm15,%xmm7775pslld $12,%xmm15776por %xmm6,%xmm14777psrld $20,%xmm7778movdqa (%r11),%xmm6779por %xmm7,%xmm15780paddd %xmm14,%xmm10781paddd %xmm15,%xmm11782pxor %xmm10,%xmm2783pxor %xmm11,%xmm3784.byte 102,15,56,0,214785.byte 102,15,56,0,222786paddd %xmm2,%xmm4787paddd %xmm3,%xmm5788pxor %xmm4,%xmm14789pxor %xmm5,%xmm15790movdqa %xmm14,%xmm7791pslld $7,%xmm14792psrld $25,%xmm7793movdqa %xmm15,%xmm6794pslld $7,%xmm15795por %xmm7,%xmm14796psrld 
$25,%xmm6797movdqa (%r10),%xmm7798por %xmm6,%xmm15799paddd %xmm13,%xmm8800paddd %xmm14,%xmm9801pxor %xmm8,%xmm3802pxor %xmm9,%xmm0803.byte 102,15,56,0,223804.byte 102,15,56,0,199805paddd %xmm3,%xmm4806paddd %xmm0,%xmm5807pxor %xmm4,%xmm13808pxor %xmm5,%xmm14809movdqa %xmm13,%xmm6810pslld $12,%xmm13811psrld $20,%xmm6812movdqa %xmm14,%xmm7813pslld $12,%xmm14814por %xmm6,%xmm13815psrld $20,%xmm7816movdqa (%r11),%xmm6817por %xmm7,%xmm14818paddd %xmm13,%xmm8819paddd %xmm14,%xmm9820pxor %xmm8,%xmm3821pxor %xmm9,%xmm0822.byte 102,15,56,0,222823.byte 102,15,56,0,198824paddd %xmm3,%xmm4825paddd %xmm0,%xmm5826pxor %xmm4,%xmm13827pxor %xmm5,%xmm14828movdqa %xmm13,%xmm7829pslld $7,%xmm13830psrld $25,%xmm7831movdqa %xmm14,%xmm6832pslld $7,%xmm14833por %xmm7,%xmm13834psrld $25,%xmm6835movdqa (%r10),%xmm7836por %xmm6,%xmm14837movdqa %xmm4,32(%rsp)838movdqa %xmm5,48(%rsp)839movdqa 0(%rsp),%xmm4840movdqa 16(%rsp),%xmm5841paddd %xmm15,%xmm10842paddd %xmm12,%xmm11843pxor %xmm10,%xmm1844pxor %xmm11,%xmm2845.byte 102,15,56,0,207846.byte 102,15,56,0,215847paddd %xmm1,%xmm4848paddd %xmm2,%xmm5849pxor %xmm4,%xmm15850pxor %xmm5,%xmm12851movdqa %xmm15,%xmm6852pslld $12,%xmm15853psrld $20,%xmm6854movdqa %xmm12,%xmm7855pslld $12,%xmm12856por %xmm6,%xmm15857psrld $20,%xmm7858movdqa (%r11),%xmm6859por %xmm7,%xmm12860paddd %xmm15,%xmm10861paddd %xmm12,%xmm11862pxor %xmm10,%xmm1863pxor %xmm11,%xmm2864.byte 102,15,56,0,206865.byte 102,15,56,0,214866paddd %xmm1,%xmm4867paddd %xmm2,%xmm5868pxor %xmm4,%xmm15869pxor %xmm5,%xmm12870movdqa %xmm15,%xmm7871pslld $7,%xmm15872psrld $25,%xmm7873movdqa %xmm12,%xmm6874pslld $7,%xmm12875por %xmm7,%xmm15876psrld $25,%xmm6877movdqa (%r10),%xmm7878por %xmm6,%xmm12879decl %eax880jnz .Loop4x881882paddd 64(%rsp),%xmm8883paddd 80(%rsp),%xmm9884paddd 96(%rsp),%xmm10885paddd 112(%rsp),%xmm11886887movdqa %xmm8,%xmm6888punpckldq %xmm9,%xmm8889movdqa %xmm10,%xmm7890punpckldq %xmm11,%xmm10891punpckhdq %xmm9,%xmm6892punpckhdq %xmm11,%xmm7893movdqa %xmm8,%xmm9894punpcklqdq 
%xmm10,%xmm8895movdqa %xmm6,%xmm11896punpcklqdq %xmm7,%xmm6897punpckhqdq %xmm10,%xmm9898punpckhqdq %xmm7,%xmm11899paddd 128-256(%rcx),%xmm12900paddd 144-256(%rcx),%xmm13901paddd 160-256(%rcx),%xmm14902paddd 176-256(%rcx),%xmm15903904movdqa %xmm8,0(%rsp)905movdqa %xmm9,16(%rsp)906movdqa 32(%rsp),%xmm8907movdqa 48(%rsp),%xmm9908909movdqa %xmm12,%xmm10910punpckldq %xmm13,%xmm12911movdqa %xmm14,%xmm7912punpckldq %xmm15,%xmm14913punpckhdq %xmm13,%xmm10914punpckhdq %xmm15,%xmm7915movdqa %xmm12,%xmm13916punpcklqdq %xmm14,%xmm12917movdqa %xmm10,%xmm15918punpcklqdq %xmm7,%xmm10919punpckhqdq %xmm14,%xmm13920punpckhqdq %xmm7,%xmm15921paddd 192-256(%rcx),%xmm4922paddd 208-256(%rcx),%xmm5923paddd 224-256(%rcx),%xmm8924paddd 240-256(%rcx),%xmm9925926movdqa %xmm6,32(%rsp)927movdqa %xmm11,48(%rsp)928929movdqa %xmm4,%xmm14930punpckldq %xmm5,%xmm4931movdqa %xmm8,%xmm7932punpckldq %xmm9,%xmm8933punpckhdq %xmm5,%xmm14934punpckhdq %xmm9,%xmm7935movdqa %xmm4,%xmm5936punpcklqdq %xmm8,%xmm4937movdqa %xmm14,%xmm9938punpcklqdq %xmm7,%xmm14939punpckhqdq %xmm8,%xmm5940punpckhqdq %xmm7,%xmm9941paddd 256-256(%rcx),%xmm0942paddd 272-256(%rcx),%xmm1943paddd 288-256(%rcx),%xmm2944paddd 304-256(%rcx),%xmm3945946movdqa %xmm0,%xmm8947punpckldq %xmm1,%xmm0948movdqa %xmm2,%xmm7949punpckldq %xmm3,%xmm2950punpckhdq %xmm1,%xmm8951punpckhdq %xmm3,%xmm7952movdqa %xmm0,%xmm1953punpcklqdq %xmm2,%xmm0954movdqa %xmm8,%xmm3955punpcklqdq %xmm7,%xmm8956punpckhqdq %xmm2,%xmm1957punpckhqdq %xmm7,%xmm3958cmpq $256,%rdx959jb .Ltail4x960961movdqu 0(%rsi),%xmm6962movdqu 16(%rsi),%xmm11963movdqu 32(%rsi),%xmm2964movdqu 48(%rsi),%xmm7965pxor 0(%rsp),%xmm6966pxor %xmm12,%xmm11967pxor %xmm4,%xmm2968pxor %xmm0,%xmm7969970movdqu %xmm6,0(%rdi)971movdqu 64(%rsi),%xmm6972movdqu %xmm11,16(%rdi)973movdqu 80(%rsi),%xmm11974movdqu %xmm2,32(%rdi)975movdqu 96(%rsi),%xmm2976movdqu %xmm7,48(%rdi)977movdqu 112(%rsi),%xmm7978leaq 128(%rsi),%rsi979pxor 16(%rsp),%xmm6980pxor %xmm13,%xmm11981pxor %xmm5,%xmm2982pxor %xmm1,%xmm7983984movdqu 
%xmm6,64(%rdi)985movdqu 0(%rsi),%xmm6986movdqu %xmm11,80(%rdi)987movdqu 16(%rsi),%xmm11988movdqu %xmm2,96(%rdi)989movdqu 32(%rsi),%xmm2990movdqu %xmm7,112(%rdi)991leaq 128(%rdi),%rdi992movdqu 48(%rsi),%xmm7993pxor 32(%rsp),%xmm6994pxor %xmm10,%xmm11995pxor %xmm14,%xmm2996pxor %xmm8,%xmm7997998movdqu %xmm6,0(%rdi)999movdqu 64(%rsi),%xmm61000movdqu %xmm11,16(%rdi)1001movdqu 80(%rsi),%xmm111002movdqu %xmm2,32(%rdi)1003movdqu 96(%rsi),%xmm21004movdqu %xmm7,48(%rdi)1005movdqu 112(%rsi),%xmm71006leaq 128(%rsi),%rsi1007pxor 48(%rsp),%xmm61008pxor %xmm15,%xmm111009pxor %xmm9,%xmm21010pxor %xmm3,%xmm71011movdqu %xmm6,64(%rdi)1012movdqu %xmm11,80(%rdi)1013movdqu %xmm2,96(%rdi)1014movdqu %xmm7,112(%rdi)1015leaq 128(%rdi),%rdi10161017subq $256,%rdx1018jnz .Loop_outer4x10191020jmp .Ldone4x10211022.Ltail4x:1023cmpq $192,%rdx1024jae .L192_or_more4x1025cmpq $128,%rdx1026jae .L128_or_more4x1027cmpq $64,%rdx1028jae .L64_or_more4x102910301031xorq %r10,%r1010321033movdqa %xmm12,16(%rsp)1034movdqa %xmm4,32(%rsp)1035movdqa %xmm0,48(%rsp)1036jmp .Loop_tail4x10371038.align 321039.L64_or_more4x:1040movdqu 0(%rsi),%xmm61041movdqu 16(%rsi),%xmm111042movdqu 32(%rsi),%xmm21043movdqu 48(%rsi),%xmm71044pxor 0(%rsp),%xmm61045pxor %xmm12,%xmm111046pxor %xmm4,%xmm21047pxor %xmm0,%xmm71048movdqu %xmm6,0(%rdi)1049movdqu %xmm11,16(%rdi)1050movdqu %xmm2,32(%rdi)1051movdqu %xmm7,48(%rdi)1052je .Ldone4x10531054movdqa 16(%rsp),%xmm61055leaq 64(%rsi),%rsi1056xorq %r10,%r101057movdqa %xmm6,0(%rsp)1058movdqa %xmm13,16(%rsp)1059leaq 64(%rdi),%rdi1060movdqa %xmm5,32(%rsp)1061subq $64,%rdx1062movdqa %xmm1,48(%rsp)1063jmp .Loop_tail4x10641065.align 321066.L128_or_more4x:1067movdqu 0(%rsi),%xmm61068movdqu 16(%rsi),%xmm111069movdqu 32(%rsi),%xmm21070movdqu 48(%rsi),%xmm71071pxor 0(%rsp),%xmm61072pxor %xmm12,%xmm111073pxor %xmm4,%xmm21074pxor %xmm0,%xmm710751076movdqu %xmm6,0(%rdi)1077movdqu 64(%rsi),%xmm61078movdqu %xmm11,16(%rdi)1079movdqu 80(%rsi),%xmm111080movdqu %xmm2,32(%rdi)1081movdqu 
96(%rsi),%xmm21082movdqu %xmm7,48(%rdi)1083movdqu 112(%rsi),%xmm71084pxor 16(%rsp),%xmm61085pxor %xmm13,%xmm111086pxor %xmm5,%xmm21087pxor %xmm1,%xmm71088movdqu %xmm6,64(%rdi)1089movdqu %xmm11,80(%rdi)1090movdqu %xmm2,96(%rdi)1091movdqu %xmm7,112(%rdi)1092je .Ldone4x10931094movdqa 32(%rsp),%xmm61095leaq 128(%rsi),%rsi1096xorq %r10,%r101097movdqa %xmm6,0(%rsp)1098movdqa %xmm10,16(%rsp)1099leaq 128(%rdi),%rdi1100movdqa %xmm14,32(%rsp)1101subq $128,%rdx1102movdqa %xmm8,48(%rsp)1103jmp .Loop_tail4x11041105.align 321106.L192_or_more4x:1107movdqu 0(%rsi),%xmm61108movdqu 16(%rsi),%xmm111109movdqu 32(%rsi),%xmm21110movdqu 48(%rsi),%xmm71111pxor 0(%rsp),%xmm61112pxor %xmm12,%xmm111113pxor %xmm4,%xmm21114pxor %xmm0,%xmm711151116movdqu %xmm6,0(%rdi)1117movdqu 64(%rsi),%xmm61118movdqu %xmm11,16(%rdi)1119movdqu 80(%rsi),%xmm111120movdqu %xmm2,32(%rdi)1121movdqu 96(%rsi),%xmm21122movdqu %xmm7,48(%rdi)1123movdqu 112(%rsi),%xmm71124leaq 128(%rsi),%rsi1125pxor 16(%rsp),%xmm61126pxor %xmm13,%xmm111127pxor %xmm5,%xmm21128pxor %xmm1,%xmm711291130movdqu %xmm6,64(%rdi)1131movdqu 0(%rsi),%xmm61132movdqu %xmm11,80(%rdi)1133movdqu 16(%rsi),%xmm111134movdqu %xmm2,96(%rdi)1135movdqu 32(%rsi),%xmm21136movdqu %xmm7,112(%rdi)1137leaq 128(%rdi),%rdi1138movdqu 48(%rsi),%xmm71139pxor 32(%rsp),%xmm61140pxor %xmm10,%xmm111141pxor %xmm14,%xmm21142pxor %xmm8,%xmm71143movdqu %xmm6,0(%rdi)1144movdqu %xmm11,16(%rdi)1145movdqu %xmm2,32(%rdi)1146movdqu %xmm7,48(%rdi)1147je .Ldone4x11481149movdqa 48(%rsp),%xmm61150leaq 64(%rsi),%rsi1151xorq %r10,%r101152movdqa %xmm6,0(%rsp)1153movdqa %xmm15,16(%rsp)1154leaq 64(%rdi),%rdi1155movdqa %xmm9,32(%rsp)1156subq $192,%rdx1157movdqa %xmm3,48(%rsp)11581159.Loop_tail4x:1160movzbl (%rsi,%r10,1),%eax1161movzbl (%rsp,%r10,1),%ecx1162leaq 1(%r10),%r101163xorl %ecx,%eax1164movb %al,-1(%rdi,%r10,1)1165decq %rdx1166jnz .Loop_tail4x11671168.Ldone4x:1169leaq (%r9),%rsp1170.cfi_def_cfa_register %rsp1171.L4x_epilogue:1172.byte 0xf3,0xc31173.cfi_endproc1174.size 
ChaCha20_4x,.-ChaCha20_4x1175.type ChaCha20_4xop,@function1176.align 321177ChaCha20_4xop:1178.cfi_startproc1179.LChaCha20_4xop:1180movq %rsp,%r91181.cfi_def_cfa_register %r91182subq $0x140+8,%rsp1183vzeroupper11841185vmovdqa .Lsigma(%rip),%xmm111186vmovdqu (%rcx),%xmm31187vmovdqu 16(%rcx),%xmm151188vmovdqu (%r8),%xmm71189leaq 256(%rsp),%rcx11901191vpshufd $0x00,%xmm11,%xmm81192vpshufd $0x55,%xmm11,%xmm91193vmovdqa %xmm8,64(%rsp)1194vpshufd $0xaa,%xmm11,%xmm101195vmovdqa %xmm9,80(%rsp)1196vpshufd $0xff,%xmm11,%xmm111197vmovdqa %xmm10,96(%rsp)1198vmovdqa %xmm11,112(%rsp)11991200vpshufd $0x00,%xmm3,%xmm01201vpshufd $0x55,%xmm3,%xmm11202vmovdqa %xmm0,128-256(%rcx)1203vpshufd $0xaa,%xmm3,%xmm21204vmovdqa %xmm1,144-256(%rcx)1205vpshufd $0xff,%xmm3,%xmm31206vmovdqa %xmm2,160-256(%rcx)1207vmovdqa %xmm3,176-256(%rcx)12081209vpshufd $0x00,%xmm15,%xmm121210vpshufd $0x55,%xmm15,%xmm131211vmovdqa %xmm12,192-256(%rcx)1212vpshufd $0xaa,%xmm15,%xmm141213vmovdqa %xmm13,208-256(%rcx)1214vpshufd $0xff,%xmm15,%xmm151215vmovdqa %xmm14,224-256(%rcx)1216vmovdqa %xmm15,240-256(%rcx)12171218vpshufd $0x00,%xmm7,%xmm41219vpshufd $0x55,%xmm7,%xmm51220vpaddd .Linc(%rip),%xmm4,%xmm41221vpshufd $0xaa,%xmm7,%xmm61222vmovdqa %xmm5,272-256(%rcx)1223vpshufd $0xff,%xmm7,%xmm71224vmovdqa %xmm6,288-256(%rcx)1225vmovdqa %xmm7,304-256(%rcx)12261227jmp .Loop_enter4xop12281229.align 321230.Loop_outer4xop:1231vmovdqa 64(%rsp),%xmm81232vmovdqa 80(%rsp),%xmm91233vmovdqa 96(%rsp),%xmm101234vmovdqa 112(%rsp),%xmm111235vmovdqa 128-256(%rcx),%xmm01236vmovdqa 144-256(%rcx),%xmm11237vmovdqa 160-256(%rcx),%xmm21238vmovdqa 176-256(%rcx),%xmm31239vmovdqa 192-256(%rcx),%xmm121240vmovdqa 208-256(%rcx),%xmm131241vmovdqa 224-256(%rcx),%xmm141242vmovdqa 240-256(%rcx),%xmm151243vmovdqa 256-256(%rcx),%xmm41244vmovdqa 272-256(%rcx),%xmm51245vmovdqa 288-256(%rcx),%xmm61246vmovdqa 304-256(%rcx),%xmm71247vpaddd .Lfour(%rip),%xmm4,%xmm412481249.Loop_enter4xop:1250movl $10,%eax1251vmovdqa %xmm4,256-256(%rcx)1252jmp 
.Loop4xop12531254.align 321255.Loop4xop:1256vpaddd %xmm0,%xmm8,%xmm81257vpaddd %xmm1,%xmm9,%xmm91258vpaddd %xmm2,%xmm10,%xmm101259vpaddd %xmm3,%xmm11,%xmm111260vpxor %xmm4,%xmm8,%xmm41261vpxor %xmm5,%xmm9,%xmm51262vpxor %xmm6,%xmm10,%xmm61263vpxor %xmm7,%xmm11,%xmm71264.byte 143,232,120,194,228,161265.byte 143,232,120,194,237,161266.byte 143,232,120,194,246,161267.byte 143,232,120,194,255,161268vpaddd %xmm4,%xmm12,%xmm121269vpaddd %xmm5,%xmm13,%xmm131270vpaddd %xmm6,%xmm14,%xmm141271vpaddd %xmm7,%xmm15,%xmm151272vpxor %xmm0,%xmm12,%xmm01273vpxor %xmm1,%xmm13,%xmm11274vpxor %xmm14,%xmm2,%xmm21275vpxor %xmm15,%xmm3,%xmm31276.byte 143,232,120,194,192,121277.byte 143,232,120,194,201,121278.byte 143,232,120,194,210,121279.byte 143,232,120,194,219,121280vpaddd %xmm8,%xmm0,%xmm81281vpaddd %xmm9,%xmm1,%xmm91282vpaddd %xmm2,%xmm10,%xmm101283vpaddd %xmm3,%xmm11,%xmm111284vpxor %xmm4,%xmm8,%xmm41285vpxor %xmm5,%xmm9,%xmm51286vpxor %xmm6,%xmm10,%xmm61287vpxor %xmm7,%xmm11,%xmm71288.byte 143,232,120,194,228,81289.byte 143,232,120,194,237,81290.byte 143,232,120,194,246,81291.byte 143,232,120,194,255,81292vpaddd %xmm4,%xmm12,%xmm121293vpaddd %xmm5,%xmm13,%xmm131294vpaddd %xmm6,%xmm14,%xmm141295vpaddd %xmm7,%xmm15,%xmm151296vpxor %xmm0,%xmm12,%xmm01297vpxor %xmm1,%xmm13,%xmm11298vpxor %xmm14,%xmm2,%xmm21299vpxor %xmm15,%xmm3,%xmm31300.byte 143,232,120,194,192,71301.byte 143,232,120,194,201,71302.byte 143,232,120,194,210,71303.byte 143,232,120,194,219,71304vpaddd %xmm1,%xmm8,%xmm81305vpaddd %xmm2,%xmm9,%xmm91306vpaddd %xmm3,%xmm10,%xmm101307vpaddd %xmm0,%xmm11,%xmm111308vpxor %xmm7,%xmm8,%xmm71309vpxor %xmm4,%xmm9,%xmm41310vpxor %xmm5,%xmm10,%xmm51311vpxor %xmm6,%xmm11,%xmm61312.byte 143,232,120,194,255,161313.byte 143,232,120,194,228,161314.byte 143,232,120,194,237,161315.byte 143,232,120,194,246,161316vpaddd %xmm7,%xmm14,%xmm141317vpaddd %xmm4,%xmm15,%xmm151318vpaddd %xmm5,%xmm12,%xmm121319vpaddd %xmm6,%xmm13,%xmm131320vpxor %xmm1,%xmm14,%xmm11321vpxor %xmm2,%xmm15,%xmm21322vpxor 
%xmm12,%xmm3,%xmm31323vpxor %xmm13,%xmm0,%xmm01324.byte 143,232,120,194,201,121325.byte 143,232,120,194,210,121326.byte 143,232,120,194,219,121327.byte 143,232,120,194,192,121328vpaddd %xmm8,%xmm1,%xmm81329vpaddd %xmm9,%xmm2,%xmm91330vpaddd %xmm3,%xmm10,%xmm101331vpaddd %xmm0,%xmm11,%xmm111332vpxor %xmm7,%xmm8,%xmm71333vpxor %xmm4,%xmm9,%xmm41334vpxor %xmm5,%xmm10,%xmm51335vpxor %xmm6,%xmm11,%xmm61336.byte 143,232,120,194,255,81337.byte 143,232,120,194,228,81338.byte 143,232,120,194,237,81339.byte 143,232,120,194,246,81340vpaddd %xmm7,%xmm14,%xmm141341vpaddd %xmm4,%xmm15,%xmm151342vpaddd %xmm5,%xmm12,%xmm121343vpaddd %xmm6,%xmm13,%xmm131344vpxor %xmm1,%xmm14,%xmm11345vpxor %xmm2,%xmm15,%xmm21346vpxor %xmm12,%xmm3,%xmm31347vpxor %xmm13,%xmm0,%xmm01348.byte 143,232,120,194,201,71349.byte 143,232,120,194,210,71350.byte 143,232,120,194,219,71351.byte 143,232,120,194,192,71352decl %eax1353jnz .Loop4xop13541355vpaddd 64(%rsp),%xmm8,%xmm81356vpaddd 80(%rsp),%xmm9,%xmm91357vpaddd 96(%rsp),%xmm10,%xmm101358vpaddd 112(%rsp),%xmm11,%xmm1113591360vmovdqa %xmm14,32(%rsp)1361vmovdqa %xmm15,48(%rsp)13621363vpunpckldq %xmm9,%xmm8,%xmm141364vpunpckldq %xmm11,%xmm10,%xmm151365vpunpckhdq %xmm9,%xmm8,%xmm81366vpunpckhdq %xmm11,%xmm10,%xmm101367vpunpcklqdq %xmm15,%xmm14,%xmm91368vpunpckhqdq %xmm15,%xmm14,%xmm141369vpunpcklqdq %xmm10,%xmm8,%xmm111370vpunpckhqdq %xmm10,%xmm8,%xmm81371vpaddd 128-256(%rcx),%xmm0,%xmm01372vpaddd 144-256(%rcx),%xmm1,%xmm11373vpaddd 160-256(%rcx),%xmm2,%xmm21374vpaddd 176-256(%rcx),%xmm3,%xmm313751376vmovdqa %xmm9,0(%rsp)1377vmovdqa %xmm14,16(%rsp)1378vmovdqa 32(%rsp),%xmm91379vmovdqa 48(%rsp),%xmm1413801381vpunpckldq %xmm1,%xmm0,%xmm101382vpunpckldq %xmm3,%xmm2,%xmm151383vpunpckhdq %xmm1,%xmm0,%xmm01384vpunpckhdq %xmm3,%xmm2,%xmm21385vpunpcklqdq %xmm15,%xmm10,%xmm11386vpunpckhqdq %xmm15,%xmm10,%xmm101387vpunpcklqdq %xmm2,%xmm0,%xmm31388vpunpckhqdq %xmm2,%xmm0,%xmm01389vpaddd 192-256(%rcx),%xmm12,%xmm121390vpaddd 208-256(%rcx),%xmm13,%xmm131391vpaddd 
224-256(%rcx),%xmm9,%xmm91392vpaddd 240-256(%rcx),%xmm14,%xmm1413931394vpunpckldq %xmm13,%xmm12,%xmm21395vpunpckldq %xmm14,%xmm9,%xmm151396vpunpckhdq %xmm13,%xmm12,%xmm121397vpunpckhdq %xmm14,%xmm9,%xmm91398vpunpcklqdq %xmm15,%xmm2,%xmm131399vpunpckhqdq %xmm15,%xmm2,%xmm21400vpunpcklqdq %xmm9,%xmm12,%xmm141401vpunpckhqdq %xmm9,%xmm12,%xmm121402vpaddd 256-256(%rcx),%xmm4,%xmm41403vpaddd 272-256(%rcx),%xmm5,%xmm51404vpaddd 288-256(%rcx),%xmm6,%xmm61405vpaddd 304-256(%rcx),%xmm7,%xmm714061407vpunpckldq %xmm5,%xmm4,%xmm91408vpunpckldq %xmm7,%xmm6,%xmm151409vpunpckhdq %xmm5,%xmm4,%xmm41410vpunpckhdq %xmm7,%xmm6,%xmm61411vpunpcklqdq %xmm15,%xmm9,%xmm51412vpunpckhqdq %xmm15,%xmm9,%xmm91413vpunpcklqdq %xmm6,%xmm4,%xmm71414vpunpckhqdq %xmm6,%xmm4,%xmm41415vmovdqa 0(%rsp),%xmm61416vmovdqa 16(%rsp),%xmm1514171418cmpq $256,%rdx1419jb .Ltail4xop14201421vpxor 0(%rsi),%xmm6,%xmm61422vpxor 16(%rsi),%xmm1,%xmm11423vpxor 32(%rsi),%xmm13,%xmm131424vpxor 48(%rsi),%xmm5,%xmm51425vpxor 64(%rsi),%xmm15,%xmm151426vpxor 80(%rsi),%xmm10,%xmm101427vpxor 96(%rsi),%xmm2,%xmm21428vpxor 112(%rsi),%xmm9,%xmm91429leaq 128(%rsi),%rsi1430vpxor 0(%rsi),%xmm11,%xmm111431vpxor 16(%rsi),%xmm3,%xmm31432vpxor 32(%rsi),%xmm14,%xmm141433vpxor 48(%rsi),%xmm7,%xmm71434vpxor 64(%rsi),%xmm8,%xmm81435vpxor 80(%rsi),%xmm0,%xmm01436vpxor 96(%rsi),%xmm12,%xmm121437vpxor 112(%rsi),%xmm4,%xmm41438leaq 128(%rsi),%rsi14391440vmovdqu %xmm6,0(%rdi)1441vmovdqu %xmm1,16(%rdi)1442vmovdqu %xmm13,32(%rdi)1443vmovdqu %xmm5,48(%rdi)1444vmovdqu %xmm15,64(%rdi)1445vmovdqu %xmm10,80(%rdi)1446vmovdqu %xmm2,96(%rdi)1447vmovdqu %xmm9,112(%rdi)1448leaq 128(%rdi),%rdi1449vmovdqu %xmm11,0(%rdi)1450vmovdqu %xmm3,16(%rdi)1451vmovdqu %xmm14,32(%rdi)1452vmovdqu %xmm7,48(%rdi)1453vmovdqu %xmm8,64(%rdi)1454vmovdqu %xmm0,80(%rdi)1455vmovdqu %xmm12,96(%rdi)1456vmovdqu %xmm4,112(%rdi)1457leaq 128(%rdi),%rdi14581459subq $256,%rdx1460jnz .Loop_outer4xop14611462jmp .Ldone4xop14631464.align 321465.Ltail4xop:1466cmpq $192,%rdx1467jae 
.L192_or_more4xop1468cmpq $128,%rdx1469jae .L128_or_more4xop1470cmpq $64,%rdx1471jae .L64_or_more4xop14721473xorq %r10,%r101474vmovdqa %xmm6,0(%rsp)1475vmovdqa %xmm1,16(%rsp)1476vmovdqa %xmm13,32(%rsp)1477vmovdqa %xmm5,48(%rsp)1478jmp .Loop_tail4xop14791480.align 321481.L64_or_more4xop:1482vpxor 0(%rsi),%xmm6,%xmm61483vpxor 16(%rsi),%xmm1,%xmm11484vpxor 32(%rsi),%xmm13,%xmm131485vpxor 48(%rsi),%xmm5,%xmm51486vmovdqu %xmm6,0(%rdi)1487vmovdqu %xmm1,16(%rdi)1488vmovdqu %xmm13,32(%rdi)1489vmovdqu %xmm5,48(%rdi)1490je .Ldone4xop14911492leaq 64(%rsi),%rsi1493vmovdqa %xmm15,0(%rsp)1494xorq %r10,%r101495vmovdqa %xmm10,16(%rsp)1496leaq 64(%rdi),%rdi1497vmovdqa %xmm2,32(%rsp)1498subq $64,%rdx1499vmovdqa %xmm9,48(%rsp)1500jmp .Loop_tail4xop15011502.align 321503.L128_or_more4xop:1504vpxor 0(%rsi),%xmm6,%xmm61505vpxor 16(%rsi),%xmm1,%xmm11506vpxor 32(%rsi),%xmm13,%xmm131507vpxor 48(%rsi),%xmm5,%xmm51508vpxor 64(%rsi),%xmm15,%xmm151509vpxor 80(%rsi),%xmm10,%xmm101510vpxor 96(%rsi),%xmm2,%xmm21511vpxor 112(%rsi),%xmm9,%xmm915121513vmovdqu %xmm6,0(%rdi)1514vmovdqu %xmm1,16(%rdi)1515vmovdqu %xmm13,32(%rdi)1516vmovdqu %xmm5,48(%rdi)1517vmovdqu %xmm15,64(%rdi)1518vmovdqu %xmm10,80(%rdi)1519vmovdqu %xmm2,96(%rdi)1520vmovdqu %xmm9,112(%rdi)1521je .Ldone4xop15221523leaq 128(%rsi),%rsi1524vmovdqa %xmm11,0(%rsp)1525xorq %r10,%r101526vmovdqa %xmm3,16(%rsp)1527leaq 128(%rdi),%rdi1528vmovdqa %xmm14,32(%rsp)1529subq $128,%rdx1530vmovdqa %xmm7,48(%rsp)1531jmp .Loop_tail4xop15321533.align 321534.L192_or_more4xop:1535vpxor 0(%rsi),%xmm6,%xmm61536vpxor 16(%rsi),%xmm1,%xmm11537vpxor 32(%rsi),%xmm13,%xmm131538vpxor 48(%rsi),%xmm5,%xmm51539vpxor 64(%rsi),%xmm15,%xmm151540vpxor 80(%rsi),%xmm10,%xmm101541vpxor 96(%rsi),%xmm2,%xmm21542vpxor 112(%rsi),%xmm9,%xmm91543leaq 128(%rsi),%rsi1544vpxor 0(%rsi),%xmm11,%xmm111545vpxor 16(%rsi),%xmm3,%xmm31546vpxor 32(%rsi),%xmm14,%xmm141547vpxor 48(%rsi),%xmm7,%xmm715481549vmovdqu %xmm6,0(%rdi)1550vmovdqu %xmm1,16(%rdi)1551vmovdqu %xmm13,32(%rdi)1552vmovdqu 
%xmm5,48(%rdi)1553vmovdqu %xmm15,64(%rdi)1554vmovdqu %xmm10,80(%rdi)1555vmovdqu %xmm2,96(%rdi)1556vmovdqu %xmm9,112(%rdi)1557leaq 128(%rdi),%rdi1558vmovdqu %xmm11,0(%rdi)1559vmovdqu %xmm3,16(%rdi)1560vmovdqu %xmm14,32(%rdi)1561vmovdqu %xmm7,48(%rdi)1562je .Ldone4xop15631564leaq 64(%rsi),%rsi1565vmovdqa %xmm8,0(%rsp)1566xorq %r10,%r101567vmovdqa %xmm0,16(%rsp)1568leaq 64(%rdi),%rdi1569vmovdqa %xmm12,32(%rsp)1570subq $192,%rdx1571vmovdqa %xmm4,48(%rsp)15721573.Loop_tail4xop:1574movzbl (%rsi,%r10,1),%eax1575movzbl (%rsp,%r10,1),%ecx1576leaq 1(%r10),%r101577xorl %ecx,%eax1578movb %al,-1(%rdi,%r10,1)1579decq %rdx1580jnz .Loop_tail4xop15811582.Ldone4xop:1583vzeroupper1584leaq (%r9),%rsp1585.cfi_def_cfa_register %rsp1586.L4xop_epilogue:1587.byte 0xf3,0xc31588.cfi_endproc1589.size ChaCha20_4xop,.-ChaCha20_4xop

/*
 * ChaCha20_8x -- AVX2 path: eight 64-byte ChaCha20 blocks (512 bytes of
 * keystream) per outer iteration, XORed with the input.
 *
 * The file header states this file is generated from chacha-x86_64.pl;
 * lasting changes belong in the generator.  The text below is the original
 * instruction sequence restored to one statement per line (the line numbers
 * of a source viewer had been fused into it); no instruction was changed.
 *
 * Entry: local label .LChaCha20_8x (presumably reached by a jump from the
 * dispatcher outside this part of the file -- confirm).
 *
 * In:   %rdi = output pointer (written with vmovdqu / movb)
 *       %rsi = input pointer  (read with vpxor / movzbl)
 *       %rdx = number of bytes to process; must be non-zero here, the
 *              tail loop decrements before it tests
 *       %rcx = pointer to 32 bytes loaded as state words 4..11
 *              (the key, assuming the standard ChaCha20 state layout)
 *       %r8  = pointer to 16 bytes loaded as state words 12..15
 *              (word 12 is used as the per-block counter)
 * Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r9, %r10, %r11, flags,
 *           all ymm registers (zeroed by vzeroall on exit).
 *
 * Stack frame (32-byte aligned, original %rsp kept in %r9):
 *       0(%rsp) .. 127   scratch: spill slots for the "c" row during the
 *                        rounds, later a keystream buffer for the tail
 *     128(%rsp) .. 255   state words 0..3  (.Lsigma), one word per ymm
 *     256(%rsp) .. 383   state words 4..7
 *     384(%rsp) .. 511   state words 8..11
 *     512(%rsp) .. 639   state words 12..15 (word 12 = eight counters)
 * Saved state is addressed as N-256(%rcx) / N-512(%rax) with
 * %rcx = %rsp+256 and %rax = %rsp+512.
 *
 * Register map inside the rounds (one state word, eight blocks per ymm):
 *       words 0..3   -> ymm8..ymm11     ("a")
 *       words 4..7   -> ymm0..ymm3      ("b")
 *       words 8..11  -> ymm12, ymm13 plus two stack slots ("c")
 *       words 12..15 -> ymm4..ymm7      ("d")
 *       ymm14/ymm15  -> temporaries and the byte-rotate masks
 *
 * NOTE(review): the stack copy of the state and the tail keystream buffer
 * are not cleared before %rsp is restored -- confirm this is acceptable.
 */
.type	ChaCha20_8x,@function
.align	32
ChaCha20_8x:
.cfi_startproc
.LChaCha20_8x:
	movq	%rsp,%r9			/* keep caller's %rsp; CFA is tracked via %r9 */
.cfi_def_cfa_register	%r9
	subq	$0x280+8,%rsp
	andq	$-32,%rsp			/* vmovdqa on ymm needs 32-byte alignment */
	vzeroupper

	/* Load the four 16-byte state rows, duplicated into both 128-bit lanes. */
	vbroadcasti128	.Lsigma(%rip),%ymm11
	vbroadcasti128	(%rcx),%ymm3
	vbroadcasti128	16(%rcx),%ymm15
	vbroadcasti128	(%r8),%ymm7
	leaq	256(%rsp),%rcx			/* %rcx is reused as a frame base from here on */
	leaq	512(%rsp),%rax
	leaq	.Lrot16(%rip),%r10		/* vpshufb mask: rotate each dword by 16 bits */
	leaq	.Lrot24(%rip),%r11		/* vpshufb mask: rotate each dword left by 8 bits */

	/* Splat every state word across a whole register and save a copy. */
	vpshufd	$0x00,%ymm11,%ymm8
	vpshufd	$0x55,%ymm11,%ymm9
	vmovdqa	%ymm8,128-256(%rcx)
	vpshufd	$0xaa,%ymm11,%ymm10
	vmovdqa	%ymm9,160-256(%rcx)
	vpshufd	$0xff,%ymm11,%ymm11
	vmovdqa	%ymm10,192-256(%rcx)
	vmovdqa	%ymm11,224-256(%rcx)

	vpshufd	$0x00,%ymm3,%ymm0
	vpshufd	$0x55,%ymm3,%ymm1
	vmovdqa	%ymm0,256-256(%rcx)
	vpshufd	$0xaa,%ymm3,%ymm2
	vmovdqa	%ymm1,288-256(%rcx)
	vpshufd	$0xff,%ymm3,%ymm3
	vmovdqa	%ymm2,320-256(%rcx)
	vmovdqa	%ymm3,352-256(%rcx)

	vpshufd	$0x00,%ymm15,%ymm12
	vpshufd	$0x55,%ymm15,%ymm13
	vmovdqa	%ymm12,384-512(%rax)
	vpshufd	$0xaa,%ymm15,%ymm14
	vmovdqa	%ymm13,416-512(%rax)
	vpshufd	$0xff,%ymm15,%ymm15
	vmovdqa	%ymm14,448-512(%rax)
	vmovdqa	%ymm15,480-512(%rax)

	vpshufd	$0x00,%ymm7,%ymm4
	vpshufd	$0x55,%ymm7,%ymm5
	vpaddd	.Lincy(%rip),%ymm4,%ymm4	/* word 12: lane offsets 0,2,4,6,1,3,5,7 -> one counter per block */
	vpshufd	$0xaa,%ymm7,%ymm6
	vmovdqa	%ymm5,544-512(%rax)
	vpshufd	$0xff,%ymm7,%ymm7
	vmovdqa	%ymm6,576-512(%rax)
	vmovdqa	%ymm7,608-512(%rax)

	jmp	.Loop_enter8x			/* word 12 is stored at .Loop_enter8x */

.align	32
.Loop_outer8x:
	/* Reload the saved input state for the next eight blocks. */
	vmovdqa	128-256(%rcx),%ymm8
	vmovdqa	160-256(%rcx),%ymm9
	vmovdqa	192-256(%rcx),%ymm10
	vmovdqa	224-256(%rcx),%ymm11
	vmovdqa	256-256(%rcx),%ymm0
	vmovdqa	288-256(%rcx),%ymm1
	vmovdqa	320-256(%rcx),%ymm2
	vmovdqa	352-256(%rcx),%ymm3
	vmovdqa	384-512(%rax),%ymm12
	vmovdqa	416-512(%rax),%ymm13
	vmovdqa	448-512(%rax),%ymm14
	vmovdqa	480-512(%rax),%ymm15
	vmovdqa	512-512(%rax),%ymm4
	vmovdqa	544-512(%rax),%ymm5
	vmovdqa	576-512(%rax),%ymm6
	vmovdqa	608-512(%rax),%ymm7
	vpaddd	.Leight(%rip),%ymm4,%ymm4	/* advance all eight counters by 8 blocks */

.Loop_enter8x:
	vmovdqa	%ymm14,64(%rsp)			/* park words 10 and 11: ymm14/ymm15 become scratch */
	vmovdqa	%ymm15,96(%rsp)
	vbroadcasti128	(%r10),%ymm15		/* ymm15 = .Lrot16 mask */
	vmovdqa	%ymm4,512-512(%rax)		/* save the counters used by this batch */
	movl	$10,%eax			/* 10 double rounds; overwrites the %rax frame base */
	jmp	.Loop8x

.align	32
.Loop8x:
	/* Column round, QR(ymm8,ymm0,ymm12,ymm4) and QR(ymm9,ymm1,ymm13,ymm5), interleaved. */
	vpaddd	%ymm0,%ymm8,%ymm8		/* a += b */
	vpxor	%ymm4,%ymm8,%ymm4		/* d ^= a */
	vpshufb	%ymm15,%ymm4,%ymm4		/* d <<<= 16 */
	vpaddd	%ymm1,%ymm9,%ymm9
	vpxor	%ymm5,%ymm9,%ymm5
	vpshufb	%ymm15,%ymm5,%ymm5
	vpaddd	%ymm4,%ymm12,%ymm12		/* c += d */
	vpxor	%ymm0,%ymm12,%ymm0		/* b ^= c */
	vpslld	$12,%ymm0,%ymm14
	vpsrld	$20,%ymm0,%ymm0
	vpor	%ymm0,%ymm14,%ymm0		/* b <<<= 12 */
	vbroadcasti128	(%r11),%ymm14		/* ymm14 = .Lrot24 mask */
	vpaddd	%ymm5,%ymm13,%ymm13
	vpxor	%ymm1,%ymm13,%ymm1
	vpslld	$12,%ymm1,%ymm15
	vpsrld	$20,%ymm1,%ymm1
	vpor	%ymm1,%ymm15,%ymm1
	vpaddd	%ymm0,%ymm8,%ymm8		/* a += b */
	vpxor	%ymm4,%ymm8,%ymm4		/* d ^= a */
	vpshufb	%ymm14,%ymm4,%ymm4		/* d <<<= 8 */
	vpaddd	%ymm1,%ymm9,%ymm9
	vpxor	%ymm5,%ymm9,%ymm5
	vpshufb	%ymm14,%ymm5,%ymm5
	vpaddd	%ymm4,%ymm12,%ymm12		/* c += d */
	vpxor	%ymm0,%ymm12,%ymm0		/* b ^= c */
	vpslld	$7,%ymm0,%ymm15
	vpsrld	$25,%ymm0,%ymm0
	vpor	%ymm0,%ymm15,%ymm0		/* b <<<= 7 */
	vbroadcasti128	(%r10),%ymm15		/* ymm15 = .Lrot16 mask again */
	vpaddd	%ymm5,%ymm13,%ymm13
	vpxor	%ymm1,%ymm13,%ymm1
	vpslld	$7,%ymm1,%ymm14
	vpsrld	$25,%ymm1,%ymm1
	vpor	%ymm1,%ymm14,%ymm1
	/* Swap the "c" pair: words 8,9 go to the stack, words 10,11 come in. */
	vmovdqa	%ymm12,0(%rsp)
	vmovdqa	%ymm13,32(%rsp)
	vmovdqa	64(%rsp),%ymm12
	vmovdqa	96(%rsp),%ymm13
	/* Column round, QR(ymm10,ymm2,ymm12,ymm6) and QR(ymm11,ymm3,ymm13,ymm7). */
	vpaddd	%ymm2,%ymm10,%ymm10
	vpxor	%ymm6,%ymm10,%ymm6
	vpshufb	%ymm15,%ymm6,%ymm6
	vpaddd	%ymm3,%ymm11,%ymm11
	vpxor	%ymm7,%ymm11,%ymm7
	vpshufb	%ymm15,%ymm7,%ymm7
	vpaddd	%ymm6,%ymm12,%ymm12
	vpxor	%ymm2,%ymm12,%ymm2
	vpslld	$12,%ymm2,%ymm14
	vpsrld	$20,%ymm2,%ymm2
	vpor	%ymm2,%ymm14,%ymm2
	vbroadcasti128	(%r11),%ymm14
	vpaddd	%ymm7,%ymm13,%ymm13
	vpxor	%ymm3,%ymm13,%ymm3
	vpslld	$12,%ymm3,%ymm15
	vpsrld	$20,%ymm3,%ymm3
	vpor	%ymm3,%ymm15,%ymm3
	vpaddd	%ymm2,%ymm10,%ymm10
	vpxor	%ymm6,%ymm10,%ymm6
	vpshufb	%ymm14,%ymm6,%ymm6
	vpaddd	%ymm3,%ymm11,%ymm11
	vpxor	%ymm7,%ymm11,%ymm7
	vpshufb	%ymm14,%ymm7,%ymm7
	vpaddd	%ymm6,%ymm12,%ymm12
	vpxor	%ymm2,%ymm12,%ymm2
	vpslld	$7,%ymm2,%ymm15
	vpsrld	$25,%ymm2,%ymm2
	vpor	%ymm2,%ymm15,%ymm2
	vbroadcasti128	(%r10),%ymm15
	vpaddd	%ymm7,%ymm13,%ymm13
	vpxor	%ymm3,%ymm13,%ymm3
	vpslld	$7,%ymm3,%ymm14
	vpsrld	$25,%ymm3,%ymm3
	vpor	%ymm3,%ymm14,%ymm3
	/* Diagonal round, QR(ymm8,ymm1,ymm12,ymm7) and QR(ymm9,ymm2,ymm13,ymm4); ymm12/ymm13 still hold words 10,11. */
	vpaddd	%ymm1,%ymm8,%ymm8
	vpxor	%ymm7,%ymm8,%ymm7
	vpshufb	%ymm15,%ymm7,%ymm7
	vpaddd	%ymm2,%ymm9,%ymm9
	vpxor	%ymm4,%ymm9,%ymm4
	vpshufb	%ymm15,%ymm4,%ymm4
	vpaddd	%ymm7,%ymm12,%ymm12
	vpxor	%ymm1,%ymm12,%ymm1
	vpslld	$12,%ymm1,%ymm14
	vpsrld	$20,%ymm1,%ymm1
	vpor	%ymm1,%ymm14,%ymm1
	vbroadcasti128	(%r11),%ymm14
	vpaddd	%ymm4,%ymm13,%ymm13
	vpxor	%ymm2,%ymm13,%ymm2
	vpslld	$12,%ymm2,%ymm15
	vpsrld	$20,%ymm2,%ymm2
	vpor	%ymm2,%ymm15,%ymm2
	vpaddd	%ymm1,%ymm8,%ymm8
	vpxor	%ymm7,%ymm8,%ymm7
	vpshufb	%ymm14,%ymm7,%ymm7
	vpaddd	%ymm2,%ymm9,%ymm9
	vpxor	%ymm4,%ymm9,%ymm4
	vpshufb	%ymm14,%ymm4,%ymm4
	vpaddd	%ymm7,%ymm12,%ymm12
	vpxor	%ymm1,%ymm12,%ymm1
	vpslld	$7,%ymm1,%ymm15
	vpsrld	$25,%ymm1,%ymm1
	vpor	%ymm1,%ymm15,%ymm1
	vbroadcasti128	(%r10),%ymm15
	vpaddd	%ymm4,%ymm13,%ymm13
	vpxor	%ymm2,%ymm13,%ymm2
	vpslld	$7,%ymm2,%ymm14
	vpsrld	$25,%ymm2,%ymm2
	vpor	%ymm2,%ymm14,%ymm2
	/* Swap the "c" pair back: words 10,11 go to the stack, words 8,9 come in. */
	vmovdqa	%ymm12,64(%rsp)
	vmovdqa	%ymm13,96(%rsp)
	vmovdqa	0(%rsp),%ymm12
	vmovdqa	32(%rsp),%ymm13
	/* Diagonal round, QR(ymm10,ymm3,ymm12,ymm5) and QR(ymm11,ymm0,ymm13,ymm6). */
	vpaddd	%ymm3,%ymm10,%ymm10
	vpxor	%ymm5,%ymm10,%ymm5
	vpshufb	%ymm15,%ymm5,%ymm5
	vpaddd	%ymm0,%ymm11,%ymm11
	vpxor	%ymm6,%ymm11,%ymm6
	vpshufb	%ymm15,%ymm6,%ymm6
	vpaddd	%ymm5,%ymm12,%ymm12
	vpxor	%ymm3,%ymm12,%ymm3
	vpslld	$12,%ymm3,%ymm14
	vpsrld	$20,%ymm3,%ymm3
	vpor	%ymm3,%ymm14,%ymm3
	vbroadcasti128	(%r11),%ymm14
	vpaddd	%ymm6,%ymm13,%ymm13
	vpxor	%ymm0,%ymm13,%ymm0
	vpslld	$12,%ymm0,%ymm15
	vpsrld	$20,%ymm0,%ymm0
	vpor	%ymm0,%ymm15,%ymm0
	vpaddd	%ymm3,%ymm10,%ymm10
	vpxor	%ymm5,%ymm10,%ymm5
	vpshufb	%ymm14,%ymm5,%ymm5
	vpaddd	%ymm0,%ymm11,%ymm11
	vpxor	%ymm6,%ymm11,%ymm6
	vpshufb	%ymm14,%ymm6,%ymm6
	vpaddd	%ymm5,%ymm12,%ymm12
	vpxor	%ymm3,%ymm12,%ymm3
	vpslld	$7,%ymm3,%ymm15
	vpsrld	$25,%ymm3,%ymm3
	vpor	%ymm3,%ymm15,%ymm3
	vbroadcasti128	(%r10),%ymm15
	vpaddd	%ymm6,%ymm13,%ymm13
	vpxor	%ymm0,%ymm13,%ymm0
	vpslld	$7,%ymm0,%ymm14
	vpsrld	$25,%ymm0,%ymm0
	vpor	%ymm0,%ymm14,%ymm0
	decl	%eax
	jnz	.Loop8x

	leaq	512(%rsp),%rax			/* restore the frame base that the round counter overwrote */

	/* Words 0..3: add the saved input words, then interleave dwords/qwords within each 128-bit lane. */
	vpaddd	128-256(%rcx),%ymm8,%ymm8
	vpaddd	160-256(%rcx),%ymm9,%ymm9
	vpaddd	192-256(%rcx),%ymm10,%ymm10
	vpaddd	224-256(%rcx),%ymm11,%ymm11

	vpunpckldq	%ymm9,%ymm8,%ymm14
	vpunpckldq	%ymm11,%ymm10,%ymm15
	vpunpckhdq	%ymm9,%ymm8,%ymm8
	vpunpckhdq	%ymm11,%ymm10,%ymm10
	vpunpcklqdq	%ymm15,%ymm14,%ymm9
	vpunpckhqdq	%ymm15,%ymm14,%ymm14
	vpunpcklqdq	%ymm10,%ymm8,%ymm11
	vpunpckhqdq	%ymm10,%ymm8,%ymm8
	/* Words 4..7: same treatment. */
	vpaddd	256-256(%rcx),%ymm0,%ymm0
	vpaddd	288-256(%rcx),%ymm1,%ymm1
	vpaddd	320-256(%rcx),%ymm2,%ymm2
	vpaddd	352-256(%rcx),%ymm3,%ymm3

	vpunpckldq	%ymm1,%ymm0,%ymm10
	vpunpckldq	%ymm3,%ymm2,%ymm15
	vpunpckhdq	%ymm1,%ymm0,%ymm0
	vpunpckhdq	%ymm3,%ymm2,%ymm2
	vpunpcklqdq	%ymm15,%ymm10,%ymm1
	vpunpckhqdq	%ymm15,%ymm10,%ymm10
	vpunpcklqdq	%ymm2,%ymm0,%ymm3
	vpunpckhqdq	%ymm2,%ymm0,%ymm0
	/* Combine matching 128-bit halves of the words 0..3 and words 4..7 results. */
	vperm2i128	$0x20,%ymm1,%ymm9,%ymm15
	vperm2i128	$0x31,%ymm1,%ymm9,%ymm1
	vperm2i128	$0x20,%ymm10,%ymm14,%ymm9
	vperm2i128	$0x31,%ymm10,%ymm14,%ymm10
	vperm2i128	$0x20,%ymm3,%ymm11,%ymm14
	vperm2i128	$0x31,%ymm3,%ymm11,%ymm3
	vperm2i128	$0x20,%ymm0,%ymm8,%ymm11
	vperm2i128	$0x31,%ymm0,%ymm8,%ymm0
	/* Free ymm15/ymm9 so the parked words 10,11 can be brought back. */
	vmovdqa	%ymm15,0(%rsp)
	vmovdqa	%ymm9,32(%rsp)
	vmovdqa	64(%rsp),%ymm15
	vmovdqa	96(%rsp),%ymm9

	/* Words 8..11. */
	vpaddd	384-512(%rax),%ymm12,%ymm12
	vpaddd	416-512(%rax),%ymm13,%ymm13
	vpaddd	448-512(%rax),%ymm15,%ymm15
	vpaddd	480-512(%rax),%ymm9,%ymm9

	vpunpckldq	%ymm13,%ymm12,%ymm2
	vpunpckldq	%ymm9,%ymm15,%ymm8
	vpunpckhdq	%ymm13,%ymm12,%ymm12
	vpunpckhdq	%ymm9,%ymm15,%ymm15
	vpunpcklqdq	%ymm8,%ymm2,%ymm13
	vpunpckhqdq	%ymm8,%ymm2,%ymm2
	vpunpcklqdq	%ymm15,%ymm12,%ymm9
	vpunpckhqdq	%ymm15,%ymm12,%ymm12
	/* Words 12..15 (the saved word 12 already contains this batch's counters). */
	vpaddd	512-512(%rax),%ymm4,%ymm4
	vpaddd	544-512(%rax),%ymm5,%ymm5
	vpaddd	576-512(%rax),%ymm6,%ymm6
	vpaddd	608-512(%rax),%ymm7,%ymm7

	vpunpckldq	%ymm5,%ymm4,%ymm15
	vpunpckldq	%ymm7,%ymm6,%ymm8
	vpunpckhdq	%ymm5,%ymm4,%ymm4
	vpunpckhdq	%ymm7,%ymm6,%ymm6
	vpunpcklqdq	%ymm8,%ymm15,%ymm5
	vpunpckhqdq	%ymm8,%ymm15,%ymm15
	vpunpcklqdq	%ymm6,%ymm4,%ymm7
	vpunpckhqdq	%ymm6,%ymm4,%ymm4
	vperm2i128	$0x20,%ymm5,%ymm13,%ymm8
	vperm2i128	$0x31,%ymm5,%ymm13,%ymm5
	vperm2i128	$0x20,%ymm15,%ymm2,%ymm13
	vperm2i128	$0x31,%ymm15,%ymm2,%ymm15
	vperm2i128	$0x20,%ymm7,%ymm9,%ymm2
	vperm2i128	$0x31,%ymm7,%ymm9,%ymm7
	vperm2i128	$0x20,%ymm4,%ymm12,%ymm9
	vperm2i128	$0x31,%ymm4,%ymm12,%ymm4
	vmovdqa	0(%rsp),%ymm6
	vmovdqa	32(%rsp),%ymm12

	/*
	 * Keystream is consumed in this register order, 32 bytes each:
	 * ymm6, ymm8, ymm1, ymm5, ymm12, ymm13, ymm10, ymm15,
	 * ymm14, ymm2, ymm3, ymm7, ymm11, ymm9, ymm0, ymm4.
	 */
	cmpq	$512,%rdx
	jb	.Ltail8x

	/* Full batch: XOR 512 input bytes with the keystream, 128 bytes at a time. */
	vpxor	0(%rsi),%ymm6,%ymm6
	vpxor	32(%rsi),%ymm8,%ymm8
	vpxor	64(%rsi),%ymm1,%ymm1
	vpxor	96(%rsi),%ymm5,%ymm5
	leaq	128(%rsi),%rsi
	vmovdqu	%ymm6,0(%rdi)
	vmovdqu	%ymm8,32(%rdi)
	vmovdqu	%ymm1,64(%rdi)
	vmovdqu	%ymm5,96(%rdi)
	leaq	128(%rdi),%rdi

	vpxor	0(%rsi),%ymm12,%ymm12
	vpxor	32(%rsi),%ymm13,%ymm13
	vpxor	64(%rsi),%ymm10,%ymm10
	vpxor	96(%rsi),%ymm15,%ymm15
	leaq	128(%rsi),%rsi
	vmovdqu	%ymm12,0(%rdi)
	vmovdqu	%ymm13,32(%rdi)
	vmovdqu	%ymm10,64(%rdi)
	vmovdqu	%ymm15,96(%rdi)
	leaq	128(%rdi),%rdi

	vpxor	0(%rsi),%ymm14,%ymm14
	vpxor	32(%rsi),%ymm2,%ymm2
	vpxor	64(%rsi),%ymm3,%ymm3
	vpxor	96(%rsi),%ymm7,%ymm7
	leaq	128(%rsi),%rsi
	vmovdqu	%ymm14,0(%rdi)
	vmovdqu	%ymm2,32(%rdi)
	vmovdqu	%ymm3,64(%rdi)
	vmovdqu	%ymm7,96(%rdi)
	leaq	128(%rdi),%rdi

	vpxor	0(%rsi),%ymm11,%ymm11
	vpxor	32(%rsi),%ymm9,%ymm9
	vpxor	64(%rsi),%ymm0,%ymm0
	vpxor	96(%rsi),%ymm4,%ymm4
	leaq	128(%rsi),%rsi
	vmovdqu	%ymm11,0(%rdi)
	vmovdqu	%ymm9,32(%rdi)
	vmovdqu	%ymm0,64(%rdi)
	vmovdqu	%ymm4,96(%rdi)
	leaq	128(%rdi),%rdi

	subq	$512,%rdx
	jnz	.Loop_outer8x

	jmp	.Ldone8x

	/*
	 * Fewer than 512 bytes remain.  Each .LN_or_more8x handler XORs N bytes
	 * with vector ops, then, unless exactly N bytes remained, copies the
	 * next 64 bytes of keystream to 0(%rsp) for the byte loop.
	 * The "je .Ldone8x" in each handler tests ZF from the cmpq below that
	 * selected it; the AVX instructions in between do not modify flags.
	 */
.Ltail8x:
	cmpq	$448,%rdx
	jae	.L448_or_more8x
	cmpq	$384,%rdx
	jae	.L384_or_more8x
	cmpq	$320,%rdx
	jae	.L320_or_more8x
	cmpq	$256,%rdx
	jae	.L256_or_more8x
	cmpq	$192,%rdx
	jae	.L192_or_more8x
	cmpq	$128,%rdx
	jae	.L128_or_more8x
	cmpq	$64,%rdx
	jae	.L64_or_more8x

	/* Fewer than 64 bytes: everything goes through the byte loop. */
	xorq	%r10,%r10			/* byte index; the .Lrot16 pointer is no longer needed */
	vmovdqa	%ymm6,0(%rsp)
	vmovdqa	%ymm8,32(%rsp)
	jmp	.Loop_tail8x

.align	32
.L64_or_more8x:
	vpxor	0(%rsi),%ymm6,%ymm6
	vpxor	32(%rsi),%ymm8,%ymm8
	vmovdqu	%ymm6,0(%rdi)
	vmovdqu	%ymm8,32(%rdi)
	je	.Ldone8x			/* exactly 64 bytes remained */

	leaq	64(%rsi),%rsi
	xorq	%r10,%r10
	vmovdqa	%ymm1,0(%rsp)
	leaq	64(%rdi),%rdi
	subq	$64,%rdx
	vmovdqa	%ymm5,32(%rsp)
	jmp	.Loop_tail8x

.align	32
.L128_or_more8x:
	vpxor	0(%rsi),%ymm6,%ymm6
	vpxor	32(%rsi),%ymm8,%ymm8
	vpxor	64(%rsi),%ymm1,%ymm1
	vpxor	96(%rsi),%ymm5,%ymm5
	vmovdqu	%ymm6,0(%rdi)
	vmovdqu	%ymm8,32(%rdi)
	vmovdqu	%ymm1,64(%rdi)
	vmovdqu	%ymm5,96(%rdi)
	je	.Ldone8x			/* exactly 128 bytes remained */

	leaq	128(%rsi),%rsi
	xorq	%r10,%r10
	vmovdqa	%ymm12,0(%rsp)
	leaq	128(%rdi),%rdi
	subq	$128,%rdx
	vmovdqa	%ymm13,32(%rsp)
	jmp	.Loop_tail8x

.align	32
.L192_or_more8x:
	vpxor	0(%rsi),%ymm6,%ymm6
	vpxor	32(%rsi),%ymm8,%ymm8
	vpxor	64(%rsi),%ymm1,%ymm1
	vpxor	96(%rsi),%ymm5,%ymm5
	vpxor	128(%rsi),%ymm12,%ymm12
	vpxor	160(%rsi),%ymm13,%ymm13
	vmovdqu	%ymm6,0(%rdi)
	vmovdqu	%ymm8,32(%rdi)
	vmovdqu	%ymm1,64(%rdi)
	vmovdqu	%ymm5,96(%rdi)
	vmovdqu	%ymm12,128(%rdi)
	vmovdqu	%ymm13,160(%rdi)
	je	.Ldone8x			/* exactly 192 bytes remained */

	leaq	192(%rsi),%rsi
	xorq	%r10,%r10
	vmovdqa	%ymm10,0(%rsp)
	leaq	192(%rdi),%rdi
	subq	$192,%rdx
	vmovdqa	%ymm15,32(%rsp)
	jmp	.Loop_tail8x

.align	32
.L256_or_more8x:
	vpxor	0(%rsi),%ymm6,%ymm6
	vpxor	32(%rsi),%ymm8,%ymm8
	vpxor	64(%rsi),%ymm1,%ymm1
	vpxor	96(%rsi),%ymm5,%ymm5
	vpxor	128(%rsi),%ymm12,%ymm12
	vpxor	160(%rsi),%ymm13,%ymm13
	vpxor	192(%rsi),%ymm10,%ymm10
	vpxor	224(%rsi),%ymm15,%ymm15
	vmovdqu	%ymm6,0(%rdi)
	vmovdqu	%ymm8,32(%rdi)
	vmovdqu	%ymm1,64(%rdi)
	vmovdqu	%ymm5,96(%rdi)
	vmovdqu	%ymm12,128(%rdi)
	vmovdqu	%ymm13,160(%rdi)
	vmovdqu	%ymm10,192(%rdi)
	vmovdqu	%ymm15,224(%rdi)
	je	.Ldone8x			/* exactly 256 bytes remained */

	leaq	256(%rsi),%rsi
	xorq	%r10,%r10
	vmovdqa	%ymm14,0(%rsp)
	leaq	256(%rdi),%rdi
	subq	$256,%rdx
	vmovdqa	%ymm2,32(%rsp)
	jmp	.Loop_tail8x

.align	32
.L320_or_more8x:
	vpxor	0(%rsi),%ymm6,%ymm6
	vpxor	32(%rsi),%ymm8,%ymm8
	vpxor	64(%rsi),%ymm1,%ymm1
	vpxor	96(%rsi),%ymm5,%ymm5
	vpxor	128(%rsi),%ymm12,%ymm12
	vpxor	160(%rsi),%ymm13,%ymm13
	vpxor	192(%rsi),%ymm10,%ymm10
	vpxor	224(%rsi),%ymm15,%ymm15
	vpxor	256(%rsi),%ymm14,%ymm14
	vpxor	288(%rsi),%ymm2,%ymm2
	vmovdqu	%ymm6,0(%rdi)
	vmovdqu	%ymm8,32(%rdi)
	vmovdqu	%ymm1,64(%rdi)
	vmovdqu	%ymm5,96(%rdi)
	vmovdqu	%ymm12,128(%rdi)
	vmovdqu	%ymm13,160(%rdi)
	vmovdqu	%ymm10,192(%rdi)
	vmovdqu	%ymm15,224(%rdi)
	vmovdqu	%ymm14,256(%rdi)
	vmovdqu	%ymm2,288(%rdi)
	je	.Ldone8x			/* exactly 320 bytes remained */

	leaq	320(%rsi),%rsi
	xorq	%r10,%r10
	vmovdqa	%ymm3,0(%rsp)
	leaq	320(%rdi),%rdi
	subq	$320,%rdx
	vmovdqa	%ymm7,32(%rsp)
	jmp	.Loop_tail8x

.align	32
.L384_or_more8x:
	vpxor	0(%rsi),%ymm6,%ymm6
	vpxor	32(%rsi),%ymm8,%ymm8
	vpxor	64(%rsi),%ymm1,%ymm1
	vpxor	96(%rsi),%ymm5,%ymm5
	vpxor	128(%rsi),%ymm12,%ymm12
	vpxor	160(%rsi),%ymm13,%ymm13
	vpxor	192(%rsi),%ymm10,%ymm10
	vpxor	224(%rsi),%ymm15,%ymm15
	vpxor	256(%rsi),%ymm14,%ymm14
	vpxor	288(%rsi),%ymm2,%ymm2
	vpxor	320(%rsi),%ymm3,%ymm3
	vpxor	352(%rsi),%ymm7,%ymm7
	vmovdqu	%ymm6,0(%rdi)
	vmovdqu	%ymm8,32(%rdi)
	vmovdqu	%ymm1,64(%rdi)
	vmovdqu	%ymm5,96(%rdi)
	vmovdqu	%ymm12,128(%rdi)
	vmovdqu	%ymm13,160(%rdi)
	vmovdqu	%ymm10,192(%rdi)
	vmovdqu	%ymm15,224(%rdi)
	vmovdqu	%ymm14,256(%rdi)
	vmovdqu	%ymm2,288(%rdi)
	vmovdqu	%ymm3,320(%rdi)
	vmovdqu	%ymm7,352(%rdi)
	je	.Ldone8x			/* exactly 384 bytes remained */

	leaq	384(%rsi),%rsi
	xorq	%r10,%r10
	vmovdqa	%ymm11,0(%rsp)
	leaq	384(%rdi),%rdi
	subq	$384,%rdx
	vmovdqa	%ymm9,32(%rsp)
	jmp	.Loop_tail8x

.align	32
.L448_or_more8x:
	vpxor	0(%rsi),%ymm6,%ymm6
	vpxor	32(%rsi),%ymm8,%ymm8
	vpxor	64(%rsi),%ymm1,%ymm1
	vpxor	96(%rsi),%ymm5,%ymm5
	vpxor	128(%rsi),%ymm12,%ymm12
	vpxor	160(%rsi),%ymm13,%ymm13
	vpxor	192(%rsi),%ymm10,%ymm10
	vpxor	224(%rsi),%ymm15,%ymm15
	vpxor	256(%rsi),%ymm14,%ymm14
	vpxor	288(%rsi),%ymm2,%ymm2
	vpxor	320(%rsi),%ymm3,%ymm3
	vpxor	352(%rsi),%ymm7,%ymm7
	vpxor	384(%rsi),%ymm11,%ymm11
	vpxor	416(%rsi),%ymm9,%ymm9
	vmovdqu	%ymm6,0(%rdi)
	vmovdqu	%ymm8,32(%rdi)
	vmovdqu	%ymm1,64(%rdi)
	vmovdqu	%ymm5,96(%rdi)
	vmovdqu	%ymm12,128(%rdi)
	vmovdqu	%ymm13,160(%rdi)
	vmovdqu	%ymm10,192(%rdi)
	vmovdqu	%ymm15,224(%rdi)
	vmovdqu	%ymm14,256(%rdi)
	vmovdqu	%ymm2,288(%rdi)
	vmovdqu	%ymm3,320(%rdi)
	vmovdqu	%ymm7,352(%rdi)
	vmovdqu	%ymm11,384(%rdi)
	vmovdqu	%ymm9,416(%rdi)
	je	.Ldone8x			/* exactly 448 bytes remained */

	leaq	448(%rsi),%rsi
	xorq	%r10,%r10
	vmovdqa	%ymm0,0(%rsp)
	leaq	448(%rdi),%rdi
	subq	$448,%rdx
	vmovdqa	%ymm4,32(%rsp)

	/* Byte loop: out[i] = in[i] ^ keystream[i], keystream at 0(%rsp); %rdx = bytes left (1..63). */
.Loop_tail8x:
	movzbl	(%rsi,%r10,1),%eax
	movzbl	(%rsp,%r10,1),%ecx
	leaq	1(%r10),%r10
	xorl	%ecx,%eax
	movb	%al,-1(%rdi,%r10,1)		/* -1 compensates for the index already being advanced */
	decq	%rdx
	jnz	.Loop_tail8x

.Ldone8x:
	vzeroall				/* zero every ymm register before returning */
	leaq	(%r9),%rsp			/* restore the caller's stack pointer */
.cfi_def_cfa_register	%rsp
.L8x_epilogue:
	.byte	0xf3,0xc3			/* "rep ret" emitted as raw bytes */
.cfi_endproc
.size	ChaCha20_8x,.-ChaCha20_8x

	/*
	 * ELF note .note.gnu.property: note type 5 (NT_GNU_PROPERTY_TYPE_0),
	 * owner "GNU", one property 0xc0000002
	 * (GNU_PROPERTY_X86_FEATURE_1_AND) with a 4-byte value of 3
	 * (IBT and SHSTK bits set).
	 */
	.section ".note.gnu.property", "a"
	.p2align 3
	.long 1f - 0f				/* n_namesz */
	.long 4f - 1f				/* n_descsz */
	.long 5					/* n_type */
0:
	# "GNU" encoded with .byte, since .asciz isn't supported
	# on Solaris.
	.byte 0x47
	.byte 0x4e
	.byte 0x55
	.byte 0
1:
	.p2align 3
	.long 0xc0000002			/* pr_type */
	.long 3f - 2f				/* pr_datasz */
2:
	.long 3					/* pr_data */
3:
	.p2align 3
4: