GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/crypto/openssl/amd64/chacha-x86_64.S
/* Do not modify. This file is auto-generated from chacha-x86_64.pl. */
.text



.section .rodata
.align 64
.Lzero:
.long 0,0,0,0
.Lone:
.long 1,0,0,0
.Linc:
.long 0,1,2,3
.Lfour:
.long 4,4,4,4
.Lincy:
.long 0,2,4,6,1,3,5,7
.Leight:
.long 8,8,8,8,8,8,8,8
.Lrot16:
.byte 0x2,0x3,0x0,0x1, 0x6,0x7,0x4,0x5, 0xa,0xb,0x8,0x9, 0xe,0xf,0xc,0xd
.Lrot24:
.byte 0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe
.Ltwoy:
.long 2,0,0,0, 2,0,0,0
.align 64
.Lzeroz:
.long 0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,0
.Lfourz:
.long 4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,0
.Lincz:
.long 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
.Lsixteen:
.long 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
.Lsigma:
.byte 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.previous
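# The read-only tables above are shared by the code paths below:
#   .Lsigma            - the ASCII constant "expand 32-byte k" that forms row 0
#                        of the ChaCha20 state, followed by a CRYPTOGAMS
#                        attribution string.
#   .Lrot16 / .Lrot24  - pshufb byte-permutation masks that rotate each 32-bit
#                        lane left by 16 and 24 bits in the SIMD paths.
#   .Lone, .Lfour, .Linc, .Lincy, .Leight, .Ltwoy and the .L*z tables hold
#   block-counter increments for the various lane widths (some are referenced
#   only by the wider code paths).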
.globl ChaCha20_ctr32
.type ChaCha20_ctr32,@function
.align 64
ChaCha20_ctr32:
.cfi_startproc
cmpq $0,%rdx
je .Lno_data
movq OPENSSL_ia32cap_P+4(%rip),%r10
testl $512,%r10d
jnz .LChaCha20_ssse3

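# Scalar (integer-register) path. Arguments follow the SysV AMD64 ABI and
# the usual ChaCha20_ctr32(out, inp, len, key, counter) layout:
#   %rdi = output, %rsi = input, %rdx = length in bytes,
#   %rcx = 256-bit key, %r8 = 16-byte counter/nonce block.
# The testl $512 above tests an OPENSSL_ia32cap_P feature bit (this appears
# to be the SSSE3 bit); when it is set, control goes to the SIMD code below.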
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56
subq $64+24,%rsp
.cfi_adjust_cfa_offset 64+24
.Lctr32_body:


movdqu (%rcx),%xmm1
movdqu 16(%rcx),%xmm2
movdqu (%r8),%xmm3
movdqa .Lone(%rip),%xmm4


movdqa %xmm1,16(%rsp)
movdqa %xmm2,32(%rsp)
movdqa %xmm3,48(%rsp)
movq %rdx,%rbp
jmp .Loop_outer

.align 32
.Loop_outer:
movl $0x61707865,%eax
movl $0x3320646e,%ebx
movl $0x79622d32,%ecx
movl $0x6b206574,%edx
movl 16(%rsp),%r8d
movl 20(%rsp),%r9d
movl 24(%rsp),%r10d
movl 28(%rsp),%r11d
movd %xmm3,%r12d
movl 52(%rsp),%r13d
movl 56(%rsp),%r14d
movl 60(%rsp),%r15d

movq %rbp,64+0(%rsp)
movl $10,%ebp
movq %rsi,64+8(%rsp)
.byte 102,72,15,126,214
movq %rdi,64+16(%rsp)
movq %rsi,%rdi
shrq $32,%rdi
jmp .Loop

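# Each .Loop iteration is one ChaCha20 "double round": four column
# quarter-rounds followed by four diagonal quarter-rounds, each quarter-round
# being, on 32-bit words:
#   a += b; d ^= a; d <<<= 16;   c += d; b ^= c; b <<<= 12;
#   a += b; d ^= a; d <<<= 8;    c += d; b ^= c; b <<<= 7;
# %ebp counts 10 double rounds (20 rounds total). Because there are not
# enough general-purpose registers for all 16 state words, the %esi/%edi pair
# is swapped with a pair of words parked at 32(%rsp)-44(%rsp) mid-round.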
.align 32
.Loop:
addl %r8d,%eax
xorl %eax,%r12d
roll $16,%r12d
addl %r9d,%ebx
xorl %ebx,%r13d
roll $16,%r13d
addl %r12d,%esi
xorl %esi,%r8d
roll $12,%r8d
addl %r13d,%edi
xorl %edi,%r9d
roll $12,%r9d
addl %r8d,%eax
xorl %eax,%r12d
roll $8,%r12d
addl %r9d,%ebx
xorl %ebx,%r13d
roll $8,%r13d
addl %r12d,%esi
xorl %esi,%r8d
roll $7,%r8d
addl %r13d,%edi
xorl %edi,%r9d
roll $7,%r9d
movl %esi,32(%rsp)
movl %edi,36(%rsp)
movl 40(%rsp),%esi
movl 44(%rsp),%edi
addl %r10d,%ecx
xorl %ecx,%r14d
roll $16,%r14d
addl %r11d,%edx
xorl %edx,%r15d
roll $16,%r15d
addl %r14d,%esi
xorl %esi,%r10d
roll $12,%r10d
addl %r15d,%edi
xorl %edi,%r11d
roll $12,%r11d
addl %r10d,%ecx
xorl %ecx,%r14d
roll $8,%r14d
addl %r11d,%edx
xorl %edx,%r15d
roll $8,%r15d
addl %r14d,%esi
xorl %esi,%r10d
roll $7,%r10d
addl %r15d,%edi
xorl %edi,%r11d
roll $7,%r11d
addl %r9d,%eax
xorl %eax,%r15d
roll $16,%r15d
addl %r10d,%ebx
xorl %ebx,%r12d
roll $16,%r12d
addl %r15d,%esi
xorl %esi,%r9d
roll $12,%r9d
addl %r12d,%edi
xorl %edi,%r10d
roll $12,%r10d
addl %r9d,%eax
xorl %eax,%r15d
roll $8,%r15d
addl %r10d,%ebx
xorl %ebx,%r12d
roll $8,%r12d
addl %r15d,%esi
xorl %esi,%r9d
roll $7,%r9d
addl %r12d,%edi
xorl %edi,%r10d
roll $7,%r10d
movl %esi,40(%rsp)
movl %edi,44(%rsp)
movl 32(%rsp),%esi
movl 36(%rsp),%edi
addl %r11d,%ecx
xorl %ecx,%r13d
roll $16,%r13d
addl %r8d,%edx
xorl %edx,%r14d
roll $16,%r14d
addl %r13d,%esi
xorl %esi,%r11d
roll $12,%r11d
addl %r14d,%edi
xorl %edi,%r8d
roll $12,%r8d
addl %r11d,%ecx
xorl %ecx,%r13d
roll $8,%r13d
addl %r8d,%edx
xorl %edx,%r14d
roll $8,%r14d
addl %r13d,%esi
xorl %esi,%r11d
roll $7,%r11d
addl %r14d,%edi
xorl %edi,%r8d
roll $7,%r8d
decl %ebp
jnz .Loop
movl %edi,36(%rsp)
movl %esi,32(%rsp)
movq 64(%rsp),%rbp
movdqa %xmm2,%xmm1
movq 64+8(%rsp),%rsi
paddd %xmm4,%xmm3
movq 64+16(%rsp),%rdi

addl $0x61707865,%eax
addl $0x3320646e,%ebx
addl $0x79622d32,%ecx
addl $0x6b206574,%edx
addl 16(%rsp),%r8d
addl 20(%rsp),%r9d
addl 24(%rsp),%r10d
addl 28(%rsp),%r11d
addl 48(%rsp),%r12d
addl 52(%rsp),%r13d
addl 56(%rsp),%r14d
addl 60(%rsp),%r15d
paddd 32(%rsp),%xmm1

cmpq $64,%rbp
jb .Ltail

xorl 0(%rsi),%eax
xorl 4(%rsi),%ebx
xorl 8(%rsi),%ecx
xorl 12(%rsi),%edx
xorl 16(%rsi),%r8d
xorl 20(%rsi),%r9d
xorl 24(%rsi),%r10d
xorl 28(%rsi),%r11d
movdqu 32(%rsi),%xmm0
xorl 48(%rsi),%r12d
xorl 52(%rsi),%r13d
xorl 56(%rsi),%r14d
xorl 60(%rsi),%r15d
leaq 64(%rsi),%rsi
pxor %xmm1,%xmm0

movdqa %xmm2,32(%rsp)
movd %xmm3,48(%rsp)

movl %eax,0(%rdi)
movl %ebx,4(%rdi)
movl %ecx,8(%rdi)
movl %edx,12(%rdi)
movl %r8d,16(%rdi)
movl %r9d,20(%rdi)
movl %r10d,24(%rdi)
movl %r11d,28(%rdi)
movdqu %xmm0,32(%rdi)
movl %r12d,48(%rdi)
movl %r13d,52(%rdi)
movl %r14d,56(%rdi)
movl %r15d,60(%rdi)
leaq 64(%rdi),%rdi

subq $64,%rbp
jnz .Loop_outer

jmp .Ldone

.align 16
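# Tail: fewer than 64 bytes of input remain. The final keystream block is
# spilled to the stack and the leftover bytes are XORed with it one at a time.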
.Ltail:
movl %eax,0(%rsp)
movl %ebx,4(%rsp)
xorq %rbx,%rbx
movl %ecx,8(%rsp)
movl %edx,12(%rsp)
movl %r8d,16(%rsp)
movl %r9d,20(%rsp)
movl %r10d,24(%rsp)
movl %r11d,28(%rsp)
movdqa %xmm1,32(%rsp)
movl %r12d,48(%rsp)
movl %r13d,52(%rsp)
movl %r14d,56(%rsp)
movl %r15d,60(%rsp)

.Loop_tail:
movzbl (%rsi,%rbx,1),%eax
movzbl (%rsp,%rbx,1),%edx
leaq 1(%rbx),%rbx
xorl %edx,%eax
movb %al,-1(%rdi,%rbx,1)
decq %rbp
jnz .Loop_tail

.Ldone:
leaq 64+24+48(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lno_data:
.byte 0xf3,0xc3
.cfi_endproc
.size ChaCha20_ctr32,.-ChaCha20_ctr32
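# SSSE3 path: one 64-byte block per outer iteration, with the state held in
# %xmm0-%xmm3. Rotates by 16 and 24 use pshufb with the .Lrot16/.Lrot24 masks
# (the ".byte 102,15,56,0,..." sequences below are hand-encoded pshufb
# instructions); rotates by 12 and 7 use pslld/psrld/por. The testl $2048
# branch transfers to the XOP path, exactly 128-byte inputs are routed to
# ChaCha20_128, and larger inputs to ChaCha20_4x.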
.type ChaCha20_ssse3,@function
.align 32
ChaCha20_ssse3:
.cfi_startproc
.LChaCha20_ssse3:
movq %rsp,%r9
.cfi_def_cfa_register %r9
testl $2048,%r10d
jnz .LChaCha20_4xop
cmpq $128,%rdx
je .LChaCha20_128
ja .LChaCha20_4x

.Ldo_sse3_after_all:
subq $64+8,%rsp
movdqa .Lsigma(%rip),%xmm0
movdqu (%rcx),%xmm1
movdqu 16(%rcx),%xmm2
movdqu (%r8),%xmm3
movdqa .Lrot16(%rip),%xmm6
movdqa .Lrot24(%rip),%xmm7

movdqa %xmm0,0(%rsp)
movdqa %xmm1,16(%rsp)
movdqa %xmm2,32(%rsp)
movdqa %xmm3,48(%rsp)
movq $10,%r8
jmp .Loop_ssse3

.align 32
.Loop_outer_ssse3:
movdqa .Lone(%rip),%xmm3
movdqa 0(%rsp),%xmm0
movdqa 16(%rsp),%xmm1
movdqa 32(%rsp),%xmm2
paddd 48(%rsp),%xmm3
movq $10,%r8
movdqa %xmm3,48(%rsp)
jmp .Loop_ssse3

.align 32
.Loop_ssse3:
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $57,%xmm1,%xmm1
pshufd $147,%xmm3,%xmm3
nop
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $147,%xmm1,%xmm1
pshufd $57,%xmm3,%xmm3
decq %r8
jnz .Loop_ssse3
paddd 0(%rsp),%xmm0
paddd 16(%rsp),%xmm1
paddd 32(%rsp),%xmm2
paddd 48(%rsp),%xmm3

cmpq $64,%rdx
jb .Ltail_ssse3

movdqu 0(%rsi),%xmm4
movdqu 16(%rsi),%xmm5
pxor %xmm4,%xmm0
movdqu 32(%rsi),%xmm4
pxor %xmm5,%xmm1
movdqu 48(%rsi),%xmm5
leaq 64(%rsi),%rsi
pxor %xmm4,%xmm2
pxor %xmm5,%xmm3

movdqu %xmm0,0(%rdi)
movdqu %xmm1,16(%rdi)
movdqu %xmm2,32(%rdi)
movdqu %xmm3,48(%rdi)
leaq 64(%rdi),%rdi

subq $64,%rdx
jnz .Loop_outer_ssse3

jmp .Ldone_ssse3

.align 16
.Ltail_ssse3:
movdqa %xmm0,0(%rsp)
movdqa %xmm1,16(%rsp)
movdqa %xmm2,32(%rsp)
movdqa %xmm3,48(%rsp)
xorq %r8,%r8

.Loop_tail_ssse3:
movzbl (%rsi,%r8,1),%eax
movzbl (%rsp,%r8,1),%ecx
leaq 1(%r8),%r8
xorl %ecx,%eax
movb %al,-1(%rdi,%r8,1)
decq %rdx
jnz .Loop_tail_ssse3

.Ldone_ssse3:
leaq (%r9),%rsp
.cfi_def_cfa_register %rsp
.Lssse3_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size ChaCha20_ssse3,.-ChaCha20_ssse3
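# The remaining routines widen the same construction: ChaCha20_128 keeps two
# blocks in flight, ChaCha20_4x and ChaCha20_4xop transpose four blocks across
# XMM registers (the ".byte 143,232,120,194,..." sequences in the 4xop path
# appear to encode XOP vprotd rotates), and ChaCha20_8x processes eight blocks
# in 256-bit YMM registers, with .Lincy/.Leight supplying per-lane counters.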
468
.type ChaCha20_128,@function
469
.align 32
470
ChaCha20_128:
471
.cfi_startproc
472
.LChaCha20_128:
473
movq %rsp,%r9
474
.cfi_def_cfa_register %r9
475
subq $64+8,%rsp
476
movdqa .Lsigma(%rip),%xmm8
477
movdqu (%rcx),%xmm9
478
movdqu 16(%rcx),%xmm2
479
movdqu (%r8),%xmm3
480
movdqa .Lone(%rip),%xmm1
481
movdqa .Lrot16(%rip),%xmm6
482
movdqa .Lrot24(%rip),%xmm7
483
484
movdqa %xmm8,%xmm10
485
movdqa %xmm8,0(%rsp)
486
movdqa %xmm9,%xmm11
487
movdqa %xmm9,16(%rsp)
488
movdqa %xmm2,%xmm0
489
movdqa %xmm2,32(%rsp)
490
paddd %xmm3,%xmm1
491
movdqa %xmm3,48(%rsp)
492
movq $10,%r8
493
jmp .Loop_128
494
495
.align 32
496
.Loop_128:
497
paddd %xmm9,%xmm8
498
pxor %xmm8,%xmm3
499
paddd %xmm11,%xmm10
500
pxor %xmm10,%xmm1
501
.byte 102,15,56,0,222
502
.byte 102,15,56,0,206
503
paddd %xmm3,%xmm2
504
paddd %xmm1,%xmm0
505
pxor %xmm2,%xmm9
506
pxor %xmm0,%xmm11
507
movdqa %xmm9,%xmm4
508
psrld $20,%xmm9
509
movdqa %xmm11,%xmm5
510
pslld $12,%xmm4
511
psrld $20,%xmm11
512
por %xmm4,%xmm9
513
pslld $12,%xmm5
514
por %xmm5,%xmm11
515
paddd %xmm9,%xmm8
516
pxor %xmm8,%xmm3
517
paddd %xmm11,%xmm10
518
pxor %xmm10,%xmm1
519
.byte 102,15,56,0,223
520
.byte 102,15,56,0,207
521
paddd %xmm3,%xmm2
522
paddd %xmm1,%xmm0
523
pxor %xmm2,%xmm9
524
pxor %xmm0,%xmm11
525
movdqa %xmm9,%xmm4
526
psrld $25,%xmm9
527
movdqa %xmm11,%xmm5
528
pslld $7,%xmm4
529
psrld $25,%xmm11
530
por %xmm4,%xmm9
531
pslld $7,%xmm5
532
por %xmm5,%xmm11
533
pshufd $78,%xmm2,%xmm2
534
pshufd $57,%xmm9,%xmm9
535
pshufd $147,%xmm3,%xmm3
536
pshufd $78,%xmm0,%xmm0
537
pshufd $57,%xmm11,%xmm11
538
pshufd $147,%xmm1,%xmm1
539
paddd %xmm9,%xmm8
540
pxor %xmm8,%xmm3
541
paddd %xmm11,%xmm10
542
pxor %xmm10,%xmm1
543
.byte 102,15,56,0,222
544
.byte 102,15,56,0,206
545
paddd %xmm3,%xmm2
546
paddd %xmm1,%xmm0
547
pxor %xmm2,%xmm9
548
pxor %xmm0,%xmm11
549
movdqa %xmm9,%xmm4
550
psrld $20,%xmm9
551
movdqa %xmm11,%xmm5
552
pslld $12,%xmm4
553
psrld $20,%xmm11
554
por %xmm4,%xmm9
555
pslld $12,%xmm5
556
por %xmm5,%xmm11
557
paddd %xmm9,%xmm8
558
pxor %xmm8,%xmm3
559
paddd %xmm11,%xmm10
560
pxor %xmm10,%xmm1
561
.byte 102,15,56,0,223
562
.byte 102,15,56,0,207
563
paddd %xmm3,%xmm2
564
paddd %xmm1,%xmm0
565
pxor %xmm2,%xmm9
566
pxor %xmm0,%xmm11
567
movdqa %xmm9,%xmm4
568
psrld $25,%xmm9
569
movdqa %xmm11,%xmm5
570
pslld $7,%xmm4
571
psrld $25,%xmm11
572
por %xmm4,%xmm9
573
pslld $7,%xmm5
574
por %xmm5,%xmm11
575
pshufd $78,%xmm2,%xmm2
576
pshufd $147,%xmm9,%xmm9
577
pshufd $57,%xmm3,%xmm3
578
pshufd $78,%xmm0,%xmm0
579
pshufd $147,%xmm11,%xmm11
580
pshufd $57,%xmm1,%xmm1
581
decq %r8
582
jnz .Loop_128
583
paddd 0(%rsp),%xmm8
584
paddd 16(%rsp),%xmm9
585
paddd 32(%rsp),%xmm2
586
paddd 48(%rsp),%xmm3
587
paddd .Lone(%rip),%xmm1
588
paddd 0(%rsp),%xmm10
589
paddd 16(%rsp),%xmm11
590
paddd 32(%rsp),%xmm0
591
paddd 48(%rsp),%xmm1
592
593
movdqu 0(%rsi),%xmm4
594
movdqu 16(%rsi),%xmm5
595
pxor %xmm4,%xmm8
596
movdqu 32(%rsi),%xmm4
597
pxor %xmm5,%xmm9
598
movdqu 48(%rsi),%xmm5
599
pxor %xmm4,%xmm2
600
movdqu 64(%rsi),%xmm4
601
pxor %xmm5,%xmm3
602
movdqu 80(%rsi),%xmm5
603
pxor %xmm4,%xmm10
604
movdqu 96(%rsi),%xmm4
605
pxor %xmm5,%xmm11
606
movdqu 112(%rsi),%xmm5
607
pxor %xmm4,%xmm0
608
pxor %xmm5,%xmm1
609
610
movdqu %xmm8,0(%rdi)
611
movdqu %xmm9,16(%rdi)
612
movdqu %xmm2,32(%rdi)
613
movdqu %xmm3,48(%rdi)
614
movdqu %xmm10,64(%rdi)
615
movdqu %xmm11,80(%rdi)
616
movdqu %xmm0,96(%rdi)
617
movdqu %xmm1,112(%rdi)
618
leaq (%r9),%rsp
619
.cfi_def_cfa_register %rsp
620
.L128_epilogue:
621
.byte 0xf3,0xc3
622
.cfi_endproc
623
.size ChaCha20_128,.-ChaCha20_128
624
.type ChaCha20_4x,@function
625
.align 32
626
ChaCha20_4x:
627
.cfi_startproc
628
.LChaCha20_4x:
629
movq %rsp,%r9
630
.cfi_def_cfa_register %r9
631
movq %r10,%r11
632
shrq $32,%r10
633
testq $32,%r10
634
jnz .LChaCha20_8x
635
cmpq $192,%rdx
636
ja .Lproceed4x
637
638
andq $71303168,%r11
639
cmpq $4194304,%r11
640
je .Ldo_sse3_after_all
641
642
.Lproceed4x:
643
subq $0x140+8,%rsp
644
movdqa .Lsigma(%rip),%xmm11
645
movdqu (%rcx),%xmm15
646
movdqu 16(%rcx),%xmm7
647
movdqu (%r8),%xmm3
648
leaq 256(%rsp),%rcx
649
leaq .Lrot16(%rip),%r10
650
leaq .Lrot24(%rip),%r11
651
652
pshufd $0x00,%xmm11,%xmm8
653
pshufd $0x55,%xmm11,%xmm9
654
movdqa %xmm8,64(%rsp)
655
pshufd $0xaa,%xmm11,%xmm10
656
movdqa %xmm9,80(%rsp)
657
pshufd $0xff,%xmm11,%xmm11
658
movdqa %xmm10,96(%rsp)
659
movdqa %xmm11,112(%rsp)
660
661
pshufd $0x00,%xmm15,%xmm12
662
pshufd $0x55,%xmm15,%xmm13
663
movdqa %xmm12,128-256(%rcx)
664
pshufd $0xaa,%xmm15,%xmm14
665
movdqa %xmm13,144-256(%rcx)
666
pshufd $0xff,%xmm15,%xmm15
667
movdqa %xmm14,160-256(%rcx)
668
movdqa %xmm15,176-256(%rcx)
669
670
pshufd $0x00,%xmm7,%xmm4
671
pshufd $0x55,%xmm7,%xmm5
672
movdqa %xmm4,192-256(%rcx)
673
pshufd $0xaa,%xmm7,%xmm6
674
movdqa %xmm5,208-256(%rcx)
675
pshufd $0xff,%xmm7,%xmm7
676
movdqa %xmm6,224-256(%rcx)
677
movdqa %xmm7,240-256(%rcx)
678
679
pshufd $0x00,%xmm3,%xmm0
680
pshufd $0x55,%xmm3,%xmm1
681
paddd .Linc(%rip),%xmm0
682
pshufd $0xaa,%xmm3,%xmm2
683
movdqa %xmm1,272-256(%rcx)
684
pshufd $0xff,%xmm3,%xmm3
685
movdqa %xmm2,288-256(%rcx)
686
movdqa %xmm3,304-256(%rcx)
687
688
jmp .Loop_enter4x
689
690
.align 32
691
.Loop_outer4x:
692
movdqa 64(%rsp),%xmm8
693
movdqa 80(%rsp),%xmm9
694
movdqa 96(%rsp),%xmm10
695
movdqa 112(%rsp),%xmm11
696
movdqa 128-256(%rcx),%xmm12
697
movdqa 144-256(%rcx),%xmm13
698
movdqa 160-256(%rcx),%xmm14
699
movdqa 176-256(%rcx),%xmm15
700
movdqa 192-256(%rcx),%xmm4
701
movdqa 208-256(%rcx),%xmm5
702
movdqa 224-256(%rcx),%xmm6
703
movdqa 240-256(%rcx),%xmm7
704
movdqa 256-256(%rcx),%xmm0
705
movdqa 272-256(%rcx),%xmm1
706
movdqa 288-256(%rcx),%xmm2
707
movdqa 304-256(%rcx),%xmm3
708
paddd .Lfour(%rip),%xmm0
709
710
.Loop_enter4x:
711
movdqa %xmm6,32(%rsp)
712
movdqa %xmm7,48(%rsp)
713
movdqa (%r10),%xmm7
714
movl $10,%eax
715
movdqa %xmm0,256-256(%rcx)
716
jmp .Loop4x
717
718
.align 32
719
.Loop4x:
720
paddd %xmm12,%xmm8
721
paddd %xmm13,%xmm9
722
pxor %xmm8,%xmm0
723
pxor %xmm9,%xmm1
724
.byte 102,15,56,0,199
725
.byte 102,15,56,0,207
726
paddd %xmm0,%xmm4
727
paddd %xmm1,%xmm5
728
pxor %xmm4,%xmm12
729
pxor %xmm5,%xmm13
730
movdqa %xmm12,%xmm6
731
pslld $12,%xmm12
732
psrld $20,%xmm6
733
movdqa %xmm13,%xmm7
734
pslld $12,%xmm13
735
por %xmm6,%xmm12
736
psrld $20,%xmm7
737
movdqa (%r11),%xmm6
738
por %xmm7,%xmm13
739
paddd %xmm12,%xmm8
740
paddd %xmm13,%xmm9
741
pxor %xmm8,%xmm0
742
pxor %xmm9,%xmm1
743
.byte 102,15,56,0,198
744
.byte 102,15,56,0,206
745
paddd %xmm0,%xmm4
746
paddd %xmm1,%xmm5
747
pxor %xmm4,%xmm12
748
pxor %xmm5,%xmm13
749
movdqa %xmm12,%xmm7
750
pslld $7,%xmm12
751
psrld $25,%xmm7
752
movdqa %xmm13,%xmm6
753
pslld $7,%xmm13
754
por %xmm7,%xmm12
755
psrld $25,%xmm6
756
movdqa (%r10),%xmm7
757
por %xmm6,%xmm13
758
movdqa %xmm4,0(%rsp)
759
movdqa %xmm5,16(%rsp)
760
movdqa 32(%rsp),%xmm4
761
movdqa 48(%rsp),%xmm5
762
paddd %xmm14,%xmm10
763
paddd %xmm15,%xmm11
764
pxor %xmm10,%xmm2
765
pxor %xmm11,%xmm3
766
.byte 102,15,56,0,215
767
.byte 102,15,56,0,223
768
paddd %xmm2,%xmm4
769
paddd %xmm3,%xmm5
770
pxor %xmm4,%xmm14
771
pxor %xmm5,%xmm15
772
movdqa %xmm14,%xmm6
773
pslld $12,%xmm14
774
psrld $20,%xmm6
775
movdqa %xmm15,%xmm7
776
pslld $12,%xmm15
777
por %xmm6,%xmm14
778
psrld $20,%xmm7
779
movdqa (%r11),%xmm6
780
por %xmm7,%xmm15
781
paddd %xmm14,%xmm10
782
paddd %xmm15,%xmm11
783
pxor %xmm10,%xmm2
784
pxor %xmm11,%xmm3
785
.byte 102,15,56,0,214
786
.byte 102,15,56,0,222
787
paddd %xmm2,%xmm4
788
paddd %xmm3,%xmm5
789
pxor %xmm4,%xmm14
790
pxor %xmm5,%xmm15
791
movdqa %xmm14,%xmm7
792
pslld $7,%xmm14
793
psrld $25,%xmm7
794
movdqa %xmm15,%xmm6
795
pslld $7,%xmm15
796
por %xmm7,%xmm14
797
psrld $25,%xmm6
798
movdqa (%r10),%xmm7
799
por %xmm6,%xmm15
800
paddd %xmm13,%xmm8
801
paddd %xmm14,%xmm9
802
pxor %xmm8,%xmm3
803
pxor %xmm9,%xmm0
804
.byte 102,15,56,0,223
805
.byte 102,15,56,0,199
806
paddd %xmm3,%xmm4
807
paddd %xmm0,%xmm5
808
pxor %xmm4,%xmm13
809
pxor %xmm5,%xmm14
810
movdqa %xmm13,%xmm6
811
pslld $12,%xmm13
812
psrld $20,%xmm6
813
movdqa %xmm14,%xmm7
814
pslld $12,%xmm14
815
por %xmm6,%xmm13
816
psrld $20,%xmm7
817
movdqa (%r11),%xmm6
818
por %xmm7,%xmm14
819
paddd %xmm13,%xmm8
820
paddd %xmm14,%xmm9
821
pxor %xmm8,%xmm3
822
pxor %xmm9,%xmm0
823
.byte 102,15,56,0,222
824
.byte 102,15,56,0,198
825
paddd %xmm3,%xmm4
826
paddd %xmm0,%xmm5
827
pxor %xmm4,%xmm13
828
pxor %xmm5,%xmm14
829
movdqa %xmm13,%xmm7
830
pslld $7,%xmm13
831
psrld $25,%xmm7
832
movdqa %xmm14,%xmm6
833
pslld $7,%xmm14
834
por %xmm7,%xmm13
835
psrld $25,%xmm6
836
movdqa (%r10),%xmm7
837
por %xmm6,%xmm14
838
movdqa %xmm4,32(%rsp)
839
movdqa %xmm5,48(%rsp)
840
movdqa 0(%rsp),%xmm4
841
movdqa 16(%rsp),%xmm5
842
paddd %xmm15,%xmm10
843
paddd %xmm12,%xmm11
844
pxor %xmm10,%xmm1
845
pxor %xmm11,%xmm2
846
.byte 102,15,56,0,207
847
.byte 102,15,56,0,215
848
paddd %xmm1,%xmm4
849
paddd %xmm2,%xmm5
850
pxor %xmm4,%xmm15
851
pxor %xmm5,%xmm12
852
movdqa %xmm15,%xmm6
853
pslld $12,%xmm15
854
psrld $20,%xmm6
855
movdqa %xmm12,%xmm7
856
pslld $12,%xmm12
857
por %xmm6,%xmm15
858
psrld $20,%xmm7
859
movdqa (%r11),%xmm6
860
por %xmm7,%xmm12
861
paddd %xmm15,%xmm10
862
paddd %xmm12,%xmm11
863
pxor %xmm10,%xmm1
864
pxor %xmm11,%xmm2
865
.byte 102,15,56,0,206
866
.byte 102,15,56,0,214
867
paddd %xmm1,%xmm4
868
paddd %xmm2,%xmm5
869
pxor %xmm4,%xmm15
870
pxor %xmm5,%xmm12
871
movdqa %xmm15,%xmm7
872
pslld $7,%xmm15
873
psrld $25,%xmm7
874
movdqa %xmm12,%xmm6
875
pslld $7,%xmm12
876
por %xmm7,%xmm15
877
psrld $25,%xmm6
878
movdqa (%r10),%xmm7
879
por %xmm6,%xmm12
880
decl %eax
881
jnz .Loop4x
882
883
paddd 64(%rsp),%xmm8
884
paddd 80(%rsp),%xmm9
885
paddd 96(%rsp),%xmm10
886
paddd 112(%rsp),%xmm11
887
888
movdqa %xmm8,%xmm6
889
punpckldq %xmm9,%xmm8
890
movdqa %xmm10,%xmm7
891
punpckldq %xmm11,%xmm10
892
punpckhdq %xmm9,%xmm6
893
punpckhdq %xmm11,%xmm7
894
movdqa %xmm8,%xmm9
895
punpcklqdq %xmm10,%xmm8
896
movdqa %xmm6,%xmm11
897
punpcklqdq %xmm7,%xmm6
898
punpckhqdq %xmm10,%xmm9
899
punpckhqdq %xmm7,%xmm11
900
paddd 128-256(%rcx),%xmm12
901
paddd 144-256(%rcx),%xmm13
902
paddd 160-256(%rcx),%xmm14
903
paddd 176-256(%rcx),%xmm15
904
905
movdqa %xmm8,0(%rsp)
906
movdqa %xmm9,16(%rsp)
907
movdqa 32(%rsp),%xmm8
908
movdqa 48(%rsp),%xmm9
909
910
movdqa %xmm12,%xmm10
911
punpckldq %xmm13,%xmm12
912
movdqa %xmm14,%xmm7
913
punpckldq %xmm15,%xmm14
914
punpckhdq %xmm13,%xmm10
915
punpckhdq %xmm15,%xmm7
916
movdqa %xmm12,%xmm13
917
punpcklqdq %xmm14,%xmm12
918
movdqa %xmm10,%xmm15
919
punpcklqdq %xmm7,%xmm10
920
punpckhqdq %xmm14,%xmm13
921
punpckhqdq %xmm7,%xmm15
922
paddd 192-256(%rcx),%xmm4
923
paddd 208-256(%rcx),%xmm5
924
paddd 224-256(%rcx),%xmm8
925
paddd 240-256(%rcx),%xmm9
926
927
movdqa %xmm6,32(%rsp)
928
movdqa %xmm11,48(%rsp)
929
930
movdqa %xmm4,%xmm14
931
punpckldq %xmm5,%xmm4
932
movdqa %xmm8,%xmm7
933
punpckldq %xmm9,%xmm8
934
punpckhdq %xmm5,%xmm14
935
punpckhdq %xmm9,%xmm7
936
movdqa %xmm4,%xmm5
937
punpcklqdq %xmm8,%xmm4
938
movdqa %xmm14,%xmm9
939
punpcklqdq %xmm7,%xmm14
940
punpckhqdq %xmm8,%xmm5
941
punpckhqdq %xmm7,%xmm9
942
paddd 256-256(%rcx),%xmm0
943
paddd 272-256(%rcx),%xmm1
944
paddd 288-256(%rcx),%xmm2
945
paddd 304-256(%rcx),%xmm3
946
947
movdqa %xmm0,%xmm8
948
punpckldq %xmm1,%xmm0
949
movdqa %xmm2,%xmm7
950
punpckldq %xmm3,%xmm2
951
punpckhdq %xmm1,%xmm8
952
punpckhdq %xmm3,%xmm7
953
movdqa %xmm0,%xmm1
954
punpcklqdq %xmm2,%xmm0
955
movdqa %xmm8,%xmm3
956
punpcklqdq %xmm7,%xmm8
957
punpckhqdq %xmm2,%xmm1
958
punpckhqdq %xmm7,%xmm3
959
cmpq $256,%rdx
960
jb .Ltail4x
961
962
movdqu 0(%rsi),%xmm6
963
movdqu 16(%rsi),%xmm11
964
movdqu 32(%rsi),%xmm2
965
movdqu 48(%rsi),%xmm7
966
pxor 0(%rsp),%xmm6
967
pxor %xmm12,%xmm11
968
pxor %xmm4,%xmm2
969
pxor %xmm0,%xmm7
970
971
movdqu %xmm6,0(%rdi)
972
movdqu 64(%rsi),%xmm6
973
movdqu %xmm11,16(%rdi)
974
movdqu 80(%rsi),%xmm11
975
movdqu %xmm2,32(%rdi)
976
movdqu 96(%rsi),%xmm2
977
movdqu %xmm7,48(%rdi)
978
movdqu 112(%rsi),%xmm7
979
leaq 128(%rsi),%rsi
980
pxor 16(%rsp),%xmm6
981
pxor %xmm13,%xmm11
982
pxor %xmm5,%xmm2
983
pxor %xmm1,%xmm7
984
985
movdqu %xmm6,64(%rdi)
986
movdqu 0(%rsi),%xmm6
987
movdqu %xmm11,80(%rdi)
988
movdqu 16(%rsi),%xmm11
989
movdqu %xmm2,96(%rdi)
990
movdqu 32(%rsi),%xmm2
991
movdqu %xmm7,112(%rdi)
992
leaq 128(%rdi),%rdi
993
movdqu 48(%rsi),%xmm7
994
pxor 32(%rsp),%xmm6
995
pxor %xmm10,%xmm11
996
pxor %xmm14,%xmm2
997
pxor %xmm8,%xmm7
998
999
movdqu %xmm6,0(%rdi)
1000
movdqu 64(%rsi),%xmm6
1001
movdqu %xmm11,16(%rdi)
1002
movdqu 80(%rsi),%xmm11
1003
movdqu %xmm2,32(%rdi)
1004
movdqu 96(%rsi),%xmm2
1005
movdqu %xmm7,48(%rdi)
1006
movdqu 112(%rsi),%xmm7
1007
leaq 128(%rsi),%rsi
1008
pxor 48(%rsp),%xmm6
1009
pxor %xmm15,%xmm11
1010
pxor %xmm9,%xmm2
1011
pxor %xmm3,%xmm7
1012
movdqu %xmm6,64(%rdi)
1013
movdqu %xmm11,80(%rdi)
1014
movdqu %xmm2,96(%rdi)
1015
movdqu %xmm7,112(%rdi)
1016
leaq 128(%rdi),%rdi
1017
1018
subq $256,%rdx
1019
jnz .Loop_outer4x
1020
1021
jmp .Ldone4x
1022
1023
.Ltail4x:
1024
cmpq $192,%rdx
1025
jae .L192_or_more4x
1026
cmpq $128,%rdx
1027
jae .L128_or_more4x
1028
cmpq $64,%rdx
1029
jae .L64_or_more4x
1030
1031
1032
xorq %r10,%r10
1033
1034
movdqa %xmm12,16(%rsp)
1035
movdqa %xmm4,32(%rsp)
1036
movdqa %xmm0,48(%rsp)
1037
jmp .Loop_tail4x
1038
1039
.align 32
1040
.L64_or_more4x:
1041
movdqu 0(%rsi),%xmm6
1042
movdqu 16(%rsi),%xmm11
1043
movdqu 32(%rsi),%xmm2
1044
movdqu 48(%rsi),%xmm7
1045
pxor 0(%rsp),%xmm6
1046
pxor %xmm12,%xmm11
1047
pxor %xmm4,%xmm2
1048
pxor %xmm0,%xmm7
1049
movdqu %xmm6,0(%rdi)
1050
movdqu %xmm11,16(%rdi)
1051
movdqu %xmm2,32(%rdi)
1052
movdqu %xmm7,48(%rdi)
1053
je .Ldone4x
1054
1055
movdqa 16(%rsp),%xmm6
1056
leaq 64(%rsi),%rsi
1057
xorq %r10,%r10
1058
movdqa %xmm6,0(%rsp)
1059
movdqa %xmm13,16(%rsp)
1060
leaq 64(%rdi),%rdi
1061
movdqa %xmm5,32(%rsp)
1062
subq $64,%rdx
1063
movdqa %xmm1,48(%rsp)
1064
jmp .Loop_tail4x
1065
1066
.align 32
1067
.L128_or_more4x:
1068
movdqu 0(%rsi),%xmm6
1069
movdqu 16(%rsi),%xmm11
1070
movdqu 32(%rsi),%xmm2
1071
movdqu 48(%rsi),%xmm7
1072
pxor 0(%rsp),%xmm6
1073
pxor %xmm12,%xmm11
1074
pxor %xmm4,%xmm2
1075
pxor %xmm0,%xmm7
1076
1077
movdqu %xmm6,0(%rdi)
1078
movdqu 64(%rsi),%xmm6
1079
movdqu %xmm11,16(%rdi)
1080
movdqu 80(%rsi),%xmm11
1081
movdqu %xmm2,32(%rdi)
1082
movdqu 96(%rsi),%xmm2
1083
movdqu %xmm7,48(%rdi)
1084
movdqu 112(%rsi),%xmm7
1085
pxor 16(%rsp),%xmm6
1086
pxor %xmm13,%xmm11
1087
pxor %xmm5,%xmm2
1088
pxor %xmm1,%xmm7
1089
movdqu %xmm6,64(%rdi)
1090
movdqu %xmm11,80(%rdi)
1091
movdqu %xmm2,96(%rdi)
1092
movdqu %xmm7,112(%rdi)
1093
je .Ldone4x
1094
1095
movdqa 32(%rsp),%xmm6
1096
leaq 128(%rsi),%rsi
1097
xorq %r10,%r10
1098
movdqa %xmm6,0(%rsp)
1099
movdqa %xmm10,16(%rsp)
1100
leaq 128(%rdi),%rdi
1101
movdqa %xmm14,32(%rsp)
1102
subq $128,%rdx
1103
movdqa %xmm8,48(%rsp)
1104
jmp .Loop_tail4x
1105
1106
.align 32
1107
.L192_or_more4x:
1108
movdqu 0(%rsi),%xmm6
1109
movdqu 16(%rsi),%xmm11
1110
movdqu 32(%rsi),%xmm2
1111
movdqu 48(%rsi),%xmm7
1112
pxor 0(%rsp),%xmm6
1113
pxor %xmm12,%xmm11
1114
pxor %xmm4,%xmm2
1115
pxor %xmm0,%xmm7
1116
1117
movdqu %xmm6,0(%rdi)
1118
movdqu 64(%rsi),%xmm6
1119
movdqu %xmm11,16(%rdi)
1120
movdqu 80(%rsi),%xmm11
1121
movdqu %xmm2,32(%rdi)
1122
movdqu 96(%rsi),%xmm2
1123
movdqu %xmm7,48(%rdi)
1124
movdqu 112(%rsi),%xmm7
1125
leaq 128(%rsi),%rsi
1126
pxor 16(%rsp),%xmm6
1127
pxor %xmm13,%xmm11
1128
pxor %xmm5,%xmm2
1129
pxor %xmm1,%xmm7
1130
1131
movdqu %xmm6,64(%rdi)
1132
movdqu 0(%rsi),%xmm6
1133
movdqu %xmm11,80(%rdi)
1134
movdqu 16(%rsi),%xmm11
1135
movdqu %xmm2,96(%rdi)
1136
movdqu 32(%rsi),%xmm2
1137
movdqu %xmm7,112(%rdi)
1138
leaq 128(%rdi),%rdi
1139
movdqu 48(%rsi),%xmm7
1140
pxor 32(%rsp),%xmm6
1141
pxor %xmm10,%xmm11
1142
pxor %xmm14,%xmm2
1143
pxor %xmm8,%xmm7
1144
movdqu %xmm6,0(%rdi)
1145
movdqu %xmm11,16(%rdi)
1146
movdqu %xmm2,32(%rdi)
1147
movdqu %xmm7,48(%rdi)
1148
je .Ldone4x
1149
1150
movdqa 48(%rsp),%xmm6
1151
leaq 64(%rsi),%rsi
1152
xorq %r10,%r10
1153
movdqa %xmm6,0(%rsp)
1154
movdqa %xmm15,16(%rsp)
1155
leaq 64(%rdi),%rdi
1156
movdqa %xmm9,32(%rsp)
1157
subq $192,%rdx
1158
movdqa %xmm3,48(%rsp)
1159
1160
.Loop_tail4x:
1161
movzbl (%rsi,%r10,1),%eax
1162
movzbl (%rsp,%r10,1),%ecx
1163
leaq 1(%r10),%r10
1164
xorl %ecx,%eax
1165
movb %al,-1(%rdi,%r10,1)
1166
decq %rdx
1167
jnz .Loop_tail4x
1168
1169
.Ldone4x:
1170
leaq (%r9),%rsp
1171
.cfi_def_cfa_register %rsp
1172
.L4x_epilogue:
1173
.byte 0xf3,0xc3
1174
.cfi_endproc
1175
.size ChaCha20_4x,.-ChaCha20_4x
1176
.type ChaCha20_4xop,@function
1177
.align 32
1178
ChaCha20_4xop:
1179
.cfi_startproc
1180
.LChaCha20_4xop:
1181
movq %rsp,%r9
1182
.cfi_def_cfa_register %r9
1183
subq $0x140+8,%rsp
1184
vzeroupper
1185
1186
vmovdqa .Lsigma(%rip),%xmm11
1187
vmovdqu (%rcx),%xmm3
1188
vmovdqu 16(%rcx),%xmm15
1189
vmovdqu (%r8),%xmm7
1190
leaq 256(%rsp),%rcx
1191
1192
vpshufd $0x00,%xmm11,%xmm8
1193
vpshufd $0x55,%xmm11,%xmm9
1194
vmovdqa %xmm8,64(%rsp)
1195
vpshufd $0xaa,%xmm11,%xmm10
1196
vmovdqa %xmm9,80(%rsp)
1197
vpshufd $0xff,%xmm11,%xmm11
1198
vmovdqa %xmm10,96(%rsp)
1199
vmovdqa %xmm11,112(%rsp)
1200
1201
vpshufd $0x00,%xmm3,%xmm0
1202
vpshufd $0x55,%xmm3,%xmm1
1203
vmovdqa %xmm0,128-256(%rcx)
1204
vpshufd $0xaa,%xmm3,%xmm2
1205
vmovdqa %xmm1,144-256(%rcx)
1206
vpshufd $0xff,%xmm3,%xmm3
1207
vmovdqa %xmm2,160-256(%rcx)
1208
vmovdqa %xmm3,176-256(%rcx)
1209
1210
vpshufd $0x00,%xmm15,%xmm12
1211
vpshufd $0x55,%xmm15,%xmm13
1212
vmovdqa %xmm12,192-256(%rcx)
1213
vpshufd $0xaa,%xmm15,%xmm14
1214
vmovdqa %xmm13,208-256(%rcx)
1215
vpshufd $0xff,%xmm15,%xmm15
1216
vmovdqa %xmm14,224-256(%rcx)
1217
vmovdqa %xmm15,240-256(%rcx)
1218
1219
vpshufd $0x00,%xmm7,%xmm4
1220
vpshufd $0x55,%xmm7,%xmm5
1221
vpaddd .Linc(%rip),%xmm4,%xmm4
1222
vpshufd $0xaa,%xmm7,%xmm6
1223
vmovdqa %xmm5,272-256(%rcx)
1224
vpshufd $0xff,%xmm7,%xmm7
1225
vmovdqa %xmm6,288-256(%rcx)
1226
vmovdqa %xmm7,304-256(%rcx)
1227
1228
jmp .Loop_enter4xop
1229
1230
.align 32
1231
.Loop_outer4xop:
1232
vmovdqa 64(%rsp),%xmm8
1233
vmovdqa 80(%rsp),%xmm9
1234
vmovdqa 96(%rsp),%xmm10
1235
vmovdqa 112(%rsp),%xmm11
1236
vmovdqa 128-256(%rcx),%xmm0
1237
vmovdqa 144-256(%rcx),%xmm1
1238
vmovdqa 160-256(%rcx),%xmm2
1239
vmovdqa 176-256(%rcx),%xmm3
1240
vmovdqa 192-256(%rcx),%xmm12
1241
vmovdqa 208-256(%rcx),%xmm13
1242
vmovdqa 224-256(%rcx),%xmm14
1243
vmovdqa 240-256(%rcx),%xmm15
1244
vmovdqa 256-256(%rcx),%xmm4
1245
vmovdqa 272-256(%rcx),%xmm5
1246
vmovdqa 288-256(%rcx),%xmm6
1247
vmovdqa 304-256(%rcx),%xmm7
1248
vpaddd .Lfour(%rip),%xmm4,%xmm4
1249
1250
.Loop_enter4xop:
1251
movl $10,%eax
1252
vmovdqa %xmm4,256-256(%rcx)
1253
jmp .Loop4xop
1254
1255
.align 32
1256
.Loop4xop:
1257
vpaddd %xmm0,%xmm8,%xmm8
1258
vpaddd %xmm1,%xmm9,%xmm9
1259
vpaddd %xmm2,%xmm10,%xmm10
1260
vpaddd %xmm3,%xmm11,%xmm11
1261
vpxor %xmm4,%xmm8,%xmm4
1262
vpxor %xmm5,%xmm9,%xmm5
1263
vpxor %xmm6,%xmm10,%xmm6
1264
vpxor %xmm7,%xmm11,%xmm7
1265
.byte 143,232,120,194,228,16
1266
.byte 143,232,120,194,237,16
1267
.byte 143,232,120,194,246,16
1268
.byte 143,232,120,194,255,16
1269
vpaddd %xmm4,%xmm12,%xmm12
1270
vpaddd %xmm5,%xmm13,%xmm13
1271
vpaddd %xmm6,%xmm14,%xmm14
1272
vpaddd %xmm7,%xmm15,%xmm15
1273
vpxor %xmm0,%xmm12,%xmm0
1274
vpxor %xmm1,%xmm13,%xmm1
1275
vpxor %xmm14,%xmm2,%xmm2
1276
vpxor %xmm15,%xmm3,%xmm3
1277
.byte 143,232,120,194,192,12
1278
.byte 143,232,120,194,201,12
1279
.byte 143,232,120,194,210,12
1280
.byte 143,232,120,194,219,12
1281
vpaddd %xmm8,%xmm0,%xmm8
1282
vpaddd %xmm9,%xmm1,%xmm9
1283
vpaddd %xmm2,%xmm10,%xmm10
1284
vpaddd %xmm3,%xmm11,%xmm11
1285
vpxor %xmm4,%xmm8,%xmm4
1286
vpxor %xmm5,%xmm9,%xmm5
1287
vpxor %xmm6,%xmm10,%xmm6
1288
vpxor %xmm7,%xmm11,%xmm7
1289
.byte 143,232,120,194,228,8
1290
.byte 143,232,120,194,237,8
1291
.byte 143,232,120,194,246,8
1292
.byte 143,232,120,194,255,8
1293
vpaddd %xmm4,%xmm12,%xmm12
1294
vpaddd %xmm5,%xmm13,%xmm13
1295
vpaddd %xmm6,%xmm14,%xmm14
1296
vpaddd %xmm7,%xmm15,%xmm15
1297
vpxor %xmm0,%xmm12,%xmm0
1298
vpxor %xmm1,%xmm13,%xmm1
1299
vpxor %xmm14,%xmm2,%xmm2
1300
vpxor %xmm15,%xmm3,%xmm3
1301
.byte 143,232,120,194,192,7
1302
.byte 143,232,120,194,201,7
1303
.byte 143,232,120,194,210,7
1304
.byte 143,232,120,194,219,7
1305
vpaddd %xmm1,%xmm8,%xmm8
1306
vpaddd %xmm2,%xmm9,%xmm9
1307
vpaddd %xmm3,%xmm10,%xmm10
1308
vpaddd %xmm0,%xmm11,%xmm11
1309
vpxor %xmm7,%xmm8,%xmm7
1310
vpxor %xmm4,%xmm9,%xmm4
1311
vpxor %xmm5,%xmm10,%xmm5
1312
vpxor %xmm6,%xmm11,%xmm6
1313
.byte 143,232,120,194,255,16
1314
.byte 143,232,120,194,228,16
1315
.byte 143,232,120,194,237,16
1316
.byte 143,232,120,194,246,16
1317
vpaddd %xmm7,%xmm14,%xmm14
1318
vpaddd %xmm4,%xmm15,%xmm15
1319
vpaddd %xmm5,%xmm12,%xmm12
1320
vpaddd %xmm6,%xmm13,%xmm13
1321
vpxor %xmm1,%xmm14,%xmm1
1322
vpxor %xmm2,%xmm15,%xmm2
1323
vpxor %xmm12,%xmm3,%xmm3
1324
vpxor %xmm13,%xmm0,%xmm0
1325
.byte 143,232,120,194,201,12
1326
.byte 143,232,120,194,210,12
1327
.byte 143,232,120,194,219,12
1328
.byte 143,232,120,194,192,12
1329
vpaddd %xmm8,%xmm1,%xmm8
1330
vpaddd %xmm9,%xmm2,%xmm9
1331
vpaddd %xmm3,%xmm10,%xmm10
1332
vpaddd %xmm0,%xmm11,%xmm11
1333
vpxor %xmm7,%xmm8,%xmm7
1334
vpxor %xmm4,%xmm9,%xmm4
1335
vpxor %xmm5,%xmm10,%xmm5
1336
vpxor %xmm6,%xmm11,%xmm6
1337
.byte 143,232,120,194,255,8
1338
.byte 143,232,120,194,228,8
1339
.byte 143,232,120,194,237,8
1340
.byte 143,232,120,194,246,8
1341
vpaddd %xmm7,%xmm14,%xmm14
1342
vpaddd %xmm4,%xmm15,%xmm15
1343
vpaddd %xmm5,%xmm12,%xmm12
1344
vpaddd %xmm6,%xmm13,%xmm13
1345
vpxor %xmm1,%xmm14,%xmm1
1346
vpxor %xmm2,%xmm15,%xmm2
1347
vpxor %xmm12,%xmm3,%xmm3
1348
vpxor %xmm13,%xmm0,%xmm0
1349
.byte 143,232,120,194,201,7
1350
.byte 143,232,120,194,210,7
1351
.byte 143,232,120,194,219,7
1352
.byte 143,232,120,194,192,7
1353
decl %eax
1354
jnz .Loop4xop
1355
1356
vpaddd 64(%rsp),%xmm8,%xmm8
1357
vpaddd 80(%rsp),%xmm9,%xmm9
1358
vpaddd 96(%rsp),%xmm10,%xmm10
1359
vpaddd 112(%rsp),%xmm11,%xmm11
1360
1361
vmovdqa %xmm14,32(%rsp)
1362
vmovdqa %xmm15,48(%rsp)
1363
1364
vpunpckldq %xmm9,%xmm8,%xmm14
1365
vpunpckldq %xmm11,%xmm10,%xmm15
1366
vpunpckhdq %xmm9,%xmm8,%xmm8
1367
vpunpckhdq %xmm11,%xmm10,%xmm10
1368
vpunpcklqdq %xmm15,%xmm14,%xmm9
1369
vpunpckhqdq %xmm15,%xmm14,%xmm14
1370
vpunpcklqdq %xmm10,%xmm8,%xmm11
1371
vpunpckhqdq %xmm10,%xmm8,%xmm8
1372
vpaddd 128-256(%rcx),%xmm0,%xmm0
1373
vpaddd 144-256(%rcx),%xmm1,%xmm1
1374
vpaddd 160-256(%rcx),%xmm2,%xmm2
1375
vpaddd 176-256(%rcx),%xmm3,%xmm3
1376
1377
vmovdqa %xmm9,0(%rsp)
1378
vmovdqa %xmm14,16(%rsp)
1379
vmovdqa 32(%rsp),%xmm9
1380
vmovdqa 48(%rsp),%xmm14
1381
1382
vpunpckldq %xmm1,%xmm0,%xmm10
1383
vpunpckldq %xmm3,%xmm2,%xmm15
1384
vpunpckhdq %xmm1,%xmm0,%xmm0
1385
vpunpckhdq %xmm3,%xmm2,%xmm2
1386
vpunpcklqdq %xmm15,%xmm10,%xmm1
1387
vpunpckhqdq %xmm15,%xmm10,%xmm10
1388
vpunpcklqdq %xmm2,%xmm0,%xmm3
1389
vpunpckhqdq %xmm2,%xmm0,%xmm0
1390
vpaddd 192-256(%rcx),%xmm12,%xmm12
1391
vpaddd 208-256(%rcx),%xmm13,%xmm13
1392
vpaddd 224-256(%rcx),%xmm9,%xmm9
1393
vpaddd 240-256(%rcx),%xmm14,%xmm14
1394
1395
vpunpckldq %xmm13,%xmm12,%xmm2
1396
vpunpckldq %xmm14,%xmm9,%xmm15
1397
vpunpckhdq %xmm13,%xmm12,%xmm12
1398
vpunpckhdq %xmm14,%xmm9,%xmm9
1399
vpunpcklqdq %xmm15,%xmm2,%xmm13
1400
vpunpckhqdq %xmm15,%xmm2,%xmm2
1401
vpunpcklqdq %xmm9,%xmm12,%xmm14
1402
vpunpckhqdq %xmm9,%xmm12,%xmm12
1403
vpaddd 256-256(%rcx),%xmm4,%xmm4
1404
vpaddd 272-256(%rcx),%xmm5,%xmm5
1405
vpaddd 288-256(%rcx),%xmm6,%xmm6
1406
vpaddd 304-256(%rcx),%xmm7,%xmm7
1407
1408
vpunpckldq %xmm5,%xmm4,%xmm9
1409
vpunpckldq %xmm7,%xmm6,%xmm15
1410
vpunpckhdq %xmm5,%xmm4,%xmm4
1411
vpunpckhdq %xmm7,%xmm6,%xmm6
1412
vpunpcklqdq %xmm15,%xmm9,%xmm5
1413
vpunpckhqdq %xmm15,%xmm9,%xmm9
1414
vpunpcklqdq %xmm6,%xmm4,%xmm7
1415
vpunpckhqdq %xmm6,%xmm4,%xmm4
1416
vmovdqa 0(%rsp),%xmm6
1417
vmovdqa 16(%rsp),%xmm15
1418
1419
cmpq $256,%rdx
1420
jb .Ltail4xop
1421
1422
vpxor 0(%rsi),%xmm6,%xmm6
1423
vpxor 16(%rsi),%xmm1,%xmm1
1424
vpxor 32(%rsi),%xmm13,%xmm13
1425
vpxor 48(%rsi),%xmm5,%xmm5
1426
vpxor 64(%rsi),%xmm15,%xmm15
1427
vpxor 80(%rsi),%xmm10,%xmm10
1428
vpxor 96(%rsi),%xmm2,%xmm2
1429
vpxor 112(%rsi),%xmm9,%xmm9
1430
leaq 128(%rsi),%rsi
1431
vpxor 0(%rsi),%xmm11,%xmm11
1432
vpxor 16(%rsi),%xmm3,%xmm3
1433
vpxor 32(%rsi),%xmm14,%xmm14
1434
vpxor 48(%rsi),%xmm7,%xmm7
1435
vpxor 64(%rsi),%xmm8,%xmm8
1436
vpxor 80(%rsi),%xmm0,%xmm0
1437
vpxor 96(%rsi),%xmm12,%xmm12
1438
vpxor 112(%rsi),%xmm4,%xmm4
1439
leaq 128(%rsi),%rsi
1440
1441
vmovdqu %xmm6,0(%rdi)
1442
vmovdqu %xmm1,16(%rdi)
1443
vmovdqu %xmm13,32(%rdi)
1444
vmovdqu %xmm5,48(%rdi)
1445
vmovdqu %xmm15,64(%rdi)
1446
vmovdqu %xmm10,80(%rdi)
1447
vmovdqu %xmm2,96(%rdi)
1448
vmovdqu %xmm9,112(%rdi)
1449
leaq 128(%rdi),%rdi
1450
vmovdqu %xmm11,0(%rdi)
1451
vmovdqu %xmm3,16(%rdi)
1452
vmovdqu %xmm14,32(%rdi)
1453
vmovdqu %xmm7,48(%rdi)
1454
vmovdqu %xmm8,64(%rdi)
1455
vmovdqu %xmm0,80(%rdi)
1456
vmovdqu %xmm12,96(%rdi)
1457
vmovdqu %xmm4,112(%rdi)
1458
leaq 128(%rdi),%rdi
1459
1460
subq $256,%rdx
1461
jnz .Loop_outer4xop
1462
1463
jmp .Ldone4xop
1464
1465
.align 32
1466
.Ltail4xop:
1467
cmpq $192,%rdx
1468
jae .L192_or_more4xop
1469
cmpq $128,%rdx
1470
jae .L128_or_more4xop
1471
cmpq $64,%rdx
1472
jae .L64_or_more4xop
1473
1474
xorq %r10,%r10
1475
vmovdqa %xmm6,0(%rsp)
1476
vmovdqa %xmm1,16(%rsp)
1477
vmovdqa %xmm13,32(%rsp)
1478
vmovdqa %xmm5,48(%rsp)
1479
jmp .Loop_tail4xop
1480
1481
.align 32
1482
.L64_or_more4xop:
1483
vpxor 0(%rsi),%xmm6,%xmm6
1484
vpxor 16(%rsi),%xmm1,%xmm1
1485
vpxor 32(%rsi),%xmm13,%xmm13
1486
vpxor 48(%rsi),%xmm5,%xmm5
1487
vmovdqu %xmm6,0(%rdi)
1488
vmovdqu %xmm1,16(%rdi)
1489
vmovdqu %xmm13,32(%rdi)
1490
vmovdqu %xmm5,48(%rdi)
1491
je .Ldone4xop
1492
1493
leaq 64(%rsi),%rsi
1494
vmovdqa %xmm15,0(%rsp)
1495
xorq %r10,%r10
1496
vmovdqa %xmm10,16(%rsp)
1497
leaq 64(%rdi),%rdi
1498
vmovdqa %xmm2,32(%rsp)
1499
subq $64,%rdx
1500
vmovdqa %xmm9,48(%rsp)
1501
jmp .Loop_tail4xop
1502
1503
.align 32
1504
.L128_or_more4xop:
1505
vpxor 0(%rsi),%xmm6,%xmm6
1506
vpxor 16(%rsi),%xmm1,%xmm1
1507
vpxor 32(%rsi),%xmm13,%xmm13
1508
vpxor 48(%rsi),%xmm5,%xmm5
1509
vpxor 64(%rsi),%xmm15,%xmm15
1510
vpxor 80(%rsi),%xmm10,%xmm10
1511
vpxor 96(%rsi),%xmm2,%xmm2
1512
vpxor 112(%rsi),%xmm9,%xmm9
1513
1514
vmovdqu %xmm6,0(%rdi)
1515
vmovdqu %xmm1,16(%rdi)
1516
vmovdqu %xmm13,32(%rdi)
1517
vmovdqu %xmm5,48(%rdi)
1518
vmovdqu %xmm15,64(%rdi)
1519
vmovdqu %xmm10,80(%rdi)
1520
vmovdqu %xmm2,96(%rdi)
1521
vmovdqu %xmm9,112(%rdi)
1522
je .Ldone4xop
1523
1524
leaq 128(%rsi),%rsi
1525
vmovdqa %xmm11,0(%rsp)
1526
xorq %r10,%r10
1527
vmovdqa %xmm3,16(%rsp)
1528
leaq 128(%rdi),%rdi
1529
vmovdqa %xmm14,32(%rsp)
1530
subq $128,%rdx
1531
vmovdqa %xmm7,48(%rsp)
1532
jmp .Loop_tail4xop
1533
1534
.align 32
1535
.L192_or_more4xop:
1536
vpxor 0(%rsi),%xmm6,%xmm6
1537
vpxor 16(%rsi),%xmm1,%xmm1
1538
vpxor 32(%rsi),%xmm13,%xmm13
1539
vpxor 48(%rsi),%xmm5,%xmm5
1540
vpxor 64(%rsi),%xmm15,%xmm15
1541
vpxor 80(%rsi),%xmm10,%xmm10
1542
vpxor 96(%rsi),%xmm2,%xmm2
1543
vpxor 112(%rsi),%xmm9,%xmm9
1544
leaq 128(%rsi),%rsi
1545
vpxor 0(%rsi),%xmm11,%xmm11
1546
vpxor 16(%rsi),%xmm3,%xmm3
1547
vpxor 32(%rsi),%xmm14,%xmm14
1548
vpxor 48(%rsi),%xmm7,%xmm7
1549
1550
vmovdqu %xmm6,0(%rdi)
1551
vmovdqu %xmm1,16(%rdi)
1552
vmovdqu %xmm13,32(%rdi)
1553
vmovdqu %xmm5,48(%rdi)
1554
vmovdqu %xmm15,64(%rdi)
1555
vmovdqu %xmm10,80(%rdi)
1556
vmovdqu %xmm2,96(%rdi)
1557
vmovdqu %xmm9,112(%rdi)
1558
leaq 128(%rdi),%rdi
1559
vmovdqu %xmm11,0(%rdi)
1560
vmovdqu %xmm3,16(%rdi)
1561
vmovdqu %xmm14,32(%rdi)
1562
vmovdqu %xmm7,48(%rdi)
1563
je .Ldone4xop
1564
1565
leaq 64(%rsi),%rsi
1566
vmovdqa %xmm8,0(%rsp)
1567
xorq %r10,%r10
1568
vmovdqa %xmm0,16(%rsp)
1569
leaq 64(%rdi),%rdi
1570
vmovdqa %xmm12,32(%rsp)
1571
subq $192,%rdx
1572
vmovdqa %xmm4,48(%rsp)
1573
1574
.Loop_tail4xop:
1575
movzbl (%rsi,%r10,1),%eax
1576
movzbl (%rsp,%r10,1),%ecx
1577
leaq 1(%r10),%r10
1578
xorl %ecx,%eax
1579
movb %al,-1(%rdi,%r10,1)
1580
decq %rdx
1581
jnz .Loop_tail4xop
1582
1583
.Ldone4xop:
1584
vzeroupper
1585
leaq (%r9),%rsp
1586
.cfi_def_cfa_register %rsp
1587
.L4xop_epilogue:
1588
.byte 0xf3,0xc3
1589
.cfi_endproc
1590
.size ChaCha20_4xop,.-ChaCha20_4xop
1591
.type ChaCha20_8x,@function
1592
.align 32
1593
ChaCha20_8x:
1594
.cfi_startproc
1595
.LChaCha20_8x:
1596
movq %rsp,%r9
1597
.cfi_def_cfa_register %r9
1598
subq $0x280+8,%rsp
1599
andq $-32,%rsp
1600
vzeroupper
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
vbroadcasti128 .Lsigma(%rip),%ymm11
1612
vbroadcasti128 (%rcx),%ymm3
1613
vbroadcasti128 16(%rcx),%ymm15
1614
vbroadcasti128 (%r8),%ymm7
1615
leaq 256(%rsp),%rcx
1616
leaq 512(%rsp),%rax
1617
leaq .Lrot16(%rip),%r10
1618
leaq .Lrot24(%rip),%r11
1619
1620
vpshufd $0x00,%ymm11,%ymm8
1621
vpshufd $0x55,%ymm11,%ymm9
1622
vmovdqa %ymm8,128-256(%rcx)
1623
vpshufd $0xaa,%ymm11,%ymm10
1624
vmovdqa %ymm9,160-256(%rcx)
1625
vpshufd $0xff,%ymm11,%ymm11
1626
vmovdqa %ymm10,192-256(%rcx)
1627
vmovdqa %ymm11,224-256(%rcx)
1628
1629
vpshufd $0x00,%ymm3,%ymm0
1630
vpshufd $0x55,%ymm3,%ymm1
1631
vmovdqa %ymm0,256-256(%rcx)
1632
vpshufd $0xaa,%ymm3,%ymm2
1633
vmovdqa %ymm1,288-256(%rcx)
1634
vpshufd $0xff,%ymm3,%ymm3
1635
vmovdqa %ymm2,320-256(%rcx)
1636
vmovdqa %ymm3,352-256(%rcx)
1637
1638
vpshufd $0x00,%ymm15,%ymm12
1639
vpshufd $0x55,%ymm15,%ymm13
1640
vmovdqa %ymm12,384-512(%rax)
1641
vpshufd $0xaa,%ymm15,%ymm14
1642
vmovdqa %ymm13,416-512(%rax)
1643
vpshufd $0xff,%ymm15,%ymm15
1644
vmovdqa %ymm14,448-512(%rax)
1645
vmovdqa %ymm15,480-512(%rax)
1646
1647
vpshufd $0x00,%ymm7,%ymm4
1648
vpshufd $0x55,%ymm7,%ymm5
1649
vpaddd .Lincy(%rip),%ymm4,%ymm4
1650
vpshufd $0xaa,%ymm7,%ymm6
1651
vmovdqa %ymm5,544-512(%rax)
1652
vpshufd $0xff,%ymm7,%ymm7
1653
vmovdqa %ymm6,576-512(%rax)
1654
vmovdqa %ymm7,608-512(%rax)
1655
1656
jmp .Loop_enter8x
1657
1658
.align 32
1659
.Loop_outer8x:
1660
vmovdqa 128-256(%rcx),%ymm8
1661
vmovdqa 160-256(%rcx),%ymm9
1662
vmovdqa 192-256(%rcx),%ymm10
1663
vmovdqa 224-256(%rcx),%ymm11
1664
vmovdqa 256-256(%rcx),%ymm0
1665
vmovdqa 288-256(%rcx),%ymm1
1666
vmovdqa 320-256(%rcx),%ymm2
1667
vmovdqa 352-256(%rcx),%ymm3
1668
vmovdqa 384-512(%rax),%ymm12
1669
vmovdqa 416-512(%rax),%ymm13
1670
vmovdqa 448-512(%rax),%ymm14
1671
vmovdqa 480-512(%rax),%ymm15
1672
vmovdqa 512-512(%rax),%ymm4
1673
vmovdqa 544-512(%rax),%ymm5
1674
vmovdqa 576-512(%rax),%ymm6
1675
vmovdqa 608-512(%rax),%ymm7
1676
vpaddd .Leight(%rip),%ymm4,%ymm4
1677
1678
.Loop_enter8x:
1679
vmovdqa %ymm14,64(%rsp)
1680
vmovdqa %ymm15,96(%rsp)
1681
vbroadcasti128 (%r10),%ymm15
1682
vmovdqa %ymm4,512-512(%rax)
1683
movl $10,%eax
1684
jmp .Loop8x
1685
1686
.align 32
1687
.Loop8x:
1688
vpaddd %ymm0,%ymm8,%ymm8
1689
vpxor %ymm4,%ymm8,%ymm4
1690
vpshufb %ymm15,%ymm4,%ymm4
1691
vpaddd %ymm1,%ymm9,%ymm9
1692
vpxor %ymm5,%ymm9,%ymm5
1693
vpshufb %ymm15,%ymm5,%ymm5
1694
vpaddd %ymm4,%ymm12,%ymm12
1695
vpxor %ymm0,%ymm12,%ymm0
1696
vpslld $12,%ymm0,%ymm14
1697
vpsrld $20,%ymm0,%ymm0
1698
vpor %ymm0,%ymm14,%ymm0
1699
vbroadcasti128 (%r11),%ymm14
1700
vpaddd %ymm5,%ymm13,%ymm13
1701
vpxor %ymm1,%ymm13,%ymm1
1702
vpslld $12,%ymm1,%ymm15
1703
vpsrld $20,%ymm1,%ymm1
1704
vpor %ymm1,%ymm15,%ymm1
1705
vpaddd %ymm0,%ymm8,%ymm8
1706
vpxor %ymm4,%ymm8,%ymm4
1707
vpshufb %ymm14,%ymm4,%ymm4
1708
vpaddd %ymm1,%ymm9,%ymm9
1709
vpxor %ymm5,%ymm9,%ymm5
1710
vpshufb %ymm14,%ymm5,%ymm5
1711
vpaddd %ymm4,%ymm12,%ymm12
1712
vpxor %ymm0,%ymm12,%ymm0
1713
vpslld $7,%ymm0,%ymm15
1714
vpsrld $25,%ymm0,%ymm0
1715
vpor %ymm0,%ymm15,%ymm0
1716
vbroadcasti128 (%r10),%ymm15
1717
vpaddd %ymm5,%ymm13,%ymm13
1718
vpxor %ymm1,%ymm13,%ymm1
1719
vpslld $7,%ymm1,%ymm14
1720
vpsrld $25,%ymm1,%ymm1
1721
vpor %ymm1,%ymm14,%ymm1
1722
vmovdqa %ymm12,0(%rsp)
1723
vmovdqa %ymm13,32(%rsp)
1724
vmovdqa 64(%rsp),%ymm12
1725
vmovdqa 96(%rsp),%ymm13
1726
vpaddd %ymm2,%ymm10,%ymm10
1727
vpxor %ymm6,%ymm10,%ymm6
1728
vpshufb %ymm15,%ymm6,%ymm6
1729
vpaddd %ymm3,%ymm11,%ymm11
1730
vpxor %ymm7,%ymm11,%ymm7
1731
vpshufb %ymm15,%ymm7,%ymm7
1732
vpaddd %ymm6,%ymm12,%ymm12
1733
vpxor %ymm2,%ymm12,%ymm2
1734
vpslld $12,%ymm2,%ymm14
1735
vpsrld $20,%ymm2,%ymm2
1736
vpor %ymm2,%ymm14,%ymm2
1737
vbroadcasti128 (%r11),%ymm14
1738
vpaddd %ymm7,%ymm13,%ymm13
1739
vpxor %ymm3,%ymm13,%ymm3
1740
vpslld $12,%ymm3,%ymm15
1741
vpsrld $20,%ymm3,%ymm3
1742
vpor %ymm3,%ymm15,%ymm3
1743
vpaddd %ymm2,%ymm10,%ymm10
1744
vpxor %ymm6,%ymm10,%ymm6
1745
vpshufb %ymm14,%ymm6,%ymm6
1746
vpaddd %ymm3,%ymm11,%ymm11
1747
vpxor %ymm7,%ymm11,%ymm7
1748
vpshufb %ymm14,%ymm7,%ymm7
1749
vpaddd %ymm6,%ymm12,%ymm12
1750
vpxor %ymm2,%ymm12,%ymm2
1751
vpslld $7,%ymm2,%ymm15
1752
vpsrld $25,%ymm2,%ymm2
1753
vpor %ymm2,%ymm15,%ymm2
1754
vbroadcasti128 (%r10),%ymm15
1755
vpaddd %ymm7,%ymm13,%ymm13
1756
vpxor %ymm3,%ymm13,%ymm3
1757
vpslld $7,%ymm3,%ymm14
1758
vpsrld $25,%ymm3,%ymm3
1759
vpor %ymm3,%ymm14,%ymm3
1760
vpaddd %ymm1,%ymm8,%ymm8
1761
vpxor %ymm7,%ymm8,%ymm7
1762
vpshufb %ymm15,%ymm7,%ymm7
1763
vpaddd %ymm2,%ymm9,%ymm9
1764
vpxor %ymm4,%ymm9,%ymm4
1765
vpshufb %ymm15,%ymm4,%ymm4
1766
vpaddd %ymm7,%ymm12,%ymm12
1767
vpxor %ymm1,%ymm12,%ymm1
1768
vpslld $12,%ymm1,%ymm14
1769
vpsrld $20,%ymm1,%ymm1
1770
vpor %ymm1,%ymm14,%ymm1
1771
vbroadcasti128 (%r11),%ymm14
1772
vpaddd %ymm4,%ymm13,%ymm13
1773
vpxor %ymm2,%ymm13,%ymm2
1774
vpslld $12,%ymm2,%ymm15
1775
vpsrld $20,%ymm2,%ymm2
1776
vpor %ymm2,%ymm15,%ymm2
1777
vpaddd %ymm1,%ymm8,%ymm8
1778
vpxor %ymm7,%ymm8,%ymm7
1779
vpshufb %ymm14,%ymm7,%ymm7
1780
vpaddd %ymm2,%ymm9,%ymm9
1781
vpxor %ymm4,%ymm9,%ymm4
1782
vpshufb %ymm14,%ymm4,%ymm4
1783
vpaddd %ymm7,%ymm12,%ymm12
1784
vpxor %ymm1,%ymm12,%ymm1
1785
vpslld $7,%ymm1,%ymm15
1786
vpsrld $25,%ymm1,%ymm1
1787
vpor %ymm1,%ymm15,%ymm1
1788
vbroadcasti128 (%r10),%ymm15
1789
vpaddd %ymm4,%ymm13,%ymm13
1790
vpxor %ymm2,%ymm13,%ymm2
1791
vpslld $7,%ymm2,%ymm14
1792
vpsrld $25,%ymm2,%ymm2
1793
vpor %ymm2,%ymm14,%ymm2
1794
vmovdqa %ymm12,64(%rsp)
1795
vmovdqa %ymm13,96(%rsp)
1796
vmovdqa 0(%rsp),%ymm12
1797
vmovdqa 32(%rsp),%ymm13
1798
vpaddd %ymm3,%ymm10,%ymm10
1799
vpxor %ymm5,%ymm10,%ymm5
1800
vpshufb %ymm15,%ymm5,%ymm5
1801
vpaddd %ymm0,%ymm11,%ymm11
1802
vpxor %ymm6,%ymm11,%ymm6
1803
vpshufb %ymm15,%ymm6,%ymm6
1804
vpaddd %ymm5,%ymm12,%ymm12
1805
vpxor %ymm3,%ymm12,%ymm3
1806
vpslld $12,%ymm3,%ymm14
1807
vpsrld $20,%ymm3,%ymm3
1808
vpor %ymm3,%ymm14,%ymm3
1809
vbroadcasti128 (%r11),%ymm14
1810
vpaddd %ymm6,%ymm13,%ymm13
1811
vpxor %ymm0,%ymm13,%ymm0
1812
vpslld $12,%ymm0,%ymm15
1813
vpsrld $20,%ymm0,%ymm0
1814
vpor %ymm0,%ymm15,%ymm0
1815
vpaddd %ymm3,%ymm10,%ymm10
1816
vpxor %ymm5,%ymm10,%ymm5
1817
vpshufb %ymm14,%ymm5,%ymm5
1818
vpaddd %ymm0,%ymm11,%ymm11
1819
vpxor %ymm6,%ymm11,%ymm6
1820
vpshufb %ymm14,%ymm6,%ymm6
1821
vpaddd %ymm5,%ymm12,%ymm12
1822
vpxor %ymm3,%ymm12,%ymm3
1823
vpslld $7,%ymm3,%ymm15
1824
vpsrld $25,%ymm3,%ymm3
1825
vpor %ymm3,%ymm15,%ymm3
1826
vbroadcasti128 (%r10),%ymm15
1827
vpaddd %ymm6,%ymm13,%ymm13
1828
vpxor %ymm0,%ymm13,%ymm0
1829
vpslld $7,%ymm0,%ymm14
1830
vpsrld $25,%ymm0,%ymm0
1831
vpor %ymm0,%ymm14,%ymm0
1832
decl %eax
1833
jnz .Loop8x
1834
1835
leaq 512(%rsp),%rax
1836
vpaddd 128-256(%rcx),%ymm8,%ymm8
1837
vpaddd 160-256(%rcx),%ymm9,%ymm9
1838
vpaddd 192-256(%rcx),%ymm10,%ymm10
1839
vpaddd 224-256(%rcx),%ymm11,%ymm11
1840
1841
vpunpckldq %ymm9,%ymm8,%ymm14
1842
vpunpckldq %ymm11,%ymm10,%ymm15
1843
vpunpckhdq %ymm9,%ymm8,%ymm8
1844
vpunpckhdq %ymm11,%ymm10,%ymm10
1845
vpunpcklqdq %ymm15,%ymm14,%ymm9
1846
vpunpckhqdq %ymm15,%ymm14,%ymm14
1847
vpunpcklqdq %ymm10,%ymm8,%ymm11
1848
vpunpckhqdq %ymm10,%ymm8,%ymm8
1849
vpaddd 256-256(%rcx),%ymm0,%ymm0
1850
vpaddd 288-256(%rcx),%ymm1,%ymm1
1851
vpaddd 320-256(%rcx),%ymm2,%ymm2
1852
vpaddd 352-256(%rcx),%ymm3,%ymm3
1853
1854
vpunpckldq %ymm1,%ymm0,%ymm10
1855
vpunpckldq %ymm3,%ymm2,%ymm15
1856
vpunpckhdq %ymm1,%ymm0,%ymm0
1857
vpunpckhdq %ymm3,%ymm2,%ymm2
1858
vpunpcklqdq %ymm15,%ymm10,%ymm1
1859
vpunpckhqdq %ymm15,%ymm10,%ymm10
1860
vpunpcklqdq %ymm2,%ymm0,%ymm3
1861
vpunpckhqdq %ymm2,%ymm0,%ymm0
1862
vperm2i128 $0x20,%ymm1,%ymm9,%ymm15
1863
vperm2i128 $0x31,%ymm1,%ymm9,%ymm1
1864
vperm2i128 $0x20,%ymm10,%ymm14,%ymm9
1865
vperm2i128 $0x31,%ymm10,%ymm14,%ymm10
1866
vperm2i128 $0x20,%ymm3,%ymm11,%ymm14
1867
vperm2i128 $0x31,%ymm3,%ymm11,%ymm3
1868
vperm2i128 $0x20,%ymm0,%ymm8,%ymm11
1869
vperm2i128 $0x31,%ymm0,%ymm8,%ymm0
1870
vmovdqa %ymm15,0(%rsp)
1871
vmovdqa %ymm9,32(%rsp)
1872
vmovdqa 64(%rsp),%ymm15
1873
vmovdqa 96(%rsp),%ymm9
1874
1875
vpaddd 384-512(%rax),%ymm12,%ymm12
1876
vpaddd 416-512(%rax),%ymm13,%ymm13
1877
vpaddd 448-512(%rax),%ymm15,%ymm15
1878
vpaddd 480-512(%rax),%ymm9,%ymm9
1879
1880
vpunpckldq %ymm13,%ymm12,%ymm2
1881
vpunpckldq %ymm9,%ymm15,%ymm8
1882
vpunpckhdq %ymm13,%ymm12,%ymm12
1883
vpunpckhdq %ymm9,%ymm15,%ymm15
1884
vpunpcklqdq %ymm8,%ymm2,%ymm13
1885
vpunpckhqdq %ymm8,%ymm2,%ymm2
1886
vpunpcklqdq %ymm15,%ymm12,%ymm9
1887
vpunpckhqdq %ymm15,%ymm12,%ymm12
1888
vpaddd 512-512(%rax),%ymm4,%ymm4
1889
vpaddd 544-512(%rax),%ymm5,%ymm5
1890
vpaddd 576-512(%rax),%ymm6,%ymm6
1891
vpaddd 608-512(%rax),%ymm7,%ymm7
1892
1893
vpunpckldq %ymm5,%ymm4,%ymm15
1894
vpunpckldq %ymm7,%ymm6,%ymm8
1895
vpunpckhdq %ymm5,%ymm4,%ymm4
1896
vpunpckhdq %ymm7,%ymm6,%ymm6
1897
vpunpcklqdq %ymm8,%ymm15,%ymm5
1898
vpunpckhqdq %ymm8,%ymm15,%ymm15
1899
vpunpcklqdq %ymm6,%ymm4,%ymm7
1900
vpunpckhqdq %ymm6,%ymm4,%ymm4
1901
vperm2i128 $0x20,%ymm5,%ymm13,%ymm8
1902
vperm2i128 $0x31,%ymm5,%ymm13,%ymm5
1903
vperm2i128 $0x20,%ymm15,%ymm2,%ymm13
1904
vperm2i128 $0x31,%ymm15,%ymm2,%ymm15
1905
vperm2i128 $0x20,%ymm7,%ymm9,%ymm2
1906
vperm2i128 $0x31,%ymm7,%ymm9,%ymm7
1907
vperm2i128 $0x20,%ymm4,%ymm12,%ymm9
1908
vperm2i128 $0x31,%ymm4,%ymm12,%ymm4
1909
vmovdqa 0(%rsp),%ymm6
1910
vmovdqa 32(%rsp),%ymm12
1911
1912
cmpq $512,%rdx
1913
jb .Ltail8x
1914
1915
vpxor 0(%rsi),%ymm6,%ymm6
1916
vpxor 32(%rsi),%ymm8,%ymm8
1917
vpxor 64(%rsi),%ymm1,%ymm1
1918
vpxor 96(%rsi),%ymm5,%ymm5
1919
leaq 128(%rsi),%rsi
1920
vmovdqu %ymm6,0(%rdi)
1921
vmovdqu %ymm8,32(%rdi)
1922
vmovdqu %ymm1,64(%rdi)
1923
vmovdqu %ymm5,96(%rdi)
1924
leaq 128(%rdi),%rdi
1925
1926
vpxor 0(%rsi),%ymm12,%ymm12
1927
vpxor 32(%rsi),%ymm13,%ymm13
1928
vpxor 64(%rsi),%ymm10,%ymm10
1929
vpxor 96(%rsi),%ymm15,%ymm15
1930
leaq 128(%rsi),%rsi
1931
vmovdqu %ymm12,0(%rdi)
1932
vmovdqu %ymm13,32(%rdi)
1933
vmovdqu %ymm10,64(%rdi)
1934
vmovdqu %ymm15,96(%rdi)
1935
leaq 128(%rdi),%rdi
1936
1937
vpxor 0(%rsi),%ymm14,%ymm14
1938
vpxor 32(%rsi),%ymm2,%ymm2
1939
vpxor 64(%rsi),%ymm3,%ymm3
1940
vpxor 96(%rsi),%ymm7,%ymm7
1941
leaq 128(%rsi),%rsi
1942
vmovdqu %ymm14,0(%rdi)
1943
vmovdqu %ymm2,32(%rdi)
1944
vmovdqu %ymm3,64(%rdi)
1945
vmovdqu %ymm7,96(%rdi)
1946
leaq 128(%rdi),%rdi
1947
1948
vpxor 0(%rsi),%ymm11,%ymm11
1949
vpxor 32(%rsi),%ymm9,%ymm9
1950
vpxor 64(%rsi),%ymm0,%ymm0
1951
vpxor 96(%rsi),%ymm4,%ymm4
1952
leaq 128(%rsi),%rsi
1953
vmovdqu %ymm11,0(%rdi)
1954
vmovdqu %ymm9,32(%rdi)
1955
vmovdqu %ymm0,64(%rdi)
1956
vmovdqu %ymm4,96(%rdi)
1957
leaq 128(%rdi),%rdi
1958
1959
subq $512,%rdx
1960
jnz .Loop_outer8x
1961
1962
jmp .Ldone8x
1963
1964
.Ltail8x:
1965
cmpq $448,%rdx
1966
jae .L448_or_more8x
1967
cmpq $384,%rdx
1968
jae .L384_or_more8x
1969
cmpq $320,%rdx
1970
jae .L320_or_more8x
1971
cmpq $256,%rdx
1972
jae .L256_or_more8x
1973
cmpq $192,%rdx
1974
jae .L192_or_more8x
1975
cmpq $128,%rdx
1976
jae .L128_or_more8x
1977
cmpq $64,%rdx
1978
jae .L64_or_more8x
1979
1980
xorq %r10,%r10
1981
vmovdqa %ymm6,0(%rsp)
1982
vmovdqa %ymm8,32(%rsp)
1983
jmp .Loop_tail8x
1984
1985
.align 32
1986
.L64_or_more8x:
1987
vpxor 0(%rsi),%ymm6,%ymm6
1988
vpxor 32(%rsi),%ymm8,%ymm8
1989
vmovdqu %ymm6,0(%rdi)
1990
vmovdqu %ymm8,32(%rdi)
1991
je .Ldone8x
1992
1993
leaq 64(%rsi),%rsi
1994
xorq %r10,%r10
1995
vmovdqa %ymm1,0(%rsp)
1996
leaq 64(%rdi),%rdi
1997
subq $64,%rdx
1998
vmovdqa %ymm5,32(%rsp)
1999
jmp .Loop_tail8x
2000
2001
.align 32
2002
.L128_or_more8x:
2003
vpxor 0(%rsi),%ymm6,%ymm6
2004
vpxor 32(%rsi),%ymm8,%ymm8
2005
vpxor 64(%rsi),%ymm1,%ymm1
2006
vpxor 96(%rsi),%ymm5,%ymm5
2007
vmovdqu %ymm6,0(%rdi)
2008
vmovdqu %ymm8,32(%rdi)
2009
vmovdqu %ymm1,64(%rdi)
2010
vmovdqu %ymm5,96(%rdi)
2011
je .Ldone8x
2012
2013
leaq 128(%rsi),%rsi
2014
xorq %r10,%r10
2015
vmovdqa %ymm12,0(%rsp)
2016
leaq 128(%rdi),%rdi
2017
subq $128,%rdx
2018
vmovdqa %ymm13,32(%rsp)
2019
jmp .Loop_tail8x
2020
2021
.align 32
2022
.L192_or_more8x:
2023
vpxor 0(%rsi),%ymm6,%ymm6
2024
vpxor 32(%rsi),%ymm8,%ymm8
2025
vpxor 64(%rsi),%ymm1,%ymm1
2026
vpxor 96(%rsi),%ymm5,%ymm5
2027
vpxor 128(%rsi),%ymm12,%ymm12
2028
vpxor 160(%rsi),%ymm13,%ymm13
2029
vmovdqu %ymm6,0(%rdi)
2030
vmovdqu %ymm8,32(%rdi)
2031
vmovdqu %ymm1,64(%rdi)
2032
vmovdqu %ymm5,96(%rdi)
2033
vmovdqu %ymm12,128(%rdi)
2034
vmovdqu %ymm13,160(%rdi)
2035
je .Ldone8x
2036
2037
leaq 192(%rsi),%rsi
2038
xorq %r10,%r10
2039
vmovdqa %ymm10,0(%rsp)
2040
leaq 192(%rdi),%rdi
2041
subq $192,%rdx
2042
vmovdqa %ymm15,32(%rsp)
2043
jmp .Loop_tail8x
2044
2045
.align 32
2046
.L256_or_more8x:
2047
vpxor 0(%rsi),%ymm6,%ymm6
2048
vpxor 32(%rsi),%ymm8,%ymm8
2049
vpxor 64(%rsi),%ymm1,%ymm1
2050
vpxor 96(%rsi),%ymm5,%ymm5
2051
vpxor 128(%rsi),%ymm12,%ymm12
2052
vpxor 160(%rsi),%ymm13,%ymm13
2053
vpxor 192(%rsi),%ymm10,%ymm10
2054
vpxor 224(%rsi),%ymm15,%ymm15
2055
vmovdqu %ymm6,0(%rdi)
2056
vmovdqu %ymm8,32(%rdi)
2057
vmovdqu %ymm1,64(%rdi)
2058
vmovdqu %ymm5,96(%rdi)
2059
vmovdqu %ymm12,128(%rdi)
2060
vmovdqu %ymm13,160(%rdi)
2061
vmovdqu %ymm10,192(%rdi)
2062
vmovdqu %ymm15,224(%rdi)
2063
je .Ldone8x
2064
2065
leaq 256(%rsi),%rsi
2066
xorq %r10,%r10
2067
vmovdqa %ymm14,0(%rsp)
2068
leaq 256(%rdi),%rdi
2069
subq $256,%rdx
2070
vmovdqa %ymm2,32(%rsp)
2071
jmp .Loop_tail8x
2072
2073
.align 32
2074
.L320_or_more8x:
2075
vpxor 0(%rsi),%ymm6,%ymm6
2076
vpxor 32(%rsi),%ymm8,%ymm8
2077
vpxor 64(%rsi),%ymm1,%ymm1
2078
vpxor 96(%rsi),%ymm5,%ymm5
2079
vpxor 128(%rsi),%ymm12,%ymm12
2080
vpxor 160(%rsi),%ymm13,%ymm13
2081
vpxor 192(%rsi),%ymm10,%ymm10
2082
vpxor 224(%rsi),%ymm15,%ymm15
2083
vpxor 256(%rsi),%ymm14,%ymm14
2084
vpxor 288(%rsi),%ymm2,%ymm2
2085
vmovdqu %ymm6,0(%rdi)
2086
vmovdqu %ymm8,32(%rdi)
2087
vmovdqu %ymm1,64(%rdi)
2088
vmovdqu %ymm5,96(%rdi)
2089
vmovdqu %ymm12,128(%rdi)
2090
vmovdqu %ymm13,160(%rdi)
2091
vmovdqu %ymm10,192(%rdi)
2092
vmovdqu %ymm15,224(%rdi)
2093
vmovdqu %ymm14,256(%rdi)
2094
vmovdqu %ymm2,288(%rdi)
2095
je .Ldone8x
2096
2097
leaq 320(%rsi),%rsi
2098
xorq %r10,%r10
2099
vmovdqa %ymm3,0(%rsp)
2100
leaq 320(%rdi),%rdi
2101
subq $320,%rdx
2102
vmovdqa %ymm7,32(%rsp)
2103
jmp .Loop_tail8x
2104
2105
.align 32
2106
.L384_or_more8x:
2107
vpxor 0(%rsi),%ymm6,%ymm6
2108
vpxor 32(%rsi),%ymm8,%ymm8
2109
vpxor 64(%rsi),%ymm1,%ymm1
2110
vpxor 96(%rsi),%ymm5,%ymm5
2111
vpxor 128(%rsi),%ymm12,%ymm12
2112
vpxor 160(%rsi),%ymm13,%ymm13
2113
vpxor 192(%rsi),%ymm10,%ymm10
2114
vpxor 224(%rsi),%ymm15,%ymm15
2115
vpxor 256(%rsi),%ymm14,%ymm14
2116
vpxor 288(%rsi),%ymm2,%ymm2
2117
vpxor 320(%rsi),%ymm3,%ymm3
2118
vpxor 352(%rsi),%ymm7,%ymm7
2119
vmovdqu %ymm6,0(%rdi)
2120
vmovdqu %ymm8,32(%rdi)
2121
vmovdqu %ymm1,64(%rdi)
2122
vmovdqu %ymm5,96(%rdi)
2123
vmovdqu %ymm12,128(%rdi)
2124
vmovdqu %ymm13,160(%rdi)
2125
vmovdqu %ymm10,192(%rdi)
2126
vmovdqu %ymm15,224(%rdi)
2127
vmovdqu %ymm14,256(%rdi)
2128
vmovdqu %ymm2,288(%rdi)
2129
vmovdqu %ymm3,320(%rdi)
2130
vmovdqu %ymm7,352(%rdi)
2131
je .Ldone8x
2132
2133
leaq 384(%rsi),%rsi
2134
xorq %r10,%r10
2135
vmovdqa %ymm11,0(%rsp)
2136
leaq 384(%rdi),%rdi
2137
subq $384,%rdx
2138
vmovdqa %ymm9,32(%rsp)
2139
jmp .Loop_tail8x
2140
2141
.align 32
2142
.L448_or_more8x:
2143
vpxor 0(%rsi),%ymm6,%ymm6
2144
vpxor 32(%rsi),%ymm8,%ymm8
2145
vpxor 64(%rsi),%ymm1,%ymm1
2146
vpxor 96(%rsi),%ymm5,%ymm5
2147
vpxor 128(%rsi),%ymm12,%ymm12
2148
vpxor 160(%rsi),%ymm13,%ymm13
2149
vpxor 192(%rsi),%ymm10,%ymm10
2150
vpxor 224(%rsi),%ymm15,%ymm15
2151
vpxor 256(%rsi),%ymm14,%ymm14
2152
vpxor 288(%rsi),%ymm2,%ymm2
2153
vpxor 320(%rsi),%ymm3,%ymm3
2154
vpxor 352(%rsi),%ymm7,%ymm7
2155
vpxor 384(%rsi),%ymm11,%ymm11
2156
vpxor 416(%rsi),%ymm9,%ymm9
2157
vmovdqu %ymm6,0(%rdi)
2158
vmovdqu %ymm8,32(%rdi)
2159
vmovdqu %ymm1,64(%rdi)
2160
vmovdqu %ymm5,96(%rdi)
2161
vmovdqu %ymm12,128(%rdi)
2162
vmovdqu %ymm13,160(%rdi)
2163
vmovdqu %ymm10,192(%rdi)
2164
vmovdqu %ymm15,224(%rdi)
2165
vmovdqu %ymm14,256(%rdi)
2166
vmovdqu %ymm2,288(%rdi)
2167
vmovdqu %ymm3,320(%rdi)
2168
vmovdqu %ymm7,352(%rdi)
2169
vmovdqu %ymm11,384(%rdi)
2170
vmovdqu %ymm9,416(%rdi)
2171
je .Ldone8x
2172
2173
leaq 448(%rsi),%rsi
2174
xorq %r10,%r10
2175
vmovdqa %ymm0,0(%rsp)
2176
leaq 448(%rdi),%rdi
2177
subq $448,%rdx
2178
vmovdqa %ymm4,32(%rsp)
2179
2180
.Loop_tail8x:
2181
movzbl (%rsi,%r10,1),%eax
2182
movzbl (%rsp,%r10,1),%ecx
2183
leaq 1(%r10),%r10
2184
xorl %ecx,%eax
2185
movb %al,-1(%rdi,%r10,1)
2186
decq %rdx
2187
jnz .Loop_tail8x
2188
2189
.Ldone8x:
2190
vzeroall
2191
leaq (%r9),%rsp
2192
.cfi_def_cfa_register %rsp
2193
.L8x_epilogue:
2194
.byte 0xf3,0xc3
2195
.cfi_endproc
2196
.size ChaCha20_8x,.-ChaCha20_8x
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4: