GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/crypto/openssl/i386/chacha-x86.S
/* Do not modify. This file is auto-generated from chacha-x86.pl. */
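/*
 * Three entry points are provided: ChaCha20_ctr32 (integer-only i386),
 * ChaCha20_ssse3 and ChaCha20_xop. ChaCha20_ctr32 dispatches to the SIMD
 * paths at run time based on OPENSSL_ia32cap_P. The PIC and non-PIC builds
 * below differ only in how OPENSSL_ia32cap_P is reached.
 */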
#ifdef PIC
.text
.globl ChaCha20_ctr32
.type ChaCha20_ctr32,@function
.align 16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
#ifdef __CET__

.byte 243,15,30,251
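/* 243,15,30,251 encodes endbr32, the Intel CET indirect-branch landing pad. */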
#endif

pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %eax,%eax
cmpl 28(%esp),%eax
je .L000no_data
call .Lpic_point
.Lpic_point:
popl %eax
leal OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
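/*
 * Runtime dispatch on OPENSSL_ia32cap_P: $16777216 is bit 24 of word 0
 * (FXSR) and $512 is bit 9 of word 1 (SSSE3); both must be set to take the
 * SSSE3 path. .Lssse3_shortcut additionally tests $2048 (bit 11 of word 1),
 * which OpenSSL uses to flag AMD XOP.
 */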
testl $16777216,(%ebp)
jz .L001x86
testl $512,4(%ebp)
jz .L001x86
jmp .Lssse3_shortcut
.L001x86:
movl 32(%esp),%esi
movl 36(%esp),%edi
subl $132,%esp
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edx
movl %eax,80(%esp)
movl %ebx,84(%esp)
movl %ecx,88(%esp)
movl %edx,92(%esp)
movl 16(%esi),%eax
movl 20(%esi),%ebx
movl 24(%esi),%ecx
movl 28(%esi),%edx
movl %eax,96(%esp)
movl %ebx,100(%esp)
movl %ecx,104(%esp)
movl %edx,108(%esp)
movl (%edi),%eax
movl 4(%edi),%ebx
movl 8(%edi),%ecx
movl 12(%edi),%edx
subl $1,%eax
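/* Pre-decrement the 32-bit block counter; .L002entry re-increments it per block. */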
movl %eax,112(%esp)
movl %ebx,116(%esp)
movl %ecx,120(%esp)
movl %edx,124(%esp)
jmp .L002entry
.align 16
.L003outer_loop:
movl %ebx,156(%esp)
movl %eax,152(%esp)
movl %ecx,160(%esp)
.L002entry:
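/*
 * 1634760805, 857760878, 2036477234 and 1797285236 are the little-endian
 * dwords of the ChaCha constant "expand 32-byte k".
 */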
movl $1634760805,%eax
movl $857760878,4(%esp)
movl $2036477234,8(%esp)
movl $1797285236,12(%esp)
movl 84(%esp),%ebx
movl 88(%esp),%ebp
movl 104(%esp),%ecx
movl 108(%esp),%esi
movl 116(%esp),%edx
movl 120(%esp),%edi
movl %ebx,20(%esp)
movl %ebp,24(%esp)
movl %ecx,40(%esp)
movl %esi,44(%esp)
movl %edx,52(%esp)
movl %edi,56(%esp)
movl 92(%esp),%ebx
movl 124(%esp),%edi
movl 112(%esp),%edx
movl 80(%esp),%ebp
movl 96(%esp),%ecx
movl 100(%esp),%esi
addl $1,%edx
movl %ebx,28(%esp)
movl %edi,60(%esp)
movl %edx,112(%esp)
movl $10,%ebx
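/* 10 iterations of the double round below give the full 20 ChaCha rounds. */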
jmp .L004loop
.align 16
.L004loop:
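/*
 * One double round: four column quarter-rounds followed by four diagonal
 * quarter-rounds, with the 16-word working state spilled to 0-60(%esp)
 * and a rotating subset kept in registers.
 */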
addl %ebp,%eax
movl %ebx,128(%esp)
movl %ebp,%ebx
xorl %eax,%edx
roll $16,%edx
addl %edx,%ecx
xorl %ecx,%ebx
movl 52(%esp),%edi
roll $12,%ebx
movl 20(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,48(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,32(%esp)
roll $16,%edi
movl %ebx,16(%esp)
addl %edi,%esi
movl 40(%esp),%ecx
xorl %esi,%ebp
movl 56(%esp),%edx
roll $12,%ebp
movl 24(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,52(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,36(%esp)
roll $16,%edx
movl %ebp,20(%esp)
addl %edx,%ecx
movl 44(%esp),%esi
xorl %ecx,%ebx
movl 60(%esp),%edi
roll $12,%ebx
movl 28(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,56(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,24(%esp)
addl %edi,%esi
xorl %esi,%ebp
roll $12,%ebp
movl 20(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,%edx
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
roll $16,%edx
movl %ebp,28(%esp)
addl %edx,%ecx
xorl %ecx,%ebx
movl 48(%esp),%edi
roll $12,%ebx
movl 24(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,60(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,40(%esp)
roll $16,%edi
movl %ebx,20(%esp)
addl %edi,%esi
movl 32(%esp),%ecx
xorl %esi,%ebp
movl 52(%esp),%edx
roll $12,%ebp
movl 28(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,48(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,44(%esp)
roll $16,%edx
movl %ebp,24(%esp)
addl %edx,%ecx
movl 36(%esp),%esi
xorl %ecx,%ebx
movl 56(%esp),%edi
roll $12,%ebx
movl 16(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,52(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,28(%esp)
addl %edi,%esi
xorl %esi,%ebp
movl 48(%esp),%edx
roll $12,%ebp
movl 128(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,56(%esp)
xorl %esi,%ebp
roll $7,%ebp
decl %ebx
jnz .L004loop
movl 160(%esp),%ebx
addl $1634760805,%eax
addl 80(%esp),%ebp
addl 96(%esp),%ecx
addl 100(%esp),%esi
cmpl $64,%ebx
jb .L005tail
movl 156(%esp),%ebx
addl 112(%esp),%edx
addl 120(%esp),%edi
xorl (%ebx),%eax
xorl 16(%ebx),%ebp
movl %eax,(%esp)
movl 152(%esp),%eax
xorl 32(%ebx),%ecx
xorl 36(%ebx),%esi
xorl 48(%ebx),%edx
xorl 56(%ebx),%edi
movl %ebp,16(%eax)
movl %ecx,32(%eax)
movl %esi,36(%eax)
movl %edx,48(%eax)
movl %edi,56(%eax)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
xorl 4(%ebx),%ebp
xorl 8(%ebx),%ecx
xorl 12(%ebx),%esi
xorl 20(%ebx),%edx
xorl 24(%ebx),%edi
movl %ebp,4(%eax)
movl %ecx,8(%eax)
movl %esi,12(%eax)
movl %edx,20(%eax)
movl %edi,24(%eax)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
xorl 28(%ebx),%ebp
xorl 40(%ebx),%ecx
xorl 44(%ebx),%esi
xorl 52(%ebx),%edx
xorl 60(%ebx),%edi
leal 64(%ebx),%ebx
movl %ebp,28(%eax)
movl (%esp),%ebp
movl %ecx,40(%eax)
movl 160(%esp),%ecx
movl %esi,44(%eax)
movl %edx,52(%eax)
movl %edi,60(%eax)
movl %ebp,(%eax)
leal 64(%eax),%eax
subl $64,%ecx
jnz .L003outer_loop
jmp .L006done
.L005tail:
addl 112(%esp),%edx
addl 120(%esp),%edi
movl %eax,(%esp)
movl %ebp,16(%esp)
movl %ecx,32(%esp)
movl %esi,36(%esp)
movl %edx,48(%esp)
movl %edi,56(%esp)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
movl %ebp,4(%esp)
movl %ecx,8(%esp)
movl %esi,12(%esp)
movl %edx,20(%esp)
movl %edi,24(%esp)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
movl %ebp,28(%esp)
movl 156(%esp),%ebp
movl %ecx,40(%esp)
movl 152(%esp),%ecx
movl %esi,44(%esp)
xorl %esi,%esi
movl %edx,52(%esp)
movl %edi,60(%esp)
xorl %eax,%eax
xorl %edx,%edx
.L007tail_loop:
movb (%esi,%ebp,1),%al
movb (%esp,%esi,1),%dl
leal 1(%esi),%esi
xorb %dl,%al
movb %al,-1(%ecx,%esi,1)
decl %ebx
jnz .L007tail_loop
.L006done:
addl $132,%esp
.L000no_data:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
.globl ChaCha20_ssse3
.type ChaCha20_ssse3,@function
.align 16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
#ifdef __CET__

.byte 243,15,30,251
#endif

pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
.Lssse3_shortcut:
testl $2048,4(%ebp)
jnz .Lxop_shortcut
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl 36(%esp),%ebx
movl %esp,%ebp
subl $524,%esp
andl $-64,%esp
movl %ebp,512(%esp)
leal .Lssse3_data-.Lpic_point(%eax),%eax
movdqu (%ebx),%xmm3
.L0081x:
movdqa 32(%eax),%xmm0
movdqu (%edx),%xmm1
movdqu 16(%edx),%xmm2
movdqa (%eax),%xmm6
movdqa 16(%eax),%xmm7
movl %ebp,48(%esp)
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
movl $10,%edx
jmp .L009loop1x
.align 16
.L010outer1x:
movdqa 80(%eax),%xmm3
movdqa (%esp),%xmm0
movdqa 16(%esp),%xmm1
movdqa 32(%esp),%xmm2
paddd 48(%esp),%xmm3
movl $10,%edx
movdqa %xmm3,48(%esp)
jmp .L009loop1x
.align 16
.L009loop1x:
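/*
 * The .byte 102,15,56,0,222/223 sequences below encode pshufb %xmm6,%xmm3
 * and pshufb %xmm7,%xmm3: byte shuffles through the .Lssse3_data masks that
 * implement the 16-bit and 8-bit left rotations of the quarter-round.
 */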
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $57,%xmm1,%xmm1
pshufd $147,%xmm3,%xmm3
nop
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $147,%xmm1,%xmm1
pshufd $57,%xmm3,%xmm3
decl %edx
jnz .L009loop1x
paddd (%esp),%xmm0
paddd 16(%esp),%xmm1
paddd 32(%esp),%xmm2
paddd 48(%esp),%xmm3
cmpl $64,%ecx
jb .L011tail
movdqu (%esi),%xmm4
movdqu 16(%esi),%xmm5
pxor %xmm4,%xmm0
movdqu 32(%esi),%xmm4
pxor %xmm5,%xmm1
movdqu 48(%esi),%xmm5
pxor %xmm4,%xmm2
pxor %xmm5,%xmm3
leal 64(%esi),%esi
movdqu %xmm0,(%edi)
movdqu %xmm1,16(%edi)
movdqu %xmm2,32(%edi)
movdqu %xmm3,48(%edi)
leal 64(%edi),%edi
subl $64,%ecx
jnz .L010outer1x
jmp .L012done
.L011tail:
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
xorl %eax,%eax
xorl %edx,%edx
xorl %ebp,%ebp
.L013tail_loop:
movb (%esp,%ebp,1),%al
movb (%esi,%ebp,1),%dl
leal 1(%ebp),%ebp
xorb %dl,%al
movb %al,-1(%edi,%ebp,1)
decl %ecx
jnz .L013tail_loop
.L012done:
movl 512(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
.align 64
.Lssse3_data:
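/*
 * .Lssse3_data layout: pshufb masks for the 16-bit and 8-bit left rotates,
 * the "expand 32-byte k" constants, per-lane counter offsets 0..3, the
 * four-block counter stride, the one-block increment, a +4 counter carry,
 * and a 0,-1,-1,-1 mask used to splice a new counter into the saved nonce.
 */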
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long 1634760805,857760878,2036477234,1797285236
.long 0,1,2,3
.long 4,4,4,4
.long 1,0,0,0
.long 4,0,0,0
.long 0,-1,-1,-1
.align 64
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
.globl ChaCha20_xop
.type ChaCha20_xop,@function
.align 16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
#ifdef __CET__

.byte 243,15,30,251
#endif

pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
.Lxop_shortcut:
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl 36(%esp),%ebx
vzeroupper
movl %esp,%ebp
subl $524,%esp
andl $-64,%esp
movl %ebp,512(%esp)
leal .Lssse3_data-.Lpic_point(%eax),%eax
vmovdqu (%ebx),%xmm3
cmpl $256,%ecx
jb .L0141x
movl %edx,516(%esp)
movl %ebx,520(%esp)
subl $256,%ecx
leal 384(%esp),%ebp
vmovdqu (%edx),%xmm7
vpshufd $0,%xmm3,%xmm0
vpshufd $85,%xmm3,%xmm1
vpshufd $170,%xmm3,%xmm2
vpshufd $255,%xmm3,%xmm3
vpaddd 48(%eax),%xmm0,%xmm0
vpshufd $0,%xmm7,%xmm4
vpshufd $85,%xmm7,%xmm5
vpsubd 64(%eax),%xmm0,%xmm0
vpshufd $170,%xmm7,%xmm6
vpshufd $255,%xmm7,%xmm7
vmovdqa %xmm0,64(%ebp)
vmovdqa %xmm1,80(%ebp)
vmovdqa %xmm2,96(%ebp)
vmovdqa %xmm3,112(%ebp)
vmovdqu 16(%edx),%xmm3
vmovdqa %xmm4,-64(%ebp)
vmovdqa %xmm5,-48(%ebp)
vmovdqa %xmm6,-32(%ebp)
vmovdqa %xmm7,-16(%ebp)
vmovdqa 32(%eax),%xmm7
leal 128(%esp),%ebx
vpshufd $0,%xmm3,%xmm0
vpshufd $85,%xmm3,%xmm1
vpshufd $170,%xmm3,%xmm2
vpshufd $255,%xmm3,%xmm3
vpshufd $0,%xmm7,%xmm4
vpshufd $85,%xmm7,%xmm5
vpshufd $170,%xmm7,%xmm6
vpshufd $255,%xmm7,%xmm7
vmovdqa %xmm0,(%ebp)
vmovdqa %xmm1,16(%ebp)
vmovdqa %xmm2,32(%ebp)
vmovdqa %xmm3,48(%ebp)
vmovdqa %xmm4,-128(%ebp)
vmovdqa %xmm5,-112(%ebp)
vmovdqa %xmm6,-96(%ebp)
vmovdqa %xmm7,-80(%ebp)
leal 128(%esi),%esi
leal 128(%edi),%edi
jmp .L015outer_loop
.align 32
.L015outer_loop:
vmovdqa -112(%ebp),%xmm1
vmovdqa -96(%ebp),%xmm2
vmovdqa -80(%ebp),%xmm3
vmovdqa -48(%ebp),%xmm5
vmovdqa -32(%ebp),%xmm6
vmovdqa -16(%ebp),%xmm7
vmovdqa %xmm1,-112(%ebx)
vmovdqa %xmm2,-96(%ebx)
vmovdqa %xmm3,-80(%ebx)
vmovdqa %xmm5,-48(%ebx)
vmovdqa %xmm6,-32(%ebx)
vmovdqa %xmm7,-16(%ebx)
vmovdqa 32(%ebp),%xmm2
vmovdqa 48(%ebp),%xmm3
vmovdqa 64(%ebp),%xmm4
vmovdqa 80(%ebp),%xmm5
vmovdqa 96(%ebp),%xmm6
vmovdqa 112(%ebp),%xmm7
vpaddd 64(%eax),%xmm4,%xmm4
vmovdqa %xmm2,32(%ebx)
vmovdqa %xmm3,48(%ebx)
vmovdqa %xmm4,64(%ebx)
vmovdqa %xmm5,80(%ebx)
vmovdqa %xmm6,96(%ebx)
vmovdqa %xmm7,112(%ebx)
vmovdqa %xmm4,64(%ebp)
vmovdqa -128(%ebp),%xmm0
vmovdqa %xmm4,%xmm6
vmovdqa -64(%ebp),%xmm3
vmovdqa (%ebp),%xmm4
vmovdqa 16(%ebp),%xmm5
movl $10,%edx
nop
.align 32
.L016loop:
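/*
 * .byte 143,232,120,194,... encodes the XOP instruction vprotd (rotate each
 * dword left); the trailing immediate is the rotate count (16, 12, 8 or 7).
 * Four blocks are processed in parallel, with the 16 lane-sliced state rows
 * spilled around %ebx.
 */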
vpaddd %xmm3,%xmm0,%xmm0
vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,246,16
vpaddd %xmm6,%xmm4,%xmm4
vpxor %xmm4,%xmm3,%xmm2
vmovdqa -112(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -48(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 80(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-128(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,64(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
vmovdqa %xmm4,(%ebx)
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-64(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa 32(%ebx),%xmm4
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -96(%ebx),%xmm0
.byte 143,232,120,194,219,12
vmovdqa -32(%ebx),%xmm2
vpaddd %xmm3,%xmm1,%xmm1
vmovdqa 96(%ebx),%xmm6
vpxor %xmm1,%xmm7,%xmm7
vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-112(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa %xmm7,80(%ebx)
vpxor %xmm5,%xmm3,%xmm3
vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,219,7
vmovdqa %xmm5,16(%ebx)
.byte 143,232,120,194,246,16
vmovdqa %xmm3,-48(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa 48(%ebx),%xmm5
vpxor %xmm4,%xmm2,%xmm2
vmovdqa -80(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -16(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 112(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-96(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,96(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-32(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -128(%ebx),%xmm0
.byte 143,232,120,194,219,12
vmovdqa -48(%ebx),%xmm2
vpaddd %xmm3,%xmm1,%xmm1
vpxor %xmm1,%xmm7,%xmm7
vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-80(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vpxor %xmm5,%xmm3,%xmm3
vpxor %xmm0,%xmm7,%xmm6
.byte 143,232,120,194,219,7
.byte 143,232,120,194,246,16
vmovdqa %xmm3,-16(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vpxor %xmm4,%xmm2,%xmm2
vmovdqa -112(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -32(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 64(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-128(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,112(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
vmovdqa %xmm4,32(%ebx)
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-48(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa (%ebx),%xmm4
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -96(%ebx),%xmm0
.byte 143,232,120,194,219,12
vmovdqa -16(%ebx),%xmm2
vpaddd %xmm3,%xmm1,%xmm1
vmovdqa 80(%ebx),%xmm6
vpxor %xmm1,%xmm7,%xmm7
vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-112(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa %xmm7,64(%ebx)
vpxor %xmm5,%xmm3,%xmm3
vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,219,7
vmovdqa %xmm5,48(%ebx)
.byte 143,232,120,194,246,16
vmovdqa %xmm3,-32(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa 16(%ebx),%xmm5
vpxor %xmm4,%xmm2,%xmm2
vmovdqa -80(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -64(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 96(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-96(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,80(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-16(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -128(%ebx),%xmm0
.byte 143,232,120,194,219,12
vpaddd %xmm3,%xmm1,%xmm1
vmovdqa 64(%ebx),%xmm6
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-80(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa %xmm7,96(%ebx)
vpxor %xmm5,%xmm3,%xmm3
.byte 143,232,120,194,219,7
decl %edx
jnz .L016loop
vmovdqa %xmm3,-64(%ebx)
vmovdqa %xmm4,(%ebx)
vmovdqa %xmm5,16(%ebx)
vmovdqa %xmm6,64(%ebx)
vmovdqa %xmm7,96(%ebx)
vmovdqa -112(%ebx),%xmm1
vmovdqa -96(%ebx),%xmm2
vmovdqa -80(%ebx),%xmm3
vpaddd -128(%ebp),%xmm0,%xmm0
vpaddd -112(%ebp),%xmm1,%xmm1
vpaddd -96(%ebp),%xmm2,%xmm2
vpaddd -80(%ebp),%xmm3,%xmm3
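/*
 * 4x4 dword transpose (vpunpck{l,h}dq + vpunpck{l,h}qdq): convert the
 * lane-sliced rows back into four sequential 64-byte key-stream blocks
 * before XORing with the input at %esi.
 */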
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 16(%esi),%esi
vmovdqa -64(%ebx),%xmm0
vmovdqa -48(%ebx),%xmm1
vmovdqa -32(%ebx),%xmm2
vmovdqa -16(%ebx),%xmm3
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 16(%edi),%edi
vpaddd -64(%ebp),%xmm0,%xmm0
vpaddd -48(%ebp),%xmm1,%xmm1
vpaddd -32(%ebp),%xmm2,%xmm2
vpaddd -16(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 16(%esi),%esi
vmovdqa (%ebx),%xmm0
vmovdqa 16(%ebx),%xmm1
vmovdqa 32(%ebx),%xmm2
vmovdqa 48(%ebx),%xmm3
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 16(%edi),%edi
vpaddd (%ebp),%xmm0,%xmm0
vpaddd 16(%ebp),%xmm1,%xmm1
vpaddd 32(%ebp),%xmm2,%xmm2
vpaddd 48(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 16(%esi),%esi
vmovdqa 64(%ebx),%xmm0
vmovdqa 80(%ebx),%xmm1
vmovdqa 96(%ebx),%xmm2
vmovdqa 112(%ebx),%xmm3
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 16(%edi),%edi
vpaddd 64(%ebp),%xmm0,%xmm0
vpaddd 80(%ebp),%xmm1,%xmm1
vpaddd 96(%ebp),%xmm2,%xmm2
vpaddd 112(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 208(%esi),%esi
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 208(%edi),%edi
subl $256,%ecx
jnc .L015outer_loop
addl $256,%ecx
jz .L017done
movl 520(%esp),%ebx
leal -128(%esi),%esi
movl 516(%esp),%edx
leal -128(%edi),%edi
vmovd 64(%ebp),%xmm2
vmovdqu (%ebx),%xmm3
vpaddd 96(%eax),%xmm2,%xmm2
vpand 112(%eax),%xmm3,%xmm3
vpor %xmm2,%xmm3,%xmm3
.L0141x:
vmovdqa 32(%eax),%xmm0
vmovdqu (%edx),%xmm1
vmovdqu 16(%edx),%xmm2
vmovdqa (%eax),%xmm6
vmovdqa 16(%eax),%xmm7
movl %ebp,48(%esp)
vmovdqa %xmm0,(%esp)
vmovdqa %xmm1,16(%esp)
vmovdqa %xmm2,32(%esp)
vmovdqa %xmm3,48(%esp)
movl $10,%edx
jmp .L018loop1x
.align 16
.L019outer1x:
vmovdqa 80(%eax),%xmm3
vmovdqa (%esp),%xmm0
vmovdqa 16(%esp),%xmm1
vmovdqa 32(%esp),%xmm2
vpaddd 48(%esp),%xmm3,%xmm3
movl $10,%edx
vmovdqa %xmm3,48(%esp)
jmp .L018loop1x
.align 16
.L018loop1x:
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,16
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,12
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,8
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,7
vpshufd $78,%xmm2,%xmm2
vpshufd $57,%xmm1,%xmm1
vpshufd $147,%xmm3,%xmm3
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,16
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,12
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,8
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,7
vpshufd $78,%xmm2,%xmm2
vpshufd $147,%xmm1,%xmm1
vpshufd $57,%xmm3,%xmm3
decl %edx
jnz .L018loop1x
vpaddd (%esp),%xmm0,%xmm0
vpaddd 16(%esp),%xmm1,%xmm1
vpaddd 32(%esp),%xmm2,%xmm2
vpaddd 48(%esp),%xmm3,%xmm3
cmpl $64,%ecx
jb .L020tail
vpxor (%esi),%xmm0,%xmm0
vpxor 16(%esi),%xmm1,%xmm1
vpxor 32(%esi),%xmm2,%xmm2
vpxor 48(%esi),%xmm3,%xmm3
leal 64(%esi),%esi
vmovdqu %xmm0,(%edi)
vmovdqu %xmm1,16(%edi)
vmovdqu %xmm2,32(%edi)
vmovdqu %xmm3,48(%edi)
leal 64(%edi),%edi
subl $64,%ecx
jnz .L019outer1x
jmp .L017done
.L020tail:
vmovdqa %xmm0,(%esp)
vmovdqa %xmm1,16(%esp)
vmovdqa %xmm2,32(%esp)
vmovdqa %xmm3,48(%esp)
xorl %eax,%eax
xorl %edx,%edx
xorl %ebp,%ebp
.L021tail_loop:
movb (%esp,%ebp,1),%al
movb (%esi,%ebp,1),%dl
leal 1(%ebp),%ebp
xorb %dl,%al
movb %al,-1(%edi,%ebp,1)
decl %ecx
jnz .L021tail_loop
.L017done:
vzeroupper
movl 512(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm OPENSSL_ia32cap_P,40,4

.section ".note.gnu.property", "a"
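/*
 * GNU property note: type 0xc0000002 is GNU_PROPERTY_X86_FEATURE_1_AND;
 * the payload value 3 advertises IBT and SHSTK (CET) support.
 */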
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#else
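/*
 * Non-PIC variant: the code below mirrors the PIC build above; the only
 * visible difference is that OPENSSL_ia32cap_P is addressed absolutely
 * rather than relative to .Lpic_point.
 */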
.text
.globl ChaCha20_ctr32
.type ChaCha20_ctr32,@function
.align 16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
#ifdef __CET__

.byte 243,15,30,251
#endif

pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %eax,%eax
cmpl 28(%esp),%eax
je .L000no_data
call .Lpic_point
.Lpic_point:
popl %eax
leal OPENSSL_ia32cap_P,%ebp
testl $16777216,(%ebp)
jz .L001x86
testl $512,4(%ebp)
jz .L001x86
jmp .Lssse3_shortcut
.L001x86:
movl 32(%esp),%esi
movl 36(%esp),%edi
subl $132,%esp
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edx
movl %eax,80(%esp)
movl %ebx,84(%esp)
movl %ecx,88(%esp)
movl %edx,92(%esp)
movl 16(%esi),%eax
movl 20(%esi),%ebx
movl 24(%esi),%ecx
movl 28(%esi),%edx
movl %eax,96(%esp)
movl %ebx,100(%esp)
movl %ecx,104(%esp)
movl %edx,108(%esp)
movl (%edi),%eax
movl 4(%edi),%ebx
movl 8(%edi),%ecx
movl 12(%edi),%edx
subl $1,%eax
movl %eax,112(%esp)
movl %ebx,116(%esp)
movl %ecx,120(%esp)
movl %edx,124(%esp)
jmp .L002entry
.align 16
.L003outer_loop:
movl %ebx,156(%esp)
movl %eax,152(%esp)
movl %ecx,160(%esp)
.L002entry:
movl $1634760805,%eax
movl $857760878,4(%esp)
movl $2036477234,8(%esp)
movl $1797285236,12(%esp)
movl 84(%esp),%ebx
movl 88(%esp),%ebp
movl 104(%esp),%ecx
movl 108(%esp),%esi
movl 116(%esp),%edx
movl 120(%esp),%edi
movl %ebx,20(%esp)
movl %ebp,24(%esp)
movl %ecx,40(%esp)
movl %esi,44(%esp)
movl %edx,52(%esp)
movl %edi,56(%esp)
movl 92(%esp),%ebx
movl 124(%esp),%edi
movl 112(%esp),%edx
movl 80(%esp),%ebp
movl 96(%esp),%ecx
movl 100(%esp),%esi
addl $1,%edx
movl %ebx,28(%esp)
movl %edi,60(%esp)
movl %edx,112(%esp)
movl $10,%ebx
jmp .L004loop
.align 16
.L004loop:
addl %ebp,%eax
movl %ebx,128(%esp)
movl %ebp,%ebx
xorl %eax,%edx
roll $16,%edx
addl %edx,%ecx
xorl %ecx,%ebx
movl 52(%esp),%edi
roll $12,%ebx
movl 20(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,48(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,32(%esp)
roll $16,%edi
movl %ebx,16(%esp)
addl %edi,%esi
movl 40(%esp),%ecx
xorl %esi,%ebp
movl 56(%esp),%edx
roll $12,%ebp
movl 24(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,52(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,36(%esp)
roll $16,%edx
movl %ebp,20(%esp)
addl %edx,%ecx
movl 44(%esp),%esi
xorl %ecx,%ebx
movl 60(%esp),%edi
roll $12,%ebx
movl 28(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,56(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,24(%esp)
addl %edi,%esi
xorl %esi,%ebp
roll $12,%ebp
movl 20(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,%edx
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
roll $16,%edx
movl %ebp,28(%esp)
addl %edx,%ecx
xorl %ecx,%ebx
movl 48(%esp),%edi
roll $12,%ebx
movl 24(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,60(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,40(%esp)
roll $16,%edi
movl %ebx,20(%esp)
addl %edi,%esi
movl 32(%esp),%ecx
xorl %esi,%ebp
movl 52(%esp),%edx
roll $12,%ebp
movl 28(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,48(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,44(%esp)
roll $16,%edx
movl %ebp,24(%esp)
addl %edx,%ecx
movl 36(%esp),%esi
xorl %ecx,%ebx
movl 56(%esp),%edi
roll $12,%ebx
movl 16(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,52(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,28(%esp)
addl %edi,%esi
xorl %esi,%ebp
movl 48(%esp),%edx
roll $12,%ebp
movl 128(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,56(%esp)
xorl %esi,%ebp
roll $7,%ebp
decl %ebx
jnz .L004loop
movl 160(%esp),%ebx
addl $1634760805,%eax
addl 80(%esp),%ebp
addl 96(%esp),%ecx
addl 100(%esp),%esi
cmpl $64,%ebx
jb .L005tail
movl 156(%esp),%ebx
addl 112(%esp),%edx
addl 120(%esp),%edi
xorl (%ebx),%eax
xorl 16(%ebx),%ebp
movl %eax,(%esp)
movl 152(%esp),%eax
xorl 32(%ebx),%ecx
xorl 36(%ebx),%esi
xorl 48(%ebx),%edx
xorl 56(%ebx),%edi
movl %ebp,16(%eax)
movl %ecx,32(%eax)
movl %esi,36(%eax)
movl %edx,48(%eax)
movl %edi,56(%eax)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
xorl 4(%ebx),%ebp
xorl 8(%ebx),%ecx
xorl 12(%ebx),%esi
xorl 20(%ebx),%edx
xorl 24(%ebx),%edi
movl %ebp,4(%eax)
movl %ecx,8(%eax)
movl %esi,12(%eax)
movl %edx,20(%eax)
movl %edi,24(%eax)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
xorl 28(%ebx),%ebp
xorl 40(%ebx),%ecx
xorl 44(%ebx),%esi
xorl 52(%ebx),%edx
xorl 60(%ebx),%edi
leal 64(%ebx),%ebx
movl %ebp,28(%eax)
movl (%esp),%ebp
movl %ecx,40(%eax)
movl 160(%esp),%ecx
movl %esi,44(%eax)
movl %edx,52(%eax)
movl %edi,60(%eax)
movl %ebp,(%eax)
leal 64(%eax),%eax
subl $64,%ecx
jnz .L003outer_loop
jmp .L006done
.L005tail:
addl 112(%esp),%edx
addl 120(%esp),%edi
movl %eax,(%esp)
movl %ebp,16(%esp)
movl %ecx,32(%esp)
movl %esi,36(%esp)
movl %edx,48(%esp)
movl %edi,56(%esp)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
movl %ebp,4(%esp)
movl %ecx,8(%esp)
movl %esi,12(%esp)
movl %edx,20(%esp)
movl %edi,24(%esp)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
movl %ebp,28(%esp)
movl 156(%esp),%ebp
movl %ecx,40(%esp)
movl 152(%esp),%ecx
movl %esi,44(%esp)
xorl %esi,%esi
movl %edx,52(%esp)
movl %edi,60(%esp)
xorl %eax,%eax
xorl %edx,%edx
.L007tail_loop:
movb (%esi,%ebp,1),%al
movb (%esp,%esi,1),%dl
leal 1(%esi),%esi
xorb %dl,%al
movb %al,-1(%ecx,%esi,1)
decl %ebx
jnz .L007tail_loop
.L006done:
addl $132,%esp
.L000no_data:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
.globl ChaCha20_ssse3
.type ChaCha20_ssse3,@function
.align 16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
#ifdef __CET__

.byte 243,15,30,251
#endif

pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
.Lssse3_shortcut:
testl $2048,4(%ebp)
jnz .Lxop_shortcut
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl 36(%esp),%ebx
movl %esp,%ebp
subl $524,%esp
andl $-64,%esp
movl %ebp,512(%esp)
leal .Lssse3_data-.Lpic_point(%eax),%eax
movdqu (%ebx),%xmm3
.L0081x:
movdqa 32(%eax),%xmm0
movdqu (%edx),%xmm1
movdqu 16(%edx),%xmm2
movdqa (%eax),%xmm6
movdqa 16(%eax),%xmm7
movl %ebp,48(%esp)
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
movl $10,%edx
jmp .L009loop1x
.align 16
.L010outer1x:
movdqa 80(%eax),%xmm3
movdqa (%esp),%xmm0
movdqa 16(%esp),%xmm1
movdqa 32(%esp),%xmm2
paddd 48(%esp),%xmm3
movl $10,%edx
movdqa %xmm3,48(%esp)
jmp .L009loop1x
.align 16
.L009loop1x:
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $57,%xmm1,%xmm1
pshufd $147,%xmm3,%xmm3
nop
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $147,%xmm1,%xmm1
pshufd $57,%xmm3,%xmm3
decl %edx
jnz .L009loop1x
paddd (%esp),%xmm0
paddd 16(%esp),%xmm1
paddd 32(%esp),%xmm2
paddd 48(%esp),%xmm3
cmpl $64,%ecx
jb .L011tail
movdqu (%esi),%xmm4
movdqu 16(%esi),%xmm5
pxor %xmm4,%xmm0
movdqu 32(%esi),%xmm4
pxor %xmm5,%xmm1
movdqu 48(%esi),%xmm5
pxor %xmm4,%xmm2
pxor %xmm5,%xmm3
leal 64(%esi),%esi
movdqu %xmm0,(%edi)
movdqu %xmm1,16(%edi)
movdqu %xmm2,32(%edi)
movdqu %xmm3,48(%edi)
leal 64(%edi),%edi
subl $64,%ecx
jnz .L010outer1x
jmp .L012done
.L011tail:
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
xorl %eax,%eax
xorl %edx,%edx
xorl %ebp,%ebp
.L013tail_loop:
movb (%esp,%ebp,1),%al
movb (%esi,%ebp,1),%dl
leal 1(%ebp),%ebp
xorb %dl,%al
movb %al,-1(%edi,%ebp,1)
decl %ecx
jnz .L013tail_loop
.L012done:
movl 512(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
.align 64
.Lssse3_data:
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long 1634760805,857760878,2036477234,1797285236
.long 0,1,2,3
.long 4,4,4,4
.long 1,0,0,0
.long 4,0,0,0
.long 0,-1,-1,-1
.align 64
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
.globl ChaCha20_xop
.type ChaCha20_xop,@function
.align 16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
#ifdef __CET__

.byte 243,15,30,251
#endif

pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
.Lxop_shortcut:
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl 36(%esp),%ebx
vzeroupper
movl %esp,%ebp
subl $524,%esp
andl $-64,%esp
movl %ebp,512(%esp)
leal .Lssse3_data-.Lpic_point(%eax),%eax
vmovdqu (%ebx),%xmm3
cmpl $256,%ecx
jb .L0141x
movl %edx,516(%esp)
movl %ebx,520(%esp)
subl $256,%ecx
leal 384(%esp),%ebp
vmovdqu (%edx),%xmm7
vpshufd $0,%xmm3,%xmm0
vpshufd $85,%xmm3,%xmm1
vpshufd $170,%xmm3,%xmm2
vpshufd $255,%xmm3,%xmm3
vpaddd 48(%eax),%xmm0,%xmm0
vpshufd $0,%xmm7,%xmm4
vpshufd $85,%xmm7,%xmm5
vpsubd 64(%eax),%xmm0,%xmm0
vpshufd $170,%xmm7,%xmm6
vpshufd $255,%xmm7,%xmm7
vmovdqa %xmm0,64(%ebp)
vmovdqa %xmm1,80(%ebp)
vmovdqa %xmm2,96(%ebp)
vmovdqa %xmm3,112(%ebp)
vmovdqu 16(%edx),%xmm3
vmovdqa %xmm4,-64(%ebp)
vmovdqa %xmm5,-48(%ebp)
vmovdqa %xmm6,-32(%ebp)
vmovdqa %xmm7,-16(%ebp)
vmovdqa 32(%eax),%xmm7
leal 128(%esp),%ebx
vpshufd $0,%xmm3,%xmm0
vpshufd $85,%xmm3,%xmm1
vpshufd $170,%xmm3,%xmm2
vpshufd $255,%xmm3,%xmm3
vpshufd $0,%xmm7,%xmm4
vpshufd $85,%xmm7,%xmm5
vpshufd $170,%xmm7,%xmm6
vpshufd $255,%xmm7,%xmm7
vmovdqa %xmm0,(%ebp)
vmovdqa %xmm1,16(%ebp)
vmovdqa %xmm2,32(%ebp)
vmovdqa %xmm3,48(%ebp)
vmovdqa %xmm4,-128(%ebp)
vmovdqa %xmm5,-112(%ebp)
vmovdqa %xmm6,-96(%ebp)
vmovdqa %xmm7,-80(%ebp)
leal 128(%esi),%esi
leal 128(%edi),%edi
jmp .L015outer_loop
.align 32
.L015outer_loop:
vmovdqa -112(%ebp),%xmm1
vmovdqa -96(%ebp),%xmm2
vmovdqa -80(%ebp),%xmm3
vmovdqa -48(%ebp),%xmm5
vmovdqa -32(%ebp),%xmm6
vmovdqa -16(%ebp),%xmm7
vmovdqa %xmm1,-112(%ebx)
vmovdqa %xmm2,-96(%ebx)
vmovdqa %xmm3,-80(%ebx)
vmovdqa %xmm5,-48(%ebx)
vmovdqa %xmm6,-32(%ebx)
vmovdqa %xmm7,-16(%ebx)
vmovdqa 32(%ebp),%xmm2
vmovdqa 48(%ebp),%xmm3
vmovdqa 64(%ebp),%xmm4
vmovdqa 80(%ebp),%xmm5
vmovdqa 96(%ebp),%xmm6
vmovdqa 112(%ebp),%xmm7
vpaddd 64(%eax),%xmm4,%xmm4
vmovdqa %xmm2,32(%ebx)
vmovdqa %xmm3,48(%ebx)
vmovdqa %xmm4,64(%ebx)
vmovdqa %xmm5,80(%ebx)
vmovdqa %xmm6,96(%ebx)
vmovdqa %xmm7,112(%ebx)
vmovdqa %xmm4,64(%ebp)
vmovdqa -128(%ebp),%xmm0
vmovdqa %xmm4,%xmm6
vmovdqa -64(%ebp),%xmm3
vmovdqa (%ebp),%xmm4
vmovdqa 16(%ebp),%xmm5
movl $10,%edx
nop
.align 32
.L016loop:
vpaddd %xmm3,%xmm0,%xmm0
vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,246,16
vpaddd %xmm6,%xmm4,%xmm4
vpxor %xmm4,%xmm3,%xmm2
vmovdqa -112(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -48(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 80(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-128(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,64(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
vmovdqa %xmm4,(%ebx)
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-64(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa 32(%ebx),%xmm4
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -96(%ebx),%xmm0
.byte 143,232,120,194,219,12
vmovdqa -32(%ebx),%xmm2
vpaddd %xmm3,%xmm1,%xmm1
vmovdqa 96(%ebx),%xmm6
vpxor %xmm1,%xmm7,%xmm7
vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-112(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa %xmm7,80(%ebx)
vpxor %xmm5,%xmm3,%xmm3
vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,219,7
vmovdqa %xmm5,16(%ebx)
.byte 143,232,120,194,246,16
vmovdqa %xmm3,-48(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa 48(%ebx),%xmm5
vpxor %xmm4,%xmm2,%xmm2
vmovdqa -80(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -16(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 112(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-96(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,96(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-32(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -128(%ebx),%xmm0
.byte 143,232,120,194,219,12
vmovdqa -48(%ebx),%xmm2
vpaddd %xmm3,%xmm1,%xmm1
vpxor %xmm1,%xmm7,%xmm7
vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-80(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vpxor %xmm5,%xmm3,%xmm3
vpxor %xmm0,%xmm7,%xmm6
.byte 143,232,120,194,219,7
.byte 143,232,120,194,246,16
vmovdqa %xmm3,-16(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vpxor %xmm4,%xmm2,%xmm2
vmovdqa -112(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -32(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 64(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-128(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,112(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
vmovdqa %xmm4,32(%ebx)
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-48(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa (%ebx),%xmm4
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -96(%ebx),%xmm0
.byte 143,232,120,194,219,12
vmovdqa -16(%ebx),%xmm2
vpaddd %xmm3,%xmm1,%xmm1
vmovdqa 80(%ebx),%xmm6
vpxor %xmm1,%xmm7,%xmm7
vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-112(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa %xmm7,64(%ebx)
vpxor %xmm5,%xmm3,%xmm3
vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,219,7
vmovdqa %xmm5,48(%ebx)
.byte 143,232,120,194,246,16
vmovdqa %xmm3,-32(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa 16(%ebx),%xmm5
vpxor %xmm4,%xmm2,%xmm2
vmovdqa -80(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -64(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 96(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-96(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,80(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-16(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -128(%ebx),%xmm0
.byte 143,232,120,194,219,12
vpaddd %xmm3,%xmm1,%xmm1
vmovdqa 64(%ebx),%xmm6
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-80(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa %xmm7,96(%ebx)
vpxor %xmm5,%xmm3,%xmm3
.byte 143,232,120,194,219,7
decl %edx
jnz .L016loop
vmovdqa %xmm3,-64(%ebx)
vmovdqa %xmm4,(%ebx)
vmovdqa %xmm5,16(%ebx)
vmovdqa %xmm6,64(%ebx)
vmovdqa %xmm7,96(%ebx)
vmovdqa -112(%ebx),%xmm1
vmovdqa -96(%ebx),%xmm2
vmovdqa -80(%ebx),%xmm3
vpaddd -128(%ebp),%xmm0,%xmm0
vpaddd -112(%ebp),%xmm1,%xmm1
vpaddd -96(%ebp),%xmm2,%xmm2
vpaddd -80(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 16(%esi),%esi
vmovdqa -64(%ebx),%xmm0
vmovdqa -48(%ebx),%xmm1
vmovdqa -32(%ebx),%xmm2
vmovdqa -16(%ebx),%xmm3
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 16(%edi),%edi
vpaddd -64(%ebp),%xmm0,%xmm0
vpaddd -48(%ebp),%xmm1,%xmm1
vpaddd -32(%ebp),%xmm2,%xmm2
vpaddd -16(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 16(%esi),%esi
vmovdqa (%ebx),%xmm0
vmovdqa 16(%ebx),%xmm1
vmovdqa 32(%ebx),%xmm2
vmovdqa 48(%ebx),%xmm3
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 16(%edi),%edi
vpaddd (%ebp),%xmm0,%xmm0
vpaddd 16(%ebp),%xmm1,%xmm1
vpaddd 32(%ebp),%xmm2,%xmm2
vpaddd 48(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 16(%esi),%esi
vmovdqa 64(%ebx),%xmm0
vmovdqa 80(%ebx),%xmm1
vmovdqa 96(%ebx),%xmm2
vmovdqa 112(%ebx),%xmm3
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 16(%edi),%edi
vpaddd 64(%ebp),%xmm0,%xmm0
vpaddd 80(%ebp),%xmm1,%xmm1
vpaddd 96(%ebp),%xmm2,%xmm2
vpaddd 112(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 208(%esi),%esi
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 208(%edi),%edi
subl $256,%ecx
jnc .L015outer_loop
addl $256,%ecx
jz .L017done
movl 520(%esp),%ebx
leal -128(%esi),%esi
movl 516(%esp),%edx
leal -128(%edi),%edi
vmovd 64(%ebp),%xmm2
vmovdqu (%ebx),%xmm3
vpaddd 96(%eax),%xmm2,%xmm2
vpand 112(%eax),%xmm3,%xmm3
vpor %xmm2,%xmm3,%xmm3
.L0141x:
vmovdqa 32(%eax),%xmm0
vmovdqu (%edx),%xmm1
vmovdqu 16(%edx),%xmm2
vmovdqa (%eax),%xmm6
vmovdqa 16(%eax),%xmm7
movl %ebp,48(%esp)
vmovdqa %xmm0,(%esp)
vmovdqa %xmm1,16(%esp)
vmovdqa %xmm2,32(%esp)
vmovdqa %xmm3,48(%esp)
movl $10,%edx
jmp .L018loop1x
.align 16
.L019outer1x:
vmovdqa 80(%eax),%xmm3
vmovdqa (%esp),%xmm0
vmovdqa 16(%esp),%xmm1
vmovdqa 32(%esp),%xmm2
vpaddd 48(%esp),%xmm3,%xmm3
movl $10,%edx
vmovdqa %xmm3,48(%esp)
jmp .L018loop1x
.align 16
.L018loop1x:
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,16
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,12
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,8
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,7
vpshufd $78,%xmm2,%xmm2
vpshufd $57,%xmm1,%xmm1
vpshufd $147,%xmm3,%xmm3
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,16
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,12
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,8
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,7
vpshufd $78,%xmm2,%xmm2
vpshufd $147,%xmm1,%xmm1
vpshufd $57,%xmm3,%xmm3
decl %edx
jnz .L018loop1x
vpaddd (%esp),%xmm0,%xmm0
vpaddd 16(%esp),%xmm1,%xmm1
vpaddd 32(%esp),%xmm2,%xmm2
vpaddd 48(%esp),%xmm3,%xmm3
cmpl $64,%ecx
jb .L020tail
vpxor (%esi),%xmm0,%xmm0
vpxor 16(%esi),%xmm1,%xmm1
vpxor 32(%esi),%xmm2,%xmm2
vpxor 48(%esi),%xmm3,%xmm3
leal 64(%esi),%esi
vmovdqu %xmm0,(%edi)
vmovdqu %xmm1,16(%edi)
vmovdqu %xmm2,32(%edi)
vmovdqu %xmm3,48(%edi)
leal 64(%edi),%edi
subl $64,%ecx
jnz .L019outer1x
jmp .L017done
.L020tail:
vmovdqa %xmm0,(%esp)
vmovdqa %xmm1,16(%esp)
vmovdqa %xmm2,32(%esp)
vmovdqa %xmm3,48(%esp)
xorl %eax,%eax
xorl %edx,%edx
xorl %ebp,%ebp
.L021tail_loop:
movb (%esp,%ebp,1),%al
movb (%esi,%ebp,1),%dl
leal 1(%ebp),%ebp
xorb %dl,%al
movb %al,-1(%edi,%ebp,1)
decl %ecx
jnz .L021tail_loop
.L017done:
vzeroupper
movl 512(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm OPENSSL_ia32cap_P,40,4

.section ".note.gnu.property", "a"
.p2align 2
.long 1f - 0f
.long 4f - 1f
.long 5
0:
.asciz "GNU"
1:
.p2align 2
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 2
4:
#endif