/* Source: GitHub repository freebsd/freebsd-src, path sys/crypto/openssl/i386/poly1305-x86.S (branch main). */
/* Do not modify. This file is auto-generated from poly1305-x86.pl. */
/*
 * Position-independent variant: the routines below locate code and data
 * with a call/pop sequence instead of absolute addresses.
 */
#ifdef PIC
.text
.align	64
/*
 * poly1305_init -- i386, arguments on the stack.
 *   arg0 (20(%esp) after the four pushes): context pointer
 *   arg1 (24(%esp)): pointer to 16 key bytes, may be NULL
 *   arg2 (28(%esp)): pointer to a two-entry table of function pointers
 *
 * Zeroes the first 24 bytes of the context.  If arg1 is NULL it returns 0
 * in %eax without touching arg2.  Otherwise it stores a "blocks" routine
 * address in arg2[0] and an "emit" routine address in arg2[1], stores the
 * masked key words at context offsets 24..36 and returns 1.
 *
 * Routine selection reads OPENSSL_ia32cap_P:
 *   word 0 bits 24 and 26 both set -> _poly1305_blocks_sse2/_poly1305_emit_sse2
 *   additionally word 2 bit 5 set  -> _poly1305_blocks_avx2 (emit stays SSE2)
 * NOTE(review): bit meanings (FXSR/SSE2/AVX2) follow OpenSSL's ia32cap
 * layout -- confirm against the OPENSSL_ia32cap documentation.
 *
 * %ebp, %ebx, %esi and %edi are saved and restored; %ecx and %edx are
 * clobbered.
 */
.globl	poly1305_init
.type	poly1305_init,@function
.align	16
poly1305_init:
.L_poly1305_init_begin:
#ifdef __CET__

.byte	243,15,30,251			/* endbr32 */
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi		/* edi = context */
	movl	24(%esp),%esi		/* esi = key (or NULL) */
	movl	28(%esp),%ebp		/* ebp = function table */
	xorl	%eax,%eax		/* also the return value on the NULL-key path */
	movl	%eax,(%edi)
	movl	%eax,4(%edi)
	movl	%eax,8(%edi)
	movl	%eax,12(%edi)
	movl	%eax,16(%edi)
	movl	%eax,20(%edi)
	cmpl	$0,%esi
	je	.L000nokey
	call	.L001pic_point		/* push our own address ... */
.L001pic_point:
	popl	%ebx			/* ... and pop it: ebx = &.L001pic_point */
	leal	poly1305_blocks-.L001pic_point(%ebx),%eax
	leal	poly1305_emit-.L001pic_point(%ebx),%edx
	leal	OPENSSL_ia32cap_P-.L001pic_point(%ebx),%edi
	movl	(%edi),%ecx
	andl	$83886080,%ecx		/* 0x05000000: bits 24 and 26 */
	cmpl	$83886080,%ecx
	jne	.L002no_sse2
	leal	_poly1305_blocks_sse2-.L001pic_point(%ebx),%eax
	leal	_poly1305_emit_sse2-.L001pic_point(%ebx),%edx
	movl	8(%edi),%ecx
	testl	$32,%ecx		/* bit 5 of capability word 2 */
	jz	.L002no_sse2
	leal	_poly1305_blocks_avx2-.L001pic_point(%ebx),%eax
.L002no_sse2:
	movl	20(%esp),%edi		/* edi was reused above; reload context */
	movl	%eax,(%ebp)		/* table[0] = blocks routine */
	movl	%edx,4(%ebp)		/* table[1] = emit routine */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	andl	$268435455,%eax		/* 0x0fffffff */
	andl	$268435452,%ebx		/* 0x0ffffffc */
	andl	$268435452,%ecx		/* 0x0ffffffc */
	andl	$268435452,%edx		/* 0x0ffffffc */
	movl	%eax,24(%edi)
	movl	%ebx,28(%edi)
	movl	%ecx,32(%edi)
	movl	%edx,36(%edi)
	movl	$1,%eax
.L000nokey:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_init,.-.L_poly1305_init_begin
/*
 * poly1305_blocks -- i386 integer-only block function, arguments on the stack.
 *   arg0: context pointer (accumulator words h0..h4 at offsets 0..16,
 *         key words r0..r3 at offsets 24..36)
 *   arg1: input pointer
 *   arg2: input length in bytes; only whole 16-byte blocks are consumed
 *   arg3: value added into the top accumulator word together with the
 *         carry out of each block addition
 *
 * For every 16-byte block: h += block (+ arg3 in the top word), then
 * h = h * r with the partial reduction at the bottom of the loop
 * (top word keeps its low 2 bits, the rest is folded back in times 5).
 *
 * .Lenter_blocks is an alternate entry used by _poly1305_blocks_sse2; it
 * expects the four registers already pushed and edi/esi/ecx loaded.
 *
 * Fix relative to the generated code: the length mask was -15, which
 * leaves bit 0 of the length set.  With an odd length the end pointer is
 * never reached by the 16-byte stepping loop pointer, so the loop ran past
 * the buffer.  The mask is now -16, identical for multiples of 16.  The
 * generator (poly1305-x86.pl) should receive the same change.
 *
 * Stack frame after "subl $64,%esp":
 *    0(%esp)         h0 of the current sum
 *   12(%esp),16(%esp) h3, h4 of the current sum
 *   20..28(%esp)     product words d0..d2
 *   36..48(%esp)     r0..r3
 *   52..60(%esp)     s1..s3, where sN = rN + (rN >> 2)
 *   84(%esp)         arg0 (context)
 *   92(%esp)         arg2 slot, overwritten with the end-of-input pointer
 *   96(%esp)         arg3
 */
.globl	poly1305_blocks
.type	poly1305_blocks,@function
.align	16
poly1305_blocks:
.L_poly1305_blocks_begin:
#ifdef __CET__

.byte	243,15,30,251			/* endbr32 */
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
.Lenter_blocks:
	andl	$-16,%ecx		/* whole blocks only (was -15) */
	jz	.L003nodata
	subl	$64,%esp
	movl	24(%edi),%eax		/* r0 */
	movl	28(%edi),%ebx		/* r1 */
	leal	(%esi,%ecx,1),%ebp	/* end of input */
	movl	32(%edi),%ecx		/* r2 */
	movl	36(%edi),%edx		/* r3 */
	movl	%ebp,92(%esp)
	movl	%esi,%ebp		/* ebp = running input pointer */
	movl	%eax,36(%esp)
	movl	%ebx,%eax
	shrl	$2,%eax
	movl	%ebx,40(%esp)
	addl	%ebx,%eax		/* s1 = r1 + (r1 >> 2) */
	movl	%ecx,%ebx
	shrl	$2,%ebx
	movl	%ecx,44(%esp)
	addl	%ecx,%ebx		/* s2 */
	movl	%edx,%ecx
	shrl	$2,%ecx
	movl	%edx,48(%esp)
	addl	%edx,%ecx		/* s3 */
	movl	%eax,52(%esp)
	movl	%ebx,56(%esp)
	movl	%ecx,60(%esp)
	movl	(%edi),%eax		/* h0 */
	movl	4(%edi),%ebx		/* h1 */
	movl	8(%edi),%ecx		/* h2 */
	movl	12(%edi),%esi		/* h3 */
	movl	16(%edi),%edi		/* h4 */
	jmp	.L004loop
.align	32
.L004loop:
	/* h += input block; top word also takes arg3 */
	addl	(%ebp),%eax
	adcl	4(%ebp),%ebx
	adcl	8(%ebp),%ecx
	adcl	12(%ebp),%esi
	leal	16(%ebp),%ebp		/* lea: advance without touching CF */
	adcl	96(%esp),%edi
	movl	%eax,(%esp)
	movl	%esi,12(%esp)
	/* d0 = h0*r0 + h1*s3 + h2*s2 + h3*s1 */
	mull	36(%esp)
	movl	%edi,16(%esp)
	movl	%eax,%edi
	movl	%ebx,%eax
	movl	%edx,%esi
	mull	60(%esp)
	addl	%eax,%edi
	movl	%ecx,%eax
	adcl	%edx,%esi
	mull	56(%esp)
	addl	%eax,%edi
	movl	12(%esp),%eax
	adcl	%edx,%esi
	mull	52(%esp)
	addl	%eax,%edi
	movl	(%esp),%eax
	adcl	%edx,%esi
	/* d1 = h0*r1 + h1*r0 + h2*s3 + h3*s2 + h4*s1 */
	mull	40(%esp)
	movl	%edi,20(%esp)
	xorl	%edi,%edi
	addl	%eax,%esi
	movl	%ebx,%eax
	adcl	%edx,%edi
	mull	36(%esp)
	addl	%eax,%esi
	movl	%ecx,%eax
	adcl	%edx,%edi
	mull	60(%esp)
	addl	%eax,%esi
	movl	12(%esp),%eax
	adcl	%edx,%edi
	mull	56(%esp)
	addl	%eax,%esi
	movl	16(%esp),%eax
	adcl	%edx,%edi
	imull	52(%esp),%eax
	addl	%eax,%esi
	movl	(%esp),%eax
	adcl	$0,%edi
	/* d2 = h0*r2 + h1*r1 + h2*r0 + h3*s3 + h4*s2 */
	mull	44(%esp)
	movl	%esi,24(%esp)
	xorl	%esi,%esi
	addl	%eax,%edi
	movl	%ebx,%eax
	adcl	%edx,%esi
	mull	40(%esp)
	addl	%eax,%edi
	movl	%ecx,%eax
	adcl	%edx,%esi
	mull	36(%esp)
	addl	%eax,%edi
	movl	12(%esp),%eax
	adcl	%edx,%esi
	mull	60(%esp)
	addl	%eax,%edi
	movl	16(%esp),%eax
	adcl	%edx,%esi
	imull	56(%esp),%eax
	addl	%eax,%edi
	movl	(%esp),%eax
	adcl	$0,%esi
	/* d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*s3 */
	mull	48(%esp)
	movl	%edi,28(%esp)
	xorl	%edi,%edi
	addl	%eax,%esi
	movl	%ebx,%eax
	adcl	%edx,%edi
	mull	44(%esp)
	addl	%eax,%esi
	movl	%ecx,%eax
	adcl	%edx,%edi
	mull	40(%esp)
	addl	%eax,%esi
	movl	12(%esp),%eax
	adcl	%edx,%edi
	mull	36(%esp)
	addl	%eax,%esi
	movl	16(%esp),%ecx
	adcl	%edx,%edi
	movl	%ecx,%edx
	imull	60(%esp),%ecx
	addl	%ecx,%esi
	movl	20(%esp),%eax
	adcl	$0,%edi
	/* d4 = h4*r0 + carry */
	imull	36(%esp),%edx
	addl	%edi,%edx
	movl	24(%esp),%ebx
	movl	28(%esp),%ecx
	/* partial reduction: keep low 2 bits of d4, fold (d4 >> 2) * 5 into d0 */
	movl	%edx,%edi
	shrl	$2,%edx
	andl	$3,%edi
	leal	(%edx,%edx,4),%edx
	addl	%edx,%eax
	adcl	$0,%ebx
	adcl	$0,%ecx
	adcl	$0,%esi
	adcl	$0,%edi
	cmpl	92(%esp),%ebp		/* reached end of input? */
	jne	.L004loop
	movl	84(%esp),%edx		/* context */
	addl	$64,%esp
	movl	%eax,(%edx)
	movl	%ebx,4(%edx)
	movl	%ecx,8(%edx)
	movl	%esi,12(%edx)
	movl	%edi,16(%edx)
.L003nodata:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_blocks,.-.L_poly1305_blocks_begin
/*
 * poly1305_emit -- i386 integer-only finalisation, arguments on the stack.
 *   arg0: context pointer (accumulator words h0..h4 at offsets 0..16)
 *   arg1: 16-byte output buffer
 *   arg2: pointer to four 32-bit words added to the result
 *
 * Computes g = h + 5.  If g carried into bit 2 of the fifth word
 * the low 128 bits of g are selected, otherwise the low 128 bits of h; the
 * selection is done with an all-ones/all-zeros mask rather than a branch.
 * The four words at arg2 are then added with carry and the 16-byte result
 * is written to arg1.
 *
 * .Lenter_emit is an alternate entry used by _poly1305_emit_sse2; it
 * expects the four registers already pushed and %ebp = context.
 */
.globl	poly1305_emit
.type	poly1305_emit,@function
.align	16
poly1305_emit:
.L_poly1305_emit_begin:
#ifdef __CET__

.byte	243,15,30,251			/* endbr32 */
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%ebp
.Lenter_emit:
	movl	24(%esp),%edi		/* output buffer */
	movl	(%ebp),%eax
	movl	4(%ebp),%ebx
	movl	8(%ebp),%ecx
	movl	12(%ebp),%edx
	movl	16(%ebp),%esi
	addl	$5,%eax			/* g = h + 5 */
	adcl	$0,%ebx
	adcl	$0,%ecx
	adcl	$0,%edx
	adcl	$0,%esi
	shrl	$2,%esi			/* 1 if g reached bit 130, else 0 */
	negl	%esi			/* -> all ones / all zeros */
	andl	%esi,%eax
	andl	%esi,%ebx
	andl	%esi,%ecx
	andl	%esi,%edx
	movl	%eax,(%edi)		/* park masked g in the output buffer */
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	notl	%esi			/* complementary mask for h */
	movl	(%ebp),%eax
	movl	4(%ebp),%ebx
	movl	8(%ebp),%ecx
	movl	12(%ebp),%edx
	movl	28(%esp),%ebp		/* ebp = words to add (arg2) */
	andl	%esi,%eax
	andl	%esi,%ebx
	andl	%esi,%ecx
	andl	%esi,%edx
	orl	(%edi),%eax		/* merge: exactly one of g / h survives */
	orl	4(%edi),%ebx
	orl	8(%edi),%ecx
	orl	12(%edi),%edx
	addl	(%ebp),%eax
	adcl	4(%ebp),%ebx
	adcl	8(%ebp),%ecx
	adcl	12(%ebp),%edx
	movl	%eax,(%edi)
	movl	%ebx,4(%edi)
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	poly1305_emit,.-.L_poly1305_emit_begin
/*
 * _poly1305_init_sse2 -- internal helper, register calling convention
 * (not callable from C).
 *   in:  %edi = context pointer (key words read from offset 24)
 *        %ebx = base of a constant area; the 16 bytes at 64(%ebx) are used
 *               as the AND mask after each 26-bit shift
 *               (presumably the 2^26-1 limb mask in .Lconst_sse2 -- confirm)
 *   out: nine 16-byte table rows written to context offsets 48..191:
 *        five limb rows followed by four rows holding limb*5 for limbs 1..4
 *        %edi is restored to the context pointer on return
 *   clobbers: %ebp (holds the caller's %esp), %ecx, %edx, %xmm0-%xmm7, flags
 *
 * The key is split into five limbs (shifts of 0/26/52/78/104 bits), and the
 * multiply-and-reduce body at .L005square is executed twice (%ecx = 2),
 * re-packing the previous limbs alongside the new ones between the passes.
 * A 224-byte, 16-byte-aligned scratch area is carved from the stack.
 */
.align	32
.type	_poly1305_init_sse2,@function
.align	16
_poly1305_init_sse2:
#ifdef __CET__

.byte	243,15,30,251			/* endbr32 */
#endif

	movdqu	24(%edi),%xmm4
	leal	48(%edi),%edi		/* edi -> table area */
	movl	%esp,%ebp		/* remember esp; restored before ret */
	subl	$224,%esp
	andl	$-16,%esp		/* movdqa below needs 16-byte alignment */
	movq	64(%ebx),%xmm7
	/* split the 128-bit key into five limbs in xmm0..xmm4 */
	movdqa	%xmm4,%xmm0
	movdqa	%xmm4,%xmm1
	movdqa	%xmm4,%xmm2
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm1
	psrldq	$6,%xmm2
	pand	%xmm7,%xmm1
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	psrldq	$13,%xmm4
	leal	144(%esp),%edx
	movl	$2,%ecx			/* two passes through .L005square */
.L005square:
	/* save current limbs and their *5 multiples (x*4 + x) */
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	movdqa	%xmm1,%xmm6
	movdqa	%xmm2,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm1,%xmm6
	paddd	%xmm2,%xmm5
	movdqa	%xmm6,80(%esp)
	movdqa	%xmm5,96(%esp)
	movdqa	%xmm3,%xmm6
	movdqa	%xmm4,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm3,%xmm6
	paddd	%xmm4,%xmm5
	movdqa	%xmm6,112(%esp)
	movdqa	%xmm5,128(%esp)
	/* broadcast the low qword of each limb and stash at (%edx) */
	pshufd	$68,%xmm0,%xmm6
	movdqa	%xmm1,%xmm5
	pshufd	$68,%xmm1,%xmm1
	pshufd	$68,%xmm2,%xmm2
	pshufd	$68,%xmm3,%xmm3
	pshufd	$68,%xmm4,%xmm4
	movdqa	%xmm6,(%edx)
	movdqa	%xmm1,16(%edx)
	movdqa	%xmm2,32(%edx)
	movdqa	%xmm3,48(%edx)
	movdqa	%xmm4,64(%edx)
	/* 5x5 limb product accumulated into xmm0..xmm4 */
	pmuludq	%xmm0,%xmm4
	pmuludq	%xmm0,%xmm3
	pmuludq	%xmm0,%xmm2
	pmuludq	%xmm0,%xmm1
	pmuludq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	48(%edx),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	80(%esp),%xmm6
	pmuludq	(%edx),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%edx),%xmm6
	movdqa	32(%esp),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%edx),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%edx),%xmm5
	paddq	%xmm7,%xmm4
	movdqa	96(%esp),%xmm7
	pmuludq	(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%edx),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%edx),%xmm5
	movdqa	48(%esp),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%edx),%xmm6
	paddq	%xmm5,%xmm0
	movdqa	112(%esp),%xmm5
	pmuludq	(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%edx),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%edx),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%edx),%xmm7
	movdqa	64(%esp),%xmm5
	paddq	%xmm6,%xmm1
	movdqa	128(%esp),%xmm6
	pmuludq	(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%edx),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%edx),%xmm6
	movdqa	64(%ebx),%xmm7		/* reload the limb mask */
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	/* carry propagation between limbs; top carry re-enters limb 0 as c + 4c */
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
	decl	%ecx
	jz	.L006square_break
	/* pair the new limbs (low qword) with the previous ones (high qword) */
	punpcklqdq	(%esp),%xmm0
	punpcklqdq	16(%esp),%xmm1
	punpcklqdq	32(%esp),%xmm2
	punpcklqdq	48(%esp),%xmm3
	punpcklqdq	64(%esp),%xmm4
	jmp	.L005square
.L006square_break:
	/* interleave the final limbs with the saved ones and reorder dwords */
	psllq	$32,%xmm0
	psllq	$32,%xmm1
	psllq	$32,%xmm2
	psllq	$32,%xmm3
	psllq	$32,%xmm4
	por	(%esp),%xmm0
	por	16(%esp),%xmm1
	por	32(%esp),%xmm2
	por	48(%esp),%xmm3
	por	64(%esp),%xmm4
	pshufd	$141,%xmm0,%xmm0
	pshufd	$141,%xmm1,%xmm1
	pshufd	$141,%xmm2,%xmm2
	pshufd	$141,%xmm3,%xmm3
	pshufd	$141,%xmm4,%xmm4
	/* context may be unaligned: movdqu for every table store */
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	movdqu	%xmm4,64(%edi)
	/* rows 5..8: limbs 1..4 multiplied by 5 */
	movdqa	%xmm1,%xmm6
	movdqa	%xmm2,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm1,%xmm6
	paddd	%xmm2,%xmm5
	movdqu	%xmm6,80(%edi)
	movdqu	%xmm5,96(%edi)
	movdqa	%xmm3,%xmm6
	movdqa	%xmm4,%xmm5
	pslld	$2,%xmm6
	pslld	$2,%xmm5
	paddd	%xmm3,%xmm6
	paddd	%xmm4,%xmm5
	movdqu	%xmm6,112(%edi)
	movdqu	%xmm5,128(%edi)
	movl	%ebp,%esp		/* drop the scratch area */
	leal	-48(%edi),%edi		/* edi -> context again */
	ret
.size	_poly1305_init_sse2,.-_poly1305_init_sse2
/*
 * _poly1305_blocks_sse2 -- SSE2 block function, same stack arguments as
 * poly1305_blocks:
 *   arg0: context pointer   arg1: input pointer
 *   arg2: length in bytes   arg3: value shifted left by 24 and added to the
 *                                 top limb of an odd leading block
 *
 * Dispatch (len is first rounded down to a multiple of 16):
 *   - len == 0                         -> return
 *   - len < 64 and context word 20 == 0 -> jump to .Lenter_blocks in
 *                                          poly1305_blocks (integer path)
 *   - otherwise run the vector path.  If context word 20 is 0 the table is
 *     built by _poly1305_init_sse2, the accumulator is converted from five
 *     32-bit words to five 26-bit limbs, and word 20 is set to 1.
 *
 * Vector path outline:
 *   - if bit 4 of len is set, one 16-byte block is absorbed on its own
 *   - .L012even copies the nine table rows from context offset 48 onto the
 *     stack, low qword broadcast at 0..128(%edx) and high qword broadcast
 *     at -144..-16(%edx)
 *   - .L015loop consumes 64 bytes per iteration
 *   - .L014skip_loop / .L016long_tail / .L017short_tail handle the last
 *     32 or 64 bytes and add the two lanes together
 *   - .L013done stores the five limbs back at context offsets 0..16
 *
 * Several conditional jumps consume flags set many instructions earlier
 * (by "subl $64,%ecx" or "addl $32,%ecx"); none of the SSE, mov, lea or
 * cmov instructions in between modify EFLAGS.  Keep it that way.
 *
 * Fix relative to the generated code: on the base 2^32 conversion path the
 * length is reloaded from the stack; it is now masked with -16 again so
 * that it equals the value used for dispatch.  Identical for multiples
 * of 16.  The generator (poly1305-x86.pl) should receive the same change.
 *
 * NOTE(review): "por (%ebx),%xmm4" ORs the first 16 bytes of .Lconst_sse2
 * into the top limb -- presumably the per-block 2^128 bit; confirm against
 * the constant table, which is outside this block.
 */
.align	32
.type	_poly1305_blocks_sse2,@function
.align	16
_poly1305_blocks_sse2:
#ifdef __CET__

.byte	243,15,30,251			/* endbr32 */
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%edi
	movl	24(%esp),%esi
	movl	28(%esp),%ecx
	movl	20(%edi),%eax		/* eax = "limbs are base 2^26" flag */
	andl	$-16,%ecx
	jz	.L007nodata
	cmpl	$64,%ecx
	jae	.L008enter_sse2
	testl	%eax,%eax
	jz	.Lenter_blocks		/* short input, still base 2^32: integer path */
.align	16
.L008enter_sse2:
	call	.L009pic_point
.L009pic_point:
	popl	%ebx
	leal	.Lconst_sse2-.L009pic_point(%ebx),%ebx
	testl	%eax,%eax
	jnz	.L010base2_26
	call	_poly1305_init_sse2
	/* convert accumulator: overlapping loads + shifts give 26-bit limbs */
	movl	(%edi),%eax
	movl	3(%edi),%ecx
	movl	6(%edi),%edx
	movl	9(%edi),%esi
	movl	13(%edi),%ebp
	movl	$1,20(%edi)
	shrl	$2,%ecx
	andl	$67108863,%eax		/* 0x3ffffff */
	shrl	$4,%edx
	andl	$67108863,%ecx
	shrl	$6,%esi
	andl	$67108863,%edx
	movd	%eax,%xmm0
	movd	%ecx,%xmm1
	movd	%edx,%xmm2
	movd	%esi,%xmm3
	movd	%ebp,%xmm4
	movl	24(%esp),%esi		/* esi/ecx were used as scratch: reload */
	movl	28(%esp),%ecx
	andl	$-16,%ecx		/* re-apply the whole-block mask (added) */
	jmp	.L011base2_32
.align	16
.L010base2_26:
	movd	(%edi),%xmm0
	movd	4(%edi),%xmm1
	movd	8(%edi),%xmm2
	movd	12(%edi),%xmm3
	movd	16(%edi),%xmm4
	movdqa	64(%ebx),%xmm7
.L011base2_32:
	movl	32(%esp),%eax		/* arg3 */
	movl	%esp,%ebp		/* remember esp; restored at .L013done */
	subl	$528,%esp
	andl	$-16,%esp
	leal	48(%edi),%edi		/* edi -> table area in the context */
	shll	$24,%eax
	testl	$31,%ecx
	jz	.L012even
	/* odd number of blocks: absorb one block by itself */
	movdqu	(%esi),%xmm6
	leal	16(%esi),%esi
	movdqa	%xmm6,%xmm5
	pand	%xmm7,%xmm6
	paddd	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	psrlq	$26,%xmm5
	psrldq	$6,%xmm6
	pand	%xmm7,%xmm5
	paddd	%xmm5,%xmm1
	movdqa	%xmm6,%xmm5
	psrlq	$4,%xmm6
	pand	%xmm7,%xmm6
	paddd	%xmm6,%xmm2
	movdqa	%xmm5,%xmm6
	psrlq	$30,%xmm5
	pand	%xmm7,%xmm5
	psrldq	$7,%xmm6
	paddd	%xmm5,%xmm3
	movd	%eax,%xmm5
	paddd	%xmm6,%xmm4
	movd	12(%edi),%xmm6
	paddd	%xmm5,%xmm4
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	pmuludq	%xmm6,%xmm0
	pmuludq	%xmm6,%xmm1
	pmuludq	%xmm6,%xmm2
	movd	28(%edi),%xmm5
	pmuludq	%xmm6,%xmm3
	pmuludq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	48(%esp),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%esp),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%esp),%xmm7
	paddq	%xmm6,%xmm3
	movd	92(%edi),%xmm6
	pmuludq	(%esp),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%esp),%xmm6
	movd	44(%edi),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%esp),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%esp),%xmm5
	paddq	%xmm7,%xmm4
	movd	108(%edi),%xmm7
	pmuludq	(%esp),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%esp),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%esp),%xmm5
	movd	60(%edi),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%esp),%xmm6
	paddq	%xmm5,%xmm0
	movd	124(%edi),%xmm5
	pmuludq	(%esp),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%esp),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%esp),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%esp),%xmm7
	movd	76(%edi),%xmm5
	paddq	%xmm6,%xmm1
	movd	140(%edi),%xmm6
	pmuludq	(%esp),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%esp),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%esp),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%esp),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%esp),%xmm6
	movdqa	64(%ebx),%xmm7
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	/* carry propagation between limbs */
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
	subl	$16,%ecx
	jz	.L013done
.L012even:
	leal	384(%esp),%edx
	leal	-32(%esi),%eax
	subl	$64,%ecx		/* flags consumed by cmovb and jbe below */
	movdqu	(%edi),%xmm5
	pshufd	$68,%xmm5,%xmm6
	cmovbl	%eax,%esi		/* fewer than 64 bytes left: back esi up by 32 */
	pshufd	$238,%xmm5,%xmm5
	movdqa	%xmm6,(%edx)
	leal	160(%esp),%eax
	movdqu	16(%edi),%xmm6
	movdqa	%xmm5,-144(%edx)
	pshufd	$68,%xmm6,%xmm5
	pshufd	$238,%xmm6,%xmm6
	movdqa	%xmm5,16(%edx)
	movdqu	32(%edi),%xmm5
	movdqa	%xmm6,-128(%edx)
	pshufd	$68,%xmm5,%xmm6
	pshufd	$238,%xmm5,%xmm5
	movdqa	%xmm6,32(%edx)
	movdqu	48(%edi),%xmm6
	movdqa	%xmm5,-112(%edx)
	pshufd	$68,%xmm6,%xmm5
	pshufd	$238,%xmm6,%xmm6
	movdqa	%xmm5,48(%edx)
	movdqu	64(%edi),%xmm5
	movdqa	%xmm6,-96(%edx)
	pshufd	$68,%xmm5,%xmm6
	pshufd	$238,%xmm5,%xmm5
	movdqa	%xmm6,64(%edx)
	movdqu	80(%edi),%xmm6
	movdqa	%xmm5,-80(%edx)
	pshufd	$68,%xmm6,%xmm5
	pshufd	$238,%xmm6,%xmm6
	movdqa	%xmm5,80(%edx)
	movdqu	96(%edi),%xmm5
	movdqa	%xmm6,-64(%edx)
	pshufd	$68,%xmm5,%xmm6
	pshufd	$238,%xmm5,%xmm5
	movdqa	%xmm6,96(%edx)
	movdqu	112(%edi),%xmm6
	movdqa	%xmm5,-48(%edx)
	pshufd	$68,%xmm6,%xmm5
	pshufd	$238,%xmm6,%xmm6
	movdqa	%xmm5,112(%edx)
	movdqu	128(%edi),%xmm5
	movdqa	%xmm6,-32(%edx)
	pshufd	$68,%xmm5,%xmm6
	pshufd	$238,%xmm5,%xmm5
	movdqa	%xmm6,128(%edx)
	movdqa	%xmm5,-16(%edx)
	/* load two input blocks and split them into five limbs, two lanes each */
	movdqu	32(%esi),%xmm5
	movdqu	48(%esi),%xmm6
	leal	32(%esi),%esi
	movdqa	%xmm2,112(%esp)
	movdqa	%xmm3,128(%esp)
	movdqa	%xmm4,144(%esp)
	movdqa	%xmm5,%xmm2
	movdqa	%xmm6,%xmm3
	psrldq	$6,%xmm2
	psrldq	$6,%xmm3
	movdqa	%xmm5,%xmm4
	punpcklqdq	%xmm3,%xmm2
	punpckhqdq	%xmm6,%xmm4
	punpcklqdq	%xmm6,%xmm5
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	movdqa	%xmm5,%xmm6
	psrlq	$40,%xmm4
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm5
	pand	%xmm7,%xmm6
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	por	(%ebx),%xmm4
	movdqa	%xmm0,80(%esp)
	movdqa	%xmm1,96(%esp)
	jbe	.L014skip_loop		/* flags still from "subl $64,%ecx" */
	jmp	.L015loop
.align	32
.L015loop:
	/* first half: multiply the freshly split blocks by the -144..-16(%edx) rows */
	movdqa	-144(%edx),%xmm7
	movdqa	%xmm6,16(%eax)
	movdqa	%xmm2,32(%eax)
	movdqa	%xmm3,48(%eax)
	movdqa	%xmm4,64(%eax)
	movdqa	%xmm5,%xmm1
	pmuludq	%xmm7,%xmm5
	movdqa	%xmm6,%xmm0
	pmuludq	%xmm7,%xmm6
	pmuludq	%xmm7,%xmm2
	pmuludq	%xmm7,%xmm3
	pmuludq	%xmm7,%xmm4
	pmuludq	-16(%edx),%xmm0
	movdqa	%xmm1,%xmm7
	pmuludq	-128(%edx),%xmm1
	paddq	%xmm5,%xmm0
	movdqa	%xmm7,%xmm5
	pmuludq	-112(%edx),%xmm7
	paddq	%xmm6,%xmm1
	movdqa	%xmm5,%xmm6
	pmuludq	-96(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	16(%eax),%xmm7
	pmuludq	-80(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	-128(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	-112(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	32(%eax),%xmm7
	pmuludq	-96(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	-32(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	-16(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	-128(%edx),%xmm6
	paddq	%xmm5,%xmm1
	movdqa	48(%eax),%xmm5
	pmuludq	-112(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	-48(%edx),%xmm5
	paddq	%xmm7,%xmm4
	movdqa	%xmm6,%xmm7
	pmuludq	-32(%edx),%xmm6
	paddq	%xmm5,%xmm0
	movdqa	%xmm7,%xmm5
	pmuludq	-16(%edx),%xmm7
	paddq	%xmm6,%xmm1
	movdqa	64(%eax),%xmm6
	pmuludq	-128(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	%xmm6,%xmm7
	pmuludq	-16(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	-64(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	-48(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	64(%ebx),%xmm7
	pmuludq	-32(%edx),%xmm6
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	/* load the preceding two blocks, split, and add the saved accumulator */
	movdqu	-32(%esi),%xmm5
	movdqu	-16(%esi),%xmm6
	leal	32(%esi),%esi
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	movdqa	%xmm5,%xmm2
	movdqa	%xmm6,%xmm3
	psrldq	$6,%xmm2
	psrldq	$6,%xmm3
	movdqa	%xmm5,%xmm4
	punpcklqdq	%xmm3,%xmm2
	punpckhqdq	%xmm6,%xmm4
	punpcklqdq	%xmm6,%xmm5
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	movdqa	%xmm5,%xmm6
	psrlq	$40,%xmm4
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm5
	pand	%xmm7,%xmm6
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	por	(%ebx),%xmm4
	leal	-32(%esi),%eax
	subl	$64,%ecx		/* flags consumed by cmovb and the final ja */
	paddd	80(%esp),%xmm5
	paddd	96(%esp),%xmm6
	paddd	112(%esp),%xmm2
	paddd	128(%esp),%xmm3
	paddd	144(%esp),%xmm4
	cmovbl	%eax,%esi
	leal	160(%esp),%eax
	/* second half: multiply by the 0..128(%edx) rows and accumulate */
	movdqa	(%edx),%xmm7
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm6,16(%eax)
	movdqa	%xmm2,32(%eax)
	movdqa	%xmm3,48(%eax)
	movdqa	%xmm4,64(%eax)
	movdqa	%xmm5,%xmm1
	pmuludq	%xmm7,%xmm5
	paddq	%xmm0,%xmm5
	movdqa	%xmm6,%xmm0
	pmuludq	%xmm7,%xmm6
	pmuludq	%xmm7,%xmm2
	pmuludq	%xmm7,%xmm3
	pmuludq	%xmm7,%xmm4
	paddq	16(%esp),%xmm6
	paddq	32(%esp),%xmm2
	paddq	48(%esp),%xmm3
	paddq	64(%esp),%xmm4
	pmuludq	128(%edx),%xmm0
	movdqa	%xmm1,%xmm7
	pmuludq	16(%edx),%xmm1
	paddq	%xmm5,%xmm0
	movdqa	%xmm7,%xmm5
	pmuludq	32(%edx),%xmm7
	paddq	%xmm6,%xmm1
	movdqa	%xmm5,%xmm6
	pmuludq	48(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	16(%eax),%xmm7
	pmuludq	64(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	16(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	32(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	32(%eax),%xmm7
	pmuludq	48(%edx),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	112(%edx),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	128(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	16(%edx),%xmm6
	paddq	%xmm5,%xmm1
	movdqa	48(%eax),%xmm5
	pmuludq	32(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	96(%edx),%xmm5
	paddq	%xmm7,%xmm4
	movdqa	%xmm6,%xmm7
	pmuludq	112(%edx),%xmm6
	paddq	%xmm5,%xmm0
	movdqa	%xmm7,%xmm5
	pmuludq	128(%edx),%xmm7
	paddq	%xmm6,%xmm1
	movdqa	64(%eax),%xmm6
	pmuludq	16(%edx),%xmm5
	paddq	%xmm7,%xmm2
	movdqa	%xmm6,%xmm7
	pmuludq	128(%edx),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	80(%edx),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	96(%edx),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	64(%ebx),%xmm7
	pmuludq	112(%edx),%xmm6
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	/* carry propagation between limbs */
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
	/* pre-load and split the next two blocks for the following iteration */
	movdqu	32(%esi),%xmm5
	movdqu	48(%esi),%xmm6
	leal	32(%esi),%esi
	movdqa	%xmm2,112(%esp)
	movdqa	%xmm3,128(%esp)
	movdqa	%xmm4,144(%esp)
	movdqa	%xmm5,%xmm2
	movdqa	%xmm6,%xmm3
	psrldq	$6,%xmm2
	psrldq	$6,%xmm3
	movdqa	%xmm5,%xmm4
	punpcklqdq	%xmm3,%xmm2
	punpckhqdq	%xmm6,%xmm4
	punpcklqdq	%xmm6,%xmm5
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	movdqa	%xmm5,%xmm6
	psrlq	$40,%xmm4
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm5
	pand	%xmm7,%xmm6
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	por	(%ebx),%xmm4
	movdqa	%xmm0,80(%esp)
	movdqa	%xmm1,96(%esp)
	ja	.L015loop		/* flags still from "subl $64,%ecx" above */
.L014skip_loop:
	pshufd	$16,-144(%edx),%xmm7
	addl	$32,%ecx		/* flags consumed by jnz here and jz further down */
	jnz	.L016long_tail
	/* exactly 32 bytes remain: fold the accumulator into these blocks now */
	paddd	%xmm0,%xmm5
	paddd	%xmm1,%xmm6
	paddd	112(%esp),%xmm2
	paddd	128(%esp),%xmm3
	paddd	144(%esp),%xmm4
.L016long_tail:
	movdqa	%xmm5,(%eax)
	movdqa	%xmm6,16(%eax)
	movdqa	%xmm2,32(%eax)
	movdqa	%xmm3,48(%eax)
	movdqa	%xmm4,64(%eax)
	pmuludq	%xmm7,%xmm5
	pmuludq	%xmm7,%xmm6
	pmuludq	%xmm7,%xmm2
	movdqa	%xmm5,%xmm0
	pshufd	$16,-128(%edx),%xmm5
	pmuludq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm1
	pmuludq	%xmm7,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	48(%eax),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%eax),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%eax),%xmm7
	paddq	%xmm6,%xmm3
	pshufd	$16,-64(%edx),%xmm6
	pmuludq	(%eax),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%eax),%xmm6
	pshufd	$16,-112(%edx),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%eax),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%eax),%xmm5
	paddq	%xmm7,%xmm4
	pshufd	$16,-48(%edx),%xmm7
	pmuludq	(%eax),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%eax),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%eax),%xmm5
	pshufd	$16,-96(%edx),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%eax),%xmm6
	paddq	%xmm5,%xmm0
	pshufd	$16,-32(%edx),%xmm5
	pmuludq	(%eax),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%eax),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%eax),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%eax),%xmm7
	pshufd	$16,-80(%edx),%xmm5
	paddq	%xmm6,%xmm1
	pshufd	$16,-16(%edx),%xmm6
	pmuludq	(%eax),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%eax),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%eax),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%eax),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%eax),%xmm6
	movdqa	64(%ebx),%xmm7
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
	jz	.L017short_tail		/* flags still from "addl $32,%ecx" */
	/* 64 bytes remained: process the earlier pair together with the accumulator */
	movdqu	-32(%esi),%xmm5
	movdqu	-16(%esi),%xmm6
	leal	32(%esi),%esi
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movdqa	%xmm4,64(%esp)
	movdqa	%xmm5,%xmm2
	movdqa	%xmm6,%xmm3
	psrldq	$6,%xmm2
	psrldq	$6,%xmm3
	movdqa	%xmm5,%xmm4
	punpcklqdq	%xmm3,%xmm2
	punpckhqdq	%xmm6,%xmm4
	punpcklqdq	%xmm6,%xmm5
	movdqa	%xmm2,%xmm3
	psrlq	$4,%xmm2
	psrlq	$30,%xmm3
	movdqa	%xmm5,%xmm6
	psrlq	$40,%xmm4
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm5
	pand	%xmm7,%xmm6
	pand	%xmm7,%xmm2
	pand	%xmm7,%xmm3
	por	(%ebx),%xmm4
	pshufd	$16,(%edx),%xmm7
	paddd	80(%esp),%xmm5
	paddd	96(%esp),%xmm6
	paddd	112(%esp),%xmm2
	paddd	128(%esp),%xmm3
	paddd	144(%esp),%xmm4
	movdqa	%xmm5,(%esp)
	pmuludq	%xmm7,%xmm5
	movdqa	%xmm6,16(%esp)
	pmuludq	%xmm7,%xmm6
	paddq	%xmm5,%xmm0
	movdqa	%xmm2,%xmm5
	pmuludq	%xmm7,%xmm2
	paddq	%xmm6,%xmm1
	movdqa	%xmm3,%xmm6
	pmuludq	%xmm7,%xmm3
	paddq	32(%esp),%xmm2
	movdqa	%xmm5,32(%esp)
	pshufd	$16,16(%edx),%xmm5
	paddq	48(%esp),%xmm3
	movdqa	%xmm6,48(%esp)
	movdqa	%xmm4,%xmm6
	pmuludq	%xmm7,%xmm4
	paddq	64(%esp),%xmm4
	movdqa	%xmm6,64(%esp)
	movdqa	%xmm5,%xmm6
	pmuludq	48(%esp),%xmm5
	movdqa	%xmm6,%xmm7
	pmuludq	32(%esp),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%esp),%xmm7
	paddq	%xmm6,%xmm3
	pshufd	$16,80(%edx),%xmm6
	pmuludq	(%esp),%xmm5
	paddq	%xmm7,%xmm2
	pmuludq	64(%esp),%xmm6
	pshufd	$16,32(%edx),%xmm7
	paddq	%xmm5,%xmm1
	movdqa	%xmm7,%xmm5
	pmuludq	32(%esp),%xmm7
	paddq	%xmm6,%xmm0
	movdqa	%xmm5,%xmm6
	pmuludq	16(%esp),%xmm5
	paddq	%xmm7,%xmm4
	pshufd	$16,96(%edx),%xmm7
	pmuludq	(%esp),%xmm6
	paddq	%xmm5,%xmm3
	movdqa	%xmm7,%xmm5
	pmuludq	64(%esp),%xmm7
	paddq	%xmm6,%xmm2
	pmuludq	48(%esp),%xmm5
	pshufd	$16,48(%edx),%xmm6
	paddq	%xmm7,%xmm1
	movdqa	%xmm6,%xmm7
	pmuludq	16(%esp),%xmm6
	paddq	%xmm5,%xmm0
	pshufd	$16,112(%edx),%xmm5
	pmuludq	(%esp),%xmm7
	paddq	%xmm6,%xmm4
	movdqa	%xmm5,%xmm6
	pmuludq	64(%esp),%xmm5
	paddq	%xmm7,%xmm3
	movdqa	%xmm6,%xmm7
	pmuludq	48(%esp),%xmm6
	paddq	%xmm5,%xmm2
	pmuludq	32(%esp),%xmm7
	pshufd	$16,64(%edx),%xmm5
	paddq	%xmm6,%xmm1
	pshufd	$16,128(%edx),%xmm6
	pmuludq	(%esp),%xmm5
	paddq	%xmm7,%xmm0
	movdqa	%xmm6,%xmm7
	pmuludq	64(%esp),%xmm6
	paddq	%xmm5,%xmm4
	movdqa	%xmm7,%xmm5
	pmuludq	16(%esp),%xmm7
	paddq	%xmm6,%xmm3
	movdqa	%xmm5,%xmm6
	pmuludq	32(%esp),%xmm5
	paddq	%xmm7,%xmm0
	pmuludq	48(%esp),%xmm6
	movdqa	64(%ebx),%xmm7
	paddq	%xmm5,%xmm1
	paddq	%xmm6,%xmm2
.L017short_tail:
	/* add the high lane onto the low lane (pshufd $78 swaps qwords), then carry */
	pshufd	$78,%xmm4,%xmm6
	pshufd	$78,%xmm3,%xmm5
	paddq	%xmm6,%xmm4
	paddq	%xmm5,%xmm3
	pshufd	$78,%xmm0,%xmm6
	pshufd	$78,%xmm1,%xmm5
	paddq	%xmm6,%xmm0
	paddq	%xmm5,%xmm1
	pshufd	$78,%xmm2,%xmm6
	movdqa	%xmm3,%xmm5
	pand	%xmm7,%xmm3
	psrlq	$26,%xmm5
	paddq	%xmm6,%xmm2
	paddq	%xmm4,%xmm5
	movdqa	%xmm0,%xmm6
	pand	%xmm7,%xmm0
	psrlq	$26,%xmm6
	movdqa	%xmm5,%xmm4
	paddq	%xmm1,%xmm6
	psrlq	$26,%xmm5
	pand	%xmm7,%xmm4
	movdqa	%xmm6,%xmm1
	psrlq	$26,%xmm6
	paddd	%xmm5,%xmm0
	psllq	$2,%xmm5
	paddq	%xmm2,%xmm6
	paddq	%xmm0,%xmm5
	pand	%xmm7,%xmm1
	movdqa	%xmm6,%xmm2
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm2
	paddd	%xmm3,%xmm6
	movdqa	%xmm5,%xmm0
	psrlq	$26,%xmm5
	movdqa	%xmm6,%xmm3
	psrlq	$26,%xmm6
	pand	%xmm7,%xmm0
	paddd	%xmm5,%xmm1
	pand	%xmm7,%xmm3
	paddd	%xmm6,%xmm4
.L013done:
	/* edi = context + 48, so these are context offsets 0..16 */
	movd	%xmm0,-48(%edi)
	movd	%xmm1,-44(%edi)
	movd	%xmm2,-40(%edi)
	movd	%xmm3,-36(%edi)
	movd	%xmm4,-32(%edi)
	movl	%ebp,%esp
.L007nodata:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	_poly1305_blocks_sse2,.-_poly1305_blocks_sse2
/*
 * _poly1305_emit_sse2 -- finalisation for the SSE2 path, same stack
 * arguments as poly1305_emit:
 *   arg0: context pointer   arg1: 16-byte output buffer
 *   arg2: pointer to four 32-bit words added to the result
 *
 * If context word 20 is 0 the accumulator is still in 32-bit words and
 * control transfers to .Lenter_emit inside poly1305_emit (registers are
 * already pushed and %ebp = context, as that entry expects).
 *
 * Otherwise the five 26-bit limbs at context offsets 0..16 are recombined
 * into four 32-bit words plus a top word, bits above bit 1 of the top word
 * are folded back in times 5, and the same "h + 5, select, add arg2"
 * sequence as poly1305_emit is performed.  %xmm0-%xmm3 are used only as
 * scratch to keep a copy of h while h + 5 is computed in the integer
 * registers.
 */
.align	32
.type	_poly1305_emit_sse2,@function
.align	16
_poly1305_emit_sse2:
#ifdef __CET__

.byte	243,15,30,251			/* endbr32 */
#endif

	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	movl	20(%esp),%ebp
	cmpl	$0,20(%ebp)
	je	.Lenter_emit
	movl	(%ebp),%eax		/* limb 0 */
	movl	4(%ebp),%edi		/* limb 1 */
	movl	8(%ebp),%ecx		/* limb 2 */
	movl	12(%ebp),%edx		/* limb 3 */
	movl	16(%ebp),%esi		/* limb 4 */
	/* word0 = l0 + (l1 << 26), remainder of l1 carries into word1, ... */
	movl	%edi,%ebx
	shll	$26,%edi
	shrl	$6,%ebx
	addl	%edi,%eax
	movl	%ecx,%edi
	adcl	$0,%ebx
	shll	$20,%edi
	shrl	$12,%ecx
	addl	%edi,%ebx
	movl	%edx,%edi
	adcl	$0,%ecx
	shll	$14,%edi
	shrl	$18,%edx
	addl	%edi,%ecx
	movl	%esi,%edi
	adcl	$0,%edx
	shll	$8,%edi
	shrl	$24,%esi
	addl	%edi,%edx
	adcl	$0,%esi
	/* fold everything above bit 1 of the top word back in, times 5 */
	movl	%esi,%edi
	andl	$3,%esi
	shrl	$2,%edi
	leal	(%edi,%edi,4),%ebp
	movl	24(%esp),%edi		/* output buffer */
	addl	%ebp,%eax
	movl	28(%esp),%ebp		/* words to add (arg2) */
	adcl	$0,%ebx
	adcl	$0,%ecx
	adcl	$0,%edx
	adcl	$0,%esi
	/* keep h in xmm0..3 while computing g = h + 5 in place */
	movd	%eax,%xmm0
	addl	$5,%eax
	movd	%ebx,%xmm1
	adcl	$0,%ebx
	movd	%ecx,%xmm2
	adcl	$0,%ecx
	movd	%edx,%xmm3
	adcl	$0,%edx
	adcl	$0,%esi
	shrl	$2,%esi			/* 1 if g reached bit 130, else 0 */
	negl	%esi			/* -> all ones / all zeros */
	andl	%esi,%eax
	andl	%esi,%ebx
	andl	%esi,%ecx
	andl	%esi,%edx
	movl	%eax,(%edi)		/* park masked g, fetch h back */
	movd	%xmm0,%eax
	movl	%ebx,4(%edi)
	movd	%xmm1,%ebx
	movl	%ecx,8(%edi)
	movd	%xmm2,%ecx
	movl	%edx,12(%edi)
	movd	%xmm3,%edx
	notl	%esi			/* complementary mask for h */
	andl	%esi,%eax
	andl	%esi,%ebx
	orl	(%edi),%eax
	andl	%esi,%ecx
	orl	4(%edi),%ebx
	andl	%esi,%edx
	orl	8(%edi),%ecx
	orl	12(%edi),%edx
	addl	(%ebp),%eax
	adcl	4(%ebp),%ebx
	movl	%eax,(%edi)
	adcl	8(%ebp),%ecx
	movl	%ebx,4(%edi)
	adcl	12(%ebp),%edx
	movl	%ecx,8(%edi)
	movl	%edx,12(%edi)
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	_poly1305_emit_sse2,.-_poly1305_emit_sse2
/*
 * _poly1305_init_avx2 -- internal helper with a private register interface
 * (reached by "call" from _poly1305_blocks_avx2, not a cdecl function).
 *
 * In:    %edi = context base; 16 bytes at 24(%edi) are read as the key r
 *        %ebx = address of .Lconst_sse2; 64(%ebx) is the 26-bit limb mask
 * Out:   nine 16-byte rows written to ctx+48 .. ctx+176:
 *          ctx+48 .. ctx+112  : limbs 0..4 of the computed powers
 *          ctx+128.. ctx+176  : 5 * limbs 1..4 of the same powers
 *        NOTE(review): following the interleave at .L019square_break, the
 *        four dwords of each row should be the limb of r^4, r^3, r^2, r^1
 *        (low to high) -- confirm against the generator script.
 *        %edi is restored to the context base on return.
 * Clobb: %ecx, %edx, %ebp, %xmm0-%xmm7, flags.
 * Stack: 224 bytes of 16-byte-aligned scratch below the caller's %esp;
 *        %esp is restored from %ebp before returning.
 */
.align 32
.type _poly1305_init_avx2,@function
.align 16
_poly1305_init_avx2:
#ifdef __CET__
    .byte 243,15,30,251                 /* f3 0f 1e fb = endbr32 */
#endif
    vmovdqu 24(%edi),%xmm4              /* 128-bit r */
    leal 48(%edi),%edi                  /* %edi -> output table */
    movl %esp,%ebp                      /* keep caller %esp; vmovdqa needs alignment */
    subl $224,%esp
    andl $-16,%esp
    /* Split r into five 26-bit limbs (xmm0..xmm4, low qword of each). */
    vmovdqa 64(%ebx),%xmm7
    vpand %xmm7,%xmm4,%xmm0
    vpsrlq $26,%xmm4,%xmm1
    vpsrldq $6,%xmm4,%xmm3
    vpand %xmm7,%xmm1,%xmm1
    vpsrlq $4,%xmm3,%xmm2
    vpsrlq $30,%xmm3,%xmm3
    vpand %xmm7,%xmm2,%xmm2
    vpand %xmm7,%xmm3,%xmm3
    vpsrldq $13,%xmm4,%xmm4
    leal 144(%esp),%edx                 /* second scratch area: broadcast multipliers */
    movl $2,%ecx                        /* two multiply passes */
.L018square:
    /* Save current limbs at 0..64(%esp). */
    vmovdqa %xmm0,(%esp)
    vmovdqa %xmm1,16(%esp)
    vmovdqa %xmm2,32(%esp)
    vmovdqa %xmm3,48(%esp)
    vmovdqa %xmm4,64(%esp)
    /* 5*limb1..5*limb4 ((x<<2)+x) at 80..128(%esp). */
    vpslld $2,%xmm1,%xmm6
    vpslld $2,%xmm2,%xmm5
    vpaddd %xmm1,%xmm6,%xmm6
    vpaddd %xmm2,%xmm5,%xmm5
    vmovdqa %xmm6,80(%esp)
    vmovdqa %xmm5,96(%esp)
    vpslld $2,%xmm3,%xmm6
    vpslld $2,%xmm4,%xmm5
    vpaddd %xmm3,%xmm6,%xmm6
    vpaddd %xmm4,%xmm5,%xmm5
    vmovdqa %xmm6,112(%esp)
    vmovdqa %xmm5,128(%esp)
    /* Broadcast the low qword of every limb (shuffle 0x44) to (%edx).. */
    vpshufd $68,%xmm0,%xmm5
    vmovdqa %xmm1,%xmm6
    vpshufd $68,%xmm1,%xmm1
    vpshufd $68,%xmm2,%xmm2
    vpshufd $68,%xmm3,%xmm3
    vpshufd $68,%xmm4,%xmm4
    vmovdqa %xmm5,(%edx)
    vmovdqa %xmm1,16(%edx)
    vmovdqa %xmm2,32(%edx)
    vmovdqa %xmm3,48(%edx)
    vmovdqa %xmm4,64(%edx)
    /* 5x5 limb product accumulated as 64-bit sums in xmm0..xmm4. */
    vpmuludq %xmm0,%xmm4,%xmm4
    vpmuludq %xmm0,%xmm3,%xmm3
    vpmuludq %xmm0,%xmm2,%xmm2
    vpmuludq %xmm0,%xmm1,%xmm1
    vpmuludq %xmm0,%xmm5,%xmm0
    vpmuludq 48(%edx),%xmm6,%xmm5
    vpaddq %xmm5,%xmm4,%xmm4
    vpmuludq 32(%edx),%xmm6,%xmm7
    vpaddq %xmm7,%xmm3,%xmm3
    vpmuludq 16(%edx),%xmm6,%xmm5
    vpaddq %xmm5,%xmm2,%xmm2
    vmovdqa 80(%esp),%xmm7
    vpmuludq (%edx),%xmm6,%xmm6
    vpaddq %xmm6,%xmm1,%xmm1
    vmovdqa 32(%esp),%xmm5
    vpmuludq 64(%edx),%xmm7,%xmm7
    vpaddq %xmm7,%xmm0,%xmm0
    vpmuludq 32(%edx),%xmm5,%xmm6
    vpaddq %xmm6,%xmm4,%xmm4
    vpmuludq 16(%edx),%xmm5,%xmm7
    vpaddq %xmm7,%xmm3,%xmm3
    vmovdqa 96(%esp),%xmm6
    vpmuludq (%edx),%xmm5,%xmm5
    vpaddq %xmm5,%xmm2,%xmm2
    vpmuludq 64(%edx),%xmm6,%xmm7
    vpaddq %xmm7,%xmm1,%xmm1
    vmovdqa 48(%esp),%xmm5
    vpmuludq 48(%edx),%xmm6,%xmm6
    vpaddq %xmm6,%xmm0,%xmm0
    vpmuludq 16(%edx),%xmm5,%xmm7
    vpaddq %xmm7,%xmm4,%xmm4
    vmovdqa 112(%esp),%xmm6
    vpmuludq (%edx),%xmm5,%xmm5
    vpaddq %xmm5,%xmm3,%xmm3
    vpmuludq 64(%edx),%xmm6,%xmm7
    vpaddq %xmm7,%xmm2,%xmm2
    vpmuludq 48(%edx),%xmm6,%xmm5
    vpaddq %xmm5,%xmm1,%xmm1
    vmovdqa 64(%esp),%xmm7
    vpmuludq 32(%edx),%xmm6,%xmm6
    vpaddq %xmm6,%xmm0,%xmm0
    vmovdqa 128(%esp),%xmm5
    vpmuludq (%edx),%xmm7,%xmm7
    vpaddq %xmm7,%xmm4,%xmm4
    vpmuludq 64(%edx),%xmm5,%xmm6
    vpaddq %xmm6,%xmm3,%xmm3
    vpmuludq 16(%edx),%xmm5,%xmm7
    vpaddq %xmm7,%xmm0,%xmm0
    vpmuludq 32(%edx),%xmm5,%xmm6
    vpaddq %xmm6,%xmm1,%xmm1
    vmovdqa 64(%ebx),%xmm7              /* reload limb mask (xmm7 was used as scratch) */
    vpmuludq 48(%edx),%xmm5,%xmm5
    vpaddq %xmm5,%xmm2,%xmm2
    /* Carry propagation back to 26-bit limbs; the carry out of limb 4 */
    /* is folded into limb 0 multiplied by 5 (c + (c<<2)).              */
    vpsrlq $26,%xmm3,%xmm5
    vpand %xmm7,%xmm3,%xmm3
    vpsrlq $26,%xmm0,%xmm6
    vpand %xmm7,%xmm0,%xmm0
    vpaddq %xmm5,%xmm4,%xmm4
    vpaddq %xmm6,%xmm1,%xmm1
    vpsrlq $26,%xmm4,%xmm5
    vpand %xmm7,%xmm4,%xmm4
    vpsrlq $26,%xmm1,%xmm6
    vpand %xmm7,%xmm1,%xmm1
    vpaddq %xmm6,%xmm2,%xmm2
    vpaddd %xmm5,%xmm0,%xmm0
    vpsllq $2,%xmm5,%xmm5
    vpsrlq $26,%xmm2,%xmm6
    vpand %xmm7,%xmm2,%xmm2
    vpaddd %xmm5,%xmm0,%xmm0
    vpaddd %xmm6,%xmm3,%xmm3
    vpsrlq $26,%xmm3,%xmm6
    vpsrlq $26,%xmm0,%xmm5
    vpand %xmm7,%xmm0,%xmm0
    vpand %xmm7,%xmm3,%xmm3
    vpaddd %xmm5,%xmm1,%xmm1
    vpaddd %xmm6,%xmm4,%xmm4
    decl %ecx
    jz .L019square_break
    /* Second pass: pair the new result (low qword) with the saved input */
    /* (high qword) so both are multiplied at once.                      */
    vpunpcklqdq (%esp),%xmm0,%xmm0
    vpunpcklqdq 16(%esp),%xmm1,%xmm1
    vpunpcklqdq 32(%esp),%xmm2,%xmm2
    vpunpcklqdq 48(%esp),%xmm3,%xmm3
    vpunpcklqdq 64(%esp),%xmm4,%xmm4
    jmp .L018square
.L019square_break:
    /* Interleave second-pass results (odd dwords) with the saved pass */
    /* inputs (even dwords), then reorder dwords with shuffle 0x8d.    */
    vpsllq $32,%xmm0,%xmm0
    vpsllq $32,%xmm1,%xmm1
    vpsllq $32,%xmm2,%xmm2
    vpsllq $32,%xmm3,%xmm3
    vpsllq $32,%xmm4,%xmm4
    vpor (%esp),%xmm0,%xmm0
    vpor 16(%esp),%xmm1,%xmm1
    vpor 32(%esp),%xmm2,%xmm2
    vpor 48(%esp),%xmm3,%xmm3
    vpor 64(%esp),%xmm4,%xmm4
    vpshufd $141,%xmm0,%xmm0
    vpshufd $141,%xmm1,%xmm1
    vpshufd $141,%xmm2,%xmm2
    vpshufd $141,%xmm3,%xmm3
    vpshufd $141,%xmm4,%xmm4
    /* Publish limb rows 0..4, then the 5*limb rows for limbs 1..4. */
    vmovdqu %xmm0,(%edi)
    vmovdqu %xmm1,16(%edi)
    vmovdqu %xmm2,32(%edi)
    vmovdqu %xmm3,48(%edi)
    vmovdqu %xmm4,64(%edi)
    vpslld $2,%xmm1,%xmm6
    vpslld $2,%xmm2,%xmm5
    vpaddd %xmm1,%xmm6,%xmm6
    vpaddd %xmm2,%xmm5,%xmm5
    vmovdqu %xmm6,80(%edi)
    vmovdqu %xmm5,96(%edi)
    vpslld $2,%xmm3,%xmm6
    vpslld $2,%xmm4,%xmm5
    vpaddd %xmm3,%xmm6,%xmm6
    vpaddd %xmm4,%xmm5,%xmm5
    vmovdqu %xmm6,112(%edi)
    vmovdqu %xmm5,128(%edi)
    movl %ebp,%esp                      /* drop scratch */
    leal -48(%edi),%edi                 /* %edi back to context base */
    ret
.size _poly1305_init_avx2,.-_poly1305_init_avx2
/*
 * _poly1305_blocks_avx2 -- AVX2 block function (i386 cdecl, stack arguments).
 *
 * After the four register pushes the arguments are read from:
 *   20(%esp) = ctx, 24(%esp) = inp, 28(%esp) = len, 32(%esp) = padbit
 *
 * Behaviour visible in this routine:
 *   - len is rounded down to a multiple of 16; zero means return at once.
 *   - If fewer than 64 bytes remain AND the flag at ctx+20 is zero, control
 *     jumps to .Lenter_blocks (the scalar path) with %edi/%esi/%ecx loaded.
 *   - Otherwise, if the flag at ctx+20 is zero, _poly1305_init_avx2 is
 *     called, the five hash dwords at ctx+0 are repacked into 26-bit limbs
 *     and the flag is set to 1.
 *   - The hash limbs are written back to ctx+0..ctx+16 at .L029done.
 *
 * Callee-saved %ebp/%ebx/%esi/%edi are pushed/popped; %esp is restored from
 * %ebp.  vzeroupper is issued on entry to the vector path and before return.
 */
.align 32
.type _poly1305_blocks_avx2,@function
.align 16
_poly1305_blocks_avx2:
#ifdef __CET__
    .byte 243,15,30,251                 /* f3 0f 1e fb = endbr32 */
#endif
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%edi                  /* ctx */
    movl 24(%esp),%esi                  /* inp */
    movl 28(%esp),%ecx                  /* len */
    movl 20(%edi),%eax                  /* flag: non-zero once hash is in 26-bit limbs */
    andl $-16,%ecx                      /* whole 16-byte blocks only */
    jz .L020nodata
    cmpl $64,%ecx
    jae .L021enter_avx2
    testl %eax,%eax
    jz .Lenter_blocks                   /* short input, hash not converted: scalar code */
.L021enter_avx2:
    vzeroupper
    call .L022pic_point                 /* call/pop to obtain the current address */
.L022pic_point:
    popl %ebx
    leal .Lconst_sse2-.L022pic_point(%ebx),%ebx     /* %ebx = &.Lconst_sse2 */
    testl %eax,%eax
    jnz .L023base2_26
    call _poly1305_init_avx2            /* fills the table at ctx+48; clobbers ecx/edx/ebp */
    /* Repack the hash at ctx+0 into five 26-bit limbs using overlapping loads. */
    movl (%edi),%eax
    movl 3(%edi),%ecx
    movl 6(%edi),%edx
    movl 9(%edi),%esi
    movl 13(%edi),%ebp
    shrl $2,%ecx
    andl $67108863,%eax                 /* 0x3ffffff */
    shrl $4,%edx
    andl $67108863,%ecx
    shrl $6,%esi
    andl $67108863,%edx
    movl %eax,(%edi)
    movl %ecx,4(%edi)
    movl %edx,8(%edi)
    movl %esi,12(%edi)
    movl %ebp,16(%edi)
    movl $1,20(%edi)                    /* mark hash as converted */
    movl 24(%esp),%esi                  /* reload inp and len (registers were reused) */
    movl 28(%esp),%ecx
.L023base2_26:
    movl 32(%esp),%eax                  /* padbit; read before %esp is moved */
    movl %esp,%ebp
    subl $448,%esp
    andl $-512,%esp                     /* 512-byte aligned frame */
    /* Expand the nine table rows from ctx+48 into 32-byte rows around */
    /* %edx = 288(%esp): -128(%edx) .. 128(%edx).                       */
    vmovdqu 48(%edi),%xmm0
    leal 288(%esp),%edx
    vmovdqu 64(%edi),%xmm1
    vmovdqu 80(%edi),%xmm2
    vmovdqu 96(%edi),%xmm3
    vmovdqu 112(%edi),%xmm4
    leal 48(%edi),%edi                  /* %edi -> table; hash is now at -48(%edi) */
    vpermq $64,%ymm0,%ymm0
    vpermq $64,%ymm1,%ymm1
    vpermq $64,%ymm2,%ymm2
    vpermq $64,%ymm3,%ymm3
    vpermq $64,%ymm4,%ymm4
    vpshufd $200,%ymm0,%ymm0
    vpshufd $200,%ymm1,%ymm1
    vpshufd $200,%ymm2,%ymm2
    vpshufd $200,%ymm3,%ymm3
    vpshufd $200,%ymm4,%ymm4
    vmovdqa %ymm0,-128(%edx)
    vmovdqu 80(%edi),%xmm0
    vmovdqa %ymm1,-96(%edx)
    vmovdqu 96(%edi),%xmm1
    vmovdqa %ymm2,-64(%edx)
    vmovdqu 112(%edi),%xmm2
    vmovdqa %ymm3,-32(%edx)
    vmovdqu 128(%edi),%xmm3
    vmovdqa %ymm4,(%edx)
    vpermq $64,%ymm0,%ymm0
    vpermq $64,%ymm1,%ymm1
    vpermq $64,%ymm2,%ymm2
    vpermq $64,%ymm3,%ymm3
    vpshufd $200,%ymm0,%ymm0
    vpshufd $200,%ymm1,%ymm1
    vpshufd $200,%ymm2,%ymm2
    vpshufd $200,%ymm3,%ymm3
    vmovdqa %ymm0,32(%edx)
    vmovd -48(%edi),%xmm0               /* hash limb 0 */
    vmovdqa %ymm1,64(%edx)
    vmovd -44(%edi),%xmm1               /* hash limb 1 */
    vmovdqa %ymm2,96(%edx)
    vmovd -40(%edi),%xmm2               /* hash limb 2 */
    vmovdqa %ymm3,128(%edx)
    vmovd -36(%edi),%xmm3               /* hash limb 3 */
    vmovd -32(%edi),%xmm4               /* hash limb 4 */
    vmovdqa 64(%ebx),%ymm7              /* 26-bit limb mask in every qword */
    negl %eax                           /* 0 or -padbit, used as an index below */
    testl $63,%ecx
    jz .L024even
    /* len is not a multiple of 64: handle the 1..3 leading blocks first. */
    movl %ecx,%edx
    andl $-64,%ecx                      /* %ecx = bytes left for the 4-block loop */
    andl $63,%edx                       /* %edx = 16, 32 or 48 */
    vmovdqu (%esi),%xmm5
    cmpl $32,%edx
    jb .L025one
    vmovdqu 16(%esi),%xmm6
    je .L026two
    /* three blocks */
    vinserti128 $1,32(%esi),%ymm5,%ymm5
    leal 48(%esi),%esi
    leal 8(%ebx),%ebx                   /* shifted window into the pad-bit rows */
    leal 296(%esp),%edx                 /* shifted window into the table */
    jmp .L027tail
.L026two:
    leal 32(%esi),%esi
    leal 16(%ebx),%ebx
    leal 304(%esp),%edx
    jmp .L027tail
.L025one:
    leal 16(%esi),%esi
    vpxor %ymm6,%ymm6,%ymm6
    leal 32(%ebx,%eax,8),%ebx           /* +24 when padbit is 1, +32 (zero row) when 0 */
    leal 312(%esp),%edx
    jmp .L027tail
.align 32
.L024even:
    /* Load four blocks: blocks 0/2 in ymm5, blocks 1/3 in ymm6. */
    vmovdqu (%esi),%xmm5
    vmovdqu 16(%esi),%xmm6
    vinserti128 $1,32(%esi),%ymm5,%ymm5
    vinserti128 $1,48(%esi),%ymm6,%ymm6
    leal 64(%esi),%esi
    subl $64,%ecx
    jz .L027tail
.L028loop:
    /* Spill hash limbs 0..2, split the loaded input into 26-bit limbs, */
    /* OR in the constant at (%ebx) and add the hash.                    */
    vmovdqa %ymm2,64(%esp)
    vpsrldq $6,%ymm5,%ymm2
    vmovdqa %ymm0,(%esp)
    vpsrldq $6,%ymm6,%ymm0
    vmovdqa %ymm1,32(%esp)
    vpunpckhqdq %ymm6,%ymm5,%ymm1
    vpunpcklqdq %ymm6,%ymm5,%ymm5
    vpunpcklqdq %ymm0,%ymm2,%ymm2
    vpsrlq $30,%ymm2,%ymm0
    vpsrlq $4,%ymm2,%ymm2
    vpsrlq $26,%ymm5,%ymm6
    vpsrlq $40,%ymm1,%ymm1
    vpand %ymm7,%ymm2,%ymm2
    vpand %ymm7,%ymm5,%ymm5
    vpand %ymm7,%ymm6,%ymm6
    vpand %ymm7,%ymm0,%ymm0
    vpor (%ebx),%ymm1,%ymm1
    vpaddq 64(%esp),%ymm2,%ymm2
    vpaddq (%esp),%ymm5,%ymm5
    vpaddq 32(%esp),%ymm6,%ymm6
    vpaddq %ymm3,%ymm0,%ymm0
    vpaddq %ymm4,%ymm1,%ymm1
    /* Multiply by the table rows around %edx; sums land in ymm0..ymm4. */
    vpmuludq -96(%edx),%ymm2,%ymm3
    vmovdqa %ymm6,32(%esp)
    vpmuludq -64(%edx),%ymm2,%ymm4
    vmovdqa %ymm0,96(%esp)
    vpmuludq 96(%edx),%ymm2,%ymm0
    vmovdqa %ymm1,128(%esp)
    vpmuludq 128(%edx),%ymm2,%ymm1
    vpmuludq -128(%edx),%ymm2,%ymm2
    vpmuludq -32(%edx),%ymm5,%ymm7
    vpaddq %ymm7,%ymm3,%ymm3
    vpmuludq (%edx),%ymm5,%ymm6
    vpaddq %ymm6,%ymm4,%ymm4
    vpmuludq -128(%edx),%ymm5,%ymm7
    vpaddq %ymm7,%ymm0,%ymm0
    vmovdqa 32(%esp),%ymm7
    vpmuludq -96(%edx),%ymm5,%ymm6
    vpaddq %ymm6,%ymm1,%ymm1
    vpmuludq -64(%edx),%ymm5,%ymm5
    vpaddq %ymm5,%ymm2,%ymm2
    vpmuludq -64(%edx),%ymm7,%ymm6
    vpaddq %ymm6,%ymm3,%ymm3
    vpmuludq -32(%edx),%ymm7,%ymm5
    vpaddq %ymm5,%ymm4,%ymm4
    vpmuludq 128(%edx),%ymm7,%ymm6
    vpaddq %ymm6,%ymm0,%ymm0
    vmovdqa 96(%esp),%ymm6
    vpmuludq -128(%edx),%ymm7,%ymm5
    vpaddq %ymm5,%ymm1,%ymm1
    vpmuludq -96(%edx),%ymm7,%ymm7
    vpaddq %ymm7,%ymm2,%ymm2
    vpmuludq -128(%edx),%ymm6,%ymm5
    vpaddq %ymm5,%ymm3,%ymm3
    vpmuludq -96(%edx),%ymm6,%ymm7
    vpaddq %ymm7,%ymm4,%ymm4
    vpmuludq 64(%edx),%ymm6,%ymm5
    vpaddq %ymm5,%ymm0,%ymm0
    vmovdqa 128(%esp),%ymm5
    vpmuludq 96(%edx),%ymm6,%ymm7
    vpaddq %ymm7,%ymm1,%ymm1
    vpmuludq 128(%edx),%ymm6,%ymm6
    vpaddq %ymm6,%ymm2,%ymm2
    vpmuludq 128(%edx),%ymm5,%ymm7
    vpaddq %ymm7,%ymm3,%ymm3
    vpmuludq 32(%edx),%ymm5,%ymm6
    vpaddq %ymm6,%ymm0,%ymm0
    vpmuludq -128(%edx),%ymm5,%ymm7
    vpaddq %ymm7,%ymm4,%ymm4
    vmovdqa 64(%ebx),%ymm7              /* reload limb mask */
    vpmuludq 64(%edx),%ymm5,%ymm6
    vpaddq %ymm6,%ymm1,%ymm1
    vpmuludq 96(%edx),%ymm5,%ymm5
    vpaddq %ymm5,%ymm2,%ymm2
    /* Carry propagation; carry out of limb 4 re-enters limb 0 times 5. */
    vpsrlq $26,%ymm3,%ymm5
    vpand %ymm7,%ymm3,%ymm3
    vpsrlq $26,%ymm0,%ymm6
    vpand %ymm7,%ymm0,%ymm0
    vpaddq %ymm5,%ymm4,%ymm4
    vpaddq %ymm6,%ymm1,%ymm1
    vpsrlq $26,%ymm4,%ymm5
    vpand %ymm7,%ymm4,%ymm4
    vpsrlq $26,%ymm1,%ymm6
    vpand %ymm7,%ymm1,%ymm1
    vpaddq %ymm6,%ymm2,%ymm2
    vpaddq %ymm5,%ymm0,%ymm0
    vpsllq $2,%ymm5,%ymm5
    vpsrlq $26,%ymm2,%ymm6
    vpand %ymm7,%ymm2,%ymm2
    vpaddq %ymm5,%ymm0,%ymm0
    vpaddq %ymm6,%ymm3,%ymm3
    vpsrlq $26,%ymm3,%ymm6
    vpsrlq $26,%ymm0,%ymm5
    vpand %ymm7,%ymm0,%ymm0
    vpand %ymm7,%ymm3,%ymm3
    vpaddq %ymm5,%ymm1,%ymm1
    vpaddq %ymm6,%ymm4,%ymm4
    /* Fetch the next four blocks. */
    vmovdqu (%esi),%xmm5
    vmovdqu 16(%esi),%xmm6
    vinserti128 $1,32(%esi),%ymm5,%ymm5
    vinserti128 $1,48(%esi),%ymm6,%ymm6
    leal 64(%esi),%esi
    subl $64,%ecx
    jnz .L028loop
.L027tail:
    /* Same as the loop body, but the table is read at a +4 byte offset */
    /* and the lanes are summed horizontally afterwards.                 */
    vmovdqa %ymm2,64(%esp)
    vpsrldq $6,%ymm5,%ymm2
    vmovdqa %ymm0,(%esp)
    vpsrldq $6,%ymm6,%ymm0
    vmovdqa %ymm1,32(%esp)
    vpunpckhqdq %ymm6,%ymm5,%ymm1
    vpunpcklqdq %ymm6,%ymm5,%ymm5
    vpunpcklqdq %ymm0,%ymm2,%ymm2
    vpsrlq $30,%ymm2,%ymm0
    vpsrlq $4,%ymm2,%ymm2
    vpsrlq $26,%ymm5,%ymm6
    vpsrlq $40,%ymm1,%ymm1
    vpand %ymm7,%ymm2,%ymm2
    vpand %ymm7,%ymm5,%ymm5
    vpand %ymm7,%ymm6,%ymm6
    vpand %ymm7,%ymm0,%ymm0
    vpor (%ebx),%ymm1,%ymm1
    andl $-64,%ebx                      /* back to &.Lconst_sse2 (table is 64-byte aligned) */
    vpaddq 64(%esp),%ymm2,%ymm2
    vpaddq (%esp),%ymm5,%ymm5
    vpaddq 32(%esp),%ymm6,%ymm6
    vpaddq %ymm3,%ymm0,%ymm0
    vpaddq %ymm4,%ymm1,%ymm1
    vpmuludq -92(%edx),%ymm2,%ymm3
    vmovdqa %ymm6,32(%esp)
    vpmuludq -60(%edx),%ymm2,%ymm4
    vmovdqa %ymm0,96(%esp)
    vpmuludq 100(%edx),%ymm2,%ymm0
    vmovdqa %ymm1,128(%esp)
    vpmuludq 132(%edx),%ymm2,%ymm1
    vpmuludq -124(%edx),%ymm2,%ymm2
    vpmuludq -28(%edx),%ymm5,%ymm7
    vpaddq %ymm7,%ymm3,%ymm3
    vpmuludq 4(%edx),%ymm5,%ymm6
    vpaddq %ymm6,%ymm4,%ymm4
    vpmuludq -124(%edx),%ymm5,%ymm7
    vpaddq %ymm7,%ymm0,%ymm0
    vmovdqa 32(%esp),%ymm7
    vpmuludq -92(%edx),%ymm5,%ymm6
    vpaddq %ymm6,%ymm1,%ymm1
    vpmuludq -60(%edx),%ymm5,%ymm5
    vpaddq %ymm5,%ymm2,%ymm2
    vpmuludq -60(%edx),%ymm7,%ymm6
    vpaddq %ymm6,%ymm3,%ymm3
    vpmuludq -28(%edx),%ymm7,%ymm5
    vpaddq %ymm5,%ymm4,%ymm4
    vpmuludq 132(%edx),%ymm7,%ymm6
    vpaddq %ymm6,%ymm0,%ymm0
    vmovdqa 96(%esp),%ymm6
    vpmuludq -124(%edx),%ymm7,%ymm5
    vpaddq %ymm5,%ymm1,%ymm1
    vpmuludq -92(%edx),%ymm7,%ymm7
    vpaddq %ymm7,%ymm2,%ymm2
    vpmuludq -124(%edx),%ymm6,%ymm5
    vpaddq %ymm5,%ymm3,%ymm3
    vpmuludq -92(%edx),%ymm6,%ymm7
    vpaddq %ymm7,%ymm4,%ymm4
    vpmuludq 68(%edx),%ymm6,%ymm5
    vpaddq %ymm5,%ymm0,%ymm0
    vmovdqa 128(%esp),%ymm5
    vpmuludq 100(%edx),%ymm6,%ymm7
    vpaddq %ymm7,%ymm1,%ymm1
    vpmuludq 132(%edx),%ymm6,%ymm6
    vpaddq %ymm6,%ymm2,%ymm2
    vpmuludq 132(%edx),%ymm5,%ymm7
    vpaddq %ymm7,%ymm3,%ymm3
    vpmuludq 36(%edx),%ymm5,%ymm6
    vpaddq %ymm6,%ymm0,%ymm0
    vpmuludq -124(%edx),%ymm5,%ymm7
    vpaddq %ymm7,%ymm4,%ymm4
    vmovdqa 64(%ebx),%ymm7              /* reload limb mask */
    vpmuludq 68(%edx),%ymm5,%ymm6
    vpaddq %ymm6,%ymm1,%ymm1
    vpmuludq 100(%edx),%ymm5,%ymm5
    vpaddq %ymm5,%ymm2,%ymm2
    /* Horizontal add: fold the four 64-bit lanes of each sum into lane 0. */
    vpsrldq $8,%ymm4,%ymm5
    vpsrldq $8,%ymm3,%ymm6
    vpaddq %ymm5,%ymm4,%ymm4
    vpsrldq $8,%ymm0,%ymm5
    vpaddq %ymm6,%ymm3,%ymm3
    vpsrldq $8,%ymm1,%ymm6
    vpaddq %ymm5,%ymm0,%ymm0
    vpsrldq $8,%ymm2,%ymm5
    vpaddq %ymm6,%ymm1,%ymm1
    vpermq $2,%ymm4,%ymm6
    vpaddq %ymm5,%ymm2,%ymm2
    vpermq $2,%ymm3,%ymm5
    vpaddq %ymm6,%ymm4,%ymm4
    vpermq $2,%ymm0,%ymm6
    vpaddq %ymm5,%ymm3,%ymm3
    vpermq $2,%ymm1,%ymm5
    vpaddq %ymm6,%ymm0,%ymm0
    vpermq $2,%ymm2,%ymm6
    vpaddq %ymm5,%ymm1,%ymm1
    vpaddq %ymm6,%ymm2,%ymm2
    /* Carry propagation, as in the loop. */
    vpsrlq $26,%ymm3,%ymm5
    vpand %ymm7,%ymm3,%ymm3
    vpsrlq $26,%ymm0,%ymm6
    vpand %ymm7,%ymm0,%ymm0
    vpaddq %ymm5,%ymm4,%ymm4
    vpaddq %ymm6,%ymm1,%ymm1
    vpsrlq $26,%ymm4,%ymm5
    vpand %ymm7,%ymm4,%ymm4
    vpsrlq $26,%ymm1,%ymm6
    vpand %ymm7,%ymm1,%ymm1
    vpaddq %ymm6,%ymm2,%ymm2
    vpaddq %ymm5,%ymm0,%ymm0
    vpsllq $2,%ymm5,%ymm5
    vpsrlq $26,%ymm2,%ymm6
    vpand %ymm7,%ymm2,%ymm2
    vpaddq %ymm5,%ymm0,%ymm0
    vpaddq %ymm6,%ymm3,%ymm3
    vpsrlq $26,%ymm3,%ymm6
    vpsrlq $26,%ymm0,%ymm5
    vpand %ymm7,%ymm0,%ymm0
    vpand %ymm7,%ymm3,%ymm3
    vpaddq %ymm5,%ymm1,%ymm1
    vpaddq %ymm6,%ymm4,%ymm4
    cmpl $0,%ecx                        /* more full 64-byte groups after a short head? */
    je .L029done
    vpshufd $252,%xmm0,%xmm0
    leal 288(%esp),%edx                 /* un-shifted table window for the main loop */
    vpshufd $252,%xmm1,%xmm1
    vpshufd $252,%xmm2,%xmm2
    vpshufd $252,%xmm3,%xmm3
    vpshufd $252,%xmm4,%xmm4
    jmp .L024even
.align 16
.L029done:
    /* Write the five hash limbs back to ctx+0..ctx+16. */
    vmovd %xmm0,-48(%edi)
    vmovd %xmm1,-44(%edi)
    vmovd %xmm2,-40(%edi)
    vmovd %xmm3,-36(%edi)
    vmovd %xmm4,-32(%edi)
    vzeroupper
    movl %ebp,%esp
.L020nodata:
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
.size _poly1305_blocks_avx2,.-_poly1305_blocks_avx2
/*
 * Constant table shared by the SSE2 and AVX2 code.  It is 64-byte aligned;
 * the AVX2 tail relies on that when it restores the pointer with
 * "andl $-64,%ebx".
 */
.align 64
.Lconst_sse2:
    /* +0:  1<<24 in the low dword of each qword (ORed into the top limb via (%ebx)) */
    .long 16777216,0,16777216,0,16777216,0,16777216,0
    /* +32: all-zero row */
    .long 0,0,0,0,0,0,0,0
    /* +64: 0x3ffffff in the low dword of each qword (26-bit limb mask) */
    .long 67108863,0,67108863,0,67108863,0,67108863,0
    /* +96: 0x0fffffff, 0x0ffffffc x3 -- same values as the key clamp in poly1305_init */
    .long 268435455,268435452,268435452,268435452
    /* ASCII: "Poly1305 for x86, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated */
    .byte 80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54
    .byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
    .byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
    .byte 114,103,62,0
.align 4
/* Common symbol, 40 bytes, 4-byte aligned; poly1305_init reads dwords 0 and 2 of it. */
.comm OPENSSL_ia32cap_P,40,4
.section ".note.gnu.property", "a"
1955
.p2align 2
1956
.long 1f - 0f
1957
.long 4f - 1f
1958
.long 5
1959
0:
1960
.asciz "GNU"
1961
1:
1962
.p2align 2
1963
.long 0xc0000002
1964
.long 3f - 2f
1965
2:
1966
.long 3
1967
3:
1968
.p2align 2
1969
4:
1970
#else
1971
/*
 * poly1305_init -- i386 cdecl, non-PIC variant.
 *
 * After the four pushes: 20(%esp) = ctx, 24(%esp) = key, 28(%esp) = func
 * (pointer to two dwords that receive the block and emit entry points).
 *
 * - Always zeroes the six dwords at ctx+0..ctx+20.
 * - key == NULL: returns 0 in %eax and writes nothing else.
 * - Otherwise: picks the implementation from OPENSSL_ia32cap_P (word 0
 *   bits 24 and 26 both set selects the SSE2 routines; word 2 bit 5
 *   additionally selects the AVX2 block routine), stores the two function
 *   addresses at func[0]/func[1], stores the clamped key at
 *   ctx+24..ctx+36 and returns 1.
 */
.text
.align 64
.globl poly1305_init
.type poly1305_init,@function
.align 16
poly1305_init:
.L_poly1305_init_begin:
#ifdef __CET__
    .byte 243,15,30,251                 /* f3 0f 1e fb = endbr32 */
#endif
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%edi                  /* ctx */
    movl 24(%esp),%esi                  /* key */
    movl 28(%esp),%ebp                  /* func[2] */
    xorl %eax,%eax                      /* also the return value on the NULL-key path */
    movl %eax,(%edi)
    movl %eax,4(%edi)
    movl %eax,8(%edi)
    movl %eax,12(%edi)
    movl %eax,16(%edi)
    movl %eax,20(%edi)
    cmpl $0,%esi
    je .L000nokey
    call .L001pic_point                 /* call/pop to obtain the current address */
.L001pic_point:
    popl %ebx
    leal poly1305_blocks-.L001pic_point(%ebx),%eax      /* default: scalar routines */
    leal poly1305_emit-.L001pic_point(%ebx),%edx
    leal OPENSSL_ia32cap_P,%edi         /* absolute address (non-PIC build) */
    movl (%edi),%ecx
    andl $83886080,%ecx                 /* 0x05000000: bits 24 and 26 */
    cmpl $83886080,%ecx
    jne .L002no_sse2
    leal _poly1305_blocks_sse2-.L001pic_point(%ebx),%eax
    leal _poly1305_emit_sse2-.L001pic_point(%ebx),%edx
    movl 8(%edi),%ecx
    testl $32,%ecx                      /* bit 5 of capability word 2 */
    jz .L002no_sse2
    leal _poly1305_blocks_avx2-.L001pic_point(%ebx),%eax
.L002no_sse2:
    movl 20(%esp),%edi                  /* %edi was reused; reload ctx */
    movl %eax,(%ebp)                    /* func[0] = block routine */
    movl %edx,4(%ebp)                   /* func[1] = emit routine */
    movl (%esi),%eax
    movl 4(%esi),%ebx
    movl 8(%esi),%ecx
    movl 12(%esi),%edx
    andl $268435455,%eax                /* 0x0fffffff */
    andl $268435452,%ebx                /* 0x0ffffffc */
    andl $268435452,%ecx
    andl $268435452,%edx
    movl %eax,24(%edi)
    movl %ebx,28(%edi)
    movl %ecx,32(%edi)
    movl %edx,36(%edi)
    movl $1,%eax                        /* function pointers were provided */
.L000nokey:
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
.size poly1305_init,.-.L_poly1305_init_begin
/*
 * poly1305_blocks -- scalar block function (i386 cdecl).
 *
 * After the four pushes: 20(%esp) = ctx, 24(%esp) = inp, 28(%esp) = len,
 * 32(%esp) = padbit.  For every 16-byte block it computes
 *     h = (h + block + padbit*2^128) * r, partially reduced mod 2^130-5,
 * with h kept as five dwords at ctx+0..ctx+16 and r read from
 * ctx+24..ctx+36.
 *
 * .Lenter_blocks is a secondary entry used by the vector block routines:
 * they arrive with the same four registers pushed and %edi = ctx,
 * %esi = inp, %ecx = len.
 *
 * len is rounded DOWN to a multiple of 16.  The mask used to be -15, which
 * left bit 0 of len intact; with an odd len the end pointer inp+len is never
 * hit by the 16-byte stride, so the "jne" loop ran past the buffer.
 *
 * Frame (64 bytes, offsets from %esp inside the loop):
 *    0..16  h + input (words 0, 3 and 4 are spilled)
 *   20..28  product words d0..d2
 *   36..48  r0..r3
 *   52..60  s1..s3, where s_i = r_i + (r_i >> 2)
 *   84 ctx, 88 inp, 92 end-of-input pointer (overwrites len), 96 padbit
 */
.globl poly1305_blocks
.type poly1305_blocks,@function
.align 16
poly1305_blocks:
.L_poly1305_blocks_begin:
#ifdef __CET__
    .byte 243,15,30,251                 /* f3 0f 1e fb = endbr32 */
#endif
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%edi                  /* ctx */
    movl 24(%esp),%esi                  /* inp */
    movl 28(%esp),%ecx                  /* len */
.Lenter_blocks:
    andl $-16,%ecx                      /* whole blocks only (was -15: kept bit 0) */
    jz .L003nodata
    subl $64,%esp
    movl 24(%edi),%eax                  /* r0 */
    movl 28(%edi),%ebx                  /* r1 */
    leal (%esi,%ecx,1),%ebp             /* end of input */
    movl 32(%edi),%ecx                  /* r2 */
    movl 36(%edi),%edx                  /* r3 */
    movl %ebp,92(%esp)                  /* len slot now holds the end pointer */
    movl %esi,%ebp                      /* %ebp = running input pointer */
    movl %eax,36(%esp)
    movl %ebx,%eax
    shrl $2,%eax
    movl %ebx,40(%esp)
    addl %ebx,%eax                      /* s1 = r1 + (r1 >> 2) */
    movl %ecx,%ebx
    shrl $2,%ebx
    movl %ecx,44(%esp)
    addl %ecx,%ebx                      /* s2 */
    movl %edx,%ecx
    shrl $2,%ecx
    movl %edx,48(%esp)
    addl %edx,%ecx                      /* s3 */
    movl %eax,52(%esp)
    movl %ebx,56(%esp)
    movl %ecx,60(%esp)
    movl (%edi),%eax                    /* h0..h4 -> eax, ebx, ecx, esi, edi */
    movl 4(%edi),%ebx
    movl 8(%edi),%ecx
    movl 12(%edi),%esi
    movl 16(%edi),%edi
    jmp .L004loop                       /* skip the alignment padding */
.align 32
.L004loop:
    /* h += block; the pad bit is added into h4 together with the carry. */
    addl (%ebp),%eax
    adcl 4(%ebp),%ebx
    adcl 8(%ebp),%ecx
    adcl 12(%ebp),%esi
    leal 16(%ebp),%ebp                  /* lea keeps CF for the adcl below */
    adcl 96(%esp),%edi
    movl %eax,(%esp)
    movl %esi,12(%esp)
    /* d0 = h0*r0 + h1*s3 + h2*s2 + h3*s1 */
    mull 36(%esp)
    movl %edi,16(%esp)
    movl %eax,%edi
    movl %ebx,%eax
    movl %edx,%esi
    mull 60(%esp)
    addl %eax,%edi
    movl %ecx,%eax
    adcl %edx,%esi
    mull 56(%esp)
    addl %eax,%edi
    movl 12(%esp),%eax
    adcl %edx,%esi
    mull 52(%esp)
    addl %eax,%edi
    movl (%esp),%eax
    adcl %edx,%esi
    /* d1 = h0*r1 + h1*r0 + h2*s3 + h3*s2 + h4*s1 (+ carry from d0) */
    mull 40(%esp)
    movl %edi,20(%esp)
    xorl %edi,%edi
    addl %eax,%esi
    movl %ebx,%eax
    adcl %edx,%edi
    mull 36(%esp)
    addl %eax,%esi
    movl %ecx,%eax
    adcl %edx,%edi
    mull 60(%esp)
    addl %eax,%esi
    movl 12(%esp),%eax
    adcl %edx,%edi
    mull 56(%esp)
    addl %eax,%esi
    movl 16(%esp),%eax
    adcl %edx,%edi
    imull 52(%esp),%eax
    addl %eax,%esi
    movl (%esp),%eax
    adcl $0,%edi
    /* d2 = h0*r2 + h1*r1 + h2*r0 + h3*s3 + h4*s2 (+ carry from d1) */
    mull 44(%esp)
    movl %esi,24(%esp)
    xorl %esi,%esi
    addl %eax,%edi
    movl %ebx,%eax
    adcl %edx,%esi
    mull 40(%esp)
    addl %eax,%edi
    movl %ecx,%eax
    adcl %edx,%esi
    mull 36(%esp)
    addl %eax,%edi
    movl 12(%esp),%eax
    adcl %edx,%esi
    mull 60(%esp)
    addl %eax,%edi
    movl 16(%esp),%eax
    adcl %edx,%esi
    imull 56(%esp),%eax
    addl %eax,%edi
    movl (%esp),%eax
    adcl $0,%esi
    /* d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*s3 (+ carry from d2) */
    mull 48(%esp)
    movl %edi,28(%esp)
    xorl %edi,%edi
    addl %eax,%esi
    movl %ebx,%eax
    adcl %edx,%edi
    mull 44(%esp)
    addl %eax,%esi
    movl %ecx,%eax
    adcl %edx,%edi
    mull 40(%esp)
    addl %eax,%esi
    movl 12(%esp),%eax
    adcl %edx,%edi
    mull 36(%esp)
    addl %eax,%esi
    movl 16(%esp),%ecx
    adcl %edx,%edi
    movl %ecx,%edx
    imull 60(%esp),%ecx
    addl %ecx,%esi
    movl 20(%esp),%eax
    adcl $0,%edi
    /* d4 = h4*r0 + carry from d3 */
    imull 36(%esp),%edx
    addl %edi,%edx
    movl 24(%esp),%ebx
    movl 28(%esp),%ecx
    /* Partial reduction: keep the low 2 bits of d4, fold (d4 >> 2) * 5 into h0. */
    movl %edx,%edi
    shrl $2,%edx
    andl $3,%edi
    leal (%edx,%edx,4),%edx
    addl %edx,%eax
    adcl $0,%ebx
    adcl $0,%ecx
    adcl $0,%esi
    adcl $0,%edi
    cmpl 92(%esp),%ebp
    jne .L004loop
    movl 84(%esp),%edx                  /* ctx */
    addl $64,%esp
    movl %eax,(%edx)
    movl %ebx,4(%edx)
    movl %ecx,8(%edx)
    movl %esi,12(%edx)
    movl %edi,16(%edx)
.L003nodata:
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
.size poly1305_blocks,.-.L_poly1305_blocks_begin
/*
 * poly1305_emit -- final reduction and tag output (i386 cdecl).
 *
 * After the four pushes: 20(%esp) = ctx, 24(%esp) = mac (16 output bytes),
 * 28(%esp) = nonce (four dwords).
 *
 * Computes h + 5 over the five dwords at ctx+0; bit 2 of the fifth dword of
 * that sum (the bit of weight 2^130) selects, with masks rather than a
 * branch, between the low 128 bits of h + 5 and the low 128 bits of h.  The
 * selected value plus the nonce (mod 2^128) is stored to mac.
 *
 * .Lenter_emit is a secondary entry used by the vector emit routine; it
 * expects the same four registers pushed and %ebp = ctx.
 */
.globl poly1305_emit
.type poly1305_emit,@function
.align 16
poly1305_emit:
.L_poly1305_emit_begin:
#ifdef __CET__
    .byte 243,15,30,251                 /* f3 0f 1e fb = endbr32 */
#endif
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%ebp                  /* ctx */
.Lenter_emit:
    movl 24(%esp),%edi                  /* mac */
    movl (%ebp),%eax
    movl 4(%ebp),%ebx
    movl 8(%ebp),%ecx
    movl 12(%ebp),%edx
    movl 16(%ebp),%esi
    addl $5,%eax                        /* h + 5 */
    adcl $0,%ebx
    adcl $0,%ecx
    adcl $0,%edx
    adcl $0,%esi
    shrl $2,%esi                        /* non-zero iff h + 5 reached 2^130 */
    negl %esi                           /* 0 -> 0, 1 -> all ones */
    andl %esi,%eax                      /* (h + 5) & mask, parked in mac[] */
    andl %esi,%ebx
    andl %esi,%ecx
    andl %esi,%edx
    movl %eax,(%edi)
    movl %ebx,4(%edi)
    movl %ecx,8(%edi)
    movl %edx,12(%edi)
    notl %esi                           /* complementary mask for the original h */
    movl (%ebp),%eax
    movl 4(%ebp),%ebx
    movl 8(%ebp),%ecx
    movl 12(%ebp),%edx
    movl 28(%esp),%ebp                  /* nonce; ctx is no longer needed */
    andl %esi,%eax
    andl %esi,%ebx
    andl %esi,%ecx
    andl %esi,%edx
    orl (%edi),%eax                     /* merge the two masked candidates */
    orl 4(%edi),%ebx
    orl 8(%edi),%ecx
    orl 12(%edi),%edx
    addl (%ebp),%eax                    /* + nonce, carry out of bit 127 dropped */
    adcl 4(%ebp),%ebx
    adcl 8(%ebp),%ecx
    adcl 12(%ebp),%edx
    movl %eax,(%edi)
    movl %ebx,4(%edi)
    movl %ecx,8(%edi)
    movl %edx,12(%edi)
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
.size poly1305_emit,.-.L_poly1305_emit_begin
/*
 * _poly1305_init_sse2 -- internal helper with a private register interface
 * (reached by "call" from _poly1305_blocks_sse2, not a cdecl function).
 *
 * In:    %edi = context base; 16 bytes at 24(%edi) are read as the key r
 *        %ebx = address of .Lconst_sse2; 64(%ebx) is the 26-bit limb mask
 * Out:   nine 16-byte rows written to ctx+48 .. ctx+176:
 *          ctx+48 .. ctx+112  : limbs 0..4 of the computed powers
 *          ctx+128.. ctx+176  : 5 * limbs 1..4 of the same powers
 *        NOTE(review): following the interleave at .L006square_break, the
 *        four dwords of each row should be the limb of r^4, r^3, r^2, r^1
 *        (low to high) -- confirm against the generator script.
 *        %edi is restored to the context base on return.
 * Clobb: %ecx, %edx, %ebp, %xmm0-%xmm7, flags.
 * Stack: 224 bytes of 16-byte-aligned scratch below the caller's %esp;
 *        %esp is restored from %ebp before returning.
 */
.align 32
.type _poly1305_init_sse2,@function
.align 16
_poly1305_init_sse2:
#ifdef __CET__
    .byte 243,15,30,251                 /* f3 0f 1e fb = endbr32 */
#endif
    movdqu 24(%edi),%xmm4               /* 128-bit r */
    leal 48(%edi),%edi                  /* %edi -> output table */
    movl %esp,%ebp                      /* keep caller %esp; movdqa needs alignment */
    subl $224,%esp
    andl $-16,%esp
    /* Split r into five 26-bit limbs (xmm0..xmm4); movq loads the mask */
    /* into the low qword only, so the high qwords start out zero.      */
    movq 64(%ebx),%xmm7
    movdqa %xmm4,%xmm0
    movdqa %xmm4,%xmm1
    movdqa %xmm4,%xmm2
    pand %xmm7,%xmm0
    psrlq $26,%xmm1
    psrldq $6,%xmm2
    pand %xmm7,%xmm1
    movdqa %xmm2,%xmm3
    psrlq $4,%xmm2
    psrlq $30,%xmm3
    pand %xmm7,%xmm2
    pand %xmm7,%xmm3
    psrldq $13,%xmm4
    leal 144(%esp),%edx                 /* second scratch area: broadcast multipliers */
    movl $2,%ecx                        /* two multiply passes */
.L005square:
    /* Save current limbs at 0..64(%esp). */
    movdqa %xmm0,(%esp)
    movdqa %xmm1,16(%esp)
    movdqa %xmm2,32(%esp)
    movdqa %xmm3,48(%esp)
    movdqa %xmm4,64(%esp)
    /* 5*limb1..5*limb4 ((x<<2)+x) at 80..128(%esp). */
    movdqa %xmm1,%xmm6
    movdqa %xmm2,%xmm5
    pslld $2,%xmm6
    pslld $2,%xmm5
    paddd %xmm1,%xmm6
    paddd %xmm2,%xmm5
    movdqa %xmm6,80(%esp)
    movdqa %xmm5,96(%esp)
    movdqa %xmm3,%xmm6
    movdqa %xmm4,%xmm5
    pslld $2,%xmm6
    pslld $2,%xmm5
    paddd %xmm3,%xmm6
    paddd %xmm4,%xmm5
    movdqa %xmm6,112(%esp)
    movdqa %xmm5,128(%esp)
    /* Broadcast the low qword of every limb (shuffle 0x44) to (%edx).. */
    pshufd $68,%xmm0,%xmm6
    movdqa %xmm1,%xmm5
    pshufd $68,%xmm1,%xmm1
    pshufd $68,%xmm2,%xmm2
    pshufd $68,%xmm3,%xmm3
    pshufd $68,%xmm4,%xmm4
    movdqa %xmm6,(%edx)
    movdqa %xmm1,16(%edx)
    movdqa %xmm2,32(%edx)
    movdqa %xmm3,48(%edx)
    movdqa %xmm4,64(%edx)
    /* 5x5 limb product accumulated as 64-bit sums in xmm0..xmm4. */
    pmuludq %xmm0,%xmm4
    pmuludq %xmm0,%xmm3
    pmuludq %xmm0,%xmm2
    pmuludq %xmm0,%xmm1
    pmuludq %xmm6,%xmm0
    movdqa %xmm5,%xmm6
    pmuludq 48(%edx),%xmm5
    movdqa %xmm6,%xmm7
    pmuludq 32(%edx),%xmm6
    paddq %xmm5,%xmm4
    movdqa %xmm7,%xmm5
    pmuludq 16(%edx),%xmm7
    paddq %xmm6,%xmm3
    movdqa 80(%esp),%xmm6
    pmuludq (%edx),%xmm5
    paddq %xmm7,%xmm2
    pmuludq 64(%edx),%xmm6
    movdqa 32(%esp),%xmm7
    paddq %xmm5,%xmm1
    movdqa %xmm7,%xmm5
    pmuludq 32(%edx),%xmm7
    paddq %xmm6,%xmm0
    movdqa %xmm5,%xmm6
    pmuludq 16(%edx),%xmm5
    paddq %xmm7,%xmm4
    movdqa 96(%esp),%xmm7
    pmuludq (%edx),%xmm6
    paddq %xmm5,%xmm3
    movdqa %xmm7,%xmm5
    pmuludq 64(%edx),%xmm7
    paddq %xmm6,%xmm2
    pmuludq 48(%edx),%xmm5
    movdqa 48(%esp),%xmm6
    paddq %xmm7,%xmm1
    movdqa %xmm6,%xmm7
    pmuludq 16(%edx),%xmm6
    paddq %xmm5,%xmm0
    movdqa 112(%esp),%xmm5
    pmuludq (%edx),%xmm7
    paddq %xmm6,%xmm4
    movdqa %xmm5,%xmm6
    pmuludq 64(%edx),%xmm5
    paddq %xmm7,%xmm3
    movdqa %xmm6,%xmm7
    pmuludq 48(%edx),%xmm6
    paddq %xmm5,%xmm2
    pmuludq 32(%edx),%xmm7
    movdqa 64(%esp),%xmm5
    paddq %xmm6,%xmm1
    movdqa 128(%esp),%xmm6
    pmuludq (%edx),%xmm5
    paddq %xmm7,%xmm0
    movdqa %xmm6,%xmm7
    pmuludq 64(%edx),%xmm6
    paddq %xmm5,%xmm4
    movdqa %xmm7,%xmm5
    pmuludq 16(%edx),%xmm7
    paddq %xmm6,%xmm3
    movdqa %xmm5,%xmm6
    pmuludq 32(%edx),%xmm5
    paddq %xmm7,%xmm0
    pmuludq 48(%edx),%xmm6
    movdqa 64(%ebx),%xmm7               /* full-width limb mask from here on */
    paddq %xmm5,%xmm1
    paddq %xmm6,%xmm2
    /* Carry propagation back to 26-bit limbs; the carry out of limb 4 */
    /* is folded into limb 0 multiplied by 5 (c + (c<<2)).              */
    movdqa %xmm3,%xmm5
    pand %xmm7,%xmm3
    psrlq $26,%xmm5
    paddq %xmm4,%xmm5
    movdqa %xmm0,%xmm6
    pand %xmm7,%xmm0
    psrlq $26,%xmm6
    movdqa %xmm5,%xmm4
    paddq %xmm1,%xmm6
    psrlq $26,%xmm5
    pand %xmm7,%xmm4
    movdqa %xmm6,%xmm1
    psrlq $26,%xmm6
    paddd %xmm5,%xmm0
    psllq $2,%xmm5
    paddq %xmm2,%xmm6
    paddq %xmm0,%xmm5
    pand %xmm7,%xmm1
    movdqa %xmm6,%xmm2
    psrlq $26,%xmm6
    pand %xmm7,%xmm2
    paddd %xmm3,%xmm6
    movdqa %xmm5,%xmm0
    psrlq $26,%xmm5
    movdqa %xmm6,%xmm3
    psrlq $26,%xmm6
    pand %xmm7,%xmm0
    paddd %xmm5,%xmm1
    pand %xmm7,%xmm3
    paddd %xmm6,%xmm4
    decl %ecx
    jz .L006square_break
    /* Second pass: pair the new result (low qword) with the saved input */
    /* (high qword) so both are multiplied at once.                      */
    punpcklqdq (%esp),%xmm0
    punpcklqdq 16(%esp),%xmm1
    punpcklqdq 32(%esp),%xmm2
    punpcklqdq 48(%esp),%xmm3
    punpcklqdq 64(%esp),%xmm4
    jmp .L005square
.L006square_break:
    /* Interleave second-pass results (odd dwords) with the saved pass */
    /* inputs (even dwords), then reorder dwords with shuffle 0x8d.    */
    psllq $32,%xmm0
    psllq $32,%xmm1
    psllq $32,%xmm2
    psllq $32,%xmm3
    psllq $32,%xmm4
    por (%esp),%xmm0
    por 16(%esp),%xmm1
    por 32(%esp),%xmm2
    por 48(%esp),%xmm3
    por 64(%esp),%xmm4
    pshufd $141,%xmm0,%xmm0
    pshufd $141,%xmm1,%xmm1
    pshufd $141,%xmm2,%xmm2
    pshufd $141,%xmm3,%xmm3
    pshufd $141,%xmm4,%xmm4
    /* Publish limb rows 0..4, then the 5*limb rows for limbs 1..4. */
    movdqu %xmm0,(%edi)
    movdqu %xmm1,16(%edi)
    movdqu %xmm2,32(%edi)
    movdqu %xmm3,48(%edi)
    movdqu %xmm4,64(%edi)
    movdqa %xmm1,%xmm6
    movdqa %xmm2,%xmm5
    pslld $2,%xmm6
    pslld $2,%xmm5
    paddd %xmm1,%xmm6
    paddd %xmm2,%xmm5
    movdqu %xmm6,80(%edi)
    movdqu %xmm5,96(%edi)
    movdqa %xmm3,%xmm6
    movdqa %xmm4,%xmm5
    pslld $2,%xmm6
    pslld $2,%xmm5
    paddd %xmm3,%xmm6
    paddd %xmm4,%xmm5
    movdqu %xmm6,112(%edi)
    movdqu %xmm5,128(%edi)
    movl %ebp,%esp                      /* drop scratch */
    leal -48(%edi),%edi                 /* %edi back to context base */
    ret
.size _poly1305_init_sse2,.-_poly1305_init_sse2
.align 32
2485
/*
 * _poly1305_blocks_sse2 -- SSE2 block-processing routine (32-bit x86, AT&T
 * syntax, arguments on the stack).
 *
 * Arguments, as read after the four register pushes:
 *   20(%esp) -> %edi  context pointer
 *   24(%esp) -> %esi  input pointer
 *   28(%esp) -> %ecx  input length in bytes; rounded down to a multiple of 16
 *   32(%esp) -> %eax  fourth argument; shifted left by 24 and added to the
 *                     top limb on the single-block path only (presumably the
 *                     Poly1305 pad bit -- confirm against the C caller)
 *
 * Context fields touched here:
 *   0..16(ctx)    five 32-bit accumulator words, written back at .L013done
 *   20(ctx)       flag word: zero selects the conversion path below (or the
 *                 jump to .Lenter_blocks for short inputs), non-zero means
 *                 the accumulator is already held as five separate limbs
 *   48..191(ctx)  nine 16-byte table entries, read as (%edi)..128(%edi)
 *                 after %edi is advanced by 48
 *
 * %ebx points at .Lconst_sse2 (defined outside this block): 64(%ebx) is used
 * as the AND mask after every 26-bit shift, and (%ebx) is OR-ed into the top
 * limb of each input block on the two-block paths (presumably the per-lane
 * limb mask and the pad bit respectively -- verify against the table).
 *
 * %ebp, %ebx, %esi and %edi are saved and restored.  %ebp holds the caller
 * stack pointer while a 528-byte, 16-byte-aligned scratch frame is in use.
 */
.type _poly1305_blocks_sse2,@function
.align 16
_poly1305_blocks_sse2:
#ifdef __CET__

    .byte 243,15,30,251    /* F3 0F 1E FB = endbr32 */
#endif

    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%edi
    movl 24(%esp),%esi
    movl 28(%esp),%ecx
    movl 20(%edi),%eax    /* flag word */
    andl $-16,%ecx    /* whole 16-byte blocks only */
    jz .L007nodata
    cmpl $64,%ecx
    jae .L008enter_sse2
    /* Fewer than 64 bytes and flag still zero: leave via .Lenter_blocks,
       which is defined outside this block. */
    testl %eax,%eax
    jz .Lenter_blocks
.align 16
.L008enter_sse2:
    /* call/pop yields the current address; %ebx = &.Lconst_sse2. */
    call .L009pic_point
.L009pic_point:
    popl %ebx
    leal .Lconst_sse2-.L009pic_point(%ebx),%ebx
    testl %eax,%eax
    jnz .L010base2_26
    /* Flag is zero: call the table-initialisation helper, then split the
       value held in the first 17 bytes of the context into five limbs using
       overlapping 32-bit loads at byte offsets 0, 3, 6, 9 and 13. */
    call _poly1305_init_sse2
    movl (%edi),%eax
    movl 3(%edi),%ecx
    movl 6(%edi),%edx
    movl 9(%edi),%esi
    movl 13(%edi),%ebp
    movl $1,20(%edi)    /* mark the context as converted */
    shrl $2,%ecx    /* byte 3 = bit 24, so >>2 starts at bit 26 */
    andl $67108863,%eax    /* 67108863 = 2^26 - 1 */
    shrl $4,%edx    /* byte 6 = bit 48, so >>4 starts at bit 52 */
    andl $67108863,%ecx
    shrl $6,%esi    /* byte 9 = bit 72, so >>6 starts at bit 78; 26 bits remain */
    andl $67108863,%edx
    movd %eax,%xmm0
    movd %ecx,%xmm1
    movd %edx,%xmm2
    movd %esi,%xmm3
    movd %ebp,%xmm4    /* bits 104 and up, unmasked */
    /* %esi and %ecx were used as scratch; reload the arguments.
       NOTE(review): this reload of the length is not masked with -16 the way
       the first load was -- confirm callers always pass a multiple of 16. */
    movl 24(%esp),%esi
    movl 28(%esp),%ecx
    jmp .L011base2_32
.align 16
.L010base2_26:
    /* Flag non-zero: load the five accumulator words directly. */
    movd (%edi),%xmm0
    movd 4(%edi),%xmm1
    movd 8(%edi),%xmm2
    movd 12(%edi),%xmm3
    movd 16(%edi),%xmm4
    movdqa 64(%ebx),%xmm7
.L011base2_32:
    movl 32(%esp),%eax    /* fourth argument */
    movl %esp,%ebp    /* keep caller %esp for the epilogue */
    subl $528,%esp
    andl $-16,%esp    /* movdqa below needs 16-byte alignment */
    leal 48(%edi),%edi    /* %edi now addresses the table */
    shll $24,%eax    /* top limb starts at bit 104, so bit 24 there is 2^128 */
    testl $31,%ecx
    jz .L012even
    /* Length is not a multiple of 32: absorb one 16-byte block first.
       Split it into five limbs and add them to the accumulator. */
    movdqu (%esi),%xmm6
    leal 16(%esi),%esi
    movdqa %xmm6,%xmm5
    pand %xmm7,%xmm6
    paddd %xmm6,%xmm0
    movdqa %xmm5,%xmm6
    psrlq $26,%xmm5
    psrldq $6,%xmm6
    pand %xmm7,%xmm5
    paddd %xmm5,%xmm1
    movdqa %xmm6,%xmm5
    psrlq $4,%xmm6
    pand %xmm7,%xmm6
    paddd %xmm6,%xmm2
    movdqa %xmm5,%xmm6
    psrlq $30,%xmm5
    pand %xmm7,%xmm5
    psrldq $7,%xmm6    /* 6 + 7 = 13 bytes: bits 104 and up */
    paddd %xmm5,%xmm3
    movd %eax,%xmm5
    paddd %xmm6,%xmm4
    movd 12(%edi),%xmm6    /* fourth dword of table entry 0 */
    paddd %xmm5,%xmm4    /* add the shifted fourth argument */
    movdqa %xmm0,(%esp)
    movdqa %xmm1,16(%esp)
    movdqa %xmm2,32(%esp)
    movdqa %xmm3,48(%esp)
    movdqa %xmm4,64(%esp)
    /* Five-limb schoolbook multiply of the saved limbs by the fourth dword
       of table entries 0..8 (byte offsets 12, 28, ..., 140). */
    pmuludq %xmm6,%xmm0
    pmuludq %xmm6,%xmm1
    pmuludq %xmm6,%xmm2
    movd 28(%edi),%xmm5
    pmuludq %xmm6,%xmm3
    pmuludq %xmm6,%xmm4
    movdqa %xmm5,%xmm6
    pmuludq 48(%esp),%xmm5
    movdqa %xmm6,%xmm7
    pmuludq 32(%esp),%xmm6
    paddq %xmm5,%xmm4
    movdqa %xmm7,%xmm5
    pmuludq 16(%esp),%xmm7
    paddq %xmm6,%xmm3
    movd 92(%edi),%xmm6
    pmuludq (%esp),%xmm5
    paddq %xmm7,%xmm2
    pmuludq 64(%esp),%xmm6
    movd 44(%edi),%xmm7
    paddq %xmm5,%xmm1
    movdqa %xmm7,%xmm5
    pmuludq 32(%esp),%xmm7
    paddq %xmm6,%xmm0
    movdqa %xmm5,%xmm6
    pmuludq 16(%esp),%xmm5
    paddq %xmm7,%xmm4
    movd 108(%edi),%xmm7
    pmuludq (%esp),%xmm6
    paddq %xmm5,%xmm3
    movdqa %xmm7,%xmm5
    pmuludq 64(%esp),%xmm7
    paddq %xmm6,%xmm2
    pmuludq 48(%esp),%xmm5
    movd 60(%edi),%xmm6
    paddq %xmm7,%xmm1
    movdqa %xmm6,%xmm7
    pmuludq 16(%esp),%xmm6
    paddq %xmm5,%xmm0
    movd 124(%edi),%xmm5
    pmuludq (%esp),%xmm7
    paddq %xmm6,%xmm4
    movdqa %xmm5,%xmm6
    pmuludq 64(%esp),%xmm5
    paddq %xmm7,%xmm3
    movdqa %xmm6,%xmm7
    pmuludq 48(%esp),%xmm6
    paddq %xmm5,%xmm2
    pmuludq 32(%esp),%xmm7
    movd 76(%edi),%xmm5
    paddq %xmm6,%xmm1
    movd 140(%edi),%xmm6
    pmuludq (%esp),%xmm5
    paddq %xmm7,%xmm0
    movdqa %xmm6,%xmm7
    pmuludq 64(%esp),%xmm6
    paddq %xmm5,%xmm4
    movdqa %xmm7,%xmm5
    pmuludq 16(%esp),%xmm7
    paddq %xmm6,%xmm3
    movdqa %xmm5,%xmm6
    pmuludq 32(%esp),%xmm5
    paddq %xmm7,%xmm0
    pmuludq 48(%esp),%xmm6
    movdqa 64(%ebx),%xmm7
    paddq %xmm5,%xmm1
    paddq %xmm6,%xmm2
    /* Carry propagation.  The carry out of limb 4 is folded into limb 0 as
       c + (c << 2), i.e. multiplied by 5. */
    movdqa %xmm3,%xmm5
    pand %xmm7,%xmm3
    psrlq $26,%xmm5
    paddq %xmm4,%xmm5
    movdqa %xmm0,%xmm6
    pand %xmm7,%xmm0
    psrlq $26,%xmm6
    movdqa %xmm5,%xmm4
    paddq %xmm1,%xmm6
    psrlq $26,%xmm5
    pand %xmm7,%xmm4
    movdqa %xmm6,%xmm1
    psrlq $26,%xmm6
    paddd %xmm5,%xmm0
    psllq $2,%xmm5
    paddq %xmm2,%xmm6
    paddq %xmm0,%xmm5
    pand %xmm7,%xmm1
    movdqa %xmm6,%xmm2
    psrlq $26,%xmm6
    pand %xmm7,%xmm2
    paddd %xmm3,%xmm6
    movdqa %xmm5,%xmm0
    psrlq $26,%xmm5
    movdqa %xmm6,%xmm3
    psrlq $26,%xmm6
    pand %xmm7,%xmm0
    paddd %xmm5,%xmm1
    pand %xmm7,%xmm3
    paddd %xmm6,%xmm4
    subl $16,%ecx
    jz .L013done
.L012even:
    /* Copy the nine table entries to the stack, splitting each one:
       pshufd $68 duplicates dwords 0,1 into 0..128(%edx);
       pshufd $238 duplicates dwords 2,3 into -144..-16(%edx).
       The flags produced by "subl $64,%ecx" stay live through cmovbl down
       to the "jbe" below; nothing in between modifies EFLAGS. */
    leal 384(%esp),%edx
    leal -32(%esi),%eax
    subl $64,%ecx
    movdqu (%edi),%xmm5
    pshufd $68,%xmm5,%xmm6
    cmovbl %eax,%esi    /* fewer than 64 bytes left: step back so the loads at 32/48(%esi) read the next two blocks */
    pshufd $238,%xmm5,%xmm5
    movdqa %xmm6,(%edx)
    leal 160(%esp),%eax
    movdqu 16(%edi),%xmm6
    movdqa %xmm5,-144(%edx)
    pshufd $68,%xmm6,%xmm5
    pshufd $238,%xmm6,%xmm6
    movdqa %xmm5,16(%edx)
    movdqu 32(%edi),%xmm5
    movdqa %xmm6,-128(%edx)
    pshufd $68,%xmm5,%xmm6
    pshufd $238,%xmm5,%xmm5
    movdqa %xmm6,32(%edx)
    movdqu 48(%edi),%xmm6
    movdqa %xmm5,-112(%edx)
    pshufd $68,%xmm6,%xmm5
    pshufd $238,%xmm6,%xmm6
    movdqa %xmm5,48(%edx)
    movdqu 64(%edi),%xmm5
    movdqa %xmm6,-96(%edx)
    pshufd $68,%xmm5,%xmm6
    pshufd $238,%xmm5,%xmm5
    movdqa %xmm6,64(%edx)
    movdqu 80(%edi),%xmm6
    movdqa %xmm5,-80(%edx)
    pshufd $68,%xmm6,%xmm5
    pshufd $238,%xmm6,%xmm6
    movdqa %xmm5,80(%edx)
    movdqu 96(%edi),%xmm5
    movdqa %xmm6,-64(%edx)
    pshufd $68,%xmm5,%xmm6
    pshufd $238,%xmm5,%xmm5
    movdqa %xmm6,96(%edx)
    movdqu 112(%edi),%xmm6
    movdqa %xmm5,-48(%edx)
    pshufd $68,%xmm6,%xmm5
    pshufd $238,%xmm6,%xmm6
    movdqa %xmm5,112(%edx)
    movdqu 128(%edi),%xmm5
    movdqa %xmm6,-32(%edx)
    pshufd $68,%xmm5,%xmm6
    pshufd $238,%xmm5,%xmm5
    movdqa %xmm6,128(%edx)
    movdqa %xmm5,-16(%edx)
    /* Load two 16-byte blocks and split them, one block per 64-bit lane,
       into five limbs: %xmm5 = bits 0..25, %xmm6 = 26..51, %xmm2 = 52..77,
       %xmm3 = 78..103, %xmm4 = bits 104..127 OR-ed with the constant at
       (%ebx).  Accumulator limbs are parked at 80..144(%esp). */
    movdqu 32(%esi),%xmm5
    movdqu 48(%esi),%xmm6
    leal 32(%esi),%esi
    movdqa %xmm2,112(%esp)
    movdqa %xmm3,128(%esp)
    movdqa %xmm4,144(%esp)
    movdqa %xmm5,%xmm2
    movdqa %xmm6,%xmm3
    psrldq $6,%xmm2
    psrldq $6,%xmm3
    movdqa %xmm5,%xmm4
    punpcklqdq %xmm3,%xmm2
    punpckhqdq %xmm6,%xmm4
    punpcklqdq %xmm6,%xmm5
    movdqa %xmm2,%xmm3
    psrlq $4,%xmm2
    psrlq $30,%xmm3
    movdqa %xmm5,%xmm6
    psrlq $40,%xmm4
    psrlq $26,%xmm6
    pand %xmm7,%xmm5
    pand %xmm7,%xmm6
    pand %xmm7,%xmm2
    pand %xmm7,%xmm3
    por (%ebx),%xmm4
    movdqa %xmm0,80(%esp)
    movdqa %xmm1,96(%esp)
    jbe .L014skip_loop    /* 64 bytes or fewer remained before the subtraction */
    jmp .L015loop
.align 32
.L015loop:
    /* Main loop, 64 bytes per iteration.  First half: multiply the two
       freshly split blocks by the table set at -144..-16(%edx). */
    movdqa -144(%edx),%xmm7
    movdqa %xmm6,16(%eax)
    movdqa %xmm2,32(%eax)
    movdqa %xmm3,48(%eax)
    movdqa %xmm4,64(%eax)
    movdqa %xmm5,%xmm1
    pmuludq %xmm7,%xmm5
    movdqa %xmm6,%xmm0
    pmuludq %xmm7,%xmm6
    pmuludq %xmm7,%xmm2
    pmuludq %xmm7,%xmm3
    pmuludq %xmm7,%xmm4
    pmuludq -16(%edx),%xmm0
    movdqa %xmm1,%xmm7
    pmuludq -128(%edx),%xmm1
    paddq %xmm5,%xmm0
    movdqa %xmm7,%xmm5
    pmuludq -112(%edx),%xmm7
    paddq %xmm6,%xmm1
    movdqa %xmm5,%xmm6
    pmuludq -96(%edx),%xmm5
    paddq %xmm7,%xmm2
    movdqa 16(%eax),%xmm7
    pmuludq -80(%edx),%xmm6
    paddq %xmm5,%xmm3
    movdqa %xmm7,%xmm5
    pmuludq -128(%edx),%xmm7
    paddq %xmm6,%xmm4
    movdqa %xmm5,%xmm6
    pmuludq -112(%edx),%xmm5
    paddq %xmm7,%xmm2
    movdqa 32(%eax),%xmm7
    pmuludq -96(%edx),%xmm6
    paddq %xmm5,%xmm3
    movdqa %xmm7,%xmm5
    pmuludq -32(%edx),%xmm7
    paddq %xmm6,%xmm4
    movdqa %xmm5,%xmm6
    pmuludq -16(%edx),%xmm5
    paddq %xmm7,%xmm0
    movdqa %xmm6,%xmm7
    pmuludq -128(%edx),%xmm6
    paddq %xmm5,%xmm1
    movdqa 48(%eax),%xmm5
    pmuludq -112(%edx),%xmm7
    paddq %xmm6,%xmm3
    movdqa %xmm5,%xmm6
    pmuludq -48(%edx),%xmm5
    paddq %xmm7,%xmm4
    movdqa %xmm6,%xmm7
    pmuludq -32(%edx),%xmm6
    paddq %xmm5,%xmm0
    movdqa %xmm7,%xmm5
    pmuludq -16(%edx),%xmm7
    paddq %xmm6,%xmm1
    movdqa 64(%eax),%xmm6
    pmuludq -128(%edx),%xmm5
    paddq %xmm7,%xmm2
    movdqa %xmm6,%xmm7
    pmuludq -16(%edx),%xmm6
    paddq %xmm5,%xmm4
    movdqa %xmm7,%xmm5
    pmuludq -64(%edx),%xmm7
    paddq %xmm6,%xmm3
    movdqa %xmm5,%xmm6
    pmuludq -48(%edx),%xmm5
    paddq %xmm7,%xmm0
    movdqa 64(%ebx),%xmm7
    pmuludq -32(%edx),%xmm6
    paddq %xmm5,%xmm1
    paddq %xmm6,%xmm2
    /* Second half: load the two earlier blocks of this 64-byte group, split
       them, add the parked accumulator limbs, and multiply by the table set
       at 0..128(%edx), accumulating onto the first-half products. */
    movdqu -32(%esi),%xmm5
    movdqu -16(%esi),%xmm6
    leal 32(%esi),%esi
    movdqa %xmm2,32(%esp)
    movdqa %xmm3,48(%esp)
    movdqa %xmm4,64(%esp)
    movdqa %xmm5,%xmm2
    movdqa %xmm6,%xmm3
    psrldq $6,%xmm2
    psrldq $6,%xmm3
    movdqa %xmm5,%xmm4
    punpcklqdq %xmm3,%xmm2
    punpckhqdq %xmm6,%xmm4
    punpcklqdq %xmm6,%xmm5
    movdqa %xmm2,%xmm3
    psrlq $4,%xmm2
    psrlq $30,%xmm3
    movdqa %xmm5,%xmm6
    psrlq $40,%xmm4
    psrlq $26,%xmm6
    pand %xmm7,%xmm5
    pand %xmm7,%xmm6
    pand %xmm7,%xmm2
    pand %xmm7,%xmm3
    por (%ebx),%xmm4
    leal -32(%esi),%eax
    subl $64,%ecx    /* these flags feed cmovbl and the loop-closing "ja" */
    paddd 80(%esp),%xmm5
    paddd 96(%esp),%xmm6
    paddd 112(%esp),%xmm2
    paddd 128(%esp),%xmm3
    paddd 144(%esp),%xmm4
    cmovbl %eax,%esi
    leal 160(%esp),%eax
    movdqa (%edx),%xmm7
    movdqa %xmm1,16(%esp)
    movdqa %xmm6,16(%eax)
    movdqa %xmm2,32(%eax)
    movdqa %xmm3,48(%eax)
    movdqa %xmm4,64(%eax)
    movdqa %xmm5,%xmm1
    pmuludq %xmm7,%xmm5
    paddq %xmm0,%xmm5
    movdqa %xmm6,%xmm0
    pmuludq %xmm7,%xmm6
    pmuludq %xmm7,%xmm2
    pmuludq %xmm7,%xmm3
    pmuludq %xmm7,%xmm4
    paddq 16(%esp),%xmm6
    paddq 32(%esp),%xmm2
    paddq 48(%esp),%xmm3
    paddq 64(%esp),%xmm4
    pmuludq 128(%edx),%xmm0
    movdqa %xmm1,%xmm7
    pmuludq 16(%edx),%xmm1
    paddq %xmm5,%xmm0
    movdqa %xmm7,%xmm5
    pmuludq 32(%edx),%xmm7
    paddq %xmm6,%xmm1
    movdqa %xmm5,%xmm6
    pmuludq 48(%edx),%xmm5
    paddq %xmm7,%xmm2
    movdqa 16(%eax),%xmm7
    pmuludq 64(%edx),%xmm6
    paddq %xmm5,%xmm3
    movdqa %xmm7,%xmm5
    pmuludq 16(%edx),%xmm7
    paddq %xmm6,%xmm4
    movdqa %xmm5,%xmm6
    pmuludq 32(%edx),%xmm5
    paddq %xmm7,%xmm2
    movdqa 32(%eax),%xmm7
    pmuludq 48(%edx),%xmm6
    paddq %xmm5,%xmm3
    movdqa %xmm7,%xmm5
    pmuludq 112(%edx),%xmm7
    paddq %xmm6,%xmm4
    movdqa %xmm5,%xmm6
    pmuludq 128(%edx),%xmm5
    paddq %xmm7,%xmm0
    movdqa %xmm6,%xmm7
    pmuludq 16(%edx),%xmm6
    paddq %xmm5,%xmm1
    movdqa 48(%eax),%xmm5
    pmuludq 32(%edx),%xmm7
    paddq %xmm6,%xmm3
    movdqa %xmm5,%xmm6
    pmuludq 96(%edx),%xmm5
    paddq %xmm7,%xmm4
    movdqa %xmm6,%xmm7
    pmuludq 112(%edx),%xmm6
    paddq %xmm5,%xmm0
    movdqa %xmm7,%xmm5
    pmuludq 128(%edx),%xmm7
    paddq %xmm6,%xmm1
    movdqa 64(%eax),%xmm6
    pmuludq 16(%edx),%xmm5
    paddq %xmm7,%xmm2
    movdqa %xmm6,%xmm7
    pmuludq 128(%edx),%xmm6
    paddq %xmm5,%xmm4
    movdqa %xmm7,%xmm5
    pmuludq 80(%edx),%xmm7
    paddq %xmm6,%xmm3
    movdqa %xmm5,%xmm6
    pmuludq 96(%edx),%xmm5
    paddq %xmm7,%xmm0
    movdqa 64(%ebx),%xmm7
    pmuludq 112(%edx),%xmm6
    paddq %xmm5,%xmm1
    paddq %xmm6,%xmm2
    /* Carry propagation, same pattern as above (limb-4 carry times 5). */
    movdqa %xmm3,%xmm5
    pand %xmm7,%xmm3
    psrlq $26,%xmm5
    paddq %xmm4,%xmm5
    movdqa %xmm0,%xmm6
    pand %xmm7,%xmm0
    psrlq $26,%xmm6
    movdqa %xmm5,%xmm4
    paddq %xmm1,%xmm6
    psrlq $26,%xmm5
    pand %xmm7,%xmm4
    movdqa %xmm6,%xmm1
    psrlq $26,%xmm6
    paddd %xmm5,%xmm0
    psllq $2,%xmm5
    paddq %xmm2,%xmm6
    paddq %xmm0,%xmm5
    pand %xmm7,%xmm1
    movdqa %xmm6,%xmm2
    psrlq $26,%xmm6
    pand %xmm7,%xmm2
    paddd %xmm3,%xmm6
    movdqa %xmm5,%xmm0
    psrlq $26,%xmm5
    movdqa %xmm6,%xmm3
    psrlq $26,%xmm6
    pand %xmm7,%xmm0
    paddd %xmm5,%xmm1
    pand %xmm7,%xmm3
    paddd %xmm6,%xmm4
    /* Pre-load and split the next pair of blocks for the following pass. */
    movdqu 32(%esi),%xmm5
    movdqu 48(%esi),%xmm6
    leal 32(%esi),%esi
    movdqa %xmm2,112(%esp)
    movdqa %xmm3,128(%esp)
    movdqa %xmm4,144(%esp)
    movdqa %xmm5,%xmm2
    movdqa %xmm6,%xmm3
    psrldq $6,%xmm2
    psrldq $6,%xmm3
    movdqa %xmm5,%xmm4
    punpcklqdq %xmm3,%xmm2
    punpckhqdq %xmm6,%xmm4
    punpcklqdq %xmm6,%xmm5
    movdqa %xmm2,%xmm3
    psrlq $4,%xmm2
    psrlq $30,%xmm3
    movdqa %xmm5,%xmm6
    psrlq $40,%xmm4
    psrlq $26,%xmm6
    pand %xmm7,%xmm5
    pand %xmm7,%xmm6
    pand %xmm7,%xmm2
    pand %xmm7,%xmm3
    por (%ebx),%xmm4
    movdqa %xmm0,80(%esp)
    movdqa %xmm1,96(%esp)
    ja .L015loop    /* flags still from the "subl $64,%ecx" above */
.L014skip_loop:
    /* Tail.  pshufd $16 places source dwords 0 and 1 in the even dword
       positions, which are the ones pmuludq reads.  The flags from
       "addl $32,%ecx" are consumed twice: by the jnz right below and by
       "jz .L017short_tail" after the multiply (no instruction in between
       modifies EFLAGS). */
    pshufd $16,-144(%edx),%xmm7
    addl $32,%ecx
    jnz .L016long_tail
    /* Exactly one pair of blocks left: fold the accumulator into it now. */
    paddd %xmm0,%xmm5
    paddd %xmm1,%xmm6
    paddd 112(%esp),%xmm2
    paddd 128(%esp),%xmm3
    paddd 144(%esp),%xmm4
.L016long_tail:
    movdqa %xmm5,(%eax)
    movdqa %xmm6,16(%eax)
    movdqa %xmm2,32(%eax)
    movdqa %xmm3,48(%eax)
    movdqa %xmm4,64(%eax)
    pmuludq %xmm7,%xmm5
    pmuludq %xmm7,%xmm6
    pmuludq %xmm7,%xmm2
    movdqa %xmm5,%xmm0
    pshufd $16,-128(%edx),%xmm5
    pmuludq %xmm7,%xmm3
    movdqa %xmm6,%xmm1
    pmuludq %xmm7,%xmm4
    movdqa %xmm5,%xmm6
    pmuludq 48(%eax),%xmm5
    movdqa %xmm6,%xmm7
    pmuludq 32(%eax),%xmm6
    paddq %xmm5,%xmm4
    movdqa %xmm7,%xmm5
    pmuludq 16(%eax),%xmm7
    paddq %xmm6,%xmm3
    pshufd $16,-64(%edx),%xmm6
    pmuludq (%eax),%xmm5
    paddq %xmm7,%xmm2
    pmuludq 64(%eax),%xmm6
    pshufd $16,-112(%edx),%xmm7
    paddq %xmm5,%xmm1
    movdqa %xmm7,%xmm5
    pmuludq 32(%eax),%xmm7
    paddq %xmm6,%xmm0
    movdqa %xmm5,%xmm6
    pmuludq 16(%eax),%xmm5
    paddq %xmm7,%xmm4
    pshufd $16,-48(%edx),%xmm7
    pmuludq (%eax),%xmm6
    paddq %xmm5,%xmm3
    movdqa %xmm7,%xmm5
    pmuludq 64(%eax),%xmm7
    paddq %xmm6,%xmm2
    pmuludq 48(%eax),%xmm5
    pshufd $16,-96(%edx),%xmm6
    paddq %xmm7,%xmm1
    movdqa %xmm6,%xmm7
    pmuludq 16(%eax),%xmm6
    paddq %xmm5,%xmm0
    pshufd $16,-32(%edx),%xmm5
    pmuludq (%eax),%xmm7
    paddq %xmm6,%xmm4
    movdqa %xmm5,%xmm6
    pmuludq 64(%eax),%xmm5
    paddq %xmm7,%xmm3
    movdqa %xmm6,%xmm7
    pmuludq 48(%eax),%xmm6
    paddq %xmm5,%xmm2
    pmuludq 32(%eax),%xmm7
    pshufd $16,-80(%edx),%xmm5
    paddq %xmm6,%xmm1
    pshufd $16,-16(%edx),%xmm6
    pmuludq (%eax),%xmm5
    paddq %xmm7,%xmm0
    movdqa %xmm6,%xmm7
    pmuludq 64(%eax),%xmm6
    paddq %xmm5,%xmm4
    movdqa %xmm7,%xmm5
    pmuludq 16(%eax),%xmm7
    paddq %xmm6,%xmm3
    movdqa %xmm5,%xmm6
    pmuludq 32(%eax),%xmm5
    paddq %xmm7,%xmm0
    pmuludq 48(%eax),%xmm6
    movdqa 64(%ebx),%xmm7
    paddq %xmm5,%xmm1
    paddq %xmm6,%xmm2
    jz .L017short_tail
    /* Long tail: one more pair of blocks.  Split it, add the parked
       accumulator limbs, multiply by the table set at 0..128(%edx) and
       accumulate onto the products computed above. */
    movdqu -32(%esi),%xmm5
    movdqu -16(%esi),%xmm6
    leal 32(%esi),%esi
    movdqa %xmm2,32(%esp)
    movdqa %xmm3,48(%esp)
    movdqa %xmm4,64(%esp)
    movdqa %xmm5,%xmm2
    movdqa %xmm6,%xmm3
    psrldq $6,%xmm2
    psrldq $6,%xmm3
    movdqa %xmm5,%xmm4
    punpcklqdq %xmm3,%xmm2
    punpckhqdq %xmm6,%xmm4
    punpcklqdq %xmm6,%xmm5
    movdqa %xmm2,%xmm3
    psrlq $4,%xmm2
    psrlq $30,%xmm3
    movdqa %xmm5,%xmm6
    psrlq $40,%xmm4
    psrlq $26,%xmm6
    pand %xmm7,%xmm5
    pand %xmm7,%xmm6
    pand %xmm7,%xmm2
    pand %xmm7,%xmm3
    por (%ebx),%xmm4
    pshufd $16,(%edx),%xmm7
    paddd 80(%esp),%xmm5
    paddd 96(%esp),%xmm6
    paddd 112(%esp),%xmm2
    paddd 128(%esp),%xmm3
    paddd 144(%esp),%xmm4
    movdqa %xmm5,(%esp)
    pmuludq %xmm7,%xmm5
    movdqa %xmm6,16(%esp)
    pmuludq %xmm7,%xmm6
    paddq %xmm5,%xmm0
    movdqa %xmm2,%xmm5
    pmuludq %xmm7,%xmm2
    paddq %xmm6,%xmm1
    movdqa %xmm3,%xmm6
    pmuludq %xmm7,%xmm3
    paddq 32(%esp),%xmm2
    movdqa %xmm5,32(%esp)
    pshufd $16,16(%edx),%xmm5
    paddq 48(%esp),%xmm3
    movdqa %xmm6,48(%esp)
    movdqa %xmm4,%xmm6
    pmuludq %xmm7,%xmm4
    paddq 64(%esp),%xmm4
    movdqa %xmm6,64(%esp)
    movdqa %xmm5,%xmm6
    pmuludq 48(%esp),%xmm5
    movdqa %xmm6,%xmm7
    pmuludq 32(%esp),%xmm6
    paddq %xmm5,%xmm4
    movdqa %xmm7,%xmm5
    pmuludq 16(%esp),%xmm7
    paddq %xmm6,%xmm3
    pshufd $16,80(%edx),%xmm6
    pmuludq (%esp),%xmm5
    paddq %xmm7,%xmm2
    pmuludq 64(%esp),%xmm6
    pshufd $16,32(%edx),%xmm7
    paddq %xmm5,%xmm1
    movdqa %xmm7,%xmm5
    pmuludq 32(%esp),%xmm7
    paddq %xmm6,%xmm0
    movdqa %xmm5,%xmm6
    pmuludq 16(%esp),%xmm5
    paddq %xmm7,%xmm4
    pshufd $16,96(%edx),%xmm7
    pmuludq (%esp),%xmm6
    paddq %xmm5,%xmm3
    movdqa %xmm7,%xmm5
    pmuludq 64(%esp),%xmm7
    paddq %xmm6,%xmm2
    pmuludq 48(%esp),%xmm5
    pshufd $16,48(%edx),%xmm6
    paddq %xmm7,%xmm1
    movdqa %xmm6,%xmm7
    pmuludq 16(%esp),%xmm6
    paddq %xmm5,%xmm0
    pshufd $16,112(%edx),%xmm5
    pmuludq (%esp),%xmm7
    paddq %xmm6,%xmm4
    movdqa %xmm5,%xmm6
    pmuludq 64(%esp),%xmm5
    paddq %xmm7,%xmm3
    movdqa %xmm6,%xmm7
    pmuludq 48(%esp),%xmm6
    paddq %xmm5,%xmm2
    pmuludq 32(%esp),%xmm7
    pshufd $16,64(%edx),%xmm5
    paddq %xmm6,%xmm1
    pshufd $16,128(%edx),%xmm6
    pmuludq (%esp),%xmm5
    paddq %xmm7,%xmm0
    movdqa %xmm6,%xmm7
    pmuludq 64(%esp),%xmm6
    paddq %xmm5,%xmm4
    movdqa %xmm7,%xmm5
    pmuludq 16(%esp),%xmm7
    paddq %xmm6,%xmm3
    movdqa %xmm5,%xmm6
    pmuludq 32(%esp),%xmm5
    paddq %xmm7,%xmm0
    pmuludq 48(%esp),%xmm6
    movdqa 64(%ebx),%xmm7
    paddq %xmm5,%xmm1
    paddq %xmm6,%xmm2
.L017short_tail:
    /* Horizontal add: pshufd $78 swaps the two 64-bit lanes, paddq sums
       them, leaving the combined limb in the low lane.  Then one more
       carry propagation. */
    pshufd $78,%xmm4,%xmm6
    pshufd $78,%xmm3,%xmm5
    paddq %xmm6,%xmm4
    paddq %xmm5,%xmm3
    pshufd $78,%xmm0,%xmm6
    pshufd $78,%xmm1,%xmm5
    paddq %xmm6,%xmm0
    paddq %xmm5,%xmm1
    pshufd $78,%xmm2,%xmm6
    movdqa %xmm3,%xmm5
    pand %xmm7,%xmm3
    psrlq $26,%xmm5
    paddq %xmm6,%xmm2
    paddq %xmm4,%xmm5
    movdqa %xmm0,%xmm6
    pand %xmm7,%xmm0
    psrlq $26,%xmm6
    movdqa %xmm5,%xmm4
    paddq %xmm1,%xmm6
    psrlq $26,%xmm5
    pand %xmm7,%xmm4
    movdqa %xmm6,%xmm1
    psrlq $26,%xmm6
    paddd %xmm5,%xmm0
    psllq $2,%xmm5
    paddq %xmm2,%xmm6
    paddq %xmm0,%xmm5
    pand %xmm7,%xmm1
    movdqa %xmm6,%xmm2
    psrlq $26,%xmm6
    pand %xmm7,%xmm2
    paddd %xmm3,%xmm6
    movdqa %xmm5,%xmm0
    psrlq $26,%xmm5
    movdqa %xmm6,%xmm3
    psrlq $26,%xmm6
    pand %xmm7,%xmm0
    paddd %xmm5,%xmm1
    pand %xmm7,%xmm3
    paddd %xmm6,%xmm4
.L013done:
    /* %edi was advanced by 48, so -48..-32(%edi) are context words 0..4. */
    movd %xmm0,-48(%edi)
    movd %xmm1,-44(%edi)
    movd %xmm2,-40(%edi)
    movd %xmm3,-36(%edi)
    movd %xmm4,-32(%edi)
    movl %ebp,%esp    /* drop the aligned scratch frame */
.L007nodata:
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
.size _poly1305_blocks_sse2,.-_poly1305_blocks_sse2
3250
.align 32
3251
/*
 * _poly1305_emit_sse2 -- produce the final 16-byte value from a context
 * whose accumulator is held as five limbs.
 *
 * Arguments, as read after the four register pushes:
 *   20(%esp) -> %ebp  context pointer
 *   24(%esp) -> %edi  16-byte output buffer
 *   28(%esp) -> %ebp  pointer to four 32-bit words that are added to the
 *                     result (presumably the nonce/"s" half of the key --
 *                     confirm against the C caller)
 *
 * If the flag word at 20(ctx) is zero the routine jumps to .Lenter_emit,
 * which is defined outside this block.  Otherwise it:
 *   1. repacks the five limbs at 0..16(ctx) into four 32-bit words plus a
 *      small top word (shifts 26/20/14/8 pair with 6/12/18/24);
 *   2. folds everything above bit 130 back in, multiplied by 5;
 *   3. computes h + 5 and, using a branch-free mask, keeps h + 5 when that
 *      addition carries into bit 130 and h otherwise;
 *   4. adds the four words at the third argument with carry and stores the
 *      low 128 bits to the output buffer.
 *
 * %ebp, %ebx, %esi and %edi are saved and restored; %xmm0-%xmm3 are used as
 * scratch to hold h across step 3 (movd does not disturb the carry chain).
 */
.type _poly1305_emit_sse2,@function
.align 16
_poly1305_emit_sse2:
#ifdef __CET__

    .byte 243,15,30,251    /* F3 0F 1E FB = endbr32 */
#endif

    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%ebp
    cmpl $0,20(%ebp)
    je .Lenter_emit
    /* Load limbs h0..h4. */
    movl (%ebp),%eax
    movl 4(%ebp),%edi
    movl 8(%ebp),%ecx
    movl 12(%ebp),%edx
    movl 16(%ebp),%esi
    /* Repack: word0 = h0 + (h1 << 26), word1 = (h1 >> 6) + (h2 << 20),
       word2 = (h2 >> 12) + (h3 << 14), word3 = (h3 >> 18) + (h4 << 8),
       top = h4 >> 24, with carries rippling upward via adcl. */
    movl %edi,%ebx
    shll $26,%edi
    shrl $6,%ebx
    addl %edi,%eax
    movl %ecx,%edi
    adcl $0,%ebx
    shll $20,%edi
    shrl $12,%ecx
    addl %edi,%ebx
    movl %edx,%edi
    adcl $0,%ecx
    shll $14,%edi
    shrl $18,%edx
    addl %edi,%ecx
    movl %esi,%edi
    adcl $0,%edx
    shll $8,%edi
    shrl $24,%esi
    addl %edi,%edx
    adcl $0,%esi
    /* Fold bits 130 and up: keep the low two bits of the top word and add
       5 * (top >> 2) to the bottom (lea computes x + 4x). */
    movl %esi,%edi
    andl $3,%esi
    shrl $2,%edi
    leal (%edi,%edi,4),%ebp
    movl 24(%esp),%edi    /* output buffer */
    addl %ebp,%eax
    movl 28(%esp),%ebp    /* words to add at the end */
    adcl $0,%ebx
    adcl $0,%ecx
    adcl $0,%edx
    adcl $0,%esi
    /* Save h in %xmm0-%xmm3 while computing h + 5 in the GPRs. */
    movd %eax,%xmm0
    addl $5,%eax
    movd %ebx,%xmm1
    adcl $0,%ebx
    movd %ecx,%xmm2
    adcl $0,%ecx
    movd %edx,%xmm3
    adcl $0,%edx
    adcl $0,%esi
    /* Bit 2 of the top word is the carry into bit 130; turn it into an
       all-ones / all-zeros mask. */
    shrl $2,%esi
    negl %esi
    andl %esi,%eax
    andl %esi,%ebx
    andl %esi,%ecx
    andl %esi,%edx
    /* Park the masked h + 5 in the output buffer, reload h, mask it with
       the complement and merge the two. */
    movl %eax,(%edi)
    movd %xmm0,%eax
    movl %ebx,4(%edi)
    movd %xmm1,%ebx
    movl %ecx,8(%edi)
    movd %xmm2,%ecx
    movl %edx,12(%edi)
    movd %xmm3,%edx
    notl %esi
    andl %esi,%eax
    andl %esi,%ebx
    orl (%edi),%eax
    andl %esi,%ecx
    orl 4(%edi),%ebx
    andl %esi,%edx
    orl 8(%edi),%ecx
    orl 12(%edi),%edx
    /* Add the four words at the third argument; the carry out of the top
       word is discarded.  The interleaved movl stores do not touch flags. */
    addl (%ebp),%eax
    adcl 4(%ebp),%ebx
    movl %eax,(%edi)
    adcl 8(%ebp),%ecx
    movl %ebx,4(%edi)
    adcl 12(%ebp),%edx
    movl %ecx,8(%edi)
    movl %edx,12(%edi)
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
.size _poly1305_emit_sse2,.-_poly1305_emit_sse2
3348
.align 32
3349
/*
 * _poly1305_init_avx2 -- internal helper that fills the nine-entry table at
 * 48..191(ctx) using 128-bit VEX-encoded instructions.
 *
 * This is not a C-callable function; it takes its inputs in registers:
 *   %edi  context pointer (16 bytes are read from 24(%edi); the visible
 *         poly1305_init stores the clamped key words at 24..36 of the
 *         context)
 *   %ebx  pointer to the constant table; 64(%ebx) is used as the AND mask
 *         after each 26-bit shift (presumably 2^26-1 per 64-bit lane; the
 *         table is defined outside this block)
 *
 * On return %edi is restored to the context pointer.  %ebp is overwritten
 * (it holds the incoming %esp while a 224-byte, 16-byte-aligned scratch
 * frame is in use), as are %ecx, %edx and %xmm0-%xmm7.
 *
 * The loop body runs twice (%ecx = 2).  Each pass multiplies the current
 * limb vectors by the low-lane value broadcast to both lanes and reduces
 * the result; between passes the new result is paired with the previous
 * one.  After the final merge and shuffle, each of the five limb entries at
 * 0..64(%edi) holds four 32-bit values -- by the shuffle immediates these
 * appear to be ordered r^4, r^3, r^2, r (TODO confirm) -- and entries
 * 80..128(%edi) hold 5 * limb for limbs 1..4.
 */
.type _poly1305_init_avx2,@function
.align 16
_poly1305_init_avx2:
#ifdef __CET__

    .byte 243,15,30,251    /* F3 0F 1E FB = endbr32 */
#endif

    vmovdqu 24(%edi),%xmm4
    leal 48(%edi),%edi    /* %edi now addresses the table */
    movl %esp,%ebp    /* return address stays at (%ebp) */
    subl $224,%esp
    andl $-16,%esp
    /* Split the 128-bit value into five 26-bit limbs: bits 0..25, 26..51,
       52..77, 78..103, and bits 104 and up (13-byte shift, unmasked). */
    vmovdqa 64(%ebx),%xmm7
    vpand %xmm7,%xmm4,%xmm0
    vpsrlq $26,%xmm4,%xmm1
    vpsrldq $6,%xmm4,%xmm3
    vpand %xmm7,%xmm1,%xmm1
    vpsrlq $4,%xmm3,%xmm2
    vpsrlq $30,%xmm3,%xmm3
    vpand %xmm7,%xmm2,%xmm2
    vpand %xmm7,%xmm3,%xmm3
    vpsrldq $13,%xmm4,%xmm4
    leal 144(%esp),%edx
    movl $2,%ecx
.L018square:
    /* Save the current limbs at 0..64(%esp) and 5 * limb (x*4 + x) for
       limbs 1..4 at 80..128(%esp). */
    vmovdqa %xmm0,(%esp)
    vmovdqa %xmm1,16(%esp)
    vmovdqa %xmm2,32(%esp)
    vmovdqa %xmm3,48(%esp)
    vmovdqa %xmm4,64(%esp)
    vpslld $2,%xmm1,%xmm6
    vpslld $2,%xmm2,%xmm5
    vpaddd %xmm1,%xmm6,%xmm6
    vpaddd %xmm2,%xmm5,%xmm5
    vmovdqa %xmm6,80(%esp)
    vmovdqa %xmm5,96(%esp)
    vpslld $2,%xmm3,%xmm6
    vpslld $2,%xmm4,%xmm5
    vpaddd %xmm3,%xmm6,%xmm6
    vpaddd %xmm4,%xmm5,%xmm5
    vmovdqa %xmm6,112(%esp)
    vmovdqa %xmm5,128(%esp)
    /* Broadcast the low 64-bit lane of each limb (vpshufd $68) and keep the
       broadcasts at 0..64(%edx) as the multiplier operands. */
    vpshufd $68,%xmm0,%xmm5
    vmovdqa %xmm1,%xmm6
    vpshufd $68,%xmm1,%xmm1
    vpshufd $68,%xmm2,%xmm2
    vpshufd $68,%xmm3,%xmm3
    vpshufd $68,%xmm4,%xmm4
    vmovdqa %xmm5,(%edx)
    vmovdqa %xmm1,16(%edx)
    vmovdqa %xmm2,32(%edx)
    vmovdqa %xmm3,48(%edx)
    vmovdqa %xmm4,64(%edx)
    /* Five-limb schoolbook multiply; the 5 * limb copies at 80..128(%esp)
       supply the terms that wrap around past limb 4. */
    vpmuludq %xmm0,%xmm4,%xmm4
    vpmuludq %xmm0,%xmm3,%xmm3
    vpmuludq %xmm0,%xmm2,%xmm2
    vpmuludq %xmm0,%xmm1,%xmm1
    vpmuludq %xmm0,%xmm5,%xmm0
    vpmuludq 48(%edx),%xmm6,%xmm5
    vpaddq %xmm5,%xmm4,%xmm4
    vpmuludq 32(%edx),%xmm6,%xmm7
    vpaddq %xmm7,%xmm3,%xmm3
    vpmuludq 16(%edx),%xmm6,%xmm5
    vpaddq %xmm5,%xmm2,%xmm2
    vmovdqa 80(%esp),%xmm7
    vpmuludq (%edx),%xmm6,%xmm6
    vpaddq %xmm6,%xmm1,%xmm1
    vmovdqa 32(%esp),%xmm5
    vpmuludq 64(%edx),%xmm7,%xmm7
    vpaddq %xmm7,%xmm0,%xmm0
    vpmuludq 32(%edx),%xmm5,%xmm6
    vpaddq %xmm6,%xmm4,%xmm4
    vpmuludq 16(%edx),%xmm5,%xmm7
    vpaddq %xmm7,%xmm3,%xmm3
    vmovdqa 96(%esp),%xmm6
    vpmuludq (%edx),%xmm5,%xmm5
    vpaddq %xmm5,%xmm2,%xmm2
    vpmuludq 64(%edx),%xmm6,%xmm7
    vpaddq %xmm7,%xmm1,%xmm1
    vmovdqa 48(%esp),%xmm5
    vpmuludq 48(%edx),%xmm6,%xmm6
    vpaddq %xmm6,%xmm0,%xmm0
    vpmuludq 16(%edx),%xmm5,%xmm7
    vpaddq %xmm7,%xmm4,%xmm4
    vmovdqa 112(%esp),%xmm6
    vpmuludq (%edx),%xmm5,%xmm5
    vpaddq %xmm5,%xmm3,%xmm3
    vpmuludq 64(%edx),%xmm6,%xmm7
    vpaddq %xmm7,%xmm2,%xmm2
    vpmuludq 48(%edx),%xmm6,%xmm5
    vpaddq %xmm5,%xmm1,%xmm1
    vmovdqa 64(%esp),%xmm7
    vpmuludq 32(%edx),%xmm6,%xmm6
    vpaddq %xmm6,%xmm0,%xmm0
    vmovdqa 128(%esp),%xmm5
    vpmuludq (%edx),%xmm7,%xmm7
    vpaddq %xmm7,%xmm4,%xmm4
    vpmuludq 64(%edx),%xmm5,%xmm6
    vpaddq %xmm6,%xmm3,%xmm3
    vpmuludq 16(%edx),%xmm5,%xmm7
    vpaddq %xmm7,%xmm0,%xmm0
    vpmuludq 32(%edx),%xmm5,%xmm6
    vpaddq %xmm6,%xmm1,%xmm1
    vmovdqa 64(%ebx),%xmm7
    vpmuludq 48(%edx),%xmm5,%xmm5
    vpaddq %xmm5,%xmm2,%xmm2
    /* Carry propagation.  The carry out of limb 4 is added to limb 0 once
       directly and once shifted left by 2, i.e. multiplied by 5. */
    vpsrlq $26,%xmm3,%xmm5
    vpand %xmm7,%xmm3,%xmm3
    vpsrlq $26,%xmm0,%xmm6
    vpand %xmm7,%xmm0,%xmm0
    vpaddq %xmm5,%xmm4,%xmm4
    vpaddq %xmm6,%xmm1,%xmm1
    vpsrlq $26,%xmm4,%xmm5
    vpand %xmm7,%xmm4,%xmm4
    vpsrlq $26,%xmm1,%xmm6
    vpand %xmm7,%xmm1,%xmm1
    vpaddq %xmm6,%xmm2,%xmm2
    vpaddd %xmm5,%xmm0,%xmm0
    vpsllq $2,%xmm5,%xmm5
    vpsrlq $26,%xmm2,%xmm6
    vpand %xmm7,%xmm2,%xmm2
    vpaddd %xmm5,%xmm0,%xmm0
    vpaddd %xmm6,%xmm3,%xmm3
    vpsrlq $26,%xmm3,%xmm6
    vpsrlq $26,%xmm0,%xmm5
    vpand %xmm7,%xmm0,%xmm0
    vpand %xmm7,%xmm3,%xmm3
    vpaddd %xmm5,%xmm1,%xmm1
    vpaddd %xmm6,%xmm4,%xmm4
    decl %ecx
    jz .L019square_break
    /* Pair the new result (low lane) with the previous limbs saved at
       0..64(%esp) (high lane) and run the second pass. */
    vpunpcklqdq (%esp),%xmm0,%xmm0
    vpunpcklqdq 16(%esp),%xmm1,%xmm1
    vpunpcklqdq 32(%esp),%xmm2,%xmm2
    vpunpcklqdq 48(%esp),%xmm3,%xmm3
    vpunpcklqdq 64(%esp),%xmm4,%xmm4
    jmp .L018square
.L019square_break:
    /* Move the second-pass results to the odd dwords, merge the first-pass
       inputs from the stack into the even dwords, then reorder with
       vpshufd $141 (selects source dwords 1, 3, 0, 2). */
    vpsllq $32,%xmm0,%xmm0
    vpsllq $32,%xmm1,%xmm1
    vpsllq $32,%xmm2,%xmm2
    vpsllq $32,%xmm3,%xmm3
    vpsllq $32,%xmm4,%xmm4
    vpor (%esp),%xmm0,%xmm0
    vpor 16(%esp),%xmm1,%xmm1
    vpor 32(%esp),%xmm2,%xmm2
    vpor 48(%esp),%xmm3,%xmm3
    vpor 64(%esp),%xmm4,%xmm4
    vpshufd $141,%xmm0,%xmm0
    vpshufd $141,%xmm1,%xmm1
    vpshufd $141,%xmm2,%xmm2
    vpshufd $141,%xmm3,%xmm3
    vpshufd $141,%xmm4,%xmm4
    /* Table entries 0..4: the five limbs. */
    vmovdqu %xmm0,(%edi)
    vmovdqu %xmm1,16(%edi)
    vmovdqu %xmm2,32(%edi)
    vmovdqu %xmm3,48(%edi)
    vmovdqu %xmm4,64(%edi)
    /* Table entries 5..8: 5 * limb for limbs 1..4. */
    vpslld $2,%xmm1,%xmm6
    vpslld $2,%xmm2,%xmm5
    vpaddd %xmm1,%xmm6,%xmm6
    vpaddd %xmm2,%xmm5,%xmm5
    vmovdqu %xmm6,80(%edi)
    vmovdqu %xmm5,96(%edi)
    vpslld $2,%xmm3,%xmm6
    vpslld $2,%xmm4,%xmm5
    vpaddd %xmm3,%xmm6,%xmm6
    vpaddd %xmm4,%xmm5,%xmm5
    vmovdqu %xmm6,112(%edi)
    vmovdqu %xmm5,128(%edi)
    movl %ebp,%esp    /* drop the scratch frame before returning */
    leal -48(%edi),%edi    /* restore the context pointer */
    ret
.size _poly1305_init_avx2,.-_poly1305_init_avx2
3524
.align 32
3525
.type _poly1305_blocks_avx2,@function
3526
.align 16
3527
_poly1305_blocks_avx2:
3528
#ifdef __CET__
3529
3530
.byte 243,15,30,251
3531
#endif
3532
3533
pushl %ebp
3534
pushl %ebx
3535
pushl %esi
3536
pushl %edi
3537
movl 20(%esp),%edi
3538
movl 24(%esp),%esi
3539
movl 28(%esp),%ecx
3540
movl 20(%edi),%eax
3541
andl $-16,%ecx
3542
jz .L020nodata
3543
cmpl $64,%ecx
3544
jae .L021enter_avx2
3545
testl %eax,%eax
3546
jz .Lenter_blocks
3547
.L021enter_avx2:
3548
vzeroupper
3549
call .L022pic_point
3550
.L022pic_point:
3551
popl %ebx
3552
leal .Lconst_sse2-.L022pic_point(%ebx),%ebx
3553
testl %eax,%eax
3554
jnz .L023base2_26
3555
call _poly1305_init_avx2
3556
movl (%edi),%eax
3557
movl 3(%edi),%ecx
3558
movl 6(%edi),%edx
3559
movl 9(%edi),%esi
3560
movl 13(%edi),%ebp
3561
shrl $2,%ecx
3562
andl $67108863,%eax
3563
shrl $4,%edx
3564
andl $67108863,%ecx
3565
shrl $6,%esi
3566
andl $67108863,%edx
3567
movl %eax,(%edi)
3568
movl %ecx,4(%edi)
3569
movl %edx,8(%edi)
3570
movl %esi,12(%edi)
3571
movl %ebp,16(%edi)
3572
movl $1,20(%edi)
3573
movl 24(%esp),%esi
3574
movl 28(%esp),%ecx
3575
.L023base2_26:
3576
movl 32(%esp),%eax
3577
movl %esp,%ebp
3578
subl $448,%esp
3579
andl $-512,%esp
3580
vmovdqu 48(%edi),%xmm0
3581
leal 288(%esp),%edx
3582
vmovdqu 64(%edi),%xmm1
3583
vmovdqu 80(%edi),%xmm2
3584
vmovdqu 96(%edi),%xmm3
3585
vmovdqu 112(%edi),%xmm4
3586
leal 48(%edi),%edi
3587
vpermq $64,%ymm0,%ymm0
3588
vpermq $64,%ymm1,%ymm1
3589
vpermq $64,%ymm2,%ymm2
3590
vpermq $64,%ymm3,%ymm3
3591
vpermq $64,%ymm4,%ymm4
3592
vpshufd $200,%ymm0,%ymm0
3593
vpshufd $200,%ymm1,%ymm1
3594
vpshufd $200,%ymm2,%ymm2
3595
vpshufd $200,%ymm3,%ymm3
3596
vpshufd $200,%ymm4,%ymm4
3597
vmovdqa %ymm0,-128(%edx)
3598
vmovdqu 80(%edi),%xmm0
3599
vmovdqa %ymm1,-96(%edx)
3600
vmovdqu 96(%edi),%xmm1
3601
vmovdqa %ymm2,-64(%edx)
3602
vmovdqu 112(%edi),%xmm2
3603
vmovdqa %ymm3,-32(%edx)
3604
vmovdqu 128(%edi),%xmm3
3605
vmovdqa %ymm4,(%edx)
3606
vpermq $64,%ymm0,%ymm0
3607
vpermq $64,%ymm1,%ymm1
3608
vpermq $64,%ymm2,%ymm2
3609
vpermq $64,%ymm3,%ymm3
3610
vpshufd $200,%ymm0,%ymm0
3611
vpshufd $200,%ymm1,%ymm1
3612
vpshufd $200,%ymm2,%ymm2
3613
vpshufd $200,%ymm3,%ymm3
3614
vmovdqa %ymm0,32(%edx)
3615
vmovd -48(%edi),%xmm0
3616
vmovdqa %ymm1,64(%edx)
3617
vmovd -44(%edi),%xmm1
3618
vmovdqa %ymm2,96(%edx)
3619
vmovd -40(%edi),%xmm2
3620
vmovdqa %ymm3,128(%edx)
3621
vmovd -36(%edi),%xmm3
3622
vmovd -32(%edi),%xmm4
3623
vmovdqa 64(%ebx),%ymm7
3624
negl %eax
3625
testl $63,%ecx
3626
jz .L024even
3627
movl %ecx,%edx
3628
andl $-64,%ecx
3629
andl $63,%edx
3630
vmovdqu (%esi),%xmm5
3631
cmpl $32,%edx
3632
jb .L025one
3633
vmovdqu 16(%esi),%xmm6
3634
je .L026two
3635
vinserti128 $1,32(%esi),%ymm5,%ymm5
3636
leal 48(%esi),%esi
3637
leal 8(%ebx),%ebx
3638
leal 296(%esp),%edx
3639
jmp .L027tail
3640
.L026two:
3641
leal 32(%esi),%esi
3642
leal 16(%ebx),%ebx
3643
leal 304(%esp),%edx
3644
jmp .L027tail
3645
.L025one:
3646
leal 16(%esi),%esi
3647
vpxor %ymm6,%ymm6,%ymm6
3648
leal 32(%ebx,%eax,8),%ebx
3649
leal 312(%esp),%edx
3650
jmp .L027tail
3651
.align 32
3652
.L024even:
3653
vmovdqu (%esi),%xmm5
3654
vmovdqu 16(%esi),%xmm6
3655
vinserti128 $1,32(%esi),%ymm5,%ymm5
3656
vinserti128 $1,48(%esi),%ymm6,%ymm6
3657
leal 64(%esi),%esi
3658
subl $64,%ecx
3659
jz .L027tail
3660
.L028loop:
3661
vmovdqa %ymm2,64(%esp)
3662
vpsrldq $6,%ymm5,%ymm2
3663
vmovdqa %ymm0,(%esp)
3664
vpsrldq $6,%ymm6,%ymm0
3665
vmovdqa %ymm1,32(%esp)
3666
vpunpckhqdq %ymm6,%ymm5,%ymm1
3667
vpunpcklqdq %ymm6,%ymm5,%ymm5
3668
vpunpcklqdq %ymm0,%ymm2,%ymm2
3669
vpsrlq $30,%ymm2,%ymm0
3670
vpsrlq $4,%ymm2,%ymm2
3671
vpsrlq $26,%ymm5,%ymm6
3672
vpsrlq $40,%ymm1,%ymm1
3673
vpand %ymm7,%ymm2,%ymm2
3674
vpand %ymm7,%ymm5,%ymm5
3675
vpand %ymm7,%ymm6,%ymm6
3676
vpand %ymm7,%ymm0,%ymm0
3677
vpor (%ebx),%ymm1,%ymm1
3678
vpaddq 64(%esp),%ymm2,%ymm2
3679
vpaddq (%esp),%ymm5,%ymm5
3680
vpaddq 32(%esp),%ymm6,%ymm6
3681
vpaddq %ymm3,%ymm0,%ymm0
3682
vpaddq %ymm4,%ymm1,%ymm1
3683
vpmuludq -96(%edx),%ymm2,%ymm3
3684
vmovdqa %ymm6,32(%esp)
3685
vpmuludq -64(%edx),%ymm2,%ymm4
3686
vmovdqa %ymm0,96(%esp)
3687
vpmuludq 96(%edx),%ymm2,%ymm0
3688
vmovdqa %ymm1,128(%esp)
3689
vpmuludq 128(%edx),%ymm2,%ymm1
3690
vpmuludq -128(%edx),%ymm2,%ymm2
3691
vpmuludq -32(%edx),%ymm5,%ymm7
3692
vpaddq %ymm7,%ymm3,%ymm3
3693
vpmuludq (%edx),%ymm5,%ymm6
3694
vpaddq %ymm6,%ymm4,%ymm4
3695
vpmuludq -128(%edx),%ymm5,%ymm7
3696
vpaddq %ymm7,%ymm0,%ymm0
3697
vmovdqa 32(%esp),%ymm7
3698
vpmuludq -96(%edx),%ymm5,%ymm6
3699
vpaddq %ymm6,%ymm1,%ymm1
3700
vpmuludq -64(%edx),%ymm5,%ymm5
3701
vpaddq %ymm5,%ymm2,%ymm2
3702
vpmuludq -64(%edx),%ymm7,%ymm6
3703
vpaddq %ymm6,%ymm3,%ymm3
3704
vpmuludq -32(%edx),%ymm7,%ymm5
3705
vpaddq %ymm5,%ymm4,%ymm4
3706
vpmuludq 128(%edx),%ymm7,%ymm6
3707
vpaddq %ymm6,%ymm0,%ymm0
3708
vmovdqa 96(%esp),%ymm6
3709
vpmuludq -128(%edx),%ymm7,%ymm5
3710
vpaddq %ymm5,%ymm1,%ymm1
3711
vpmuludq -96(%edx),%ymm7,%ymm7
3712
vpaddq %ymm7,%ymm2,%ymm2
3713
vpmuludq -128(%edx),%ymm6,%ymm5
3714
vpaddq %ymm5,%ymm3,%ymm3
3715
vpmuludq -96(%edx),%ymm6,%ymm7
3716
vpaddq %ymm7,%ymm4,%ymm4
3717
vpmuludq 64(%edx),%ymm6,%ymm5
3718
vpaddq %ymm5,%ymm0,%ymm0
3719
vmovdqa 128(%esp),%ymm5
3720
vpmuludq 96(%edx),%ymm6,%ymm7
3721
vpaddq %ymm7,%ymm1,%ymm1
3722
vpmuludq 128(%edx),%ymm6,%ymm6
3723
vpaddq %ymm6,%ymm2,%ymm2
3724
vpmuludq 128(%edx),%ymm5,%ymm7
3725
vpaddq %ymm7,%ymm3,%ymm3
3726
vpmuludq 32(%edx),%ymm5,%ymm6
3727
vpaddq %ymm6,%ymm0,%ymm0
3728
vpmuludq -128(%edx),%ymm5,%ymm7
3729
vpaddq %ymm7,%ymm4,%ymm4
3730
vmovdqa 64(%ebx),%ymm7
3731
vpmuludq 64(%edx),%ymm5,%ymm6
3732
vpaddq %ymm6,%ymm1,%ymm1
3733
vpmuludq 96(%edx),%ymm5,%ymm5
3734
vpaddq %ymm5,%ymm2,%ymm2
3735
vpsrlq $26,%ymm3,%ymm5
3736
vpand %ymm7,%ymm3,%ymm3
3737
vpsrlq $26,%ymm0,%ymm6
3738
vpand %ymm7,%ymm0,%ymm0
3739
vpaddq %ymm5,%ymm4,%ymm4
3740
vpaddq %ymm6,%ymm1,%ymm1
3741
vpsrlq $26,%ymm4,%ymm5
3742
vpand %ymm7,%ymm4,%ymm4
3743
vpsrlq $26,%ymm1,%ymm6
3744
vpand %ymm7,%ymm1,%ymm1
3745
vpaddq %ymm6,%ymm2,%ymm2
3746
vpaddq %ymm5,%ymm0,%ymm0
3747
vpsllq $2,%ymm5,%ymm5
3748
vpsrlq $26,%ymm2,%ymm6
3749
vpand %ymm7,%ymm2,%ymm2
3750
vpaddq %ymm5,%ymm0,%ymm0
3751
vpaddq %ymm6,%ymm3,%ymm3
3752
vpsrlq $26,%ymm3,%ymm6
3753
vpsrlq $26,%ymm0,%ymm5
3754
vpand %ymm7,%ymm0,%ymm0
3755
vpand %ymm7,%ymm3,%ymm3
3756
vpaddq %ymm5,%ymm1,%ymm1
3757
vpaddq %ymm6,%ymm4,%ymm4
3758
vmovdqu (%esi),%xmm5
3759
vmovdqu 16(%esi),%xmm6
3760
vinserti128 $1,32(%esi),%ymm5,%ymm5
3761
vinserti128 $1,48(%esi),%ymm6,%ymm6
3762
leal 64(%esi),%esi
3763
subl $64,%ecx
3764
jnz .L028loop
3765
.L027tail:
3766
vmovdqa %ymm2,64(%esp)
3767
vpsrldq $6,%ymm5,%ymm2
3768
vmovdqa %ymm0,(%esp)
3769
vpsrldq $6,%ymm6,%ymm0
3770
vmovdqa %ymm1,32(%esp)
3771
vpunpckhqdq %ymm6,%ymm5,%ymm1
3772
vpunpcklqdq %ymm6,%ymm5,%ymm5
3773
vpunpcklqdq %ymm0,%ymm2,%ymm2
3774
vpsrlq $30,%ymm2,%ymm0
3775
vpsrlq $4,%ymm2,%ymm2
3776
vpsrlq $26,%ymm5,%ymm6
3777
vpsrlq $40,%ymm1,%ymm1
3778
vpand %ymm7,%ymm2,%ymm2
3779
vpand %ymm7,%ymm5,%ymm5
3780
vpand %ymm7,%ymm6,%ymm6
3781
vpand %ymm7,%ymm0,%ymm0
3782
vpor (%ebx),%ymm1,%ymm1
3783
andl $-64,%ebx
3784
vpaddq 64(%esp),%ymm2,%ymm2
3785
vpaddq (%esp),%ymm5,%ymm5
3786
vpaddq 32(%esp),%ymm6,%ymm6
3787
vpaddq %ymm3,%ymm0,%ymm0
3788
vpaddq %ymm4,%ymm1,%ymm1
3789
vpmuludq -92(%edx),%ymm2,%ymm3
3790
vmovdqa %ymm6,32(%esp)
3791
vpmuludq -60(%edx),%ymm2,%ymm4
3792
vmovdqa %ymm0,96(%esp)
3793
vpmuludq 100(%edx),%ymm2,%ymm0
3794
vmovdqa %ymm1,128(%esp)
3795
vpmuludq 132(%edx),%ymm2,%ymm1
3796
vpmuludq -124(%edx),%ymm2,%ymm2
3797
vpmuludq -28(%edx),%ymm5,%ymm7
3798
vpaddq %ymm7,%ymm3,%ymm3
3799
vpmuludq 4(%edx),%ymm5,%ymm6
3800
vpaddq %ymm6,%ymm4,%ymm4
3801
vpmuludq -124(%edx),%ymm5,%ymm7
3802
vpaddq %ymm7,%ymm0,%ymm0
3803
vmovdqa 32(%esp),%ymm7
3804
vpmuludq -92(%edx),%ymm5,%ymm6
3805
vpaddq %ymm6,%ymm1,%ymm1
3806
vpmuludq -60(%edx),%ymm5,%ymm5
3807
vpaddq %ymm5,%ymm2,%ymm2
3808
vpmuludq -60(%edx),%ymm7,%ymm6
3809
vpaddq %ymm6,%ymm3,%ymm3
3810
vpmuludq -28(%edx),%ymm7,%ymm5
3811
vpaddq %ymm5,%ymm4,%ymm4
3812
vpmuludq 132(%edx),%ymm7,%ymm6
3813
vpaddq %ymm6,%ymm0,%ymm0
3814
vmovdqa 96(%esp),%ymm6
3815
vpmuludq -124(%edx),%ymm7,%ymm5
3816
vpaddq %ymm5,%ymm1,%ymm1
3817
vpmuludq -92(%edx),%ymm7,%ymm7
3818
vpaddq %ymm7,%ymm2,%ymm2
3819
vpmuludq -124(%edx),%ymm6,%ymm5
3820
vpaddq %ymm5,%ymm3,%ymm3
3821
vpmuludq -92(%edx),%ymm6,%ymm7
3822
vpaddq %ymm7,%ymm4,%ymm4
3823
vpmuludq 68(%edx),%ymm6,%ymm5
3824
vpaddq %ymm5,%ymm0,%ymm0
3825
vmovdqa 128(%esp),%ymm5
3826
vpmuludq 100(%edx),%ymm6,%ymm7
3827
vpaddq %ymm7,%ymm1,%ymm1
3828
vpmuludq 132(%edx),%ymm6,%ymm6
3829
vpaddq %ymm6,%ymm2,%ymm2
3830
vpmuludq 132(%edx),%ymm5,%ymm7
3831
vpaddq %ymm7,%ymm3,%ymm3
3832
vpmuludq 36(%edx),%ymm5,%ymm6
3833
vpaddq %ymm6,%ymm0,%ymm0
3834
vpmuludq -124(%edx),%ymm5,%ymm7
3835
vpaddq %ymm7,%ymm4,%ymm4
3836
vmovdqa 64(%ebx),%ymm7
3837
vpmuludq 68(%edx),%ymm5,%ymm6
3838
vpaddq %ymm6,%ymm1,%ymm1
3839
vpmuludq 100(%edx),%ymm5,%ymm5
3840
vpaddq %ymm5,%ymm2,%ymm2
3841
vpsrldq $8,%ymm4,%ymm5
3842
vpsrldq $8,%ymm3,%ymm6
3843
vpaddq %ymm5,%ymm4,%ymm4
3844
vpsrldq $8,%ymm0,%ymm5
3845
vpaddq %ymm6,%ymm3,%ymm3
3846
vpsrldq $8,%ymm1,%ymm6
3847
vpaddq %ymm5,%ymm0,%ymm0
3848
vpsrldq $8,%ymm2,%ymm5
3849
vpaddq %ymm6,%ymm1,%ymm1
3850
vpermq $2,%ymm4,%ymm6
3851
vpaddq %ymm5,%ymm2,%ymm2
3852
vpermq $2,%ymm3,%ymm5
3853
vpaddq %ymm6,%ymm4,%ymm4
3854
vpermq $2,%ymm0,%ymm6
3855
vpaddq %ymm5,%ymm3,%ymm3
3856
vpermq $2,%ymm1,%ymm5
3857
vpaddq %ymm6,%ymm0,%ymm0
3858
vpermq $2,%ymm2,%ymm6
3859
vpaddq %ymm5,%ymm1,%ymm1
3860
vpaddq %ymm6,%ymm2,%ymm2
3861
vpsrlq $26,%ymm3,%ymm5
3862
vpand %ymm7,%ymm3,%ymm3
3863
vpsrlq $26,%ymm0,%ymm6
3864
vpand %ymm7,%ymm0,%ymm0
3865
vpaddq %ymm5,%ymm4,%ymm4
3866
vpaddq %ymm6,%ymm1,%ymm1
3867
vpsrlq $26,%ymm4,%ymm5
3868
vpand %ymm7,%ymm4,%ymm4
3869
vpsrlq $26,%ymm1,%ymm6
3870
vpand %ymm7,%ymm1,%ymm1
3871
vpaddq %ymm6,%ymm2,%ymm2
3872
vpaddq %ymm5,%ymm0,%ymm0
3873
vpsllq $2,%ymm5,%ymm5
3874
vpsrlq $26,%ymm2,%ymm6
3875
vpand %ymm7,%ymm2,%ymm2
3876
vpaddq %ymm5,%ymm0,%ymm0
3877
vpaddq %ymm6,%ymm3,%ymm3
3878
vpsrlq $26,%ymm3,%ymm6
3879
vpsrlq $26,%ymm0,%ymm5
3880
vpand %ymm7,%ymm0,%ymm0
3881
vpand %ymm7,%ymm3,%ymm3
3882
vpaddq %ymm5,%ymm1,%ymm1
3883
vpaddq %ymm6,%ymm4,%ymm4
3884
cmpl $0,%ecx
3885
je .L029done
3886
vpshufd $252,%xmm0,%xmm0
3887
leal 288(%esp),%edx
3888
vpshufd $252,%xmm1,%xmm1
3889
vpshufd $252,%xmm2,%xmm2
3890
vpshufd $252,%xmm3,%xmm3
3891
vpshufd $252,%xmm4,%xmm4
3892
jmp .L024even
3893
.align 16
3894
.L029done:
3895
vmovd %xmm0,-48(%edi)
3896
vmovd %xmm1,-44(%edi)
3897
vmovd %xmm2,-40(%edi)
3898
vmovd %xmm3,-36(%edi)
3899
vmovd %xmm4,-32(%edi)
3900
vzeroupper
3901
movl %ebp,%esp
3902
.L020nodata:
3903
popl %edi
3904
popl %esi
3905
popl %ebx
3906
popl %ebp
3907
ret
3908
.size _poly1305_blocks_avx2,.-_poly1305_blocks_avx2
3909
/*
 * .Lconst_sse2: 64-byte-aligned constant pool, followed by the module's
 * identification string and the declaration of OPENSSL_ia32cap_P.
 *
 * Byte layout relative to .Lconst_sse2 (each .long is 4 bytes):
 *   +0   eight dwords: 16777216 (1<<24) in the low dword of each 64-bit lane
 *   +32  eight zero dwords
 *   +64  eight dwords: 67108863 (0x3ffffff, low 26 bits) per 64-bit lane
 *   +96  four dwords:  0x0fffffff, 0x0ffffffc, 0x0ffffffc, 0x0ffffffc
 *
 * NOTE(review): presumably this is the table %ebx addresses in
 * _poly1305_blocks_avx2, which loads 64(%ebx) as the AND mask applied next to
 * the 26-bit right shifts and ORs (%ebx) into one limb; confirm against the
 * place where %ebx is loaded.
 */
.align 64
3910
.Lconst_sse2:
3911
/* +0: 1<<24 in every 64-bit lane */
.long 16777216,0,16777216,0,16777216,0,16777216,0
3912
/* +32: all zero */
.long 0,0,0,0,0,0,0,0
3913
/* +64: 0x3ffffff (26-bit mask) in every 64-bit lane */
.long 67108863,0,67108863,0,67108863,0,67108863,0
3914
/* +96: same four values poly1305_init uses as immediate AND masks on the key words */
.long 268435455,268435452,268435452,268435452
3915
/* NUL-terminated ASCII: "Poly1305 for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte 80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54
3916
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
3917
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
3918
.byte 114,103,62,0
3919
.align 4
3920
/*
 * Common symbol: 40 bytes, 4-byte alignment. poly1305_init reads the dwords at
 * offsets 0 and 8 of this symbol when choosing which routines to install.
 */
.comm OPENSSL_ia32cap_P,40,4
3921
3922
/*
 * ELF note placed in the allocatable section .note.gnu.property.
 * Layout: namesz, descsz, type, name, then one property record
 * (property type, data size, data), each part padded to 4 bytes.
 * The sizes are computed from the numeric local labels 0..4 below.
 *
 * NOTE(review): the entry-point marker bytes (.byte 243,15,30,251) elsewhere
 * in this file are guarded by #ifdef __CET__, while this note is emitted
 * without that guard; confirm this is intended.
 */
.section ".note.gnu.property", "a"
3923
.p2align 2
3924
/* namesz: bytes between labels 0 and 1 (the "GNU" string with its NUL) */
.long 1f - 0f
3925
/* descsz: bytes between labels 1 and 4 (the property record) */
.long 4f - 1f
3926
/* note type 5 (NT_GNU_PROPERTY_TYPE_0) */
.long 5
3927
0:
3928
/* note owner name */
.asciz "GNU"
3929
1:
3930
.p2align 2
3931
/* property type 0xc0000002 (GNU_PROPERTY_X86_FEATURE_1_AND) */
.long 0xc0000002
3932
/* property data size: bytes between labels 2 and 3 */
.long 3f - 2f
3933
2:
3934
/* property data: 3 = bit 0 (IBT) | bit 1 (SHSTK) */
.long 3
3935
3:
3936
/* pad the record to a 4-byte boundary */
.p2align 2
3937
4:
3938
#endif
3939
3940