/* Do not modify. This file is auto-generated from ghash-x86_64.pl. */
/* Origin: freebsd/freebsd-src, sys/crypto/openssl/amd64/ghash-x86_64.S   */
/* (web-viewer chrome and interleaved line numbers removed from scrape)   */
.text
/*----------------------------------------------------------------------
 * void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16])
 * ABI:  SysV AMD64.  In: %rdi = Xi (16-byte GHASH state, big-endian),
 *                        %rsi = Htable (4-bit precomputed H table).
 * Multiplies Xi by H in GF(2^128) using the 4-bit table method,
 * reducing via the .Lrem_4bit table (presumably defined later in this
 * file — not visible in this chunk; TODO confirm).
 * Clobbers: %rax,%rbx,%rcx,%rdx,%r8-%r11, flags.  Only %rbx among the
 * callee-saved registers is actually modified, so only it is reloaded
 * in the epilogue (the other pushes just reserve the standard frame).
 *--------------------------------------------------------------------*/
.globl	gcm_gmult_4bit
.type	gcm_gmult_4bit,@function
.align	16
gcm_gmult_4bit:
.cfi_startproc
.byte	243,15,30,250			/* endbr64 (F3 0F 1E FA) for CET/IBT */
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$280,%rsp		/* scratch frame (shared layout with gcm_ghash_4bit) */
.cfi_adjust_cfa_offset	280
.Lgmult_prologue:

	/* Seed: take last byte of Xi, split into low/high nibbles,
	 * and load the first table entry; %rcx counts bytes 14..0. */
	movzbq	15(%rdi),%r8
	leaq	.Lrem_4bit(%rip),%r11
	xorq	%rax,%rax
	xorq	%rbx,%rbx
	movb	%r8b,%al
	movb	%r8b,%bl
	shlb	$4,%al			/* %al = low nibble * 16 (table index) */
	movq	$14,%rcx
	movq	8(%rsi,%rax,1),%r8	/* Z.lo */
	movq	(%rsi,%rax,1),%r9	/* Z.hi */
	andb	$0xf0,%bl		/* %bl = high-nibble index */
	movq	%r8,%rdx
	jmp	.Loop1

.align	16
.Loop1:
	/* One nibble: shift Z right 4, fold in Htable[nibble] and the
	 * 4-bit remainder table keyed by the bits shifted out (%rdx). */
	shrq	$4,%r8
	andq	$0xf,%rdx
	movq	%r9,%r10
	movb	(%rdi,%rcx,1),%al	/* next input byte of Xi */
	shrq	$4,%r9
	xorq	8(%rsi,%rbx,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rbx,1),%r9
	movb	%al,%bl
	xorq	(%r11,%rdx,8),%r9	/* fold .Lrem_4bit[rem] */
	movq	%r8,%rdx
	shlb	$4,%al
	xorq	%r10,%r8
	decq	%rcx
	js	.Lbreak1

	/* Second nibble of the same byte (low-nibble table index %rax). */
	shrq	$4,%r8
	andq	$0xf,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rax,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rax,1),%r9
	andb	$0xf0,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	xorq	%r10,%r8
	jmp	.Loop1

.align	16
.Lbreak1:
	/* Final two nibbles after the byte counter went negative. */
	shrq	$4,%r8
	andq	$0xf,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rax,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rax,1),%r9
	andb	$0xf0,%bl
	xorq	(%r11,%rdx,8),%r9
	movq	%r8,%rdx
	xorq	%r10,%r8

	shrq	$4,%r8
	andq	$0xf,%rdx
	movq	%r9,%r10
	shrq	$4,%r9
	xorq	8(%rsi,%rbx,1),%r8
	shlq	$60,%r10
	xorq	(%rsi,%rbx,1),%r9
	xorq	%r10,%r8
	xorq	(%r11,%rdx,8),%r9

	/* Store result back to Xi in big-endian byte order. */
	bswapq	%r8
	bswapq	%r9
	movq	%r8,8(%rdi)
	movq	%r9,(%rdi)

	/* Epilogue: %rsi -> saved-register area; only %rbx was touched. */
	leaq	280+48(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lgmult_epilogue:
.byte	0xf3,0xc3			/* rep ret */
.cfi_endproc
.size	gcm_gmult_4bit,.-gcm_gmult_4bit
/*----------------------------------------------------------------------
 * void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
 *                     const u8 *inp, size_t len)
 * ABI:  SysV AMD64.  In: %rdi = Xi, %rsi = Htable,
 *                        %rdx = input, %rcx = length (16-byte multiple).
 * GHASH over `len` bytes using an 8-bit-per-step variant: the prologue
 * rebuilds Htable into a nibble-swapped on-stack layout (16 bytes of
 * low-nibble bytes at 0(%rsp), 16 qword pairs around %rbp), then the
 * outer loop processes one 16-byte block per iteration, folding
 * remainders through .Lrem_8bit (presumably defined later in this file
 * — not visible in this chunk; TODO confirm).
 * Clobbers all integer scratch regs; saves/restores %rbx,%rbp,%r12-%r15.
 *--------------------------------------------------------------------*/
.globl	gcm_ghash_4bit
.type	gcm_ghash_4bit,@function
.align	16
gcm_ghash_4bit:
.cfi_startproc
.byte	243,15,30,250			/* endbr64 */
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56
	subq	$280,%rsp		/* 256-byte shuffled table + 16-byte nibble map + pad */
.cfi_adjust_cfa_offset	280
.Lghash_prologue:
	movq	%rdx,%r14		/* %r14 = input cursor */
	movq	%rcx,%r15		/* %r15 = length (becomes end pointer below) */
	subq	$-128,%rsi		/* bias Htable pointer by +128 for disp8 addressing */
	leaq	16+128(%rsp),%rbp	/* %rbp = middle of on-stack shuffled table */
	xorl	%edx,%edx
	/* --- Unrolled x16: for each Htable[i], split each half into
	 * (hi>>4 | lo<<60) halves and record low nibbles at i(%rsp).
	 * Pattern is identical for all 16 entries; offsets advance by 16. */
	movq	0+0-128(%rsi),%r8
	movq	0+8-128(%rsi),%rax
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	16+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	16+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,0(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,0(%rbp)
	movq	32+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,0-128(%rbp)
	movq	32+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,1(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,8(%rbp)
	movq	48+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,8-128(%rbp)
	movq	48+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,2(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,16(%rbp)
	movq	64+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,16-128(%rbp)
	movq	64+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,3(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,24(%rbp)
	movq	80+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,24-128(%rbp)
	movq	80+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,4(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,32(%rbp)
	movq	96+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,32-128(%rbp)
	movq	96+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,5(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,40(%rbp)
	movq	112+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,40-128(%rbp)
	movq	112+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,6(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,48(%rbp)
	movq	128+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,48-128(%rbp)
	movq	128+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,7(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,56(%rbp)
	movq	144+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,56-128(%rbp)
	movq	144+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,8(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,64(%rbp)
	movq	160+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,64-128(%rbp)
	movq	160+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,9(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,72(%rbp)
	movq	176+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,72-128(%rbp)
	movq	176+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,10(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,80(%rbp)
	movq	192+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,80-128(%rbp)
	movq	192+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,11(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,88(%rbp)
	movq	208+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,88-128(%rbp)
	movq	208+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,12(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,96(%rbp)
	movq	224+0-128(%rsi),%r8
	shlb	$4,%dl
	movq	%rax,96-128(%rbp)
	movq	224+8-128(%rsi),%rax
	shlq	$60,%r10
	movb	%dl,13(%rsp)
	orq	%r10,%rbx
	movb	%al,%dl
	shrq	$4,%rax
	movq	%r8,%r10
	shrq	$4,%r8
	movq	%r9,104(%rbp)
	movq	240+0-128(%rsi),%r9
	shlb	$4,%dl
	movq	%rbx,104-128(%rbp)
	movq	240+8-128(%rsi),%rbx
	shlq	$60,%r10
	movb	%dl,14(%rsp)
	orq	%r10,%rax
	movb	%bl,%dl
	shrq	$4,%rbx
	movq	%r9,%r10
	shrq	$4,%r9
	movq	%r8,112(%rbp)
	shlb	$4,%dl
	movq	%rax,112-128(%rbp)
	shlq	$60,%r10
	movb	%dl,15(%rsp)
	orq	%r10,%rbx
	movq	%r9,120(%rbp)
	movq	%rbx,120-128(%rbp)
	/* --- End of table rebuild; load state and loop over blocks. --- */
	addq	$-128,%rsi		/* undo Htable bias */
	movq	8(%rdi),%r8		/* Z.lo */
	movq	0(%rdi),%r9		/* Z.hi */
	addq	%r14,%r15		/* %r15 = end-of-input pointer */
	leaq	.Lrem_8bit(%rip),%r11
	jmp	.Louter_loop
.align	16
.Louter_loop:
	/* XOR the next 16-byte block into Xi, then run 16 byte-steps.
	 * Each step folds two nibble lookups plus an 8-bit remainder
	 * (.Lrem_8bit, indexed as words) into Z = (%r9:%r8). */
	xorq	(%r14),%r9
	movq	8(%r14),%rdx
	leaq	16(%r14),%r14
	xorq	%r8,%rdx
	movq	%r9,(%rdi)
	movq	%rdx,8(%rdi)
	shrq	$32,%rdx
	xorq	%rax,%rax
	roll	$8,%edx
	movb	%dl,%al
	movzbl	%dl,%ebx
	shlb	$4,%al
	shrl	$4,%ebx
	roll	$8,%edx
	movq	8(%rsi,%rax,1),%r8
	movq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	xorq	%r8,%r12
	movq	%r9,%r10
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	8(%rdi),%edx		/* refill %edx with next 4 bytes of Xi */
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	4(%rdi),%edx		/* next 4 bytes of Xi */
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	0(%rdi),%edx		/* next 4 bytes of Xi */
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	shrl	$4,%ecx
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r12,2),%r12
	movzbl	%dl,%ebx
	shlb	$4,%al
	movzbq	(%rsp,%rcx,1),%r13
	shrl	$4,%ebx
	shlq	$48,%r12
	xorq	%r8,%r13
	movq	%r9,%r10
	xorq	%r12,%r9
	shrq	$8,%r8
	movzbq	%r13b,%r13
	shrq	$8,%r9
	xorq	-128(%rbp,%rcx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rcx,8),%r9
	roll	$8,%edx
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	movb	%dl,%al
	xorq	%r10,%r8
	movzwq	(%r11,%r13,2),%r13
	movzbl	%dl,%ecx
	shlb	$4,%al
	movzbq	(%rsp,%rbx,1),%r12
	andl	$240,%ecx		/* last step keeps only the high-nibble index */
	shlq	$48,%r13
	xorq	%r8,%r12
	movq	%r9,%r10
	xorq	%r13,%r9
	shrq	$8,%r8
	movzbq	%r12b,%r12
	movl	-4(%rdi),%edx
	shrq	$8,%r9
	xorq	-128(%rbp,%rbx,8),%r8
	shlq	$56,%r10
	xorq	(%rbp,%rbx,8),%r9
	movzwq	(%r11,%r12,2),%r12
	xorq	8(%rsi,%rax,1),%r8
	xorq	(%rsi,%rax,1),%r9
	shlq	$48,%r12
	xorq	%r10,%r8
	xorq	%r12,%r9
	movzbq	%r8b,%r13
	shrq	$4,%r8
	movq	%r9,%r10
	shlb	$4,%r13b
	shrq	$4,%r9
	xorq	8(%rsi,%rcx,1),%r8
	movzwq	(%r11,%r13,2),%r13
	shlq	$60,%r10
	xorq	(%rsi,%rcx,1),%r9
	xorq	%r10,%r8
	shlq	$48,%r13
	bswapq	%r8
	xorq	%r13,%r9
	bswapq	%r9
	cmpq	%r15,%r14
	jb	.Louter_loop		/* more input blocks remain */
	movq	%r8,8(%rdi)		/* store final Xi (big-endian) */
	movq	%r9,(%rdi)

	/* Epilogue: restore all six callee-saved registers. */
	leaq	280+48(%rsp),%rsi
.cfi_def_cfa	%rsi,8
	movq	-48(%rsi),%r15
.cfi_restore	%r15
	movq	-40(%rsi),%r14
.cfi_restore	%r14
	movq	-32(%rsi),%r13
.cfi_restore	%r13
	movq	-24(%rsi),%r12
.cfi_restore	%r12
	movq	-16(%rsi),%rbp
.cfi_restore	%rbp
	movq	-8(%rsi),%rbx
.cfi_restore	%rbx
	leaq	0(%rsi),%rsp
.cfi_def_cfa_register	%rsp
.Lghash_epilogue:
.byte	0xf3,0xc3			/* rep ret */
.cfi_endproc
.size	gcm_ghash_4bit,.-gcm_ghash_4bit
/*----------------------------------------------------------------------
 * void gcm_init_clmul(u128 Htable[], const u64 H[2])
 * ABI:  SysV AMD64.  In: %rdi = output table, %rsi = hash subkey H.
 * PCLMULQDQ-based table setup: converts H into the "shifted" domain
 * (multiply by x modulo the GHASH polynomial, via the 0x1c2 constant),
 * then computes and stores H, H^2, H^3, H^4 plus Karatsuba pre-xored
 * halves at 0/16/32/48/64/80(%rdi).
 * .byte sequences are fixed encodings: 66 0F 3A 44 = pclmulqdq,
 * 66 0F 3A 0F = palignr.
 * Clobbers: %xmm0-%xmm6, flags.  No stack frame (leaf).
 *--------------------------------------------------------------------*/
.globl	gcm_init_clmul
.type	gcm_init_clmul,@function
.align	16
gcm_init_clmul:
.cfi_startproc
.byte	243,15,30,250			/* endbr64 */
.L_init_clmul:
	movdqu	(%rsi),%xmm2
	pshufd	$78,%xmm2,%xmm2		/* swap 64-bit halves of H */

	/* Multiply H by x: shift left 1 bit across 128 bits, then
	 * conditionally xor the reduction constant if the top bit was set
	 * (pcmpgtd mask + .L0x1c2_polynomial, defined elsewhere in file). */
	pshufd	$255,%xmm2,%xmm4
	movdqa	%xmm2,%xmm3
	psllq	$1,%xmm2
	pxor	%xmm5,%xmm5
	psrlq	$63,%xmm3
	pcmpgtd	%xmm4,%xmm5
	pslldq	$8,%xmm3
	por	%xmm3,%xmm2

	pand	.L0x1c2_polynomial(%rip),%xmm5
	pxor	%xmm5,%xmm2		/* %xmm2 = H<<1 mod poly */

	/* Square H (Karatsuba: 3 pclmul + reduction) to get H^2. */
	pshufd	$78,%xmm2,%xmm6
	movdqa	%xmm2,%xmm0
	pxor	%xmm2,%xmm6		/* %xmm6 = H.lo ^ H.hi (Karatsuba helper) */
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0		/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17		/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,222,0		/* pclmulqdq $0x00,%xmm6,%xmm3 */
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0		/* 256-bit product assembled in %xmm1:%xmm0 */

	/* Reduction phase 1: multiply low half by x^57+x^62+x^63 folded. */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1

	/* Reduction phase 2: shift-right folds (x^1, x^6, x^1). */
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0		/* %xmm0 = H^2 */
	/* Store H, H^2 and their pre-xored Karatsuba halves. */
	pshufd	$78,%xmm2,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm2,%xmm3
	movdqu	%xmm2,0(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,16(%rdi)
.byte	102,15,58,15,227,8		/* palignr $8,%xmm3,%xmm4 */
	movdqu	%xmm4,32(%rdi)
	/* Multiply H^2 by H -> H^3 (same Karatsuba+reduction pattern). */
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1

	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0		/* %xmm0 = H^3 */
	movdqa	%xmm0,%xmm5		/* keep H^3 for the store below */
	/* Multiply H^3 by H -> H^4. */
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,222,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1

	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0		/* %xmm0 = H^4 */
	/* Store H^3, H^4 and their pre-xored halves. */
	pshufd	$78,%xmm5,%xmm3
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm5,%xmm3
	movdqu	%xmm5,48(%rdi)
	pxor	%xmm0,%xmm4
	movdqu	%xmm0,64(%rdi)
.byte	102,15,58,15,227,8		/* palignr $8,%xmm3,%xmm4 */
	movdqu	%xmm4,80(%rdi)
.byte	0xf3,0xc3			/* rep ret */
.cfi_endproc
.size	gcm_init_clmul,.-gcm_init_clmul
/*----------------------------------------------------------------------
 * void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[])
 * ABI:  SysV AMD64.  In: %rdi = Xi, %rsi = table from gcm_init_clmul.
 * One GF(2^128) multiplication Xi = Xi * H using PCLMULQDQ:
 * byte-swap Xi, Karatsuba multiply against Htable[0] with the
 * pre-xored halves at 32(%rsi), reduce, swap back, store.
 * Entry label .L_gmult_clmul is also the tail-jump target of
 * gcm_gmult_avx below.  Clobbers %xmm0-%xmm5, flags.  Leaf, no frame.
 *--------------------------------------------------------------------*/
.globl	gcm_gmult_clmul
.type	gcm_gmult_clmul,@function
.align	16
gcm_gmult_clmul:
.cfi_startproc
.byte	243,15,30,250			/* endbr64 */
.L_gmult_clmul:
	movdqu	(%rdi),%xmm0
	movdqa	.Lbswap_mask(%rip),%xmm5	/* byte-swap mask (defined elsewhere in file) */
	movdqu	(%rsi),%xmm2		/* H */
	movdqu	32(%rsi),%xmm4		/* Karatsuba pre-xored halves */
.byte	102,15,56,0,197			/* pshufb %xmm5,%xmm0: to little-endian bits */
	/* Karatsuba: lo, hi, and (lo^hi) partial products. */
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0		/* pclmulqdq $0x00,%xmm2,%xmm0 */
.byte	102,15,58,68,202,17		/* pclmulqdq $0x11,%xmm2,%xmm1 */
.byte	102,15,58,68,220,0		/* pclmulqdq $0x00,%xmm4,%xmm3 */
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0		/* 256-bit product in %xmm1:%xmm0 */

	/* Reduce modulo the GHASH polynomial (left-shift folds ...). */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1

	/* (... then right-shift folds). */
	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.byte	102,15,56,0,197			/* pshufb: back to big-endian */
	movdqu	%xmm0,(%rdi)
.byte	0xf3,0xc3			/* rep ret */
.cfi_endproc
.size	gcm_gmult_clmul,.-gcm_gmult_clmul
/*----------------------------------------------------------------------
 * void gcm_ghash_clmul(u64 Xi[2], const u128 Htable[],
 *                      const u8 *inp, size_t len)
 * ABI:  SysV AMD64.  In: %rdi = Xi, %rsi = table from gcm_init_clmul,
 *                        %rdx = input, %rcx = len (16-byte multiple).
 * PCLMULQDQ GHASH.  Dispatches on length and CPU: >= 4 blocks use a
 * 4x-aggregated loop (H^1..H^4 from the init table) unless the
 * OPENSSL_ia32cap_P feature test below selects the 1x/2x path; then a
 * 2x loop, then even/odd tails.  .byte runs are pclmulqdq/pshufb/movq
 * encodings (66 .. 0F 3A 44 / 0F 38 00 / 0F 6E) with REX prefixes for
 * %xmm8-%xmm15.  Clobbers %rax, %xmm0-%xmm15, flags.  Leaf, no frame.
 *--------------------------------------------------------------------*/
.globl	gcm_ghash_clmul
.type	gcm_ghash_clmul,@function
.align	32
gcm_ghash_clmul:
.cfi_startproc
.byte	243,15,30,250			/* endbr64 */
.L_ghash_clmul:
	movdqa	.Lbswap_mask(%rip),%xmm10

	movdqu	(%rdi),%xmm0		/* Xi */
	movdqu	(%rsi),%xmm2		/* H */
	movdqu	32(%rsi),%xmm7		/* Karatsuba halves for H */
.byte	102,65,15,56,0,194		/* pshufb %xmm10,%xmm0 */

	subq	$0x10,%rcx
	jz	.Lodd_tail		/* exactly one block */

	movdqu	16(%rsi),%xmm6		/* H^2 */
	movl	OPENSSL_ia32cap_P+4(%rip),%eax	/* CPU capability word (external) */
	cmpq	$0x30,%rcx
	jb	.Lskip4x		/* fewer than 4 blocks: 2x path */

	/* Feature gate for the 4x path — mask/compare values follow
	 * OpenSSL's ia32cap bit layout (presumably MOVBE/AVX-related;
	 * TODO confirm against crypto/x86_64cpuid). */
	andl	$71303168,%eax
	cmpl	$4194304,%eax
	je	.Lskip4x

	subq	$0x30,%rcx
	movq	$0xA040608020C0E000,%rax	/* magic constant kept in %rax (unused below in this chunk) */
	movdqu	48(%rsi),%xmm14		/* H^3 */
	movdqu	64(%rsi),%xmm15		/* H^4 */

	/* Prime the 4x pipeline: blocks 3 and 2 ... */
	movdqu	48(%rdx),%xmm3
	movdqu	32(%rdx),%xmm11
.byte	102,65,15,56,0,218		/* pshufb %xmm10,%xmm3 */
.byte	102,69,15,56,0,218		/* pshufb %xmm10,%xmm11 */
	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0		/* pclmulqdq $0x00,%xmm2,%xmm3 */
.byte	102,15,58,68,234,17		/* pclmulqdq $0x11,%xmm2,%xmm5 */
.byte	102,15,58,68,231,0		/* pclmulqdq $0x00,%xmm7,%xmm4 */

	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm11,%xmm12
.byte	102,68,15,58,68,222,0
.byte	102,68,15,58,68,238,17
.byte	102,68,15,58,68,231,16
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7		/* Karatsuba halves for H^3/H^4 */
	xorps	%xmm12,%xmm4

	/* ... and blocks 1 and 0 (block 0 absorbs Xi). */
	movdqu	16(%rdx),%xmm11
	movdqu	0(%rdx),%xmm8
.byte	102,69,15,56,0,218
.byte	102,69,15,56,0,194
	movdqa	%xmm11,%xmm13
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm8,%xmm0
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8
.byte	102,69,15,58,68,238,17
.byte	102,68,15,58,68,231,0
	xorps	%xmm11,%xmm3
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jc	.Ltail4x

	jmp	.Lmod4_loop
.align	32
.Lmod4_loop:
	/* Steady state: multiply the running sum by H^4, fold in three
	 * more multiplied blocks, and interleave the modular reduction
	 * with the next four loads/pshufbs. */
.byte	102,65,15,58,68,199,0
	xorps	%xmm12,%xmm4
	movdqu	48(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,65,15,58,68,207,17
	xorps	%xmm3,%xmm0
	movdqu	32(%rdx),%xmm3
	movdqa	%xmm11,%xmm13
.byte	102,68,15,58,68,199,16
	pshufd	$78,%xmm11,%xmm12
	xorps	%xmm5,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,65,15,56,0,218
	movups	32(%rsi),%xmm7
	xorps	%xmm4,%xmm8
.byte	102,68,15,58,68,218,0
	pshufd	$78,%xmm3,%xmm4

	pxor	%xmm0,%xmm8
	movdqa	%xmm3,%xmm5
	pxor	%xmm1,%xmm8
	pxor	%xmm3,%xmm4
	movdqa	%xmm8,%xmm9
.byte	102,68,15,58,68,234,17
	pslldq	$8,%xmm8
	psrldq	$8,%xmm9
	pxor	%xmm8,%xmm0
	movdqa	.L7_mask(%rip),%xmm8	/* reduction constant (defined elsewhere in file) */
	pxor	%xmm9,%xmm1
.byte	102,76,15,110,200		/* movq %rax,%xmm9 */

	pand	%xmm0,%xmm8
.byte	102,69,15,56,0,200		/* pshufb %xmm8,%xmm9 */
	pxor	%xmm0,%xmm9
.byte	102,68,15,58,68,231,0
	psllq	$57,%xmm9
	movdqa	%xmm9,%xmm8
	pslldq	$8,%xmm9
.byte	102,15,58,68,222,0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	movdqu	0(%rdx),%xmm8

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,238,17
	xorps	%xmm11,%xmm3
	movdqu	16(%rdx),%xmm11
.byte	102,69,15,56,0,218
.byte	102,15,58,68,231,16
	xorps	%xmm13,%xmm5
	movups	80(%rsi),%xmm7
.byte	102,69,15,56,0,194
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0

	movdqa	%xmm11,%xmm13
	pxor	%xmm12,%xmm4
	pshufd	$78,%xmm11,%xmm12
	pxor	%xmm9,%xmm0
	pxor	%xmm8,%xmm1
	pxor	%xmm11,%xmm12
.byte	102,69,15,58,68,222,0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	movdqa	%xmm0,%xmm1
.byte	102,69,15,58,68,238,17
	xorps	%xmm11,%xmm3
	pshufd	$78,%xmm0,%xmm8
	pxor	%xmm0,%xmm8

.byte	102,68,15,58,68,231,0
	xorps	%xmm13,%xmm5

	leaq	64(%rdx),%rdx
	subq	$0x40,%rcx
	jnc	.Lmod4_loop

.Ltail4x:
	/* Finish the in-flight 4x aggregation and reduce once. */
.byte	102,65,15,58,68,199,0
.byte	102,65,15,58,68,207,17
.byte	102,68,15,58,68,199,16
	xorps	%xmm12,%xmm4
	xorps	%xmm3,%xmm0
	xorps	%xmm5,%xmm1
	pxor	%xmm0,%xmm1
	pxor	%xmm4,%xmm8

	pxor	%xmm1,%xmm8
	pxor	%xmm0,%xmm1

	movdqa	%xmm8,%xmm9
	psrldq	$8,%xmm8
	pslldq	$8,%xmm9
	pxor	%xmm8,%xmm1
	pxor	%xmm9,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1

	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	addq	$0x40,%rcx
	jz	.Ldone
	movdqu	32(%rsi),%xmm7
	subq	$0x10,%rcx
	jz	.Lodd_tail
.Lskip4x:

	/* 2x path: multiply Xi+block0 by H^2 and block1 by H per round. */
	movdqu	(%rdx),%xmm8
	movdqu	16(%rdx),%xmm3
.byte	102,69,15,56,0,194
.byte	102,65,15,56,0,218
	pxor	%xmm8,%xmm0

	movdqa	%xmm3,%xmm5
	pshufd	$78,%xmm3,%xmm4
	pxor	%xmm3,%xmm4
.byte	102,15,58,68,218,0
.byte	102,15,58,68,234,17
.byte	102,15,58,68,231,0

	leaq	32(%rdx),%rdx
	nop
	subq	$0x20,%rcx
	jbe	.Leven_tail
	nop
	jmp	.Lmod_loop

.align	32
.Lmod_loop:
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0		/* pclmulqdq $0x00,%xmm6,%xmm0 (by H^2) */
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0		/* fold in block*H from last round */
	pxor	%xmm5,%xmm1
	movdqu	(%rdx),%xmm9
	pxor	%xmm0,%xmm8
.byte	102,69,15,56,0,202
	movdqu	16(%rdx),%xmm3

	pxor	%xmm1,%xmm8
	pxor	%xmm9,%xmm1
	pxor	%xmm8,%xmm4
.byte	102,65,15,56,0,218
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm3,%xmm5

	/* Reduction interleaved with next block1*H multiply. */
	movdqa	%xmm0,%xmm9
	movdqa	%xmm0,%xmm8
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm8
.byte	102,15,58,68,218,0
	psllq	$1,%xmm0
	pxor	%xmm8,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm8
	pslldq	$8,%xmm0
	psrldq	$8,%xmm8
	pxor	%xmm9,%xmm0
	pshufd	$78,%xmm5,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm5,%xmm4

	movdqa	%xmm0,%xmm9
	psrlq	$1,%xmm0
.byte	102,15,58,68,234,17
	pxor	%xmm9,%xmm1
	pxor	%xmm0,%xmm9
	psrlq	$5,%xmm0
	pxor	%xmm9,%xmm0
	leaq	32(%rdx),%rdx
	psrlq	$1,%xmm0
.byte	102,15,58,68,231,0
	pxor	%xmm1,%xmm0

	subq	$0x20,%rcx
	ja	.Lmod_loop

.Leven_tail:
	/* Final 2-block combine + one reduction. */
	movdqa	%xmm0,%xmm1
	movdqa	%xmm4,%xmm8
	pshufd	$78,%xmm0,%xmm4
	pxor	%xmm0,%xmm4

.byte	102,15,58,68,198,0
.byte	102,15,58,68,206,17
.byte	102,15,58,68,231,16

	pxor	%xmm3,%xmm0
	pxor	%xmm5,%xmm1
	pxor	%xmm0,%xmm8
	pxor	%xmm1,%xmm8
	pxor	%xmm8,%xmm4
	movdqa	%xmm4,%xmm8
	psrldq	$8,%xmm8
	pslldq	$8,%xmm4
	pxor	%xmm8,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1

	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
	testq	%rcx,%rcx
	jnz	.Ldone

.Lodd_tail:
	/* One remaining block: Xi = (Xi ^ block) * H, then reduce. */
	movdqu	(%rdx),%xmm8
.byte	102,69,15,56,0,194
	pxor	%xmm8,%xmm0
	movdqa	%xmm0,%xmm1
	pshufd	$78,%xmm0,%xmm3
	pxor	%xmm0,%xmm3
.byte	102,15,58,68,194,0
.byte	102,15,58,68,202,17
.byte	102,15,58,68,223,0
	pxor	%xmm0,%xmm3
	pxor	%xmm1,%xmm3

	movdqa	%xmm3,%xmm4
	psrldq	$8,%xmm3
	pslldq	$8,%xmm4
	pxor	%xmm3,%xmm1
	pxor	%xmm4,%xmm0

	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm3
	psllq	$5,%xmm0
	pxor	%xmm0,%xmm3
	psllq	$1,%xmm0
	pxor	%xmm3,%xmm0
	psllq	$57,%xmm0
	movdqa	%xmm0,%xmm3
	pslldq	$8,%xmm0
	psrldq	$8,%xmm3
	pxor	%xmm4,%xmm0
	pxor	%xmm3,%xmm1

	movdqa	%xmm0,%xmm4
	psrlq	$1,%xmm0
	pxor	%xmm4,%xmm1
	pxor	%xmm0,%xmm4
	psrlq	$5,%xmm0
	pxor	%xmm4,%xmm0
	psrlq	$1,%xmm0
	pxor	%xmm1,%xmm0
.Ldone:
.byte	102,65,15,56,0,194		/* pshufb: back to big-endian */
	movdqu	%xmm0,(%rdi)
.byte	0xf3,0xc3			/* rep ret */
.cfi_endproc
.size	gcm_ghash_clmul,.-gcm_ghash_clmul
/*----------------------------------------------------------------------
 * void gcm_init_avx(u128 Htable[], const u64 H[2])
 * ABI:  SysV AMD64.  In: %rdi = output table, %rsi = hash subkey H.
 * AVX version of gcm_init_clmul: converts H to the shifted domain,
 * then iterates a square-and-store loop (%r10 = 4 iterations, two
 * powers per iteration) producing H^1..H^8 plus interleaved Karatsuba
 * halves, 48 bytes per iteration at %rdi.
 * Clobbers: %r10, %xmm0-%xmm6, flags.  Leaf; vzeroupper bracketed.
 *--------------------------------------------------------------------*/
.globl	gcm_init_avx
.type	gcm_init_avx,@function
.align	32
gcm_init_avx:
.cfi_startproc
.byte	243,15,30,250			/* endbr64 */
	vzeroupper

	vmovdqu	(%rsi),%xmm2
	vpshufd	$78,%xmm2,%xmm2		/* swap 64-bit halves of H */

	/* H <<= 1 (mod poly): conditional xor of .L0x1c2_polynomial
	 * when the top bit was set (sign-propagated via vpcmpgtd). */
	vpshufd	$255,%xmm2,%xmm4
	vpsrlq	$63,%xmm2,%xmm3
	vpsllq	$1,%xmm2,%xmm2
	vpxor	%xmm5,%xmm5,%xmm5
	vpcmpgtd	%xmm4,%xmm5,%xmm5
	vpslldq	$8,%xmm3,%xmm3
	vpor	%xmm3,%xmm2,%xmm2

	vpand	.L0x1c2_polynomial(%rip),%xmm5,%xmm5
	vpxor	%xmm5,%xmm2,%xmm2	/* %xmm2 = H (shifted domain) */

	vpunpckhqdq	%xmm2,%xmm2,%xmm6
	vmovdqa	%xmm2,%xmm0
	vpxor	%xmm2,%xmm6,%xmm6	/* %xmm6 = H.lo ^ H.hi (Karatsuba) */
	movq	$4,%r10			/* 4 loop iterations */
	jmp	.Linit_start_avx
.align	32
.Linit_loop_avx:
	/* Store the palignr-combined Karatsuba halves of the previous
	 * pair, then advance one power: X *= H (Karatsuba + reduction). */
	vpalignr	$8,%xmm3,%xmm4,%xmm5
	vmovdqu	%xmm5,-16(%rdi)
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	/* Reduction: left shifts by 57/62/63 ... */
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	/* ... then right shifts by 1/5/1. */
	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
.Linit_start_avx:
	/* Save current power, compute the next one (X *= H again). */
	vmovdqa	%xmm0,%xmm5
	vpunpckhqdq	%xmm0,%xmm0,%xmm3
	vpxor	%xmm0,%xmm3,%xmm3
	vpclmulqdq	$0x11,%xmm2,%xmm0,%xmm1
	vpclmulqdq	$0x00,%xmm2,%xmm0,%xmm0
	vpclmulqdq	$0x00,%xmm6,%xmm3,%xmm3
	vpxor	%xmm0,%xmm1,%xmm4
	vpxor	%xmm4,%xmm3,%xmm3

	vpslldq	$8,%xmm3,%xmm4
	vpsrldq	$8,%xmm3,%xmm3
	vpxor	%xmm4,%xmm0,%xmm0
	vpxor	%xmm3,%xmm1,%xmm1
	vpsllq	$57,%xmm0,%xmm3
	vpsllq	$62,%xmm0,%xmm4
	vpxor	%xmm3,%xmm4,%xmm4
	vpsllq	$63,%xmm0,%xmm3
	vpxor	%xmm3,%xmm4,%xmm4
	vpslldq	$8,%xmm4,%xmm3
	vpsrldq	$8,%xmm4,%xmm4
	vpxor	%xmm3,%xmm0,%xmm0
	vpxor	%xmm4,%xmm1,%xmm1

	vpsrlq	$1,%xmm0,%xmm4
	vpxor	%xmm0,%xmm1,%xmm1
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$5,%xmm4,%xmm4
	vpxor	%xmm4,%xmm0,%xmm0
	vpsrlq	$1,%xmm0,%xmm0
	vpxor	%xmm1,%xmm0,%xmm0
	/* Store the pair of powers and stage their Karatsuba halves. */
	vpshufd	$78,%xmm5,%xmm3
	vpshufd	$78,%xmm0,%xmm4
	vpxor	%xmm5,%xmm3,%xmm3
	vmovdqu	%xmm5,0(%rdi)
	vpxor	%xmm0,%xmm4,%xmm4
	vmovdqu	%xmm0,16(%rdi)
	leaq	48(%rdi),%rdi
	subq	$1,%r10
	jnz	.Linit_loop_avx

	/* Flush the last staged Karatsuba pair. */
	vpalignr	$8,%xmm4,%xmm3,%xmm5
	vmovdqu	%xmm5,-16(%rdi)

	vzeroupper
.byte	0xf3,0xc3			/* rep ret */
.cfi_endproc
.size	gcm_init_avx,.-gcm_init_avx
/*----------------------------------------------------------------------
 * void gcm_gmult_avx(u64 Xi[2], const u128 Htable[])
 * Alias entry point: a single multiply has no AVX advantage, so this
 * tail-jumps straight into the CLMUL implementation (.L_gmult_clmul),
 * which uses the same %rdi/%rsi arguments and table layout.
 *--------------------------------------------------------------------*/
.globl	gcm_gmult_avx
.type	gcm_gmult_avx,@function
.align	32
gcm_gmult_avx:
.cfi_startproc
.byte	243,15,30,250			/* endbr64 */
	jmp	.L_gmult_clmul		/* tail call; shares code above */
.cfi_endproc
.size	gcm_gmult_avx,.-gcm_gmult_avx
.globl gcm_ghash_avx
1426
.type gcm_ghash_avx,@function
1427
.align 32
1428
gcm_ghash_avx:
1429
.cfi_startproc
1430
.byte 243,15,30,250
1431
vzeroupper
1432
1433
vmovdqu (%rdi),%xmm10
1434
leaq .L0x1c2_polynomial(%rip),%r10
1435
leaq 64(%rsi),%rsi
1436
vmovdqu .Lbswap_mask(%rip),%xmm13
1437
vpshufb %xmm13,%xmm10,%xmm10
1438
cmpq $0x80,%rcx
1439
jb .Lshort_avx
1440
subq $0x80,%rcx
1441
1442
vmovdqu 112(%rdx),%xmm14
1443
vmovdqu 0-64(%rsi),%xmm6
1444
vpshufb %xmm13,%xmm14,%xmm14
1445
vmovdqu 32-64(%rsi),%xmm7
1446
1447
vpunpckhqdq %xmm14,%xmm14,%xmm9
1448
vmovdqu 96(%rdx),%xmm15
1449
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
1450
vpxor %xmm14,%xmm9,%xmm9
1451
vpshufb %xmm13,%xmm15,%xmm15
1452
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
1453
vmovdqu 16-64(%rsi),%xmm6
1454
vpunpckhqdq %xmm15,%xmm15,%xmm8
1455
vmovdqu 80(%rdx),%xmm14
1456
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
1457
vpxor %xmm15,%xmm8,%xmm8
1458
1459
vpshufb %xmm13,%xmm14,%xmm14
1460
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
1461
vpunpckhqdq %xmm14,%xmm14,%xmm9
1462
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
1463
vmovdqu 48-64(%rsi),%xmm6
1464
vpxor %xmm14,%xmm9,%xmm9
1465
vmovdqu 64(%rdx),%xmm15
1466
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
1467
vmovdqu 80-64(%rsi),%xmm7
1468
1469
vpshufb %xmm13,%xmm15,%xmm15
1470
vpxor %xmm0,%xmm3,%xmm3
1471
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
1472
vpxor %xmm1,%xmm4,%xmm4
1473
vpunpckhqdq %xmm15,%xmm15,%xmm8
1474
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
1475
vmovdqu 64-64(%rsi),%xmm6
1476
vpxor %xmm2,%xmm5,%xmm5
1477
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
1478
vpxor %xmm15,%xmm8,%xmm8
1479
1480
vmovdqu 48(%rdx),%xmm14
1481
vpxor %xmm3,%xmm0,%xmm0
1482
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
1483
vpxor %xmm4,%xmm1,%xmm1
1484
vpshufb %xmm13,%xmm14,%xmm14
1485
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
1486
vmovdqu 96-64(%rsi),%xmm6
1487
vpxor %xmm5,%xmm2,%xmm2
1488
vpunpckhqdq %xmm14,%xmm14,%xmm9
1489
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
1490
vmovdqu 128-64(%rsi),%xmm7
1491
vpxor %xmm14,%xmm9,%xmm9
1492
1493
vmovdqu 32(%rdx),%xmm15
1494
vpxor %xmm0,%xmm3,%xmm3
1495
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
1496
vpxor %xmm1,%xmm4,%xmm4
1497
vpshufb %xmm13,%xmm15,%xmm15
1498
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
1499
vmovdqu 112-64(%rsi),%xmm6
1500
vpxor %xmm2,%xmm5,%xmm5
1501
vpunpckhqdq %xmm15,%xmm15,%xmm8
1502
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
1503
vpxor %xmm15,%xmm8,%xmm8
1504
1505
vmovdqu 16(%rdx),%xmm14
1506
vpxor %xmm3,%xmm0,%xmm0
1507
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
1508
vpxor %xmm4,%xmm1,%xmm1
1509
vpshufb %xmm13,%xmm14,%xmm14
1510
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
1511
vmovdqu 144-64(%rsi),%xmm6
1512
vpxor %xmm5,%xmm2,%xmm2
1513
vpunpckhqdq %xmm14,%xmm14,%xmm9
1514
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
1515
vmovdqu 176-64(%rsi),%xmm7
1516
vpxor %xmm14,%xmm9,%xmm9
1517
1518
vmovdqu (%rdx),%xmm15
1519
vpxor %xmm0,%xmm3,%xmm3
1520
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
1521
vpxor %xmm1,%xmm4,%xmm4
1522
vpshufb %xmm13,%xmm15,%xmm15
1523
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
1524
vmovdqu 160-64(%rsi),%xmm6
1525
vpxor %xmm2,%xmm5,%xmm5
1526
vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
1527
1528
leaq 128(%rdx),%rdx
1529
cmpq $0x80,%rcx
1530
jb .Ltail_avx
1531
1532
vpxor %xmm10,%xmm15,%xmm15
1533
subq $0x80,%rcx
1534
jmp .Loop8x_avx
1535
1536
.align 32
1537
.Loop8x_avx:
1538
vpunpckhqdq %xmm15,%xmm15,%xmm8
1539
vmovdqu 112(%rdx),%xmm14
1540
vpxor %xmm0,%xmm3,%xmm3
1541
vpxor %xmm15,%xmm8,%xmm8
1542
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10
1543
vpshufb %xmm13,%xmm14,%xmm14
1544
vpxor %xmm1,%xmm4,%xmm4
1545
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11
1546
vmovdqu 0-64(%rsi),%xmm6
1547
vpunpckhqdq %xmm14,%xmm14,%xmm9
1548
vpxor %xmm2,%xmm5,%xmm5
1549
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12
1550
vmovdqu 32-64(%rsi),%xmm7
1551
vpxor %xmm14,%xmm9,%xmm9
1552
1553
vmovdqu 96(%rdx),%xmm15
1554
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
1555
vpxor %xmm3,%xmm10,%xmm10
1556
vpshufb %xmm13,%xmm15,%xmm15
1557
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
1558
vxorps %xmm4,%xmm11,%xmm11
1559
vmovdqu 16-64(%rsi),%xmm6
1560
vpunpckhqdq %xmm15,%xmm15,%xmm8
1561
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
1562
vpxor %xmm5,%xmm12,%xmm12
1563
vxorps %xmm15,%xmm8,%xmm8
1564
1565
vmovdqu 80(%rdx),%xmm14
1566
vpxor %xmm10,%xmm12,%xmm12
1567
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
1568
vpxor %xmm11,%xmm12,%xmm12
1569
vpslldq $8,%xmm12,%xmm9
1570
vpxor %xmm0,%xmm3,%xmm3
1571
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
1572
vpsrldq $8,%xmm12,%xmm12
1573
vpxor %xmm9,%xmm10,%xmm10
1574
vmovdqu 48-64(%rsi),%xmm6
1575
vpshufb %xmm13,%xmm14,%xmm14
1576
vxorps %xmm12,%xmm11,%xmm11
1577
vpxor %xmm1,%xmm4,%xmm4
1578
vpunpckhqdq %xmm14,%xmm14,%xmm9
1579
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
1580
vmovdqu 80-64(%rsi),%xmm7
1581
vpxor %xmm14,%xmm9,%xmm9
1582
vpxor %xmm2,%xmm5,%xmm5
1583
1584
vmovdqu 64(%rdx),%xmm15
1585
vpalignr $8,%xmm10,%xmm10,%xmm12
1586
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
1587
vpshufb %xmm13,%xmm15,%xmm15
1588
vpxor %xmm3,%xmm0,%xmm0
1589
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
1590
vmovdqu 64-64(%rsi),%xmm6
1591
vpunpckhqdq %xmm15,%xmm15,%xmm8
1592
vpxor %xmm4,%xmm1,%xmm1
1593
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
1594
vxorps %xmm15,%xmm8,%xmm8
1595
vpxor %xmm5,%xmm2,%xmm2
1596
1597
vmovdqu 48(%rdx),%xmm14
1598
vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
1599
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
1600
vpshufb %xmm13,%xmm14,%xmm14
1601
vpxor %xmm0,%xmm3,%xmm3
1602
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
1603
vmovdqu 96-64(%rsi),%xmm6
1604
vpunpckhqdq %xmm14,%xmm14,%xmm9
1605
vpxor %xmm1,%xmm4,%xmm4
1606
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
1607
vmovdqu 128-64(%rsi),%xmm7
1608
vpxor %xmm14,%xmm9,%xmm9
1609
vpxor %xmm2,%xmm5,%xmm5
1610
1611
vmovdqu 32(%rdx),%xmm15
1612
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
1613
vpshufb %xmm13,%xmm15,%xmm15
1614
vpxor %xmm3,%xmm0,%xmm0
1615
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
1616
vmovdqu 112-64(%rsi),%xmm6
1617
vpunpckhqdq %xmm15,%xmm15,%xmm8
1618
vpxor %xmm4,%xmm1,%xmm1
1619
vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2
1620
vpxor %xmm15,%xmm8,%xmm8
1621
vpxor %xmm5,%xmm2,%xmm2
1622
vxorps %xmm12,%xmm10,%xmm10
1623
1624
vmovdqu 16(%rdx),%xmm14
1625
vpalignr $8,%xmm10,%xmm10,%xmm12
1626
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3
1627
vpshufb %xmm13,%xmm14,%xmm14
1628
vpxor %xmm0,%xmm3,%xmm3
1629
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4
1630
vmovdqu 144-64(%rsi),%xmm6
1631
vpclmulqdq $0x10,(%r10),%xmm10,%xmm10
1632
vxorps %xmm11,%xmm12,%xmm12
1633
vpunpckhqdq %xmm14,%xmm14,%xmm9
1634
vpxor %xmm1,%xmm4,%xmm4
1635
vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5
1636
vmovdqu 176-64(%rsi),%xmm7
1637
vpxor %xmm14,%xmm9,%xmm9
1638
vpxor %xmm2,%xmm5,%xmm5
1639
1640
vmovdqu (%rdx),%xmm15
1641
vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0
1642
vpshufb %xmm13,%xmm15,%xmm15
1643
vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1
1644
vmovdqu 160-64(%rsi),%xmm6
1645
vpxor %xmm12,%xmm15,%xmm15
1646
vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2
1647
vpxor %xmm10,%xmm15,%xmm15
1648
1649
leaq 128(%rdx),%rdx
1650
subq $0x80,%rcx
1651
jnc .Loop8x_avx
1652
1653
addq $0x80,%rcx
1654
jmp .Ltail_no_xor_avx
1655
1656
.align 32
1657
.Lshort_avx:
1658
vmovdqu -16(%rdx,%rcx,1),%xmm14
1659
leaq (%rdx,%rcx,1),%rdx
1660
vmovdqu 0-64(%rsi),%xmm6
1661
vmovdqu 32-64(%rsi),%xmm7
1662
vpshufb %xmm13,%xmm14,%xmm15
1663
1664
vmovdqa %xmm0,%xmm3
1665
vmovdqa %xmm1,%xmm4
1666
vmovdqa %xmm2,%xmm5
1667
subq $0x10,%rcx
1668
jz .Ltail_avx
1669
1670
vpunpckhqdq %xmm15,%xmm15,%xmm8
1671
vpxor %xmm0,%xmm3,%xmm3
1672
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
1673
vpxor %xmm15,%xmm8,%xmm8
1674
vmovdqu -32(%rdx),%xmm14
1675
vpxor %xmm1,%xmm4,%xmm4
1676
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
1677
vmovdqu 16-64(%rsi),%xmm6
1678
vpshufb %xmm13,%xmm14,%xmm15
1679
vpxor %xmm2,%xmm5,%xmm5
1680
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
1681
vpsrldq $8,%xmm7,%xmm7
1682
subq $0x10,%rcx
1683
jz .Ltail_avx
1684
1685
vpunpckhqdq %xmm15,%xmm15,%xmm8
1686
vpxor %xmm0,%xmm3,%xmm3
1687
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
1688
vpxor %xmm15,%xmm8,%xmm8
1689
vmovdqu -48(%rdx),%xmm14
1690
vpxor %xmm1,%xmm4,%xmm4
1691
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
1692
vmovdqu 48-64(%rsi),%xmm6
1693
vpshufb %xmm13,%xmm14,%xmm15
1694
vpxor %xmm2,%xmm5,%xmm5
1695
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
1696
vmovdqu 80-64(%rsi),%xmm7
1697
subq $0x10,%rcx
1698
jz .Ltail_avx
1699
1700
vpunpckhqdq %xmm15,%xmm15,%xmm8
1701
vpxor %xmm0,%xmm3,%xmm3
1702
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
1703
vpxor %xmm15,%xmm8,%xmm8
1704
vmovdqu -64(%rdx),%xmm14
1705
vpxor %xmm1,%xmm4,%xmm4
1706
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
1707
vmovdqu 64-64(%rsi),%xmm6
1708
vpshufb %xmm13,%xmm14,%xmm15
1709
vpxor %xmm2,%xmm5,%xmm5
1710
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
1711
vpsrldq $8,%xmm7,%xmm7
1712
subq $0x10,%rcx
1713
jz .Ltail_avx
1714
1715
vpunpckhqdq %xmm15,%xmm15,%xmm8
1716
vpxor %xmm0,%xmm3,%xmm3
1717
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
1718
vpxor %xmm15,%xmm8,%xmm8
1719
vmovdqu -80(%rdx),%xmm14
1720
vpxor %xmm1,%xmm4,%xmm4
1721
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
1722
vmovdqu 96-64(%rsi),%xmm6
1723
vpshufb %xmm13,%xmm14,%xmm15
1724
vpxor %xmm2,%xmm5,%xmm5
1725
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
1726
vmovdqu 128-64(%rsi),%xmm7
1727
subq $0x10,%rcx
1728
jz .Ltail_avx
1729
1730
vpunpckhqdq %xmm15,%xmm15,%xmm8
1731
vpxor %xmm0,%xmm3,%xmm3
1732
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
1733
vpxor %xmm15,%xmm8,%xmm8
1734
vmovdqu -96(%rdx),%xmm14
1735
vpxor %xmm1,%xmm4,%xmm4
1736
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
1737
vmovdqu 112-64(%rsi),%xmm6
1738
vpshufb %xmm13,%xmm14,%xmm15
1739
vpxor %xmm2,%xmm5,%xmm5
1740
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
1741
vpsrldq $8,%xmm7,%xmm7
1742
subq $0x10,%rcx
1743
jz .Ltail_avx
1744
1745
vpunpckhqdq %xmm15,%xmm15,%xmm8
1746
vpxor %xmm0,%xmm3,%xmm3
1747
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
1748
vpxor %xmm15,%xmm8,%xmm8
1749
vmovdqu -112(%rdx),%xmm14
1750
vpxor %xmm1,%xmm4,%xmm4
1751
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
1752
vmovdqu 144-64(%rsi),%xmm6
1753
vpshufb %xmm13,%xmm14,%xmm15
1754
vpxor %xmm2,%xmm5,%xmm5
1755
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
1756
vmovq 184-64(%rsi),%xmm7
1757
subq $0x10,%rcx
1758
jmp .Ltail_avx
1759
1760
.align 32
1761
.Ltail_avx:
1762
vpxor %xmm10,%xmm15,%xmm15
1763
.Ltail_no_xor_avx:
1764
vpunpckhqdq %xmm15,%xmm15,%xmm8
1765
vpxor %xmm0,%xmm3,%xmm3
1766
vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0
1767
vpxor %xmm15,%xmm8,%xmm8
1768
vpxor %xmm1,%xmm4,%xmm4
1769
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1
1770
vpxor %xmm2,%xmm5,%xmm5
1771
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2
1772
1773
vmovdqu (%r10),%xmm12
1774
1775
vpxor %xmm0,%xmm3,%xmm10
1776
vpxor %xmm1,%xmm4,%xmm11
1777
vpxor %xmm2,%xmm5,%xmm5
1778
1779
vpxor %xmm10,%xmm5,%xmm5
1780
vpxor %xmm11,%xmm5,%xmm5
1781
vpslldq $8,%xmm5,%xmm9
1782
vpsrldq $8,%xmm5,%xmm5
1783
vpxor %xmm9,%xmm10,%xmm10
1784
vpxor %xmm5,%xmm11,%xmm11
1785
1786
vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
1787
vpalignr $8,%xmm10,%xmm10,%xmm10
1788
vpxor %xmm9,%xmm10,%xmm10
1789
1790
vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9
1791
vpalignr $8,%xmm10,%xmm10,%xmm10
1792
vpxor %xmm11,%xmm10,%xmm10
1793
vpxor %xmm9,%xmm10,%xmm10
1794
1795
cmpq $0,%rcx
1796
jne .Lshort_avx
1797
1798
vpshufb %xmm13,%xmm10,%xmm10
1799
vmovdqu %xmm10,(%rdi)
1800
vzeroupper
1801
.byte 0xf3,0xc3
1802
.cfi_endproc
1803
.size gcm_ghash_avx,.-gcm_ghash_avx
1804
.section .rodata
1805
.align 64
1806
.Lbswap_mask:
1807
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
1808
.L0x1c2_polynomial:
1809
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
1810
.L7_mask:
1811
.long 7,0,7,0
1812
.L7_mask_poly:
1813
.long 7,0,450,0
1814
.align 64
1815
.type .Lrem_4bit,@object
1816
.Lrem_4bit:
1817
.long 0,0,0,471859200,0,943718400,0,610271232
1818
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
1819
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
1820
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
1821
.type .Lrem_8bit,@object
1822
.Lrem_8bit:
1823
.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
1824
.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
1825
.value 0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
1826
.value 0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
1827
.value 0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
1828
.value 0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
1829
.value 0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
1830
.value 0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
1831
.value 0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
1832
.value 0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
1833
.value 0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
1834
.value 0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
1835
.value 0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
1836
.value 0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
1837
.value 0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
1838
.value 0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
1839
.value 0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
1840
.value 0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
1841
.value 0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
1842
.value 0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
1843
.value 0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
1844
.value 0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
1845
.value 0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
1846
.value 0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
1847
.value 0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
1848
.value 0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
1849
.value 0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
1850
.value 0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
1851
.value 0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
1852
.value 0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
1853
.value 0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
1854
.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
1855
1856
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1857
.align 64
1858
.previous
1859
.section ".note.gnu.property", "a"
1860
.p2align 3
1861
.long 1f - 0f
1862
.long 4f - 1f
1863
.long 5
1864
0:
1865
# "GNU" encoded with .byte, since .asciz isn't supported
1866
# on Solaris.
1867
.byte 0x47
1868
.byte 0x4e
1869
.byte 0x55
1870
.byte 0
1871
1:
1872
.p2align 3
1873
.long 0xc0000002
1874
.long 3f - 2f
1875
2:
1876
.long 3
1877
3:
1878
.p2align 3
1879
4:
1880
1881