GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/crypto/openssl/amd64/rsaz-x86_64.S
/* Do not modify. This file is auto-generated from rsaz-x86_64.pl. */
.text

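# Editorial note, not part of the perlasm output: rsaz_512_sqr squares a
# 512-bit value in Montgomery form, iterating %r8d times. It tests the BMI2
# and ADX feature bits (mask 0x80100) in OPENSSL_ia32cap_P and takes the
# MULX/ADCX/ADOX path (.Loop_sqrx) when both are set, else the classic MULQ
# path (.Loop_sqr).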
.globl rsaz_512_sqr
.type rsaz_512_sqr,@function
.align 32
rsaz_512_sqr:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56

subq $128+24,%rsp
.cfi_adjust_cfa_offset 128+24
.Lsqr_body:
.byte 102,72,15,110,202
movq (%rsi),%rdx
movq 8(%rsi),%rax
movq %rcx,128(%rsp)
movl $0x80100,%r11d
andl OPENSSL_ia32cap_P+8(%rip),%r11d
cmpl $0x80100,%r11d
je .Loop_sqrx
jmp .Loop_sqr

.align 32
.Loop_sqr:
movl %r8d,128+8(%rsp)

movq %rdx,%rbx
movq %rax,%rbp
mulq %rdx
movq %rax,%r8
movq 16(%rsi),%rax
movq %rdx,%r9

mulq %rbx
addq %rax,%r9
movq 24(%rsi),%rax
movq %rdx,%r10
adcq $0,%r10

mulq %rbx
addq %rax,%r10
movq 32(%rsi),%rax
movq %rdx,%r11
adcq $0,%r11

mulq %rbx
addq %rax,%r11
movq 40(%rsi),%rax
movq %rdx,%r12
adcq $0,%r12

mulq %rbx
addq %rax,%r12
movq 48(%rsi),%rax
movq %rdx,%r13
adcq $0,%r13

mulq %rbx
addq %rax,%r13
movq 56(%rsi),%rax
movq %rdx,%r14
adcq $0,%r14

mulq %rbx
addq %rax,%r14
movq %rbx,%rax
adcq $0,%rdx

xorq %rcx,%rcx
addq %r8,%r8
movq %rdx,%r15
adcq $0,%rcx

mulq %rax
addq %r8,%rdx
adcq $0,%rcx

movq %rax,(%rsp)
movq %rdx,8(%rsp)


movq 16(%rsi),%rax
mulq %rbp
addq %rax,%r10
movq 24(%rsi),%rax
movq %rdx,%rbx
adcq $0,%rbx

mulq %rbp
addq %rax,%r11
movq 32(%rsi),%rax
adcq $0,%rdx
addq %rbx,%r11
movq %rdx,%rbx
adcq $0,%rbx

mulq %rbp
addq %rax,%r12
movq 40(%rsi),%rax
adcq $0,%rdx
addq %rbx,%r12
movq %rdx,%rbx
adcq $0,%rbx

mulq %rbp
addq %rax,%r13
movq 48(%rsi),%rax
adcq $0,%rdx
addq %rbx,%r13
movq %rdx,%rbx
adcq $0,%rbx

mulq %rbp
addq %rax,%r14
movq 56(%rsi),%rax
adcq $0,%rdx
addq %rbx,%r14
movq %rdx,%rbx
adcq $0,%rbx

mulq %rbp
addq %rax,%r15
movq %rbp,%rax
adcq $0,%rdx
addq %rbx,%r15
adcq $0,%rdx

xorq %rbx,%rbx
addq %r9,%r9
movq %rdx,%r8
adcq %r10,%r10
adcq $0,%rbx

mulq %rax

addq %rcx,%rax
movq 16(%rsi),%rbp
addq %rax,%r9
movq 24(%rsi),%rax
adcq %rdx,%r10
adcq $0,%rbx

movq %r9,16(%rsp)
movq %r10,24(%rsp)


mulq %rbp
addq %rax,%r12
movq 32(%rsi),%rax
movq %rdx,%rcx
adcq $0,%rcx

mulq %rbp
addq %rax,%r13
movq 40(%rsi),%rax
adcq $0,%rdx
addq %rcx,%r13
movq %rdx,%rcx
adcq $0,%rcx

mulq %rbp
addq %rax,%r14
movq 48(%rsi),%rax
adcq $0,%rdx
addq %rcx,%r14
movq %rdx,%rcx
adcq $0,%rcx

mulq %rbp
addq %rax,%r15
movq 56(%rsi),%rax
adcq $0,%rdx
addq %rcx,%r15
movq %rdx,%rcx
adcq $0,%rcx

mulq %rbp
addq %rax,%r8
movq %rbp,%rax
adcq $0,%rdx
addq %rcx,%r8
adcq $0,%rdx

xorq %rcx,%rcx
addq %r11,%r11
movq %rdx,%r9
adcq %r12,%r12
adcq $0,%rcx

mulq %rax

addq %rbx,%rax
movq 24(%rsi),%r10
addq %rax,%r11
movq 32(%rsi),%rax
adcq %rdx,%r12
adcq $0,%rcx

movq %r11,32(%rsp)
movq %r12,40(%rsp)


movq %rax,%r11
mulq %r10
addq %rax,%r14
movq 40(%rsi),%rax
movq %rdx,%rbx
adcq $0,%rbx

movq %rax,%r12
mulq %r10
addq %rax,%r15
movq 48(%rsi),%rax
adcq $0,%rdx
addq %rbx,%r15
movq %rdx,%rbx
adcq $0,%rbx

movq %rax,%rbp
mulq %r10
addq %rax,%r8
movq 56(%rsi),%rax
adcq $0,%rdx
addq %rbx,%r8
movq %rdx,%rbx
adcq $0,%rbx

mulq %r10
addq %rax,%r9
movq %r10,%rax
adcq $0,%rdx
addq %rbx,%r9
adcq $0,%rdx

xorq %rbx,%rbx
addq %r13,%r13
movq %rdx,%r10
adcq %r14,%r14
adcq $0,%rbx

mulq %rax

addq %rcx,%rax
addq %rax,%r13
movq %r12,%rax
adcq %rdx,%r14
adcq $0,%rbx

movq %r13,48(%rsp)
movq %r14,56(%rsp)


mulq %r11
addq %rax,%r8
movq %rbp,%rax
movq %rdx,%rcx
adcq $0,%rcx

mulq %r11
addq %rax,%r9
movq 56(%rsi),%rax
adcq $0,%rdx
addq %rcx,%r9
movq %rdx,%rcx
adcq $0,%rcx

movq %rax,%r14
mulq %r11
addq %rax,%r10
movq %r11,%rax
adcq $0,%rdx
addq %rcx,%r10
adcq $0,%rdx

xorq %rcx,%rcx
addq %r15,%r15
movq %rdx,%r11
adcq %r8,%r8
adcq $0,%rcx

mulq %rax

addq %rbx,%rax
addq %rax,%r15
movq %rbp,%rax
adcq %rdx,%r8
adcq $0,%rcx

movq %r15,64(%rsp)
movq %r8,72(%rsp)


mulq %r12
addq %rax,%r10
movq %r14,%rax
movq %rdx,%rbx
adcq $0,%rbx

mulq %r12
addq %rax,%r11
movq %r12,%rax
adcq $0,%rdx
addq %rbx,%r11
adcq $0,%rdx

xorq %rbx,%rbx
addq %r9,%r9
movq %rdx,%r12
adcq %r10,%r10
adcq $0,%rbx

mulq %rax

addq %rcx,%rax
addq %rax,%r9
movq %r14,%rax
adcq %rdx,%r10
adcq $0,%rbx

movq %r9,80(%rsp)
movq %r10,88(%rsp)


mulq %rbp
addq %rax,%r12
movq %rbp,%rax
adcq $0,%rdx

xorq %rcx,%rcx
addq %r11,%r11
movq %rdx,%r13
adcq %r12,%r12
adcq $0,%rcx

mulq %rax

addq %rbx,%rax
addq %rax,%r11
movq %r14,%rax
adcq %rdx,%r12
adcq $0,%rcx

movq %r11,96(%rsp)
movq %r12,104(%rsp)


xorq %rbx,%rbx
addq %r13,%r13
adcq $0,%rbx

mulq %rax

addq %rcx,%rax
addq %r13,%rax
adcq %rbx,%rdx

movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15
.byte 102,72,15,126,205

movq %rax,112(%rsp)
movq %rdx,120(%rsp)

call __rsaz_512_reduce

addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
adcq 88(%rsp),%r11
adcq 96(%rsp),%r12
adcq 104(%rsp),%r13
adcq 112(%rsp),%r14
adcq 120(%rsp),%r15
sbbq %rcx,%rcx

call __rsaz_512_subtract

movq %r8,%rdx
movq %r9,%rax
movl 128+8(%rsp),%r8d
movq %rdi,%rsi

decl %r8d
jnz .Loop_sqr
jmp .Lsqr_tail

.align 32
.Loop_sqrx:
movl %r8d,128+8(%rsp)
.byte 102,72,15,110,199

mulxq %rax,%r8,%r9
movq %rax,%rbx

mulxq 16(%rsi),%rcx,%r10
xorq %rbp,%rbp

mulxq 24(%rsi),%rax,%r11
adcxq %rcx,%r9

.byte 0xc4,0x62,0xf3,0xf6,0xa6,0x20,0x00,0x00,0x00
adcxq %rax,%r10

.byte 0xc4,0x62,0xfb,0xf6,0xae,0x28,0x00,0x00,0x00
adcxq %rcx,%r11

mulxq 48(%rsi),%rcx,%r14
adcxq %rax,%r12
adcxq %rcx,%r13

mulxq 56(%rsi),%rax,%r15
adcxq %rax,%r14
adcxq %rbp,%r15

mulxq %rdx,%rax,%rdi
movq %rbx,%rdx
xorq %rcx,%rcx
adoxq %r8,%r8
adcxq %rdi,%r8
adoxq %rbp,%rcx
adcxq %rbp,%rcx

movq %rax,(%rsp)
movq %r8,8(%rsp)


.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x10,0x00,0x00,0x00
adoxq %rax,%r10
adcxq %rbx,%r11

mulxq 24(%rsi),%rdi,%r8
adoxq %rdi,%r11
.byte 0x66
adcxq %r8,%r12

mulxq 32(%rsi),%rax,%rbx
adoxq %rax,%r12
adcxq %rbx,%r13

mulxq 40(%rsi),%rdi,%r8
adoxq %rdi,%r13
adcxq %r8,%r14

.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
adoxq %rax,%r14
adcxq %rbx,%r15

.byte 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00
adoxq %rdi,%r15
adcxq %rbp,%r8
mulxq %rdx,%rax,%rdi
adoxq %rbp,%r8
.byte 0x48,0x8b,0x96,0x10,0x00,0x00,0x00

xorq %rbx,%rbx
adoxq %r9,%r9

adcxq %rcx,%rax
adoxq %r10,%r10
adcxq %rax,%r9
adoxq %rbp,%rbx
adcxq %rdi,%r10
adcxq %rbp,%rbx

movq %r9,16(%rsp)
.byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00


mulxq 24(%rsi),%rdi,%r9
adoxq %rdi,%r12
adcxq %r9,%r13

mulxq 32(%rsi),%rax,%rcx
adoxq %rax,%r13
adcxq %rcx,%r14

.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x28,0x00,0x00,0x00
adoxq %rdi,%r14
adcxq %r9,%r15

.byte 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00
adoxq %rax,%r15
adcxq %rcx,%r8

mulxq 56(%rsi),%rdi,%r9
adoxq %rdi,%r8
adcxq %rbp,%r9
mulxq %rdx,%rax,%rdi
adoxq %rbp,%r9
movq 24(%rsi),%rdx

xorq %rcx,%rcx
adoxq %r11,%r11

adcxq %rbx,%rax
adoxq %r12,%r12
adcxq %rax,%r11
adoxq %rbp,%rcx
adcxq %rdi,%r12
adcxq %rbp,%rcx

movq %r11,32(%rsp)
movq %r12,40(%rsp)


mulxq 32(%rsi),%rax,%rbx
adoxq %rax,%r14
adcxq %rbx,%r15

mulxq 40(%rsi),%rdi,%r10
adoxq %rdi,%r15
adcxq %r10,%r8

mulxq 48(%rsi),%rax,%rbx
adoxq %rax,%r8
adcxq %rbx,%r9

mulxq 56(%rsi),%rdi,%r10
adoxq %rdi,%r9
adcxq %rbp,%r10
mulxq %rdx,%rax,%rdi
adoxq %rbp,%r10
movq 32(%rsi),%rdx

xorq %rbx,%rbx
adoxq %r13,%r13

adcxq %rcx,%rax
adoxq %r14,%r14
adcxq %rax,%r13
adoxq %rbp,%rbx
adcxq %rdi,%r14
adcxq %rbp,%rbx

movq %r13,48(%rsp)
movq %r14,56(%rsp)


mulxq 40(%rsi),%rdi,%r11
adoxq %rdi,%r8
adcxq %r11,%r9

mulxq 48(%rsi),%rax,%rcx
adoxq %rax,%r9
adcxq %rcx,%r10

mulxq 56(%rsi),%rdi,%r11
adoxq %rdi,%r10
adcxq %rbp,%r11
mulxq %rdx,%rax,%rdi
movq 40(%rsi),%rdx
adoxq %rbp,%r11

xorq %rcx,%rcx
adoxq %r15,%r15

adcxq %rbx,%rax
adoxq %r8,%r8
adcxq %rax,%r15
adoxq %rbp,%rcx
adcxq %rdi,%r8
adcxq %rbp,%rcx

movq %r15,64(%rsp)
movq %r8,72(%rsp)


.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
adoxq %rax,%r10
adcxq %rbx,%r11

.byte 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00
adoxq %rdi,%r11
adcxq %rbp,%r12
mulxq %rdx,%rax,%rdi
adoxq %rbp,%r12
movq 48(%rsi),%rdx

xorq %rbx,%rbx
adoxq %r9,%r9

adcxq %rcx,%rax
adoxq %r10,%r10
adcxq %rax,%r9
adcxq %rdi,%r10
adoxq %rbp,%rbx
adcxq %rbp,%rbx

movq %r9,80(%rsp)
movq %r10,88(%rsp)


.byte 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00
adoxq %rax,%r12
adoxq %rbp,%r13

mulxq %rdx,%rax,%rdi
xorq %rcx,%rcx
movq 56(%rsi),%rdx
adoxq %r11,%r11

adcxq %rbx,%rax
adoxq %r12,%r12
adcxq %rax,%r11
adoxq %rbp,%rcx
adcxq %rdi,%r12
adcxq %rbp,%rcx

.byte 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00
.byte 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00


mulxq %rdx,%rax,%rdx
xorq %rbx,%rbx
adoxq %r13,%r13

adcxq %rcx,%rax
adoxq %rbp,%rbx
adcxq %r13,%rax
adcxq %rdx,%rbx

.byte 102,72,15,126,199
.byte 102,72,15,126,205

movq 128(%rsp),%rdx
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15

movq %rax,112(%rsp)
movq %rbx,120(%rsp)

call __rsaz_512_reducex

addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
adcq 88(%rsp),%r11
adcq 96(%rsp),%r12
adcq 104(%rsp),%r13
adcq 112(%rsp),%r14
adcq 120(%rsp),%r15
sbbq %rcx,%rcx

call __rsaz_512_subtract

movq %r8,%rdx
movq %r9,%rax
movl 128+8(%rsp),%r8d
movq %rdi,%rsi

decl %r8d
jnz .Loop_sqrx

.Lsqr_tail:

leaq 128+24+48(%rsp),%rax
.cfi_def_cfa %rax,8
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lsqr_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_512_sqr,.-rsaz_512_sqr
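
# Editorial note: rsaz_512_mul is a 512x512-bit Montgomery multiplication
# with the same BMI2/ADX dispatch as rsaz_512_sqr; the common tail adds the
# high half and calls __rsaz_512_subtract for the final conditional
# reduction.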
.globl rsaz_512_mul
.type rsaz_512_mul,@function
.align 32
rsaz_512_mul:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56

subq $128+24,%rsp
.cfi_adjust_cfa_offset 128+24
.Lmul_body:
.byte 102,72,15,110,199
.byte 102,72,15,110,201
movq %r8,128(%rsp)
movl $0x80100,%r11d
andl OPENSSL_ia32cap_P+8(%rip),%r11d
cmpl $0x80100,%r11d
je .Lmulx
movq (%rdx),%rbx
movq %rdx,%rbp
call __rsaz_512_mul

.byte 102,72,15,126,199
.byte 102,72,15,126,205

movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15

call __rsaz_512_reduce
jmp .Lmul_tail

.align 32
.Lmulx:
movq %rdx,%rbp
movq (%rdx),%rdx
call __rsaz_512_mulx

.byte 102,72,15,126,199
.byte 102,72,15,126,205

movq 128(%rsp),%rdx
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15

call __rsaz_512_reducex
.Lmul_tail:
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
adcq 88(%rsp),%r11
adcq 96(%rsp),%r12
adcq 104(%rsp),%r13
adcq 112(%rsp),%r14
adcq 120(%rsp),%r15
sbbq %rcx,%rcx

call __rsaz_512_subtract

leaq 128+24+48(%rsp),%rax
.cfi_def_cfa %rax,8
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lmul_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_512_mul,.-rsaz_512_mul
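
# Editorial note: rsaz_512_mul_gather4 multiplies by an entry gathered from
# a table of 16 interleaved 512-bit values. The pcmpeqd/pand/por sequence
# reads every entry and masks away all but the one selected by %r9d, so the
# memory access pattern is independent of the (secret) index.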
.globl rsaz_512_mul_gather4
.type rsaz_512_mul_gather4,@function
.align 32
rsaz_512_mul_gather4:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56

subq $152,%rsp
.cfi_adjust_cfa_offset 152
.Lmul_gather4_body:
movd %r9d,%xmm8
movdqa .Linc+16(%rip),%xmm1
movdqa .Linc(%rip),%xmm0

pshufd $0,%xmm8,%xmm8
movdqa %xmm1,%xmm7
movdqa %xmm1,%xmm2
paddd %xmm0,%xmm1
pcmpeqd %xmm8,%xmm0
movdqa %xmm7,%xmm3
paddd %xmm1,%xmm2
pcmpeqd %xmm8,%xmm1
movdqa %xmm7,%xmm4
paddd %xmm2,%xmm3
pcmpeqd %xmm8,%xmm2
movdqa %xmm7,%xmm5
paddd %xmm3,%xmm4
pcmpeqd %xmm8,%xmm3
movdqa %xmm7,%xmm6
paddd %xmm4,%xmm5
pcmpeqd %xmm8,%xmm4
paddd %xmm5,%xmm6
pcmpeqd %xmm8,%xmm5
paddd %xmm6,%xmm7
pcmpeqd %xmm8,%xmm6
pcmpeqd %xmm8,%xmm7

movdqa 0(%rdx),%xmm8
movdqa 16(%rdx),%xmm9
movdqa 32(%rdx),%xmm10
movdqa 48(%rdx),%xmm11
pand %xmm0,%xmm8
movdqa 64(%rdx),%xmm12
pand %xmm1,%xmm9
movdqa 80(%rdx),%xmm13
pand %xmm2,%xmm10
movdqa 96(%rdx),%xmm14
pand %xmm3,%xmm11
movdqa 112(%rdx),%xmm15
leaq 128(%rdx),%rbp
pand %xmm4,%xmm12
pand %xmm5,%xmm13
pand %xmm6,%xmm14
pand %xmm7,%xmm15
por %xmm10,%xmm8
por %xmm11,%xmm9
por %xmm12,%xmm8
por %xmm13,%xmm9
por %xmm14,%xmm8
por %xmm15,%xmm9

por %xmm9,%xmm8
pshufd $0x4e,%xmm8,%xmm9
por %xmm9,%xmm8
movl $0x80100,%r11d
andl OPENSSL_ia32cap_P+8(%rip),%r11d
cmpl $0x80100,%r11d
je .Lmulx_gather
.byte 102,76,15,126,195

movq %r8,128(%rsp)
movq %rdi,128+8(%rsp)
movq %rcx,128+16(%rsp)

movq (%rsi),%rax
movq 8(%rsi),%rcx
mulq %rbx
movq %rax,(%rsp)
movq %rcx,%rax
movq %rdx,%r8

mulq %rbx
addq %rax,%r8
movq 16(%rsi),%rax
movq %rdx,%r9
adcq $0,%r9

mulq %rbx
addq %rax,%r9
movq 24(%rsi),%rax
movq %rdx,%r10
adcq $0,%r10

mulq %rbx
addq %rax,%r10
movq 32(%rsi),%rax
movq %rdx,%r11
adcq $0,%r11

mulq %rbx
addq %rax,%r11
movq 40(%rsi),%rax
movq %rdx,%r12
adcq $0,%r12

mulq %rbx
addq %rax,%r12
movq 48(%rsi),%rax
movq %rdx,%r13
adcq $0,%r13

mulq %rbx
addq %rax,%r13
movq 56(%rsi),%rax
movq %rdx,%r14
adcq $0,%r14

mulq %rbx
addq %rax,%r14
movq (%rsi),%rax
movq %rdx,%r15
adcq $0,%r15

leaq 8(%rsp),%rdi
movl $7,%ecx
jmp .Loop_mul_gather

.align 32
.Loop_mul_gather:
movdqa 0(%rbp),%xmm8
movdqa 16(%rbp),%xmm9
movdqa 32(%rbp),%xmm10
movdqa 48(%rbp),%xmm11
pand %xmm0,%xmm8
movdqa 64(%rbp),%xmm12
pand %xmm1,%xmm9
movdqa 80(%rbp),%xmm13
pand %xmm2,%xmm10
movdqa 96(%rbp),%xmm14
pand %xmm3,%xmm11
movdqa 112(%rbp),%xmm15
leaq 128(%rbp),%rbp
pand %xmm4,%xmm12
pand %xmm5,%xmm13
pand %xmm6,%xmm14
pand %xmm7,%xmm15
por %xmm10,%xmm8
por %xmm11,%xmm9
por %xmm12,%xmm8
por %xmm13,%xmm9
por %xmm14,%xmm8
por %xmm15,%xmm9

por %xmm9,%xmm8
pshufd $0x4e,%xmm8,%xmm9
por %xmm9,%xmm8
.byte 102,76,15,126,195

mulq %rbx
addq %rax,%r8
movq 8(%rsi),%rax
movq %r8,(%rdi)
movq %rdx,%r8
adcq $0,%r8

mulq %rbx
addq %rax,%r9
movq 16(%rsi),%rax
adcq $0,%rdx
addq %r9,%r8
movq %rdx,%r9
adcq $0,%r9

mulq %rbx
addq %rax,%r10
movq 24(%rsi),%rax
adcq $0,%rdx
addq %r10,%r9
movq %rdx,%r10
adcq $0,%r10

mulq %rbx
addq %rax,%r11
movq 32(%rsi),%rax
adcq $0,%rdx
addq %r11,%r10
movq %rdx,%r11
adcq $0,%r11

mulq %rbx
addq %rax,%r12
movq 40(%rsi),%rax
adcq $0,%rdx
addq %r12,%r11
movq %rdx,%r12
adcq $0,%r12

mulq %rbx
addq %rax,%r13
movq 48(%rsi),%rax
adcq $0,%rdx
addq %r13,%r12
movq %rdx,%r13
adcq $0,%r13

mulq %rbx
addq %rax,%r14
movq 56(%rsi),%rax
adcq $0,%rdx
addq %r14,%r13
movq %rdx,%r14
adcq $0,%r14

mulq %rbx
addq %rax,%r15
movq (%rsi),%rax
adcq $0,%rdx
addq %r15,%r14
movq %rdx,%r15
adcq $0,%r15

leaq 8(%rdi),%rdi

decl %ecx
jnz .Loop_mul_gather

movq %r8,(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq %r14,48(%rdi)
movq %r15,56(%rdi)

movq 128+8(%rsp),%rdi
movq 128+16(%rsp),%rbp

movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15

call __rsaz_512_reduce
jmp .Lmul_gather_tail

.align 32
.Lmulx_gather:
.byte 102,76,15,126,194

movq %r8,128(%rsp)
movq %rdi,128+8(%rsp)
movq %rcx,128+16(%rsp)

mulxq (%rsi),%rbx,%r8
movq %rbx,(%rsp)
xorl %edi,%edi

mulxq 8(%rsi),%rax,%r9

mulxq 16(%rsi),%rbx,%r10
adcxq %rax,%r8

mulxq 24(%rsi),%rax,%r11
adcxq %rbx,%r9

mulxq 32(%rsi),%rbx,%r12
adcxq %rax,%r10

mulxq 40(%rsi),%rax,%r13
adcxq %rbx,%r11

mulxq 48(%rsi),%rbx,%r14
adcxq %rax,%r12

mulxq 56(%rsi),%rax,%r15
adcxq %rbx,%r13
adcxq %rax,%r14
.byte 0x67
movq %r8,%rbx
adcxq %rdi,%r15

movq $-7,%rcx
jmp .Loop_mulx_gather

.align 32
.Loop_mulx_gather:
movdqa 0(%rbp),%xmm8
movdqa 16(%rbp),%xmm9
movdqa 32(%rbp),%xmm10
movdqa 48(%rbp),%xmm11
pand %xmm0,%xmm8
movdqa 64(%rbp),%xmm12
pand %xmm1,%xmm9
movdqa 80(%rbp),%xmm13
pand %xmm2,%xmm10
movdqa 96(%rbp),%xmm14
pand %xmm3,%xmm11
movdqa 112(%rbp),%xmm15
leaq 128(%rbp),%rbp
pand %xmm4,%xmm12
pand %xmm5,%xmm13
pand %xmm6,%xmm14
pand %xmm7,%xmm15
por %xmm10,%xmm8
por %xmm11,%xmm9
por %xmm12,%xmm8
por %xmm13,%xmm9
por %xmm14,%xmm8
por %xmm15,%xmm9

por %xmm9,%xmm8
pshufd $0x4e,%xmm8,%xmm9
por %xmm9,%xmm8
.byte 102,76,15,126,194

.byte 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00
adcxq %rax,%rbx
adoxq %r9,%r8

mulxq 8(%rsi),%rax,%r9
adcxq %rax,%r8
adoxq %r10,%r9

mulxq 16(%rsi),%rax,%r10
adcxq %rax,%r9
adoxq %r11,%r10

.byte 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00
adcxq %rax,%r10
adoxq %r12,%r11

mulxq 32(%rsi),%rax,%r12
adcxq %rax,%r11
adoxq %r13,%r12

mulxq 40(%rsi),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13

.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
adcxq %rax,%r13
.byte 0x67
adoxq %r15,%r14

mulxq 56(%rsi),%rax,%r15
movq %rbx,64(%rsp,%rcx,8)
adcxq %rax,%r14
adoxq %rdi,%r15
movq %r8,%rbx
adcxq %rdi,%r15

incq %rcx
jnz .Loop_mulx_gather

movq %r8,64(%rsp)
movq %r9,64+8(%rsp)
movq %r10,64+16(%rsp)
movq %r11,64+24(%rsp)
movq %r12,64+32(%rsp)
movq %r13,64+40(%rsp)
movq %r14,64+48(%rsp)
movq %r15,64+56(%rsp)

movq 128(%rsp),%rdx
movq 128+8(%rsp),%rdi
movq 128+16(%rsp),%rbp

movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15

call __rsaz_512_reducex

.Lmul_gather_tail:
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
adcq 88(%rsp),%r11
adcq 96(%rsp),%r12
adcq 104(%rsp),%r13
adcq 112(%rsp),%r14
adcq 120(%rsp),%r15
sbbq %rcx,%rcx

call __rsaz_512_subtract

leaq 128+24+48(%rsp),%rax
.cfi_def_cfa %rax,8
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lmul_gather4_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
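
# Editorial note: rsaz_512_mul_scatter4 multiplies and then scatters the
# eight result limbs at a 128-byte stride, producing the interleaved table
# layout that the constant-time gather routines expect.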
.globl rsaz_512_mul_scatter4
.type rsaz_512_mul_scatter4,@function
.align 32
rsaz_512_mul_scatter4:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56

movl %r9d,%r9d
subq $128+24,%rsp
.cfi_adjust_cfa_offset 128+24
.Lmul_scatter4_body:
leaq (%r8,%r9,8),%r8
.byte 102,72,15,110,199
.byte 102,72,15,110,202
.byte 102,73,15,110,208
movq %rcx,128(%rsp)

movq %rdi,%rbp
movl $0x80100,%r11d
andl OPENSSL_ia32cap_P+8(%rip),%r11d
cmpl $0x80100,%r11d
je .Lmulx_scatter
movq (%rdi),%rbx
call __rsaz_512_mul

.byte 102,72,15,126,199
.byte 102,72,15,126,205

movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15

call __rsaz_512_reduce
jmp .Lmul_scatter_tail

.align 32
.Lmulx_scatter:
movq (%rdi),%rdx
call __rsaz_512_mulx

.byte 102,72,15,126,199
.byte 102,72,15,126,205

movq 128(%rsp),%rdx
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15

call __rsaz_512_reducex

.Lmul_scatter_tail:
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
adcq 88(%rsp),%r11
adcq 96(%rsp),%r12
adcq 104(%rsp),%r13
adcq 112(%rsp),%r14
adcq 120(%rsp),%r15
.byte 102,72,15,126,214
sbbq %rcx,%rcx

call __rsaz_512_subtract

movq %r8,0(%rsi)
movq %r9,128(%rsi)
movq %r10,256(%rsi)
movq %r11,384(%rsi)
movq %r12,512(%rsi)
movq %r13,640(%rsi)
movq %r14,768(%rsi)
movq %r15,896(%rsi)

leaq 128+24+48(%rsp),%rax
.cfi_def_cfa %rax,8
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lmul_scatter4_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
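
# Editorial note: rsaz_512_mul_by_one appears to implement the "multiply by
# 1" step that converts a value out of Montgomery form: the upper half of
# the work area is zeroed and a single reduction pass is run.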
.globl rsaz_512_mul_by_one
.type rsaz_512_mul_by_one,@function
.align 32
rsaz_512_mul_by_one:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56

subq $128+24,%rsp
.cfi_adjust_cfa_offset 128+24
.Lmul_by_one_body:
movl OPENSSL_ia32cap_P+8(%rip),%eax
movq %rdx,%rbp
movq %rcx,128(%rsp)

movq (%rsi),%r8
pxor %xmm0,%xmm0
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
movq 32(%rsi),%r12
movq 40(%rsi),%r13
movq 48(%rsi),%r14
movq 56(%rsi),%r15

movdqa %xmm0,(%rsp)
movdqa %xmm0,16(%rsp)
movdqa %xmm0,32(%rsp)
movdqa %xmm0,48(%rsp)
movdqa %xmm0,64(%rsp)
movdqa %xmm0,80(%rsp)
movdqa %xmm0,96(%rsp)
andl $0x80100,%eax
cmpl $0x80100,%eax
je .Lby_one_callx
call __rsaz_512_reduce
jmp .Lby_one_tail
.align 32
.Lby_one_callx:
movq 128(%rsp),%rdx
call __rsaz_512_reducex
.Lby_one_tail:
movq %r8,(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq %r14,48(%rdi)
movq %r15,56(%rdi)

leaq 128+24+48(%rsp),%rax
.cfi_def_cfa %rax,8
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lmul_by_one_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
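
# Editorial note: __rsaz_512_reduce is the MULQ-based Montgomery reduction:
# eight passes, each multiplying the modulus at (%rbp) by a per-pass factor
# derived from the n0 constant the caller saved on the stack.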
.type __rsaz_512_reduce,@function
.align 32
__rsaz_512_reduce:
.cfi_startproc
movq %r8,%rbx
imulq 128+8(%rsp),%rbx
movq 0(%rbp),%rax
movl $8,%ecx
jmp .Lreduction_loop

.align 32
.Lreduction_loop:
mulq %rbx
movq 8(%rbp),%rax
negq %r8
movq %rdx,%r8
adcq $0,%r8

mulq %rbx
addq %rax,%r9
movq 16(%rbp),%rax
adcq $0,%rdx
addq %r9,%r8
movq %rdx,%r9
adcq $0,%r9

mulq %rbx
addq %rax,%r10
movq 24(%rbp),%rax
adcq $0,%rdx
addq %r10,%r9
movq %rdx,%r10
adcq $0,%r10

mulq %rbx
addq %rax,%r11
movq 32(%rbp),%rax
adcq $0,%rdx
addq %r11,%r10
movq 128+8(%rsp),%rsi


adcq $0,%rdx
movq %rdx,%r11

mulq %rbx
addq %rax,%r12
movq 40(%rbp),%rax
adcq $0,%rdx
imulq %r8,%rsi
addq %r12,%r11
movq %rdx,%r12
adcq $0,%r12

mulq %rbx
addq %rax,%r13
movq 48(%rbp),%rax
adcq $0,%rdx
addq %r13,%r12
movq %rdx,%r13
adcq $0,%r13

mulq %rbx
addq %rax,%r14
movq 56(%rbp),%rax
adcq $0,%rdx
addq %r14,%r13
movq %rdx,%r14
adcq $0,%r14

mulq %rbx
movq %rsi,%rbx
addq %rax,%r15
movq 0(%rbp),%rax
adcq $0,%rdx
addq %r15,%r14
movq %rdx,%r15
adcq $0,%r15

decl %ecx
jne .Lreduction_loop

.byte 0xf3,0xc3
.cfi_endproc
.size __rsaz_512_reduce,.-__rsaz_512_reduce
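
# Editorial note: __rsaz_512_reducex is the same reduction for BMI2/ADX
# parts, using MULX with the interleaved ADCX/ADOX dual carry chains.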
.type __rsaz_512_reducex,@function
.align 32
__rsaz_512_reducex:
.cfi_startproc

imulq %r8,%rdx
xorq %rsi,%rsi
movl $8,%ecx
jmp .Lreduction_loopx

.align 32
.Lreduction_loopx:
movq %r8,%rbx
mulxq 0(%rbp),%rax,%r8
adcxq %rbx,%rax
adoxq %r9,%r8

mulxq 8(%rbp),%rax,%r9
adcxq %rax,%r8
adoxq %r10,%r9

mulxq 16(%rbp),%rbx,%r10
adcxq %rbx,%r9
adoxq %r11,%r10

mulxq 24(%rbp),%rbx,%r11
adcxq %rbx,%r10
adoxq %r12,%r11

.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
movq %rdx,%rax
movq %r8,%rdx
adcxq %rbx,%r11
adoxq %r13,%r12

mulxq 128+8(%rsp),%rbx,%rdx
movq %rax,%rdx

mulxq 40(%rbp),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13

.byte 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00
adcxq %rax,%r13
adoxq %r15,%r14

mulxq 56(%rbp),%rax,%r15
movq %rbx,%rdx
adcxq %rax,%r14
adoxq %rsi,%r15
adcxq %rsi,%r15

decl %ecx
jne .Lreduction_loopx

.byte 0xf3,0xc3
.cfi_endproc
.size __rsaz_512_reducex,.-__rsaz_512_reducex
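
# Editorial note: __rsaz_512_subtract finishes the reduction without
# branching on secret data: %rcx is an all-ones or all-zero mask from the
# caller's sbbq, so the masked two's complement of the modulus either
# subtracts it or adds zero.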
.type __rsaz_512_subtract,@function
.align 32
__rsaz_512_subtract:
.cfi_startproc
movq %r8,(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq %r14,48(%rdi)
movq %r15,56(%rdi)

movq 0(%rbp),%r8
movq 8(%rbp),%r9
negq %r8
notq %r9
andq %rcx,%r8
movq 16(%rbp),%r10
andq %rcx,%r9
notq %r10
movq 24(%rbp),%r11
andq %rcx,%r10
notq %r11
movq 32(%rbp),%r12
andq %rcx,%r11
notq %r12
movq 40(%rbp),%r13
andq %rcx,%r12
notq %r13
movq 48(%rbp),%r14
andq %rcx,%r13
notq %r14
movq 56(%rbp),%r15
andq %rcx,%r14
notq %r15
andq %rcx,%r15

addq (%rdi),%r8
adcq 8(%rdi),%r9
adcq 16(%rdi),%r10
adcq 24(%rdi),%r11
adcq 32(%rdi),%r12
adcq 40(%rdi),%r13
adcq 48(%rdi),%r14
adcq 56(%rdi),%r15

movq %r8,(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq %r14,48(%rdi)
movq %r15,56(%rdi)

.byte 0xf3,0xc3
.cfi_endproc
.size __rsaz_512_subtract,.-__rsaz_512_subtract
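
# Editorial note: __rsaz_512_mul is the generic 8x8-limb schoolbook
# multiplication (MULQ), writing the 1024-bit product to the stack frame at
# 8(%rsp).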
.type __rsaz_512_mul,@function
.align 32
__rsaz_512_mul:
.cfi_startproc
leaq 8(%rsp),%rdi

movq (%rsi),%rax
mulq %rbx
movq %rax,(%rdi)
movq 8(%rsi),%rax
movq %rdx,%r8

mulq %rbx
addq %rax,%r8
movq 16(%rsi),%rax
movq %rdx,%r9
adcq $0,%r9

mulq %rbx
addq %rax,%r9
movq 24(%rsi),%rax
movq %rdx,%r10
adcq $0,%r10

mulq %rbx
addq %rax,%r10
movq 32(%rsi),%rax
movq %rdx,%r11
adcq $0,%r11

mulq %rbx
addq %rax,%r11
movq 40(%rsi),%rax
movq %rdx,%r12
adcq $0,%r12

mulq %rbx
addq %rax,%r12
movq 48(%rsi),%rax
movq %rdx,%r13
adcq $0,%r13

mulq %rbx
addq %rax,%r13
movq 56(%rsi),%rax
movq %rdx,%r14
adcq $0,%r14

mulq %rbx
addq %rax,%r14
movq (%rsi),%rax
movq %rdx,%r15
adcq $0,%r15

leaq 8(%rbp),%rbp
leaq 8(%rdi),%rdi

movl $7,%ecx
jmp .Loop_mul

.align 32
.Loop_mul:
movq (%rbp),%rbx
mulq %rbx
addq %rax,%r8
movq 8(%rsi),%rax
movq %r8,(%rdi)
movq %rdx,%r8
adcq $0,%r8

mulq %rbx
addq %rax,%r9
movq 16(%rsi),%rax
adcq $0,%rdx
addq %r9,%r8
movq %rdx,%r9
adcq $0,%r9

mulq %rbx
addq %rax,%r10
movq 24(%rsi),%rax
adcq $0,%rdx
addq %r10,%r9
movq %rdx,%r10
adcq $0,%r10

mulq %rbx
addq %rax,%r11
movq 32(%rsi),%rax
adcq $0,%rdx
addq %r11,%r10
movq %rdx,%r11
adcq $0,%r11

mulq %rbx
addq %rax,%r12
movq 40(%rsi),%rax
adcq $0,%rdx
addq %r12,%r11
movq %rdx,%r12
adcq $0,%r12

mulq %rbx
addq %rax,%r13
movq 48(%rsi),%rax
adcq $0,%rdx
addq %r13,%r12
movq %rdx,%r13
adcq $0,%r13

mulq %rbx
addq %rax,%r14
movq 56(%rsi),%rax
adcq $0,%rdx
addq %r14,%r13
movq %rdx,%r14
leaq 8(%rbp),%rbp
adcq $0,%r14

mulq %rbx
addq %rax,%r15
movq (%rsi),%rax
adcq $0,%rdx
addq %r15,%r14
movq %rdx,%r15
adcq $0,%r15

leaq 8(%rdi),%rdi

decl %ecx
jnz .Loop_mul

movq %r8,(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq %r14,48(%rdi)
movq %r15,56(%rdi)

.byte 0xf3,0xc3
.cfi_endproc
.size __rsaz_512_mul,.-__rsaz_512_mul
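
# Editorial note: __rsaz_512_mulx is the MULX-based multiplication used on
# BMI2/ADX parts; ADCX and ADOX keep two independent carry chains in flight
# per iteration.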
.type __rsaz_512_mulx,@function
.align 32
__rsaz_512_mulx:
.cfi_startproc
mulxq (%rsi),%rbx,%r8
movq $-6,%rcx

mulxq 8(%rsi),%rax,%r9
movq %rbx,8(%rsp)

mulxq 16(%rsi),%rbx,%r10
adcq %rax,%r8

mulxq 24(%rsi),%rax,%r11
adcq %rbx,%r9

mulxq 32(%rsi),%rbx,%r12
adcq %rax,%r10

mulxq 40(%rsi),%rax,%r13
adcq %rbx,%r11

mulxq 48(%rsi),%rbx,%r14
adcq %rax,%r12

mulxq 56(%rsi),%rax,%r15
movq 8(%rbp),%rdx
adcq %rbx,%r13
adcq %rax,%r14
adcq $0,%r15

xorq %rdi,%rdi
jmp .Loop_mulx

.align 32
.Loop_mulx:
movq %r8,%rbx
mulxq (%rsi),%rax,%r8
adcxq %rax,%rbx
adoxq %r9,%r8

mulxq 8(%rsi),%rax,%r9
adcxq %rax,%r8
adoxq %r10,%r9

mulxq 16(%rsi),%rax,%r10
adcxq %rax,%r9
adoxq %r11,%r10

mulxq 24(%rsi),%rax,%r11
adcxq %rax,%r10
adoxq %r12,%r11

.byte 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00
adcxq %rax,%r11
adoxq %r13,%r12

mulxq 40(%rsi),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13

mulxq 48(%rsi),%rax,%r14
adcxq %rax,%r13
adoxq %r15,%r14

mulxq 56(%rsi),%rax,%r15
movq 64(%rbp,%rcx,8),%rdx
movq %rbx,8+64-8(%rsp,%rcx,8)
adcxq %rax,%r14
adoxq %rdi,%r15
adcxq %rdi,%r15

incq %rcx
jnz .Loop_mulx

movq %r8,%rbx
mulxq (%rsi),%rax,%r8
adcxq %rax,%rbx
adoxq %r9,%r8

.byte 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00
adcxq %rax,%r8
adoxq %r10,%r9

.byte 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00
adcxq %rax,%r9
adoxq %r11,%r10

mulxq 24(%rsi),%rax,%r11
adcxq %rax,%r10
adoxq %r12,%r11

mulxq 32(%rsi),%rax,%r12
adcxq %rax,%r11
adoxq %r13,%r12

mulxq 40(%rsi),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13

.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
adcxq %rax,%r13
adoxq %r15,%r14

.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
adcxq %rax,%r14
adoxq %rdi,%r15
adcxq %rdi,%r15

movq %rbx,8+64-8(%rsp)
movq %r8,8+64(%rsp)
movq %r9,8+64+8(%rsp)
movq %r10,8+64+16(%rsp)
movq %r11,8+64+24(%rsp)
movq %r12,8+64+32(%rsp)
movq %r13,8+64+40(%rsp)
movq %r14,8+64+48(%rsp)
movq %r15,8+64+56(%rsp)

.byte 0xf3,0xc3
.cfi_endproc
.size __rsaz_512_mulx,.-__rsaz_512_mulx
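
# Editorial note: rsaz_512_scatter4 stores limb j of table entry %rdx at
# byte offset %rdx*8 + j*128, the interleaved layout read back by
# rsaz_512_gather4.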
.globl rsaz_512_scatter4
.type rsaz_512_scatter4,@function
.align 16
rsaz_512_scatter4:
.cfi_startproc
leaq (%rdi,%rdx,8),%rdi
movl $8,%r9d
jmp .Loop_scatter
.align 16
.Loop_scatter:
movq (%rsi),%rax
leaq 8(%rsi),%rsi
movq %rax,(%rdi)
leaq 128(%rdi),%rdi
decl %r9d
jnz .Loop_scatter
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_512_scatter4,.-rsaz_512_scatter4
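
# Editorial note: rsaz_512_gather4 is the constant-time inverse of the
# scatter: for each limb row it loads all 16 entries and keeps only the one
# matching the index in %edx, via masks built with pcmpeqd against .Linc.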
.globl rsaz_512_gather4
.type rsaz_512_gather4,@function
.align 16
rsaz_512_gather4:
.cfi_startproc
movd %edx,%xmm8
movdqa .Linc+16(%rip),%xmm1
movdqa .Linc(%rip),%xmm0

pshufd $0,%xmm8,%xmm8
movdqa %xmm1,%xmm7
movdqa %xmm1,%xmm2
paddd %xmm0,%xmm1
pcmpeqd %xmm8,%xmm0
movdqa %xmm7,%xmm3
paddd %xmm1,%xmm2
pcmpeqd %xmm8,%xmm1
movdqa %xmm7,%xmm4
paddd %xmm2,%xmm3
pcmpeqd %xmm8,%xmm2
movdqa %xmm7,%xmm5
paddd %xmm3,%xmm4
pcmpeqd %xmm8,%xmm3
movdqa %xmm7,%xmm6
paddd %xmm4,%xmm5
pcmpeqd %xmm8,%xmm4
paddd %xmm5,%xmm6
pcmpeqd %xmm8,%xmm5
paddd %xmm6,%xmm7
pcmpeqd %xmm8,%xmm6
pcmpeqd %xmm8,%xmm7
movl $8,%r9d
jmp .Loop_gather
.align 16
.Loop_gather:
movdqa 0(%rsi),%xmm8
movdqa 16(%rsi),%xmm9
movdqa 32(%rsi),%xmm10
movdqa 48(%rsi),%xmm11
pand %xmm0,%xmm8
movdqa 64(%rsi),%xmm12
pand %xmm1,%xmm9
movdqa 80(%rsi),%xmm13
pand %xmm2,%xmm10
movdqa 96(%rsi),%xmm14
pand %xmm3,%xmm11
movdqa 112(%rsi),%xmm15
leaq 128(%rsi),%rsi
pand %xmm4,%xmm12
pand %xmm5,%xmm13
pand %xmm6,%xmm14
pand %xmm7,%xmm15
por %xmm10,%xmm8
por %xmm11,%xmm9
por %xmm12,%xmm8
por %xmm13,%xmm9
por %xmm14,%xmm8
por %xmm15,%xmm9

por %xmm9,%xmm8
pshufd $0x4e,%xmm8,%xmm9
por %xmm9,%xmm8
movq %xmm8,(%rdi)
leaq 8(%rdi),%rdi
decl %r9d
jnz .Loop_gather
.byte 0xf3,0xc3
.LSEH_end_rsaz_512_gather4:
.cfi_endproc
.size rsaz_512_gather4,.-rsaz_512_gather4
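
# Editorial note: .Linc seeds the gather masks: {0,0,1,1} plus repeated
# {2,2,2,2} increments enumerate table indices 0..15 across xmm0-xmm7.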
.section .rodata
.align 64
.Linc:
.long 0,0, 1,1
.long 2,2, 2,2
.previous
.section ".note.gnu.property", "a"
.p2align 3
.long 1f - 0f
.long 4f - 1f
.long 5
0:
# "GNU" encoded with .byte, since .asciz isn't supported
# on Solaris.
.byte 0x47
.byte 0x4e
.byte 0x55
.byte 0
1:
.p2align 3
.long 0xc0000002
.long 3f - 2f
2:
.long 3
3:
.p2align 3
4: