Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/crypto/openssl/amd64/x86_64-mont.S
39482 views
1
/* Do not modify. This file is auto-generated from x86_64-mont.pl. */
2
.text
3
4
5
6
/*
 * bn_mul_mont -- exported entry point and generic (one limb per step) loop.
 *
 * Fix in this revision: the viewer gutter numbers that had been interleaved
 * between the source lines are removed (a bare number is not a valid
 * assembler statement). The instruction stream itself is unchanged.
 *
 * Register use on entry, as read by the code below (System V AMD64 order):
 *   %rdi  result limbs, written by .Lsub and .Lcopy
 *   %rsi  first operand limbs
 *   %rdx  second operand limbs
 *   %rcx  limbs subtracted in .Lsub (the modulus)
 *   %r8   pointer to one 64-bit word, loaded once in .Lmul_body
 *   %r9d  limb count (zero-extended on entry)
 * NOTE(review): this matches OpenSSL's C prototype
 *   int bn_mul_mont(rp, ap, bp, np, n0, num)
 * -- confirm against the BN headers of the tree being built.
 * Returns 1 in %rax. %rbx, %rbp and %r12-%r15 are saved and restored.
 *
 * Dispatch:
 *   count not a multiple of 4, or count < 8      -> .Lmul_enter (this file)
 *   otherwise, operands differ                   -> .Lmul4x_enter
 *   otherwise, count a multiple of 8             -> .Lsqr8x_enter
 *   otherwise                                    -> .Lmul4x_enter
 *
 * Scratch frame used by .Lmul_enter: count+2 quadwords at (%rsp); slot
 * [count] holds the top carry word and slot [count+1] the caller's %rsp.
 */
.globl bn_mul_mont
.type bn_mul_mont,@function
.align 16
bn_mul_mont:
.cfi_startproc
	movl %r9d,%r9d	/* zero-extend the limb count */
	movq %rsp,%rax	/* keep the caller's %rsp for the epilogue */
.cfi_def_cfa_register %rax
	testl $3,%r9d
	jnz .Lmul_enter
	cmpl $8,%r9d
	jb .Lmul_enter
	movl OPENSSL_ia32cap_P+8(%rip),%r11d	/* capability word, tested at .Lmul4x_enter */
	cmpq %rsi,%rdx
	jne .Lmul4x_enter
	testl $7,%r9d
	jz .Lsqr8x_enter
	jmp .Lmul4x_enter

.align 16
.Lmul_enter:
	pushq %rbx
.cfi_offset %rbx,-16
	pushq %rbp
.cfi_offset %rbp,-24
	pushq %r12
.cfi_offset %r12,-32
	pushq %r13
.cfi_offset %r13,-40
	pushq %r14
.cfi_offset %r14,-48
	pushq %r15
.cfi_offset %r15,-56

	/* %r10 = %rsp - 8*(count+2), rounded down to a 1024-byte boundary. */
	negq %r9
	movq %rsp,%r11
	leaq -16(%rsp,%r9,8),%r10
	negq %r9
	andq $-1024,%r10

	/*
	 * Move %rsp down to %r10 in steps of at most 4096 bytes, reading one
	 * word at every step so each page between the old and the new stack
	 * pointer is touched in order.
	 */
	subq %r10,%r11
	andq $-4096,%r11
	leaq (%r10,%r11,1),%rsp
	movq (%rsp),%r11
	cmpq %r10,%rsp
	ja .Lmul_page_walk
	jmp .Lmul_page_walk_done

.align 16
.Lmul_page_walk:
	leaq -4096(%rsp),%rsp
	movq (%rsp),%r11
	cmpq %r10,%rsp
	ja .Lmul_page_walk
.Lmul_page_walk_done:

	movq %rax,8(%rsp,%r9,8)	/* frame slot [count+1] = caller's %rsp */
.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul_body:
	movq %rdx,%r12	/* %r12 = second operand pointer (%rdx is clobbered by mulq) */
	movq (%r8),%r8	/* %r8 = the word %r8 pointed at */
	movq (%r12),%rbx	/* %rbx = second operand limb 0 */
	movq (%rsi),%rax

	xorq %r14,%r14	/* %r14 = outer index */
	xorq %r15,%r15	/* %r15 = inner index */

	movq %r8,%rbp
	mulq %rbx
	movq %rax,%r10
	movq (%rcx),%rax

	imulq %r10,%rbp	/* %rbp = per-row multiplier for the modulus limbs */
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r10	/* only the carry out of this add is used */
	movq 8(%rsi),%rax
	adcq $0,%rdx
	movq %rdx,%r13

	leaq 1(%r15),%r15
	jmp .L1st_enter

	/* First row: the scratch area is written, not accumulated into. */
.align 16
.L1st:
	addq %rax,%r13
	movq (%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r11,%r13
	movq %r10,%r11
	adcq $0,%rdx
	movq %r13,-16(%rsp,%r15,8)
	movq %rdx,%r13

.L1st_enter:
	mulq %rbx
	addq %rax,%r11
	movq (%rcx,%r15,8),%rax
	adcq $0,%rdx
	leaq 1(%r15),%r15
	movq %rdx,%r10

	mulq %rbp
	cmpq %r9,%r15
	jne .L1st

	addq %rax,%r13
	movq (%rsi),%rax
	adcq $0,%rdx
	addq %r11,%r13
	adcq $0,%rdx
	movq %r13,-16(%rsp,%r15,8)
	movq %rdx,%r13
	movq %r10,%r11

	xorq %rdx,%rdx
	addq %r11,%r13
	adcq $0,%rdx
	movq %r13,-8(%rsp,%r9,8)
	movq %rdx,(%rsp,%r9,8)	/* slot [count] = top carry word */

	leaq 1(%r14),%r14
	jmp .Louter
	/* Remaining rows: each adds into the scratch area left by the previous one. */
.align 16
.Louter:
	movq (%r12,%r14,8),%rbx
	xorq %r15,%r15
	movq %r8,%rbp
	movq (%rsp),%r10
	mulq %rbx
	addq %rax,%r10
	movq (%rcx),%rax
	adcq $0,%rdx

	imulq %r10,%rbp
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r10
	movq 8(%rsi),%rax
	adcq $0,%rdx
	movq 8(%rsp),%r10
	movq %rdx,%r13

	leaq 1(%r15),%r15
	jmp .Linner_enter

.align 16
.Linner:
	addq %rax,%r13
	movq (%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r10,%r13
	movq (%rsp,%r15,8),%r10
	adcq $0,%rdx
	movq %r13,-16(%rsp,%r15,8)
	movq %rdx,%r13

.Linner_enter:
	mulq %rbx
	addq %rax,%r11
	movq (%rcx,%r15,8),%rax
	adcq $0,%rdx
	addq %r11,%r10
	movq %rdx,%r11
	adcq $0,%r11
	leaq 1(%r15),%r15

	mulq %rbp
	cmpq %r9,%r15
	jne .Linner

	addq %rax,%r13
	movq (%rsi),%rax
	adcq $0,%rdx
	addq %r10,%r13
	movq (%rsp,%r15,8),%r10
	adcq $0,%rdx
	movq %r13,-16(%rsp,%r15,8)
	movq %rdx,%r13

	xorq %rdx,%rdx
	addq %r11,%r13
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %r13,-8(%rsp,%r9,8)
	movq %rdx,(%rsp,%r9,8)

	leaq 1(%r14),%r14
	cmpq %r9,%r14
	jb .Louter

	/*
	 * Result = scratch - (%rcx)[], limb by limb. The xorq clears CF for
	 * the first sbbq; leaq and decq leave CF alone, so the borrow is
	 * carried from one iteration to the next.
	 */
	xorq %r14,%r14
	movq (%rsp),%rax
	movq %r9,%r15

.align 16
.Lsub: sbbq (%rcx,%r14,8),%rax
	movq %rax,(%rdi,%r14,8)
	movq 8(%rsp,%r14,8),%rax
	leaq 1(%r14),%r14
	decq %r15
	jnz .Lsub

	/*
	 * %rax = top scratch word minus the final borrow. It is used below as
	 * an AND mask for the scratch limbs, and %rbx (its complement) as the
	 * mask for the subtracted limbs, so the choice is made without a branch.
	 */
	sbbq $0,%rax
	movq $-1,%rbx
	xorq %rax,%rbx
	xorq %r14,%r14
	movq %r9,%r15

.Lcopy:
	movq (%rdi,%r14,8),%rcx
	movq (%rsp,%r14,8),%rdx
	andq %rbx,%rcx
	andq %rax,%rdx
	movq %r9,(%rsp,%r14,8)	/* overwrite the scratch limb (with the count) */
	orq %rcx,%rdx
	movq %rdx,(%rdi,%r14,8)
	leaq 1(%r14),%r14
	subq $1,%r15
	jnz .Lcopy

	movq 8(%rsp,%r9,8),%rsi	/* %rsi = caller's %rsp saved in the prologue */
.cfi_def_cfa %rsi,8
	movq $1,%rax	/* return value */
	movq -48(%rsi),%r15
.cfi_restore %r15
	movq -40(%rsi),%r14
.cfi_restore %r14
	movq -32(%rsi),%r13
.cfi_restore %r13
	movq -24(%rsi),%r12
.cfi_restore %r12
	movq -16(%rsi),%rbp
.cfi_restore %rbp
	movq -8(%rsi),%rbx
.cfi_restore %rbx
	leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul_epilogue:
.byte 0xf3,0xc3	/* machine code for "rep ret" */
.cfi_endproc
.size bn_mul_mont,.-bn_mul_mont
259
/*
 * bn_mul4x_mont -- file-local variant that handles four limbs per loop pass.
 *
 * Fix in this revision: the viewer gutter numbers that had been interleaved
 * between the source lines are removed (a bare number is not a valid
 * assembler statement). The instruction stream itself is unchanged.
 *
 * Reached by a jump to .Lmul4x_enter. At that label the code expects:
 *   %rax  the caller's %rsp
 *   %r9   limb count, zero-extended
 *   %r11d a capability word; it is masked with 0x80100 (bits 8 and 19) and,
 *         when both bits are set, control goes to .Lmulx4x_enter instead.
 *         NOTE(review): presumably the BMI2 and ADX feature bits, matching
 *         the mulx/adcx/adox instructions used on that path -- confirm.
 *   %rdi, %rsi, %rdx, %rcx, %r8: result, two operands, limbs subtracted in
 *         .Lsub4x, and a pointer to one 64-bit word loaded in .Lmul4x_body.
 * Returns 1 in %rax. %rbx, %rbp and %r12-%r15 are saved and restored.
 *
 * Scratch frame: count+4 quadwords at (%rsp); slot [count] is the top carry
 * word, slot [count+1] the caller's %rsp and slot [count+2] the saved %rdi
 * (%rdi is reused as an accumulator in the loops).
 */
.type bn_mul4x_mont,@function
.align 16
bn_mul4x_mont:
.cfi_startproc
	movl %r9d,%r9d
	movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lmul4x_enter:
	andl $0x80100,%r11d
	cmpl $0x80100,%r11d
	je .Lmulx4x_enter
	pushq %rbx
.cfi_offset %rbx,-16
	pushq %rbp
.cfi_offset %rbp,-24
	pushq %r12
.cfi_offset %r12,-32
	pushq %r13
.cfi_offset %r13,-40
	pushq %r14
.cfi_offset %r14,-48
	pushq %r15
.cfi_offset %r15,-56

	/* %r10 = %rsp - 8*(count+4), rounded down to a 1024-byte boundary. */
	negq %r9
	movq %rsp,%r11
	leaq -32(%rsp,%r9,8),%r10
	negq %r9
	andq $-1024,%r10

	/* Step %rsp down to %r10 at most 4096 bytes at a time, touching each step. */
	subq %r10,%r11
	andq $-4096,%r11
	leaq (%r10,%r11,1),%rsp
	movq (%rsp),%r11
	cmpq %r10,%rsp
	ja .Lmul4x_page_walk
	jmp .Lmul4x_page_walk_done

.Lmul4x_page_walk:
	leaq -4096(%rsp),%rsp
	movq (%rsp),%r11
	cmpq %r10,%rsp
	ja .Lmul4x_page_walk
.Lmul4x_page_walk_done:

	movq %rax,8(%rsp,%r9,8)	/* slot [count+1] = caller's %rsp */
.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul4x_body:
	movq %rdi,16(%rsp,%r9,8)	/* slot [count+2] = result pointer */
	movq %rdx,%r12
	movq (%r8),%r8
	movq (%r12),%rbx
	movq (%rsi),%rax

	xorq %r14,%r14	/* %r14 = outer index */
	xorq %r15,%r15	/* %r15 = inner index */

	movq %r8,%rbp
	mulq %rbx
	movq %rax,%r10
	movq (%rcx),%rax

	imulq %r10,%rbp	/* %rbp = per-row multiplier for the (%rcx) limbs */
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r10
	movq 8(%rsi),%rax
	adcq $0,%rdx
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq 8(%rcx),%rax
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq 16(%rsi),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	leaq 4(%r15),%r15
	adcq $0,%rdx
	movq %rdi,(%rsp)
	movq %rdx,%r13
	jmp .L1st4x
	/* First row, four limbs per pass; the scratch area is only written. */
.align 16
.L1st4x:
	mulq %rbx
	addq %rax,%r10
	movq -16(%rcx,%r15,8),%rax
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq -8(%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %r13,-24(%rsp,%r15,8)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq -8(%rcx,%r15,8),%rax
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq (%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	adcq $0,%rdx
	movq %rdi,-16(%rsp,%r15,8)
	movq %rdx,%r13

	mulq %rbx
	addq %rax,%r10
	movq (%rcx,%r15,8),%rax
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq 8(%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %r13,-8(%rsp,%r15,8)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq 8(%rcx,%r15,8),%rax
	adcq $0,%rdx
	leaq 4(%r15),%r15
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq -16(%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	adcq $0,%rdx
	movq %rdi,-32(%rsp,%r15,8)
	movq %rdx,%r13
	cmpq %r9,%r15
	jb .L1st4x

	/* Tail of the first row: the last two limbs plus the top carry word. */
	mulq %rbx
	addq %rax,%r10
	movq -16(%rcx,%r15,8),%rax
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq -8(%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %r13,-24(%rsp,%r15,8)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq -8(%rcx,%r15,8),%rax
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq (%rsi),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	adcq $0,%rdx
	movq %rdi,-16(%rsp,%r15,8)
	movq %rdx,%r13

	xorq %rdi,%rdi
	addq %r10,%r13
	adcq $0,%rdi
	movq %r13,-8(%rsp,%r15,8)
	movq %rdi,(%rsp,%r15,8)

	leaq 1(%r14),%r14
	/* Remaining rows: same shape, but each step also adds the previous scratch limb. */
.align 4
.Louter4x:
	movq (%r12,%r14,8),%rbx
	xorq %r15,%r15
	movq (%rsp),%r10
	movq %r8,%rbp
	mulq %rbx
	addq %rax,%r10
	movq (%rcx),%rax
	adcq $0,%rdx

	imulq %r10,%rbp
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r10
	movq 8(%rsi),%rax
	adcq $0,%rdx
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq 8(%rcx),%rax
	adcq $0,%rdx
	addq 8(%rsp),%r11
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq 16(%rsi),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	leaq 4(%r15),%r15
	adcq $0,%rdx
	movq %rdi,(%rsp)
	movq %rdx,%r13
	jmp .Linner4x
.align 16
.Linner4x:
	mulq %rbx
	addq %rax,%r10
	movq -16(%rcx,%r15,8),%rax
	adcq $0,%rdx
	addq -16(%rsp,%r15,8),%r10
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq -8(%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %r13,-24(%rsp,%r15,8)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq -8(%rcx,%r15,8),%rax
	adcq $0,%rdx
	addq -8(%rsp,%r15,8),%r11
	adcq $0,%rdx
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq (%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	adcq $0,%rdx
	movq %rdi,-16(%rsp,%r15,8)
	movq %rdx,%r13

	mulq %rbx
	addq %rax,%r10
	movq (%rcx,%r15,8),%rax
	adcq $0,%rdx
	addq (%rsp,%r15,8),%r10
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq 8(%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %r13,-8(%rsp,%r15,8)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq 8(%rcx,%r15,8),%rax
	adcq $0,%rdx
	addq 8(%rsp,%r15,8),%r11
	adcq $0,%rdx
	leaq 4(%r15),%r15
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq -16(%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	adcq $0,%rdx
	movq %rdi,-32(%rsp,%r15,8)
	movq %rdx,%r13
	cmpq %r9,%r15
	jb .Linner4x

	/* Tail of the row; the outer index is advanced in the middle of it. */
	mulq %rbx
	addq %rax,%r10
	movq -16(%rcx,%r15,8),%rax
	adcq $0,%rdx
	addq -16(%rsp,%r15,8),%r10
	adcq $0,%rdx
	movq %rdx,%r11

	mulq %rbp
	addq %rax,%r13
	movq -8(%rsi,%r15,8),%rax
	adcq $0,%rdx
	addq %r10,%r13
	adcq $0,%rdx
	movq %r13,-24(%rsp,%r15,8)
	movq %rdx,%rdi

	mulq %rbx
	addq %rax,%r11
	movq -8(%rcx,%r15,8),%rax
	adcq $0,%rdx
	addq -8(%rsp,%r15,8),%r11
	adcq $0,%rdx
	leaq 1(%r14),%r14
	movq %rdx,%r10

	mulq %rbp
	addq %rax,%rdi
	movq (%rsi),%rax
	adcq $0,%rdx
	addq %r11,%rdi
	adcq $0,%rdx
	movq %rdi,-16(%rsp,%r15,8)
	movq %rdx,%r13

	xorq %rdi,%rdi
	addq %r10,%r13
	adcq $0,%rdi
	addq (%rsp,%r9,8),%r13	/* add the previous row's top carry word */
	adcq $0,%rdi
	movq %r13,-8(%rsp,%r15,8)
	movq %rdi,(%rsp,%r15,8)

	cmpq %r9,%r14
	jb .Louter4x
	/*
	 * Result = scratch - (%rcx)[], four limbs per pass. %r15 = (count-4)/4
	 * loop passes; the final four limbs are handled after the loop. Only
	 * mov/lea/dec sit between the sbbq instructions, so the borrow in CF
	 * is carried through.
	 */
	movq 16(%rsp,%r9,8),%rdi	/* reload the result pointer */
	leaq -4(%r9),%r15
	movq 0(%rsp),%rax
	movq 8(%rsp),%rdx
	shrq $2,%r15
	leaq (%rsp),%rsi
	xorq %r14,%r14

	subq 0(%rcx),%rax
	movq 16(%rsi),%rbx
	movq 24(%rsi),%rbp
	sbbq 8(%rcx),%rdx

.Lsub4x:
	movq %rax,0(%rdi,%r14,8)
	movq %rdx,8(%rdi,%r14,8)
	sbbq 16(%rcx,%r14,8),%rbx
	movq 32(%rsi,%r14,8),%rax
	movq 40(%rsi,%r14,8),%rdx
	sbbq 24(%rcx,%r14,8),%rbp
	movq %rbx,16(%rdi,%r14,8)
	movq %rbp,24(%rdi,%r14,8)
	sbbq 32(%rcx,%r14,8),%rax
	movq 48(%rsi,%r14,8),%rbx
	movq 56(%rsi,%r14,8),%rbp
	sbbq 40(%rcx,%r14,8),%rdx
	leaq 4(%r14),%r14
	decq %r15
	jnz .Lsub4x

	movq %rax,0(%rdi,%r14,8)
	movq 32(%rsi,%r14,8),%rax	/* top carry word */
	sbbq 16(%rcx,%r14,8),%rbx
	movq %rdx,8(%rdi,%r14,8)
	sbbq 24(%rcx,%r14,8),%rbp
	movq %rbx,16(%rdi,%r14,8)

	/*
	 * %rax = top carry word minus the final borrow. Its low 32 bits are
	 * broadcast into %xmm4 (mask applied to the scratch limbs); %xmm5 is
	 * the complement (mask applied to the subtracted limbs).
	 */
	sbbq $0,%rax
	movq %rbp,24(%rdi,%r14,8)
	pxor %xmm0,%xmm0
.byte 102,72,15,110,224	/* movq %rax,%xmm4 */
	pcmpeqd %xmm5,%xmm5
	pshufd $0,%xmm4,%xmm4
	movq %r9,%r15
	pxor %xmm4,%xmm5
	shrq $2,%r15	/* count/4 passes of 32 bytes each */
	xorl %eax,%eax	/* %rax is now a byte offset */

	jmp .Lcopy4x
	/* Branch-free select into the result; the scratch area is zeroed as it is read. */
.align 16
.Lcopy4x:
	movdqa (%rsp,%rax,1),%xmm1
	movdqu (%rdi,%rax,1),%xmm2
	pand %xmm4,%xmm1
	pand %xmm5,%xmm2
	movdqa 16(%rsp,%rax,1),%xmm3
	movdqa %xmm0,(%rsp,%rax,1)
	por %xmm2,%xmm1
	movdqu 16(%rdi,%rax,1),%xmm2
	movdqu %xmm1,(%rdi,%rax,1)
	pand %xmm4,%xmm3
	pand %xmm5,%xmm2
	movdqa %xmm0,16(%rsp,%rax,1)
	por %xmm2,%xmm3
	movdqu %xmm3,16(%rdi,%rax,1)
	leaq 32(%rax),%rax
	decq %r15
	jnz .Lcopy4x
	movq 8(%rsp,%r9,8),%rsi	/* %rsi = caller's %rsp saved in the prologue */
.cfi_def_cfa %rsi, 8
	movq $1,%rax	/* return value */
	movq -48(%rsi),%r15
.cfi_restore %r15
	movq -40(%rsi),%r14
.cfi_restore %r14
	movq -32(%rsi),%r13
.cfi_restore %r13
	movq -24(%rsi),%r12
.cfi_restore %r12
	movq -16(%rsi),%rbp
.cfi_restore %rbp
	movq -8(%rsi),%rbx
.cfi_restore %rbx
	leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul4x_epilogue:
.byte 0xf3,0xc3	/* machine code for "rep ret" */
.cfi_endproc
.size bn_mul4x_mont,.-bn_mul4x_mont
693
694
695
696
/*
 * bn_sqr8x_mont -- file-local wrapper around the external squaring helpers.
 *
 * Fix in this revision: the viewer gutter numbers that had been interleaved
 * between the source lines are removed (a bare number is not a valid
 * assembler statement). The instruction stream itself is unchanged.
 *
 * Reached by a jump to .Lsqr8x_enter, with %rax holding the caller's %rsp,
 * %r9 the zero-extended limb count, and %rdi/%rsi/%rcx/%r8 as described for
 * the other entry points in this file (result, operand, second array,
 * pointer to one 64-bit word).
 *
 * What this wrapper does itself:
 *   - saves the callee-saved registers and builds a 64-byte aligned frame
 *     whose position depends on the operand address modulo 4096;
 *   - stores the word loaded from (%r8) at 32(%rsp) and the caller's %rsp
 *     at 40(%rsp);
 *   - calls bn_sqrx8x_internal when both bits of mask 0x80100 are set in
 *     OPENSSL_ia32cap_P+8, otherwise bn_sqr8x_internal;
 *   - subtracts the array at (%rbp) from the array at (%rbx), then selects
 *     between the two with a mask while zeroing the scratch area;
 *   - returns 1 in %rax.
 * The helpers are defined outside this file, so the register state they
 * return (%rax, %rbp, %r8, %r9, %rcx, %rdi) cannot be checked from here.
 */
.type bn_sqr8x_mont,@function
.align 32
bn_sqr8x_mont:
.cfi_startproc
	movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lsqr8x_enter:
	pushq %rbx
.cfi_offset %rbx,-16
	pushq %rbp
.cfi_offset %rbp,-24
	pushq %r12
.cfi_offset %r12,-32
	pushq %r13
.cfi_offset %r13,-40
	pushq %r14
.cfi_offset %r14,-48
	pushq %r15
.cfi_offset %r15,-56
.Lsqr8x_prologue:

	movl %r9d,%r10d
	shll $3,%r9d	/* %r9 = count*8 (bytes) */
	shlq $3+2,%r10	/* %r10 = count*32 */
	negq %r9	/* %r9 = -count*8 */

	/*
	 * Candidate frame base = %rsp - 64 - 2*count*8. %r11 is its distance
	 * from the operand at %rsi, modulo 4096; the two branches below shift
	 * the frame by an amount derived from that distance.
	 * NOTE(review): presumably this avoids the frame and the operand
	 * sharing the same offset within a 4 KiB page -- confirm with the
	 * generator script.
	 */
	leaq -64(%rsp,%r9,2),%r11
	movq %rsp,%rbp
	movq (%r8),%r8
	subq %rsi,%r11
	andq $4095,%r11
	cmpq %r11,%r10
	jb .Lsqr8x_sp_alt
	subq %r11,%rbp
	leaq -64(%rbp,%r9,2),%rbp
	jmp .Lsqr8x_sp_done

.align 32
.Lsqr8x_sp_alt:
	leaq 4096-64(,%r9,2),%r10
	leaq -64(%rbp,%r9,2),%rbp
	subq %r10,%r11
	movq $0,%r10	/* movq, not xor: the flags from subq feed the cmovc */
	cmovcq %r10,%r11
	subq %r11,%rbp
.Lsqr8x_sp_done:
	andq $-64,%rbp
	/* Step %rsp down to %rbp at most 4096 bytes at a time, touching each step. */
	movq %rsp,%r11
	subq %rbp,%r11
	andq $-4096,%r11
	leaq (%r11,%rbp,1),%rsp
	movq (%rsp),%r10
	cmpq %rbp,%rsp
	ja .Lsqr8x_page_walk
	jmp .Lsqr8x_page_walk_done

.align 16
.Lsqr8x_page_walk:
	leaq -4096(%rsp),%rsp
	movq (%rsp),%r10
	cmpq %rbp,%rsp
	ja .Lsqr8x_page_walk
.Lsqr8x_page_walk_done:

	movq %r9,%r10	/* %r10 = -count*8 */
	negq %r9	/* %r9 = count*8 */

	movq %r8,32(%rsp)
	movq %rax,40(%rsp)	/* caller's %rsp, reloaded before the epilogue */
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lsqr8x_body:

.byte 102,72,15,110,209	/* movq %rcx,%xmm2 */
	pxor %xmm0,%xmm0
.byte 102,72,15,110,207	/* movq %rdi,%xmm1 : result pointer kept across the call */
.byte 102,73,15,110,218	/* movq %r10,%xmm3 */
	movl OPENSSL_ia32cap_P+8(%rip),%eax
	andl $0x80100,%eax
	cmpl $0x80100,%eax
	jne .Lsqr8x_nox

	call bn_sqrx8x_internal

	/* Register meaning here depends on bn_sqrx8x_internal (not in this file). */
	leaq (%r8,%rcx,1),%rbx
	movq %rcx,%r9
	movq %rcx,%rdx
.byte 102,72,15,126,207	/* movq %xmm1,%rdi : restore the result pointer */
	sarq $3+2,%rcx	/* loop counter; .Lsqr8x_sub counts it up to zero */
	jmp .Lsqr8x_sub

.align 32
.Lsqr8x_nox:
	call bn_sqr8x_internal

	/* Register meaning here depends on bn_sqr8x_internal (not in this file). */
	leaq (%rdi,%r9,1),%rbx
	movq %r9,%rcx
	movq %r9,%rdx
.byte 102,72,15,126,207	/* movq %xmm1,%rdi : restore the result pointer */
	sarq $3+2,%rcx	/* loop counter; .Lsqr8x_sub counts it up to zero */
	jmp .Lsqr8x_sub

	/*
	 * (%rdi)[] = (%rbx)[] - (%rbp)[], 32 bytes per pass. Between passes
	 * only mov/lea/inc run, none of which changes CF, so the borrow is
	 * carried through.
	 */
.align 32
.Lsqr8x_sub:
	movq 0(%rbx),%r12
	movq 8(%rbx),%r13
	movq 16(%rbx),%r14
	movq 24(%rbx),%r15
	leaq 32(%rbx),%rbx
	sbbq 0(%rbp),%r12
	sbbq 8(%rbp),%r13
	sbbq 16(%rbp),%r14
	sbbq 24(%rbp),%r15
	leaq 32(%rbp),%rbp
	movq %r12,0(%rdi)
	movq %r13,8(%rdi)
	movq %r14,16(%rdi)
	movq %r15,24(%rdi)
	leaq 32(%rdi),%rdi
	incq %rcx
	jnz .Lsqr8x_sub

	sbbq $0,%rax	/* fold the final borrow into %rax (value left by the helper) */
	leaq (%rbx,%r9,1),%rbx	/* rewind both pointers by adding %r9 */
	leaq (%rdi,%r9,1),%rdi

.byte 102,72,15,110,200	/* movq %rax,%xmm1 */
	pxor %xmm0,%xmm0
	pshufd $0,%xmm1,%xmm1	/* broadcast the low 32 bits as the select mask */
	movq 40(%rsp),%rsi	/* %rsi = caller's %rsp saved in the prologue */
.cfi_def_cfa %rsi,8
	jmp .Lsqr8x_cond_copy

	/*
	 * Per 32 bytes: result = (scratch AND mask) OR (result AND complement).
	 * The complement is rebuilt each pass by comparing the mask with zero.
	 * The scratch words, and the words %rdx bytes away from them, are zeroed.
	 */
.align 32
.Lsqr8x_cond_copy:
	movdqa 0(%rbx),%xmm2
	movdqa 16(%rbx),%xmm3
	leaq 32(%rbx),%rbx
	movdqu 0(%rdi),%xmm4
	movdqu 16(%rdi),%xmm5
	leaq 32(%rdi),%rdi
	movdqa %xmm0,-32(%rbx)
	movdqa %xmm0,-16(%rbx)
	movdqa %xmm0,-32(%rbx,%rdx,1)
	movdqa %xmm0,-16(%rbx,%rdx,1)
	pcmpeqd %xmm1,%xmm0
	pand %xmm1,%xmm2
	pand %xmm1,%xmm3
	pand %xmm0,%xmm4
	pand %xmm0,%xmm5
	pxor %xmm0,%xmm0
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	movdqu %xmm4,-32(%rdi)
	movdqu %xmm5,-16(%rdi)
	addq $32,%r9
	jnz .Lsqr8x_cond_copy

	movq $1,%rax	/* return value */
	movq -48(%rsi),%r15
.cfi_restore %r15
	movq -40(%rsi),%r14
.cfi_restore %r14
	movq -32(%rsi),%r13
.cfi_restore %r13
	movq -24(%rsi),%r12
.cfi_restore %r12
	movq -16(%rsi),%rbp
.cfi_restore %rbp
	movq -8(%rsi),%rbx
.cfi_restore %rbx
	leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lsqr8x_epilogue:
.byte 0xf3,0xc3	/* machine code for "rep ret" */
.cfi_endproc
.size bn_sqr8x_mont,.-bn_sqr8x_mont
883
/*
 * bn_mulx4x_mont -- file-local variant built on mulx/adcx/adox.
 *
 * Fix in this revision: the viewer gutter numbers that had been interleaved
 * between the source lines are removed (a bare number is not a valid
 * assembler statement). The instruction stream itself is unchanged.
 *
 * Reached by a jump to .Lmulx4x_enter, with %rax holding the caller's %rsp,
 * %r9d the limb count, %rdi the result pointer, %rsi and %rdx the operand
 * pointers, %rcx the array subtracted in .Lmulx4x_sub and %r8 a pointer to
 * one 64-bit word. Returns 1 in %rax. %rbx, %rbp and %r12-%r15 are saved
 * and restored.
 *
 * Frame (128-byte aligned) as written by the prologue and body:
 *    0(%rsp)  count*8
 *    8(%rsp)  address of the next %rdx-array limb to use
 *   16(%rsp)  end address of that array (outer-loop bound)
 *   24(%rsp)  the word loaded from (%r8)
 *   32(%rsp)  result pointer
 *   40(%rsp)  caller's %rsp
 *   48(%rsp)  inner-loop pass count = count/4 - 1
 *   64(%rsp)  start of the scratch limbs
 *
 * adcx adds through CF only and adox through OF only, so the loops keep two
 * carry chains running side by side; %rbp is kept at zero and is used to
 * close off either chain.
 */
.type bn_mulx4x_mont,@function
.align 32
bn_mulx4x_mont:
.cfi_startproc
	movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lmulx4x_enter:
	pushq %rbx
.cfi_offset %rbx,-16
	pushq %rbp
.cfi_offset %rbp,-24
	pushq %r12
.cfi_offset %r12,-32
	pushq %r13
.cfi_offset %r13,-40
	pushq %r14
.cfi_offset %r14,-48
	pushq %r15
.cfi_offset %r15,-56
.Lmulx4x_prologue:

	shll $3,%r9d	/* %r9 = count*8, zero-extended */
	xorq %r10,%r10
	subq %r9,%r10	/* %r10 = -count*8 */
	movq (%r8),%r8
	leaq -72(%rsp,%r10,1),%rbp
	andq $-128,%rbp
	/* Step %rsp down to %rbp at most 4096 bytes at a time, touching each step. */
	movq %rsp,%r11
	subq %rbp,%r11
	andq $-4096,%r11
	leaq (%r11,%rbp,1),%rsp
	movq (%rsp),%r10
	cmpq %rbp,%rsp
	ja .Lmulx4x_page_walk
	jmp .Lmulx4x_page_walk_done

.align 16
.Lmulx4x_page_walk:
	leaq -4096(%rsp),%rsp
	movq (%rsp),%r10
	cmpq %rbp,%rsp
	ja .Lmulx4x_page_walk
.Lmulx4x_page_walk_done:

	leaq (%rdx,%r9,1),%r10	/* end of the %rdx array */

	/* Fill in the frame slots listed in the header comment. */
	movq %r9,0(%rsp)
	shrq $5,%r9
	movq %r10,16(%rsp)
	subq $1,%r9
	movq %r8,24(%rsp)
	movq %rdi,32(%rsp)
	movq %rax,40(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
	movq %r9,48(%rsp)
	jmp .Lmulx4x_body

.align 32
.Lmulx4x_body:
	leaq 8(%rdx),%rdi
	movq (%rdx),%rdx	/* mulx takes its implicit multiplicand from %rdx */
	leaq 64+32(%rsp),%rbx
	movq %rdx,%r9	/* keep the current limb; %rdx alternates with %r8 */

	mulxq 0(%rsi),%r8,%rax
	mulxq 8(%rsi),%r11,%r14
	addq %rax,%r11
	movq %rdi,8(%rsp)
	mulxq 16(%rsi),%r12,%r13
	adcq %r14,%r12
	adcq $0,%r13

	movq %r8,%rdi
	imulq 24(%rsp),%r8	/* %r8 = per-row multiplier for the (%rcx) limbs */
	xorq %rbp,%rbp	/* %rbp = 0; also clears CF and OF */

	mulxq 24(%rsi),%rax,%r14
	movq %r8,%rdx
	leaq 32(%rsi),%rsi
	adcxq %rax,%r13
	adcxq %rbp,%r14

	mulxq 0(%rcx),%rax,%r10
	adcxq %rax,%rdi
	adoxq %r11,%r10
	mulxq 8(%rcx),%rax,%r11
	adcxq %rax,%r10
	adoxq %r12,%r11
.byte 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00	/* mulxq 16(%rcx),%rax,%r12 with a 32-bit displacement */
	movq 48(%rsp),%rdi
	movq %r10,-32(%rbx)
	adcxq %rax,%r11
	adoxq %r13,%r12
	mulxq 24(%rcx),%rax,%r15
	movq %r9,%rdx
	movq %r11,-24(%rbx)
	adcxq %rax,%r12
	adoxq %rbp,%r15
	leaq 32(%rcx),%rcx
	movq %r12,-16(%rbx)

	jmp .Lmulx4x_1st

	/* First row, four limbs per pass; %rdi counts the passes down. */
.align 32
.Lmulx4x_1st:
	adcxq %rbp,%r15
	mulxq 0(%rsi),%r10,%rax
	adcxq %r14,%r10
	mulxq 8(%rsi),%r11,%r14
	adcxq %rax,%r11
	mulxq 16(%rsi),%r12,%rax
	adcxq %r14,%r12
	mulxq 24(%rsi),%r13,%r14
.byte 0x67,0x67	/* two 0x67 prefix bytes in front of the next instruction; presumably padding */
	movq %r8,%rdx
	adcxq %rax,%r13
	adcxq %rbp,%r14
	leaq 32(%rsi),%rsi
	leaq 32(%rbx),%rbx

	adoxq %r15,%r10
	mulxq 0(%rcx),%rax,%r15
	adcxq %rax,%r10
	adoxq %r15,%r11
	mulxq 8(%rcx),%rax,%r15
	adcxq %rax,%r11
	adoxq %r15,%r12
	mulxq 16(%rcx),%rax,%r15
	movq %r10,-40(%rbx)
	adcxq %rax,%r12
	movq %r11,-32(%rbx)
	adoxq %r15,%r13
	mulxq 24(%rcx),%rax,%r15
	movq %r9,%rdx
	movq %r12,-24(%rbx)
	adcxq %rax,%r13
	adoxq %rbp,%r15
	leaq 32(%rcx),%rcx
	movq %r13,-16(%rbx)

	decq %rdi
	jnz .Lmulx4x_1st

	movq 0(%rsp),%rax	/* %rax = count*8 */
	movq 8(%rsp),%rdi	/* %rdi = address of the next limb */
	adcq %rbp,%r15
	addq %r15,%r14
	sbbq %r15,%r15	/* %r15 = 0 or -1 from the carry out */
	movq %r14,-8(%rbx)
	jmp .Lmulx4x_outer

	/* Remaining rows: the previous scratch limbs are added in through one of the two chains. */
.align 32
.Lmulx4x_outer:
	movq (%rdi),%rdx
	leaq 8(%rdi),%rdi
	subq %rax,%rsi	/* rewind by count*8 */
	movq %r15,(%rbx)	/* store the carry word above the scratch limbs */
	leaq 64+32(%rsp),%rbx
	subq %rax,%rcx	/* rewind by count*8 */

	mulxq 0(%rsi),%r8,%r11
	xorl %ebp,%ebp	/* %rbp = 0; clears CF and OF */
	movq %rdx,%r9
	mulxq 8(%rsi),%r14,%r12
	adoxq -32(%rbx),%r8
	adcxq %r14,%r11
	mulxq 16(%rsi),%r15,%r13
	adoxq -24(%rbx),%r11
	adcxq %r15,%r12
	adoxq -16(%rbx),%r12
	adcxq %rbp,%r13
	adoxq %rbp,%r13

	movq %rdi,8(%rsp)
	movq %r8,%r15
	imulq 24(%rsp),%r8
	xorl %ebp,%ebp

	mulxq 24(%rsi),%rax,%r14
	movq %r8,%rdx
	adcxq %rax,%r13
	adoxq -8(%rbx),%r13
	adcxq %rbp,%r14
	leaq 32(%rsi),%rsi
	adoxq %rbp,%r14

	mulxq 0(%rcx),%rax,%r10
	adcxq %rax,%r15
	adoxq %r11,%r10
	mulxq 8(%rcx),%rax,%r11
	adcxq %rax,%r10
	adoxq %r12,%r11
	mulxq 16(%rcx),%rax,%r12
	movq %r10,-32(%rbx)
	adcxq %rax,%r11
	adoxq %r13,%r12
	mulxq 24(%rcx),%rax,%r15
	movq %r9,%rdx
	movq %r11,-24(%rbx)
	leaq 32(%rcx),%rcx
	adcxq %rax,%r12
	adoxq %rbp,%r15
	movq 48(%rsp),%rdi	/* inner pass count */
	movq %r12,-16(%rbx)

	jmp .Lmulx4x_inner

.align 32
.Lmulx4x_inner:
	mulxq 0(%rsi),%r10,%rax
	adcxq %rbp,%r15
	adoxq %r14,%r10
	mulxq 8(%rsi),%r11,%r14
	adcxq 0(%rbx),%r10
	adoxq %rax,%r11
	mulxq 16(%rsi),%r12,%rax
	adcxq 8(%rbx),%r11
	adoxq %r14,%r12
	mulxq 24(%rsi),%r13,%r14
	movq %r8,%rdx
	adcxq 16(%rbx),%r12
	adoxq %rax,%r13
	adcxq 24(%rbx),%r13
	adoxq %rbp,%r14
	leaq 32(%rsi),%rsi
	leaq 32(%rbx),%rbx
	adcxq %rbp,%r14

	adoxq %r15,%r10
	mulxq 0(%rcx),%rax,%r15
	adcxq %rax,%r10
	adoxq %r15,%r11
	mulxq 8(%rcx),%rax,%r15
	adcxq %rax,%r11
	adoxq %r15,%r12
	mulxq 16(%rcx),%rax,%r15
	movq %r10,-40(%rbx)
	adcxq %rax,%r12
	adoxq %r15,%r13
	mulxq 24(%rcx),%rax,%r15
	movq %r9,%rdx
	movq %r11,-32(%rbx)
	movq %r12,-24(%rbx)
	adcxq %rax,%r13
	adoxq %rbp,%r15
	leaq 32(%rcx),%rcx
	movq %r13,-16(%rbx)

	decq %rdi
	jnz .Lmulx4x_inner

	movq 0(%rsp),%rax
	movq 8(%rsp),%rdi
	adcq %rbp,%r15
	subq 0(%rbx),%rbp	/* 0 minus the stored carry word: CF is set when that word is non-zero */
	adcq %r15,%r14
	sbbq %r15,%r15	/* %r15 = 0 or -1 from the carry out */
	movq %r14,-8(%rbx)

	cmpq 16(%rsp),%rdi
	jne .Lmulx4x_outer

	leaq 64(%rsp),%rbx	/* %rbx = start of the scratch limbs */
	subq %rax,%rcx	/* rewind by count*8 */
	negq %r15
	movq %rax,%rdx	/* %rdx = count*8, the byte count for the copy loop */
	shrq $3+2,%rax	/* %rax = count/4 passes */
	movq 32(%rsp),%rdi	/* result pointer */
	jmp .Lmulx4x_sub

	/*
	 * (%rdi)[] = scratch - (%rcx)[], 32 bytes per pass. mov/lea/dec do not
	 * change CF, so the borrow is carried from pass to pass.
	 */
.align 32
.Lmulx4x_sub:
	movq 0(%rbx),%r11
	movq 8(%rbx),%r12
	movq 16(%rbx),%r13
	movq 24(%rbx),%r14
	leaq 32(%rbx),%rbx
	sbbq 0(%rcx),%r11
	sbbq 8(%rcx),%r12
	sbbq 16(%rcx),%r13
	sbbq 24(%rcx),%r14
	leaq 32(%rcx),%rcx
	movq %r11,0(%rdi)
	movq %r12,8(%rdi)
	movq %r13,16(%rdi)
	movq %r14,24(%rdi)
	leaq 32(%rdi),%rdi
	decq %rax
	jnz .Lmulx4x_sub

	sbbq $0,%r15	/* fold the final borrow into the carry word */
	leaq 64(%rsp),%rbx
	subq %rdx,%rdi	/* rewind the result pointer */

.byte 102,73,15,110,207	/* movq %r15,%xmm1 */
	pxor %xmm0,%xmm0
	pshufd $0,%xmm1,%xmm1	/* broadcast the low 32 bits as the select mask */
	movq 40(%rsp),%rsi	/* %rsi = caller's %rsp saved in the prologue */
.cfi_def_cfa %rsi,8
	jmp .Lmulx4x_cond_copy

	/*
	 * Per 32 bytes: result = (scratch AND mask) OR (result AND complement).
	 * The complement is rebuilt each pass by comparing the mask with zero.
	 * The scratch words are zeroed as they are read.
	 */
.align 32
.Lmulx4x_cond_copy:
	movdqa 0(%rbx),%xmm2
	movdqa 16(%rbx),%xmm3
	leaq 32(%rbx),%rbx
	movdqu 0(%rdi),%xmm4
	movdqu 16(%rdi),%xmm5
	leaq 32(%rdi),%rdi
	movdqa %xmm0,-32(%rbx)
	movdqa %xmm0,-16(%rbx)
	pcmpeqd %xmm1,%xmm0
	pand %xmm1,%xmm2
	pand %xmm1,%xmm3
	pand %xmm0,%xmm4
	pand %xmm0,%xmm5
	pxor %xmm0,%xmm0
	por %xmm2,%xmm4
	por %xmm3,%xmm5
	movdqu %xmm4,-32(%rdi)
	movdqu %xmm5,-16(%rdi)
	subq $32,%rdx
	jnz .Lmulx4x_cond_copy

	movq %rdx,(%rbx)	/* %rdx is zero here: clear the word after the scratch limbs */

	movq $1,%rax	/* return value */
	movq -48(%rsi),%r15
.cfi_restore %r15
	movq -40(%rsi),%r14
.cfi_restore %r14
	movq -32(%rsi),%r13
.cfi_restore %r13
	movq -24(%rsi),%r12
.cfi_restore %r12
	movq -16(%rsi),%rbp
.cfi_restore %rbp
	movq -8(%rsi),%rbx
.cfi_restore %rbx
	leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmulx4x_epilogue:
.byte 0xf3,0xc3	/* machine code for "rep ret" */
.cfi_endproc
.size bn_mulx4x_mont,.-bn_mulx4x_mont
1239
/*
 * Identification string, still in the current (code) section. The bytes are
 * the NUL-terminated ASCII text
 *   "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
 *
 * Fix in this revision: the viewer gutter numbers that had been interleaved
 * between the source lines are removed (a bare number is not a valid
 * assembler statement). The directives themselves are unchanged.
 */
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 16
/*
 * ELF note in section .note.gnu.property. Layout as emitted below:
 *   name size  = 1f - 0f  (the four bytes 'G','N','U',0)
 *   desc size  = 4f - 1f
 *   note type  = 5
 *   name bytes, padded to 8
 *   one property record: type 0xc0000002, data size 3f - 2f (4 bytes),
 *   data 3, padded to 8
 * NOTE(review): type 5 / 0xc0000002 / value 3 look like
 * NT_GNU_PROPERTY_TYPE_0 / GNU_PROPERTY_X86_FEATURE_1_AND / IBT|SHSTK from
 * the x86-64 psABI -- confirm against the ABI document.
 */
	.section ".note.gnu.property", "a"
	.p2align 3
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	# "GNU" encoded with .byte, since .asciz isn't supported
	# on Solaris.
	.byte 0x47
	.byte 0x4e
	.byte 0x55
	.byte 0
1:
	.p2align 3
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 3
4:
1262
1263