Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/crypto/openssl/powerpc/ppc-mont.S
39482 views
1
/* Do not modify. This file is auto-generated from ppc-mont.pl. */
2
.machine "any"
3
.text
4
5
/*
 * bn_mul_mont_int (32-bit PowerPC, one word per inner-loop step).
 *
 * Registers on entry, as used by the code below:
 *   r3 = output pointer (copied to r9; results are stored through r9)
 *   r4, r5, r6 = pointers to input word arrays
 *   r7 = pointer to a single word, loaded once into r7
 *   r8 = word count
 * Returns r3 = 0 without touching memory when r8 >= 32, otherwise r3 = 1.
 * A scratch vector is kept on the stack starting at r1+32; r20-r31 are
 * saved below the incoming stack pointer and restored before returning.
 *
 * NOTE(review): presumably r3..r8 are rp, ap, bp, np, &n0, num of a
 * Montgomery multiplication (see the identification string at the end of
 * the file) -- confirm against ppc-mont.pl.
 * NOTE(review): no lower bound on r8 is checked here; CTR is loaded with
 * r8-2, so r8 == 2 would load CTR with 0 before a bdnz loop. Presumably the
 * caller guarantees a larger count -- confirm.
 */
.globl bn_mul_mont_int
6
.type bn_mul_mont_int,@function
7
.align 5
8
bn_mul_mont_int:
9
mr 9,3  /* keep the output pointer in r9; r3 is reused as return value */
10
li 3,0  /* return value 0 for the early exit below */
11
cmpwi 8,32
12
bgelr  /* return 0 if r8 >= 32 (signed compare) */
13
slwi 8,8,2  /* r8 = count * 4 (bytes) */
14
li 12,-4096  /* mask that clears the low 12 bits */
15
addi 3,8,256  /* r3 = bytes + 256 */
16
subf 3,3,1  /* r3 = r1 - (bytes + 256) */
17
and 3,3,12  /* round that address down to a multiple of 4096 */
18
subf 3,1,3  /* r3 = new stack pointer - r1 (displacement for stwux) */
19
mr 12,1  /* r12 = incoming stack pointer */
20
srwi 8,8,2  /* r8 back to a word count */
21
stwux 1,1,3  /* store the old r1 at the new frame base and update r1 */
22
23
stw 20,-48(12)  /* save r20-r31 just below the incoming stack pointer */
24
stw 21,-44(12)
25
stw 22,-40(12)
26
stw 23,-36(12)
27
stw 24,-32(12)
28
stw 25,-28(12)
29
stw 26,-24(12)
30
stw 27,-20(12)
31
stw 28,-16(12)
32
stw 29,-12(12)
33
stw 30,-8(12)
34
stw 31,-4(12)
35
36
lwz 7,0(7)  /* replace the pointer in r7 by the word it points to */
37
addi 8,8,-2  /* r8 = count - 2: trip count of .L1st and .Linner */
38
39
lwz 23,0(5)  /* r23 = word 0 of the r5 array */
40
lwz 10,0(4)  /* r10 = word 0 of the r4 array */
41
addi 22,1,32  /* r22 -> scratch vector at r1+32 */
42
mullw 25,10,23  /* r26:r25 = r10 * r23 */
43
mulhwu 26,10,23
44
45
lwz 10,4(4)
46
lwz 11,0(6)  /* r11 = word 0 of the r6 array */
47
48
mullw 24,25,7  /* r24 = low word of r25 * r7 */
49
50
mullw 29,10,23  /* r30:r29 = r4[1] * r23 */
51
mulhwu 30,10,23
52
53
mullw 27,11,24  /* r28:r27 = r6[0] * r24 */
54
mulhwu 28,11,24
55
lwz 11,4(6)
56
addc 27,27,25  /* only the carry is kept: r27 is overwritten before it is stored */
57
addze 28,28
58
59
mullw 31,11,24  /* r0:r31 = r6[1] * r24 */
60
mulhwu 0,11,24
61
62
mtctr 8
63
li 21,8  /* r21 = byte offset of word 2 */
64
.align 4
65
/* First pass: accumulates r4[j]*r23 and r6[j]*r24, storing one word per step at 0(r22). */
.L1st:
66
lwzx 10,4,21
67
addc 25,29,26
68
lwzx 11,6,21
69
addze 26,30
70
mullw 29,10,23
71
addc 27,31,28
72
mulhwu 30,10,23
73
addze 28,0
74
mullw 31,11,24
75
addc 27,27,25
76
mulhwu 0,11,24
77
addze 28,28
78
stw 27,0(22)
79
80
addi 21,21,4
81
addi 22,22,4
82
bdnz .L1st
83
84
addc 25,29,26
85
addze 26,30
86
87
addc 27,31,28
88
addze 28,0
89
addc 27,27,25
90
addze 28,28
91
stw 27,0(22)
92
93
li 3,0
94
addc 28,28,26
95
addze 3,3  /* r3 = carry out of the top word */
96
stw 28,4(22)
97
98
li 20,4  /* r20 = byte offset into the r5 array, starting at word 1 */
99
.align 4
100
/* One pass per remaining word of the r5 array; adds into the scratch vector. */
.Louter:
101
lwzx 23,5,20
102
lwz 10,0(4)
103
addi 22,1,32  /* rewind r22 to the start of the scratch vector */
104
lwz 12,32(1)  /* r12 = scratch word 0 */
105
mullw 25,10,23
106
mulhwu 26,10,23
107
lwz 10,4(4)
108
lwz 11,0(6)
109
addc 25,25,12
110
mullw 29,10,23
111
addze 26,26
112
mullw 24,25,7  /* r24 = low word of r25 * r7 */
113
mulhwu 30,10,23
114
mullw 27,11,24
115
mulhwu 28,11,24
116
lwz 11,4(6)
117
addc 27,27,25
118
mullw 31,11,24
119
addze 28,28
120
mulhwu 0,11,24
121
122
mtctr 8
123
li 21,8
124
.align 4
125
.Linner:
126
lwzx 10,4,21
127
addc 25,29,26
128
lwz 12,4(22)  /* next scratch word to accumulate into */
129
addze 26,30
130
lwzx 11,6,21
131
addc 27,31,28
132
mullw 29,10,23
133
addze 28,0
134
mulhwu 30,10,23
135
addc 25,25,12
136
mullw 31,11,24
137
addze 26,26
138
mulhwu 0,11,24
139
addc 27,27,25
140
addi 21,21,4
141
addze 28,28
142
stw 27,0(22)
143
addi 22,22,4
144
bdnz .Linner
145
146
lwz 12,4(22)
147
addc 25,29,26
148
addze 26,30
149
addc 25,25,12
150
addze 26,26
151
152
addc 27,31,28
153
addze 28,0
154
addc 27,27,25
155
addze 28,28
156
stw 27,0(22)
157
158
addic 3,3,-1  /* CA = (r3 != 0): bring the saved top carry back into CA */
159
li 3,0
160
adde 28,28,26
161
addze 3,3  /* r3 = new top carry */
162
stw 28,4(22)
163
164
slwi 12,8,2  /* r12 = 4 * (count - 2) */
165
.long 0x7c146040  /* encodes cmplw 20,12 */
166
addi 20,20,4
167
ble .Louter  /* repeat while the pre-increment r20 <= r12 */
168
169
addi 8,8,2  /* r8 = full word count again */
170
subfc 21,21,21  /* r21 = 0 and CA = 1 (no borrow pending) */
171
addi 22,1,32
172
mtctr 8
173
174
.align 4
175
.Lsub: lwzx 12,22,21  /* output[j] = scratch[j] - r6[j] - borrow */
176
lwzx 11,6,21
177
subfe 10,11,12
178
stwx 10,9,21
179
addi 21,21,4
180
bdnz .Lsub
181
182
li 21,0
183
mtctr 8
184
subfe 3,21,3  /* r3 = r3 - 0 - borrow, used as a select mask below */
185
186
.align 4
187
/* output[j] = (scratch[j] & r3) | (output[j] & ~r3); no data-dependent branch. */
.Lcopy:
188
lwzx 12,22,21
189
lwzx 10,9,21
190
and 12,12,3
191
andc 10,10,3
192
stwx 21,22,21  /* overwrite the scratch word (with the index value) */
193
or 10,10,12
194
stwx 10,9,21
195
addi 21,21,4
196
bdnz .Lcopy
197
198
lwz 12,0(1)  /* r12 = back chain (incoming stack pointer) */
199
li 3,1  /* return value 1 */
200
lwz 20,-48(12)
201
lwz 21,-44(12)
202
lwz 22,-40(12)
203
lwz 23,-36(12)
204
lwz 24,-32(12)
205
lwz 25,-28(12)
206
lwz 26,-24(12)
207
lwz 27,-20(12)
208
lwz 28,-16(12)
209
lwz 29,-12(12)
210
lwz 30,-8(12)
211
lwz 31,-4(12)
212
mr 1,12  /* release the frame */
213
blr
214
.long 0  /* NOTE(review): these three data lines look like a traceback-table trailer -- confirm */
215
.byte 0,12,4,0,0x80,12,6,0
216
.long 0
217
.size bn_mul_mont_int,.-bn_mul_mont_int
218
/*
 * bn_mul4x_mont_int (32-bit PowerPC, four words per loop step).
 *
 * Registers on entry, as used by the code below:
 *   r3 = output pointer, r4/r5/r6 = pointers to input word arrays,
 *   r7 = pointer to a single word (loaded once), r8 = word count.
 * If r8 is a multiple of 8 and r4 == r5 control transfers to .Lsqr8x_do;
 * otherwise the .Lmul4x_do path runs. Returns r3 = 1. r14-r31 are saved
 * below the incoming stack pointer and restored before returning.
 *
 * NOTE(review): presumably r3..r8 are rp, ap, bp, np, &n0, num of a
 * Montgomery multiplication (see the identification string at the end of
 * the file) -- confirm against ppc-mont.pl.
 */
.globl bn_mul4x_mont_int
219
.type bn_mul4x_mont_int,@function
220
.align 5
221
bn_mul4x_mont_int:
222
andi. 0,8,7  /* CR0 from r8 & 7 */
223
bne .Lmul4x_do  /* count not a multiple of 8 */
224
.long 0x7c042840  /* encodes cmplw 4,5 */
225
bne .Lmul4x_do  /* r4 != r5 */
226
b .Lsqr8x_do  /* same pointer in r4 and r5, count multiple of 8 */
227
.Lmul4x_do:
228
slwi 8,8,2  /* r8 = count * 4 (bytes) from here on */
229
mr 9,1  /* r9 = incoming stack pointer */
230
li 10,-32*4
231
sub 10,10,8  /* r10 = -(128 + bytes) */
232
stwux 1,1,10  /* allocate the frame and store the back chain */
233
234
stw 14,-4*18(9)  /* save r14-r31 below the incoming stack pointer */
235
stw 15,-4*17(9)
236
stw 16,-4*16(9)
237
stw 17,-4*15(9)
238
stw 18,-4*14(9)
239
stw 19,-4*13(9)
240
stw 20,-4*12(9)
241
stw 21,-4*11(9)
242
stw 22,-4*10(9)
243
stw 23,-4*9(9)
244
stw 24,-4*8(9)
245
stw 25,-4*7(9)
246
stw 26,-4*6(9)
247
stw 27,-4*5(9)
248
stw 28,-4*4(9)
249
stw 29,-4*3(9)
250
stw 30,-4*2(9)
251
stw 31,-4*1(9)
252
253
subi 4,4,4  /* bias r4, r6, r3 by one word: offsets 4*1..4*4 then address words 0..3 */
254
subi 6,6,4
255
subi 3,3,4
256
lwz 7,0(7)  /* replace the pointer in r7 by the word it points to */
257
258
add 14,5,8
259
add 30,4,8  /* r30 = biased r4 + bytes: end marker compared against r4 */
260
subi 14,14,4*4  /* r14 = r5 + bytes - 16: end marker compared against r5 */
261
262
lwz 27,4*0(5)
263
li 22,0  /* r22-r25: four-word accumulator */
264
lwz 9,4*1(4)
265
li 23,0
266
lwz 10,4*2(4)
267
li 24,0
268
lwz 11,4*3(4)
269
li 25,0
270
lwzu 12,4*4(4)
271
lwz 18,4*1(6)
272
lwz 19,4*2(6)
273
lwz 20,4*3(6)
274
lwzu 21,4*4(6)
275
276
stw 3,4*6(1)  /* stash the biased output pointer at r1+24 */
277
stw 14,4*7(1)  /* stash the r5 end marker at r1+28 */
278
li 3,0
279
addic 29,1,4*7  /* r29 = r1+28; addic also writes CA (0 unless the add wraps) */
280
li 31,0  /* r31 = byte index that cycles 4,8,12,0 */
281
li 0,0  /* r0 holds zero for addze */
282
b .Loop_mul4x_1st_reduction
283
284
.align 5
285
.Loop_mul4x_1st_reduction:
286
mullw 14,9,27
287
addze 3,3
288
mullw 15,10,27
289
addi 31,31,4
290
mullw 16,11,27
291
andi. 31,31,4*4-1  /* sets CR0 for the bne at the bottom of the loop */
292
mullw 17,12,27
293
addc 22,22,14
294
mulhwu 14,9,27
295
adde 23,23,15
296
mulhwu 15,10,27
297
adde 24,24,16
298
mullw 28,22,7  /* r28 = low word of r22 * r7 */
299
adde 25,25,17
300
mulhwu 16,11,27
301
addze 26,0
302
mulhwu 17,12,27
303
lwzx 27,5,31
304
addc 23,23,14
305
306
stwu 28,4(29)  /* keep r28 in the scratch area; the tail loops reload it */
307
adde 24,24,15
308
mullw 15,19,28
309
adde 25,25,16
310
mullw 16,20,28
311
adde 26,26,17
312
mullw 17,21,28
313
314
315
316
317
318
319
320
321
322
323
addic 22,22,-1  /* CA = (r22 != 0); NOTE(review): presumably stands in for the carry of r22 + low(r18*r28), which is not computed here -- confirm */
324
mulhwu 14,18,28
325
adde 22,23,15
326
mulhwu 15,19,28
327
adde 23,24,16
328
mulhwu 16,20,28
329
adde 24,25,17
330
mulhwu 17,21,28
331
adde 25,26,3
332
addze 3,0
333
addc 22,22,14
334
adde 23,23,15
335
adde 24,24,16
336
adde 25,25,17
337
338
bne .Loop_mul4x_1st_reduction  /* until r31 wraps to 0 (four steps) */
339
340
.long 0x7c1e2040  /* encodes cmplw 30,4 */
341
beq .Lmul4x4_post_condition  /* r4 already at the end marker */
342
343
lwz 9,4*1(4)
344
lwz 10,4*2(4)
345
lwz 11,4*3(4)
346
lwzu 12,4*4(4)
347
lwz 28,4*8(1)  /* first r28 value stored by the reduction loop */
348
lwz 18,4*1(6)
349
lwz 19,4*2(6)
350
lwz 20,4*3(6)
351
lwzu 21,4*4(6)
352
b .Loop_mul4x_1st_tail
353
354
.align 5
355
.Loop_mul4x_1st_tail:
356
mullw 14,9,27
357
addze 3,3
358
mullw 15,10,27
359
addi 31,31,4
360
mullw 16,11,27
361
andi. 31,31,4*4-1  /* sets CR0 for the bne at the bottom of the loop */
362
mullw 17,12,27
363
addc 22,22,14
364
mulhwu 14,9,27
365
adde 23,23,15
366
mulhwu 15,10,27
367
adde 24,24,16
368
mulhwu 16,11,27
369
adde 25,25,17
370
mulhwu 17,12,27
371
addze 26,0
372
lwzx 27,5,31
373
addc 23,23,14
374
mullw 14,18,28
375
adde 24,24,15
376
mullw 15,19,28
377
adde 25,25,16
378
mullw 16,20,28
379
adde 26,26,17
380
mullw 17,21,28
381
addc 22,22,14
382
mulhwu 14,18,28
383
adde 23,23,15
384
mulhwu 15,19,28
385
adde 24,24,16
386
mulhwu 16,20,28
387
adde 25,25,17
388
adde 26,26,3
389
mulhwu 17,21,28
390
addze 3,0
391
addi 28,1,4*8
392
lwzx 28,28,31  /* next saved multiplier from r1+32+r31 */
393
stwu 22,4(29)
394
addc 22,23,14
395
adde 23,24,15
396
adde 24,25,16
397
adde 25,26,17
398
399
bne .Loop_mul4x_1st_tail
400
401
sub 15,30,8  /* r15 = end marker - bytes: the biased start of the r4 array */
402
.long 0x7c1e2040  /* encodes cmplw 30,4 */
403
beq .Lmul4x_proceed
404
405
lwz 9,4*1(4)
406
lwz 10,4*2(4)
407
lwz 11,4*3(4)
408
lwzu 12,4*4(4)
409
lwz 18,4*1(6)
410
lwz 19,4*2(6)
411
lwz 20,4*3(6)
412
lwzu 21,4*4(6)
413
b .Loop_mul4x_1st_tail
414
415
.align 5
416
.Lmul4x_proceed:
417
lwzu 27,4*4(5)  /* advance r5 by four words */
418
addze 3,3
419
lwz 9,4*1(15)
420
lwz 10,4*2(15)
421
lwz 11,4*3(15)
422
lwz 12,4*4(15)
423
addi 4,15,4*4
424
sub 6,6,8  /* rewind r6 by the array size */
425
426
stw 22,4*1(29)
427
stw 23,4*2(29)
428
stw 24,4*3(29)
429
stw 25,4*4(29)
430
stw 3,4*5(29)
431
lwz 22,4*12(1)
432
lwz 23,4*13(1)
433
lwz 24,4*14(1)
434
lwz 25,4*15(1)
435
436
lwz 18,4*1(6)
437
lwz 19,4*2(6)
438
lwz 20,4*3(6)
439
lwzu 21,4*4(6)
440
addic 29,1,4*7  /* r29 = r1+28; addic also writes CA (0 unless the add wraps) */
441
li 3,0
442
b .Loop_mul4x_reduction
443
444
.align 5
445
.Loop_mul4x_reduction:
446
mullw 14,9,27
447
addze 3,3
448
mullw 15,10,27
449
addi 31,31,4
450
mullw 16,11,27
451
andi. 31,31,4*4-1  /* sets CR0 for the bne at the bottom of the loop */
452
mullw 17,12,27
453
addc 22,22,14
454
mulhwu 14,9,27
455
adde 23,23,15
456
mulhwu 15,10,27
457
adde 24,24,16
458
mullw 28,22,7  /* r28 = low word of r22 * r7 */
459
adde 25,25,17
460
mulhwu 16,11,27
461
addze 26,0
462
mulhwu 17,12,27
463
lwzx 27,5,31
464
addc 23,23,14
465
466
stwu 28,4(29)
467
adde 24,24,15
468
mullw 15,19,28
469
adde 25,25,16
470
mullw 16,20,28
471
adde 26,26,17
472
mullw 17,21,28
473
474
addic 22,22,-1  /* CA = (r22 != 0), same substitution as in the first reduction loop */
475
mulhwu 14,18,28
476
adde 22,23,15
477
mulhwu 15,19,28
478
adde 23,24,16
479
mulhwu 16,20,28
480
adde 24,25,17
481
mulhwu 17,21,28
482
adde 25,26,3
483
addze 3,0
484
addc 22,22,14
485
adde 23,23,15
486
adde 24,24,16
487
adde 25,25,17
488
489
bne .Loop_mul4x_reduction
490
491
lwz 14,4*5(29)
492
addze 3,3
493
lwz 15,4*6(29)
494
lwz 16,4*7(29)
495
lwz 17,4*8(29)
496
lwz 9,4*1(4)
497
lwz 10,4*2(4)
498
lwz 11,4*3(4)
499
lwzu 12,4*4(4)
500
addc 22,22,14
501
adde 23,23,15
502
adde 24,24,16
503
adde 25,25,17
504
505
506
lwz 28,4*8(1)
507
lwz 18,4*1(6)
508
lwz 19,4*2(6)
509
lwz 20,4*3(6)
510
lwzu 21,4*4(6)
511
b .Loop_mul4x_tail
512
513
.align 5
514
.Loop_mul4x_tail:
515
mullw 14,9,27
516
addze 3,3
517
mullw 15,10,27
518
addi 31,31,4
519
mullw 16,11,27
520
andi. 31,31,4*4-1  /* sets CR0 for the bne at the bottom of the loop */
521
mullw 17,12,27
522
addc 22,22,14
523
mulhwu 14,9,27
524
adde 23,23,15
525
mulhwu 15,10,27
526
adde 24,24,16
527
mulhwu 16,11,27
528
adde 25,25,17
529
mulhwu 17,12,27
530
addze 26,0
531
lwzx 27,5,31
532
addc 23,23,14
533
mullw 14,18,28
534
adde 24,24,15
535
mullw 15,19,28
536
adde 25,25,16
537
mullw 16,20,28
538
adde 26,26,17
539
mullw 17,21,28
540
addc 22,22,14
541
mulhwu 14,18,28
542
adde 23,23,15
543
mulhwu 15,19,28
544
adde 24,24,16
545
mulhwu 16,20,28
546
adde 25,25,17
547
mulhwu 17,21,28
548
adde 26,26,3
549
addi 28,1,4*8
550
lwzx 28,28,31
551
addze 3,0
552
stwu 22,4(29)
553
addc 22,23,14
554
adde 23,24,15
555
adde 24,25,16
556
adde 25,26,17
557
558
bne .Loop_mul4x_tail
559
560
lwz 14,4*5(29)
561
sub 15,6,8  /* r15 = r6 - bytes: candidate rewound r6 */
562
addze 3,3
563
.long 0x7c1e2040  /* encodes cmplw 30,4 */
564
beq .Loop_mul4x_break
565
566
lwz 15,4*6(29)
567
lwz 16,4*7(29)
568
lwz 17,4*8(29)
569
lwz 9,4*1(4)
570
lwz 10,4*2(4)
571
lwz 11,4*3(4)
572
lwzu 12,4*4(4)
573
addc 22,22,14
574
adde 23,23,15
575
adde 24,24,16
576
adde 25,25,17
577
578
579
lwz 18,4*1(6)
580
lwz 19,4*2(6)
581
lwz 20,4*3(6)
582
lwzu 21,4*4(6)
583
b .Loop_mul4x_tail
584
585
.align 5
586
.Loop_mul4x_break:
587
lwz 16,4*6(1)  /* r16 = stashed biased output pointer */
588
lwz 17,4*7(1)  /* r17 = stashed r5 end marker */
589
addc 9,22,14
590
lwz 22,4*12(1)
591
addze 10,23
592
lwz 23,4*13(1)
593
addze 11,24
594
lwz 24,4*14(1)
595
addze 12,25
596
lwz 25,4*15(1)
597
addze 3,3
598
stw 9,4*1(29)
599
sub 4,30,8  /* rewind r4 to its biased start */
600
stw 10,4*2(29)
601
stw 11,4*3(29)
602
stw 12,4*4(29)
603
stw 3,4*5(29)
604
605
lwz 18,4*1(15)
606
lwz 19,4*2(15)
607
lwz 20,4*3(15)
608
lwz 21,4*4(15)
609
addi 6,15,4*4
610
.long 0x7c058840  /* encodes cmplw 5,17 */
611
beq .Lmul4x_post  /* r5 reached its end marker */
612
613
lwzu 27,4*4(5)
614
lwz 9,4*1(4)
615
lwz 10,4*2(4)
616
lwz 11,4*3(4)
617
lwzu 12,4*4(4)
618
li 3,0
619
addic 29,1,4*7  /* r29 = r1+28; addic also writes CA (0 unless the add wraps) */
620
b .Loop_mul4x_reduction
621
622
.align 5
623
.Lmul4x_post:
624
625
626
627
628
srwi 31,8,4  /* r31 = bytes / 16 */
629
mr 5,16  /* r5 = output pointer (store cursor) */
630
subi 31,31,1  /* loop count: the last group is handled after the loop */
631
mr 30,16  /* r30 = output pointer (cursor for the select pass) */
632
subfc 14,18,22  /* r14 = r22 - r18; starts the borrow chain */
633
addi 29,1,4*15
634
subfe 15,19,23
635
636
mtctr 31
637
/* Subtract the r6 array from the accumulated result, four words per step, storing via r5. */
.Lmul4x_sub:
638
lwz 18,4*1(6)
639
lwz 22,4*1(29)
640
subfe 16,20,24
641
lwz 19,4*2(6)
642
lwz 23,4*2(29)
643
subfe 17,21,25
644
lwz 20,4*3(6)
645
lwz 24,4*3(29)
646
lwzu 21,4*4(6)
647
lwzu 25,4*4(29)
648
stw 14,4*1(5)
649
stw 15,4*2(5)
650
subfe 14,18,22
651
stw 16,4*3(5)
652
stwu 17,4*4(5)
653
subfe 15,19,23
654
bdnz .Lmul4x_sub
655
656
lwz 9,4*1(30)
657
stw 14,4*1(5)
658
lwz 14,4*12(1)
659
subfe 16,20,24
660
lwz 10,4*2(30)
661
stw 15,4*2(5)
662
lwz 15,4*13(1)
663
subfe 17,21,25
664
subfe 3,0,3  /* r3 = r3 - 0 - borrow, used as a select mask below */
665
addi 29,1,4*12
666
lwz 11,4*3(30)
667
stw 16,4*3(5)
668
lwz 16,4*14(1)
669
lwz 12,4*4(30)
670
stw 17,4*4(5)
671
lwz 17,4*15(1)
672
673
mtctr 31
674
/* output = (scratch & r3) | (output & ~r3), four words per step; scratch is zeroed as it is read. */
.Lmul4x_cond_copy:
675
and 14,14,3
676
andc 9,9,3
677
stw 0,4*0(29)
678
and 15,15,3
679
andc 10,10,3
680
stw 0,4*1(29)
681
and 16,16,3
682
andc 11,11,3
683
stw 0,4*2(29)
684
and 17,17,3
685
andc 12,12,3
686
stw 0,4*3(29)
687
or 22,14,9
688
lwz 9,4*5(30)
689
lwz 14,4*4(29)
690
or 23,15,10
691
lwz 10,4*6(30)
692
lwz 15,4*5(29)
693
or 24,16,11
694
lwz 11,4*7(30)
695
lwz 16,4*6(29)
696
or 25,17,12
697
lwz 12,4*8(30)
698
lwz 17,4*7(29)
699
addi 29,29,4*4
700
stw 22,4*1(30)
701
stw 23,4*2(30)
702
stw 24,4*3(30)
703
stwu 25,4*4(30)
704
bdnz .Lmul4x_cond_copy
705
706
lwz 5,0(1)  /* r5 = back chain (incoming stack pointer) */
707
and 14,14,3
708
andc 9,9,3
709
stw 0,4*0(29)
710
and 15,15,3
711
andc 10,10,3
712
stw 0,4*1(29)
713
and 16,16,3
714
andc 11,11,3
715
stw 0,4*2(29)
716
and 17,17,3
717
andc 12,12,3
718
stw 0,4*3(29)
719
or 22,14,9
720
or 23,15,10
721
stw 0,4*4(29)
722
or 24,16,11
723
or 25,17,12
724
stw 22,4*1(30)
725
stw 23,4*2(30)
726
stw 24,4*3(30)
727
stw 25,4*4(30)
728
729
b .Lmul4x_done
730
731
.align 4
732
/* Four-word case: the result is still in r22-r25 and the r6 words in r18-r21. */
.Lmul4x4_post_condition:
733
lwz 4,4*6(1)  /* r4 = stashed biased output pointer */
734
lwz 5,0(1)  /* r5 = back chain (incoming stack pointer) */
735
addze 3,3
736
737
subfc 9,18,22  /* r9..r12 = r22..r25 - r18..r21 */
738
subfe 10,19,23
739
subfe 11,20,24
740
subfe 12,21,25
741
subfe 3,0,3  /* r3 = r3 - 0 - borrow, used as a mask */
742
743
and 18,18,3  /* add r18..r21 back where the mask is set */
744
and 19,19,3
745
addc 9,9,18
746
and 20,20,3
747
adde 10,10,19
748
and 21,21,3
749
adde 11,11,20
750
adde 12,12,21
751
752
stw 9,4*1(4)
753
stw 10,4*2(4)
754
stw 11,4*3(4)
755
stw 12,4*4(4)
756
757
.Lmul4x_done:
758
stw 0,4*8(1)  /* zero the four scratch words at r1+32..r1+44 */
759
stw 0,4*9(1)
760
stw 0,4*10(1)
761
stw 0,4*11(1)
762
li 3,1  /* return value 1 */
763
lwz 14,-4*18(5)
764
lwz 15,-4*17(5)
765
lwz 16,-4*16(5)
766
lwz 17,-4*15(5)
767
lwz 18,-4*14(5)
768
lwz 19,-4*13(5)
769
lwz 20,-4*12(5)
770
lwz 21,-4*11(5)
771
lwz 22,-4*10(5)
772
lwz 23,-4*9(5)
773
lwz 24,-4*8(5)
774
lwz 25,-4*7(5)
775
lwz 26,-4*6(5)
776
lwz 27,-4*5(5)
777
lwz 28,-4*4(5)
778
lwz 29,-4*3(5)
779
lwz 30,-4*2(5)
780
lwz 31,-4*1(5)
781
mr 1,5  /* release the frame */
782
blr
783
.long 0
784
.byte 0,12,4,0x20,0x80,18,6,0
785
.long 0
786
.size bn_mul4x_mont_int,.-bn_mul4x_mont_int
787
/*
 * __bn_sqr8x_mont (32-bit PowerPC, eight words per loop step).
 *
 * Entered through the local label .Lsqr8x_do with the same register
 * arguments bn_mul4x_mont_int received: r3 = output pointer, r4 = input
 * word array, r6 = second input word array, r7 = pointer to a single word,
 * r8 = word count. r5 is not read as an argument; it is reused as the
 * scratch cursor. Allocates 128 + 8*count bytes of stack, saves r14-r31
 * below the incoming stack pointer, and returns r3 = 1.
 *
 * NOTE(review): presumably a Montgomery squaring (see the identification
 * string at the end of the file) -- confirm against ppc-mont.pl.
 */
.align 5
788
__bn_sqr8x_mont:
789
.Lsqr8x_do:
790
mr 9,1  /* r9 = incoming stack pointer */
791
slwi 10,8,3  /* r10 = count * 8 */
792
li 11,-32*4
793
sub 10,11,10  /* r10 = -(128 + 8*count) */
794
slwi 8,8,2  /* r8 = count * 4 (bytes) from here on */
795
stwux 1,1,10  /* allocate the frame and store the back chain */
796
797
stw 14,-4*18(9)  /* save r14-r31 below the incoming stack pointer */
798
stw 15,-4*17(9)
799
stw 16,-4*16(9)
800
stw 17,-4*15(9)
801
stw 18,-4*14(9)
802
stw 19,-4*13(9)
803
stw 20,-4*12(9)
804
stw 21,-4*11(9)
805
stw 22,-4*10(9)
806
stw 23,-4*9(9)
807
stw 24,-4*8(9)
808
stw 25,-4*7(9)
809
stw 26,-4*6(9)
810
stw 27,-4*5(9)
811
stw 28,-4*4(9)
812
stw 29,-4*3(9)
813
stw 30,-4*2(9)
814
stw 31,-4*1(9)
815
816
subi 4,4,4  /* bias pointers by one word: offsets 4*1.. then address words 0.. */
817
subi 18,6,4
818
subi 3,3,4
819
lwz 7,0(7)  /* replace the pointer in r7 by the word it points to */
820
li 0,0  /* r0 holds zero for addze and for wiping scratch */
821
822
add 6,4,8  /* r6 = biased r4 + bytes: end marker */
823
lwz 9,4*1(4)
824
825
lwz 10,4*2(4)
826
li 23,0
827
lwz 11,4*3(4)
828
li 24,0
829
lwz 12,4*4(4)
830
li 25,0
831
lwz 14,4*5(4)
832
li 26,0
833
lwz 15,4*6(4)
834
li 27,0
835
lwz 16,4*7(4)
836
li 28,0
837
lwzu 17,4*8(4)
838
li 29,0
839
840
addi 5,1,4*11  /* r5 -> scratch area (word 1 is at r1+48) */
841
subic. 30,8,4*8  /* r30 = bytes - 32; CR0 tested by the bne below */
842
b .Lsqr8x_zero_start
843
844
.align 5
845
/* Zero the scratch area; a full trip writes 16 words, the first entry writes only the last 8. */
.Lsqr8x_zero:
846
subic. 30,30,4*8
847
stw 0,4*1(5)
848
stw 0,4*2(5)
849
stw 0,4*3(5)
850
stw 0,4*4(5)
851
stw 0,4*5(5)
852
stw 0,4*6(5)
853
stw 0,4*7(5)
854
stw 0,4*8(5)
855
.Lsqr8x_zero_start:
856
stw 0,4*9(5)
857
stw 0,4*10(5)
858
stw 0,4*11(5)
859
stw 0,4*12(5)
860
stw 0,4*13(5)
861
stw 0,4*14(5)
862
stw 0,4*15(5)
863
stwu 0,4*16(5)
864
bne .Lsqr8x_zero
865
866
stw 3,4*6(1)  /* stash the biased output pointer */
867
stw 18,4*7(1)  /* stash the biased second-array pointer */
868
stw 7,4*8(1)  /* stash the word loaded through r7 */
869
stw 5,4*9(1)  /* stash the end of the zeroed scratch area */
870
stw 0,4*10(1)  /* top-carry slot starts at 0 */
871
addi 5,1,4*11
872
873
874
.align 5
875
/* Cross products among the eight words held in r9-r12, r14-r17. */
.Lsqr8x_outer_loop:
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
mullw 18,10,9
906
mullw 19,11,9
907
mullw 20,12,9
908
mullw 21,14,9
909
addc 23,23,18
910
mullw 18,15,9
911
adde 24,24,19
912
mullw 19,16,9
913
adde 25,25,20
914
mullw 20,17,9
915
adde 26,26,21
916
mulhwu 21,10,9
917
adde 27,27,18
918
mulhwu 18,11,9
919
adde 28,28,19
920
mulhwu 19,12,9
921
adde 29,29,20
922
mulhwu 20,14,9
923
stw 22,4*1(5)  /* NOTE(review): on the first pass r22 has not been written by this routine; this slot is stored again later in .Lsqr4x_shift_n_add -- confirm */
924
addze 22,0
925
stw 23,4*2(5)
926
addc 24,24,21
927
mulhwu 21,15,9
928
adde 25,25,18
929
mulhwu 18,16,9
930
adde 26,26,19
931
mulhwu 19,17,9
932
adde 27,27,20
933
mullw 20,11,10
934
adde 28,28,21
935
mullw 21,12,10
936
adde 29,29,18
937
mullw 18,14,10
938
adde 22,22,19
939
940
mullw 19,15,10
941
addc 25,25,20
942
mullw 20,16,10
943
adde 26,26,21
944
mullw 21,17,10
945
adde 27,27,18
946
mulhwu 18,11,10
947
adde 28,28,19
948
mulhwu 19,12,10
949
adde 29,29,20
950
mulhwu 20,14,10
951
adde 22,22,21
952
mulhwu 21,15,10
953
stw 24,4*3(5)
954
addze 23,0
955
stw 25,4*4(5)
956
addc 26,26,18
957
mulhwu 18,16,10
958
adde 27,27,19
959
mulhwu 19,17,10
960
adde 28,28,20
961
mullw 20,12,11
962
adde 29,29,21
963
mullw 21,14,11
964
adde 22,22,18
965
mullw 18,15,11
966
adde 23,23,19
967
968
mullw 19,16,11
969
addc 27,27,20
970
mullw 20,17,11
971
adde 28,28,21
972
mulhwu 21,12,11
973
adde 29,29,18
974
mulhwu 18,14,11
975
adde 22,22,19
976
mulhwu 19,15,11
977
adde 23,23,20
978
mulhwu 20,16,11
979
stw 26,4*5(5)
980
addze 24,0
981
stw 27,4*6(5)
982
addc 28,28,21
983
mulhwu 21,17,11
984
adde 29,29,18
985
mullw 18,14,12
986
adde 22,22,19
987
mullw 19,15,12
988
adde 23,23,20
989
mullw 20,16,12
990
adde 24,24,21
991
992
mullw 21,17,12
993
addc 29,29,18
994
mulhwu 18,14,12
995
adde 22,22,19
996
mulhwu 19,15,12
997
adde 23,23,20
998
mulhwu 20,16,12
999
adde 24,24,21
1000
mulhwu 21,17,12
1001
stw 28,4*7(5)
1002
addze 25,0
1003
stwu 29,4*8(5)
1004
addc 22,22,18
1005
mullw 18,15,14
1006
adde 23,23,19
1007
mullw 19,16,14
1008
adde 24,24,20
1009
mullw 20,17,14
1010
adde 25,25,21
1011
1012
mulhwu 21,15,14
1013
addc 23,23,18
1014
mulhwu 18,16,14
1015
adde 24,24,19
1016
mulhwu 19,17,14
1017
adde 25,25,20
1018
mullw 20,16,15
1019
addze 26,0
1020
addc 24,24,21
1021
mullw 21,17,15
1022
adde 25,25,18
1023
mulhwu 18,16,15
1024
adde 26,26,19
1025
1026
mulhwu 19,17,15
1027
addc 25,25,20
1028
mullw 20,17,16
1029
adde 26,26,21
1030
mulhwu 21,17,16
1031
addze 27,0
1032
addc 26,26,18
1033
.long 0x7c062040  /* encodes cmplw 6,4; CR0 is consumed by the beq below */
1034
adde 27,27,19
1035
1036
addc 27,27,20
1037
sub 18,6,8  /* r18 = end marker - bytes: biased start of the r4 array */
1038
addze 28,0
1039
add 28,28,21
1040
1041
beq .Lsqr8x_outer_break  /* r4 reached the end marker */
1042
1043
mr 7,9
1044
lwz 9,4*1(5)
1045
lwz 10,4*2(5)
1046
lwz 11,4*3(5)
1047
lwz 12,4*4(5)
1048
lwz 14,4*5(5)
1049
lwz 15,4*6(5)
1050
lwz 16,4*7(5)
1051
lwz 17,4*8(5)
1052
addc 22,22,9
1053
lwz 9,4*1(4)
1054
adde 23,23,10
1055
lwz 10,4*2(4)
1056
adde 24,24,11
1057
lwz 11,4*3(4)
1058
adde 25,25,12
1059
lwz 12,4*4(4)
1060
adde 26,26,14
1061
lwz 14,4*5(4)
1062
adde 27,27,15
1063
lwz 15,4*6(4)
1064
adde 28,28,16
1065
lwz 16,4*7(4)
1066
subi 3,4,4*7  /* r3 = base for the lwzx 7,3,30 multiplier loads */
1067
addze 29,17
1068
lwzu 17,4*8(4)
1069
1070
li 30,0  /* r30 = byte index that cycles 4..28,0 */
1071
b .Lsqr8x_mul
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
.align 5
1096
/* Multiply the eight words in r9-r12, r14-r17 by r7 and accumulate; one word stored per step. */
.Lsqr8x_mul:
1097
mullw 18,9,7
1098
addze 31,0
1099
mullw 19,10,7
1100
addi 30,30,4
1101
mullw 20,11,7
1102
andi. 30,30,4*8-1  /* sets CR0 for the bne at the bottom of the loop */
1103
mullw 21,12,7
1104
addc 22,22,18
1105
mullw 18,14,7
1106
adde 23,23,19
1107
mullw 19,15,7
1108
adde 24,24,20
1109
mullw 20,16,7
1110
adde 25,25,21
1111
mullw 21,17,7
1112
adde 26,26,18
1113
mulhwu 18,9,7
1114
adde 27,27,19
1115
mulhwu 19,10,7
1116
adde 28,28,20
1117
mulhwu 20,11,7
1118
adde 29,29,21
1119
mulhwu 21,12,7
1120
addze 31,31
1121
stwu 22,4(5)
1122
addc 22,23,18
1123
mulhwu 18,14,7
1124
adde 23,24,19
1125
mulhwu 19,15,7
1126
adde 24,25,20
1127
mulhwu 20,16,7
1128
adde 25,26,21
1129
mulhwu 21,17,7
1130
lwzx 7,3,30  /* next multiplier word */
1131
adde 26,27,18
1132
adde 27,28,19
1133
adde 28,29,20
1134
adde 29,31,21
1135
1136
bne .Lsqr8x_mul
1137
1138
1139
.long 0x7c043040  /* encodes cmplw 4,6 */
1140
beq .Lsqr8x_break  /* r4 reached the end marker */
1141
1142
lwz 9,4*1(5)
1143
lwz 10,4*2(5)
1144
lwz 11,4*3(5)
1145
lwz 12,4*4(5)
1146
lwz 14,4*5(5)
1147
lwz 15,4*6(5)
1148
lwz 16,4*7(5)
1149
lwz 17,4*8(5)
1150
addc 22,22,9
1151
lwz 9,4*1(4)
1152
adde 23,23,10
1153
lwz 10,4*2(4)
1154
adde 24,24,11
1155
lwz 11,4*3(4)
1156
adde 25,25,12
1157
lwz 12,4*4(4)
1158
adde 26,26,14
1159
lwz 14,4*5(4)
1160
adde 27,27,15
1161
lwz 15,4*6(4)
1162
adde 28,28,16
1163
lwz 16,4*7(4)
1164
adde 29,29,17
1165
lwzu 17,4*8(4)
1166
1167
b .Lsqr8x_mul
1168
1169
.align 5
1170
.Lsqr8x_break:
1171
lwz 9,4*8(3)
1172
addi 4,3,4*15
1173
lwz 10,4*9(3)
1174
sub. 18,6,4  /* r18 = end marker - r4; CR0 is consumed by the beq below */
1175
lwz 11,4*10(3)
1176
sub 19,5,18
1177
lwz 12,4*11(3)
1178
lwz 14,4*12(3)
1179
lwz 15,4*13(3)
1180
lwz 16,4*14(3)
1181
lwz 17,4*15(3)
1182
beq .Lsqr8x_outer_loop
1183
1184
stw 22,4*1(5)
1185
lwz 22,4*1(19)
1186
stw 23,4*2(5)
1187
lwz 23,4*2(19)
1188
stw 24,4*3(5)
1189
lwz 24,4*3(19)
1190
stw 25,4*4(5)
1191
lwz 25,4*4(19)
1192
stw 26,4*5(5)
1193
lwz 26,4*5(19)
1194
stw 27,4*6(5)
1195
lwz 27,4*6(19)
1196
stw 28,4*7(5)
1197
lwz 28,4*7(19)
1198
stw 29,4*8(5)
1199
lwz 29,4*8(19)
1200
mr 5,19
1201
b .Lsqr8x_outer_loop
1202
1203
.align 5
1204
.Lsqr8x_outer_break:
1205
1206
1207
lwz 10,4*1(18)  /* reload the first four input words from the start of the r4 array */
1208
lwz 12,4*2(18)
1209
lwz 15,4*3(18)
1210
lwz 17,4*4(18)
1211
addi 4,18,4*4
1212
1213
lwz 19,4*13(1)
1214
lwz 20,4*14(1)
1215
lwz 21,4*15(1)
1216
lwz 18,4*16(1)
1217
1218
stw 22,4*1(5)
1219
srwi 30,8,4  /* r30 = bytes / 16 */
1220
stw 23,4*2(5)
1221
subi 30,30,1  /* loop count: the last group is handled after the loop */
1222
stw 24,4*3(5)
1223
stw 25,4*4(5)
1224
stw 26,4*5(5)
1225
stw 27,4*6(5)
1226
stw 28,4*7(5)
1227
1228
addi 5,1,4*11
1229
mullw 22,10,10
1230
mulhwu 10,10,10
1231
add 23,19,19
1232
srwi 19,19,32-1
1233
mullw 11,12,12
1234
mulhwu 12,12,12
1235
addc 23,23,10
1236
add 24,20,20
1237
srwi 20,20,32-1
1238
add 25,21,21
1239
srwi 21,21,32-1
1240
or 24,24,19
1241
1242
mtctr 30
1243
/* Double the stored words (add x,x / srwi x,31 / or) and add the squares of the input words. */
.Lsqr4x_shift_n_add:
1244
mullw 14,15,15
1245
mulhwu 15,15,15
1246
lwz 19,4*6(5)
1247
lwz 10,4*1(4)
1248
adde 24,24,11
1249
add 26,18,18
1250
srwi 18,18,32-1
1251
or 25,25,20
1252
lwz 20,4*7(5)
1253
adde 25,25,12
1254
lwz 12,4*2(4)
1255
add 27,19,19
1256
srwi 19,19,32-1
1257
or 26,26,21
1258
lwz 21,4*8(5)
1259
mullw 16,17,17
1260
mulhwu 17,17,17
1261
adde 26,26,14
1262
add 28,20,20
1263
srwi 20,20,32-1
1264
or 27,27,18
1265
lwz 18,4*9(5)
1266
adde 27,27,15
1267
lwz 15,4*3(4)
1268
add 29,21,21
1269
srwi 21,21,32-1
1270
or 28,28,19
1271
lwz 19,4*10(5)
1272
mullw 9,10,10
1273
mulhwu 10,10,10
1274
adde 28,28,16
1275
stw 22,4*1(5)
1276
add 22,18,18
1277
srwi 18,18,32-1
1278
or 29,29,20
1279
lwz 20,4*11(5)
1280
adde 29,29,17
1281
lwzu 17,4*4(4)
1282
stw 23,4*2(5)
1283
add 23,19,19
1284
srwi 19,19,32-1
1285
or 22,22,21
1286
lwz 21,4*12(5)
1287
mullw 11,12,12
1288
mulhwu 12,12,12
1289
adde 22,22,9
1290
stw 24,4*3(5)
1291
add 24,20,20
1292
srwi 20,20,32-1
1293
or 23,23,18
1294
lwz 18,4*13(5)
1295
adde 23,23,10
1296
stw 25,4*4(5)
1297
stw 26,4*5(5)
1298
stw 27,4*6(5)
1299
stw 28,4*7(5)
1300
stwu 29,4*8(5)
1301
add 25,21,21
1302
srwi 21,21,32-1
1303
or 24,24,19
1304
bdnz .Lsqr4x_shift_n_add
1305
lwz 4,4*7(1)  /* r4 = stashed biased second-array pointer */
1306
lwz 7,4*8(1)  /* r7 = stashed word loaded through the r7 argument */
1307
1308
mullw 14,15,15
1309
mulhwu 15,15,15
1310
stw 22,4*1(5)
1311
lwz 22,4*12(1)
1312
lwz 19,4*6(5)
1313
adde 24,24,11
1314
add 26,18,18
1315
srwi 18,18,32-1
1316
or 25,25,20
1317
lwz 20,4*7(5)
1318
adde 25,25,12
1319
add 27,19,19
1320
srwi 19,19,32-1
1321
or 26,26,21
1322
mullw 16,17,17
1323
mulhwu 17,17,17
1324
adde 26,26,14
1325
add 28,20,20
1326
srwi 20,20,32-1
1327
or 27,27,18
1328
stw 23,4*2(5)
1329
lwz 23,4*13(1)
1330
adde 27,27,15
1331
or 28,28,19
1332
lwz 9,4*1(4)
1333
lwz 10,4*2(4)
1334
adde 28,28,16
1335
lwz 11,4*3(4)
1336
lwz 12,4*4(4)
1337
adde 29,17,20
1338
lwz 14,4*5(4)
1339
lwz 15,4*6(4)
1340
1341
1342
1343
mullw 31,7,22  /* r31 = low word of r7 * r22 */
1344
li 30,8
1345
lwz 16,4*7(4)
1346
add 6,4,8  /* r6 = biased second-array pointer + bytes: end marker */
1347
lwzu 17,4*8(4)
1348
stw 24,4*3(5)
1349
lwz 24,4*14(1)
1350
stw 25,4*4(5)
1351
lwz 25,4*15(1)
1352
stw 26,4*5(5)
1353
lwz 26,4*16(1)
1354
stw 27,4*6(5)
1355
lwz 27,4*17(1)
1356
stw 28,4*7(5)
1357
lwz 28,4*18(1)
1358
stw 29,4*8(5)
1359
lwz 29,4*19(1)
1360
addi 5,1,4*11
1361
mtctr 30  /* eight reduction steps */
1362
b .Lsqr8x_reduction
1363
1364
.align 5
1365
.Lsqr8x_reduction:
1366
1367
mullw 19,10,31
1368
mullw 20,11,31
1369
stwu 31,4(5)  /* keep the multiplier; .Lsqr8x_tail reloads it via lwzx 7,3,30 */
1370
mullw 21,12,31
1371
1372
addic 22,22,-1  /* CA = (r22 != 0); NOTE(review): presumably stands in for the carry of r22 + low(r9*r31), which is not computed here -- confirm */
1373
mullw 18,14,31
1374
adde 22,23,19
1375
mullw 19,15,31
1376
adde 23,24,20
1377
mullw 20,16,31
1378
adde 24,25,21
1379
mullw 21,17,31
1380
adde 25,26,18
1381
mulhwu 18,9,31
1382
adde 26,27,19
1383
mulhwu 19,10,31
1384
adde 27,28,20
1385
mulhwu 20,11,31
1386
adde 28,29,21
1387
mulhwu 21,12,31
1388
addze 29,0
1389
addc 22,22,18
1390
mulhwu 18,14,31
1391
adde 23,23,19
1392
mulhwu 19,15,31
1393
adde 24,24,20
1394
mulhwu 20,16,31
1395
adde 25,25,21
1396
mulhwu 21,17,31
1397
mullw 31,7,22  /* multiplier for the next step */
1398
adde 26,26,18
1399
adde 27,27,19
1400
adde 28,28,20
1401
adde 29,29,21
1402
bdnz .Lsqr8x_reduction
1403
1404
lwz 18,4*1(5)
1405
lwz 19,4*2(5)
1406
lwz 20,4*3(5)
1407
lwz 21,4*4(5)
1408
subi 3,5,4*7  /* r3 = base for the lwzx 7,3,30 multiplier loads */
1409
.long 0x7c062040  /* encodes cmplw 6,4; CR0 is consumed by the beq below */
1410
addc 22,22,18
1411
lwz 18,4*5(5)
1412
adde 23,23,19
1413
lwz 19,4*6(5)
1414
adde 24,24,20
1415
lwz 20,4*7(5)
1416
adde 25,25,21
1417
lwz 21,4*8(5)
1418
adde 26,26,18
1419
adde 27,27,19
1420
adde 28,28,20
1421
adde 29,29,21
1422
1423
beq .Lsqr8x8_post_condition  /* r4 already at the end marker (eight-word case) */
1424
1425
lwz 7,4*0(3)
1426
lwz 9,4*1(4)
1427
lwz 10,4*2(4)
1428
lwz 11,4*3(4)
1429
lwz 12,4*4(4)
1430
lwz 14,4*5(4)
1431
lwz 15,4*6(4)
1432
lwz 16,4*7(4)
1433
lwzu 17,4*8(4)
1434
li 30,0
1435
1436
.align 5
1437
.Lsqr8x_tail:
1438
mullw 18,9,7
1439
addze 31,0
1440
mullw 19,10,7
1441
addi 30,30,4
1442
mullw 20,11,7
1443
andi. 30,30,4*8-1  /* sets CR0 for the bne at the bottom of the loop */
1444
mullw 21,12,7
1445
addc 22,22,18
1446
mullw 18,14,7
1447
adde 23,23,19
1448
mullw 19,15,7
1449
adde 24,24,20
1450
mullw 20,16,7
1451
adde 25,25,21
1452
mullw 21,17,7
1453
adde 26,26,18
1454
mulhwu 18,9,7
1455
adde 27,27,19
1456
mulhwu 19,10,7
1457
adde 28,28,20
1458
mulhwu 20,11,7
1459
adde 29,29,21
1460
mulhwu 21,12,7
1461
addze 31,31
1462
stwu 22,4(5)
1463
addc 22,23,18
1464
mulhwu 18,14,7
1465
adde 23,24,19
1466
mulhwu 19,15,7
1467
adde 24,25,20
1468
mulhwu 20,16,7
1469
adde 25,26,21
1470
mulhwu 21,17,7
1471
lwzx 7,3,30  /* next saved multiplier */
1472
adde 26,27,18
1473
adde 27,28,19
1474
adde 28,29,20
1475
adde 29,31,21
1476
1477
bne .Lsqr8x_tail
1478
1479
1480
lwz 9,4*1(5)
1481
lwz 31,4*10(1)  /* r31 = saved top carry */
1482
.long 0x7c062040  /* encodes cmplw 6,4; CR0 is consumed by the beq below */
1483
lwz 10,4*2(5)
1484
sub 20,6,8  /* r20 = end marker - bytes: biased start of the second array */
1485
lwz 11,4*3(5)
1486
lwz 12,4*4(5)
1487
lwz 14,4*5(5)
1488
lwz 15,4*6(5)
1489
lwz 16,4*7(5)
1490
lwz 17,4*8(5)
1491
beq .Lsqr8x_tail_break
1492
1493
addc 22,22,9
1494
lwz 9,4*1(4)
1495
adde 23,23,10
1496
lwz 10,4*2(4)
1497
adde 24,24,11
1498
lwz 11,4*3(4)
1499
adde 25,25,12
1500
lwz 12,4*4(4)
1501
adde 26,26,14
1502
lwz 14,4*5(4)
1503
adde 27,27,15
1504
lwz 15,4*6(4)
1505
adde 28,28,16
1506
lwz 16,4*7(4)
1507
adde 29,29,17
1508
lwzu 17,4*8(4)
1509
1510
b .Lsqr8x_tail
1511
1512
.align 5
1513
.Lsqr8x_tail_break:
1514
lwz 7,4*8(1)  /* r7 = stashed word loaded through the r7 argument */
1515
lwz 21,4*9(1)  /* r21 = stashed end of the scratch area */
1516
addi 30,5,4*8
1517
1518
addic 31,31,-1  /* CA = (r31 != 0): bring the saved top carry back into CA */
1519
adde 18,22,9
1520
lwz 22,4*8(3)
1521
lwz 9,4*1(20)
1522
adde 19,23,10
1523
lwz 23,4*9(3)
1524
lwz 10,4*2(20)
1525
adde 24,24,11
1526
lwz 11,4*3(20)
1527
adde 25,25,12
1528
lwz 12,4*4(20)
1529
adde 26,26,14
1530
lwz 14,4*5(20)
1531
adde 27,27,15
1532
lwz 15,4*6(20)
1533
adde 28,28,16
1534
lwz 16,4*7(20)
1535
adde 29,29,17
1536
lwz 17,4*8(20)
1537
addi 4,20,4*8
1538
addze 20,0  /* r20 = new top carry */
1539
mullw 31,7,22
1540
stw 18,4*1(5)
1541
.long 0x7c1ea840  /* encodes cmplw 30,21; CR0 is consumed by the bne below */
1542
stw 19,4*2(5)
1543
li 30,8
1544
stw 24,4*3(5)
1545
lwz 24,4*10(3)
1546
stw 25,4*4(5)
1547
lwz 25,4*11(3)
1548
stw 26,4*5(5)
1549
lwz 26,4*12(3)
1550
stw 27,4*6(5)
1551
lwz 27,4*13(3)
1552
stw 28,4*7(5)
1553
lwz 28,4*14(3)
1554
stw 29,4*8(5)
1555
lwz 29,4*15(3)
1556
stw 20,4*10(1)  /* save the top carry */
1557
addi 5,3,4*7
1558
mtctr 30
1559
bne .Lsqr8x_reduction  /* more scratch words remain */
1560
1561
1562
1563
1564
1565
1566
lwz 3,4*6(1)  /* r3 = stashed biased output pointer */
1567
srwi 30,8,5  /* r30 = bytes / 32 */
1568
mr 7,5
1569
addi 5,5,4*8
1570
subi 30,30,1  /* loop count: the last group is handled after the loop */
1571
subfc 18,9,22  /* r18 = r22 - r9; starts the borrow chain */
1572
subfe 19,10,23
1573
mr 31,20  /* r31 = top carry */
1574
mr 6,3
1575
1576
mtctr 30
1577
b .Lsqr8x_sub
1578
1579
.align 5
1580
/* Subtract the second array from the result, eight words per step, storing via r3. */
.Lsqr8x_sub:
1581
lwz 9,4*1(4)
1582
lwz 22,4*1(5)
1583
lwz 10,4*2(4)
1584
lwz 23,4*2(5)
1585
subfe 20,11,24
1586
lwz 11,4*3(4)
1587
lwz 24,4*3(5)
1588
subfe 21,12,25
1589
lwz 12,4*4(4)
1590
lwz 25,4*4(5)
1591
stw 18,4*1(3)
1592
subfe 18,14,26
1593
lwz 14,4*5(4)
1594
lwz 26,4*5(5)
1595
stw 19,4*2(3)
1596
subfe 19,15,27
1597
lwz 15,4*6(4)
1598
lwz 27,4*6(5)
1599
stw 20,4*3(3)
1600
subfe 20,16,28
1601
lwz 16,4*7(4)
1602
lwz 28,4*7(5)
1603
stw 21,4*4(3)
1604
subfe 21,17,29
1605
lwzu 17,4*8(4)
1606
lwzu 29,4*8(5)
1607
stw 18,4*5(3)
1608
subfe 18,9,22
1609
stw 19,4*6(3)
1610
subfe 19,10,23
1611
stw 20,4*7(3)
1612
stwu 21,4*8(3)
1613
bdnz .Lsqr8x_sub
1614
1615
srwi 30,8,4  /* r30 = bytes / 16 */
1616
lwz 9,4*1(6)
1617
lwz 22,4*1(7)
1618
subi 30,30,1
1619
lwz 10,4*2(6)
1620
lwz 23,4*2(7)
1621
subfe 20,11,24
1622
lwz 11,4*3(6)
1623
lwz 24,4*3(7)
1624
subfe 21,12,25
1625
lwz 12,4*4(6)
1626
lwzu 25,4*4(7)
1627
stw 18,4*1(3)
1628
subfe 18,14,26
1629
stw 19,4*2(3)
1630
subfe 19,15,27
1631
stw 20,4*3(3)
1632
subfe 20,16,28
1633
stw 21,4*4(3)
1634
subfe 21,17,29
1635
stw 18,4*5(3)
1636
subfe 31,0,31  /* r31 = r31 - 0 - borrow, used as a select mask below */
1637
stw 19,4*6(3)
1638
stw 20,4*7(3)
1639
stw 21,4*8(3)
1640
1641
addi 5,1,4*11
1642
mtctr 30
1643
1644
/* output = (scratch & r31) | (output & ~r31), four words per step; scratch is zeroed via r7 and r5. */
.Lsqr4x_cond_copy:
1645
andc 9,9,31
1646
stw 0,-4*3(7)
1647
and 22,22,31
1648
stw 0,-4*2(7)
1649
andc 10,10,31
1650
stw 0,-4*1(7)
1651
and 23,23,31
1652
stw 0,-4*0(7)
1653
andc 11,11,31
1654
stw 0,4*1(5)
1655
and 24,24,31
1656
stw 0,4*2(5)
1657
andc 12,12,31
1658
stw 0,4*3(5)
1659
and 25,25,31
1660
stwu 0,4*4(5)
1661
or 18,9,22
1662
lwz 9,4*5(6)
1663
lwz 22,4*1(7)
1664
or 19,10,23
1665
lwz 10,4*6(6)
1666
lwz 23,4*2(7)
1667
or 20,11,24
1668
lwz 11,4*7(6)
1669
lwz 24,4*3(7)
1670
or 21,12,25
1671
lwz 12,4*8(6)
1672
lwzu 25,4*4(7)
1673
stw 18,4*1(6)
1674
stw 19,4*2(6)
1675
stw 20,4*3(6)
1676
stwu 21,4*4(6)
1677
bdnz .Lsqr4x_cond_copy
1678
1679
lwz 4,0(1)  /* r4 = back chain (incoming stack pointer) */
1680
andc 9,9,31
1681
and 22,22,31
1682
andc 10,10,31
1683
and 23,23,31
1684
andc 11,11,31
1685
and 24,24,31
1686
andc 12,12,31
1687
and 25,25,31
1688
or 18,9,22
1689
or 19,10,23
1690
or 20,11,24
1691
or 21,12,25
1692
stw 18,4*1(6)
1693
stw 19,4*2(6)
1694
stw 20,4*3(6)
1695
stw 21,4*4(6)
1696
1697
b .Lsqr8x_done
1698
1699
.align 5
1700
/* Eight-word case: the result is still in r22-r29 and the second-array words in r9-r12, r14-r17. */
.Lsqr8x8_post_condition:
1701
lwz 3,4*6(1)  /* r3 = stashed biased output pointer */
1702
lwz 4,0(1)  /* r4 = back chain (incoming stack pointer) */
1703
addze 31,0
1704
1705
1706
subfc 22,9,22  /* r22..r29 -= r9..r17, interleaved with zeroing of scratch words */
1707
subfe 23,10,23
1708
stw 0,4*12(1)
1709
stw 0,4*13(1)
1710
subfe 24,11,24
1711
stw 0,4*14(1)
1712
stw 0,4*15(1)
1713
subfe 25,12,25
1714
stw 0,4*16(1)
1715
stw 0,4*17(1)
1716
subfe 26,14,26
1717
stw 0,4*18(1)
1718
stw 0,4*19(1)
1719
subfe 27,15,27
1720
stw 0,4*20(1)
1721
stw 0,4*21(1)
1722
subfe 28,16,28
1723
stw 0,4*22(1)
1724
stw 0,4*23(1)
1725
subfe 29,17,29
1726
stw 0,4*24(1)
1727
stw 0,4*25(1)
1728
subfe 31,0,31  /* r31 = r31 - 0 - borrow, used as a mask */
1729
stw 0,4*26(1)
1730
stw 0,4*27(1)
1731
1732
and 9,9,31  /* add the subtracted words back where the mask is set */
1733
and 10,10,31
1734
addc 22,22,9
1735
and 11,11,31
1736
adde 23,23,10
1737
and 12,12,31
1738
adde 24,24,11
1739
and 14,14,31
1740
adde 25,25,12
1741
and 15,15,31
1742
adde 26,26,14
1743
and 16,16,31
1744
adde 27,27,15
1745
and 17,17,31
1746
adde 28,28,16
1747
adde 29,29,17
1748
stw 22,4*1(3)
1749
stw 23,4*2(3)
1750
stw 24,4*3(3)
1751
stw 25,4*4(3)
1752
stw 26,4*5(3)
1753
stw 27,4*6(3)
1754
stw 28,4*7(3)
1755
stw 29,4*8(3)
1756
1757
.Lsqr8x_done:
1758
stw 0,4*8(1)  /* zero the stashed r7 word and the top-carry slot */
1759
stw 0,4*10(1)
1760
1761
lwz 14,-4*18(4)
1762
li 3,1  /* return value 1 */
1763
lwz 15,-4*17(4)
1764
lwz 16,-4*16(4)
1765
lwz 17,-4*15(4)
1766
lwz 18,-4*14(4)
1767
lwz 19,-4*13(4)
1768
lwz 20,-4*12(4)
1769
lwz 21,-4*11(4)
1770
lwz 22,-4*10(4)
1771
lwz 23,-4*9(4)
1772
lwz 24,-4*8(4)
1773
lwz 25,-4*7(4)
1774
lwz 26,-4*6(4)
1775
lwz 27,-4*5(4)
1776
lwz 28,-4*4(4)
1777
lwz 29,-4*3(4)
1778
lwz 30,-4*2(4)
1779
lwz 31,-4*1(4)
1780
mr 1,4  /* release the frame */
1781
blr
1782
.long 0
1783
.byte 0,12,4,0x20,0x80,18,6,0
1784
.long 0
1785
.size __bn_sqr8x_mont,.-__bn_sqr8x_mont
1786
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,80,80,67,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
1787
.align 2
1788
1789