/* arch/sh/lib/udivsi3_i4i.S, from the GitHub repository awilliam/linux-vfio (master). */
/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
   2004, 2005, 2006
   Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */
!! libgcc routines for the Renesas / SuperH SH CPUs.
!! Contributed by Steve Chamberlain.
!! sac@cygnus.com

!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
!! recoded in assembly by Toshiyasu Morita
!! tm@netcom.com
/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
   ELF local label prefixes by J"orn Rennecke
   amylaar@cygnus.com  */
/* This code uses shld, and thus is not suitable for SH1 / SH2.  */
/* Signed / unsigned division without use of FPU, optimized for SH4.
   Uses a lookup table for divisors in the range -128 .. +128, and
   div1 with case distinction for larger divisors in three more ranges.
   The code is lumped together with the table to allow the use of mova.  */
/*
 * Byte offsets, inside a 32-bit word stored in memory, of individual
 * bytes of the value.  They depend on the configured endianness:
 *   L_LSB    - bits  0..7
 *   L_LSWMSB - bits  8..15 (upper byte of the low 16-bit half)
 *   L_MSWLSB - bits 16..23 (lower byte of the high 16-bit half)
 * The division code below uses L_LSWMSB and L_MSWLSB as mov.b
 * displacements off r15 to drop quotient bytes into a scratch word on
 * the stack.  L_LSB is not referenced in the code visible in this file.
 */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define L_LSB 0
#define L_LSWMSB 1
#define L_MSWLSB 2
#else
#define L_LSB 3
#define L_LSWMSB 2
#define L_MSWLSB 1
#endif
/*
 * __udivsi3_i4i (also exported as __udivsi3_i4): unsigned 32-bit division.
 *
 * In:   r4 = dividend, r5 = divisor
 * Out:  r0 = quotient
 * Every path pushes r4 and the original r5 and pops them again before
 * rts; r1 is scratch.  T/M/Q are modified, and the table path also
 * overwrites MACH/MACL (dmulu.l).
 *
 * Dispatch on the divisor:
 *   divisor <= 128      -> udiv_le128 (reciprocal table)
 *   divisor >= 0x10000  -> udiv_ge64k
 *   otherwise           -> handled here, finished in the tail at udiv_25
 *                          that is shared with __sdivsi3_i4i
 */
	.balign 4
	.global	__udivsi3_i4i
	.global	__udivsi3_i4
	.set	__udivsi3_i4, __udivsi3_i4i
	.type	__udivsi3_i4i, @function
__udivsi3_i4i:
	mov.w	c128_w, r1	! r1 = 128, upper bound of the table-driven range
	div0u			! clear M, Q and T for unsigned div1 steps
	mov	r4,r0
	shlr8	r0		! r0 = dividend >> 8
	cmp/hi	r1,r5		! T = (divisor > 128), unsigned
	extu.w	r5,r1		! r1 = low 16 bits of the divisor
	bf	udiv_le128	! divisor <= 128
	cmp/eq	r5,r1		! T = (divisor fits in 16 bits)
	bf	udiv_ge64k	! divisor >= 0x10000
	! 128 < divisor < 0x10000
	shlr	r0		! r0 = dividend >> 9, T = bit 8 of the dividend
	mov	r5,r1		! keep the unshifted divisor so it can be saved
	shll16	r5		! move the divisor into the upper halfword
	mov.l	r4,@-r15	! save dividend
	div1	r5,r0
	mov.l	r1,@-r15	! save original divisor
	div1	r5,r0
	div1	r5,r0
	bra	udiv_25		! continue in the shared tail
	div1	r5,r0		! (delay slot)
/*
 * Division by a small divisor (at most 128) by multiplying with a
 * tabulated reciprocal.
 *   div_le128   - entry from pos_result; r4/r5 were already pushed by
 *                 __sdivsi3_i4i
 *   udiv_le128  - entry from __udivsi3_i4i; pushes r4/r5 itself
 *   div_le128_2 - common part
 * In:  r4 = dividend, r5 = divisor.  Out: r0 = quotient, r4/r5 restored.
 * Clobbers r1, T, MACH and MACL.
 *
 * A divisor of 1 is diverted to div_by_1.  For a divisor of 0 the index
 * byte fetched from div_table_ix is -6, so the mov.l from
 * div_table_inv - 6 is an unaligned access.
 */
div_le128:
	mova	div_table_ix,r0
	bra	div_le128_2
	mov.b	@(r0,r5),r1	! (delay slot) r1 = div_table_ix[divisor]
udiv_le128:
	mov.l	r4,@-r15	! save dividend
	mova	div_table_ix,r0
	mov.b	@(r0,r5),r1	! r1 = div_table_ix[divisor], sign-extended
	mov.l	r5,@-r15	! save divisor
div_le128_2:
	mova	div_table_inv,r0
	mov.l	@(r0,r1),r1	! r1 = reciprocal word at div_table_inv + r1
	mov	r5,r0
	tst	#0xfe,r0	! T = ((divisor & 0xfe) == 0)
	mova	div_table_clz,r0
	dmulu.l	r1,r4		! MACH:MACL = reciprocal * dividend, unsigned
	mov.b	@(r0,r5),r1	! r1 = div_table_clz[divisor], shift count for shld
	bt/s	div_by_1	! divisor 1: the quotient is the dividend
	mov	r4,r0		! (delay slot) r0 = dividend
	mov.l	@r15+,r5	! restore divisor
	sts	mach,r0		! r0 = high 32 bits of the product
	/* clrt */		! T is already 0 here because bt/s was not taken
	addc	r4,r0		! add the dividend (implicit leading 1 of the
				! reciprocal); T = carry out
	mov.l	@r15+,r4	! restore dividend
	rotcr	r0		! shift the 33-bit sum T:r0 right by one
	rts
	shld	r1,r0		! (delay slot) final shift by the table entry
/*
 * Exits for the divisor-1 case of the table paths.
 *   div_by_1_neg - reached from div_le128_neg; returns -r4
 *   div_by_1     - reached from div_le128_2 with r0 = r4 already set in
 *                  the delay slot of the branch
 * Both pop the saved r5 and r4 before returning.
 */
div_by_1_neg:
	neg	r4,r0		! r0 = -dividend
div_by_1:
	mov.l	@r15+,r5	! restore divisor
	rts
	mov.l	@r15+,r4	! (delay slot) restore dividend
/*
 * Divisors that do not fit in 16 bits.
 *   div_ge64k   - entry from pos_result; T = (divisor > dividend >> 8)
 *                 was set in the delay slot of the branch, r4/r5 are
 *                 already saved
 *   udiv_ge64k  - entry from __udivsi3_i4i; does the same compare and
 *                 saves r4 and the original r5 itself
 *   div_ge64k_2 - common part
 * In:  r0 = dividend >> 8, r4 = dividend, r5 = divisor.
 * If the divisor is larger than dividend >> 8 the work is handed to
 * div_r8 / udiv_r8.  Otherwise eight div1 steps are run here and the
 * remaining eight at div_ge64k_end.
 */
div_ge64k:
	bt/s	div_r8
	div0u			! (delay slot) clear M, Q and T
	shll8	r5		! divisor << 8
	bra	div_ge64k_2
	div1	r5,r0		! (delay slot)
udiv_ge64k:
	cmp/hi	r0,r5		! T = (divisor > dividend >> 8)
	mov	r5,r1		! keep the unshifted divisor so it can be saved
	bt	udiv_r8
	shll8	r5		! divisor << 8
	mov.l	r4,@-r15	! save dividend
	div1	r5,r0
	mov.l	r1,@-r15	! save original divisor
div_ge64k_2:
	div1	r5,r0
	mov.l	zero_l,r1	! r1 = 0
	.rept 4
	div1	r5,r0
	.endr
	mov.l	r1,@-r15	! push a zeroed scratch word for quotient bytes
	div1	r5,r0
	mov.w	m256_w,r1	! r1 = 0xff00 sign-extended, a mask clearing bits 0..7
	div1	r5,r0
	mov.b	r0,@(L_LSWMSB,r15) ! low byte of r0 -> bits 8..15 of the scratch word
	! The xor/and/xor below leaves r0 = (r0 & r1) | (r4 & ~r1), i.e. the
	! low byte of r0 is replaced by the low byte of the dividend.
	xor	r4,r0
	and	r1,r0
	bra	div_ge64k_end
	xor	r4,r0		! (delay slot)
/*
 * Eight div1 steps for the case divisor > (dividend >> 8).
 *   div_r8   - entry from div_ge64k (r4/r5 already saved, T cleared by
 *              the div0u in the delay slot of the branch)
 *   udiv_r8  - entry from udiv_ge64k; saves r4/r5 and clears T itself
 *   div_r8_2 - common part
 * In:  r0 = dividend >> 8, r4 = dividend, r5 = divisor.
 * Out: r0 = quotient, r4/r5 restored from the stack.  Clobbers r1.
 * r1 is the register div1 operates on; the dividend bits moved to the
 * top of r4 are fed to it through T with rotcl, and r0 collects the T
 * bits that div1 produces.
 */
div_r8:
	shll16	r4
	bra	div_r8_2
	shll8	r4		! (delay slot) r4 = dividend << 24
udiv_r8:
	mov.l	r4,@-r15	! save dividend
	shll16	r4
	clrt
	shll8	r4		! r4 = dividend << 24
	mov.l	r5,@-r15	! save divisor
div_r8_2:
	rotcl	r4		! T = top bit of r4
	mov	r0,r1		! r1 = dividend >> 8
	div1	r5,r1
	mov	r4,r0		! remaining dividend bits move to r0
	rotcl	r0
	mov	r5,r4		! copy of the divisor for the last step, which
				! runs after r5 has been reloaded
	div1	r5,r1
	.rept 5
	rotcl	r0; div1 r5,r1
	.endr
	rotcl	r0
	mov.l	@r15+,r5	! restore divisor
	div1	r4,r1		! last step, using the copy in r4
	mov.l	@r15+,r4	! restore dividend
	rts
	rotcl	r0		! (delay slot) shift in the T bit of the last div1
/*
 * __sdivsi3_i4i (also exported as __sdivsi3_i4 and __sdivsi3): signed
 * 32-bit division.
 *
 * In:   r4 = dividend, r5 = divisor
 * Out:  r0 = quotient
 * r4 and r5 are pushed here and popped by whichever path returns.
 *
 * The entry code replaces negative operands by their negation and picks
 * one of two parallel code paths:
 *   pos_result - operand signs are equal, quotient returned as computed
 *   neg_result - operand signs differ, quotient is negated before return
 * Both are entered with r1 = 128 and T = (r5 > 128, unsigned).
 * Execution falls through from the end of this block into pos_result.
 */
	.global	__sdivsi3_i4i
	.global	__sdivsi3_i4
	.global	__sdivsi3
	.set	__sdivsi3_i4, __sdivsi3_i4i
	.set	__sdivsi3, __sdivsi3_i4i
	.type	__sdivsi3_i4i, @function
	/* This is link-compatible with a __sdivsi3 call,
	   but we effectively clobber only r1.  */
__sdivsi3_i4i:
	mov.l	r4,@-r15	! save dividend
	cmp/pz	r5		! T = (divisor >= 0)
	mov.w	c128_w, r1	! r1 = 128
	bt/s	pos_divisor
	cmp/pz	r4		! (delay slot) T = (dividend >= 0)
	! divisor is negative
	mov.l	r5,@-r15	! save divisor
	neg	r5,r5		! r5 = -divisor
	bt/s	neg_result	! dividend >= 0, divisor < 0
	cmp/hi	r1,r5		! (delay slot) T = (r5 > 128)
	neg	r4,r4		! both negative: r4 = -dividend, fall through
/*
 * Division of the adjusted operands, quotient returned without
 * negation.  Also the tail of __udivsi3_i4i, which joins at udiv_25.
 *
 * In at pos_result: r4 = dividend, r5 = divisor (both as left by the
 * __sdivsi3_i4i entry code), T = (r5 > 128); the original r4 and r5 are
 * on the stack.
 *   r5 <= 128      -> div_le128
 *   r5 >= 0x10000  -> div_ge64k (which rejoins at div_ge64k_end)
 *   otherwise      -> div1 sequence below
 *
 * A zeroed scratch word is pushed at udiv_25.  Quotient bytes are
 * parked in it with mov.b while r0 keeps being used by div1, and the
 * word is popped and merged with the last byte at div_ge64k_end.
 * Out: r0 = quotient, r4/r5 restored from the stack.  Clobbers r1.
 */
pos_result:
	extu.w	r5,r0		! r0 = low 16 bits of the divisor
	bf	div_le128	! divisor <= 128
	cmp/eq	r5,r0		! T = (divisor fits in 16 bits)
	mov	r4,r0
	shlr8	r0		! r0 = dividend >> 8
	bf/s	div_ge64k	! divisor >= 0x10000
	cmp/hi	r0,r5		! (delay slot) T = (divisor > dividend >> 8)
	div0u			! clear M, Q and T
	shll16	r5		! move the divisor into the upper halfword
	div1	r5,r0
	div1	r5,r0
	div1	r5,r0
udiv_25:
	mov.l	zero_l,r1	! r1 = 0
	div1	r5,r0
	div1	r5,r0
	mov.l	r1,@-r15	! push the zeroed scratch word
	.rept 3
	div1	r5,r0
	.endr
	mov.b	r0,@(L_MSWLSB,r15) ! low byte of r0 -> bits 16..23 of the scratch word
	xtrct	r4,r0		! r0 = (r4 << 16) | (r0 >> 16)
	swap.w	r0,r0		! upper half of old r0 stays on top, the low
				! 16 bits of the dividend end up below it
	.rept 8
	div1	r5,r0
	.endr
	mov.b	r0,@(L_LSWMSB,r15) ! low byte of r0 -> bits 8..15 of the scratch word
div_ge64k_end:
	.rept 8
	div1	r5,r0
	.endr
	mov.l	@r15+,r4	! zero-extension and swap using LS unit.
	extu.b	r0,r0		! keep only the last byte produced
	mov.l	@r15+,r5	! restore divisor
	or	r4,r0		! merge with the bytes parked in the scratch word
	mov.l	@r15+,r4	! restore dividend
	rts
	rotcl	r0		! (delay slot) shift in the T bit of the last div1
/*
 * Reciprocal-table division for the neg_result path: same computation
 * as div_le128_2, with the quotient negated before returning.
 *
 * In:  r4 = dividend, r5 = divisor (at most 128), r0 = r5 & 0xffff as
 *      left by neg_result; the original r4 and r5 are on the stack.
 * Out: r0 = negated quotient, r4/r5 restored.
 * Clobbers r1, T, MACH and MACL.
 */
div_le128_neg:
	tst	#0xfe,r0	! T = ((divisor & 0xfe) == 0)
	mova	div_table_ix,r0
	mov.b	@(r0,r5),r1	! r1 = div_table_ix[divisor], sign-extended
	mova	div_table_inv,r0
	bt/s	div_by_1_neg	! divisor 1: the result is -dividend
	mov.l	@(r0,r1),r1	! (delay slot) r1 = reciprocal word
	mova	div_table_clz,r0
	dmulu.l	r1,r4		! MACH:MACL = reciprocal * dividend, unsigned
	mov.b	@(r0,r5),r1	! r1 = div_table_clz[divisor], shift count for shld
	mov.l	@r15+,r5	! restore divisor
	sts	mach,r0		! r0 = high 32 bits of the product
	/* clrt */		! T is already 0 here because bt/s was not taken
	addc	r4,r0		! add the dividend (implicit leading 1 of the
				! reciprocal); T = carry out
	mov.l	@r15+,r4	! restore dividend
	rotcr	r0		! shift the 33-bit sum T:r0 right by one
	shld	r1,r0		! final shift by the table entry
	rts
	neg	r0,r0		! (delay slot) negate the quotient
/*
 * Continuation of the __sdivsi3_i4i entry code for a non-negative
 * divisor.  On entry T = (dividend >= 0), set in the delay slot of the
 * branch that leads here, and r1 = 128.
 * Falls through into neg_result when the dividend is negative.
 */
pos_divisor:
	mov.l	r5,@-r15	! save divisor
	bt/s	pos_result	! both operands non-negative
	cmp/hi	r1,r5		! (delay slot) T = (divisor > 128)
	neg	r4,r4		! r4 = -dividend, fall through to neg_result
/*
 * Division of the adjusted operands for the case where the quotient
 * has to be negated.  Mirrors pos_result, but the quotient is assembled
 * in r1 and negated into r0 in the delay slot of the final rts.
 *
 * In:  r4 = dividend, r5 = divisor (both as left by the __sdivsi3_i4i
 *      entry code), T = (r5 > 128); the original r4 and r5 are on the
 *      stack.
 *   r5 <= 128      -> div_le128_neg
 *   r5 >= 0x10000  -> div_ge64k_neg (rejoins at div_ge64k_neg_end, or at
 *                     div_r8_neg_end via div_r8_neg)
 *   otherwise      -> div1 sequence below
 * Out: r0 = negated quotient, r4/r5 restored from the stack.
 */
neg_result:
	extu.w	r5,r0		! r0 = low 16 bits of the divisor
	bf	div_le128_neg	! divisor <= 128
	cmp/eq	r5,r0		! T = (divisor fits in 16 bits)
	mov	r4,r0
	shlr8	r0		! r0 = dividend >> 8
	bf/s	div_ge64k_neg	! divisor >= 0x10000
	cmp/hi	r0,r5		! (delay slot) T = (divisor > dividend >> 8)
	div0u			! clear M, Q and T
	mov.l	zero_l,r1	! r1 = 0
	shll16	r5		! move the divisor into the upper halfword
	div1	r5,r0
	mov.l	r1,@-r15	! push the zeroed scratch word
	.rept 7
	div1	r5,r0
	.endr
	mov.b	r0,@(L_MSWLSB,r15) ! low byte of r0 -> bits 16..23 of the scratch word
	xtrct	r4,r0		! r0 = (r4 << 16) | (r0 >> 16)
	swap.w	r0,r0		! upper half of old r0 stays on top, the low
				! 16 bits of the dividend end up below it
	.rept 8
	div1	r5,r0
	.endr
	mov.b	r0,@(L_LSWMSB,r15) ! low byte of r0 -> bits 8..15 of the scratch word
div_ge64k_neg_end:
	.rept 8
	div1	r5,r0
	.endr
	mov.l	@r15+,r4	! zero-extension and swap using LS unit.
	extu.b	r0,r1		! r1 = last byte produced
	mov.l	@r15+,r5	! restore divisor
	or	r4,r1		! merge with the bytes parked in the scratch word
div_r8_neg_end:
	mov.l	@r15+,r4	! restore dividend
	rotcl	r1		! shift in the T bit of the last div1
	rts
	neg	r1,r0		! (delay slot) r0 = -quotient
/*
 * neg_result counterpart of div_ge64k, for divisors that do not fit in
 * 16 bits.
 * In:  r0 = dividend >> 8, r4 = dividend, r5 = divisor,
 *      T = (divisor > dividend >> 8) from the delay slot of the branch
 *      that leads here; the original r4 and r5 are on the stack.
 * With T set the work is handed to div_r8_neg.  Otherwise eight div1
 * steps are run here and the remaining eight at div_ge64k_neg_end.
 */
div_ge64k_neg:
	bt/s	div_r8_neg
	div0u			! (delay slot) clear M, Q and T
	shll8	r5		! divisor << 8
	mov.l	zero_l,r1	! r1 = 0
	.rept 6
	div1	r5,r0
	.endr
	mov.l	r1,@-r15	! push a zeroed scratch word for quotient bytes
	div1	r5,r0
	mov.w	m256_w,r1	! r1 = 0xff00 sign-extended, a mask clearing bits 0..7
	div1	r5,r0
	mov.b	r0,@(L_LSWMSB,r15) ! low byte of r0 -> bits 8..15 of the scratch word
	! The xor/and/xor below leaves r0 = (r0 & r1) | (r4 & ~r1), i.e. the
	! low byte of r0 is replaced by the low byte of the dividend.
	xor	r4,r0
	and	r1,r0
	bra	div_ge64k_neg_end
	xor	r4,r0		! (delay slot)
/*
 * 16-bit literal 128, loaded with pc-relative mov.w by both entry
 * points as the upper bound of the table-driven divisor range.
 * It sits between two code sequences; the code before it ends in an
 * unconditional bra, so it is never executed.
 * NOTE(review): presumably placed mid-code to stay within mov.w
 * displacement reach of both entry points - confirm before moving it.
 */
c128_w:
	.word	128
/*
 * neg_result counterpart of div_r8: eight div1 steps for the case
 * divisor > (dividend >> 8).
 * In:  r0 = dividend >> 8, r4 = dividend, r5 = divisor; the original r4
 *      and r5 are on the stack.
 * Here div1 operates on r0.  r1 starts as dividend << 24, feeds those
 * bits to div1 through T with rotcl and collects the T bits that div1
 * produces.  The final rotcl, the restore of r4 and the negation into
 * r0 are done at div_r8_neg_end.
 */
div_r8_neg:
	clrt
	shll16	r4
	mov	r4,r1
	shll8	r1		! r1 = dividend << 24
	mov	r5,r4		! copy of the divisor for the last step, which
				! runs after r5 has been reloaded
	.rept 7
	rotcl	r1; div1 r5,r0
	.endr
	mov.l	@r15+,r5	! restore divisor
	rotcl	r1
	bra	div_r8_neg_end
	div1	r4,r0		! (delay slot) last step, using the copy in r4
/*
 * 16-bit literal 0xff00.  mov.w sign-extends it on load, giving
 * 0xffffff00 (-256), which div_ge64k_2 and div_ge64k_neg use as a mask
 * that clears the low byte.
 */
m256_w:
	.word	0xff00
/* This table has been generated by divtab-sh4.c.  */
/*
 * div_table_clz: one signed byte per divisor, indexed by the divisor
 * itself.  The reciprocal paths load it into r1 and apply it with
 * shld r1,r0 as the final shift (negative values shift right).
 *
 * Only entries 0..127 are emitted here.  The byte for divisor 128 is the
 * first byte of div_table_ix, which must follow immediately with no
 * padding in between.
 */
	.balign 4
div_table_clz:
	.byte	0					/* 0 */
	.byte	1					/* 1 */
	.byte	0					/* 2 */
	.byte	-1, -1					/* 3..4 */
	.byte	-2, -2, -2, -2				/* 5..8 */
	.byte	-3, -3, -3, -3, -3, -3, -3, -3		/* 9..16 */
	.byte	-4, -4, -4, -4, -4, -4, -4, -4		/* 17..24 */
	.byte	-4, -4, -4, -4, -4, -4, -4, -4		/* 25..32 */
	.byte	-5, -5, -5, -5, -5, -5, -5, -5		/* 33..40 */
	.byte	-5, -5, -5, -5, -5, -5, -5, -5		/* 41..48 */
	.byte	-5, -5, -5, -5, -5, -5, -5, -5		/* 49..56 */
	.byte	-5, -5, -5, -5, -5, -5, -5, -5		/* 57..64 */
	.byte	-6, -6, -6, -6, -6, -6, -6, -6		/* 65..72 */
	.byte	-6, -6, -6, -6, -6, -6, -6, -6		/* 73..80 */
	.byte	-6, -6, -6, -6, -6, -6, -6, -6		/* 81..88 */
	.byte	-6, -6, -6, -6, -6, -6, -6, -6		/* 89..96 */
	.byte	-6, -6, -6, -6, -6, -6, -6, -6		/* 97..104 */
	.byte	-6, -6, -6, -6, -6, -6, -6, -6		/* 105..112 */
	.byte	-6, -6, -6, -6, -6, -6, -6, -6		/* 113..120 */
	.byte	-6, -6, -6, -6, -6, -6, -6		/* 121..127 */
/* Lookup table translating positive divisor to index into table of
   normalized inverse.  N.B. the '0' entry is also the last entry of the
   previous table, and causes an unaligned access for division by zero.  */
/*
 * Entry d is a signed byte offset that the reciprocal paths add to the
 * address of div_table_inv to fetch the reciprocal word for divisor d.
 * Entries 1..128 are multiples of 4 in the range -128..124; entry 0 is
 * the -6 shared with div_table_clz.
 */
div_table_ix:
	.byte	-6						/* 0 */
	.byte	-128						/* 1 */
	.byte	-128, 0						/* 2..3 */
	.byte	-128, -64, 0, 64				/* 4..7 */
	.byte	-128, -96, -64, -32, 0, 32, 64, 96		/* 8..15 */
	.byte	-128, -112, -96, -80, -64, -48, -32, -16	/* 16..23 */
	.byte	0, 16, 32, 48, 64, 80, 96, 112			/* 24..31 */
	.byte	-128, -120, -112, -104, -96, -88, -80, -72	/* 32..39 */
	.byte	-64, -56, -48, -40, -32, -24, -16, -8		/* 40..47 */
	.byte	0, 8, 16, 24, 32, 40, 48, 56			/* 48..55 */
	.byte	64, 72, 80, 88, 96, 104, 112, 120		/* 56..63 */
	.byte	-128, -124, -120, -116, -112, -108, -104, -100	/* 64..71 */
	.byte	-96, -92, -88, -84, -80, -76, -72, -68		/* 72..79 */
	.byte	-64, -60, -56, -52, -48, -44, -40, -36		/* 80..87 */
	.byte	-32, -28, -24, -20, -16, -12, -8, -4		/* 88..95 */
	.byte	0, 4, 8, 12, 16, 20, 24, 28			/* 96..103 */
	.byte	32, 36, 40, 44, 48, 52, 56, 60			/* 104..111 */
	.byte	64, 68, 72, 76, 80, 84, 88, 92			/* 112..119 */
	.byte	96, 100, 104, 108, 112, 116, 120, 124		/* 120..127 */
	.byte	-128						/* 128 */
/* 1/64 .. 1/127, normalized.  There is an implicit leading 1 in bit 32.  */
/*
 * 64 words, one per normalized divisor n = 64..127 (noted beside each
 * entry).  The label div_table_inv sits in the middle, at n = 96, so
 * that the signed byte offsets from div_table_ix (-128..124) reach the
 * whole table.
 * zero_l, the first word, is also loaded as a plain constant 0 by the
 * div1 paths (mov.l zero_l,r1).
 */
	.balign 4
zero_l:
	.long	0x0		! n = 64
	.long	0xF81F81F9	! n = 65
	.long	0xF07C1F08	! n = 66
	.long	0xE9131AC0	! n = 67
	.long	0xE1E1E1E2	! n = 68
	.long	0xDAE6076C	! n = 69
	.long	0xD41D41D5	! n = 70
	.long	0xCD856891	! n = 71
	.long	0xC71C71C8	! n = 72
	.long	0xC0E07039	! n = 73
	.long	0xBACF914D	! n = 74
	.long	0xB4E81B4F	! n = 75
	.long	0xAF286BCB	! n = 76
	.long	0xA98EF607	! n = 77
	.long	0xA41A41A5	! n = 78
	.long	0x9EC8E952	! n = 79
	.long	0x9999999A	! n = 80
	.long	0x948B0FCE	! n = 81
	.long	0x8F9C18FA	! n = 82
	.long	0x8ACB90F7	! n = 83
	.long	0x86186187	! n = 84
	.long	0x81818182	! n = 85
	.long	0x7D05F418	! n = 86
	.long	0x78A4C818	! n = 87
	.long	0x745D1746	! n = 88
	.long	0x702E05C1	! n = 89
	.long	0x6C16C16D	! n = 90
	.long	0x68168169	! n = 91
	.long	0x642C8591	! n = 92
	.long	0x60581606	! n = 93
	.long	0x5C9882BA	! n = 94
	.long	0x58ED2309	! n = 95
div_table_inv:
	.long	0x55555556	! n = 96
	.long	0x51D07EAF	! n = 97
	.long	0x4E5E0A73	! n = 98
	.long	0x4AFD6A06	! n = 99
	.long	0x47AE147B	! n = 100
	.long	0x446F8657	! n = 101
	.long	0x41414142	! n = 102
	.long	0x3E22CBCF	! n = 103
	.long	0x3B13B13C	! n = 104
	.long	0x38138139	! n = 105
	.long	0x3521CFB3	! n = 106
	.long	0x323E34A3	! n = 107
	.long	0x2F684BDB	! n = 108
	.long	0x2C9FB4D9	! n = 109
	.long	0x29E4129F	! n = 110
	.long	0x27350B89	! n = 111
	.long	0x24924925	! n = 112
	.long	0x21FB7813	! n = 113
	.long	0x1F7047DD	! n = 114
	.long	0x1CF06ADB	! n = 115
	.long	0x1A7B9612	! n = 116
	.long	0x18118119	! n = 117
	.long	0x15B1E5F8	! n = 118
	.long	0x135C8114	! n = 119
	.long	0x11111112	! n = 120
	.long	0xECF56BF	! n = 121
	.long	0xC9714FC	! n = 122
	.long	0xA6810A7	! n = 123
	.long	0x8421085	! n = 124
	.long	0x624DD30	! n = 125
	.long	0x4104105	! n = 126
	.long	0x2040811	! n = 127
	/* maximum error: 0.987342 scaled: 0.921875*/