Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/sh/lib/udivsi3_i4i.S
26424 views
1
/* SPDX-License-Identifier: GPL-2.0+ WITH GCC-exception-2.0
2
3
Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
4
2004, 2005, 2006
5
Free Software Foundation, Inc.
6
*/
7
8
!! libgcc routines for the Renesas / SuperH SH CPUs.
9
!! Contributed by Steve Chamberlain.
10
!! sac@cygnus.com
11
12
!! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines
13
!! recoded in assembly by Toshiyasu Morita
14
!! tm@netcom.com
15
16
/* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and
17
ELF local label prefixes by J"orn Rennecke
18
[email protected] */
19
20
/* This code uses shld, and thus is not suitable for SH1 / SH2. */
21
22
/* Signed / unsigned division without use of FPU, optimized for SH4.
23
Uses a lookup table for divisors in the range -128 .. +128, and
24
div1 with case distinction for larger divisors in three more ranges.
25
The code is lumped together with the table to allow the use of mova. */
26
/* Byte offsets, within a 32-bit word held in memory, of individual bytes
   of that word, selected by the configured endianness:
     L_LSB    - least significant byte (bits 0..7),
     L_LSWMSB - upper byte of the low 16-bit half (bits 8..15),
     L_MSWLSB - lower byte of the high 16-bit half (bits 16..23).
   The division code below uses L_LSWMSB and L_MSWLSB as mov.b
   displacements off r15 to drop bytes of r0 into a scratch word on the
   stack.  */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
27
#define L_LSB 0
28
#define L_LSWMSB 1
29
#define L_MSWLSB 2
30
#else
31
#define L_LSB 3
32
#define L_LSWMSB 2
33
#define L_MSWLSB 1
34
#endif
35
36
/* __udivsi3_i4i (alias __udivsi3_i4): unsigned 32-bit division.
   The lookup tables are indexed with r5 and the looked-up inverse is
   multiplied by r4, i.e. r4 is the dividend and r5 the divisor; every path
   leaves the quotient in r0.  r4 and r5 are pushed on the stack and popped
   again before returning; r1 is used as scratch.
   This entry sequence classifies the divisor:
     r5 <= 128        -> udiv_le128 (table lookup and multiply)
     r5 >= 0x10000    -> udiv_ge64k
     otherwise        -> falls through, starts the div1 sequence and
                         continues at udiv_25.  */
.balign 4
37
.global __udivsi3_i4i
38
.global __udivsi3_i4
39
.set __udivsi3_i4, __udivsi3_i4i
40
.type __udivsi3_i4i, @function
41
__udivsi3_i4i:
42
/* r1 = 128, the largest divisor handled by the table path.  */
mov.w c128_w, r1
43
div0u
44
mov r4,r0
45
/* r0 = dividend >> 8.  */
shlr8 r0
46
/* T = (divisor > 128), unsigned compare.  */
cmp/hi r1,r5
47
/* r1 = low 16 bits of the divisor, zero-extended.  */
extu.w r5,r1
48
bf udiv_le128
49
/* T = (divisor fits in 16 bits).  */
cmp/eq r5,r1
50
bf udiv_ge64k
51
shlr r0
52
/* Keep the unshifted divisor in r1 so that it can be pushed below.  */
mov r5,r1
53
shll16 r5
54
/* The pushes of the original r4 and r5 values are interleaved with the
   first div1 steps.  */
mov.l r4,@-r15
55
div1 r5,r0
56
mov.l r1,@-r15
57
div1 r5,r0
58
div1 r5,r0
59
bra udiv_25
60
/* Executed in the delay slot of the bra above.  */
div1 r5,r0
61
62
/* Divisors up to 128: multiply by a tabulated inverse instead of dividing.
   div_le128 is branched to from the signed routine, which has already
   pushed r4 and r5; udiv_le128 is the unsigned entry and pushes them here.
   Both load r1 = div_table_ix[r5] (a signed byte offset relative to
   div_table_inv) and meet at div_le128_2.  */
div_le128:
63
mova div_table_ix,r0
64
bra div_le128_2
65
/* Delay slot of the bra: r1 = div_table_ix[divisor].  */
mov.b @(r0,r5),r1
66
udiv_le128:
67
mov.l r4,@-r15
68
mova div_table_ix,r0
69
mov.b @(r0,r5),r1
70
mov.l r5,@-r15
71
div_le128_2:
72
mova div_table_inv,r0
73
/* r1 = 32-bit inverse stored at div_table_inv + r1.  */
mov.l @(r0,r1),r1
74
mov r5,r0
75
/* T = 1 when no divisor bit other than bit 0 is set (divisor 0 or 1).  */
tst #0xfe,r0
76
mova div_table_clz,r0
77
/* mach:macl = inverse * dividend (unsigned 32x32 -> 64 bit).  */
dmulu.l r1,r4
78
/* r1 = shift count for this divisor from div_table_clz.  */
mov.b @(r0,r5),r1
79
bt/s div_by_1
80
/* Delay slot: r0 = dividend, which is what div_by_1 returns.  */
mov r4,r0
81
mov.l @r15+,r5
82
sts mach,r0
83
/* clrt */
84
/* No clrt is needed: T is still 0 from the tst above on this fall-through
   path.  Adding the dividend to the high product word accounts for the
   implicit leading 1 of the table entries; the carry out lands in T.  */
addc r4,r0
85
mov.l @r15+,r4
86
/* Rotate right through T, bringing the carry of the addc into bit 31.  */
rotcr r0
87
rts
88
/* Delay slot of the rts: apply the tabulated shift count to the result.  */
shld r1,r0
89
90
/* Exits taken by the table paths when their tst #0xfe test set T
   (divisor 0 or 1).  div_by_1_neg first sets r0 = -r4; div_by_1 expects
   the result to be in r0 already.  Both restore r5 and r4 from the stack
   and return.  */
div_by_1_neg:
91
neg r4,r0
92
div_by_1:
93
mov.l @r15+,r5
94
rts
95
/* Delay slot of the rts.  */
mov.l @r15+,r4
96
97
/* Divisors that do not fit in 16 bits.  On entry r0 = r4 >> 8.
   div_ge64k is branched to from the signed routine with r4 and r5 already
   pushed and T = (r5 > r0) set in the delay slot of that branch;
   udiv_ge64k is the unsigned entry and makes the same comparison itself.
   When the comparison holds (the quotient is then below 256) control goes
   to div_r8 / udiv_r8.  Otherwise the divisor is shifted left by 8 and
   eight div1 steps are run here before continuing at div_ge64k_end.  */
div_ge64k:
98
bt/s div_r8
99
/* Delay slot: executed whether or not the branch is taken.  */
div0u
100
shll8 r5
101
bra div_ge64k_2
102
/* Delay slot of the bra: first div1 step.  */
div1 r5,r0
103
udiv_ge64k:
104
/* T = (divisor > dividend >> 8), unsigned compare.  */
cmp/hi r0,r5
105
/* Keep the unshifted divisor in r1 so that it can be pushed below.  */
mov r5,r1
106
bt udiv_r8
107
shll8 r5
108
mov.l r4,@-r15
109
div1 r5,r0
110
mov.l r1,@-r15
111
div_ge64k_2:
112
div1 r5,r0
113
/* r1 = 0.  */
mov.l zero_l,r1
114
.rept 4
115
div1 r5,r0
116
.endr
117
/* Push a zeroed scratch word; bytes of r0 are stored into it below.  */
mov.l r1,@-r15
118
div1 r5,r0
119
/* mov.w sign-extends, so r1 = 0xffffff00.  */
mov.w m256_w,r1
120
div1 r5,r0
121
/* Store the low byte of r0 into bits 8..15 of the scratch word.  */
mov.b r0,@(L_LSWMSB,r15)
122
/* The xor / and / xor sequence computes
   r0 = (r0 & 0xffffff00) | (r4 & 0xff).  */
xor r4,r0
123
and r1,r0
124
bra div_ge64k_end
125
/* Delay slot of the bra: last step of the merge above.  */
xor r4,r0
126
127
/* Case divisor > (dividend >> 8): eight div1 steps.
   div_r8 is branched to from div_ge64k (r4 and r5 already pushed);
   udiv_r8 is the unsigned entry and pushes r4 and r5 here.  Both shift r4
   left by 24 in total and join at div_r8_2.  There r1 takes over the
   value of r0 as the div1 operand, r0 receives the shifted r4 and is
   rotated with rotcl between the div1 steps, and r4 gets a copy of the
   divisor so that the last div1 can run after r5 has been restored.  */
div_r8:
128
shll16 r4
129
bra div_r8_2
130
/* Delay slot of the bra.  */
shll8 r4
131
udiv_r8:
132
mov.l r4,@-r15
133
shll16 r4
134
/* T = 0 before the rotcl at div_r8_2.  */
clrt
135
shll8 r4
136
mov.l r5,@-r15
137
div_r8_2:
138
rotcl r4
139
mov r0,r1
140
div1 r5,r1
141
mov r4,r0
142
rotcl r0
143
/* r4 = divisor, used by the final div1 once r5 has been popped.  */
mov r5,r4
144
div1 r5,r1
145
.rept 5
146
rotcl r0; div1 r5,r1
147
.endr
148
rotcl r0
149
mov.l @r15+,r5
150
div1 r4,r1
151
mov.l @r15+,r4
152
rts
153
/* Delay slot of the rts: final rotate of T into r0.  */
rotcl r0
154
155
/* __sdivsi3_i4i (aliases __sdivsi3_i4 and __sdivsi3): signed 32-bit
   division of r4 by r5, quotient in r0.  r4 and r5 are pushed, negated
   where negative, and then run through magnitude-based paths like those
   of the unsigned routine.  pos_result is used when the operand signs
   agree, neg_result when they differ.  */
.global __sdivsi3_i4i
156
.global __sdivsi3_i4
157
.global __sdivsi3
158
.set __sdivsi3_i4, __sdivsi3_i4i
159
.set __sdivsi3, __sdivsi3_i4i
160
.type __sdivsi3_i4i, @function
161
/* This is link-compatible with a __sdivsi3 call,
162
but we effectively clobber only r1. */
163
__sdivsi3_i4i:
164
mov.l r4,@-r15
165
/* T = (divisor >= 0).  */
cmp/pz r5
166
/* r1 = 128, the largest divisor magnitude handled by the table path.  */
mov.w c128_w, r1
167
bt/s pos_divisor
168
/* Delay slot, executed on both paths: T = (dividend >= 0).  */
cmp/pz r4
169
mov.l r5,@-r15
170
/* The divisor is negative here; r5 = its magnitude.  */
neg r5,r5
171
bt/s neg_result
172
/* Delay slot, executed on both paths: T = (r5 > 128), unsigned.  */
cmp/hi r1,r5
173
/* Both operands negative: r4 = magnitude of the dividend.  */
neg r4,r4
174
pos_result:
175
extu.w r5,r0
176
/* T still holds (r5 > 128) from the cmp/hi in the delay slot of the
   branch or fall-through that led here.  */
bf div_le128
177
/* T = (divisor fits in 16 bits).  */
cmp/eq r5,r0
178
mov r4,r0
179
/* r0 = dividend >> 8.  */
shlr8 r0
180
/* Branches on the cmp/eq result above.  */
bf/s div_ge64k
181
/* Delay slot: T = (r5 > r0), tested at div_ge64k.  */
cmp/hi r0,r5
182
div0u
183
shll16 r5
184
div1 r5,r0
185
div1 r5,r0
186
div1 r5,r0
187
/* The unsigned routine branches here for its 16-bit divisor case.  */
udiv_25:
188
/* r1 = 0.  */
mov.l zero_l,r1
189
div1 r5,r0
190
div1 r5,r0
191
/* Push a zeroed scratch word; bytes of r0 are stored into it below.  */
mov.l r1,@-r15
192
.rept 3
193
div1 r5,r0
194
.endr
195
/* Store the low byte of r0 into bits 16..23 of the scratch word.  */
mov.b r0,@(L_MSWLSB,r15)
196
/* xtrct followed by swap.w gives
   r0 = (r0 & 0xffff0000) | (r4 & 0xffff).  */
xtrct r4,r0
197
swap.w r0,r0
198
.rept 8
199
div1 r5,r0
200
.endr
201
/* Store the low byte of r0 into bits 8..15 of the scratch word.  */
mov.b r0,@(L_LSWMSB,r15)
202
div_ge64k_end:
203
.rept 8
204
div1 r5,r0
205
.endr
206
/* Pop the scratch word into r4, combine it with the low byte of r0,
   then restore r5 and r4.  */
mov.l @r15+,r4 ! zero-extension and swap using LS unit.
207
extu.b r0,r0
208
mov.l @r15+,r5
209
or r4,r0
210
mov.l @r15+,r4
211
rts
212
/* Delay slot of the rts: final rotate of T into r0.  */
rotcl r0
213
214
/* Table path for divisor magnitudes up to 128 when the operand signs
   differ.  Branched to from neg_result with r0 = low 16 bits of r5 and
   with r4 and r5 already pushed.  Performs the same lookup, multiply and
   shift as div_le128_2 and negates the result in the delay slot of the
   rts.  */
div_le128_neg:
215
/* T = 1 when no divisor bit other than bit 0 is set (divisor 0 or 1).  */
tst #0xfe,r0
216
mova div_table_ix,r0
217
/* r1 = div_table_ix[divisor], a signed byte offset from div_table_inv.  */
mov.b @(r0,r5),r1
218
mova div_table_inv,r0
219
bt/s div_by_1_neg
220
/* Delay slot, executed on both paths: r1 = 32-bit inverse.  */
mov.l @(r0,r1),r1
221
mova div_table_clz,r0
222
/* mach:macl = inverse * dividend magnitude (unsigned 32x32 -> 64 bit).  */
dmulu.l r1,r4
223
/* r1 = shift count for this divisor from div_table_clz.  */
mov.b @(r0,r5),r1
224
mov.l @r15+,r5
225
sts mach,r0
226
/* clrt */
227
/* No clrt is needed: T is still 0 from the tst above on this fall-through
   path.  The carry out of the addition lands in T.  */
addc r4,r0
228
mov.l @r15+,r4
229
/* Rotate right through T, bringing the carry of the addc into bit 31.  */
rotcr r0
230
shld r1,r0
231
rts
232
/* Delay slot of the rts: negate the quotient.  */
neg r0,r0
233
234
/* pos_divisor: branched to from __sdivsi3_i4i when the divisor is
   non-negative; T holds (dividend >= 0) from the delay slot of that
   branch.  A non-negative dividend continues at pos_result; otherwise r4
   is negated and control falls into neg_result.
   neg_result: the operand signs differ, r4 and r5 hold magnitudes, and the
   quotient is negated on exit.  It classifies the divisor the same way as
   pos_result and handles the 16-bit divisor case inline.
   div_ge64k_neg_end and div_r8_neg_end are shared exit points for the
   other negative-result paths.  */
pos_divisor:
235
mov.l r5,@-r15
236
bt/s pos_result
237
/* Delay slot, executed on both paths: T = (r5 > 128), unsigned.  */
cmp/hi r1,r5
238
/* Negative dividend: r4 = its magnitude.  */
neg r4,r4
239
neg_result:
240
extu.w r5,r0
241
/* T still holds (r5 > 128) from the cmp/hi in the delay slot of the
   branch or fall-through that led here.  */
bf div_le128_neg
242
/* T = (divisor fits in 16 bits).  */
cmp/eq r5,r0
243
mov r4,r0
244
/* r0 = dividend magnitude >> 8.  */
shlr8 r0
245
/* Branches on the cmp/eq result above.  */
bf/s div_ge64k_neg
246
/* Delay slot: T = (r5 > r0), tested at div_ge64k_neg.  */
cmp/hi r0,r5
247
div0u
248
/* r1 = 0.  */
mov.l zero_l,r1
249
shll16 r5
250
div1 r5,r0
251
/* Push a zeroed scratch word; bytes of r0 are stored into it below.  */
mov.l r1,@-r15
252
.rept 7
253
div1 r5,r0
254
.endr
255
/* Store the low byte of r0 into bits 16..23 of the scratch word.  */
mov.b r0,@(L_MSWLSB,r15)
256
/* xtrct followed by swap.w gives
   r0 = (r0 & 0xffff0000) | (r4 & 0xffff).  */
xtrct r4,r0
257
swap.w r0,r0
258
.rept 8
259
div1 r5,r0
260
.endr
261
/* Store the low byte of r0 into bits 8..15 of the scratch word.  */
mov.b r0,@(L_LSWMSB,r15)
262
div_ge64k_neg_end:
263
.rept 8
264
div1 r5,r0
265
.endr
266
/* Pop the scratch word into r4 and combine it with the low byte of r0 in
   r1, then restore r5.  */
mov.l @r15+,r4 ! zero-extension and swap using LS unit.
267
extu.b r0,r1
268
mov.l @r15+,r5
269
or r4,r1
270
div_r8_neg_end:
271
mov.l @r15+,r4
272
/* Final rotate of T into r1.  */
rotcl r1
273
rts
274
/* Delay slot of the rts: r0 = -r1.  */
neg r1,r0
275
276
/* Negative-result counterpart of div_ge64k: divisor magnitude does not fit
   in 16 bits.  Branched to from neg_result with r0 = r4 >> 8, r4 and r5
   already pushed, and T = (r5 > r0) set in the delay slot of that branch.
   When T is set control goes to div_r8_neg; otherwise the divisor is
   shifted left by 8 and eight div1 steps are run here before continuing
   at div_ge64k_neg_end.  */
div_ge64k_neg:
277
bt/s div_r8_neg
278
/* Delay slot: executed whether or not the branch is taken.  */
div0u
279
shll8 r5
280
/* r1 = 0.  */
mov.l zero_l,r1
281
.rept 6
282
div1 r5,r0
283
.endr
284
/* Push a zeroed scratch word; a byte of r0 is stored into it below.  */
mov.l r1,@-r15
285
div1 r5,r0
286
/* mov.w sign-extends, so r1 = 0xffffff00.  */
mov.w m256_w,r1
287
div1 r5,r0
288
/* Store the low byte of r0 into bits 8..15 of the scratch word.  */
mov.b r0,@(L_LSWMSB,r15)
289
/* The xor / and / xor sequence computes
   r0 = (r0 & 0xffffff00) | (r4 & 0xff).  */
xor r4,r0
290
and r1,r0
291
bra div_ge64k_neg_end
292
/* Delay slot of the bra: last step of the merge above.  */
xor r4,r0
293
294
/* 16-bit constant 128, loaded with mov.w by both entry points.  It sits
   in the instruction stream directly after an unconditional bra and its
   delay slot, so execution never falls into it.  */
c128_w:
295
.word 128
296
297
/* Negative-result counterpart of div_r8: eight div1 steps for the case
   r5 > (r4 >> 8).  Branched to from div_ge64k_neg with r4 and r5 already
   pushed.  r1 receives r4 shifted left by 24 in total and is rotated with
   rotcl between the div1 steps on r0; r4 gets a copy of the divisor so
   that the last div1 can run after r5 has been restored.  The exit code
   at div_r8_neg_end restores r4 and returns the negated r1 in r0.  */
div_r8_neg:
298
clrt
299
shll16 r4
300
mov r4,r1
301
shll8 r1
302
/* r4 = divisor, used by the final div1 once r5 has been popped.  */
mov r5,r4
303
.rept 7
304
rotcl r1; div1 r5,r0
305
.endr
306
mov.l @r15+,r5
307
rotcl r1
308
bra div_r8_neg_end
309
/* Delay slot of the bra: eighth div1 step.  */
div1 r4,r0
310
311
/* 16-bit constant 0xff00; mov.w sign-extends it on load, giving the mask
   0xffffff00 (-256) used by the div_ge64k paths.  It sits in the
   instruction stream directly after an unconditional bra and its delay
   slot, so execution never falls into it.  */
m256_w:
312
.word 0xff00
313
/* This table has been generated by divtab-sh4.c. */
314
/* Shift counts, one signed byte per divisor, read with r5 as the index
   (mov.b @(r0,r5),r1 after mova div_table_clz,r0) and applied with
   shld r1,r0 at the end of the table-based division.  For divisors from 2
   upwards the entry is -k for a divisor in the range (2^k, 2^(k+1)].
   Only indices 0..127 are stored under this label; index 128 is the first
   byte of div_table_ix, which follows immediately.  */
.balign 4
315
div_table_clz:
316
.byte 0
317
.byte 1
318
.byte 0
319
.byte -1
320
.byte -1
321
.byte -2
322
.byte -2
323
.byte -2
324
.byte -2
325
.byte -3
326
.byte -3
327
.byte -3
328
.byte -3
329
.byte -3
330
.byte -3
331
.byte -3
332
.byte -3
333
.byte -4
334
.byte -4
335
.byte -4
336
.byte -4
337
.byte -4
338
.byte -4
339
.byte -4
340
.byte -4
341
.byte -4
342
.byte -4
343
.byte -4
344
.byte -4
345
.byte -4
346
.byte -4
347
.byte -4
348
.byte -4
349
.byte -5
350
.byte -5
351
.byte -5
352
.byte -5
353
.byte -5
354
.byte -5
355
.byte -5
356
.byte -5
357
.byte -5
358
.byte -5
359
.byte -5
360
.byte -5
361
.byte -5
362
.byte -5
363
.byte -5
364
.byte -5
365
.byte -5
366
.byte -5
367
.byte -5
368
.byte -5
369
.byte -5
370
.byte -5
371
.byte -5
372
.byte -5
373
.byte -5
374
.byte -5
375
.byte -5
376
.byte -5
377
.byte -5
378
.byte -5
379
.byte -5
380
.byte -5
381
.byte -6
382
.byte -6
383
.byte -6
384
.byte -6
385
.byte -6
386
.byte -6
387
.byte -6
388
.byte -6
389
.byte -6
390
.byte -6
391
.byte -6
392
.byte -6
393
.byte -6
394
.byte -6
395
.byte -6
396
.byte -6
397
.byte -6
398
.byte -6
399
.byte -6
400
.byte -6
401
.byte -6
402
.byte -6
403
.byte -6
404
.byte -6
405
.byte -6
406
.byte -6
407
.byte -6
408
.byte -6
409
.byte -6
410
.byte -6
411
.byte -6
412
.byte -6
413
.byte -6
414
.byte -6
415
.byte -6
416
.byte -6
417
.byte -6
418
.byte -6
419
.byte -6
420
.byte -6
421
.byte -6
422
.byte -6
423
.byte -6
424
.byte -6
425
.byte -6
426
.byte -6
427
.byte -6
428
.byte -6
429
.byte -6
430
.byte -6
431
.byte -6
432
.byte -6
433
.byte -6
434
.byte -6
435
.byte -6
436
.byte -6
437
.byte -6
438
.byte -6
439
.byte -6
440
.byte -6
441
.byte -6
442
.byte -6
443
.byte -6
444
/* Lookup table translating positive divisor to index into table of
445
normalized inverse. N.B. the '0' entry is also the last entry of the
446
previous table, and causes an unaligned access for division by zero. */
447
/* One signed byte per divisor 0..128, read with r5 as the index.  Each
   value is the byte offset, relative to div_table_inv, of the 32-bit
   inverse to use for that divisor (mov.l @(r0,r1),r1 after
   mova div_table_inv,r0).  Divisors that differ only by a power-of-two
   factor share one offset (e.g. 3 and 6 both map to 0), and the powers of
   two map to -128, which is the zero_l entry.  */
div_table_ix:
448
.byte -6
449
.byte -128
450
.byte -128
451
.byte 0
452
.byte -128
453
.byte -64
454
.byte 0
455
.byte 64
456
.byte -128
457
.byte -96
458
.byte -64
459
.byte -32
460
.byte 0
461
.byte 32
462
.byte 64
463
.byte 96
464
.byte -128
465
.byte -112
466
.byte -96
467
.byte -80
468
.byte -64
469
.byte -48
470
.byte -32
471
.byte -16
472
.byte 0
473
.byte 16
474
.byte 32
475
.byte 48
476
.byte 64
477
.byte 80
478
.byte 96
479
.byte 112
480
.byte -128
481
.byte -120
482
.byte -112
483
.byte -104
484
.byte -96
485
.byte -88
486
.byte -80
487
.byte -72
488
.byte -64
489
.byte -56
490
.byte -48
491
.byte -40
492
.byte -32
493
.byte -24
494
.byte -16
495
.byte -8
496
.byte 0
497
.byte 8
498
.byte 16
499
.byte 24
500
.byte 32
501
.byte 40
502
.byte 48
503
.byte 56
504
.byte 64
505
.byte 72
506
.byte 80
507
.byte 88
508
.byte 96
509
.byte 104
510
.byte 112
511
.byte 120
512
.byte -128
513
.byte -124
514
.byte -120
515
.byte -116
516
.byte -112
517
.byte -108
518
.byte -104
519
.byte -100
520
.byte -96
521
.byte -92
522
.byte -88
523
.byte -84
524
.byte -80
525
.byte -76
526
.byte -72
527
.byte -68
528
.byte -64
529
.byte -60
530
.byte -56
531
.byte -52
532
.byte -48
533
.byte -44
534
.byte -40
535
.byte -36
536
.byte -32
537
.byte -28
538
.byte -24
539
.byte -20
540
.byte -16
541
.byte -12
542
.byte -8
543
.byte -4
544
.byte 0
545
.byte 4
546
.byte 8
547
.byte 12
548
.byte 16
549
.byte 20
550
.byte 24
551
.byte 28
552
.byte 32
553
.byte 36
554
.byte 40
555
.byte 44
556
.byte 48
557
.byte 52
558
.byte 56
559
.byte 60
560
.byte 64
561
.byte 68
562
.byte 72
563
.byte 76
564
.byte 80
565
.byte 84
566
.byte 88
567
.byte 92
568
.byte 96
569
.byte 100
570
.byte 104
571
.byte 108
572
.byte 112
573
.byte 116
574
.byte 120
575
.byte 124
576
.byte -128
577
/* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */
578
/* 64 consecutive 32-bit entries.  zero_l labels the first entry and
   div_table_inv the 33rd, so the signed byte offsets -128..124 stored in
   div_table_ix, taken relative to div_table_inv, cover the whole table.
   zero_l is also loaded on its own (mov.l zero_l,r1) as the constant 0.  */
.balign 4
579
zero_l:
580
.long 0x0
581
.long 0xF81F81F9
582
.long 0xF07C1F08
583
.long 0xE9131AC0
584
.long 0xE1E1E1E2
585
.long 0xDAE6076C
586
.long 0xD41D41D5
587
.long 0xCD856891
588
.long 0xC71C71C8
589
.long 0xC0E07039
590
.long 0xBACF914D
591
.long 0xB4E81B4F
592
.long 0xAF286BCB
593
.long 0xA98EF607
594
.long 0xA41A41A5
595
.long 0x9EC8E952
596
.long 0x9999999A
597
.long 0x948B0FCE
598
.long 0x8F9C18FA
599
.long 0x8ACB90F7
600
.long 0x86186187
601
.long 0x81818182
602
.long 0x7D05F418
603
.long 0x78A4C818
604
.long 0x745D1746
605
.long 0x702E05C1
606
.long 0x6C16C16D
607
.long 0x68168169
608
.long 0x642C8591
609
.long 0x60581606
610
.long 0x5C9882BA
611
.long 0x58ED2309
612
div_table_inv:
613
.long 0x55555556
614
.long 0x51D07EAF
615
.long 0x4E5E0A73
616
.long 0x4AFD6A06
617
.long 0x47AE147B
618
.long 0x446F8657
619
.long 0x41414142
620
.long 0x3E22CBCF
621
.long 0x3B13B13C
622
.long 0x38138139
623
.long 0x3521CFB3
624
.long 0x323E34A3
625
.long 0x2F684BDB
626
.long 0x2C9FB4D9
627
.long 0x29E4129F
628
.long 0x27350B89
629
.long 0x24924925
630
.long 0x21FB7813
631
.long 0x1F7047DD
632
.long 0x1CF06ADB
633
.long 0x1A7B9612
634
.long 0x18118119
635
.long 0x15B1E5F8
636
.long 0x135C8114
637
.long 0x11111112
638
.long 0xECF56BF
639
.long 0xC9714FC
640
.long 0xA6810A7
641
.long 0x8421085
642
.long 0x624DD30
643
.long 0x4104105
644
.long 0x2040811
645
/* maximum error: 0.987342 scaled: 0.921875*/
646
647