Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/sh/lib/udivsi3_i4i-Os.S
26424 views
1
/* SPDX-License-Identifier: GPL-2.0+ WITH GCC-exception-2.0
2
*
3
* Copyright (C) 2006 Free Software Foundation, Inc.
4
*/
5
6
/* Moderately Space-optimized libgcc routines for the Renesas SH /
7
STMicroelectronics ST40 CPUs.
8
Contributed by J"orn Rennecke [email protected]. */
9
10
/* Size: 186 bytes jointly for udivsi3_i4i and sdivsi3_i4i
11
sh4-200 run times:
12
udiv small divisor: 55 cycles
13
udiv large divisor: 52 cycles
14
sdiv small divisor, positive result: 59 cycles
15
sdiv large divisor, positive result: 56 cycles
16
sdiv small divisor, negative result: 65 cycles (*)
17
sdiv large divisor, negative result: 62 cycles (*)
18
(*): r2 is restored in the delay slot of the returning jmp and has a lingering latency
19
of two more cycles. */
20
/*
 * __udivsi3_i4i (also exported as __udivsi3_i4): entry point of the
 * unsigned division routine.  It reads r4 and r5 and builds the result
 * in r0.  r4 and r5 are pushed on the r15 stack and popped again by the
 * tail that finishes the division.  PR is copied into r1 because the
 * bsr calls made later overwrite PR; the tails leave through jmp @r1.
 * NOTE(review): r4 = dividend and r5 = divisor is inferred from the
 * "divisor" label names and the div1 operand order - confirm against
 * the libgcc calling convention for this routine.
 */
.balign 4
21
.global __udivsi3_i4i
22
.global __udivsi3_i4
23
.set __udivsi3_i4, __udivsi3_i4i /* alias: both names share one address */
24
.type __udivsi3_i4i, @function
25
.type __sdivsi3_i4i, @function
26
__udivsi3_i4i:
27
sts pr,r1 /* r1 = return address */
28
mov.l r4,@-r15 /* push r4 */
29
extu.w r5,r0 /* r0 = low 16 bits of r5, zero-extended */
30
cmp/eq r5,r0 /* T = 1 when r5 fits in 16 bits */
31
swap.w r4,r0 /* r0 = r4 with its two 16-bit halves exchanged */
32
shlr16 r4 /* r4 = upper 16 bits of the original r4 */
33
bf/s large_divisor /* T == 0: r5 does not fit in 16 bits */
34
div0u /* delay slot, runs on both paths: initialise unsigned division state */
35
mov.l r5,@-r15 /* push r5 (large_divisor performs its own push) */
36
shll16 r5 /* r5 <<= 16, then fall through into sdiv_small_divisor */
37
/*
 * sdiv_small_divisor: shared tail for divisors that fit in 16 bits.
 * Reached by fall-through from __udivsi3_i4i and by a bra from
 * sdiv_check_divisor.  On entry: r5 = divisor shifted left by 16,
 * r4 = upper 16 bits of the dividend, r0 = dividend with its halves
 * swapped, r1 = address to jump to when done.  r4 and r5 were pushed
 * by the entry code and are popped here.
 * The body issues 32 div1 steps in two groups of 16; each bsr carries
 * one div1 (or the swap.w) in its delay slot.
 */
sdiv_small_divisor:
38
div1 r5,r4 /* first group: step 1 */
39
bsr div6 /* steps 3-8 */
40
div1 r5,r4 /* delay slot: step 2 */
41
div1 r5,r4 /* step 9 */
42
bsr div6 /* steps 11-16 */
43
div1 r5,r4 /* delay slot: step 10 */
44
xtrct r4,r0 /* r0 = low half of r4 : old high half of r0 */
45
xtrct r0,r4 /* r4 = low half of r0 : old high half of r4 */
46
bsr div7 /* second group: steps 1-7 */
47
swap.w r4,r4 /* delay slot: exchange the halves of r4 before dividing on */
48
div1 r5,r4 /* step 8 */
49
bsr div7 /* steps 10-16 */
50
div1 r5,r4 /* delay slot: step 9 */
51
xtrct r4,r0 /* r0 = low half of r4 : old high half of r0 */
52
mov.l @r15+,r5 /* pop r5 */
53
swap.w r0,r0 /* put the two 16-bit result halves in their final order */
54
mov.l @r15+,r4 /* pop r4 */
55
jmp @r1 /* leave through the address saved in r1 */
56
rotcl r0 /* delay slot: shift the last T bit into bit 0 of r0 */
57
/*
 * div7 / div6: helpers called with bsr.  div7 executes one div1 and
 * falls through into div6, which executes six more, so the totals are
 * seven and six div1 steps.  The final div1 sits in the rts delay slot.
 * The ";" characters on the two long lines separate statements.
 */
div7:
58
div1 r5,r4
59
div6:
60
div1 r5,r4; div1 r5,r4; div1 r5,r4
61
div1 r5,r4; div1 r5,r4; rts; div1 r5,r4
62
63
/*
 * divx3: helper called with bsr from the .rept loop in
 * sdiv_large_divisor.  It executes three "rotcl r0, div1 r5,r4" pairs;
 * the third div1 is in the rts delay slot.
 */
divx3:
64
rotcl r0 /* rotate r0 left through T */
65
div1 r5,r4
66
rotcl r0
67
div1 r5,r4
68
rotcl r0
69
rts
70
div1 r5,r4 /* delay slot of rts */
71
72
/*
 * large_divisor / sdiv_large_divisor: shared tail for divisors that do
 * not fit in 16 bits.  large_divisor is the unsigned entry and pushes
 * r5 first; the signed path enters at sdiv_large_divisor with r5
 * already pushed.  On entry r4 holds the upper 16 bits of the dividend,
 * r0 the dividend with its halves swapped, r1 the exit address.
 * Sixteen rotcl/div1 pairs are executed: each .rept iteration supplies
 * one pair itself and three more through divx3.
 */
large_divisor:
73
mov.l r5,@-r15 /* push r5 */
74
sdiv_large_divisor:
75
xor r4,r0 /* cancels the low half of r0; its high half keeps the dividend's low 16 bits */
76
.rept 4
77
rotcl r0 /* rotate r0 left through T */
78
bsr divx3 /* three more rotcl/div1 pairs */
79
div1 r5,r4 /* delay slot: the div1 paired with the rotcl above */
80
.endr
81
mov.l @r15+,r5 /* pop r5 */
82
mov.l @r15+,r4 /* pop r4 */
83
jmp @r1 /* leave through the address saved in r1 */
84
rotcl r0 /* delay slot: shift the last T bit into bit 0 of r0 */
85
86
/*
 * __sdivsi3_i4i (also exported as __sdivsi3_i4 and __sdivsi3): entry
 * point of the signed division routine.  It pushes r4 and r5, replaces
 * negative operands by their negation, and then reuses the division
 * tails sdiv_small_divisor / sdiv_large_divisor.
 * When exactly one operand was negative the exit address placed in r1
 * is negate_result, which negates r0 before returning; otherwise r1 is
 * the caller's return address taken from PR.
 * The T bit set by cmp/eq (divisor fits in 16 bits) stays live until
 * the bf/s at sdiv_check_divisor.
 * NOTE(review): r4 = dividend and r5 = divisor is inferred from the
 * label names - confirm against the libgcc calling convention.
 */
.global __sdivsi3_i4i
87
.global __sdivsi3_i4
88
.global __sdivsi3
89
.set __sdivsi3_i4, __sdivsi3_i4i
90
.set __sdivsi3, __sdivsi3_i4i
91
__sdivsi3_i4i:
92
mov.l r4,@-r15 /* push r4 */
93
cmp/pz r5 /* T = 1 when r5 >= 0 */
94
mov.l r5,@-r15 /* push r5 */
95
bt/s pos_divisor
96
cmp/pz r4 /* delay slot, runs on both paths: T = 1 when r4 >= 0 */
97
neg r5,r5 /* r5 was negative: negate it */
98
extu.w r5,r0 /* r0 = low 16 bits of r5, zero-extended */
99
bt/s neg_result /* r4 >= 0 with a negative r5: result must be negated */
100
cmp/eq r5,r0 /* delay slot: T = 1 when r5 fits in 16 bits */
101
neg r4,r4 /* both operands were negative: negate r4, result stays as is */
102
pos_result:
103
swap.w r4,r0 /* r0 = r4 with its two 16-bit halves exchanged */
104
bra sdiv_check_divisor
105
sts pr,r1 /* delay slot: r1 = caller's return address */
106
pos_divisor:
107
extu.w r5,r0 /* r0 = low 16 bits of r5, zero-extended */
108
bt/s pos_result /* r4 >= 0 as well: no negation needed */
109
cmp/eq r5,r0 /* delay slot: T = 1 when r5 fits in 16 bits */
110
neg r4,r4 /* r4 was negative: negate it and fall into neg_result */
111
neg_result:
112
mova negate_result,r0 /* r0 = address of negate_result */
113
;
114
mov r0,r1 /* the division tail will exit through negate_result */
115
swap.w r4,r0 /* r0 = r4 with its two 16-bit halves exchanged */
116
lds r2,macl /* stash the caller's r2 in MACL */
117
sts pr,r2 /* r2 = caller's return address, used by negate_result */
118
sdiv_check_divisor:
119
shlr16 r4 /* r4 = upper 16 bits of the original r4 */
120
bf/s sdiv_large_divisor /* T == 0: r5 does not fit in 16 bits */
121
div0u /* delay slot, runs on both paths: initialise unsigned division state */
122
bra sdiv_small_divisor
123
shll16 r5 /* delay slot: r5 <<= 16, as the small-divisor tail expects */
124
/*
 * negate_result: exit stub used when the signed result has to be
 * negated.  The division tails jump here through r1, which neg_result
 * loaded with this address.  It negates r0, restores the caller's r2
 * from MACL and returns through the address that neg_result placed in
 * r2.  The label is the target of a mova and is kept 4-byte aligned.
 */
.balign 4
125
negate_result:
126
neg r0,r0 /* r0 = -r0 */
127
jmp @r2 /* return to the caller */
128
sts macl,r2 /* delay slot: restore the caller's r2 */
129
130