Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/libkern/qdivrem.c
34815 views
1
/*-
2
* SPDX-License-Identifier: BSD-3-Clause
3
*
4
* Copyright (c) 1992, 1993
5
* The Regents of the University of California. All rights reserved.
6
*
7
* This software was developed by the Computer Systems Engineering group
8
* at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
9
* contributed to Berkeley.
10
*
11
* Redistribution and use in source and binary forms, with or without
12
* modification, are permitted provided that the following conditions
13
* are met:
14
* 1. Redistributions of source code must retain the above copyright
15
* notice, this list of conditions and the following disclaimer.
16
* 2. Redistributions in binary form must reproduce the above copyright
17
* notice, this list of conditions and the following disclaimer in the
18
* documentation and/or other materials provided with the distribution.
19
* 3. Neither the name of the University nor the names of its contributors
20
* may be used to endorse or promote products derived from this software
21
* without specific prior written permission.
22
*
23
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33
* SUCH DAMAGE.
34
*/
35
36
#include <sys/cdefs.h>
37
/*
38
* Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed),
39
* section 4.3.1, pp. 257--259.
40
*/
41
42
#include <libkern/quad.h>
43
44
#define B (1 << HALF_BITS) /* digit base */
45
46
/* Combine two `digits' to make a single two-digit number. */
47
#define COMBINE(a, b) (((u_long)(a) << HALF_BITS) | (b))
48
49
/* select a type for digits in base B: use unsigned short if they fit */
50
#if ULONG_MAX == 0xffffffff && USHRT_MAX >= 0xffff
51
typedef unsigned short digit;
52
#else
53
typedef u_long digit;
54
#endif
55
56
/*
57
* Shift p[0]..p[len] left `sh' bits, ignoring any bits that
58
* `fall out' the left (there never will be any such anyway).
59
* We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS.
60
*/
61
static void
62
__shl(digit *p, int len, int sh)
63
{
64
int i;
65
66
for (i = 0; i < len; i++)
67
p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh));
68
p[i] = LHALF(p[i] << sh);
69
}
70
71
/*
72
* __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v.
73
*
74
* We do this in base 2-sup-HALF_BITS, so that all intermediate products
75
* fit within u_long. As a consequence, the maximum length dividend and
76
* divisor are 4 `digits' in this base (they are shorter if they have
77
* leading zeros).
78
*/
79
u_quad_t
80
__qdivrem(u_quad_t uq, u_quad_t vq, u_quad_t *arq)
81
{
82
union uu tmp;
83
digit *u, *v, *q;
84
digit v1, v2;
85
u_long qhat, rhat, t;
86
int m, n, d, j, i;
87
digit uspace[5], vspace[5], qspace[5];
88
89
/*
90
* Take care of special cases: divide by zero, and u < v.
91
*/
92
if (__predict_false(vq == 0)) {
93
/* divide by zero. */
94
static volatile const unsigned int zero = 0;
95
96
tmp.ul[H] = tmp.ul[L] = 1 / zero;
97
if (arq)
98
*arq = uq;
99
return (tmp.q);
100
}
101
if (uq < vq) {
102
if (arq)
103
*arq = uq;
104
return (0);
105
}
106
u = &uspace[0];
107
v = &vspace[0];
108
q = &qspace[0];
109
110
/*
111
* Break dividend and divisor into digits in base B, then
112
* count leading zeros to determine m and n. When done, we
113
* will have:
114
* u = (u[1]u[2]...u[m+n]) sub B
115
* v = (v[1]v[2]...v[n]) sub B
116
* v[1] != 0
117
* 1 < n <= 4 (if n = 1, we use a different division algorithm)
118
* m >= 0 (otherwise u < v, which we already checked)
119
* m + n = 4
120
* and thus
121
* m = 4 - n <= 2
122
*/
123
tmp.uq = uq;
124
u[0] = 0;
125
u[1] = HHALF(tmp.ul[H]);
126
u[2] = LHALF(tmp.ul[H]);
127
u[3] = HHALF(tmp.ul[L]);
128
u[4] = LHALF(tmp.ul[L]);
129
tmp.uq = vq;
130
v[1] = HHALF(tmp.ul[H]);
131
v[2] = LHALF(tmp.ul[H]);
132
v[3] = HHALF(tmp.ul[L]);
133
v[4] = LHALF(tmp.ul[L]);
134
for (n = 4; v[1] == 0; v++) {
135
if (--n == 1) {
136
u_long rbj; /* r*B+u[j] (not root boy jim) */
137
digit q1, q2, q3, q4;
138
139
/*
140
* Change of plan, per exercise 16.
141
* r = 0;
142
* for j = 1..4:
143
* q[j] = floor((r*B + u[j]) / v),
144
* r = (r*B + u[j]) % v;
145
* We unroll this completely here.
146
*/
147
t = v[2]; /* nonzero, by definition */
148
q1 = u[1] / t;
149
rbj = COMBINE(u[1] % t, u[2]);
150
q2 = rbj / t;
151
rbj = COMBINE(rbj % t, u[3]);
152
q3 = rbj / t;
153
rbj = COMBINE(rbj % t, u[4]);
154
q4 = rbj / t;
155
if (arq)
156
*arq = rbj % t;
157
tmp.ul[H] = COMBINE(q1, q2);
158
tmp.ul[L] = COMBINE(q3, q4);
159
return (tmp.q);
160
}
161
}
162
163
/*
164
* By adjusting q once we determine m, we can guarantee that
165
* there is a complete four-digit quotient at &qspace[1] when
166
* we finally stop.
167
*/
168
for (m = 4 - n; u[1] == 0; u++)
169
m--;
170
for (i = 4 - m; --i >= 0;)
171
q[i] = 0;
172
q += 4 - m;
173
174
/*
175
* Here we run Program D, translated from MIX to C and acquiring
176
* a few minor changes.
177
*
178
* D1: choose multiplier 1 << d to ensure v[1] >= B/2.
179
*/
180
d = 0;
181
for (t = v[1]; t < B / 2; t <<= 1)
182
d++;
183
if (d > 0) {
184
__shl(&u[0], m + n, d); /* u <<= d */
185
__shl(&v[1], n - 1, d); /* v <<= d */
186
}
187
/*
188
* D2: j = 0.
189
*/
190
j = 0;
191
v1 = v[1]; /* for D3 -- note that v[1..n] are constant */
192
v2 = v[2]; /* for D3 */
193
do {
194
digit uj0, uj1, uj2;
195
196
/*
197
* D3: Calculate qhat (\^q, in TeX notation).
198
* Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and
199
* let rhat = (u[j]*B + u[j+1]) mod v[1].
200
* While rhat < B and v[2]*qhat > rhat*B+u[j+2],
201
* decrement qhat and increase rhat correspondingly.
202
* Note that if rhat >= B, v[2]*qhat < rhat*B.
203
*/
204
uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */
205
uj1 = u[j + 1]; /* for D3 only */
206
uj2 = u[j + 2]; /* for D3 only */
207
if (uj0 == v1) {
208
qhat = B;
209
rhat = uj1;
210
goto qhat_too_big;
211
} else {
212
u_long nn = COMBINE(uj0, uj1);
213
qhat = nn / v1;
214
rhat = nn % v1;
215
}
216
while (v2 * qhat > COMBINE(rhat, uj2)) {
217
qhat_too_big:
218
qhat--;
219
if ((rhat += v1) >= B)
220
break;
221
}
222
/*
223
* D4: Multiply and subtract.
224
* The variable `t' holds any borrows across the loop.
225
* We split this up so that we do not require v[0] = 0,
226
* and to eliminate a final special case.
227
*/
228
for (t = 0, i = n; i > 0; i--) {
229
t = u[i + j] - v[i] * qhat - t;
230
u[i + j] = LHALF(t);
231
t = (B - HHALF(t)) & (B - 1);
232
}
233
t = u[j] - t;
234
u[j] = LHALF(t);
235
/*
236
* D5: test remainder.
237
* There is a borrow if and only if HHALF(t) is nonzero;
238
* in that (rare) case, qhat was too large (by exactly 1).
239
* Fix it by adding v[1..n] to u[j..j+n].
240
*/
241
if (HHALF(t)) {
242
qhat--;
243
for (t = 0, i = n; i > 0; i--) { /* D6: add back. */
244
t += u[i + j] + v[i];
245
u[i + j] = LHALF(t);
246
t = HHALF(t);
247
}
248
u[j] = LHALF(u[j] + t);
249
}
250
q[j] = qhat;
251
} while (++j <= m); /* D7: loop on j. */
252
253
/*
254
* If caller wants the remainder, we have to calculate it as
255
* u[m..m+n] >> d (this is at most n digits and thus fits in
256
* u[m+1..m+n], but we may need more source digits).
257
*/
258
if (arq) {
259
if (d) {
260
for (i = m + n; i > m; --i)
261
u[i] = (u[i] >> d) |
262
LHALF(u[i - 1] << (HALF_BITS - d));
263
u[i] = 0;
264
}
265
tmp.ul[H] = COMBINE(uspace[1], uspace[2]);
266
tmp.ul[L] = COMBINE(uspace[3], uspace[4]);
267
*arq = tmp.q;
268
}
269
270
tmp.ul[H] = COMBINE(qspace[1], qspace[2]);
271
tmp.ul[L] = COMBINE(qspace[3], qspace[4]);
272
return (tmp.q);
273
}
274
275