Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/lib/libc/quad/qdivrem.c
39476 views
1
/*-
2
* SPDX-License-Identifier: BSD-3-Clause
3
*
4
* Copyright (c) 1992, 1993
5
* The Regents of the University of California. All rights reserved.
6
*
7
* This software was developed by the Computer Systems Engineering group
8
* at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
9
* contributed to Berkeley.
10
*
11
* Redistribution and use in source and binary forms, with or without
12
* modification, are permitted provided that the following conditions
13
* are met:
14
* 1. Redistributions of source code must retain the above copyright
15
* notice, this list of conditions and the following disclaimer.
16
* 2. Redistributions in binary form must reproduce the above copyright
17
* notice, this list of conditions and the following disclaimer in the
18
* documentation and/or other materials provided with the distribution.
19
* 3. Neither the name of the University nor the names of its contributors
20
* may be used to endorse or promote products derived from this software
21
* without specific prior written permission.
22
*
23
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33
* SUCH DAMAGE.
34
*/
35
36
/*
37
* Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed),
38
* section 4.3.1, pp. 257--259.
39
*/
40
41
#include "quad.h"
42
43
#define B (1L << HALF_BITS) /* digit base */
44
45
/* Combine two `digits' to make a single two-digit number. */
46
#define COMBINE(a, b) (((u_long)(a) << HALF_BITS) | (b))
47
48
/* select a type for digits in base B: use unsigned short if they fit */
49
#if ULONG_MAX == 0xffffffff && USHRT_MAX >= 0xffff
50
typedef unsigned short digit;
51
#else
52
typedef u_long digit;
53
#endif
54
55
/*
56
* Shift p[0]..p[len] left `sh' bits, ignoring any bits that
57
* `fall out' the left (there never will be any such anyway).
58
* We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS.
59
*/
60
static void
61
shl(digit *p, int len, int sh)
62
{
63
int i;
64
65
for (i = 0; i < len; i++)
66
p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh));
67
p[i] = LHALF(p[i] << sh);
68
}
69
70
/*
71
* __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v.
72
*
73
* We do this in base 2-sup-HALF_BITS, so that all intermediate products
74
* fit within u_long. As a consequence, the maximum length dividend and
75
* divisor are 4 `digits' in this base (they are shorter if they have
76
* leading zeros).
77
*/
78
u_quad_t
79
__qdivrem(u_quad_t uq, u_quad_t vq, u_quad_t *arq)
80
{
81
union uu tmp;
82
digit *u, *v, *q;
83
digit v1, v2;
84
u_long qhat, rhat, t;
85
int m, n, d, j, i;
86
digit uspace[5], vspace[5], qspace[5];
87
88
/*
89
* Take care of special cases: divide by zero, and u < v.
90
*/
91
if (__predict_false(vq == 0)) {
92
/* divide by zero. */
93
static volatile const unsigned int zero = 0;
94
95
tmp.ul[H] = tmp.ul[L] = 1 / zero;
96
if (arq)
97
*arq = uq;
98
return (tmp.q);
99
}
100
if (uq < vq) {
101
if (arq)
102
*arq = uq;
103
return (0);
104
}
105
u = &uspace[0];
106
v = &vspace[0];
107
q = &qspace[0];
108
109
/*
110
* Break dividend and divisor into digits in base B, then
111
* count leading zeros to determine m and n. When done, we
112
* will have:
113
* u = (u[1]u[2]...u[m+n]) sub B
114
* v = (v[1]v[2]...v[n]) sub B
115
* v[1] != 0
116
* 1 < n <= 4 (if n = 1, we use a different division algorithm)
117
* m >= 0 (otherwise u < v, which we already checked)
118
* m + n = 4
119
* and thus
120
* m = 4 - n <= 2
121
*/
122
tmp.uq = uq;
123
u[0] = 0;
124
u[1] = HHALF(tmp.ul[H]);
125
u[2] = LHALF(tmp.ul[H]);
126
u[3] = HHALF(tmp.ul[L]);
127
u[4] = LHALF(tmp.ul[L]);
128
tmp.uq = vq;
129
v[1] = HHALF(tmp.ul[H]);
130
v[2] = LHALF(tmp.ul[H]);
131
v[3] = HHALF(tmp.ul[L]);
132
v[4] = LHALF(tmp.ul[L]);
133
for (n = 4; v[1] == 0; v++) {
134
if (--n == 1) {
135
u_long rbj; /* r*B+u[j] (not root boy jim) */
136
digit q1, q2, q3, q4;
137
138
/*
139
* Change of plan, per exercise 16.
140
* r = 0;
141
* for j = 1..4:
142
* q[j] = floor((r*B + u[j]) / v),
143
* r = (r*B + u[j]) % v;
144
* We unroll this completely here.
145
*/
146
t = v[2]; /* nonzero, by definition */
147
q1 = u[1] / t;
148
rbj = COMBINE(u[1] % t, u[2]);
149
q2 = rbj / t;
150
rbj = COMBINE(rbj % t, u[3]);
151
q3 = rbj / t;
152
rbj = COMBINE(rbj % t, u[4]);
153
q4 = rbj / t;
154
if (arq)
155
*arq = rbj % t;
156
tmp.ul[H] = COMBINE(q1, q2);
157
tmp.ul[L] = COMBINE(q3, q4);
158
return (tmp.q);
159
}
160
}
161
162
/*
163
* By adjusting q once we determine m, we can guarantee that
164
* there is a complete four-digit quotient at &qspace[1] when
165
* we finally stop.
166
*/
167
for (m = 4 - n; u[1] == 0; u++)
168
m--;
169
for (i = 4 - m; --i >= 0;)
170
q[i] = 0;
171
q += 4 - m;
172
173
/*
174
* Here we run Program D, translated from MIX to C and acquiring
175
* a few minor changes.
176
*
177
* D1: choose multiplier 1 << d to ensure v[1] >= B/2.
178
*/
179
d = 0;
180
for (t = v[1]; t < B / 2; t <<= 1)
181
d++;
182
if (d > 0) {
183
shl(&u[0], m + n, d); /* u <<= d */
184
shl(&v[1], n - 1, d); /* v <<= d */
185
}
186
/*
187
* D2: j = 0.
188
*/
189
j = 0;
190
v1 = v[1]; /* for D3 -- note that v[1..n] are constant */
191
v2 = v[2]; /* for D3 */
192
do {
193
digit uj0, uj1, uj2;
194
195
/*
196
* D3: Calculate qhat (\^q, in TeX notation).
197
* Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and
198
* let rhat = (u[j]*B + u[j+1]) mod v[1].
199
* While rhat < B and v[2]*qhat > rhat*B+u[j+2],
200
* decrement qhat and increase rhat correspondingly.
201
* Note that if rhat >= B, v[2]*qhat < rhat*B.
202
*/
203
uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */
204
uj1 = u[j + 1]; /* for D3 only */
205
uj2 = u[j + 2]; /* for D3 only */
206
if (uj0 == v1) {
207
qhat = B;
208
rhat = uj1;
209
goto qhat_too_big;
210
} else {
211
u_long n = COMBINE(uj0, uj1);
212
qhat = n / v1;
213
rhat = n % v1;
214
}
215
while (v2 * qhat > COMBINE(rhat, uj2)) {
216
qhat_too_big:
217
qhat--;
218
if ((rhat += v1) >= B)
219
break;
220
}
221
/*
222
* D4: Multiply and subtract.
223
* The variable `t' holds any borrows across the loop.
224
* We split this up so that we do not require v[0] = 0,
225
* and to eliminate a final special case.
226
*/
227
for (t = 0, i = n; i > 0; i--) {
228
t = u[i + j] - v[i] * qhat - t;
229
u[i + j] = LHALF(t);
230
t = (B - HHALF(t)) & (B - 1);
231
}
232
t = u[j] - t;
233
u[j] = LHALF(t);
234
/*
235
* D5: test remainder.
236
* There is a borrow if and only if HHALF(t) is nonzero;
237
* in that (rare) case, qhat was too large (by exactly 1).
238
* Fix it by adding v[1..n] to u[j..j+n].
239
*/
240
if (HHALF(t)) {
241
qhat--;
242
for (t = 0, i = n; i > 0; i--) { /* D6: add back. */
243
t += u[i + j] + v[i];
244
u[i + j] = LHALF(t);
245
t = HHALF(t);
246
}
247
u[j] = LHALF(u[j] + t);
248
}
249
q[j] = qhat;
250
} while (++j <= m); /* D7: loop on j. */
251
252
/*
253
* If caller wants the remainder, we have to calculate it as
254
* u[m..m+n] >> d (this is at most n digits and thus fits in
255
* u[m+1..m+n], but we may need more source digits).
256
*/
257
if (arq) {
258
if (d) {
259
for (i = m + n; i > m; --i)
260
u[i] = (u[i] >> d) |
261
LHALF(u[i - 1] << (HALF_BITS - d));
262
u[i] = 0;
263
}
264
tmp.ul[H] = COMBINE(uspace[1], uspace[2]);
265
tmp.ul[L] = COMBINE(uspace[3], uspace[4]);
266
*arq = tmp.q;
267
}
268
269
tmp.ul[H] = COMBINE(qspace[1], qspace[2]);
270
tmp.ul[L] = COMBINE(qspace[3], qspace[4]);
271
return (tmp.q);
272
}
273
274