/*-1* SPDX-License-Identifier: BSD-3-Clause2*3* Copyright (c) 1992, 19934* The Regents of the University of California. All rights reserved.5*6* This software was developed by the Computer Systems Engineering group7* at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and8* contributed to Berkeley.9*10* Redistribution and use in source and binary forms, with or without11* modification, are permitted provided that the following conditions12* are met:13* 1. Redistributions of source code must retain the above copyright14* notice, this list of conditions and the following disclaimer.15* 2. Redistributions in binary form must reproduce the above copyright16* notice, this list of conditions and the following disclaimer in the17* documentation and/or other materials provided with the distribution.18* 3. Neither the name of the University nor the names of its contributors19* may be used to endorse or promote products derived from this software20* without specific prior written permission.21*22* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND23* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE24* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE25* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE26* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL27* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS28* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)29* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT30* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY31* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF32* SUCH DAMAGE.33*/3435/*36* Multiprecision divide. This algorithm is from Knuth vol. 2 (2nd ed),37* section 4.3.1, pp. 257--259.38*/3940#include "quad.h"4142#define B (1L << HALF_BITS) /* digit base */4344/* Combine two `digits' to make a single two-digit number. */45#define COMBINE(a, b) (((u_long)(a) << HALF_BITS) | (b))4647/* select a type for digits in base B: use unsigned short if they fit */48#if ULONG_MAX == 0xffffffff && USHRT_MAX >= 0xffff49typedef unsigned short digit;50#else51typedef u_long digit;52#endif5354/*55* Shift p[0]..p[len] left `sh' bits, ignoring any bits that56* `fall out' the left (there never will be any such anyway).57* We may assume len >= 0. NOTE THAT THIS WRITES len+1 DIGITS.58*/59static void60shl(digit *p, int len, int sh)61{62int i;6364for (i = 0; i < len; i++)65p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh));66p[i] = LHALF(p[i] << sh);67}6869/*70* __qdivrem(u, v, rem) returns u/v and, optionally, sets *rem to u%v.71*72* We do this in base 2-sup-HALF_BITS, so that all intermediate products73* fit within u_long. As a consequence, the maximum length dividend and74* divisor are 4 `digits' in this base (they are shorter if they have75* leading zeros).76*/77u_quad_t78__qdivrem(u_quad_t uq, u_quad_t vq, u_quad_t *arq)79{80union uu tmp;81digit *u, *v, *q;82digit v1, v2;83u_long qhat, rhat, t;84int m, n, d, j, i;85digit uspace[5], vspace[5], qspace[5];8687/*88* Take care of special cases: divide by zero, and u < v.89*/90if (__predict_false(vq == 0)) {91/* divide by zero. */92static volatile const unsigned int zero = 0;9394tmp.ul[H] = tmp.ul[L] = 1 / zero;95if (arq)96*arq = uq;97return (tmp.q);98}99if (uq < vq) {100if (arq)101*arq = uq;102return (0);103}104u = &uspace[0];105v = &vspace[0];106q = &qspace[0];107108/*109* Break dividend and divisor into digits in base B, then110* count leading zeros to determine m and n. When done, we111* will have:112* u = (u[1]u[2]...u[m+n]) sub B113* v = (v[1]v[2]...v[n]) sub B114* v[1] != 0115* 1 < n <= 4 (if n = 1, we use a different division algorithm)116* m >= 0 (otherwise u < v, which we already checked)117* m + n = 4118* and thus119* m = 4 - n <= 2120*/121tmp.uq = uq;122u[0] = 0;123u[1] = HHALF(tmp.ul[H]);124u[2] = LHALF(tmp.ul[H]);125u[3] = HHALF(tmp.ul[L]);126u[4] = LHALF(tmp.ul[L]);127tmp.uq = vq;128v[1] = HHALF(tmp.ul[H]);129v[2] = LHALF(tmp.ul[H]);130v[3] = HHALF(tmp.ul[L]);131v[4] = LHALF(tmp.ul[L]);132for (n = 4; v[1] == 0; v++) {133if (--n == 1) {134u_long rbj; /* r*B+u[j] (not root boy jim) */135digit q1, q2, q3, q4;136137/*138* Change of plan, per exercise 16.139* r = 0;140* for j = 1..4:141* q[j] = floor((r*B + u[j]) / v),142* r = (r*B + u[j]) % v;143* We unroll this completely here.144*/145t = v[2]; /* nonzero, by definition */146q1 = u[1] / t;147rbj = COMBINE(u[1] % t, u[2]);148q2 = rbj / t;149rbj = COMBINE(rbj % t, u[3]);150q3 = rbj / t;151rbj = COMBINE(rbj % t, u[4]);152q4 = rbj / t;153if (arq)154*arq = rbj % t;155tmp.ul[H] = COMBINE(q1, q2);156tmp.ul[L] = COMBINE(q3, q4);157return (tmp.q);158}159}160161/*162* By adjusting q once we determine m, we can guarantee that163* there is a complete four-digit quotient at &qspace[1] when164* we finally stop.165*/166for (m = 4 - n; u[1] == 0; u++)167m--;168for (i = 4 - m; --i >= 0;)169q[i] = 0;170q += 4 - m;171172/*173* Here we run Program D, translated from MIX to C and acquiring174* a few minor changes.175*176* D1: choose multiplier 1 << d to ensure v[1] >= B/2.177*/178d = 0;179for (t = v[1]; t < B / 2; t <<= 1)180d++;181if (d > 0) {182shl(&u[0], m + n, d); /* u <<= d */183shl(&v[1], n - 1, d); /* v <<= d */184}185/*186* D2: j = 0.187*/188j = 0;189v1 = v[1]; /* for D3 -- note that v[1..n] are constant */190v2 = v[2]; /* for D3 */191do {192digit uj0, uj1, uj2;193194/*195* D3: Calculate qhat (\^q, in TeX notation).196* Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and197* let rhat = (u[j]*B + u[j+1]) mod v[1].198* While rhat < B and v[2]*qhat > rhat*B+u[j+2],199* decrement qhat and increase rhat correspondingly.200* Note that if rhat >= B, v[2]*qhat < rhat*B.201*/202uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */203uj1 = u[j + 1]; /* for D3 only */204uj2 = u[j + 2]; /* for D3 only */205if (uj0 == v1) {206qhat = B;207rhat = uj1;208goto qhat_too_big;209} else {210u_long n = COMBINE(uj0, uj1);211qhat = n / v1;212rhat = n % v1;213}214while (v2 * qhat > COMBINE(rhat, uj2)) {215qhat_too_big:216qhat--;217if ((rhat += v1) >= B)218break;219}220/*221* D4: Multiply and subtract.222* The variable `t' holds any borrows across the loop.223* We split this up so that we do not require v[0] = 0,224* and to eliminate a final special case.225*/226for (t = 0, i = n; i > 0; i--) {227t = u[i + j] - v[i] * qhat - t;228u[i + j] = LHALF(t);229t = (B - HHALF(t)) & (B - 1);230}231t = u[j] - t;232u[j] = LHALF(t);233/*234* D5: test remainder.235* There is a borrow if and only if HHALF(t) is nonzero;236* in that (rare) case, qhat was too large (by exactly 1).237* Fix it by adding v[1..n] to u[j..j+n].238*/239if (HHALF(t)) {240qhat--;241for (t = 0, i = n; i > 0; i--) { /* D6: add back. */242t += u[i + j] + v[i];243u[i + j] = LHALF(t);244t = HHALF(t);245}246u[j] = LHALF(u[j] + t);247}248q[j] = qhat;249} while (++j <= m); /* D7: loop on j. */250251/*252* If caller wants the remainder, we have to calculate it as253* u[m..m+n] >> d (this is at most n digits and thus fits in254* u[m+1..m+n], but we may need more source digits).255*/256if (arq) {257if (d) {258for (i = m + n; i > m; --i)259u[i] = (u[i] >> d) |260LHALF(u[i - 1] << (HALF_BITS - d));261u[i] = 0;262}263tmp.ul[H] = COMBINE(uspace[1], uspace[2]);264tmp.ul[L] = COMBINE(uspace[3], uspace[4]);265*arq = tmp.q;266}267268tmp.ul[H] = COMBINE(qspace[1], qspace[2]);269tmp.ul[L] = COMBINE(qspace[3], qspace[4]);270return (tmp.q);271}272273274