GitHub Repository: freebsd/freebsd-src
Path: blob/main/sys/powerpc/fpu/fpu_mul.c
/*	$NetBSD: fpu_mul.c,v 1.4 2005/12/11 12:18:42 christos Exp $ */

/*
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Lawrence Berkeley Laboratory.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * Perform an FPU multiply (return x * y).
 */

#include <sys/types.h>
#include <sys/systm.h>

#include <machine/fpu.h>

#include <powerpc/fpu/fpu_arith.h>
#include <powerpc/fpu/fpu_emu.h>

/*
 * The multiplication algorithm for normal numbers is as follows:
 *
 * The fraction of the product is built in the usual stepwise fashion.
 * Each step consists of shifting the accumulator right one bit
 * (maintaining any guard bits) and, if the next bit in y is set,
 * adding the multiplicand (x) to the accumulator.  Then, in any case,
 * we advance one bit leftward in y.  Algorithmically:
 *
 *	A = 0;
 *	for (bit = 0; bit < FP_NMANT; bit++) {
 *		sticky |= A & 1, A >>= 1;
 *		if (Y & (1 << bit))
 *			A += X;
 *	}
 *
 * (X and Y here represent the mantissas of x and y respectively.)
 * The resultant accumulator (A) is the product's mantissa.  It may
 * be as large as 11.11111... in binary and hence may need to be
 * shifted right, but at most one bit.
 *
 * Since we do not have efficient multiword arithmetic, we code the
 * accumulator as four separate words, just like any other mantissa.
 * We use local variables in the hope that this is faster than memory.
 * We keep x->fp_mant in locals for the same reason.
 *
 * In the algorithm above, the bits in y are inspected one at a time.
 * We will pick them up 32 at a time and then deal with those 32, one
 * at a time.  Note, however, that we know several things about y:
 *
 *    - the guard and round bits at the bottom are sure to be zero;
 *
 *    - often many low bits are zero (y is often from a single or double
 *	precision source);
 *
 *    - bit FP_NMANT-1 is set, and FP_1*2 fits in a word.
 *
 * We can also test for 32-zero-bits swiftly.  In this case, the center
 * part of the loop---setting sticky, shifting A, and not adding---will
 * run 32 times without adding X to A.  We can do a 32-bit shift faster
 * by simply moving words.  Since zeros are common, we optimize this case.
 * Furthermore, since A is initially zero, we can omit the shift as well
 * until we reach a nonzero word.
 */
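
/*
 * Concretely: both input mantissas are normalized so that the leading
 * 1 sits at bit FP_NMANT-1, i.e. each represents a value in [1.0, 2.0),
 * so the product lies in [1.0, 4.0) -- at most 11.111... in binary --
 * which is why a single normalizing right shift always suffices.
 * A sticky shift of A = ...0101 likewise yields A = ...0010 with
 * sticky set: the discarded bit is remembered only as "something
 * nonzero was shifted out", which is all the later rounding step needs.
 */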
struct fpn *
fpu_mul(struct fpemu *fe)
{
	struct fpn *x = &fe->fe_f1, *y = &fe->fe_f2;
	u_int a3, a2, a1, a0, x3, x2, x1, x0, bit, m;
	int sticky;
	FPU_DECL_CARRY;

	/*
	 * Put the `heavier' operand on the right (see fpu_emu.h).
	 * Then we will have one of the following cases, taken in the
	 * following order:
	 *
	 *  - y = NaN.  Implied: if only one is a signalling NaN, y is.
	 *	The result is y.
	 *  - y = Inf.  Implied: x != NaN (is 0, number, or Inf: the NaN
	 *    case was taken care of earlier).
	 *	If x = 0, the result is NaN.  Otherwise the result
	 *	is y, with its sign reversed if x is negative.
	 *  - x = 0.  Implied: y is 0 or number.
	 *	The result is 0 (with XORed sign as usual).
	 *  - other.  Implied: both x and y are numbers.
	 *	The result is x * y (XOR sign, multiply bits, add exponents).
	 */
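	/*
	 * For example: Inf * 0 raises VXIMZ and yields a NaN,
	 * Inf * -3 yields -Inf, and 0 * -3 yields -0.
	 */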
	DPRINTF(FPE_REG, ("fpu_mul:\n"));
	DUMPFPN(FPE_REG, x);
	DUMPFPN(FPE_REG, y);
	DPRINTF(FPE_REG, ("=>\n"));

	ORDER(x, y);
	if (ISNAN(y)) {
		y->fp_sign ^= x->fp_sign;
		fe->fe_cx |= FPSCR_VXSNAN;
		DUMPFPN(FPE_REG, y);
		return (y);
	}
	if (ISINF(y)) {
		if (ISZERO(x)) {
			fe->fe_cx |= FPSCR_VXIMZ;
			return (fpu_newnan(fe));
		}
		y->fp_sign ^= x->fp_sign;
		DUMPFPN(FPE_REG, y);
		return (y);
	}
	if (ISZERO(x)) {
		x->fp_sign ^= y->fp_sign;
		DUMPFPN(FPE_REG, x);
		return (x);
	}

	/*
	 * Setup.  In the code below, the mask `m' will hold the current
	 * mantissa word from y.  The variable `bit' denotes the bit
	 * within m.  We also define some macros to deal with everything.
	 */
	x3 = x->fp_mant[3];
	x2 = x->fp_mant[2];
	x1 = x->fp_mant[1];
	x0 = x->fp_mant[0];
	sticky = a3 = a2 = a1 = a0 = 0;

#define	ADD	/* A += X */						\
	FPU_ADDS(a3, a3, x3);						\
	FPU_ADDCS(a2, a2, x2);						\
	FPU_ADDCS(a1, a1, x1);						\
	FPU_ADDC(a0, a0, x0)

#define	SHR1	/* A >>= 1, with sticky */				\
	sticky |= a3 & 1, a3 = (a3 >> 1) | (a2 << 31),			\
	a2 = (a2 >> 1) | (a1 << 31), a1 = (a1 >> 1) | (a0 << 31), a0 >>= 1

#define	SHR32	/* A >>= 32, with sticky */				\
	sticky |= a3, a3 = a2, a2 = a1, a1 = a0, a0 = 0

#define	STEP	/* each 1-bit step of the multiplication */		\
	SHR1; if (bit & m) { ADD; }; bit <<= 1
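	/*
	 * One STEP is the body of the pseudocode loop above, specialized
	 * to the current word of y: shift the four-word accumulator right
	 * one bit (collecting sticky), add X if the bit of y selected by
	 * `bit' is set, then advance `bit' one position leftward.
	 */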

	/*
	 * We are ready to begin.  The multiply loop runs once for each
	 * of the four 32-bit words.  Some words, however, are special.
	 * As noted above, the low order bits of Y are often zero.  Even
	 * if not, the first loop can certainly skip the guard bits.
	 * The last word of y has its highest 1-bit in position FP_NMANT-1,
	 * so we stop the loop when we move past that bit.
	 */
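	/*
	 * E.g. when y came from a single- or double-precision value, its
	 * low-order mantissa words are usually zero, so such a word costs
	 * only a word move (SHR32) rather than 32 shift-and-add steps.
	 */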
	if ((m = y->fp_mant[3]) == 0) {
		/* SHR32; */			/* unneeded since A==0 */
	} else {
		bit = 1 << FP_NG;
		do {
			STEP;
		} while (bit != 0);
	}
	if ((m = y->fp_mant[2]) == 0) {
		SHR32;
	} else {
		bit = 1;
		do {
			STEP;
		} while (bit != 0);
	}
	if ((m = y->fp_mant[1]) == 0) {
		SHR32;
	} else {
		bit = 1;
		do {
			STEP;
		} while (bit != 0);
	}
	m = y->fp_mant[0];		/* definitely != 0 */
	bit = 1;
	do {
		STEP;
	} while (bit <= m);

	/*
	 * Done with mantissa calculation.  Get exponent and handle
	 * 11.111...1 case, then put result in place.  We reuse x since
	 * it already has the right class (FP_NUM).
	 */
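	/*
	 * E.g. 1.5 * 1.5 = 2.25 = 10.01 in binary: the accumulator is
	 * >= FP_2, so it is shifted right once and the exponent bumped
	 * by one, leaving 1.001 x 2^1.
	 */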
	m = x->fp_exp + y->fp_exp;
	if (a0 >= FP_2) {
		SHR1;
		m++;
	}
	x->fp_sign ^= y->fp_sign;
	x->fp_exp = m;
	x->fp_sticky = sticky;
	x->fp_mant[3] = a3;
	x->fp_mant[2] = a2;
	x->fp_mant[1] = a1;
	x->fp_mant[0] = a0;

	DUMPFPN(FPE_REG, x);
	return (x);
}
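
The shift-and-add scheme described in the header comment can be exercised on its own. The listing below is a minimal userland sketch, not part of fpu_mul.c: the 16-bit NMANT width and the mul_sticky() helper are illustrative inventions, not kernel interfaces. It mirrors the pseudocode loop and checks the accumulated result and sticky bit against a plain full-width multiply.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define	NMANT	16	/* toy mantissa width, far smaller than FP_NMANT */

/*
 * Shift-and-add multiply as in the header comment: shift the
 * accumulator right once per bit of y, collecting dropped bits in
 * `sticky', and add x whenever the corresponding bit of y is set.
 */
static uint32_t
mul_sticky(uint32_t x, uint32_t y, int *stickyp)
{
	uint32_t a = 0;
	int bit, sticky = 0;

	for (bit = 0; bit < NMANT; bit++) {
		sticky |= a & 1;
		a >>= 1;
		if (y & (1u << bit))
			a += x;
	}
	*stickyp = sticky;
	return (a);
}

int
main(void)
{
	uint32_t x = 0xfedc, y = 0xbeef;	/* arbitrary NMANT-bit inputs */
	uint64_t exact = (uint64_t)x * y;
	uint32_t a;
	int sticky;

	a = mul_sticky(x, y, &sticky);

	/* The loop keeps the high-order bits; `sticky' summarizes the rest. */
	assert(a == (uint32_t)(exact >> (NMANT - 1)));
	assert(sticky == ((exact & ((1u << (NMANT - 1)) - 1)) != 0));
	printf("a = %#x, sticky = %d\n", (unsigned)a, sticky);
	return (0);
}

Compiled with any C99 compiler, the asserts pass: the loop computes floor((x * y) / 2^(NMANT-1)), and sticky records whether any of the discarded low-order bits were nonzero, which is exactly the information the later rounding step needs.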