Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/mips/math-emu/sp_maddf.c
26424 views
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
* IEEE754 floating point arithmetic
4
* single precision: MADDF.f (Fused Multiply Add)
5
* MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
6
*
7
* MIPS floating point support
8
* Copyright (C) 2015 Imagination Technologies, Ltd.
9
* Author: Markos Chandras <[email protected]>
10
*/
11
12
#include "ieee754sp.h"
13
14
15
static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
16
union ieee754sp y, enum maddf_flags flags)
17
{
18
int re;
19
int rs;
20
unsigned int rm;
21
u64 rm64;
22
u64 zm64;
23
int s;
24
25
COMPXSP;
26
COMPYSP;
27
COMPZSP;
28
29
EXPLODEXSP;
30
EXPLODEYSP;
31
EXPLODEZSP;
32
33
FLUSHXSP;
34
FLUSHYSP;
35
FLUSHZSP;
36
37
ieee754_clearcx();
38
39
rs = xs ^ ys;
40
if (flags & MADDF_NEGATE_PRODUCT)
41
rs ^= 1;
42
if (flags & MADDF_NEGATE_ADDITION)
43
zs ^= 1;
44
45
/*
46
* Handle the cases when at least one of x, y or z is a NaN.
47
* Order of precedence is sNaN, qNaN and z, x, y.
48
*/
49
if (zc == IEEE754_CLASS_SNAN)
50
return ieee754sp_nanxcpt(z);
51
if (xc == IEEE754_CLASS_SNAN)
52
return ieee754sp_nanxcpt(x);
53
if (yc == IEEE754_CLASS_SNAN)
54
return ieee754sp_nanxcpt(y);
55
if (zc == IEEE754_CLASS_QNAN)
56
return z;
57
if (xc == IEEE754_CLASS_QNAN)
58
return x;
59
if (yc == IEEE754_CLASS_QNAN)
60
return y;
61
62
if (zc == IEEE754_CLASS_DNORM)
63
SPDNORMZ;
64
/* ZERO z cases are handled separately below */
65
66
switch (CLPAIR(xc, yc)) {
67
68
69
/*
70
* Infinity handling
71
*/
72
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
73
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
74
ieee754_setcx(IEEE754_INVALID_OPERATION);
75
return ieee754sp_indef();
76
77
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
78
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
79
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
80
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
81
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
82
if ((zc == IEEE754_CLASS_INF) && (zs != rs)) {
83
/*
84
* Cases of addition of infinities with opposite signs
85
* or subtraction of infinities with same signs.
86
*/
87
ieee754_setcx(IEEE754_INVALID_OPERATION);
88
return ieee754sp_indef();
89
}
90
/*
91
* z is here either not an infinity, or an infinity having the
92
* same sign as product (x*y). The result must be an infinity,
93
* and its sign is determined only by the sign of product (x*y).
94
*/
95
return ieee754sp_inf(rs);
96
97
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
98
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
99
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
100
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
101
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
102
if (zc == IEEE754_CLASS_INF)
103
return ieee754sp_inf(zs);
104
if (zc == IEEE754_CLASS_ZERO) {
105
/* Handle cases +0 + (-0) and similar ones. */
106
if (zs == rs)
107
/*
108
* Cases of addition of zeros of equal signs
109
* or subtraction of zeroes of opposite signs.
110
* The sign of the resulting zero is in any
111
* such case determined only by the sign of z.
112
*/
113
return z;
114
115
return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
116
}
117
/* x*y is here 0, and z is not 0, so just return z */
118
return z;
119
120
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
121
SPDNORMX;
122
fallthrough;
123
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
124
if (zc == IEEE754_CLASS_INF)
125
return ieee754sp_inf(zs);
126
SPDNORMY;
127
break;
128
129
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
130
if (zc == IEEE754_CLASS_INF)
131
return ieee754sp_inf(zs);
132
SPDNORMX;
133
break;
134
135
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
136
if (zc == IEEE754_CLASS_INF)
137
return ieee754sp_inf(zs);
138
/* continue to real computations */
139
}
140
141
/* Finally get to do some computation */
142
143
/*
144
* Do the multiplication bit first
145
*
146
* rm = xm * ym, re = xe + ye basically
147
*
148
* At this point xm and ym should have been normalized.
149
*/
150
151
/* rm = xm * ym, re = xe+ye basically */
152
assert(xm & SP_HIDDEN_BIT);
153
assert(ym & SP_HIDDEN_BIT);
154
155
re = xe + ye;
156
157
/* Multiple 24 bit xm and ym to give 48 bit results */
158
rm64 = (uint64_t)xm * ym;
159
160
/* Shunt to top of word */
161
rm64 = rm64 << 16;
162
163
/* Put explicit bit at bit 62 if necessary */
164
if ((int64_t) rm64 < 0) {
165
rm64 = rm64 >> 1;
166
re++;
167
}
168
169
assert(rm64 & (1 << 62));
170
171
if (zc == IEEE754_CLASS_ZERO) {
172
/*
173
* Move explicit bit from bit 62 to bit 26 since the
174
* ieee754sp_format code expects the mantissa to be
175
* 27 bits wide (24 + 3 rounding bits).
176
*/
177
rm = XSPSRS64(rm64, (62 - 26));
178
return ieee754sp_format(rs, re, rm);
179
}
180
181
/* Move explicit bit from bit 23 to bit 62 */
182
zm64 = (uint64_t)zm << (62 - 23);
183
assert(zm64 & (1 << 62));
184
185
/* Make the exponents the same */
186
if (ze > re) {
187
/*
188
* Have to shift r fraction right to align.
189
*/
190
s = ze - re;
191
rm64 = XSPSRS64(rm64, s);
192
re += s;
193
} else if (re > ze) {
194
/*
195
* Have to shift z fraction right to align.
196
*/
197
s = re - ze;
198
zm64 = XSPSRS64(zm64, s);
199
ze += s;
200
}
201
assert(ze == re);
202
assert(ze <= SP_EMAX);
203
204
/* Do the addition */
205
if (zs == rs) {
206
/*
207
* Generate 64 bit result by adding two 63 bit numbers
208
* leaving result in zm64, zs and ze.
209
*/
210
zm64 = zm64 + rm64;
211
if ((int64_t)zm64 < 0) { /* carry out */
212
zm64 = XSPSRS1(zm64);
213
ze++;
214
}
215
} else {
216
if (zm64 >= rm64) {
217
zm64 = zm64 - rm64;
218
} else {
219
zm64 = rm64 - zm64;
220
zs = rs;
221
}
222
if (zm64 == 0)
223
return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
224
225
/*
226
* Put explicit bit at bit 62 if necessary.
227
*/
228
while ((zm64 >> 62) == 0) {
229
zm64 <<= 1;
230
ze--;
231
}
232
}
233
234
/*
235
* Move explicit bit from bit 62 to bit 26 since the
236
* ieee754sp_format code expects the mantissa to be
237
* 27 bits wide (24 + 3 rounding bits).
238
*/
239
zm = XSPSRS64(zm64, (62 - 26));
240
241
return ieee754sp_format(zs, ze, zm);
242
}
243
244
union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
245
union ieee754sp y)
246
{
247
return _sp_maddf(z, x, y, 0);
248
}
249
250
union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
251
union ieee754sp y)
252
{
253
return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
254
}
255
256
union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x,
257
union ieee754sp y)
258
{
259
return _sp_maddf(z, x, y, 0);
260
}
261
262
union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x,
263
union ieee754sp y)
264
{
265
return _sp_maddf(z, x, y, MADDF_NEGATE_ADDITION);
266
}
267
268
union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x,
269
union ieee754sp y)
270
{
271
return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION);
272
}
273
274
union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x,
275
union ieee754sp y)
276
{
277
return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
278
}
279
280