CoCalc -- sp

GitHub Repository: torvalds/linux
Path: blob/master/arch/mips/math-emu/sp_maddf.c
²⁶⁴²⁴ views
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
 * IEEE754 floating point arithmetic
4
 * single precision: MADDF.f (Fused Multiply Add)
5
 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
6
 *
7
 * MIPS floating point support
8
 * Copyright (C) 2015 Imagination Technologies, Ltd.
9
 * Author: Markos Chandras <[email protected]>
10
 */
11

12
#include "ieee754sp.h"
13

14

15
static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
16
				 union ieee754sp y, enum maddf_flags flags)
17
{
18
	int re;
19
	int rs;
20
	unsigned int rm;
21
	u64 rm64;
22
	u64 zm64;
23
	int s;
24

25
	COMPXSP;
26
	COMPYSP;
27
	COMPZSP;
28

29
	EXPLODEXSP;
30
	EXPLODEYSP;
31
	EXPLODEZSP;
32

33
	FLUSHXSP;
34
	FLUSHYSP;
35
	FLUSHZSP;
36

37
	ieee754_clearcx();
38

39
	rs = xs ^ ys;
40
	if (flags & MADDF_NEGATE_PRODUCT)
41
		rs ^= 1;
42
	if (flags & MADDF_NEGATE_ADDITION)
43
		zs ^= 1;
44

45
	/*
46
	 * Handle the cases when at least one of x, y or z is a NaN.
47
	 * Order of precedence is sNaN, qNaN and z, x, y.
48
	 */
49
	if (zc == IEEE754_CLASS_SNAN)
50
		return ieee754sp_nanxcpt(z);
51
	if (xc == IEEE754_CLASS_SNAN)
52
		return ieee754sp_nanxcpt(x);
53
	if (yc == IEEE754_CLASS_SNAN)
54
		return ieee754sp_nanxcpt(y);
55
	if (zc == IEEE754_CLASS_QNAN)
56
		return z;
57
	if (xc == IEEE754_CLASS_QNAN)
58
		return x;
59
	if (yc == IEEE754_CLASS_QNAN)
60
		return y;
61

62
	if (zc == IEEE754_CLASS_DNORM)
63
		SPDNORMZ;
64
	/* ZERO z cases are handled separately below */
65

66
	switch (CLPAIR(xc, yc)) {
67

68

69
	/*
70
	 * Infinity handling
71
	 */
72
	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
73
	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
74
		ieee754_setcx(IEEE754_INVALID_OPERATION);
75
		return ieee754sp_indef();
76

77
	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
78
	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
79
	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
80
	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
81
	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
82
		if ((zc == IEEE754_CLASS_INF) && (zs != rs)) {
83
			/*
84
			 * Cases of addition of infinities with opposite signs
85
			 * or subtraction of infinities with same signs.
86
			 */
87
			ieee754_setcx(IEEE754_INVALID_OPERATION);
88
			return ieee754sp_indef();
89
		}
90
		/*
91
		 * z is here either not an infinity, or an infinity having the
92
		 * same sign as product (x*y). The result must be an infinity,
93
		 * and its sign is determined only by the sign of product (x*y).
94
		 */
95
		return ieee754sp_inf(rs);
96

97
	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
98
	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
99
	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
100
	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
101
	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
102
		if (zc == IEEE754_CLASS_INF)
103
			return ieee754sp_inf(zs);
104
		if (zc == IEEE754_CLASS_ZERO) {
105
			/* Handle cases +0 + (-0) and similar ones. */
106
			if (zs == rs)
107
				/*
108
				 * Cases of addition of zeros of equal signs
109
				 * or subtraction of zeroes of opposite signs.
110
				 * The sign of the resulting zero is in any
111
				 * such case determined only by the sign of z.
112
				 */
113
				return z;
114

115
			return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
116
		}
117
		/* x*y is here 0, and z is not 0, so just return z */
118
		return z;
119

120
	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
121
		SPDNORMX;
122
		fallthrough;
123
	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
124
		if (zc == IEEE754_CLASS_INF)
125
			return ieee754sp_inf(zs);
126
		SPDNORMY;
127
		break;
128

129
	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
130
		if (zc == IEEE754_CLASS_INF)
131
			return ieee754sp_inf(zs);
132
		SPDNORMX;
133
		break;
134

135
	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
136
		if (zc == IEEE754_CLASS_INF)
137
			return ieee754sp_inf(zs);
138
		/* continue to real computations */
139
	}
140

141
	/* Finally get to do some computation */
142

143
	/*
144
	 * Do the multiplication bit first
145
	 *
146
	 * rm = xm * ym, re = xe + ye basically
147
	 *
148
	 * At this point xm and ym should have been normalized.
149
	 */
150

151
	/* rm = xm * ym, re = xe+ye basically */
152
	assert(xm & SP_HIDDEN_BIT);
153
	assert(ym & SP_HIDDEN_BIT);
154

155
	re = xe + ye;
156

157
	/* Multiple 24 bit xm and ym to give 48 bit results */
158
	rm64 = (uint64_t)xm * ym;
159

160
	/* Shunt to top of word */
161
	rm64 = rm64 << 16;
162

163
	/* Put explicit bit at bit 62 if necessary */
164
	if ((int64_t) rm64 < 0) {
165
		rm64 = rm64 >> 1;
166
		re++;
167
	}
168

169
	assert(rm64 & (1 << 62));
170

171
	if (zc == IEEE754_CLASS_ZERO) {
172
		/*
173
		 * Move explicit bit from bit 62 to bit 26 since the
174
		 * ieee754sp_format code expects the mantissa to be
175
		 * 27 bits wide (24 + 3 rounding bits).
176
		 */
177
		rm = XSPSRS64(rm64, (62 - 26));
178
		return ieee754sp_format(rs, re, rm);
179
	}
180

181
	/* Move explicit bit from bit 23 to bit 62 */
182
	zm64 = (uint64_t)zm << (62 - 23);
183
	assert(zm64 & (1 << 62));
184

185
	/* Make the exponents the same */
186
	if (ze > re) {
187
		/*
188
		 * Have to shift r fraction right to align.
189
		 */
190
		s = ze - re;
191
		rm64 = XSPSRS64(rm64, s);
192
		re += s;
193
	} else if (re > ze) {
194
		/*
195
		 * Have to shift z fraction right to align.
196
		 */
197
		s = re - ze;
198
		zm64 = XSPSRS64(zm64, s);
199
		ze += s;
200
	}
201
	assert(ze == re);
202
	assert(ze <= SP_EMAX);
203

204
	/* Do the addition */
205
	if (zs == rs) {
206
		/*
207
		 * Generate 64 bit result by adding two 63 bit numbers
208
		 * leaving result in zm64, zs and ze.
209
		 */
210
		zm64 = zm64 + rm64;
211
		if ((int64_t)zm64 < 0) {	/* carry out */
212
			zm64 = XSPSRS1(zm64);
213
			ze++;
214
		}
215
	} else {
216
		if (zm64 >= rm64) {
217
			zm64 = zm64 - rm64;
218
		} else {
219
			zm64 = rm64 - zm64;
220
			zs = rs;
221
		}
222
		if (zm64 == 0)
223
			return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
224

225
		/*
226
		 * Put explicit bit at bit 62 if necessary.
227
		 */
228
		while ((zm64 >> 62) == 0) {
229
			zm64 <<= 1;
230
			ze--;
231
		}
232
	}
233

234
	/*
235
	 * Move explicit bit from bit 62 to bit 26 since the
236
	 * ieee754sp_format code expects the mantissa to be
237
	 * 27 bits wide (24 + 3 rounding bits).
238
	 */
239
	zm = XSPSRS64(zm64, (62 - 26));
240

241
	return ieee754sp_format(zs, ze, zm);
242
}
243

244
union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
245
				union ieee754sp y)
246
{
247
	return _sp_maddf(z, x, y, 0);
248
}
249

250
union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
251
				union ieee754sp y)
252
{
253
	return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
254
}
255

256
union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x,
257
				union ieee754sp y)
258
{
259
	return _sp_maddf(z, x, y, 0);
260
}
261

262
union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x,
263
				union ieee754sp y)
264
{
265
	return _sp_maddf(z, x, y, MADDF_NEGATE_ADDITION);
266
}
267

268
union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x,
269
				union ieee754sp y)
270
{
271
	return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION);
272
}
273

274
union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x,
275
				union ieee754sp y)
276
{
277
	return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
278
}
279

280
Product

Resources

Company