CoCalc -- dp

GitHub Repository: torvalds/linux
Path: blob/master/arch/mips/math-emu/dp_maddf.c
⁵⁰⁹²⁰ views
1
// SPDX-License-Identifier: GPL-2.0-only
2
/*
3
 * IEEE754 floating point arithmetic
4
 * double precision: MADDF.f (Fused Multiply Add)
5
 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
6
 *
7
 * MIPS floating point support
8
 * Copyright (C) 2015 Imagination Technologies, Ltd.
9
 * Author: Markos Chandras <[email protected]>
10
 */
11

12
#include "ieee754dp.h"
13

14

15
/* 128 bits shift right logical with rounding. */
16
static void srl128(u64 *hptr, u64 *lptr, int count)
17
{
18
	u64 low;
19

20
	if (count >= 128) {
21
		*lptr = *hptr != 0 || *lptr != 0;
22
		*hptr = 0;
23
	} else if (count >= 64) {
24
		if (count == 64) {
25
			*lptr = *hptr | (*lptr != 0);
26
		} else {
27
			low = *lptr;
28
			*lptr = *hptr >> (count - 64);
29
			*lptr |= (*hptr << (128 - count)) != 0 || low != 0;
30
		}
31
		*hptr = 0;
32
	} else {
33
		low = *lptr;
34
		*lptr = low >> count | *hptr << (64 - count);
35
		*lptr |= (low << (64 - count)) != 0;
36
		*hptr = *hptr >> count;
37
	}
38
}
39

40
static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
41
				 union ieee754dp y, enum maddf_flags flags)
42
{
43
	int re;
44
	int rs;
45
	unsigned int lxm;
46
	unsigned int hxm;
47
	unsigned int lym;
48
	unsigned int hym;
49
	u64 lrm;
50
	u64 hrm;
51
	u64 lzm;
52
	u64 hzm;
53
	u64 t;
54
	u64 at;
55
	int s;
56

57
	COMPXDP;
58
	COMPYDP;
59
	COMPZDP;
60

61
	EXPLODEXDP;
62
	EXPLODEYDP;
63
	EXPLODEZDP;
64

65
	FLUSHXDP;
66
	FLUSHYDP;
67
	FLUSHZDP;
68

69
	ieee754_clearcx();
70

71
	rs = xs ^ ys;
72
	if (flags & MADDF_NEGATE_PRODUCT)
73
		rs ^= 1;
74
	if (flags & MADDF_NEGATE_ADDITION)
75
		zs ^= 1;
76

77
	/*
78
	 * Handle the cases when at least one of x, y or z is a NaN.
79
	 * Order of precedence is sNaN, qNaN and z, x, y.
80
	 */
81
	if (zc == IEEE754_CLASS_SNAN)
82
		return ieee754dp_nanxcpt(z);
83
	if (xc == IEEE754_CLASS_SNAN)
84
		return ieee754dp_nanxcpt(x);
85
	if (yc == IEEE754_CLASS_SNAN)
86
		return ieee754dp_nanxcpt(y);
87
	if (zc == IEEE754_CLASS_QNAN)
88
		return z;
89
	if (xc == IEEE754_CLASS_QNAN)
90
		return x;
91
	if (yc == IEEE754_CLASS_QNAN)
92
		return y;
93

94
	if (zc == IEEE754_CLASS_DNORM)
95
		DPDNORMZ;
96
	/* ZERO z cases are handled separately below */
97

98
	switch (CLPAIR(xc, yc)) {
99

100
	/*
101
	 * Infinity handling
102
	 */
103
	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
104
	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
105
		ieee754_setcx(IEEE754_INVALID_OPERATION);
106
		return ieee754dp_indef();
107

108
	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
109
	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
110
	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
111
	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
112
	case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
113
		if ((zc == IEEE754_CLASS_INF) && (zs != rs)) {
114
			/*
115
			 * Cases of addition of infinities with opposite signs
116
			 * or subtraction of infinities with same signs.
117
			 */
118
			ieee754_setcx(IEEE754_INVALID_OPERATION);
119
			return ieee754dp_indef();
120
		}
121
		/*
122
		 * z is here either not an infinity, or an infinity having the
123
		 * same sign as product (x*y). The result must be an infinity,
124
		 * and its sign is determined only by the sign of product (x*y).
125
		 */
126
		return ieee754dp_inf(rs);
127

128
	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
129
	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
130
	case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
131
	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
132
	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
133
		if (zc == IEEE754_CLASS_INF)
134
			return ieee754dp_inf(zs);
135
		if (zc == IEEE754_CLASS_ZERO) {
136
			/* Handle cases +0 + (-0) and similar ones. */
137
			if (zs == rs)
138
				/*
139
				 * Cases of addition of zeros of equal signs
140
				 * or subtraction of zeroes of opposite signs.
141
				 * The sign of the resulting zero is in any
142
				 * such case determined only by the sign of z.
143
				 */
144
				return z;
145

146
			return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
147
		}
148
		/* x*y is here 0, and z is not 0, so just return z */
149
		return z;
150

151
	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
152
		DPDNORMX;
153
		fallthrough;
154
	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
155
		if (zc == IEEE754_CLASS_INF)
156
			return ieee754dp_inf(zs);
157
		DPDNORMY;
158
		break;
159

160
	case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
161
		if (zc == IEEE754_CLASS_INF)
162
			return ieee754dp_inf(zs);
163
		DPDNORMX;
164
		break;
165

166
	case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
167
		if (zc == IEEE754_CLASS_INF)
168
			return ieee754dp_inf(zs);
169
		/* continue to real computations */
170
	}
171

172
	/* Finally get to do some computation */
173

174
	/*
175
	 * Do the multiplication bit first
176
	 *
177
	 * rm = xm * ym, re = xe + ye basically
178
	 *
179
	 * At this point xm and ym should have been normalized.
180
	 */
181
	assert(xm & DP_HIDDEN_BIT);
182
	assert(ym & DP_HIDDEN_BIT);
183

184
	re = xe + ye;
185

186
	/* shunt to top of word */
187
	xm <<= 64 - (DP_FBITS + 1);
188
	ym <<= 64 - (DP_FBITS + 1);
189

190
	/*
191
	 * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm.
192
	 */
193

194
	lxm = xm;
195
	hxm = xm >> 32;
196
	lym = ym;
197
	hym = ym >> 32;
198

199
	lrm = DPXMULT(lxm, lym);
200
	hrm = DPXMULT(hxm, hym);
201

202
	t = DPXMULT(lxm, hym);
203

204
	at = lrm + (t << 32);
205
	hrm += at < lrm;
206
	lrm = at;
207

208
	hrm = hrm + (t >> 32);
209

210
	t = DPXMULT(hxm, lym);
211

212
	at = lrm + (t << 32);
213
	hrm += at < lrm;
214
	lrm = at;
215

216
	hrm = hrm + (t >> 32);
217

218
	/* Put explicit bit at bit 126 if necessary */
219
	if ((int64_t)hrm < 0) {
220
		lrm = (hrm << 63) | (lrm >> 1);
221
		hrm = hrm >> 1;
222
		re++;
223
	}
224

225
	assert(hrm & (1 << 62));
226

227
	if (zc == IEEE754_CLASS_ZERO) {
228
		/*
229
		 * Move explicit bit from bit 126 to bit 55 since the
230
		 * ieee754dp_format code expects the mantissa to be
231
		 * 56 bits wide (53 + 3 rounding bits).
232
		 */
233
		srl128(&hrm, &lrm, (126 - 55));
234
		return ieee754dp_format(rs, re, lrm);
235
	}
236

237
	/* Move explicit bit from bit 52 to bit 126 */
238
	lzm = 0;
239
	hzm = zm << 10;
240
	assert(hzm & (1 << 62));
241

242
	/* Make the exponents the same */
243
	if (ze > re) {
244
		/*
245
		 * Have to shift y fraction right to align.
246
		 */
247
		s = ze - re;
248
		srl128(&hrm, &lrm, s);
249
		re += s;
250
	} else if (re > ze) {
251
		/*
252
		 * Have to shift x fraction right to align.
253
		 */
254
		s = re - ze;
255
		srl128(&hzm, &lzm, s);
256
		ze += s;
257
	}
258
	assert(ze == re);
259
	assert(ze <= DP_EMAX);
260

261
	/* Do the addition */
262
	if (zs == rs) {
263
		/*
264
		 * Generate 128 bit result by adding two 127 bit numbers
265
		 * leaving result in hzm:lzm, zs and ze.
266
		 */
267
		hzm = hzm + hrm + (lzm > (lzm + lrm));
268
		lzm = lzm + lrm;
269
		if ((int64_t)hzm < 0) {        /* carry out */
270
			srl128(&hzm, &lzm, 1);
271
			ze++;
272
		}
273
	} else {
274
		if (hzm > hrm || (hzm == hrm && lzm >= lrm)) {
275
			hzm = hzm - hrm - (lzm < lrm);
276
			lzm = lzm - lrm;
277
		} else {
278
			hzm = hrm - hzm - (lrm < lzm);
279
			lzm = lrm - lzm;
280
			zs = rs;
281
		}
282
		if (lzm == 0 && hzm == 0)
283
			return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
284

285
		/*
286
		 * Put explicit bit at bit 126 if necessary.
287
		 */
288
		if (hzm == 0) {
289
			/* left shift by 63 or 64 bits */
290
			if ((int64_t)lzm < 0) {
291
				/* MSB of lzm is the explicit bit */
292
				hzm = lzm >> 1;
293
				lzm = lzm << 63;
294
				ze -= 63;
295
			} else {
296
				hzm = lzm;
297
				lzm = 0;
298
				ze -= 64;
299
			}
300
		}
301

302
		t = 0;
303
		while ((hzm >> (62 - t)) == 0)
304
			t++;
305

306
		assert(t <= 62);
307
		if (t) {
308
			hzm = hzm << t | lzm >> (64 - t);
309
			lzm = lzm << t;
310
			ze -= t;
311
		}
312
	}
313

314
	/*
315
	 * Move explicit bit from bit 126 to bit 55 since the
316
	 * ieee754dp_format code expects the mantissa to be
317
	 * 56 bits wide (53 + 3 rounding bits).
318
	 */
319
	srl128(&hzm, &lzm, (126 - 55));
320

321
	return ieee754dp_format(zs, ze, lzm);
322
}
323

324
union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
325
				union ieee754dp y)
326
{
327
	return _dp_maddf(z, x, y, 0);
328
}
329

330
union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
331
				union ieee754dp y)
332
{
333
	return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
334
}
335

336
union ieee754dp ieee754dp_madd(union ieee754dp z, union ieee754dp x,
337
				union ieee754dp y)
338
{
339
	return _dp_maddf(z, x, y, 0);
340
}
341

342
union ieee754dp ieee754dp_msub(union ieee754dp z, union ieee754dp x,
343
				union ieee754dp y)
344
{
345
	return _dp_maddf(z, x, y, MADDF_NEGATE_ADDITION);
346
}
347

348
union ieee754dp ieee754dp_nmadd(union ieee754dp z, union ieee754dp x,
349
				union ieee754dp y)
350
{
351
	return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION);
352
}
353

354
union ieee754dp ieee754dp_nmsub(union ieee754dp z, union ieee754dp x,
355
				union ieee754dp y)
356
{
357
	return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
358
}
359

360
Product

Resources

Company