CoCalc -- poly

GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/math-emu/poly_sin.c
⁵⁰⁸⁹⁹ views
1
// SPDX-License-Identifier: GPL-2.0
2
/*---------------------------------------------------------------------------+
3
 |  poly_sin.c                                                               |
4
 |                                                                           |
5
 |  Computation of an approximation of the sin function and the cosine       |
6
 |  function by a polynomial.                                                |
7
 |                                                                           |
8
 | Copyright (C) 1992,1993,1994,1997,1999                                    |
9
 |                  W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
10
 |                  E-mail   [email protected]                             |
11
 |                                                                           |
12
 |                                                                           |
13
 +---------------------------------------------------------------------------*/
14

15
#include "exception.h"
16
#include "reg_constant.h"
17
#include "fpu_emu.h"
18
#include "fpu_system.h"
19
#include "control_w.h"
20
#include "poly.h"
21

22
#define	N_COEFF_P	4
23
#define	N_COEFF_N	4
24

25
static const unsigned long long pos_terms_l[N_COEFF_P] = {
26
	0xaaaaaaaaaaaaaaabLL,
27
	0x00d00d00d00cf906LL,
28
	0x000006b99159a8bbLL,
29
	0x000000000d7392e6LL
30
};
31

32
static const unsigned long long neg_terms_l[N_COEFF_N] = {
33
	0x2222222222222167LL,
34
	0x0002e3bc74aab624LL,
35
	0x0000000b09229062LL,
36
	0x00000000000c7973LL
37
};
38

39
#define	N_COEFF_PH	4
40
#define	N_COEFF_NH	4
41
static const unsigned long long pos_terms_h[N_COEFF_PH] = {
42
	0x0000000000000000LL,
43
	0x05b05b05b05b0406LL,
44
	0x000049f93edd91a9LL,
45
	0x00000000c9c9ed62LL
46
};
47

48
static const unsigned long long neg_terms_h[N_COEFF_NH] = {
49
	0xaaaaaaaaaaaaaa98LL,
50
	0x001a01a01a019064LL,
51
	0x0000008f76c68a77LL,
52
	0x0000000000d58f5eLL
53
};
54

55
/*--- poly_sine() -----------------------------------------------------------+
56
 |                                                                           |
57
 +---------------------------------------------------------------------------*/
58
void poly_sine(FPU_REG *st0_ptr)
59
{
60
	int exponent, echange;
61
	Xsig accumulator, argSqrd, argTo4;
62
	unsigned long fix_up, adj;
63
	unsigned long long fixed_arg;
64
	FPU_REG result;
65

66
	exponent = exponent(st0_ptr);
67

68
	accumulator.lsw = accumulator.midw = accumulator.msw = 0;
69

70
	/* Split into two ranges, for arguments below and above 1.0 */
71
	/* The boundary between upper and lower is approx 0.88309101259 */
72
	if ((exponent < -1)
73
	    || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) {
74
		/* The argument is <= 0.88309101259 */
75

76
		argSqrd.msw = st0_ptr->sigh;
77
		argSqrd.midw = st0_ptr->sigl;
78
		argSqrd.lsw = 0;
79
		mul64_Xsig(&argSqrd, &significand(st0_ptr));
80
		shr_Xsig(&argSqrd, 2 * (-1 - exponent));
81
		argTo4.msw = argSqrd.msw;
82
		argTo4.midw = argSqrd.midw;
83
		argTo4.lsw = argSqrd.lsw;
84
		mul_Xsig_Xsig(&argTo4, &argTo4);
85

86
		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
87
				N_COEFF_N - 1);
88
		mul_Xsig_Xsig(&accumulator, &argSqrd);
89
		negate_Xsig(&accumulator);
90

91
		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
92
				N_COEFF_P - 1);
93

94
		shr_Xsig(&accumulator, 2);	/* Divide by four */
95
		accumulator.msw |= 0x80000000;	/* Add 1.0 */
96

97
		mul64_Xsig(&accumulator, &significand(st0_ptr));
98
		mul64_Xsig(&accumulator, &significand(st0_ptr));
99
		mul64_Xsig(&accumulator, &significand(st0_ptr));
100

101
		/* Divide by four, FPU_REG compatible, etc */
102
		exponent = 3 * exponent;
103

104
		/* The minimum exponent difference is 3 */
105
		shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
106

107
		negate_Xsig(&accumulator);
108
		XSIG_LL(accumulator) += significand(st0_ptr);
109

110
		echange = round_Xsig(&accumulator);
111

112
		setexponentpos(&result, exponent(st0_ptr) + echange);
113
	} else {
114
		/* The argument is > 0.88309101259 */
115
		/* We use sin(st(0)) = cos(pi/2-st(0)) */
116

117
		fixed_arg = significand(st0_ptr);
118

119
		if (exponent == 0) {
120
			/* The argument is >= 1.0 */
121

122
			/* Put the binary point at the left. */
123
			fixed_arg <<= 1;
124
		}
125
		/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
126
		fixed_arg = 0x921fb54442d18469LL - fixed_arg;
127
		/* There is a special case which arises due to rounding, to fix here. */
128
		if (fixed_arg == 0xffffffffffffffffLL)
129
			fixed_arg = 0;
130

131
		XSIG_LL(argSqrd) = fixed_arg;
132
		argSqrd.lsw = 0;
133
		mul64_Xsig(&argSqrd, &fixed_arg);
134

135
		XSIG_LL(argTo4) = XSIG_LL(argSqrd);
136
		argTo4.lsw = argSqrd.lsw;
137
		mul_Xsig_Xsig(&argTo4, &argTo4);
138

139
		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
140
				N_COEFF_NH - 1);
141
		mul_Xsig_Xsig(&accumulator, &argSqrd);
142
		negate_Xsig(&accumulator);
143

144
		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
145
				N_COEFF_PH - 1);
146
		negate_Xsig(&accumulator);
147

148
		mul64_Xsig(&accumulator, &fixed_arg);
149
		mul64_Xsig(&accumulator, &fixed_arg);
150

151
		shr_Xsig(&accumulator, 3);
152
		negate_Xsig(&accumulator);
153

154
		add_Xsig_Xsig(&accumulator, &argSqrd);
155

156
		shr_Xsig(&accumulator, 1);
157

158
		accumulator.lsw |= 1;	/* A zero accumulator here would cause problems */
159
		negate_Xsig(&accumulator);
160

161
		/* The basic computation is complete. Now fix the answer to
162
		   compensate for the error due to the approximation used for
163
		   pi/2
164
		 */
165

166
		/* This has an exponent of -65 */
167
		fix_up = 0x898cc517;
168
		/* The fix-up needs to be improved for larger args */
169
		if (argSqrd.msw & 0xffc00000) {
170
			/* Get about 32 bit precision in these: */
171
			fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
172
		}
173
		fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
174

175
		adj = accumulator.lsw;	/* temp save */
176
		accumulator.lsw -= fix_up;
177
		if (accumulator.lsw > adj)
178
			XSIG_LL(accumulator)--;
179

180
		echange = round_Xsig(&accumulator);
181

182
		setexponentpos(&result, echange - 1);
183
	}
184

185
	significand(&result) = XSIG_LL(accumulator);
186
	setsign(&result, getsign(st0_ptr));
187
	FPU_copy_to_reg0(&result, TAG_Valid);
188

189
#ifdef PARANOID
190
	if ((exponent(&result) >= 0)
191
	    && (significand(&result) > 0x8000000000000000LL)) {
192
		EXCEPTION(EX_INTERNAL | 0x150);
193
	}
194
#endif /* PARANOID */
195

196
}
197

198
/*--- poly_cos() ------------------------------------------------------------+
199
 |                                                                           |
200
 +---------------------------------------------------------------------------*/
201
void poly_cos(FPU_REG *st0_ptr)
202
{
203
	FPU_REG result;
204
	long int exponent, exp2, echange;
205
	Xsig accumulator, argSqrd, fix_up, argTo4;
206
	unsigned long long fixed_arg;
207

208
#ifdef PARANOID
209
	if ((exponent(st0_ptr) > 0)
210
	    || ((exponent(st0_ptr) == 0)
211
		&& (significand(st0_ptr) > 0xc90fdaa22168c234LL))) {
212
		EXCEPTION(EX_Invalid);
213
		FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
214
		return;
215
	}
216
#endif /* PARANOID */
217

218
	exponent = exponent(st0_ptr);
219

220
	accumulator.lsw = accumulator.midw = accumulator.msw = 0;
221

222
	if ((exponent < -1)
223
	    || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) {
224
		/* arg is < 0.687705 */
225

226
		argSqrd.msw = st0_ptr->sigh;
227
		argSqrd.midw = st0_ptr->sigl;
228
		argSqrd.lsw = 0;
229
		mul64_Xsig(&argSqrd, &significand(st0_ptr));
230

231
		if (exponent < -1) {
232
			/* shift the argument right by the required places */
233
			shr_Xsig(&argSqrd, 2 * (-1 - exponent));
234
		}
235

236
		argTo4.msw = argSqrd.msw;
237
		argTo4.midw = argSqrd.midw;
238
		argTo4.lsw = argSqrd.lsw;
239
		mul_Xsig_Xsig(&argTo4, &argTo4);
240

241
		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
242
				N_COEFF_NH - 1);
243
		mul_Xsig_Xsig(&accumulator, &argSqrd);
244
		negate_Xsig(&accumulator);
245

246
		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
247
				N_COEFF_PH - 1);
248
		negate_Xsig(&accumulator);
249

250
		mul64_Xsig(&accumulator, &significand(st0_ptr));
251
		mul64_Xsig(&accumulator, &significand(st0_ptr));
252
		shr_Xsig(&accumulator, -2 * (1 + exponent));
253

254
		shr_Xsig(&accumulator, 3);
255
		negate_Xsig(&accumulator);
256

257
		add_Xsig_Xsig(&accumulator, &argSqrd);
258

259
		shr_Xsig(&accumulator, 1);
260

261
		/* It doesn't matter if accumulator is all zero here, the
262
		   following code will work ok */
263
		negate_Xsig(&accumulator);
264

265
		if (accumulator.lsw & 0x80000000)
266
			XSIG_LL(accumulator)++;
267
		if (accumulator.msw == 0) {
268
			/* The result is 1.0 */
269
			FPU_copy_to_reg0(&CONST_1, TAG_Valid);
270
			return;
271
		} else {
272
			significand(&result) = XSIG_LL(accumulator);
273

274
			/* will be a valid positive nr with expon = -1 */
275
			setexponentpos(&result, -1);
276
		}
277
	} else {
278
		fixed_arg = significand(st0_ptr);
279

280
		if (exponent == 0) {
281
			/* The argument is >= 1.0 */
282

283
			/* Put the binary point at the left. */
284
			fixed_arg <<= 1;
285
		}
286
		/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
287
		fixed_arg = 0x921fb54442d18469LL - fixed_arg;
288
		/* There is a special case which arises due to rounding, to fix here. */
289
		if (fixed_arg == 0xffffffffffffffffLL)
290
			fixed_arg = 0;
291

292
		exponent = -1;
293
		exp2 = -1;
294

295
		/* A shift is needed here only for a narrow range of arguments,
296
		   i.e. for fixed_arg approx 2^-32, but we pick up more... */
297
		if (!(LL_MSW(fixed_arg) & 0xffff0000)) {
298
			fixed_arg <<= 16;
299
			exponent -= 16;
300
			exp2 -= 16;
301
		}
302

303
		XSIG_LL(argSqrd) = fixed_arg;
304
		argSqrd.lsw = 0;
305
		mul64_Xsig(&argSqrd, &fixed_arg);
306

307
		if (exponent < -1) {
308
			/* shift the argument right by the required places */
309
			shr_Xsig(&argSqrd, 2 * (-1 - exponent));
310
		}
311

312
		argTo4.msw = argSqrd.msw;
313
		argTo4.midw = argSqrd.midw;
314
		argTo4.lsw = argSqrd.lsw;
315
		mul_Xsig_Xsig(&argTo4, &argTo4);
316

317
		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
318
				N_COEFF_N - 1);
319
		mul_Xsig_Xsig(&accumulator, &argSqrd);
320
		negate_Xsig(&accumulator);
321

322
		polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
323
				N_COEFF_P - 1);
324

325
		shr_Xsig(&accumulator, 2);	/* Divide by four */
326
		accumulator.msw |= 0x80000000;	/* Add 1.0 */
327

328
		mul64_Xsig(&accumulator, &fixed_arg);
329
		mul64_Xsig(&accumulator, &fixed_arg);
330
		mul64_Xsig(&accumulator, &fixed_arg);
331

332
		/* Divide by four, FPU_REG compatible, etc */
333
		exponent = 3 * exponent;
334

335
		/* The minimum exponent difference is 3 */
336
		shr_Xsig(&accumulator, exp2 - exponent);
337

338
		negate_Xsig(&accumulator);
339
		XSIG_LL(accumulator) += fixed_arg;
340

341
		/* The basic computation is complete. Now fix the answer to
342
		   compensate for the error due to the approximation used for
343
		   pi/2
344
		 */
345

346
		/* This has an exponent of -65 */
347
		XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
348
		fix_up.lsw = 0;
349

350
		/* The fix-up needs to be improved for larger args */
351
		if (argSqrd.msw & 0xffc00000) {
352
			/* Get about 32 bit precision in these: */
353
			fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
354
			fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
355
		}
356

357
		exp2 += norm_Xsig(&accumulator);
358
		shr_Xsig(&accumulator, 1);	/* Prevent overflow */
359
		exp2++;
360
		shr_Xsig(&fix_up, 65 + exp2);
361

362
		add_Xsig_Xsig(&accumulator, &fix_up);
363

364
		echange = round_Xsig(&accumulator);
365

366
		setexponentpos(&result, exp2 + echange);
367
		significand(&result) = XSIG_LL(accumulator);
368
	}
369

370
	FPU_copy_to_reg0(&result, TAG_Valid);
371

372
#ifdef PARANOID
373
	if ((exponent(&result) >= 0)
374
	    && (significand(&result) > 0x8000000000000000LL)) {
375
		EXCEPTION(EX_INTERNAL | 0x151);
376
	}
377
#endif /* PARANOID */
378

379
}
380

381
Product

Resources

Company