Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/arch/x86/math-emu/poly_sin.c
26424 views
1
// SPDX-License-Identifier: GPL-2.0
2
/*---------------------------------------------------------------------------+
3
| poly_sin.c |
4
| |
5
| Computation of an approximation of the sin function and the cosine |
6
| function by a polynomial. |
7
| |
8
| Copyright (C) 1992,1993,1994,1997,1999 |
9
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
10
| E-mail [email protected] |
11
| |
12
| |
13
+---------------------------------------------------------------------------*/
14
15
#include "exception.h"
16
#include "reg_constant.h"
17
#include "fpu_emu.h"
18
#include "fpu_system.h"
19
#include "control_w.h"
20
#include "poly.h"
21
22
#define N_COEFF_P 4
23
#define N_COEFF_N 4
24
25
static const unsigned long long pos_terms_l[N_COEFF_P] = {
26
0xaaaaaaaaaaaaaaabLL,
27
0x00d00d00d00cf906LL,
28
0x000006b99159a8bbLL,
29
0x000000000d7392e6LL
30
};
31
32
static const unsigned long long neg_terms_l[N_COEFF_N] = {
33
0x2222222222222167LL,
34
0x0002e3bc74aab624LL,
35
0x0000000b09229062LL,
36
0x00000000000c7973LL
37
};
38
39
#define N_COEFF_PH 4
40
#define N_COEFF_NH 4
41
static const unsigned long long pos_terms_h[N_COEFF_PH] = {
42
0x0000000000000000LL,
43
0x05b05b05b05b0406LL,
44
0x000049f93edd91a9LL,
45
0x00000000c9c9ed62LL
46
};
47
48
static const unsigned long long neg_terms_h[N_COEFF_NH] = {
49
0xaaaaaaaaaaaaaa98LL,
50
0x001a01a01a019064LL,
51
0x0000008f76c68a77LL,
52
0x0000000000d58f5eLL
53
};
54
55
/*--- poly_sine() -----------------------------------------------------------+
56
| |
57
+---------------------------------------------------------------------------*/
58
void poly_sine(FPU_REG *st0_ptr)
59
{
60
int exponent, echange;
61
Xsig accumulator, argSqrd, argTo4;
62
unsigned long fix_up, adj;
63
unsigned long long fixed_arg;
64
FPU_REG result;
65
66
exponent = exponent(st0_ptr);
67
68
accumulator.lsw = accumulator.midw = accumulator.msw = 0;
69
70
/* Split into two ranges, for arguments below and above 1.0 */
71
/* The boundary between upper and lower is approx 0.88309101259 */
72
if ((exponent < -1)
73
|| ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) {
74
/* The argument is <= 0.88309101259 */
75
76
argSqrd.msw = st0_ptr->sigh;
77
argSqrd.midw = st0_ptr->sigl;
78
argSqrd.lsw = 0;
79
mul64_Xsig(&argSqrd, &significand(st0_ptr));
80
shr_Xsig(&argSqrd, 2 * (-1 - exponent));
81
argTo4.msw = argSqrd.msw;
82
argTo4.midw = argSqrd.midw;
83
argTo4.lsw = argSqrd.lsw;
84
mul_Xsig_Xsig(&argTo4, &argTo4);
85
86
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
87
N_COEFF_N - 1);
88
mul_Xsig_Xsig(&accumulator, &argSqrd);
89
negate_Xsig(&accumulator);
90
91
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
92
N_COEFF_P - 1);
93
94
shr_Xsig(&accumulator, 2); /* Divide by four */
95
accumulator.msw |= 0x80000000; /* Add 1.0 */
96
97
mul64_Xsig(&accumulator, &significand(st0_ptr));
98
mul64_Xsig(&accumulator, &significand(st0_ptr));
99
mul64_Xsig(&accumulator, &significand(st0_ptr));
100
101
/* Divide by four, FPU_REG compatible, etc */
102
exponent = 3 * exponent;
103
104
/* The minimum exponent difference is 3 */
105
shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
106
107
negate_Xsig(&accumulator);
108
XSIG_LL(accumulator) += significand(st0_ptr);
109
110
echange = round_Xsig(&accumulator);
111
112
setexponentpos(&result, exponent(st0_ptr) + echange);
113
} else {
114
/* The argument is > 0.88309101259 */
115
/* We use sin(st(0)) = cos(pi/2-st(0)) */
116
117
fixed_arg = significand(st0_ptr);
118
119
if (exponent == 0) {
120
/* The argument is >= 1.0 */
121
122
/* Put the binary point at the left. */
123
fixed_arg <<= 1;
124
}
125
/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
126
fixed_arg = 0x921fb54442d18469LL - fixed_arg;
127
/* There is a special case which arises due to rounding, to fix here. */
128
if (fixed_arg == 0xffffffffffffffffLL)
129
fixed_arg = 0;
130
131
XSIG_LL(argSqrd) = fixed_arg;
132
argSqrd.lsw = 0;
133
mul64_Xsig(&argSqrd, &fixed_arg);
134
135
XSIG_LL(argTo4) = XSIG_LL(argSqrd);
136
argTo4.lsw = argSqrd.lsw;
137
mul_Xsig_Xsig(&argTo4, &argTo4);
138
139
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
140
N_COEFF_NH - 1);
141
mul_Xsig_Xsig(&accumulator, &argSqrd);
142
negate_Xsig(&accumulator);
143
144
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
145
N_COEFF_PH - 1);
146
negate_Xsig(&accumulator);
147
148
mul64_Xsig(&accumulator, &fixed_arg);
149
mul64_Xsig(&accumulator, &fixed_arg);
150
151
shr_Xsig(&accumulator, 3);
152
negate_Xsig(&accumulator);
153
154
add_Xsig_Xsig(&accumulator, &argSqrd);
155
156
shr_Xsig(&accumulator, 1);
157
158
accumulator.lsw |= 1; /* A zero accumulator here would cause problems */
159
negate_Xsig(&accumulator);
160
161
/* The basic computation is complete. Now fix the answer to
162
compensate for the error due to the approximation used for
163
pi/2
164
*/
165
166
/* This has an exponent of -65 */
167
fix_up = 0x898cc517;
168
/* The fix-up needs to be improved for larger args */
169
if (argSqrd.msw & 0xffc00000) {
170
/* Get about 32 bit precision in these: */
171
fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
172
}
173
fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
174
175
adj = accumulator.lsw; /* temp save */
176
accumulator.lsw -= fix_up;
177
if (accumulator.lsw > adj)
178
XSIG_LL(accumulator)--;
179
180
echange = round_Xsig(&accumulator);
181
182
setexponentpos(&result, echange - 1);
183
}
184
185
significand(&result) = XSIG_LL(accumulator);
186
setsign(&result, getsign(st0_ptr));
187
FPU_copy_to_reg0(&result, TAG_Valid);
188
189
#ifdef PARANOID
190
if ((exponent(&result) >= 0)
191
&& (significand(&result) > 0x8000000000000000LL)) {
192
EXCEPTION(EX_INTERNAL | 0x150);
193
}
194
#endif /* PARANOID */
195
196
}
197
198
/*--- poly_cos() ------------------------------------------------------------+
199
| |
200
+---------------------------------------------------------------------------*/
201
void poly_cos(FPU_REG *st0_ptr)
202
{
203
FPU_REG result;
204
long int exponent, exp2, echange;
205
Xsig accumulator, argSqrd, fix_up, argTo4;
206
unsigned long long fixed_arg;
207
208
#ifdef PARANOID
209
if ((exponent(st0_ptr) > 0)
210
|| ((exponent(st0_ptr) == 0)
211
&& (significand(st0_ptr) > 0xc90fdaa22168c234LL))) {
212
EXCEPTION(EX_Invalid);
213
FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
214
return;
215
}
216
#endif /* PARANOID */
217
218
exponent = exponent(st0_ptr);
219
220
accumulator.lsw = accumulator.midw = accumulator.msw = 0;
221
222
if ((exponent < -1)
223
|| ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) {
224
/* arg is < 0.687705 */
225
226
argSqrd.msw = st0_ptr->sigh;
227
argSqrd.midw = st0_ptr->sigl;
228
argSqrd.lsw = 0;
229
mul64_Xsig(&argSqrd, &significand(st0_ptr));
230
231
if (exponent < -1) {
232
/* shift the argument right by the required places */
233
shr_Xsig(&argSqrd, 2 * (-1 - exponent));
234
}
235
236
argTo4.msw = argSqrd.msw;
237
argTo4.midw = argSqrd.midw;
238
argTo4.lsw = argSqrd.lsw;
239
mul_Xsig_Xsig(&argTo4, &argTo4);
240
241
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
242
N_COEFF_NH - 1);
243
mul_Xsig_Xsig(&accumulator, &argSqrd);
244
negate_Xsig(&accumulator);
245
246
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
247
N_COEFF_PH - 1);
248
negate_Xsig(&accumulator);
249
250
mul64_Xsig(&accumulator, &significand(st0_ptr));
251
mul64_Xsig(&accumulator, &significand(st0_ptr));
252
shr_Xsig(&accumulator, -2 * (1 + exponent));
253
254
shr_Xsig(&accumulator, 3);
255
negate_Xsig(&accumulator);
256
257
add_Xsig_Xsig(&accumulator, &argSqrd);
258
259
shr_Xsig(&accumulator, 1);
260
261
/* It doesn't matter if accumulator is all zero here, the
262
following code will work ok */
263
negate_Xsig(&accumulator);
264
265
if (accumulator.lsw & 0x80000000)
266
XSIG_LL(accumulator)++;
267
if (accumulator.msw == 0) {
268
/* The result is 1.0 */
269
FPU_copy_to_reg0(&CONST_1, TAG_Valid);
270
return;
271
} else {
272
significand(&result) = XSIG_LL(accumulator);
273
274
/* will be a valid positive nr with expon = -1 */
275
setexponentpos(&result, -1);
276
}
277
} else {
278
fixed_arg = significand(st0_ptr);
279
280
if (exponent == 0) {
281
/* The argument is >= 1.0 */
282
283
/* Put the binary point at the left. */
284
fixed_arg <<= 1;
285
}
286
/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
287
fixed_arg = 0x921fb54442d18469LL - fixed_arg;
288
/* There is a special case which arises due to rounding, to fix here. */
289
if (fixed_arg == 0xffffffffffffffffLL)
290
fixed_arg = 0;
291
292
exponent = -1;
293
exp2 = -1;
294
295
/* A shift is needed here only for a narrow range of arguments,
296
i.e. for fixed_arg approx 2^-32, but we pick up more... */
297
if (!(LL_MSW(fixed_arg) & 0xffff0000)) {
298
fixed_arg <<= 16;
299
exponent -= 16;
300
exp2 -= 16;
301
}
302
303
XSIG_LL(argSqrd) = fixed_arg;
304
argSqrd.lsw = 0;
305
mul64_Xsig(&argSqrd, &fixed_arg);
306
307
if (exponent < -1) {
308
/* shift the argument right by the required places */
309
shr_Xsig(&argSqrd, 2 * (-1 - exponent));
310
}
311
312
argTo4.msw = argSqrd.msw;
313
argTo4.midw = argSqrd.midw;
314
argTo4.lsw = argSqrd.lsw;
315
mul_Xsig_Xsig(&argTo4, &argTo4);
316
317
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
318
N_COEFF_N - 1);
319
mul_Xsig_Xsig(&accumulator, &argSqrd);
320
negate_Xsig(&accumulator);
321
322
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
323
N_COEFF_P - 1);
324
325
shr_Xsig(&accumulator, 2); /* Divide by four */
326
accumulator.msw |= 0x80000000; /* Add 1.0 */
327
328
mul64_Xsig(&accumulator, &fixed_arg);
329
mul64_Xsig(&accumulator, &fixed_arg);
330
mul64_Xsig(&accumulator, &fixed_arg);
331
332
/* Divide by four, FPU_REG compatible, etc */
333
exponent = 3 * exponent;
334
335
/* The minimum exponent difference is 3 */
336
shr_Xsig(&accumulator, exp2 - exponent);
337
338
negate_Xsig(&accumulator);
339
XSIG_LL(accumulator) += fixed_arg;
340
341
/* The basic computation is complete. Now fix the answer to
342
compensate for the error due to the approximation used for
343
pi/2
344
*/
345
346
/* This has an exponent of -65 */
347
XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
348
fix_up.lsw = 0;
349
350
/* The fix-up needs to be improved for larger args */
351
if (argSqrd.msw & 0xffc00000) {
352
/* Get about 32 bit precision in these: */
353
fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
354
fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
355
}
356
357
exp2 += norm_Xsig(&accumulator);
358
shr_Xsig(&accumulator, 1); /* Prevent overflow */
359
exp2++;
360
shr_Xsig(&fix_up, 65 + exp2);
361
362
add_Xsig_Xsig(&accumulator, &fix_up);
363
364
echange = round_Xsig(&accumulator);
365
366
setexponentpos(&result, exp2 + echange);
367
significand(&result) = XSIG_LL(accumulator);
368
}
369
370
FPU_copy_to_reg0(&result, TAG_Valid);
371
372
#ifdef PARANOID
373
if ((exponent(&result) >= 0)
374
&& (significand(&result) > 0x8000000000000000LL)) {
375
EXCEPTION(EX_INTERNAL | 0x151);
376
}
377
#endif /* PARANOID */
378
379
}
380
381