Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/arch/x86/math-emu/poly_sin.c
10820 views
1
/*---------------------------------------------------------------------------+
2
| poly_sin.c |
3
| |
4
| Computation of an approximation of the sin function and the cosine |
5
| function by a polynomial. |
6
| |
7
| Copyright (C) 1992,1993,1994,1997,1999 |
8
| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia |
9
| E-mail [email protected] |
10
| |
11
| |
12
+---------------------------------------------------------------------------*/
13
14
#include "exception.h"
15
#include "reg_constant.h"
16
#include "fpu_emu.h"
17
#include "fpu_system.h"
18
#include "control_w.h"
19
#include "poly.h"
20
21
#define N_COEFF_P 4
22
#define N_COEFF_N 4
23
24
static const unsigned long long pos_terms_l[N_COEFF_P] = {
25
0xaaaaaaaaaaaaaaabLL,
26
0x00d00d00d00cf906LL,
27
0x000006b99159a8bbLL,
28
0x000000000d7392e6LL
29
};
30
31
static const unsigned long long neg_terms_l[N_COEFF_N] = {
32
0x2222222222222167LL,
33
0x0002e3bc74aab624LL,
34
0x0000000b09229062LL,
35
0x00000000000c7973LL
36
};
37
38
#define N_COEFF_PH 4
39
#define N_COEFF_NH 4
40
static const unsigned long long pos_terms_h[N_COEFF_PH] = {
41
0x0000000000000000LL,
42
0x05b05b05b05b0406LL,
43
0x000049f93edd91a9LL,
44
0x00000000c9c9ed62LL
45
};
46
47
static const unsigned long long neg_terms_h[N_COEFF_NH] = {
48
0xaaaaaaaaaaaaaa98LL,
49
0x001a01a01a019064LL,
50
0x0000008f76c68a77LL,
51
0x0000000000d58f5eLL
52
};
53
54
/*--- poly_sine() -----------------------------------------------------------+
55
| |
56
+---------------------------------------------------------------------------*/
57
void poly_sine(FPU_REG *st0_ptr)
58
{
59
int exponent, echange;
60
Xsig accumulator, argSqrd, argTo4;
61
unsigned long fix_up, adj;
62
unsigned long long fixed_arg;
63
FPU_REG result;
64
65
exponent = exponent(st0_ptr);
66
67
accumulator.lsw = accumulator.midw = accumulator.msw = 0;
68
69
/* Split into two ranges, for arguments below and above 1.0 */
70
/* The boundary between upper and lower is approx 0.88309101259 */
71
if ((exponent < -1)
72
|| ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa))) {
73
/* The argument is <= 0.88309101259 */
74
75
argSqrd.msw = st0_ptr->sigh;
76
argSqrd.midw = st0_ptr->sigl;
77
argSqrd.lsw = 0;
78
mul64_Xsig(&argSqrd, &significand(st0_ptr));
79
shr_Xsig(&argSqrd, 2 * (-1 - exponent));
80
argTo4.msw = argSqrd.msw;
81
argTo4.midw = argSqrd.midw;
82
argTo4.lsw = argSqrd.lsw;
83
mul_Xsig_Xsig(&argTo4, &argTo4);
84
85
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
86
N_COEFF_N - 1);
87
mul_Xsig_Xsig(&accumulator, &argSqrd);
88
negate_Xsig(&accumulator);
89
90
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
91
N_COEFF_P - 1);
92
93
shr_Xsig(&accumulator, 2); /* Divide by four */
94
accumulator.msw |= 0x80000000; /* Add 1.0 */
95
96
mul64_Xsig(&accumulator, &significand(st0_ptr));
97
mul64_Xsig(&accumulator, &significand(st0_ptr));
98
mul64_Xsig(&accumulator, &significand(st0_ptr));
99
100
/* Divide by four, FPU_REG compatible, etc */
101
exponent = 3 * exponent;
102
103
/* The minimum exponent difference is 3 */
104
shr_Xsig(&accumulator, exponent(st0_ptr) - exponent);
105
106
negate_Xsig(&accumulator);
107
XSIG_LL(accumulator) += significand(st0_ptr);
108
109
echange = round_Xsig(&accumulator);
110
111
setexponentpos(&result, exponent(st0_ptr) + echange);
112
} else {
113
/* The argument is > 0.88309101259 */
114
/* We use sin(st(0)) = cos(pi/2-st(0)) */
115
116
fixed_arg = significand(st0_ptr);
117
118
if (exponent == 0) {
119
/* The argument is >= 1.0 */
120
121
/* Put the binary point at the left. */
122
fixed_arg <<= 1;
123
}
124
/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
125
fixed_arg = 0x921fb54442d18469LL - fixed_arg;
126
/* There is a special case which arises due to rounding, to fix here. */
127
if (fixed_arg == 0xffffffffffffffffLL)
128
fixed_arg = 0;
129
130
XSIG_LL(argSqrd) = fixed_arg;
131
argSqrd.lsw = 0;
132
mul64_Xsig(&argSqrd, &fixed_arg);
133
134
XSIG_LL(argTo4) = XSIG_LL(argSqrd);
135
argTo4.lsw = argSqrd.lsw;
136
mul_Xsig_Xsig(&argTo4, &argTo4);
137
138
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
139
N_COEFF_NH - 1);
140
mul_Xsig_Xsig(&accumulator, &argSqrd);
141
negate_Xsig(&accumulator);
142
143
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
144
N_COEFF_PH - 1);
145
negate_Xsig(&accumulator);
146
147
mul64_Xsig(&accumulator, &fixed_arg);
148
mul64_Xsig(&accumulator, &fixed_arg);
149
150
shr_Xsig(&accumulator, 3);
151
negate_Xsig(&accumulator);
152
153
add_Xsig_Xsig(&accumulator, &argSqrd);
154
155
shr_Xsig(&accumulator, 1);
156
157
accumulator.lsw |= 1; /* A zero accumulator here would cause problems */
158
negate_Xsig(&accumulator);
159
160
/* The basic computation is complete. Now fix the answer to
161
compensate for the error due to the approximation used for
162
pi/2
163
*/
164
165
/* This has an exponent of -65 */
166
fix_up = 0x898cc517;
167
/* The fix-up needs to be improved for larger args */
168
if (argSqrd.msw & 0xffc00000) {
169
/* Get about 32 bit precision in these: */
170
fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6;
171
}
172
fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg));
173
174
adj = accumulator.lsw; /* temp save */
175
accumulator.lsw -= fix_up;
176
if (accumulator.lsw > adj)
177
XSIG_LL(accumulator)--;
178
179
echange = round_Xsig(&accumulator);
180
181
setexponentpos(&result, echange - 1);
182
}
183
184
significand(&result) = XSIG_LL(accumulator);
185
setsign(&result, getsign(st0_ptr));
186
FPU_copy_to_reg0(&result, TAG_Valid);
187
188
#ifdef PARANOID
189
if ((exponent(&result) >= 0)
190
&& (significand(&result) > 0x8000000000000000LL)) {
191
EXCEPTION(EX_INTERNAL | 0x150);
192
}
193
#endif /* PARANOID */
194
195
}
196
197
/*--- poly_cos() ------------------------------------------------------------+
198
| |
199
+---------------------------------------------------------------------------*/
200
void poly_cos(FPU_REG *st0_ptr)
201
{
202
FPU_REG result;
203
long int exponent, exp2, echange;
204
Xsig accumulator, argSqrd, fix_up, argTo4;
205
unsigned long long fixed_arg;
206
207
#ifdef PARANOID
208
if ((exponent(st0_ptr) > 0)
209
|| ((exponent(st0_ptr) == 0)
210
&& (significand(st0_ptr) > 0xc90fdaa22168c234LL))) {
211
EXCEPTION(EX_Invalid);
212
FPU_copy_to_reg0(&CONST_QNaN, TAG_Special);
213
return;
214
}
215
#endif /* PARANOID */
216
217
exponent = exponent(st0_ptr);
218
219
accumulator.lsw = accumulator.midw = accumulator.msw = 0;
220
221
if ((exponent < -1)
222
|| ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54))) {
223
/* arg is < 0.687705 */
224
225
argSqrd.msw = st0_ptr->sigh;
226
argSqrd.midw = st0_ptr->sigl;
227
argSqrd.lsw = 0;
228
mul64_Xsig(&argSqrd, &significand(st0_ptr));
229
230
if (exponent < -1) {
231
/* shift the argument right by the required places */
232
shr_Xsig(&argSqrd, 2 * (-1 - exponent));
233
}
234
235
argTo4.msw = argSqrd.msw;
236
argTo4.midw = argSqrd.midw;
237
argTo4.lsw = argSqrd.lsw;
238
mul_Xsig_Xsig(&argTo4, &argTo4);
239
240
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h,
241
N_COEFF_NH - 1);
242
mul_Xsig_Xsig(&accumulator, &argSqrd);
243
negate_Xsig(&accumulator);
244
245
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h,
246
N_COEFF_PH - 1);
247
negate_Xsig(&accumulator);
248
249
mul64_Xsig(&accumulator, &significand(st0_ptr));
250
mul64_Xsig(&accumulator, &significand(st0_ptr));
251
shr_Xsig(&accumulator, -2 * (1 + exponent));
252
253
shr_Xsig(&accumulator, 3);
254
negate_Xsig(&accumulator);
255
256
add_Xsig_Xsig(&accumulator, &argSqrd);
257
258
shr_Xsig(&accumulator, 1);
259
260
/* It doesn't matter if accumulator is all zero here, the
261
following code will work ok */
262
negate_Xsig(&accumulator);
263
264
if (accumulator.lsw & 0x80000000)
265
XSIG_LL(accumulator)++;
266
if (accumulator.msw == 0) {
267
/* The result is 1.0 */
268
FPU_copy_to_reg0(&CONST_1, TAG_Valid);
269
return;
270
} else {
271
significand(&result) = XSIG_LL(accumulator);
272
273
/* will be a valid positive nr with expon = -1 */
274
setexponentpos(&result, -1);
275
}
276
} else {
277
fixed_arg = significand(st0_ptr);
278
279
if (exponent == 0) {
280
/* The argument is >= 1.0 */
281
282
/* Put the binary point at the left. */
283
fixed_arg <<= 1;
284
}
285
/* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */
286
fixed_arg = 0x921fb54442d18469LL - fixed_arg;
287
/* There is a special case which arises due to rounding, to fix here. */
288
if (fixed_arg == 0xffffffffffffffffLL)
289
fixed_arg = 0;
290
291
exponent = -1;
292
exp2 = -1;
293
294
/* A shift is needed here only for a narrow range of arguments,
295
i.e. for fixed_arg approx 2^-32, but we pick up more... */
296
if (!(LL_MSW(fixed_arg) & 0xffff0000)) {
297
fixed_arg <<= 16;
298
exponent -= 16;
299
exp2 -= 16;
300
}
301
302
XSIG_LL(argSqrd) = fixed_arg;
303
argSqrd.lsw = 0;
304
mul64_Xsig(&argSqrd, &fixed_arg);
305
306
if (exponent < -1) {
307
/* shift the argument right by the required places */
308
shr_Xsig(&argSqrd, 2 * (-1 - exponent));
309
}
310
311
argTo4.msw = argSqrd.msw;
312
argTo4.midw = argSqrd.midw;
313
argTo4.lsw = argSqrd.lsw;
314
mul_Xsig_Xsig(&argTo4, &argTo4);
315
316
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l,
317
N_COEFF_N - 1);
318
mul_Xsig_Xsig(&accumulator, &argSqrd);
319
negate_Xsig(&accumulator);
320
321
polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l,
322
N_COEFF_P - 1);
323
324
shr_Xsig(&accumulator, 2); /* Divide by four */
325
accumulator.msw |= 0x80000000; /* Add 1.0 */
326
327
mul64_Xsig(&accumulator, &fixed_arg);
328
mul64_Xsig(&accumulator, &fixed_arg);
329
mul64_Xsig(&accumulator, &fixed_arg);
330
331
/* Divide by four, FPU_REG compatible, etc */
332
exponent = 3 * exponent;
333
334
/* The minimum exponent difference is 3 */
335
shr_Xsig(&accumulator, exp2 - exponent);
336
337
negate_Xsig(&accumulator);
338
XSIG_LL(accumulator) += fixed_arg;
339
340
/* The basic computation is complete. Now fix the answer to
341
compensate for the error due to the approximation used for
342
pi/2
343
*/
344
345
/* This has an exponent of -65 */
346
XSIG_LL(fix_up) = 0x898cc51701b839a2ll;
347
fix_up.lsw = 0;
348
349
/* The fix-up needs to be improved for larger args */
350
if (argSqrd.msw & 0xffc00000) {
351
/* Get about 32 bit precision in these: */
352
fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2;
353
fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24;
354
}
355
356
exp2 += norm_Xsig(&accumulator);
357
shr_Xsig(&accumulator, 1); /* Prevent overflow */
358
exp2++;
359
shr_Xsig(&fix_up, 65 + exp2);
360
361
add_Xsig_Xsig(&accumulator, &fix_up);
362
363
echange = round_Xsig(&accumulator);
364
365
setexponentpos(&result, exp2 + echange);
366
significand(&result) = XSIG_LL(accumulator);
367
}
368
369
FPU_copy_to_reg0(&result, TAG_Valid);
370
371
#ifdef PARANOID
372
if ((exponent(&result) >= 0)
373
&& (significand(&result) > 0x8000000000000000LL)) {
374
EXCEPTION(EX_INTERNAL | 0x151);
375
}
376
#endif /* PARANOID */
377
378
}
379
380