Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
torvalds
GitHub Repository: torvalds/linux
Path: blob/master/include/math-emu/op-2.h
26282 views
1
/* Software floating-point emulation.
2
Basic two-word fraction declaration and manipulation.
3
Copyright (C) 1997,1998,1999 Free Software Foundation, Inc.
4
This file is part of the GNU C Library.
5
Contributed by Richard Henderson ([email protected]),
6
Jakub Jelinek ([email protected]),
7
David S. Miller ([email protected]) and
8
Peter Maydell ([email protected]).
9
10
The GNU C Library is free software; you can redistribute it and/or
11
modify it under the terms of the GNU Library General Public License as
12
published by the Free Software Foundation; either version 2 of the
13
License, or (at your option) any later version.
14
15
The GNU C Library is distributed in the hope that it will be useful,
16
but WITHOUT ANY WARRANTY; without even the implied warranty of
17
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18
Library General Public License for more details.
19
20
You should have received a copy of the GNU Library General Public
21
License along with the GNU C Library; see the file COPYING.LIB. If
22
not, write to the Free Software Foundation, Inc.,
23
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
24
25
#ifndef __MATH_EMU_OP_2_H__
26
#define __MATH_EMU_OP_2_H__
27
28
#define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0 = 0, X##_f1 = 0
29
#define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
30
#define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I)
31
#define _FP_FRAC_HIGH_2(X) (X##_f1)
32
#define _FP_FRAC_LOW_2(X) (X##_f0)
33
#define _FP_FRAC_WORD_2(X,w) (X##_f##w)
34
#define _FP_FRAC_SLL_2(X, N) ( \
35
(void) (((N) < _FP_W_TYPE_SIZE) \
36
? ({ \
37
if (__builtin_constant_p(N) && (N) == 1) { \
38
X##_f1 = X##_f1 + X##_f1 + \
39
(((_FP_WS_TYPE) (X##_f0)) < 0); \
40
X##_f0 += X##_f0; \
41
} else { \
42
X##_f1 = X##_f1 << (N) | X##_f0 >> \
43
(_FP_W_TYPE_SIZE - (N)); \
44
X##_f0 <<= (N); \
45
} \
46
0; \
47
}) \
48
: ({ \
49
X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
50
X##_f0 = 0; \
51
})))
52
53
54
#define _FP_FRAC_SRL_2(X, N) ( \
55
(void) (((N) < _FP_W_TYPE_SIZE) \
56
? ({ \
57
X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
58
X##_f1 >>= (N); \
59
}) \
60
: ({ \
61
X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
62
X##_f1 = 0; \
63
})))
64
65
66
/* Right shift with sticky-lsb. */
67
#define _FP_FRAC_SRS_2(X, N, sz) ( \
68
(void) (((N) < _FP_W_TYPE_SIZE) \
69
? ({ \
70
X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \
71
| (__builtin_constant_p(N) && (N) == 1 \
72
? X##_f0 & 1 \
73
: (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
74
X##_f1 >>= (N); \
75
}) \
76
: ({ \
77
X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) \
78
| ((((N) == _FP_W_TYPE_SIZE \
79
? 0 \
80
: (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
81
| X##_f0) != 0)); \
82
X##_f1 = 0; \
83
})))
84
85
#define _FP_FRAC_ADDI_2(X,I) \
86
__FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
87
88
#define _FP_FRAC_ADD_2(R,X,Y) \
89
__FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
90
91
#define _FP_FRAC_SUB_2(R,X,Y) \
92
__FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
93
94
#define _FP_FRAC_DEC_2(X,Y) \
95
__FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
96
97
#define _FP_FRAC_CLZ_2(R,X) \
98
do { \
99
if (X##_f1) \
100
__FP_CLZ(R,X##_f1); \
101
else \
102
{ \
103
__FP_CLZ(R,X##_f0); \
104
R += _FP_W_TYPE_SIZE; \
105
} \
106
} while(0)
107
108
/* Predicates */
109
#define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0)
110
#define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0)
111
#define _FP_FRAC_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
112
#define _FP_FRAC_CLEAR_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
113
#define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
114
#define _FP_FRAC_GT_2(X, Y) \
115
(X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
116
#define _FP_FRAC_GE_2(X, Y) \
117
(X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
118
119
#define _FP_ZEROFRAC_2 0, 0
120
#define _FP_MINFRAC_2 0, 1
121
#define _FP_MAXFRAC_2 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
122
123
/*
124
* Internals
125
*/
126
127
#define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1)
128
129
#define __FP_CLZ_2(R, xh, xl) \
130
do { \
131
if (xh) \
132
__FP_CLZ(R,xh); \
133
else \
134
{ \
135
__FP_CLZ(R,xl); \
136
R += _FP_W_TYPE_SIZE; \
137
} \
138
} while(0)
139
140
#if 0
141
142
#ifndef __FP_FRAC_ADDI_2
143
#define __FP_FRAC_ADDI_2(xh, xl, i) \
144
(xh += ((xl += i) < i))
145
#endif
146
#ifndef __FP_FRAC_ADD_2
147
#define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
148
(rh = xh + yh + ((rl = xl + yl) < xl))
149
#endif
150
#ifndef __FP_FRAC_SUB_2
151
#define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
152
(rh = xh - yh - ((rl = xl - yl) > xl))
153
#endif
154
#ifndef __FP_FRAC_DEC_2
155
#define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
156
do { \
157
UWtype _t = xl; \
158
xh -= yh + ((xl -= yl) > _t); \
159
} while (0)
160
#endif
161
162
#else
163
164
#undef __FP_FRAC_ADDI_2
165
#define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i)
166
#undef __FP_FRAC_ADD_2
167
#define __FP_FRAC_ADD_2 add_ssaaaa
168
#undef __FP_FRAC_SUB_2
169
#define __FP_FRAC_SUB_2 sub_ddmmss
170
#undef __FP_FRAC_DEC_2
171
#define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl)
172
173
#endif
174
175
/*
176
* Unpack the raw bits of a native fp value. Do not classify or
177
* normalize the data.
178
*/
179
180
#define _FP_UNPACK_RAW_2(fs, X, val) \
181
do { \
182
union _FP_UNION_##fs _flo; _flo.flt = (val); \
183
\
184
X##_f0 = _flo.bits.frac0; \
185
X##_f1 = _flo.bits.frac1; \
186
X##_e = _flo.bits.exp; \
187
X##_s = _flo.bits.sign; \
188
} while (0)
189
190
#define _FP_UNPACK_RAW_2_P(fs, X, val) \
191
do { \
192
union _FP_UNION_##fs *_flo = \
193
(union _FP_UNION_##fs *)(val); \
194
\
195
X##_f0 = _flo->bits.frac0; \
196
X##_f1 = _flo->bits.frac1; \
197
X##_e = _flo->bits.exp; \
198
X##_s = _flo->bits.sign; \
199
} while (0)
200
201
202
/*
203
* Repack the raw bits of a native fp value.
204
*/
205
206
#define _FP_PACK_RAW_2(fs, val, X) \
207
do { \
208
union _FP_UNION_##fs _flo; \
209
\
210
_flo.bits.frac0 = X##_f0; \
211
_flo.bits.frac1 = X##_f1; \
212
_flo.bits.exp = X##_e; \
213
_flo.bits.sign = X##_s; \
214
\
215
(val) = _flo.flt; \
216
} while (0)
217
218
#define _FP_PACK_RAW_2_P(fs, val, X) \
219
do { \
220
union _FP_UNION_##fs *_flo = \
221
(union _FP_UNION_##fs *)(val); \
222
\
223
_flo->bits.frac0 = X##_f0; \
224
_flo->bits.frac1 = X##_f1; \
225
_flo->bits.exp = X##_e; \
226
_flo->bits.sign = X##_s; \
227
} while (0)
228
229
230
/*
231
* Multiplication algorithms:
232
*/
233
234
/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
235
236
#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
237
do { \
238
_FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
239
\
240
doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
241
doit(_b_f1, _b_f0, X##_f0, Y##_f1); \
242
doit(_c_f1, _c_f0, X##_f1, Y##_f0); \
243
doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \
244
\
245
__FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
246
_FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0, \
247
_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
248
_FP_FRAC_WORD_4(_z,1)); \
249
__FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
250
_FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0, \
251
_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
252
_FP_FRAC_WORD_4(_z,1)); \
253
\
254
/* Normalize since we know where the msb of the multiplicands \
255
were (bit B), we know that the msb of the of the product is \
256
at either 2B or 2B-1. */ \
257
_FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
258
R##_f0 = _FP_FRAC_WORD_4(_z,0); \
259
R##_f1 = _FP_FRAC_WORD_4(_z,1); \
260
} while (0)
261
262
/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
263
Do only 3 multiplications instead of four. This one is for machines
264
where multiplication is much more expensive than subtraction. */
265
266
#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
267
do { \
268
_FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
269
_FP_W_TYPE _d; \
270
int _c1, _c2; \
271
\
272
_b_f0 = X##_f0 + X##_f1; \
273
_c1 = _b_f0 < X##_f0; \
274
_b_f1 = Y##_f0 + Y##_f1; \
275
_c2 = _b_f1 < Y##_f0; \
276
doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
277
doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1); \
278
doit(_c_f1, _c_f0, X##_f1, Y##_f1); \
279
\
280
_b_f0 &= -_c2; \
281
_b_f1 &= -_c1; \
282
__FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
283
_FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d, \
284
0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1)); \
285
__FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
286
_b_f0); \
287
__FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
288
_b_f1); \
289
__FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
290
_FP_FRAC_WORD_4(_z,1), \
291
0, _d, _FP_FRAC_WORD_4(_z,0)); \
292
__FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
293
_FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0); \
294
__FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), \
295
_c_f1, _c_f0, \
296
_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2)); \
297
\
298
/* Normalize since we know where the msb of the multiplicands \
299
were (bit B), we know that the msb of the of the product is \
300
at either 2B or 2B-1. */ \
301
_FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
302
R##_f0 = _FP_FRAC_WORD_4(_z,0); \
303
R##_f1 = _FP_FRAC_WORD_4(_z,1); \
304
} while (0)
305
306
#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
307
do { \
308
_FP_FRAC_DECL_4(_z); \
309
_FP_W_TYPE _x[2], _y[2]; \
310
_x[0] = X##_f0; _x[1] = X##_f1; \
311
_y[0] = Y##_f0; _y[1] = Y##_f1; \
312
\
313
mpn_mul_n(_z_f, _x, _y, 2); \
314
\
315
/* Normalize since we know where the msb of the multiplicands \
316
were (bit B), we know that the msb of the of the product is \
317
at either 2B or 2B-1. */ \
318
_FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
319
R##_f0 = _z_f[0]; \
320
R##_f1 = _z_f[1]; \
321
} while (0)
322
323
/* Do at most 120x120=240 bits multiplication using double floating
324
point multiplication. This is useful if floating point
325
multiplication has much bigger throughput than integer multiply.
326
It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
327
between 106 and 120 only.
328
Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
329
SETFETZ is a macro which will disable all FPU exceptions and set rounding
330
towards zero, RESETFE should optionally reset it back. */
331
332
#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
333
do { \
334
static const double _const[] = { \
335
/* 2^-24 */ 5.9604644775390625e-08, \
336
/* 2^-48 */ 3.5527136788005009e-15, \
337
/* 2^-72 */ 2.1175823681357508e-22, \
338
/* 2^-96 */ 1.2621774483536189e-29, \
339
/* 2^28 */ 2.68435456e+08, \
340
/* 2^4 */ 1.600000e+01, \
341
/* 2^-20 */ 9.5367431640625e-07, \
342
/* 2^-44 */ 5.6843418860808015e-14, \
343
/* 2^-68 */ 3.3881317890172014e-21, \
344
/* 2^-92 */ 2.0194839173657902e-28, \
345
/* 2^-116 */ 1.2037062152420224e-35}; \
346
double _a240, _b240, _c240, _d240, _e240, _f240, \
347
_g240, _h240, _i240, _j240, _k240; \
348
union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
349
_p240, _q240, _r240, _s240; \
350
UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
351
\
352
if (wfracbits < 106 || wfracbits > 120) \
353
abort(); \
354
\
355
setfetz; \
356
\
357
_e240 = (double)(long)(X##_f0 & 0xffffff); \
358
_j240 = (double)(long)(Y##_f0 & 0xffffff); \
359
_d240 = (double)(long)((X##_f0 >> 24) & 0xffffff); \
360
_i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff); \
361
_c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
362
_h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
363
_b240 = (double)(long)((X##_f1 >> 8) & 0xffffff); \
364
_g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff); \
365
_a240 = (double)(long)(X##_f1 >> 32); \
366
_f240 = (double)(long)(Y##_f1 >> 32); \
367
_e240 *= _const[3]; \
368
_j240 *= _const[3]; \
369
_d240 *= _const[2]; \
370
_i240 *= _const[2]; \
371
_c240 *= _const[1]; \
372
_h240 *= _const[1]; \
373
_b240 *= _const[0]; \
374
_g240 *= _const[0]; \
375
_s240.d = _e240*_j240;\
376
_r240.d = _d240*_j240 + _e240*_i240;\
377
_q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240;\
378
_p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
379
_o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
380
_n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
381
_m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
382
_l240.d = _a240*_g240 + _b240*_f240; \
383
_k240 = _a240*_f240; \
384
_r240.d += _s240.d; \
385
_q240.d += _r240.d; \
386
_p240.d += _q240.d; \
387
_o240.d += _p240.d; \
388
_n240.d += _o240.d; \
389
_m240.d += _n240.d; \
390
_l240.d += _m240.d; \
391
_k240 += _l240.d; \
392
_s240.d -= ((_const[10]+_s240.d)-_const[10]); \
393
_r240.d -= ((_const[9]+_r240.d)-_const[9]); \
394
_q240.d -= ((_const[8]+_q240.d)-_const[8]); \
395
_p240.d -= ((_const[7]+_p240.d)-_const[7]); \
396
_o240.d += _const[7]; \
397
_n240.d += _const[6]; \
398
_m240.d += _const[5]; \
399
_l240.d += _const[4]; \
400
if (_s240.d != 0.0) _y240 = 1; \
401
if (_r240.d != 0.0) _y240 = 1; \
402
if (_q240.d != 0.0) _y240 = 1; \
403
if (_p240.d != 0.0) _y240 = 1; \
404
_t240 = (DItype)_k240; \
405
_u240 = _l240.i; \
406
_v240 = _m240.i; \
407
_w240 = _n240.i; \
408
_x240 = _o240.i; \
409
R##_f1 = (_t240 << (128 - (wfracbits - 1))) \
410
| ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)); \
411
R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \
412
| ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \
413
| ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \
414
| ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \
415
| _y240; \
416
resetfe; \
417
} while (0)
418
419
/*
420
* Division algorithms:
421
*/
422
423
#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
424
do { \
425
_FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0; \
426
if (_FP_FRAC_GT_2(X, Y)) \
427
{ \
428
_n_f2 = X##_f1 >> 1; \
429
_n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
430
_n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
431
} \
432
else \
433
{ \
434
R##_e--; \
435
_n_f2 = X##_f1; \
436
_n_f1 = X##_f0; \
437
_n_f0 = 0; \
438
} \
439
\
440
/* Normalize, i.e. make the most significant bit of the \
441
denominator set. */ \
442
_FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs); \
443
\
444
udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1); \
445
umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0); \
446
_r_f0 = _n_f0; \
447
if (_FP_FRAC_GT_2(_m, _r)) \
448
{ \
449
R##_f1--; \
450
_FP_FRAC_ADD_2(_r, Y, _r); \
451
if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
452
{ \
453
R##_f1--; \
454
_FP_FRAC_ADD_2(_r, Y, _r); \
455
} \
456
} \
457
_FP_FRAC_DEC_2(_r, _m); \
458
\
459
if (_r_f1 == Y##_f1) \
460
{ \
461
/* This is a special case, not an optimization \
462
(_r/Y##_f1 would not fit into UWtype). \
463
As _r is guaranteed to be < Y, R##_f0 can be either \
464
(UWtype)-1 or (UWtype)-2. But as we know what kind \
465
of bits it is (sticky, guard, round), we don't care. \
466
We also don't care what the reminder is, because the \
467
guard bit will be set anyway. -jj */ \
468
R##_f0 = -1; \
469
} \
470
else \
471
{ \
472
udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1); \
473
umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0); \
474
_r_f0 = 0; \
475
if (_FP_FRAC_GT_2(_m, _r)) \
476
{ \
477
R##_f0--; \
478
_FP_FRAC_ADD_2(_r, Y, _r); \
479
if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
480
{ \
481
R##_f0--; \
482
_FP_FRAC_ADD_2(_r, Y, _r); \
483
} \
484
} \
485
if (!_FP_FRAC_EQ_2(_r, _m)) \
486
R##_f0 |= _FP_WORK_STICKY; \
487
} \
488
} while (0)
489
490
491
#define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \
492
do { \
493
_FP_W_TYPE _x[4], _y[2], _z[4]; \
494
_y[0] = Y##_f0; _y[1] = Y##_f1; \
495
_x[0] = _x[3] = 0; \
496
if (_FP_FRAC_GT_2(X, Y)) \
497
{ \
498
R##_e++; \
499
_x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) | \
500
X##_f1 >> (_FP_W_TYPE_SIZE - \
501
(_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \
502
_x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE); \
503
} \
504
else \
505
{ \
506
_x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) | \
507
X##_f1 >> (_FP_W_TYPE_SIZE - \
508
(_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE))); \
509
_x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE); \
510
} \
511
\
512
(void) mpn_divrem (_z, 0, _x, 4, _y, 2); \
513
R##_f1 = _z[1]; \
514
R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); \
515
} while (0)
516
517
518
/*
519
* Square root algorithms:
520
* We have just one right now, maybe Newton approximation
521
* should be added for those machines where division is fast.
522
*/
523
524
#define _FP_SQRT_MEAT_2(R, S, T, X, q) \
525
do { \
526
while (q) \
527
{ \
528
T##_f1 = S##_f1 + q; \
529
if (T##_f1 <= X##_f1) \
530
{ \
531
S##_f1 = T##_f1 + q; \
532
X##_f1 -= T##_f1; \
533
R##_f1 += q; \
534
} \
535
_FP_FRAC_SLL_2(X, 1); \
536
q >>= 1; \
537
} \
538
q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
539
while (q != _FP_WORK_ROUND) \
540
{ \
541
T##_f0 = S##_f0 + q; \
542
T##_f1 = S##_f1; \
543
if (T##_f1 < X##_f1 || \
544
(T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
545
{ \
546
S##_f0 = T##_f0 + q; \
547
S##_f1 += (T##_f0 > S##_f0); \
548
_FP_FRAC_DEC_2(X, T); \
549
R##_f0 += q; \
550
} \
551
_FP_FRAC_SLL_2(X, 1); \
552
q >>= 1; \
553
} \
554
if (X##_f0 | X##_f1) \
555
{ \
556
if (S##_f1 < X##_f1 || \
557
(S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
558
R##_f0 |= _FP_WORK_ROUND; \
559
R##_f0 |= _FP_WORK_STICKY; \
560
} \
561
} while (0)
562
563
564
/*
565
* Assembly/disassembly for converting to/from integral types.
566
* No shifting or overflow handled here.
567
*/
568
569
#define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
570
(void) (((rsize) <= _FP_W_TYPE_SIZE) \
571
? ({ (r) = X##_f0; }) \
572
: ({ \
573
(r) = X##_f1; \
574
(r) <<= _FP_W_TYPE_SIZE; \
575
(r) += X##_f0; \
576
}))
577
578
#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
579
do { \
580
X##_f0 = r; \
581
X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
582
} while (0)
583
584
/*
585
* Convert FP values between word sizes
586
*/
587
588
#define _FP_FRAC_CONV_1_2(dfs, sfs, D, S) \
589
do { \
590
if (S##_c != FP_CLS_NAN) \
591
_FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \
592
_FP_WFRACBITS_##sfs); \
593
else \
594
_FP_FRAC_SRL_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs)); \
595
D##_f = S##_f0; \
596
} while (0)
597
598
#define _FP_FRAC_CONV_2_1(dfs, sfs, D, S) \
599
do { \
600
D##_f0 = S##_f; \
601
D##_f1 = 0; \
602
_FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \
603
} while (0)
604
605
#endif
606
607