Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
awilliam
GitHub Repository: awilliam/linux-vfio
Path: blob/master/include/math-emu/op-2.h
10817 views
1
/* Software floating-point emulation.
2
Basic two-word fraction declaration and manipulation.
3
Copyright (C) 1997,1998,1999 Free Software Foundation, Inc.
4
This file is part of the GNU C Library.
5
Contributed by Richard Henderson ([email protected]),
6
Jakub Jelinek ([email protected]),
7
David S. Miller ([email protected]) and
8
Peter Maydell ([email protected]).
9
10
The GNU C Library is free software; you can redistribute it and/or
11
modify it under the terms of the GNU Library General Public License as
12
published by the Free Software Foundation; either version 2 of the
13
License, or (at your option) any later version.
14
15
The GNU C Library is distributed in the hope that it will be useful,
16
but WITHOUT ANY WARRANTY; without even the implied warranty of
17
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18
Library General Public License for more details.
19
20
You should have received a copy of the GNU Library General Public
21
License along with the GNU C Library; see the file COPYING.LIB. If
22
not, write to the Free Software Foundation, Inc.,
23
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
24
25
#ifndef __MATH_EMU_OP_2_H__
26
#define __MATH_EMU_OP_2_H__
27
28
#define _FP_FRAC_DECL_2(X) _FP_W_TYPE X##_f0 = 0, X##_f1 = 0
29
#define _FP_FRAC_COPY_2(D,S) (D##_f0 = S##_f0, D##_f1 = S##_f1)
30
#define _FP_FRAC_SET_2(X,I) __FP_FRAC_SET_2(X, I)
31
#define _FP_FRAC_HIGH_2(X) (X##_f1)
32
#define _FP_FRAC_LOW_2(X) (X##_f0)
33
#define _FP_FRAC_WORD_2(X,w) (X##_f##w)
34
35
#define _FP_FRAC_SLL_2(X,N) \
36
do { \
37
if ((N) < _FP_W_TYPE_SIZE) \
38
{ \
39
if (__builtin_constant_p(N) && (N) == 1) \
40
{ \
41
X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0); \
42
X##_f0 += X##_f0; \
43
} \
44
else \
45
{ \
46
X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
47
X##_f0 <<= (N); \
48
} \
49
} \
50
else \
51
{ \
52
X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
53
X##_f0 = 0; \
54
} \
55
} while (0)
56
57
#define _FP_FRAC_SRL_2(X,N) \
58
do { \
59
if ((N) < _FP_W_TYPE_SIZE) \
60
{ \
61
X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
62
X##_f1 >>= (N); \
63
} \
64
else \
65
{ \
66
X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
67
X##_f1 = 0; \
68
} \
69
} while (0)
70
71
/* Right shift with sticky-lsb. */
72
#define _FP_FRAC_SRS_2(X,N,sz) \
73
do { \
74
if ((N) < _FP_W_TYPE_SIZE) \
75
{ \
76
X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) | \
77
(__builtin_constant_p(N) && (N) == 1 \
78
? X##_f0 & 1 \
79
: (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
80
X##_f1 >>= (N); \
81
} \
82
else \
83
{ \
84
X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) | \
85
(((X##_f1 << (2*_FP_W_TYPE_SIZE - (N))) | X##_f0) != 0)); \
86
X##_f1 = 0; \
87
} \
88
} while (0)
89
90
#define _FP_FRAC_ADDI_2(X,I) \
91
__FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
92
93
#define _FP_FRAC_ADD_2(R,X,Y) \
94
__FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
95
96
#define _FP_FRAC_SUB_2(R,X,Y) \
97
__FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
98
99
#define _FP_FRAC_DEC_2(X,Y) \
100
__FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
101
102
#define _FP_FRAC_CLZ_2(R,X) \
103
do { \
104
if (X##_f1) \
105
__FP_CLZ(R,X##_f1); \
106
else \
107
{ \
108
__FP_CLZ(R,X##_f0); \
109
R += _FP_W_TYPE_SIZE; \
110
} \
111
} while(0)
112
113
/* Predicates */
114
#define _FP_FRAC_NEGP_2(X) ((_FP_WS_TYPE)X##_f1 < 0)
115
#define _FP_FRAC_ZEROP_2(X) ((X##_f1 | X##_f0) == 0)
116
#define _FP_FRAC_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
117
#define _FP_FRAC_CLEAR_OVERP_2(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
118
#define _FP_FRAC_EQ_2(X, Y) (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
119
#define _FP_FRAC_GT_2(X, Y) \
120
(X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
121
#define _FP_FRAC_GE_2(X, Y) \
122
(X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
123
124
#define _FP_ZEROFRAC_2 0, 0
125
#define _FP_MINFRAC_2 0, 1
126
#define _FP_MAXFRAC_2 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
127
128
/*
129
* Internals
130
*/
131
132
#define __FP_FRAC_SET_2(X,I1,I0) (X##_f0 = I0, X##_f1 = I1)
133
134
#define __FP_CLZ_2(R, xh, xl) \
135
do { \
136
if (xh) \
137
__FP_CLZ(R,xh); \
138
else \
139
{ \
140
__FP_CLZ(R,xl); \
141
R += _FP_W_TYPE_SIZE; \
142
} \
143
} while(0)
144
145
#if 0
146
147
#ifndef __FP_FRAC_ADDI_2
148
#define __FP_FRAC_ADDI_2(xh, xl, i) \
149
(xh += ((xl += i) < i))
150
#endif
151
#ifndef __FP_FRAC_ADD_2
152
#define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
153
(rh = xh + yh + ((rl = xl + yl) < xl))
154
#endif
155
#ifndef __FP_FRAC_SUB_2
156
#define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
157
(rh = xh - yh - ((rl = xl - yl) > xl))
158
#endif
159
#ifndef __FP_FRAC_DEC_2
160
#define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
161
do { \
162
UWtype _t = xl; \
163
xh -= yh + ((xl -= yl) > _t); \
164
} while (0)
165
#endif
166
167
#else
168
169
#undef __FP_FRAC_ADDI_2
170
#define __FP_FRAC_ADDI_2(xh, xl, i) add_ssaaaa(xh, xl, xh, xl, 0, i)
171
#undef __FP_FRAC_ADD_2
172
#define __FP_FRAC_ADD_2 add_ssaaaa
173
#undef __FP_FRAC_SUB_2
174
#define __FP_FRAC_SUB_2 sub_ddmmss
175
#undef __FP_FRAC_DEC_2
176
#define __FP_FRAC_DEC_2(xh, xl, yh, yl) sub_ddmmss(xh, xl, xh, xl, yh, yl)
177
178
#endif
179
180
/*
181
* Unpack the raw bits of a native fp value. Do not classify or
182
* normalize the data.
183
*/
184
185
#define _FP_UNPACK_RAW_2(fs, X, val) \
186
do { \
187
union _FP_UNION_##fs _flo; _flo.flt = (val); \
188
\
189
X##_f0 = _flo.bits.frac0; \
190
X##_f1 = _flo.bits.frac1; \
191
X##_e = _flo.bits.exp; \
192
X##_s = _flo.bits.sign; \
193
} while (0)
194
195
#define _FP_UNPACK_RAW_2_P(fs, X, val) \
196
do { \
197
union _FP_UNION_##fs *_flo = \
198
(union _FP_UNION_##fs *)(val); \
199
\
200
X##_f0 = _flo->bits.frac0; \
201
X##_f1 = _flo->bits.frac1; \
202
X##_e = _flo->bits.exp; \
203
X##_s = _flo->bits.sign; \
204
} while (0)
205
206
207
/*
208
* Repack the raw bits of a native fp value.
209
*/
210
211
#define _FP_PACK_RAW_2(fs, val, X) \
212
do { \
213
union _FP_UNION_##fs _flo; \
214
\
215
_flo.bits.frac0 = X##_f0; \
216
_flo.bits.frac1 = X##_f1; \
217
_flo.bits.exp = X##_e; \
218
_flo.bits.sign = X##_s; \
219
\
220
(val) = _flo.flt; \
221
} while (0)
222
223
#define _FP_PACK_RAW_2_P(fs, val, X) \
224
do { \
225
union _FP_UNION_##fs *_flo = \
226
(union _FP_UNION_##fs *)(val); \
227
\
228
_flo->bits.frac0 = X##_f0; \
229
_flo->bits.frac1 = X##_f1; \
230
_flo->bits.exp = X##_e; \
231
_flo->bits.sign = X##_s; \
232
} while (0)
233
234
235
/*
236
* Multiplication algorithms:
237
*/
238
239
/* Given a 1W * 1W => 2W primitive, do the extended multiplication. */
240
241
#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
242
do { \
243
_FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
244
\
245
doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
246
doit(_b_f1, _b_f0, X##_f0, Y##_f1); \
247
doit(_c_f1, _c_f0, X##_f1, Y##_f0); \
248
doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \
249
\
250
__FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
251
_FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0, \
252
_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
253
_FP_FRAC_WORD_4(_z,1)); \
254
__FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
255
_FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0, \
256
_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
257
_FP_FRAC_WORD_4(_z,1)); \
258
\
259
/* Normalize since we know where the msb of the multiplicands \
260
were (bit B), we know that the msb of the of the product is \
261
at either 2B or 2B-1. */ \
262
_FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
263
R##_f0 = _FP_FRAC_WORD_4(_z,0); \
264
R##_f1 = _FP_FRAC_WORD_4(_z,1); \
265
} while (0)
266
267
/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
268
Do only 3 multiplications instead of four. This one is for machines
269
where multiplication is much more expensive than subtraction. */
270
271
#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
272
do { \
273
_FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \
274
_FP_W_TYPE _d; \
275
int _c1, _c2; \
276
\
277
_b_f0 = X##_f0 + X##_f1; \
278
_c1 = _b_f0 < X##_f0; \
279
_b_f1 = Y##_f0 + Y##_f1; \
280
_c2 = _b_f1 < Y##_f0; \
281
doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
282
doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1); \
283
doit(_c_f1, _c_f0, X##_f1, Y##_f1); \
284
\
285
_b_f0 &= -_c2; \
286
_b_f1 &= -_c1; \
287
__FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
288
_FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d, \
289
0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1)); \
290
__FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
291
_b_f0); \
292
__FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
293
_b_f1); \
294
__FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
295
_FP_FRAC_WORD_4(_z,1), \
296
0, _d, _FP_FRAC_WORD_4(_z,0)); \
297
__FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2), \
298
_FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0); \
299
__FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), \
300
_c_f1, _c_f0, \
301
_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2)); \
302
\
303
/* Normalize since we know where the msb of the multiplicands \
304
were (bit B), we know that the msb of the of the product is \
305
at either 2B or 2B-1. */ \
306
_FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
307
R##_f0 = _FP_FRAC_WORD_4(_z,0); \
308
R##_f1 = _FP_FRAC_WORD_4(_z,1); \
309
} while (0)
310
311
#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
312
do { \
313
_FP_FRAC_DECL_4(_z); \
314
_FP_W_TYPE _x[2], _y[2]; \
315
_x[0] = X##_f0; _x[1] = X##_f1; \
316
_y[0] = Y##_f0; _y[1] = Y##_f1; \
317
\
318
mpn_mul_n(_z_f, _x, _y, 2); \
319
\
320
/* Normalize since we know where the msb of the multiplicands \
321
were (bit B), we know that the msb of the of the product is \
322
at either 2B or 2B-1. */ \
323
_FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits); \
324
R##_f0 = _z_f[0]; \
325
R##_f1 = _z_f[1]; \
326
} while (0)
327
328
/* Do at most 120x120=240 bits multiplication using double floating
329
point multiplication. This is useful if floating point
330
multiplication has much bigger throughput than integer multiply.
331
It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
332
between 106 and 120 only.
333
Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
334
SETFETZ is a macro which will disable all FPU exceptions and set rounding
335
towards zero, RESETFE should optionally reset it back. */
336
337
#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
338
do { \
339
static const double _const[] = { \
340
/* 2^-24 */ 5.9604644775390625e-08, \
341
/* 2^-48 */ 3.5527136788005009e-15, \
342
/* 2^-72 */ 2.1175823681357508e-22, \
343
/* 2^-96 */ 1.2621774483536189e-29, \
344
/* 2^28 */ 2.68435456e+08, \
345
/* 2^4 */ 1.600000e+01, \
346
/* 2^-20 */ 9.5367431640625e-07, \
347
/* 2^-44 */ 5.6843418860808015e-14, \
348
/* 2^-68 */ 3.3881317890172014e-21, \
349
/* 2^-92 */ 2.0194839173657902e-28, \
350
/* 2^-116 */ 1.2037062152420224e-35}; \
351
double _a240, _b240, _c240, _d240, _e240, _f240, \
352
_g240, _h240, _i240, _j240, _k240; \
353
union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
354
_p240, _q240, _r240, _s240; \
355
UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
356
\
357
if (wfracbits < 106 || wfracbits > 120) \
358
abort(); \
359
\
360
setfetz; \
361
\
362
_e240 = (double)(long)(X##_f0 & 0xffffff); \
363
_j240 = (double)(long)(Y##_f0 & 0xffffff); \
364
_d240 = (double)(long)((X##_f0 >> 24) & 0xffffff); \
365
_i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff); \
366
_c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
367
_h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
368
_b240 = (double)(long)((X##_f1 >> 8) & 0xffffff); \
369
_g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff); \
370
_a240 = (double)(long)(X##_f1 >> 32); \
371
_f240 = (double)(long)(Y##_f1 >> 32); \
372
_e240 *= _const[3]; \
373
_j240 *= _const[3]; \
374
_d240 *= _const[2]; \
375
_i240 *= _const[2]; \
376
_c240 *= _const[1]; \
377
_h240 *= _const[1]; \
378
_b240 *= _const[0]; \
379
_g240 *= _const[0]; \
380
_s240.d = _e240*_j240;\
381
_r240.d = _d240*_j240 + _e240*_i240;\
382
_q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240;\
383
_p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
384
_o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
385
_n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
386
_m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
387
_l240.d = _a240*_g240 + _b240*_f240; \
388
_k240 = _a240*_f240; \
389
_r240.d += _s240.d; \
390
_q240.d += _r240.d; \
391
_p240.d += _q240.d; \
392
_o240.d += _p240.d; \
393
_n240.d += _o240.d; \
394
_m240.d += _n240.d; \
395
_l240.d += _m240.d; \
396
_k240 += _l240.d; \
397
_s240.d -= ((_const[10]+_s240.d)-_const[10]); \
398
_r240.d -= ((_const[9]+_r240.d)-_const[9]); \
399
_q240.d -= ((_const[8]+_q240.d)-_const[8]); \
400
_p240.d -= ((_const[7]+_p240.d)-_const[7]); \
401
_o240.d += _const[7]; \
402
_n240.d += _const[6]; \
403
_m240.d += _const[5]; \
404
_l240.d += _const[4]; \
405
if (_s240.d != 0.0) _y240 = 1; \
406
if (_r240.d != 0.0) _y240 = 1; \
407
if (_q240.d != 0.0) _y240 = 1; \
408
if (_p240.d != 0.0) _y240 = 1; \
409
_t240 = (DItype)_k240; \
410
_u240 = _l240.i; \
411
_v240 = _m240.i; \
412
_w240 = _n240.i; \
413
_x240 = _o240.i; \
414
R##_f1 = (_t240 << (128 - (wfracbits - 1))) \
415
| ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104)); \
416
R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \
417
| ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \
418
| ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \
419
| ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \
420
| _y240; \
421
resetfe; \
422
} while (0)
423
424
/*
425
* Division algorithms:
426
*/
427
428
#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
429
do { \
430
_FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0; \
431
if (_FP_FRAC_GT_2(X, Y)) \
432
{ \
433
_n_f2 = X##_f1 >> 1; \
434
_n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
435
_n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
436
} \
437
else \
438
{ \
439
R##_e--; \
440
_n_f2 = X##_f1; \
441
_n_f1 = X##_f0; \
442
_n_f0 = 0; \
443
} \
444
\
445
/* Normalize, i.e. make the most significant bit of the \
446
denominator set. */ \
447
_FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs); \
448
\
449
udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1); \
450
umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0); \
451
_r_f0 = _n_f0; \
452
if (_FP_FRAC_GT_2(_m, _r)) \
453
{ \
454
R##_f1--; \
455
_FP_FRAC_ADD_2(_r, Y, _r); \
456
if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
457
{ \
458
R##_f1--; \
459
_FP_FRAC_ADD_2(_r, Y, _r); \
460
} \
461
} \
462
_FP_FRAC_DEC_2(_r, _m); \
463
\
464
if (_r_f1 == Y##_f1) \
465
{ \
466
/* This is a special case, not an optimization \
467
(_r/Y##_f1 would not fit into UWtype). \
468
As _r is guaranteed to be < Y, R##_f0 can be either \
469
(UWtype)-1 or (UWtype)-2. But as we know what kind \
470
of bits it is (sticky, guard, round), we don't care. \
471
We also don't care what the reminder is, because the \
472
guard bit will be set anyway. -jj */ \
473
R##_f0 = -1; \
474
} \
475
else \
476
{ \
477
udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1); \
478
umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0); \
479
_r_f0 = 0; \
480
if (_FP_FRAC_GT_2(_m, _r)) \
481
{ \
482
R##_f0--; \
483
_FP_FRAC_ADD_2(_r, Y, _r); \
484
if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r)) \
485
{ \
486
R##_f0--; \
487
_FP_FRAC_ADD_2(_r, Y, _r); \
488
} \
489
} \
490
if (!_FP_FRAC_EQ_2(_r, _m)) \
491
R##_f0 |= _FP_WORK_STICKY; \
492
} \
493
} while (0)
494
495
496
#define _FP_DIV_MEAT_2_gmp(fs, R, X, Y) \
497
do { \
498
_FP_W_TYPE _x[4], _y[2], _z[4]; \
499
_y[0] = Y##_f0; _y[1] = Y##_f1; \
500
_x[0] = _x[3] = 0; \
501
if (_FP_FRAC_GT_2(X, Y)) \
502
{ \
503
R##_e++; \
504
_x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) | \
505
X##_f1 >> (_FP_W_TYPE_SIZE - \
506
(_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \
507
_x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE); \
508
} \
509
else \
510
{ \
511
_x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) | \
512
X##_f1 >> (_FP_W_TYPE_SIZE - \
513
(_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE))); \
514
_x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE); \
515
} \
516
\
517
(void) mpn_divrem (_z, 0, _x, 4, _y, 2); \
518
R##_f1 = _z[1]; \
519
R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0); \
520
} while (0)
521
522
523
/*
524
* Square root algorithms:
525
* We have just one right now, maybe Newton approximation
526
* should be added for those machines where division is fast.
527
*/
528
529
#define _FP_SQRT_MEAT_2(R, S, T, X, q) \
530
do { \
531
while (q) \
532
{ \
533
T##_f1 = S##_f1 + q; \
534
if (T##_f1 <= X##_f1) \
535
{ \
536
S##_f1 = T##_f1 + q; \
537
X##_f1 -= T##_f1; \
538
R##_f1 += q; \
539
} \
540
_FP_FRAC_SLL_2(X, 1); \
541
q >>= 1; \
542
} \
543
q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \
544
while (q != _FP_WORK_ROUND) \
545
{ \
546
T##_f0 = S##_f0 + q; \
547
T##_f1 = S##_f1; \
548
if (T##_f1 < X##_f1 || \
549
(T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
550
{ \
551
S##_f0 = T##_f0 + q; \
552
S##_f1 += (T##_f0 > S##_f0); \
553
_FP_FRAC_DEC_2(X, T); \
554
R##_f0 += q; \
555
} \
556
_FP_FRAC_SLL_2(X, 1); \
557
q >>= 1; \
558
} \
559
if (X##_f0 | X##_f1) \
560
{ \
561
if (S##_f1 < X##_f1 || \
562
(S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
563
R##_f0 |= _FP_WORK_ROUND; \
564
R##_f0 |= _FP_WORK_STICKY; \
565
} \
566
} while (0)
567
568
569
/*
570
* Assembly/disassembly for converting to/from integral types.
571
* No shifting or overflow handled here.
572
*/
573
574
#define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
575
do { \
576
if (rsize <= _FP_W_TYPE_SIZE) \
577
r = X##_f0; \
578
else \
579
{ \
580
r = X##_f1; \
581
r <<= _FP_W_TYPE_SIZE; \
582
r += X##_f0; \
583
} \
584
} while (0)
585
586
#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
587
do { \
588
X##_f0 = r; \
589
X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \
590
} while (0)
591
592
/*
593
* Convert FP values between word sizes
594
*/
595
596
#define _FP_FRAC_CONV_1_2(dfs, sfs, D, S) \
597
do { \
598
if (S##_c != FP_CLS_NAN) \
599
_FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs), \
600
_FP_WFRACBITS_##sfs); \
601
else \
602
_FP_FRAC_SRL_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs)); \
603
D##_f = S##_f0; \
604
} while (0)
605
606
#define _FP_FRAC_CONV_2_1(dfs, sfs, D, S) \
607
do { \
608
D##_f0 = S##_f; \
609
D##_f1 = 0; \
610
_FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs)); \
611
} while (0)
612
613
#endif
614
615