CoCalc -- op-2.h

GitHub Repository: awilliam/linux-vfio
Path: blob/master/include/math-emu/op-2.h
¹⁷⁶³⁴ views
1
/* Software floating-point emulation.
2
   Basic two-word fraction declaration and manipulation.
3
   Copyright (C) 1997,1998,1999 Free Software Foundation, Inc.
4
   This file is part of the GNU C Library.
5
   Contributed by Richard Henderson ([email protected]),
6
		  Jakub Jelinek ([email protected]),
7
		  David S. Miller ([email protected]) and
8
		  Peter Maydell ([email protected]).
9

10
   The GNU C Library is free software; you can redistribute it and/or
11
   modify it under the terms of the GNU Library General Public License as
12
   published by the Free Software Foundation; either version 2 of the
13
   License, or (at your option) any later version.
14

15
   The GNU C Library is distributed in the hope that it will be useful,
16
   but WITHOUT ANY WARRANTY; without even the implied warranty of
17
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18
   Library General Public License for more details.
19

20
   You should have received a copy of the GNU Library General Public
21
   License along with the GNU C Library; see the file COPYING.LIB.  If
22
   not, write to the Free Software Foundation, Inc.,
23
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
24

25
#ifndef __MATH_EMU_OP_2_H__
26
#define __MATH_EMU_OP_2_H__
27

28
#define _FP_FRAC_DECL_2(X)	_FP_W_TYPE X##_f0 = 0, X##_f1 = 0
29
#define _FP_FRAC_COPY_2(D,S)	(D##_f0 = S##_f0, D##_f1 = S##_f1)
30
#define _FP_FRAC_SET_2(X,I)	__FP_FRAC_SET_2(X, I)
31
#define _FP_FRAC_HIGH_2(X)	(X##_f1)
32
#define _FP_FRAC_LOW_2(X)	(X##_f0)
33
#define _FP_FRAC_WORD_2(X,w)	(X##_f##w)
34

35
#define _FP_FRAC_SLL_2(X,N)						\
36
  do {									\
37
    if ((N) < _FP_W_TYPE_SIZE)						\
38
      {									\
39
	if (__builtin_constant_p(N) && (N) == 1) 			\
40
	  {								\
41
	    X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0);	\
42
	    X##_f0 += X##_f0;						\
43
	  }								\
44
	else								\
45
	  {								\
46
	    X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N));	\
47
	    X##_f0 <<= (N);						\
48
	  }								\
49
      }									\
50
    else								\
51
      {									\
52
	X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);			\
53
	X##_f0 = 0;							\
54
      }									\
55
  } while (0)
56

57
#define _FP_FRAC_SRL_2(X,N)						\
58
  do {									\
59
    if ((N) < _FP_W_TYPE_SIZE)						\
60
      {									\
61
	X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N));	\
62
	X##_f1 >>= (N);							\
63
      }									\
64
    else								\
65
      {									\
66
	X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);			\
67
	X##_f1 = 0;							\
68
      }									\
69
  } while (0)
70

71
/* Right shift with sticky-lsb.  */
72
#define _FP_FRAC_SRS_2(X,N,sz)						\
73
  do {									\
74
    if ((N) < _FP_W_TYPE_SIZE)						\
75
      {									\
76
	X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) |	\
77
		  (__builtin_constant_p(N) && (N) == 1			\
78
		   ? X##_f0 & 1						\
79
		   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0));	\
80
	X##_f1 >>= (N);							\
81
      }									\
82
    else								\
83
      {									\
84
	X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) |			\
85
		(((X##_f1 << (2*_FP_W_TYPE_SIZE - (N))) | X##_f0) != 0)); \
86
	X##_f1 = 0;							\
87
      }									\
88
  } while (0)
89

90
#define _FP_FRAC_ADDI_2(X,I)	\
91
  __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
92

93
#define _FP_FRAC_ADD_2(R,X,Y)	\
94
  __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
95

96
#define _FP_FRAC_SUB_2(R,X,Y)	\
97
  __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
98

99
#define _FP_FRAC_DEC_2(X,Y)	\
100
  __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
101

102
#define _FP_FRAC_CLZ_2(R,X)	\
103
  do {				\
104
    if (X##_f1)			\
105
      __FP_CLZ(R,X##_f1);	\
106
    else 			\
107
    {				\
108
      __FP_CLZ(R,X##_f0);	\
109
      R += _FP_W_TYPE_SIZE;	\
110
    }				\
111
  } while(0)
112

113
/* Predicates */
114
#define _FP_FRAC_NEGP_2(X)	((_FP_WS_TYPE)X##_f1 < 0)
115
#define _FP_FRAC_ZEROP_2(X)	((X##_f1 | X##_f0) == 0)
116
#define _FP_FRAC_OVERP_2(fs,X)	(_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
117
#define _FP_FRAC_CLEAR_OVERP_2(fs,X)	(_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
118
#define _FP_FRAC_EQ_2(X, Y)	(X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
119
#define _FP_FRAC_GT_2(X, Y)	\
120
  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
121
#define _FP_FRAC_GE_2(X, Y)	\
122
  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
123

124
#define _FP_ZEROFRAC_2		0, 0
125
#define _FP_MINFRAC_2		0, 1
126
#define _FP_MAXFRAC_2		(~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
127

128
/*
129
 * Internals 
130
 */
131

132
#define __FP_FRAC_SET_2(X,I1,I0)	(X##_f0 = I0, X##_f1 = I1)
133

134
#define __FP_CLZ_2(R, xh, xl)	\
135
  do {				\
136
    if (xh)			\
137
      __FP_CLZ(R,xh);		\
138
    else 			\
139
    {				\
140
      __FP_CLZ(R,xl);		\
141
      R += _FP_W_TYPE_SIZE;	\
142
    }				\
143
  } while(0)
144

145
#if 0
146

147
#ifndef __FP_FRAC_ADDI_2
148
#define __FP_FRAC_ADDI_2(xh, xl, i)	\
149
  (xh += ((xl += i) < i))
150
#endif
151
#ifndef __FP_FRAC_ADD_2
152
#define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl)	\
153
  (rh = xh + yh + ((rl = xl + yl) < xl))
154
#endif
155
#ifndef __FP_FRAC_SUB_2
156
#define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl)	\
157
  (rh = xh - yh - ((rl = xl - yl) > xl))
158
#endif
159
#ifndef __FP_FRAC_DEC_2
160
#define __FP_FRAC_DEC_2(xh, xl, yh, yl)	\
161
  do {					\
162
    UWtype _t = xl;			\
163
    xh -= yh + ((xl -= yl) > _t);	\
164
  } while (0)
165
#endif
166

167
#else
168

169
#undef __FP_FRAC_ADDI_2
170
#define __FP_FRAC_ADDI_2(xh, xl, i)	add_ssaaaa(xh, xl, xh, xl, 0, i)
171
#undef __FP_FRAC_ADD_2
172
#define __FP_FRAC_ADD_2			add_ssaaaa
173
#undef __FP_FRAC_SUB_2
174
#define __FP_FRAC_SUB_2			sub_ddmmss
175
#undef __FP_FRAC_DEC_2
176
#define __FP_FRAC_DEC_2(xh, xl, yh, yl)	sub_ddmmss(xh, xl, xh, xl, yh, yl)
177

178
#endif
179

180
/*
181
 * Unpack the raw bits of a native fp value.  Do not classify or
182
 * normalize the data.
183
 */
184

185
#define _FP_UNPACK_RAW_2(fs, X, val)			\
186
  do {							\
187
    union _FP_UNION_##fs _flo; _flo.flt = (val);	\
188
							\
189
    X##_f0 = _flo.bits.frac0;				\
190
    X##_f1 = _flo.bits.frac1;				\
191
    X##_e  = _flo.bits.exp;				\
192
    X##_s  = _flo.bits.sign;				\
193
  } while (0)
194

195
#define _FP_UNPACK_RAW_2_P(fs, X, val)			\
196
  do {							\
197
    union _FP_UNION_##fs *_flo =			\
198
      (union _FP_UNION_##fs *)(val);			\
199
							\
200
    X##_f0 = _flo->bits.frac0;				\
201
    X##_f1 = _flo->bits.frac1;				\
202
    X##_e  = _flo->bits.exp;				\
203
    X##_s  = _flo->bits.sign;				\
204
  } while (0)
205

206

207
/*
208
 * Repack the raw bits of a native fp value.
209
 */
210

211
#define _FP_PACK_RAW_2(fs, val, X)			\
212
  do {							\
213
    union _FP_UNION_##fs _flo;				\
214
							\
215
    _flo.bits.frac0 = X##_f0;				\
216
    _flo.bits.frac1 = X##_f1;				\
217
    _flo.bits.exp   = X##_e;				\
218
    _flo.bits.sign  = X##_s;				\
219
							\
220
    (val) = _flo.flt;					\
221
  } while (0)
222

223
#define _FP_PACK_RAW_2_P(fs, val, X)			\
224
  do {							\
225
    union _FP_UNION_##fs *_flo =			\
226
      (union _FP_UNION_##fs *)(val);			\
227
							\
228
    _flo->bits.frac0 = X##_f0;				\
229
    _flo->bits.frac1 = X##_f1;				\
230
    _flo->bits.exp   = X##_e;				\
231
    _flo->bits.sign  = X##_s;				\
232
  } while (0)
233

234

235
/*
236
 * Multiplication algorithms:
237
 */
238

239
/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
240

241
#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)			\
242
  do {									\
243
    _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);	\
244
									\
245
    doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0);	\
246
    doit(_b_f1, _b_f0, X##_f0, Y##_f1);					\
247
    doit(_c_f1, _c_f0, X##_f1, Y##_f0);					\
248
    doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1);	\
249
									\
250
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
251
		    _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0,		\
252
		    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
253
		    _FP_FRAC_WORD_4(_z,1));				\
254
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
255
		    _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0,		\
256
		    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
257
		    _FP_FRAC_WORD_4(_z,1));				\
258
									\
259
    /* Normalize since we know where the msb of the multiplicands	\
260
       were (bit B), we know that the msb of the of the product is	\
261
       at either 2B or 2B-1.  */					\
262
    _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);			\
263
    R##_f0 = _FP_FRAC_WORD_4(_z,0);					\
264
    R##_f1 = _FP_FRAC_WORD_4(_z,1);					\
265
  } while (0)
266

267
/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
268
   Do only 3 multiplications instead of four. This one is for machines
269
   where multiplication is much more expensive than subtraction.  */
270

271
#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)		\
272
  do {									\
273
    _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);	\
274
    _FP_W_TYPE _d;							\
275
    int _c1, _c2;							\
276
									\
277
    _b_f0 = X##_f0 + X##_f1;						\
278
    _c1 = _b_f0 < X##_f0;						\
279
    _b_f1 = Y##_f0 + Y##_f1;						\
280
    _c2 = _b_f1 < Y##_f0;						\
281
    doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0);			\
282
    doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1);	\
283
    doit(_c_f1, _c_f0, X##_f1, Y##_f1);					\
284
									\
285
    _b_f0 &= -_c2;							\
286
    _b_f1 &= -_c1;							\
287
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
288
		    _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d,		\
289
		    0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1));	\
290
    __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
291
		     _b_f0);						\
292
    __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
293
		     _b_f1);						\
294
    __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
295
		    _FP_FRAC_WORD_4(_z,1),				\
296
		    0, _d, _FP_FRAC_WORD_4(_z,0));			\
297
    __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
298
		    _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0);		\
299
    __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2),	\
300
		    _c_f1, _c_f0,					\
301
		    _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2));	\
302
									\
303
    /* Normalize since we know where the msb of the multiplicands	\
304
       were (bit B), we know that the msb of the of the product is	\
305
       at either 2B or 2B-1.  */					\
306
    _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);			\
307
    R##_f0 = _FP_FRAC_WORD_4(_z,0);					\
308
    R##_f1 = _FP_FRAC_WORD_4(_z,1);					\
309
  } while (0)
310

311
#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)				\
312
  do {									\
313
    _FP_FRAC_DECL_4(_z);						\
314
    _FP_W_TYPE _x[2], _y[2];						\
315
    _x[0] = X##_f0; _x[1] = X##_f1;					\
316
    _y[0] = Y##_f0; _y[1] = Y##_f1;					\
317
									\
318
    mpn_mul_n(_z_f, _x, _y, 2);						\
319
									\
320
    /* Normalize since we know where the msb of the multiplicands	\
321
       were (bit B), we know that the msb of the of the product is	\
322
       at either 2B or 2B-1.  */					\
323
    _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);			\
324
    R##_f0 = _z_f[0];							\
325
    R##_f1 = _z_f[1];							\
326
  } while (0)
327

328
/* Do at most 120x120=240 bits multiplication using double floating
329
   point multiplication.  This is useful if floating point
330
   multiplication has much bigger throughput than integer multiply.
331
   It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
332
   between 106 and 120 only.  
333
   Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
334
   SETFETZ is a macro which will disable all FPU exceptions and set rounding
335
   towards zero,  RESETFE should optionally reset it back.  */
336

337
#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe)	\
338
  do {										\
339
    static const double _const[] = {						\
340
      /* 2^-24 */ 5.9604644775390625e-08,					\
341
      /* 2^-48 */ 3.5527136788005009e-15,					\
342
      /* 2^-72 */ 2.1175823681357508e-22,					\
343
      /* 2^-96 */ 1.2621774483536189e-29,					\
344
      /* 2^28 */ 2.68435456e+08,						\
345
      /* 2^4 */ 1.600000e+01,							\
346
      /* 2^-20 */ 9.5367431640625e-07,						\
347
      /* 2^-44 */ 5.6843418860808015e-14,					\
348
      /* 2^-68 */ 3.3881317890172014e-21,					\
349
      /* 2^-92 */ 2.0194839173657902e-28,					\
350
      /* 2^-116 */ 1.2037062152420224e-35};					\
351
    double _a240, _b240, _c240, _d240, _e240, _f240, 				\
352
	   _g240, _h240, _i240, _j240, _k240;					\
353
    union { double d; UDItype i; } _l240, _m240, _n240, _o240,			\
354
				   _p240, _q240, _r240, _s240;			\
355
    UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;			\
356
										\
357
    if (wfracbits < 106 || wfracbits > 120)					\
358
      abort();									\
359
										\
360
    setfetz;									\
361
										\
362
    _e240 = (double)(long)(X##_f0 & 0xffffff);					\
363
    _j240 = (double)(long)(Y##_f0 & 0xffffff);					\
364
    _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff);				\
365
    _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff);				\
366
    _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48));	\
367
    _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48));	\
368
    _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff);				\
369
    _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff);				\
370
    _a240 = (double)(long)(X##_f1 >> 32);					\
371
    _f240 = (double)(long)(Y##_f1 >> 32);					\
372
    _e240 *= _const[3];								\
373
    _j240 *= _const[3];								\
374
    _d240 *= _const[2];								\
375
    _i240 *= _const[2];								\
376
    _c240 *= _const[1];								\
377
    _h240 *= _const[1];								\
378
    _b240 *= _const[0];								\
379
    _g240 *= _const[0];								\
380
    _s240.d =							      _e240*_j240;\
381
    _r240.d =						_d240*_j240 + _e240*_i240;\
382
    _q240.d =				  _c240*_j240 + _d240*_i240 + _e240*_h240;\
383
    _p240.d =		    _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
384
    _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
385
    _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;		\
386
    _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;				\
387
    _l240.d = _a240*_g240 + _b240*_f240;					\
388
    _k240 =   _a240*_f240;							\
389
    _r240.d += _s240.d;								\
390
    _q240.d += _r240.d;								\
391
    _p240.d += _q240.d;								\
392
    _o240.d += _p240.d;								\
393
    _n240.d += _o240.d;								\
394
    _m240.d += _n240.d;								\
395
    _l240.d += _m240.d;								\
396
    _k240 += _l240.d;								\
397
    _s240.d -= ((_const[10]+_s240.d)-_const[10]);				\
398
    _r240.d -= ((_const[9]+_r240.d)-_const[9]);					\
399
    _q240.d -= ((_const[8]+_q240.d)-_const[8]);					\
400
    _p240.d -= ((_const[7]+_p240.d)-_const[7]);					\
401
    _o240.d += _const[7];							\
402
    _n240.d += _const[6];							\
403
    _m240.d += _const[5];							\
404
    _l240.d += _const[4];							\
405
    if (_s240.d != 0.0) _y240 = 1;						\
406
    if (_r240.d != 0.0) _y240 = 1;						\
407
    if (_q240.d != 0.0) _y240 = 1;						\
408
    if (_p240.d != 0.0) _y240 = 1;						\
409
    _t240 = (DItype)_k240;							\
410
    _u240 = _l240.i;								\
411
    _v240 = _m240.i;								\
412
    _w240 = _n240.i;								\
413
    _x240 = _o240.i;								\
414
    R##_f1 = (_t240 << (128 - (wfracbits - 1)))					\
415
	     | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104));			\
416
    R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1)))			\
417
    	     | ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))			\
418
    	     | ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))			\
419
    	     | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))			\
420
    	     | _y240;								\
421
    resetfe;									\
422
  } while (0)
423

424
/*
425
 * Division algorithms:
426
 */
427

428
#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)				\
429
  do {									\
430
    _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0;		\
431
    if (_FP_FRAC_GT_2(X, Y))						\
432
      {									\
433
	_n_f2 = X##_f1 >> 1;						\
434
	_n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;		\
435
	_n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1);			\
436
      }									\
437
    else								\
438
      {									\
439
	R##_e--;							\
440
	_n_f2 = X##_f1;							\
441
	_n_f1 = X##_f0;							\
442
	_n_f0 = 0;							\
443
      }									\
444
									\
445
    /* Normalize, i.e. make the most significant bit of the 		\
446
       denominator set. */						\
447
    _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs);				\
448
									\
449
    udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1);			\
450
    umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0);				\
451
    _r_f0 = _n_f0;							\
452
    if (_FP_FRAC_GT_2(_m, _r))						\
453
      {									\
454
	R##_f1--;							\
455
	_FP_FRAC_ADD_2(_r, Y, _r);					\
456
	if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))		\
457
	  {								\
458
	    R##_f1--;							\
459
	    _FP_FRAC_ADD_2(_r, Y, _r);					\
460
	  }								\
461
      }									\
462
    _FP_FRAC_DEC_2(_r, _m);						\
463
									\
464
    if (_r_f1 == Y##_f1)						\
465
      {									\
466
	/* This is a special case, not an optimization			\
467
	   (_r/Y##_f1 would not fit into UWtype).			\
468
	   As _r is guaranteed to be < Y,  R##_f0 can be either		\
469
	   (UWtype)-1 or (UWtype)-2.  But as we know what kind		\
470
	   of bits it is (sticky, guard, round),  we don't care.	\
471
	   We also don't care what the reminder is,  because the	\
472
	   guard bit will be set anyway.  -jj */			\
473
	R##_f0 = -1;							\
474
      }									\
475
    else								\
476
      {									\
477
	udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1);		\
478
	umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0);			\
479
	_r_f0 = 0;							\
480
	if (_FP_FRAC_GT_2(_m, _r))					\
481
	  {								\
482
	    R##_f0--;							\
483
	    _FP_FRAC_ADD_2(_r, Y, _r);					\
484
	    if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))		\
485
	      {								\
486
		R##_f0--;						\
487
		_FP_FRAC_ADD_2(_r, Y, _r);				\
488
	      }								\
489
	  }								\
490
	if (!_FP_FRAC_EQ_2(_r, _m))					\
491
	  R##_f0 |= _FP_WORK_STICKY;					\
492
      }									\
493
  } while (0)
494

495

496
#define _FP_DIV_MEAT_2_gmp(fs, R, X, Y)					\
497
  do {									\
498
    _FP_W_TYPE _x[4], _y[2], _z[4];					\
499
    _y[0] = Y##_f0; _y[1] = Y##_f1;					\
500
    _x[0] = _x[3] = 0;							\
501
    if (_FP_FRAC_GT_2(X, Y))						\
502
      {									\
503
	R##_e++;							\
504
	_x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) |	\
505
		 X##_f1 >> (_FP_W_TYPE_SIZE -				\
506
			    (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE)));	\
507
	_x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE);	\
508
      }									\
509
    else								\
510
      {									\
511
	_x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) |	\
512
		 X##_f1 >> (_FP_W_TYPE_SIZE -				\
513
			    (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE)));	\
514
	_x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE);	\
515
      }									\
516
									\
517
    (void) mpn_divrem (_z, 0, _x, 4, _y, 2);				\
518
    R##_f1 = _z[1];							\
519
    R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0);				\
520
  } while (0)
521

522

523
/*
524
 * Square root algorithms:
525
 * We have just one right now, maybe Newton approximation
526
 * should be added for those machines where division is fast.
527
 */
528
 
529
#define _FP_SQRT_MEAT_2(R, S, T, X, q)			\
530
  do {							\
531
    while (q)						\
532
      {							\
533
	T##_f1 = S##_f1 + q;				\
534
	if (T##_f1 <= X##_f1)				\
535
	  {						\
536
	    S##_f1 = T##_f1 + q;			\
537
	    X##_f1 -= T##_f1;				\
538
	    R##_f1 += q;				\
539
	  }						\
540
	_FP_FRAC_SLL_2(X, 1);				\
541
	q >>= 1;					\
542
      }							\
543
    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);		\
544
    while (q != _FP_WORK_ROUND)				\
545
      {							\
546
	T##_f0 = S##_f0 + q;				\
547
	T##_f1 = S##_f1;				\
548
	if (T##_f1 < X##_f1 || 				\
549
	    (T##_f1 == X##_f1 && T##_f0 <= X##_f0))	\
550
	  {						\
551
	    S##_f0 = T##_f0 + q;			\
552
	    S##_f1 += (T##_f0 > S##_f0);		\
553
	    _FP_FRAC_DEC_2(X, T);			\
554
	    R##_f0 += q;				\
555
	  }						\
556
	_FP_FRAC_SLL_2(X, 1);				\
557
	q >>= 1;					\
558
      }							\
559
    if (X##_f0 | X##_f1)				\
560
      {							\
561
	if (S##_f1 < X##_f1 || 				\
562
	    (S##_f1 == X##_f1 && S##_f0 < X##_f0))	\
563
	  R##_f0 |= _FP_WORK_ROUND;			\
564
	R##_f0 |= _FP_WORK_STICKY;			\
565
      }							\
566
  } while (0)
567

568

569
/*
570
 * Assembly/disassembly for converting to/from integral types.  
571
 * No shifting or overflow handled here.
572
 */
573

574
#define _FP_FRAC_ASSEMBLE_2(r, X, rsize)	\
575
  do {						\
576
    if (rsize <= _FP_W_TYPE_SIZE)		\
577
      r = X##_f0;				\
578
    else					\
579
      {						\
580
	r = X##_f1;				\
581
	r <<= _FP_W_TYPE_SIZE;			\
582
	r += X##_f0;				\
583
      }						\
584
  } while (0)
585

586
#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)				\
587
  do {									\
588
    X##_f0 = r;								\
589
    X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);	\
590
  } while (0)
591

592
/*
593
 * Convert FP values between word sizes
594
 */
595

596
#define _FP_FRAC_CONV_1_2(dfs, sfs, D, S)				\
597
  do {									\
598
    if (S##_c != FP_CLS_NAN)						\
599
      _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),	\
600
		     _FP_WFRACBITS_##sfs);				\
601
    else								\
602
      _FP_FRAC_SRL_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs));	\
603
    D##_f = S##_f0;							\
604
  } while (0)
605

606
#define _FP_FRAC_CONV_2_1(dfs, sfs, D, S)				\
607
  do {									\
608
    D##_f0 = S##_f;							\
609
    D##_f1 = 0;								\
610
    _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));	\
611
  } while (0)
612

613
#endif
614

615
Product

Resources

Company