CoCalc -- op-2.h

GitHub Repository: torvalds/linux
Path: blob/master/include/math-emu/op-2.h
⁴⁹⁶⁸⁶ views
1
/* Software floating-point emulation.
2
   Basic two-word fraction declaration and manipulation.
3
   Copyright (C) 1997,1998,1999 Free Software Foundation, Inc.
4
   This file is part of the GNU C Library.
5
   Contributed by Richard Henderson ([email protected]),
6
		  Jakub Jelinek ([email protected]),
7
		  David S. Miller ([email protected]) and
8
		  Peter Maydell ([email protected]).
9

10
   The GNU C Library is free software; you can redistribute it and/or
11
   modify it under the terms of the GNU Library General Public License as
12
   published by the Free Software Foundation; either version 2 of the
13
   License, or (at your option) any later version.
14

15
   The GNU C Library is distributed in the hope that it will be useful,
16
   but WITHOUT ANY WARRANTY; without even the implied warranty of
17
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18
   Library General Public License for more details.
19

20
   You should have received a copy of the GNU Library General Public
21
   License along with the GNU C Library; see the file COPYING.LIB.  If
22
   not, write to the Free Software Foundation, Inc.,
23
   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
24

25
#ifndef __MATH_EMU_OP_2_H__
26
#define __MATH_EMU_OP_2_H__
27

28
#define _FP_FRAC_DECL_2(X)	_FP_W_TYPE X##_f0 = 0, X##_f1 = 0
29
#define _FP_FRAC_COPY_2(D,S)	(D##_f0 = S##_f0, D##_f1 = S##_f1)
30
#define _FP_FRAC_SET_2(X,I)	__FP_FRAC_SET_2(X, I)
31
#define _FP_FRAC_HIGH_2(X)	(X##_f1)
32
#define _FP_FRAC_LOW_2(X)	(X##_f0)
33
#define _FP_FRAC_WORD_2(X,w)	(X##_f##w)
34
#define _FP_FRAC_SLL_2(X, N) (						       \
35
	(void) (((N) < _FP_W_TYPE_SIZE)					       \
36
	  ? ({								       \
37
		if (__builtin_constant_p(N) && (N) == 1) {		       \
38
			X##_f1 = X##_f1 + X##_f1 +			       \
39
				(((_FP_WS_TYPE) (X##_f0)) < 0);		       \
40
			X##_f0 += X##_f0;				       \
41
		} else {						       \
42
			X##_f1 = X##_f1 << (N) | X##_f0 >>		       \
43
						(_FP_W_TYPE_SIZE - (N));       \
44
			X##_f0 <<= (N);					       \
45
		}							       \
46
		0;							       \
47
	    })								       \
48
	  : ({								       \
49
	      X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);		       \
50
	      X##_f0 = 0;						       \
51
	  })))
52

53

54
#define _FP_FRAC_SRL_2(X, N) (						       \
55
	(void) (((N) < _FP_W_TYPE_SIZE)					       \
56
	  ? ({								       \
57
	      X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N));      \
58
	      X##_f1 >>= (N);						       \
59
	    })								       \
60
	  : ({								       \
61
	      X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);		       \
62
	      X##_f1 = 0;						       \
63
	    })))
64

65

66
/* Right shift with sticky-lsb.  */
67
#define _FP_FRAC_SRS_2(X, N, sz) (					       \
68
	(void) (((N) < _FP_W_TYPE_SIZE)					       \
69
	  ? ({								       \
70
	      X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)      \
71
			| (__builtin_constant_p(N) && (N) == 1		       \
72
			   ? X##_f0 & 1					       \
73
			   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0));       \
74
		X##_f1 >>= (N);						       \
75
	    })								       \
76
	  : ({								       \
77
	      X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)		       \
78
			| ((((N) == _FP_W_TYPE_SIZE			       \
79
			     ? 0					       \
80
			     : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N))))          \
81
			    | X##_f0) != 0));				       \
82
	      X##_f1 = 0;						       \
83
	    })))
84

85
#define _FP_FRAC_ADDI_2(X,I)	\
86
  __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)
87

88
#define _FP_FRAC_ADD_2(R,X,Y)	\
89
  __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
90

91
#define _FP_FRAC_SUB_2(R,X,Y)	\
92
  __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)
93

94
#define _FP_FRAC_DEC_2(X,Y)	\
95
  __FP_FRAC_DEC_2(X##_f1, X##_f0, Y##_f1, Y##_f0)
96

97
#define _FP_FRAC_CLZ_2(R,X)	\
98
  do {				\
99
    if (X##_f1)			\
100
      __FP_CLZ(R,X##_f1);	\
101
    else 			\
102
    {				\
103
      __FP_CLZ(R,X##_f0);	\
104
      R += _FP_W_TYPE_SIZE;	\
105
    }				\
106
  } while(0)
107

108
/* Predicates */
109
#define _FP_FRAC_NEGP_2(X)	((_FP_WS_TYPE)X##_f1 < 0)
110
#define _FP_FRAC_ZEROP_2(X)	((X##_f1 | X##_f0) == 0)
111
#define _FP_FRAC_OVERP_2(fs,X)	(_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
112
#define _FP_FRAC_CLEAR_OVERP_2(fs,X)	(_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
113
#define _FP_FRAC_EQ_2(X, Y)	(X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
114
#define _FP_FRAC_GT_2(X, Y)	\
115
  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
116
#define _FP_FRAC_GE_2(X, Y)	\
117
  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
118

119
#define _FP_ZEROFRAC_2		0, 0
120
#define _FP_MINFRAC_2		0, 1
121
#define _FP_MAXFRAC_2		(~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
122

123
/*
124
 * Internals 
125
 */
126

127
#define __FP_FRAC_SET_2(X,I1,I0)	(X##_f0 = I0, X##_f1 = I1)
128

129
#define __FP_CLZ_2(R, xh, xl)	\
130
  do {				\
131
    if (xh)			\
132
      __FP_CLZ(R,xh);		\
133
    else 			\
134
    {				\
135
      __FP_CLZ(R,xl);		\
136
      R += _FP_W_TYPE_SIZE;	\
137
    }				\
138
  } while(0)
139

140
#if 0
141

142
#ifndef __FP_FRAC_ADDI_2
143
#define __FP_FRAC_ADDI_2(xh, xl, i)	\
144
  (xh += ((xl += i) < i))
145
#endif
146
#ifndef __FP_FRAC_ADD_2
147
#define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl)	\
148
  (rh = xh + yh + ((rl = xl + yl) < xl))
149
#endif
150
#ifndef __FP_FRAC_SUB_2
151
#define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl)	\
152
  (rh = xh - yh - ((rl = xl - yl) > xl))
153
#endif
154
#ifndef __FP_FRAC_DEC_2
155
#define __FP_FRAC_DEC_2(xh, xl, yh, yl)	\
156
  do {					\
157
    UWtype _t = xl;			\
158
    xh -= yh + ((xl -= yl) > _t);	\
159
  } while (0)
160
#endif
161

162
#else
163

164
#undef __FP_FRAC_ADDI_2
165
#define __FP_FRAC_ADDI_2(xh, xl, i)	add_ssaaaa(xh, xl, xh, xl, 0, i)
166
#undef __FP_FRAC_ADD_2
167
#define __FP_FRAC_ADD_2			add_ssaaaa
168
#undef __FP_FRAC_SUB_2
169
#define __FP_FRAC_SUB_2			sub_ddmmss
170
#undef __FP_FRAC_DEC_2
171
#define __FP_FRAC_DEC_2(xh, xl, yh, yl)	sub_ddmmss(xh, xl, xh, xl, yh, yl)
172

173
#endif
174

175
/*
176
 * Unpack the raw bits of a native fp value.  Do not classify or
177
 * normalize the data.
178
 */
179

180
#define _FP_UNPACK_RAW_2(fs, X, val)			\
181
  do {							\
182
    union _FP_UNION_##fs _flo; _flo.flt = (val);	\
183
							\
184
    X##_f0 = _flo.bits.frac0;				\
185
    X##_f1 = _flo.bits.frac1;				\
186
    X##_e  = _flo.bits.exp;				\
187
    X##_s  = _flo.bits.sign;				\
188
  } while (0)
189

190
#define _FP_UNPACK_RAW_2_P(fs, X, val)			\
191
  do {							\
192
    union _FP_UNION_##fs *_flo =			\
193
      (union _FP_UNION_##fs *)(val);			\
194
							\
195
    X##_f0 = _flo->bits.frac0;				\
196
    X##_f1 = _flo->bits.frac1;				\
197
    X##_e  = _flo->bits.exp;				\
198
    X##_s  = _flo->bits.sign;				\
199
  } while (0)
200

201

202
/*
203
 * Repack the raw bits of a native fp value.
204
 */
205

206
#define _FP_PACK_RAW_2(fs, val, X)			\
207
  do {							\
208
    union _FP_UNION_##fs _flo;				\
209
							\
210
    _flo.bits.frac0 = X##_f0;				\
211
    _flo.bits.frac1 = X##_f1;				\
212
    _flo.bits.exp   = X##_e;				\
213
    _flo.bits.sign  = X##_s;				\
214
							\
215
    (val) = _flo.flt;					\
216
  } while (0)
217

218
#define _FP_PACK_RAW_2_P(fs, val, X)			\
219
  do {							\
220
    union _FP_UNION_##fs *_flo =			\
221
      (union _FP_UNION_##fs *)(val);			\
222
							\
223
    _flo->bits.frac0 = X##_f0;				\
224
    _flo->bits.frac1 = X##_f1;				\
225
    _flo->bits.exp   = X##_e;				\
226
    _flo->bits.sign  = X##_s;				\
227
  } while (0)
228

229

230
/*
231
 * Multiplication algorithms:
232
 */
233

234
/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
235

236
#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit)			\
237
  do {									\
238
    _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);	\
239
									\
240
    doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0);	\
241
    doit(_b_f1, _b_f0, X##_f0, Y##_f1);					\
242
    doit(_c_f1, _c_f0, X##_f1, Y##_f0);					\
243
    doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1);	\
244
									\
245
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
246
		    _FP_FRAC_WORD_4(_z,1), 0, _b_f1, _b_f0,		\
247
		    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
248
		    _FP_FRAC_WORD_4(_z,1));				\
249
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
250
		    _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0,		\
251
		    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
252
		    _FP_FRAC_WORD_4(_z,1));				\
253
									\
254
    /* Normalize since we know where the msb of the multiplicands	\
255
       were (bit B), we know that the msb of the of the product is	\
256
       at either 2B or 2B-1.  */					\
257
    _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);			\
258
    R##_f0 = _FP_FRAC_WORD_4(_z,0);					\
259
    R##_f1 = _FP_FRAC_WORD_4(_z,1);					\
260
  } while (0)
261

262
/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
263
   Do only 3 multiplications instead of four. This one is for machines
264
   where multiplication is much more expensive than subtraction.  */
265

266
#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit)		\
267
  do {									\
268
    _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);	\
269
    _FP_W_TYPE _d;							\
270
    int _c1, _c2;							\
271
									\
272
    _b_f0 = X##_f0 + X##_f1;						\
273
    _c1 = _b_f0 < X##_f0;						\
274
    _b_f1 = Y##_f0 + Y##_f1;						\
275
    _c2 = _b_f1 < Y##_f0;						\
276
    doit(_d, _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0);			\
277
    doit(_FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1), _b_f0, _b_f1);	\
278
    doit(_c_f1, _c_f0, X##_f1, Y##_f1);					\
279
									\
280
    _b_f0 &= -_c2;							\
281
    _b_f1 &= -_c1;							\
282
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
283
		    _FP_FRAC_WORD_4(_z,1), (_c1 & _c2), 0, _d,		\
284
		    0, _FP_FRAC_WORD_4(_z,2), _FP_FRAC_WORD_4(_z,1));	\
285
    __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
286
		     _b_f0);						\
287
    __FP_FRAC_ADDI_2(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
288
		     _b_f1);						\
289
    __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
290
		    _FP_FRAC_WORD_4(_z,1),				\
291
		    0, _d, _FP_FRAC_WORD_4(_z,0));			\
292
    __FP_FRAC_DEC_3(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),	\
293
		    _FP_FRAC_WORD_4(_z,1), 0, _c_f1, _c_f0);		\
294
    __FP_FRAC_ADD_2(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2),	\
295
		    _c_f1, _c_f0,					\
296
		    _FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2));	\
297
									\
298
    /* Normalize since we know where the msb of the multiplicands	\
299
       were (bit B), we know that the msb of the of the product is	\
300
       at either 2B or 2B-1.  */					\
301
    _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);			\
302
    R##_f0 = _FP_FRAC_WORD_4(_z,0);					\
303
    R##_f1 = _FP_FRAC_WORD_4(_z,1);					\
304
  } while (0)
305

306
#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y)				\
307
  do {									\
308
    _FP_FRAC_DECL_4(_z);						\
309
    _FP_W_TYPE _x[2], _y[2];						\
310
    _x[0] = X##_f0; _x[1] = X##_f1;					\
311
    _y[0] = Y##_f0; _y[1] = Y##_f1;					\
312
									\
313
    mpn_mul_n(_z_f, _x, _y, 2);						\
314
									\
315
    /* Normalize since we know where the msb of the multiplicands	\
316
       were (bit B), we know that the msb of the of the product is	\
317
       at either 2B or 2B-1.  */					\
318
    _FP_FRAC_SRS_4(_z, wfracbits-1, 2*wfracbits);			\
319
    R##_f0 = _z_f[0];							\
320
    R##_f1 = _z_f[1];							\
321
  } while (0)
322

323
/* Do at most 120x120=240 bits multiplication using double floating
324
   point multiplication.  This is useful if floating point
325
   multiplication has much bigger throughput than integer multiply.
326
   It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
327
   between 106 and 120 only.  
328
   Caller guarantees that X and Y has (1LLL << (wfracbits - 1)) set.
329
   SETFETZ is a macro which will disable all FPU exceptions and set rounding
330
   towards zero,  RESETFE should optionally reset it back.  */
331

332
#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe)	\
333
  do {										\
334
    static const double _const[] = {						\
335
      /* 2^-24 */ 5.9604644775390625e-08,					\
336
      /* 2^-48 */ 3.5527136788005009e-15,					\
337
      /* 2^-72 */ 2.1175823681357508e-22,					\
338
      /* 2^-96 */ 1.2621774483536189e-29,					\
339
      /* 2^28 */ 2.68435456e+08,						\
340
      /* 2^4 */ 1.600000e+01,							\
341
      /* 2^-20 */ 9.5367431640625e-07,						\
342
      /* 2^-44 */ 5.6843418860808015e-14,					\
343
      /* 2^-68 */ 3.3881317890172014e-21,					\
344
      /* 2^-92 */ 2.0194839173657902e-28,					\
345
      /* 2^-116 */ 1.2037062152420224e-35};					\
346
    double _a240, _b240, _c240, _d240, _e240, _f240, 				\
347
	   _g240, _h240, _i240, _j240, _k240;					\
348
    union { double d; UDItype i; } _l240, _m240, _n240, _o240,			\
349
				   _p240, _q240, _r240, _s240;			\
350
    UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0;			\
351
										\
352
    if (wfracbits < 106 || wfracbits > 120)					\
353
      abort();									\
354
										\
355
    setfetz;									\
356
										\
357
    _e240 = (double)(long)(X##_f0 & 0xffffff);					\
358
    _j240 = (double)(long)(Y##_f0 & 0xffffff);					\
359
    _d240 = (double)(long)((X##_f0 >> 24) & 0xffffff);				\
360
    _i240 = (double)(long)((Y##_f0 >> 24) & 0xffffff);				\
361
    _c240 = (double)(long)(((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48));	\
362
    _h240 = (double)(long)(((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48));	\
363
    _b240 = (double)(long)((X##_f1 >> 8) & 0xffffff);				\
364
    _g240 = (double)(long)((Y##_f1 >> 8) & 0xffffff);				\
365
    _a240 = (double)(long)(X##_f1 >> 32);					\
366
    _f240 = (double)(long)(Y##_f1 >> 32);					\
367
    _e240 *= _const[3];								\
368
    _j240 *= _const[3];								\
369
    _d240 *= _const[2];								\
370
    _i240 *= _const[2];								\
371
    _c240 *= _const[1];								\
372
    _h240 *= _const[1];								\
373
    _b240 *= _const[0];								\
374
    _g240 *= _const[0];								\
375
    _s240.d =							      _e240*_j240;\
376
    _r240.d =						_d240*_j240 + _e240*_i240;\
377
    _q240.d =				  _c240*_j240 + _d240*_i240 + _e240*_h240;\
378
    _p240.d =		    _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240;\
379
    _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240;\
380
    _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240;		\
381
    _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240;				\
382
    _l240.d = _a240*_g240 + _b240*_f240;					\
383
    _k240 =   _a240*_f240;							\
384
    _r240.d += _s240.d;								\
385
    _q240.d += _r240.d;								\
386
    _p240.d += _q240.d;								\
387
    _o240.d += _p240.d;								\
388
    _n240.d += _o240.d;								\
389
    _m240.d += _n240.d;								\
390
    _l240.d += _m240.d;								\
391
    _k240 += _l240.d;								\
392
    _s240.d -= ((_const[10]+_s240.d)-_const[10]);				\
393
    _r240.d -= ((_const[9]+_r240.d)-_const[9]);					\
394
    _q240.d -= ((_const[8]+_q240.d)-_const[8]);					\
395
    _p240.d -= ((_const[7]+_p240.d)-_const[7]);					\
396
    _o240.d += _const[7];							\
397
    _n240.d += _const[6];							\
398
    _m240.d += _const[5];							\
399
    _l240.d += _const[4];							\
400
    if (_s240.d != 0.0) _y240 = 1;						\
401
    if (_r240.d != 0.0) _y240 = 1;						\
402
    if (_q240.d != 0.0) _y240 = 1;						\
403
    if (_p240.d != 0.0) _y240 = 1;						\
404
    _t240 = (DItype)_k240;							\
405
    _u240 = _l240.i;								\
406
    _v240 = _m240.i;								\
407
    _w240 = _n240.i;								\
408
    _x240 = _o240.i;								\
409
    R##_f1 = (_t240 << (128 - (wfracbits - 1)))					\
410
	     | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104));			\
411
    R##_f0 = ((_u240 & 0xffffff) << (168 - (wfracbits - 1)))			\
412
    	     | ((_v240 & 0xffffff) << (144 - (wfracbits - 1)))			\
413
    	     | ((_w240 & 0xffffff) << (120 - (wfracbits - 1)))			\
414
    	     | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96))			\
415
    	     | _y240;								\
416
    resetfe;									\
417
  } while (0)
418

419
/*
420
 * Division algorithms:
421
 */
422

423
#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y)				\
424
  do {									\
425
    _FP_W_TYPE _n_f2, _n_f1, _n_f0, _r_f1, _r_f0, _m_f1, _m_f0;		\
426
    if (_FP_FRAC_GT_2(X, Y))						\
427
      {									\
428
	_n_f2 = X##_f1 >> 1;						\
429
	_n_f1 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;		\
430
	_n_f0 = X##_f0 << (_FP_W_TYPE_SIZE - 1);			\
431
      }									\
432
    else								\
433
      {									\
434
	R##_e--;							\
435
	_n_f2 = X##_f1;							\
436
	_n_f1 = X##_f0;							\
437
	_n_f0 = 0;							\
438
      }									\
439
									\
440
    /* Normalize, i.e. make the most significant bit of the 		\
441
       denominator set. */						\
442
    _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs);				\
443
									\
444
    udiv_qrnnd(R##_f1, _r_f1, _n_f2, _n_f1, Y##_f1);			\
445
    umul_ppmm(_m_f1, _m_f0, R##_f1, Y##_f0);				\
446
    _r_f0 = _n_f0;							\
447
    if (_FP_FRAC_GT_2(_m, _r))						\
448
      {									\
449
	R##_f1--;							\
450
	_FP_FRAC_ADD_2(_r, Y, _r);					\
451
	if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))		\
452
	  {								\
453
	    R##_f1--;							\
454
	    _FP_FRAC_ADD_2(_r, Y, _r);					\
455
	  }								\
456
      }									\
457
    _FP_FRAC_DEC_2(_r, _m);						\
458
									\
459
    if (_r_f1 == Y##_f1)						\
460
      {									\
461
	/* This is a special case, not an optimization			\
462
	   (_r/Y##_f1 would not fit into UWtype).			\
463
	   As _r is guaranteed to be < Y,  R##_f0 can be either		\
464
	   (UWtype)-1 or (UWtype)-2.  But as we know what kind		\
465
	   of bits it is (sticky, guard, round),  we don't care.	\
466
	   We also don't care what the reminder is,  because the	\
467
	   guard bit will be set anyway.  -jj */			\
468
	R##_f0 = -1;							\
469
      }									\
470
    else								\
471
      {									\
472
	udiv_qrnnd(R##_f0, _r_f1, _r_f1, _r_f0, Y##_f1);		\
473
	umul_ppmm(_m_f1, _m_f0, R##_f0, Y##_f0);			\
474
	_r_f0 = 0;							\
475
	if (_FP_FRAC_GT_2(_m, _r))					\
476
	  {								\
477
	    R##_f0--;							\
478
	    _FP_FRAC_ADD_2(_r, Y, _r);					\
479
	    if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))		\
480
	      {								\
481
		R##_f0--;						\
482
		_FP_FRAC_ADD_2(_r, Y, _r);				\
483
	      }								\
484
	  }								\
485
	if (!_FP_FRAC_EQ_2(_r, _m))					\
486
	  R##_f0 |= _FP_WORK_STICKY;					\
487
      }									\
488
  } while (0)
489

490

491
#define _FP_DIV_MEAT_2_gmp(fs, R, X, Y)					\
492
  do {									\
493
    _FP_W_TYPE _x[4], _y[2], _z[4];					\
494
    _y[0] = Y##_f0; _y[1] = Y##_f1;					\
495
    _x[0] = _x[3] = 0;							\
496
    if (_FP_FRAC_GT_2(X, Y))						\
497
      {									\
498
	R##_e++;							\
499
	_x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) |	\
500
		 X##_f1 >> (_FP_W_TYPE_SIZE -				\
501
			    (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE)));	\
502
	_x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE);	\
503
      }									\
504
    else								\
505
      {									\
506
	_x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) |	\
507
		 X##_f1 >> (_FP_W_TYPE_SIZE -				\
508
			    (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE)));	\
509
	_x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE);	\
510
      }									\
511
									\
512
    (void) mpn_divrem (_z, 0, _x, 4, _y, 2);				\
513
    R##_f1 = _z[1];							\
514
    R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0);				\
515
  } while (0)
516

517

518
/*
519
 * Square root algorithms:
520
 * We have just one right now, maybe Newton approximation
521
 * should be added for those machines where division is fast.
522
 */
523
 
524
#define _FP_SQRT_MEAT_2(R, S, T, X, q)			\
525
  do {							\
526
    while (q)						\
527
      {							\
528
	T##_f1 = S##_f1 + q;				\
529
	if (T##_f1 <= X##_f1)				\
530
	  {						\
531
	    S##_f1 = T##_f1 + q;			\
532
	    X##_f1 -= T##_f1;				\
533
	    R##_f1 += q;				\
534
	  }						\
535
	_FP_FRAC_SLL_2(X, 1);				\
536
	q >>= 1;					\
537
      }							\
538
    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);		\
539
    while (q != _FP_WORK_ROUND)				\
540
      {							\
541
	T##_f0 = S##_f0 + q;				\
542
	T##_f1 = S##_f1;				\
543
	if (T##_f1 < X##_f1 || 				\
544
	    (T##_f1 == X##_f1 && T##_f0 <= X##_f0))	\
545
	  {						\
546
	    S##_f0 = T##_f0 + q;			\
547
	    S##_f1 += (T##_f0 > S##_f0);		\
548
	    _FP_FRAC_DEC_2(X, T);			\
549
	    R##_f0 += q;				\
550
	  }						\
551
	_FP_FRAC_SLL_2(X, 1);				\
552
	q >>= 1;					\
553
      }							\
554
    if (X##_f0 | X##_f1)				\
555
      {							\
556
	if (S##_f1 < X##_f1 || 				\
557
	    (S##_f1 == X##_f1 && S##_f0 < X##_f0))	\
558
	  R##_f0 |= _FP_WORK_ROUND;			\
559
	R##_f0 |= _FP_WORK_STICKY;			\
560
      }							\
561
  } while (0)
562

563

564
/*
565
 * Assembly/disassembly for converting to/from integral types.  
566
 * No shifting or overflow handled here.
567
 */
568

569
#define _FP_FRAC_ASSEMBLE_2(r, X, rsize)	\
570
	(void) (((rsize) <= _FP_W_TYPE_SIZE)	\
571
		? ({ (r) = X##_f0; })		\
572
		: ({				\
573
		     (r) = X##_f1;		\
574
		     (r) <<= _FP_W_TYPE_SIZE;	\
575
		     (r) += X##_f0;		\
576
		    }))
577

578
#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)				\
579
  do {									\
580
    X##_f0 = r;								\
581
    X##_f1 = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);	\
582
  } while (0)
583

584
/*
585
 * Convert FP values between word sizes
586
 */
587

588
#define _FP_FRAC_CONV_1_2(dfs, sfs, D, S)				\
589
  do {									\
590
    if (S##_c != FP_CLS_NAN)						\
591
      _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),	\
592
		     _FP_WFRACBITS_##sfs);				\
593
    else								\
594
      _FP_FRAC_SRL_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs));	\
595
    D##_f = S##_f0;							\
596
  } while (0)
597

598
#define _FP_FRAC_CONV_2_1(dfs, sfs, D, S)				\
599
  do {									\
600
    D##_f0 = S##_f;							\
601
    D##_f1 = 0;								\
602
    _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));	\
603
  } while (0)
604

605
#endif
606

607
Product

Resources

Company