Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/misc/r128.h
9902 views
1
/*
r128.h: 128-bit (64.64) signed fixed-point arithmetic. Version 1.6.0

COMPILATION
-----------
Drop this header file somewhere in your project and include it wherever it is
needed. There is no separate .c file for this library. To get the code, in ONE
file in your project, put:

#define R128_IMPLEMENTATION

before you include this file. You may also provide a definition for R128_ASSERT
to force the library to use a custom assert macro.

COMPILER/LIBRARY SUPPORT
------------------------
This library requires a C89 compiler with support for 64-bit integers. If your
compiler does not support the long long data type, the R128_U64, etc. macros
must be set appropriately. On x86 and x64 targets, Intel intrinsics are used
for speed. If your compiler does not support these intrinsics, you can add
#define R128_STDC_ONLY
in your implementation file before including r128.h.

The only C runtime library functionality used by this library is <assert.h>.
This can be avoided by defining an R128_ASSERT macro in your implementation
file. Since this library uses 64-bit arithmetic, this may implicitly add a
runtime library dependency on 32-bit platforms.

C++ SUPPORT
-----------
Operator overloads are supplied for C++ files that include this file. Since all
C++ functions are declared inline (or static inline), the R128_IMPLEMENTATION
file can be either C++ or C.

LICENSE
-------
This is free and unencumbered software released into the public domain.

Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.

In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
*/
60
61
#ifndef H_R128_H
62
#define H_R128_H
63
64
#include <stddef.h>
65
66
// 64-bit integer support.
// R128_S32/R128_U32 and R128_S64/R128_U64 name the 32- and 64-bit integer types
// used throughout this file; R128_LIT_S64/R128_LIT_U64 paste the matching
// literal suffix onto an integer constant.
// If your compiler does not have stdint.h, add appropriate defines for these macros.
#if !defined(_MSC_VER) || (_MSC_VER >= 1600)
#  include <stdint.h>
#  define R128_S32 int32_t
#  define R128_U32 uint32_t
#  define R128_S64 long long
#  define R128_U64 unsigned long long
#  define R128_LIT_S64(x) x##ll
#  define R128_LIT_U64(x) x##ull
#else
   // _MSC_VER below 1600: use the compiler's sized integer keywords instead of stdint.h
#  define R128_S32 __int32
#  define R128_U32 unsigned __int32
#  define R128_S64 __int64
#  define R128_U64 unsigned __int64
#  define R128_LIT_S64(x) x##i64
#  define R128_LIT_U64(x) x##ui64
#endif
84
85
#ifdef __cplusplus
86
extern "C" {
87
#endif
88
89
typedef struct R128 {
90
R128_U64 lo;
91
R128_U64 hi;
92
93
#ifdef __cplusplus
94
R128();
95
R128(double);
96
R128(int);
97
R128(R128_S64);
98
R128(R128_U64 low, R128_U64 high);
99
100
operator double() const;
101
operator R128_S64() const;
102
operator int() const;
103
operator bool() const;
104
105
bool operator!() const;
106
R128 operator~() const;
107
R128 operator-() const;
108
R128 &operator|=(const R128 &rhs);
109
R128 &operator&=(const R128 &rhs);
110
R128 &operator^=(const R128 &rhs);
111
R128 &operator+=(const R128 &rhs);
112
R128 &operator-=(const R128 &rhs);
113
R128 &operator*=(const R128 &rhs);
114
R128 &operator/=(const R128 &rhs);
115
R128 &operator%=(const R128 &rhs);
116
R128 &operator<<=(int amount);
117
R128 &operator>>=(int amount);
118
#endif //__cplusplus
119
} R128;
120
121
// Type conversion
122
extern void r128FromInt(R128 *dst, R128_S64 v);
123
extern void r128FromFloat(R128 *dst, double v);
124
extern R128_S64 r128ToInt(const R128 *v);
125
extern double r128ToFloat(const R128 *v);
126
127
// Copy
128
extern void r128Copy(R128 *dst, const R128 *src);
129
130
// Sign manipulation
131
extern void r128Neg(R128 *dst, const R128 *v); // -v
132
extern void r128Abs(R128* dst, const R128* v); // abs(v)
133
extern void r128Nabs(R128* dst, const R128* v); // -abs(v)
134
135
// Bitwise operations
136
extern void r128Not(R128 *dst, const R128 *src); // ~a
137
extern void r128Or(R128 *dst, const R128 *a, const R128 *b); // a | b
138
extern void r128And(R128 *dst, const R128 *a, const R128 *b); // a & b
139
extern void r128Xor(R128 *dst, const R128 *a, const R128 *b); // a ^ b
140
extern void r128Shl(R128 *dst, const R128 *src, int amount); // shift left by amount mod 128
141
extern void r128Shr(R128 *dst, const R128 *src, int amount); // shift right logical by amount mod 128
142
extern void r128Sar(R128 *dst, const R128 *src, int amount); // shift right arithmetic by amount mod 128
143
144
// Arithmetic
145
extern void r128Add(R128 *dst, const R128 *a, const R128 *b); // a + b
146
extern void r128Sub(R128 *dst, const R128 *a, const R128 *b); // a - b
147
extern void r128Mul(R128 *dst, const R128 *a, const R128 *b); // a * b
148
extern void r128Div(R128 *dst, const R128 *a, const R128 *b); // a / b
149
extern void r128Mod(R128 *dst, const R128 *a, const R128 *b); // a - toInt(a / b) * b
150
151
extern void r128Sqrt(R128 *dst, const R128 *v); // sqrt(v)
152
extern void r128Rsqrt(R128 *dst, const R128 *v); // 1 / sqrt(v)
153
154
// Comparison
155
extern int r128Cmp(const R128 *a, const R128 *b); // sign of a-b
156
extern void r128Min(R128 *dst, const R128 *a, const R128 *b);
157
extern void r128Max(R128 *dst, const R128 *a, const R128 *b);
158
extern void r128Floor(R128 *dst, const R128 *v);
159
extern void r128Ceil(R128 *dst, const R128 *v);
160
extern void r128Round(R128 *dst, const R128 *v); // round to nearest, rounding halfway values away from zero
161
extern int r128IsNeg(const R128 *v); // quick check for < 0
162
163
// String conversion
164
//
165
// How r128ToStringOpt renders the sign position of positive values.
// (No comma after the last enumerator: a trailing comma is not accepted by
// C89 or C++98 compilers.)
typedef enum R128ToStringSign {
   R128ToStringSign_Default,  // no sign character for positive values
   R128ToStringSign_Space,    // leading space for positive values
   R128ToStringSign_Plus      // leading '+' for positive values
} R128ToStringSign;
170
171
// Formatting options for use with r128ToStringOpt. The "defaults" correspond
172
// to a format string of "%f".
173
//
174
typedef struct R128ToStringFormat {
175
// sign character for positive values. Default is R128ToStringSign_Default.
176
R128ToStringSign sign;
177
178
// minimum number of characters to write. Default is 0.
179
int width;
180
181
// place to the right of the decimal at which rounding is performed. If negative,
182
// a maximum of 20 decimal places will be written, with no trailing zeroes.
183
// (20 places is sufficient to ensure that r128FromString will convert back to the
184
// original value.) Default is -1. NOTE: This is not the same default that the C
185
// standard library uses for %f.
186
int precision;
187
188
// If non-zero, pads the output string with leading zeroes if the final result is
189
// fewer than width characters. Otherwise, leading spaces are used. Default is 0.
190
int zeroPad;
191
192
// Always print a decimal point, even if the value is an integer. Default is 0.
193
int decimal;
194
195
// Left-align output if width specifier requires padding.
196
// Default is 0 (right align).
197
int leftAlign;
198
} R128ToStringFormat;
199
200
// r128ToStringOpt: convert R128 to a decimal string, with formatting.
201
//
202
// dst and dstSize: specify the buffer to write into. At most dstSize bytes will be written
203
// (including null terminator). No additional rounding is performed if dstSize is not large
204
// enough to hold the entire string.
205
//
206
// opt: an R128ToStringFormat struct (q.v.) with formatting options.
207
//
208
// Uses the R128_decimal global as the decimal point character.
209
// Always writes a null terminator, even if the destination buffer is not large enough.
210
//
211
// Number of bytes that will be written (i.e. how big does dst need to be?):
212
// If width is specified: width + 1 bytes.
213
// If precision is specified: at most precision + 22 bytes.
214
// If neither is specified: at most 42 bytes.
215
//
216
// Returns the number of bytes that would have been written if dst was sufficiently large,
217
// not including the final null terminator.
218
//
219
extern int r128ToStringOpt(char *dst, size_t dstSize, const R128 *v, const R128ToStringFormat *opt);
220
221
// r128ToStringf: convert R128 to a decimal string, with formatting.
222
//
223
// dst and dstSize: specify the buffer to write into. At most dstSize bytes will be written
224
// (including null terminator).
225
//
226
// format: a printf-style format specifier, as one would use with floating point types.
227
// e.g. "%+5.2f". (The leading % and trailing f are optional.)
228
// NOTE: This is NOT a full replacement for sprintf. Any characters in the format string
229
// that do not correspond to a format placeholder are ignored.
230
//
231
// Uses the R128_decimal global as the decimal point character.
232
// Always writes a null terminator, even if the destination buffer is not large enough.
233
//
234
// Number of bytes that will be written (i.e. how big does dst need to be?):
235
// If the precision field is specified: at most max(width, precision + 21) + 1 bytes
236
// Otherwise: at most max(width, 41) + 1 bytes.
237
//
238
// Returns the number of bytes that would have been written if dst was sufficiently large,
239
// not including the final null terminator.
240
//
241
extern int r128ToStringf(char *dst, size_t dstSize, const char *format, const R128 *v);
242
243
// r128ToString: convert R128 to a decimal string, with default formatting.
244
// Equivalent to r128ToStringf(dst, dstSize, "%f", v).
245
//
246
// Uses the R128_decimal global as the decimal point character.
247
// Always writes a null terminator, even if the destination buffer is not large enough.
248
//
249
// Will write at most 42 bytes (including NUL) to dst.
250
//
251
// Returns the number of bytes that would have been written if dst was sufficiently large,
252
// not including the final null terminator.
253
//
254
extern int r128ToString(char *dst, size_t dstSize, const R128 *v);
255
256
// r128FromString: Convert string to R128.
257
//
258
// The string can be formatted either as a decimal number with optional sign
259
// or as hexadecimal with a prefix of 0x or 0X.
260
//
261
// endptr, if not NULL, is set to the character following the last character
262
// used in the conversion.
263
//
264
extern void r128FromString(R128 *dst, const char *s, char **endptr);
265
266
// Constants
267
extern const R128 R128_min; // minimum (most negative) value
268
extern const R128 R128_max; // maximum (most positive) value
269
extern const R128 R128_smallest; // smallest positive value
270
extern const R128 R128_zero; // zero
271
extern const R128 R128_one; // 1.0
272
273
extern char R128_decimal; // decimal point character used by r128From/ToString. defaults to '.'
274
275
#ifdef __cplusplus
276
}
277
278
#include <limits>
279
namespace std {
280
template<>
281
struct numeric_limits<R128>
282
{
283
static const bool is_specialized = true;
284
285
static R128 min() throw() { return R128_min; }
286
static R128 max() throw() { return R128_max; }
287
288
static const int digits = 127;
289
static const int digits10 = 38;
290
static const bool is_signed = true;
291
static const bool is_integer = false;
292
static const bool is_exact = false;
293
static const int radix = 2;
294
static R128 epsilon() throw() { return R128_smallest; }
295
static R128 round_error() throw() { return R128_one; }
296
297
static const int min_exponent = 0;
298
static const int min_exponent10 = 0;
299
static const int max_exponent = 0;
300
static const int max_exponent10 = 0;
301
302
static const bool has_infinity = false;
303
static const bool has_quiet_NaN = false;
304
static const bool has_signaling_NaN = false;
305
static const float_denorm_style has_denorm = denorm_absent;
306
static const bool has_denorm_loss = false;
307
308
static R128 infinity() throw() { return R128_zero; }
309
static R128 quiet_NaN() throw() { return R128_zero; }
310
static R128 signaling_NaN() throw() { return R128_zero; }
311
static R128 denorm_min() throw() { return R128_zero; }
312
313
static const bool is_iec559 = false;
314
static const bool is_bounded = true;
315
static const bool is_modulo = true;
316
317
static const bool traps = numeric_limits<R128_U64>::traps;
318
static const bool tinyness_before = false;
319
static const float_round_style round_style = round_toward_zero;
320
};
321
} //namespace std
322
323
// Default constructor: intentionally leaves lo and hi uninitialized.
inline R128::R128()
{
}

// Construct from a double via r128FromFloat.
inline R128::R128(double v)
{
   r128FromFloat(this, v);
}

// Construct from an int via r128FromInt (the int is widened to R128_S64).
inline R128::R128(int v)
{
   r128FromInt(this, (R128_S64)v);
}

// Construct from a 64-bit signed integer via r128FromInt.
inline R128::R128(R128_S64 v)
{
   r128FromInt(this, v);
}

// Construct directly from the two raw 64-bit words.
inline R128::R128(R128_U64 low, R128_U64 high)
   : lo(low), hi(high)
{
}
345
346
inline R128::operator double() const
347
{
348
return r128ToFloat(this);
349
}
350
351
inline R128::operator R128_S64() const
352
{
353
return r128ToInt(this);
354
}
355
356
inline R128::operator int() const
357
{
358
return (int) r128ToInt(this);
359
}
360
361
inline R128::operator bool() const
362
{
363
return lo || hi;
364
}
365
366
inline bool R128::operator!() const
367
{
368
return !lo && !hi;
369
}
370
371
inline R128 R128::operator~() const
372
{
373
R128 r;
374
r128Not(&r, this);
375
return r;
376
}
377
378
inline R128 R128::operator-() const
379
{
380
R128 r;
381
r128Neg(&r, this);
382
return r;
383
}
384
385
// Compound assignment operators. Each one calls the matching r128* function
// with this object as both destination and first operand, then returns *this.

inline R128 &R128::operator|=(const R128 &rhs)
{
   r128Or(this, this, &rhs);
   return *this;
}

inline R128 &R128::operator&=(const R128 &rhs)
{
   r128And(this, this, &rhs);
   return *this;
}

inline R128 &R128::operator^=(const R128 &rhs)
{
   r128Xor(this, this, &rhs);
   return *this;
}

inline R128 &R128::operator+=(const R128 &rhs)
{
   r128Add(this, this, &rhs);
   return *this;
}

inline R128 &R128::operator-=(const R128 &rhs)
{
   r128Sub(this, this, &rhs);
   return *this;
}

inline R128 &R128::operator*=(const R128 &rhs)
{
   r128Mul(this, this, &rhs);
   return *this;
}

inline R128 &R128::operator/=(const R128 &rhs)
{
   r128Div(this, this, &rhs);
   return *this;
}

inline R128 &R128::operator%=(const R128 &rhs)
{
   r128Mod(this, this, &rhs);
   return *this;
}

// Left shift via r128Shl.
inline R128 &R128::operator<<=(int amount)
{
   r128Shl(this, this, amount);
   return *this;
}

// Right shift via r128Sar (the arithmetic right shift).
inline R128 &R128::operator>>=(int amount)
{
   r128Sar(this, this, amount);
   return *this;
}
444
445
// Binary operators. Each copies the left operand, applies the corresponding
// compound assignment to the copy, and returns the copy by value.

static inline R128 operator|(const R128 &lhs, const R128 &rhs)
{
   R128 result(lhs);
   result |= rhs;
   return result;
}

static inline R128 operator&(const R128 &lhs, const R128 &rhs)
{
   R128 result(lhs);
   result &= rhs;
   return result;
}

static inline R128 operator^(const R128 &lhs, const R128 &rhs)
{
   R128 result(lhs);
   result ^= rhs;
   return result;
}

static inline R128 operator+(const R128 &lhs, const R128 &rhs)
{
   R128 result(lhs);
   result += rhs;
   return result;
}

static inline R128 operator-(const R128 &lhs, const R128 &rhs)
{
   R128 result(lhs);
   result -= rhs;
   return result;
}

static inline R128 operator*(const R128 &lhs, const R128 &rhs)
{
   R128 result(lhs);
   result *= rhs;
   return result;
}

static inline R128 operator/(const R128 &lhs, const R128 &rhs)
{
   R128 result(lhs);
   result /= rhs;
   return result;
}

static inline R128 operator%(const R128 &lhs, const R128 &rhs)
{
   R128 result(lhs);
   result %= rhs;
   return result;
}

static inline R128 operator<<(const R128 &lhs, int amount)
{
   R128 result(lhs);
   result <<= amount;
   return result;
}

static inline R128 operator>>(const R128 &lhs, int amount)
{
   R128 result(lhs);
   result >>= amount;
   return result;
}
504
505
static inline bool operator<(const R128 &lhs, const R128 &rhs)
506
{
507
return r128Cmp(&lhs, &rhs) < 0;
508
}
509
510
static inline bool operator>(const R128 &lhs, const R128 &rhs)
511
{
512
return r128Cmp(&lhs, &rhs) > 0;
513
}
514
515
static inline bool operator<=(const R128 &lhs, const R128 &rhs)
516
{
517
return r128Cmp(&lhs, &rhs) <= 0;
518
}
519
520
static inline bool operator>=(const R128 &lhs, const R128 &rhs)
521
{
522
return r128Cmp(&lhs, &rhs) >= 0;
523
}
524
525
static inline bool operator==(const R128 &lhs, const R128 &rhs)
526
{
527
return lhs.lo == rhs.lo && lhs.hi == rhs.hi;
528
}
529
530
static inline bool operator!=(const R128 &lhs, const R128 &rhs)
531
{
532
return lhs.lo != rhs.lo || lhs.hi != rhs.hi;
533
}
534
535
#endif //__cplusplus
536
#endif //H_R128_H
537
538
#ifdef R128_IMPLEMENTATION
539
540
// R128_DEBUG_SET(x): when R128_DEBUG_VIS is defined, formats x into the
// R128_last buffer with r128ToString; otherwise it expands to nothing.
#ifndef R128_DEBUG_VIS
#  define R128_DEBUG_SET(x)
#else
#  define R128_DEBUG_SET(x) r128ToString(R128_last, sizeof(R128_last), x)
#endif
545
546
// R128_SET2: store two 64-bit words (low, high) into *x.
#define R128_SET2(x, l, h) do { (x)->lo = (R128_U64)(l); (x)->hi = (R128_U64)(h); } while(0)

// R128_R0..R128_R3 read the four 32-bit words of *x from least to most
// significant; R128_SET4 stores four such words into *x.
#define R128_R0(x) ((R128_U32)(x)->lo)
#define R128_R2(x) ((R128_U32)(x)->hi)
#if !defined(_M_IX86)
#  define R128_SET4(x, r0, r1, r2, r3) do { (x)->lo = (R128_U64)(r0) | ((R128_U64)(r1) << 32); \
      (x)->hi = (R128_U64)(r2) | ((R128_U64)(r3) << 32); } while(0)
#  define R128_R1(x) ((R128_U32)((x)->lo >> 32))
#  define R128_R3(x) ((R128_U32)((x)->hi >> 32))
#else
   // workaround: MSVC x86's handling of 64-bit values is not great, so the
   // 32-bit halves are accessed through R128_U32 pointers instead of shifts.
#  define R128_SET4(x, r0, r1, r2, r3) do { \
      ((R128_U32*)&(x)->lo)[0] = (R128_U32)(r0); \
      ((R128_U32*)&(x)->lo)[1] = (R128_U32)(r1); \
      ((R128_U32*)&(x)->hi)[0] = (R128_U32)(r2); \
      ((R128_U32*)&(x)->hi)[1] = (R128_U32)(r3); \
   } while(0)
#  define R128_R1(x) (((R128_U32*)&(x)->lo)[1])
#  define R128_R3(x) (((R128_U32*)&(x)->hi)[1])
#endif
565
566
// Platform detection.
//   R128_INTEL: 1 on x86/x64 targets, 0 otherwise.
//   R128_64BIT: 1 on the 64-bit targets recognized below, 0 otherwise.
// The intrinsics header is pulled in unless R128_STDC_ONLY is defined.
#if defined(_M_X64)
#  define R128_INTEL 1
#  define R128_64BIT 1
#  ifndef R128_STDC_ONLY
#    include <intrin.h>
#  endif
#elif defined(__x86_64__)
#  define R128_INTEL 1
#  define R128_64BIT 1
#  ifndef R128_STDC_ONLY
#    include <x86intrin.h>
#  endif
#elif defined(_M_IX86)
#  define R128_INTEL 1
#  ifndef R128_STDC_ONLY
#    include <intrin.h>
#  endif
#elif defined(__i386__)
#  define R128_INTEL 1
#  ifndef R128_STDC_ONLY
#    include <x86intrin.h>
#  endif
#elif defined(_M_ARM)
#  ifndef R128_STDC_ONLY
#    include <intrin.h>
#  endif
#elif defined(_M_ARM64)
#  define R128_64BIT 1
#  ifndef R128_STDC_ONLY
#    include <intrin.h>
#  endif
#elif defined(__aarch64__)
#  define R128_64BIT 1
#endif

// Anything not set above defaults to 0 so the macros can be used in #if.
#ifndef R128_INTEL
#  define R128_INTEL 0
#endif

#ifndef R128_64BIT
#  define R128_64BIT 0
#endif

// Use the C runtime assert unless the includer supplied its own R128_ASSERT.
#ifndef R128_ASSERT
#  include <assert.h>
#  define R128_ASSERT(x) assert(x)
#endif
613
614
#include <stdlib.h> // for NULL
615
616
static const R128ToStringFormat R128__defaultFormat = {
617
R128ToStringSign_Default,
618
0,
619
-1,
620
0,
621
0,
622
0
623
};
624
625
const R128 R128_min = { 0, R128_LIT_U64(0x8000000000000000) };
626
const R128 R128_max = { R128_LIT_U64(0xffffffffffffffff), R128_LIT_U64(0x7fffffffffffffff) };
627
const R128 R128_smallest = { 1, 0 };
628
const R128 R128_zero = { 0, 0 };
629
const R128 R128_one = { 0, 1 };
630
char R128_decimal = '.';
631
#ifdef R128_DEBUG_VIS
632
char R128_last[42];
633
#endif
634
635
// Count leading zero bits of a 64-bit value. Returns 64 when x is 0.
static int r128__clz64(R128_U64 x)
{
#if defined(R128_STDC_ONLY)
   // Portable binary search: halve the window each step, keeping the upper
   // part whenever it is non-zero.
   R128_U64 count = 64;
   R128_U64 upper;
   int step;

   for (step = 32; step != 0; step >>= 1) {
      upper = x >> step;
      if (upper) {
         count -= (R128_U64)step;
         x = upper;
      }
   }
   // x is now 0 or 1
   return (int)(count - x);
#elif defined(_M_X64) || defined(_M_ARM64)
   unsigned long bit;
   if (!_BitScanReverse64(&bit, x)) {
      return 64;
   }
   return 63 - (int)bit;
#elif defined(_MSC_VER)
   // Scan the upper 32 bits first, then the lower 32 bits.
   unsigned long bit;
   if (_BitScanReverse(&bit, (R128_U32)(x >> 32))) {
      return 31 - (int)bit;
   }
   if (_BitScanReverse(&bit, (R128_U32)x)) {
      return 63 - (int)bit;
   }
   return 64;
#else
   if (x == 0) {
      return 64;
   }
   return __builtin_clzll(x);
#endif
}
666
667
#if !R128_64BIT
668
// 32*32->64
669
static R128_U64 r128__umul64(R128_U32 a, R128_U32 b)
670
{
671
# if defined(_M_IX86) && !defined(R128_STDC_ONLY) && !defined(__MINGW32__)
672
return __emulu(a, b);
673
# elif defined(_M_ARM) && !defined(R128_STDC_ONLY)
674
return _arm_umull(a, b);
675
# else
676
return a * (R128_U64)b;
677
# endif
678
}
679
680
// 64/32->32
681
static R128_U32 r128__udiv64(R128_U32 nlo, R128_U32 nhi, R128_U32 d, R128_U32 *rem)
682
{
683
# if defined(_M_IX86) && (_MSC_VER >= 1920) && !defined(R128_STDC_ONLY)
684
unsigned __int64 n = ((unsigned __int64)nhi << 32) | nlo;
685
return _udiv64(n, d, rem);
686
# elif defined(_M_IX86) && !defined(R128_STDC_ONLY) && !defined(__MINGW32__)
687
__asm {
688
mov eax, nlo
689
mov edx, nhi
690
div d
691
mov ecx, rem
692
mov dword ptr [ecx], edx
693
}
694
# elif defined(__i386__) && !defined(R128_STDC_ONLY)
695
R128_U32 q, r;
696
__asm("divl %4"
697
: "=a"(q), "=d"(r)
698
: "a"(nlo), "d"(nhi), "X"(d));
699
*rem = r;
700
return q;
701
# else
702
R128_U64 n64 = ((R128_U64)nhi << 32) | nlo;
703
*rem = (R128_U32)(n64 % d);
704
return (R128_U32)(n64 / d);
705
# endif
706
}
707
#elif defined(R128_STDC_ONLY) || !R128_INTEL
708
#define r128__umul64(a, b) ((a) * (R128_U64)(b))
709
static R128_U32 r128__udiv64(R128_U32 nlo, R128_U32 nhi, R128_U32 d, R128_U32 *rem)
710
{
711
R128_U64 n64 = ((R128_U64)nhi << 32) | nlo;
712
*rem = (R128_U32)(n64 % d);
713
return (R128_U32)(n64 / d);
714
}
715
#endif //!R128_64BIT
716
717
// Two's complement negation of the 128-bit value: *dst = ~*src + 1.
// dst and src may point to the same object.
static void r128__neg(R128 *dst, const R128 *src)
{
   R128_ASSERT(dst != NULL);
   R128_ASSERT(src != NULL);

#if R128_INTEL && !defined(R128_STDC_ONLY)
   {
      // Add 1 to the complemented value, propagating the carry word by word.
      unsigned char carry = 0;
#  if R128_64BIT
      carry = _addcarry_u64(carry, ~src->lo, 1, &dst->lo);
      carry = _addcarry_u64(carry, ~src->hi, 0, &dst->hi);
#  else
      R128_U32 r0, r1, r2, r3;
      carry = _addcarry_u32(carry, ~R128_R0(src), 1, &r0);
      carry = _addcarry_u32(carry, ~R128_R1(src), 0, &r1);
      carry = _addcarry_u32(carry, ~R128_R2(src), 0, &r2);
      carry = _addcarry_u32(carry, ~R128_R3(src), 0, &r3);
      R128_SET4(dst, r0, r1, r2, r3);
#  endif //R128_64BIT
   }
#else
   {
      const R128_U64 lo = src->lo;
      const R128_U64 hi = src->hi;

      // ~lo + 1 wraps to 0 exactly when lo is 0; only then does the +1 carry
      // into the high word.
      dst->lo = ~lo + 1;
      dst->hi = ~hi + (lo ? 0 : 1);
   }
#endif //R128_INTEL
}
747
748
// 64*64->128
749
static void r128__umul128(R128 *dst, R128_U64 a, R128_U64 b)
750
{
751
#if defined(_M_X64) && !defined(R128_STDC_ONLY)
752
dst->lo = _umul128(a, b, &dst->hi);
753
#elif R128_64BIT && !defined(_MSC_VER) && !defined(R128_STDC_ONLY)
754
unsigned __int128 p0 = a * (unsigned __int128)b;
755
dst->hi = (R128_U64)(p0 >> 64);
756
dst->lo = (R128_U64)p0;
757
#else
758
R128_U32 alo = (R128_U32)a;
759
R128_U32 ahi = (R128_U32)(a >> 32);
760
R128_U32 blo = (R128_U32)b;
761
R128_U32 bhi = (R128_U32)(b >> 32);
762
R128_U64 p0, p1, p2, p3;
763
764
p0 = r128__umul64(alo, blo);
765
p1 = r128__umul64(alo, bhi);
766
p2 = r128__umul64(ahi, blo);
767
p3 = r128__umul64(ahi, bhi);
768
769
{
770
#if R128_INTEL && !defined(R128_STDC_ONLY)
771
R128_U32 r0, r1, r2, r3;
772
unsigned char carry;
773
774
r0 = (R128_U32)(p0);
775
r1 = (R128_U32)(p0 >> 32);
776
r2 = (R128_U32)(p1 >> 32);
777
r3 = (R128_U32)(p3 >> 32);
778
779
carry = _addcarry_u32(0, r1, (R128_U32)p1, &r1);
780
carry = _addcarry_u32(carry, r2, (R128_U32)(p2 >> 32), &r2);
781
_addcarry_u32(carry, r3, 0, &r3);
782
carry = _addcarry_u32(0, r1, (R128_U32)p2, &r1);
783
carry = _addcarry_u32(carry, r2, (R128_U32)p3, &r2);
784
_addcarry_u32(carry, r3, 0, &r3);
785
786
R128_SET4(dst, r0, r1, r2, r3);
787
#else
788
R128_U64 carry, lo, hi;
789
carry = ((R128_U64)(R128_U32)p1 + (R128_U64)(R128_U32)p2 + (p0 >> 32)) >> 32;
790
791
lo = p0 + ((p1 + p2) << 32);
792
hi = p3 + ((R128_U32)(p1 >> 32) + (R128_U32)(p2 >> 32)) + carry;
793
794
R128_SET2(dst, lo, hi);
795
#endif
796
}
797
#endif
798
}
799
800
// 128/64->64
801
#if defined(_M_X64) && (_MSC_VER < 1920) && !defined(R128_STDC_ONLY) && !defined(__MINGW32__)
802
// MSVC x64 provides neither inline assembly nor (pre-2019) a div intrinsic, so we do fake
803
// "inline assembly" to avoid long division or outline assembly.
804
#pragma code_seg(".text")
805
__declspec(allocate(".text") align(16)) static const unsigned char r128__udiv128Code[] = {
806
0x48, 0x8B, 0xC1, //mov rax, rcx
807
0x49, 0xF7, 0xF0, //div rax, r8
808
0x49, 0x89, 0x11, //mov qword ptr [r9], rdx
809
0xC3 //ret
810
};
811
typedef R128_U64 (*r128__udiv128Proc)(R128_U64 nlo, R128_U64 nhi, R128_U64 d, R128_U64 *rem);
812
static const r128__udiv128Proc r128__udiv128 = (r128__udiv128Proc)(void*)r128__udiv128Code;
813
#else
814
// 128/64->64 division: divides the 128-bit value nhi:nlo by d. Returns the
// 64-bit quotient and stores the remainder in *rem. The portable path asserts
// d != 0 and nhi < d (so the quotient fits in 64 bits).
static R128_U64 r128__udiv128(R128_U64 nlo, R128_U64 nhi, R128_U64 d, R128_U64 *rem)
{
#if defined(_M_X64) && !defined(R128_STDC_ONLY) && !defined(__MINGW32__)
   return _udiv128(nhi, nlo, d, rem);
#elif defined(__x86_64__) && !defined(R128_STDC_ONLY)
   R128_U64 q, r;
   // The divisor must be a register or memory operand: "div" has no immediate
   // form, so the catch-all "X" constraint could produce unassemblable code.
   __asm("divq %4"
      : "=a"(q), "=d"(r)
      : "a"(nlo), "d"(nhi), "rm"(d)
      : "cc");
   *rem = r;
   return q;
#else
   // Portable long division producing two 32-bit quotient digits (q1, q0).
   R128_U64 tmp;
   R128_U32 d0, d1;
   R128_U32 n3, n2, n1, n0;
   R128_U32 q0, q1;
   R128_U32 r;
   int shift;

   R128_ASSERT(d != 0);    //division by zero
   R128_ASSERT(nhi < d);   //overflow

   // normalize: shift divisor and dividend left until the divisor's top bit is set
   shift = r128__clz64(d);

   if (shift) {
      R128 tmp128;
      R128_SET2(&tmp128, nlo, nhi);
      r128Shl(&tmp128, &tmp128, shift);
      n3 = R128_R3(&tmp128);
      n2 = R128_R2(&tmp128);
      n1 = R128_R1(&tmp128);
      n0 = R128_R0(&tmp128);
      d <<= shift;
   } else {
      n3 = (R128_U32)(nhi >> 32);
      n2 = (R128_U32)nhi;
      n1 = (R128_U32)(nlo >> 32);
      n0 = (R128_U32)nlo;
   }

   d1 = (R128_U32)(d >> 32);
   d0 = (R128_U32)d;

   // first digit: estimate from the top words, then correct it downwards
   R128_ASSERT(n3 <= d1);
   if (n3 < d1) {
      q1 = r128__udiv64(n2, n3, d1, &r);
   } else {
      q1 = 0xffffffffu;
      r = n2 + d1;
   }
refine1:
   if (r128__umul64(q1, d0) > ((R128_U64)r << 32) + n1) {
      --q1;
      // ~d1 + 1 is -d1 modulo 2^32: only retry while r + d1 does not wrap
      if (r < ~d1 + 1) {
         r += d1;
         goto refine1;
      }
   }

   // subtract q1 * d from the running dividend
   tmp = ((R128_U64)n2 << 32) + n1 - (r128__umul64(q1, d0) + (r128__umul64(q1, d1) << 32));
   n2 = (R128_U32)(tmp >> 32);
   n1 = (R128_U32)tmp;

   // second digit
   R128_ASSERT(n2 <= d1);
   if (n2 < d1) {
      q0 = r128__udiv64(n1, n2, d1, &r);
   } else {
      q0 = 0xffffffffu;
      r = n1 + d1;
   }
refine0:
   if (r128__umul64(q0, d0) > ((R128_U64)r << 32) + n0) {
      --q0;
      if (r < ~d1 + 1) {
         r += d1;
         goto refine0;
      }
   }

   tmp = ((R128_U64)n1 << 32) + n0 - (r128__umul64(q0, d0) + (r128__umul64(q0, d1) << 32));
   n1 = (R128_U32)(tmp >> 32);
   n0 = (R128_U32)tmp;

   // undo the normalization shift on the remainder
   *rem = (((R128_U64)n1 << 32) + n0) >> shift;
   return ((R128_U64)q1 << 32) + q0;
#endif
}
904
#endif
905
906
static int r128__ucmp(const R128 *a, const R128 *b)
907
{
908
if (a->hi != b->hi) {
909
if (a->hi > b->hi) {
910
return 1;
911
} else {
912
return -1;
913
}
914
} else {
915
if (a->lo == b->lo) {
916
return 0;
917
} else if (a->lo > b->lo) {
918
return 1;
919
} else {
920
return -1;
921
}
922
}
923
}
924
925
// Unsigned multiply of two 128-bit values treated as 64.64 fixed point: forms
// the four 64x64 partial products, keeps the middle 128 bits of the result and
// adds bit 63 of the lowest partial product's low word as a rounding term.
// *dst is written only after both inputs have been read.
static void r128__umul(R128 *dst, const R128 *a, const R128 *b)
{
#if defined(_M_X64) && !defined(R128_STDC_ONLY)
   R128_U64 prodLo, prodHi;
   R128_U64 lo, hi = 0;
   unsigned char carry;

   // lo*lo: keep the upper word plus the rounding bit
   prodLo = _umul128(a->lo, b->lo, &prodHi);
   carry = _addcarry_u64(0, prodHi, prodLo >> 63, &lo);
   _addcarry_u64(carry, hi, hi, &hi);

   // lo*hi
   prodLo = _umul128(a->lo, b->hi, &prodHi);
   carry = _addcarry_u64(0, lo, prodLo, &lo);
   _addcarry_u64(carry, hi, prodHi, &hi);

   // hi*lo
   prodLo = _umul128(a->hi, b->lo, &prodHi);
   carry = _addcarry_u64(0, lo, prodLo, &lo);
   _addcarry_u64(carry, hi, prodHi, &hi);

   // hi*hi: only the low word lands in the result's high word
   prodLo = _umul128(a->hi, b->hi, &prodHi);
   hi += prodLo;

   R128_SET2(dst, lo, hi);
#elif defined(__x86_64__) && !defined(R128_STDC_ONLY)
   unsigned __int128 p0, p1, p2, p3;
   p0 = a->lo * (unsigned __int128)b->lo;
   p1 = a->lo * (unsigned __int128)b->hi;
   p2 = a->hi * (unsigned __int128)b->lo;
   p3 = a->hi * (unsigned __int128)b->hi;

   // (R128_U64)p0 >> 63 is the rounding bit
   p0 = (p3 << 64) + p2 + p1 + (p0 >> 64) + ((R128_U64)p0 >> 63);
   dst->lo = (R128_U64)p0;
   dst->hi = (R128_U64)(p0 >> 64);
#else
   R128 acc, term, round;

   // lo*lo, shifted right by 64, plus the rounding bit
   r128__umul128(&acc, a->lo, b->lo);
   round.hi = 0;
   round.lo = acc.lo >> 63;
   acc.lo = acc.hi;
   acc.hi = 0;
   r128Add(&acc, &acc, &round);

   // hi*lo
   r128__umul128(&term, a->hi, b->lo);
   r128Add(&acc, &acc, &term);

   // lo*hi
   r128__umul128(&term, a->lo, b->hi);
   r128Add(&acc, &acc, &term);

   // hi*hi, shifted left by 64
   r128__umul128(&term, a->hi, b->hi);
   term.hi = term.lo;
   term.lo = 0;
   r128Add(&acc, &acc, &term);

   R128_SET2(dst, acc.lo, acc.hi);
#endif
}
979
980
// Shift d left until the high bit is set, and shift n left by the same amount.
981
// returns non-zero on overflow.
982
static int r128__norm(R128 *n, R128 *d, R128_U64 *n2)
983
{
984
R128_U64 d0, d1;
985
R128_U64 n0, n1;
986
int shift;
987
988
d1 = d->hi;
989
d0 = d->lo;
990
n1 = n->hi;
991
n0 = n->lo;
992
993
if (d1) {
994
shift = r128__clz64(d1);
995
if (shift) {
996
d1 = (d1 << shift) | (d0 >> (64 - shift));
997
d0 = d0 << shift;
998
*n2 = n1 >> (64 - shift);
999
n1 = (n1 << shift) | (n0 >> (64 - shift));
1000
n0 = n0 << shift;
1001
} else {
1002
*n2 = 0;
1003
}
1004
} else {
1005
shift = r128__clz64(d0);
1006
if (r128__clz64(n1) <= shift) {
1007
return 1; // overflow
1008
}
1009
1010
if (shift) {
1011
d1 = d0 << shift;
1012
d0 = 0;
1013
*n2 = (n1 << shift) | (n0 >> (64 - shift));
1014
n1 = n0 << shift;
1015
n0 = 0;
1016
} else {
1017
d1 = d0;
1018
d0 = 0;
1019
*n2 = n1;
1020
n1 = n0;
1021
n0 = 0;
1022
}
1023
}
1024
1025
R128_SET2(n, n0, n1);
1026
R128_SET2(d, d0, d1);
1027
return 0;
1028
}
1029
1030
// Unsigned 128-bit division producing two 64-bit quotient digits.
// If r128__norm reports overflow, *quotient is set to R128_max. The divisor
// must be non-zero (asserted).
static void r128__udiv(R128 *quotient, const R128 *dividend, const R128 *divisor)
{
   R128 diff;
   R128_U64 d0, d1;
   R128_U64 n1, n2, n3;
   R128_U64 qHi, qLo;

   R128_ASSERT(dividend != NULL);
   R128_ASSERT(divisor != NULL);
   R128_ASSERT(quotient != NULL);
   R128_ASSERT(divisor->hi != 0 || divisor->lo != 0);  // divide by zero

   // scale dividend and normalize
   {
      R128 n, d;
      R128_SET2(&n, dividend->lo, dividend->hi);
      R128_SET2(&d, divisor->lo, divisor->hi);
      if (r128__norm(&n, &d, &n3)) {
         R128_SET2(quotient, R128_max.lo, R128_max.hi);
         return;
      }

      d1 = d.hi;
      d0 = d.lo;
      n2 = n.hi;
      n1 = n.lo;
   }

   // first digit: estimate from the top words, then correct it downwards
   R128_ASSERT(n3 <= d1);
   {
      R128 est, prod;
      est.lo = n1;
      if (n3 < d1) {
         qHi = r128__udiv128(n2, n3, d1, &est.hi);
      } else {
         qHi = R128_LIT_U64(0xffffffffffffffff);
         est.hi = n2 + d1;
      }

      for (;;) {
         r128__umul128(&prod, qHi, d0);
         if (r128__ucmp(&prod, &est) <= 0) {
            break;
         }
         --qHi;
         // ~d1 + 1 is -d1 modulo 2^64: stop once est.hi + d1 would wrap
         if (est.hi >= ~d1 + 1) {
            break;
         }
         est.hi += d1;
      }
   }

   // subtract qHi * d from the running dividend
   {
      R128 cur, prodLo, prodHi;
      cur.hi = n2;
      cur.lo = n1;

      r128__umul128(&prodLo, qHi, d0);
      r128__umul128(&prodHi, qHi, d1);

      prodHi.hi = prodHi.lo; prodHi.lo = 0;  //r128Shl(&prodHi, &prodHi, 64);
      r128Add(&diff, &prodLo, &prodHi);
      r128Sub(&diff, &cur, &diff);
   }
   n2 = diff.hi;
   n1 = diff.lo;

   // second digit
   R128_ASSERT(n2 <= d1);
   {
      R128 est, prod;
      est.lo = 0;
      if (n2 < d1) {
         qLo = r128__udiv128(n1, n2, d1, &est.hi);
      } else {
         qLo = R128_LIT_U64(0xffffffffffffffff);
         est.hi = n1 + d1;
      }

      for (;;) {
         r128__umul128(&prod, qLo, d0);
         if (r128__ucmp(&prod, &est) <= 0) {
            break;
         }
         --qLo;
         if (est.hi >= ~d1 + 1) {
            break;
         }
         est.hi += d1;
      }
   }

   R128_SET2(quotient, qLo, qHi);
}
1121
1122
// Computes a single 64-bit quotient digit of *n / *d (both unsigned) and
// returns it. Despite the name, the value returned is the quotient digit,
// not the remainder. Both *n and *d are passed to r128__norm, which may
// rewrite them in place. Returns all ones when r128__norm reports overflow.
// d must be non-zero.
static R128_U64 r128__umod(R128 *n, R128 *d)
{
   R128 bound, product;
   R128_U64 d0, d1;
   R128_U64 n1, n2, n3;
   R128_U64 digit;

   R128_ASSERT(d != NULL);
   R128_ASSERT(n != NULL);
   R128_ASSERT(d->hi != 0 || d->lo != 0);  // divide by zero

   if (r128__norm(n, d, &n3)) {
      return R128_LIT_U64(0xffffffffffffffff);
   }

   d1 = d->hi;
   d0 = d->lo;
   n2 = n->hi;
   n1 = n->lo;

   R128_ASSERT(n3 < d1);

   // initial estimate from the high words
   bound.lo = n1;
   digit = r128__udiv128(n2, n3, d1, &bound.hi);

   // step the estimate down while digit * d0 is larger than the bound
   for (;;) {
      r128__umul128(&product, digit, d0);
      if (r128__ucmp(&product, &bound) <= 0) {
         break;
      }
      --digit;
      if (bound.hi >= ~d1 + 1) {
         // adding d1 again would wrap around; stop refining
         break;
      }
      bound.hi += d1;
   }

   return digit;
}
1160
1161
static int r128__format(char *dst, size_t dstSize, const R128 *v, const R128ToStringFormat *format)
1162
{
1163
char buf[128];
1164
R128 tmp;
1165
R128_U64 whole;
1166
char *cursor, *decimal, *dstp = dst;
1167
int sign = 0;
1168
int fullPrecision = 1;
1169
int width, precision;
1170
int padCnt, trail = 0;
1171
1172
R128_ASSERT(dst != NULL && dstSize > 0);
1173
R128_ASSERT(v != NULL);
1174
R128_ASSERT(format != NULL);
1175
1176
--dstSize;
1177
1178
R128_SET2(&tmp, v->lo, v->hi);
1179
if (r128IsNeg(&tmp)) {
1180
r128__neg(&tmp, &tmp);
1181
sign = 1;
1182
}
1183
1184
width = format->width;
1185
if (width < 0) {
1186
width = 0;
1187
}
1188
1189
precision = format->precision;
1190
if (precision < 0) {
1191
// print a maximum of 20 digits
1192
fullPrecision = 0;
1193
precision = 20;
1194
} else if (precision > sizeof(buf) - 21) {
1195
trail = precision - (sizeof(buf) - 21);
1196
precision -= trail;
1197
}
1198
1199
whole = tmp.hi;
1200
decimal = cursor = buf;
1201
1202
// fractional part first in case a carry into the whole part is required
1203
if (tmp.lo || format->decimal) {
1204
while (tmp.lo || (fullPrecision && precision)) {
1205
if ((int)(cursor - buf) == precision) {
1206
if ((R128_S64)tmp.lo < 0) {
1207
// round up, propagate carry backwards
1208
char *c;
1209
for (c = cursor - 1; c >= buf; --c) {
1210
char d = ++*c;
1211
if (d <= '9') {
1212
goto endfrac;
1213
} else {
1214
*c = '0';
1215
}
1216
}
1217
1218
// carry out into the whole part
1219
whole++;
1220
}
1221
1222
break;
1223
}
1224
1225
r128__umul128(&tmp, tmp.lo, 10);
1226
*cursor++ = (char)tmp.hi + '0';
1227
}
1228
1229
endfrac:
1230
if (format->decimal || precision) {
1231
decimal = cursor;
1232
*cursor++ = R128_decimal;
1233
}
1234
}
1235
1236
// whole part
1237
do {
1238
char digit = (char)(whole % 10);
1239
whole /= 10;
1240
*cursor++ = digit + '0';
1241
} while (whole);
1242
1243
#define R128__WRITE(c) do { if (dstp < dst + dstSize) *dstp = c; ++dstp; } while(0)
1244
1245
padCnt = width - (int)(cursor - buf) - 1;
1246
1247
// left padding
1248
if (!format->leftAlign) {
1249
char padChar = format->zeroPad ? '0' : ' ';
1250
if (format->zeroPad) {
1251
if (sign) {
1252
R128__WRITE('-');
1253
} else if (format->sign == R128ToStringSign_Plus) {
1254
R128__WRITE('+');
1255
} else if (format->sign == R128ToStringSign_Space) {
1256
R128__WRITE(' ');
1257
} else {
1258
++padCnt;
1259
}
1260
}
1261
1262
for (; padCnt > 0; --padCnt) {
1263
R128__WRITE(padChar);
1264
}
1265
}
1266
1267
if (format->leftAlign || !format->zeroPad) {
1268
if (sign) {
1269
R128__WRITE('-');
1270
} else if (format->sign == R128ToStringSign_Plus) {
1271
R128__WRITE('+');
1272
} else if (format->sign == R128ToStringSign_Space) {
1273
R128__WRITE(' ');
1274
} else {
1275
++padCnt;
1276
}
1277
}
1278
1279
{
1280
char *i;
1281
1282
// reverse the whole part
1283
for (i = cursor - 1; i >= decimal; --i) {
1284
R128__WRITE(*i);
1285
}
1286
1287
// copy the fractional part
1288
for (i = buf; i < decimal; ++i) {
1289
R128__WRITE(*i);
1290
}
1291
}
1292
1293
// right padding
1294
if (format->leftAlign) {
1295
char padChar = format->zeroPad ? '0' : ' ';
1296
for (; padCnt > 0; --padCnt) {
1297
R128__WRITE(padChar);
1298
}
1299
}
1300
1301
// trailing zeroes for very large precision
1302
while (trail--) {
1303
R128__WRITE('0');
1304
}
1305
1306
#undef R128__WRITE
1307
1308
if (dstp <= dst + dstSize) {
1309
*dstp = '\0';
1310
} else {
1311
dst[dstSize] = '\0';
1312
}
1313
return (int)(dstp - dst);
1314
}
1315
1316
// Stores the integer v in *dst: v becomes the high (whole) word and the
// low (fractional) word is cleared.
void r128FromInt(R128 *dst, R128_S64 v)
{
   const R128_U64 wholeBits = (R128_U64)v;

   R128_ASSERT(dst != NULL);

   dst->hi = wholeBits;
   dst->lo = 0;
   R128_DEBUG_SET(dst);
}
1323
1324
// Converts the double v to 64.64 fixed point and stores it in *dst.
//
// Values at or below -2^63 saturate to R128_min and values at or above 2^63
// saturate to R128_max. NaN, which has no fixed-point equivalent, is stored
// as zero. Everything else is split into a whole part and a fraction scaled
// by 2^64.
void r128FromFloat(R128 *dst, double v)
{
   R128_ASSERT(dst != NULL);

   if (v != v) {
      // NaN: converting it to an integer type is undefined behavior
      dst->lo = 0;
      dst->hi = 0;
      R128_DEBUG_SET(dst);
   } else if (v <= -9223372036854775808.0) {
      // "<=" rather than "<": exactly -2^63 would otherwise be negated to
      // +2^63 below and cast to a signed 64-bit integer, which is out of
      // range (undefined behavior)
      r128Copy(dst, &R128_min);
   } else if (v >= 9223372036854775808.0) {
      r128Copy(dst, &R128_max);
   } else {
      R128 r;
      R128_S64 whole;
      int sign = 0;

      // convert the magnitude, then restore the sign
      if (v < 0) {
         v = -v;
         sign = 1;
      }

      whole = (R128_S64)v;
      r.hi = (R128_U64)whole;
      v -= whole;                                        // v is now in [0, 1)
      r.lo = (R128_U64)(v * 18446744073709551616.0);     // fraction * 2^64

      if (sign) {
         r128__neg(&r, &r);
      }

      r128Copy(dst, &r);
   }
}
1352
1353
// Parses a number from the string s into *dst.
//
// Accepted input: optional leading whitespace (space, \t, \r, \n, \v), an
// optional '+' or '-', an optional "0x"/"0X" prefix selecting base 16
// (base 10 otherwise), whole-part digits, and optionally the R128_decimal
// character followed by fractional digits. If endptr is not NULL it receives
// a pointer to the first character that was not consumed. Overflow of the
// whole part is not detected.
void r128FromString(R128 *dst, const char *s, char **endptr)
{
   R128_U64 whole = 0, frac = 0;
   R128_U64 radix = 10;
   int negative = 0;

   R128_ASSERT(dst != NULL);
   R128_ASSERT(s != NULL);

   R128_SET2(dst, 0, 0);

   // consume whitespace
   while (*s == ' ' || *s == '\t' || *s == '\r' || *s == '\n' || *s == '\v') {
      ++s;
   }

   // sign
   if (*s == '-' || *s == '+') {
      negative = (*s == '-');
      ++s;
   }

   // parse base prefix
   if (s[0] == '0' && (s[1] == 'x' || s[1] == 'X')) {
      radix = 16;
      s += 2;
   }

   // whole part
   for (;;) {
      R128_U64 digit;
      const char ch = *s;

      if (ch >= '0' && ch <= '9') {
         digit = ch - '0';
      } else if (radix == 16 && ch >= 'a' && ch <= 'f') {
         digit = ch - 'a' + 10;
      } else if (radix == 16 && ch >= 'A' && ch <= 'F') {
         digit = ch - 'A' + 10;
      } else {
         break;
      }

      whole = whole * radix + digit;
      ++s;
   }

   // fractional part
   if (*s == R128_decimal) {
      const char *fracBegin = ++s;
      const char *p;

      // find the last digit and work backwards
      for (;;) {
         const char ch = *s;
         const int isDecimalDigit = (ch >= '0' && ch <= '9');
         const int isHexLetter = (radix == 16) &&
            ((ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'));

         if (!isDecimalDigit && !isHexLetter) {
            break;
         }
         ++s;
      }

      // fold the digits in from last to first: each step computes
      // (digit:frac) / radix, pushing the digit in at the top of the fraction
      for (p = s; p > fracBegin; ) {
         R128_U64 digit, unused;
         const char ch = *--p;

         if (ch >= '0' && ch <= '9') {
            digit = ch - '0';
         } else if (ch >= 'a' && ch <= 'f') {
            digit = ch - 'a' + 10;
         } else {
            digit = ch - 'A' + 10;
         }

         frac = r128__udiv128(frac, digit, radix, &unused);
      }
   }

   R128_SET2(dst, frac, whole);
   if (negative) {
      r128__neg(dst, dst);
   }

   if (endptr) {
      *endptr = (char *) s;
   }
}
1443
1444
R128_S64 r128ToInt(const R128 *v)
1445
{
1446
R128_ASSERT(v != NULL);
1447
if ((R128_S64)v->hi < 0) {
1448
return (R128_S64)v->hi + (v->lo != 0);
1449
} else {
1450
return (R128_S64)v->hi;
1451
}
1452
}
1453
1454
double r128ToFloat(const R128 *v)
1455
{
1456
R128 tmp;
1457
int sign = 0;
1458
double d;
1459
1460
R128_ASSERT(v != NULL);
1461
1462
R128_SET2(&tmp, v->lo, v->hi);
1463
if (r128IsNeg(&tmp)) {
1464
r128__neg(&tmp, &tmp);
1465
sign = 1;
1466
}
1467
1468
d = tmp.hi + tmp.lo * (1 / 18446744073709551616.0);
1469
if (sign) {
1470
d = -d;
1471
}
1472
1473
return d;
1474
}
1475
1476
// Formats *v into dst using the caller-supplied options in *opt.
// Thin wrapper: all argument checking and formatting is done by
// r128__format, whose return value is passed through unchanged.
int r128ToStringOpt(char *dst, size_t dstSize, const R128 *v, const R128ToStringFormat *opt)
{
   const int length = r128__format(dst, dstSize, v, opt);
   return length;
}
1480
1481
int r128ToStringf(char *dst, size_t dstSize, const char *format, const R128 *v)
1482
{
1483
R128ToStringFormat opts;
1484
1485
R128_ASSERT(dst != NULL && dstSize);
1486
R128_ASSERT(format != NULL);
1487
R128_ASSERT(v != NULL);
1488
1489
opts.sign = R128__defaultFormat.sign;
1490
opts.precision = R128__defaultFormat.precision;
1491
opts.zeroPad = R128__defaultFormat.zeroPad;
1492
opts.decimal = R128__defaultFormat.decimal;
1493
opts.leftAlign = R128__defaultFormat.leftAlign;
1494
1495
if (*format == '%') {
1496
++format;
1497
}
1498
1499
// flags field
1500
for (;; ++format) {
1501
if (*format == ' ' && opts.sign != R128ToStringSign_Plus) {
1502
opts.sign = R128ToStringSign_Space;
1503
} else if (*format == '+') {
1504
opts.sign = R128ToStringSign_Plus;
1505
} else if (*format == '0') {
1506
opts.zeroPad = 1;
1507
} else if (*format == '-') {
1508
opts.leftAlign = 1;
1509
} else if (*format == '#') {
1510
opts.decimal = 1;
1511
} else {
1512
break;
1513
}
1514
}
1515
1516
// width field
1517
opts.width = 0;
1518
for (;;) {
1519
if ('0' <= *format && *format <= '9') {
1520
opts.width = opts.width * 10 + *format++ - '0';
1521
} else {
1522
break;
1523
}
1524
}
1525
1526
// precision field
1527
if (*format == '.') {
1528
opts.precision = 0;
1529
++format;
1530
for (;;) {
1531
if ('0' <= *format && *format <= '9') {
1532
opts.precision = opts.precision * 10 + *format++ - '0';
1533
} else {
1534
break;
1535
}
1536
}
1537
}
1538
1539
return r128__format(dst, dstSize, v, &opts);
1540
}
1541
1542
int r128ToString(char *dst, size_t dstSize, const R128 *v)
1543
{
1544
return r128__format(dst, dstSize, v, &R128__defaultFormat);
1545
}
1546
1547
// Copies both words of *src into *dst.
void r128Copy(R128 *dst, const R128 *src)
{
   R128_ASSERT(dst != NULL);
   R128_ASSERT(src != NULL);

   dst->hi = src->hi;
   dst->lo = src->lo;
   R128_DEBUG_SET(dst);
}
1555
1556
// Public negation entry point: *dst = -(*v).
// The arithmetic is delegated to r128__neg; this wrapper only adds the
// R128_DEBUG_SET call on the result.
void r128Neg(R128 *dst, const R128 *v)
{
   r128__neg(dst, v);

   R128_DEBUG_SET(dst);
}
1561
1562
// Branch-free absolute value: *dst = |*v|.
// mask is the sign bit of v smeared over all 128 bits (assuming the signed
// right shift is arithmetic), so (v ^ mask) - mask negates v when it is
// negative and leaves it untouched otherwise.
void r128Abs(R128* dst, const R128* v)
{
   R128 mask, flipped;

   R128_ASSERT(dst != NULL);
   R128_ASSERT(v != NULL);

   mask.hi = (R128_U64)(((R128_S64)v->hi) >> 63);
   mask.lo = mask.hi;

   flipped.hi = v->hi ^ mask.hi;
   flipped.lo = v->lo ^ mask.lo;

   r128Sub(dst, &flipped, &mask);
}
1575
1576
// Branch-free negative absolute value: *dst = -|*v|.
// Same construction as r128Abs with the subtraction reversed:
// mask - (v ^ mask), where mask is the sign bit of v smeared over all 128
// bits (assuming the signed right shift is arithmetic).
void r128Nabs(R128* dst, const R128* v)
{
   R128 mask, flipped;

   R128_ASSERT(dst != NULL);
   R128_ASSERT(v != NULL);

   mask.hi = (R128_U64)(((R128_S64)v->hi) >> 63);
   mask.lo = mask.hi;

   flipped.hi = v->hi ^ mask.hi;
   flipped.lo = v->lo ^ mask.lo;

   r128Sub(dst, &mask, &flipped);
}
1589
1590
// Bitwise complement: *dst = ~(*src), applied to both words.
void r128Not(R128 *dst, const R128 *src)
{
   R128_ASSERT(dst != NULL);
   R128_ASSERT(src != NULL);

   dst->hi = ~src->hi;
   dst->lo = ~src->lo;
   R128_DEBUG_SET(dst);
}
1599
1600
// Bitwise OR: *dst = *a | *b, applied to both words.
void r128Or(R128 *dst, const R128 *a, const R128 *b)
{
   R128_ASSERT(dst != NULL);
   R128_ASSERT(a != NULL);
   R128_ASSERT(b != NULL);

   dst->hi = a->hi | b->hi;
   dst->lo = a->lo | b->lo;
   R128_DEBUG_SET(dst);
}
1610
1611
// Bitwise AND: *dst = *a & *b, applied to both words.
void r128And(R128 *dst, const R128 *a, const R128 *b)
{
   R128_ASSERT(dst != NULL);
   R128_ASSERT(a != NULL);
   R128_ASSERT(b != NULL);

   dst->hi = a->hi & b->hi;
   dst->lo = a->lo & b->lo;
   R128_DEBUG_SET(dst);
}
1621
1622
// Bitwise exclusive OR: *dst = *a ^ *b, applied to both words.
void r128Xor(R128 *dst, const R128 *a, const R128 *b)
{
   R128_ASSERT(dst != NULL);
   R128_ASSERT(a != NULL);
   R128_ASSERT(b != NULL);

   dst->hi = a->hi ^ b->hi;
   dst->lo = a->lo ^ b->lo;
   R128_DEBUG_SET(dst);
}
1632
1633
// Logical left shift: *dst = *src << amount.
// In the portable path the shift count is reduced modulo 128. MSVC x86
// builds use an inline-assembly path instead; MSVC x64 builds use the
// __shiftleft128 intrinsic for the cross-word part.
void r128Shl(R128 *dst, const R128 *src, int amount)
{
   R128_U64 r[4];   // result in r[0] (low) and r[1] (high); the asm path uses the rest as scratch

   R128_ASSERT(dst != NULL);
   R128_ASSERT(src != NULL);

#if defined(_M_IX86) && !defined(R128_STDC_ONLY) && !defined(__MINGW32__)
   __asm {
      // load src
      mov edx, dword ptr[src]
      mov ecx, amount

      mov edi, dword ptr[edx]
      mov esi, dword ptr[edx + 4]
      mov ebx, dword ptr[edx + 8]
      mov eax, dword ptr[edx + 12]

      // shift mod 32
      shld eax, ebx, cl
      shld ebx, esi, cl
      shld esi, edi, cl
      shl edi, cl

      // clear out low 12 bytes of stack
      xor edx, edx
      mov dword ptr[r], edx
      mov dword ptr[r + 4], edx
      mov dword ptr[r + 8], edx

      // store shifted amount offset by count/32 bits
      shr ecx, 5
      and ecx, 3
      mov dword ptr[r + ecx * 4 + 0], edi
      mov dword ptr[r + ecx * 4 + 4], esi
      mov dword ptr[r + ecx * 4 + 8], ebx
      mov dword ptr[r + ecx * 4 + 12], eax
   }
#else
   {
      R128_U64 low = src->lo;
      R128_U64 high = src->hi;

      amount &= 127;
      if (amount >= 64) {
         // the whole low word moves into the high word
         high = low << (amount - 64);
         low = 0;
      } else if (amount != 0) {
         // amount is in 1..63 here, so (64 - amount) is a valid shift count
#  if defined(_M_X64) && !defined(R128_STDC_ONLY)
         high = __shiftleft128(low, high, (char) amount);
#  else
         high = (high << amount) | (low >> (64 - amount));
#  endif
         low = low << amount;
      }

      r[0] = low;
      r[1] = high;
   }
#endif //_M_IX86

   dst->lo = r[0];
   dst->hi = r[1];
   R128_DEBUG_SET(dst);
}
1694
1695
// Logical (zero-filling) right shift: *dst = *src >> amount.
// In the portable path the shift count is reduced modulo 128. MSVC x86
// builds use an inline-assembly path instead; MSVC x64 builds use the
// __shiftright128 intrinsic for the cross-word part.
void r128Shr(R128 *dst, const R128 *src, int amount)
{
   R128_U64 r[4];   // result in r[2] (low) and r[3] (high); the asm path uses the rest as scratch

   R128_ASSERT(dst != NULL);
   R128_ASSERT(src != NULL);

#if defined(_M_IX86) && !defined(R128_STDC_ONLY) && !defined(__MINGW32__)
   __asm {
      // load src
      mov edx, dword ptr[src]
      mov ecx, amount

      mov edi, dword ptr[edx]
      mov esi, dword ptr[edx + 4]
      mov ebx, dword ptr[edx + 8]
      mov eax, dword ptr[edx + 12]

      // shift mod 32
      shrd edi, esi, cl
      shrd esi, ebx, cl
      shrd ebx, eax, cl
      shr eax, cl

      // clear out high 12 bytes of stack
      xor edx, edx
      mov dword ptr[r + 20], edx
      mov dword ptr[r + 24], edx
      mov dword ptr[r + 28], edx

      // store shifted amount offset by -count/32 bits
      shr ecx, 5
      and ecx, 3
      neg ecx
      mov dword ptr[r + ecx * 4 + 16], edi
      mov dword ptr[r + ecx * 4 + 20], esi
      mov dword ptr[r + ecx * 4 + 24], ebx
      mov dword ptr[r + ecx * 4 + 28], eax
   }
#else
   {
      R128_U64 low = src->lo;
      R128_U64 high = src->hi;

      amount &= 127;
      if (amount >= 64) {
         // the whole high word moves into the low word
         low = high >> (amount - 64);
         high = 0;
      } else if (amount != 0) {
         // amount is in 1..63 here, so (64 - amount) is a valid shift count
#  if defined(_M_X64) && !defined(R128_STDC_ONLY)
         low = __shiftright128(low, high, (char) amount);
#  else
         low = (low >> amount) | (high << (64 - amount));
#  endif
         high = high >> amount;
      }

      r[2] = low;
      r[3] = high;
   }
#endif

   dst->lo = r[2];
   dst->hi = r[3];
   R128_DEBUG_SET(dst);
}
1756
1757
// Arithmetic (sign-extending) right shift: *dst = *src >> amount.
// In the portable path the shift count is reduced modulo 128. MSVC x86
// builds use an inline-assembly path instead.
//
// The portable path relies on >> of a negative signed value being an
// arithmetic shift, which is implementation-defined in C.
void r128Sar(R128 *dst, const R128 *src, int amount)
{
   R128_U64 r[4];   // result in r[2] (low) and r[3] (high); the asm path uses the rest as scratch

   R128_ASSERT(dst != NULL);
   R128_ASSERT(src != NULL);

#if defined(_M_IX86) && !defined(R128_STDC_ONLY) && !defined(__MINGW32__)
   __asm {
      // load src
      mov edx, dword ptr[src]
      mov ecx, amount

      mov edi, dword ptr[edx]
      mov esi, dword ptr[edx + 4]
      mov ebx, dword ptr[edx + 8]
      mov eax, dword ptr[edx + 12]

      // shift mod 32
      shrd edi, esi, cl
      shrd esi, ebx, cl
      shrd ebx, eax, cl
      sar eax, cl

      // copy sign to high 12 bytes of stack
      cdq
      mov dword ptr[r + 20], edx
      mov dword ptr[r + 24], edx
      mov dword ptr[r + 28], edx

      // store shifted amount offset by -count/32 bits
      shr ecx, 5
      and ecx, 3
      neg ecx
      mov dword ptr[r + ecx * 4 + 16], edi
      mov dword ptr[r + ecx * 4 + 20], esi
      mov dword ptr[r + ecx * 4 + 24], ebx
      mov dword ptr[r + ecx * 4 + 28], eax
   }
#else
   r[2] = src->lo;
   r[3] = src->hi;

   amount &= 127;
   if (amount >= 64) {
      // low word comes from the high word; high word becomes pure sign
      r[2] = (R128_U64)((R128_S64)r[3] >> (amount - 64));
      r[3] = (R128_U64)((R128_S64)r[3] >> 63);
   } else if (amount) {
      // The bits carried from the high word into the low word are shifted
      // as unsigned: left-shifting a negative signed value is undefined
      // behavior, and the unsigned shift yields the same bit pattern.
      r[2] = (r[2] >> amount) | (r[3] << (64 - amount));
      r[3] = (R128_U64)((R128_S64)r[3] >> amount);
   }
#endif

   dst->lo = r[2];
   dst->hi = r[3];
   R128_DEBUG_SET(dst);
}
1814
1815
// 128-bit addition: *dst = *a + *b, wrapping on overflow.
// Intel builds chain the add-with-carry intrinsics (two 64-bit steps or four
// 32-bit steps); the portable path detects the carry out of the low word by
// checking whether the low sum wrapped.
void r128Add(R128 *dst, const R128 *a, const R128 *b)
{
   unsigned char carry = 0;
   R128_ASSERT(dst != NULL);
   R128_ASSERT(a != NULL);
   R128_ASSERT(b != NULL);

#if R128_INTEL && !defined(R128_STDC_ONLY)
#  if R128_64BIT
   carry = _addcarry_u64(carry, a->lo, b->lo, &dst->lo);
   carry = _addcarry_u64(carry, a->hi, b->hi, &dst->hi);
#  else
   R128_U32 r0, r1, r2, r3;
   carry = _addcarry_u32(carry, R128_R0(a), R128_R0(b), &r0);
   carry = _addcarry_u32(carry, R128_R1(a), R128_R1(b), &r1);
   carry = _addcarry_u32(carry, R128_R2(a), R128_R2(b), &r2);
   carry = _addcarry_u32(carry, R128_R3(a), R128_R3(b), &r3);
   R128_SET4(dst, r0, r1, r2, r3);
#  endif //R128_64BIT
#else
   {
      // compute both words before storing, so dst may alias a or b
      const R128_U64 lowSum = a->lo + b->lo;
      R128_U64 highSum;

      carry = lowSum < a->lo;   // unsigned wrap-around means a carry out
      highSum = a->hi + b->hi + carry;

      dst->lo = lowSum;
      dst->hi = highSum;
   }
#endif //R128_INTEL

   R128_DEBUG_SET(dst);
}
1845
1846
// 128-bit subtraction: *dst = *a - *b, wrapping on underflow.
// Intel builds chain the subtract-with-borrow intrinsics (two 64-bit steps
// or four 32-bit steps); the portable path detects the borrow out of the low
// word by checking whether the low difference wrapped.
void r128Sub(R128 *dst, const R128 *a, const R128 *b)
{
   unsigned char borrow = 0;
   R128_ASSERT(dst != NULL);
   R128_ASSERT(a != NULL);
   R128_ASSERT(b != NULL);

#if R128_INTEL && !defined(R128_STDC_ONLY)
#  if R128_64BIT
   borrow = _subborrow_u64(borrow, a->lo, b->lo, &dst->lo);
   borrow = _subborrow_u64(borrow, a->hi, b->hi, &dst->hi);
#  else
   R128_U32 r0, r1, r2, r3;
   borrow = _subborrow_u32(borrow, R128_R0(a), R128_R0(b), &r0);
   borrow = _subborrow_u32(borrow, R128_R1(a), R128_R1(b), &r1);
   borrow = _subborrow_u32(borrow, R128_R2(a), R128_R2(b), &r2);
   borrow = _subborrow_u32(borrow, R128_R3(a), R128_R3(b), &r3);
   R128_SET4(dst, r0, r1, r2, r3);
#  endif //R128_64BIT
#else
   {
      // compute both words before storing, so dst may alias a or b
      const R128_U64 lowDiff = a->lo - b->lo;
      R128_U64 highDiff;

      borrow = lowDiff > a->lo;   // unsigned wrap-around means a borrow
      highDiff = a->hi - b->hi - borrow;

      dst->lo = lowDiff;
      dst->hi = highDiff;
   }
#endif //R128_INTEL

   R128_DEBUG_SET(dst);
}
1876
1877
// Signed multiplication: *dst = *a * *b.
// Both operands are reduced to their magnitudes, multiplied with r128__umul,
// and the product is negated when exactly one operand was negative.
void r128Mul(R128 *dst, const R128 *a, const R128 *b)
{
   R128 lhs, rhs, product;
   int negative = 0;

   R128_ASSERT(dst != NULL);
   R128_ASSERT(a != NULL);
   R128_ASSERT(b != NULL);

   R128_SET2(&lhs, a->lo, a->hi);
   if (r128IsNeg(&lhs)) {
      r128__neg(&lhs, &lhs);
      negative = !negative;
   }

   R128_SET2(&rhs, b->lo, b->hi);
   if (r128IsNeg(&rhs)) {
      r128__neg(&rhs, &rhs);
      negative = !negative;
   }

   r128__umul(&product, &lhs, &rhs);
   if (negative) {
      r128__neg(&product, &product);
   }

   r128Copy(dst, &product);
}
1905
1906
// Signed division: *dst = *a / *b.
// Division by zero does not trap: the result is R128_min when a is negative
// and R128_max otherwise. For a non-zero divisor the magnitudes are divided
// with r128__udiv and the quotient is negated when the operand signs differ.
void r128Div(R128 *dst, const R128 *a, const R128 *b)
{
   R128 num, den, quot;
   int negative = 0;

   R128_ASSERT(dst != NULL);
   R128_ASSERT(a != NULL);
   R128_ASSERT(b != NULL);

   R128_SET2(&num, a->lo, a->hi);
   R128_SET2(&den, b->lo, b->hi);

   if (r128IsNeg(&num)) {
      r128__neg(&num, &num);
      negative = 1;
   }

   if (den.lo == 0 && den.hi == 0) {
      // divide by zero: saturate in the direction of the numerator's sign
      r128Copy(dst, negative ? &R128_min : &R128_max);
      return;
   }

   if (r128IsNeg(&den)) {
      r128__neg(&den, &den);
      negative = !negative;
   }

   r128__udiv(&quot, &num, &den);

   if (negative) {
      r128__neg(&quot, &quot);
   }

   r128Copy(dst, &quot);
}
1944
1945
// Signed remainder: *dst = *a - q * *b, where q is the integer quotient
// digit returned by r128__umod for the operand magnitudes, negated when the
// operand signs differ.
// Division by zero does not trap: the result is R128_min when a is negative
// and R128_max otherwise.
void r128Mod(R128 *dst, const R128 *a, const R128 *b)
{
   R128 num, den, scaled;
   int negative = 0;

   R128_ASSERT(dst != NULL);
   R128_ASSERT(a != NULL);
   R128_ASSERT(b != NULL);

   R128_SET2(&num, a->lo, a->hi);
   R128_SET2(&den, b->lo, b->hi);

   if (r128IsNeg(&num)) {
      r128__neg(&num, &num);
      negative = 1;
   }

   if (den.lo == 0 && den.hi == 0) {
      // divide by zero: saturate in the direction of the numerator's sign
      r128Copy(dst, negative ? &R128_min : &R128_max);
      return;
   }

   if (r128IsNeg(&den)) {
      r128__neg(&den, &den);
      negative = !negative;
   }

   // integer quotient, stored as a whole number (fraction word zero)
   scaled.hi = r128__umod(&num, &den);
   scaled.lo = 0;

   if (negative) {
      // two's complement negation of the whole word
      scaled.hi = ~scaled.hi + 1;
   }

   // remainder = a - quotient * b, using the original signed operands
   r128Mul(&scaled, &scaled, b);
   r128Sub(dst, a, &scaled);
}
1985
1986
// Reciprocal square root: *dst = 1 / sqrt(*v), computed with at most seven
// Newton-Raphson iterations starting from a power-of-two estimate.
// A negative input yields R128_min and a zero input yields zero.
void r128Rsqrt(R128 *dst, const R128 *v)
{
   static const R128 threeHalves = { R128_LIT_U64(0x8000000000000000), 1 };
   R128 x, est;
   int i;

   // same argument checks as the other public entry points
   R128_ASSERT(dst != NULL);
   R128_ASSERT(v != NULL);

   if ((R128_S64)v->hi < 0) {
      r128Copy(dst, &R128_min);
      return;
   }

   R128_SET2(&x, v->lo, v->hi);

   // get initial estimate: a single set bit chosen from the position of the
   // leading bit of x
   if (x.hi) {
      int shift = (64 + r128__clz64(x.hi)) >> 1;
      est.lo = R128_LIT_U64(1) << shift;
      est.hi = 0;
   } else if (x.lo) {
      int shift = r128__clz64(x.lo) >> 1;
      est.hi = R128_LIT_U64(1) << shift;
      est.lo = 0;
   } else {
      R128_SET2(dst, 0, 0);
      return;
   }

   // x /= 2
   r128Shr(&x, &x, 1);

   // Newton-Raphson iterate
   for (i = 0; i < 7; ++i) {
      R128 newEst;

      // newEst = est * (threeHalves - (x / 2) * est * est);
      r128__umul(&newEst, &est, &est);
      r128__umul(&newEst, &newEst, &x);
      r128Sub(&newEst, &threeHalves, &newEst);
      r128__umul(&newEst, &est, &newEst);

      // stop early once the estimate no longer changes
      if (newEst.lo == est.lo && newEst.hi == est.hi) {
         break;
      }
      R128_SET2(&est, newEst.lo, newEst.hi);
   }

   r128Copy(dst, &est);
}
2034
2035
// Square root: *dst = sqrt(*v), computed with at most seven Newton-Raphson
// iterations starting from x shifted towards 1.
// A negative input yields R128_min and a zero input yields zero.
void r128Sqrt(R128 *dst, const R128 *v)
{
   R128 x, est;
   int i;

   // same argument checks as the other public entry points
   R128_ASSERT(dst != NULL);
   R128_ASSERT(v != NULL);

   if ((R128_S64)v->hi < 0) {
      r128Copy(dst, &R128_min);
      return;
   }

   R128_SET2(&x, v->lo, v->hi);

   // get initial estimate: shift x by about half the distance between its
   // leading bit and the binary point
   if (x.hi) {
      int shift = (63 - r128__clz64(x.hi)) >> 1;
      r128Shr(&est, &x, shift);
   } else if (x.lo) {
      int shift = (1 + r128__clz64(x.lo)) >> 1;
      r128Shl(&est, &x, shift);
   } else {
      R128_SET2(dst, 0, 0);
      return;
   }

   // Newton-Raphson iterate
   for (i = 0; i < 7; ++i) {
      R128 newEst;

      // newEst = (est + x / est) / 2
      r128__udiv(&newEst, &x, &est);
      r128Add(&newEst, &newEst, &est);
      r128Shr(&newEst, &newEst, 1);

      // stop early once the estimate no longer changes
      if (newEst.lo == est.lo && newEst.hi == est.hi) {
         break;
      }
      R128_SET2(&est, newEst.lo, newEst.hi);
   }

   r128Copy(dst, &est);
}
2076
2077
int r128Cmp(const R128 *a, const R128 *b)
2078
{
2079
R128_ASSERT(a != NULL);
2080
R128_ASSERT(b != NULL);
2081
2082
if (a->hi == b->hi) {
2083
if (a->lo == b->lo) {
2084
return 0;
2085
} else if (a->lo > b->lo) {
2086
return 1;
2087
} else {
2088
return -1;
2089
}
2090
} else if ((R128_S64)a->hi > (R128_S64)b->hi) {
2091
return 1;
2092
} else {
2093
return -1;
2094
}
2095
}
2096
2097
int r128IsNeg(const R128 *v)
2098
{
2099
R128_ASSERT(v != NULL);
2100
2101
return (R128_S64)v->hi < 0;
2102
}
2103
2104
// Copies the smaller of *a and *b (by r128Cmp) into *dst.
// When the two compare equal, *b is the one copied.
void r128Min(R128 *dst, const R128 *a, const R128 *b)
{
   R128_ASSERT(dst != NULL);
   R128_ASSERT(a != NULL);
   R128_ASSERT(b != NULL);

   r128Copy(dst, (r128Cmp(a, b) < 0) ? a : b);
}
2116
2117
// Copies the larger of *a and *b (by r128Cmp) into *dst.
// When the two compare equal, *b is the one copied.
void r128Max(R128 *dst, const R128 *a, const R128 *b)
{
   R128_ASSERT(dst != NULL);
   R128_ASSERT(a != NULL);
   R128_ASSERT(b != NULL);

   r128Copy(dst, (r128Cmp(a, b) > 0) ? a : b);
}
2129
2130
// Rounds towards negative infinity: keeps the whole word of *v and clears
// the fraction word.
void r128Floor(R128 *dst, const R128 *v)
{
   R128_U64 whole;

   R128_ASSERT(dst != NULL);
   R128_ASSERT(v != NULL);

   // read before writing so dst may alias v
   whole = v->hi;

   dst->lo = 0;
   dst->hi = whole;
   R128_DEBUG_SET(dst);
}
2139
2140
// Rounds towards positive infinity: the whole word of *v is incremented when
// the fraction word is non-zero, and the fraction word is cleared.
void r128Ceil(R128 *dst, const R128 *v)
{
   R128_U64 whole;

   R128_ASSERT(dst != NULL);
   R128_ASSERT(v != NULL);

   // read both words before writing so dst may alias v
   whole = v->hi;
   if (v->lo != 0) {
      ++whole;
   }

   dst->hi = whole;
   dst->lo = 0;
   R128_DEBUG_SET(dst);
}
2149
2150
// Rounds to the nearest whole number, with exact halves going away from
// zero. The whole word is incremented when the fraction word reaches the
// threshold: one half for non-negative values, one half plus one unit in the
// last place for negative values (so that -x.5 rounds down in value).
void r128Round(R128* dst, const R128* v)
{
   R128_U64 threshold;
   R128_U64 whole;

   R128_ASSERT(dst != NULL);
   R128_ASSERT(v != NULL);

   // read both words before writing so dst may alias v
   threshold = R128_LIT_U64(0x8000000000000000) + (R128_U64)((R128_S64)v->hi < 0);
   whole = v->hi + (v->lo >= threshold);

   dst->hi = whole;
   dst->lo = 0;
   R128_DEBUG_SET(dst);
}
2159
2160
#endif //R128_IMPLEMENTATION
2161
2162