Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/arm-optimized-routines/math/aarch64/experimental/erfc_1u8.c
48375 views
1
/*
2
* Double-precision erfc(x) function.
3
*
4
* Copyright (c) 2023-2024, Arm Limited.
5
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6
*/
7
8
#include "math_config.h"
9
#include "test_sig.h"
10
#include "test_defs.h"
11
12
#define Shift 0x1p45
13
#define P20 0x1.5555555555555p-2 /* 1/3. */
14
#define P21 0x1.5555555555555p-1 /* 2/3. */
15
16
#define P40 0x1.999999999999ap-4 /* 1/10. */
17
#define P41 0x1.999999999999ap-2 /* 2/5. */
18
#define P42 0x1.11111111111111p-3 /* 2/15. */
19
20
#define P50 0x1.5555555555555p-3 /* 1/6. */
21
#define P51 0x1.c71c71c71c71cp-3 /* 2/9. */
22
#define P52 0x1.6c16c16c16c17p-5 /* 2/45. */
23
24
/* Qi = (i+1) / i. */
25
#define Q5 0x1.3333333333333p0
26
#define Q6 0x1.2aaaaaaaaaaabp0
27
#define Q7 0x1.2492492492492p0
28
#define Q8 0x1.2p0
29
#define Q9 0x1.1c71c71c71c72p0
30
31
/* Ri = -2 * i / ((i+1)*(i+2)). */
32
#define R5 -0x1.e79e79e79e79ep-3
33
#define R6 -0x1.b6db6db6db6dbp-3
34
#define R7 -0x1.8e38e38e38e39p-3
35
#define R8 -0x1.6c16c16c16c17p-3
36
#define R9 -0x1.4f2094f2094f2p-3
37
38
/* Fast erfc approximation based on series expansion near x rounded to
39
nearest multiple of 1/128.
40
Let d = x - r, and scale = 2 / sqrt(pi) * exp(-r^2). For x near r,
41
42
erfc(x) ~ erfc(r) - scale * d * poly(r, d), with
43
44
poly(r, d) = 1 - r d + (2/3 r^2 - 1/3) d^2 - r (1/3 r^2 - 1/2) d^3
45
+ (2/15 r^4 - 2/5 r^2 + 1/10) d^4
46
- r * (2/45 r^4 - 2/9 r^2 + 1/6) d^5
47
+ p6(r) d^6 + ... + p10(r) d^10
48
49
Polynomials p6(r) to p10(r) are computed using recurrence relation
50
51
2(i+1)p_i + 2r(i+2)p_{i+1} + (i+2)(i+3)p_{i+2} = 0,
52
with p0 = 1, and p1(r) = -r.
53
54
Values of erfc(r) and scale(r) are read from lookup tables. Stored values
55
are scaled to avoid hitting the subnormal range.
56
57
Note that for x < 0, erfc(x) = 2.0 - erfc(-x).
58
59
Maximum measured error: 1.71 ULP
60
erfc(0x1.46cfe976733p+4) got 0x1.e15fcbea3e7afp-608
61
want 0x1.e15fcbea3e7adp-608. */
62
double
63
erfc (double x)
64
{
65
/* Get top words and sign. */
66
uint64_t ix = asuint64 (x);
67
uint64_t ia = ix & 0x7fffffffffffffff;
68
double a = asdouble (ia);
69
uint64_t sign = ix & ~0x7fffffffffffffff;
70
71
/* erfc(nan)=nan, erfc(+inf)=0 and erfc(-inf)=2. */
72
if (unlikely (ia >= 0x7ff0000000000000))
73
return asdouble (sign >> 1) + 1.0 / x; /* Special cases. */
74
75
/* Return early for large enough negative values. */
76
if (x < -6.0)
77
return 2.0;
78
79
/* For |x| < 3487.0/128.0, the following approximation holds. */
80
if (likely (ia < 0x403b3e0000000000))
81
{
82
/* |x| < 0x1p-511 => accurate to 0.5 ULP. */
83
if (unlikely (ia < asuint64 (0x1p-511)))
84
return 1.0 - x;
85
86
/* Lookup erfc(r) and scale(r) in tables, e.g. set erfc(r) to 1 and scale
87
to 2/sqrt(pi), when x reduced to r = 0. */
88
double z = a + Shift;
89
uint64_t i = asuint64 (z) - asuint64 (Shift);
90
double r = z - Shift;
91
/* These values are scaled by 2^128. */
92
double erfcr = __v_erfc_data.tab[i].erfc;
93
double scale = __v_erfc_data.tab[i].scale;
94
95
/* erfc(x) ~ erfc(r) - scale * d * poly (r, d). */
96
double d = a - r;
97
double d2 = d * d;
98
double r2 = r * r;
99
/* Compute p_i as a regular (low-order) polynomial. */
100
double p1 = -r;
101
double p2 = fma (P21, r2, -P20);
102
double p3 = -r * fma (P20, r2, -0.5);
103
double p4 = fma (fma (P42, r2, -P41), r2, P40);
104
double p5 = -r * fma (fma (P52, r2, -P51), r2, P50);
105
/* Compute p_i using recurrence relation:
106
p_{i+2} = (p_i + r * Q_{i+1} * p_{i+1}) * R_{i+1}. */
107
double p6 = fma (Q5 * r, p5, p4) * R5;
108
double p7 = fma (Q6 * r, p6, p5) * R6;
109
double p8 = fma (Q7 * r, p7, p6) * R7;
110
double p9 = fma (Q8 * r, p8, p7) * R8;
111
double p10 = fma (Q9 * r, p9, p8) * R9;
112
/* Compute polynomial in d using pairwise Horner scheme. */
113
double p90 = fma (p10, d, p9);
114
double p78 = fma (p8, d, p7);
115
double p56 = fma (p6, d, p5);
116
double p34 = fma (p4, d, p3);
117
double p12 = fma (p2, d, p1);
118
double y = fma (p90, d2, p78);
119
y = fma (y, d2, p56);
120
y = fma (y, d2, p34);
121
y = fma (y, d2, p12);
122
123
y = fma (-fma (y, d2, d), scale, erfcr);
124
125
/* Handle sign and scale back in a single fma. */
126
double off = asdouble (sign >> 1);
127
double fac = asdouble (asuint64 (0x1p-128) | sign);
128
y = fma (y, fac, off);
129
130
if (unlikely (x > 26.0))
131
{
132
/* The underflow exception needs to be signaled explicitly when
133
result gets into the subnormal range. */
134
if (unlikely (y < 0x1p-1022))
135
force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
136
/* Set errno to ERANGE if result rounds to 0. */
137
return __math_check_uflow (y);
138
}
139
140
return y;
141
}
142
/* Above the threshold (x > 3487.0/128.0) erfc is constant and needs to raise
143
underflow exception for positive x. */
144
return __math_uflow (0);
145
}
146
147
TEST_SIG (S, D, 1, erfc, -6.0, 28.0)
148
TEST_ULP (erfc, 1.21)
149
TEST_SYM_INTERVAL (erfc, 0, 0x1p-26, 40000)
150
TEST_INTERVAL (erfc, 0x1p-26, 28.0, 100000)
151
TEST_INTERVAL (erfc, -0x1p-26, -6.0, 100000)
152
TEST_INTERVAL (erfc, 28.0, inf, 40000)
153
TEST_INTERVAL (erfc, -6.0, -inf, 40000)
154
155