Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/arm-optimized-routines/math/aarch64/advsimd/expf.c
48375 views
1
/*
2
* Single-precision vector e^x function.
3
*
4
* Copyright (c) 2019-2024, Arm Limited.
5
* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6
*/
7
#include "v_math.h"
8
#include "test_defs.h"
9
#include "test_sig.h"
10
11
/* Coefficients and constants for the single-precision vector e^x
   approximation.  The scalar members ln2_hi, ln2_lo, c0 and c2 are
   deliberately contiguous so the main routine can fetch all four with a
   single vld1q_f32 and reference them via lane-indexed FMA intrinsics.  */
static const struct data
{
  float32x4_t c1, c3, c4, inv_ln2;
  float ln2_hi, ln2_lo, c0, c2;
  uint32x4_t exponent_bias, special_offset, special_bias;
#if !WANT_SIMD_EXCEPT
  /* Thresholds used only by the overflow/underflow fixup path (when fenv
     exceptions do not need to be triggered).  */
  float32x4_t special_bound, scale_thresh;
#endif
} data = {
  /* maxerr: 1.45358 +0.5 ulp. */
  /* Degree-4 polynomial coefficients for 2^r - 1 style expansion; c0 and
     c2 are stored scalar (see layout note above).  */
  .c0 = 0x1.0e4020p-7f,
  .c1 = V4 (0x1.573e2ep-5f),
  .c2 = 0x1.555e66p-3f,
  .c3 = V4 (0x1.fffdb6p-2f),
  .c4 = V4 (0x1.ffffecp-1f),
  /* 1/ln(2), used to compute n = round(x/ln2).  */
  .inv_ln2 = V4 (0x1.715476p+0f),
  /* ln(2) split into high and low parts for extended-precision argument
     reduction r = x - n*ln2.  */
  .ln2_hi = 0x1.62e4p-1f,
  .ln2_lo = 0x1.7f7d1cp-20f,
  /* asuint (1.0f): added to the shifted exponent to build the scale.  */
  .exponent_bias = V4 (0x3f800000),
  .special_offset = V4 (0x82000000),
  .special_bias = V4 (0x7f000000),
#if !WANT_SIMD_EXCEPT
  .special_bound = V4 (126.0f),
  .scale_thresh = V4 (192.0f),
#endif
};
38
/* The stale helper macro `C(i)` (expanding to `d->poly[i]`) has been
   removed: struct data has no `poly` member and nothing in this file
   referenced the macro, so it was dead code left over from an earlier
   coefficient-array layout.  */
#if WANT_SIMD_EXCEPT

# define TinyBound v_u32 (0x20000000) /* asuint (0x1p-63). */
# define BigBound v_u32 (0x42800000) /* asuint (0x1p6). */
# define SpecialBound v_u32 (0x22800000) /* BigBound - TinyBound. */

/* Scalar fallback for lanes flagged in CMP: recomputes them with the
   scalar expf so that floating-point exceptions are raised exactly as the
   scalar routine would.  X holds the original inputs, Y the vector
   results for the non-special lanes.  */
static float32x4_t VPCS_ATTR NOINLINE
special_case (float32x4_t x, float32x4_t y, uint32x4_t cmp)
{
  /* If fenv exceptions are to be triggered correctly, fall back to the scalar
     routine for special lanes. */
  return v_call_f32 (expf, x, y, cmp);
}

#else

/* Fixup for lanes where |n| is large enough that the single scale factor
   2^n would overflow or lose precision.  The scale is decomposed as
   s1*s2 so that each factor stays representable, and the result is
   selected per-lane between the normal path (r0), the split path (r1)
   and the fully-saturated path (r2).  */
static float32x4_t VPCS_ATTR NOINLINE
special_case (float32x4_t poly, float32x4_t n, uint32x4_t e, uint32x4_t cmp1,
	      float32x4_t scale, const struct data *d)
{
  /* 2^n may overflow, break it up into s1*s2. */
  /* b selects the offset only for lanes with n <= 0 (the underflow side). */
  uint32x4_t b = vandq_u32 (vclezq_f32 (n), d->special_offset);
  float32x4_t s1 = vreinterpretq_f32_u32 (vaddq_u32 (b, d->special_bias));
  float32x4_t s2 = vreinterpretq_f32_u32 (vsubq_u32 (e, b));
  /* Lanes with |n| > scale_thresh take the s1*s1 result (overflow to inf
     or underflow to 0 with the correct sign of rounding).  */
  uint32x4_t cmp2 = vcagtq_f32 (n, d->scale_thresh);
  float32x4_t r2 = vmulq_f32 (s1, s1);
  /* (s2 + p*s2)*s1 = s2(p+1)s1.  */
  float32x4_t r1 = vmulq_f32 (vfmaq_f32 (s2, poly, s2), s1);
  /* Similar to r1 but avoids double rounding in the subnormal range. */
  float32x4_t r0 = vfmaq_f32 (scale, poly, scale);
  float32x4_t r = vbslq_f32 (cmp1, r1, r0);
  return vbslq_f32 (cmp2, r2, r);
}

#endif
76
/* Single-precision AdvSIMD e^x.  Computes exp(x) = 2^n * (1 + poly(r))
   via argument reduction x = n*ln2 + r with a degree-4 polynomial in r.
   Worst-case error per the coefficient table: 1.45358 + 0.5 ulp.  */
float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp) (float32x4_t x)
{
  const struct data *d = ptr_barrier (&data);
  /* Single load of the four contiguous scalars:
     lane 0 = ln2_hi, lane 1 = ln2_lo, lane 2 = c0, lane 3 = c2.  */
  float32x4_t ln2_c02 = vld1q_f32 (&d->ln2_hi);

#if WANT_SIMD_EXCEPT
  /* asuint(x) - TinyBound >= BigBound - TinyBound. */
  /* Unsigned-compare trick: one comparison flags both |x| < 0x1p-63 and
     |x| >= 0x1p6 (including nan/inf) as special lanes.  */
  uint32x4_t cmp = vcgeq_u32 (
      vsubq_u32 (vandq_u32 (vreinterpretq_u32_f32 (x), v_u32 (0x7fffffff)),
		 TinyBound),
      SpecialBound);
  float32x4_t xm = x;
  /* If any lanes are special, mask them with 1 and retain a copy of x to allow
     special case handler to fix special lanes later. This is only necessary if
     fenv exceptions are to be triggered correctly. */
  if (unlikely (v_any_u32 (cmp)))
    x = vbslq_f32 (cmp, v_f32 (1), x);
#endif

  /* exp(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
     x = ln2*n + r, with r in [-ln2/2, ln2/2]. */
  float32x4_t n = vrndaq_f32 (vmulq_f32 (x, d->inv_ln2));
  /* Two-step reduction with split ln2 keeps r accurate:
     r = x - n*ln2_hi - n*ln2_lo.  */
  float32x4_t r = vfmsq_laneq_f32 (x, n, ln2_c02, 0);
  r = vfmsq_laneq_f32 (r, n, ln2_c02, 1);
  /* Build 2^n by placing (int) n directly into the exponent field.  */
  uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtq_s32_f32 (n)), 23);
  float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, d->exponent_bias));

#if !WANT_SIMD_EXCEPT
  /* Without fenv, only |n| > 126 needs the overflow/underflow fixup.  */
  uint32x4_t cmp = vcagtq_f32 (n, d->special_bound);
#endif

  /* Pairwise (Estrin-style) evaluation of the degree-4 polynomial; lanes
     2 and 3 of ln2_c02 supply c0 and c2.  */
  float32x4_t r2 = vmulq_f32 (r, r);
  float32x4_t p = vfmaq_laneq_f32 (d->c1, r, ln2_c02, 2);
  float32x4_t q = vfmaq_laneq_f32 (d->c3, r, ln2_c02, 3);
  q = vfmaq_f32 (q, p, r2);
  p = vmulq_f32 (d->c4, r);
  float32x4_t poly = vfmaq_f32 (p, q, r2);

  if (unlikely (v_any_u32 (cmp)))
#if WANT_SIMD_EXCEPT
    return special_case (xm, vfmaq_f32 (scale, poly, scale), cmp);
#else
    return special_case (poly, n, e, cmp, scale, d);
#endif

  /* scale * (1 + poly) fused into a single FMA.  */
  return vfmaq_f32 (scale, poly, scale);
}
124
/* Provide the half-width (float32x2_t) alias of the routine.  */
HALF_WIDTH_ALIAS_F1 (exp)

/* Test harness metadata (consumed by the arm-optimized-routines test
   driver): signature registration, the maximum ULP error to verify
   against (1.49), fenv testing gated on WANT_SIMD_EXCEPT, and the input
   intervals/sample counts to exercise.  */
TEST_SIG (V, F, 1, exp, -9.9, 9.9)
TEST_ULP (V_NAME_F1 (exp), 1.49)
TEST_DISABLE_FENV_IF_NOT (V_NAME_F1 (exp), WANT_SIMD_EXCEPT)
TEST_INTERVAL (V_NAME_F1 (exp), 0, 0xffff0000, 10000)
TEST_SYM_INTERVAL (V_NAME_F1 (exp), 0x1p-14, 0x1p8, 500000)
132