CoCalc -- cbrt.c

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/arm-optimized-routines/math/aarch64/advsimd/cbrt.c
¹⁰⁹⁷⁷⁵ views
1
/*
2
 * Double-precision vector cbrt(x) function.
3
 *
4
 * Copyright (c) 2022-2024, Arm Limited.
5
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6
 */
7

8
#include "v_math.h"
9
#include "test_sig.h"
10
#include "test_defs.h"
11
#include "v_poly_f64.h"
12

13
const static struct data
14
{
15
  float64x2_t poly[4], one_third, shift;
16
  int64x2_t exp_bias;
17
  uint64x2_t abs_mask, tiny_bound;
18
  uint32x4_t thresh;
19
  double table[5];
20
} data = {
21
  .shift = V2 (0x1.8p52),
22
  .poly = { /* Generated with fpminimax in [0.5, 1].  */
23
            V2 (0x1.c14e8ee44767p-2), V2 (0x1.dd2d3f99e4c0ep-1),
24
	    V2 (-0x1.08e83026b7e74p-1), V2 (0x1.2c74eaa3ba428p-3) },
25
  .exp_bias = V2 (1022),
26
  .abs_mask = V2(0x7fffffffffffffff),
27
  .tiny_bound = V2(0x0010000000000000), /* Smallest normal.  */
28
  .thresh = V4(0x7fe00000),   /* asuint64 (infinity) - tiny_bound.  */
29
  .one_third = V2(0x1.5555555555555p-2),
30
  .table = { /* table[i] = 2^((i - 2) / 3).  */
31
             0x1.428a2f98d728bp-1, 0x1.965fea53d6e3dp-1, 0x1p0,
32
	     0x1.428a2f98d728bp0, 0x1.965fea53d6e3dp0 }
33
};
34

35
#define MantissaMask v_u64 (0x000fffffffffffff)
36

37
static float64x2_t NOINLINE VPCS_ATTR
38
special_case (float64x2_t x, float64x2_t y, uint32x2_t special)
39
{
40
  return v_call_f64 (cbrt, x, y, vmovl_u32 (special));
41
}
42

43
/* Approximation for double-precision vector cbrt(x), using low-order
44
   polynomial and two Newton iterations.
45

46
   The vector version of frexp does not handle subnormals
47
   correctly. As a result these need to be handled by the scalar
48
   fallback, where accuracy may be worse than that of the vector code
49
   path.
50

51
   Greatest observed error in the normal range is 1.79 ULP. Errors repeat
52
   according to the exponent, for instance an error observed for double value
53
   m * 2^e will be observed for any input m * 2^(e + 3*i), where i is an
54
   integer.
55
   _ZGVnN2v_cbrt (0x1.fffff403f0bc6p+1) got 0x1.965fe72821e9bp+0
56
				       want 0x1.965fe72821e99p+0.  */
57
VPCS_ATTR float64x2_t V_NAME_D1 (cbrt) (float64x2_t x)
58
{
59
  const struct data *d = ptr_barrier (&data);
60
  uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x));
61

62
  /* Subnormal, +/-0 and special values.  */
63
  uint32x2_t special
64
      = vcge_u32 (vsubhn_u64 (iax, d->tiny_bound), vget_low_u32 (d->thresh));
65

66
  /* Decompose |x| into m * 2^e, where m is in [0.5, 1.0]. This is a vector
67
     version of frexp, which gets subnormal values wrong - these have to be
68
     special-cased as a result.  */
69
  float64x2_t m = vbslq_f64 (MantissaMask, x, v_f64 (0.5));
70
  int64x2_t exp_bias = d->exp_bias;
71
  uint64x2_t ia12 = vshrq_n_u64 (iax, 52);
72
  int64x2_t e = vsubq_s64 (vreinterpretq_s64_u64 (ia12), exp_bias);
73

74
  /* Calculate rough approximation for cbrt(m) in [0.5, 1.0], starting point
75
     for Newton iterations.  */
76
  float64x2_t p = v_pairwise_poly_3_f64 (m, vmulq_f64 (m, m), d->poly);
77
  float64x2_t one_third = d->one_third;
78
  /* Two iterations of Newton's method for iteratively approximating cbrt.  */
79
  float64x2_t m_by_3 = vmulq_f64 (m, one_third);
80
  float64x2_t two_thirds = vaddq_f64 (one_third, one_third);
81
  float64x2_t a
82
      = vfmaq_f64 (vdivq_f64 (m_by_3, vmulq_f64 (p, p)), two_thirds, p);
83
  a = vfmaq_f64 (vdivq_f64 (m_by_3, vmulq_f64 (a, a)), two_thirds, a);
84

85
  /* Assemble the result by the following:
86

87
     cbrt(x) = cbrt(m) * 2 ^ (e / 3).
88

89
     We can get 2 ^ round(e / 3) using ldexp and integer divide, but since e is
90
     not necessarily a multiple of 3 we lose some information.
91

92
     Let q = 2 ^ round(e / 3), then t = 2 ^ (e / 3) / q.
93

94
     Then we know t = 2 ^ (i / 3), where i is the remainder from e / 3, which
95
     is an integer in [-2, 2], and can be looked up in the table T. Hence the
96
     result is assembled as:
97

98
     cbrt(x) = cbrt(m) * t * 2 ^ round(e / 3) * sign.  */
99

100
  float64x2_t ef = vcvtq_f64_s64 (e);
101
  float64x2_t eb3f = vrndnq_f64 (vmulq_f64 (ef, one_third));
102
  int64x2_t em3 = vcvtq_s64_f64 (vfmsq_f64 (ef, eb3f, v_f64 (3)));
103
  int64x2_t ey = vcvtq_s64_f64 (eb3f);
104

105
  float64x2_t my = (float64x2_t){ d->table[em3[0] + 2], d->table[em3[1] + 2] };
106
  my = vmulq_f64 (my, a);
107

108
  /* Vector version of ldexp.  */
109
  float64x2_t y = vreinterpretq_f64_s64 (
110
      vshlq_n_s64 (vaddq_s64 (ey, vaddq_s64 (exp_bias, v_s64 (1))), 52));
111
  y = vmulq_f64 (y, my);
112

113
  if (unlikely (v_any_u32h (special)))
114
    return special_case (x, vbslq_f64 (d->abs_mask, y, x), special);
115

116
  /* Copy sign.  */
117
  return vbslq_f64 (d->abs_mask, y, x);
118
}
119

120
/* Worse-case ULP error assumes that scalar fallback is GLIBC 2.40 cbrt, which
121
   has ULP error of 3.67 at 0x1.7a337e1ba1ec2p-257 [1]. Largest observed error
122
   in the vector path is 1.79 ULP.
123
   [1] Innocente, V., & Zimmermann, P. (2024). Accuracy of Mathematical
124
   Functions in Single, Double, Double Extended, and Quadruple Precision.  */
125
TEST_ULP (V_NAME_D1 (cbrt), 3.17)
126
TEST_SIG (V, D, 1, cbrt, -10.0, 10.0)
127
TEST_SYM_INTERVAL (V_NAME_D1 (cbrt), 0, inf, 1000000)
128

129
Product

Resources

Company