Path: blob/main/contrib/arm-optimized-routines/math/aarch64/advsimd/exp10.c
48375 views
/*1* Double-precision vector 10^x function.2*3* Copyright (c) 2023-2024, Arm Limited.4* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception5*/67#define _GNU_SOURCE8#include "mathlib.h"9#include "v_math.h"10#include "test_sig.h"11#include "test_defs.h"1213/* Value of |x| above which scale overflows without special treatment. */14#define SpecialBound 306.0 /* floor (log10 (2^1023)) - 1. */15/* Value of n above which scale overflows even with special treatment. */16#define ScaleBound 163840.0 /* 1280.0 * N. */1718const static struct data19{20float64x2_t poly[4];21float64x2_t log10_2, log2_10_hi, log2_10_lo, shift;22#if !WANT_SIMD_EXCEPT23float64x2_t special_bound, scale_thresh;24#endif25} data = {26/* Coefficients generated using Remez algorithm.27rel error: 0x1.5ddf8f28p-5428abs error: 0x1.5ed266c8p-54 in [ -log10(2)/256, log10(2)/256 ]29maxerr: 1.14432 +0.5 ulp. */30.poly = { V2 (0x1.26bb1bbb5524p1), V2 (0x1.53524c73cecdap1),31V2 (0x1.047060efb781cp1), V2 (0x1.2bd76040f0d16p0) },32.log10_2 = V2 (0x1.a934f0979a371p8), /* N/log2(10). */33.log2_10_hi = V2 (0x1.34413509f79ffp-9), /* log2(10)/N. */34.log2_10_lo = V2 (-0x1.9dc1da994fd21p-66),35.shift = V2 (0x1.8p+52),36#if !WANT_SIMD_EXCEPT37.scale_thresh = V2 (ScaleBound),38.special_bound = V2 (SpecialBound),39#endif40};4142#define N (1 << V_EXP_TABLE_BITS)43#define IndexMask v_u64 (N - 1)4445#if WANT_SIMD_EXCEPT4647# define TinyBound v_u64 (0x2000000000000000) /* asuint64 (0x1p-511). */48# define BigBound v_u64 (0x4070000000000000) /* asuint64 (0x1p8). */49# define Thres v_u64 (0x2070000000000000) /* BigBound - TinyBound. */5051static float64x2_t VPCS_ATTR NOINLINE52special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp)53{54/* If fenv exceptions are to be triggered correctly, fall back to the scalar55routine for special lanes. */56return v_call_f64 (exp10, x, y, cmp);57}5859#else6061# define SpecialOffset v_u64 (0x6000000000000000) /* 0x1p513. */62/* SpecialBias1 + SpecialBias1 = asuint(1.0). */63# define SpecialBias1 v_u64 (0x7000000000000000) /* 0x1p769. */64# define SpecialBias2 v_u64 (0x3010000000000000) /* 0x1p-254. */6566static inline float64x2_t VPCS_ATTR67special_case (float64x2_t s, float64x2_t y, float64x2_t n,68const struct data *d)69{70/* 2^(n/N) may overflow, break it up into s1*s2. */71uint64x2_t b = vandq_u64 (vcltzq_f64 (n), SpecialOffset);72float64x2_t s1 = vreinterpretq_f64_u64 (vsubq_u64 (SpecialBias1, b));73float64x2_t s2 = vreinterpretq_f64_u64 (74vaddq_u64 (vsubq_u64 (vreinterpretq_u64_f64 (s), SpecialBias2), b));75uint64x2_t cmp = vcagtq_f64 (n, d->scale_thresh);76float64x2_t r1 = vmulq_f64 (s1, s1);77float64x2_t r0 = vmulq_f64 (vfmaq_f64 (s2, y, s2), s1);78return vbslq_f64 (cmp, r1, r0);79}8081#endif8283/* Fast vector implementation of exp10.84Maximum measured error is 1.64 ulp.85_ZGVnN2v_exp10(0x1.ccd1c9d82cc8cp+0) got 0x1.f8dab6d7fed0cp+586want 0x1.f8dab6d7fed0ap+5. */87float64x2_t VPCS_ATTR V_NAME_D1 (exp10) (float64x2_t x)88{89const struct data *d = ptr_barrier (&data);90uint64x2_t cmp;91#if WANT_SIMD_EXCEPT92/* If any lanes are special, mask them with 1 and retain a copy of x to allow93special_case to fix special lanes later. This is only necessary if fenv94exceptions are to be triggered correctly. */95float64x2_t xm = x;96uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x));97cmp = vcgeq_u64 (vsubq_u64 (iax, TinyBound), Thres);98if (unlikely (v_any_u64 (cmp)))99x = vbslq_f64 (cmp, v_f64 (1), x);100#else101cmp = vcageq_f64 (x, d->special_bound);102#endif103104/* n = round(x/(log10(2)/N)). */105float64x2_t z = vfmaq_f64 (d->shift, x, d->log10_2);106uint64x2_t u = vreinterpretq_u64_f64 (z);107float64x2_t n = vsubq_f64 (z, d->shift);108109/* r = x - n*log10(2)/N. */110float64x2_t r = x;111r = vfmsq_f64 (r, d->log2_10_hi, n);112r = vfmsq_f64 (r, d->log2_10_lo, n);113114uint64x2_t e = vshlq_n_u64 (u, 52 - V_EXP_TABLE_BITS);115uint64x2_t i = vandq_u64 (u, IndexMask);116117/* y = exp10(r) - 1 ~= C0 r + C1 r^2 + C2 r^3 + C3 r^4. */118float64x2_t r2 = vmulq_f64 (r, r);119float64x2_t p = vfmaq_f64 (d->poly[0], r, d->poly[1]);120float64x2_t y = vfmaq_f64 (d->poly[2], r, d->poly[3]);121p = vfmaq_f64 (p, y, r2);122y = vmulq_f64 (r, p);123124/* s = 2^(n/N). */125u = v_lookup_u64 (__v_exp_data, i);126float64x2_t s = vreinterpretq_f64_u64 (vaddq_u64 (u, e));127128if (unlikely (v_any_u64 (cmp)))129#if WANT_SIMD_EXCEPT130return special_case (xm, vfmaq_f64 (s, y, s), cmp);131#else132return special_case (s, y, n, d);133#endif134135return vfmaq_f64 (s, y, s);136}137138#if WANT_EXP10_TESTS139TEST_SIG (S, D, 1, exp10, -9.9, 9.9)140TEST_SIG (V, D, 1, exp10, -9.9, 9.9)141TEST_ULP (V_NAME_D1 (exp10), 1.15)142TEST_DISABLE_FENV_IF_NOT (V_NAME_D1 (exp10), WANT_SIMD_EXCEPT)143TEST_SYM_INTERVAL (V_NAME_D1 (exp10), 0, SpecialBound, 5000)144TEST_SYM_INTERVAL (V_NAME_D1 (exp10), SpecialBound, ScaleBound, 5000)145TEST_SYM_INTERVAL (V_NAME_D1 (exp10), ScaleBound, inf, 10000)146#endif147148149