/*1* Double-precision 2^x function.2*3* Copyright (c) 2018, Arm Limited.4* SPDX-License-Identifier: MIT5*/67#include <math.h>8#include <stdint.h>9#include "libm.h"10#include "exp_data.h"1112#define N (1 << EXP_TABLE_BITS)13#define Shift __exp_data.exp2_shift14#define T __exp_data.tab15#define C1 __exp_data.exp2_poly[0]16#define C2 __exp_data.exp2_poly[1]17#define C3 __exp_data.exp2_poly[2]18#define C4 __exp_data.exp2_poly[3]19#define C5 __exp_data.exp2_poly[4]2021/* Handle cases that may overflow or underflow when computing the result that22is scale*(1+TMP) without intermediate rounding. The bit representation of23scale is in SBITS, however it has a computed exponent that may have24overflown into the sign bit so that needs to be adjusted before using it as25a double. (int32_t)KI is the k used in the argument reduction and exponent26adjustment of scale, positive k here means the result may overflow and27negative k means the result may underflow. */28static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki)29{30double_t scale, y;3132if ((ki & 0x80000000) == 0) {33/* k > 0, the exponent of scale might have overflowed by 1. */34sbits -= 1ull << 52;35scale = asdouble(sbits);36y = 2 * (scale + scale * tmp);37return eval_as_double(y);38}39/* k < 0, need special care in the subnormal range. */40sbits += 1022ull << 52;41scale = asdouble(sbits);42y = scale + scale * tmp;43if (y < 1.0) {44/* Round y to the right precision before scaling it into the subnormal45range to avoid double rounding that can cause 0.5+E/2 ulp error where46E is the worst-case ulp error outside the subnormal range. So this47is only useful if the goal is better than 1 ulp worst-case error. */48double_t hi, lo;49lo = scale - y + scale * tmp;50hi = 1.0 + y;51lo = 1.0 - hi + y + lo;52y = eval_as_double(hi + lo) - 1.0;53/* Avoid -0.0 with downward rounding. */54if (WANT_ROUNDING && y == 0.0)55y = 0.0;56/* The underflow exception needs to be signaled explicitly. */57fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);58}59y = 0x1p-1022 * y;60return eval_as_double(y);61}6263/* Top 12 bits of a double (sign and exponent bits). */64static inline uint32_t top12(double x)65{66return asuint64(x) >> 52;67}6869double __cdecl exp2(double x)70{71uint32_t abstop;72uint64_t ki, idx, top, sbits;73double_t kd, r, r2, scale, tail, tmp;7475abstop = top12(x) & 0x7ff;76if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {77if (abstop - top12(0x1p-54) >= 0x80000000)78/* Avoid spurious underflow for tiny x. */79/* Note: 0 is common input. */80return WANT_ROUNDING ? 1.0 + x : 1.0;81if (abstop >= top12(1024.0)) {82if (asuint64(x) == asuint64(-INFINITY))83return 0.0;84if (abstop >= top12(INFINITY))85return 1.0 + x;86if (!(asuint64(x) >> 63)) {87errno = ERANGE;88return fp_barrier(DBL_MAX) * DBL_MAX;89}90else if (x <= -2147483648.0) {91fp_barrier(x + 0x1p120f);92return 0;93}94else if (asuint64(x) >= asuint64(-1075.0)) {95errno = ERANGE;96fp_barrier(x + 0x1p120f);97return 0;98}99}100if (2 * asuint64(x) > 2 * asuint64(928.0))101/* Large x is special cased below. */102abstop = 0;103}104105/* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */106/* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */107kd = eval_as_double(x + Shift);108ki = asuint64(kd); /* k. */109kd -= Shift; /* k/N for int k. */110r = x - kd;111/* 2^(k/N) ~= scale * (1 + tail). */112idx = 2 * (ki % N);113top = ki << (52 - EXP_TABLE_BITS);114tail = asdouble(T[idx]);115/* This is only a valid scale when -1023*N < k < 1024*N. */116sbits = T[idx + 1] + top;117/* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */118/* Evaluation is optimized assuming superscalar pipelined execution. */119r2 = r * r;120/* Without fma the worst case error is 0.5/N ulp larger. */121/* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */122tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);123if (predict_false(abstop == 0))124return specialcase(tmp, sbits, ki);125scale = asdouble(sbits);126/* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there127is no spurious underflow here even without fma. */128return eval_as_double(scale + scale * tmp);129}130131132