Path: blob/main/contrib/arm-optimized-routines/math/tools/tgamma128_gen.jl
48249 views
# -*- julia -*-1#2# Generate tgamma128.h, containing polynomials and constants used by3# tgamma128.c.4#5# Copyright (c) 2006-2023, Arm Limited.6# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception78# This Julia program depends on the 'Remez' and 'SpecialFunctions'9# library packages. To install them, run this at the interactive Julia10# prompt:11#12# import Pkg; Pkg.add(["Remez", "SpecialFunctions"])13#14# Tested on Julia 1.4.1 (Ubuntu 20.04) and 1.9.0 (22.04).1516import Printf17import Remez18import SpecialFunctions1920# Round a BigFloat to 128-bit long double and format it as a C99 hex21# float literal.22function quadhex(x)23sign = " "24if x < 025sign = "-"26x = -x27end2829exponent = BigInt(floor(log2(x)))30exponent = max(exponent, -16382)31@assert(exponent <= 16383) # else overflow3233x /= BigFloat(2)^exponent34@assert(1 <= x < 2)35x *= BigFloat(2)^11236mantissa = BigInt(round(x))3738mantstr = string(mantissa, base=16, pad=29)39return Printf.@sprintf("%s0x%s.%sp%+dL", sign, mantstr[1], mantstr[2:end],40exponent)41end4243# Round a BigFloat to 128-bit long double and return it still as a44# BigFloat.45function quadval(x, round=0)46sign = +147if x.sign < 048sign = -149x = -x50end5152exponent = BigInt(floor(log2(x)))53exponent = max(exponent, -16382)54@assert(exponent <= 16383) # else overflow5556x /= BigFloat(2)^exponent57@assert(1 <= x < 2)58x *= BigFloat(2)^11259if round < 060mantissa = floor(x)61elseif round > 062mantissa = ceil(x)63else64mantissa = round(x)65end6667return sign * mantissa * BigFloat(2)^(exponent - 112)68end6970# Output an array of BigFloats as a C array declaration.71function dumparray(a, name)72println("static const long double ", name, "[] = {")73for x in N74println(" ", quadhex(x), ",")75end76println("};")77end7879print("/*80* Polynomial coefficients and other constants for tgamma128.c.81*82* Copyright (c) 2006,2009,2023 Arm Limited.83* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception84*/85")8687Base.MPFR.setprecision(512)8889e = exp(BigFloat(1))9091print("92/* The largest positive value for which 128-bit tgamma does not overflow. */93")94lo = BigFloat("1000")95hi = BigFloat("2000")96while true97global lo98global hi99global max_x100101mid = (lo + hi) / 2102if mid == lo || mid == hi103max_x = mid104break105end106if SpecialFunctions.logabsgamma(mid)[1] < 16384 * log(BigFloat(2))107lo = mid108else109hi = mid110end111end112max_x = quadval(max_x, -1)113println("static const long double max_x = ", quadhex(max_x), ";")114115print("116/* Coefficients of the polynomial used in the tgamma_large() subroutine */117")118N, D, E, X = Remez.ratfn_minimax(119x -> x==0 ? sqrt(BigFloat(2)*pi/e) :120exp(SpecialFunctions.logabsgamma(1/x)[1] +121(1/x-0.5)*(1+log(x))),122(0, 1/BigFloat(8)),12324, 0,124(x, y) -> 1/y125)126dumparray(N, "coeffs_large")127128print("129/* Coefficients of the polynomial used in the tgamma_tiny() subroutine */130")131N, D, E, X = Remez.ratfn_minimax(132x -> x==0 ? 1 : 1/(x*SpecialFunctions.gamma(x)),133(0, 1/BigFloat(32)),13413, 0,135)136dumparray(N, "coeffs_tiny")137138print("139/* The location within the interval [1,2] where gamma has a minimum.140* Specified as the sum of two 128-bit values, for extra precision. */141")142lo = BigFloat("1.4")143hi = BigFloat("1.5")144while true145global lo146global hi147global min_x148149mid = (lo + hi) / 2150if mid == lo || mid == hi151min_x = mid152break153end154if SpecialFunctions.digamma(mid) < 0155lo = mid156else157hi = mid158end159end160min_x_hi = quadval(min_x, -1)161println("static const long double min_x_hi = ", quadhex(min_x_hi), ";")162println("static const long double min_x_lo = ", quadhex(min_x - min_x_hi), ";")163164print("165/* The actual minimum value that gamma takes at that location.166* Again specified as the sum of two 128-bit values. */167")168min_y = SpecialFunctions.gamma(min_x)169min_y_hi = quadval(min_y, -1)170println("static const long double min_y_hi = ", quadhex(min_y_hi), ";")171println("static const long double min_y_lo = ", quadhex(min_y - min_y_hi), ";")172173function taylor_bodge(x)174# Taylor series generated by Wolfram Alpha for (gamma(min_x+x)-min_y)/x^2.175# Used in the Remez calls below for x values very near the origin, to avoid176# significance loss problems when trying to compute it directly via that177# formula (even in MPFR's extra precision).178return BigFloat("0.428486815855585429730209907810650582960483696962660010556335457558784421896667728014324097132413696263704801646004585959298743677879606168187061990204432200")+x*(-BigFloat("0.130704158939785761928008749242671025181542078105370084716141350308119418619652583986015464395882363802104154017741656168641240436089858504560718773026275797")+x*(BigFloat("0.160890753325112844190519489594363387594505844658437718135952967735294789599989664428071656484587979507034160383271974554122934842441540146372016567834062876")+x*(-BigFloat("0.092277030213334350126864106458600575084335085690780082222880945224248438672595248111704471182201673989215223667543694847795410779036800385804729955729659506"))))179end180181print("182/* Coefficients of the polynomial used in the tgamma_central() subroutine183* for computing gamma on the interval [1,min_x] */184")185N, D, E, X = Remez.ratfn_minimax(186x -> x < BigFloat(0x1p-64) ? taylor_bodge(-x) :187(SpecialFunctions.gamma(min_x - x) - min_y) / (x*x),188(0, min_x - 1),18931, 0,190(x, y) -> x^2,191)192dumparray(N, "coeffs_central_neg")193194print("195/* Coefficients of the polynomial used in the tgamma_central() subroutine196* for computing gamma on the interval [min_x,2] */197")198N, D, E, X = Remez.ratfn_minimax(199x -> x < BigFloat(0x1p-64) ? taylor_bodge(x) :200(SpecialFunctions.gamma(min_x + x) - min_y) / (x*x),201(0, 2 - min_x),20228, 0,203(x, y) -> x^2,204)205dumparray(N, "coeffs_central_pos")206207print("208/* 128-bit float value of pi, used by the sin_pi_x_over_pi subroutine209*/210")211println("static const long double pi = ", quadhex(BigFloat(pi)), ";")212213214