Path: blob/main/contrib/arm-optimized-routines/math/tools/remez.jl
48249 views
#!/usr/bin/env julia1# -*- julia -*-23# remez.jl - implementation of the Remez algorithm for polynomial approximation4#5# Copyright (c) 2015-2019, Arm Limited.6# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception78import Base.\910# ----------------------------------------------------------------------11# Helper functions to cope with different Julia versions.12if VERSION >= v"0.7.0"13array1d(T, d) = Array{T, 1}(undef, d)14array2d(T, d1, d2) = Array{T, 2}(undef, d1, d2)15else16array1d(T, d) = Array(T, d)17array2d(T, d1, d2) = Array(T, d1, d2)18end19if VERSION < v"0.5.0"20String = ASCIIString21end22if VERSION >= v"0.6.0"23# Use Base.invokelatest to run functions made using eval(), to24# avoid "world age" error25run(f, x...) = Base.invokelatest(f, x...)26else27# Prior to 0.6.0, invokelatest doesn't exist (but fortunately the28# world age problem also doesn't seem to exist)29run(f, x...) = f(x...)30end3132# ----------------------------------------------------------------------33# Global variables configured by command-line options.34floatsuffix = "" # adjusted by --floatsuffix35xvarname = "x" # adjusted by --variable36epsbits = 256 # adjusted by --bits37debug_facilities = Set() # adjusted by --debug38full_output = false # adjusted by --full39array_format = false # adjusted by --array40preliminary_commands = array1d(String, 0) # adjusted by --pre4142# ----------------------------------------------------------------------43# Diagnostic and utility functions.4445# Enable debugging printouts from a particular subpart of this46# program.47#48# Arguments:49# facility Name of the facility to debug. For a list of facility names,50# look through the code for calls to debug().51#52# Return value is a BigFloat.53function enable_debug(facility)54push!(debug_facilities, facility)55end5657# Print a diagnostic.58#59# Arguments:60# facility Name of the facility for which this is a debug message.61# printargs Arguments to println() if debugging of that facility is62# enabled.63macro debug(facility, printargs...)64printit = quote65print("[", $facility, "] ")66end67for arg in printargs68printit = quote69$printit70print($(esc(arg)))71end72end73return quote74if $facility in debug_facilities75$printit76println()77end78end79end8081# Evaluate a polynomial.8283# Arguments:84# coeffs Array of BigFloats giving the coefficients of the polynomial.85# Starts with the constant term, i.e. coeffs[i] is the86# coefficient of x^(i-1) (because Julia arrays are 1-based).87# x Point at which to evaluate the polynomial.88#89# Return value is a BigFloat.90function poly_eval(coeffs::Array{BigFloat}, x::BigFloat)91n = length(coeffs)92if n == 093return BigFloat(0)94elseif n == 195return coeffs[1]96else97return coeffs[1] + x * poly_eval(coeffs[2:n], x)98end99end100101# Evaluate a rational function.102103# Arguments:104# ncoeffs Array of BigFloats giving the coefficients of the numerator.105# Starts with the constant term, and 1-based, as above.106# dcoeffs Array of BigFloats giving the coefficients of the denominator.107# Starts with the constant term, and 1-based, as above.108# x Point at which to evaluate the function.109#110# Return value is a BigFloat.111function ratfn_eval(ncoeffs::Array{BigFloat}, dcoeffs::Array{BigFloat},112x::BigFloat)113return poly_eval(ncoeffs, x) / poly_eval(dcoeffs, x)114end115116# Format a BigFloat into an appropriate output format.117# Arguments:118# x BigFloat to format.119#120# Return value is a string.121function float_to_str(x)122return string(x) * floatsuffix123end124125# Format a polynomial into an arithmetic expression, for pasting into126# other tools such as gnuplot.127128# Arguments:129# coeffs Array of BigFloats giving the coefficients of the polynomial.130# Starts with the constant term, and 1-based, as above.131#132# Return value is a string.133function poly_to_string(coeffs::Array{BigFloat})134n = length(coeffs)135if n == 0136return "0"137elseif n == 1138return float_to_str(coeffs[1])139else140return string(float_to_str(coeffs[1]), "+", xvarname, "*(",141poly_to_string(coeffs[2:n]), ")")142end143end144145# Format a rational function into a string.146147# Arguments:148# ncoeffs Array of BigFloats giving the coefficients of the numerator.149# Starts with the constant term, and 1-based, as above.150# dcoeffs Array of BigFloats giving the coefficients of the denominator.151# Starts with the constant term, and 1-based, as above.152#153# Return value is a string.154function ratfn_to_string(ncoeffs::Array{BigFloat}, dcoeffs::Array{BigFloat})155if length(dcoeffs) == 1 && dcoeffs[1] == 1156# Special case: if the denominator is just 1, leave it out.157return poly_to_string(ncoeffs)158else159return string("(", poly_to_string(ncoeffs), ")/(",160poly_to_string(dcoeffs), ")")161end162end163164# Format a list of x,y pairs into a string.165166# Arguments:167# xys Array of (x,y) pairs of BigFloats.168#169# Return value is a string.170function format_xylist(xys::Array{Tuple{BigFloat,BigFloat}})171return ("[\n" *172join([" "*string(x)*" -> "*string(y) for (x,y) in xys], "\n") *173"\n]")174end175176# ----------------------------------------------------------------------177# Matrix-equation solver for matrices of BigFloat.178#179# I had hoped that Julia's type-genericity would allow me to solve the180# matrix equation Mx=V by just writing 'M \ V'. Unfortunately, that181# works by translating the inputs into double precision and handing182# off to an optimised library, which misses the point when I have a183# matrix and vector of BigFloat and want my result in _better_ than184# double precision. So I have to implement my own specialisation of185# the \ operator for that case.186#187# Fortunately, the point of using BigFloats is that we have precision188# to burn, so I can do completely naïve Gaussian elimination without189# worrying about instability.190191# Arguments:192# matrix_in 2-dimensional array of BigFloats, representing a matrix M193# in row-first order, i.e. matrix_in[r,c] represents the194# entry in row r col c.195# vector_in 1-dimensional array of BigFloats, representing a vector V.196#197# Return value: a 1-dimensional array X of BigFloats, satisfying M X = V.198#199# Expects the input to be an invertible square matrix and a vector of200# the corresponding size, on pain of failing an assertion.201function \(matrix_in :: Array{BigFloat,2},202vector_in :: Array{BigFloat,1})203# Copy the inputs, because we'll be mutating them as we go.204M = copy(matrix_in)205V = copy(vector_in)206207# Input consistency criteria: matrix is square, and vector has208# length to match.209n = length(V)210@assert(n > 0)211@assert(size(M) == (n,n))212213@debug("gausselim", "starting, n=", n)214215for i = 1:1:n216# Straightforward Gaussian elimination: find the largest217# non-zero entry in column i (and in a row we haven't sorted218# out already), swap it into row i, scale that row to219# normalise it to 1, then zero out the rest of the column by220# subtracting a multiple of that row from each other row.221222@debug("gausselim", "matrix=", repr(M))223@debug("gausselim", "vector=", repr(V))224225# Find the best pivot.226bestrow = 0227bestval = 0228for j = i:1:n229if abs(M[j,i]) > bestval230bestrow = j231bestval = M[j,i]232end233end234@assert(bestrow > 0) # make sure we did actually find one235236@debug("gausselim", "bestrow=", bestrow)237238# Swap it into row i.239if bestrow != i240for k = 1:1:n241M[bestrow,k],M[i,k] = M[i,k],M[bestrow,k]242end243V[bestrow],V[i] = V[i],V[bestrow]244end245246# Scale that row so that M[i,i] becomes 1.247divisor = M[i,i]248for k = 1:1:n249M[i,k] = M[i,k] / divisor250end251V[i] = V[i] / divisor252@assert(M[i,i] == 1)253254# Zero out all other entries in column i, by subtracting255# multiples of this row.256for j = 1:1:n257if j != i258factor = M[j,i]259for k = 1:1:n260M[j,k] = M[j,k] - M[i,k] * factor261end262V[j] = V[j] - V[i] * factor263@assert(M[j,i] == 0)264end265end266end267268@debug("gausselim", "matrix=", repr(M))269@debug("gausselim", "vector=", repr(V))270@debug("gausselim", "done!")271272# Now we're done: M is the identity matrix, so the equation Mx=V273# becomes just x=V, i.e. V is already exactly the vector we want274# to return.275return V276end277278# ----------------------------------------------------------------------279# Least-squares fitting of a rational function to a set of (x,y)280# points.281#282# We use this to get an initial starting point for the Remez283# iteration. Therefore, it doesn't really need to be particularly284# accurate; it only needs to be good enough to wiggle back and forth285# across the target function the right number of times (so as to give286# enough error extrema to start optimising from) and not have any287# poles in the target interval.288#289# Least-squares fitting of a _polynomial_ is actually a sensible thing290# to do, and minimises the rms error. Doing the following trick with a291# rational function P/Q is less sensible, because it cannot be made to292# minimise the error function (P/Q-f)^2 that you actually wanted;293# instead it minimises (P-fQ)^2. But that should be good enough to294# have the properties described above.295#296# Some theory: suppose you're trying to choose a set of parameters a_i297# so as to minimise the sum of squares of some error function E_i.298# Basic calculus says, if you do this in one variable, just299# differentiate and solve for zero. In this case, that works fine even300# with multiple variables, because you _partially_ differentiate with301# respect to each a_i, giving a system of equations, and that system302# turns out to be linear so we just solve it as a matrix.303#304# In this case, our parameters are the coefficients of P and Q; to305# avoid underdetermining the system we'll fix Q's constant term at 1,306# so that our error function (as described above) is307#308# E = \sum (p_0 + p_1 x + ... + p_n x^n - y - y q_1 x - ... - y q_d x^d)^2309#310# where the sum is over all (x,y) coordinate pairs. Setting dE/dp_j=0311# (for each j) gives an equation of the form312#313# 0 = \sum 2(p_0 + p_1 x + ... + p_n x^n - y - y q_1 x - ... - y q_d x^d) x^j314#315# and setting dE/dq_j=0 gives one of the form316#317# 0 = \sum 2(p_0 + p_1 x + ... + p_n x^n - y - y q_1 x - ... - y q_d x^d) y x^j318#319# And both of those row types, treated as multivariate linear320# equations in the p,q values, have each coefficient being a value of321# the form \sum x^i, \sum y x^i or \sum y^2 x^i, for various i. (Times322# a factor of 2, but we can throw that away.) So we can go through the323# list of input coordinates summing all of those things, and then we324# have enough information to construct our matrix and solve it325# straight off for the rational function coefficients.326327# Arguments:328# f The function to be approximated. Maps BigFloat -> BigFloat.329# xvals Array of BigFloats, giving the list of x-coordinates at which330# to evaluate f.331# n Degree of the numerator polynomial of the desired rational332# function.333# d Degree of the denominator polynomial of the desired rational334# function.335# w Error-weighting function. Takes two BigFloat arguments x,y336# and returns a scaling factor for the error at that location.337# A larger value indicates that the error should be given338# greater weight in the square sum we try to minimise.339# If unspecified, defaults to giving everything the same weight.340#341# Return values: a pair of arrays of BigFloats (N,D) giving the342# coefficients of the returned rational function. N has size n+1; D343# has size d+1. Both start with the constant term, i.e. N[i] is the344# coefficient of x^(i-1) (because Julia arrays are 1-based). D[1] will345# be 1.346function ratfn_leastsquares(f::Function, xvals::Array{BigFloat}, n, d,347w = (x,y)->BigFloat(1))348# Accumulate sums of x^i y^j, for j={0,1,2} and a range of x.349# Again because Julia arrays are 1-based, we'll have sums[i,j]350# being the sum of x^(i-1) y^(j-1).351maxpow = max(n,d) * 2 + 1352sums = zeros(BigFloat, maxpow, 3)353for x = xvals354y = f(x)355weight = w(x,y)356for i = 1:1:maxpow357for j = 1:1:3358sums[i,j] += x^(i-1) * y^(j-1) * weight359end360end361end362363@debug("leastsquares", "sums=", repr(sums))364365# Build the matrix. We're solving n+d+1 equations in n+d+1366# unknowns. (We actually have to return n+d+2 coefficients, but367# one of them is hardwired to 1.)368matrix = array2d(BigFloat, n+d+1, n+d+1)369vector = array1d(BigFloat, n+d+1)370for i = 0:1:n371# Equation obtained by differentiating with respect to p_i,372# i.e. the numerator coefficient of x^i.373row = 1+i374for j = 0:1:n375matrix[row, 1+j] = sums[1+i+j, 1]376end377for j = 1:1:d378matrix[row, 1+n+j] = -sums[1+i+j, 2]379end380vector[row] = sums[1+i, 2]381end382for i = 1:1:d383# Equation obtained by differentiating with respect to q_i,384# i.e. the denominator coefficient of x^i.385row = 1+n+i386for j = 0:1:n387matrix[row, 1+j] = sums[1+i+j, 2]388end389for j = 1:1:d390matrix[row, 1+n+j] = -sums[1+i+j, 3]391end392vector[row] = sums[1+i, 3]393end394395@debug("leastsquares", "matrix=", repr(matrix))396@debug("leastsquares", "vector=", repr(vector))397398# Solve the matrix equation.399all_coeffs = matrix \ vector400401@debug("leastsquares", "all_coeffs=", repr(all_coeffs))402403# And marshal the results into two separate polynomial vectors to404# return.405ncoeffs = all_coeffs[1:n+1]406dcoeffs = vcat([1], all_coeffs[n+2:n+d+1])407return (ncoeffs, dcoeffs)408end409410# ----------------------------------------------------------------------411# Golden-section search to find a maximum of a function.412413# Arguments:414# f Function to be maximised/minimised. Maps BigFloat -> BigFloat.415# a,b,c BigFloats bracketing a maximum of the function.416#417# Expects:418# a,b,c are in order (either a<=b<=c or c<=b<=a)419# a != c (but b can equal one or the other if it wants to)420# f(a) <= f(b) >= f(c)421#422# Return value is an (x,y) pair of BigFloats giving the extremal input423# and output. (That is, y=f(x).)424function goldensection(f::Function, a::BigFloat, b::BigFloat, c::BigFloat)425# Decide on a 'good enough' threshold.426threshold = abs(c-a) * 2^(-epsbits/2)427428# We'll need the golden ratio phi, of course. Or rather, in this429# case, we need 1/phi = 0.618...430one_over_phi = 2 / (1 + sqrt(BigFloat(5)))431432# Flip round the interval endpoints so that the interval [a,b] is433# at least as large as [b,c]. (Then we can always pick our new434# point in [a,b] without having to handle lots of special cases.)435if abs(b-a) < abs(c-a)436a, c = c, a437end438439# Evaluate the function at the initial points.440fa = f(a)441fb = f(b)442fc = f(c)443444@debug("goldensection", "starting")445446while abs(c-a) > threshold447@debug("goldensection", "a: ", a, " -> ", fa)448@debug("goldensection", "b: ", b, " -> ", fb)449@debug("goldensection", "c: ", c, " -> ", fc)450451# Check invariants.452@assert(a <= b <= c || c <= b <= a)453@assert(fa <= fb >= fc)454455# Subdivide the larger of the intervals [a,b] and [b,c]. We've456# arranged that this is always [a,b], for simplicity.457d = a + (b-a) * one_over_phi458459# Now we have an interval looking like this (possibly460# reversed):461#462# a d b c463#464# and we know f(b) is bigger than either f(a) or f(c). We have465# two cases: either f(d) > f(b), or vice versa. In either466# case, we can narrow to an interval of 1/phi the size, and467# still satisfy all our invariants (three ordered points,468# [a,b] at least the width of [b,c], f(a)<=f(b)>=f(c)).469fd = f(d)470@debug("goldensection", "d: ", d, " -> ", fd)471if fd > fb472a, b, c = a, d, b473fa, fb, fc = fa, fd, fb474@debug("goldensection", "adb case")475else476a, b, c = c, b, d477fa, fb, fc = fc, fb, fd478@debug("goldensection", "cbd case")479end480end481482@debug("goldensection", "done: ", b, " -> ", fb)483return (b, fb)484end485486# ----------------------------------------------------------------------487# Find the extrema of a function within a given interval.488489# Arguments:490# f The function to be approximated. Maps BigFloat -> BigFloat.491# grid A set of points at which to evaluate f. Must be high enough492# resolution to make extrema obvious.493#494# Returns an array of (x,y) pairs of BigFloats, with each x,y giving495# the extremum location and its value (i.e. y=f(x)).496function find_extrema(f::Function, grid::Array{BigFloat})497len = length(grid)498extrema = array1d(Tuple{BigFloat, BigFloat}, 0)499for i = 1:1:len500# We have to provide goldensection() with three points501# bracketing the extremum. If the extremum is at one end of502# the interval, then the only way we can do that is to set two503# of the points equal (which goldensection() will cope with).504prev = max(1, i-1)505next = min(i+1, len)506507# Find our three pairs of (x,y) coordinates.508xp, xi, xn = grid[prev], grid[i], grid[next]509yp, yi, yn = f(xp), f(xi), f(xn)510511# See if they look like an extremum, and if so, ask512# goldensection() to give a more exact location for it.513if yp <= yi >= yn514push!(extrema, goldensection(f, xp, xi, xn))515elseif yp >= yi <= yn516x, y = goldensection(x->-f(x), xp, xi, xn)517push!(extrema, (x, -y))518end519end520return extrema521end522523# ----------------------------------------------------------------------524# Winnow a list of a function's extrema to give a subsequence of a525# specified length, with the extrema in the subsequence alternating526# signs, and with the smallest absolute value of an extremum in the527# subsequence as large as possible.528#529# We do this using a dynamic-programming approach. We work along the530# provided array of extrema, and at all times, we track the best set531# of extrema we have so far seen for each possible (length, sign of532# last extremum) pair. Each new extremum is evaluated to see whether533# it can be added to any previously seen best subsequence to make a534# new subsequence that beats the previous record holder in its slot.535536# Arguments:537# extrema An array of (x,y) pairs of BigFloats giving the input extrema.538# n Number of extrema required as output.539#540# Returns a new array of (x,y) pairs which is a subsequence of the541# original sequence. (So, in particular, if the input was sorted by x542# then so will the output be.)543function winnow_extrema(extrema::Array{Tuple{BigFloat,BigFloat}}, n)544# best[i,j] gives the best sequence so far of length i and with545# sign j (where signs are coded as 1=positive, 2=negative), in the546# form of a tuple (cost, actual array of x,y pairs).547best = fill((BigFloat(0), array1d(Tuple{BigFloat,BigFloat}, 0)), n, 2)548549for (x,y) = extrema550if y > 0551sign = 1552elseif y < 0553sign = 2554else555# A zero-valued extremum cannot possibly contribute to any556# optimal sequence, so we simply ignore it!557continue558end559560for i = 1:1:n561# See if we can create a new entry for best[i,sign] by562# appending our current (x,y) to some previous thing.563if i == 1564# Special case: we don't store a best zero-length565# sequence :-)566candidate = (abs(y), [(x,y)])567else568othersign = 3-sign # map 1->2 and 2->1569oldscore, oldlist = best[i-1, othersign]570newscore = min(abs(y), oldscore)571newlist = vcat(oldlist, [(x,y)])572candidate = (newscore, newlist)573end574# If our new candidate improves on the previous value of575# best[i,sign], then replace it.576if candidate[1] > best[i,sign][1]577best[i,sign] = candidate578end579end580end581582# Our ultimate return value has to be either best[n,1] or583# best[n,2], but it could be either. See which one has the higher584# score.585if best[n,1][1] > best[n,2][1]586ret = best[n,1][2]587else588ret = best[n,2][2]589end590# Make sure we did actually _find_ a good answer.591@assert(length(ret) == n)592return ret593end594595# ----------------------------------------------------------------------596# Construct a rational-function approximation with equal and597# alternating weighted deviation at a specific set of x-coordinates.598599# Arguments:600# f The function to be approximated. Maps BigFloat -> BigFloat.601# coords An array of BigFloats giving the x-coordinates. There should602# be n+d+2 of them.603# n, d The degrees of the numerator and denominator of the desired604# approximation.605# prev_err A plausible value for the alternating weighted deviation.606# (Required to kickstart a binary search in the nonlinear case;607# see comments below.)608# w Error-weighting function. Takes two BigFloat arguments x,y609# and returns a scaling factor for the error at that location.610# The returned approximation R should have the minimum possible611# maximum value of abs((f(x)-R(x)) * w(x,f(x))). Optional612# parameter, defaulting to the always-return-1 function.613#614# Return values: a pair of arrays of BigFloats (N,D) giving the615# coefficients of the returned rational function. N has size n+1; D616# has size d+1. Both start with the constant term, i.e. N[i] is the617# coefficient of x^(i-1) (because Julia arrays are 1-based). D[1] will618# be 1.619function ratfn_equal_deviation(f::Function, coords::Array{BigFloat},620n, d, prev_err::BigFloat,621w = (x,y)->BigFloat(1))622@debug("equaldev", "n=", n, " d=", d, " coords=", repr(coords))623@assert(length(coords) == n+d+2)624625if d == 0626# Special case: we're after a polynomial. In this case, we627# have the particularly easy job of just constructing and628# solving a system of n+2 linear equations, to find the n+1629# coefficients of the polynomial and also the amount of630# deviation at the specified coordinates. Each equation is of631# the form632#633# p_0 x^0 + p_1 x^1 + ... + p_n x^n ± e/w(x) = f(x)634#635# in which the p_i and e are the variables, and the powers of636# x and calls to w and f are the coefficients.637638matrix = array2d(BigFloat, n+2, n+2)639vector = array1d(BigFloat, n+2)640currsign = +1641for i = 1:1:n+2642x = coords[i]643for j = 0:1:n644matrix[i,1+j] = x^j645end646y = f(x)647vector[i] = y648matrix[i, n+2] = currsign / w(x,y)649currsign = -currsign650end651652@debug("equaldev", "matrix=", repr(matrix))653@debug("equaldev", "vector=", repr(vector))654655outvector = matrix \ vector656657@debug("equaldev", "outvector=", repr(outvector))658659ncoeffs = outvector[1:n+1]660dcoeffs = [BigFloat(1)]661return ncoeffs, dcoeffs662else663# For a nontrivial rational function, the system of equations664# we need to solve becomes nonlinear, because each equation665# now takes the form666#667# p_0 x^0 + p_1 x^1 + ... + p_n x^n668# --------------------------------- ± e/w(x) = f(x)669# x^0 + q_1 x^1 + ... + q_d x^d670#671# and multiplying up by the denominator gives you a lot of672# terms containing e × q_i. So we can't do this the really673# easy way using a matrix equation as above.674#675# Fortunately, this is a fairly easy kind of nonlinear system.676# The equations all become linear if you switch to treating e677# as a constant, so a reasonably sensible approach is to pick678# a candidate value of e, solve all but one of the equations679# for the remaining unknowns, and then see what the error680# turns out to be in the final equation. The Chebyshev681# alternation theorem guarantees that that error in the last682# equation will be anti-monotonic in the input e, so we can683# just binary-search until we get the two as close to equal as684# we need them.685686function try_e(e)687# Try a given value of e, derive the coefficients of the688# resulting rational function by setting up equations689# based on the first n+d+1 of the n+d+2 coordinates, and690# see what the error turns out to be at the final691# coordinate.692matrix = array2d(BigFloat, n+d+1, n+d+1)693vector = array1d(BigFloat, n+d+1)694currsign = +1695for i = 1:1:n+d+1696x = coords[i]697y = f(x)698y_adj = y - currsign * e / w(x,y)699for j = 0:1:n700matrix[i,1+j] = x^j701end702for j = 1:1:d703matrix[i,1+n+j] = -x^j * y_adj704end705vector[i] = y_adj706currsign = -currsign707end708709@debug("equaldev", "trying e=", e)710@debug("equaldev", "matrix=", repr(matrix))711@debug("equaldev", "vector=", repr(vector))712713outvector = matrix \ vector714715@debug("equaldev", "outvector=", repr(outvector))716717ncoeffs = outvector[1:n+1]718dcoeffs = vcat([BigFloat(1)], outvector[n+2:n+d+1])719720x = coords[n+d+2]721y = f(x)722last_e = (ratfn_eval(ncoeffs, dcoeffs, x) - y) * w(x,y) * -currsign723724@debug("equaldev", "last e=", last_e)725726return ncoeffs, dcoeffs, last_e727end728729threshold = 2^(-epsbits/2) # convergence threshold730731# Start by trying our previous iteration's error value. This732# value (e0) will be one end of our binary-search interval,733# and whatever it caused the last point's error to be, that734# (e1) will be the other end.735e0 = prev_err736@debug("equaldev", "e0 = ", e0)737nc, dc, e1 = try_e(e0)738@debug("equaldev", "e1 = ", e1)739if abs(e1-e0) <= threshold740# If we're _really_ lucky, we hit the error right on the741# nose just by doing that!742return nc, dc743end744s = sign(e1-e0)745@debug("equaldev", "s = ", s)746747# Verify by assertion that trying our other interval endpoint748# e1 gives a value that's wrong in the other direction.749# (Otherwise our binary search won't get a sensible answer at750# all.)751nc, dc, e2 = try_e(e1)752@debug("equaldev", "e2 = ", e2)753@assert(sign(e2-e1) == -s)754755# Now binary-search until our two endpoints narrow enough.756local emid757while abs(e1-e0) > threshold758emid = (e1+e0)/2759nc, dc, enew = try_e(emid)760if sign(enew-emid) == s761e0 = emid762else763e1 = emid764end765end766767@debug("equaldev", "final e=", emid)768return nc, dc769end770end771772# ----------------------------------------------------------------------773# Top-level function to find a minimax rational-function approximation.774775# Arguments:776# f The function to be approximated. Maps BigFloat -> BigFloat.777# interval A pair of BigFloats giving the endpoints of the interval778# (in either order) on which to approximate f.779# n, d The degrees of the numerator and denominator of the desired780# approximation.781# w Error-weighting function. Takes two BigFloat arguments x,y782# and returns a scaling factor for the error at that location.783# The returned approximation R should have the minimum possible784# maximum value of abs((f(x)-R(x)) * w(x,f(x))). Optional785# parameter, defaulting to the always-return-1 function.786#787# Return values: a tuple (N,D,E,X), where788789# N,D A pair of arrays of BigFloats giving the coefficients790# of the returned rational function. N has size n+1; D791# has size d+1. Both start with the constant term, i.e.792# N[i] is the coefficient of x^(i-1) (because Julia793# arrays are 1-based). D[1] will be 1.794# E The maximum weighted error (BigFloat).795# X An array of pairs of BigFloats giving the locations of n+2796# points and the weighted error at each of those points. The797# weighted error values will have alternating signs, which798# means that the Chebyshev alternation theorem guarantees799# that any other function of the same degree must exceed800# the error of this one at at least one of those points.801function ratfn_minimax(f::Function, interval::Tuple{BigFloat,BigFloat}, n, d,802w = (x,y)->BigFloat(1))803# We start off by finding a least-squares approximation. This804# doesn't need to be perfect, but if we can get it reasonably good805# then it'll save iterations in the refining stage.806#807# Least-squares approximations tend to look nicer in a minimax808# sense if you evaluate the function at a big pile of Chebyshev809# nodes rather than uniformly spaced points. These values will810# also make a good grid to use for the initial search for error811# extrema, so we'll keep them around for that reason too.812813# Construct the grid.814lo, hi = minimum(interval), maximum(interval)815local grid816let817mid = (hi+lo)/2818halfwid = (hi-lo)/2819nnodes = 16 * (n+d+1)820pi = 2*asin(BigFloat(1))821grid = [ mid - halfwid * cos(pi*i/nnodes) for i=0:1:nnodes ]822end823824# Find the initial least-squares approximation.825(nc, dc) = ratfn_leastsquares(f, grid, n, d, w)826@debug("minimax", "initial leastsquares approx = ",827ratfn_to_string(nc, dc))828829# Threshold of convergence. We stop when the relative difference830# between the min and max (winnowed) error extrema is less than831# this.832#833# This is set to the cube root of machine epsilon on a more or834# less empirical basis, because the rational-function case will835# not converge reliably if you set it to only the square root.836# (Repeatable by using the --test mode.) On the assumption that837# input and output error in each iteration can be expected to be838# related by a simple power law (because it'll just be down to how839# many leading terms of a Taylor series are zero), the cube root840# was the next thing to try.841threshold = 2^(-epsbits/3)842843# Main loop.844while true845# Find all the error extrema we can.846function compute_error(x)847real_y = f(x)848approx_y = ratfn_eval(nc, dc, x)849return (approx_y - real_y) * w(x, real_y)850end851extrema = find_extrema(compute_error, grid)852@debug("minimax", "all extrema = ", format_xylist(extrema))853854# Winnow the extrema down to the right number, and ensure they855# have alternating sign.856extrema = winnow_extrema(extrema, n+d+2)857@debug("minimax", "winnowed extrema = ", format_xylist(extrema))858859# See if we've finished.860min_err = minimum([abs(y) for (x,y) = extrema])861max_err = maximum([abs(y) for (x,y) = extrema])862variation = (max_err - min_err) / max_err863@debug("minimax", "extremum variation = ", variation)864if variation < threshold865@debug("minimax", "done!")866return nc, dc, max_err, extrema867end868869# If not, refine our function by equalising the error at the870# extrema points, and go round again.871(nc, dc) = ratfn_equal_deviation(f, map(x->x[1], extrema),872n, d, max_err, w)873@debug("minimax", "refined approx = ", ratfn_to_string(nc, dc))874end875end876877# ----------------------------------------------------------------------878# Check if a polynomial is well-conditioned for accurate evaluation in879# a given interval by Horner's rule.880#881# This is true if at every step where Horner's rule computes882# (coefficient + x*value_so_far), the constant coefficient you're883# adding on is of larger magnitude than the x*value_so_far operand.884# And this has to be true for every x in the interval.885#886# Arguments:887# coeffs The coefficients of the polynomial under test. Starts with888# the constant term, i.e. coeffs[i] is the coefficient of889# x^(i-1) (because Julia arrays are 1-based).890# lo, hi The bounds of the interval.891#892# Return value: the largest ratio (x*value_so_far / coefficient), at893# any step of evaluation, for any x in the interval. If this is less894# than 1, the polynomial is at least somewhat well-conditioned;895# ideally you want it to be more like 1/8 or 1/16 or so, so that the896# relative rounding error accumulated at each step are reduced by897# several factors of 2 when the next coefficient is added on.898899function wellcond(coeffs, lo, hi)900x = max(abs(lo), abs(hi))901worst = 0902so_far = 0903for i = length(coeffs):-1:1904coeff = abs(coeffs[i])905so_far *= x906if coeff != 0907thisval = so_far / coeff908worst = max(worst, thisval)909so_far += coeff910end911end912return worst913end914915# ----------------------------------------------------------------------916# Small set of unit tests.917918function test()919passes = 0920fails = 0921922function approx_eq(x, y, limit=1e-6)923return abs(x - y) < limit924end925926function test(condition)927if condition928passes += 1929else930println("fail")931fails += 1932end933end934935# Test Gaussian elimination.936println("Gaussian test 1:")937m = BigFloat[1 1 2; 3 5 8; 13 34 21]938v = BigFloat[1, -1, 2]939ret = m \ v940println(" ",repr(ret))941test(approx_eq(ret[1], 109/26))942test(approx_eq(ret[2], -105/130))943test(approx_eq(ret[3], -31/26))944945# Test leastsquares rational functions.946println("Leastsquares test 1:")947n = 10000948a = array1d(BigFloat, n+1)949for i = 0:1:n950a[1+i] = i/BigFloat(n)951end952(nc, dc) = ratfn_leastsquares(x->exp(x), a, 2, 2)953println(" ",ratfn_to_string(nc, dc))954for x = a955test(approx_eq(exp(x), ratfn_eval(nc, dc, x), 1e-4))956end957958# Test golden section search.959println("Golden section test 1:")960x, y = goldensection(x->sin(x),961BigFloat(0), BigFloat(1)/10, BigFloat(4))962println(" ", x, " -> ", y)963test(approx_eq(x, asin(BigFloat(1))))964test(approx_eq(y, 1))965966# Test extrema-winnowing algorithm.967println("Winnow test 1:")968extrema = [(x, sin(20*x)*sin(197*x))969for x in BigFloat(0):BigFloat(1)/1000:BigFloat(1)]970winnowed = winnow_extrema(extrema, 12)971println(" ret = ", format_xylist(winnowed))972prevx, prevy = -1, 0973for (x,y) = winnowed974test(x > prevx)975test(y != 0)976test(prevy * y <= 0) # tolerates initial prevx having no sign977test(abs(y) > 0.9)978prevx, prevy = x, y979end980981# Test actual minimax approximation.982println("Minimax test 1 (polynomial):")983(nc, dc, e, x) = ratfn_minimax(x->exp(x), (BigFloat(0), BigFloat(1)), 4, 0)984println(" ",e)985println(" ",ratfn_to_string(nc, dc))986test(0 < e < 1e-3)987for x = 0:BigFloat(1)/1000:1988test(abs(ratfn_eval(nc, dc, x) - exp(x)) <= e * 1.0000001)989end990991println("Minimax test 2 (rational):")992(nc, dc, e, x) = ratfn_minimax(x->exp(x), (BigFloat(0), BigFloat(1)), 2, 2)993println(" ",e)994println(" ",ratfn_to_string(nc, dc))995test(0 < e < 1e-3)996for x = 0:BigFloat(1)/1000:1997test(abs(ratfn_eval(nc, dc, x) - exp(x)) <= e * 1.0000001)998end9991000println("Minimax test 3 (polynomial, weighted):")1001(nc, dc, e, x) = ratfn_minimax(x->exp(x), (BigFloat(0), BigFloat(1)), 4, 0,1002(x,y)->1/y)1003println(" ",e)1004println(" ",ratfn_to_string(nc, dc))1005test(0 < e < 1e-3)1006for x = 0:BigFloat(1)/1000:11007test(abs(ratfn_eval(nc, dc, x) - exp(x))/exp(x) <= e * 1.0000001)1008end10091010println("Minimax test 4 (rational, weighted):")1011(nc, dc, e, x) = ratfn_minimax(x->exp(x), (BigFloat(0), BigFloat(1)), 2, 2,1012(x,y)->1/y)1013println(" ",e)1014println(" ",ratfn_to_string(nc, dc))1015test(0 < e < 1e-3)1016for x = 0:BigFloat(1)/1000:11017test(abs(ratfn_eval(nc, dc, x) - exp(x))/exp(x) <= e * 1.0000001)1018end10191020println("Minimax test 5 (rational, weighted, odd degree):")1021(nc, dc, e, x) = ratfn_minimax(x->exp(x), (BigFloat(0), BigFloat(1)), 2, 1,1022(x,y)->1/y)1023println(" ",e)1024println(" ",ratfn_to_string(nc, dc))1025test(0 < e < 1e-3)1026for x = 0:BigFloat(1)/1000:11027test(abs(ratfn_eval(nc, dc, x) - exp(x))/exp(x) <= e * 1.0000001)1028end10291030total = passes + fails1031println(passes, " passes ", fails, " fails ", total, " total")1032end10331034# ----------------------------------------------------------------------1035# Online help.1036function help()1037print("""1038Usage:10391040remez.jl [options] <lo> <hi> <n> <d> <expr> [<weight>]10411042Arguments:10431044<lo>, <hi>10451046Bounds of the interval on which to approximate the target1047function. These are parsed and evaluated as Julia expressions,1048so you can write things like '1/BigFloat(6)' to get an1049accurate representation of 1/6, or '4*atan(BigFloat(1))' to1050get pi. (Unfortunately, the obvious 'BigFloat(pi)' doesn't1051work in Julia.)10521053<n>, <d>10541055The desired degree of polynomial(s) you want for your1056approximation. These should be non-negative integers. If you1057want a rational function as output, set <n> to the degree of1058the numerator, and <d> the denominator. If you just want an1059ordinary polynomial, set <d> to 0, and <n> to the degree of1060the polynomial you want.10611062<expr>10631064A Julia expression giving the function to be approximated on1065the interval. The input value is predefined as 'x' when this1066expression is evaluated, so you should write something along1067the lines of 'sin(x)' or 'sqrt(1+tan(x)^2)' etc.10681069<weight>10701071If provided, a Julia expression giving the weighting factor1072for the approximation error. The output polynomial will1073minimise the largest absolute value of (P-f) * w at any point1074in the interval, where P is the value of the polynomial, f is1075the value of the target function given by <expr>, and w is the1076weight given by this function.10771078When this expression is evaluated, the input value to P and f1079is predefined as 'x', and also the true output value f(x) is1080predefined as 'y'. So you can minimise the relative error by1081simply writing '1/y'.10821083If the <weight> argument is not provided, the default1084weighting function always returns 1, so that the polynomial1085will minimise the maximum absolute error |P-f|.10861087Computation options:10881089--pre=<predef_expr>10901091Evaluate the Julia expression <predef_expr> before starting1092the computation. This permits you to pre-define variables or1093functions which the Julia expressions in your main arguments1094can refer to. All of <lo>, <hi>, <expr> and <weight> can make1095use of things defined by <predef_expr>.10961097One internal remez.jl function that you might sometimes find1098useful in this expression is 'goldensection', which finds the1099location and value of a maximum of a function. For example,1100one implementation strategy for the gamma function involves1101translating it to put its unique local minimum at the origin,1102in which case you can write something like this11031104--pre='(m,my) = goldensection(x -> -gamma(x),1105BigFloat(1), BigFloat(1.5), BigFloat(2))'11061107to predefine 'm' as the location of gamma's minimum, and 'my'1108as the (negated) value that gamma actually takes at that1109point, i.e. -gamma(m).11101111(Since 'goldensection' always finds a maximum, we had to1112negate gamma in the input function to make it find a minimum1113instead. Consult the comments in the source for more details1114on the use of this function.)11151116If you use this option more than once, all the expressions you1117provide will be run in sequence.11181119--bits=<bits>11201121Specify the accuracy to which you want the output polynomial,1122in bits. Default 256, which should be more than enough.11231124--bigfloatbits=<bits>11251126Turn up the precision used by Julia for its BigFloat1127evaluation. Default is Julia's default (also 256). You might1128want to try setting this higher than the --bits value if the1129algorithm is failing to converge for some reason.11301131Output options:11321133--full11341135Instead of just printing the approximation function itself,1136also print auxiliary information:1137- the locations of the error extrema, and the actual1138(weighted) error at each of those locations1139- the overall maximum error of the function1140- a 'well-conditioning quotient', giving the worst-case ratio1141between any polynomial coefficient and the largest possible1142value of the higher-order terms it will be added to.11431144The well-conditioning quotient should be less than 1, ideally1145by several factors of two, for accurate evaluation in the1146target precision. If you request a rational function, a1147separate well-conditioning quotient will be printed for the1148numerator and denominator.11491150Use this option when deciding how wide an interval to1151approximate your function on, and what degree of polynomial1152you need.11531154--variable=<identifier>11551156When writing the output polynomial or rational function in its1157usual form as an arithmetic expression, use <identifier> as1158the name of the input variable. Default is 'x'.11591160--suffix=<suffix>11611162When writing the output polynomial or rational function in its1163usual form as an arithmetic expression, write <suffix> after1164every floating-point literal. For example, '--suffix=F' will1165generate a C expression in which the coefficients are literals1166of type 'float' rather than 'double'.11671168--array11691170Instead of writing the output polynomial as an arithmetic1171expression in Horner's rule form, write out just its1172coefficients, one per line, each with a trailing comma.1173Suitable for pasting into a C array declaration.11741175This option is not currently supported if the output is a1176rational function, because you'd need two separate arrays for1177the numerator and denominator coefficients and there's no1178obviously right way to provide both of those together.11791180Debug and test options:11811182--debug=<facility>11831184Enable debugging output from various parts of the Remez1185calculation. <facility> should be the name of one of the1186classes of diagnostic output implemented in the program.1187Useful values include 'gausselim', 'leastsquares',1188'goldensection', 'equaldev', 'minimax'. This is probably1189mostly useful to people debugging problems with the script, so1190consult the source code for more information about what the1191diagnostic output for each of those facilities will be.11921193If you want diagnostics from more than one facility, specify1194this option multiple times with different arguments.11951196--test11971198Run remez.jl's internal test suite. No arguments needed.11991200Miscellaneous options:12011202--help12031204Display this text and exit. No arguments needed.12051206""")1207end12081209# ----------------------------------------------------------------------1210# Main program.12111212function main()1213nargs = length(argwords)1214if nargs != 5 && nargs != 61215error("usage: remez.jl <lo> <hi> <n> <d> <expr> [<weight>]\n" *1216" run 'remez.jl --help' for more help")1217end12181219for preliminary_command in preliminary_commands1220eval(Meta.parse(preliminary_command))1221end12221223lo = BigFloat(eval(Meta.parse(argwords[1])))1224hi = BigFloat(eval(Meta.parse(argwords[2])))1225n = parse(Int,argwords[3])1226d = parse(Int,argwords[4])1227f = eval(Meta.parse("x -> " * argwords[5]))12281229# Wrap the user-provided function with a function of our own. This1230# arranges to detect silly FP values (inf,nan) early and diagnose1231# them sensibly, and also lets us log all evaluations of the1232# function in case you suspect it's doing the wrong thing at some1233# special-case point.1234function func(x)1235y = run(f,x)1236@debug("f", x, " -> ", y)1237if !isfinite(y)1238error("f(" * string(x) * ") returned non-finite value " * string(y))1239end1240return y1241end12421243if nargs == 61244# Wrap the user-provided weight function similarly.1245w = eval(Meta.parse("(x,y) -> " * argwords[6]))1246function wrapped_weight(x,y)1247ww = run(w,x,y)1248if !isfinite(ww)1249error("w(" * string(x) * "," * string(y) *1250") returned non-finite value " * string(ww))1251end1252return ww1253end1254weight = wrapped_weight1255else1256weight = (x,y)->BigFloat(1)1257end12581259(nc, dc, e, extrema) = ratfn_minimax(func, (lo, hi), n, d, weight)1260if array_format1261if d == 01262functext = join([string(x)*",\n" for x=nc],"")1263else1264# It's unclear how you should best format an array of1265# coefficients for a rational function, so I'll leave1266# implementing this option until I have a use case.1267error("--array unsupported for rational functions")1268end1269else1270functext = ratfn_to_string(nc, dc) * "\n"1271end1272if full_output1273# Print everything you might want to know about the function1274println("extrema = ", format_xylist(extrema))1275println("maxerror = ", string(e))1276if length(dc) > 11277println("wellconditioning_numerator = ",1278string(wellcond(nc, lo, hi)))1279println("wellconditioning_denominator = ",1280string(wellcond(dc, lo, hi)))1281else1282println("wellconditioning = ", string(wellcond(nc, lo, hi)))1283end1284print("function = ", functext)1285else1286# Just print the text people will want to paste into their code1287print(functext)1288end1289end12901291# ----------------------------------------------------------------------1292# Top-level code: parse the argument list and decide what to do.12931294what_to_do = main12951296doing_opts = true1297argwords = array1d(String, 0)1298for arg = ARGS1299global doing_opts, what_to_do, argwords1300global full_output, array_format, xvarname, floatsuffix, epsbits1301if doing_opts && startswith(arg, "-")1302if arg == "--"1303doing_opts = false1304elseif arg == "--help"1305what_to_do = help1306elseif arg == "--test"1307what_to_do = test1308elseif arg == "--full"1309full_output = true1310elseif arg == "--array"1311array_format = true1312elseif startswith(arg, "--debug=")1313enable_debug(arg[length("--debug=")+1:end])1314elseif startswith(arg, "--variable=")1315xvarname = arg[length("--variable=")+1:end]1316elseif startswith(arg, "--suffix=")1317floatsuffix = arg[length("--suffix=")+1:end]1318elseif startswith(arg, "--bits=")1319epsbits = parse(Int,arg[length("--bits=")+1:end])1320elseif startswith(arg, "--bigfloatbits=")1321set_bigfloat_precision(1322parse(Int,arg[length("--bigfloatbits=")+1:end]))1323elseif startswith(arg, "--pre=")1324push!(preliminary_commands, arg[length("--pre=")+1:end])1325else1326error("unrecognised option: ", arg)1327end1328else1329push!(argwords, arg)1330end1331end13321333what_to_do()133413351336