Path: blob/main/contrib/arm-optimized-routines/math/include/mathlib.h
48254 views
/*
 * Public API.
 *
 * Copyright (c) 2015-2024, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

#ifndef _MATHLIB_H
#define _MATHLIB_H

#if __aarch64__
/* Low-accuracy scalar implementations of C23 routines.  */
float arm_math_cospif (float);
double arm_math_cospi (double);
float arm_math_sinpif (float);
double arm_math_sinpi (double);
float arm_math_tanpif (float);
double arm_math_tanpi (double);
void arm_math_sincospif (float, float *, float *);
void arm_math_sincospi (double, double *, double *);
#endif

/* SIMD declaration for autovectorisation with fast-math enabled. Only GCC is
   supported, and vector routines are only supported on Linux on AArch64.
   On every other configuration DECL_SIMD_aarch64 expands to nothing, so the
   annotated prototypes below reduce to plain scalar declarations.  */
#if defined __aarch64__ && __linux__ && defined(__GNUC__)                     \
    && !defined(__clang__) && defined(__FAST_MATH__)
# define DECL_SIMD_aarch64 __attribute__ ((__simd__ ("notinbranch"), const))
#else
# define DECL_SIMD_aarch64
#endif

/* Declarations that are only visible when WANT_EXPERIMENTAL_MATH is defined
   to a non-zero value.  */
#if WANT_EXPERIMENTAL_MATH

float arm_math_erff (float);
DECL_SIMD_aarch64 float cospif (float);
DECL_SIMD_aarch64 float erfinvf (float);
DECL_SIMD_aarch64 float sinpif (float);
DECL_SIMD_aarch64 float tanpif (float);

double arm_math_erf (double);
DECL_SIMD_aarch64 double cospi (double);
DECL_SIMD_aarch64 double erfinv (double);
DECL_SIMD_aarch64 double sinpi (double);
DECL_SIMD_aarch64 double tanpi (double);

long double erfinvl (long double);

#endif /* WANT_EXPERIMENTAL_MATH */

/* Note these routines may not be provided by AOR (some are only available with
   WANT_EXPERIMENTAL_MATH, some are not provided at all). Redeclare them here
   to add vector annotations.  */
DECL_SIMD_aarch64 float acosf (float);
DECL_SIMD_aarch64 float acoshf (float);
DECL_SIMD_aarch64 float asinf (float);
DECL_SIMD_aarch64 float asinhf (float);
DECL_SIMD_aarch64 float atan2f (float, float);
DECL_SIMD_aarch64 float atanf (float);
DECL_SIMD_aarch64 float atanhf (float);
DECL_SIMD_aarch64 float cbrtf (float);
DECL_SIMD_aarch64 float cosf (float);
DECL_SIMD_aarch64 float coshf (float);
DECL_SIMD_aarch64 float erfcf (float);
DECL_SIMD_aarch64 float erff (float);
DECL_SIMD_aarch64 float exp10f (float);
DECL_SIMD_aarch64 float exp2f (float);
DECL_SIMD_aarch64 float expf (float);
DECL_SIMD_aarch64 float expm1f (float);
DECL_SIMD_aarch64 float hypotf (float, float);
DECL_SIMD_aarch64 float log10f (float);
DECL_SIMD_aarch64 float log1pf (float);
DECL_SIMD_aarch64 float log2f (float);
DECL_SIMD_aarch64 float logf (float);
DECL_SIMD_aarch64 float powf (float, float);
DECL_SIMD_aarch64 float sinf (float);
/* sincosf is the only routine in this list declared without the SIMD
   annotation.  */
void sincosf (float, float *, float *);
DECL_SIMD_aarch64 float sinhf (float);
DECL_SIMD_aarch64 float tanf (float);
DECL_SIMD_aarch64 float tanhf (float);

DECL_SIMD_aarch64 double acos (double);
DECL_SIMD_aarch64 double acosh (double);
DECL_SIMD_aarch64 double asin (double);
DECL_SIMD_aarch64 double asinh (double);
DECL_SIMD_aarch64 double atan2 (double, double);
DECL_SIMD_aarch64 double atan (double);
DECL_SIMD_aarch64 double atanh (double);
DECL_SIMD_aarch64 double cbrt (double);
DECL_SIMD_aarch64 double cos (double);
DECL_SIMD_aarch64 double cosh (double);
DECL_SIMD_aarch64 double erfc (double);
DECL_SIMD_aarch64 double erf (double);
DECL_SIMD_aarch64 double exp10 (double);
DECL_SIMD_aarch64 double exp2 (double);
DECL_SIMD_aarch64 double exp (double);
DECL_SIMD_aarch64 double expm1 (double);
DECL_SIMD_aarch64 double hypot (double, double);
DECL_SIMD_aarch64 double log10 (double);
DECL_SIMD_aarch64 double log1p (double);
DECL_SIMD_aarch64 double log2 (double);
DECL_SIMD_aarch64 double log (double);
DECL_SIMD_aarch64 double pow (double, double);
DECL_SIMD_aarch64 double sin (double);
DECL_SIMD_aarch64 double sinh (double);
DECL_SIMD_aarch64 double tan (double);
DECL_SIMD_aarch64 double tanh (double);

#if __aarch64__ && __linux__
# include <arm_neon.h>
# undef __vpcs
# define __vpcs __attribute__((__aarch64_vector_pcs__))

/* Vector functions following the vector PCS using ABI names.  */
__vpcs float32x4_t _ZGVnN4v_acosf (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_acoshf (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_asinf (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_asinhf (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_atanf (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_atanhf (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_cbrtf (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_cosf (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_coshf (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_cospif (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_erfcf (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_erff (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_exp10f (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_exp2f (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_exp2f_1u (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_expf (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_expf_1u (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_expm1f (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_log10f (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_log1pf (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_log2f (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_logf (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_sinf (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_sinhf (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_sinpif (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_tanf (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_tanhf (float32x4_t);
__vpcs float32x4_t _ZGVnN4v_tanpif (float32x4_t);
__vpcs float32x4_t _ZGVnN4vl4_modff (float32x4_t, float *);
__vpcs float32x4_t _ZGVnN4vv_atan2f (float32x4_t, float32x4_t);
__vpcs float32x4_t _ZGVnN4vv_hypotf (float32x4_t, float32x4_t);
__vpcs float32x4_t _ZGVnN4vv_powf (float32x4_t, float32x4_t);
__vpcs float32x4x2_t _ZGVnN4v_cexpif (float32x4_t);
__vpcs void _ZGVnN4vl4l4_sincosf (float32x4_t, float *, float *);
__vpcs void _ZGVnN4vl4l4_sincospif (float32x4_t, float *, float *);

__vpcs float64x2_t _ZGVnN2v_acos (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_acosh (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_asin (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_asinh (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_atan (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_atanh (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_cbrt (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_cos (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_cosh (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_cospi (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_erf (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_erfc (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_exp (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_exp10 (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_exp2 (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_expm1 (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_log (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_log10 (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_log1p (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_log2 (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_sin (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_sinh (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_sinpi (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_tan (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_tanh (float64x2_t);
__vpcs float64x2_t _ZGVnN2v_tanpi (float64x2_t);
__vpcs float64x2_t _ZGVnN2vl8_modf (float64x2_t, double *);
__vpcs float64x2_t _ZGVnN2vv_atan2 (float64x2_t, float64x2_t);
__vpcs float64x2_t _ZGVnN2vv_hypot (float64x2_t, float64x2_t);
__vpcs float64x2_t _ZGVnN2vv_pow (float64x2_t, float64x2_t);
__vpcs float64x2x2_t _ZGVnN2v_cexpi (float64x2_t);
__vpcs void _ZGVnN2vl8l8_sincos (float64x2_t, double *, double *);
__vpcs void _ZGVnN2vl8l8_sincospi (float64x2_t, double *, double *);

# if WANT_EXPERIMENTAL_MATH
__vpcs float32x4_t _ZGVnN4v_erfinvf (float32x4_t);
__vpcs float64x2_t _ZGVnN2v_erfinv (float64x2_t);
# endif

/* SVE declarations. These are not marked __vpcs, and each one takes a
   trailing svbool_t argument after its data arguments.  */
# include <arm_sve.h>
svfloat32_t _ZGVsMxv_acosf (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_acoshf (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_asinf (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_asinhf (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_atanf (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_atanhf (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_cbrtf (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_cosf (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_coshf (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_cospif (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_erfcf (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_erff (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_exp10f (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_exp2f (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_expf (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_expm1f (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_log10f (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_log1pf (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_log2f (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_logf (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_sinf (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_sinhf (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_sinpif (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_tanf (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_tanhf (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxv_tanpif (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxvl4_modff (svfloat32_t, float *, svbool_t);
svfloat32_t _ZGVsMxvv_atan2f (svfloat32_t, svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxvv_hypotf (svfloat32_t, svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxvv_powf (svfloat32_t, svfloat32_t, svbool_t);
svfloat32x2_t _ZGVsMxv_cexpif (svfloat32_t, svbool_t);
void _ZGVsMxvl4l4_sincosf (svfloat32_t, float *, float *, svbool_t);
void _ZGVsMxvl4l4_sincospif (svfloat32_t, float *, float *, svbool_t);

svfloat64_t _ZGVsMxv_acos (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_acosh (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_asin (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_asinh (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_atan (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_atanh (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_cbrt (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_cos (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_cosh (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_cospi (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_erf (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_erfc (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_exp (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_exp10 (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_exp2 (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_expm1 (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_log (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_log10 (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_log1p (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_log2 (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_sin (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_sinh (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_sinpi (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_tan (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_tanh (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxv_tanpi (svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxvl8_modf (svfloat64_t, double *, svbool_t);
svfloat64_t _ZGVsMxvv_atan2 (svfloat64_t, svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxvv_hypot (svfloat64_t, svfloat64_t, svbool_t);
svfloat64_t _ZGVsMxvv_pow (svfloat64_t, svfloat64_t, svbool_t);
svfloat64x2_t _ZGVsMxv_cexpi (svfloat64_t, svbool_t);
void _ZGVsMxvl8l8_sincos (svfloat64_t, double *, double *, svbool_t);
void _ZGVsMxvl8l8_sincospi (svfloat64_t, double *, double *, svbool_t);

# if WANT_EXPERIMENTAL_MATH

svfloat32_t _ZGVsMxv_erfinvf (svfloat32_t, svbool_t);
svfloat32_t _ZGVsMxvv_powi (svfloat32_t, svint32_t, svbool_t);

svfloat64_t _ZGVsMxvv_powk (svfloat64_t, svint64_t, svbool_t);
svfloat64_t _ZGVsMxv_erfinv (svfloat64_t, svbool_t);

# endif
#endif /* __aarch64__ && __linux__ */

#endif /* _MATHLIB_H */