Path: blob/main/contrib/arm-optimized-routines/math/aarch64/sve/cosf.c
48378 views
/*1* Single-precision SVE cos(x) function.2*3* Copyright (c) 2019-2024, Arm Limited.4* SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception5*/67#include "sv_math.h"8#include "test_sig.h"9#include "test_defs.h"1011static const struct data12{13float neg_pio2_1, neg_pio2_2, neg_pio2_3, inv_pio2, shift;14} data = {15/* Polynomial coefficients are hard-wired in FTMAD instructions. */16.neg_pio2_1 = -0x1.921fb6p+0f,17.neg_pio2_2 = 0x1.777a5cp-25f,18.neg_pio2_3 = 0x1.ee59dap-50f,19.inv_pio2 = 0x1.45f306p-1f,20/* Original shift used in AdvSIMD cosf,21plus a contribution to set the bit #0 of q22as expected by trigonometric instructions. */23.shift = 0x1.800002p+23f24};2526#define RangeVal 0x49800000 /* asuint32(0x1p20f). */2728static svfloat32_t NOINLINE29special_case (svfloat32_t x, svfloat32_t y, svbool_t oob)30{31return sv_call_f32 (cosf, x, y, oob);32}3334/* A fast SVE implementation of cosf based on trigonometric35instructions (FTMAD, FTSSEL, FTSMUL).36Maximum measured error: 2.06 ULPs.37SV_NAME_F1 (cos)(0x1.dea2f2p+19) got 0x1.fffe7ap-638want 0x1.fffe76p-6. */39svfloat32_t SV_NAME_F1 (cos) (svfloat32_t x, const svbool_t pg)40{41const struct data *d = ptr_barrier (&data);4243svfloat32_t r = svabs_x (pg, x);44svbool_t oob = svcmpge (pg, svreinterpret_u32 (r), RangeVal);4546/* Load some constants in quad-word chunks to minimise memory access. */47svfloat32_t negpio2_and_invpio2 = svld1rq (svptrue_b32 (), &d->neg_pio2_1);4849/* n = rint(|x|/(pi/2)). */50svfloat32_t q = svmla_lane (sv_f32 (d->shift), r, negpio2_and_invpio2, 3);51svfloat32_t n = svsub_x (pg, q, d->shift);5253/* r = |x| - n*(pi/2) (range reduction into -pi/4 .. pi/4). */54r = svmla_lane (r, n, negpio2_and_invpio2, 0);55r = svmla_lane (r, n, negpio2_and_invpio2, 1);56r = svmla_lane (r, n, negpio2_and_invpio2, 2);5758/* Final multiplicative factor: 1.0 or x depending on bit #0 of q. */59svfloat32_t f = svtssel (r, svreinterpret_u32 (q));6061/* cos(r) poly approx. */62svfloat32_t r2 = svtsmul (r, svreinterpret_u32 (q));63svfloat32_t y = sv_f32 (0.0f);64y = svtmad (y, r2, 4);65y = svtmad (y, r2, 3);66y = svtmad (y, r2, 2);67y = svtmad (y, r2, 1);68y = svtmad (y, r2, 0);6970if (unlikely (svptest_any (pg, oob)))71return special_case (x, svmul_x (svnot_z (pg, oob), f, y), oob);72/* Apply factor. */73return svmul_x (pg, f, y);74}7576TEST_SIG (SV, F, 1, cos, -3.1, 3.1)77TEST_ULP (SV_NAME_F1 (cos), 1.57)78TEST_DISABLE_FENV (SV_NAME_F1 (cos))79TEST_INTERVAL (SV_NAME_F1 (cos), 0, 0xffff0000, 10000)80TEST_INTERVAL (SV_NAME_F1 (cos), 0x1p-4, 0x1p4, 500000)81CLOSE_SVE_ATTR828384