// Path: blob/master/thirdparty/astcenc/astcenc_mathlib.h
// 9896 views
// SPDX-License-Identifier: Apache-2.01// ----------------------------------------------------------------------------2// Copyright 2011-2025 Arm Limited3//4// Licensed under the Apache License, Version 2.0 (the "License"); you may not5// use this file except in compliance with the License. You may obtain a copy6// of the License at:7//8// http://www.apache.org/licenses/LICENSE-2.09//10// Unless required by applicable law or agreed to in writing, software11// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT12// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the13// License for the specific language governing permissions and limitations14// under the License.15// ----------------------------------------------------------------------------1617/*18* This module implements a variety of mathematical data types and library19* functions used by the codec.20*/2122#ifndef ASTC_MATHLIB_H_INCLUDED23#define ASTC_MATHLIB_H_INCLUDED2425#include <cassert>26#include <cstdint>27#include <cmath>2829#ifndef ASTCENC_POPCNT30#if defined(__POPCNT__)31#define ASTCENC_POPCNT 132#else33#define ASTCENC_POPCNT 034#endif35#endif3637#ifndef ASTCENC_F16C38#if defined(__F16C__)39#define ASTCENC_F16C 140#else41#define ASTCENC_F16C 042#endif43#endif4445#ifndef ASTCENC_SSE46#if defined(__SSE4_2__)47#define ASTCENC_SSE 4248#elif defined(__SSE4_1__)49#define ASTCENC_SSE 4150#elif defined(__SSE2__) || (defined(_M_AMD64) && !defined(_M_ARM64EC))51#define ASTCENC_SSE 2052#else53#define ASTCENC_SSE 054#endif55#endif5657#ifndef ASTCENC_AVX58#if defined(__AVX2__)59#define ASTCENC_AVX 260#define ASTCENC_X86_GATHERS 161#elif defined(__AVX__)62#define ASTCENC_AVX 163#define ASTCENC_X86_GATHERS 164#else65#define ASTCENC_AVX 066#endif67#endif6869#ifndef ASTCENC_NEON70#if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)71#define ASTCENC_NEON 172#else73#define ASTCENC_NEON 074#endif75#endif7677#ifndef ASTCENC_SVE78#if defined(__ARM_FEATURE_SVE)79#if 
defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS == 25680#define ASTCENC_SVE 881// Auto-detected SVE can only assume vector width of 4 is available, but82// must also allow for hardware being longer and so all use of intrinsics83// must explicitly use predicate masks to limit to 4-wide.84#else85#define ASTCENC_SVE 486#endif87#else88#define ASTCENC_SVE 089#endif90#endif9192// Force vector-sized SIMD alignment93#if ASTCENC_AVX || ASTCENC_SVE == 894#define ASTCENC_VECALIGN 3295#elif ASTCENC_SSE || ASTCENC_NEON || ASTCENC_SVE == 496#define ASTCENC_VECALIGN 1697// Use default alignment for non-SIMD builds98#else99#define ASTCENC_VECALIGN 0100#endif101102// C++11 states that alignas(0) should be ignored but GCC doesn't do103// this on some versions, so workaround and avoid emitting alignas(0)104#if ASTCENC_VECALIGN > 0105#define ASTCENC_ALIGNAS alignas(ASTCENC_VECALIGN)106#else107#define ASTCENC_ALIGNAS108#endif109110#if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0111#include <immintrin.h>112#endif113114/* ============================================================================115Fast math library; note that many of the higher-order functions in this set116use approximations which are less accurate, but faster, than <cmath> standard117library equivalents.118119Note: Many of these are not necessarily faster than simple C versions when120used on a single scalar value, but are included for testing purposes as most121have an option based on SSE intrinsics and therefore provide an obvious route122to future vectorization.123============================================================================ */124125// Union for manipulation of float bit patterns126typedef union127{128uint32_t u;129int32_t s;130float f;131} if32;132133// These are namespaced to avoid colliding with C standard library functions.134namespace astc135{136137static const float PI = 3.14159265358979323846f;138static const float PI_OVER_TWO = 1.57079632679489661923f;139140/**141* 
@brief SP float absolute value.142*143* @param v The value to make absolute.144*145* @return The absolute value.146*/147static inline float fabs(float v)148{149return std::fabs(v);150}151152/**153* @brief Test if a float value is a nan.154*155* @param v The value test.156*157* @return Zero is not a NaN, non-zero otherwise.158*/159static inline bool isnan(float v)160{161return v != v;162}163164/**165* @brief Return the minimum of two values.166*167* For floats, NaNs are turned into @c q.168*169* @param p The first value to compare.170* @param q The second value to compare.171*172* @return The smallest value.173*/174template<typename T>175static inline T min(T p, T q)176{177return p < q ? p : q;178}179180/**181* @brief Return the minimum of three values.182*183* For floats, NaNs are turned into @c r.184*185* @param p The first value to compare.186* @param q The second value to compare.187* @param r The third value to compare.188*189* @return The smallest value.190*/191template<typename T>192static inline T min(T p, T q, T r)193{194return min(min(p, q), r);195}196197/**198* @brief Return the minimum of four values.199*200* For floats, NaNs are turned into @c s.201*202* @param p The first value to compare.203* @param q The second value to compare.204* @param r The third value to compare.205* @param s The fourth value to compare.206*207* @return The smallest value.208*/209template<typename T>210static inline T min(T p, T q, T r, T s)211{212return min(min(p, q), min(r, s));213}214215/**216* @brief Return the maximum of two values.217*218* For floats, NaNs are turned into @c q.219*220* @param p The first value to compare.221* @param q The second value to compare.222*223* @return The largest value.224*/225template<typename T>226static inline T max(T p, T q)227{228return p > q ? 
p : q;229}230231/**232* @brief Return the maximum of three values.233*234* For floats, NaNs are turned into @c r.235*236* @param p The first value to compare.237* @param q The second value to compare.238* @param r The third value to compare.239*240* @return The largest value.241*/242template<typename T>243static inline T max(T p, T q, T r)244{245return max(max(p, q), r);246}247248/**249* @brief Return the maximum of four values.250*251* For floats, NaNs are turned into @c s.252*253* @param p The first value to compare.254* @param q The second value to compare.255* @param r The third value to compare.256* @param s The fourth value to compare.257*258* @return The largest value.259*/260template<typename T>261static inline T max(T p, T q, T r, T s)262{263return max(max(p, q), max(r, s));264}265266/**267* @brief Clamp a value value between @c mn and @c mx.268*269* For floats, NaNs are turned into @c mn.270*271* @param v The value to clamp.272* @param mn The min value (inclusive).273* @param mx The max value (inclusive).274*275* @return The clamped value.276*/277template<typename T>278inline T clamp(T v, T mn, T mx)279{280// Do not reorder; correct NaN handling relies on the fact that comparison281// with NaN returns false and will fall-though to the "min" value.282if (v > mx) return mx;283if (v > mn) return v;284return mn;285}286287/**288* @brief Clamp a float value between 0.0f and 1.0f.289*290* NaNs are turned into 0.0f.291*292* @param v The value to clamp.293*294* @return The clamped value.295*/296static inline float clamp1f(float v)297{298return astc::clamp(v, 0.0f, 1.0f);299}300301/**302* @brief Clamp a float value between 0.0f and 255.0f.303*304* NaNs are turned into 0.0f.305*306* @param v The value to clamp.307*308* @return The clamped value.309*/310static inline float clamp255f(float v)311{312return astc::clamp(v, 0.0f, 255.0f);313}314315/**316* @brief SP float round-down.317*318* @param v The value to round.319*320* @return The rounded value.321*/322static 
inline float flt_rd(float v)323{324return std::floor(v);325}326327/**328* @brief SP float round-to-nearest and convert to integer.329*330* @param v The value to round.331*332* @return The rounded value.333*/334static inline int flt2int_rtn(float v)335{336337return static_cast<int>(v + 0.5f);338}339340/**341* @brief SP float round down and convert to integer.342*343* @param v The value to round.344*345* @return The rounded value.346*/347static inline int flt2int_rd(float v)348{349return static_cast<int>(v);350}351352/**353* @brief SP float bit-interpreted as an integer.354*355* @param v The value to bitcast.356*357* @return The converted value.358*/359static inline int float_as_int(float v)360{361union { int a; float b; } u;362u.b = v;363return u.a;364}365366/**367* @brief Integer bit-interpreted as an SP float.368*369* @param v The value to bitcast.370*371* @return The converted value.372*/373static inline float int_as_float(int v)374{375union { int a; float b; } u;376u.a = v;377return u.b;378}379380/**381* @brief Fast approximation of 1.0 / sqrt(val).382*383* @param v The input value.384*385* @return The approximated result.386*/387static inline float rsqrt(float v)388{389return 1.0f / std::sqrt(v);390}391392/**393* @brief Fast approximation of sqrt(val).394*395* @param v The input value.396*397* @return The approximated result.398*/399static inline float sqrt(float v)400{401return std::sqrt(v);402}403404/**405* @brief Extract mantissa and exponent of a float value.406*407* @param v The input value.408* @param[out] expo The output exponent.409*410* @return The mantissa.411*/412static inline float frexp(float v, int* expo)413{414if32 p;415p.f = v;416*expo = ((p.u >> 23) & 0xFF) - 126;417p.u = (p.u & 0x807fffff) | 0x3f000000;418return p.f;419}420421/**422* @brief Initialize the seed structure for a random number generator.423*424* Important note: For the purposes of ASTC we want sets of random numbers to425* use the codec, but we want the same seed value across 
instances and threads426* to ensure that image output is stable across compressor runs and across427* platforms. Every PRNG created by this call will therefore return the same428* sequence of values ...429*430* @param state The state structure to initialize.431*/432void rand_init(uint64_t state[2]);433434/**435* @brief Return the next random number from the generator.436*437* This RNG is an implementation of the "xoroshoro-128+ 1.0" PRNG, based on the438* public-domain implementation given by David Blackman & Sebastiano Vigna at439* http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c440*441* @param state The state structure to use/update.442*/443uint64_t rand(uint64_t state[2]);444445}446447/* ============================================================================448Softfloat library with fp32 and fp16 conversion functionality.449============================================================================ */450#if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0)451/* narrowing float->float conversions */452uint16_t float_to_sf16(float val);453float sf16_to_float(uint16_t val);454#endif455456/*********************************457Vector library458*********************************/459#include "astcenc_vecmathlib.h"460461/*********************************462Declaration of line types463*********************************/464// parametric line, 2D: The line is given by line = a + b * t.465466struct line2467{468vfloat4 a;469vfloat4 b;470};471472// parametric line, 3D473struct line3474{475vfloat4 a;476vfloat4 b;477};478479struct line4480{481vfloat4 a;482vfloat4 b;483};484485486struct processed_line2487{488vfloat4 amod;489vfloat4 bs;490};491492struct processed_line3493{494vfloat4 amod;495vfloat4 bs;496};497498struct processed_line4499{500vfloat4 amod;501vfloat4 bs;502};503504#endif505506507