Path: blob/21.2-virgl/src/gallium/drivers/swr/rasterizer/common/os.h
4574 views
/****************************************************************************1* Copyright (C) 2014-2017 Intel Corporation. All Rights Reserved.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21****************************************************************************/2223#ifndef __SWR_OS_H__24#define __SWR_OS_H__2526#include <cstddef>27#include "core/knobs.h"2829#if (defined(FORCE_WINDOWS) || defined(_WIN32)) && !defined(FORCE_LINUX)3031#define SWR_API __cdecl32#define SWR_VISIBLE __declspec(dllexport)3334#ifndef NOMINMAX35#undef UNICODE36#define NOMINMAX37#include <windows.h>38#undef NOMINMAX39#define UNICODE40#else41#undef UNICODE42#include <windows.h>43#define UNICODE44#endif45#include <intrin.h>46#include <cstdint>4748#if defined(MemoryFence)49// Windows.h defines MemoryFence as _mm_mfence, but this conflicts with llvm::sys::MemoryFence50#undef MemoryFence51#endif5253#if defined(_MSC_VER)54#define OSALIGN(RWORD, WIDTH) __declspec(align(WIDTH)) RWORD55#elif defined(__GNUC__)56#define OSALIGN(RWORD, WIDTH) RWORD __attribute__((aligned(WIDTH)))57#endif5859#if defined(_DEBUG)60// We compile Debug builds with inline function expansion enabled. This allows61// functions compiled with __forceinline to be inlined even in Debug builds.62// The inline_depth(0) pragma below will disable inline function expansion for63// normal INLINE / inline functions, but not for __forceinline functions.64// Our SIMD function wrappers (see simdlib.hpp) use __forceinline even in65// Debug builds.66#define INLINE inline67#pragma inline_depth(0)68#else69// Use of __forceinline increases compile time dramatically in release builds70// and provides almost 0 measurable benefit. Disable until we have a compelling71// use-case72// #define INLINE __forceinline73#define INLINE inline74#endif75#ifndef FORCEINLINE76#define FORCEINLINE __forceinline77#endif7879#define DEBUGBREAK __debugbreak()8081#define PRAGMA_WARNING_PUSH_DISABLE(...) \82__pragma(warning(push)); \83__pragma(warning(disable : __VA_ARGS__));8485#define PRAGMA_WARNING_POP() __pragma(warning(pop))8687static inline void* AlignedMalloc(size_t _Size, size_t _Alignment)88{89return _aligned_malloc(_Size, _Alignment);90}9192static inline void AlignedFree(void* p)93{94return _aligned_free(p);95}9697#if defined(_WIN64)98#define BitScanReverseSizeT BitScanReverse6499#define BitScanForwardSizeT BitScanForward64100#define _mm_popcount_sizeT _mm_popcnt_u64101#else102#define BitScanReverseSizeT BitScanReverse103#define BitScanForwardSizeT BitScanForward104#define _mm_popcount_sizeT _mm_popcnt_u32105#endif106107#if !defined(_WIN64)108extern "C" {109inline unsigned char _BitScanForward64(unsigned long* Index, uint64_t Mask)110{111if (Mask == 0)112return 0;113#ifdef __GNUC__114*Index = __builtin_ctzll(Mask);115#else116*Index = 0;117for (int i = 0; i < 64; ++ i)118if ((1ULL << i) & Mask)119*Index = i;120#endif121return 1;122}123124inline unsigned char _BitScanReverse64(unsigned long* Index, uint64_t Mask)125{126if (Mask == 0)127return 0;128#ifdef __GNUC__129*Index = 63 - __builtin_clzll(Mask);130#else131*Index = 0;132for (int i = 63; i >= 0; -- i)133if ((1ULL << i) & Mask)134*Index = i;135#endif136return 1;137}138}139#endif140141#elif defined(__APPLE__) || defined(FORCE_LINUX) || defined(__linux__) || defined(__gnu_linux__)142143#define SWR_API144#define SWR_VISIBLE __attribute__((visibility("default")))145146#include <stdlib.h>147#include <string.h>148#include <x86intrin.h>149#include <stdint.h>150#include <sys/types.h>151#include <unistd.h>152#include <sys/stat.h>153#include <stdio.h>154#include <limits.h>155156typedef void VOID;157typedef void* LPVOID;158typedef int INT;159typedef unsigned int UINT;160typedef void* HANDLE;161typedef int LONG;162typedef unsigned int DWORD;163164#undef FALSE165#define FALSE 0166167#undef TRUE168#define TRUE 1169170#define MAX_PATH PATH_MAX171172#define OSALIGN(RWORD, WIDTH) RWORD __attribute__((aligned(WIDTH)))173#ifndef INLINE174#define INLINE __inline175#endif176#ifndef FORCEINLINE177#define FORCEINLINE INLINE178#endif179#define DEBUGBREAK asm("int $3")180181#if !defined(__CYGWIN__)182183#ifndef __cdecl184#define __cdecl185#endif186#ifndef __stdcall187#define __stdcall188#endif189190#if defined(__GNUC__) && !defined(__INTEL_COMPILER)191#define __declspec(x) __declspec_##x192#define __declspec_align(y) __attribute__((aligned(y)))193#define __declspec_deprecated __attribute__((deprecated))194#define __declspec_dllexport195#define __declspec_dllimport196#define __declspec_noinline __attribute__((__noinline__))197#define __declspec_nothrow __attribute__((nothrow))198#define __declspec_novtable199#define __declspec_thread __thread200#else201#define __declspec(X)202#endif203204#endif205206#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)207208#if !defined(__clang__) && (__GNUC__) && (GCC_VERSION < 40500)209inline uint64_t __rdtsc()210{211long low, high;212asm volatile("rdtsc" : "=a"(low), "=d"(high));213return (low | ((uint64_t)high << 32));214}215#endif216217#if !defined(__clang__) && !defined(__INTEL_COMPILER)218// Intrinsic not defined in gcc < 10219#if (__GNUC__) && (GCC_VERSION < 100000)220static INLINE void _mm256_storeu2_m128i(__m128i* hi, __m128i* lo, __m256i a)221{222_mm_storeu_si128((__m128i*)lo, _mm256_castsi256_si128(a));223_mm_storeu_si128((__m128i*)hi, _mm256_extractf128_si256(a, 0x1));224}225#endif226227// gcc prior to 4.9 doesn't have _mm*_undefined_*228#if (__GNUC__) && (GCC_VERSION < 40900)229#define _mm_undefined_si128 _mm_setzero_si128230#define _mm256_undefined_ps _mm256_setzero_ps231#endif232#endif233234inline unsigned char _BitScanForward64(unsigned long* Index, uint64_t Mask)235{236if (Mask == 0)237return 0;238*Index = __builtin_ctzll(Mask);239return 1;240}241242inline unsigned char _BitScanForward(unsigned long* Index, uint32_t Mask)243{244if (Mask == 0)245return 0;246*Index = __builtin_ctz(Mask);247return 1;248}249250inline unsigned char _BitScanReverse64(unsigned long* Index, uint64_t Mask)251{252if (Mask == 0)253return 0;254*Index = 63 - __builtin_clzll(Mask);255return 1;256}257258inline unsigned char _BitScanReverse(unsigned long* Index, uint32_t Mask)259{260if (Mask == 0)261return 0;262*Index = 31 - __builtin_clz(Mask);263return 1;264}265266inline void* AlignedMalloc(size_t size, size_t alignment)267{268void* ret;269if (posix_memalign(&ret, alignment, size))270{271return NULL;272}273return ret;274}275276static inline void AlignedFree(void* p)277{278free(p);279}280281#define _countof(a) (sizeof(a) / sizeof(*(a)))282283#define sprintf_s sprintf284#define strcpy_s(dst, size, src) strncpy(dst, src, size)285#define GetCurrentProcessId getpid286287#define InterlockedCompareExchange(Dest, Exchange, Comparand) \288__sync_val_compare_and_swap(Dest, Comparand, Exchange)289#define InterlockedExchangeAdd(Addend, Value) __sync_fetch_and_add(Addend, Value)290#define InterlockedDecrement(Append) __sync_sub_and_fetch(Append, 1)291#define InterlockedDecrement64(Append) __sync_sub_and_fetch(Append, 1)292#define InterlockedIncrement(Append) __sync_add_and_fetch(Append, 1)293#define InterlockedAdd(Addend, Value) __sync_add_and_fetch(Addend, Value)294#define InterlockedAdd64(Addend, Value) __sync_add_and_fetch(Addend, Value)295#define _ReadWriteBarrier() asm volatile("" ::: "memory")296297#define PRAGMA_WARNING_PUSH_DISABLE(...)298#define PRAGMA_WARNING_POP()299300#define ZeroMemory(dst, size) memset(dst, 0, size)301#else302303#error Unsupported OS/system.304305#endif306307#define THREAD thread_local308309// Universal types310typedef uint8_t KILOBYTE[1024];311typedef KILOBYTE MEGABYTE[1024];312typedef MEGABYTE GIGABYTE[1024];313314#define OSALIGNLINE(RWORD) OSALIGN(RWORD, 64)315#define OSALIGNSIMD(RWORD) OSALIGN(RWORD, KNOB_SIMD_BYTES)316#define OSALIGNSIMD16(RWORD) OSALIGN(RWORD, KNOB_SIMD16_BYTES)317318#include "common/swr_assert.h"319320#ifdef __GNUC__321#define ATTR_UNUSED __attribute__((unused))322#else323#define ATTR_UNUSED324#endif325326#define SWR_FUNC(_retType, _funcName, /* args */...) \327typedef _retType(SWR_API* PFN##_funcName)(__VA_ARGS__); \328_retType SWR_API _funcName(__VA_ARGS__);329330// Defined in os.cpp331void SWR_API SetCurrentThreadName(const char* pThreadName);332void SWR_API CreateDirectoryPath(const std::string& path);333334/// Execute Command (block until finished)335/// @returns process exit value336int SWR_API337ExecCmd(const std::string& cmd, ///< (In) Command line string338const char* pOptEnvStrings = nullptr, ///< (Optional In) Environment block for new process339std::string* pOptStdOut = nullptr, ///< (Optional Out) Standard Output text340std::string* pOptStdErr = nullptr, ///< (Optional Out) Standard Error text341const std::string* pOptStdIn = nullptr); ///< (Optional In) Standard Input text342343344/// Helper for setting up FP state345/// @returns old csr state346static INLINE uint32_t SetOptimalVectorCSR()347{348uint32_t oldCSR = _mm_getcsr();349350uint32_t newCSR = (oldCSR & ~(_MM_ROUND_MASK | _MM_DENORMALS_ZERO_MASK | _MM_FLUSH_ZERO_MASK));351newCSR |= (_MM_ROUND_NEAREST | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON);352_mm_setcsr(newCSR);353354return oldCSR;355}356357/// Set Vector CSR state.358/// @param csrState - should be value returned from SetOptimalVectorCSR()359static INLINE void RestoreVectorCSR(uint32_t csrState)360{361_mm_setcsr(csrState);362}363364#endif //__SWR_OS_H__365366367