Path: blob/master/tools/android-sdk/renderscript/clang-include/cuda_builtin_vars.h
496 views
/*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------===1*2* Permission is hereby granted, free of charge, to any person obtaining a copy3* of this software and associated documentation files (the "Software"), to deal4* in the Software without restriction, including without limitation the rights5* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell6* copies of the Software, and to permit persons to whom the Software is7* furnished to do so, subject to the following conditions:8*9* The above copyright notice and this permission notice shall be included in10* all copies or substantial portions of the Software.11*12* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR13* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,14* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE15* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER16* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,17* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN18* THE SOFTWARE.19*20*===-----------------------------------------------------------------------===21*/2223#ifndef __CUDA_BUILTIN_VARS_H24#define __CUDA_BUILTIN_VARS_H2526// Forward declares from vector_types.h.27struct uint3;28struct dim3;2930// The file implements built-in CUDA variables using __declspec(property).31// https://msdn.microsoft.com/en-us/library/yhfk0thd.aspx32// All read accesses of built-in variable fields get converted into calls to a33// getter function which in turn calls the appropriate builtin to fetch the34// value.35//36// Example:37// int x = threadIdx.x;38// IR output:39// %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #340// PTX output:41// mov.u32 %r2, %tid.x;4243#define __CUDA_DEVICE_BUILTIN(FIELD, INTRINSIC) \44__declspec(property(get = __fetch_builtin_##FIELD)) unsigned int FIELD; \45static inline __attribute__((always_inline)) \46__attribute__((device)) unsigned int __fetch_builtin_##FIELD(void) { \47return INTRINSIC; \48}4950#if __cplusplus >= 201103L51#define __DELETE =delete52#else53#define __DELETE54#endif5556// Make sure nobody can create instances of the special varible types. nvcc57// also disallows taking address of special variables, so we disable address-of58// operator as well.59#define __CUDA_DISALLOW_BUILTINVAR_ACCESS(TypeName) \60__attribute__((device)) TypeName() __DELETE; \61__attribute__((device)) TypeName(const TypeName &) __DELETE; \62__attribute__((device)) void operator=(const TypeName &) const __DELETE; \63__attribute__((device)) TypeName *operator&() const __DELETE6465struct __cuda_builtin_threadIdx_t {66__CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_tid_x());67__CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_tid_y());68__CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z());69// threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a70// uint3). This function is defined after we pull in vector_types.h.71__attribute__((device)) operator uint3() const;72private:73__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t);74};7576struct __cuda_builtin_blockIdx_t {77__CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ctaid_x());78__CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ctaid_y());79__CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z());80// blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a81// uint3). This function is defined after we pull in vector_types.h.82__attribute__((device)) operator uint3() const;83private:84__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t);85};8687struct __cuda_builtin_blockDim_t {88__CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_ntid_x());89__CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_ntid_y());90__CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ntid_z());91// blockDim should be convertible to dim3 (in fact in nvcc, it *is* a92// dim3). This function is defined after we pull in vector_types.h.93__attribute__((device)) operator dim3() const;94private:95__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t);96};9798struct __cuda_builtin_gridDim_t {99__CUDA_DEVICE_BUILTIN(x,__nvvm_read_ptx_sreg_nctaid_x());100__CUDA_DEVICE_BUILTIN(y,__nvvm_read_ptx_sreg_nctaid_y());101__CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_nctaid_z());102// gridDim should be convertible to dim3 (in fact in nvcc, it *is* a103// dim3). This function is defined after we pull in vector_types.h.104__attribute__((device)) operator dim3() const;105private:106__CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t);107};108109#define __CUDA_BUILTIN_VAR \110extern const __attribute__((device)) __attribute__((weak))111__CUDA_BUILTIN_VAR __cuda_builtin_threadIdx_t threadIdx;112__CUDA_BUILTIN_VAR __cuda_builtin_blockIdx_t blockIdx;113__CUDA_BUILTIN_VAR __cuda_builtin_blockDim_t blockDim;114__CUDA_BUILTIN_VAR __cuda_builtin_gridDim_t gridDim;115116// warpSize should translate to read of %WARP_SZ but there's currently no117// builtin to do so. According to PTX v4.2 docs 'to date, all target118// architectures have a WARP_SZ value of 32'.119__attribute__((device)) const int warpSize = 32;120121#undef __CUDA_DEVICE_BUILTIN122#undef __CUDA_BUILTIN_VAR123#undef __CUDA_DISALLOW_BUILTINVAR_ACCESS124125#endif /* __CUDA_BUILTIN_VARS_H */126127128