Path: blob/21.2-virgl/src/gallium/frontends/clover/core/device.cpp
4572 views
//1// Copyright 2012 Francisco Jerez2//3// Permission is hereby granted, free of charge, to any person obtaining a4// copy of this software and associated documentation files (the "Software"),5// to deal in the Software without restriction, including without limitation6// the rights to use, copy, modify, merge, publish, distribute, sublicense,7// and/or sell copies of the Software, and to permit persons to whom the8// Software is furnished to do so, subject to the following conditions:9//10// The above copyright notice and this permission notice shall be included in11// all copies or substantial portions of the Software.12//13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19// OTHER DEALINGS IN THE SOFTWARE.20//2122#include <algorithm>23#include "core/device.hpp"24#include "core/platform.hpp"25#include "pipe/p_screen.h"26#include "pipe/p_state.h"27#include "spirv/invocation.hpp"28#include "util/bitscan.h"29#include "util/u_debug.h"30#include "spirv/invocation.hpp"31#include "nir/invocation.hpp"32#include <fstream>3334using namespace clover;3536namespace {37template<typename T>38std::vector<T>39get_compute_param(pipe_screen *pipe, pipe_shader_ir ir_format,40pipe_compute_cap cap) {41int sz = pipe->get_compute_param(pipe, ir_format, cap, NULL);42std::vector<T> v(sz / sizeof(T));4344pipe->get_compute_param(pipe, ir_format, cap, &v.front());45return v;46}47}4849device::device(clover::platform &platform, pipe_loader_device *ldev) :50platform(platform), clc_cache(NULL), ldev(ldev) {51unsigned major = 1, minor = 1;52debug_get_version_option("CLOVER_DEVICE_VERSION_OVERRIDE", &major, &minor);53version = CL_MAKE_VERSION(major, minor, 0);5455major = 1, minor = 1;56debug_get_version_option("CLOVER_DEVICE_CLC_VERSION_OVERRIDE", &major, &minor);57clc_version = CL_MAKE_VERSION(major, minor, 0);5859pipe = pipe_loader_create_screen(ldev);60if (pipe && pipe->get_param(pipe, PIPE_CAP_COMPUTE)) {61if (supports_ir(PIPE_SHADER_IR_NATIVE))62return;63#ifdef HAVE_CLOVER_SPIRV64if (supports_ir(PIPE_SHADER_IR_NIR_SERIALIZED)) {65nir::check_for_libclc(*this);66clc_cache = nir::create_clc_disk_cache();67clc_nir = lazy<std::shared_ptr<nir_shader>>([&] () { std::string log; return std::shared_ptr<nir_shader>(nir::load_libclc_nir(*this, log), ralloc_free); });68return;69}70#endif71}72if (pipe)73pipe->destroy(pipe);74throw error(CL_INVALID_DEVICE);75}7677device::~device() {78if (clc_cache)79disk_cache_destroy(clc_cache);80if (pipe)81pipe->destroy(pipe);82if (ldev)83pipe_loader_release(&ldev, 1);84}8586bool87device::operator==(const device &dev) const {88return this == &dev;89}9091cl_device_type92device::type() const {93switch (ldev->type) {94case PIPE_LOADER_DEVICE_SOFTWARE:95return CL_DEVICE_TYPE_CPU;96case PIPE_LOADER_DEVICE_PCI:97case PIPE_LOADER_DEVICE_PLATFORM:98return CL_DEVICE_TYPE_GPU;99default:100unreachable("Unknown device type.");101}102}103104cl_uint105device::vendor_id() const {106switch (ldev->type) {107case PIPE_LOADER_DEVICE_SOFTWARE:108case PIPE_LOADER_DEVICE_PLATFORM:109return 0;110case PIPE_LOADER_DEVICE_PCI:111return ldev->u.pci.vendor_id;112default:113unreachable("Unknown device type.");114}115}116117size_t118device::max_images_read() const {119return PIPE_MAX_SHADER_SAMPLER_VIEWS;120}121122size_t123device::max_images_write() const {124return PIPE_MAX_SHADER_IMAGES;125}126127size_t128device::max_image_buffer_size() const {129return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE);130}131132cl_uint133device::max_image_size() const {134return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_2D_SIZE);135}136137cl_uint138device::max_image_size_3d() const {139return 1 << (pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_3D_LEVELS) - 1);140}141142size_t143device::max_image_array_number() const {144return pipe->get_param(pipe, PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS);145}146147cl_uint148device::max_samplers() const {149return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,150PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS);151}152153cl_ulong154device::max_mem_global() const {155return get_compute_param<uint64_t>(pipe, ir_format(),156PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE)[0];157}158159cl_ulong160device::max_mem_local() const {161return get_compute_param<uint64_t>(pipe, ir_format(),162PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE)[0];163}164165cl_ulong166device::max_mem_input() const {167return get_compute_param<uint64_t>(pipe, ir_format(),168PIPE_COMPUTE_CAP_MAX_INPUT_SIZE)[0];169}170171cl_ulong172device::max_const_buffer_size() const {173return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,174PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE);175}176177cl_uint178device::max_const_buffers() const {179return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,180PIPE_SHADER_CAP_MAX_CONST_BUFFERS);181}182183size_t184device::max_threads_per_block() const {185return get_compute_param<uint64_t>(186pipe, ir_format(), PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK)[0];187}188189cl_ulong190device::max_mem_alloc_size() const {191return get_compute_param<uint64_t>(pipe, ir_format(),192PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE)[0];193}194195cl_uint196device::max_clock_frequency() const {197return get_compute_param<uint32_t>(pipe, ir_format(),198PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY)[0];199}200201cl_uint202device::max_compute_units() const {203return get_compute_param<uint32_t>(pipe, ir_format(),204PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS)[0];205}206207cl_uint208device::max_printf_buffer_size() const {209return 1024 * 1024;210}211212bool213device::image_support() const {214return get_compute_param<uint32_t>(pipe, ir_format(),215PIPE_COMPUTE_CAP_IMAGES_SUPPORTED)[0];216}217218bool219device::has_doubles() const {220return pipe->get_param(pipe, PIPE_CAP_DOUBLES);221}222223bool224device::has_halves() const {225return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,226PIPE_SHADER_CAP_FP16);227}228229bool230device::has_int64_atomics() const {231return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,232PIPE_SHADER_CAP_INT64_ATOMICS);233}234235bool236device::has_unified_memory() const {237return pipe->get_param(pipe, PIPE_CAP_UMA);238}239240size_t241device::mem_base_addr_align() const {242uint64_t page_size = 0;243os_get_page_size(&page_size);244return std::max((size_t)page_size, sizeof(cl_long) * 16);245}246247cl_device_svm_capabilities248device::svm_support() const {249// Without CAP_RESOURCE_FROM_USER_MEMORY SVM and CL_MEM_USE_HOST_PTR250// interactions won't work according to spec as clover manages a GPU side251// copy of the host data.252//253// The biggest problem are memory buffers created with CL_MEM_USE_HOST_PTR,254// but the application and/or the kernel updates the memory via SVM and not255// the cl_mem buffer.256// We can't even do proper tracking on what memory might have been accessed257// as the host ptr to the buffer could be within a SVM region, where through258// the CL API there is no reliable way of knowing if a certain cl_mem buffer259// was accessed by a kernel or not and the runtime can't reliably know from260// which side the GPU buffer content needs to be updated.261//262// Another unsolvable scenario is a cl_mem object passed by cl_mem reference263// and SVM pointer into the same kernel at the same time.264if (allows_user_pointers() && pipe->get_param(pipe, PIPE_CAP_SYSTEM_SVM))265// we can emulate all lower levels if we support fine grain system266return CL_DEVICE_SVM_FINE_GRAIN_SYSTEM |267CL_DEVICE_SVM_COARSE_GRAIN_BUFFER |268CL_DEVICE_SVM_FINE_GRAIN_BUFFER;269return 0;270}271272bool273device::allows_user_pointers() const {274return pipe->get_param(pipe, PIPE_CAP_RESOURCE_FROM_USER_MEMORY) ||275pipe->get_param(pipe, PIPE_CAP_RESOURCE_FROM_USER_MEMORY_COMPUTE_ONLY);276}277278std::vector<size_t>279device::max_block_size() const {280auto v = get_compute_param<uint64_t>(pipe, ir_format(),281PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);282return { v.begin(), v.end() };283}284285cl_uint286device::subgroup_size() const {287return get_compute_param<uint32_t>(pipe, ir_format(),288PIPE_COMPUTE_CAP_SUBGROUP_SIZE)[0];289}290291cl_uint292device::address_bits() const {293return get_compute_param<uint32_t>(pipe, ir_format(),294PIPE_COMPUTE_CAP_ADDRESS_BITS)[0];295}296297std::string298device::device_name() const {299return pipe->get_name(pipe);300}301302std::string303device::vendor_name() const {304return pipe->get_device_vendor(pipe);305}306307enum pipe_shader_ir308device::ir_format() const {309if (supports_ir(PIPE_SHADER_IR_NATIVE))310return PIPE_SHADER_IR_NATIVE;311312assert(supports_ir(PIPE_SHADER_IR_NIR_SERIALIZED));313return PIPE_SHADER_IR_NIR_SERIALIZED;314}315316std::string317device::ir_target() const {318std::vector<char> target = get_compute_param<char>(319pipe, ir_format(), PIPE_COMPUTE_CAP_IR_TARGET);320return { target.data() };321}322323enum pipe_endian324device::endianness() const {325return (enum pipe_endian)pipe->get_param(pipe, PIPE_CAP_ENDIANNESS);326}327328std::string329device::device_version_as_string() const {330static const std::string version_string =331std::to_string(CL_VERSION_MAJOR(version)) + "." +332std::to_string(CL_VERSION_MINOR(version));333return version_string;334}335336std::string337device::device_clc_version_as_string() const {338static const std::string version_string =339std::to_string(CL_VERSION_MAJOR(clc_version)) + "." +340std::to_string(CL_VERSION_MINOR(clc_version));341return version_string;342}343344bool345device::supports_ir(enum pipe_shader_ir ir) const {346return pipe->get_shader_param(pipe, PIPE_SHADER_COMPUTE,347PIPE_SHADER_CAP_SUPPORTED_IRS) & (1 << ir);348}349350std::vector<cl_name_version>351device::supported_extensions() const {352std::vector<cl_name_version> vec;353354vec.push_back( cl_name_version{ CL_MAKE_VERSION(1, 0, 0), "cl_khr_byte_addressable_store" } );355vec.push_back( cl_name_version{ CL_MAKE_VERSION(1, 0, 0), "cl_khr_global_int32_base_atomics" } );356vec.push_back( cl_name_version{ CL_MAKE_VERSION(1, 0, 0), "cl_khr_global_int32_extended_atomics" } );357vec.push_back( cl_name_version{ CL_MAKE_VERSION(1, 0, 0), "cl_khr_local_int32_base_atomics" } );358vec.push_back( cl_name_version{ CL_MAKE_VERSION(1, 0, 0), "cl_khr_local_int32_extended_atomics" } );359if (has_int64_atomics()) {360vec.push_back( cl_name_version{ CL_MAKE_VERSION(1, 0, 0), "cl_khr_int64_base_atomics" } );361vec.push_back( cl_name_version{ CL_MAKE_VERSION(1, 0, 0), "cl_khr_int64_extended_atomics" } );362}363if (has_doubles())364vec.push_back( cl_name_version{ CL_MAKE_VERSION(1, 0, 0), "cl_khr_fp64" } );365if (has_halves())366vec.push_back( cl_name_version{ CL_MAKE_VERSION(1, 0, 0), "cl_khr_fp16" } );367if (svm_support())368vec.push_back( cl_name_version{ CL_MAKE_VERSION(1, 0, 0), "cl_arm_shared_virtual_memory" } );369if (!clover::spirv::supported_versions().empty() &&370supports_ir(PIPE_SHADER_IR_NIR_SERIALIZED))371vec.push_back( cl_name_version{ CL_MAKE_VERSION(1, 0, 0), "cl_khr_il_program" } );372vec.push_back( cl_name_version{ CL_MAKE_VERSION(1, 0, 0), "cl_khr_extended_versioning" } );373return vec;374}375376std::string377device::supported_extensions_as_string() const {378static std::string extensions_string;379380if (!extensions_string.empty())381return extensions_string;382383const auto extension_list = supported_extensions();384for (const auto &extension : extension_list) {385if (!extensions_string.empty())386extensions_string += " ";387extensions_string += extension.name;388}389return extensions_string;390}391392std::vector<cl_name_version>393device::supported_il_versions() const {394return clover::spirv::supported_versions();395}396397const void *398device::get_compiler_options(enum pipe_shader_ir ir) const {399return pipe->get_compiler_options(pipe, ir, PIPE_SHADER_COMPUTE);400}401402cl_version403device::device_version() const {404return version;405}406407cl_version408device::device_clc_version() const {409return clc_version;410}411412std::vector<cl_name_version>413device::opencl_c_all_versions() const {414std::vector<cl_name_version> vec;415vec.push_back( cl_name_version{ CL_MAKE_VERSION(1, 0, 0), "OpenCL C" } );416vec.push_back( cl_name_version{ CL_MAKE_VERSION(1, 1, 0), "OpenCL C" } );417418if (CL_VERSION_MAJOR(clc_version) == 1 &&419CL_VERSION_MINOR(clc_version) == 2)420vec.push_back( cl_name_version{ CL_MAKE_VERSION(1, 2, 0), "OpenCL C" } );421if (CL_VERSION_MAJOR(clc_version) == 3) {422vec.push_back( cl_name_version{ CL_MAKE_VERSION(1, 2, 0), "OpenCL C" } );423vec.push_back( cl_name_version{ CL_MAKE_VERSION(3, 0, 0), "OpenCL C" } );424}425return vec;426}427428std::vector<cl_name_version>429device::opencl_c_features() const {430std::vector<cl_name_version> vec;431432vec.push_back( cl_name_version {CL_MAKE_VERSION(3, 0, 0), "__opencl_c_int64" });433if (has_doubles())434vec.push_back( cl_name_version {CL_MAKE_VERSION(3, 0, 0), "__opencl_c_fp64" });435436return vec;437}438439440