Path: blob/21.2-virgl/src/gallium/frontends/clover/api/kernel.cpp
4572 views
//1// Copyright 2012 Francisco Jerez2//3// Permission is hereby granted, free of charge, to any person obtaining a4// copy of this software and associated documentation files (the "Software"),5// to deal in the Software without restriction, including without limitation6// the rights to use, copy, modify, merge, publish, distribute, sublicense,7// and/or sell copies of the Software, and to permit persons to whom the8// Software is furnished to do so, subject to the following conditions:9//10// The above copyright notice and this permission notice shall be included in11// all copies or substantial portions of the Software.12//13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19// OTHER DEALINGS IN THE SOFTWARE.20//2122#include "api/util.hpp"23#include "core/kernel.hpp"24#include "core/event.hpp"2526using namespace clover;2728CLOVER_API cl_kernel29clCreateKernel(cl_program d_prog, const char *name, cl_int *r_errcode) try {30auto &prog = obj(d_prog);3132if (!name)33throw error(CL_INVALID_VALUE);3435auto &sym = find(name_equals(name), prog.symbols());3637ret_error(r_errcode, CL_SUCCESS);38return new kernel(prog, name, range(sym.args));3940} catch (std::out_of_range &e) {41ret_error(r_errcode, CL_INVALID_KERNEL_NAME);42return NULL;4344} catch (error &e) {45ret_error(r_errcode, e);46return NULL;47}4849CLOVER_API cl_int50clCreateKernelsInProgram(cl_program d_prog, cl_uint count,51cl_kernel *rd_kerns, cl_uint *r_count) try {52auto &prog = obj(d_prog);53auto &syms = prog.symbols();5455if (rd_kerns && count < syms.size())56throw error(CL_INVALID_VALUE);5758if (rd_kerns)59copy(map([&](const module::symbol &sym) {60return desc(new kernel(prog,61std::string(sym.name.begin(),62sym.name.end()),63range(sym.args)));64}, syms),65rd_kerns);6667if (r_count)68*r_count = syms.size();6970return CL_SUCCESS;7172} catch (error &e) {73return e.get();74}7576CLOVER_API cl_int77clRetainKernel(cl_kernel d_kern) try {78obj(d_kern).retain();79return CL_SUCCESS;8081} catch (error &e) {82return e.get();83}8485CLOVER_API cl_int86clReleaseKernel(cl_kernel d_kern) try {87if (obj(d_kern).release())88delete pobj(d_kern);8990return CL_SUCCESS;9192} catch (error &e) {93return e.get();94}9596CLOVER_API cl_int97clSetKernelArg(cl_kernel d_kern, cl_uint idx, size_t size,98const void *value) try {99obj(d_kern).args().at(idx).set(size, value);100return CL_SUCCESS;101102} catch (std::out_of_range &e) {103return CL_INVALID_ARG_INDEX;104105} catch (error &e) {106return e.get();107}108109CLOVER_API cl_int110clGetKernelInfo(cl_kernel d_kern, cl_kernel_info param,111size_t size, void *r_buf, size_t *r_size) try {112property_buffer buf { r_buf, size, r_size };113auto &kern = obj(d_kern);114115switch (param) {116case CL_KERNEL_FUNCTION_NAME:117buf.as_string() = kern.name();118break;119120case CL_KERNEL_NUM_ARGS:121buf.as_scalar<cl_uint>() = kern.args().size();122break;123124case CL_KERNEL_REFERENCE_COUNT:125buf.as_scalar<cl_uint>() = kern.ref_count();126break;127128case CL_KERNEL_CONTEXT:129buf.as_scalar<cl_context>() = desc(kern.program().context());130break;131132case CL_KERNEL_PROGRAM:133buf.as_scalar<cl_program>() = desc(kern.program());134break;135136case CL_KERNEL_ATTRIBUTES:137buf.as_string() = find(name_equals(kern.name()), kern.program().symbols()).attributes;138break;139140default:141throw error(CL_INVALID_VALUE);142}143144return CL_SUCCESS;145146} catch (error &e) {147return e.get();148}149150CLOVER_API cl_int151clGetKernelWorkGroupInfo(cl_kernel d_kern, cl_device_id d_dev,152cl_kernel_work_group_info param,153size_t size, void *r_buf, size_t *r_size) try {154property_buffer buf { r_buf, size, r_size };155auto &kern = obj(d_kern);156auto &dev = (d_dev ? *pobj(d_dev) : unique(kern.program().devices()));157158if (!count(dev, kern.program().devices()))159throw error(CL_INVALID_DEVICE);160161switch (param) {162case CL_KERNEL_WORK_GROUP_SIZE:163buf.as_scalar<size_t>() = dev.max_threads_per_block();164break;165166case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:167buf.as_vector<size_t>() = kern.required_block_size();168break;169170case CL_KERNEL_LOCAL_MEM_SIZE:171buf.as_scalar<cl_ulong>() = kern.mem_local();172break;173174case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:175buf.as_scalar<size_t>() = dev.subgroup_size();176break;177178case CL_KERNEL_PRIVATE_MEM_SIZE:179buf.as_scalar<cl_ulong>() = kern.mem_private();180break;181182default:183throw error(CL_INVALID_VALUE);184}185186return CL_SUCCESS;187188} catch (error &e) {189return e.get();190191} catch (std::out_of_range &e) {192return CL_INVALID_DEVICE;193}194195CLOVER_API cl_int196clGetKernelArgInfo(cl_kernel d_kern,197cl_uint idx, cl_kernel_arg_info param,198size_t size, void *r_buf, size_t *r_size) try {199property_buffer buf { r_buf, size, r_size };200201auto info = obj(d_kern).args_infos().at(idx);202203if (info.arg_name.empty())204return CL_KERNEL_ARG_INFO_NOT_AVAILABLE;205206switch (param) {207case CL_KERNEL_ARG_ADDRESS_QUALIFIER:208buf.as_scalar<cl_kernel_arg_address_qualifier>() = info.address_qualifier;209break;210211case CL_KERNEL_ARG_ACCESS_QUALIFIER:212buf.as_scalar<cl_kernel_arg_access_qualifier>() = info.access_qualifier;213break;214215case CL_KERNEL_ARG_TYPE_NAME:216buf.as_string() = info.type_name;217break;218219case CL_KERNEL_ARG_TYPE_QUALIFIER:220buf.as_scalar<cl_kernel_arg_type_qualifier>() = info.type_qualifier;221break;222223case CL_KERNEL_ARG_NAME:224buf.as_string() = info.arg_name;225break;226227default:228throw error(CL_INVALID_VALUE);229}230231return CL_SUCCESS;232233} catch (std::out_of_range &e) {234return CL_INVALID_ARG_INDEX;235236} catch (error &e) {237return e.get();238}239240namespace {241///242/// Common argument checking shared by kernel invocation commands.243///244void245validate_common(const command_queue &q, kernel &kern,246const ref_vector<event> &deps) {247if (kern.program().context() != q.context() ||248any_of([&](const event &ev) {249return ev.context() != q.context();250}, deps))251throw error(CL_INVALID_CONTEXT);252253if (any_of([](kernel::argument &arg) {254return !arg.set();255}, kern.args()))256throw error(CL_INVALID_KERNEL_ARGS);257258// If the command queue's device is not associated to the program, we get259// a module, with no sections, which will also fail the following test.260auto &m = kern.program().build(q.device()).binary;261if (!any_of(type_equals(module::section::text_executable), m.secs))262throw error(CL_INVALID_PROGRAM_EXECUTABLE);263}264265std::vector<size_t>266validate_grid_size(const command_queue &q, cl_uint dims,267const size_t *d_grid_size) {268auto grid_size = range(d_grid_size, dims);269270if (dims < 1 || dims > q.device().max_block_size().size())271throw error(CL_INVALID_WORK_DIMENSION);272273if (!d_grid_size || any_of(is_zero(), grid_size))274throw error(CL_INVALID_GLOBAL_WORK_SIZE);275276return grid_size;277}278279std::vector<size_t>280validate_grid_offset(const command_queue &q, cl_uint dims,281const size_t *d_grid_offset) {282if (d_grid_offset)283return range(d_grid_offset, dims);284else285return std::vector<size_t>(dims, 0);286}287288std::vector<size_t>289validate_block_size(const command_queue &q, const kernel &kern,290cl_uint dims, const size_t *d_grid_size,291const size_t *d_block_size) {292auto grid_size = range(d_grid_size, dims);293294if (d_block_size) {295auto block_size = range(d_block_size, dims);296297if (any_of(is_zero(), block_size) ||298any_of(greater(), block_size, q.device().max_block_size()))299throw error(CL_INVALID_WORK_ITEM_SIZE);300301if (any_of(modulus(), grid_size, block_size))302throw error(CL_INVALID_WORK_GROUP_SIZE);303304if (fold(multiplies(), 1u, block_size) >305q.device().max_threads_per_block())306throw error(CL_INVALID_WORK_GROUP_SIZE);307308return block_size;309310} else {311return kern.optimal_block_size(q, grid_size);312}313}314}315316CLOVER_API cl_int317clEnqueueNDRangeKernel(cl_command_queue d_q, cl_kernel d_kern,318cl_uint dims, const size_t *d_grid_offset,319const size_t *d_grid_size, const size_t *d_block_size,320cl_uint num_deps, const cl_event *d_deps,321cl_event *rd_ev) try {322auto &q = obj(d_q);323auto &kern = obj(d_kern);324auto deps = objs<wait_list_tag>(d_deps, num_deps);325auto grid_size = validate_grid_size(q, dims, d_grid_size);326auto grid_offset = validate_grid_offset(q, dims, d_grid_offset);327auto block_size = validate_block_size(q, kern, dims,328d_grid_size, d_block_size);329330validate_common(q, kern, deps);331332auto hev = create<hard_event>(333q, CL_COMMAND_NDRANGE_KERNEL, deps,334[=, &kern, &q](event &) {335kern.launch(q, grid_offset, grid_size, block_size);336});337338ret_object(rd_ev, hev);339return CL_SUCCESS;340341} catch (error &e) {342return e.get();343}344345CLOVER_API cl_int346clEnqueueTask(cl_command_queue d_q, cl_kernel d_kern,347cl_uint num_deps, const cl_event *d_deps,348cl_event *rd_ev) try {349auto &q = obj(d_q);350auto &kern = obj(d_kern);351auto deps = objs<wait_list_tag>(d_deps, num_deps);352353validate_common(q, kern, deps);354355auto hev = create<hard_event>(356q, CL_COMMAND_TASK, deps,357[=, &kern, &q](event &) {358kern.launch(q, { 0 }, { 1 }, { 1 });359});360361ret_object(rd_ev, hev);362return CL_SUCCESS;363364} catch (error &e) {365return e.get();366}367368CLOVER_API cl_int369clEnqueueNativeKernel(cl_command_queue d_q, void (*func)(void *),370void *args, size_t args_size,371cl_uint num_mems, const cl_mem *d_mems,372const void **mem_handles, cl_uint num_deps,373const cl_event *d_deps, cl_event *rd_ev) {374return CL_INVALID_OPERATION;375}376377CLOVER_API cl_int378clSetKernelArgSVMPointer(cl_kernel d_kern,379cl_uint arg_index,380const void *arg_value) try {381if (!any_of(std::mem_fn(&device::svm_support), obj(d_kern).program().devices()))382return CL_INVALID_OPERATION;383obj(d_kern).args().at(arg_index).set_svm(arg_value);384return CL_SUCCESS;385386} catch (std::out_of_range &e) {387return CL_INVALID_ARG_INDEX;388389} catch (error &e) {390return e.get();391}392393CLOVER_API cl_int394clSetKernelExecInfo(cl_kernel d_kern,395cl_kernel_exec_info param_name,396size_t param_value_size,397const void *param_value) try {398399if (!any_of(std::mem_fn(&device::svm_support), obj(d_kern).program().devices()))400return CL_INVALID_OPERATION;401402auto &kern = obj(d_kern);403404const bool has_system_svm = all_of(std::mem_fn(&device::has_system_svm),405kern.program().context().devices());406407if (!param_value)408return CL_INVALID_VALUE;409410switch (param_name) {411case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM:412case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM: {413if (param_value_size != sizeof(cl_bool))414return CL_INVALID_VALUE;415416cl_bool val = *static_cast<const cl_bool*>(param_value);417if (val == CL_TRUE && !has_system_svm)418return CL_INVALID_OPERATION;419else420return CL_SUCCESS;421}422423case CL_KERNEL_EXEC_INFO_SVM_PTRS:424case CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM:425if (has_system_svm)426return CL_SUCCESS;427428CLOVER_NOT_SUPPORTED_UNTIL("2.0");429return CL_INVALID_VALUE;430431default:432return CL_INVALID_VALUE;433}434435} catch (error &e) {436return e.get();437}438439440