// Path: blob/21.2-virgl/src/gallium/frontends/clover/api/transfer.cpp
// 4572 views
//1// Copyright 2012 Francisco Jerez2//3// Permission is hereby granted, free of charge, to any person obtaining a4// copy of this software and associated documentation files (the "Software"),5// to deal in the Software without restriction, including without limitation6// the rights to use, copy, modify, merge, publish, distribute, sublicense,7// and/or sell copies of the Software, and to permit persons to whom the8// Software is furnished to do so, subject to the following conditions:9//10// The above copyright notice and this permission notice shall be included in11// all copies or substantial portions of the Software.12//13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19// OTHER DEALINGS IN THE SOFTWARE.20//2122#include <cstring>2324#include "util/bitscan.h"2526#include "api/dispatch.hpp"27#include "api/util.hpp"28#include "core/event.hpp"29#include "core/memory.hpp"3031using namespace clover;3233namespace {34typedef resource::vector vector_t;3536vector_t37vector(const size_t *p) {38if (!p)39throw error(CL_INVALID_VALUE);40return range(p, 3);41}4243vector_t44pitch(const vector_t ®ion, vector_t pitch) {45for (auto x : zip(tail(pitch),46map(multiplies(), region, pitch))) {47// The spec defines a value of zero as the natural pitch,48// i.e. 
the unaligned size of the previous dimension.49if (std::get<0>(x) == 0)50std::get<0>(x) = std::get<1>(x);51}5253return pitch;54}5556///57/// Size of a region in bytes.58///59size_t60size(const vector_t &pitch, const vector_t ®ion) {61if (any_of(is_zero(), region))62return 0;63else64return dot(pitch, region - vector_t{ 0, 1, 1 });65}6667///68/// Common argument checking shared by memory transfer commands.69///70void71validate_common(command_queue &q,72const ref_vector<event> &deps) {73if (any_of([&](const event &ev) {74return ev.context() != q.context();75}, deps))76throw error(CL_INVALID_CONTEXT);77}7879///80/// Common error checking for a buffer object argument.81///82void83validate_object(command_queue &q, buffer &mem, const vector_t &origin,84const vector_t &pitch, const vector_t ®ion) {85if (mem.context() != q.context())86throw error(CL_INVALID_CONTEXT);8788// The region must fit within the specified pitch,89if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))90throw error(CL_INVALID_VALUE);9192// ...and within the specified object.93if (dot(pitch, origin) + size(pitch, region) > mem.size())94throw error(CL_INVALID_VALUE);9596if (any_of(is_zero(), region))97throw error(CL_INVALID_VALUE);98}99100///101/// Common error checking for an image argument.102///103void104validate_object(command_queue &q, image &img,105const vector_t &orig, const vector_t ®ion) {106vector_t size = { img.width(), img.height(), img.depth() };107const auto &dev = q.device();108109if (!dev.image_support())110throw error(CL_INVALID_OPERATION);111112if (img.context() != q.context())113throw error(CL_INVALID_CONTEXT);114115if (any_of(greater(), orig + region, size))116throw error(CL_INVALID_VALUE);117118if (any_of(is_zero(), region))119throw error(CL_INVALID_VALUE);120121switch (img.type()) {122case CL_MEM_OBJECT_IMAGE1D: {123const size_t max = dev.max_image_size();124if (img.width() > max)125throw error(CL_INVALID_IMAGE_SIZE);126break;127}128case CL_MEM_OBJECT_IMAGE2D: 
{129const size_t max = dev.max_image_size();130if (img.width() > max || img.height() > max)131throw error(CL_INVALID_IMAGE_SIZE);132break;133}134case CL_MEM_OBJECT_IMAGE3D: {135const size_t max = dev.max_image_size_3d();136if (img.width() > max || img.height() > max || img.depth() > max)137throw error(CL_INVALID_IMAGE_SIZE);138break;139}140// XXX: Implement missing checks once Clover supports more image types.141default:142throw error(CL_INVALID_IMAGE_SIZE);143}144}145146///147/// Common error checking for a host pointer argument.148///149void150validate_object(command_queue &q, const void *ptr, const vector_t &orig,151const vector_t &pitch, const vector_t ®ion) {152if (!ptr)153throw error(CL_INVALID_VALUE);154155// The region must fit within the specified pitch.156if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))157throw error(CL_INVALID_VALUE);158}159160///161/// Common argument checking for a copy between two buffer objects.162///163void164validate_copy(command_queue &q, buffer &dst_mem,165const vector_t &dst_orig, const vector_t &dst_pitch,166buffer &src_mem,167const vector_t &src_orig, const vector_t &src_pitch,168const vector_t ®ion) {169if (dst_mem == src_mem) {170auto dst_offset = dot(dst_pitch, dst_orig);171auto src_offset = dot(src_pitch, src_orig);172173if (interval_overlaps()(174dst_offset, dst_offset + size(dst_pitch, region),175src_offset, src_offset + size(src_pitch, region)))176throw error(CL_MEM_COPY_OVERLAP);177}178}179180///181/// Common argument checking for a copy between two image objects.182///183void184validate_copy(command_queue &q,185image &dst_img, const vector_t &dst_orig,186image &src_img, const vector_t &src_orig,187const vector_t ®ion) {188if (dst_img.format() != src_img.format())189throw error(CL_IMAGE_FORMAT_MISMATCH);190191if (dst_img == src_img) {192if (all_of(interval_overlaps(),193dst_orig, dst_orig + region,194src_orig, src_orig + region))195throw error(CL_MEM_COPY_OVERLAP);196}197}198199///200/// Checks 
that the host access flags of the memory object are201/// within the allowed set \a flags.202///203void204validate_object_access(const memory_obj &mem, const cl_mem_flags flags) {205if (mem.flags() & ~flags &206(CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY |207CL_MEM_HOST_NO_ACCESS))208throw error(CL_INVALID_OPERATION);209}210211///212/// Checks that the mapping flags are correct.213///214void215validate_map_flags(const memory_obj &mem, const cl_map_flags flags) {216if ((flags & (CL_MAP_WRITE | CL_MAP_READ)) &&217(flags & CL_MAP_WRITE_INVALIDATE_REGION))218throw error(CL_INVALID_VALUE);219220if (flags & CL_MAP_READ)221validate_object_access(mem, CL_MEM_HOST_READ_ONLY);222223if (flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))224validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);225}226227///228/// Checks that the memory migration flags are correct.229///230void231validate_mem_migration_flags(const cl_mem_migration_flags flags) {232const cl_mem_migration_flags valid =233CL_MIGRATE_MEM_OBJECT_HOST |234CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED;235236if (flags & ~valid)237throw error(CL_INVALID_VALUE);238}239240///241/// Class that encapsulates the task of mapping an object of type242/// \a T. 
The return value of get() should be implicitly243/// convertible to \a void *.244///245template<typename T>246struct _map;247248template<>249struct _map<image*> {250_map(command_queue &q, image *img, cl_map_flags flags,251vector_t offset, vector_t pitch, vector_t region) :252map(q, img->resource_in(q), flags, true, offset, region),253pitch(map.pitch())254{ }255256template<typename T>257operator T *() const {258return static_cast<T *>(map);259}260261mapping map;262vector_t pitch;263};264265template<>266struct _map<buffer*> {267_map(command_queue &q, buffer *mem, cl_map_flags flags,268vector_t offset, vector_t pitch, vector_t region) :269map(q, mem->resource_in(q), flags, true,270{{ dot(pitch, offset) }}, {{ size(pitch, region) }}),271pitch(pitch)272{ }273274template<typename T>275operator T *() const {276return static_cast<T *>(map);277}278279mapping map;280vector_t pitch;281};282283template<typename P>284struct _map<P *> {285_map(command_queue &q, P *ptr, cl_map_flags flags,286vector_t offset, vector_t pitch, vector_t region) :287ptr((P *)((char *)ptr + dot(pitch, offset))), pitch(pitch)288{ }289290template<typename T>291operator T *() const {292return static_cast<T *>(ptr);293}294295P *ptr;296vector_t pitch;297};298299///300/// Software copy from \a src_obj to \a dst_obj. 
They can be301/// either pointers or memory objects.302///303template<typename T, typename S>304std::function<void (event &)>305soft_copy_op(command_queue &q,306T dst_obj, const vector_t &dst_orig, const vector_t &dst_pitch,307S src_obj, const vector_t &src_orig, const vector_t &src_pitch,308const vector_t ®ion) {309return [=, &q](event &) {310_map<T> dst = { q, dst_obj, CL_MAP_WRITE,311dst_orig, dst_pitch, region };312_map<S> src = { q, src_obj, CL_MAP_READ,313src_orig, src_pitch, region };314assert(src.pitch[0] == dst.pitch[0]);315vector_t v = {};316317for (v[2] = 0; v[2] < region[2]; ++v[2]) {318for (v[1] = 0; v[1] < region[1]; ++v[1]) {319std::memcpy(320static_cast<char *>(dst) + dot(dst.pitch, v),321static_cast<const char *>(src) + dot(src.pitch, v),322src.pitch[0] * region[0]);323}324}325};326}327328///329/// Hardware copy from \a src_obj to \a dst_obj.330///331template<typename T, typename S>332std::function<void (event &)>333hard_copy_op(command_queue &q, T dst_obj, const vector_t &dst_orig,334S src_obj, const vector_t &src_orig, const vector_t ®ion) {335return [=, &q](event &) {336dst_obj->resource_in(q).copy(q, dst_orig, region,337src_obj->resource_in(q), src_orig);338};339}340}341342CLOVER_API cl_int343clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,344size_t offset, size_t size, void *ptr,345cl_uint num_deps, const cl_event *d_deps,346cl_event *rd_ev) try {347auto &q = obj(d_q);348auto &mem = obj<buffer>(d_mem);349auto deps = objs<wait_list_tag>(d_deps, num_deps);350vector_t region = { size, 1, 1 };351vector_t obj_origin = { offset };352auto obj_pitch = pitch(region, {{ 1 }});353354validate_common(q, deps);355validate_object(q, ptr, {}, obj_pitch, region);356validate_object(q, mem, obj_origin, obj_pitch, region);357validate_object_access(mem, CL_MEM_HOST_READ_ONLY);358359auto hev = create<hard_event>(360q, CL_COMMAND_READ_BUFFER, deps,361soft_copy_op(q, ptr, {}, obj_pitch,362&mem, obj_origin, obj_pitch,363region));364365if 
(blocking)366hev().wait_signalled();367368ret_object(rd_ev, hev);369return CL_SUCCESS;370371} catch (error &e) {372return e.get();373}374375CLOVER_API cl_int376clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,377size_t offset, size_t size, const void *ptr,378cl_uint num_deps, const cl_event *d_deps,379cl_event *rd_ev) try {380auto &q = obj(d_q);381auto &mem = obj<buffer>(d_mem);382auto deps = objs<wait_list_tag>(d_deps, num_deps);383vector_t region = { size, 1, 1 };384vector_t obj_origin = { offset };385auto obj_pitch = pitch(region, {{ 1 }});386387validate_common(q, deps);388validate_object(q, mem, obj_origin, obj_pitch, region);389validate_object(q, ptr, {}, obj_pitch, region);390validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);391392auto hev = create<hard_event>(393q, CL_COMMAND_WRITE_BUFFER, deps,394soft_copy_op(q, &mem, obj_origin, obj_pitch,395ptr, {}, obj_pitch,396region));397398if (blocking)399hev().wait_signalled();400401ret_object(rd_ev, hev);402return CL_SUCCESS;403404} catch (error &e) {405return e.get();406}407408CLOVER_API cl_int409clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,410const size_t *p_obj_origin,411const size_t *p_host_origin,412const size_t *p_region,413size_t obj_row_pitch, size_t obj_slice_pitch,414size_t host_row_pitch, size_t host_slice_pitch,415void *ptr,416cl_uint num_deps, const cl_event *d_deps,417cl_event *rd_ev) try {418auto &q = obj(d_q);419auto &mem = obj<buffer>(d_mem);420auto deps = objs<wait_list_tag>(d_deps, num_deps);421auto region = vector(p_region);422auto obj_origin = vector(p_obj_origin);423auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});424auto host_origin = vector(p_host_origin);425auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});426427validate_common(q, deps);428validate_object(q, ptr, host_origin, host_pitch, region);429validate_object(q, mem, obj_origin, obj_pitch, region);430validate_object_access(mem, 
CL_MEM_HOST_READ_ONLY);431432auto hev = create<hard_event>(433q, CL_COMMAND_READ_BUFFER_RECT, deps,434soft_copy_op(q, ptr, host_origin, host_pitch,435&mem, obj_origin, obj_pitch,436region));437438if (blocking)439hev().wait_signalled();440441ret_object(rd_ev, hev);442return CL_SUCCESS;443444} catch (error &e) {445return e.get();446}447448CLOVER_API cl_int449clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,450const size_t *p_obj_origin,451const size_t *p_host_origin,452const size_t *p_region,453size_t obj_row_pitch, size_t obj_slice_pitch,454size_t host_row_pitch, size_t host_slice_pitch,455const void *ptr,456cl_uint num_deps, const cl_event *d_deps,457cl_event *rd_ev) try {458auto &q = obj(d_q);459auto &mem = obj<buffer>(d_mem);460auto deps = objs<wait_list_tag>(d_deps, num_deps);461auto region = vector(p_region);462auto obj_origin = vector(p_obj_origin);463auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});464auto host_origin = vector(p_host_origin);465auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});466467validate_common(q, deps);468validate_object(q, mem, obj_origin, obj_pitch, region);469validate_object(q, ptr, host_origin, host_pitch, region);470validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);471472auto hev = create<hard_event>(473q, CL_COMMAND_WRITE_BUFFER_RECT, deps,474soft_copy_op(q, &mem, obj_origin, obj_pitch,475ptr, host_origin, host_pitch,476region));477478if (blocking)479hev().wait_signalled();480481ret_object(rd_ev, hev);482return CL_SUCCESS;483484} catch (error &e) {485return e.get();486}487488CLOVER_API cl_int489clEnqueueFillBuffer(cl_command_queue d_queue, cl_mem d_mem,490const void *pattern, size_t pattern_size,491size_t offset, size_t size,492cl_uint num_deps, const cl_event *d_deps,493cl_event *rd_ev) try {494auto &q = obj(d_queue);495auto &mem = obj<buffer>(d_mem);496auto deps = objs<wait_list_tag>(d_deps, num_deps);497vector_t region = { size, 1, 1 };498vector_t 
origin = { offset };499auto dst_pitch = pitch(region, {{ 1 }});500501validate_common(q, deps);502validate_object(q, mem, origin, dst_pitch, region);503504if (!pattern)505return CL_INVALID_VALUE;506507if (!util_is_power_of_two_nonzero(pattern_size) ||508pattern_size > 128 || size % pattern_size509|| offset % pattern_size) {510return CL_INVALID_VALUE;511}512513auto sub = dynamic_cast<sub_buffer *>(&mem);514if (sub && sub->offset() % q.device().mem_base_addr_align()) {515return CL_MISALIGNED_SUB_BUFFER_OFFSET;516}517518std::string data = std::string((char *)pattern, pattern_size);519auto hev = create<hard_event>(520q, CL_COMMAND_FILL_BUFFER, deps,521[=, &q, &mem](event &) {522mem.resource_in(q).clear(q, origin, region, data);523});524525ret_object(rd_ev, hev);526return CL_SUCCESS;527528} catch (error &e) {529return e.get();530}531532CLOVER_API cl_int533clEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,534size_t src_offset, size_t dst_offset, size_t size,535cl_uint num_deps, const cl_event *d_deps,536cl_event *rd_ev) try {537auto &q = obj(d_q);538auto &src_mem = obj<buffer>(d_src_mem);539auto &dst_mem = obj<buffer>(d_dst_mem);540auto deps = objs<wait_list_tag>(d_deps, num_deps);541vector_t region = { size, 1, 1 };542vector_t dst_origin = { dst_offset };543auto dst_pitch = pitch(region, {{ 1 }});544vector_t src_origin = { src_offset };545auto src_pitch = pitch(region, {{ 1 }});546547validate_common(q, deps);548validate_object(q, dst_mem, dst_origin, dst_pitch, region);549validate_object(q, src_mem, src_origin, src_pitch, region);550validate_copy(q, dst_mem, dst_origin, dst_pitch,551src_mem, src_origin, src_pitch, region);552553auto hev = create<hard_event>(554q, CL_COMMAND_COPY_BUFFER, deps,555hard_copy_op(q, &dst_mem, dst_origin,556&src_mem, src_origin, region));557558ret_object(rd_ev, hev);559return CL_SUCCESS;560561} catch (error &e) {562return e.get();563}564565CLOVER_API cl_int566clEnqueueCopyBufferRect(cl_command_queue d_q, cl_mem 
d_src_mem,567cl_mem d_dst_mem,568const size_t *p_src_origin, const size_t *p_dst_origin,569const size_t *p_region,570size_t src_row_pitch, size_t src_slice_pitch,571size_t dst_row_pitch, size_t dst_slice_pitch,572cl_uint num_deps, const cl_event *d_deps,573cl_event *rd_ev) try {574auto &q = obj(d_q);575auto &src_mem = obj<buffer>(d_src_mem);576auto &dst_mem = obj<buffer>(d_dst_mem);577auto deps = objs<wait_list_tag>(d_deps, num_deps);578auto region = vector(p_region);579auto dst_origin = vector(p_dst_origin);580auto dst_pitch = pitch(region, {{ 1, dst_row_pitch, dst_slice_pitch }});581auto src_origin = vector(p_src_origin);582auto src_pitch = pitch(region, {{ 1, src_row_pitch, src_slice_pitch }});583584validate_common(q, deps);585validate_object(q, dst_mem, dst_origin, dst_pitch, region);586validate_object(q, src_mem, src_origin, src_pitch, region);587validate_copy(q, dst_mem, dst_origin, dst_pitch,588src_mem, src_origin, src_pitch, region);589590auto hev = create<hard_event>(591q, CL_COMMAND_COPY_BUFFER_RECT, deps,592soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,593&src_mem, src_origin, src_pitch,594region));595596ret_object(rd_ev, hev);597return CL_SUCCESS;598599} catch (error &e) {600return e.get();601}602603CLOVER_API cl_int604clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,605const size_t *p_origin, const size_t *p_region,606size_t row_pitch, size_t slice_pitch, void *ptr,607cl_uint num_deps, const cl_event *d_deps,608cl_event *rd_ev) try {609auto &q = obj(d_q);610auto &img = obj<image>(d_mem);611auto deps = objs<wait_list_tag>(d_deps, num_deps);612auto region = vector(p_region);613auto dst_pitch = pitch(region, {{ img.pixel_size(),614row_pitch, slice_pitch }});615auto src_origin = vector(p_origin);616auto src_pitch = pitch(region, {{ img.pixel_size(),617img.row_pitch(), img.slice_pitch() }});618619validate_common(q, deps);620validate_object(q, ptr, {}, dst_pitch, region);621validate_object(q, img, src_origin, 
region);622validate_object_access(img, CL_MEM_HOST_READ_ONLY);623624auto hev = create<hard_event>(625q, CL_COMMAND_READ_IMAGE, deps,626soft_copy_op(q, ptr, {}, dst_pitch,627&img, src_origin, src_pitch,628region));629630if (blocking)631hev().wait_signalled();632633ret_object(rd_ev, hev);634return CL_SUCCESS;635636} catch (error &e) {637return e.get();638}639640CLOVER_API cl_int641clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,642const size_t *p_origin, const size_t *p_region,643size_t row_pitch, size_t slice_pitch, const void *ptr,644cl_uint num_deps, const cl_event *d_deps,645cl_event *rd_ev) try {646auto &q = obj(d_q);647auto &img = obj<image>(d_mem);648auto deps = objs<wait_list_tag>(d_deps, num_deps);649auto region = vector(p_region);650auto dst_origin = vector(p_origin);651auto dst_pitch = pitch(region, {{ img.pixel_size(),652img.row_pitch(), img.slice_pitch() }});653auto src_pitch = pitch(region, {{ img.pixel_size(),654row_pitch, slice_pitch }});655656validate_common(q, deps);657validate_object(q, img, dst_origin, region);658validate_object(q, ptr, {}, src_pitch, region);659validate_object_access(img, CL_MEM_HOST_WRITE_ONLY);660661auto hev = create<hard_event>(662q, CL_COMMAND_WRITE_IMAGE, deps,663soft_copy_op(q, &img, dst_origin, dst_pitch,664ptr, {}, src_pitch,665region));666667if (blocking)668hev().wait_signalled();669670ret_object(rd_ev, hev);671return CL_SUCCESS;672673} catch (error &e) {674return e.get();675}676677CLOVER_API cl_int678clEnqueueFillImage(cl_command_queue d_queue, cl_mem d_mem,679const void *fill_color,680const size_t *p_origin, const size_t *p_region,681cl_uint num_deps, const cl_event *d_deps,682cl_event *rd_ev) try {683auto &q = obj(d_queue);684auto &img = obj<image>(d_mem);685auto deps = objs<wait_list_tag>(d_deps, num_deps);686auto origin = vector(p_origin);687auto region = vector(p_region);688689validate_common(q, deps);690validate_object(q, img, origin, region);691692if (!fill_color)693return 
CL_INVALID_VALUE;694695std::string data = std::string((char *)fill_color, sizeof(cl_uint4));696auto hev = create<hard_event>(697q, CL_COMMAND_FILL_IMAGE, deps,698[=, &q, &img](event &) {699img.resource_in(q).clear(q, origin, region, data);700});701702ret_object(rd_ev, hev);703return CL_SUCCESS;704705} catch (error &e) {706return e.get();707}708709CLOVER_API cl_int710clEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,711const size_t *p_src_origin, const size_t *p_dst_origin,712const size_t *p_region,713cl_uint num_deps, const cl_event *d_deps,714cl_event *rd_ev) try {715auto &q = obj(d_q);716auto &src_img = obj<image>(d_src_mem);717auto &dst_img = obj<image>(d_dst_mem);718auto deps = objs<wait_list_tag>(d_deps, num_deps);719auto region = vector(p_region);720auto dst_origin = vector(p_dst_origin);721auto src_origin = vector(p_src_origin);722723validate_common(q, deps);724validate_object(q, dst_img, dst_origin, region);725validate_object(q, src_img, src_origin, region);726validate_copy(q, dst_img, dst_origin, src_img, src_origin, region);727728auto hev = create<hard_event>(729q, CL_COMMAND_COPY_IMAGE, deps,730hard_copy_op(q, &dst_img, dst_origin,731&src_img, src_origin,732region));733734ret_object(rd_ev, hev);735return CL_SUCCESS;736737} catch (error &e) {738return e.get();739}740741CLOVER_API cl_int742clEnqueueCopyImageToBuffer(cl_command_queue d_q,743cl_mem d_src_mem, cl_mem d_dst_mem,744const size_t *p_src_origin, const size_t *p_region,745size_t dst_offset,746cl_uint num_deps, const cl_event *d_deps,747cl_event *rd_ev) try {748auto &q = obj(d_q);749auto &src_img = obj<image>(d_src_mem);750auto &dst_mem = obj<buffer>(d_dst_mem);751auto deps = objs<wait_list_tag>(d_deps, num_deps);752auto region = vector(p_region);753vector_t dst_origin = { dst_offset };754auto dst_pitch = pitch(region, {{ src_img.pixel_size() }});755auto src_origin = vector(p_src_origin);756auto src_pitch = pitch(region, {{ 
src_img.pixel_size(),757src_img.row_pitch(),758src_img.slice_pitch() }});759760validate_common(q, deps);761validate_object(q, dst_mem, dst_origin, dst_pitch, region);762validate_object(q, src_img, src_origin, region);763764auto hev = create<hard_event>(765q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, deps,766soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,767&src_img, src_origin, src_pitch,768region));769770ret_object(rd_ev, hev);771return CL_SUCCESS;772773} catch (error &e) {774return e.get();775}776777CLOVER_API cl_int778clEnqueueCopyBufferToImage(cl_command_queue d_q,779cl_mem d_src_mem, cl_mem d_dst_mem,780size_t src_offset,781const size_t *p_dst_origin, const size_t *p_region,782cl_uint num_deps, const cl_event *d_deps,783cl_event *rd_ev) try {784auto &q = obj(d_q);785auto &src_mem = obj<buffer>(d_src_mem);786auto &dst_img = obj<image>(d_dst_mem);787auto deps = objs<wait_list_tag>(d_deps, num_deps);788auto region = vector(p_region);789auto dst_origin = vector(p_dst_origin);790auto dst_pitch = pitch(region, {{ dst_img.pixel_size(),791dst_img.row_pitch(),792dst_img.slice_pitch() }});793vector_t src_origin = { src_offset };794auto src_pitch = pitch(region, {{ dst_img.pixel_size() }});795796validate_common(q, deps);797validate_object(q, dst_img, dst_origin, region);798validate_object(q, src_mem, src_origin, src_pitch, region);799800auto hev = create<hard_event>(801q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, deps,802soft_copy_op(q, &dst_img, dst_origin, dst_pitch,803&src_mem, src_origin, src_pitch,804region));805806ret_object(rd_ev, hev);807return CL_SUCCESS;808809} catch (error &e) {810return e.get();811}812813CLOVER_API void *814clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,815cl_map_flags flags, size_t offset, size_t size,816cl_uint num_deps, const cl_event *d_deps,817cl_event *rd_ev, cl_int *r_errcode) try {818auto &q = obj(d_q);819auto &mem = obj<buffer>(d_mem);820auto deps = objs<wait_list_tag>(d_deps, num_deps);821vector_t region = { size, 1, 1 
};822vector_t obj_origin = { offset };823auto obj_pitch = pitch(region, {{ 1 }});824825validate_common(q, deps);826validate_object(q, mem, obj_origin, obj_pitch, region);827validate_map_flags(mem, flags);828829auto *map = mem.resource_in(q).add_map(q, flags, blocking, obj_origin, region);830831auto hev = create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps);832if (blocking)833hev().wait_signalled();834835ret_object(rd_ev, hev);836ret_error(r_errcode, CL_SUCCESS);837return *map;838839} catch (error &e) {840ret_error(r_errcode, e);841return NULL;842}843844CLOVER_API void *845clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,846cl_map_flags flags,847const size_t *p_origin, const size_t *p_region,848size_t *row_pitch, size_t *slice_pitch,849cl_uint num_deps, const cl_event *d_deps,850cl_event *rd_ev, cl_int *r_errcode) try {851auto &q = obj(d_q);852auto &img = obj<image>(d_mem);853auto deps = objs<wait_list_tag>(d_deps, num_deps);854auto region = vector(p_region);855auto origin = vector(p_origin);856857validate_common(q, deps);858validate_object(q, img, origin, region);859validate_map_flags(img, flags);860861if (!row_pitch)862throw error(CL_INVALID_VALUE);863864if (img.slice_pitch() && !slice_pitch)865throw error(CL_INVALID_VALUE);866867auto *map = img.resource_in(q).add_map(q, flags, blocking, origin, region);868*row_pitch = map->pitch()[1];869if (slice_pitch)870*slice_pitch = map->pitch()[2];871872auto hev = create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps);873if (blocking)874hev().wait_signalled();875876ret_object(rd_ev, hev);877ret_error(r_errcode, CL_SUCCESS);878return *map;879880} catch (error &e) {881ret_error(r_errcode, e);882return NULL;883}884885CLOVER_API cl_int886clEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr,887cl_uint num_deps, const cl_event *d_deps,888cl_event *rd_ev) try {889auto &q = obj(d_q);890auto &mem = obj(d_mem);891auto deps = objs<wait_list_tag>(d_deps, num_deps);892893validate_common(q, deps);894895auto 
hev = create<hard_event>(896q, CL_COMMAND_UNMAP_MEM_OBJECT, deps,897[=, &q, &mem](event &) {898mem.resource_in(q).del_map(ptr);899});900901ret_object(rd_ev, hev);902return CL_SUCCESS;903904} catch (error &e) {905return e.get();906}907908CLOVER_API cl_int909clEnqueueMigrateMemObjects(cl_command_queue d_q,910cl_uint num_mems,911const cl_mem *d_mems,912cl_mem_migration_flags flags,913cl_uint num_deps,914const cl_event *d_deps,915cl_event *rd_ev) try {916auto &q = obj(d_q);917auto mems = objs<memory_obj>(d_mems, num_mems);918auto deps = objs<wait_list_tag>(d_deps, num_deps);919920validate_common(q, deps);921validate_mem_migration_flags(flags);922923if (any_of([&](const memory_obj &m) {924return m.context() != q.context();925}, mems))926throw error(CL_INVALID_CONTEXT);927928auto hev = create<hard_event>(929q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps,930[=, &q](event &) {931for (auto &mem: mems) {932if (flags & CL_MIGRATE_MEM_OBJECT_HOST) {933if ((flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED))934mem.resource_out(q);935936// For flags == CL_MIGRATE_MEM_OBJECT_HOST only to be937// efficient we would need cl*ReadBuffer* to implement938// reading from host memory.939940} else {941if (flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)942mem.resource_undef(q);943else944mem.resource_in(q);945}946}947});948949ret_object(rd_ev, hev);950return CL_SUCCESS;;951952} catch (error &e) {953return e.get();954}955956cl_int957clover::EnqueueSVMFree(cl_command_queue d_q,958cl_uint num_svm_pointers,959void *svm_pointers[],960void (CL_CALLBACK *pfn_free_func) (961cl_command_queue queue, cl_uint num_svm_pointers,962void *svm_pointers[], void *user_data),963void *user_data,964cl_uint num_events_in_wait_list,965const cl_event *event_wait_list,966cl_event *event,967cl_int cmd) try {968969if (bool(num_svm_pointers) != bool(svm_pointers))970return CL_INVALID_VALUE;971972auto &q = obj(d_q);973974if (!q.device().svm_support())975return CL_INVALID_OPERATION;976977bool can_emulate = 
q.device().has_system_svm();978auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);979980validate_common(q, deps);981982std::vector<void *> svm_pointers_cpy(svm_pointers,983svm_pointers + num_svm_pointers);984if (!pfn_free_func) {985if (!can_emulate) {986CLOVER_NOT_SUPPORTED_UNTIL("2.0");987return CL_INVALID_VALUE;988}989pfn_free_func = [](cl_command_queue d_q, cl_uint num_svm_pointers,990void *svm_pointers[], void *) {991clover::context &ctx = obj(d_q).context();992for (void *p : range(svm_pointers, num_svm_pointers)) {993ctx.remove_svm_allocation(p);994free(p);995}996};997}998999auto hev = create<hard_event>(q, cmd, deps,1000[=](clover::event &) mutable {1001pfn_free_func(d_q, num_svm_pointers, svm_pointers_cpy.data(),1002user_data);1003});10041005ret_object(event, hev);1006return CL_SUCCESS;10071008} catch (error &e) {1009return e.get();1010}10111012CLOVER_API cl_int1013clEnqueueSVMFree(cl_command_queue d_q,1014cl_uint num_svm_pointers,1015void *svm_pointers[],1016void (CL_CALLBACK *pfn_free_func) (1017cl_command_queue queue, cl_uint num_svm_pointers,1018void *svm_pointers[], void *user_data),1019void *user_data,1020cl_uint num_events_in_wait_list,1021const cl_event *event_wait_list,1022cl_event *event) {10231024return EnqueueSVMFree(d_q, num_svm_pointers, svm_pointers,1025pfn_free_func, user_data, num_events_in_wait_list,1026event_wait_list, event, CL_COMMAND_SVM_FREE);1027}10281029cl_int1030clover::EnqueueSVMMemcpy(cl_command_queue d_q,1031cl_bool blocking_copy,1032void *dst_ptr,1033const void *src_ptr,1034size_t size,1035cl_uint num_events_in_wait_list,1036const cl_event *event_wait_list,1037cl_event *event,1038cl_int cmd) try {1039auto &q = obj(d_q);10401041if (!q.device().svm_support())1042return CL_INVALID_OPERATION;10431044if (dst_ptr == nullptr || src_ptr == nullptr)1045return CL_INVALID_VALUE;10461047if (static_cast<size_t>(abs(reinterpret_cast<ptrdiff_t>(dst_ptr) -1048reinterpret_cast<ptrdiff_t>(src_ptr))) < size)1049return 
CL_MEM_COPY_OVERLAP;105010511052bool can_emulate = q.device().has_system_svm();1053auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);10541055validate_common(q, deps);10561057if (can_emulate) {1058auto hev = create<hard_event>(q, cmd, deps,1059[=](clover::event &) {1060memcpy(dst_ptr, src_ptr, size);1061});10621063if (blocking_copy)1064hev().wait();1065ret_object(event, hev);1066return CL_SUCCESS;1067}10681069CLOVER_NOT_SUPPORTED_UNTIL("2.0");1070return CL_INVALID_VALUE;10711072} catch (error &e) {1073return e.get();1074}10751076CLOVER_API cl_int1077clEnqueueSVMMemcpy(cl_command_queue d_q,1078cl_bool blocking_copy,1079void *dst_ptr,1080const void *src_ptr,1081size_t size,1082cl_uint num_events_in_wait_list,1083const cl_event *event_wait_list,1084cl_event *event) {10851086return EnqueueSVMMemcpy(d_q, blocking_copy, dst_ptr, src_ptr,1087size, num_events_in_wait_list, event_wait_list,1088event, CL_COMMAND_SVM_MEMCPY);1089}10901091cl_int1092clover::EnqueueSVMMemFill(cl_command_queue d_q,1093void *svm_ptr,1094const void *pattern,1095size_t pattern_size,1096size_t size,1097cl_uint num_events_in_wait_list,1098const cl_event *event_wait_list,1099cl_event *event,1100cl_int cmd) try {1101auto &q = obj(d_q);11021103if (!q.device().svm_support())1104return CL_INVALID_OPERATION;11051106if (svm_ptr == nullptr || pattern == nullptr ||1107!util_is_power_of_two_nonzero(pattern_size) ||1108pattern_size > 128 ||1109!ptr_is_aligned(svm_ptr, pattern_size) ||1110size % pattern_size)1111return CL_INVALID_VALUE;11121113bool can_emulate = q.device().has_system_svm();1114auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);11151116validate_common(q, deps);11171118if (can_emulate) {1119auto hev = create<hard_event>(q, cmd, deps,1120[=](clover::event &) {1121void *ptr = svm_ptr;1122for (size_t s = size; s; s -= pattern_size) {1123memcpy(ptr, pattern, pattern_size);1124ptr = static_cast<uint8_t*>(ptr) + 
pattern_size;1125}1126});11271128ret_object(event, hev);1129return CL_SUCCESS;1130}11311132CLOVER_NOT_SUPPORTED_UNTIL("2.0");1133return CL_INVALID_VALUE;11341135} catch (error &e) {1136return e.get();1137}11381139CLOVER_API cl_int1140clEnqueueSVMMemFill(cl_command_queue d_q,1141void *svm_ptr,1142const void *pattern,1143size_t pattern_size,1144size_t size,1145cl_uint num_events_in_wait_list,1146const cl_event *event_wait_list,1147cl_event *event) {11481149return EnqueueSVMMemFill(d_q, svm_ptr, pattern, pattern_size,1150size, num_events_in_wait_list, event_wait_list,1151event, CL_COMMAND_SVM_MEMFILL);1152}11531154cl_int1155clover::EnqueueSVMMap(cl_command_queue d_q,1156cl_bool blocking_map,1157cl_map_flags map_flags,1158void *svm_ptr,1159size_t size,1160cl_uint num_events_in_wait_list,1161const cl_event *event_wait_list,1162cl_event *event,1163cl_int cmd) try {1164auto &q = obj(d_q);11651166if (!q.device().svm_support())1167return CL_INVALID_OPERATION;11681169if (svm_ptr == nullptr || size == 0)1170return CL_INVALID_VALUE;11711172bool can_emulate = q.device().has_system_svm();1173auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);11741175validate_common(q, deps);11761177if (can_emulate) {1178auto hev = create<hard_event>(q, cmd, deps,1179[](clover::event &) { });11801181ret_object(event, hev);1182return CL_SUCCESS;1183}11841185CLOVER_NOT_SUPPORTED_UNTIL("2.0");1186return CL_INVALID_VALUE;11871188} catch (error &e) {1189return e.get();1190}11911192CLOVER_API cl_int1193clEnqueueSVMMap(cl_command_queue d_q,1194cl_bool blocking_map,1195cl_map_flags map_flags,1196void *svm_ptr,1197size_t size,1198cl_uint num_events_in_wait_list,1199const cl_event *event_wait_list,1200cl_event *event) {12011202return EnqueueSVMMap(d_q, blocking_map, map_flags, svm_ptr, size,1203num_events_in_wait_list, event_wait_list, event,1204CL_COMMAND_SVM_MAP);1205}12061207cl_int1208clover::EnqueueSVMUnmap(cl_command_queue d_q,1209void *svm_ptr,1210cl_uint 
num_events_in_wait_list,1211const cl_event *event_wait_list,1212cl_event *event,1213cl_int cmd) try {1214auto &q = obj(d_q);12151216if (!q.device().svm_support())1217return CL_INVALID_OPERATION;12181219if (svm_ptr == nullptr)1220return CL_INVALID_VALUE;12211222bool can_emulate = q.device().has_system_svm();1223auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);12241225validate_common(q, deps);12261227if (can_emulate) {1228auto hev = create<hard_event>(q, cmd, deps,1229[](clover::event &) { });12301231ret_object(event, hev);1232return CL_SUCCESS;1233}12341235CLOVER_NOT_SUPPORTED_UNTIL("2.0");1236return CL_INVALID_VALUE;12371238} catch (error &e) {1239return e.get();1240}12411242CLOVER_API cl_int1243clEnqueueSVMUnmap(cl_command_queue d_q,1244void *svm_ptr,1245cl_uint num_events_in_wait_list,1246const cl_event *event_wait_list,1247cl_event *event) {12481249return EnqueueSVMUnmap(d_q, svm_ptr, num_events_in_wait_list,1250event_wait_list, event, CL_COMMAND_SVM_UNMAP);1251}12521253CLOVER_API cl_int1254clEnqueueSVMMigrateMem(cl_command_queue d_q,1255cl_uint num_svm_pointers,1256const void **svm_pointers,1257const size_t *sizes,1258const cl_mem_migration_flags flags,1259cl_uint num_deps,1260const cl_event *d_deps,1261cl_event *rd_ev) try {1262auto &q = obj(d_q);1263auto deps = objs<wait_list_tag>(d_deps, num_deps);12641265validate_common(q, deps);1266validate_mem_migration_flags(flags);12671268if (!q.device().svm_support())1269return CL_INVALID_OPERATION;12701271if (!num_svm_pointers || !svm_pointers)1272return CL_INVALID_VALUE;12731274std::vector<size_t> sizes_copy(num_svm_pointers);1275std::vector<const void*> ptrs(num_svm_pointers);12761277for (unsigned i = 0; i < num_svm_pointers; ++i) {1278const void *ptr = svm_pointers[i];1279size_t size = sizes ? 
sizes[i] : 0;1280if (!ptr)1281return CL_INVALID_VALUE;12821283auto p = q.context().find_svm_allocation(ptr);1284if (!p.first)1285return CL_INVALID_VALUE;12861287std::ptrdiff_t pdiff = (uint8_t*)ptr - (uint8_t*)p.first;1288if (size && size + pdiff > p.second)1289return CL_INVALID_VALUE;12901291sizes_copy[i] = size ? size : p.second;1292ptrs[i] = size ? svm_pointers[i] : p.first;1293}12941295auto hev = create<hard_event>(1296q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps,1297[=, &q](event &) {1298q.svm_migrate(ptrs, sizes_copy, flags);1299});13001301ret_object(rd_ev, hev);1302return CL_SUCCESS;13031304} catch (error &e) {1305return e.get();1306}130713081309