Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/frontends/clover/core/kernel.cpp
4572 views
1
//
2
// Copyright 2012 Francisco Jerez
3
//
4
// Permission is hereby granted, free of charge, to any person obtaining a
5
// copy of this software and associated documentation files (the "Software"),
6
// to deal in the Software without restriction, including without limitation
7
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
// and/or sell copies of the Software, and to permit persons to whom the
9
// Software is furnished to do so, subject to the following conditions:
10
//
11
// The above copyright notice and this permission notice shall be included in
12
// all copies or substantial portions of the Software.
13
//
14
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20
// OTHER DEALINGS IN THE SOFTWARE.
21
//
22
23
#include "core/kernel.hpp"
24
#include "core/resource.hpp"
25
#include "util/factor.hpp"
26
#include "util/u_math.h"
27
#include "pipe/p_context.h"
28
29
using namespace clover;
30
31
// Build a kernel object for program `prog`: wrap every user-visible
// ("general") argument, and for each device the program was built for,
// upload the module's constant-data section (if present) into a
// read-only device buffer.
kernel::kernel(clover::program &prog, const std::string &name,
               const std::vector<module::argument> &margs) :
   program(prog), _name(name), exec(*this),
   program_ref(prog._kernel_ref_counter) {
   // Only "general" arguments are settable through the CL API; other
   // semantics (grid offset, image size, ...) are synthesized at bind time.
   for (auto &marg : margs) {
      if (marg.semantic == module::argument::general)
         _args.emplace_back(argument::create(marg));
   }
   for (auto &dev : prog.devices()) {
      auto &m = prog.build(dev).binary;
      auto msym = find(name_equals(name), m.syms);
      const auto f = id_type_equals(msym.section, module::section::data_constant);
      // Not every binary carries a constant-data section.
      if (!any_of(f, m.secs))
         continue;

      auto mconst = find(f, m.secs);
      auto rb = std::make_unique<root_buffer>(prog.context(), std::vector<cl_mem_properties>(),
                                              CL_MEM_COPY_HOST_PTR | CL_MEM_READ_ONLY,
                                              mconst.size, mconst.data.data());
      _constant_buffers.emplace(&dev, std::move(rb));
   }
}
53
54
template<typename V>
55
static inline std::vector<uint>
56
pad_vector(command_queue &q, const V &v, uint x) {
57
std::vector<uint> w { v.begin(), v.end() };
58
w.resize(q.device().max_block_size().size(), x);
59
return w;
60
}
61
62
void
63
kernel::launch(command_queue &q,
64
const std::vector<size_t> &grid_offset,
65
const std::vector<size_t> &grid_size,
66
const std::vector<size_t> &block_size) {
67
const auto m = program().build(q.device()).binary;
68
const auto reduced_grid_size =
69
map(divides(), grid_size, block_size);
70
void *st = exec.bind(&q, grid_offset);
71
struct pipe_grid_info info = {};
72
73
// The handles are created during exec_context::bind(), so we need make
74
// sure to call exec_context::bind() before retrieving them.
75
std::vector<uint32_t *> g_handles = map([&](size_t h) {
76
return (uint32_t *)&exec.input[h];
77
}, exec.g_handles);
78
79
q.pipe->bind_compute_state(q.pipe, st);
80
q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE,
81
0, exec.samplers.size(),
82
exec.samplers.data());
83
84
q.pipe->set_sampler_views(q.pipe, PIPE_SHADER_COMPUTE, 0,
85
exec.sviews.size(), 0, exec.sviews.data());
86
q.pipe->set_shader_images(q.pipe, PIPE_SHADER_COMPUTE, 0,
87
exec.iviews.size(), 0, exec.iviews.data());
88
q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(),
89
exec.resources.data());
90
q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(),
91
exec.g_buffers.data(), g_handles.data());
92
93
// Fill information for the launch_grid() call.
94
info.work_dim = grid_size.size();
95
copy(pad_vector(q, block_size, 1), info.block);
96
copy(pad_vector(q, reduced_grid_size, 1), info.grid);
97
info.pc = find(name_equals(_name), m.syms).offset;
98
info.input = exec.input.data();
99
100
q.pipe->launch_grid(q.pipe, &info);
101
102
q.pipe->set_global_binding(q.pipe, 0, exec.g_buffers.size(), NULL, NULL);
103
q.pipe->set_compute_resources(q.pipe, 0, exec.resources.size(), NULL);
104
q.pipe->set_shader_images(q.pipe, PIPE_SHADER_COMPUTE, 0,
105
0, exec.iviews.size(), NULL);
106
q.pipe->set_sampler_views(q.pipe, PIPE_SHADER_COMPUTE, 0,
107
0, exec.sviews.size(), NULL);
108
q.pipe->bind_sampler_states(q.pipe, PIPE_SHADER_COMPUTE, 0,
109
exec.samplers.size(), NULL);
110
111
q.pipe->memory_barrier(q.pipe, PIPE_BARRIER_GLOBAL_BUFFER);
112
exec.unbind();
113
}
114
115
size_t
116
kernel::mem_local() const {
117
size_t sz = 0;
118
119
for (auto &arg : args()) {
120
if (dynamic_cast<local_argument *>(&arg))
121
sz += arg.storage();
122
}
123
124
return sz;
125
}
126
127
// Private memory usage is not tracked by this frontend; always reports 0.
size_t
kernel::mem_private() const {
   return 0;
}
131
132
// Kernel function name as it appears in the module's symbol table.
const std::string &
kernel::name() const {
   return _name;
}
136
137
std::vector<size_t>
138
kernel::optimal_block_size(const command_queue &q,
139
const std::vector<size_t> &grid_size) const {
140
return factor::find_grid_optimal_factor<size_t>(
141
q.device().max_threads_per_block(), q.device().max_block_size(),
142
grid_size);
143
}
144
145
// Work-group size required by the kernel's reqd_work_group_size
// attribute, as recorded in the module symbol table (all-zero when
// the kernel declares none — see callers for how that is interpreted).
std::vector<size_t>
kernel::required_block_size() const {
   return find(name_equals(_name), program().symbols()).reqd_work_group_size;
}
149
150
// Mutable view over the kernel's settable arguments (dereferences the
// stored unique_ptrs).
kernel::argument_range
kernel::args() {
   return map(derefs(), _args);
}
154
155
// Read-only view over the kernel's settable arguments.
kernel::const_argument_range
kernel::args() const {
   return map(derefs(), _args);
}
159
160
std::vector<clover::module::arg_info>
161
kernel::args_infos() {
162
std::vector<clover::module::arg_info> infos;
163
for (auto &marg: find(name_equals(_name), program().symbols()).args)
164
if (marg.semantic == clover::module::argument::general)
165
infos.emplace_back(marg.info);
166
167
return infos;
168
}
169
170
// Binary module this kernel executes from, for the device of queue `q`.
const module &
kernel::module(const command_queue &q) const {
   return program().build(q.device()).binary;
}
174
175
// Per-kernel execution context: starts with no queue, no compute state
// and no printf handler until bind() is called.
kernel::exec_context::exec_context(kernel &kern) :
   kern(kern), q(NULL), print_handler(), mem_local(0), st(NULL), cs() {
}
178
179
// Destroy the cached compute state, if one was ever created.  `q` is
// the queue the state was created on (kept alive by the intrusive_ptr).
kernel::exec_context::~exec_context() {
   if (st)
      q->pipe->delete_compute_state(q->pipe, st);
}
183
184
// Bind all kernel arguments for a launch on queue `_q` and return the
// (possibly cached) pipe compute state to execute.
//
// NOTE the swap trick below: after std::swap(q, _q), `q` holds the new
// queue and `_q` the previous one — `_q` is later compared against and
// used to delete a stale compute state on the old queue.
void *
kernel::exec_context::bind(intrusive_ptr<command_queue> _q,
                           const std::vector<size_t> &grid_offset) {
   std::swap(q, _q);

   // Bind kernel arguments.
   auto &m = kern.program().build(q->device()).binary;
   auto msym = find(name_equals(kern.name()), m.syms);
   auto margs = msym.args;
   auto msec = find(id_type_equals(msym.section, module::section::text_executable), m.secs);
   // Iterator over the user-set arguments; advanced only for "general"
   // args, so image_size/image_format below can look at the argument
   // just consumed via (explicit_arg - 1).
   auto explicit_arg = kern._args.begin();

   for (auto &marg : margs) {
      switch (marg.semantic) {
      case module::argument::general:
         // User-provided argument, already set through the API.
         (*(explicit_arg++))->bind(*this, marg);
         break;

      case module::argument::grid_dimension: {
         // Implicit argument: number of grid dimensions.
         const cl_uint dimension = grid_offset.size();
         auto arg = argument::create(marg);

         arg->set(sizeof(dimension), &dimension);
         arg->bind(*this, marg);
         break;
      }
      case module::argument::grid_offset: {
         // Implicit argument: one component per grid dimension,
         // padded with zeros up to the device dimensionality.
         for (cl_uint x : pad_vector(*q, grid_offset, 0)) {
            auto arg = argument::create(marg);

            arg->set(sizeof(x), &x);
            arg->bind(*this, marg);
         }
         break;
      }
      case module::argument::image_size: {
         // Implicit argument derived from the preceding image argument.
         auto img = dynamic_cast<image_argument &>(**(explicit_arg - 1)).get();
         std::vector<cl_uint> image_size{
            static_cast<cl_uint>(img->width()),
            static_cast<cl_uint>(img->height()),
            static_cast<cl_uint>(img->depth())};
         for (auto x : image_size) {
            auto arg = argument::create(marg);

            arg->set(sizeof(x), &x);
            arg->bind(*this, marg);
         }
         break;
      }
      case module::argument::image_format: {
         // Implicit argument: channel data type and order of the
         // preceding image argument.
         auto img = dynamic_cast<image_argument &>(**(explicit_arg - 1)).get();
         cl_image_format fmt = img->format();
         std::vector<cl_uint> image_format{
            static_cast<cl_uint>(fmt.image_channel_data_type),
            static_cast<cl_uint>(fmt.image_channel_order)};
         for (auto x : image_format) {
            auto arg = argument::create(marg);

            arg->set(sizeof(x), &x);
            arg->bind(*this, marg);
         }
         break;
      }
      case module::argument::constant_buffer: {
         // Implicit argument: the per-device constant buffer uploaded
         // in the kernel constructor.
         auto arg = argument::create(marg);
         cl_mem buf = kern._constant_buffers.at(&q->device()).get();
         arg->set(sizeof(buf), &buf);
         arg->bind(*this, marg);
         break;
      }
      case module::argument::printf_buffer: {
         // Implicit argument: buffer the device-side printf writes to;
         // flushed by unbind() via print_handler->print().
         print_handler = printf_handler::create(q, m.printf_infos,
                                                m.printf_strings_in_buffer,
                                                q->device().max_printf_buffer_size());
         cl_mem print_mem = print_handler->get_mem();

         auto arg = argument::create(marg);
         arg->set(sizeof(cl_mem), &print_mem);
         arg->bind(*this, marg);
         break;
      }
      }
   }

   // Create a new compute state if anything changed.
   if (!st || q != _q ||
       cs.req_local_mem != mem_local ||
       cs.req_input_mem != input.size()) {
      if (st)
         // Dispose of the state created on the previous queue.
         _q->pipe->delete_compute_state(_q->pipe, st);

      cs.ir_type = q->device().ir_format();
      cs.prog = &(msec.data[0]);
      cs.req_local_mem = mem_local;
      cs.req_input_mem = input.size();
      st = q->pipe->create_compute_state(q->pipe, &cs);
      if (!st) {
         unbind(); // Cleanup
         throw error(CL_OUT_OF_RESOURCES);
      }
   }

   return st;
}
288
289
// Tear down per-launch state: flush any pending printf output first,
// let every argument undo what bind() did, then reset the input buffer,
// the binding lists and the __local memory counter.
void
kernel::exec_context::unbind() {
   if (print_handler)
      print_handler->print();

   for (auto &arg : kern.args())
      arg.unbind(*this);

   input.clear();
   samplers.clear();
   sviews.clear();
   iviews.clear();
   resources.clear();
   g_buffers.clear();
   g_handles.clear();
   mem_local = 0;
}
306
307
namespace {
   ///
   /// Raw byte representation of \a x in native byte order.
   ///
   template<typename T>
   std::vector<uint8_t>
   bytes(const T& x) {
      return { (uint8_t *)&x, (uint8_t *)&x + sizeof(x) };
   }

   ///
   /// Transform buffer \a v from the native byte order into the byte
   /// order specified by \a e.
   ///
   template<typename T>
   void
   byteswap(T &v, pipe_endian e) {
      if (PIPE_ENDIAN_NATIVE != e)
         std::reverse(v.begin(), v.end());
   }

   ///
   /// Pad buffer \a v to the next multiple of \a n.
   ///
   template<typename T>
   void
   align(T &v, size_t n) {
      v.resize(util_align_npot(v.size(), n));
   }

   ///
   /// Most significant bit of the native-endian integer stored in \a s.
   ///
   bool
   msb(const std::vector<uint8_t> &s) {
      if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE)
         return s.back() & 0x80;
      else
         return s.front() & 0x80;
   }

   ///
   /// Resize buffer \a v to size \a n using sign or zero extension
   /// according to \a ext.
   ///
   template<typename T>
   void
   extend(T &v, enum module::argument::ext_type ext, size_t n) {
      const size_t m = std::min(v.size(), n);
      const bool sign_ext = (ext == module::argument::sign_ext);
      // Fill byte is all-ones only when sign-extending a negative value.
      const uint8_t fill = (sign_ext && msb(v) ? ~0 : 0);
      T w(n, fill);

      // Keep the least significant bytes: they sit at the front on
      // little-endian, at the back on big-endian.
      if (PIPE_ENDIAN_NATIVE == PIPE_ENDIAN_LITTLE)
         std::copy_n(v.begin(), m, w.begin());
      else
         std::copy_n(v.end() - m, m, w.end() - m);

      std::swap(v, w);
   }

   ///
   /// Append buffer \a w to \a v.
   ///
   template<typename T>
   void
   insert(T &v, const T &w) {
      v.insert(v.end(), w.begin(), w.end());
   }

   ///
   /// Append \a n elements to the end of buffer \a v.
   /// Returns the offset at which the new elements start.
   ///
   template<typename T>
   size_t
   allocate(T &v, size_t n) {
      size_t pos = v.size();
      v.resize(pos + n);
      return pos;
   }
}
382
383
std::unique_ptr<kernel::argument>
384
kernel::argument::create(const module::argument &marg) {
385
switch (marg.type) {
386
case module::argument::scalar:
387
return std::unique_ptr<kernel::argument>(new scalar_argument(marg.size));
388
389
case module::argument::global:
390
return std::unique_ptr<kernel::argument>(new global_argument);
391
392
case module::argument::local:
393
return std::unique_ptr<kernel::argument>(new local_argument);
394
395
case module::argument::constant:
396
return std::unique_ptr<kernel::argument>(new constant_argument);
397
398
case module::argument::image_rd:
399
return std::unique_ptr<kernel::argument>(new image_rd_argument);
400
401
case module::argument::image_wr:
402
return std::unique_ptr<kernel::argument>(new image_wr_argument);
403
404
case module::argument::sampler:
405
return std::unique_ptr<kernel::argument>(new sampler_argument);
406
407
}
408
throw error(CL_INVALID_KERNEL_DEFINITION);
409
}
410
411
// Arguments start out unset; set() implementations flip _set to true.
kernel::argument::argument() : _set(false) {
}
413
414
// Whether a value has been provided for this argument yet.
bool
kernel::argument::set() const {
   return _set;
}
418
419
// Scratch storage consumed by this argument; only local_argument
// overrides this with a non-zero amount.
size_t
kernel::argument::storage() const {
   return 0;
}
423
424
// `size` is the declared byte size of the scalar; set() enforces it.
kernel::scalar_argument::scalar_argument(size_t size) : size(size) {
}
426
427
void
428
kernel::scalar_argument::set(size_t size, const void *value) {
429
if (!value)
430
throw error(CL_INVALID_ARG_VALUE);
431
432
if (size != this->size)
433
throw error(CL_INVALID_ARG_SIZE);
434
435
v = { (uint8_t *)value, (uint8_t *)value + size };
436
_set = true;
437
}
438
439
// Append the scalar's bytes to the input buffer, extended to the
// target size and converted to the device's byte order.
void
kernel::scalar_argument::bind(exec_context &ctx,
                              const module::argument &marg) {
   // Work on a copy so the stored value can be re-bound later.
   auto w = v;

   extend(w, marg.ext_type, marg.target_size);
   byteswap(w, ctx.q->device().endianness());
   align(ctx.input, marg.target_align);
   insert(ctx.input, w);
}
449
450
// Scalars hold no per-launch resources; nothing to release.
void
kernel::scalar_argument::unbind(exec_context &ctx) {
}
453
454
// A global argument is either a buffer object or an SVM pointer;
// both start out null (treated as a null pointer at bind time).
kernel::global_argument::global_argument() : buf(nullptr), svm(nullptr) {
}
456
457
// Set this argument to a cl_mem buffer (or to a null pointer when
// `value` is null).  Clears any previously set SVM pointer.
void
kernel::global_argument::set(size_t size, const void *value) {
   if (size != sizeof(cl_mem))
      throw error(CL_INVALID_ARG_SIZE);

   buf = pobj<buffer>(value ? *(cl_mem *)value : NULL);
   svm = nullptr;
   _set = true;
}
466
467
// Set this argument to a raw SVM pointer, clearing any buffer object
// set previously.
void
kernel::global_argument::set_svm(const void *value) {
   svm = value;
   buf = nullptr;
   _set = true;
}
473
474
// Emit the argument value into the input buffer.  For a buffer object,
// record its pipe resource in g_buffers and the input-buffer position
// of its handle in g_handles so launch() can patch in the final
// address; for SVM, emit the pointer bytes directly; otherwise emit a
// zeroed slot for a null pointer.
void
kernel::global_argument::bind(exec_context &ctx,
                              const module::argument &marg) {
   align(ctx.input, marg.target_align);

   if (buf) {
      const resource &r = buf->resource_in(*ctx.q);
      ctx.g_handles.push_back(ctx.input.size());
      ctx.g_buffers.push_back(r.pipe);

      // How to handle multi-dimensional offsets?
      // We don't need to. Buffer offsets are always
      // one-dimensional.
      auto v = bytes(r.offset[0]);
      extend(v, marg.ext_type, marg.target_size);
      byteswap(v, ctx.q->device().endianness());
      insert(ctx.input, v);
   } else if (svm) {
      auto v = bytes(svm);
      extend(v, marg.ext_type, marg.target_size);
      byteswap(v, ctx.q->device().endianness());
      insert(ctx.input, v);
   } else {
      // Null pointer.
      allocate(ctx.input, marg.target_size);
   }
}
501
502
// Global bindings are torn down wholesale in exec_context::unbind();
// nothing per-argument to release here.
void
kernel::global_argument::unbind(exec_context &ctx) {
}
505
506
// Amount of __local memory requested for this argument via set().
size_t
kernel::local_argument::storage() const {
   return _storage;
}
510
511
void
512
kernel::local_argument::set(size_t size, const void *value) {
513
if (value)
514
throw error(CL_INVALID_ARG_VALUE);
515
516
if (!size)
517
throw error(CL_INVALID_ARG_SIZE);
518
519
_storage = size;
520
_set = true;
521
}
522
523
// Pass the current offset into the kernel's __local area as the
// argument value, then reserve this argument's storage after it.
void
kernel::local_argument::bind(exec_context &ctx,
                             const module::argument &marg) {
   auto v = bytes(ctx.mem_local);

   extend(v, module::argument::zero_ext, marg.target_size);
   byteswap(v, ctx.q->device().endianness());
   align(ctx.input, marg.target_align);
   insert(ctx.input, v);

   ctx.mem_local += _storage;
}
535
536
// Nothing to release; ctx.mem_local is reset by exec_context::unbind().
void
kernel::local_argument::unbind(exec_context &ctx) {
}
539
540
// `st` caches the surface bound in bind() so unbind() can release it.
kernel::constant_argument::constant_argument() : buf(nullptr), st(nullptr) {
}
542
543
// Set this argument to a cl_mem buffer (or to a null pointer when
// `value` is null).
void
kernel::constant_argument::set(size_t size, const void *value) {
   if (size != sizeof(cl_mem))
      throw error(CL_INVALID_ARG_SIZE);

   buf = pobj<buffer>(value ? *(cl_mem *)value : NULL);
   _set = true;
}
551
552
// Bind the constant buffer as a compute resource and emit its handle:
// the resource index is packed into the bits above 24 and the byte
// offset into the buffer into the low 24 bits of a single word.
void
kernel::constant_argument::bind(exec_context &ctx,
                                const module::argument &marg) {
   align(ctx.input, marg.target_align);

   if (buf) {
      resource &r = buf->resource_in(*ctx.q);
      // resources.size() is the index this surface will occupy once
      // pushed below.
      auto v = bytes(ctx.resources.size() << 24 | r.offset[0]);

      extend(v, module::argument::zero_ext, marg.target_size);
      byteswap(v, ctx.q->device().endianness());
      insert(ctx.input, v);

      st = r.bind_surface(*ctx.q, false);
      ctx.resources.push_back(st);
   } else {
      // Null pointer.
      allocate(ctx.input, marg.target_size);
   }
}
572
573
// Release the surface bound in bind(), if a buffer was set.
void
kernel::constant_argument::unbind(exec_context &ctx) {
   if (buf)
      buf->resource_in(*ctx.q).unbind_surface(*ctx.q, st);
}
578
579
void
580
kernel::image_rd_argument::set(size_t size, const void *value) {
581
if (!value)
582
throw error(CL_INVALID_ARG_VALUE);
583
584
if (size != sizeof(cl_mem))
585
throw error(CL_INVALID_ARG_SIZE);
586
587
img = &obj<image>(*(cl_mem *)value);
588
_set = true;
589
}
590
591
// Emit the sampler-view slot index as the argument value, then bind
// the image's sampler view into that slot.
void
kernel::image_rd_argument::bind(exec_context &ctx,
                                const module::argument &marg) {
   // sviews.size() is the index this view will occupy once pushed below.
   auto v = bytes(ctx.sviews.size());

   extend(v, module::argument::zero_ext, marg.target_size);
   byteswap(v, ctx.q->device().endianness());
   align(ctx.input, marg.target_align);
   insert(ctx.input, v);

   st = img->resource_in(*ctx.q).bind_sampler_view(*ctx.q);
   ctx.sviews.push_back(st);
}
604
605
// Release the sampler view created in bind().
void
kernel::image_rd_argument::unbind(exec_context &ctx) {
   img->resource_in(*ctx.q).unbind_sampler_view(*ctx.q, st);
}
609
610
void
611
kernel::image_wr_argument::set(size_t size, const void *value) {
612
if (!value)
613
throw error(CL_INVALID_ARG_VALUE);
614
615
if (size != sizeof(cl_mem))
616
throw error(CL_INVALID_ARG_SIZE);
617
618
img = &obj<image>(*(cl_mem *)value);
619
_set = true;
620
}
621
622
// Emit the image-view slot index as the argument value, then append
// the image view to the launch's iviews list.
void
kernel::image_wr_argument::bind(exec_context &ctx,
                                const module::argument &marg) {
   // iviews.size() is the index this view will occupy once pushed below.
   auto v = bytes(ctx.iviews.size());

   extend(v, module::argument::zero_ext, marg.target_size);
   byteswap(v, ctx.q->device().endianness());
   align(ctx.input, marg.target_align);
   insert(ctx.input, v);
   ctx.iviews.push_back(img->resource_in(*ctx.q).create_image_view(*ctx.q));
}
633
634
// Image views are cleared wholesale in exec_context::unbind();
// nothing per-argument to release here.
void
kernel::image_wr_argument::unbind(exec_context &ctx) {
}
637
638
// `st` caches the state bound in bind() so unbind() can release it.
kernel::sampler_argument::sampler_argument() : s(nullptr), st(nullptr) {
}
640
641
void
642
kernel::sampler_argument::set(size_t size, const void *value) {
643
if (!value)
644
throw error(CL_INVALID_SAMPLER);
645
646
if (size != sizeof(cl_sampler))
647
throw error(CL_INVALID_ARG_SIZE);
648
649
s = &obj(*(cl_sampler *)value);
650
_set = true;
651
}
652
653
// Bind the sampler state on the launch queue and record it in the
// launch's sampler list; no bytes are appended to the input buffer.
void
kernel::sampler_argument::bind(exec_context &ctx,
                               const module::argument &marg) {
   st = s->bind(*ctx.q);
   ctx.samplers.push_back(st);
}
659
660
// Release the sampler state bound in bind().
void
kernel::sampler_argument::unbind(exec_context &ctx) {
   s->unbind(*ctx.q, st);
}
664
665