Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/microsoft/clc/compute_test.cpp
4560 views
1
/*
2
* Copyright © Microsoft Corporation
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
*/
23
24
#include <stdio.h>
25
#include <stdint.h>
26
#include <stdexcept>
27
28
#include <directx/d3d12.h>
29
#include <dxgi1_4.h>
30
#include <gtest/gtest.h>
31
#include <wrl.h>
32
33
#include "util/u_debug.h"
34
#include "clc_compiler.h"
35
#include "compute_test.h"
36
#include "dxcapi.h"
37
38
using std::runtime_error;
39
using Microsoft::WRL::ComPtr;
40
41
/* Bit flags understood by the "COMPUTE_TEST_DEBUG" option table below.
 * Each flag occupies its own bit so they can be OR-ed together. */
enum compute_test_debug_flags {
   COMPUTE_DEBUG_EXPERIMENTAL_SHADERS = 0x1, /* bit 0 */
   COMPUTE_DEBUG_USE_HW_D3D           = 0x2, /* bit 1 */
   COMPUTE_DEBUG_OPTIMIZE_LIBCLC      = 0x4, /* bit 2 */
   COMPUTE_DEBUG_SERIALIZE_LIBCLC     = 0x8, /* bit 3 */
};
47
48
static const struct debug_named_value compute_debug_options[] = {
49
{ "experimental_shaders", COMPUTE_DEBUG_EXPERIMENTAL_SHADERS, "Enable experimental shaders" },
50
{ "use_hw_d3d", COMPUTE_DEBUG_USE_HW_D3D, "Use a hardware D3D device" },
51
{ "optimize_libclc", COMPUTE_DEBUG_OPTIMIZE_LIBCLC, "Optimize the clc_context before using it" },
52
{ "serialize_libclc", COMPUTE_DEBUG_SERIALIZE_LIBCLC, "Serialize and deserialize the clc_context" },
53
DEBUG_NAMED_VALUE_END
54
};
55
56
/* Provides debug_get_option_debug_compute(), which the rest of this file
 * calls to test the flags of the "COMPUTE_TEST_DEBUG" option; the table is
 * compute_debug_options and the default passed here is 0. */
DEBUG_GET_ONCE_FLAGS_OPTION(debug_compute, "COMPUTE_TEST_DEBUG", compute_debug_options, 0)
57
58
/* Warning hook installed in the logger below: writes the message to stderr
 * behind a "WARNING: " prefix. The priv pointer is not consulted. */
static void
warning_callback(void *priv, const char *msg)
{
   (void)priv;
   fprintf(stderr, "WARNING: %s\n", msg);
}
62
63
/* Error hook installed in the logger below: writes the message to stderr
 * behind an "ERROR: " prefix. The priv pointer is not consulted. */
static void
error_callback(void *priv, const char *msg)
{
   (void)priv;
   fprintf(stderr, "ERROR: %s\n", msg);
}
67
68
static const struct clc_logger logger = {
69
NULL,
70
error_callback,
71
warning_callback,
72
};
73
74
void
75
ComputeTest::enable_d3d12_debug_layer()
76
{
77
HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL");
78
if (!hD3D12Mod) {
79
fprintf(stderr, "D3D12: failed to load D3D12.DLL\n");
80
return;
81
}
82
83
typedef HRESULT(WINAPI * PFN_D3D12_GET_DEBUG_INTERFACE)(REFIID riid,
84
void **ppFactory);
85
PFN_D3D12_GET_DEBUG_INTERFACE D3D12GetDebugInterface = (PFN_D3D12_GET_DEBUG_INTERFACE)GetProcAddress(hD3D12Mod, "D3D12GetDebugInterface");
86
if (!D3D12GetDebugInterface) {
87
fprintf(stderr, "D3D12: failed to load D3D12GetDebugInterface from D3D12.DLL\n");
88
return;
89
}
90
91
ID3D12Debug *debug;
92
if (FAILED(D3D12GetDebugInterface(__uuidof(ID3D12Debug), (void **)& debug))) {
93
fprintf(stderr, "D3D12: D3D12GetDebugInterface failed\n");
94
return;
95
}
96
97
debug->EnableDebugLayer();
98
}
99
100
/*
 * Create an IDXGIFactory4 by resolving CreateDXGIFactory from DXGI.DLL at
 * run time.
 *
 * Returns the new factory; the caller owns the reference.
 * Throws runtime_error if the DLL, the entry point or the factory creation
 * fails.
 */
IDXGIFactory4 *
ComputeTest::get_dxgi_factory()
{
   using create_factory_fn = HRESULT (WINAPI *)(REFIID riid, void **ppFactory);

   /* Interface id is spelled out locally instead of relying on a library
    * providing the symbol. */
   static const GUID IID_IDXGIFactory4 = {
      0x1bc6ea02, 0xef36, 0x464f,
      { 0xbf, 0x0c, 0x21, 0xca, 0x39, 0xe5, 0x16, 0x8a }
   };

   HMODULE dxgi_module = LoadLibrary("DXGI.DLL");
   if (dxgi_module == NULL)
      throw runtime_error("Failed to load DXGI.DLL");

   create_factory_fn create_factory =
      (create_factory_fn)GetProcAddress(dxgi_module, "CreateDXGIFactory");
   if (create_factory == NULL)
      throw runtime_error("Failed to load CreateDXGIFactory from DXGI.DLL");

   IDXGIFactory4 *result = NULL;
   if (FAILED(create_factory(IID_IDXGIFactory4, (void **)&result)))
      throw runtime_error("CreateDXGIFactory failed");

   return result;
}
127
128
/*
 * Pick the DXGI adapter the tests run on.
 *
 * With COMPUTE_DEBUG_USE_HW_D3D set, returns the first enumerated adapter
 * that is not flagged as a software adapter; otherwise returns the WARP
 * adapter. The caller owns the returned reference.
 *
 * Throws runtime_error when no suitable adapter can be obtained.
 */
IDXGIAdapter1 *
ComputeTest::choose_adapter(IDXGIFactory4 *factory)
{
   IDXGIAdapter1 *ret = NULL;

   if (debug_get_option_debug_compute() & COMPUTE_DEBUG_USE_HW_D3D) {
      for (unsigned i = 0; SUCCEEDED(factory->EnumAdapters1(i, &ret)); i++) {
         DXGI_ADAPTER_DESC1 desc;
         /* DXGI_ADAPTER_DESC1::Flags holds DXGI_ADAPTER_FLAG bits. The
          * previous test against D3D_DRIVER_TYPE_SOFTWARE used an unrelated
          * enum value and therefore never rejected software adapters. */
         if (SUCCEEDED(ret->GetDesc1(&desc)) &&
             !(desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE))
            return ret;

         /* Not a match: drop the reference EnumAdapters1 handed out. */
         ret->Release();
         ret = NULL;
      }
      throw runtime_error("Failed to enum hardware adapter");
   }

   if (FAILED(factory->EnumWarpAdapter(__uuidof(IDXGIAdapter1),
                                       (void **)&ret)))
      throw runtime_error("Failed to enum warp adapter");
   return ret;
}
148
149
/*
 * Create an ID3D12Device (feature level 12_0) on the given adapter.
 *
 * D3D12.DLL is loaded dynamically. When COMPUTE_DEBUG_EXPERIMENTAL_SHADERS is
 * set, experimental shader models are enabled before the device is created.
 *
 * Returns the new device; the caller owns the reference.
 * Throws runtime_error on any failure.
 */
ID3D12Device *
ComputeTest::create_device(IDXGIAdapter1 *adapter)
{
   typedef HRESULT(WINAPI *PFN_D3D12CREATEDEVICE)(IUnknown *, D3D_FEATURE_LEVEL, REFIID, void **);
   PFN_D3D12CREATEDEVICE D3D12CreateDevice;

   HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL");
   if (!hD3D12Mod)
      throw runtime_error("failed to load D3D12.DLL");

   if (debug_get_option_debug_compute() & COMPUTE_DEBUG_EXPERIMENTAL_SHADERS) {
      typedef HRESULT(WINAPI *PFN_D3D12ENABLEEXPERIMENTALFEATURES)(UINT, const IID *, void *, UINT *);
      PFN_D3D12ENABLEEXPERIMENTALFEATURES D3D12EnableExperimentalFeatures;
      D3D12EnableExperimentalFeatures = (PFN_D3D12ENABLEEXPERIMENTALFEATURES)
         GetProcAddress(hD3D12Mod, "D3D12EnableExperimentalFeatures");
      /* The export may be missing; calling through a NULL pointer would
       * crash instead of reporting a test error. */
      if (!D3D12EnableExperimentalFeatures)
         throw runtime_error("failed to load D3D12EnableExperimentalFeatures from D3D12.DLL");
      if (FAILED(D3D12EnableExperimentalFeatures(1, &D3D12ExperimentalShaderModels, NULL, NULL)))
         throw runtime_error("failed to enable experimental shader models");
   }

   D3D12CreateDevice = (PFN_D3D12CREATEDEVICE)GetProcAddress(hD3D12Mod, "D3D12CreateDevice");
   if (!D3D12CreateDevice)
      throw runtime_error("failed to load D3D12CreateDevice from D3D12.DLL");

   /* Named 'device' so it does not shadow the fixture's 'dev' member. */
   ID3D12Device *device = NULL;
   if (FAILED(D3D12CreateDevice(adapter, D3D_FEATURE_LEVEL_12_0,
                                __uuidof(ID3D12Device), (void **)&device)))
      throw runtime_error("D3D12CreateDevice failed");

   return device;
}
179
180
/*
 * Build a version 1.1 root signature consisting of a single descriptor table
 * that covers every range recorded in 'resources', visible to all shader
 * stages.
 *
 * Relies on the static D3D12SerializeVersionedRootSignature pointer having
 * been resolved beforehand (run_shader_with_raw_args does this).
 *
 * Throws runtime_error if serialization or creation fails.
 */
ComPtr<ID3D12RootSignature>
ComputeTest::create_root_signature(const ComputeTest::Resources &resources)
{
   if (!D3D12SerializeVersionedRootSignature)
      throw runtime_error("D3D12SerializeVersionedRootSignature is not loaded");

   D3D12_ROOT_PARAMETER1 root_param;
   root_param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
   root_param.DescriptorTable.NumDescriptorRanges = (UINT)resources.ranges.size();
   root_param.DescriptorTable.pDescriptorRanges = resources.ranges.data();
   root_param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;

   D3D12_ROOT_SIGNATURE_DESC1 root_sig_desc;
   root_sig_desc.NumParameters = 1;
   root_sig_desc.pParameters = &root_param;
   root_sig_desc.NumStaticSamplers = 0;
   root_sig_desc.pStaticSamplers = NULL;
   root_sig_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE;

   D3D12_VERSIONED_ROOT_SIGNATURE_DESC versioned_desc;
   versioned_desc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1;
   versioned_desc.Desc_1_1 = root_sig_desc;

   /* ComPtr ownership: the raw blobs used previously were never released,
    * neither on success nor on the error paths. */
   ComPtr<ID3DBlob> sig, error;
   if (FAILED(D3D12SerializeVersionedRootSignature(&versioned_desc,
                                                   sig.GetAddressOf(),
                                                   error.GetAddressOf()))) {
      /* Surface the serializer's diagnostic, if any, before throwing. */
      if (error)
         fprintf(stderr, "D3D12: root signature serialization failed: %.*s\n",
                 (int)error->GetBufferSize(),
                 (const char *)error->GetBufferPointer());
      throw runtime_error("D3D12SerializeVersionedRootSignature failed");
   }

   ComPtr<ID3D12RootSignature> ret;
   if (FAILED(dev->CreateRootSignature(0,
                                       sig->GetBufferPointer(),
                                       sig->GetBufferSize(),
                                       __uuidof(ret),
                                       (void **)ret.GetAddressOf())))
      throw runtime_error("CreateRootSignature failed");

   return ret;
}
215
216
/*
 * Create a compute pipeline state object from the DXIL binary in 'dxil',
 * bound to 'root_sig'. All other descriptor fields are zero.
 *
 * Throws runtime_error if the device rejects the pipeline.
 */
ComPtr<ID3D12PipelineState>
ComputeTest::create_pipeline_state(ComPtr<ID3D12RootSignature> &root_sig,
                                   const struct clc_dxil_object &dxil)
{
   D3D12_COMPUTE_PIPELINE_STATE_DESC desc = {};
   desc.pRootSignature = root_sig.Get();
   desc.CS.pShaderBytecode = dxil.binary.data;
   desc.CS.BytecodeLength = dxil.binary.size;

   ComPtr<ID3D12PipelineState> pso;
   HRESULT hr = dev->CreateComputePipelineState(&desc,
                                                __uuidof(pso),
                                                (void **)& pso);
   if (FAILED(hr))
      throw runtime_error("Failed to create pipeline state");

   return pso;
}
231
232
/*
 * Create a committed buffer resource of 'size' bytes on a heap of the given
 * type, initially in the COMMON state.
 *
 * Buffers on the DEFAULT heap are created with unordered-access allowed;
 * other heap types get no resource flags.
 *
 * Throws runtime_error if the resource cannot be created.
 */
ComPtr<ID3D12Resource>
ComputeTest::create_buffer(int size, D3D12_HEAP_TYPE heap_type)
{
   D3D12_RESOURCE_DESC buf_desc;
   buf_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
   buf_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
   buf_desc.Width = size;
   buf_desc.Height = 1;
   buf_desc.DepthOrArraySize = 1;
   buf_desc.MipLevels = 1;
   buf_desc.Format = DXGI_FORMAT_UNKNOWN;
   buf_desc.SampleDesc.Count = 1;
   buf_desc.SampleDesc.Quality = 0;
   buf_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
   if (heap_type == D3D12_HEAP_TYPE_DEFAULT)
      buf_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
   else
      buf_desc.Flags = D3D12_RESOURCE_FLAG_NONE;

   D3D12_HEAP_PROPERTIES heap_props = dev->GetCustomHeapProperties(0, heap_type);

   ComPtr<ID3D12Resource> buffer;
   HRESULT hr = dev->CreateCommittedResource(&heap_props,
                                             D3D12_HEAP_FLAG_NONE, &buf_desc,
                                             D3D12_RESOURCE_STATE_COMMON,
                                             NULL, __uuidof(ID3D12Resource),
                                             (void **)&buffer);
   if (FAILED(hr))
      throw runtime_error("CreateCommittedResource failed");

   return buffer;
}
258
259
/*
 * Create an UPLOAD-heap buffer of 'size' bytes and fill it with 'size' bytes
 * copied from 'data' through a CPU mapping.
 *
 * Throws runtime_error if the buffer cannot be mapped.
 */
ComPtr<ID3D12Resource>
ComputeTest::create_upload_buffer_with_data(const void *data, size_t size)
{
   ComPtr<ID3D12Resource> staging = create_buffer(size, D3D12_HEAP_TYPE_UPLOAD);

   /* The same byte range is passed to both Map and Unmap. */
   D3D12_RANGE whole = { 0, (SIZE_T)size };
   void *mapped = NULL;
   HRESULT hr = staging->Map(0, &whole, (void **)&mapped);
   if (FAILED(hr))
      throw runtime_error("Failed to map upload-buffer");

   assert(mapped);
   memcpy(mapped, data, size);
   staging->Unmap(0, &whole);

   return staging;
}
273
274
/*
 * Create a DEFAULT-heap buffer of 'buffer_size' bytes whose first 'data_size'
 * bytes are initialised from 'data'.
 *
 * The data is staged in an upload buffer and copied on the command list,
 * which is executed (and waited on) before returning. The returned buffer is
 * back in the COMMON state.
 */
ComPtr<ID3D12Resource>
ComputeTest::create_sized_buffer_with_data(size_t buffer_size,
                                           const void *data,
                                           size_t data_size)
{
   ComPtr<ID3D12Resource> staging = create_upload_buffer_with_data(data, data_size);
   ComPtr<ID3D12Resource> target = create_buffer(buffer_size, D3D12_HEAP_TYPE_DEFAULT);

   /* COMMON -> COPY_DEST, copy, COPY_DEST -> COMMON */
   resource_barrier(target,
                    D3D12_RESOURCE_STATE_COMMON,
                    D3D12_RESOURCE_STATE_COPY_DEST);
   cmdlist->CopyBufferRegion(target.Get(), 0, staging.Get(), 0, data_size);
   resource_barrier(target,
                    D3D12_RESOURCE_STATE_COPY_DEST,
                    D3D12_RESOURCE_STATE_COMMON);

   execute_cmdlist();
   return target;
}
289
290
void
291
ComputeTest::get_buffer_data(ComPtr<ID3D12Resource> res,
292
void *buf, size_t size)
293
{
294
auto readback_res = create_buffer(align(size, 4), D3D12_HEAP_TYPE_READBACK);
295
resource_barrier(res, D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_SOURCE);
296
cmdlist->CopyResource(readback_res.Get(), res.Get());
297
resource_barrier(res, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_COMMON);
298
execute_cmdlist();
299
300
void *ptr = NULL;
301
D3D12_RANGE res_range = { 0, size };
302
if (FAILED(readback_res->Map(0, &res_range, &ptr)))
303
throw runtime_error("Failed to map readback-buffer");
304
305
memcpy(buf, ptr, size);
306
307
D3D12_RANGE empty_range = { 0, 0 };
308
readback_res->Unmap(0, &empty_range);
309
}
310
311
void
312
ComputeTest::resource_barrier(ComPtr<ID3D12Resource> &res,
313
D3D12_RESOURCE_STATES state_before,
314
D3D12_RESOURCE_STATES state_after)
315
{
316
D3D12_RESOURCE_BARRIER barrier;
317
barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
318
barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
319
barrier.Transition.pResource = res.Get();
320
barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
321
barrier.Transition.StateBefore = state_before;
322
barrier.Transition.StateAfter = state_after;
323
cmdlist->ResourceBarrier(1, &barrier);
324
}
325
326
void
327
ComputeTest::execute_cmdlist()
328
{
329
if (FAILED(cmdlist->Close()))
330
throw runtime_error("Closing ID3D12GraphicsCommandList failed");
331
332
ID3D12CommandList *cmdlists[] = { cmdlist };
333
cmdqueue->ExecuteCommandLists(1, cmdlists);
334
cmdqueue_fence->SetEventOnCompletion(fence_value, event);
335
cmdqueue->Signal(cmdqueue_fence, fence_value);
336
fence_value++;
337
WaitForSingleObject(event, INFINITE);
338
339
if (FAILED(cmdalloc->Reset()))
340
throw runtime_error("resetting ID3D12CommandAllocator failed");
341
342
if (FAILED(cmdlist->Reset(cmdalloc, NULL)))
343
throw runtime_error("resetting ID3D12GraphicsCommandList failed");
344
}
345
346
void
347
ComputeTest::create_uav_buffer(ComPtr<ID3D12Resource> res,
348
size_t width, size_t byte_stride,
349
D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle)
350
{
351
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
352
uav_desc.Format = DXGI_FORMAT_R32_TYPELESS;
353
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
354
uav_desc.Buffer.FirstElement = 0;
355
uav_desc.Buffer.NumElements = DIV_ROUND_UP(width * byte_stride, 4);
356
uav_desc.Buffer.StructureByteStride = 0;
357
uav_desc.Buffer.CounterOffsetInBytes = 0;
358
uav_desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
359
360
dev->CreateUnorderedAccessView(res.Get(), NULL, &uav_desc, cpu_handle);
361
}
362
363
void
364
ComputeTest::create_cbv(ComPtr<ID3D12Resource> res, size_t size,
365
D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle)
366
{
367
D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc;
368
cbv_desc.BufferLocation = res ? res->GetGPUVirtualAddress() : 0;
369
cbv_desc.SizeInBytes = size;
370
371
dev->CreateConstantBufferView(&cbv_desc, cpu_handle);
372
}
373
374
/*
 * Create (optionally initialised) storage for one UAV, write its descriptor
 * into the next free slot of uav_heap and register it in 'resources'.
 *
 * resources  descriptor bookkeeping; the new entry is appended to it
 * spaceid    register space passed on to resources.add()
 * resid      register index passed on to resources.add()
 * data       num_elems * elem_size bytes of initial contents, or NULL for an
 *            uninitialised buffer
 * num_elems  number of elements
 * elem_size  size of one element in bytes
 *
 * Returns the buffer, transitioned to UNORDERED_ACCESS on the command list,
 * or an empty ComPtr when the total size is zero (a descriptor is still
 * written and registered in that case).
 */
ComPtr<ID3D12Resource>
ComputeTest::add_uav_resource(ComputeTest::Resources &resources,
                              unsigned spaceid, unsigned resid,
                              const void *data, size_t num_elems,
                              size_t elem_size)
{
   const size_t data_size = elem_size * num_elems;
   /* The buffer itself is padded to a multiple of 4 bytes. */
   const size_t size = align(data_size, 4);
   D3D12_CPU_DESCRIPTOR_HANDLE handle;
   ComPtr<ID3D12Resource> res;
   handle = uav_heap->GetCPUDescriptorHandleForHeapStart();
   handle = offset_cpu_handle(handle, resources.descs.size() * uav_heap_incr);

   if (size) {
      if (data) {
         /* Upload only the bytes the caller actually provided. Passing the
          * padded size as the data size would make the upload memcpy read up
          * to 3 bytes beyond 'data' whenever data_size is not a multiple of
          * 4. NOTE(review): this assumes create_buffer_with_data(), which is
          * not visible in this file, forwards its size unchanged -- confirm. */
         res = create_sized_buffer_with_data(size, data, data_size);
      } else {
         res = create_buffer(size, D3D12_HEAP_TYPE_DEFAULT);
      }

      resource_barrier(res, D3D12_RESOURCE_STATE_COMMON,
                       D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
   }
   create_uav_buffer(res, num_elems, elem_size, handle);
   resources.add(res, D3D12_DESCRIPTOR_RANGE_TYPE_UAV, spaceid, resid);
   return res;
}
399
400
/*
 * Create a constant buffer initialised from 'data', write its CBV into the
 * next free slot of uav_heap and register it in 'resources'.
 *
 * The buffer and the view are sized to 'size' rounded up to a multiple of
 * 256 bytes. With size == 0 no buffer is created, but a view is still written
 * and registered. Returns the buffer (possibly empty).
 */
ComPtr<ID3D12Resource>
ComputeTest::add_cbv_resource(ComputeTest::Resources &resources,
                              unsigned spaceid, unsigned resid,
                              const void *data, size_t size)
{
   unsigned padded_size = align(size, 256);

   /* Descriptor slot = heap start + (number of descriptors so far). */
   D3D12_CPU_DESCRIPTOR_HANDLE slot =
      offset_cpu_handle(uav_heap->GetCPUDescriptorHandleForHeapStart(),
                        resources.descs.size() * uav_heap_incr);

   ComPtr<ID3D12Resource> cbuf;
   if (size) {
      assert(data);
      cbuf = create_sized_buffer_with_data(padded_size, data, size);
   }

   create_cbv(cbuf, padded_size, slot);
   resources.add(cbuf, D3D12_DESCRIPTOR_RANGE_TYPE_CBV, spaceid, resid);
   return cbuf;
}
419
420
void
421
ComputeTest::run_shader_with_raw_args(Shader shader,
422
const CompileArgs &compile_args,
423
const std::vector<RawShaderArg *> &args)
424
{
425
if (args.size() < 1)
426
throw runtime_error("no inputs");
427
428
static HMODULE hD3D12Mod = LoadLibrary("D3D12.DLL");
429
if (!hD3D12Mod)
430
throw runtime_error("Failed to load D3D12.DLL");
431
432
D3D12SerializeVersionedRootSignature = (PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE)GetProcAddress(hD3D12Mod, "D3D12SerializeVersionedRootSignature");
433
434
if (args.size() != shader.dxil->kernel->num_args)
435
throw runtime_error("incorrect number of inputs");
436
437
struct clc_runtime_kernel_conf conf = { 0 };
438
439
// Older WARP and some hardware doesn't support int64, so for these tests, unconditionally lower away int64
440
// A more complex runtime can be smarter about detecting when this needs to be done
441
conf.lower_bit_size = 64;
442
443
if (!shader.dxil->metadata.local_size[0])
444
conf.local_size[0] = compile_args.x;
445
else
446
conf.local_size[0] = shader.dxil->metadata.local_size[0];
447
448
if (!shader.dxil->metadata.local_size[1])
449
conf.local_size[1] = compile_args.y;
450
else
451
conf.local_size[1] = shader.dxil->metadata.local_size[1];
452
453
if (!shader.dxil->metadata.local_size[2])
454
conf.local_size[2] = compile_args.z;
455
else
456
conf.local_size[2] = shader.dxil->metadata.local_size[2];
457
458
if (compile_args.x % conf.local_size[0] ||
459
compile_args.y % conf.local_size[1] ||
460
compile_args.z % conf.local_size[2])
461
throw runtime_error("invalid global size must be a multiple of local size");
462
463
std::vector<struct clc_runtime_arg_info> argsinfo(args.size());
464
465
conf.args = argsinfo.data();
466
conf.support_global_work_id_offsets =
467
compile_args.work_props.global_offset_x != 0 ||
468
compile_args.work_props.global_offset_y != 0 ||
469
compile_args.work_props.global_offset_z != 0;
470
conf.support_workgroup_id_offsets =
471
compile_args.work_props.group_id_offset_x != 0 ||
472
compile_args.work_props.group_id_offset_y != 0 ||
473
compile_args.work_props.group_id_offset_z != 0;
474
475
for (unsigned i = 0; i < shader.dxil->kernel->num_args; ++i) {
476
RawShaderArg *arg = args[i];
477
size_t size = arg->get_elem_size() * arg->get_num_elems();
478
479
switch (shader.dxil->kernel->args[i].address_qualifier) {
480
case CLC_KERNEL_ARG_ADDRESS_LOCAL:
481
argsinfo[i].localptr.size = size;
482
break;
483
default:
484
break;
485
}
486
}
487
488
configure(shader, &conf);
489
validate(shader);
490
491
std::shared_ptr<struct clc_dxil_object> &dxil = shader.dxil;
492
493
std::vector<uint8_t> argsbuf(dxil->metadata.kernel_inputs_buf_size);
494
std::vector<ComPtr<ID3D12Resource>> argres(shader.dxil->kernel->num_args);
495
clc_work_properties_data work_props = compile_args.work_props;
496
if (!conf.support_workgroup_id_offsets) {
497
work_props.group_count_total_x = compile_args.x / conf.local_size[0];
498
work_props.group_count_total_y = compile_args.y / conf.local_size[1];
499
work_props.group_count_total_z = compile_args.z / conf.local_size[2];
500
}
501
if (work_props.work_dim == 0)
502
work_props.work_dim = 3;
503
Resources resources;
504
505
for (unsigned i = 0; i < dxil->kernel->num_args; ++i) {
506
RawShaderArg *arg = args[i];
507
size_t size = arg->get_elem_size() * arg->get_num_elems();
508
void *slot = argsbuf.data() + dxil->metadata.args[i].offset;
509
510
switch (dxil->kernel->args[i].address_qualifier) {
511
case CLC_KERNEL_ARG_ADDRESS_CONSTANT:
512
case CLC_KERNEL_ARG_ADDRESS_GLOBAL: {
513
assert(dxil->metadata.args[i].size == sizeof(uint64_t));
514
uint64_t *ptr_slot = (uint64_t *)slot;
515
if (arg->get_data())
516
*ptr_slot = (uint64_t)dxil->metadata.args[i].globconstptr.buf_id << 32;
517
else
518
*ptr_slot = ~0ull;
519
break;
520
}
521
case CLC_KERNEL_ARG_ADDRESS_LOCAL: {
522
assert(dxil->metadata.args[i].size == sizeof(uint64_t));
523
uint64_t *ptr_slot = (uint64_t *)slot;
524
*ptr_slot = dxil->metadata.args[i].localptr.sharedmem_offset;
525
break;
526
}
527
case CLC_KERNEL_ARG_ADDRESS_PRIVATE: {
528
assert(size == dxil->metadata.args[i].size);
529
memcpy(slot, arg->get_data(), size);
530
break;
531
}
532
default:
533
assert(0);
534
}
535
}
536
537
for (unsigned i = 0; i < dxil->kernel->num_args; ++i) {
538
RawShaderArg *arg = args[i];
539
540
if (dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_GLOBAL ||
541
dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_CONSTANT) {
542
argres[i] = add_uav_resource(resources, 0,
543
dxil->metadata.args[i].globconstptr.buf_id,
544
arg->get_data(), arg->get_num_elems(),
545
arg->get_elem_size());
546
}
547
}
548
549
if (dxil->metadata.printf.uav_id > 0)
550
add_uav_resource(resources, 0, dxil->metadata.printf.uav_id, NULL, 1024 * 1024 / 4, 4);
551
552
for (unsigned i = 0; i < dxil->metadata.num_consts; ++i)
553
add_uav_resource(resources, 0, dxil->metadata.consts[i].uav_id,
554
dxil->metadata.consts[i].data,
555
dxil->metadata.consts[i].size / 4, 4);
556
557
if (argsbuf.size())
558
add_cbv_resource(resources, 0, dxil->metadata.kernel_inputs_cbv_id,
559
argsbuf.data(), argsbuf.size());
560
561
add_cbv_resource(resources, 0, dxil->metadata.work_properties_cbv_id,
562
&work_props, sizeof(work_props));
563
564
auto root_sig = create_root_signature(resources);
565
auto pipeline_state = create_pipeline_state(root_sig, *dxil);
566
567
cmdlist->SetDescriptorHeaps(1, &uav_heap);
568
cmdlist->SetComputeRootSignature(root_sig.Get());
569
cmdlist->SetComputeRootDescriptorTable(0, uav_heap->GetGPUDescriptorHandleForHeapStart());
570
cmdlist->SetPipelineState(pipeline_state.Get());
571
572
cmdlist->Dispatch(compile_args.x / conf.local_size[0],
573
compile_args.y / conf.local_size[1],
574
compile_args.z / conf.local_size[2]);
575
576
for (auto &range : resources.ranges) {
577
if (range.RangeType == D3D12_DESCRIPTOR_RANGE_TYPE_UAV) {
578
for (unsigned i = range.OffsetInDescriptorsFromTableStart;
579
i < range.NumDescriptors; i++) {
580
if (!resources.descs[i].Get())
581
continue;
582
583
resource_barrier(resources.descs[i],
584
D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
585
D3D12_RESOURCE_STATE_COMMON);
586
}
587
}
588
}
589
590
execute_cmdlist();
591
592
for (unsigned i = 0; i < args.size(); i++) {
593
if (!(args[i]->get_direction() & SHADER_ARG_OUTPUT))
594
continue;
595
596
assert(dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_GLOBAL);
597
get_buffer_data(argres[i], args[i]->get_data(),
598
args[i]->get_elem_size() * args[i]->get_num_elems());
599
}
600
601
ComPtr<ID3D12InfoQueue> info_queue;
602
dev->QueryInterface(info_queue.ReleaseAndGetAddressOf());
603
if (info_queue)
604
{
605
EXPECT_EQ(0, info_queue->GetNumStoredMessages());
606
for (unsigned i = 0; i < info_queue->GetNumStoredMessages(); ++i) {
607
SIZE_T message_size = 0;
608
info_queue->GetMessageA(i, nullptr, &message_size);
609
D3D12_MESSAGE* message = (D3D12_MESSAGE*)malloc(message_size);
610
info_queue->GetMessageA(i, message, &message_size);
611
FAIL() << message->pDescription;
612
free(message);
613
}
614
}
615
}
616
617
void
618
ComputeTest::SetUp()
619
{
620
static struct clc_context *compiler_ctx_g = nullptr;
621
622
if (!compiler_ctx_g) {
623
clc_context_options options = { };
624
options.optimize = (debug_get_option_debug_compute() & COMPUTE_DEBUG_OPTIMIZE_LIBCLC) != 0;
625
626
compiler_ctx_g = clc_context_new(&logger, &options);
627
if (!compiler_ctx_g)
628
throw runtime_error("failed to create CLC compiler context");
629
630
if (debug_get_option_debug_compute() & COMPUTE_DEBUG_SERIALIZE_LIBCLC) {
631
void *serialized = nullptr;
632
size_t serialized_size = 0;
633
clc_context_serialize(compiler_ctx_g, &serialized, &serialized_size);
634
if (!serialized)
635
throw runtime_error("failed to serialize CLC compiler context");
636
637
clc_free_context(compiler_ctx_g);
638
compiler_ctx_g = nullptr;
639
640
compiler_ctx_g = clc_context_deserialize(serialized, serialized_size);
641
if (!compiler_ctx_g)
642
throw runtime_error("failed to deserialize CLC compiler context");
643
644
clc_context_free_serialized(serialized);
645
}
646
}
647
compiler_ctx = compiler_ctx_g;
648
649
enable_d3d12_debug_layer();
650
651
factory = get_dxgi_factory();
652
if (!factory)
653
throw runtime_error("failed to create DXGI factory");
654
655
adapter = choose_adapter(factory);
656
if (!adapter)
657
throw runtime_error("failed to choose adapter");
658
659
dev = create_device(adapter);
660
if (!dev)
661
throw runtime_error("failed to create device");
662
663
if (FAILED(dev->CreateFence(0, D3D12_FENCE_FLAG_NONE,
664
__uuidof(cmdqueue_fence),
665
(void **)&cmdqueue_fence)))
666
throw runtime_error("failed to create fence\n");
667
668
D3D12_COMMAND_QUEUE_DESC queue_desc;
669
queue_desc.Type = D3D12_COMMAND_LIST_TYPE_COMPUTE;
670
queue_desc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL;
671
queue_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
672
queue_desc.NodeMask = 0;
673
if (FAILED(dev->CreateCommandQueue(&queue_desc,
674
__uuidof(cmdqueue),
675
(void **)&cmdqueue)))
676
throw runtime_error("failed to create command queue");
677
678
if (FAILED(dev->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_COMPUTE,
679
__uuidof(cmdalloc), (void **)&cmdalloc)))
680
throw runtime_error("failed to create command allocator");
681
682
if (FAILED(dev->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_COMPUTE,
683
cmdalloc, NULL, __uuidof(cmdlist), (void **)&cmdlist)))
684
throw runtime_error("failed to create command list");
685
686
D3D12_DESCRIPTOR_HEAP_DESC heap_desc;
687
heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
688
heap_desc.NumDescriptors = 1000;
689
heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
690
heap_desc.NodeMask = 0;
691
if (FAILED(dev->CreateDescriptorHeap(&heap_desc,
692
__uuidof(uav_heap), (void **)&uav_heap)))
693
throw runtime_error("failed to create descriptor heap");
694
695
uav_heap_incr = dev->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
696
697
event = CreateEvent(NULL, FALSE, FALSE, NULL);
698
if (!event)
699
throw runtime_error("Failed to create event");
700
fence_value = 1;
701
}
702
703
void
704
ComputeTest::TearDown()
705
{
706
CloseHandle(event);
707
708
uav_heap->Release();
709
cmdlist->Release();
710
cmdalloc->Release();
711
cmdqueue->Release();
712
cmdqueue_fence->Release();
713
dev->Release();
714
adapter->Release();
715
factory->Release();
716
}
717
718
/* Storage for the static function pointer; run_shader_with_raw_args() fills
 * it in via GetProcAddress and create_root_signature() calls through it. */
PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE ComputeTest::D3D12SerializeVersionedRootSignature;
719
720
bool
721
validate_module(const struct clc_dxil_object &dxil)
722
{
723
static HMODULE hmod = LoadLibrary("DXIL.DLL");
724
if (!hmod) {
725
/* Enabling experimental shaders allows us to run unsigned shader code,
726
* such as when under the debugger where we can't run the validator. */
727
if (debug_get_option_debug_compute() & COMPUTE_DEBUG_EXPERIMENTAL_SHADERS)
728
return true;
729
else
730
throw runtime_error("failed to load DXIL.DLL");
731
}
732
733
DxcCreateInstanceProc pfnDxcCreateInstance =
734
(DxcCreateInstanceProc)GetProcAddress(hmod, "DxcCreateInstance");
735
if (!pfnDxcCreateInstance)
736
throw runtime_error("failed to load DxcCreateInstance");
737
738
struct shader_blob : public IDxcBlob {
739
shader_blob(void *data, size_t size) : data(data), size(size) {}
740
LPVOID STDMETHODCALLTYPE GetBufferPointer() override { return data; }
741
SIZE_T STDMETHODCALLTYPE GetBufferSize() override { return size; }
742
HRESULT STDMETHODCALLTYPE QueryInterface(REFIID, void **) override { return E_NOINTERFACE; }
743
ULONG STDMETHODCALLTYPE AddRef() override { return 1; }
744
ULONG STDMETHODCALLTYPE Release() override { return 0; }
745
void *data;
746
size_t size;
747
} blob(dxil.binary.data, dxil.binary.size);
748
749
IDxcValidator *validator;
750
if (FAILED(pfnDxcCreateInstance(CLSID_DxcValidator, __uuidof(IDxcValidator),
751
(void **)&validator)))
752
throw runtime_error("failed to create IDxcValidator");
753
754
IDxcOperationResult *result;
755
if (FAILED(validator->Validate(&blob, DxcValidatorFlags_InPlaceEdit,
756
&result)))
757
throw runtime_error("Validate failed");
758
759
HRESULT hr;
760
if (FAILED(result->GetStatus(&hr)) ||
761
FAILED(hr)) {
762
IDxcBlobEncoding *message;
763
result->GetErrorBuffer(&message);
764
fprintf(stderr, "D3D12: validation failed: %*s\n",
765
(int)message->GetBufferSize(),
766
(char *)message->GetBufferPointer());
767
message->Release();
768
validator->Release();
769
result->Release();
770
return false;
771
}
772
773
validator->Release();
774
result->Release();
775
return true;
776
}
777
778
static void
779
dump_blob(const char *path, const struct clc_dxil_object &dxil)
780
{
781
FILE *fp = fopen(path, "wb");
782
if (fp) {
783
fwrite(dxil.binary.data, 1, dxil.binary.size, fp);
784
fclose(fp);
785
printf("D3D12: wrote '%s'...\n", path);
786
}
787
}
788
789
/*
 * Compile each source string to a CLC object and link the results.
 *
 * sources         OpenCL C source strings; each is compiled under the name
 *                 "obj.cl"
 * compile_args    extra arguments forwarded to every compilation
 * create_library  forwarded to link(); when true and there is exactly one
 *                 source, the compiled object is returned without linking
 *
 * Throws runtime_error if any source fails to compile (link() may throw as
 * well).
 */
ComputeTest::Shader
ComputeTest::compile(const std::vector<const char *> &sources,
                     const std::vector<const char *> &compile_args,
                     bool create_library)
{
   struct clc_compile_args args = { 0 };
   args.args = compile_args.data();
   args.num_args = (unsigned)compile_args.size();
   args.source.name = "obj.cl";

   /* The unused function-scope 'shader' that the loop variable shadowed has
    * been dropped. */
   std::vector<Shader> shaders;
   shaders.reserve(sources.size());

   for (unsigned i = 0; i < sources.size(); i++) {
      args.source.value = sources[i];

      auto obj = clc_compile(compiler_ctx, &args, &logger);
      if (!obj)
         throw runtime_error("failed to compile object!");

      Shader shader;
      shader.obj = std::shared_ptr<struct clc_object>(obj, clc_free_object);
      shaders.push_back(shader);
   }

   if (shaders.size() == 1 && create_library)
      return shaders[0];

   return link(shaders, create_library);
}
820
821
/*
 * Link the CLC objects of 'sources' into one object.
 *
 * When 'create_library' is false the linked shader is additionally lowered
 * to DXIL via configure() with no runtime configuration.
 *
 * Throws runtime_error if linking fails.
 */
ComputeTest::Shader
ComputeTest::link(const std::vector<Shader> &sources,
                  bool create_library)
{
   /* Gather the raw object pointers in input order. */
   std::vector<const clc_object*> inputs;
   for (auto it = sources.begin(); it != sources.end(); ++it)
      inputs.push_back(it->obj.get());

   struct clc_linker_args linker_args = {};
   linker_args.create_library = create_library;
   linker_args.num_in_objs = (unsigned)inputs.size();
   linker_args.in_objs = inputs.data();

   struct clc_object *linked = clc_link(compiler_ctx, &linker_args, &logger);
   if (linked == NULL)
      throw runtime_error("failed to link objects!");

   ComputeTest::Shader result;
   result.obj.reset(linked, clc_free_object);

   if (!linker_args.create_library)
      configure(result, NULL);

   return result;
}
846
847
void
848
ComputeTest::configure(Shader &shader,
849
const struct clc_runtime_kernel_conf *conf)
850
{
851
struct clc_dxil_object *dxil;
852
853
dxil = clc_to_dxil(compiler_ctx, shader.obj.get(), "main_test", conf, &logger);
854
if (!dxil)
855
throw runtime_error("failed to compile kernel!");
856
857
shader.dxil = std::shared_ptr<struct clc_dxil_object>(dxil, clc_free_dxil_object);
858
}
859
860
void
861
ComputeTest::validate(ComputeTest::Shader &shader)
862
{
863
dump_blob("unsigned.cso", *shader.dxil);
864
if (!validate_module(*shader.dxil))
865
throw runtime_error("failed to validate module!");
866
867
dump_blob("signed.cso", *shader.dxil);
868
}
869
870