GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/intel/vulkan/anv_device.c

/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#endif
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include "drm-uapi/drm_fourcc.h"
#include "drm-uapi/drm.h"
#include <xf86drm.h>

#include "anv_private.h"
#include "anv_measure.h"
#include "util/debug.h"
#include "util/build_id.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/os_file.h"
#include "util/os_misc.h"
#include "util/u_atomic.h"
#include "util/u_string.h"
#include "util/driconf.h"
#include "git_sha1.h"
#include "vk_util.h"
#include "vk_deferred_operation.h"
#include "common/intel_aux_map.h"
#include "common/intel_defines.h"
#include "common/intel_uuid.h"
#include "perf/intel_perf.h"

#include "genxml/gen7_pack.h"

static const driOptionDescription anv_dri_options[] = {
   DRI_CONF_SECTION_PERFORMANCE
      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
      DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_DEBUG
      DRI_CONF_ALWAYS_FLUSH_CACHE(false)
      DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
   DRI_CONF_SECTION_END
};

/* This is probably far too big, but it reflects the max size used for
 * messages in OpenGL's KHR_debug.
 */
#define MAX_DEBUG_MESSAGE_LENGTH 4096

/* Render engine timestamp register */
#define TIMESTAMP 0x2358

/* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
#if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
#endif

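/* Forward compiler debug messages to any VK_EXT_debug_report callbacks
 * registered on the owning instance.
 */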
static void
compiler_debug_log(void *data, const char *fmt, ...)
{
   char str[MAX_DEBUG_MESSAGE_LENGTH];
   struct anv_device *device = (struct anv_device *)data;
   struct anv_instance *instance = device->physical->instance;

   if (list_is_empty(&instance->vk.debug_report.callbacks))
      return;

   va_list args;
   va_start(args, fmt);
   (void) vsnprintf(str, MAX_DEBUG_MESSAGE_LENGTH, fmt, args);
   va_end(args);

   vk_debug_report(&instance->vk,
                   VK_DEBUG_REPORT_DEBUG_BIT_EXT,
                   NULL, 0, 0, "anv", str);
}

static void
compiler_perf_log(void *data, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);

   if (INTEL_DEBUG & DEBUG_PERF)
      mesa_logd_v(fmt, args);

   va_end(args);
}

static uint64_t
anv_compute_heap_size(int fd, uint64_t gtt_size)
{
   /* Query the total ram from the system */
   uint64_t total_ram;
   if (!os_get_total_physical_memory(&total_ram))
      return 0;

   /* We don't want to burn too much ram with the GPU. If the user has 4GiB
    * or less, we use at most half. If they have more than 4GiB, we use 3/4.
    */
   uint64_t available_ram;
   if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
      available_ram = total_ram / 2;
   else
      available_ram = total_ram * 3 / 4;

   /* We also want to leave some padding for things we allocate in the driver,
    * so don't go over 3/4 of the GTT either.
    */
   uint64_t available_gtt = gtt_size * 3 / 4;

   return MIN2(available_ram, available_gtt);
}

#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
    defined(VK_USE_PLATFORM_XCB_KHR) || \
    defined(VK_USE_PLATFORM_XLIB_KHR) || \
    defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define ANV_USE_WSI_PLATFORM
#endif

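/* Android builds advertise Vulkan 1.1; everywhere else we advertise 1.2. */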
#ifdef ANDROID
#define ANV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
#else
#define ANV_API_VERSION VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)
#endif

VkResult anv_EnumerateInstanceVersion(
    uint32_t* pApiVersion)
{
   *pApiVersion = ANV_API_VERSION;
   return VK_SUCCESS;
}

static const struct vk_instance_extension_table instance_extensions = {
   .KHR_device_group_creation = true,
   .KHR_external_fence_capabilities = true,
   .KHR_external_memory_capabilities = true,
   .KHR_external_semaphore_capabilities = true,
   .KHR_get_physical_device_properties2 = true,
   .EXT_debug_report = true,

#ifdef ANV_USE_WSI_PLATFORM
   .KHR_get_surface_capabilities2 = true,
   .KHR_surface = true,
   .KHR_surface_protected_capabilities = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display = true,
   .KHR_get_display_properties2 = true,
   .EXT_direct_mode_display = true,
   .EXT_display_surface_counter = true,
   .EXT_acquire_drm_display = true,
#endif
};

static void
get_device_extensions(const struct anv_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   *ext = (struct vk_device_extension_table) {
      .KHR_8bit_storage = device->info.ver >= 8,
      .KHR_16bit_storage = device->info.ver >= 8,
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = device->has_a64_buffer_access,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_deferred_host_operations = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_external_fence = device->has_syncobj_wait,
      .KHR_external_fence_fd = device->has_syncobj_wait,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_fragment_shading_rate = device->info.ver >= 11,
      .KHR_get_memory_requirements2 = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_multiview = true,
      .KHR_performance_query =
         device->use_softpin && device->perf &&
         (device->perf->i915_perf_version >= 3 ||
          INTEL_DEBUG & DEBUG_NO_OACONFIG) &&
         device->use_call_secondary,
      .KHR_pipeline_executable_properties = true,
      .KHR_push_descriptor = true,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_atomic_int64 = device->info.ver >= 9 &&
                                 device->use_softpin,
      .KHR_shader_clock = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_float16_int8 = device->info.ver >= 8,
      .KHR_shader_float_controls = device->info.ver >= 8,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_subgroup_extended_types = device->info.ver >= 8,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vulkan_memory_model = true,
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_4444_formats = true,
      .EXT_buffer_device_address = device->has_a64_buffer_access,
      .EXT_calibrated_timestamps = device->has_reg_timestamp,
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = device->info.verx10 >= 75,
      .EXT_conservative_rasterization = device->info.ver >= 9,
      .EXT_custom_border_color = device->info.ver >= 8,
      .EXT_depth_clip_enable = true,
      .EXT_descriptor_indexing = device->has_a64_buffer_access &&
                                 device->has_bindless_images,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_external_memory_host = true,
      .EXT_fragment_shader_interlock = device->info.ver >= 9,
      .EXT_global_priority = device->has_context_priority,
      .EXT_host_query_reset = true,
      .EXT_image_robustness = true,
      .EXT_image_drm_format_modifier = true,
      .EXT_index_type_uint8 = true,
      .EXT_inline_uniform_block = true,
      .EXT_line_rasterization = true,
      .EXT_memory_budget = device->has_mem_available,
      .EXT_pci_bus_info = true,
      .EXT_physical_device_drm = true,
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_post_depth_coverage = device->info.ver >= 9,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = true,
      .EXT_sampler_filter_minmax = device->info.ver >= 9,
      .EXT_scalar_block_layout = true,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_atomic_float = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_stencil_export = device->info.ver >= 9,
      .EXT_shader_subgroup_ballot = true,
      .EXT_shader_subgroup_vote = true,
      .EXT_shader_viewport_index_layer = true,
      .EXT_subgroup_size_control = true,
      .EXT_texel_buffer_alignment = true,
      .EXT_transform_feedback = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_ycbcr_image_arrays = true,
#ifdef ANDROID
      .ANDROID_external_memory_android_hardware_buffer = true,
      .ANDROID_native_buffer = true,
#endif
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
      .INTEL_performance_query = device->perf &&
                                 device->perf->i915_perf_version >= 3,
      .INTEL_shader_integer_functions2 = device->info.ver >= 8,
      .EXT_multi_draw = true,
      .NV_compute_shader_derivatives = true,
   };
}

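/* Query the kernel's DRM_I915_QUERY_MEMORY_REGIONS uAPI for the probed
 * sizes of the system and device-local (VRAM) memory regions.  Returns
 * false if the query isn't supported by the kernel.
 */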
static bool
anv_get_query_meminfo(struct anv_physical_device *device, int fd)
{
   struct drm_i915_query_memory_regions *mem_regions =
      intel_i915_query_alloc(fd, DRM_I915_QUERY_MEMORY_REGIONS);
   if (mem_regions == NULL)
      return false;

   for(int i = 0; i < mem_regions->num_regions; i++) {
      switch(mem_regions->regions[i].region.memory_class) {
      case I915_MEMORY_CLASS_SYSTEM:
         device->sys.region = mem_regions->regions[i].region;
         device->sys.size = mem_regions->regions[i].probed_size;
         break;
      case I915_MEMORY_CLASS_DEVICE:
         device->vram.region = mem_regions->regions[i].region;
         device->vram.size = mem_regions->regions[i].probed_size;
         break;
      default:
         break;
      }
   }

   free(mem_regions);
   return true;
}

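/* If the memory-region query is unavailable, fall back to a heap size
 * derived from total system RAM and the GTT size.
 */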
static void
anv_init_meminfo(struct anv_physical_device *device, int fd)
{
   if (anv_get_query_meminfo(device, fd))
      return;

   uint64_t heap_size = anv_compute_heap_size(fd, device->gtt_size);

   if (heap_size > (2ull << 30) && !device->supports_48bit_addresses) {
      /* When running with an overridden PCI ID, we may get a GTT size from
       * the kernel that is greater than 2 GiB but the execbuf check for 48bit
       * address support can still fail. Just clamp the address space size to
       * 2 GiB if we don't have 48-bit support.
       */
mesa_logw("%s:%d: The kernel reported a GTT size larger than 2 GiB but "
373
"not support for 48-bit addresses",
374
__FILE__, __LINE__);
      heap_size = 2ull << 30;
   }

   device->sys.size = heap_size;
}

static VkResult
anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
{
   if (anv_gem_get_context_param(fd, 0, I915_CONTEXT_PARAM_GTT_SIZE,
                                 &device->gtt_size) == -1) {
      /* If, for whatever reason, we can't actually get the GTT size from the
       * kernel (too old?) fall back to the aperture size.
       */
      anv_perf_warn(NULL, NULL,
                    "Failed to get I915_CONTEXT_PARAM_GTT_SIZE: %m");

      if (intel_get_aperture_size(fd, &device->gtt_size) == -1) {
         return vk_errorfi(device->instance, NULL,
                           VK_ERROR_INITIALIZATION_FAILED,
                           "failed to get aperture size: %m");
      }
   }

   /* We only allow 48-bit addresses with softpin because knowing the actual
    * address is required for the vertex cache flush workaround.
    */
   device->supports_48bit_addresses = (device->info.ver >= 8) &&
                                      device->gtt_size > (4ULL << 30 /* GiB */);

   anv_init_meminfo(device, fd);
   assert(device->sys.size != 0);

   if (device->vram.size > 0) {
      /* We can create 2 different heaps when we have local memory support,
       * first heap with local memory size and second with system memory size.
       */
      device->memory.heap_count = 2;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = device->vram.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .is_local_mem = true,
      };
      device->memory.heaps[1] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = 0,
         .is_local_mem = false,
      };

      device->memory.type_count = 3;
      device->memory.types[0] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = 0,
      };
      device->memory.types[1] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = 1,
      };
      device->memory.types[2] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = 0,
      };
   } else if (device->info.has_llc) {
      device->memory.heap_count = 1;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .is_local_mem = false,
      };

      /* Big core GPUs share LLC with the CPU and thus one memory type can be
       * both cached and coherent at the same time.
       */
      device->memory.type_count = 1;
      device->memory.types[0] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = 0,
      };
   } else {
      device->memory.heap_count = 1;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .is_local_mem = false,
      };

      /* The spec requires that we expose a host-visible, coherent memory
       * type, but Atom GPUs don't share LLC. Thus we offer two memory types
       * to give the application a choice between cached, but not coherent and
       * coherent but uncached (WC though).
       */
      device->memory.type_count = 2;
      device->memory.types[0] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = 0,
      };
      device->memory.types[1] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = 0,
      };
   }

   device->memory.need_clflush = false;
   for (unsigned i = 0; i < device->memory.type_count; i++) {
      VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
      if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
          !(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
         device->memory.need_clflush = true;
   }

   return VK_SUCCESS;
}

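/* Derive the pipeline cache, driver, and device UUIDs.  The pipeline cache
 * UUID is a SHA-1 over the driver build-id plus the device properties that
 * affect generated shader code.
 */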
static VkResult
anv_physical_device_init_uuids(struct anv_physical_device *device)
{
   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
   if (!note) {
      return vk_errorfi(device->instance, NULL,
                        VK_ERROR_INITIALIZATION_FAILED,
                        "Failed to find build-id");
   }

   unsigned build_id_len = build_id_length(note);
   if (build_id_len < 20) {
      return vk_errorfi(device->instance, NULL,
                        VK_ERROR_INITIALIZATION_FAILED,
                        "build-id too short. It needs to be a SHA");
   }

   memcpy(device->driver_build_sha1, build_id_data(note), 20);

   struct mesa_sha1 sha1_ctx;
   uint8_t sha1[20];
   STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));

   /* The pipeline cache UUID is used for determining when a pipeline cache is
    * invalid. It needs both a driver build and the PCI ID of the device.
    */
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
   _mesa_sha1_update(&sha1_ctx, &device->info.chipset_id,
                     sizeof(device->info.chipset_id));
   _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
                     sizeof(device->always_use_bindless));
   _mesa_sha1_update(&sha1_ctx, &device->has_a64_buffer_access,
                     sizeof(device->has_a64_buffer_access));
   _mesa_sha1_update(&sha1_ctx, &device->has_bindless_images,
                     sizeof(device->has_bindless_images));
   _mesa_sha1_update(&sha1_ctx, &device->has_bindless_samplers,
                     sizeof(device->has_bindless_samplers));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);

   intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE);
   intel_uuid_compute_device_id(device->device_uuid, &device->isl_dev, VK_UUID_SIZE);

   return VK_SUCCESS;
}

static void
anv_physical_device_init_disk_cache(struct anv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
   char renderer[10];
   ASSERTED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
                               device->info.chipset_id);
   assert(len == sizeof(renderer) - 2);

   char timestamp[41];
   _mesa_sha1_format(timestamp, device->driver_build_sha1);

   const uint64_t driver_flags =
      brw_get_compiler_config_value(device->compiler);
   device->disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
#else
   device->disk_cache = NULL;
#endif
}

static void
anv_physical_device_free_disk_cache(struct anv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
   if (device->disk_cache)
      disk_cache_destroy(device->disk_cache);
#else
   assert(device->disk_cache == NULL);
#endif
}

/* The ANV_QUEUE_OVERRIDE environment variable is a comma-separated list of
 * queue overrides.
 *
 * To override the number of queues:
 *  * "gc" is for graphics queues with compute support
 *  * "g" is for graphics queues with no compute support
 *  * "c" is for compute queues with no graphics support
 *
 * For example, ANV_QUEUE_OVERRIDE=gc=2,c=1 would override the number of
 * advertised queues to be 2 queues with graphics+compute support, and 1 queue
 * with compute-only support.
 *
 * ANV_QUEUE_OVERRIDE=c=1 would override the number of advertised queues to
 * include 1 queue with compute-only support, but it will not change the
 * number of graphics+compute queues.
 *
 * ANV_QUEUE_OVERRIDE=gc=0,c=1 would override the number of advertised queues
 * to include 1 queue with compute-only support, and it would override the
 * number of graphics+compute queues to be 0.
 */
static void
anv_override_engine_counts(int *gc_count, int *g_count, int *c_count)
{
   int gc_override = -1;
   int g_override = -1;
   int c_override = -1;
   char *env = getenv("ANV_QUEUE_OVERRIDE");

   if (env == NULL)
      return;

   env = strdup(env);
   char *save = NULL;
   char *next = strtok_r(env, ",", &save);
   while (next != NULL) {
      if (strncmp(next, "gc=", 3) == 0) {
         gc_override = strtol(next + 3, NULL, 0);
      } else if (strncmp(next, "g=", 2) == 0) {
         g_override = strtol(next + 2, NULL, 0);
      } else if (strncmp(next, "c=", 2) == 0) {
         c_override = strtol(next + 2, NULL, 0);
      } else {
         mesa_logw("Ignoring unsupported ANV_QUEUE_OVERRIDE token: %s", next);
      }
      next = strtok_r(NULL, ",", &save);
   }
   free(env);
   if (gc_override >= 0)
      *gc_count = gc_override;
   if (g_override >= 0)
      *g_count = g_override;
   if (*g_count > 0 && *gc_count <= 0 && (gc_override >= 0 || g_override >= 0))
      mesa_logw("ANV_QUEUE_OVERRIDE: gc=0 with g > 0 violates the "
                "Vulkan specification");
   if (c_override >= 0)
      *c_count = c_override;
}

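/* Build the list of advertised queue families from the kernel's engine
 * info, applying any ANV_QUEUE_OVERRIDE adjustments.
 */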
static void
anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
{
   uint32_t family_count = 0;

   if (pdevice->engine_info) {
      int gc_count =
         anv_gem_count_engines(pdevice->engine_info, I915_ENGINE_CLASS_RENDER);
      int g_count = 0;
      int c_count = 0;

      anv_override_engine_counts(&gc_count, &g_count, &c_count);

      if (gc_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                          VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT,
            .queueCount = gc_count,
            .engine_class = I915_ENGINE_CLASS_RENDER,
         };
      }
      if (g_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                          VK_QUEUE_TRANSFER_BIT,
            .queueCount = g_count,
            .engine_class = I915_ENGINE_CLASS_RENDER,
         };
      }
      if (c_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT,
            .queueCount = c_count,
            .engine_class = I915_ENGINE_CLASS_RENDER,
         };
      }
      /* Increase count below when other families are added as a reminder to
       * increase the ANV_MAX_QUEUE_FAMILIES value.
       */
      STATIC_ASSERT(ANV_MAX_QUEUE_FAMILIES >= 3);
   } else {
      /* Default to a single render queue */
      pdevice->queue.families[family_count++] = (struct anv_queue_family) {
         .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                       VK_QUEUE_COMPUTE_BIT |
                       VK_QUEUE_TRANSFER_BIT,
         .queueCount = 1,
         .engine_class = I915_ENGINE_CLASS_RENDER,
      };
      family_count = 1;
   }
   assert(family_count <= ANV_MAX_QUEUE_FAMILIES);
   pdevice->queue.family_count = family_count;
}

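/* Open the DRM render node, check kernel and hardware requirements, and
 * fill in a new anv_physical_device on success.
 */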
static VkResult
anv_physical_device_try_create(struct anv_instance *instance,
                               drmDevicePtr drm_device,
                               struct anv_physical_device **device_out)
{
   const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
   const char *path = drm_device->nodes[DRM_NODE_RENDER];
   VkResult result;
   int fd;
   int master_fd = -1;

   brw_process_intel_debug_variable();

   fd = open(path, O_RDWR | O_CLOEXEC);
   if (fd < 0) {
      if (errno == ENOMEM) {
         return vk_errorfi(instance, NULL, VK_ERROR_OUT_OF_HOST_MEMORY,
                           "Unable to open device %s: out of memory", path);
      }
      return vk_errorfi(instance, NULL, VK_ERROR_INCOMPATIBLE_DRIVER,
                        "Unable to open device %s: %m", path);
   }

   struct intel_device_info devinfo;
   if (!intel_get_device_info_from_fd(fd, &devinfo)) {
      result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
      goto fail_fd;
   }

   const char *device_name = intel_get_device_name(devinfo.chipset_id);

   if (devinfo.is_haswell) {
      mesa_logw("Haswell Vulkan support is incomplete");
   } else if (devinfo.ver == 7 && !devinfo.is_baytrail) {
      mesa_logw("Ivy Bridge Vulkan support is incomplete");
   } else if (devinfo.ver == 7 && devinfo.is_baytrail) {
      mesa_logw("Bay Trail Vulkan support is incomplete");
   } else if (devinfo.ver >= 8 && devinfo.ver <= 12) {
      /* Gfx8-12 fully supported */
   } else {
      result = vk_errorfi(instance, NULL, VK_ERROR_INCOMPATIBLE_DRIVER,
                          "Vulkan not yet supported on %s", device_name);
      goto fail_fd;
   }

   struct anv_physical_device *device =
      vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (device == NULL) {
      result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_fd;
   }

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &anv_physical_device_entrypoints, true);

   result = vk_physical_device_init(&device->vk, &instance->vk,
                                    NULL, /* We set up extensions later */
                                    &dispatch_table);
   if (result != VK_SUCCESS) {
      vk_error(result);
      goto fail_alloc;
   }
   device->instance = instance;

   assert(strlen(path) < ARRAY_SIZE(device->path));
   snprintf(device->path, ARRAY_SIZE(device->path), "%s", path);

   device->info = devinfo;
   device->name = device_name;

   device->no_hw = device->info.no_hw;
   if (getenv("INTEL_NO_HW") != NULL)
      device->no_hw = true;

   device->pci_info.domain = drm_device->businfo.pci->domain;
   device->pci_info.bus = drm_device->businfo.pci->bus;
   device->pci_info.device = drm_device->businfo.pci->dev;
   device->pci_info.function = drm_device->businfo.pci->func;

   device->cmd_parser_version = -1;
   if (device->info.ver == 7) {
      device->cmd_parser_version =
         anv_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION);
      if (device->cmd_parser_version == -1) {
         result = vk_errorfi(device->instance, NULL,
                             VK_ERROR_INITIALIZATION_FAILED,
                             "failed to get command parser version");
         goto fail_base;
      }
   }

   if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) {
      result = vk_errorfi(device->instance, NULL,
                          VK_ERROR_INITIALIZATION_FAILED,
                          "kernel missing gem wait");
      goto fail_base;
   }

   if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) {
      result = vk_errorfi(device->instance, NULL,
                          VK_ERROR_INITIALIZATION_FAILED,
                          "kernel missing execbuf2");
      goto fail_base;
   }

   if (!device->info.has_llc &&
       anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) {
      result = vk_errorfi(device->instance, NULL,
                          VK_ERROR_INITIALIZATION_FAILED,
                          "kernel missing wc mmap");
      goto fail_base;
   }

   if (device->info.ver >= 8 && !device->info.is_cherryview &&
       !anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN)) {
      result = vk_errorfi(device->instance, NULL,
                          VK_ERROR_INITIALIZATION_FAILED,
                          "kernel missing softpin");
      goto fail_alloc;
   }

   device->has_exec_async = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_ASYNC);
   device->has_exec_capture = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CAPTURE);
   device->has_exec_fence = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE);
   device->has_syncobj = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE_ARRAY);
   device->has_syncobj_wait = device->has_syncobj &&
                              anv_gem_supports_syncobj_wait(fd);
   device->has_syncobj_wait_available =
      anv_gem_get_drm_cap(fd, DRM_CAP_SYNCOBJ_TIMELINE) != 0;

   device->has_context_priority = anv_gem_has_context_priority(fd);

   /* Initialize memory regions struct to 0. */
   memset(&device->vram, 0, sizeof(device->vram));
   memset(&device->sys, 0, sizeof(device->sys));

   result = anv_physical_device_init_heaps(device, fd);
   if (result != VK_SUCCESS)
      goto fail_base;

   device->use_softpin = device->info.ver >= 8 &&
                         !device->info.is_cherryview;
   assert(device->use_softpin == device->supports_48bit_addresses);

   device->has_context_isolation =
      anv_gem_get_param(fd, I915_PARAM_HAS_CONTEXT_ISOLATION);

   device->has_exec_timeline =
      anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_TIMELINE_FENCES);
   if (env_var_as_boolean("ANV_QUEUE_THREAD_DISABLE", false))
      device->has_exec_timeline = false;

   device->has_thread_submit =
      device->has_syncobj_wait_available && device->has_exec_timeline;

   device->always_use_bindless =
      env_var_as_boolean("ANV_ALWAYS_BINDLESS", false);

   device->use_call_secondary =
      device->use_softpin &&
      !env_var_as_boolean("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false);

   /* We first got the A64 messages on broadwell and we can only use them if
    * we can pass addresses directly into the shader which requires softpin.
    */
   device->has_a64_buffer_access = device->info.ver >= 8 &&
                                   device->use_softpin;

   /* We first get bindless image access on Skylake.
    */
   device->has_bindless_images = device->info.ver >= 9;

   /* We've had bindless samplers since Ivy Bridge (forever in Vulkan terms)
    * because it's just a matter of setting the sampler address in the sample
    * message header. However, we've not bothered to wire it up for vec4 so
    * we leave it disabled on gfx7.
    */
   device->has_bindless_samplers = device->info.ver >= 8;

   device->has_implicit_ccs = device->info.has_aux_map;

   /* Check if we can read the GPU timestamp register from the CPU */
   uint64_t u64_ignore;
   device->has_reg_timestamp = anv_gem_reg_read(fd, TIMESTAMP | I915_REG_READ_8B_WA,
                                                &u64_ignore) == 0;

   uint64_t avail_mem;
   device->has_mem_available = os_get_available_system_memory(&avail_mem);

   device->always_flush_cache =
      driQueryOptionb(&instance->dri_options, "always_flush_cache");

   device->has_mmap_offset =
      anv_gem_get_param(fd, I915_PARAM_MMAP_GTT_VERSION) >= 4;

   /* GENs prior to 8 do not support EU/Subslice info */
   device->subslice_total = intel_device_info_subslice_total(&device->info);
   device->eu_total = intel_device_info_eu_total(&device->info);

   if (device->info.is_cherryview) {
      /* Logical CS threads = EUs per subslice * num threads per EU */
      uint32_t max_cs_threads =
         device->eu_total / device->subslice_total * device->info.num_thread_per_eu;

      /* Fuse configurations may give more threads than expected, never less. */
      if (max_cs_threads > device->info.max_cs_threads)
         device->info.max_cs_threads = max_cs_threads;
   }

   device->compiler = brw_compiler_create(NULL, &device->info);
   if (device->compiler == NULL) {
      result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_base;
   }
   device->compiler->shader_debug_log = compiler_debug_log;
   device->compiler->shader_perf_log = compiler_perf_log;
   device->compiler->supports_pull_constants = false;
   device->compiler->constant_buffer_0_is_relative =
      device->info.ver < 8 || !device->has_context_isolation;
   device->compiler->supports_shader_constants = true;
   device->compiler->compact_params = false;
   device->compiler->indirect_ubos_use_sampler = device->info.ver < 12;

   /* Broadwell PRM says:
    *
    *   "Before Gfx8, there was a historical configuration control field to
    *    swizzle address bit[6] for in X/Y tiling modes. This was set in three
    *    different places: TILECTL[1:0], ARB_MODE[5:4], and
    *    DISP_ARB_CTL[14:13].
    *
    *    For Gfx8 and subsequent generations, the swizzle fields are all
    *    reserved, and the CPU's memory controller performs all address
    *    swizzling modifications."
    */
   bool swizzled =
      device->info.ver < 8 && anv_gem_get_bit6_swizzle(fd, I915_TILING_X);

   isl_device_init(&device->isl_dev, &device->info, swizzled);

   result = anv_physical_device_init_uuids(device);
   if (result != VK_SUCCESS)
      goto fail_compiler;

   anv_physical_device_init_disk_cache(device);

   if (instance->vk.enabled_extensions.KHR_display) {
      master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
      if (master_fd >= 0) {
         /* prod the device with a GETPARAM call which will fail if
          * we don't have permission to even render on this device
          */
         if (anv_gem_get_param(master_fd, I915_PARAM_CHIPSET_ID) == 0) {
            close(master_fd);
            master_fd = -1;
         }
      }
   }
   device->master_fd = master_fd;

   device->engine_info = anv_gem_get_engine_info(fd);
   anv_physical_device_init_queue_families(device);

   result = anv_init_wsi(device);
   if (result != VK_SUCCESS)
      goto fail_engine_info;

   anv_physical_device_init_perf(device, fd);

   anv_measure_device_init(device);

   get_device_extensions(device, &device->vk.supported_extensions);

   device->local_fd = fd;

   anv_genX(&device->info, init_physical_device_state)(device);

   *device_out = device;

   struct stat st;

   if (stat(primary_path, &st) == 0) {
      device->has_master = true;
      device->master_major = major(st.st_rdev);
      device->master_minor = minor(st.st_rdev);
   } else {
      device->has_master = false;
      device->master_major = 0;
      device->master_minor = 0;
   }

   if (stat(path, &st) == 0) {
      device->has_local = true;
      device->local_major = major(st.st_rdev);
      device->local_minor = minor(st.st_rdev);
   } else {
      device->has_local = false;
      device->local_major = 0;
      device->local_minor = 0;
   }

   return VK_SUCCESS;

 fail_engine_info:
   free(device->engine_info);
   anv_physical_device_free_disk_cache(device);
 fail_compiler:
   ralloc_free(device->compiler);
 fail_base:
   vk_physical_device_finish(&device->vk);
 fail_alloc:
   vk_free(&instance->vk.alloc, device);
 fail_fd:
   close(fd);
   if (master_fd != -1)
      close(master_fd);
   return result;
}

static void
anv_physical_device_destroy(struct anv_physical_device *device)
{
   anv_finish_wsi(device);
   anv_measure_device_destroy(device);
   free(device->engine_info);
   anv_physical_device_free_disk_cache(device);
   ralloc_free(device->compiler);
   ralloc_free(device->perf);
   close(device->local_fd);
   if (device->master_fd >= 0)
      close(device->master_fd);
   vk_physical_device_finish(&device->vk);
   vk_free(&device->instance->vk.alloc, device);
}

VkResult anv_EnumerateInstanceExtensionProperties(
    const char* pLayerName,
    uint32_t* pPropertyCount,
    VkExtensionProperties* pProperties)
{
   if (pLayerName)
      return vk_error(VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(
      &instance_extensions, pPropertyCount, pProperties);
}

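/* Parse driconf (per-application driver option) overrides for this
 * instance.
 */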
static void
anv_init_dri_options(struct anv_instance *instance)
{
   driParseOptionInfo(&instance->available_dri_options, anv_dri_options,
                      ARRAY_SIZE(anv_dri_options));
   driParseConfigFiles(&instance->dri_options,
                       &instance->available_dri_options, 0, "anv", NULL,
                       instance->vk.app_info.app_name,
                       instance->vk.app_info.app_version,
                       instance->vk.app_info.engine_name,
                       instance->vk.app_info.engine_version);
}

VkResult anv_CreateInstance(
    const VkInstanceCreateInfo* pCreateInfo,
    const VkAllocationCallbacks* pAllocator,
    VkInstance* pInstance)
{
   struct anv_instance *instance;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   if (pAllocator == NULL)
      pAllocator = vk_default_allocator();

   instance = vk_alloc(pAllocator, sizeof(*instance), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_instance_dispatch_table dispatch_table;
   vk_instance_dispatch_table_from_entrypoints(
      &dispatch_table, &anv_instance_entrypoints, true);

   result = vk_instance_init(&instance->vk, &instance_extensions,
                             &dispatch_table, pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return vk_error(result);
   }

   instance->physical_devices_enumerated = false;
   list_inithead(&instance->physical_devices);

   instance->pipeline_cache_enabled =
      env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   anv_init_dri_options(instance);

   *pInstance = anv_instance_to_handle(instance);

   return VK_SUCCESS;
}

void anv_DestroyInstance(
    VkInstance _instance,
    const VkAllocationCallbacks* pAllocator)
{
   ANV_FROM_HANDLE(anv_instance, instance, _instance);

   if (!instance)
      return;

   list_for_each_entry_safe(struct anv_physical_device, pdevice,
                            &instance->physical_devices, link)
      anv_physical_device_destroy(pdevice);

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   driDestroyOptionCache(&instance->dri_options);
   driDestroyOptionInfo(&instance->available_dri_options);

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

static VkResult
anv_enumerate_physical_devices(struct anv_instance *instance)
{
   if (instance->physical_devices_enumerated)
      return VK_SUCCESS;

   instance->physical_devices_enumerated = true;

   /* TODO: Check for more devices ? */
   drmDevicePtr devices[8];
   int max_devices;

   max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
   if (max_devices < 1)
      return VK_SUCCESS;

   VkResult result = VK_SUCCESS;
   for (unsigned i = 0; i < (unsigned)max_devices; i++) {
      if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
          devices[i]->bustype == DRM_BUS_PCI &&
          devices[i]->deviceinfo.pci->vendor_id == 0x8086) {

         struct anv_physical_device *pdevice;
         result = anv_physical_device_try_create(instance, devices[i],
                                                 &pdevice);
         /* Incompatible DRM device, skip. */
         if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
            result = VK_SUCCESS;
            continue;
         }

         /* Error creating the physical device, report the error. */
         if (result != VK_SUCCESS)
            break;

         list_addtail(&pdevice->link, &instance->physical_devices);
      }
   }
   drmFreeDevices(devices, max_devices);

   /* If we successfully enumerated any devices, call it success */
   return result;
}

VkResult anv_EnumeratePhysicalDevices(
    VkInstance _instance,
    uint32_t* pPhysicalDeviceCount,
    VkPhysicalDevice* pPhysicalDevices)
{
   ANV_FROM_HANDLE(anv_instance, instance, _instance);
   VK_OUTARRAY_MAKE(out, pPhysicalDevices, pPhysicalDeviceCount);

   VkResult result = anv_enumerate_physical_devices(instance);
   if (result != VK_SUCCESS)
      return result;

   list_for_each_entry(struct anv_physical_device, pdevice,
                       &instance->physical_devices, link) {
      vk_outarray_append(&out, i) {
         *i = anv_physical_device_to_handle(pdevice);
      }
   }

   return vk_outarray_status(&out);
}

VkResult anv_EnumeratePhysicalDeviceGroups(
    VkInstance _instance,
    uint32_t* pPhysicalDeviceGroupCount,
    VkPhysicalDeviceGroupProperties* pPhysicalDeviceGroupProperties)
{
   ANV_FROM_HANDLE(anv_instance, instance, _instance);
   VK_OUTARRAY_MAKE(out, pPhysicalDeviceGroupProperties,
                    pPhysicalDeviceGroupCount);

   VkResult result = anv_enumerate_physical_devices(instance);
   if (result != VK_SUCCESS)
      return result;

   list_for_each_entry(struct anv_physical_device, pdevice,
                       &instance->physical_devices, link) {
      vk_outarray_append(&out, p) {
         p->physicalDeviceCount = 1;
         memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
         p->physicalDevices[0] = anv_physical_device_to_handle(pdevice);
         p->subsetAllocation = false;

         vk_foreach_struct(ext, p->pNext)
            anv_debug_ignored_stype(ext->sType);
      }
   }

   return vk_outarray_status(&out);
}

void anv_GetPhysicalDeviceFeatures(
    VkPhysicalDevice physicalDevice,
    VkPhysicalDeviceFeatures* pFeatures)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);

   *pFeatures = (VkPhysicalDeviceFeatures) {
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = true,
      .sampleRateShading = true,
      .dualSrcBlend = true,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = pdevice->info.ver >= 12,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = true,
      .multiViewport = true,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = pdevice->info.ver >= 8 ||
                                pdevice->info.is_baytrail,
      .textureCompressionASTC_LDR = pdevice->info.ver >= 9, /* FINISHME CHV */
      .textureCompressionBC = true,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = true,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = false,
      .shaderStorageImageReadWithoutFormat = false,
      .shaderStorageImageWriteWithoutFormat = true,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = pdevice->info.ver >= 8 &&
                       pdevice->info.has_64bit_float,
      .shaderInt64 = pdevice->info.ver >= 8,
      .shaderInt16 = pdevice->info.ver >= 8,
      .shaderResourceMinLod = pdevice->info.ver >= 9,
      .variableMultisampleRate = true,
      .inheritedQueries = true,
   };

   /* We can't do image stores in vec4 shaders */
   pFeatures->vertexPipelineStoresAndAtomics =
      pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
      pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];

   struct vk_app_info *app_info = &pdevice->instance->vk.app_info;

   /* The new DOOM and Wolfenstein games require depthBounds without
    * checking for it. They seem to run fine without it so just claim it's
    * there and accept the consequences.
    */
   if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
      pFeatures->depthBounds = true;
}

static void
anv_get_physical_device_features_1_1(struct anv_physical_device *pdevice,
                                     VkPhysicalDeviceVulkan11Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);

   f->storageBuffer16BitAccess = pdevice->info.ver >= 8;
   f->uniformAndStorageBuffer16BitAccess = pdevice->info.ver >= 8;
   f->storagePushConstant16 = pdevice->info.ver >= 8;
   f->storageInputOutput16 = false;
   f->multiview = true;
   f->multiviewGeometryShader = true;
   f->multiviewTessellationShader = true;
   f->variablePointersStorageBuffer = true;
   f->variablePointers = true;
   f->protectedMemory = false;
   f->samplerYcbcrConversion = true;
   f->shaderDrawParameters = true;
}

static void
anv_get_physical_device_features_1_2(struct anv_physical_device *pdevice,
                                     VkPhysicalDeviceVulkan12Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);

   f->samplerMirrorClampToEdge = true;
   f->drawIndirectCount = true;
   f->storageBuffer8BitAccess = pdevice->info.ver >= 8;
   f->uniformAndStorageBuffer8BitAccess = pdevice->info.ver >= 8;
   f->storagePushConstant8 = pdevice->info.ver >= 8;
   f->shaderBufferInt64Atomics = pdevice->info.ver >= 9 &&
                                 pdevice->use_softpin;
   f->shaderSharedInt64Atomics = false;
   f->shaderFloat16 = pdevice->info.ver >= 8;
   f->shaderInt8 = pdevice->info.ver >= 8;

   bool descIndexing = pdevice->has_a64_buffer_access &&
                       pdevice->has_bindless_images;
   f->descriptorIndexing = descIndexing;
   f->shaderInputAttachmentArrayDynamicIndexing = false;
   f->shaderUniformTexelBufferArrayDynamicIndexing = descIndexing;
   f->shaderStorageTexelBufferArrayDynamicIndexing = descIndexing;
   f->shaderUniformBufferArrayNonUniformIndexing = false;
   f->shaderSampledImageArrayNonUniformIndexing = descIndexing;
   f->shaderStorageBufferArrayNonUniformIndexing = descIndexing;
   f->shaderStorageImageArrayNonUniformIndexing = descIndexing;
   f->shaderInputAttachmentArrayNonUniformIndexing = false;
   f->shaderUniformTexelBufferArrayNonUniformIndexing = descIndexing;
   f->shaderStorageTexelBufferArrayNonUniformIndexing = descIndexing;
   f->descriptorBindingUniformBufferUpdateAfterBind = false;
   f->descriptorBindingSampledImageUpdateAfterBind = descIndexing;
   f->descriptorBindingStorageImageUpdateAfterBind = descIndexing;
   f->descriptorBindingStorageBufferUpdateAfterBind = descIndexing;
   f->descriptorBindingUniformTexelBufferUpdateAfterBind = descIndexing;
   f->descriptorBindingStorageTexelBufferUpdateAfterBind = descIndexing;
   f->descriptorBindingUpdateUnusedWhilePending = descIndexing;
   f->descriptorBindingPartiallyBound = descIndexing;
   f->descriptorBindingVariableDescriptorCount = descIndexing;
   f->runtimeDescriptorArray = descIndexing;

   f->samplerFilterMinmax = pdevice->info.ver >= 9;
   f->scalarBlockLayout = true;
   f->imagelessFramebuffer = true;
   f->uniformBufferStandardLayout = true;
   f->shaderSubgroupExtendedTypes = true;
   f->separateDepthStencilLayouts = true;
   f->hostQueryReset = true;
   f->timelineSemaphore = true;
   f->bufferDeviceAddress = pdevice->has_a64_buffer_access;
   f->bufferDeviceAddressCaptureReplay = pdevice->has_a64_buffer_access;
   f->bufferDeviceAddressMultiDevice = false;
   f->vulkanMemoryModel = true;
   f->vulkanMemoryModelDeviceScope = true;
   f->vulkanMemoryModelAvailabilityVisibilityChains = true;
   f->shaderOutputViewportIndex = true;
   f->shaderOutputLayer = true;
   f->subgroupBroadcastDynamicId = true;
}

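/* Fill the Vulkan 1.1/1.2 core feature structs once, then use the
 * CORE_FEATURE() macro below to answer per-extension feature queries from
 * them.
 */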
void anv_GetPhysicalDeviceFeatures2(
    VkPhysicalDevice physicalDevice,
    VkPhysicalDeviceFeatures2* pFeatures)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   anv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);

   VkPhysicalDeviceVulkan11Features core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
   };
   anv_get_physical_device_features_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Features core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
   };
   anv_get_physical_device_features_1_2(pdevice, &core_1_2);

#define CORE_FEATURE(major, minor, feature) \
   features->feature = core_##major##_##minor.feature


   vk_foreach_struct(ext, pFeatures->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
         VkPhysicalDevice4444FormatsFeaturesEXT *features =
            (VkPhysicalDevice4444FormatsFeaturesEXT *)ext;
         features->formatA4R4G4B4 = true;
         features->formatA4B4G4R4 = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: {
         VkPhysicalDevice8BitStorageFeaturesKHR *features =
            (VkPhysicalDevice8BitStorageFeaturesKHR *)ext;
         CORE_FEATURE(1, 2, storageBuffer8BitAccess);
         CORE_FEATURE(1, 2, uniformAndStorageBuffer8BitAccess);
         CORE_FEATURE(1, 2, storagePushConstant8);
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
         VkPhysicalDevice16BitStorageFeatures *features =
            (VkPhysicalDevice16BitStorageFeatures *)ext;
         CORE_FEATURE(1, 1, storageBuffer16BitAccess);
         CORE_FEATURE(1, 1, uniformAndStorageBuffer16BitAccess);
         CORE_FEATURE(1, 1, storagePushConstant16);
         CORE_FEATURE(1, 1, storageInputOutput16);
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR: {
         VkPhysicalDeviceAccelerationStructureFeaturesKHR *features = (void *)ext;
         features->accelerationStructure = false;
         features->accelerationStructureCaptureReplay = false;
         features->accelerationStructureIndirectBuild = false;
         features->accelerationStructureHostCommands = false;
         features->descriptorBindingAccelerationStructureUpdateAfterBind = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
         VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features = (void *)ext;
         features->bufferDeviceAddress = pdevice->has_a64_buffer_access;
         features->bufferDeviceAddressCaptureReplay = false;
         features->bufferDeviceAddressMultiDevice = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_KHR: {
         VkPhysicalDeviceBufferDeviceAddressFeaturesKHR *features = (void *)ext;
         CORE_FEATURE(1, 2, bufferDeviceAddress);
         CORE_FEATURE(1, 2, bufferDeviceAddressCaptureReplay);
         CORE_FEATURE(1, 2, bufferDeviceAddressMultiDevice);
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: {
         VkPhysicalDeviceColorWriteEnableFeaturesEXT *features =
            (VkPhysicalDeviceColorWriteEnableFeaturesEXT *)ext;
         features->colorWriteEnable = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
         VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
            (VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
         features->computeDerivativeGroupQuads = true;
         features->computeDerivativeGroupLinear = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
         VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
            (VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
         features->conditionalRendering = pdevice->info.verx10 >= 75;
         features->inheritedConditionalRendering = pdevice->info.verx10 >= 75;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
         VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =
            (VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;
         features->customBorderColors = pdevice->info.ver >= 8;
         features->customBorderColorWithoutFormat = pdevice->info.ver >= 8;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
         VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
            (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
         features->depthClipEnable = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: {
         VkPhysicalDeviceFloat16Int8FeaturesKHR *features = (void *)ext;
         CORE_FEATURE(1, 2, shaderFloat16);
         CORE_FEATURE(1, 2, shaderInt8);
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT: {
         VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *features =
            (VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *)ext;
         features->fragmentShaderSampleInterlock = pdevice->info.ver >= 9;
         features->fragmentShaderPixelInterlock = pdevice->info.ver >= 9;
         features->fragmentShaderShadingRateInterlock = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES_EXT: {
         VkPhysicalDeviceHostQueryResetFeaturesEXT *features =
            (VkPhysicalDeviceHostQueryResetFeaturesEXT *)ext;
         CORE_FEATURE(1, 2, hostQueryReset);
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT: {
         VkPhysicalDeviceDescriptorIndexingFeaturesEXT *features =
            (VkPhysicalDeviceDescriptorIndexingFeaturesEXT *)ext;
         CORE_FEATURE(1, 2, shaderInputAttachmentArrayDynamicIndexing);
         CORE_FEATURE(1, 2, shaderUniformTexelBufferArrayDynamicIndexing);
         CORE_FEATURE(1, 2, shaderStorageTexelBufferArrayDynamicIndexing);
         CORE_FEATURE(1, 2, shaderUniformBufferArrayNonUniformIndexing);
         CORE_FEATURE(1, 2, shaderSampledImageArrayNonUniformIndexing);
         CORE_FEATURE(1, 2, shaderStorageBufferArrayNonUniformIndexing);
         CORE_FEATURE(1, 2, shaderStorageImageArrayNonUniformIndexing);
         CORE_FEATURE(1, 2, shaderInputAttachmentArrayNonUniformIndexing);
         CORE_FEATURE(1, 2, shaderUniformTexelBufferArrayNonUniformIndexing);
         CORE_FEATURE(1, 2, shaderStorageTexelBufferArrayNonUniformIndexing);
         CORE_FEATURE(1, 2, descriptorBindingUniformBufferUpdateAfterBind);
         CORE_FEATURE(1, 2, descriptorBindingSampledImageUpdateAfterBind);
         CORE_FEATURE(1, 2, descriptorBindingStorageImageUpdateAfterBind);
         CORE_FEATURE(1, 2, descriptorBindingStorageBufferUpdateAfterBind);
         CORE_FEATURE(1, 2, descriptorBindingUniformTexelBufferUpdateAfterBind);
         CORE_FEATURE(1, 2, descriptorBindingStorageTexelBufferUpdateAfterBind);
         CORE_FEATURE(1, 2, descriptorBindingUpdateUnusedWhilePending);
         CORE_FEATURE(1, 2, descriptorBindingPartiallyBound);
         CORE_FEATURE(1, 2, descriptorBindingVariableDescriptorCount);
         CORE_FEATURE(1, 2, runtimeDescriptorArray);
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
         VkPhysicalDeviceFragmentShadingRateFeaturesKHR *features =
            (VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext;
         features->attachmentFragmentShadingRate = false;
         features->pipelineFragmentShadingRate = true;
         features->primitiveFragmentShadingRate = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ROBUSTNESS_FEATURES_EXT: {
         VkPhysicalDeviceImageRobustnessFeaturesEXT *features =
            (VkPhysicalDeviceImageRobustnessFeaturesEXT *)ext;
         features->robustImageAccess = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
         VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
            (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
         features->indexTypeUint8 = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
         VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
            (VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;
         features->inlineUniformBlock = true;
         features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
         VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
            (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
         features->rectangularLines = true;
         features->bresenhamLines = true;
         /* Support for Smooth lines with MSAA was removed on gfx11. From the
          * BSpec section "Multisample ModesState" table for "AA Line Support
          * Requirements":
          *
          *    GFX10:BUG:######## NUM_MULTISAMPLES == 1
          *
          * Fortunately, this isn't a case most people care about.
          */
         features->smoothLines = pdevice->info.ver < 10;
         features->stippledRectangularLines = false;
         features->stippledBresenhamLines = true;
         features->stippledSmoothLines = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: {
         VkPhysicalDeviceMultiviewFeatures *features =
            (VkPhysicalDeviceMultiviewFeatures *)ext;
         CORE_FEATURE(1, 1, multiview);
         CORE_FEATURE(1, 1, multiviewGeometryShader);
         CORE_FEATURE(1, 1, multiviewTessellationShader);
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES_KHR: {
         VkPhysicalDeviceImagelessFramebufferFeaturesKHR *features =
            (VkPhysicalDeviceImagelessFramebufferFeaturesKHR *)ext;
         CORE_FEATURE(1, 2, imagelessFramebuffer);
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: {
         VkPhysicalDevicePerformanceQueryFeaturesKHR *feature =
            (VkPhysicalDevicePerformanceQueryFeaturesKHR *)ext;
         feature->performanceCounterQueryPools = true;
         /* HW only supports a single configuration at a time. */
         feature->performanceCounterMultipleQueryPools = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT: {
         VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *features =
            (VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *)ext;
         features->pipelineCreationCacheControl = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
         VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
            (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
         features->pipelineExecutableInfo = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: {
         VkPhysicalDevicePrivateDataFeaturesEXT *features = (void *)ext;
         features->privateData = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
         VkPhysicalDeviceProtectedMemoryFeatures *features = (void *)ext;
         CORE_FEATURE(1, 1, protectedMemory);
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: {
         VkPhysicalDeviceProvokingVertexFeaturesEXT *features =
            (VkPhysicalDeviceProvokingVertexFeaturesEXT *)ext;
         features->provokingVertexLast = true;
         features->transformFeedbackPreservesProvokingVertex = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
         VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext;
         features->robustBufferAccess2 = true;
         features->robustImageAccess2 = true;
         features->nullDescriptor = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
         VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =
            (VkPhysicalDeviceSamplerYcbcrConversionFeatures *) ext;
         CORE_FEATURE(1, 1, samplerYcbcrConversion);
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES_EXT: {
         VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *features =
            (VkPhysicalDeviceScalarBlockLayoutFeaturesEXT *)ext;
         CORE_FEATURE(1, 2, scalarBlockLayout);
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SEPARATE_DEPTH_STENCIL_LAYOUTS_FEATURES_KHR: {
         VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR *features =
            (VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR *)ext;
         CORE_FEATURE(1, 2, separateDepthStencilLayouts);
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
         VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features = (void *)ext;
1669
features->shaderBufferFloat32Atomics = true;
1670
features->shaderBufferFloat32AtomicAdd = false;
1671
features->shaderBufferFloat64Atomics = false;
1672
features->shaderBufferFloat64AtomicAdd = false;
1673
features->shaderSharedFloat32Atomics = true;
1674
features->shaderSharedFloat32AtomicAdd = false;
1675
features->shaderSharedFloat64Atomics = false;
1676
features->shaderSharedFloat64AtomicAdd = false;
1677
features->shaderImageFloat32Atomics = true;
1678
features->shaderImageFloat32AtomicAdd = false;
1679
features->sparseImageFloat32Atomics = false;
1680
features->sparseImageFloat32AtomicAdd = false;
1681
break;
1682
}
1683
1684
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR: {
1685
VkPhysicalDeviceShaderAtomicInt64FeaturesKHR *features = (void *)ext;
1686
CORE_FEATURE(1, 2, shaderBufferInt64Atomics);
1687
CORE_FEATURE(1, 2, shaderSharedInt64Atomics);
1688
break;
1689
}
1690
1691
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT: {
1692
VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *features = (void *)ext;
1693
features->shaderDemoteToHelperInvocation = true;
1694
break;
1695
}
1696
1697
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {
1698
VkPhysicalDeviceShaderClockFeaturesKHR *features =
1699
(VkPhysicalDeviceShaderClockFeaturesKHR *)ext;
1700
features->shaderSubgroupClock = true;
1701
features->shaderDeviceClock = false;
1702
break;
1703
}
1704
1705
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {
1706
VkPhysicalDeviceShaderDrawParametersFeatures *features = (void *)ext;
1707
CORE_FEATURE(1, 1, shaderDrawParameters);
1708
break;
1709
}
1710
1711
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_FUNCTIONS_2_FEATURES_INTEL: {
1712
VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL *features =
1713
(VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL *)ext;
1714
features->shaderIntegerFunctions2 = true;
1715
break;
1716
}
1717
1718
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES_KHR: {
1719
VkPhysicalDeviceShaderSubgroupExtendedTypesFeaturesKHR *features =
1720
(VkPhysicalDeviceShaderSubgroupExtendedTypesFeaturesKHR *)ext;
1721
CORE_FEATURE(1, 2, shaderSubgroupExtendedTypes);
1722
break;
1723
}
1724
1725
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_UNIFORM_CONTROL_FLOW_FEATURES_KHR: {
1726
VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *features =
1727
(VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *)ext;
1728
features->shaderSubgroupUniformControlFlow = true;
1729
break;
1730
}
1731
1732
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_TERMINATE_INVOCATION_FEATURES_KHR: {
1733
VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *features =
1734
(VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *)ext;
1735
features->shaderTerminateInvocation = true;
1736
break;
1737
}
1738
1739
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: {
1740
VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *features =
1741
(VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *)ext;
1742
features->subgroupSizeControl = true;
1743
features->computeFullSubgroups = true;
1744
break;
1745
}
1746
1747
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
1748
VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
1749
(VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
1750
features->texelBufferAlignment = true;
1751
break;
1752
}
1753
1754
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR: {
1755
VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *features =
1756
(VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *) ext;
1757
CORE_FEATURE(1, 2, timelineSemaphore);
1758
break;
1759
}
1760
1761
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: {
1762
VkPhysicalDeviceVariablePointersFeatures *features = (void *)ext;
1763
CORE_FEATURE(1, 1, variablePointersStorageBuffer);
1764
CORE_FEATURE(1, 1, variablePointers);
1765
break;
1766
}
1767
1768
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
1769
VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
1770
(VkPhysicalDeviceTransformFeedbackFeaturesEXT *)ext;
1771
features->transformFeedback = true;
1772
features->geometryStreams = true;
1773
break;
1774
}
1775
1776
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES_KHR: {
1777
VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR *features =
1778
(VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR *)ext;
1779
CORE_FEATURE(1, 2, uniformBufferStandardLayout);
1780
break;
1781
}
1782
1783
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
1784
VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
1785
(VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
1786
features->vertexAttributeInstanceRateDivisor = true;
1787
features->vertexAttributeInstanceRateZeroDivisor = true;
1788
break;
1789
}
1790
1791
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES:
1792
anv_get_physical_device_features_1_1(pdevice, (void *)ext);
1793
break;
1794
1795
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES:
1796
anv_get_physical_device_features_1_2(pdevice, (void *)ext);
1797
break;
1798
1799
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR: {
1800
VkPhysicalDeviceVulkanMemoryModelFeaturesKHR *features = (void *)ext;
1801
CORE_FEATURE(1, 2, vulkanMemoryModel);
1802
CORE_FEATURE(1, 2, vulkanMemoryModelDeviceScope);
1803
CORE_FEATURE(1, 2, vulkanMemoryModelAvailabilityVisibilityChains);
1804
break;
1805
}
1806
1807
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR: {
1808
VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *features =
1809
(VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *)ext;
1810
features->workgroupMemoryExplicitLayout = true;
1811
features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
1812
features->workgroupMemoryExplicitLayout8BitAccess = true;
1813
features->workgroupMemoryExplicitLayout16BitAccess = true;
1814
break;
1815
}
1816
1817
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
1818
VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
1819
(VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *)ext;
1820
features->ycbcrImageArrays = true;
1821
break;
1822
}
1823
1824
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
1825
VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features =
1826
(VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *)ext;
1827
features->extendedDynamicState = true;
1828
break;
1829
}
1830
1831
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT: {
1832
VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *features =
1833
(VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *)ext;
1834
features->extendedDynamicState2 = true;
1835
features->extendedDynamicState2LogicOp = true;
1836
features->extendedDynamicState2PatchControlPoints = false;
1837
break;
1838
}
1839
1840
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ZERO_INITIALIZE_WORKGROUP_MEMORY_FEATURES_KHR: {
1841
VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *features =
1842
(VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *)ext;
1843
features->shaderZeroInitializeWorkgroupMemory = true;
1844
break;
1845
}
1846
1847
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT: {
1848
VkPhysicalDeviceMultiDrawFeaturesEXT *features = (VkPhysicalDeviceMultiDrawFeaturesEXT *)ext;
1849
features->multiDraw = true;
1850
break;
1851
}
1852
1853
default:
1854
anv_debug_ignored_stype(ext->sType);
1855
break;
1856
}
1857
}
1858
1859
#undef CORE_FEATURE
1860
}
1861
1862
#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS 64
1863
1864
#define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
1865
#define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS 256
1866
1867
#define MAX_CUSTOM_BORDER_COLORS 4096
1868
1869
void anv_GetPhysicalDeviceProperties(
1870
VkPhysicalDevice physicalDevice,
1871
VkPhysicalDeviceProperties* pProperties)
1872
{
1873
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
1874
const struct intel_device_info *devinfo = &pdevice->info;
1875
1876
/* See assertions made when programming the buffer surface state. */
1877
const uint32_t max_raw_buffer_sz = devinfo->ver >= 7 ?
1878
(1ul << 30) : (1ul << 27);
1879
1880
const uint32_t max_ssbos = pdevice->has_a64_buffer_access ? UINT16_MAX : 64;
1881
const uint32_t max_textures =
1882
pdevice->has_bindless_images ? UINT16_MAX : 128;
1883
const uint32_t max_samplers =
1884
pdevice->has_bindless_samplers ? UINT16_MAX :
1885
(devinfo->verx10 >= 75) ? 128 : 16;
1886
const uint32_t max_images =
1887
pdevice->has_bindless_images ? UINT16_MAX : MAX_IMAGES;
1888
1889
/* If we can use bindless for everything, claim a high per-stage limit,
* otherwise use the binding table size, minus the slots reserved for
* render targets and one slot for the descriptor buffer. */
const uint32_t max_per_stage =
pdevice->has_bindless_images && pdevice->has_a64_buffer_access
? UINT32_MAX : MAX_BINDING_TABLE_SIZE - MAX_RTS - 1;
1895
1896
/* Limit max_threads to 64 for the GPGPU_WALKER command */
1897
const uint32_t max_workgroup_size = 32 * MIN2(64, devinfo->max_cs_threads);
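/* A quick sanity check on the arithmetic above (illustrative numbers):
* compute shaders dispatch at most SIMD32, so a part with 64 or more
* CS threads gets 32 * 64 = 2048 invocations per workgroup, while a
* hypothetical part with only 56 threads would get 32 * 56 = 1792.
*/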
1898
1899
VkSampleCountFlags sample_counts =
1900
isl_device_get_sample_counts(&pdevice->isl_dev);
1901
1902
1903
VkPhysicalDeviceLimits limits = {
1904
.maxImageDimension1D = (1 << 14),
1905
.maxImageDimension2D = (1 << 14),
1906
.maxImageDimension3D = (1 << 11),
1907
.maxImageDimensionCube = (1 << 14),
1908
.maxImageArrayLayers = (1 << 11),
1909
.maxTexelBufferElements = 128 * 1024 * 1024,
1910
.maxUniformBufferRange = (1ul << 27),
1911
.maxStorageBufferRange = max_raw_buffer_sz,
1912
.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
1913
.maxMemoryAllocationCount = UINT32_MAX,
1914
.maxSamplerAllocationCount = 64 * 1024,
1915
.bufferImageGranularity = 64, /* A cache line */
1916
.sparseAddressSpaceSize = 0,
1917
.maxBoundDescriptorSets = MAX_SETS,
1918
.maxPerStageDescriptorSamplers = max_samplers,
1919
.maxPerStageDescriptorUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,
1920
.maxPerStageDescriptorStorageBuffers = max_ssbos,
1921
.maxPerStageDescriptorSampledImages = max_textures,
1922
.maxPerStageDescriptorStorageImages = max_images,
1923
.maxPerStageDescriptorInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS,
1924
.maxPerStageResources = max_per_stage,
1925
.maxDescriptorSetSamplers = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
1926
.maxDescriptorSetUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS, /* number of stages * maxPerStageDescriptorUniformBuffers */
1927
.maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
1928
.maxDescriptorSetStorageBuffers = 6 * max_ssbos, /* number of stages * maxPerStageDescriptorStorageBuffers */
1929
.maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
1930
.maxDescriptorSetSampledImages = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */
1931
.maxDescriptorSetStorageImages = 6 * max_images, /* number of stages * maxPerStageDescriptorStorageImages */
1932
.maxDescriptorSetInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
1933
.maxVertexInputAttributes = MAX_VBS,
1934
.maxVertexInputBindings = MAX_VBS,
1935
.maxVertexInputAttributeOffset = 2047,
1936
.maxVertexInputBindingStride = 2048,
1937
.maxVertexOutputComponents = 128,
1938
.maxTessellationGenerationLevel = 64,
1939
.maxTessellationPatchSize = 32,
1940
.maxTessellationControlPerVertexInputComponents = 128,
1941
.maxTessellationControlPerVertexOutputComponents = 128,
1942
.maxTessellationControlPerPatchOutputComponents = 128,
1943
.maxTessellationControlTotalOutputComponents = 2048,
1944
.maxTessellationEvaluationInputComponents = 128,
1945
.maxTessellationEvaluationOutputComponents = 128,
1946
.maxGeometryShaderInvocations = 32,
1947
.maxGeometryInputComponents = devinfo->ver >= 8 ? 128 : 64,
1948
.maxGeometryOutputComponents = 128,
1949
.maxGeometryOutputVertices = 256,
1950
.maxGeometryTotalOutputComponents = 1024,
1951
.maxFragmentInputComponents = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */
1952
.maxFragmentOutputAttachments = 8,
1953
.maxFragmentDualSrcAttachments = 1,
1954
.maxFragmentCombinedOutputResources = 8,
1955
.maxComputeSharedMemorySize = 64 * 1024,
1956
.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
1957
.maxComputeWorkGroupInvocations = max_workgroup_size,
1958
.maxComputeWorkGroupSize = {
1959
max_workgroup_size,
1960
max_workgroup_size,
1961
max_workgroup_size,
1962
},
1963
.subPixelPrecisionBits = 8,
1964
.subTexelPrecisionBits = 8,
1965
.mipmapPrecisionBits = 8,
1966
.maxDrawIndexedIndexValue = UINT32_MAX,
1967
.maxDrawIndirectCount = UINT32_MAX,
1968
.maxSamplerLodBias = 16,
1969
.maxSamplerAnisotropy = 16,
1970
.maxViewports = MAX_VIEWPORTS,
1971
.maxViewportDimensions = { (1 << 14), (1 << 14) },
1972
.viewportBoundsRange = { INT16_MIN, INT16_MAX },
1973
.viewportSubPixelBits = 13, /* We take a float? */
1974
.minMemoryMapAlignment = 4096, /* A page */
1975
/* The dataport requires texel alignment so we need to assume a worst
1976
* case of R32G32B32A32 which is 16 bytes.
1977
*/
1978
.minTexelBufferOffsetAlignment = 16,
1979
.minUniformBufferOffsetAlignment = ANV_UBO_ALIGNMENT,
1980
.minStorageBufferOffsetAlignment = ANV_SSBO_ALIGNMENT,
1981
.minTexelOffset = -8,
1982
.maxTexelOffset = 7,
1983
.minTexelGatherOffset = -32,
1984
.maxTexelGatherOffset = 31,
1985
.minInterpolationOffset = -0.5,
1986
.maxInterpolationOffset = 0.4375,
1987
.subPixelInterpolationOffsetBits = 4,
1988
.maxFramebufferWidth = (1 << 14),
1989
.maxFramebufferHeight = (1 << 14),
1990
.maxFramebufferLayers = (1 << 11),
1991
.framebufferColorSampleCounts = sample_counts,
1992
.framebufferDepthSampleCounts = sample_counts,
1993
.framebufferStencilSampleCounts = sample_counts,
1994
.framebufferNoAttachmentsSampleCounts = sample_counts,
1995
.maxColorAttachments = MAX_RTS,
1996
.sampledImageColorSampleCounts = sample_counts,
1997
.sampledImageIntegerSampleCounts = sample_counts,
1998
.sampledImageDepthSampleCounts = sample_counts,
1999
.sampledImageStencilSampleCounts = sample_counts,
2000
.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
2001
.maxSampleMaskWords = 1,
2002
.timestampComputeAndGraphics = true,
2003
.timestampPeriod = 1000000000.0 / devinfo->timestamp_frequency,
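/* timestampPeriod is nanoseconds per timestamp tick, hence 1e9 divided
* by the tick frequency. As an illustrative example, a 12 MHz timestamp
* frequency would be reported as roughly 83.3 ns per tick.
*/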
2004
.maxClipDistances = 8,
2005
.maxCullDistances = 8,
2006
.maxCombinedClipAndCullDistances = 8,
2007
.discreteQueuePriorities = 2,
2008
.pointSizeRange = { 0.125, 255.875 },
2009
.lineWidthRange = {
2010
0.0,
2011
(devinfo->ver >= 9 || devinfo->is_cherryview) ?
2012
2047.9921875 : 7.9921875,
2013
},
2014
.pointSizeGranularity = (1.0 / 8.0),
2015
.lineWidthGranularity = (1.0 / 128.0),
2016
.strictLines = false,
2017
.standardSampleLocations = true,
2018
.optimalBufferCopyOffsetAlignment = 128,
2019
.optimalBufferCopyRowPitchAlignment = 128,
2020
.nonCoherentAtomSize = 64,
2021
};
2022
2023
*pProperties = (VkPhysicalDeviceProperties) {
2024
.apiVersion = ANV_API_VERSION,
2025
.driverVersion = vk_get_driver_version(),
2026
.vendorID = 0x8086,
2027
.deviceID = pdevice->info.chipset_id,
2028
.deviceType = pdevice->info.has_local_mem ?
2029
VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
2030
VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
2031
.limits = limits,
2032
.sparseProperties = {0}, /* Broadwell doesn't do sparse. */
2033
};
2034
2035
snprintf(pProperties->deviceName, sizeof(pProperties->deviceName),
2036
"%s", pdevice->name);
2037
memcpy(pProperties->pipelineCacheUUID,
2038
pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
2039
}
2040
2041
static void
2042
anv_get_physical_device_properties_1_1(struct anv_physical_device *pdevice,
2043
VkPhysicalDeviceVulkan11Properties *p)
2044
{
2045
assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);
2046
2047
memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
2048
memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
2049
memset(p->deviceLUID, 0, VK_LUID_SIZE);
2050
p->deviceNodeMask = 0;
2051
p->deviceLUIDValid = false;
2052
2053
p->subgroupSize = BRW_SUBGROUP_SIZE;
2054
VkShaderStageFlags scalar_stages = 0;
2055
for (unsigned stage = 0; stage < MESA_VULKAN_SHADER_STAGES; stage++) {
2056
if (pdevice->compiler->scalar_stage[stage])
2057
scalar_stages |= mesa_to_vk_shader_stage(stage);
2058
}
2059
p->subgroupSupportedStages = scalar_stages;
2060
p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
2061
VK_SUBGROUP_FEATURE_VOTE_BIT |
2062
VK_SUBGROUP_FEATURE_BALLOT_BIT |
2063
VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
2064
VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
2065
VK_SUBGROUP_FEATURE_QUAD_BIT;
2066
if (pdevice->info.ver >= 8) {
2067
/* TODO: There's no technical reason why these can't be made to
* work on gfx7 but they don't at the moment, so it's better to leave
* the feature disabled than enabled and broken.
*/
2071
p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
2072
VK_SUBGROUP_FEATURE_CLUSTERED_BIT;
2073
}
2074
p->subgroupQuadOperationsInAllStages = pdevice->info.ver >= 8;
2075
2076
p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY;
2077
p->maxMultiviewViewCount = 16;
2078
p->maxMultiviewInstanceIndex = UINT32_MAX / 16;
2079
p->protectedNoFault = false;
2080
/* This value doesn't matter for us today as our per-stage descriptors are
2081
* the real limit.
2082
*/
2083
p->maxPerSetDescriptors = 1024;
2084
p->maxMemoryAllocationSize = MAX_MEMORY_ALLOCATION_SIZE;
2085
}
2086
2087
static void
2088
anv_get_physical_device_properties_1_2(struct anv_physical_device *pdevice,
2089
VkPhysicalDeviceVulkan12Properties *p)
2090
{
2091
assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);
2092
2093
p->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA_KHR;
2094
memset(p->driverName, 0, sizeof(p->driverName));
2095
snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE_KHR,
2096
"Intel open-source Mesa driver");
2097
memset(p->driverInfo, 0, sizeof(p->driverInfo));
2098
snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR,
2099
"Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
2100
p->conformanceVersion = (VkConformanceVersionKHR) {
2101
.major = 1,
2102
.minor = 2,
2103
.subminor = 0,
2104
.patch = 0,
2105
};
2106
2107
p->denormBehaviorIndependence =
2108
VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
2109
p->roundingModeIndependence =
2110
VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE_KHR;
2111
2112
/* Broadwell does not support HF denorms and there are restrictions on
* other gens. According to Kabylake's PRM:
*
*    "math - Extended Math Function
*    [...]
*    Restriction : Half-float denorms are always retained."
*/
2119
p->shaderDenormFlushToZeroFloat16 = false;
2120
p->shaderDenormPreserveFloat16 = pdevice->info.ver > 8;
2121
p->shaderRoundingModeRTEFloat16 = true;
2122
p->shaderRoundingModeRTZFloat16 = true;
2123
p->shaderSignedZeroInfNanPreserveFloat16 = true;
2124
2125
p->shaderDenormFlushToZeroFloat32 = true;
2126
p->shaderDenormPreserveFloat32 = true;
2127
p->shaderRoundingModeRTEFloat32 = true;
2128
p->shaderRoundingModeRTZFloat32 = true;
2129
p->shaderSignedZeroInfNanPreserveFloat32 = true;
2130
2131
p->shaderDenormFlushToZeroFloat64 = true;
2132
p->shaderDenormPreserveFloat64 = true;
2133
p->shaderRoundingModeRTEFloat64 = true;
2134
p->shaderRoundingModeRTZFloat64 = true;
2135
p->shaderSignedZeroInfNanPreserveFloat64 = true;
2136
2137
/* It's a bit hard to exactly map our implementation to the limits
* described by Vulkan. The bindless surface handle in the extended
* message descriptors is 20 bits and it's an index into the table of
* RENDER_SURFACE_STATE structs that starts at bindless surface base
* address. This means that we can have at most 1M surface states
* allocated at any given time. Since most image views take two
* descriptors, this means we have a limit of about 500K image views.
*
* However, since we allocate surface states at vkCreateImageView time,
* this means our limit is actually something on the order of 500K image
* views allocated at any time. The actual limit described by Vulkan, on
* the other hand, is a limit of how many you can have in a descriptor set.
* Assuming anyone using 1M descriptors will be using the same image view
* twice a bunch of times (or a bunch of null descriptors), we can safely
* advertise a larger limit here.
*/
const unsigned max_bindless_views = 1 << 20;
2154
p->maxUpdateAfterBindDescriptorsInAllPools = max_bindless_views;
2155
p->shaderUniformBufferArrayNonUniformIndexingNative = false;
2156
p->shaderSampledImageArrayNonUniformIndexingNative = false;
2157
p->shaderStorageBufferArrayNonUniformIndexingNative = true;
2158
p->shaderStorageImageArrayNonUniformIndexingNative = false;
2159
p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
2160
p->robustBufferAccessUpdateAfterBind = true;
2161
p->quadDivergentImplicitLod = false;
2162
p->maxPerStageDescriptorUpdateAfterBindSamplers = max_bindless_views;
2163
p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
2164
p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX;
2165
p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_bindless_views;
2166
p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_bindless_views;
2167
p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS;
2168
p->maxPerStageUpdateAfterBindResources = UINT32_MAX;
2169
p->maxDescriptorSetUpdateAfterBindSamplers = max_bindless_views;
2170
p->maxDescriptorSetUpdateAfterBindUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
2171
p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
2172
p->maxDescriptorSetUpdateAfterBindStorageBuffers = UINT32_MAX;
2173
p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
2174
p->maxDescriptorSetUpdateAfterBindSampledImages = max_bindless_views;
2175
p->maxDescriptorSetUpdateAfterBindStorageImages = max_bindless_views;
2176
p->maxDescriptorSetUpdateAfterBindInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS;
2177
2178
/* We support all of the depth resolve modes */
2179
p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
2180
VK_RESOLVE_MODE_AVERAGE_BIT_KHR |
2181
VK_RESOLVE_MODE_MIN_BIT_KHR |
2182
VK_RESOLVE_MODE_MAX_BIT_KHR;
2183
/* Average doesn't make sense for stencil so we don't support that */
2184
p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR;
2185
if (pdevice->info.ver >= 8) {
2186
/* The advanced stencil resolve modes currently require stencil
2187
* sampling be supported by the hardware.
2188
*/
2189
p->supportedStencilResolveModes |= VK_RESOLVE_MODE_MIN_BIT_KHR |
2190
VK_RESOLVE_MODE_MAX_BIT_KHR;
2191
}
2192
p->independentResolveNone = true;
2193
p->independentResolve = true;
2194
2195
p->filterMinmaxSingleComponentFormats = pdevice->info.ver >= 9;
2196
p->filterMinmaxImageComponentMapping = pdevice->info.ver >= 9;
2197
2198
p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
2199
2200
p->framebufferIntegerColorSampleCounts =
2201
isl_device_get_sample_counts(&pdevice->isl_dev);
2202
}
2203
2204
void anv_GetPhysicalDeviceProperties2(
2205
VkPhysicalDevice physicalDevice,
2206
VkPhysicalDeviceProperties2* pProperties)
2207
{
2208
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
2209
2210
anv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
2211
2212
VkPhysicalDeviceVulkan11Properties core_1_1 = {
2213
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
2214
};
2215
anv_get_physical_device_properties_1_1(pdevice, &core_1_1);
2216
2217
VkPhysicalDeviceVulkan12Properties core_1_2 = {
2218
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
2219
};
2220
anv_get_physical_device_properties_1_2(pdevice, &core_1_2);
2221
2222
#define CORE_RENAMED_PROPERTY(major, minor, ext_property, core_property) \
2223
memcpy(&properties->ext_property, &core_##major##_##minor.core_property, \
2224
sizeof(core_##major##_##minor.core_property))
2225
2226
#define CORE_PROPERTY(major, minor, property) \
2227
CORE_RENAMED_PROPERTY(major, minor, property, property)
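/* As a sketch of how these helpers expand, CORE_PROPERTY(1, 1, subgroupSize)
* becomes:
*
*    memcpy(&properties->subgroupSize, &core_1_1.subgroupSize,
*           sizeof(core_1_1.subgroupSize));
*
* i.e. each recognized extension struct below is filled by copying the
* matching field out of the core 1.1/1.2 properties gathered above.
*/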
2228
2229
vk_foreach_struct(ext, pProperties->pNext) {
2230
switch (ext->sType) {
2231
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR: {
2232
VkPhysicalDeviceAccelerationStructurePropertiesKHR *props = (void *)ext;
2233
props->maxGeometryCount = (1u << 24) - 1;
2234
props->maxInstanceCount = (1u << 24) - 1;
2235
props->maxPrimitiveCount = (1u << 29) - 1;
2236
props->maxPerStageDescriptorAccelerationStructures = UINT16_MAX;
2237
props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures = UINT16_MAX;
2238
props->maxDescriptorSetAccelerationStructures = UINT16_MAX;
2239
props->maxDescriptorSetUpdateAfterBindAccelerationStructures = UINT16_MAX;
2240
props->minAccelerationStructureScratchOffsetAlignment = 64;
2241
break;
2242
}
2243
2244
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
2245
/* TODO: Real limits */
2246
VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
2247
(VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
2248
/* There's nothing in the public docs about this value as far as I
* can tell. However, this is the value the Windows driver reports
* and there's a comment on a rejected HW feature in the internal
* docs that says:
*
*    "This is similar to conservative rasterization, except the
*    primitive area is not extended by 1/512 and..."
*
* That's a bit of an obtuse reference but it's the best we've got
* for now.
*/
properties->primitiveOverestimationSize = 1.0f / 512.0f;
2260
properties->maxExtraPrimitiveOverestimationSize = 0.0f;
2261
properties->extraPrimitiveOverestimationSizeGranularity = 0.0f;
2262
properties->primitiveUnderestimation = false;
2263
properties->conservativePointAndLineRasterization = false;
2264
properties->degenerateTrianglesRasterized = true;
2265
properties->degenerateLinesRasterized = false;
2266
properties->fullyCoveredFragmentShaderInputVariable = false;
2267
properties->conservativeRasterizationPostDepthCoverage = true;
2268
break;
2269
}
2270
2271
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
2272
VkPhysicalDeviceCustomBorderColorPropertiesEXT *properties =
2273
(VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
2274
properties->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS;
2275
break;
2276
}
2277
2278
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES_KHR: {
2279
VkPhysicalDeviceDepthStencilResolvePropertiesKHR *properties =
2280
(VkPhysicalDeviceDepthStencilResolvePropertiesKHR *)ext;
2281
CORE_PROPERTY(1, 2, supportedDepthResolveModes);
2282
CORE_PROPERTY(1, 2, supportedStencilResolveModes);
2283
CORE_PROPERTY(1, 2, independentResolveNone);
2284
CORE_PROPERTY(1, 2, independentResolve);
2285
break;
2286
}
2287
2288
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT: {
2289
VkPhysicalDeviceDescriptorIndexingPropertiesEXT *properties =
2290
(VkPhysicalDeviceDescriptorIndexingPropertiesEXT *)ext;
2291
CORE_PROPERTY(1, 2, maxUpdateAfterBindDescriptorsInAllPools);
2292
CORE_PROPERTY(1, 2, shaderUniformBufferArrayNonUniformIndexingNative);
2293
CORE_PROPERTY(1, 2, shaderSampledImageArrayNonUniformIndexingNative);
2294
CORE_PROPERTY(1, 2, shaderStorageBufferArrayNonUniformIndexingNative);
2295
CORE_PROPERTY(1, 2, shaderStorageImageArrayNonUniformIndexingNative);
2296
CORE_PROPERTY(1, 2, shaderInputAttachmentArrayNonUniformIndexingNative);
2297
CORE_PROPERTY(1, 2, robustBufferAccessUpdateAfterBind);
2298
CORE_PROPERTY(1, 2, quadDivergentImplicitLod);
2299
CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSamplers);
2300
CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindUniformBuffers);
2301
CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageBuffers);
2302
CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSampledImages);
2303
CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageImages);
2304
CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindInputAttachments);
2305
CORE_PROPERTY(1, 2, maxPerStageUpdateAfterBindResources);
2306
CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSamplers);
2307
CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffers);
2308
CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffersDynamic);
2309
CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffers);
2310
CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffersDynamic);
2311
CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSampledImages);
2312
CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageImages);
2313
CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindInputAttachments);
2314
break;
2315
}
2316
2317
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: {
2318
VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props =
2319
(VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext;
2320
/* Those must be 0 if attachmentFragmentShadingRate is not
2321
* supported.
2322
*/
2323
props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
2324
props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
2325
props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
2326
2327
props->primitiveFragmentShadingRateWithMultipleViewports = false;
2328
props->layeredShadingRateAttachments = false;
2329
props->fragmentShadingRateNonTrivialCombinerOps = false;
2330
props->maxFragmentSize = (VkExtent2D) { 4, 4 };
2331
props->maxFragmentSizeAspectRatio = 4;
2332
props->maxFragmentShadingRateCoverageSamples = 4 * 4 * 16;
2333
props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_16_BIT;
2334
props->fragmentShadingRateWithShaderDepthStencilWrites = false;
2335
props->fragmentShadingRateWithSampleMask = true;
2336
props->fragmentShadingRateWithShaderSampleMask = false;
2337
props->fragmentShadingRateWithConservativeRasterization = true;
2338
props->fragmentShadingRateWithFragmentShaderInterlock = true;
2339
props->fragmentShadingRateWithCustomSampleLocations = true;
2340
props->fragmentShadingRateStrictMultiplyCombiner = false;
2341
break;
2342
}
2343
2344
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR: {
2345
VkPhysicalDeviceDriverPropertiesKHR *properties =
2346
(VkPhysicalDeviceDriverPropertiesKHR *) ext;
2347
CORE_PROPERTY(1, 2, driverID);
2348
CORE_PROPERTY(1, 2, driverName);
2349
CORE_PROPERTY(1, 2, driverInfo);
2350
CORE_PROPERTY(1, 2, conformanceVersion);
2351
break;
2352
}
2353
2354
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
2355
VkPhysicalDeviceDrmPropertiesEXT *props =
2356
(VkPhysicalDeviceDrmPropertiesEXT *)ext;
2357
2358
props->hasPrimary = pdevice->has_master;
2359
props->primaryMajor = pdevice->master_major;
2360
props->primaryMinor = pdevice->master_minor;
2361
2362
props->hasRender = pdevice->has_local;
2363
props->renderMajor = pdevice->local_major;
2364
props->renderMinor = pdevice->local_minor;
2365
2366
break;
2367
}
2368
2369
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
2370
VkPhysicalDeviceExternalMemoryHostPropertiesEXT *props =
2371
(VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
2372
/* Userptr needs page aligned memory. */
2373
props->minImportedHostPointerAlignment = 4096;
2374
break;
2375
}
2376
2377
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
2378
VkPhysicalDeviceIDProperties *properties =
2379
(VkPhysicalDeviceIDProperties *)ext;
2380
CORE_PROPERTY(1, 1, deviceUUID);
2381
CORE_PROPERTY(1, 1, driverUUID);
2382
CORE_PROPERTY(1, 1, deviceLUID);
2383
CORE_PROPERTY(1, 1, deviceLUIDValid);
2384
break;
2385
}
2386
2387
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
2388
VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
2389
(VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;
2390
props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
2391
props->maxPerStageDescriptorInlineUniformBlocks =
2392
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
2393
props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
2394
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
2395
props->maxDescriptorSetInlineUniformBlocks =
2396
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
2397
props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
2398
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
2399
break;
2400
}
2401
2402
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
2403
VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
2404
(VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
2405
/* In the Skylake PRM Vol. 7, subsection titled "GIQ (Diamond)
* Sampling Rules - Legacy Mode", it says the following:
*
*    "Note that the device divides a pixel into a 16x16 array of
*    subpixels, referenced by their upper left corners."
*
* This is the only known reference in the PRMs to the subpixel
* precision of line rasterization and a "16x16 array of subpixels"
* implies 4 subpixel precision bits. Empirical testing has shown
* that 4 subpixel precision bits applies to all line rasterization
* types.
*/
props->lineSubPixelPrecisionBits = 4;
2418
break;
2419
}
2420
2421
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
2422
VkPhysicalDeviceMaintenance3Properties *properties =
2423
(VkPhysicalDeviceMaintenance3Properties *)ext;
2424
/* This value doesn't matter for us today as our per-stage
2425
* descriptors are the real limit.
2426
*/
2427
CORE_PROPERTY(1, 1, maxPerSetDescriptors);
2428
CORE_PROPERTY(1, 1, maxMemoryAllocationSize);
2429
break;
2430
}
2431
2432
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {
2433
VkPhysicalDeviceMultiviewProperties *properties =
2434
(VkPhysicalDeviceMultiviewProperties *)ext;
2435
CORE_PROPERTY(1, 1, maxMultiviewViewCount);
2436
CORE_PROPERTY(1, 1, maxMultiviewInstanceIndex);
2437
break;
2438
}
2439
2440
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
2441
VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
2442
(VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
2443
properties->pciDomain = pdevice->pci_info.domain;
2444
properties->pciBus = pdevice->pci_info.bus;
2445
properties->pciDevice = pdevice->pci_info.device;
2446
properties->pciFunction = pdevice->pci_info.function;
2447
break;
2448
}
2449
2450
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_PROPERTIES_KHR: {
2451
VkPhysicalDevicePerformanceQueryPropertiesKHR *properties =
2452
(VkPhysicalDevicePerformanceQueryPropertiesKHR *)ext;
2453
/* We could support this by spawning a shader to do the equation
2454
* normalization.
2455
*/
2456
properties->allowCommandBufferQueryCopies = false;
2457
break;
2458
}
2459
2460
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
2461
VkPhysicalDevicePointClippingProperties *properties =
2462
(VkPhysicalDevicePointClippingProperties *) ext;
2463
CORE_PROPERTY(1, 1, pointClippingBehavior);
2464
break;
2465
}
2466
2467
#pragma GCC diagnostic push
2468
#pragma GCC diagnostic ignored "-Wswitch"
2469
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENTATION_PROPERTIES_ANDROID: {
2470
VkPhysicalDevicePresentationPropertiesANDROID *props =
2471
(VkPhysicalDevicePresentationPropertiesANDROID *)ext;
2472
props->sharedImage = VK_FALSE;
2473
break;
2474
}
2475
#pragma GCC diagnostic pop
2476
2477
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
2478
VkPhysicalDeviceProtectedMemoryProperties *properties =
2479
(VkPhysicalDeviceProtectedMemoryProperties *)ext;
2480
CORE_PROPERTY(1, 1, protectedNoFault);
2481
break;
2482
}
2483
2484
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
2485
VkPhysicalDeviceProvokingVertexPropertiesEXT *properties =
2486
(VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
2487
properties->provokingVertexModePerPipeline = true;
2488
properties->transformFeedbackPreservesTriangleFanProvokingVertex = false;
2489
break;
2490
}
2491
2492
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
2493
VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
2494
(VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
2495
properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
2496
break;
2497
}
2498
2499
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
2500
VkPhysicalDeviceRobustness2PropertiesEXT *properties = (void *)ext;
2501
properties->robustStorageBufferAccessSizeAlignment =
2502
ANV_SSBO_BOUNDS_CHECK_ALIGNMENT;
2503
properties->robustUniformBufferAccessSizeAlignment =
2504
ANV_UBO_ALIGNMENT;
2505
break;
2506
}
2507
2508
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES_EXT: {
2509
VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *properties =
2510
(VkPhysicalDeviceSamplerFilterMinmaxPropertiesEXT *)ext;
2511
CORE_PROPERTY(1, 2, filterMinmaxImageComponentMapping);
2512
CORE_PROPERTY(1, 2, filterMinmaxSingleComponentFormats);
2513
break;
2514
}
2515
2516
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
2517
VkPhysicalDeviceSubgroupProperties *properties = (void *)ext;
2518
CORE_PROPERTY(1, 1, subgroupSize);
2519
CORE_RENAMED_PROPERTY(1, 1, supportedStages,
2520
subgroupSupportedStages);
2521
CORE_RENAMED_PROPERTY(1, 1, supportedOperations,
2522
subgroupSupportedOperations);
2523
CORE_RENAMED_PROPERTY(1, 1, quadOperationsInAllStages,
2524
subgroupQuadOperationsInAllStages);
2525
break;
2526
}
2527
2528
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {
2529
VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props =
2530
(VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext;
2531
STATIC_ASSERT(8 <= BRW_SUBGROUP_SIZE && BRW_SUBGROUP_SIZE <= 32);
2532
props->minSubgroupSize = 8;
2533
props->maxSubgroupSize = 32;
2534
/* Limit max_threads to 64 for the GPGPU_WALKER command. */
2535
props->maxComputeWorkgroupSubgroups = MIN2(64, pdevice->info.max_cs_threads);
2536
props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
2537
break;
2538
}
2539
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES_KHR : {
2540
VkPhysicalDeviceFloatControlsPropertiesKHR *properties = (void *)ext;
2541
CORE_PROPERTY(1, 2, denormBehaviorIndependence);
2542
CORE_PROPERTY(1, 2, roundingModeIndependence);
2543
CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat16);
2544
CORE_PROPERTY(1, 2, shaderDenormPreserveFloat16);
2545
CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat16);
2546
CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat16);
2547
CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat16);
2548
CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat32);
2549
CORE_PROPERTY(1, 2, shaderDenormPreserveFloat32);
2550
CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat32);
2551
CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat32);
2552
CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat32);
2553
CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat64);
2554
CORE_PROPERTY(1, 2, shaderDenormPreserveFloat64);
2555
CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat64);
2556
CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat64);
2557
CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat64);
2558
break;
2559
}
2560
2561
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
2562
VkPhysicalDeviceSampleLocationsPropertiesEXT *props =
2563
(VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
2564
2565
props->sampleLocationSampleCounts =
2566
isl_device_get_sample_counts(&pdevice->isl_dev);
2567
2568
/* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */
2569
props->maxSampleLocationGridSize.width = 1;
2570
props->maxSampleLocationGridSize.height = 1;
2571
2572
props->sampleLocationCoordinateRange[0] = 0;
2573
props->sampleLocationCoordinateRange[1] = 0.9375;
2574
props->sampleLocationSubPixelBits = 4;
2575
2576
props->variableSampleLocations = true;
2577
break;
2578
}
2579
2580
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: {
2581
VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *props =
2582
(VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *)ext;
2583
2584
/* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
* Base Address:
*
*    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
*    specifies the base address of the first element of the surface,
*    computed in software by adding the surface base address to the
*    byte offset of the element in the buffer. The base address must
*    be aligned to element size."
*
* The typed dataport messages require that things be texel aligned.
* Otherwise, we may just load/store the wrong data or, in the worst
* case, there may be hangs.
*/
props->storageTexelBufferOffsetAlignmentBytes = 16;
props->storageTexelBufferOffsetSingleTexelAlignment = true;

/* The sampler, however, is much more forgiving and it can handle
* arbitrary byte alignment for linear and buffer surfaces. It's
* hard to find a good PRM citation for this but years of empirical
* experience demonstrate that this is true.
*/
props->uniformTexelBufferOffsetAlignmentBytes = 1;
props->uniformTexelBufferOffsetSingleTexelAlignment = false;
2607
break;
2608
}
2609
2610
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES_KHR: {
2611
VkPhysicalDeviceTimelineSemaphorePropertiesKHR *properties =
2612
(VkPhysicalDeviceTimelineSemaphorePropertiesKHR *) ext;
2613
CORE_PROPERTY(1, 2, maxTimelineSemaphoreValueDifference);
2614
break;
2615
}
2616
2617
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
2618
VkPhysicalDeviceTransformFeedbackPropertiesEXT *props =
2619
(VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
2620
2621
props->maxTransformFeedbackStreams = MAX_XFB_STREAMS;
2622
props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS;
2623
props->maxTransformFeedbackBufferSize = (1ull << 32);
2624
props->maxTransformFeedbackStreamDataSize = 128 * 4;
2625
props->maxTransformFeedbackBufferDataSize = 128 * 4;
2626
props->maxTransformFeedbackBufferDataStride = 2048;
2627
props->transformFeedbackQueries = true;
2628
props->transformFeedbackStreamsLinesTriangles = false;
2629
props->transformFeedbackRasterizationStreamSelect = false;
2630
/* This requires MI_MATH */
2631
props->transformFeedbackDraw = pdevice->info.verx10 >= 75;
2632
break;
2633
}
2634
2635
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
2636
VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *props =
2637
(VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
2638
/* We have to restrict this a bit for multiview */
2639
props->maxVertexAttribDivisor = UINT32_MAX / 16;
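/* Presumably the divide by 16 mirrors maxMultiviewInstanceIndex above:
* with up to 16 views, instance indices are scaled internally, so the
* advertised divisor is kept small enough to avoid overflowing them.
*/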
2640
break;
2641
}
2642
2643
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: {
2644
VkPhysicalDeviceMultiDrawPropertiesEXT *props = (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext;
2645
props->maxMultiDrawCount = 2048;
2646
break;
2647
}
2648
2649
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES:
2650
anv_get_physical_device_properties_1_1(pdevice, (void *)ext);
2651
break;
2652
2653
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES:
2654
anv_get_physical_device_properties_1_2(pdevice, (void *)ext);
2655
break;
2656
2657
default:
2658
anv_debug_ignored_stype(ext->sType);
2659
break;
2660
}
2661
}
2662
2663
#undef CORE_RENAMED_PROPERTY
2664
#undef CORE_PROPERTY
2665
}
2666
2667
static const VkQueueFamilyProperties
2668
anv_queue_family_properties_template = {
2669
.timestampValidBits = 36, /* XXX: Real value here */
2670
.minImageTransferGranularity = { 1, 1, 1 },
2671
};
2672
2673
void anv_GetPhysicalDeviceQueueFamilyProperties(
2674
VkPhysicalDevice physicalDevice,
2675
uint32_t* pCount,
2676
VkQueueFamilyProperties* pQueueFamilyProperties)
2677
{
2678
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
2679
VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pCount);
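/* vk_outarray implements the usual Vulkan two-call idiom: if the caller
* passed NULL for pQueueFamilyProperties, only the total count is written
* to *pCount; otherwise at most *pCount entries are filled in and *pCount
* is updated to the number actually written.
*/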
2680
2681
for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
2682
struct anv_queue_family *queue_family = &pdevice->queue.families[i];
2683
vk_outarray_append(&out, p) {
2684
*p = anv_queue_family_properties_template;
2685
p->queueFlags = queue_family->queueFlags;
2686
p->queueCount = queue_family->queueCount;
2687
}
2688
}
2689
}
2690
2691
void anv_GetPhysicalDeviceQueueFamilyProperties2(
2692
VkPhysicalDevice physicalDevice,
2693
uint32_t* pQueueFamilyPropertyCount,
2694
VkQueueFamilyProperties2* pQueueFamilyProperties)
2695
{
2696
ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
2697
VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pQueueFamilyPropertyCount);
2698
2699
for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
2700
struct anv_queue_family *queue_family = &pdevice->queue.families[i];
2701
vk_outarray_append(&out, p) {
2702
p->queueFamilyProperties = anv_queue_family_properties_template;
2703
p->queueFamilyProperties.queueFlags = queue_family->queueFlags;
2704
p->queueFamilyProperties.queueCount = queue_family->queueCount;
2705
2706
vk_foreach_struct(s, p->pNext) {
2707
anv_debug_ignored_stype(s->sType);
2708
}
2709
}
2710
}
2711
}
2712
2713
void anv_GetPhysicalDeviceMemoryProperties(
2714
VkPhysicalDevice physicalDevice,
2715
VkPhysicalDeviceMemoryProperties* pMemoryProperties)
2716
{
2717
ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
2718
2719
pMemoryProperties->memoryTypeCount = physical_device->memory.type_count;
2720
for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
2721
pMemoryProperties->memoryTypes[i] = (VkMemoryType) {
2722
.propertyFlags = physical_device->memory.types[i].propertyFlags,
2723
.heapIndex = physical_device->memory.types[i].heapIndex,
2724
};
2725
}
2726
2727
pMemoryProperties->memoryHeapCount = physical_device->memory.heap_count;
2728
for (uint32_t i = 0; i < physical_device->memory.heap_count; i++) {
2729
pMemoryProperties->memoryHeaps[i] = (VkMemoryHeap) {
2730
.size = physical_device->memory.heaps[i].size,
2731
.flags = physical_device->memory.heaps[i].flags,
2732
};
2733
}
2734
}
2735
2736
static void
2737
anv_get_memory_budget(VkPhysicalDevice physicalDevice,
2738
VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
2739
{
2740
ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
2741
uint64_t sys_available;
2742
ASSERTED bool has_available_memory =
2743
os_get_available_system_memory(&sys_available);
2744
assert(has_available_memory);
2745
2746
VkDeviceSize total_heaps_size = 0;
2747
for (size_t i = 0; i < device->memory.heap_count; i++)
2748
total_heaps_size += device->memory.heaps[i].size;
2749
2750
for (size_t i = 0; i < device->memory.heap_count; i++) {
2751
VkDeviceSize heap_size = device->memory.heaps[i].size;
2752
VkDeviceSize heap_used = device->memory.heaps[i].used;
2753
VkDeviceSize heap_budget;
2754
2755
double heap_proportion = (double) heap_size / total_heaps_size;
2756
VkDeviceSize sys_available_prop = sys_available * heap_proportion;
2757
2758
/*
* Let's not incite the app to starve the system: report at most 90% of
* available system memory.
*/
uint64_t heap_available = sys_available_prop * 9 / 10;
heap_budget = MIN2(heap_size, heap_used + heap_available);

/*
* Round down to the nearest MB
*/
heap_budget &= ~((1ull << 20) - 1);
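/* Worked example with made-up numbers: a single 8 GiB heap with 1 GiB
* already used and 4 GiB of available system memory gives
* heap_available = 3.6 GiB and heap_budget = MIN2(8, 1 + 3.6) = 4.6 GiB,
* which the mask above then rounds down to a whole MiB.
*/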
2769
2770
/*
* The heapBudget value must be non-zero for array elements less than
* VkPhysicalDeviceMemoryProperties::memoryHeapCount. The heapBudget
* value must be less than or equal to VkMemoryHeap::size for each heap.
*/
assert(0 < heap_budget && heap_budget <= heap_size);
2776
2777
memoryBudget->heapUsage[i] = heap_used;
2778
memoryBudget->heapBudget[i] = heap_budget;
2779
}
2780
2781
/* The heapBudget and heapUsage values must be zero for array elements
2782
* greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
2783
*/
2784
for (uint32_t i = device->memory.heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
2785
memoryBudget->heapBudget[i] = 0;
2786
memoryBudget->heapUsage[i] = 0;
2787
}
2788
}
2789
2790
void anv_GetPhysicalDeviceMemoryProperties2(
2791
VkPhysicalDevice physicalDevice,
2792
VkPhysicalDeviceMemoryProperties2* pMemoryProperties)
2793
{
2794
anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
2795
&pMemoryProperties->memoryProperties);
2796
2797
vk_foreach_struct(ext, pMemoryProperties->pNext) {
2798
switch (ext->sType) {
2799
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT:
2800
anv_get_memory_budget(physicalDevice, (void*)ext);
2801
break;
2802
default:
2803
anv_debug_ignored_stype(ext->sType);
2804
break;
2805
}
2806
}
2807
}
2808
2809
void
2810
anv_GetDeviceGroupPeerMemoryFeatures(
2811
VkDevice device,
2812
uint32_t heapIndex,
2813
uint32_t localDeviceIndex,
2814
uint32_t remoteDeviceIndex,
2815
VkPeerMemoryFeatureFlags* pPeerMemoryFeatures)
2816
{
2817
assert(localDeviceIndex == 0 && remoteDeviceIndex == 0);
2818
*pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
2819
VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
2820
VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
2821
VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
2822
}
2823
2824
PFN_vkVoidFunction anv_GetInstanceProcAddr(
2825
VkInstance _instance,
2826
const char* pName)
2827
{
2828
ANV_FROM_HANDLE(anv_instance, instance, _instance);
2829
return vk_instance_get_proc_addr(&instance->vk,
2830
&anv_instance_entrypoints,
2831
pName);
2832
}
2833
2834
/* With version 1+ of the loader interface the ICD should expose
2835
* vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
2836
*/
2837
PUBLIC
2838
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2839
VkInstance instance,
2840
const char* pName);
2841
2842
PUBLIC
2843
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2844
VkInstance instance,
2845
const char* pName)
2846
{
2847
return anv_GetInstanceProcAddr(instance, pName);
2848
}
2849
2850
/* With version 4+ of the loader interface the ICD should expose
2851
* vk_icdGetPhysicalDeviceProcAddr()
2852
*/
2853
PUBLIC
2854
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(
2855
VkInstance _instance,
2856
const char* pName);
2857
2858
PFN_vkVoidFunction vk_icdGetPhysicalDeviceProcAddr(
2859
VkInstance _instance,
2860
const char* pName)
2861
{
2862
ANV_FROM_HANDLE(anv_instance, instance, _instance);
2863
return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
2864
}
2865
2866
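/* Small helper: allocate 'size' bytes (with the requested alignment) from
* the given state pool and copy the caller's data into the new state.
*/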
static struct anv_state
2867
anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
2868
{
2869
struct anv_state state;
2870
2871
state = anv_state_pool_alloc(pool, size, align);
2872
memcpy(state.map, p, size);
2873
2874
return state;
2875
}
2876
2877
static void
2878
anv_device_init_border_colors(struct anv_device *device)
2879
{
2880
if (device->info.is_haswell) {
2881
static const struct hsw_border_color border_colors[] = {
2882
[VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
2883
[VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
2884
[VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
2885
[VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
2886
[VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
2887
[VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
2888
};
2889
2890
device->border_colors =
2891
anv_state_pool_emit_data(&device->dynamic_state_pool,
2892
sizeof(border_colors), 512, border_colors);
2893
} else {
2894
static const struct gfx8_border_color border_colors[] = {
2895
[VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
2896
[VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
2897
[VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
2898
[VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
2899
[VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
2900
[VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
2901
};
2902
2903
device->border_colors =
2904
anv_state_pool_emit_data(&device->dynamic_state_pool,
2905
sizeof(border_colors), 64, border_colors);
2906
}
2907
}
2908
2909
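/* The "trivial batch" is a 4k BO containing nothing but
* MI_BATCH_BUFFER_END plus a padding NOOP, presumably so that submissions
* which carry no real commands still have a valid batch to execute.
*/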
static VkResult
2910
anv_device_init_trivial_batch(struct anv_device *device)
2911
{
2912
VkResult result = anv_device_alloc_bo(device, "trivial-batch", 4096,
2913
ANV_BO_ALLOC_MAPPED,
2914
0 /* explicit_address */,
2915
&device->trivial_batch_bo);
2916
if (result != VK_SUCCESS)
2917
return result;
2918
2919
struct anv_batch batch = {
2920
.start = device->trivial_batch_bo->map,
2921
.next = device->trivial_batch_bo->map,
2922
.end = device->trivial_batch_bo->map + 4096,
2923
};
2924
2925
anv_batch_emit(&batch, GFX7_MI_BATCH_BUFFER_END, bbe);
2926
anv_batch_emit(&batch, GFX7_MI_NOOP, noop);
2927
2928
if (!device->info.has_llc)
2929
intel_clflush_range(batch.start, batch.next - batch.start);
2930
2931
return VK_SUCCESS;
2932
}
2933
2934
static int
2935
vk_priority_to_gen(int priority)
2936
{
2937
switch (priority) {
2938
case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
2939
return INTEL_CONTEXT_LOW_PRIORITY;
2940
case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
2941
return INTEL_CONTEXT_MEDIUM_PRIORITY;
2942
case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
2943
return INTEL_CONTEXT_HIGH_PRIORITY;
2944
case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
2945
return INTEL_CONTEXT_REALTIME_PRIORITY;
2946
default:
2947
unreachable("Invalid priority");
2948
}
2949
}
2950
2951
static bool
2952
get_bo_from_pool(struct intel_batch_decode_bo *ret,
2953
struct anv_block_pool *pool,
2954
uint64_t address)
2955
{
2956
anv_block_pool_foreach_bo(bo, pool) {
2957
uint64_t bo_address = intel_48b_address(bo->offset);
2958
if (address >= bo_address && address < (bo_address + bo->size)) {
2959
*ret = (struct intel_batch_decode_bo) {
2960
.addr = bo_address,
2961
.size = bo->size,
2962
.map = bo->map,
2963
};
2964
return true;
2965
}
2966
}
2967
return false;
2968
}
2969
2970
/* Find the BO that backs a given GPU address, for batch decoding. */
2971
static struct intel_batch_decode_bo
2972
decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
2973
{
2974
struct anv_device *device = v_batch;
2975
struct intel_batch_decode_bo ret_bo = {};
2976
2977
assert(ppgtt);
2978
2979
if (get_bo_from_pool(&ret_bo, &device->dynamic_state_pool.block_pool, address))
2980
return ret_bo;
2981
if (get_bo_from_pool(&ret_bo, &device->instruction_state_pool.block_pool, address))
2982
return ret_bo;
2983
if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address))
2984
return ret_bo;
2985
if (get_bo_from_pool(&ret_bo, &device->surface_state_pool.block_pool, address))
2986
return ret_bo;
2987
2988
if (!device->cmd_buffer_being_decoded)
2989
return (struct intel_batch_decode_bo) { };
2990
2991
struct anv_batch_bo **bo;
2992
2993
u_vector_foreach(bo, &device->cmd_buffer_being_decoded->seen_bbos) {
2994
/* The decoder zeroes out the top 16 bits, so we need to as well */
2995
uint64_t bo_address = (*bo)->bo->offset & (~0ull >> 16);
2996
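/* Worked example: (~0ull >> 16) is 0x0000ffffffffffff, so a canonical
 * address such as 0xffff800012345000 masks down to the 48-bit address
 * 0x0000800012345000 that the decoder reports.
 */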
2997
if (address >= bo_address && address < bo_address + (*bo)->bo->size) {
2998
return (struct intel_batch_decode_bo) {
2999
.addr = bo_address,
3000
.size = (*bo)->bo->size,
3001
.map = (*bo)->bo->map,
3002
};
3003
}
3004
}
3005
3006
return (struct intel_batch_decode_bo) { };
3007
}
3008
3009
struct intel_aux_map_buffer {
3010
struct intel_buffer base;
3011
struct anv_state state;
3012
};
3013
3014
static struct intel_buffer *
3015
intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
3016
{
3017
struct intel_aux_map_buffer *buf = malloc(sizeof(struct intel_aux_map_buffer));
3018
if (!buf)
3019
return NULL;
3020
3021
struct anv_device *device = (struct anv_device*)driver_ctx;
3022
assert(device->physical->supports_48bit_addresses &&
3023
device->physical->use_softpin);
3024
3025
struct anv_state_pool *pool = &device->dynamic_state_pool;
3026
buf->state = anv_state_pool_alloc(pool, size, size);
3027
3028
buf->base.gpu = pool->block_pool.bo->offset + buf->state.offset;
3029
buf->base.gpu_end = buf->base.gpu + buf->state.alloc_size;
3030
buf->base.map = buf->state.map;
3031
buf->base.driver_bo = &buf->state;
3032
return &buf->base;
3033
}
3034
3035
static void
3036
intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer)
3037
{
3038
struct intel_aux_map_buffer *buf = (struct intel_aux_map_buffer*)buffer;
3039
struct anv_device *device = (struct anv_device*)driver_ctx;
3040
struct anv_state_pool *pool = &device->dynamic_state_pool;
3041
anv_state_pool_free(pool, buf->state);
3042
free(buf);
3043
}
3044
3045
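/* Callback table handed to the common aux-map code (see intel_aux_map_init()
 * below) so it can carve its translation-table buffers out of our dynamic
 * state pool.
 */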
static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
3046
.alloc = intel_aux_map_buffer_alloc,
3047
.free = intel_aux_map_buffer_free,
3048
};
3049
3050
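/* VkPhysicalDeviceFeatures is a struct of VkBool32 members only, so it can be
 * walked as a flat array; any feature the application enables that we do not
 * support fails device creation with VK_ERROR_FEATURE_NOT_PRESENT.
 */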
static VkResult
3051
check_physical_device_features(VkPhysicalDevice physicalDevice,
3052
const VkPhysicalDeviceFeatures *features)
3053
{
3054
VkPhysicalDeviceFeatures supported_features;
3055
anv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
3056
VkBool32 *supported_feature = (VkBool32 *)&supported_features;
3057
VkBool32 *enabled_feature = (VkBool32 *)features;
3058
unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
3059
for (uint32_t i = 0; i < num_features; i++) {
3060
if (enabled_feature[i] && !supported_feature[i])
3061
return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
3062
}
3063
3064
return VK_SUCCESS;
3065
}
3066
3067
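/* Device creation below is a long, ordered sequence: validate the enabled
 * features and queue create infos, open the render node and create a
 * (possibly engine-based) hardware context, initialize the queues, VMA heaps
 * and state pools, then allocate the workaround and trivial-batch BOs and
 * emit the initial per-gen GPU state. The fail_* labels at the bottom unwind
 * the same sequence in reverse.
 */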
VkResult anv_CreateDevice(
3068
VkPhysicalDevice physicalDevice,
3069
const VkDeviceCreateInfo* pCreateInfo,
3070
const VkAllocationCallbacks* pAllocator,
3071
VkDevice* pDevice)
3072
{
3073
ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
3074
VkResult result;
3075
struct anv_device *device;
3076
3077
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
3078
3079
/* Check enabled features */
3080
bool robust_buffer_access = false;
3081
if (pCreateInfo->pEnabledFeatures) {
3082
result = check_physical_device_features(physicalDevice,
3083
pCreateInfo->pEnabledFeatures);
3084
if (result != VK_SUCCESS)
3085
return result;
3086
3087
if (pCreateInfo->pEnabledFeatures->robustBufferAccess)
3088
robust_buffer_access = true;
3089
}
3090
3091
vk_foreach_struct_const(ext, pCreateInfo->pNext) {
3092
switch (ext->sType) {
3093
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: {
3094
const VkPhysicalDeviceFeatures2 *features = (const void *)ext;
3095
result = check_physical_device_features(physicalDevice,
3096
&features->features);
3097
if (result != VK_SUCCESS)
3098
return result;
3099
3100
if (features->features.robustBufferAccess)
3101
robust_buffer_access = true;
3102
break;
3103
}
3104
3105
default:
3106
/* Don't warn */
3107
break;
3108
}
3109
}
3110
3111
/* Check requested queues and fail if we are requested to create any
3112
* queues with flags we don't support.
3113
*/
3114
assert(pCreateInfo->queueCreateInfoCount > 0);
3115
for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3116
if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
3117
return vk_error(VK_ERROR_INITIALIZATION_FAILED);
3118
}
3119
3120
/* Check if client specified queue priority. */
3121
const VkDeviceQueueGlobalPriorityCreateInfoEXT *queue_priority =
3122
vk_find_struct_const(pCreateInfo->pQueueCreateInfos[0].pNext,
3123
DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
3124
3125
VkQueueGlobalPriorityEXT priority =
3126
queue_priority ? queue_priority->globalPriority :
3127
VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT;
3128
3129
device = vk_alloc2(&physical_device->instance->vk.alloc, pAllocator,
3130
sizeof(*device), 8,
3131
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3132
if (!device)
3133
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3134
3135
struct vk_device_dispatch_table dispatch_table;
3136
vk_device_dispatch_table_from_entrypoints(&dispatch_table,
3137
anv_genX(&physical_device->info, device_entrypoints), true);
3138
vk_device_dispatch_table_from_entrypoints(&dispatch_table,
3139
&anv_device_entrypoints, false);
3140
3141
result = vk_device_init(&device->vk, &physical_device->vk,
3142
&dispatch_table, pCreateInfo, pAllocator);
3143
if (result != VK_SUCCESS) {
3144
vk_error(result);
3145
goto fail_alloc;
3146
}
3147
3148
if (INTEL_DEBUG & DEBUG_BATCH) {
3149
const unsigned decode_flags =
3150
INTEL_BATCH_DECODE_FULL |
3151
((INTEL_DEBUG & DEBUG_COLOR) ? INTEL_BATCH_DECODE_IN_COLOR : 0) |
3152
INTEL_BATCH_DECODE_OFFSETS |
3153
INTEL_BATCH_DECODE_FLOATS;
3154
3155
intel_batch_decode_ctx_init(&device->decoder_ctx,
3156
&physical_device->info,
3157
stderr, decode_flags, NULL,
3158
decode_get_bo, NULL, device);
3159
}
3160
3161
device->physical = physical_device;
3162
device->no_hw = physical_device->no_hw;
3163
device->_lost = false;
3164
3165
/* XXX(chadv): Can we dup() physicalDevice->fd here? */
3166
device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
3167
if (device->fd == -1) {
3168
result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
3169
goto fail_device;
3170
}
3171
3172
uint32_t num_queues = 0;
3173
for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
3174
num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
3175
3176
if (device->physical->engine_info) {
3177
/* The kernel API supports at most 64 engines */
3178
assert(num_queues <= 64);
3179
uint16_t engine_classes[64];
3180
int engine_count = 0;
3181
for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3182
const VkDeviceQueueCreateInfo *queueCreateInfo =
3183
&pCreateInfo->pQueueCreateInfos[i];
3184
3185
assert(queueCreateInfo->queueFamilyIndex <
3186
physical_device->queue.family_count);
3187
struct anv_queue_family *queue_family =
3188
&physical_device->queue.families[queueCreateInfo->queueFamilyIndex];
3189
3190
for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++)
3191
engine_classes[engine_count++] = queue_family->engine_class;
3192
}
3193
device->context_id =
3194
anv_gem_create_context_engines(device,
3195
physical_device->engine_info,
3196
engine_count, engine_classes);
3197
} else {
3198
assert(num_queues == 1);
3199
device->context_id = anv_gem_create_context(device);
3200
}
3201
if (device->context_id == -1) {
3202
result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
3203
goto fail_fd;
3204
}
3205
3206
device->has_thread_submit = physical_device->has_thread_submit;
3207
3208
device->queues =
3209
vk_zalloc(&device->vk.alloc, num_queues * sizeof(*device->queues), 8,
3210
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3211
if (device->queues == NULL) {
3212
result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3213
goto fail_context_id;
3214
}
3215
3216
device->queue_count = 0;
3217
for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3218
const VkDeviceQueueCreateInfo *queueCreateInfo =
3219
&pCreateInfo->pQueueCreateInfos[i];
3220
3221
for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++) {
3222
/* When using legacy contexts, we use I915_EXEC_RENDER but, with
3223
* engine-based contexts, the bottom 6 bits of exec_flags are used
3224
* for the engine ID.
3225
*/
3226
uint32_t exec_flags = device->physical->engine_info ?
3227
device->queue_count : I915_EXEC_RENDER;
3228
3229
result = anv_queue_init(device, &device->queues[device->queue_count],
3230
exec_flags, queueCreateInfo);
3231
if (result != VK_SUCCESS)
3232
goto fail_queues;
3233
3234
device->queue_count++;
3235
}
3236
}
3237
3238
if (physical_device->use_softpin) {
3239
if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
3240
result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
3241
goto fail_queues;
3242
}
3243
3244
/* keep the page with address zero out of the allocator */
3245
util_vma_heap_init(&device->vma_lo,
3246
LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE);
3247
3248
util_vma_heap_init(&device->vma_cva, CLIENT_VISIBLE_HEAP_MIN_ADDRESS,
3249
CLIENT_VISIBLE_HEAP_SIZE);
3250
3251
/* Leave the last 4GiB out of the high vma range, so that no state
3252
* base address + size can overflow 48 bits. For more information see
3253
* the comment about Wa32bitGeneralStateOffset in anv_allocator.c
3254
*/
3255
util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS,
3256
physical_device->gtt_size - (1ull << 32) -
3257
HIGH_HEAP_MIN_ADDRESS);
3258
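/* For example, with a 48-bit GTT (gtt_size == 1ull << 48) the high heap ends
 * at 2^48 - 2^32, so even a base address at the very top of the heap plus a
 * full 4GiB size field still fits in 48 bits.
 */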
}
3259
3260
list_inithead(&device->memory_objects);
3261
3262
/* As per spec, the driver implementation may deny requests to acquire
3263
* a priority above the default priority (MEDIUM) if the caller does not
3264
* have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_EXT
3265
* is returned.
3266
*/
3267
if (physical_device->has_context_priority) {
3268
int err = anv_gem_set_context_param(device->fd, device->context_id,
3269
I915_CONTEXT_PARAM_PRIORITY,
3270
vk_priority_to_gen(priority));
3271
if (err != 0 && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT) {
3272
result = vk_error(VK_ERROR_NOT_PERMITTED_EXT);
3273
goto fail_vmas;
3274
}
3275
}
3276
3277
device->info = physical_device->info;
3278
device->isl_dev = physical_device->isl_dev;
3279
3280
/* On Broadwell and later, we can use batch chaining to more efficiently
3281
* implement growing command buffers. Prior to Haswell, the kernel
3282
* command parser gets in the way and we have to fall back to growing
3283
* the batch.
3284
*/
3285
device->can_chain_batches = device->info.ver >= 8;
3286
3287
device->robust_buffer_access = robust_buffer_access;
3288
3289
if (pthread_mutex_init(&device->mutex, NULL) != 0) {
3290
result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
3291
goto fail_queues;
3292
}
3293
3294
pthread_condattr_t condattr;
3295
if (pthread_condattr_init(&condattr) != 0) {
3296
result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
3297
goto fail_mutex;
3298
}
3299
if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
3300
pthread_condattr_destroy(&condattr);
3301
result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
3302
goto fail_mutex;
3303
}
3304
if (pthread_cond_init(&device->queue_submit, &condattr) != 0) {
3305
pthread_condattr_destroy(&condattr);
3306
result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
3307
goto fail_mutex;
3308
}
3309
pthread_condattr_destroy(&condattr);
3310
3311
result = anv_bo_cache_init(&device->bo_cache);
3312
if (result != VK_SUCCESS)
3313
goto fail_queue_cond;
3314
3315
anv_bo_pool_init(&device->batch_bo_pool, device, "batch");
3316
3317
/* Because scratch is also relative to General State Base Address, we leave
3318
* the base address 0 and start the pool memory at an offset. This way we
3319
* get the correct offsets in the anv_states that get allocated from it.
3320
*/
3321
result = anv_state_pool_init(&device->general_state_pool, device,
3322
"general pool",
3323
0, GENERAL_STATE_POOL_MIN_ADDRESS, 16384);
3324
if (result != VK_SUCCESS)
3325
goto fail_batch_bo_pool;
3326
3327
result = anv_state_pool_init(&device->dynamic_state_pool, device,
3328
"dynamic pool",
3329
DYNAMIC_STATE_POOL_MIN_ADDRESS, 0, 16384);
3330
if (result != VK_SUCCESS)
3331
goto fail_general_state_pool;
3332
3333
if (device->info.ver >= 8) {
3334
/* The border color pointer is limited to 24 bits, so we need to make
3335
* sure that any such color used at any point in the program doesn't
3336
* exceed that limit.
3337
* We achieve that by reserving all the custom border colors we support
3338
* right off the bat, so they are close to the base address.
3339
*/
3340
anv_state_reserved_pool_init(&device->custom_border_colors,
3341
&device->dynamic_state_pool,
3342
MAX_CUSTOM_BORDER_COLORS,
3343
sizeof(struct gfx8_border_color), 64);
3344
}
3345
3346
result = anv_state_pool_init(&device->instruction_state_pool, device,
3347
"instruction pool",
3348
INSTRUCTION_STATE_POOL_MIN_ADDRESS, 0, 16384);
3349
if (result != VK_SUCCESS)
3350
goto fail_dynamic_state_pool;
3351
3352
result = anv_state_pool_init(&device->surface_state_pool, device,
3353
"surface state pool",
3354
SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
3355
if (result != VK_SUCCESS)
3356
goto fail_instruction_state_pool;
3357
3358
if (physical_device->use_softpin) {
3359
int64_t bt_pool_offset = (int64_t)BINDING_TABLE_POOL_MIN_ADDRESS -
3360
(int64_t)SURFACE_STATE_POOL_MIN_ADDRESS;
3361
assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
3362
result = anv_state_pool_init(&device->binding_table_pool, device,
3363
"binding table pool",
3364
SURFACE_STATE_POOL_MIN_ADDRESS,
3365
bt_pool_offset, 4096);
3366
if (result != VK_SUCCESS)
3367
goto fail_surface_state_pool;
3368
}
3369
3370
if (device->info.has_aux_map) {
3371
device->aux_map_ctx = intel_aux_map_init(device, &aux_map_allocator,
3372
&physical_device->info);
3373
if (!device->aux_map_ctx)
3374
goto fail_binding_table_pool;
3375
}
3376
3377
result = anv_device_alloc_bo(device, "workaround", 4096,
3378
ANV_BO_ALLOC_CAPTURE | ANV_BO_ALLOC_MAPPED |
3379
ANV_BO_ALLOC_LOCAL_MEM /* flags */,
3380
0 /* explicit_address */,
3381
&device->workaround_bo);
3382
if (result != VK_SUCCESS)
3383
goto fail_surface_aux_map_pool;
3384
3385
device->workaround_address = (struct anv_address) {
3386
.bo = device->workaround_bo,
3387
.offset = align_u32(
3388
intel_debug_write_identifiers(device->workaround_bo->map,
3389
device->workaround_bo->size,
3390
"Anv") + 8, 8),
3391
};
3392
3393
device->debug_frame_desc =
3394
intel_debug_get_identifier_block(device->workaround_bo->map,
3395
device->workaround_bo->size,
3396
INTEL_DEBUG_BLOCK_TYPE_FRAME);
3397
3398
result = anv_device_init_trivial_batch(device);
3399
if (result != VK_SUCCESS)
3400
goto fail_workaround_bo;
3401
3402
/* Allocate a null surface state at surface state offset 0. This makes
3403
* NULL descriptor handling trivial because we can just memset structures
3404
* to zero and they have a valid descriptor.
3405
*/
3406
device->null_surface_state =
3407
anv_state_pool_alloc(&device->surface_state_pool,
3408
device->isl_dev.ss.size,
3409
device->isl_dev.ss.align);
3410
isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
3411
.size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
3412
assert(device->null_surface_state.offset == 0);
3413
3414
anv_scratch_pool_init(device, &device->scratch_pool);
3415
3416
/* TODO(RT): Do we want some sort of data structure for this? */
3417
memset(device->rt_scratch_bos, 0, sizeof(device->rt_scratch_bos));
3418
3419
result = anv_genX(&device->info, init_device_state)(device);
3420
if (result != VK_SUCCESS)
3421
goto fail_trivial_batch_bo_and_scratch_pool;
3422
3423
anv_pipeline_cache_init(&device->default_pipeline_cache, device,
3424
true /* cache_enabled */, false /* external_sync */);
3425
3426
result = anv_device_init_rt_shaders(device);
3427
if (result != VK_SUCCESS)
3428
goto fail_rt_trampoline;
3429
3430
anv_device_init_blorp(device);
3431
3432
anv_device_init_border_colors(device);
3433
3434
anv_device_perf_init(device);
3435
3436
*pDevice = anv_device_to_handle(device);
3437
3438
return VK_SUCCESS;
3439
3440
fail_rt_trampoline:
3441
anv_pipeline_cache_finish(&device->default_pipeline_cache);
3442
fail_trivial_batch_bo_and_scratch_pool:
3443
anv_scratch_pool_finish(device, &device->scratch_pool);
3444
anv_device_release_bo(device, device->trivial_batch_bo);
3445
fail_workaround_bo:
3446
anv_device_release_bo(device, device->workaround_bo);
3447
fail_surface_aux_map_pool:
3448
if (device->info.has_aux_map) {
3449
intel_aux_map_finish(device->aux_map_ctx);
3450
device->aux_map_ctx = NULL;
3451
}
3452
fail_binding_table_pool:
3453
if (physical_device->use_softpin)
3454
anv_state_pool_finish(&device->binding_table_pool);
3455
fail_surface_state_pool:
3456
anv_state_pool_finish(&device->surface_state_pool);
3457
fail_instruction_state_pool:
3458
anv_state_pool_finish(&device->instruction_state_pool);
3459
fail_dynamic_state_pool:
3460
if (device->info.ver >= 8)
3461
anv_state_reserved_pool_finish(&device->custom_border_colors);
3462
anv_state_pool_finish(&device->dynamic_state_pool);
3463
fail_general_state_pool:
3464
anv_state_pool_finish(&device->general_state_pool);
3465
fail_batch_bo_pool:
3466
anv_bo_pool_finish(&device->batch_bo_pool);
3467
anv_bo_cache_finish(&device->bo_cache);
3468
fail_queue_cond:
3469
pthread_cond_destroy(&device->queue_submit);
3470
fail_mutex:
3471
pthread_mutex_destroy(&device->mutex);
3472
fail_vmas:
3473
if (physical_device->use_softpin) {
3474
util_vma_heap_finish(&device->vma_hi);
3475
util_vma_heap_finish(&device->vma_cva);
3476
util_vma_heap_finish(&device->vma_lo);
3477
}
3478
fail_queues:
3479
for (uint32_t i = 0; i < device->queue_count; i++)
3480
anv_queue_finish(&device->queues[i]);
3481
vk_free(&device->vk.alloc, device->queues);
3482
fail_context_id:
3483
anv_gem_destroy_context(device, device->context_id);
3484
fail_fd:
3485
close(device->fd);
3486
fail_device:
3487
vk_device_finish(&device->vk);
3488
fail_alloc:
3489
vk_free(&device->vk.alloc, device);
3490
3491
return result;
3492
}
3493
3494
void anv_DestroyDevice(
3495
VkDevice _device,
3496
const VkAllocationCallbacks* pAllocator)
3497
{
3498
ANV_FROM_HANDLE(anv_device, device, _device);
3499
3500
if (!device)
3501
return;
3502
3503
anv_device_finish_blorp(device);
3504
3505
anv_device_finish_rt_shaders(device);
3506
3507
anv_pipeline_cache_finish(&device->default_pipeline_cache);
3508
3509
#ifdef HAVE_VALGRIND
3510
/* We only need to free these to prevent valgrind errors. The backing
3511
* BO will go away in a couple of lines so we don't actually leak.
3512
*/
3513
if (device->info.ver >= 8)
3514
anv_state_reserved_pool_finish(&device->custom_border_colors);
3515
anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
3516
anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
3517
#endif
3518
3519
for (unsigned i = 0; i < ARRAY_SIZE(device->rt_scratch_bos); i++) {
3520
if (device->rt_scratch_bos[i] != NULL)
3521
anv_device_release_bo(device, device->rt_scratch_bos[i]);
3522
}
3523
3524
anv_scratch_pool_finish(device, &device->scratch_pool);
3525
3526
anv_device_release_bo(device, device->workaround_bo);
3527
anv_device_release_bo(device, device->trivial_batch_bo);
3528
3529
if (device->info.has_aux_map) {
3530
intel_aux_map_finish(device->aux_map_ctx);
3531
device->aux_map_ctx = NULL;
3532
}
3533
3534
if (device->physical->use_softpin)
3535
anv_state_pool_finish(&device->binding_table_pool);
3536
anv_state_pool_finish(&device->surface_state_pool);
3537
anv_state_pool_finish(&device->instruction_state_pool);
3538
anv_state_pool_finish(&device->dynamic_state_pool);
3539
anv_state_pool_finish(&device->general_state_pool);
3540
3541
anv_bo_pool_finish(&device->batch_bo_pool);
3542
3543
anv_bo_cache_finish(&device->bo_cache);
3544
3545
if (device->physical->use_softpin) {
3546
util_vma_heap_finish(&device->vma_hi);
3547
util_vma_heap_finish(&device->vma_cva);
3548
util_vma_heap_finish(&device->vma_lo);
3549
}
3550
3551
pthread_cond_destroy(&device->queue_submit);
3552
pthread_mutex_destroy(&device->mutex);
3553
3554
for (uint32_t i = 0; i < device->queue_count; i++)
3555
anv_queue_finish(&device->queues[i]);
3556
vk_free(&device->vk.alloc, device->queues);
3557
3558
anv_gem_destroy_context(device, device->context_id);
3559
3560
if (INTEL_DEBUG & DEBUG_BATCH)
3561
intel_batch_decode_ctx_finish(&device->decoder_ctx);
3562
3563
close(device->fd);
3564
3565
vk_device_finish(&device->vk);
3566
vk_free(&device->vk.alloc, device);
3567
}
3568
3569
VkResult anv_EnumerateInstanceLayerProperties(
3570
uint32_t* pPropertyCount,
3571
VkLayerProperties* pProperties)
3572
{
3573
if (pProperties == NULL) {
3574
*pPropertyCount = 0;
3575
return VK_SUCCESS;
3576
}
3577
3578
/* None supported at this time */
3579
return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
3580
}
3581
3582
void anv_GetDeviceQueue2(
3583
VkDevice _device,
3584
const VkDeviceQueueInfo2* pQueueInfo,
3585
VkQueue* pQueue)
3586
{
3587
ANV_FROM_HANDLE(anv_device, device, _device);
3588
struct anv_physical_device *pdevice = device->physical;
3589
3590
assert(pQueueInfo->queueFamilyIndex < pdevice->queue.family_count);
3591
struct anv_queue_family *queue_family =
3592
&pdevice->queue.families[pQueueInfo->queueFamilyIndex];
3593
3594
int idx_in_family = 0;
3595
struct anv_queue *queue = NULL;
3596
for (uint32_t i = 0; i < device->queue_count; i++) {
3597
if (device->queues[i].family != queue_family)
3598
continue;
3599
3600
if (idx_in_family == pQueueInfo->queueIndex) {
3601
queue = &device->queues[i];
3602
break;
3603
}
3604
3605
idx_in_family++;
3606
}
3607
assert(queue != NULL);
3608
3609
if (queue && queue->flags == pQueueInfo->flags)
3610
*pQueue = anv_queue_to_handle(queue);
3611
else
3612
*pQueue = NULL;
3613
}
3614
3615
void
3616
_anv_device_report_lost(struct anv_device *device)
3617
{
3618
assert(p_atomic_read(&device->_lost) > 0);
3619
3620
device->lost_reported = true;
3621
3622
for (uint32_t i = 0; i < device->queue_count; i++) {
3623
struct anv_queue *queue = &device->queues[i];
3624
if (queue->lost) {
3625
__vk_errorf(device->physical->instance, &device->vk.base,
3626
VK_ERROR_DEVICE_LOST,
3627
queue->error_file, queue->error_line,
3628
"%s", queue->error_msg);
3629
}
3630
}
3631
}
3632
3633
VkResult
3634
_anv_device_set_lost(struct anv_device *device,
3635
const char *file, int line,
3636
const char *msg, ...)
3637
{
3638
VkResult err;
3639
va_list ap;
3640
3641
if (p_atomic_read(&device->_lost) > 0)
3642
return VK_ERROR_DEVICE_LOST;
3643
3644
p_atomic_inc(&device->_lost);
3645
device->lost_reported = true;
3646
3647
va_start(ap, msg);
3648
err = __vk_errorv(device->physical->instance, &device->vk.base,
3649
VK_ERROR_DEVICE_LOST, file, line, msg, ap);
3650
va_end(ap);
3651
3652
if (env_var_as_boolean("ANV_ABORT_ON_DEVICE_LOSS", false))
3653
abort();
3654
3655
return err;
3656
}
3657
3658
VkResult
3659
_anv_queue_set_lost(struct anv_queue *queue,
3660
const char *file, int line,
3661
const char *msg, ...)
3662
{
3663
va_list ap;
3664
3665
if (queue->lost)
3666
return VK_ERROR_DEVICE_LOST;
3667
3668
queue->lost = true;
3669
3670
queue->error_file = file;
3671
queue->error_line = line;
3672
va_start(ap, msg);
3673
vsnprintf(queue->error_msg, sizeof(queue->error_msg),
3674
msg, ap);
3675
va_end(ap);
3676
3677
p_atomic_inc(&queue->device->_lost);
3678
3679
if (env_var_as_boolean("ANV_ABORT_ON_DEVICE_LOSS", false))
3680
abort();
3681
3682
return VK_ERROR_DEVICE_LOST;
3683
}
3684
3685
VkResult
3686
anv_device_query_status(struct anv_device *device)
3687
{
3688
/* This isn't likely as most of the callers of this function already check
3689
* for it. However, it doesn't hurt to check and it potentially lets us
3690
* avoid an ioctl.
3691
*/
3692
if (anv_device_is_lost(device))
3693
return VK_ERROR_DEVICE_LOST;
3694
3695
uint32_t active, pending;
3696
int ret = anv_gem_context_get_reset_stats(device->fd, device->context_id,
3697
&active, &pending);
3698
if (ret == -1) {
3699
/* We don't know the real error. */
3700
return anv_device_set_lost(device, "get_reset_stats failed: %m");
3701
}
3702
3703
if (active) {
3704
return anv_device_set_lost(device, "GPU hung on one of our command buffers");
3705
} else if (pending) {
3706
return anv_device_set_lost(device, "GPU hung with commands in-flight");
3707
}
3708
3709
return VK_SUCCESS;
3710
}
3711
3712
VkResult
3713
anv_device_bo_busy(struct anv_device *device, struct anv_bo *bo)
3714
{
3715
/* Note: This only returns whether or not the BO is in use by an i915 GPU.
3716
* Other usages of the BO (such as on different hardware) will not be
3717
* flagged as "busy" by this ioctl. Use with care.
3718
*/
3719
int ret = anv_gem_busy(device, bo->gem_handle);
3720
if (ret == 1) {
3721
return VK_NOT_READY;
3722
} else if (ret == -1) {
3723
/* We don't know the real error. */
3724
return anv_device_set_lost(device, "gem wait failed: %m");
3725
}
3726
3727
/* Query for device status after the busy call. If the BO we're checking
3728
* got caught in a GPU hang we don't want to return VK_SUCCESS to the
3729
* client because it clearly doesn't have valid data. Yes, this most
3730
* likely means an ioctl, but we just did an ioctl to query the busy status
3731
* so it's no great loss.
3732
*/
3733
return anv_device_query_status(device);
3734
}
3735
3736
VkResult
3737
anv_device_wait(struct anv_device *device, struct anv_bo *bo,
3738
int64_t timeout)
3739
{
3740
int ret = anv_gem_wait(device, bo->gem_handle, &timeout);
3741
if (ret == -1 && errno == ETIME) {
3742
return VK_TIMEOUT;
3743
} else if (ret == -1) {
3744
/* We don't know the real error. */
3745
return anv_device_set_lost(device, "gem wait failed: %m");
3746
}
3747
3748
/* Query for device status after the wait. If the BO we're waiting on got
3749
* caught in a GPU hang we don't want to return VK_SUCCESS to the client
3750
* because it clearly doesn't have valid data. Yes, this most likely means
3751
* an ioctl, but we just did an ioctl to wait so it's no great loss.
3752
*/
3753
return anv_device_query_status(device);
3754
}
3755
3756
VkResult anv_DeviceWaitIdle(
3757
VkDevice _device)
3758
{
3759
ANV_FROM_HANDLE(anv_device, device, _device);
3760
3761
if (anv_device_is_lost(device))
3762
return VK_ERROR_DEVICE_LOST;
3763
3764
for (uint32_t i = 0; i < device->queue_count; i++) {
3765
VkResult res = anv_queue_submit_simple_batch(&device->queues[i], NULL);
3766
if (res != VK_SUCCESS)
3767
return res;
3768
}
3769
3770
return VK_SUCCESS;
3771
}
3772
3773
uint64_t
3774
anv_vma_alloc(struct anv_device *device,
3775
uint64_t size, uint64_t align,
3776
enum anv_bo_alloc_flags alloc_flags,
3777
uint64_t client_address)
3778
{
3779
pthread_mutex_lock(&device->vma_mutex);
3780
3781
uint64_t addr = 0;
3782
3783
if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
3784
if (client_address) {
3785
if (util_vma_heap_alloc_addr(&device->vma_cva,
3786
client_address, size)) {
3787
addr = client_address;
3788
}
3789
} else {
3790
addr = util_vma_heap_alloc(&device->vma_cva, size, align);
3791
}
3792
/* We don't want to fall back to other heaps */
3793
goto done;
3794
}
3795
3796
assert(client_address == 0);
3797
3798
if (!(alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS))
3799
addr = util_vma_heap_alloc(&device->vma_hi, size, align);
3800
3801
if (addr == 0)
3802
addr = util_vma_heap_alloc(&device->vma_lo, size, align);
3803
3804
done:
3805
pthread_mutex_unlock(&device->vma_mutex);
3806
3807
assert(addr == intel_48b_address(addr));
3808
return intel_canonical_address(addr);
3809
}
3810
3811
void
3812
anv_vma_free(struct anv_device *device,
3813
uint64_t address, uint64_t size)
3814
{
3815
const uint64_t addr_48b = intel_48b_address(address);
3816
3817
pthread_mutex_lock(&device->vma_mutex);
3818
3819
if (addr_48b >= LOW_HEAP_MIN_ADDRESS &&
3820
addr_48b <= LOW_HEAP_MAX_ADDRESS) {
3821
util_vma_heap_free(&device->vma_lo, addr_48b, size);
3822
} else if (addr_48b >= CLIENT_VISIBLE_HEAP_MIN_ADDRESS &&
3823
addr_48b <= CLIENT_VISIBLE_HEAP_MAX_ADDRESS) {
3824
util_vma_heap_free(&device->vma_cva, addr_48b, size);
3825
} else {
3826
assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS);
3827
util_vma_heap_free(&device->vma_hi, addr_48b, size);
3828
}
3829
3830
pthread_mutex_unlock(&device->vma_mutex);
3831
}
3832
3833
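/* Allocation takes one of several paths depending on the pNext chain: import
 * from (or export to) an AHardwareBuffer, import from an opaque/dma-buf fd,
 * import from a host pointer, or a plain BO allocation. All of them converge
 * on the shared heap-accounting code at the success: label below.
 */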
VkResult anv_AllocateMemory(
3834
VkDevice _device,
3835
const VkMemoryAllocateInfo* pAllocateInfo,
3836
const VkAllocationCallbacks* pAllocator,
3837
VkDeviceMemory* pMem)
3838
{
3839
ANV_FROM_HANDLE(anv_device, device, _device);
3840
struct anv_physical_device *pdevice = device->physical;
3841
struct anv_device_memory *mem;
3842
VkResult result = VK_SUCCESS;
3843
3844
assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
3845
3846
/* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
3847
assert(pAllocateInfo->allocationSize > 0);
3848
3849
VkDeviceSize aligned_alloc_size =
3850
align_u64(pAllocateInfo->allocationSize, 4096);
3851
3852
if (aligned_alloc_size > MAX_MEMORY_ALLOCATION_SIZE)
3853
return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
3854
3855
assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count);
3856
struct anv_memory_type *mem_type =
3857
&pdevice->memory.types[pAllocateInfo->memoryTypeIndex];
3858
assert(mem_type->heapIndex < pdevice->memory.heap_count);
3859
struct anv_memory_heap *mem_heap =
3860
&pdevice->memory.heaps[mem_type->heapIndex];
3861
3862
uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
3863
if (mem_heap_used + aligned_alloc_size > mem_heap->size)
3864
return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
3865
3866
mem = vk_object_alloc(&device->vk, pAllocator, sizeof(*mem),
3867
VK_OBJECT_TYPE_DEVICE_MEMORY);
3868
if (mem == NULL)
3869
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3870
3871
mem->type = mem_type;
3872
mem->map = NULL;
3873
mem->map_size = 0;
3874
mem->ahw = NULL;
3875
mem->host_ptr = NULL;
3876
3877
enum anv_bo_alloc_flags alloc_flags = 0;
3878
3879
const VkExportMemoryAllocateInfo *export_info = NULL;
3880
const VkImportAndroidHardwareBufferInfoANDROID *ahw_import_info = NULL;
3881
const VkImportMemoryFdInfoKHR *fd_info = NULL;
3882
const VkImportMemoryHostPointerInfoEXT *host_ptr_info = NULL;
3883
const VkMemoryDedicatedAllocateInfo *dedicated_info = NULL;
3884
VkMemoryAllocateFlags vk_flags = 0;
3885
uint64_t client_address = 0;
3886
3887
vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
3888
switch (ext->sType) {
3889
case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
3890
export_info = (void *)ext;
3891
break;
3892
3893
case VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID:
3894
ahw_import_info = (void *)ext;
3895
break;
3896
3897
case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
3898
fd_info = (void *)ext;
3899
break;
3900
3901
case VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT:
3902
host_ptr_info = (void *)ext;
3903
break;
3904
3905
case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO: {
3906
const VkMemoryAllocateFlagsInfo *flags_info = (void *)ext;
3907
vk_flags = flags_info->flags;
3908
break;
3909
}
3910
3911
case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO:
3912
dedicated_info = (void *)ext;
3913
break;
3914
3915
case VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO_KHR: {
3916
const VkMemoryOpaqueCaptureAddressAllocateInfoKHR *addr_info =
3917
(const VkMemoryOpaqueCaptureAddressAllocateInfoKHR *)ext;
3918
client_address = addr_info->opaqueCaptureAddress;
3919
break;
3920
}
3921
3922
default:
3923
anv_debug_ignored_stype(ext->sType);
3924
break;
3925
}
3926
}
3927
3928
/* By default, we want all VkDeviceMemory objects to support CCS */
3929
if (device->physical->has_implicit_ccs)
3930
alloc_flags |= ANV_BO_ALLOC_IMPLICIT_CCS;
3931
3932
if (vk_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR)
3933
alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;
3934
3935
if ((export_info && export_info->handleTypes) ||
3936
(fd_info && fd_info->handleType) ||
3937
(host_ptr_info && host_ptr_info->handleType)) {
3938
/* Anything imported or exported is EXTERNAL */
3939
alloc_flags |= ANV_BO_ALLOC_EXTERNAL;
3940
3941
/* We can't have implicit CCS on external memory with an AUX-table.
3942
* Doing so would require us to sync the aux tables across processes
3943
* which is impractical.
3944
*/
3945
if (device->info.has_aux_map)
3946
alloc_flags &= ~ANV_BO_ALLOC_IMPLICIT_CCS;
3947
}
3948
3949
/* Check if we need to support Android HW buffer export. If so,
3950
* create AHardwareBuffer and import memory from it.
3951
*/
3952
bool android_export = false;
3953
if (export_info && export_info->handleTypes &
3954
VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)
3955
android_export = true;
3956
3957
if (ahw_import_info) {
3958
result = anv_import_ahw_memory(_device, mem, ahw_import_info);
3959
if (result != VK_SUCCESS)
3960
goto fail;
3961
3962
goto success;
3963
} else if (android_export) {
3964
result = anv_create_ahw_memory(_device, mem, pAllocateInfo);
3965
if (result != VK_SUCCESS)
3966
goto fail;
3967
3968
goto success;
3969
}
3970
3971
/* The Vulkan spec permits handleType to be 0, in which case the struct is
3972
* ignored.
3973
*/
3974
if (fd_info && fd_info->handleType) {
3975
/* At the moment, we support only the below handle types. */
3976
assert(fd_info->handleType ==
3977
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3978
fd_info->handleType ==
3979
VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3980
3981
result = anv_device_import_bo(device, fd_info->fd, alloc_flags,
3982
client_address, &mem->bo);
3983
if (result != VK_SUCCESS)
3984
goto fail;
3985
3986
/* For security purposes, we reject importing the bo if it's smaller
3987
* than the requested allocation size. This prevents a malicious client
3988
* from passing a buffer to a trusted client, lying about the size, and
3989
* telling the trusted client to try and texture from an image that goes
3990
* out-of-bounds. This sort of thing could lead to GPU hangs or worse
3991
* in the trusted client. The trusted client can protect itself against
3992
* this sort of attack but only if it can trust the buffer size.
3993
*/
3994
if (mem->bo->size < aligned_alloc_size) {
3995
result = vk_errorf(device, &device->vk.base,
3996
VK_ERROR_INVALID_EXTERNAL_HANDLE,
3997
"aligned allocationSize too large for "
3998
"VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: "
3999
"%"PRIu64"B > %"PRIu64"B",
4000
aligned_alloc_size, mem->bo->size);
4001
anv_device_release_bo(device, mem->bo);
4002
goto fail;
4003
}
4004
4005
/* From the Vulkan spec:
4006
*
4007
* "Importing memory from a file descriptor transfers ownership of
4008
* the file descriptor from the application to the Vulkan
4009
* implementation. The application must not perform any operations on
4010
* the file descriptor after a successful import."
4011
*
4012
* If the import fails, we leave the file descriptor open.
4013
*/
4014
close(fd_info->fd);
4015
goto success;
4016
}
4017
4018
if (host_ptr_info && host_ptr_info->handleType) {
4019
if (host_ptr_info->handleType ==
4020
VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT) {
4021
result = vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
4022
goto fail;
4023
}
4024
4025
assert(host_ptr_info->handleType ==
4026
VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
4027
4028
result = anv_device_import_bo_from_host_ptr(device,
4029
host_ptr_info->pHostPointer,
4030
pAllocateInfo->allocationSize,
4031
alloc_flags,
4032
client_address,
4033
&mem->bo);
4034
if (result != VK_SUCCESS)
4035
goto fail;
4036
4037
mem->host_ptr = host_ptr_info->pHostPointer;
4038
goto success;
4039
}
4040
4041
/* Set the ALLOC_LOCAL_MEM flag when the requested memory type has
4042
* VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT set, so the BO is placed in device-local memory.
4043
*/
4044
if (mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)
4045
alloc_flags |= ANV_BO_ALLOC_LOCAL_MEM;
4046
4047
/* Regular allocate (not importing memory). */
4048
4049
result = anv_device_alloc_bo(device, "user", pAllocateInfo->allocationSize,
4050
alloc_flags, client_address, &mem->bo);
4051
if (result != VK_SUCCESS)
4052
goto fail;
4053
4054
if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) {
4055
ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
4056
4057
/* Some legacy (non-modifiers) consumers need the tiling to be set on
4058
* the BO. In this case, we have a dedicated allocation.
4059
*/
4060
if (image->needs_set_tiling) {
4061
const uint32_t i915_tiling =
4062
isl_tiling_to_i915_tiling(image->planes[0].primary_surface.isl.tiling);
4063
int ret = anv_gem_set_tiling(device, mem->bo->gem_handle,
4064
image->planes[0].primary_surface.isl.row_pitch_B,
4065
i915_tiling);
4066
if (ret) {
4067
anv_device_release_bo(device, mem->bo);
4068
result = vk_errorf(device, &device->vk.base,
4069
VK_ERROR_OUT_OF_DEVICE_MEMORY,
4070
"failed to set BO tiling: %m");
4071
goto fail;
4072
}
4073
}
4074
}
4075
4076
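/* Heap accounting: charge the new BO against its heap, and roll the charge
 * back (and fail) if this pushed the heap past its advertised size.
 */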
success:
4077
mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
4078
if (mem_heap_used > mem_heap->size) {
4079
p_atomic_add(&mem_heap->used, -mem->bo->size);
4080
anv_device_release_bo(device, mem->bo);
4081
result = vk_errorf(device, &device->vk.base,
4082
VK_ERROR_OUT_OF_DEVICE_MEMORY,
4083
"Out of heap memory");
4084
goto fail;
4085
}
4086
4087
pthread_mutex_lock(&device->mutex);
4088
list_addtail(&mem->link, &device->memory_objects);
4089
pthread_mutex_unlock(&device->mutex);
4090
4091
*pMem = anv_device_memory_to_handle(mem);
4092
4093
return VK_SUCCESS;
4094
4095
fail:
4096
vk_object_free(&device->vk, pAllocator, mem);
4097
4098
return result;
4099
}
4100
4101
VkResult anv_GetMemoryFdKHR(
4102
VkDevice device_h,
4103
const VkMemoryGetFdInfoKHR* pGetFdInfo,
4104
int* pFd)
4105
{
4106
ANV_FROM_HANDLE(anv_device, dev, device_h);
4107
ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory);
4108
4109
assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
4110
4111
assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
4112
pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
4113
4114
return anv_device_export_bo(dev, mem->bo, pFd);
4115
}
4116
4117
VkResult anv_GetMemoryFdPropertiesKHR(
4118
VkDevice _device,
4119
VkExternalMemoryHandleTypeFlagBits handleType,
4120
int fd,
4121
VkMemoryFdPropertiesKHR* pMemoryFdProperties)
4122
{
4123
ANV_FROM_HANDLE(anv_device, device, _device);
4124
4125
switch (handleType) {
4126
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
4127
/* dma-buf can be imported as any memory type */
4128
pMemoryFdProperties->memoryTypeBits =
4129
(1 << device->physical->memory.type_count) - 1;
4130
return VK_SUCCESS;
4131
4132
default:
4133
/* The valid usage section for this function says:
4134
*
4135
* "handleType must not be one of the handle types defined as
4136
* opaque."
4137
*
4138
* So opaque handle types fall into the default "unsupported" case.
4139
*/
4140
return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
4141
}
4142
}
4143
4144
VkResult anv_GetMemoryHostPointerPropertiesEXT(
4145
VkDevice _device,
4146
VkExternalMemoryHandleTypeFlagBits handleType,
4147
const void* pHostPointer,
4148
VkMemoryHostPointerPropertiesEXT* pMemoryHostPointerProperties)
4149
{
4150
ANV_FROM_HANDLE(anv_device, device, _device);
4151
4152
assert(pMemoryHostPointerProperties->sType ==
4153
VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT);
4154
4155
switch (handleType) {
4156
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
4157
/* Host memory can be imported as any memory type. */
4158
pMemoryHostPointerProperties->memoryTypeBits =
4159
(1ull << device->physical->memory.type_count) - 1;
4160
4161
return VK_SUCCESS;
4162
4163
default:
4164
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
4165
}
4166
}
4167
4168
void anv_FreeMemory(
4169
VkDevice _device,
4170
VkDeviceMemory _mem,
4171
const VkAllocationCallbacks* pAllocator)
4172
{
4173
ANV_FROM_HANDLE(anv_device, device, _device);
4174
ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
4175
4176
if (mem == NULL)
4177
return;
4178
4179
pthread_mutex_lock(&device->mutex);
4180
list_del(&mem->link);
4181
pthread_mutex_unlock(&device->mutex);
4182
4183
if (mem->map)
4184
anv_UnmapMemory(_device, _mem);
4185
4186
p_atomic_add(&device->physical->memory.heaps[mem->type->heapIndex].used,
4187
-mem->bo->size);
4188
4189
anv_device_release_bo(device, mem->bo);
4190
4191
#if defined(ANDROID) && ANDROID_API_LEVEL >= 26
4192
if (mem->ahw)
4193
AHardwareBuffer_release(mem->ahw);
4194
#endif
4195
4196
vk_object_free(&device->vk, pAllocator, mem);
4197
}
4198
4199
VkResult anv_MapMemory(
4200
VkDevice _device,
4201
VkDeviceMemory _memory,
4202
VkDeviceSize offset,
4203
VkDeviceSize size,
4204
VkMemoryMapFlags flags,
4205
void** ppData)
4206
{
4207
ANV_FROM_HANDLE(anv_device, device, _device);
4208
ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
4209
4210
if (mem == NULL) {
4211
*ppData = NULL;
4212
return VK_SUCCESS;
4213
}
4214
4215
if (mem->host_ptr) {
4216
*ppData = mem->host_ptr + offset;
4217
return VK_SUCCESS;
4218
}
4219
4220
if (size == VK_WHOLE_SIZE)
4221
size = mem->bo->size - offset;
4222
4223
/* From the Vulkan spec version 1.0.32 docs for MapMemory:
4224
*
4225
* * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
4226
* assert(size != 0);
4227
* * If size is not equal to VK_WHOLE_SIZE, size must be less than or
4228
* equal to the size of the memory minus offset
4229
*/
4230
assert(size > 0);
4231
assert(offset + size <= mem->bo->size);
4232
4233
/* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only
4234
* takes a VkDeviceMemory pointer, it seems like only one map of the memory
4235
* at a time is valid. We could just mmap up front and return an offset
4236
* pointer here, but that may exhaust virtual memory on 32 bit
4237
* userspace. */
4238
4239
uint32_t gem_flags = 0;
4240
4241
if (!device->info.has_llc &&
4242
(mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
4243
gem_flags |= I915_MMAP_WC;
4244
4245
/* GEM will fail to map if the offset isn't 4k-aligned. Round down. */
4246
uint64_t map_offset;
4247
if (!device->physical->has_mmap_offset)
4248
map_offset = offset & ~4095ull;
4249
else
4250
map_offset = 0;
4251
assert(offset >= map_offset);
4252
uint64_t map_size = (offset + size) - map_offset;
4253
4254
/* Let's map whole pages */
4255
map_size = align_u64(map_size, 4096);
4256
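/* Worked example for the legacy (non-mmap_offset) path: offset = 5000,
 * size = 100 gives map_offset = 4096, map_size = align_u64(1004, 4096) = 4096,
 * and the pointer returned to the application is map + (5000 - 4096).
 */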
4257
void *map = anv_gem_mmap(device, mem->bo->gem_handle,
4258
map_offset, map_size, gem_flags);
4259
if (map == MAP_FAILED)
4260
return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
4261
4262
mem->map = map;
4263
mem->map_size = map_size;
4264
4265
*ppData = mem->map + (offset - map_offset);
4266
4267
return VK_SUCCESS;
4268
}
4269
4270
void anv_UnmapMemory(
4271
VkDevice _device,
4272
VkDeviceMemory _memory)
4273
{
4274
ANV_FROM_HANDLE(anv_device, device, _device);
4275
ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
4276
4277
if (mem == NULL || mem->host_ptr)
4278
return;
4279
4280
anv_gem_munmap(device, mem->map, mem->map_size);
4281
4282
mem->map = NULL;
4283
mem->map_size = 0;
4284
}
4285
4286
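/* Clflush the CPU cache lines covering each mapped range (used for both
 * flush and invalidate below); coherent memory types are skipped and range
 * sizes are clamped to the current mapping.
 */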
static void
4287
clflush_mapped_ranges(struct anv_device *device,
4288
uint32_t count,
4289
const VkMappedMemoryRange *ranges)
4290
{
4291
for (uint32_t i = 0; i < count; i++) {
4292
ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory);
4293
if (ranges[i].offset >= mem->map_size)
4294
continue;
4295
4296
if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
4297
continue;
4298
4299
intel_clflush_range(mem->map + ranges[i].offset,
4300
MIN2(ranges[i].size, mem->map_size - ranges[i].offset));
4301
}
4302
}
4303
4304
VkResult anv_FlushMappedMemoryRanges(
4305
VkDevice _device,
4306
uint32_t memoryRangeCount,
4307
const VkMappedMemoryRange* pMemoryRanges)
4308
{
4309
ANV_FROM_HANDLE(anv_device, device, _device);
4310
4311
if (!device->physical->memory.need_clflush)
4312
return VK_SUCCESS;
4313
4314
/* Make sure the writes we're flushing have landed. */
4315
__builtin_ia32_mfence();
4316
4317
clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
4318
4319
return VK_SUCCESS;
4320
}
4321
4322
VkResult anv_InvalidateMappedMemoryRanges(
4323
VkDevice _device,
4324
uint32_t memoryRangeCount,
4325
const VkMappedMemoryRange* pMemoryRanges)
4326
{
4327
ANV_FROM_HANDLE(anv_device, device, _device);
4328
4329
if (!device->physical->memory.need_clflush)
4330
return VK_SUCCESS;
4331
4332
clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
4333
4334
/* Make sure no reads get moved up above the invalidate. */
4335
__builtin_ia32_mfence();
4336
4337
return VK_SUCCESS;
4338
}
4339
4340
void anv_GetBufferMemoryRequirements2(
4341
VkDevice _device,
4342
const VkBufferMemoryRequirementsInfo2* pInfo,
4343
VkMemoryRequirements2* pMemoryRequirements)
4344
{
4345
ANV_FROM_HANDLE(anv_device, device, _device);
4346
ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
4347
4348
/* The Vulkan spec (git aaed022) says:
4349
*
4350
* memoryTypeBits is a bitfield and contains one bit set for every
4351
* supported memory type for the resource. The bit `1<<i` is set if and
4352
* only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
4353
* structure for the physical device is supported.
4354
*/
4355
uint32_t memory_types = (1ull << device->physical->memory.type_count) - 1;
4356
4357
/* Base alignment requirement of a cache line */
4358
uint32_t alignment = 16;
4359
4360
if (buffer->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)
4361
alignment = MAX2(alignment, ANV_UBO_ALIGNMENT);
4362
4363
pMemoryRequirements->memoryRequirements.size = buffer->size;
4364
pMemoryRequirements->memoryRequirements.alignment = alignment;
4365
4366
/* Storage and Uniform buffers should have their size aligned to
4367
* 32-bits to avoid boundary checks when the last DWord is not complete.
4368
* This ensures that no internal padding is needed for
4369
* 16-bit types.
4370
*/
4371
if (device->robust_buffer_access &&
4372
(buffer->usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT ||
4373
buffer->usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
4374
pMemoryRequirements->memoryRequirements.size = align_u64(buffer->size, 4);
4375
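/* e.g. a 10-byte storage buffer reports a 12-byte requirement here, so the
 * DWord containing its last 16-bit element is fully backed by the allocation.
 */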
4376
pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
4377
4378
vk_foreach_struct(ext, pMemoryRequirements->pNext) {
4379
switch (ext->sType) {
4380
case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
4381
VkMemoryDedicatedRequirements *requirements = (void *)ext;
4382
requirements->prefersDedicatedAllocation = false;
4383
requirements->requiresDedicatedAllocation = false;
4384
break;
4385
}
4386
4387
default:
4388
anv_debug_ignored_stype(ext->sType);
4389
break;
4390
}
4391
}
4392
}
4393
4394
void anv_GetDeviceMemoryCommitment(
4395
VkDevice device,
4396
VkDeviceMemory memory,
4397
VkDeviceSize* pCommittedMemoryInBytes)
4398
{
4399
*pCommittedMemoryInBytes = 0;
4400
}
4401
4402
static void
4403
anv_bind_buffer_memory(const VkBindBufferMemoryInfo *pBindInfo)
4404
{
4405
ANV_FROM_HANDLE(anv_device_memory, mem, pBindInfo->memory);
4406
ANV_FROM_HANDLE(anv_buffer, buffer, pBindInfo->buffer);
4407
4408
assert(pBindInfo->sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO);
4409
4410
if (mem) {
4411
assert(pBindInfo->memoryOffset < mem->bo->size);
4412
assert(mem->bo->size - pBindInfo->memoryOffset >= buffer->size);
4413
buffer->address = (struct anv_address) {
4414
.bo = mem->bo,
4415
.offset = pBindInfo->memoryOffset,
4416
};
4417
} else {
4418
buffer->address = ANV_NULL_ADDRESS;
4419
}
4420
}
4421
4422
VkResult anv_BindBufferMemory2(
4423
VkDevice device,
4424
uint32_t bindInfoCount,
4425
const VkBindBufferMemoryInfo* pBindInfos)
4426
{
4427
for (uint32_t i = 0; i < bindInfoCount; i++)
4428
anv_bind_buffer_memory(&pBindInfos[i]);
4429
4430
return VK_SUCCESS;
4431
}
4432
4433
VkResult anv_QueueBindSparse(
4434
VkQueue _queue,
4435
uint32_t bindInfoCount,
4436
const VkBindSparseInfo* pBindInfo,
4437
VkFence fence)
4438
{
4439
ANV_FROM_HANDLE(anv_queue, queue, _queue);
4440
if (anv_device_is_lost(queue->device))
4441
return VK_ERROR_DEVICE_LOST;
4442
4443
return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
4444
}
4445
4446
// Event functions
4447
4448
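// Each VkEvent is backed by a single uint64_t in the dynamic state pool; the
// host-side Set/Reset/GetStatus entry points below simply write or read
// VK_EVENT_SET / VK_EVENT_RESET through the CPU mapping of that state.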
VkResult anv_CreateEvent(
4449
VkDevice _device,
4450
const VkEventCreateInfo* pCreateInfo,
4451
const VkAllocationCallbacks* pAllocator,
4452
VkEvent* pEvent)
4453
{
4454
ANV_FROM_HANDLE(anv_device, device, _device);
4455
struct anv_event *event;
4456
4457
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);
4458
4459
event = vk_object_alloc(&device->vk, pAllocator, sizeof(*event),
4460
VK_OBJECT_TYPE_EVENT);
4461
if (event == NULL)
4462
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
4463
4464
event->state = anv_state_pool_alloc(&device->dynamic_state_pool,
4465
sizeof(uint64_t), 8);
4466
*(uint64_t *)event->state.map = VK_EVENT_RESET;
4467
4468
*pEvent = anv_event_to_handle(event);
4469
4470
return VK_SUCCESS;
4471
}
4472
4473
void anv_DestroyEvent(
4474
VkDevice _device,
4475
VkEvent _event,
4476
const VkAllocationCallbacks* pAllocator)
4477
{
4478
ANV_FROM_HANDLE(anv_device, device, _device);
4479
ANV_FROM_HANDLE(anv_event, event, _event);
4480
4481
if (!event)
4482
return;
4483
4484
anv_state_pool_free(&device->dynamic_state_pool, event->state);
4485
4486
vk_object_free(&device->vk, pAllocator, event);
4487
}
4488
4489
VkResult anv_GetEventStatus(
4490
VkDevice _device,
4491
VkEvent _event)
4492
{
4493
ANV_FROM_HANDLE(anv_device, device, _device);
4494
ANV_FROM_HANDLE(anv_event, event, _event);
4495
4496
if (anv_device_is_lost(device))
4497
return VK_ERROR_DEVICE_LOST;
4498
4499
return *(uint64_t *)event->state.map;
4500
}
4501
4502
VkResult anv_SetEvent(
4503
VkDevice _device,
4504
VkEvent _event)
4505
{
4506
ANV_FROM_HANDLE(anv_event, event, _event);
4507
4508
*(uint64_t *)event->state.map = VK_EVENT_SET;
4509
4510
return VK_SUCCESS;
4511
}
4512
4513
VkResult anv_ResetEvent(
4514
VkDevice _device,
4515
VkEvent _event)
4516
{
4517
ANV_FROM_HANDLE(anv_event, event, _event);
4518
4519
*(uint64_t *)event->state.map = VK_EVENT_RESET;
4520
4521
return VK_SUCCESS;
4522
}
4523
4524
// Buffer functions
4525
4526
VkResult anv_CreateBuffer(
4527
VkDevice _device,
4528
const VkBufferCreateInfo* pCreateInfo,
4529
const VkAllocationCallbacks* pAllocator,
4530
VkBuffer* pBuffer)
4531
{
4532
ANV_FROM_HANDLE(anv_device, device, _device);
4533
struct anv_buffer *buffer;
4534
4535
/* Don't allow creating buffers bigger than our address space. The real
4536
* issue here is that we may align up the buffer size and we don't want
4537
* that rounding to cause a roll-over. However, no one has any business
4538
* allocating a buffer larger than our GTT size.
4539
*/
4540
if (pCreateInfo->size > device->physical->gtt_size)
4541
return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
4542
4543
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
4544
4545
buffer = vk_object_alloc(&device->vk, pAllocator, sizeof(*buffer),
4546
VK_OBJECT_TYPE_BUFFER);
4547
if (buffer == NULL)
4548
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
4549
4550
buffer->create_flags = pCreateInfo->flags;
4551
buffer->size = pCreateInfo->size;
4552
buffer->usage = pCreateInfo->usage;
4553
buffer->address = ANV_NULL_ADDRESS;
4554
4555
*pBuffer = anv_buffer_to_handle(buffer);
4556
4557
return VK_SUCCESS;
4558
}
4559
4560
void anv_DestroyBuffer(
4561
VkDevice _device,
4562
VkBuffer _buffer,
4563
const VkAllocationCallbacks* pAllocator)
4564
{
4565
ANV_FROM_HANDLE(anv_device, device, _device);
4566
ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
4567
4568
if (!buffer)
4569
return;
4570
4571
vk_object_free(&device->vk, pAllocator, buffer);
4572
}
4573
4574
VkDeviceAddress anv_GetBufferDeviceAddress(
4575
VkDevice device,
4576
const VkBufferDeviceAddressInfoKHR* pInfo)
4577
{
4578
ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
4579
4580
assert(!anv_address_is_null(buffer->address));
4581
assert(buffer->address.bo->flags & EXEC_OBJECT_PINNED);
4582
4583
return anv_address_physical(buffer->address);
4584
}
4585
4586
uint64_t anv_GetBufferOpaqueCaptureAddress(
4587
VkDevice device,
4588
const VkBufferDeviceAddressInfoKHR* pInfo)
4589
{
4590
return 0;
4591
}
4592
4593
uint64_t anv_GetDeviceMemoryOpaqueCaptureAddress(
4594
VkDevice device,
4595
const VkDeviceMemoryOpaqueCaptureAddressInfoKHR* pInfo)
4596
{
4597
ANV_FROM_HANDLE(anv_device_memory, memory, pInfo->memory);
4598
4599
assert(memory->bo->flags & EXEC_OBJECT_PINNED);
4600
assert(memory->bo->has_client_visible_address);
4601
4602
return intel_48b_address(memory->bo->offset);
4603
}
4604
4605
void
4606
anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
4607
enum isl_format format,
4608
isl_surf_usage_flags_t usage,
4609
struct anv_address address,
4610
uint32_t range, uint32_t stride)
4611
{
4612
isl_buffer_fill_state(&device->isl_dev, state.map,
4613
.address = anv_address_physical(address),
4614
.mocs = isl_mocs(&device->isl_dev, usage,
4615
address.bo && address.bo->is_external),
4616
.size_B = range,
4617
.format = format,
4618
.swizzle = ISL_SWIZZLE_IDENTITY,
4619
.stride_B = stride);
4620
}

void anv_DestroySampler(
    VkDevice                                    _device,
    VkSampler                                   _sampler,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);

   if (!sampler)
      return;

   if (sampler->bindless_state.map) {
      anv_state_pool_free(&device->dynamic_state_pool,
                          sampler->bindless_state);
   }

   if (sampler->custom_border_color.map) {
      anv_state_reserved_pool_free(&device->custom_border_colors,
                                   sampler->custom_border_color);
   }

   vk_object_free(&device->vk, pAllocator, sampler);
}

VkResult anv_CreateFramebuffer(
    VkDevice                                    _device,
    const VkFramebufferCreateInfo*              pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkFramebuffer*                              pFramebuffer)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_framebuffer *framebuffer;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

   size_t size = sizeof(*framebuffer);

   /* VK_KHR_imageless_framebuffer extension says:
    *
    *    If flags includes VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT_KHR,
    *    parameter pAttachments is ignored.
    */
   if (!(pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT_KHR))
      size += sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount;

   framebuffer = vk_object_alloc(&device->vk, pAllocator, size,
                                 VK_OBJECT_TYPE_FRAMEBUFFER);
   if (framebuffer == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   framebuffer->width = pCreateInfo->width;
   framebuffer->height = pCreateInfo->height;
   framebuffer->layers = pCreateInfo->layers;

   if (!(pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT_KHR)) {
      for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
         ANV_FROM_HANDLE(anv_image_view, iview, pCreateInfo->pAttachments[i]);
         framebuffer->attachments[i] = iview;
      }
      framebuffer->attachment_count = pCreateInfo->attachmentCount;
   }

   *pFramebuffer = anv_framebuffer_to_handle(framebuffer);

   return VK_SUCCESS;
}

void anv_DestroyFramebuffer(
    VkDevice                                    _device,
    VkFramebuffer                               _fb,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_framebuffer, fb, _fb);

   if (!fb)
      return;

   vk_object_free(&device->vk, pAllocator, fb);
}

static const VkTimeDomainEXT anv_time_domains[] = {
   VK_TIME_DOMAIN_DEVICE_EXT,
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
#ifdef CLOCK_MONOTONIC_RAW
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
#endif
};

VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
    VkPhysicalDevice                            physicalDevice,
    uint32_t                                    *pTimeDomainCount,
    VkTimeDomainEXT                             *pTimeDomains)
{
   int d;
   VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);

   for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
      vk_outarray_append(&out, i) {
         *i = anv_time_domains[d];
      }
   }

   return vk_outarray_status(&out);
}

static uint64_t
anv_clock_gettime(clockid_t clock_id)
{
   struct timespec current;
   int ret;

   ret = clock_gettime(clock_id, &current);
#ifdef CLOCK_MONOTONIC_RAW
   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
      ret = clock_gettime(CLOCK_MONOTONIC, &current);
#endif
   if (ret < 0)
      return 0;

   return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
}

VkResult anv_GetCalibratedTimestampsEXT(
    VkDevice                                    _device,
    uint32_t                                    timestampCount,
    const VkCalibratedTimestampInfoEXT          *pTimestampInfos,
    uint64_t                                    *pTimestamps,
    uint64_t                                    *pMaxDeviation)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   uint64_t timestamp_frequency = device->info.timestamp_frequency;
   int ret;
   int d;
   uint64_t begin, end;
   uint64_t max_clock_period = 0;

#ifdef CLOCK_MONOTONIC_RAW
   begin = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   begin = anv_clock_gettime(CLOCK_MONOTONIC);
#endif

   for (d = 0; d < timestampCount; d++) {
      switch (pTimestampInfos[d].timeDomain) {
      case VK_TIME_DOMAIN_DEVICE_EXT:
         ret = anv_gem_reg_read(device->fd, TIMESTAMP | I915_REG_READ_8B_WA,
                                &pTimestamps[d]);

         if (ret != 0) {
            return anv_device_set_lost(device, "Failed to read the TIMESTAMP "
                                               "register: %m");
         }
         uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
         max_clock_period = MAX2(max_clock_period, device_period);
         break;
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
         pTimestamps[d] = anv_clock_gettime(CLOCK_MONOTONIC);
         max_clock_period = MAX2(max_clock_period, 1);
         break;

#ifdef CLOCK_MONOTONIC_RAW
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
         pTimestamps[d] = begin;
         break;
#endif
      default:
         pTimestamps[d] = 0;
         break;
      }
   }

#ifdef CLOCK_MONOTONIC_RAW
   end = anv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   end = anv_clock_gettime(CLOCK_MONOTONIC);
#endif

   /*
    * The maximum deviation is the sum of the interval over which we
    * perform the sampling and the maximum period of any sampled
    * clock. That's because the maximum skew between any two sampled
    * clock edges is when the sampled clock with the largest period is
    * sampled at the end of that period but right at the beginning of the
    * sampling interval and some other clock is sampled right at the
    * beginning of its sampling period and right at the end of the
    * sampling interval. Let's assume the GPU has the longest clock
    * period and that the application is sampling GPU and monotonic:
    *
    *                               s                 e
    *                     w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
    *   Raw           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
    *
    *                               g
    *                     0         1         2         3
    *   GPU           -----_____-----_____-----_____-----_____
    *
    *                               m
    *                               x y z 0 1 2 3 4 5 6 7 8 9 a b c
    *   Monotonic                 -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
    *
    *   Interval                  <----------------->
    *   Deviation        <-------------------------->
    *
    *   s = read(raw)       2
    *   g = read(GPU)       1
    *   m = read(monotonic) 2
    *   e = read(raw)       b
    *
    * We round the sample interval up by one tick to cover sampling error
    * in the interval clock.
    */

   uint64_t sample_interval = end - begin + 1;

   *pMaxDeviation = sample_interval + max_clock_period;

   return VK_SUCCESS;
}
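
/* A worked example of the formula above, with hypothetical numbers (neither
 * the frequency nor the interval is taken from any particular device): if
 * device->info.timestamp_frequency were 12000000 (12 MHz), then
 * device_period = DIV_ROUND_UP(1000000000, 12000000) = 84 ns, and if the two
 * CPU clock reads bracketing the loop differed by 2000 ns, then
 * sample_interval = 2000 + 1 = 2001 ns and the reported *pMaxDeviation would
 * be 2001 + 84 = 2085 ns. The deviation thus grows with both the CPU-side
 * sampling window and the period of the coarsest sampled clock.
 */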

void anv_GetPhysicalDeviceMultisamplePropertiesEXT(
    VkPhysicalDevice                            physicalDevice,
    VkSampleCountFlagBits                       samples,
    VkMultisamplePropertiesEXT*                 pMultisampleProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);

   assert(pMultisampleProperties->sType ==
          VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT);

   VkExtent2D grid_size;
   if (samples & isl_device_get_sample_counts(&physical_device->isl_dev)) {
      grid_size.width = 1;
      grid_size.height = 1;
   } else {
      grid_size.width = 0;
      grid_size.height = 0;
   }
   pMultisampleProperties->maxSampleLocationGridSize = grid_size;

   vk_foreach_struct(ext, pMultisampleProperties->pNext)
      anv_debug_ignored_stype(ext->sType);
}

/* vk_icd.h does not declare this function, so we declare it here to
 * suppress Wmissing-prototypes.
 */
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion);

PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion)
{
   /* For the full details on loader interface versioning, see
    * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
    * What follows is a condensed summary, to help you navigate the large and
    * confusing official doc.
    *
    *   - Loader interface v0 is incompatible with later versions. We don't
    *     support it.
    *
    *   - In loader interface v1:
    *       - The first ICD entrypoint called by the loader is
    *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
    *         entrypoint.
    *       - The ICD must statically expose no other Vulkan symbol unless it is
    *         linked with -Bsymbolic.
    *       - Each dispatchable Vulkan handle created by the ICD must be
    *         a pointer to a struct whose first member is VK_LOADER_DATA. The
    *         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
    *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
    *         vkDestroySurfaceKHR(). The ICD must be capable of working with
    *         such loader-managed surfaces.
    *
    *   - Loader interface v2 differs from v1 in:
    *       - The first ICD entrypoint called by the loader is
    *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
    *         statically expose this entrypoint.
    *
    *   - Loader interface v3 differs from v2 in:
    *       - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
    *         vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
    *         because the loader no longer does so.
    *
    *   - Loader interface v4 differs from v3 in:
    *       - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
    */
   *pSupportedVersion = MIN2(*pSupportedVersion, 4u);
   return VK_SUCCESS;
}
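
/* An illustrative negotiation under the scheme summarized above (the version
 * numbers are examples, not tied to any particular loader build): a loader
 * that supports interface v5 calls this entrypoint with *pSupportedVersion
 * set to 5, MIN2(5, 4u) clamps it to 4, and both sides then follow the v4
 * contract; a loader that only supports v3 passes 3, the value is returned
 * unchanged, and this driver must keep to the v3 rules listed above.
 */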

VkResult anv_GetPhysicalDeviceFragmentShadingRatesKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pFragmentShadingRateCount,
    VkPhysicalDeviceFragmentShadingRateKHR*     pFragmentShadingRates)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
   VK_OUTARRAY_MAKE(out, pFragmentShadingRates, pFragmentShadingRateCount);

#define append_rate(_samples, _width, _height)                         \
   do {                                                                \
      vk_outarray_append(&out, __r) {                                  \
         __r->sampleCounts = _samples;                                 \
         __r->fragmentSize = (VkExtent2D) {                            \
            .width = _width,                                           \
            .height = _height,                                         \
         };                                                            \
      }                                                                \
   } while (0)

   VkSampleCountFlags sample_counts =
      isl_device_get_sample_counts(&physical_device->isl_dev);

   for (uint32_t x = 4; x >= 1; x /= 2) {
      for (uint32_t y = 4; y >= 1; y /= 2) {
         /* For size {1, 1}, the sample count must be ~0 */
         if (x == 1 && y == 1)
            append_rate(~0, x, y);
         else
            append_rate(sample_counts, x, y);
      }
   }

#undef append_rate

   return vk_outarray_status(&out);
}
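
/* Illustrative expansion of the loop above: the rates are appended in the
 * order 4x4, 4x2, 4x1, 2x4, 2x2, 2x1, 1x4, 1x2, 1x1, where every rate except
 * 1x1 advertises the sample counts reported by ISL and the final 1x1 rate
 * advertises ~0 (all sample counts), as the in-loop comment requires.
 */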