GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/intel/common/intel_aux_map.c
/*
 * Copyright (c) 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * The aux map provides a multi-level lookup of the main surface address which
 * ends up providing information about the auxiliary surface data, including
 * the address where the auxiliary data resides.
 *
 * The 48-bit VMA (GPU) address of the main surface is split to do the address
 * lookup:
 *
 *  48 bit address of main surface
 * +--------+--------+--------+------+
 * | 47:36  | 35:24  | 23:16  | 15:0 |
 * | L3-idx | L2-idx | L1-idx | ...  |
 * +--------+--------+--------+------+
 *
 * The GFX_AUX_TABLE_BASE_ADDR points to a buffer. The L3 Table Entry is
 * located by indexing into this buffer as a uint64_t array using the L3-idx
 * value. The 64-bit L3 entry is defined as:
 *
 * +-------+-------------+------+---+
 * | 63:48 | 47:15       | 14:1 | 0 |
 * |  ...  | L2-tbl-addr | ...  | V |
 * +-------+-------------+------+---+
 *
 * If the `V` (valid) bit is set, then the L2-tbl-addr gives the address for
 * the level-2 table entries, with the lower address bits filled with zero.
 * The L2 Table Entry is located by indexing into this buffer as a uint64_t
 * array using the L2-idx value. The 64-bit L2 entry is similar to the L3
 * entry, except with 2 additional address bits:
 *
 * +-------+-------------+------+---+
 * | 63:48 | 47:13       | 12:1 | 0 |
 * |  ...  | L1-tbl-addr | ...  | V |
 * +-------+-------------+------+---+
 *
 * If the `V` bit is set, then the L1-tbl-addr gives the address for the
 * level-1 table entries, with the lower address bits filled with zero. The L1
 * Table Entry is located by indexing into this buffer as a uint64_t array
 * using the L1-idx value. The 64-bit L1 entry is defined as:
 *
 * +--------+------+-------+-------+-------+---------------+-----+---+
 * | 63:58  | 57   | 56:54 | 53:52 | 51:48 | 47:8          | 7:1 | 0 |
 * | Format | Y/Cr | Depth | TM    |  ...  | aux-data-addr | ... | V |
 * +--------+------+-------+-------+-------+---------------+-----+---+
 *
 * Where:
 *  - Format: See `isl_format_get_aux_map_encoding`
 *  - Y/Cr: 0=Y(Luma), 1=Cr(Chroma)
 *  - (bit) Depth: See `get_bpp_encoding`
 *  - TM (Tile-mode): 0=Ys, 1=Y, 2=rsvd, 3=rsvd
 *  - aux-data-addr: VMA/GPU address for the aux-data
 *  - V: entry is valid
 */
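
/*
 * Illustrative sketch (not part of the driver): the index extraction that the
 * table walk below performs, matching the bit-field diagram above. The helper
 * name and the example address are hypothetical.
 *
 *    static void
 *    example_split_main_address(uint64_t address)
 *    {
 *       uint32_t l3_index = (address >> 36) & 0xfff;   // bits 47:36
 *       uint32_t l2_index = (address >> 24) & 0xfff;   // bits 35:24
 *       uint32_t l1_index = (address >> 16) & 0xff;    // bits 23:16
 *       // For address == 0x123456780000:
 *       //   l3_index == 0x123, l2_index == 0x456, l1_index == 0x78
 *    }
 */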

#include "intel_aux_map.h"
#include "intel_gem.h"

#include "dev/intel_device_info.h"
#include "isl/isl.h"

#include "drm-uapi/i915_drm.h"
#include "util/list.h"
#include "util/ralloc.h"
#include "util/u_atomic.h"
#include "main/macros.h"

#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>
#include <pthread.h>

static const bool aux_map_debug = false;

struct aux_map_buffer {
   struct list_head link;
   struct intel_buffer *buffer;
};

struct intel_aux_map_context {
   void *driver_ctx;
   pthread_mutex_t mutex;
   struct intel_mapped_pinned_buffer_alloc *buffer_alloc;
   uint32_t num_buffers;
   struct list_head buffers;
   uint64_t level3_base_addr;
   uint64_t *level3_map;
   uint32_t tail_offset, tail_remaining;
   uint32_t state_num;
};

static bool
add_buffer(struct intel_aux_map_context *ctx)
{
   struct aux_map_buffer *buf = ralloc(ctx, struct aux_map_buffer);
   if (!buf)
      return false;

   /* Each buffer in the list is a 1MiB chunk carved up into sub-tables. */
   const uint32_t size = 0x100000;
   buf->buffer = ctx->buffer_alloc->alloc(ctx->driver_ctx, size);
   if (!buf->buffer) {
      ralloc_free(buf);
      return false;
   }

   assert(buf->buffer->map != NULL);

   list_addtail(&buf->link, &ctx->buffers);
   ctx->tail_offset = 0;
   ctx->tail_remaining = size;
   p_atomic_inc(&ctx->num_buffers);

   return true;
}

static void
advance_current_pos(struct intel_aux_map_context *ctx, uint32_t size)
{
   assert(ctx->tail_remaining >= size);
   ctx->tail_remaining -= size;
   ctx->tail_offset += size;
}

static bool
align_and_verify_space(struct intel_aux_map_context *ctx, uint32_t size,
                       uint32_t align)
{
   if (ctx->tail_remaining < size)
      return false;

   struct aux_map_buffer *tail =
      list_last_entry(&ctx->buffers, struct aux_map_buffer, link);
   uint64_t gpu = tail->buffer->gpu + ctx->tail_offset;
   uint64_t aligned = align64(gpu, align);

   if ((aligned - gpu) + size > ctx->tail_remaining) {
      return false;
   } else {
      if (aligned - gpu > 0)
         advance_current_pos(ctx, aligned - gpu);
      return true;
   }
}
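
/*
 * Worked example (made-up numbers): if the next free GPU address in the tail
 * buffer is gpu == 0x10007000 and align == 0x8000, then
 * align64(0x10007000, 0x8000) == 0x10008000, so 0x1000 bytes of padding are
 * consumed before the sub-table; the call fails instead when that padding
 * plus size would exceed tail_remaining.
 */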

static void
get_current_pos(struct intel_aux_map_context *ctx, uint64_t *gpu, uint64_t **map)
{
   assert(!list_is_empty(&ctx->buffers));
   struct aux_map_buffer *tail =
      list_last_entry(&ctx->buffers, struct aux_map_buffer, link);
   if (gpu)
      *gpu = tail->buffer->gpu + ctx->tail_offset;
   if (map)
      *map = (uint64_t*)((uint8_t*)tail->buffer->map + ctx->tail_offset);
}

static bool
add_sub_table(struct intel_aux_map_context *ctx, uint32_t size,
              uint32_t align, uint64_t *gpu, uint64_t **map)
{
   if (!align_and_verify_space(ctx, size, align)) {
      if (!add_buffer(ctx))
         return false;
      UNUSED bool aligned = align_and_verify_space(ctx, size, align);
      assert(aligned);
   }
   get_current_pos(ctx, gpu, map);
   /* Zero-fill the new sub-table so every entry starts out invalid. */
   memset(*map, 0, size);
   advance_current_pos(ctx, size);
   return true;
}

uint32_t
intel_aux_map_get_state_num(struct intel_aux_map_context *ctx)
{
   return p_atomic_read(&ctx->state_num);
}

struct intel_aux_map_context *
intel_aux_map_init(void *driver_ctx,
                   struct intel_mapped_pinned_buffer_alloc *buffer_alloc,
                   const struct intel_device_info *devinfo)
{
   struct intel_aux_map_context *ctx;
   if (devinfo->ver < 12)
      return NULL;

   ctx = ralloc(NULL, struct intel_aux_map_context);
   if (!ctx)
      return NULL;

   if (pthread_mutex_init(&ctx->mutex, NULL)) {
      /* Free the context rather than leaking it on mutex-init failure. */
      ralloc_free(ctx);
      return NULL;
   }

   ctx->driver_ctx = driver_ctx;
   ctx->buffer_alloc = buffer_alloc;
   ctx->num_buffers = 0;
   list_inithead(&ctx->buffers);
   ctx->tail_offset = 0;
   ctx->tail_remaining = 0;
   ctx->state_num = 0;

   if (add_sub_table(ctx, 32 * 1024, 32 * 1024, &ctx->level3_base_addr,
                     &ctx->level3_map)) {
      if (aux_map_debug)
         fprintf(stderr, "AUX-MAP L3: 0x%"PRIx64", map=%p\n",
                 ctx->level3_base_addr, ctx->level3_map);
      p_atomic_inc(&ctx->state_num);
      return ctx;
   } else {
      ralloc_free(ctx);
      return NULL;
   }
}
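
/*
 * Lifecycle sketch (driver_ctx, buffer_alloc, and devinfo are assumed to be
 * supplied by the caller; names are illustrative):
 *
 *    struct intel_aux_map_context *aux_ctx =
 *       intel_aux_map_init(driver_ctx, &buffer_alloc, devinfo);
 *    if (aux_ctx) {
 *       // program GFX_AUX_TABLE_BASE_ADDR from intel_aux_map_get_base(aux_ctx)
 *       ...
 *       intel_aux_map_finish(aux_ctx);
 *    }
 */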

void
intel_aux_map_finish(struct intel_aux_map_context *ctx)
{
   if (!ctx)
      return;

   pthread_mutex_destroy(&ctx->mutex);
   list_for_each_entry_safe(struct aux_map_buffer, buf, &ctx->buffers, link) {
      ctx->buffer_alloc->free(ctx->driver_ctx, buf->buffer);
      list_del(&buf->link);
      p_atomic_dec(&ctx->num_buffers);
      ralloc_free(buf);
   }

   ralloc_free(ctx);
}

uint64_t
intel_aux_map_get_base(struct intel_aux_map_context *ctx)
{
   /**
    * This gets initialized in intel_aux_map_init and never changes, so
    * there is no need to lock the mutex.
    */
   return ctx->level3_base_addr;
}

static struct aux_map_buffer *
find_buffer(struct intel_aux_map_context *ctx, uint64_t addr)
{
   list_for_each_entry(struct aux_map_buffer, buf, &ctx->buffers, link) {
      if (buf->buffer->gpu <= addr && buf->buffer->gpu_end > addr) {
         return buf;
      }
   }
   return NULL;
}

static uint64_t *
get_u64_entry_ptr(struct intel_aux_map_context *ctx, uint64_t addr)
{
   struct aux_map_buffer *buf = find_buffer(ctx, addr);
   assert(buf);
   uintptr_t map_offset = addr - buf->buffer->gpu;
   return (uint64_t*)((uint8_t*)buf->buffer->map + map_offset);
}

static uint8_t
get_bpp_encoding(enum isl_format format)
{
   if (isl_format_is_yuv(format)) {
      switch (format) {
      case ISL_FORMAT_YCRCB_NORMAL:
      case ISL_FORMAT_YCRCB_SWAPY:
      case ISL_FORMAT_PLANAR_420_8: return 3;
      case ISL_FORMAT_PLANAR_420_12: return 2;
      case ISL_FORMAT_PLANAR_420_10: return 1;
      case ISL_FORMAT_PLANAR_420_16: return 0;
      default:
         unreachable("Unsupported format!");
         return 0;
      }
   } else {
      switch (isl_format_get_layout(format)->bpb) {
      case 16: return 0;
      case 8: return 4;
      case 32: return 5;
      case 64: return 6;
      case 128: return 7;
      default:
         unreachable("Unsupported bpp!");
         return 0;
      }
   }
}
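
/*
 * For example, ISL_FORMAT_R8G8B8A8_UNORM has a 32-bit block, so the switch
 * above returns 5, while ISL_FORMAT_PLANAR_420_10 takes the YUV path and
 * returns 1.
 */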

#define INTEL_AUX_MAP_ENTRY_Y_TILED_BIT (0x1ull << 52)

uint64_t
intel_aux_map_format_bits(enum isl_tiling tiling, enum isl_format format,
                          uint8_t plane)
{
   if (aux_map_debug)
      fprintf(stderr, "AUX-MAP entry %s, fmt_enc=%d\n",
              isl_format_get_name(format),
              isl_format_get_aux_map_encoding(format));

   assert(isl_tiling_is_any_y(tiling));

   uint64_t format_bits =
      ((uint64_t)isl_format_get_aux_map_encoding(format) << 58) |
      ((uint64_t)(plane > 0) << 57) |
      ((uint64_t)get_bpp_encoding(format) << 54) |
      INTEL_AUX_MAP_ENTRY_Y_TILED_BIT;

   assert((format_bits & INTEL_AUX_MAP_FORMAT_BITS_MASK) == format_bits);

   return format_bits;
}
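
/*
 * Usage sketch (illustrative; the surface choice is hypothetical): for a
 * Y-tiled, non-planar 32bpp surface,
 *
 *    uint64_t fmt = intel_aux_map_format_bits(ISL_TILING_Y0,
 *                                             ISL_FORMAT_R8G8B8A8_UNORM, 0);
 *
 * packs the format encoding into bits 63:58, plane == 0 into bit 57, the bpp
 * encoding (5) into bits 56:54, and sets the Y-tiled bit (52); add_mapping()
 * later ORs in the aux-data address and the valid bit.
 */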

uint64_t
intel_aux_map_format_bits_for_isl_surf(const struct isl_surf *isl_surf)
{
   assert(!isl_format_is_planar(isl_surf->format));
   return intel_aux_map_format_bits(isl_surf->tiling, isl_surf->format, 0);
}

static void
get_aux_entry(struct intel_aux_map_context *ctx, uint64_t address,
              uint32_t *l1_index_out, uint64_t *l1_entry_addr_out,
              uint64_t **l1_entry_map_out)
{
   uint32_t l3_index = (address >> 36) & 0xfff;
   uint64_t *l3_entry = &ctx->level3_map[l3_index];

   uint64_t *l2_map;
   if ((*l3_entry & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {
      uint64_t l2_gpu;
      /* Lazily allocate the 32KiB L2 sub-table on first use. */
      if (add_sub_table(ctx, 32 * 1024, 32 * 1024, &l2_gpu, &l2_map)) {
         if (aux_map_debug)
            fprintf(stderr, "AUX-MAP L3[0x%x]: 0x%"PRIx64", map=%p\n",
                    l3_index, l2_gpu, l2_map);
      } else {
         unreachable("Failed to add L2 Aux-Map Page Table!");
      }
      /* Keep address bits 47:15 and set the valid bit. */
      *l3_entry = (l2_gpu & 0xffffffff8000ULL) | 1;
   } else {
      uint64_t l2_addr = intel_canonical_address(*l3_entry & ~0x7fffULL);
      l2_map = get_u64_entry_ptr(ctx, l2_addr);
   }
   uint32_t l2_index = (address >> 24) & 0xfff;
   uint64_t *l2_entry = &l2_map[l2_index];

   uint64_t l1_addr, *l1_map;
   if ((*l2_entry & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {
      /* Lazily allocate the 8KiB L1 sub-table on first use. */
      if (add_sub_table(ctx, 8 * 1024, 8 * 1024, &l1_addr, &l1_map)) {
         if (aux_map_debug)
            fprintf(stderr, "AUX-MAP L2[0x%x]: 0x%"PRIx64", map=%p\n",
                    l2_index, l1_addr, l1_map);
      } else {
         unreachable("Failed to add L1 Aux-Map Page Table!");
      }
      /* Keep address bits 47:13 and set the valid bit. */
      *l2_entry = (l1_addr & 0xffffffffe000ULL) | 1;
   } else {
      l1_addr = intel_canonical_address(*l2_entry & ~0x1fffULL);
      l1_map = get_u64_entry_ptr(ctx, l1_addr);
   }
   uint32_t l1_index = (address >> 16) & 0xff;
   if (l1_index_out)
      *l1_index_out = l1_index;
   if (l1_entry_addr_out)
      *l1_entry_addr_out = l1_addr + l1_index * sizeof(*l1_map);
   if (l1_entry_map_out)
      *l1_entry_map_out = &l1_map[l1_index];
}

static void
add_mapping(struct intel_aux_map_context *ctx, uint64_t address,
            uint64_t aux_address, uint64_t format_bits,
            bool *state_changed)
{
   if (aux_map_debug)
      fprintf(stderr, "AUX-MAP 0x%"PRIx64" => 0x%"PRIx64"\n", address,
              aux_address);

   uint32_t l1_index;
   uint64_t *l1_entry;
   get_aux_entry(ctx, address, &l1_index, NULL, &l1_entry);

   const uint64_t l1_data =
      (aux_address & INTEL_AUX_MAP_ADDRESS_MASK) |
      format_bits |
      INTEL_AUX_MAP_ENTRY_VALID_BIT;

   const uint64_t current_l1_data = *l1_entry;
   if ((current_l1_data & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {
      assert((aux_address & 0xffULL) == 0);
      if (aux_map_debug)
         fprintf(stderr, "AUX-MAP L1[0x%x] 0x%"PRIx64" -> 0x%"PRIx64"\n",
                 l1_index, current_l1_data, l1_data);
      /**
       * We use non-zero bits in 63:1 to indicate the entry had been filled
       * previously. If these bits are non-zero and they don't exactly match
       * what we want to program into the entry, then we must force the
       * aux-map tables to be flushed.
       */
      if (current_l1_data != 0 &&
          (current_l1_data | INTEL_AUX_MAP_ENTRY_VALID_BIT) != l1_data)
         *state_changed = true;
      *l1_entry = l1_data;
   } else {
      if (aux_map_debug)
         fprintf(stderr, "AUX-MAP L1[0x%x] is already marked valid!\n",
                 l1_index);
      assert(*l1_entry == l1_data);
   }
}

uint64_t *
intel_aux_map_get_entry(struct intel_aux_map_context *ctx,
                        uint64_t address,
                        uint64_t *entry_address)
{
   pthread_mutex_lock(&ctx->mutex);
   uint64_t *l1_entry_map;
   get_aux_entry(ctx, address, NULL, entry_address, &l1_entry_map);
   pthread_mutex_unlock(&ctx->mutex);

   return l1_entry_map;
}

void
intel_aux_map_add_mapping(struct intel_aux_map_context *ctx, uint64_t address,
                          uint64_t aux_address, uint64_t main_size_B,
                          uint64_t format_bits)
{
   bool state_changed = false;
   pthread_mutex_lock(&ctx->mutex);
   uint64_t map_addr = address;
   uint64_t dest_aux_addr = aux_address;
   assert(align64(address, INTEL_AUX_MAP_MAIN_PAGE_SIZE) == address);
   assert(align64(aux_address, INTEL_AUX_MAP_AUX_PAGE_SIZE) == aux_address);
   while (map_addr - address < main_size_B) {
      add_mapping(ctx, map_addr, dest_aux_addr, format_bits, &state_changed);
      map_addr += INTEL_AUX_MAP_MAIN_PAGE_SIZE;
      dest_aux_addr += INTEL_AUX_MAP_AUX_PAGE_SIZE;
   }
   pthread_mutex_unlock(&ctx->mutex);
   if (state_changed)
      p_atomic_inc(&ctx->state_num);
}
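
/*
 * Usage sketch (surf, main_addr, and aux_addr are hypothetical): mapping
 * 128KiB of main surface data walks two 64KiB main pages, advancing the aux
 * address by INTEL_AUX_MAP_AUX_PAGE_SIZE per page:
 *
 *    uint64_t fmt = intel_aux_map_format_bits_for_isl_surf(surf);
 *    intel_aux_map_add_mapping(ctx, main_addr, aux_addr, 128 * 1024, fmt);
 */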

/**
 * We mark the leaf entry as invalid, but we don't attempt to clean up the
 * other levels of translation mappings. Since we attempt to re-use VMA
 * ranges, hopefully this will not lead to unbounded growth of the
 * translation tables.
 */
static void
remove_mapping(struct intel_aux_map_context *ctx, uint64_t address,
               bool *state_changed)
{
   uint32_t l3_index = (address >> 36) & 0xfff;
   uint64_t *l3_entry = &ctx->level3_map[l3_index];

   uint64_t *l2_map;
   if ((*l3_entry & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {
      return;
   } else {
      uint64_t l2_addr = intel_canonical_address(*l3_entry & ~0x7fffULL);
      l2_map = get_u64_entry_ptr(ctx, l2_addr);
   }
   uint32_t l2_index = (address >> 24) & 0xfff;
   uint64_t *l2_entry = &l2_map[l2_index];

   uint64_t *l1_map;
   if ((*l2_entry & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {
      return;
   } else {
      uint64_t l1_addr = intel_canonical_address(*l2_entry & ~0x1fffULL);
      l1_map = get_u64_entry_ptr(ctx, l1_addr);
   }
   uint32_t l1_index = (address >> 16) & 0xff;
   uint64_t *l1_entry = &l1_map[l1_index];

   const uint64_t current_l1_data = *l1_entry;
   /* Clearing only the valid bit preserves bits 63:1 as a marker that the
    * entry was filled before. */
   const uint64_t l1_data = current_l1_data & ~1ull;

   if ((current_l1_data & INTEL_AUX_MAP_ENTRY_VALID_BIT) == 0) {
      return;
   } else {
      if (aux_map_debug)
         fprintf(stderr, "AUX-MAP [0x%x][0x%x][0x%x] L1 entry removed!\n",
                 l3_index, l2_index, l1_index);
      /**
       * We use non-zero bits in 63:1 to indicate the entry had been filled
       * previously. In the unlikely event that these are all zero, we force
       * a flush of the aux-map tables.
       */
      if (unlikely(l1_data == 0))
         *state_changed = true;
      *l1_entry = l1_data;
   }
}

void
intel_aux_map_unmap_range(struct intel_aux_map_context *ctx, uint64_t address,
                          uint64_t size)
{
   bool state_changed = false;
   pthread_mutex_lock(&ctx->mutex);
   if (aux_map_debug)
      fprintf(stderr, "AUX-MAP remove 0x%"PRIx64"-0x%"PRIx64"\n", address,
              address + size);

   uint64_t map_addr = address;
   assert(align64(address, INTEL_AUX_MAP_MAIN_PAGE_SIZE) == address);
   while (map_addr - address < size) {
      remove_mapping(ctx, map_addr, &state_changed);
      /* Step per 64KiB main page, matching intel_aux_map_add_mapping(). */
      map_addr += INTEL_AUX_MAP_MAIN_PAGE_SIZE;
   }
   pthread_mutex_unlock(&ctx->mutex);
   if (state_changed)
      p_atomic_inc(&ctx->state_num);
}
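
/*
 * Usage sketch: the inverse of the intel_aux_map_add_mapping() example above,
 * tearing down the same hypothetical 128KiB range:
 *
 *    intel_aux_map_unmap_range(ctx, main_addr, 128 * 1024);
 */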

uint32_t
intel_aux_map_get_num_buffers(struct intel_aux_map_context *ctx)
{
   return p_atomic_read(&ctx->num_buffers);
}

void
intel_aux_map_fill_bos(struct intel_aux_map_context *ctx, void **driver_bos,
                       uint32_t max_bos)
{
   assert(p_atomic_read(&ctx->num_buffers) >= max_bos);
   uint32_t i = 0;
   list_for_each_entry(struct aux_map_buffer, buf, &ctx->buffers, link) {
      if (i >= max_bos)
         return;
      driver_bos[i++] = buf->buffer->driver_bo;
   }
}
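
/*
 * Usage sketch (hypothetical): exporting every table BO so the kernel keeps
 * them resident for a submission:
 *
 *    uint32_t n = intel_aux_map_get_num_buffers(ctx);
 *    void **bos = malloc(n * sizeof(void *));
 *    intel_aux_map_fill_bos(ctx, bos, n);
 *    // ... hand each bos[i] to the driver's execbuf path ...
 *    free(bos);
 */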