GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/panfrost/pan_job.c
/*
 * Copyright (C) 2019-2020 Collabora, Ltd.
 * Copyright (C) 2019 Alyssa Rosenzweig
 * Copyright (C) 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <assert.h>

#include "drm-uapi/panfrost_drm.h"

#include "pan_bo.h"
#include "pan_context.h"
#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/format/u_format.h"
#include "util/u_pack_color.h"
#include "util/rounding.h"
#include "util/u_framebuffer.h"
#include "pan_util.h"
#include "decode.h"
#include "panfrost-quirks.h"

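/* Index of a batch within the context's fixed array of batch slots */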
static unsigned
panfrost_batch_idx(struct panfrost_batch *batch)
{
        return batch - batch->ctx->batches.slots;
}

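/* Initialize a freshly-claimed batch slot: reset its tracking state, copy the
 * framebuffer key, set up its memory pools and reserve the framebuffer/TLS
 * descriptors. */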
static void
panfrost_batch_init(struct panfrost_context *ctx,
                    const struct pipe_framebuffer_state *key,
                    struct panfrost_batch *batch)
{
        struct panfrost_device *dev = pan_device(ctx->base.screen);

        batch->ctx = ctx;

        batch->seqnum = ++ctx->batches.seqnum;

        batch->first_bo = INT32_MAX;
        batch->last_bo = INT32_MIN;
        util_sparse_array_init(&batch->bos, sizeof(uint32_t), 64);

        batch->minx = batch->miny = ~0;
        batch->maxx = batch->maxy = 0;

        util_copy_framebuffer_state(&batch->key, key);
        util_dynarray_init(&batch->resources, NULL);

        /* Preallocate the main pool, since every batch has at least one job
         * structure so it will be used */
        panfrost_pool_init(&batch->pool, NULL, dev, 0, 65536, "Batch pool", true, true);

        /* Don't preallocate the invisible pool, since not every batch will use
         * the pre-allocation, particularly if the varyings are larger than the
         * preallocation and a reallocation is needed after anyway. */
        panfrost_pool_init(&batch->invisible_pool, NULL, dev,
                           PAN_BO_INVISIBLE, 65536, "Varyings", false, true);

        panfrost_batch_add_fbo_bos(batch);

        /* Reserve the framebuffer and local storage descriptors */
        batch->framebuffer =
                (dev->quirks & MIDGARD_SFBD) ?
                pan_pool_alloc_desc(&batch->pool.base, SINGLE_TARGET_FRAMEBUFFER) :
                pan_pool_alloc_desc_aggregate(&batch->pool.base,
                                              PAN_DESC(MULTI_TARGET_FRAMEBUFFER),
                                              PAN_DESC(ZS_CRC_EXTENSION),
                                              PAN_DESC_ARRAY(MAX2(key->nr_cbufs, 1), RENDER_TARGET));

        /* Add the MFBD tag now, other tags will be added at submit-time */
        if (!(dev->quirks & MIDGARD_SFBD))
                batch->framebuffer.gpu |= MALI_FBD_TAG_IS_MFBD;

        /* On Midgard, the TLS is embedded in the FB descriptor */
        if (pan_is_bifrost(dev))
                batch->tls = pan_pool_alloc_desc(&batch->pool.base, LOCAL_STORAGE);
        else
                batch->tls = batch->framebuffer;
}

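/* Tear down a batch after submission: drop BO and resource references, free
 * its pools and clear the slot so it can be reused. */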
static void
panfrost_batch_cleanup(struct panfrost_batch *batch)
{
        if (!batch)
                return;

        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_device *dev = pan_device(ctx->base.screen);

        assert(batch->seqnum);

        if (ctx->batch == batch)
                ctx->batch = NULL;

        unsigned batch_idx = panfrost_batch_idx(batch);

        for (int i = batch->first_bo; i <= batch->last_bo; i++) {
                uint32_t *flags = util_sparse_array_get(&batch->bos, i);

                if (!*flags)
                        continue;

                struct panfrost_bo *bo = pan_lookup_bo(dev, i);
                panfrost_bo_unreference(bo);
        }

        util_dynarray_foreach(&batch->resources, struct panfrost_resource *, rsrc) {
                BITSET_CLEAR((*rsrc)->track.users, batch_idx);

                if ((*rsrc)->track.writer == batch)
                        (*rsrc)->track.writer = NULL;

                pipe_resource_reference((struct pipe_resource **) rsrc, NULL);
        }

        util_dynarray_fini(&batch->resources);
        panfrost_pool_cleanup(&batch->pool);
        panfrost_pool_cleanup(&batch->invisible_pool);

        util_unreference_framebuffer_state(&batch->key);

        util_sparse_array_finish(&batch->bos);

        memset(batch, 0, sizeof(*batch));
}

static void
panfrost_batch_submit(struct panfrost_batch *batch,
                      uint32_t in_sync, uint32_t out_sync);

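/* Look up the batch matching the given framebuffer key, evicting and flushing
 * the least-recently-used slot when no matching or free slot exists. */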
static struct panfrost_batch *
panfrost_get_batch(struct panfrost_context *ctx,
                   const struct pipe_framebuffer_state *key)
{
        struct panfrost_batch *batch = NULL;

        for (unsigned i = 0; i < PAN_MAX_BATCHES; i++) {
                if (ctx->batches.slots[i].seqnum &&
                    util_framebuffer_state_equal(&ctx->batches.slots[i].key, key)) {
                        /* We found a match, increase the seqnum for the LRU
                         * eviction logic.
                         */
                        ctx->batches.slots[i].seqnum = ++ctx->batches.seqnum;
                        return &ctx->batches.slots[i];
                }

                if (!batch || batch->seqnum > ctx->batches.slots[i].seqnum)
                        batch = &ctx->batches.slots[i];
        }

        assert(batch);

        /* The selected slot is in use, so flush its batch before reusing it */
        if (batch->seqnum)
                panfrost_batch_submit(batch, 0, 0);

        panfrost_batch_init(ctx, key, batch);

        return batch;
}

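/* Like panfrost_get_batch(), but guarantee the returned batch has no jobs
 * queued yet, flushing the existing one first if necessary. */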
struct panfrost_batch *
panfrost_get_fresh_batch(struct panfrost_context *ctx,
                         const struct pipe_framebuffer_state *key)
{
        struct panfrost_batch *batch = panfrost_get_batch(ctx, key);

        panfrost_dirty_state_all(ctx);

        /* The batch has no draw/clear queued, let's return it directly.
         * Note that it's perfectly fine to re-use a batch with an
         * existing clear, we'll just update it with the new clear request.
         */
        if (!batch->scoreboard.first_job) {
                ctx->batch = batch;
                return batch;
        }

        /* Otherwise, we need to flush the existing one and instantiate a new
         * one.
         */
        panfrost_batch_submit(batch, 0, 0);
        batch = panfrost_get_batch(ctx, key);
        return batch;
}

/* Get the job corresponding to the FBO we're currently rendering into */

struct panfrost_batch *
panfrost_get_batch_for_fbo(struct panfrost_context *ctx)
{
        /* If we already began rendering, use that */

        if (ctx->batch) {
                assert(util_framebuffer_state_equal(&ctx->batch->key,
                                                    &ctx->pipe_framebuffer));
                return ctx->batch;
        }

        /* If not, look up the job */
        struct panfrost_batch *batch = panfrost_get_batch(ctx,
                                                          &ctx->pipe_framebuffer);

        /* Set this job as the current FBO job. Will be reset when updating the
         * FB state and when submitting or releasing a job.
         */
        ctx->batch = batch;
        panfrost_dirty_state_all(ctx);
        return batch;
}

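/* Like panfrost_get_batch_for_fbo(), but guarantee the returned batch has no
 * jobs queued, flushing the existing one if needed. */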
struct panfrost_batch *
panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx)
{
        struct panfrost_batch *batch;

        batch = panfrost_get_batch(ctx, &ctx->pipe_framebuffer);
        panfrost_dirty_state_all(ctx);

        /* The batch has no draw/clear queued, let's return it directly.
         * Note that it's perfectly fine to re-use a batch with an
         * existing clear, we'll just update it with the new clear request.
         */
        if (!batch->scoreboard.first_job) {
                ctx->batch = batch;
                return batch;
        }

        /* Otherwise, we need to flush the existing one and instantiate a new
         * one.
         */
        panfrost_batch_submit(batch, 0, 0);
        batch = panfrost_get_batch(ctx, &ctx->pipe_framebuffer);
        ctx->batch = batch;
        return batch;
}

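/* Track that this batch reads or writes rsrc, and flush any other batches
 * whose accesses to the resource would conflict with ours. */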
static void
panfrost_batch_update_access(struct panfrost_batch *batch,
                             struct panfrost_resource *rsrc, bool writes)
{
        struct panfrost_context *ctx = batch->ctx;
        uint32_t batch_idx = panfrost_batch_idx(batch);
        struct panfrost_batch *writer = rsrc->track.writer;

        if (unlikely(!BITSET_TEST(rsrc->track.users, batch_idx))) {
                BITSET_SET(rsrc->track.users, batch_idx);

                /* Reference the resource on the batch */
                struct pipe_resource **dst = util_dynarray_grow(&batch->resources,
                                                                struct pipe_resource *, 1);

                *dst = NULL;
                pipe_resource_reference(dst, &rsrc->base);
        }

        /* Flush users if required */
        if (writes || ((writer != NULL) && (writer != batch))) {
                unsigned i;
                BITSET_FOREACH_SET(i, rsrc->track.users, PAN_MAX_BATCHES) {
                        /* Skip the entry if this is our batch. */
                        if (i == batch_idx)
                                continue;

                        panfrost_batch_submit(&ctx->batches.slots[i], 0, 0);
                }
        }

        if (writes)
                rsrc->track.writer = batch;
}

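/* Add a BO to the batch's sparse set, taking a reference the first time it is
 * seen and accumulating access flags on later additions. */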
static void
panfrost_batch_add_bo_old(struct panfrost_batch *batch,
                          struct panfrost_bo *bo, uint32_t flags)
{
        if (!bo)
                return;

        uint32_t *entry = util_sparse_array_get(&batch->bos, bo->gem_handle);
        uint32_t old_flags = *entry;

        if (!old_flags) {
                batch->num_bos++;
                batch->first_bo = MIN2(batch->first_bo, bo->gem_handle);
                batch->last_bo = MAX2(batch->last_bo, bo->gem_handle);
                panfrost_bo_reference(bo);
        }

        if (old_flags == flags)
                return;

        flags |= old_flags;
        *entry = flags;
}

static uint32_t
panfrost_access_for_stage(enum pipe_shader_type stage)
{
        return (stage == PIPE_SHADER_FRAGMENT) ?
               PAN_BO_ACCESS_FRAGMENT : PAN_BO_ACCESS_VERTEX_TILER;
}

void
panfrost_batch_add_bo(struct panfrost_batch *batch,
                      struct panfrost_bo *bo, enum pipe_shader_type stage)
{
        panfrost_batch_add_bo_old(batch, bo, PAN_BO_ACCESS_READ |
                                  panfrost_access_for_stage(stage));
}

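/* Mark a resource as read by this batch, adding its backing BOs (image data,
 * CRC and separate stencil, if present). */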
void
panfrost_batch_read_rsrc(struct panfrost_batch *batch,
                         struct panfrost_resource *rsrc,
                         enum pipe_shader_type stage)
{
        uint32_t access = PAN_BO_ACCESS_READ |
                          panfrost_access_for_stage(stage);

        panfrost_batch_add_bo_old(batch, rsrc->image.data.bo, access);

        if (rsrc->image.crc.bo)
                panfrost_batch_add_bo_old(batch, rsrc->image.crc.bo, access);

        if (rsrc->separate_stencil)
                panfrost_batch_add_bo_old(batch, rsrc->separate_stencil->image.data.bo, access);

        panfrost_batch_update_access(batch, rsrc, false);
}

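/* Mark a resource as written by this batch, adding its backing BOs (image
 * data, CRC and separate stencil, if present). */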
void
panfrost_batch_write_rsrc(struct panfrost_batch *batch,
                          struct panfrost_resource *rsrc,
                          enum pipe_shader_type stage)
{
        uint32_t access = PAN_BO_ACCESS_WRITE |
                          panfrost_access_for_stage(stage);

        panfrost_batch_add_bo_old(batch, rsrc->image.data.bo, access);

        if (rsrc->image.crc.bo)
                panfrost_batch_add_bo_old(batch, rsrc->image.crc.bo, access);

        if (rsrc->separate_stencil)
                panfrost_batch_add_bo_old(batch, rsrc->separate_stencil->image.data.bo, access);

        panfrost_batch_update_access(batch, rsrc, true);
}

/* Adds the BO backing surface to a batch if the surface is non-null */

static void
panfrost_batch_add_surface(struct panfrost_batch *batch, struct pipe_surface *surf)
{
        if (surf) {
                struct panfrost_resource *rsrc = pan_resource(surf->texture);
                panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_FRAGMENT);
        }
}

void
panfrost_batch_add_fbo_bos(struct panfrost_batch *batch)
{
        for (unsigned i = 0; i < batch->key.nr_cbufs; ++i)
                panfrost_batch_add_surface(batch, batch->key.cbufs[i]);

        panfrost_batch_add_surface(batch, batch->key.zsbuf);
}

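/* Allocate a new BO, add it to the batch for the given stage and hand
 * ownership of the creation reference over to the batch. */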
struct panfrost_bo *
panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size,
                         uint32_t create_flags, enum pipe_shader_type stage,
                         const char *label)
{
        struct panfrost_bo *bo;

        bo = panfrost_bo_create(pan_device(batch->ctx->base.screen), size,
                                create_flags, label);
        panfrost_batch_add_bo(batch, bo, stage);

        /* panfrost_batch_add_bo() has retained a reference and
         * panfrost_bo_create() initializes the refcnt to 1, so let's
         * unreference the BO here so it gets released when the batch is
         * destroyed (unless it's retained by someone else in the meantime).
         */
        panfrost_bo_unreference(bo);
        return bo;
}

/* Returns the polygon list's GPU address if available, or otherwise allocates
 * the polygon list. It's perfectly fast to allocate and free the BO directly,
 * since we'll hit the BO cache and this is one-per-batch anyway. */

static mali_ptr
panfrost_batch_get_polygon_list(struct panfrost_batch *batch)
{
        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);

        assert(!pan_is_bifrost(dev));

        if (!batch->tiler_ctx.midgard.polygon_list) {
                bool has_draws = batch->scoreboard.first_tiler != NULL;
                unsigned size =
                        panfrost_tiler_get_polygon_list_size(dev,
                                                             batch->key.width,
                                                             batch->key.height,
                                                             has_draws);
                size = util_next_power_of_two(size);

                /* Create the BO as invisible if we can. In the non-hierarchical tiler case,
                 * we need to write the polygon list manually because there's no WRITE_VALUE
                 * job in the chain (maybe we should add one...). */
                bool init_polygon_list = !has_draws && (dev->quirks & MIDGARD_NO_HIER_TILING);
                batch->tiler_ctx.midgard.polygon_list =
                        panfrost_batch_create_bo(batch, size,
                                                 init_polygon_list ? 0 : PAN_BO_INVISIBLE,
                                                 PIPE_SHADER_VERTEX,
                                                 "Polygon list");
                panfrost_batch_add_bo(batch, batch->tiler_ctx.midgard.polygon_list,
                                      PIPE_SHADER_FRAGMENT);

                if (init_polygon_list) {
                        assert(batch->tiler_ctx.midgard.polygon_list->ptr.cpu);
                        uint32_t *polygon_list_body =
                                batch->tiler_ctx.midgard.polygon_list->ptr.cpu +
                                MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE;
                        polygon_list_body[0] = 0xa0000000; /* TODO: Just that? */
                }

                batch->tiler_ctx.midgard.disable = !has_draws;
        }

        return batch->tiler_ctx.midgard.polygon_list->ptr.gpu;
}

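/* Lazily allocate the thread-local storage (stack) BO for this batch, sized
 * from the per-thread requirements and core count. */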
struct panfrost_bo *
panfrost_batch_get_scratchpad(struct panfrost_batch *batch,
                              unsigned size_per_thread,
                              unsigned thread_tls_alloc,
                              unsigned core_count)
{
        unsigned size = panfrost_get_total_stack_size(size_per_thread,
                                                      thread_tls_alloc,
                                                      core_count);

        if (batch->scratchpad) {
                assert(batch->scratchpad->size >= size);
        } else {
                batch->scratchpad = panfrost_batch_create_bo(batch, size,
                                                             PAN_BO_INVISIBLE,
                                                             PIPE_SHADER_VERTEX,
                                                             "Thread local storage");

                panfrost_batch_add_bo(batch, batch->scratchpad,
                                      PIPE_SHADER_FRAGMENT);
        }

        return batch->scratchpad;
}

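/* Lazily allocate the BO backing compute workgroup shared memory for this
 * batch. */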
struct panfrost_bo *
panfrost_batch_get_shared_memory(struct panfrost_batch *batch,
                                 unsigned size,
                                 unsigned workgroup_count)
{
        if (batch->shared_memory) {
                assert(batch->shared_memory->size >= size);
        } else {
                batch->shared_memory = panfrost_batch_create_bo(batch, size,
                                                                PAN_BO_INVISIBLE,
                                                                PIPE_SHADER_VERTEX,
                                                                "Workgroup shared memory");
        }

        return batch->shared_memory;
}

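/* Lazily emit the Bifrost tiler heap and tiler context descriptors for this
 * batch; returns 0 when there is no geometry to tile. */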
mali_ptr
panfrost_batch_get_bifrost_tiler(struct panfrost_batch *batch, unsigned vertex_count)
{
        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
        assert(pan_is_bifrost(dev));

        if (!vertex_count)
                return 0;

        if (batch->tiler_ctx.bifrost)
                return batch->tiler_ctx.bifrost;

        struct panfrost_ptr t =
                pan_pool_alloc_desc(&batch->pool.base, BIFROST_TILER_HEAP);

        pan_emit_bifrost_tiler_heap(dev, t.cpu);

        mali_ptr heap = t.gpu;

        t = pan_pool_alloc_desc(&batch->pool.base, BIFROST_TILER);
        pan_emit_bifrost_tiler(dev, batch->key.width, batch->key.height,
                               util_framebuffer_get_num_samples(&batch->key),
                               heap, t.cpu);

        batch->tiler_ctx.bifrost = t.gpu;
        return batch->tiler_ctx.bifrost;
}

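/* Translate the batch's framebuffer key, clears and resolve/read state into
 * the common pan_fb_info, deciding per-target clear, discard and preload
 * behaviour. */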
static void
panfrost_batch_to_fb_info(const struct panfrost_batch *batch,
                          struct pan_fb_info *fb,
                          struct pan_image_view *rts,
                          struct pan_image_view *zs,
                          struct pan_image_view *s,
                          bool reserve)
{
        memset(fb, 0, sizeof(*fb));
        memset(rts, 0, sizeof(*rts) * 8);
        memset(zs, 0, sizeof(*zs));
        memset(s, 0, sizeof(*s));

        fb->width = batch->key.width;
        fb->height = batch->key.height;
        fb->extent.minx = batch->minx;
        fb->extent.miny = batch->miny;
        fb->extent.maxx = batch->maxx - 1;
        fb->extent.maxy = batch->maxy - 1;
        fb->nr_samples = util_framebuffer_get_num_samples(&batch->key);
        fb->rt_count = batch->key.nr_cbufs;

        static const unsigned char id_swz[] = {
                PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
        };

        for (unsigned i = 0; i < fb->rt_count; i++) {
                struct pipe_surface *surf = batch->key.cbufs[i];

                if (!surf)
                        continue;

                struct panfrost_resource *prsrc = pan_resource(surf->texture);
                unsigned mask = PIPE_CLEAR_COLOR0 << i;

                if (batch->clear & mask) {
                        fb->rts[i].clear = true;
                        memcpy(fb->rts[i].clear_value, batch->clear_color[i],
                               sizeof(fb->rts[i].clear_value));
                }

                fb->rts[i].discard = !reserve && !(batch->resolve & mask);

                rts[i].format = surf->format;
                rts[i].dim = MALI_TEXTURE_DIMENSION_2D;
                rts[i].last_level = rts[i].first_level = surf->u.tex.level;
                rts[i].first_layer = surf->u.tex.first_layer;
                rts[i].last_layer = surf->u.tex.last_layer;
                rts[i].image = &prsrc->image;
                rts[i].nr_samples = surf->nr_samples ? : MAX2(surf->texture->nr_samples, 1);
                memcpy(rts[i].swizzle, id_swz, sizeof(rts[i].swizzle));
                fb->rts[i].crc_valid = &prsrc->valid.crc;
                fb->rts[i].view = &rts[i];

                /* Preload if the RT is read or updated */
                if (!(batch->clear & mask) &&
                    ((batch->read & mask) ||
                     ((batch->draws & mask) &&
                      BITSET_TEST(prsrc->valid.data, fb->rts[i].view->first_level))))
                        fb->rts[i].preload = true;
        }

        const struct pan_image_view *s_view = NULL, *z_view = NULL;
        struct panfrost_resource *z_rsrc = NULL, *s_rsrc = NULL;

        if (batch->key.zsbuf) {
                struct pipe_surface *surf = batch->key.zsbuf;
                z_rsrc = pan_resource(surf->texture);

                zs->format = surf->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ?
                             PIPE_FORMAT_Z32_FLOAT : surf->format;
                zs->dim = MALI_TEXTURE_DIMENSION_2D;
                zs->last_level = zs->first_level = surf->u.tex.level;
                zs->first_layer = surf->u.tex.first_layer;
                zs->last_layer = surf->u.tex.last_layer;
                zs->image = &z_rsrc->image;
                zs->nr_samples = surf->nr_samples ? : MAX2(surf->texture->nr_samples, 1);
                memcpy(zs->swizzle, id_swz, sizeof(zs->swizzle));
                fb->zs.view.zs = zs;
                z_view = zs;
                if (util_format_is_depth_and_stencil(zs->format)) {
                        s_view = zs;
                        s_rsrc = z_rsrc;
                }

                if (z_rsrc->separate_stencil) {
                        s_rsrc = z_rsrc->separate_stencil;
                        s->format = PIPE_FORMAT_S8_UINT;
                        s->dim = MALI_TEXTURE_DIMENSION_2D;
                        s->last_level = s->first_level = surf->u.tex.level;
                        s->first_layer = surf->u.tex.first_layer;
                        s->last_layer = surf->u.tex.last_layer;
                        s->image = &s_rsrc->image;
                        s->nr_samples = surf->nr_samples ? : MAX2(surf->texture->nr_samples, 1);
                        memcpy(s->swizzle, id_swz, sizeof(s->swizzle));
                        fb->zs.view.s = s;
                        s_view = s;
                }
        }

        if (batch->clear & PIPE_CLEAR_DEPTH) {
                fb->zs.clear.z = true;
                fb->zs.clear_value.depth = batch->clear_depth;
        }

        if (batch->clear & PIPE_CLEAR_STENCIL) {
                fb->zs.clear.s = true;
                fb->zs.clear_value.stencil = batch->clear_stencil;
        }

        fb->zs.discard.z = !reserve && !(batch->resolve & PIPE_CLEAR_DEPTH);
        fb->zs.discard.s = !reserve && !(batch->resolve & PIPE_CLEAR_STENCIL);

        if (!fb->zs.clear.z &&
            ((batch->read & PIPE_CLEAR_DEPTH) ||
             ((batch->draws & PIPE_CLEAR_DEPTH) &&
              z_rsrc && BITSET_TEST(z_rsrc->valid.data, z_view->first_level))))
                fb->zs.preload.z = true;

        if (!fb->zs.clear.s &&
            ((batch->read & PIPE_CLEAR_STENCIL) ||
             ((batch->draws & PIPE_CLEAR_STENCIL) &&
              s_rsrc && BITSET_TEST(s_rsrc->valid.data, s_view->first_level))))
                fb->zs.preload.s = true;

        /* Preserve both components if we have a combined ZS view and
         * one component needs to be preserved.
         */
        if (s_view == z_view && fb->zs.discard.z != fb->zs.discard.s) {
                bool valid = BITSET_TEST(z_rsrc->valid.data, z_view->first_level);

                fb->zs.discard.z = false;
                fb->zs.discard.s = false;
                fb->zs.preload.z = !fb->zs.clear.z && valid;
                fb->zs.preload.s = !fb->zs.clear.s && valid;
        }
}

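/* Build the BO handle list for one job chain and submit it to the kernel
 * through DRM_IOCTL_PANFROST_SUBMIT, optionally waiting on and decoding the
 * job when tracing or sync debugging is enabled. */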
static int
panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
                            mali_ptr first_job_desc,
                            uint32_t reqs,
                            uint32_t in_sync,
                            uint32_t out_sync)
{
        struct panfrost_context *ctx = batch->ctx;
        struct pipe_context *gallium = (struct pipe_context *) ctx;
        struct panfrost_device *dev = pan_device(gallium->screen);
        struct drm_panfrost_submit submit = {0,};
        uint32_t *bo_handles;
        int ret;

        /* If we trace, we always need a syncobj, so make one of our own if we
         * weren't given one to use. Remember that we did so, so we can free it
         * after we're done, while preventing double-frees if we were given a
         * syncobj */

        if (!out_sync && dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
                out_sync = ctx->syncobj;

        submit.out_sync = out_sync;
        submit.jc = first_job_desc;
        submit.requirements = reqs;
        if (in_sync) {
                submit.in_syncs = (u64)(uintptr_t)(&in_sync);
                submit.in_sync_count = 1;
        }

        bo_handles = calloc(panfrost_pool_num_bos(&batch->pool) +
                            panfrost_pool_num_bos(&batch->invisible_pool) +
                            batch->num_bos + 2,
                            sizeof(*bo_handles));
        assert(bo_handles);

        for (int i = batch->first_bo; i <= batch->last_bo; i++) {
                uint32_t *flags = util_sparse_array_get(&batch->bos, i);

                if (!*flags)
                        continue;

                assert(submit.bo_handle_count < batch->num_bos);
                bo_handles[submit.bo_handle_count++] = i;

                /* Update the BO access flags so that panfrost_bo_wait() knows
                 * about all pending accesses.
                 * We only keep the READ/WRITE info since this is all the BO
                 * wait logic cares about.
                 * We also preserve existing flags as this batch might not
                 * be the first one to access the BO.
                 */
                struct panfrost_bo *bo = pan_lookup_bo(dev, i);

                bo->gpu_access |= *flags & (PAN_BO_ACCESS_RW);
        }

        panfrost_pool_get_bo_handles(&batch->pool, bo_handles + submit.bo_handle_count);
        submit.bo_handle_count += panfrost_pool_num_bos(&batch->pool);
        panfrost_pool_get_bo_handles(&batch->invisible_pool, bo_handles + submit.bo_handle_count);
        submit.bo_handle_count += panfrost_pool_num_bos(&batch->invisible_pool);

        /* Add the tiler heap to the list of accessed BOs if the batch has at
         * least one tiler job. Tiler heap is written by tiler jobs and read
         * by fragment jobs (the polygon list is coming from this heap).
         */
        if (batch->scoreboard.first_tiler)
                bo_handles[submit.bo_handle_count++] = dev->tiler_heap->gem_handle;

        /* Always used on Bifrost, occasionally used on Midgard */
        bo_handles[submit.bo_handle_count++] = dev->sample_positions->gem_handle;

        submit.bo_handles = (u64) (uintptr_t) bo_handles;
        if (ctx->is_noop)
                ret = 0;
        else
                ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
        free(bo_handles);

        if (ret)
                return errno;

        /* Trace the job if we're doing that */
        if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
                /* Wait so we can get errors reported back */
                drmSyncobjWait(dev->fd, &out_sync, 1,
                               INT64_MAX, 0, NULL);

                if (dev->debug & PAN_DBG_TRACE)
                        pandecode_jc(submit.jc, pan_is_bifrost(dev), dev->gpu_id);

                if (dev->debug & PAN_DBG_SYNC)
                        pandecode_abort_on_fault(submit.jc);
        }

        return 0;
}

/* Submit both vertex/tiler and fragment jobs for a batch, possibly with an
 * outsync corresponding to the later of the two (since there will be an
 * implicit dep between them) */

static int
panfrost_batch_submit_jobs(struct panfrost_batch *batch,
                           const struct pan_fb_info *fb,
                           uint32_t in_sync, uint32_t out_sync)
{
        struct pipe_screen *pscreen = batch->ctx->base.screen;
        struct panfrost_screen *screen = pan_screen(pscreen);
        struct panfrost_device *dev = pan_device(pscreen);
        bool has_draws = batch->scoreboard.first_job;
        bool has_tiler = batch->scoreboard.first_tiler;
        bool has_frag = has_tiler || batch->clear;
        int ret = 0;

        /* Take the submit lock to make sure no tiler jobs from other contexts
         * are inserted between our tiler and fragment jobs; failing to do that
         * might result in tiler heap corruption.
         */
        if (has_tiler)
                pthread_mutex_lock(&dev->submit_lock);

        if (has_draws) {
                ret = panfrost_batch_submit_ioctl(batch, batch->scoreboard.first_job,
                                                  0, in_sync, has_frag ? 0 : out_sync);

                if (ret)
                        goto done;
        }

        if (has_frag) {
                /* Whether we program the fragment job for draws or not depends
                 * on whether there is any *tiler* activity (so fragment
                 * shaders). If there are draws but entirely RASTERIZER_DISCARD
                 * (say, for transform feedback), we want a fragment job that
                 * *only* clears, since otherwise the tiler structures will be
                 * uninitialized leading to faults (or state leaks) */

                mali_ptr fragjob = screen->vtbl.emit_fragment_job(batch, fb);
                ret = panfrost_batch_submit_ioctl(batch, fragjob,
                                                  PANFROST_JD_REQ_FS, 0,
                                                  out_sync);
                if (ret)
                        goto done;
        }

done:
        if (has_tiler)
                pthread_mutex_unlock(&dev->submit_lock);

        return ret;
}

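/* If the first color buffer carries a damage tile map, upload it and record
 * its stride in the fb info. */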
static void
panfrost_emit_tile_map(struct panfrost_batch *batch, struct pan_fb_info *fb)
{
        if (batch->key.nr_cbufs < 1 || !batch->key.cbufs[0])
                return;

        struct pipe_surface *surf = batch->key.cbufs[0];
        struct panfrost_resource *pres = surf ? pan_resource(surf->texture) : NULL;

        if (pres && pres->damage.tile_map.enable) {
                fb->tile_map.base =
                        pan_pool_upload_aligned(&batch->pool.base,
                                                pres->damage.tile_map.data,
                                                pres->damage.tile_map.size,
                                                64);
                fb->tile_map.stride = pres->damage.tile_map.stride;
        }
}

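/* Flush one batch: build the fb info, emit the TLS/FBD descriptors, submit
 * the job chains, reset the render targets' damage regions and clean the
 * batch up. */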
static void
panfrost_batch_submit(struct panfrost_batch *batch,
                      uint32_t in_sync, uint32_t out_sync)
{
        struct pipe_screen *pscreen = batch->ctx->base.screen;
        struct panfrost_screen *screen = pan_screen(pscreen);
        struct panfrost_device *dev = pan_device(pscreen);
        int ret;

        /* Nothing to do! */
        if (!batch->scoreboard.first_job && !batch->clear)
                goto out;

        struct pan_fb_info fb;
        struct pan_image_view rts[8], zs, s;

        panfrost_batch_to_fb_info(batch, &fb, rts, &zs, &s, false);

        screen->vtbl.preload(batch, &fb);

        if (!pan_is_bifrost(dev)) {
                mali_ptr polygon_list = panfrost_batch_get_polygon_list(batch);

                panfrost_scoreboard_initialize_tiler(&batch->pool.base,
                                                     &batch->scoreboard,
                                                     polygon_list);
        }

        /* Now that all draws are in, we can finally prepare the
         * FBD for the batch (if there is one). */

        screen->vtbl.emit_tls(batch);
        panfrost_emit_tile_map(batch, &fb);

        if (batch->scoreboard.first_tiler || batch->clear)
                screen->vtbl.emit_fbd(batch, &fb);

        ret = panfrost_batch_submit_jobs(batch, &fb, in_sync, out_sync);

        if (ret)
                fprintf(stderr, "panfrost_batch_submit failed: %d\n", ret);

        /* We must reset the damage info of our render targets here even
         * though a damage reset normally happens when the DRI layer swaps
         * buffers. That's because there can be implicit flushes the GL
         * app is not aware of, and those might impact the damage region: if
         * part of the damaged portion is drawn during those implicit flushes,
         * you have to reload those areas before next draws are pushed, and
         * since the driver can't easily know what's been modified by the draws
         * it flushed, the easiest solution is to reload everything.
         */
        for (unsigned i = 0; i < batch->key.nr_cbufs; i++) {
                if (!batch->key.cbufs[i])
                        continue;

                panfrost_resource_set_damage_region(batch->ctx->base.screen,
                                                    batch->key.cbufs[i]->texture,
                                                    0, NULL);
        }

out:
        panfrost_batch_cleanup(batch);
}

/* Submit all batches, applying the out_sync to the currently bound batch */

void
panfrost_flush_all_batches(struct panfrost_context *ctx)
{
        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
        panfrost_batch_submit(batch, ctx->syncobj, ctx->syncobj);

        for (unsigned i = 0; i < PAN_MAX_BATCHES; i++) {
                if (ctx->batches.slots[i].seqnum) {
                        panfrost_batch_submit(&ctx->batches.slots[i],
                                              ctx->syncobj, ctx->syncobj);
                }
        }
}

void
panfrost_flush_writer(struct panfrost_context *ctx,
                      struct panfrost_resource *rsrc)
{
        if (rsrc->track.writer) {
                panfrost_batch_submit(rsrc->track.writer, ctx->syncobj, ctx->syncobj);
                rsrc->track.writer = NULL;
        }
}

void
panfrost_flush_batches_accessing_rsrc(struct panfrost_context *ctx,
                                      struct panfrost_resource *rsrc)
{
        unsigned i;
        BITSET_FOREACH_SET(i, rsrc->track.users, PAN_MAX_BATCHES) {
                panfrost_batch_submit(&ctx->batches.slots[i],
                                      ctx->syncobj, ctx->syncobj);
        }

        assert(!BITSET_COUNT(rsrc->track.users));
        rsrc->track.writer = NULL;
}

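/* Grow the batch's stack size to cover the TLS needs of every bound shader
 * stage. */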
void
panfrost_batch_adjust_stack_size(struct panfrost_batch *batch)
{
        struct panfrost_context *ctx = batch->ctx;

        for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) {
                struct panfrost_shader_state *ss;

                ss = panfrost_get_shader_state(ctx, i);
                if (!ss)
                        continue;

                batch->stack_size = MAX2(batch->stack_size, ss->info.tls_size);
        }
}

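/* Record a clear on the batch: pack the clear colors per render target, stash
 * the depth/stencil values and mark the cleared buffers for resolve. */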
void
panfrost_batch_clear(struct panfrost_batch *batch,
                     unsigned buffers,
                     const union pipe_color_union *color,
                     double depth, unsigned stencil)
{
        struct panfrost_context *ctx = batch->ctx;

        if (buffers & PIPE_CLEAR_COLOR) {
                for (unsigned i = 0; i < ctx->pipe_framebuffer.nr_cbufs; ++i) {
                        if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
                                continue;

                        enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format;
                        pan_pack_color(batch->clear_color[i], color, format);
                }
        }

        if (buffers & PIPE_CLEAR_DEPTH) {
                batch->clear_depth = depth;
        }

        if (buffers & PIPE_CLEAR_STENCIL) {
                batch->clear_stencil = stencil;
        }

        batch->clear |= buffers;
        batch->resolve |= buffers;

        /* Clearing affects the entire framebuffer (by definition -- this is
         * the Gallium clear callback, which clears the whole framebuffer. If
         * the scissor test were enabled from the GL side, the gallium frontend
         * would emit a quad instead and we wouldn't go down this code path) */

        panfrost_batch_union_scissor(batch, 0, 0,
                                     ctx->pipe_framebuffer.width,
                                     ctx->pipe_framebuffer.height);
}

/* Given a new bounding rectangle (scissor), let the job cover the union of the
 * new and old bounding rectangles */

void
panfrost_batch_union_scissor(struct panfrost_batch *batch,
                             unsigned minx, unsigned miny,
                             unsigned maxx, unsigned maxy)
{
        batch->minx = MIN2(batch->minx, minx);
        batch->miny = MIN2(batch->miny, miny);
        batch->maxx = MAX2(batch->maxx, maxx);
        batch->maxy = MAX2(batch->maxy, maxy);
}

void
panfrost_batch_intersection_scissor(struct panfrost_batch *batch,
                                    unsigned minx, unsigned miny,
                                    unsigned maxx, unsigned maxy)
{
        batch->minx = MAX2(batch->minx, minx);
        batch->miny = MAX2(batch->miny, miny);
        batch->maxx = MIN2(batch->maxx, maxx);
        batch->maxy = MIN2(batch->maxy, maxy);
}