Path: blob/21.2-virgl/src/gallium/drivers/panfrost/pan_job.c
/*
 * Copyright (C) 2019-2020 Collabora, Ltd.
 * Copyright (C) 2019 Alyssa Rosenzweig
 * Copyright (C) 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <assert.h>

#include "drm-uapi/panfrost_drm.h"

#include "pan_bo.h"
#include "pan_context.h"
#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/format/u_format.h"
#include "util/u_pack_color.h"
#include "util/rounding.h"
#include "util/u_framebuffer.h"
#include "pan_util.h"
#include "decode.h"
#include "panfrost-quirks.h"

static unsigned
panfrost_batch_idx(struct panfrost_batch *batch)
{
        return batch - batch->ctx->batches.slots;
}

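/* Initialise a freshly-claimed batch slot: reset the BO/resource tracking,
 * set up the transient memory pools, and reserve the framebuffer and local
 * storage descriptors so they are ready by submit time. */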
static void
panfrost_batch_init(struct panfrost_context *ctx,
                    const struct pipe_framebuffer_state *key,
                    struct panfrost_batch *batch)
{
        struct panfrost_device *dev = pan_device(ctx->base.screen);

        batch->ctx = ctx;

        batch->seqnum = ++ctx->batches.seqnum;

        batch->first_bo = INT32_MAX;
        batch->last_bo = INT32_MIN;
        util_sparse_array_init(&batch->bos, sizeof(uint32_t), 64);

        batch->minx = batch->miny = ~0;
        batch->maxx = batch->maxy = 0;

        util_copy_framebuffer_state(&batch->key, key);
        util_dynarray_init(&batch->resources, NULL);

        /* Preallocate the main pool, since every batch has at least one job
         * structure so it will be used */
        panfrost_pool_init(&batch->pool, NULL, dev, 0, 65536, "Batch pool", true, true);

        /* Don't preallocate the invisible pool, since not every batch will use
         * the pre-allocation, particularly if the varyings are larger than the
         * preallocation and a reallocation is needed after anyway. */
        panfrost_pool_init(&batch->invisible_pool, NULL, dev,
                           PAN_BO_INVISIBLE, 65536, "Varyings", false, true);

        panfrost_batch_add_fbo_bos(batch);

        /* Reserve the framebuffer and local storage descriptors */
        batch->framebuffer =
                (dev->quirks & MIDGARD_SFBD) ?
                pan_pool_alloc_desc(&batch->pool.base, SINGLE_TARGET_FRAMEBUFFER) :
                pan_pool_alloc_desc_aggregate(&batch->pool.base,
                                              PAN_DESC(MULTI_TARGET_FRAMEBUFFER),
                                              PAN_DESC(ZS_CRC_EXTENSION),
                                              PAN_DESC_ARRAY(MAX2(key->nr_cbufs, 1), RENDER_TARGET));

        /* Add the MFBD tag now, other tags will be added at submit-time */
        if (!(dev->quirks & MIDGARD_SFBD))
                batch->framebuffer.gpu |= MALI_FBD_TAG_IS_MFBD;

        /* On Midgard, the TLS is embedded in the FB descriptor */
        if (pan_is_bifrost(dev))
                batch->tls = pan_pool_alloc_desc(&batch->pool.base, LOCAL_STORAGE);
        else
                batch->tls = batch->framebuffer;
}

static void
panfrost_batch_cleanup(struct panfrost_batch *batch)
{
        if (!batch)
                return;

        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_device *dev = pan_device(ctx->base.screen);

        assert(batch->seqnum);

        if (ctx->batch == batch)
                ctx->batch = NULL;

        unsigned batch_idx = panfrost_batch_idx(batch);

        for (int i = batch->first_bo; i <= batch->last_bo; i++) {
                uint32_t *flags = util_sparse_array_get(&batch->bos, i);

                if (!*flags)
                        continue;

                struct panfrost_bo *bo = pan_lookup_bo(dev, i);
                panfrost_bo_unreference(bo);
        }

        util_dynarray_foreach(&batch->resources, struct panfrost_resource *, rsrc) {
                BITSET_CLEAR((*rsrc)->track.users, batch_idx);

                if ((*rsrc)->track.writer == batch)
                        (*rsrc)->track.writer = NULL;

                pipe_resource_reference((struct pipe_resource **) rsrc, NULL);
        }

        util_dynarray_fini(&batch->resources);
        panfrost_pool_cleanup(&batch->pool);
        panfrost_pool_cleanup(&batch->invisible_pool);

        util_unreference_framebuffer_state(&batch->key);

        util_sparse_array_finish(&batch->bos);

        memset(batch, 0, sizeof(*batch));
}

static void
panfrost_batch_submit(struct panfrost_batch *batch,
                      uint32_t in_sync, uint32_t out_sync);

static struct panfrost_batch *
panfrost_get_batch(struct panfrost_context *ctx,
                   const struct pipe_framebuffer_state *key)
{
        struct panfrost_batch *batch = NULL;

        for (unsigned i = 0; i < PAN_MAX_BATCHES; i++) {
                if (ctx->batches.slots[i].seqnum &&
                    util_framebuffer_state_equal(&ctx->batches.slots[i].key, key)) {
                        /* We found a match, increase the seqnum for the LRU
                         * eviction logic.
                         */
                        ctx->batches.slots[i].seqnum = ++ctx->batches.seqnum;
                        return &ctx->batches.slots[i];
                }

                if (!batch || batch->seqnum > ctx->batches.slots[i].seqnum)
                        batch = &ctx->batches.slots[i];
        }

        assert(batch);

        /* The selected slot is used, we need to flush the batch */
        if (batch->seqnum)
                panfrost_batch_submit(batch, 0, 0);

        panfrost_batch_init(ctx, key, batch);

        return batch;
}

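/* A worked example of the slot selection in panfrost_get_batch() above
 * (illustrative only): suppose PAN_MAX_BATCHES were 3 and the slot seqnums
 * were {7, 0, 4}. A lookup that matches no key picks slot 1, since a seqnum
 * of 0 means the slot is unused and nothing has to be flushed. With seqnums
 * {7, 9, 4}, slot 2 is the least recently used, so it is submitted and then
 * re-initialised for the new framebuffer key. */
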
struct panfrost_batch *
panfrost_get_fresh_batch(struct panfrost_context *ctx,
                         const struct pipe_framebuffer_state *key)
{
        struct panfrost_batch *batch = panfrost_get_batch(ctx, key);

        panfrost_dirty_state_all(ctx);

        /* The batch has no draw/clear queued, let's return it directly.
         * Note that it's perfectly fine to re-use a batch with an
         * existing clear, we'll just update it with the new clear request.
         */
        if (!batch->scoreboard.first_job) {
                ctx->batch = batch;
                return batch;
        }

        /* Otherwise, we need to flush the existing one and instantiate a new
         * one.
         */
        panfrost_batch_submit(batch, 0, 0);
        batch = panfrost_get_batch(ctx, key);
        return batch;
}

/* Get the job corresponding to the FBO we're currently rendering into */

struct panfrost_batch *
panfrost_get_batch_for_fbo(struct panfrost_context *ctx)
{
        /* If we already began rendering, use that */

        if (ctx->batch) {
                assert(util_framebuffer_state_equal(&ctx->batch->key,
                                                    &ctx->pipe_framebuffer));
                return ctx->batch;
        }

        /* If not, look up the job */
        struct panfrost_batch *batch = panfrost_get_batch(ctx,
                                                          &ctx->pipe_framebuffer);

        /* Set this job as the current FBO job. Will be reset when updating the
         * FB state and when submitting or releasing a job.
         */
        ctx->batch = batch;
        panfrost_dirty_state_all(ctx);
        return batch;
}

struct panfrost_batch *
panfrost_get_fresh_batch_for_fbo(struct panfrost_context *ctx)
{
        struct panfrost_batch *batch;

        batch = panfrost_get_batch(ctx, &ctx->pipe_framebuffer);
        panfrost_dirty_state_all(ctx);

        /* The batch has no draw/clear queued, let's return it directly.
         * Note that it's perfectly fine to re-use a batch with an
         * existing clear, we'll just update it with the new clear request.
         */
        if (!batch->scoreboard.first_job) {
                ctx->batch = batch;
                return batch;
        }

        /* Otherwise, we need to flush the existing one and instantiate a new
         * one.
         */
        panfrost_batch_submit(batch, 0, 0);
        batch = panfrost_get_batch(ctx, &ctx->pipe_framebuffer);
        ctx->batch = batch;
        return batch;
}

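/* Track which batches use a resource and flush the ones whose work would
 * conflict. Reads from several batches can coexist, but a write, or a read
 * of something another batch is writing, forces the other users to be
 * submitted first. For example (illustrative): if batch A rendered to a
 * texture and batch B now samples from it, registering B's read flushes A
 * so B sees the finished rendering. */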
static void
panfrost_batch_update_access(struct panfrost_batch *batch,
                             struct panfrost_resource *rsrc, bool writes)
{
        struct panfrost_context *ctx = batch->ctx;
        uint32_t batch_idx = panfrost_batch_idx(batch);
        struct panfrost_batch *writer = rsrc->track.writer;

        if (unlikely(!BITSET_TEST(rsrc->track.users, batch_idx))) {
                BITSET_SET(rsrc->track.users, batch_idx);

                /* Reference the resource on the batch */
                struct pipe_resource **dst = util_dynarray_grow(&batch->resources,
                                                                struct pipe_resource *, 1);

                *dst = NULL;
                pipe_resource_reference(dst, &rsrc->base);
        }

        /* Flush users if required */
        if (writes || ((writer != NULL) && (writer != batch))) {
                unsigned i;
                BITSET_FOREACH_SET(i, rsrc->track.users, PAN_MAX_BATCHES) {
                        /* Skip the entry if this is our batch. */
                        if (i == batch_idx)
                                continue;

                        panfrost_batch_submit(&ctx->batches.slots[i], 0, 0);
                }
        }

        if (writes)
                rsrc->track.writer = batch;
}

static void
panfrost_batch_add_bo_old(struct panfrost_batch *batch,
                          struct panfrost_bo *bo, uint32_t flags)
{
        if (!bo)
                return;

        uint32_t *entry = util_sparse_array_get(&batch->bos, bo->gem_handle);
        uint32_t old_flags = *entry;

        if (!old_flags) {
                batch->num_bos++;
                batch->first_bo = MIN2(batch->first_bo, bo->gem_handle);
                batch->last_bo = MAX2(batch->last_bo, bo->gem_handle);
                panfrost_bo_reference(bo);
        }

        if (old_flags == flags)
                return;

        flags |= old_flags;
        *entry = flags;
}

static uint32_t
panfrost_access_for_stage(enum pipe_shader_type stage)
{
        return (stage == PIPE_SHADER_FRAGMENT) ?
                PAN_BO_ACCESS_FRAGMENT : PAN_BO_ACCESS_VERTEX_TILER;
}

void
panfrost_batch_add_bo(struct panfrost_batch *batch,
                      struct panfrost_bo *bo, enum pipe_shader_type stage)
{
        panfrost_batch_add_bo_old(batch, bo, PAN_BO_ACCESS_READ |
                                             panfrost_access_for_stage(stage));
}

void
panfrost_batch_read_rsrc(struct panfrost_batch *batch,
                         struct panfrost_resource *rsrc,
                         enum pipe_shader_type stage)
{
        uint32_t access = PAN_BO_ACCESS_READ |
                          panfrost_access_for_stage(stage);

        panfrost_batch_add_bo_old(batch, rsrc->image.data.bo, access);

        if (rsrc->image.crc.bo)
                panfrost_batch_add_bo_old(batch, rsrc->image.crc.bo, access);

        if (rsrc->separate_stencil)
                panfrost_batch_add_bo_old(batch, rsrc->separate_stencil->image.data.bo, access);

        panfrost_batch_update_access(batch, rsrc, false);
}

void
panfrost_batch_write_rsrc(struct panfrost_batch *batch,
                          struct panfrost_resource *rsrc,
                          enum pipe_shader_type stage)
{
        uint32_t access = PAN_BO_ACCESS_WRITE |
                          panfrost_access_for_stage(stage);

        panfrost_batch_add_bo_old(batch, rsrc->image.data.bo, access);

        if (rsrc->image.crc.bo)
                panfrost_batch_add_bo_old(batch, rsrc->image.crc.bo, access);

        if (rsrc->separate_stencil)
                panfrost_batch_add_bo_old(batch, rsrc->separate_stencil->image.data.bo, access);

        panfrost_batch_update_access(batch, rsrc, true);
}

/* Adds the BO backing surface to a batch if the surface is non-null */

static void
panfrost_batch_add_surface(struct panfrost_batch *batch, struct pipe_surface *surf)
{
        if (surf) {
                struct panfrost_resource *rsrc = pan_resource(surf->texture);
                panfrost_batch_write_rsrc(batch, rsrc, PIPE_SHADER_FRAGMENT);
        }
}

void
panfrost_batch_add_fbo_bos(struct panfrost_batch *batch)
{
        for (unsigned i = 0; i < batch->key.nr_cbufs; ++i)
                panfrost_batch_add_surface(batch, batch->key.cbufs[i]);

        panfrost_batch_add_surface(batch, batch->key.zsbuf);
}

struct panfrost_bo *
panfrost_batch_create_bo(struct panfrost_batch *batch, size_t size,
                         uint32_t create_flags, enum pipe_shader_type stage,
                         const char *label)
{
        struct panfrost_bo *bo;

        bo = panfrost_bo_create(pan_device(batch->ctx->base.screen), size,
                                create_flags, label);
        panfrost_batch_add_bo(batch, bo, stage);

        /* panfrost_batch_add_bo() has retained a reference and
         * panfrost_bo_create() initializes the refcnt to 1, so let's
         * unreference the BO here so it gets released when the batch is
         * destroyed (unless it's retained by someone else in the meantime).
         */
        panfrost_bo_unreference(bo);
        return bo;
}

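/* Ownership note for panfrost_batch_create_bo() above (editorial): after it
 * returns, the only reference to the BO is the one held by the batch, so the
 * BO is released in panfrost_batch_cleanup() when the batch retires. A caller
 * that needs the BO to outlive the batch would take its own reference, e.g.
 * (sketch, the "Example" label is made up):
 *
 *    struct panfrost_bo *bo =
 *            panfrost_batch_create_bo(batch, 4096, 0,
 *                                     PIPE_SHADER_VERTEX, "Example");
 *    panfrost_bo_reference(bo); // keep it alive past batch cleanup
 */
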
/* Returns the polygon list's GPU address if available, or otherwise allocates
 * the polygon list. It's perfectly fast to use allocate/free BO directly,
 * since we'll hit the BO cache and this is one-per-batch anyway. */

static mali_ptr
panfrost_batch_get_polygon_list(struct panfrost_batch *batch)
{
        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);

        assert(!pan_is_bifrost(dev));

        if (!batch->tiler_ctx.midgard.polygon_list) {
                bool has_draws = batch->scoreboard.first_tiler != NULL;
                unsigned size =
                        panfrost_tiler_get_polygon_list_size(dev,
                                                             batch->key.width,
                                                             batch->key.height,
                                                             has_draws);
                size = util_next_power_of_two(size);

                /* Create the BO as invisible if we can. In the non-hierarchical
                 * tiler case, we need to write the polygon list manually because
                 * there's no WRITE_VALUE job in the chain (maybe we should add
                 * one...). */
                bool init_polygon_list = !has_draws && (dev->quirks & MIDGARD_NO_HIER_TILING);
                batch->tiler_ctx.midgard.polygon_list =
                        panfrost_batch_create_bo(batch, size,
                                                 init_polygon_list ? 0 : PAN_BO_INVISIBLE,
                                                 PIPE_SHADER_VERTEX,
                                                 "Polygon list");
                panfrost_batch_add_bo(batch, batch->tiler_ctx.midgard.polygon_list,
                                      PIPE_SHADER_FRAGMENT);

                if (init_polygon_list) {
                        assert(batch->tiler_ctx.midgard.polygon_list->ptr.cpu);
                        uint32_t *polygon_list_body =
                                batch->tiler_ctx.midgard.polygon_list->ptr.cpu +
                                MALI_MIDGARD_TILER_MINIMUM_HEADER_SIZE;
                        polygon_list_body[0] = 0xa0000000; /* TODO: Just that? */
                }

                batch->tiler_ctx.midgard.disable = !has_draws;
        }

        return batch->tiler_ctx.midgard.polygon_list->ptr.gpu;
}

struct panfrost_bo *
panfrost_batch_get_scratchpad(struct panfrost_batch *batch,
                              unsigned size_per_thread,
                              unsigned thread_tls_alloc,
                              unsigned core_count)
{
        unsigned size = panfrost_get_total_stack_size(size_per_thread,
                                                      thread_tls_alloc,
                                                      core_count);

        if (batch->scratchpad) {
                assert(batch->scratchpad->size >= size);
        } else {
                batch->scratchpad = panfrost_batch_create_bo(batch, size,
                                                             PAN_BO_INVISIBLE,
                                                             PIPE_SHADER_VERTEX,
                                                             "Thread local storage");

                panfrost_batch_add_bo(batch, batch->scratchpad,
                                      PIPE_SHADER_FRAGMENT);
        }

        return batch->scratchpad;
}

struct panfrost_bo *
panfrost_batch_get_shared_memory(struct panfrost_batch *batch,
                                 unsigned size,
                                 unsigned workgroup_count)
{
        if (batch->shared_memory) {
                assert(batch->shared_memory->size >= size);
        } else {
                batch->shared_memory = panfrost_batch_create_bo(batch, size,
                                                                PAN_BO_INVISIBLE,
                                                                PIPE_SHADER_VERTEX,
                                                                "Workgroup shared memory");
        }

        return batch->shared_memory;
}

mali_ptr
panfrost_batch_get_bifrost_tiler(struct panfrost_batch *batch, unsigned vertex_count)
{
        struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
        assert(pan_is_bifrost(dev));

        if (!vertex_count)
                return 0;

        if (batch->tiler_ctx.bifrost)
                return batch->tiler_ctx.bifrost;

        struct panfrost_ptr t =
                pan_pool_alloc_desc(&batch->pool.base, BIFROST_TILER_HEAP);

        pan_emit_bifrost_tiler_heap(dev, t.cpu);

        mali_ptr heap = t.gpu;

        t = pan_pool_alloc_desc(&batch->pool.base, BIFROST_TILER);
        pan_emit_bifrost_tiler(dev, batch->key.width, batch->key.height,
                               util_framebuffer_get_num_samples(&batch->key),
                               heap, t.cpu);

        batch->tiler_ctx.bifrost = t.gpu;
        return batch->tiler_ctx.bifrost;
}

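/* Gather the batch's accumulated state (clears, draws, resolves, damage
 * extent) into a pan_fb_info plus per-attachment image views, which the
 * preload and framebuffer-descriptor emission paths consume. `reserve`
 * forces the contents to be treated as preserved rather than discardable. */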
static void
panfrost_batch_to_fb_info(const struct panfrost_batch *batch,
                          struct pan_fb_info *fb,
                          struct pan_image_view *rts,
                          struct pan_image_view *zs,
                          struct pan_image_view *s,
                          bool reserve)
{
        memset(fb, 0, sizeof(*fb));
        memset(rts, 0, sizeof(*rts) * 8);
        memset(zs, 0, sizeof(*zs));
        memset(s, 0, sizeof(*s));

        fb->width = batch->key.width;
        fb->height = batch->key.height;
        fb->extent.minx = batch->minx;
        fb->extent.miny = batch->miny;
        fb->extent.maxx = batch->maxx - 1;
        fb->extent.maxy = batch->maxy - 1;
        fb->nr_samples = util_framebuffer_get_num_samples(&batch->key);
        fb->rt_count = batch->key.nr_cbufs;

        static const unsigned char id_swz[] = {
                PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
        };

        for (unsigned i = 0; i < fb->rt_count; i++) {
                struct pipe_surface *surf = batch->key.cbufs[i];

                if (!surf)
                        continue;

                struct panfrost_resource *prsrc = pan_resource(surf->texture);
                unsigned mask = PIPE_CLEAR_COLOR0 << i;

                if (batch->clear & mask) {
                        fb->rts[i].clear = true;
                        memcpy(fb->rts[i].clear_value, batch->clear_color[i],
                               sizeof((fb->rts[i].clear_value)));
                }

                fb->rts[i].discard = !reserve && !(batch->resolve & mask);

                rts[i].format = surf->format;
                rts[i].dim = MALI_TEXTURE_DIMENSION_2D;
                rts[i].last_level = rts[i].first_level = surf->u.tex.level;
                rts[i].first_layer = surf->u.tex.first_layer;
                rts[i].last_layer = surf->u.tex.last_layer;
                rts[i].image = &prsrc->image;
                rts[i].nr_samples = surf->nr_samples ? : MAX2(surf->texture->nr_samples, 1);
                memcpy(rts[i].swizzle, id_swz, sizeof(rts[i].swizzle));
                fb->rts[i].crc_valid = &prsrc->valid.crc;
                fb->rts[i].view = &rts[i];

                /* Preload if the RT is read or updated */
                if (!(batch->clear & mask) &&
                    ((batch->read & mask) ||
                     ((batch->draws & mask) &&
                      BITSET_TEST(prsrc->valid.data, fb->rts[i].view->first_level))))
                        fb->rts[i].preload = true;
        }

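        /* Now the depth/stencil attachment: a combined ZS format shares a
         * single view for both planes, while a separate-stencil resource
         * gets its own S8 view below. */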
        const struct pan_image_view *s_view = NULL, *z_view = NULL;
        struct panfrost_resource *z_rsrc = NULL, *s_rsrc = NULL;

        if (batch->key.zsbuf) {
                struct pipe_surface *surf = batch->key.zsbuf;
                z_rsrc = pan_resource(surf->texture);

                zs->format = surf->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT ?
                             PIPE_FORMAT_Z32_FLOAT : surf->format;
                zs->dim = MALI_TEXTURE_DIMENSION_2D;
                zs->last_level = zs->first_level = surf->u.tex.level;
                zs->first_layer = surf->u.tex.first_layer;
                zs->last_layer = surf->u.tex.last_layer;
                zs->image = &z_rsrc->image;
                zs->nr_samples = surf->nr_samples ? : MAX2(surf->texture->nr_samples, 1);
                memcpy(zs->swizzle, id_swz, sizeof(zs->swizzle));
                fb->zs.view.zs = zs;
                z_view = zs;
                if (util_format_is_depth_and_stencil(zs->format)) {
                        s_view = zs;
                        s_rsrc = z_rsrc;
                }

                if (z_rsrc->separate_stencil) {
                        s_rsrc = z_rsrc->separate_stencil;
                        s->format = PIPE_FORMAT_S8_UINT;
                        s->dim = MALI_TEXTURE_DIMENSION_2D;
                        s->last_level = s->first_level = surf->u.tex.level;
                        s->first_layer = surf->u.tex.first_layer;
                        s->last_layer = surf->u.tex.last_layer;
                        s->image = &s_rsrc->image;
                        s->nr_samples = surf->nr_samples ? : MAX2(surf->texture->nr_samples, 1);
                        memcpy(s->swizzle, id_swz, sizeof(s->swizzle));
                        fb->zs.view.s = s;
                        s_view = s;
                }
        }

        if (batch->clear & PIPE_CLEAR_DEPTH) {
                fb->zs.clear.z = true;
                fb->zs.clear_value.depth = batch->clear_depth;
        }

        if (batch->clear & PIPE_CLEAR_STENCIL) {
                fb->zs.clear.s = true;
                fb->zs.clear_value.stencil = batch->clear_stencil;
        }

        fb->zs.discard.z = !reserve && !(batch->resolve & PIPE_CLEAR_DEPTH);
        fb->zs.discard.s = !reserve && !(batch->resolve & PIPE_CLEAR_STENCIL);

        if (!fb->zs.clear.z &&
            ((batch->read & PIPE_CLEAR_DEPTH) ||
             ((batch->draws & PIPE_CLEAR_DEPTH) &&
              z_rsrc && BITSET_TEST(z_rsrc->valid.data, z_view->first_level))))
                fb->zs.preload.z = true;

        if (!fb->zs.clear.s &&
            ((batch->read & PIPE_CLEAR_STENCIL) ||
             ((batch->draws & PIPE_CLEAR_STENCIL) &&
              s_rsrc && BITSET_TEST(s_rsrc->valid.data, s_view->first_level))))
                fb->zs.preload.s = true;

        /* Preserve both components if we have a combined ZS view and
         * one component needs to be preserved.
         */
        if (s_view == z_view && fb->zs.discard.z != fb->zs.discard.s) {
                bool valid = BITSET_TEST(z_rsrc->valid.data, z_view->first_level);

                fb->zs.discard.z = false;
                fb->zs.discard.s = false;
                fb->zs.preload.z = !fb->zs.clear.z && valid;
                fb->zs.preload.s = !fb->zs.clear.s && valid;
        }
}

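/* Build and issue a single DRM_IOCTL_PANFROST_SUBMIT for one job chain. The
 * BO handle table is sized for the worst case: every BO tracked on the batch,
 * every BO backing the two transient pools, plus two extra slots for the
 * device-wide tiler heap and sample positions BOs appended below. */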
static int
panfrost_batch_submit_ioctl(struct panfrost_batch *batch,
                            mali_ptr first_job_desc,
                            uint32_t reqs,
                            uint32_t in_sync,
                            uint32_t out_sync)
{
        struct panfrost_context *ctx = batch->ctx;
        struct pipe_context *gallium = (struct pipe_context *) ctx;
        struct panfrost_device *dev = pan_device(gallium->screen);
        struct drm_panfrost_submit submit = {0,};
        uint32_t *bo_handles;
        int ret;

        /* If we trace, we always need a syncobj to wait on, so fall back to
         * the context's syncobj when the caller didn't provide one. */

        if (!out_sync && dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
                out_sync = ctx->syncobj;

        submit.out_sync = out_sync;
        submit.jc = first_job_desc;
        submit.requirements = reqs;
        if (in_sync) {
                submit.in_syncs = (u64)(uintptr_t)(&in_sync);
                submit.in_sync_count = 1;
        }

        bo_handles = calloc(panfrost_pool_num_bos(&batch->pool) +
                            panfrost_pool_num_bos(&batch->invisible_pool) +
                            batch->num_bos + 2,
                            sizeof(*bo_handles));
        assert(bo_handles);

        for (int i = batch->first_bo; i <= batch->last_bo; i++) {
                uint32_t *flags = util_sparse_array_get(&batch->bos, i);

                if (!*flags)
                        continue;

                assert(submit.bo_handle_count < batch->num_bos);
                bo_handles[submit.bo_handle_count++] = i;

                /* Update the BO access flags so that panfrost_bo_wait() knows
                 * about all pending accesses.
                 * We only keep the READ/WRITE info since this is all the BO
                 * wait logic cares about.
                 * We also preserve existing flags as this batch might not
                 * be the first one to access the BO.
                 */
                struct panfrost_bo *bo = pan_lookup_bo(dev, i);

                bo->gpu_access |= *flags & (PAN_BO_ACCESS_RW);
        }

        panfrost_pool_get_bo_handles(&batch->pool, bo_handles + submit.bo_handle_count);
        submit.bo_handle_count += panfrost_pool_num_bos(&batch->pool);
        panfrost_pool_get_bo_handles(&batch->invisible_pool, bo_handles + submit.bo_handle_count);
        submit.bo_handle_count += panfrost_pool_num_bos(&batch->invisible_pool);

        /* Add the tiler heap to the list of accessed BOs if the batch has at
         * least one tiler job. The tiler heap is written by tiler jobs and read
         * by fragment jobs (the polygon list is coming from this heap).
         */
        if (batch->scoreboard.first_tiler)
                bo_handles[submit.bo_handle_count++] = dev->tiler_heap->gem_handle;

        /* Always used on Bifrost, occasionally used on Midgard */
        bo_handles[submit.bo_handle_count++] = dev->sample_positions->gem_handle;

        submit.bo_handles = (u64) (uintptr_t) bo_handles;
        if (ctx->is_noop)
                ret = 0;
        else
                ret = drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit);
        free(bo_handles);

        if (ret)
                return errno;

        /* Trace the job if we're doing that */
        if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC)) {
                /* Wait so we can get errors reported back */
                drmSyncobjWait(dev->fd, &out_sync, 1,
                               INT64_MAX, 0, NULL);

                if (dev->debug & PAN_DBG_TRACE)
                        pandecode_jc(submit.jc, pan_is_bifrost(dev), dev->gpu_id);

                if (dev->debug & PAN_DBG_SYNC)
                        pandecode_abort_on_fault(submit.jc);
        }

        return 0;
}

/* Submit both vertex/tiler and fragment jobs for a batch, possibly with an
 * out_sync corresponding to the later of the two (since there will be an
 * implicit dep between them) */

static int
panfrost_batch_submit_jobs(struct panfrost_batch *batch,
                           const struct pan_fb_info *fb,
                           uint32_t in_sync, uint32_t out_sync)
{
        struct pipe_screen *pscreen = batch->ctx->base.screen;
        struct panfrost_screen *screen = pan_screen(pscreen);
        struct panfrost_device *dev = pan_device(pscreen);
        bool has_draws = batch->scoreboard.first_job;
        bool has_tiler = batch->scoreboard.first_tiler;
        bool has_frag = has_tiler || batch->clear;
        int ret = 0;

        /* Take the submit lock to make sure no tiler jobs from other contexts
         * are inserted between our tiler and fragment jobs; failing to do that
         * might result in tiler heap corruption.
         */
        if (has_tiler)
                pthread_mutex_lock(&dev->submit_lock);

        if (has_draws) {
                ret = panfrost_batch_submit_ioctl(batch, batch->scoreboard.first_job,
                                                  0, in_sync, has_frag ? 0 : out_sync);

                if (ret)
                        goto done;
        }

        if (has_frag) {
                /* Whether we program the fragment job for draws or not depends
                 * on whether there is any *tiler* activity (so fragment
                 * shaders). If there are draws but entirely RASTERIZER_DISCARD
                 * (say, for transform feedback), we want a fragment job that
                 * *only* clears, since otherwise the tiler structures will be
                 * uninitialized leading to faults (or state leaks) */

                mali_ptr fragjob = screen->vtbl.emit_fragment_job(batch, fb);
                ret = panfrost_batch_submit_ioctl(batch, fragjob,
                                                  PANFROST_JD_REQ_FS, 0,
                                                  out_sync);
                if (ret)
                        goto done;
        }

done:
        if (has_tiler)
                pthread_mutex_unlock(&dev->submit_lock);

        return ret;
}

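/* Upload the first colour target's damage tile map, if it tracks one, and
 * record its stride so the framebuffer descriptor can point at it (used to
 * limit work to the damaged area). */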
static void
panfrost_emit_tile_map(struct panfrost_batch *batch, struct pan_fb_info *fb)
{
        if (batch->key.nr_cbufs < 1 || !batch->key.cbufs[0])
                return;

        struct pipe_surface *surf = batch->key.cbufs[0];
        struct panfrost_resource *pres = surf ? pan_resource(surf->texture) : NULL;

        if (pres && pres->damage.tile_map.enable) {
                fb->tile_map.base =
                        pan_pool_upload_aligned(&batch->pool.base,
                                                pres->damage.tile_map.data,
                                                pres->damage.tile_map.size,
                                                64);
                fb->tile_map.stride = pres->damage.tile_map.stride;
        }
}

static void
panfrost_batch_submit(struct panfrost_batch *batch,
                      uint32_t in_sync, uint32_t out_sync)
{
        struct pipe_screen *pscreen = batch->ctx->base.screen;
        struct panfrost_screen *screen = pan_screen(pscreen);
        struct panfrost_device *dev = pan_device(pscreen);
        int ret;

        /* Nothing to do! */
        if (!batch->scoreboard.first_job && !batch->clear)
                goto out;

        struct pan_fb_info fb;
        struct pan_image_view rts[8], zs, s;

        panfrost_batch_to_fb_info(batch, &fb, rts, &zs, &s, false);

        screen->vtbl.preload(batch, &fb);

        if (!pan_is_bifrost(dev)) {
                mali_ptr polygon_list = panfrost_batch_get_polygon_list(batch);

                panfrost_scoreboard_initialize_tiler(&batch->pool.base,
                                                     &batch->scoreboard,
                                                     polygon_list);
        }

        /* Now that all draws are in, we can finally prepare the
         * FBD for the batch (if there is one). */

        screen->vtbl.emit_tls(batch);
        panfrost_emit_tile_map(batch, &fb);

        if (batch->scoreboard.first_tiler || batch->clear)
                screen->vtbl.emit_fbd(batch, &fb);

        ret = panfrost_batch_submit_jobs(batch, &fb, in_sync, out_sync);

        if (ret)
                fprintf(stderr, "panfrost_batch_submit failed: %d\n", ret);

        /* We must reset the damage info of our render targets here even
         * though a damage reset normally happens when the DRI layer swaps
         * buffers. That's because there can be implicit flushes the GL
         * app is not aware of, and those might impact the damage region: if
         * part of the damaged portion is drawn during those implicit flushes,
         * you have to reload those areas before next draws are pushed, and
         * since the driver can't easily know what's been modified by the draws
         * it flushed, the easiest solution is to reload everything.
         */
        for (unsigned i = 0; i < batch->key.nr_cbufs; i++) {
                if (!batch->key.cbufs[i])
                        continue;

                panfrost_resource_set_damage_region(batch->ctx->base.screen,
                                                    batch->key.cbufs[i]->texture,
                                                    0, NULL);
        }

out:
        panfrost_batch_cleanup(batch);
}

/* Submit all batches, applying the out_sync to the currently bound batch */

void
panfrost_flush_all_batches(struct panfrost_context *ctx)
{
        struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
        panfrost_batch_submit(batch, ctx->syncobj, ctx->syncobj);

        for (unsigned i = 0; i < PAN_MAX_BATCHES; i++) {
                if (ctx->batches.slots[i].seqnum) {
                        panfrost_batch_submit(&ctx->batches.slots[i],
                                              ctx->syncobj, ctx->syncobj);
                }
        }
}

void
panfrost_flush_writer(struct panfrost_context *ctx,
                      struct panfrost_resource *rsrc)
{
        if (rsrc->track.writer) {
                panfrost_batch_submit(rsrc->track.writer, ctx->syncobj, ctx->syncobj);
                rsrc->track.writer = NULL;
        }
}

void
panfrost_flush_batches_accessing_rsrc(struct panfrost_context *ctx,
                                      struct panfrost_resource *rsrc)
{
        unsigned i;
        BITSET_FOREACH_SET(i, rsrc->track.users, PAN_MAX_BATCHES) {
                panfrost_batch_submit(&ctx->batches.slots[i],
                                      ctx->syncobj, ctx->syncobj);
        }

        assert(!BITSET_COUNT(rsrc->track.users));
        rsrc->track.writer = NULL;
}

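/* Typical use of the two flush helpers above (an illustrative sketch; the
 * policy lives with callers outside this file): a CPU read of a resource only
 * needs the pending writer flushed, while a CPU write must also wait for
 * every batch still reading it:
 *
 *    if (mapping_for_write)
 *            panfrost_flush_batches_accessing_rsrc(ctx, rsrc);
 *    else
 *            panfrost_flush_writer(ctx, rsrc);
 *
 * where `mapping_for_write` is a hypothetical flag from the caller. */
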
void
panfrost_batch_adjust_stack_size(struct panfrost_batch *batch)
{
        struct panfrost_context *ctx = batch->ctx;

        for (unsigned i = 0; i < PIPE_SHADER_TYPES; ++i) {
                struct panfrost_shader_state *ss;

                ss = panfrost_get_shader_state(ctx, i);
                if (!ss)
                        continue;

                batch->stack_size = MAX2(batch->stack_size, ss->info.tls_size);
        }
}

void
panfrost_batch_clear(struct panfrost_batch *batch,
                     unsigned buffers,
                     const union pipe_color_union *color,
                     double depth, unsigned stencil)
{
        struct panfrost_context *ctx = batch->ctx;

        if (buffers & PIPE_CLEAR_COLOR) {
                for (unsigned i = 0; i < ctx->pipe_framebuffer.nr_cbufs; ++i) {
                        if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
                                continue;

                        enum pipe_format format = ctx->pipe_framebuffer.cbufs[i]->format;
                        pan_pack_color(batch->clear_color[i], color, format);
                }
        }

        if (buffers & PIPE_CLEAR_DEPTH) {
                batch->clear_depth = depth;
        }

        if (buffers & PIPE_CLEAR_STENCIL) {
                batch->clear_stencil = stencil;
        }

        batch->clear |= buffers;
        batch->resolve |= buffers;

        /* Clearing affects the entire framebuffer (by definition -- this is
         * the Gallium clear callback, which clears the whole framebuffer. If
         * the scissor test were enabled from the GL side, the gallium frontend
         * would emit a quad instead and we wouldn't go down this code path) */

        panfrost_batch_union_scissor(batch, 0, 0,
                                     ctx->pipe_framebuffer.width,
                                     ctx->pipe_framebuffer.height);
}

/* Given a new bounding rectangle (scissor), let the job cover the union of the
 * new and old bounding rectangles */

void
panfrost_batch_union_scissor(struct panfrost_batch *batch,
                             unsigned minx, unsigned miny,
                             unsigned maxx, unsigned maxy)
{
        batch->minx = MIN2(batch->minx, minx);
        batch->miny = MIN2(batch->miny, miny);
        batch->maxx = MAX2(batch->maxx, maxx);
        batch->maxy = MAX2(batch->maxy, maxy);
}

void
panfrost_batch_intersection_scissor(struct panfrost_batch *batch,
                                    unsigned minx, unsigned miny,
                                    unsigned maxx, unsigned maxy)
{
        batch->minx = MAX2(batch->minx, minx);
        batch->miny = MAX2(batch->miny, miny);
        batch->maxx = MIN2(batch->maxx, maxx);
        batch->maxy = MIN2(batch->maxy, maxy);
}