Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/lima/lima_job.c
4565 views
1
/*
2
* Copyright (C) 2017-2019 Lima Project
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice shall be included in
12
* all copies or substantial portions of the Software.
13
*
14
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20
* OTHER DEALINGS IN THE SOFTWARE.
21
*
22
*/
23
24
#include <stdlib.h>
25
#include <string.h>
26
27
#include "xf86drm.h"
28
#include "drm-uapi/lima_drm.h"
29
30
#include "util/u_math.h"
31
#include "util/ralloc.h"
32
#include "util/os_time.h"
33
#include "util/hash_table.h"
34
#include "util/format/u_format.h"
35
#include "util/u_upload_mgr.h"
36
#include "util/u_inlines.h"
37
38
#include "lima_screen.h"
39
#include "lima_context.h"
40
#include "lima_job.h"
41
#include "lima_bo.h"
42
#include "lima_util.h"
43
#include "lima_format.h"
44
#include "lima_resource.h"
45
#include "lima_texture.h"
46
#include "lima_fence.h"
47
#include "lima_gpu.h"
48
49
/* Widen a pointer to a 64-bit integer. The intermediate cast goes through
 * uintptr_t so the conversion stays lossless even on ABIs where
 * unsigned long is narrower than a pointer. */
#define VOID2U64(x) ((uint64_t)(uintptr_t)(x))
50
51
static void
52
lima_get_fb_info(struct lima_job *job)
53
{
54
struct lima_context *ctx = job->ctx;
55
struct lima_job_fb_info *fb = &job->fb;
56
57
fb->width = ctx->framebuffer.base.width;
58
fb->height = ctx->framebuffer.base.height;
59
60
int width = align(fb->width, 16) >> 4;
61
int height = align(fb->height, 16) >> 4;
62
63
struct lima_screen *screen = lima_screen(ctx->base.screen);
64
65
fb->tiled_w = width;
66
fb->tiled_h = height;
67
68
fb->shift_h = 0;
69
fb->shift_w = 0;
70
71
int limit = screen->plb_max_blk;
72
while ((width * height) > limit) {
73
if (width >= height) {
74
width = (width + 1) >> 1;
75
fb->shift_w++;
76
} else {
77
height = (height + 1) >> 1;
78
fb->shift_h++;
79
}
80
}
81
82
fb->block_w = width;
83
fb->block_h = height;
84
85
fb->shift_min = MIN3(fb->shift_w, fb->shift_h, 2);
86
}
87
88
static struct lima_job *
89
lima_job_create(struct lima_context *ctx)
90
{
91
struct lima_job *s;
92
93
s = rzalloc(ctx, struct lima_job);
94
if (!s)
95
return NULL;
96
97
s->fd = lima_screen(ctx->base.screen)->fd;
98
s->ctx = ctx;
99
100
s->damage_rect.minx = s->damage_rect.miny = 0xffff;
101
s->damage_rect.maxx = s->damage_rect.maxy = 0;
102
s->draws = 0;
103
104
s->clear.depth = 0x00ffffff;
105
106
for (int i = 0; i < 2; i++) {
107
util_dynarray_init(s->gem_bos + i, s);
108
util_dynarray_init(s->bos + i, s);
109
}
110
111
util_dynarray_init(&s->vs_cmd_array, s);
112
util_dynarray_init(&s->plbu_cmd_array, s);
113
util_dynarray_init(&s->plbu_cmd_head, s);
114
115
struct lima_context_framebuffer *fb = &ctx->framebuffer;
116
pipe_surface_reference(&s->key.cbuf, fb->base.cbufs[0]);
117
pipe_surface_reference(&s->key.zsbuf, fb->base.zsbuf);
118
119
lima_get_fb_info(s);
120
121
s->dump = lima_dump_create();
122
123
return s;
124
}
125
126
static void
127
lima_job_free(struct lima_job *job)
128
{
129
struct lima_context *ctx = job->ctx;
130
131
_mesa_hash_table_remove_key(ctx->jobs, &job->key);
132
133
if (job->key.cbuf && (job->resolve & PIPE_CLEAR_COLOR0))
134
_mesa_hash_table_remove_key(ctx->write_jobs, job->key.cbuf->texture);
135
if (job->key.zsbuf && (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)))
136
_mesa_hash_table_remove_key(ctx->write_jobs, job->key.zsbuf->texture);
137
138
pipe_surface_reference(&job->key.cbuf, NULL);
139
pipe_surface_reference(&job->key.zsbuf, NULL);
140
141
lima_dump_free(job->dump);
142
job->dump = NULL;
143
144
/* TODO: do we need a cache for job? */
145
ralloc_free(job);
146
}
147
148
static struct lima_job *
149
_lima_job_get(struct lima_context *ctx)
150
{
151
struct lima_context_framebuffer *fb = &ctx->framebuffer;
152
struct lima_job_key local_key = {
153
.cbuf = fb->base.cbufs[0],
154
.zsbuf = fb->base.zsbuf,
155
};
156
157
struct hash_entry *entry = _mesa_hash_table_search(ctx->jobs, &local_key);
158
if (entry)
159
return entry->data;
160
161
struct lima_job *job = lima_job_create(ctx);
162
if (!job)
163
return NULL;
164
165
_mesa_hash_table_insert(ctx->jobs, &job->key, job);
166
167
return job;
168
}
169
170
/*
171
* Note: this function can only be called in draw code path,
172
* must not exist in flush code path.
173
*/
174
struct lima_job *
175
lima_job_get(struct lima_context *ctx)
176
{
177
if (ctx->job)
178
return ctx->job;
179
180
ctx->job = _lima_job_get(ctx);
181
return ctx->job;
182
}
183
184
bool lima_job_add_bo(struct lima_job *job, int pipe,
185
struct lima_bo *bo, uint32_t flags)
186
{
187
util_dynarray_foreach(job->gem_bos + pipe, struct drm_lima_gem_submit_bo, gem_bo) {
188
if (bo->handle == gem_bo->handle) {
189
gem_bo->flags |= flags;
190
return true;
191
}
192
}
193
194
struct drm_lima_gem_submit_bo *job_bo =
195
util_dynarray_grow(job->gem_bos + pipe, struct drm_lima_gem_submit_bo, 1);
196
job_bo->handle = bo->handle;
197
job_bo->flags = flags;
198
199
struct lima_bo **jbo = util_dynarray_grow(job->bos + pipe, struct lima_bo *, 1);
200
*jbo = bo;
201
202
/* prevent bo from being freed when job start */
203
lima_bo_reference(bo);
204
205
return true;
206
}
207
208
static bool
209
lima_job_start(struct lima_job *job, int pipe, void *frame, uint32_t size)
210
{
211
struct lima_context *ctx = job->ctx;
212
struct drm_lima_gem_submit req = {
213
.ctx = ctx->id,
214
.pipe = pipe,
215
.nr_bos = job->gem_bos[pipe].size / sizeof(struct drm_lima_gem_submit_bo),
216
.bos = VOID2U64(util_dynarray_begin(job->gem_bos + pipe)),
217
.frame = VOID2U64(frame),
218
.frame_size = size,
219
.out_sync = ctx->out_sync[pipe],
220
};
221
222
if (ctx->in_sync_fd >= 0) {
223
int err = drmSyncobjImportSyncFile(job->fd, ctx->in_sync[pipe],
224
ctx->in_sync_fd);
225
if (err)
226
return false;
227
228
req.in_sync[0] = ctx->in_sync[pipe];
229
close(ctx->in_sync_fd);
230
ctx->in_sync_fd = -1;
231
}
232
233
bool ret = drmIoctl(job->fd, DRM_IOCTL_LIMA_GEM_SUBMIT, &req) == 0;
234
235
util_dynarray_foreach(job->bos + pipe, struct lima_bo *, bo) {
236
lima_bo_unreference(*bo);
237
}
238
239
return ret;
240
}
241
242
static bool
243
lima_job_wait(struct lima_job *job, int pipe, uint64_t timeout_ns)
244
{
245
int64_t abs_timeout = os_time_get_absolute_timeout(timeout_ns);
246
if (abs_timeout == OS_TIMEOUT_INFINITE)
247
abs_timeout = INT64_MAX;
248
249
struct lima_context *ctx = job->ctx;
250
return !drmSyncobjWait(job->fd, ctx->out_sync + pipe, 1, abs_timeout, 0, NULL);
251
}
252
253
static bool
254
lima_job_has_bo(struct lima_job *job, struct lima_bo *bo, bool all)
255
{
256
for (int i = 0; i < 2; i++) {
257
util_dynarray_foreach(job->gem_bos + i, struct drm_lima_gem_submit_bo, gem_bo) {
258
if (bo->handle == gem_bo->handle) {
259
if (all || gem_bo->flags & LIMA_SUBMIT_BO_WRITE)
260
return true;
261
else
262
break;
263
}
264
}
265
}
266
267
return false;
268
}
269
270
void *
271
lima_job_create_stream_bo(struct lima_job *job, int pipe,
272
unsigned size, uint32_t *va)
273
{
274
struct lima_context *ctx = job->ctx;
275
276
void *cpu;
277
unsigned offset;
278
struct pipe_resource *pres = NULL;
279
u_upload_alloc(ctx->uploader, 0, size, 0x40, &offset, &pres, &cpu);
280
281
struct lima_resource *res = lima_resource(pres);
282
*va = res->bo->va + offset;
283
284
lima_job_add_bo(job, pipe, res->bo, LIMA_SUBMIT_BO_READ);
285
286
pipe_resource_reference(&pres, NULL);
287
288
return cpu;
289
}
290
291
static inline struct lima_damage_region *
292
lima_job_get_damage(struct lima_job *job)
293
{
294
if (!(job->key.cbuf && (job->resolve & PIPE_CLEAR_COLOR0)))
295
return NULL;
296
297
struct lima_surface *surf = lima_surface(job->key.cbuf);
298
struct lima_resource *res = lima_resource(surf->base.texture);
299
return &res->damage;
300
}
301
302
static bool
303
lima_fb_cbuf_needs_reload(struct lima_job *job)
304
{
305
if (!job->key.cbuf)
306
return false;
307
308
struct lima_surface *surf = lima_surface(job->key.cbuf);
309
struct lima_resource *res = lima_resource(surf->base.texture);
310
if (res->damage.region) {
311
/* for EGL_KHR_partial_update, when EGL_EXT_buffer_age is enabled,
312
* we need to reload damage region, otherwise just want to reload
313
* the region not aligned to tile boundary */
314
//if (!res->damage.aligned)
315
// return true;
316
return true;
317
}
318
else if (surf->reload & PIPE_CLEAR_COLOR0)
319
return true;
320
321
return false;
322
}
323
324
static bool
325
lima_fb_zsbuf_needs_reload(struct lima_job *job)
326
{
327
if (!job->key.zsbuf)
328
return false;
329
330
struct lima_surface *surf = lima_surface(job->key.zsbuf);
331
if (surf->reload & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))
332
return true;
333
334
return false;
335
}
336
337
static void
338
lima_pack_reload_plbu_cmd(struct lima_job *job, struct pipe_surface *psurf)
339
{
340
#define lima_reload_render_state_offset 0x0000
341
#define lima_reload_gl_pos_offset 0x0040
342
#define lima_reload_varying_offset 0x0080
343
#define lima_reload_tex_desc_offset 0x00c0
344
#define lima_reload_tex_array_offset 0x0100
345
#define lima_reload_buffer_size 0x0140
346
347
struct lima_context *ctx = job->ctx;
348
struct lima_surface *surf = lima_surface(psurf);
349
int level = psurf->u.tex.level;
350
unsigned first_layer = psurf->u.tex.first_layer;
351
352
uint32_t va;
353
void *cpu = lima_job_create_stream_bo(
354
job, LIMA_PIPE_PP, lima_reload_buffer_size, &va);
355
356
struct lima_screen *screen = lima_screen(ctx->base.screen);
357
358
uint32_t reload_shader_first_instr_size =
359
((uint32_t *)(screen->pp_buffer->map + pp_reload_program_offset))[0] & 0x1f;
360
uint32_t reload_shader_va = screen->pp_buffer->va + pp_reload_program_offset;
361
362
struct lima_render_state reload_render_state = {
363
.alpha_blend = 0xf03b1ad2,
364
.depth_test = 0x0000000e,
365
.depth_range = 0xffff0000,
366
.stencil_front = 0x00000007,
367
.stencil_back = 0x00000007,
368
.multi_sample = 0x0000f007,
369
.shader_address = reload_shader_va | reload_shader_first_instr_size,
370
.varying_types = 0x00000001,
371
.textures_address = va + lima_reload_tex_array_offset,
372
.aux0 = 0x00004021,
373
.varyings_address = va + lima_reload_varying_offset,
374
};
375
376
if (util_format_is_depth_or_stencil(psurf->format)) {
377
reload_render_state.alpha_blend &= 0x0fffffff;
378
if (psurf->format != PIPE_FORMAT_Z16_UNORM)
379
reload_render_state.depth_test |= 0x400;
380
if (surf->reload & PIPE_CLEAR_DEPTH)
381
reload_render_state.depth_test |= 0x801;
382
if (surf->reload & PIPE_CLEAR_STENCIL) {
383
reload_render_state.depth_test |= 0x1000;
384
reload_render_state.stencil_front = 0x0000024f;
385
reload_render_state.stencil_back = 0x0000024f;
386
reload_render_state.stencil_test = 0x0000ffff;
387
}
388
}
389
390
memcpy(cpu + lima_reload_render_state_offset, &reload_render_state,
391
sizeof(reload_render_state));
392
393
lima_tex_desc *td = cpu + lima_reload_tex_desc_offset;
394
memset(td, 0, lima_min_tex_desc_size);
395
lima_texture_desc_set_res(ctx, td, psurf->texture, level, level, first_layer);
396
td->format = lima_format_get_texel_reload(psurf->format);
397
td->unnorm_coords = 1;
398
td->texture_type = LIMA_TEXTURE_TYPE_2D;
399
td->min_img_filter_nearest = 1;
400
td->mag_img_filter_nearest = 1;
401
td->wrap_s_clamp_to_edge = 1;
402
td->wrap_t_clamp_to_edge = 1;
403
td->unknown_2_2 = 0x1;
404
405
uint32_t *ta = cpu + lima_reload_tex_array_offset;
406
ta[0] = va + lima_reload_tex_desc_offset;
407
408
struct lima_job_fb_info *fb = &job->fb;
409
float reload_gl_pos[] = {
410
fb->width, 0, 0, 1,
411
0, 0, 0, 1,
412
0, fb->height, 0, 1,
413
};
414
memcpy(cpu + lima_reload_gl_pos_offset, reload_gl_pos,
415
sizeof(reload_gl_pos));
416
417
float reload_varying[] = {
418
fb->width, 0, 0, 0,
419
0, fb->height, 0, 0,
420
};
421
memcpy(cpu + lima_reload_varying_offset, reload_varying,
422
sizeof(reload_varying));
423
424
PLBU_CMD_BEGIN(&job->plbu_cmd_head, 20);
425
426
PLBU_CMD_VIEWPORT_LEFT(0);
427
PLBU_CMD_VIEWPORT_RIGHT(fui(fb->width));
428
PLBU_CMD_VIEWPORT_BOTTOM(0);
429
PLBU_CMD_VIEWPORT_TOP(fui(fb->height));
430
431
PLBU_CMD_RSW_VERTEX_ARRAY(
432
va + lima_reload_render_state_offset,
433
va + lima_reload_gl_pos_offset);
434
435
PLBU_CMD_UNKNOWN2();
436
PLBU_CMD_UNKNOWN1();
437
438
PLBU_CMD_INDICES(screen->pp_buffer->va + pp_shared_index_offset);
439
PLBU_CMD_INDEXED_DEST(va + lima_reload_gl_pos_offset);
440
PLBU_CMD_DRAW_ELEMENTS(0xf, 0, 3);
441
442
PLBU_CMD_END();
443
444
lima_dump_command_stream_print(job->dump, cpu, lima_reload_buffer_size,
445
false, "reload plbu cmd at va %x\n", va);
446
}
447
448
static void
449
lima_pack_head_plbu_cmd(struct lima_job *job)
450
{
451
struct lima_context *ctx = job->ctx;
452
struct lima_job_fb_info *fb = &job->fb;
453
454
PLBU_CMD_BEGIN(&job->plbu_cmd_head, 10);
455
456
PLBU_CMD_UNKNOWN2();
457
PLBU_CMD_BLOCK_STEP(fb->shift_min, fb->shift_h, fb->shift_w);
458
PLBU_CMD_TILED_DIMENSIONS(fb->tiled_w, fb->tiled_h);
459
PLBU_CMD_BLOCK_STRIDE(fb->block_w);
460
461
PLBU_CMD_ARRAY_ADDRESS(
462
ctx->plb_gp_stream->va + ctx->plb_index * ctx->plb_gp_size,
463
fb->block_w * fb->block_h);
464
465
PLBU_CMD_END();
466
467
if (lima_fb_cbuf_needs_reload(job))
468
lima_pack_reload_plbu_cmd(job, job->key.cbuf);
469
470
if (lima_fb_zsbuf_needs_reload(job))
471
lima_pack_reload_plbu_cmd(job, job->key.zsbuf);
472
}
473
474
/*
 * Quadrant fix-up step of the Hilbert curve mapping for a square of side n.
 * Only acts when ry == 0: the point is first mirrored inside the square
 * when rx == 1, then x and y are exchanged.
 */
static void
hilbert_rotate(int n, int *x, int *y, int rx, int ry)
{
   if (ry != 0)
      return;

   if (rx == 1) {
      *x = (n - 1) - *x;
      *y = (n - 1) - *y;
   }

   /* Swap x and y */
   const int old_x = *x;
   *x = *y;
   *y = old_x;
}

/*
 * Convert index d along a Hilbert curve into (x, y). One level is
 * processed per power of two below n, consuming two bits of d each time.
 */
static void
hilbert_coords(int n, int d, int *x, int *y)
{
   int rest = d;

   *y = 0;
   *x = 0;

   for (int level = 0; (1 << level) < n; level++, rest /= 4) {
      const int rx = (rest / 2) & 1;
      const int ry = (rest ^ rx) & 1;

      hilbert_rotate(1 << level, x, y, rx, ry);

      *x += rx << level;
      *y += ry << level;
   }
}
510
511
/*
 * Compute the start offset of each PP's stream inside one buffer and
 * return the total size.
 *
 * Each stream gets 16 bytes per tile plus 16 more bytes. Two details
 * matter:
 * 1. overflow: tiled_w * tiled_h may not divide evenly by num_pp, so the
 *    leftover tiles are handed out one by one to the preceding streams
 * 2. alignment: every stream start address is 0x20 aligned
 *
 * off must have room for num_pp entries.
 */
static int
lima_get_pp_stream_size(int num_pp, int tiled_w, int tiled_h, uint32_t *off)
{
   const int tiles = tiled_w * tiled_h;
   const int per_stream = tiles / num_pp * 16 + 16;
   int leftover = tiles % num_pp;
   int cursor = 0;

   for (int pp = 0; pp < num_pp; pp++) {
      off[pp] = cursor;

      cursor += per_stream;
      if (leftover) {
         cursor += 16;
         leftover--;
      }

      /* round up to the next multiple of 0x20 */
      cursor = (cursor + 0x20 - 1) & ~(0x20 - 1);
   }

   return cursor;
}
537
538
static void
539
lima_generate_pp_stream(struct lima_job *job, int off_x, int off_y,
540
int tiled_w, int tiled_h)
541
{
542
struct lima_context *ctx = job->ctx;
543
struct lima_pp_stream_state *ps = &ctx->pp_stream;
544
struct lima_job_fb_info *fb = &job->fb;
545
struct lima_screen *screen = lima_screen(ctx->base.screen);
546
int i, num_pp = screen->num_pp;
547
548
/* use hilbert_coords to generates 1D to 2D relationship.
549
* 1D for pp stream index and 2D for plb block x/y on framebuffer.
550
* if multi pp, interleave the 1D index to make each pp's render target
551
* close enough which should result close workload
552
*/
553
int max = MAX2(tiled_w, tiled_h);
554
int index = 0;
555
uint32_t *stream[4];
556
int si[4] = {0};
557
int dim = 0;
558
int count = 0;
559
560
/* Don't update count if we get zero rect. We'll just generate
561
* PP stream with just terminators in it.
562
*/
563
if ((tiled_w * tiled_h) != 0) {
564
dim = util_logbase2_ceil(max);
565
count = 1 << (dim + dim);
566
}
567
568
for (i = 0; i < num_pp; i++)
569
stream[i] = ps->map + ps->offset[i];
570
571
for (i = 0; i < count; i++) {
572
int x, y;
573
hilbert_coords(max, i, &x, &y);
574
if (x < tiled_w && y < tiled_h) {
575
x += off_x;
576
y += off_y;
577
578
int pp = index % num_pp;
579
int offset = ((y >> fb->shift_h) * fb->block_w +
580
(x >> fb->shift_w)) * LIMA_CTX_PLB_BLK_SIZE;
581
int plb_va = ctx->plb[ctx->plb_index]->va + offset;
582
583
stream[pp][si[pp]++] = 0;
584
stream[pp][si[pp]++] = 0xB8000000 | x | (y << 8);
585
stream[pp][si[pp]++] = 0xE0000002 | ((plb_va >> 3) & ~0xE0000003);
586
stream[pp][si[pp]++] = 0xB0000000;
587
588
index++;
589
}
590
}
591
592
for (i = 0; i < num_pp; i++) {
593
stream[i][si[i]++] = 0;
594
stream[i][si[i]++] = 0xBC000000;
595
stream[i][si[i]++] = 0;
596
stream[i][si[i]++] = 0;
597
598
lima_dump_command_stream_print(
599
job->dump, stream[i], si[i] * 4,
600
false, "pp plb stream %d at va %x\n",
601
i, ps->va + ps->offset[i]);
602
}
603
}
604
605
static void
606
lima_free_stale_pp_stream_bo(struct lima_context *ctx)
607
{
608
list_for_each_entry_safe(struct lima_ctx_plb_pp_stream, entry,
609
&ctx->plb_pp_stream_lru_list, lru_list) {
610
if (ctx->plb_stream_cache_size <= lima_plb_pp_stream_cache_size)
611
break;
612
613
struct hash_entry *hash_entry =
614
_mesa_hash_table_search(ctx->plb_pp_stream, &entry->key);
615
if (hash_entry)
616
_mesa_hash_table_remove(ctx->plb_pp_stream, hash_entry);
617
list_del(&entry->lru_list);
618
619
ctx->plb_stream_cache_size -= entry->bo->size;
620
lima_bo_unreference(entry->bo);
621
622
ralloc_free(entry);
623
}
624
}
625
626
static void
627
lima_update_damage_pp_stream(struct lima_job *job)
628
{
629
struct lima_context *ctx = job->ctx;
630
struct lima_damage_region *ds = lima_job_get_damage(job);
631
struct lima_job_fb_info *fb = &job->fb;
632
struct pipe_scissor_state bound;
633
struct pipe_scissor_state *dr = &job->damage_rect;
634
635
if (ds && ds->region) {
636
struct pipe_scissor_state *dbound = &ds->bound;
637
bound.minx = MAX2(dbound->minx, dr->minx >> 4);
638
bound.miny = MAX2(dbound->miny, dr->miny >> 4);
639
bound.maxx = MIN2(dbound->maxx, (dr->maxx + 0xf) >> 4);
640
bound.maxy = MIN2(dbound->maxy, (dr->maxy + 0xf) >> 4);
641
} else {
642
bound.minx = dr->minx >> 4;
643
bound.miny = dr->miny >> 4;
644
bound.maxx = (dr->maxx + 0xf) >> 4;
645
bound.maxy = (dr->maxy + 0xf) >> 4;
646
}
647
648
/* Clamp to FB size */
649
bound.minx = MIN2(bound.minx, fb->tiled_w);
650
bound.miny = MIN2(bound.miny, fb->tiled_h);
651
bound.maxx = MIN2(bound.maxx, fb->tiled_w);
652
bound.maxy = MIN2(bound.maxy, fb->tiled_h);
653
654
struct lima_ctx_plb_pp_stream_key key = {
655
.plb_index = ctx->plb_index,
656
.minx = bound.minx,
657
.miny = bound.miny,
658
.maxx = bound.maxx,
659
.maxy = bound.maxy,
660
.shift_w = fb->shift_w,
661
.shift_h = fb->shift_h,
662
.block_w = fb->block_w,
663
.block_h = fb->block_h,
664
};
665
666
struct hash_entry *entry =
667
_mesa_hash_table_search(ctx->plb_pp_stream, &key);
668
if (entry) {
669
struct lima_ctx_plb_pp_stream *s = entry->data;
670
671
list_del(&s->lru_list);
672
list_addtail(&s->lru_list, &ctx->plb_pp_stream_lru_list);
673
674
ctx->pp_stream.map = lima_bo_map(s->bo);
675
ctx->pp_stream.va = s->bo->va;
676
memcpy(ctx->pp_stream.offset, s->offset, sizeof(s->offset));
677
678
lima_job_add_bo(job, LIMA_PIPE_PP, s->bo, LIMA_SUBMIT_BO_READ);
679
680
return;
681
}
682
683
lima_free_stale_pp_stream_bo(ctx);
684
685
struct lima_screen *screen = lima_screen(ctx->base.screen);
686
struct lima_ctx_plb_pp_stream *s =
687
rzalloc(ctx->plb_pp_stream, struct lima_ctx_plb_pp_stream);
688
689
list_inithead(&s->lru_list);
690
s->key.plb_index = ctx->plb_index;
691
s->key.minx = bound.minx;
692
s->key.maxx = bound.maxx;
693
s->key.miny = bound.miny;
694
s->key.maxy = bound.maxy;
695
s->key.shift_w = fb->shift_w;
696
s->key.shift_h = fb->shift_h;
697
s->key.block_w = fb->block_w;
698
s->key.block_h = fb->block_h;
699
700
int tiled_w = bound.maxx - bound.minx;
701
int tiled_h = bound.maxy - bound.miny;
702
int size = lima_get_pp_stream_size(
703
screen->num_pp, tiled_w, tiled_h, s->offset);
704
705
s->bo = lima_bo_create(screen, size, 0);
706
707
ctx->pp_stream.map = lima_bo_map(s->bo);
708
ctx->pp_stream.va = s->bo->va;
709
memcpy(ctx->pp_stream.offset, s->offset, sizeof(s->offset));
710
711
lima_generate_pp_stream(job, bound.minx, bound.miny, tiled_w, tiled_h);
712
713
ctx->plb_stream_cache_size += size;
714
list_addtail(&s->lru_list, &ctx->plb_pp_stream_lru_list);
715
_mesa_hash_table_insert(ctx->plb_pp_stream, &s->key, s);
716
717
lima_job_add_bo(job, LIMA_PIPE_PP, s->bo, LIMA_SUBMIT_BO_READ);
718
}
719
720
static bool
721
lima_damage_fullscreen(struct lima_job *job)
722
{
723
struct pipe_scissor_state *dr = &job->damage_rect;
724
725
return dr->minx == 0 &&
726
dr->miny == 0 &&
727
dr->maxx == job->fb.width &&
728
dr->maxy == job->fb.height;
729
}
730
731
static void
732
lima_update_pp_stream(struct lima_job *job)
733
{
734
struct lima_context *ctx = job->ctx;
735
struct lima_screen *screen = lima_screen(ctx->base.screen);
736
struct lima_damage_region *damage = lima_job_get_damage(job);
737
if ((screen->gpu_type == DRM_LIMA_PARAM_GPU_ID_MALI400) ||
738
(damage && damage->region) || !lima_damage_fullscreen(job))
739
lima_update_damage_pp_stream(job);
740
else
741
/* Mali450 doesn't need full PP stream */
742
ctx->pp_stream.map = NULL;
743
}
744
745
static void
746
lima_update_job_bo(struct lima_job *job)
747
{
748
struct lima_context *ctx = job->ctx;
749
750
lima_job_add_bo(job, LIMA_PIPE_GP, ctx->plb_gp_stream,
751
LIMA_SUBMIT_BO_READ);
752
lima_job_add_bo(job, LIMA_PIPE_GP, ctx->plb[ctx->plb_index],
753
LIMA_SUBMIT_BO_WRITE);
754
lima_job_add_bo(job, LIMA_PIPE_GP, ctx->gp_tile_heap[ctx->plb_index],
755
LIMA_SUBMIT_BO_WRITE);
756
757
lima_dump_command_stream_print(
758
job->dump, ctx->plb_gp_stream->map + ctx->plb_index * ctx->plb_gp_size,
759
ctx->plb_gp_size, false, "gp plb stream at va %x\n",
760
ctx->plb_gp_stream->va + ctx->plb_index * ctx->plb_gp_size);
761
762
lima_job_add_bo(job, LIMA_PIPE_PP, ctx->plb[ctx->plb_index],
763
LIMA_SUBMIT_BO_READ);
764
lima_job_add_bo(job, LIMA_PIPE_PP, ctx->gp_tile_heap[ctx->plb_index],
765
LIMA_SUBMIT_BO_READ);
766
767
struct lima_screen *screen = lima_screen(ctx->base.screen);
768
lima_job_add_bo(job, LIMA_PIPE_PP, screen->pp_buffer, LIMA_SUBMIT_BO_READ);
769
}
770
771
static void
772
lima_finish_plbu_cmd(struct util_dynarray *plbu_cmd_array)
773
{
774
int i = 0;
775
uint32_t *plbu_cmd = util_dynarray_ensure_cap(plbu_cmd_array, plbu_cmd_array->size + 2 * 4);
776
777
plbu_cmd[i++] = 0x00000000;
778
plbu_cmd[i++] = 0x50000000; /* END */
779
780
plbu_cmd_array->size += i * 4;
781
}
782
783
static void
784
lima_pack_wb_zsbuf_reg(struct lima_job *job, uint32_t *wb_reg, int wb_idx)
785
{
786
struct lima_job_fb_info *fb = &job->fb;
787
struct pipe_surface *zsbuf = job->key.zsbuf;
788
struct lima_resource *res = lima_resource(zsbuf->texture);
789
int level = zsbuf->u.tex.level;
790
uint32_t format = lima_format_get_pixel(zsbuf->format);
791
792
struct lima_pp_wb_reg *wb = (void *)wb_reg;
793
wb[wb_idx].type = 0x01; /* 1 for depth, stencil */
794
wb[wb_idx].address = res->bo->va + res->levels[level].offset;
795
wb[wb_idx].pixel_format = format;
796
if (res->tiled) {
797
wb[wb_idx].pixel_layout = 0x2;
798
wb[wb_idx].pitch = fb->tiled_w;
799
} else {
800
wb[wb_idx].pixel_layout = 0x0;
801
wb[wb_idx].pitch = res->levels[level].stride / 8;
802
}
803
wb[wb_idx].mrt_bits = 0;
804
}
805
806
static void
807
lima_pack_wb_cbuf_reg(struct lima_job *job, uint32_t *frame_reg,
808
uint32_t *wb_reg, int wb_idx)
809
{
810
struct lima_job_fb_info *fb = &job->fb;
811
struct pipe_surface *cbuf = job->key.cbuf;
812
struct lima_resource *res = lima_resource(cbuf->texture);
813
int level = cbuf->u.tex.level;
814
unsigned layer = cbuf->u.tex.first_layer;
815
uint32_t format = lima_format_get_pixel(cbuf->format);
816
bool swap_channels = lima_format_get_pixel_swap_rb(cbuf->format);
817
818
struct lima_pp_frame_reg *frame = (void *)frame_reg;
819
frame->channel_layout = lima_format_get_channel_layout(cbuf->format);
820
821
struct lima_pp_wb_reg *wb = (void *)wb_reg;
822
wb[wb_idx].type = 0x02; /* 2 for color buffer */
823
wb[wb_idx].address = res->bo->va + res->levels[level].offset + layer * res->levels[level].layer_stride;
824
wb[wb_idx].pixel_format = format;
825
if (res->tiled) {
826
wb[wb_idx].pixel_layout = 0x2;
827
wb[wb_idx].pitch = fb->tiled_w;
828
} else {
829
wb[wb_idx].pixel_layout = 0x0;
830
wb[wb_idx].pitch = res->levels[level].stride / 8;
831
}
832
wb[wb_idx].mrt_bits = swap_channels ? 0x4 : 0x0;
833
}
834
835
static void
836
lima_pack_pp_frame_reg(struct lima_job *job, uint32_t *frame_reg,
837
uint32_t *wb_reg)
838
{
839
struct lima_context *ctx = job->ctx;
840
struct lima_job_fb_info *fb = &job->fb;
841
struct pipe_surface *cbuf = job->key.cbuf;
842
struct lima_pp_frame_reg *frame = (void *)frame_reg;
843
struct lima_screen *screen = lima_screen(ctx->base.screen);
844
int wb_idx = 0;
845
846
frame->render_address = screen->pp_buffer->va + pp_frame_rsw_offset;
847
frame->flags = 0x02;
848
if (cbuf && util_format_is_float(cbuf->format)) {
849
frame->flags |= 0x01; /* enable fp16 */
850
frame->clear_value_color = (uint32_t)(job->clear.color_16pc & 0xffffffffUL);
851
frame->clear_value_color_1 = (uint32_t)(job->clear.color_16pc >> 32);
852
frame->clear_value_color_2 = 0;
853
frame->clear_value_color_3 = 0;
854
}
855
else {
856
frame->clear_value_color = job->clear.color_8pc;
857
frame->clear_value_color_1 = job->clear.color_8pc;
858
frame->clear_value_color_2 = job->clear.color_8pc;
859
frame->clear_value_color_3 = job->clear.color_8pc;
860
}
861
862
frame->clear_value_depth = job->clear.depth;
863
frame->clear_value_stencil = job->clear.stencil;
864
frame->one = 1;
865
866
frame->width = fb->width - 1;
867
frame->height = fb->height - 1;
868
869
/* frame->fragment_stack_address is overwritten per-pp in the kernel
870
* by the values of pp_frame.fragment_stack_address[i] */
871
872
/* These are "stack size" and "stack offset" shifted,
873
* here they are assumed to be always the same. */
874
frame->fragment_stack_size = job->pp_max_stack_size << 16 | job->pp_max_stack_size;
875
876
/* related with MSAA and different value when r4p0/r7p0 */
877
frame->supersampled_height = fb->height * 2 - 1;
878
frame->scale = 0xE0C;
879
880
frame->dubya = 0x77;
881
frame->onscreen = 1;
882
frame->blocking = (fb->shift_min << 28) | (fb->shift_h << 16) | fb->shift_w;
883
884
/* Set default layout to 8888 */
885
frame->channel_layout = 0x8888;
886
887
if (cbuf && (job->resolve & PIPE_CLEAR_COLOR0))
888
lima_pack_wb_cbuf_reg(job, frame_reg, wb_reg, wb_idx++);
889
890
if (job->key.zsbuf &&
891
(job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)))
892
lima_pack_wb_zsbuf_reg(job, wb_reg, wb_idx++);
893
}
894
895
void
896
lima_do_job(struct lima_job *job)
897
{
898
#define pp_stack_pp_size 0x400
899
900
struct lima_context *ctx = job->ctx;
901
902
lima_pack_head_plbu_cmd(job);
903
lima_finish_plbu_cmd(&job->plbu_cmd_array);
904
905
lima_update_job_bo(job);
906
907
int vs_cmd_size = job->vs_cmd_array.size;
908
uint32_t vs_cmd_va = 0;
909
910
if (vs_cmd_size) {
911
void *vs_cmd = lima_job_create_stream_bo(
912
job, LIMA_PIPE_GP, vs_cmd_size, &vs_cmd_va);
913
memcpy(vs_cmd, util_dynarray_begin(&job->vs_cmd_array), vs_cmd_size);
914
915
lima_dump_command_stream_print(
916
job->dump, vs_cmd, vs_cmd_size, false, "flush vs cmd at va %x\n", vs_cmd_va);
917
lima_dump_vs_command_stream_print(job->dump, vs_cmd, vs_cmd_size, vs_cmd_va);
918
}
919
920
uint32_t plbu_cmd_va;
921
int plbu_cmd_size = job->plbu_cmd_array.size + job->plbu_cmd_head.size;
922
void *plbu_cmd = lima_job_create_stream_bo(
923
job, LIMA_PIPE_GP, plbu_cmd_size, &plbu_cmd_va);
924
memcpy(plbu_cmd,
925
util_dynarray_begin(&job->plbu_cmd_head),
926
job->plbu_cmd_head.size);
927
memcpy(plbu_cmd + job->plbu_cmd_head.size,
928
util_dynarray_begin(&job->plbu_cmd_array),
929
job->plbu_cmd_array.size);
930
931
lima_dump_command_stream_print(
932
job->dump, plbu_cmd, plbu_cmd_size, false, "flush plbu cmd at va %x\n", plbu_cmd_va);
933
lima_dump_plbu_command_stream_print(job->dump, plbu_cmd, plbu_cmd_size, plbu_cmd_va);
934
935
struct lima_screen *screen = lima_screen(ctx->base.screen);
936
struct drm_lima_gp_frame gp_frame;
937
struct lima_gp_frame_reg *gp_frame_reg = (void *)gp_frame.frame;
938
gp_frame_reg->vs_cmd_start = vs_cmd_va;
939
gp_frame_reg->vs_cmd_end = vs_cmd_va + vs_cmd_size;
940
gp_frame_reg->plbu_cmd_start = plbu_cmd_va;
941
gp_frame_reg->plbu_cmd_end = plbu_cmd_va + plbu_cmd_size;
942
gp_frame_reg->tile_heap_start = ctx->gp_tile_heap[ctx->plb_index]->va;
943
gp_frame_reg->tile_heap_end = ctx->gp_tile_heap[ctx->plb_index]->va + ctx->gp_tile_heap_size;
944
945
lima_dump_command_stream_print(
946
job->dump, &gp_frame, sizeof(gp_frame), false, "add gp frame\n");
947
948
if (!lima_job_start(job, LIMA_PIPE_GP, &gp_frame, sizeof(gp_frame)))
949
fprintf(stderr, "gp job error\n");
950
951
if (job->dump) {
952
if (lima_job_wait(job, LIMA_PIPE_GP, PIPE_TIMEOUT_INFINITE)) {
953
if (ctx->gp_output) {
954
float *pos = lima_bo_map(ctx->gp_output);
955
lima_dump_command_stream_print(
956
job->dump, pos, 4 * 4 * 16, true, "gl_pos dump at va %x\n",
957
ctx->gp_output->va);
958
}
959
960
uint32_t *plb = lima_bo_map(ctx->plb[ctx->plb_index]);
961
lima_dump_command_stream_print(
962
job->dump, plb, LIMA_CTX_PLB_BLK_SIZE, false, "plb dump at va %x\n",
963
ctx->plb[ctx->plb_index]->va);
964
}
965
else {
966
fprintf(stderr, "gp job wait error\n");
967
exit(1);
968
}
969
}
970
971
uint32_t pp_stack_va = 0;
972
if (job->pp_max_stack_size) {
973
lima_job_create_stream_bo(
974
job, LIMA_PIPE_PP,
975
screen->num_pp * job->pp_max_stack_size * pp_stack_pp_size,
976
&pp_stack_va);
977
}
978
979
lima_update_pp_stream(job);
980
981
struct lima_pp_stream_state *ps = &ctx->pp_stream;
982
if (screen->gpu_type == DRM_LIMA_PARAM_GPU_ID_MALI400) {
983
struct drm_lima_m400_pp_frame pp_frame = {0};
984
lima_pack_pp_frame_reg(job, pp_frame.frame, pp_frame.wb);
985
pp_frame.num_pp = screen->num_pp;
986
987
for (int i = 0; i < screen->num_pp; i++) {
988
pp_frame.plbu_array_address[i] = ps->va + ps->offset[i];
989
if (job->pp_max_stack_size)
990
pp_frame.fragment_stack_address[i] = pp_stack_va +
991
job->pp_max_stack_size * pp_stack_pp_size * i;
992
}
993
994
lima_dump_command_stream_print(
995
job->dump, &pp_frame, sizeof(pp_frame), false, "add pp frame\n");
996
997
if (!lima_job_start(job, LIMA_PIPE_PP, &pp_frame, sizeof(pp_frame)))
998
fprintf(stderr, "pp job error\n");
999
}
1000
else {
1001
struct drm_lima_m450_pp_frame pp_frame = {0};
1002
lima_pack_pp_frame_reg(job, pp_frame.frame, pp_frame.wb);
1003
pp_frame.num_pp = screen->num_pp;
1004
1005
if (job->pp_max_stack_size)
1006
for (int i = 0; i < screen->num_pp; i++)
1007
pp_frame.fragment_stack_address[i] = pp_stack_va +
1008
job->pp_max_stack_size * pp_stack_pp_size * i;
1009
1010
if (ps->map) {
1011
for (int i = 0; i < screen->num_pp; i++)
1012
pp_frame.plbu_array_address[i] = ps->va + ps->offset[i];
1013
}
1014
else {
1015
pp_frame.use_dlbu = true;
1016
1017
struct lima_job_fb_info *fb = &job->fb;
1018
pp_frame.dlbu_regs[0] = ctx->plb[ctx->plb_index]->va;
1019
pp_frame.dlbu_regs[1] = ((fb->tiled_h - 1) << 16) | (fb->tiled_w - 1);
1020
unsigned s = util_logbase2(LIMA_CTX_PLB_BLK_SIZE) - 7;
1021
pp_frame.dlbu_regs[2] = (s << 28) | (fb->shift_h << 16) | fb->shift_w;
1022
pp_frame.dlbu_regs[3] = ((fb->tiled_h - 1) << 24) | ((fb->tiled_w - 1) << 16);
1023
}
1024
1025
lima_dump_command_stream_print(
1026
job->dump, &pp_frame, sizeof(pp_frame), false, "add pp frame\n");
1027
1028
if (!lima_job_start(job, LIMA_PIPE_PP, &pp_frame, sizeof(pp_frame)))
1029
fprintf(stderr, "pp job error\n");
1030
}
1031
1032
if (job->dump) {
1033
if (!lima_job_wait(job, LIMA_PIPE_PP, PIPE_TIMEOUT_INFINITE)) {
1034
fprintf(stderr, "pp wait error\n");
1035
exit(1);
1036
}
1037
}
1038
1039
ctx->plb_index = (ctx->plb_index + 1) % lima_ctx_num_plb;
1040
1041
/* Set reload flags for next draw. It'll be unset if buffer is cleared */
1042
if (job->key.cbuf && (job->resolve & PIPE_CLEAR_COLOR0)) {
1043
struct lima_surface *surf = lima_surface(job->key.cbuf);
1044
surf->reload = PIPE_CLEAR_COLOR0;
1045
}
1046
1047
if (job->key.zsbuf && (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
1048
struct lima_surface *surf = lima_surface(job->key.zsbuf);
1049
surf->reload = (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL));
1050
}
1051
1052
if (ctx->job == job)
1053
ctx->job = NULL;
1054
1055
lima_job_free(job);
1056
}
1057
1058
void
1059
lima_flush(struct lima_context *ctx)
1060
{
1061
hash_table_foreach(ctx->jobs, entry) {
1062
struct lima_job *job = entry->data;
1063
lima_do_job(job);
1064
}
1065
}
1066
1067
void
1068
lima_flush_job_accessing_bo(
1069
struct lima_context *ctx, struct lima_bo *bo, bool write)
1070
{
1071
hash_table_foreach(ctx->jobs, entry) {
1072
struct lima_job *job = entry->data;
1073
if (lima_job_has_bo(job, bo, write))
1074
lima_do_job(job);
1075
}
1076
}
1077
1078
/*
1079
* This is for current job flush previous job which write to the resource it wants
1080
* to read. Tipical usage is flush the FBO which is used as current task's texture.
1081
*/
1082
void
1083
lima_flush_previous_job_writing_resource(
1084
struct lima_context *ctx, struct pipe_resource *prsc)
1085
{
1086
struct hash_entry *entry = _mesa_hash_table_search(ctx->write_jobs, prsc);
1087
1088
if (entry) {
1089
struct lima_job *job = entry->data;
1090
1091
/* do not flush current job */
1092
if (job != ctx->job)
1093
lima_do_job(job);
1094
}
1095
}
1096
1097
static void
1098
lima_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
1099
unsigned flags)
1100
{
1101
struct lima_context *ctx = lima_context(pctx);
1102
1103
lima_flush(ctx);
1104
1105
if (fence) {
1106
int drm_fd = lima_screen(ctx->base.screen)->fd;
1107
int fd;
1108
1109
if (!drmSyncobjExportSyncFile(drm_fd, ctx->out_sync[LIMA_PIPE_PP], &fd))
1110
*fence = lima_fence_create(fd);
1111
}
1112
}
1113
1114
static bool
1115
lima_job_compare(const void *s1, const void *s2)
1116
{
1117
return memcmp(s1, s2, sizeof(struct lima_job_key)) == 0;
1118
}
1119
1120
static uint32_t
1121
lima_job_hash(const void *key)
1122
{
1123
return _mesa_hash_data(key, sizeof(struct lima_job_key));
1124
}
1125
1126
bool lima_job_init(struct lima_context *ctx)
1127
{
1128
int fd = lima_screen(ctx->base.screen)->fd;
1129
1130
ctx->jobs = _mesa_hash_table_create(ctx, lima_job_hash, lima_job_compare);
1131
if (!ctx->jobs)
1132
return false;
1133
1134
ctx->write_jobs = _mesa_hash_table_create(
1135
ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
1136
if (!ctx->write_jobs)
1137
return false;
1138
1139
ctx->in_sync_fd = -1;
1140
1141
for (int i = 0; i < 2; i++) {
1142
if (drmSyncobjCreate(fd, DRM_SYNCOBJ_CREATE_SIGNALED, ctx->in_sync + i) ||
1143
drmSyncobjCreate(fd, DRM_SYNCOBJ_CREATE_SIGNALED, ctx->out_sync + i))
1144
return false;
1145
}
1146
1147
ctx->base.flush = lima_pipe_flush;
1148
1149
return true;
1150
}
1151
1152
void lima_job_fini(struct lima_context *ctx)
1153
{
1154
int fd = lima_screen(ctx->base.screen)->fd;
1155
1156
lima_flush(ctx);
1157
1158
for (int i = 0; i < 2; i++) {
1159
if (ctx->in_sync[i])
1160
drmSyncobjDestroy(fd, ctx->in_sync[i]);
1161
if (ctx->out_sync[i])
1162
drmSyncobjDestroy(fd, ctx->out_sync[i]);
1163
}
1164
1165
if (ctx->in_sync_fd >= 0)
1166
close(ctx->in_sync_fd);
1167
}
1168
1169