Path: blob/21.2-virgl/src/gallium/drivers/freedreno/freedreno_batch.h
/*
 * Copyright (C) 2016 Rob Clark <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#ifndef FREEDRENO_BATCH_H_
#define FREEDRENO_BATCH_H_

#include "util/list.h"
#include "util/simple_mtx.h"
#include "util/u_inlines.h"
#include "util/u_queue.h"
#include "util/u_trace.h"

#include "freedreno_context.h"
#include "freedreno_fence.h"
#include "freedreno_util.h"

#ifdef __cplusplus
extern "C" {
#endif

struct fd_resource;
struct fd_batch_key;
struct fd_batch_result;

/* A batch tracks everything about a cmdstream batch/submit, including the
 * ringbuffers used for binning, draw, and gmem cmds, list of associated
 * fd_resource-s, etc.
 */
struct fd_batch {
   struct pipe_reference reference;
   unsigned seqno;
   unsigned idx; /* index into cache->batches[] */

   struct u_trace trace;

   /* To detect cases where we can skip cmdstream to record timestamp: */
   uint32_t *last_timestamp_cmd;

   int in_fence_fd;
   struct pipe_fence_handle *fence;

   struct fd_context *ctx;

   /* submit_lock serializes cmdstream emission and flush.  Acquire before
    * screen->lock.
    */
   simple_mtx_t submit_lock;

   /* Do we need to mem2gmem before rendering?  We don't if, for example,
    * there was a glClear() that invalidated the entire previous buffer
    * contents.  Keep track of which buffer(s) are cleared, or need
    * restore.  Masks of PIPE_CLEAR_*
    *
    * The 'cleared' bits will be set for buffers which are *entirely*
    * cleared, and 'partial_cleared' bits will be set if you must
    * check cleared_scissor.
    *
    * The 'invalidated' bits are set for cleared buffers, and buffers
    * where the contents are undefined, ie. what we don't need to restore
    * to gmem.
    */
   enum {
      /* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
      FD_BUFFER_COLOR = PIPE_CLEAR_COLOR,
      FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH,
      FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
      FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
   } invalidated, cleared, fast_cleared, restore, resolve;
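
   /* Worked example (illustrative sketch, not from the original source):
    * per the comment above, a glClear() covering the entire color buffer
    * would set FD_BUFFER_COLOR in both 'cleared' and 'invalidated', so the
    * mem2gmem step knows it can skip restoring the previous color contents
    * into gmem, roughly:
    *
    *    batch->cleared |= PIPE_CLEAR_COLOR;
    *    batch->invalidated |= PIPE_CLEAR_COLOR;
    */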
   /* is this a non-draw batch (ie compute/blit which has no pfb state)? */
   bool nondraw : 1;
   bool needs_flush : 1;
   bool flushed : 1;
   bool tessellation : 1; /* tessellation used in batch */

   /* Keep track if WAIT_FOR_IDLE is needed for registers we need
    * to update via RMW:
    */
   bool needs_wfi : 1;

   /* To decide whether to render to system memory, keep track of the
    * number of draws, and whether any of them require multisample,
    * depth_test (or depth write), stencil_test, blending, and
    * color_logic_op (since those functions are disabled when
    * bypassing GMEM).
    */
   enum fd_gmem_reason gmem_reason;

   /* At submit time, once we've decided that this batch will use GMEM
    * rendering, the appropriate gmem state is looked up:
    */
   const struct fd_gmem_stateobj *gmem_state;

   /* A calculated "draw cost" value for the batch, which tries to
    * estimate the bandwidth-per-sample of all the draws according
    * to:
    *
    *    foreach_draw (...) {
    *      cost += num_mrt;
    *      if (blend_enabled)
    *        cost += num_mrt;
    *      if (depth_test_enabled)
    *        cost++;
    *      if (depth_write_enabled)
    *        cost++;
    *    }
    *
    * The idea is that each sample-passed minimally does one write
    * per MRT.  If blend is enabled, the hw will additionally do
    * a framebuffer read per sample-passed (for each MRT with blend
    * enabled).  If depth-test is enabled, the hw will additionally
    * do a depth buffer read.  If depth-write is enabled, the hw will
    * additionally do a depth buffer write.
    *
    * This does ignore depth buffer traffic for samples which do not
    * pass due to depth-test fail, and some other details.  But it is
    * just intended to be a rough estimate that is easy to calculate.
    */
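   /* Worked example (added for illustration): a draw with two MRTs,
    * blending enabled on both, and depth test + depth write enabled
    * would add 2 (MRT writes) + 2 (blend reads) + 1 (depth read) +
    * 1 (depth write) = 6 to the cost.
    */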
   unsigned cost;

   /* Tells the gen specific backend where to write stats used for
    * the autotune module.
    *
    * Pointer only valid during gmem emit code.
    */
   struct fd_batch_result *autotune_result;

   unsigned num_draws;    /* number of draws in current batch */
   unsigned num_vertices; /* number of vertices in current batch */

   /* Currently only used on a6xx, to calculate vsc prim/draw stream
    * sizes:
    */
   unsigned num_bins_per_pipe;
   unsigned prim_strm_bits;
   unsigned draw_strm_bits;

   /* Track the maximal bounds of the scissor of all the draws within a
    * batch.  Used at the tile rendering step (fd_gmem_render_tiles(),
    * mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem.
    */
   struct pipe_scissor_state max_scissor;

   /* Keep track of DRAW initiators that need to be patched up depending
    * on whether we are using binning or not:
    */
   struct util_dynarray draw_patches;

   /* texture state that needs patching for fb_read: */
   struct util_dynarray fb_read_patches;

   /* Keep track of writes to RB_RENDER_CONTROL which need to be patched
    * once we know whether or not to use GMEM, and GMEM tile pitch.
    *
    * (only for a3xx.. but having gen specific subclasses of fd_batch
    * seemed overkill for now)
    */
   struct util_dynarray rbrc_patches;

   /* Keep track of GMEM related values that need to be patched up once we
    * know the gmem layout:
    */
   struct util_dynarray gmem_patches;

   /* Keep track of the pointer to the start of MEM exports for a20x
    * binning shaders.
    *
    * This is so the end of the shader can be cut off at the right point,
    * depending on the GMEM configuration.
    */
   struct util_dynarray shader_patches;

   struct pipe_framebuffer_state framebuffer;

   struct fd_submit *submit;

   /** draw pass cmdstream: */
   struct fd_ringbuffer *draw;
   /** binning pass cmdstream: */
   struct fd_ringbuffer *binning;
   /** tiling/gmem (IB0) cmdstream: */
   struct fd_ringbuffer *gmem;

   /** preamble cmdstream (executed once before first tile): */
   struct fd_ringbuffer *prologue;

   /** epilogue cmdstream (executed after each tile): */
   struct fd_ringbuffer *epilogue;

   struct fd_ringbuffer *tile_setup;
   struct fd_ringbuffer *tile_fini;

   union pipe_color_union clear_color[MAX_RENDER_TARGETS];
   double clear_depth;
   unsigned clear_stencil;

   /**
    * hw query related state:
    */
   /*@{*/
   /* next sample offset.. incremented for each sample in the batch/
    * submit, reset to zero on next submit.
    */
   uint32_t next_sample_offset;

   /* cached samples (in case multiple queries need to reference
    * the same sample snapshot)
    */
   struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];

   /* which sample providers were used in the current batch: */
   uint32_t query_providers_used;

   /* which sample providers are currently enabled in the batch: */
   uint32_t query_providers_active;

   /* list of samples in current batch: */
   struct util_dynarray samples;

   /* current query result bo and tile stride: */
   struct pipe_resource *query_buf;
   uint32_t query_tile_stride;
   /*@}*/

   /* Set of resources used by currently-unsubmitted batch (read or
    * write).. does not hold a reference to the resource.
    */
   struct set *resources;

   /** key in batch-cache (if not null): */
   struct fd_batch_key *key;
   uint32_t hash;

   /** set of dependent batches.. holds refs to dependent batches: */
   uint32_t dependents_mask;

   /* Buffer for tessellation engine input
    */
   struct fd_bo *tessfactor_bo;
   uint32_t tessfactor_size;

   /* Buffer for passing parameters between TCS and TES
    */
   struct fd_bo *tessparam_bo;
   uint32_t tessparam_size;

   struct fd_ringbuffer *tess_addrs_constobj;
};

struct fd_batch *fd_batch_create(struct fd_context *ctx, bool nondraw);

void fd_batch_reset(struct fd_batch *batch) assert_dt;
void fd_batch_flush(struct fd_batch *batch) assert_dt;
void fd_batch_add_dep(struct fd_batch *batch, struct fd_batch *dep) assert_dt;
void fd_batch_resource_write(struct fd_batch *batch,
                             struct fd_resource *rsc) assert_dt;
void fd_batch_resource_read_slowpath(struct fd_batch *batch,
                                     struct fd_resource *rsc) assert_dt;
void fd_batch_check_size(struct fd_batch *batch) assert_dt;

uint32_t fd_batch_key_hash(const void *_key);
bool fd_batch_key_equals(const void *_a, const void *_b);
struct fd_batch_key *fd_batch_key_clone(void *mem_ctx,
                                        const struct fd_batch_key *key);

/* not called directly: */
void __fd_batch_describe(char *buf, const struct fd_batch *batch) assert_dt;
void __fd_batch_destroy(struct fd_batch *batch);

/*
 * NOTE the rule is, you need to hold the screen->lock when destroying
 * a batch.. so either use fd_batch_reference() (which grabs the lock
 * for you) if you don't hold the lock, or fd_batch_reference_locked()
 * if you do hold the lock.
 *
 * WARNING the _locked() version can briefly drop the lock.  Without
 * recursive mutexes, I'm not sure there is much else we can do (since
 * __fd_batch_destroy() needs to unref resources)
 *
 * WARNING you must acquire the screen->lock and use the _locked()
 * version in the case that the batch being ref'd can disappear out
 * from under you.
 */

static inline void
fd_batch_reference_locked(struct fd_batch **ptr, struct fd_batch *batch)
{
   struct fd_batch *old_batch = *ptr;

   /* only need lock if a reference is dropped: */
   if (old_batch)
      fd_screen_assert_locked(old_batch->ctx->screen);

   if (pipe_reference_described(
          &(*ptr)->reference, &batch->reference,
          (debug_reference_descriptor)__fd_batch_describe))
      __fd_batch_destroy(old_batch);

   *ptr = batch;
}

static inline void
fd_batch_reference(struct fd_batch **ptr, struct fd_batch *batch)
{
   struct fd_batch *old_batch = *ptr;
   struct fd_context *ctx = old_batch ? old_batch->ctx : NULL;

   if (ctx)
      fd_screen_lock(ctx->screen);

   fd_batch_reference_locked(ptr, batch);

   if (ctx)
      fd_screen_unlock(ctx->screen);
}
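
/* A minimal usage sketch (illustrative, not part of the original header),
 * assuming the caller does not hold screen->lock and that ctx->batch is
 * the context's current batch.  Passing NULL as the new batch drops the
 * reference:
 *
 *    struct fd_batch *batch = NULL;
 *    fd_batch_reference(&batch, ctx->batch);   // takes a new reference
 *    ...
 *    fd_batch_reference(&batch, NULL);         // drops it, possibly
 *                                              // destroying the batch
 */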

static inline void
fd_batch_unlock_submit(struct fd_batch *batch)
{
   simple_mtx_unlock(&batch->submit_lock);
}

/**
 * Returns true if the submit-lock was acquired, false if we failed to
 * acquire the lock, ie. the batch was already flushed.
 */
static inline bool MUST_CHECK
fd_batch_lock_submit(struct fd_batch *batch)
{
   simple_mtx_lock(&batch->submit_lock);
   bool ret = !batch->flushed;
   if (!ret)
      fd_batch_unlock_submit(batch);
   return ret;
}

/**
 * Mark the batch as having something worth flushing (rendering, blit, query,
 * etc)
 */
static inline void
fd_batch_needs_flush(struct fd_batch *batch)
{
   batch->needs_flush = true;
   fd_fence_ref(&batch->ctx->last_fence, NULL);
}

/* Since we reorder batches and can pause/resume queries (notably for disabling
 * queries during some meta operations), we update the current query state for
 * the batch before each draw.
 */
static inline void
fd_batch_update_queries(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;

   if (ctx->query_update_batch)
      ctx->query_update_batch(batch, false);
}

static inline void
fd_batch_finish_queries(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;

   if (ctx->query_update_batch)
      ctx->query_update_batch(batch, true);
}

static inline void
fd_reset_wfi(struct fd_batch *batch)
{
   batch->needs_wfi = true;
}

void fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt;

/* emit a CP_EVENT_WRITE:
 */
static inline void
fd_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
               enum vgt_event_type evt)
{
   OUT_PKT3(ring, CP_EVENT_WRITE, 1);
   OUT_RING(ring, evt);
   fd_reset_wfi(batch);
}

/* Get per-tile epilogue */
static inline struct fd_ringbuffer *
fd_batch_get_epilogue(struct fd_batch *batch)
{
   if (batch->epilogue == NULL) {
      batch->epilogue = fd_submit_new_ringbuffer(batch->submit, 0x1000,
                                                 (enum fd_ringbuffer_flags)0);
   }

   return batch->epilogue;
}

struct fd_ringbuffer *fd_batch_get_prologue(struct fd_batch *batch);

#ifdef __cplusplus
}
#endif

#endif /* FREEDRENO_BATCH_H_ */