Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/freedreno/drm/freedreno_ringbuffer.h
4564 views
1
/*
 * Copyright (C) 2012-2018 Rob Clark <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */
26
27
#ifndef FREEDRENO_RINGBUFFER_H_
28
#define FREEDRENO_RINGBUFFER_H_
29
30
#include <stdio.h>
31
#include "util/u_atomic.h"
32
#include "util/u_debug.h"
33
#include "util/u_queue.h"
34
35
#include "adreno_common.xml.h"
36
#include "adreno_pm4.xml.h"
37
#include "freedreno_drmif.h"
38
#include "freedreno_pm4.h"
39
40
#ifdef __cplusplus
41
extern "C" {
42
#endif
43
44
struct fd_submit;
struct fd_ringbuffer;

/* Flags controlling how a ringbuffer/stateobj is allocated and used: */
enum fd_ringbuffer_flags {

   /* Primary ringbuffer for a submit, ie. an IB1 level rb
    * which kernel must setup RB->IB1 CP_INDIRECT_BRANCH
    * packets.
    */
   FD_RINGBUFFER_PRIMARY = 0x1,

   /* Hint that the stateobj will be used for streaming state
    * that is used once or a few times and then discarded.
    *
    * For sub-allocation, non streaming stateobj's should be
    * sub-allocated from a page size buffer, so one long lived
    * state obj doesn't prevent other pages from being freed.
    * (Ie. it would be no worse than allocating a page sized
    * bo for each small non-streaming stateobj).
    *
    * But streaming stateobj's could be sub-allocated from a
    * larger buffer to reduce the alloc/del overhead.
    */
   FD_RINGBUFFER_STREAMING = 0x2,

   /* Indicates that "growable" cmdstream can be used,
    * consisting of multiple physical cmdstream buffers
    */
   FD_RINGBUFFER_GROWABLE = 0x4,

   /* Internal use only: */
   _FD_RINGBUFFER_OBJECT = 0x8,
};
77
78
/* A submit object manages/tracks all the state buildup for a "submit"
 * ioctl to the kernel. Additionally, with the exception of long-lived
 * non-STREAMING stateobj rb's, rb's are allocated from the submit.
 */
struct fd_submit *fd_submit_new(struct fd_pipe *pipe);

/* NOTE: all ringbuffer's create from the submit should be unref'd
 * before destroying the submit.
 */
void fd_submit_del(struct fd_submit *submit);

/* Take an additional reference on the submit; returns the same submit. */
struct fd_submit * fd_submit_ref(struct fd_submit *submit);

/* Allocate a new rb from the submit. */
struct fd_ringbuffer *fd_submit_new_ringbuffer(struct fd_submit *submit,
                                               uint32_t size,
                                               enum fd_ringbuffer_flags flags);
95
96
/**
 * Encapsulates submit out-fence(s), which consist of a 'timestamp' (per-
 * pipe (submitqueue) sequence number) and optionally, if requested, an
 * out-fence-fd
 */
struct fd_submit_fence {
   /**
    * The ready fence is signaled once the submit is actually flushed down
    * to the kernel, and fence/fence_fd are populated. You must wait for
    * this fence to be signaled before reading fence/fence_fd.
    */
   struct util_queue_fence ready;

   /* Per-pipe timestamp/seqno fence, valid once 'ready' is signaled. */
   struct fd_fence fence;

   /**
    * Optional dma_fence fd, returned by submit if use_fence_fd is true
    */
   int fence_fd;
   /* Set by the requester before flush to ask for fence_fd to be returned. */
   bool use_fence_fd;
};
117
118
/* Flush the submit down to the kernel.
 *
 * in_fence_fd: -1 for no in-fence, else fence fd
 * out_fence can be NULL if no output fence is required
 */
int fd_submit_flush(struct fd_submit *submit, int in_fence_fd,
                    struct fd_submit_fence *out_fence);

struct fd_ringbuffer;
struct fd_reloc;
127
/* Backend-provided vtable implementing ringbuffer operations: */
struct fd_ringbuffer_funcs {
   /* Grow the cmdstream to 'size'; may be unimplemented (NULL) on some
    * backends (see assert in fd_ringbuffer_grow: unsupported on kgsl).
    */
   void (*grow)(struct fd_ringbuffer *ring, uint32_t size);
   /* Emit a buffer reference into the cmdstream. */
   void (*emit_reloc)(struct fd_ringbuffer *ring, const struct fd_reloc *reloc);
   /* Reference cmd buffer 'cmd_idx' of 'target' from 'ring'. */
   uint32_t (*emit_reloc_ring)(struct fd_ringbuffer *ring,
                               struct fd_ringbuffer *target, uint32_t cmd_idx);
   /* May be NULL; callers then assume a single cmd buffer
    * (see fd_ringbuffer_cmd_count).
    */
   uint32_t (*cmd_count)(struct fd_ringbuffer *ring);
   bool (*check_size)(struct fd_ringbuffer *ring);
   /* Free the ring once its refcount drops to zero. */
   void (*destroy)(struct fd_ringbuffer *ring);
};
136
137
/* the ringbuffer object is not opaque so that OUT_RING() type stuff
 * can be inlined. Note that users should not make assumptions about
 * the size of this struct.
 */
struct fd_ringbuffer {
   /* cur: next write position; start/end: bounds of current buffer */
   uint32_t *cur, *end, *start;
   const struct fd_ringbuffer_funcs *funcs;

   // size or end could probably go away
   int size;
   /* Reference count, managed atomically via fd_ringbuffer_ref/del. */
   int32_t refcnt;
   enum fd_ringbuffer_flags flags;
};
150
151
/* Allocate a new long-lived state object, not associated with
 * a submit:
 */
struct fd_ringbuffer *fd_ringbuffer_new_object(struct fd_pipe *pipe,
                                               uint32_t size);
156
157
static inline void
158
fd_ringbuffer_del(struct fd_ringbuffer *ring)
159
{
160
if (!p_atomic_dec_zero(&ring->refcnt))
161
return;
162
163
ring->funcs->destroy(ring);
164
}
165
166
static inline struct fd_ringbuffer *
167
fd_ringbuffer_ref(struct fd_ringbuffer *ring)
168
{
169
p_atomic_inc(&ring->refcnt);
170
return ring;
171
}
172
173
static inline void
174
fd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords)
175
{
176
assert(ring->funcs->grow); /* unsupported on kgsl */
177
178
/* there is an upper bound on IB size, which appears to be 0x0fffff */
179
ring->size = MIN2(ring->size << 1, 0x0fffff);
180
181
ring->funcs->grow(ring, ring->size);
182
}
183
184
static inline bool
185
fd_ringbuffer_check_size(struct fd_ringbuffer *ring)
186
{
187
return ring->funcs->check_size(ring);
188
}
189
190
static inline void
191
fd_ringbuffer_emit(struct fd_ringbuffer *ring, uint32_t data)
192
{
193
(*ring->cur++) = data;
194
}
195
196
/* Describes a buffer reference emitted into the cmdstream: */
struct fd_reloc {
   struct fd_bo *bo;
   /* GPU address (bo iova + offset, shifted/or'd — see OUT_RELOC). */
   uint64_t iova;
/* Access flags for the referenced bo: */
#define FD_RELOC_READ 0x0001
#define FD_RELOC_WRITE 0x0002
#define FD_RELOC_DUMP 0x0004
   uint32_t offset;
   /* Low 32b OR'd into the emitted value (see OUT_RELOC's orlo/orhi split). */
   uint32_t orlo;
   /* Negative shift means right-shift of the iova. */
   int32_t shift;
   uint32_t orhi; /* used for a5xx+ */
};
207
208
/* We always mark BOs for write, instead of tracking it across reloc
 * sources in userspace. On the kernel side, this means we track a single
 * excl fence in the BO instead of a set of read fences, which is cheaper.
 * The downside is that a dmabuf-shared device won't be able to read in
 * parallel with a read-only access by freedreno, but most other drivers
 * have decided that that usecase isn't important enough to do this
 * tracking, as well.
 */
#define FD_RELOC_FLAGS_INIT (FD_RELOC_READ | FD_RELOC_WRITE)

/* NOTE: relocs are 2 dwords on a5xx+ */
220
static inline void
221
fd_ringbuffer_reloc(struct fd_ringbuffer *ring, const struct fd_reloc *reloc)
222
{
223
ring->funcs->emit_reloc(ring, reloc);
224
}
225
226
static inline uint32_t
227
fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring)
228
{
229
if (!ring->funcs->cmd_count)
230
return 1;
231
return ring->funcs->cmd_count(ring);
232
}
233
234
static inline uint32_t
235
fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer *ring,
236
struct fd_ringbuffer *target,
237
uint32_t cmd_idx)
238
{
239
return ring->funcs->emit_reloc_ring(ring, target, cmd_idx);
240
}
241
242
/* Byte distance from 'start' up to 'end'. */
static inline uint32_t
offset_bytes(void *end, void *start)
{
   char *e = end;
   char *s = start;
   return e - s;
}
247
248
static inline uint32_t
249
fd_ringbuffer_size(struct fd_ringbuffer *ring)
250
{
251
/* only really needed for stateobj ringbuffers, and won't really
252
* do what you expect for growable rb's.. so lets just restrict
253
* this to stateobj's for now:
254
*/
255
debug_assert(!(ring->flags & FD_RINGBUFFER_GROWABLE));
256
return offset_bytes(ring->cur, ring->start);
257
}
258
259
#define LOG_DWORDS 0
260
261
static inline void
262
OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
263
{
264
if (LOG_DWORDS) {
265
fprintf(stderr, "ring[%p]: OUT_RING %04x: %08x", ring,
266
(uint32_t)(ring->cur - ring->start), data);
267
}
268
fd_ringbuffer_emit(ring, data);
269
}
270
271
/*
 * NOTE: OUT_RELOC() is 2 dwords (64b) on a5xx+
 */
/* C only: 'or' is an operator keyword in C++, hence the guard. */
#ifndef __cplusplus
/* Emit a reloc for 'bo' at 'offset', applying 'shift' to the iova
 * (negative means right-shift) and OR'ing in 'or' before emitting.
 */
static inline void
OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, uint32_t offset,
          uint64_t or, int32_t shift)
{
   if (LOG_DWORDS) {
      fprintf(stderr, "ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring,
              (uint32_t)(ring->cur - ring->start), bo, offset, shift);
   }
   debug_assert(offset < fd_bo_size(bo));

   uint64_t iova = fd_bo_get_iova(bo) + offset;

   if (shift < 0)
      iova >>= -shift;
   else
      iova <<= shift;

   iova |= or;

   fd_ringbuffer_reloc(ring, &(struct fd_reloc){
      .bo = bo,
      .iova = iova,
      .offset = offset,
      /* 'or' split across orlo/orhi for the a5xx+ 64b reloc encoding: */
      .orlo = or,
      .shift = shift,
      .orhi = or >> 32,
   });
}
#endif
305
306
/* Emit a reference to cmd buffer 0 of 'target' from 'ring'. */
static inline void
OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
   (void)fd_ringbuffer_emit_reloc_ring_full(ring, target, 0);
}
311
312
static inline void
313
BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
314
{
315
if (unlikely(ring->cur + ndwords > ring->end))
316
fd_ringbuffer_grow(ring, ndwords);
317
}
318
319
static inline void
320
OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
321
{
322
BEGIN_RING(ring, cnt + 1);
323
OUT_RING(ring, pm4_pkt0_hdr(regindx, cnt));
324
}
325
326
static inline void
327
OUT_PKT2(struct fd_ringbuffer *ring)
328
{
329
BEGIN_RING(ring, 1);
330
OUT_RING(ring, CP_TYPE2_PKT);
331
}
332
333
static inline void
334
OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
335
{
336
BEGIN_RING(ring, cnt + 1);
337
OUT_RING(ring, CP_TYPE3_PKT | ((cnt - 1) << 16) | ((opcode & 0xFF) << 8));
338
}
339
340
/*
341
* Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3
342
*/
343
344
static inline void
345
OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
346
{
347
BEGIN_RING(ring, cnt + 1);
348
OUT_RING(ring, pm4_pkt4_hdr(regindx, cnt));
349
}
350
351
static inline void
352
OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
353
{
354
BEGIN_RING(ring, cnt + 1);
355
OUT_RING(ring, pm4_pkt7_hdr(opcode, cnt));
356
}
357
358
static inline void
359
OUT_WFI(struct fd_ringbuffer *ring)
360
{
361
OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
362
OUT_RING(ring, 0x00000000);
363
}
364
365
static inline void
366
OUT_WFI5(struct fd_ringbuffer *ring)
367
{
368
OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
369
}
370
371
#ifdef __cplusplus
372
} /* end of extern "C" */
373
#endif
374
375
#endif /* FREEDRENO_RINGBUFFER_H_ */
376
377