Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/svga/svga_pipe_streamout.c
4570 views
1
/**********************************************************
 * Copyright 2014 VMware, Inc.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 **********************************************************/
25
26
#include "util/u_memory.h"
27
#include "util/u_bitmask.h"
28
29
#include "svga_cmd.h"
30
#include "svga_context.h"
31
#include "svga_resource_buffer.h"
32
#include "svga_shader.h"
33
#include "svga_debug.h"
34
#include "svga_streamout.h"
35
36
struct svga_stream_output_target {
37
struct pipe_stream_output_target base;
38
};
39
40
/**
 * Cast wrapper: reinterpret a gallium stream output target pointer as the
 * driver's svga_stream_output_target.  No validation is performed.
 */
static inline struct svga_stream_output_target *
svga_stream_output_target(struct pipe_stream_output_target *s)
{
   struct svga_stream_output_target *wrapped =
      (struct svga_stream_output_target *) s;

   return wrapped;
}
46
47
48
/**
49
* A helper function to send different version of the DefineStreamOutput command
50
* depending on if device is SM5 capable or not.
51
*/
52
static enum pipe_error
53
svga_define_stream_output(struct svga_context *svga,
54
SVGA3dStreamOutputId soid,
55
uint32 numOutputStreamEntries,
56
uint32 numOutputStreamStrides,
57
uint32 streamStrides[SVGA3D_DX_MAX_SOTARGETS],
58
const SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS],
59
uint32 rasterizedStream,
60
struct svga_stream_output *streamout)
61
{
62
unsigned i;
63
64
SVGA_DBG(DEBUG_STREAMOUT, "%s: id=%d\n", __FUNCTION__, soid);
65
SVGA_DBG(DEBUG_STREAMOUT,
66
"numOutputStreamEntires=%d\n", numOutputStreamEntries);
67
68
for (i = 0; i < numOutputStreamEntries; i++) {
69
SVGA_DBG(DEBUG_STREAMOUT,
70
" %d: slot=%d regIdx=%d regMask=0x%x stream=%d\n",
71
i, decls[i].outputSlot, decls[i].registerIndex,
72
decls[i].registerMask, decls[i].stream);
73
}
74
75
SVGA_DBG(DEBUG_STREAMOUT,
76
"numOutputStreamStrides=%d\n", numOutputStreamStrides);
77
for (i = 0; i < numOutputStreamStrides; i++) {
78
SVGA_DBG(DEBUG_STREAMOUT, " %d ", streamStrides[i]);
79
}
80
SVGA_DBG(DEBUG_STREAMOUT, "\n");
81
82
if (svga_have_sm5(svga) &&
83
(numOutputStreamEntries > SVGA3D_MAX_DX10_STREAMOUT_DECLS ||
84
numOutputStreamStrides > 1)) {
85
unsigned bufSize = sizeof(SVGA3dStreamOutputDeclarationEntry)
86
* numOutputStreamEntries;
87
struct svga_winsys_buffer *declBuf;
88
struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
89
void *map;
90
91
declBuf = svga_winsys_buffer_create(svga, 1, SVGA_BUFFER_USAGE_PINNED,
92
bufSize);
93
if (!declBuf)
94
return PIPE_ERROR;
95
map = sws->buffer_map(sws, declBuf, PIPE_MAP_WRITE);
96
if (!map) {
97
sws->buffer_destroy(sws, declBuf);
98
return PIPE_ERROR;
99
}
100
101
/* copy decls to buffer */
102
memcpy(map, decls, bufSize);
103
104
/* unmap buffer */
105
sws->buffer_unmap(sws, declBuf);
106
streamout->declBuf = declBuf;
107
108
SVGA_RETRY(svga, SVGA3D_sm5_DefineAndBindStreamOutput
109
(svga->swc, soid,
110
numOutputStreamEntries,
111
numOutputStreamStrides,
112
streamStrides,
113
streamout->declBuf,
114
rasterizedStream,
115
bufSize));
116
} else {
117
SVGA_RETRY(svga, SVGA3D_vgpu10_DefineStreamOutput(svga->swc, soid,
118
numOutputStreamEntries,
119
streamStrides,
120
decls));
121
}
122
123
return PIPE_OK;
124
}
125
126
127
/**
128
* Creates stream output from the stream output info.
129
*/
130
struct svga_stream_output *
131
svga_create_stream_output(struct svga_context *svga,
132
struct svga_shader *shader,
133
const struct pipe_stream_output_info *info)
134
{
135
struct svga_stream_output *streamout;
136
SVGA3dStreamOutputDeclarationEntry decls[SVGA3D_MAX_STREAMOUT_DECLS];
137
unsigned strides[SVGA3D_DX_MAX_SOTARGETS];
138
unsigned dstOffset[SVGA3D_DX_MAX_SOTARGETS];
139
unsigned numStreamStrides = 0;
140
unsigned numDecls;
141
unsigned i;
142
enum pipe_error ret;
143
unsigned id;
144
ASSERTED unsigned maxDecls;
145
146
assert(info->num_outputs <= PIPE_MAX_SO_OUTPUTS);
147
148
/* Gallium utility creates shaders with stream output.
149
* For non-DX10, just return NULL.
150
*/
151
if (!svga_have_vgpu10(svga))
152
return NULL;
153
154
if (svga_have_sm5(svga))
155
maxDecls = SVGA3D_MAX_STREAMOUT_DECLS;
156
else if (svga_have_vgpu10(svga))
157
maxDecls = SVGA3D_MAX_DX10_STREAMOUT_DECLS;
158
159
assert(info->num_outputs <= maxDecls);
160
161
/* Allocate an integer ID for the stream output */
162
id = util_bitmask_add(svga->stream_output_id_bm);
163
if (id == UTIL_BITMASK_INVALID_INDEX) {
164
return NULL;
165
}
166
167
/* Allocate the streamout data structure */
168
streamout = CALLOC_STRUCT(svga_stream_output);
169
170
if (!streamout)
171
return NULL;
172
173
streamout->info = *info;
174
streamout->id = id;
175
streamout->pos_out_index = -1;
176
streamout->streammask = 0;
177
178
/* Init whole decls and stride arrays to zero to avoid garbage values */
179
memset(decls, 0, sizeof(decls));
180
memset(strides, 0, sizeof(strides));
181
memset(dstOffset, 0, sizeof(dstOffset));
182
183
SVGA_DBG(DEBUG_STREAMOUT, "%s: num_outputs=%d\n",
184
__FUNCTION__, info->num_outputs);
185
186
for (i = 0, numDecls = 0; i < info->num_outputs; i++, numDecls++) {
187
unsigned reg_idx = info->output[i].register_index;
188
unsigned buf_idx = info->output[i].output_buffer;
189
const enum tgsi_semantic sem_name =
190
shader->info.output_semantic_name[reg_idx];
191
192
assert(buf_idx <= PIPE_MAX_SO_BUFFERS);
193
194
numStreamStrides = MAX2(numStreamStrides, buf_idx);
195
196
SVGA_DBG(DEBUG_STREAMOUT,
197
" %d: register_index=%d output_buffer=%d stream=%d\n",
198
i, reg_idx, buf_idx, info->output[i].stream);
199
200
SVGA_DBG(DEBUG_STREAMOUT,
201
" dst_offset=%d start_component=%d num_components=%d\n",
202
info->output[i].dst_offset,
203
info->output[i].start_component,
204
info->output[i].num_components);
205
206
streamout->buffer_stream |= info->output[i].stream << (buf_idx * 4);
207
208
/**
209
* Check if the destination offset of the current output
210
* is at the expected offset. If it is greater, then that means
211
* there is a gap in the stream output. We need to insert
212
* extra declaration entries with an invalid register index
213
* to specify a gap.
214
*/
215
while (info->output[i].dst_offset > dstOffset[buf_idx]) {
216
217
unsigned numComponents = info->output[i].dst_offset -
218
dstOffset[buf_idx];;
219
220
assert(svga_have_sm5(svga));
221
222
/* We can only specify at most 4 components to skip in each
223
* declaration entry.
224
*/
225
numComponents = numComponents > 4 ? 4 : numComponents;
226
227
decls[numDecls].outputSlot = buf_idx,
228
decls[numDecls].stream = info->output[i].stream;
229
decls[numDecls].registerIndex = SVGA3D_INVALID_ID;
230
decls[numDecls].registerMask = (1 << numComponents) - 1;
231
232
dstOffset[buf_idx] += numComponents;
233
numDecls++;
234
}
235
236
if (sem_name == TGSI_SEMANTIC_POSITION) {
237
/**
238
* Check if streaming out POSITION. If so, replace the
239
* register index with the index for NON_ADJUSTED POSITION.
240
*/
241
decls[numDecls].registerIndex = shader->info.num_outputs;
242
243
/* Save this output index, so we can tell later if this stream output
244
* includes an output of a vertex position
245
*/
246
streamout->pos_out_index = numDecls;
247
}
248
else if (sem_name == TGSI_SEMANTIC_CLIPDIST) {
249
/**
250
* Use the shadow copy for clip distance because
251
* CLIPDIST instruction is only emitted for enabled clip planes.
252
* It's valid to write to ClipDistance variable for non-enabled
253
* clip planes.
254
*/
255
decls[numDecls].registerIndex =
256
shader->info.num_outputs + 1 +
257
shader->info.output_semantic_index[reg_idx];
258
}
259
else {
260
decls[numDecls].registerIndex = reg_idx;
261
}
262
263
decls[numDecls].outputSlot = buf_idx;
264
decls[numDecls].registerMask =
265
((1 << info->output[i].num_components) - 1)
266
<< info->output[i].start_component;
267
268
decls[numDecls].stream = info->output[i].stream;
269
assert(decls[numDecls].stream == 0 || svga_have_sm5(svga));
270
271
/* Set the bit in streammask for the enabled stream */
272
streamout->streammask |= 1 << info->output[i].stream;
273
274
/* Update the expected offset for the next output */
275
dstOffset[buf_idx] += info->output[i].num_components;
276
277
strides[buf_idx] = info->stride[buf_idx] * sizeof(float);
278
}
279
280
assert(numDecls <= maxDecls);
281
282
/* Send the DefineStreamOutput command.
283
* Note, rasterizedStream is always 0.
284
*/
285
ret = svga_define_stream_output(svga, id,
286
numDecls, numStreamStrides+1,
287
strides, decls, 0, streamout);
288
289
if (ret != PIPE_OK) {
290
util_bitmask_clear(svga->stream_output_id_bm, id);
291
FREE(streamout);
292
streamout = NULL;
293
}
294
return streamout;
295
}
296
297
298
enum pipe_error
299
svga_set_stream_output(struct svga_context *svga,
300
struct svga_stream_output *streamout)
301
{
302
unsigned id = streamout ? streamout->id : SVGA3D_INVALID_ID;
303
304
if (!svga_have_vgpu10(svga)) {
305
return PIPE_OK;
306
}
307
308
SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=0x%x id=%d\n", __FUNCTION__,
309
streamout, id);
310
311
if (svga->current_so != streamout) {
312
313
/* Before unbinding the current stream output, stop the stream output
314
* statistics queries for the active streams.
315
*/
316
if (svga_have_sm5(svga) && svga->current_so) {
317
svga->vcount_buffer_stream = svga->current_so->buffer_stream;
318
svga_end_stream_output_queries(svga, svga->current_so->streammask);
319
}
320
321
enum pipe_error ret = SVGA3D_vgpu10_SetStreamOutput(svga->swc, id);
322
if (ret != PIPE_OK) {
323
return ret;
324
}
325
326
svga->current_so = streamout;
327
328
/* After binding the new stream output, start the stream output
329
* statistics queries for the active streams.
330
*/
331
if (svga_have_sm5(svga) && svga->current_so) {
332
svga_begin_stream_output_queries(svga, svga->current_so->streammask);
333
}
334
}
335
336
return PIPE_OK;
337
}
338
339
void
340
svga_delete_stream_output(struct svga_context *svga,
341
struct svga_stream_output *streamout)
342
{
343
struct svga_winsys_screen *sws = svga_screen(svga->pipe.screen)->sws;
344
345
SVGA_DBG(DEBUG_STREAMOUT, "%s streamout=0x%x\n", __FUNCTION__, streamout);
346
347
assert(svga_have_vgpu10(svga));
348
assert(streamout != NULL);
349
350
SVGA_RETRY(svga, SVGA3D_vgpu10_DestroyStreamOutput(svga->swc,
351
streamout->id));
352
353
if (svga_have_sm5(svga) && streamout->declBuf) {
354
sws->buffer_destroy(sws, streamout->declBuf);
355
}
356
357
/* Before deleting the current streamout, make sure to stop any pending
358
* SO queries.
359
*/
360
if (svga->current_so == streamout) {
361
if (svga->in_streamout)
362
svga_end_stream_output_queries(svga, svga->current_so->streammask);
363
svga->current_so = NULL;
364
}
365
366
/* Release the ID */
367
util_bitmask_clear(svga->stream_output_id_bm, streamout->id);
368
369
/* Free streamout structure */
370
FREE(streamout);
371
}
372
373
374
static struct pipe_stream_output_target *
375
svga_create_stream_output_target(struct pipe_context *pipe,
376
struct pipe_resource *buffer,
377
unsigned buffer_offset,
378
unsigned buffer_size)
379
{
380
struct svga_context *svga = svga_context(pipe);
381
struct svga_stream_output_target *sot;
382
383
SVGA_DBG(DEBUG_STREAMOUT, "%s offset=%d size=%d\n", __FUNCTION__,
384
buffer_offset, buffer_size);
385
386
assert(svga_have_vgpu10(svga));
387
(void) svga;
388
389
sot = CALLOC_STRUCT(svga_stream_output_target);
390
if (!sot)
391
return NULL;
392
393
pipe_reference_init(&sot->base.reference, 1);
394
pipe_resource_reference(&sot->base.buffer, buffer);
395
sot->base.context = pipe;
396
sot->base.buffer = buffer;
397
sot->base.buffer_offset = buffer_offset;
398
sot->base.buffer_size = buffer_size;
399
400
return &sot->base;
401
}
402
403
static void
404
svga_destroy_stream_output_target(struct pipe_context *pipe,
405
struct pipe_stream_output_target *target)
406
{
407
struct svga_stream_output_target *sot = svga_stream_output_target(target);
408
409
SVGA_DBG(DEBUG_STREAMOUT, "%s\n", __FUNCTION__);
410
411
pipe_resource_reference(&sot->base.buffer, NULL);
412
FREE(sot);
413
}
414
415
static void
416
svga_set_stream_output_targets(struct pipe_context *pipe,
417
unsigned num_targets,
418
struct pipe_stream_output_target **targets,
419
const unsigned *offsets)
420
{
421
struct svga_context *svga = svga_context(pipe);
422
struct SVGA3dSoTarget soBindings[SVGA3D_DX_MAX_SOTARGETS];
423
unsigned i;
424
unsigned num_so_targets;
425
boolean begin_so_queries = num_targets > 0;
426
427
SVGA_DBG(DEBUG_STREAMOUT, "%s num_targets=%d\n", __FUNCTION__,
428
num_targets);
429
430
assert(svga_have_vgpu10(svga));
431
432
/* Mark the streamout buffers as dirty so that we'll issue readbacks
433
* before mapping.
434
*/
435
for (i = 0; i < svga->num_so_targets; i++) {
436
struct svga_buffer *sbuf = svga_buffer(svga->so_targets[i]->buffer);
437
sbuf->dirty = TRUE;
438
}
439
440
/* Before the currently bound streamout targets are unbound,
441
* save them in case they need to be referenced to retrieve the
442
* number of vertices being streamed out.
443
*/
444
for (i = 0; i < ARRAY_SIZE(svga->so_targets); i++) {
445
svga->vcount_so_targets[i] = svga->so_targets[i];
446
}
447
448
assert(num_targets <= SVGA3D_DX_MAX_SOTARGETS);
449
450
for (i = 0; i < num_targets; i++) {
451
struct svga_stream_output_target *sot
452
= svga_stream_output_target(targets[i]);
453
unsigned size;
454
455
svga->so_surfaces[i] = svga_buffer_handle(svga, sot->base.buffer,
456
PIPE_BIND_STREAM_OUTPUT);
457
458
assert(svga_buffer(sot->base.buffer)->key.flags
459
& SVGA3D_SURFACE_BIND_STREAM_OUTPUT);
460
461
svga->so_targets[i] = &sot->base;
462
if (offsets[i] == -1) {
463
soBindings[i].offset = -1;
464
465
/* The streamout is being resumed. There is no need to restart streamout statistics
466
* queries for the draw-auto fallback since those queries are still active.
467
*/
468
begin_so_queries = FALSE;
469
}
470
else
471
soBindings[i].offset = sot->base.buffer_offset + offsets[i];
472
473
/* The size cannot extend beyond the end of the buffer. Clamp it. */
474
size = MIN2(sot->base.buffer_size,
475
sot->base.buffer->width0 - sot->base.buffer_offset);
476
477
soBindings[i].sizeInBytes = size;
478
}
479
480
/* unbind any previously bound stream output buffers */
481
for (; i < svga->num_so_targets; i++) {
482
svga->so_surfaces[i] = NULL;
483
svga->so_targets[i] = NULL;
484
}
485
486
num_so_targets = MAX2(svga->num_so_targets, num_targets);
487
SVGA_RETRY(svga, SVGA3D_vgpu10_SetSOTargets(svga->swc, num_so_targets,
488
soBindings, svga->so_surfaces));
489
svga->num_so_targets = num_targets;
490
491
if (svga_have_sm5(svga) && svga->current_so && begin_so_queries) {
492
493
/* If there are already active queries and we need to start a new streamout,
494
* we need to stop the current active queries first.
495
*/
496
if (svga->in_streamout) {
497
svga_end_stream_output_queries(svga, svga->current_so->streammask);
498
}
499
500
/* Start stream out statistics queries for the new streamout */
501
svga_begin_stream_output_queries(svga, svga->current_so->streammask);
502
}
503
}
504
505
/**
506
* Rebind stream output target surfaces
507
*/
508
enum pipe_error
509
svga_rebind_stream_output_targets(struct svga_context *svga)
510
{
511
struct svga_winsys_context *swc = svga->swc;
512
enum pipe_error ret;
513
unsigned i;
514
515
for (i = 0; i < svga->num_so_targets; i++) {
516
ret = swc->resource_rebind(swc, svga->so_surfaces[i], NULL, SVGA_RELOC_WRITE);
517
if (ret != PIPE_OK)
518
return ret;
519
}
520
521
return PIPE_OK;
522
}
523
524
525
void
526
svga_init_stream_output_functions(struct svga_context *svga)
527
{
528
svga->pipe.create_stream_output_target = svga_create_stream_output_target;
529
svga->pipe.stream_output_target_destroy = svga_destroy_stream_output_target;
530
svga->pipe.set_stream_output_targets = svga_set_stream_output_targets;
531
}
532
533
534
/**
535
* A helper function to create stream output statistics queries for each stream.
536
* These queries are created as a workaround for DrawTransformFeedbackInstanced or
537
* DrawTransformFeedbackStreamInstanced when auto draw doesn't support
538
* instancing or non-0 stream. In this case, the vertex count will
539
* be retrieved from the stream output statistics query.
540
*/
541
void
542
svga_create_stream_output_queries(struct svga_context *svga)
543
{
544
unsigned i;
545
546
if (!svga_have_sm5(svga))
547
return;
548
549
for (i = 0; i < ARRAY_SIZE(svga->so_queries); i++) {
550
svga->so_queries[i] = svga->pipe.create_query(&svga->pipe,
551
PIPE_QUERY_SO_STATISTICS, i);
552
assert(svga->so_queries[i] != NULL);
553
}
554
}
555
556
557
/**
558
* Destroy the stream output statistics queries for the draw-auto workaround.
559
*/
560
void
561
svga_destroy_stream_output_queries(struct svga_context *svga)
562
{
563
unsigned i;
564
565
if (!svga_have_sm5(svga))
566
return;
567
568
for (i = 0; i < ARRAY_SIZE(svga->so_queries); i++) {
569
svga->pipe.destroy_query(&svga->pipe, svga->so_queries[i]);
570
}
571
}
572
573
574
/**
575
* Start stream output statistics queries for the active streams.
576
*/
577
void
578
svga_begin_stream_output_queries(struct svga_context *svga,
579
unsigned streammask)
580
{
581
assert(svga_have_sm5(svga));
582
assert(!svga->in_streamout);
583
584
for (unsigned i = 0; i < ARRAY_SIZE(svga->so_queries); i++) {
585
bool ret;
586
if (streammask & (1 << i)) {
587
ret = svga->pipe.begin_query(&svga->pipe, svga->so_queries[i]);
588
}
589
(void) ret;
590
}
591
svga->in_streamout = TRUE;
592
593
return;
594
}
595
596
597
/**
598
* Stop stream output statistics queries for the active streams.
599
*/
600
void
601
svga_end_stream_output_queries(struct svga_context *svga,
602
unsigned streammask)
603
{
604
assert(svga_have_sm5(svga));
605
606
if (!svga->in_streamout)
607
return;
608
609
for (unsigned i = 0; i < ARRAY_SIZE(svga->so_queries); i++) {
610
bool ret;
611
if (streammask & (1 << i)) {
612
ret = svga->pipe.end_query(&svga->pipe, svga->so_queries[i]);
613
}
614
(void) ret;
615
}
616
svga->in_streamout = FALSE;
617
618
return;
619
}
620
621
622
/**
623
* Return the primitive count returned from the stream output statistics query
624
* for the specified stream.
625
*/
626
unsigned
627
svga_get_primcount_from_stream_output(struct svga_context *svga,
628
unsigned stream)
629
{
630
unsigned primcount = 0;
631
union pipe_query_result result;
632
bool ret;
633
634
if (svga->current_so) {
635
svga_end_stream_output_queries(svga, svga->current_so->streammask);
636
}
637
638
ret = svga->pipe.get_query_result(&svga->pipe,
639
svga->so_queries[stream],
640
TRUE, &result);
641
if (ret)
642
primcount = result.so_statistics.num_primitives_written;
643
644
return primcount;
645
}
646
647