CoCalc -- vc4

GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/vc4/vc4_draw.c
⁴⁵⁷⁰ views
1
/*
2
 * Copyright (c) 2014 Scott Mansell
3
 * Copyright © 2014 Broadcom
4
 *
5
 * Permission is hereby granted, free of charge, to any person obtaining a
6
 * copy of this software and associated documentation files (the "Software"),
7
 * to deal in the Software without restriction, including without limitation
8
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
 * and/or sell copies of the Software, and to permit persons to whom the
10
 * Software is furnished to do so, subject to the following conditions:
11
 *
12
 * The above copyright notice and this permission notice (including the next
13
 * paragraph) shall be included in all copies or substantial portions of the
14
 * Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
 * IN THE SOFTWARE.
23
 */
24

25
#include "util/u_blitter.h"
26
#include "util/u_draw.h"
27
#include "util/u_prim.h"
28
#include "util/format/u_format.h"
29
#include "util/u_pack_color.h"
30
#include "util/u_split_draw.h"
31
#include "util/u_upload_mgr.h"
32
#include "indices/u_primconvert.h"
33

34
#include "vc4_context.h"
35
#include "vc4_resource.h"
36

37
/* Queued draw-call threshold for the HW-2116 workaround:
 * vc4_hw_2116_workaround() submits the current job before
 * job->draw_calls_queued would reach this value, and vc4_draw_vbo() asserts
 * that the count never exceeds it.
 */
#define VC4_HW_2116_COUNT		0x1ef0
38

39
static void
40
vc4_get_draw_cl_space(struct vc4_job *job, int vert_count)
41
{
42
        /* The SW-5891 workaround may cause us to emit multiple shader recs
43
         * and draw packets.
44
         */
45
        int num_draws = DIV_ROUND_UP(vert_count, 65535 - 2) + 1;
46

47
        /* Binner gets our packet state -- vc4_emit.c contents,
48
         * and the primitive itself.
49
         */
50
        cl_ensure_space(&job->bcl,
51
                        256 + (VC4_PACKET_GL_ARRAY_PRIMITIVE_SIZE +
52
                               VC4_PACKET_GL_SHADER_STATE_SIZE) * num_draws);
53

54
        /* Nothing for rcl -- that's covered by vc4_context.c */
55

56
        /* shader_rec gets up to 12 dwords of reloc handles plus a maximally
57
         * sized shader_rec (104 bytes base for 8 vattrs plus 32 bytes of
58
         * vattr stride).
59
         */
60
        cl_ensure_space(&job->shader_rec,
61
                        (12 * sizeof(uint32_t) + 104 + 8 * 32) * num_draws);
62

63
        /* Uniforms are covered by vc4_write_uniforms(). */
64

65
        /* There could be up to 16 textures per stage, plus misc other
66
         * pointers.
67
         */
68
        cl_ensure_space(&job->bo_handles, (2 * 16 + 20) * sizeof(uint32_t));
69
        cl_ensure_space(&job->bo_pointers,
70
                        (2 * 16 + 20) * sizeof(struct vc4_bo *));
71
}
72

73
/**
74
 * Does the initial bining command list setup for drawing to a given FBO.
75
 */
76
static void
77
vc4_start_draw(struct vc4_context *vc4)
78
{
79
        struct vc4_job *job = vc4->job;
80

81
        if (job->needs_flush)
82
                return;
83

84
        vc4_get_draw_cl_space(job, 0);
85

86
        cl_emit(&job->bcl, TILE_BINNING_MODE_CONFIGURATION, bin) {
87
                bin.width_in_tiles = job->draw_tiles_x;
88
                bin.height_in_tiles = job->draw_tiles_y;
89
                bin.multisample_mode_4x = job->msaa;
90
        }
91

92
        /* START_TILE_BINNING resets the statechange counters in the hardware,
93
         * which are what is used when a primitive is binned to a tile to
94
         * figure out what new state packets need to be written to that tile's
95
         * command list.
96
         */
97
        cl_emit(&job->bcl, START_TILE_BINNING, start);
98

99
        /* Reset the current compressed primitives format.  This gets modified
100
         * by VC4_PACKET_GL_INDEXED_PRIMITIVE and
101
         * VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
102
         * of every tile.
103
         */
104
        cl_emit(&job->bcl, PRIMITIVE_LIST_FORMAT, list) {
105
                list.data_type = _16_BIT_INDEX;
106
                list.primitive_type = TRIANGLES_LIST;
107
        }
108

109
        job->needs_flush = true;
110
        job->draw_width = vc4->framebuffer.width;
111
        job->draw_height = vc4->framebuffer.height;
112
}
113

114
static void
115
vc4_predraw_check_textures(struct pipe_context *pctx,
116
                           struct vc4_texture_stateobj *stage_tex)
117
{
118
        struct vc4_context *vc4 = vc4_context(pctx);
119

120
        for (int i = 0; i < stage_tex->num_textures; i++) {
121
                struct vc4_sampler_view *view =
122
                        vc4_sampler_view(stage_tex->textures[i]);
123
                if (!view)
124
                        continue;
125

126
                if (view->texture != view->base.texture)
127
                        vc4_update_shadow_baselevel_texture(pctx, &view->base);
128

129
                vc4_flush_jobs_writing_resource(vc4, view->texture);
130
        }
131
}
132

133
static void
134
vc4_emit_gl_shader_state(struct vc4_context *vc4,
135
                         const struct pipe_draw_info *info,
136
                         const struct pipe_draw_start_count_bias *draws,
137
                         uint32_t extra_index_bias)
138
{
139
        struct vc4_job *job = vc4->job;
140
        /* VC4_DIRTY_VTXSTATE */
141
        struct vc4_vertex_stateobj *vtx = vc4->vtx;
142
        /* VC4_DIRTY_VTXBUF */
143
        struct vc4_vertexbuf_stateobj *vertexbuf = &vc4->vertexbuf;
144

145
        /* The simulator throws a fit if VS or CS don't read an attribute, so
146
         * we emit a dummy read.
147
         */
148
        uint32_t num_elements_emit = MAX2(vtx->num_elements, 1);
149

150
        /* Emit the shader record. */
151
        cl_start_shader_reloc(&job->shader_rec, 3 + num_elements_emit);
152

153
        cl_emit(&job->shader_rec, SHADER_RECORD, rec) {
154
                rec.enable_clipping = true;
155

156
                /* VC4_DIRTY_COMPILED_FS */
157
                rec.fragment_shader_is_single_threaded =
158
                        !vc4->prog.fs->fs_threaded;
159

160
                /* VC4_DIRTY_PRIM_MODE | VC4_DIRTY_RASTERIZER */
161
                rec.point_size_included_in_shaded_vertex_data =
162
                         (info->mode == PIPE_PRIM_POINTS &&
163
                          vc4->rasterizer->base.point_size_per_vertex);
164

165
                /* VC4_DIRTY_COMPILED_FS */
166
                rec.fragment_shader_number_of_varyings =
167
                        vc4->prog.fs->num_inputs;
168
                rec.fragment_shader_code_address =
169
                        cl_address(vc4->prog.fs->bo, 0);
170

171
                rec.coordinate_shader_attribute_array_select_bits =
172
                         vc4->prog.cs->vattrs_live;
173
                rec.coordinate_shader_total_attributes_size =
174
                         vc4->prog.cs->vattr_offsets[8];
175
                rec.coordinate_shader_code_address =
176
                        cl_address(vc4->prog.cs->bo, 0);
177

178
                rec.vertex_shader_attribute_array_select_bits =
179
                         vc4->prog.vs->vattrs_live;
180
                rec.vertex_shader_total_attributes_size =
181
                         vc4->prog.vs->vattr_offsets[8];
182
                rec.vertex_shader_code_address =
183
                        cl_address(vc4->prog.vs->bo, 0);
184
        };
185

186
        uint32_t max_index = 0xffff;
187
        unsigned index_bias = info->index_size ? draws->index_bias : 0;
188
        for (int i = 0; i < vtx->num_elements; i++) {
189
                struct pipe_vertex_element *elem = &vtx->pipe[i];
190
                struct pipe_vertex_buffer *vb =
191
                        &vertexbuf->vb[elem->vertex_buffer_index];
192
                struct vc4_resource *rsc = vc4_resource(vb->buffer.resource);
193
                /* not vc4->dirty tracked: vc4->last_index_bias */
194
                uint32_t offset = (vb->buffer_offset +
195
                                   elem->src_offset +
196
                                   vb->stride * (index_bias +
197
                                                 extra_index_bias));
198
                uint32_t vb_size = rsc->bo->size - offset;
199
                uint32_t elem_size =
200
                        util_format_get_blocksize(elem->src_format);
201

202
                cl_emit(&job->shader_rec, ATTRIBUTE_RECORD, attr) {
203
                        attr.address = cl_address(rsc->bo, offset);
204
                        attr.number_of_bytes_minus_1 = elem_size - 1;
205
                        attr.stride = vb->stride;
206
                        attr.coordinate_shader_vpm_offset =
207
                                vc4->prog.cs->vattr_offsets[i];
208
                        attr.vertex_shader_vpm_offset =
209
                                vc4->prog.vs->vattr_offsets[i];
210
                }
211

212
                if (vb->stride > 0) {
213
                        max_index = MIN2(max_index,
214
                                         (vb_size - elem_size) / vb->stride);
215
                }
216
        }
217

218
        if (vtx->num_elements == 0) {
219
                assert(num_elements_emit == 1);
220
                struct vc4_bo *bo = vc4_bo_alloc(vc4->screen, 4096, "scratch VBO");
221

222
                cl_emit(&job->shader_rec, ATTRIBUTE_RECORD, attr) {
223
                        attr.address = cl_address(bo, 0);
224
                        attr.number_of_bytes_minus_1 = 16 - 1;
225
                        attr.stride = 0;
226
                        attr.coordinate_shader_vpm_offset = 0;
227
                        attr.vertex_shader_vpm_offset = 0;
228
                }
229

230
                vc4_bo_unreference(&bo);
231
        }
232

233
        cl_emit(&job->bcl, GL_SHADER_STATE, shader_state) {
234
                /* Note that number of attributes == 0 in the packet means 8
235
                 * attributes.  This field also contains the offset into
236
                 * shader_rec.
237
                 */
238
                assert(vtx->num_elements <= 8);
239
                shader_state.number_of_attribute_arrays =
240
                        num_elements_emit & 0x7;
241
        }
242

243
        vc4_write_uniforms(vc4, vc4->prog.fs,
244
                           &vc4->constbuf[PIPE_SHADER_FRAGMENT],
245
                           &vc4->fragtex);
246
        vc4_write_uniforms(vc4, vc4->prog.vs,
247
                           &vc4->constbuf[PIPE_SHADER_VERTEX],
248
                           &vc4->verttex);
249
        vc4_write_uniforms(vc4, vc4->prog.cs,
250
                           &vc4->constbuf[PIPE_SHADER_VERTEX],
251
                           &vc4->verttex);
252

253
        vc4->last_index_bias = index_bias + extra_index_bias;
254
        vc4->max_index = max_index;
255
        job->shader_rec_count++;
256
}
257

258
/**
259
 * HW-2116 workaround: Flush the batch before triggering the hardware state
260
 * counter wraparound behavior.
261
 *
262
 * State updates are tracked by a global counter which increments at the first
263
 * state update after a draw or a START_BINNING.  Tiles can then have their
264
 * state updated at draw time with a set of cheap checks for whether the
265
 * state's copy of the global counter matches the global counter the last time
266
 * that state was written to the tile.
267
 *
268
 * The state counters are relatively small and wrap around quickly, so you
269
 * could get false negatives for needing to update a particular state in the
270
 * tile.  To avoid this, the hardware attempts to write all of the state in
271
 * the tile at wraparound time.  This apparently is broken, so we just flush
272
 * everything before that behavior is triggered.  A batch flush is sufficient
273
 * to get our current contents drawn and reset the counters to 0.
274
 *
275
 * Note that we can't just use VC4_PACKET_FLUSH_ALL, because that caps the
276
 * tiles with VC4_PACKET_RETURN_FROM_LIST.
277
 */
278
static void
279
vc4_hw_2116_workaround(struct pipe_context *pctx, int vert_count)
280
{
281
        struct vc4_context *vc4 = vc4_context(pctx);
282
        struct vc4_job *job = vc4_get_job_for_fbo(vc4);
283

284
        if (job->draw_calls_queued + vert_count / 65535 >= VC4_HW_2116_COUNT) {
285
                perf_debug("Flushing batch due to HW-2116 workaround "
286
                           "(too many draw calls per scene\n");
287
                vc4_job_submit(vc4, job);
288
        }
289
}
290

291
static void
292
vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info,
293
             unsigned drawid_offset,
294
             const struct pipe_draw_indirect_info *indirect,
295
             const struct pipe_draw_start_count_bias *draws,
296
             unsigned num_draws)
297
{
298
        if (num_draws > 1) {
299
                util_draw_multi(pctx, info, drawid_offset, indirect, draws, num_draws);
300
                return;
301
        }
302

303
        if (!indirect && (!draws[0].count || !info->instance_count))
304
           return;
305

306
        struct vc4_context *vc4 = vc4_context(pctx);
307
        struct pipe_draw_info local_info;
308

309
	if (!indirect &&
310
	    !info->primitive_restart &&
311
	    !u_trim_pipe_prim(info->mode, (unsigned*)&draws[0].count))
312
		return;
313

314
        if (info->mode >= PIPE_PRIM_QUADS) {
315
                if (info->mode == PIPE_PRIM_QUADS &&
316
                    draws[0].count == 4 &&
317
                    !vc4->rasterizer->base.flatshade) {
318
                        local_info = *info;
319
                        local_info.mode = PIPE_PRIM_TRIANGLE_FAN;
320
                        info = &local_info;
321
                } else {
322
                        util_primconvert_save_rasterizer_state(vc4->primconvert, &vc4->rasterizer->base);
323
                        util_primconvert_draw_vbo(vc4->primconvert, info, drawid_offset, indirect, draws, num_draws);
324
                        perf_debug("Fallback conversion for %d %s vertices\n",
325
                                   draws[0].count, u_prim_name(info->mode));
326
                        return;
327
                }
328
        }
329

330
        /* Before setting up the draw, do any fixup blits necessary. */
331
        vc4_predraw_check_textures(pctx, &vc4->verttex);
332
        vc4_predraw_check_textures(pctx, &vc4->fragtex);
333

334
        vc4_hw_2116_workaround(pctx, draws[0].count);
335

336
        struct vc4_job *job = vc4_get_job_for_fbo(vc4);
337

338
        /* Make sure that the raster order flags haven't changed, which can
339
         * only be set at job granularity.
340
         */
341
        if (job->flags != vc4->rasterizer->tile_raster_order_flags) {
342
                vc4_job_submit(vc4, job);
343
                job = vc4_get_job_for_fbo(vc4);
344
        }
345

346
        vc4_get_draw_cl_space(job, draws[0].count);
347

348
        if (vc4->prim_mode != info->mode) {
349
                vc4->prim_mode = info->mode;
350
                vc4->dirty |= VC4_DIRTY_PRIM_MODE;
351
        }
352

353
        vc4_start_draw(vc4);
354
        if (!vc4_update_compiled_shaders(vc4, info->mode)) {
355
                debug_warn_once("shader compile failed, skipping draw call.\n");
356
                return;
357
        }
358

359
        vc4_emit_state(pctx);
360

361
        bool needs_drawarrays_shader_state = false;
362

363
        unsigned index_bias = info->index_size ? draws->index_bias : 0;
364
        if ((vc4->dirty & (VC4_DIRTY_VTXBUF |
365
                           VC4_DIRTY_VTXSTATE |
366
                           VC4_DIRTY_PRIM_MODE |
367
                           VC4_DIRTY_RASTERIZER |
368
                           VC4_DIRTY_COMPILED_CS |
369
                           VC4_DIRTY_COMPILED_VS |
370
                           VC4_DIRTY_COMPILED_FS |
371
                           vc4->prog.cs->uniform_dirty_bits |
372
                           vc4->prog.vs->uniform_dirty_bits |
373
                           vc4->prog.fs->uniform_dirty_bits)) ||
374
            vc4->last_index_bias != index_bias) {
375
                if (info->index_size)
376
                        vc4_emit_gl_shader_state(vc4, info, draws, 0);
377
                else
378
                        needs_drawarrays_shader_state = true;
379
        }
380

381
        vc4->dirty = 0;
382

383
        /* Note that the primitive type fields match with OpenGL/gallium
384
         * definitions, up to but not including QUADS.
385
         */
386
        if (info->index_size) {
387
                uint32_t index_size = info->index_size;
388
                uint32_t offset = draws[0].start * index_size;
389
                struct pipe_resource *prsc;
390
                if (info->index_size == 4) {
391
                        prsc = vc4_get_shadow_index_buffer(pctx, info,
392
                                                           offset,
393
                                                           draws[0].count, &offset);
394
                        index_size = 2;
395
                } else {
396
                        if (info->has_user_indices) {
397
                                unsigned start_offset = draws[0].start * info->index_size;
398
                                prsc = NULL;
399
                                u_upload_data(vc4->uploader, start_offset,
400
                                              draws[0].count * index_size, 4,
401
                                              (char*)info->index.user + start_offset,
402
                                              &offset, &prsc);
403
                        } else {
404
                                prsc = info->index.resource;
405
                        }
406
                }
407
                struct vc4_resource *rsc = vc4_resource(prsc);
408

409
                struct vc4_cl_out *bcl = cl_start(&job->bcl);
410

411
                /* The original design for the VC4 kernel UABI had multiple
412
                 * packets that used relocations in the BCL (some of which
413
                 * needed two BOs), but later modifications eliminated all but
414
                 * this one usage.  We have an arbitrary 32-bit offset value,
415
                 * and need to also supply an arbitrary 32-bit index buffer
416
                 * GEM handle, so we have this fake packet we emit in our BCL
417
                 * to be validated, which the kernel uses at validation time
418
                 * to perform the relocation in the IB packet (without
419
                 * emitting to the actual HW).
420
                 */
421
                uint32_t hindex = vc4_gem_hindex(job, rsc->bo);
422
                if (job->last_gem_handle_hindex != hindex) {
423
                        cl_u8(&bcl, VC4_PACKET_GEM_HANDLES);
424
                        cl_u32(&bcl, hindex);
425
                        cl_u32(&bcl, 0);
426
                        job->last_gem_handle_hindex = hindex;
427
                }
428

429
                cl_u8(&bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
430
                cl_u8(&bcl,
431
                      info->mode |
432
                      (index_size == 2 ?
433
                       VC4_INDEX_BUFFER_U16:
434
                       VC4_INDEX_BUFFER_U8));
435
                cl_u32(&bcl, draws[0].count);
436
                cl_u32(&bcl, offset);
437
                cl_u32(&bcl, vc4->max_index);
438

439
                cl_end(&job->bcl, bcl);
440
                job->draw_calls_queued++;
441

442
                if (info->index_size == 4 || info->has_user_indices)
443
                        pipe_resource_reference(&prsc, NULL);
444
        } else {
445
                uint32_t count = draws[0].count;
446
                uint32_t start = draws[0].start;
447
                uint32_t extra_index_bias = 0;
448
                static const uint32_t max_verts = 65535;
449

450
                /* GFXH-515 / SW-5891: The binner emits 16 bit indices for
451
                 * drawarrays, which means that if start + count > 64k it
452
                 * would truncate the top bits.  Work around this by emitting
453
                 * a limited number of primitives at a time and reemitting the
454
                 * shader state pointing farther down the vertex attribute
455
                 * arrays.
456
                 *
457
                 * To do this properly for line loops or trifans, we'd need to
458
                 * make a new VB containing the first vertex plus whatever
459
                 * remainder.
460
                 */
461
                if (start + count > max_verts) {
462
                        extra_index_bias = start;
463
                        start = 0;
464
                        needs_drawarrays_shader_state = true;
465
                }
466

467
                while (count) {
468
                        uint32_t this_count = count;
469
                        uint32_t step;
470

471
                        if (needs_drawarrays_shader_state) {
472
                                vc4_emit_gl_shader_state(vc4, info, draws,
473
                                                         extra_index_bias);
474
                        }
475

476
                        u_split_draw(info, max_verts, &this_count, &step);
477

478
                        cl_emit(&job->bcl, VERTEX_ARRAY_PRIMITIVES, array) {
479
                                array.primitive_mode = info->mode;
480
                                array.length = this_count;
481
                                array.index_of_first_vertex = start;
482
                        }
483
                        job->draw_calls_queued++;
484

485
                        count -= step;
486
                        extra_index_bias += start + step;
487
                        start = 0;
488
                        needs_drawarrays_shader_state = true;
489
                }
490
        }
491

492
        /* We shouldn't have tripped the HW_2116 bug with the GFXH-515
493
         * workaround.
494
         */
495
        assert(job->draw_calls_queued <= VC4_HW_2116_COUNT);
496

497
        if (vc4->zsa && vc4->framebuffer.zsbuf) {
498
                struct vc4_resource *rsc =
499
                        vc4_resource(vc4->framebuffer.zsbuf->texture);
500

501
                if (vc4->zsa->base.depth_enabled) {
502
                        job->resolve |= PIPE_CLEAR_DEPTH;
503
                        rsc->initialized_buffers = PIPE_CLEAR_DEPTH;
504
                }
505

506
                if (vc4->zsa->base.stencil[0].enabled) {
507
                        job->resolve |= PIPE_CLEAR_STENCIL;
508
                        rsc->initialized_buffers |= PIPE_CLEAR_STENCIL;
509
                }
510
        }
511

512
        job->resolve |= PIPE_CLEAR_COLOR0;
513

514
        /* If we've used half of the presumably 256MB CMA area, flush the job
515
         * so that we don't accumulate a job that will end up not being
516
         * executable.
517
         */
518
        if (job->bo_space > 128 * 1024 * 1024)
519
                vc4_flush(pctx);
520

521
        if (vc4_debug & VC4_DEBUG_ALWAYS_FLUSH)
522
                vc4_flush(pctx);
523
}
524

525
static uint32_t
526
pack_rgba(enum pipe_format format, const float *rgba)
527
{
528
        union util_color uc;
529
        util_pack_color(rgba, format, &uc);
530
        if (util_format_get_blocksize(format) == 2)
531
                return uc.us;
532
        else
533
                return uc.ui[0];
534
}
535

536
static void
537
vc4_clear(struct pipe_context *pctx, unsigned buffers, const struct pipe_scissor_state *scissor_state,
538
          const union pipe_color_union *color, double depth, unsigned stencil)
539
{
540
        struct vc4_context *vc4 = vc4_context(pctx);
541
        struct vc4_job *job = vc4_get_job_for_fbo(vc4);
542

543
        if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
544
                struct vc4_resource *rsc =
545
                        vc4_resource(vc4->framebuffer.zsbuf->texture);
546
                unsigned zsclear = buffers & PIPE_CLEAR_DEPTHSTENCIL;
547

548
                /* Clearing ZS will clear both Z and stencil, so if we're
549
                 * trying to clear just one then we need to draw a quad to do
550
                 * it instead.  We need to do this before setting up
551
                 * tile-based clears in vc4->job, because the blitter may
552
                 * submit the current job.
553
                 */
554
                if ((zsclear == PIPE_CLEAR_DEPTH ||
555
                     zsclear == PIPE_CLEAR_STENCIL) &&
556
                    (rsc->initialized_buffers & ~(zsclear | job->cleared)) &&
557
                    util_format_is_depth_and_stencil(vc4->framebuffer.zsbuf->format)) {
558
                        static const union pipe_color_union dummy_color = {};
559

560
                        perf_debug("Partial clear of Z+stencil buffer, "
561
                                   "drawing a quad instead of fast clearing\n");
562
                        vc4_blitter_save(vc4);
563
                        util_blitter_clear(vc4->blitter,
564
                                           vc4->framebuffer.width,
565
                                           vc4->framebuffer.height,
566
                                           1,
567
                                           zsclear,
568
                                           &dummy_color, depth, stencil,
569
                                           false);
570
                        buffers &= ~zsclear;
571
                        if (!buffers)
572
                                return;
573
                        job = vc4_get_job_for_fbo(vc4);
574
                }
575
        }
576

577
        /* We can't flag new buffers for clearing once we've queued draws.  We
578
         * could avoid this by using the 3d engine to clear.
579
         */
580
        if (job->draw_calls_queued) {
581
                perf_debug("Flushing rendering to process new clear.\n");
582
                vc4_job_submit(vc4, job);
583
                job = vc4_get_job_for_fbo(vc4);
584
        }
585

586
        if (buffers & PIPE_CLEAR_COLOR0) {
587
                struct vc4_resource *rsc =
588
                        vc4_resource(vc4->framebuffer.cbufs[0]->texture);
589
                uint32_t clear_color;
590

591
                if (vc4_rt_format_is_565(vc4->framebuffer.cbufs[0]->format)) {
592
                        /* In 565 mode, the hardware will be packing our color
593
                         * for us.
594
                         */
595
                        clear_color = pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM,
596
                                                color->f);
597
                } else {
598
                        /* Otherwise, we need to do this packing because we
599
                         * support multiple swizzlings of RGBA8888.
600
                         */
601
                        clear_color =
602
                                pack_rgba(vc4->framebuffer.cbufs[0]->format,
603
                                          color->f);
604
                }
605
                job->clear_color[0] = job->clear_color[1] = clear_color;
606
                rsc->initialized_buffers |= (buffers & PIPE_CLEAR_COLOR0);
607
        }
608

609
        if (buffers & PIPE_CLEAR_DEPTHSTENCIL) {
610
                struct vc4_resource *rsc =
611
                        vc4_resource(vc4->framebuffer.zsbuf->texture);
612

613
                /* Though the depth buffer is stored with Z in the high 24,
614
                 * for this field we just need to store it in the low 24.
615
                 */
616
                if (buffers & PIPE_CLEAR_DEPTH) {
617
                        job->clear_depth = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
618
                                                       depth);
619
                }
620
                if (buffers & PIPE_CLEAR_STENCIL)
621
                        job->clear_stencil = stencil;
622

623
                rsc->initialized_buffers |= (buffers & PIPE_CLEAR_DEPTHSTENCIL);
624
        }
625

626
        job->draw_min_x = 0;
627
        job->draw_min_y = 0;
628
        job->draw_max_x = vc4->framebuffer.width;
629
        job->draw_max_y = vc4->framebuffer.height;
630
        job->cleared |= buffers;
631
        job->resolve |= buffers;
632

633
        vc4_start_draw(vc4);
634
}
635

636
/**
 * Stub for pipe_context::clear_render_target: not implemented, only reports
 * that fact on stderr.  All parameters are ignored.
 */
static void
vc4_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
                        const union pipe_color_union *color,
                        unsigned x, unsigned y, unsigned w, unsigned h,
                        bool render_condition_enabled)
{
        fputs("unimpl: clear RT\n", stderr);
}
644

645
/**
 * Stub for pipe_context::clear_depth_stencil: not implemented, only reports
 * that fact on stderr.  All parameters are ignored.
 */
static void
vc4_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
                        unsigned buffers, double depth, unsigned stencil,
                        unsigned x, unsigned y, unsigned w, unsigned h,
                        bool render_condition_enabled)
{
        fputs("unimpl: clear DS\n", stderr);
}
653

654
void
655
vc4_draw_init(struct pipe_context *pctx)
656
{
657
        pctx->draw_vbo = vc4_draw_vbo;
658
        pctx->clear = vc4_clear;
659
        pctx->clear_render_target = vc4_clear_render_target;
660
        pctx->clear_depth_stencil = vc4_clear_depth_stencil;
661
}
662

663
Product

Resources

Company