/* Path: blob/21.2-virgl/src/broadcom/simulator/v3dx_simulator.c */
/*
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * @file v3dx_simulator.c
 *
 * Implements the actual HW interaction between the GL driver's V3D simulator
 * and the simulator.
 *
 * The register headers between V3D versions will have conflicting defines, so
 * all register interactions appear in this file and are compiled per V3D
 * version we support.
 */

#ifdef USE_V3D_SIMULATOR

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#include "v3d_simulator.h"
#include "v3d_simulator_wrapper.h"

#include "util/macros.h"
#include "util/bitscan.h"
#include "drm-uapi/v3d_drm.h"

#define HW_REGISTER_RO(x) (x)
#define HW_REGISTER_RW(x) (x)
#if V3D_VERSION >= 41
#include "libs/core/v3d/registers/4.1.35.0/v3d.h"
#else
#include "libs/core/v3d/registers/3.3.0.0/v3d.h"
#endif

/* Convenience wrappers: every function here takes a "v3d" handle, so the
 * macros can reference it directly.
 */
#define V3D_WRITE(reg, val) v3d_hw_write_reg(v3d, reg, val)
#define V3D_READ(reg) v3d_hw_read_reg(v3d, reg)

/* Invalidates the L3 cache (V3D < 4.0 only; later gens have no GCA L3). */
static void
v3d_invalidate_l3(struct v3d_hw *v3d)
{
#if V3D_VERSION < 40
        uint32_t gca_ctrl = V3D_READ(V3D_GCA_CACHE_CTRL);

        /* Pulse the flush bit: set it, then clear it again. */
        V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH_SET);
        V3D_WRITE(V3D_GCA_CACHE_CTRL, gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH_SET);
#endif
}

/* Invalidates the L2C cache.  This is a read-only cache for uniforms and
 * instructions, and is not present on V3D 3.3+.
 */
static void
v3d_invalidate_l2c(struct v3d_hw *v3d)
{
        if (V3D_VERSION >= 33)
                return;

        V3D_WRITE(V3D_CTL_0_L2CACTL,
                  V3D_CTL_0_L2CACTL_L2CCLR_SET |
                  V3D_CTL_0_L2CACTL_L2CENA_SET);
}

enum v3d_l2t_cache_flush_mode {
        V3D_CACHE_FLUSH_MODE_FLUSH,
        V3D_CACHE_FLUSH_MODE_CLEAR,
        V3D_CACHE_FLUSH_MODE_CLEAN,
};

/* Invalidates texture L2 cachelines over the whole address range. */
static void
v3d_invalidate_l2t(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
        V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_L2TFLS_SET |
                  (V3D_CACHE_FLUSH_MODE_FLUSH << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));
}

/*
 * Wait for l2tcactl, used for flushes.
 *
 * FIXME: for a multicore scenario we should pass here the core. All wrapper
 * assumes just one core, so would be better to handle that on that case.
 */
static UNUSED void v3d_core_wait_l2tcactl(struct v3d_hw *v3d,
                                          uint32_t ctrl)
{
        assert(!(ctrl & ~(V3D_CTL_0_L2TCACTL_TMUWCF_SET |
                          V3D_CTL_0_L2TCACTL_L2TFLS_SET)));

        /* Tick the simulator until the HW clears the requested bits. */
        while (V3D_READ(V3D_CTL_0_L2TCACTL) & ctrl) {
                v3d_hw_tick(v3d);
        }
}

/* Flushes dirty texture cachelines from the L1 write combiner */
static void
v3d_flush_l1td(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_TMUWCF_SET);

        /* Note: here the kernel (and previous versions of the simulator
         * wrapper) is using V3D_CTL_0_L2TCACTL_L2TFLS_SET, as with l2t. We
         * understand that it makes more sense to do like this. We need to
         * confirm which one is doing it correctly. So far things work fine on
         * the simulator this way.
         */
        v3d_core_wait_l2tcactl(v3d, V3D_CTL_0_L2TCACTL_TMUWCF_SET);
}

/* Flushes dirty texture L2 cachelines */
static void
v3d_flush_l2t(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_L2TFLSTA, 0);
        V3D_WRITE(V3D_CTL_0_L2TFLEND, ~0);
        V3D_WRITE(V3D_CTL_0_L2TCACTL,
                  V3D_CTL_0_L2TCACTL_L2TFLS_SET |
                  (V3D_CACHE_FLUSH_MODE_CLEAN << V3D_CTL_0_L2TCACTL_L2TFLM_LSB));

        v3d_core_wait_l2tcactl(v3d, V3D_CTL_0_L2TCACTL_L2TFLS_SET);
}

/* Invalidates the slice caches. These are read-only caches. */
static void
v3d_invalidate_slices(struct v3d_hw *v3d)
{
        V3D_WRITE(V3D_CTL_0_SLCACTL, ~0);
}

/* Invalidates every read-side cache before kicking a job. */
static void
v3d_invalidate_caches(struct v3d_hw *v3d)
{
        v3d_invalidate_l3(v3d);
        v3d_invalidate_l2c(v3d);
        v3d_invalidate_l2t(v3d);
        v3d_invalidate_slices(v3d);
}

/* GMP table offset, remembered so the out-of-memory ISR can re-program the
 * GMP after allocating spill memory.
 */
static uint32_t g_gmp_ofs;

static void
v3d_reload_gmp(struct v3d_hw *v3d)
{
        /* Completely reset the GMP. */
        V3D_WRITE(V3D_GMP_CFG,
                  V3D_GMP_CFG_PROTENABLE_SET);
        V3D_WRITE(V3D_GMP_TABLE_ADDR, g_gmp_ofs);
        V3D_WRITE(V3D_GMP_CLEAR_LOAD, ~0);
        while (V3D_READ(V3D_GMP_STATUS) &
               V3D_GMP_STATUS_CFG_BUSY_SET) {
                ;
        }
}

/* Flushes every write-side cache after a job completes. */
static UNUSED void
v3d_flush_caches(struct v3d_hw *v3d)
{
        v3d_flush_l1td(v3d);
        v3d_flush_l2t(v3d);
}

/* Submits a TFU (texture formatting unit) job and busy-waits for its
 * completion by watching the conversion count toggle.
 */
int
v3dX(simulator_submit_tfu_ioctl)(struct v3d_hw *v3d,
                                 struct drm_v3d_submit_tfu *args)
{
        int last_vtct = V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET;

        V3D_WRITE(V3D_TFU_IIA, args->iia);
        V3D_WRITE(V3D_TFU_IIS, args->iis);
        V3D_WRITE(V3D_TFU_ICA, args->ica);
        V3D_WRITE(V3D_TFU_IUA, args->iua);
        V3D_WRITE(V3D_TFU_IOA, args->ioa);
        V3D_WRITE(V3D_TFU_IOS, args->ios);
        V3D_WRITE(V3D_TFU_COEF0, args->coef[0]);
        V3D_WRITE(V3D_TFU_COEF1, args->coef[1]);
        V3D_WRITE(V3D_TFU_COEF2, args->coef[2]);
        V3D_WRITE(V3D_TFU_COEF3, args->coef[3]);

        /* Writing ICFG kicks off the conversion. */
        V3D_WRITE(V3D_TFU_ICFG, args->icfg);

        while ((V3D_READ(V3D_TFU_CS) & V3D_TFU_CS_CVTCT_SET) == last_vtct) {
                v3d_hw_tick(v3d);
        }

        return 0;
}

#if V3D_VERSION >= 41
/* Submits a CSD (compute shader dispatch) job and busy-waits for completion. */
int
v3dX(simulator_submit_csd_ioctl)(struct v3d_hw *v3d,
                                 struct drm_v3d_submit_csd *args,
                                 uint32_t gmp_ofs)
{
        int last_completed_jobs = (V3D_READ(V3D_CSD_0_STATUS) &
                                   V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET);
        g_gmp_ofs = gmp_ofs;
        v3d_reload_gmp(v3d);

        v3d_invalidate_caches(v3d);

        V3D_WRITE(V3D_CSD_0_QUEUED_CFG1, args->cfg[1]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG2, args->cfg[2]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG3, args->cfg[3]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG4, args->cfg[4]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG5, args->cfg[5]);
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG6, args->cfg[6]);
        /* CFG0 kicks off the job */
        V3D_WRITE(V3D_CSD_0_QUEUED_CFG0, args->cfg[0]);

        /* Now we wait for the dispatch to finish. The safest way is to check
         * if NUM_COMPLETED_JOBS has increased. Note that in spite of that
         * name that register field is about the number of completed
         * dispatches.
         */
        while ((V3D_READ(V3D_CSD_0_STATUS) &
                V3D_CSD_0_STATUS_NUM_COMPLETED_JOBS_SET) == last_completed_jobs) {
                v3d_hw_tick(v3d);
        }

        v3d_flush_caches(v3d);

        return 0;
}
#endif

/* Implements DRM_IOCTL_V3D_GET_PARAM: answers driver capability queries and
 * identity-register reads against the simulated HW.
 */
int
v3dX(simulator_get_param_ioctl)(struct v3d_hw *v3d,
                                struct drm_v3d_get_param *args)
{
        static const uint32_t reg_map[] = {
                [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_CTL_UIFCFG,
                [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_CTL_IDENT1,
                [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_CTL_IDENT2,
                [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_CTL_IDENT3,
                [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_0_IDENT0,
                [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_0_IDENT1,
                [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_0_IDENT2,
        };

        switch (args->param) {
        case DRM_V3D_PARAM_SUPPORTS_TFU:
                args->value = 1;
                return 0;
        case DRM_V3D_PARAM_SUPPORTS_CSD:
                args->value = V3D_VERSION >= 41;
                return 0;
        case DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH:
                args->value = 1;
                return 0;
        }

        if (args->param < ARRAY_SIZE(reg_map) && reg_map[args->param]) {
                args->value = V3D_READ(reg_map[args->param]);
                return 0;
        }

        /* Fix: report the unknown *param*, not args->value (which is an
         * output field and is uninitialized at this point).
         */
        fprintf(stderr, "Unknown DRM_IOCTL_V3D_GET_PARAM(%lld)\n",
                (long long)args->param);
        abort();
}

/* Handle used by the ISR callback, which receives no user pointer. */
static struct v3d_hw *v3d_isr_hw;

static void
v3d_isr_core(struct v3d_hw *v3d,
             unsigned core)
{
        /* FIXME: so far we are assuming just one core, and using only the _0_
         * registers. If we add multiple-core on the simulator, we would need
         * to pass core as a parameter, and chose the proper registers.
         */
        assert(core == 0);
        uint32_t core_status = V3D_READ(V3D_CTL_0_INT_STS);
        V3D_WRITE(V3D_CTL_0_INT_CLR, core_status);

        if (core_status & V3D_CTL_0_INT_STS_INT_OUTOMEM_SET) {
                /* Binner ran out of memory: hand it a fresh spill buffer. */
                uint32_t size = 256 * 1024;
                uint32_t offset = v3d_simulator_get_spill(size);

                v3d_reload_gmp(v3d);

                V3D_WRITE(V3D_PTB_0_BPOA, offset);
                V3D_WRITE(V3D_PTB_0_BPOS, size);
                return;
        }

        if (core_status & V3D_CTL_0_INT_STS_INT_GMPV_SET) {
                fprintf(stderr, "GMP violation at 0x%08x\n",
                        V3D_READ(V3D_GMP_VIO_ADDR));
                abort();
        } else {
                fprintf(stderr,
                        "Unexpected ISR with core status 0x%08x\n",
                        core_status);
        }
        abort();
}

static void
handle_mmu_interruptions(struct v3d_hw *v3d,
                         uint32_t hub_status)
{
        bool wrv = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_WRV_SET;
        bool pti = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_PTI_SET;
        bool cap = hub_status & V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET;

        if (!(pti || cap || wrv))
                return;

        const char *client = "?";
        uint32_t axi_id = V3D_READ(V3D_MMU_VIO_ID);
        uint32_t va_width = 30;

#if V3D_VERSION >= 41
        static const char *const v3d41_axi_ids[] = {
                "L2T",
                "PTB",
                "PSE",
                "TLB",
                "CLE",
                "TFU",
                "MMU",
                "GMP",
        };

        axi_id = axi_id >> 5;
        if (axi_id < ARRAY_SIZE(v3d41_axi_ids))
                client = v3d41_axi_ids[axi_id];

        uint32_t mmu_debug = V3D_READ(V3D_MMU_DEBUG_INFO);

        va_width += ((mmu_debug & V3D_MMU_DEBUG_INFO_VA_WIDTH_SET)
                     >> V3D_MMU_DEBUG_INFO_VA_WIDTH_LSB);
#endif
        /* Only the top bits (final number depends on the gen) of the virtual
         * address are reported in the MMU VIO_ADDR register.
         */
        uint64_t vio_addr = ((uint64_t)V3D_READ(V3D_MMU_VIO_ADDR) <<
                             (va_width - 32));

        /* Difference with the kernal: here were are going to abort after
         * logging, so we don't bother with some stuff that the kernel does,
         * like restoring the MMU ctrl bits
         */

        fprintf(stderr, "MMU error from client %s (%d) at 0x%llx%s%s%s\n",
                client, axi_id, (long long) vio_addr,
                wrv ? ", write violation" : "",
                pti ? ", pte invalid" : "",
                cap ? ", cap exceeded" : "");

        abort();
}

static void
v3d_isr_hub(struct v3d_hw *v3d)
{
        uint32_t hub_status = V3D_READ(V3D_HUB_CTL_INT_STS);

        /* Acknowledge the interrupts we're handling here */
        V3D_WRITE(V3D_HUB_CTL_INT_CLR, hub_status);

        if (hub_status & V3D_HUB_CTL_INT_STS_INT_TFUC_SET) {
                /* FIXME: we were not able to raise this exception. We let the
                 * unreachable here, so we could get one if it is raised on
                 * the future. In any case, note that for this case we would
                 * only be doing debugging log.
                 */
                unreachable("TFU Conversion Complete interrupt not handled");
        }

        handle_mmu_interruptions(v3d, hub_status);
}

/* Top-level ISR: fans each pending bit out to either the hub handler or the
 * per-core handler.
 */
static void
v3d_isr(uint32_t hub_status)
{
        struct v3d_hw *v3d = v3d_isr_hw;
        uint32_t mask = hub_status;

        /* Check the hub_status bits */
        while (mask) {
                unsigned core = u_bit_scan(&mask);

                if (core == v3d_hw_get_hub_core())
                        v3d_isr_hub(v3d);
                else
                        v3d_isr_core(v3d, core);
        }

        return;
}

/* One-time register setup: interrupt masks and the ISR hook. */
void
v3dX(simulator_init_regs)(struct v3d_hw *v3d)
{
#if V3D_VERSION == 33
        /* Set OVRTMUOUT to match kernel behavior.
         *
         * This means that the texture sampler uniform configuration's tmu
         * output type field is used, instead of using the hardware default
         * behavior based on the texture type. If you want the default
         * behavior, you can still put "2" in the indirect texture state's
         * output_type field.
         */
        V3D_WRITE(V3D_CTL_0_MISCCFG, V3D_CTL_1_MISCCFG_OVRTMUOUT_SET);
#endif

        /* FIXME: the kernel captures some additional core interrupts here,
         * for tracing. Perhaps we should evaluate to do the same here and add
         * some debug options.
         */
        uint32_t core_interrupts = (V3D_CTL_0_INT_STS_INT_GMPV_SET |
                                    V3D_CTL_0_INT_STS_INT_OUTOMEM_SET);
        V3D_WRITE(V3D_CTL_0_INT_MSK_SET, ~core_interrupts);
        V3D_WRITE(V3D_CTL_0_INT_MSK_CLR, core_interrupts);

        uint32_t hub_interrupts =
                (V3D_HUB_CTL_INT_STS_INT_MMU_WRV_SET |  /* write violation */
                 V3D_HUB_CTL_INT_STS_INT_MMU_PTI_SET |  /* page table invalid */
                 V3D_HUB_CTL_INT_STS_INT_MMU_CAP_SET |  /* CAP exceeded */
                 V3D_HUB_CTL_INT_STS_INT_TFUC_SET);     /* TFU conversion */

        V3D_WRITE(V3D_HUB_CTL_INT_MSK_SET, ~hub_interrupts);
        V3D_WRITE(V3D_HUB_CTL_INT_MSK_CLR, hub_interrupts);

        v3d_isr_hw = v3d;
        v3d_hw_set_isr(v3d, v3d_isr);
}

/* Submits a bin+render CL job, waiting for bin to finish before kicking
 * render, mirroring what the kernel scheduler does with fences.
 */
void
v3dX(simulator_submit_cl_ioctl)(struct v3d_hw *v3d,
                                struct drm_v3d_submit_cl *submit,
                                uint32_t gmp_ofs)
{
        int last_bfc = (V3D_READ(V3D_CLE_0_BFC) &
                        V3D_CLE_0_BFC_BMFCT_SET);

        int last_rfc = (V3D_READ(V3D_CLE_0_RFC) &
                        V3D_CLE_0_RFC_RMFCT_SET);

        g_gmp_ofs = gmp_ofs;
        v3d_reload_gmp(v3d);

        v3d_invalidate_caches(v3d);

        if (submit->qma) {
                V3D_WRITE(V3D_CLE_0_CT0QMA, submit->qma);
                V3D_WRITE(V3D_CLE_0_CT0QMS, submit->qms);
        }
#if V3D_VERSION >= 41
        if (submit->qts) {
                V3D_WRITE(V3D_CLE_0_CT0QTS,
                          V3D_CLE_0_CT0QTS_CTQTSEN_SET |
                          submit->qts);
        }
#endif
        V3D_WRITE(V3D_CLE_0_CT0QBA, submit->bcl_start);
        V3D_WRITE(V3D_CLE_0_CT0QEA, submit->bcl_end);

        /* Wait for bin to complete before firing render. The kernel's
         * scheduler implements this using the GPU scheduler blocking on the
         * bin fence completing. (We don't use HW semaphores).
         */
        while ((V3D_READ(V3D_CLE_0_BFC) &
                V3D_CLE_0_BFC_BMFCT_SET) == last_bfc) {
                v3d_hw_tick(v3d);
        }

        v3d_invalidate_caches(v3d);

        V3D_WRITE(V3D_CLE_0_CT1QBA, submit->rcl_start);
        V3D_WRITE(V3D_CLE_0_CT1QEA, submit->rcl_end);

        while ((V3D_READ(V3D_CLE_0_RFC) &
                V3D_CLE_0_RFC_RMFCT_SET) == last_rfc) {
                v3d_hw_tick(v3d);
        }
}

#endif /* USE_V3D_SIMULATOR */