CoCalc -- qpu_validate.c

GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/broadcom/compiler/qpu_validate.c
4564 views
1
/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23

24
/**
 * @file
 *
 * Validates the QPU instruction sequence after register allocation and
 * scheduling.
 */
30

31
#include <assert.h>
32
#include <stdio.h>
33
#include <stdlib.h>
34
#include "v3d_compiler.h"
35
#include "qpu/qpu_disasm.h"
36

37
/**
 * Running state carried from one instruction to the next while a program is
 * being validated.  All "ip" fields are instruction indices counted across
 * the whole program (the counter is not reset between blocks).
 */
struct v3d_qpu_validate_state {
        /* Compile context whose instructions are being validated. */
        struct v3d_compile *c;

        /* Instruction validated just before the current one, or NULL when
         * no instruction has been validated yet.
         */
        const struct v3d_qpu_instr *last;

        /* Index of the instruction currently being validated. */
        int ip;

        /* Index of the most recent instruction that wrote an SFU register. */
        int last_sfu_write;

        /* Index of the most recent branch instruction. */
        int last_branch_ip;

        /* Index of the most recent THRSW that was not merely the
         * back-to-back "last-THRSW" marker.
         */
        int last_thrsw_ip;

        /* Set when we've found the last-THRSW signal, or if we were started
         * in single-segment mode.
         */
        bool last_thrsw_found;

        /* Set when we've found the THRSW after the last THRSW, i.e. the one
         * that ends the program.
         */
        bool thrend_found;

        /* Number of THRSWs seen, not counting last-THRSW markers. */
        int thrsw_count;
};
55

56
static void
57
fail_instr(struct v3d_qpu_validate_state *state, const char *msg)
58
{
59
        struct v3d_compile *c = state->c;
60

61
        fprintf(stderr, "v3d_qpu_validate at ip %d: %s:\n", state->ip, msg);
62

63
        int dump_ip = 0;
64
        vir_for_each_inst_inorder(inst, c) {
65
                v3d_qpu_dump(c->devinfo, &inst->qpu);
66

67
                if (dump_ip++ == state->ip)
68
                        fprintf(stderr, " *** ERROR ***");
69

70
                fprintf(stderr, "\n");
71
        }
72

73
        fprintf(stderr, "\n");
74
        abort();
75
}
76

77
static bool
78
in_branch_delay_slots(struct v3d_qpu_validate_state *state)
79
{
80
        return (state->ip - state->last_branch_ip) < 3;
81
}
82

83
static bool
84
in_thrsw_delay_slots(struct v3d_qpu_validate_state *state)
85
{
86
        return (state->ip - state->last_thrsw_ip) < 3;
87
}
88

89
static bool
90
qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst,
91
                        bool (*predicate)(enum v3d_qpu_waddr waddr))
92
{
93
        if (inst->type == V3D_QPU_INSTR_TYPE_ALU)
94
                return false;
95

96
        if (inst->alu.add.op != V3D_QPU_A_NOP &&
97
            inst->alu.add.magic_write &&
98
            predicate(inst->alu.add.waddr))
99
                return true;
100

101
        if (inst->alu.mul.op != V3D_QPU_M_NOP &&
102
            inst->alu.mul.magic_write &&
103
            predicate(inst->alu.mul.waddr))
104
                return true;
105

106
        return false;
107
}
108

109
static void
110
qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)
111
{
112
        const struct v3d_device_info *devinfo = state->c->devinfo;
113
        const struct v3d_qpu_instr *inst = &qinst->qpu;
114

115
        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
116
                return;
117

118
        /* LDVARY writes r5 two instructions later and LDUNIF writes
119
         * r5 one instruction later, which is illegal to have
120
         * together.
121
         */
122
        if (state->last && state->last->sig.ldvary &&
123
            (inst->sig.ldunif || inst->sig.ldunifa)) {
124
                fail_instr(state, "LDUNIF after a LDVARY");
125
        }
126

127
        /* GFXH-1633 (fixed since V3D 4.2.14, which is Rpi4)
128
         *
129
         * FIXME: This would not check correctly for V3D 4.2 versions lower
130
         * than V3D 4.2.14, but that is not a real issue because the simulator
131
         * will still catch this, and we are not really targetting any such
132
         * versions anyway.
133
         */
134
        if (state->c->devinfo->ver < 42) {
135
                bool last_reads_ldunif = (state->last && (state->last->sig.ldunif ||
136
                                                          state->last->sig.ldunifrf));
137
                bool last_reads_ldunifa = (state->last && (state->last->sig.ldunifa ||
138
                                                           state->last->sig.ldunifarf));
139
                bool reads_ldunif = inst->sig.ldunif || inst->sig.ldunifrf;
140
                bool reads_ldunifa = inst->sig.ldunifa || inst->sig.ldunifarf;
141
                if ((last_reads_ldunif && reads_ldunifa) ||
142
                    (last_reads_ldunifa && reads_ldunif)) {
143
                        fail_instr(state,
144
                                   "LDUNIF and LDUNIFA can't be next to each other");
145
                }
146
        }
147

148
        int tmu_writes = 0;
149
        int sfu_writes = 0;
150
        int vpm_writes = 0;
151
        int tlb_writes = 0;
152
        int tsy_writes = 0;
153

154
        if (inst->alu.add.op != V3D_QPU_A_NOP) {
155
                if (inst->alu.add.magic_write) {
156
                        if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo,
157
                                                       inst->alu.add.waddr)) {
158
                                tmu_writes++;
159
                        }
160
                        if (v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))
161
                                sfu_writes++;
162
                        if (v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr))
163
                                vpm_writes++;
164
                        if (v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr))
165
                                tlb_writes++;
166
                        if (v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr))
167
                                tsy_writes++;
168
                }
169
        }
170

171
        if (inst->alu.mul.op != V3D_QPU_M_NOP) {
172
                if (inst->alu.mul.magic_write) {
173
                        if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo,
174
                                                       inst->alu.mul.waddr)) {
175
                                tmu_writes++;
176
                        }
177
                        if (v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))
178
                                sfu_writes++;
179
                        if (v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr))
180
                                vpm_writes++;
181
                        if (v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr))
182
                                tlb_writes++;
183
                        if (v3d_qpu_magic_waddr_is_tsy(inst->alu.mul.waddr))
184
                                tsy_writes++;
185
                }
186
        }
187

188
        if (in_thrsw_delay_slots(state)) {
189
                /* There's no way you want to start SFU during the THRSW delay
190
                 * slots, since the result would land in the other thread.
191
                 */
192
                if (sfu_writes) {
193
                        fail_instr(state,
194
                                   "SFU write started during THRSW delay slots ");
195
                }
196

197
                if (inst->sig.ldvary)
198
                        fail_instr(state, "LDVARY during THRSW delay slots");
199
        }
200

201
        (void)qpu_magic_waddr_matches; /* XXX */
202

203
        /* SFU r4 results come back two instructions later.  No doing
204
         * r4 read/writes or other SFU lookups until it's done.
205
         */
206
        if (state->ip - state->last_sfu_write < 2) {
207
                if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4))
208
                        fail_instr(state, "R4 read too soon after SFU");
209

210
                if (v3d_qpu_writes_r4(devinfo, inst))
211
                        fail_instr(state, "R4 write too soon after SFU");
212

213
                if (sfu_writes)
214
                        fail_instr(state, "SFU write too soon after SFU");
215
        }
216

217
        /* XXX: The docs say VPM can happen with the others, but the simulator
218
         * disagrees.
219
         */
220
        if (tmu_writes +
221
            sfu_writes +
222
            vpm_writes +
223
            tlb_writes +
224
            tsy_writes +
225
            inst->sig.ldtmu +
226
            inst->sig.ldtlb +
227
            inst->sig.ldvpm +
228
            inst->sig.ldtlbu > 1) {
229
                fail_instr(state,
230
                           "Only one of [TMU, SFU, TSY, TLB read, VPM] allowed");
231
        }
232

233
        if (sfu_writes)
234
                state->last_sfu_write = state->ip;
235

236
        if (inst->sig.thrsw) {
237
                if (in_branch_delay_slots(state))
238
                        fail_instr(state, "THRSW in a branch delay slot.");
239

240
                if (state->last_thrsw_found)
241
                        state->thrend_found = true;
242

243
                if (state->last_thrsw_ip == state->ip - 1) {
244
                        /* If it's the second THRSW in a row, then it's just a
245
                         * last-thrsw signal.
246
                         */
247
                        if (state->last_thrsw_found)
248
                                fail_instr(state, "Two last-THRSW signals");
249
                        state->last_thrsw_found = true;
250
                } else {
251
                        if (in_thrsw_delay_slots(state)) {
252
                                fail_instr(state,
253
                                           "THRSW too close to another THRSW.");
254
                        }
255
                        state->thrsw_count++;
256
                        state->last_thrsw_ip = state->ip;
257
                }
258
        }
259

260
        if (state->thrend_found &&
261
            state->last_thrsw_ip - state->ip <= 2 &&
262
            inst->type == V3D_QPU_INSTR_TYPE_ALU) {
263
                if ((inst->alu.add.op != V3D_QPU_A_NOP &&
264
                     !inst->alu.add.magic_write)) {
265
                        fail_instr(state, "RF write after THREND");
266
                }
267

268
                if ((inst->alu.mul.op != V3D_QPU_M_NOP &&
269
                     !inst->alu.mul.magic_write)) {
270
                        fail_instr(state, "RF write after THREND");
271
                }
272

273
                if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
274
                    !inst->sig_magic) {
275
                        fail_instr(state, "RF write after THREND");
276
                }
277

278
                /* GFXH-1625: No TMUWT in the last instruction */
279
                if (state->last_thrsw_ip - state->ip == 2 &&
280
                    inst->alu.add.op == V3D_QPU_A_TMUWT)
281
                        fail_instr(state, "TMUWT in last instruction");
282
        }
283

284
        if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
285
                if (in_branch_delay_slots(state))
286
                        fail_instr(state, "branch in a branch delay slot.");
287
                if (in_thrsw_delay_slots(state))
288
                        fail_instr(state, "branch in a THRSW delay slot.");
289
                state->last_branch_ip = state->ip;
290
        }
291
}
292

293
static void
294
qpu_validate_block(struct v3d_qpu_validate_state *state, struct qblock *block)
295
{
296
        vir_for_each_inst(qinst, block) {
297
                qpu_validate_inst(state, qinst);
298

299
                state->last = &qinst->qpu;
300
                state->ip++;
301
        }
302
}
303

304
/**
305
 * Checks for the instruction restrictions from page 37 ("Summary of
306
 * Instruction Restrictions").
307
 */
308
void
309
qpu_validate(struct v3d_compile *c)
310
{
311
        /* We don't want to do validation in release builds, but we want to
312
         * keep compiling the validation code to make sure it doesn't get
313
         * broken.
314
         */
315
#ifndef DEBUG
316
        return;
317
#endif
318

319
        struct v3d_qpu_validate_state state = {
320
                .c = c,
321
                .last_sfu_write = -10,
322
                .last_thrsw_ip = -10,
323
                .last_branch_ip = -10,
324
                .ip = 0,
325

326
                .last_thrsw_found = !c->last_thrsw,
327
        };
328

329
        vir_for_each_block(block, c) {
330
                qpu_validate_block(&state, block);
331
        }
332

333
        if (state.thrsw_count > 1 && !state.last_thrsw_found) {
334
                fail_instr(&state,
335
                           "thread switch found without last-THRSW in program");
336
        }
337

338
        if (!state.thrend_found)
339
                fail_instr(&state, "No program-end THRSW found");
340
}
341

342
Product

Resources

Company