Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/freedreno/afuc/emu.c
4564 views
1
/*
 * Copyright © 2021 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23
24
#include <assert.h>
25
#include <ctype.h>
26
#include <errno.h>
27
#include <stdio.h>
28
#include <stdlib.h>
29
#include <string.h>
30
#include <sys/mman.h>
31
#include <unistd.h>
32
33
#include "util/u_math.h"
34
35
#include "freedreno_pm4.h"
36
37
#include "emu.h"
38
#include "util.h"
39
40
/*
 * Rotate-left helpers.  The rotate count is reduced modulo the operand width
 * so that a count of zero (or a multiple of the width) never turns into a
 * shift by the full width of the type, which is undefined behaviour in C.
 *
 * NOTE: `x` must have the width named by the macro (32 or 64 bits), and `r`
 * is evaluated more than once, so it must be free of side effects.
 */
#define rotl32(x, r) (((x) << ((r) & 31)) | ((x) >> ((32 - ((r) & 31)) & 31)))
#define rotl64(x, r) (((x) << ((r) & 63)) | ((x) >> ((64 - ((r) & 63)) & 63)))
42
43
/**
 * AFUC emulator.  Currently only supports a6xx
 *
 * TODO to add a5xx it might be easier to compile this multiple times
 * with conditional compile to deal with differences between generations.
 */
49
50
static uint32_t
51
emu_alu(struct emu *emu, afuc_opc opc, uint32_t src1, uint32_t src2)
52
{
53
uint64_t tmp;
54
switch (opc) {
55
case OPC_ADD:
56
tmp = (uint64_t)src1 + (uint64_t)src2;
57
emu->carry = tmp >> 32;
58
return (uint32_t)tmp;
59
case OPC_ADDHI:
60
return src1 + src2 + emu->carry;
61
case OPC_SUB:
62
tmp = (uint64_t)src1 - (uint64_t)src2;
63
emu->carry = tmp >> 32;
64
return (uint32_t)tmp;
65
case OPC_SUBHI:
66
return src1 - src2 + emu->carry;
67
case OPC_AND:
68
return src1 & src2;
69
case OPC_OR:
70
return src1 | src2;
71
case OPC_XOR:
72
return src1 ^ src2;
73
case OPC_NOT:
74
return ~src1;
75
case OPC_SHL:
76
return src1 << src2;
77
case OPC_USHR:
78
return src1 >> src2;
79
case OPC_ISHR:
80
return (int32_t)src1 >> src2;
81
case OPC_ROT:
82
if (src2 & 0x80000000)
83
return rotl64(src1, -*(int32_t *)&src2);
84
else
85
return rotl32(src1, src2);
86
case OPC_MUL8:
87
return (src1 & 0xff) * (src2 & 0xff);
88
case OPC_MIN:
89
return MIN2(src1, src2);
90
case OPC_MAX:
91
return MAX2(src1, src2);
92
case OPC_CMP:
93
if (src1 > src2)
94
return 0x00;
95
else if (src1 == src2)
96
return 0x2b;
97
return 0x1e;
98
case OPC_MSB:
99
if (!src2)
100
return 0;
101
return util_last_bit(src2) - 1;
102
default:
103
printf("unhandled alu opc: 0x%02x\n", opc);
104
exit(1);
105
}
106
}
107
108
/**
109
* Helper to calculate load/store address based on LOAD_STORE_HI
110
*/
111
static uintptr_t
112
load_store_addr(struct emu *emu, unsigned gpr)
113
{
114
EMU_CONTROL_REG(LOAD_STORE_HI);
115
116
uintptr_t addr = emu_get_reg32(emu, &LOAD_STORE_HI);
117
addr <<= 32;
118
119
return addr + emu_get_gpr_reg(emu, gpr);
120
}
121
122
static void
123
emu_instr(struct emu *emu, afuc_instr *instr)
124
{
125
uint32_t rem = emu_get_gpr_reg(emu, REG_REM);
126
afuc_opc opc;
127
bool rep;
128
129
afuc_get_opc(instr, &opc, &rep);
130
131
switch (opc) {
132
case OPC_NOP:
133
break;
134
case OPC_ADD ... OPC_CMP: {
135
uint32_t val = emu_alu(emu, opc,
136
emu_get_gpr_reg(emu, instr->alui.src),
137
instr->alui.uimm);
138
emu_set_gpr_reg(emu, instr->alui.dst, val);
139
break;
140
}
141
case OPC_MOVI: {
142
uint32_t val = instr->movi.uimm << instr->movi.shift;
143
emu_set_gpr_reg(emu, instr->movi.dst, val);
144
break;
145
}
146
case OPC_ALU: {
147
uint32_t val = emu_alu(emu, instr->alu.alu,
148
emu_get_gpr_reg(emu, instr->alu.src1),
149
emu_get_gpr_reg(emu, instr->alu.src2));
150
emu_set_gpr_reg(emu, instr->alu.dst, val);
151
152
if (instr->alu.xmov) {
153
unsigned m = MIN2(instr->alu.xmov, rem);
154
155
assert(m <= 3);
156
157
if (m == 1) {
158
emu_set_gpr_reg(emu, REG_REM, --rem);
159
emu_dump_state_change(emu);
160
emu_set_gpr_reg(emu, REG_DATA,
161
emu_get_gpr_reg(emu, instr->alu.src2));
162
} else if (m == 2) {
163
emu_set_gpr_reg(emu, REG_REM, --rem);
164
emu_dump_state_change(emu);
165
emu_set_gpr_reg(emu, REG_DATA,
166
emu_get_gpr_reg(emu, instr->alu.src2));
167
emu_set_gpr_reg(emu, REG_REM, --rem);
168
emu_dump_state_change(emu);
169
emu_set_gpr_reg(emu, REG_DATA,
170
emu_get_gpr_reg(emu, instr->alu.src2));
171
} else if (m == 3) {
172
emu_set_gpr_reg(emu, REG_REM, --rem);
173
emu_dump_state_change(emu);
174
emu_set_gpr_reg(emu, REG_DATA,
175
emu_get_gpr_reg(emu, instr->alu.src2));
176
emu_set_gpr_reg(emu, REG_REM, --rem);
177
emu_dump_state_change(emu);
178
emu_set_gpr_reg(emu, instr->alu.dst,
179
emu_get_gpr_reg(emu, instr->alu.src2));
180
emu_set_gpr_reg(emu, REG_REM, --rem);
181
emu_dump_state_change(emu);
182
emu_set_gpr_reg(emu, REG_DATA,
183
emu_get_gpr_reg(emu, instr->alu.src2));
184
}
185
}
186
break;
187
}
188
case OPC_CWRITE6: {
189
uint32_t src1 = emu_get_gpr_reg(emu, instr->control.src1);
190
uint32_t src2 = emu_get_gpr_reg(emu, instr->control.src2);
191
192
if (instr->control.flags == 0x4) {
193
emu_set_gpr_reg(emu, instr->control.src2, src2 + instr->control.uimm);
194
} else if (instr->control.flags && !emu->quiet) {
195
printf("unhandled flags: %x\n", instr->control.flags);
196
}
197
198
emu_set_control_reg(emu, src2 + instr->control.uimm, src1);
199
break;
200
}
201
case OPC_CREAD6: {
202
uint32_t src2 = emu_get_gpr_reg(emu, instr->control.src2);
203
204
if (instr->control.flags == 0x4) {
205
emu_set_gpr_reg(emu, instr->control.src2, src2 + instr->control.uimm);
206
} else if (instr->control.flags && !emu->quiet) {
207
printf("unhandled flags: %x\n", instr->control.flags);
208
}
209
210
emu_set_gpr_reg(emu, instr->control.src1,
211
emu_get_control_reg(emu, src2 + instr->control.uimm));
212
break;
213
}
214
case OPC_LOAD6: {
215
uintptr_t addr = load_store_addr(emu, instr->control.src2) +
216
instr->control.uimm;
217
218
if (instr->control.flags == 0x4) {
219
uint32_t src2 = emu_get_gpr_reg(emu, instr->control.src2);
220
emu_set_gpr_reg(emu, instr->control.src2, src2 + instr->control.uimm);
221
} else if (instr->control.flags && !emu->quiet) {
222
printf("unhandled flags: %x\n", instr->control.flags);
223
}
224
225
uint32_t val = emu_mem_read_dword(emu, addr);
226
227
emu_set_gpr_reg(emu, instr->control.src1, val);
228
229
break;
230
}
231
case OPC_STORE6: {
232
uintptr_t addr = load_store_addr(emu, instr->control.src2) +
233
instr->control.uimm;
234
235
if (instr->control.flags == 0x4) {
236
uint32_t src2 = emu_get_gpr_reg(emu, instr->control.src2);
237
emu_set_gpr_reg(emu, instr->control.src2, src2 + instr->control.uimm);
238
} else if (instr->control.flags && !emu->quiet) {
239
printf("unhandled flags: %x\n", instr->control.flags);
240
}
241
242
uint32_t val = emu_get_gpr_reg(emu, instr->control.src1);
243
244
emu_mem_write_dword(emu, addr, val);
245
246
break;
247
}
248
case OPC_BRNEI ... OPC_BREQB: {
249
uint32_t off = emu->gpr_regs.pc + instr->br.ioff;
250
uint32_t src = emu_get_gpr_reg(emu, instr->br.src);
251
252
if (opc == OPC_BRNEI) {
253
if (src != instr->br.bit_or_imm)
254
emu->branch_target = off;
255
} else if (opc == OPC_BREQI) {
256
if (src == instr->br.bit_or_imm)
257
emu->branch_target = off;
258
} else if (opc == OPC_BRNEB) {
259
if (!(src & (1 << instr->br.bit_or_imm)))
260
emu->branch_target = off;
261
} else if (opc == OPC_BREQB) {
262
if (src & (1 << instr->br.bit_or_imm))
263
emu->branch_target = off;
264
} else {
265
assert(0);
266
}
267
break;
268
}
269
case OPC_RET: {
270
assert(emu->call_stack_idx > 0);
271
272
/* counter-part to 'call' instruction, also has a delay slot: */
273
emu->branch_target = emu->call_stack[--emu->call_stack_idx];
274
275
break;
276
}
277
case OPC_CALL: {
278
assert(emu->call_stack_idx < ARRAY_SIZE(emu->call_stack));
279
280
/* call looks to have same delay-slot behavior as branch/etc, so
281
* presumably the return PC is two instructions later:
282
*/
283
emu->call_stack[emu->call_stack_idx++] = emu->gpr_regs.pc + 2;
284
emu->branch_target = instr->call.uoff;
285
286
break;
287
}
288
case OPC_WIN: {
289
assert(!emu->branch_target);
290
emu->run_mode = false;
291
emu->waitin = true;
292
break;
293
}
294
/* OPC_PREEMPTLEAVE6 */
295
case OPC_SETSECURE: {
296
// TODO this acts like a conditional branch, but in which case
297
// does it branch?
298
break;
299
}
300
default:
301
printf("unhandled opc: 0x%02x\n", opc);
302
exit(1);
303
}
304
305
if (rep) {
306
assert(rem > 0);
307
emu_set_gpr_reg(emu, REG_REM, --rem);
308
}
309
}
310
311
void
312
emu_step(struct emu *emu)
313
{
314
afuc_instr *instr = (void *)&emu->instrs[emu->gpr_regs.pc];
315
afuc_opc opc;
316
bool rep;
317
318
emu_main_prompt(emu);
319
320
uint32_t branch_target = emu->branch_target;
321
emu->branch_target = 0;
322
323
bool waitin = emu->waitin;
324
emu->waitin = false;
325
326
afuc_get_opc(instr, &opc, &rep);
327
328
if (rep) {
329
do {
330
if (!emu_get_gpr_reg(emu, REG_REM))
331
break;
332
333
emu_clear_state_change(emu);
334
emu_instr(emu, instr);
335
336
/* defer last state-change dump until after any
337
* post-delay-slot handling below:
338
*/
339
if (emu_get_gpr_reg(emu, REG_REM))
340
emu_dump_state_change(emu);
341
} while (true);
342
} else {
343
emu_clear_state_change(emu);
344
emu_instr(emu, instr);
345
}
346
347
emu->gpr_regs.pc++;
348
349
if (branch_target) {
350
emu->gpr_regs.pc = branch_target;
351
}
352
353
if (waitin) {
354
uint32_t hdr = emu_get_gpr_reg(emu, 1);
355
uint32_t id, count;
356
357
if (pkt_is_type4(hdr)) {
358
id = afuc_pm4_id("PKT4");
359
count = type4_pkt_size(hdr);
360
361
/* Possibly a hack, not sure what the hw actually
362
* does here, but we want to mask out the pkt
363
* type field from the hdr, so that PKT4 handler
364
* doesn't see it and interpret it as part as the
365
* register offset:
366
*/
367
emu->gpr_regs.val[1] &= 0x0fffffff;
368
} else if (pkt_is_type7(hdr)) {
369
id = cp_type7_opcode(hdr);
370
count = type7_pkt_size(hdr);
371
} else {
372
printf("Invalid opcode: 0x%08x\n", hdr);
373
exit(1); /* GPU goes *boom* */
374
}
375
376
assert(id < ARRAY_SIZE(emu->jmptbl));
377
378
emu_set_gpr_reg(emu, REG_REM, count);
379
emu->gpr_regs.pc = emu->jmptbl[id];
380
}
381
382
emu_dump_state_change(emu);
383
}
384
385
void
386
emu_run_bootstrap(struct emu *emu)
387
{
388
EMU_CONTROL_REG(PACKET_TABLE_WRITE_ADDR);
389
390
emu->quiet = true;
391
emu->run_mode = true;
392
393
while (emu_get_reg32(emu, &PACKET_TABLE_WRITE_ADDR) < 0x80) {
394
emu_step(emu);
395
}
396
}
397
398
399
static void
400
check_access(struct emu *emu, uintptr_t gpuaddr, unsigned sz)
401
{
402
if ((gpuaddr % sz) != 0) {
403
printf("unaligned access fault: %p\n", (void *)gpuaddr);
404
exit(1);
405
}
406
407
if ((gpuaddr + sz) >= EMU_MEMORY_SIZE) {
408
printf("iova fault: %p\n", (void *)gpuaddr);
409
exit(1);
410
}
411
}
412
413
uint32_t
414
emu_mem_read_dword(struct emu *emu, uintptr_t gpuaddr)
415
{
416
check_access(emu, gpuaddr, 4);
417
return *(uint32_t *)(emu->gpumem + gpuaddr);
418
}
419
420
static void
421
mem_write_dword(struct emu *emu, uintptr_t gpuaddr, uint32_t val)
422
{
423
check_access(emu, gpuaddr, 4);
424
*(uint32_t *)(emu->gpumem + gpuaddr) = val;
425
}
426
427
void
428
emu_mem_write_dword(struct emu *emu, uintptr_t gpuaddr, uint32_t val)
429
{
430
mem_write_dword(emu, gpuaddr, val);
431
assert(emu->gpumem_written == ~0);
432
emu->gpumem_written = gpuaddr;
433
}
434
435
void
436
emu_init(struct emu *emu)
437
{
438
emu->gpumem = mmap(NULL, EMU_MEMORY_SIZE,
439
PROT_READ | PROT_WRITE,
440
MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE,
441
0, 0);
442
if (emu->gpumem == MAP_FAILED) {
443
printf("Could not allocate GPU memory: %s\n", strerror(errno));
444
exit(1);
445
}
446
447
/* Copy the instructions into GPU memory: */
448
for (unsigned i = 0; i < emu->sizedwords; i++) {
449
mem_write_dword(emu, EMU_INSTR_BASE + (4 * i), emu->instrs[i]);
450
}
451
452
EMU_GPU_REG(CP_SQE_INSTR_BASE);
453
EMU_GPU_REG(CP_LPAC_SQE_INSTR_BASE);
454
455
/* Setup the address of the SQE fw, just use the normal CPU ptr address: */
456
if (emu->lpac) {
457
emu_set_reg64(emu, &CP_LPAC_SQE_INSTR_BASE, EMU_INSTR_BASE);
458
} else {
459
emu_set_reg64(emu, &CP_SQE_INSTR_BASE, EMU_INSTR_BASE);
460
}
461
462
if (emu->gpu_id == 660) {
463
emu_set_control_reg(emu, 0, 3 << 28);
464
} else if (emu->gpu_id == 650) {
465
emu_set_control_reg(emu, 0, 1 << 28);
466
}
467
}
468
469
void
470
emu_fini(struct emu *emu)
471
{
472
uint32_t *instrs = emu->instrs;
473
unsigned sizedwords = emu->sizedwords;
474
unsigned gpu_id = emu->gpu_id;
475
476
munmap(emu->gpumem, EMU_MEMORY_SIZE);
477
memset(emu, 0, sizeof(*emu));
478
479
emu->instrs = instrs;
480
emu->sizedwords = sizedwords;
481
emu->gpu_id = gpu_id;
482
}
483
484