CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/ext/disarm.cpp
Views: 1401
1
/* disarm -- a simple disassembler for ARM instructions
2
* (c) 2000 Gareth McCaughan
3
*
4
* This file may be distributed and used freely provided:
5
* 1. You do not distribute any version that lacks this
6
* copyright notice (exactly as it appears here, extending
7
* from the start to the end of the C-language comment
8
* containing these words)); and,
9
* 2. If you distribute any modified version, its source
10
* contains a clear description of the ways in which
11
* it differs from the original version, and a clear
12
* indication that the changes are not mine.
13
* There is no restriction on your permission to use and
14
* distribute object code or executable code derived from
15
* this.
16
*
17
* The original version of this file (or perhaps a later
18
* version by the original author) may or may not be
19
* available at http://web.ukonline.co.uk/g.mccaughan/g/software.html .
20
*
21
* Share and enjoy! -- g
22
*/
23
24
/* (*This* comment is NOT part of the notice mentioned in the
25
* distribution conditions above.)
26
*
27
* The bulk of this code was ripped brutally from the middle
28
* of a much more interesting piece of software whose purpose
29
* is to disassemble object files in the format known as AOF;
30
* it's quite clever at spotting blocks of non-code embedded
31
* in code, identifying labels, and so on.
32
*
33
* This program, on the other hand, is very much simpler.
34
* It simply disassembles one instruction at a time. Some
35
* traces of the original purpose can be seen here and there.
36
* You might want to make this do a two-phase disassembly,
37
* adding labels etc the second time around. I've made this
38
* work by loading the whole file into memory first, partly
39
* because that makes a two-pass approach easier.
40
*
41
* One word of warning: I believe that the syntax this program
42
* uses for the MSR instruction is now obsolete.
43
*
44
* Usage:
45
* disarm <filename> <base-address>
46
* will disassemble every word in <filename>.
47
*
48
* <base-address> should be something understood by strtol.
49
* So you can get hex (which is probably what you want)
50
* by prefixing "0x".
51
*
52
* The -r option will byte-reverse each word before it's
53
* disassembled.
54
*
55
* The code is rather unmaintainable. I'm sorry.
56
*
57
* Changes since original release:
58
* ????-??-?? v0.00 Initial release.
59
* 2007-09-02 v0.11 Change %X to %lX in a format string.
60
* (Thanks to Vincent Zweije for reporting this.)
61
*/
62
63
#ifdef __clang__
64
#pragma GCC diagnostic push
65
#pragma GCC diagnostic ignored "-Wtautological-compare" //used to avoid warning, force compiler to accept it.
66
#pragma GCC diagnostic ignored "-Wstring-plus-int"
67
#endif
68
69
#include "ppsspp_config.h"
70
#include <cstdio>
71
#include <cstdlib>
72
#include <cstring>
73
74
#include "Common/ArmEmitter.h"
75
#include "ext/disarm.h"
76
77
// Condition-code suffixes, indexed by the 4-bit condition field of an
// instruction word (op >> 28). The table has 16 entries so that any value of
// that field is a valid index.
// NOTE(review): index 1 is spelled "NEQ" here; the usual ARM mnemonic suffix
// is "NE" - left untouched because existing output may be relied upon.
static const char *CCFlagsStr[] = {
	"EQ", // Equal
	"NEQ", // Not equal
	"CS", // Carry Set
	"CC", // Carry Clear
	"MI", // Minus (Negative)
	"PL", // Plus
	"VS", // Overflow
	"VC", // No Overflow
	"HI", // Unsigned higher
	"LS", // Unsigned lower or same
	"GE", // Signed greater than or equal
	"LT", // Signed less than
	"GT", // Signed greater than
	"LE", // Signed less than or equal
	"", // Always (unconditional) 14
	"", // 15: printed without a suffix; keeps CCFlagsStr[op >> 28] in bounds
};
94
95
// Extracts the destination register number (Vd) from an instruction word.
//   op   - the instruction word
//   quad - halve the decoded number (Q-register numbering)
//   dbl  - use the 5-bit layout where bit 22 is the top bit
// With neither flag set, bits 15..12 form the upper four bits of the result
// and bit 22 the lowest bit. Otherwise bit 22 becomes bit 4 and bits 15..12
// the low four bits.
int GetVd(uint32_t op, bool quad = false, bool dbl = false) {
	const bool single = !quad && !dbl;
	const int val = single
		? (int)(((op >> 22) & 1) | ((op >> 11) & 0x1E))
		: (int)(((op >> 18) & 0x10) | ((op >> 12) & 0xF));
	return quad ? (val >> 1) : val;
}
106
107
// Extracts the first source register number (Vn) from an instruction word.
//   op   - the instruction word
//   quad - halve the decoded number (Q-register numbering)
//   dbl  - use the 5-bit layout where bit 7 is the top bit
// With neither flag set, bits 19..16 form the upper four bits of the result
// and bit 7 the lowest bit. Otherwise bit 7 becomes bit 4 and bits 19..16
// the low four bits.
int GetVn(uint32_t op, bool quad = false, bool dbl = false) {
	const bool single = !quad && !dbl;
	const int val = single
		? (int)(((op >> 7) & 1) | ((op >> 15) & 0x1E))
		: (int)(((op >> 16) & 0xF) | ((op >> 3) & 0x10));
	return quad ? (val >> 1) : val;
}
118
119
// Extracts the second source register number (Vm) from an instruction word.
//   op   - the instruction word
//   quad - halve the decoded number (Q-register numbering)
//   dbl  - use the 5-bit layout where bit 5 is the top bit
// With neither flag set, bits 3..0 form the upper four bits of the result
// and bit 5 the lowest bit. Otherwise bit 5 becomes bit 4 and bits 3..0
// the low four bits.
int GetVm(uint32_t op, bool quad = false, bool dbl = false) {
	const bool single = !quad && !dbl;
	const int val = single
		? (int)(((op >> 5) & 1) | ((op << 1) & 0x1E))
		: (int)(((op >> 1) & 0x10) | (op & 0xF));
	return quad ? (val >> 1) : val;
}
130
131
132
// Modern VFP disassembler, written entirely separately because I can't figure out the old stuff :P
133
// Horrible array of hacks but hey. Can be cleaned up later.
134
135
bool DisasmVFP(uint32_t op, char *text) {
136
#if defined(__ANDROID__) && PPSSPP_ARCH(X86)
137
// Prevent linking errors with ArmEmitter which I've excluded on x86 android.
138
strcpy(text, "ARM disasm not available");
139
#else
140
const char *cond = CCFlagsStr[op >> 28];
141
switch ((op >> 24) & 0xF) {
142
case 0xC:
143
// VLDMIA/VSTMIA
144
{
145
bool single_reg = ((op >> 8) & 0xF) == 10;
146
int freg = ((op >> 11) & 0x1E) | ((op >> 22) & 1);
147
int base = (op >> 16) & 0xF;
148
bool load = (op >> 20) & 1;
149
bool writeback = (op >> 21) & 1;
150
int numregs = op & 0xF;
151
bool add = (op >> 23) & 1;
152
if (add && writeback && load && base == 13) {
153
if (single_reg)
154
sprintf(text, "VPOP%s {s%i-s%i}", cond, freg, freg-1+numregs);
155
else
156
sprintf(text, "VPOP%s {d%i-d%i}", cond, freg, freg-1+(numregs/2));
157
158
return true;
159
}
160
if (single_reg)
161
sprintf(text, "%s%s r%i%s, {s%i-s%i}", load ? "VLDMIA" : "VSTMIA", cond, base, writeback ? "!":"", freg, freg-1+numregs);
162
else
163
sprintf(text, "%s%s r%i%s, {d%i-d%i}", load ? "VLDMIA" : "VSTMIA", cond, base, writeback ? "!":"", freg, freg-1+(numregs/2));
164
165
return true;
166
}
167
case 0xD:
168
// VLDR/VSTR/VLDMDB/VSTMDB
169
{
170
bool single_reg = ((op >> 8) & 0xF) == 10;
171
int freg = ((op >> 11) & 0x1E) | ((op >> 22) & 1);
172
int base = (op >> 16) & 0xF;
173
bool load = (op >> 20) & 1;
174
bool add = (op >> 23) & 1;
175
bool writeback = (op >> 21) & 1;
176
if (writeback) { // Multiple
177
int numregs = op & 0xF;
178
if (!add && !load && base == 13) {
179
if (single_reg)
180
sprintf(text, "VPUSH%s {s%i-s%i}", cond, freg, freg-1+numregs);
181
else
182
sprintf(text, "VPUSH%s {d%i-d%i}", cond, freg, freg-1+(numregs/2));
183
184
return true;
185
}
186
187
if (single_reg)
188
sprintf(text, "%s%s r%i, {s%i-s%i}", load ? "VLDMDB" : "VSTMDB", cond, base, freg, freg-1+numregs);
189
else
190
sprintf(text, "%s%s r%i, {d%i-d%i}", load ? "VLDMDB" : "VSTMDB", cond, base, freg, freg-1+(numregs/2));
191
} else {
192
int offset = (op & 0xFF) << 2;
193
if (!add) offset = -offset;
194
sprintf(text, "%s%s s%i, [r%i, #%i]", load ? "VLDR" : "VSTR", cond, freg, base, offset);
195
}
196
197
return true;
198
}
199
200
case 0xE:
201
{
202
switch ((op >> 20) & 0xF) {
203
case 0xE: // VMSR
204
if ((op & 0xFFF) != 0xA10)
205
break;
206
sprintf(text, "VMSR%s r%i", cond, (op >> 12) & 0xF);
207
return true;
208
case 0xF: // VMRS
209
if ((op & 0xFFF) != 0xA10)
210
break;
211
if (op == 0xEEF1FA10) {
212
sprintf(text, "VMRS%s APSR", cond);
213
} else {
214
sprintf(text, "VMRS%s r%i", cond, (op >> 12) & 0xF);
215
}
216
return true;
217
default:
218
break;
219
}
220
221
if (((op >> 19) & 0x7) == 0x7) {
222
// VCVT
223
sprintf(text, "VCVT ...");
224
return true;
225
}
226
227
bool quad_reg = (op >> 6) & 1;
228
bool double_reg = (op >> 8) & 1;
229
char c = double_reg ? 'd' : 's';
230
231
int part1 = ((op >> 23) & 0x1F);
232
int part2 = ((op >> 9) & 0x7) ;
233
int part3 = ((op >> 20) & 0x3) ;
234
if (part3 == 3 && part2 == 5 && part1 == 0x1D) {
235
// VMOV, VCMP
236
int vn = GetVn(op);
237
if (vn != 1 && vn != 2 && vn != 3) {
238
int vm = GetVm(op, false, double_reg);
239
int vd = GetVd(op, false, double_reg);
240
241
const char *name = "VMOV";
242
if (op & 0x40000)
243
name = (op & 0x80) ? "VCMPE" : "VCMP";
244
sprintf(text, "%s%s %c%i, %c%i", name, cond, c, vd, c, vm);
245
return true;
246
}
247
}
248
249
// Moves between single precision registers and GPRs
250
if (((op >> 20) & 0xFFE) == 0xEE0) {
251
int vd = ((op >> 15) & 0x1E) | ((op >> 7) & 0x1);
252
int src = (op >> 12) & 0xF;
253
254
if (op & (1 << 20))
255
sprintf(text, "VMOV r%i, s%i", src, vd);
256
else
257
sprintf(text, "VMOV s%i, r%i", vd, src);
258
return true;
259
}
260
261
// Arithmetic
262
263
int opnum = -1;
264
int opc1 = (op >> 20) & 0xFB;
265
int opc2 = (op >> 4) & 0xAC;
266
for (int i = 0; i < 16; i++) {
267
// What the hell?
268
int fixed_opc2 = opc2;
269
if (!(ArmGen::VFPOps[i][0].opc2 & 0x8))
270
fixed_opc2 &= 0xA7;
271
if (ArmGen::VFPOps[i][0].opc1 == opc1 && ArmGen::VFPOps[i][0].opc2 == fixed_opc2) {
272
opnum = i;
273
break;
274
}
275
}
276
if (opnum < 0)
277
return false;
278
switch (opnum) {
279
case 8:
280
case 10:
281
case 11:
282
case 12:
283
case 13:
284
case 14:
285
{
286
quad_reg = false;
287
int vd = GetVd(op, quad_reg, double_reg);
288
int vn = GetVn(op, quad_reg, true);
289
int vm = GetVm(op, quad_reg, double_reg);
290
if (opnum == 8 && vn == 0x11)
291
opnum += 3;
292
sprintf(text, "%s%s %c%i, %c%i", ArmGen::VFPOpNames[opnum], cond, c, vd, c, vm);
293
return true;
294
}
295
default:
296
{
297
quad_reg = false;
298
int vd = GetVd(op, quad_reg, double_reg);
299
int vn = GetVn(op, quad_reg, double_reg);
300
int vm = GetVm(op, quad_reg, double_reg);
301
sprintf(text, "%s%s %c%i, %c%i, %c%i", ArmGen::VFPOpNames[opnum], cond, c, vd, c, vn, c, vm);
302
return true;
303
}
304
}
305
return true;
306
}
307
break;
308
}
309
#endif
310
return false;
311
}
312
313
// Maps a 2-bit size code (0..3) to the element width in bits as text:
// "8", "16", "32" or "64". Any other value yields "(err)".
static const char *GetSizeString(int sz) {
	static const char * const widths[] = { "8", "16", "32", "64" };
	if (sz < 0 || sz > 3)
		return "(err)";
	return widths[sz];
}
327
328
// Maps a 2-bit size code (0..3) to an integer element type name:
// "i8", "i16", "i32" or "i64". Any other value yields "(err)".
static const char *GetISizeString(int sz) {
	static const char * const names[] = { "i8", "i16", "i32", "i64" };
	if (sz < 0 || sz > 3)
		return "(err)";
	return names[sz];
}
342
343
// Returns the number of registers transferred for a given "type" code
// (callers pass bits 11..8 of the instruction). Unknown codes return 0.
static int GetRegCount(int type) {
	switch (type) {
	case 7:
		return 1;
	case 10:
		return 2;
	case 6:
		return 3;
	case 4:
		return 4;
	default:
		return 0;
	}
}
353
354
// VLD1 / VST1
355
static bool DisasmNeonLDST(uint32_t op, char *text) {
356
bool load = (op >> 21) & 1;
357
int Rn = (op >> 16) & 0xF;
358
int Rm = (op & 0xF);
359
int Vd = GetVd(op, false, true);
360
361
const char *name = load ? "LD" : "ST";
362
const char *suffix = "";
363
if (Rm == 13)
364
suffix = "!";
365
366
if ((op & (1 << 23)) == 0) {
367
int sz = (op >> 6) & 3;
368
int regCount = GetRegCount((op >> 8) & 0xF);
369
370
int startReg = Vd;
371
int endReg = Vd + regCount - 1;
372
373
if (Rm != 15 && Rm != 13) {
374
sprintf(text, "V%s1 - regsum", name);
375
} else {
376
if (startReg == endReg)
377
sprintf(text, "V%s1.%s {d%i}, [r%i]%s", name, GetSizeString(sz), startReg, Rn, suffix);
378
else
379
sprintf(text, "V%s1.%s {d%i-d%i}, [r%i]%s", name, GetSizeString(sz), startReg, endReg, Rn, suffix);
380
}
381
} else {
382
int reg = Vd;
383
int sz = (op >> 10) & 3;
384
int index_align = (op >> 4) & 0xF;
385
int lane = 0;
386
switch (sz) {
387
case 0: lane = index_align >> 1; break;
388
case 1: lane = index_align >> 2; break;
389
case 2: lane = index_align >> 3; break;
390
}
391
if (Rm != 15) {
392
sprintf(text, "V%s1 d[0] - regsum", name);
393
} else {
394
sprintf(text, "V%s1.%s {d%i[%i]}, [r%i]%s", name, sz == 2 ? GetSizeString(sz) : GetISizeString(sz), reg, lane, Rn, suffix);
395
}
396
}
397
398
return true;
399
}
400
401
static bool DisasmArithNeon(uint32_t op, const char *opname, char *text, bool includeSuffix = true) {
402
bool quad = ((op >> 6) & 1);
403
int size = (op >> 20) & 3;
404
int type = (op >> 8) & 0xF;
405
char r = quad ? 'q' : 'd';
406
const char *szname = GetISizeString(size);
407
if (type == 0xD || type == 0xF)
408
szname = "f32";
409
410
int Vd = GetVd(op, quad, true);
411
int Vn = GetVn(op, quad, true);
412
int Vm = GetVm(op, quad, true);
413
sprintf(text, "V%s%s%s %c%i, %c%i, %c%i", opname, includeSuffix ? "." : "", includeSuffix ? szname : "", r, Vd, r, Vn, r, Vm);
414
return true;
415
}
416
417
static bool DisasmNeonImmVal(uint32_t op, char *text) {
418
using namespace ArmGen;
419
int opcode = (op >> 5) & 1;
420
int cmode = (op >> 8) & 0xF;
421
int imm = ((op >> 17) & 0x80) | ((op >> 12) & 0x70) | (op & 0xF);
422
int quad = (op >> 6) & 1;
423
const char *operation = "MOV";
424
const char *size = "(unk)";
425
char temp[64] = "(unk)";
426
switch (cmode) {
427
case VIMM___x___x:
428
case VIMM___x___x + 1:
429
sprintf(temp, "000000%02x_000000%02x", imm, imm);
430
size = ".i32";
431
break;
432
case VIMM__x___x_:
433
case VIMM__x___x_ + 1:
434
sprintf(temp, "0000%02x00_0000%02x00", imm, imm);
435
size = ".i32";
436
break;
437
case VIMM_x___x__:
438
case VIMM_x___x__ + 1:
439
sprintf(temp, "00%02x0000_00%02x0000", imm, imm);
440
size = ".i32";
441
break;
442
case VIMMx___x___:
443
case VIMMx___x___ + 1:
444
sprintf(temp, "%02x000000_%02x000000", imm, imm);
445
size = ".i32";
446
break;
447
448
// TODO: More
449
450
case VIMMf000f000:
451
if (opcode == 0) {
452
// TODO: Do this properly
453
float f = 1337;
454
switch (imm) {
455
case 0: f = 0.0f; break;
456
case 0x78: f = 1.5; break;
457
case 0x70: f = 1.0; break;
458
case 0xF0: f = -1.0; break;
459
}
460
sprintf(temp, "%1.1f", f);
461
size = "";
462
break;
463
}
464
}
465
char c = quad ? 'q' : 'd';
466
sprintf(text, "V%s%s %c%i, %s", operation, size, c, GetVd(op, false, false), temp);
467
return true;
468
}
469
470
static bool DisasmNeon2Op(uint32_t op, char *text) {
471
const char *opname = "(unk2op)";
472
473
bool quad = (op >> 6) & 1;
474
bool quadD = quad;
475
bool doubleD = false;
476
// VNEG, VABS
477
if (op & (1 << 16))
478
opname = "NEG";
479
480
int opcode = (op >> 6) & 0xF;
481
int sz = (op >> 18) & 3;
482
const char *size = "f32";
483
switch (opcode) {
484
case 0xE:
485
opname = "NEG";
486
size = GetISizeString(sz);
487
break;
488
case 0xD:
489
opname = "ABS";
490
size = GetISizeString(sz);
491
break;
492
case 0x7:
493
opname = "MVN";
494
size = ""; // MVN surely has no "size"?
495
break;
496
case 0x8:
497
opname = "MOVN"; // narrow, not negate
498
size = GetISizeString(sz + 1);
499
quad = true;
500
quadD = false;
501
doubleD = true;
502
break;
503
case 0xC:
504
opname = "SHLL"; // widen and shift
505
size = GetISizeString(sz);
506
quad = false;
507
quadD = true;
508
doubleD = true;
509
break;
510
}
511
512
int Vd = GetVd(op, quadD, doubleD);
513
int Vm = GetVm(op, quad, false);
514
char cD = quadD ? 'q' : 'd';
515
char c = quad ? 'q' : 'd';
516
if (opcode == 0xC) {
517
sprintf(text, "V%s%s%s %c%i, %c%i, #%d", opname, strlen(size) ? "." : "", size, cD, Vd, c, Vm, 8 << sz);
518
} else {
519
sprintf(text, "V%s%s%s %c%i, %c%i", opname, strlen(size) ? "." : "", size, cD, Vd, c, Vm);
520
}
521
return true;
522
}
523
524
static bool DisasmVdup(uint32_t op, char *text) {
525
bool quad = (op >> 6) & 1;
526
int imm4 = (op >> 16) & 0xF;
527
int Vd = GetVd(op, quad, false);
528
int Vm = GetVm(op, false, true);
529
char c = quad ? 'q' : 'd';
530
int index = 0;
531
int size = 0;
532
if (imm4 & 1) {
533
index = imm4 >> 1;
534
size = 0;
535
} else if (imm4 & 2) {
536
index = imm4 >> 2;
537
size = 1;
538
} else if (imm4 & 4) {
539
index = imm4 >> 3;
540
size = 2;
541
}
542
543
sprintf(text, "VDUP.%s %c%i, d%i[%i]", GetSizeString(size), c, Vd, Vm, index);
544
return true;
545
}
546
547
static bool DisasmNeonVecScalar(uint32_t op, char *text) {
548
bool quad = (op >> 24) & 1;
549
550
int Vd = GetVd(op, quad, true);
551
int Vn = GetVn(op, quad, true);
552
int Vm = GetVm(op, false, false);
553
554
char c = quad ? 'q' : 'd';
555
556
const char *opname = "(unk)";
557
const char *size = "f32";
558
559
switch ((op >> 4) & 0xFF) {
560
case 0x94:
561
case 0x9C:
562
opname = "VMUL";
563
break;
564
case 0x14:
565
case 0x1C:
566
case 0x1E: // Hmmm.. Should look this up :P
567
opname = "VMLA";
568
break;
569
}
570
571
int part = Vm & 1;
572
int reg = Vm >> 1;
573
sprintf(text, "%s.%s %c%i, %c%i, d%i[%i]", opname, size, c, Vd, c, Vn, reg, part);
574
return true;
575
}
576
577
// Decodes the 7-bit size-and-shift field of a NEON shift-by-immediate.
//   U       - when true, the returned type name is unsigned ("uN")
//   sign    - when U is false: true gives "sN", false gives "iN"
//   inverse - when true, the shift is (element bits - low bits) rather than
//             the low bits themselves
//   imm7    - the 7-bit field; its highest set bit among bits 6..3 selects the
//             element width (64, 32, 16 or 8), the bits below are the amount
//   incSize - when true, the returned type name is one size larger than the
//             element width (64 stays 64)
//   shift   - receives the decoded shift, or -1 for an invalid encoding
// Returns the element type name; an invalid encoding returns "i32".
const char *DecodeSizeAndShiftImm7(bool U, bool sign, bool inverse, int imm7, bool incSize, int *shift) {
	// Type names indexed by [log2(bits / 8)][kind]; kind 0 = unsigned,
	// 1 = signed, 2 = plain integer.
	static const char * const typeNames[4][3] = {
		{ "u8", "s8", "i8" },
		{ "u16", "s16", "i16" },
		{ "u32", "s32", "i32" },
		{ "u64", "s64", "i64" },
	};

	int sizeIndex;
	if (imm7 & 64) {
		sizeIndex = 3;
	} else if (imm7 & 32) {
		sizeIndex = 2;
	} else if (imm7 & 16) {
		sizeIndex = 1;
	} else if (imm7 & 8) {
		sizeIndex = 0;
	} else {
		// Invalid encoding
		*shift = -1;
		return "i32";
	}

	const int bits = 8 << sizeIndex;
	const int amount = imm7 & (bits - 1);
	*shift = inverse ? bits - amount : amount;

	// Narrowing operations are named after their (wider) source type.
	if (incSize && sizeIndex < 3)
		sizeIndex++;

	const int kind = U ? 0 : (sign ? 1 : 2);
	return typeNames[sizeIndex][kind];
}
619
620
// What a horror show!
621
static bool DisasmNeon2RegShiftImm(uint32_t op, char *text) {
622
bool U = (op >> 24) & 1;
623
bool quadDest = false;
624
bool quadSrc = false;
625
bool incSize = false;
626
627
const char *opname = "(unk)";
628
int opcode = (op >> 8) & 0xF;
629
bool inverse = false;
630
bool sign = false;
631
switch (opcode) {
632
case 0x5: opname = "VSHL"; quadDest = quadSrc = ((op >> 6) & 1); break;
633
case 0xA: opname = "VSHLL"; quadDest = true; quadSrc = false; sign = true; break;
634
case 0x0: opname = "VSHR"; sign = true; quadDest = quadSrc = ((op >> 6) & 1); inverse = true; break;
635
case 0x8: opname = "VSHRN"; quadDest = false; quadSrc = true; inverse = true; incSize = true; break;
636
default:
637
// Immediate value ops!
638
return DisasmNeonImmVal(op, text);
639
}
640
641
int Vd = GetVd(op, quadDest, true);
642
int Vm = GetVm(op, quadSrc, true);
643
644
char c1 = quadDest ? 'q' : 'd';
645
char c2 = quadSrc ? 'q' : 'd';
646
int imm7 = ((op >> 16) & 0x3f) | ((op & 0x80) >> 1);
647
int shift;
648
649
const char *size;
650
if (opcode == 0xA) {
651
if (imm7 & 0x40) {
652
sprintf(text, "neon2regshiftimm undefined %08x", op);
653
return true;
654
}
655
}
656
657
size = DecodeSizeAndShiftImm7(U, sign, inverse, imm7, incSize, &shift);
658
659
if (opcode == 0xA && shift == 0) {
660
opname = "VMOVL";
661
sprintf(text, "%s.%s %c%i, %c%i", opname, size, c1, Vd, c2, Vm);
662
} else {
663
sprintf(text, "%s.%s %c%i, %c%i, #%i", opname, size, c1, Vd, c2, Vm, shift);
664
}
665
return true;
666
}
667
668
static bool DisasmNeonF2F3(uint32_t op, char *text) {
669
sprintf(text, "NEON F2");
670
if (((op >> 20) & 0xFF8) == 0xF20 || ((op >> 20) & 0xFF8) == 0xF30) {
671
const char *opname = "(unk)";
672
bool includeSuffix = true;
673
int temp;
674
switch ((op >> 20) & 0xFF) {
675
case 0x20:
676
temp = (op >> 4) & 0xF1;
677
switch (temp) {
678
case 0x11:
679
opname = "AND";
680
includeSuffix = false;
681
break;
682
case 0xd1:
683
opname = "MLA";
684
break;
685
case 0x80:
686
case 0xd0:
687
opname = "ADD";
688
break;
689
case 0xF0:
690
opname = "MAX";
691
break;
692
}
693
return DisasmArithNeon(op, opname, text, includeSuffix);
694
case 0x22:
695
case 0x24:
696
temp = (op >> 4) & 0xF1;
697
switch (temp) {
698
case 0xF0:
699
opname = "MIN";
700
break;
701
case 0x11:
702
opname = "ORR";
703
includeSuffix = false;
704
break;
705
case 0x80:
706
case 0xd0:
707
opname = "ADD";
708
break;
709
case 0xd1:
710
opname = "MLS";
711
break;
712
default:
713
opname = "???";
714
break;
715
}
716
return DisasmArithNeon(op, opname, text, includeSuffix);
717
case 0x31:
718
if (op & 0x100)
719
opname = "MLS";
720
else
721
opname = "SUB";
722
return DisasmArithNeon(op, opname, text);
723
case 0x30:
724
case 0x34:
725
temp = (op >> 4) & 0xF1;
726
switch (temp) {
727
case 0x11:
728
opname = "EOR";
729
includeSuffix = false;
730
break;
731
case 0xd0:
732
opname = "PADD";
733
break;
734
default:
735
opname = "MUL";
736
}
737
return DisasmArithNeon(op, opname, text, includeSuffix);
738
}
739
} else if ((op & 0xFE800010) == 0xF2800010) {
740
// Two regs and a shift amount
741
return DisasmNeon2RegShiftImm(op, text);
742
} else if ((op >> 20) == 0xF3E || (op >> 20) == 0xF2E || (op >> 20) == 0xF3A || (op >> 20) == 0xF2A) {
743
return DisasmNeonVecScalar(op, text);
744
} else if ((op >> 20) == 0xF3B && ((op >> 4) & 1) == 0) {
745
return DisasmNeon2Op(op, text);
746
} else if ((op >> 20) == 0xF3F) {
747
return DisasmVdup(op, text);
748
}
749
return true;
750
}
751
752
static bool DisasmNeon(uint32_t op, char *text) {
753
switch (op >> 24) {
754
case 0xF4:
755
return DisasmNeonLDST(op, text);
756
case 0xF2:
757
case 0xF3:
758
return DisasmNeonF2F3(op, text);
759
}
760
return false;
761
}
762
763
bool ArmAnalyzeLoadStore(uint32_t addr, uint32_t op, ArmLSInstructionInfo *info) {
764
*info = {};
765
info->instructionSize = 4;
766
767
// TODO
768
769
return false;
770
}
771
772
773
/* Basic integer types used by the disassembler below. */
typedef unsigned int word;
typedef unsigned int address;
typedef unsigned int addrdiff;
#define W(x) ((word*)(x))

/* Helpers for declaring structs: declstruct(Foo) introduces the typedefs
 * sFoo (the struct) and pFoo (pointer to it); defstruct(Foo) opens the
 * struct definition itself; defequiv(New,Old) makes sNew/pNew aliases for
 * struct Old.
 */
#define declstruct(name) typedef struct name s##name, * p##name
#define defstruct(name) struct name
#define defequiv(new,old) typedef struct old s##new, * p##new

declstruct(DisOptions);
declstruct(Instruction);
784
785
/* What kind of thing (if any) an instruction's address operand refers to. */
typedef enum {
	target_None,    /* instruction doesn't refer to an address */
	target_Data,    /* instruction refers to address of data */
	target_FloatS,  /* instruction refers to address of single-float */
	target_FloatD,  /* instruction refers to address of double-float */
	target_FloatE,  /* blah blah extended-float */
	target_FloatP,  /* blah blah packed decimal float */
	target_Code,    /* instruction refers to address of code */
	target_Unknown  /* instruction refers to address of *something* */
} eTargetType;
795
796
/* Result of disassembling one instruction. */
defstruct(Instruction) {
	char text[128];          /* the disassembled instruction */
	int undefined;           /* non-0 iff it's an undefined instr */
	int badbits;             /* non-0 iff something reserved has the wrong value */
	int oddbits;             /* non-0 iff something unspecified isn't 0 */
	int is_SWI;              /* non-0 iff it's a SWI */
	word swinum;             /* only set for SWIs */
	address target;          /* address instr refers to */
	eTargetType target_type; /* and what we expect to be there */
	int offset;              /* offset from register in LDR or STR or similar */
	char * addrstart;        /* start of address part of instruction, or 0 */
};
808
809
#define disopt_SWInames 1 /* use names, not &nnnn */
810
#define disopt_CommaSpace 2 /* put spaces after commas */
811
#define disopt_FIXS 4 /* bogus FIX syntax for ObjAsm */
812
#define disopt_ReverseBytes 8 /* byte-reverse words first */
813
814
defstruct(DisOptions) {
815
word flags;
816
const char * * regnames; /* pointer to 16 |char *|s: register names */
817
};
818
819
static pInstruction instr_disassemble(word, address, pDisOptions);
820
821
#define INSTR_grok_v4
822
823
/* Preprocessor defs you can give to affect this stuff:
824
* INSTR_grok_v4 understand ARMv4 instructions (halfword & sign-ext LDR/STR)
825
* INSTR_new_msr be prepared to produce new MSR syntax if asked
826
* The first of these is supported; the second isn't.
827
*/
828
829
/* Some important single-bit fields. */
830
831
#define Sbit (1<<20) /* set condition codes (data processing) */
#define Lbit (1<<20) /* load, not store (data transfer) */
#define Wbit (1<<21) /* writeback (data transfer) */
#define Bbit (1<<22) /* single byte (data transfer, SWP) */
#define Ubit (1<<23) /* up, not down (data transfer) */
#define Pbit (1<<24) /* pre-, not post-, indexed (data transfer) */
#define Ibit (1<<25) /* non-immediate (data transfer) */
                     /* immediate (data processing) */
#define SPSRbit (1<<22) /* SPSR, not CPSR (MRS, MSR) */

/* Some important 4-bit fields. */

#define RD(x) ((x)<<12) /* destination register */
#define RN(x) ((x)<<16) /* operand/base register */
#define CP(x) ((x)<<8) /* coprocessor number */
#define RDbits RD(15)
#define RNbits RN(15)
#define CPbits CP(15)
/* These three test the corresponding field of a variable named |instr|,
 * which must be in scope wherever they are used.
 */
#define RD_is(x) ((instr&RDbits)==RD(x))
#define RN_is(x) ((instr&RNbits)==RN(x))
#define CP_is(x) ((instr&CPbits)==CP(x))

/* A slightly efficient way of telling whether two bits are the same
 * or not. It's assumed that a<b. Reads a variable named |instr|.
 * The arguments are parenthesized so that expressions such as
 * BitsDiffer(n-1,n) expand correctly.
 */
#define BitsDiffer(a,b) ((instr^(instr>>((b)-(a))))&(1<<(a)))
857
858
/* op = append(op,ip) === op += sprintf(op,"%s",ip),
859
* except that it's faster.
860
*/
861
static char * append(char * op, const char *ip) {
	/* Copy the characters of |ip| (excluding the terminating NUL) to |op|
	 * and return the position just past the last one written. No NUL is
	 * written to the output.
	 */
	while (*ip != 0)
		*op++ = *ip++;
	return op;
}
866
867
/* op = hex8(op,w) === op += sprintf(op,"&%08lX",w), but faster.
868
*/
869
static char * hex8(char * op, word w) {
	/* Writes '&' followed by all eight hex digits of |w| (upper case, most
	 * significant first) and returns the position just past them. No NUL is
	 * written.
	 */
	static const char digits[] = "0123456789ABCDEF";
	int shift;
	*op++ = '&';
	for (shift = 28; shift >= 0; shift -= 4)
		*op++ = digits[(w >> shift) & 15];
	return op;
}
875
876
/* op = reg(op,'x',n) === op += sprintf(op,"x%lu",n&15).
877
*/
878
static char * reg(char * op, char c, word n) {
	/* Writes the prefix character |c| followed by the low four bits of |n|
	 * in decimal (0..15) and returns the position just past them. No NUL is
	 * written.
	 */
	*op++ = c;
	n &= 15;
	if (n >= 10) {
		*op++ = '1';
		n -= 10;
	}
	*op++ = (char)('0' + n);
	return op;
}
885
886
/* op = num(op,n) appends n in decimal or &n in hex
887
* depending on whether n<100. It's assumed that n>=0.
888
*/
889
static char * num(char * op, word w) {
890
if (w>=100) {
891
int i;
892
word t;
893
*op++='&';
894
for (i=28; (t=(w>>i)&15)==0; i-=4) ;
895
for (; i>=0; i-=4) *op++ = "0123456789ABCDEF"[(w>>i)&15];
896
}
897
else {
898
/* divide by 10. You can prove this works by exhaustive search. :-) */
899
word t = w-(w>>2); t=(t+(t>>4)) >> 3;
900
{ word u = w-10*t;
901
if (u==10) { u=0; ++t; }
902
if (t) *op++=(char)(t+'0');
903
*op++=(char)(u+'0');
904
}
905
}
906
return op;
907
}
908
909
/* instr_disassemble
910
* Disassemble a single instruction.
911
*
912
* args: instr a single ARM instruction
913
* addr the address it's presumed to have come from
914
* opts cosmetic preferences for our output
915
*
916
* reqs: opts must be filled in right. In particular, it must contain
917
* a list of register names.
918
*
919
* return: a pointer to a structure containing the disassembled instruction
920
* and some other information about it.
921
*
922
* This is basically a replacement for the SWI Debugger_Disassemble,
923
* but it has the following advantages:
924
*
925
* + it's 3-4 times as fast
926
* + it's better at identifying undefined instructions,
927
* and instructions not invariant under { disassemble; ObjAsm; }
928
* + it provides some other useful information as well
929
* + its output syntax is the same as ObjAsm's input syntax
930
* (where possible)
931
* + it doesn't disassemble FIX incorrectly unless you ask it to
932
* + it's more configurable in some respects
933
*
934
* It also has the following disadvantages:
935
*
936
* - it increases the size of ObjDism
937
* - it doesn't provide so many `helpful' usage comments etc
938
* - it's less configurable in some respects
939
* - it doesn't (yet) know about ARMv4 instructions
940
*
941
* This function proceeds in two phases. The first is very simple:
942
* it works out what sort of instruction it's looking at and sets up
943
* three strings:
944
* - |mnemonic| (the basic mnemonic: LDR or whatever)
945
* - |flagchars| (things to go after the cond code: B or whatever)
946
* - |format| (a string describing how to display the instruction)
947
* The second phase consists of interpreting |format|, character by
948
* character. Some characters (e.g., letters) just mean `append this
949
* character to the output string'; some mean more complicated things
950
* like `append the name of the register whose number is in bits 12..15'
951
* or, worse, `append a description of the <op2> field'.
952
*
953
* I'm afraid the magic characters in |format| are rather arbitrary.
954
* One criterion in choosing them was that they should form a contiguous
955
* subrange of the character set! Sorry.
956
*
957
* Things I still want to do:
958
*
959
* - more configurability?
960
* - make it much faster, if possible
961
* - make it much smaller, if possible
962
*
963
* Format characters:
964
*
965
* \01..\05 copro register number from nybble (\001 == nybble 0, sorry)
966
* $ SWI number
967
* % register set for LDM/STM (takes note of bit 22 for ^)
968
* & address for B/BL
969
* ' ! if bit 21 set, else nothing (mnemonic: half a !)
970
* ( #regs for SFM (bits 22,15 = fpn, assumed already tweaked)
971
* ) copro opcode in bits 20..23 (for CDP)
972
* * op2 (takes note of bottom 12 bits, and bit 25)
973
* + FP register or immediate value: bits 0..3
974
* , comma or comma-space
975
* - copro extra info in bits 5..7 preceded by , omitted if 0
976
* . address in ADR instruction
977
* / address for LDR/STR (takes note of bit 23 & reg in bits 16..19)
978
* 0..4 register number from nybble
979
* 5..9 FP register number from nybble
980
* : copro opcode in bits 21..23 (for MRC/MCR)
981
* ; copro number in bits 8..11
982
*
983
* ADDED BY HRYDGARD:
984
* ^ 16-bit immediate
985
* > 5-bit immediate at 11..7 (lsb)
986
* < 5-bit immediate at 20..16 with +1 or -lsb if bit 6 set
987
*
988
* NB that / takes note of bit 22, too, and does its own ! when
989
* appropriate.
990
*
991
* On typical instructions this seems to take about 100us on my ARM6;
992
* that's about 3000 cycles, which seems grossly excessive. I'm not
993
* sure where all those cycles are being spent. Perhaps it's possible
994
* to make it much, much faster. Most of this time is spent on phase 2.
995
*/
996
997
extern pInstruction
998
instr_disassemble(word instr, address addr, pDisOptions opts) {
999
static char flagchars[4];
1000
static sInstruction result;
1001
const char * mnemonic = 0;
1002
char * flagp = flagchars;
1003
const char * format = 0;
1004
word fpn;
1005
eTargetType poss_tt = target_None;
1006
#ifdef INSTR_grok_v4
1007
int is_v4 = 0;
1008
#endif
1009
1010
/* PHASE 0. Set up default values for |result|. */
1011
1012
if (opts->flags & disopt_ReverseBytes) {
1013
instr = ((instr & 0xFF00FF00) >> 8) | ((instr & 0x00FF00FF) << 8);
1014
instr = (instr >> 16) | (instr << 16);
1015
}
1016
1017
fpn = ((instr>>15)&1) + ((instr>>21)&2);
1018
1019
result.undefined = 0;
1020
result.badbits = 0;
1021
result.oddbits = 0;
1022
result.is_SWI = 0;
1023
result.target_type = target_None;
1024
result.offset = 0x80000000;
1025
result.addrstart = 0;
1026
1027
/* PHASE 1. Decode and classify instruction. */
1028
1029
switch ((instr>>24)&15) {
1030
case 0:
1031
/* multiply or data processing, or LDRH etc */
1032
if ((instr&(15<<4))!=(9<<4)) goto lMaybeLDRHetc;
1033
/* multiply */
1034
if (instr&(1<<23)) {
1035
/* int multiply */
1036
mnemonic = "UMULL\0UMLAL\0SMULL\0SMLAL" + 6*((instr>>21)&3);
1037
format = "3,4,0,2";
1038
}
1039
else {
1040
if (instr&(1<<22)) goto lUndefined; /* "class C" */
1041
/* short multiply */
1042
if (instr&(1<<21)) {
1043
mnemonic = "MLA";
1044
format = "4,0,2,3";
1045
}
1046
else {
1047
mnemonic = "MUL";
1048
format = "4,0,2";
1049
}
1050
}
1051
if (instr&Sbit) *flagp++='S';
1052
break;
1053
case 1:
1054
if ((instr & 0x0FFFFFF0) == ((18 << 20) | (0xFFF << 8) | (1 << 4))) {
1055
mnemonic = "B";
1056
format = "0";
1057
break;
1058
} else if ((instr & 0x0FFFFFF0) == 0x012FFF30) {
1059
mnemonic = "BL";
1060
format = "0";
1061
break;
1062
} else if ((instr & 0x0FF000F0) == 0x01200070) {
1063
int imm = ((instr & 0xFFF00) >> 4) | (instr & 0xF);
1064
snprintf(result.text, sizeof(result.text), "BKPT %d", imm);
1065
result.undefined = 0;
1066
return &result;
1067
}
1068
case 3:
1069
if (instr >> 24 == 0xF3) {
1070
if (!DisasmNeon(instr, result.text)) {
1071
goto lUndefined;
1072
}
1073
result.undefined = 0;
1074
return &result;
1075
}
1076
/* SWP or MRS/MSR or data processing */
1077
// hrydgard addition: MOVW/MOVT
1078
if ((instr & 0x0FF00000) == 0x03000000) {
1079
mnemonic = "MOVW";
1080
format = "3,^";
1081
break;
1082
}
1083
else if ((instr & 0x0FF00000) == 0x03400000) {
1084
mnemonic = "MOVT";
1085
format = "3,^";
1086
break;
1087
}
1088
else if ((instr&0x02B00FF0)==0x00000090) {
1089
/* SWP */
1090
mnemonic = "SWP";
1091
format = "3,0,[4]";
1092
if (instr&Bbit) *flagp++='B';
1093
break;
1094
}
1095
else if ((instr&0x02BF0FFF)==0x000F0000) {
1096
/* MRS */
1097
mnemonic = "MRS";
1098
format = (instr&SPSRbit) ? "3,SPSR" : "3,CPSR";
1099
break;
1100
}
1101
else if ((instr&0x02BFFFF0)==0x0029F000) {
1102
/* MSR psr<P=0/1...>,Rs */
1103
mnemonic = "MSR";
1104
format = (instr&SPSRbit) ? "SPSR,0" : "CPSR,0";
1105
break;
1106
}
1107
else if ((instr&0x00BFF000)==0x0028F000) {
1108
/* MSR {C,S}PSR_flag,op2 */
1109
mnemonic = "MSR";
1110
format = (instr&SPSRbit) ? "SPSR_flg,*" : "CPSR_flg,*";
1111
if (!(instr&Ibit) && (instr&(15<<4)))
1112
#ifdef INSTR_grok_v4
1113
goto lMaybeLDRHetc;
1114
#else
1115
goto lUndefined; /* shifted reg in MSR illegal */
1116
#endif
1117
break;
1118
}
1119
/* fall through here */
1120
lMaybeLDRHetc:
1121
#ifdef INSTR_grok_v4
1122
if ((instr&(14<<24))==0
1123
&& ((instr&(9<<4))==(9<<4))) {
1124
/* Might well be LDRH or similar. */
1125
if ((instr&(Wbit+Pbit))==Wbit) goto lUndefined; /* "class E", case 1 */
1126
if ((instr&(Lbit+(1<<6)))==(1<<6)) goto lUndefined; /* STRSH etc */
1127
mnemonic = "STR\0LDR" + ((instr&Lbit) >> 18);
1128
if (instr&(1<<6)) *flagp++='S';
1129
*flagp++ = (instr&(1<<5)) ? 'B' : 'H';
1130
format = "3,/";
1131
/* aargh: */
1132
if (!(instr&(1<<22))) instr |= Ibit;
1133
is_v4=1;
1134
break;
1135
}
1136
#endif
1137
case 2:
1138
if (instr >> 24 == 0xF2) {
1139
if (!DisasmNeon(instr, result.text)) {
1140
goto lUndefined;
1141
}
1142
result.undefined = 0;
1143
return &result;
1144
}
1145
/* data processing */
1146
{ word op21 = instr&(15<<21);
1147
if ((op21==(2<<21) || (op21==(4<<21))) /* ADD or SUB */
1148
&& ((instr&(RNbits+Ibit+Sbit))==RN(15)+Ibit) /* imm, no S */
1149
/*&& ((instr&(30<<7))==0 || (instr&3))*/) { /* normal rot */
1150
/* ADD ...,pc,#... or SUB ...,pc,#...: turn into ADR */
1151
mnemonic = "ADR";
1152
format = "3,.";
1153
if ((instr&(30<<7))!=0 && !(instr&3)) result.oddbits=1;
1154
break;
1155
}
1156
mnemonic = "AND\0EOR\0SUB\0RSB\0ADD\0ADC\0SBC\0RSC\0"
1157
"TST\0TEQ\0CMP\0CMN\0ORR\0MOV\0BIC\0MVN" /* \0 */
1158
+ (op21 >> 19);
1159
/* Rd needed for all but TST,TEQ,CMP,CMN (8..11) */
1160
/* Rn needed for all but MOV,MVN (13,15) */
1161
if (op21 < ( 8<<21)) format = "3,4,*";
1162
else if (op21 < (12<<21)) {
1163
format = "4,*";
1164
if (instr&RDbits) {
1165
if ((instr&Sbit) && RD_is(15))
1166
*flagp++='P';
1167
else result.oddbits=1;
1168
}
1169
if (!(instr&Sbit)) goto lUndefined; /* CMP etc, no S bit */
1170
}
1171
else if (op21 & (1<<21)) {
1172
format = "3,*";
1173
if (instr&RNbits) result.oddbits=1;
1174
}
1175
else format = "3,4,*";
1176
if (instr&Sbit && (op21<(8<<21) || op21>=(12<<21))) *flagp++='S';
1177
}
1178
break;
1179
case 4:
1180
if ((instr >> 24) == 0xF4) {
1181
if (!DisasmNeon(instr, result.text)) {
1182
goto lUndefined;
1183
}
1184
result.undefined = 0;
1185
return &result;
1186
}
1187
case 5:
1188
case 6:
1189
case 7:
1190
/* STR/LDR/BFI/BFC/UBFX/SBFX or undefined */
1191
if ((instr&Ibit) && (instr&(1<<4))) {
1192
switch ((instr >> 21) & 7) {
1193
case 5:
1194
case 7:
1195
/* SBFX/UBFX */
1196
if (((instr>>4) & 7) != 5) {
1197
goto lUndefined;
1198
}
1199
mnemonic = (instr & (1 << 22)) ? "UBFX" : "SBFX";
1200
format = "3,0,>,<";
1201
break;
1202
case 6:
1203
/* BFI/BFC */
1204
if (((instr>>4) & 7) != 1) {
1205
goto lUndefined;
1206
}
1207
if ((instr & 15) == 15) {
1208
mnemonic = "BFC";
1209
format = "3,>,<";
1210
} else {
1211
mnemonic = "BFI";
1212
format = "3,0,>,<";
1213
}
1214
break;
1215
default:
1216
goto lUndefined; /* "class A" */
1217
}
1218
} else {
1219
mnemonic = "STR\0LDR" + ((instr&Lbit) >> 18);
1220
format = "3,/";
1221
if (instr&Bbit) *flagp++='B';
1222
if ((instr&(Wbit+Pbit))==Wbit) *flagp++='T';
1223
poss_tt = target_Data;
1224
}
1225
break;
1226
case 8:
1227
case 9:
1228
/* STM/LDM */
1229
mnemonic = "STM\0LDM" + ((instr&Lbit) >> 18);
1230
if (RN_is(13)) {
1231
/* r13, so treat as stack */
1232
word x = (instr&(3<<23)) >> 22;
1233
if (instr&Lbit) x^=6;
1234
{ const char * foo = "EDEAFDFA"+x;
1235
*flagp++ = *foo++;
1236
*flagp++ = *foo;
1237
}
1238
}
1239
else {
1240
/* not r13, so don't treat as stack */
1241
*flagp++ = (instr&Ubit) ? 'I' : 'D';
1242
*flagp++ = (instr&Pbit) ? 'B' : 'A';
1243
}
1244
format = "4',%";
1245
break;
1246
case 10:
1247
case 11:
1248
/* B or BL */
1249
mnemonic = "B\0BL"+((instr&(1<<24))>>23);
1250
format = "&";
1251
break;
1252
case 12:
1253
case 13:
1254
case 14: // FPU
1255
{
1256
if (!DisasmVFP(instr, result.text)) {
1257
goto lUndefined;
1258
}
1259
result.undefined = 0;
1260
return &result;
1261
}
1262
break;
1263
case 15:
1264
/* SWI */
1265
mnemonic = "SWI";
1266
format = "$";
1267
break;
1268
/* Nasty hack: this is code that won't be reached in the normal
1269
* course of events, and after the last case of the switch is a
1270
* convenient place for it.
1271
*/
1272
lUndefined:
1273
strcpy(result.text, "Undefined instruction");
1274
result.undefined = 1;
1275
return &result;
1276
}
1277
*flagp=0;
1278
1279
/* PHASE 2. Produce string. */
1280
1281
{ char * op = result.text;
1282
1283
/* 2a. Mnemonic. */
1284
1285
op = append(op,mnemonic);
1286
1287
/* 2b. Condition code. */
1288
1289
{ word cond = instr>>28;
1290
if (cond!=14) {
1291
const char * ip = "EQNECSCCMIPLVSVCHILSGELTGTLEALNV"+2*cond;
1292
*op++ = *ip++;
1293
*op++ = *ip;
1294
}
1295
}
1296
1297
/* 2c. Flags. */
1298
1299
{ const char * ip = flagchars;
1300
while (*ip) *op++ = *ip++;
1301
}
1302
1303
/* 2d. A tab character. */
1304
1305
*op++ = '\t';
1306
1307
/* 2e. Other stuff, determined by format string. */
1308
1309
{ const char * ip = format;
1310
char c;
1311
1312
const char * * regnames = opts->regnames;
1313
word oflags = opts->flags;
1314
1315
while ((c=*ip++) != 0) {
1316
switch(c) {
1317
case '^': // hrydgard addition
1318
{
1319
unsigned short imm16 = ((instr & 0x000F0000) >> 4) | (instr & 0x0FFF);
1320
op += sprintf(op, "%04x", imm16);
1321
}
1322
break;
1323
case '$':
1324
result.is_SWI = 1;
1325
result.swinum = instr&0x00FFFFFF;
1326
result.addrstart = op;
1327
op += sprintf(op, "&%X", result.swinum);
1328
break;
1329
case '%':
1330
*op++='{';
1331
{ word w = instr&0xFFFF;
1332
int i=0;
1333
while (w) {
1334
int j;
1335
while (!(w&(1ul<<i))) ++i;
1336
for (j=i+1; w&(1ul<<j); ++j) ;
1337
--j;
1338
/* registers [i..j] */
1339
op = append(op, regnames[i]);
1340
if (j-i) {
1341
*op++ = (j-i>1) ? '-' : ',';
1342
op = append(op, regnames[j]);
1343
}
1344
i=j; w=(w>>(j+1))<<(j+1);
1345
if (w) *op++=',';
1346
}
1347
}
1348
*op++='}';
1349
if (instr&(1<<22)) *op++='^';
1350
break;
1351
case '&':
1352
{ address target = (addr+8 + ((((int)instr)<<8)>>6)) & 0x03FFFFFC;
1353
result.addrstart = op;
1354
op = hex8(op, target);
1355
result.target_type = target_Code;
1356
result.target = target;
1357
}
1358
break;
1359
case '\'':
1360
lPling:
1361
if (instr&Wbit) *op++='!';
1362
break;
1363
case '(':
1364
*op++ = (char)('0'+fpn);
1365
break;
1366
case ')':
1367
{ word w = (instr>>20)&15;
1368
if (w>=10) { *op++='1'; *op++=(char)('0'-10+w); }
1369
else *op++=(char)(w+'0');
1370
}
1371
break;
1372
case '*':
1373
case '.':
1374
if (instr&Ibit) {
1375
/* immediate constant */
1376
word imm8 = (instr&255);
1377
word rot = (instr>>7)&30;
1378
if (rot && !(imm8&3) && c=='*') {
1379
/* Funny immediate const. Guaranteed not '.', btw */
1380
*op++='#'; *op++='&';
1381
*op++="0123456789ABCDEF"[imm8>>4];
1382
*op++="0123456789ABCDEF"[imm8&15];
1383
*op++=',';
1384
op = num(op, rot);
1385
}
1386
else {
1387
if (rot != 0) {
1388
imm8 = (imm8>>rot) | (imm8<<(32-rot));
1389
}
1390
if (c=='*') {
1391
*op++='#';
1392
if (imm8>256 && ((imm8&(imm8-1))==0)) {
1393
/* only one bit set, and that later than bit 8.
1394
* Represent as 1<<... .
1395
*/
1396
op = append(op,"1<<");
1397
{ int n=0;
1398
while (!(imm8&15)) { n+=4; imm8=imm8>>4; }
1399
/* Now imm8 is 1, 2, 4 or 8. */
1400
n += (0x30002010 >> 4*(imm8-1))&15;
1401
op = num(op, n);
1402
}
1403
}
1404
else {
1405
if (((int)imm8)<0 && ((int)imm8)>-100) {
1406
*op++='-'; imm8=-(int)imm8;
1407
}
1408
op = num(op, imm8);
1409
}
1410
}
1411
else {
1412
address a = addr+8;
1413
if (instr&(1<<22)) a-=imm8; else a+=imm8;
1414
result.addrstart=op;
1415
op = hex8(op, a);
1416
result.target=a; result.target_type=target_Unknown;
1417
}
1418
}
1419
}
1420
else {
1421
/* rotated register */
1422
const char * rot = "LSL\0LSR\0ASR\0ROR" + ((instr&(3<<5)) >> 3);
1423
op = append(op, regnames[instr&15]);
1424
if (instr&(1<<4)) {
1425
/* register rotation */
1426
if (instr&(1<<7)) goto lUndefined;
1427
*op++=','; if (oflags&disopt_CommaSpace) *op++=' ';
1428
op = append(op,rot); *op++=' ';
1429
op = append(op,regnames[(instr&(15<<8))>>8]);
1430
}
1431
else {
1432
/* constant rotation */
1433
word n = instr&(31<<7);
1434
if (!n) {
1435
if (!(instr&(3<<5))) break;
1436
else if ((instr&(3<<5))==(3<<5)) {
1437
op = append(op, ",RRX");
1438
break;
1439
}
1440
else n=32<<7;
1441
}
1442
*op++ = ','; if (oflags&disopt_CommaSpace) *op++=' ';
1443
op = num(append(append(op,rot)," #"),n>>7);
1444
}
1445
}
1446
break;
1447
case '+':
1448
if (instr&(1<<3)) {
1449
word w = instr&7;
1450
*op++='#';
1451
if (w<6) *op++=(char)('0'+w);
1452
else op = append(op, w==6 ? "0.5" : "10");
1453
}
1454
else {
1455
*op++='f';
1456
*op++=(char)('0'+(instr&7));
1457
}
1458
break;
1459
case ',':
1460
*op++=',';
1461
if (oflags&disopt_CommaSpace) *op++=' ';
1462
break;
1463
case '-':
1464
{ word w = instr&(7<<5);
1465
if (w) {
1466
*op++=',';
1467
if (oflags&disopt_CommaSpace) *op++=' ';
1468
*op++ = (char)('0'+(w>>5));
1469
}
1470
}
1471
break;
1472
case '/':
1473
result.addrstart = op;
1474
*op++='[';
1475
op = append(op, regnames[(instr&RNbits)>>16]);
1476
if (!(instr&Pbit)) *op++=']';
1477
*op++=','; if (oflags&disopt_CommaSpace) *op++=' ';
1478
/* For following, NB that bit 25 is always 0 for LDC, SFM etc */
1479
if (instr&Ibit) {
1480
/* shifted offset */
1481
if (!(instr&Ubit)) *op++='-';
1482
/* We're going to transfer to '*', basically. The stupid
1483
* thing is that the meaning of bit 25 is reversed there;
1484
* I don't know why the designers of the ARM did that.
1485
*/
1486
instr ^= Ibit;
1487
if (instr&(1<<4)) {
1488
#ifdef INSTR_grok_v4
1489
if (is_v4 && !(instr&(15<<8))) {
1490
ip = (instr&Pbit) ? "0]" : "0";
1491
break;
1492
}
1493
#else
1494
goto lUndefined; /* LSL r3 forbidden */
1495
#endif
1496
}
1497
/* Need a ] iff it was pre-indexed; and an optional ! iff
1498
* it's pre-indexed *or* a copro instruction,
1499
* except that FPU operations don't need the !. Bletch.
1500
*/
1501
if (instr&Pbit) ip="*]'";
1502
else if (instr&(1<<27)) {
1503
if (CP_is(1) || CP_is(2)) {
1504
if (!(instr&Wbit)) goto lUndefined;
1505
ip="*";
1506
}
1507
else ip="*'";
1508
}
1509
else ip="*";
1510
}
1511
else {
1512
/* immediate offset */
1513
word offset;
1514
if (instr&(1<<27)) {
1515
/* LDF or LFM or similar */
1516
offset = (instr&255)<<2;
1517
}
1518
#ifdef INSTR_grok_v4
1519
else if (is_v4) offset = (instr&15) + ((instr&(15<<8))>>4);
1520
#endif
1521
else {
1522
/* LDR or STR */
1523
offset = instr&0xFFF;
1524
}
1525
*op++='#';
1526
if (!(instr&Ubit)) {
1527
if (offset) *op++='-';
1528
else result.oddbits=1;
1529
result.offset = -(int)offset;
1530
}
1531
else result.offset = offset;
1532
op = num(op, offset);
1533
if (RN_is(15) && (instr&Pbit)) {
1534
/* Immediate, pre-indexed and PC-relative. Set target. */
1535
result.target_type = poss_tt;
1536
result.target = (instr&Ubit) ? addr+8 + offset
1537
: addr+8 - offset;
1538
if (!(instr&Wbit)) {
1539
/* no writeback, either. Use friendly form. */
1540
op = hex8(result.addrstart, result.target);
1541
break;
1542
}
1543
}
1544
if (instr&Pbit) { *op++=']'; goto lPling; }
1545
else if (instr&(1<<27)) {
1546
if (CP_is(1) || CP_is(2)) {
1547
if (!(instr&Wbit)) goto lUndefined;
1548
}
1549
else goto lPling;
1550
}
1551
}
1552
break;
1553
case '0': case '1': case '2': case '3': case '4':
1554
op = append(op, regnames[(instr>>(4*(c-'0')))&15]);
1555
break;
1556
case '5': case '6': case '7': case '8': case '9':
1557
*op++='f';
1558
*op++=(char)('0' + ((instr>>(4*(c-'5')))&7));
1559
break;
1560
case ':':
1561
*op++ = (char)('0' + ((instr>>21)&7));
1562
break;
1563
case ';':
1564
op = reg(op, 'p', instr>>8);
1565
break;
1566
case '>':
1567
*op++='#';
1568
op = num(op, (instr >> 7) & 31);
1569
break;
1570
case '<':
1571
*op++='#';
1572
if (instr & (1 << 6)) {
1573
op = num(op, ((instr >> 16) & 31) + 1);
1574
} else {
1575
op = num(op, ((instr >> 16) & 31) + 1 - ((instr >> 7) & 31));
1576
}
1577
break;
1578
default:
1579
if (c<=5)
1580
op = reg(op, 'c', instr >> (4*(c-1)));
1581
else *op++ = c;
1582
}
1583
}
1584
*op=0;
1585
}
1586
}
1587
1588
/* DONE! */
1589
1590
return &result;
1591
}
1592
1593
/* Register names used when printing operands. The index is the 4-bit
 * register number taken from the instruction word; registers 12..15 are
 * printed under the aliases ip, sp, lr and pc rather than r12..r15.
 *
 * The element type is deliberately left as plain `const char *` (not a
 * const pointer) so the array still converts to the `const char **`
 * that the disassembler options carry.
 */
static const char * reg_names[16] = {
	"r0", "r1", "r2",  "r3",  "r4", "r5", "r6", "r7",
	"r8", "r9", "r10", "r11", "ip", "sp", "lr", "pc"
};
1597
1598
/* Disassembler options passed to instr_disassemble() by ArmDis().
 *
 * disopt_CommaSpace makes the formatter emit a space after each comma
 * it writes between operands; reg_names supplies the register spellings.
 * The initializer is positional: the values are presumably the `flags`
 * and `regnames` members of sDisOptions, in that order (the struct is
 * declared outside this part of the file -- confirm if it changes).
 */
static sDisOptions options = {
	disopt_CommaSpace,
	reg_names
};
1602
1603
const char *ArmRegName(int r) {
1604
return reg_names[r];
1605
}
1606
1607
void ArmDis(unsigned int addr, unsigned int w, char *output, int bufsize, bool includeWord) {
1608
pInstruction instr = instr_disassemble(w, addr, &options);
1609
char temp[256];
1610
if (includeWord) {
1611
snprintf(output, bufsize, "%08x\t%s", w, instr->text);
1612
} else {
1613
snprintf(output, bufsize, "%s", instr->text);
1614
}
1615
if (instr->undefined || instr->badbits || instr->oddbits) {
1616
if (instr->undefined) snprintf(output, bufsize, "%08x\t[undefined instr]", w);
1617
if (instr->badbits) snprintf(output, bufsize, "%08x\t[illegal bits]", w);
1618
1619
// HUH? LDR and STR gets this a lot
1620
// strcat(output, " ? (extra bits)");
1621
if (instr->oddbits) {
1622
snprintf(temp, sizeof(temp), " [unexpected bits %08x]", w);
1623
strcat(output, temp);
1624
}
1625
}
1626
// zap tabs
1627
while (*output) {
1628
if (*output == '\t')
1629
*output = ' ';
1630
output++;
1631
}
1632
}
1633
1634
/* Restore the diagnostic settings when building with clang. The matching
 * "#pragma GCC diagnostic push" is expected earlier in the file (it is
 * outside this part of the source). */
#ifdef __clang__
#pragma GCC diagnostic pop
#endif
1637
1638