// Source: GitHub repository freebsd/freebsd-src
// Path: contrib/llvm-project/lldb/source/Plugins/Disassembler/LLVMC/DisassemblerLLVMC.cpp
//===-- DisassemblerLLVMC.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
9
#include "DisassemblerLLVMC.h"
10
11
#include "llvm-c/Disassembler.h"
12
#include "llvm/ADT/SmallString.h"
13
#include "llvm/ADT/StringExtras.h"
14
#include "llvm/MC/MCAsmInfo.h"
15
#include "llvm/MC/MCContext.h"
16
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
17
#include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h"
18
#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
19
#include "llvm/MC/MCInst.h"
20
#include "llvm/MC/MCInstPrinter.h"
21
#include "llvm/MC/MCInstrAnalysis.h"
22
#include "llvm/MC/MCInstrInfo.h"
23
#include "llvm/MC/MCRegisterInfo.h"
24
#include "llvm/MC/MCSubtargetInfo.h"
25
#include "llvm/MC/MCTargetOptions.h"
26
#include "llvm/MC/TargetRegistry.h"
27
#include "llvm/Support/ErrorHandling.h"
28
#include "llvm/Support/ScopedPrinter.h"
29
#include "llvm/Support/TargetSelect.h"
30
#include "llvm/TargetParser/AArch64TargetParser.h"
31
32
#include "lldb/Core/Address.h"
33
#include "lldb/Core/Module.h"
34
#include "lldb/Symbol/SymbolContext.h"
35
#include "lldb/Target/ExecutionContext.h"
36
#include "lldb/Target/Process.h"
37
#include "lldb/Target/RegisterContext.h"
38
#include "lldb/Target/SectionLoadList.h"
39
#include "lldb/Target/StackFrame.h"
40
#include "lldb/Target/Target.h"
41
#include "lldb/Utility/DataExtractor.h"
42
#include "lldb/Utility/LLDBLog.h"
43
#include "lldb/Utility/Log.h"
44
#include "lldb/Utility/RegularExpression.h"
45
#include "lldb/Utility/Stream.h"
46
#include <optional>
47
48
using namespace lldb;
49
using namespace lldb_private;
50
51
LLDB_PLUGIN_DEFINE(DisassemblerLLVMC)
52
53
/// Owns the set of LLVM MC objects (instruction info, register info,
/// subtarget info, asm info, context, disassembler, printer and analysis)
/// that are used together to decode and print machine instructions.
///
/// Only the declaration lives here; the member functions are defined
/// elsewhere in this file.
class DisassemblerLLVMC::MCDisasmInstance {
public:
  /// Factory function; the constructor is private, so this is the only way
  /// for outside code to obtain an instance.
  static std::unique_ptr<MCDisasmInstance>
  Create(const char *triple, const char *cpu, const char *features_str,
         unsigned flavor, DisassemblerLLVMC &owner);

  ~MCDisasmInstance() = default;

  /// Decodes one instruction from \p opcode_data into \p mc_inst.
  /// Callers in this file treat a return value of 0 as "could not decode"
  /// and any other value as the instruction's size in bytes.
  uint64_t GetMCInst(const uint8_t *opcode_data, size_t opcode_data_len,
                     lldb::addr_t pc, llvm::MCInst &mc_inst) const;

  /// Renders \p mc_inst as text into \p inst_string and \p comments_string.
  void PrintMCInst(llvm::MCInst &mc_inst, lldb::addr_t pc,
                   std::string &inst_string, std::string &comments_string);

  /// Configures how immediates are printed.
  void SetStyle(bool use_hex_immed, HexImmediateStyle hex_style);

  /// Enables or disables colored (marked-up) output of the printer.
  void SetUseColor(bool use_color);

  /// Returns the current color setting.
  bool GetUseColor() const;

  // Instruction property queries.
  bool CanBranch(llvm::MCInst &mc_inst) const;
  bool HasDelaySlot(llvm::MCInst &mc_inst) const;
  bool IsCall(llvm::MCInst &mc_inst) const;
  bool IsLoad(llvm::MCInst &mc_inst) const;
  bool IsAuthenticated(llvm::MCInst &mc_inst) const;

private:
  MCDisasmInstance(std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up,
                   std::unique_ptr<llvm::MCRegisterInfo> &&reg_info_up,
                   std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up,
                   std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up,
                   std::unique_ptr<llvm::MCContext> &&context_up,
                   std::unique_ptr<llvm::MCDisassembler> &&disasm_up,
                   std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up,
                   std::unique_ptr<llvm::MCInstrAnalysis> &&instr_analysis_up);

  // NOTE: the declaration order below determines construction and (reverse)
  // destruction order of the owned objects; do not reorder casually.
  std::unique_ptr<llvm::MCInstrInfo> m_instr_info_up;
  std::unique_ptr<llvm::MCRegisterInfo> m_reg_info_up;
  std::unique_ptr<llvm::MCSubtargetInfo> m_subtarget_info_up;
  std::unique_ptr<llvm::MCAsmInfo> m_asm_info_up;
  std::unique_ptr<llvm::MCContext> m_context_up;
  std::unique_ptr<llvm::MCDisassembler> m_disasm_up;
  std::unique_ptr<llvm::MCInstPrinter> m_instr_printer_up;
  std::unique_ptr<llvm::MCInstrAnalysis> m_instr_analysis_up;
};
93
94
namespace x86 {
95
96
/// These are the three values deciding instruction control flow kind.
97
/// InstructionLengthDecode function decodes an instruction and get this struct.
98
///
99
/// primary_opcode
100
/// Primary opcode of the instruction.
101
/// For one-byte opcode instruction, it's the first byte after prefix.
102
/// For two- and three-byte opcodes, it's the second byte.
103
///
104
/// opcode_len
105
/// The length of opcode in bytes. Valid opcode lengths are 1, 2, or 3.
106
///
107
/// modrm
108
/// ModR/M byte of the instruction.
109
/// Bits[7:6] indicate MOD. Bits[5:3] specify a register and R/M bits[2:0]
110
/// may contain a register or specify an addressing mode, depending on MOD.
111
struct InstructionOpcodeAndModrm {
112
uint8_t primary_opcode;
113
uint8_t opcode_len;
114
uint8_t modrm;
115
};
116
117
/// Determine the InstructionControlFlowKind based on opcode and modrm bytes.
118
/// Refer to http://ref.x86asm.net/coder.html for the full list of opcode and
119
/// instruction set.
120
///
121
/// \param[in] opcode_and_modrm
122
/// Contains primary_opcode byte, its length, and ModR/M byte.
123
/// Refer to the struct InstructionOpcodeAndModrm for details.
124
///
125
/// \return
126
/// The control flow kind of the instruction or
127
/// eInstructionControlFlowKindOther if the instruction doesn't affect
128
/// the control flow of the program.
129
lldb::InstructionControlFlowKind
130
MapOpcodeIntoControlFlowKind(InstructionOpcodeAndModrm opcode_and_modrm) {
131
uint8_t opcode = opcode_and_modrm.primary_opcode;
132
uint8_t opcode_len = opcode_and_modrm.opcode_len;
133
uint8_t modrm = opcode_and_modrm.modrm;
134
135
if (opcode_len > 2)
136
return lldb::eInstructionControlFlowKindOther;
137
138
if (opcode >= 0x70 && opcode <= 0x7F) {
139
if (opcode_len == 1)
140
return lldb::eInstructionControlFlowKindCondJump;
141
else
142
return lldb::eInstructionControlFlowKindOther;
143
}
144
145
if (opcode >= 0x80 && opcode <= 0x8F) {
146
if (opcode_len == 2)
147
return lldb::eInstructionControlFlowKindCondJump;
148
else
149
return lldb::eInstructionControlFlowKindOther;
150
}
151
152
switch (opcode) {
153
case 0x9A:
154
if (opcode_len == 1)
155
return lldb::eInstructionControlFlowKindFarCall;
156
break;
157
case 0xFF:
158
if (opcode_len == 1) {
159
uint8_t modrm_reg = (modrm >> 3) & 7;
160
if (modrm_reg == 2)
161
return lldb::eInstructionControlFlowKindCall;
162
else if (modrm_reg == 3)
163
return lldb::eInstructionControlFlowKindFarCall;
164
else if (modrm_reg == 4)
165
return lldb::eInstructionControlFlowKindJump;
166
else if (modrm_reg == 5)
167
return lldb::eInstructionControlFlowKindFarJump;
168
}
169
break;
170
case 0xE8:
171
if (opcode_len == 1)
172
return lldb::eInstructionControlFlowKindCall;
173
break;
174
case 0xCD:
175
case 0xCC:
176
case 0xCE:
177
case 0xF1:
178
if (opcode_len == 1)
179
return lldb::eInstructionControlFlowKindFarCall;
180
break;
181
case 0xCF:
182
if (opcode_len == 1)
183
return lldb::eInstructionControlFlowKindFarReturn;
184
break;
185
case 0xE9:
186
case 0xEB:
187
if (opcode_len == 1)
188
return lldb::eInstructionControlFlowKindJump;
189
break;
190
case 0xEA:
191
if (opcode_len == 1)
192
return lldb::eInstructionControlFlowKindFarJump;
193
break;
194
case 0xE3:
195
case 0xE0:
196
case 0xE1:
197
case 0xE2:
198
if (opcode_len == 1)
199
return lldb::eInstructionControlFlowKindCondJump;
200
break;
201
case 0xC3:
202
case 0xC2:
203
if (opcode_len == 1)
204
return lldb::eInstructionControlFlowKindReturn;
205
break;
206
case 0xCB:
207
case 0xCA:
208
if (opcode_len == 1)
209
return lldb::eInstructionControlFlowKindFarReturn;
210
break;
211
case 0x05:
212
case 0x34:
213
if (opcode_len == 2)
214
return lldb::eInstructionControlFlowKindFarCall;
215
break;
216
case 0x35:
217
case 0x07:
218
if (opcode_len == 2)
219
return lldb::eInstructionControlFlowKindFarReturn;
220
break;
221
case 0x01:
222
if (opcode_len == 2) {
223
switch (modrm) {
224
case 0xc1:
225
return lldb::eInstructionControlFlowKindFarCall;
226
case 0xc2:
227
case 0xc3:
228
return lldb::eInstructionControlFlowKindFarReturn;
229
default:
230
break;
231
}
232
}
233
break;
234
default:
235
break;
236
}
237
238
return lldb::eInstructionControlFlowKindOther;
239
}
240
241
/// Decode an instruction into opcode, modrm and opcode_len.
242
/// Refer to http://ref.x86asm.net/coder.html for the instruction bytes layout.
243
/// Opcodes in x86 are generally the first byte of instruction, though two-byte
244
/// instructions and prefixes exist. ModR/M is the byte following the opcode
245
/// and adds additional information for how the instruction is executed.
246
///
247
/// \param[in] inst_bytes
248
/// Raw bytes of the instruction
249
///
250
///
251
/// \param[in] bytes_len
252
/// The length of the inst_bytes array.
253
///
254
/// \param[in] is_exec_mode_64b
255
/// If true, the execution mode is 64 bit.
256
///
257
/// \return
258
/// Returns decoded instruction as struct InstructionOpcodeAndModrm, holding
259
/// primary_opcode, opcode_len and modrm byte. Refer to the struct definition
260
/// for more details.
261
/// Otherwise if the given instruction is invalid, returns std::nullopt.
262
std::optional<InstructionOpcodeAndModrm>
263
InstructionLengthDecode(const uint8_t *inst_bytes, int bytes_len,
264
bool is_exec_mode_64b) {
265
int op_idx = 0;
266
bool prefix_done = false;
267
InstructionOpcodeAndModrm ret = {0, 0, 0};
268
269
// In most cases, the primary_opcode is the first byte of the instruction
270
// but some instructions have a prefix to be skipped for these calculations.
271
// The following mapping is inspired from libipt's instruction decoding logic
272
// in `src/pt_ild.c`
273
while (!prefix_done) {
274
if (op_idx >= bytes_len)
275
return std::nullopt;
276
277
ret.primary_opcode = inst_bytes[op_idx];
278
switch (ret.primary_opcode) {
279
// prefix_ignore
280
case 0x26:
281
case 0x2e:
282
case 0x36:
283
case 0x3e:
284
case 0x64:
285
case 0x65:
286
// prefix_osz, prefix_asz
287
case 0x66:
288
case 0x67:
289
// prefix_lock, prefix_f2, prefix_f3
290
case 0xf0:
291
case 0xf2:
292
case 0xf3:
293
op_idx++;
294
break;
295
296
// prefix_rex
297
case 0x40:
298
case 0x41:
299
case 0x42:
300
case 0x43:
301
case 0x44:
302
case 0x45:
303
case 0x46:
304
case 0x47:
305
case 0x48:
306
case 0x49:
307
case 0x4a:
308
case 0x4b:
309
case 0x4c:
310
case 0x4d:
311
case 0x4e:
312
case 0x4f:
313
if (is_exec_mode_64b)
314
op_idx++;
315
else
316
prefix_done = true;
317
break;
318
319
// prefix_vex_c4, c5
320
case 0xc5:
321
if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
322
prefix_done = true;
323
break;
324
}
325
326
ret.opcode_len = 2;
327
ret.primary_opcode = inst_bytes[op_idx + 2];
328
ret.modrm = inst_bytes[op_idx + 3];
329
return ret;
330
331
case 0xc4:
332
if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
333
prefix_done = true;
334
break;
335
}
336
ret.opcode_len = inst_bytes[op_idx + 1] & 0x1f;
337
ret.primary_opcode = inst_bytes[op_idx + 3];
338
ret.modrm = inst_bytes[op_idx + 4];
339
return ret;
340
341
// prefix_evex
342
case 0x62:
343
if (!is_exec_mode_64b && (inst_bytes[op_idx + 1] & 0xc0) != 0xc0) {
344
prefix_done = true;
345
break;
346
}
347
ret.opcode_len = inst_bytes[op_idx + 1] & 0x03;
348
ret.primary_opcode = inst_bytes[op_idx + 4];
349
ret.modrm = inst_bytes[op_idx + 5];
350
return ret;
351
352
default:
353
prefix_done = true;
354
break;
355
}
356
} // prefix done
357
358
ret.primary_opcode = inst_bytes[op_idx];
359
ret.modrm = inst_bytes[op_idx + 1];
360
ret.opcode_len = 1;
361
362
// If the first opcode is 0F, it's two- or three- byte opcodes.
363
if (ret.primary_opcode == 0x0F) {
364
ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
365
366
if (ret.primary_opcode == 0x38) {
367
ret.opcode_len = 3;
368
ret.primary_opcode = inst_bytes[++op_idx]; // get the next byte
369
ret.modrm = inst_bytes[op_idx + 1];
370
} else if (ret.primary_opcode == 0x3A) {
371
ret.opcode_len = 3;
372
ret.primary_opcode = inst_bytes[++op_idx];
373
ret.modrm = inst_bytes[op_idx + 1];
374
} else if ((ret.primary_opcode & 0xf8) == 0x38) {
375
ret.opcode_len = 0;
376
ret.primary_opcode = inst_bytes[++op_idx];
377
ret.modrm = inst_bytes[op_idx + 1];
378
} else if (ret.primary_opcode == 0x0F) {
379
ret.opcode_len = 3;
380
// opcode is 0x0F, no needs to update
381
ret.modrm = inst_bytes[op_idx + 1];
382
} else {
383
ret.opcode_len = 2;
384
ret.modrm = inst_bytes[op_idx + 1];
385
}
386
}
387
388
return ret;
389
}
390
391
lldb::InstructionControlFlowKind GetControlFlowKind(bool is_exec_mode_64b,
392
Opcode m_opcode) {
393
std::optional<InstructionOpcodeAndModrm> ret;
394
395
if (m_opcode.GetOpcodeBytes() == nullptr || m_opcode.GetByteSize() <= 0) {
396
// x86_64 and i386 instructions are categorized as Opcode::Type::eTypeBytes
397
return lldb::eInstructionControlFlowKindUnknown;
398
}
399
400
// Opcode bytes will be decoded into primary_opcode, modrm and opcode length.
401
// These are the three values deciding instruction control flow kind.
402
ret = InstructionLengthDecode((const uint8_t *)m_opcode.GetOpcodeBytes(),
403
m_opcode.GetByteSize(), is_exec_mode_64b);
404
if (!ret)
405
return lldb::eInstructionControlFlowKindUnknown;
406
else
407
return MapOpcodeIntoControlFlowKind(*ret);
408
}
409
410
} // namespace x86
411
412
class InstructionLLVMC : public lldb_private::Instruction {
413
public:
414
/// Constructs an instruction located at \p address with address class
/// \p addr_class. Only a weak reference to \p disasm is stored, so the
/// instruction does not keep the disassembler alive.
InstructionLLVMC(DisassemblerLLVMC &disasm,
                 const lldb_private::Address &address,
                 AddressClass addr_class)
    : Instruction(address, addr_class),
      m_disasm_wp(std::static_pointer_cast<DisassemblerLLVMC>(
          disasm.shared_from_this())) {}
420
421
/// Nothing to release explicitly; members clean up after themselves.
~InstructionLLVMC() override = default;
422
423
bool DoesBranch() override {
424
VisitInstruction();
425
return m_does_branch;
426
}
427
428
bool HasDelaySlot() override {
429
VisitInstruction();
430
return m_has_delay_slot;
431
}
432
433
bool IsLoad() override {
434
VisitInstruction();
435
return m_is_load;
436
}
437
438
bool IsAuthenticated() override {
439
VisitInstruction();
440
return m_is_authenticated;
441
}
442
443
/// Convenience overload: acquires a DisassemblerScope for this instruction
/// and forwards to the two-argument GetDisasmToUse().
///
/// \param[out] is_alternate_isa
///    Passed through to the two-argument overload.
DisassemblerLLVMC::MCDisasmInstance *GetDisasmToUse(bool &is_alternate_isa) {
  DisassemblerScope scope(*this);
  return GetDisasmToUse(is_alternate_isa, scope);
}
447
448
/// Reads the opcode for this instruction out of \p data starting at
/// \p data_offset and stores it in m_opcode.
///
/// Architectures whose minimum and maximum opcode sizes are equal are read
/// directly. ARM/Thumb is handled by inspecting the first halfword. Every
/// other variable-length architecture is sized by running the LLVM
/// disassembler over the remaining bytes.
///
/// \return
///    The byte size of the stored opcode, or 0 if no disassembler is
///    available or the fixed-size read would run past the data.
size_t Decode(const lldb_private::Disassembler &disassembler,
              const lldb_private::DataExtractor &data,
              lldb::offset_t data_offset) override {
  DisassemblerScope disasm(*this);
  if (!disasm)
    return 0;

  const ArchSpec &arch = disasm->GetArchitecture();
  const lldb::ByteOrder byte_order = data.GetByteOrder();

  const uint32_t min_op_byte_size = arch.GetMinimumOpcodeByteSize();
  const uint32_t max_op_byte_size = arch.GetMaximumOpcodeByteSize();

  if (min_op_byte_size == max_op_byte_size) {
    // Fixed size instructions, just read that amount of data.
    if (!data.ValidOffsetForDataOfSize(data_offset, min_op_byte_size))
      return 0;

    switch (min_op_byte_size) {
    case 1:
      m_opcode.SetOpcode8(data.GetU8(&data_offset), byte_order);
      break;
    case 2:
      m_opcode.SetOpcode16(data.GetU16(&data_offset), byte_order);
      break;
    case 4:
      m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order);
      break;
    case 8:
      m_opcode.SetOpcode64(data.GetU64(&data_offset), byte_order);
      break;
    default:
      m_opcode.SetOpcodeBytes(data.PeekData(data_offset, min_op_byte_size),
                              min_op_byte_size);
      break;
    }
    return m_opcode.GetByteSize();
  }

  // Variable size instructions.
  bool is_alternate_isa = false;
  DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =
      GetDisasmToUse(is_alternate_isa, disasm);

  const llvm::Triple::ArchType machine = arch.GetMachine();
  const bool is_arm_family =
      machine == llvm::Triple::arm || machine == llvm::Triple::thumb;

  if (is_arm_family) {
    if (machine == llvm::Triple::thumb || is_alternate_isa) {
      uint32_t thumb_opcode = data.GetU16(&data_offset);
      // The first halfword tells whether this is a 16-bit encoding or the
      // leading half of a 32-bit one.
      const bool is_16_bit = (thumb_opcode & 0xe000) != 0xe000 ||
                             ((thumb_opcode & 0x1800u) == 0);
      if (is_16_bit) {
        m_opcode.SetOpcode16(thumb_opcode, byte_order);
      } else {
        thumb_opcode <<= 16;
        thumb_opcode |= data.GetU16(&data_offset);
        m_opcode.SetOpcode16_2(thumb_opcode, byte_order);
      }
      m_is_valid = true;
    } else {
      m_opcode.SetOpcode32(data.GetU32(&data_offset), byte_order);
      m_is_valid = true;
    }
  } else {
    // The opcode isn't evenly sized, so we need to actually use the llvm
    // disassembler to parse it and get the size.
    const uint8_t *opcode_data = data.PeekData(data_offset, 1);
    const size_t opcode_data_len = data.BytesLeft(data_offset);
    const addr_t pc = m_address.GetFileAddress();
    llvm::MCInst inst;

    const size_t inst_size =
        mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
    if (inst_size != 0) {
      m_opcode.SetOpcodeBytes(opcode_data, inst_size);
      m_is_valid = true;
    } else {
      m_opcode.Clear();
    }
  }

  return m_opcode.GetByteSize();
}
540
541
void AppendComment(std::string &description) {
542
if (m_comment.empty())
543
m_comment.swap(description);
544
else {
545
m_comment.append(", ");
546
m_comment.append(description);
547
}
548
}
549
550
/// Classifies this instruction's control flow. Only x86 and x86_64 are
/// handled; every other architecture, and a missing disassembler, yields
/// eInstructionControlFlowKindUnknown.
lldb::InstructionControlFlowKind
GetControlFlowKind(const lldb_private::ExecutionContext *exe_ctx) override {
  DisassemblerScope disasm(*this, exe_ctx);
  if (!disasm)
    return eInstructionControlFlowKindUnknown;

  switch (disasm->GetArchitecture().GetMachine()) {
  case llvm::Triple::x86:
    return x86::GetControlFlowKind(/*is_64b=*/false, m_opcode);
  case llvm::Triple::x86_64:
    return x86::GetControlFlowKind(/*is_64b=*/true, m_opcode);
  default:
    return eInstructionControlFlowKindUnknown;
  }
}
562
563
void CalculateMnemonicOperandsAndComment(
564
const lldb_private::ExecutionContext *exe_ctx) override {
565
DataExtractor data;
566
const AddressClass address_class = GetAddressClass();
567
568
if (m_opcode.GetData(data)) {
569
std::string out_string;
570
std::string markup_out_string;
571
std::string comment_string;
572
std::string markup_comment_string;
573
574
DisassemblerScope disasm(*this, exe_ctx);
575
if (disasm) {
576
DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr;
577
578
if (address_class == AddressClass::eCodeAlternateISA)
579
mc_disasm_ptr = disasm->m_alternate_disasm_up.get();
580
else
581
mc_disasm_ptr = disasm->m_disasm_up.get();
582
583
lldb::addr_t pc = m_address.GetFileAddress();
584
m_using_file_addr = true;
585
586
const bool data_from_file = disasm->m_data_from_file;
587
bool use_hex_immediates = true;
588
Disassembler::HexImmediateStyle hex_style = Disassembler::eHexStyleC;
589
590
if (exe_ctx) {
591
Target *target = exe_ctx->GetTargetPtr();
592
if (target) {
593
use_hex_immediates = target->GetUseHexImmediates();
594
hex_style = target->GetHexImmediateStyle();
595
596
if (!data_from_file) {
597
const lldb::addr_t load_addr = m_address.GetLoadAddress(target);
598
if (load_addr != LLDB_INVALID_ADDRESS) {
599
pc = load_addr;
600
m_using_file_addr = false;
601
}
602
}
603
}
604
}
605
606
const uint8_t *opcode_data = data.GetDataStart();
607
const size_t opcode_data_len = data.GetByteSize();
608
llvm::MCInst inst;
609
size_t inst_size =
610
mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
611
612
if (inst_size > 0) {
613
mc_disasm_ptr->SetStyle(use_hex_immediates, hex_style);
614
615
const bool saved_use_color = mc_disasm_ptr->GetUseColor();
616
mc_disasm_ptr->SetUseColor(false);
617
mc_disasm_ptr->PrintMCInst(inst, pc, out_string, comment_string);
618
mc_disasm_ptr->SetUseColor(true);
619
mc_disasm_ptr->PrintMCInst(inst, pc, markup_out_string,
620
markup_comment_string);
621
mc_disasm_ptr->SetUseColor(saved_use_color);
622
623
if (!comment_string.empty()) {
624
AppendComment(comment_string);
625
}
626
}
627
628
if (inst_size == 0) {
629
m_comment.assign("unknown opcode");
630
inst_size = m_opcode.GetByteSize();
631
StreamString mnemonic_strm;
632
lldb::offset_t offset = 0;
633
lldb::ByteOrder byte_order = data.GetByteOrder();
634
switch (inst_size) {
635
case 1: {
636
const uint8_t uval8 = data.GetU8(&offset);
637
m_opcode.SetOpcode8(uval8, byte_order);
638
m_opcode_name.assign(".byte");
639
mnemonic_strm.Printf("0x%2.2x", uval8);
640
} break;
641
case 2: {
642
const uint16_t uval16 = data.GetU16(&offset);
643
m_opcode.SetOpcode16(uval16, byte_order);
644
m_opcode_name.assign(".short");
645
mnemonic_strm.Printf("0x%4.4x", uval16);
646
} break;
647
case 4: {
648
const uint32_t uval32 = data.GetU32(&offset);
649
m_opcode.SetOpcode32(uval32, byte_order);
650
m_opcode_name.assign(".long");
651
mnemonic_strm.Printf("0x%8.8x", uval32);
652
} break;
653
case 8: {
654
const uint64_t uval64 = data.GetU64(&offset);
655
m_opcode.SetOpcode64(uval64, byte_order);
656
m_opcode_name.assign(".quad");
657
mnemonic_strm.Printf("0x%16.16" PRIx64, uval64);
658
} break;
659
default:
660
if (inst_size == 0)
661
return;
662
else {
663
const uint8_t *bytes = data.PeekData(offset, inst_size);
664
if (bytes == nullptr)
665
return;
666
m_opcode_name.assign(".byte");
667
m_opcode.SetOpcodeBytes(bytes, inst_size);
668
mnemonic_strm.Printf("0x%2.2x", bytes[0]);
669
for (uint32_t i = 1; i < inst_size; ++i)
670
mnemonic_strm.Printf(" 0x%2.2x", bytes[i]);
671
}
672
break;
673
}
674
m_mnemonics = std::string(mnemonic_strm.GetString());
675
return;
676
}
677
678
static RegularExpression s_regex(
679
llvm::StringRef("[ \t]*([^ ^\t]+)[ \t]*([^ ^\t].*)?"));
680
681
llvm::SmallVector<llvm::StringRef, 4> matches;
682
if (s_regex.Execute(out_string, &matches)) {
683
m_opcode_name = matches[1].str();
684
m_mnemonics = matches[2].str();
685
}
686
matches.clear();
687
if (s_regex.Execute(markup_out_string, &matches)) {
688
m_markup_opcode_name = matches[1].str();
689
m_markup_mnemonics = matches[2].str();
690
}
691
}
692
}
693
}
694
695
/// Returns the m_is_valid flag, which Decode() sets on its variable-size
/// decoding paths.
bool IsValid() const { return m_is_valid; }
696
697
/// True if the last mnemonic calculation used the file address as the PC
/// rather than a load address.
bool UsingFileAddress() const { return m_using_file_addr; }
698
/// Size in bytes of the stored opcode.
size_t GetByteSize() const { return m_opcode.GetByteSize(); }
699
700
/// Grants exclusive access to the disassembler and initializes it with the
701
/// given InstructionLLVMC and an optional ExecutionContext.
702
class DisassemblerScope {
703
std::shared_ptr<DisassemblerLLVMC> m_disasm;
704
705
public:
706
explicit DisassemblerScope(
707
InstructionLLVMC &i,
708
const lldb_private::ExecutionContext *exe_ctx = nullptr)
709
: m_disasm(i.m_disasm_wp.lock()) {
710
m_disasm->m_mutex.lock();
711
m_disasm->m_inst = &i;
712
m_disasm->m_exe_ctx = exe_ctx;
713
}
714
~DisassemblerScope() { m_disasm->m_mutex.unlock(); }
715
716
/// Evaluates to true if this scope contains a valid disassembler.
717
operator bool() const { return static_cast<bool>(m_disasm); }
718
719
std::shared_ptr<DisassemblerLLVMC> operator->() { return m_disasm; }
720
};
721
722
/// Advances \p osi past any run of spaces and tabs.
///
/// \return
///    An iterator to the first character that is neither ' ' nor '\t', or
///    \p ose if the rest of the input is blank.
static llvm::StringRef::const_iterator
ConsumeWhitespace(llvm::StringRef::const_iterator osi,
                  llvm::StringRef::const_iterator ose) {
  while (osi != ose && (*osi == ' ' || *osi == '\t'))
    ++osi;
  return osi;
}
738
739
static std::pair<bool, llvm::StringRef::const_iterator>
740
ConsumeChar(llvm::StringRef::const_iterator osi, const char c,
741
llvm::StringRef::const_iterator ose) {
742
bool found = false;
743
744
osi = ConsumeWhitespace(osi, ose);
745
if (osi != ose && *osi == c) {
746
found = true;
747
++osi;
748
}
749
750
return std::make_pair(found, osi);
751
}
752
753
static std::pair<Operand, llvm::StringRef::const_iterator>
754
ParseRegisterName(llvm::StringRef::const_iterator osi,
755
llvm::StringRef::const_iterator ose) {
756
Operand ret;
757
ret.m_type = Operand::Type::Register;
758
std::string str;
759
760
osi = ConsumeWhitespace(osi, ose);
761
762
while (osi != ose) {
763
if (*osi >= '0' && *osi <= '9') {
764
if (str.empty()) {
765
return std::make_pair(Operand(), osi);
766
} else {
767
str.push_back(*osi);
768
}
769
} else if (*osi >= 'a' && *osi <= 'z') {
770
str.push_back(*osi);
771
} else {
772
switch (*osi) {
773
default:
774
if (str.empty()) {
775
return std::make_pair(Operand(), osi);
776
} else {
777
ret.m_register = ConstString(str);
778
return std::make_pair(ret, osi);
779
}
780
case '%':
781
if (!str.empty()) {
782
return std::make_pair(Operand(), osi);
783
}
784
break;
785
}
786
}
787
++osi;
788
}
789
790
ret.m_register = ConstString(str);
791
return std::make_pair(ret, osi);
792
}
793
794
static std::pair<Operand, llvm::StringRef::const_iterator>
795
ParseImmediate(llvm::StringRef::const_iterator osi,
796
llvm::StringRef::const_iterator ose) {
797
Operand ret;
798
ret.m_type = Operand::Type::Immediate;
799
std::string str;
800
bool is_hex = false;
801
802
osi = ConsumeWhitespace(osi, ose);
803
804
while (osi != ose) {
805
if (*osi >= '0' && *osi <= '9') {
806
str.push_back(*osi);
807
} else if (*osi >= 'a' && *osi <= 'f') {
808
if (is_hex) {
809
str.push_back(*osi);
810
} else {
811
return std::make_pair(Operand(), osi);
812
}
813
} else {
814
switch (*osi) {
815
default:
816
if (str.empty()) {
817
return std::make_pair(Operand(), osi);
818
} else {
819
ret.m_immediate = strtoull(str.c_str(), nullptr, 0);
820
return std::make_pair(ret, osi);
821
}
822
case 'x':
823
if (!str.compare("0")) {
824
is_hex = true;
825
str.push_back(*osi);
826
} else {
827
return std::make_pair(Operand(), osi);
828
}
829
break;
830
case '#':
831
case '$':
832
if (!str.empty()) {
833
return std::make_pair(Operand(), osi);
834
}
835
break;
836
case '-':
837
if (str.empty()) {
838
ret.m_negative = true;
839
} else {
840
return std::make_pair(Operand(), osi);
841
}
842
}
843
}
844
++osi;
845
}
846
847
ret.m_immediate = strtoull(str.c_str(), nullptr, 0);
848
return std::make_pair(ret, osi);
849
}
850
851
// -0x5(%rax,%rax,2)
852
static std::pair<Operand, llvm::StringRef::const_iterator>
853
ParseIntelIndexedAccess(llvm::StringRef::const_iterator osi,
854
llvm::StringRef::const_iterator ose) {
855
std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
856
ParseImmediate(osi, ose);
857
if (offset_and_iterator.first.IsValid()) {
858
osi = offset_and_iterator.second;
859
}
860
861
bool found = false;
862
std::tie(found, osi) = ConsumeChar(osi, '(', ose);
863
if (!found) {
864
return std::make_pair(Operand(), osi);
865
}
866
867
std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
868
ParseRegisterName(osi, ose);
869
if (base_and_iterator.first.IsValid()) {
870
osi = base_and_iterator.second;
871
} else {
872
return std::make_pair(Operand(), osi);
873
}
874
875
std::tie(found, osi) = ConsumeChar(osi, ',', ose);
876
if (!found) {
877
return std::make_pair(Operand(), osi);
878
}
879
880
std::pair<Operand, llvm::StringRef::const_iterator> index_and_iterator =
881
ParseRegisterName(osi, ose);
882
if (index_and_iterator.first.IsValid()) {
883
osi = index_and_iterator.second;
884
} else {
885
return std::make_pair(Operand(), osi);
886
}
887
888
std::tie(found, osi) = ConsumeChar(osi, ',', ose);
889
if (!found) {
890
return std::make_pair(Operand(), osi);
891
}
892
893
std::pair<Operand, llvm::StringRef::const_iterator>
894
multiplier_and_iterator = ParseImmediate(osi, ose);
895
if (index_and_iterator.first.IsValid()) {
896
osi = index_and_iterator.second;
897
} else {
898
return std::make_pair(Operand(), osi);
899
}
900
901
std::tie(found, osi) = ConsumeChar(osi, ')', ose);
902
if (!found) {
903
return std::make_pair(Operand(), osi);
904
}
905
906
Operand product;
907
product.m_type = Operand::Type::Product;
908
product.m_children.push_back(index_and_iterator.first);
909
product.m_children.push_back(multiplier_and_iterator.first);
910
911
Operand index;
912
index.m_type = Operand::Type::Sum;
913
index.m_children.push_back(base_and_iterator.first);
914
index.m_children.push_back(product);
915
916
if (offset_and_iterator.first.IsValid()) {
917
Operand offset;
918
offset.m_type = Operand::Type::Sum;
919
offset.m_children.push_back(offset_and_iterator.first);
920
offset.m_children.push_back(index);
921
922
Operand deref;
923
deref.m_type = Operand::Type::Dereference;
924
deref.m_children.push_back(offset);
925
return std::make_pair(deref, osi);
926
} else {
927
Operand deref;
928
deref.m_type = Operand::Type::Dereference;
929
deref.m_children.push_back(index);
930
return std::make_pair(deref, osi);
931
}
932
}
933
934
// -0x10(%rbp)
935
static std::pair<Operand, llvm::StringRef::const_iterator>
936
ParseIntelDerefAccess(llvm::StringRef::const_iterator osi,
937
llvm::StringRef::const_iterator ose) {
938
std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
939
ParseImmediate(osi, ose);
940
if (offset_and_iterator.first.IsValid()) {
941
osi = offset_and_iterator.second;
942
}
943
944
bool found = false;
945
std::tie(found, osi) = ConsumeChar(osi, '(', ose);
946
if (!found) {
947
return std::make_pair(Operand(), osi);
948
}
949
950
std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
951
ParseRegisterName(osi, ose);
952
if (base_and_iterator.first.IsValid()) {
953
osi = base_and_iterator.second;
954
} else {
955
return std::make_pair(Operand(), osi);
956
}
957
958
std::tie(found, osi) = ConsumeChar(osi, ')', ose);
959
if (!found) {
960
return std::make_pair(Operand(), osi);
961
}
962
963
if (offset_and_iterator.first.IsValid()) {
964
Operand offset;
965
offset.m_type = Operand::Type::Sum;
966
offset.m_children.push_back(offset_and_iterator.first);
967
offset.m_children.push_back(base_and_iterator.first);
968
969
Operand deref;
970
deref.m_type = Operand::Type::Dereference;
971
deref.m_children.push_back(offset);
972
return std::make_pair(deref, osi);
973
} else {
974
Operand deref;
975
deref.m_type = Operand::Type::Dereference;
976
deref.m_children.push_back(base_and_iterator.first);
977
return std::make_pair(deref, osi);
978
}
979
}
980
981
// [sp, #8]!
982
static std::pair<Operand, llvm::StringRef::const_iterator>
983
ParseARMOffsetAccess(llvm::StringRef::const_iterator osi,
984
llvm::StringRef::const_iterator ose) {
985
bool found = false;
986
std::tie(found, osi) = ConsumeChar(osi, '[', ose);
987
if (!found) {
988
return std::make_pair(Operand(), osi);
989
}
990
991
std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
992
ParseRegisterName(osi, ose);
993
if (base_and_iterator.first.IsValid()) {
994
osi = base_and_iterator.second;
995
} else {
996
return std::make_pair(Operand(), osi);
997
}
998
999
std::tie(found, osi) = ConsumeChar(osi, ',', ose);
1000
if (!found) {
1001
return std::make_pair(Operand(), osi);
1002
}
1003
1004
std::pair<Operand, llvm::StringRef::const_iterator> offset_and_iterator =
1005
ParseImmediate(osi, ose);
1006
if (offset_and_iterator.first.IsValid()) {
1007
osi = offset_and_iterator.second;
1008
}
1009
1010
std::tie(found, osi) = ConsumeChar(osi, ']', ose);
1011
if (!found) {
1012
return std::make_pair(Operand(), osi);
1013
}
1014
1015
Operand offset;
1016
offset.m_type = Operand::Type::Sum;
1017
offset.m_children.push_back(offset_and_iterator.first);
1018
offset.m_children.push_back(base_and_iterator.first);
1019
1020
Operand deref;
1021
deref.m_type = Operand::Type::Dereference;
1022
deref.m_children.push_back(offset);
1023
return std::make_pair(deref, osi);
1024
}
1025
1026
// [sp]
1027
static std::pair<Operand, llvm::StringRef::const_iterator>
1028
ParseARMDerefAccess(llvm::StringRef::const_iterator osi,
1029
llvm::StringRef::const_iterator ose) {
1030
bool found = false;
1031
std::tie(found, osi) = ConsumeChar(osi, '[', ose);
1032
if (!found) {
1033
return std::make_pair(Operand(), osi);
1034
}
1035
1036
std::pair<Operand, llvm::StringRef::const_iterator> base_and_iterator =
1037
ParseRegisterName(osi, ose);
1038
if (base_and_iterator.first.IsValid()) {
1039
osi = base_and_iterator.second;
1040
} else {
1041
return std::make_pair(Operand(), osi);
1042
}
1043
1044
std::tie(found, osi) = ConsumeChar(osi, ']', ose);
1045
if (!found) {
1046
return std::make_pair(Operand(), osi);
1047
}
1048
1049
Operand deref;
1050
deref.m_type = Operand::Type::Dereference;
1051
deref.m_children.push_back(base_and_iterator.first);
1052
return std::make_pair(deref, osi);
1053
}
1054
1055
static void DumpOperand(const Operand &op, Stream &s) {
1056
switch (op.m_type) {
1057
case Operand::Type::Dereference:
1058
s.PutCString("*");
1059
DumpOperand(op.m_children[0], s);
1060
break;
1061
case Operand::Type::Immediate:
1062
if (op.m_negative) {
1063
s.PutCString("-");
1064
}
1065
s.PutCString(llvm::to_string(op.m_immediate));
1066
break;
1067
case Operand::Type::Invalid:
1068
s.PutCString("Invalid");
1069
break;
1070
case Operand::Type::Product:
1071
s.PutCString("(");
1072
DumpOperand(op.m_children[0], s);
1073
s.PutCString("*");
1074
DumpOperand(op.m_children[1], s);
1075
s.PutCString(")");
1076
break;
1077
case Operand::Type::Register:
1078
s.PutCString(op.m_register.GetStringRef());
1079
break;
1080
case Operand::Type::Sum:
1081
s.PutCString("(");
1082
DumpOperand(op.m_children[0], s);
1083
s.PutCString("+");
1084
DumpOperand(op.m_children[1], s);
1085
s.PutCString(")");
1086
break;
1087
}
1088
}
1089
1090
bool ParseOperands(
1091
llvm::SmallVectorImpl<Instruction::Operand> &operands) override {
1092
const char *operands_string = GetOperands(nullptr);
1093
1094
if (!operands_string) {
1095
return false;
1096
}
1097
1098
llvm::StringRef operands_ref(operands_string);
1099
1100
llvm::StringRef::const_iterator osi = operands_ref.begin();
1101
llvm::StringRef::const_iterator ose = operands_ref.end();
1102
1103
while (osi != ose) {
1104
Operand operand;
1105
llvm::StringRef::const_iterator iter;
1106
1107
if ((std::tie(operand, iter) = ParseIntelIndexedAccess(osi, ose),
1108
operand.IsValid()) ||
1109
(std::tie(operand, iter) = ParseIntelDerefAccess(osi, ose),
1110
operand.IsValid()) ||
1111
(std::tie(operand, iter) = ParseARMOffsetAccess(osi, ose),
1112
operand.IsValid()) ||
1113
(std::tie(operand, iter) = ParseARMDerefAccess(osi, ose),
1114
operand.IsValid()) ||
1115
(std::tie(operand, iter) = ParseRegisterName(osi, ose),
1116
operand.IsValid()) ||
1117
(std::tie(operand, iter) = ParseImmediate(osi, ose),
1118
operand.IsValid())) {
1119
osi = iter;
1120
operands.push_back(operand);
1121
} else {
1122
return false;
1123
}
1124
1125
std::pair<bool, llvm::StringRef::const_iterator> found_and_iter =
1126
ConsumeChar(osi, ',', ose);
1127
if (found_and_iter.first) {
1128
osi = found_and_iter.second;
1129
}
1130
1131
osi = ConsumeWhitespace(osi, ose);
1132
}
1133
1134
DisassemblerSP disasm_sp = m_disasm_wp.lock();
1135
1136
if (disasm_sp && operands.size() > 1) {
1137
// TODO tie this into the MC Disassembler's notion of clobbers.
1138
switch (disasm_sp->GetArchitecture().GetMachine()) {
1139
default:
1140
break;
1141
case llvm::Triple::x86:
1142
case llvm::Triple::x86_64:
1143
operands[operands.size() - 1].m_clobbered = true;
1144
break;
1145
case llvm::Triple::arm:
1146
operands[0].m_clobbered = true;
1147
break;
1148
}
1149
}
1150
1151
if (Log *log = GetLog(LLDBLog::Process)) {
1152
StreamString ss;
1153
1154
ss.Printf("[%s] expands to %zu operands:\n", operands_string,
1155
operands.size());
1156
for (const Operand &operand : operands) {
1157
ss.PutCString(" ");
1158
DumpOperand(operand, ss);
1159
ss.PutCString("\n");
1160
}
1161
1162
log->PutString(ss.GetString());
1163
}
1164
1165
return true;
1166
}
1167
1168
bool IsCall() override {
1169
VisitInstruction();
1170
return m_is_call;
1171
}
1172
1173
protected:
1174
// Non-owning handle to the disassembler; locked on demand where the
// disassembler is needed (e.g. ParseOperands).
std::weak_ptr<DisassemblerLLVMC> m_disasm_wp;

bool m_is_valid = false;
bool m_using_file_addr = false;
// Set by VisitInstruction() once the flags below hold decoded values.
bool m_has_visited_instruction = false;

// Conservative defaults, used while the instruction has not been (or could
// not be) decoded. An instruction we do not understand is treated as one
// that:
// - Might branch
// - Does not have a delay slot
// - Is not a call
// - Is not a load
// - Is not an authenticated instruction
bool m_does_branch = true;
bool m_has_delay_slot = false;
bool m_is_call = false;
bool m_is_load = false;
bool m_is_authenticated = false;
1192
void VisitInstruction() {
1193
if (m_has_visited_instruction)
1194
return;
1195
1196
DisassemblerScope disasm(*this);
1197
if (!disasm)
1198
return;
1199
1200
DataExtractor data;
1201
if (!m_opcode.GetData(data))
1202
return;
1203
1204
bool is_alternate_isa;
1205
lldb::addr_t pc = m_address.GetFileAddress();
1206
DisassemblerLLVMC::MCDisasmInstance *mc_disasm_ptr =
1207
GetDisasmToUse(is_alternate_isa, disasm);
1208
const uint8_t *opcode_data = data.GetDataStart();
1209
const size_t opcode_data_len = data.GetByteSize();
1210
llvm::MCInst inst;
1211
const size_t inst_size =
1212
mc_disasm_ptr->GetMCInst(opcode_data, opcode_data_len, pc, inst);
1213
if (inst_size == 0)
1214
return;
1215
1216
m_has_visited_instruction = true;
1217
m_does_branch = mc_disasm_ptr->CanBranch(inst);
1218
m_has_delay_slot = mc_disasm_ptr->HasDelaySlot(inst);
1219
m_is_call = mc_disasm_ptr->IsCall(inst);
1220
m_is_load = mc_disasm_ptr->IsLoad(inst);
1221
m_is_authenticated = mc_disasm_ptr->IsAuthenticated(inst);
1222
}
1223
1224
private:
1225
// Chooses the MC disassembler instance for this instruction.
//
// is_alternate_isa: set to true only when the alternate instance is picked,
//                   false in every other case.
// disasm:           scope giving access to the owning disassembler.
//
// Returns the alternate instance when one exists and this instruction's
// address class is eCodeAlternateISA, the primary instance otherwise, and
// nullptr when `disasm` is not usable.
DisassemblerLLVMC::MCDisasmInstance *
GetDisasmToUse(bool &is_alternate_isa, DisassemblerScope &disasm) {
  is_alternate_isa = false;
  if (!disasm)
    return nullptr;

  // The address class is only queried when an alternate instance exists.
  if (disasm->m_alternate_disasm_up &&
      GetAddressClass() == AddressClass::eCodeAlternateISA) {
    is_alternate_isa = true;
    return disasm->m_alternate_disasm_up.get();
  }

  return disasm->m_disasm_up.get();
}
1241
};
1242
1243
std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>
1244
DisassemblerLLVMC::MCDisasmInstance::Create(const char *triple, const char *cpu,
1245
const char *features_str,
1246
unsigned flavor,
1247
DisassemblerLLVMC &owner) {
1248
using Instance = std::unique_ptr<DisassemblerLLVMC::MCDisasmInstance>;
1249
1250
std::string Status;
1251
const llvm::Target *curr_target =
1252
llvm::TargetRegistry::lookupTarget(triple, Status);
1253
if (!curr_target)
1254
return Instance();
1255
1256
std::unique_ptr<llvm::MCInstrInfo> instr_info_up(
1257
curr_target->createMCInstrInfo());
1258
if (!instr_info_up)
1259
return Instance();
1260
1261
std::unique_ptr<llvm::MCRegisterInfo> reg_info_up(
1262
curr_target->createMCRegInfo(triple));
1263
if (!reg_info_up)
1264
return Instance();
1265
1266
std::unique_ptr<llvm::MCSubtargetInfo> subtarget_info_up(
1267
curr_target->createMCSubtargetInfo(triple, cpu, features_str));
1268
if (!subtarget_info_up)
1269
return Instance();
1270
1271
llvm::MCTargetOptions MCOptions;
1272
std::unique_ptr<llvm::MCAsmInfo> asm_info_up(
1273
curr_target->createMCAsmInfo(*reg_info_up, triple, MCOptions));
1274
if (!asm_info_up)
1275
return Instance();
1276
1277
std::unique_ptr<llvm::MCContext> context_up(
1278
new llvm::MCContext(llvm::Triple(triple), asm_info_up.get(),
1279
reg_info_up.get(), subtarget_info_up.get()));
1280
if (!context_up)
1281
return Instance();
1282
1283
std::unique_ptr<llvm::MCDisassembler> disasm_up(
1284
curr_target->createMCDisassembler(*subtarget_info_up, *context_up));
1285
if (!disasm_up)
1286
return Instance();
1287
1288
std::unique_ptr<llvm::MCRelocationInfo> rel_info_up(
1289
curr_target->createMCRelocationInfo(triple, *context_up));
1290
if (!rel_info_up)
1291
return Instance();
1292
1293
std::unique_ptr<llvm::MCSymbolizer> symbolizer_up(
1294
curr_target->createMCSymbolizer(
1295
triple, nullptr, DisassemblerLLVMC::SymbolLookupCallback, &owner,
1296
context_up.get(), std::move(rel_info_up)));
1297
disasm_up->setSymbolizer(std::move(symbolizer_up));
1298
1299
unsigned asm_printer_variant =
1300
flavor == ~0U ? asm_info_up->getAssemblerDialect() : flavor;
1301
1302
std::unique_ptr<llvm::MCInstPrinter> instr_printer_up(
1303
curr_target->createMCInstPrinter(llvm::Triple{triple},
1304
asm_printer_variant, *asm_info_up,
1305
*instr_info_up, *reg_info_up));
1306
if (!instr_printer_up)
1307
return Instance();
1308
1309
instr_printer_up->setPrintBranchImmAsAddress(true);
1310
1311
// Not all targets may have registered createMCInstrAnalysis().
1312
std::unique_ptr<llvm::MCInstrAnalysis> instr_analysis_up(
1313
curr_target->createMCInstrAnalysis(instr_info_up.get()));
1314
1315
return Instance(new MCDisasmInstance(
1316
std::move(instr_info_up), std::move(reg_info_up),
1317
std::move(subtarget_info_up), std::move(asm_info_up),
1318
std::move(context_up), std::move(disasm_up), std::move(instr_printer_up),
1319
std::move(instr_analysis_up)));
1320
}
1321
1322
// Takes ownership of every MC component handed in. All of them except the
// instruction-analysis object are asserted to be non-null.
DisassemblerLLVMC::MCDisasmInstance::MCDisasmInstance(
    std::unique_ptr<llvm::MCInstrInfo> &&instr_info_up,
    std::unique_ptr<llvm::MCRegisterInfo> &&reg_info_up,
    std::unique_ptr<llvm::MCSubtargetInfo> &&subtarget_info_up,
    std::unique_ptr<llvm::MCAsmInfo> &&asm_info_up,
    std::unique_ptr<llvm::MCContext> &&context_up,
    std::unique_ptr<llvm::MCDisassembler> &&disasm_up,
    std::unique_ptr<llvm::MCInstPrinter> &&instr_printer_up,
    std::unique_ptr<llvm::MCInstrAnalysis> &&instr_analysis_up)
    : m_instr_info_up(std::move(instr_info_up)),
      m_reg_info_up(std::move(reg_info_up)),
      m_subtarget_info_up(std::move(subtarget_info_up)),
      m_asm_info_up(std::move(asm_info_up)),
      m_context_up(std::move(context_up)),
      m_disasm_up(std::move(disasm_up)),
      m_instr_printer_up(std::move(instr_printer_up)),
      m_instr_analysis_up(std::move(instr_analysis_up)) {
  // m_instr_analysis_up is deliberately absent from this check.
  assert(m_instr_info_up && m_reg_info_up && m_subtarget_info_up &&
         m_asm_info_up && m_context_up && m_disasm_up && m_instr_printer_up);
}
1341
1342
uint64_t DisassemblerLLVMC::MCDisasmInstance::GetMCInst(
1343
const uint8_t *opcode_data, size_t opcode_data_len, lldb::addr_t pc,
1344
llvm::MCInst &mc_inst) const {
1345
llvm::ArrayRef<uint8_t> data(opcode_data, opcode_data_len);
1346
llvm::MCDisassembler::DecodeStatus status;
1347
1348
uint64_t new_inst_size;
1349
status = m_disasm_up->getInstruction(mc_inst, new_inst_size, data, pc,
1350
llvm::nulls());
1351
if (status == llvm::MCDisassembler::Success)
1352
return new_inst_size;
1353
else
1354
return 0;
1355
}
1356
1357
void DisassemblerLLVMC::MCDisasmInstance::PrintMCInst(
1358
llvm::MCInst &mc_inst, lldb::addr_t pc, std::string &inst_string,
1359
std::string &comments_string) {
1360
llvm::raw_string_ostream inst_stream(inst_string);
1361
llvm::raw_string_ostream comments_stream(comments_string);
1362
1363
inst_stream.enable_colors(m_instr_printer_up->getUseColor());
1364
m_instr_printer_up->setCommentStream(comments_stream);
1365
m_instr_printer_up->printInst(&mc_inst, pc, llvm::StringRef(),
1366
*m_subtarget_info_up, inst_stream);
1367
m_instr_printer_up->setCommentStream(llvm::nulls());
1368
1369
comments_stream.flush();
1370
1371
static std::string g_newlines("\r\n");
1372
1373
for (size_t newline_pos = 0;
1374
(newline_pos = comments_string.find_first_of(g_newlines, newline_pos)) !=
1375
comments_string.npos;
1376
/**/) {
1377
comments_string.replace(comments_string.begin() + newline_pos,
1378
comments_string.begin() + newline_pos + 1, 1, ' ');
1379
}
1380
}
1381
1382
void DisassemblerLLVMC::MCDisasmInstance::SetStyle(
1383
bool use_hex_immed, HexImmediateStyle hex_style) {
1384
m_instr_printer_up->setPrintImmHex(use_hex_immed);
1385
switch (hex_style) {
1386
case eHexStyleC:
1387
m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::C);
1388
break;
1389
case eHexStyleAsm:
1390
m_instr_printer_up->setPrintHexStyle(llvm::HexStyle::Asm);
1391
break;
1392
}
1393
}
1394
1395
void DisassemblerLLVMC::MCDisasmInstance::SetUseColor(bool use_color) {
1396
m_instr_printer_up->setUseColor(use_color);
1397
}
1398
1399
bool DisassemblerLLVMC::MCDisasmInstance::GetUseColor() const {
1400
return m_instr_printer_up->getUseColor();
1401
}
1402
1403
bool DisassemblerLLVMC::MCDisasmInstance::CanBranch(
1404
llvm::MCInst &mc_inst) const {
1405
if (m_instr_analysis_up)
1406
return m_instr_analysis_up->mayAffectControlFlow(mc_inst, *m_reg_info_up);
1407
return m_instr_info_up->get(mc_inst.getOpcode())
1408
.mayAffectControlFlow(mc_inst, *m_reg_info_up);
1409
}
1410
1411
bool DisassemblerLLVMC::MCDisasmInstance::HasDelaySlot(
1412
llvm::MCInst &mc_inst) const {
1413
return m_instr_info_up->get(mc_inst.getOpcode()).hasDelaySlot();
1414
}
1415
1416
// Reports whether `mc_inst` is a call. The target's instruction-analysis
// object answers when one is available; otherwise the instruction's
// descriptor is consulted.
bool DisassemblerLLVMC::MCDisasmInstance::IsCall(llvm::MCInst &mc_inst) const {
  if (!m_instr_analysis_up) {
    const auto &desc = m_instr_info_up->get(mc_inst.getOpcode());
    return desc.isCall();
  }
  return m_instr_analysis_up->isCall(mc_inst);
}
1421
1422
// Reports whether the descriptor for `mc_inst`'s opcode says it may load.
bool DisassemblerLLVMC::MCDisasmInstance::IsLoad(llvm::MCInst &mc_inst) const {
  const auto &desc = m_instr_info_up->get(mc_inst.getOpcode());
  return desc.mayLoad();
}
1425
1426
bool DisassemblerLLVMC::MCDisasmInstance::IsAuthenticated(
1427
llvm::MCInst &mc_inst) const {
1428
const auto &InstrDesc = m_instr_info_up->get(mc_inst.getOpcode());
1429
1430
// Treat software auth traps (brk 0xc470 + aut key, where 0x70 == 'p', 0xc4
1431
// == 'a' + 'c') as authenticated instructions for reporting purposes, in
1432
// addition to the standard authenticated instructions specified in ARMv8.3.
1433
bool IsBrkC47x = false;
1434
if (InstrDesc.isTrap() && mc_inst.getNumOperands() == 1) {
1435
const llvm::MCOperand &Op0 = mc_inst.getOperand(0);
1436
if (Op0.isImm() && Op0.getImm() >= 0xc470 && Op0.getImm() <= 0xc474)
1437
IsBrkC47x = true;
1438
}
1439
1440
return InstrDesc.isAuthenticated() || IsBrkC47x;
1441
}
1442
1443
// Sets up the MC disassembler(s) for `arch`.
//
// The constructor derives a triple, CPU name and feature string from `arch`,
// creates the primary MC disassembler from them and, for arm and MIPS, an
// alternate one as well (thumb; MIPS16/microMIPS). If a required alternate
// disassembler cannot be created, the primary one is dropped too, which
// makes IsValid() report false.
DisassemblerLLVMC::DisassemblerLLVMC(const ArchSpec &arch,
                                     const char *flavor_string)
    : Disassembler(arch, flavor_string), m_exe_ctx(nullptr), m_inst(nullptr),
      m_data_from_file(false), m_adrp_address(LLDB_INVALID_ADDRESS),
      m_adrp_insn() {
  // A flavor that does not apply to this architecture degrades to "default".
  if (!FlavorValidForArchSpec(arch, m_flavor.c_str()))
    m_flavor.assign("default");

  llvm::Triple triple = arch.GetTriple();

  // ~0U means "no explicit choice"; Create() then uses the target's own
  // assembler dialect. Only x86 maps flavor names to a printer variant:
  // "intel" -> 1, "att" -> 0.
  unsigned flavor = ~0U;
  const bool is_x86_family = triple.getArch() == llvm::Triple::x86 ||
                             triple.getArch() == llvm::Triple::x86_64;
  if (is_x86_family) {
    if (m_flavor == "intel")
      flavor = 1;
    else if (m_flavor == "att")
      flavor = 0;
  }

  ArchSpec thumb_arch(arch);
  if (triple.getArch() == llvm::Triple::arm) {
    // Replace "arm" with "thumb" so we get all thumb variants correct. A
    // name with nothing after the first three characters becomes the most
    // recent thumb architecture instead.
    std::string thumb_arch_name(thumb_arch.GetTriple().getArchName().str());
    if (thumb_arch_name.size() > 3)
      thumb_arch_name.replace(0, 3, "thumb");
    else
      thumb_arch_name = "thumbv9.3a";
    thumb_arch.GetTriple().setArchName(llvm::StringRef(thumb_arch_name));

    // If no sub architecture is specified, use the most recent arm
    // architecture so the disassembler returns all instructions. Without
    // this we would see many unknown opcodes whenever the code uses
    // instructions missing from the oldest arm version, which is what a
    // missing sub architecture selects.
    if (triple.getSubArch() == llvm::Triple::NoSubArch)
      triple.setArchName("armv9.3a");
  }

  std::string features_str;
  const char *triple_str = triple.getTriple().c_str();

  // ARM Cortex M0-M7 devices only execute thumb instructions
  if (arch.IsAlwaysThumbInstructions()) {
    triple_str = thumb_arch.GetTriple().getTriple().c_str();
    features_str += "+fp-armv8,";
  }

  // Only the MIPS cores get an explicit CPU name; everything else keeps "".
  const char *cpu = "";
  switch (arch.GetCore()) {
  case ArchSpec::eCore_mips32:
  case ArchSpec::eCore_mips32el:
    cpu = "mips32";
    break;
  case ArchSpec::eCore_mips32r2:
  case ArchSpec::eCore_mips32r2el:
    cpu = "mips32r2";
    break;
  case ArchSpec::eCore_mips32r3:
  case ArchSpec::eCore_mips32r3el:
    cpu = "mips32r3";
    break;
  case ArchSpec::eCore_mips32r5:
  case ArchSpec::eCore_mips32r5el:
    cpu = "mips32r5";
    break;
  case ArchSpec::eCore_mips32r6:
  case ArchSpec::eCore_mips32r6el:
    cpu = "mips32r6";
    break;
  case ArchSpec::eCore_mips64:
  case ArchSpec::eCore_mips64el:
    cpu = "mips64";
    break;
  case ArchSpec::eCore_mips64r2:
  case ArchSpec::eCore_mips64r2el:
    cpu = "mips64r2";
    break;
  case ArchSpec::eCore_mips64r3:
  case ArchSpec::eCore_mips64r3el:
    cpu = "mips64r3";
    break;
  case ArchSpec::eCore_mips64r5:
  case ArchSpec::eCore_mips64r5el:
    cpu = "mips64r5";
    break;
  case ArchSpec::eCore_mips64r6:
  case ArchSpec::eCore_mips64r6el:
    cpu = "mips64r6";
    break;
  default:
    cpu = "";
    break;
  }

  // MIPS application-specific extensions recorded in the arch flags.
  if (arch.IsMIPS()) {
    const uint32_t mips_flags = arch.GetFlags();
    if (mips_flags & ArchSpec::eMIPSAse_msa)
      features_str += "+msa,";
    if (mips_flags & ArchSpec::eMIPSAse_dsp)
      features_str += "+dsp,";
    if (mips_flags & ArchSpec::eMIPSAse_dspr2)
      features_str += "+dspr2,";
  }

  // If any AArch64 variant, enable latest ISA with all extensions.
  if (triple.isAArch64()) {
    features_str += "+all,";
    if (triple.getVendor() == llvm::Triple::Apple)
      cpu = "apple-latest";
  }

  if (triple.isRISCV()) {
    const uint32_t riscv_flags = arch.GetFlags();
    // True when every bit of `mask` is set in the flags.
    const auto has_all_bits = [riscv_flags](uint32_t mask) {
      return (riscv_flags & mask) == mask;
    };

    if (riscv_flags & ArchSpec::eRISCV_rvc)
      features_str += "+c,";
    if (riscv_flags & ArchSpec::eRISCV_rve)
      features_str += "+e,";
    if (has_all_bits(ArchSpec::eRISCV_float_abi_single))
      features_str += "+f,";
    if (has_all_bits(ArchSpec::eRISCV_float_abi_double))
      features_str += "+f,+d,";
    if (has_all_bits(ArchSpec::eRISCV_float_abi_quad))
      features_str += "+f,+d,+q,";
    // FIXME: how do we detect features such as `+a`, `+m`?
    // Turn them on by default now, since everyone seems to use them
    features_str += "+a,+m,";
  }

  // We use m_disasm_up.get() to tell whether we are valid or not, so if this
  // isn't good for some reason, we won't be valid and FindPlugin will fail and
  // we won't get used.
  m_disasm_up = MCDisasmInstance::Create(triple_str, cpu, features_str.c_str(),
                                         flavor, *this);

  if (triple.getArch() == llvm::Triple::arm) {
    // For arm CPUs that can execute arm or thumb instructions, also create a
    // thumb instruction disassembler.
    const std::string thumb_triple(thumb_arch.GetTriple().getTriple());
    m_alternate_disasm_up = MCDisasmInstance::Create(
        thumb_triple.c_str(), "", features_str.c_str(), flavor, *this);
    if (!m_alternate_disasm_up)
      m_disasm_up.reset();
  } else if (arch.IsMIPS()) {
    // Create alternate disassembler for MIPS16 and microMIPS
    const uint32_t mips_flags = arch.GetFlags();
    if (mips_flags & ArchSpec::eMIPSAse_mips16)
      features_str += "+mips16,";
    else if (mips_flags & ArchSpec::eMIPSAse_micromips)
      features_str += "+micromips,";

    m_alternate_disasm_up = MCDisasmInstance::Create(
        triple_str, cpu, features_str.c_str(), flavor, *this);
    if (!m_alternate_disasm_up)
      m_disasm_up.reset();
  }
}
1615
1616
// Defaulted destructor, defined out of line in this file.
DisassemblerLLVMC::~DisassemblerLLVMC() = default;
1617
1618
// Plugin factory: builds a disassembler for `arch` with the given flavor.
// Returns an empty pointer when the architecture is unknown or the resulting
// disassembler is not valid.
lldb::DisassemblerSP DisassemblerLLVMC::CreateInstance(const ArchSpec &arch,
                                                       const char *flavor) {
  if (arch.GetTriple().getArch() == llvm::Triple::UnknownArch)
    return lldb::DisassemblerSP();

  auto candidate_sp = std::make_shared<DisassemblerLLVMC>(arch, flavor);
  const bool usable = candidate_sp && candidate_sp->IsValid();
  if (!usable)
    return lldb::DisassemblerSP();

  return candidate_sp;
}
1627
1628
size_t DisassemblerLLVMC::DecodeInstructions(const Address &base_addr,
1629
const DataExtractor &data,
1630
lldb::offset_t data_offset,
1631
size_t num_instructions,
1632
bool append, bool data_from_file) {
1633
if (!append)
1634
m_instruction_list.Clear();
1635
1636
if (!IsValid())
1637
return 0;
1638
1639
m_data_from_file = data_from_file;
1640
uint32_t data_cursor = data_offset;
1641
const size_t data_byte_size = data.GetByteSize();
1642
uint32_t instructions_parsed = 0;
1643
Address inst_addr(base_addr);
1644
1645
while (data_cursor < data_byte_size &&
1646
instructions_parsed < num_instructions) {
1647
1648
AddressClass address_class = AddressClass::eCode;
1649
1650
if (m_alternate_disasm_up)
1651
address_class = inst_addr.GetAddressClass();
1652
1653
InstructionSP inst_sp(
1654
new InstructionLLVMC(*this, inst_addr, address_class));
1655
1656
if (!inst_sp)
1657
break;
1658
1659
uint32_t inst_size = inst_sp->Decode(*this, data, data_cursor);
1660
1661
if (inst_size == 0)
1662
break;
1663
1664
m_instruction_list.Append(inst_sp);
1665
data_cursor += inst_size;
1666
inst_addr.Slide(inst_size);
1667
instructions_parsed++;
1668
}
1669
1670
return data_cursor - data_offset;
1671
}
1672
1673
void DisassemblerLLVMC::Initialize() {
1674
PluginManager::RegisterPlugin(GetPluginNameStatic(),
1675
"Disassembler that uses LLVM MC to disassemble "
1676
"i386, x86_64, ARM, and ARM64.",
1677
CreateInstance);
1678
1679
llvm::InitializeAllTargetInfos();
1680
llvm::InitializeAllTargetMCs();
1681
llvm::InitializeAllAsmParsers();
1682
llvm::InitializeAllDisassemblers();
1683
}
1684
1685
void DisassemblerLLVMC::Terminate() {
1686
PluginManager::UnregisterPlugin(CreateInstance);
1687
}
1688
1689
int DisassemblerLLVMC::OpInfoCallback(void *disassembler, uint64_t pc,
1690
uint64_t offset, uint64_t size,
1691
int tag_type, void *tag_bug) {
1692
return static_cast<DisassemblerLLVMC *>(disassembler)
1693
->OpInfo(pc, offset, size, tag_type, tag_bug);
1694
}
1695
1696
const char *DisassemblerLLVMC::SymbolLookupCallback(void *disassembler,
1697
uint64_t value,
1698
uint64_t *type, uint64_t pc,
1699
const char **name) {
1700
return static_cast<DisassemblerLLVMC *>(disassembler)
1701
->SymbolLookup(value, type, pc, name);
1702
}
1703
1704
bool DisassemblerLLVMC::FlavorValidForArchSpec(
1705
const lldb_private::ArchSpec &arch, const char *flavor) {
1706
llvm::Triple triple = arch.GetTriple();
1707
if (flavor == nullptr || strcmp(flavor, "default") == 0)
1708
return true;
1709
1710
if (triple.getArch() == llvm::Triple::x86 ||
1711
triple.getArch() == llvm::Triple::x86_64) {
1712
return strcmp(flavor, "intel") == 0 || strcmp(flavor, "att") == 0;
1713
} else
1714
return false;
1715
}
1716
1717
// Valid exactly when the primary MC disassembler instance exists.
bool DisassemblerLLVMC::IsValid() const {
  return static_cast<bool>(m_disasm_up);
}
1718
1719
int DisassemblerLLVMC::OpInfo(uint64_t PC, uint64_t Offset, uint64_t Size,
1720
int tag_type, void *tag_bug) {
1721
switch (tag_type) {
1722
default:
1723
break;
1724
case 1:
1725
memset(tag_bug, 0, sizeof(::LLVMOpInfo1));
1726
break;
1727
}
1728
return 0;
1729
}
1730
1731
const char *DisassemblerLLVMC::SymbolLookup(uint64_t value, uint64_t *type_ptr,
1732
uint64_t pc, const char **name) {
1733
if (*type_ptr) {
1734
if (m_exe_ctx && m_inst) {
1735
// std::string remove_this_prior_to_checkin;
1736
Target *target = m_exe_ctx ? m_exe_ctx->GetTargetPtr() : nullptr;
1737
Address value_so_addr;
1738
Address pc_so_addr;
1739
if (target->GetArchitecture().GetMachine() == llvm::Triple::aarch64 ||
1740
target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_be ||
1741
target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32) {
1742
if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADRP) {
1743
m_adrp_address = pc;
1744
m_adrp_insn = value;
1745
*name = nullptr;
1746
*type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
1747
return nullptr;
1748
}
1749
// If this instruction is an ADD and
1750
// the previous instruction was an ADRP and
1751
// the ADRP's register and this ADD's register are the same,
1752
// then this is a pc-relative address calculation.
1753
if (*type_ptr == LLVMDisassembler_ReferenceType_In_ARM64_ADDXri &&
1754
m_adrp_insn && m_adrp_address == pc - 4 &&
1755
(*m_adrp_insn & 0x1f) == ((value >> 5) & 0x1f)) {
1756
uint32_t addxri_inst;
1757
uint64_t adrp_imm, addxri_imm;
1758
// Get immlo and immhi bits, OR them together to get the ADRP imm
1759
// value.
1760
adrp_imm =
1761
((*m_adrp_insn & 0x00ffffe0) >> 3) | ((*m_adrp_insn >> 29) & 0x3);
1762
// if high bit of immhi after right-shifting set, sign extend
1763
if (adrp_imm & (1ULL << 20))
1764
adrp_imm |= ~((1ULL << 21) - 1);
1765
1766
addxri_inst = value;
1767
addxri_imm = (addxri_inst >> 10) & 0xfff;
1768
// check if 'sh' bit is set, shift imm value up if so
1769
// (this would make no sense, ADRP already gave us this part)
1770
if ((addxri_inst >> (12 + 5 + 5)) & 1)
1771
addxri_imm <<= 12;
1772
value = (m_adrp_address & 0xfffffffffffff000LL) + (adrp_imm << 12) +
1773
addxri_imm;
1774
}
1775
m_adrp_address = LLDB_INVALID_ADDRESS;
1776
m_adrp_insn.reset();
1777
}
1778
1779
if (m_inst->UsingFileAddress()) {
1780
ModuleSP module_sp(m_inst->GetAddress().GetModule());
1781
if (module_sp) {
1782
module_sp->ResolveFileAddress(value, value_so_addr);
1783
module_sp->ResolveFileAddress(pc, pc_so_addr);
1784
}
1785
} else if (target && !target->GetSectionLoadList().IsEmpty()) {
1786
target->GetSectionLoadList().ResolveLoadAddress(value, value_so_addr);
1787
target->GetSectionLoadList().ResolveLoadAddress(pc, pc_so_addr);
1788
}
1789
1790
SymbolContext sym_ctx;
1791
const SymbolContextItem resolve_scope =
1792
eSymbolContextFunction | eSymbolContextSymbol;
1793
if (pc_so_addr.IsValid() && pc_so_addr.GetModule()) {
1794
pc_so_addr.GetModule()->ResolveSymbolContextForAddress(
1795
pc_so_addr, resolve_scope, sym_ctx);
1796
}
1797
1798
if (value_so_addr.IsValid() && value_so_addr.GetSection()) {
1799
StreamString ss;
1800
1801
bool format_omitting_current_func_name = false;
1802
if (sym_ctx.symbol || sym_ctx.function) {
1803
AddressRange range;
1804
if (sym_ctx.GetAddressRange(resolve_scope, 0, false, range) &&
1805
range.GetBaseAddress().IsValid() &&
1806
range.ContainsLoadAddress(value_so_addr, target)) {
1807
format_omitting_current_func_name = true;
1808
}
1809
}
1810
1811
// If the "value" address (the target address we're symbolicating) is
1812
// inside the same SymbolContext as the current instruction pc
1813
// (pc_so_addr), don't print the full function name - just print it
1814
// with DumpStyleNoFunctionName style, e.g. "<+36>".
1815
if (format_omitting_current_func_name) {
1816
value_so_addr.Dump(&ss, target, Address::DumpStyleNoFunctionName,
1817
Address::DumpStyleSectionNameOffset);
1818
} else {
1819
value_so_addr.Dump(
1820
&ss, target,
1821
Address::DumpStyleResolvedDescriptionNoFunctionArguments,
1822
Address::DumpStyleSectionNameOffset);
1823
}
1824
1825
if (!ss.GetString().empty()) {
1826
// If Address::Dump returned a multi-line description, most commonly
1827
// seen when we have multiple levels of inlined functions at an
1828
// address, only show the first line.
1829
std::string str = std::string(ss.GetString());
1830
size_t first_eol_char = str.find_first_of("\r\n");
1831
if (first_eol_char != std::string::npos) {
1832
str.erase(first_eol_char);
1833
}
1834
m_inst->AppendComment(str);
1835
}
1836
}
1837
}
1838
}
1839
1840
// TODO: llvm-objdump sets the type_ptr to the
1841
// LLVMDisassembler_ReferenceType_Out_* values
1842
// based on where value_so_addr is pointing, with
1843
// Mach-O specific augmentations in MachODump.cpp. e.g.
1844
// see what AArch64ExternalSymbolizer::tryAddingSymbolicOperand
1845
// handles.
1846
*type_ptr = LLVMDisassembler_ReferenceType_InOut_None;
1847
*name = nullptr;
1848
return nullptr;
1849
}
1850
1851