CoCalc -- X86Disassembler.cpp

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
³⁵²⁹³ views
1
//===-- X86Disassembler.cpp - Disassembler for x86 and x86_64 -------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file is part of the X86 Disassembler.
10
// It contains code to translate the data produced by the decoder into
11
//  MCInsts.
12
//
13
//
14
// The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
15
// 64-bit X86 instruction sets.  The main decode sequence for an assembly
16
// instruction in this disassembler is:
17
//
18
// 1. Read the prefix bytes and determine the attributes of the instruction.
19
//    These attributes, recorded in enum attributeBits
20
//    (X86DisassemblerDecoderCommon.h), form a bitmask.  The table CONTEXTS_SYM
21
//    provides a mapping from bitmasks to contexts, which are represented by
22
//    enum InstructionContext (ibid.).
23
//
24
// 2. Read the opcode, and determine what kind of opcode it is.  The
25
//    disassembler distinguishes four kinds of opcodes, which are enumerated in
26
//    OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
27
//    (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
28
//    (0x0f 0x3a 0xnn).  Mandatory prefixes are treated as part of the context.
29
//
30
// 3. Depending on the opcode type, look in one of four ClassDecision structures
31
//    (X86DisassemblerDecoderCommon.h).  Use the opcode class to determine which
32
//    OpcodeDecision (ibid.) to look the opcode up in.  Look up the opcode, to get
33
//    a ModRMDecision (ibid.).
34
//
35
// 4. Some instructions, such as escape opcodes or extended opcodes, or even
36
//    instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
37
//    ModR/M byte to complete decode.  The ModRMDecision's type is an entry from
38
//    ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
39
//    ModR/M byte is required and how to interpret it.
40
//
41
// 5. After resolving the ModRMDecision, the disassembler has a unique ID
42
//    of type InstrUID (X86DisassemblerDecoderCommon.h).  Looking this ID up in
43
//    INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
44
//    meanings of its operands.
45
//
46
// 6. For each operand, its encoding is an entry from OperandEncoding
47
//    (X86DisassemblerDecoderCommon.h) and its type is an entry from
48
//    OperandType (ibid.).  The encoding indicates how to read it from the
49
//    instruction; the type indicates how to interpret the value once it has
50
//    been read.  For example, a register operand could be stored in the R/M
51
//    field of the ModR/M byte, the REG field of the ModR/M byte, or added to
52
//    the main opcode.  This is orthogonal to its meaning (a GPR or an XMM
53
//    register, for instance).  Given this information, the operands can be
54
//    extracted and interpreted.
55
//
56
// 7. As the last step, the disassembler translates the instruction information
57
//    and operands into a format understandable by the client - in this case, an
58
//    MCInst for use by the MC infrastructure.
59
//
60
// The disassembler is broken broadly into two parts: the table emitter that
61
// emits the instruction decode tables discussed above during compilation, and
62
// the disassembler itself.  The table emitter is documented in more detail in
63
// utils/TableGen/X86DisassemblerEmitter.h.
64
//
65
// X86Disassembler.cpp contains the code responsible for step 7, and for
66
//   invoking the decoder to execute steps 1-6.
67
// X86DisassemblerDecoderCommon.h contains the definitions needed by both the
68
//   table emitter and the disassembler.
69
// X86DisassemblerDecoder.h contains the public interface of the decoder,
70
//   factored out into C for possible use by other projects.
71
// X86DisassemblerDecoder.c contains the source code of the decoder, which is
72
//   responsible for steps 1-6.
73
//
74
//===----------------------------------------------------------------------===//
75

76
#include "MCTargetDesc/X86BaseInfo.h"
77
#include "MCTargetDesc/X86MCTargetDesc.h"
78
#include "TargetInfo/X86TargetInfo.h"
79
#include "X86DisassemblerDecoder.h"
80
#include "llvm/MC/MCContext.h"
81
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
82
#include "llvm/MC/MCExpr.h"
83
#include "llvm/MC/MCInst.h"
84
#include "llvm/MC/MCInstrInfo.h"
85
#include "llvm/MC/MCSubtargetInfo.h"
86
#include "llvm/MC/TargetRegistry.h"
87
#include "llvm/Support/Debug.h"
88
#include "llvm/Support/Format.h"
89
#include "llvm/Support/raw_ostream.h"
90

91
using namespace llvm;
92
using namespace llvm::X86Disassembler;
93

94
#define DEBUG_TYPE "x86-disassembler"
95

96
#define debug(s) LLVM_DEBUG(dbgs() << __LINE__ << ": " << s);
97

98
// Final stage of the table lookup for one opcode.  Records whether the
// ModR/M byte has to be consulted and, if it does, how its value picks one
// instruction out of the candidates.  Resolving a ModRMDecision leaves
// exactly one instruction.
struct ModRMDecision {
  // How the ModR/M byte is interpreted; decode() switches on this against
  // the MODRM_* constants.
  uint8_t modrm_type;
  // Index of the first candidate in modRMTable; decode() adds a
  // ModR/M-derived offset to it.
  uint16_t instructionIDs;
};
105

106
// Specifies which set of ModR/M->instruction tables to look at
107
// given a particular opcode.
108
struct OpcodeDecision {
109
  ModRMDecision modRMDecisions[256];
110
};
111

112
// Specifies which opcode->instruction tables to look at given
113
// a particular context (set of attributes).  Since there are many possible
114
// contexts, the decoder first uses CONTEXTS_SYM to determine which context
115
// applies given a specific set of attributes.  Hence there are only IC_max
116
// entries in this table, rather than 2^(ATTR_max).
117
struct ContextDecision {
118
  OpcodeDecision opcodeDecisions[IC_max];
119
};
120

121
#include "X86GenDisassemblerTables.inc"
122

123
// Resolves an opcode to an instruction ID.
//
// type selects which opcode map is searched, insnContext selects the
// per-context opcode table inside that map, and opcode indexes that table to
// obtain a ModRMDecision.  The decision's modrm_type then determines how
// modRM is combined with the decision's base index to pick the final entry
// of modRMTable, which is returned.
static InstrUID decode(OpcodeType type, InstructionContext insnContext,
                       uint8_t opcode, uint8_t modRM) {
  // Every opcode map is indexed the same way; only the table differs.
  auto lookup = [&](const auto &map) -> const struct ModRMDecision * {
    return &map.opcodeDecisions[insnContext].modRMDecisions[opcode];
  };

  const struct ModRMDecision *dec;

  switch (type) {
  case ONEBYTE:
    dec = lookup(ONEBYTE_SYM);
    break;
  case TWOBYTE:
    dec = lookup(TWOBYTE_SYM);
    break;
  case THREEBYTE_38:
    dec = lookup(THREEBYTE38_SYM);
    break;
  case THREEBYTE_3A:
    dec = lookup(THREEBYTE3A_SYM);
    break;
  case XOP8_MAP:
    dec = lookup(XOP8_MAP_SYM);
    break;
  case XOP9_MAP:
    dec = lookup(XOP9_MAP_SYM);
    break;
  case XOPA_MAP:
    dec = lookup(XOPA_MAP_SYM);
    break;
  case THREEDNOW_MAP:
    dec = lookup(THREEDNOW_MAP_SYM);
    break;
  case MAP4:
    dec = lookup(MAP4_SYM);
    break;
  case MAP5:
    dec = lookup(MAP5_SYM);
    break;
  case MAP6:
    dec = lookup(MAP6_SYM);
    break;
  case MAP7:
    dec = lookup(MAP7_SYM);
    break;
  }

  const unsigned firstID = dec->instructionIDs;
  // mod == 0b11 selects the register form of the instruction.
  const bool isRegisterForm = modFromModRM(modRM) == 0x3;
  // The three-bit reg field of the ModR/M byte.
  const unsigned regField = (modRM & 0x38) >> 3;

  switch (dec->modrm_type) {
  case MODRM_ONEENTRY:
    return modRMTable[firstID];
  case MODRM_SPLITRM:
    // Two entries: memory form first, register form second.
    return modRMTable[firstID + (isRegisterForm ? 1 : 0)];
  case MODRM_SPLITREG:
    // Eight memory-form entries indexed by reg, then eight register-form
    // entries indexed by reg.
    return modRMTable[firstID + regField + (isRegisterForm ? 8 : 0)];
  case MODRM_SPLITMISC:
    // Eight memory-form entries indexed by reg, then one register-form entry
    // per value of the low six ModR/M bits.
    if (isRegisterForm)
      return modRMTable[firstID + (modRM & 0x3f) + 8];
    return modRMTable[firstID + regField];
  case MODRM_FULL:
    // One entry per ModR/M value.
    return modRMTable[firstID + modRM];
  default:
    llvm_unreachable("Corrupt table!  Unknown modrm_type");
    return 0;
  }
}
189

190
static bool peek(struct InternalInstruction *insn, uint8_t &byte) {
191
  uint64_t offset = insn->readerCursor - insn->startLocation;
192
  if (offset >= insn->bytes.size())
193
    return true;
194
  byte = insn->bytes[offset];
195
  return false;
196
}
197

198
// Reads a little-endian value of type T at the reader cursor into ptr and
// advances the cursor by sizeof(T).  Returns false on success; returns true,
// leaving both ptr and the cursor unchanged, when fewer than sizeof(T) bytes
// remain.
template <typename T> static bool consume(InternalInstruction *insn, T &ptr) {
  const auto &data = insn->bytes;
  const uint64_t pos = insn->readerCursor - insn->startLocation;
  if (pos + sizeof(T) <= data.size()) {
    ptr = support::endian::read<T>(&data[pos], llvm::endianness::little);
    insn->readerCursor += sizeof(T);
    return false;
  }
  return true;
}
207

208
static bool isREX(struct InternalInstruction *insn, uint8_t prefix) {
209
  return insn->mode == MODE_64BIT && prefix >= 0x40 && prefix <= 0x4f;
210
}
211

212
static bool isREX2(struct InternalInstruction *insn, uint8_t prefix) {
213
  return insn->mode == MODE_64BIT && prefix == 0xd5;
214
}
215

216
// Consumes all of an instruction's prefix bytes, and marks the
217
// instruction as having them.  Also sets the instruction's default operand,
218
// address, and other relevant data sizes to report operands correctly.
219
//
220
// insn must not be empty.
221
static int readPrefixes(struct InternalInstruction *insn) {
222
  bool isPrefix = true;
223
  uint8_t byte = 0;
224
  uint8_t nextByte;
225

226
  LLVM_DEBUG(dbgs() << "readPrefixes()");
227

228
  while (isPrefix) {
229
    // If we fail reading prefixes, just stop here and let the opcode reader
230
    // deal with it.
231
    if (consume(insn, byte))
232
      break;
233

234
    // If the byte is a LOCK/REP/REPNE prefix and not a part of the opcode, then
235
    // break and let it be disassembled as a normal "instruction".
236
    if (insn->readerCursor - 1 == insn->startLocation && byte == 0xf0) // LOCK
237
      break;
238

239
    if ((byte == 0xf2 || byte == 0xf3) && !peek(insn, nextByte)) {
240
      // If the byte is 0xf2 or 0xf3, and any of the following conditions are
241
      // met:
242
      // - it is followed by a LOCK (0xf0) prefix
243
      // - it is followed by an xchg instruction
244
      // then it should be disassembled as a xacquire/xrelease not repne/rep.
245
      if (((nextByte == 0xf0) ||
246
           ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) {
247
        insn->xAcquireRelease = true;
248
        if (!(byte == 0xf3 && nextByte == 0x90)) // PAUSE instruction support
249
          break;
250
      }
251
      // Also if the byte is 0xf3, and the following condition is met:
252
      // - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
253
      //                       "mov mem, imm" (opcode 0xc6/0xc7) instructions.
254
      // then it should be disassembled as an xrelease not rep.
255
      if (byte == 0xf3 && (nextByte == 0x88 || nextByte == 0x89 ||
256
                           nextByte == 0xc6 || nextByte == 0xc7)) {
257
        insn->xAcquireRelease = true;
258
        break;
259
      }
260
      if (isREX(insn, nextByte)) {
261
        uint8_t nnextByte;
262
        // Go to REX prefix after the current one
263
        if (consume(insn, nnextByte))
264
          return -1;
265
        // We should be able to read next byte after REX prefix
266
        if (peek(insn, nnextByte))
267
          return -1;
268
        --insn->readerCursor;
269
      }
270
    }
271

272
    switch (byte) {
273
    case 0xf0: // LOCK
274
      insn->hasLockPrefix = true;
275
      break;
276
    case 0xf2: // REPNE/REPNZ
277
    case 0xf3: { // REP or REPE/REPZ
278
      uint8_t nextByte;
279
      if (peek(insn, nextByte))
280
        break;
281
      // TODO:
282
      //  1. There could be several 0x66
283
      //  2. if (nextByte == 0x66) and nextNextByte != 0x0f then
284
      //      it's not mandatory prefix
285
      //  3. if (nextByte >= 0x40 && nextByte <= 0x4f) it's REX and we need
286
      //     0x0f exactly after it to be mandatory prefix
287
      //  4. if (nextByte == 0xd5) it's REX2 and we need
288
      //     0x0f exactly after it to be mandatory prefix
289
      if (isREX(insn, nextByte) || isREX2(insn, nextByte) || nextByte == 0x0f ||
290
          nextByte == 0x66)
291
        // The last of 0xf2 /0xf3 is mandatory prefix
292
        insn->mandatoryPrefix = byte;
293
      insn->repeatPrefix = byte;
294
      break;
295
    }
296
    case 0x2e: // CS segment override -OR- Branch not taken
297
      insn->segmentOverride = SEG_OVERRIDE_CS;
298
      break;
299
    case 0x36: // SS segment override -OR- Branch taken
300
      insn->segmentOverride = SEG_OVERRIDE_SS;
301
      break;
302
    case 0x3e: // DS segment override
303
      insn->segmentOverride = SEG_OVERRIDE_DS;
304
      break;
305
    case 0x26: // ES segment override
306
      insn->segmentOverride = SEG_OVERRIDE_ES;
307
      break;
308
    case 0x64: // FS segment override
309
      insn->segmentOverride = SEG_OVERRIDE_FS;
310
      break;
311
    case 0x65: // GS segment override
312
      insn->segmentOverride = SEG_OVERRIDE_GS;
313
      break;
314
    case 0x66: { // Operand-size override {
315
      uint8_t nextByte;
316
      insn->hasOpSize = true;
317
      if (peek(insn, nextByte))
318
        break;
319
      // 0x66 can't overwrite existing mandatory prefix and should be ignored
320
      if (!insn->mandatoryPrefix && (nextByte == 0x0f || isREX(insn, nextByte)))
321
        insn->mandatoryPrefix = byte;
322
      break;
323
    }
324
    case 0x67: // Address-size override
325
      insn->hasAdSize = true;
326
      break;
327
    default: // Not a prefix byte
328
      isPrefix = false;
329
      break;
330
    }
331

332
    if (isPrefix)
333
      LLVM_DEBUG(dbgs() << format("Found prefix 0x%hhx", byte));
334
  }
335

336
  insn->vectorExtensionType = TYPE_NO_VEX_XOP;
337

338
  if (byte == 0x62) {
339
    uint8_t byte1, byte2;
340
    if (consume(insn, byte1)) {
341
      LLVM_DEBUG(dbgs() << "Couldn't read second byte of EVEX prefix");
342
      return -1;
343
    }
344

345
    if (peek(insn, byte2)) {
346
      LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
347
      return -1;
348
    }
349

350
    if ((insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)) {
351
      insn->vectorExtensionType = TYPE_EVEX;
352
    } else {
353
      --insn->readerCursor; // unconsume byte1
354
      --insn->readerCursor; // unconsume byte
355
    }
356

357
    if (insn->vectorExtensionType == TYPE_EVEX) {
358
      insn->vectorExtensionPrefix[0] = byte;
359
      insn->vectorExtensionPrefix[1] = byte1;
360
      if (consume(insn, insn->vectorExtensionPrefix[2])) {
361
        LLVM_DEBUG(dbgs() << "Couldn't read third byte of EVEX prefix");
362
        return -1;
363
      }
364
      if (consume(insn, insn->vectorExtensionPrefix[3])) {
365
        LLVM_DEBUG(dbgs() << "Couldn't read fourth byte of EVEX prefix");
366
        return -1;
367
      }
368

369
      if (insn->mode == MODE_64BIT) {
370
        // We simulate the REX prefix for simplicity's sake
371
        insn->rexPrefix = 0x40 |
372
                          (wFromEVEX3of4(insn->vectorExtensionPrefix[2]) << 3) |
373
                          (rFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 2) |
374
                          (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 1) |
375
                          (bFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 0);
376

377
        // We simulate the REX2 prefix for simplicity's sake
378
        insn->rex2ExtensionPrefix[1] =
379
            (r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 6) |
380
            (x2FromEVEX3of4(insn->vectorExtensionPrefix[2]) << 5) |
381
            (b2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4);
382
      }
383

384
      LLVM_DEBUG(
385
          dbgs() << format(
386
              "Found EVEX prefix 0x%hhx 0x%hhx 0x%hhx 0x%hhx",
387
              insn->vectorExtensionPrefix[0], insn->vectorExtensionPrefix[1],
388
              insn->vectorExtensionPrefix[2], insn->vectorExtensionPrefix[3]));
389
    }
390
  } else if (byte == 0xc4) {
391
    uint8_t byte1;
392
    if (peek(insn, byte1)) {
393
      LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
394
      return -1;
395
    }
396

397
    if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
398
      insn->vectorExtensionType = TYPE_VEX_3B;
399
    else
400
      --insn->readerCursor;
401

402
    if (insn->vectorExtensionType == TYPE_VEX_3B) {
403
      insn->vectorExtensionPrefix[0] = byte;
404
      consume(insn, insn->vectorExtensionPrefix[1]);
405
      consume(insn, insn->vectorExtensionPrefix[2]);
406

407
      // We simulate the REX prefix for simplicity's sake
408

409
      if (insn->mode == MODE_64BIT)
410
        insn->rexPrefix = 0x40 |
411
                          (wFromVEX3of3(insn->vectorExtensionPrefix[2]) << 3) |
412
                          (rFromVEX2of3(insn->vectorExtensionPrefix[1]) << 2) |
413
                          (xFromVEX2of3(insn->vectorExtensionPrefix[1]) << 1) |
414
                          (bFromVEX2of3(insn->vectorExtensionPrefix[1]) << 0);
415

416
      LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx 0x%hhx",
417
                                  insn->vectorExtensionPrefix[0],
418
                                  insn->vectorExtensionPrefix[1],
419
                                  insn->vectorExtensionPrefix[2]));
420
    }
421
  } else if (byte == 0xc5) {
422
    uint8_t byte1;
423
    if (peek(insn, byte1)) {
424
      LLVM_DEBUG(dbgs() << "Couldn't read second byte of VEX");
425
      return -1;
426
    }
427

428
    if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0)
429
      insn->vectorExtensionType = TYPE_VEX_2B;
430
    else
431
      --insn->readerCursor;
432

433
    if (insn->vectorExtensionType == TYPE_VEX_2B) {
434
      insn->vectorExtensionPrefix[0] = byte;
435
      consume(insn, insn->vectorExtensionPrefix[1]);
436

437
      if (insn->mode == MODE_64BIT)
438
        insn->rexPrefix =
439
            0x40 | (rFromVEX2of2(insn->vectorExtensionPrefix[1]) << 2);
440

441
      switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
442
      default:
443
        break;
444
      case VEX_PREFIX_66:
445
        insn->hasOpSize = true;
446
        break;
447
      }
448

449
      LLVM_DEBUG(dbgs() << format("Found VEX prefix 0x%hhx 0x%hhx",
450
                                  insn->vectorExtensionPrefix[0],
451
                                  insn->vectorExtensionPrefix[1]));
452
    }
453
  } else if (byte == 0x8f) {
454
    uint8_t byte1;
455
    if (peek(insn, byte1)) {
456
      LLVM_DEBUG(dbgs() << "Couldn't read second byte of XOP");
457
      return -1;
458
    }
459

460
    if ((byte1 & 0x38) != 0x0) // 0 in these 3 bits is a POP instruction.
461
      insn->vectorExtensionType = TYPE_XOP;
462
    else
463
      --insn->readerCursor;
464

465
    if (insn->vectorExtensionType == TYPE_XOP) {
466
      insn->vectorExtensionPrefix[0] = byte;
467
      consume(insn, insn->vectorExtensionPrefix[1]);
468
      consume(insn, insn->vectorExtensionPrefix[2]);
469

470
      // We simulate the REX prefix for simplicity's sake
471

472
      if (insn->mode == MODE_64BIT)
473
        insn->rexPrefix = 0x40 |
474
                          (wFromXOP3of3(insn->vectorExtensionPrefix[2]) << 3) |
475
                          (rFromXOP2of3(insn->vectorExtensionPrefix[1]) << 2) |
476
                          (xFromXOP2of3(insn->vectorExtensionPrefix[1]) << 1) |
477
                          (bFromXOP2of3(insn->vectorExtensionPrefix[1]) << 0);
478

479
      switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
480
      default:
481
        break;
482
      case VEX_PREFIX_66:
483
        insn->hasOpSize = true;
484
        break;
485
      }
486

487
      LLVM_DEBUG(dbgs() << format("Found XOP prefix 0x%hhx 0x%hhx 0x%hhx",
488
                                  insn->vectorExtensionPrefix[0],
489
                                  insn->vectorExtensionPrefix[1],
490
                                  insn->vectorExtensionPrefix[2]));
491
    }
492
  } else if (isREX2(insn, byte)) {
493
    uint8_t byte1;
494
    if (peek(insn, byte1)) {
495
      LLVM_DEBUG(dbgs() << "Couldn't read second byte of REX2");
496
      return -1;
497
    }
498
    insn->rex2ExtensionPrefix[0] = byte;
499
    consume(insn, insn->rex2ExtensionPrefix[1]);
500

501
    // We simulate the REX prefix for simplicity's sake
502
    insn->rexPrefix = 0x40 | (wFromREX2(insn->rex2ExtensionPrefix[1]) << 3) |
503
                      (rFromREX2(insn->rex2ExtensionPrefix[1]) << 2) |
504
                      (xFromREX2(insn->rex2ExtensionPrefix[1]) << 1) |
505
                      (bFromREX2(insn->rex2ExtensionPrefix[1]) << 0);
506
    LLVM_DEBUG(dbgs() << format("Found REX2 prefix 0x%hhx 0x%hhx",
507
                                insn->rex2ExtensionPrefix[0],
508
                                insn->rex2ExtensionPrefix[1]));
509
  } else if (isREX(insn, byte)) {
510
    if (peek(insn, nextByte))
511
      return -1;
512
    insn->rexPrefix = byte;
513
    LLVM_DEBUG(dbgs() << format("Found REX prefix 0x%hhx", byte));
514
  } else
515
    --insn->readerCursor;
516

517
  if (insn->mode == MODE_16BIT) {
518
    insn->registerSize = (insn->hasOpSize ? 4 : 2);
519
    insn->addressSize = (insn->hasAdSize ? 4 : 2);
520
    insn->displacementSize = (insn->hasAdSize ? 4 : 2);
521
    insn->immediateSize = (insn->hasOpSize ? 4 : 2);
522
  } else if (insn->mode == MODE_32BIT) {
523
    insn->registerSize = (insn->hasOpSize ? 2 : 4);
524
    insn->addressSize = (insn->hasAdSize ? 2 : 4);
525
    insn->displacementSize = (insn->hasAdSize ? 2 : 4);
526
    insn->immediateSize = (insn->hasOpSize ? 2 : 4);
527
  } else if (insn->mode == MODE_64BIT) {
528
    insn->displacementSize = 4;
529
    if (insn->rexPrefix && wFromREX(insn->rexPrefix)) {
530
      insn->registerSize = 8;
531
      insn->addressSize = (insn->hasAdSize ? 4 : 8);
532
      insn->immediateSize = 4;
533
      insn->hasOpSize = false;
534
    } else {
535
      insn->registerSize = (insn->hasOpSize ? 2 : 4);
536
      insn->addressSize = (insn->hasAdSize ? 4 : 8);
537
      insn->immediateSize = (insn->hasOpSize ? 2 : 4);
538
    }
539
  }
540

541
  return 0;
542
}
543

544
// Consumes the SIB byte to determine addressing information.
545
static int readSIB(struct InternalInstruction *insn) {
546
  SIBBase sibBaseBase = SIB_BASE_NONE;
547
  uint8_t index, base;
548

549
  LLVM_DEBUG(dbgs() << "readSIB()");
550
  switch (insn->addressSize) {
551
  case 2:
552
  default:
553
    llvm_unreachable("SIB-based addressing doesn't work in 16-bit mode");
554
  case 4:
555
    insn->sibIndexBase = SIB_INDEX_EAX;
556
    sibBaseBase = SIB_BASE_EAX;
557
    break;
558
  case 8:
559
    insn->sibIndexBase = SIB_INDEX_RAX;
560
    sibBaseBase = SIB_BASE_RAX;
561
    break;
562
  }
563

564
  if (consume(insn, insn->sib))
565
    return -1;
566

567
  index = indexFromSIB(insn->sib) | (xFromREX(insn->rexPrefix) << 3) |
568
          (x2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
569

570
  if (index == 0x4) {
571
    insn->sibIndex = SIB_INDEX_NONE;
572
  } else {
573
    insn->sibIndex = (SIBIndex)(insn->sibIndexBase + index);
574
  }
575

576
  insn->sibScale = 1 << scaleFromSIB(insn->sib);
577

578
  base = baseFromSIB(insn->sib) | (bFromREX(insn->rexPrefix) << 3) |
579
         (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
580

581
  switch (base) {
582
  case 0x5:
583
  case 0xd:
584
    switch (modFromModRM(insn->modRM)) {
585
    case 0x0:
586
      insn->eaDisplacement = EA_DISP_32;
587
      insn->sibBase = SIB_BASE_NONE;
588
      break;
589
    case 0x1:
590
      insn->eaDisplacement = EA_DISP_8;
591
      insn->sibBase = (SIBBase)(sibBaseBase + base);
592
      break;
593
    case 0x2:
594
      insn->eaDisplacement = EA_DISP_32;
595
      insn->sibBase = (SIBBase)(sibBaseBase + base);
596
      break;
597
    default:
598
      llvm_unreachable("Cannot have Mod = 0b11 and a SIB byte");
599
    }
600
    break;
601
  default:
602
    insn->sibBase = (SIBBase)(sibBaseBase + base);
603
    break;
604
  }
605

606
  return 0;
607
}
608

609
static int readDisplacement(struct InternalInstruction *insn) {
610
  int8_t d8;
611
  int16_t d16;
612
  int32_t d32;
613
  LLVM_DEBUG(dbgs() << "readDisplacement()");
614

615
  insn->displacementOffset = insn->readerCursor - insn->startLocation;
616
  switch (insn->eaDisplacement) {
617
  case EA_DISP_NONE:
618
    break;
619
  case EA_DISP_8:
620
    if (consume(insn, d8))
621
      return -1;
622
    insn->displacement = d8;
623
    break;
624
  case EA_DISP_16:
625
    if (consume(insn, d16))
626
      return -1;
627
    insn->displacement = d16;
628
    break;
629
  case EA_DISP_32:
630
    if (consume(insn, d32))
631
      return -1;
632
    insn->displacement = d32;
633
    break;
634
  }
635

636
  return 0;
637
}
638

639
// Consumes all addressing information (ModR/M byte, SIB byte, and displacement.
640
static int readModRM(struct InternalInstruction *insn) {
641
  uint8_t mod, rm, reg;
642
  LLVM_DEBUG(dbgs() << "readModRM()");
643

644
  if (insn->consumedModRM)
645
    return 0;
646

647
  if (consume(insn, insn->modRM))
648
    return -1;
649
  insn->consumedModRM = true;
650

651
  mod = modFromModRM(insn->modRM);
652
  rm = rmFromModRM(insn->modRM);
653
  reg = regFromModRM(insn->modRM);
654

655
  // This goes by insn->registerSize to pick the correct register, which messes
656
  // up if we're using (say) XMM or 8-bit register operands. That gets fixed in
657
  // fixupReg().
658
  switch (insn->registerSize) {
659
  case 2:
660
    insn->regBase = MODRM_REG_AX;
661
    insn->eaRegBase = EA_REG_AX;
662
    break;
663
  case 4:
664
    insn->regBase = MODRM_REG_EAX;
665
    insn->eaRegBase = EA_REG_EAX;
666
    break;
667
  case 8:
668
    insn->regBase = MODRM_REG_RAX;
669
    insn->eaRegBase = EA_REG_RAX;
670
    break;
671
  }
672

673
  reg |= (rFromREX(insn->rexPrefix) << 3) |
674
         (r2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
675
  rm |= (bFromREX(insn->rexPrefix) << 3) |
676
        (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4);
677

678
  if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT)
679
    reg |= r2FromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4;
680

681
  insn->reg = (Reg)(insn->regBase + reg);
682

683
  switch (insn->addressSize) {
684
  case 2: {
685
    EABase eaBaseBase = EA_BASE_BX_SI;
686

687
    switch (mod) {
688
    case 0x0:
689
      if (rm == 0x6) {
690
        insn->eaBase = EA_BASE_NONE;
691
        insn->eaDisplacement = EA_DISP_16;
692
        if (readDisplacement(insn))
693
          return -1;
694
      } else {
695
        insn->eaBase = (EABase)(eaBaseBase + rm);
696
        insn->eaDisplacement = EA_DISP_NONE;
697
      }
698
      break;
699
    case 0x1:
700
      insn->eaBase = (EABase)(eaBaseBase + rm);
701
      insn->eaDisplacement = EA_DISP_8;
702
      insn->displacementSize = 1;
703
      if (readDisplacement(insn))
704
        return -1;
705
      break;
706
    case 0x2:
707
      insn->eaBase = (EABase)(eaBaseBase + rm);
708
      insn->eaDisplacement = EA_DISP_16;
709
      if (readDisplacement(insn))
710
        return -1;
711
      break;
712
    case 0x3:
713
      insn->eaBase = (EABase)(insn->eaRegBase + rm);
714
      if (readDisplacement(insn))
715
        return -1;
716
      break;
717
    }
718
    break;
719
  }
720
  case 4:
721
  case 8: {
722
    EABase eaBaseBase = (insn->addressSize == 4 ? EA_BASE_EAX : EA_BASE_RAX);
723

724
    switch (mod) {
725
    case 0x0:
726
      insn->eaDisplacement = EA_DISP_NONE; // readSIB may override this
727
      // In determining whether RIP-relative mode is used (rm=5),
728
      // or whether a SIB byte is present (rm=4),
729
      // the extension bits (REX.b and EVEX.x) are ignored.
730
      switch (rm & 7) {
731
      case 0x4: // SIB byte is present
732
        insn->eaBase = (insn->addressSize == 4 ? EA_BASE_sib : EA_BASE_sib64);
733
        if (readSIB(insn) || readDisplacement(insn))
734
          return -1;
735
        break;
736
      case 0x5: // RIP-relative
737
        insn->eaBase = EA_BASE_NONE;
738
        insn->eaDisplacement = EA_DISP_32;
739
        if (readDisplacement(insn))
740
          return -1;
741
        break;
742
      default:
743
        insn->eaBase = (EABase)(eaBaseBase + rm);
744
        break;
745
      }
746
      break;
747
    case 0x1:
748
      insn->displacementSize = 1;
749
      [[fallthrough]];
750
    case 0x2:
751
      insn->eaDisplacement = (mod == 0x1 ? EA_DISP_8 : EA_DISP_32);
752
      switch (rm & 7) {
753
      case 0x4: // SIB byte is present
754
        insn->eaBase = EA_BASE_sib;
755
        if (readSIB(insn) || readDisplacement(insn))
756
          return -1;
757
        break;
758
      default:
759
        insn->eaBase = (EABase)(eaBaseBase + rm);
760
        if (readDisplacement(insn))
761
          return -1;
762
        break;
763
      }
764
      break;
765
    case 0x3:
766
      insn->eaDisplacement = EA_DISP_NONE;
767
      insn->eaBase = (EABase)(insn->eaRegBase + rm);
768
      break;
769
    }
770
    break;
771
  }
772
  } // switch (insn->addressSize)
773

774
  return 0;
775
}
776

777
#define GENERIC_FIXUP_FUNC(name, base, prefix)                                 \
778
  static uint16_t name(struct InternalInstruction *insn, OperandType type,     \
779
                       uint8_t index, uint8_t *valid) {                        \
780
    *valid = 1;                                                                \
781
    switch (type) {                                                            \
782
    default:                                                                   \
783
      debug("Unhandled register type");                                        \
784
      *valid = 0;                                                              \
785
      return 0;                                                                \
786
    case TYPE_Rv:                                                              \
787
      return base + index;                                                     \
788
    case TYPE_R8:                                                              \
789
      if (insn->rexPrefix && index >= 4 && index <= 7)                         \
790
        return prefix##_SPL + (index - 4);                                     \
791
      else                                                                     \
792
        return prefix##_AL + index;                                            \
793
    case TYPE_R16:                                                             \
794
      return prefix##_AX + index;                                              \
795
    case TYPE_R32:                                                             \
796
      return prefix##_EAX + index;                                             \
797
    case TYPE_R64:                                                             \
798
      return prefix##_RAX + index;                                             \
799
    case TYPE_ZMM:                                                             \
800
      return prefix##_ZMM0 + index;                                            \
801
    case TYPE_YMM:                                                             \
802
      return prefix##_YMM0 + index;                                            \
803
    case TYPE_XMM:                                                             \
804
      return prefix##_XMM0 + index;                                            \
805
    case TYPE_TMM:                                                             \
806
      if (index > 7)                                                           \
807
        *valid = 0;                                                            \
808
      return prefix##_TMM0 + index;                                            \
809
    case TYPE_VK:                                                              \
810
      index &= 0xf;                                                            \
811
      if (index > 7)                                                           \
812
        *valid = 0;                                                            \
813
      return prefix##_K0 + index;                                              \
814
    case TYPE_VK_PAIR:                                                         \
815
      if (index > 7)                                                           \
816
        *valid = 0;                                                            \
817
      return prefix##_K0_K1 + (index / 2);                                     \
818
    case TYPE_MM64:                                                            \
819
      return prefix##_MM0 + (index & 0x7);                                     \
820
    case TYPE_SEGMENTREG:                                                      \
821
      if ((index & 7) > 5)                                                     \
822
        *valid = 0;                                                            \
823
      return prefix##_ES + (index & 7);                                        \
824
    case TYPE_DEBUGREG:                                                        \
825
      if (index > 15)                                                          \
826
        *valid = 0;                                                            \
827
      return prefix##_DR0 + index;                                             \
828
    case TYPE_CONTROLREG:                                                      \
829
      if (index > 15)                                                          \
830
        *valid = 0;                                                            \
831
      return prefix##_CR0 + index;                                             \
832
    case TYPE_MVSIBX:                                                          \
833
      return prefix##_XMM0 + index;                                            \
834
    case TYPE_MVSIBY:                                                          \
835
      return prefix##_YMM0 + index;                                            \
836
    case TYPE_MVSIBZ:                                                          \
837
      return prefix##_ZMM0 + index;                                            \
838
    }                                                                          \
839
  }
840

841
// Consult an operand type to determine the meaning of the reg or R/M field. If
842
// the operand is an XMM operand, for example, an operand would be XMM0 instead
843
// of AX, which readModRM() would otherwise misinterpret it as.
844
//
845
// @param insn  - The instruction containing the operand.
846
// @param type  - The operand type.
847
// @param index - The existing value of the field as reported by readModRM().
848
// @param valid - The address of a uint8_t.  The target is set to 1 if the
849
//                field is valid for the register class; 0 if not.
850
// @return      - The proper value.
851
GENERIC_FIXUP_FUNC(fixupRegValue, insn->regBase, MODRM_REG)
852
GENERIC_FIXUP_FUNC(fixupRMValue, insn->eaRegBase, EA_REG)
853

854
// Consult an operand specifier to determine which of the fixup*Value functions
855
// to use in correcting readModRM()'ss interpretation.
856
//
857
// @param insn  - See fixup*Value().
858
// @param op    - The operand specifier.
859
// @return      - 0 if fixup was successful; -1 if the register returned was
860
//                invalid for its class.
861
static int fixupReg(struct InternalInstruction *insn,
862
                    const struct OperandSpecifier *op) {
863
  uint8_t valid;
864
  LLVM_DEBUG(dbgs() << "fixupReg()");
865

866
  switch ((OperandEncoding)op->encoding) {
867
  default:
868
    debug("Expected a REG or R/M encoding in fixupReg");
869
    return -1;
870
  case ENCODING_VVVV:
871
    insn->vvvv =
872
        (Reg)fixupRegValue(insn, (OperandType)op->type, insn->vvvv, &valid);
873
    if (!valid)
874
      return -1;
875
    break;
876
  case ENCODING_REG:
877
    insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type,
878
                                   insn->reg - insn->regBase, &valid);
879
    if (!valid)
880
      return -1;
881
    break;
882
  CASE_ENCODING_RM:
883
    if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
884
        modFromModRM(insn->modRM) == 3) {
885
      // EVEX_X can extend the register id to 32 for a non-GPR register that is
886
      // encoded in RM.
887
      // mode : MODE_64_BIT
888
      //  Only 8 vector registers are available in 32 bit mode
889
      // mod : 3
890
      //  RM encodes a register
891
      switch (op->type) {
892
      case TYPE_Rv:
893
      case TYPE_R8:
894
      case TYPE_R16:
895
      case TYPE_R32:
896
      case TYPE_R64:
897
        break;
898
      default:
899
        insn->eaBase =
900
            (EABase)(insn->eaBase +
901
                     (xFromEVEX2of4(insn->vectorExtensionPrefix[1]) << 4));
902
        break;
903
      }
904
    }
905
    [[fallthrough]];
906
  case ENCODING_SIB:
907
    if (insn->eaBase >= insn->eaRegBase) {
908
      insn->eaBase = (EABase)fixupRMValue(
909
          insn, (OperandType)op->type, insn->eaBase - insn->eaRegBase, &valid);
910
      if (!valid)
911
        return -1;
912
    }
913
    break;
914
  }
915

916
  return 0;
917
}
918

919
// Read the opcode (except the ModR/M byte in the case of extended or escape
920
// opcodes).
921
static bool readOpcode(struct InternalInstruction *insn) {
922
  uint8_t current;
923
  LLVM_DEBUG(dbgs() << "readOpcode()");
924

925
  insn->opcodeType = ONEBYTE;
926
  if (insn->vectorExtensionType == TYPE_EVEX) {
927
    switch (mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])) {
928
    default:
929
      LLVM_DEBUG(
930
          dbgs() << format("Unhandled mmm field for instruction (0x%hhx)",
931
                           mmmFromEVEX2of4(insn->vectorExtensionPrefix[1])));
932
      return true;
933
    case VEX_LOB_0F:
934
      insn->opcodeType = TWOBYTE;
935
      return consume(insn, insn->opcode);
936
    case VEX_LOB_0F38:
937
      insn->opcodeType = THREEBYTE_38;
938
      return consume(insn, insn->opcode);
939
    case VEX_LOB_0F3A:
940
      insn->opcodeType = THREEBYTE_3A;
941
      return consume(insn, insn->opcode);
942
    case VEX_LOB_MAP4:
943
      insn->opcodeType = MAP4;
944
      return consume(insn, insn->opcode);
945
    case VEX_LOB_MAP5:
946
      insn->opcodeType = MAP5;
947
      return consume(insn, insn->opcode);
948
    case VEX_LOB_MAP6:
949
      insn->opcodeType = MAP6;
950
      return consume(insn, insn->opcode);
951
    case VEX_LOB_MAP7:
952
      insn->opcodeType = MAP7;
953
      return consume(insn, insn->opcode);
954
    }
955
  } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
956
    switch (mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])) {
957
    default:
958
      LLVM_DEBUG(
959
          dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
960
                           mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])));
961
      return true;
962
    case VEX_LOB_0F:
963
      insn->opcodeType = TWOBYTE;
964
      return consume(insn, insn->opcode);
965
    case VEX_LOB_0F38:
966
      insn->opcodeType = THREEBYTE_38;
967
      return consume(insn, insn->opcode);
968
    case VEX_LOB_0F3A:
969
      insn->opcodeType = THREEBYTE_3A;
970
      return consume(insn, insn->opcode);
971
    case VEX_LOB_MAP5:
972
      insn->opcodeType = MAP5;
973
      return consume(insn, insn->opcode);
974
    case VEX_LOB_MAP6:
975
      insn->opcodeType = MAP6;
976
      return consume(insn, insn->opcode);
977
    case VEX_LOB_MAP7:
978
      insn->opcodeType = MAP7;
979
      return consume(insn, insn->opcode);
980
    }
981
  } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
982
    insn->opcodeType = TWOBYTE;
983
    return consume(insn, insn->opcode);
984
  } else if (insn->vectorExtensionType == TYPE_XOP) {
985
    switch (mmmmmFromXOP2of3(insn->vectorExtensionPrefix[1])) {
986
    default:
987
      LLVM_DEBUG(
988
          dbgs() << format("Unhandled m-mmmm field for instruction (0x%hhx)",
989
                           mmmmmFromVEX2of3(insn->vectorExtensionPrefix[1])));
990
      return true;
991
    case XOP_MAP_SELECT_8:
992
      insn->opcodeType = XOP8_MAP;
993
      return consume(insn, insn->opcode);
994
    case XOP_MAP_SELECT_9:
995
      insn->opcodeType = XOP9_MAP;
996
      return consume(insn, insn->opcode);
997
    case XOP_MAP_SELECT_A:
998
      insn->opcodeType = XOPA_MAP;
999
      return consume(insn, insn->opcode);
1000
    }
1001
  } else if (mFromREX2(insn->rex2ExtensionPrefix[1])) {
1002
    // m bit indicates opcode map 1
1003
    insn->opcodeType = TWOBYTE;
1004
    return consume(insn, insn->opcode);
1005
  }
1006

1007
  if (consume(insn, current))
1008
    return true;
1009

1010
  if (current == 0x0f) {
1011
    LLVM_DEBUG(
1012
        dbgs() << format("Found a two-byte escape prefix (0x%hhx)", current));
1013
    if (consume(insn, current))
1014
      return true;
1015

1016
    if (current == 0x38) {
1017
      LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
1018
                                  current));
1019
      if (consume(insn, current))
1020
        return true;
1021

1022
      insn->opcodeType = THREEBYTE_38;
1023
    } else if (current == 0x3a) {
1024
      LLVM_DEBUG(dbgs() << format("Found a three-byte escape prefix (0x%hhx)",
1025
                                  current));
1026
      if (consume(insn, current))
1027
        return true;
1028

1029
      insn->opcodeType = THREEBYTE_3A;
1030
    } else if (current == 0x0f) {
1031
      LLVM_DEBUG(
1032
          dbgs() << format("Found a 3dnow escape prefix (0x%hhx)", current));
1033

1034
      // Consume operands before the opcode to comply with the 3DNow encoding
1035
      if (readModRM(insn))
1036
        return true;
1037

1038
      if (consume(insn, current))
1039
        return true;
1040

1041
      insn->opcodeType = THREEDNOW_MAP;
1042
    } else {
1043
      LLVM_DEBUG(dbgs() << "Didn't find a three-byte escape prefix");
1044
      insn->opcodeType = TWOBYTE;
1045
    }
1046
  } else if (insn->mandatoryPrefix)
1047
    // The opcode with mandatory prefix must start with opcode escape.
1048
    // If not it's legacy repeat prefix
1049
    insn->mandatoryPrefix = 0;
1050

1051
  // At this point we have consumed the full opcode.
1052
  // Anything we consume from here on must be unconsumed.
1053
  insn->opcode = current;
1054

1055
  return false;
1056
}
1057

1058
// Determine whether equiv is the 16-bit equivalent of orig (32-bit or 64-bit).
//
// The two names must have the same length. At each position the characters
// must either match or be one of the accepted substitutions:
//   'Q' or 'L' -> 'W', '6' or '3' -> '1', '4' or '2' -> '6'.
static bool is16BitEquivalent(const char *orig, const char *equiv) {
  const char *wide = orig;
  const char *narrow = equiv;

  for (; *wide != '\0' && *narrow != '\0'; ++wide, ++narrow) {
    const char w = *wide;
    const char n = *narrow;
    if (w == n)
      continue;

    const bool suffixSwap = (w == 'Q' || w == 'L') && n == 'W';
    const bool firstDigitSwap = (w == '6' || w == '3') && n == '1';
    const bool secondDigitSwap = (w == '4' || w == '2') && n == '6';
    if (!suffixSwap && !firstDigitSwap && !secondDigitSwap)
      return false;
  }

  // Equivalent only if both names ended at the same position.
  return *wide == '\0' && *narrow == '\0';
}
1076

1077
// Determine whether this instruction is a 64-bit instruction, judged by
// whether its name contains the substring "64".
static bool is64Bit(const char *name) {
  for (const char *p = name; *p != '\0'; ++p) {
    // p[1] is safe to read: *p is not the terminator, so at worst p[1] is.
    if (p[0] == '6' && p[1] == '4')
      return true;
  }
  return false;
}
1086

1087
// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1088
// for extended and escape opcodes, and using a supplied attribute mask.
1089
static int getInstructionIDWithAttrMask(uint16_t *instructionID,
1090
                                        struct InternalInstruction *insn,
1091
                                        uint16_t attrMask) {
1092
  auto insnCtx = InstructionContext(x86DisassemblerContexts[attrMask]);
1093
  const ContextDecision *decision;
1094
  switch (insn->opcodeType) {
1095
  case ONEBYTE:
1096
    decision = &ONEBYTE_SYM;
1097
    break;
1098
  case TWOBYTE:
1099
    decision = &TWOBYTE_SYM;
1100
    break;
1101
  case THREEBYTE_38:
1102
    decision = &THREEBYTE38_SYM;
1103
    break;
1104
  case THREEBYTE_3A:
1105
    decision = &THREEBYTE3A_SYM;
1106
    break;
1107
  case XOP8_MAP:
1108
    decision = &XOP8_MAP_SYM;
1109
    break;
1110
  case XOP9_MAP:
1111
    decision = &XOP9_MAP_SYM;
1112
    break;
1113
  case XOPA_MAP:
1114
    decision = &XOPA_MAP_SYM;
1115
    break;
1116
  case THREEDNOW_MAP:
1117
    decision = &THREEDNOW_MAP_SYM;
1118
    break;
1119
  case MAP4:
1120
    decision = &MAP4_SYM;
1121
    break;
1122
  case MAP5:
1123
    decision = &MAP5_SYM;
1124
    break;
1125
  case MAP6:
1126
    decision = &MAP6_SYM;
1127
    break;
1128
  case MAP7:
1129
    decision = &MAP7_SYM;
1130
    break;
1131
  }
1132

1133
  if (decision->opcodeDecisions[insnCtx]
1134
          .modRMDecisions[insn->opcode]
1135
          .modrm_type != MODRM_ONEENTRY) {
1136
    if (readModRM(insn))
1137
      return -1;
1138
    *instructionID =
1139
        decode(insn->opcodeType, insnCtx, insn->opcode, insn->modRM);
1140
  } else {
1141
    *instructionID = decode(insn->opcodeType, insnCtx, insn->opcode, 0);
1142
  }
1143

1144
  return 0;
1145
}
1146

1147
// Report whether the instruction's opcode (and, for some opcodes, the reg
// field of ModR/M) is one of the MAP4 encodings this decoder treats as
// CCMP/CTEST. Only opcodeType, opcode and modRM are inspected.
static bool isCCMPOrCTEST(InternalInstruction *insn) {
  if (insn->opcodeType != MAP4)
    return false;

  const unsigned reg = regFromModRM(insn->modRM);
  // Opcodes are matched in pairs: the low bit is ignored.
  const unsigned opcodePair = insn->opcode & 0xfe;

  if (opcodePair == 0x38 || opcodePair == 0x3a || opcodePair == 0x84)
    return true;
  // 0x80, 0x81 and 0x83 qualify only with /7.
  if (insn->opcode == 0x83 || opcodePair == 0x80)
    return reg == 7;
  // 0xf6 and 0xf7 qualify only with /0.
  if (opcodePair == 0xf6)
    return reg == 0;
  return false;
}
1165

1166
// Report whether the EVEX NF bit is set on an instruction for which this
// decoder honours it: anything in MAP4, plus a handful of opcodes in the
// 0F38 map that carry no pp prefix.
static bool isNF(InternalInstruction *insn) {
  if (!nfFromEVEX4of4(insn->vectorExtensionPrefix[3]))
    return false;
  if (insn->opcodeType == MAP4)
    return true;

  // Below NF instructions are not in map4.
  const bool isUnprefixed0F38 =
      insn->opcodeType == THREEBYTE_38 &&
      ppFromEVEX3of4(insn->vectorExtensionPrefix[2]) == VEX_PREFIX_NONE;
  if (!isUnprefixed0F38)
    return false;

  const uint8_t opc = insn->opcode;
  return opc == 0xf2 || // ANDN
         opc == 0xf3 || // BLSI, BLSR, BLSMSK
         opc == 0xf5 || // BZHI
         opc == 0xf7;   // BEXTR
}
1186

1187
// Determine the ID of an instruction, consuming the ModR/M byte as appropriate
1188
// for extended and escape opcodes. Determines the attributes and context for
1189
// the instruction before doing so.
1190
static int getInstructionID(struct InternalInstruction *insn,
1191
                            const MCInstrInfo *mii) {
1192
  uint16_t attrMask;
1193
  uint16_t instructionID;
1194

1195
  LLVM_DEBUG(dbgs() << "getID()");
1196

1197
  attrMask = ATTR_NONE;
1198

1199
  if (insn->mode == MODE_64BIT)
1200
    attrMask |= ATTR_64BIT;
1201

1202
  if (insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
1203
    attrMask |= (insn->vectorExtensionType == TYPE_EVEX) ? ATTR_EVEX : ATTR_VEX;
1204

1205
    if (insn->vectorExtensionType == TYPE_EVEX) {
1206
      switch (ppFromEVEX3of4(insn->vectorExtensionPrefix[2])) {
1207
      case VEX_PREFIX_66:
1208
        attrMask |= ATTR_OPSIZE;
1209
        break;
1210
      case VEX_PREFIX_F3:
1211
        attrMask |= ATTR_XS;
1212
        break;
1213
      case VEX_PREFIX_F2:
1214
        attrMask |= ATTR_XD;
1215
        break;
1216
      }
1217

1218
      if (zFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1219
        attrMask |= ATTR_EVEXKZ;
1220
      if (bFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1221
        attrMask |= ATTR_EVEXB;
1222
      if (isNF(insn) && !readModRM(insn) &&
1223
          !isCCMPOrCTEST(insn)) // NF bit is the MSB of aaa.
1224
        attrMask |= ATTR_EVEXNF;
1225
      // aaa is not used a opmask in MAP4
1226
      else if (aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]) &&
1227
               (insn->opcodeType != MAP4))
1228
        attrMask |= ATTR_EVEXK;
1229
      if (lFromEVEX4of4(insn->vectorExtensionPrefix[3]))
1230
        attrMask |= ATTR_VEXL;
1231
      if (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
1232
        attrMask |= ATTR_EVEXL2;
1233
    } else if (insn->vectorExtensionType == TYPE_VEX_3B) {
1234
      switch (ppFromVEX3of3(insn->vectorExtensionPrefix[2])) {
1235
      case VEX_PREFIX_66:
1236
        attrMask |= ATTR_OPSIZE;
1237
        break;
1238
      case VEX_PREFIX_F3:
1239
        attrMask |= ATTR_XS;
1240
        break;
1241
      case VEX_PREFIX_F2:
1242
        attrMask |= ATTR_XD;
1243
        break;
1244
      }
1245

1246
      if (lFromVEX3of3(insn->vectorExtensionPrefix[2]))
1247
        attrMask |= ATTR_VEXL;
1248
    } else if (insn->vectorExtensionType == TYPE_VEX_2B) {
1249
      switch (ppFromVEX2of2(insn->vectorExtensionPrefix[1])) {
1250
      case VEX_PREFIX_66:
1251
        attrMask |= ATTR_OPSIZE;
1252
        if (insn->hasAdSize)
1253
          attrMask |= ATTR_ADSIZE;
1254
        break;
1255
      case VEX_PREFIX_F3:
1256
        attrMask |= ATTR_XS;
1257
        break;
1258
      case VEX_PREFIX_F2:
1259
        attrMask |= ATTR_XD;
1260
        break;
1261
      }
1262

1263
      if (lFromVEX2of2(insn->vectorExtensionPrefix[1]))
1264
        attrMask |= ATTR_VEXL;
1265
    } else if (insn->vectorExtensionType == TYPE_XOP) {
1266
      switch (ppFromXOP3of3(insn->vectorExtensionPrefix[2])) {
1267
      case VEX_PREFIX_66:
1268
        attrMask |= ATTR_OPSIZE;
1269
        break;
1270
      case VEX_PREFIX_F3:
1271
        attrMask |= ATTR_XS;
1272
        break;
1273
      case VEX_PREFIX_F2:
1274
        attrMask |= ATTR_XD;
1275
        break;
1276
      }
1277

1278
      if (lFromXOP3of3(insn->vectorExtensionPrefix[2]))
1279
        attrMask |= ATTR_VEXL;
1280
    } else {
1281
      return -1;
1282
    }
1283
  } else if (!insn->mandatoryPrefix) {
1284
    // If we don't have mandatory prefix we should use legacy prefixes here
1285
    if (insn->hasOpSize && (insn->mode != MODE_16BIT))
1286
      attrMask |= ATTR_OPSIZE;
1287
    if (insn->hasAdSize)
1288
      attrMask |= ATTR_ADSIZE;
1289
    if (insn->opcodeType == ONEBYTE) {
1290
      if (insn->repeatPrefix == 0xf3 && (insn->opcode == 0x90))
1291
        // Special support for PAUSE
1292
        attrMask |= ATTR_XS;
1293
    } else {
1294
      if (insn->repeatPrefix == 0xf2)
1295
        attrMask |= ATTR_XD;
1296
      else if (insn->repeatPrefix == 0xf3)
1297
        attrMask |= ATTR_XS;
1298
    }
1299
  } else {
1300
    switch (insn->mandatoryPrefix) {
1301
    case 0xf2:
1302
      attrMask |= ATTR_XD;
1303
      break;
1304
    case 0xf3:
1305
      attrMask |= ATTR_XS;
1306
      break;
1307
    case 0x66:
1308
      if (insn->mode != MODE_16BIT)
1309
        attrMask |= ATTR_OPSIZE;
1310
      if (insn->hasAdSize)
1311
        attrMask |= ATTR_ADSIZE;
1312
      break;
1313
    case 0x67:
1314
      attrMask |= ATTR_ADSIZE;
1315
      break;
1316
    }
1317
  }
1318

1319
  if (insn->rexPrefix & 0x08) {
1320
    attrMask |= ATTR_REXW;
1321
    attrMask &= ~ATTR_ADSIZE;
1322
  }
1323

1324
  // Absolute jump and pushp/popp need special handling
1325
  if (insn->rex2ExtensionPrefix[0] == 0xd5 && insn->opcodeType == ONEBYTE &&
1326
      (insn->opcode == 0xA1 || (insn->opcode & 0xf0) == 0x50))
1327
    attrMask |= ATTR_REX2;
1328

1329
  if (insn->mode == MODE_16BIT) {
1330
    // JCXZ/JECXZ need special handling for 16-bit mode because the meaning
1331
    // of the AdSize prefix is inverted w.r.t. 32-bit mode.
1332
    if (insn->opcodeType == ONEBYTE && insn->opcode == 0xE3)
1333
      attrMask ^= ATTR_ADSIZE;
1334
    // If we're in 16-bit mode and this is one of the relative jumps and opsize
1335
    // prefix isn't present, we need to force the opsize attribute since the
1336
    // prefix is inverted relative to 32-bit mode.
1337
    if (!insn->hasOpSize && insn->opcodeType == ONEBYTE &&
1338
        (insn->opcode == 0xE8 || insn->opcode == 0xE9))
1339
      attrMask |= ATTR_OPSIZE;
1340

1341
    if (!insn->hasOpSize && insn->opcodeType == TWOBYTE &&
1342
        insn->opcode >= 0x80 && insn->opcode <= 0x8F)
1343
      attrMask |= ATTR_OPSIZE;
1344
  }
1345

1346

1347
  if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
1348
    return -1;
1349

1350
  // The following clauses compensate for limitations of the tables.
1351

1352
  if (insn->mode != MODE_64BIT &&
1353
      insn->vectorExtensionType != TYPE_NO_VEX_XOP) {
1354
    // The tables can't distinquish between cases where the W-bit is used to
1355
    // select register size and cases where its a required part of the opcode.
1356
    if ((insn->vectorExtensionType == TYPE_EVEX &&
1357
         wFromEVEX3of4(insn->vectorExtensionPrefix[2])) ||
1358
        (insn->vectorExtensionType == TYPE_VEX_3B &&
1359
         wFromVEX3of3(insn->vectorExtensionPrefix[2])) ||
1360
        (insn->vectorExtensionType == TYPE_XOP &&
1361
         wFromXOP3of3(insn->vectorExtensionPrefix[2]))) {
1362

1363
      uint16_t instructionIDWithREXW;
1364
      if (getInstructionIDWithAttrMask(&instructionIDWithREXW, insn,
1365
                                       attrMask | ATTR_REXW)) {
1366
        insn->instructionID = instructionID;
1367
        insn->spec = &INSTRUCTIONS_SYM[instructionID];
1368
        return 0;
1369
      }
1370

1371
      auto SpecName = mii->getName(instructionIDWithREXW);
1372
      // If not a 64-bit instruction. Switch the opcode.
1373
      if (!is64Bit(SpecName.data())) {
1374
        insn->instructionID = instructionIDWithREXW;
1375
        insn->spec = &INSTRUCTIONS_SYM[instructionIDWithREXW];
1376
        return 0;
1377
      }
1378
    }
1379
  }
1380

1381
  // Absolute moves, umonitor, and movdir64b need special handling.
1382
  // -For 16-bit mode because the meaning of the AdSize and OpSize prefixes are
1383
  //  inverted w.r.t.
1384
  // -For 32-bit mode we need to ensure the ADSIZE prefix is observed in
1385
  //  any position.
1386
  if ((insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0)) ||
1387
      (insn->opcodeType == TWOBYTE && (insn->opcode == 0xAE)) ||
1388
      (insn->opcodeType == THREEBYTE_38 && insn->opcode == 0xF8) ||
1389
      (insn->opcodeType == MAP4 && insn->opcode == 0xF8)) {
1390
    // Make sure we observed the prefixes in any position.
1391
    if (insn->hasAdSize)
1392
      attrMask |= ATTR_ADSIZE;
1393
    if (insn->hasOpSize)
1394
      attrMask |= ATTR_OPSIZE;
1395

1396
    // In 16-bit, invert the attributes.
1397
    if (insn->mode == MODE_16BIT) {
1398
      attrMask ^= ATTR_ADSIZE;
1399

1400
      // The OpSize attribute is only valid with the absolute moves.
1401
      if (insn->opcodeType == ONEBYTE && ((insn->opcode & 0xFC) == 0xA0))
1402
        attrMask ^= ATTR_OPSIZE;
1403
    }
1404

1405
    if (getInstructionIDWithAttrMask(&instructionID, insn, attrMask))
1406
      return -1;
1407

1408
    insn->instructionID = instructionID;
1409
    insn->spec = &INSTRUCTIONS_SYM[instructionID];
1410
    return 0;
1411
  }
1412

1413
  if ((insn->mode == MODE_16BIT || insn->hasOpSize) &&
1414
      !(attrMask & ATTR_OPSIZE)) {
1415
    // The instruction tables make no distinction between instructions that
1416
    // allow OpSize anywhere (i.e., 16-bit operations) and that need it in a
1417
    // particular spot (i.e., many MMX operations). In general we're
1418
    // conservative, but in the specific case where OpSize is present but not in
1419
    // the right place we check if there's a 16-bit operation.
1420
    const struct InstructionSpecifier *spec;
1421
    uint16_t instructionIDWithOpsize;
1422
    llvm::StringRef specName, specWithOpSizeName;
1423

1424
    spec = &INSTRUCTIONS_SYM[instructionID];
1425

1426
    if (getInstructionIDWithAttrMask(&instructionIDWithOpsize, insn,
1427
                                     attrMask | ATTR_OPSIZE)) {
1428
      // ModRM required with OpSize but not present. Give up and return the
1429
      // version without OpSize set.
1430
      insn->instructionID = instructionID;
1431
      insn->spec = spec;
1432
      return 0;
1433
    }
1434

1435
    specName = mii->getName(instructionID);
1436
    specWithOpSizeName = mii->getName(instructionIDWithOpsize);
1437

1438
    if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
1439
        (insn->mode == MODE_16BIT) ^ insn->hasOpSize) {
1440
      insn->instructionID = instructionIDWithOpsize;
1441
      insn->spec = &INSTRUCTIONS_SYM[instructionIDWithOpsize];
1442
    } else {
1443
      insn->instructionID = instructionID;
1444
      insn->spec = spec;
1445
    }
1446
    return 0;
1447
  }
1448

1449
  if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 &&
1450
      insn->rexPrefix & 0x01) {
1451
    // NOOP shouldn't decode as NOOP if REX.b is set. Instead it should decode
1452
    // as XCHG %r8, %eax.
1453
    const struct InstructionSpecifier *spec;
1454
    uint16_t instructionIDWithNewOpcode;
1455
    const struct InstructionSpecifier *specWithNewOpcode;
1456

1457
    spec = &INSTRUCTIONS_SYM[instructionID];
1458

1459
    // Borrow opcode from one of the other XCHGar opcodes
1460
    insn->opcode = 0x91;
1461

1462
    if (getInstructionIDWithAttrMask(&instructionIDWithNewOpcode, insn,
1463
                                     attrMask)) {
1464
      insn->opcode = 0x90;
1465

1466
      insn->instructionID = instructionID;
1467
      insn->spec = spec;
1468
      return 0;
1469
    }
1470

1471
    specWithNewOpcode = &INSTRUCTIONS_SYM[instructionIDWithNewOpcode];
1472

1473
    // Change back
1474
    insn->opcode = 0x90;
1475

1476
    insn->instructionID = instructionIDWithNewOpcode;
1477
    insn->spec = specWithNewOpcode;
1478

1479
    return 0;
1480
  }
1481

1482
  insn->instructionID = instructionID;
1483
  insn->spec = &INSTRUCTIONS_SYM[insn->instructionID];
1484

1485
  return 0;
1486
}
1487

1488
// Read an operand from the opcode field of an instruction and interprets it
1489
// appropriately given the operand width. Handles AddRegFrm instructions.
1490
//
1491
// @param insn  - the instruction whose opcode field is to be read.
1492
// @param size  - The width (in bytes) of the register being specified.
1493
//                1 means AL and friends, 2 means AX, 4 means EAX, and 8 means
1494
//                RAX.
1495
// @return      - 0 on success; nonzero otherwise.
1496
static int readOpcodeRegister(struct InternalInstruction *insn, uint8_t size) {
1497
  LLVM_DEBUG(dbgs() << "readOpcodeRegister()");
1498

1499
  if (size == 0)
1500
    size = insn->registerSize;
1501

1502
  auto setOpcodeRegister = [&](unsigned base) {
1503
    insn->opcodeRegister =
1504
        (Reg)(base + ((bFromREX(insn->rexPrefix) << 3) |
1505
                      (b2FromREX2(insn->rex2ExtensionPrefix[1]) << 4) |
1506
                      (insn->opcode & 7)));
1507
  };
1508

1509
  switch (size) {
1510
  case 1:
1511
    setOpcodeRegister(MODRM_REG_AL);
1512
    if (insn->rexPrefix && insn->opcodeRegister >= MODRM_REG_AL + 0x4 &&
1513
        insn->opcodeRegister < MODRM_REG_AL + 0x8) {
1514
      insn->opcodeRegister =
1515
          (Reg)(MODRM_REG_SPL + (insn->opcodeRegister - MODRM_REG_AL - 4));
1516
    }
1517

1518
    break;
1519
  case 2:
1520
    setOpcodeRegister(MODRM_REG_AX);
1521
    break;
1522
  case 4:
1523
    setOpcodeRegister(MODRM_REG_EAX);
1524
    break;
1525
  case 8:
1526
    setOpcodeRegister(MODRM_REG_RAX);
1527
    break;
1528
  }
1529

1530
  return 0;
1531
}
1532

1533
// Consume an immediate operand from an instruction, given the desired operand
1534
// size.
1535
//
1536
// @param insn  - The instruction whose operand is to be read.
1537
// @param size  - The width (in bytes) of the operand.
1538
// @return      - 0 if the immediate was successfully consumed; nonzero
1539
//                otherwise.
1540
static int readImmediate(struct InternalInstruction *insn, uint8_t size) {
1541
  uint8_t imm8;
1542
  uint16_t imm16;
1543
  uint32_t imm32;
1544
  uint64_t imm64;
1545

1546
  LLVM_DEBUG(dbgs() << "readImmediate()");
1547

1548
  assert(insn->numImmediatesConsumed < 2 && "Already consumed two immediates");
1549

1550
  insn->immediateSize = size;
1551
  insn->immediateOffset = insn->readerCursor - insn->startLocation;
1552

1553
  switch (size) {
1554
  case 1:
1555
    if (consume(insn, imm8))
1556
      return -1;
1557
    insn->immediates[insn->numImmediatesConsumed] = imm8;
1558
    break;
1559
  case 2:
1560
    if (consume(insn, imm16))
1561
      return -1;
1562
    insn->immediates[insn->numImmediatesConsumed] = imm16;
1563
    break;
1564
  case 4:
1565
    if (consume(insn, imm32))
1566
      return -1;
1567
    insn->immediates[insn->numImmediatesConsumed] = imm32;
1568
    break;
1569
  case 8:
1570
    if (consume(insn, imm64))
1571
      return -1;
1572
    insn->immediates[insn->numImmediatesConsumed] = imm64;
1573
    break;
1574
  default:
1575
    llvm_unreachable("invalid size");
1576
  }
1577

1578
  insn->numImmediatesConsumed++;
1579

1580
  return 0;
1581
}
1582

1583
// Consume vvvv from an instruction if it has a VEX prefix.
1584
static int readVVVV(struct InternalInstruction *insn) {
1585
  LLVM_DEBUG(dbgs() << "readVVVV()");
1586

1587
  int vvvv;
1588
  if (insn->vectorExtensionType == TYPE_EVEX)
1589
    vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 |
1590
            vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]));
1591
  else if (insn->vectorExtensionType == TYPE_VEX_3B)
1592
    vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
1593
  else if (insn->vectorExtensionType == TYPE_VEX_2B)
1594
    vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
1595
  else if (insn->vectorExtensionType == TYPE_XOP)
1596
    vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
1597
  else
1598
    return -1;
1599

1600
  if (insn->mode != MODE_64BIT)
1601
    vvvv &= 0xf; // Can only clear bit 4. Bit 3 must be cleared later.
1602

1603
  insn->vvvv = static_cast<Reg>(vvvv);
1604
  return 0;
1605
}
1606

1607
// Read an mask register from the opcode field of an instruction.
1608
//
1609
// @param insn    - The instruction whose opcode field is to be read.
1610
// @return        - 0 on success; nonzero otherwise.
1611
static int readMaskRegister(struct InternalInstruction *insn) {
1612
  LLVM_DEBUG(dbgs() << "readMaskRegister()");
1613

1614
  if (insn->vectorExtensionType != TYPE_EVEX)
1615
    return -1;
1616

1617
  insn->writemask =
1618
      static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
1619
  return 0;
1620
}
1621

1622
// Consults the specifier for an instruction and consumes all
1623
// operands for that instruction, interpreting them as it goes.
1624
static int readOperands(struct InternalInstruction *insn) {
1625
  int hasVVVV, needVVVV;
1626
  int sawRegImm = 0;
1627

1628
  LLVM_DEBUG(dbgs() << "readOperands()");
1629

1630
  // If non-zero vvvv specified, make sure one of the operands uses it.
1631
  hasVVVV = !readVVVV(insn);
1632
  needVVVV = hasVVVV && (insn->vvvv != 0);
1633

1634
  for (const auto &Op : x86OperandSets[insn->spec->operands]) {
1635
    switch (Op.encoding) {
1636
    case ENCODING_NONE:
1637
    case ENCODING_SI:
1638
    case ENCODING_DI:
1639
      break;
1640
    CASE_ENCODING_VSIB:
1641
      // VSIB can use the V2 bit so check only the other bits.
1642
      if (needVVVV)
1643
        needVVVV = hasVVVV & ((insn->vvvv & 0xf) != 0);
1644
      if (readModRM(insn))
1645
        return -1;
1646

1647
      // Reject if SIB wasn't used.
1648
      if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1649
        return -1;
1650

1651
      // If sibIndex was set to SIB_INDEX_NONE, index offset is 4.
1652
      if (insn->sibIndex == SIB_INDEX_NONE)
1653
        insn->sibIndex = (SIBIndex)(insn->sibIndexBase + 4);
1654

1655
      // If EVEX.v2 is set this is one of the 16-31 registers.
1656
      if (insn->vectorExtensionType == TYPE_EVEX && insn->mode == MODE_64BIT &&
1657
          v2FromEVEX4of4(insn->vectorExtensionPrefix[3]))
1658
        insn->sibIndex = (SIBIndex)(insn->sibIndex + 16);
1659

1660
      // Adjust the index register to the correct size.
1661
      switch ((OperandType)Op.type) {
1662
      default:
1663
        debug("Unhandled VSIB index type");
1664
        return -1;
1665
      case TYPE_MVSIBX:
1666
        insn->sibIndex =
1667
            (SIBIndex)(SIB_INDEX_XMM0 + (insn->sibIndex - insn->sibIndexBase));
1668
        break;
1669
      case TYPE_MVSIBY:
1670
        insn->sibIndex =
1671
            (SIBIndex)(SIB_INDEX_YMM0 + (insn->sibIndex - insn->sibIndexBase));
1672
        break;
1673
      case TYPE_MVSIBZ:
1674
        insn->sibIndex =
1675
            (SIBIndex)(SIB_INDEX_ZMM0 + (insn->sibIndex - insn->sibIndexBase));
1676
        break;
1677
      }
1678

1679
      // Apply the AVX512 compressed displacement scaling factor.
1680
      if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1681
        insn->displacement *= 1 << (Op.encoding - ENCODING_VSIB);
1682
      break;
1683
    case ENCODING_SIB:
1684
      // Reject if SIB wasn't used.
1685
      if (insn->eaBase != EA_BASE_sib && insn->eaBase != EA_BASE_sib64)
1686
        return -1;
1687
      if (readModRM(insn))
1688
        return -1;
1689
      if (fixupReg(insn, &Op))
1690
        return -1;
1691
      break;
1692
    case ENCODING_REG:
1693
    CASE_ENCODING_RM:
1694
      if (readModRM(insn))
1695
        return -1;
1696
      if (fixupReg(insn, &Op))
1697
        return -1;
1698
      // Apply the AVX512 compressed displacement scaling factor.
1699
      if (Op.encoding != ENCODING_REG && insn->eaDisplacement == EA_DISP_8)
1700
        insn->displacement *= 1 << (Op.encoding - ENCODING_RM);
1701
      break;
1702
    case ENCODING_IB:
1703
      if (sawRegImm) {
1704
        // Saw a register immediate so don't read again and instead split the
1705
        // previous immediate. FIXME: This is a hack.
1706
        insn->immediates[insn->numImmediatesConsumed] =
1707
            insn->immediates[insn->numImmediatesConsumed - 1] & 0xf;
1708
        ++insn->numImmediatesConsumed;
1709
        break;
1710
      }
1711
      if (readImmediate(insn, 1))
1712
        return -1;
1713
      if (Op.type == TYPE_XMM || Op.type == TYPE_YMM)
1714
        sawRegImm = 1;
1715
      break;
1716
    case ENCODING_IW:
1717
      if (readImmediate(insn, 2))
1718
        return -1;
1719
      break;
1720
    case ENCODING_ID:
1721
      if (readImmediate(insn, 4))
1722
        return -1;
1723
      break;
1724
    case ENCODING_IO:
1725
      if (readImmediate(insn, 8))
1726
        return -1;
1727
      break;
1728
    case ENCODING_Iv:
1729
      if (readImmediate(insn, insn->immediateSize))
1730
        return -1;
1731
      break;
1732
    case ENCODING_Ia:
1733
      if (readImmediate(insn, insn->addressSize))
1734
        return -1;
1735
      break;
1736
    case ENCODING_IRC:
1737
      insn->RC = (l2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 1) |
1738
                 lFromEVEX4of4(insn->vectorExtensionPrefix[3]);
1739
      break;
1740
    case ENCODING_RB:
1741
      if (readOpcodeRegister(insn, 1))
1742
        return -1;
1743
      break;
1744
    case ENCODING_RW:
1745
      if (readOpcodeRegister(insn, 2))
1746
        return -1;
1747
      break;
1748
    case ENCODING_RD:
1749
      if (readOpcodeRegister(insn, 4))
1750
        return -1;
1751
      break;
1752
    case ENCODING_RO:
1753
      if (readOpcodeRegister(insn, 8))
1754
        return -1;
1755
      break;
1756
    case ENCODING_Rv:
1757
      if (readOpcodeRegister(insn, 0))
1758
        return -1;
1759
      break;
1760
    case ENCODING_CF:
1761
      insn->immediates[1] = oszcFromEVEX3of4(insn->vectorExtensionPrefix[2]);
1762
      needVVVV = false; // oszc shares the same bits with VVVV
1763
      break;
1764
    case ENCODING_CC:
1765
      if (isCCMPOrCTEST(insn))
1766
        insn->immediates[2] = scFromEVEX4of4(insn->vectorExtensionPrefix[3]);
1767
      else
1768
        insn->immediates[1] = insn->opcode & 0xf;
1769
      break;
1770
    case ENCODING_FP:
1771
      break;
1772
    case ENCODING_VVVV:
1773
      needVVVV = 0; // Mark that we have found a VVVV operand.
1774
      if (!hasVVVV)
1775
        return -1;
1776
      if (insn->mode != MODE_64BIT)
1777
        insn->vvvv = static_cast<Reg>(insn->vvvv & 0x7);
1778
      if (fixupReg(insn, &Op))
1779
        return -1;
1780
      break;
1781
    case ENCODING_WRITEMASK:
1782
      if (readMaskRegister(insn))
1783
        return -1;
1784
      break;
1785
    case ENCODING_DUP:
1786
      break;
1787
    default:
1788
      LLVM_DEBUG(dbgs() << "Encountered an operand with an unknown encoding.");
1789
      return -1;
1790
    }
1791
  }
1792

1793
  // If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail
1794
  if (needVVVV)
1795
    return -1;
1796

1797
  return 0;
1798
}
1799

1800
// Placeholder enumerators that exist only so that the macro-generated switch
// statements in this file, which paste register names onto X86::, compile for
// the addressing forms that have no single LLVM register. These values are
// never actually assigned to an operand.
namespace llvm::X86 {
enum {
  BX_SI = 500,
  BX_DI = 501,
  BP_SI = 502,
  BP_DI = 503,
  sib = 504,
  sib64 = 505
};
} // namespace llvm::X86
1817

1818
static bool translateInstruction(MCInst &target,
1819
                                InternalInstruction &source,
1820
                                const MCDisassembler *Dis);
1821

1822
namespace {
1823

1824
/// Generic disassembler for all X86 platforms. All each platform class should
1825
/// have to do is subclass the constructor, and provide a different
1826
/// disassemblerMode value.
1827
class X86GenericDisassembler : public MCDisassembler {
1828
  std::unique_ptr<const MCInstrInfo> MII;
1829
public:
1830
  X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
1831
                         std::unique_ptr<const MCInstrInfo> MII);
1832
public:
1833
  DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
1834
                              ArrayRef<uint8_t> Bytes, uint64_t Address,
1835
                              raw_ostream &cStream) const override;
1836

1837
private:
1838
  DisassemblerMode              fMode;
1839
};
1840

1841
} // namespace
1842

1843
/// Builds a disassembler whose mode is selected by the subtarget feature bits
/// (Is16Bit, Is32Bit or Is64Bit, checked in that order). A subtarget with none
/// of these bits set is treated as unreachable.
X86GenericDisassembler::X86GenericDisassembler(
    const MCSubtargetInfo &STI, MCContext &Ctx,
    std::unique_ptr<const MCInstrInfo> MII)
    : MCDisassembler(STI, Ctx), MII(std::move(MII)) {
  const FeatureBitset &Features = STI.getFeatureBits();

  if (Features[X86::Is16Bit])
    fMode = MODE_16BIT;
  else if (Features[X86::Is32Bit])
    fMode = MODE_32BIT;
  else if (Features[X86::Is64Bit])
    fMode = MODE_64BIT;
  else
    llvm_unreachable("Invalid CPU mode");
}
1862

1863
/// Decodes a single instruction from Bytes into Instr.
///
/// @param Instr   - Receives the decoded instruction on success.
/// @param Size    - Receives the number of bytes consumed, both on success
///                  and on a decode failure.
/// @param Bytes   - The raw bytes to decode.
/// @param Address - The address of the first byte.
/// @param CStream - Stream installed as the comment stream.
/// @return        - Success if the bytes were decoded and translated; Fail
///                  otherwise.
MCDisassembler::DecodeStatus X86GenericDisassembler::getInstruction(
    MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
    raw_ostream &CStream) const {
  CommentStream = &CStream;

  InternalInstruction Insn;
  memset(&Insn, 0, sizeof(InternalInstruction));
  Insn.bytes = Bytes;
  Insn.startLocation = Address;
  Insn.readerCursor = Address;
  Insn.mode = fMode;

  // Run the decode stages in order; evaluation stops at the first stage that
  // reports a failure.
  const bool Decoded = !Bytes.empty() && !readPrefixes(&Insn) &&
                       !readOpcode(&Insn) &&
                       !getInstructionID(&Insn, MII.get()) &&
                       Insn.instructionID != 0 && !readOperands(&Insn);
  if (!Decoded) {
    Size = Insn.readerCursor - Address;
    return Fail;
  }

  Insn.operands = x86OperandSets[Insn.spec->operands];
  Insn.length = Insn.readerCursor - Insn.startLocation;
  Size = Insn.length;
  if (Size > 15)
    LLVM_DEBUG(dbgs() << "Instruction exceeds 15-byte limit");

  if (translateInstruction(Instr, Insn, this))
    return Fail;

  // Record which prefixes were present. Apart from the address-size prefix,
  // prefixes are only reported when no mandatory prefix was recorded.
  unsigned Flags = X86::IP_NO_PREFIX;
  if (Insn.hasAdSize)
    Flags |= X86::IP_HAS_AD_SIZE;
  if (!Insn.mandatoryPrefix) {
    if (Insn.hasOpSize)
      Flags |= X86::IP_HAS_OP_SIZE;

    const bool IsPause = Insn.opcode == 0x90; // f3 90 is 'pause', not rep.
    if (Insn.repeatPrefix == 0xf2)
      Flags |= X86::IP_HAS_REPEAT_NE;
    else if (Insn.repeatPrefix == 0xf3 && !IsPause)
      Flags |= X86::IP_HAS_REPEAT;

    if (Insn.hasLockPrefix)
      Flags |= X86::IP_HAS_LOCK;
  }
  Instr.setFlags(Flags);

  return Success;
}
1909

1910
//
1911
// Private code that translates from struct InternalInstructions to MCInsts.
1912
//
1913

1914
/// translateRegister - Translates an internal register to the appropriate LLVM
1915
///   register, and appends it as an operand to an MCInst.
1916
///
1917
/// @param mcInst     - The MCInst to append to.
1918
/// @param reg        - The Reg to append.
1919
static void translateRegister(MCInst &mcInst, Reg reg) {
1920
#define ENTRY(x) X86::x,
1921
  static constexpr MCPhysReg llvmRegnums[] = {ALL_REGS};
1922
#undef ENTRY
1923

1924
  MCPhysReg llvmRegnum = llvmRegnums[reg];
1925
  mcInst.addOperand(MCOperand::createReg(llvmRegnum));
1926
}
1927

1928
static const uint8_t segmentRegnums[SEG_OVERRIDE_max] = {
1929
  0,        // SEG_OVERRIDE_NONE
1930
  X86::CS,
1931
  X86::SS,
1932
  X86::DS,
1933
  X86::ES,
1934
  X86::FS,
1935
  X86::GS
1936
};
1937

1938
/// translateSrcIndex   - Appends a source index operand to an MCInst.
1939
///
1940
/// @param mcInst       - The MCInst to append to.
1941
/// @param insn         - The internal instruction.
1942
static bool translateSrcIndex(MCInst &mcInst, InternalInstruction &insn) {
1943
  unsigned baseRegNo;
1944

1945
  if (insn.mode == MODE_64BIT)
1946
    baseRegNo = insn.hasAdSize ? X86::ESI : X86::RSI;
1947
  else if (insn.mode == MODE_32BIT)
1948
    baseRegNo = insn.hasAdSize ? X86::SI : X86::ESI;
1949
  else {
1950
    assert(insn.mode == MODE_16BIT);
1951
    baseRegNo = insn.hasAdSize ? X86::ESI : X86::SI;
1952
  }
1953
  MCOperand baseReg = MCOperand::createReg(baseRegNo);
1954
  mcInst.addOperand(baseReg);
1955

1956
  MCOperand segmentReg;
1957
  segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
1958
  mcInst.addOperand(segmentReg);
1959
  return false;
1960
}
1961

1962
/// translateDstIndex   - Appends a destination index operand to an MCInst.
1963
///
1964
/// @param mcInst       - The MCInst to append to.
1965
/// @param insn         - The internal instruction.
1966

1967
static bool translateDstIndex(MCInst &mcInst, InternalInstruction &insn) {
1968
  unsigned baseRegNo;
1969

1970
  if (insn.mode == MODE_64BIT)
1971
    baseRegNo = insn.hasAdSize ? X86::EDI : X86::RDI;
1972
  else if (insn.mode == MODE_32BIT)
1973
    baseRegNo = insn.hasAdSize ? X86::DI : X86::EDI;
1974
  else {
1975
    assert(insn.mode == MODE_16BIT);
1976
    baseRegNo = insn.hasAdSize ? X86::EDI : X86::DI;
1977
  }
1978
  MCOperand baseReg = MCOperand::createReg(baseRegNo);
1979
  mcInst.addOperand(baseReg);
1980
  return false;
1981
}
1982

1983
/// translateImmediate  - Appends an immediate operand to an MCInst.
1984
///
1985
/// @param mcInst       - The MCInst to append to.
1986
/// @param immediate    - The immediate value to append.
1987
/// @param operand      - The operand, as stored in the descriptor table.
1988
/// @param insn         - The internal instruction.
1989
static void translateImmediate(MCInst &mcInst, uint64_t immediate,
1990
                               const OperandSpecifier &operand,
1991
                               InternalInstruction &insn,
1992
                               const MCDisassembler *Dis) {
1993
  // Sign-extend the immediate if necessary.
1994

1995
  OperandType type = (OperandType)operand.type;
1996

1997
  bool isBranch = false;
1998
  uint64_t pcrel = 0;
1999
  if (type == TYPE_REL) {
2000
    isBranch = true;
2001
    pcrel = insn.startLocation + insn.length;
2002
    switch (operand.encoding) {
2003
    default:
2004
      break;
2005
    case ENCODING_Iv:
2006
      switch (insn.displacementSize) {
2007
      default:
2008
        break;
2009
      case 1:
2010
        if(immediate & 0x80)
2011
          immediate |= ~(0xffull);
2012
        break;
2013
      case 2:
2014
        if(immediate & 0x8000)
2015
          immediate |= ~(0xffffull);
2016
        break;
2017
      case 4:
2018
        if(immediate & 0x80000000)
2019
          immediate |= ~(0xffffffffull);
2020
        break;
2021
      case 8:
2022
        break;
2023
      }
2024
      break;
2025
    case ENCODING_IB:
2026
      if(immediate & 0x80)
2027
        immediate |= ~(0xffull);
2028
      break;
2029
    case ENCODING_IW:
2030
      if(immediate & 0x8000)
2031
        immediate |= ~(0xffffull);
2032
      break;
2033
    case ENCODING_ID:
2034
      if(immediate & 0x80000000)
2035
        immediate |= ~(0xffffffffull);
2036
      break;
2037
    }
2038
  }
2039
  // By default sign-extend all X86 immediates based on their encoding.
2040
  else if (type == TYPE_IMM) {
2041
    switch (operand.encoding) {
2042
    default:
2043
      break;
2044
    case ENCODING_IB:
2045
      if(immediate & 0x80)
2046
        immediate |= ~(0xffull);
2047
      break;
2048
    case ENCODING_IW:
2049
      if(immediate & 0x8000)
2050
        immediate |= ~(0xffffull);
2051
      break;
2052
    case ENCODING_ID:
2053
      if(immediate & 0x80000000)
2054
        immediate |= ~(0xffffffffull);
2055
      break;
2056
    case ENCODING_IO:
2057
      break;
2058
    }
2059
  }
2060

2061
  switch (type) {
2062
  case TYPE_XMM:
2063
    mcInst.addOperand(MCOperand::createReg(X86::XMM0 + (immediate >> 4)));
2064
    return;
2065
  case TYPE_YMM:
2066
    mcInst.addOperand(MCOperand::createReg(X86::YMM0 + (immediate >> 4)));
2067
    return;
2068
  case TYPE_ZMM:
2069
    mcInst.addOperand(MCOperand::createReg(X86::ZMM0 + (immediate >> 4)));
2070
    return;
2071
  default:
2072
    // operand is 64 bits wide.  Do nothing.
2073
    break;
2074
  }
2075

2076
  if (!Dis->tryAddingSymbolicOperand(
2077
          mcInst, immediate + pcrel, insn.startLocation, isBranch,
2078
          insn.immediateOffset, insn.immediateSize, insn.length))
2079
    mcInst.addOperand(MCOperand::createImm(immediate));
2080

2081
  if (type == TYPE_MOFFS) {
2082
    MCOperand segmentReg;
2083
    segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
2084
    mcInst.addOperand(segmentReg);
2085
  }
2086
}
2087

2088
/// translateRMRegister - Translates a register stored in the R/M field of the
2089
///   ModR/M byte to its LLVM equivalent and appends it to an MCInst.
2090
/// @param mcInst       - The MCInst to append to.
2091
/// @param insn         - The internal instruction to extract the R/M field
2092
///                       from.
2093
/// @return             - 0 on success; -1 otherwise
2094
static bool translateRMRegister(MCInst &mcInst,
2095
                                InternalInstruction &insn) {
2096
  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
2097
    debug("A R/M register operand may not have a SIB byte");
2098
    return true;
2099
  }
2100

2101
  switch (insn.eaBase) {
2102
  default:
2103
    debug("Unexpected EA base register");
2104
    return true;
2105
  case EA_BASE_NONE:
2106
    debug("EA_BASE_NONE for ModR/M base");
2107
    return true;
2108
#define ENTRY(x) case EA_BASE_##x:
2109
  ALL_EA_BASES
2110
#undef ENTRY
2111
    debug("A R/M register operand may not have a base; "
2112
          "the operand must be a register.");
2113
    return true;
2114
#define ENTRY(x)                                                      \
2115
  case EA_REG_##x:                                                    \
2116
    mcInst.addOperand(MCOperand::createReg(X86::x)); break;
2117
  ALL_REGS
2118
#undef ENTRY
2119
  }
2120

2121
  return false;
2122
}
2123

2124
/// translateRMMemory - Translates a memory operand stored in the Mod and R/M
2125
///   fields of an internal instruction (and possibly its SIB byte) to a memory
2126
///   operand in LLVM's format, and appends it to an MCInst.
2127
///
2128
/// @param mcInst       - The MCInst to append to.
2129
/// @param insn         - The instruction to extract Mod, R/M, and SIB fields
2130
///                       from.
2131
/// @param ForceSIB     - The instruction must use SIB.
2132
/// @return             - 0 on success; nonzero otherwise
2133
static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn,
2134
                              const MCDisassembler *Dis,
2135
                              bool ForceSIB = false) {
2136
  // Addresses in an MCInst are represented as five operands:
2137
  //   1. basereg       (register)  The R/M base, or (if there is a SIB) the
2138
  //                                SIB base
2139
  //   2. scaleamount   (immediate) 1, or (if there is a SIB) the specified
2140
  //                                scale amount
2141
  //   3. indexreg      (register)  x86_registerNONE, or (if there is a SIB)
2142
  //                                the index (which is multiplied by the
2143
  //                                scale amount)
2144
  //   4. displacement  (immediate) 0, or the displacement if there is one
2145
  //   5. segmentreg    (register)  x86_registerNONE for now, but could be set
2146
  //                                if we have segment overrides
2147

2148
  MCOperand baseReg;
2149
  MCOperand scaleAmount;
2150
  MCOperand indexReg;
2151
  MCOperand displacement;
2152
  MCOperand segmentReg;
2153
  uint64_t pcrel = 0;
2154

2155
  if (insn.eaBase == EA_BASE_sib || insn.eaBase == EA_BASE_sib64) {
2156
    if (insn.sibBase != SIB_BASE_NONE) {
2157
      switch (insn.sibBase) {
2158
      default:
2159
        debug("Unexpected sibBase");
2160
        return true;
2161
#define ENTRY(x)                                          \
2162
      case SIB_BASE_##x:                                  \
2163
        baseReg = MCOperand::createReg(X86::x); break;
2164
      ALL_SIB_BASES
2165
#undef ENTRY
2166
      }
2167
    } else {
2168
      baseReg = MCOperand::createReg(X86::NoRegister);
2169
    }
2170

2171
    if (insn.sibIndex != SIB_INDEX_NONE) {
2172
      switch (insn.sibIndex) {
2173
      default:
2174
        debug("Unexpected sibIndex");
2175
        return true;
2176
#define ENTRY(x)                                          \
2177
      case SIB_INDEX_##x:                                 \
2178
        indexReg = MCOperand::createReg(X86::x); break;
2179
      EA_BASES_32BIT
2180
      EA_BASES_64BIT
2181
      REGS_XMM
2182
      REGS_YMM
2183
      REGS_ZMM
2184
#undef ENTRY
2185
      }
2186
    } else {
2187
      // Use EIZ/RIZ for a few ambiguous cases where the SIB byte is present,
2188
      // but no index is used and modrm alone should have been enough.
2189
      // -No base register in 32-bit mode. In 64-bit mode this is used to
2190
      //  avoid rip-relative addressing.
2191
      // -Any base register used other than ESP/RSP/R12D/R12. Using these as a
2192
      //  base always requires a SIB byte.
2193
      // -A scale other than 1 is used.
2194
      if (!ForceSIB &&
2195
          (insn.sibScale != 1 ||
2196
           (insn.sibBase == SIB_BASE_NONE && insn.mode != MODE_64BIT) ||
2197
           (insn.sibBase != SIB_BASE_NONE &&
2198
            insn.sibBase != SIB_BASE_ESP && insn.sibBase != SIB_BASE_RSP &&
2199
            insn.sibBase != SIB_BASE_R12D && insn.sibBase != SIB_BASE_R12))) {
2200
        indexReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIZ :
2201
                                                                X86::RIZ);
2202
      } else
2203
        indexReg = MCOperand::createReg(X86::NoRegister);
2204
    }
2205

2206
    scaleAmount = MCOperand::createImm(insn.sibScale);
2207
  } else {
2208
    switch (insn.eaBase) {
2209
    case EA_BASE_NONE:
2210
      if (insn.eaDisplacement == EA_DISP_NONE) {
2211
        debug("EA_BASE_NONE and EA_DISP_NONE for ModR/M base");
2212
        return true;
2213
      }
2214
      if (insn.mode == MODE_64BIT){
2215
        pcrel = insn.startLocation + insn.length;
2216
        Dis->tryAddingPcLoadReferenceComment(insn.displacement + pcrel,
2217
                                             insn.startLocation +
2218
                                                 insn.displacementOffset);
2219
        // Section 2.2.1.6
2220
        baseReg = MCOperand::createReg(insn.addressSize == 4 ? X86::EIP :
2221
                                                               X86::RIP);
2222
      }
2223
      else
2224
        baseReg = MCOperand::createReg(X86::NoRegister);
2225

2226
      indexReg = MCOperand::createReg(X86::NoRegister);
2227
      break;
2228
    case EA_BASE_BX_SI:
2229
      baseReg = MCOperand::createReg(X86::BX);
2230
      indexReg = MCOperand::createReg(X86::SI);
2231
      break;
2232
    case EA_BASE_BX_DI:
2233
      baseReg = MCOperand::createReg(X86::BX);
2234
      indexReg = MCOperand::createReg(X86::DI);
2235
      break;
2236
    case EA_BASE_BP_SI:
2237
      baseReg = MCOperand::createReg(X86::BP);
2238
      indexReg = MCOperand::createReg(X86::SI);
2239
      break;
2240
    case EA_BASE_BP_DI:
2241
      baseReg = MCOperand::createReg(X86::BP);
2242
      indexReg = MCOperand::createReg(X86::DI);
2243
      break;
2244
    default:
2245
      indexReg = MCOperand::createReg(X86::NoRegister);
2246
      switch (insn.eaBase) {
2247
      default:
2248
        debug("Unexpected eaBase");
2249
        return true;
2250
        // Here, we will use the fill-ins defined above.  However,
2251
        //   BX_SI, BX_DI, BP_SI, and BP_DI are all handled above and
2252
        //   sib and sib64 were handled in the top-level if, so they're only
2253
        //   placeholders to keep the compiler happy.
2254
#define ENTRY(x)                                        \
2255
      case EA_BASE_##x:                                 \
2256
        baseReg = MCOperand::createReg(X86::x); break;
2257
      ALL_EA_BASES
2258
#undef ENTRY
2259
#define ENTRY(x) case EA_REG_##x:
2260
      ALL_REGS
2261
#undef ENTRY
2262
        debug("A R/M memory operand may not be a register; "
2263
              "the base field must be a base.");
2264
        return true;
2265
      }
2266
    }
2267

2268
    scaleAmount = MCOperand::createImm(1);
2269
  }
2270

2271
  displacement = MCOperand::createImm(insn.displacement);
2272

2273
  segmentReg = MCOperand::createReg(segmentRegnums[insn.segmentOverride]);
2274

2275
  mcInst.addOperand(baseReg);
2276
  mcInst.addOperand(scaleAmount);
2277
  mcInst.addOperand(indexReg);
2278

2279
  const uint8_t dispSize =
2280
      (insn.eaDisplacement == EA_DISP_NONE) ? 0 : insn.displacementSize;
2281

2282
  if (!Dis->tryAddingSymbolicOperand(
2283
          mcInst, insn.displacement + pcrel, insn.startLocation, false,
2284
          insn.displacementOffset, dispSize, insn.length))
2285
    mcInst.addOperand(displacement);
2286
  mcInst.addOperand(segmentReg);
2287
  return false;
2288
}
2289

2290
/// translateRM - Translates an operand stored in the R/M (and possibly SIB)
2291
///   byte of an instruction to LLVM form, and appends it to an MCInst.
2292
///
2293
/// @param mcInst       - The MCInst to append to.
2294
/// @param operand      - The operand, as stored in the descriptor table.
2295
/// @param insn         - The instruction to extract Mod, R/M, and SIB fields
2296
///                       from.
2297
/// @return             - 0 on success; nonzero otherwise
2298
static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
2299
                        InternalInstruction &insn, const MCDisassembler *Dis) {
2300
  switch (operand.type) {
2301
  default:
2302
    debug("Unexpected type for a R/M operand");
2303
    return true;
2304
  case TYPE_R8:
2305
  case TYPE_R16:
2306
  case TYPE_R32:
2307
  case TYPE_R64:
2308
  case TYPE_Rv:
2309
  case TYPE_MM64:
2310
  case TYPE_XMM:
2311
  case TYPE_YMM:
2312
  case TYPE_ZMM:
2313
  case TYPE_TMM:
2314
  case TYPE_VK_PAIR:
2315
  case TYPE_VK:
2316
  case TYPE_DEBUGREG:
2317
  case TYPE_CONTROLREG:
2318
  case TYPE_BNDR:
2319
    return translateRMRegister(mcInst, insn);
2320
  case TYPE_M:
2321
  case TYPE_MVSIBX:
2322
  case TYPE_MVSIBY:
2323
  case TYPE_MVSIBZ:
2324
    return translateRMMemory(mcInst, insn, Dis);
2325
  case TYPE_MSIB:
2326
    return translateRMMemory(mcInst, insn, Dis, true);
2327
  }
2328
}
2329

2330
/// translateFPRegister - Translates a stack position on the FPU stack to its
2331
///   LLVM form, and appends it to an MCInst.
2332
///
2333
/// @param mcInst       - The MCInst to append to.
2334
/// @param stackPos     - The stack position to translate.
2335
static void translateFPRegister(MCInst &mcInst,
2336
                                uint8_t stackPos) {
2337
  mcInst.addOperand(MCOperand::createReg(X86::ST0 + stackPos));
2338
}
2339

2340
/// translateMaskRegister - Translates a 3-bit mask register number to
2341
///   LLVM form, and appends it to an MCInst.
2342
///
2343
/// @param mcInst       - The MCInst to append to.
2344
/// @param maskRegNum   - Number of mask register from 0 to 7.
2345
/// @return             - false on success; true otherwise.
2346
static bool translateMaskRegister(MCInst &mcInst,
2347
                                uint8_t maskRegNum) {
2348
  if (maskRegNum >= 8) {
2349
    debug("Invalid mask register number");
2350
    return true;
2351
  }
2352

2353
  mcInst.addOperand(MCOperand::createReg(X86::K0 + maskRegNum));
2354
  return false;
2355
}
2356

2357
/// translateOperand - Translates an operand stored in an internal instruction
2358
///   to LLVM's format and appends it to an MCInst.
2359
///
2360
/// @param mcInst       - The MCInst to append to.
2361
/// @param operand      - The operand, as stored in the descriptor table.
2362
/// @param insn         - The internal instruction.
2363
/// @return             - false on success; true otherwise.
2364
static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
2365
                             InternalInstruction &insn,
2366
                             const MCDisassembler *Dis) {
2367
  switch (operand.encoding) {
2368
  default:
2369
    debug("Unhandled operand encoding during translation");
2370
    return true;
2371
  case ENCODING_REG:
2372
    translateRegister(mcInst, insn.reg);
2373
    return false;
2374
  case ENCODING_WRITEMASK:
2375
    return translateMaskRegister(mcInst, insn.writemask);
2376
  case ENCODING_SIB:
2377
  CASE_ENCODING_RM:
2378
  CASE_ENCODING_VSIB:
2379
    return translateRM(mcInst, operand, insn, Dis);
2380
  case ENCODING_IB:
2381
  case ENCODING_IW:
2382
  case ENCODING_ID:
2383
  case ENCODING_IO:
2384
  case ENCODING_Iv:
2385
  case ENCODING_Ia:
2386
    translateImmediate(mcInst,
2387
                       insn.immediates[insn.numImmediatesTranslated++],
2388
                       operand,
2389
                       insn,
2390
                       Dis);
2391
    return false;
2392
  case ENCODING_IRC:
2393
    mcInst.addOperand(MCOperand::createImm(insn.RC));
2394
    return false;
2395
  case ENCODING_SI:
2396
    return translateSrcIndex(mcInst, insn);
2397
  case ENCODING_DI:
2398
    return translateDstIndex(mcInst, insn);
2399
  case ENCODING_RB:
2400
  case ENCODING_RW:
2401
  case ENCODING_RD:
2402
  case ENCODING_RO:
2403
  case ENCODING_Rv:
2404
    translateRegister(mcInst, insn.opcodeRegister);
2405
    return false;
2406
  case ENCODING_CF:
2407
    mcInst.addOperand(MCOperand::createImm(insn.immediates[1]));
2408
    return false;
2409
  case ENCODING_CC:
2410
    if (isCCMPOrCTEST(&insn))
2411
      mcInst.addOperand(MCOperand::createImm(insn.immediates[2]));
2412
    else
2413
      mcInst.addOperand(MCOperand::createImm(insn.immediates[1]));
2414
    return false;
2415
  case ENCODING_FP:
2416
    translateFPRegister(mcInst, insn.modRM & 7);
2417
    return false;
2418
  case ENCODING_VVVV:
2419
    translateRegister(mcInst, insn.vvvv);
2420
    return false;
2421
  case ENCODING_DUP:
2422
    return translateOperand(mcInst, insn.operands[operand.type - TYPE_DUP0],
2423
                            insn, Dis);
2424
  }
2425
}
2426

2427
/// translateInstruction - Translates an internal instruction and all its
2428
///   operands to an MCInst.
2429
///
2430
/// @param mcInst       - The MCInst to populate with the instruction's data.
2431
/// @param insn         - The internal instruction.
2432
/// @return             - false on success; true otherwise.
2433
static bool translateInstruction(MCInst &mcInst,
2434
                                InternalInstruction &insn,
2435
                                const MCDisassembler *Dis) {
2436
  if (!insn.spec) {
2437
    debug("Instruction has no specification");
2438
    return true;
2439
  }
2440

2441
  mcInst.clear();
2442
  mcInst.setOpcode(insn.instructionID);
2443
  // If when reading the prefix bytes we determined the overlapping 0xf2 or 0xf3
2444
  // prefix bytes should be disassembled as xrelease and xacquire then set the
2445
  // opcode to those instead of the rep and repne opcodes.
2446
  if (insn.xAcquireRelease) {
2447
    if(mcInst.getOpcode() == X86::REP_PREFIX)
2448
      mcInst.setOpcode(X86::XRELEASE_PREFIX);
2449
    else if(mcInst.getOpcode() == X86::REPNE_PREFIX)
2450
      mcInst.setOpcode(X86::XACQUIRE_PREFIX);
2451
  }
2452

2453
  insn.numImmediatesTranslated = 0;
2454

2455
  for (const auto &Op : insn.operands) {
2456
    if (Op.encoding != ENCODING_NONE) {
2457
      if (translateOperand(mcInst, Op, insn, Dis)) {
2458
        return true;
2459
      }
2460
    }
2461
  }
2462

2463
  return false;
2464
}
2465

2466
/// Factory registered with the target registry: creates a disassembler for
/// the given subtarget, handing it a freshly created MCInstrInfo that it owns.
static MCDisassembler *createX86Disassembler(const Target &T,
                                             const MCSubtargetInfo &STI,
                                             MCContext &Ctx) {
  return new X86GenericDisassembler(
      STI, Ctx, std::unique_ptr<const MCInstrInfo>(T.createMCInstrInfo()));
}
2472

2473
/// Registers createX86Disassembler as the disassembler factory for both the
/// 32-bit and the 64-bit X86 targets.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Disassembler() {
  auto registerFor = [](Target &T) {
    TargetRegistry::RegisterMCDisassembler(T, createX86Disassembler);
  };

  registerFor(getTheX86_32Target());
  registerFor(getTheX86_64Target());
}
2480

2481
Product

Resources

Company