GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/cranelift/codegen/src/machinst/vcode.rs
//! This implements the VCode container: a CFG of Insts that have been lowered.
//!
//! VCode is virtual-register code. An instruction in VCode is almost a machine
//! instruction; however, its register slots can refer to virtual registers in
//! addition to real machine registers.
//!
//! VCode is structured with traditional basic blocks, and
//! each block must be terminated by an unconditional branch (one target), a
//! conditional branch (two targets), or a return (no targets). Note that this
//! slightly differs from the machine code of most ISAs: in most ISAs, a
//! conditional branch has one target (and the not-taken case falls through).
//! However, we expect that machine backends will elide branches to the following
//! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
//! branch-uncond pair if *both* targets are not fallthrough. This allows us to
//! play with layout prior to final binary emission, as well, if we want.
//!
//! See the main module comment in `mod.rs` for more details on the VCode-based
//! backend pipeline.
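//!
//! For example (an illustrative sketch only; the mnemonics below are not actual
//! VCode or ISA syntax), a two-target conditional terminator can be emitted in
//! two ways depending on the final block layout:
//!
//! ```text
//! VCode terminator:   cond_br v0, block1, block2
//!
//! if block2 is the next block in the layout (not-taken edge falls through):
//!     b.cond  label1
//!
//! if neither target is the fallthrough block:
//!     b.cond  label1
//!     b       label2
//! ```
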
use crate::CodegenError;
use crate::ir::pcc::*;
use crate::ir::{self, Constant, ConstantData, ValueLabel, types};
use crate::ranges::Ranges;
use crate::timing;
use crate::trace;
use crate::{LabelValueLoc, ValueLocRange};
use crate::{machinst::*, trace_log_enabled};
use regalloc2::{
    Edit, Function as RegallocFunction, InstOrEdit, InstPosition, InstRange, Operand,
    OperandConstraint, OperandKind, PRegSet, ProgPoint, RegClass,
};
use rustc_hash::FxHashMap;

use core::cmp::Ordering;
use core::fmt::{self, Write};
use core::mem::take;
use cranelift_entity::{Keys, entity_impl};
use std::collections::HashMap;
use std::collections::hash_map::Entry;

/// Index referring to an instruction in VCode.
42
pub type InsnIndex = regalloc2::Inst;
43
44
/// Extension trait for `InsnIndex` to allow conversion to a
45
/// `BackwardsInsnIndex`.
46
trait ToBackwardsInsnIndex {
47
fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex;
48
}
49
50
impl ToBackwardsInsnIndex for InsnIndex {
51
fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex {
52
BackwardsInsnIndex::new(num_insts - self.index() - 1)
53
}
54
}
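
// For example (illustrative): with `num_insts == 5`, forward instruction 0 is
// backwards instruction 4 and vice versa; since the mapping is
// `num_insts - i - 1`, applying it twice returns the original index.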
55
56
/// An index referring to an instruction in the VCode when it is backwards,
57
/// during VCode construction.
58
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
59
#[cfg_attr(
60
feature = "enable-serde",
61
derive(::serde::Serialize, ::serde::Deserialize)
62
)]
63
pub struct BackwardsInsnIndex(InsnIndex);
64
65
impl BackwardsInsnIndex {
66
pub fn new(i: usize) -> Self {
67
BackwardsInsnIndex(InsnIndex::new(i))
68
}
69
}
70
71
/// Index referring to a basic block in VCode.
72
pub type BlockIndex = regalloc2::Block;
73
74
/// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
75
/// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
76
pub trait VCodeInst: MachInst + MachInstEmit {}
77
impl<I: MachInst + MachInstEmit> VCodeInst for I {}
78
79
/// A function in "VCode" (virtualized-register code) form, after
80
/// lowering. This is essentially a standard CFG of basic blocks,
81
/// where each basic block consists of lowered instructions produced
82
/// by the machine-specific backend.
83
///
84
/// Note that the VCode is immutable once produced, and is not
85
/// modified by register allocation in particular. Rather, register
86
/// allocation on the `VCode` produces a separate `regalloc2::Output`
87
/// struct, and this can be passed to `emit`. `emit` in turn does not
88
/// modify the vcode, but produces an `EmitResult`, which contains the
89
/// machine code itself, and the associated disassembly and/or
90
/// metadata as requested.
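///
/// A sketch of the overall flow (illustrative only; `run_regalloc` stands in
/// for the register allocator invocation, which lives elsewhere):
///
/// ```ignore
/// let vcode: VCode<I> = builder.build(vregs);
/// let ra_output: regalloc2::Output = run_regalloc(&vcode);
/// let result: EmitResult = vcode.emit(&ra_output, want_disasm, &flags, &mut ctrl_plane);
/// ```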
91
pub struct VCode<I: VCodeInst> {
92
/// VReg IR-level types.
93
vreg_types: Vec<Type>,
94
95
/// Lowered machine instructions in order corresponding to the original IR.
96
insts: Vec<I>,
97
98
/// A map from backwards instruction index to the user stack map for that
99
/// instruction.
100
///
101
/// This is a sparse side table that only has entries for instructions that
102
/// are safepoints, and only for a subset of those that have an associated
103
/// user stack map.
104
user_stack_maps: FxHashMap<BackwardsInsnIndex, ir::UserStackMap>,
105
106
/// Operands: pre-regalloc references to virtual registers with
107
/// constraints, in one flattened array. This allows the regalloc
108
/// to efficiently access all operands without requiring expensive
109
/// matches or method invocations on insts.
110
operands: Vec<Operand>,
111
112
/// Operand index ranges: for each instruction in `insts`, there
113
/// is a tuple here providing the range in `operands` for that
114
/// instruction's operands.
115
operand_ranges: Ranges,
116
117
/// Clobbers: a sparse map from instruction indices to clobber masks.
118
clobbers: FxHashMap<InsnIndex, PRegSet>,
119
120
/// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is
121
/// reasonable to keep one of these per instruction.)
122
srclocs: Vec<RelSourceLoc>,
123
124
/// Entry block.
125
entry: BlockIndex,
126
127
/// Block instruction indices.
128
block_ranges: Ranges,
129
130
/// Block successors: index range in the `block_succs` list.
131
block_succ_range: Ranges,
132
133
/// Block successor lists, concatenated into one vec. The
134
/// `block_succ_range` list of tuples above gives (start, end)
135
/// ranges within this list that correspond to each basic block's
136
/// successors.
137
block_succs: Vec<regalloc2::Block>,
138
139
/// Block predecessors: index range in the `block_preds` list.
140
block_pred_range: Ranges,
141
142
/// Block predecessor lists, concatenated into one vec. The
143
/// `block_pred_range` list of tuples above gives (start, end)
144
/// ranges within this list that correspond to each basic block's
145
/// predecessors.
146
block_preds: Vec<regalloc2::Block>,
147
148
/// Block parameters: index range in `block_params` below.
149
block_params_range: Ranges,
150
151
/// Block parameter lists, concatenated into one vec. The
152
/// `block_params_range` list of tuples above gives (start, end)
153
/// ranges within this list that correspond to each basic block's
154
/// blockparam vregs.
155
block_params: Vec<regalloc2::VReg>,
156
157
/// Outgoing block arguments on branch instructions, concatenated
158
/// into one list.
159
///
160
/// Note that this is conceptually a 3D array: we have a VReg list
161
/// per block, per successor. We flatten those three dimensions
162
/// into this 1D vec, then store index ranges in two levels of
163
/// indirection.
164
///
165
/// Indexed by the indices in `branch_block_arg_succ_range`.
166
branch_block_args: Vec<regalloc2::VReg>,
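// For example (illustrative): if some block has successors S0 and S1 with
// branch args [v1, v2] and [v3], those vregs are stored here contiguously as
// [.., v1, v2, v3, ..]; the block's entry in `branch_block_arg_succ_range`
// covers two consecutive entries of `branch_block_arg_range`, which in turn
// give the [v1, v2] and [v3] slices of this vec.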
167
168
/// Array of sequences of (start, end) tuples in
169
/// `branch_block_args`, one for each successor; these sequences
170
/// for each block are concatenated.
171
///
172
/// Indexed by the indices in `branch_block_arg_succ_range`.
173
branch_block_arg_range: Ranges,
174
175
/// For a given block, indices in `branch_block_arg_range`
176
/// corresponding to all of its successors.
177
branch_block_arg_succ_range: Ranges,
178
179
/// Block-order information.
180
block_order: BlockLoweringOrder,
181
182
/// ABI object.
183
pub(crate) abi: Callee<I::ABIMachineSpec>,
184
185
/// Constant information used during code emission. This should be
186
/// immutable across function compilations within the same module.
187
emit_info: I::Info,
188
189
/// Constants.
190
pub(crate) constants: VCodeConstants,
191
192
/// Value labels for debuginfo attached to vregs.
193
debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>,
194
195
pub(crate) sigs: SigSet,
196
197
/// Facts on VRegs, for proof-carrying code verification.
198
facts: Vec<Option<Fact>>,
199
200
log2_min_function_alignment: u8,
201
}
202
203
/// The result of `VCode::emit`. Contains all information computed
204
/// during emission: actual machine code, optionally a disassembly,
205
/// and optionally metadata about the code layout.
206
pub struct EmitResult {
207
/// The MachBuffer containing the machine code.
208
pub buffer: MachBufferFinalized<Stencil>,
209
210
/// Offset of each basic block, recorded during emission. Computed
211
/// only if `machine_code_cfg_info` is enabled.
212
pub bb_offsets: Vec<CodeOffset>,
213
214
/// Final basic-block edges, in terms of code offsets of
215
/// bb-starts. Computed only if `machine_code_cfg_info` is enabled.
216
pub bb_edges: Vec<(CodeOffset, CodeOffset)>,
217
218
/// The pretty-printed disassembly, if any. This uses the same
219
/// pretty-printing for MachInsts as the pre-regalloc VCode Debug
220
/// implementation, but additionally includes the prologue and
221
/// epilogue(s), and makes use of the regalloc results.
222
pub disasm: Option<String>,
223
224
/// Offsets of sized stackslots.
225
pub sized_stackslot_offsets: PrimaryMap<StackSlot, u32>,
226
227
/// Offsets of dynamic stackslots.
228
pub dynamic_stackslot_offsets: PrimaryMap<DynamicStackSlot, u32>,
229
230
/// Value-labels information (debug metadata).
231
pub value_labels_ranges: ValueLabelsRanges,
232
233
/// Stack frame size.
234
pub frame_size: u32,
235
}
236
237
/// A builder for a VCode function body.
238
///
239
/// This builder has the ability to accept instructions in either
240
/// forward or reverse order, depending on the pass direction that
241
/// produces the VCode. The lowering from CLIF to VCode<MachInst>
242
/// ordinarily occurs in reverse order (in order to allow instructions
243
/// to be lowered only if used, and not merged), so a reversal will
244
/// occur at the end of lowering to ensure the VCode is in machine
245
/// order.
246
///
247
/// If built in reverse, block and instruction indices used once the
248
/// VCode is built are relative to the final (reversed) order, not the
249
/// order of construction. Note that this means we do not know the
250
/// final block or instruction indices when building, so we do not
251
/// hand them out. (The user is assumed to know them when appending
252
/// terminator instructions with successor blocks.)
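///
/// A minimal sketch of the backward-build flow (illustrative only; the real
/// driver is the lowering pass, which also threads a `VRegAllocator`, block
/// successors, block params, and source locations through these calls):
///
/// ```ignore
/// // Within a block, instructions are pushed in reverse program order...
/// builder.push(branch_inst, loc);
/// builder.push(middle_inst, loc);
/// builder.push(first_inst, loc);
/// builder.end_bb();
/// // ...and `build` reverses everything into final forward order.
/// let vcode = builder.build(vregs);
/// ```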
253
pub struct VCodeBuilder<I: VCodeInst> {
254
/// In-progress VCode.
255
pub(crate) vcode: VCode<I>,
256
257
/// In what direction is the build occurring?
258
direction: VCodeBuildDirection,
259
260
/// Debug-value label in-progress map, keyed by label. For each
261
/// label, we keep disjoint ranges mapping to vregs. We'll flatten
262
/// this into (vreg, range, label) tuples when done.
263
debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>,
264
}
265
266
/// Direction in which a VCodeBuilder builds VCode.
267
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
268
pub enum VCodeBuildDirection {
269
// TODO: add `Forward` once we need it and can test it adequately.
270
/// Backward-build pass: we expect the producer to call `emit()`
271
/// with instructions in reverse program order within each block.
272
Backward,
273
}
274
275
impl<I: VCodeInst> VCodeBuilder<I> {
276
/// Create a new VCodeBuilder.
277
pub fn new(
278
sigs: SigSet,
279
abi: Callee<I::ABIMachineSpec>,
280
emit_info: I::Info,
281
block_order: BlockLoweringOrder,
282
constants: VCodeConstants,
283
direction: VCodeBuildDirection,
284
log2_min_function_alignment: u8,
285
) -> Self {
286
let vcode = VCode::new(
287
sigs,
288
abi,
289
emit_info,
290
block_order,
291
constants,
292
log2_min_function_alignment,
293
);
294
295
VCodeBuilder {
296
vcode,
297
direction,
298
debug_info: FxHashMap::default(),
299
}
300
}
301
302
pub fn init_retval_area(&mut self, vregs: &mut VRegAllocator<I>) -> CodegenResult<()> {
303
self.vcode.abi.init_retval_area(&self.vcode.sigs, vregs)
304
}
305
306
/// Access the ABI object.
307
pub fn abi(&self) -> &Callee<I::ABIMachineSpec> {
308
&self.vcode.abi
309
}
310
311
/// Access the ABI object.
312
pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> {
313
&mut self.vcode.abi
314
}
315
316
pub fn sigs(&self) -> &SigSet {
317
&self.vcode.sigs
318
}
319
320
pub fn sigs_mut(&mut self) -> &mut SigSet {
321
&mut self.vcode.sigs
322
}
323
324
/// Access to the BlockLoweringOrder object.
325
pub fn block_order(&self) -> &BlockLoweringOrder {
326
&self.vcode.block_order
327
}
328
329
/// Set the current block as the entry block.
330
pub fn set_entry(&mut self, block: BlockIndex) {
331
self.vcode.entry = block;
332
}
333
334
/// End the current basic block. Must be called after emitting vcode insts
335
/// for IR insts and prior to ending the function (building the VCode).
336
pub fn end_bb(&mut self) {
337
let end_idx = self.vcode.insts.len();
338
// Add the instruction index range to the list of blocks.
339
self.vcode.block_ranges.push_end(end_idx);
340
// End the successors list.
341
let succ_end = self.vcode.block_succs.len();
342
self.vcode.block_succ_range.push_end(succ_end);
343
// End the blockparams list.
344
let block_params_end = self.vcode.block_params.len();
345
self.vcode.block_params_range.push_end(block_params_end);
346
// End the branch blockparam args list.
347
let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len();
348
self.vcode
349
.branch_block_arg_succ_range
350
.push_end(branch_block_arg_succ_end);
351
}
352
353
pub fn add_block_param(&mut self, param: VirtualReg) {
354
self.vcode.block_params.push(param.into());
355
}
356
357
fn add_branch_args_for_succ(&mut self, args: &[Reg]) {
358
self.vcode
359
.branch_block_args
360
.extend(args.iter().map(|&arg| VReg::from(arg)));
361
let end = self.vcode.branch_block_args.len();
362
self.vcode.branch_block_arg_range.push_end(end);
363
}
364
365
/// Push an instruction for the current BB and current IR inst
366
/// within the BB.
367
pub fn push(&mut self, insn: I, loc: RelSourceLoc) {
368
assert!(!insn.is_low_level_branch()); // These are not meant to be in VCode.
369
self.vcode.insts.push(insn);
370
self.vcode.srclocs.push(loc);
371
}
372
373
/// Add a successor block with branch args.
374
pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) {
375
self.vcode.block_succs.push(block);
376
self.add_branch_args_for_succ(args);
377
}
378
379
/// Add a debug value label to a register.
380
pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) {
381
// 1) In the reversed order, we consider the instructions
382
// that define ranges in the "debug_info" array to refer
383
// to the IP **after** them (when reversed):
384
// IP[2]__| Inst 3 |
385
// IP[1]__| Inst 2 |
386
// IP[0]__| Inst 1 |
387
// | Inst 0 |
388
// This is so that we can represent IP[<function start>],
389
// done at the cost of not being able to represent IP[<function end>],
390
// which is OK since no values will be live at that point.
391
// 2) The live range for "reg" begins at the current IP
392
// and continues until the next, in execution order,
393
// VReg that defines "label". Since the ranges are open
394
// at the end, the subtraction of 1 cancels out:
395
// [last..current IP] <=>
396
// [last..last emitted inst index] <=>
397
// [last..next_inst_index - 1] <=>
398
// [last..next_inst_index)
399
//
400
let next_inst_index = self.vcode.insts.len();
401
if next_inst_index == 0 {
402
// This would produce a defective [0..0) range.
403
return;
404
}
405
let next_inst = InsnIndex::new(next_inst_index);
406
let labels = self.debug_info.entry(label).or_insert_with(|| vec![]);
407
let last = labels
408
.last()
409
.map(|(_start, end, _vreg)| *end)
410
.unwrap_or(InsnIndex::new(0));
411
labels.push((last, next_inst, reg.into()));
412
}
413
414
/// Access the constants.
415
pub fn constants(&mut self) -> &mut VCodeConstants {
416
&mut self.vcode.constants
417
}
418
419
fn compute_preds_from_succs(&mut self) {
420
// Do a linear-time counting sort: first determine how many
421
// times each block appears as a successor.
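//
// Illustrative example of the whole routine: with successors
//   B0 -> [B1, B2], B1 -> [B2], B2 -> []
// the counts are [0, 1, 2]; the prefix-sum pass below turns these into
// starting offsets [0, 0, 1] and pred ranges [0..0), [0..1), [1..3);
// the final walk then fills `block_preds` with [B0, B0, B1], so B1's
// predecessors are [B0] and B2's are [B0, B1].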
422
let mut starts = vec![0u32; self.vcode.num_blocks()];
423
for succ in &self.vcode.block_succs {
424
starts[succ.index()] += 1;
425
}
426
427
// Determine for each block the starting index where that
428
// block's predecessors should go. This is equivalent to the
429
// ranges we need to store in block_pred_range.
430
self.vcode.block_pred_range.reserve(starts.len());
431
let mut end = 0;
432
for count in starts.iter_mut() {
433
let start = end;
434
end += *count;
435
*count = start;
436
self.vcode.block_pred_range.push_end(end as usize);
437
}
438
let end = end as usize;
439
debug_assert_eq!(end, self.vcode.block_succs.len());
440
441
// Walk over the successors again, this time grouped by
442
// predecessor, and push the predecessor at the current
443
// starting position of each of its successors. We build
444
// each group of predecessors in whatever order Ranges::iter
445
// returns them; regalloc2 doesn't care.
446
self.vcode.block_preds.resize(end, BlockIndex::invalid());
447
for (pred, range) in self.vcode.block_succ_range.iter() {
448
let pred = BlockIndex::new(pred);
449
for succ in &self.vcode.block_succs[range] {
450
let pos = &mut starts[succ.index()];
451
self.vcode.block_preds[*pos as usize] = pred;
452
*pos += 1;
453
}
454
}
455
debug_assert!(self.vcode.block_preds.iter().all(|pred| pred.is_valid()));
456
}
457
458
/// Called once, when a build in Backward order is complete, to
459
/// perform the overall reversal (into final forward order) and
460
/// finalize metadata accordingly.
461
fn reverse_and_finalize(&mut self, vregs: &VRegAllocator<I>) {
462
let n_insts = self.vcode.insts.len();
463
if n_insts == 0 {
464
return;
465
}
466
467
// Reverse the per-block and per-inst sequences.
468
self.vcode.block_ranges.reverse_index();
469
self.vcode.block_ranges.reverse_target(n_insts);
470
// block_params_range is indexed by block (and blocks were
471
// traversed in reverse) so we reverse it; but block-param
472
// sequences in the concatenated vec can remain in reverse
473
// order (it is effectively an arena of arbitrarily-placed
474
// referenced sequences).
475
self.vcode.block_params_range.reverse_index();
476
// Likewise, we reverse block_succ_range, but the block_succ
477
// concatenated array can remain as-is.
478
self.vcode.block_succ_range.reverse_index();
479
self.vcode.insts.reverse();
480
self.vcode.srclocs.reverse();
481
// Likewise, branch_block_arg_succ_range is indexed by block
482
// so must be reversed.
483
self.vcode.branch_block_arg_succ_range.reverse_index();
484
485
// To translate an instruction index *endpoint* in reversed
486
// order to forward order, compute `n_insts - i`.
487
//
488
// Why not `n_insts - 1 - i`? That would be correct to
489
// translate an individual instruction index (for ten insts 0
490
// to 9 inclusive, inst 0 becomes 9, and inst 9 becomes
491
// 0). But for the usual inclusive-start, exclusive-end range
492
// idiom, inclusive starts become exclusive ends and
493
// vice-versa, so e.g. an (inclusive) start of 0 becomes an
494
// (exclusive) end of 10.
495
let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index());
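//
// For example (illustrative): with `n_insts == 10`, a backwards range
// [2, 5) (backwards insts 2, 3, 4) becomes the forward range
// [translate(5), translate(2)) == [5, 8), which is why the endpoints are
// swapped below.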
496
497
// Generate debug-value labels based on per-label maps.
498
for (label, tuples) in &self.debug_info {
499
for &(start, end, vreg) in tuples {
500
let vreg = vregs.resolve_vreg_alias(vreg);
501
let fwd_start = translate(end);
502
let fwd_end = translate(start);
503
self.vcode
504
.debug_value_labels
505
.push((vreg, fwd_start, fwd_end, label.as_u32()));
506
}
507
}
508
509
// Now sort debug value labels by VReg, as required
510
// by regalloc2.
511
self.vcode
512
.debug_value_labels
513
.sort_unstable_by_key(|(vreg, _, _, _)| *vreg);
514
}
515
516
fn collect_operands(&mut self, vregs: &VRegAllocator<I>) {
517
let allocatable = PRegSet::from(self.vcode.abi.machine_env());
518
for (i, insn) in self.vcode.insts.iter_mut().enumerate() {
519
// Push operands from the instruction onto the operand list.
520
//
521
// We rename through the vreg alias table as we collect
522
// the operands. This is better than a separate post-pass
523
// over operands, because it has more cache locality:
524
// operands only need to pass through L1 once. This is
525
// also better than renaming instructions'
526
// operands/registers while lowering, because here we only
527
// need to do the `match` over the instruction to visit
528
// its register fields (which is slow, branchy code) once.
529
530
let mut op_collector =
531
OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| {
532
vregs.resolve_vreg_alias(vreg)
533
});
534
insn.get_operands(&mut op_collector);
535
let (ops, clobbers) = op_collector.finish();
536
self.vcode.operand_ranges.push_end(ops);
537
538
if clobbers != PRegSet::default() {
539
self.vcode.clobbers.insert(InsnIndex::new(i), clobbers);
540
}
541
542
if let Some((dst, src)) = insn.is_move() {
543
// We should never see non-virtual registers present in move
544
// instructions.
545
assert!(
546
src.is_virtual(),
547
"the real register {src:?} was used as the source of a move instruction"
548
);
549
assert!(
550
dst.to_reg().is_virtual(),
551
"the real register {:?} was used as the destination of a move instruction",
552
dst.to_reg()
553
);
554
}
555
}
556
557
// Translate blockparam args via the vreg aliases table as well.
558
for arg in &mut self.vcode.branch_block_args {
559
let new_arg = vregs.resolve_vreg_alias(*arg);
560
trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg);
561
*arg = new_arg;
562
}
563
}
564
565
/// Build the final VCode.
566
pub fn build(mut self, mut vregs: VRegAllocator<I>) -> VCode<I> {
567
self.vcode.vreg_types = take(&mut vregs.vreg_types);
568
self.vcode.facts = take(&mut vregs.facts);
569
570
if self.direction == VCodeBuildDirection::Backward {
571
self.reverse_and_finalize(&vregs);
572
}
573
self.collect_operands(&vregs);
574
575
self.compute_preds_from_succs();
576
self.vcode.debug_value_labels.sort_unstable();
577
578
// At this point, nothing in the vcode should mention any
579
// VReg which has been aliased. All the appropriate rewriting
580
// should have happened above. Just to be sure, let's
581
// double-check each field which has vregs.
582
// Note: can't easily check vcode.insts, resolved in collect_operands.
583
// Operands are resolved in collect_operands.
584
vregs.debug_assert_no_vreg_aliases(self.vcode.operands.iter().map(|op| op.vreg()));
585
// Currently block params are never aliased to another vreg.
586
vregs.debug_assert_no_vreg_aliases(self.vcode.block_params.iter().copied());
587
// Branch block args are resolved in collect_operands.
588
vregs.debug_assert_no_vreg_aliases(self.vcode.branch_block_args.iter().copied());
589
// Debug value labels are resolved in reverse_and_finalize.
590
vregs.debug_assert_no_vreg_aliases(
591
self.vcode.debug_value_labels.iter().map(|&(vreg, ..)| vreg),
592
);
593
// Facts are resolved eagerly during set_vreg_alias.
594
vregs.debug_assert_no_vreg_aliases(
595
self.vcode
596
.facts
597
.iter()
598
.zip(&vregs.vreg_types)
599
.enumerate()
600
.filter(|(_, (fact, _))| fact.is_some())
601
.map(|(vreg, (_, &ty))| {
602
let (regclasses, _) = I::rc_for_type(ty).unwrap();
603
VReg::new(vreg, regclasses[0])
604
}),
605
);
606
607
self.vcode
608
}
609
610
/// Add a user stack map for the associated instruction.
611
pub fn add_user_stack_map(
612
&mut self,
613
inst: BackwardsInsnIndex,
614
entries: &[ir::UserStackMapEntry],
615
) {
616
let stack_map = ir::UserStackMap::new(entries, self.vcode.abi.sized_stackslot_offsets());
617
let old_entry = self.vcode.user_stack_maps.insert(inst, stack_map);
618
debug_assert!(old_entry.is_none());
619
}
620
}
621
622
const NO_INST_OFFSET: CodeOffset = u32::MAX;
623
624
impl<I: VCodeInst> VCode<I> {
625
/// New empty VCode.
626
fn new(
627
sigs: SigSet,
628
abi: Callee<I::ABIMachineSpec>,
629
emit_info: I::Info,
630
block_order: BlockLoweringOrder,
631
constants: VCodeConstants,
632
log2_min_function_alignment: u8,
633
) -> Self {
634
let n_blocks = block_order.lowered_order().len();
635
VCode {
636
sigs,
637
vreg_types: vec![],
638
insts: Vec::with_capacity(10 * n_blocks),
639
user_stack_maps: FxHashMap::default(),
640
operands: Vec::with_capacity(30 * n_blocks),
641
operand_ranges: Ranges::with_capacity(10 * n_blocks),
642
clobbers: FxHashMap::default(),
643
srclocs: Vec::with_capacity(10 * n_blocks),
644
entry: BlockIndex::new(0),
645
block_ranges: Ranges::with_capacity(n_blocks),
646
block_succ_range: Ranges::with_capacity(n_blocks),
647
block_succs: Vec::with_capacity(n_blocks),
648
block_pred_range: Ranges::default(),
649
block_preds: Vec::new(),
650
block_params_range: Ranges::with_capacity(n_blocks),
651
block_params: Vec::with_capacity(5 * n_blocks),
652
branch_block_args: Vec::with_capacity(10 * n_blocks),
653
branch_block_arg_range: Ranges::with_capacity(2 * n_blocks),
654
branch_block_arg_succ_range: Ranges::with_capacity(n_blocks),
655
block_order,
656
abi,
657
emit_info,
658
constants,
659
debug_value_labels: vec![],
660
facts: vec![],
661
log2_min_function_alignment,
662
}
663
}
664
665
/// Get the number of blocks. Block indices will be in the range `0 ..
666
/// (self.num_blocks() - 1)`.
667
pub fn num_blocks(&self) -> usize {
668
self.block_ranges.len()
669
}
670
671
/// The number of lowered instructions.
672
pub fn num_insts(&self) -> usize {
673
self.insts.len()
674
}
675
676
fn compute_clobbers_and_function_calls(
677
&self,
678
regalloc: &regalloc2::Output,
679
) -> (Vec<Writable<RealReg>>, FunctionCalls) {
680
let mut clobbered = PRegSet::default();
681
let mut function_calls = FunctionCalls::None;
682
683
// All moves are included in clobbers.
684
for (_, Edit::Move { to, .. }) in &regalloc.edits {
685
if let Some(preg) = to.as_reg() {
686
clobbered.add(preg);
687
}
688
}
689
690
for (i, range) in self.operand_ranges.iter() {
691
let operands = &self.operands[range.clone()];
692
let allocs = &regalloc.allocs[range];
693
for (operand, alloc) in operands.iter().zip(allocs.iter()) {
694
if operand.kind() == OperandKind::Def {
695
if let Some(preg) = alloc.as_reg() {
696
clobbered.add(preg);
697
}
698
}
699
}
700
701
function_calls.update(self.insts[i].call_type());
702
703
// Also add explicitly-clobbered registers.
704
//
705
// Skip merging this instruction's clobber list if not
706
// "included in clobbers" as per the MachInst. (Some
707
// backends use this to implement ABI specifics; e.g.,
708
// excluding calls of the same ABI as the current function
709
// from clobbers, because by definition everything
710
// clobbered by the call can be clobbered by this function
711
// without saving as well.)
712
//
713
// This is important for a particular optimization: when
714
// some registers are "half-clobbered", e.g. vector/float
715
// registers on aarch64, we want them to be seen as
716
// clobbered by regalloc so it avoids carrying values
717
// across calls in these registers but not seen as
718
// clobbered by prologue generation here (because the
719
// actual half-clobber implied by the clobber list fits
720
// within the clobbers that we allow without
721
// clobber-saves).
722
if self.insts[i].is_included_in_clobbers() {
723
if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) {
724
clobbered.union_from(inst_clobbered);
725
}
726
}
727
}
728
729
let clobbered_regs = clobbered
730
.into_iter()
731
.map(|preg| Writable::from_reg(RealReg::from(preg)))
732
.collect();
733
734
(clobbered_regs, function_calls)
735
}
736
737
/// Emit the instructions to a `MachBuffer`, containing fixed-up
738
/// code and external reloc/trap/etc. records ready for use. Takes
739
/// the regalloc results as well.
740
///
741
/// Returns the machine code itself, and optionally metadata
742
/// and/or a disassembly, as an `EmitResult`. The `VCode` itself
743
/// is consumed by the emission process.
744
pub fn emit(
745
mut self,
746
regalloc: &regalloc2::Output,
747
want_disasm: bool,
748
flags: &settings::Flags,
749
ctrl_plane: &mut ControlPlane,
750
) -> EmitResult
751
where
752
I: VCodeInst,
753
{
754
let _tt = timing::vcode_emit();
755
let mut buffer = MachBuffer::new();
756
buffer.set_log2_min_function_alignment(self.log2_min_function_alignment);
757
let mut bb_starts: Vec<Option<CodeOffset>> = vec![];
758
759
// The first M MachLabels are reserved for block indices.
760
buffer.reserve_labels_for_blocks(self.num_blocks());
761
762
// Register all allocated constants with the `MachBuffer` to ensure that
763
// any references to the constants during instructions can be handled
764
// correctly.
765
buffer.register_constants(&self.constants);
766
767
// Construct the final order we emit code in: cold blocks at the end.
768
let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];
769
let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![];
770
for block in 0..self.num_blocks() {
771
let block = BlockIndex::new(block);
772
if self.block_order.is_cold(block) {
773
cold_blocks.push(block);
774
} else {
775
final_order.push(block);
776
}
777
}
778
final_order.extend(cold_blocks.clone());
779
780
// Compute/save info we need for the prologue: clobbers and
781
// number of spillslots.
782
//
783
// We clone `abi` here because we will mutate it as we
784
// generate the prologue and set other info, but we can't
785
// mutate `VCode`. The info it usually carries prior to
786
// setting clobbers is fairly minimal so this should be
787
// relatively cheap.
788
let (clobbers, function_calls) = self.compute_clobbers_and_function_calls(regalloc);
789
self.abi.compute_frame_layout(
790
&self.sigs,
791
regalloc.num_spillslots,
792
clobbers,
793
function_calls,
794
);
795
796
// Emit blocks.
797
let mut cur_srcloc = None;
798
let mut last_offset = None;
799
let mut inst_offsets = vec![];
800
let mut state = I::State::new(&self.abi, std::mem::take(ctrl_plane));
801
802
let mut disasm = String::new();
803
804
if !self.debug_value_labels.is_empty() {
805
inst_offsets.resize(self.insts.len(), NO_INST_OFFSET);
806
}
807
808
// Count edits per block ahead of time; this is needed for
809
// lookahead island emission. (We could derive it per-block
810
// with binary search in the edit list, but it's more
811
// efficient to do it in one pass here.)
812
let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![];
813
let mut edit_idx = 0;
814
for block in 0..self.num_blocks() {
815
let end_inst = InsnIndex::new(self.block_ranges.get(block).end);
816
let start_edit_idx = edit_idx;
817
while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst {
818
edit_idx += 1;
819
}
820
let end_edit_idx = edit_idx;
821
ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32);
822
}
823
824
let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled();
825
let mut bb_padding = match flags.bb_padding_log2_minus_one() {
826
0 => Vec::new(),
827
n => vec![0; 1 << (n - 1)],
828
};
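// For example (illustrative): a `bb_padding_log2_minus_one` setting of 0
// disables padding entirely, while a setting of 4 yields `1 << 3` = 8 bytes
// of padding after each block.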
829
let mut total_bb_padding = 0;
830
831
for (block_order_idx, &block) in final_order.iter().enumerate() {
832
trace!("emitting block {:?}", block);
833
834
// Call the new block hook for state
835
state.on_new_block();
836
837
// Emit NOPs to align the block.
838
let new_offset = I::align_basic_block(buffer.cur_offset());
839
while new_offset > buffer.cur_offset() {
840
// Pad with NOPs up to the aligned block offset.
841
let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
842
nop.emit(&mut buffer, &self.emit_info, &mut Default::default());
843
}
844
assert_eq!(buffer.cur_offset(), new_offset);
845
846
let do_emit = |inst: &I,
847
disasm: &mut String,
848
buffer: &mut MachBuffer<I>,
849
state: &mut I::State| {
850
if want_disasm && !inst.is_args() {
851
let mut s = state.clone();
852
writeln!(disasm, " {}", inst.pretty_print_inst(&mut s)).unwrap();
853
}
854
inst.emit(buffer, &self.emit_info, state);
855
};
856
857
// Is this the first block? Emit the prologue directly if so.
858
if block == self.entry {
859
trace!(" -> entry block");
860
buffer.start_srcloc(Default::default());
861
for inst in &self.abi.gen_prologue() {
862
do_emit(&inst, &mut disasm, &mut buffer, &mut state);
863
}
864
buffer.end_srcloc();
865
}
866
867
// Now emit the regular block body.
868
869
buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut());
870
871
if want_disasm {
872
writeln!(&mut disasm, "block{}:", block.index()).unwrap();
873
}
874
875
if flags.machine_code_cfg_info() {
876
// Track BB starts. If we have backed up due to MachBuffer
877
// branch opts, note that the removed blocks were removed.
878
let cur_offset = buffer.cur_offset();
879
if last_offset.is_some() && cur_offset <= last_offset.unwrap() {
880
for i in (0..bb_starts.len()).rev() {
881
if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() {
882
break;
883
}
884
bb_starts[i] = None;
885
}
886
}
887
bb_starts.push(Some(cur_offset));
888
last_offset = Some(cur_offset);
889
}
890
891
if let Some(block_start) = I::gen_block_start(
892
self.block_order.is_indirect_branch_target(block),
893
is_forward_edge_cfi_enabled,
894
) {
895
do_emit(&block_start, &mut disasm, &mut buffer, &mut state);
896
}
897
898
for inst_or_edit in regalloc.block_insts_and_edits(&self, block) {
899
match inst_or_edit {
900
InstOrEdit::Inst(iix) => {
901
if !self.debug_value_labels.is_empty() {
902
// If we need to produce debug info,
903
// record the offset of each instruction
904
// so that we can translate value-label
905
// ranges to machine-code offsets.
906
907
// Cold blocks violate monotonicity
908
// assumptions elsewhere (that
909
// instructions in inst-index order are in
910
// order in machine code), so we omit
911
// their offsets here. Value-label range
912
// generation below will skip empty ranges
913
// and ranges with to-offsets of zero.
914
if !self.block_order.is_cold(block) {
915
inst_offsets[iix.index()] = buffer.cur_offset();
916
}
917
}
918
919
// Update the srcloc at this point in the buffer.
920
let srcloc = self.srclocs[iix.index()];
921
if cur_srcloc != Some(srcloc) {
922
if cur_srcloc.is_some() {
923
buffer.end_srcloc();
924
}
925
buffer.start_srcloc(srcloc);
926
cur_srcloc = Some(srcloc);
927
}
928
929
// If this is a safepoint, compute a stack map
930
// and pass it to the emit state.
931
let stack_map_disasm = if self.insts[iix.index()].is_safepoint() {
932
let (user_stack_map, user_stack_map_disasm) = {
933
// The `user_stack_maps` is keyed by reverse
934
// instruction index, so we must flip the
935
// index. We can't put this into a helper method
936
// due to borrowck issues because parts of
937
// `self` are borrowed mutably elsewhere in this
938
// function.
939
let index = iix.to_backwards_insn_index(self.num_insts());
940
let user_stack_map = self.user_stack_maps.remove(&index);
941
let user_stack_map_disasm =
942
user_stack_map.as_ref().map(|m| format!(" ; {m:?}"));
943
(user_stack_map, user_stack_map_disasm)
944
};
945
946
state.pre_safepoint(user_stack_map);
947
948
user_stack_map_disasm
949
} else {
950
None
951
};
952
953
// If the instruction we are about to emit is
954
// a return, place an epilogue at this point
955
// (and don't emit the return; the actual
956
// epilogue will contain it).
957
if self.insts[iix.index()].is_term() == MachTerminator::Ret {
958
for inst in self.abi.gen_epilogue() {
959
do_emit(&inst, &mut disasm, &mut buffer, &mut state);
960
}
961
} else {
962
// Update the operands for this inst using the
963
// allocations from the regalloc result.
964
let mut allocs = regalloc.inst_allocs(iix).iter();
965
self.insts[iix.index()].get_operands(
966
&mut |reg: &mut Reg, constraint, _kind, _pos| {
967
let alloc =
968
allocs.next().expect("enough allocations for all operands");
969
970
if let Some(alloc) = alloc.as_reg() {
971
let alloc: Reg = alloc.into();
972
if let OperandConstraint::FixedReg(rreg) = constraint {
973
debug_assert_eq!(Reg::from(rreg), alloc);
974
}
975
*reg = alloc;
976
} else if let Some(alloc) = alloc.as_stack() {
977
let alloc: Reg = alloc.into();
978
*reg = alloc;
979
}
980
},
981
);
982
debug_assert!(allocs.next().is_none());
983
984
// Emit the instruction!
985
do_emit(
986
&self.insts[iix.index()],
987
&mut disasm,
988
&mut buffer,
989
&mut state,
990
);
991
if let Some(stack_map_disasm) = stack_map_disasm {
992
disasm.push_str(&stack_map_disasm);
993
disasm.push('\n');
994
}
995
}
996
}
997
998
InstOrEdit::Edit(Edit::Move { from, to }) => {
999
// Create a move/spill/reload instruction and
1000
// immediately emit it.
1001
match (from.as_reg(), to.as_reg()) {
1002
(Some(from), Some(to)) => {
1003
// Reg-to-reg move.
1004
let from_rreg = Reg::from(from);
1005
let to_rreg = Writable::from_reg(Reg::from(to));
1006
debug_assert_eq!(from.class(), to.class());
1007
let ty = I::canonical_type_for_rc(from.class());
1008
let mv = I::gen_move(to_rreg, from_rreg, ty);
1009
do_emit(&mv, &mut disasm, &mut buffer, &mut state);
1010
}
1011
(Some(from), None) => {
1012
// Spill from register to spillslot.
1013
let to = to.as_stack().unwrap();
1014
let from_rreg = RealReg::from(from);
1015
let spill = self.abi.gen_spill(to, from_rreg);
1016
do_emit(&spill, &mut disasm, &mut buffer, &mut state);
1017
}
1018
(None, Some(to)) => {
1019
// Load from spillslot to register.
1020
let from = from.as_stack().unwrap();
1021
let to_rreg = Writable::from_reg(RealReg::from(to));
1022
let reload = self.abi.gen_reload(to_rreg, from);
1023
do_emit(&reload, &mut disasm, &mut buffer, &mut state);
1024
}
1025
(None, None) => {
1026
panic!("regalloc2 should have eliminated stack-to-stack moves!");
1027
}
1028
}
1029
}
1030
}
1031
}
1032
1033
if cur_srcloc.is_some() {
1034
buffer.end_srcloc();
1035
cur_srcloc = None;
1036
}
1037
1038
// Do we need an island? Get the worst-case size of the next BB, add
1039
// it to the optional padding behind the block, and pass this to the
1040
// `MachBuffer` to determine if an island is necessary.
1041
let worst_case_next_bb = if block_order_idx < final_order.len() - 1 {
1042
let next_block = final_order[block_order_idx + 1];
1043
let next_block_range = self.block_ranges.get(next_block.index());
1044
let next_block_size = next_block_range.len() as u32;
1045
let next_block_ra_insertions = ra_edits_per_block[next_block.index()];
1046
I::worst_case_size() * (next_block_size + next_block_ra_insertions)
1047
} else {
1048
0
1049
};
1050
let padding = if bb_padding.is_empty() {
1051
0
1052
} else {
1053
bb_padding.len() as u32 + I::LabelUse::ALIGN - 1
1054
};
1055
if buffer.island_needed(padding + worst_case_next_bb) {
1056
buffer.emit_island(padding + worst_case_next_bb, ctrl_plane);
1057
}
1058
1059
// Insert padding, if configured, to stress the `MachBuffer`'s
1060
// relocation and island calculations.
1061
//
1062
// Padding can get quite large during fuzzing though so place a
1063
// total cap on it: when a per-function threshold is exceeded,
1064
// the padding is turned back down to zero. This avoids a small-ish
1065
// test case generating a GB+ memory footprint in Cranelift for
1066
// example.
1067
if !bb_padding.is_empty() {
1068
buffer.put_data(&bb_padding);
1069
buffer.align_to(I::LabelUse::ALIGN);
1070
total_bb_padding += bb_padding.len();
1071
if total_bb_padding > (150 << 20) {
1072
bb_padding = Vec::new();
1073
}
1074
}
1075
}
1076
1077
debug_assert!(
1078
self.user_stack_maps.is_empty(),
1079
"any stack maps should have been consumed by instruction emission, still have: {:#?}",
1080
self.user_stack_maps,
1081
);
1082
1083
// Do any optimizations on branches at tail of buffer, as if we had
1084
// bound one last label.
1085
buffer.optimize_branches(ctrl_plane);
1086
1087
// emission state is not needed anymore, move control plane back out
1088
*ctrl_plane = state.take_ctrl_plane();
1089
1090
let func_body_len = buffer.cur_offset();
1091
1092
// Create `bb_edges` and final (filtered) `bb_starts`.
1093
let mut bb_edges = vec![];
1094
let mut bb_offsets = vec![];
1095
if flags.machine_code_cfg_info() {
1096
for block in 0..self.num_blocks() {
1097
if bb_starts[block].is_none() {
1098
// Block was deleted by MachBuffer; skip.
1099
continue;
1100
}
1101
let from = bb_starts[block].unwrap();
1102
1103
bb_offsets.push(from);
1104
// Resolve each `succ` label and add edges.
1105
let succs = self.block_succs(BlockIndex::new(block));
1106
for &succ in succs.iter() {
1107
let to = buffer.resolve_label_offset(MachLabel::from_block(succ));
1108
bb_edges.push((from, to));
1109
}
1110
}
1111
}
1112
1113
self.monotonize_inst_offsets(&mut inst_offsets[..], func_body_len);
1114
let value_labels_ranges =
1115
self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len);
1116
let frame_size = self.abi.frame_size();
1117
1118
EmitResult {
1119
buffer: buffer.finish(&self.constants, ctrl_plane),
1120
bb_offsets,
1121
bb_edges,
1122
disasm: if want_disasm { Some(disasm) } else { None },
1123
sized_stackslot_offsets: self.abi.sized_stackslot_offsets().clone(),
1124
dynamic_stackslot_offsets: self.abi.dynamic_stackslot_offsets().clone(),
1125
value_labels_ranges,
1126
frame_size,
1127
}
1128
}
1129
1130
fn monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32) {
1131
if self.debug_value_labels.is_empty() {
1132
return;
1133
}
1134
1135
// During emission, branch removal can make offsets of instructions incorrect.
1136
// Consider the following sequence: [insi][jmp0][jmp1][jmp2][insj]
1137
// It will be recorded as (say): [30] [34] [38] [42] [<would be 46>]
1138
// When the jumps get removed we are left with (in "inst_offsets"):
1139
// [insi][jmp0][jmp1][jmp2][insj][...]
1140
// [30] [34] [38] [42] [34]
1141
// Which violates the monotonicity invariant. This method sets offsets of these
1142
// removed instructions such as to make them appear zero-sized:
1143
// [insi][jmp0][jmp1][jmp2][insj][...]
1144
// [30] [34] [34] [34] [34]
1145
//
1146
let mut next_offset = func_body_len;
1147
for inst_index in (0..(inst_offsets.len() - 1)).rev() {
1148
let inst_offset = inst_offsets[inst_index];
1149
1150
// Not all instructions get their offsets recorded.
1151
if inst_offset == NO_INST_OFFSET {
1152
continue;
1153
}
1154
1155
if inst_offset > next_offset {
1156
trace!(
1157
"Fixing code offset of the removed Inst {}: {} -> {}",
1158
inst_index, inst_offset, next_offset
1159
);
1160
inst_offsets[inst_index] = next_offset;
1161
continue;
1162
}
1163
1164
next_offset = inst_offset;
1165
}
1166
}
1167
1168
fn compute_value_labels_ranges(
1169
&self,
1170
regalloc: &regalloc2::Output,
1171
inst_offsets: &[CodeOffset],
1172
func_body_len: u32,
1173
) -> ValueLabelsRanges {
1174
if self.debug_value_labels.is_empty() {
1175
return ValueLabelsRanges::default();
1176
}
1177
1178
if trace_log_enabled!() {
1179
self.log_value_labels_ranges(regalloc, inst_offsets);
1180
}
1181
1182
let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
1183
for &(label, from, to, alloc) in &regalloc.debug_locations {
1184
let label = ValueLabel::from_u32(label);
1185
let ranges = value_labels_ranges.entry(label).or_insert_with(|| vec![]);
1186
let prog_point_to_inst = |prog_point: ProgPoint| {
1187
let mut inst = prog_point.inst();
1188
if prog_point.pos() == InstPosition::After {
1189
inst = inst.next();
1190
}
1191
inst.index()
1192
};
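// For example (illustrative): a range endpoint at `ProgPoint::before(inst 5)`
// maps to instruction index 5, while one at `ProgPoint::after(inst 5)` maps
// to index 6, so "after" endpoints extend to the next instruction's offset.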
1193
let from_inst_index = prog_point_to_inst(from);
1194
let to_inst_index = prog_point_to_inst(to);
1195
let from_offset = inst_offsets[from_inst_index];
1196
let to_offset = if to_inst_index == inst_offsets.len() {
1197
func_body_len
1198
} else {
1199
inst_offsets[to_inst_index]
1200
};
1201
1202
// Empty ranges or unavailable offsets can happen
1203
// due to cold blocks and branch removal (see above).
1204
if from_offset == NO_INST_OFFSET
1205
|| to_offset == NO_INST_OFFSET
1206
|| from_offset == to_offset
1207
{
1208
continue;
1209
}
1210
1211
let loc = if let Some(preg) = alloc.as_reg() {
1212
LabelValueLoc::Reg(Reg::from(preg))
1213
} else {
1214
let slot = alloc.as_stack().unwrap();
1215
let slot_offset = self.abi.get_spillslot_offset(slot);
1216
let slot_base_to_caller_sp_offset = self.abi.slot_base_to_caller_sp_offset();
1217
let caller_sp_to_cfa_offset =
1218
crate::isa::unwind::systemv::caller_sp_to_cfa_offset();
1219
// NOTE: this is a negative offset because it's relative to the caller's SP
1220
let cfa_to_sp_offset =
1221
-((slot_base_to_caller_sp_offset + caller_sp_to_cfa_offset) as i64);
1222
LabelValueLoc::CFAOffset(cfa_to_sp_offset + slot_offset)
1223
};
1224
1225
// Coalesce adjacent ranges that for the same location
1226
// to minimize output size here and for the consumers.
1227
if let Some(last_loc_range) = ranges.last_mut() {
1228
if last_loc_range.loc == loc && last_loc_range.end == from_offset {
1229
trace!(
1230
"Extending debug range for {:?} in {:?} to Inst {} ({})",
1231
label, loc, to_inst_index, to_offset
1232
);
1233
last_loc_range.end = to_offset;
1234
continue;
1235
}
1236
}
1237
1238
trace!(
1239
"Recording debug range for {:?} in {:?}: [Inst {}..Inst {}) [{}..{})",
1240
label, loc, from_inst_index, to_inst_index, from_offset, to_offset
1241
);
1242
1243
ranges.push(ValueLocRange {
1244
loc,
1245
start: from_offset,
1246
end: to_offset,
1247
});
1248
}
1249
1250
value_labels_ranges
1251
}
1252
1253
fn log_value_labels_ranges(&self, regalloc: &regalloc2::Output, inst_offsets: &[CodeOffset]) {
1254
debug_assert!(trace_log_enabled!());
1255
1256
// What debug labels do we have? Note we'll skip those that have not been
1257
// allocated any location at all. They will show up as numeric gaps in the table.
1258
let mut labels = vec![];
1259
for &(label, _, _, _) in &regalloc.debug_locations {
1260
if Some(&label) == labels.last() {
1261
continue;
1262
}
1263
labels.push(label);
1264
}
1265
1266
// Reformat the data on what VRegs were the VLs assigned to by lowering, since
1267
// the array we have is sorted by VReg, and we want it sorted by VL for easy
1268
// access in the loop below.
1269
let mut vregs = vec![];
1270
for &(vreg, start, end, label) in &self.debug_value_labels {
1271
if matches!(labels.binary_search(&label), Ok(_)) {
1272
vregs.push((label, start, end, vreg));
1273
}
1274
}
1275
vregs.sort_unstable_by(
1276
|(l_label, l_start, _, _), (r_label, r_start, _, _)| match l_label.cmp(r_label) {
1277
Ordering::Equal => l_start.cmp(r_start),
1278
cmp => cmp,
1279
},
1280
);
1281
1282
#[derive(PartialEq)]
1283
enum Mode {
1284
Measure,
1285
Emit,
1286
}
1287
#[derive(PartialEq)]
1288
enum Row {
1289
Head,
1290
Line,
1291
Inst(usize, usize),
1292
}
1293
1294
let mut widths = vec![0; 3 + 2 * labels.len()];
1295
let mut row = String::new();
1296
let mut output_row = |row_kind: Row, mode: Mode| {
1297
let mut column_index = 0;
1298
row.clear();
1299
1300
macro_rules! output_cell_impl {
1301
($fill:literal, $span:literal, $($cell_fmt:tt)*) => {
1302
let column_start = row.len();
1303
{
1304
row.push('|');
1305
write!(row, $($cell_fmt)*).unwrap();
1306
}
1307
1308
let next_column_index = column_index + $span;
1309
let expected_width: usize = widths[column_index..next_column_index].iter().sum();
1310
if mode == Mode::Measure {
1311
let actual_width = row.len() - column_start;
1312
if actual_width > expected_width {
1313
widths[next_column_index - 1] += actual_width - expected_width;
1314
}
1315
} else {
1316
let column_end = column_start + expected_width;
1317
while row.len() != column_end {
1318
row.push($fill);
1319
}
1320
}
1321
column_index = next_column_index;
1322
};
1323
}
1324
macro_rules! output_cell {
1325
($($cell_fmt:tt)*) => {
1326
output_cell_impl!(' ', 1, $($cell_fmt)*);
1327
};
1328
}
1329
1330
match row_kind {
1331
Row::Head => {
1332
output_cell!("BB");
1333
output_cell!("Inst");
1334
output_cell!("IP");
1335
for label in &labels {
1336
output_cell_impl!(' ', 2, "{:?}", ValueLabel::from_u32(*label));
1337
}
1338
}
1339
Row::Line => {
1340
debug_assert!(mode == Mode::Emit);
1341
for _ in 0..3 {
1342
output_cell_impl!('-', 1, "");
1343
}
1344
for _ in &labels {
1345
output_cell_impl!('-', 2, "");
1346
}
1347
}
1348
Row::Inst(block_index, inst_index) => {
1349
debug_assert!(inst_index < self.num_insts());
1350
if self.block_ranges.get(block_index).start == inst_index {
1351
output_cell!("B{}", block_index);
1352
} else {
1353
output_cell!("");
1354
}
1355
output_cell!("Inst {inst_index} ");
1356
output_cell!("{} ", inst_offsets[inst_index]);
1357
1358
for label in &labels {
1359
// First, the VReg.
1360
use regalloc2::Inst;
1361
let vreg_cmp = |inst: usize,
1362
vreg_label: &u32,
1363
range_start: &Inst,
1364
range_end: &Inst| {
1365
match vreg_label.cmp(&label) {
1366
Ordering::Equal => {
1367
if range_end.index() <= inst {
1368
Ordering::Less
1369
} else if range_start.index() > inst {
1370
Ordering::Greater
1371
} else {
1372
Ordering::Equal
1373
}
1374
}
1375
cmp => cmp,
1376
}
1377
};
1378
let vreg_index =
1379
vregs.binary_search_by(|(l, s, e, _)| vreg_cmp(inst_index, l, s, e));
1380
if let Ok(vreg_index) = vreg_index {
1381
let mut prev_vreg = None;
1382
if inst_index > 0 {
1383
let prev_vreg_index = vregs.binary_search_by(|(l, s, e, _)| {
1384
vreg_cmp(inst_index - 1, l, s, e)
1385
});
1386
if let Ok(prev_vreg_index) = prev_vreg_index {
1387
prev_vreg = Some(vregs[prev_vreg_index].3);
1388
}
1389
}
1390
1391
let vreg = vregs[vreg_index].3;
1392
if Some(vreg) == prev_vreg {
1393
output_cell!("*");
1394
} else {
1395
output_cell!("{}", vreg);
1396
}
1397
} else {
1398
output_cell!("");
1399
}
1400
1401
// Second, the allocated location.
1402
let inst_prog_point = ProgPoint::before(Inst::new(inst_index));
1403
let range_index = regalloc.debug_locations.binary_search_by(
1404
|(range_label, range_start, range_end, _)| match range_label.cmp(label)
1405
{
1406
Ordering::Equal => {
1407
if *range_end <= inst_prog_point {
1408
Ordering::Less
1409
} else if *range_start > inst_prog_point {
1410
Ordering::Greater
1411
} else {
1412
Ordering::Equal
1413
}
1414
}
1415
cmp => cmp,
1416
},
1417
);
1418
if let Ok(range_index) = range_index {
1419
// Live at this instruction, print the location.
1420
if let Some(reg) = regalloc.debug_locations[range_index].3.as_reg() {
1421
output_cell!("{:?}", Reg::from(reg));
1422
} else {
1423
output_cell!("Stk");
1424
}
1425
} else {
1426
// Not live at this instruction.
1427
output_cell!("");
1428
}
1429
}
1430
}
1431
}
1432
row.push('|');
1433
1434
if mode == Mode::Emit {
1435
trace!("{}", row.as_str());
1436
}
1437
};
1438
1439
for block_index in 0..self.num_blocks() {
1440
for inst_index in self.block_ranges.get(block_index) {
1441
output_row(Row::Inst(block_index, inst_index), Mode::Measure);
1442
}
1443
}
1444
output_row(Row::Head, Mode::Measure);
1445
1446
output_row(Row::Head, Mode::Emit);
1447
output_row(Row::Line, Mode::Emit);
1448
for block_index in 0..self.num_blocks() {
1449
for inst_index in self.block_ranges.get(block_index) {
1450
output_row(Row::Inst(block_index, inst_index), Mode::Emit);
1451
}
1452
}
1453
}
1454
1455
/// Get the IR block for a BlockIndex, if one exists.
1456
pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
1457
self.block_order.lowered_order()[block.index()].orig_block()
1458
}
1459
1460
/// Get the type of a VReg.
1461
pub fn vreg_type(&self, vreg: VReg) -> Type {
1462
self.vreg_types[vreg.vreg()]
1463
}
1464
1465
/// Get the fact, if any, for a given VReg.
1466
pub fn vreg_fact(&self, vreg: VReg) -> Option<&Fact> {
1467
self.facts[vreg.vreg()].as_ref()
1468
}
1469
1470
/// Set the fact for a given VReg.
1471
pub fn set_vreg_fact(&mut self, vreg: VReg, fact: Fact) {
1472
trace!("set fact on {}: {:?}", vreg, fact);
1473
self.facts[vreg.vreg()] = Some(fact);
1474
}
1475
1476
/// Does a given instruction define any facts?
1477
pub fn inst_defines_facts(&self, inst: InsnIndex) -> bool {
1478
self.inst_operands(inst)
1479
.iter()
1480
.filter(|o| o.kind() == OperandKind::Def)
1481
.map(|o| o.vreg())
1482
.any(|vreg| self.facts[vreg.vreg()].is_some())
1483
}
1484
1485
/// Get the user stack map associated with the given forward instruction index.
1486
pub fn get_user_stack_map(&self, inst: InsnIndex) -> Option<&ir::UserStackMap> {
1487
let index = inst.to_backwards_insn_index(self.num_insts());
1488
self.user_stack_maps.get(&index)
1489
}
1490
}
1491
1492
impl<I: VCodeInst> std::ops::Index<InsnIndex> for VCode<I> {
1493
type Output = I;
1494
fn index(&self, idx: InsnIndex) -> &Self::Output {
1495
&self.insts[idx.index()]
1496
}
1497
}
1498
1499
impl<I: VCodeInst> RegallocFunction for VCode<I> {
1500
fn num_insts(&self) -> usize {
1501
self.insts.len()
1502
}
1503
1504
fn num_blocks(&self) -> usize {
1505
self.block_ranges.len()
1506
}
1507
1508
fn entry_block(&self) -> BlockIndex {
1509
self.entry
1510
}
1511
1512
fn block_insns(&self, block: BlockIndex) -> InstRange {
1513
let range = self.block_ranges.get(block.index());
1514
InstRange::new(InsnIndex::new(range.start), InsnIndex::new(range.end))
1515
}
1516
1517
fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] {
1518
let range = self.block_succ_range.get(block.index());
1519
&self.block_succs[range]
1520
}
1521
1522
fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] {
1523
let range = self.block_pred_range.get(block.index());
1524
&self.block_preds[range]
1525
}
1526
1527
fn block_params(&self, block: BlockIndex) -> &[VReg] {
1528
// As a special case we don't return block params for the entry block, as all the arguments
1529
// will be defined by the `Inst::Args` instruction.
1530
if block == self.entry {
1531
return &[];
1532
}
1533
1534
let range = self.block_params_range.get(block.index());
1535
&self.block_params[range]
1536
}
1537
1538
fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] {
1539
let succ_range = self.branch_block_arg_succ_range.get(block.index());
1540
debug_assert!(succ_idx < succ_range.len());
1541
let branch_block_args = self.branch_block_arg_range.get(succ_range.start + succ_idx);
1542
&self.branch_block_args[branch_block_args]
1543
}
1544
1545
fn is_ret(&self, insn: InsnIndex) -> bool {
1546
match self.insts[insn.index()].is_term() {
1547
// We treat blocks terminated by an unconditional trap like a return for regalloc.
1548
MachTerminator::None => self.insts[insn.index()].is_trap(),
1549
MachTerminator::Ret | MachTerminator::RetCall => true,
1550
MachTerminator::Branch => false,
1551
}
1552
}
1553
1554
fn is_branch(&self, insn: InsnIndex) -> bool {
1555
match self.insts[insn.index()].is_term() {
1556
MachTerminator::Branch => true,
1557
_ => false,
1558
}
1559
}
1560
1561
fn inst_operands(&self, insn: InsnIndex) -> &[Operand] {
1562
let range = self.operand_ranges.get(insn.index());
1563
&self.operands[range]
1564
}
1565
1566
fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet {
1567
self.clobbers.get(&insn).cloned().unwrap_or_default()
1568
}
1569
1570
fn num_vregs(&self) -> usize {
1571
self.vreg_types.len()
1572
}
1573
1574
fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] {
1575
&self.debug_value_labels
1576
}
1577
1578
fn spillslot_size(&self, regclass: RegClass) -> usize {
1579
self.abi.get_spillslot_size(regclass) as usize
1580
}
1581
1582
fn allow_multiple_vreg_defs(&self) -> bool {
1583
// At least the s390x backend requires this, because the
1584
// `Loop` pseudo-instruction aggregates all Operands so pinned
1585
// vregs (RealRegs) may occur more than once.
1586
true
1587
}
1588
}

impl<I: VCodeInst> Debug for VRegAllocator<I> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "VRegAllocator {{")?;

        let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>();
        alias_keys.sort_unstable();
        for key in alias_keys {
            let dest = self.vreg_aliases.get(&key).unwrap();
            writeln!(f, " {:?} := {:?}", Reg::from(key), Reg::from(*dest))?;
        }

        for (vreg, fact) in self.facts.iter().enumerate() {
            if let Some(fact) = fact {
                writeln!(f, " v{vreg} ! {fact}")?;
            }
        }

        writeln!(f, "}}")
    }
}

impl<I: VCodeInst> fmt::Debug for VCode<I> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "VCode {{")?;
        writeln!(f, " Entry block: {}", self.entry.index())?;

        let mut state = Default::default();

        for block in 0..self.num_blocks() {
            let block = BlockIndex::new(block);
            writeln!(
                f,
                "Block {}({:?}):",
                block.index(),
                self.block_params(block)
            )?;
            if let Some(bb) = self.bindex_to_bb(block) {
                writeln!(f, " (original IR block: {bb})")?;
            }
            for (succ_idx, succ) in self.block_succs(block).iter().enumerate() {
                writeln!(
                    f,
                    " (successor: Block {}({:?}))",
                    succ.index(),
                    self.branch_blockparams(block, InsnIndex::new(0) /* dummy */, succ_idx)
                )?;
            }
            for inst in self.block_ranges.get(block.index()) {
                writeln!(
                    f,
                    " Inst {}: {}",
                    inst,
                    self.insts[inst].pretty_print_inst(&mut state)
                )?;
                if !self.operands.is_empty() {
                    for operand in self.inst_operands(InsnIndex::new(inst)) {
                        if operand.kind() == OperandKind::Def {
                            if let Some(fact) = &self.facts[operand.vreg().vreg()] {
                                writeln!(f, " v{} ! {}", operand.vreg().vreg(), fact)?;
                            }
                        }
                    }
                }
                if let Some(user_stack_map) = self.get_user_stack_map(InsnIndex::new(inst)) {
                    writeln!(f, " {user_stack_map:?}")?;
                }
            }
        }

        writeln!(f, "}}")?;
        Ok(())
    }
}

/// This structure manages VReg allocation during the lifetime of the VCodeBuilder.
pub struct VRegAllocator<I> {
    /// VReg IR-level types.
    vreg_types: Vec<Type>,

    /// VReg aliases. When the final VCode is built we rewrite all
    /// uses of the keys in this table to their replacement values.
    ///
    /// We use these aliases to rename an instruction's expected
    /// result vregs to the returned vregs from lowering, which are
    /// usually freshly-allocated temps.
    vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>,

    /// A deferred error, to be bubbled up to the top level of the
    /// lowering algorithm. We take this approach because we cannot
    /// currently propagate a `Result` upward through ISLE code (the
    /// lowering rules) or some ABI code.
    deferred_error: Option<CodegenError>,

    /// Facts on VRegs, for proof-carrying code.
    facts: Vec<Option<Fact>>,

    /// The type of instruction that this allocator makes registers for.
    _inst: core::marker::PhantomData<I>,
}
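
// Editorial note: an illustrative sketch (not part of the original source) of how the
// alias table above is used. Suppose lowering has already allocated `v7` as the expected
// result register of an IR value, but the lowering rule for its producer returns a fresh
// temp `v12` that actually holds the result. Rather than emitting a copy, the builder
// records `v7 -> v12` via `set_vreg_alias`, and when the final VCode is built every
// operand mentioning `v7` is rewritten to `v12` (vreg numbers here are hypothetical):
//
//     vregs.set_vreg_alias(v7_reg, v12_reg);
//     assert_eq!(vregs.resolve_vreg_alias(v7), v12);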

impl<I: VCodeInst> VRegAllocator<I> {
    /// Make a new VRegAllocator.
    pub fn with_capacity(capacity: usize) -> Self {
        let capacity = first_user_vreg_index() + capacity;
        let mut vreg_types = Vec::with_capacity(capacity);
        vreg_types.resize(first_user_vreg_index(), types::INVALID);
        Self {
            vreg_types,
            vreg_aliases: FxHashMap::with_capacity_and_hasher(capacity, Default::default()),
            deferred_error: None,
            facts: Vec::with_capacity(capacity),
            _inst: core::marker::PhantomData::default(),
        }
    }

    /// Allocate a fresh ValueRegs.
    pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> {
        if self.deferred_error.is_some() {
            return Err(CodegenError::CodeTooLarge);
        }
        let v = self.vreg_types.len();
        let (regclasses, tys) = I::rc_for_type(ty)?;
        if v + regclasses.len() >= VReg::MAX {
            return Err(CodegenError::CodeTooLarge);
        }

        let regs: ValueRegs<Reg> = match regclasses {
            &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()),
            &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()),
            // We can extend this if/when we support 32-bit targets; e.g.,
            // an i128 on a 32-bit machine will need up to four machine regs
            // for a `Value`.
            _ => panic!("Value must reside in 1 or 2 registers"),
        };
        for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
            let vreg = reg.to_virtual_reg().unwrap();
            debug_assert_eq!(self.vreg_types.len(), vreg.index());
            self.vreg_types.push(reg_ty);
        }

        // Create empty facts for each allocated vreg.
        self.facts.resize(self.vreg_types.len(), None);

        Ok(regs)
    }
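
    // Editorial note: an illustrative sketch (not part of the original source) of what
    // `alloc` returns for a hypothetical 64-bit backend `I`. `I::rc_for_type` decides how
    // many machine registers a `Value` of the given type occupies:
    //
    //     let narrow = vregs.alloc(types::I64)?;  // one vreg
    //     assert_eq!(narrow.len(), 1);
    //     let wide = vregs.alloc(types::I128)?;   // two vregs on a 64-bit target
    //     assert_eq!(wide.len(), 2);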

    /// Allocate a fresh ValueRegs, deferring any out-of-vregs
    /// errors. This is useful in places where we cannot bubble a
    /// `CodegenResult` upward easily, and which are known to be
    /// invoked from within the lowering loop that checks the deferred
    /// error status below.
    pub fn alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg> {
        match self.alloc(ty) {
            Ok(x) => x,
            Err(e) => {
                self.deferred_error = Some(e);
                self.bogus_for_deferred_error(ty)
            }
        }
    }

    /// Take any deferred error that was accumulated by `alloc_with_deferred_error`.
    pub fn take_deferred_error(&mut self) -> Option<CodegenError> {
        self.deferred_error.take()
    }
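
    // Editorial note: an illustrative sketch (not part of the original source) of the
    // deferred-error pattern these two methods implement. Code reached from ISLE rules
    // cannot return a `CodegenResult`, so it calls the infallible variant, and the outer
    // lowering driver polls for the error once it regains control:
    //
    //     let regs = vregs.alloc_with_deferred_error(ty); // never fails; may record an error
    //     // ... continue lowering with the (possibly bogus) placeholder regs ...
    //     if let Some(err) = vregs.take_deferred_error() {
    //         return Err(err); // bubbled up where a `Result` can propagate
    //     }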

    /// Produce a bogus VReg placeholder with the proper number of
    /// registers for the given type. This is meant to be used with
    /// deferred allocation errors (see `Lower::alloc_tmp()`).
    fn bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg> {
        let (regclasses, _tys) = I::rc_for_type(ty).expect("must have valid type");
        match regclasses {
            &[rc0] => ValueRegs::one(VReg::new(0, rc0).into()),
            &[rc0, rc1] => ValueRegs::two(VReg::new(0, rc0).into(), VReg::new(1, rc1).into()),
            _ => panic!("Value must reside in 1 or 2 registers"),
        }
    }

    /// Rewrite any mention of `from` into `to`.
    pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
        let from = from.into();
        let resolved_to = self.resolve_vreg_alias(to.into());
        // Disallow cycles (see below).
        assert_ne!(resolved_to, from);

        // Maintain the invariant that PCC facts only exist on vregs
        // which aren't aliases. We want to preserve whatever was
        // stated about the vreg before its producer was lowered.
        if let Some(fact) = self.facts[from.vreg()].take() {
            self.set_fact(resolved_to, fact);
        }

        let old_alias = self.vreg_aliases.insert(from, resolved_to);
        debug_assert_eq!(old_alias, None);
    }

    fn resolve_vreg_alias(&self, mut vreg: regalloc2::VReg) -> regalloc2::VReg {
        // We prevent cycles from existing by resolving targets of
        // aliases eagerly before setting them. If the target resolves
        // to the origin of the alias, then a cycle would be created
        // and the alias is disallowed. Because of the structure of
        // SSA code (one instruction can refer to another's defs but
        // not vice-versa, except indirectly through
        // phis/blockparams), cycles should not occur as we use
        // aliases to redirect vregs to the temps that actually define
        // them.
        while let Some(to) = self.vreg_aliases.get(&vreg) {
            vreg = *to;
        }
        vreg
    }
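
    // Editorial note: an illustrative worked example (not part of the original source) of
    // alias resolution and cycle prevention, using hypothetical vreg numbers. If the table
    // already holds `v10 -> v42` and `v42 -> v57`, then:
    //
    //     resolve_vreg_alias(v10) == v57   // chains are followed to their end
    //
    // A later call `set_vreg_alias(v57, v10)` would first resolve its target `v10` to
    // `v57`, which equals the alias origin, so the `assert_ne!` in `set_vreg_alias`
    // rejects it and no cycle can be installed.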

    #[inline]
    fn debug_assert_no_vreg_aliases(&self, mut list: impl Iterator<Item = VReg>) {
        debug_assert!(list.all(|vreg| !self.vreg_aliases.contains_key(&vreg)));
    }

    /// Set the proof-carrying code fact on a given virtual register.
    ///
    /// Returns the old fact, if any (only one fact can be stored).
    fn set_fact(&mut self, vreg: regalloc2::VReg, fact: Fact) -> Option<Fact> {
        trace!("vreg {:?} has fact: {:?}", vreg, fact);
        debug_assert!(!self.vreg_aliases.contains_key(&vreg));
        self.facts[vreg.vreg()].replace(fact)
    }

    /// Set a fact only if one doesn't already exist.
    pub fn set_fact_if_missing(&mut self, vreg: VirtualReg, fact: Fact) {
        let vreg = self.resolve_vreg_alias(vreg.into());
        if self.facts[vreg.vreg()].is_none() {
            self.set_fact(vreg, fact);
        }
    }

    /// Allocate a fresh ValueRegs, with a given fact to apply if
    /// the value fits in one VReg.
    pub fn alloc_with_maybe_fact(
        &mut self,
        ty: Type,
        fact: Option<Fact>,
    ) -> CodegenResult<ValueRegs<Reg>> {
        let result = self.alloc(ty)?;

        // Ensure that we don't lose a fact on a value that splits
        // into multiple VRegs.
        assert!(result.len() == 1 || fact.is_none());
        if let Some(fact) = fact {
            self.set_fact(result.regs()[0].into(), fact);
        }

        Ok(result)
    }
}

/// This structure tracks the large constants used in VCode that will be emitted separately by the
/// [MachBuffer].
///
/// First, during the lowering phase, constants are inserted using
/// [VCodeConstants.insert]; an intermediate handle, `VCodeConstant`, tracks what constants are
/// used in this phase. Some deduplication is performed, when possible, as constant
/// values are inserted.
///
/// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the
/// constants so that instructions can refer to the value's memory location. The [MachBuffer]
/// then writes the constant values to the buffer.
#[derive(Default)]
pub struct VCodeConstants {
    constants: PrimaryMap<VCodeConstant, VCodeConstantData>,
    pool_uses: HashMap<Constant, VCodeConstant>,
    well_known_uses: HashMap<*const [u8], VCodeConstant>,
    u64s: HashMap<[u8; 8], VCodeConstant>,
}
impl VCodeConstants {
    /// Initialize the structure with the expected number of constants.
    pub fn with_capacity(expected_num_constants: usize) -> Self {
        Self {
            constants: PrimaryMap::with_capacity(expected_num_constants),
            pool_uses: HashMap::with_capacity(expected_num_constants),
            well_known_uses: HashMap::new(),
            u64s: HashMap::new(),
        }
    }

    /// Insert a constant; using this method indicates that a constant value will be used and thus
    /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants
    /// that are [VCodeConstantData::Pool], [VCodeConstantData::WellKnown], or
    /// [VCodeConstantData::U64], but not [VCodeConstantData::Generated].
    pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant {
        match data {
            VCodeConstantData::Generated(_) => self.constants.push(data),
            VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) {
                None => {
                    let vcode_constant = self.constants.push(data);
                    self.pool_uses.insert(constant, vcode_constant);
                    vcode_constant
                }
                Some(&vcode_constant) => vcode_constant,
            },
            VCodeConstantData::WellKnown(data_ref) => {
                match self.well_known_uses.entry(data_ref as *const [u8]) {
                    Entry::Vacant(v) => {
                        let vcode_constant = self.constants.push(data);
                        v.insert(vcode_constant);
                        vcode_constant
                    }
                    Entry::Occupied(o) => *o.get(),
                }
            }
            VCodeConstantData::U64(value) => match self.u64s.entry(value) {
                Entry::Vacant(v) => {
                    let vcode_constant = self.constants.push(data);
                    v.insert(vcode_constant);
                    vcode_constant
                }
                Entry::Occupied(o) => *o.get(),
            },
        }
    }
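
    // Editorial note: an illustrative summary (not part of the original source) of the
    // deduplication keys used above: `Pool` constants are keyed by their IR `Constant`
    // handle, `WellKnown` constants by the pointer identity of their static byte slice,
    // and `U64` constants by their 8-byte value; `Generated` data is never deduplicated.
    // For example:
    //
    //     let a = constants.insert(VCodeConstantData::U64(42u64.to_le_bytes()));
    //     let b = constants.insert(VCodeConstantData::U64(42u64.to_le_bytes()));
    //     assert_eq!(a, b); // same handle, stored once
    //
    // (See also the sanity-check tests added at the bottom of this file.)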

    /// Return the number of constants inserted.
    pub fn len(&self) -> usize {
        self.constants.len()
    }

    /// Iterate over the `VCodeConstant` keys inserted in this structure.
    pub fn keys(&self) -> Keys<VCodeConstant> {
        self.constants.keys()
    }

    /// Iterate over the `VCodeConstant` keys and the associated constant data inserted in this
    /// structure.
    pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> {
        self.constants.iter()
    }

    /// Returns the data associated with the specified constant.
    pub fn get(&self, c: VCodeConstant) -> &VCodeConstantData {
        &self.constants[c]
    }

    /// Checks if the given [VCodeConstantData] is registered as
    /// used by the pool.
    pub fn pool_uses(&self, constant: &VCodeConstantData) -> bool {
        match constant {
            VCodeConstantData::Pool(c, _) => self.pool_uses.contains_key(c),
            _ => false,
        }
    }
}

/// A use of a constant by one or more VCode instructions; see [VCodeConstants].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct VCodeConstant(u32);
entity_impl!(VCodeConstant);

/// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking
/// these separately instead of as raw byte buffers allows us to avoid some duplication.
pub enum VCodeConstantData {
    /// A constant already present in the Cranelift IR
    /// [ConstantPool](crate::ir::constant::ConstantPool).
    Pool(Constant, ConstantData),
    /// A reference to a well-known constant value that is statically encoded within the compiler.
    WellKnown(&'static [u8]),
    /// A constant value generated during lowering; the value may depend on the instruction context
    /// which makes it difficult to de-duplicate--if possible, use other variants.
    Generated(ConstantData),
    /// A constant of at most 64 bits. These are deduplicated as
    /// well. Stored as a fixed-size array of `u8` so that we do not
    /// encounter endianness problems when cross-compiling.
    U64([u8; 8]),
}
impl VCodeConstantData {
    /// Retrieve the constant data as a byte slice.
    pub fn as_slice(&self) -> &[u8] {
        match self {
            VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(),
            VCodeConstantData::WellKnown(d) => d,
            VCodeConstantData::U64(value) => &value[..],
        }
    }

    /// Calculate the alignment of the constant data.
    pub fn alignment(&self) -> u32 {
        if self.as_slice().len() <= 8 { 8 } else { 16 }
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use std::mem::size_of;

    #[test]
    fn size_of_constant_structs() {
        assert_eq!(size_of::<Constant>(), 4);
        assert_eq!(size_of::<VCodeConstant>(), 4);
        assert_eq!(size_of::<ConstantData>(), 3 * size_of::<usize>());
        assert_eq!(size_of::<VCodeConstantData>(), 4 * size_of::<usize>());
        assert_eq!(
            size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(),
            3 * size_of::<usize>()
        );
        // TODO The VCodeConstants structure's memory size could be further optimized.
        // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at
        // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes.
    }
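
    // Editorial addition: small sanity checks, written against the APIs defined above,
    // exercising the documented deduplication and alignment behavior of `VCodeConstants`
    // and `VCodeConstantData`. These are a sketch based on the definitions in this module.
    #[test]
    fn u64_constants_deduplicate() {
        let mut constants = VCodeConstants::with_capacity(2);
        // Inserting the same 8-byte value twice yields the same handle...
        let a = constants.insert(VCodeConstantData::U64(42u64.to_le_bytes()));
        let b = constants.insert(VCodeConstantData::U64(42u64.to_le_bytes()));
        assert_eq!(a, b);
        // ...while a different value gets a fresh handle.
        let c = constants.insert(VCodeConstantData::U64(7u64.to_le_bytes()));
        assert_ne!(a, c);
        assert_eq!(constants.len(), 2);
    }

    #[test]
    fn constant_alignment_rule() {
        // Data of 8 bytes or fewer is 8-byte aligned; anything larger is 16-byte aligned.
        assert_eq!(VCodeConstantData::U64([0; 8]).alignment(), 8);
        static WIDE: [u8; 16] = [0; 16];
        assert_eq!(VCodeConstantData::WellKnown(&WIDE).alignment(), 16);
    }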
}