//! This implements the VCode container: a CFG of Insts that have been lowered.
//!
//! VCode is virtual-register code. An instruction in VCode is almost a machine
//! instruction; however, its register slots can refer to virtual registers in
//! addition to real machine registers.
//!
//! VCode is structured with traditional basic blocks, and
//! each block must be terminated by an unconditional branch (one target), a
//! conditional branch (two targets), or a return (no targets). Note that this
//! slightly differs from the machine code of most ISAs: in most ISAs, a
//! conditional branch has one target (and the not-taken case falls through).
//! However, we expect that machine backends will elide branches to the following
//! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
//! branch-uncond pair if *both* targets are not fallthrough. This allows us to
//! play with layout prior to final binary emission, as well, if we want.
//!
//! See the main module comment in `mod.rs` for more details on the VCode-based
//! backend pipeline.

use crate::CodegenError;
use crate::FxHashMap;
use crate::ir::pcc::*;
use crate::ir::{self, Constant, ConstantData, ValueLabel, types};
use crate::ranges::Ranges;
use crate::timing;
use crate::trace;
use crate::{LabelValueLoc, ValueLocRange};
use crate::{machinst::*, trace_log_enabled};
use regalloc2::{
    Edit, Function as RegallocFunction, InstOrEdit, InstPosition, InstRange, Operand,
    OperandConstraint, OperandKind, PRegSet, ProgPoint, RegClass,
};

use crate::HashMap;
use crate::hash_map::Entry;
use core::cmp::Ordering;
use core::fmt::{self, Write};
use core::mem::take;
use core::ops::Range;
use cranelift_entity::{Keys, entity_impl};
42
/// Index referring to an instruction in VCode.
43
pub type InsnIndex = regalloc2::Inst;
44
45
/// Extension trait for `InsnIndex` to allow conversion to a
46
/// `BackwardsInsnIndex`.
47
trait ToBackwardsInsnIndex {
48
fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex;
49
}
50
51
impl ToBackwardsInsnIndex for InsnIndex {
52
fn to_backwards_insn_index(&self, num_insts: usize) -> BackwardsInsnIndex {
53
BackwardsInsnIndex::new(num_insts - self.index() - 1)
54
}
55
}
56
57
/// An index referring to an instruction in the VCode when it is backwards,
58
/// during VCode construction.
59
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
60
#[cfg_attr(
61
feature = "enable-serde",
62
derive(::serde::Serialize, ::serde::Deserialize)
63
)]
64
pub struct BackwardsInsnIndex(InsnIndex);
65
66
impl BackwardsInsnIndex {
67
pub fn new(i: usize) -> Self {
68
BackwardsInsnIndex(InsnIndex::new(i))
69
}
70
}
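// A minimal sketch of the forward/backward index flip used throughout this
// file; the five-instruction count is purely illustrative.
#[cfg(test)]
mod backwards_index_sketch {
    use super::*;

    #[test]
    fn index_flip_round_trips() {
        let num_insts = 5;
        // The first forward instruction is the last one counting backwards...
        assert_eq!(
            InsnIndex::new(0).to_backwards_insn_index(num_insts),
            BackwardsInsnIndex::new(4)
        );
        // ...and the last forward instruction is backwards index 0.
        assert_eq!(
            InsnIndex::new(4).to_backwards_insn_index(num_insts),
            BackwardsInsnIndex::new(0)
        );
    }
}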
71
72
/// Index referring to a basic block in VCode.
73
pub type BlockIndex = regalloc2::Block;
74
75
/// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
76
/// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
77
pub trait VCodeInst: MachInst + MachInstEmit {}
78
impl<I: MachInst + MachInstEmit> VCodeInst for I {}
79
80
/// A function in "VCode" (virtualized-register code) form, after
81
/// lowering. This is essentially a standard CFG of basic blocks,
82
/// where each basic block consists of lowered instructions produced
83
/// by the machine-specific backend.
84
///
85
/// Note that the VCode is immutable once produced, and is not
86
/// modified by register allocation in particular. Rather, register
87
/// allocation on the `VCode` produces a separate `regalloc2::Output`
88
/// struct, and this can be passed to `emit`. `emit` in turn does not
89
/// modify the vcode, but produces an `EmitResult`, which contains the
90
/// machine code itself, and the associated disassembly and/or
91
/// metadata as requested.
92
pub struct VCode<I: VCodeInst> {
93
/// VReg IR-level types.
94
vreg_types: Vec<Type>,
95
96
/// Lowered machine instructions in order corresponding to the original IR.
97
insts: Vec<I>,
98
99
/// A map from backwards instruction index to the user stack map for that
100
/// instruction.
101
///
102
/// This is a sparse side table that only has entries for instructions that
103
/// are safepoints, and only for a subset of those that have an associated
104
/// user stack map.
105
user_stack_maps: FxHashMap<BackwardsInsnIndex, ir::UserStackMap>,
106
107
/// A map from backwards instruction index to the debug tags for
108
/// that instruction. Each entry indexes a range in the
109
/// `debug_tag_pool`.
110
debug_tags: FxHashMap<BackwardsInsnIndex, Range<u32>>,
111
112
/// Pooled storage for sequences of debug tags; indexed by entries
113
/// in `debug_tags`.
114
debug_tag_pool: Vec<ir::DebugTag>,
115
116
/// Operands: pre-regalloc references to virtual registers with
117
/// constraints, in one flattened array. This allows the regalloc
118
/// to efficiently access all operands without requiring expensive
119
/// matches or method invocations on insts.
120
operands: Vec<Operand>,
121
122
/// Operand index ranges: for each instruction in `insts`, there
123
/// is a tuple here providing the range in `operands` for that
124
/// instruction's operands.
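///
/// For example (hypothetical): if instruction 0 has two operands and
/// instruction 1 has three, then `operand_ranges` holds `[0..2, 2..5]`
/// and `operands[2..5]` are instruction 1's operands.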
125
operand_ranges: Ranges,
126
127
/// Clobbers: a sparse map from instruction indices to clobber masks.
128
clobbers: FxHashMap<InsnIndex, PRegSet>,
129
130
/// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is
131
/// reasonable to keep one of these per instruction.)
132
srclocs: Vec<RelSourceLoc>,
133
134
/// Entry block.
135
entry: BlockIndex,
136
137
/// Block instruction indices.
138
block_ranges: Ranges,
139
140
/// Block successors: index range in the `block_succs` list.
141
block_succ_range: Ranges,
142
143
/// Block successor lists, concatenated into one vec. The
144
/// `block_succ_range` list of tuples above gives (start, end)
145
/// ranges within this list that correspond to each basic block's
146
/// successors.
147
block_succs: Vec<regalloc2::Block>,
148
149
/// Block predecessors: index range in the `block_preds` list.
150
block_pred_range: Ranges,
151
152
/// Block predecessor lists, concatenated into one vec. The
153
/// `block_pred_range` list of tuples above gives (start, end)
154
/// ranges within this list that correspond to each basic block's
155
/// predecessors.
156
block_preds: Vec<regalloc2::Block>,
157
158
/// Block parameters: index range in `block_params` below.
159
block_params_range: Ranges,
160
161
/// Block parameter lists, concatenated into one vec. The
162
/// `block_params_range` list of tuples above gives (start, end)
163
/// ranges within this list that correspond to each basic block's
164
/// blockparam vregs.
165
block_params: Vec<regalloc2::VReg>,
166
167
/// Outgoing block arguments on branch instructions, concatenated
168
/// into one list.
169
///
170
/// Note that this is conceptually a 3D array: we have a VReg list
171
/// per block, per successor. We flatten those three dimensions
172
/// into this 1D vec, then store index ranges in two levels of
173
/// indirection.
174
///
175
/// Indexed by the indices in `branch_block_arg_succ_range`.
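///
/// For example (hypothetical): if block `b` has two successors and passes
/// args `[v1, v2]` to the first and `[v3]` to the second, then
/// `branch_block_args` holds `[v1, v2, v3]`, `branch_block_arg_range` holds
/// the two ranges `[0..2, 2..3]`, and the entry for `b` in
/// `branch_block_arg_succ_range` covers those two range entries.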
176
branch_block_args: Vec<regalloc2::VReg>,
177
178
/// Array of sequences of (start, end) tuples in
179
/// `branch_block_args`, one for each successor; these sequences
180
/// for each block are concatenated.
181
///
182
/// Indexed by the indices in `branch_block_arg_succ_range`.
183
branch_block_arg_range: Ranges,
184
185
/// For a given block, indices in `branch_block_arg_range`
186
/// corresponding to all of its successors.
187
branch_block_arg_succ_range: Ranges,
188
189
/// Block-order information.
190
block_order: BlockLoweringOrder,
191
192
/// ABI object.
193
pub(crate) abi: Callee<I::ABIMachineSpec>,
194
195
/// Constant information used during code emission. This should be
196
/// immutable across function compilations within the same module.
197
emit_info: I::Info,
198
199
/// Constants.
200
pub(crate) constants: VCodeConstants,
201
202
/// Value labels for debuginfo attached to vregs.
203
debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>,
204
205
pub(crate) sigs: SigSet,
206
207
/// Facts on VRegs, for proof-carrying code verification.
208
facts: Vec<Option<Fact>>,
209
210
log2_min_function_alignment: u8,
211
}
212
213
/// The result of `VCode::emit`. Contains all information computed
214
/// during emission: actual machine code, optionally a disassembly,
215
/// and optionally metadata about the code layout.
216
pub struct EmitResult {
217
/// The MachBuffer containing the machine code.
218
pub buffer: MachBufferFinalized<Stencil>,
219
220
/// Offset of each basic block, recorded during emission. Computed
221
/// only if `machine_code_cfg_info` is enabled.
222
pub bb_offsets: Vec<CodeOffset>,
223
224
/// Final basic-block edges, in terms of code offsets of
225
/// bb-starts. Computed only if `machine_code_cfg_info` is enabled.
226
pub bb_edges: Vec<(CodeOffset, CodeOffset)>,
227
228
/// The pretty-printed disassembly, if any. This uses the same
229
/// pretty-printing for MachInsts as the pre-regalloc VCode Debug
230
/// implementation, but additionally includes the prologue and
231
/// epilogue(s), and makes use of the regalloc results.
232
pub disasm: Option<String>,
233
234
/// Value-labels information (debug metadata).
235
pub value_labels_ranges: ValueLabelsRanges,
236
}
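// A rough usage sketch (illustrative only; `vcode`, `ra_output`, `flags`, and
// `ctrl_plane` are assumed to be provided by the surrounding backend driver):
//
//     let result: EmitResult =
//         vcode.emit(&ra_output, /* want_disasm = */ true, &flags, &mut ctrl_plane);
//     let code: MachBufferFinalized<Stencil> = result.buffer;
//     let disasm: Option<String> = result.disasm;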
237
238
/// A builder for a VCode function body.
239
///
240
/// This builder has the ability to accept instructions in either
241
/// forward or reverse order, depending on the pass direction that
242
/// produces the VCode. The lowering from CLIF to VCode<MachInst>
243
/// ordinarily occurs in reverse order (in order to allow instructions
244
/// to be lowered only if used, and not merged) so a reversal will
245
/// occur at the end of lowering to ensure the VCode is in machine
246
/// order.
247
///
248
/// If built in reverse, block and instruction indices used once the
249
/// VCode is built are relative to the final (reversed) order, not the
250
/// order of construction. Note that this means we do not know the
251
/// final block or instruction indices when building, so we do not
252
/// hand them out. (The user is assumed to know them when appending
253
/// terminator instructions with successor blocks.)
254
pub struct VCodeBuilder<I: VCodeInst> {
255
/// In-progress VCode.
256
pub(crate) vcode: VCode<I>,
257
258
/// In what direction is the build occurring?
259
direction: VCodeBuildDirection,
260
261
/// Debug-value label in-progress map, keyed by label. For each
262
/// label, we keep disjoint ranges mapping to vregs. We'll flatten
263
/// this into (vreg, range, label) tuples when done.
264
debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>,
265
}
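// Roughly, lowering drives the builder as sketched below (an illustrative
// outline of the calling protocol, not a verbatim excerpt of the lowering
// pass):
//
//     for each lowered block, visited in reverse:
//         builder.push(inst, srcloc)      -- block body, in reverse order
//         builder.add_succ(succ, &args)   -- one call per successor
//         builder.end_bb()                -- close the block
//     builder.set_entry(entry_block)
//     let vcode = builder.build(vregs)    -- reverses into final forward order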
266
267
/// Direction in which a VCodeBuilder builds VCode.
268
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
269
pub enum VCodeBuildDirection {
270
// TODO: add `Forward` once we need it and can test it adequately.
271
/// Backward-build pass: we expect the producer to call `emit()`
272
/// with instructions in reverse program order within each block.
273
Backward,
274
}
275
276
impl<I: VCodeInst> VCodeBuilder<I> {
277
/// Create a new VCodeBuilder.
278
pub fn new(
279
sigs: SigSet,
280
abi: Callee<I::ABIMachineSpec>,
281
emit_info: I::Info,
282
block_order: BlockLoweringOrder,
283
constants: VCodeConstants,
284
direction: VCodeBuildDirection,
285
log2_min_function_alignment: u8,
286
) -> Self {
287
let vcode = VCode::new(
288
sigs,
289
abi,
290
emit_info,
291
block_order,
292
constants,
293
log2_min_function_alignment,
294
);
295
296
VCodeBuilder {
297
vcode,
298
direction,
299
debug_info: FxHashMap::default(),
300
}
301
}
302
303
pub fn init_retval_area(&mut self, vregs: &mut VRegAllocator<I>) -> CodegenResult<()> {
304
self.vcode.abi.init_retval_area(&self.vcode.sigs, vregs)
305
}
306
307
/// Access the ABI object.
308
pub fn abi(&self) -> &Callee<I::ABIMachineSpec> {
309
&self.vcode.abi
310
}
311
312
/// Access the ABI object.
313
pub fn abi_mut(&mut self) -> &mut Callee<I::ABIMachineSpec> {
314
&mut self.vcode.abi
315
}
316
317
pub fn sigs(&self) -> &SigSet {
318
&self.vcode.sigs
319
}
320
321
pub fn sigs_mut(&mut self) -> &mut SigSet {
322
&mut self.vcode.sigs
323
}
324
325
/// Access to the BlockLoweringOrder object.
326
pub fn block_order(&self) -> &BlockLoweringOrder {
327
&self.vcode.block_order
328
}
329
330
/// Set the current block as the entry block.
331
pub fn set_entry(&mut self, block: BlockIndex) {
332
self.vcode.entry = block;
333
}
334
335
/// End the current basic block. Must be called after emitting vcode insts
336
/// for IR insts and prior to ending the function (building the VCode).
337
pub fn end_bb(&mut self) {
338
let end_idx = self.vcode.insts.len();
339
// Add the instruction index range to the list of blocks.
340
self.vcode.block_ranges.push_end(end_idx);
341
// End the successors list.
342
let succ_end = self.vcode.block_succs.len();
343
self.vcode.block_succ_range.push_end(succ_end);
344
// End the blockparams list.
345
let block_params_end = self.vcode.block_params.len();
346
self.vcode.block_params_range.push_end(block_params_end);
347
// End the branch blockparam args list.
348
let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len();
349
self.vcode
350
.branch_block_arg_succ_range
351
.push_end(branch_block_arg_succ_end);
352
}
353
354
pub fn add_block_param(&mut self, param: VirtualReg) {
355
self.vcode.block_params.push(param.into());
356
}
357
358
fn add_branch_args_for_succ(&mut self, args: &[Reg]) {
359
self.vcode
360
.branch_block_args
361
.extend(args.iter().map(|&arg| VReg::from(arg)));
362
let end = self.vcode.branch_block_args.len();
363
self.vcode.branch_block_arg_range.push_end(end);
364
}
365
366
/// Push an instruction for the current BB and current IR inst
367
/// within the BB.
368
pub fn push(&mut self, insn: I, loc: RelSourceLoc) {
369
assert!(!insn.is_low_level_branch()); // These are not meant to be in VCode.
370
self.vcode.insts.push(insn);
371
self.vcode.srclocs.push(loc);
372
}
373
374
/// Add a successor block with branch args.
375
pub fn add_succ(&mut self, block: BlockIndex, args: &[Reg]) {
376
self.vcode.block_succs.push(block);
377
self.add_branch_args_for_succ(args);
378
}
379
380
/// Add a debug value label to a register.
381
pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) {
382
// 1) In the reversed order, we consider the instructions
383
// that define ranges in the "debug_info" array to refer
384
// to the IP **after** them (when reversed):
385
// IP[2]__| Inst 3 |
386
// IP[1]__| Inst 2 |
387
// IP[0]__| Inst 1 |
388
// | Inst 0 |
389
// This is so that we can represent IP[<function start>],
390
// done at the cost of not being able to represent IP[<function end>],
391
// which is OK since no values will be live at that point.
392
// 2) The live range for "reg" begins at the current IP
393
// and continues until the next, in execution order,
394
// VReg that defines "label". Since the ranges are open
395
// at the end, the subtraction of 1 cancels out:
396
// [last..current IP] <=>
397
// [last..last emitted inst index] <=>
398
// [last..next_inst_index - 1] <=>
399
// [last..next_inst_index)
400
//
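// For example (hypothetical): if three instructions have been pushed so far
// and this is the first time `label` is seen, we record the half-open range
// [0..3) for `reg` (in backwards indices); the later reversal in
// `reverse_and_finalize` flips it into forward program order.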
401
let next_inst_index = self.vcode.insts.len();
402
if next_inst_index == 0 {
403
// This would produce a defective [0..0) range.
404
return;
405
}
406
let next_inst = InsnIndex::new(next_inst_index);
407
let labels = self.debug_info.entry(label).or_insert_with(|| vec![]);
408
let last = labels
409
.last()
410
.map(|(_start, end, _vreg)| *end)
411
.unwrap_or(InsnIndex::new(0));
412
labels.push((last, next_inst, reg.into()));
413
}
414
415
/// Access the constants.
416
pub fn constants(&mut self) -> &mut VCodeConstants {
417
&mut self.vcode.constants
418
}
419
420
fn compute_preds_from_succs(&mut self) {
421
// Do a linear-time counting sort: first determine how many
422
// times each block appears as a successor.
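// For example (hypothetical CFG): with successor lists
//   block0 -> [1, 2], block1 -> [2], block2 -> []
// `starts` becomes [0, 1, 2] after this counting pass; the prefix-sum pass
// below then rewrites it to the per-block starting slots [0, 0, 1] and pushes
// the range ends 0, 1, 3 into `block_pred_range`.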
423
let mut starts = vec![0u32; self.vcode.num_blocks()];
424
for succ in &self.vcode.block_succs {
425
starts[succ.index()] += 1;
426
}
427
428
// Determine for each block the starting index where that
429
// block's predecessors should go. This is equivalent to the
430
// ranges we need to store in block_pred_range.
431
self.vcode.block_pred_range.reserve(starts.len());
432
let mut end = 0;
433
for count in starts.iter_mut() {
434
let start = end;
435
end += *count;
436
*count = start;
437
self.vcode.block_pred_range.push_end(end as usize);
438
}
439
let end = end as usize;
440
debug_assert_eq!(end, self.vcode.block_succs.len());
441
442
// Walk over the successors again, this time grouped by
443
// predecessor, and push the predecessor at the current
444
// starting position of each of its successors. We build
445
// each group of predecessors in whatever order Ranges::iter
446
// returns them; regalloc2 doesn't care.
447
self.vcode.block_preds.resize(end, BlockIndex::invalid());
448
for (pred, range) in self.vcode.block_succ_range.iter() {
449
let pred = BlockIndex::new(pred);
450
for succ in &self.vcode.block_succs[range] {
451
let pos = &mut starts[succ.index()];
452
self.vcode.block_preds[*pos as usize] = pred;
453
*pos += 1;
454
}
455
}
456
debug_assert!(self.vcode.block_preds.iter().all(|pred| pred.is_valid()));
457
}
458
459
/// Called once, when a build in Backward order is complete, to
460
/// perform the overall reversal (into final forward order) and
461
/// finalize metadata accordingly.
462
fn reverse_and_finalize(&mut self, vregs: &VRegAllocator<I>) {
463
let n_insts = self.vcode.insts.len();
464
if n_insts == 0 {
465
return;
466
}
467
468
// Reverse the per-block and per-inst sequences.
469
self.vcode.block_ranges.reverse_index();
470
self.vcode.block_ranges.reverse_target(n_insts);
471
// block_params_range is indexed by block (and blocks were
472
// traversed in reverse) so we reverse it; but block-param
473
// sequences in the concatenated vec can remain in reverse
474
// order (it is effectively an arena of arbitrarily-placed
475
// referenced sequences).
476
self.vcode.block_params_range.reverse_index();
477
// Likewise, we reverse block_succ_range, but the block_succ
478
// concatenated array can remain as-is.
479
self.vcode.block_succ_range.reverse_index();
480
self.vcode.insts.reverse();
481
self.vcode.srclocs.reverse();
482
// Likewise, branch_block_arg_succ_range is indexed by block
483
// so must be reversed.
484
self.vcode.branch_block_arg_succ_range.reverse_index();
485
486
// To translate an instruction index *endpoint* in reversed
487
// order to forward order, compute `n_insts - i`.
488
//
489
// Why not `n_insts - 1 - i`? That would be correct to
490
// translate an individual instruction index (for ten insts 0
491
// to 9 inclusive, inst 0 becomes 9, and inst 9 becomes
492
// 0). But for the usual inclusive-start, exclusive-end range
493
// idiom, inclusive starts become exclusive ends and
494
// vice-versa, so e.g. an (inclusive) start of 0 becomes an
495
// (exclusive) end of 10.
496
let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index());
497
498
// Generate debug-value labels based on per-label maps.
499
for (label, tuples) in &self.debug_info {
500
for &(start, end, vreg) in tuples {
501
let vreg = vregs.resolve_vreg_alias(vreg);
502
let fwd_start = translate(end);
503
let fwd_end = translate(start);
504
self.vcode
505
.debug_value_labels
506
.push((vreg, fwd_start, fwd_end, label.as_u32()));
507
}
508
}
509
510
// Now sort debug value labels by VReg, as required
511
// by regalloc2.
512
self.vcode
513
.debug_value_labels
514
.sort_unstable_by_key(|(vreg, _, _, _)| *vreg);
515
}
516
517
fn collect_operands(&mut self, vregs: &VRegAllocator<I>) {
518
let allocatable = PRegSet::from(self.vcode.abi.machine_env());
519
for (i, insn) in self.vcode.insts.iter_mut().enumerate() {
520
// Push operands from the instruction onto the operand list.
521
//
522
// We rename through the vreg alias table as we collect
523
// the operands. This is better than a separate post-pass
524
// over operands, because it has more cache locality:
525
// operands only need to pass through L1 once. This is
526
// also better than renaming instructions'
527
// operands/registers while lowering, because here we only
528
// need to do the `match` over the instruction to visit
529
// its register fields (which is slow, branchy code) once.
530
531
let mut op_collector =
532
OperandCollector::new(&mut self.vcode.operands, allocatable, |vreg| {
533
vregs.resolve_vreg_alias(vreg)
534
});
535
insn.get_operands(&mut op_collector);
536
let (ops, clobbers) = op_collector.finish();
537
self.vcode.operand_ranges.push_end(ops);
538
539
if clobbers != PRegSet::default() {
540
self.vcode.clobbers.insert(InsnIndex::new(i), clobbers);
541
}
542
543
if let Some((dst, src)) = insn.is_move() {
544
// We should never see non-virtual registers present in move
545
// instructions.
546
assert!(
547
src.is_virtual(),
548
"the real register {src:?} was used as the source of a move instruction"
549
);
550
assert!(
551
dst.to_reg().is_virtual(),
552
"the real register {:?} was used as the destination of a move instruction",
553
dst.to_reg()
554
);
555
}
556
}
557
558
// Translate blockparam args via the vreg aliases table as well.
559
for arg in &mut self.vcode.branch_block_args {
560
let new_arg = vregs.resolve_vreg_alias(*arg);
561
trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg);
562
*arg = new_arg;
563
}
564
}
565
566
/// Build the final VCode.
567
pub fn build(mut self, mut vregs: VRegAllocator<I>) -> VCode<I> {
568
self.vcode.vreg_types = take(&mut vregs.vreg_types);
569
self.vcode.facts = take(&mut vregs.facts);
570
571
if self.direction == VCodeBuildDirection::Backward {
572
self.reverse_and_finalize(&vregs);
573
}
574
self.collect_operands(&vregs);
575
576
self.compute_preds_from_succs();
577
self.vcode.debug_value_labels.sort_unstable();
578
579
// At this point, nothing in the vcode should mention any
580
// VReg which has been aliased. All the appropriate rewriting
581
// should have happened above. Just to be sure, let's
582
// double-check each field which has vregs.
583
// Note: can't easily check vcode.insts, resolved in collect_operands.
584
// Operands are resolved in collect_operands.
585
vregs.debug_assert_no_vreg_aliases(self.vcode.operands.iter().map(|op| op.vreg()));
586
// Currently block params are never aliased to another vreg.
587
vregs.debug_assert_no_vreg_aliases(self.vcode.block_params.iter().copied());
588
// Branch block args are resolved in collect_operands.
589
vregs.debug_assert_no_vreg_aliases(self.vcode.branch_block_args.iter().copied());
590
// Debug value labels are resolved in reverse_and_finalize.
591
vregs.debug_assert_no_vreg_aliases(
592
self.vcode.debug_value_labels.iter().map(|&(vreg, ..)| vreg),
593
);
594
// Facts are resolved eagerly during set_vreg_alias.
595
vregs.debug_assert_no_vreg_aliases(
596
self.vcode
597
.facts
598
.iter()
599
.zip(&vregs.vreg_types)
600
.enumerate()
601
.filter(|(_, (fact, _))| fact.is_some())
602
.map(|(vreg, (_, &ty))| {
603
let (regclasses, _) = I::rc_for_type(ty).unwrap();
604
VReg::new(vreg, regclasses[0])
605
}),
606
);
607
608
self.vcode
609
}
610
611
/// Add a user stack map for the associated instruction.
612
pub fn add_user_stack_map(
613
&mut self,
614
inst: BackwardsInsnIndex,
615
entries: &[ir::UserStackMapEntry],
616
) {
617
let stack_map = ir::UserStackMap::new(entries, self.vcode.abi.sized_stackslot_offsets());
618
let old_entry = self.vcode.user_stack_maps.insert(inst, stack_map);
619
debug_assert!(old_entry.is_none());
620
}
621
622
/// Add debug tags for the associated instruction.
623
pub fn add_debug_tags(&mut self, inst: BackwardsInsnIndex, entries: &[ir::DebugTag]) {
624
let start = u32::try_from(self.vcode.debug_tag_pool.len()).unwrap();
625
self.vcode.debug_tag_pool.extend(entries.iter().cloned());
626
let end = u32::try_from(self.vcode.debug_tag_pool.len()).unwrap();
627
self.vcode.debug_tags.insert(inst, start..end);
628
}
629
}
630
631
const NO_INST_OFFSET: CodeOffset = u32::MAX;
632
633
impl<I: VCodeInst> VCode<I> {
634
/// New empty VCode.
635
fn new(
636
sigs: SigSet,
637
abi: Callee<I::ABIMachineSpec>,
638
emit_info: I::Info,
639
block_order: BlockLoweringOrder,
640
constants: VCodeConstants,
641
log2_min_function_alignment: u8,
642
) -> Self {
643
let n_blocks = block_order.lowered_order().len();
644
VCode {
645
sigs,
646
vreg_types: vec![],
647
insts: Vec::with_capacity(10 * n_blocks),
648
user_stack_maps: FxHashMap::default(),
649
debug_tags: FxHashMap::default(),
650
debug_tag_pool: vec![],
651
operands: Vec::with_capacity(30 * n_blocks),
652
operand_ranges: Ranges::with_capacity(10 * n_blocks),
653
clobbers: FxHashMap::default(),
654
srclocs: Vec::with_capacity(10 * n_blocks),
655
entry: BlockIndex::new(0),
656
block_ranges: Ranges::with_capacity(n_blocks),
657
block_succ_range: Ranges::with_capacity(n_blocks),
658
block_succs: Vec::with_capacity(n_blocks),
659
block_pred_range: Ranges::default(),
660
block_preds: Vec::new(),
661
block_params_range: Ranges::with_capacity(n_blocks),
662
block_params: Vec::with_capacity(5 * n_blocks),
663
branch_block_args: Vec::with_capacity(10 * n_blocks),
664
branch_block_arg_range: Ranges::with_capacity(2 * n_blocks),
665
branch_block_arg_succ_range: Ranges::with_capacity(n_blocks),
666
block_order,
667
abi,
668
emit_info,
669
constants,
670
debug_value_labels: vec![],
671
facts: vec![],
672
log2_min_function_alignment,
673
}
674
}
675
676
/// Get the number of blocks. Block indices will be in the range
/// `0 .. self.num_blocks()`.
678
pub fn num_blocks(&self) -> usize {
679
self.block_ranges.len()
680
}
681
682
/// The number of lowered instructions.
683
pub fn num_insts(&self) -> usize {
684
self.insts.len()
685
}
686
687
fn compute_clobbers_and_function_calls(
688
&self,
689
regalloc: &regalloc2::Output,
690
) -> (Vec<Writable<RealReg>>, FunctionCalls) {
691
let mut clobbered = PRegSet::default();
692
let mut function_calls = FunctionCalls::None;
693
694
// All moves are included in clobbers.
695
for (_, Edit::Move { to, .. }) in &regalloc.edits {
696
if let Some(preg) = to.as_reg() {
697
clobbered.add(preg);
698
}
699
}
700
701
for (i, range) in self.operand_ranges.iter() {
702
let operands = &self.operands[range.clone()];
703
let allocs = &regalloc.allocs[range];
704
for (operand, alloc) in operands.iter().zip(allocs.iter()) {
705
if operand.kind() == OperandKind::Def {
706
if let Some(preg) = alloc.as_reg() {
707
clobbered.add(preg);
708
}
709
}
710
}
711
712
function_calls.update(self.insts[i].call_type());
713
714
// Also add explicitly-clobbered registers.
715
//
716
// Skip merging this instruction's clobber list if not
717
// "included in clobbers" as per the MachInst. (Some
718
// backends use this to implement ABI specifics; e.g.,
719
// excluding calls of the same ABI as the current function
720
// from clobbers, because by definition everything
721
// clobbered by the call can be clobbered by this function
722
// without saving as well.)
723
//
724
// This is important for a particular optimization: when
725
// some registers are "half-clobbered", e.g. vector/float
726
// registers on aarch64, we want them to be seen as
727
// clobbered by regalloc so it avoids carrying values
728
// across calls in these registers but not seen as
729
// clobbered by prologue generation here (because the
730
// actual half-clobber implied by the clobber list fits
731
// within the clobbers that we allow without
732
// clobber-saves).
733
if self.insts[i].is_included_in_clobbers() {
734
if let Some(&inst_clobbered) = self.clobbers.get(&InsnIndex::new(i)) {
735
clobbered.union_from(inst_clobbered);
736
}
737
}
738
}
739
740
let clobbered_regs = clobbered
741
.into_iter()
742
.map(|preg| Writable::from_reg(RealReg::from(preg)))
743
.collect();
744
745
(clobbered_regs, function_calls)
746
}
747
748
/// Emit the instructions to a `MachBuffer`, containing fixed-up
749
/// code and external reloc/trap/etc. records ready for use. Takes
750
/// the regalloc results as well.
751
///
752
/// Returns the machine code itself, and optionally metadata
753
/// and/or a disassembly, as an `EmitResult`. The `VCode` itself
754
/// is consumed by the emission process.
755
pub fn emit(
756
mut self,
757
regalloc: &regalloc2::Output,
758
want_disasm: bool,
759
flags: &settings::Flags,
760
ctrl_plane: &mut ControlPlane,
761
) -> EmitResult
762
where
763
I: VCodeInst,
764
{
765
let _tt = timing::vcode_emit();
766
let mut buffer = MachBuffer::new();
767
buffer.set_log2_min_function_alignment(self.log2_min_function_alignment);
768
let mut bb_starts: Vec<Option<CodeOffset>> = vec![];
769
770
// The first M MachLabels are reserved for block indices.
771
buffer.reserve_labels_for_blocks(self.num_blocks());
772
773
// Register all allocated constants with the `MachBuffer` to ensure that
774
// any references to the constants during instructions can be handled
775
// correctly.
776
buffer.register_constants(&self.constants);
777
778
// Construct the final order we emit code in: cold blocks at the end.
779
let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![];
780
let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![];
781
for block in 0..self.num_blocks() {
782
let block = BlockIndex::new(block);
783
if self.block_order.is_cold(block) {
784
cold_blocks.push(block);
785
} else {
786
final_order.push(block);
787
}
788
}
789
final_order.extend(cold_blocks.clone());
790
791
// Compute/save info we need for the prologue: clobbers and
792
// number of spillslots.
793
//
794
// We clone `abi` here because we will mutate it as we
795
// generate the prologue and set other info, but we can't
796
// mutate `VCode`. The info it usually carries prior to
797
// setting clobbers is fairly minimal so this should be
798
// relatively cheap.
799
let (clobbers, function_calls) = self.compute_clobbers_and_function_calls(regalloc);
800
self.abi.compute_frame_layout(
801
&self.sigs,
802
regalloc.num_spillslots,
803
clobbers,
804
function_calls,
805
);
806
807
// Emit blocks.
808
let mut cur_srcloc = None;
809
let mut last_offset = None;
810
let mut inst_offsets = vec![];
811
let mut state = I::State::new(&self.abi, core::mem::take(ctrl_plane));
812
813
let mut disasm = String::new();
814
815
if !self.debug_value_labels.is_empty() {
816
inst_offsets.resize(self.insts.len(), NO_INST_OFFSET);
817
}
818
819
// Count edits per block ahead of time; this is needed for
820
// lookahead island emission. (We could derive it per-block
821
// with binary search in the edit list, but it's more
822
// efficient to do it in one pass here.)
823
let mut ra_edits_per_block: SmallVec<[u32; 64]> = smallvec![];
824
let mut edit_idx = 0;
825
for block in 0..self.num_blocks() {
826
let end_inst = InsnIndex::new(self.block_ranges.get(block).end);
827
let start_edit_idx = edit_idx;
828
while edit_idx < regalloc.edits.len() && regalloc.edits[edit_idx].0.inst() < end_inst {
829
edit_idx += 1;
830
}
831
let end_edit_idx = edit_idx;
832
ra_edits_per_block.push((end_edit_idx - start_edit_idx) as u32);
833
}
834
835
let is_forward_edge_cfi_enabled = self.abi.is_forward_edge_cfi_enabled();
836
let mut bb_padding = match flags.bb_padding_log2_minus_one() {
837
0 => Vec::new(),
838
n => vec![0; 1 << (n - 1)],
839
};
840
let mut total_bb_padding = 0;
841
842
for (block_order_idx, &block) in final_order.iter().enumerate() {
843
trace!("emitting block {:?}", block);
844
845
// Call the new block hook for state
846
state.on_new_block();
847
848
// Emit NOPs to align the block.
849
let new_offset = I::align_basic_block(buffer.cur_offset());
850
while new_offset > buffer.cur_offset() {
851
// Pad with NOPs up to the aligned block offset.
852
let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize);
853
nop.emit(&mut buffer, &self.emit_info, &mut Default::default());
854
}
855
assert_eq!(buffer.cur_offset(), new_offset);
856
857
let do_emit = |inst: &I,
858
disasm: &mut String,
859
buffer: &mut MachBuffer<I>,
860
state: &mut I::State| {
861
if want_disasm && !inst.is_args() {
862
let mut s = state.clone();
863
writeln!(disasm, " {}", inst.pretty_print_inst(&mut s)).unwrap();
864
}
865
inst.emit(buffer, &self.emit_info, state);
866
};
867
868
// Is this the first block? Emit the prologue directly if so.
869
if block == self.entry {
870
trace!(" -> entry block");
871
buffer.start_srcloc(Default::default());
872
for inst in &self.abi.gen_prologue() {
873
do_emit(&inst, &mut disasm, &mut buffer, &mut state);
874
}
875
buffer.end_srcloc();
876
}
877
878
// Now emit the regular block body.
879
880
buffer.bind_label(MachLabel::from_block(block), state.ctrl_plane_mut());
881
882
if want_disasm {
883
writeln!(&mut disasm, "block{}:", block.index()).unwrap();
884
}
885
886
if flags.machine_code_cfg_info() {
887
// Track BB starts. If we have backed up due to MachBuffer
888
// branch opts, mark the blocks we backed up over as removed.
889
let cur_offset = buffer.cur_offset();
890
if last_offset.is_some() && cur_offset <= last_offset.unwrap() {
891
for i in (0..bb_starts.len()).rev() {
892
if bb_starts[i].is_some() && cur_offset > bb_starts[i].unwrap() {
893
break;
894
}
895
bb_starts[i] = None;
896
}
897
}
898
bb_starts.push(Some(cur_offset));
899
last_offset = Some(cur_offset);
900
}
901
902
if let Some(block_start) = I::gen_block_start(
903
self.block_order.is_indirect_branch_target(block),
904
is_forward_edge_cfi_enabled,
905
) {
906
do_emit(&block_start, &mut disasm, &mut buffer, &mut state);
907
}
908
909
for inst_or_edit in regalloc.block_insts_and_edits(&self, block) {
910
match inst_or_edit {
911
InstOrEdit::Inst(iix) => {
912
if !self.debug_value_labels.is_empty() {
913
// If we need to produce debug info,
914
// record the offset of each instruction
915
// so that we can translate value-label
916
// ranges to machine-code offsets.
917
918
// Cold blocks violate monotonicity
919
// assumptions elsewhere (that
920
// instructions in inst-index order are in
921
// order in machine code), so we omit
922
// their offsets here. Value-label range
923
// generation below will skip empty ranges
924
// and ranges with to-offsets of zero.
925
if !self.block_order.is_cold(block) {
926
inst_offsets[iix.index()] = buffer.cur_offset();
927
}
928
}
929
930
// Update the srcloc at this point in the buffer.
931
let srcloc = self.srclocs[iix.index()];
932
if cur_srcloc != Some(srcloc) {
933
if cur_srcloc.is_some() {
934
buffer.end_srcloc();
935
}
936
buffer.start_srcloc(srcloc);
937
cur_srcloc = Some(srcloc);
938
}
939
940
// If this is a safepoint, compute a stack map
941
// and pass it to the emit state.
942
let stack_map_disasm = if self.insts[iix.index()].is_safepoint() {
943
let (user_stack_map, user_stack_map_disasm) = {
944
// The `user_stack_maps` is keyed by reverse
945
// instruction index, so we must flip the
946
// index. We can't put this into a helper method
947
// due to borrowck issues because parts of
948
// `self` are borrowed mutably elsewhere in this
949
// function.
950
let index = iix.to_backwards_insn_index(self.num_insts());
951
let user_stack_map = self.user_stack_maps.remove(&index);
952
let user_stack_map_disasm = if want_disasm {
953
user_stack_map.as_ref().map(|m| format!(" ; {m:?}"))
954
} else {
955
None
956
};
957
(user_stack_map, user_stack_map_disasm)
958
};
959
960
state.pre_safepoint(user_stack_map);
961
962
user_stack_map_disasm
963
} else {
964
None
965
};
966
967
// Place debug tags in the emission buffer
968
// either at the offset prior to the
969
// instruction or after the instruction,
970
// depending on whether this is a call. See
971
// the documentation on [`MachDebugTagPos`]
972
// for details on why.
973
let mut debug_tag_disasm = None;
974
let mut place_debug_tags =
975
|this: &VCode<I>, pos: MachDebugTagPos, buffer: &mut MachBuffer<I>| {
976
// As above, translate the forward instruction
977
// index to a backward index for the lookup.
978
let debug_tag_range = {
979
let index = iix.to_backwards_insn_index(this.num_insts());
980
this.debug_tags.get(&index)
981
};
982
if let Some(range) = debug_tag_range {
983
let start = usize::try_from(range.start).unwrap();
984
let end = usize::try_from(range.end).unwrap();
985
let tags = &this.debug_tag_pool[start..end];
986
987
if want_disasm {
988
debug_tag_disasm =
989
Some(format!(" ; ^-- debug @ {pos:?}: {tags:?}"));
990
}
991
buffer.push_debug_tags(pos, tags);
992
}
993
};
994
let debug_tag_pos =
995
if self.insts[iix.index()].call_type() == CallType::Regular {
996
MachDebugTagPos::Post
997
} else {
998
MachDebugTagPos::Pre
999
};
1000
1001
if debug_tag_pos == MachDebugTagPos::Pre {
1002
place_debug_tags(&self, debug_tag_pos, &mut buffer);
1003
}
1004
1005
// If the instruction we are about to emit is
1006
// a return, place an epilogue at this point
1007
// (and don't emit the return; the actual
1008
// epilogue will contain it).
1009
if self.insts[iix.index()].is_term() == MachTerminator::Ret {
1010
log::trace!("emitting epilogue");
1011
for inst in self.abi.gen_epilogue() {
1012
do_emit(&inst, &mut disasm, &mut buffer, &mut state);
1013
}
1014
} else {
1015
// Update the operands for this inst using the
1016
// allocations from the regalloc result.
1017
let mut allocs = regalloc.inst_allocs(iix).iter();
1018
self.insts[iix.index()].get_operands(
1019
&mut |reg: &mut Reg, constraint, _kind, _pos| {
1020
let alloc =
1021
allocs.next().expect("enough allocations for all operands");
1022
1023
if let Some(alloc) = alloc.as_reg() {
1024
let alloc: Reg = alloc.into();
1025
if let OperandConstraint::FixedReg(rreg) = constraint {
1026
debug_assert_eq!(Reg::from(rreg), alloc);
1027
}
1028
*reg = alloc;
1029
} else if let Some(alloc) = alloc.as_stack() {
1030
let alloc: Reg = alloc.into();
1031
*reg = alloc;
1032
}
1033
},
1034
);
1035
debug_assert!(allocs.next().is_none());
1036
1037
log::trace!("emitting: {:?}", self.insts[iix.index()]);
1038
1039
// Emit the instruction!
1040
do_emit(
1041
&self.insts[iix.index()],
1042
&mut disasm,
1043
&mut buffer,
1044
&mut state,
1045
);
1046
1047
if debug_tag_pos == MachDebugTagPos::Post {
1048
place_debug_tags(&self, debug_tag_pos, &mut buffer);
1049
}
1050
1051
if let Some(stack_map_disasm) = stack_map_disasm {
1052
disasm.push_str(&stack_map_disasm);
1053
disasm.push('\n');
1054
}
1055
if let Some(debug_tag_disasm) = debug_tag_disasm {
1056
disasm.push_str(&debug_tag_disasm);
1057
disasm.push('\n');
1058
}
1059
}
1060
}
1061
1062
InstOrEdit::Edit(Edit::Move { from, to }) => {
1063
// Create a move/spill/reload instruction and
1064
// immediately emit it.
1065
match (from.as_reg(), to.as_reg()) {
1066
(Some(from), Some(to)) => {
1067
// Reg-to-reg move.
1068
let from_rreg = Reg::from(from);
1069
let to_rreg = Writable::from_reg(Reg::from(to));
1070
debug_assert_eq!(from.class(), to.class());
1071
let ty = I::canonical_type_for_rc(from.class());
1072
let mv = I::gen_move(to_rreg, from_rreg, ty);
1073
do_emit(&mv, &mut disasm, &mut buffer, &mut state);
1074
}
1075
(Some(from), None) => {
1076
// Spill from register to spillslot.
1077
let to = to.as_stack().unwrap();
1078
let from_rreg = RealReg::from(from);
1079
let spill = self.abi.gen_spill(to, from_rreg);
1080
do_emit(&spill, &mut disasm, &mut buffer, &mut state);
1081
}
1082
(None, Some(to)) => {
1083
// Load from spillslot to register.
1084
let from = from.as_stack().unwrap();
1085
let to_rreg = Writable::from_reg(RealReg::from(to));
1086
let reload = self.abi.gen_reload(to_rreg, from);
1087
do_emit(&reload, &mut disasm, &mut buffer, &mut state);
1088
}
1089
(None, None) => {
1090
panic!("regalloc2 should have eliminated stack-to-stack moves!");
1091
}
1092
}
1093
}
1094
}
1095
}
1096
1097
if cur_srcloc.is_some() {
1098
buffer.end_srcloc();
1099
cur_srcloc = None;
1100
}
1101
1102
// Do we need an island? Get the worst-case size of the next BB, add
1103
// it to the optional padding behind the block, and pass this to the
1104
// `MachBuffer` to determine if an island is necessary.
1105
let worst_case_next_bb = if block_order_idx < final_order.len() - 1 {
1106
let next_block = final_order[block_order_idx + 1];
1107
let next_block_range = self.block_ranges.get(next_block.index());
1108
let next_block_size = next_block_range.len() as u32;
1109
let next_block_ra_insertions = ra_edits_per_block[next_block.index()];
1110
I::worst_case_size() * (next_block_size + next_block_ra_insertions)
1111
} else {
1112
0
1113
};
1114
let padding = if bb_padding.is_empty() {
1115
0
1116
} else {
1117
bb_padding.len() as u32 + I::LabelUse::ALIGN - 1
1118
};
1119
if buffer.island_needed(padding + worst_case_next_bb) {
1120
buffer.emit_island(padding + worst_case_next_bb, ctrl_plane);
1121
}
1122
1123
// Insert padding, if configured, to stress the `MachBuffer`'s
1124
// relocation and island calculations.
1125
//
1126
// Padding can get quite large during fuzzing though so place a
1127
// total cap on it: when a per-function threshold is exceeded,
1128
// the padding is turned back down to zero. This avoids a small-ish
1129
// test case generating a GB+ memory footprint in Cranelift for
1130
// example.
1131
if !bb_padding.is_empty() {
1132
buffer.put_data(&bb_padding);
1133
buffer.align_to(I::LabelUse::ALIGN);
1134
total_bb_padding += bb_padding.len();
1135
if total_bb_padding > (150 << 20) {
1136
bb_padding = Vec::new();
1137
}
1138
}
1139
}
1140
1141
debug_assert!(
1142
self.user_stack_maps.is_empty(),
1143
"any stack maps should have been consumed by instruction emission, still have: {:#?}",
1144
self.user_stack_maps,
1145
);
1146
1147
// Do any optimizations on branches at tail of buffer, as if we had
1148
// bound one last label.
1149
buffer.optimize_branches(ctrl_plane);
1150
1151
// emission state is not needed anymore, move control plane back out
1152
*ctrl_plane = state.take_ctrl_plane();
1153
1154
let func_body_len = buffer.cur_offset();
1155
1156
// Create `bb_edges` and final (filtered) `bb_starts`.
1157
let mut bb_edges = vec![];
1158
let mut bb_offsets = vec![];
1159
if flags.machine_code_cfg_info() {
1160
for block in 0..self.num_blocks() {
1161
if bb_starts[block].is_none() {
1162
// Block was deleted by MachBuffer; skip.
1163
continue;
1164
}
1165
let from = bb_starts[block].unwrap();
1166
1167
bb_offsets.push(from);
1168
// Resolve each `succ` label and add edges.
1169
let succs = self.block_succs(BlockIndex::new(block));
1170
for &succ in succs.iter() {
1171
let to = buffer.resolve_label_offset(MachLabel::from_block(succ));
1172
bb_edges.push((from, to));
1173
}
1174
}
1175
}
1176
1177
self.monotonize_inst_offsets(&mut inst_offsets[..], func_body_len);
1178
let value_labels_ranges =
1179
self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len);
1180
1181
// Store metadata about frame layout in the MachBuffer.
1182
buffer.set_frame_layout(self.abi.frame_slot_metadata());
1183
1184
EmitResult {
1185
buffer: buffer.finish(&self.constants, ctrl_plane),
1186
bb_offsets,
1187
bb_edges,
1188
disasm: if want_disasm { Some(disasm) } else { None },
1189
value_labels_ranges,
1190
}
1191
}
1192
1193
fn monotonize_inst_offsets(&self, inst_offsets: &mut [CodeOffset], func_body_len: u32) {
1194
if self.debug_value_labels.is_empty() {
1195
return;
1196
}
1197
1198
// During emission, branch removal can make offsets of instructions incorrect.
1199
// Consider the following sequence: [insi][jmp0][jmp1][jmp2][insj]
1200
// It will be recorded as (say): [30] [34] [38] [42] [<would be 46>]
1201
// When the jumps get removed we are left with (in "inst_offsets"):
1202
// [insi][jmp0][jmp1][jmp2][insj][...]
1203
// [30] [34] [38] [42] [34]
1204
// Which violates the monotonicity invariant. This method sets offsets of these
1205
// removed instructions such as to make them appear zero-sized:
1206
// [insi][jmp0][jmp1][jmp2][insj][...]
1207
// [30] [34] [34] [34] [34]
1208
//
1209
let mut next_offset = func_body_len;
1210
for inst_index in (0..(inst_offsets.len() - 1)).rev() {
1211
let inst_offset = inst_offsets[inst_index];
1212
1213
// Not all instructions get their offsets recorded.
1214
if inst_offset == NO_INST_OFFSET {
1215
continue;
1216
}
1217
1218
if inst_offset > next_offset {
1219
trace!(
1220
"Fixing code offset of the removed Inst {}: {} -> {}",
1221
inst_index, inst_offset, next_offset
1222
);
1223
inst_offsets[inst_index] = next_offset;
1224
continue;
1225
}
1226
1227
next_offset = inst_offset;
1228
}
1229
}
1230
1231
fn compute_value_labels_ranges(
1232
&self,
1233
regalloc: &regalloc2::Output,
1234
inst_offsets: &[CodeOffset],
1235
func_body_len: u32,
1236
) -> ValueLabelsRanges {
1237
if self.debug_value_labels.is_empty() {
1238
return ValueLabelsRanges::default();
1239
}
1240
1241
if trace_log_enabled!() {
1242
self.log_value_labels_ranges(regalloc, inst_offsets);
1243
}
1244
1245
let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
1246
for &(label, from, to, alloc) in &regalloc.debug_locations {
1247
let label = ValueLabel::from_u32(label);
1248
let ranges = value_labels_ranges.entry(label).or_insert_with(|| vec![]);
1249
let prog_point_to_inst = |prog_point: ProgPoint| {
1250
let mut inst = prog_point.inst();
1251
if prog_point.pos() == InstPosition::After {
1252
inst = inst.next();
1253
}
1254
inst.index()
1255
};
1256
let inst_to_offset = |inst_index: usize| {
1257
// Skip over cold blocks.
1258
for offset in &inst_offsets[inst_index..] {
1259
if *offset != NO_INST_OFFSET {
1260
return *offset;
1261
}
1262
}
1263
func_body_len
1264
};
1265
let from_inst_index = prog_point_to_inst(from);
1266
let to_inst_index = prog_point_to_inst(to);
1267
let from_offset = inst_to_offset(from_inst_index);
1268
let to_offset = inst_to_offset(to_inst_index);
1269
1270
// Empty ranges or unavailable offsets can happen
1271
// due to cold blocks and branch removal (see above).
1272
if from_offset == to_offset {
1273
continue;
1274
}
1275
1276
let loc = if let Some(preg) = alloc.as_reg() {
1277
LabelValueLoc::Reg(Reg::from(preg))
1278
} else {
1279
#[cfg(not(feature = "unwind"))]
1280
continue;
1281
1282
#[cfg(feature = "unwind")]
1283
{
1284
let slot = alloc.as_stack().unwrap();
1285
let slot_offset = self.abi.get_spillslot_offset(slot);
1286
let slot_base_to_caller_sp_offset = self.abi.slot_base_to_caller_sp_offset();
1287
let caller_sp_to_cfa_offset =
1288
crate::isa::unwind::systemv::caller_sp_to_cfa_offset();
1289
// NOTE: this is a negative offset because it's relative to the caller's SP
1290
let cfa_to_sp_offset =
1291
-((slot_base_to_caller_sp_offset + caller_sp_to_cfa_offset) as i64);
1292
LabelValueLoc::CFAOffset(cfa_to_sp_offset + slot_offset)
1293
}
1294
};
1295
1296
// Coalesce adjacent ranges for the same location
1297
// to minimize output size here and for the consumers.
1298
if let Some(last_loc_range) = ranges.last_mut() {
1299
if last_loc_range.loc == loc && last_loc_range.end == from_offset {
1300
trace!(
1301
"Extending debug range for {:?} in {:?} to Inst {} ({})",
1302
label, loc, to_inst_index, to_offset
1303
);
1304
last_loc_range.end = to_offset;
1305
continue;
1306
}
1307
}
1308
1309
trace!(
1310
"Recording debug range for {:?} in {:?}: [Inst {}..Inst {}) [{}..{})",
1311
label, loc, from_inst_index, to_inst_index, from_offset, to_offset
1312
);
1313
1314
ranges.push(ValueLocRange {
1315
loc,
1316
start: from_offset,
1317
end: to_offset,
1318
});
1319
}
1320
1321
value_labels_ranges
1322
}
1323
1324
fn log_value_labels_ranges(&self, regalloc: &regalloc2::Output, inst_offsets: &[CodeOffset]) {
1325
debug_assert!(trace_log_enabled!());
1326
1327
// What debug labels do we have? Note we'll skip those that have not been
1328
// allocated any location at all. They will show up as numeric gaps in the table.
1329
let mut labels = vec![];
1330
for &(label, _, _, _) in &regalloc.debug_locations {
1331
if Some(&label) == labels.last() {
1332
continue;
1333
}
1334
labels.push(label);
1335
}
1336
1337
// Reformat the data on what VRegs were the VLs assigned to by lowering, since
1338
// the array we have is sorted by VReg, and we want it sorted by VL for easy
1339
// access in the loop below.
1340
let mut vregs = vec![];
1341
for &(vreg, start, end, label) in &self.debug_value_labels {
1342
if matches!(labels.binary_search(&label), Ok(_)) {
1343
vregs.push((label, start, end, vreg));
1344
}
1345
}
1346
vregs.sort_unstable_by(
1347
|(l_label, l_start, _, _), (r_label, r_start, _, _)| match l_label.cmp(r_label) {
1348
Ordering::Equal => l_start.cmp(r_start),
1349
cmp => cmp,
1350
},
1351
);
1352
1353
#[derive(PartialEq)]
1354
enum Mode {
1355
Measure,
1356
Emit,
1357
}
1358
#[derive(PartialEq)]
1359
enum Row {
1360
Head,
1361
Line,
1362
Inst(usize, usize),
1363
}
1364
1365
let mut widths = vec![0; 3 + 2 * labels.len()];
1366
let mut row = String::new();
1367
let mut output_row = |row_kind: Row, mode: Mode| {
1368
let mut column_index = 0;
1369
row.clear();
1370
1371
macro_rules! output_cell_impl {
1372
($fill:literal, $span:literal, $($cell_fmt:tt)*) => {
1373
let column_start = row.len();
1374
{
1375
row.push('|');
1376
write!(row, $($cell_fmt)*).unwrap();
1377
}
1378
1379
let next_column_index = column_index + $span;
1380
let expected_width: usize = widths[column_index..next_column_index].iter().sum();
1381
if mode == Mode::Measure {
1382
let actual_width = row.len() - column_start;
1383
if actual_width > expected_width {
1384
widths[next_column_index - 1] += actual_width - expected_width;
1385
}
1386
} else {
1387
let column_end = column_start + expected_width;
1388
while row.len() != column_end {
1389
row.push($fill);
1390
}
1391
}
1392
column_index = next_column_index;
1393
};
1394
}
1395
macro_rules! output_cell {
1396
($($cell_fmt:tt)*) => {
1397
output_cell_impl!(' ', 1, $($cell_fmt)*);
1398
};
1399
}
1400
1401
match row_kind {
1402
Row::Head => {
1403
output_cell!("BB");
1404
output_cell!("Inst");
1405
output_cell!("IP");
1406
for label in &labels {
1407
output_cell_impl!(' ', 2, "{:?}", ValueLabel::from_u32(*label));
1408
}
1409
}
1410
Row::Line => {
1411
debug_assert!(mode == Mode::Emit);
1412
for _ in 0..3 {
1413
output_cell_impl!('-', 1, "");
1414
}
1415
for _ in &labels {
1416
output_cell_impl!('-', 2, "");
1417
}
1418
}
1419
Row::Inst(block_index, inst_index) => {
1420
debug_assert!(inst_index < self.num_insts());
1421
if self.block_ranges.get(block_index).start == inst_index {
1422
output_cell!("B{}", block_index);
1423
} else {
1424
output_cell!("");
1425
}
1426
output_cell!("Inst {inst_index} ");
1427
output_cell!("{} ", inst_offsets[inst_index]);
1428
1429
for label in &labels {
1430
// First, the VReg.
1431
use regalloc2::Inst;
1432
let vreg_cmp = |inst: usize,
1433
vreg_label: &u32,
1434
range_start: &Inst,
1435
range_end: &Inst| {
1436
match vreg_label.cmp(&label) {
1437
Ordering::Equal => {
1438
if range_end.index() <= inst {
1439
Ordering::Less
1440
} else if range_start.index() > inst {
1441
Ordering::Greater
1442
} else {
1443
Ordering::Equal
1444
}
1445
}
1446
cmp => cmp,
1447
}
1448
};
1449
let vreg_index =
1450
vregs.binary_search_by(|(l, s, e, _)| vreg_cmp(inst_index, l, s, e));
1451
if let Ok(vreg_index) = vreg_index {
1452
let mut prev_vreg = None;
1453
if inst_index > 0 {
1454
let prev_vreg_index = vregs.binary_search_by(|(l, s, e, _)| {
1455
vreg_cmp(inst_index - 1, l, s, e)
1456
});
1457
if let Ok(prev_vreg_index) = prev_vreg_index {
1458
prev_vreg = Some(vregs[prev_vreg_index].3);
1459
}
1460
}
1461
1462
let vreg = vregs[vreg_index].3;
1463
if Some(vreg) == prev_vreg {
1464
output_cell!("*");
1465
} else {
1466
output_cell!("{}", vreg);
1467
}
1468
} else {
1469
output_cell!("");
1470
}
1471
1472
// Second, the allocated location.
1473
let inst_prog_point = ProgPoint::before(Inst::new(inst_index));
1474
let range_index = regalloc.debug_locations.binary_search_by(
1475
|(range_label, range_start, range_end, _)| match range_label.cmp(label)
1476
{
1477
Ordering::Equal => {
1478
if *range_end <= inst_prog_point {
1479
Ordering::Less
1480
} else if *range_start > inst_prog_point {
1481
Ordering::Greater
1482
} else {
1483
Ordering::Equal
1484
}
1485
}
1486
cmp => cmp,
1487
},
1488
);
1489
if let Ok(range_index) = range_index {
1490
// Live at this instruction, print the location.
1491
if let Some(reg) = regalloc.debug_locations[range_index].3.as_reg() {
1492
output_cell!("{:?}", Reg::from(reg));
1493
} else {
1494
output_cell!("Stk");
1495
}
1496
} else {
1497
// Not live at this instruction.
1498
output_cell!("");
1499
}
1500
}
1501
}
1502
}
1503
row.push('|');
1504
1505
if mode == Mode::Emit {
1506
trace!("{}", row.as_str());
1507
}
1508
};
1509
1510
for block_index in 0..self.num_blocks() {
1511
for inst_index in self.block_ranges.get(block_index) {
1512
output_row(Row::Inst(block_index, inst_index), Mode::Measure);
1513
}
1514
}
1515
output_row(Row::Head, Mode::Measure);
1516
1517
output_row(Row::Head, Mode::Emit);
1518
output_row(Row::Line, Mode::Emit);
1519
for block_index in 0..self.num_blocks() {
1520
for inst_index in self.block_ranges.get(block_index) {
1521
output_row(Row::Inst(block_index, inst_index), Mode::Emit);
1522
}
1523
}
1524
}
1525
1526
/// Get the IR block for a BlockIndex, if one exists.
1527
pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
1528
self.block_order.lowered_order()[block.index()].orig_block()
1529
}
1530
1531
/// Get the type of a VReg.
1532
pub fn vreg_type(&self, vreg: VReg) -> Type {
1533
self.vreg_types[vreg.vreg()]
1534
}
1535
1536
/// Get the fact, if any, for a given VReg.
1537
pub fn vreg_fact(&self, vreg: VReg) -> Option<&Fact> {
1538
self.facts[vreg.vreg()].as_ref()
1539
}
1540
1541
/// Set the fact for a given VReg.
1542
pub fn set_vreg_fact(&mut self, vreg: VReg, fact: Fact) {
1543
trace!("set fact on {}: {:?}", vreg, fact);
1544
self.facts[vreg.vreg()] = Some(fact);
1545
}
1546
1547
/// Does a given instruction define any facts?
1548
pub fn inst_defines_facts(&self, inst: InsnIndex) -> bool {
1549
self.inst_operands(inst)
1550
.iter()
1551
.filter(|o| o.kind() == OperandKind::Def)
1552
.map(|o| o.vreg())
1553
.any(|vreg| self.facts[vreg.vreg()].is_some())
1554
}
1555
1556
/// Get the user stack map associated with the given forward instruction index.
1557
pub fn get_user_stack_map(&self, inst: InsnIndex) -> Option<&ir::UserStackMap> {
1558
let index = inst.to_backwards_insn_index(self.num_insts());
1559
self.user_stack_maps.get(&index)
1560
}
1561
}
1562
1563
impl<I: VCodeInst> core::ops::Index<InsnIndex> for VCode<I> {
1564
type Output = I;
1565
fn index(&self, idx: InsnIndex) -> &Self::Output {
1566
&self.insts[idx.index()]
1567
}
1568
}
1569
1570
impl<I: VCodeInst> RegallocFunction for VCode<I> {
    fn num_insts(&self) -> usize {
        self.insts.len()
    }

    fn num_blocks(&self) -> usize {
        self.block_ranges.len()
    }

    fn entry_block(&self) -> BlockIndex {
        self.entry
    }

    fn block_insns(&self, block: BlockIndex) -> InstRange {
        let range = self.block_ranges.get(block.index());
        InstRange::new(InsnIndex::new(range.start), InsnIndex::new(range.end))
    }

    fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] {
        let range = self.block_succ_range.get(block.index());
        &self.block_succs[range]
    }

    fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] {
        let range = self.block_pred_range.get(block.index());
        &self.block_preds[range]
    }

    fn block_params(&self, block: BlockIndex) -> &[VReg] {
        // As a special case we don't return block params for the entry block, as all the arguments
        // will be defined by the `Inst::Args` instruction.
        if block == self.entry {
            return &[];
        }

        let range = self.block_params_range.get(block.index());
        &self.block_params[range]
    }

    fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] {
        let succ_range = self.branch_block_arg_succ_range.get(block.index());
        debug_assert!(succ_idx < succ_range.len());
        let branch_block_args = self.branch_block_arg_range.get(succ_range.start + succ_idx);
        &self.branch_block_args[branch_block_args]
    }

    fn is_ret(&self, insn: InsnIndex) -> bool {
        match self.insts[insn.index()].is_term() {
            // We treat blocks terminated by an unconditional trap like a return for regalloc.
            MachTerminator::None => self.insts[insn.index()].is_trap(),
            MachTerminator::Ret | MachTerminator::RetCall => true,
            MachTerminator::Branch => false,
        }
    }

    fn is_branch(&self, insn: InsnIndex) -> bool {
        match self.insts[insn.index()].is_term() {
            MachTerminator::Branch => true,
            _ => false,
        }
    }

    fn inst_operands(&self, insn: InsnIndex) -> &[Operand] {
        let range = self.operand_ranges.get(insn.index());
        &self.operands[range]
    }

    fn inst_clobbers(&self, insn: InsnIndex) -> PRegSet {
        self.clobbers.get(&insn).cloned().unwrap_or_default()
    }

    fn num_vregs(&self) -> usize {
        self.vreg_types.len()
    }

    fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] {
        &self.debug_value_labels
    }

    fn spillslot_size(&self, regclass: RegClass) -> usize {
        self.abi.get_spillslot_size(regclass) as usize
    }

    fn allow_multiple_vreg_defs(&self) -> bool {
        // At least the s390x backend requires this, because the
        // `Loop` pseudo-instruction aggregates all Operands so pinned
        // vregs (RealRegs) may occur more than once.
        true
    }
}
impl<I: VCodeInst> Debug for VRegAllocator<I> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "VRegAllocator {{")?;

        let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>();
        alias_keys.sort_unstable();
        for key in alias_keys {
            let dest = self.vreg_aliases.get(&key).unwrap();
            writeln!(f, " {:?} := {:?}", Reg::from(key), Reg::from(*dest))?;
        }

        for (vreg, fact) in self.facts.iter().enumerate() {
            if let Some(fact) = fact {
                writeln!(f, " v{vreg} ! {fact}")?;
            }
        }

        writeln!(f, "}}")
    }
}
impl<I: VCodeInst> fmt::Debug for VCode<I> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "VCode {{")?;
        writeln!(f, " Entry block: {}", self.entry.index())?;

        let mut state = Default::default();

        for block in 0..self.num_blocks() {
            let block = BlockIndex::new(block);
            writeln!(
                f,
                "Block {}({:?}):",
                block.index(),
                self.block_params(block)
            )?;
            if let Some(bb) = self.bindex_to_bb(block) {
                writeln!(f, " (original IR block: {bb})")?;
            }
            for (succ_idx, succ) in self.block_succs(block).iter().enumerate() {
                writeln!(
                    f,
                    " (successor: Block {}({:?}))",
                    succ.index(),
                    self.branch_blockparams(block, InsnIndex::new(0) /* dummy */, succ_idx)
                )?;
            }
            for inst in self.block_ranges.get(block.index()) {
                writeln!(
                    f,
                    " Inst {}: {}",
                    inst,
                    self.insts[inst].pretty_print_inst(&mut state)
                )?;
                if !self.operands.is_empty() {
                    for operand in self.inst_operands(InsnIndex::new(inst)) {
                        if operand.kind() == OperandKind::Def {
                            if let Some(fact) = &self.facts[operand.vreg().vreg()] {
                                writeln!(f, " v{} ! {}", operand.vreg().vreg(), fact)?;
                            }
                        }
                    }
                }
                if let Some(user_stack_map) = self.get_user_stack_map(InsnIndex::new(inst)) {
                    writeln!(f, " {user_stack_map:?}")?;
                }
            }
        }

        writeln!(f, "}}")?;
        Ok(())
    }
}
/// This structure manages VReg allocation during the lifetime of the `VCodeBuilder`.
pub struct VRegAllocator<I> {
    /// VReg IR-level types.
    vreg_types: Vec<Type>,

    /// VReg aliases. When the final VCode is built we rewrite all
    /// uses of the keys in this table to their replacement values.
    ///
    /// We use these aliases to rename an instruction's expected
    /// result vregs to the returned vregs from lowering, which are
    /// usually freshly-allocated temps.
    vreg_aliases: FxHashMap<regalloc2::VReg, regalloc2::VReg>,

    /// A deferred error, to be bubbled up to the top level of the
    /// lowering algorithm. We take this approach because we cannot
    /// currently propagate a `Result` upward through ISLE code (the
    /// lowering rules) or some ABI code.
    deferred_error: Option<CodegenError>,

    /// Facts on VRegs, for proof-carrying code.
    facts: Vec<Option<Fact>>,

    /// The type of instruction that this allocator makes registers for.
    _inst: core::marker::PhantomData<I>,
}
impl<I: VCodeInst> VRegAllocator<I> {
    /// Make a new VRegAllocator.
    pub fn with_capacity(capacity: usize) -> Self {
        let capacity = first_user_vreg_index() + capacity;
        let mut vreg_types = Vec::with_capacity(capacity);
        vreg_types.resize(first_user_vreg_index(), types::INVALID);
        Self {
            vreg_types,
            vreg_aliases: FxHashMap::with_capacity_and_hasher(capacity, Default::default()),
            deferred_error: None,
            facts: Vec::with_capacity(capacity),
            _inst: core::marker::PhantomData::default(),
        }
    }
    /// Allocate a fresh ValueRegs.
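    ///
    /// A minimal sketch of typical usage (illustrative only; `vregs` is a
    /// hypothetical `VRegAllocator`, and the exact number of vregs per type
    /// is decided by `I::rc_for_type`):
    ///
    /// ```ignore
    /// let a = vregs.alloc(types::I64)?;  // usually one vreg on a 64-bit backend
    /// let b = vregs.alloc(types::I128)?; // usually two vregs on a 64-bit backend
    /// assert_eq!(b.len(), 2);
    /// ```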
    pub fn alloc(&mut self, ty: Type) -> CodegenResult<ValueRegs<Reg>> {
        if self.deferred_error.is_some() {
            return Err(CodegenError::CodeTooLarge);
        }
        let v = self.vreg_types.len();
        let (regclasses, tys) = I::rc_for_type(ty)?;
        if v + regclasses.len() >= VReg::MAX {
            return Err(CodegenError::CodeTooLarge);
        }

        let regs: ValueRegs<Reg> = match regclasses {
            &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()),
            &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()),
            // We can extend this if/when we support 32-bit targets; e.g.,
            // an i128 on a 32-bit machine will need up to four machine regs
            // for a `Value`.
            _ => panic!("Value must reside in 1 or 2 registers"),
        };
        for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
            let vreg = reg.to_virtual_reg().unwrap();
            debug_assert_eq!(self.vreg_types.len(), vreg.index());
            self.vreg_types.push(reg_ty);
        }

        // Create empty facts for each allocated vreg.
        self.facts.resize(self.vreg_types.len(), None);

        Ok(regs)
    }
    /// Allocate a fresh ValueRegs, deferring any out-of-vregs
    /// errors. This is useful in places where we cannot bubble a
    /// `CodegenResult` upward easily, and which are known to be
    /// invoked from within the lowering loop that checks the deferred
    /// error status below.
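    ///
    /// A minimal sketch of the intended pattern (illustrative only; `vregs`
    /// is a hypothetical `VRegAllocator` and the surrounding loop is not an
    /// API defined in this file):
    ///
    /// ```ignore
    /// // In code that cannot return a `CodegenResult` (e.g. ISLE-generated rules):
    /// let regs = vregs.alloc_with_deferred_error(ty);
    ///
    /// // Later, at the top level of lowering, surface any deferred error:
    /// if let Some(err) = vregs.take_deferred_error() {
    ///     return Err(err);
    /// }
    /// ```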
    pub fn alloc_with_deferred_error(&mut self, ty: Type) -> ValueRegs<Reg> {
        match self.alloc(ty) {
            Ok(x) => x,
            Err(e) => {
                self.deferred_error = Some(e);
                self.bogus_for_deferred_error(ty)
            }
        }
    }

    /// Take any deferred error that was accumulated by `alloc_with_deferred_error`.
    pub fn take_deferred_error(&mut self) -> Option<CodegenError> {
        self.deferred_error.take()
    }
    /// Produce a bogus VReg placeholder with the proper number of
    /// registers for the given type. This is meant to be used with
    /// deferred allocation errors (see `Lower::alloc_tmp()`).
    fn bogus_for_deferred_error(&self, ty: Type) -> ValueRegs<Reg> {
        let (regclasses, _tys) = I::rc_for_type(ty).expect("must have valid type");
        match regclasses {
            &[rc0] => ValueRegs::one(VReg::new(0, rc0).into()),
            &[rc0, rc1] => ValueRegs::two(VReg::new(0, rc0).into(), VReg::new(1, rc1).into()),
            _ => panic!("Value must reside in 1 or 2 registers"),
        }
    }
    /// Rewrite any mention of `from` into `to`.
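    ///
    /// Aliases resolve transitively. As a sketch (assuming `v1`, `v2`, and
    /// `v3` are distinct virtual registers):
    ///
    /// ```ignore
    /// allocator.set_vreg_alias(v1, v2);
    /// allocator.set_vreg_alias(v2, v3);
    /// // Uses of `v1` in the final VCode are rewritten to `v3`.
    /// ```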
    pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
        let from = from.into();
        let resolved_to = self.resolve_vreg_alias(to.into());
        // Disallow cycles (see below).
        assert_ne!(resolved_to, from);

        // Maintain the invariant that PCC facts only exist on vregs
        // which aren't aliases. We want to preserve whatever was
        // stated about the vreg before its producer was lowered.
        if let Some(fact) = self.facts[from.vreg()].take() {
            self.set_fact(resolved_to, fact);
        }

        let old_alias = self.vreg_aliases.insert(from, resolved_to);
        debug_assert_eq!(old_alias, None);
    }

    fn resolve_vreg_alias(&self, mut vreg: regalloc2::VReg) -> regalloc2::VReg {
        // We prevent cycles from existing by resolving targets of
        // aliases eagerly before setting them. If the target resolves
        // to the origin of the alias, then a cycle would be created
        // and the alias is disallowed. Because of the structure of
        // SSA code (one instruction can refer to another's defs but
        // not vice-versa, except indirectly through
        // phis/blockparams), cycles should not occur as we use
        // aliases to redirect vregs to the temps that actually define
        // them.
        while let Some(to) = self.vreg_aliases.get(&vreg) {
            vreg = *to;
        }
        vreg
    }
    #[inline]
    fn debug_assert_no_vreg_aliases(&self, mut list: impl Iterator<Item = VReg>) {
        debug_assert!(list.all(|vreg| !self.vreg_aliases.contains_key(&vreg)));
    }

    /// Set the proof-carrying code fact on a given virtual register.
    ///
    /// Returns the old fact, if any (only one fact can be stored).
    fn set_fact(&mut self, vreg: regalloc2::VReg, fact: Fact) -> Option<Fact> {
        trace!("vreg {:?} has fact: {:?}", vreg, fact);
        debug_assert!(!self.vreg_aliases.contains_key(&vreg));
        self.facts[vreg.vreg()].replace(fact)
    }

    /// Set a fact only if one doesn't already exist.
    pub fn set_fact_if_missing(&mut self, vreg: VirtualReg, fact: Fact) {
        let vreg = self.resolve_vreg_alias(vreg.into());
        if self.facts[vreg.vreg()].is_none() {
            self.set_fact(vreg, fact);
        }
    }

    /// Allocate a fresh ValueRegs, with a given fact to apply if
    /// the value fits in one VReg.
    pub fn alloc_with_maybe_fact(
        &mut self,
        ty: Type,
        fact: Option<Fact>,
    ) -> CodegenResult<ValueRegs<Reg>> {
        let result = self.alloc(ty)?;

        // Ensure that we don't lose a fact on a value that splits
        // into multiple VRegs.
        assert!(result.len() == 1 || fact.is_none());
        if let Some(fact) = fact {
            self.set_fact(result.regs()[0].into(), fact);
        }

        Ok(result)
    }
}
/// This structure tracks the large constants used in VCode that will be emitted separately by the
/// [MachBuffer].
///
/// First, during the lowering phase, constants are inserted using
/// [VCodeConstants::insert]; an intermediate handle, `VCodeConstant`, tracks what constants are
/// used in this phase. Some deduplication is performed, when possible, as constant
/// values are inserted.
///
/// Secondly, during the emission phase, the [MachBuffer] assigns [MachLabel]s for each of the
/// constants so that instructions can refer to the value's memory location. The [MachBuffer]
/// then writes the constant values to the buffer.
#[derive(Default)]
pub struct VCodeConstants {
    constants: PrimaryMap<VCodeConstant, VCodeConstantData>,
    pool_uses: HashMap<Constant, VCodeConstant>,
    well_known_uses: HashMap<*const [u8], VCodeConstant>,
    u64s: HashMap<[u8; 8], VCodeConstant>,
}
impl VCodeConstants {
    /// Initialize the structure with the expected number of constants.
    pub fn with_capacity(expected_num_constants: usize) -> Self {
        Self {
            constants: PrimaryMap::with_capacity(expected_num_constants),
            pool_uses: HashMap::with_capacity(expected_num_constants),
            well_known_uses: HashMap::new(),
            u64s: HashMap::new(),
        }
    }

    /// Insert a constant; using this method indicates that a constant value will be used and thus
    /// will be emitted to the `MachBuffer`. The current implementation can deduplicate constants
    /// that are [VCodeConstantData::Pool], [VCodeConstantData::WellKnown], or
    /// [VCodeConstantData::U64], but not [VCodeConstantData::Generated].
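    ///
    /// A minimal sketch of the deduplication behavior (illustrative only):
    ///
    /// ```ignore
    /// let mut constants = VCodeConstants::default();
    /// let a = constants.insert(VCodeConstantData::U64(42u64.to_le_bytes()));
    /// let b = constants.insert(VCodeConstantData::U64(42u64.to_le_bytes()));
    /// assert_eq!(a, b); // same handle: `U64` constants are deduplicated by value
    /// assert_eq!(constants.len(), 1);
    /// ```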
    pub fn insert(&mut self, data: VCodeConstantData) -> VCodeConstant {
        match data {
            VCodeConstantData::Generated(_) => self.constants.push(data),
            VCodeConstantData::Pool(constant, _) => match self.pool_uses.get(&constant) {
                None => {
                    let vcode_constant = self.constants.push(data);
                    self.pool_uses.insert(constant, vcode_constant);
                    vcode_constant
                }
                Some(&vcode_constant) => vcode_constant,
            },
            VCodeConstantData::WellKnown(data_ref) => {
                match self.well_known_uses.entry(data_ref as *const [u8]) {
                    Entry::Vacant(v) => {
                        let vcode_constant = self.constants.push(data);
                        v.insert(vcode_constant);
                        vcode_constant
                    }
                    Entry::Occupied(o) => *o.get(),
                }
            }
            VCodeConstantData::U64(value) => match self.u64s.entry(value) {
                Entry::Vacant(v) => {
                    let vcode_constant = self.constants.push(data);
                    v.insert(vcode_constant);
                    vcode_constant
                }
                Entry::Occupied(o) => *o.get(),
            },
        }
    }
    /// Return the number of constants inserted.
    pub fn len(&self) -> usize {
        self.constants.len()
    }

    /// Iterate over the `VCodeConstant` keys inserted in this structure.
    pub fn keys(&self) -> Keys<VCodeConstant> {
        self.constants.keys()
    }

    /// Iterate over the `VCodeConstant` keys and the [VCodeConstantData] inserted in this
    /// structure.
    pub fn iter(&self) -> impl Iterator<Item = (VCodeConstant, &VCodeConstantData)> {
        self.constants.iter()
    }

    /// Returns the data associated with the specified constant.
    pub fn get(&self, c: VCodeConstant) -> &VCodeConstantData {
        &self.constants[c]
    }

    /// Checks if the given [VCodeConstantData] is registered as
    /// used by the pool.
    pub fn pool_uses(&self, constant: &VCodeConstantData) -> bool {
        match constant {
            VCodeConstantData::Pool(c, _) => self.pool_uses.contains_key(c),
            _ => false,
        }
    }
}
/// A use of a constant by one or more VCode instructions; see [VCodeConstants].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct VCodeConstant(u32);
entity_impl!(VCodeConstant);

/// Identify the different types of constant that can be inserted into [VCodeConstants]. Tracking
/// these separately instead of as raw byte buffers allows us to avoid some duplication.
pub enum VCodeConstantData {
    /// A constant already present in the Cranelift IR
    /// [ConstantPool](crate::ir::constant::ConstantPool).
    Pool(Constant, ConstantData),
    /// A reference to a well-known constant value that is statically encoded within the compiler.
    WellKnown(&'static [u8]),
    /// A constant value generated during lowering; the value may depend on the instruction
    /// context, which makes it difficult to deduplicate. If possible, use the other variants.
    Generated(ConstantData),
    /// A constant of at most 64 bits. These are deduplicated as
    /// well. Stored as a fixed-size array of `u8` so that we do not
    /// encounter endianness problems when cross-compiling.
    U64([u8; 8]),
}

impl VCodeConstantData {
    /// Retrieve the constant data as a byte slice.
    pub fn as_slice(&self) -> &[u8] {
        match self {
            VCodeConstantData::Pool(_, d) | VCodeConstantData::Generated(d) => d.as_slice(),
            VCodeConstantData::WellKnown(d) => d,
            VCodeConstantData::U64(value) => &value[..],
        }
    }

    /// Calculate the alignment of the constant data.
    pub fn alignment(&self) -> u32 {
        if self.as_slice().len() <= 8 { 8 } else { 16 }
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use core::mem::size_of;

    #[test]
    fn size_of_constant_structs() {
        assert_eq!(size_of::<Constant>(), 4);
        assert_eq!(size_of::<VCodeConstant>(), 4);
        assert_eq!(size_of::<ConstantData>(), 3 * size_of::<usize>());
        assert_eq!(size_of::<VCodeConstantData>(), 4 * size_of::<usize>());
        assert_eq!(
            size_of::<PrimaryMap<VCodeConstant, VCodeConstantData>>(),
            3 * size_of::<usize>()
        );
        // TODO The VCodeConstants structure's memory size could be further optimized.
        // With certain versions of Rust, each `HashMap` in `VCodeConstants` occupied at
        // least 48 bytes, making an empty `VCodeConstants` cost 120 bytes.
    }
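
    // A minimal sketch-style check of `VCodeConstantData::alignment`: data of
    // at most 8 bytes is 8-byte aligned, while anything larger is 16-byte
    // aligned.
    #[test]
    fn alignment_of_constant_data() {
        assert_eq!(VCodeConstantData::U64(0u64.to_le_bytes()).alignment(), 8);
        assert_eq!(VCodeConstantData::WellKnown(&[0u8; 16]).alignment(), 16);
    }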
}