Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
bytecodealliance
GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/cranelift/codegen/src/egraph/elaborate.rs
3072 views
1
//! Elaboration phase: lowers EGraph back to sequences of operations
2
//! in CFG nodes.
3
4
use super::Stats;
5
use super::cost::Cost;
6
use crate::ctxhash::NullCtx;
7
use crate::dominator_tree::DominatorTree;
8
use crate::hash_map::Entry as HashEntry;
9
use crate::inst_predicates::is_pure_for_egraph;
10
use crate::ir::{Block, Function, Inst, Value, ValueDef};
11
use crate::loop_analysis::{Loop, LoopAnalysis};
12
use crate::scoped_hash_map::ScopedHashMap;
13
use crate::trace;
14
use crate::{FxHashMap, FxHashSet};
15
use alloc::vec::Vec;
16
use cranelift_control::ControlPlane;
17
use cranelift_entity::{EntitySet, SecondaryMap, packed_option::ReservedValue};
18
use smallvec::{SmallVec, smallvec};
19
20
/// State for the elaboration pass: takes the optimized egraph (whose
/// pure instructions have been removed from the layout) and places
/// concrete instruction sequences back into the CFG blocks.
pub(crate) struct Elaborator<'a> {
    /// The function being elaborated; pure insts are (re-)inserted
    /// into its layout on demand.
    func: &'a mut Function,
    /// Dominator tree driving the block walk and validating hoist
    /// targets.
    domtree: &'a DominatorTree,
    /// Loop analysis used for loop-invariant hoisting decisions.
    loop_analysis: &'a LoopAnalysis,
    /// Map from Value that is produced by a pure Inst (and was thus
    /// not in the side-effecting skeleton) to the value produced by
    /// an elaborated inst (placed in the layout) to whose results we
    /// refer in the final code.
    ///
    /// The first time we use some result of an instruction during
    /// elaboration, we can place it and insert an identity map (inst
    /// results to that same inst's results) in this scoped
    /// map. Within that block and its dom-tree children, that mapping
    /// is visible and we can continue to use it. This allows us to
    /// avoid cloning the instruction. However, if we pop that scope
    /// and use it somewhere else as well, we will need to
    /// duplicate. We detect this case by checking, when a value that
    /// we want is not present in this map, whether the producing inst
    /// is already placed in the Layout. If so, we duplicate, and
    /// insert non-identity mappings from the original inst's results
    /// to the cloned inst's results.
    ///
    /// Note that as values may refer to unions that represent a subset
    /// of a larger eclass, it's not valid to walk towards the root of a
    /// union tree: doing so would potentially equate values that fall
    /// on different branches of the dominator tree.
    value_to_elaborated_value: ScopedHashMap<Value, ElaboratedValue>,
    /// Map from Value to the best (lowest-cost) Value in its eclass
    /// (tree of union value-nodes).
    value_to_best_value: SecondaryMap<Value, BestEntry>,
    /// Stack of blocks and loops in current elaboration path.
    loop_stack: SmallVec<[LoopStackEntry; 8]>,
    /// The current block into which we are elaborating.
    cur_block: Block,
    /// Values that opt rules have indicated should be rematerialized
    /// in every block they are used (e.g., immediates or other
    /// "cheap-to-compute" ops).
    remat_values: &'a FxHashSet<Value>,
    /// Explicitly-unrolled value elaboration stack.
    elab_stack: Vec<ElabStackEntry>,
    /// Results from the elab stack.
    elab_result_stack: Vec<ElaboratedValue>,
    /// Explicitly-unrolled block elaboration stack.
    block_stack: Vec<BlockStackEntry>,
    /// Copies of values that have been rematerialized.
    remat_copies: FxHashMap<(Block, Value), Value>,
    /// Stats for various events during egraph processing, to help
    /// with optimization of this infrastructure.
    stats: &'a mut Stats,
    /// Chaos-mode control-plane so we can test that we still get
    /// correct results when our heuristics make bad decisions.
    ctrl_plane: &'a mut ControlPlane,
}
73
74
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
75
struct BestEntry(Cost, Value);
76
77
impl PartialOrd for BestEntry {
78
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
79
Some(self.cmp(other))
80
}
81
}
82
83
impl Ord for BestEntry {
84
#[inline]
85
fn cmp(&self, other: &Self) -> core::cmp::Ordering {
86
self.0.cmp(&other.0).then_with(|| {
87
// Note that this comparison is reversed. When costs are equal,
88
// prefer the value with the bigger index. This is a heuristic that
89
// prefers results of rewrites to the original value, since we
90
// expect that our rewrites are generally improvements.
91
self.1.cmp(&other.1).reverse()
92
})
93
}
94
}
95
96
/// A value that has been elaborated: the concrete `Value` to reference
/// in final code, together with the block its producer was placed in.
#[derive(Clone, Copy, Debug)]
struct ElaboratedValue {
    /// Block into which the producing instruction was elaborated;
    /// used for loop-invariance and rematerialization decisions.
    in_block: Block,
    /// The (possibly cloned) value to use.
    value: Value,
}
101
102
/// One level of the loop-nest stack maintained during the dominator
/// tree walk: describes a loop the current block is inside.
#[derive(Clone, Debug)]
struct LoopStackEntry {
    /// The loop identifier.
    lp: Loop,
    /// The hoist point: a block that immediately dominates this
    /// loop. May not be an immediate predecessor, but will be a valid
    /// point to place all loop-invariant ops: they must depend only
    /// on inputs that dominate the loop, so are available at (the end
    /// of) this block.
    hoist_block: Block,
    /// The depth in the scope map.
    scope_depth: u32,
}
115
116
/// One entry of the explicit value-elaboration stack, which replaces
/// what would otherwise be recursion over the dataflow graph.
#[derive(Clone, Debug)]
enum ElabStackEntry {
    /// Next action is to resolve this value into an elaborated inst
    /// (placed into the layout) that produces the value, and
    /// recursively elaborate the insts that produce its args.
    ///
    /// Any inserted ops should be inserted before `before`, which is
    /// the instruction demanding this value.
    Start { value: Value, before: Inst },
    /// Args have been pushed; waiting for results.
    PendingInst {
        /// The instruction whose arguments are being elaborated.
        inst: Inst,
        /// Index of the result of `inst` that the original request
        /// demanded.
        result_idx: usize,
        /// Number of elaborated arguments to pop off the result stack.
        num_args: usize,
        /// Insertion point for the (possibly cloned) instruction.
        before: Inst,
    },
}
133
134
/// One step of the explicit (non-recursive) dominator-tree traversal
/// in `elaborate_domtree`.
#[derive(Clone, Debug)]
enum BlockStackEntry {
    /// Elaborate `block`, whose immediate dominator is `idom`
    /// (`None` for the entry block), then push its domtree children.
    Elaborate { block: Block, idom: Option<Block> },
    /// Leave a dominator subtree: pop one scope level of the
    /// value-to-elaborated-value map.
    Pop,
}
139
140
impl<'a> Elaborator<'a> {
141
pub(crate) fn new(
142
func: &'a mut Function,
143
domtree: &'a DominatorTree,
144
loop_analysis: &'a LoopAnalysis,
145
remat_values: &'a FxHashSet<Value>,
146
stats: &'a mut Stats,
147
ctrl_plane: &'a mut ControlPlane,
148
) -> Self {
149
let num_values = func.dfg.num_values();
150
let mut value_to_best_value =
151
SecondaryMap::with_default(BestEntry(Cost::infinity(), Value::reserved_value()));
152
value_to_best_value.resize(num_values);
153
Self {
154
func,
155
domtree,
156
loop_analysis,
157
value_to_elaborated_value: ScopedHashMap::with_capacity(num_values),
158
value_to_best_value,
159
loop_stack: smallvec![],
160
cur_block: Block::reserved_value(),
161
remat_values,
162
elab_stack: vec![],
163
elab_result_stack: vec![],
164
block_stack: vec![],
165
remat_copies: FxHashMap::default(),
166
stats,
167
ctrl_plane,
168
}
169
}
170
171
/// Enter `block` during the domtree walk: pop loops we have left,
/// push a new loop level if `block` is a loop header, and record
/// `block` as the current insertion point.
fn start_block(&mut self, idom: Option<Block>, block: Block) {
    trace!(
        "start_block: block {:?} with idom {:?} at loop depth {:?} scope depth {}",
        block,
        idom,
        self.loop_stack.len(),
        self.value_to_elaborated_value.depth()
    );

    // Pop any loop levels we're no longer in.
    while let Some(inner_loop) = self.loop_stack.last() {
        if self.loop_analysis.is_in_loop(block, inner_loop.lp) {
            break;
        }
        self.loop_stack.pop();
    }

    // Note that if the *entry* block is a loop header, we will
    // not make note of the loop here because it will not have an
    // immediate dominator. We must disallow this case because we
    // will skip adding the `LoopStackEntry` here but our
    // `LoopAnalysis` will otherwise still make note of this loop
    // and loop depths will not match.
    if let Some(idom) = idom {
        if let Some(lp) = self.loop_analysis.is_loop_header(block) {
            self.loop_stack.push(LoopStackEntry {
                lp,
                // Any code hoisted out of this loop will have code
                // placed in `idom`, and will have def mappings
                // inserted in to the scoped hashmap at that block's
                // level.
                hoist_block: idom,
                // One less than the current depth: the domtree walk
                // already incremented the scope depth for `block`
                // itself, and hoisted defs belong to the parent scope.
                scope_depth: (self.value_to_elaborated_value.depth() - 1) as u32,
            });
            trace!(
                " -> loop header, pushing; depth now {}",
                self.loop_stack.len()
            );
        }
    } else {
        debug_assert!(
            self.loop_analysis.is_loop_header(block).is_none(),
            "Entry block (domtree root) cannot be a loop header!"
        );
    }

    trace!("block {}: loop stack is {:?}", block, self.loop_stack);

    self.cur_block = block;
}
221
222
/// Collect every value reachable from the side-effecting skeleton,
/// topologically sorted so each value's definition appears before any
/// of its uses (post-order DFS over the dataflow graph).
fn topo_sorted_values(&self) -> Vec<Value> {
    // Explicit DFS event machine: `Enter` schedules a value's
    // operands; `Exit` emits the value after all operands were
    // emitted.
    #[derive(Debug)]
    enum Event {
        Enter,
        Exit,
    }
    let mut stack = Vec::<(Event, Value)>::new();

    // Traverse the CFG in pre-order so that, when we look at the
    // instructions and operands inside each block, we see value defs before
    // uses.
    for block in crate::traversals::Dfs::new().pre_order_iter(&self.func) {
        for inst in self.func.layout.block_insts(block) {
            stack.extend(self.func.dfg.inst_values(inst).map(|v| (Event::Enter, v)));
        }
    }

    // We pushed in the desired order, so popping would implicitly reverse
    // that. Avoid that by reversing the initial stack before we start
    // traversing the DFG.
    stack.reverse();

    let mut sorted = Vec::with_capacity(self.func.dfg.values().len());
    let mut seen = EntitySet::<Value>::with_capacity(self.func.dfg.values().len());

    // Post-order traversal of the DFG, visiting value defs before uses.
    while let Some((event, value)) = stack.pop() {
        match event {
            Event::Enter => {
                if seen.insert(value) {
                    stack.push((Event::Exit, value));
                    match self.func.dfg.value_def(value) {
                        ValueDef::Result(inst, _) => {
                            // Push operands reversed so the first
                            // operand is popped (visited) first.
                            stack.extend(
                                self.func
                                    .dfg
                                    .inst_values(inst)
                                    .rev()
                                    .filter(|v| !seen.contains(*v))
                                    .map(|v| (Event::Enter, v)),
                            );
                        }
                        ValueDef::Union(a, b) => {
                            // A union node depends on both arms.
                            if !seen.contains(b) {
                                stack.push((Event::Enter, b));
                            }
                            if !seen.contains(a) {
                                stack.push((Event::Enter, a));
                            }
                        }
                        // Blockparams have no operands to visit.
                        ValueDef::Param(..) => {}
                    }
                }
            }
            Event::Exit => {
                sorted.push(value);
            }
        }
    }

    sorted
}
284
285
/// Fill `value_to_best_value`: for each value, the lowest-cost
/// representative of its eclass. Under chaos-testing mode this
/// deterministically picks the worst representative instead.
fn compute_best_values(&mut self) {
    let sorted_values = self.topo_sorted_values();

    let best = &mut self.value_to_best_value;

    // We can't make random decisions inside the fixpoint loop below because
    // that could cause values to change on every iteration of the loop,
    // which would make the loop never terminate. So in chaos testing
    // mode we need a form of making suboptimal decisions that is fully
    // deterministic. We choose to simply make the worst decision we know
    // how to do instead of the best.
    let use_worst = self.ctrl_plane.get_decision();

    trace!(
        "Computing the {} values for each eclass",
        if use_worst {
            "worst (chaos mode)"
        } else {
            "best"
        }
    );

    // Because the values are topologically sorted, we know that we will see
    // defs before uses, so an instruction's operands' costs will already be
    // computed by the time we are computing the cost for the current value
    // and its instruction.
    for value in sorted_values.iter().copied() {
        let def = self.func.dfg.value_def(value);
        trace!("computing best for value {:?} def {:?}", value, def);

        match def {
            // Pick the best of the two options based on min-cost. This
            // works because each element of `best` is a `(cost, value)`
            // tuple; `cost` comes first so the natural comparison works
            // based on cost, and breaks ties based on value number.
            ValueDef::Union(x, y) => {
                debug_assert!(!best[x].1.is_reserved_value());
                debug_assert!(!best[y].1.is_reserved_value());
                best[value] = if use_worst {
                    core::cmp::max(best[x], best[y])
                } else {
                    core::cmp::min(best[x], best[y])
                };
                trace!(
                    " -> best of union({:?}, {:?}) = {:?}",
                    best[x], best[y], best[value]
                );
            }

            // Blockparams are always available, so cost zero.
            ValueDef::Param(_, _) => {
                best[value] = BestEntry(Cost::zero(), value);
            }

            // If the Inst is inserted into the layout (which is,
            // at this point, only the side-effecting skeleton),
            // then it must be computed and thus we give it zero
            // cost.
            ValueDef::Result(inst, _) => {
                if let Some(_) = self.func.layout.inst_block(inst) {
                    best[value] = BestEntry(Cost::zero(), value);
                } else {
                    let inst_data = &self.func.dfg.insts[inst];
                    // N.B.: at this point we know that the opcode is
                    // pure, so `pure_op_cost`'s precondition is
                    // satisfied.
                    let cost = Cost::of_pure_op(
                        inst_data.opcode(),
                        self.func.dfg.inst_values(inst).map(|value| {
                            debug_assert!(!best[value].1.is_reserved_value());
                            best[value].0
                        }),
                    );
                    best[value] = BestEntry(cost, value);
                    trace!(" -> cost of value {} = {:?}", value, cost);
                }
            }
        };

        // You might be expecting an assert that the best cost we just
        // computed is not infinity, however infinite cost *can* happen in
        // practice. First, note that our cost function doesn't know about
        // any shared structure in the dataflow graph, it only sums operand
        // costs. (And trying to avoid that by deduping a single operation's
        // operands is a losing game because you can always just add one
        // indirection and go from `add(x, x)` to `add(foo(x), bar(x))` to
        // hide the shared structure.) Given that blindness to sharing, we
        // can make cost grow exponentially with a linear sequence of
        // operations:
        //
        //     v0 = iconst.i32 1    ;; cost = 1
        //     v1 = iadd v0, v0     ;; cost = 3 + 1 + 1
        //     v2 = iadd v1, v1     ;; cost = 3 + 5 + 5
        //     v3 = iadd v2, v2     ;; cost = 3 + 13 + 13
        //     v4 = iadd v3, v3     ;; cost = 3 + 29 + 29
        //     v5 = iadd v4, v4     ;; cost = 3 + 61 + 61
        //     v6 = iadd v5, v5     ;; cost = 3 + 125 + 125
        //     ;; etc...
        //
        // Such a chain can cause cost to saturate to infinity. How do we
        // choose which e-node is best when there are multiple that have
        // saturated to infinity? It doesn't matter. As long as invariant
        // (2) for optimization rules is upheld by our rule set (see
        // `cranelift/codegen/src/opts/README.md`) it is safe to choose
        // *any* e-node in the e-class. At worst we will produce suboptimal
        // code, but never an incorrectness.
    }
}
392
393
/// Elaborate use of an eclass, inserting any needed new
394
/// instructions before the given inst `before`. Should only be
395
/// given values corresponding to results of instructions or
396
/// blockparams.
397
fn elaborate_eclass_use(&mut self, value: Value, before: Inst) -> ElaboratedValue {
398
debug_assert_ne!(value, Value::reserved_value());
399
400
// Kick off the process by requesting this result
401
// value.
402
self.elab_stack
403
.push(ElabStackEntry::Start { value, before });
404
405
// Now run the explicit-stack recursion until we reach
406
// the root.
407
self.process_elab_stack();
408
debug_assert_eq!(self.elab_result_stack.len(), 1);
409
self.elab_result_stack.pop().unwrap()
410
}
411
412
/// Possibly rematerialize the instruction producing the value in
413
/// `arg` and rewrite `arg` to refer to it, if needed. Returns
414
/// `true` if a rewrite occurred.
415
fn maybe_remat_arg(
416
remat_values: &FxHashSet<Value>,
417
func: &mut Function,
418
remat_copies: &mut FxHashMap<(Block, Value), Value>,
419
insert_block: Block,
420
before: Inst,
421
arg: &mut ElaboratedValue,
422
stats: &mut Stats,
423
) -> bool {
424
// TODO (#7313): we may want to consider recursive
425
// rematerialization as well. We could process the arguments of
426
// the rematerialized instruction up to a certain depth. This
427
// would affect, e.g., adds-with-one-constant-arg, which are
428
// currently rematerialized. Right now we don't do this, to
429
// avoid the need for another fixpoint loop here.
430
if arg.in_block != insert_block && remat_values.contains(&arg.value) {
431
let new_value = match remat_copies.entry((insert_block, arg.value)) {
432
HashEntry::Occupied(o) => *o.get(),
433
HashEntry::Vacant(v) => {
434
let inst = func.dfg.value_def(arg.value).inst().unwrap();
435
debug_assert_eq!(func.dfg.inst_results(inst).len(), 1);
436
let new_inst = func.dfg.clone_inst(inst);
437
func.layout.insert_inst(new_inst, before);
438
let new_result = func.dfg.inst_results(new_inst)[0];
439
*v.insert(new_result)
440
}
441
};
442
trace!("rematerialized {} as {}", arg.value, new_value);
443
arg.value = new_value;
444
stats.elaborate_remat += 1;
445
true
446
} else {
447
false
448
}
449
}
450
451
/// Drive the explicit elaboration stack to completion. `Start`
/// entries resolve a requested value to its best representative
/// (memoized, blockparam, or inst result); `PendingInst` entries fire
/// once all arguments are elaborated and place (or clone) the pure
/// producing instruction, choosing a hoist point for loop-invariant
/// operations.
fn process_elab_stack(&mut self) {
    while let Some(entry) = self.elab_stack.pop() {
        match entry {
            ElabStackEntry::Start { value, before } => {
                debug_assert!(self.func.dfg.value_is_real(value));

                self.stats.elaborate_visit_node += 1;

                // Get the best option; we use `value` (latest
                // value) here so we have a full view of the
                // eclass.
                trace!("looking up best value for {}", value);
                let BestEntry(_, best_value) = self.value_to_best_value[value];
                trace!("elaborate: value {} -> best {}", value, best_value);
                debug_assert_ne!(best_value, Value::reserved_value());

                if let Some(elab_val) =
                    self.value_to_elaborated_value.get(&NullCtx, &best_value)
                {
                    // Value is available; use it.
                    trace!("elaborate: value {} -> {:?}", value, elab_val);
                    self.stats.elaborate_memoize_hit += 1;
                    self.elab_result_stack.push(*elab_val);
                    continue;
                }

                self.stats.elaborate_memoize_miss += 1;

                // Now resolve the value to its definition to see
                // how we can compute it.
                let (inst, result_idx) = match self.func.dfg.value_def(best_value) {
                    ValueDef::Result(inst, result_idx) => {
                        trace!(
                            " -> value {} is result {} of {}",
                            best_value, result_idx, inst
                        );
                        (inst, result_idx)
                    }
                    ValueDef::Param(in_block, _) => {
                        // We don't need to do anything to compute
                        // this value; just push its result on the
                        // result stack (blockparams are already
                        // available).
                        trace!(" -> value {} is a blockparam", best_value);
                        self.elab_result_stack.push(ElaboratedValue {
                            in_block,
                            value: best_value,
                        });
                        continue;
                    }
                    ValueDef::Union(_, _) => {
                        panic!("Should never have a Union value as the best value");
                    }
                };

                trace!(
                    " -> result {} of inst {:?}",
                    result_idx, self.func.dfg.insts[inst]
                );

                // We're going to need to use this instruction
                // result, placing the instruction into the
                // layout. First, enqueue all args to be
                // elaborated. Push state to receive the results
                // and later elab this inst.
                let num_args = self.func.dfg.inst_values(inst).count();
                self.elab_stack.push(ElabStackEntry::PendingInst {
                    inst,
                    result_idx,
                    num_args,
                    before,
                });

                // Push args in reverse order so we process the
                // first arg first.
                for arg in self.func.dfg.inst_values(inst).rev() {
                    debug_assert_ne!(arg, Value::reserved_value());
                    self.elab_stack
                        .push(ElabStackEntry::Start { value: arg, before });
                }
            }

            ElabStackEntry::PendingInst {
                inst,
                result_idx,
                num_args,
                before,
            } => {
                trace!(
                    "PendingInst: {} result {} args {} before {}",
                    inst, result_idx, num_args, before
                );

                // We should have all args resolved at this
                // point. Grab them and drain them out, removing
                // them.
                let arg_idx = self.elab_result_stack.len() - num_args;
                let arg_values = &mut self.elab_result_stack[arg_idx..];

                // Compute max loop depth.
                //
                // Note that if there are no arguments then this instruction
                // is allowed to get hoisted up one loop. This is not
                // usually used since no-argument values are things like
                // constants which are typically rematerialized, but for the
                // `vconst` instruction 128-bit constants aren't as easily
                // rematerialized. They're hoisted out of inner loops but
                // not to the function entry which may run the risk of
                // placing too much register pressure on the entire
                // function. This is modeled with the `.saturating_sub(1)`
                // as the default if there's otherwise no maximum.
                let loop_hoist_level = arg_values
                    .iter()
                    .map(|&value| {
                        // Find the outermost loop level at which
                        // the value's defining block *is not* a
                        // member. This is the loop-nest level
                        // whose hoist-block we hoist to.
                        let hoist_level = self
                            .loop_stack
                            .iter()
                            .position(|loop_entry| {
                                !self.loop_analysis.is_in_loop(value.in_block, loop_entry.lp)
                            })
                            .unwrap_or(self.loop_stack.len());
                        trace!(
                            " -> arg: elab_value {:?} hoist level {:?}",
                            value, hoist_level
                        );
                        hoist_level
                    })
                    .max()
                    .unwrap_or(self.loop_stack.len().saturating_sub(1));
                trace!(
                    " -> loop hoist level: {:?}; cur loop depth: {:?}, loop_stack: {:?}",
                    loop_hoist_level,
                    self.loop_stack.len(),
                    self.loop_stack,
                );

                // We know that this is a pure inst, because
                // non-pure roots have already been placed in the
                // value-to-elab'd-value map, so they will not
                // reach this stage of processing.
                //
                // We now must determine the location at which we
                // place the instruction. This is the current
                // block *unless* we hoist above a loop when all
                // args are loop-invariant (and this op is pure).
                let (scope_depth, before, insert_block) = if loop_hoist_level
                    == self.loop_stack.len()
                {
                    // Depends on some value at the current
                    // loop depth, or remat forces it here:
                    // place it at the current location.
                    (
                        self.value_to_elaborated_value.depth(),
                        before,
                        self.func.layout.inst_block(before).unwrap(),
                    )
                } else {
                    // Does not depend on any args at current
                    // loop depth: hoist out of loop.
                    self.stats.elaborate_licm_hoist += 1;
                    let data = &self.loop_stack[loop_hoist_level];
                    // `data.hoist_block` should dominate `before`'s block.
                    let before_block = self.func.layout.inst_block(before).unwrap();
                    debug_assert!(self.domtree.block_dominates(data.hoist_block, before_block));
                    // Determine the instruction at which we
                    // insert in `data.hoist_block`.
                    let before = self.func.layout.last_inst(data.hoist_block).unwrap();
                    (data.scope_depth as usize, before, data.hoist_block)
                };

                trace!(
                    " -> decided to place: before {} insert_block {}",
                    before, insert_block
                );

                // Now that we have the location for the
                // instruction, check if any of its args are remat
                // values. If so, and if we don't have a copy of
                // the rematerializing instruction for this block
                // yet, create one.
                let mut remat_arg = false;
                for arg_value in arg_values.iter_mut() {
                    if Self::maybe_remat_arg(
                        &self.remat_values,
                        &mut self.func,
                        &mut self.remat_copies,
                        insert_block,
                        before,
                        arg_value,
                        &mut self.stats,
                    ) {
                        remat_arg = true;
                    }
                }

                // Now we need to place `inst` at the computed
                // location (just before `before`). Note that
                // `inst` may already have been placed somewhere
                // else, because a pure node may be elaborated at
                // more than one place. In this case, we need to
                // duplicate the instruction (and return the
                // `Value`s for that duplicated instance instead).
                //
                // Also clone if we rematerialized, because we
                // don't want to rewrite the args in the original
                // copy.
                trace!("need inst {} before {}", inst, before);
                let inst = if self.func.layout.inst_block(inst).is_some() || remat_arg {
                    // Clone the inst!
                    let new_inst = self.func.dfg.clone_inst(inst);
                    trace!(
                        " -> inst {} already has a location; cloned to {}",
                        inst, new_inst
                    );
                    // Create mappings in the
                    // value-to-elab'd-value map from original
                    // results to cloned results.
                    for (&result, &new_result) in self
                        .func
                        .dfg
                        .inst_results(inst)
                        .iter()
                        .zip(self.func.dfg.inst_results(new_inst).iter())
                    {
                        let elab_value = ElaboratedValue {
                            value: new_result,
                            in_block: insert_block,
                        };
                        let best_result = self.value_to_best_value[result];
                        self.value_to_elaborated_value.insert_if_absent_with_depth(
                            &NullCtx,
                            best_result.1,
                            elab_value,
                            scope_depth,
                        );

                        // The cloned result inherits the original's
                        // best-value entry so later lookups agree.
                        self.value_to_best_value[new_result] = best_result;

                        trace!(
                            " -> cloned inst has new result {} for orig {}",
                            new_result, result
                        );
                    }
                    new_inst
                } else {
                    trace!(" -> no location; using original inst");
                    // Create identity mappings from result values
                    // to themselves in this scope, since we're
                    // using the original inst.
                    for &result in self.func.dfg.inst_results(inst) {
                        let elab_value = ElaboratedValue {
                            value: result,
                            in_block: insert_block,
                        };
                        let best_result = self.value_to_best_value[result];
                        self.value_to_elaborated_value.insert_if_absent_with_depth(
                            &NullCtx,
                            best_result.1,
                            elab_value,
                            scope_depth,
                        );
                        trace!(" -> inserting identity mapping for {}", result);
                    }
                    inst
                };

                // Place the inst just before `before`.
                assert!(
                    is_pure_for_egraph(self.func, inst),
                    "something has gone very wrong if we are elaborating effectful \
                     instructions, they should have remained in the skeleton"
                );
                self.func.layout.insert_inst(inst, before);

                // Update the inst's arguments.
                self.func
                    .dfg
                    .overwrite_inst_values(inst, arg_values.into_iter().map(|ev| ev.value));

                // Now that we've consumed the arg values, pop
                // them off the stack.
                self.elab_result_stack.truncate(arg_idx);

                // Push the requested result index of the
                // instruction onto the elab-results stack.
                self.elab_result_stack.push(ElaboratedValue {
                    in_block: insert_block,
                    value: self.func.dfg.inst_results(inst)[result_idx],
                });
            }
        }
    }
}
748
749
fn elaborate_block(&mut self, elab_values: &mut Vec<Value>, idom: Option<Block>, block: Block) {
750
trace!("elaborate_block: block {}", block);
751
self.start_block(idom, block);
752
753
// Iterate over the side-effecting skeleton using the linked
754
// list in Layout. We will insert instructions that are
755
// elaborated *before* `inst`, so we can always use its
756
// next-link to continue the iteration.
757
let mut next_inst = self.func.layout.first_inst(block);
758
let mut first_branch = None;
759
while let Some(inst) = next_inst {
760
trace!(
761
"elaborating inst {} with results {:?}",
762
inst,
763
self.func.dfg.inst_results(inst)
764
);
765
// Record the first branch we see in the block; all
766
// elaboration for args of *any* branch must be inserted
767
// before the *first* branch, because the branch group
768
// must remain contiguous at the end of the block.
769
if self.func.dfg.insts[inst].opcode().is_branch() && first_branch == None {
770
first_branch = Some(inst);
771
}
772
773
// Determine where elaboration inserts insts.
774
let before = first_branch.unwrap_or(inst);
775
trace!(" -> inserting before {}", before);
776
777
elab_values.extend(self.func.dfg.inst_values(inst));
778
for arg in elab_values.iter_mut() {
779
trace!(" -> arg {}", *arg);
780
// Elaborate the arg, placing any newly-inserted insts
781
// before `before`. Get the updated value, which may
782
// be different than the original.
783
let mut new_arg = self.elaborate_eclass_use(*arg, before);
784
Self::maybe_remat_arg(
785
&self.remat_values,
786
&mut self.func,
787
&mut self.remat_copies,
788
block,
789
inst,
790
&mut new_arg,
791
&mut self.stats,
792
);
793
trace!(" -> rewrote arg to {:?}", new_arg);
794
*arg = new_arg.value;
795
}
796
self.func
797
.dfg
798
.overwrite_inst_values(inst, elab_values.drain(..));
799
800
// We need to put the results of this instruction in the
801
// map now.
802
for &result in self.func.dfg.inst_results(inst) {
803
trace!(" -> result {}", result);
804
let best_result = self.value_to_best_value[result];
805
self.value_to_elaborated_value.insert_if_absent(
806
&NullCtx,
807
best_result.1,
808
ElaboratedValue {
809
in_block: block,
810
value: result,
811
},
812
);
813
}
814
815
next_inst = self.func.layout.next_inst(inst);
816
}
817
}
818
819
/// Walk the dominator tree in pre-order (children possibly shuffled
/// by the chaos-mode control plane), elaborating each block and
/// pushing/popping scope levels of the value map so that mappings
/// are visible exactly within each block's dominated subtree.
fn elaborate_domtree(&mut self, domtree: &DominatorTree) {
    self.block_stack.push(BlockStackEntry::Elaborate {
        block: self.func.layout.entry_block().unwrap(),
        idom: None,
    });

    // A temporary workspace for elaborate_block, allocated here to maximize the use of the
    // allocation.
    let mut elab_values = Vec::new();

    while let Some(top) = self.block_stack.pop() {
        match top {
            BlockStackEntry::Elaborate { block, idom } => {
                // The `Pop` sits beneath this block's children on the
                // stack, so the scope level is restored exactly when
                // we leave this subtree.
                self.block_stack.push(BlockStackEntry::Pop);
                self.value_to_elaborated_value.increment_depth();

                self.elaborate_block(&mut elab_values, idom, block);

                // Push children. We are doing a preorder
                // traversal so we do this after processing this
                // block above.
                let block_stack_end = self.block_stack.len();
                for child in self.ctrl_plane.shuffled(domtree.children(block)) {
                    self.block_stack.push(BlockStackEntry::Elaborate {
                        block: child,
                        idom: Some(block),
                    });
                }
                // Reverse what we just pushed so we elaborate in
                // original block order. (The domtree iter is a
                // single-ended iter over a singly-linked list so
                // we can't `.rev()` above.)
                self.block_stack[block_stack_end..].reverse();
            }
            BlockStackEntry::Pop => {
                self.value_to_elaborated_value.decrement_depth();
            }
        }
    }
}
859
860
pub(crate) fn elaborate(&mut self) {
861
self.stats.elaborate_func += 1;
862
self.stats.elaborate_func_pre_insts += self.func.dfg.num_insts() as u64;
863
self.compute_best_values();
864
self.elaborate_domtree(&self.domtree);
865
self.stats.elaborate_func_post_insts += self.func.dfg.num_insts() as u64;
866
}
867
}
868
869