CoCalc -- elaborate.rs

GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/cranelift/codegen/src/egraph/elaborate.rs
¹⁶⁹³ views
1
//! Elaboration phase: lowers EGraph back to sequences of operations
2
//! in CFG nodes.
3

4
use super::Stats;
5
use super::cost::Cost;
6
use crate::ctxhash::NullCtx;
7
use crate::dominator_tree::DominatorTree;
8
use crate::hash_map::Entry as HashEntry;
9
use crate::inst_predicates::is_pure_for_egraph;
10
use crate::ir::{Block, Function, Inst, Value, ValueDef};
11
use crate::loop_analysis::{Loop, LoopAnalysis};
12
use crate::scoped_hash_map::ScopedHashMap;
13
use crate::trace;
14
use alloc::vec::Vec;
15
use cranelift_control::ControlPlane;
16
use cranelift_entity::{SecondaryMap, packed_option::ReservedValue};
17
use rustc_hash::{FxHashMap, FxHashSet};
18
use smallvec::{SmallVec, smallvec};
19

20
pub(crate) struct Elaborator<'a> {
21
    func: &'a mut Function,
22
    domtree: &'a DominatorTree,
23
    loop_analysis: &'a LoopAnalysis,
24
    /// Map from Value that is produced by a pure Inst (and was thus
25
    /// not in the side-effecting skeleton) to the value produced by
26
    /// an elaborated inst (placed in the layout) to whose results we
27
    /// refer in the final code.
28
    ///
29
    /// The first time we use some result of an instruction during
30
    /// elaboration, we can place it and insert an identity map (inst
31
    /// results to that same inst's results) in this scoped
32
    /// map. Within that block and its dom-tree children, that mapping
33
    /// is visible and we can continue to use it. This allows us to
34
    /// avoid cloning the instruction. However, if we pop that scope
35
    /// and use it somewhere else as well, we will need to
36
    /// duplicate. We detect this case by checking, when a value that
37
    /// we want is not present in this map, whether the producing inst
38
    /// is already placed in the Layout. If so, we duplicate, and
39
    /// insert non-identity mappings from the original inst's results
40
    /// to the cloned inst's results.
41
    ///
42
    /// Note that as values may refer to unions that represent a subset
43
    /// of a larger eclass, it's not valid to walk towards the root of a
44
    /// union tree: doing so would potentially equate values that fall
45
    /// on different branches of the dominator tree.
46
    value_to_elaborated_value: ScopedHashMap<Value, ElaboratedValue>,
47
    /// Map from Value to the best (lowest-cost) Value in its eclass
48
    /// (tree of union value-nodes).
49
    value_to_best_value: SecondaryMap<Value, BestEntry>,
50
    /// Stack of blocks and loops in current elaboration path.
51
    loop_stack: SmallVec<[LoopStackEntry; 8]>,
52
    /// The current block into which we are elaborating.
53
    cur_block: Block,
54
    /// Values that opt rules have indicated should be rematerialized
55
    /// in every block they are used (e.g., immediates or other
56
    /// "cheap-to-compute" ops).
57
    remat_values: &'a FxHashSet<Value>,
58
    /// Explicitly-unrolled value elaboration stack.
59
    elab_stack: Vec<ElabStackEntry>,
60
    /// Results from the elab stack.
61
    elab_result_stack: Vec<ElaboratedValue>,
62
    /// Explicitly-unrolled block elaboration stack.
63
    block_stack: Vec<BlockStackEntry>,
64
    /// Copies of values that have been rematerialized.
65
    remat_copies: FxHashMap<(Block, Value), Value>,
66
    /// Stats for various events during egraph processing, to help
67
    /// with optimization of this infrastructure.
68
    stats: &'a mut Stats,
69
    /// Chaos-mode control-plane so we can test that we still get
70
    /// correct results when our heuristics make bad decisions.
71
    ctrl_plane: &'a mut ControlPlane,
72
}
73

74
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
75
struct BestEntry(Cost, Value);
76

77
impl PartialOrd for BestEntry {
78
    fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
79
        Some(self.cmp(other))
80
    }
81
}
82

83
impl Ord for BestEntry {
84
    #[inline]
85
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
86
        self.0.cmp(&other.0).then_with(|| {
87
            // Note that this comparison is reversed. When costs are equal,
88
            // prefer the value with the bigger index. This is a heuristic that
89
            // prefers results of rewrites to the original value, since we
90
            // expect that our rewrites are generally improvements.
91
            self.1.cmp(&other.1).reverse()
92
        })
93
    }
94
}
95

96
#[derive(Clone, Copy, Debug)]
97
struct ElaboratedValue {
98
    in_block: Block,
99
    value: Value,
100
}
101

102
#[derive(Clone, Debug)]
103
struct LoopStackEntry {
104
    /// The loop identifier.
105
    lp: Loop,
106
    /// The hoist point: a block that immediately dominates this
107
    /// loop. May not be an immediate predecessor, but will be a valid
108
    /// point to place all loop-invariant ops: they must depend only
109
    /// on inputs that dominate the loop, so are available at (the end
110
    /// of) this block.
111
    hoist_block: Block,
112
    /// The depth in the scope map.
113
    scope_depth: u32,
114
}
115

116
#[derive(Clone, Debug)]
117
enum ElabStackEntry {
118
    /// Next action is to resolve this value into an elaborated inst
119
    /// (placed into the layout) that produces the value, and
120
    /// recursively elaborate the insts that produce its args.
121
    ///
122
    /// Any inserted ops should be inserted before `before`, which is
123
    /// the instruction demanding this value.
124
    Start { value: Value, before: Inst },
125
    /// Args have been pushed; waiting for results.
126
    PendingInst {
127
        inst: Inst,
128
        result_idx: usize,
129
        num_args: usize,
130
        before: Inst,
131
    },
132
}
133

134
#[derive(Clone, Debug)]
135
enum BlockStackEntry {
136
    Elaborate { block: Block, idom: Option<Block> },
137
    Pop,
138
}
139

140
impl<'a> Elaborator<'a> {
141
    pub(crate) fn new(
142
        func: &'a mut Function,
143
        domtree: &'a DominatorTree,
144
        loop_analysis: &'a LoopAnalysis,
145
        remat_values: &'a FxHashSet<Value>,
146
        stats: &'a mut Stats,
147
        ctrl_plane: &'a mut ControlPlane,
148
    ) -> Self {
149
        let num_values = func.dfg.num_values();
150
        let mut value_to_best_value =
151
            SecondaryMap::with_default(BestEntry(Cost::infinity(), Value::reserved_value()));
152
        value_to_best_value.resize(num_values);
153
        Self {
154
            func,
155
            domtree,
156
            loop_analysis,
157
            value_to_elaborated_value: ScopedHashMap::with_capacity(num_values),
158
            value_to_best_value,
159
            loop_stack: smallvec![],
160
            cur_block: Block::reserved_value(),
161
            remat_values,
162
            elab_stack: vec![],
163
            elab_result_stack: vec![],
164
            block_stack: vec![],
165
            remat_copies: FxHashMap::default(),
166
            stats,
167
            ctrl_plane,
168
        }
169
    }
170

171
    fn start_block(&mut self, idom: Option<Block>, block: Block) {
172
        trace!(
173
            "start_block: block {:?} with idom {:?} at loop depth {:?} scope depth {}",
174
            block,
175
            idom,
176
            self.loop_stack.len(),
177
            self.value_to_elaborated_value.depth()
178
        );
179

180
        // Pop any loop levels we're no longer in.
181
        while let Some(inner_loop) = self.loop_stack.last() {
182
            if self.loop_analysis.is_in_loop(block, inner_loop.lp) {
183
                break;
184
            }
185
            self.loop_stack.pop();
186
        }
187

188
        // Note that if the *entry* block is a loop header, we will
189
        // not make note of the loop here because it will not have an
190
        // immediate dominator. We must disallow this case because we
191
        // will skip adding the `LoopStackEntry` here but our
192
        // `LoopAnalysis` will otherwise still make note of this loop
193
        // and loop depths will not match.
194
        if let Some(idom) = idom {
195
            if let Some(lp) = self.loop_analysis.is_loop_header(block) {
196
                self.loop_stack.push(LoopStackEntry {
197
                    lp,
198
                    // Any code hoisted out of this loop will have code
199
                    // placed in `idom`, and will have def mappings
200
                    // inserted in to the scoped hashmap at that block's
201
                    // level.
202
                    hoist_block: idom,
203
                    scope_depth: (self.value_to_elaborated_value.depth() - 1) as u32,
204
                });
205
                trace!(
206
                    " -> loop header, pushing; depth now {}",
207
                    self.loop_stack.len()
208
                );
209
            }
210
        } else {
211
            debug_assert!(
212
                self.loop_analysis.is_loop_header(block).is_none(),
213
                "Entry block (domtree root) cannot be a loop header!"
214
            );
215
        }
216

217
        trace!("block {}: loop stack is {:?}", block, self.loop_stack);
218

219
        self.cur_block = block;
220
    }
221

222
    fn compute_best_values(&mut self) {
223
        let best = &mut self.value_to_best_value;
224

225
        // We can't make random decisions inside the fixpoint loop below because
226
        // that could cause values to change on every iteration of the loop,
227
        // which would make the loop never terminate. So in chaos testing
228
        // mode we need a form of making suboptimal decisions that is fully
229
        // deterministic. We choose to simply make the worst decision we know
230
        // how to do instead of the best.
231
        let use_worst = self.ctrl_plane.get_decision();
232

233
        // Do a fixpoint loop to compute the best value for each eclass.
234
        //
235
        // The maximum number of iterations is the length of the longest chain
236
        // of `vNN -> vMM` edges in the dataflow graph where `NN < MM`, so this
237
        // is *technically* quadratic, but `cranelift-frontend` won't construct
238
        // any such edges. NaN canonicalization will introduce some of these
239
        // edges, but they are chains of only two or three edges. So in
240
        // practice, we *never* do more than a handful of iterations here unless
241
        // (a) we parsed the CLIF from text and the text was funkily numbered,
242
        // which we don't really care about, or (b) the CLIF producer did
243
        // something weird, in which case it is their responsibility to stop
244
        // doing that.
245
        trace!(
246
            "Entering fixpoint loop to compute the {} values for each eclass",
247
            if use_worst {
248
                "worst (chaos mode)"
249
            } else {
250
                "best"
251
            }
252
        );
253
        let mut keep_going = true;
254
        while keep_going {
255
            keep_going = false;
256
            trace!(
257
                "fixpoint iteration {}",
258
                self.stats.elaborate_best_cost_fixpoint_iters
259
            );
260
            self.stats.elaborate_best_cost_fixpoint_iters += 1;
261

262
            for (value, def) in self.func.dfg.values_and_defs() {
263
                trace!("computing best for value {:?} def {:?}", value, def);
264
                let orig_best_value = best[value];
265

266
                match def {
267
                    ValueDef::Union(x, y) => {
268
                        // Pick the best of the two options based on
269
                        // min-cost. This works because each element of `best`
270
                        // is a `(cost, value)` tuple; `cost` comes first so
271
                        // the natural comparison works based on cost, and
272
                        // breaks ties based on value number.
273
                        best[value] = if use_worst {
274
                            if best[x].1.is_reserved_value() {
275
                                best[y]
276
                            } else if best[y].1.is_reserved_value() {
277
                                best[x]
278
                            } else {
279
                                std::cmp::max(best[x], best[y])
280
                            }
281
                        } else {
282
                            std::cmp::min(best[x], best[y])
283
                        };
284
                        trace!(
285
                            " -> best of union({:?}, {:?}) = {:?}",
286
                            best[x], best[y], best[value]
287
                        );
288
                    }
289
                    ValueDef::Param(_, _) => {
290
                        best[value] = BestEntry(Cost::zero(), value);
291
                    }
292
                    // If the Inst is inserted into the layout (which is,
293
                    // at this point, only the side-effecting skeleton),
294
                    // then it must be computed and thus we give it zero
295
                    // cost.
296
                    ValueDef::Result(inst, _) => {
297
                        if let Some(_) = self.func.layout.inst_block(inst) {
298
                            best[value] = BestEntry(Cost::zero(), value);
299
                        } else {
300
                            let inst_data = &self.func.dfg.insts[inst];
301
                            // N.B.: at this point we know that the opcode is
302
                            // pure, so `pure_op_cost`'s precondition is
303
                            // satisfied.
304
                            let cost = Cost::of_pure_op(
305
                                inst_data.opcode(),
306
                                self.func.dfg.inst_values(inst).map(|value| best[value].0),
307
                            );
308
                            best[value] = BestEntry(cost, value);
309
                            trace!(" -> cost of value {} = {:?}", value, cost);
310
                        }
311
                    }
312
                };
313

314
                // Keep on iterating the fixpoint loop while we are finding new
315
                // best values.
316
                keep_going |= orig_best_value != best[value];
317
            }
318
        }
319

320
        if cfg!(any(feature = "trace-log", debug_assertions)) {
321
            trace!("finished fixpoint loop to compute best value for each eclass");
322
            for value in self.func.dfg.values() {
323
                trace!("-> best for eclass {:?}: {:?}", value, best[value]);
324
                debug_assert_ne!(best[value].1, Value::reserved_value());
325
                // You might additionally be expecting an assert that the best
326
                // cost is not infinity, however infinite cost *can* happen in
327
                // practice. First, note that our cost function doesn't know
328
                // about any shared structure in the dataflow graph, it only
329
                // sums operand costs. (And trying to avoid that by deduping a
330
                // single operation's operands is a losing game because you can
331
                // always just add one indirection and go from `add(x, x)` to
332
                // `add(foo(x), bar(x))` to hide the shared structure.) Given
333
                // that blindness to sharing, we can make cost grow
334
                // exponentially with a linear sequence of operations:
335
                //
336
                //     v0 = iconst.i32 1    ;; cost = 1
337
                //     v1 = iadd v0, v0     ;; cost = 3 + 1 + 1
338
                //     v2 = iadd v1, v1     ;; cost = 3 + 5 + 5
339
                //     v3 = iadd v2, v2     ;; cost = 3 + 13 + 13
340
                //     v4 = iadd v3, v3     ;; cost = 3 + 29 + 29
341
                //     v5 = iadd v4, v4     ;; cost = 3 + 61 + 61
342
                //     v6 = iadd v5, v5     ;; cost = 3 + 125 + 125
343
                //     ;; etc...
344
                //
345
                // Such a chain can cause cost to saturate to infinity. How do
346
                // we choose which e-node is best when there are multiple that
347
                // have saturated to infinity? It doesn't matter. As long as
348
                // invariant (2) for optimization rules is upheld by our rule
349
                // set (see `cranelift/codegen/src/opts/README.md`) it is safe
350
                // to choose *any* e-node in the e-class. At worst we will
351
                // produce suboptimal code, but never an incorrectness.
352
            }
353
        }
354
    }
355

356
    /// Elaborate use of an eclass, inserting any needed new
357
    /// instructions before the given inst `before`. Should only be
358
    /// given values corresponding to results of instructions or
359
    /// blockparams.
360
    fn elaborate_eclass_use(&mut self, value: Value, before: Inst) -> ElaboratedValue {
361
        debug_assert_ne!(value, Value::reserved_value());
362

363
        // Kick off the process by requesting this result
364
        // value.
365
        self.elab_stack
366
            .push(ElabStackEntry::Start { value, before });
367

368
        // Now run the explicit-stack recursion until we reach
369
        // the root.
370
        self.process_elab_stack();
371
        debug_assert_eq!(self.elab_result_stack.len(), 1);
372
        self.elab_result_stack.pop().unwrap()
373
    }
374

375
    /// Possibly rematerialize the instruction producing the value in
376
    /// `arg` and rewrite `arg` to refer to it, if needed. Returns
377
    /// `true` if a rewrite occurred.
378
    fn maybe_remat_arg(
379
        remat_values: &FxHashSet<Value>,
380
        func: &mut Function,
381
        remat_copies: &mut FxHashMap<(Block, Value), Value>,
382
        insert_block: Block,
383
        before: Inst,
384
        arg: &mut ElaboratedValue,
385
        stats: &mut Stats,
386
    ) -> bool {
387
        // TODO (#7313): we may want to consider recursive
388
        // rematerialization as well. We could process the arguments of
389
        // the rematerialized instruction up to a certain depth. This
390
        // would affect, e.g., adds-with-one-constant-arg, which are
391
        // currently rematerialized. Right now we don't do this, to
392
        // avoid the need for another fixpoint loop here.
393
        if arg.in_block != insert_block && remat_values.contains(&arg.value) {
394
            let new_value = match remat_copies.entry((insert_block, arg.value)) {
395
                HashEntry::Occupied(o) => *o.get(),
396
                HashEntry::Vacant(v) => {
397
                    let inst = func.dfg.value_def(arg.value).inst().unwrap();
398
                    debug_assert_eq!(func.dfg.inst_results(inst).len(), 1);
399
                    let new_inst = func.dfg.clone_inst(inst);
400
                    func.layout.insert_inst(new_inst, before);
401
                    let new_result = func.dfg.inst_results(new_inst)[0];
402
                    *v.insert(new_result)
403
                }
404
            };
405
            trace!("rematerialized {} as {}", arg.value, new_value);
406
            arg.value = new_value;
407
            stats.elaborate_remat += 1;
408
            true
409
        } else {
410
            false
411
        }
412
    }
413

414
    fn process_elab_stack(&mut self) {
415
        while let Some(entry) = self.elab_stack.pop() {
416
            match entry {
417
                ElabStackEntry::Start { value, before } => {
418
                    debug_assert!(self.func.dfg.value_is_real(value));
419

420
                    self.stats.elaborate_visit_node += 1;
421

422
                    // Get the best option; we use `value` (latest
423
                    // value) here so we have a full view of the
424
                    // eclass.
425
                    trace!("looking up best value for {}", value);
426
                    let BestEntry(_, best_value) = self.value_to_best_value[value];
427
                    trace!("elaborate: value {} -> best {}", value, best_value);
428
                    debug_assert_ne!(best_value, Value::reserved_value());
429

430
                    if let Some(elab_val) =
431
                        self.value_to_elaborated_value.get(&NullCtx, &best_value)
432
                    {
433
                        // Value is available; use it.
434
                        trace!("elaborate: value {} -> {:?}", value, elab_val);
435
                        self.stats.elaborate_memoize_hit += 1;
436
                        self.elab_result_stack.push(*elab_val);
437
                        continue;
438
                    }
439

440
                    self.stats.elaborate_memoize_miss += 1;
441

442
                    // Now resolve the value to its definition to see
443
                    // how we can compute it.
444
                    let (inst, result_idx) = match self.func.dfg.value_def(best_value) {
445
                        ValueDef::Result(inst, result_idx) => {
446
                            trace!(
447
                                " -> value {} is result {} of {}",
448
                                best_value, result_idx, inst
449
                            );
450
                            (inst, result_idx)
451
                        }
452
                        ValueDef::Param(in_block, _) => {
453
                            // We don't need to do anything to compute
454
                            // this value; just push its result on the
455
                            // result stack (blockparams are already
456
                            // available).
457
                            trace!(" -> value {} is a blockparam", best_value);
458
                            self.elab_result_stack.push(ElaboratedValue {
459
                                in_block,
460
                                value: best_value,
461
                            });
462
                            continue;
463
                        }
464
                        ValueDef::Union(_, _) => {
465
                            panic!("Should never have a Union value as the best value");
466
                        }
467
                    };
468

469
                    trace!(
470
                        " -> result {} of inst {:?}",
471
                        result_idx, self.func.dfg.insts[inst]
472
                    );
473

474
                    // We're going to need to use this instruction
475
                    // result, placing the instruction into the
476
                    // layout. First, enqueue all args to be
477
                    // elaborated. Push state to receive the results
478
                    // and later elab this inst.
479
                    let num_args = self.func.dfg.inst_values(inst).count();
480
                    self.elab_stack.push(ElabStackEntry::PendingInst {
481
                        inst,
482
                        result_idx,
483
                        num_args,
484
                        before,
485
                    });
486

487
                    // Push args in reverse order so we process the
488
                    // first arg first.
489
                    for arg in self.func.dfg.inst_values(inst).rev() {
490
                        debug_assert_ne!(arg, Value::reserved_value());
491
                        self.elab_stack
492
                            .push(ElabStackEntry::Start { value: arg, before });
493
                    }
494
                }
495

496
                ElabStackEntry::PendingInst {
497
                    inst,
498
                    result_idx,
499
                    num_args,
500
                    before,
501
                } => {
502
                    trace!(
503
                        "PendingInst: {} result {} args {} before {}",
504
                        inst, result_idx, num_args, before
505
                    );
506

507
                    // We should have all args resolved at this
508
                    // point. Grab them and drain them out, removing
509
                    // them.
510
                    let arg_idx = self.elab_result_stack.len() - num_args;
511
                    let arg_values = &mut self.elab_result_stack[arg_idx..];
512

513
                    // Compute max loop depth.
514
                    //
515
                    // Note that if there are no arguments then this instruction
516
                    // is allowed to get hoisted up one loop. This is not
517
                    // usually used since no-argument values are things like
518
                    // constants which are typically rematerialized, but for the
519
                    // `vconst` instruction 128-bit constants aren't as easily
520
                    // rematerialized. They're hoisted out of inner loops but
521
                    // not to the function entry which may run the risk of
522
                    // placing too much register pressure on the entire
523
                    // function. This is modeled with the `.saturating_sub(1)`
524
                    // as the default if there's otherwise no maximum.
525
                    let loop_hoist_level = arg_values
526
                        .iter()
527
                        .map(|&value| {
528
                            // Find the outermost loop level at which
529
                            // the value's defining block *is not* a
530
                            // member. This is the loop-nest level
531
                            // whose hoist-block we hoist to.
532
                            let hoist_level = self
533
                                .loop_stack
534
                                .iter()
535
                                .position(|loop_entry| {
536
                                    !self.loop_analysis.is_in_loop(value.in_block, loop_entry.lp)
537
                                })
538
                                .unwrap_or(self.loop_stack.len());
539
                            trace!(
540
                                " -> arg: elab_value {:?} hoist level {:?}",
541
                                value, hoist_level
542
                            );
543
                            hoist_level
544
                        })
545
                        .max()
546
                        .unwrap_or(self.loop_stack.len().saturating_sub(1));
547
                    trace!(
548
                        " -> loop hoist level: {:?}; cur loop depth: {:?}, loop_stack: {:?}",
549
                        loop_hoist_level,
550
                        self.loop_stack.len(),
551
                        self.loop_stack,
552
                    );
553

554
                    // We know that this is a pure inst, because
555
                    // non-pure roots have already been placed in the
556
                    // value-to-elab'd-value map, so they will not
557
                    // reach this stage of processing.
558
                    //
559
                    // We now must determine the location at which we
560
                    // place the instruction. This is the current
561
                    // block *unless* we hoist above a loop when all
562
                    // args are loop-invariant (and this op is pure).
563
                    let (scope_depth, before, insert_block) = if loop_hoist_level
564
                        == self.loop_stack.len()
565
                    {
566
                        // Depends on some value at the current
567
                        // loop depth, or remat forces it here:
568
                        // place it at the current location.
569
                        (
570
                            self.value_to_elaborated_value.depth(),
571
                            before,
572
                            self.func.layout.inst_block(before).unwrap(),
573
                        )
574
                    } else {
575
                        // Does not depend on any args at current
576
                        // loop depth: hoist out of loop.
577
                        self.stats.elaborate_licm_hoist += 1;
578
                        let data = &self.loop_stack[loop_hoist_level];
579
                        // `data.hoist_block` should dominate `before`'s block.
580
                        let before_block = self.func.layout.inst_block(before).unwrap();
581
                        debug_assert!(self.domtree.block_dominates(data.hoist_block, before_block));
582
                        // Determine the instruction at which we
583
                        // insert in `data.hoist_block`.
584
                        let before = self.func.layout.last_inst(data.hoist_block).unwrap();
585
                        (data.scope_depth as usize, before, data.hoist_block)
586
                    };
587

588
                    trace!(
589
                        " -> decided to place: before {} insert_block {}",
590
                        before, insert_block
591
                    );
592

593
                    // Now that we have the location for the
594
                    // instruction, check if any of its args are remat
595
                    // values. If so, and if we don't have a copy of
596
                    // the rematerializing instruction for this block
597
                    // yet, create one.
598
                    let mut remat_arg = false;
599
                    for arg_value in arg_values.iter_mut() {
600
                        if Self::maybe_remat_arg(
601
                            &self.remat_values,
602
                            &mut self.func,
603
                            &mut self.remat_copies,
604
                            insert_block,
605
                            before,
606
                            arg_value,
607
                            &mut self.stats,
608
                        ) {
609
                            remat_arg = true;
610
                        }
611
                    }
612

613
                    // Now we need to place `inst` at the computed
614
                    // location (just before `before`). Note that
615
                    // `inst` may already have been placed somewhere
616
                    // else, because a pure node may be elaborated at
617
                    // more than one place. In this case, we need to
618
                    // duplicate the instruction (and return the
619
                    // `Value`s for that duplicated instance instead).
620
                    //
621
                    // Also clone if we rematerialized, because we
622
                    // don't want to rewrite the args in the original
623
                    // copy.
624
                    trace!("need inst {} before {}", inst, before);
625
                    let inst = if self.func.layout.inst_block(inst).is_some() || remat_arg {
626
                        // Clone the inst!
627
                        let new_inst = self.func.dfg.clone_inst(inst);
628
                        trace!(
629
                            " -> inst {} already has a location; cloned to {}",
630
                            inst, new_inst
631
                        );
632
                        // Create mappings in the
633
                        // value-to-elab'd-value map from original
634
                        // results to cloned results.
635
                        for (&result, &new_result) in self
636
                            .func
637
                            .dfg
638
                            .inst_results(inst)
639
                            .iter()
640
                            .zip(self.func.dfg.inst_results(new_inst).iter())
641
                        {
642
                            let elab_value = ElaboratedValue {
643
                                value: new_result,
644
                                in_block: insert_block,
645
                            };
646
                            let best_result = self.value_to_best_value[result];
647
                            self.value_to_elaborated_value.insert_if_absent_with_depth(
648
                                &NullCtx,
649
                                best_result.1,
650
                                elab_value,
651
                                scope_depth,
652
                            );
653

654
                            self.value_to_best_value[new_result] = best_result;
655

656
                            trace!(
657
                                " -> cloned inst has new result {} for orig {}",
658
                                new_result, result
659
                            );
660
                        }
661
                        new_inst
662
                    } else {
663
                        trace!(" -> no location; using original inst");
664
                        // Create identity mappings from result values
665
                        // to themselves in this scope, since we're
666
                        // using the original inst.
667
                        for &result in self.func.dfg.inst_results(inst) {
668
                            let elab_value = ElaboratedValue {
669
                                value: result,
670
                                in_block: insert_block,
671
                            };
672
                            let best_result = self.value_to_best_value[result];
673
                            self.value_to_elaborated_value.insert_if_absent_with_depth(
674
                                &NullCtx,
675
                                best_result.1,
676
                                elab_value,
677
                                scope_depth,
678
                            );
679
                            trace!(" -> inserting identity mapping for {}", result);
680
                        }
681
                        inst
682
                    };
683

684
                    // Place the inst just before `before`.
685
                    assert!(
686
                        is_pure_for_egraph(self.func, inst),
687
                        "something has gone very wrong if we are elaborating effectful \
688
                         instructions, they should have remained in the skeleton"
689
                    );
690
                    self.func.layout.insert_inst(inst, before);
691

692
                    // Update the inst's arguments.
693
                    self.func
694
                        .dfg
695
                        .overwrite_inst_values(inst, arg_values.into_iter().map(|ev| ev.value));
696

697
                    // Now that we've consumed the arg values, pop
698
                    // them off the stack.
699
                    self.elab_result_stack.truncate(arg_idx);
700

701
                    // Push the requested result index of the
702
                    // instruction onto the elab-results stack.
703
                    self.elab_result_stack.push(ElaboratedValue {
704
                        in_block: insert_block,
705
                        value: self.func.dfg.inst_results(inst)[result_idx],
706
                    });
707
                }
708
            }
709
        }
710
    }
711

712
    fn elaborate_block(&mut self, elab_values: &mut Vec<Value>, idom: Option<Block>, block: Block) {
713
        trace!("elaborate_block: block {}", block);
714
        self.start_block(idom, block);
715

716
        // Iterate over the side-effecting skeleton using the linked
717
        // list in Layout. We will insert instructions that are
718
        // elaborated *before* `inst`, so we can always use its
719
        // next-link to continue the iteration.
720
        let mut next_inst = self.func.layout.first_inst(block);
721
        let mut first_branch = None;
722
        while let Some(inst) = next_inst {
723
            trace!(
724
                "elaborating inst {} with results {:?}",
725
                inst,
726
                self.func.dfg.inst_results(inst)
727
            );
728
            // Record the first branch we see in the block; all
729
            // elaboration for args of *any* branch must be inserted
730
            // before the *first* branch, because the branch group
731
            // must remain contiguous at the end of the block.
732
            if self.func.dfg.insts[inst].opcode().is_branch() && first_branch == None {
733
                first_branch = Some(inst);
734
            }
735

736
            // Determine where elaboration inserts insts.
737
            let before = first_branch.unwrap_or(inst);
738
            trace!(" -> inserting before {}", before);
739

740
            elab_values.extend(self.func.dfg.inst_values(inst));
741
            for arg in elab_values.iter_mut() {
742
                trace!(" -> arg {}", *arg);
743
                // Elaborate the arg, placing any newly-inserted insts
744
                // before `before`. Get the updated value, which may
745
                // be different than the original.
746
                let mut new_arg = self.elaborate_eclass_use(*arg, before);
747
                Self::maybe_remat_arg(
748
                    &self.remat_values,
749
                    &mut self.func,
750
                    &mut self.remat_copies,
751
                    block,
752
                    inst,
753
                    &mut new_arg,
754
                    &mut self.stats,
755
                );
756
                trace!("   -> rewrote arg to {:?}", new_arg);
757
                *arg = new_arg.value;
758
            }
759
            self.func
760
                .dfg
761
                .overwrite_inst_values(inst, elab_values.drain(..));
762

763
            // We need to put the results of this instruction in the
764
            // map now.
765
            for &result in self.func.dfg.inst_results(inst) {
766
                trace!(" -> result {}", result);
767
                let best_result = self.value_to_best_value[result];
768
                self.value_to_elaborated_value.insert_if_absent(
769
                    &NullCtx,
770
                    best_result.1,
771
                    ElaboratedValue {
772
                        in_block: block,
773
                        value: result,
774
                    },
775
                );
776
            }
777

778
            next_inst = self.func.layout.next_inst(inst);
779
        }
780
    }
781

782
    fn elaborate_domtree(&mut self, domtree: &DominatorTree) {
783
        self.block_stack.push(BlockStackEntry::Elaborate {
784
            block: self.func.layout.entry_block().unwrap(),
785
            idom: None,
786
        });
787

788
        // A temporary workspace for elaborate_block, allocated here to maximize the use of the
789
        // allocation.
790
        let mut elab_values = Vec::new();
791

792
        while let Some(top) = self.block_stack.pop() {
793
            match top {
794
                BlockStackEntry::Elaborate { block, idom } => {
795
                    self.block_stack.push(BlockStackEntry::Pop);
796
                    self.value_to_elaborated_value.increment_depth();
797

798
                    self.elaborate_block(&mut elab_values, idom, block);
799

800
                    // Push children. We are doing a preorder
801
                    // traversal so we do this after processing this
802
                    // block above.
803
                    let block_stack_end = self.block_stack.len();
804
                    for child in self.ctrl_plane.shuffled(domtree.children(block)) {
805
                        self.block_stack.push(BlockStackEntry::Elaborate {
806
                            block: child,
807
                            idom: Some(block),
808
                        });
809
                    }
810
                    // Reverse what we just pushed so we elaborate in
811
                    // original block order. (The domtree iter is a
812
                    // single-ended iter over a singly-linked list so
813
                    // we can't `.rev()` above.)
814
                    self.block_stack[block_stack_end..].reverse();
815
                }
816
                BlockStackEntry::Pop => {
817
                    self.value_to_elaborated_value.decrement_depth();
818
                }
819
            }
820
        }
821
    }
822

823
    pub(crate) fn elaborate(&mut self) {
824
        self.stats.elaborate_func += 1;
825
        self.stats.elaborate_func_pre_insts += self.func.dfg.num_insts() as u64;
826
        self.compute_best_values();
827
        self.elaborate_domtree(&self.domtree);
828
        self.stats.elaborate_func_post_insts += self.func.dfg.num_insts() as u64;
829
    }
830
}
831

832
Product

Resources

Company