Path: blob/main/cranelift/codegen/src/ir/pcc.rs
1693 views
//! Proof-carrying code. We attach "facts" to values and then check1//! that they remain true after compilation.2//!3//! A few key design principle of this approach are:4//!5//! - The producer of the IR provides the axioms. All "ground truth",6//! such as what memory is accessible -- is meant to come by way of7//! facts on the function arguments and global values. In some8//! sense, all we are doing here is validating the "internal9//! consistency" of the facts that are provided on values, and the10//! actions performed on those values.11//!12//! - We do not derive and forward-propagate facts eagerly. Rather,13//! the producer needs to provide breadcrumbs -- a "proof witness"14//! of sorts -- to allow the checking to complete. That means that15//! as an address is computed, or pointer chains are dereferenced,16//! each intermediate value will likely have some fact attached.17//!18//! This does create more verbose IR, but a significant positive19//! benefit is that it avoids unnecessary work: we do not build up a20//! knowledge base that effectively encodes the integer ranges of21//! many or most values in the program. Rather, we only check22//! specifically the memory-access sequences. In practice, each such23//! sequence is likely to be a carefully-controlled sequence of IR24//! operations from, e.g., a sandboxing compiler (such as25//! Wasmtime) so adding annotations here to communicate26//! intent (ranges, bounds-checks, and the like) is no problem.27//!28//! Facts are attached to SSA values in CLIF, and are maintained29//! through optimizations and through lowering. They are thus also30//! present on VRegs in the VCode. In theory, facts could be checked31//! at either level, though in practice it is most useful to check32//! them at the VCode level if the goal is an end-to-end verification33//! of certain properties (e.g., memory sandboxing).34//!35//! Checking facts entails visiting each instruction that defines a36//! value with a fact, and checking the result's fact against the37//! facts on arguments and the operand. For VCode, this is38//! fundamentally a question of the target ISA's semantics, so we call39//! into the `LowerBackend` for this. Note that during checking there40//! is also limited forward propagation / inference, but only within41//! an instruction: for example, an addressing mode commonly can42//! include an addition, multiplication/shift, or extend operation,43//! and there is no way to attach facts to the intermediate values44//! "inside" the instruction, so instead the backend can use45//! `FactContext::add()` and friends to forward-propagate facts.46//!47//! TODO:48//!49//! Deployment:50//! - Add to fuzzing51//! - Turn on during wasm spec-tests52//!53//! More checks:54//! - Check that facts on `vmctx` GVs are subsumed by the actual facts55//! on the vmctx arg in block0 (function arg).56//!57//! Generality:58//! - facts on outputs (in func signature)?59//! - Implement checking at the CLIF level as well.60//! - Check instructions that can trap as well?61//!62//! Nicer errors:63//! - attach instruction index or some other identifier to errors64//!65//! Text format cleanup:66//! - make the bitwidth on `max` facts optional in the CLIF text67//! format?68//! - make offset in `mem` fact optional in the text format?69//!70//! Bikeshed colors (syntax):71//! - Put fact bang-annotations after types?72//! `v0: i64 ! fact(..)` vs. `v0 ! fact(..): i64`7374use crate::ir;75use crate::ir::types::*;76use crate::isa::TargetIsa;77use crate::machinst::{BlockIndex, LowerBackend, VCode};78use crate::trace;79use regalloc2::Function as _;80use std::fmt;8182#[cfg(feature = "enable-serde")]83use serde_derive::{Deserialize, Serialize};8485/// The result of checking proof-carrying-code facts.86pub type PccResult<T> = std::result::Result<T, PccError>;8788/// An error or inconsistency discovered when checking proof-carrying89/// code.90#[derive(Debug, Clone)]91pub enum PccError {92/// An operation wraps around, invalidating the stated value93/// range.94Overflow,95/// An input to an operator that produces a fact-annotated value96/// does not have a fact describing it, and one is needed.97MissingFact,98/// A derivation of an output fact is unsupported (incorrect or99/// not derivable).100UnsupportedFact,101/// A block parameter claims a fact that one of its predecessors102/// does not support.103UnsupportedBlockparam,104/// A memory access is out of bounds.105OutOfBounds,106/// Proof-carrying-code checking is not implemented for a107/// particular compiler backend.108UnimplementedBackend,109/// Proof-carrying-code checking is not implemented for a110/// particular instruction that instruction-selection chose. This111/// is an internal compiler error.112UnimplementedInst,113/// Access to an invalid or undefined field offset in a struct.114InvalidFieldOffset,115/// Access to a field via the wrong type.116BadFieldType,117/// Store to a read-only field.118WriteToReadOnlyField,119/// Store of data to a field with a fact that does not subsume the120/// field's fact.121InvalidStoredFact,122}123124/// A fact on a value.125#[derive(Clone, Debug, Hash, PartialEq, Eq)]126#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]127pub enum Fact {128/// A bitslice of a value (up to a bitwidth) is within the given129/// integer range.130///131/// The slicing behavior is needed because this fact can describe132/// both an SSA `Value`, whose entire value is well-defined, and a133/// `VReg` in VCode, whose bits beyond the type stored in that134/// register are don't-care (undefined).135Range {136/// The bitwidth of bits we care about, from the LSB upward.137bit_width: u16,138/// The minimum value that the bitslice can take139/// (inclusive). The range is unsigned: the specified bits of140/// the actual value will be greater than or equal to this141/// value, as evaluated by an unsigned integer comparison.142min: u64,143/// The maximum value that the bitslice can take144/// (inclusive). The range is unsigned: the specified bits of145/// the actual value will be less than or equal to this value,146/// as evaluated by an unsigned integer comparison.147max: u64,148},149150/// A value bounded by a global value.151///152/// The range is in `(min_GV + min_offset)..(max_GV +153/// max_offset)`, inclusive on the lower and upper bound.154DynamicRange {155/// The bitwidth of bits we care about, from the LSB upward.156bit_width: u16,157/// The lower bound, inclusive.158min: Expr,159/// The upper bound, inclusive.160max: Expr,161},162163/// A pointer to a memory type.164Mem {165/// The memory type.166ty: ir::MemoryType,167/// The minimum offset into the memory type, inclusive.168min_offset: u64,169/// The maximum offset into the memory type, inclusive.170max_offset: u64,171/// This pointer can also be null.172nullable: bool,173},174175/// A pointer to a memory type, dynamically bounded. The pointer176/// is within `(GV_min+offset_min)..(GV_max+offset_max)`177/// (inclusive on both ends) in the memory type.178DynamicMem {179/// The memory type.180ty: ir::MemoryType,181/// The lower bound, inclusive.182min: Expr,183/// The upper bound, inclusive.184max: Expr,185/// This pointer can also be null.186nullable: bool,187},188189/// A definition of a value to be used as a symbol in190/// BaseExprs. There can only be one of these per value number.191///192/// Note that this differs from a `DynamicRange` specifying that193/// some value in the program is the same as `value`. A `def(v1)`194/// fact is propagated to machine code and serves as a source of195/// truth: the value or location labeled with this fact *defines*196/// what `v1` is, and any `dynamic_range(64, v1, v1)`-labeled197/// values elsewhere are claiming to be equal to this value.198///199/// This is necessary because we don't propagate SSA value labels200/// down to machine code otherwise; so when referring symbolically201/// to addresses and expressions derived from addresses, we need202/// to introduce the symbol first.203Def {204/// The SSA value this value defines.205value: ir::Value,206},207208/// A comparison result between two dynamic values with a209/// comparison of a certain kind.210Compare {211/// The kind of comparison.212kind: ir::condcodes::IntCC,213/// The left-hand side of the comparison.214lhs: Expr,215/// The right-hand side of the comparison.216rhs: Expr,217},218219/// A "conflict fact": this fact results from merging two other220/// facts, and it can never be satisfied -- checking any value221/// against this fact will fail.222Conflict,223}224225/// A bound expression.226#[derive(Clone, Debug, Hash, PartialEq, Eq)]227#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]228pub struct Expr {229/// The dynamic (base) part.230pub base: BaseExpr,231/// The static (offset) part.232pub offset: i64,233}234235/// The base part of a bound expression.236#[derive(Clone, Debug, Hash, PartialEq, Eq)]237#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]238pub enum BaseExpr {239/// No dynamic part (i.e., zero).240None,241/// A global value.242GlobalValue(ir::GlobalValue),243/// An SSA Value as a symbolic value. This can be referenced in244/// facts even after we've lowered out of SSA: it becomes simply245/// some symbolic value.246Value(ir::Value),247/// Top of the address space. This is "saturating": the offset248/// doesn't matter.249Max,250}251252impl BaseExpr {253/// Is one base less than or equal to another? (We can't always254/// know; in such cases, returns `false`.)255fn le(lhs: &BaseExpr, rhs: &BaseExpr) -> bool {256// (i) reflexivity; (ii) 0 <= x for all (unsigned) x; (iii) x <= max for all x.257lhs == rhs || *lhs == BaseExpr::None || *rhs == BaseExpr::Max258}259260/// Compute some BaseExpr that will be less than or equal to both261/// inputs. This is a generalization of `min` (but looser).262fn min(lhs: &BaseExpr, rhs: &BaseExpr) -> BaseExpr {263if lhs == rhs {264lhs.clone()265} else if *lhs == BaseExpr::Max {266rhs.clone()267} else if *rhs == BaseExpr::Max {268lhs.clone()269} else {270BaseExpr::None // zero is <= x for all (unsigned) x.271}272}273274/// Compute some BaseExpr that will be greater than or equal to275/// both inputs.276fn max(lhs: &BaseExpr, rhs: &BaseExpr) -> BaseExpr {277if lhs == rhs {278lhs.clone()279} else if *lhs == BaseExpr::None {280rhs.clone()281} else if *rhs == BaseExpr::None {282lhs.clone()283} else {284BaseExpr::Max285}286}287}288289impl Expr {290/// Constant value.291pub fn constant(offset: i64) -> Self {292Expr {293base: BaseExpr::None,294offset,295}296}297298/// The value of an SSA value.299pub fn value(value: ir::Value) -> Self {300Expr {301base: BaseExpr::Value(value),302offset: 0,303}304}305306/// The value of a global value.307pub fn global_value(gv: ir::GlobalValue) -> Self {308Expr {309base: BaseExpr::GlobalValue(gv),310offset: 0,311}312}313314/// Is one expression definitely less than or equal to another?315/// (We can't always know; in such cases, returns `false`.)316fn le(lhs: &Expr, rhs: &Expr) -> bool {317if rhs.base == BaseExpr::Max {318true319} else {320BaseExpr::le(&lhs.base, &rhs.base) && lhs.offset <= rhs.offset321}322}323324/// Generalization of `min`: compute some Expr that is less than325/// or equal to both inputs.326fn min(lhs: &Expr, rhs: &Expr) -> Expr {327if lhs.base == BaseExpr::None && lhs.offset == 0 {328lhs.clone()329} else if rhs.base == BaseExpr::None && rhs.offset == 0 {330rhs.clone()331} else {332Expr {333base: BaseExpr::min(&lhs.base, &rhs.base),334offset: std::cmp::min(lhs.offset, rhs.offset),335}336}337}338339/// Generalization of `max`: compute some Expr that is greater340/// than or equal to both inputs.341fn max(lhs: &Expr, rhs: &Expr) -> Expr {342if lhs.base == BaseExpr::None && lhs.offset == 0 {343rhs.clone()344} else if rhs.base == BaseExpr::None && rhs.offset == 0 {345lhs.clone()346} else {347Expr {348base: BaseExpr::max(&lhs.base, &rhs.base),349offset: std::cmp::max(lhs.offset, rhs.offset),350}351}352}353354/// Add one expression to another.355fn add(lhs: &Expr, rhs: &Expr) -> Option<Expr> {356if lhs.base == rhs.base {357Some(Expr {358base: lhs.base.clone(),359offset: lhs.offset.checked_add(rhs.offset)?,360})361} else if lhs.base == BaseExpr::None {362Some(Expr {363base: rhs.base.clone(),364offset: lhs.offset.checked_add(rhs.offset)?,365})366} else if rhs.base == BaseExpr::None {367Some(Expr {368base: lhs.base.clone(),369offset: lhs.offset.checked_add(rhs.offset)?,370})371} else {372Some(Expr {373base: BaseExpr::Max,374offset: 0,375})376}377}378379/// Add a static offset to an expression.380pub fn offset(lhs: &Expr, rhs: i64) -> Option<Expr> {381let offset = lhs.offset.checked_add(rhs)?;382Some(Expr {383base: lhs.base.clone(),384offset,385})386}387388/// Is this Expr a BaseExpr with no offset? Return it if so.389pub fn without_offset(&self) -> Option<&BaseExpr> {390if self.offset == 0 {391Some(&self.base)392} else {393None394}395}396}397398impl fmt::Display for BaseExpr {399fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {400match self {401BaseExpr::None => Ok(()),402BaseExpr::Max => write!(f, "max"),403BaseExpr::GlobalValue(gv) => write!(f, "{gv}"),404BaseExpr::Value(value) => write!(f, "{value}"),405}406}407}408409impl BaseExpr {410/// Does this dynamic_expression take an offset?411pub fn is_some(&self) -> bool {412match self {413BaseExpr::None => false,414_ => true,415}416}417}418419impl fmt::Display for Expr {420fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {421write!(f, "{}", self.base)?;422match self.offset {423offset if offset > 0 && self.base.is_some() => write!(f, "+{offset:#x}"),424offset if offset > 0 => write!(f, "{offset:#x}"),425offset if offset < 0 => {426let negative_offset = -i128::from(offset); // upcast to support i64::MIN.427write!(f, "-{negative_offset:#x}")428}4290 if self.base.is_some() => Ok(()),4300 => write!(f, "0"),431_ => unreachable!(),432}433}434}435436impl fmt::Display for Fact {437fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {438match self {439Fact::Range {440bit_width,441min,442max,443} => write!(f, "range({bit_width}, {min:#x}, {max:#x})"),444Fact::DynamicRange {445bit_width,446min,447max,448} => {449write!(f, "dynamic_range({bit_width}, {min}, {max})")450}451Fact::Mem {452ty,453min_offset,454max_offset,455nullable,456} => {457let nullable_flag = if *nullable { ", nullable" } else { "" };458write!(459f,460"mem({ty}, {min_offset:#x}, {max_offset:#x}{nullable_flag})"461)462}463Fact::DynamicMem {464ty,465min,466max,467nullable,468} => {469let nullable_flag = if *nullable { ", nullable" } else { "" };470write!(f, "dynamic_mem({ty}, {min}, {max}{nullable_flag})")471}472Fact::Def { value } => write!(f, "def({value})"),473Fact::Compare { kind, lhs, rhs } => {474write!(f, "compare({kind}, {lhs}, {rhs})")475}476Fact::Conflict => write!(f, "conflict"),477}478}479}480481impl Fact {482/// Create a range fact that specifies a single known constant value.483pub fn constant(bit_width: u16, value: u64) -> Self {484debug_assert!(value <= max_value_for_width(bit_width));485// `min` and `max` are inclusive, so this specifies a range of486// exactly one value.487Fact::Range {488bit_width,489min: value,490max: value,491}492}493494/// Create a dynamic range fact that points to the base of a dynamic memory.495pub fn dynamic_base_ptr(ty: ir::MemoryType) -> Self {496Fact::DynamicMem {497ty,498min: Expr::constant(0),499max: Expr::constant(0),500nullable: false,501}502}503504/// Create a fact that specifies the value is exactly an SSA value.505///506/// Note that this differs from a `def` fact: it is not *defining*507/// a symbol to have the value that this fact is attached to;508/// rather it is claiming that this value is the same as whatever509/// that symbol is. (In other words, the def should be elsewhere,510/// and we are tying ourselves to it.)511pub fn value(bit_width: u16, value: ir::Value) -> Self {512Fact::DynamicRange {513bit_width,514min: Expr::value(value),515max: Expr::value(value),516}517}518519/// Create a fact that specifies the value is exactly an SSA value plus some offset.520pub fn value_offset(bit_width: u16, value: ir::Value, offset: i64) -> Self {521Fact::DynamicRange {522bit_width,523min: Expr::offset(&Expr::value(value), offset).unwrap(),524max: Expr::offset(&Expr::value(value), offset).unwrap(),525}526}527528/// Create a fact that specifies the value is exactly the value of a GV.529pub fn global_value(bit_width: u16, gv: ir::GlobalValue) -> Self {530Fact::DynamicRange {531bit_width,532min: Expr::global_value(gv),533max: Expr::global_value(gv),534}535}536537/// Create a fact that specifies the value is exactly the value of a GV plus some offset.538pub fn global_value_offset(bit_width: u16, gv: ir::GlobalValue, offset: i64) -> Self {539Fact::DynamicRange {540bit_width,541min: Expr::offset(&Expr::global_value(gv), offset).unwrap(),542max: Expr::offset(&Expr::global_value(gv), offset).unwrap(),543}544}545546/// Create a range fact that specifies the maximum range for a547/// value of the given bit-width.548pub const fn max_range_for_width(bit_width: u16) -> Self {549match bit_width {550bit_width if bit_width < 64 => Fact::Range {551bit_width,552min: 0,553max: (1u64 << bit_width) - 1,554},55564 => Fact::Range {556bit_width: 64,557min: 0,558max: u64::MAX,559},560_ => panic!("bit width too large!"),561}562}563564/// Create a range fact that specifies the maximum range for a565/// value of the given bit-width, zero-extended into a wider566/// width.567pub const fn max_range_for_width_extended(from_width: u16, to_width: u16) -> Self {568debug_assert!(from_width <= to_width);569match from_width {570from_width if from_width < 64 => Fact::Range {571bit_width: to_width,572min: 0,573max: (1u64 << from_width) - 1,574},57564 => Fact::Range {576bit_width: to_width,577min: 0,578max: u64::MAX,579},580_ => panic!("bit width too large!"),581}582}583584/// Try to infer a minimal fact for a value of the given IR type.585pub fn infer_from_type(ty: ir::Type) -> Option<&'static Self> {586static FACTS: [Fact; 4] = [587Fact::max_range_for_width(8),588Fact::max_range_for_width(16),589Fact::max_range_for_width(32),590Fact::max_range_for_width(64),591];592match ty {593I8 => Some(&FACTS[0]),594I16 => Some(&FACTS[1]),595I32 => Some(&FACTS[2]),596I64 => Some(&FACTS[3]),597_ => None,598}599}600601/// Does this fact "propagate" automatically, i.e., cause602/// instructions that process it to infer their own output facts?603/// Not all facts propagate automatically; otherwise, verification604/// would be much slower.605pub fn propagates(&self) -> bool {606match self {607Fact::Mem { .. } => true,608_ => false,609}610}611612/// Is this a constant value of the given bitwidth? Return it as a613/// `Some(value)` if so.614pub fn as_const(&self, bits: u16) -> Option<u64> {615match self {616Fact::Range {617bit_width,618min,619max,620} if *bit_width == bits && min == max => Some(*min),621_ => None,622}623}624625/// Is this fact a single-value range with a symbolic Expr?626pub fn as_symbol(&self) -> Option<&Expr> {627match self {628Fact::DynamicRange { min, max, .. } if min == max => Some(min),629_ => None,630}631}632633/// Merge two facts. We take the *intersection*: that is, we know634/// both facts to be true, so we can intersect ranges. (This635/// differs from the usual static analysis approach, where we are636/// merging multiple possibilities into a generalized / widened637/// fact. We want to narrow here.)638pub fn intersect(a: &Fact, b: &Fact) -> Fact {639match (a, b) {640(641Fact::Range {642bit_width: bw_lhs,643min: min_lhs,644max: max_lhs,645},646Fact::Range {647bit_width: bw_rhs,648min: min_rhs,649max: max_rhs,650},651) if bw_lhs == bw_rhs && max_lhs >= min_rhs && max_rhs >= min_lhs => Fact::Range {652bit_width: *bw_lhs,653min: std::cmp::max(*min_lhs, *min_rhs),654max: std::cmp::min(*max_lhs, *max_rhs),655},656657(658Fact::DynamicRange {659bit_width: bw_lhs,660min: min_lhs,661max: max_lhs,662},663Fact::DynamicRange {664bit_width: bw_rhs,665min: min_rhs,666max: max_rhs,667},668) if bw_lhs == bw_rhs && Expr::le(min_rhs, max_lhs) && Expr::le(min_lhs, max_rhs) => {669Fact::DynamicRange {670bit_width: *bw_lhs,671min: Expr::max(min_lhs, min_rhs),672max: Expr::min(max_lhs, max_rhs),673}674}675676(677Fact::Mem {678ty: ty_lhs,679min_offset: min_offset_lhs,680max_offset: max_offset_lhs,681nullable: nullable_lhs,682},683Fact::Mem {684ty: ty_rhs,685min_offset: min_offset_rhs,686max_offset: max_offset_rhs,687nullable: nullable_rhs,688},689) if ty_lhs == ty_rhs690&& max_offset_lhs >= min_offset_rhs691&& max_offset_rhs >= min_offset_lhs =>692{693Fact::Mem {694ty: *ty_lhs,695min_offset: std::cmp::max(*min_offset_lhs, *min_offset_rhs),696max_offset: std::cmp::min(*max_offset_lhs, *max_offset_rhs),697nullable: *nullable_lhs && *nullable_rhs,698}699}700701(702Fact::DynamicMem {703ty: ty_lhs,704min: min_lhs,705max: max_lhs,706nullable: null_lhs,707},708Fact::DynamicMem {709ty: ty_rhs,710min: min_rhs,711max: max_rhs,712nullable: null_rhs,713},714) if ty_lhs == ty_rhs && Expr::le(min_rhs, max_lhs) && Expr::le(min_lhs, max_rhs) => {715Fact::DynamicMem {716ty: *ty_lhs,717min: Expr::max(min_lhs, min_rhs),718max: Expr::min(max_lhs, max_rhs),719nullable: *null_lhs && *null_rhs,720}721}722723_ => Fact::Conflict,724}725}726}727728macro_rules! ensure {729( $condition:expr, $err:tt $(,)? ) => {730if !$condition {731return Err(PccError::$err);732}733};734}735736macro_rules! bail {737( $err:tt ) => {{738return Err(PccError::$err);739}};740}741742/// The two kinds of inequalities: "strict" (`<`, `>`) and "loose"743/// (`<=`, `>=`), the latter of which admit equality.744#[derive(Clone, Copy, Debug, PartialEq, Eq)]745pub enum InequalityKind {746/// Strict inequality: {less,greater}-than.747Strict,748/// Loose inequality: {less,greater}-than-or-equal.749Loose,750}751752/// A "context" in which we can evaluate and derive facts. This753/// context carries environment/global properties, such as the machine754/// pointer width.755pub struct FactContext<'a> {756function: &'a ir::Function,757pointer_width: u16,758}759760impl<'a> FactContext<'a> {761/// Create a new "fact context" in which to evaluate facts.762pub fn new(function: &'a ir::Function, pointer_width: u16) -> Self {763FactContext {764function,765pointer_width,766}767}768769/// Computes whether `lhs` "subsumes" (implies) `rhs`.770pub fn subsumes(&self, lhs: &Fact, rhs: &Fact) -> bool {771match (lhs, rhs) {772// Reflexivity.773(l, r) if l == r => true,774775(776Fact::Range {777bit_width: bw_lhs,778min: min_lhs,779max: max_lhs,780},781Fact::Range {782bit_width: bw_rhs,783min: min_rhs,784max: max_rhs,785},786) => {787// If the bitwidths we're claiming facts about are the788// same, or the left-hand-side makes a claim about a789// wider bitwidth, and if the right-hand-side range is790// larger than the left-hand-side range, than the LHS791// subsumes the RHS.792//793// In other words, we can always expand the claimed794// possible value range.795bw_lhs >= bw_rhs && max_lhs <= max_rhs && min_lhs >= min_rhs796}797798(799Fact::DynamicRange {800bit_width: bw_lhs,801min: min_lhs,802max: max_lhs,803},804Fact::DynamicRange {805bit_width: bw_rhs,806min: min_rhs,807max: max_rhs,808},809) => {810// Nearly same as above, but with dynamic-expression811// comparisons. Note that we require equal bitwidths812// here: unlike in the static case, we don't have813// fixed values for min and max, so we can't lean on814// the well-formedness requirements of the static815// ranges fitting within the bit-width max.816bw_lhs == bw_rhs && Expr::le(max_lhs, max_rhs) && Expr::le(min_rhs, min_lhs)817}818819(820Fact::Mem {821ty: ty_lhs,822min_offset: min_offset_lhs,823max_offset: max_offset_lhs,824nullable: nullable_lhs,825},826Fact::Mem {827ty: ty_rhs,828min_offset: min_offset_rhs,829max_offset: max_offset_rhs,830nullable: nullable_rhs,831},832) => {833ty_lhs == ty_rhs834&& max_offset_lhs <= max_offset_rhs835&& min_offset_lhs >= min_offset_rhs836&& (*nullable_lhs || !*nullable_rhs)837}838839(840Fact::DynamicMem {841ty: ty_lhs,842min: min_lhs,843max: max_lhs,844nullable: nullable_lhs,845},846Fact::DynamicMem {847ty: ty_rhs,848min: min_rhs,849max: max_rhs,850nullable: nullable_rhs,851},852) => {853ty_lhs == ty_rhs854&& Expr::le(max_lhs, max_rhs)855&& Expr::le(min_rhs, min_lhs)856&& (*nullable_lhs || !*nullable_rhs)857}858859// Constant zero subsumes nullable DynamicMem pointers.860(861Fact::Range {862bit_width,863min: 0,864max: 0,865},866Fact::DynamicMem { nullable: true, .. },867) if *bit_width == self.pointer_width => true,868869// Any fact subsumes a Def, because the Def makes no870// claims about the actual value (it ties a symbol to that871// value, but the value is fed to the symbol, not the872// other way around).873(_, Fact::Def { .. }) => true,874875_ => false,876}877}878879/// Computes whether the optional fact `lhs` subsumes (implies)880/// the optional fact `lhs`. A `None` never subsumes any fact, and881/// is always subsumed by any fact at all (or no fact).882pub fn subsumes_fact_optionals(&self, lhs: Option<&Fact>, rhs: Option<&Fact>) -> bool {883match (lhs, rhs) {884(None, None) => true,885(Some(_), None) => true,886(None, Some(_)) => false,887(Some(lhs), Some(rhs)) => self.subsumes(lhs, rhs),888}889}890891/// Computes whatever fact can be known about the sum of two892/// values with attached facts. The add is performed to the given893/// bit-width. Note that this is distinct from the machine or894/// pointer width: e.g., many 64-bit machines can still do 32-bit895/// adds that wrap at 2^32.896pub fn add(&self, lhs: &Fact, rhs: &Fact, add_width: u16) -> Option<Fact> {897let result = match (lhs, rhs) {898(899Fact::Range {900bit_width: bw_lhs,901min: min_lhs,902max: max_lhs,903},904Fact::Range {905bit_width: bw_rhs,906min: min_rhs,907max: max_rhs,908},909) if bw_lhs == bw_rhs && add_width >= *bw_lhs => {910let computed_min = min_lhs.checked_add(*min_rhs)?;911let computed_max = max_lhs.checked_add(*max_rhs)?;912let computed_max = std::cmp::min(max_value_for_width(add_width), computed_max);913Some(Fact::Range {914bit_width: *bw_lhs,915min: computed_min,916max: computed_max,917})918}919920(921Fact::Range {922bit_width: bw_max,923min,924max,925},926Fact::Mem {927ty,928min_offset,929max_offset,930nullable,931},932)933| (934Fact::Mem {935ty,936min_offset,937max_offset,938nullable,939},940Fact::Range {941bit_width: bw_max,942min,943max,944},945) if *bw_max >= self.pointer_width946&& add_width >= *bw_max947&& (!*nullable || *max == 0) =>948{949let min_offset = min_offset.checked_add(*min)?;950let max_offset = max_offset.checked_add(*max)?;951Some(Fact::Mem {952ty: *ty,953min_offset,954max_offset,955nullable: false,956})957}958959(960Fact::Range {961bit_width: bw_static,962min: min_static,963max: max_static,964},965Fact::DynamicRange {966bit_width: bw_dynamic,967min: min_dynamic,968max: max_dynamic,969},970)971| (972Fact::DynamicRange {973bit_width: bw_dynamic,974min: min_dynamic,975max: max_dynamic,976},977Fact::Range {978bit_width: bw_static,979min: min_static,980max: max_static,981},982) if bw_static == bw_dynamic => {983let min = Expr::offset(min_dynamic, i64::try_from(*min_static).ok()?)?;984let max = Expr::offset(max_dynamic, i64::try_from(*max_static).ok()?)?;985Some(Fact::DynamicRange {986bit_width: *bw_dynamic,987min,988max,989})990}991992(993Fact::DynamicMem {994ty,995min: min_mem,996max: max_mem,997nullable: false,998},999Fact::DynamicRange {1000bit_width,1001min: min_range,1002max: max_range,1003},1004)1005| (1006Fact::DynamicRange {1007bit_width,1008min: min_range,1009max: max_range,1010},1011Fact::DynamicMem {1012ty,1013min: min_mem,1014max: max_mem,1015nullable: false,1016},1017) if *bit_width == self.pointer_width => {1018let min = Expr::add(min_mem, min_range)?;1019let max = Expr::add(max_mem, max_range)?;1020Some(Fact::DynamicMem {1021ty: *ty,1022min,1023max,1024nullable: false,1025})1026}10271028(1029Fact::Mem {1030ty,1031min_offset,1032max_offset,1033nullable: false,1034},1035Fact::DynamicRange {1036bit_width,1037min: min_range,1038max: max_range,1039},1040)1041| (1042Fact::DynamicRange {1043bit_width,1044min: min_range,1045max: max_range,1046},1047Fact::Mem {1048ty,1049min_offset,1050max_offset,1051nullable: false,1052},1053) if *bit_width == self.pointer_width => {1054let min = Expr::offset(min_range, i64::try_from(*min_offset).ok()?)?;1055let max = Expr::offset(max_range, i64::try_from(*max_offset).ok()?)?;1056Some(Fact::DynamicMem {1057ty: *ty,1058min,1059max,1060nullable: false,1061})1062}10631064(1065Fact::Range {1066bit_width: bw_static,1067min: min_static,1068max: max_static,1069},1070Fact::DynamicMem {1071ty,1072min: min_dynamic,1073max: max_dynamic,1074nullable,1075},1076)1077| (1078Fact::DynamicMem {1079ty,1080min: min_dynamic,1081max: max_dynamic,1082nullable,1083},1084Fact::Range {1085bit_width: bw_static,1086min: min_static,1087max: max_static,1088},1089) if *bw_static == self.pointer_width && (!*nullable || *max_static == 0) => {1090let min = Expr::offset(min_dynamic, i64::try_from(*min_static).ok()?)?;1091let max = Expr::offset(max_dynamic, i64::try_from(*max_static).ok()?)?;1092Some(Fact::DynamicMem {1093ty: *ty,1094min,1095max,1096nullable: false,1097})1098}10991100_ => None,1101};11021103trace!("add: {lhs:?} + {rhs:?} -> {result:?}");1104result1105}11061107/// Computes the `uextend` of a value with the given facts.1108pub fn uextend(&self, fact: &Fact, from_width: u16, to_width: u16) -> Option<Fact> {1109if from_width == to_width {1110return Some(fact.clone());1111}11121113let result = match fact {1114// If the claim is already for a same-or-wider value and the min1115// and max are within range of the narrower value, we can1116// claim the same range.1117Fact::Range {1118bit_width,1119min,1120max,1121} if *bit_width >= from_width1122&& *min <= max_value_for_width(from_width)1123&& *max <= max_value_for_width(from_width) =>1124{1125Some(Fact::Range {1126bit_width: to_width,1127min: *min,1128max: *max,1129})1130}11311132// If the claim is a dynamic range for the from-width, we1133// can extend to the to-width.1134Fact::DynamicRange {1135bit_width,1136min,1137max,1138} if *bit_width == from_width => Some(Fact::DynamicRange {1139bit_width: to_width,1140min: min.clone(),1141max: max.clone(),1142}),11431144// If the claim is a definition of a value, we can say1145// that the output has a range of exactly that value.1146Fact::Def { value } => Some(Fact::value(to_width, *value)),11471148// Otherwise, we can at least claim that the value is1149// within the range of `from_width`.1150Fact::Range { .. } => Some(Fact::max_range_for_width_extended(from_width, to_width)),11511152_ => None,1153};1154trace!("uextend: fact {fact:?} from {from_width} to {to_width} -> {result:?}");1155result1156}11571158/// Computes the `sextend` of a value with the given facts.1159pub fn sextend(&self, fact: &Fact, from_width: u16, to_width: u16) -> Option<Fact> {1160match fact {1161// If we have a defined value in bits 0..bit_width, and1162// the MSB w.r.t. `from_width` is *not* set, then we can1163// do the same as `uextend`.1164Fact::Range {1165bit_width,1166// We can ignore `min`: it is always <= max in1167// unsigned terms, and we check max's LSB below.1168min: _,1169max,1170} if *bit_width == from_width && (*max & (1 << (*bit_width - 1)) == 0) => {1171self.uextend(fact, from_width, to_width)1172}1173_ => None,1174}1175}11761177/// Computes the bit-truncation of a value with the given fact.1178pub fn truncate(&self, fact: &Fact, from_width: u16, to_width: u16) -> Option<Fact> {1179if from_width == to_width {1180return Some(fact.clone());1181}11821183trace!(1184"truncate: fact {:?} from {} to {}",1185fact, from_width, to_width1186);11871188match fact {1189Fact::Range {1190bit_width,1191min,1192max,1193} if *bit_width == from_width => {1194let max_val = (1u64 << to_width) - 1;1195if *min <= max_val && *max <= max_val {1196Some(Fact::Range {1197bit_width: to_width,1198min: *min,1199max: *max,1200})1201} else {1202Some(Fact::Range {1203bit_width: to_width,1204min: 0,1205max: max_val,1206})1207}1208}1209_ => None,1210}1211}12121213/// Scales a value with a fact by a known constant.1214pub fn scale(&self, fact: &Fact, width: u16, factor: u32) -> Option<Fact> {1215let result = match fact {1216x if factor == 1 => Some(x.clone()),12171218Fact::Range {1219bit_width,1220min,1221max,1222} if *bit_width == width => {1223let min = min.checked_mul(u64::from(factor))?;1224let max = max.checked_mul(u64::from(factor))?;1225if *bit_width < 64 && max > max_value_for_width(width) {1226return None;1227}1228Some(Fact::Range {1229bit_width: *bit_width,1230min,1231max,1232})1233}1234_ => None,1235};1236trace!("scale: {fact:?} * {factor} at width {width} -> {result:?}");1237result1238}12391240/// Left-shifts a value with a fact by a known constant.1241pub fn shl(&self, fact: &Fact, width: u16, amount: u16) -> Option<Fact> {1242if amount >= 32 {1243return None;1244}1245let factor: u32 = 1 << amount;1246self.scale(fact, width, factor)1247}12481249/// Offsets a value with a fact by a known amount.1250pub fn offset(&self, fact: &Fact, width: u16, offset: i64) -> Option<Fact> {1251if offset == 0 {1252return Some(fact.clone());1253}12541255let compute_offset = |base: u64| -> Option<u64> {1256if offset >= 0 {1257base.checked_add(u64::try_from(offset).unwrap())1258} else {1259base.checked_sub(u64::try_from(-offset).unwrap())1260}1261};12621263let result = match fact {1264Fact::Range {1265bit_width,1266min,1267max,1268} if *bit_width == width => {1269let min = compute_offset(*min)?;1270let max = compute_offset(*max)?;1271Some(Fact::Range {1272bit_width: *bit_width,1273min,1274max,1275})1276}1277Fact::DynamicRange {1278bit_width,1279min,1280max,1281} if *bit_width == width => {1282let min = Expr::offset(min, offset)?;1283let max = Expr::offset(max, offset)?;1284Some(Fact::DynamicRange {1285bit_width: *bit_width,1286min,1287max,1288})1289}1290Fact::Mem {1291ty,1292min_offset: mem_min_offset,1293max_offset: mem_max_offset,1294nullable: false,1295} => {1296let min_offset = compute_offset(*mem_min_offset)?;1297let max_offset = compute_offset(*mem_max_offset)?;1298Some(Fact::Mem {1299ty: *ty,1300min_offset,1301max_offset,1302nullable: false,1303})1304}1305Fact::DynamicMem {1306ty,1307min,1308max,1309nullable: false,1310} => {1311let min = Expr::offset(min, offset)?;1312let max = Expr::offset(max, offset)?;1313Some(Fact::DynamicMem {1314ty: *ty,1315min,1316max,1317nullable: false,1318})1319}1320_ => None,1321};1322trace!("offset: {fact:?} + {offset} in width {width} -> {result:?}");1323result1324}13251326/// Check that accessing memory via a pointer with this fact, with1327/// a memory access of the given size, is valid.1328///1329/// If valid, returns the memory type and offset into that type1330/// that this address accesses, if known, or `None` if the range1331/// doesn't constrain the access to exactly one location.1332fn check_address(&self, fact: &Fact, size: u32) -> PccResult<Option<(ir::MemoryType, u64)>> {1333trace!("check_address: fact {:?} size {}", fact, size);1334match fact {1335Fact::Mem {1336ty,1337min_offset,1338max_offset,1339nullable: _,1340} => {1341let end_offset: u64 = max_offset1342.checked_add(u64::from(size))1343.ok_or(PccError::Overflow)?;1344match &self.function.memory_types[*ty] {1345ir::MemoryTypeData::Struct { size, .. }1346| ir::MemoryTypeData::Memory { size } => {1347ensure!(end_offset <= *size, OutOfBounds)1348}1349ir::MemoryTypeData::DynamicMemory { .. } => bail!(OutOfBounds),1350ir::MemoryTypeData::Empty => bail!(OutOfBounds),1351}1352let specific_ty_and_offset = if min_offset == max_offset {1353Some((*ty, *min_offset))1354} else {1355None1356};1357Ok(specific_ty_and_offset)1358}1359Fact::DynamicMem {1360ty,1361min: _,1362max:1363Expr {1364base: BaseExpr::GlobalValue(max_gv),1365offset: max_offset,1366},1367nullable: _,1368} => match &self.function.memory_types[*ty] {1369ir::MemoryTypeData::DynamicMemory {1370gv,1371size: mem_static_size,1372} if gv == max_gv => {1373let end_offset = max_offset1374.checked_add(i64::from(size))1375.ok_or(PccError::Overflow)?;1376let mem_static_size =1377i64::try_from(*mem_static_size).map_err(|_| PccError::Overflow)?;1378ensure!(end_offset <= mem_static_size, OutOfBounds);1379Ok(None)1380}1381_ => bail!(OutOfBounds),1382},1383_ => bail!(OutOfBounds),1384}1385}13861387/// Get the access struct field, if any, by a pointer with the1388/// given fact and an access of the given type.1389pub fn struct_field<'b>(1390&'b self,1391fact: &Fact,1392access_ty: ir::Type,1393) -> PccResult<Option<&'b ir::MemoryTypeField>> {1394let (ty, offset) = match self.check_address(fact, access_ty.bytes())? {1395Some((ty, offset)) => (ty, offset),1396None => return Ok(None),1397};13981399if let ir::MemoryTypeData::Struct { fields, .. } = &self.function.memory_types[ty] {1400let field = fields1401.iter()1402.find(|field| field.offset == offset)1403.ok_or(PccError::InvalidFieldOffset)?;1404if field.ty != access_ty {1405bail!(BadFieldType);1406}1407Ok(Some(field))1408} else {1409// Access to valid memory, but not a struct: no facts can be attached to the result.1410Ok(None)1411}1412}14131414/// Check a load, and determine what fact, if any, the result of the load might have.1415pub fn load<'b>(&'b self, fact: &Fact, access_ty: ir::Type) -> PccResult<Option<&'b Fact>> {1416Ok(self1417.struct_field(fact, access_ty)?1418.and_then(|field| field.fact()))1419}14201421/// Check a store.1422pub fn store(1423&self,1424fact: &Fact,1425access_ty: ir::Type,1426data_fact: Option<&Fact>,1427) -> PccResult<()> {1428if let Some(field) = self.struct_field(fact, access_ty)? {1429// If it's a read-only field, disallow.1430if field.readonly {1431bail!(WriteToReadOnlyField);1432}1433// Check that the fact on the stored data subsumes the field's fact.1434if !self.subsumes_fact_optionals(data_fact, field.fact()) {1435bail!(InvalidStoredFact);1436}1437}1438Ok(())1439}14401441/// Apply a known inequality to rewrite dynamic bounds using transitivity, if possible.1442///1443/// Given that `lhs >= rhs` (if not `strict`) or `lhs > rhs` (if1444/// `strict`), update `fact`.1445pub fn apply_inequality(1446&self,1447fact: &Fact,1448lhs: &Fact,1449rhs: &Fact,1450kind: InequalityKind,1451) -> Fact {1452let result = match (1453lhs.as_symbol(),1454lhs.as_const(self.pointer_width)1455.and_then(|k| i64::try_from(k).ok()),1456rhs.as_symbol(),1457fact,1458) {1459(1460Some(lhs),1461None,1462Some(rhs),1463Fact::DynamicMem {1464ty,1465min,1466max,1467nullable,1468},1469) if rhs.base == max.base => {1470let strict_offset = match kind {1471InequalityKind::Strict => 1,1472InequalityKind::Loose => 0,1473};1474if let Some(offset) = max1475.offset1476.checked_add(lhs.offset)1477.and_then(|x| x.checked_sub(rhs.offset))1478.and_then(|x| x.checked_sub(strict_offset))1479{1480let new_max = Expr {1481base: lhs.base.clone(),1482offset,1483};1484Fact::DynamicMem {1485ty: *ty,1486min: min.clone(),1487max: new_max,1488nullable: *nullable,1489}1490} else {1491fact.clone()1492}1493}14941495(1496None,1497Some(lhs_const),1498Some(rhs),1499Fact::DynamicMem {1500ty,1501min: _,1502max,1503nullable,1504},1505) if rhs.base == max.base => {1506let strict_offset = match kind {1507InequalityKind::Strict => 1,1508InequalityKind::Loose => 0,1509};1510if let Some(offset) = max1511.offset1512.checked_add(lhs_const)1513.and_then(|x| x.checked_sub(rhs.offset))1514.and_then(|x| x.checked_sub(strict_offset))1515{1516Fact::Mem {1517ty: *ty,1518min_offset: 0,1519max_offset: u64::try_from(offset).unwrap_or(0),1520nullable: *nullable,1521}1522} else {1523fact.clone()1524}1525}15261527_ => fact.clone(),1528};1529trace!("apply_inequality({fact:?}, {lhs:?}, {rhs:?}, {kind:?} -> {result:?}");1530result1531}15321533/// Compute the union of two facts, if possible.1534pub fn union(&self, lhs: &Fact, rhs: &Fact) -> Option<Fact> {1535let result = match (lhs, rhs) {1536(lhs, rhs) if lhs == rhs => Some(lhs.clone()),15371538(1539Fact::DynamicMem {1540ty: ty_lhs,1541min: min_lhs,1542max: max_lhs,1543nullable: nullable_lhs,1544},1545Fact::DynamicMem {1546ty: ty_rhs,1547min: min_rhs,1548max: max_rhs,1549nullable: nullable_rhs,1550},1551) if ty_lhs == ty_rhs => Some(Fact::DynamicMem {1552ty: *ty_lhs,1553min: Expr::min(min_lhs, min_rhs),1554max: Expr::max(max_lhs, max_rhs),1555nullable: *nullable_lhs || *nullable_rhs,1556}),15571558(1559Fact::Range {1560bit_width: bw_const,1561min: 0,1562max: 0,1563},1564Fact::DynamicMem {1565ty,1566min,1567max,1568nullable: _,1569},1570)1571| (1572Fact::DynamicMem {1573ty,1574min,1575max,1576nullable: _,1577},1578Fact::Range {1579bit_width: bw_const,1580min: 0,1581max: 0,1582},1583) if *bw_const == self.pointer_width => Some(Fact::DynamicMem {1584ty: *ty,1585min: min.clone(),1586max: max.clone(),1587nullable: true,1588}),15891590(1591Fact::Range {1592bit_width: bw_const,1593min: 0,1594max: 0,1595},1596Fact::Mem {1597ty,1598min_offset,1599max_offset,1600nullable: _,1601},1602)1603| (1604Fact::Mem {1605ty,1606min_offset,1607max_offset,1608nullable: _,1609},1610Fact::Range {1611bit_width: bw_const,1612min: 0,1613max: 0,1614},1615) if *bw_const == self.pointer_width => Some(Fact::Mem {1616ty: *ty,1617min_offset: *min_offset,1618max_offset: *max_offset,1619nullable: true,1620}),16211622_ => None,1623};1624trace!("union({lhs:?}, {rhs:?}) -> {result:?}");1625result1626}1627}16281629fn max_value_for_width(bits: u16) -> u64 {1630assert!(bits <= 64);1631if bits == 64 {1632u64::MAX1633} else {1634(1u64 << bits) - 11635}1636}16371638/// Top-level entry point after compilation: this checks the facts in1639/// VCode.1640pub fn check_vcode_facts<B: LowerBackend + TargetIsa>(1641f: &ir::Function,1642vcode: &mut VCode<B::MInst>,1643backend: &B,1644) -> PccResult<()> {1645let ctx = FactContext::new(f, backend.triple().pointer_width().unwrap().bits().into());16461647// Check that individual instructions are valid according to input1648// facts, and support the stated output facts.1649for block in 0..vcode.num_blocks() {1650let block = BlockIndex::new(block);1651let mut flow_state = B::FactFlowState::default();1652for inst in vcode.block_insns(block).iter() {1653// Check any output facts on this inst.1654if let Err(e) = backend.check_fact(&ctx, vcode, inst, &mut flow_state) {1655log::info!("Error checking instruction: {:?}", vcode[inst]);1656return Err(e);1657}16581659// If this is a branch, check that all block arguments subsume1660// the assumed facts on the blockparams of successors.1661if vcode.is_branch(inst) {1662for (succ_idx, succ) in vcode.block_succs(block).iter().enumerate() {1663for (arg, param) in vcode1664.branch_blockparams(block, inst, succ_idx)1665.iter()1666.zip(vcode.block_params(*succ).iter())1667{1668let arg_fact = vcode.vreg_fact(*arg);1669let param_fact = vcode.vreg_fact(*param);1670if !ctx.subsumes_fact_optionals(arg_fact, param_fact) {1671return Err(PccError::UnsupportedBlockparam);1672}1673}1674}1675}1676}1677}1678Ok(())1679}168016811682