Path: blob/main/crates/polars-plan/src/plans/visitor/hash.rs
8479 views
use std::hash::{Hash, Hasher};1use std::sync::Arc;23use polars_utils::arena::Arena;45use super::*;6#[cfg(feature = "python")]7use crate::plans::PythonOptions;8use crate::plans::{AExpr, IR};9use crate::prelude::aexpr::traverse_and_hash_aexpr;10use crate::prelude::{ExprIR, PlanCallback};1112impl IRNode {13pub(crate) fn hashable_and_cmp<'a>(14&'a self,15lp_arena: &'a Arena<IR>,16expr_arena: &'a Arena<AExpr>,17) -> IRHashWrap<'a> {18IRHashWrap {19node: *self,20lp_arena,21expr_arena,22hash_as_equality: false,23}24}25}2627pub(crate) struct IRHashWrap<'a> {28node: IRNode,29lp_arena: &'a Arena<IR>,30expr_arena: &'a Arena<AExpr>,31hash_as_equality: bool,32}3334impl IRHashWrap<'_> {35pub fn hash_as_equality(mut self) -> Self {36self.hash_as_equality = true;37self38}39}4041fn hash_option_expr<H: Hasher>(expr: &Option<ExprIR>, expr_arena: &Arena<AExpr>, state: &mut H) {42if let Some(e) = expr {43e.traverse_and_hash(expr_arena, state)44}45}4647fn hash_exprs<H: Hasher>(exprs: &[ExprIR], expr_arena: &Arena<AExpr>, state: &mut H) {48for e in exprs {49e.traverse_and_hash(expr_arena, state);50}51}5253/// Specialized Hash that dispatches to `ExprIR::traverse_and_hash` instead of just hashing54/// the `Node`.55#[cfg(feature = "python")]56fn hash_python_predicate<H: Hasher>(57pred: &crate::prelude::PythonPredicate,58expr_arena: &Arena<AExpr>,59state: &mut H,60) {61use crate::prelude::PythonPredicate;62std::mem::discriminant(pred).hash(state);63match pred {64PythonPredicate::None => {},65PythonPredicate::PyArrow(s) => s.hash(state),66PythonPredicate::Polars(e) => e.traverse_and_hash(expr_arena, state),67}68}6970impl Hash for IRHashWrap<'_> {71// This hashes the variant, not the whole plan72fn hash<H: Hasher>(&self, state: &mut H) {73let alp = self.node.to_alp(self.lp_arena);74std::mem::discriminant(alp).hash(state);75match alp {76#[cfg(feature = "python")]77IR::PythonScan {78options:79PythonOptions {80scan_fn,81schema,82output_schema,83with_columns,84python_source,85n_rows,86predicate,87validate_schema,88is_pure,89},90} => {91// Hash the Python function object using the pointer to the object92// This should be the same as calling id() in python, but we don't need the GIL9394use std::sync::atomic::AtomicU64;95static UNIQUE_COUNT: AtomicU64 = AtomicU64::new(0);96if let Some(scan_fn) = scan_fn {97let ptr_addr = scan_fn.0.as_ptr() as usize;98ptr_addr.hash(state);99}100// Hash the stable fields101// We include the schema since it can be set by the user102schema.hash(state);103output_schema.hash(state);104with_columns.hash(state);105python_source.hash(state);106n_rows.hash(state);107hash_python_predicate(predicate, self.expr_arena, state);108validate_schema.hash(state);109110if self.hash_as_equality && !*is_pure {111let val = UNIQUE_COUNT.fetch_add(1, std::sync::atomic::Ordering::Relaxed);112val.hash(state)113} else {114is_pure.hash(state)115}116},117IR::Slice {118offset,119len,120input: _,121} => {122len.hash(state);123offset.hash(state);124},125IR::Filter {126input: _,127predicate,128} => {129predicate.traverse_and_hash(self.expr_arena, state);130},131IR::Scan {132sources,133file_info: _,134hive_parts: _,135predicate,136predicate_file_skip_applied: _,137output_schema: _,138scan_type,139unified_scan_args,140} => {141// We don't have to traverse the schema, hive partitions etc. as they are derivative from the paths.142scan_type.hash(state);143sources.hash(state);144hash_option_expr(predicate, self.expr_arena, state);145unified_scan_args.hash(state);146},147IR::DataFrameScan {148df,149schema: _,150output_schema,151..152} => {153(Arc::as_ptr(df) as usize).hash(state);154output_schema.hash(state);155},156IR::SimpleProjection { columns, input: _ } => {157columns.hash(state);158},159IR::Select {160input: _,161expr,162schema: _,163options,164} => {165hash_exprs(expr, self.expr_arena, state);166options.hash(state);167},168IR::Sort {169input: _,170by_column,171slice,172sort_options,173} => {174hash_exprs(by_column, self.expr_arena, state);175slice.hash(state);176sort_options.hash(state);177},178IR::GroupBy {179input: _,180keys,181aggs,182schema: _,183apply,184maintain_order,185options,186} => {187hash_exprs(keys, self.expr_arena, state);188hash_exprs(aggs, self.expr_arena, state);189190if let Some(function) = apply {191true.hash(state);192match function {193PlanCallback::Rust(f) => {194f.hash(state);195},196#[cfg(feature = "python")]197PlanCallback::Python(f) => {198f.hash(state);199},200}201}202203apply.is_none().hash(state);204maintain_order.hash(state);205options.hash(state);206},207IR::Join {208input_left: _,209input_right: _,210schema: _,211left_on,212right_on,213options,214} => {215hash_exprs(left_on, self.expr_arena, state);216hash_exprs(right_on, self.expr_arena, state);217options.hash(state);218},219IR::HStack {220input: _,221exprs,222schema: _,223options,224} => {225hash_exprs(exprs, self.expr_arena, state);226options.hash(state);227},228IR::Distinct { input: _, options } => {229options.hash(state);230},231IR::MapFunction { input: _, function } => {232function.hash(state);233},234IR::Union { inputs: _, options } => options.hash(state),235IR::HConcat {236inputs: _,237schema: _,238options,239} => {240options.hash(state);241},242IR::ExtContext {243input: _,244contexts,245schema: _,246} => {247for node in contexts {248traverse_and_hash_aexpr(*node, self.expr_arena, state);249}250},251IR::Sink { input: _, payload } => {252payload.traverse_and_hash(self.expr_arena, state);253},254IR::SinkMultiple { inputs: _ } => {},255IR::Cache { input: _, id } => {256id.hash(state);257},258#[cfg(feature = "merge_sorted")]259IR::MergeSorted {260input_left: _,261input_right: _,262key,263} => {264key.hash(state);265},266IR::Invalid => unreachable!(),267}268}269}270271272