// Path: blob/main/crates/polars-plan/src/plans/ir/schema.rs
// 6940 views
use recursive::recursive;12use super::*;34impl IR {5/// Get the schema of the logical plan node but don't take projections into account at the scan6/// level. This ensures we can apply the predicate7pub(crate) fn scan_schema(&self) -> &SchemaRef {8use IR::*;9match self {10Scan { file_info, .. } => &file_info.schema,11#[cfg(feature = "python")]12PythonScan { options, .. } => &options.schema,13_ => unreachable!(),14}15}1617pub fn name(&self) -> &'static str {18use IR::*;19match self {20Scan { scan_type, .. } => (&**scan_type).into(),21#[cfg(feature = "python")]22PythonScan { .. } => "python_scan",23Slice { .. } => "slice",24Filter { .. } => "filter",25DataFrameScan { .. } => "df",26Select { .. } => "projection",27Sort { .. } => "sort",28Cache { .. } => "cache",29GroupBy { .. } => "aggregate",30Join { .. } => "join",31HStack { .. } => "hstack",32Distinct { .. } => "distinct",33MapFunction { .. } => "map_function",34Union { .. } => "union",35HConcat { .. } => "hconcat",36ExtContext { .. } => "ext_context",37Sink { payload, .. } => match payload {38SinkTypeIR::Memory => "sink (memory)",39SinkTypeIR::File { .. } => "sink (file)",40SinkTypeIR::Partition { .. } => "sink (partition)",41},42SinkMultiple { .. } => "sink multiple",43SimpleProjection { .. } => "simple_projection",44#[cfg(feature = "merge_sorted")]45MergeSorted { .. } => "merge_sorted",46Invalid => "invalid",47}48}4950pub fn input_schema<'a>(&'a self, arena: &'a Arena<IR>) -> Option<Cow<'a, SchemaRef>> {51use IR::*;52let schema = match self {53#[cfg(feature = "python")]54PythonScan { options } => &options.schema,55DataFrameScan { schema, .. } => schema,56Scan { file_info, .. 
} => &file_info.schema,57node => {58let input = node.get_input()?;59return Some(arena.get(input).schema(arena));60},61};62Some(Cow::Borrowed(schema))63}6465/// Get the schema of the logical plan node.66#[recursive]67pub fn schema<'a>(&'a self, arena: &'a Arena<IR>) -> Cow<'a, SchemaRef> {68use IR::*;69let schema = match self {70#[cfg(feature = "python")]71PythonScan { options } => options.output_schema.as_ref().unwrap_or(&options.schema),72Union { inputs, .. } => return arena.get(inputs[0]).schema(arena),73HConcat { schema, .. } => schema,74Cache { input, .. } => return arena.get(*input).schema(arena),75Sort { input, .. } => return arena.get(*input).schema(arena),76Scan {77output_schema,78file_info,79..80} => output_schema.as_ref().unwrap_or(&file_info.schema),81DataFrameScan {82schema,83output_schema,84..85} => output_schema.as_ref().unwrap_or(schema),86Filter { input, .. } => return arena.get(*input).schema(arena),87Select { schema, .. } => schema,88SimpleProjection { columns, .. } => columns,89GroupBy { schema, .. } => schema,90Join { schema, .. } => schema,91HStack { schema, .. } => schema,92Distinct { input, .. }93| Sink {94input,95payload: SinkTypeIR::Memory,96} => return arena.get(*input).schema(arena),97Sink { .. } | SinkMultiple { .. } => return Cow::Owned(Arc::new(Schema::default())),98Slice { input, .. } => return arena.get(*input).schema(arena),99MapFunction { input, function } => {100let input_schema = arena.get(*input).schema(arena);101102return match input_schema {103Cow::Owned(schema) => {104Cow::Owned(function.schema(&schema).unwrap().into_owned())105},106Cow::Borrowed(schema) => function.schema(schema).unwrap(),107};108},109ExtContext { schema, .. } => schema,110#[cfg(feature = "merge_sorted")]111MergeSorted { input_left, .. 
} => return arena.get(*input_left).schema(arena),112Invalid => unreachable!(),113};114Cow::Borrowed(schema)115}116117/// Get the schema of the logical plan node, using caching.118#[recursive]119pub fn schema_with_cache<'a>(120node: Node,121arena: &'a Arena<IR>,122cache: &mut PlHashMap<Node, Arc<Schema>>,123) -> Arc<Schema> {124use IR::*;125if let Some(schema) = cache.get(&node) {126return schema.clone();127}128129let schema = match arena.get(node) {130#[cfg(feature = "python")]131PythonScan { options } => options132.output_schema133.as_ref()134.unwrap_or(&options.schema)135.clone(),136Union { inputs, .. } => IR::schema_with_cache(inputs[0], arena, cache),137HConcat { schema, .. } => schema.clone(),138Cache { input, .. }139| Sort { input, .. }140| Filter { input, .. }141| Distinct { input, .. }142| Sink {143input,144payload: SinkTypeIR::Memory,145}146| Slice { input, .. } => IR::schema_with_cache(*input, arena, cache),147Sink { .. } | SinkMultiple { .. } => Arc::new(Schema::default()),148Scan {149output_schema,150file_info,151..152} => output_schema.as_ref().unwrap_or(&file_info.schema).clone(),153DataFrameScan {154schema,155output_schema,156..157} => output_schema.as_ref().unwrap_or(schema).clone(),158Select { schema, .. }159| GroupBy { schema, .. }160| Join { schema, .. }161| HStack { schema, .. }162| ExtContext { schema, .. }163| SimpleProjection {164columns: schema, ..165} => schema.clone(),166MapFunction { input, function } => {167let input_schema = IR::schema_with_cache(*input, arena, cache);168function.schema(&input_schema).unwrap().into_owned()169},170#[cfg(feature = "merge_sorted")]171MergeSorted { input_left, .. } => IR::schema_with_cache(*input_left, arena, cache),172Invalid => unreachable!(),173};174cache.insert(node, schema.clone());175schema176}177}178179180