Path: blob/main/crates/polars-stream/src/nodes/io_sources/multi_scan/config.rs
8475 views
use std::sync::{Arc, OnceLock};12use polars_core::schema::SchemaRef;3use polars_io::RowIndex;4use polars_io::cloud::CloudOptions;5use polars_io::predicates::ScanIOPredicate;6use polars_plan::dsl::deletion::DeletionFilesList;7use polars_plan::dsl::{8CastColumnsPolicy, MissingColumnsPolicy, PredicateFileSkip, ScanSources, TableStatistics,9};10use polars_plan::plans::hive::HivePartitionsDf;11use polars_utils::pl_str::PlSmallStr;12use polars_utils::relaxed_cell::RelaxedCell;13use polars_utils::slice_enum::Slice;14use reader_interface::builder::FileReaderBuilder;15use reader_interface::capabilities::ReaderCapabilities;1617use crate::metrics::IOMetrics;18use crate::nodes::io_sources::multi_scan::components::forbid_extra_columns::ForbidExtraColumns;19use crate::nodes::io_sources::multi_scan::components::projection::builder::ProjectionBuilder;20use crate::nodes::io_sources::multi_scan::reader_interface;2122// Some parts are called MultiScan for now to avoid conflict with existing MultiScan.2324pub struct MultiScanConfig {25pub sources: ScanSources,26pub file_reader_builder: Arc<dyn FileReaderBuilder>,27pub cloud_options: Option<Arc<CloudOptions>>,2829/// Final output schema of MultiScan node. Includes all e.g. row index / missing columns / file paths / hive etc.30pub final_output_schema: SchemaRef,31/// Columns to be projected from the file.32pub file_projection_builder: ProjectionBuilder,3334pub row_index: Option<RowIndex>,35pub pre_slice: Option<Slice>,36pub predicate: Option<ScanIOPredicate>,37pub predicate_file_skip_applied: Option<PredicateFileSkip>,3839pub hive_parts: Option<Arc<HivePartitionsDf>>,40pub include_file_paths: Option<PlSmallStr>,41pub missing_columns_policy: MissingColumnsPolicy,42pub cast_columns_policy: CastColumnsPolicy,43pub forbid_extra_columns: Option<ForbidExtraColumns>,44pub deletion_files: Option<DeletionFilesList>,45pub table_statistics: Option<TableStatistics>,4647pub num_pipelines: RelaxedCell<usize>,48/// Number of readers to initialize concurrently. e.g. Parquet will want to fetch metadata in this49/// step.50pub n_readers_pre_init: RelaxedCell<usize>,51pub max_concurrent_scans: RelaxedCell<usize>,52pub disable_morsel_split: bool,53pub io_metrics: OnceLock<Arc<IOMetrics>>,5455pub verbose: bool,56}5758impl MultiScanConfig {59pub fn num_pipelines(&self) -> usize {60self.num_pipelines.load()61}6263pub fn n_readers_pre_init(&self) -> usize {64self.n_readers_pre_init.load()65}6667pub fn max_concurrent_scans(&self) -> usize {68self.max_concurrent_scans.load()69}7071pub fn io_metrics(&self) -> Option<Arc<IOMetrics>> {72self.io_metrics.get().cloned()73}7475pub fn reader_capabilities(&self) -> ReaderCapabilities {76if std::env::var("POLARS_FORCE_EMPTY_READER_CAPABILITIES").as_deref() == Ok("1") {77self.file_reader_builder.reader_capabilities()78& ReaderCapabilities::NEEDS_FILE_CACHE_INIT79} else {80self.file_reader_builder.reader_capabilities()81}82}83}848586