Path: blob/main/crates/polars-stream/src/nodes/io_sources/multi_scan/config.rs
6939 views
use std::sync::Arc;12use polars_core::schema::SchemaRef;3use polars_io::RowIndex;4use polars_io::cloud::CloudOptions;5use polars_io::predicates::ScanIOPredicate;6use polars_plan::dsl::deletion::DeletionFilesList;7use polars_plan::dsl::{CastColumnsPolicy, MissingColumnsPolicy, ScanSources};8use polars_plan::plans::hive::HivePartitionsDf;9use polars_utils::pl_str::PlSmallStr;10use polars_utils::relaxed_cell::RelaxedCell;11use polars_utils::slice_enum::Slice;12use reader_interface::builder::FileReaderBuilder;13use reader_interface::capabilities::ReaderCapabilities;1415use crate::nodes::io_sources::multi_scan::components::forbid_extra_columns::ForbidExtraColumns;16use crate::nodes::io_sources::multi_scan::components::projection::builder::ProjectionBuilder;17use crate::nodes::io_sources::multi_scan::reader_interface;1819// Some parts are called MultiScan for now to avoid conflict with existing MultiScan.2021pub struct MultiScanConfig {22pub sources: ScanSources,23pub file_reader_builder: Arc<dyn FileReaderBuilder>,24pub cloud_options: Option<Arc<CloudOptions>>,2526/// Final output schema of MultiScan node. Includes all e.g. row index / missing columns / file paths / hive etc.27pub final_output_schema: SchemaRef,28/// Columns to be projected from the file.29pub file_projection_builder: ProjectionBuilder,3031pub row_index: Option<RowIndex>,32pub pre_slice: Option<Slice>,33pub predicate: Option<ScanIOPredicate>,3435pub hive_parts: Option<Arc<HivePartitionsDf>>,36pub include_file_paths: Option<PlSmallStr>,37pub missing_columns_policy: MissingColumnsPolicy,38pub cast_columns_policy: CastColumnsPolicy,39pub forbid_extra_columns: Option<ForbidExtraColumns>,40pub deletion_files: Option<DeletionFilesList>,4142pub num_pipelines: RelaxedCell<usize>,43/// Number of readers to initialize concurrently. e.g. Parquet will want to fetch metadata in this44/// step.45pub n_readers_pre_init: RelaxedCell<usize>,46pub max_concurrent_scans: RelaxedCell<usize>,4748pub verbose: bool,49}5051impl MultiScanConfig {52pub fn num_pipelines(&self) -> usize {53self.num_pipelines.load()54}5556pub fn n_readers_pre_init(&self) -> usize {57self.n_readers_pre_init.load()58}5960pub fn max_concurrent_scans(&self) -> usize {61self.max_concurrent_scans.load()62}6364pub fn reader_capabilities(&self) -> ReaderCapabilities {65if std::env::var("POLARS_FORCE_EMPTY_READER_CAPABILITIES").as_deref() == Ok("1") {66self.file_reader_builder.reader_capabilities()67& ReaderCapabilities::NEEDS_FILE_CACHE_INIT68} else {69self.file_reader_builder.reader_capabilities()70}71}72}737475