Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-stream/src/nodes/io_sources/multi_scan/config.rs
6939 views
1
use std::sync::Arc;
2
3
use polars_core::schema::SchemaRef;
4
use polars_io::RowIndex;
5
use polars_io::cloud::CloudOptions;
6
use polars_io::predicates::ScanIOPredicate;
7
use polars_plan::dsl::deletion::DeletionFilesList;
8
use polars_plan::dsl::{CastColumnsPolicy, MissingColumnsPolicy, ScanSources};
9
use polars_plan::plans::hive::HivePartitionsDf;
10
use polars_utils::pl_str::PlSmallStr;
11
use polars_utils::relaxed_cell::RelaxedCell;
12
use polars_utils::slice_enum::Slice;
13
use reader_interface::builder::FileReaderBuilder;
14
use reader_interface::capabilities::ReaderCapabilities;
15
16
use crate::nodes::io_sources::multi_scan::components::forbid_extra_columns::ForbidExtraColumns;
17
use crate::nodes::io_sources::multi_scan::components::projection::builder::ProjectionBuilder;
18
use crate::nodes::io_sources::multi_scan::reader_interface;
19
20
// Some parts are called MultiScan for now to avoid conflict with existing MultiScan.
21
22
pub struct MultiScanConfig {
23
pub sources: ScanSources,
24
pub file_reader_builder: Arc<dyn FileReaderBuilder>,
25
pub cloud_options: Option<Arc<CloudOptions>>,
26
27
/// Final output schema of MultiScan node. Includes all e.g. row index / missing columns / file paths / hive etc.
28
pub final_output_schema: SchemaRef,
29
/// Columns to be projected from the file.
30
pub file_projection_builder: ProjectionBuilder,
31
32
pub row_index: Option<RowIndex>,
33
pub pre_slice: Option<Slice>,
34
pub predicate: Option<ScanIOPredicate>,
35
36
pub hive_parts: Option<Arc<HivePartitionsDf>>,
37
pub include_file_paths: Option<PlSmallStr>,
38
pub missing_columns_policy: MissingColumnsPolicy,
39
pub cast_columns_policy: CastColumnsPolicy,
40
pub forbid_extra_columns: Option<ForbidExtraColumns>,
41
pub deletion_files: Option<DeletionFilesList>,
42
43
pub num_pipelines: RelaxedCell<usize>,
44
/// Number of readers to initialize concurrently. e.g. Parquet will want to fetch metadata in this
45
/// step.
46
pub n_readers_pre_init: RelaxedCell<usize>,
47
pub max_concurrent_scans: RelaxedCell<usize>,
48
49
pub verbose: bool,
50
}
51
52
impl MultiScanConfig {
53
pub fn num_pipelines(&self) -> usize {
54
self.num_pipelines.load()
55
}
56
57
pub fn n_readers_pre_init(&self) -> usize {
58
self.n_readers_pre_init.load()
59
}
60
61
pub fn max_concurrent_scans(&self) -> usize {
62
self.max_concurrent_scans.load()
63
}
64
65
pub fn reader_capabilities(&self) -> ReaderCapabilities {
66
if std::env::var("POLARS_FORCE_EMPTY_READER_CAPABILITIES").as_deref() == Ok("1") {
67
self.file_reader_builder.reader_capabilities()
68
& ReaderCapabilities::NEEDS_FILE_CACHE_INIT
69
} else {
70
self.file_reader_builder.reader_capabilities()
71
}
72
}
73
}
74
75