Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-stream/src/nodes/io_sources/multi_scan/config.rs
8475 views
1
use std::sync::{Arc, OnceLock};
2
3
use polars_core::schema::SchemaRef;
4
use polars_io::RowIndex;
5
use polars_io::cloud::CloudOptions;
6
use polars_io::predicates::ScanIOPredicate;
7
use polars_plan::dsl::deletion::DeletionFilesList;
8
use polars_plan::dsl::{
9
CastColumnsPolicy, MissingColumnsPolicy, PredicateFileSkip, ScanSources, TableStatistics,
10
};
11
use polars_plan::plans::hive::HivePartitionsDf;
12
use polars_utils::pl_str::PlSmallStr;
13
use polars_utils::relaxed_cell::RelaxedCell;
14
use polars_utils::slice_enum::Slice;
15
use reader_interface::builder::FileReaderBuilder;
16
use reader_interface::capabilities::ReaderCapabilities;
17
18
use crate::metrics::IOMetrics;
19
use crate::nodes::io_sources::multi_scan::components::forbid_extra_columns::ForbidExtraColumns;
20
use crate::nodes::io_sources::multi_scan::components::projection::builder::ProjectionBuilder;
21
use crate::nodes::io_sources::multi_scan::reader_interface;
22
23
// Some parts are called MultiScan for now to avoid conflict with existing MultiScan.
24
25
pub struct MultiScanConfig {
26
pub sources: ScanSources,
27
pub file_reader_builder: Arc<dyn FileReaderBuilder>,
28
pub cloud_options: Option<Arc<CloudOptions>>,
29
30
/// Final output schema of MultiScan node. Includes all e.g. row index / missing columns / file paths / hive etc.
31
pub final_output_schema: SchemaRef,
32
/// Columns to be projected from the file.
33
pub file_projection_builder: ProjectionBuilder,
34
35
pub row_index: Option<RowIndex>,
36
pub pre_slice: Option<Slice>,
37
pub predicate: Option<ScanIOPredicate>,
38
pub predicate_file_skip_applied: Option<PredicateFileSkip>,
39
40
pub hive_parts: Option<Arc<HivePartitionsDf>>,
41
pub include_file_paths: Option<PlSmallStr>,
42
pub missing_columns_policy: MissingColumnsPolicy,
43
pub cast_columns_policy: CastColumnsPolicy,
44
pub forbid_extra_columns: Option<ForbidExtraColumns>,
45
pub deletion_files: Option<DeletionFilesList>,
46
pub table_statistics: Option<TableStatistics>,
47
48
pub num_pipelines: RelaxedCell<usize>,
49
/// Number of readers to initialize concurrently. e.g. Parquet will want to fetch metadata in this
50
/// step.
51
pub n_readers_pre_init: RelaxedCell<usize>,
52
pub max_concurrent_scans: RelaxedCell<usize>,
53
pub disable_morsel_split: bool,
54
pub io_metrics: OnceLock<Arc<IOMetrics>>,
55
56
pub verbose: bool,
57
}
58
59
impl MultiScanConfig {
60
pub fn num_pipelines(&self) -> usize {
61
self.num_pipelines.load()
62
}
63
64
pub fn n_readers_pre_init(&self) -> usize {
65
self.n_readers_pre_init.load()
66
}
67
68
pub fn max_concurrent_scans(&self) -> usize {
69
self.max_concurrent_scans.load()
70
}
71
72
pub fn io_metrics(&self) -> Option<Arc<IOMetrics>> {
73
self.io_metrics.get().cloned()
74
}
75
76
pub fn reader_capabilities(&self) -> ReaderCapabilities {
77
if std::env::var("POLARS_FORCE_EMPTY_READER_CAPABILITIES").as_deref() == Ok("1") {
78
self.file_reader_builder.reader_capabilities()
79
& ReaderCapabilities::NEEDS_FILE_CACHE_INIT
80
} else {
81
self.file_reader_builder.reader_capabilities()
82
}
83
}
84
}
85
86