Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-lazy/src/tests/mod.rs
8430 views
1
mod aggregations;
2
mod arity;
3
#[cfg(all(feature = "strings", feature = "cse"))]
4
mod cse;
5
#[cfg(feature = "parquet")]
6
mod io;
7
mod logical;
8
mod optimization_checks;
9
#[cfg(all(feature = "strings", feature = "cse"))]
10
mod pdsh;
11
mod predicate_queries;
12
mod projection_queries;
13
mod queries;
14
mod schema;
15
16
/// Creates the pair of empty arenas used by the optimizer tests.
///
/// The first element is the expression arena (created with capacity 16), the
/// second is the plan arena (created with capacity 8).
fn get_arenas() -> (Arena<AExpr>, Arena<IR>) {
    (Arena::with_capacity(16), Arena::with_capacity(8))
}
21
22
fn load_df() -> DataFrame {
23
df!("a" => &[1, 2, 3, 4, 5],
24
"b" => &["a", "a", "b", "c", "c"],
25
"c" => &[1, 2, 3, 4, 5]
26
)
27
.unwrap()
28
}
29
30
use std::io::Cursor;
31
32
#[cfg(feature = "temporal")]
33
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
34
use optimization_checks::*;
35
#[cfg(feature = "parquet")]
36
pub(crate) use polars_core::SINGLE_LOCK;
37
use polars_core::chunked_array::builder::get_list_builder;
38
use polars_core::df;
39
use polars_core::prelude::*;
40
use polars_io::prelude::*;
41
use polars_utils::pl_path::PlRefPath;
42
43
#[cfg(feature = "cov")]
44
use crate::dsl::pearson_corr;
45
use crate::prelude::*;
46
47
// Paths to the example datasets, relative to the crate directory.
// Each path is only compiled in when the matching reader feature is enabled.

// --- Parquet ---
/// Glob matching every parquet file in the example datasets directory.
#[cfg(feature = "parquet")]
static GLOB_PARQUET: &str = "../../examples/datasets/*.parquet";
/// The `foods1` dataset in parquet form.
#[cfg(feature = "parquet")]
static FOODS_PARQUET: &str = "../../examples/datasets/foods1.parquet";
/// The `null_nutriscore` dataset in parquet form.
#[cfg(feature = "parquet")]
static NUTRI_SCORE_NULL_COLUMN_PARQUET: &str = "../../examples/datasets/null_nutriscore.parquet";

// --- CSV ---
/// Glob matching the `foods*.csv` example datasets.
#[cfg(feature = "csv")]
static GLOB_CSV: &str = "../../examples/datasets/foods*.csv";
/// The `foods1` dataset in CSV form.
#[cfg(feature = "csv")]
static FOODS_CSV: &str = "../../examples/datasets/foods1.csv";

// --- IPC ---
/// Glob matching every IPC file in the example datasets directory.
#[cfg(feature = "ipc")]
static GLOB_IPC: &str = "../../examples/datasets/*.ipc";
/// The `foods1` dataset in IPC form.
#[cfg(feature = "ipc")]
static FOODS_IPC: &str = "../../examples/datasets/foods1.ipc";
61
62
/// Opens a lazy CSV scan over [`FOODS_CSV`].
///
/// Panics if the lazy reader fails to finish.
#[cfg(feature = "csv")]
fn scan_foods_csv() -> LazyFrame {
    let path = PlRefPath::new(FOODS_CSV);
    let reader = LazyCsvReader::new(path);
    reader.finish().unwrap()
}
68
69
/// Opens a lazy IPC scan over [`FOODS_IPC`] using default scan arguments.
///
/// Calls [`init_files`] first so the IPC file is generated from the CSV
/// dataset if it is missing. Panics if the scan cannot be created.
#[cfg(feature = "ipc")]
fn scan_foods_ipc() -> LazyFrame {
    init_files();

    let path = PlRefPath::new(FOODS_IPC);
    let scanned = LazyFrame::scan_ipc(path, Default::default(), Default::default());
    scanned.unwrap()
}
79
80
/// Generates the parquet / IPC / NDJSON variants of the example CSV datasets.
///
/// Coordination between concurrent callers is done with two marker files in
/// the datasets directory:
///
/// * `busy` — created with `create_new`, so only one caller succeeds in
///   creating it; that caller performs the conversion.
/// * `finished` — created once the conversion is done; callers that did not
///   create `busy` wait until this file exists and then return.
///
/// For every source CSV and every target extension, the output file is only
/// written when it does not exist yet.
///
/// # Panics
///
/// Panics on any I/O, CSV-read or writer error, and if `finished` already
/// exists when the converting caller tries to create it.
#[cfg(any(feature = "ipc", feature = "parquet"))]
fn init_files() {
    if std::fs::OpenOptions::new()
        .write(true)
        .create_new(true)
        .open("../../examples/datasets/busy")
        .is_err()
    {
        // Another caller owns the conversion. Poll for the completion marker,
        // sleeping between checks so that waiting test threads do not burn a
        // core each and compete with the thread doing the actual work.
        while !std::fs::exists("../../examples/datasets/finished").unwrap() {
            std::thread::sleep(std::time::Duration::from_millis(10));
        }
        return;
    }

    for path in &[
        "../../examples/datasets/foods1.csv",
        "../../examples/datasets/foods2.csv",
        "../../examples/datasets/null_nutriscore.csv",
    ] {
        for ext in [".parquet", ".ipc", ".ndjson"] {
            let out_path = path.replace(".csv", ext);

            // Only convert when the target file is missing.
            if std::fs::metadata(&out_path).is_err() {
                let mut df = CsvReadOptions::default()
                    .try_into_reader_with_file_path(Some(path.into()))
                    .unwrap()
                    .finish()
                    .unwrap();
                let f = std::fs::File::create(&out_path).unwrap();

                match ext {
                    ".parquet" => {
                        #[cfg(feature = "parquet")]
                        {
                            ParquetWriter::new(f)
                                .with_statistics(StatisticsOptions::full())
                                .finish(&mut df)
                                .unwrap();
                        }
                    },
                    ".ipc" => {
                        IpcWriter::new(f).finish(&mut df).unwrap();
                    },
                    ".ndjson" => {
                        #[cfg(feature = "json")]
                        {
                            JsonWriter::new(f).finish(&mut df).unwrap()
                        }
                    },
                    _ => panic!(),
                }
            }
        }
    }

    // Signal waiting callers that all files are in place.
    std::fs::OpenOptions::new()
        .write(true)
        .create_new(true)
        .open("../../examples/datasets/finished")
        .unwrap();
}
139
140
/// Opens a lazy parquet scan over [`FOODS_PARQUET`].
///
/// `parallel` selects `ParallelStrategy::Auto` when `true` and
/// `ParallelStrategy::None` when `false`. Caching is disabled, rechunking is
/// enabled and no row limit is set; all other scan arguments are defaults.
///
/// Calls [`init_files`] first so the parquet file is generated if missing.
/// Panics if the scan cannot be created.
#[cfg(feature = "parquet")]
fn scan_foods_parquet(parallel: bool) -> LazyFrame {
    init_files();

    let scan_args = ScanArgsParquet {
        n_rows: None,
        cache: false,
        parallel: if parallel {
            ParallelStrategy::Auto
        } else {
            ParallelStrategy::None
        },
        rechunk: true,
        ..Default::default()
    };

    LazyFrame::scan_parquet(PlRefPath::new(FOODS_PARQUET), scan_args).unwrap()
}
159
160
/// Opens a lazy parquet scan over [`NUTRI_SCORE_NULL_COLUMN_PARQUET`].
///
/// `parallel` selects `ParallelStrategy::Auto` when `true` and
/// `ParallelStrategy::None` when `false`. Caching is disabled, rechunking is
/// enabled and no row limit is set; all other scan arguments are defaults.
///
/// Calls [`init_files`] first so the parquet file is generated if missing.
/// Panics if the scan cannot be created.
#[cfg(feature = "parquet")]
fn scan_nutri_score_null_column_parquet(parallel: bool) -> LazyFrame {
    init_files();

    let scan_args = ScanArgsParquet {
        n_rows: None,
        cache: false,
        parallel: if parallel {
            ParallelStrategy::Auto
        } else {
            ParallelStrategy::None
        },
        rechunk: true,
        ..Default::default()
    };

    LazyFrame::scan_parquet(
        PlRefPath::new(NUTRI_SCORE_NULL_COLUMN_PARQUET),
        scan_args,
    )
    .unwrap()
}
179
180
pub(crate) fn fruits_cars() -> DataFrame {
181
df!(
182
"A"=> [1, 2, 3, 4, 5],
183
"fruits"=> ["banana", "banana", "apple", "apple", "banana"],
184
"B"=> [5, 4, 3, 2, 1],
185
"cars"=> ["beetle", "audi", "beetle", "beetle", "beetle"]
186
)
187
.unwrap()
188
}
189
190
pub(crate) fn get_df() -> DataFrame {
191
let s = r#"
192
"sepal_length","sepal_width","petal_length","petal_width","variety"
193
5.1,3.5,1.4,.2,"Setosa"
194
4.9,3,1.4,.2,"Setosa"
195
4.7,3.2,1.3,.2,"Setosa"
196
4.6,3.1,1.5,.2,"Setosa"
197
5,3.6,1.4,.2,"Setosa"
198
5.4,3.9,1.7,.4,"Setosa"
199
4.6,3.4,1.4,.3,"Setosa"
200
"#;
201
202
let file = Cursor::new(s);
203
204
CsvReadOptions::default()
205
.with_infer_schema_length(Some(3))
206
.with_has_header(true)
207
.into_reader_with_file_handle(file)
208
.finish()
209
.unwrap()
210
}
211
212