Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-lazy/src/tests/mod.rs
6939 views
1
mod aggregations;
2
mod arity;
3
#[cfg(all(feature = "strings", feature = "cse"))]
4
mod cse;
5
#[cfg(feature = "parquet")]
6
mod io;
7
mod logical;
8
mod optimization_checks;
9
#[cfg(all(feature = "strings", feature = "cse"))]
10
mod pdsh;
11
mod predicate_queries;
12
mod projection_queries;
13
mod queries;
14
mod schema;
15
16
/// Creates the pair of arenas used by the tests: an expression arena
/// (`Arena<AExpr>`, capacity 16) and a plan arena (`Arena<IR>`, capacity 8).
fn get_arenas() -> (Arena<AExpr>, Arena<IR>) {
    (Arena::with_capacity(16), Arena::with_capacity(8))
}
21
22
fn load_df() -> DataFrame {
23
df!("a" => &[1, 2, 3, 4, 5],
24
"b" => &["a", "a", "b", "c", "c"],
25
"c" => &[1, 2, 3, 4, 5]
26
)
27
.unwrap()
28
}
29
30
use std::io::Cursor;
31
32
#[cfg(feature = "temporal")]
33
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
34
use optimization_checks::*;
35
#[cfg(feature = "parquet")]
36
pub(crate) use polars_core::SINGLE_LOCK;
37
use polars_core::chunked_array::builder::get_list_builder;
38
use polars_core::df;
39
use polars_core::prelude::*;
40
use polars_io::prelude::*;
41
use polars_utils::plpath::PlPath;
42
43
#[cfg(feature = "cov")]
44
use crate::dsl::pearson_corr;
45
use crate::prelude::*;
46
47
// Dataset locations used by the tests, grouped by the feature that gates them.
// All paths are relative to the crate directory.

// --- parquet ---
/// Glob matching every parquet file in the example datasets directory.
#[cfg(feature = "parquet")]
static GLOB_PARQUET: &str = "../../examples/datasets/*.parquet";
/// The first foods dataset in parquet form.
#[cfg(feature = "parquet")]
static FOODS_PARQUET: &str = "../../examples/datasets/foods1.parquet";
/// The `null_nutriscore` dataset in parquet form.
#[cfg(feature = "parquet")]
static NUTRI_SCORE_NULL_COLUMN_PARQUET: &str = "../../examples/datasets/null_nutriscore.parquet";

// --- csv ---
/// Glob matching the `foods*.csv` files in the example datasets directory.
#[cfg(feature = "csv")]
static GLOB_CSV: &str = "../../examples/datasets/foods*.csv";
/// The first foods dataset in CSV form.
#[cfg(feature = "csv")]
static FOODS_CSV: &str = "../../examples/datasets/foods1.csv";

// --- ipc ---
/// Glob matching every IPC file in the example datasets directory.
#[cfg(feature = "ipc")]
static GLOB_IPC: &str = "../../examples/datasets/*.ipc";
/// The first foods dataset in IPC form.
#[cfg(feature = "ipc")]
static FOODS_IPC: &str = "../../examples/datasets/foods1.ipc";
61
62
/// Opens the foods CSV dataset (`FOODS_CSV`) as a `LazyFrame`.
///
/// # Panics
///
/// Panics if the lazy CSV reader fails to finish.
#[cfg(feature = "csv")]
fn scan_foods_csv() -> LazyFrame {
    let csv_path = PlPath::new(FOODS_CSV);
    let reader = LazyCsvReader::new(csv_path);
    reader.finish().unwrap()
}
66
67
/// Opens the foods IPC dataset (`FOODS_IPC`) as a `LazyFrame`, calling
/// `init_files` first so the derived file has been generated.
///
/// # Panics
///
/// Panics if `scan_ipc` returns an error.
#[cfg(feature = "ipc")]
fn scan_foods_ipc() -> LazyFrame {
    init_files();

    let ipc_path = PlPath::new(FOODS_IPC);
    let scanned = LazyFrame::scan_ipc(ipc_path, Default::default());
    scanned.unwrap()
}
72
73
/// Generates the derived dataset files (`.parquet`, `.ipc`, `.ndjson`) next to
/// their source CSVs in `../../examples/datasets`, unless they already exist.
///
/// Callers coordinate through two marker files in that directory:
///
/// * `busy` — opened with `create_new`, which fails when the file already
///   exists. The caller whose open succeeds performs the conversion.
/// * `finished` — created by that caller once all files are written. Every
///   other caller polls for it and returns as soon as it exists.
///
/// # Panics
///
/// Panics if checking for `finished` fails, if a CSV cannot be read, if an
/// output file cannot be created or written, or if `finished` cannot be
/// created at the end.
#[cfg(any(feature = "ipc", feature = "parquet"))]
fn init_files() {
    if std::fs::OpenOptions::new()
        .write(true)
        .create_new(true)
        .open("../../examples/datasets/busy")
        .is_err()
    {
        // Someone else is (or was) generating the files: wait for them to
        // finish. Sleep between polls instead of spinning so the waiting
        // thread does not burn a core hammering the filesystem.
        // NOTE(review): any other error opening `busy` also lands here and
        // will wait until `finished` appears.
        while !std::fs::exists("../../examples/datasets/finished").unwrap() {
            std::thread::sleep(std::time::Duration::from_millis(10));
        }
        return;
    }

    for path in &[
        "../../examples/datasets/foods1.csv",
        "../../examples/datasets/foods2.csv",
        "../../examples/datasets/null_nutriscore.csv",
    ] {
        for ext in [".parquet", ".ipc", ".ndjson"] {
            let out_path = path.replace(".csv", ext);

            // Only convert when the target file is missing.
            if std::fs::metadata(&out_path).is_err() {
                let mut df = CsvReadOptions::default()
                    .try_into_reader_with_file_path(Some(path.into()))
                    .unwrap()
                    .finish()
                    .unwrap();
                // NOTE(review): the file is created before the feature-gated
                // writers below run, so with `parquet` or `json` disabled an
                // empty file is left behind for that extension.
                let f = std::fs::File::create(&out_path).unwrap();

                match ext {
                    ".parquet" => {
                        #[cfg(feature = "parquet")]
                        {
                            ParquetWriter::new(f)
                                .with_statistics(StatisticsOptions::full())
                                .finish(&mut df)
                                .unwrap();
                        }
                    },
                    ".ipc" => {
                        IpcWriter::new(f).finish(&mut df).unwrap();
                    },
                    ".ndjson" => {
                        #[cfg(feature = "json")]
                        {
                            JsonWriter::new(f).finish(&mut df).unwrap()
                        }
                    },
                    // `ext` only ever takes the three values listed above.
                    _ => unreachable!("unexpected output extension: {ext}"),
                }
            }
        }
    }

    // Signal the waiting callers that every derived file is in place.
    std::fs::OpenOptions::new()
        .write(true)
        .create_new(true)
        .open("../../examples/datasets/finished")
        .unwrap();
}
132
133
/// Opens the foods parquet dataset (`FOODS_PARQUET`) as a `LazyFrame`, calling
/// `init_files` first so the derived file has been generated.
///
/// `parallel` selects `ParallelStrategy::Auto` when `true` and
/// `ParallelStrategy::None` when `false`.
///
/// # Panics
///
/// Panics if `scan_parquet` returns an error.
#[cfg(feature = "parquet")]
fn scan_foods_parquet(parallel: bool) -> LazyFrame {
    init_files();

    let args = ScanArgsParquet {
        n_rows: None,
        cache: false,
        parallel: if parallel {
            ParallelStrategy::Auto
        } else {
            ParallelStrategy::None
        },
        rechunk: true,
        ..Default::default()
    };

    LazyFrame::scan_parquet(PlPath::new(FOODS_PARQUET), args).unwrap()
}
152
153
/// Opens the `null_nutriscore` parquet dataset
/// (`NUTRI_SCORE_NULL_COLUMN_PARQUET`) as a `LazyFrame`, calling `init_files`
/// first so the derived file has been generated.
///
/// `parallel` selects `ParallelStrategy::Auto` when `true` and
/// `ParallelStrategy::None` when `false`.
///
/// # Panics
///
/// Panics if `scan_parquet` returns an error.
#[cfg(feature = "parquet")]
fn scan_nutri_score_null_column_parquet(parallel: bool) -> LazyFrame {
    init_files();

    let strategy = if parallel {
        ParallelStrategy::Auto
    } else {
        ParallelStrategy::None
    };
    let scan_args = ScanArgsParquet {
        n_rows: None,
        cache: false,
        parallel: strategy,
        rechunk: true,
        ..Default::default()
    };
    let source = PlPath::new(NUTRI_SCORE_NULL_COLUMN_PARQUET);

    LazyFrame::scan_parquet(source, scan_args).unwrap()
}
172
173
pub(crate) fn fruits_cars() -> DataFrame {
174
df!(
175
"A"=> [1, 2, 3, 4, 5],
176
"fruits"=> ["banana", "banana", "apple", "apple", "banana"],
177
"B"=> [5, 4, 3, 2, 1],
178
"cars"=> ["beetle", "audi", "beetle", "beetle", "beetle"]
179
)
180
.unwrap()
181
}
182
183
pub(crate) fn get_df() -> DataFrame {
184
let s = r#"
185
"sepal_length","sepal_width","petal_length","petal_width","variety"
186
5.1,3.5,1.4,.2,"Setosa"
187
4.9,3,1.4,.2,"Setosa"
188
4.7,3.2,1.3,.2,"Setosa"
189
4.6,3.1,1.5,.2,"Setosa"
190
5,3.6,1.4,.2,"Setosa"
191
5.4,3.9,1.7,.4,"Setosa"
192
4.6,3.4,1.4,.3,"Setosa"
193
"#;
194
195
let file = Cursor::new(s);
196
197
CsvReadOptions::default()
198
.with_infer_schema_length(Some(3))
199
.with_has_header(true)
200
.into_reader_with_file_handle(file)
201
.finish()
202
.unwrap()
203
}
204
205