CoCalc -- mod.rs

GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-lazy/src/tests/mod.rs
⁶⁹³⁹ views
1
mod aggregations;
2
mod arity;
3
#[cfg(all(feature = "strings", feature = "cse"))]
4
mod cse;
5
#[cfg(feature = "parquet")]
6
mod io;
7
mod logical;
8
mod optimization_checks;
9
#[cfg(all(feature = "strings", feature = "cse"))]
10
mod pdsh;
11
mod predicate_queries;
12
mod projection_queries;
13
mod queries;
14
mod schema;
15

16
/// Returns a fresh `(expression arena, logical-plan arena)` pair for tests.
///
/// The expression arena is created with capacity 16 and the plan arena with capacity 8.
fn get_arenas() -> (Arena<AExpr>, Arena<IR>) {
    (Arena::with_capacity(16), Arena::with_capacity(8))
}
21

22
fn load_df() -> DataFrame {
23
    df!("a" => &[1, 2, 3, 4, 5],
24
                 "b" => &["a", "a", "b", "c", "c"],
25
                 "c" => &[1, 2, 3, 4, 5]
26
    )
27
    .unwrap()
28
}
29

30
use std::io::Cursor;
31

32
#[cfg(feature = "temporal")]
33
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
34
use optimization_checks::*;
35
#[cfg(feature = "parquet")]
36
pub(crate) use polars_core::SINGLE_LOCK;
37
use polars_core::chunked_array::builder::get_list_builder;
38
use polars_core::df;
39
use polars_core::prelude::*;
40
use polars_io::prelude::*;
41
use polars_utils::plpath::PlPath;
42

43
#[cfg(feature = "cov")]
44
use crate::dsl::pearson_corr;
45
use crate::prelude::*;
46

47
/// Glob pattern matching every `.parquet` file in the examples dataset directory.
#[cfg(feature = "parquet")]
48
static GLOB_PARQUET: &str = "../../examples/datasets/*.parquet";
49
/// Glob pattern matching the `foods*.csv` files in the examples dataset directory.
#[cfg(feature = "csv")]
50
static GLOB_CSV: &str = "../../examples/datasets/foods*.csv";
51
/// Glob pattern matching every `.ipc` file in the examples dataset directory.
#[cfg(feature = "ipc")]
52
static GLOB_IPC: &str = "../../examples/datasets/*.ipc";
53
/// Path of the parquet copy of `foods1.csv` (written by `init_files` when missing).
#[cfg(feature = "parquet")]
54
static FOODS_PARQUET: &str = "../../examples/datasets/foods1.parquet";
55
/// Path of the parquet copy of `null_nutriscore.csv` (written by `init_files` when missing).
/// NOTE(review): presumably holds an all-null nutri-score column, judging by the name — confirm.
#[cfg(feature = "parquet")]
56
static NUTRI_SCORE_NULL_COLUMN_PARQUET: &str = "../../examples/datasets/null_nutriscore.parquet";
57
/// Path of the source `foods1.csv` dataset.
#[cfg(feature = "csv")]
58
static FOODS_CSV: &str = "../../examples/datasets/foods1.csv";
59
/// Path of the IPC copy of `foods1.csv` (written by `init_files` when missing).
#[cfg(feature = "ipc")]
60
static FOODS_IPC: &str = "../../examples/datasets/foods1.ipc";
61

62
/// Opens the `foods1.csv` dataset as a lazy CSV scan.
///
/// # Panics
/// Panics if the lazy reader cannot be finished (e.g. the file is missing).
#[cfg(feature = "csv")]
fn scan_foods_csv() -> LazyFrame {
    let path = PlPath::new(FOODS_CSV);
    let reader = LazyCsvReader::new(path);
    reader.finish().unwrap()
}
66

67
/// Opens the `foods1.ipc` dataset as a lazy IPC scan using default scan arguments.
///
/// # Panics
/// Panics if the scan cannot be created.
#[cfg(feature = "ipc")]
fn scan_foods_ipc() -> LazyFrame {
    // The `.ipc` file is derived from the CSV; make sure it has been generated first.
    init_files();

    let path = PlPath::new(FOODS_IPC);
    let scan_args = Default::default();
    LazyFrame::scan_ipc(path, scan_args).unwrap()
}
72

73
/// Generates the derived dataset files (`.parquet`, `.ipc`, `.ndjson`) from the CSV
/// sources in `../../examples/datasets`, once.
///
/// Coordination happens through two marker files in the dataset directory:
/// * `busy` — the caller that manages to create it performs the conversion;
/// * `finished` — created after all conversions are done; every other caller polls
///   for it and returns once it exists.
///
/// Neither marker is removed here. If the converting caller panics before `finished`
/// is written, waiting callers keep polling indefinitely.
///
/// # Panics
/// Panics on any I/O, CSV-read or write failure during conversion, or if `finished`
/// cannot be created.
#[cfg(any(feature = "ipc", feature = "parquet"))]
fn init_files() {
    // `create_new` fails when the file already exists, so only one caller gets past this.
    let won_the_race = std::fs::OpenOptions::new()
        .write(true)
        .create_new(true)
        .open("../../examples/datasets/busy")
        .is_ok();

    if !won_the_race {
        // Someone else is (or was) generating the files: wait for the `finished` marker.
        // Sleep between polls instead of spinning hot on filesystem calls.
        while !std::fs::exists("../../examples/datasets/finished").unwrap() {
            std::thread::sleep(std::time::Duration::from_millis(10));
        }
        return;
    }

    for path in &[
        "../../examples/datasets/foods1.csv",
        "../../examples/datasets/foods2.csv",
        "../../examples/datasets/null_nutriscore.csv",
    ] {
        for ext in [".parquet", ".ipc", ".ndjson"] {
            let out_path = path.replace(".csv", ext);

            // Only convert when the target does not exist yet.
            if std::fs::metadata(&out_path).is_err() {
                let mut df = CsvReadOptions::default()
                    .try_into_reader_with_file_path(Some(path.into()))
                    .unwrap()
                    .finish()
                    .unwrap();

                // The output file is created inside each arm so that a disabled feature
                // does not leave a zero-byte file behind that later runs would mistake
                // for an already-generated dataset.
                match ext {
                    ".parquet" => {
                        #[cfg(feature = "parquet")]
                        {
                            let f = std::fs::File::create(&out_path).unwrap();
                            ParquetWriter::new(f)
                                .with_statistics(StatisticsOptions::full())
                                .finish(&mut df)
                                .unwrap();
                        }
                    },
                    ".ipc" => {
                        let f = std::fs::File::create(&out_path).unwrap();
                        IpcWriter::new(f).finish(&mut df).unwrap();
                    },
                    ".ndjson" => {
                        #[cfg(feature = "json")]
                        {
                            let f = std::fs::File::create(&out_path).unwrap();
                            JsonWriter::new(f).finish(&mut df).unwrap();
                        }
                    },
                    _ => unreachable!("unexpected dataset extension: {ext}"),
                }
            }
        }
    }

    // Signal waiting callers that every derived file is in place.
    std::fs::OpenOptions::new()
        .write(true)
        .create_new(true)
        .open("../../examples/datasets/finished")
        .unwrap();
}
132

133
/// Shared implementation of the parquet scan helpers below.
///
/// Ensures the derived dataset files exist (via `init_files`) and opens `path` as a
/// lazy parquet scan with caching disabled and rechunking enabled. `parallel` selects
/// `ParallelStrategy::Auto` when `true` and `ParallelStrategy::None` otherwise.
///
/// # Panics
/// Panics if the scan cannot be created.
#[cfg(feature = "parquet")]
fn scan_parquet_dataset(path: &str, parallel: bool) -> LazyFrame {
    init_files();

    let parallel = if parallel {
        ParallelStrategy::Auto
    } else {
        ParallelStrategy::None
    };

    let args = ScanArgsParquet {
        n_rows: None,
        cache: false,
        parallel,
        rechunk: true,
        ..Default::default()
    };
    LazyFrame::scan_parquet(PlPath::new(path), args).unwrap()
}

/// Lazily scans the `foods1.parquet` dataset.
///
/// `parallel` toggles between `ParallelStrategy::Auto` (`true`) and
/// `ParallelStrategy::None` (`false`).
#[cfg(feature = "parquet")]
fn scan_foods_parquet(parallel: bool) -> LazyFrame {
    scan_parquet_dataset(FOODS_PARQUET, parallel)
}

/// Lazily scans the `null_nutriscore.parquet` dataset.
///
/// `parallel` toggles between `ParallelStrategy::Auto` (`true`) and
/// `ParallelStrategy::None` (`false`).
#[cfg(feature = "parquet")]
fn scan_nutri_score_null_column_parquet(parallel: bool) -> LazyFrame {
    scan_parquet_dataset(NUTRI_SCORE_NULL_COLUMN_PARQUET, parallel)
}
172

173
pub(crate) fn fruits_cars() -> DataFrame {
174
    df!(
175
            "A"=> [1, 2, 3, 4, 5],
176
            "fruits"=> ["banana", "banana", "apple", "apple", "banana"],
177
            "B"=> [5, 4, 3, 2, 1],
178
            "cars"=> ["beetle", "audi", "beetle", "beetle", "beetle"]
179
    )
180
    .unwrap()
181
}
182

183
pub(crate) fn get_df() -> DataFrame {
184
    let s = r#"
185
"sepal_length","sepal_width","petal_length","petal_width","variety"
186
5.1,3.5,1.4,.2,"Setosa"
187
4.9,3,1.4,.2,"Setosa"
188
4.7,3.2,1.3,.2,"Setosa"
189
4.6,3.1,1.5,.2,"Setosa"
190
5,3.6,1.4,.2,"Setosa"
191
5.4,3.9,1.7,.4,"Setosa"
192
4.6,3.4,1.4,.3,"Setosa"
193
"#;
194

195
    let file = Cursor::new(s);
196

197
    CsvReadOptions::default()
198
        .with_infer_schema_length(Some(3))
199
        .with_has_header(true)
200
        .into_reader_with_file_handle(file)
201
        .finish()
202
        .unwrap()
203
}
204

205
Product

Resources

Company