Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-lazy/src/tests/pdsh.rs
6939 views
1
//! The PDSH files only have ten rows, so after all the joins and filters there is no data left.
//! Still, we can use this to test the schema, operation correctness on empty data, and the
//! optimizations taken.
4
use super::*;
5
6
/// Returns the directory (relative to the crate root) that holds the `.feather`
/// dataset heads scanned by the helpers in this module.
const fn base_path() -> &'static str {
    "../../examples/datasets/pds_heads"
}
9
10
fn region() -> LazyFrame {
11
let base_path = base_path();
12
LazyFrame::scan_ipc(
13
PlPath::new(&format!("{base_path}/region.feather")),
14
ScanArgsIpc::default(),
15
)
16
.unwrap()
17
}
18
fn nation() -> LazyFrame {
19
let base_path = base_path();
20
LazyFrame::scan_ipc(
21
PlPath::new(&format!("{base_path}/nation.feather")),
22
ScanArgsIpc::default(),
23
)
24
.unwrap()
25
}
26
27
fn supplier() -> LazyFrame {
28
let base_path = base_path();
29
LazyFrame::scan_ipc(
30
PlPath::new(&format!("{base_path}/supplier.feather")),
31
ScanArgsIpc::default(),
32
)
33
.unwrap()
34
}
35
36
fn part() -> LazyFrame {
37
let base_path = base_path();
38
LazyFrame::scan_ipc(
39
PlPath::new(&format!("{base_path}/part.feather")),
40
ScanArgsIpc::default(),
41
)
42
.unwrap()
43
}
44
45
fn partsupp() -> LazyFrame {
46
let base_path = base_path();
47
LazyFrame::scan_ipc(
48
PlPath::new(&format!("{base_path}/partsupp.feather")),
49
ScanArgsIpc::default(),
50
)
51
.unwrap()
52
}
53
54
/// Builds the Q2 query over the dataset heads and checks both the optimized plan
/// and the output schema.
///
/// The query inner-joins `part`, `partsupp`, `supplier`, `nation` and `region`,
/// keeps rows with `p_size == 15` whose `p_type` ends with `"BRASS"`, computes the
/// minimum `ps_supplycost` per `p_partkey`, and joins that aggregate back onto the
/// filtered frame. Because the filtered frame is used on both sides of that join,
/// common-subplan elimination is enabled and the optimized plan is expected to
/// contain exactly two `IR::Cache` nodes.
#[test]
fn test_q2() -> PolarsResult<()> {
    // Shared sub-plan: all joins plus the two filters.
    let base = part()
        .inner_join(partsupp(), "p_partkey", "ps_partkey")
        .inner_join(supplier(), "ps_suppkey", "s_suppkey")
        .inner_join(nation(), "s_nationkey", "n_nationkey")
        .inner_join(region(), "n_regionkey", "r_regionkey")
        .filter(col("p_size").eq(15))
        .filter(col("p_type").str().ends_with(lit("BRASS")));

    let query = base
        .clone()
        .group_by([col("p_partkey")])
        .agg([col("ps_supplycost").min()])
        // Join the per-part minimum cost back onto the shared sub-plan.
        .join(
            base,
            [col("p_partkey"), col("ps_supplycost")],
            [col("p_partkey"), col("ps_supplycost")],
            JoinType::Inner.into(),
        )
        .select([cols([
            "s_acctbal",
            "s_name",
            "n_name",
            "p_partkey",
            "p_mfgr",
            "s_address",
            "s_phone",
            "s_comment",
        ])
        .as_expr()])
        .sort_by_exprs(
            [cols(["s_acctbal", "n_name", "s_name", "p_partkey"]).as_expr()],
            // `s_acctbal` descending, the remaining keys ascending.
            SortMultipleOptions::default()
                .with_order_descending_multi([true, false, false, false])
                .with_maintain_order(true),
        )
        .limit(100)
        .with_comm_subplan_elim(true);

    // `to_alp_optimized` consumes the frame, so optimize a clone and keep `query`
    // for the `collect` below.
    let IRPlan {
        lp_top, lp_arena, ..
    } = query.clone().to_alp_optimized()?;
    let cache_nodes = lp_arena
        .iter(lp_top)
        .filter(|(_, alp)| matches!(alp, IR::Cache { .. }))
        .count();
    assert_eq!(cache_nodes, 2);

    let df = query.collect()?;
    let expected = Schema::from_iter([
        Field::new("s_acctbal".into(), DataType::Float64),
        Field::new("s_name".into(), DataType::String),
        Field::new("n_name".into(), DataType::String),
        Field::new("p_partkey".into(), DataType::Int64),
        Field::new("p_mfgr".into(), DataType::String),
        Field::new("s_address".into(), DataType::String),
        Field::new("s_phone".into(), DataType::String),
        Field::new("s_comment".into(), DataType::String),
    ]);
    assert_eq!(&**df.schema(), &expected);

    Ok(())
}
119
120