Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-lazy/src/tests/pdsh.rs
8458 views
1
//! The PDSH files only contain ten rows each, so after all the joins and filters there is no data
//! left. Still, we can use this to test the schema, operation correctness on empty data, and the
//! optimizations taken.
4
use super::*;
5
6
/// Returns the relative directory that holds the sample dataset files scanned by this module.
///
/// The path is relative, so it is resolved against the process' current working directory
/// when a file below it is opened.
const fn base_path() -> &'static str {
    "../../examples/datasets/pds_heads"
}
9
10
fn region() -> LazyFrame {
11
let base_path = base_path();
12
LazyFrame::scan_ipc(
13
PlRefPath::new(format!("{base_path}/region.feather")),
14
Default::default(),
15
Default::default(),
16
)
17
.unwrap()
18
}
19
fn nation() -> LazyFrame {
20
let base_path = base_path();
21
LazyFrame::scan_ipc(
22
PlRefPath::new(format!("{base_path}/nation.feather")),
23
Default::default(),
24
Default::default(),
25
)
26
.unwrap()
27
}
28
29
fn supplier() -> LazyFrame {
30
let base_path = base_path();
31
LazyFrame::scan_ipc(
32
PlRefPath::new(format!("{base_path}/supplier.feather")),
33
Default::default(),
34
Default::default(),
35
)
36
.unwrap()
37
}
38
39
fn part() -> LazyFrame {
40
let base_path = base_path();
41
LazyFrame::scan_ipc(
42
PlRefPath::new(format!("{base_path}/part.feather")),
43
Default::default(),
44
Default::default(),
45
)
46
.unwrap()
47
}
48
49
fn partsupp() -> LazyFrame {
50
let base_path = base_path();
51
LazyFrame::scan_ipc(
52
PlRefPath::new(format!("{base_path}/partsupp.feather")),
53
Default::default(),
54
Default::default(),
55
)
56
.unwrap()
57
}
58
59
/// Checks the optimized plan shape and the output schema of a multi-join query
/// (presumably PDS-H query 2, going by the test name — confirm against the benchmark spec).
///
/// The query:
/// 1. inner-joins `part`, `partsupp`, `supplier`, `nation` and `region` and filters on
///    `p_size == 15` and `p_type` ending with `"BRASS"`;
/// 2. aggregates the minimum `ps_supplycost` per `p_partkey`;
/// 3. joins that aggregate back onto the filtered join from step 1;
/// 4. selects eight output columns, sorts them and keeps at most 100 rows.
///
/// With common-subplan elimination enabled, the test asserts that the optimized plan contains
/// exactly two `IR::Cache` nodes, then collects the query and compares the resulting schema
/// with the expected one.
#[test]
fn test_q2() -> PolarsResult<()> {
    // Filtered five-way join; it is used twice below (once aggregated, once as-is).
    let q1 = part()
        .inner_join(partsupp(), "p_partkey", "ps_partkey")
        .inner_join(supplier(), "ps_suppkey", "s_suppkey")
        .inner_join(nation(), "s_nationkey", "n_nationkey")
        .inner_join(region(), "n_regionkey", "r_regionkey")
        .filter(col("p_size").eq(15))
        .filter(col("p_type").str().ends_with(lit("BRASS".to_string())));
    let q = q1
        .clone()
        .group_by([col("p_partkey")])
        .agg([col("ps_supplycost").min()])
        // Join the per-part minimum cost back onto the rows it was computed from.
        .join(
            q1,
            [col("p_partkey"), col("ps_supplycost")],
            [col("p_partkey"), col("ps_supplycost")],
            JoinType::Inner.into(),
        )
        .select([cols([
            "s_acctbal",
            "s_name",
            "n_name",
            "p_partkey",
            "p_mfgr",
            "s_address",
            "s_phone",
            "s_comment",
        ])
        .as_expr()])
        .sort_by_exprs(
            [cols(["s_acctbal", "n_name", "s_name", "p_partkey"]).as_expr()],
            SortMultipleOptions::default()
                .with_order_descending_multi([true, false, false, false])
                .with_maintain_order(true),
        )
        .limit(100)
        .with_comm_subplan_elim(true);

    let IRPlan {
        lp_top, lp_arena, ..
    } = q.clone().to_alp_optimized().unwrap();
    // `q1` appears twice in the query, so the optimized plan is expected to hold two cache nodes.
    assert_eq!(
        lp_arena
            .iter(lp_top)
            .filter(|(_, alp)| matches!(alp, IR::Cache { .. }))
            .count(),
        2
    );

    let out = q.collect()?;
    let schema = Schema::from_iter([
        Field::new("s_acctbal".into(), DataType::Float64),
        Field::new("s_name".into(), DataType::String),
        Field::new("n_name".into(), DataType::String),
        Field::new("p_partkey".into(), DataType::Int64),
        Field::new("p_mfgr".into(), DataType::String),
        Field::new("s_address".into(), DataType::String),
        Field::new("s_phone".into(), DataType::String),
        Field::new("s_comment".into(), DataType::String),
    ]);
    assert_eq!(&**out.schema(), &schema);

    Ok(())
}
124
125