Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-sql/tests/functions_aggregate.rs
6939 views
1
use polars_core::prelude::*;
2
use polars_lazy::prelude::*;
3
use polars_plan::dsl::Expr;
4
use polars_sql::*;
5
6
/// Builds the fixture shared by the median/quantile tests: a lazy frame with a
/// single integer column `Data` holding 1000 through 6000 in steps of 1000.
fn create_df() -> LazyFrame {
    let frame = df! {
        "Data" => [1000, 2000, 3000, 4000, 5000, 6000]
    }
    .unwrap();
    frame.lazy()
}
13
14
/// Evaluates one aggregate twice against the `create_df` fixture and returns
/// `(expected, actual)`.
///
/// * `expr` - the native expression; it is aliased to `TEST`, selected, sorted
///   by that alias and collected to produce `expected`.
/// * `sql`  - the SQL expression text; it is spliced into
///   `SELECT {sql} as TEST FROM df` and run through a fresh `SQLContext` to
///   produce `actual`.
///
/// Panics if either pipeline fails to execute or collect.
fn create_expected(expr: Expr, sql: &str) -> (DataFrame, DataFrame) {
    let alias = "TEST";
    let source = create_df();

    // Reference result computed directly through the lazy API.
    let expected = source
        .clone()
        .select([expr.alias(alias)])
        .sort([alias], Default::default())
        .collect()
        .unwrap();

    let query = format!(
        r#"
        SELECT
            {sql} as {alias}
        FROM
            df
        "#
    );

    // Same fixture, exposed to the SQL engine under the table name `df`.
    let mut ctx = SQLContext::new();
    ctx.register("df", source);
    let actual = ctx.execute(&query).unwrap().collect().unwrap();

    (expected, actual)
}
39
40
/// Checks that SQL `MEDIAN(Data)` yields the same frame as
/// `col("Data").median()` on the shared fixture.
#[test]
fn test_median() {
    let expr = col("Data").median();

    let sql_expr = "MEDIAN(Data)";
    let (expected, actual) = create_expected(expr, sql_expr);

    // Print both frames on mismatch so a failure is diagnosable from the log.
    assert!(
        expected.equals(&actual),
        "expected {expected:?}, got {actual:?}"
    )
}
49
50
/// Checks that SQL `QUANTILE_CONT(Data, q)` agrees with the native
/// `quantile(q, QuantileMethod::Linear)` expression for q in {0.25, 0.5, 0.75}.
#[test]
fn test_quantile_cont() {
    for quantile in [0.25, 0.5, 0.75] {
        let sql_expr = format!("QUANTILE_CONT(Data, {quantile})");
        let native = col("Data").quantile(lit(quantile), QuantileMethod::Linear);

        let (expected, actual) = create_expected(native, &sql_expr);

        assert!(
            expected.equals(&actual),
            "q: {quantile}: expected {expected:?}, got {actual:?}"
        )
    }
}
64
65
/// Checks that SQL `QUANTILE_DISC(Data, q)` agrees with the native
/// `quantile(q, QuantileMethod::Equiprobable)` expression for
/// q in {0.25, 0.5, 0.75}.
#[test]
fn test_quantile_disc() {
    for &q in &[0.25, 0.5, 0.75] {
        let expr = col("Data").quantile(lit(q), QuantileMethod::Equiprobable);

        let sql_expr = format!("QUANTILE_DISC(Data, {q})");
        let (expected, actual) = create_expected(expr, &sql_expr);

        // Report the failing quantile and both frames, not just "assertion failed".
        assert!(
            expected.equals(&actual),
            "q: {q}: expected {expected:?}, got {actual:?}"
        )
    }
}
76
77
/// Checks that both quantile SQL functions reject a quantile argument outside
/// the closed interval [0, 1]: `SQLContext::execute` must return an error.
#[test]
fn test_quantile_out_of_range() {
    for &q in &["-1", "2", "-0.01", "1.01"] {
        for &func in &["QUANTILE_CONT", "QUANTILE_DISC"] {
            // Select from the registered table so `Data` refers to a real
            // column and the rejection can be attributed to the quantile
            // argument rather than to an unresolved column.
            let query = format!("SELECT {func}(Data, {q}) FROM df");
            let mut ctx = SQLContext::new();
            ctx.register("df", create_df());
            let actual = ctx.execute(&query);
            assert!(
                actual.is_err(),
                "`{query}` should be rejected: quantile {q} is outside [0, 1]"
            )
        }
    }
}
89
90
/// Runs `QUANTILE_DISC(Data, q)` for q = 0.0, 0.1, ..., 1.0 against the shared
/// fixture, stacks the one-row results vertically in order, and compares the
/// stacked frame with a hard-coded table of expected values.
#[test]
fn test_quantile_disc_conformance() {
    let expected = df![
        "q" => [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
        "Data" => [1000, 1000, 2000, 2000, 3000, 3000, 4000, 5000, 5000, 6000, 6000],
    ]
    .unwrap();

    let mut ctx = SQLContext::new();
    ctx.register("df", create_df());

    let quantiles = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0];

    // One query per quantile; `reduce` folds the per-quantile frames into a
    // single frame (or `None` if there were no quantiles at all).
    let actual: Option<DataFrame> = quantiles
        .iter()
        .map(|q| {
            let query =
                format!("SELECT {q}::float as q, QUANTILE_DISC(Data, {q}) as Data FROM df");
            ctx.execute(&query).unwrap().collect().unwrap()
        })
        .reduce(|stacked, row| stacked.vstack(&row).unwrap());

    assert!(
        expected.equals(actual.as_ref().unwrap()),
        "expected {expected:?}, got {actual:?}"
    )
}
122
123
/// Builds the fixture for the correlation/covariance tests: two integer
/// columns `a` and `b` plus a string column `c` holding the labels "a"/"b".
fn create_df_corr() -> LazyFrame {
    let frame = df! {
        "a" => [1, 2, 3, 4, 5, 6],
        "b" => [2, 4, 10, 8, 9, 13],
        "c" => ["a", "b", "a", "a", "b", "b"]
    }
    .unwrap();
    frame.lazy()
}
132
133
/// Checks that SQL `CORR`, `COVAR` and `COVAR_POP` over columns `a` and `b`
/// match the native `pearson_corr` / `cov` expressions on the same frame.
#[test]
fn test_corr() {
    let df = create_df_corr();

    // Aliases match the SQL output names below (`corr`, `covar`, `covar_pop`)
    // so the comparison holds whether or not column names take part in frame
    // equality.
    let expr_corr = pearson_corr(col("a"), col("b")).alias("corr");
    // Third argument 1 pairs with COVAR and 0 with COVAR_POP
    // (presumably the delta degrees of freedom; confirm against `cov`).
    let expr_cov = cov(col("a"), col("b"), 1).alias("covar");
    let expr_cov_pop = cov(col("a"), col("b"), 0).alias("covar_pop");
    let expected = df
        .clone()
        .select(&[expr_corr, expr_cov, expr_cov_pop])
        .collect()
        .unwrap();

    let mut ctx = SQLContext::new();
    ctx.register("df", df);
    let sql = r#"
    SELECT
        CORR(a, b) as corr,
        COVAR(a, b) as covar,
        COVAR_POP(a, b) as covar_pop
    FROM df"#;
    let actual = ctx.execute(sql).unwrap().collect().unwrap();

    assert_eq!(expected, actual, "expected {expected:?}, got {actual:?}");
}
158
159
/// Checks that SQL `CORR` / `COVAR` grouped by `c` match the native
/// `group_by(["c"]).agg([...])` pipeline, with both results ordered by `c`.
#[test]
fn test_corr_group_by() {
    let df = create_df_corr();

    let expected = df
        .clone()
        .group_by(["c"])
        .agg([
            pearson_corr(col("a"), col("b")).alias("corr"),
            // Alias matches the SQL output name `covar` below so the frames
            // agree on column names as well as values.
            cov(col("a"), col("b"), 1).alias("covar"),
        ])
        .sort(["c"], Default::default())
        .collect()
        .unwrap();

    let mut ctx = SQLContext::new();
    ctx.register("df", df);
    let sql = r#"
    SELECT
        c,
        CORR(a, b) AS corr,
        COVAR(a, b) AS covar
    FROM df
    GROUP BY c
    ORDER BY c"#;
    let actual = ctx.execute(sql).unwrap().collect().unwrap();

    assert_eq!(expected, actual, "expected {expected:?}, got {actual:?}");
}
188
189