CoCalc -- issues.rs

GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-sql/tests/issues.rs
⁸⁴⁰⁶ views
1
use polars_core::prelude::*;
2
use polars_lazy::prelude::*;
3
use polars_sql::*;
4
use polars_utils::pl_path::PlRefPath;
5

6
#[test]
7
#[cfg(feature = "csv")]
8
fn iss_7437() -> PolarsResult<()> {
9
    let mut context = SQLContext::new();
10
    let sql = r#"
11
        CREATE TABLE foods AS
12
        SELECT *
13
        FROM read_csv('../../examples/datasets/foods1.csv')"#;
14
    context.execute(sql)?.collect()?;
15

16
    let df_sql = context
17
        .execute(
18
            r#"
19
            SELECT "category" as category
20
            FROM foods
21
            GROUP BY "category"
22
        "#,
23
        )?
24
        .collect()?
25
        .sort(["category"], SortMultipleOptions::default())?;
26

27
    let expected = LazyCsvReader::new(PlRefPath::new("../../examples/datasets/foods1.csv"))
28
        .finish()?
29
        .group_by(vec![col("category").alias("category")])
30
        .agg(vec![])
31
        .collect()?
32
        .sort(["category"], Default::default())?;
33

34
    assert!(df_sql.equals(&expected));
35
    Ok(())
36
}
37

38
#[test]
39
#[cfg(feature = "csv")]
40
fn iss_7436() {
41
    let mut context = SQLContext::new();
42
    let sql = r#"
43
        CREATE TABLE foods AS
44
        SELECT *
45
        FROM read_csv('../../examples/datasets/foods1.csv')"#;
46
    context.execute(sql).unwrap().collect().unwrap();
47
    let df_sql = context
48
        .execute(
49
            r#"
50
        SELECT
51
            "fats_g" AS fats,
52
            AVG(calories) OVER (PARTITION BY "category") AS avg_calories_by_category
53
        FROM foods
54
        LIMIT 5
55
        "#,
56
        )
57
        .unwrap()
58
        .collect()
59
        .unwrap();
60
    let expected = LazyCsvReader::new(PlRefPath::new("../../examples/datasets/foods1.csv"))
61
        .finish()
62
        .unwrap()
63
        .select(&[
64
            col("fats_g").alias("fats"),
65
            col("calories")
66
                .mean()
67
                .over(vec![col("category")])
68
                .alias("avg_calories_by_category"),
69
        ])
70
        .limit(5)
71
        .collect()
72
        .unwrap();
73
    assert!(df_sql.equals(&expected));
74
}
75

76
#[test]
77
fn iss_7440() {
78
    let df = df! {
79
        "a" => [2.0, -2.5]
80
    }
81
    .unwrap()
82
    .lazy();
83
    let sql = r#"SELECT a, FLOOR(a) AS floor, CEIL(a) AS ceil FROM df"#;
84
    let mut context = SQLContext::new();
85
    context.register("df", df.clone());
86

87
    let df_sql = context.execute(sql).unwrap().collect().unwrap();
88

89
    let df_pl = df
90
        .select(&[
91
            col("a"),
92
            col("a").floor().alias("floor"),
93
            col("a").ceil().alias("ceil"),
94
        ])
95
        .collect()
96
        .unwrap();
97
    assert!(df_sql.equals_missing(&df_pl));
98
}
99

100
#[test]
101
#[cfg(feature = "csv")]
102
fn iss_8395() -> PolarsResult<()> {
103
    let mut context = SQLContext::new();
104
    let sql = r#"
105
    with foods as (
106
        SELECT *
107
        FROM read_csv('../../examples/datasets/foods1.csv')
108
    )
109
    select * from foods where category IN ('vegetables', 'seafood')"#;
110
    let res = context.execute(sql)?;
111
    let df = res.collect()?;
112

113
    // assert that the df only contains [vegetables, seafood]
114
    let s = df.column("category")?.unique()?.sort(Default::default())?;
115
    let expected = Column::new("category".into(), &["seafood", "vegetables"]);
116
    assert!(s.equals(&expected));
117
    Ok(())
118
}
119

120
#[test]
121
fn iss_8419() {
122
    let df = df! {
123
      "Year"=> [2018, 2018, 2019, 2019, 2020, 2020],
124
      "Country"=> ["US", "UK", "US", "UK", "US", "UK"],
125
      "Sales"=> [1000, 2000, 3000, 4000, 5000, 6000]
126
    }
127
    .unwrap()
128
    .lazy();
129

130
    let mut ctx = SQLContext::new();
131
    ctx.register("df", df);
132

133
    let query = r#"
134
    SELECT
135
        Year,
136
        Country,
137
        Sales,
138
        SUM(Sales) OVER (ORDER BY Sales DESC) as SalesCumulative
139
    FROM
140
        df
141
    ORDER BY
142
        SalesCumulative
143
    "#;
144

145
    let df = ctx.execute(query).unwrap().collect().unwrap();
146
    let expected = df! {
147
      "Year"   => [2020, 2020, 2019, 2019, 2018, 2018],
148
      "Country"=> ["UK", "US", "UK", "US", "UK", "US"],
149
      "Sales"  => [6000, 5000, 4000, 3000, 2000, 1000],
150
      "SalesCumulative" => [6000, 11000, 15000, 18000, 20000, 21000]
151
    }
152
    .unwrap();
153

154
    assert!(df.equals(&expected))
155
}
156

157
#[test]
158
fn iss_23134() -> PolarsResult<()> {
159
    // Reproduce issue: https://github.com/pola-rs/polars/issues/23134
160
    // Applying function to a column of group_by results in a list
161

162
    // Create test data
163
    let df = df! {
164
        "a" => ["a", "a", "b"],
165
        "b" => [1, 1, 2]
166
    }?;
167

168
    let mut ctx = SQLContext::new();
169
    ctx.register("test", df.lazy());
170

171
    let result1 = ctx
172
        .execute("SELECT a, b FROM test GROUP BY a, b")?
173
        .collect()?;
174

175
    // This should return 2 rows with distinct (a, b) pairs
176
    assert_eq!(result1.height(), 2);
177

178
    let result2 = ctx
179
        .execute("SELECT CONCAT(a, ' kek') as c, b FROM test GROUP BY a, b")?
180
        .collect()?;
181

182
    // Check the result structure
183
    let c_column = result2.column("c")?;
184

185
    // BUG: Currently returns List[String] instead of String
186
    // The issue is that it returns ["a kek", "a kek"] and ["b kek"] instead of "a kek" and "b kek"
187
    assert_eq!(c_column.dtype(), &DataType::String);
188

189
    let result3 = ctx
190
        .execute("SELECT CONCAT(a, ' kek'), b FROM test GROUP BY a, b")?
191
        .collect()?;
192

193
    // Check the result structure
194
    let columns: Vec<String> = result3
195
        .get_column_names()
196
        .into_iter()
197
        .map(|s| s.to_string())
198
        .collect();
199
    // a and b
200
    assert_eq!(columns, vec!["a", "b"]);
201

202
    Ok(())
203
}
204

205
Product

Resources

Company