CoCalc -- getting-started.rs

GitHub Repository: pola-rs/polars
Path: blob/main/docs/source/src/rust/user-guide/getting-started.rs
6940 views
1
fn main() -> Result<(), Box<dyn std::error::Error>> {
2
    // --8<-- [start:df]
3
    use chrono::prelude::*;
4
    use polars::prelude::*;
5

6
    let mut df: DataFrame = df!(
7
        "name" => ["Alice Archer", "Ben Brown", "Chloe Cooper", "Daniel Donovan"],
8
        "birthdate" => [
9
            NaiveDate::from_ymd_opt(1997, 1, 10).unwrap(),
10
            NaiveDate::from_ymd_opt(1985, 2, 15).unwrap(),
11
            NaiveDate::from_ymd_opt(1983, 3, 22).unwrap(),
12
            NaiveDate::from_ymd_opt(1981, 4, 30).unwrap(),
13
        ],
14
        "weight" => [57.9, 72.5, 53.6, 83.1],  // (kg)
15
        "height" => [1.56, 1.77, 1.65, 1.75],  // (m)
16
    )
17
    .unwrap();
18
    println!("{df}");
19
    // --8<-- [end:df]
20

21
    // --8<-- [start:csv]
22
    use std::fs::File;
23

24
    let mut file = File::create("docs/assets/data/output.csv").expect("could not create file");
25
    CsvWriter::new(&mut file)
26
        .include_header(true)
27
        .with_separator(b',')
28
        .finish(&mut df)?;
29
    let df_csv = CsvReadOptions::default()
30
        .with_has_header(true)
31
        .with_parse_options(CsvParseOptions::default().with_try_parse_dates(true))
32
        .try_into_reader_with_file_path(Some("docs/assets/data/output.csv".into()))?
33
        .finish()?;
34
    println!("{df_csv}");
35
    // --8<-- [end:csv]
36

37
    // --8<-- [start:select]
38
    let result = df
39
        .clone()
40
        .lazy()
41
        .select([
42
            col("name"),
43
            col("birthdate").dt().year().alias("birth_year"),
44
            (col("weight") / col("height").pow(2)).alias("bmi"),
45
        ])
46
        .collect()?;
47
    println!("{result}");
48
    // --8<-- [end:select]
49

50
    // --8<-- [start:expression-expansion]
51
    let result = df
52
        .clone()
53
        .lazy()
54
        .select([
55
            col("name"),
56
            (cols(["weight", "height"]).as_expr() * lit(0.95))
57
                .round(2, RoundMode::default())
58
                .name()
59
                .suffix("-5%"),
60
        ])
61
        .collect()?;
62
    println!("{result}");
63
    // --8<-- [end:expression-expansion]
64

65
    // --8<-- [start:with_columns]
66
    let result = df
67
        .clone()
68
        .lazy()
69
        .with_columns([
70
            col("birthdate").dt().year().alias("birth_year"),
71
            (col("weight") / col("height").pow(2)).alias("bmi"),
72
        ])
73
        .collect()?;
74
    println!("{result}");
75
    // --8<-- [end:with_columns]
76

77
    // --8<-- [start:filter]
78
    let result = df
79
        .clone()
80
        .lazy()
81
        .filter(col("birthdate").dt().year().lt(lit(1990)))
82
        .collect()?;
83
    println!("{result}");
84
    // --8<-- [end:filter]
85

86
    // --8<-- [start:filter-multiple]
87
    let result = df
88
        .clone()
89
        .lazy()
90
        .filter(
91
            col("birthdate")
92
                .is_between(
93
                    lit(NaiveDate::from_ymd_opt(1982, 12, 31).unwrap()),
94
                    lit(NaiveDate::from_ymd_opt(1996, 1, 1).unwrap()),
95
                    ClosedInterval::Both,
96
                )
97
                .and(col("height").gt(lit(1.7))),
98
        )
99
        .collect()?;
100
    println!("{result}");
101
    // --8<-- [end:filter-multiple]
102

103
    // --8<-- [start:group_by]
104
    // Use `group_by_stable` if you want the Python behaviour of `maintain_order=True`.
105
    let result = df
106
        .clone()
107
        .lazy()
108
        .group_by([(col("birthdate").dt().year() / lit(10) * lit(10)).alias("decade")])
109
        .agg([len()])
110
        .collect()?;
111
    println!("{result}");
112
    // --8<-- [end:group_by]
113

114
    // --8<-- [start:group_by-agg]
115
    let result = df
116
        .clone()
117
        .lazy()
118
        .group_by([(col("birthdate").dt().year() / lit(10) * lit(10)).alias("decade")])
119
        .agg([
120
            len().alias("sample_size"),
121
            col("weight")
122
                .mean()
123
                .round(2, RoundMode::default())
124
                .alias("avg_weight"),
125
            col("height").max().alias("tallest"),
126
        ])
127
        .collect()?;
128
    println!("{result}");
129
    // --8<-- [end:group_by-agg]
130

131
    // --8<-- [start:complex]
132
    let result = df
133
        .clone()
134
        .lazy()
135
        .with_columns([
136
            (col("birthdate").dt().year() / lit(10) * lit(10)).alias("decade"),
137
            col("name").str().split(lit(" ")).list().first(),
138
        ])
139
        .select([all().exclude_cols(["birthdate"]).as_expr()])
140
        .group_by([col("decade")])
141
        .agg([
142
            col("name"),
143
            cols(["weight", "height"])
144
                .as_expr()
145
                .mean()
146
                .round(2, RoundMode::default())
147
                .name()
148
                .prefix("avg_"),
149
        ])
150
        .collect()?;
151
    println!("{result}");
152
    // --8<-- [end:complex]
153

154
    // --8<-- [start:join]
155
    let df2: DataFrame = df!(
156
        "name" => ["Ben Brown", "Daniel Donovan", "Alice Archer", "Chloe Cooper"],
157
        "parent" => [true, false, false, false],
158
        "siblings" => [1, 2, 3, 4],
159
    )
160
    .unwrap();
161

162
    let result = df
163
        .clone()
164
        .lazy()
165
        .join(
166
            df2.lazy(),
167
            [col("name")],
168
            [col("name")],
169
            JoinArgs::new(JoinType::Left),
170
        )
171
        .collect()?;
172

173
    println!("{result}");
174
    // --8<-- [end:join]
175

176
    // --8<-- [start:concat]
177
    let df3: DataFrame = df!(
178
        "name" => ["Ethan Edwards", "Fiona Foster", "Grace Gibson", "Henry Harris"],
179
        "birthdate" => [
180
            NaiveDate::from_ymd_opt(1977, 5, 10).unwrap(),
181
            NaiveDate::from_ymd_opt(1975, 6, 23).unwrap(),
182
            NaiveDate::from_ymd_opt(1973, 7, 22).unwrap(),
183
            NaiveDate::from_ymd_opt(1971, 8, 3).unwrap(),
184
        ],
185
        "weight" => [67.9, 72.5, 57.6, 93.1],  // (kg)
186
        "height" => [1.76, 1.6, 1.66, 1.8],  // (m)
187
    )
188
    .unwrap();
189

190
    let result = concat([df.clone().lazy(), df3.lazy()], UnionArgs::default())?.collect()?;
191
    println!("{result}");
192
    // --8<-- [end:concat]
193

194
    Ok(())
195
}
196

197
Product

Resources

Company