// Path: blob/main/docs/source/src/rust/user-guide/getting-started.rs
// 6940 views
fn main() -> Result<(), Box<dyn std::error::Error>> {1// --8<-- [start:df]2use chrono::prelude::*;3use polars::prelude::*;45let mut df: DataFrame = df!(6"name" => ["Alice Archer", "Ben Brown", "Chloe Cooper", "Daniel Donovan"],7"birthdate" => [8NaiveDate::from_ymd_opt(1997, 1, 10).unwrap(),9NaiveDate::from_ymd_opt(1985, 2, 15).unwrap(),10NaiveDate::from_ymd_opt(1983, 3, 22).unwrap(),11NaiveDate::from_ymd_opt(1981, 4, 30).unwrap(),12],13"weight" => [57.9, 72.5, 53.6, 83.1], // (kg)14"height" => [1.56, 1.77, 1.65, 1.75], // (m)15)16.unwrap();17println!("{df}");18// --8<-- [end:df]1920// --8<-- [start:csv]21use std::fs::File;2223let mut file = File::create("docs/assets/data/output.csv").expect("could not create file");24CsvWriter::new(&mut file)25.include_header(true)26.with_separator(b',')27.finish(&mut df)?;28let df_csv = CsvReadOptions::default()29.with_has_header(true)30.with_parse_options(CsvParseOptions::default().with_try_parse_dates(true))31.try_into_reader_with_file_path(Some("docs/assets/data/output.csv".into()))?32.finish()?;33println!("{df_csv}");34// --8<-- [end:csv]3536// --8<-- [start:select]37let result = df38.clone()39.lazy()40.select([41col("name"),42col("birthdate").dt().year().alias("birth_year"),43(col("weight") / col("height").pow(2)).alias("bmi"),44])45.collect()?;46println!("{result}");47// --8<-- [end:select]4849// --8<-- [start:expression-expansion]50let result = df51.clone()52.lazy()53.select([54col("name"),55(cols(["weight", "height"]).as_expr() * lit(0.95))56.round(2, RoundMode::default())57.name()58.suffix("-5%"),59])60.collect()?;61println!("{result}");62// --8<-- [end:expression-expansion]6364// --8<-- [start:with_columns]65let result = df66.clone()67.lazy()68.with_columns([69col("birthdate").dt().year().alias("birth_year"),70(col("weight") / col("height").pow(2)).alias("bmi"),71])72.collect()?;73println!("{result}");74// --8<-- [end:with_columns]7576// --8<-- [start:filter]77let result = 
df78.clone()79.lazy()80.filter(col("birthdate").dt().year().lt(lit(1990)))81.collect()?;82println!("{result}");83// --8<-- [end:filter]8485// --8<-- [start:filter-multiple]86let result = df87.clone()88.lazy()89.filter(90col("birthdate")91.is_between(92lit(NaiveDate::from_ymd_opt(1982, 12, 31).unwrap()),93lit(NaiveDate::from_ymd_opt(1996, 1, 1).unwrap()),94ClosedInterval::Both,95)96.and(col("height").gt(lit(1.7))),97)98.collect()?;99println!("{result}");100// --8<-- [end:filter-multiple]101102// --8<-- [start:group_by]103// Use `group_by_stable` if you want the Python behaviour of `maintain_order=True`.104let result = df105.clone()106.lazy()107.group_by([(col("birthdate").dt().year() / lit(10) * lit(10)).alias("decade")])108.agg([len()])109.collect()?;110println!("{result}");111// --8<-- [end:group_by]112113// --8<-- [start:group_by-agg]114let result = df115.clone()116.lazy()117.group_by([(col("birthdate").dt().year() / lit(10) * lit(10)).alias("decade")])118.agg([119len().alias("sample_size"),120col("weight")121.mean()122.round(2, RoundMode::default())123.alias("avg_weight"),124col("height").max().alias("tallest"),125])126.collect()?;127println!("{result}");128// --8<-- [end:group_by-agg]129130// --8<-- [start:complex]131let result = df132.clone()133.lazy()134.with_columns([135(col("birthdate").dt().year() / lit(10) * lit(10)).alias("decade"),136col("name").str().split(lit(" ")).list().first(),137])138.select([all().exclude_cols(["birthdate"]).as_expr()])139.group_by([col("decade")])140.agg([141col("name"),142cols(["weight", "height"])143.as_expr()144.mean()145.round(2, RoundMode::default())146.name()147.prefix("avg_"),148])149.collect()?;150println!("{result}");151// --8<-- [end:complex]152153// --8<-- [start:join]154let df2: DataFrame = df!(155"name" => ["Ben Brown", "Daniel Donovan", "Alice Archer", "Chloe Cooper"],156"parent" => [true, false, false, false],157"siblings" => [1, 2, 3, 4],158)159.unwrap();160161let result = 
df162.clone()163.lazy()164.join(165df2.lazy(),166[col("name")],167[col("name")],168JoinArgs::new(JoinType::Left),169)170.collect()?;171172println!("{result}");173// --8<-- [end:join]174175// --8<-- [start:concat]176let df3: DataFrame = df!(177"name" => ["Ethan Edwards", "Fiona Foster", "Grace Gibson", "Henry Harris"],178"birthdate" => [179NaiveDate::from_ymd_opt(1977, 5, 10).unwrap(),180NaiveDate::from_ymd_opt(1975, 6, 23).unwrap(),181NaiveDate::from_ymd_opt(1973, 7, 22).unwrap(),182NaiveDate::from_ymd_opt(1971, 8, 3).unwrap(),183],184"weight" => [67.9, 72.5, 57.6, 93.1], // (kg)185"height" => [1.76, 1.6, 1.66, 1.8], // (m)186)187.unwrap();188189let result = concat([df.clone().lazy(), df3.lazy()], UnionArgs::default())?.collect()?;190println!("{result}");191// --8<-- [end:concat]192193Ok(())194}195196197