Path: blob/main/docs/source/src/rust/user-guide/expressions/aggregation.rs
7889 views
fn main() -> Result<(), Box<dyn std::error::Error>> {1// --8<-- [start:dataframe]2use std::io::Cursor;34use polars::prelude::*;5use reqwest::blocking::Client;67let url = "https://huggingface.co/datasets/nameexhaustion/polars-docs/resolve/main/legislators-historical.csv";89let mut schema = Schema::default();10schema.with_column(11"first_name".into(),12DataType::from_categories(Categories::global()),13);14schema.with_column(15"gender".into(),16DataType::from_categories(Categories::global()),17);18schema.with_column(19"type".into(),20DataType::from_categories(Categories::global()),21);22schema.with_column(23"state".into(),24DataType::from_categories(Categories::global()),25);26schema.with_column(27"party".into(),28DataType::from_categories(Categories::global()),29);30schema.with_column("birthday".into(), DataType::Date);3132let data = Client::new().get(url).send()?.bytes()?;3334let dataset = CsvReadOptions::default()35.with_has_header(true)36.with_schema_overwrite(Some(Arc::new(schema)))37.map_parse_options(|parse_options| parse_options.with_try_parse_dates(true))38.into_reader_with_file_handle(Cursor::new(data))39.finish()?40.lazy()41.with_columns([42col("first").name().suffix("_name"),43col("middle").name().suffix("_name"),44col("last").name().suffix("_name"),45])46.collect()?;4748println!("{}", &dataset);49// --8<-- [end:dataframe]5051// --8<-- [start:basic]52let df = dataset53.clone()54.lazy()55.group_by(["first_name"])56.agg([len(), col("gender"), col("last_name").first()])57.sort(58["len"],59SortMultipleOptions::default()60.with_order_descending(true)61.with_nulls_last(true),62)63.limit(5)64.collect()?;6566println!("{df}");67// --8<-- [end:basic]6869// --8<-- [start:conditional]70let df = dataset71.clone()72.lazy()73.group_by(["state"])74.agg([75(col("party").eq(lit("Anti-Administration")))76.sum()77.alias("anti"),78(col("party").eq(lit("Pro-Administration")))79.sum()80.alias("pro"),81])82.sort(83["pro"],84SortMultipleOptions::default().with_order_descending(true),85)86.limit(5)87.collect()?;8889println!("{df}");90// --8<-- [end:conditional]9192// --8<-- [start:nested]93let df = dataset94.clone()95.lazy()96.group_by(["state", "party"])97.agg([len().alias("count")])98.filter(99col("party")100.eq(lit("Anti-Administration"))101.or(col("party").eq(lit("Pro-Administration"))),102)103.sort(104["count"],105SortMultipleOptions::default()106.with_order_descending(true)107.with_nulls_last(true),108)109.limit(5)110.collect()?;111112println!("{df}");113// --8<-- [end:nested]114115// --8<-- [start:filter]116fn compute_age() -> Expr {117lit(2024) - col("birthday").dt().year()118}119120fn avg_birthday(gender: &str) -> Expr {121compute_age()122.filter(col("gender").eq(lit(gender)))123.mean()124.alias(format!("avg {gender} birthday"))125}126127let df = dataset128.clone()129.lazy()130.group_by(["state"])131.agg([132avg_birthday("M"),133avg_birthday("F"),134(col("gender").eq(lit("M"))).sum().alias("# male"),135(col("gender").eq(lit("F"))).sum().alias("# female"),136])137.limit(5)138.collect()?;139140println!("{df}");141// --8<-- [end:filter]142143// --8<-- [start:filter-nested]144let df = dataset145.clone()146.lazy()147.group_by(["state", "gender"])148.agg([compute_age().mean().alias("avg birthday"), len().alias("#")])149.sort(150["#"],151SortMultipleOptions::default()152.with_order_descending(true)153.with_nulls_last(true),154)155.limit(5)156.collect()?;157158println!("{df}");159// --8<-- [end:filter-nested]160161// --8<-- [start:sort]162fn get_name() -> Expr {163col("first_name") + lit(" ") + col("last_name")164}165166let df = dataset167.clone()168.lazy()169.sort(170["birthday"],171SortMultipleOptions::default()172.with_order_descending(true)173.with_nulls_last(true),174)175.group_by(["state"])176.agg([177get_name().first().alias("youngest"),178get_name().last().alias("oldest"),179])180.limit(5)181.collect()?;182183println!("{df}");184// --8<-- [end:sort]185186// --8<-- [start:sort2]187let df = dataset188.clone()189.lazy()190.sort(191["birthday"],192SortMultipleOptions::default()193.with_order_descending(true)194.with_nulls_last(true),195)196.group_by(["state"])197.agg([198get_name().first().alias("youngest"),199get_name().last().alias("oldest"),200get_name()201.sort(Default::default())202.first()203.alias("alphabetical_first"),204])205.limit(5)206.collect()?;207208println!("{df}");209// --8<-- [end:sort2]210211// --8<-- [start:sort3]212let df = dataset213.lazy()214.sort(215["birthday"],216SortMultipleOptions::default()217.with_order_descending(true)218.with_nulls_last(true),219)220.group_by(["state"])221.agg([222get_name().first().alias("youngest"),223get_name().last().alias("oldest"),224get_name()225.sort(Default::default())226.first()227.alias("alphabetical_first"),228col("gender")229.sort_by(["first_name"], SortMultipleOptions::default())230.first(),231])232.sort(["state"], SortMultipleOptions::default())233.limit(5)234.collect()?;235236println!("{df}");237// --8<-- [end:sort3]238239Ok(())240}241242243