# Path: blob/main/docs/source/src/python/user-guide/getting-started.py
# 6940 views
# Getting-started example script: builds a small "people" DataFrame with
# Polars and runs it through a CSV round trip, select / with_columns /
# filter / group_by, a left join and a vertical concat, printing each result.
#
# NOTE(review): the `# --8<-- [start:NAME]` / `# --8<-- [end:NAME]` comment
# pairs look like snippet-section markers consumed by the docs build. Keep
# them verbatim, and keep explanatory comments (like this one) outside the
# marked regions so they do not end up in rendered snippets -- confirm
# against the docs tooling.

# --8<-- [start:df]
import polars as pl
import datetime as dt

df = pl.DataFrame(
    {
        "name": ["Alice Archer", "Ben Brown", "Chloe Cooper", "Daniel Donovan"],
        "birthdate": [
            dt.date(1997, 1, 10),
            dt.date(1985, 2, 15),
            dt.date(1983, 3, 22),
            dt.date(1981, 4, 30),
        ],
        "weight": [57.9, 72.5, 53.6, 83.1],  # (kg)
        "height": [1.56, 1.77, 1.65, 1.75],  # (m)
    }
)

print(df)
# --8<-- [end:df]

# CSV round trip: write `df` out, then read the same file back while asking
# the reader to try parsing date columns.
# NOTE(review): the path is relative, so this presumably has to be run from
# the repository root with `docs/assets/data/` already present -- confirm.
# --8<-- [start:csv]
df.write_csv("docs/assets/data/output.csv")
df_csv = pl.read_csv("docs/assets/data/output.csv", try_parse_dates=True)
print(df_csv)
# --8<-- [end:csv]

# Projection: the name, the year taken from the birthdate (as "birth_year"),
# and weight / height**2 (as "bmi").
# --8<-- [start:select]
result = df.select(
    pl.col("name"),
    pl.col("birthdate").dt.year().alias("birth_year"),
    (pl.col("weight") / (pl.col("height") ** 2)).alias("bmi"),
)
print(result)
# --8<-- [end:select]

# A single expression addressing both "weight" and "height": scale by 0.95,
# round to 2 decimals, and append "-5%" to each resulting column name.
# --8<-- [start:expression-expansion]
result = df.select(
    pl.col("name"),
    (pl.col("weight", "height") * 0.95).round(2).name.suffix("-5%"),
)
print(result)
# --8<-- [end:expression-expansion]

# The same two derived values as the select section, this time passed to
# with_columns as keyword arguments (the keyword names the output column).
# --8<-- [start:with_columns]
result = df.with_columns(
    birth_year=pl.col("birthdate").dt.year(),
    bmi=pl.col("weight") / (pl.col("height") ** 2),
)
print(result)
# --8<-- [end:with_columns]

# Keep only rows whose birth year is before 1990.
# --8<-- [start:filter]
result = df.filter(pl.col("birthdate").dt.year() < 1990)
print(result)
# --8<-- [end:filter]

# Two predicates in one filter call: birthdate inside a date range and
# height above 1.7 (Polars combines positional predicates with AND).
# --8<-- [start:filter-multiple]
result = df.filter(
    pl.col("birthdate").is_between(dt.date(1982, 12, 31), dt.date(1996, 1, 1)),
    pl.col("height") > 1.7,
)
print(result)
# --8<-- [end:filter-multiple]

# Bucket rows by birth decade (year // 10 * 10) and count the rows in each
# bucket; maintain_order=True is passed so the group order is stable.
# --8<-- [start:group_by]
result = df.group_by(
    (pl.col("birthdate").dt.year() // 10 * 10).alias("decade"),
    maintain_order=True,
).len()
print(result)
# --8<-- [end:group_by]

# Same decade buckets, with three aggregates per bucket: row count, mean
# weight rounded to 2 decimals, and maximum height.
# --8<-- [start:group_by-agg]
result = df.group_by(
    (pl.col("birthdate").dt.year() // 10 * 10).alias("decade"),
    maintain_order=True,
).agg(
    pl.len().alias("sample_size"),
    pl.col("weight").mean().round(2).alias("avg_weight"),
    pl.col("height").max().alias("tallest"),
)
print(result)
# --8<-- [end:group_by-agg]

# Chained pipeline: add the decade and reduce each name to its first
# space-separated token, drop "birthdate", group by decade, then collect the
# names and the rounded mean weight/height (output names prefixed "avg_").
# --8<-- [start:complex]
result = (
    df.with_columns(
        (pl.col("birthdate").dt.year() // 10 * 10).alias("decade"),
        pl.col("name").str.split(by=" ").list.first(),
    )
    .select(
        pl.all().exclude("birthdate"),
    )
    .group_by(
        pl.col("decade"),
        maintain_order=True,
    )
    .agg(
        pl.col("name"),
        pl.col("weight", "height").mean().round(2).name.prefix("avg_"),
    )
)
print(result)
# --8<-- [end:complex]

# Left join of `df` with a second frame keyed on "name"; `df2` lists the same
# four names as `df`, in a different order.
# --8<-- [start:join]
df2 = pl.DataFrame(
    {
        "name": ["Ben Brown", "Daniel Donovan", "Alice Archer", "Chloe Cooper"],
        "parent": [True, False, False, False],
        "siblings": [1, 2, 3, 4],
    }
)

print(df.join(df2, on="name", how="left"))
# --8<-- [end:join]

# Vertical concatenation: `df3` is built with the same four columns as `df`
# and is stacked underneath it.
# --8<-- [start:concat]
df3 = pl.DataFrame(
    {
        "name": ["Ethan Edwards", "Fiona Foster", "Grace Gibson", "Henry Harris"],
        "birthdate": [
            dt.date(1977, 5, 10),
            dt.date(1975, 6, 23),
            dt.date(1973, 7, 22),
            dt.date(1971, 8, 3),
        ],
        "weight": [67.9, 72.5, 57.6, 93.1],  # (kg)
        "height": [1.76, 1.6, 1.66, 1.8],  # (m)
    }
)

print(pl.concat([df, df3], how="vertical"))
# --8<-- [end:concat]