Path: blob/main/docs/source/src/python/user-guide/expressions/categoricals.py
7890 views
# Runnable examples for the categoricals / enums page of the user guide.
#
# Each `# --8<-- [start:NAME]` / `# --8<-- [end:NAME]` comment pair delimits a
# named section of this file. Sections run top to bottom as one script, so
# later sections may reuse (or rebind) names created by earlier ones. Imports
# are repeated inside sections so that each section reads on its own.

# --8<-- [start:enum-example]
import polars as pl

bears_enum = pl.Enum(["Polar", "Panda", "Brown"])
bears = pl.Series(["Polar", "Panda", "Brown", "Brown", "Polar"], dtype=bears_enum)
print(bears)
# --8<-- [end:enum-example]

# --8<-- [start:enum-wrong-value]
from polars.exceptions import InvalidOperationError

try:
    bears_kind_of = pl.Series(
        ["Polar", "Panda", "Brown", "Polar", "Shark"],
        dtype=bears_enum,
    )
except InvalidOperationError as exc:
    print("InvalidOperationError:", exc)
# --8<-- [end:enum-wrong-value]

# --8<-- [start:log-levels]
log_levels = pl.Enum(["debug", "info", "warning", "error"])

logs = pl.DataFrame(
    {
        "level": ["debug", "info", "debug", "error"],
        "message": [
            "process id: 525",
            "Service started correctly",
            "startup time: 67ms",
            "Cannot connect to DB!",
        ],
    },
    schema_overrides={
        "level": log_levels,
    },
)

non_debug_logs = logs.filter(
    pl.col("level") > "debug",
)
print(non_debug_logs)
# --8<-- [end:log-levels]

# --8<-- [start:categorical-example]
bears_cat = pl.Series(
    ["Polar", "Panda", "Brown", "Brown", "Polar"], dtype=pl.Categorical
)
print(bears_cat)
# --8<-- [end:categorical-example]

# --8<-- [start:categorical-comparison-string]
print(bears_cat < "Cat")
# --8<-- [end:categorical-comparison-string]

# --8<-- [start:categorical-comparison-string-column]
bears_str = pl.Series(
    ["Panda", "Brown", "Brown", "Polar", "Polar"],
)
print(bears_cat == bears_str)
# --8<-- [end:categorical-comparison-string-column]

# --8<-- [start:categorical-comparison-categorical-column]
from polars.exceptions import StringCacheMismatchError

bears_cat2 = pl.Series(
    ["Panda", "Brown", "Brown", "Polar", "Polar"],
    dtype=pl.Categorical,
)

try:
    print(bears_cat == bears_cat2)
except StringCacheMismatchError as exc:
    exc_str = str(exc).splitlines()[0]
    print("StringCacheMismatchError:", exc_str)
# --8<-- [end:categorical-comparison-categorical-column]

# --8<-- [start:stringcache-categorical-equality]
with pl.StringCache():
    bears_cat = pl.Series(
        ["Polar", "Panda", "Brown", "Brown", "Polar"], dtype=pl.Categorical
    )
    bears_cat2 = pl.Series(
        ["Panda", "Brown", "Brown", "Polar", "Polar"], dtype=pl.Categorical
    )

    print(bears_cat == bears_cat2)
# --8<-- [end:stringcache-categorical-equality]

# --8<-- [start:stringcache-categorical-comparison-lexical]
import warnings

with pl.StringCache():
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)

        bears_cat = pl.Series(
            ["Polar", "Panda", "Brown", "Brown", "Polar"],
            dtype=pl.Categorical(ordering="lexical"),
        )
        bears_cat2 = pl.Series(
            ["Panda", "Brown", "Brown", "Polar", "Polar"], dtype=pl.Categorical
        )

        print(bears_cat > bears_cat2)
# --8<-- [end:stringcache-categorical-comparison-lexical]

# --8<-- [start:stringcache-categorical-comparison-physical]
with pl.StringCache():
    bears_cat = pl.Series(
        # Polar < Panda < Brown
        ["Polar", "Panda", "Brown", "Brown", "Polar"],
        dtype=pl.Categorical,
    )
    bears_cat2 = pl.Series(
        ["Panda", "Brown", "Brown", "Polar", "Polar"], dtype=pl.Categorical
    )

    print(bears_cat > bears_cat2)
# --8<-- [end:stringcache-categorical-comparison-physical]

# --8<-- [start:concatenating-categoricals]
import warnings

from polars.exceptions import CategoricalRemappingWarning

male_bears = pl.DataFrame(
    {
        "species": ["Polar", "Brown", "Panda"],
        "weight": [450, 500, 110],  # kg
    },
    schema_overrides={"species": pl.Categorical},
)
female_bears = pl.DataFrame(
    {
        "species": ["Brown", "Polar", "Panda"],
        "weight": [340, 200, 90],  # kg
    },
    schema_overrides={"species": pl.Categorical},
)

with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=CategoricalRemappingWarning)
    bears = pl.concat([male_bears, female_bears], how="vertical")

print(bears)
# --8<-- [end:concatenating-categoricals]


# --8<-- [start:example]
import polars as pl

bears_enum = pl.Enum(["Polar", "Panda", "Brown"])
bears = pl.Series(["Polar", "Panda", "Brown", "Brown", "Polar"], dtype=bears_enum)
print(bears)

cat_bears = pl.Series(
    ["Polar", "Panda", "Brown", "Brown", "Polar"], dtype=pl.Categorical
)
# --8<-- [end:example]


# --8<-- [start:append]
cat_bears = pl.Series(
    ["Polar", "Panda", "Brown", "Brown", "Polar"], dtype=pl.Categorical
)
cat2_series = pl.Series(
    ["Panda", "Brown", "Brown", "Polar", "Polar"], dtype=pl.Categorical
)

# Triggers a CategoricalRemappingWarning.
print(cat_bears.append(cat2_series))
# --8<-- [end:append]

# --8<-- [start:enum_append]
dtype = pl.Enum(["Polar", "Panda", "Brown"])
cat_bears = pl.Series(["Polar", "Panda", "Brown", "Brown", "Polar"], dtype=dtype)
cat2_series = pl.Series(["Panda", "Brown", "Brown", "Polar", "Polar"], dtype=dtype)
print(cat_bears.append(cat2_series))
# --8<-- [end:enum_append]

# --8<-- [start:enum_error]
from polars.exceptions import InvalidOperationError

dtype = pl.Enum(["Polar", "Panda", "Brown"])
try:
    cat_bears = pl.Series(["Polar", "Panda", "Brown", "Black"], dtype=dtype)
except InvalidOperationError as e:
    print(e)
# --8<-- [end:enum_error]

# --8<-- [start:equality]
dtype = pl.Enum(["Polar", "Panda", "Brown"])
cat_bears = pl.Series(["Brown", "Panda", "Polar"], dtype=dtype)
cat_series2 = pl.Series(["Polar", "Panda", "Brown"], dtype=dtype)
print(cat_bears == cat_series2)
# --8<-- [end:equality]

# --8<-- [start:global_equality]
with pl.StringCache():
    cat_bears = pl.Series(["Brown", "Panda", "Polar"], dtype=pl.Categorical)
    cat_series2 = pl.Series(["Polar", "Panda", "Black"], dtype=pl.Categorical)
    print(cat_bears == cat_series2)
# --8<-- [end:global_equality]

# NOTE(review): the section below reuses the name `equality` and repeats the
# earlier `equality` section verbatim; kept as-is, confirm whether both are
# still needed.
# --8<-- [start:equality]
dtype = pl.Enum(["Polar", "Panda", "Brown"])
cat_bears = pl.Series(["Brown", "Panda", "Polar"], dtype=dtype)
cat_series2 = pl.Series(["Polar", "Panda", "Brown"], dtype=dtype)
print(cat_bears == cat_series2)
# --8<-- [end:equality]

# --8<-- [start:str_compare_single]
cat_bears = pl.Series(["Brown", "Panda", "Polar"], dtype=pl.Categorical)
print(cat_bears <= "Cat")
# --8<-- [end:str_compare_single]

# --8<-- [start:str_compare]
cat_bears = pl.Series(["Brown", "Panda", "Polar"], dtype=pl.Categorical)
cat_series_utf = pl.Series(["Panda", "Panda", "Polar"])
print(cat_bears <= cat_series_utf)
# --8<-- [end:str_compare]