Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/docs/source/src/python/user-guide/expressions/missing-data.py
7890 views
# Snippet "dataframe": build a single-column frame whose second entry is None
# and print it. The import lives inside the snippet markers so the rendered
# example is self-contained.
# --8<-- [start:dataframe]
import polars as pl

df = pl.DataFrame({"value": [1, None]})
print(df)
# --8<-- [end:dataframe]
11
12
# Snippet "count": call `null_count()` on the frame built in the "dataframe"
# snippet and print whatever it returns.
# --8<-- [start:count]
null_count_df = df.null_count()
print(null_count_df)
# --8<-- [end:count]
17
18
# Snippet "isnull": apply the `is_null()` check to the "value" column and
# print the selection.
# NOTE(review): `is_null_series` is bound to the result of `df.select(...)`;
# confirm whether that object is really a Series, as the name suggests.
# --8<-- [start:isnull]
value_is_null = pl.col("value").is_null()
is_null_series = df.select(value_is_null)
print(is_null_series)
# --8<-- [end:isnull]
25
26
# Snippet "dataframe2": rebind `df` to a two-column frame. "col1" has no
# missing entries; "col2" has None at positions 1 and 3. The fill snippets
# that follow operate on this frame.
# --8<-- [start:dataframe2]
sample_columns = {
    "col1": [0.5, 1, 1.5, 2, 2.5],
    "col2": [1, None, 3, None, 5],
}
df = pl.DataFrame(sample_columns)
print(df)
# --8<-- [end:dataframe2]
36
37
# Snippet "fill": fill the nulls of "col2" with the literal value 3.
# --8<-- [start:fill]
fill_literal_df = df.with_columns(pl.col("col2").fill_null(3))
print(fill_literal_df)
# --8<-- [end:fill]
44
# Snippet "fillexpr": fill the nulls of "col2" from an expression instead of a
# literal. The replacement is twice "col1", cast to Int64.
# --8<-- [start:fillexpr]
doubled_col1 = (2 * pl.col("col1")).cast(pl.Int64)
fill_expression_df = df.with_columns(pl.col("col2").fill_null(doubled_col1))
print(fill_expression_df)
# --8<-- [end:fillexpr]
51
# Snippet "fillstrategy": fill the nulls of "col2" twice, once with
# strategy="forward" and once with strategy="backward", storing each result
# under its own alias so both can be compared against the original column.
# --8<-- [start:fillstrategy]
col2 = pl.col("col2")
fill_forward_df = df.with_columns(
    col2.fill_null(strategy="forward").alias("forward"),
    col2.fill_null(strategy="backward").alias("backward"),
)
print(fill_forward_df)
# --8<-- [end:fillstrategy]
59
# Snippet "fillinterpolate": replace "col2" with the result of calling
# `interpolate()` on it.
# --8<-- [start:fillinterpolate]
fill_interpolation_df = df.with_columns(pl.col("col2").interpolate())
print(fill_interpolation_df)
# --8<-- [end:fillinterpolate]
66
# Snippet "nan": build a float column containing NaN written two ways
# (`np.nan` and `float("nan")`). The numpy import stays inside the snippet
# markers so the rendered example shows where `np` comes from.
# --8<-- [start:nan]
import numpy as np

nan_values = [1.0, np.nan, float("nan"), 3.0]
nan_df = pl.DataFrame({"value": nan_values})
print(nan_df)
# --8<-- [end:nan]
77
# Snippet "nan-computed": divide "dividend" by "divisor" element-wise. The
# middle row divides 0 by 0, which is the case this snippet is labelled for.
# Note that this rebinds the module-level `df`.
# --8<-- [start:nan-computed]
df = pl.DataFrame({"dividend": [1, 0, -1], "divisor": [1, 0, -1]})
quotient = pl.col("dividend") / pl.col("divisor")
result = df.select(quotient)
print(result)
# --8<-- [end:nan-computed]
88
# Snippet "nanfill": add a "replaced" column in which `fill_nan(None)` has been
# applied to "value", then take the mean and the sum of every column, naming
# the outputs with "_mean" / "_sum" suffixes so the two columns can be compared.
# --8<-- [start:nanfill]
with_replaced_df = nan_df.with_columns(
    pl.col("value").fill_nan(None).alias("replaced"),
)
mean_nan_df = with_replaced_df.select(
    pl.all().mean().name.suffix("_mean"),
    pl.all().sum().name.suffix("_sum"),
)
print(mean_nan_df)
# --8<-- [end:nanfill]
98
99