# Book a Demo!
# CoCalc Logo Icon
# Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
# pola-rs
# GitHub Repository: pola-rs/polars
# Path: blob/main/docs/source/src/python/user-guide/expressions/casting.py
# 7890 views
# --8<-- [start:dfnum]
import polars as pl

# Sample frame with two integer columns and one float column; the numeric
# casting examples operate on it.
df = pl.DataFrame(
    {
        "integers": [1, 2, 3],
        "big_integers": [10000002, 2, 30000003],
        "floats": [4.0, 5.8, -6.3],
    }
)

print(df)
# --8<-- [end:dfnum]

# --8<-- [start:castnum]
# Convert in both directions: the integer column to Float32 and the float
# column to Int32, giving each result a descriptive name.
result = df.select(
    pl.col("integers").cast(pl.Float32).alias("integers_as_floats"),
    pl.col("floats").cast(pl.Int32).alias("floats_as_integers"),
)
print(result)
# --8<-- [end:castnum]

# --8<-- [start:downcast]
# Report the frame's estimated size, replace two columns with narrower
# dtypes (Int16 and Float32), then report the estimated size again.
print(f"Before downcasting: {df.estimated_size()} bytes")
result = df.with_columns(
    pl.col("integers").cast(pl.Int16),
    pl.col("floats").cast(pl.Float32),
)
print(f"After downcasting: {result.estimated_size()} bytes")
# --8<-- [end:downcast]

# --8<-- [start:overflow]
from polars.exceptions import InvalidOperationError

# "big_integers" holds values such as 10000002 that lie outside the Int8
# range; the InvalidOperationError raised by the cast is caught and its
# message printed.
try:
    result = df.select(pl.col("big_integers").cast(pl.Int8))
    print(result)
except InvalidOperationError as err:
    print(err)
# --8<-- [end:overflow]

# --8<-- [start:overflow2]
# Same cast as before but with strict=False; per the Polars documentation,
# values that cannot be converted become null instead of raising.
result = df.select(pl.col("big_integers").cast(pl.Int8, strict=False))
print(result)
# --8<-- [end:overflow2]

# --8<-- [start:strings]
# Two columns of numbers written as strings plus one column of real floats.
df = pl.DataFrame(
    {
        "integers_as_strings": ["1", "2", "3"],
        "floats_as_strings": ["4.0", "5.8", "-6.3"],
        "floats": [4.0, 5.8, -6.3],
    }
)

# Cast the string columns to numeric dtypes and the float column to String.
result = df.select(
    pl.col("integers_as_strings").cast(pl.Int32),
    pl.col("floats_as_strings").cast(pl.Float64),
    pl.col("floats").cast(pl.String),
)
print(result)
# --8<-- [end:strings]

# --8<-- [start:strings2]
# The last entry, "- 6 . 3", contains embedded spaces; the block catches the
# InvalidOperationError from casting it to Float64 and prints the message.
df = pl.DataFrame(
    {
        "floats": ["4.0", "5.8", "- 6 . 3"],
    }
)
try:
    result = df.select(pl.col("floats").cast(pl.Float64))
except InvalidOperationError as err:
    print(err)
# --8<-- [end:strings2]

# --8<-- [start:bool]
# Numeric columns (including a negative value and zeros) and a Boolean column.
df = pl.DataFrame(
    {
        "integers": [-1, 0, 2, 3, 4],
        "floats": [0.0, 1.0, 2.0, 3.0, 4.0],
        "bools": [True, False, True, False, True],
    }
)

# Cast the numeric columns to Boolean and the Boolean column to Int8.
result = df.select(
    pl.col("integers").cast(pl.Boolean),
    pl.col("floats").cast(pl.Boolean),
    pl.col("bools").cast(pl.Int8),
)
print(result)
# --8<-- [end:bool]

# --8<-- [start:dates]
from datetime import date, datetime, time

# One column per temporal type; each holds a reference value followed by a
# value a known interval later.
df = pl.DataFrame(
    {
        "date": [
            date(1970, 1, 1),  # epoch
            date(1970, 1, 10),  # 9 days later
        ],
        "datetime": [
            datetime(1970, 1, 1, 0, 0, 0),  # epoch
            datetime(1970, 1, 1, 0, 1, 0),  # 1 minute later
        ],
        "time": [
            time(0, 0, 0),  # reference time
            time(0, 0, 1),  # 1 second later
        ],
    }
)

# Cast each temporal column to Int64; the aliases name the unit each
# resulting integer is expressed in.
result = df.select(
    pl.col("date").cast(pl.Int64).alias("days_since_epoch"),
    pl.col("datetime").cast(pl.Int64).alias("us_since_epoch"),
    pl.col("time").cast(pl.Int64).alias("ns_since_midnight"),
)
print(result)
# --8<-- [end:dates]

# --8<-- [start:dates2]
# The same two days, once as date objects and once as ISO-formatted strings.
df = pl.DataFrame(
    {
        "date": [date(2022, 1, 1), date(2022, 1, 2)],
        "string": ["2022-01-01", "2022-01-02"],
    }
)

# Format the dates as strings and parse the strings as datetimes, using the
# same "%Y-%m-%d" pattern in both directions.
result = df.select(
    pl.col("date").dt.to_string("%Y-%m-%d"),
    pl.col("string").str.to_datetime("%Y-%m-%d"),
)
print(result)
# --8<-- [end:dates2]