# Source: GitHub repository pola-rs/polars
# Path: blob/main/docs/source/src/python/user-guide/io/multiple.py
# --8<-- [start:create]
import polars as pl

# Small example frame; "bar" contains a null so the CSVs include a missing value.
df = pl.DataFrame({"foo": [1, 2, 3], "bar": [None, "ham", "spam"]})

# Write the same frame to five numbered files matching my_many_files_*.csv.
for i in range(5):
    df.write_csv(f"docs/assets/data/my_many_files_{i}.csv")
# --8<-- [end:create]

# --8<-- [start:read]
# Passing a glob pattern reads every matching CSV into a single DataFrame.
df = pl.read_csv("docs/assets/data/my_many_files_*.csv")
print(df)
# --8<-- [end:read]

# --8<-- [start:creategraph]
import base64

# Write the lazy scan's query-plan graph to a PNG; show=False skips displaying it.
pl.scan_csv("docs/assets/data/my_many_files_*.csv").show_graph(
    output_path="docs/assets/images/multiple.png", show=False
)
# Re-read the PNG and emit it as a base64 data URI inside an <img> tag.
with open("docs/assets/images/multiple.png", "rb") as f:
    png = base64.b64encode(f.read()).decode()
print(f'<img src="data:image/png;base64, {png}"/>')
# --8<-- [end:creategraph]

# --8<-- [start:graph]
# Show the query plan for a lazy scan over all files matching the pattern.
pl.scan_csv("docs/assets/data/my_many_files_*.csv").show_graph()
# --8<-- [end:graph]

# --8<-- [start:glob]
import glob

import polars as pl

# Build one lazy query per matching file: group by "bar", then count rows
# and sum "foo" within each group.
queries = []
for file in glob.glob("docs/assets/data/my_many_files_*.csv"):
    q = pl.scan_csv(file).group_by("bar").agg(pl.len(), pl.sum("foo"))
    queries.append(q)

# Execute all the lazy queries together; the result is a list of DataFrames.
dataframes = pl.collect_all(queries)
print(dataframes)
# --8<-- [end:glob]
