Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-parquet/src/arrow/write/file.rs
8479 views
1
use std::io::Write;
2
3
use arrow::datatypes::ArrowSchema;
4
use polars_error::{PolarsError, PolarsResult};
5
6
use super::schema::schema_to_metadata_key;
7
use super::{ThriftFileMetadata, WriteOptions, to_parquet_schema};
8
use crate::parquet::metadata::{KeyValue, SchemaDescriptor};
9
use crate::parquet::write::{RowGroupIterColumns, WriteOptions as FileWriteOptions};
10
11
/// An interface to write a parquet to a [`Write`]
12
pub struct FileWriter<W: Write> {
13
writer: crate::parquet::write::FileWriter<W>,
14
schema: ArrowSchema,
15
options: WriteOptions,
16
}
17
18
// Accessors
19
impl<W: Write> FileWriter<W> {
20
/// The options assigned to the file
21
pub fn options(&self) -> WriteOptions {
22
self.options
23
}
24
25
/// The [`SchemaDescriptor`] assigned to this file
26
pub fn parquet_schema(&self) -> &SchemaDescriptor {
27
self.writer.schema()
28
}
29
30
/// The [`ArrowSchema`] assigned to this file
31
pub fn schema(&self) -> &ArrowSchema {
32
&self.schema
33
}
34
}
35
36
impl<W: Write> FileWriter<W> {
37
/// Returns a new [`FileWriter`].
38
/// # Error
39
/// If it is unable to derive a parquet schema from [`ArrowSchema`].
40
pub fn new_with_parquet_schema(
41
writer: W,
42
schema: ArrowSchema,
43
parquet_schema: SchemaDescriptor,
44
options: WriteOptions,
45
) -> Self {
46
let created_by = Some("Polars".to_string());
47
48
Self {
49
writer: crate::parquet::write::FileWriter::new(
50
writer,
51
parquet_schema,
52
FileWriteOptions {
53
version: options.version,
54
write_statistics: options.has_statistics(),
55
},
56
created_by,
57
),
58
schema,
59
options,
60
}
61
}
62
63
/// Returns a new [`FileWriter`].
64
/// # Error
65
/// If it is unable to derive a parquet schema from [`ArrowSchema`].
66
pub fn try_new(writer: W, schema: ArrowSchema, options: WriteOptions) -> PolarsResult<Self> {
67
let parquet_schema = to_parquet_schema(&schema)?;
68
Ok(Self::new_with_parquet_schema(
69
writer,
70
schema,
71
parquet_schema,
72
options,
73
))
74
}
75
76
/// Writes a row group to the file.
77
pub fn write(
78
&mut self,
79
num_rows: u64,
80
row_group: RowGroupIterColumns<'_, PolarsError>,
81
) -> PolarsResult<()> {
82
Ok(self.writer.write(num_rows, row_group)?)
83
}
84
85
/// Writes the footer of the parquet file. Returns the total size of the file.
86
/// If `key_value_metadata` is provided, the value is taken as-is. If it is not provided,
87
/// the Arrow schema is added to the metadata.
88
pub fn end(&mut self, key_value_metadata: Option<Vec<KeyValue>>) -> PolarsResult<u64> {
89
let key_value_metadata =
90
key_value_metadata.unwrap_or_else(|| vec![schema_to_metadata_key(&self.schema)]);
91
Ok(self.writer.end(Some(key_value_metadata))?)
92
}
93
94
/// Consumes this writer and returns the inner writer
95
pub fn into_inner(self) -> W {
96
self.writer.into_inner()
97
}
98
99
/// Returns the underlying writer and [`ThriftFileMetadata`]
100
/// # Panics
101
/// This function panics if [`Self::end`] has not yet been called
102
pub fn into_inner_and_metadata(self) -> (W, ThriftFileMetadata) {
103
self.writer.into_inner_and_metadata()
104
}
105
}
106
107