Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-parquet/src/arrow/write/file.rs
6940 views
1
use std::io::Write;
2
3
use arrow::datatypes::ArrowSchema;
4
use polars_error::{PolarsError, PolarsResult};
5
6
use super::schema::schema_to_metadata_key;
7
use super::{ColumnWriteOptions, ThriftFileMetadata, WriteOptions, to_parquet_schema};
8
use crate::parquet::metadata::{KeyValue, SchemaDescriptor};
9
use crate::parquet::write::{RowGroupIterColumns, WriteOptions as FileWriteOptions};
10
11
/// An interface to write a parquet to a [`Write`]
12
pub struct FileWriter<W: Write> {
13
writer: crate::parquet::write::FileWriter<W>,
14
schema: ArrowSchema,
15
options: WriteOptions,
16
}
17
18
// Accessors
19
impl<W: Write> FileWriter<W> {
20
/// The options assigned to the file
21
pub fn options(&self) -> WriteOptions {
22
self.options
23
}
24
25
/// The [`SchemaDescriptor`] assigned to this file
26
pub fn parquet_schema(&self) -> &SchemaDescriptor {
27
self.writer.schema()
28
}
29
30
/// The [`ArrowSchema`] assigned to this file
31
pub fn schema(&self) -> &ArrowSchema {
32
&self.schema
33
}
34
}
35
36
impl<W: Write> FileWriter<W> {
37
/// Returns a new [`FileWriter`].
38
/// # Error
39
/// If it is unable to derive a parquet schema from [`ArrowSchema`].
40
pub fn new_with_parquet_schema(
41
writer: W,
42
schema: ArrowSchema,
43
parquet_schema: SchemaDescriptor,
44
options: WriteOptions,
45
) -> Self {
46
let created_by = Some("Polars".to_string());
47
48
Self {
49
writer: crate::parquet::write::FileWriter::new(
50
writer,
51
parquet_schema,
52
FileWriteOptions {
53
version: options.version,
54
write_statistics: options.has_statistics(),
55
},
56
created_by,
57
),
58
schema,
59
options,
60
}
61
}
62
63
/// Returns a new [`FileWriter`].
64
/// # Error
65
/// If it is unable to derive a parquet schema from [`ArrowSchema`].
66
pub fn try_new(
67
writer: W,
68
schema: ArrowSchema,
69
options: WriteOptions,
70
column_options: &[ColumnWriteOptions],
71
) -> PolarsResult<Self> {
72
let parquet_schema = to_parquet_schema(&schema, column_options)?;
73
Ok(Self::new_with_parquet_schema(
74
writer,
75
schema,
76
parquet_schema,
77
options,
78
))
79
}
80
81
/// Writes a row group to the file.
82
pub fn write(&mut self, row_group: RowGroupIterColumns<'_, PolarsError>) -> PolarsResult<()> {
83
Ok(self.writer.write(row_group)?)
84
}
85
86
/// Writes the footer of the parquet file. Returns the total size of the file.
87
/// If `key_value_metadata` is provided, the value is taken as-is. If it is not provided,
88
/// the Arrow schema is added to the metadata.
89
pub fn end(
90
&mut self,
91
key_value_metadata: Option<Vec<KeyValue>>,
92
column_options: &[ColumnWriteOptions],
93
) -> PolarsResult<u64> {
94
let key_value_metadata = key_value_metadata
95
.unwrap_or_else(|| vec![schema_to_metadata_key(&self.schema, column_options)]);
96
Ok(self.writer.end(Some(key_value_metadata))?)
97
}
98
99
/// Consumes this writer and returns the inner writer
100
pub fn into_inner(self) -> W {
101
self.writer.into_inner()
102
}
103
104
/// Returns the underlying writer and [`ThriftFileMetadata`]
105
/// # Panics
106
/// This function panics if [`Self::end`] has not yet been called
107
pub fn into_inner_and_metadata(self) -> (W, ThriftFileMetadata) {
108
self.writer.into_inner_and_metadata()
109
}
110
}
111
112