Path: blob/main/crates/polars-parquet/src/arrow/write/file.rs
8479 views
use std::io::Write;12use arrow::datatypes::ArrowSchema;3use polars_error::{PolarsError, PolarsResult};45use super::schema::schema_to_metadata_key;6use super::{ThriftFileMetadata, WriteOptions, to_parquet_schema};7use crate::parquet::metadata::{KeyValue, SchemaDescriptor};8use crate::parquet::write::{RowGroupIterColumns, WriteOptions as FileWriteOptions};910/// An interface to write a parquet to a [`Write`]11pub struct FileWriter<W: Write> {12writer: crate::parquet::write::FileWriter<W>,13schema: ArrowSchema,14options: WriteOptions,15}1617// Accessors18impl<W: Write> FileWriter<W> {19/// The options assigned to the file20pub fn options(&self) -> WriteOptions {21self.options22}2324/// The [`SchemaDescriptor`] assigned to this file25pub fn parquet_schema(&self) -> &SchemaDescriptor {26self.writer.schema()27}2829/// The [`ArrowSchema`] assigned to this file30pub fn schema(&self) -> &ArrowSchema {31&self.schema32}33}3435impl<W: Write> FileWriter<W> {36/// Returns a new [`FileWriter`].37/// # Error38/// If it is unable to derive a parquet schema from [`ArrowSchema`].39pub fn new_with_parquet_schema(40writer: W,41schema: ArrowSchema,42parquet_schema: SchemaDescriptor,43options: WriteOptions,44) -> Self {45let created_by = Some("Polars".to_string());4647Self {48writer: crate::parquet::write::FileWriter::new(49writer,50parquet_schema,51FileWriteOptions {52version: options.version,53write_statistics: options.has_statistics(),54},55created_by,56),57schema,58options,59}60}6162/// Returns a new [`FileWriter`].63/// # Error64/// If it is unable to derive a parquet schema from [`ArrowSchema`].65pub fn try_new(writer: W, schema: ArrowSchema, options: WriteOptions) -> PolarsResult<Self> {66let parquet_schema = to_parquet_schema(&schema)?;67Ok(Self::new_with_parquet_schema(68writer,69schema,70parquet_schema,71options,72))73}7475/// Writes a row group to the file.76pub fn write(77&mut self,78num_rows: u64,79row_group: RowGroupIterColumns<'_, PolarsError>,80) -> PolarsResult<()> {81Ok(self.writer.write(num_rows, row_group)?)82}8384/// Writes the footer of the parquet file. Returns the total size of the file.85/// If `key_value_metadata` is provided, the value is taken as-is. If it is not provided,86/// the Arrow schema is added to the metadata.87pub fn end(&mut self, key_value_metadata: Option<Vec<KeyValue>>) -> PolarsResult<u64> {88let key_value_metadata =89key_value_metadata.unwrap_or_else(|| vec![schema_to_metadata_key(&self.schema)]);90Ok(self.writer.end(Some(key_value_metadata))?)91}9293/// Consumes this writer and returns the inner writer94pub fn into_inner(self) -> W {95self.writer.into_inner()96}9798/// Returns the underlying writer and [`ThriftFileMetadata`]99/// # Panics100/// This function panics if [`Self::end`] has not yet been called101pub fn into_inner_and_metadata(self) -> (W, ThriftFileMetadata) {102self.writer.into_inner_and_metadata()103}104}105106107