Path: blob/main/crates/polars-parquet/src/arrow/write/file.rs
6940 views
use std::io::Write;12use arrow::datatypes::ArrowSchema;3use polars_error::{PolarsError, PolarsResult};45use super::schema::schema_to_metadata_key;6use super::{ColumnWriteOptions, ThriftFileMetadata, WriteOptions, to_parquet_schema};7use crate::parquet::metadata::{KeyValue, SchemaDescriptor};8use crate::parquet::write::{RowGroupIterColumns, WriteOptions as FileWriteOptions};910/// An interface to write a parquet to a [`Write`]11pub struct FileWriter<W: Write> {12writer: crate::parquet::write::FileWriter<W>,13schema: ArrowSchema,14options: WriteOptions,15}1617// Accessors18impl<W: Write> FileWriter<W> {19/// The options assigned to the file20pub fn options(&self) -> WriteOptions {21self.options22}2324/// The [`SchemaDescriptor`] assigned to this file25pub fn parquet_schema(&self) -> &SchemaDescriptor {26self.writer.schema()27}2829/// The [`ArrowSchema`] assigned to this file30pub fn schema(&self) -> &ArrowSchema {31&self.schema32}33}3435impl<W: Write> FileWriter<W> {36/// Returns a new [`FileWriter`].37/// # Error38/// If it is unable to derive a parquet schema from [`ArrowSchema`].39pub fn new_with_parquet_schema(40writer: W,41schema: ArrowSchema,42parquet_schema: SchemaDescriptor,43options: WriteOptions,44) -> Self {45let created_by = Some("Polars".to_string());4647Self {48writer: crate::parquet::write::FileWriter::new(49writer,50parquet_schema,51FileWriteOptions {52version: options.version,53write_statistics: options.has_statistics(),54},55created_by,56),57schema,58options,59}60}6162/// Returns a new [`FileWriter`].63/// # Error64/// If it is unable to derive a parquet schema from [`ArrowSchema`].65pub fn try_new(66writer: W,67schema: ArrowSchema,68options: WriteOptions,69column_options: &[ColumnWriteOptions],70) -> PolarsResult<Self> {71let parquet_schema = to_parquet_schema(&schema, column_options)?;72Ok(Self::new_with_parquet_schema(73writer,74schema,75parquet_schema,76options,77))78}7980/// Writes a row group to the file.81pub fn write(&mut self, row_group: RowGroupIterColumns<'_, PolarsError>) -> PolarsResult<()> {82Ok(self.writer.write(row_group)?)83}8485/// Writes the footer of the parquet file. Returns the total size of the file.86/// If `key_value_metadata` is provided, the value is taken as-is. If it is not provided,87/// the Arrow schema is added to the metadata.88pub fn end(89&mut self,90key_value_metadata: Option<Vec<KeyValue>>,91column_options: &[ColumnWriteOptions],92) -> PolarsResult<u64> {93let key_value_metadata = key_value_metadata94.unwrap_or_else(|| vec![schema_to_metadata_key(&self.schema, column_options)]);95Ok(self.writer.end(Some(key_value_metadata))?)96}9798/// Consumes this writer and returns the inner writer99pub fn into_inner(self) -> W {100self.writer.into_inner()101}102103/// Returns the underlying writer and [`ThriftFileMetadata`]104/// # Panics105/// This function panics if [`Self::end`] has not yet been called106pub fn into_inner_and_metadata(self) -> (W, ThriftFileMetadata) {107self.writer.into_inner_and_metadata()108}109}110111112