Path: blob/main/crates/polars-parquet/src/arrow/read/schema/mod.rs
6940 views
//! APIs to handle Parquet <-> Arrow schemas.1use arrow::datatypes::{ArrowSchema, TimeUnit};23mod convert;4mod metadata;56pub(crate) use convert::*;7pub use convert::{parquet_to_arrow_schema, parquet_to_arrow_schema_with_options};8pub use metadata::{read_custom_key_value_metadata, read_schema_from_metadata};9use polars_error::PolarsResult;1011use self::metadata::parse_key_value_metadata;12pub use crate::parquet::metadata::{FileMetadata, KeyValue, SchemaDescriptor};13pub use crate::parquet::schema::types::ParquetType;1415/// Options when inferring schemas from Parquet16pub struct SchemaInferenceOptions {17/// When inferring schemas from the Parquet INT96 timestamp type, this is the corresponding TimeUnit18/// in the inferred Arrow Timestamp type.19///20/// This defaults to `TimeUnit::Nanosecond`, but INT96 timestamps outside of the range of years 1678-2262,21/// will overflow when parsed as `Timestamp(TimeUnit::Nanosecond)`. Setting this to a lower resolution22/// (e.g. TimeUnit::Milliseconds) will result in loss of precision, but support a larger range of dates23/// without overflowing when parsing the data.24pub int96_coerce_to_timeunit: TimeUnit,25}2627impl Default for SchemaInferenceOptions {28fn default() -> Self {29SchemaInferenceOptions {30int96_coerce_to_timeunit: TimeUnit::Nanosecond,31}32}33}3435/// Infers a [`ArrowSchema`] from parquet's [`FileMetadata`].36///37/// This first looks for the metadata key `"ARROW:schema"`; if it does not exist, it converts the38/// Parquet types declared in the file's Parquet schema to Arrow's equivalent.39///40/// # Error41/// This function errors iff the key `"ARROW:schema"` exists but is not correctly encoded,42/// indicating that the file's arrow metadata was incorrectly written.43pub fn infer_schema(file_metadata: &FileMetadata) -> PolarsResult<ArrowSchema> {44infer_schema_with_options(file_metadata, &None)45}4647/// Like [`infer_schema`] but with configurable options which affects the behavior of inference48pub fn infer_schema_with_options(49file_metadata: &FileMetadata,50options: &Option<SchemaInferenceOptions>,51) -> PolarsResult<ArrowSchema> {52let mut metadata = parse_key_value_metadata(file_metadata.key_value_metadata());5354let schema = read_schema_from_metadata(&mut metadata)?;55Ok(schema.unwrap_or_else(|| {56parquet_to_arrow_schema_with_options(file_metadata.schema().fields(), options)57}))58}596061