Path: blob/main/crates/polars-io/src/parquet/read/utils.rs
8475 views
use std::borrow::Cow;12use polars_core::prelude::{ArrowSchema, Column, DataFrame, DataType, IDX_DTYPE, Series};3use polars_core::schema::{SchemaExt, SchemaNamesAndDtypes};4use polars_error::{PolarsResult, polars_bail};5use polars_schema::Schema;67use crate::RowIndex;8use crate::hive::materialize_hive_partitions;9use crate::utils::apply_projection;1011pub fn materialize_empty_df(12projection: Option<&[usize]>,13reader_schema: &ArrowSchema,14hive_partition_columns: Option<&[Series]>,15row_index: Option<&RowIndex>,16) -> DataFrame {17let schema = if let Some(projection) = projection {18Cow::Owned(apply_projection(reader_schema, projection))19} else {20Cow::Borrowed(reader_schema)21};22let mut df = DataFrame::empty_with_schema(&Schema::from_arrow_schema(&schema));2324if let Some(row_index) = row_index {25df.insert_column(0, Column::new_empty(row_index.name.clone(), &IDX_DTYPE))26.unwrap();27}2829materialize_hive_partitions(&mut df, reader_schema, hive_partition_columns);3031df32}3334pub(super) fn projected_arrow_schema_to_projection_indices(35schema: &ArrowSchema,36projected_arrow_schema: &ArrowSchema,37) -> PolarsResult<Option<Vec<usize>>> {38let mut projection_indices = Vec::with_capacity(projected_arrow_schema.len());39let mut is_full_ordered_projection = projected_arrow_schema.len() == schema.len();4041for (i, field) in projected_arrow_schema.iter_values().enumerate() {42let dtype = {43let Some((idx, _, field)) = schema.get_full(&field.name) else {44polars_bail!(ColumnNotFound: "did not find column in file: {}", field.name)45};4647projection_indices.push(idx);48is_full_ordered_projection &= idx == i;4950DataType::from_arrow_field(field)51};52let expected_dtype = DataType::from_arrow_field(field);5354if dtype.clone() != expected_dtype {55polars_bail!(56mismatch,57col = &field.name,58expected = expected_dtype,59found = dtype60);61}62}6364Ok((!is_full_ordered_projection).then_some(projection_indices))65}6667/// Utility to ensure the dtype of the column in `current_schema` matches the dtype in `schema` if68/// that column exists in `schema`.69pub fn ensure_matching_dtypes_if_found(70schema: &ArrowSchema,71current_schema: &ArrowSchema,72) -> PolarsResult<()> {73current_schema74.iter_names_and_dtypes()75.try_for_each(|(name, dtype)| {76if let Some(field) = schema.get(name) {77if dtype != &field.dtype {78// Check again with timezone normalization79// TODO: Add an ArrowDtype eq wrapper?80let lhs = DataType::from_arrow_dtype(dtype);81let rhs = DataType::from_arrow_field(field);8283if lhs != rhs {84polars_bail!(85SchemaMismatch:86"dtypes differ for column {}: {:?} != {:?}"87, name, dtype, &field.dtype88);89}90}91}92Ok(())93})94}959697