Path: blob/main/crates/polars-json/src/ndjson/deserialize.rs
6939 views
use arrow::array::Array;1use arrow::compute::concatenate::concatenate_unchecked;2use simd_json::BorrowedValue;34use super::*;56/// Deserializes an iterator of rows into an [`Array`][Array] of [`DataType`].7///8/// [Array]: arrow::array::Array9///10/// # Implementation11/// This function is CPU-bounded.12/// This function is guaranteed to return an array of length equal to the length13/// # Errors14/// This function errors iff any of the rows is not a valid JSON (i.e. the format is not valid NDJSON).15pub fn deserialize_iter<'a>(16rows: impl Iterator<Item = &'a str>,17dtype: ArrowDataType,18buf_size: usize,19count: usize,20allow_extra_fields_in_struct: bool,21) -> PolarsResult<ArrayRef> {22let mut arr: Vec<Box<dyn Array>> = Vec::new();23let mut buf = Vec::with_capacity(std::cmp::min(buf_size + count + 2, u32::MAX as usize));24buf.push(b'[');2526fn _deserializer(27s: &mut [u8],28dtype: ArrowDataType,29allow_extra_fields_in_struct: bool,30) -> PolarsResult<Box<dyn Array>> {31let out = simd_json::to_borrowed_value(s)32.map_err(|e| PolarsError::ComputeError(format!("json parsing error: '{e}'").into()))?;33if let BorrowedValue::Array(rows) = out {34super::super::json::deserialize::_deserialize(35&rows,36dtype,37allow_extra_fields_in_struct,38)39} else {40unreachable!()41}42}43let mut row_iter = rows.peekable();4445while let Some(row) = row_iter.next() {46buf.extend_from_slice(row.as_bytes());47buf.push(b',');4849let next_row_length = row_iter.peek().map(|row| row.len()).unwrap_or(0);50if buf.len() + next_row_length >= u32::MAX as usize {51let _ = buf.pop();52buf.push(b']');53arr.push(_deserializer(54&mut buf,55dtype.clone(),56allow_extra_fields_in_struct,57)?);58buf.clear();59buf.push(b'[');60}61}62if buf.len() > 1 {63let _ = buf.pop();64}65buf.push(b']');6667if arr.is_empty() {68_deserializer(&mut buf, dtype, allow_extra_fields_in_struct)69} else {70arr.push(_deserializer(71&mut buf,72dtype,73allow_extra_fields_in_struct,74)?);75concatenate_unchecked(&arr)76}77}787980