Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-arrow/src/record_batch.rs
6939 views
1
//! Contains [`RecordBatchT`], a container of [`Array`] where every array has the
2
//! same length.
3
4
use polars_error::{PolarsResult, polars_ensure};
5
6
use crate::array::{Array, ArrayRef};
7
use crate::datatypes::{ArrowSchema, ArrowSchemaRef};
8
9
/// A vector of trait objects of [`Array`] where every item has
10
/// the same length, [`RecordBatchT::len`].
11
#[derive(Debug, Clone, PartialEq, Eq)]
12
pub struct RecordBatchT<A: AsRef<dyn Array>> {
13
height: usize,
14
schema: ArrowSchemaRef,
15
arrays: Vec<A>,
16
}
17
18
/// A [`RecordBatchT`] whose columns are stored as [`ArrayRef`]s.
pub type RecordBatch = RecordBatchT<ArrayRef>;
19
20
impl<A: AsRef<dyn Array>> RecordBatchT<A> {
21
/// Creates a new [`RecordBatchT`].
22
///
23
/// # Panics
24
///
25
/// I.f.f. the length does not match the length of any of the arrays
26
pub fn new(length: usize, schema: ArrowSchemaRef, arrays: Vec<A>) -> Self {
27
Self::try_new(length, schema, arrays).unwrap()
28
}
29
30
/// Creates a new [`RecordBatchT`].
31
///
32
/// # Error
33
///
34
/// I.f.f. the height does not match the length of any of the arrays
35
pub fn try_new(height: usize, schema: ArrowSchemaRef, arrays: Vec<A>) -> PolarsResult<Self> {
36
polars_ensure!(
37
schema.len() == arrays.len(),
38
ComputeError: "RecordBatch requires an equal number of fields and arrays",
39
);
40
polars_ensure!(
41
arrays.iter().all(|arr| arr.as_ref().len() == height),
42
ComputeError: "RecordBatch requires all its arrays to have an equal number of rows",
43
);
44
45
Ok(Self {
46
height,
47
schema,
48
arrays,
49
})
50
}
51
52
/// returns the [`Array`]s in [`RecordBatchT`]
53
pub fn arrays(&self) -> &[A] {
54
&self.arrays
55
}
56
57
/// returns the [`ArrowSchema`]s in [`RecordBatchT`]
58
pub fn schema(&self) -> &ArrowSchema {
59
&self.schema
60
}
61
62
/// returns the [`Array`]s in [`RecordBatchT`]
63
pub fn columns(&self) -> &[A] {
64
&self.arrays
65
}
66
67
/// returns the number of rows of every array
68
pub fn len(&self) -> usize {
69
self.height
70
}
71
72
/// returns the number of rows of every array
73
pub fn height(&self) -> usize {
74
self.height
75
}
76
77
/// returns the number of arrays
78
pub fn width(&self) -> usize {
79
self.arrays.len()
80
}
81
82
/// returns whether the columns have any rows
83
pub fn is_empty(&self) -> bool {
84
self.len() == 0
85
}
86
87
/// Consumes [`RecordBatchT`] into its underlying arrays.
88
/// The arrays are guaranteed to have the same length
89
pub fn into_arrays(self) -> Vec<A> {
90
self.arrays
91
}
92
93
/// Consumes [`RecordBatchT`] into its underlying schema and arrays.
94
/// The arrays are guaranteed to have the same length
95
pub fn into_schema_and_arrays(self) -> (ArrowSchemaRef, Vec<A>) {
96
(self.schema, self.arrays)
97
}
98
}
99
100
/// Converts a [`RecordBatchT`] into the vector of arrays it holds.
impl<A> From<RecordBatchT<A>> for Vec<A>
where
    A: AsRef<dyn Array>,
{
    fn from(batch: RecordBatchT<A>) -> Self {
        // Moves the arrays out; the remaining fields are dropped.
        batch.arrays
    }
}
105
106
/// Lets a [`RecordBatchT`] be used directly as a slice of its arrays.
impl<A> std::ops::Deref for RecordBatchT<A>
where
    A: AsRef<dyn Array>,
{
    type Target = [A];

    #[inline]
    fn deref(&self) -> &Self::Target {
        &self.arrays
    }
}
114
115