Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/frame/row/dataframe.rs
8424 views
1
use super::*;
2
3
impl DataFrame {
4
/// Get a row from a [`DataFrame`]. Use of this is discouraged as it will likely be slow.
5
pub fn get_row(&self, idx: usize) -> PolarsResult<Row<'_>> {
6
let values = self
7
.materialized_column_iter()
8
.map(|s| s.get(idx))
9
.collect::<PolarsResult<Vec<_>>>()?;
10
Ok(Row(values))
11
}
12
13
/// Amortize allocations by reusing a row.
14
/// The caller is responsible to make sure that the row has at least the capacity for the number
15
/// of columns in the [`DataFrame`]
16
pub fn get_row_amortized<'a>(&'a self, idx: usize, row: &mut Row<'a>) -> PolarsResult<()> {
17
for (s, any_val) in self.materialized_column_iter().zip(&mut row.0) {
18
*any_val = s.get(idx)?;
19
}
20
Ok(())
21
}
22
23
/// Amortize allocations by reusing a row.
24
/// The caller is responsible to make sure that the row has at least the capacity for the number
25
/// of columns in the [`DataFrame`]
26
///
27
/// # Safety
28
/// Does not do any bounds checking.
29
#[inline]
30
pub unsafe fn get_row_amortized_unchecked<'a>(&'a self, idx: usize, row: &mut Row<'a>) {
31
self.materialized_column_iter()
32
.zip(&mut row.0)
33
.for_each(|(s, any_val)| {
34
*any_val = s.get_unchecked(idx);
35
});
36
}
37
38
/// Create a new [`DataFrame`] from rows.
39
///
40
/// This should only be used when you have row wise data, as this is a lot slower
41
/// than creating the [`Series`] in a columnar fashion
42
pub fn from_rows_and_schema(rows: &[Row], schema: &Schema) -> PolarsResult<Self> {
43
Self::from_rows_iter_and_schema(rows.iter(), schema)
44
}
45
46
/// Create a new [`DataFrame`] from an iterator over rows.
47
///
48
/// This should only be used when you have row wise data, as this is a lot slower
49
/// than creating the [`Series`] in a columnar fashion.
50
pub fn from_rows_iter_and_schema<'a, I>(mut rows: I, schema: &Schema) -> PolarsResult<Self>
51
where
52
I: Iterator<Item = &'a Row<'a>>,
53
{
54
if schema.is_empty() {
55
let height = rows.count();
56
let columns = Vec::new();
57
return Ok(unsafe { DataFrame::new_unchecked(height, columns) });
58
}
59
60
let capacity = rows.size_hint().0;
61
62
let mut buffers: Vec<_> = schema
63
.iter_values()
64
.map(|dtype| {
65
let buf: AnyValueBuffer = (dtype, capacity).into();
66
buf
67
})
68
.collect();
69
70
let mut expected_len = 0;
71
rows.try_for_each::<_, PolarsResult<()>>(|row| {
72
expected_len += 1;
73
for (value, buf) in row.0.iter().zip(&mut buffers) {
74
buf.add_fallible(value)?
75
}
76
Ok(())
77
})?;
78
79
let v = buffers
80
.into_iter()
81
.zip(schema.iter_names())
82
.map(|(b, name)| {
83
let mut c = b.into_series().into_column();
84
// if the schema adds a column not in the rows, we
85
// fill it with nulls
86
if c.is_empty() {
87
Column::full_null(name.clone(), expected_len, c.dtype())
88
} else {
89
c.rename(name.clone());
90
c
91
}
92
})
93
.collect();
94
95
DataFrame::new(expected_len, v)
96
}
97
98
/// Create a new [`DataFrame`] from an iterator over rows. This should only be used when you have row wise data,
99
/// as this is a lot slower than creating the [`Series`] in a columnar fashion
100
pub fn try_from_rows_iter_and_schema<'a, I>(mut rows: I, schema: &Schema) -> PolarsResult<Self>
101
where
102
I: Iterator<Item = PolarsResult<&'a Row<'a>>>,
103
{
104
let capacity = rows.size_hint().0;
105
106
let mut buffers: Vec<_> = schema
107
.iter_values()
108
.map(|dtype| {
109
let buf: AnyValueBuffer = (dtype, capacity).into();
110
buf
111
})
112
.collect();
113
114
let mut expected_len = 0;
115
rows.try_for_each::<_, PolarsResult<()>>(|row| {
116
expected_len += 1;
117
for (value, buf) in row?.0.iter().zip(&mut buffers) {
118
buf.add_fallible(value)?
119
}
120
Ok(())
121
})?;
122
let v = buffers
123
.into_iter()
124
.zip(schema.iter_names())
125
.map(|(b, name)| {
126
let mut c = b.into_series().into_column();
127
// if the schema adds a column not in the rows, we
128
// fill it with nulls
129
if c.is_empty() {
130
Column::full_null(name.clone(), expected_len, c.dtype())
131
} else {
132
c.rename(name.clone());
133
c
134
}
135
})
136
.collect();
137
138
DataFrame::new(expected_len, v)
139
}
140
141
/// Create a new [`DataFrame`] from rows. This should only be used when you have row wise data,
142
/// as this is a lot slower than creating the [`Series`] in a columnar fashion
143
pub fn from_rows(rows: &[Row]) -> PolarsResult<Self> {
144
let schema = rows_to_schema_first_non_null(rows, Some(50))?;
145
let has_nulls = schema
146
.iter_values()
147
.any(|dtype| matches!(dtype, DataType::Null));
148
polars_ensure!(
149
!has_nulls, ComputeError: "unable to infer row types because of null values"
150
);
151
Self::from_rows_and_schema(rows, &schema)
152
}
153
}
154
155