Book a Demo!
CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Policies · Sign Up · Sign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-core/src/series/iterator.rs
8421 views
1
use crate::prelude::any_value::arr_to_any_value;
2
use crate::prelude::*;
3
use crate::utils::NoNull;
4
5
/// Generates the `FromIterator` implementations that allow a `Series` to be collected
/// from an iterator over `$native` values, going through a `ChunkedArray<$variant>`.
///
/// Three item shapes are covered:
/// * `Option<$native>`: collected straight into a `ChunkedArray<$variant>`.
/// * `$native`: collected through the `NoNull` wrapper and unwrapped afterwards.
/// * `&$native`: every value is copied out of its reference and then collected the
///   same way as `$native`.
macro_rules! from_iterator {
    ($native:ty, $variant:ident) => {
        impl FromIterator<Option<$native>> for Series {
            fn from_iter<I: IntoIterator<Item = Option<$native>>>(iter: I) -> Self {
                let ca: ChunkedArray<$variant> = iter.into_iter().collect();
                ca.into_series()
            }
        }

        impl FromIterator<$native> for Series {
            fn from_iter<I: IntoIterator<Item = $native>>(iter: I) -> Self {
                let ca: NoNull<ChunkedArray<$variant>> = iter.into_iter().collect();
                ca.into_inner().into_series()
            }
        }

        impl<'a> FromIterator<&'a $native> for Series {
            fn from_iter<I: IntoIterator<Item = &'a $native>>(iter: I) -> Self {
                // A reference always points at a value, so there is no reason to wrap
                // each item in `Some(..)` and take the `Option` path; collect through
                // `NoNull` exactly like the by-value implementation above.
                let ca: NoNull<ChunkedArray<$variant>> = iter.into_iter().copied().collect();
                ca.into_inner().into_series()
            }
        }
    };
}
29
30
#[cfg(feature = "dtype-u8")]
31
from_iterator!(u8, UInt8Type);
32
#[cfg(feature = "dtype-u16")]
33
from_iterator!(u16, UInt16Type);
34
from_iterator!(u32, UInt32Type);
35
from_iterator!(u64, UInt64Type);
36
#[cfg(feature = "dtype-i8")]
37
from_iterator!(i8, Int8Type);
38
#[cfg(feature = "dtype-i16")]
39
from_iterator!(i16, Int16Type);
40
from_iterator!(i32, Int32Type);
41
from_iterator!(i64, Int64Type);
42
from_iterator!(f32, Float32Type);
43
from_iterator!(f64, Float64Type);
44
from_iterator!(bool, BooleanType);
45
46
impl<'a> FromIterator<Option<&'a str>> for Series {
47
fn from_iter<I: IntoIterator<Item = Option<&'a str>>>(iter: I) -> Self {
48
let ca: StringChunked = iter.into_iter().collect();
49
ca.into_series()
50
}
51
}
52
53
impl<'a> FromIterator<&'a str> for Series {
54
fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Self {
55
let ca: StringChunked = iter.into_iter().collect();
56
ca.into_series()
57
}
58
}
59
60
impl FromIterator<Option<String>> for Series {
61
fn from_iter<T: IntoIterator<Item = Option<String>>>(iter: T) -> Self {
62
let ca: StringChunked = iter.into_iter().collect();
63
ca.into_series()
64
}
65
}
66
67
impl FromIterator<String> for Series {
68
fn from_iter<I: IntoIterator<Item = String>>(iter: I) -> Self {
69
let ca: StringChunked = iter.into_iter().collect();
70
ca.into_series()
71
}
72
}
73
74
/// Boxed, dynamically dispatched, exact-size iterator over [`AnyValue`]s; this is the
/// return type of [`Series::phys_iter`].
pub type SeriesPhysIter<'a> = Box<dyn ExactSizeIterator<Item = AnyValue<'a>> + 'a>;
75
76
impl Series {
77
/// Iterate over [`Series`] as [`AnyValue`].
78
///
79
/// # Panics
80
/// This will panic if the array is not rechunked first.
81
pub fn iter(&self) -> SeriesIter<'_> {
82
let arrays = self.chunks();
83
SeriesIter {
84
idx_in_cur_arr: 0,
85
cur_arr_idx: 0,
86
cur_arr_len: arrays[0].len(),
87
arrays,
88
dtype: self.dtype(),
89
total_elems_in_remaining_arrays: self.len(),
90
}
91
}
92
93
pub fn phys_iter(&self) -> SeriesPhysIter<'_> {
94
let dtype = self.dtype();
95
let phys_dtype = dtype.to_physical();
96
97
assert_eq!(dtype, &phys_dtype, "impl error");
98
assert_eq!(self.chunks().len(), 1, "impl error");
99
let arr = &*self.chunks()[0];
100
101
if phys_dtype.is_primitive_numeric() {
102
if arr.null_count() == 0 {
103
with_match_physical_numeric_type!(phys_dtype, |$T| {
104
let arr = arr.as_any().downcast_ref::<PrimitiveArray<$T>>().unwrap();
105
let values = arr.values().as_slice();
106
Box::new(values.iter().map(|&value| AnyValue::from(value))) as Box<dyn ExactSizeIterator<Item=AnyValue<'_>> + '_>
107
})
108
} else {
109
with_match_physical_numeric_type!(phys_dtype, |$T| {
110
let arr = arr.as_any().downcast_ref::<PrimitiveArray<$T>>().unwrap();
111
Box::new(arr.iter().map(|value| {
112
113
match value {
114
Some(value) => AnyValue::from(*value),
115
None => AnyValue::Null
116
}
117
118
})) as Box<dyn ExactSizeIterator<Item=AnyValue<'_>> + '_>
119
})
120
}
121
} else {
122
match dtype {
123
DataType::String => {
124
let arr = arr.as_any().downcast_ref::<Utf8ViewArray>().unwrap();
125
if arr.null_count() == 0 {
126
Box::new(arr.values_iter().map(AnyValue::String))
127
as Box<dyn ExactSizeIterator<Item = AnyValue<'_>> + '_>
128
} else {
129
let zipvalid = arr.iter();
130
Box::new(zipvalid.unwrap_optional().map(|v| match v {
131
Some(value) => AnyValue::String(value),
132
None => AnyValue::Null,
133
}))
134
as Box<dyn ExactSizeIterator<Item = AnyValue<'_>> + '_>
135
}
136
},
137
DataType::Boolean => {
138
let arr = arr.as_any().downcast_ref::<BooleanArray>().unwrap();
139
if arr.null_count() == 0 {
140
Box::new(arr.values_iter().map(AnyValue::Boolean))
141
as Box<dyn ExactSizeIterator<Item = AnyValue<'_>> + '_>
142
} else {
143
let zipvalid = arr.iter();
144
Box::new(zipvalid.unwrap_optional().map(|v| match v {
145
Some(value) => AnyValue::Boolean(value),
146
None => AnyValue::Null,
147
}))
148
as Box<dyn ExactSizeIterator<Item = AnyValue<'_>> + '_>
149
}
150
},
151
_ => Box::new(self.iter()),
152
}
153
}
154
}
155
}
156
157
pub struct SeriesIter<'a> {
158
arrays: &'a [Box<dyn Array>],
159
dtype: &'a DataType,
160
idx_in_cur_arr: usize,
161
cur_arr_len: usize,
162
cur_arr_idx: usize,
163
total_elems_in_remaining_arrays: usize,
164
}
165
166
impl<'a> Iterator for SeriesIter<'a> {
167
type Item = AnyValue<'a>;
168
169
#[inline]
170
fn next(&mut self) -> Option<Self::Item> {
171
loop {
172
if self.idx_in_cur_arr < self.cur_arr_len {
173
let arr = unsafe { self.arrays.get_unchecked(self.cur_arr_idx) };
174
let ret = unsafe { arr_to_any_value(&**arr, self.idx_in_cur_arr, self.dtype) };
175
self.idx_in_cur_arr += 1;
176
return Some(ret);
177
}
178
179
if self.cur_arr_idx + 1 < self.arrays.len() {
180
self.total_elems_in_remaining_arrays -= self.cur_arr_len;
181
self.cur_arr_idx += 1;
182
self.idx_in_cur_arr = 0;
183
let arr = unsafe { self.arrays.get_unchecked(self.cur_arr_idx) };
184
self.cur_arr_len = arr.len();
185
} else {
186
return None;
187
}
188
}
189
}
190
191
fn size_hint(&self) -> (usize, Option<usize>) {
192
let len = self.total_elems_in_remaining_arrays - self.idx_in_cur_arr;
193
(len, Some(len))
194
}
195
}
196
197
// `SeriesIter::size_hint` returns the same value for its lower and upper bound, which
// is what this marker implementation relies on.
impl ExactSizeIterator for SeriesIter<'_> {}
198
199
#[cfg(test)]
mod test {
    use crate::prelude::*;

    /// Iterating the typed view of a series yields every value in order.
    #[test]
    fn test_iter() {
        let a = Series::new("age".into(), [23, 71, 9].as_ref());
        // Collect the mapped iterator: an unconsumed lazy adaptor would never run the
        // closure and the test would verify nothing.
        let doubled: Vec<Option<i32>> = a
            .i32()
            .unwrap()
            .into_iter()
            .map(|opt_v| opt_v.map(|v| v * 2))
            .collect();
        assert_eq!(doubled, [Some(46), Some(142), Some(18)]);
    }

    /// Collecting `Option<&str>` items gives the same series as `Series::new`.
    #[test]
    fn test_iter_str() {
        let data = [Some("John"), Some("Doe"), None];
        let a: Series = data.into_iter().collect();
        let b = Series::new("".into(), data);
        assert_eq!(a, b);
    }

    /// Collecting `Option<String>` items gives the same series as `Series::new`.
    #[test]
    fn test_iter_string() {
        let data = [Some("John".to_string()), Some("Doe".to_string()), None];
        let a: Series = data.clone().into_iter().collect();
        let b = Series::new("".into(), data);
        assert_eq!(a, b);
    }
}
229
230