Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-ops/src/chunked_array/array/join.rs
6939 views
1
use std::fmt::Write;
2
3
use super::*;
4
5
fn join_literal(
6
ca: &ArrayChunked,
7
separator: &str,
8
ignore_nulls: bool,
9
) -> PolarsResult<StringChunked> {
10
let DataType::Array(_, _) = ca.dtype() else {
11
unreachable!()
12
};
13
14
let mut buf = String::with_capacity(128);
15
let mut builder = StringChunkedBuilder::new(ca.name().clone(), ca.len());
16
17
ca.for_each_amortized(|opt_s| {
18
let opt_val = opt_s.and_then(|s| {
19
// make sure that we don't write values of previous iteration
20
buf.clear();
21
let ca = s.as_ref().str().unwrap();
22
23
if ca.null_count() != 0 && !ignore_nulls {
24
return None;
25
}
26
for arr in ca.downcast_iter() {
27
for val in arr.non_null_values_iter() {
28
buf.write_str(val).unwrap();
29
buf.write_str(separator).unwrap();
30
}
31
}
32
33
// last value should not have a separator, so slice that off
34
// saturating sub because there might have been nothing written.
35
Some(&buf[..buf.len().saturating_sub(separator.len())])
36
});
37
builder.append_option(opt_val)
38
});
39
Ok(builder.finish())
40
}
41
42
fn join_many(
43
ca: &ArrayChunked,
44
separator: &StringChunked,
45
ignore_nulls: bool,
46
) -> PolarsResult<StringChunked> {
47
polars_ensure!(
48
ca.len() == separator.len(),
49
length_mismatch = "arr.join",
50
ca.len(),
51
separator.len()
52
);
53
54
let mut buf = String::new();
55
let mut builder = StringChunkedBuilder::new(ca.name().clone(), ca.len());
56
57
{ ca.amortized_iter() }
58
.zip(separator)
59
.for_each(|(opt_s, opt_sep)| match opt_sep {
60
Some(separator) => {
61
let opt_val = opt_s.and_then(|s| {
62
// make sure that we don't write values of previous iteration
63
buf.clear();
64
let ca = s.as_ref().str().unwrap();
65
66
if ca.null_count() != 0 && !ignore_nulls {
67
return None;
68
}
69
70
for arr in ca.downcast_iter() {
71
for val in arr.non_null_values_iter() {
72
buf.write_str(val).unwrap();
73
buf.write_str(separator).unwrap();
74
}
75
}
76
// last value should not have a separator, so slice that off
77
// saturating sub because there might have been nothing written.
78
Some(&buf[..buf.len().saturating_sub(separator.len())])
79
});
80
builder.append_option(opt_val)
81
},
82
_ => builder.append_null(),
83
});
84
Ok(builder.finish())
85
}
86
87
/// In case the inner dtype [`DataType::String`], the individual items will be joined into a
88
/// single string separated by `separator`.
89
pub fn array_join(
90
ca: &ArrayChunked,
91
separator: &StringChunked,
92
ignore_nulls: bool,
93
) -> PolarsResult<StringChunked> {
94
match ca.inner_dtype() {
95
DataType::String => match separator.len() {
96
1 => match separator.get(0) {
97
Some(separator) => join_literal(ca, separator, ignore_nulls),
98
_ => Ok(StringChunked::full_null(ca.name().clone(), ca.len())),
99
},
100
_ => join_many(ca, separator, ignore_nulls),
101
},
102
dt => polars_bail!(op = "`array.join`", got = dt, expected = "String"),
103
}
104
}
105
106