Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-json/src/json/write/mod.rs
6939 views
1
//! APIs to write to JSON
2
mod serialize;
3
mod utf8;
4
5
use std::io::Write;
6
7
use arrow::array::Array;
8
use arrow::datatypes::ArrowSchema;
9
use arrow::io::iterator::StreamingIterator;
10
use arrow::record_batch::RecordBatchT;
11
pub use fallible_streaming_iterator::*;
12
use polars_error::{PolarsError, PolarsResult};
13
pub(crate) use serialize::new_serializer;
14
use serialize::serialize;
15
pub use utf8::serialize_to_utf8;
16
17
/// [`FallibleStreamingIterator`] that serializes an [`Array`] to bytes of valid JSON
18
/// # Implementation
19
/// Advancing this iterator CPU-bounded
20
#[derive(Debug, Clone)]
21
pub struct Serializer<A, I>
22
where
23
A: AsRef<dyn Array>,
24
I: Iterator<Item = PolarsResult<A>>,
25
{
26
arrays: I,
27
buffer: Vec<u8>,
28
}
29
30
impl<A, I> Serializer<A, I>
31
where
32
A: AsRef<dyn Array>,
33
I: Iterator<Item = PolarsResult<A>>,
34
{
35
/// Creates a new [`Serializer`].
36
pub fn new(arrays: I, buffer: Vec<u8>) -> Self {
37
Self { arrays, buffer }
38
}
39
}
40
41
impl<A, I> FallibleStreamingIterator for Serializer<A, I>
42
where
43
A: AsRef<dyn Array>,
44
I: Iterator<Item = PolarsResult<A>>,
45
{
46
type Item = [u8];
47
48
type Error = PolarsError;
49
50
fn advance(&mut self) -> PolarsResult<()> {
51
self.buffer.clear();
52
self.arrays
53
.next()
54
.map(|maybe_array| maybe_array.map(|array| serialize(array.as_ref(), &mut self.buffer)))
55
.transpose()?;
56
Ok(())
57
}
58
59
fn get(&self) -> Option<&Self::Item> {
60
if !self.buffer.is_empty() {
61
Some(&self.buffer)
62
} else {
63
None
64
}
65
}
66
}
67
68
/// [`FallibleStreamingIterator`] that serializes a [`RecordBatchT`] into bytes of JSON
69
/// in a (pandas-compatible) record-oriented format.
70
///
71
/// # Implementation
72
/// Advancing this iterator is CPU-bounded.
73
pub struct RecordSerializer<'a> {
74
schema: ArrowSchema,
75
index: usize,
76
end: usize,
77
iterators: Vec<Box<dyn StreamingIterator<Item = [u8]> + Send + Sync + 'a>>,
78
buffer: Vec<u8>,
79
}
80
81
impl<'a> RecordSerializer<'a> {
82
/// Creates a new [`RecordSerializer`].
83
pub fn new<A>(schema: ArrowSchema, chunk: &'a RecordBatchT<A>, buffer: Vec<u8>) -> Self
84
where
85
A: AsRef<dyn Array>,
86
{
87
let end = chunk.len();
88
let iterators = chunk
89
.arrays()
90
.iter()
91
.map(|arr| new_serializer(arr.as_ref(), 0, usize::MAX))
92
.collect();
93
94
Self {
95
schema,
96
index: 0,
97
end,
98
iterators,
99
buffer,
100
}
101
}
102
}
103
104
impl FallibleStreamingIterator for RecordSerializer<'_> {
105
type Item = [u8];
106
107
type Error = PolarsError;
108
109
fn advance(&mut self) -> PolarsResult<()> {
110
self.buffer.clear();
111
if self.index == self.end {
112
return Ok(());
113
}
114
115
let mut is_first_row = true;
116
write!(&mut self.buffer, "{{")?;
117
for (f, ref mut it) in self.schema.iter_values().zip(self.iterators.iter_mut()) {
118
if !is_first_row {
119
write!(&mut self.buffer, ",")?;
120
}
121
write!(&mut self.buffer, "\"{}\":", f.name)?;
122
123
self.buffer.extend_from_slice(it.next().unwrap());
124
is_first_row = false;
125
}
126
write!(&mut self.buffer, "}}")?;
127
128
self.index += 1;
129
Ok(())
130
}
131
132
fn get(&self) -> Option<&Self::Item> {
133
if !self.buffer.is_empty() {
134
Some(&self.buffer)
135
} else {
136
None
137
}
138
}
139
}
140
141
/// Writes valid JSON from an iterator of (assumed JSON-encoded) bytes to `writer`
142
pub fn write<W, I>(writer: &mut W, mut blocks: I) -> PolarsResult<()>
143
where
144
W: std::io::Write,
145
I: FallibleStreamingIterator<Item = [u8], Error = PolarsError>,
146
{
147
writer.write_all(b"[")?;
148
let mut is_first_row = true;
149
while let Some(block) = blocks.next()? {
150
if !is_first_row {
151
writer.write_all(b",")?;
152
}
153
is_first_row = false;
154
writer.write_all(block)?;
155
}
156
writer.write_all(b"]")?;
157
Ok(())
158
}
159
160