Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-io/src/utils/mod.rs
8422 views
1
pub mod compression;
2
mod other;
3
4
pub use other::*;
5
#[cfg(any(feature = "async", feature = "cloud"))]
6
pub mod byte_source;
7
pub mod file;
8
pub mod mkdir;
9
pub mod slice;
10
#[cfg(feature = "async")]
11
pub mod stream_buf_reader;
12
pub mod sync_on_close;
13
14
/// Excludes only the unreserved URI characters in RFC-3986:
15
///
16
/// <https://datatracker.ietf.org/doc/html/rfc3986#section-2.3>
17
///
18
/// Characters that are allowed in a URI but do not have a reserved
19
/// purpose are called unreserved. These include uppercase and lowercase
20
/// letters, decimal digits, hyphen, period, underscore, and tilde.
21
///
22
/// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
23
pub const URL_ENCODE_CHARSET: &percent_encoding::AsciiSet = &percent_encoding::NON_ALPHANUMERIC
24
.remove(b'-')
25
.remove(b'.')
26
.remove(b'_')
27
.remove(b'~');
28
29
/// Characters to percent-encode for hive values such that they round-trip from bucket storage.
30
///
31
/// This is much more relaxed than the RFC-3986 URI spec as bucket storage is more permissive of allowed
32
/// characters.
33
pub const HIVE_VALUE_ENCODE_CHARSET: &percent_encoding::AsciiSet = &percent_encoding::CONTROLS
34
.add(b'/') // Exclude path separator
35
.add(b'=') // Exclude hive `key=value` separator
36
.add(b'%') // Percent itself.
37
// Colon and space are supported by object storage, but are encoded to mimic
38
// the datetime output format from pyarrow:
39
// * i.e. 'date2=2023-01-01 00:00:00.000000' becomes 'date2=2023-01-01%2000%3A00%3A00.000000'
40
.add(b':')
41
.add(b' ');
42
43