Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
pola-rs
GitHub Repository: pola-rs/polars
Path: blob/main/crates/polars-utils/src/regex_cache.rs
6939 views
1
use std::cell::RefCell;
2
3
use regex::{Regex, RegexBuilder};
4
5
use crate::cache::LruCache;
6
7
/// Reads the regex compiled-size limit (in bytes) from the
/// `POLARS_REGEX_SIZE_LIMIT` environment variable.
///
/// Returns `None` when the variable is unset, is not valid Unicode, or is the
/// empty string, in which case callers leave the builder's limit untouched.
///
/// # Panics
///
/// Panics if the variable is set to a non-empty value that does not parse as
/// a `usize`.
fn get_size_limit() -> Option<usize> {
    Some(
        std::env::var("POLARS_REGEX_SIZE_LIMIT")
            .ok()
            // An empty value is treated the same as an unset variable.
            .filter(|l| !l.is_empty())?
            .parse()
            .expect("invalid POLARS_REGEX_SIZE_LIMIT"),
    )
}
16
17
// Regex compilation is really heavy, and the resulting regexes can be large as
// well, so we should have a good caching scheme.
//
// TODO: add larger global cache which has time-based flush.
21
22
/// A cache for compiled regular expressions.
23
pub struct RegexCache {
24
cache: LruCache<String, Regex>,
25
size_limit: Option<usize>,
26
}
27
28
impl RegexCache {
29
fn new() -> Self {
30
Self {
31
cache: LruCache::with_capacity(32),
32
size_limit: get_size_limit(),
33
}
34
}
35
36
pub fn compile(&mut self, re: &str) -> Result<&Regex, regex::Error> {
37
let r = self.cache.try_get_or_insert_with(re, |re| {
38
// We do this little loop to only check POLARS_REGEX_SIZE_LIMIT when
39
// a regex fails to compile due to the size limit.
40
loop {
41
let mut builder = RegexBuilder::new(re);
42
if let Some(bytes) = self.size_limit {
43
builder.size_limit(bytes);
44
}
45
match builder.build() {
46
err @ Err(regex::Error::CompiledTooBig(_)) => {
47
let new_size_limit = get_size_limit();
48
if new_size_limit != self.size_limit {
49
self.size_limit = new_size_limit;
50
continue; // Try to compile again.
51
}
52
break err;
53
},
54
r => break r,
55
};
56
}
57
});
58
Ok(&*r?)
59
}
60
}
61
62
thread_local! {
63
static LOCAL_REGEX_CACHE: RefCell<RegexCache> = RefCell::new(RegexCache::new());
64
}
65
66
pub fn compile_regex(re: &str) -> Result<Regex, regex::Error> {
67
LOCAL_REGEX_CACHE.with_borrow_mut(|cache| cache.compile(re).cloned())
68
}
69
70
pub fn with_regex_cache<R, F: FnOnce(&mut RegexCache) -> R>(f: F) -> R {
71
LOCAL_REGEX_CACHE.with_borrow_mut(f)
72
}
73
74
/// Declares one or more lazily compiled `regex::Regex` statics.
///
/// Each `$vis static NAME = <pattern expr>;` item expands to a
/// `std::sync::LazyLock<regex::Regex>` static that compiles the pattern on
/// first access. The compilation result is unwrapped, so an invalid pattern
/// panics at that first access.
#[macro_export]
macro_rules! cached_regex {
    // Base case: no declarations left.
    () => {};

    // Emit one static, then recurse on the remaining declarations.
    ($vis:vis static $name:ident = $regex:expr; $($rest:tt)*) => {
        #[allow(clippy::disallowed_methods)]
        $vis static $name: std::sync::LazyLock<regex::Regex> = std::sync::LazyLock::new(|| regex::Regex::new($regex).unwrap());
        $crate::regex_cache::cached_regex!($($rest)*);
    };
}
84
pub use cached_regex;
85
86