Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
bytecodealliance
GitHub Repository: bytecodealliance/wasmtime
Path: blob/main/crates/cache/src/lib.rs
1693 views
1
//! > **⚠️ Warning ⚠️**: this crate is an internal-only crate for the Wasmtime
2
//! > project and is not intended for general use. APIs are not strictly
3
//! > reviewed for safety and usage outside of Wasmtime may have bugs. If
4
//! > you're interested in using this feel free to file an issue on the
5
//! > Wasmtime repository to start a discussion about doing so, but otherwise
6
//! > be aware that your usage of this crate is not supported.
7
8
use anyhow::Result;
9
use base64::Engine;
10
use log::{debug, trace, warn};
11
use serde::{Deserialize, Serialize};
12
use sha2::{Digest, Sha256};
13
use std::hash::Hash;
14
use std::hash::Hasher;
15
use std::io::Write;
16
use std::path::{Path, PathBuf};
17
use std::sync::Arc;
18
use std::sync::atomic::{AtomicUsize, Ordering::SeqCst};
19
use std::time::Duration;
20
use std::{fs, io};
21
22
#[macro_use] // for tests
23
mod config;
24
mod worker;
25
26
pub use config::{CacheConfig, create_new_config};
27
use worker::Worker;
28
29
/// Global configuration for how the cache is managed
30
#[derive(Debug, Clone)]
31
pub struct Cache {
32
config: CacheConfig,
33
worker: Worker,
34
state: Arc<CacheState>,
35
}
36
37
// Expands to a public getter named after a configuration setting.
//
// `generate_config_setting_getter!(name: Type)` produces
// `pub fn name(&self) -> Type` which simply forwards to `self.config.name()`,
// together with a generated doc line of the form "Returns `name`.".
macro_rules! generate_config_setting_getter {
    ($name:ident: $ty:ty) => {
        #[doc = concat!("Returns `", stringify!($name), "`.")]
        pub fn $name(&self) -> $ty {
            self.config.$name()
        }
    };
}
45
46
impl Cache {
47
/// Builds a [`Cache`] from the configuration and spawns the cache worker.
48
///
49
/// If you want to load the cache configuration from a file, use [`CacheConfig::from_file`].
50
/// You can call [`CacheConfig::new`] for the default configuration.
51
///
52
/// # Errors
53
/// Returns an error if the configuration is invalid.
54
pub fn new(mut config: CacheConfig) -> Result<Self> {
55
config.validate()?;
56
Ok(Self {
57
worker: Worker::start_new(&config),
58
config,
59
state: Default::default(),
60
})
61
}
62
63
/// Loads cache configuration specified at `path`.
64
///
65
/// This method will read the file specified by `path` on the filesystem and
66
/// attempt to load cache configuration from it. This method can also fail
67
/// due to I/O errors, misconfiguration, syntax errors, etc. For expected
68
/// syntax in the configuration file see the [documentation online][docs].
69
///
70
/// Passing in `None` loads cache configuration from the system default path.
71
/// This is located, for example, on Unix at `$HOME/.config/wasmtime/config.toml`
72
/// and is typically created with the `wasmtime config new` command.
73
///
74
/// # Errors
75
///
76
/// This method can fail due to any error that happens when loading the file
77
/// pointed to by `path` and attempting to load the cache configuration.
78
///
79
/// [docs]: https://bytecodealliance.github.io/wasmtime/cli-cache.html
80
pub fn from_file(path: Option<&Path>) -> Result<Self> {
81
let config = CacheConfig::from_file(path)?;
82
Self::new(config)
83
}
84
85
generate_config_setting_getter!(worker_event_queue_size: u64);
86
generate_config_setting_getter!(baseline_compression_level: i32);
87
generate_config_setting_getter!(optimized_compression_level: i32);
88
generate_config_setting_getter!(optimized_compression_usage_counter_threshold: u64);
89
generate_config_setting_getter!(cleanup_interval: Duration);
90
generate_config_setting_getter!(optimizing_compression_task_timeout: Duration);
91
generate_config_setting_getter!(allowed_clock_drift_for_files_from_future: Duration);
92
generate_config_setting_getter!(file_count_soft_limit: u64);
93
generate_config_setting_getter!(files_total_size_soft_limit: u64);
94
generate_config_setting_getter!(file_count_limit_percent_if_deleting: u8);
95
generate_config_setting_getter!(files_total_size_limit_percent_if_deleting: u8);
96
97
/// Returns path to the cache directory.
98
pub fn directory(&self) -> &PathBuf {
99
&self
100
.config
101
.directory()
102
.expect("directory should be validated in Config::new")
103
}
104
105
#[cfg(test)]
106
fn worker(&self) -> &Worker {
107
&self.worker
108
}
109
110
/// Returns the number of cache hits seen so far
111
pub fn cache_hits(&self) -> usize {
112
self.state.hits.load(SeqCst)
113
}
114
115
/// Returns the number of cache misses seen so far
116
pub fn cache_misses(&self) -> usize {
117
self.state.misses.load(SeqCst)
118
}
119
120
pub(crate) fn on_cache_get_async(&self, path: impl AsRef<Path>) {
121
self.state.hits.fetch_add(1, SeqCst);
122
self.worker.on_cache_get_async(path)
123
}
124
125
pub(crate) fn on_cache_update_async(&self, path: impl AsRef<Path>) {
126
self.state.misses.fetch_add(1, SeqCst);
127
self.worker.on_cache_update_async(path)
128
}
129
}
130
131
/// Usage counters of a cache; both start at zero.
#[derive(Debug, Default)]
struct CacheState {
    /// Number of lookups that were answered from the cache.
    hits: AtomicUsize,
    /// Number of lookups that had to be computed and were then stored.
    misses: AtomicUsize,
}
136
137
/// Module level cache entry.
138
pub struct ModuleCacheEntry<'cache>(Option<ModuleCacheEntryInner<'cache>>);
139
140
struct ModuleCacheEntryInner<'cache> {
141
root_path: PathBuf,
142
cache: &'cache Cache,
143
}
144
145
/// Adapter that lets [`Hash`] implementations feed their bytes into a SHA-256
/// digest through the [`Hasher`] interface.
struct Sha256Hasher(Sha256);
146
147
impl<'cache> ModuleCacheEntry<'cache> {
148
/// Create the cache entry.
149
pub fn new(compiler_name: &str, cache: Option<&'cache Cache>) -> Self {
150
Self(cache.map(|cache| ModuleCacheEntryInner::new(compiler_name, cache)))
151
}
152
153
#[cfg(test)]
154
fn from_inner(inner: ModuleCacheEntryInner<'cache>) -> Self {
155
Self(Some(inner))
156
}
157
158
/// Gets cached data if state matches, otherwise calls `compute`.
159
///
160
/// Data is automatically serialized/deserialized with `bincode`.
161
pub fn get_data<T, U, E>(&self, state: T, compute: fn(&T) -> Result<U, E>) -> Result<U, E>
162
where
163
T: Hash,
164
U: Serialize + for<'a> Deserialize<'a>,
165
{
166
self.get_data_raw(
167
&state,
168
compute,
169
|_state, data| postcard::to_allocvec(data).ok(),
170
|_state, data| postcard::from_bytes(&data).ok(),
171
)
172
}
173
174
/// Gets cached data if state matches, otherwise calls `compute`.
175
///
176
/// If the cache is disabled or no cached data is found then `compute` is
177
/// called to calculate the data. If the data was found in cache it is
178
/// passed to `deserialize`, which if successful will be the returned value.
179
/// When computed the `serialize` function is used to generate the bytes
180
/// from the returned value.
181
pub fn get_data_raw<T, U, E>(
182
&self,
183
state: &T,
184
// NOTE: These are function pointers instead of closures so that they
185
// don't accidentally close over something not accounted in the cache.
186
compute: fn(&T) -> Result<U, E>,
187
serialize: fn(&T, &U) -> Option<Vec<u8>>,
188
deserialize: fn(&T, Vec<u8>) -> Option<U>,
189
) -> Result<U, E>
190
where
191
T: Hash,
192
{
193
let inner = match &self.0 {
194
Some(inner) => inner,
195
None => return compute(state),
196
};
197
198
let mut hasher = Sha256Hasher(Sha256::new());
199
state.hash(&mut hasher);
200
let hash: [u8; 32] = hasher.0.finalize().into();
201
// standard encoding uses '/' which can't be used for filename
202
let hash = base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(&hash);
203
204
if let Some(cached_val) = inner.get_data(&hash) {
205
if let Some(val) = deserialize(state, cached_val) {
206
let mod_cache_path = inner.root_path.join(&hash);
207
inner.cache.on_cache_get_async(&mod_cache_path); // call on success
208
return Ok(val);
209
}
210
}
211
let val_to_cache = compute(state)?;
212
if let Some(bytes) = serialize(state, &val_to_cache) {
213
if inner.update_data(&hash, &bytes).is_some() {
214
let mod_cache_path = inner.root_path.join(&hash);
215
inner.cache.on_cache_update_async(&mod_cache_path); // call on success
216
}
217
}
218
Ok(val_to_cache)
219
}
220
}
221
222
impl<'cache> ModuleCacheEntryInner<'cache> {
223
fn new(compiler_name: &str, cache: &'cache Cache) -> Self {
224
// If debug assertions are enabled then assume that we're some sort of
225
// local build. We don't want local builds to stomp over caches between
226
// builds, so just use a separate cache directory based on the mtime of
227
// our executable, which should roughly correlate with "you changed the
228
// source code so you get a different directory".
229
//
230
// Otherwise if this is a release build we use the `GIT_REV` env var
231
// which is either the git rev if installed from git or the crate
232
// version if installed from crates.io.
233
let compiler_dir = if cfg!(debug_assertions) {
234
fn self_mtime() -> Option<String> {
235
let path = std::env::current_exe().ok()?;
236
let metadata = path.metadata().ok()?;
237
let mtime = metadata.modified().ok()?;
238
Some(match mtime.duration_since(std::time::UNIX_EPOCH) {
239
Ok(dur) => format!("{}", dur.as_millis()),
240
Err(err) => format!("m{}", err.duration().as_millis()),
241
})
242
}
243
let self_mtime = self_mtime().unwrap_or("no-mtime".to_string());
244
format!(
245
"{comp_name}-{comp_ver}-{comp_mtime}",
246
comp_name = compiler_name,
247
comp_ver = env!("GIT_REV"),
248
comp_mtime = self_mtime,
249
)
250
} else {
251
format!(
252
"{comp_name}-{comp_ver}",
253
comp_name = compiler_name,
254
comp_ver = env!("GIT_REV"),
255
)
256
};
257
let root_path = cache.directory().join("modules").join(compiler_dir);
258
259
Self { root_path, cache }
260
}
261
262
fn get_data(&self, hash: &str) -> Option<Vec<u8>> {
263
let mod_cache_path = self.root_path.join(hash);
264
trace!("get_data() for path: {}", mod_cache_path.display());
265
let compressed_cache_bytes = fs::read(&mod_cache_path).ok()?;
266
let cache_bytes = zstd::decode_all(&compressed_cache_bytes[..])
267
.map_err(|err| warn!("Failed to decompress cached code: {err}"))
268
.ok()?;
269
Some(cache_bytes)
270
}
271
272
fn update_data(&self, hash: &str, serialized_data: &[u8]) -> Option<()> {
273
let mod_cache_path = self.root_path.join(hash);
274
trace!("update_data() for path: {}", mod_cache_path.display());
275
let compressed_data = zstd::encode_all(
276
&serialized_data[..],
277
self.cache.baseline_compression_level(),
278
)
279
.map_err(|err| warn!("Failed to compress cached code: {err}"))
280
.ok()?;
281
282
// Optimize syscalls: first, try writing to disk. It should succeed in most cases.
283
// Otherwise, try creating the cache directory and retry writing to the file.
284
if fs_write_atomic(&mod_cache_path, "mod", &compressed_data).is_ok() {
285
return Some(());
286
}
287
288
debug!(
289
"Attempting to create the cache directory, because \
290
failed to write cached code to disk, path: {}",
291
mod_cache_path.display(),
292
);
293
294
let cache_dir = mod_cache_path.parent().unwrap();
295
fs::create_dir_all(cache_dir)
296
.map_err(|err| {
297
warn!(
298
"Failed to create cache directory, path: {}, message: {}",
299
cache_dir.display(),
300
err
301
)
302
})
303
.ok()?;
304
305
match fs_write_atomic(&mod_cache_path, "mod", &compressed_data) {
306
Ok(_) => Some(()),
307
Err(err) => {
308
warn!(
309
"Failed to write file with rename, target path: {}, err: {}",
310
mod_cache_path.display(),
311
err
312
);
313
None
314
}
315
}
316
}
317
}
318
319
impl Hasher for Sha256Hasher {
320
fn finish(&self) -> u64 {
321
panic!("Sha256Hasher doesn't support finish!");
322
}
323
324
fn write(&mut self, bytes: &[u8]) {
325
self.0.update(bytes);
326
}
327
}
328
329
// Writes `contents` to `path` by first writing a staging file next to it and
// then renaming the staging file over `path`.
//
// The staging file is `path` with its extension replaced by
// `wip-atomic-write-{reason}`; the call fails if that file already exists.
//
// Assumption: path inside cache directory.
// Then, we don't have to use sound OS-specific exclusive file access.
// Note: there's no need to remove temporary file here - cleanup task will do it later.
fn fs_write_atomic(path: &Path, reason: &str, contents: &[u8]) -> io::Result<()> {
    let staging_path = path.with_extension(format!("wip-atomic-write-{reason}"));
    {
        // atomic file creation (assumption: no one will open it without this flag)
        let mut staging_file = fs::OpenOptions::new()
            .write(true)
            .create_new(true)
            .open(&staging_path)?;
        staging_file.write_all(contents)?;
        // the file goes out of scope and is closed here, before the rename
    }
    // atomic file rename
    fs::rename(&staging_path, path)
}
342
343
#[cfg(test)]
344
mod tests;
345
346