wasmer_wasix/runtime/module_cache/
filesystem.rs

1use std::path::{Path, PathBuf};
2use std::sync::Arc;
3
4use tempfile::NamedTempFile;
5use tokio::io::AsyncWriteExt;
6use wasmer::{Engine, Module};
7
8use crate::runtime::module_cache::{CacheError, ModuleCache, ModuleHash};
9use crate::runtime::task_manager::tokio::TokioTaskManager;
10
11/// A cache that saves modules to a folder on the host filesystem using
12/// [`Module::serialize()`].
13#[derive(Debug, Clone)]
14pub struct FileSystemCache {
15    cache_dir: PathBuf,
16    task_manager: Arc<TokioTaskManager>,
17}
18
19impl FileSystemCache {
20    pub fn new(cache_dir: impl Into<PathBuf>, task_manager: Arc<TokioTaskManager>) -> Self {
21        FileSystemCache {
22            cache_dir: cache_dir.into(),
23            task_manager,
24        }
25    }
26
27    pub fn cache_dir(&self) -> &Path {
28        &self.cache_dir
29    }
30
31    fn path(&self, key: ModuleHash, deterministic_id: &str) -> PathBuf {
32        let artifact_version = wasmer_types::MetadataHeader::CURRENT_VERSION;
33        self.cache_dir
34            .join(format!("{deterministic_id}-v{artifact_version}"))
35            .join(key.to_string())
36            .with_extension("bin")
37    }
38}
39
40/// Loads a module from the filesystem cache.
41///
42/// A tokio reactor must be available
43#[tracing::instrument(level = "debug", skip_all, fields(? path))]
44async fn tokio_load(path: PathBuf, engine: Engine) -> Result<Module, CacheError> {
45    let bytes = read_file(&path).await?;
46    let deserialized = tokio::task::spawn_blocking(move || deserialize(&bytes, &engine))
47        .await
48        .unwrap();
49    match deserialized {
50        Ok(m) => {
51            tracing::debug!("Cache hit!");
52            Ok(m)
53        }
54        Err(e) => {
55            tracing::debug!(
56                path=%path.display(),
57                error=&e as &dyn std::error::Error,
58                "Deleting the cache file because the artifact couldn't be deserialized",
59            );
60
61            if let Err(e) = std::fs::remove_file(&path) {
62                tracing::warn!(
63                    path=%path.display(),
64                    error=&e as &dyn std::error::Error,
65                    "Unable to remove the corrupted cache file",
66                );
67            }
68            Err(e)
69        }
70    }
71}
72
73/// Checks if the path exists in the filesystem cache.
74///
75/// A tokio reactor must be available
76async fn tokio_contains(path: PathBuf) -> Result<bool, CacheError> {
77    tokio::fs::try_exists(&path)
78        .await
79        .map_err(|e| CacheError::FileRead {
80            path: path.clone(),
81            error: e,
82        })
83}
84
85/// Saves the module to the filesystem cache.
86///
87/// A tokio reactor must be available
88#[tracing::instrument(level = "debug", skip_all, fields(? path))]
89async fn tokio_save(path: PathBuf, module: Module) -> Result<(), CacheError> {
90    let parent = path
91        .parent()
92        .expect("Unreachable - always created by joining onto cache_dir");
93
94    if let Err(e) = tokio::fs::create_dir_all(parent).await {
95        tracing::warn!(
96            dir=%parent.display(),
97            error=&e as &dyn std::error::Error,
98            "Unable to create the cache directory",
99        );
100    }
101
102    // TODO: NamedTempFile is blocking and we should use the appropriate tokio function instead.
103    // Note: We save to a temporary file and persist() it at the end so
104    // concurrent readers won't see a partially written module.
105    let (file, temp) = NamedTempFile::new_in(parent)
106        .map_err(CacheError::other)?
107        .into_parts();
108
109    let mut file = tokio::fs::File::from_std(file);
110
111    let serialized = tokio::task::spawn_blocking(move || module.serialize())
112        .await
113        .unwrap()?;
114
115    let mut writer = tokio::io::BufWriter::new(&mut file);
116    if let Err(error) = writer.write_all(&serialized).await {
117        return Err(CacheError::FileWrite { path, error });
118    }
119    if let Err(error) = writer.flush().await {
120        return Err(CacheError::FileWrite { path, error });
121    }
122
123    temp.persist(&path).map_err(CacheError::other)?;
124    tracing::debug!(path=%path.display(), "Saved to disk");
125
126    Ok(())
127}
128
129#[async_trait::async_trait]
130impl ModuleCache for FileSystemCache {
131    #[tracing::instrument(level = "debug", skip_all, fields(% key))]
132    async fn load(&self, key: ModuleHash, engine: &Engine) -> Result<Module, CacheError> {
133        let path = self.path(key, &engine.deterministic_id());
134        let engine = engine.clone();
135
136        // Use the bundled tokio runtime instead of the given async runtime
137        // This is necessary because this function can also be called with a futures_executor
138        self.task_manager
139            .runtime_handle()
140            .spawn(tokio_load(path, engine))
141            .await
142            .unwrap()
143    }
144
145    async fn contains(&self, key: ModuleHash, engine: &Engine) -> Result<bool, CacheError> {
146        let path = self.path(key, &engine.deterministic_id());
147
148        // Use the bundled tokio runtime instead of the given async runtime
149        // This is necessary because this function can also be called with a futures_executor
150        self.task_manager
151            .runtime_handle()
152            .spawn(tokio_contains(path))
153            .await
154            .unwrap()
155    }
156
157    #[tracing::instrument(level = "debug", skip_all, fields(% key))]
158    async fn save(
159        &self,
160        key: ModuleHash,
161        engine: &Engine,
162        module: &Module,
163    ) -> Result<(), CacheError> {
164        let path = self.path(key, &engine.deterministic_id());
165        let module = module.clone();
166
167        // Use the bundled tokio runtime instead of the given async runtime
168        // This is necessary because this function can also be called with a futures_executor
169        self.task_manager
170            .runtime_handle()
171            .spawn(tokio_save(path, module))
172            .await
173            .unwrap()
174    }
175}
176
177async fn read_file(path: &Path) -> Result<Vec<u8>, CacheError> {
178    match tokio::fs::read(path).await {
179        Ok(bytes) => Ok(bytes),
180        Err(e) if e.kind() == std::io::ErrorKind::NotFound => Err(CacheError::NotFound),
181        Err(error) => Err(CacheError::FileRead {
182            path: path.to_path_buf(),
183            error,
184        }),
185    }
186}
187
188fn deserialize(bytes: &[u8], engine: &Engine) -> Result<Module, CacheError> {
189    // We used to compress our compiled modules using LZW encoding in the past.
190    // This was removed because it has a negative impact on startup times for
191    // "wasmer run", so all new compiled modules should be saved directly to
192    // disk.
193    //
194    // For perspective, compiling php.wasm with cranelift took about 4.75
195    // seconds on a M1 Mac.
196    //
197    // Without LZW compression:
198    // - ModuleCache::save(): 408ms, 142MB binary
199    // - ModuleCache::load(): 155ms
200    // With LZW compression:
201    // - ModuleCache::save(): 2.4s, 72MB binary
202    // - ModuleCache::load(): 822ms
203
204    match unsafe { Module::deserialize(engine, bytes) } {
205        // The happy case
206        Ok(m) => Ok(m),
207        Err(wasmer::DeserializeError::Incompatible(_)) => {
208            let bytes = weezl::decode::Decoder::new(weezl::BitOrder::Msb, 8)
209                .decode(bytes)
210                .map_err(CacheError::other)?;
211
212            let m = unsafe { Module::deserialize(engine, bytes)? };
213
214            Ok(m)
215        }
216        Err(e) => Err(CacheError::Deserialize(e)),
217    }
218}
219
#[cfg(test)]
mod tests {
    use crate::runtime::task_manager::tokio::TokioTaskManager;
    use tempfile::TempDir;

    use super::*;

    /// A minimal module exporting a single `add` function.
    const ADD_WAT: &[u8] = br#"(
        module
            (func
                (export "add")
                (param $x i64)
                (param $y i64)
                (result i64)
                (i64.add (local.get $x) (local.get $y)))
        )"#;

    /// Wraps the runtime of the currently running test in a task manager.
    fn create_tokio_task_manager() -> Arc<TokioTaskManager> {
        Arc::new(TokioTaskManager::new(tokio::runtime::Handle::current()))
    }

    /// The cache key shared by all tests.
    fn test_key() -> ModuleHash {
        ModuleHash::xxhash_from_bytes([0; 8])
    }

    /// Writes `contents` to `path`, creating the parent directory first.
    fn write_artifact(path: &Path, contents: &[u8]) {
        std::fs::create_dir_all(path.parent().unwrap()).unwrap();
        std::fs::write(path, contents).unwrap();
    }

    /// Collects the names of everything `module` exports.
    fn export_names(module: &Module) -> Vec<String> {
        module
            .exports()
            .map(|export| export.name().to_string())
            .collect()
    }

    #[tokio::test]
    async fn save_to_disk() {
        let scratch = TempDir::new().unwrap();
        let engine = Engine::default();
        let compiled = Module::new(&engine, ADD_WAT).unwrap();
        let cache = FileSystemCache::new(scratch.path(), create_tokio_task_manager());
        let key = test_key();
        let expected_path = cache.path(key, &engine.deterministic_id());

        cache.save(key, &engine, &compiled).await.unwrap();

        assert!(expected_path.exists());
    }

    #[tokio::test]
    async fn create_cache_dir_automatically() {
        let scratch = TempDir::new().unwrap();
        let engine = Engine::default();
        let compiled = Module::new(&engine, ADD_WAT).unwrap();
        let cache_dir = scratch.path().join("this").join("doesn't").join("exist");
        assert!(!cache_dir.exists());
        let cache = FileSystemCache::new(&cache_dir, create_tokio_task_manager());

        cache.save(test_key(), &engine, &compiled).await.unwrap();

        assert!(cache_dir.is_dir());
    }

    #[tokio::test]
    async fn missing_file() {
        let scratch = TempDir::new().unwrap();
        let engine = Engine::default();
        let cache = FileSystemCache::new(scratch.path(), create_tokio_task_manager());

        let err = cache.load(test_key(), &engine).await.unwrap_err();

        assert!(matches!(err, CacheError::NotFound));
    }

    #[tokio::test]
    async fn load_from_disk() {
        let scratch = TempDir::new().unwrap();
        let engine = Engine::default();
        let compiled = Module::new(&engine, ADD_WAT).unwrap();
        let key = test_key();
        let cache = FileSystemCache::new(scratch.path(), create_tokio_task_manager());
        let expected_path = cache.path(key, &engine.deterministic_id());
        let serialized = compiled.serialize().unwrap();
        write_artifact(&expected_path, &serialized);

        let loaded = cache.load(key, &engine).await.unwrap();

        assert_eq!(export_names(&loaded), ["add"]);
    }

    /// For backwards compatibility, make sure we can still work with LZW
    /// compressed modules.
    #[tokio::test]
    async fn can_still_load_lzw_compressed_binaries() {
        let scratch = TempDir::new().unwrap();
        let engine = Engine::default();
        let compiled = Module::new(&engine, ADD_WAT).unwrap();
        let key = test_key();
        let cache = FileSystemCache::new(scratch.path(), create_tokio_task_manager());
        let expected_path = cache.path(key, &engine.deterministic_id());
        let serialized = compiled.serialize().unwrap();
        let compressed = weezl::encode::Encoder::new(weezl::BitOrder::Msb, 8)
            .encode(&serialized)
            .unwrap();
        write_artifact(&expected_path, &compressed);

        let loaded = cache.load(key, &engine).await.unwrap();

        assert_eq!(export_names(&loaded), ["add"]);
    }
}