wasmer_wasix/state/linker/
linker_state.rs

1use std::{
2    borrow::Cow,
3    collections::{BTreeMap, HashMap},
4    path::{Path, PathBuf},
5    sync::{Arc, Barrier},
6};
7
8use tracing::trace;
9use virtual_mio::block_on;
10use wasmer::{
11    AsStoreMut, AsStoreRef, Engine, Extern, ExternType, ImportType, Memory, MemoryError, Module,
12    Type,
13};
14
15use crate::{Runtime, runtime::module_cache::HashedModuleData};
16
17use super::{
18    DlModule, DlModuleSpec, DlOperation, DylinkInfo, INVALID_MODULE_HANDLE, InProgressLinkState,
19    InProgressModuleLoad, InProgressSymbolResolution, InstanceGroupState, LinkError,
20    MAIN_MODULE_HANDLE, MemoryAllocator, ModuleHandle, NeededSymbolResolutionKey,
21    SymbolResolutionKey, SymbolResolutionResult,
22};
23
24use super::{get_integer_global_type_from_import, locate_module, parse_dylink0_section};
25
26use crate::state::WasiState;
27
// There is only one LinkerState for all instance groups
pub(super) struct LinkerState {
    /// Engine handed to the runtime whenever a side module is loaded
    /// (see `load_module_tree`).
    pub(super) engine: Engine,

    /// The main module. Its data is addressed through `MAIN_MODULE_HANDLE`
    /// instead of being stored in `side_modules`.
    pub(super) main_module: Module,
    /// Dylink info of the main module; returned by `dylink_info` for
    /// `MAIN_MODULE_HANDLE`.
    pub(super) main_module_dylink_info: DylinkInfo,
    /// Memory base of the main module; returned by `memory_base` for
    /// `MAIN_MODULE_HANDLE`.
    pub(super) main_module_memory_base: u64,

    // We used to have an issue where spawning instances out-of-order in new threads
    // would break globals. That has since been fixed. However, spawning in the same
    // order helps with diagnosing potential linker issues, so we're keeping the
    // hack from back then.
    // To ensure the same order, we use a BTreeMap here, which means when we
    // iterate over it, we'll get the modules from lowest handle to highest, and
    // order is preserved.
    /// Side modules, keyed by their handle.
    pub(super) side_modules: BTreeMap<ModuleHandle, DlModule>,
    /// Maps the name a module was requested under to its handle. Entries are
    /// added by `load_module_tree` as soon as a handle is assigned, which is
    /// before the matching `DlModule` exists in `side_modules`.
    pub(super) side_modules_by_name: HashMap<PathBuf, ModuleHandle>,
    /// Raw value of the next `ModuleHandle` to hand out; incremented after
    /// every assignment.
    pub(super) next_module_handle: u32,

    /// Allocator used by `allocate_memory` to reserve space for module data.
    pub(super) memory_allocator: MemoryAllocator,
    // NOTE(review): not read anywhere in this part of the file; presumably the
    // address at which the heap starts - confirm against the code that sets it.
    pub(super) heap_base: u64,

    /// Tracks which slots in the function table are currently used for closures
    ///
    /// True if the closure is currently in use, false otherwise.
    pub(super) allocated_closure_functions: BTreeMap<u32, bool>,
    /// Slots in the indirect function table that were allocated for closures but are currently not in use.
    /// These can be given out without needing to lock all threads.
    pub(super) available_closure_functions: Vec<u32>,

    // NOTE(review): not touched in this part of the file; presumably a record of
    // completed symbol resolutions, keyed by the symbol that was resolved - confirm.
    pub(super) symbol_resolution_records: HashMap<SymbolResolutionKey, SymbolResolutionResult>,

    // NOTE(review): the two buses below are not used in this part of the file.
    // Presumably they broadcast a pending `DlOperation`, and the barrier used to
    // synchronize on it, to the other instance groups - confirm.
    pub(super) send_pending_operation_barrier: bus::Bus<Arc<Barrier>>,
    pub(super) send_pending_operation: bus::Bus<DlOperation>,
}
63
64impl LinkerState {
65    pub(super) fn allocate_memory(
66        &mut self,
67        store: &mut impl AsStoreMut,
68        memory: &Memory,
69        mem_info: &wasmparser::MemInfo,
70    ) -> Result<u64, MemoryError> {
71        trace!(?mem_info, "Allocating memory");
72
73        let new_size = if mem_info.memory_size == 0 {
74            0
75        } else {
76            self.memory_allocator.allocate(
77                memory,
78                store,
79                mem_info.memory_size,
80                2_u32.pow(mem_info.memory_alignment),
81            )? as u64
82        };
83
84        trace!(new_size, "Final size");
85
86        Ok(new_size)
87    }
88
89    pub(super) fn memory_base(&self, module_handle: ModuleHandle) -> u64 {
90        if module_handle == MAIN_MODULE_HANDLE {
91            self.main_module_memory_base
92        } else {
93            self.side_modules
94                .get(&module_handle)
95                .expect("Internal error: bad module handle")
96                .memory_base
97        }
98    }
99
100    pub(super) fn dylink_info(&self, module_handle: ModuleHandle) -> &DylinkInfo {
101        if module_handle == MAIN_MODULE_HANDLE {
102            &self.main_module_dylink_info
103        } else {
104            &self
105                .side_modules
106                .get(&module_handle)
107                .expect("Internal error: bad module handle")
108                .dylink_info
109        }
110    }
111
112    // Resolves all imports for the given module, and places the results into
113    // the in progress link state's symbol collection.
114    // A follow-up call to [`InstanceGroupState::populate_imports_from_link_state`]
115    // is needed to create a usable imports object, which needs to happen once per
116    // instance group.
117    // Each instance group has a different store, so the group ID corresponding
118    // to the given store must be provided to resolve globals from the correct
119    // instances.
120    pub(super) fn resolve_symbols(
121        &self,
122        group: &InstanceGroupState,
123        store: &mut impl AsStoreMut,
124        module: &Module,
125        module_handle: ModuleHandle,
126        link_state: &mut InProgressLinkState,
127        // Used only to "skip over" well known imports, so we don't actually need the
128        // u64 values. However, we use the same type as populate_imports to let calling
129        // code construct the data only once.
130        well_known_imports: &[(&str, &str, u64)],
131    ) -> Result<(), LinkError> {
132        trace!(?module_handle, "Resolving symbols");
133        for import in module.imports() {
134            // Skip over well known imports, since they'll be provided externally
135            if well_known_imports
136                .iter()
137                .any(|i| i.0 == import.module() && i.1 == import.name())
138            {
139                trace!(?import, "Skipping resolution of well-known symbol");
140                continue;
141            }
142
143            // Skip over the memory, function table and stack pointer imports as well
144            match import.name() {
145                "memory"
146                | "__indirect_function_table"
147                | "__stack_pointer"
148                | "__c_longjmp"
149                | "__cpp_exception" => {
150                    trace!(?import, "Skipping resolution of special symbol");
151                    continue;
152                }
153                _ => (),
154            }
155
156            match import.module() {
157                "env" => {
158                    let resolution = self.resolve_env_symbol(group, &import, store)?;
159                    trace!(?import, ?resolution, "Symbol resolved");
160                    link_state.symbols.insert(
161                        NeededSymbolResolutionKey {
162                            module_handle,
163                            import_module: "env".to_owned(),
164                            import_name: import.name().to_string(),
165                        },
166                        resolution,
167                    );
168                }
169                "GOT.mem" => {
170                    let resolution = self.resolve_got_mem_symbol(group, &import, store)?;
171                    trace!(?import, ?resolution, "Symbol resolved");
172                    link_state.symbols.insert(
173                        NeededSymbolResolutionKey {
174                            module_handle,
175                            import_module: "GOT.mem".to_owned(),
176                            import_name: import.name().to_string(),
177                        },
178                        resolution,
179                    );
180                }
181                "GOT.func" => {
182                    let resolution = self.resolve_got_func_symbol(group, &import, store)?;
183                    trace!(?import, ?resolution, "Symbol resolved");
184                    link_state.symbols.insert(
185                        NeededSymbolResolutionKey {
186                            module_handle,
187                            import_module: "GOT.func".to_owned(),
188                            import_name: import.name().to_string(),
189                        },
190                        resolution,
191                    );
192                }
193                _ => (),
194            }
195        }
196
197        trace!(?module_handle, "All symbols resolved");
198
199        Ok(())
200    }
201
202    // Imports from the env module are:
203    //   * the memory and indirect function table
204    //   * well-known addresses, such as __stack_pointer and __memory_base
205    //   * functions that are imported directly
206    // resolve_env_symbol only handles the imported functions.
207    fn resolve_env_symbol(
208        &self,
209        group: &InstanceGroupState,
210        import: &ImportType,
211        store: &impl AsStoreRef,
212    ) -> Result<InProgressSymbolResolution, LinkError> {
213        let ExternType::Function(import_func_ty) = import.ty() else {
214            return Err(LinkError::ImportMustBeFunction(
215                "env",
216                import.name().to_string(),
217            ));
218        };
219
220        let export = group.resolve_exported_symbol(import.name());
221
222        match export {
223            Some((module_handle, export)) => {
224                let Extern::Function(export_func) = export else {
225                    return Err(LinkError::ImportTypeMismatch(
226                        "env".to_string(),
227                        import.name().to_string(),
228                        ExternType::Function(import_func_ty.clone()),
229                        export.ty(store).clone(),
230                    ));
231                };
232
233                if export_func.ty(store) != *import_func_ty {
234                    return Err(LinkError::ImportTypeMismatch(
235                        "env".to_string(),
236                        import.name().to_string(),
237                        ExternType::Function(import_func_ty.clone()),
238                        export.ty(store).clone(),
239                    ));
240                }
241
242                Ok(InProgressSymbolResolution::Function(module_handle))
243            }
244            None => {
245                // The function may be exported from a module we have yet to link in,
246                // or otherwise not be used by the module at all. We provide a stub that,
247                // when called, will try to resolve the symbol and call it. This lets
248                // us resolve circular dependencies, as well as letting modules that don't
249                // actually use their imports run successfully.
250                Ok(InProgressSymbolResolution::StubFunction(
251                    import_func_ty.clone(),
252                ))
253            }
254        }
255    }
256
257    // "Global" imports (i.e. imports from GOT.mem and GOT.func) are integer globals.
258    // GOT.mem imports should point to the address of another module's data.
259    fn resolve_got_mem_symbol(
260        &self,
261        group: &InstanceGroupState,
262        import: &ImportType,
263        store: &impl AsStoreRef,
264    ) -> Result<InProgressSymbolResolution, LinkError> {
265        let global_type = get_integer_global_type_from_import(import)?;
266
267        match group.resolve_exported_symbol(import.name()) {
268            Some((module_handle, export)) => {
269                let ExternType::Global(global_type) = export.ty(store) else {
270                    return Err(LinkError::ImportTypeMismatch(
271                        "GOT.mem".to_string(),
272                        import.name().to_string(),
273                        ExternType::Global(global_type),
274                        export.ty(store).clone(),
275                    ));
276                };
277
278                if !matches!(global_type.ty, Type::I32 | Type::I64) {
279                    return Err(LinkError::ImportTypeMismatch(
280                        "GOT.mem".to_string(),
281                        import.name().to_string(),
282                        ExternType::Global(global_type),
283                        export.ty(store).clone(),
284                    ));
285                }
286
287                Ok(InProgressSymbolResolution::MemGlobal(module_handle))
288            }
289            None => Ok(InProgressSymbolResolution::UnresolvedMemGlobal),
290        }
291    }
292
293    // "Global" imports (i.e. imports from GOT.mem and GOT.func) are integer globals.
294    // GOT.func imports are function pointers (i.e. indices into the indirect function
295    // table).
296    fn resolve_got_func_symbol(
297        &self,
298        group: &InstanceGroupState,
299        import: &ImportType,
300        store: &impl AsStoreRef,
301    ) -> Result<InProgressSymbolResolution, LinkError> {
302        // Ensure the global is the correct type (i32 or i64)
303        let _ = get_integer_global_type_from_import(import)?;
304
305        match group.resolve_exported_symbol(import.name()) {
306            Some((module_handle, export)) => {
307                let ExternType::Function(_) = export.ty(store) else {
308                    return Err(LinkError::ExportMustBeFunction(
309                        import.name().to_string(),
310                        export.ty(store).clone(),
311                    ));
312                };
313
314                Ok(InProgressSymbolResolution::FuncGlobal(module_handle))
315            }
316            None => Ok(InProgressSymbolResolution::UnresolvedFuncGlobal),
317        }
318    }
319
    // TODO: give loaded library a different wasi env that specifies its module handle
    //
    // Loads the requested module and, recursively, every module listed in the
    // `needed` entries of its dylink.0 section. Each newly loaded `Module` is
    // assigned a fresh handle and pushed (together with its dylink info) onto the
    // in-progress link state's `new_modules` list; the linker state itself only
    // records the name -> handle mapping in `side_modules_by_name`. Needed modules
    // are pushed before the module that needs them. The modules must then get
    // their symbols resolved and be instantiated in the order in which they
    // appear in the link state.
    //
    // Returns the handle of the originally requested module. This will be the last
    // entry in the link state's list of new modules, but only if the module was
    // actually loaded; if it was already loaded, the existing handle is returned
    // and nothing is added. If the module is found to be already pending (i.e. it
    // is reached again while its own needed modules are being loaded),
    // `INVALID_MODULE_HANDLE` is returned instead.
    //
    // Errors from locating the module, compiling it, parsing its dylink.0 section
    // or loading any needed module are propagated to the caller.
    pub(super) fn load_module_tree(
        &mut self,
        module_spec: DlModuleSpec,
        link_state: &mut InProgressLinkState,
        runtime: &Arc<dyn Runtime + Send + Sync + 'static>,
        wasi_state: &WasiState,
        runtime_path: &[impl AsRef<str>],
        calling_module_path: Option<impl AsRef<Path>>,
    ) -> Result<ModuleHandle, LinkError> {
        // The name under which the module is registered: the requested spec for
        // file system modules, or a synthetic "::in-memory::<name>" path for
        // in-memory ones.
        let module_name = match module_spec {
            DlModuleSpec::FileSystem { module_spec, .. } => Cow::Borrowed(module_spec),
            DlModuleSpec::Memory { module_name, .. } => {
                Cow::Owned(PathBuf::from(format!("::in-memory::{module_name}")))
            }
        };
        trace!(?module_name, "Locating and loading module");

        // A module that already has a handle under this name is never loaded twice.
        if let Some(handle) = self.side_modules_by_name.get(module_name.as_ref()) {
            let handle = *handle;

            trace!(?module_name, ?handle, "Module was already loaded");

            return Ok(handle);
        }

        // Locate and load the module bytes. `paths` is only set for file system
        // modules; it carries what is needed to load their needed modules.
        let (module_data, paths) = match module_spec {
            DlModuleSpec::FileSystem {
                module_spec,
                ld_library_path,
            } => {
                let (full_path, bytes) = block_on(locate_module(
                    module_spec,
                    ld_library_path,
                    runtime_path,
                    calling_module_path,
                    &wasi_state.fs,
                ))?;
                // TODO: this can be optimized by detecting early if the module is already
                // pending without loading its bytes
                if link_state.pending_module_paths.contains(&full_path) {
                    trace!("Module is already pending, won't load again");
                    // This is fine, since a non-empty pending_module_paths list means
                    // we are recursively resolving needed modules. We don't use the
                    // handle returned from this function for anything when running
                    // recursively (see the self.load_module_tree call below).
                    return Ok(INVALID_MODULE_HANDLE);
                }

                (
                    HashedModuleData::new(bytes),
                    Some((full_path, ld_library_path)),
                )
            }
            DlModuleSpec::Memory { bytes, .. } => (HashedModuleData::new(bytes), None),
        };

        let module = runtime.load_hashed_module_sync(module_data, Some(&self.engine))?;

        let dylink_info = parse_dylink0_section(&module)?;

        trace!(?dylink_info, "Loading side module");

        if let Some((full_path, ld_library_path)) = paths {
            // Mark this module as pending while its needed modules are loaded, so
            // that a needed module that leads back to it is detected above.
            link_state.pending_module_paths.push(full_path.clone());
            let num_pending_modules = link_state.pending_module_paths.len();
            // Removes the entry pushed above. The assertion checks that every
            // recursive call left the pending list exactly as it found it.
            let pop_pending_module = |link_state: &mut InProgressLinkState| {
                assert_eq!(
                    num_pending_modules,
                    link_state.pending_module_paths.len(),
                    "Internal error: pending modules not maintained correctly"
                );
                link_state.pending_module_paths.pop().unwrap();
            };

            for needed in &dylink_info.needed {
                trace!(needed, "Loading needed side module");
                match self.load_module_tree(
                    DlModuleSpec::FileSystem {
                        module_spec: Path::new(needed.as_str()),
                        ld_library_path,
                    },
                    link_state,
                    runtime,
                    wasi_state,
                    // RUNPATH, on which WASM_DYLINK_RUNTIME_PATH is based, is *not* applied
                    // recursively, so we discard the runtime_path parameter and
                    // only take the one from the module's dylink.0 section
                    dylink_info.runtime_path.as_ref(),
                    Some(&full_path),
                ) {
                    // The handle of a needed module is not used here.
                    Ok(_) => (),
                    Err(e) => {
                        // Keep the pending list consistent on the error path too.
                        pop_pending_module(link_state);
                        return Err(e);
                    }
                }
            }

            pop_pending_module(link_state);
        } else if !dylink_info.needed.is_empty() {
            unreachable!(
                "Internal error: in-memory modules with further needed modules not \
                    supported and no code paths can create such a module"
            );
        }

        // Handles are handed out only after all needed modules got theirs, so a
        // module's handle is always higher than those of the modules it needs
        // that were loaded by this call.
        let handle = ModuleHandle(self.next_module_handle);
        self.next_module_handle += 1;

        trace!(?module_name, ?handle, "Assigned handle to module");

        link_state.new_modules.push(InProgressModuleLoad {
            handle,
            dylink_info,
            module,
        });
        // Put the name in the linker state - the actual DlModule must be
        // constructed later by the instance group once table addresses are
        // allocated for the module.
        // TODO: allocate table here (at least logically)?
        self.side_modules_by_name
            .insert(module_name.into_owned(), handle);

        Ok(handle)
    }
454}