wasmer_wasix/state/linker/sync/
mod.rs

1//! Dynamic-linking (`Dl*`) synchronization for the WASIX [`super::Linker`].
2//!
3//! Instance groups run on different OS threads and share one [`super::LinkerState`] behind an
4//! [`std::sync::RwLock`]. Operations that change “who exists” or “what every group must agree on”
5//! must coordinate with both that lock and a stop-the-world style broadcast of concrete mutations
6//! ([`DlOperation`]). This module holds the primitives that make that safe.
7//!
8//! # Locks and responsibilities
9//!
10//! - **Instance-group mutex** ([`lock_instance_group_state!`], `Linker::instance_group_state`):
11//!   Per-[`super::Linker`] handle to this thread’s [`InstanceGroupState`]. Many linker entry points
12//!   take it first so they can call into group-local state and, when needed, run cooperative DL
13//!   helpers with the right `Store` / [`FunctionEnv`].
14//!
//! - **Topology lease** ([`LinkerShared`] holds [`topology_lock::TopologyCoordinator`]
//!   privately): A single-writer gate for *topology-changing* work (new instance groups, module loads,
//!   export resolution that can allocate shared slots, etc.). The lease is acquired in a cooperative loop
//!   with backoff and pending-DL cooperation (see [`LinkerShared::acquire_topology_token`],
//!   [`LinkerShared::write_linker_state_with_topology`]). [`TopologyToken`] may move to another thread (spawn handoff).
20//!
21//! - **Shared linker state** (inside [`LinkerShared`], not exposed as a field): Global module
22//!   table, symbol records, and the buses used to broadcast [`DlOperation`] and barriers. Writers
23//!   must follow the cooperative patterns below—not raw lock calls.
24//!
//! - **Pending-DL handshake** (`dl_operation_pending`, barriers, wakeup signals): While an
//!   instigator runs [`LinkerShared::synchronize_link_operation`], follower threads must enter
//!   [`super::Linker::do_pending_link_operations`] (or helpers) so everyone rendezvouses. That is why
//!   contended access to [`super::LinkerState`] cannot spin blindly.
29//!
30//! # Lock ordering (intended)
31//!
//! When topology applies: **topology token first**, then lock [`super::LinkerState`] for write via the
//! APIs in this module—not the inverse. Never try to acquire a topology lease from inside code that
//! already holds [`super::LinkerState`] for write without a deliberate, reviewed plan (easy deadlock).
35//!
36//! # Why you must never lock `LinkerState` directly
37//!
//! **Do not call `linker_state.read()`, `write()`, or `try_write()` on [`super::Linker`]’s
//! [`std::sync::RwLock`] from normal instance-group linker paths.** Doing so skips the cooperative
//! path and can deadlock the whole process: another thread may hold the write lock while waiting at a
//! DL barrier for *this* thread to execute [`LinkerShared::do_pending_link_operations_internal`], which
//! requires the same group context and cannot run if this thread is stuck in a naive blocking `write()`.
43//!
44//! Use instead:
45//!
46//! - [`LinkerShared::write_linker_state`] — `try_write` loop + [`LinkerStateWriteBackoff`] + pending-DL draining.
//! - [`LinkerShared::write_linker_state_with_topology`] — topology lease + draining + blocking write when
//!   topology must be serialized before grabbing [`super::LinkerState`].
49//! - [`LinkerShared::write_linker_state_blocking_holding_topology`] — blocking write only while already
50//!   holding [`TopologyToken`], after topology was leased on another thread/step.
51//!
52//! Narrow exceptions (e.g. one-off bootstrap in [`super::Linker::new`] before other groups exist)
53//! belong in tightly scoped code and should still avoid contending paths that overlap DL sync.
54
55pub(super) mod topology_lock;
56
57mod linker_shared;
58
59pub(in crate::state::linker) use linker_shared::LinkerShared;
60pub(crate) use topology_lock::TopologyToken;
61
62use std::time::Duration;
63
64use super::ModuleHandle;
65
66/// Spin, then yield, then capped exponential sleep — for cooperative linker-state retries.
67pub(super) struct LinkerStateWriteBackoff {
68    /// Number of [`backoff`](Self::backoff) calls so far after a collision.
69    step: u32,
70}
71
72impl LinkerStateWriteBackoff {
73    const SPIN_PHASE_STEPS: u32 = 64;
74    const YIELD_PHASE_STEPS: u32 = 24;
75    const SLEEP_MICROS_INITIAL: u64 = 48;
76    const SLEEP_MICROS_MAX: u64 = 10_000;
77    const SLEEP_SHIFT_CAP: u32 = 8;
78
79    pub(super) fn new() -> Self {
80        Self { step: 0 }
81    }
82
83    /// Call once per failed `try_write` after cooperating on pending DL ops.
84    pub(super) fn backoff(&mut self) {
85        let step = self.step;
86        self.step = self.step.saturating_add(1);
87
88        if step < Self::SPIN_PHASE_STEPS {
89            std::hint::spin_loop();
90            return;
91        }
92
93        let after_spin = step - Self::SPIN_PHASE_STEPS;
94        if after_spin < Self::YIELD_PHASE_STEPS {
95            std::thread::yield_now();
96            return;
97        }
98
99        let slept = step
100            .saturating_sub(Self::SPIN_PHASE_STEPS)
101            .saturating_sub(Self::YIELD_PHASE_STEPS);
102        let shift = slept.min(Self::SLEEP_SHIFT_CAP);
103        let micros = Self::SLEEP_MICROS_INITIAL
104            .checked_shl(shift)
105            .unwrap_or(Self::SLEEP_MICROS_MAX)
106            .min(Self::SLEEP_MICROS_MAX);
107
108        std::thread::sleep(Duration::from_micros(micros));
109    }
110}
111
/// Locks `$linker.instance_group_state` and exposes its contents to the calling function.
///
/// Expands to statements that introduce two bindings in the caller's scope:
/// - `$guard`: the (mutable) lock guard, kept alive for as long as `$state` is used;
/// - `$state`: a mutable reference to the value stored inside the guarded `Option`.
///
/// If the guarded `Option` is `None`, the *enclosing function* returns `Err($err)`.
/// Panics if `lock()` returns an error, since the result is unwrapped.
///
/// The expansion calls `deref_mut()` on the guard, so callers are expected to have
/// `std::ops::DerefMut` in scope at the invocation site.
macro_rules! lock_instance_group_state {
    ($guard:ident, $state:ident, $linker:expr, $err:expr) => {
        let mut $guard = $linker.instance_group_state.lock().unwrap();
        let $state = match $guard.deref_mut().as_mut() {
            Some(group_state) => group_state,
            // No group state present: bail out of the calling function.
            None => return Err($err),
        };
    };
}
121
122pub(super) use lock_instance_group_state;
123
124// Used to communicate the result of an operation that happened in one
125// instance group to all others
126#[derive(Debug, Clone)]
127pub(super) enum DlOperation {
128    LoadModules(Vec<ModuleHandle>),
129    ResolveFunction {
130        name: String,
131        resolved_from: ModuleHandle,
132        // This should match the current length of each instance group's function table
133        // minus one. Otherwise, we're out of sync and an error has been encountered.
134        function_table_index: u32,
135    },
136    // Allocates slots in the function table
137    AllocateFunctionTable {
138        index: u32,
139        size: u32,
140    },
141}