wasmer_compiler_cranelift/translator/
func_state.rs

1// This file contains code from external sources.
2// Attributions: https://github.com/wasmerio/wasmer/blob/main/docs/ATTRIBUTIONS.md
3
4//! WebAssembly module and function translation state.
5//!
6//! The `ModuleTranslationState` struct defined in this module is used to keep track of data about
7//! the whole WebAssembly module, such as the decoded type signatures.
8//!
9//! The `FuncTranslationState` struct defined in this module is used to keep track of the WebAssembly
10//! value and control stacks during the translation of a single function.
11
12use super::func_environ::{FuncEnvironment, GlobalVariable};
13use crate::heap::Heap;
14use crate::translator::code_translator::CatchClause;
15use crate::{HashMap, Occupied, Vacant};
16use cranelift_codegen::ir::{self, Block, Inst, Value};
17use cranelift_frontend::FunctionBuilder;
18use itertools::Itertools;
19use std::vec::Vec;
20use wasmer_types::{FunctionIndex, GlobalIndex, MemoryIndex, SignatureIndex, WasmResult};
21
22/// Information about the presence of an associated `else` for an `if`, or the
23/// lack thereof.
24#[derive(Debug)]
25pub enum ElseData {
26    /// The `if` does not already have an `else` block.
27    ///
28    /// This doesn't mean that it will never have an `else`, just that we
29    /// haven't seen it yet.
30    NoElse {
31        /// If we discover that we need an `else` block, this is the jump
32        /// instruction that needs to be fixed up to point to the new `else`
33        /// block rather than the destination block after the `if...end`.
34        branch_inst: Inst,
35
36        /// The placeholder block we're replacing.
37        placeholder: Block,
38    },
39
40    /// We have already allocated an `else` block.
41    ///
42    /// Usually we don't know whether we will hit an `if .. end` or an `if
43    /// .. else .. end`, but sometimes we can tell based on the block's type
44    /// signature that the signature is not valid if there isn't an `else`. In
45    /// these cases, we pre-allocate the `else` block.
46    WithElse {
47        /// This is the `else` block.
48        else_block: Block,
49    },
50}
51
52/// A control stack frame can be an `if`, a `block` or a `loop`, each one having the following
53/// fields:
54///
55/// - `destination`: reference to the `Block` that will hold the code after the control block;
56/// - `num_return_values`: number of values returned by the control block;
57/// - `original_stack_size`: size of the value stack at the beginning of the control block.
58///
59/// Moreover, the `if` frame has the `branch_inst` field that points to the `brz` instruction
60/// separating the `true` and `false` branch. The `loop` frame has a `header` field that references
61/// the `Block` that contains the beginning of the body of the loop.
62#[derive(Debug)]
63pub enum ControlStackFrame {
64    If {
65        destination: Block,
66        else_data: ElseData,
67        num_param_values: usize,
68        num_return_values: usize,
69        original_stack_size: usize,
70        exit_is_branched_to: bool,
71        blocktype: wasmer_compiler::wasmparser::BlockType,
72        /// Was the head of the `if` reachable?
73        head_is_reachable: bool,
74        /// What was the reachability at the end of the consequent?
75        ///
76        /// This is `None` until we're finished translating the consequent, and
77        /// is set to `Some` either by hitting an `else` when we will begin
78        /// translating the alternative, or by hitting an `end` in which case
79        /// there is no alternative.
80        consequent_ends_reachable: Option<bool>,
81        // Note: no need for `alternative_ends_reachable` because that is just
82        // `state.reachable` when we hit the `end` in the `if .. else .. end`.
83    },
84    Block {
85        destination: Block,
86        num_param_values: usize,
87        num_return_values: usize,
88        original_stack_size: usize,
89        exit_is_branched_to: bool,
90        /// When this block corresponds to a try-table, keep the handler state
91        /// checkpoint and the list of catch blocks to seal once the scope ends.
92        try_table_info: Option<(HandlerStateCheckpoint, Vec<Block>)>,
93    },
94    Loop {
95        destination: Block,
96        header: Block,
97        num_param_values: usize,
98        num_return_values: usize,
99        original_stack_size: usize,
100    },
101}
102
103/// Helper methods for the control stack objects.
104impl ControlStackFrame {
105    pub fn num_return_values(&self) -> usize {
106        match *self {
107            Self::If {
108                num_return_values, ..
109            }
110            | Self::Block {
111                num_return_values, ..
112            }
113            | Self::Loop {
114                num_return_values, ..
115            } => num_return_values,
116        }
117    }
118    pub fn num_param_values(&self) -> usize {
119        match *self {
120            Self::If {
121                num_param_values, ..
122            }
123            | Self::Block {
124                num_param_values, ..
125            }
126            | Self::Loop {
127                num_param_values, ..
128            } => num_param_values,
129        }
130    }
131    pub fn following_code(&self) -> Block {
132        match *self {
133            Self::If { destination, .. }
134            | Self::Block { destination, .. }
135            | Self::Loop { destination, .. } => destination,
136        }
137    }
138    pub fn br_destination(&self) -> Block {
139        match *self {
140            Self::If { destination, .. } | Self::Block { destination, .. } => destination,
141            Self::Loop { header, .. } => header,
142        }
143    }
144    /// Private helper. Use `truncate_value_stack_to_else_params()` or
145    /// `truncate_value_stack_to_original_size()` to restore value-stack state.
146    fn original_stack_size(&self) -> usize {
147        match *self {
148            Self::If {
149                original_stack_size,
150                ..
151            }
152            | Self::Block {
153                original_stack_size,
154                ..
155            }
156            | Self::Loop {
157                original_stack_size,
158                ..
159            } => original_stack_size,
160        }
161    }
162    pub fn is_loop(&self) -> bool {
163        match *self {
164            Self::If { .. } | Self::Block { .. } => false,
165            Self::Loop { .. } => true,
166        }
167    }
168
169    pub fn exit_is_branched_to(&self) -> bool {
170        match *self {
171            Self::If {
172                exit_is_branched_to,
173                ..
174            }
175            | Self::Block {
176                exit_is_branched_to,
177                ..
178            } => exit_is_branched_to,
179            Self::Loop { .. } => false,
180        }
181    }
182
183    pub fn set_branched_to_exit(&mut self) {
184        match *self {
185            Self::If {
186                ref mut exit_is_branched_to,
187                ..
188            }
189            | Self::Block {
190                ref mut exit_is_branched_to,
191                ..
192            } => *exit_is_branched_to = true,
193            Self::Loop { .. } => {}
194        }
195    }
196
197    /// Pop values from the value stack so that it is left at the
198    /// input-parameters to an else-block.
199    pub fn truncate_value_stack_to_else_params(&self, stack: &mut Vec<Value>) {
200        debug_assert!(matches!(self, &Self::If { .. }));
201        stack.truncate(self.original_stack_size());
202    }
203
204    /// Pop values from the value stack so that it is left at the state it was
205    /// before this control-flow frame.
206    pub fn truncate_value_stack_to_original_size(&self, stack: &mut Vec<Value>) {
207        // The "If" frame pushes its parameters twice, so they're available to the else block
208        // (see also `FuncTranslationState::push_if`).
209        // Yet, the original_stack_size member accounts for them only once, so that the else
210        // block can see the same number of parameters as the consequent block. As a matter of
211        // fact, we need to substract an extra number of parameter values for if blocks.
212        let num_duplicated_params = match self {
213            &Self::If {
214                num_param_values, ..
215            } => {
216                debug_assert!(num_param_values <= self.original_stack_size());
217                num_param_values
218            }
219            _ => 0,
220        };
221        stack.truncate(self.original_stack_size() - num_duplicated_params);
222    }
223
224    /// Restore exception handler state and seal catch blocks when exiting a
225    /// try-table scope.
226    pub fn restore_catch_handlers(
227        &self,
228        handlers: &mut HandlerState,
229        builder: &mut FunctionBuilder,
230    ) {
231        if let Self::Block {
232            try_table_info: Some((checkpoint, catch_blocks)),
233            ..
234        } = self
235        {
236            handlers.restore_checkpoint(*checkpoint);
237            for block in catch_blocks {
238                builder.seal_block(*block);
239            }
240        }
241    }
242}
243
244/// Contains information passed along during a function's translation and that records:
245///
246/// - The current value and control stacks.
247/// - The depth of the two unreachable control blocks stacks, that are manipulated when translating
248///   unreachable code;
249pub struct FuncTranslationState {
250    /// A stack of values corresponding to the active values in the input wasm function at this
251    /// point.
252    pub(crate) stack: Vec<Value>,
253    /// A stack of active control flow operations at this point in the input wasm function.
254    pub(crate) control_stack: Vec<ControlStackFrame>,
255    /// Exception handler state used to attach catch blocks to try-calls.
256    pub(crate) handlers: HandlerState,
257    /// Is the current translation state still reachable? This is false when translating operators
258    /// like End, Return, or Unreachable.
259    pub(crate) reachable: bool,
260
261    // Map of global variables that have already been created by `FuncEnvironment::make_global`.
262    globals: HashMap<GlobalIndex, GlobalVariable>,
263
264    // Map of heaps that have been created by `FuncEnvironment::make_heap`.
265    heaps: HashMap<MemoryIndex, Heap>,
266
267    // Map of indirect call signatures that have been created by
268    // `FuncEnvironment::make_indirect_sig()`.
269    // Stores both the signature reference and the number of WebAssembly arguments
270    signatures: HashMap<SignatureIndex, (ir::SigRef, usize)>,
271
272    // Imported and local functions that have been created by
273    // `FuncEnvironment::make_direct_func()`.
274    // Stores both the function reference and the number of WebAssembly arguments
275    functions: HashMap<FunctionIndex, (ir::FuncRef, usize)>,
276}
277
278// Public methods that are exposed to non-`cranelift_wasm` API consumers.
279impl FuncTranslationState {
280    /// True if the current translation state expresses reachable code, false if it is unreachable.
281    #[inline]
282    #[allow(dead_code)]
283    pub fn reachable(&self) -> bool {
284        self.reachable
285    }
286}
287
288#[derive(Clone, Copy, Debug)]
289pub(crate) struct HandlerStateCheckpoint(usize, usize);
290
291#[derive(Default)]
292pub(crate) struct HandlerState {
293    handlers: Vec<Block>,
294    clauses: Vec<CatchClause>,
295}
296
297pub(crate) struct LandingPad {
298    pub(crate) block: Block,
299    pub(crate) clauses: Vec<CatchClause>,
300}
301
302impl HandlerState {
303    pub fn add_handler(&mut self, block: Block) {
304        self.handlers.push(block);
305    }
306
307    pub fn add_clause(&mut self, clause: CatchClause) {
308        self.clauses.push(clause);
309    }
310
311    pub fn take_checkpoint(&self) -> HandlerStateCheckpoint {
312        HandlerStateCheckpoint(self.handlers.len(), self.clauses.len())
313    }
314
315    pub fn restore_checkpoint(&mut self, checkpoint: HandlerStateCheckpoint) {
316        debug_assert!(checkpoint.0 <= self.handlers.len());
317        debug_assert!(checkpoint.1 <= self.clauses.len());
318        self.handlers.truncate(checkpoint.0);
319        self.clauses.truncate(checkpoint.1);
320    }
321
322    /// Get the latest landing pad block including all the tags covered by it.
323    pub fn landing_pad(&self) -> Option<LandingPad> {
324        self.handlers.last().copied().map(|block| LandingPad {
325            block,
326            clauses: self.unique_clauses(),
327        })
328    }
329
330    /// Returns an iterator over the catch clauses in reverse order, with duplicates removed.
331    pub fn unique_clauses(&self) -> Vec<CatchClause> {
332        self.clauses
333            .iter()
334            .unique_by(|c| c.wasm_tag)
335            .rev()
336            .cloned()
337            .collect()
338    }
339
340    pub fn is_empty(&self) -> bool {
341        self.handlers.is_empty()
342    }
343
344    pub fn clear(&mut self) {
345        self.handlers.clear();
346        self.clauses.clear();
347    }
348}
349
350impl FuncTranslationState {
351    /// Construct a new, empty, `FuncTranslationState`
352    pub(crate) fn new() -> Self {
353        Self {
354            stack: Vec::new(),
355            // TODO(reftypes):
356            //metadata_stack: Vec::new(),
357            control_stack: Vec::new(),
358            handlers: HandlerState::default(),
359            reachable: true,
360            globals: HashMap::new(),
361            heaps: HashMap::new(),
362            signatures: HashMap::new(),
363            functions: HashMap::new(),
364        }
365    }
366
367    fn clear(&mut self) {
368        debug_assert!(self.stack.is_empty());
369        debug_assert!(self.control_stack.is_empty());
370        debug_assert!(self.handlers.is_empty());
371        self.reachable = true;
372        self.handlers.clear();
373        self.globals.clear();
374        self.heaps.clear();
375        self.signatures.clear();
376        self.functions.clear();
377    }
378
379    /// Initialize the state for compiling a function with the given signature.
380    ///
381    /// This resets the state to containing only a single block representing the whole function.
382    /// The exit block is the last block in the function which will contain the return instruction.
383    pub(crate) fn initialize(&mut self, sig: &ir::Signature, exit_block: Block) {
384        self.clear();
385        self.push_block(
386            exit_block,
387            0,
388            sig.returns
389                .iter()
390                .filter(|arg| arg.purpose == ir::ArgumentPurpose::Normal)
391                .count(),
392        );
393    }
394
395    /// Push a value.
396    pub(crate) fn push1(&mut self, val: Value) {
397        self.stack.push(val);
398    }
399
400    /// Push multiple values.
401    pub(crate) fn pushn(&mut self, vals: &[Value]) {
402        self.stack.extend_from_slice(vals);
403    }
404
405    /// Pop one value.
406    pub(crate) fn pop1(&mut self) -> Value {
407        self.stack
408            .pop()
409            .expect("attempted to pop a value from an empty stack")
410    }
411
412    /// Peek at the top of the stack without popping it.
413    pub(crate) fn peek1(&self) -> Value {
414        *self
415            .stack
416            .last()
417            .expect("attempted to peek at a value on an empty stack")
418    }
419
420    /// Pop two values. Return them in the order they were pushed.
421    pub(crate) fn pop2(&mut self) -> (Value, Value) {
422        let v2 = self.pop1();
423        let v1 = self.pop1();
424        (v1, v2)
425    }
426
427    /// Pop three values. Return them in the order they were pushed.
428    pub(crate) fn pop3(&mut self) -> (Value, Value, Value) {
429        let v3 = self.pop1();
430        let v2 = self.pop1();
431        let v1 = self.pop1();
432        (v1, v2, v3)
433    }
434
435    /// Helper to ensure the stack size is at least as big as `n`; note that due to
436    /// `debug_assert` this will not execute in non-optimized builds.
437    #[inline]
438    fn ensure_length_is_at_least(&self, n: usize) {
439        debug_assert!(
440            n <= self.stack.len(),
441            "attempted to access {} values but stack only has {} values",
442            n,
443            self.stack.len()
444        );
445    }
446
447    /// Pop the top `n` values on the stack.
448    ///
449    /// The popped values are not returned. Use `peekn` to look at them before popping.
450    pub(crate) fn popn(&mut self, n: usize) {
451        self.ensure_length_is_at_least(n);
452        let new_len = self.stack.len() - n;
453        self.stack.truncate(new_len);
454    }
455
456    /// Peek at the top `n` values on the stack in the order they were pushed.
457    pub(crate) fn peekn(&self, n: usize) -> &[Value] {
458        self.ensure_length_is_at_least(n);
459        &self.stack[self.stack.len() - n..]
460    }
461
462    /// Peek at the top `n` values on the stack in the order they were pushed.
463    pub(crate) fn peekn_mut(&mut self, n: usize) -> &mut [Value] {
464        self.ensure_length_is_at_least(n);
465        let len = self.stack.len();
466        &mut self.stack[len - n..]
467    }
468
469    fn push_block_impl(
470        &mut self,
471        following_code: Block,
472        num_param_types: usize,
473        num_result_types: usize,
474        try_table_info: Option<(HandlerStateCheckpoint, Vec<Block>)>,
475    ) {
476        debug_assert!(num_param_types <= self.stack.len());
477        self.control_stack.push(ControlStackFrame::Block {
478            destination: following_code,
479            original_stack_size: self.stack.len() - num_param_types,
480            num_param_values: num_param_types,
481            num_return_values: num_result_types,
482            exit_is_branched_to: false,
483            try_table_info,
484        });
485    }
486
487    /// Push a block on the control stack.
488    pub(crate) fn push_block(
489        &mut self,
490        following_code: Block,
491        num_param_types: usize,
492        num_result_types: usize,
493    ) {
494        self.push_block_impl(following_code, num_param_types, num_result_types, None);
495    }
496
497    /// Push a try-table block on the control stack.
498    pub(crate) fn push_try_table_block(
499        &mut self,
500        following_code: Block,
501        catch_blocks: Vec<Block>,
502        num_param_types: usize,
503        num_result_types: usize,
504        checkpoint: HandlerStateCheckpoint,
505    ) {
506        self.push_block_impl(
507            following_code,
508            num_param_types,
509            num_result_types,
510            Some((checkpoint, catch_blocks)),
511        );
512    }
513
514    /// Push a loop on the control stack.
515    pub(crate) fn push_loop(
516        &mut self,
517        header: Block,
518        following_code: Block,
519        num_param_types: usize,
520        num_result_types: usize,
521    ) {
522        debug_assert!(num_param_types <= self.stack.len());
523        self.control_stack.push(ControlStackFrame::Loop {
524            header,
525            destination: following_code,
526            original_stack_size: self.stack.len() - num_param_types,
527            num_param_values: num_param_types,
528            num_return_values: num_result_types,
529        });
530    }
531
532    /// Push an if on the control stack.
533    pub(crate) fn push_if(
534        &mut self,
535        destination: Block,
536        else_data: ElseData,
537        num_param_types: usize,
538        num_result_types: usize,
539        blocktype: wasmer_compiler::wasmparser::BlockType,
540    ) {
541        debug_assert!(num_param_types <= self.stack.len());
542
543        // Push a second copy of our `if`'s parameters on the stack. This lets
544        // us avoid saving them on the side in the `ControlStackFrame` for our
545        // `else` block (if it exists), which would require a second heap
546        // allocation. See also the comment in `translate_operator` for
547        // `Operator::Else`.
548        self.stack.reserve(num_param_types);
549        for i in (self.stack.len() - num_param_types)..self.stack.len() {
550            let val = self.stack[i];
551            self.stack.push(val);
552        }
553
554        self.control_stack.push(ControlStackFrame::If {
555            destination,
556            else_data,
557            original_stack_size: self.stack.len() - num_param_types,
558            num_param_values: num_param_types,
559            num_return_values: num_result_types,
560            exit_is_branched_to: false,
561            head_is_reachable: self.reachable,
562            consequent_ends_reachable: None,
563            blocktype,
564        });
565    }
566}
567
568/// Methods for handling entity references.
569impl FuncTranslationState {
570    /// Get the `GlobalVariable` reference that should be used to access the global variable
571    /// `index`. Create the reference if necessary.
572    /// Also return the WebAssembly type of the global.
573    pub(crate) fn get_global<FE: FuncEnvironment + ?Sized>(
574        &mut self,
575        func: &mut ir::Function,
576        index: u32,
577        environ: &mut FE,
578    ) -> WasmResult<GlobalVariable> {
579        let index = GlobalIndex::from_u32(index);
580        match self.globals.entry(index) {
581            Occupied(entry) => Ok(*entry.get()),
582            Vacant(entry) => Ok(*entry.insert(environ.make_global(func, index)?)),
583        }
584    }
585
586    /// Get the `Heap` reference that should be used to access linear memory `index`.
587    /// Create the reference if necessary.
588    pub(crate) fn get_heap<FE: FuncEnvironment + ?Sized>(
589        &mut self,
590        func: &mut ir::Function,
591        index: u32,
592        environ: &mut FE,
593    ) -> WasmResult<Heap> {
594        let index = MemoryIndex::from_u32(index);
595        match self.heaps.entry(index) {
596            Occupied(entry) => Ok(*entry.get()),
597            Vacant(entry) => Ok(*entry.insert(environ.make_heap(func, index)?)),
598        }
599    }
600
601    /// Get the `SigRef` reference that should be used to make an indirect call with signature
602    /// `index`. Also return the number of WebAssembly arguments in the signature.
603    ///
604    /// Create the signature if necessary.
605    pub(crate) fn get_indirect_sig<FE: FuncEnvironment + ?Sized>(
606        &mut self,
607        func: &mut ir::Function,
608        index: u32,
609        environ: &mut FE,
610    ) -> WasmResult<(ir::SigRef, usize)> {
611        let index = SignatureIndex::from_u32(index);
612        match self.signatures.entry(index) {
613            Occupied(entry) => Ok(*entry.get()),
614            Vacant(entry) => {
615                let sig = environ.make_indirect_sig(func, index)?;
616                Ok(*entry.insert((sig, num_wasm_parameters(environ, &func.dfg.signatures[sig]))))
617            }
618        }
619    }
620
621    /// Get the `FuncRef` reference that should be used to make a direct call to function
622    /// `index`. Also return the number of WebAssembly arguments in the signature.
623    ///
624    /// Create the function reference if necessary.
625    pub(crate) fn get_direct_func<FE: FuncEnvironment + ?Sized>(
626        &mut self,
627        func: &mut ir::Function,
628        index: u32,
629        environ: &mut FE,
630    ) -> WasmResult<(ir::FuncRef, usize)> {
631        let index = FunctionIndex::from_u32(index);
632        match self.functions.entry(index) {
633            Occupied(entry) => Ok(*entry.get()),
634            Vacant(entry) => {
635                let fref = environ.make_direct_func(func, index)?;
636                let sig = func.dfg.ext_funcs[fref].signature;
637                Ok(*entry.insert((
638                    fref,
639                    num_wasm_parameters(environ, &func.dfg.signatures[sig]),
640                )))
641            }
642        }
643    }
644}
645
646fn num_wasm_parameters<FE: FuncEnvironment + ?Sized>(
647    environ: &FE,
648    signature: &ir::Signature,
649) -> usize {
650    (0..signature.params.len())
651        .filter(|index| environ.is_wasm_parameter(signature, *index))
652        .count()
653}