wasmer_compiler_cranelift/
eh.rs

1//! Helpers for generating DWARF LSDA data for Cranelift-compiled functions.
2//!
3//! The structures and encoding implemented here mirror what LLVM produces for
4//! Wasm exception handling so that Wasmer's libunwind personalities can parse
5//! the tables without any runtime changes.
6
7use cranelift_codegen::{
8    ExceptionContextLoc, FinalizedMachCallSite, FinalizedMachExceptionHandler,
9    isa::unwind::UnwindInst,
10};
11use cranelift_entity::EntityRef;
12use itertools::Itertools;
13use std::collections::hash_map::Entry;
14use std::collections::{HashMap, HashSet};
15use std::convert::TryFrom;
16use std::io::{Cursor, Write};
17
18use wasmer_compiler::types::{
19    relocation::{Relocation, RelocationKind, RelocationTarget},
20    section::{CustomSection, CustomSectionProtection, SectionBody, SectionIndex},
21};
22use wasmer_types::{LibCall, LocalFunctionIndex};
23
/// A pending fix-up inside an LSDA blob for a slot that refers to an
/// exception tag constant.
#[derive(Clone, Debug)]
pub struct TagRelocation {
    /// Byte offset, within the LSDA blob, of the slot that must be patched.
    pub offset: u32,
    /// Module-local value of the exception tag the slot refers to.
    pub tag: u32,
}
32
33/// Fully encoded LSDA bytes for a single function, together with pending tag
34/// relocations that will be resolved once the global tag section is built.
35#[derive(Debug, Clone)]
36pub struct FunctionLsdaData {
37    pub bytes: Vec<u8>,
38    pub relocations: Vec<TagRelocation>,
39}
40
41/// Build the LSDA for a single function given the finalized Cranelift
42/// call-site metadata.
43pub fn build_function_lsda<'a>(
44    call_sites: impl Iterator<Item = FinalizedMachCallSite<'a>>,
45    function_length: usize,
46    pointer_bytes: u8,
47) -> Option<FunctionLsdaData> {
48    let mut sites = Vec::new();
49
50    for site in call_sites {
51        let mut catches = Vec::new();
52        let mut landing_pad = None;
53
54        // Our landing pads handle all the tags considered for a call instruction, thus
55        // we use the latest landing pad.
56        for handler in site.exception_handlers {
57            match handler {
58                FinalizedMachExceptionHandler::Tag(tag, offset) => {
59                    landing_pad = Some(landing_pad.unwrap_or(*offset));
60                    catches.push(ExceptionType::Tag {
61                        tag: u32::try_from(tag.index()).expect("tag index fits in u32"),
62                    });
63                }
64                FinalizedMachExceptionHandler::Default(offset) => {
65                    landing_pad = Some(landing_pad.unwrap_or(*offset));
66                    catches.push(ExceptionType::CatchAll);
67                }
68                FinalizedMachExceptionHandler::Context(context) => {
69                    // Context records are used by Cranelift to thread VMContext
70                    // information through the landing pad. We emit the LSDA
71                    // regardless of whether we see them; nothing to do here.
72                    match context {
73                        ExceptionContextLoc::SPOffset(_) | ExceptionContextLoc::GPR(_) => {}
74                    }
75                }
76            }
77        }
78
79        if catches.is_empty() {
80            continue;
81        }
82
83        let landing_pad = landing_pad.expect("landing pad offset set when catches exist");
84        let cs_start = site.ret_addr.saturating_sub(1);
85
86        sites.push(CallSiteDesc {
87            start: cs_start,
88            len: 1,
89            landing_pad,
90            actions: catches,
91        });
92    }
93
94    if sites.is_empty() {
95        return None;
96    }
97
98    // Ensure all instructions in the function are covered by filling gaps with
99    // default unwinding behavior (no catch actions).
100    let mut current_pos = 0u32;
101    let mut filled_sites = Vec::new();
102
103    for site in sites {
104        if site.start > current_pos {
105            // Gap found: add a default site that covers instructions with no handlers
106            filled_sites.push(CallSiteDesc {
107                start: current_pos,
108                len: site.start - current_pos,
109                landing_pad: 0,
110                actions: Vec::new(),
111            });
112        }
113        current_pos = site.start + site.len;
114        filled_sites.push(site);
115    }
116
117    // Cover any remaining instructions at the end of the function
118    if current_pos < function_length as u32 {
119        filled_sites.push(CallSiteDesc {
120            start: current_pos,
121            len: function_length as u32 - current_pos,
122            landing_pad: 0,
123            actions: Vec::new(),
124        });
125    }
126
127    let sites = filled_sites;
128
129    let mut type_entries = TypeTable::new();
130    let mut callsite_actions = Vec::with_capacity(sites.len());
131
132    for site in &sites {
133        #[cfg(debug_assertions)]
134        {
135            // CatchAll must always be the last item in the action list; otherwise, the tags that follow
136            // it will be ignored.
137            let catch_all_positions = site
138                .actions
139                .iter()
140                .positions(|a| matches!(a, ExceptionType::CatchAll))
141                .collect_vec();
142            assert!(catch_all_positions.iter().at_most_one().is_ok());
143            if let Some(&i) = catch_all_positions.first() {
144                assert!(i == site.actions.len() - 1);
145            }
146        }
147
148        let action_indices = site
149            .actions
150            .iter()
151            // Reverse actions to ensure CatchAll is always last in the chain, since the action table
152            // encoding uses back references and relies on this ordering.
153            .rev()
154            .map(|action| type_entries.get_or_insert(*action) as i32)
155            .collect_vec();
156        callsite_actions.push(action_indices);
157    }
158
159    let action_table = encode_action_table(&callsite_actions);
160    let call_site_table = encode_call_site_table(&sites, &action_table);
161    let (type_table_bytes, type_table_relocs) = type_entries.encode(pointer_bytes);
162
163    let call_site_table_len = call_site_table.len() as u64;
164    let mut writer = Cursor::new(Vec::new());
165    writer
166        .write_all(&gimli::DW_EH_PE_omit.0.to_le_bytes())
167        .unwrap(); // lpstart encoding omitted (relative to function start)
168
169    if type_entries.is_empty() {
170        writer
171            .write_all(&gimli::DW_EH_PE_omit.0.to_le_bytes())
172            .unwrap();
173    } else {
174        writer
175            .write_all(&gimli::DW_EH_PE_absptr.0.to_le_bytes())
176            .unwrap();
177    }
178
179    if !type_entries.is_empty() {
180        let ttype_table_end = 1 // call-site encoding byte
181            + uleb128_len(call_site_table_len)
182            + call_site_table.len()
183            + action_table.bytes.len()
184            + type_table_bytes.len();
185        leb128::write::unsigned(&mut writer, ttype_table_end as u64).unwrap();
186    }
187
188    writer
189        .write_all(&gimli::DW_EH_PE_udata4.0.to_le_bytes())
190        .unwrap();
191    leb128::write::unsigned(&mut writer, call_site_table_len).unwrap();
192    writer.write_all(&call_site_table).unwrap();
193    writer.write_all(&action_table.bytes).unwrap();
194
195    let type_table_offset = writer.position() as u32;
196    writer.write_all(&type_table_bytes).unwrap();
197
198    let mut relocations = Vec::new();
199    for reloc in type_table_relocs {
200        relocations.push(TagRelocation {
201            offset: type_table_offset + reloc.offset,
202            tag: reloc.tag,
203        });
204    }
205
206    Some(FunctionLsdaData {
207        bytes: writer.into_inner(),
208        relocations,
209    })
210}
211
212/// Build the global tag section and a tag->offset map.
213pub fn build_tag_section(
214    lsda_data: &[Option<FunctionLsdaData>],
215) -> Option<(CustomSection, HashMap<u32, u32>)> {
216    let mut unique_tags = HashSet::new();
217    for data in lsda_data.iter().flatten() {
218        for reloc in &data.relocations {
219            unique_tags.insert(reloc.tag);
220        }
221    }
222
223    if unique_tags.is_empty() {
224        return None;
225    }
226
227    let mut tags: Vec<u32> = unique_tags.into_iter().collect();
228    tags.sort_unstable();
229
230    let mut bytes = Vec::with_capacity(tags.len() * std::mem::size_of::<u32>());
231    let mut offsets = HashMap::new();
232    for tag in tags {
233        let offset = bytes.len() as u32;
234        bytes.extend_from_slice(&tag.to_ne_bytes());
235        offsets.insert(tag, offset);
236    }
237
238    let section = CustomSection {
239        protection: CustomSectionProtection::Read,
240        alignment: None,
241        bytes: SectionBody::new_with_vec(bytes),
242        relocations: Vec::new(),
243    };
244
245    Some((section, offsets))
246}
247
248/// Build the LSDA custom section and record the offset for each function.
249///
250/// Returns the section (if any) and a vector mapping each function index to
251/// its LSDA offset inside the section. Even when utilizing the same landing pad for exception tags,
252/// Cranelift generates separate landing pad locations.
253/// These locations are essentially small trampolines that redirect to the basic block we established (the EH dispatch block).
254///
255/// The section can be dumped using the elfutils' readelf tool:
256/// ```shell
257/// objcopy -I binary -O elf64-x86-64 --rename-section .data=.gcc_except_table,alloc,contents lsda.bin object.o && eu-readelf -w object.o
258/// ```
259pub fn build_lsda_section(
260    lsda_data: Vec<Option<FunctionLsdaData>>,
261    pointer_bytes: u8,
262    tag_offsets: &HashMap<u32, u32>,
263    tag_section_index: Option<SectionIndex>,
264) -> (Option<CustomSection>, Vec<Option<u32>>) {
265    let mut bytes = Vec::new();
266    let mut relocations = Vec::new();
267    let mut offsets_per_function = Vec::with_capacity(lsda_data.len());
268
269    let pointer_kind = match pointer_bytes {
270        4 => RelocationKind::Abs4,
271        8 => RelocationKind::Abs8,
272        other => panic!("unsupported pointer size {other} for LSDA generation"),
273    };
274
275    for data in lsda_data.into_iter() {
276        if let Some(data) = data {
277            let base = bytes.len() as u32;
278            bytes.extend_from_slice(&data.bytes);
279
280            for reloc in &data.relocations {
281                let target_offset = tag_offsets
282                    .get(&reloc.tag)
283                    .copied()
284                    .expect("missing tag offset for relocation");
285                relocations.push(Relocation {
286                    kind: pointer_kind,
287                    reloc_target: RelocationTarget::CustomSection(
288                        tag_section_index
289                            .expect("tag section index must exist when relocations are present"),
290                    ),
291                    offset: base + reloc.offset,
292                    addend: target_offset as i64,
293                });
294            }
295
296            offsets_per_function.push(Some(base));
297        } else {
298            offsets_per_function.push(None);
299        }
300    }
301
302    if bytes.is_empty() {
303        (None, offsets_per_function)
304    } else {
305        (
306            Some(CustomSection {
307                protection: CustomSectionProtection::Read,
308                alignment: None,
309                bytes: SectionBody::new_with_vec(bytes),
310                relocations,
311            }),
312            offsets_per_function,
313        )
314    }
315}
316
317#[derive(Debug, Clone)]
318pub struct CompactUnwindEntryData {
319    pub function: LocalFunctionIndex,
320    pub function_length: u32,
321    pub compact_encoding: u32,
322    pub lsda_offset: Option<u32>,
323}
324
325/// Build the 64-bit Mach-O `__compact_unwind` section consumed by the
326/// runtime compact-unwind publisher.
327pub fn build_compact_unwind_section(
328    entries: impl IntoIterator<Item = CompactUnwindEntryData>,
329    lsda_section_index: Option<SectionIndex>,
330) -> Option<CustomSection> {
331    const ENTRY_SIZE: usize = 32;
332    const FUNCTION_ADDR_OFFSET: u32 = 0;
333    const PERSONALITY_ADDR_OFFSET: u32 = 16;
334    const LSDA_ADDR_OFFSET: u32 = 24;
335
336    let entries = entries.into_iter().collect::<Vec<_>>();
337    if entries.is_empty() {
338        return None;
339    }
340
341    let mut bytes = Vec::with_capacity(entries.len() * ENTRY_SIZE);
342    let mut relocations = Vec::new();
343
344    for entry in entries {
345        let base = bytes.len() as u32;
346
347        bytes.extend_from_slice(&0u64.to_le_bytes());
348        bytes.extend_from_slice(&entry.function_length.to_le_bytes());
349        bytes.extend_from_slice(&entry.compact_encoding.to_le_bytes());
350        bytes.extend_from_slice(&0u64.to_le_bytes());
351        bytes.extend_from_slice(&0u64.to_le_bytes());
352
353        relocations.push(Relocation {
354            kind: RelocationKind::Abs8,
355            reloc_target: RelocationTarget::LocalFunc(entry.function),
356            offset: base + FUNCTION_ADDR_OFFSET,
357            addend: 0,
358        });
359        relocations.push(Relocation {
360            kind: RelocationKind::Abs8,
361            reloc_target: RelocationTarget::LibCall(LibCall::EHPersonality),
362            offset: base + PERSONALITY_ADDR_OFFSET,
363            addend: 0,
364        });
365
366        if let Some(lsda_offset) = entry.lsda_offset {
367            relocations.push(Relocation {
368                kind: RelocationKind::Abs8,
369                reloc_target: RelocationTarget::CustomSection(
370                    lsda_section_index.expect("LSDA section index required for LSDA relocation"),
371                ),
372                offset: base + LSDA_ADDR_OFFSET,
373                addend: lsda_offset as i64,
374            });
375        }
376    }
377
378    Some(CustomSection {
379        protection: CustomSectionProtection::Read,
380        alignment: Some(8),
381        bytes: SectionBody::new_with_vec(bytes),
382        relocations,
383    })
384}
385
// Constants are defined in compact_unwind_encoding.h file.

// Mode bits selecting how the rest of the encoding is interpreted.
const UNWIND_ARM64_MODE_FRAMELESS: u32 = 0x0200_0000;
const UNWIND_ARM64_MODE_FRAME: u32 = 0x0400_0000;

// Frameless mode: bit position of the stack-size field.
const UNWIND_ARM64_FRAMELESS_STACK_SIZE_SHIFT: u32 = 12;

// Frame mode: one bit per saved integer register pair.
const UNWIND_ARM64_FRAME_X19_X20_PAIR: u32 = 0x0000_0001;
const UNWIND_ARM64_FRAME_X21_X22_PAIR: u32 = 0x0000_0002;
const UNWIND_ARM64_FRAME_X23_X24_PAIR: u32 = 0x0000_0004;
const UNWIND_ARM64_FRAME_X25_X26_PAIR: u32 = 0x0000_0008;
const UNWIND_ARM64_FRAME_X27_X28_PAIR: u32 = 0x0000_0010;

// Frame mode: one bit per saved floating-point register pair.
const UNWIND_ARM64_FRAME_D8_D9_PAIR: u32 = 0x0000_0100;
const UNWIND_ARM64_FRAME_D10_D11_PAIR: u32 = 0x0000_0200;
const UNWIND_ARM64_FRAME_D12_D13_PAIR: u32 = 0x0000_0400;
const UNWIND_ARM64_FRAME_D14_D15_PAIR: u32 = 0x0000_0800;

// The frameless stack size is stored as a count of 16-byte units.
const STACK_SIZE_UNIT: u32 = 16;
402
403pub fn compact_unwind_encoding_aarch64(unwind_info: &[(u32, UnwindInst)]) -> Result<u32, String> {
404    let mut has_frame = false;
405    let mut stack_size = 0u32;
406    let mut saved_int = HashSet::new();
407    let mut saved_float = HashSet::new();
408
409    for (_, inst) in unwind_info {
410        match inst {
411            UnwindInst::PushFrameRegs { .. } | UnwindInst::DefineNewFrame { .. } => {
412                has_frame = true;
413            }
414            UnwindInst::StackAlloc { size } => {
415                stack_size = stack_size
416                    .checked_add(*size)
417                    .ok_or_else(|| "aarch64 compact-unwind stack size overflow".to_string())?;
418            }
419            UnwindInst::SaveReg { reg, .. } => match reg.class() {
420                regalloc2::RegClass::Int => {
421                    saved_int.insert(reg.hw_enc());
422                }
423                regalloc2::RegClass::Float => {
424                    saved_float.insert(reg.hw_enc());
425                }
426                regalloc2::RegClass::Vector => {
427                    return Err(
428                        "aarch64 compact-unwind cannot encode vector register saves".to_owned()
429                    );
430                }
431            },
432            UnwindInst::RegStackOffset { .. } => {
433                return Err("aarch64 compact-unwind cannot encode RegStackOffset".to_owned());
434            }
435            UnwindInst::Aarch64SetPointerAuth { .. } => {}
436        }
437    }
438
439    if !has_frame {
440        if !saved_int.is_empty() || !saved_float.is_empty() {
441            return Err("aarch64 frameless compact-unwind cannot encode saved registers".into());
442        }
443        if !stack_size.is_multiple_of(STACK_SIZE_UNIT) {
444            return Err("aarch64 compact-unwind stack size must be 16-byte aligned".into());
445        }
446        let stack_units = stack_size / STACK_SIZE_UNIT;
447        if stack_units > 0x0fff {
448            return Err("aarch64 compact-unwind stack size is too large".into());
449        }
450        return Ok(
451            UNWIND_ARM64_MODE_FRAMELESS | (stack_units << UNWIND_ARM64_FRAMELESS_STACK_SIZE_SHIFT)
452        );
453    }
454
455    let encode_saved_pair = |saved: &mut HashSet<_>, lo, hi, bit, class_name| match (
456        saved.remove(&lo),
457        saved.remove(&hi),
458    ) {
459        (false, false) => Ok(0),
460        (true, true) => Ok(bit),
461        _ => Err(format!(
462            "aarch64 compact-unwind cannot encode unpaired {class_name}{lo}/{class_name}{hi} save"
463        )),
464    };
465
466    let mut encoding = UNWIND_ARM64_MODE_FRAME;
467    for (lo, hi, bit) in [
468        (19, 20, UNWIND_ARM64_FRAME_X19_X20_PAIR),
469        (21, 22, UNWIND_ARM64_FRAME_X21_X22_PAIR),
470        (23, 24, UNWIND_ARM64_FRAME_X23_X24_PAIR),
471        (25, 26, UNWIND_ARM64_FRAME_X25_X26_PAIR),
472        (27, 28, UNWIND_ARM64_FRAME_X27_X28_PAIR),
473    ] {
474        encoding |= encode_saved_pair(&mut saved_int, lo, hi, bit, "x")?;
475    }
476    for (lo, hi, bit) in [
477        (8, 9, UNWIND_ARM64_FRAME_D8_D9_PAIR),
478        (10, 11, UNWIND_ARM64_FRAME_D10_D11_PAIR),
479        (12, 13, UNWIND_ARM64_FRAME_D12_D13_PAIR),
480        (14, 15, UNWIND_ARM64_FRAME_D14_D15_PAIR),
481    ] {
482        encoding |= encode_saved_pair(&mut saved_float, lo, hi, bit, "d")?;
483    }
484
485    if !saved_int.is_empty() || !saved_float.is_empty() {
486        return Err("aarch64 compact-unwind encountered unsupported saved register".to_owned());
487    }
488
489    Ok(encoding)
490}
491
492#[derive(Debug)]
493struct CallSiteDesc {
494    start: u32,
495    len: u32,
496    landing_pad: u32,
497    actions: Vec<ExceptionType>,
498}
499
/// What a single catch action matches.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum ExceptionType {
    /// Matches exceptions carrying the given module-local tag.
    Tag { tag: u32 },
    /// Matches every exception.
    CatchAll,
}
505
506#[derive(Debug)]
507struct TypeTable {
508    entries: indexmap::IndexSet<ExceptionType>,
509}
510
511impl TypeTable {
512    fn new() -> Self {
513        Self {
514            entries: indexmap::IndexSet::new(),
515        }
516    }
517
518    fn is_empty(&self) -> bool {
519        self.entries.is_empty()
520    }
521
522    fn get_or_insert(&mut self, exception: ExceptionType) -> usize {
523        self.entries.insert(exception);
524
525        // The indices are one-based!
526        self.entries
527            .get_index_of(&exception)
528            .expect("must be already inserted")
529            + 1
530    }
531
532    fn encode(&self, pointer_bytes: u8) -> (Vec<u8>, Vec<TagRelocation>) {
533        let mut bytes = Vec::with_capacity(self.entries.len() * pointer_bytes as usize);
534        let mut relocations = Vec::new();
535
536        // Note the exception types must be streamed in the reverse order!
537        for entry in self.entries.iter().rev() {
538            let offset = bytes.len() as u32;
539            match entry {
540                ExceptionType::Tag { tag } => {
541                    bytes.extend(std::iter::repeat_n(0, pointer_bytes as usize));
542                    relocations.push(TagRelocation { offset, tag: *tag });
543                }
544                ExceptionType::CatchAll => {
545                    bytes.extend(std::iter::repeat_n(0, pointer_bytes as usize));
546                }
547            }
548        }
549
550        (bytes, relocations)
551    }
552}
553
/// Encoded action table plus, for every call site, where its chain starts.
struct ActionTable {
    /// The encoded action records.
    bytes: Vec<u8>,
    /// Per call site: byte offset within `bytes` of the first record of its
    /// action chain, or `None` when the site has no actions.
    first_action_offsets: Vec<Option<u32>>,
}
558
559fn encode_action_table(callsite_actions: &[Vec<i32>]) -> ActionTable {
560    let mut writer = Cursor::new(Vec::new());
561    let mut first_action_offsets = Vec::new();
562
563    let mut cache = HashMap::new();
564
565    for actions in callsite_actions {
566        if actions.is_empty() {
567            first_action_offsets.push(None);
568        } else {
569            match cache.entry(actions.clone()) {
570                Entry::Occupied(entry) => {
571                    first_action_offsets.push(Some(*entry.get()));
572                }
573                Entry::Vacant(entry) => {
574                    let mut last_action_start = 0;
575                    for (i, &ttype_index) in actions.iter().enumerate() {
576                        let next_action_start = writer.position();
577                        leb128::write::signed(&mut writer, ttype_index as i64)
578                            .expect("leb128 write failed");
579
580                        if i != 0 {
581                            // Make a linked list to the previous action
582                            let displacement = last_action_start - writer.position() as i64;
583                            leb128::write::signed(&mut writer, displacement)
584                                .expect("leb128 write failed");
585                        } else {
586                            leb128::write::signed(&mut writer, 0).expect("leb128 write failed");
587                        }
588                        last_action_start = next_action_start as i64;
589                    }
590                    let last_action_start = last_action_start as u32;
591                    entry.insert(last_action_start);
592                    first_action_offsets.push(Some(last_action_start));
593                }
594            }
595        }
596    }
597
598    ActionTable {
599        bytes: writer.into_inner(),
600        first_action_offsets,
601    }
602}
603
604fn encode_call_site_table(callsites: &[CallSiteDesc], action_table: &ActionTable) -> Vec<u8> {
605    let mut writer = Cursor::new(Vec::new());
606    for (idx, site) in callsites.iter().enumerate() {
607        write_encoded_offset(site.start, &mut writer);
608        write_encoded_offset(site.len, &mut writer);
609        write_encoded_offset(site.landing_pad, &mut writer);
610
611        let action = match action_table.first_action_offsets[idx] {
612            Some(offset) => offset as u64 + 1,
613            None => 0,
614        };
615        leb128::write::unsigned(&mut writer, action).expect("leb128 write failed");
616    }
617    writer.into_inner()
618}
619
/// Append `val` to `out` as four little-endian bytes.
///
/// # Panics
///
/// Panics if writing to `out` fails.
fn write_encoded_offset(val: u32, out: &mut impl Write) {
    // We use DW_EH_PE_udata4 for all offsets.
    let encoded = u32::to_le_bytes(val);
    out.write_all(&encoded).expect("write to buffer failed");
}
625
/// Number of bytes the unsigned LEB128 encoding of `value` occupies.
///
/// Unsigned LEB128 stores seven payload bits per byte and always emits at
/// least one byte, so the length follows directly from the number of
/// significant bits. Computing it this way needs no scratch buffer and
/// cannot fail.
fn uleb128_len(value: u64) -> usize {
    // Zero has no significant bits but still takes one byte.
    let significant_bits = (u64::BITS - value.leading_zeros()).max(1);
    significant_bits.div_ceil(7) as usize
}