wasmer_vm/libcalls/eh/dwarf/
eh.rs

1//! Parsing of GCC-style Language-Specific Data Area (LSDA)
2//! For details see:
3//!  * <https://refspecs.linuxfoundation.org/LSB_3.0.0/LSB-PDA/LSB-PDA/ehframechpt.html>
4//!  * <https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html>
5//!  * <https://itanium-cxx-abi.github.io/cxx-abi/exceptions.pdf>
6//!  * <https://www.airs.com/blog/archives/460>
7//!  * <https://www.airs.com/blog/archives/464>
8//!
9//! A reference implementation may be found in the GCC source tree
10//! (`<root>/libgcc/unwind-c.c` as of this writing).
11
12#![allow(non_upper_case_globals)]
13#![allow(clippy::transmutes_expressible_as_ptr_casts)]
14#![allow(clippy::comparison_chain)]
15#![allow(unused)]
16
17use core::{mem, ptr};
18
19use gimli::DwEhPe;
20
21use super::DwarfReader;
22
/// Frame information consulted while interpreting an LSDA.
///
/// `ip` and `func_start` are compared against the call-site table, while the
/// two callbacks supply the base addresses used to resolve `DW_EH_PE_textrel`
/// and `DW_EH_PE_datarel` encoded pointers.
#[derive(Clone, Copy)]
pub struct EHContext<'a> {
    /// Current instruction pointer.
    pub ip: *const u8,
    /// Pointer to the current function.
    pub func_start: *const u8,
    /// Get pointer to the code section.
    pub get_text_start: &'a dyn Fn() -> *const u8,
    /// Get pointer to the data section.
    pub get_data_start: &'a dyn Fn() -> *const u8,
}
30
/// Address of a landing pad, i.e. the code location the unwinder should
/// transfer control to for a matching call site.
type LPad = *const u8;
33
34#[derive(Debug, Clone)]
35pub enum EHAction {
36    None,
37    CatchAll { lpad: LPad },
38    CatchSpecific { lpad: LPad, tags: Vec<u32> },
39    CatchSpecificOrAll { lpad: LPad, tags: Vec<u32> },
40    Terminate,
41}
42
/// Whether the target unwinds with setjmp/longjmp (SjLj) exceptions.
///
/// 32-bit ARM Darwin platforms use SjLj exceptions. The one exception is
/// watchOS armv7k (specifically that subarchitecture), which instead uses
/// DWARF Call Frame Information (CFI) unwinding.
///
/// <https://github.com/llvm/llvm-project/blob/llvmorg-18.1.4/clang/lib/Driver/ToolChains/Darwin.cpp#L3107-L3119>
pub const USING_SJLJ_EXCEPTIONS: bool =
    cfg!(all(target_arch = "arm", target_vendor = "apple", not(target_os = "watchos")));
54
// Debug logging for the personality function. The output is compiled but never
// executed; change `false` to `true` below to enable it.
macro_rules! log {
    // Accepts the same comma-separated argument list as `eprintln!`
    // (including a lone format string).
    ($($arg:expr),*) => {
        if false {
            eprintln!($($arg),*)
        }
    };
}
71
72pub unsafe fn find_eh_action(lsda: *const u8, context: &EHContext<'_>) -> Result<EHAction, ()> {
73    if lsda.is_null() {
74        return Ok(EHAction::None);
75    }
76
77    log!("(pers) Analysing LSDA at {lsda:?}");
78
79    let func_start = context.func_start;
80    let mut reader = DwarfReader::new(lsda);
81
82    let lpad_start_encoding = unsafe { DwEhPe(reader.read::<u8>()) };
83    log!("(pers) Read LP start encoding {lpad_start_encoding:?}");
84
85    let lpad_base = unsafe {
86        // base address for landing pad offsets
87        if lpad_start_encoding != gimli::DW_EH_PE_omit {
88            read_encoded_pointer(&mut reader, context, lpad_start_encoding)?
89        } else {
90            log!("(pers) (is omit)");
91            func_start
92        }
93    };
94    log!("(pers) read landingpad base: {lpad_base:?}");
95
96    let types_table_encoding = unsafe { DwEhPe(reader.read::<u8>()) };
97    log!("(pers) read ttype encoding: {types_table_encoding:?}");
98
99    // If no value for types_table_encoding was given it means that there's no
100    // types_table, therefore we can't possibly use this lpad.
101    if types_table_encoding == gimli::DW_EH_PE_omit {
102        log!("(pers) ttype is omit, returning None");
103        return Ok(EHAction::None);
104    }
105
106    let types_table_base_offset = unsafe { reader.read_uleb128() };
107
108    let types_table_base = unsafe {
109        log!("(pers) read class_info offset {types_table_base_offset:?}");
110        reader.ptr.wrapping_add(types_table_base_offset as _)
111    };
112    log!("(pers) read types_table_base sits at offset {types_table_base:?}");
113
114    let call_site_table_encoding = unsafe { DwEhPe(reader.read::<u8>()) };
115    log!("(pers) read call_site_table_encoding is {call_site_table_encoding:?}");
116
117    let call_site_table_size = unsafe { reader.read_uleb128() };
118    let action_table = unsafe {
119        log!("(pers) read call_site has length {call_site_table_size:?}");
120        reader.ptr.wrapping_add(call_site_table_size as usize)
121    };
122
123    log!("(pers) action table sits at offset {action_table:?}");
124    let ip = context.ip;
125
126    if !USING_SJLJ_EXCEPTIONS {
127        // read the callsite table
128        while reader.ptr < action_table {
129            let call_site_record_reader = &mut reader;
130            unsafe {
131                // Offset of the call site relative to the previous call site, counted in number of 16-byte bundles
132                let call_site_start =
133                    read_encoded_offset(call_site_record_reader, call_site_table_encoding)?;
134                let call_site_length =
135                    read_encoded_offset(call_site_record_reader, call_site_table_encoding)?;
136                // Offset of the landing pad, typically a byte offset relative to the LPStart address.
137                let call_site_lpad =
138                    read_encoded_offset(call_site_record_reader, call_site_table_encoding)?;
139                // Offset of the first associated action record, relative to the start of the actions table.
140                // This value is biased by 1 (1 indicates the start of the actions table), and 0 indicates that there are no actions.
141                let call_site_action_entry = call_site_record_reader.read_uleb128();
142
143                log!("(pers) read cs_start is {call_site_start:?}");
144                log!("(pers) read cs_len is {call_site_length:?}");
145                log!("(pers) read cs_lpad is {call_site_lpad:?}");
146                log!("(pers) read cs_ae is {call_site_action_entry:?}");
147                // Callsite table is sorted by cs_start, so if we've passed the ip, we
148                // may stop searching.
149                if ip < func_start.wrapping_add(call_site_start) {
150                    break;
151                }
152
153                // Call site matches the current ip. It's a candidate.
154                if ip < func_start.wrapping_add(call_site_start + call_site_length) {
155                    log!(
156                        "(pers) found a matching call site: {func_start:?} <= {ip:?} <= {:?}",
157                        func_start.wrapping_add(call_site_start + call_site_length)
158                    );
159                    if call_site_lpad == 0 {
160                        return Ok(EHAction::None);
161                    } else {
162                        let lpad = lpad_base.wrapping_add(call_site_lpad);
163                        let mut catches = vec![];
164
165                        log!("(pers) lpad sits at {lpad:?}");
166
167                        if call_site_action_entry == 0 {
168                            // We don't generate cleanup clauses, so this can't happen
169                            return Ok(EHAction::Terminate);
170                        }
171
172                        log!("(pers) read cs_action_entry: {call_site_action_entry}");
173                        log!("(pers) action_table: {action_table:?}");
174
175                        // Convert 1-based byte offset into
176                        let mut action_record: *const u8 =
177                            action_table.wrapping_add((call_site_action_entry - 1) as usize);
178
179                        log!("(pers) first action at: {action_record:?}");
180
181                        loop {
182                            // Read the action record.
183                            let mut action_record_reader = DwarfReader::new(action_record);
184                            // The two record kinds have the same format, with only small differences.
185                            // They are distinguished by the "type_filter" field: Catch clauses have strictly positive switch values,
186                            // and exception specifications have strictly negative switch values. Value 0 indicates a catch-all clause.
187                            let type_filter = action_record_reader.read_sleb128();
188                            log!(
189                                "(pers) type_filter for action #{call_site_action_entry}: {type_filter:?}"
190                            );
191
192                            if type_filter > 0 {
193                                // This is a catch clause so the type_filter is an index into the types table.
194                                //
195                                // Positive value, starting at 1.
196                                // Index in the types table of the __typeinfo for the catch-clause type.
197                                // 1 is the first word preceding TTBase, 2 is the second word, and so on.
198                                // Used by the runtime to check if the thrown exception type matches the catch-clause type.
199                                let types_table_index = type_filter;
200                                if types_table_base.is_null() {
201                                    panic!();
202                                }
203
204                                let tag_ptr = {
205                                    let new_types_table_index =
206                                        match DwEhPe(types_table_encoding.0 & 0x0f) {
207                                            gimli::DW_EH_PE_absptr => {
208                                                type_filter * (size_of::<*const u8>() as i64)
209                                            }
210                                            gimli::DW_EH_PE_sdata2 | gimli::DW_EH_PE_udata2 => {
211                                                type_filter * 2
212                                            }
213                                            gimli::DW_EH_PE_sdata4 | gimli::DW_EH_PE_udata4 => {
214                                                type_filter * 4
215                                            }
216                                            gimli::DW_EH_PE_sdata8 | gimli::DW_EH_PE_udata8 => {
217                                                type_filter * 8
218                                            }
219                                            _ => panic!(),
220                                        };
221
222                                    log!(
223                                        "(pers) new_types_table_index for action #{call_site_action_entry}: {new_types_table_index:?}"
224                                    );
225
226                                    let typeinfo = types_table_base
227                                        .wrapping_sub(new_types_table_index as usize);
228                                    log!("(pers) reading ttype info from {typeinfo:?}");
229                                    read_encoded_pointer(
230                                        // Basically just reader.read() a SLEB128.
231                                        &mut DwarfReader::new(typeinfo),
232                                        context,
233                                        types_table_encoding,
234                                    )
235                                };
236                                let tag_ptr = tag_ptr.unwrap();
237
238                                if tag_ptr.is_null() {
239                                    if catches.is_empty() {
240                                        // No specifics so far, so we definitely have a catch-all we should use
241                                        return Ok(EHAction::CatchAll { lpad });
242                                    } else {
243                                        // We do have catch clauses that *may* need to be used, so we must
244                                        // defer to phase 2 anyway, but this catch-all will be used if
245                                        // none of those clauses match, so we can return early.
246                                        return Ok(EHAction::CatchSpecificOrAll {
247                                            lpad,
248                                            tags: catches,
249                                        });
250                                    }
251                                }
252
253                                let tag = std::mem::transmute::<*const u8, *const u32>(tag_ptr)
254                                    .read_unaligned();
255                                log!("(pers) read tag {tag:?}");
256
257                                // Since we don't know what this tag corresponds to, we must defer
258                                // the decision to the second phase.
259                                catches.push(tag);
260                            } else if type_filter == 0 {
261                                // We don't create cleanup clauses, so this can't happen
262                                return Ok(EHAction::Terminate);
263                            }
264
265                            let next_action_record = action_record_reader.clone().read_sleb128();
266                            if next_action_record == 0 {
267                                return Ok(if catches.is_empty() {
268                                    EHAction::None
269                                } else {
270                                    EHAction::CatchSpecific {
271                                        lpad,
272                                        tags: catches,
273                                    }
274                                });
275                            }
276
277                            action_record = action_record_reader
278                                .ptr
279                                .wrapping_add(next_action_record as usize);
280                        }
281                    }
282                }
283            }
284        }
285
286        // Ip is not present in the table. This indicates a nounwind call.
287        Ok(EHAction::Terminate)
288    } else {
289        todo!()
290    }
291}
292
/// Rounds `unrounded` up to the next multiple of `align`.
///
/// # Errors
/// Returns `Err(())` if `align` is not a power of two (this includes zero), or
/// if the rounded value does not fit in a `usize`.
#[inline]
fn round_up(unrounded: usize, align: usize) -> Result<usize, ()> {
    if !align.is_power_of_two() {
        return Err(());
    }
    // The checked variant reports overflow as an error instead of panicking
    // (debug builds) or silently wrapping around (release builds).
    unrounded.checked_next_multiple_of(align).ok_or(())
}
301
302/// Reads an offset (`usize`) from `reader` whose encoding is described by `encoding`.
303///
304/// `encoding` must be a [DWARF Exception Header Encoding as described by the LSB spec][LSB-dwarf-ext].
305/// In addition the upper ("application") part must be zero.
306///
307/// # Errors
308/// Returns `Err` if `encoding`
309/// * is not a valid DWARF Exception Header Encoding,
310/// * is `DW_EH_PE_omit`, or
311/// * has a non-zero application part.
312///
313/// [LSB-dwarf-ext]: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
314unsafe fn read_encoded_offset(reader: &mut DwarfReader, encoding: DwEhPe) -> Result<usize, ()> {
315    if encoding == gimli::DW_EH_PE_omit || encoding.0 & 0xF0 != 0 {
316        return Err(());
317    }
318    let result = unsafe {
319        match DwEhPe(encoding.0 & 0x0F) {
320            // despite the name, LLVM also uses absptr for offsets instead of pointers
321            gimli::DW_EH_PE_absptr => reader.read::<usize>(),
322            gimli::DW_EH_PE_uleb128 => reader.read_uleb128() as usize,
323            gimli::DW_EH_PE_udata2 => reader.read::<u16>() as usize,
324            gimli::DW_EH_PE_udata4 => reader.read::<u32>() as usize,
325            gimli::DW_EH_PE_udata8 => reader.read::<u64>() as usize,
326            gimli::DW_EH_PE_sleb128 => reader.read_sleb128() as usize,
327            gimli::DW_EH_PE_sdata2 => reader.read::<i16>() as usize,
328            gimli::DW_EH_PE_sdata4 => reader.read::<i32>() as usize,
329            gimli::DW_EH_PE_sdata8 => reader.read::<i64>() as usize,
330            _ => return Err(()),
331        }
332    };
333    Ok(result)
334}
335
336/// Reads a pointer from `reader` whose encoding is described by `encoding`.
337///
338/// `encoding` must be a [DWARF Exception Header Encoding as described by the LSB spec][LSB-dwarf-ext].
339///
340/// # Errors
341/// Returns `Err` if `encoding`
342/// * is not a valid DWARF Exception Header Encoding,
343/// * is `DW_EH_PE_omit`, or
344/// * combines `DW_EH_PE_absptr` or `DW_EH_PE_aligned` application part with an integer encoding
345///   (not `DW_EH_PE_absptr`) in the value format part.
346///
347/// [LSB-dwarf-ext]: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
348unsafe fn read_encoded_pointer(
349    reader: &mut DwarfReader,
350    context: &EHContext<'_>,
351    encoding: DwEhPe,
352) -> Result<*const u8, ()> {
353    if encoding == gimli::DW_EH_PE_omit {
354        return Err(());
355    }
356
357    log!("(pers) About to read encoded pointer at {:?}", reader.ptr);
358
359    let base_ptr = match DwEhPe(encoding.0 & 0x70) {
360        gimli::DW_EH_PE_absptr => {
361            log!("(pers) encoding is: DW_EH_PE_absptr");
362            core::ptr::null()
363        }
364        // relative to address of the encoded value, despite the name
365        gimli::DW_EH_PE_pcrel => {
366            log!("(pers) encoding is: DW_EH_PE_pcrel");
367            reader.ptr
368        }
369        gimli::DW_EH_PE_funcrel => {
370            log!("(pers) encoding is: DW_EH_PE_funcrel");
371            if context.func_start.is_null() {
372                return Err(());
373            }
374            context.func_start
375        }
376        gimli::DW_EH_PE_textrel => {
377            log!("(pers) encoding is: DW_EH_PE_textrel");
378            (*context.get_text_start)()
379        }
380        gimli::DW_EH_PE_datarel => {
381            log!("(pers) encoding is: DW_EH_PE_datarel");
382
383            (*context.get_data_start)()
384        }
385        // aligned means the value is aligned to the size of a pointer
386        gimli::DW_EH_PE_aligned => {
387            log!("(pers) encoding is: DW_EH_PE_aligned");
388            reader.ptr = {
389                let this = reader.ptr;
390                let addr = round_up(
391                    {
392                        let this = reader.ptr;
393                        unsafe { mem::transmute::<*const (), usize>(this.cast::<()>()) }
394                    },
395                    mem::size_of::<*const u8>(),
396                )?;
397                // In the mean-time, this operation is defined to be "as if" it was
398                // a wrapping_offset, so we can emulate it as such. This should properly
399                // restore pointer provenance even under today's compiler.
400                let self_addr = unsafe { mem::transmute::<*const (), isize>(this.cast::<()>()) };
401                let dest_addr = addr as isize;
402                let offset = dest_addr.wrapping_sub(self_addr);
403
404                // This is the canonical desugaring of this operation
405                this.wrapping_byte_offset(offset)
406            };
407            core::ptr::null()
408        }
409        _ => return Err(()),
410    };
411
412    let mut ptr = if base_ptr.is_null() {
413        // any value encoding other than absptr would be nonsensical here;
414        // there would be no source of pointer provenance
415        if DwEhPe(encoding.0 & 0x0f) != gimli::DW_EH_PE_absptr {
416            return Err(());
417        }
418        unsafe { reader.read::<*const u8>() }
419    } else {
420        log!("(pers) since base_ptr is not null, we must an offset");
421        let offset = unsafe { read_encoded_offset(reader, DwEhPe(encoding.0 & 0x0f))? };
422        log!("(pers) read offset is {offset:x?}");
423        base_ptr.wrapping_add(offset)
424    };
425
426    log!("(pers) about to read from {ptr:?}");
427
428    if encoding.0 & gimli::DW_EH_PE_indirect.0 != 0 {
429        ptr = unsafe { ptr.cast::<*const u8>().read_unaligned() };
430    }
431
432    log!("(pers) returning ptr value {ptr:?}");
433
434    Ok(ptr)
435}