wasmer_vm/libcalls/eh/dwarf/
eh.rs

1//! Parsing of GCC-style Language-Specific Data Area (LSDA)
2//! For details see:
3//!  * <https://refspecs.linuxfoundation.org/LSB_3.0.0/LSB-PDA/LSB-PDA/ehframechpt.html>
4//!  * <https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html>
5//!  * <https://itanium-cxx-abi.github.io/cxx-abi/exceptions.pdf>
6//!  * <https://www.airs.com/blog/archives/460>
7//!  * <https://www.airs.com/blog/archives/464>
8//!
9//! A reference implementation may be found in the GCC source tree
10//! (`<root>/libgcc/unwind-c.c` as of this writing).
11
12#![allow(non_upper_case_globals)]
13#![allow(clippy::transmutes_expressible_as_ptr_casts)]
14#![allow(clippy::comparison_chain)]
15#![allow(unused)]
16
17use core::{mem, ptr};
18
19use gimli::DwEhPe;
20
21use super::DwarfReader;
22
/// Per-frame information the LSDA parser needs in order to resolve encoded
/// pointers and to locate the call site that is currently executing.
///
/// The section bases are provided as callbacks instead of plain pointers.
#[derive(Clone, Copy)]
pub struct EHContext<'a> {
    /// Current instruction pointer.
    pub ip: *const u8,
    /// Pointer to the current function.
    pub func_start: *const u8,
    /// Get pointer to the code section.
    pub get_text_start: &'a dyn Fn() -> *const u8,
    /// Get pointer to the data section.
    pub get_data_start: &'a dyn Fn() -> *const u8,
}
30
/// Address of a landing pad.
type LPad = *const u8;

/// Result of looking up an instruction pointer in an LSDA.
#[derive(Clone, Debug)]
pub enum EHAction {
    /// Nothing to do for this frame.
    None,
    /// A catch-all clause was found; `lpad` is its landing pad.
    CatchAll { lpad: LPad },
    /// Only tag-specific catch clauses were found. `tags` holds the tags read
    /// from the types table; whether any of them matches is decided later.
    CatchSpecific { lpad: LPad, tags: Vec<u32> },
    /// Tag-specific catch clauses were found, followed by a catch-all that
    /// applies when none of `tags` matches.
    CatchSpecificOrAll { lpad: LPad, tags: Vec<u32> },
    /// The exception must not propagate through this frame.
    Terminate,
}
42
/// `true` when the target unwinds with setjmp/longjmp (SjLj) exceptions.
///
/// That is the case on 32-bit ARM Apple platforms, with one exception:
/// watchOS armv7k (specifically that subarchitecture) uses DWARF Call Frame
/// Information (CFI) unwinding instead.
///
/// <https://github.com/llvm/llvm-project/blob/llvmorg-18.1.4/clang/lib/Driver/ToolChains/Darwin.cpp#L3107-L3119>
pub const USING_SJLJ_EXCEPTIONS: bool = cfg!(all(
    target_arch = "arm",
    target_vendor = "apple",
    not(target_os = "watchos")
));
54
// Tracing for the personality function. The calls are always compiled (so the
// format strings stay type-checked) but never executed; change `false` to
// `true` to print the messages to stderr.
macro_rules! log {
    ($($arg: expr),*) => {
        if false {
            eprintln!($($arg),*)
        }
    };
}
71
72pub unsafe fn find_eh_action(lsda: *const u8, context: &EHContext<'_>) -> Result<EHAction, ()> {
73    if lsda.is_null() {
74        log!("(pers) LSDA is null for IP {:?}", context.ip);
75        return Ok(EHAction::None);
76    }
77
78    log!("(pers) Analysing LSDA at {lsda:?}");
79
80    let func_start = context.func_start;
81    let mut reader = DwarfReader::new(lsda);
82
83    let lpad_start_encoding = unsafe { DwEhPe(reader.read::<u8>()) };
84    log!("(pers) Read LP start encoding {lpad_start_encoding:?}");
85
86    let lpad_base = unsafe {
87        // base address for landing pad offsets
88        if lpad_start_encoding != gimli::DW_EH_PE_omit {
89            read_encoded_pointer(&mut reader, context, lpad_start_encoding)?
90        } else {
91            log!("(pers) (is omit)");
92            func_start
93        }
94    };
95    log!("(pers) read landingpad base: {lpad_base:?}");
96
97    let types_table_encoding = unsafe { DwEhPe(reader.read::<u8>()) };
98    log!("(pers) read ttype encoding: {types_table_encoding:?}");
99
100    // If no value for types_table_encoding was given it means that there's no
101    // types_table, therefore we can't possibly use this lpad.
102    if types_table_encoding == gimli::DW_EH_PE_omit {
103        log!("(pers) ttype is omit, returning None");
104        return Ok(EHAction::None);
105    }
106
107    let types_table_base_offset = unsafe { reader.read_uleb128() };
108
109    let types_table_base = unsafe {
110        log!("(pers) read class_info offset {types_table_base_offset:?}");
111        reader.ptr.wrapping_add(types_table_base_offset as _)
112    };
113    log!("(pers) read types_table_base sits at offset {types_table_base:?}");
114
115    let call_site_table_encoding = unsafe { DwEhPe(reader.read::<u8>()) };
116    log!("(pers) read call_site_table_encoding is {call_site_table_encoding:?}");
117
118    let call_site_table_size = unsafe { reader.read_uleb128() };
119    let action_table = unsafe {
120        log!("(pers) read call_site has length {call_site_table_size:?}");
121        reader.ptr.wrapping_add(call_site_table_size as usize)
122    };
123
124    log!("(pers) action table sits at offset {action_table:?}");
125    let ip = context.ip;
126
127    if !USING_SJLJ_EXCEPTIONS {
128        // read the callsite table
129        while reader.ptr < action_table {
130            let call_site_record_reader = &mut reader;
131            unsafe {
132                // Offset of the call site relative to the previous call site, counted in number of 16-byte bundles
133                let call_site_start =
134                    read_encoded_offset(call_site_record_reader, call_site_table_encoding)?;
135                let call_site_length =
136                    read_encoded_offset(call_site_record_reader, call_site_table_encoding)?;
137                // Offset of the landing pad, typically a byte offset relative to the LPStart address.
138                let call_site_lpad =
139                    read_encoded_offset(call_site_record_reader, call_site_table_encoding)?;
140                // Offset of the first associated action record, relative to the start of the actions table.
141                // This value is biased by 1 (1 indicates the start of the actions table), and 0 indicates that there are no actions.
142                let call_site_action_entry = call_site_record_reader.read_uleb128();
143
144                log!("(pers) read cs_start is {call_site_start:?}");
145                log!("(pers) read cs_len is {call_site_length:?}");
146                log!("(pers) read cs_lpad is {call_site_lpad:?}");
147                log!("(pers) read cs_ae is {call_site_action_entry:?}");
148                // Callsite table is sorted by cs_start, so if we've passed the ip, we
149                // may stop searching.
150                if ip < func_start.wrapping_add(call_site_start) {
151                    break;
152                }
153
154                // Call site matches the current ip. It's a candidate.
155                if ip < func_start.wrapping_add(call_site_start + call_site_length) {
156                    log!(
157                        "(pers) found a matching call site: {func_start:?} <= {ip:?} <= {:?}",
158                        func_start.wrapping_add(call_site_start + call_site_length)
159                    );
160                    if call_site_lpad == 0 {
161                        return Ok(EHAction::None);
162                    } else {
163                        let lpad = lpad_base.wrapping_add(call_site_lpad);
164                        let mut catches = vec![];
165
166                        log!("(pers) lpad sits at {lpad:?}");
167
168                        if call_site_action_entry == 0 {
169                            // We don't generate cleanup clauses, so this can't happen
170                            return Ok(EHAction::Terminate);
171                        }
172
173                        log!("(pers) read cs_action_entry: {call_site_action_entry}");
174                        log!("(pers) action_table: {action_table:?}");
175
176                        // Convert 1-based byte offset into
177                        let mut action_record: *const u8 =
178                            action_table.wrapping_add((call_site_action_entry - 1) as usize);
179
180                        log!("(pers) first action at: {action_record:?}");
181
182                        loop {
183                            // Read the action record.
184                            let mut action_record_reader = DwarfReader::new(action_record);
185                            // The two record kinds have the same format, with only small differences.
186                            // They are distinguished by the "type_filter" field: Catch clauses have strictly positive switch values,
187                            // and exception specifications have strictly negative switch values. Value 0 indicates a catch-all clause.
188                            let type_filter = action_record_reader.read_sleb128();
189                            log!(
190                                "(pers) type_filter for action #{call_site_action_entry}: {type_filter:?}"
191                            );
192
193                            if type_filter > 0 {
194                                // This is a catch clause so the type_filter is an index into the types table.
195                                //
196                                // Positive value, starting at 1.
197                                // Index in the types table of the __typeinfo for the catch-clause type.
198                                // 1 is the first word preceding TTBase, 2 is the second word, and so on.
199                                // Used by the runtime to check if the thrown exception type matches the catch-clause type.
200                                let types_table_index = type_filter;
201                                if types_table_base.is_null() {
202                                    panic!();
203                                }
204
205                                let tag_ptr = {
206                                    let new_types_table_index =
207                                        match DwEhPe(types_table_encoding.0 & 0x0f) {
208                                            gimli::DW_EH_PE_absptr => {
209                                                type_filter * (size_of::<*const u8>() as i64)
210                                            }
211                                            gimli::DW_EH_PE_sdata2 | gimli::DW_EH_PE_udata2 => {
212                                                type_filter * 2
213                                            }
214                                            gimli::DW_EH_PE_sdata4 | gimli::DW_EH_PE_udata4 => {
215                                                type_filter * 4
216                                            }
217                                            gimli::DW_EH_PE_sdata8 | gimli::DW_EH_PE_udata8 => {
218                                                type_filter * 8
219                                            }
220                                            _ => panic!(),
221                                        };
222
223                                    log!(
224                                        "(pers) new_types_table_index for action #{call_site_action_entry}: {new_types_table_index:?}"
225                                    );
226
227                                    let typeinfo = types_table_base
228                                        .wrapping_sub(new_types_table_index as usize);
229                                    log!("(pers) reading ttype info from {typeinfo:?}");
230                                    read_encoded_pointer(
231                                        // Basically just reader.read() a SLEB128.
232                                        &mut DwarfReader::new(typeinfo),
233                                        context,
234                                        types_table_encoding,
235                                    )
236                                };
237                                let tag_ptr = tag_ptr.unwrap();
238
239                                if tag_ptr.is_null() {
240                                    if catches.is_empty() {
241                                        // No specifics so far, so we definitely have a catch-all we should use
242                                        return Ok(EHAction::CatchAll { lpad });
243                                    } else {
244                                        // We do have catch clauses that *may* need to be used, so we must
245                                        // defer to phase 2 anyway, but this catch-all will be used if
246                                        // none of those clauses match, so we can return early.
247                                        return Ok(EHAction::CatchSpecificOrAll {
248                                            lpad,
249                                            tags: catches,
250                                        });
251                                    }
252                                }
253
254                                let tag = std::mem::transmute::<*const u8, *const u32>(tag_ptr)
255                                    .read_unaligned();
256                                log!("(pers) read tag {tag:?}");
257
258                                // Since we don't know what this tag corresponds to, we must defer
259                                // the decision to the second phase.
260                                catches.push(tag);
261                            } else if type_filter == 0 {
262                                // We don't create cleanup clauses, so this can't happen
263                                return Ok(EHAction::Terminate);
264                            }
265
266                            let next_action_record = action_record_reader.clone().read_sleb128();
267                            if next_action_record == 0 {
268                                return Ok(if catches.is_empty() {
269                                    EHAction::None
270                                } else {
271                                    EHAction::CatchSpecific {
272                                        lpad,
273                                        tags: catches,
274                                    }
275                                });
276                            }
277
278                            action_record = action_record_reader
279                                .ptr
280                                .wrapping_add(next_action_record as usize);
281                        }
282                    }
283                }
284            }
285        }
286
287        // Ip is not present in the table. This indicates a nounwind call.
288        Ok(EHAction::Terminate)
289    } else {
290        todo!()
291    }
292}
293
/// Rounds `unrounded` up to the nearest multiple of `align`.
///
/// # Errors
/// Returns `Err(())` if `align` is not a power of two (this includes zero).
#[inline]
fn round_up(unrounded: usize, align: usize) -> Result<usize, ()> {
    align
        .is_power_of_two()
        // Evaluated lazily, so `next_multiple_of` never sees a zero alignment.
        .then(|| unrounded.next_multiple_of(align))
        .ok_or(())
}
302
303/// Reads an offset (`usize`) from `reader` whose encoding is described by `encoding`.
304///
305/// `encoding` must be a [DWARF Exception Header Encoding as described by the LSB spec][LSB-dwarf-ext].
306/// In addition the upper ("application") part must be zero.
307///
308/// # Errors
309/// Returns `Err` if `encoding`
310/// * is not a valid DWARF Exception Header Encoding,
311/// * is `DW_EH_PE_omit`, or
312/// * has a non-zero application part.
313///
314/// [LSB-dwarf-ext]: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
315unsafe fn read_encoded_offset(reader: &mut DwarfReader, encoding: DwEhPe) -> Result<usize, ()> {
316    if encoding == gimli::DW_EH_PE_omit || encoding.0 & 0xF0 != 0 {
317        return Err(());
318    }
319    let result = unsafe {
320        match DwEhPe(encoding.0 & 0x0F) {
321            // despite the name, LLVM also uses absptr for offsets instead of pointers
322            gimli::DW_EH_PE_absptr => reader.read::<usize>(),
323            gimli::DW_EH_PE_uleb128 => reader.read_uleb128() as usize,
324            gimli::DW_EH_PE_udata2 => reader.read::<u16>() as usize,
325            gimli::DW_EH_PE_udata4 => reader.read::<u32>() as usize,
326            gimli::DW_EH_PE_udata8 => reader.read::<u64>() as usize,
327            gimli::DW_EH_PE_sleb128 => reader.read_sleb128() as usize,
328            gimli::DW_EH_PE_sdata2 => reader.read::<i16>() as usize,
329            gimli::DW_EH_PE_sdata4 => reader.read::<i32>() as usize,
330            gimli::DW_EH_PE_sdata8 => reader.read::<i64>() as usize,
331            _ => return Err(()),
332        }
333    };
334    Ok(result)
335}
336
337/// Reads a pointer from `reader` whose encoding is described by `encoding`.
338///
339/// `encoding` must be a [DWARF Exception Header Encoding as described by the LSB spec][LSB-dwarf-ext].
340///
341/// # Errors
342/// Returns `Err` if `encoding`
343/// * is not a valid DWARF Exception Header Encoding,
344/// * is `DW_EH_PE_omit`, or
345/// * combines `DW_EH_PE_absptr` or `DW_EH_PE_aligned` application part with an integer encoding
346///   (not `DW_EH_PE_absptr`) in the value format part.
347///
348/// [LSB-dwarf-ext]: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
349unsafe fn read_encoded_pointer(
350    reader: &mut DwarfReader,
351    context: &EHContext<'_>,
352    encoding: DwEhPe,
353) -> Result<*const u8, ()> {
354    if encoding == gimli::DW_EH_PE_omit {
355        return Err(());
356    }
357
358    log!("(pers) About to read encoded pointer at {:?}", reader.ptr);
359
360    let base_ptr = match DwEhPe(encoding.0 & 0x70) {
361        gimli::DW_EH_PE_absptr => {
362            log!("(pers) encoding is: DW_EH_PE_absptr");
363            core::ptr::null()
364        }
365        // relative to address of the encoded value, despite the name
366        gimli::DW_EH_PE_pcrel => {
367            log!("(pers) encoding is: DW_EH_PE_pcrel");
368            reader.ptr
369        }
370        gimli::DW_EH_PE_funcrel => {
371            log!("(pers) encoding is: DW_EH_PE_funcrel");
372            if context.func_start.is_null() {
373                return Err(());
374            }
375            context.func_start
376        }
377        gimli::DW_EH_PE_textrel => {
378            log!("(pers) encoding is: DW_EH_PE_textrel");
379            (*context.get_text_start)()
380        }
381        gimli::DW_EH_PE_datarel => {
382            log!("(pers) encoding is: DW_EH_PE_datarel");
383
384            (*context.get_data_start)()
385        }
386        // aligned means the value is aligned to the size of a pointer
387        gimli::DW_EH_PE_aligned => {
388            log!("(pers) encoding is: DW_EH_PE_aligned");
389            reader.ptr = {
390                let this = reader.ptr;
391                let addr = round_up(
392                    {
393                        let this = reader.ptr;
394                        unsafe { mem::transmute::<*const (), usize>(this.cast::<()>()) }
395                    },
396                    mem::size_of::<*const u8>(),
397                )?;
398                // In the mean-time, this operation is defined to be "as if" it was
399                // a wrapping_offset, so we can emulate it as such. This should properly
400                // restore pointer provenance even under today's compiler.
401                let self_addr = unsafe { mem::transmute::<*const (), isize>(this.cast::<()>()) };
402                let dest_addr = addr as isize;
403                let offset = dest_addr.wrapping_sub(self_addr);
404
405                // This is the canonical desugaring of this operation
406                this.wrapping_byte_offset(offset)
407            };
408            core::ptr::null()
409        }
410        _ => return Err(()),
411    };
412
413    let mut ptr = if base_ptr.is_null() {
414        // any value encoding other than absptr would be nonsensical here;
415        // there would be no source of pointer provenance
416        if DwEhPe(encoding.0 & 0x0f) != gimli::DW_EH_PE_absptr {
417            return Err(());
418        }
419        unsafe { reader.read::<*const u8>() }
420    } else {
421        log!("(pers) since base_ptr is not null, we must an offset");
422        let offset = unsafe { read_encoded_offset(reader, DwEhPe(encoding.0 & 0x0f))? };
423        log!("(pers) read offset is {offset:x?}");
424        // For relative encodings, a raw zero denotes a null pointer. Do not
425        // apply the base or indirect dereference in that case.
426        // Upstream implementation: https://github.com/llvm/llvm-project/blob/main/libcxxabi/src/cxa_personality.cpp#L341-L342.
427        if offset == 0 {
428            core::ptr::null()
429        } else {
430            base_ptr.wrapping_add(offset)
431        }
432    };
433
434    log!("(pers) about to read from {ptr:?}");
435
436    if !ptr.is_null() && encoding.0 & gimli::DW_EH_PE_indirect.0 != 0 {
437        ptr = unsafe { ptr.cast::<*const u8>().read_unaligned() };
438    }
439
440    log!("(pers) returning ptr value {ptr:?}");
441
442    Ok(ptr)
443}