wasmer_vm/libcalls/eh/dwarf/
eh.rs

1//! Parsing of GCC-style Language-Specific Data Area (LSDA)
2//! For details see:
3//!  * <https://refspecs.linuxfoundation.org/LSB_3.0.0/LSB-PDA/LSB-PDA/ehframechpt.html>
4//!  * <https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html>
5//!  * <https://itanium-cxx-abi.github.io/cxx-abi/exceptions.pdf>
6//!  * <https://www.airs.com/blog/archives/460>
7//!  * <https://www.airs.com/blog/archives/464>
8//!
9//! A reference implementation may be found in the GCC source tree
10//! (`<root>/libgcc/unwind-c.c` as of this writing).
11
12#![allow(non_upper_case_globals)]
13#![allow(clippy::transmutes_expressible_as_ptr_casts)]
14#![allow(clippy::comparison_chain)]
15#![allow(unused)]
16
17use core::{mem, ptr};
18
19use super::DwarfReader;
20
// DWARF exception-header pointer encodings (`DW_EH_PE_*`).
//
// An encoding byte is split into three parts by the readers in this file: the
// low nibble (`0x0F`) selects the value format, bits `0x70` select the base the
// value is applied to, and bit `0x80` requests one extra dereference.

/// The whole byte `0xFF` means that no value is present.
pub const DW_EH_PE_omit: u8 = 0xFF;
/// Pointer-sized value with no base applied (also the "no base" application).
pub const DW_EH_PE_absptr: u8 = 0x00;

// --- value formats (low nibble) ---

/// Unsigned LEB128 value.
pub const DW_EH_PE_uleb128: u8 = 0x01;
/// Unsigned 2-byte value.
pub const DW_EH_PE_udata2: u8 = 0x02;
/// Unsigned 4-byte value.
pub const DW_EH_PE_udata4: u8 = 0x03;
/// Unsigned 8-byte value.
pub const DW_EH_PE_udata8: u8 = 0x04;
/// Signed LEB128 value.
pub const DW_EH_PE_sleb128: u8 = 0x09;
/// Signed 2-byte value.
pub const DW_EH_PE_sdata2: u8 = 0x0A;
/// Signed 4-byte value.
pub const DW_EH_PE_sdata4: u8 = 0x0B;
/// Signed 8-byte value.
pub const DW_EH_PE_sdata8: u8 = 0x0C;

// --- application / base selection (bits 0x70) ---

/// Value is relative to the address at which it is stored.
pub const DW_EH_PE_pcrel: u8 = 0x10;
/// Value is relative to the start of the code section.
pub const DW_EH_PE_textrel: u8 = 0x20;
/// Value is relative to the start of the data section.
pub const DW_EH_PE_datarel: u8 = 0x30;
/// Value is relative to the start of the current function.
pub const DW_EH_PE_funcrel: u8 = 0x40;
/// Value is stored at the next pointer-aligned address.
pub const DW_EH_PE_aligned: u8 = 0x50;

// --- modifier flag ---

/// The decoded value is the address of the real pointer and must be
/// dereferenced once more.
pub const DW_EH_PE_indirect: u8 = 0x80;
40
/// Everything the LSDA parser needs to know about the frame being inspected.
///
/// The section start addresses are supplied as callbacks so they are only
/// looked up when an encoding actually refers to them.
#[derive(Copy, Clone)]
pub struct EHContext<'a> {
    /// Current instruction pointer.
    pub ip: *const u8,
    /// Pointer to the start of the current function.
    pub func_start: *const u8,
    /// Returns a pointer to the start of the code section.
    pub get_text_start: &'a dyn Fn() -> *const u8,
    /// Returns a pointer to the start of the data section.
    pub get_data_start: &'a dyn Fn() -> *const u8,
}
48
/// Address of a landing pad.
type LPad = *const u8;

/// Outcome of searching an LSDA for the current instruction pointer.
#[derive(Debug, Clone)]
pub enum EHAction {
    /// Nothing to do in this frame.
    None,
    /// The landing pad catches every exception.
    CatchAll {
        /// Landing pad to transfer control to.
        lpad: LPad,
    },
    /// The landing pad catches only exceptions carrying one of `tags`.
    CatchSpecific {
        /// Landing pad to transfer control to.
        lpad: LPad,
        /// Tags read from the type table, in action-chain order.
        tags: Vec<u32>,
    },
    /// The landing pad has specific catch clauses for `tags`, followed by a
    /// catch-all that applies when none of them match.
    CatchSpecificOrAll {
        /// Landing pad to transfer control to.
        lpad: LPad,
        /// Tags read from the type table, in action-chain order.
        tags: Vec<u32>,
    },
    /// Unwinding must not continue through this frame.
    Terminate,
}
60
/// `true` when the target unwinds with setjmp/longjmp (SjLj) exceptions.
///
/// 32-bit ARM Apple platforms use SjLj exceptions. The one exception is
/// watchOS armv7k (specifically that subarchitecture), which uses DWARF Call
/// Frame Information (CFI) unwinding instead.
///
/// <https://github.com/llvm/llvm-project/blob/llvmorg-18.1.4/clang/lib/Driver/ToolChains/Darwin.cpp#L3107-L3119>
pub const USING_SJLJ_EXCEPTIONS: bool = cfg!(all(
    target_arch = "arm",
    target_vendor = "apple",
    not(target_os = "watchos")
));
72
// Debug logging for the personality function.
//
// The arguments are forwarded verbatim to `eprintln!`, but behind `if false`,
// so nothing is printed; the arguments are still type-checked. Change `false`
// to `true` to enable the output.
macro_rules! log {
    ($($arg: expr_2021),*) => {
        if false {
            eprintln!($($arg),*)
        }
    };
}
89
90pub unsafe fn find_eh_action(lsda: *const u8, context: &EHContext<'_>) -> Result<EHAction, ()> {
91    if lsda.is_null() {
92        return Ok(EHAction::None);
93    }
94
95    log!("(pers) Analysing LSDA at {lsda:?}");
96
97    let func_start = context.func_start;
98    let mut reader = DwarfReader::new(lsda);
99
100    let lpad_base = unsafe {
101        let lp_start_encoding = reader.read::<u8>();
102
103        log!("(pers) Read LP start encoding {lp_start_encoding:?}");
104        // base address for landing pad offsets
105        if lp_start_encoding != DW_EH_PE_omit {
106            read_encoded_pointer(&mut reader, context, lp_start_encoding)?
107        } else {
108            log!("(pers) (is omit)");
109            func_start
110        }
111    };
112    log!("(pers) read landingpad base: {lpad_base:?}");
113
114    let ttype_encoding = unsafe { reader.read::<u8>() };
115    log!("(pers) read ttype encoding: {ttype_encoding:?}");
116
117    // If no value for type_table_encoding was given it means that there's no
118    // type_table, therefore we can't possibly use this lpad.
119    if ttype_encoding == DW_EH_PE_omit {
120        log!("(pers) ttype is omit, returning None");
121        return Ok(EHAction::None);
122    }
123
124    let class_info = unsafe {
125        let offset = reader.read_uleb128();
126        log!("(pers) read class_info offset {offset:?}");
127        reader.ptr.wrapping_add(offset as _)
128    };
129    log!("(pers) read class_info sits at offset {class_info:?}");
130
131    let call_site_encoding = unsafe { reader.read::<u8>() };
132    log!("(pers) read call_site_encoding is {call_site_encoding:?}");
133
134    let action_table = unsafe {
135        let call_site_table_length = reader.read_uleb128();
136        log!("(pers) read call_site has length {call_site_table_length:?}");
137        reader.ptr.wrapping_add(call_site_table_length as usize)
138    };
139
140    log!("(pers) action table sits at offset {action_table:?}");
141    let ip = context.ip;
142
143    if !USING_SJLJ_EXCEPTIONS {
144        // read the callsite table
145        while reader.ptr < action_table {
146            unsafe {
147                // these are offsets rather than pointers;
148                let cs_start = read_encoded_offset(&mut reader, call_site_encoding)?;
149                let cs_len = read_encoded_offset(&mut reader, call_site_encoding)?;
150                let cs_lpad = read_encoded_offset(&mut reader, call_site_encoding)?;
151                let cs_action_entry = reader.read_uleb128();
152
153                log!("(pers) read cs_start is {cs_start:?}");
154                log!("(pers) read cs_len is {cs_len:?}");
155                log!("(pers) read cs_lpad is {cs_lpad:?}");
156                log!("(pers) read cs_ae is {cs_action_entry:?}");
157                // Callsite table is sorted by cs_start, so if we've passed the ip, we
158                // may stop searching.
159                if ip < func_start.wrapping_add(cs_start) {
160                    break;
161                }
162
163                if ip < func_start.wrapping_add(cs_start + cs_len) {
164                    log!(
165                        "(pers) found a matching call site: {func_start:?} <= {ip:?} <= {:?}",
166                        func_start.wrapping_add(cs_start + cs_len)
167                    );
168                    if cs_lpad == 0 {
169                        return Ok(EHAction::None);
170                    } else {
171                        let lpad = lpad_base.wrapping_add(cs_lpad);
172                        let mut catches = vec![];
173
174                        log!("(pers) lpad sits at {lpad:?}");
175
176                        if cs_action_entry == 0 {
177                            // We don't generate cleanup clauses, so this can't happen
178                            return Ok(EHAction::Terminate);
179                        }
180
181                        log!("(pers) read cs_action_entry: {cs_action_entry}");
182                        log!("(pers) action_table: {action_table:?}");
183
184                        // Convert 1-based byte offset into
185                        let mut action: *const u8 =
186                            action_table.wrapping_add((cs_action_entry - 1) as usize);
187
188                        log!("(pers) first action at: {action:?}");
189
190                        loop {
191                            let mut reader = DwarfReader::new(action);
192                            let ttype_index = reader.read_sleb128();
193                            log!(
194                                "(pers) ttype_index for action #{cs_action_entry}: {ttype_index:?}"
195                            );
196
197                            if ttype_index > 0 {
198                                if class_info.is_null() {
199                                    panic!();
200                                }
201
202                                let tag_ptr = {
203                                    let new_ttype_index = match ttype_encoding & 0x0f {
204                                        DW_EH_PE_absptr => {
205                                            ttype_index * (size_of::<*const u8>() as i64)
206                                        }
207                                        DW_EH_PE_sdata2 | DW_EH_PE_udata2 => ttype_index * 2,
208                                        DW_EH_PE_sdata4 | DW_EH_PE_udata4 => ttype_index * 4,
209                                        DW_EH_PE_sdata8 | DW_EH_PE_udata8 => ttype_index * 8,
210                                        _ => panic!(),
211                                    };
212
213                                    log!(
214                                        "(pers) new_ttype_index for action #{cs_action_entry}: {new_ttype_index:?}"
215                                    );
216
217                                    let i = class_info.wrapping_sub(new_ttype_index as usize);
218                                    log!("(pers) reading ttype info from {i:?}");
219                                    read_encoded_pointer(
220                                        &mut DwarfReader::new(i),
221                                        context,
222                                        ttype_encoding,
223                                    )
224                                };
225                                let tag_ptr = tag_ptr.unwrap();
226
227                                if tag_ptr.is_null() {
228                                    if catches.is_empty() {
229                                        // No specifics so far, so we definitely have a catch-all we should use
230                                        return Ok(EHAction::CatchAll { lpad });
231                                    } else {
232                                        // We do have catch clauses that *may* need to be used, so we must
233                                        // defer to phase 2 anyway, but this catch-all will be used if
234                                        // none of those clauses match, so we can return early.
235                                        return Ok(EHAction::CatchSpecificOrAll {
236                                            lpad,
237                                            tags: catches,
238                                        });
239                                    }
240                                }
241
242                                let tag = std::mem::transmute::<*const u8, *const u32>(tag_ptr)
243                                    .read_unaligned();
244
245                                log!("(pers) read tag {tag:?}");
246
247                                // Since we don't know what this tag corresponds to, we must defer
248                                // the decision to the second phase.
249                                catches.push(tag);
250                            } else if ttype_index == 0 {
251                                // We don't create cleanup clauses, so this can't happen
252                                return Ok(EHAction::Terminate);
253                            }
254
255                            let action_offset = reader.clone().read_sleb128();
256                            if action_offset == 0 {
257                                return Ok(if catches.is_empty() {
258                                    EHAction::None
259                                } else {
260                                    EHAction::CatchSpecific {
261                                        lpad,
262                                        tags: catches,
263                                    }
264                                });
265                            }
266
267                            action = reader.ptr.wrapping_add(action_offset as usize);
268                        }
269                    }
270                }
271            }
272        }
273
274        // Ip is not present in the table. This indicates a nounwind call.
275        Ok(EHAction::Terminate)
276    } else {
277        todo!()
278    }
279}
280
281#[inline]
282fn get_encoding_size(encoding: u8) -> usize {
283    if encoding == DW_EH_PE_omit {
284        return 0;
285    }
286
287    match encoding & 0x0f {
288        DW_EH_PE_absptr => size_of::<usize>(),
289        DW_EH_PE_udata2 | DW_EH_PE_sdata2 => size_of::<u16>(),
290        DW_EH_PE_udata4 | DW_EH_PE_sdata4 => size_of::<u32>(),
291        DW_EH_PE_udata8 | DW_EH_PE_sdata8 => size_of::<u64>(),
292        _ => panic!(),
293    }
294}
295
296#[inline]
297fn round_up(unrounded: usize, align: usize) -> Result<usize, ()> {
298    if align.is_power_of_two() {
299        Ok((unrounded + align - 1) & !(align - 1))
300    } else {
301        Err(())
302    }
303}
304
305/// Reads an offset (`usize`) from `reader` whose encoding is described by `encoding`.
306///
307/// `encoding` must be a [DWARF Exception Header Encoding as described by the LSB spec][LSB-dwarf-ext].
308/// In addition the upper ("application") part must be zero.
309///
310/// # Errors
311/// Returns `Err` if `encoding`
312/// * is not a valid DWARF Exception Header Encoding,
313/// * is `DW_EH_PE_omit`, or
314/// * has a non-zero application part.
315///
316/// [LSB-dwarf-ext]: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
317unsafe fn read_encoded_offset(reader: &mut DwarfReader, encoding: u8) -> Result<usize, ()> {
318    if encoding == DW_EH_PE_omit || encoding & 0xF0 != 0 {
319        return Err(());
320    }
321    let result = unsafe {
322        match encoding & 0x0F {
323            // despite the name, LLVM also uses absptr for offsets instead of pointers
324            DW_EH_PE_absptr => reader.read::<usize>(),
325            DW_EH_PE_uleb128 => reader.read_uleb128() as usize,
326            DW_EH_PE_udata2 => reader.read::<u16>() as usize,
327            DW_EH_PE_udata4 => reader.read::<u32>() as usize,
328            DW_EH_PE_udata8 => reader.read::<u64>() as usize,
329            DW_EH_PE_sleb128 => reader.read_sleb128() as usize,
330            DW_EH_PE_sdata2 => reader.read::<i16>() as usize,
331            DW_EH_PE_sdata4 => reader.read::<i32>() as usize,
332            DW_EH_PE_sdata8 => reader.read::<i64>() as usize,
333            _ => return Err(()),
334        }
335    };
336    Ok(result)
337}
338
339/// Reads a pointer from `reader` whose encoding is described by `encoding`.
340///
341/// `encoding` must be a [DWARF Exception Header Encoding as described by the LSB spec][LSB-dwarf-ext].
342///
343/// # Errors
344/// Returns `Err` if `encoding`
345/// * is not a valid DWARF Exception Header Encoding,
346/// * is `DW_EH_PE_omit`, or
347/// * combines `DW_EH_PE_absptr` or `DW_EH_PE_aligned` application part with an integer encoding
348///   (not `DW_EH_PE_absptr`) in the value format part.
349///
350/// [LSB-dwarf-ext]: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
351unsafe fn read_encoded_pointer(
352    reader: &mut DwarfReader,
353    context: &EHContext<'_>,
354    encoding: u8,
355) -> Result<*const u8, ()> {
356    if encoding == DW_EH_PE_omit {
357        return Err(());
358    }
359
360    log!("(pers) About to read encoded pointer at {:?}", reader.ptr);
361
362    let base_ptr = match encoding & 0x70 {
363        DW_EH_PE_absptr => {
364            log!("(pers) encoding is: DW_EH_PE_absptr ({DW_EH_PE_absptr})");
365            core::ptr::null()
366        }
367        // relative to address of the encoded value, despite the name
368        DW_EH_PE_pcrel => {
369            log!("(pers) encoding is: DW_EH_PE_pcrel ({DW_EH_PE_pcrel})");
370            reader.ptr
371        }
372        DW_EH_PE_funcrel => {
373            log!("(pers) encoding is: DW_EH_PE_funcrel ({DW_EH_PE_funcrel})");
374            if context.func_start.is_null() {
375                return Err(());
376            }
377            context.func_start
378        }
379        DW_EH_PE_textrel => {
380            log!("(pers) encoding is: DW_EH_PE_textrel ({DW_EH_PE_textrel})");
381            (*context.get_text_start)()
382        }
383        DW_EH_PE_datarel => {
384            log!("(pers) encoding is: DW_EH_PE_textrel ({DW_EH_PE_datarel})");
385
386            (*context.get_data_start)()
387        }
388        // aligned means the value is aligned to the size of a pointer
389        DW_EH_PE_aligned => {
390            log!("(pers) encoding is: DW_EH_PE_textrel ({DW_EH_PE_aligned})");
391            reader.ptr = {
392                let this = reader.ptr;
393                let addr = round_up(
394                    {
395                        let this = reader.ptr;
396                        unsafe { mem::transmute::<*const (), usize>(this.cast::<()>()) }
397                    },
398                    mem::size_of::<*const u8>(),
399                )?;
400                // In the mean-time, this operation is defined to be "as if" it was
401                // a wrapping_offset, so we can emulate it as such. This should properly
402                // restore pointer provenance even under today's compiler.
403                let self_addr = unsafe { mem::transmute::<*const (), isize>(this.cast::<()>()) };
404                let dest_addr = addr as isize;
405                let offset = dest_addr.wrapping_sub(self_addr);
406
407                // This is the canonical desugaring of this operation
408                this.wrapping_byte_offset(offset)
409            };
410            core::ptr::null()
411        }
412        _ => return Err(()),
413    };
414
415    let mut ptr = if base_ptr.is_null() {
416        // any value encoding other than absptr would be nonsensical here;
417        // there would be no source of pointer provenance
418        if encoding & 0x0F != DW_EH_PE_absptr {
419            return Err(());
420        }
421        unsafe { reader.read::<*const u8>() }
422    } else {
423        log!("(pers) since base_ptr is not null, we must an offset");
424        let offset = unsafe { read_encoded_offset(reader, encoding & 0x0F)? };
425        log!("(pers) read offset is {offset:x?}");
426        base_ptr.wrapping_add(offset)
427    };
428
429    log!("(pers) about to read from {ptr:?}");
430
431    if encoding & DW_EH_PE_indirect != 0 {
432        ptr = unsafe { ptr.cast::<*const u8>().read_unaligned() };
433    }
434
435    log!("(pers) returning ptr value {ptr:?}");
436
437    Ok(ptr)
438}