wasmer_vm/libcalls/eh/dwarf/eh.rs
1//! Parsing of GCC-style Language-Specific Data Area (LSDA)
//! For details see:
3//! * <https://refspecs.linuxfoundation.org/LSB_3.0.0/LSB-PDA/LSB-PDA/ehframechpt.html>
4//! * <https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html>
5//! * <https://itanium-cxx-abi.github.io/cxx-abi/exceptions.pdf>
6//! * <https://www.airs.com/blog/archives/460>
7//! * <https://www.airs.com/blog/archives/464>
8//!
9//! A reference implementation may be found in the GCC source tree
10//! (`<root>/libgcc/unwind-c.c` as of this writing).
11
12#![allow(non_upper_case_globals)]
13#![allow(clippy::transmutes_expressible_as_ptr_casts)]
14#![allow(clippy::comparison_chain)]
15#![allow(unused)]
16
17use core::{mem, ptr};
18
19use gimli::DwEhPe;
20
21use super::DwarfReader;
22
/// Per-frame inputs the LSDA parser needs in order to resolve encoded
/// pointers and to locate the call site that is currently executing.
#[derive(Copy, Clone)]
pub struct EHContext<'a> {
    /// Current instruction pointer.
    pub ip: *const u8,
    /// Pointer to the current function.
    pub func_start: *const u8,
    /// Get pointer to the code section.
    pub get_text_start: &'a dyn Fn() -> *const u8,
    /// Get pointer to the data section.
    pub get_data_start: &'a dyn Fn() -> *const u8,
}
30
/// Landing pad.
type LPad = *const u8;

/// What the personality routine should do for a frame, as decided by
/// `find_eh_action`.
///
/// `PartialEq`/`Eq` are derived so that results can be compared directly
/// (for example in assertions).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EHAction {
    /// Nothing to do for this frame: no LSDA, no types table, a call site
    /// without a landing pad, or an action chain without any catch clause.
    None,
    /// A catch-all clause was found before any specific catch clause.
    CatchAll { lpad: LPad },
    /// Only specific catch clauses were found; `tags` holds the tag read for
    /// each of them, in action-chain order.
    CatchSpecific { lpad: LPad, tags: Vec<u32> },
    /// Specific catch clauses (`tags`) followed by a catch-all clause that
    /// applies when none of them match.
    CatchSpecificOrAll { lpad: LPad, tags: Vec<u32> },
    /// The frame must not be unwound through.
    Terminate,
}
42
/// Whether the target unwinds with setjmp/longjmp (SjLj) exceptions.
///
/// That is the case on 32-bit ARM Apple platforms, with one exception:
/// watchOS armv7k (specifically that subarchitecture) uses DWARF Call Frame
/// Information (CFI) unwinding instead.
///
/// <https://github.com/llvm/llvm-project/blob/llvmorg-18.1.4/clang/lib/Driver/ToolChains/Darwin.cpp#L3107-L3119>
pub const USING_SJLJ_EXCEPTIONS: bool =
    cfg!(target_vendor = "apple") && cfg!(target_arch = "arm") && !cfg!(target_os = "watchos");
54
/// Diagnostic logging for the personality function.
///
/// The calls are always type-checked but never executed: change the
/// `if false` guards below to `if true` to enable logging from the
/// personality function.
macro_rules! log {
    // A lone message.
    ($msg:expr) => {
        if false {
            eprintln!($msg)
        }
    };

    // A format string followed by its arguments.
    ($($arg:expr),*) => {
        if false {
            eprintln!($($arg),*)
        }
    };
}
71
72pub unsafe fn find_eh_action(lsda: *const u8, context: &EHContext<'_>) -> Result<EHAction, ()> {
73 if lsda.is_null() {
74 log!("(pers) LSDA is null for IP {:?}", context.ip);
75 return Ok(EHAction::None);
76 }
77
78 log!("(pers) Analysing LSDA at {lsda:?}");
79
80 let func_start = context.func_start;
81 let mut reader = DwarfReader::new(lsda);
82
83 let lpad_start_encoding = unsafe { DwEhPe(reader.read::<u8>()) };
84 log!("(pers) Read LP start encoding {lpad_start_encoding:?}");
85
86 let lpad_base = unsafe {
87 // base address for landing pad offsets
88 if lpad_start_encoding != gimli::DW_EH_PE_omit {
89 read_encoded_pointer(&mut reader, context, lpad_start_encoding)?
90 } else {
91 log!("(pers) (is omit)");
92 func_start
93 }
94 };
95 log!("(pers) read landingpad base: {lpad_base:?}");
96
97 let types_table_encoding = unsafe { DwEhPe(reader.read::<u8>()) };
98 log!("(pers) read ttype encoding: {types_table_encoding:?}");
99
100 // If no value for types_table_encoding was given it means that there's no
101 // types_table, therefore we can't possibly use this lpad.
102 if types_table_encoding == gimli::DW_EH_PE_omit {
103 log!("(pers) ttype is omit, returning None");
104 return Ok(EHAction::None);
105 }
106
107 let types_table_base_offset = unsafe { reader.read_uleb128() };
108
109 let types_table_base = unsafe {
110 log!("(pers) read class_info offset {types_table_base_offset:?}");
111 reader.ptr.wrapping_add(types_table_base_offset as _)
112 };
113 log!("(pers) read types_table_base sits at offset {types_table_base:?}");
114
115 let call_site_table_encoding = unsafe { DwEhPe(reader.read::<u8>()) };
116 log!("(pers) read call_site_table_encoding is {call_site_table_encoding:?}");
117
118 let call_site_table_size = unsafe { reader.read_uleb128() };
119 let action_table = unsafe {
120 log!("(pers) read call_site has length {call_site_table_size:?}");
121 reader.ptr.wrapping_add(call_site_table_size as usize)
122 };
123
124 log!("(pers) action table sits at offset {action_table:?}");
125 let ip = context.ip;
126
127 if !USING_SJLJ_EXCEPTIONS {
128 // read the callsite table
129 while reader.ptr < action_table {
130 let call_site_record_reader = &mut reader;
131 unsafe {
132 // Offset of the call site relative to the previous call site, counted in number of 16-byte bundles
133 let call_site_start =
134 read_encoded_offset(call_site_record_reader, call_site_table_encoding)?;
135 let call_site_length =
136 read_encoded_offset(call_site_record_reader, call_site_table_encoding)?;
137 // Offset of the landing pad, typically a byte offset relative to the LPStart address.
138 let call_site_lpad =
139 read_encoded_offset(call_site_record_reader, call_site_table_encoding)?;
140 // Offset of the first associated action record, relative to the start of the actions table.
141 // This value is biased by 1 (1 indicates the start of the actions table), and 0 indicates that there are no actions.
142 let call_site_action_entry = call_site_record_reader.read_uleb128();
143
144 log!("(pers) read cs_start is {call_site_start:?}");
145 log!("(pers) read cs_len is {call_site_length:?}");
146 log!("(pers) read cs_lpad is {call_site_lpad:?}");
147 log!("(pers) read cs_ae is {call_site_action_entry:?}");
148 // Callsite table is sorted by cs_start, so if we've passed the ip, we
149 // may stop searching.
150 if ip < func_start.wrapping_add(call_site_start) {
151 break;
152 }
153
154 // Call site matches the current ip. It's a candidate.
155 if ip < func_start.wrapping_add(call_site_start + call_site_length) {
156 log!(
157 "(pers) found a matching call site: {func_start:?} <= {ip:?} <= {:?}",
158 func_start.wrapping_add(call_site_start + call_site_length)
159 );
160 if call_site_lpad == 0 {
161 return Ok(EHAction::None);
162 } else {
163 let lpad = lpad_base.wrapping_add(call_site_lpad);
164 let mut catches = vec![];
165
166 log!("(pers) lpad sits at {lpad:?}");
167
168 if call_site_action_entry == 0 {
169 // We don't generate cleanup clauses, so this can't happen
170 return Ok(EHAction::Terminate);
171 }
172
173 log!("(pers) read cs_action_entry: {call_site_action_entry}");
174 log!("(pers) action_table: {action_table:?}");
175
176 // Convert 1-based byte offset into
177 let mut action_record: *const u8 =
178 action_table.wrapping_add((call_site_action_entry - 1) as usize);
179
180 log!("(pers) first action at: {action_record:?}");
181
182 loop {
183 // Read the action record.
184 let mut action_record_reader = DwarfReader::new(action_record);
185 // The two record kinds have the same format, with only small differences.
186 // They are distinguished by the "type_filter" field: Catch clauses have strictly positive switch values,
187 // and exception specifications have strictly negative switch values. Value 0 indicates a catch-all clause.
188 let type_filter = action_record_reader.read_sleb128();
189 log!(
190 "(pers) type_filter for action #{call_site_action_entry}: {type_filter:?}"
191 );
192
193 if type_filter > 0 {
194 // This is a catch clause so the type_filter is an index into the types table.
195 //
196 // Positive value, starting at 1.
197 // Index in the types table of the __typeinfo for the catch-clause type.
198 // 1 is the first word preceding TTBase, 2 is the second word, and so on.
199 // Used by the runtime to check if the thrown exception type matches the catch-clause type.
200 let types_table_index = type_filter;
201 if types_table_base.is_null() {
202 panic!();
203 }
204
205 let tag_ptr = {
206 let new_types_table_index =
207 match DwEhPe(types_table_encoding.0 & 0x0f) {
208 gimli::DW_EH_PE_absptr => {
209 type_filter * (size_of::<*const u8>() as i64)
210 }
211 gimli::DW_EH_PE_sdata2 | gimli::DW_EH_PE_udata2 => {
212 type_filter * 2
213 }
214 gimli::DW_EH_PE_sdata4 | gimli::DW_EH_PE_udata4 => {
215 type_filter * 4
216 }
217 gimli::DW_EH_PE_sdata8 | gimli::DW_EH_PE_udata8 => {
218 type_filter * 8
219 }
220 _ => panic!(),
221 };
222
223 log!(
224 "(pers) new_types_table_index for action #{call_site_action_entry}: {new_types_table_index:?}"
225 );
226
227 let typeinfo = types_table_base
228 .wrapping_sub(new_types_table_index as usize);
229 log!("(pers) reading ttype info from {typeinfo:?}");
230 read_encoded_pointer(
231 // Basically just reader.read() a SLEB128.
232 &mut DwarfReader::new(typeinfo),
233 context,
234 types_table_encoding,
235 )
236 };
237 let tag_ptr = tag_ptr.unwrap();
238
239 if tag_ptr.is_null() {
240 if catches.is_empty() {
241 // No specifics so far, so we definitely have a catch-all we should use
242 return Ok(EHAction::CatchAll { lpad });
243 } else {
244 // We do have catch clauses that *may* need to be used, so we must
245 // defer to phase 2 anyway, but this catch-all will be used if
246 // none of those clauses match, so we can return early.
247 return Ok(EHAction::CatchSpecificOrAll {
248 lpad,
249 tags: catches,
250 });
251 }
252 }
253
254 let tag = std::mem::transmute::<*const u8, *const u32>(tag_ptr)
255 .read_unaligned();
256 log!("(pers) read tag {tag:?}");
257
258 // Since we don't know what this tag corresponds to, we must defer
259 // the decision to the second phase.
260 catches.push(tag);
261 } else if type_filter == 0 {
262 // We don't create cleanup clauses, so this can't happen
263 return Ok(EHAction::Terminate);
264 }
265
266 let next_action_record = action_record_reader.clone().read_sleb128();
267 if next_action_record == 0 {
268 return Ok(if catches.is_empty() {
269 EHAction::None
270 } else {
271 EHAction::CatchSpecific {
272 lpad,
273 tags: catches,
274 }
275 });
276 }
277
278 action_record = action_record_reader
279 .ptr
280 .wrapping_add(next_action_record as usize);
281 }
282 }
283 }
284 }
285 }
286
287 // Ip is not present in the table. This indicates a nounwind call.
288 Ok(EHAction::Terminate)
289 } else {
290 todo!()
291 }
292}
293
/// Rounds `unrounded` up to the next multiple of `align`.
///
/// # Errors
/// Returns `Err(())` when `align` is not a power of two (this includes zero)
/// or when the rounded value does not fit in a `usize`.
#[inline]
fn round_up(unrounded: usize, align: usize) -> Result<usize, ()> {
    if !align.is_power_of_two() {
        return Err(());
    }
    // The checked variant reports overflow instead of panicking (debug) or
    // silently wrapping (release).
    unrounded.checked_next_multiple_of(align).ok_or(())
}
302
303/// Reads an offset (`usize`) from `reader` whose encoding is described by `encoding`.
304///
305/// `encoding` must be a [DWARF Exception Header Encoding as described by the LSB spec][LSB-dwarf-ext].
306/// In addition the upper ("application") part must be zero.
307///
308/// # Errors
309/// Returns `Err` if `encoding`
310/// * is not a valid DWARF Exception Header Encoding,
311/// * is `DW_EH_PE_omit`, or
312/// * has a non-zero application part.
313///
314/// [LSB-dwarf-ext]: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
315unsafe fn read_encoded_offset(reader: &mut DwarfReader, encoding: DwEhPe) -> Result<usize, ()> {
316 if encoding == gimli::DW_EH_PE_omit || encoding.0 & 0xF0 != 0 {
317 return Err(());
318 }
319 let result = unsafe {
320 match DwEhPe(encoding.0 & 0x0F) {
321 // despite the name, LLVM also uses absptr for offsets instead of pointers
322 gimli::DW_EH_PE_absptr => reader.read::<usize>(),
323 gimli::DW_EH_PE_uleb128 => reader.read_uleb128() as usize,
324 gimli::DW_EH_PE_udata2 => reader.read::<u16>() as usize,
325 gimli::DW_EH_PE_udata4 => reader.read::<u32>() as usize,
326 gimli::DW_EH_PE_udata8 => reader.read::<u64>() as usize,
327 gimli::DW_EH_PE_sleb128 => reader.read_sleb128() as usize,
328 gimli::DW_EH_PE_sdata2 => reader.read::<i16>() as usize,
329 gimli::DW_EH_PE_sdata4 => reader.read::<i32>() as usize,
330 gimli::DW_EH_PE_sdata8 => reader.read::<i64>() as usize,
331 _ => return Err(()),
332 }
333 };
334 Ok(result)
335}
336
337/// Reads a pointer from `reader` whose encoding is described by `encoding`.
338///
339/// `encoding` must be a [DWARF Exception Header Encoding as described by the LSB spec][LSB-dwarf-ext].
340///
341/// # Errors
342/// Returns `Err` if `encoding`
343/// * is not a valid DWARF Exception Header Encoding,
344/// * is `DW_EH_PE_omit`, or
345/// * combines `DW_EH_PE_absptr` or `DW_EH_PE_aligned` application part with an integer encoding
346/// (not `DW_EH_PE_absptr`) in the value format part.
347///
348/// [LSB-dwarf-ext]: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
349unsafe fn read_encoded_pointer(
350 reader: &mut DwarfReader,
351 context: &EHContext<'_>,
352 encoding: DwEhPe,
353) -> Result<*const u8, ()> {
354 if encoding == gimli::DW_EH_PE_omit {
355 return Err(());
356 }
357
358 log!("(pers) About to read encoded pointer at {:?}", reader.ptr);
359
360 let base_ptr = match DwEhPe(encoding.0 & 0x70) {
361 gimli::DW_EH_PE_absptr => {
362 log!("(pers) encoding is: DW_EH_PE_absptr");
363 core::ptr::null()
364 }
365 // relative to address of the encoded value, despite the name
366 gimli::DW_EH_PE_pcrel => {
367 log!("(pers) encoding is: DW_EH_PE_pcrel");
368 reader.ptr
369 }
370 gimli::DW_EH_PE_funcrel => {
371 log!("(pers) encoding is: DW_EH_PE_funcrel");
372 if context.func_start.is_null() {
373 return Err(());
374 }
375 context.func_start
376 }
377 gimli::DW_EH_PE_textrel => {
378 log!("(pers) encoding is: DW_EH_PE_textrel");
379 (*context.get_text_start)()
380 }
381 gimli::DW_EH_PE_datarel => {
382 log!("(pers) encoding is: DW_EH_PE_datarel");
383
384 (*context.get_data_start)()
385 }
386 // aligned means the value is aligned to the size of a pointer
387 gimli::DW_EH_PE_aligned => {
388 log!("(pers) encoding is: DW_EH_PE_aligned");
389 reader.ptr = {
390 let this = reader.ptr;
391 let addr = round_up(
392 {
393 let this = reader.ptr;
394 unsafe { mem::transmute::<*const (), usize>(this.cast::<()>()) }
395 },
396 mem::size_of::<*const u8>(),
397 )?;
398 // In the mean-time, this operation is defined to be "as if" it was
399 // a wrapping_offset, so we can emulate it as such. This should properly
400 // restore pointer provenance even under today's compiler.
401 let self_addr = unsafe { mem::transmute::<*const (), isize>(this.cast::<()>()) };
402 let dest_addr = addr as isize;
403 let offset = dest_addr.wrapping_sub(self_addr);
404
405 // This is the canonical desugaring of this operation
406 this.wrapping_byte_offset(offset)
407 };
408 core::ptr::null()
409 }
410 _ => return Err(()),
411 };
412
413 let mut ptr = if base_ptr.is_null() {
414 // any value encoding other than absptr would be nonsensical here;
415 // there would be no source of pointer provenance
416 if DwEhPe(encoding.0 & 0x0f) != gimli::DW_EH_PE_absptr {
417 return Err(());
418 }
419 unsafe { reader.read::<*const u8>() }
420 } else {
421 log!("(pers) since base_ptr is not null, we must an offset");
422 let offset = unsafe { read_encoded_offset(reader, DwEhPe(encoding.0 & 0x0f))? };
423 log!("(pers) read offset is {offset:x?}");
424 // For relative encodings, a raw zero denotes a null pointer. Do not
425 // apply the base or indirect dereference in that case.
426 // Upstream implementation: https://github.com/llvm/llvm-project/blob/main/libcxxabi/src/cxa_personality.cpp#L341-L342.
427 if offset == 0 {
428 core::ptr::null()
429 } else {
430 base_ptr.wrapping_add(offset)
431 }
432 };
433
434 log!("(pers) about to read from {ptr:?}");
435
436 if !ptr.is_null() && encoding.0 & gimli::DW_EH_PE_indirect.0 != 0 {
437 ptr = unsafe { ptr.cast::<*const u8>().read_unaligned() };
438 }
439
440 log!("(pers) returning ptr value {ptr:?}");
441
442 Ok(ptr)
443}