wasmer_vm/libcalls/eh/dwarf/eh.rs
1//! Parsing of GCC-style Language-Specific Data Area (LSDA)
//! For details see:
3//! * <https://refspecs.linuxfoundation.org/LSB_3.0.0/LSB-PDA/LSB-PDA/ehframechpt.html>
4//! * <https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html>
5//! * <https://itanium-cxx-abi.github.io/cxx-abi/exceptions.pdf>
6//! * <https://www.airs.com/blog/archives/460>
7//! * <https://www.airs.com/blog/archives/464>
8//!
9//! A reference implementation may be found in the GCC source tree
10//! (`<root>/libgcc/unwind-c.c` as of this writing).
11
12#![allow(non_upper_case_globals)]
13#![allow(clippy::transmutes_expressible_as_ptr_casts)]
14#![allow(clippy::comparison_chain)]
15#![allow(unused)]
16
17use core::{mem, ptr};
18
19use gimli::DwEhPe;
20
21use super::DwarfReader;
22
/// Inputs the LSDA parser needs about the frame being unwound.
#[derive(Copy, Clone)]
pub struct EHContext<'a> {
    /// Current instruction pointer.
    pub ip: *const u8,
    /// Pointer to the current function.
    pub func_start: *const u8,
    /// Callback returning a pointer to the code section.
    pub get_text_start: &'a dyn Fn() -> *const u8,
    /// Callback returning a pointer to the data section.
    pub get_data_start: &'a dyn Fn() -> *const u8,
}
30
/// Address of a landing pad.
type LPad = *const u8;

/// Outcome of looking up an instruction pointer in an LSDA.
#[derive(Debug, Clone)]
pub enum EHAction {
    /// Nothing to run for this frame.
    None,
    /// A catch-all clause was found before any tagged clause; `lpad` is its
    /// landing pad.
    CatchAll { lpad: LPad },
    /// Only tagged catch clauses were found; `tags` lists the tags read from
    /// the types table, in the order they were encountered.
    CatchSpecific { lpad: LPad, tags: Vec<u32> },
    /// Tagged catch clauses followed by a catch-all clause: the catch-all
    /// applies when none of `tags` matches.
    CatchSpecificOrAll { lpad: LPad, tags: Vec<u32> },
    /// No handler may run for this frame.
    Terminate,
}
42
/// Whether the target unwinds with SjLj exceptions.
///
/// 32-bit ARM Darwin platforms use SjLj exceptions. The one exception is
/// watchOS armv7k (specifically that subarchitecture), which uses DWARF Call
/// Frame Information (CFI) unwinding instead.
///
/// <https://github.com/llvm/llvm-project/blob/llvmorg-18.1.4/clang/lib/Driver/ToolChains/Darwin.cpp#L3107-L3119>
pub const USING_SJLJ_EXCEPTIONS: bool = cfg!(all(
    target_arch = "arm",
    target_vendor = "apple",
    not(target_os = "watchos")
));
54
/* Logging for the personality function: flip `false` to `true` below to enable it. */
macro_rules! log {
    // Zero or more comma-separated expressions, forwarded verbatim to
    // `eprintln!`. The arguments are still type-checked while disabled.
    ($($e:expr),*) => {
        if false {
            eprintln!($($e),*)
        }
    };
}
71
72pub unsafe fn find_eh_action(lsda: *const u8, context: &EHContext<'_>) -> Result<EHAction, ()> {
73 if lsda.is_null() {
74 return Ok(EHAction::None);
75 }
76
77 log!("(pers) Analysing LSDA at {lsda:?}");
78
79 let func_start = context.func_start;
80 let mut reader = DwarfReader::new(lsda);
81
82 let lpad_start_encoding = unsafe { DwEhPe(reader.read::<u8>()) };
83 log!("(pers) Read LP start encoding {lpad_start_encoding:?}");
84
85 let lpad_base = unsafe {
86 // base address for landing pad offsets
87 if lpad_start_encoding != gimli::DW_EH_PE_omit {
88 read_encoded_pointer(&mut reader, context, lpad_start_encoding)?
89 } else {
90 log!("(pers) (is omit)");
91 func_start
92 }
93 };
94 log!("(pers) read landingpad base: {lpad_base:?}");
95
96 let types_table_encoding = unsafe { DwEhPe(reader.read::<u8>()) };
97 log!("(pers) read ttype encoding: {types_table_encoding:?}");
98
99 // If no value for types_table_encoding was given it means that there's no
100 // types_table, therefore we can't possibly use this lpad.
101 if types_table_encoding == gimli::DW_EH_PE_omit {
102 log!("(pers) ttype is omit, returning None");
103 return Ok(EHAction::None);
104 }
105
106 let types_table_base_offset = unsafe { reader.read_uleb128() };
107
108 let types_table_base = unsafe {
109 log!("(pers) read class_info offset {types_table_base_offset:?}");
110 reader.ptr.wrapping_add(types_table_base_offset as _)
111 };
112 log!("(pers) read types_table_base sits at offset {types_table_base:?}");
113
114 let call_site_table_encoding = unsafe { DwEhPe(reader.read::<u8>()) };
115 log!("(pers) read call_site_table_encoding is {call_site_table_encoding:?}");
116
117 let call_site_table_size = unsafe { reader.read_uleb128() };
118 let action_table = unsafe {
119 log!("(pers) read call_site has length {call_site_table_size:?}");
120 reader.ptr.wrapping_add(call_site_table_size as usize)
121 };
122
123 log!("(pers) action table sits at offset {action_table:?}");
124 let ip = context.ip;
125
126 if !USING_SJLJ_EXCEPTIONS {
127 // read the callsite table
128 while reader.ptr < action_table {
129 let call_site_record_reader = &mut reader;
130 unsafe {
131 // Offset of the call site relative to the previous call site, counted in number of 16-byte bundles
132 let call_site_start =
133 read_encoded_offset(call_site_record_reader, call_site_table_encoding)?;
134 let call_site_length =
135 read_encoded_offset(call_site_record_reader, call_site_table_encoding)?;
136 // Offset of the landing pad, typically a byte offset relative to the LPStart address.
137 let call_site_lpad =
138 read_encoded_offset(call_site_record_reader, call_site_table_encoding)?;
139 // Offset of the first associated action record, relative to the start of the actions table.
140 // This value is biased by 1 (1 indicates the start of the actions table), and 0 indicates that there are no actions.
141 let call_site_action_entry = call_site_record_reader.read_uleb128();
142
143 log!("(pers) read cs_start is {call_site_start:?}");
144 log!("(pers) read cs_len is {call_site_length:?}");
145 log!("(pers) read cs_lpad is {call_site_lpad:?}");
146 log!("(pers) read cs_ae is {call_site_action_entry:?}");
147 // Callsite table is sorted by cs_start, so if we've passed the ip, we
148 // may stop searching.
149 if ip < func_start.wrapping_add(call_site_start) {
150 break;
151 }
152
153 // Call site matches the current ip. It's a candidate.
154 if ip < func_start.wrapping_add(call_site_start + call_site_length) {
155 log!(
156 "(pers) found a matching call site: {func_start:?} <= {ip:?} <= {:?}",
157 func_start.wrapping_add(call_site_start + call_site_length)
158 );
159 if call_site_lpad == 0 {
160 return Ok(EHAction::None);
161 } else {
162 let lpad = lpad_base.wrapping_add(call_site_lpad);
163 let mut catches = vec![];
164
165 log!("(pers) lpad sits at {lpad:?}");
166
167 if call_site_action_entry == 0 {
168 // We don't generate cleanup clauses, so this can't happen
169 return Ok(EHAction::Terminate);
170 }
171
172 log!("(pers) read cs_action_entry: {call_site_action_entry}");
173 log!("(pers) action_table: {action_table:?}");
174
175 // Convert 1-based byte offset into
176 let mut action_record: *const u8 =
177 action_table.wrapping_add((call_site_action_entry - 1) as usize);
178
179 log!("(pers) first action at: {action_record:?}");
180
181 loop {
182 // Read the action record.
183 let mut action_record_reader = DwarfReader::new(action_record);
184 // The two record kinds have the same format, with only small differences.
185 // They are distinguished by the "type_filter" field: Catch clauses have strictly positive switch values,
186 // and exception specifications have strictly negative switch values. Value 0 indicates a catch-all clause.
187 let type_filter = action_record_reader.read_sleb128();
188 log!(
189 "(pers) type_filter for action #{call_site_action_entry}: {type_filter:?}"
190 );
191
192 if type_filter > 0 {
193 // This is a catch clause so the type_filter is an index into the types table.
194 //
195 // Positive value, starting at 1.
196 // Index in the types table of the __typeinfo for the catch-clause type.
197 // 1 is the first word preceding TTBase, 2 is the second word, and so on.
198 // Used by the runtime to check if the thrown exception type matches the catch-clause type.
199 let types_table_index = type_filter;
200 if types_table_base.is_null() {
201 panic!();
202 }
203
204 let tag_ptr = {
205 let new_types_table_index =
206 match DwEhPe(types_table_encoding.0 & 0x0f) {
207 gimli::DW_EH_PE_absptr => {
208 type_filter * (size_of::<*const u8>() as i64)
209 }
210 gimli::DW_EH_PE_sdata2 | gimli::DW_EH_PE_udata2 => {
211 type_filter * 2
212 }
213 gimli::DW_EH_PE_sdata4 | gimli::DW_EH_PE_udata4 => {
214 type_filter * 4
215 }
216 gimli::DW_EH_PE_sdata8 | gimli::DW_EH_PE_udata8 => {
217 type_filter * 8
218 }
219 _ => panic!(),
220 };
221
222 log!(
223 "(pers) new_types_table_index for action #{call_site_action_entry}: {new_types_table_index:?}"
224 );
225
226 let typeinfo = types_table_base
227 .wrapping_sub(new_types_table_index as usize);
228 log!("(pers) reading ttype info from {typeinfo:?}");
229 read_encoded_pointer(
230 // Basically just reader.read() a SLEB128.
231 &mut DwarfReader::new(typeinfo),
232 context,
233 types_table_encoding,
234 )
235 };
236 let tag_ptr = tag_ptr.unwrap();
237
238 if tag_ptr.is_null() {
239 if catches.is_empty() {
240 // No specifics so far, so we definitely have a catch-all we should use
241 return Ok(EHAction::CatchAll { lpad });
242 } else {
243 // We do have catch clauses that *may* need to be used, so we must
244 // defer to phase 2 anyway, but this catch-all will be used if
245 // none of those clauses match, so we can return early.
246 return Ok(EHAction::CatchSpecificOrAll {
247 lpad,
248 tags: catches,
249 });
250 }
251 }
252
253 let tag = std::mem::transmute::<*const u8, *const u32>(tag_ptr)
254 .read_unaligned();
255 log!("(pers) read tag {tag:?}");
256
257 // Since we don't know what this tag corresponds to, we must defer
258 // the decision to the second phase.
259 catches.push(tag);
260 } else if type_filter == 0 {
261 // We don't create cleanup clauses, so this can't happen
262 return Ok(EHAction::Terminate);
263 }
264
265 let next_action_record = action_record_reader.clone().read_sleb128();
266 if next_action_record == 0 {
267 return Ok(if catches.is_empty() {
268 EHAction::None
269 } else {
270 EHAction::CatchSpecific {
271 lpad,
272 tags: catches,
273 }
274 });
275 }
276
277 action_record = action_record_reader
278 .ptr
279 .wrapping_add(next_action_record as usize);
280 }
281 }
282 }
283 }
284 }
285
286 // Ip is not present in the table. This indicates a nounwind call.
287 Ok(EHAction::Terminate)
288 } else {
289 todo!()
290 }
291}
292
/// Rounds `unrounded` up to the next multiple of `align`.
///
/// # Errors
/// Returns `Err(())` if `align` is not a power of two (this includes zero), or
/// if the rounded value does not fit in a `usize`.
#[inline]
fn round_up(unrounded: usize, align: usize) -> Result<usize, ()> {
    if !align.is_power_of_two() {
        return Err(());
    }
    // The checked variant reports overflow as `Err` instead of panicking in
    // debug builds or silently wrapping to a small value in release builds.
    unrounded.checked_next_multiple_of(align).ok_or(())
}
301
302/// Reads an offset (`usize`) from `reader` whose encoding is described by `encoding`.
303///
304/// `encoding` must be a [DWARF Exception Header Encoding as described by the LSB spec][LSB-dwarf-ext].
305/// In addition the upper ("application") part must be zero.
306///
307/// # Errors
308/// Returns `Err` if `encoding`
309/// * is not a valid DWARF Exception Header Encoding,
310/// * is `DW_EH_PE_omit`, or
311/// * has a non-zero application part.
312///
313/// [LSB-dwarf-ext]: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
314unsafe fn read_encoded_offset(reader: &mut DwarfReader, encoding: DwEhPe) -> Result<usize, ()> {
315 if encoding == gimli::DW_EH_PE_omit || encoding.0 & 0xF0 != 0 {
316 return Err(());
317 }
318 let result = unsafe {
319 match DwEhPe(encoding.0 & 0x0F) {
320 // despite the name, LLVM also uses absptr for offsets instead of pointers
321 gimli::DW_EH_PE_absptr => reader.read::<usize>(),
322 gimli::DW_EH_PE_uleb128 => reader.read_uleb128() as usize,
323 gimli::DW_EH_PE_udata2 => reader.read::<u16>() as usize,
324 gimli::DW_EH_PE_udata4 => reader.read::<u32>() as usize,
325 gimli::DW_EH_PE_udata8 => reader.read::<u64>() as usize,
326 gimli::DW_EH_PE_sleb128 => reader.read_sleb128() as usize,
327 gimli::DW_EH_PE_sdata2 => reader.read::<i16>() as usize,
328 gimli::DW_EH_PE_sdata4 => reader.read::<i32>() as usize,
329 gimli::DW_EH_PE_sdata8 => reader.read::<i64>() as usize,
330 _ => return Err(()),
331 }
332 };
333 Ok(result)
334}
335
336/// Reads a pointer from `reader` whose encoding is described by `encoding`.
337///
338/// `encoding` must be a [DWARF Exception Header Encoding as described by the LSB spec][LSB-dwarf-ext].
339///
340/// # Errors
341/// Returns `Err` if `encoding`
342/// * is not a valid DWARF Exception Header Encoding,
343/// * is `DW_EH_PE_omit`, or
344/// * combines `DW_EH_PE_absptr` or `DW_EH_PE_aligned` application part with an integer encoding
345/// (not `DW_EH_PE_absptr`) in the value format part.
346///
347/// [LSB-dwarf-ext]: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
348unsafe fn read_encoded_pointer(
349 reader: &mut DwarfReader,
350 context: &EHContext<'_>,
351 encoding: DwEhPe,
352) -> Result<*const u8, ()> {
353 if encoding == gimli::DW_EH_PE_omit {
354 return Err(());
355 }
356
357 log!("(pers) About to read encoded pointer at {:?}", reader.ptr);
358
359 let base_ptr = match DwEhPe(encoding.0 & 0x70) {
360 gimli::DW_EH_PE_absptr => {
361 log!("(pers) encoding is: DW_EH_PE_absptr");
362 core::ptr::null()
363 }
364 // relative to address of the encoded value, despite the name
365 gimli::DW_EH_PE_pcrel => {
366 log!("(pers) encoding is: DW_EH_PE_pcrel");
367 reader.ptr
368 }
369 gimli::DW_EH_PE_funcrel => {
370 log!("(pers) encoding is: DW_EH_PE_funcrel");
371 if context.func_start.is_null() {
372 return Err(());
373 }
374 context.func_start
375 }
376 gimli::DW_EH_PE_textrel => {
377 log!("(pers) encoding is: DW_EH_PE_textrel");
378 (*context.get_text_start)()
379 }
380 gimli::DW_EH_PE_datarel => {
381 log!("(pers) encoding is: DW_EH_PE_datarel");
382
383 (*context.get_data_start)()
384 }
385 // aligned means the value is aligned to the size of a pointer
386 gimli::DW_EH_PE_aligned => {
387 log!("(pers) encoding is: DW_EH_PE_aligned");
388 reader.ptr = {
389 let this = reader.ptr;
390 let addr = round_up(
391 {
392 let this = reader.ptr;
393 unsafe { mem::transmute::<*const (), usize>(this.cast::<()>()) }
394 },
395 mem::size_of::<*const u8>(),
396 )?;
397 // In the mean-time, this operation is defined to be "as if" it was
398 // a wrapping_offset, so we can emulate it as such. This should properly
399 // restore pointer provenance even under today's compiler.
400 let self_addr = unsafe { mem::transmute::<*const (), isize>(this.cast::<()>()) };
401 let dest_addr = addr as isize;
402 let offset = dest_addr.wrapping_sub(self_addr);
403
404 // This is the canonical desugaring of this operation
405 this.wrapping_byte_offset(offset)
406 };
407 core::ptr::null()
408 }
409 _ => return Err(()),
410 };
411
412 let mut ptr = if base_ptr.is_null() {
413 // any value encoding other than absptr would be nonsensical here;
414 // there would be no source of pointer provenance
415 if DwEhPe(encoding.0 & 0x0f) != gimli::DW_EH_PE_absptr {
416 return Err(());
417 }
418 unsafe { reader.read::<*const u8>() }
419 } else {
420 log!("(pers) since base_ptr is not null, we must an offset");
421 let offset = unsafe { read_encoded_offset(reader, DwEhPe(encoding.0 & 0x0f))? };
422 log!("(pers) read offset is {offset:x?}");
423 base_ptr.wrapping_add(offset)
424 };
425
426 log!("(pers) about to read from {ptr:?}");
427
428 if encoding.0 & gimli::DW_EH_PE_indirect.0 != 0 {
429 ptr = unsafe { ptr.cast::<*const u8>().read_unaligned() };
430 }
431
432 log!("(pers) returning ptr value {ptr:?}");
433
434 Ok(ptr)
435}