1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
//! Parsing of GCC-style Language-Specific Data Area (LSDA)
//! For details see:
//!  * <https://refspecs.linuxfoundation.org/LSB_3.0.0/LSB-PDA/LSB-PDA/ehframechpt.html>
//!  * <https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html>
//!  * <https://itanium-cxx-abi.github.io/cxx-abi/exceptions.pdf>
//!  * <https://www.airs.com/blog/archives/460>
//!  * <https://www.airs.com/blog/archives/464>
//!
//! A reference implementation may be found in the GCC source tree
//! (`<root>/libgcc/unwind-c.c` as of this writing).

#![allow(non_upper_case_globals)]
#![allow(clippy::transmutes_expressible_as_ptr_casts)]
#![allow(clippy::comparison_chain)]
#![allow(unused)]

use core::{mem, ptr};

use super::DwarfReader;

/// Marker encoding byte: no value is stored. The readers in this module reject it with
/// `Err(())`, and `find_eh_action` treats it as "field absent" in the LSDA header.
pub const DW_EH_PE_omit: u8 = 0xFF;
/// Pointer-sized value with no base. As a value format it is `encoding & 0x0F == 0`; as an
/// application part it is `encoding & 0x70 == 0`.
pub const DW_EH_PE_absptr: u8 = 0x00;

// Value formats: how the value is stored. Selected by the low nibble (`encoding & 0x0F`).
// `u*` variants are unsigned, `s*` variants signed; `leb128` variants are variable length.
pub const DW_EH_PE_uleb128: u8 = 0x01;
pub const DW_EH_PE_udata2: u8 = 0x02;
pub const DW_EH_PE_udata4: u8 = 0x03;
pub const DW_EH_PE_udata8: u8 = 0x04;
pub const DW_EH_PE_sleb128: u8 = 0x09;
pub const DW_EH_PE_sdata2: u8 = 0x0A;
pub const DW_EH_PE_sdata4: u8 = 0x0B;
pub const DW_EH_PE_sdata8: u8 = 0x0C;

// Application part: what the stored value is relative to. Selected by `encoding & 0x70`
// in `read_encoded_pointer`.
pub const DW_EH_PE_pcrel: u8 = 0x10;
pub const DW_EH_PE_textrel: u8 = 0x20;
pub const DW_EH_PE_datarel: u8 = 0x30;
pub const DW_EH_PE_funcrel: u8 = 0x40;
pub const DW_EH_PE_aligned: u8 = 0x50;

/// Flag bit: the decoded pointer is the address of the final pointer, which
/// `read_encoded_pointer` then loads with an unaligned read.
pub const DW_EH_PE_indirect: u8 = 0x80;

/// Per-frame information the LSDA parser needs in order to resolve encoded pointers and to
/// match catch clauses.
#[derive(Copy, Clone)]
pub struct EHContext<'a> {
    /// Current instruction pointer.
    pub ip: *const u8,
    /// Pointer to the start of the current function.
    pub func_start: *const u8,
    /// Callback returning a pointer to the code section (base for `DW_EH_PE_textrel`).
    pub get_text_start: &'a dyn Fn() -> *const u8,
    /// Callback returning a pointer to the data section (base for `DW_EH_PE_datarel`).
    pub get_data_start: &'a dyn Fn() -> *const u8,
    /// The tag associated with the WasmerException.
    pub tag: u64,
}

/// Address of a landing pad.
///
/// `find_eh_action` computes it as the landing-pad base plus the offset stored in the
/// matching call-site entry.
type LPad = *const u8;

/// Result of the LSDA lookup performed by `find_eh_action`.
#[derive(Debug, Clone)]
pub enum EHAction {
    /// Returned when the LSDA pointer is null, the type table is omitted, the matching call
    /// site has no landing pad, or the action chain ends without a match.
    None,
    /// Returned when the matching call site has no action record, or an action record has
    /// type index 0.
    Cleanup(LPad),
    /// Returned when an action record with a positive type index matches: `tag` is the
    /// matched tag, or 0 when the type-table entry decoded to a null pointer.
    Catch { lpad: LPad, tag: u64 },
    /// Not produced by `find_eh_action` as written; action records with a negative type
    /// index are skipped there.
    Filter { lpad: LPad, tag: u64 },
    /// Returned when no call-site entry covers the instruction pointer.
    Terminate,
}

/// Whether the target uses SjLj (setjmp/longjmp) exception handling.
///
/// 32-bit ARM Darwin platforms use SjLj exceptions.
///
/// The exception is watchOS armv7k (specifically that subarchitecture), which
/// instead uses DWARF Call Frame Information (CFI) unwinding.
///
/// When this is `true`, `find_eh_action` reaches a `todo!()` instead of reading the
/// call-site table.
///
/// <https://github.com/llvm/llvm-project/blob/llvmorg-18.1.4/clang/lib/Driver/ToolChains/Darwin.cpp#L3107-L3119>
pub const USING_SJLJ_EXCEPTIONS: bool = cfg!(all(
    target_vendor = "apple",
    not(target_os = "watchos"),
    target_arch = "arm"
));

// Diagnostic output for the personality function. The arguments are always type-checked,
// but the `eprintln!` sits behind a constant-false branch, so nothing is printed.
// Change `false` to `true` to enable logging from the personality function.
macro_rules! log {
    // Zero or more comma-separated expressions, forwarded verbatim to `eprintln!`.
    ($($arg: expr),*) => {
        if false {
            eprintln!($($arg),*)
        }
    };
}

/// Looks up, in the GCC-style LSDA at `lsda`, the action to take for the instruction
/// pointer stored in `context`.
///
/// The LSDA header is decoded first (landing-pad base, type-table encoding and position,
/// call-site encoding and table length). The call-site table is then scanned for the entry
/// covering `context.ip`, and that entry's action chain is walked:
///
/// * a positive type index selects a type-table entry; the entry is either a null pointer
///   (reported as `Catch` with tag 0) or points to a `u64` tag that is compared with
///   `context.tag`;
/// * a type index of zero yields `Cleanup`;
/// * a negative type index is skipped and the walk moves on to the next record.
///
/// Returns `EHAction::None` when `lsda` is null, when the type table is omitted, when the
/// matching call site has no landing pad, or when the action chain ends without a match.
/// Returns `EHAction::Terminate` when no call-site entry covers `context.ip`.
///
/// # Errors
/// Returns `Err(())` when an encoding byte is rejected by `read_encoded_pointer` or
/// `read_encoded_offset`, or when a type-table entry has to be located but the type-table
/// encoding has no fixed size. Malformed tables are reported this way rather than by
/// panicking.
///
/// # Panics
/// Reaches `todo!()` when [`USING_SJLJ_EXCEPTIONS`] is true: the SjLj table layout is not
/// handled.
///
/// # Safety
/// `lsda` must be null or point to a readable, well-formed LSDA. Every pointer decoded from
/// it (type-table entries and the tags they point to) is dereferenced, so the tables and
/// the `u64` tags they reference must be readable as well.
pub unsafe fn find_eh_action(lsda: *const u8, context: &EHContext<'_>) -> Result<EHAction, ()> {
    if lsda.is_null() {
        return Ok(EHAction::None);
    }

    log!("(pers) Analysing LSDA at {lsda:?}");

    let func_start = context.func_start;
    let mut reader = DwarfReader::new(lsda);

    // SAFETY: relies on the caller's guarantee that the LSDA header is readable.
    let lpad_base = unsafe {
        let lp_start_encoding = reader.read::<u8>();

        log!("(pers) Read LP start encoding {lp_start_encoding:?}");
        // base address for landing pad offsets
        if lp_start_encoding != DW_EH_PE_omit {
            read_encoded_pointer(&mut reader, context, lp_start_encoding)?
        } else {
            log!("(pers) (is omit)");
            func_start
        }
    };
    log!("(pers) read landingpad base: {lpad_base:?}");

    // SAFETY: still inside the LSDA header.
    let ttype_encoding = unsafe { reader.read::<u8>() };
    log!("(pers) read ttype encoding: {ttype_encoding:?}");

    // If no value for type_table_encoding was given it means that there's no
    // type_table, therefore we can't possibly use this lpad.
    if ttype_encoding == DW_EH_PE_omit {
        log!("(pers) ttype is omit, returning None");
        return Ok(EHAction::None);
    }

    // The type table is addressed backwards from `class_info`; the stored offset is
    // relative to the position right after the offset field itself.
    // SAFETY: still inside the LSDA header.
    let class_info = unsafe {
        let offset = reader.read_uleb128();
        log!("(pers) read class_info offset {offset:?}");
        reader.ptr.wrapping_add(offset as usize)
    };
    log!("(pers) read class_info sits at offset {class_info:?}");

    // SAFETY: still inside the LSDA header.
    let call_site_encoding = unsafe { reader.read::<u8>() };
    log!("(pers) read call_site_encoding is {call_site_encoding:?}");

    // The action table starts right after the call-site table.
    // SAFETY: still inside the LSDA header.
    let action_table = unsafe {
        let call_site_table_length = reader.read_uleb128();
        log!("(pers) read call_site has length {call_site_table_length:?}");
        reader.ptr.wrapping_add(call_site_table_length as usize)
    };

    log!("(pers) action table sits at offset {action_table:?}");
    let ip = context.ip;

    if USING_SJLJ_EXCEPTIONS {
        // The SjLj call-site table layout is not implemented.
        todo!()
    }

    // read the callsite table
    while reader.ptr < action_table {
        // SAFETY: relies on the caller's guarantee that the call-site, action and type
        // tables of this LSDA, and the tags the type table points to, are readable.
        unsafe {
            // these are offsets rather than pointers;
            let cs_start = read_encoded_offset(&mut reader, call_site_encoding)?;
            let cs_len = read_encoded_offset(&mut reader, call_site_encoding)?;
            let cs_lpad = read_encoded_offset(&mut reader, call_site_encoding)?;
            let cs_action_entry = reader.read_uleb128();

            log!("(pers) read cs_start is {cs_start:?}");
            log!("(pers) read cs_len is {cs_len:?}");
            log!("(pers) read cs_lpad is {cs_lpad:?}");
            log!("(pers) read cs_ae is {cs_action_entry:?}");

            // Callsite table is sorted by cs_start, so if we've passed the ip, we
            // may stop searching.
            let cs_begin = func_start.wrapping_add(cs_start);
            if ip < cs_begin {
                break;
            }

            // Wrapping arithmetic throughout: a corrupt length must not trigger an
            // integer-overflow panic inside the personality routine.
            let cs_end = cs_begin.wrapping_add(cs_len);
            if ip >= cs_end {
                continue;
            }

            log!(
                "(pers) found a matching call site: {func_start:?} <= {ip:?} <= {:?}",
                cs_end
            );

            if cs_lpad == 0 {
                return Ok(EHAction::None);
            }

            let lpad = lpad_base.wrapping_add(cs_lpad);
            log!("(pers) lpad sits at {lpad:?}");

            if cs_action_entry == 0 {
                return Ok(EHAction::Cleanup(lpad));
            }

            log!("(pers) read cs_action_entry: {cs_action_entry}");
            log!("(pers) action_table: {action_table:?}");

            // `cs_action_entry` is a 1-based byte offset into the action table; convert
            // it into a pointer to the first action record (non-zero, so no underflow).
            let mut action: *const u8 =
                action_table.wrapping_add((cs_action_entry - 1) as usize);

            log!("(pers) first action at: {action:?}");

            loop {
                let mut action_reader = DwarfReader::new(action);
                let ttype_index = action_reader.read_sleb128();
                log!("(pers) ttype_index for action #{cs_action_entry}: {ttype_index:?}");

                if ttype_index > 0 {
                    if class_info.is_null() {
                        return Err(());
                    }

                    // Type-table entries have a fixed size and are indexed backwards
                    // from `class_info`; variable-length formats cannot be indexed.
                    let entry_size: usize = match ttype_encoding & 0x0f {
                        DW_EH_PE_absptr => mem::size_of::<*const u8>(),
                        DW_EH_PE_sdata2 | DW_EH_PE_udata2 => 2,
                        DW_EH_PE_sdata4 | DW_EH_PE_udata4 => 4,
                        DW_EH_PE_sdata8 | DW_EH_PE_udata8 => 8,
                        _ => return Err(()),
                    };
                    let ttype_byte_offset = (ttype_index as usize).wrapping_mul(entry_size);

                    log!("(pers) new_ttype_index for action #{cs_action_entry}: {ttype_byte_offset:?}");

                    let entry = class_info.wrapping_sub(ttype_byte_offset);
                    log!("(pers) reading ttype info from {entry:?}");
                    let tag_ptr = read_encoded_pointer(
                        &mut DwarfReader::new(entry),
                        context,
                        ttype_encoding,
                    )?;

                    // A null type-table entry is reported as a catch with tag 0.
                    if tag_ptr.is_null() {
                        return Ok(EHAction::Catch { lpad, tag: 0 });
                    }

                    let tag = tag_ptr.cast::<u64>().read_unaligned();

                    if context.tag == tag {
                        return Ok(EHAction::Catch { lpad, tag });
                    }
                } else if ttype_index == 0 {
                    return Ok(EHAction::Cleanup(lpad));
                }

                // The next-record displacement is relative to the position of the
                // displacement field itself, so it is read from a clone and
                // `action_reader.ptr` is left pointing at that field.
                let action_offset = action_reader.clone().read_sleb128();
                if action_offset == 0 {
                    return Ok(EHAction::None);
                }

                action = action_reader.ptr.wrapping_add(action_offset as usize);
            }
        }
    }

    // Ip is not present in the table. This indicates a nounwind call.
    Ok(EHAction::Terminate)
}

#[inline]
fn get_encoding_size(encoding: u8) -> usize {
    if encoding == DW_EH_PE_omit {
        return 0;
    }

    match encoding & 0x0f {
        DW_EH_PE_absptr => size_of::<usize>(),
        DW_EH_PE_udata2 | DW_EH_PE_sdata2 => size_of::<u16>(),
        DW_EH_PE_udata4 | DW_EH_PE_sdata4 => size_of::<u32>(),
        DW_EH_PE_udata8 | DW_EH_PE_sdata8 => size_of::<u64>(),
        _ => panic!(),
    }
}

/// Rounds `unrounded` up to the next multiple of `align`.
///
/// # Errors
/// Returns `Err(())` if `align` is not a power of two (this includes zero), or if the
/// rounded value does not fit in a `usize`.
#[inline]
fn round_up(unrounded: usize, align: usize) -> Result<usize, ()> {
    if !align.is_power_of_two() {
        return Err(());
    }
    // `align` is a non-zero power of two here, so `mask` is exactly its low bits.
    let mask = align - 1;
    // Checked addition: near `usize::MAX` the sum would otherwise panic in debug builds
    // and silently wrap to a small, wrong value in release builds.
    unrounded
        .checked_add(mask)
        .map(|bumped| bumped & !mask)
        .ok_or(())
}

/// Decodes one offset (`usize`) from `reader`, stored in the format named by `encoding`.
///
/// `encoding` must be a [DWARF Exception Header Encoding as described by the LSB spec][LSB-dwarf-ext]
/// whose upper ("application") part is zero: an offset has no base to be relative to.
/// Signed formats are converted to `usize` with an `as` cast.
///
/// # Errors
/// Returns `Err` if `encoding`
/// * is not a valid DWARF Exception Header Encoding,
/// * is `DW_EH_PE_omit`, or
/// * has a non-zero application part.
///
/// # Safety
/// `reader` must be positioned on readable memory holding a value of the selected format.
///
/// [LSB-dwarf-ext]: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
unsafe fn read_encoded_offset(reader: &mut DwarfReader, encoding: u8) -> Result<usize, ()> {
    let has_application_part = encoding & 0xF0 != 0;
    if encoding == DW_EH_PE_omit || has_application_part {
        return Err(());
    }

    let value_format = encoding & 0x0F;
    // SAFETY: the caller guarantees `reader` points at a readable value of this format.
    unsafe {
        Ok(match value_format {
            // variable-length formats
            DW_EH_PE_uleb128 => reader.read_uleb128() as usize,
            DW_EH_PE_sleb128 => reader.read_sleb128() as usize,
            // despite the name, LLVM also uses absptr for offsets instead of pointers
            DW_EH_PE_absptr => reader.read::<usize>(),
            // fixed-width unsigned formats
            DW_EH_PE_udata2 => reader.read::<u16>() as usize,
            DW_EH_PE_udata4 => reader.read::<u32>() as usize,
            DW_EH_PE_udata8 => reader.read::<u64>() as usize,
            // fixed-width signed formats
            DW_EH_PE_sdata2 => reader.read::<i16>() as usize,
            DW_EH_PE_sdata4 => reader.read::<i32>() as usize,
            DW_EH_PE_sdata8 => reader.read::<i64>() as usize,
            _ => return Err(()),
        })
    }
}

/// Reads a pointer from `reader` whose encoding is described by `encoding`.
///
/// `encoding` must be a [DWARF Exception Header Encoding as described by the LSB spec][LSB-dwarf-ext].
///
/// The application part (`encoding & 0x70`) selects the base the stored value is relative
/// to; the value format (`encoding & 0x0F`) selects how the value is stored. When the
/// `DW_EH_PE_indirect` bit is set, the decoded pointer is dereferenced once more to obtain
/// the result.
///
/// # Errors
/// Returns `Err` if `encoding`
/// * is not a valid DWARF Exception Header Encoding,
/// * is `DW_EH_PE_omit`,
/// * is `DW_EH_PE_funcrel` while `context.func_start` is null, or
/// * combines `DW_EH_PE_absptr` or `DW_EH_PE_aligned` application part with an integer encoding
///   (not `DW_EH_PE_absptr`) in the value format part.
///
/// # Safety
/// `reader` must be positioned on readable memory holding a value of the selected format,
/// and, when `DW_EH_PE_indirect` is set, the decoded pointer must itself be readable as a
/// pointer-sized value.
///
/// [LSB-dwarf-ext]: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html
unsafe fn read_encoded_pointer(
    reader: &mut DwarfReader,
    context: &EHContext<'_>,
    encoding: u8,
) -> Result<*const u8, ()> {
    if encoding == DW_EH_PE_omit {
        return Err(());
    }

    log!("(pers) About to read encoded pointer at {:?}", reader.ptr);

    // A null `base_ptr` means "no base": the value is stored as an absolute pointer.
    let base_ptr = match encoding & 0x70 {
        DW_EH_PE_absptr => {
            log!("(pers) encoding is: DW_EH_PE_absptr ({DW_EH_PE_absptr})");
            core::ptr::null()
        }
        // relative to address of the encoded value, despite the name
        DW_EH_PE_pcrel => {
            log!("(pers) encoding is: DW_EH_PE_pcrel ({DW_EH_PE_pcrel})");
            reader.ptr
        }
        DW_EH_PE_funcrel => {
            log!("(pers) encoding is: DW_EH_PE_funcrel ({DW_EH_PE_funcrel})");
            if context.func_start.is_null() {
                return Err(());
            }
            context.func_start
        }
        DW_EH_PE_textrel => {
            log!("(pers) encoding is: DW_EH_PE_textrel ({DW_EH_PE_textrel})");
            (*context.get_text_start)()
        }
        DW_EH_PE_datarel => {
            log!("(pers) encoding is: DW_EH_PE_datarel ({DW_EH_PE_datarel})");
            (*context.get_data_start)()
        }
        // aligned means the value is aligned to the size of a pointer
        DW_EH_PE_aligned => {
            log!("(pers) encoding is: DW_EH_PE_aligned ({DW_EH_PE_aligned})");
            let addr = reader.ptr as usize;
            let aligned = round_up(addr, mem::size_of::<*const u8>())?;
            // Advance by the padding rather than rebuilding a pointer from an integer,
            // so the result is still derived from `reader.ptr`.
            reader.ptr = reader.ptr.wrapping_add(aligned.wrapping_sub(addr));
            core::ptr::null()
        }
        _ => return Err(()),
    };

    let mut ptr = if base_ptr.is_null() {
        // any value encoding other than absptr would be nonsensical here;
        // there would be no source of pointer provenance
        if encoding & 0x0F != DW_EH_PE_absptr {
            return Err(());
        }
        // SAFETY: the caller guarantees a pointer-sized value is readable at `reader`.
        unsafe { reader.read::<*const u8>() }
    } else {
        log!("(pers) since base_ptr is not null, we must read an offset");
        // SAFETY: the caller guarantees a value of this format is readable at `reader`.
        let offset = unsafe { read_encoded_offset(reader, encoding & 0x0F)? };
        log!("(pers) read offset is {offset:x?}");
        base_ptr.wrapping_add(offset)
    };

    log!("(pers) about to read from {ptr:?}");

    if encoding & DW_EH_PE_indirect != 0 {
        // SAFETY: the caller guarantees the decoded pointer is readable when the
        // indirect bit is set.
        ptr = unsafe { ptr.cast::<*const u8>().read_unaligned() };
    }

    log!("(pers) returning ptr value {ptr:?}");

    Ok(ptr)
}