wasmer_compiler_llvm/
config.rs

1use crate::compiler::LLVMCompiler;
2pub use inkwell::OptimizationLevel as LLVMOptLevel;
3use inkwell::targets::{
4    CodeModel, InitializationConfig, RelocMode, Target as InkwellTarget, TargetMachine,
5    TargetTriple,
6};
7use itertools::Itertools;
8use std::sync::Arc;
9use std::{fmt::Debug, num::NonZero};
10use target_lexicon::BinaryFormat;
11use wasmer_compiler::{Compiler, CompilerConfig, Engine, EngineBuilder, ModuleMiddleware};
12use wasmer_types::{
13    Features, FunctionType, LocalFunctionIndex,
14    target::{Architecture, OperatingSystem, Target, Triple},
15};
16
17/// The InkWell ModuleInfo type
18pub type InkwellModule<'ctx> = inkwell::module::Module<'ctx>;
19
20/// The InkWell MemoryBuffer type
21pub type InkwellMemoryBuffer = inkwell::memory_buffer::MemoryBuffer;
22
23/// The compiled function kind, used for debugging in the `LLVMCallbacks`.
24#[derive(Debug, Clone)]
25pub enum CompiledKind {
26    // A locally-defined function in the Wasm file.
27    Local(LocalFunctionIndex),
28    // A function call trampoline for a given signature.
29    FunctionCallTrampoline(FunctionType),
30    // A dynamic function trampoline for a given signature.
31    DynamicFunctionTrampoline(FunctionType),
32    // An entire Wasm module.
33    Module,
34}
35
36/// Callbacks to the different LLVM compilation phases.
37pub trait LLVMCallbacks: Debug + Send + Sync {
38    fn preopt_ir(&self, function: &CompiledKind, module: &InkwellModule);
39    fn postopt_ir(&self, function: &CompiledKind, module: &InkwellModule);
40    fn obj_memory_buffer(&self, function: &CompiledKind, memory_buffer: &InkwellMemoryBuffer);
41    fn asm_memory_buffer(&self, function: &CompiledKind, memory_buffer: &InkwellMemoryBuffer);
42}
43
44#[derive(Debug, Clone)]
45pub struct LLVM {
46    pub(crate) enable_nan_canonicalization: bool,
47    pub(crate) enable_g0m0_opt: bool,
48    pub(crate) enable_verifier: bool,
49    pub(crate) enable_perfmap: bool,
50    pub(crate) opt_level: LLVMOptLevel,
51    is_pic: bool,
52    pub(crate) callbacks: Option<Arc<dyn LLVMCallbacks>>,
53    /// The middleware chain.
54    pub(crate) middlewares: Vec<Arc<dyn ModuleMiddleware>>,
55    /// Number of threads to use when compiling a module.
56    pub(crate) num_threads: NonZero<usize>,
57}
58
59impl LLVM {
60    /// Creates a new configuration object with the default configuration
61    /// specified.
62    pub fn new() -> Self {
63        Self {
64            enable_nan_canonicalization: false,
65            enable_verifier: false,
66            enable_perfmap: false,
67            opt_level: LLVMOptLevel::Aggressive,
68            is_pic: false,
69            callbacks: None,
70            middlewares: vec![],
71            enable_g0m0_opt: false,
72            num_threads: std::thread::available_parallelism().unwrap_or(NonZero::new(1).unwrap()),
73        }
74    }
75
76    /// The optimization levels when optimizing the IR.
77    pub fn opt_level(&mut self, opt_level: LLVMOptLevel) -> &mut Self {
78        self.opt_level = opt_level;
79        self
80    }
81
82    /// (warning: experimental) Pass the value of the first (#0) global and the base pointer of the
83    /// first (#0) memory as parameter between guest functions.
84    pub fn enable_pass_params_opt(&mut self) -> &mut Self {
85        // internally, the "pass_params" opt is known as g0m0 opt.
86        self.enable_g0m0_opt = true;
87        self
88    }
89
90    pub fn num_threads(&mut self, num_threads: NonZero<usize>) -> &mut Self {
91        self.num_threads = num_threads;
92        self
93    }
94
95    /// Callbacks that will triggered in the different compilation
96    /// phases in LLVM.
97    pub fn callbacks(&mut self, callbacks: Option<Arc<dyn LLVMCallbacks>>) -> &mut Self {
98        self.callbacks = callbacks;
99        self
100    }
101
102    fn reloc_mode(&self, binary_format: BinaryFormat) -> RelocMode {
103        if matches!(binary_format, BinaryFormat::Macho) {
104            return RelocMode::Static;
105        }
106
107        if self.is_pic {
108            RelocMode::PIC
109        } else {
110            RelocMode::Static
111        }
112    }
113
114    fn code_model(&self, binary_format: BinaryFormat) -> CodeModel {
115        // We normally use the large code model, but when targeting shared
116        // objects, we are required to use PIC. If we use PIC anyways, we lose
117        // any benefit from large code model and there's some cost on all
118        // platforms, plus some platforms (MachO) don't support PIC + large
119        // at all.
120        if matches!(binary_format, BinaryFormat::Macho) {
121            return CodeModel::Default;
122        }
123
124        if self.is_pic {
125            CodeModel::Small
126        } else {
127            CodeModel::Large
128        }
129    }
130
131    pub(crate) fn target_operating_system(&self, target: &Target) -> OperatingSystem {
132        match target.triple().operating_system {
133            OperatingSystem::Darwin(deployment) if !self.is_pic => {
134                // LLVM detects static relocation + darwin + 64-bit and
135                // force-enables PIC because MachO doesn't support that
136                // combination. They don't check whether they're targeting
137                // MachO, they check whether the OS is set to Darwin.
138                //
139                // Since both linux and darwin use SysV ABI, this should work.
140                //  but not in the case of Aarch64, there the ABI is slightly different
141                #[allow(clippy::match_single_binding)]
142                match target.triple().architecture {
143                    Architecture::Aarch64(_) => OperatingSystem::Darwin(deployment),
144                    _ => OperatingSystem::Linux,
145                }
146            }
147            other => other,
148        }
149    }
150
151    pub(crate) fn target_binary_format(&self, target: &Target) -> target_lexicon::BinaryFormat {
152        if self.is_pic {
153            target.triple().binary_format
154        } else {
155            match self.target_operating_system(target) {
156                OperatingSystem::Darwin(_) => target_lexicon::BinaryFormat::Macho,
157                _ => target_lexicon::BinaryFormat::Elf,
158            }
159        }
160    }
161
162    fn target_triple(&self, target: &Target) -> TargetTriple {
163        let architecture = if target.triple().architecture
164            == Architecture::Riscv64(target_lexicon::Riscv64Architecture::Riscv64gc)
165        {
166            target_lexicon::Architecture::Riscv64(target_lexicon::Riscv64Architecture::Riscv64)
167        } else {
168            target.triple().architecture
169        };
170        // Hack: we're using is_pic to determine whether this is a native
171        // build or not.
172
173        let operating_system = self.target_operating_system(target);
174        let binary_format = self.target_binary_format(target);
175
176        let triple = Triple {
177            architecture,
178            vendor: target.triple().vendor.clone(),
179            operating_system,
180            environment: target.triple().environment,
181            binary_format,
182        };
183        TargetTriple::create(&triple.to_string())
184    }
185
186    /// Generates the target machine for the current target
187    pub fn target_machine(&self, target: &Target) -> TargetMachine {
188        let triple = target.triple();
189        let cpu_features = &target.cpu_features();
190
191        match triple.architecture {
192            Architecture::X86_64 | Architecture::X86_32(_) => {
193                InkwellTarget::initialize_x86(&InitializationConfig {
194                    asm_parser: true,
195                    asm_printer: true,
196                    base: true,
197                    disassembler: true,
198                    info: true,
199                    machine_code: true,
200                })
201            }
202            Architecture::Aarch64(_) => InkwellTarget::initialize_aarch64(&InitializationConfig {
203                asm_parser: true,
204                asm_printer: true,
205                base: true,
206                disassembler: true,
207                info: true,
208                machine_code: true,
209            }),
210            Architecture::Riscv64(_) => InkwellTarget::initialize_riscv(&InitializationConfig {
211                asm_parser: true,
212                asm_printer: true,
213                base: true,
214                disassembler: true,
215                info: true,
216                machine_code: true,
217            }),
218            Architecture::LoongArch64 => {
219                InkwellTarget::initialize_loongarch(&InitializationConfig {
220                    asm_parser: true,
221                    asm_printer: true,
222                    base: true,
223                    disassembler: true,
224                    info: true,
225                    machine_code: true,
226                })
227            }
228            // Architecture::Arm(_) => InkwellTarget::initialize_arm(&InitializationConfig {
229            //     asm_parser: true,
230            //     asm_printer: true,
231            //     base: true,
232            //     disassembler: true,
233            //     info: true,
234            //     machine_code: true,
235            // }),
236            _ => unimplemented!("target {} not yet supported in Wasmer", triple),
237        }
238
239        // The CPU features formatted as LLVM strings
240        // We can safely map to gcc-like features as the CPUFeatures
241        // are compliant with the same string representations as gcc.
242        let llvm_cpu_features = cpu_features
243            .iter()
244            .map(|feature| format!("+{feature}"))
245            .join(",");
246
247        let target_triple = self.target_triple(target);
248        let llvm_target = InkwellTarget::from_triple(&target_triple).unwrap();
249        let llvm_target_machine = llvm_target
250            .create_target_machine(
251                &target_triple,
252                match triple.architecture {
253                    Architecture::Riscv64(_) => "generic-rv64",
254                    Architecture::LoongArch64 => "generic-la64",
255                    _ => "generic",
256                },
257                match triple.architecture {
258                    Architecture::Riscv64(_) => "+m,+a,+c,+d,+f",
259                    Architecture::LoongArch64 => "+f,+d",
260                    _ => &llvm_cpu_features,
261                },
262                self.opt_level,
263                self.reloc_mode(self.target_binary_format(target)),
264                match triple.architecture {
265                    Architecture::LoongArch64 | Architecture::Riscv64(_) => CodeModel::Medium,
266                    _ => self.code_model(self.target_binary_format(target)),
267                },
268            )
269            .unwrap();
270
271        if let Architecture::Riscv64(_) = triple.architecture {
272            // TODO: totally non-portable way to change ABI
273            unsafe {
274                // This structure mimic the internal structure from inkwell
275                // that is defined as
276                //  #[derive(Debug)]
277                //  pub struct TargetMachine {
278                //    pub(crate) target_machine: LLVMTargetMachineRef,
279                //  }
280                pub struct MyTargetMachine {
281                    pub target_machine: *const u8,
282                }
283                // It is use to live patch the create LLVMTargetMachine
284                // to hard change the ABI and force "-mabi=lp64d" ABI
285                // instead of the default that don't use float registers
286                // because there is no current way to do this change
287
288                let my_target_machine: MyTargetMachine = std::mem::transmute(llvm_target_machine);
289
290                *((my_target_machine.target_machine as *mut u8).offset(0x410) as *mut u64) = 5;
291                std::ptr::copy_nonoverlapping(
292                    c"lp64d".as_ptr(),
293                    (my_target_machine.target_machine as *mut i8).offset(0x418),
294                    6,
295                );
296
297                std::mem::transmute::<MyTargetMachine, inkwell::targets::TargetMachine>(
298                    my_target_machine,
299                )
300            }
301        } else {
302            llvm_target_machine
303        }
304    }
305}
306
307impl CompilerConfig for LLVM {
308    /// Emit code suitable for dlopen.
309    fn enable_pic(&mut self) {
310        // TODO: although we can emit PIC, the object file parser does not yet
311        // support all the relocations.
312        self.is_pic = true;
313    }
314
315    fn enable_perfmap(&mut self) {
316        self.enable_perfmap = true
317    }
318
319    /// Whether to verify compiler IR.
320    fn enable_verifier(&mut self) {
321        self.enable_verifier = true;
322    }
323
324    fn canonicalize_nans(&mut self, enable: bool) {
325        self.enable_nan_canonicalization = enable;
326    }
327
328    /// Transform it into the compiler.
329    fn compiler(self: Box<Self>) -> Box<dyn Compiler> {
330        Box::new(LLVMCompiler::new(*self))
331    }
332
333    /// Pushes a middleware onto the back of the middleware chain.
334    fn push_middleware(&mut self, middleware: Arc<dyn ModuleMiddleware>) {
335        self.middlewares.push(middleware);
336    }
337
338    fn supported_features_for_target(&self, _target: &Target) -> wasmer_types::Features {
339        let mut feats = Features::default();
340        feats.exceptions(true);
341        feats
342    }
343}
344
345impl Default for LLVM {
346    fn default() -> LLVM {
347        Self::new()
348    }
349}
350
351impl From<LLVM> for Engine {
352    fn from(config: LLVM) -> Self {
353        EngineBuilder::new(config).engine()
354    }
355}