wasmer_compiler_llvm/
config.rs

1use crate::compiler::LLVMCompiler;
2pub use inkwell::OptimizationLevel as LLVMOptLevel;
3use inkwell::targets::{
4    CodeModel, InitializationConfig, RelocMode, Target as InkwellTarget, TargetMachine,
5    TargetMachineOptions, TargetTriple,
6};
7use itertools::Itertools;
8use std::fs::File;
9use std::io::{self, Write};
10use std::path::PathBuf;
11use std::sync::Arc;
12use std::{fmt::Debug, num::NonZero};
13use target_lexicon::BinaryFormat;
14use wasmer_compiler::misc::{CompiledKind, function_kind_to_filename};
15use wasmer_compiler::{Compiler, CompilerConfig, Engine, EngineBuilder, ModuleMiddleware};
16use wasmer_types::{
17    Features,
18    target::{Architecture, OperatingSystem, Target, Triple},
19};
20
/// The Inkwell `Module` type.
22pub type InkwellModule<'ctx> = inkwell::module::Module<'ctx>;
23
/// The Inkwell `MemoryBuffer` type.
25pub type InkwellMemoryBuffer = inkwell::memory_buffer::MemoryBuffer;
26
27/// Callbacks to the different LLVM compilation phases.
28#[derive(Debug, Clone)]
29pub struct LLVMCallbacks {
30    debug_dir: PathBuf,
31}
32
33impl LLVMCallbacks {
34    pub fn new(debug_dir: PathBuf) -> Result<Self, io::Error> {
35        // Create the debug dir in case it doesn't exist
36        std::fs::create_dir_all(&debug_dir)?;
37        Ok(Self { debug_dir })
38    }
39
40    fn base_path(&self, module_hash: &Option<String>) -> PathBuf {
41        let mut path = self.debug_dir.clone();
42        if let Some(hash) = module_hash {
43            path.push(hash);
44        }
45        std::fs::create_dir_all(&path)
46            .unwrap_or_else(|_| panic!("cannot create debug directory: {}", path.display()));
47        path
48    }
49
50    pub fn preopt_ir(
51        &self,
52        kind: &CompiledKind,
53        module_hash: &Option<String>,
54        module: &InkwellModule,
55    ) {
56        let mut path = self.base_path(module_hash);
57        path.push(function_kind_to_filename(kind, ".preopt.ll"));
58        module
59            .print_to_file(&path)
60            .expect("Error while dumping pre optimized LLVM IR");
61    }
62    pub fn postopt_ir(
63        &self,
64        kind: &CompiledKind,
65        module_hash: &Option<String>,
66        module: &InkwellModule,
67    ) {
68        let mut path = self.base_path(module_hash);
69        path.push(function_kind_to_filename(kind, ".postopt.ll"));
70        module
71            .print_to_file(&path)
72            .expect("Error while dumping post optimized LLVM IR");
73    }
74    pub fn obj_memory_buffer(
75        &self,
76        kind: &CompiledKind,
77        module_hash: &Option<String>,
78        memory_buffer: &InkwellMemoryBuffer,
79    ) {
80        let mut path = self.base_path(module_hash);
81        path.push(function_kind_to_filename(kind, ".o"));
82        let mem_buf_slice = memory_buffer.as_slice();
83        let mut file =
84            File::create(path).expect("Error while creating debug object file from LLVM IR");
85        file.write_all(mem_buf_slice).unwrap();
86    }
87
88    pub fn asm_memory_buffer(
89        &self,
90        kind: &CompiledKind,
91        module_hash: &Option<String>,
92        asm_memory_buffer: &InkwellMemoryBuffer,
93    ) {
94        let mut path = self.base_path(module_hash);
95        path.push(function_kind_to_filename(kind, ".s"));
96        let mem_buf_slice = asm_memory_buffer.as_slice();
97        let mut file =
98            File::create(path).expect("Error while creating debug assembly file from LLVM IR");
99        file.write_all(mem_buf_slice).unwrap();
100    }
101}
102
103#[derive(Debug, Clone)]
104pub struct LLVM {
105    pub(crate) enable_nan_canonicalization: bool,
106    pub(crate) enable_g0m0_opt: bool,
107    pub(crate) enable_verifier: bool,
108    pub(crate) enable_perfmap: bool,
109    pub(crate) opt_level: LLVMOptLevel,
110    is_pic: bool,
111    pub(crate) callbacks: Option<LLVMCallbacks>,
112    /// The middleware chain.
113    pub(crate) middlewares: Vec<Arc<dyn ModuleMiddleware>>,
114    /// Number of threads to use when compiling a module.
115    pub(crate) num_threads: NonZero<usize>,
116    pub(crate) verbose_asm: bool,
117}
118
119impl LLVM {
120    /// Creates a new configuration object with the default configuration
121    /// specified.
122    pub fn new() -> Self {
123        Self {
124            enable_nan_canonicalization: false,
125            enable_verifier: false,
126            enable_perfmap: false,
127            opt_level: LLVMOptLevel::Aggressive,
128            is_pic: false,
129            callbacks: None,
130            middlewares: vec![],
131            enable_g0m0_opt: false,
132            verbose_asm: false,
133            num_threads: std::thread::available_parallelism().unwrap_or(NonZero::new(1).unwrap()),
134        }
135    }
136
137    /// The optimization levels when optimizing the IR.
138    pub fn opt_level(&mut self, opt_level: LLVMOptLevel) -> &mut Self {
139        self.opt_level = opt_level;
140        self
141    }
142
143    /// (warning: experimental) Pass the value of the first (#0) global and the base pointer of the
144    /// first (#0) memory as parameter between guest functions.
145    pub fn enable_pass_params_opt(&mut self) -> &mut Self {
146        // internally, the "pass_params" opt is known as g0m0 opt.
147        self.enable_g0m0_opt = true;
148        self
149    }
150
151    pub fn num_threads(&mut self, num_threads: NonZero<usize>) -> &mut Self {
152        self.num_threads = num_threads;
153        self
154    }
155
156    pub fn verbose_asm(&mut self, verbose_asm: bool) -> &mut Self {
157        self.verbose_asm = verbose_asm;
158        self
159    }
160
161    /// Callbacks that will triggered in the different compilation
162    /// phases in LLVM.
163    pub fn callbacks(&mut self, callbacks: Option<LLVMCallbacks>) -> &mut Self {
164        self.callbacks = callbacks;
165        self
166    }
167
168    fn reloc_mode(&self, binary_format: BinaryFormat) -> RelocMode {
169        if matches!(binary_format, BinaryFormat::Macho) {
170            return RelocMode::Static;
171        }
172
173        if self.is_pic {
174            RelocMode::PIC
175        } else {
176            RelocMode::Static
177        }
178    }
179
180    fn code_model(&self, binary_format: BinaryFormat) -> CodeModel {
181        // We normally use the large code model, but when targeting shared
182        // objects, we are required to use PIC. If we use PIC anyways, we lose
183        // any benefit from large code model and there's some cost on all
184        // platforms, plus some platforms (MachO) don't support PIC + large
185        // at all.
186        if matches!(binary_format, BinaryFormat::Macho) {
187            return CodeModel::Default;
188        }
189
190        if self.is_pic {
191            CodeModel::Small
192        } else {
193            CodeModel::Large
194        }
195    }
196
197    pub(crate) fn target_operating_system(&self, target: &Target) -> OperatingSystem {
198        match target.triple().operating_system {
199            OperatingSystem::Darwin(deployment) if !self.is_pic => {
200                // LLVM detects static relocation + darwin + 64-bit and
201                // force-enables PIC because MachO doesn't support that
202                // combination. They don't check whether they're targeting
203                // MachO, they check whether the OS is set to Darwin.
204                //
205                // Since both linux and darwin use SysV ABI, this should work.
206                //  but not in the case of Aarch64, there the ABI is slightly different
207                #[allow(clippy::match_single_binding)]
208                match target.triple().architecture {
209                    Architecture::Aarch64(_) => OperatingSystem::Darwin(deployment),
210                    _ => OperatingSystem::Linux,
211                }
212            }
213            other => other,
214        }
215    }
216
217    pub(crate) fn target_binary_format(&self, target: &Target) -> target_lexicon::BinaryFormat {
218        if self.is_pic {
219            target.triple().binary_format
220        } else {
221            match self.target_operating_system(target) {
222                OperatingSystem::Darwin(_) => target_lexicon::BinaryFormat::Macho,
223                _ => target_lexicon::BinaryFormat::Elf,
224            }
225        }
226    }
227
228    fn target_triple(&self, target: &Target) -> TargetTriple {
229        let architecture = if target.triple().architecture
230            == Architecture::Riscv64(target_lexicon::Riscv64Architecture::Riscv64gc)
231        {
232            target_lexicon::Architecture::Riscv64(target_lexicon::Riscv64Architecture::Riscv64)
233        } else {
234            target.triple().architecture
235        };
236        // Hack: we're using is_pic to determine whether this is a native
237        // build or not.
238
239        let operating_system = self.target_operating_system(target);
240        let binary_format = self.target_binary_format(target);
241
242        let triple = Triple {
243            architecture,
244            vendor: target.triple().vendor.clone(),
245            operating_system,
246            environment: target.triple().environment,
247            binary_format,
248        };
249        TargetTriple::create(&triple.to_string())
250    }
251
252    /// Generates the target machine for the current target
253    pub fn target_machine(&self, target: &Target) -> TargetMachine {
254        self.target_machine_with_opt(target, true)
255    }
256
257    pub(crate) fn target_machine_with_opt(
258        &self,
259        target: &Target,
260        enable_optimization: bool,
261    ) -> TargetMachine {
262        let triple = target.triple();
263        let cpu_features = &target.cpu_features();
264
265        match triple.architecture {
266            Architecture::X86_64 | Architecture::X86_32(_) => {
267                InkwellTarget::initialize_x86(&InitializationConfig {
268                    asm_parser: true,
269                    asm_printer: true,
270                    base: true,
271                    disassembler: true,
272                    info: true,
273                    machine_code: true,
274                })
275            }
276            Architecture::Aarch64(_) => InkwellTarget::initialize_aarch64(&InitializationConfig {
277                asm_parser: true,
278                asm_printer: true,
279                base: true,
280                disassembler: true,
281                info: true,
282                machine_code: true,
283            }),
284            Architecture::Riscv64(_) | Architecture::Riscv32(_) => {
285                InkwellTarget::initialize_riscv(&InitializationConfig {
286                    asm_parser: true,
287                    asm_printer: true,
288                    base: true,
289                    disassembler: true,
290                    info: true,
291                    machine_code: true,
292                })
293            }
294            Architecture::LoongArch64 => {
295                InkwellTarget::initialize_loongarch(&InitializationConfig {
296                    asm_parser: true,
297                    asm_printer: true,
298                    base: true,
299                    disassembler: true,
300                    info: true,
301                    machine_code: true,
302                })
303            }
304            _ => unimplemented!("target {} not yet supported in Wasmer", triple),
305        }
306
307        // The CPU features formatted as LLVM strings
308        // We can safely map to gcc-like features as the CPUFeatures
309        // are compliant with the same string representations as gcc.
310        let llvm_cpu_features = cpu_features
311            .iter()
312            .map(|feature| format!("+{feature}"))
313            .join(",");
314
315        let target_triple = self.target_triple(target);
316        let llvm_target = InkwellTarget::from_triple(&target_triple).unwrap();
317        let mut llvm_target_machine_options = TargetMachineOptions::new()
318            .set_cpu(match triple.architecture {
319                Architecture::Riscv64(_) => "generic-rv64",
320                Architecture::Riscv32(_) => "generic-rv32",
321                Architecture::LoongArch64 => "generic-la64",
322                _ => "generic",
323            })
324            .set_features(match triple.architecture {
325                Architecture::Riscv64(_) => "+m,+a,+c,+d,+f",
326                Architecture::Riscv32(_) => "+m,+a,+c,+d,+f",
327                Architecture::LoongArch64 => "+f,+d",
328                _ => &llvm_cpu_features,
329            })
330            .set_level(if enable_optimization {
331                self.opt_level
332            } else {
333                LLVMOptLevel::None
334            })
335            .set_reloc_mode(self.reloc_mode(self.target_binary_format(target)))
336            .set_code_model(match triple.architecture {
337                Architecture::LoongArch64 | Architecture::Riscv64(_) | Architecture::Riscv32(_) => {
338                    CodeModel::Medium
339                }
340                _ => self.code_model(self.target_binary_format(target)),
341            });
342        if let Architecture::Riscv64(_) = triple.architecture {
343            llvm_target_machine_options = llvm_target_machine_options.set_abi("lp64d");
344        }
345        let target_machine = llvm_target
346            .create_target_machine_from_options(&target_triple, llvm_target_machine_options)
347            .unwrap();
348        target_machine.set_asm_verbosity(self.verbose_asm);
349        target_machine
350    }
351}
352
353impl CompilerConfig for LLVM {
354    /// Emit code suitable for dlopen.
355    fn enable_pic(&mut self) {
356        // TODO: although we can emit PIC, the object file parser does not yet
357        // support all the relocations.
358        self.is_pic = true;
359    }
360
361    fn enable_perfmap(&mut self) {
362        self.enable_perfmap = true
363    }
364
365    /// Whether to verify compiler IR.
366    fn enable_verifier(&mut self) {
367        self.enable_verifier = true;
368    }
369
370    fn canonicalize_nans(&mut self, enable: bool) {
371        self.enable_nan_canonicalization = enable;
372    }
373
374    /// Transform it into the compiler.
375    fn compiler(self: Box<Self>) -> Box<dyn Compiler> {
376        Box::new(LLVMCompiler::new(*self))
377    }
378
379    /// Pushes a middleware onto the back of the middleware chain.
380    fn push_middleware(&mut self, middleware: Arc<dyn ModuleMiddleware>) {
381        self.middlewares.push(middleware);
382    }
383
384    fn supported_features_for_target(&self, _target: &Target) -> wasmer_types::Features {
385        let mut feats = Features::default();
386        feats.exceptions(true);
387        feats
388    }
389}
390
391impl Default for LLVM {
392    fn default() -> LLVM {
393        Self::new()
394    }
395}
396
397impl From<LLVM> for Engine {
398    fn from(config: LLVM) -> Self {
399        EngineBuilder::new(config).engine()
400    }
401}