wasmer_compiler_singlepass/
machine_x64.rs

#[cfg(feature = "unwind")]
use crate::unwind_winx64::create_unwind_info_from_insts;
use crate::{
    codegen_error,
    common_decl::*,
    emitter_x64::*,
    location::{Location as AbstractLocation, Reg},
    machine::*,
    unwind::{UnwindInstructions, UnwindOps, UnwindRegister},
    x64_decl::{ArgumentRegisterAllocator, GPR, X64Register, XMM, new_machine_state},
};
use dynasmrt::{DynasmError, VecAssembler, x64::X64Relocation};
#[cfg(feature = "unwind")]
use gimli::{X86_64, write::CallFrameInstruction};
use std::ops::{Deref, DerefMut};
use wasmer_compiler::{
    types::{
        address_map::InstructionAddressMap,
        function::FunctionBody,
        relocation::{Relocation, RelocationKind, RelocationTarget},
        section::{CustomSection, CustomSectionProtection, SectionBody},
    },
    wasmparser::{MemArg, ValType as WpType},
};
use wasmer_types::{
    CompileError, FunctionIndex, FunctionType, SourceLoc, TrapCode, TrapInformation, Type,
    VMOffsets,
    target::{CallingConvention, CpuFeature, Target},
};

type Assembler = VecAssembler<X64Relocation>;

pub struct AssemblerX64 {
    /// The actual inner assembler.
    pub inner: Assembler,
    /// The SIMD instruction set used on the target.
    /// Currently only SSE 4.2 and AVX are supported.
    pub simd_arch: Option<CpuFeature>,
    /// The full target CPU.
    pub target: Option<Target>,
}

impl AssemblerX64 {
    fn new(baseaddr: usize, target: Option<Target>) -> Result<Self, CompileError> {
        let simd_arch = if target.is_none() {
            Some(CpuFeature::SSE42)
        } else {
            let target = target.as_ref().unwrap();
            if target.cpu_features().contains(CpuFeature::AVX) {
                Some(CpuFeature::AVX)
            } else if target.cpu_features().contains(CpuFeature::SSE42) {
                Some(CpuFeature::SSE42)
            } else {
                return Err(CompileError::UnsupportedTarget(
                    "x86_64 without AVX or SSE 4.2, use -m avx to enable".to_string(),
                ));
            }
        };

        Ok(Self {
            inner: Assembler::new(baseaddr),
            simd_arch,
            target,
        })
    }

    fn finalize(self) -> Result<Vec<u8>, DynasmError> {
        self.inner.finalize()
    }
}

impl Deref for AssemblerX64 {
    type Target = Assembler;

    fn deref(&self) -> &Self::Target {
        &self.inner
    }
}

impl DerefMut for AssemblerX64 {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.inner
    }
}

type Location = AbstractLocation<GPR, XMM>;

pub struct MachineX86_64 {
    assembler: AssemblerX64,
    used_gprs: u32,
    used_simd: u32,
    trap_table: TrapTable,
    /// Map from byte offset into wasm function to range of native instructions.
    ///
    /// Ordered by increasing `InstructionAddressMap::srcloc`.
    instructions_address_map: Vec<InstructionAddressMap>,
    /// The source location for the current operator.
    src_loc: u32,
    /// Vector of unwind operations with offset
    unwind_ops: Vec<(usize, UnwindOps<GPR, XMM>)>,
}

impl MachineX86_64 {
    pub fn new(target: Option<Target>) -> Result<Self, CompileError> {
        let assembler = AssemblerX64::new(0, target)?;
        Ok(MachineX86_64 {
            assembler,
            used_gprs: 0,
            used_simd: 0,
            trap_table: TrapTable::default(),
            instructions_address_map: vec![],
            src_loc: 0,
            unwind_ops: vec![],
        })
    }
    pub fn emit_relaxed_binop(
        &mut self,
        op: fn(&mut AssemblerX64, Size, Location, Location) -> Result<(), CompileError>,
        sz: Size,
        src: Location,
        dst: Location,
    ) -> Result<(), CompileError> {
        enum RelaxMode {
            Direct,
            SrcToGPR,
            DstToGPR,
            BothToGPR,
        }
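        // Legalize the operand pair: `imul` is only emitted with register
        // operands here, x86 has no memory-to-memory forms, and 64-bit
        // immediates are not encodable as arithmetic operands, so the
        // remaining cases are routed through temporary GPRs.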
        let mode = match (src, dst) {
            (Location::GPR(_), Location::GPR(_))
                if std::ptr::eq(op as *const u8, AssemblerX64::emit_imul as *const u8) =>
            {
                RelaxMode::Direct
            }
            _ if std::ptr::eq(op as *const u8, AssemblerX64::emit_imul as *const u8) => {
                RelaxMode::BothToGPR
            }

            (Location::Memory(_, _), Location::Memory(_, _)) => RelaxMode::SrcToGPR,
            (Location::Imm64(_), Location::Imm64(_)) | (Location::Imm64(_), Location::Imm32(_)) => {
                RelaxMode::BothToGPR
            }
            (_, Location::Imm32(_)) | (_, Location::Imm64(_)) => RelaxMode::DstToGPR,
            (Location::Imm64(_), Location::Memory(_, _)) => RelaxMode::SrcToGPR,
            (Location::Imm64(_), Location::GPR(_))
                if (op as *const u8 != AssemblerX64::emit_mov as *const u8) =>
            {
                RelaxMode::SrcToGPR
            }
            (_, Location::SIMD(_)) => RelaxMode::SrcToGPR,
            _ => RelaxMode::Direct,
        };

        match mode {
            RelaxMode::SrcToGPR => {
                let temp = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.move_location(sz, src, Location::GPR(temp))?;
                op(&mut self.assembler, sz, Location::GPR(temp), dst)?;
                self.release_gpr(temp);
            }
            RelaxMode::DstToGPR => {
                let temp = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.move_location(sz, dst, Location::GPR(temp))?;
                op(&mut self.assembler, sz, src, Location::GPR(temp))?;
                self.release_gpr(temp);
            }
            RelaxMode::BothToGPR => {
                let temp_src = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                let temp_dst = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.move_location(sz, src, Location::GPR(temp_src))?;
                self.move_location(sz, dst, Location::GPR(temp_dst))?;
                op(
                    &mut self.assembler,
                    sz,
                    Location::GPR(temp_src),
                    Location::GPR(temp_dst),
                )?;
                match dst {
                    Location::Memory(_, _) | Location::GPR(_) => {
                        self.move_location(sz, Location::GPR(temp_dst), dst)?;
                    }
                    _ => {}
                }
                self.release_gpr(temp_dst);
                self.release_gpr(temp_src);
            }
            RelaxMode::Direct => {
                op(&mut self.assembler, sz, src, dst)?;
            }
        }
        Ok(())
    }
    pub fn emit_relaxed_zx_sx(
        &mut self,
        op: fn(&mut AssemblerX64, Size, Location, Size, Location) -> Result<(), CompileError>,
        sz_src: Size,
        src: Location,
        sz_dst: Size,
        dst: Location,
    ) -> Result<(), CompileError> {
        match src {
            Location::Imm32(_) | Location::Imm64(_) => {
                let tmp_src = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.assembler
                    .emit_mov(Size::S64, src, Location::GPR(tmp_src))?;
                let src = Location::GPR(tmp_src);

                match dst {
                    Location::Imm32(_) | Location::Imm64(_) => unreachable!(),
                    Location::Memory(_, _) => {
                        let tmp_dst = self.acquire_temp_gpr().ok_or_else(|| {
                            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                        })?;
                        op(
                            &mut self.assembler,
                            sz_src,
                            src,
                            sz_dst,
                            Location::GPR(tmp_dst),
                        )?;
                        self.move_location(Size::S64, Location::GPR(tmp_dst), dst)?;

                        self.release_gpr(tmp_dst);
                    }
                    Location::GPR(_) => {
                        op(&mut self.assembler, sz_src, src, sz_dst, dst)?;
                    }
                    _ => {
                        codegen_error!("singlepass emit_relaxed_zx_sx unreachable");
                    }
                };

                self.release_gpr(tmp_src);
            }
            Location::GPR(_) | Location::Memory(_, _) => {
                match dst {
                    Location::Imm32(_) | Location::Imm64(_) => unreachable!(),
                    Location::Memory(_, _) => {
                        let tmp_dst = self.acquire_temp_gpr().ok_or_else(|| {
                            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                        })?;
                        op(
                            &mut self.assembler,
                            sz_src,
                            src,
                            sz_dst,
                            Location::GPR(tmp_dst),
                        )?;
                        self.move_location(Size::S64, Location::GPR(tmp_dst), dst)?;

                        self.release_gpr(tmp_dst);
                    }
                    Location::GPR(_) => {
                        op(&mut self.assembler, sz_src, src, sz_dst, dst)?;
                    }
                    _ => {
                        codegen_error!("singlepass emit_relaxed_zx_sx unreachable");
                    }
                };
            }
            _ => {
                codegen_error!("singlepass emit_relaxed_zx_sx unreachable");
            }
        }
        Ok(())
    }
    /// I32 binary operation with both operands popped from the virtual stack.
    fn emit_binop_i32(
        &mut self,
        f: fn(&mut AssemblerX64, Size, Location, Location) -> Result<(), CompileError>,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        if loc_a != ret {
            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            self.emit_relaxed_mov(Size::S32, loc_a, Location::GPR(tmp))?;
            self.emit_relaxed_binop(f, Size::S32, loc_b, Location::GPR(tmp))?;
            self.emit_relaxed_mov(Size::S32, Location::GPR(tmp), ret)?;
            self.release_gpr(tmp);
        } else {
            self.emit_relaxed_binop(f, Size::S32, loc_b, ret)?;
        }
        Ok(())
    }
    /// I64 binary operation with both operands popped from the virtual stack.
    fn emit_binop_i64(
        &mut self,
        f: fn(&mut AssemblerX64, Size, Location, Location) -> Result<(), CompileError>,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        if loc_a != ret {
            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            self.emit_relaxed_mov(Size::S64, loc_a, Location::GPR(tmp))?;
            self.emit_relaxed_binop(f, Size::S64, loc_b, Location::GPR(tmp))?;
            self.emit_relaxed_mov(Size::S64, Location::GPR(tmp), ret)?;
            self.release_gpr(tmp);
        } else {
            self.emit_relaxed_binop(f, Size::S64, loc_b, ret)?;
        }
        Ok(())
    }
    /// I64 comparison with a dynamic `loc_b` operand, storing the boolean result in `ret`.
    fn emit_cmpop_i64_dynamic_b(
        &mut self,
        c: Condition,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        match ret {
            Location::GPR(x) => {
                self.emit_relaxed_cmp(Size::S64, loc_b, loc_a)?;
                self.assembler.emit_set(c, x)?;
                self.assembler
                    .emit_and(Size::S32, Location::Imm32(0xff), Location::GPR(x))?;
            }
            Location::Memory(_, _) => {
                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.emit_relaxed_cmp(Size::S64, loc_b, loc_a)?;
                self.assembler.emit_set(c, tmp)?;
                self.assembler
                    .emit_and(Size::S32, Location::Imm32(0xff), Location::GPR(tmp))?;
                self.move_location(Size::S32, Location::GPR(tmp), ret)?;
                self.release_gpr(tmp);
            }
            _ => {
                codegen_error!("singlepass emit_cmpop_i64_dynamic_b unreachable");
            }
        }
        Ok(())
    }
    /// I64 shift with both operands popped from the virtual stack.
    fn emit_shift_i64(
        &mut self,
        f: fn(&mut AssemblerX64, Size, Location, Location) -> Result<(), CompileError>,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
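        // x86 variable shifts take their count in CL, so `loc_b` is staged in RCX.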
        self.assembler
            .emit_mov(Size::S64, loc_b, Location::GPR(GPR::RCX))?;

        if loc_a != ret {
            self.emit_relaxed_mov(Size::S64, loc_a, ret)?;
        }

        f(&mut self.assembler, Size::S64, Location::GPR(GPR::RCX), ret)
    }
    /// Moves `loc` to a valid location for `div`/`idiv`.
    fn emit_relaxed_xdiv(
        &mut self,
        op: fn(&mut AssemblerX64, Size, Location) -> Result<(), CompileError>,
        sz: Size,
        loc: Location,
        integer_division_by_zero: Label,
    ) -> Result<usize, CompileError> {
        self.assembler.emit_cmp(sz, Location::Imm32(0), loc)?;
        self.assembler
            .emit_jmp(Condition::Equal, integer_division_by_zero)?;
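        // Division by zero is handled above; the hardware #DE that `idiv`
        // raises for INT_MIN / -1 is what the IntegerOverflow marker below
        // reports.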

        match loc {
            Location::Imm64(_) | Location::Imm32(_) => {
                self.move_location(sz, loc, Location::GPR(GPR::RCX))?; // RCX is safe here: div only uses RAX and RDX implicitly
                let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow);
                op(&mut self.assembler, sz, Location::GPR(GPR::RCX))?;
                self.mark_instruction_address_end(offset);
                Ok(offset)
            }
            _ => {
                let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow);
                op(&mut self.assembler, sz, loc)?;
                self.mark_instruction_address_end(offset);
                Ok(offset)
            }
        }
    }
    /// I32 comparison with a dynamic `loc_b` operand, storing the boolean result in `ret`.
    fn emit_cmpop_i32_dynamic_b(
        &mut self,
        c: Condition,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        match ret {
            Location::GPR(x) => {
                self.emit_relaxed_cmp(Size::S32, loc_b, loc_a)?;
                self.assembler.emit_set(c, x)?;
                self.assembler
                    .emit_and(Size::S32, Location::Imm32(0xff), Location::GPR(x))?;
            }
            Location::Memory(_, _) => {
                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.emit_relaxed_cmp(Size::S32, loc_b, loc_a)?;
                self.assembler.emit_set(c, tmp)?;
                self.assembler
                    .emit_and(Size::S32, Location::Imm32(0xff), Location::GPR(tmp))?;
                self.move_location(Size::S32, Location::GPR(tmp), ret)?;
                self.release_gpr(tmp);
            }
            _ => {
                codegen_error!("singlepass emit_cmpop_i32_dynamic_b unreachable");
            }
        }
        Ok(())
    }
    /// I32 shift with both operands popped from the virtual stack.
    fn emit_shift_i32(
        &mut self,
        f: fn(&mut AssemblerX64, Size, Location, Location) -> Result<(), CompileError>,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.assembler
            .emit_mov(Size::S32, loc_b, Location::GPR(GPR::RCX))?;

        if loc_a != ret {
            self.emit_relaxed_mov(Size::S32, loc_a, ret)?;
        }

        f(&mut self.assembler, Size::S32, Location::GPR(GPR::RCX), ret)
    }

    #[allow(clippy::too_many_arguments)]
    fn memory_op<F: FnOnce(&mut Self, GPR) -> Result<(), CompileError>>(
        &mut self,
        addr: Location,
        memarg: &MemArg,
        check_alignment: bool,
        value_size: usize,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
        cb: F,
    ) -> Result<(), CompileError> {
        // This function has been rewritten to use only 2 temporary registers
        // instead of 3, without compromising performance. The number of memory
        // moves should be equivalent to the previous 3-temp-regs version.
        // Register pressure is high on x86_64, and this is needed to be able
        // to use instructions that need RAX, like cmpxchg for example.
        let tmp_addr = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp2 = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;

        // Reusing `tmp2` for temporary indirection here, since it's not used before the last reference to `{base,bound}_loc`.
        let base_loc = if imported_memories {
            // Imported memories require one level of indirection.
            self.emit_relaxed_binop(
                AssemblerX64::emit_mov,
                Size::S64,
                Location::Memory(self.get_vmctx_reg(), offset),
                Location::GPR(tmp2),
            )?;
            Location::Memory(tmp2, 0)
        } else {
            Location::Memory(self.get_vmctx_reg(), offset)
        };

        // Load base into temporary register.
        self.assembler
            .emit_mov(Size::S64, base_loc, Location::GPR(tmp2))?;

        // Load effective address.
        // `base_loc` and `bound_loc` become INVALID after this line, because `tmp2`
        // might be reused.
        self.assembler
            .emit_mov(Size::S32, addr, Location::GPR(tmp_addr))?;

        // Add offset to memory address.
        if memarg.offset != 0 {
            self.assembler.emit_add(
                Size::S32,
                Location::Imm32(memarg.offset as u32),
                Location::GPR(tmp_addr),
            )?;

            // Trap if offset calculation overflowed.
            self.assembler.emit_jmp(Condition::Carry, heap_access_oob)?;
        }

        if need_check {
            let bound_loc = if imported_memories {
                // Imported memories require one level of indirection.
                self.emit_relaxed_binop(
                    AssemblerX64::emit_mov,
                    Size::S64,
                    Location::Memory(self.get_vmctx_reg(), offset),
                    Location::GPR(tmp2),
                )?;
                Location::Memory(tmp2, 8)
            } else {
                Location::Memory(self.get_vmctx_reg(), offset + 8)
            };
            self.assembler
                .emit_mov(Size::S64, bound_loc, Location::GPR(tmp2))?;

            // We compare against the upper bound without having added the base value; `value_size` is constant, so it is folded into the bound via `lea`.
            self.assembler.emit_lea(
                Size::S64,
                Location::Memory(tmp2, -(value_size as i32)),
                Location::GPR(tmp2),
            )?;
            // Trap if the end address of the requested area is above that of the linear memory.
            self.assembler
                .emit_cmp(Size::S64, Location::GPR(tmp2), Location::GPR(tmp_addr))?;

            // The bound in `tmp2` is inclusive, so trap only if `tmp_addr > tmp2`.
            self.assembler.emit_jmp(Condition::Above, heap_access_oob)?;
        }
        // Get back `base_loc`, as it might have been destroyed by the upper-bound memory test.
        let base_loc = if imported_memories {
            // Imported memories require one level of indirection.
            self.emit_relaxed_binop(
                AssemblerX64::emit_mov,
                Size::S64,
                Location::Memory(self.get_vmctx_reg(), offset),
                Location::GPR(tmp2),
            )?;
            Location::Memory(tmp2, 0)
        } else {
            Location::Memory(self.get_vmctx_reg(), offset)
        };
        // Wasm linear memory -> real memory
        self.assembler
            .emit_add(Size::S64, base_loc, Location::GPR(tmp_addr))?;

        self.release_gpr(tmp2);

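        // For atomic accesses, verify natural alignment:
        // `addr & (value_size - 1)` must be zero.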
        let align = value_size as u32;
        if check_alignment && align != 1 {
            let tmp_aligncheck = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            self.assembler.emit_mov(
                Size::S32,
                Location::GPR(tmp_addr),
                Location::GPR(tmp_aligncheck),
            )?;
            self.assembler.emit_and(
                Size::S64,
                Location::Imm32(align - 1),
                Location::GPR(tmp_aligncheck),
            )?;
            self.assembler
                .emit_jmp(Condition::NotEqual, unaligned_atomic)?;
            self.release_gpr(tmp_aligncheck);
        }
        let begin = self.assembler.get_offset().0;
        cb(self, tmp_addr)?;
        let end = self.assembler.get_offset().0;
        self.mark_address_range_with_trap_code(TrapCode::HeapAccessOutOfBounds, begin, end);

        self.release_gpr(tmp_addr);
        Ok(())
    }

    #[allow(clippy::too_many_arguments)]
    fn emit_compare_and_swap<F: FnOnce(&mut Self, GPR, GPR) -> Result<(), CompileError>>(
        &mut self,
        loc: Location,
        target: Location,
        ret: Location,
        memarg: &MemArg,
        value_size: usize,
        memory_sz: Size,
        stack_sz: Size,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
        cb: F,
    ) -> Result<(), CompileError> {
        if memory_sz > stack_sz {
            codegen_error!("singlepass emit_compare_and_swap unreachable");
        }

        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
        let value = if loc == Location::GPR(GPR::R14) {
            GPR::R13
        } else {
            GPR::R14
        };
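        // `lock cmpxchg` implicitly compares against RAX, which is reserved
        // above as `compare`; `value` just needs a GPR that doesn't alias `loc`.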
        self.assembler.emit_push(Size::S64, Location::GPR(value))?;

        self.move_location(stack_sz, loc, Location::GPR(value))?;

        let retry = self.assembler.get_label();
        self.emit_label(retry)?;

        self.memory_op(
            target,
            memarg,
            true,
            value_size,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.load_address(memory_sz, Location::GPR(compare), Location::Memory(addr, 0))?;
                this.move_location(stack_sz, Location::GPR(compare), ret)?;
                cb(this, compare, value)?;
                this.assembler.emit_lock_cmpxchg(
                    memory_sz,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )
            },
        )?;

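        // `lock cmpxchg` sets ZF on success; loop until the exchange wins.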
        self.assembler.emit_jmp(Condition::NotEqual, retry)?;

        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
        self.release_gpr(compare);
        Ok(())
    }

    // Checks for underflow/overflow/nan.
    #[allow(clippy::too_many_arguments)]
    fn emit_f32_int_conv_check(
        &mut self,
        reg: XMM,
        lower_bound: f32,
        upper_bound: f32,
        underflow_label: Label,
        overflow_label: Label,
        nan_label: Label,
        succeed_label: Label,
    ) -> Result<(), CompileError> {
        let lower_bound = f32::to_bits(lower_bound);
        let upper_bound = f32::to_bits(upper_bound);

        let tmp = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_x = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

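        // Each bound check loads the constant into `tmp_x`, compares it with
        // `reg` using a vcmp* instruction (producing an all-ones/all-zeros
        // mask), and then tests the mask's low 32 bits through `tmp`.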
        // Underflow.
        self.move_location(Size::S32, Location::Imm32(lower_bound), Location::GPR(tmp))?;
        self.move_location(Size::S32, Location::GPR(tmp), Location::SIMD(tmp_x))?;
        self.assembler
            .emit_vcmpless(reg, XMMOrMemory::XMM(tmp_x), tmp_x)?;
        self.move_location(Size::S32, Location::SIMD(tmp_x), Location::GPR(tmp))?;
        self.assembler
            .emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp))?;
        self.assembler
            .emit_jmp(Condition::NotEqual, underflow_label)?;

        // Overflow.
        self.move_location(Size::S32, Location::Imm32(upper_bound), Location::GPR(tmp))?;
        self.move_location(Size::S32, Location::GPR(tmp), Location::SIMD(tmp_x))?;
        self.assembler
            .emit_vcmpgess(reg, XMMOrMemory::XMM(tmp_x), tmp_x)?;
        self.move_location(Size::S32, Location::SIMD(tmp_x), Location::GPR(tmp))?;
        self.assembler
            .emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp))?;
        self.assembler
            .emit_jmp(Condition::NotEqual, overflow_label)?;

        // NaN.
        self.assembler
            .emit_vcmpeqss(reg, XMMOrMemory::XMM(reg), tmp_x)?;
        self.move_location(Size::S32, Location::SIMD(tmp_x), Location::GPR(tmp))?;
        self.assembler
            .emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp))?;
        self.assembler.emit_jmp(Condition::Equal, nan_label)?;

        self.assembler.emit_jmp(Condition::None, succeed_label)?;

        self.release_simd(tmp_x);
        self.release_gpr(tmp);
        Ok(())
    }

    // Checks for underflow/overflow/nan before IxxTrunc{U/S}F32.
    fn emit_f32_int_conv_check_trap(
        &mut self,
        reg: XMM,
        lower_bound: f32,
        upper_bound: f32,
    ) -> Result<(), CompileError> {
        let trap_overflow = self.assembler.get_label();
        let trap_badconv = self.assembler.get_label();
        let end = self.assembler.get_label();

        self.emit_f32_int_conv_check(
            reg,
            lower_bound,
            upper_bound,
            trap_overflow,
            trap_overflow,
            trap_badconv,
            end,
        )?;

        self.emit_label(trap_overflow)?;

        self.emit_illegal_op_internal(TrapCode::IntegerOverflow)?;

        self.emit_label(trap_badconv)?;

        self.emit_illegal_op_internal(TrapCode::BadConversionToInteger)?;

        self.emit_label(end)?;
        Ok(())
    }
    #[allow(clippy::too_many_arguments)]
    fn emit_f32_int_conv_check_sat<
        F1: FnOnce(&mut Self) -> Result<(), CompileError>,
        F2: FnOnce(&mut Self) -> Result<(), CompileError>,
        F3: FnOnce(&mut Self) -> Result<(), CompileError>,
        F4: FnOnce(&mut Self) -> Result<(), CompileError>,
    >(
        &mut self,
        reg: XMM,
        lower_bound: f32,
        upper_bound: f32,
        underflow_cb: F1,
        overflow_cb: F2,
        nan_cb: Option<F3>,
        convert_cb: F4,
    ) -> Result<(), CompileError> {
        // As an optimization, nan_cb is optional; when it is None we reuse
        // 'underflow' as the 'nan' label. This is useful for callers who
        // set the return value to zero for both underflow and nan.

        let underflow = self.assembler.get_label();
        let overflow = self.assembler.get_label();
        let nan = if nan_cb.is_some() {
            self.assembler.get_label()
        } else {
            underflow
        };
        let convert = self.assembler.get_label();
        let end = self.assembler.get_label();

        self.emit_f32_int_conv_check(
            reg,
            lower_bound,
            upper_bound,
            underflow,
            overflow,
            nan,
            convert,
        )?;

        self.emit_label(underflow)?;
        underflow_cb(self)?;
        self.assembler.emit_jmp(Condition::None, end)?;

        self.emit_label(overflow)?;
        overflow_cb(self)?;
        self.assembler.emit_jmp(Condition::None, end)?;

        if let Some(cb) = nan_cb {
            self.emit_label(nan)?;
            cb(self)?;
            self.assembler.emit_jmp(Condition::None, end)?;
        }

        self.emit_label(convert)?;
        convert_cb(self)?;
        self.emit_label(end)
    }
    // Checks for underflow/overflow/nan.
    #[allow(clippy::too_many_arguments)]
    fn emit_f64_int_conv_check(
        &mut self,
        reg: XMM,
        lower_bound: f64,
        upper_bound: f64,
        underflow_label: Label,
        overflow_label: Label,
        nan_label: Label,
        succeed_label: Label,
    ) -> Result<(), CompileError> {
        let lower_bound = f64::to_bits(lower_bound);
        let upper_bound = f64::to_bits(upper_bound);

        let tmp = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_x = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        // Underflow.
        self.move_location(Size::S64, Location::Imm64(lower_bound), Location::GPR(tmp))?;
        self.move_location(Size::S64, Location::GPR(tmp), Location::SIMD(tmp_x))?;
        self.assembler
            .emit_vcmplesd(reg, XMMOrMemory::XMM(tmp_x), tmp_x)?;
        self.move_location(Size::S32, Location::SIMD(tmp_x), Location::GPR(tmp))?;
        self.assembler
            .emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp))?;
        self.assembler
            .emit_jmp(Condition::NotEqual, underflow_label)?;

        // Overflow.
        self.move_location(Size::S64, Location::Imm64(upper_bound), Location::GPR(tmp))?;
        self.move_location(Size::S64, Location::GPR(tmp), Location::SIMD(tmp_x))?;
        self.assembler
            .emit_vcmpgesd(reg, XMMOrMemory::XMM(tmp_x), tmp_x)?;
        self.move_location(Size::S32, Location::SIMD(tmp_x), Location::GPR(tmp))?;
        self.assembler
            .emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp))?;
        self.assembler
            .emit_jmp(Condition::NotEqual, overflow_label)?;

        // NaN.
        self.assembler
            .emit_vcmpeqsd(reg, XMMOrMemory::XMM(reg), tmp_x)?;
        self.move_location(Size::S32, Location::SIMD(tmp_x), Location::GPR(tmp))?;
        self.assembler
            .emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp))?;
        self.assembler.emit_jmp(Condition::Equal, nan_label)?;

        self.assembler.emit_jmp(Condition::None, succeed_label)?;

        self.release_simd(tmp_x);
        self.release_gpr(tmp);
        Ok(())
    }
    // Checks for underflow/overflow/nan before IxxTrunc{U/S}F64.
    fn emit_f64_int_conv_check_trap(
        &mut self,
        reg: XMM,
        lower_bound: f64,
        upper_bound: f64,
    ) -> Result<(), CompileError> {
        let trap_overflow = self.assembler.get_label();
        let trap_badconv = self.assembler.get_label();
        let end = self.assembler.get_label();

        self.emit_f64_int_conv_check(
            reg,
            lower_bound,
            upper_bound,
            trap_overflow,
            trap_overflow,
            trap_badconv,
            end,
        )?;

        self.emit_label(trap_overflow)?;
        self.emit_illegal_op_internal(TrapCode::IntegerOverflow)?;

        self.emit_label(trap_badconv)?;
        self.emit_illegal_op_internal(TrapCode::BadConversionToInteger)?;

        self.emit_label(end)
    }
    #[allow(clippy::too_many_arguments)]
    fn emit_f64_int_conv_check_sat<
        F1: FnOnce(&mut Self) -> Result<(), CompileError>,
        F2: FnOnce(&mut Self) -> Result<(), CompileError>,
        F3: FnOnce(&mut Self) -> Result<(), CompileError>,
        F4: FnOnce(&mut Self) -> Result<(), CompileError>,
    >(
        &mut self,
        reg: XMM,
        lower_bound: f64,
        upper_bound: f64,
        underflow_cb: F1,
        overflow_cb: F2,
        nan_cb: Option<F3>,
        convert_cb: F4,
    ) -> Result<(), CompileError> {
        // As an optimization, nan_cb is optional; when it is None we reuse
        // 'underflow' as the 'nan' label. This is useful for callers who
        // set the return value to zero for both underflow and nan.

        let underflow = self.assembler.get_label();
        let overflow = self.assembler.get_label();
        let nan = if nan_cb.is_some() {
            self.assembler.get_label()
        } else {
            underflow
        };
        let convert = self.assembler.get_label();
        let end = self.assembler.get_label();

        self.emit_f64_int_conv_check(
            reg,
            lower_bound,
            upper_bound,
            underflow,
            overflow,
            nan,
            convert,
        )?;

        self.emit_label(underflow)?;
        underflow_cb(self)?;
        self.assembler.emit_jmp(Condition::None, end)?;

        self.emit_label(overflow)?;
        overflow_cb(self)?;
        self.assembler.emit_jmp(Condition::None, end)?;

        if let Some(cb) = nan_cb {
            self.emit_label(nan)?;
            cb(self)?;
            self.assembler.emit_jmp(Condition::None, end)?;
        }

        self.emit_label(convert)?;
        convert_cb(self)?;
        self.emit_label(end)
    }
    /// Moves `src1` and `src2` to valid locations and possibly adds a layer of indirection for `dst` for AVX instructions.
    fn emit_relaxed_avx(
        &mut self,
        op: fn(&mut AssemblerX64, XMM, XMMOrMemory, XMM) -> Result<(), CompileError>,
        src1: Location,
        src2: Location,
        dst: Location,
    ) -> Result<(), CompileError> {
        self.emit_relaxed_avx_base(
            |this, src1, src2, dst| op(&mut this.assembler, src1, src2, dst),
            src1,
            src2,
            dst,
        )
    }

    /// Moves `src1` and `src2` to valid locations and possibly adds a layer of indirection for `dst` for AVX instructions.
    fn emit_relaxed_avx_base<
        F: FnOnce(&mut Self, XMM, XMMOrMemory, XMM) -> Result<(), CompileError>,
    >(
        &mut self,
        op: F,
        src1: Location,
        src2: Location,
        dst: Location,
    ) -> Result<(), CompileError> {
        let tmp1 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp2 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp3 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmpg = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;

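        // Normalize `src1` into an XMM register; immediates have no direct
        // path to XMM, so they bounce through the scratch GPR `tmpg`.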
        let src1 = match src1 {
            Location::SIMD(x) => x,
            Location::GPR(_) | Location::Memory(_, _) => {
                self.assembler
                    .emit_mov(Size::S64, src1, Location::SIMD(tmp1))?;
                tmp1
            }
            Location::Imm32(_) => {
                self.assembler
                    .emit_mov(Size::S32, src1, Location::GPR(tmpg))?;
                self.move_location(Size::S32, Location::GPR(tmpg), Location::SIMD(tmp1))?;
                tmp1
            }
            Location::Imm64(_) => {
                self.assembler
                    .emit_mov(Size::S64, src1, Location::GPR(tmpg))?;
                self.move_location(Size::S64, Location::GPR(tmpg), Location::SIMD(tmp1))?;
                tmp1
            }
            _ => {
                codegen_error!("singlepass emit_relaxed_avx_base unreachable")
            }
        };

        let src2 = match src2 {
            Location::SIMD(x) => XMMOrMemory::XMM(x),
            Location::Memory(base, disp) => XMMOrMemory::Memory(base, disp),
            Location::GPR(_) => {
                self.assembler
                    .emit_mov(Size::S64, src2, Location::SIMD(tmp2))?;
                XMMOrMemory::XMM(tmp2)
            }
            Location::Imm32(_) => {
                self.assembler
                    .emit_mov(Size::S32, src2, Location::GPR(tmpg))?;
                self.move_location(Size::S32, Location::GPR(tmpg), Location::SIMD(tmp2))?;
                XMMOrMemory::XMM(tmp2)
            }
            Location::Imm64(_) => {
                self.assembler
                    .emit_mov(Size::S64, src2, Location::GPR(tmpg))?;
                self.move_location(Size::S64, Location::GPR(tmpg), Location::SIMD(tmp2))?;
                XMMOrMemory::XMM(tmp2)
            }
            _ => {
                codegen_error!("singlepass emit_relaxed_avx_base unreachable")
            }
        };

        match dst {
            Location::SIMD(x) => {
                op(self, src1, src2, x)?;
            }
            Location::Memory(_, _) | Location::GPR(_) => {
                op(self, src1, src2, tmp3)?;
                self.assembler
                    .emit_mov(Size::S64, Location::SIMD(tmp3), dst)?;
            }
            _ => {
                codegen_error!("singlepass emit_relaxed_avx_base unreachable")
            }
        }

        self.release_gpr(tmpg);
        self.release_simd(tmp3);
        self.release_simd(tmp2);
        self.release_simd(tmp1);
        Ok(())
    }

    fn convert_i64_f64_u_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
        self.emit_f64_int_conv_check_sat(
            tmp_in,
            GEF64_LT_U64_MIN,
            LEF64_GT_U64_MAX,
            |this| {
                this.assembler
                    .emit_mov(Size::S64, Location::Imm64(0), Location::GPR(tmp_out))
            },
            |this| {
                this.assembler.emit_mov(
                    Size::S64,
                    Location::Imm64(u64::MAX),
                    Location::GPR(tmp_out),
                )
            },
            None::<fn(this: &mut Self) -> Result<(), CompileError>>,
            |this| {
                if this.assembler.arch_has_itruncf() {
                    this.assembler.arch_emit_i64_trunc_uf64(tmp_in, tmp_out)
                } else {
                    let tmp = this.acquire_temp_gpr().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                    })?;
                    let tmp_x1 = this.acquire_temp_simd().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
                    })?;
                    let tmp_x2 = this.acquire_temp_simd().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
                    })?;

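                    // f64 -> u64 without a native unsigned convert: compute
                    // cvttsd2si(x - 2^63) with the sign bit flipped for the
                    // high range, plain cvttsd2si(x) for the low range, and
                    // select between them with cmovae on x >= 2^63.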
                    this.assembler.emit_mov(
                        Size::S64,
                        Location::Imm64(4890909195324358656u64),
                        Location::GPR(tmp),
                    )?; // double 9.2233720368547758E+18
                    this.assembler.emit_mov(
                        Size::S64,
                        Location::GPR(tmp),
                        Location::SIMD(tmp_x1),
                    )?;
                    this.assembler.emit_mov(
                        Size::S64,
                        Location::SIMD(tmp_in),
                        Location::SIMD(tmp_x2),
                    )?;
                    this.assembler
                        .emit_vsubsd(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in)?;
                    this.assembler
                        .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
                    this.assembler.emit_mov(
                        Size::S64,
                        Location::Imm64(0x8000000000000000u64),
                        Location::GPR(tmp),
                    )?;
                    this.assembler.emit_xor(
                        Size::S64,
                        Location::GPR(tmp_out),
                        Location::GPR(tmp),
                    )?;
                    this.assembler
                        .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out)?;
                    this.assembler
                        .emit_ucomisd(XMMOrMemory::XMM(tmp_x1), tmp_x2)?;
                    this.assembler.emit_cmovae_gpr_64(tmp, tmp_out)?;

                    this.release_simd(tmp_x2);
                    this.release_simd(tmp_x1);
                    this.release_gpr(tmp);
                    Ok(())
                }
            },
        )?;

        self.assembler
            .emit_mov(Size::S64, Location::GPR(tmp_out), ret)?;
        self.release_simd(tmp_in);
        self.release_gpr(tmp_out);
        Ok(())
    }
    fn convert_i64_f64_u_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        if self.assembler.arch_has_itruncf() {
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
            self.assembler.arch_emit_i64_trunc_uf64(tmp_in, tmp_out)?;
            self.emit_relaxed_mov(Size::S64, Location::GPR(tmp_out), ret)?;
            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        } else {
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?; // xmm2

            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
            self.emit_f64_int_conv_check_trap(tmp_in, GEF64_LT_U64_MIN, LEF64_GT_U64_MAX)?;

            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?; // r15
            let tmp_x1 = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?; // xmm1
            let tmp_x2 = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?; // xmm3

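            // Same branchless f64 -> u64 trick as the saturating path above:
            // convert x - 2^63 and flip the sign bit, convert x directly,
            // then cmovae-select on x >= 2^63.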
1172            self.move_location(
1173                Size::S64,
1174                Location::Imm64(4890909195324358656u64),
1175                Location::GPR(tmp),
1176            )?; //double 9.2233720368547758E+18
1177            self.move_location(Size::S64, Location::GPR(tmp), Location::SIMD(tmp_x1))?;
1178            self.move_location(Size::S64, Location::SIMD(tmp_in), Location::SIMD(tmp_x2))?;
1179            self.assembler
1180                .emit_vsubsd(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in)?;
1181            self.assembler
1182                .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
1183            self.move_location(
1184                Size::S64,
1185                Location::Imm64(0x8000000000000000u64),
1186                Location::GPR(tmp),
1187            )?;
1188            self.assembler
1189                .emit_xor(Size::S64, Location::GPR(tmp_out), Location::GPR(tmp))?;
1190            self.assembler
1191                .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out)?;
1192            self.assembler
1193                .emit_ucomisd(XMMOrMemory::XMM(tmp_x1), tmp_x2)?;
1194            self.assembler.emit_cmovae_gpr_64(tmp, tmp_out)?;
1195            self.move_location(Size::S64, Location::GPR(tmp_out), ret)?;
1196
1197            self.release_simd(tmp_x2);
1198            self.release_simd(tmp_x1);
1199            self.release_gpr(tmp);
1200            self.release_simd(tmp_in);
1201            self.release_gpr(tmp_out);
1202        }
1203        Ok(())
1204    }
1205    fn convert_i64_f64_s_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
1206        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1207            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1208        })?;
1209        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1210            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1211        })?;
1212
1213        self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
1214        self.emit_f64_int_conv_check_sat(
1215            tmp_in,
1216            GEF64_LT_I64_MIN,
1217            LEF64_GT_I64_MAX,
1218            |this| {
1219                this.assembler.emit_mov(
1220                    Size::S64,
1221                    Location::Imm64(i64::MIN as u64),
1222                    Location::GPR(tmp_out),
1223                )
1224            },
1225            |this| {
1226                this.assembler.emit_mov(
1227                    Size::S64,
1228                    Location::Imm64(i64::MAX as u64),
1229                    Location::GPR(tmp_out),
1230                )
1231            },
1232            Some(|this: &mut Self| {
1233                this.assembler
1234                    .emit_mov(Size::S64, Location::Imm64(0), Location::GPR(tmp_out))
1235            }),
1236            |this| {
1237                if this.assembler.arch_has_itruncf() {
1238                    this.assembler.arch_emit_i64_trunc_sf64(tmp_in, tmp_out)
1239                } else {
1240                    this.assembler
1241                        .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)
1242                }
1243            },
1244        )?;
1245
1246        self.assembler
1247            .emit_mov(Size::S64, Location::GPR(tmp_out), ret)?;
1248        self.release_simd(tmp_in);
1249        self.release_gpr(tmp_out);
1250        Ok(())
1251    }
1252    fn convert_i64_f64_s_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
1253        if self.assembler.arch_has_itruncf() {
1254            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1255                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1256            })?;
1257            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1258                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1259            })?;
1260            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
1261            self.assembler.arch_emit_i64_trunc_sf64(tmp_in, tmp_out)?;
1262            self.emit_relaxed_mov(Size::S64, Location::GPR(tmp_out), ret)?;
1263            self.release_simd(tmp_in);
1264            self.release_gpr(tmp_out);
1265        } else {
1266            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1267                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1268            })?;
1269            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1270                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1271            })?;
1272
1273            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
1274            self.emit_f64_int_conv_check_trap(tmp_in, GEF64_LT_I64_MIN, LEF64_GT_I64_MAX)?;
1275
1276            self.assembler
1277                .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
1278            self.move_location(Size::S64, Location::GPR(tmp_out), ret)?;
1279
1280            self.release_simd(tmp_in);
1281            self.release_gpr(tmp_out);
1282        }
1283        Ok(())
1284    }
1285    fn convert_i32_f64_s_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
1286        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1287            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1288        })?;
1289        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1290            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1291        })?;
1292
1293        let real_in = match loc {
1294            Location::Imm32(_) | Location::Imm64(_) => {
1295                self.move_location(Size::S64, loc, Location::GPR(tmp_out))?;
1296                self.move_location(Size::S64, Location::GPR(tmp_out), Location::SIMD(tmp_in))?;
1297                tmp_in
1298            }
1299            Location::SIMD(x) => x,
1300            _ => {
1301                self.move_location(Size::S64, loc, Location::SIMD(tmp_in))?;
1302                tmp_in
1303            }
1304        };
1305
1306        self.emit_f64_int_conv_check_sat(
1307            real_in,
1308            GEF64_LT_I32_MIN,
1309            LEF64_GT_I32_MAX,
1310            |this| {
1311                this.assembler.emit_mov(
1312                    Size::S32,
1313                    Location::Imm32(i32::MIN as u32),
1314                    Location::GPR(tmp_out),
1315                )
1316            },
1317            |this| {
1318                this.assembler.emit_mov(
1319                    Size::S32,
1320                    Location::Imm32(i32::MAX as u32),
1321                    Location::GPR(tmp_out),
1322                )
1323            },
1324            Some(|this: &mut Self| {
1325                this.assembler
1326                    .emit_mov(Size::S32, Location::Imm32(0), Location::GPR(tmp_out))
1327            }),
1328            |this| {
1329                if this.assembler.arch_has_itruncf() {
1330                    this.assembler.arch_emit_i32_trunc_sf64(tmp_in, tmp_out)
1331                } else {
1332                    this.assembler
1333                        .emit_cvttsd2si_32(XMMOrMemory::XMM(real_in), tmp_out)
1334                }
1335            },
1336        )?;
1337
1338        self.assembler
1339            .emit_mov(Size::S32, Location::GPR(tmp_out), ret)?;
1340        self.release_simd(tmp_in);
1341        self.release_gpr(tmp_out);
1342        Ok(())
1343    }
    fn convert_i32_f64_s_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        if self.assembler.arch_has_itruncf() {
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
            self.assembler.arch_emit_i32_trunc_sf64(tmp_in, tmp_out)?;
            self.emit_relaxed_mov(Size::S32, Location::GPR(tmp_out), ret)?;
            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        } else {
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;

            let real_in = match loc {
                Location::Imm32(_) | Location::Imm64(_) => {
                    self.move_location(Size::S64, loc, Location::GPR(tmp_out))?;
                    self.move_location(Size::S64, Location::GPR(tmp_out), Location::SIMD(tmp_in))?;
                    tmp_in
                }
                Location::SIMD(x) => x,
                _ => {
                    self.move_location(Size::S64, loc, Location::SIMD(tmp_in))?;
                    tmp_in
                }
            };

            self.emit_f64_int_conv_check_trap(real_in, GEF64_LT_I32_MIN, LEF64_GT_I32_MAX)?;

            self.assembler
                .emit_cvttsd2si_32(XMMOrMemory::XMM(real_in), tmp_out)?;
            self.move_location(Size::S32, Location::GPR(tmp_out), ret)?;

            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        }
        Ok(())
    }
    fn convert_i32_f64_u_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
        self.emit_f64_int_conv_check_sat(
            tmp_in,
            GEF64_LT_U32_MIN,
            LEF64_GT_U32_MAX,
            |this| {
                this.assembler
                    .emit_mov(Size::S32, Location::Imm32(0), Location::GPR(tmp_out))
            },
            |this| {
                this.assembler.emit_mov(
                    Size::S32,
                    Location::Imm32(u32::MAX),
                    Location::GPR(tmp_out),
                )
            },
            None::<fn(this: &mut Self) -> Result<(), CompileError>>,
            |this| {
                if this.assembler.arch_has_itruncf() {
                    this.assembler.arch_emit_i32_trunc_uf64(tmp_in, tmp_out)
                } else {
                    this.assembler
                        .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)
                }
            },
        )?;

        self.assembler
            .emit_mov(Size::S32, Location::GPR(tmp_out), ret)?;
        self.release_simd(tmp_in);
        self.release_gpr(tmp_out);
        Ok(())
    }
    fn convert_i32_f64_u_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        if self.assembler.arch_has_itruncf() {
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
            self.assembler.arch_emit_i32_trunc_uf64(tmp_in, tmp_out)?;
            self.emit_relaxed_mov(Size::S32, Location::GPR(tmp_out), ret)?;
            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        } else {
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;

            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
            self.emit_f64_int_conv_check_trap(tmp_in, GEF64_LT_U32_MIN, LEF64_GT_U32_MAX)?;

            self.assembler
                .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
            self.move_location(Size::S32, Location::GPR(tmp_out), ret)?;

            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        }
        Ok(())
    }
    fn convert_i64_f32_u_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
        self.emit_f32_int_conv_check_sat(
            tmp_in,
            GEF32_LT_U64_MIN,
            LEF32_GT_U64_MAX,
            |this| {
                this.assembler
                    .emit_mov(Size::S64, Location::Imm64(0), Location::GPR(tmp_out))
            },
            |this| {
                this.assembler.emit_mov(
                    Size::S64,
                    Location::Imm64(u64::MAX),
                    Location::GPR(tmp_out),
                )
            },
            None::<fn(this: &mut Self) -> Result<(), CompileError>>,
            |this| {
                if this.assembler.arch_has_itruncf() {
                    this.assembler.arch_emit_i64_trunc_uf32(tmp_in, tmp_out)
                } else {
                    let tmp = this.acquire_temp_gpr().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                    })?;
                    let tmp_x1 = this.acquire_temp_simd().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
                    })?;
                    let tmp_x2 = this.acquire_temp_simd().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
                    })?;

                    this.assembler.emit_mov(
                        Size::S32,
                        Location::Imm32(1593835520u32),
                        Location::GPR(tmp),
                    )?; // float 9.22337203E+18 (2^63 as an f32)
                    this.assembler.emit_mov(
                        Size::S32,
                        Location::GPR(tmp),
                        Location::SIMD(tmp_x1),
                    )?;
                    this.assembler.emit_mov(
                        Size::S32,
                        Location::SIMD(tmp_in),
                        Location::SIMD(tmp_x2),
                    )?;
                    this.assembler
                        .emit_vsubss(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in)?;
                    this.assembler
                        .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
                    this.assembler.emit_mov(
                        Size::S64,
                        Location::Imm64(0x8000000000000000u64),
                        Location::GPR(tmp),
                    )?;
                    this.assembler.emit_xor(
                        Size::S64,
                        Location::GPR(tmp_out),
                        Location::GPR(tmp),
                    )?;
                    this.assembler
                        .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out)?;
                    this.assembler
                        .emit_ucomiss(XMMOrMemory::XMM(tmp_x1), tmp_x2)?;
                    this.assembler.emit_cmovae_gpr_64(tmp, tmp_out)?;

                    this.release_simd(tmp_x2);
                    this.release_simd(tmp_x1);
                    this.release_gpr(tmp);
                    Ok(())
                }
            },
        )?;

        self.assembler
            .emit_mov(Size::S64, Location::GPR(tmp_out), ret)?;
        self.release_simd(tmp_in);
        self.release_gpr(tmp_out);
        Ok(())
    }
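
    // The fallback above performs an unsigned 64-bit truncation with only the
    // signed cvttss2si instruction: inputs below 2^63 convert directly, while
    // for inputs >= 2^63 it converts (x - 2^63) and then flips the sign bit
    // (xor with 0x8000_0000_0000_0000) to add 2^63 back. The final
    // ucomiss/cmovae pair selects whichever of the two results is valid for
    // the input's range.
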
    fn convert_i64_f32_u_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        if self.assembler.arch_has_itruncf() {
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
            self.assembler.arch_emit_i64_trunc_uf32(tmp_in, tmp_out)?;
            self.emit_relaxed_mov(Size::S64, Location::GPR(tmp_out), ret)?;
            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        } else {
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?; // xmm2

            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
            self.emit_f32_int_conv_check_trap(tmp_in, GEF32_LT_U64_MIN, LEF32_GT_U64_MAX)?;

            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?; // r15
            let tmp_x1 = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?; // xmm1
            let tmp_x2 = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?; // xmm3

            self.move_location(
                Size::S32,
                Location::Imm32(1593835520u32),
                Location::GPR(tmp),
            )?; // float 9.22337203E+18 (2^63 as an f32)
            self.move_location(Size::S32, Location::GPR(tmp), Location::SIMD(tmp_x1))?;
            self.move_location(Size::S32, Location::SIMD(tmp_in), Location::SIMD(tmp_x2))?;
            self.assembler
                .emit_vsubss(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in)?;
            self.assembler
                .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
            self.move_location(
                Size::S64,
                Location::Imm64(0x8000000000000000u64),
                Location::GPR(tmp),
            )?;
            self.assembler
                .emit_xor(Size::S64, Location::GPR(tmp_out), Location::GPR(tmp))?;
            self.assembler
                .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out)?;
            self.assembler
                .emit_ucomiss(XMMOrMemory::XMM(tmp_x1), tmp_x2)?;
            self.assembler.emit_cmovae_gpr_64(tmp, tmp_out)?;
            self.move_location(Size::S64, Location::GPR(tmp_out), ret)?;

            self.release_simd(tmp_x2);
            self.release_simd(tmp_x1);
            self.release_gpr(tmp);
            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        }
        Ok(())
    }
    fn convert_i64_f32_s_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
        self.emit_f32_int_conv_check_sat(
            tmp_in,
            GEF32_LT_I64_MIN,
            LEF32_GT_I64_MAX,
            |this| {
                this.assembler.emit_mov(
                    Size::S64,
                    Location::Imm64(i64::MIN as u64),
                    Location::GPR(tmp_out),
                )
            },
            |this| {
                this.assembler.emit_mov(
                    Size::S64,
                    Location::Imm64(i64::MAX as u64),
                    Location::GPR(tmp_out),
                )
            },
            Some(|this: &mut Self| {
                this.assembler
                    .emit_mov(Size::S64, Location::Imm64(0), Location::GPR(tmp_out))
            }),
            |this| {
                if this.assembler.arch_has_itruncf() {
                    this.assembler.arch_emit_i64_trunc_sf32(tmp_in, tmp_out)
                } else {
                    this.assembler
                        .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)
                }
            },
        )?;

        self.assembler
            .emit_mov(Size::S64, Location::GPR(tmp_out), ret)?;
        self.release_simd(tmp_in);
        self.release_gpr(tmp_out);
        Ok(())
    }
    fn convert_i64_f32_s_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        if self.assembler.arch_has_itruncf() {
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
            self.assembler.arch_emit_i64_trunc_sf32(tmp_in, tmp_out)?;
            self.emit_relaxed_mov(Size::S64, Location::GPR(tmp_out), ret)?;
            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        } else {
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;

            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
            self.emit_f32_int_conv_check_trap(tmp_in, GEF32_LT_I64_MIN, LEF32_GT_I64_MAX)?;
            self.assembler
                .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
            self.move_location(Size::S64, Location::GPR(tmp_out), ret)?;

            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        }
        Ok(())
    }
    fn convert_i32_f32_s_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
        self.emit_f32_int_conv_check_sat(
            tmp_in,
            GEF32_LT_I32_MIN,
            LEF32_GT_I32_MAX,
            |this| {
                this.assembler.emit_mov(
                    Size::S32,
                    Location::Imm32(i32::MIN as u32),
                    Location::GPR(tmp_out),
                )
            },
            |this| {
                this.assembler.emit_mov(
                    Size::S32,
                    Location::Imm32(i32::MAX as u32),
                    Location::GPR(tmp_out),
                )
            },
            Some(|this: &mut Self| {
                this.assembler
                    .emit_mov(Size::S32, Location::Imm32(0), Location::GPR(tmp_out))
            }),
            |this| {
                if this.assembler.arch_has_itruncf() {
                    this.assembler.arch_emit_i32_trunc_sf32(tmp_in, tmp_out)
                } else {
                    this.assembler
                        .emit_cvttss2si_32(XMMOrMemory::XMM(tmp_in), tmp_out)
                }
            },
        )?;

        self.assembler
            .emit_mov(Size::S32, Location::GPR(tmp_out), ret)?;
        self.release_simd(tmp_in);
        self.release_gpr(tmp_out);
        Ok(())
    }
    fn convert_i32_f32_s_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        if self.assembler.arch_has_itruncf() {
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
            self.assembler.arch_emit_i32_trunc_sf32(tmp_in, tmp_out)?;
            self.emit_relaxed_mov(Size::S32, Location::GPR(tmp_out), ret)?;
            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        } else {
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;

            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
            self.emit_f32_int_conv_check_trap(tmp_in, GEF32_LT_I32_MIN, LEF32_GT_I32_MAX)?;

            self.assembler
                .emit_cvttss2si_32(XMMOrMemory::XMM(tmp_in), tmp_out)?;
            self.move_location(Size::S32, Location::GPR(tmp_out), ret)?;

            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        }
        Ok(())
    }
    fn convert_i32_f32_u_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
        self.emit_f32_int_conv_check_sat(
            tmp_in,
            GEF32_LT_U32_MIN,
            LEF32_GT_U32_MAX,
            |this| {
                this.assembler
                    .emit_mov(Size::S32, Location::Imm32(0), Location::GPR(tmp_out))
            },
            |this| {
                this.assembler.emit_mov(
                    Size::S32,
                    Location::Imm32(u32::MAX),
                    Location::GPR(tmp_out),
                )
            },
            None::<fn(this: &mut Self) -> Result<(), CompileError>>,
            |this| {
                if this.assembler.arch_has_itruncf() {
                    this.assembler.arch_emit_i32_trunc_uf32(tmp_in, tmp_out)
                } else {
                    this.assembler
                        .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)
                }
            },
        )?;

        self.assembler
            .emit_mov(Size::S32, Location::GPR(tmp_out), ret)?;
        self.release_simd(tmp_in);
        self.release_gpr(tmp_out);
        Ok(())
    }
    fn convert_i32_f32_u_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        if self.assembler.arch_has_itruncf() {
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
            self.assembler.arch_emit_i32_trunc_uf32(tmp_in, tmp_out)?;
            self.emit_relaxed_mov(Size::S32, Location::GPR(tmp_out), ret)?;
            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        } else {
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
            self.emit_f32_int_conv_check_trap(tmp_in, GEF32_LT_U32_MIN, LEF32_GT_U32_MAX)?;

            self.assembler
                .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
            self.move_location(Size::S32, Location::GPR(tmp_out), ret)?;

            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        }
        Ok(())
    }

    fn emit_relaxed_atomic_xchg(
        &mut self,
        sz: Size,
        src: Location,
        dst: Location,
    ) -> Result<(), CompileError> {
        self.emit_relaxed_binop(AssemblerX64::emit_xchg, sz, src, dst)
    }

    fn used_gprs_contains(&self, r: &GPR) -> bool {
        self.used_gprs & (1 << r.into_index()) != 0
    }
    fn used_simd_contains(&self, r: &XMM) -> bool {
        self.used_simd & (1 << r.into_index()) != 0
    }
    fn used_gprs_insert(&mut self, r: GPR) {
        self.used_gprs |= 1 << r.into_index();
    }
    fn used_simd_insert(&mut self, r: XMM) {
        self.used_simd |= 1 << r.into_index();
    }
    fn used_gprs_remove(&mut self, r: &GPR) -> bool {
        let ret = self.used_gprs_contains(r);
        self.used_gprs &= !(1 << r.into_index());
        ret
    }
    fn used_simd_remove(&mut self, r: &XMM) -> bool {
        let ret = self.used_simd_contains(r);
        self.used_simd &= !(1 << r.into_index());
        ret
    }
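
    // The `used_gprs`/`used_simd` bitmaps encode register usage with one bit
    // per register index, so every query above is a single mask operation.
    // An illustrative trace (indices are those returned by `into_index()`,
    // e.g. RCX is index 1 in the standard x86 encoding):
    //
    //     // used_gprs == 0b0000
    //     // used_gprs_insert(GPR::RCX)    -> used_gprs == 0b0010
    //     // used_gprs_contains(&GPR::RCX) -> true
    //     // used_gprs_remove(&GPR::RCX)   -> true, used_gprs == 0b0000
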
    fn emit_unwind_op(&mut self, op: UnwindOps<GPR, XMM>) -> Result<(), CompileError> {
        self.unwind_ops.push((self.get_offset().0, op));
        Ok(())
    }
    fn emit_illegal_op_internal(&mut self, trap: TrapCode) -> Result<(), CompileError> {
        let v = trap as u8;
        self.assembler.emit_ud1_payload(v)
    }
}

impl Machine for MachineX86_64 {
    type GPR = GPR;
    type SIMD = XMM;
    fn assembler_get_offset(&self) -> Offset {
        self.assembler.get_offset()
    }
    fn index_from_gpr(&self, x: GPR) -> RegisterIndex {
        RegisterIndex(x as usize)
    }
    fn index_from_simd(&self, x: XMM) -> RegisterIndex {
        RegisterIndex(x as usize + 16)
    }

    fn get_vmctx_reg(&self) -> GPR {
        GPR::R15
    }

    fn get_used_gprs(&self) -> Vec<GPR> {
        GPR::iterator()
            .filter(|x| self.used_gprs & (1 << x.into_index()) != 0)
            .cloned()
            .collect()
    }

    fn get_used_simd(&self) -> Vec<XMM> {
        XMM::iterator()
            .filter(|x| self.used_simd & (1 << x.into_index()) != 0)
            .cloned()
            .collect()
    }

    fn pick_gpr(&self) -> Option<GPR> {
        use GPR::*;
        static REGS: &[GPR] = &[RSI, RDI, R8, R9, R10, R11];
        for r in REGS {
            if !self.used_gprs_contains(r) {
                return Some(*r);
            }
        }
        None
    }

    // Picks an unused general purpose register for internal temporary use.
    fn pick_temp_gpr(&self) -> Option<GPR> {
        use GPR::*;
        static REGS: &[GPR] = &[RAX, RCX, RDX];
        for r in REGS {
            if !self.used_gprs_contains(r) {
                return Some(*r);
            }
        }
        None
    }

    fn acquire_temp_gpr(&mut self) -> Option<GPR> {
        let gpr = self.pick_temp_gpr();
        if let Some(x) = gpr {
            self.used_gprs_insert(x);
        }
        gpr
    }

    fn release_gpr(&mut self, gpr: GPR) {
        assert!(self.used_gprs_remove(&gpr));
    }

    fn reserve_unused_temp_gpr(&mut self, gpr: GPR) -> GPR {
        assert!(!self.used_gprs_contains(&gpr));
        self.used_gprs_insert(gpr);
        gpr
    }

    fn reserve_gpr(&mut self, gpr: GPR) {
        self.used_gprs_insert(gpr);
    }

    fn push_used_gpr(&mut self, used_gprs: &[GPR]) -> Result<usize, CompileError> {
        for r in used_gprs.iter() {
            self.assembler.emit_push(Size::S64, Location::GPR(*r))?;
        }
        Ok(used_gprs.len() * 8)
    }
    fn pop_used_gpr(&mut self, used_gprs: &[GPR]) -> Result<(), CompileError> {
        for r in used_gprs.iter().rev() {
            self.assembler.emit_pop(Size::S64, Location::GPR(*r))?;
        }
        Ok(())
    }
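
    // Saved GPRs are pushed in slice order and popped in reverse, so the two
    // methods pair up LIFO with the stack. For a hypothetical saved set:
    //
    //     // push_used_gpr(&[RSI, R8]) emits: push rsi; push r8
    //     // pop_used_gpr(&[RSI, R8])  emits: pop r8; pop rsi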

    // Picks an unused XMM register.
    fn pick_simd(&self) -> Option<XMM> {
        use XMM::*;
        static REGS: &[XMM] = &[XMM3, XMM4, XMM5, XMM6, XMM7];
        for r in REGS {
            if !self.used_simd_contains(r) {
                return Some(*r);
            }
        }
        None
    }

    // Picks an unused XMM register for internal temporary use.
    fn pick_temp_simd(&self) -> Option<XMM> {
        use XMM::*;
        static REGS: &[XMM] = &[XMM0, XMM1, XMM2];
        for r in REGS {
            if !self.used_simd_contains(r) {
                return Some(*r);
            }
        }
        None
    }

    // Acquires a temporary XMM register.
    fn acquire_temp_simd(&mut self) -> Option<XMM> {
        let simd = self.pick_temp_simd();
        if let Some(x) = simd {
            self.used_simd_insert(x);
        }
        simd
    }

    fn reserve_simd(&mut self, simd: XMM) {
        self.used_simd_insert(simd);
    }

    // Releases a temporary XMM register.
    fn release_simd(&mut self, simd: XMM) {
        assert!(self.used_simd_remove(&simd));
    }

    fn push_used_simd(&mut self, used_xmms: &[XMM]) -> Result<usize, CompileError> {
        self.adjust_stack((used_xmms.len() * 8) as u32)?;

        for (i, r) in used_xmms.iter().enumerate() {
            self.move_location(
                Size::S64,
                Location::SIMD(*r),
                Location::Memory(GPR::RSP, (i * 8) as i32),
            )?;
        }

        Ok(used_xmms.len() * 8)
    }
    fn pop_used_simd(&mut self, used_xmms: &[XMM]) -> Result<(), CompileError> {
        for (i, r) in used_xmms.iter().enumerate() {
            self.move_location(
                Size::S64,
                Location::Memory(GPR::RSP, (i * 8) as i32),
                Location::SIMD(*r),
            )?;
        }
        self.assembler.emit_add(
            Size::S64,
            Location::Imm32((used_xmms.len() * 8) as u32),
            Location::GPR(GPR::RSP),
        )
    }

    /// Set the source location of the Wasm to the given offset.
    fn set_srcloc(&mut self, offset: u32) {
        self.src_loc = offset;
    }
    /// Marks each address in the code range `begin..end` with the trap code `code`.
    fn mark_address_range_with_trap_code(&mut self, code: TrapCode, begin: usize, end: usize) {
        for i in begin..end {
            self.trap_table.offset_to_code.insert(i, code);
        }
        self.mark_instruction_address_end(begin);
    }

    /// Marks one address as trappable with trap code `code`.
    fn mark_address_with_trap_code(&mut self, code: TrapCode) {
        let offset = self.assembler.get_offset().0;
        self.trap_table.offset_to_code.insert(offset, code);
        self.mark_instruction_address_end(offset);
    }
    /// Marks the instruction as trappable with trap code `code`. Returns the "begin" offset.
    fn mark_instruction_with_trap_code(&mut self, code: TrapCode) -> usize {
        let offset = self.assembler.get_offset().0;
        self.trap_table.offset_to_code.insert(offset, code);
        offset
    }
    /// Pushes the instruction to the address map, calculating the offset from a
    /// provided beginning address.
    fn mark_instruction_address_end(&mut self, begin: usize) {
        self.instructions_address_map.push(InstructionAddressMap {
            srcloc: SourceLoc::new(self.src_loc),
            code_offset: begin,
            code_len: self.assembler.get_offset().0 - begin,
        });
    }

    /// Insert a StackOverflow (at offset 0)
    fn insert_stackoverflow(&mut self) {
        let offset = 0;
        self.trap_table
            .offset_to_code
            .insert(offset, TrapCode::StackOverflow);
        self.mark_instruction_address_end(offset);
    }

    /// Get all current TrapInformation
    fn collect_trap_information(&self) -> Vec<TrapInformation> {
        self.trap_table
            .offset_to_code
            .clone()
            .into_iter()
            .map(|(offset, code)| TrapInformation {
                code_offset: offset as u32,
                trap_code: code,
            })
            .collect()
    }

    fn instructions_address_map(&self) -> Vec<InstructionAddressMap> {
        self.instructions_address_map.clone()
    }

    // Memory location for a local on the stack
    fn local_on_stack(&mut self, stack_offset: i32) -> Location {
        Location::Memory(GPR::RBP, -stack_offset)
    }

    // Return a rounded stack adjustment value (e.g. it must be a multiple of 16 bytes on ARM64)
    fn round_stack_adjust(&self, value: usize) -> usize {
        value
    }

    // Adjust stack for locals
    fn adjust_stack(&mut self, delta_stack_offset: u32) -> Result<(), CompileError> {
        self.assembler.emit_sub(
            Size::S64,
            Location::Imm32(delta_stack_offset),
            Location::GPR(GPR::RSP),
        )
    }
    // restore stack
    fn restore_stack(&mut self, delta_stack_offset: u32) -> Result<(), CompileError> {
        self.assembler.emit_add(
            Size::S64,
            Location::Imm32(delta_stack_offset),
            Location::GPR(GPR::RSP),
        )
    }
    fn pop_stack_locals(&mut self, delta_stack_offset: u32) -> Result<(), CompileError> {
        self.assembler.emit_add(
            Size::S64,
            Location::Imm32(delta_stack_offset),
            Location::GPR(GPR::RSP),
        )
    }
    // Move a value into place for a native call, going through a temp GPR
    // (or an RAX-based exchange as a fallback) for memory-to-memory moves
    fn move_location_for_native(
        &mut self,
        _size: Size,
        loc: Location,
        dest: Location,
    ) -> Result<(), CompileError> {
        match loc {
            Location::Imm64(_) | Location::Memory(_, _) | Location::Memory2(_, _, _, _) => {
                let tmp = self.pick_temp_gpr();
                if let Some(x) = tmp {
                    self.assembler.emit_mov(Size::S64, loc, Location::GPR(x))?;
                    self.assembler.emit_mov(Size::S64, Location::GPR(x), dest)
                } else {
                    self.assembler
                        .emit_mov(Size::S64, Location::GPR(GPR::RAX), dest)?;
                    self.assembler
                        .emit_mov(Size::S64, loc, Location::GPR(GPR::RAX))?;
                    self.assembler
                        .emit_xchg(Size::S64, Location::GPR(GPR::RAX), dest)
                }
            }
            _ => self.assembler.emit_mov(Size::S64, loc, dest),
        }
    }

    // Zero a location, using a 32-bit zero immediate
    fn zero_location(&mut self, size: Size, location: Location) -> Result<(), CompileError> {
        self.assembler.emit_mov(size, Location::Imm32(0), location)
    }

    // GPR used as the local pointer on the stack
    fn local_pointer(&self) -> GPR {
        GPR::RBP
    }

    // Determine whether a local should be allocated on the stack.
    fn is_local_on_stack(&self, idx: usize) -> bool {
        idx > 3
    }

    // Determine a local's location.
    fn get_local_location(&self, idx: usize, callee_saved_regs_size: usize) -> Location {
        // Use callee-saved registers for the first locals.
        match idx {
            0 => Location::GPR(GPR::R12),
            1 => Location::GPR(GPR::R13),
            2 => Location::GPR(GPR::R14),
            3 => Location::GPR(GPR::RBX),
            _ => Location::Memory(GPR::RBP, -(((idx - 3) * 8 + callee_saved_regs_size) as i32)),
        }
    }
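
    // Worked example of the mapping above, assuming (for illustration) a
    // callee_saved_regs_size of 8: locals 0..=3 live in R12, R13, R14 and
    // RBX; local 4 maps to [RBP - ((4 - 3) * 8 + 8)] = [RBP - 16], local 5
    // to [RBP - 24], and so on down the stack.
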
    // Move a local to the stack
    fn move_local(&mut self, stack_offset: i32, location: Location) -> Result<(), CompileError> {
        self.assembler.emit_mov(
            Size::S64,
            location,
            Location::Memory(GPR::RBP, -stack_offset),
        )?;
        match location {
            Location::GPR(x) => self.emit_unwind_op(UnwindOps::SaveRegister {
                reg: UnwindRegister::GPR(x),
                bp_neg_offset: stack_offset,
            }),
            Location::SIMD(x) => self.emit_unwind_op(UnwindOps::SaveRegister {
                reg: UnwindRegister::FPR(x),
                bp_neg_offset: stack_offset,
            }),
            _ => Ok(()),
        }
    }

    // List of registers to save, depending on the CallingConvention
    fn list_to_save(&self, calling_convention: CallingConvention) -> Vec<Location> {
        match calling_convention {
            CallingConvention::WindowsFastcall => {
                vec![Location::GPR(GPR::RDI), Location::GPR(GPR::RSI)]
            }
            _ => vec![],
        }
    }

    // Get param location
    fn get_param_location(
        &self,
        idx: usize,
        _sz: Size,
        stack_location: &mut usize,
        calling_convention: CallingConvention,
    ) -> Location {
        match calling_convention {
            CallingConvention::WindowsFastcall => match idx {
                0 => Location::GPR(GPR::RCX),
                1 => Location::GPR(GPR::RDX),
                2 => Location::GPR(GPR::R8),
                3 => Location::GPR(GPR::R9),
                _ => {
                    let loc = Location::Memory(GPR::RSP, *stack_location as i32);
                    *stack_location += 8;
                    loc
                }
            },
            _ => match idx {
                0 => Location::GPR(GPR::RDI),
                1 => Location::GPR(GPR::RSI),
                2 => Location::GPR(GPR::RDX),
                3 => Location::GPR(GPR::RCX),
                4 => Location::GPR(GPR::R8),
                5 => Location::GPR(GPR::R9),
                _ => {
                    let loc = Location::Memory(GPR::RSP, *stack_location as i32);
                    *stack_location += 8;
                    loc
                }
            },
        }
    }
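
    // The two match arms above mirror the standard integer-argument orders:
    // System V AMD64 passes the first six integer arguments in RDI, RSI,
    // RDX, RCX, R8, R9, while Windows fastcall passes the first four in
    // RCX, RDX, R8, R9 and spills the rest to the stack. So argument index 2,
    // for example, lands in RDX on System V but in R8 on Windows.
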
    // Get call param location
    fn get_call_param_location(
        &self,
        idx: usize,
        _sz: Size,
        _stack_location: &mut usize,
        calling_convention: CallingConvention,
    ) -> Location {
        match calling_convention {
            CallingConvention::WindowsFastcall => match idx {
                0 => Location::GPR(GPR::RCX),
                1 => Location::GPR(GPR::RDX),
                2 => Location::GPR(GPR::R8),
                3 => Location::GPR(GPR::R9),
                _ => Location::Memory(GPR::RBP, (32 + 16 + (idx - 4) * 8) as i32),
            },
            _ => match idx {
                0 => Location::GPR(GPR::RDI),
                1 => Location::GPR(GPR::RSI),
                2 => Location::GPR(GPR::RDX),
                3 => Location::GPR(GPR::RCX),
                4 => Location::GPR(GPR::R8),
                5 => Location::GPR(GPR::R9),
                _ => Location::Memory(GPR::RBP, (16 + (idx - 6) * 8) as i32),
            },
        }
    }
    // Get simple param location
    fn get_simple_param_location(
        &self,
        idx: usize,
        calling_convention: CallingConvention,
    ) -> Location {
        match calling_convention {
            CallingConvention::WindowsFastcall => match idx {
                0 => Location::GPR(GPR::RCX),
                1 => Location::GPR(GPR::RDX),
                2 => Location::GPR(GPR::R8),
                3 => Location::GPR(GPR::R9),
                _ => Location::Memory(GPR::RBP, (32 + 16 + (idx - 4) * 8) as i32),
            },
            _ => match idx {
                0 => Location::GPR(GPR::RDI),
                1 => Location::GPR(GPR::RSI),
                2 => Location::GPR(GPR::RDX),
                3 => Location::GPR(GPR::RCX),
                4 => Location::GPR(GPR::R8),
                5 => Location::GPR(GPR::R9),
                _ => Location::Memory(GPR::RBP, (16 + (idx - 6) * 8) as i32),
            },
        }
    }
    // move a location to another
    fn move_location(
        &mut self,
        size: Size,
        source: Location,
        dest: Location,
    ) -> Result<(), CompileError> {
        match source {
            Location::GPR(_) => self.assembler.emit_mov(size, source, dest),
            Location::Memory(_, _) => match dest {
                Location::GPR(_) | Location::SIMD(_) => self.assembler.emit_mov(size, source, dest),
                Location::Memory(_, _) | Location::Memory2(_, _, _, _) => {
                    let tmp = self.pick_temp_gpr().ok_or_else(|| {
                        CompileError::Codegen("singlepass can't pick a temp gpr".to_owned())
                    })?;
                    self.assembler.emit_mov(size, source, Location::GPR(tmp))?;
                    self.assembler.emit_mov(size, Location::GPR(tmp), dest)
                }
                _ => codegen_error!("singlepass move_location unreachable"),
            },
            Location::Memory2(_, _, _, _) => match dest {
                Location::GPR(_) | Location::SIMD(_) => self.assembler.emit_mov(size, source, dest),
                Location::Memory(_, _) | Location::Memory2(_, _, _, _) => {
                    let tmp = self.pick_temp_gpr().ok_or_else(|| {
                        CompileError::Codegen("singlepass can't pick a temp gpr".to_owned())
                    })?;
                    self.assembler.emit_mov(size, source, Location::GPR(tmp))?;
                    self.assembler.emit_mov(size, Location::GPR(tmp), dest)
                }
                _ => codegen_error!("singlepass move_location unreachable"),
            },
            Location::Imm8(_) | Location::Imm32(_) | Location::Imm64(_) => match dest {
                Location::GPR(_) | Location::SIMD(_) => self.assembler.emit_mov(size, source, dest),
                Location::Memory(_, _) | Location::Memory2(_, _, _, _) => {
                    let tmp = self.pick_temp_gpr().ok_or_else(|| {
                        CompileError::Codegen("singlepass can't pick a temp gpr".to_owned())
                    })?;
                    self.assembler.emit_mov(size, source, Location::GPR(tmp))?;
                    self.assembler.emit_mov(size, Location::GPR(tmp), dest)
                }
                _ => codegen_error!("singlepass move_location unreachable"),
            },
            Location::SIMD(_) => self.assembler.emit_mov(size, source, dest),
            _ => codegen_error!("singlepass move_location unreachable"),
        }
    }
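
    // x86-64 `mov` has no memory-to-memory form, so the match above routes
    // any mem->mem (or imm->mem) move through a scratch GPR, conceptually:
    //
    //     // mov tmp, [src]   ; first hop into a register
    //     // mov [dst], tmp   ; second hop into the destination
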
    // Move a location to another, with zero- or sign-extension
    fn move_location_extend(
        &mut self,
        size_val: Size,
        signed: bool,
        source: Location,
        size_op: Size,
        dest: Location,
    ) -> Result<(), CompileError> {
        let dst = match dest {
            Location::Memory(_, _) | Location::Memory2(_, _, _, _) => {
                Location::GPR(self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?)
            }
            Location::GPR(_) | Location::SIMD(_) => dest,
            _ => codegen_error!("singlepass move_location_extend unreachable"),
        };
        match source {
            Location::GPR(_)
            | Location::Memory(_, _)
            | Location::Memory2(_, _, _, _)
            | Location::Imm32(_)
            | Location::Imm64(_) => match size_val {
                Size::S32 | Size::S64 => self.assembler.emit_mov(size_val, source, dst),
                Size::S16 | Size::S8 => {
                    if signed {
                        self.assembler.emit_movsx(size_val, source, size_op, dst)
                    } else {
                        self.assembler.emit_movzx(size_val, source, size_op, dst)
                    }
                }
            },
            _ => panic!(
                "unimplemented move_location_extend({size_val:?}, {signed}, {source:?}, {size_op:?}, {dest:?})"
            ),
        }?;
        if dst != dest {
            self.assembler.emit_mov(size_op, dst, dest)?;
            match dst {
                Location::GPR(x) => self.release_gpr(x),
                _ => codegen_error!("singlepass move_location_extend unreachable"),
            };
        }
        Ok(())
    }
    fn load_address(
        &mut self,
        size: Size,
        reg: Location,
        mem: Location,
    ) -> Result<(), CompileError> {
        match reg {
            Location::GPR(_) => {
                match mem {
                    Location::Memory(_, _) | Location::Memory2(_, _, _, _) => {
                        // Memory moves with size < 32b do not zero upper bits.
                        if size < Size::S32 {
                            self.assembler.emit_xor(Size::S32, reg, reg)?;
                        }
                        self.assembler.emit_mov(size, mem, reg)?;
                    }
                    _ => codegen_error!("singlepass load_address unreachable"),
                }
            }
            _ => codegen_error!("singlepass load_address unreachable"),
        }
        Ok(())
    }
    // Init the stack loc counter
    fn init_stack_loc(
        &mut self,
        init_stack_loc_cnt: u64,
        last_stack_loc: Location,
    ) -> Result<(), CompileError> {
        // This `rep stosq` sequence takes up to 24 bytes, so it is more compact
        // than per-slot moves once more than 2 stack slots need initializing.
        self.assembler.emit_mov(
            Size::S64,
            Location::Imm64(init_stack_loc_cnt),
            Location::GPR(GPR::RCX),
        )?;
        self.assembler
            .emit_xor(Size::S64, Location::GPR(GPR::RAX), Location::GPR(GPR::RAX))?;
        self.assembler
            .emit_lea(Size::S64, last_stack_loc, Location::GPR(GPR::RDI))?;
        self.assembler.emit_rep_stosq()
    }
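
    // `rep stosq` fills memory through a fixed register contract, which is
    // why the sequence above loads RCX, RAX and RDI explicitly:
    //
    //     // rcx = number of 8-byte slots to fill
    //     // rax = value stored into each slot (zeroed here via xor)
    //     // rdi = destination address, advanced by 8 after every store
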
    // Restore save_area
    fn restore_saved_area(&mut self, saved_area_offset: i32) -> Result<(), CompileError> {
        self.assembler.emit_lea(
            Size::S64,
            Location::Memory(GPR::RBP, -saved_area_offset),
            Location::GPR(GPR::RSP),
        )
    }
    // Pop a location
    fn pop_location(&mut self, location: Location) -> Result<(), CompileError> {
        self.assembler.emit_pop(Size::S64, location)
    }
    // Create a new `MachineState` with default values.
    fn new_machine_state(&self) -> MachineState {
        new_machine_state()
    }

    // assembler finalize
    fn assembler_finalize(self) -> Result<Vec<u8>, CompileError> {
        self.assembler.finalize().map_err(|e| {
            CompileError::Codegen(format!("Assembler failed finalization with: {e:?}"))
        })
    }

    fn get_offset(&self) -> Offset {
        self.assembler.get_offset()
    }

    fn finalize_function(&mut self) -> Result<(), CompileError> {
        self.assembler.finalize_function()?;
        Ok(())
    }

    fn emit_function_prolog(&mut self) -> Result<(), CompileError> {
        self.emit_push(Size::S64, Location::GPR(GPR::RBP))?;
        self.emit_unwind_op(UnwindOps::PushFP { up_to_sp: 16 })?;
        self.move_location(Size::S64, Location::GPR(GPR::RSP), Location::GPR(GPR::RBP))?;
        self.emit_unwind_op(UnwindOps::DefineNewFrame)
    }

    fn emit_function_epilog(&mut self) -> Result<(), CompileError> {
        self.move_location(Size::S64, Location::GPR(GPR::RBP), Location::GPR(GPR::RSP))?;
        self.emit_pop(Size::S64, Location::GPR(GPR::RBP))
    }

    fn emit_function_return_value(
        &mut self,
        ty: WpType,
        canonicalize: bool,
        loc: Location,
    ) -> Result<(), CompileError> {
        if canonicalize {
            self.canonicalize_nan(
                match ty {
                    WpType::F32 => Size::S32,
                    WpType::F64 => Size::S64,
                    _ => codegen_error!("singlepass emit_function_return_value unreachable"),
                },
                loc,
                Location::GPR(GPR::RAX),
            )
        } else {
            self.emit_relaxed_mov(Size::S64, loc, Location::GPR(GPR::RAX))
        }
    }

    fn emit_function_return_float(&mut self) -> Result<(), CompileError> {
        self.move_location(
            Size::S64,
            Location::GPR(GPR::RAX),
            Location::SIMD(XMM::XMM0),
        )
    }

    fn arch_supports_canonicalize_nan(&self) -> bool {
        self.assembler.arch_supports_canonicalize_nan()
    }
    fn canonicalize_nan(
        &mut self,
        sz: Size,
        input: Location,
        output: Location,
    ) -> Result<(), CompileError> {
        let tmp1 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp2 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp3 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        self.emit_relaxed_mov(sz, input, Location::SIMD(tmp1))?;
        let tmpg1 = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;

        match sz {
            Size::S32 => {
                self.assembler
                    .emit_vcmpunordss(tmp1, XMMOrMemory::XMM(tmp1), tmp2)?;
                self.move_location(
                    Size::S32,
                    Location::Imm32(0x7FC0_0000), // Canonical NaN
                    Location::GPR(tmpg1),
                )?;
                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp3))?;
                self.assembler
                    .emit_vblendvps(tmp2, XMMOrMemory::XMM(tmp3), tmp1, tmp1)?;
            }
            Size::S64 => {
                self.assembler
                    .emit_vcmpunordsd(tmp1, XMMOrMemory::XMM(tmp1), tmp2)?;
                self.move_location(
                    Size::S64,
                    Location::Imm64(0x7FF8_0000_0000_0000), // Canonical NaN
                    Location::GPR(tmpg1),
                )?;
                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp3))?;
                self.assembler
                    .emit_vblendvpd(tmp2, XMMOrMemory::XMM(tmp3), tmp1, tmp1)?;
            }
            _ => codegen_error!("singlepass canonicalize_nan unreachable"),
        }

        self.emit_relaxed_mov(sz, Location::SIMD(tmp1), output)?;

        self.release_gpr(tmpg1);
        self.release_simd(tmp3);
        self.release_simd(tmp2);
        self.release_simd(tmp1);
        Ok(())
    }
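
    // The immediates used above are the standard canonical quiet-NaN bit
    // patterns: 0x7FC0_0000 for f32 and 0x7FF8_0000_0000_0000 for f64 (sign
    // bit clear, exponent all ones, top mantissa bit set), so every
    // unordered input is blended to a single canonical NaN value.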

    fn emit_illegal_op(&mut self, trap: TrapCode) -> Result<(), CompileError> {
        // The code below is kept as a reference on how to emit an illegal op
        // with trap info without using an undefined opcode with payload:
        /*
        let offset = self.assembler.get_offset().0;
        self.trap_table
            .offset_to_code
            .insert(offset, trap);
        self.assembler.emit_ud2();
        self.mark_instruction_address_end(offset);
        */
        let v = trap as u8;
        // The payload needs to be between 0 and 15;
        // this will emit a 40 0F B9 Cx opcode, with x the payload.
        let offset = self.assembler.get_offset().0;
        self.assembler.emit_ud1_payload(v)?;
        self.mark_instruction_address_end(offset);
        Ok(())
    }
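
    // Why the payload survives to the runtime: UD1 takes a ModRM byte, so
    // each TrapCode gets a distinct, decodable instruction. Illustratively
    // (assuming the fault handler re-decodes the faulting instruction):
    //
    //     // TrapCode value 3 -> bytes 40 0F B9 C3
    //     // a handler reading the bytes at RIP can recover the `3` from
    //     // the low bits of the final (ModRM) byte
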
    fn get_label(&mut self) -> Label {
        self.assembler.new_dynamic_label()
    }
    fn emit_label(&mut self, label: Label) -> Result<(), CompileError> {
        self.assembler.emit_label(label)
    }
    fn get_grp_for_call(&self) -> GPR {
        GPR::RAX
    }
    fn emit_call_register(&mut self, reg: GPR) -> Result<(), CompileError> {
        self.assembler.emit_call_register(reg)
    }
    fn emit_call_label(&mut self, label: Label) -> Result<(), CompileError> {
        self.assembler.emit_call_label(label)
    }
    fn get_gpr_for_ret(&self) -> GPR {
        GPR::RAX
    }
    fn get_simd_for_ret(&self) -> XMM {
        XMM::XMM0
    }

    fn arch_requires_indirect_call_trampoline(&self) -> bool {
        self.assembler.arch_requires_indirect_call_trampoline()
    }

    fn arch_emit_indirect_call_with_trampoline(
        &mut self,
        location: Location,
    ) -> Result<(), CompileError> {
        self.assembler
            .arch_emit_indirect_call_with_trampoline(location)
    }

    fn emit_debug_breakpoint(&mut self) -> Result<(), CompileError> {
        self.assembler.emit_bkpt()
    }

    fn emit_call_location(&mut self, location: Location) -> Result<(), CompileError> {
        self.assembler.emit_call_location(location)
    }

    fn location_address(
        &mut self,
        size: Size,
        source: Location,
        dest: Location,
    ) -> Result<(), CompileError> {
        self.assembler.emit_lea(size, source, dest)
    }
    // logic
    fn location_and(
        &mut self,
        size: Size,
        source: Location,
        dest: Location,
        _flags: bool,
    ) -> Result<(), CompileError> {
        self.assembler.emit_and(size, source, dest)
    }
    fn location_xor(
        &mut self,
        size: Size,
        source: Location,
        dest: Location,
        _flags: bool,
    ) -> Result<(), CompileError> {
        self.assembler.emit_xor(size, source, dest)
    }
    fn location_or(
        &mut self,
        size: Size,
        source: Location,
        dest: Location,
        _flags: bool,
    ) -> Result<(), CompileError> {
        self.assembler.emit_or(size, source, dest)
    }
    fn location_test(
        &mut self,
        size: Size,
        source: Location,
        dest: Location,
    ) -> Result<(), CompileError> {
        self.assembler.emit_test(size, source, dest)
    }
    // math
    fn location_add(
        &mut self,
        size: Size,
        source: Location,
        dest: Location,
        _flags: bool,
    ) -> Result<(), CompileError> {
        self.assembler.emit_add(size, source, dest)
    }
    fn location_sub(
        &mut self,
        size: Size,
        source: Location,
        dest: Location,
        _flags: bool,
    ) -> Result<(), CompileError> {
        self.assembler.emit_sub(size, source, dest)
    }
    fn location_cmp(
        &mut self,
        size: Size,
        source: Location,
        dest: Location,
    ) -> Result<(), CompileError> {
        self.assembler.emit_cmp(size, source, dest)
    }

    // Unconditional jmp (note: the trait method name keeps its historical spelling)
    fn jmp_unconditionnal(&mut self, label: Label) -> Result<(), CompileError> {
        self.assembler.emit_jmp(Condition::None, label)
    }

    fn jmp_on_condition(
        &mut self,
        cond: UnsignedCondition,
        size: Size,
        source: AbstractLocation<Self::GPR, Self::SIMD>,
        dest: AbstractLocation<Self::GPR, Self::SIMD>,
        label: Label,
    ) -> Result<(), CompileError> {
        self.assembler.emit_cmp(size, source, dest)?;
        let cond = match cond {
            UnsignedCondition::Equal => Condition::Equal,
            UnsignedCondition::NotEqual => Condition::NotEqual,
            UnsignedCondition::Above => Condition::Above,
            UnsignedCondition::AboveEqual => Condition::AboveEqual,
            UnsignedCondition::Below => Condition::Below,
            UnsignedCondition::BelowEqual => Condition::BelowEqual,
        };
        self.assembler.emit_jmp(cond, label)
    }

2737    // jmp table
2738    fn emit_jmp_to_jumptable(&mut self, label: Label, cond: Location) -> Result<(), CompileError> {
2739        let tmp1 = self
2740            .pick_temp_gpr()
2741            .ok_or_else(|| CompileError::Codegen("singlepass can't pick a temp gpr".to_owned()))?;
2742        self.reserve_gpr(tmp1);
2743        let tmp2 = self
2744            .pick_temp_gpr()
2745            .ok_or_else(|| CompileError::Codegen("singlepass can't pick a temp gpr".to_owned()))?;
2746        self.reserve_gpr(tmp2);
2747
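        // Branch-table dispatch: tmp1 receives the address of the table (a run
        // of equally sized jmp instructions starting at `label`), tmp2 the case
        // index scaled by the size of one jmp; the indirect jump below then
        // lands on the selected entry.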
2748        self.assembler.emit_lea_label(label, Location::GPR(tmp1))?;
2749        self.move_location(Size::S32, cond, Location::GPR(tmp2))?;
2750
2751        let instr_size = self.assembler.get_jmp_instr_size();
2752        self.assembler
2753            .emit_imul_imm32_gpr64(instr_size as _, tmp2)?;
2754        self.assembler
2755            .emit_add(Size::S64, Location::GPR(tmp1), Location::GPR(tmp2))?;
2756        self.assembler.emit_jmp_location(Location::GPR(tmp2))?;
2757        self.release_gpr(tmp2);
2758        self.release_gpr(tmp1);
2759        Ok(())
2760    }
2761
2762    fn align_for_loop(&mut self) -> Result<(), CompileError> {
2763        // Pad with NOPs to the next 16-byte boundary.
2764        // We don't use the dynasm `.align 16` directive here because it pads
2765        // the gap with single-byte NOPs, which may lead to efficiency problems.
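        // (x86 provides multi-byte NOP encodings, e.g. `66 90` and the
        // `0F 1F /0` forms, so the padding can be covered with a few wide NOPs
        // instead of up to 15 single-byte ones.)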
2766        match self.assembler.get_offset().0 % 16 {
2767            0 => {}
2768            x => {
2769                self.assembler.emit_nop_n(16 - x)?;
2770            }
2771        }
2772        assert_eq!(self.assembler.get_offset().0 % 16, 0);
2773        Ok(())
2774    }
2775
2776    fn emit_ret(&mut self) -> Result<(), CompileError> {
2777        self.assembler.emit_ret()
2778    }
2779
2780    fn emit_push(&mut self, size: Size, loc: Location) -> Result<(), CompileError> {
2781        self.assembler.emit_push(size, loc)
2782    }
2783    fn emit_pop(&mut self, size: Size, loc: Location) -> Result<(), CompileError> {
2784        self.assembler.emit_pop(size, loc)
2785    }
2786
2787    fn emit_memory_fence(&mut self) -> Result<(), CompileError> {
2788        // nothing on x86_64
2789        Ok(())
2790    }
2791
2792    fn location_neg(
2793        &mut self,
2794        size_val: Size, // size of src
2795        signed: bool,
2796        source: Location,
2797        size_op: Size,
2798        dest: Location,
2799    ) -> Result<(), CompileError> {
2800        self.move_location_extend(size_val, signed, source, size_op, dest)?;
2801        self.assembler.emit_neg(size_val, dest)
2802    }
2803
2804    fn emit_imul_imm32(&mut self, size: Size, imm32: u32, gpr: GPR) -> Result<(), CompileError> {
2805        match size {
2806            Size::S64 => self.assembler.emit_imul_imm32_gpr64(imm32, gpr),
2807            _ => {
2808                codegen_error!("singlepass emit_imul_imm32 unreachable");
2809            }
2810        }
2811    }
2812
2813    // relaxed binop helpers: operands are routed through temporaries when a source/destination pair can't be encoded directly
2814    fn emit_relaxed_mov(
2815        &mut self,
2816        sz: Size,
2817        src: Location,
2818        dst: Location,
2819    ) -> Result<(), CompileError> {
2820        self.emit_relaxed_binop(AssemblerX64::emit_mov, sz, src, dst)
2821    }
2822    fn emit_relaxed_cmp(
2823        &mut self,
2824        sz: Size,
2825        src: Location,
2826        dst: Location,
2827    ) -> Result<(), CompileError> {
2828        self.emit_relaxed_binop(AssemblerX64::emit_cmp, sz, src, dst)
2829    }
2830    fn emit_relaxed_zero_extension(
2831        &mut self,
2832        sz_src: Size,
2833        src: Location,
2834        sz_dst: Size,
2835        dst: Location,
2836    ) -> Result<(), CompileError> {
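        // On x86_64 a 32-bit mov already zero-extends into the full 64-bit
        // register, so a plain mov suffices when widening S32 (or moving S64)
        // to S64; all other widths go through movzx.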
2837        if (sz_src == Size::S32 || sz_src == Size::S64) && sz_dst == Size::S64 {
2838            self.emit_relaxed_binop(AssemblerX64::emit_mov, sz_src, src, dst)
2839        } else {
2840            self.emit_relaxed_zx_sx(AssemblerX64::emit_movzx, sz_src, src, sz_dst, dst)
2841        }
2842    }
2843    fn emit_relaxed_sign_extension(
2844        &mut self,
2845        sz_src: Size,
2846        src: Location,
2847        sz_dst: Size,
2848        dst: Location,
2849    ) -> Result<(), CompileError> {
2850        self.emit_relaxed_zx_sx(AssemblerX64::emit_movsx, sz_src, src, sz_dst, dst)
2851    }
2852
2853    fn emit_binop_add32(
2854        &mut self,
2855        loc_a: Location,
2856        loc_b: Location,
2857        ret: Location,
2858    ) -> Result<(), CompileError> {
2859        self.emit_binop_i32(AssemblerX64::emit_add, loc_a, loc_b, ret)
2860    }
2861    fn emit_binop_sub32(
2862        &mut self,
2863        loc_a: Location,
2864        loc_b: Location,
2865        ret: Location,
2866    ) -> Result<(), CompileError> {
2867        self.emit_binop_i32(AssemblerX64::emit_sub, loc_a, loc_b, ret)
2868    }
2869    fn emit_binop_mul32(
2870        &mut self,
2871        loc_a: Location,
2872        loc_b: Location,
2873        ret: Location,
2874    ) -> Result<(), CompileError> {
2875        self.emit_binop_i32(AssemblerX64::emit_imul, loc_a, loc_b, ret)
2876    }
2877    fn emit_binop_udiv32(
2878        &mut self,
2879        loc_a: Location,
2880        loc_b: Location,
2881        ret: Location,
2882        integer_division_by_zero: Label,
2883        _integer_overflow: Label,
2884    ) -> Result<usize, CompileError> {
2885        // We assume that RAX and RDX are temporary registers here.
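        // Unsigned `div` divides RDX:RAX by its operand, so RDX is zeroed
        // below to form the 64-bit dividend; the quotient lands in RAX and the
        // remainder in RDX.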
2886        self.assembler
2887            .emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX))?;
2888        self.assembler
2889            .emit_xor(Size::S32, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX))?;
2890        let offset = self.emit_relaxed_xdiv(
2891            AssemblerX64::emit_div,
2892            Size::S32,
2893            loc_b,
2894            integer_division_by_zero,
2895        )?;
2896        self.assembler
2897            .emit_mov(Size::S32, Location::GPR(GPR::RAX), ret)?;
2898        Ok(offset)
2899    }
2900    fn emit_binop_sdiv32(
2901        &mut self,
2902        loc_a: Location,
2903        loc_b: Location,
2904        ret: Location,
2905        integer_division_by_zero: Label,
2906        _integer_overflow: Label,
2907    ) -> Result<usize, CompileError> {
2908        // We assume that RAX and RDX are temporary registers here.
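        // `cdq` sign-extends EAX into EDX, forming the signed dividend EDX:EAX
        // that `idiv` expects; the quotient lands in EAX.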
2909        self.assembler
2910            .emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX))?;
2911        self.assembler.emit_cdq()?;
2912        let offset = self.emit_relaxed_xdiv(
2913            AssemblerX64::emit_idiv,
2914            Size::S32,
2915            loc_b,
2916            integer_division_by_zero,
2917        )?;
2918        self.assembler
2919            .emit_mov(Size::S32, Location::GPR(GPR::RAX), ret)?;
2920        Ok(offset)
2921    }
2922    fn emit_binop_urem32(
2923        &mut self,
2924        loc_a: Location,
2925        loc_b: Location,
2926        ret: Location,
2927        integer_division_by_zero: Label,
2928        _integer_overflow: Label,
2929    ) -> Result<usize, CompileError> {
2930        // We assume that RAX and RDX are temporary registers here.
2931        self.assembler
2932            .emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX))?;
2933        self.assembler
2934            .emit_xor(Size::S32, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX))?;
2935        let offset = self.emit_relaxed_xdiv(
2936            AssemblerX64::emit_div,
2937            Size::S32,
2938            loc_b,
2939            integer_division_by_zero,
2940        )?;
2941        self.assembler
2942            .emit_mov(Size::S32, Location::GPR(GPR::RDX), ret)?;
2943        Ok(offset)
2944    }
2945    fn emit_binop_srem32(
2946        &mut self,
2947        loc_a: Location,
2948        loc_b: Location,
2949        ret: Location,
2950        integer_division_by_zero: Label,
2951        _integer_overflow: Label,
2952    ) -> Result<usize, CompileError> {
2953        // We assume that RAX and RDX are temporary registers here.
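        // `idiv` faults on INT_MIN / -1 (the quotient overflows), but wasm
        // defines i32.rem_s(INT_MIN, -1) == 0, so that operand pair is
        // special-cased to produce 0 before reaching the division below.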
2954        let normal_path = self.assembler.get_label();
2955        let end = self.assembler.get_label();
2956
2957        self.emit_relaxed_cmp(Size::S32, Location::Imm32(0x80000000), loc_a)?;
2958        self.assembler.emit_jmp(Condition::NotEqual, normal_path)?;
2959        self.emit_relaxed_cmp(Size::S32, Location::Imm32(0xffffffff), loc_b)?;
2960        self.assembler.emit_jmp(Condition::NotEqual, normal_path)?;
2961        self.move_location(Size::S32, Location::Imm32(0), ret)?;
2962        self.assembler.emit_jmp(Condition::None, end)?;
2963
2964        self.emit_label(normal_path)?;
2965        self.assembler
2966            .emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX))?;
2967        self.assembler.emit_cdq()?;
2968        let offset = self.emit_relaxed_xdiv(
2969            AssemblerX64::emit_idiv,
2970            Size::S32,
2971            loc_b,
2972            integer_division_by_zero,
2973        )?;
2974        self.assembler
2975            .emit_mov(Size::S32, Location::GPR(GPR::RDX), ret)?;
2976
2977        self.emit_label(end)?;
2978        Ok(offset)
2979    }
2980    fn emit_binop_and32(
2981        &mut self,
2982        loc_a: Location,
2983        loc_b: Location,
2984        ret: Location,
2985    ) -> Result<(), CompileError> {
2986        self.emit_binop_i32(AssemblerX64::emit_and, loc_a, loc_b, ret)
2987    }
2988    fn emit_binop_or32(
2989        &mut self,
2990        loc_a: Location,
2991        loc_b: Location,
2992        ret: Location,
2993    ) -> Result<(), CompileError> {
2994        self.emit_binop_i32(AssemblerX64::emit_or, loc_a, loc_b, ret)
2995    }
2996    fn emit_binop_xor32(
2997        &mut self,
2998        loc_a: Location,
2999        loc_b: Location,
3000        ret: Location,
3001    ) -> Result<(), CompileError> {
3002        self.emit_binop_i32(AssemblerX64::emit_xor, loc_a, loc_b, ret)
3003    }
3004    fn i32_cmp_ge_s(
3005        &mut self,
3006        loc_a: Location,
3007        loc_b: Location,
3008        ret: Location,
3009    ) -> Result<(), CompileError> {
3010        self.emit_cmpop_i32_dynamic_b(Condition::GreaterEqual, loc_a, loc_b, ret)
3011    }
3012    fn i32_cmp_gt_s(
3013        &mut self,
3014        loc_a: Location,
3015        loc_b: Location,
3016        ret: Location,
3017    ) -> Result<(), CompileError> {
3018        self.emit_cmpop_i32_dynamic_b(Condition::Greater, loc_a, loc_b, ret)
3019    }
3020    fn i32_cmp_le_s(
3021        &mut self,
3022        loc_a: Location,
3023        loc_b: Location,
3024        ret: Location,
3025    ) -> Result<(), CompileError> {
3026        self.emit_cmpop_i32_dynamic_b(Condition::LessEqual, loc_a, loc_b, ret)
3027    }
3028    fn i32_cmp_lt_s(
3029        &mut self,
3030        loc_a: Location,
3031        loc_b: Location,
3032        ret: Location,
3033    ) -> Result<(), CompileError> {
3034        self.emit_cmpop_i32_dynamic_b(Condition::Less, loc_a, loc_b, ret)
3035    }
3036    fn i32_cmp_ge_u(
3037        &mut self,
3038        loc_a: Location,
3039        loc_b: Location,
3040        ret: Location,
3041    ) -> Result<(), CompileError> {
3042        self.emit_cmpop_i32_dynamic_b(Condition::AboveEqual, loc_a, loc_b, ret)
3043    }
3044    fn i32_cmp_gt_u(
3045        &mut self,
3046        loc_a: Location,
3047        loc_b: Location,
3048        ret: Location,
3049    ) -> Result<(), CompileError> {
3050        self.emit_cmpop_i32_dynamic_b(Condition::Above, loc_a, loc_b, ret)
3051    }
3052    fn i32_cmp_le_u(
3053        &mut self,
3054        loc_a: Location,
3055        loc_b: Location,
3056        ret: Location,
3057    ) -> Result<(), CompileError> {
3058        self.emit_cmpop_i32_dynamic_b(Condition::BelowEqual, loc_a, loc_b, ret)
3059    }
3060    fn i32_cmp_lt_u(
3061        &mut self,
3062        loc_a: Location,
3063        loc_b: Location,
3064        ret: Location,
3065    ) -> Result<(), CompileError> {
3066        self.emit_cmpop_i32_dynamic_b(Condition::Below, loc_a, loc_b, ret)
3067    }
3068    fn i32_cmp_ne(
3069        &mut self,
3070        loc_a: Location,
3071        loc_b: Location,
3072        ret: Location,
3073    ) -> Result<(), CompileError> {
3074        self.emit_cmpop_i32_dynamic_b(Condition::NotEqual, loc_a, loc_b, ret)
3075    }
3076    fn i32_cmp_eq(
3077        &mut self,
3078        loc_a: Location,
3079        loc_b: Location,
3080        ret: Location,
3081    ) -> Result<(), CompileError> {
3082        self.emit_cmpop_i32_dynamic_b(Condition::Equal, loc_a, loc_b, ret)
3083    }
3084    fn i32_clz(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
3085        let src = match loc {
3086            Location::Imm32(_) | Location::Memory(_, _) => {
3087                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
3088                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3089                })?;
3090                self.move_location(Size::S32, loc, Location::GPR(tmp))?;
3091                tmp
3092            }
3093            Location::GPR(reg) => reg,
3094            _ => {
3095                codegen_error!("singlepass i32_clz unreachable");
3096            }
3097        };
3098        let dst = match ret {
3099            Location::Memory(_, _) => self.acquire_temp_gpr().ok_or_else(|| {
3100                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3101            })?,
3102            Location::GPR(reg) => reg,
3103            _ => {
3104                codegen_error!("singlepass i32_clz unreachable");
3105            }
3106        };
3107
3108        if self.assembler.arch_has_xzcnt() {
3109            self.assembler
3110                .arch_emit_lzcnt(Size::S32, Location::GPR(src), Location::GPR(dst))?;
3111        } else {
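            // Fallback without LZCNT: `bsr` yields the index of the most
            // significant set bit, and `bsr(x) ^ 31` equals `31 - bsr(x)`, the
            // leading-zero count (e.g. x = 0x10: bsr = 4, 4 ^ 31 = 27 = clz).
            // `bsr` leaves its destination undefined for x == 0, hence the
            // separate path that loads 32.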
3112            let zero_path = self.assembler.get_label();
3113            let end = self.assembler.get_label();
3114
3115            self.assembler.emit_test_gpr_64(src)?;
3116            self.assembler.emit_jmp(Condition::Equal, zero_path)?;
3117            self.assembler
3118                .emit_bsr(Size::S32, Location::GPR(src), Location::GPR(dst))?;
3119            self.assembler
3120                .emit_xor(Size::S32, Location::Imm32(31), Location::GPR(dst))?;
3121            self.assembler.emit_jmp(Condition::None, end)?;
3122            self.emit_label(zero_path)?;
3123            self.move_location(Size::S32, Location::Imm32(32), Location::GPR(dst))?;
3124            self.emit_label(end)?;
3125        }
3126        match loc {
3127            Location::Imm32(_) | Location::Memory(_, _) => {
3128                self.release_gpr(src);
3129            }
3130            _ => {}
3131        };
3132        if let Location::Memory(_, _) = ret {
3133            self.move_location(Size::S32, Location::GPR(dst), ret)?;
3134            self.release_gpr(dst);
3135        };
3136        Ok(())
3137    }
3138    fn i32_ctz(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
3139        let src = match loc {
3140            Location::Imm32(_) | Location::Memory(_, _) => {
3141                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
3142                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3143                })?;
3144                self.move_location(Size::S32, loc, Location::GPR(tmp))?;
3145                tmp
3146            }
3147            Location::GPR(reg) => reg,
3148            _ => {
3149                codegen_error!("singlepass i32_ctz unreachable");
3150            }
3151        };
3152        let dst = match ret {
3153            Location::Memory(_, _) => self.acquire_temp_gpr().ok_or_else(|| {
3154                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3155            })?,
3156            Location::GPR(reg) => reg,
3157            _ => {
3158                codegen_error!("singlepass i32_ctz unreachable");
3159            }
3160        };
3161
3162        if self.assembler.arch_has_xzcnt() {
3163            self.assembler
3164                .arch_emit_tzcnt(Size::S32, Location::GPR(src), Location::GPR(dst))?;
3165        } else {
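            // Fallback without TZCNT: `bsf` yields the index of the least
            // significant set bit, which is exactly the trailing-zero count
            // (e.g. x = 0b1000: bsf = 3 = ctz). Like `bsr`, it is undefined
            // for x == 0, hence the separate path that loads 32.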
3166            let zero_path = self.assembler.get_label();
3167            let end = self.assembler.get_label();
3168
3169            self.assembler.emit_test_gpr_64(src)?;
3170            self.assembler.emit_jmp(Condition::Equal, zero_path)?;
3171            self.assembler
3172                .emit_bsf(Size::S32, Location::GPR(src), Location::GPR(dst))?;
3173            self.assembler.emit_jmp(Condition::None, end)?;
3174            self.emit_label(zero_path)?;
3175            self.move_location(Size::S32, Location::Imm32(32), Location::GPR(dst))?;
3176            self.emit_label(end)?;
3177        }
3178
3179        match loc {
3180            Location::Imm32(_) | Location::Memory(_, _) => {
3181                self.release_gpr(src);
3182            }
3183            _ => {}
3184        };
3185        if let Location::Memory(_, _) = ret {
3186            self.move_location(Size::S32, Location::GPR(dst), ret)?;
3187            self.release_gpr(dst);
3188        };
3189        Ok(())
3190    }
3191    fn i32_popcnt(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
3192        match loc {
3193            Location::Imm32(_) => {
3194                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
3195                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3196                })?;
3197                self.move_location(Size::S32, loc, Location::GPR(tmp))?;
3198                if let Location::Memory(_, _) = ret {
3199                    let out_tmp = self.acquire_temp_gpr().ok_or_else(|| {
3200                        CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3201                    })?;
3202                    self.assembler.emit_popcnt(
3203                        Size::S32,
3204                        Location::GPR(tmp),
3205                        Location::GPR(out_tmp),
3206                    )?;
3207                    self.move_location(Size::S32, Location::GPR(out_tmp), ret)?;
3208                    self.release_gpr(out_tmp);
3209                } else {
3210                    self.assembler
3211                        .emit_popcnt(Size::S32, Location::GPR(tmp), ret)?;
3212                }
3213                self.release_gpr(tmp);
3214            }
3215            Location::Memory(_, _) | Location::GPR(_) => {
3216                if let Location::Memory(_, _) = ret {
3217                    let out_tmp = self.acquire_temp_gpr().ok_or_else(|| {
3218                        CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3219                    })?;
3220                    self.assembler
3221                        .emit_popcnt(Size::S32, loc, Location::GPR(out_tmp))?;
3222                    self.move_location(Size::S32, Location::GPR(out_tmp), ret)?;
3223                    self.release_gpr(out_tmp);
3224                } else {
3225                    self.assembler.emit_popcnt(Size::S32, loc, ret)?;
3226                }
3227            }
3228            _ => {
3229                codegen_error!("singlepass i32_popcnt unreachable");
3230            }
3231        }
3232        Ok(())
3233    }
3234    fn i32_shl(
3235        &mut self,
3236        loc_a: Location,
3237        loc_b: Location,
3238        ret: Location,
3239    ) -> Result<(), CompileError> {
3240        self.emit_shift_i32(AssemblerX64::emit_shl, loc_a, loc_b, ret)
3241    }
3242    fn i32_shr(
3243        &mut self,
3244        loc_a: Location,
3245        loc_b: Location,
3246        ret: Location,
3247    ) -> Result<(), CompileError> {
3248        self.emit_shift_i32(AssemblerX64::emit_shr, loc_a, loc_b, ret)
3249    }
3250    fn i32_sar(
3251        &mut self,
3252        loc_a: Location,
3253        loc_b: Location,
3254        ret: Location,
3255    ) -> Result<(), CompileError> {
3256        self.emit_shift_i32(AssemblerX64::emit_sar, loc_a, loc_b, ret)
3257    }
3258    fn i32_rol(
3259        &mut self,
3260        loc_a: Location,
3261        loc_b: Location,
3262        ret: Location,
3263    ) -> Result<(), CompileError> {
3264        self.emit_shift_i32(AssemblerX64::emit_rol, loc_a, loc_b, ret)
3265    }
3266    fn i32_ror(
3267        &mut self,
3268        loc_a: Location,
3269        loc_b: Location,
3270        ret: Location,
3271    ) -> Result<(), CompileError> {
3272        self.emit_shift_i32(AssemblerX64::emit_ror, loc_a, loc_b, ret)
3273    }
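    // The load/store helpers below delegate to `memory_op`, which emits the
    // bounds and (for atomics) alignment checks and hands the closure a GPR
    // holding the effective address of the access.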
3274    fn i32_load(
3275        &mut self,
3276        addr: Location,
3277        memarg: &MemArg,
3278        ret: Location,
3279        need_check: bool,
3280        imported_memories: bool,
3281        offset: i32,
3282        heap_access_oob: Label,
3283        unaligned_atomic: Label,
3284    ) -> Result<(), CompileError> {
3285        self.memory_op(
3286            addr,
3287            memarg,
3288            false,
3289            4,
3290            need_check,
3291            imported_memories,
3292            offset,
3293            heap_access_oob,
3294            unaligned_atomic,
3295            |this, addr| {
3296                this.emit_relaxed_binop(
3297                    AssemblerX64::emit_mov,
3298                    Size::S32,
3299                    Location::Memory(addr, 0),
3300                    ret,
3301                )
3302            },
3303        )
3304    }
3305    fn i32_load_8u(
3306        &mut self,
3307        addr: Location,
3308        memarg: &MemArg,
3309        ret: Location,
3310        need_check: bool,
3311        imported_memories: bool,
3312        offset: i32,
3313        heap_access_oob: Label,
3314        unaligned_atomic: Label,
3315    ) -> Result<(), CompileError> {
3316        self.memory_op(
3317            addr,
3318            memarg,
3319            false,
3320            1,
3321            need_check,
3322            imported_memories,
3323            offset,
3324            heap_access_oob,
3325            unaligned_atomic,
3326            |this, addr| {
3327                this.emit_relaxed_zx_sx(
3328                    AssemblerX64::emit_movzx,
3329                    Size::S8,
3330                    Location::Memory(addr, 0),
3331                    Size::S32,
3332                    ret,
3333                )
3334            },
3335        )
3336    }
3337    fn i32_load_8s(
3338        &mut self,
3339        addr: Location,
3340        memarg: &MemArg,
3341        ret: Location,
3342        need_check: bool,
3343        imported_memories: bool,
3344        offset: i32,
3345        heap_access_oob: Label,
3346        unaligned_atomic: Label,
3347    ) -> Result<(), CompileError> {
3348        self.memory_op(
3349            addr,
3350            memarg,
3351            false,
3352            1,
3353            need_check,
3354            imported_memories,
3355            offset,
3356            heap_access_oob,
3357            unaligned_atomic,
3358            |this, addr| {
3359                this.emit_relaxed_zx_sx(
3360                    AssemblerX64::emit_movsx,
3361                    Size::S8,
3362                    Location::Memory(addr, 0),
3363                    Size::S32,
3364                    ret,
3365                )
3366            },
3367        )
3368    }
3369    fn i32_load_16u(
3370        &mut self,
3371        addr: Location,
3372        memarg: &MemArg,
3373        ret: Location,
3374        need_check: bool,
3375        imported_memories: bool,
3376        offset: i32,
3377        heap_access_oob: Label,
3378        unaligned_atomic: Label,
3379    ) -> Result<(), CompileError> {
3380        self.memory_op(
3381            addr,
3382            memarg,
3383            false,
3384            2,
3385            need_check,
3386            imported_memories,
3387            offset,
3388            heap_access_oob,
3389            unaligned_atomic,
3390            |this, addr| {
3391                this.emit_relaxed_zx_sx(
3392                    AssemblerX64::emit_movzx,
3393                    Size::S16,
3394                    Location::Memory(addr, 0),
3395                    Size::S32,
3396                    ret,
3397                )
3398            },
3399        )
3400    }
3401    fn i32_load_16s(
3402        &mut self,
3403        addr: Location,
3404        memarg: &MemArg,
3405        ret: Location,
3406        need_check: bool,
3407        imported_memories: bool,
3408        offset: i32,
3409        heap_access_oob: Label,
3410        unaligned_atomic: Label,
3411    ) -> Result<(), CompileError> {
3412        self.memory_op(
3413            addr,
3414            memarg,
3415            false,
3416            2,
3417            need_check,
3418            imported_memories,
3419            offset,
3420            heap_access_oob,
3421            unaligned_atomic,
3422            |this, addr| {
3423                this.emit_relaxed_zx_sx(
3424                    AssemblerX64::emit_movsx,
3425                    Size::S16,
3426                    Location::Memory(addr, 0),
3427                    Size::S32,
3428                    ret,
3429                )
3430            },
3431        )
3432    }
3433    fn i32_atomic_load(
3434        &mut self,
3435        addr: Location,
3436        memarg: &MemArg,
3437        ret: Location,
3438        need_check: bool,
3439        imported_memories: bool,
3440        offset: i32,
3441        heap_access_oob: Label,
3442        unaligned_atomic: Label,
3443    ) -> Result<(), CompileError> {
3444        self.memory_op(
3445            addr,
3446            memarg,
3447            true,
3448            4,
3449            need_check,
3450            imported_memories,
3451            offset,
3452            heap_access_oob,
3453            unaligned_atomic,
3454            |this, addr| this.emit_relaxed_mov(Size::S32, Location::Memory(addr, 0), ret),
3455        )
3456    }
3457    fn i32_atomic_load_8u(
3458        &mut self,
3459        addr: Location,
3460        memarg: &MemArg,
3461        ret: Location,
3462        need_check: bool,
3463        imported_memories: bool,
3464        offset: i32,
3465        heap_access_oob: Label,
3466        unaligned_atomic: Label,
3467    ) -> Result<(), CompileError> {
3468        self.memory_op(
3469            addr,
3470            memarg,
3471            true,
3472            1,
3473            need_check,
3474            imported_memories,
3475            offset,
3476            heap_access_oob,
3477            unaligned_atomic,
3478            |this, addr| {
3479                this.emit_relaxed_zero_extension(
3480                    Size::S8,
3481                    Location::Memory(addr, 0),
3482                    Size::S32,
3483                    ret,
3484                )
3485            },
3486        )
3487    }
3488    fn i32_atomic_load_16u(
3489        &mut self,
3490        addr: Location,
3491        memarg: &MemArg,
3492        ret: Location,
3493        need_check: bool,
3494        imported_memories: bool,
3495        offset: i32,
3496        heap_access_oob: Label,
3497        unaligned_atomic: Label,
3498    ) -> Result<(), CompileError> {
3499        self.memory_op(
3500            addr,
3501            memarg,
3502            true,
3503            2,
3504            need_check,
3505            imported_memories,
3506            offset,
3507            heap_access_oob,
3508            unaligned_atomic,
3509            |this, addr| {
3510                this.emit_relaxed_zero_extension(
3511                    Size::S16,
3512                    Location::Memory(addr, 0),
3513                    Size::S32,
3514                    ret,
3515                )
3516            },
3517        )
3518    }
3519    fn i32_save(
3520        &mut self,
3521        target_value: Location,
3522        memarg: &MemArg,
3523        target_addr: Location,
3524        need_check: bool,
3525        imported_memories: bool,
3526        offset: i32,
3527        heap_access_oob: Label,
3528        unaligned_atomic: Label,
3529    ) -> Result<(), CompileError> {
3530        self.memory_op(
3531            target_addr,
3532            memarg,
3533            false,
3534            4,
3535            need_check,
3536            imported_memories,
3537            offset,
3538            heap_access_oob,
3539            unaligned_atomic,
3540            |this, addr| {
3541                this.emit_relaxed_binop(
3542                    AssemblerX64::emit_mov,
3543                    Size::S32,
3544                    target_value,
3545                    Location::Memory(addr, 0),
3546                )
3547            },
3548        )
3549    }
3550    fn i32_save_8(
3551        &mut self,
3552        target_value: Location,
3553        memarg: &MemArg,
3554        target_addr: Location,
3555        need_check: bool,
3556        imported_memories: bool,
3557        offset: i32,
3558        heap_access_oob: Label,
3559        unaligned_atomic: Label,
3560    ) -> Result<(), CompileError> {
3561        self.memory_op(
3562            target_addr,
3563            memarg,
3564            false,
3565            1,
3566            need_check,
3567            imported_memories,
3568            offset,
3569            heap_access_oob,
3570            unaligned_atomic,
3571            |this, addr| {
3572                this.emit_relaxed_binop(
3573                    AssemblerX64::emit_mov,
3574                    Size::S8,
3575                    target_value,
3576                    Location::Memory(addr, 0),
3577                )
3578            },
3579        )
3580    }
3581    fn i32_save_16(
3582        &mut self,
3583        target_value: Location,
3584        memarg: &MemArg,
3585        target_addr: Location,
3586        need_check: bool,
3587        imported_memories: bool,
3588        offset: i32,
3589        heap_access_oob: Label,
3590        unaligned_atomic: Label,
3591    ) -> Result<(), CompileError> {
3592        self.memory_op(
3593            target_addr,
3594            memarg,
3595            false,
3596            2,
3597            need_check,
3598            imported_memories,
3599            offset,
3600            heap_access_oob,
3601            unaligned_atomic,
3602            |this, addr| {
3603                this.emit_relaxed_binop(
3604                    AssemblerX64::emit_mov,
3605                    Size::S16,
3606                    target_value,
3607                    Location::Memory(addr, 0),
3608                )
3609            },
3610        )
3611    }
3612    // x86_64 has a strong memory model, so coherency between all threads (cores) is guaranteed,
3613    // and an aligned move to or from memory is guaranteed to be atomic,
3614    // so an atomic store/load is a simple mov on x86_64.
3615    fn i32_atomic_save(
3616        &mut self,
3617        value: Location,
3618        memarg: &MemArg,
3619        target_addr: Location,
3620        need_check: bool,
3621        imported_memories: bool,
3622        offset: i32,
3623        heap_access_oob: Label,
3624        unaligned_atomic: Label,
3625    ) -> Result<(), CompileError> {
3626        self.memory_op(
3627            target_addr,
3628            memarg,
3629            true,
3630            4,
3631            need_check,
3632            imported_memories,
3633            offset,
3634            heap_access_oob,
3635            unaligned_atomic,
3636            |this, addr| {
3637                this.emit_relaxed_binop(
3638                    AssemblerX64::emit_mov,
3639                    Size::S32,
3640                    value,
3641                    Location::Memory(addr, 0),
3642                )
3643            },
3644        )
3645    }
3646    fn i32_atomic_save_8(
3647        &mut self,
3648        value: Location,
3649        memarg: &MemArg,
3650        target_addr: Location,
3651        need_check: bool,
3652        imported_memories: bool,
3653        offset: i32,
3654        heap_access_oob: Label,
3655        unaligned_atomic: Label,
3656    ) -> Result<(), CompileError> {
3657        self.memory_op(
3658            target_addr,
3659            memarg,
3660            true,
3661            1,
3662            need_check,
3663            imported_memories,
3664            offset,
3665            heap_access_oob,
3666            unaligned_atomic,
3667            |this, addr| {
3668                this.emit_relaxed_binop(
3669                    AssemblerX64::emit_mov,
3670                    Size::S8,
3671                    value,
3672                    Location::Memory(addr, 0),
3673                )
3674            },
3675        )
3676    }
3677    fn i32_atomic_save_16(
3678        &mut self,
3679        value: Location,
3680        memarg: &MemArg,
3681        target_addr: Location,
3682        need_check: bool,
3683        imported_memories: bool,
3684        offset: i32,
3685        heap_access_oob: Label,
3686        unaligned_atomic: Label,
3687    ) -> Result<(), CompileError> {
3688        self.memory_op(
3689            target_addr,
3690            memarg,
3691            true,
3692            2,
3693            need_check,
3694            imported_memories,
3695            offset,
3696            heap_access_oob,
3697            unaligned_atomic,
3698            |this, addr| {
3699                this.emit_relaxed_binop(
3700                    AssemblerX64::emit_mov,
3701                    Size::S16,
3702                    value,
3703                    Location::Memory(addr, 0),
3704                )
3705            },
3706        )
3707    }
3708    // i32 atomic Add with i32
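    // `lock xadd` atomically adds the register into memory and writes the
    // previous memory value back into the register, which is then moved to
    // `ret`.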
3709    fn i32_atomic_add(
3710        &mut self,
3711        loc: Location,
3712        target: Location,
3713        memarg: &MemArg,
3714        ret: Location,
3715        need_check: bool,
3716        imported_memories: bool,
3717        offset: i32,
3718        heap_access_oob: Label,
3719        unaligned_atomic: Label,
3720    ) -> Result<(), CompileError> {
3721        let value = self.acquire_temp_gpr().ok_or_else(|| {
3722            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3723        })?;
3724        self.move_location(Size::S32, loc, Location::GPR(value))?;
3725        self.memory_op(
3726            target,
3727            memarg,
3728            true,
3729            4,
3730            need_check,
3731            imported_memories,
3732            offset,
3733            heap_access_oob,
3734            unaligned_atomic,
3735            |this, addr| {
3736                this.assembler.emit_lock_xadd(
3737                    Size::S32,
3738                    Location::GPR(value),
3739                    Location::Memory(addr, 0),
3740                )
3741            },
3742        )?;
3743        self.move_location(Size::S32, Location::GPR(value), ret)?;
3744        self.release_gpr(value);
3745        Ok(())
3746    }
3747    // i32 atomic Add with u8
3748    fn i32_atomic_add_8u(
3749        &mut self,
3750        loc: Location,
3751        target: Location,
3752        memarg: &MemArg,
3753        ret: Location,
3754        need_check: bool,
3755        imported_memories: bool,
3756        offset: i32,
3757        heap_access_oob: Label,
3758        unaligned_atomic: Label,
3759    ) -> Result<(), CompileError> {
3760        let value = self.acquire_temp_gpr().ok_or_else(|| {
3761            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3762        })?;
3763        self.move_location_extend(Size::S8, false, loc, Size::S32, Location::GPR(value))?;
3764        self.memory_op(
3765            target,
3766            memarg,
3767            true,
3768            1,
3769            need_check,
3770            imported_memories,
3771            offset,
3772            heap_access_oob,
3773            unaligned_atomic,
3774            |this, addr| {
3775                this.assembler.emit_lock_xadd(
3776                    Size::S8,
3777                    Location::GPR(value),
3778                    Location::Memory(addr, 0),
3779                )
3780            },
3781        )?;
3782        self.move_location(Size::S32, Location::GPR(value), ret)?;
3783        self.release_gpr(value);
3784        Ok(())
3785    }
3786    // i32 atomic Add with u16
3787    fn i32_atomic_add_16u(
3788        &mut self,
3789        loc: Location,
3790        target: Location,
3791        memarg: &MemArg,
3792        ret: Location,
3793        need_check: bool,
3794        imported_memories: bool,
3795        offset: i32,
3796        heap_access_oob: Label,
3797        unaligned_atomic: Label,
3798    ) -> Result<(), CompileError> {
3799        let value = self.acquire_temp_gpr().ok_or_else(|| {
3800            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3801        })?;
3802        self.move_location_extend(Size::S16, false, loc, Size::S32, Location::GPR(value))?;
3803        self.memory_op(
3804            target,
3805            memarg,
3806            true,
3807            2,
3808            need_check,
3809            imported_memories,
3810            offset,
3811            heap_access_oob,
3812            unaligned_atomic,
3813            |this, addr| {
3814                this.assembler.emit_lock_xadd(
3815                    Size::S16,
3816                    Location::GPR(value),
3817                    Location::Memory(addr, 0),
3818                )
3819            },
3820        )?;
3821        self.move_location(Size::S32, Location::GPR(value), ret)?;
3822        self.release_gpr(value);
3823        Ok(())
3824    }
3825    // i32 atomic Sub with i32
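    // There is no atomic "xsub"; subtraction reuses `lock xadd` with the
    // negated operand, and `xadd` still returns the previous memory value in
    // the source register, as the wasm op requires.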
3826    fn i32_atomic_sub(
3827        &mut self,
3828        loc: Location,
3829        target: Location,
3830        memarg: &MemArg,
3831        ret: Location,
3832        need_check: bool,
3833        imported_memories: bool,
3834        offset: i32,
3835        heap_access_oob: Label,
3836        unaligned_atomic: Label,
3837    ) -> Result<(), CompileError> {
3838        let value = self.acquire_temp_gpr().ok_or_else(|| {
3839            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3840        })?;
3841        self.location_neg(Size::S32, false, loc, Size::S32, Location::GPR(value))?;
3842        self.memory_op(
3843            target,
3844            memarg,
3845            true,
3846            4,
3847            need_check,
3848            imported_memories,
3849            offset,
3850            heap_access_oob,
3851            unaligned_atomic,
3852            |this, addr| {
3853                this.assembler.emit_lock_xadd(
3854                    Size::S32,
3855                    Location::GPR(value),
3856                    Location::Memory(addr, 0),
3857                )
3858            },
3859        )?;
3860        self.move_location(Size::S32, Location::GPR(value), ret)?;
3861        self.release_gpr(value);
3862        Ok(())
3863    }
3864    // i32 atomic Sub with u8
3865    fn i32_atomic_sub_8u(
3866        &mut self,
3867        loc: Location,
3868        target: Location,
3869        memarg: &MemArg,
3870        ret: Location,
3871        need_check: bool,
3872        imported_memories: bool,
3873        offset: i32,
3874        heap_access_oob: Label,
3875        unaligned_atomic: Label,
3876    ) -> Result<(), CompileError> {
3877        let value = self.acquire_temp_gpr().ok_or_else(|| {
3878            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3879        })?;
3880        self.location_neg(Size::S8, false, loc, Size::S32, Location::GPR(value))?;
3881        self.memory_op(
3882            target,
3883            memarg,
3884            true,
3885            1,
3886            need_check,
3887            imported_memories,
3888            offset,
3889            heap_access_oob,
3890            unaligned_atomic,
3891            |this, addr| {
3892                this.assembler.emit_lock_xadd(
3893                    Size::S8,
3894                    Location::GPR(value),
3895                    Location::Memory(addr, 0),
3896                )
3897            },
3898        )?;
3899        self.move_location(Size::S32, Location::GPR(value), ret)?;
3900        self.release_gpr(value);
3901        Ok(())
3902    }
3903    // i32 atomic Sub with u16
3904    fn i32_atomic_sub_16u(
3905        &mut self,
3906        loc: Location,
3907        target: Location,
3908        memarg: &MemArg,
3909        ret: Location,
3910        need_check: bool,
3911        imported_memories: bool,
3912        offset: i32,
3913        heap_access_oob: Label,
3914        unaligned_atomic: Label,
3915    ) -> Result<(), CompileError> {
3916        let value = self.acquire_temp_gpr().ok_or_else(|| {
3917            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3918        })?;
3919        self.location_neg(Size::S16, false, loc, Size::S32, Location::GPR(value))?;
3920        self.memory_op(
3921            target,
3922            memarg,
3923            true,
3924            2,
3925            need_check,
3926            imported_memories,
3927            offset,
3928            heap_access_oob,
3929            unaligned_atomic,
3930            |this, addr| {
3931                this.assembler.emit_lock_xadd(
3932                    Size::S16,
3933                    Location::GPR(value),
3934                    Location::Memory(addr, 0),
3935                )
3936            },
3937        )?;
3938        self.move_location(Size::S32, Location::GPR(value), ret)?;
3939        self.release_gpr(value);
3940        Ok(())
3941    }
3942    // i32 atomic And with i32
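    // `lock and/or/xor` do not return the previous memory value, so the
    // fetch-and-AND/OR/XOR ops below are emitted as a compare-and-swap retry
    // loop via emit_compare_and_swap.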
3943    fn i32_atomic_and(
3944        &mut self,
3945        loc: Location,
3946        target: Location,
3947        memarg: &MemArg,
3948        ret: Location,
3949        need_check: bool,
3950        imported_memories: bool,
3951        offset: i32,
3952        heap_access_oob: Label,
3953        unaligned_atomic: Label,
3954    ) -> Result<(), CompileError> {
3955        self.emit_compare_and_swap(
3956            loc,
3957            target,
3958            ret,
3959            memarg,
3960            4,
3961            Size::S32,
3962            Size::S32,
3963            need_check,
3964            imported_memories,
3965            offset,
3966            heap_access_oob,
3967            unaligned_atomic,
3968            |this, src, dst| {
3969                this.assembler
3970                    .emit_and(Size::S32, Location::GPR(src), Location::GPR(dst))
3971            },
3972        )
3973    }
3974    // i32 atomic And with u8
3975    fn i32_atomic_and_8u(
3976        &mut self,
3977        loc: Location,
3978        target: Location,
3979        memarg: &MemArg,
3980        ret: Location,
3981        need_check: bool,
3982        imported_memories: bool,
3983        offset: i32,
3984        heap_access_oob: Label,
3985        unaligned_atomic: Label,
3986    ) -> Result<(), CompileError> {
3987        self.emit_compare_and_swap(
3988            loc,
3989            target,
3990            ret,
3991            memarg,
3992            1,
3993            Size::S8,
3994            Size::S32,
3995            need_check,
3996            imported_memories,
3997            offset,
3998            heap_access_oob,
3999            unaligned_atomic,
4000            |this, src, dst| {
4001                this.assembler
4002                    .emit_and(Size::S32, Location::GPR(src), Location::GPR(dst))
4003            },
4004        )
4005    }
4006    // i32 atomic And with u16
4007    fn i32_atomic_and_16u(
4008        &mut self,
4009        loc: Location,
4010        target: Location,
4011        memarg: &MemArg,
4012        ret: Location,
4013        need_check: bool,
4014        imported_memories: bool,
4015        offset: i32,
4016        heap_access_oob: Label,
4017        unaligned_atomic: Label,
4018    ) -> Result<(), CompileError> {
4019        self.emit_compare_and_swap(
4020            loc,
4021            target,
4022            ret,
4023            memarg,
4024            2,
4025            Size::S16,
4026            Size::S32,
4027            need_check,
4028            imported_memories,
4029            offset,
4030            heap_access_oob,
4031            unaligned_atomic,
4032            |this, src, dst| {
4033                this.assembler
4034                    .emit_and(Size::S32, Location::GPR(src), Location::GPR(dst))
4035            },
4036        )
4037    }
4038    // i32 atomic Or with i32
4039    fn i32_atomic_or(
4040        &mut self,
4041        loc: Location,
4042        target: Location,
4043        memarg: &MemArg,
4044        ret: Location,
4045        need_check: bool,
4046        imported_memories: bool,
4047        offset: i32,
4048        heap_access_oob: Label,
4049        unaligned_atomic: Label,
4050    ) -> Result<(), CompileError> {
4051        self.emit_compare_and_swap(
4052            loc,
4053            target,
4054            ret,
4055            memarg,
4056            4,
4057            Size::S32,
4058            Size::S32,
4059            need_check,
4060            imported_memories,
4061            offset,
4062            heap_access_oob,
4063            unaligned_atomic,
4064            |this, src, dst| {
4065                this.assembler
4066                    .emit_or(Size::S32, Location::GPR(src), Location::GPR(dst))
4067            },
4068        )
4069    }
4070    // i32 atomic Or with u8
4071    fn i32_atomic_or_8u(
4072        &mut self,
4073        loc: Location,
4074        target: Location,
4075        memarg: &MemArg,
4076        ret: Location,
4077        need_check: bool,
4078        imported_memories: bool,
4079        offset: i32,
4080        heap_access_oob: Label,
4081        unaligned_atomic: Label,
4082    ) -> Result<(), CompileError> {
4083        self.emit_compare_and_swap(
4084            loc,
4085            target,
4086            ret,
4087            memarg,
4088            1,
4089            Size::S8,
4090            Size::S32,
4091            need_check,
4092            imported_memories,
4093            offset,
4094            heap_access_oob,
4095            unaligned_atomic,
4096            |this, src, dst| {
4097                this.assembler
4098                    .emit_or(Size::S32, Location::GPR(src), Location::GPR(dst))
4099            },
4100        )
4101    }
4102    // i32 atomic Or with u16
4103    fn i32_atomic_or_16u(
4104        &mut self,
4105        loc: Location,
4106        target: Location,
4107        memarg: &MemArg,
4108        ret: Location,
4109        need_check: bool,
4110        imported_memories: bool,
4111        offset: i32,
4112        heap_access_oob: Label,
4113        unaligned_atomic: Label,
4114    ) -> Result<(), CompileError> {
4115        self.emit_compare_and_swap(
4116            loc,
4117            target,
4118            ret,
4119            memarg,
4120            2,
4121            Size::S16,
4122            Size::S32,
4123            need_check,
4124            imported_memories,
4125            offset,
4126            heap_access_oob,
4127            unaligned_atomic,
4128            |this, src, dst| {
4129                this.assembler
4130                    .emit_or(Size::S32, Location::GPR(src), Location::GPR(dst))
4131            },
4132        )
4133    }
4134    // i32 atomic Xor with i32
4135    fn i32_atomic_xor(
4136        &mut self,
4137        loc: Location,
4138        target: Location,
4139        memarg: &MemArg,
4140        ret: Location,
4141        need_check: bool,
4142        imported_memories: bool,
4143        offset: i32,
4144        heap_access_oob: Label,
4145        unaligned_atomic: Label,
4146    ) -> Result<(), CompileError> {
4147        self.emit_compare_and_swap(
4148            loc,
4149            target,
4150            ret,
4151            memarg,
4152            4,
4153            Size::S32,
4154            Size::S32,
4155            need_check,
4156            imported_memories,
4157            offset,
4158            heap_access_oob,
4159            unaligned_atomic,
4160            |this, src, dst| {
4161                this.assembler
4162                    .emit_xor(Size::S32, Location::GPR(src), Location::GPR(dst))
4163            },
4164        )
4165    }
4166    // i32 atomic Xor with u8
4167    fn i32_atomic_xor_8u(
4168        &mut self,
4169        loc: Location,
4170        target: Location,
4171        memarg: &MemArg,
4172        ret: Location,
4173        need_check: bool,
4174        imported_memories: bool,
4175        offset: i32,
4176        heap_access_oob: Label,
4177        unaligned_atomic: Label,
4178    ) -> Result<(), CompileError> {
4179        self.emit_compare_and_swap(
4180            loc,
4181            target,
4182            ret,
4183            memarg,
4184            1,
4185            Size::S8,
4186            Size::S32,
4187            need_check,
4188            imported_memories,
4189            offset,
4190            heap_access_oob,
4191            unaligned_atomic,
4192            |this, src, dst| {
4193                this.assembler
4194                    .emit_xor(Size::S32, Location::GPR(src), Location::GPR(dst))
4195            },
4196        )
4197    }
4198    // i32 atomic Xor with u16
4199    fn i32_atomic_xor_16u(
4200        &mut self,
4201        loc: Location,
4202        target: Location,
4203        memarg: &MemArg,
4204        ret: Location,
4205        need_check: bool,
4206        imported_memories: bool,
4207        offset: i32,
4208        heap_access_oob: Label,
4209        unaligned_atomic: Label,
4210    ) -> Result<(), CompileError> {
4211        self.emit_compare_and_swap(
4212            loc,
4213            target,
4214            ret,
4215            memarg,
4216            2,
4217            Size::S16,
4218            Size::S32,
4219            need_check,
4220            imported_memories,
4221            offset,
4222            heap_access_oob,
4223            unaligned_atomic,
4224            |this, src, dst| {
4225                this.assembler
4226                    .emit_xor(Size::S32, Location::GPR(src), Location::GPR(dst))
4227            },
4228        )
4229    }
4230    // i32 atomic Exchange with i32
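    // `xchg` with a memory operand is implicitly locked on x86, so the swap is
    // atomic without a `lock` prefix.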
4231    fn i32_atomic_xchg(
4232        &mut self,
4233        loc: Location,
4234        target: Location,
4235        memarg: &MemArg,
4236        ret: Location,
4237        need_check: bool,
4238        imported_memories: bool,
4239        offset: i32,
4240        heap_access_oob: Label,
4241        unaligned_atomic: Label,
4242    ) -> Result<(), CompileError> {
4243        let value = self.acquire_temp_gpr().ok_or_else(|| {
4244            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
4245        })?;
4246        self.move_location(Size::S32, loc, Location::GPR(value))?;
4247        self.memory_op(
4248            target,
4249            memarg,
4250            true,
4251            4,
4252            need_check,
4253            imported_memories,
4254            offset,
4255            heap_access_oob,
4256            unaligned_atomic,
4257            |this, addr| {
4258                this.assembler
4259                    .emit_xchg(Size::S32, Location::GPR(value), Location::Memory(addr, 0))
4260            },
4261        )?;
4262        self.move_location(Size::S32, Location::GPR(value), ret)?;
4263        self.release_gpr(value);
4264        Ok(())
4265    }
4266    // i32 atomic Exchange with u8
4267    fn i32_atomic_xchg_8u(
4268        &mut self,
4269        loc: Location,
4270        target: Location,
4271        memarg: &MemArg,
4272        ret: Location,
4273        need_check: bool,
4274        imported_memories: bool,
4275        offset: i32,
4276        heap_access_oob: Label,
4277        unaligned_atomic: Label,
4278    ) -> Result<(), CompileError> {
4279        let value = self.acquire_temp_gpr().ok_or_else(|| {
4280            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
4281        })?;
4282        self.assembler
4283            .emit_movzx(Size::S8, loc, Size::S32, Location::GPR(value))?;
4284        self.memory_op(
4285            target,
4286            memarg,
4287            true,
4288            1,
4289            need_check,
4290            imported_memories,
4291            offset,
4292            heap_access_oob,
4293            unaligned_atomic,
4294            |this, addr| {
4295                this.assembler
4296                    .emit_xchg(Size::S8, Location::GPR(value), Location::Memory(addr, 0))
4297            },
4298        )?;
4299        self.move_location(Size::S32, Location::GPR(value), ret)?;
4300        self.release_gpr(value);
4301        Ok(())
4302    }
4303    // i32 atomic Exchange with u16
4304    fn i32_atomic_xchg_16u(
4305        &mut self,
4306        loc: Location,
4307        target: Location,
4308        memarg: &MemArg,
4309        ret: Location,
4310        need_check: bool,
4311        imported_memories: bool,
4312        offset: i32,
4313        heap_access_oob: Label,
4314        unaligned_atomic: Label,
4315    ) -> Result<(), CompileError> {
4316        let value = self.acquire_temp_gpr().ok_or_else(|| {
4317            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
4318        })?;
4319        self.assembler
4320            .emit_movzx(Size::S16, loc, Size::S32, Location::GPR(value))?;
4321        self.memory_op(
4322            target,
4323            memarg,
4324            true,
4325            2,
4326            need_check,
4327            imported_memories,
4328            offset,
4329            heap_access_oob,
4330            unaligned_atomic,
4331            |this, addr| {
4332                this.assembler
4333                    .emit_xchg(Size::S16, Location::GPR(value), Location::Memory(addr, 0))
4334            },
4335        )?;
4336        self.move_location(Size::S32, Location::GPR(value), ret)?;
4337        self.release_gpr(value);
4338        Ok(())
4339    }
4340    // i32 atomic CompareExchange with i32
4341    fn i32_atomic_cmpxchg(
4342        &mut self,
4343        new: Location,
4344        cmp: Location,
4345        target: Location,
4346        memarg: &MemArg,
4347        ret: Location,
4348        need_check: bool,
4349        imported_memories: bool,
4350        offset: i32,
4351        heap_access_oob: Label,
4352        unaligned_atomic: Label,
4353    ) -> Result<(), CompileError> {
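        // `lock cmpxchg` takes its comparand in (E)AX and writes the previous
        // memory value back there, so RAX is reserved for `compare`; `value`
        // is a scratch GPR chosen not to alias `cmp` or `new` and is preserved
        // with push/pop around the operation.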
4354        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
4355        let value = if cmp == Location::GPR(GPR::R14) {
4356            if new == Location::GPR(GPR::R13) {
4357                GPR::R12
4358            } else {
4359                GPR::R13
4360            }
4361        } else {
4362            GPR::R14
4363        };
4364        self.assembler.emit_push(Size::S64, Location::GPR(value))?;
4365        self.assembler
4366            .emit_mov(Size::S32, cmp, Location::GPR(compare))?;
4367        self.assembler
4368            .emit_mov(Size::S32, new, Location::GPR(value))?;
4369
4370        self.memory_op(
4371            target,
4372            memarg,
4373            true,
4374            4,
4375            need_check,
4376            imported_memories,
4377            offset,
4378            heap_access_oob,
4379            unaligned_atomic,
4380            |this, addr| {
4381                this.assembler.emit_lock_cmpxchg(
4382                    Size::S32,
4383                    Location::GPR(value),
4384                    Location::Memory(addr, 0),
4385                )?;
4386                this.assembler
4387                    .emit_mov(Size::S32, Location::GPR(compare), ret)
4388            },
4389        )?;
4390        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
4391        self.release_gpr(compare);
4392        Ok(())
4393    }
4394    // i32 atomic CompareExchange with u8
4395    fn i32_atomic_cmpxchg_8u(
4396        &mut self,
4397        new: Location,
4398        cmp: Location,
4399        target: Location,
4400        memarg: &MemArg,
4401        ret: Location,
4402        need_check: bool,
4403        imported_memories: bool,
4404        offset: i32,
4405        heap_access_oob: Label,
4406        unaligned_atomic: Label,
4407    ) -> Result<(), CompileError> {
4408        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
4409        let value = if cmp == Location::GPR(GPR::R14) {
4410            if new == Location::GPR(GPR::R13) {
4411                GPR::R12
4412            } else {
4413                GPR::R13
4414            }
4415        } else {
4416            GPR::R14
4417        };
4418        self.assembler.emit_push(Size::S64, Location::GPR(value))?;
4419        self.assembler
4420            .emit_mov(Size::S32, cmp, Location::GPR(compare))?;
4421        self.assembler
4422            .emit_mov(Size::S32, new, Location::GPR(value))?;
4423
4424        self.memory_op(
4425            target,
4426            memarg,
4427            true,
4428            1,
4429            need_check,
4430            imported_memories,
4431            offset,
4432            heap_access_oob,
4433            unaligned_atomic,
4434            |this, addr| {
4435                this.assembler.emit_lock_cmpxchg(
4436                    Size::S8,
4437                    Location::GPR(value),
4438                    Location::Memory(addr, 0),
4439                )?;
4440                this.assembler
4441                    .emit_movzx(Size::S8, Location::GPR(compare), Size::S32, ret)
4442            },
4443        )?;
4444        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
4445        self.release_gpr(compare);
4446        Ok(())
4447    }
4448    // i32 atomic Compare-Exchange with u16
4449    fn i32_atomic_cmpxchg_16u(
4450        &mut self,
4451        new: Location,
4452        cmp: Location,
4453        target: Location,
4454        memarg: &MemArg,
4455        ret: Location,
4456        need_check: bool,
4457        imported_memories: bool,
4458        offset: i32,
4459        heap_access_oob: Label,
4460        unaligned_atomic: Label,
4461    ) -> Result<(), CompileError> {
4462        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
4463        let value = if cmp == Location::GPR(GPR::R14) {
4464            if new == Location::GPR(GPR::R13) {
4465                GPR::R12
4466            } else {
4467                GPR::R13
4468            }
4469        } else {
4470            GPR::R14
4471        };
4472        self.assembler.emit_push(Size::S64, Location::GPR(value))?;
4473        self.assembler
4474            .emit_mov(Size::S32, cmp, Location::GPR(compare))?;
4475        self.assembler
4476            .emit_mov(Size::S32, new, Location::GPR(value))?;
4477
4478        self.memory_op(
4479            target,
4480            memarg,
4481            true,
4482            2,
4483            need_check,
4484            imported_memories,
4485            offset,
4486            heap_access_oob,
4487            unaligned_atomic,
4488            |this, addr| {
4489                this.assembler.emit_lock_cmpxchg(
4490                    Size::S16,
4491                    Location::GPR(value),
4492                    Location::Memory(addr, 0),
4493                )?;
4494                this.assembler
4495                    .emit_movzx(Size::S16, Location::GPR(compare), Size::S32, ret)
4496            },
4497        )?;
4498        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
4499        self.release_gpr(compare);
4500        Ok(())
4501    }
4502
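    // Emits a `call` whose destination is patched later through a relocation.
    // A near call encodes as opcode E8 followed by a 4-byte PC-relative
    // displacement, so the relocation is recorded one byte past the current
    // offset. The displacement is measured from the *end* of the instruction,
    // i.e. 4 bytes beyond the relocation site, hence the -4 addend; calling a
    // label placed immediately after the call leaves a zero placeholder.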
4503    fn emit_call_with_reloc(
4504        &mut self,
4505        _calling_convention: CallingConvention,
4506        reloc_target: RelocationTarget,
4507    ) -> Result<Vec<Relocation>, CompileError> {
4508        let mut relocations = vec![];
4509        let next = self.get_label();
4510        let reloc_at = self.assembler.get_offset().0 + 1; // skip E8
4511        self.assembler.emit_call_label(next)?;
4512        self.emit_label(next)?;
4513        relocations.push(Relocation {
4514            kind: RelocationKind::X86CallPCRel4,
4515            reloc_target,
4516            offset: reloc_at as u32,
4517            addend: -4,
4518        });
4519        Ok(relocations)
4520    }
4521
4522    fn emit_binop_add64(
4523        &mut self,
4524        loc_a: Location,
4525        loc_b: Location,
4526        ret: Location,
4527    ) -> Result<(), CompileError> {
4528        self.emit_binop_i64(AssemblerX64::emit_add, loc_a, loc_b, ret)
4529    }
4530    fn emit_binop_sub64(
4531        &mut self,
4532        loc_a: Location,
4533        loc_b: Location,
4534        ret: Location,
4535    ) -> Result<(), CompileError> {
4536        self.emit_binop_i64(AssemblerX64::emit_sub, loc_a, loc_b, ret)
4537    }
4538    fn emit_binop_mul64(
4539        &mut self,
4540        loc_a: Location,
4541        loc_b: Location,
4542        ret: Location,
4543    ) -> Result<(), CompileError> {
4544        self.emit_binop_i64(AssemblerX64::emit_imul, loc_a, loc_b, ret)
4545    }
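    // x86 `div` divides the 128-bit pair RDX:RAX by its operand, leaving the
    // quotient in RAX and the remainder in RDX. Unsigned division therefore
    // zeroes RDX first; a zero divisor is routed to the
    // `integer_division_by_zero` trap label via emit_relaxed_xdiv, which also
    // returns the native offset of the division for trap bookkeeping.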
4546    fn emit_binop_udiv64(
4547        &mut self,
4548        loc_a: Location,
4549        loc_b: Location,
4550        ret: Location,
4551        integer_division_by_zero: Label,
4552        _integer_overflow: Label,
4553    ) -> Result<usize, CompileError> {
4554        // We assume that RAX and RDX are temporary registers here.
4555        self.assembler
4556            .emit_mov(Size::S64, loc_a, Location::GPR(GPR::RAX))?;
4557        self.assembler
4558            .emit_xor(Size::S64, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX))?;
4559        let offset = self.emit_relaxed_xdiv(
4560            AssemblerX64::emit_div,
4561            Size::S64,
4562            loc_b,
4563            integer_division_by_zero,
4564        )?;
4565        self.assembler
4566            .emit_mov(Size::S64, Location::GPR(GPR::RAX), ret)?;
4567        Ok(offset)
4568    }
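    // Signed division sign-extends the dividend instead of zeroing it: `cqo`
    // spreads the sign bit of RAX across RDX, forming the RDX:RAX pair that
    // `idiv` expects.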
4569    fn emit_binop_sdiv64(
4570        &mut self,
4571        loc_a: Location,
4572        loc_b: Location,
4573        ret: Location,
4574        integer_division_by_zero: Label,
4575        _integer_overflow: Label,
4576    ) -> Result<usize, CompileError> {
4577        // We assume that RAX and RDX are temporary registers here.
4578        self.assembler
4579            .emit_mov(Size::S64, loc_a, Location::GPR(GPR::RAX))?;
4580        self.assembler.emit_cqo()?;
4581        let offset = self.emit_relaxed_xdiv(
4582            AssemblerX64::emit_idiv,
4583            Size::S64,
4584            loc_b,
4585            integer_division_by_zero,
4586        )?;
4587        self.assembler
4588            .emit_mov(Size::S64, Location::GPR(GPR::RAX), ret)?;
4589        Ok(offset)
4590    }
4591    fn emit_binop_urem64(
4592        &mut self,
4593        loc_a: Location,
4594        loc_b: Location,
4595        ret: Location,
4596        integer_division_by_zero: Label,
4597        _integer_overflow: Label,
4598    ) -> Result<usize, CompileError> {
4599        // We assume that RAX and RDX are temporary registers here.
4600        self.assembler
4601            .emit_mov(Size::S64, loc_a, Location::GPR(GPR::RAX))?;
4602        self.assembler
4603            .emit_xor(Size::S64, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX))?;
4604        let offset = self.emit_relaxed_xdiv(
4605            AssemblerX64::emit_div,
4606            Size::S64,
4607            loc_b,
4608            integer_division_by_zero,
4609        )?;
4610        self.assembler
4611            .emit_mov(Size::S64, Location::GPR(GPR::RDX), ret)?;
4612        Ok(offset)
4613    }
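    // i64.rem_s needs a special case: `idiv` of i64::MIN by -1 overflows the
    // quotient and raises #DE, but wasm defines that remainder as 0. The two
    // compares below detect exactly this operand pair and short-circuit to
    // ret = 0 without ever reaching the division.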
4614    fn emit_binop_srem64(
4615        &mut self,
4616        loc_a: Location,
4617        loc_b: Location,
4618        ret: Location,
4619        integer_division_by_zero: Label,
4620        _integer_overflow: Label,
4621    ) -> Result<usize, CompileError> {
4622        // We assume that RAX and RDX are temporary registers here.
4623        let normal_path = self.assembler.get_label();
4624        let end = self.assembler.get_label();
4625
4626        self.emit_relaxed_cmp(Size::S64, Location::Imm64(0x8000000000000000u64), loc_a)?;
4627        self.assembler.emit_jmp(Condition::NotEqual, normal_path)?;
4628        self.emit_relaxed_cmp(Size::S64, Location::Imm64(0xffffffffffffffffu64), loc_b)?;
4629        self.assembler.emit_jmp(Condition::NotEqual, normal_path)?;
4630        self.move_location(Size::S64, Location::Imm64(0), ret)?;
4631        self.assembler.emit_jmp(Condition::None, end)?;
4632
4633        self.emit_label(normal_path)?;
4634        self.assembler
4635            .emit_mov(Size::S64, loc_a, Location::GPR(GPR::RAX))?;
4636        self.assembler.emit_cqo()?;
4637        let offset = self.emit_relaxed_xdiv(
4638            AssemblerX64::emit_idiv,
4639            Size::S64,
4640            loc_b,
4641            integer_division_by_zero,
4642        )?;
4643        self.assembler
4644            .emit_mov(Size::S64, Location::GPR(GPR::RDX), ret)?;
4645
4646        self.emit_label(end)?;
4647        Ok(offset)
4648    }
4649    fn emit_binop_and64(
4650        &mut self,
4651        loc_a: Location,
4652        loc_b: Location,
4653        ret: Location,
4654    ) -> Result<(), CompileError> {
4655        self.emit_binop_i64(AssemblerX64::emit_and, loc_a, loc_b, ret)
4656    }
4657    fn emit_binop_or64(
4658        &mut self,
4659        loc_a: Location,
4660        loc_b: Location,
4661        ret: Location,
4662    ) -> Result<(), CompileError> {
4663        self.emit_binop_i64(AssemblerX64::emit_or, loc_a, loc_b, ret)
4664    }
4665    fn emit_binop_xor64(
4666        &mut self,
4667        loc_a: Location,
4668        loc_b: Location,
4669        ret: Location,
4670    ) -> Result<(), CompileError> {
4671        self.emit_binop_i64(AssemblerX64::emit_xor, loc_a, loc_b, ret)
4672    }
4673    fn i64_cmp_ge_s(
4674        &mut self,
4675        loc_a: Location,
4676        loc_b: Location,
4677        ret: Location,
4678    ) -> Result<(), CompileError> {
4679        self.emit_cmpop_i64_dynamic_b(Condition::GreaterEqual, loc_a, loc_b, ret)
4680    }
4681    fn i64_cmp_gt_s(
4682        &mut self,
4683        loc_a: Location,
4684        loc_b: Location,
4685        ret: Location,
4686    ) -> Result<(), CompileError> {
4687        self.emit_cmpop_i64_dynamic_b(Condition::Greater, loc_a, loc_b, ret)
4688    }
4689    fn i64_cmp_le_s(
4690        &mut self,
4691        loc_a: Location,
4692        loc_b: Location,
4693        ret: Location,
4694    ) -> Result<(), CompileError> {
4695        self.emit_cmpop_i64_dynamic_b(Condition::LessEqual, loc_a, loc_b, ret)
4696    }
4697    fn i64_cmp_lt_s(
4698        &mut self,
4699        loc_a: Location,
4700        loc_b: Location,
4701        ret: Location,
4702    ) -> Result<(), CompileError> {
4703        self.emit_cmpop_i64_dynamic_b(Condition::Less, loc_a, loc_b, ret)
4704    }
4705    fn i64_cmp_ge_u(
4706        &mut self,
4707        loc_a: Location,
4708        loc_b: Location,
4709        ret: Location,
4710    ) -> Result<(), CompileError> {
4711        self.emit_cmpop_i64_dynamic_b(Condition::AboveEqual, loc_a, loc_b, ret)
4712    }
4713    fn i64_cmp_gt_u(
4714        &mut self,
4715        loc_a: Location,
4716        loc_b: Location,
4717        ret: Location,
4718    ) -> Result<(), CompileError> {
4719        self.emit_cmpop_i64_dynamic_b(Condition::Above, loc_a, loc_b, ret)
4720    }
4721    fn i64_cmp_le_u(
4722        &mut self,
4723        loc_a: Location,
4724        loc_b: Location,
4725        ret: Location,
4726    ) -> Result<(), CompileError> {
4727        self.emit_cmpop_i64_dynamic_b(Condition::BelowEqual, loc_a, loc_b, ret)
4728    }
4729    fn i64_cmp_lt_u(
4730        &mut self,
4731        loc_a: Location,
4732        loc_b: Location,
4733        ret: Location,
4734    ) -> Result<(), CompileError> {
4735        self.emit_cmpop_i64_dynamic_b(Condition::Below, loc_a, loc_b, ret)
4736    }
4737    fn i64_cmp_ne(
4738        &mut self,
4739        loc_a: Location,
4740        loc_b: Location,
4741        ret: Location,
4742    ) -> Result<(), CompileError> {
4743        self.emit_cmpop_i64_dynamic_b(Condition::NotEqual, loc_a, loc_b, ret)
4744    }
4745    fn i64_cmp_eq(
4746        &mut self,
4747        loc_a: Location,
4748        loc_b: Location,
4749        ret: Location,
4750    ) -> Result<(), CompileError> {
4751        self.emit_cmpop_i64_dynamic_b(Condition::Equal, loc_a, loc_b, ret)
4752    }
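    // Leading-zero count: a single LZCNT when the target has it, otherwise a
    // `bsr` fallback. `bsr` yields the index of the highest set bit and is
    // undefined for a zero input, hence the explicit zero test that returns
    // 64. For an index i in 0..=63, 63 - i == i ^ 63, so the count is
    // finished with an XOR against 63 instead of a subtraction.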
4753    fn i64_clz(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
4754        let src = match loc {
4755            Location::Imm64(_) | Location::Imm32(_) | Location::Memory(_, _) => {
4756                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
4757                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
4758                })?;
4759                self.move_location(Size::S64, loc, Location::GPR(tmp))?;
4760                tmp
4761            }
4762            Location::GPR(reg) => reg,
4763            _ => {
4764                codegen_error!("singlepass i64_clz unreachable");
4765            }
4766        };
4767        let dst = match ret {
4768            Location::Memory(_, _) => self.acquire_temp_gpr().ok_or_else(|| {
4769                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
4770            })?,
4771            Location::GPR(reg) => reg,
4772            _ => {
4773                codegen_error!("singlepass i64_clz unreachable");
4774            }
4775        };
4776
4777        if self.assembler.arch_has_xzcnt() {
4778            self.assembler
4779                .arch_emit_lzcnt(Size::S64, Location::GPR(src), Location::GPR(dst))?;
4780        } else {
4781            let zero_path = self.assembler.get_label();
4782            let end = self.assembler.get_label();
4783
4784            self.assembler.emit_test_gpr_64(src)?;
4785            self.assembler.emit_jmp(Condition::Equal, zero_path)?;
4786            self.assembler
4787                .emit_bsr(Size::S64, Location::GPR(src), Location::GPR(dst))?;
4788            self.assembler
4789                .emit_xor(Size::S64, Location::Imm32(63), Location::GPR(dst))?;
4790            self.assembler.emit_jmp(Condition::None, end)?;
4791            self.emit_label(zero_path)?;
4792            self.move_location(Size::S64, Location::Imm32(64), Location::GPR(dst))?;
4793            self.emit_label(end)?;
4794        }
4795        match loc {
4796            Location::Imm64(_) | Location::Imm32(_) | Location::Memory(_, _) => {
4797                self.release_gpr(src);
4798            }
4799            _ => {}
4800        };
4801        if let Location::Memory(_, _) = ret {
4802            self.move_location(Size::S64, Location::GPR(dst), ret)?;
4803            self.release_gpr(dst);
4804        };
4805        Ok(())
4806    }
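    // Trailing-zero count: same shape as i64_clz, using TZCNT when available
    // and otherwise `bsf` (index of the lowest set bit, likewise undefined
    // for zero). No final XOR is needed because the bsf index already is the
    // trailing-zero count.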
4807    fn i64_ctz(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
4808        let src = match loc {
4809            Location::Imm64(_) | Location::Imm32(_) | Location::Memory(_, _) => {
4810                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
4811                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
4812                })?;
4813                self.move_location(Size::S64, loc, Location::GPR(tmp))?;
4814                tmp
4815            }
4816            Location::GPR(reg) => reg,
4817            _ => {
4818                codegen_error!("singlepass i64_ctz unreachable");
4819            }
4820        };
4821        let dst = match ret {
4822            Location::Memory(_, _) => self.acquire_temp_gpr().ok_or_else(|| {
4823                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
4824            })?,
4825            Location::GPR(reg) => reg,
4826            _ => {
4827                codegen_error!("singlepass i64_ctz unreachable");
4828            }
4829        };
4830
4831        if self.assembler.arch_has_xzcnt() {
4832            self.assembler
4833                .arch_emit_tzcnt(Size::S64, Location::GPR(src), Location::GPR(dst))?;
4834        } else {
4835            let zero_path = self.assembler.get_label();
4836            let end = self.assembler.get_label();
4837
4838            self.assembler.emit_test_gpr_64(src)?;
4839            self.assembler.emit_jmp(Condition::Equal, zero_path)?;
4840            self.assembler
4841                .emit_bsf(Size::S64, Location::GPR(src), Location::GPR(dst))?;
4842            self.assembler.emit_jmp(Condition::None, end)?;
4843            self.emit_label(zero_path)?;
4844            self.move_location(Size::S64, Location::Imm64(64), Location::GPR(dst))?;
4845            self.emit_label(end)?;
4846        }
4847
4848        match loc {
4849            Location::Imm64(_) | Location::Imm32(_) | Location::Memory(_, _) => {
4850                self.release_gpr(src);
4851            }
4852            _ => {}
4853        };
4854        if let Location::Memory(_, _) = ret {
4855            self.move_location(Size::S64, Location::GPR(dst), ret)?;
4856            self.release_gpr(dst);
4857        };
4858        Ok(())
4859    }
4860    fn i64_popcnt(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
4861        match loc {
4862            Location::Imm64(_) | Location::Imm32(_) => {
4863                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
4864                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
4865                })?;
4866                self.move_location(Size::S64, loc, Location::GPR(tmp))?;
4867                if let Location::Memory(_, _) = ret {
4868                    let out_tmp = self.acquire_temp_gpr().ok_or_else(|| {
4869                        CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
4870                    })?;
4871                    self.assembler.emit_popcnt(
4872                        Size::S64,
4873                        Location::GPR(tmp),
4874                        Location::GPR(out_tmp),
4875                    )?;
4876                    self.move_location(Size::S64, Location::GPR(out_tmp), ret)?;
4877                    self.release_gpr(out_tmp);
4878                } else {
4879                    self.assembler
4880                        .emit_popcnt(Size::S64, Location::GPR(tmp), ret)?;
4881                }
4882                self.release_gpr(tmp);
4883            }
4884            Location::Memory(_, _) | Location::GPR(_) => {
4885                if let Location::Memory(_, _) = ret {
4886                    let out_tmp = self.acquire_temp_gpr().ok_or_else(|| {
4887                        CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
4888                    })?;
4889                    self.assembler
4890                        .emit_popcnt(Size::S64, loc, Location::GPR(out_tmp))?;
4891                    self.move_location(Size::S64, Location::GPR(out_tmp), ret)?;
4892                    self.release_gpr(out_tmp);
4893                } else {
4894                    self.assembler.emit_popcnt(Size::S64, loc, ret)?;
4895                }
4896            }
4897            _ => {
4898                codegen_error!("singlepass i64_popcnt unreachable");
4899            }
4900        }
4901        Ok(())
4902    }
4903    fn i64_shl(
4904        &mut self,
4905        loc_a: Location,
4906        loc_b: Location,
4907        ret: Location,
4908    ) -> Result<(), CompileError> {
4909        self.emit_shift_i64(AssemblerX64::emit_shl, loc_a, loc_b, ret)
4910    }
4911    fn i64_shr(
4912        &mut self,
4913        loc_a: Location,
4914        loc_b: Location,
4915        ret: Location,
4916    ) -> Result<(), CompileError> {
4917        self.emit_shift_i64(AssemblerX64::emit_shr, loc_a, loc_b, ret)
4918    }
4919    fn i64_sar(
4920        &mut self,
4921        loc_a: Location,
4922        loc_b: Location,
4923        ret: Location,
4924    ) -> Result<(), CompileError> {
4925        self.emit_shift_i64(AssemblerX64::emit_sar, loc_a, loc_b, ret)
4926    }
4927    fn i64_rol(
4928        &mut self,
4929        loc_a: Location,
4930        loc_b: Location,
4931        ret: Location,
4932    ) -> Result<(), CompileError> {
4933        self.emit_shift_i64(AssemblerX64::emit_rol, loc_a, loc_b, ret)
4934    }
4935    fn i64_ror(
4936        &mut self,
4937        loc_a: Location,
4938        loc_b: Location,
4939        ret: Location,
4940    ) -> Result<(), CompileError> {
4941        self.emit_shift_i64(AssemblerX64::emit_ror, loc_a, loc_b, ret)
4942    }
4943    fn i64_load(
4944        &mut self,
4945        addr: Location,
4946        memarg: &MemArg,
4947        ret: Location,
4948        need_check: bool,
4949        imported_memories: bool,
4950        offset: i32,
4951        heap_access_oob: Label,
4952        unaligned_atomic: Label,
4953    ) -> Result<(), CompileError> {
4954        self.memory_op(
4955            addr,
4956            memarg,
4957            false,
4958            8,
4959            need_check,
4960            imported_memories,
4961            offset,
4962            heap_access_oob,
4963            unaligned_atomic,
4964            |this, addr| {
4965                this.emit_relaxed_binop(
4966                    AssemblerX64::emit_mov,
4967                    Size::S64,
4968                    Location::Memory(addr, 0),
4969                    ret,
4970                )
4971            },
4972        )
4973    }
4974    fn i64_load_8u(
4975        &mut self,
4976        addr: Location,
4977        memarg: &MemArg,
4978        ret: Location,
4979        need_check: bool,
4980        imported_memories: bool,
4981        offset: i32,
4982        heap_access_oob: Label,
4983        unaligned_atomic: Label,
4984    ) -> Result<(), CompileError> {
4985        self.memory_op(
4986            addr,
4987            memarg,
4988            false,
4989            1,
4990            need_check,
4991            imported_memories,
4992            offset,
4993            heap_access_oob,
4994            unaligned_atomic,
4995            |this, addr| {
4996                this.emit_relaxed_zx_sx(
4997                    AssemblerX64::emit_movzx,
4998                    Size::S8,
4999                    Location::Memory(addr, 0),
5000                    Size::S64,
5001                    ret,
5002                )
5003            },
5004        )
5005    }
5006    fn i64_load_8s(
5007        &mut self,
5008        addr: Location,
5009        memarg: &MemArg,
5010        ret: Location,
5011        need_check: bool,
5012        imported_memories: bool,
5013        offset: i32,
5014        heap_access_oob: Label,
5015        unaligned_atomic: Label,
5016    ) -> Result<(), CompileError> {
5017        self.memory_op(
5018            addr,
5019            memarg,
5020            false,
5021            1,
5022            need_check,
5023            imported_memories,
5024            offset,
5025            heap_access_oob,
5026            unaligned_atomic,
5027            |this, addr| {
5028                this.emit_relaxed_zx_sx(
5029                    AssemblerX64::emit_movsx,
5030                    Size::S8,
5031                    Location::Memory(addr, 0),
5032                    Size::S64,
5033                    ret,
5034                )
5035            },
5036        )
5037    }
5038    fn i64_load_16u(
5039        &mut self,
5040        addr: Location,
5041        memarg: &MemArg,
5042        ret: Location,
5043        need_check: bool,
5044        imported_memories: bool,
5045        offset: i32,
5046        heap_access_oob: Label,
5047        unaligned_atomic: Label,
5048    ) -> Result<(), CompileError> {
5049        self.memory_op(
5050            addr,
5051            memarg,
5052            false,
5053            2,
5054            need_check,
5055            imported_memories,
5056            offset,
5057            heap_access_oob,
5058            unaligned_atomic,
5059            |this, addr| {
5060                this.emit_relaxed_zx_sx(
5061                    AssemblerX64::emit_movzx,
5062                    Size::S16,
5063                    Location::Memory(addr, 0),
5064                    Size::S64,
5065                    ret,
5066                )
5067            },
5068        )
5069    }
5070    fn i64_load_16s(
5071        &mut self,
5072        addr: Location,
5073        memarg: &MemArg,
5074        ret: Location,
5075        need_check: bool,
5076        imported_memories: bool,
5077        offset: i32,
5078        heap_access_oob: Label,
5079        unaligned_atomic: Label,
5080    ) -> Result<(), CompileError> {
5081        self.memory_op(
5082            addr,
5083            memarg,
5084            false,
5085            2,
5086            need_check,
5087            imported_memories,
5088            offset,
5089            heap_access_oob,
5090            unaligned_atomic,
5091            |this, addr| {
5092                this.emit_relaxed_zx_sx(
5093                    AssemblerX64::emit_movsx,
5094                    Size::S16,
5095                    Location::Memory(addr, 0),
5096                    Size::S64,
5097                    ret,
5098                )
5099            },
5100        )
5101    }
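    // i64.load32_u: a 32-bit move into a register implicitly zeroes the upper
    // 32 bits on x86-64, but a 32-bit store into a memory return slot does
    // not, so for a memory destination the upper half is cleared by hand
    // before the low half is written.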
5102    fn i64_load_32u(
5103        &mut self,
5104        addr: Location,
5105        memarg: &MemArg,
5106        ret: Location,
5107        need_check: bool,
5108        imported_memories: bool,
5109        offset: i32,
5110        heap_access_oob: Label,
5111        unaligned_atomic: Label,
5112    ) -> Result<(), CompileError> {
5113        self.memory_op(
5114            addr,
5115            memarg,
5116            false,
5117            4,
5118            need_check,
5119            imported_memories,
5120            offset,
5121            heap_access_oob,
5122            unaligned_atomic,
5123            |this, addr| {
5124                match ret {
5125                    Location::GPR(_) => {}
5126                    Location::Memory(base, offset) => {
5127                        this.assembler.emit_mov(
5128                            Size::S32,
5129                            Location::Imm32(0),
5130                            Location::Memory(base, offset + 4),
5131                        )?; // clear upper bits
5132                    }
5133                    _ => {
5134                        codegen_error!("singlepass i64_load_32u unreachable");
5135                    }
5136                }
5137                this.emit_relaxed_binop(
5138                    AssemblerX64::emit_mov,
5139                    Size::S32,
5140                    Location::Memory(addr, 0),
5141                    ret,
5142                )
5143            },
5144        )
5145    }
5146    fn i64_load_32s(
5147        &mut self,
5148        addr: Location,
5149        memarg: &MemArg,
5150        ret: Location,
5151        need_check: bool,
5152        imported_memories: bool,
5153        offset: i32,
5154        heap_access_oob: Label,
5155        unaligned_atomic: Label,
5156    ) -> Result<(), CompileError> {
5157        self.memory_op(
5158            addr,
5159            memarg,
5160            false,
5161            4,
5162            need_check,
5163            imported_memories,
5164            offset,
5165            heap_access_oob,
5166            unaligned_atomic,
5167            |this, addr| {
5168                this.emit_relaxed_zx_sx(
5169                    AssemblerX64::emit_movsx,
5170                    Size::S32,
5171                    Location::Memory(addr, 0),
5172                    Size::S64,
5173                    ret,
5174                )
5175            },
5176        )
5177    }
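    // Atomic loads: naturally aligned loads of up to 8 bytes are already
    // atomic on x86-64, so a plain mov suffices. The `true` argument asks
    // memory_op to verify alignment, routing misaligned accesses to the
    // `unaligned_atomic` trap label.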
5178    fn i64_atomic_load(
5179        &mut self,
5180        addr: Location,
5181        memarg: &MemArg,
5182        ret: Location,
5183        need_check: bool,
5184        imported_memories: bool,
5185        offset: i32,
5186        heap_access_oob: Label,
5187        unaligned_atomic: Label,
5188    ) -> Result<(), CompileError> {
5189        self.memory_op(
5190            addr,
5191            memarg,
5192            true,
5193            8,
5194            need_check,
5195            imported_memories,
5196            offset,
5197            heap_access_oob,
5198            unaligned_atomic,
5199            |this, addr| this.emit_relaxed_mov(Size::S64, Location::Memory(addr, 0), ret),
5200        )
5201    }
5202    fn i64_atomic_load_8u(
5203        &mut self,
5204        addr: Location,
5205        memarg: &MemArg,
5206        ret: Location,
5207        need_check: bool,
5208        imported_memories: bool,
5209        offset: i32,
5210        heap_access_oob: Label,
5211        unaligned_atomic: Label,
5212    ) -> Result<(), CompileError> {
5213        self.memory_op(
5214            addr,
5215            memarg,
5216            true,
5217            1,
5218            need_check,
5219            imported_memories,
5220            offset,
5221            heap_access_oob,
5222            unaligned_atomic,
5223            |this, addr| {
5224                this.emit_relaxed_zero_extension(
5225                    Size::S8,
5226                    Location::Memory(addr, 0),
5227                    Size::S64,
5228                    ret,
5229                )
5230            },
5231        )
5232    }
5233    fn i64_atomic_load_16u(
5234        &mut self,
5235        addr: Location,
5236        memarg: &MemArg,
5237        ret: Location,
5238        need_check: bool,
5239        imported_memories: bool,
5240        offset: i32,
5241        heap_access_oob: Label,
5242        unaligned_atomic: Label,
5243    ) -> Result<(), CompileError> {
5244        self.memory_op(
5245            addr,
5246            memarg,
5247            true,
5248            2,
5249            need_check,
5250            imported_memories,
5251            offset,
5252            heap_access_oob,
5253            unaligned_atomic,
5254            |this, addr| {
5255                this.emit_relaxed_zero_extension(
5256                    Size::S16,
5257                    Location::Memory(addr, 0),
5258                    Size::S64,
5259                    ret,
5260                )
5261            },
5262        )
5263    }
5264    fn i64_atomic_load_32u(
5265        &mut self,
5266        addr: Location,
5267        memarg: &MemArg,
5268        ret: Location,
5269        need_check: bool,
5270        imported_memories: bool,
5271        offset: i32,
5272        heap_access_oob: Label,
5273        unaligned_atomic: Label,
5274    ) -> Result<(), CompileError> {
5275        self.memory_op(
5276            addr,
5277            memarg,
5278            true,
5279            4,
5280            need_check,
5281            imported_memories,
5282            offset,
5283            heap_access_oob,
5284            unaligned_atomic,
5285            |this, addr| {
5286                match ret {
5287                    Location::GPR(_) => {}
5288                    Location::Memory(base, offset) => {
5289                        this.move_location(
5290                            Size::S32,
5291                            Location::Imm32(0),
5292                            Location::Memory(base, offset + 4),
5293                        )?; // clear upper bits
5294                    }
5295                    _ => {
5296                        codegen_error!("singlepass i64_atomic_load_32u unreachable");
5297                    }
5298                }
5299                this.emit_relaxed_zero_extension(
5300                    Size::S32,
5301                    Location::Memory(addr, 0),
5302                    Size::S64,
5303                    ret,
5304                )
5305            },
5306        )
5307    }
5308    fn i64_save(
5309        &mut self,
5310        target_value: Location,
5311        memarg: &MemArg,
5312        target_addr: Location,
5313        need_check: bool,
5314        imported_memories: bool,
5315        offset: i32,
5316        heap_access_oob: Label,
5317        unaligned_atomic: Label,
5318    ) -> Result<(), CompileError> {
5319        self.memory_op(
5320            target_addr,
5321            memarg,
5322            false,
5323            8,
5324            need_check,
5325            imported_memories,
5326            offset,
5327            heap_access_oob,
5328            unaligned_atomic,
5329            |this, addr| {
5330                this.emit_relaxed_binop(
5331                    AssemblerX64::emit_mov,
5332                    Size::S64,
5333                    target_value,
5334                    Location::Memory(addr, 0),
5335                )
5336            },
5337        )
5338    }
5339    fn i64_save_8(
5340        &mut self,
5341        target_value: Location,
5342        memarg: &MemArg,
5343        target_addr: Location,
5344        need_check: bool,
5345        imported_memories: bool,
5346        offset: i32,
5347        heap_access_oob: Label,
5348        unaligned_atomic: Label,
5349    ) -> Result<(), CompileError> {
5350        self.memory_op(
5351            target_addr,
5352            memarg,
5353            false,
5354            1,
5355            need_check,
5356            imported_memories,
5357            offset,
5358            heap_access_oob,
5359            unaligned_atomic,
5360            |this, addr| {
5361                this.emit_relaxed_binop(
5362                    AssemblerX64::emit_mov,
5363                    Size::S8,
5364                    target_value,
5365                    Location::Memory(addr, 0),
5366                )
5367            },
5368        )
5369    }
5370    fn i64_save_16(
5371        &mut self,
5372        target_value: Location,
5373        memarg: &MemArg,
5374        target_addr: Location,
5375        need_check: bool,
5376        imported_memories: bool,
5377        offset: i32,
5378        heap_access_oob: Label,
5379        unaligned_atomic: Label,
5380    ) -> Result<(), CompileError> {
5381        self.memory_op(
5382            target_addr,
5383            memarg,
5384            false,
5385            2,
5386            need_check,
5387            imported_memories,
5388            offset,
5389            heap_access_oob,
5390            unaligned_atomic,
5391            |this, addr| {
5392                this.emit_relaxed_binop(
5393                    AssemblerX64::emit_mov,
5394                    Size::S16,
5395                    target_value,
5396                    Location::Memory(addr, 0),
5397                )
5398            },
5399        )
5400    }
5401    fn i64_save_32(
5402        &mut self,
5403        target_value: Location,
5404        memarg: &MemArg,
5405        target_addr: Location,
5406        need_check: bool,
5407        imported_memories: bool,
5408        offset: i32,
5409        heap_access_oob: Label,
5410        unaligned_atomic: Label,
5411    ) -> Result<(), CompileError> {
5412        self.memory_op(
5413            target_addr,
5414            memarg,
5415            false,
5416            4,
5417            need_check,
5418            imported_memories,
5419            offset,
5420            heap_access_oob,
5421            unaligned_atomic,
5422            |this, addr| {
5423                this.emit_relaxed_binop(
5424                    AssemblerX64::emit_mov,
5425                    Size::S32,
5426                    target_value,
5427                    Location::Memory(addr, 0),
5428                )
5429            },
5430        )
5431    }
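    // Atomic stores go through emit_relaxed_atomic_xchg: an `xchg` with a
    // memory operand asserts the LOCK signal implicitly, so it doubles as a
    // full barrier, which trivially satisfies wasm's sequentially consistent
    // atomic store.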
5432    fn i64_atomic_save(
5433        &mut self,
5434        value: Location,
5435        memarg: &MemArg,
5436        target_addr: Location,
5437        need_check: bool,
5438        imported_memories: bool,
5439        offset: i32,
5440        heap_access_oob: Label,
5441        unaligned_atomic: Label,
5442    ) -> Result<(), CompileError> {
5443        self.memory_op(
5444            target_addr,
5445            memarg,
5446            true,
5447            8,
5448            need_check,
5449            imported_memories,
5450            offset,
5451            heap_access_oob,
5452            unaligned_atomic,
5453            |this, addr| this.emit_relaxed_atomic_xchg(Size::S64, value, Location::Memory(addr, 0)),
5454        )
5455    }
5456    fn i64_atomic_save_8(
5457        &mut self,
5458        value: Location,
5459        memarg: &MemArg,
5460        target_addr: Location,
5461        need_check: bool,
5462        imported_memories: bool,
5463        offset: i32,
5464        heap_access_oob: Label,
5465        unaligned_atomic: Label,
5466    ) -> Result<(), CompileError> {
5467        self.memory_op(
5468            target_addr,
5469            memarg,
5470            true,
5471            1,
5472            need_check,
5473            imported_memories,
5474            offset,
5475            heap_access_oob,
5476            unaligned_atomic,
5477            |this, addr| this.emit_relaxed_atomic_xchg(Size::S8, value, Location::Memory(addr, 0)),
5478        )
5479    }
5480    fn i64_atomic_save_16(
5481        &mut self,
5482        value: Location,
5483        memarg: &MemArg,
5484        target_addr: Location,
5485        need_check: bool,
5486        imported_memories: bool,
5487        offset: i32,
5488        heap_access_oob: Label,
5489        unaligned_atomic: Label,
5490    ) -> Result<(), CompileError> {
5491        self.memory_op(
5492            target_addr,
5493            memarg,
5494            true,
5495            2,
5496            need_check,
5497            imported_memories,
5498            offset,
5499            heap_access_oob,
5500            unaligned_atomic,
5501            |this, addr| this.emit_relaxed_atomic_xchg(Size::S16, value, Location::Memory(addr, 0)),
5502        )
5503    }
5504    fn i64_atomic_save_32(
5505        &mut self,
5506        value: Location,
5507        memarg: &MemArg,
5508        target_addr: Location,
5509        need_check: bool,
5510        imported_memories: bool,
5511        offset: i32,
5512        heap_access_oob: Label,
5513        unaligned_atomic: Label,
5514    ) -> Result<(), CompileError> {
5515        self.memory_op(
5516            target_addr,
5517            memarg,
5518            true,
5519            4,
5520            need_check,
5521            imported_memories,
5522            offset,
5523            heap_access_oob,
5524            unaligned_atomic,
5525            |this, addr| this.emit_relaxed_atomic_xchg(Size::S32, value, Location::Memory(addr, 0)),
5526        )
5527    }
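    // The atomic add family leans on `lock xadd`, which atomically adds the
    // register into memory and leaves the *previous* memory value in the
    // register, exactly the old value that atomic.rmw.add must return.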
5528    // i64 atomic Add with i64
5529    fn i64_atomic_add(
5530        &mut self,
5531        loc: Location,
5532        target: Location,
5533        memarg: &MemArg,
5534        ret: Location,
5535        need_check: bool,
5536        imported_memories: bool,
5537        offset: i32,
5538        heap_access_oob: Label,
5539        unaligned_atomic: Label,
5540    ) -> Result<(), CompileError> {
5541        let value = self.acquire_temp_gpr().ok_or_else(|| {
5542            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
5543        })?;
5544        self.move_location(Size::S64, loc, Location::GPR(value))?;
5545        self.memory_op(
5546            target,
5547            memarg,
5548            true,
5549            8,
5550            need_check,
5551            imported_memories,
5552            offset,
5553            heap_access_oob,
5554            unaligned_atomic,
5555            |this, addr| {
5556                this.assembler.emit_lock_xadd(
5557                    Size::S64,
5558                    Location::GPR(value),
5559                    Location::Memory(addr, 0),
5560                )
5561            },
5562        )?;
5563        self.move_location(Size::S64, Location::GPR(value), ret)?;
5564        self.release_gpr(value);
5565        Ok(())
5566    }
5567    // i64 atomic Add with u8
5568    fn i64_atomic_add_8u(
5569        &mut self,
5570        loc: Location,
5571        target: Location,
5572        memarg: &MemArg,
5573        ret: Location,
5574        need_check: bool,
5575        imported_memories: bool,
5576        offset: i32,
5577        heap_access_oob: Label,
5578        unaligned_atomic: Label,
5579    ) -> Result<(), CompileError> {
5580        let value = self.acquire_temp_gpr().ok_or_else(|| {
5581            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
5582        })?;
5583        self.move_location_extend(Size::S8, false, loc, Size::S64, Location::GPR(value))?;
5584        self.memory_op(
5585            target,
5586            memarg,
5587            true,
5588            1,
5589            need_check,
5590            imported_memories,
5591            offset,
5592            heap_access_oob,
5593            unaligned_atomic,
5594            |this, addr| {
5595                this.assembler.emit_lock_xadd(
5596                    Size::S8,
5597                    Location::GPR(value),
5598                    Location::Memory(addr, 0),
5599                )
5600            },
5601        )?;
5602        self.move_location(Size::S64, Location::GPR(value), ret)?;
5603        self.release_gpr(value);
5604        Ok(())
5605    }
5606    // i64 atomic Add with u16
5607    fn i64_atomic_add_16u(
5608        &mut self,
5609        loc: Location,
5610        target: Location,
5611        memarg: &MemArg,
5612        ret: Location,
5613        need_check: bool,
5614        imported_memories: bool,
5615        offset: i32,
5616        heap_access_oob: Label,
5617        unaligned_atomic: Label,
5618    ) -> Result<(), CompileError> {
5619        let value = self.acquire_temp_gpr().ok_or_else(|| {
5620            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
5621        })?;
5622        self.move_location_extend(Size::S16, false, loc, Size::S64, Location::GPR(value))?;
5623        self.memory_op(
5624            target,
5625            memarg,
5626            true,
5627            2,
5628            need_check,
5629            imported_memories,
5630            offset,
5631            heap_access_oob,
5632            unaligned_atomic,
5633            |this, addr| {
5634                this.assembler.emit_lock_xadd(
5635                    Size::S16,
5636                    Location::GPR(value),
5637                    Location::Memory(addr, 0),
5638                )
5639            },
5640        )?;
5641        self.move_location(Size::S64, Location::GPR(value), ret)?;
5642        self.release_gpr(value);
5643        Ok(())
5644    }
5645    // i64 atomic Add with u32
5646    fn i64_atomic_add_32u(
5647        &mut self,
5648        loc: Location,
5649        target: Location,
5650        memarg: &MemArg,
5651        ret: Location,
5652        need_check: bool,
5653        imported_memories: bool,
5654        offset: i32,
5655        heap_access_oob: Label,
5656        unaligned_atomic: Label,
5657    ) -> Result<(), CompileError> {
5658        let value = self.acquire_temp_gpr().ok_or_else(|| {
5659            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
5660        })?;
5661        self.move_location_extend(Size::S32, false, loc, Size::S64, Location::GPR(value))?;
5662        self.memory_op(
5663            target,
5664            memarg,
5665            true,
5666            4,
5667            need_check,
5668            imported_memories,
5669            offset,
5670            heap_access_oob,
5671            unaligned_atomic,
5672            |this, addr| {
5673                this.assembler.emit_lock_xadd(
5674                    Size::S32,
5675                    Location::GPR(value),
5676                    Location::Memory(addr, 0),
5677                )
5678            },
5679        )?;
5680        self.move_location(Size::S64, Location::GPR(value), ret)?;
5681        self.release_gpr(value);
5682        Ok(())
5683    }
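    // There is no "lock xsub": subtraction reuses `lock xadd` by negating the
    // operand first (location_neg), since old - x == old + (-x) in two's
    // complement; the register again comes back holding the old value.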
5684    // i64 atomic Sub with i64
5685    fn i64_atomic_sub(
5686        &mut self,
5687        loc: Location,
5688        target: Location,
5689        memarg: &MemArg,
5690        ret: Location,
5691        need_check: bool,
5692        imported_memories: bool,
5693        offset: i32,
5694        heap_access_oob: Label,
5695        unaligned_atomic: Label,
5696    ) -> Result<(), CompileError> {
5697        let value = self.acquire_temp_gpr().ok_or_else(|| {
5698            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
5699        })?;
5700        self.location_neg(Size::S64, false, loc, Size::S64, Location::GPR(value))?;
5701        self.memory_op(
5702            target,
5703            memarg,
5704            true,
5705            8,
5706            need_check,
5707            imported_memories,
5708            offset,
5709            heap_access_oob,
5710            unaligned_atomic,
5711            |this, addr| {
5712                this.assembler.emit_lock_xadd(
5713                    Size::S64,
5714                    Location::GPR(value),
5715                    Location::Memory(addr, 0),
5716                )
5717            },
5718        )?;
5719        self.move_location(Size::S64, Location::GPR(value), ret)?;
5720        self.release_gpr(value);
5721        Ok(())
5722    }
5723    // i64 atomic Sub with u8
5724    fn i64_atomic_sub_8u(
5725        &mut self,
5726        loc: Location,
5727        target: Location,
5728        memarg: &MemArg,
5729        ret: Location,
5730        need_check: bool,
5731        imported_memories: bool,
5732        offset: i32,
5733        heap_access_oob: Label,
5734        unaligned_atomic: Label,
5735    ) -> Result<(), CompileError> {
5736        let value = self.acquire_temp_gpr().ok_or_else(|| {
5737            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
5738        })?;
5739        self.location_neg(Size::S8, false, loc, Size::S64, Location::GPR(value))?;
5740        self.memory_op(
5741            target,
5742            memarg,
5743            true,
5744            1,
5745            need_check,
5746            imported_memories,
5747            offset,
5748            heap_access_oob,
5749            unaligned_atomic,
5750            |this, addr| {
5751                this.assembler.emit_lock_xadd(
5752                    Size::S8,
5753                    Location::GPR(value),
5754                    Location::Memory(addr, 0),
5755                )
5756            },
5757        )?;
5758        self.move_location(Size::S64, Location::GPR(value), ret)?;
5759        self.release_gpr(value);
5760        Ok(())
5761    }
5762    // i64 atomic Sub with u16
5763    fn i64_atomic_sub_16u(
5764        &mut self,
5765        loc: Location,
5766        target: Location,
5767        memarg: &MemArg,
5768        ret: Location,
5769        need_check: bool,
5770        imported_memories: bool,
5771        offset: i32,
5772        heap_access_oob: Label,
5773        unaligned_atomic: Label,
5774    ) -> Result<(), CompileError> {
5775        let value = self.acquire_temp_gpr().ok_or_else(|| {
5776            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
5777        })?;
5778        self.location_neg(Size::S16, false, loc, Size::S64, Location::GPR(value))?;
5779        self.memory_op(
5780            target,
5781            memarg,
5782            true,
5783            2,
5784            need_check,
5785            imported_memories,
5786            offset,
5787            heap_access_oob,
5788            unaligned_atomic,
5789            |this, addr| {
5790                this.assembler.emit_lock_xadd(
5791                    Size::S16,
5792                    Location::GPR(value),
5793                    Location::Memory(addr, 0),
5794                )
5795            },
5796        )?;
5797        self.move_location(Size::S64, Location::GPR(value), ret)?;
5798        self.release_gpr(value);
5799        Ok(())
5800    }
5801    // i64 atomic Sub with u32
5802    fn i64_atomic_sub_32u(
5803        &mut self,
5804        loc: Location,
5805        target: Location,
5806        memarg: &MemArg,
5807        ret: Location,
5808        need_check: bool,
5809        imported_memories: bool,
5810        offset: i32,
5811        heap_access_oob: Label,
5812        unaligned_atomic: Label,
5813    ) -> Result<(), CompileError> {
5814        let value = self.acquire_temp_gpr().ok_or_else(|| {
5815            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
5816        })?;
5817        self.location_neg(Size::S32, false, loc, Size::S64, Location::GPR(value))?;
5818        self.memory_op(
5819            target,
5820            memarg,
5821            true,
5822            4,
5823            need_check,
5824            imported_memories,
5825            offset,
5826            heap_access_oob,
5827            unaligned_atomic,
5828            |this, addr| {
5829                this.assembler.emit_lock_xadd(
5830                    Size::S32,
5831                    Location::GPR(value),
5832                    Location::Memory(addr, 0),
5833                )
5834            },
5835        )?;
5836        self.move_location(Size::S64, Location::GPR(value), ret)?;
5837        self.release_gpr(value);
5838        Ok(())
5839    }
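    // x86 has no fetch-and/or/xor instruction, so the and/or/xor family is
    // built on the emit_compare_and_swap helper: in effect a `lock cmpxchg`
    // retry loop that combines the loaded value with the operand via the
    // supplied closure and swaps the result in only if memory is unchanged,
    // retrying on contention.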
5840    // i64 atomic And with i64
5841    fn i64_atomic_and(
5842        &mut self,
5843        loc: Location,
5844        target: Location,
5845        memarg: &MemArg,
5846        ret: Location,
5847        need_check: bool,
5848        imported_memories: bool,
5849        offset: i32,
5850        heap_access_oob: Label,
5851        unaligned_atomic: Label,
5852    ) -> Result<(), CompileError> {
5853        self.emit_compare_and_swap(
5854            loc,
5855            target,
5856            ret,
5857            memarg,
5858            8,
5859            Size::S64,
5860            Size::S64,
5861            need_check,
5862            imported_memories,
5863            offset,
5864            heap_access_oob,
5865            unaligned_atomic,
5866            |this, src, dst| {
5867                this.assembler
5868                    .emit_and(Size::S64, Location::GPR(src), Location::GPR(dst))
5869            },
5870        )
5871    }
5872    // i64 atomic And with u8
5873    fn i64_atomic_and_8u(
5874        &mut self,
5875        loc: Location,
5876        target: Location,
5877        memarg: &MemArg,
5878        ret: Location,
5879        need_check: bool,
5880        imported_memories: bool,
5881        offset: i32,
5882        heap_access_oob: Label,
5883        unaligned_atomic: Label,
5884    ) -> Result<(), CompileError> {
5885        self.emit_compare_and_swap(
5886            loc,
5887            target,
5888            ret,
5889            memarg,
5890            1,
5891            Size::S8,
5892            Size::S64,
5893            need_check,
5894            imported_memories,
5895            offset,
5896            heap_access_oob,
5897            unaligned_atomic,
5898            |this, src, dst| {
5899                this.assembler
5900                    .emit_and(Size::S64, Location::GPR(src), Location::GPR(dst))
5901            },
5902        )
5903    }
5904    // i64 atomic And with u16
5905    fn i64_atomic_and_16u(
5906        &mut self,
5907        loc: Location,
5908        target: Location,
5909        memarg: &MemArg,
5910        ret: Location,
5911        need_check: bool,
5912        imported_memories: bool,
5913        offset: i32,
5914        heap_access_oob: Label,
5915        unaligned_atomic: Label,
5916    ) -> Result<(), CompileError> {
5917        self.emit_compare_and_swap(
5918            loc,
5919            target,
5920            ret,
5921            memarg,
5922            2,
5923            Size::S16,
5924            Size::S64,
5925            need_check,
5926            imported_memories,
5927            offset,
5928            heap_access_oob,
5929            unaligned_atomic,
5930            |this, src, dst| {
5931                this.assembler
5932                    .emit_and(Size::S64, Location::GPR(src), Location::GPR(dst))
5933            },
5934        )
5935    }
5936    // i64 atomic And with u32
5937    fn i64_atomic_and_32u(
5938        &mut self,
5939        loc: Location,
5940        target: Location,
5941        memarg: &MemArg,
5942        ret: Location,
5943        need_check: bool,
5944        imported_memories: bool,
5945        offset: i32,
5946        heap_access_oob: Label,
5947        unaligned_atomic: Label,
5948    ) -> Result<(), CompileError> {
5949        self.emit_compare_and_swap(
5950            loc,
5951            target,
5952            ret,
5953            memarg,
5954            4,
5955            Size::S32,
5956            Size::S64,
5957            need_check,
5958            imported_memories,
5959            offset,
5960            heap_access_oob,
5961            unaligned_atomic,
5962            |this, src, dst| {
5963                this.assembler
5964                    .emit_and(Size::S64, Location::GPR(src), Location::GPR(dst))
5965            },
5966        )
5967    }
5968    // i64 atomic Or with i64
5969    fn i64_atomic_or(
5970        &mut self,
5971        loc: Location,
5972        target: Location,
5973        memarg: &MemArg,
5974        ret: Location,
5975        need_check: bool,
5976        imported_memories: bool,
5977        offset: i32,
5978        heap_access_oob: Label,
5979        unaligned_atomic: Label,
5980    ) -> Result<(), CompileError> {
5981        self.emit_compare_and_swap(
5982            loc,
5983            target,
5984            ret,
5985            memarg,
5986            8,
5987            Size::S64,
5988            Size::S64,
5989            need_check,
5990            imported_memories,
5991            offset,
5992            heap_access_oob,
5993            unaligned_atomic,
5994            |this, src, dst| {
5995                this.location_or(Size::S64, Location::GPR(src), Location::GPR(dst), false)
5996            },
5997        )
5998    }
5999    // i64 atomic Or with u8
6000    fn i64_atomic_or_8u(
6001        &mut self,
6002        loc: Location,
6003        target: Location,
6004        memarg: &MemArg,
6005        ret: Location,
6006        need_check: bool,
6007        imported_memories: bool,
6008        offset: i32,
6009        heap_access_oob: Label,
6010        unaligned_atomic: Label,
6011    ) -> Result<(), CompileError> {
6012        self.emit_compare_and_swap(
6013            loc,
6014            target,
6015            ret,
6016            memarg,
6017            1,
6018            Size::S8,
6019            Size::S64,
6020            need_check,
6021            imported_memories,
6022            offset,
6023            heap_access_oob,
6024            unaligned_atomic,
6025            |this, src, dst| {
6026                this.location_or(Size::S64, Location::GPR(src), Location::GPR(dst), false)
6027            },
6028        )
6029    }
6030    // i64 atomic Or with u16
6031    fn i64_atomic_or_16u(
6032        &mut self,
6033        loc: Location,
6034        target: Location,
6035        memarg: &MemArg,
6036        ret: Location,
6037        need_check: bool,
6038        imported_memories: bool,
6039        offset: i32,
6040        heap_access_oob: Label,
6041        unaligned_atomic: Label,
6042    ) -> Result<(), CompileError> {
6043        self.emit_compare_and_swap(
6044            loc,
6045            target,
6046            ret,
6047            memarg,
6048            2,
6049            Size::S16,
6050            Size::S64,
6051            need_check,
6052            imported_memories,
6053            offset,
6054            heap_access_oob,
6055            unaligned_atomic,
6056            |this, src, dst| {
6057                this.location_or(Size::S64, Location::GPR(src), Location::GPR(dst), false)
6058            },
6059        )
6060    }
6061    // i64 atomic Or with u32
6062    fn i64_atomic_or_32u(
6063        &mut self,
6064        loc: Location,
6065        target: Location,
6066        memarg: &MemArg,
6067        ret: Location,
6068        need_check: bool,
6069        imported_memories: bool,
6070        offset: i32,
6071        heap_access_oob: Label,
6072        unaligned_atomic: Label,
6073    ) -> Result<(), CompileError> {
6074        self.emit_compare_and_swap(
6075            loc,
6076            target,
6077            ret,
6078            memarg,
6079            4,
6080            Size::S32,
6081            Size::S64,
6082            need_check,
6083            imported_memories,
6084            offset,
6085            heap_access_oob,
6086            unaligned_atomic,
6087            |this, src, dst| {
6088                this.location_or(Size::S64, Location::GPR(src), Location::GPR(dst), false)
6089            },
6090        )
6091    }
6092    // i64 atomic Xor with i64
6093    fn i64_atomic_xor(
6094        &mut self,
6095        loc: Location,
6096        target: Location,
6097        memarg: &MemArg,
6098        ret: Location,
6099        need_check: bool,
6100        imported_memories: bool,
6101        offset: i32,
6102        heap_access_oob: Label,
6103        unaligned_atomic: Label,
6104    ) -> Result<(), CompileError> {
6105        self.emit_compare_and_swap(
6106            loc,
6107            target,
6108            ret,
6109            memarg,
6110            8,
6111            Size::S64,
6112            Size::S64,
6113            need_check,
6114            imported_memories,
6115            offset,
6116            heap_access_oob,
6117            unaligned_atomic,
6118            |this, src, dst| {
6119                this.location_xor(Size::S64, Location::GPR(src), Location::GPR(dst), false)
6120            },
6121        )
6122    }
6123    // i64 atomic xor with u8
6124    fn i64_atomic_xor_8u(
6125        &mut self,
6126        loc: Location,
6127        target: Location,
6128        memarg: &MemArg,
6129        ret: Location,
6130        need_check: bool,
6131        imported_memories: bool,
6132        offset: i32,
6133        heap_access_oob: Label,
6134        unaligned_atomic: Label,
6135    ) -> Result<(), CompileError> {
6136        self.emit_compare_and_swap(
6137            loc,
6138            target,
6139            ret,
6140            memarg,
6141            1,
6142            Size::S8,
6143            Size::S64,
6144            need_check,
6145            imported_memories,
6146            offset,
6147            heap_access_oob,
6148            unaligned_atomic,
6149            |this, src, dst| {
6150                this.location_xor(Size::S64, Location::GPR(src), Location::GPR(dst), false)
6151            },
6152        )
6153    }
6154    // i64 atomic xor with u16
6155    fn i64_atomic_xor_16u(
6156        &mut self,
6157        loc: Location,
6158        target: Location,
6159        memarg: &MemArg,
6160        ret: Location,
6161        need_check: bool,
6162        imported_memories: bool,
6163        offset: i32,
6164        heap_access_oob: Label,
6165        unaligned_atomic: Label,
6166    ) -> Result<(), CompileError> {
6167        self.emit_compare_and_swap(
6168            loc,
6169            target,
6170            ret,
6171            memarg,
6172            2,
6173            Size::S16,
6174            Size::S64,
6175            need_check,
6176            imported_memories,
6177            offset,
6178            heap_access_oob,
6179            unaligned_atomic,
6180            |this, src, dst| {
6181                this.location_xor(Size::S64, Location::GPR(src), Location::GPR(dst), false)
6182            },
6183        )
6184    }
6185    // i64 atomic xor with u32
6186    fn i64_atomic_xor_32u(
6187        &mut self,
6188        loc: Location,
6189        target: Location,
6190        memarg: &MemArg,
6191        ret: Location,
6192        need_check: bool,
6193        imported_memories: bool,
6194        offset: i32,
6195        heap_access_oob: Label,
6196        unaligned_atomic: Label,
6197    ) -> Result<(), CompileError> {
6198        self.emit_compare_and_swap(
6199            loc,
6200            target,
6201            ret,
6202            memarg,
6203            4,
6204            Size::S32,
6205            Size::S64,
6206            need_check,
6207            imported_memories,
6208            offset,
6209            heap_access_oob,
6210            unaligned_atomic,
6211            |this, src, dst| {
6212                this.location_xor(Size::S64, Location::GPR(src), Location::GPR(dst), false)
6213            },
6214        )
6215    }
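    // Note: the atomic Or/Xor helpers above all funnel through
    // `emit_compare_and_swap`, since x86-64 has no single instruction that both
    // applies OR/XOR to memory and hands back the old value; judging by its name
    // and usage, the helper loops on a locked compare-and-swap until the
    // read-modify-write wins. The Exchange helpers below need no such loop:
    // XCHG with a memory operand is implicitly locked on x86, so one
    // instruction both swaps and returns the previous contents.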
    // i64 atomic Exchange with i64
    fn i64_atomic_xchg(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        let value = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        self.move_location(Size::S64, loc, Location::GPR(value))?;
        self.memory_op(
            target,
            memarg,
            true,
            8,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.assembler
                    .emit_xchg(Size::S64, Location::GPR(value), Location::Memory(addr, 0))
            },
        )?;
        self.move_location(Size::S64, Location::GPR(value), ret)?;
        self.release_gpr(value);
        Ok(())
    }
    // i64 atomic Exchange with u8
    fn i64_atomic_xchg_8u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        let value = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        self.assembler
            .emit_movzx(Size::S8, loc, Size::S64, Location::GPR(value))?;
        self.memory_op(
            target,
            memarg,
            true,
            1,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.assembler
                    .emit_xchg(Size::S8, Location::GPR(value), Location::Memory(addr, 0))
            },
        )?;
        self.move_location(Size::S64, Location::GPR(value), ret)?;
        self.release_gpr(value);
        Ok(())
    }
    // i64 atomic Exchange with u16
    fn i64_atomic_xchg_16u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        let value = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        self.assembler
            .emit_movzx(Size::S16, loc, Size::S64, Location::GPR(value))?;
        self.memory_op(
            target,
            memarg,
            true,
            2,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.assembler
                    .emit_xchg(Size::S16, Location::GPR(value), Location::Memory(addr, 0))
            },
        )?;
        self.move_location(Size::S64, Location::GPR(value), ret)?;
        self.release_gpr(value);
        Ok(())
    }
    // i64 atomic Exchange with u32
    fn i64_atomic_xchg_32u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        let value = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        self.assembler
            .emit_movzx(Size::S32, loc, Size::S64, Location::GPR(value))?;
        self.memory_op(
            target,
            memarg,
            true,
            4,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.assembler
                    .emit_xchg(Size::S32, Location::GPR(value), Location::Memory(addr, 0))
            },
        )?;
        self.move_location(Size::S64, Location::GPR(value), ret)?;
        self.release_gpr(value);
        Ok(())
    }
    // i64 atomic CmpXchg with i64
    fn i64_atomic_cmpxchg(
        &mut self,
        new: Location,
        cmp: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
        let value = if cmp == Location::GPR(GPR::R14) {
            if new == Location::GPR(GPR::R13) {
                GPR::R12
            } else {
                GPR::R13
            }
        } else {
            GPR::R14
        };
        self.assembler.emit_push(Size::S64, Location::GPR(value))?;
        self.assembler
            .emit_mov(Size::S64, cmp, Location::GPR(compare))?;
        self.assembler
            .emit_mov(Size::S64, new, Location::GPR(value))?;

        self.memory_op(
            target,
            memarg,
            true,
            8,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.assembler.emit_lock_cmpxchg(
                    Size::S64,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )?;
                this.assembler
                    .emit_mov(Size::S64, Location::GPR(compare), ret)
            },
        )?;
        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
        self.release_gpr(compare);
        Ok(())
    }
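    // Note on the cmpxchg helpers: CMPXCHG hard-wires RAX as the expected-value
    // and result register, hence `reserve_unused_temp_gpr(GPR::RAX)`. The `new`
    // value gets a manually chosen scratch GPR (R14, falling back to R13/R12
    // when an input already lives there) that is preserved with push/pop around
    // the operation instead of going through the temp-GPR allocator.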
    // i64 atomic CmpXchg with u8
    fn i64_atomic_cmpxchg_8u(
        &mut self,
        new: Location,
        cmp: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
        let value = if cmp == Location::GPR(GPR::R14) {
            if new == Location::GPR(GPR::R13) {
                GPR::R12
            } else {
                GPR::R13
            }
        } else {
            GPR::R14
        };
        self.assembler.emit_push(Size::S64, Location::GPR(value))?;
        self.assembler
            .emit_mov(Size::S64, cmp, Location::GPR(compare))?;
        self.assembler
            .emit_mov(Size::S64, new, Location::GPR(value))?;

        self.memory_op(
            target,
            memarg,
            true,
            1,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.assembler.emit_lock_cmpxchg(
                    Size::S8,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )?;
                this.assembler
                    .emit_movzx(Size::S8, Location::GPR(compare), Size::S64, ret)
            },
        )?;
        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
        self.release_gpr(compare);
        Ok(())
    }
    // i64 atomic CmpXchg with u16
    fn i64_atomic_cmpxchg_16u(
        &mut self,
        new: Location,
        cmp: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
        let value = if cmp == Location::GPR(GPR::R14) {
            if new == Location::GPR(GPR::R13) {
                GPR::R12
            } else {
                GPR::R13
            }
        } else {
            GPR::R14
        };
        self.assembler.emit_push(Size::S64, Location::GPR(value))?;
        self.assembler
            .emit_mov(Size::S64, cmp, Location::GPR(compare))?;
        self.assembler
            .emit_mov(Size::S64, new, Location::GPR(value))?;

        self.memory_op(
            target,
            memarg,
            true,
            2,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.assembler.emit_lock_cmpxchg(
                    Size::S16,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )?;
                this.assembler
                    .emit_movzx(Size::S16, Location::GPR(compare), Size::S64, ret)
            },
        )?;
        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
        self.release_gpr(compare);
        Ok(())
    }
    // i64 atomic CmpXchg with u32
    fn i64_atomic_cmpxchg_32u(
        &mut self,
        new: Location,
        cmp: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
        let value = if cmp == Location::GPR(GPR::R14) {
            if new == Location::GPR(GPR::R13) {
                GPR::R12
            } else {
                GPR::R13
            }
        } else {
            GPR::R14
        };
        self.assembler.emit_push(Size::S64, Location::GPR(value))?;
        self.assembler
            .emit_mov(Size::S64, cmp, Location::GPR(compare))?;
        self.assembler
            .emit_mov(Size::S64, new, Location::GPR(value))?;

        self.memory_op(
            target,
            memarg,
            true,
            4,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.assembler.emit_lock_cmpxchg(
                    Size::S32,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )?;
                this.assembler
                    .emit_mov(Size::S32, Location::GPR(compare), ret)
            },
        )?;
        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
        self.release_gpr(compare);
        Ok(())
    }

    fn f32_load(
        &mut self,
        addr: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            addr,
            memarg,
            false,
            4,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.emit_relaxed_binop(
                    AssemblerX64::emit_mov,
                    Size::S32,
                    Location::Memory(addr, 0),
                    ret,
                )
            },
        )
    }
    fn f32_save(
        &mut self,
        target_value: Location,
        memarg: &MemArg,
        target_addr: Location,
        canonicalize: bool,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        let canonicalize = canonicalize && self.arch_supports_canonicalize_nan();
        self.memory_op(
            target_addr,
            memarg,
            false,
            4,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                if !canonicalize {
                    this.emit_relaxed_binop(
                        AssemblerX64::emit_mov,
                        Size::S32,
                        target_value,
                        Location::Memory(addr, 0),
                    )
                } else {
                    this.canonicalize_nan(Size::S32, target_value, Location::Memory(addr, 0))
                }
            },
        )
    }
    fn f64_load(
        &mut self,
        addr: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            addr,
            memarg,
            false,
            8,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.emit_relaxed_binop(
                    AssemblerX64::emit_mov,
                    Size::S64,
                    Location::Memory(addr, 0),
                    ret,
                )
            },
        )
    }
    fn f64_save(
        &mut self,
        target_value: Location,
        memarg: &MemArg,
        target_addr: Location,
        canonicalize: bool,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        let canonicalize = canonicalize && self.arch_supports_canonicalize_nan();
        self.memory_op(
            target_addr,
            memarg,
            false,
            8,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                if !canonicalize {
                    this.emit_relaxed_binop(
                        AssemblerX64::emit_mov,
                        Size::S64,
                        target_value,
                        Location::Memory(addr, 0),
                    )
                } else {
                    this.canonicalize_nan(Size::S64, target_value, Location::Memory(addr, 0))
                }
            },
        )
    }

    fn convert_f64_i64(
        &mut self,
        loc: Location,
        signed: bool,
        ret: Location,
    ) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp_in = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        if self.assembler.arch_has_fconverti() {
            self.emit_relaxed_mov(Size::S64, loc, Location::GPR(tmp_in))?;
            if signed {
                self.assembler.arch_emit_f64_convert_si64(tmp_in, tmp_out)?;
            } else {
                self.assembler.arch_emit_f64_convert_ui64(tmp_in, tmp_out)?;
            }
            self.emit_relaxed_mov(Size::S64, Location::SIMD(tmp_out), ret)?;
        } else if signed {
            self.assembler
                .emit_mov(Size::S64, loc, Location::GPR(tmp_in))?;
            self.assembler
                .emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            self.move_location(Size::S64, Location::SIMD(tmp_out), ret)?;
        } else {
            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;

            let do_convert = self.assembler.get_label();
            let end_convert = self.assembler.get_label();

            self.assembler
                .emit_mov(Size::S64, loc, Location::GPR(tmp_in))?;
            self.assembler.emit_test_gpr_64(tmp_in)?;
            self.assembler.emit_jmp(Condition::Signed, do_convert)?;
            self.assembler
                .emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            self.assembler.emit_jmp(Condition::None, end_convert)?;
            self.emit_label(do_convert)?;
            self.move_location(Size::S64, Location::GPR(tmp_in), Location::GPR(tmp))?;
            self.assembler
                .emit_and(Size::S64, Location::Imm32(1), Location::GPR(tmp))?;
            self.assembler
                .emit_shr(Size::S64, Location::Imm8(1), Location::GPR(tmp_in))?;
            self.assembler
                .emit_or(Size::S64, Location::GPR(tmp), Location::GPR(tmp_in))?;
            self.assembler
                .emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            self.assembler
                .emit_vaddsd(tmp_out, XMMOrMemory::XMM(tmp_out), tmp_out)?;
            self.emit_label(end_convert)?;
            self.move_location(Size::S64, Location::SIMD(tmp_out), ret)?;

            self.release_gpr(tmp);
        }
        self.release_gpr(tmp_in);
        self.release_simd(tmp_out);
        Ok(())
    }
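    // Note on the unsigned fallback above: SSE/AVX only convert *signed* i64 to
    // f64. When the top bit of the u64 is set, the value is halved with the low
    // bit folded back in (`shr` + `or`, so the final rounding still sees a
    // sticky bit), converted as a signed i64, then doubled with `vaddsd`.
    // Roughly, in plain Rust (illustrative sketch only, not the emitted code):
    //     let half = (x >> 1) | (x & 1); // x: u64 with the sign bit set
    //     let f = half as i64 as f64;
    //     f + f // == x as f64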
    fn convert_f64_i32(
        &mut self,
        loc: Location,
        signed: bool,
        ret: Location,
    ) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp_in = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        if self.assembler.arch_has_fconverti() {
            self.emit_relaxed_mov(Size::S32, loc, Location::GPR(tmp_in))?;
            if signed {
                self.assembler.arch_emit_f64_convert_si32(tmp_in, tmp_out)?;
            } else {
                self.assembler.arch_emit_f64_convert_ui32(tmp_in, tmp_out)?;
            }
            self.emit_relaxed_mov(Size::S64, Location::SIMD(tmp_out), ret)?;
        } else {
            self.assembler
                .emit_mov(Size::S32, loc, Location::GPR(tmp_in))?;
            if signed {
                self.assembler
                    .emit_vcvtsi2sd_32(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            } else {
                self.assembler
                    .emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            }
            self.move_location(Size::S64, Location::SIMD(tmp_out), ret)?;
        }
        self.release_gpr(tmp_in);
        self.release_simd(tmp_out);
        Ok(())
    }
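    // Note: the unsigned i32 case needs no such trick. The 32-bit `mov` into
    // `tmp_in` zero-extends to 64 bits, so the value is always in signed-i64
    // range and a plain 64-bit `vcvtsi2sd` is exact.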
    fn convert_f32_i64(
        &mut self,
        loc: Location,
        signed: bool,
        ret: Location,
    ) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp_in = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        if self.assembler.arch_has_fconverti() {
            self.emit_relaxed_mov(Size::S64, loc, Location::GPR(tmp_in))?;
            if signed {
                self.assembler.arch_emit_f32_convert_si64(tmp_in, tmp_out)?;
            } else {
                self.assembler.arch_emit_f32_convert_ui64(tmp_in, tmp_out)?;
            }
            self.emit_relaxed_mov(Size::S32, Location::SIMD(tmp_out), ret)?;
        } else if signed {
            self.assembler
                .emit_mov(Size::S64, loc, Location::GPR(tmp_in))?;
            self.assembler
                .emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            self.move_location(Size::S32, Location::SIMD(tmp_out), ret)?;
        } else {
            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;

            let do_convert = self.assembler.get_label();
            let end_convert = self.assembler.get_label();

            self.assembler
                .emit_mov(Size::S64, loc, Location::GPR(tmp_in))?;
            self.assembler.emit_test_gpr_64(tmp_in)?;
            self.assembler.emit_jmp(Condition::Signed, do_convert)?;
            self.assembler
                .emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            self.assembler.emit_jmp(Condition::None, end_convert)?;
            self.emit_label(do_convert)?;
            self.move_location(Size::S64, Location::GPR(tmp_in), Location::GPR(tmp))?;
            self.assembler
                .emit_and(Size::S64, Location::Imm32(1), Location::GPR(tmp))?;
            self.assembler
                .emit_shr(Size::S64, Location::Imm8(1), Location::GPR(tmp_in))?;
            self.assembler
                .emit_or(Size::S64, Location::GPR(tmp), Location::GPR(tmp_in))?;
            self.assembler
                .emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            self.assembler
                .emit_vaddss(tmp_out, XMMOrMemory::XMM(tmp_out), tmp_out)?;
            self.emit_label(end_convert)?;
            self.move_location(Size::S32, Location::SIMD(tmp_out), ret)?;

            self.release_gpr(tmp);
        }
        self.release_gpr(tmp_in);
        self.release_simd(tmp_out);
        Ok(())
    }
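    // Note: same halving trick as in convert_f64_i64 above, with vcvtsi2ss and
    // vaddss doing the convert-and-double in f32.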
    fn convert_f32_i32(
        &mut self,
        loc: Location,
        signed: bool,
        ret: Location,
    ) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp_in = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        if self.assembler.arch_has_fconverti() {
            self.emit_relaxed_mov(Size::S32, loc, Location::GPR(tmp_in))?;
            if signed {
                self.assembler.arch_emit_f32_convert_si32(tmp_in, tmp_out)?;
            } else {
                self.assembler.arch_emit_f32_convert_ui32(tmp_in, tmp_out)?;
            }
            self.emit_relaxed_mov(Size::S32, Location::SIMD(tmp_out), ret)?;
        } else {
            self.assembler
                .emit_mov(Size::S32, loc, Location::GPR(tmp_in))?;
            if signed {
                self.assembler
                    .emit_vcvtsi2ss_32(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            } else {
                self.assembler
                    .emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            }
            self.move_location(Size::S32, Location::SIMD(tmp_out), ret)?;
        }
        self.release_gpr(tmp_in);
        self.release_simd(tmp_out);
        Ok(())
    }
    fn convert_i64_f64(
        &mut self,
        loc: Location,
        ret: Location,
        signed: bool,
        sat: bool,
    ) -> Result<(), CompileError> {
        match (signed, sat) {
            (false, true) => self.convert_i64_f64_u_s(loc, ret),
            (false, false) => self.convert_i64_f64_u_u(loc, ret),
            (true, true) => self.convert_i64_f64_s_s(loc, ret),
            (true, false) => self.convert_i64_f64_s_u(loc, ret),
        }
    }
    fn convert_i32_f64(
        &mut self,
        loc: Location,
        ret: Location,
        signed: bool,
        sat: bool,
    ) -> Result<(), CompileError> {
        match (signed, sat) {
            (false, true) => self.convert_i32_f64_u_s(loc, ret),
            (false, false) => self.convert_i32_f64_u_u(loc, ret),
            (true, true) => self.convert_i32_f64_s_s(loc, ret),
            (true, false) => self.convert_i32_f64_s_u(loc, ret),
        }
    }
    fn convert_i64_f32(
        &mut self,
        loc: Location,
        ret: Location,
        signed: bool,
        sat: bool,
    ) -> Result<(), CompileError> {
        match (signed, sat) {
            (false, true) => self.convert_i64_f32_u_s(loc, ret),
            (false, false) => self.convert_i64_f32_u_u(loc, ret),
            (true, true) => self.convert_i64_f32_s_s(loc, ret),
            (true, false) => self.convert_i64_f32_s_u(loc, ret),
        }
    }
    fn convert_i32_f32(
        &mut self,
        loc: Location,
        ret: Location,
        signed: bool,
        sat: bool,
    ) -> Result<(), CompileError> {
        match (signed, sat) {
            (false, true) => self.convert_i32_f32_u_s(loc, ret),
            (false, false) => self.convert_i32_f32_u_u(loc, ret),
            (true, true) => self.convert_i32_f32_s_s(loc, ret),
            (true, false) => self.convert_i32_f32_s_u(loc, ret),
        }
    }
    fn convert_f64_f32(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        self.emit_relaxed_avx(AssemblerX64::emit_vcvtss2sd, loc, loc, ret)
    }
    fn convert_f32_f64(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        self.emit_relaxed_avx(AssemblerX64::emit_vcvtsd2ss, loc, loc, ret)
    }
    fn f64_neg(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        if self.assembler.arch_has_fneg() {
            let tmp = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp))?;
            self.assembler.arch_emit_f64_neg(tmp, tmp)?;
            self.emit_relaxed_mov(Size::S64, Location::SIMD(tmp), ret)?;
            self.release_simd(tmp);
        } else {
            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            self.move_location(Size::S64, loc, Location::GPR(tmp))?;
            // flip the sign bit (bit 63) in a GPR
            self.assembler.emit_btc_gpr_imm8_64(63, tmp)?;
            self.move_location(Size::S64, Location::GPR(tmp), ret)?;
            self.release_gpr(tmp);
        }
        Ok(())
    }
    fn f64_abs(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        let tmp = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let c = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;

        self.move_location(Size::S64, loc, Location::GPR(tmp))?;
        self.move_location(
            Size::S64,
            Location::Imm64(0x7fffffffffffffffu64),
            Location::GPR(c),
        )?;
        self.assembler
            .emit_and(Size::S64, Location::GPR(c), Location::GPR(tmp))?;
        self.move_location(Size::S64, Location::GPR(tmp), ret)?;

        self.release_gpr(c);
        self.release_gpr(tmp);
        Ok(())
    }
    fn emit_i64_copysign(&mut self, tmp1: GPR, tmp2: GPR) -> Result<(), CompileError> {
        let c = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;

        self.move_location(
            Size::S64,
            Location::Imm64(0x7fffffffffffffffu64),
            Location::GPR(c),
        )?;
        self.assembler
            .emit_and(Size::S64, Location::GPR(c), Location::GPR(tmp1))?;

        self.move_location(
            Size::S64,
            Location::Imm64(0x8000000000000000u64),
            Location::GPR(c),
        )?;
        self.assembler
            .emit_and(Size::S64, Location::GPR(c), Location::GPR(tmp2))?;

        self.assembler
            .emit_or(Size::S64, Location::GPR(tmp2), Location::GPR(tmp1))?;

        self.release_gpr(c);
        Ok(())
    }
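    // In effect: copysign(a, b) = (a & !SIGN_BIT) | (b & SIGN_BIT). The masks
    // 0x7fff_ffff_ffff_ffff and 0x8000_0000_0000_0000 go through a temp GPR
    // because x86-64 `and` cannot take a 64-bit immediate operand directly.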
    fn f64_sqrt(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        self.emit_relaxed_avx(AssemblerX64::emit_vsqrtsd, loc, loc, ret)
    }
    fn f64_trunc(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        self.emit_relaxed_avx(AssemblerX64::emit_vroundsd_trunc, loc, loc, ret)
    }
    fn f64_ceil(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        self.emit_relaxed_avx(AssemblerX64::emit_vroundsd_ceil, loc, loc, ret)
    }
    fn f64_floor(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        self.emit_relaxed_avx(AssemblerX64::emit_vroundsd_floor, loc, loc, ret)
    }
    fn f64_nearest(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        self.emit_relaxed_avx(AssemblerX64::emit_vroundsd_nearest, loc, loc, ret)
    }
    fn f64_cmp_ge(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_relaxed_avx(AssemblerX64::emit_vcmpgesd, loc_a, loc_b, ret)?;
        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
    }
    fn f64_cmp_gt(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_relaxed_avx(AssemblerX64::emit_vcmpgtsd, loc_a, loc_b, ret)?;
        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
    }
    fn f64_cmp_le(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_relaxed_avx(AssemblerX64::emit_vcmplesd, loc_a, loc_b, ret)?;
        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
    }
    fn f64_cmp_lt(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_relaxed_avx(AssemblerX64::emit_vcmpltsd, loc_a, loc_b, ret)?;
        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
    }
    fn f64_cmp_ne(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_relaxed_avx(AssemblerX64::emit_vcmpneqsd, loc_a, loc_b, ret)?;
        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
    }
    fn f64_cmp_eq(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_relaxed_avx(AssemblerX64::emit_vcmpeqsd, loc_a, loc_b, ret)?;
        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
    }
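    // Note on the comparisons above (and their f32 twins below): the vcmp*sd
    // family writes an all-ones/all-zeros mask into the destination, so the
    // trailing `and Imm32(1)` collapses that mask into the 0/1 boolean value a
    // wasm comparison must produce.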
    fn f64_min(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        if !self.arch_supports_canonicalize_nan() {
            self.emit_relaxed_avx(AssemblerX64::emit_vminsd, loc_a, loc_b, ret)
        } else {
            let tmp1 = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            let tmp2 = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            let tmpg1 = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmpg2 = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;

            let src1 = match loc_a {
                Location::SIMD(x) => x,
                Location::GPR(_) | Location::Memory(_, _) => {
                    self.move_location(Size::S64, loc_a, Location::SIMD(tmp1))?;
                    tmp1
                }
                Location::Imm32(_) => {
                    self.move_location(Size::S32, loc_a, Location::GPR(tmpg1))?;
                    self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
                    tmp1
                }
                Location::Imm64(_) => {
                    self.move_location(Size::S64, loc_a, Location::GPR(tmpg1))?;
                    self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
                    tmp1
                }
                _ => {
                    codegen_error!("singlepass f64_min unreachable");
                }
            };
            let src2 = match loc_b {
                Location::SIMD(x) => x,
                Location::GPR(_) | Location::Memory(_, _) => {
                    self.move_location(Size::S64, loc_b, Location::SIMD(tmp2))?;
                    tmp2
                }
                Location::Imm32(_) => {
                    self.move_location(Size::S32, loc_b, Location::GPR(tmpg1))?;
                    self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
                    tmp2
                }
                Location::Imm64(_) => {
                    self.move_location(Size::S64, loc_b, Location::GPR(tmpg1))?;
                    self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
                    tmp2
                }
                _ => {
                    codegen_error!("singlepass f64_min unreachable");
                }
            };

            let tmp_xmm1 = XMM::XMM8;
            let tmp_xmm2 = XMM::XMM9;
            let tmp_xmm3 = XMM::XMM10;

            self.move_location(Size::S64, Location::SIMD(src1), Location::GPR(tmpg1))?;
            self.move_location(Size::S64, Location::SIMD(src2), Location::GPR(tmpg2))?;
            self.assembler
                .emit_cmp(Size::S64, Location::GPR(tmpg2), Location::GPR(tmpg1))?;
            self.assembler
                .emit_vminsd(src1, XMMOrMemory::XMM(src2), tmp_xmm1)?;
            let label1 = self.assembler.get_label();
            let label2 = self.assembler.get_label();
            self.assembler.emit_jmp(Condition::NotEqual, label1)?;
            self.assembler
                .emit_vmovapd(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2))?;
            self.assembler.emit_jmp(Condition::None, label2)?;
            self.emit_label(label1)?;
            // load float -0.0
            self.move_location(
                Size::S64,
                Location::Imm64(0x8000_0000_0000_0000), // Negative zero
                Location::GPR(tmpg1),
            )?;
            self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp_xmm2))?;
            self.emit_label(label2)?;
            self.assembler
                .emit_vcmpeqsd(src1, XMMOrMemory::XMM(src2), tmp_xmm3)?;
            self.assembler.emit_vblendvpd(
                tmp_xmm3,
                XMMOrMemory::XMM(tmp_xmm2),
                tmp_xmm1,
                tmp_xmm1,
            )?;
            self.assembler
                .emit_vcmpunordsd(src1, XMMOrMemory::XMM(src2), src1)?;
            // load float canonical nan
            self.move_location(
                Size::S64,
                Location::Imm64(0x7FF8_0000_0000_0000), // Canonical NaN
                Location::GPR(tmpg1),
            )?;
            self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(src2))?;
            self.assembler
                .emit_vblendvpd(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1)?;
            match ret {
                Location::SIMD(x) => {
                    self.assembler
                        .emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x))?;
                }
                Location::Memory(_, _) | Location::GPR(_) => {
                    self.move_location(Size::S64, Location::SIMD(src1), ret)?;
                }
                _ => {
                    codegen_error!("singlepass f64_min unreachable");
                }
            }

            self.release_gpr(tmpg2);
            self.release_gpr(tmpg1);
            self.release_simd(tmp2);
            self.release_simd(tmp1);
            Ok(())
        }
    }
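    // Note: the long path above exists because x86 MINSD is not IEEE minimum:
    // when the operands are equal or unordered it simply returns the second
    // operand, so min(-0.0, +0.0) and NaN inputs need the explicit compare and
    // blend fixups, with NaN results rewritten to the canonical
    // 0x7FF8_0000_0000_0000 pattern. f64_max below mirrors the same structure.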
7240    fn f64_max(
7241        &mut self,
7242        loc_a: Location,
7243        loc_b: Location,
7244        ret: Location,
7245    ) -> Result<(), CompileError> {
7246        if !self.arch_supports_canonicalize_nan() {
7247            self.emit_relaxed_avx(AssemblerX64::emit_vmaxsd, loc_a, loc_b, ret)
7248        } else {
7249            let tmp1 = self.acquire_temp_simd().ok_or_else(|| {
7250                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
7251            })?;
7252            let tmp2 = self.acquire_temp_simd().ok_or_else(|| {
7253                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
7254            })?;
7255            let tmpg1 = self.acquire_temp_gpr().ok_or_else(|| {
7256                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
7257            })?;
7258            let tmpg2 = self.acquire_temp_gpr().ok_or_else(|| {
7259                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
7260            })?;
7261
7262            let src1 = match loc_a {
7263                Location::SIMD(x) => x,
7264                Location::GPR(_) | Location::Memory(_, _) => {
7265                    self.move_location(Size::S64, loc_a, Location::SIMD(tmp1))?;
7266                    tmp1
7267                }
7268                Location::Imm32(_) => {
7269                    self.move_location(Size::S32, loc_a, Location::GPR(tmpg1))?;
7270                    self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
7271                    tmp1
7272                }
7273                Location::Imm64(_) => {
7274                    self.move_location(Size::S64, loc_a, Location::GPR(tmpg1))?;
7275                    self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
7276                    tmp1
7277                }
7278                _ => {
7279                    codegen_error!("singlepass f64_max unreachable");
7280                }
7281            };
7282            let src2 = match loc_b {
7283                Location::SIMD(x) => x,
7284                Location::GPR(_) | Location::Memory(_, _) => {
7285                    self.move_location(Size::S64, loc_b, Location::SIMD(tmp2))?;
7286                    tmp2
7287                }
7288                Location::Imm32(_) => {
7289                    self.move_location(Size::S32, loc_b, Location::GPR(tmpg1))?;
7290                    self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
7291                    tmp2
7292                }
7293                Location::Imm64(_) => {
7294                    self.move_location(Size::S64, loc_b, Location::GPR(tmpg1))?;
7295                    self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
7296                    tmp2
7297                }
7298                _ => {
7299                    codegen_error!("singlepass f64_max unreachable");
7300                }
7301            };
7302
7303            let tmp_xmm1 = XMM::XMM8;
7304            let tmp_xmm2 = XMM::XMM9;
7305            let tmp_xmm3 = XMM::XMM10;
7306
7307            self.move_location(Size::S64, Location::SIMD(src1), Location::GPR(tmpg1))?;
7308            self.move_location(Size::S64, Location::SIMD(src2), Location::GPR(tmpg2))?;
7309            self.assembler
7310                .emit_cmp(Size::S64, Location::GPR(tmpg2), Location::GPR(tmpg1))?;
7311            self.assembler
7312                .emit_vmaxsd(src1, XMMOrMemory::XMM(src2), tmp_xmm1)?;
7313            let label1 = self.assembler.get_label();
7314            let label2 = self.assembler.get_label();
7315            self.assembler.emit_jmp(Condition::NotEqual, label1)?;
7316            self.assembler
7317                .emit_vmovapd(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2))?;
7318            self.assembler.emit_jmp(Condition::None, label2)?;
7319            self.emit_label(label1)?;
7320            self.assembler
7321                .emit_vxorpd(tmp_xmm2, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm2)?;
7322            self.emit_label(label2)?;
7323            self.assembler
7324                .emit_vcmpeqsd(src1, XMMOrMemory::XMM(src2), tmp_xmm3)?;
7325            self.assembler.emit_vblendvpd(
7326                tmp_xmm3,
7327                XMMOrMemory::XMM(tmp_xmm2),
7328                tmp_xmm1,
7329                tmp_xmm1,
7330            )?;
7331            self.assembler
7332                .emit_vcmpunordsd(src1, XMMOrMemory::XMM(src2), src1)?;
7333            // load float canonical nan
7334            self.move_location(
7335                Size::S64,
7336                Location::Imm64(0x7FF8_0000_0000_0000), // Canonical NaN
7337                Location::GPR(tmpg1),
7338            )?;
7339            self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(src2))?;
7340            self.assembler
7341                .emit_vblendvpd(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1)?;
7342            match ret {
7343                Location::SIMD(x) => {
7344                    self.assembler
7345                        .emit_vmovapd(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x))?;
7346                }
7347                Location::Memory(_, _) | Location::GPR(_) => {
7348                    self.move_location(Size::S64, Location::SIMD(src1), ret)?;
7349                }
7350                _ => {
7351                    codegen_error!("singlepass f64_max unreachable");
7352                }
7353            }
7354
7355            self.release_gpr(tmpg2);
7356            self.release_gpr(tmpg1);
7357            self.release_simd(tmp2);
7358            self.release_simd(tmp1);
7359            Ok(())
7360        }
7361    }
7362    fn f64_add(
7363        &mut self,
7364        loc_a: Location,
7365        loc_b: Location,
7366        ret: Location,
7367    ) -> Result<(), CompileError> {
7368        self.emit_relaxed_avx(AssemblerX64::emit_vaddsd, loc_a, loc_b, ret)
7369    }
7370    fn f64_sub(
7371        &mut self,
7372        loc_a: Location,
7373        loc_b: Location,
7374        ret: Location,
7375    ) -> Result<(), CompileError> {
7376        self.emit_relaxed_avx(AssemblerX64::emit_vsubsd, loc_a, loc_b, ret)
7377    }
7378    fn f64_mul(
7379        &mut self,
7380        loc_a: Location,
7381        loc_b: Location,
7382        ret: Location,
7383    ) -> Result<(), CompileError> {
7384        self.emit_relaxed_avx(AssemblerX64::emit_vmulsd, loc_a, loc_b, ret)
7385    }
7386    fn f64_div(
7387        &mut self,
7388        loc_a: Location,
7389        loc_b: Location,
7390        ret: Location,
7391    ) -> Result<(), CompileError> {
7392        self.emit_relaxed_avx(AssemblerX64::emit_vdivsd, loc_a, loc_b, ret)
7393    }
7394    fn f32_neg(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7395        if self.assembler.arch_has_fneg() {
7396            let tmp = self.acquire_temp_simd().ok_or_else(|| {
7397                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
7398            })?;
7399            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp))?;
7400            self.assembler.arch_emit_f32_neg(tmp, tmp)?;
7401            self.emit_relaxed_mov(Size::S32, Location::SIMD(tmp), ret)?;
7402            self.release_simd(tmp);
7403        } else {
7404            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
7405                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
7406            })?;
7407            self.move_location(Size::S32, loc, Location::GPR(tmp))?;
7408            self.assembler.emit_btc_gpr_imm8_32(31, tmp)?;
7409            self.move_location(Size::S32, Location::GPR(tmp), ret)?;
7410            self.release_gpr(tmp);
7411        }
7412        Ok(())
7413    }
7414    fn f32_abs(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7415        let tmp = self.acquire_temp_gpr().ok_or_else(|| {
7416            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
7417        })?;
7418        self.move_location(Size::S32, loc, Location::GPR(tmp))?;
7419        self.assembler.emit_and(
7420            Size::S32,
7421            Location::Imm32(0x7fffffffu32),
7422            Location::GPR(tmp),
7423        )?;
7424        self.move_location(Size::S32, Location::GPR(tmp), ret)?;
7425        self.release_gpr(tmp);
7426        Ok(())
7427    }
7428    fn emit_i32_copysign(&mut self, tmp1: GPR, tmp2: GPR) -> Result<(), CompileError> {
7429        self.assembler.emit_and(
7430            Size::S32,
7431            Location::Imm32(0x7fffffffu32),
7432            Location::GPR(tmp1),
7433        )?;
7434        self.assembler.emit_and(
7435            Size::S32,
7436            Location::Imm32(0x80000000u32),
7437            Location::GPR(tmp2),
7438        )?;
7439        self.assembler
7440            .emit_or(Size::S32, Location::GPR(tmp2), Location::GPR(tmp1))
7441    }
7442    fn f32_sqrt(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7443        self.emit_relaxed_avx(AssemblerX64::emit_vsqrtss, loc, loc, ret)
7444    }
7445    fn f32_trunc(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7446        self.emit_relaxed_avx(AssemblerX64::emit_vroundss_trunc, loc, loc, ret)
7447    }
7448    fn f32_ceil(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7449        self.emit_relaxed_avx(AssemblerX64::emit_vroundss_ceil, loc, loc, ret)
7450    }
7451    fn f32_floor(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7452        self.emit_relaxed_avx(AssemblerX64::emit_vroundss_floor, loc, loc, ret)
7453    }
7454    fn f32_nearest(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7455        self.emit_relaxed_avx(AssemblerX64::emit_vroundss_nearest, loc, loc, ret)
7456    }
7457    fn f32_cmp_ge(
7458        &mut self,
7459        loc_a: Location,
7460        loc_b: Location,
7461        ret: Location,
7462    ) -> Result<(), CompileError> {
7463        self.emit_relaxed_avx(AssemblerX64::emit_vcmpgess, loc_a, loc_b, ret)?;
7464        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7465    }
7466    fn f32_cmp_gt(
7467        &mut self,
7468        loc_a: Location,
7469        loc_b: Location,
7470        ret: Location,
7471    ) -> Result<(), CompileError> {
7472        self.emit_relaxed_avx(AssemblerX64::emit_vcmpgtss, loc_a, loc_b, ret)?;
7473        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7474    }
7475    fn f32_cmp_le(
7476        &mut self,
7477        loc_a: Location,
7478        loc_b: Location,
7479        ret: Location,
7480    ) -> Result<(), CompileError> {
7481        self.emit_relaxed_avx(AssemblerX64::emit_vcmpless, loc_a, loc_b, ret)?;
7482        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7483    }
7484    fn f32_cmp_lt(
7485        &mut self,
7486        loc_a: Location,
7487        loc_b: Location,
7488        ret: Location,
7489    ) -> Result<(), CompileError> {
7490        self.emit_relaxed_avx(AssemblerX64::emit_vcmpltss, loc_a, loc_b, ret)?;
7491        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7492    }
7493    fn f32_cmp_ne(
7494        &mut self,
7495        loc_a: Location,
7496        loc_b: Location,
7497        ret: Location,
7498    ) -> Result<(), CompileError> {
7499        self.emit_relaxed_avx(AssemblerX64::emit_vcmpneqss, loc_a, loc_b, ret)?;
7500        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7501    }
7502    fn f32_cmp_eq(
7503        &mut self,
7504        loc_a: Location,
7505        loc_b: Location,
7506        ret: Location,
7507    ) -> Result<(), CompileError> {
7508        self.emit_relaxed_avx(AssemblerX64::emit_vcmpeqss, loc_a, loc_b, ret)?;
7509        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7510    }
7511    fn f32_min(
7512        &mut self,
7513        loc_a: Location,
7514        loc_b: Location,
7515        ret: Location,
7516    ) -> Result<(), CompileError> {
        if !self.arch_supports_canonicalize_nan() {
            self.emit_relaxed_avx(AssemblerX64::emit_vminss, loc_a, loc_b, ret)
        } else {
            let tmp1 = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            let tmp2 = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            let tmpg1 = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmpg2 = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;

            let src1 = match loc_a {
                Location::SIMD(x) => x,
                Location::GPR(_) | Location::Memory(_, _) => {
                    self.move_location(Size::S64, loc_a, Location::SIMD(tmp1))?;
                    tmp1
                }
                Location::Imm32(_) => {
                    self.move_location(Size::S32, loc_a, Location::GPR(tmpg1))?;
                    self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
                    tmp1
                }
                Location::Imm64(_) => {
                    self.move_location(Size::S64, loc_a, Location::GPR(tmpg1))?;
                    self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
                    tmp1
                }
                _ => {
                    codegen_error!("singlepass f32_min unreachable");
                }
            };
            let src2 = match loc_b {
                Location::SIMD(x) => x,
                Location::GPR(_) | Location::Memory(_, _) => {
                    self.move_location(Size::S64, loc_b, Location::SIMD(tmp2))?;
                    tmp2
                }
                Location::Imm32(_) => {
                    self.move_location(Size::S32, loc_b, Location::GPR(tmpg1))?;
                    self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
                    tmp2
                }
                Location::Imm64(_) => {
                    self.move_location(Size::S64, loc_b, Location::GPR(tmpg1))?;
                    self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
                    tmp2
                }
                _ => {
                    codegen_error!("singlepass f32_min unreachable");
                }
            };

            let tmp_xmm1 = XMM::XMM8;
            let tmp_xmm2 = XMM::XMM9;
            let tmp_xmm3 = XMM::XMM10;

            self.move_location(Size::S32, Location::SIMD(src1), Location::GPR(tmpg1))?;
            self.move_location(Size::S32, Location::SIMD(src2), Location::GPR(tmpg2))?;
            self.assembler
                .emit_cmp(Size::S32, Location::GPR(tmpg2), Location::GPR(tmpg1))?;
            self.assembler
                .emit_vminss(src1, XMMOrMemory::XMM(src2), tmp_xmm1)?;
            let label1 = self.assembler.get_label();
            let label2 = self.assembler.get_label();
            self.assembler.emit_jmp(Condition::NotEqual, label1)?;
            self.assembler
                .emit_vmovaps(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2))?;
            self.assembler.emit_jmp(Condition::None, label2)?;
            self.emit_label(label1)?;
            // Load float -0.0 (sign bit only).
            self.move_location(
                Size::S64,
                Location::Imm32(0x8000_0000), // Negative zero
                Location::GPR(tmpg1),
            )?;
            self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp_xmm2))?;
            self.emit_label(label2)?;
            self.assembler
                .emit_vcmpeqss(src1, XMMOrMemory::XMM(src2), tmp_xmm3)?;
            self.assembler.emit_vblendvps(
                tmp_xmm3,
                XMMOrMemory::XMM(tmp_xmm2),
                tmp_xmm1,
                tmp_xmm1,
            )?;
            self.assembler
                .emit_vcmpunordss(src1, XMMOrMemory::XMM(src2), src1)?;
            // Load the float canonical NaN.
            self.move_location(
                Size::S64,
                Location::Imm32(0x7FC0_0000), // Canonical NaN
                Location::GPR(tmpg1),
            )?;
            self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(src2))?;
            self.assembler
                .emit_vblendvps(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1)?;
            match ret {
                Location::SIMD(x) => {
                    self.assembler
                        .emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x))?;
                }
                Location::Memory(_, _) | Location::GPR(_) => {
                    self.move_location(Size::S64, Location::SIMD(src1), ret)?;
                }
                _ => {
                    codegen_error!("singlepass f32_min unreachable");
                }
            }

            self.release_gpr(tmpg2);
            self.release_gpr(tmpg1);
            self.release_simd(tmp2);
            self.release_simd(tmp1);
            Ok(())
        }
    }
    fn f32_max(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
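        // Mirror image of f32_min: the slow path canonicalizes NaNs the same
        // way, but for float-equal operands that differ bitwise it blends in
        // +0.0 (produced with vxorps) instead of -0.0, since max(+0.0, -0.0)
        // must be +0.0.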
        if !self.arch_supports_canonicalize_nan() {
            self.emit_relaxed_avx(AssemblerX64::emit_vmaxss, loc_a, loc_b, ret)
        } else {
            let tmp1 = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            let tmp2 = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            let tmpg1 = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmpg2 = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;

            let src1 = match loc_a {
                Location::SIMD(x) => x,
                Location::GPR(_) | Location::Memory(_, _) => {
                    self.move_location(Size::S64, loc_a, Location::SIMD(tmp1))?;
                    tmp1
                }
                Location::Imm32(_) => {
                    self.move_location(Size::S32, loc_a, Location::GPR(tmpg1))?;
                    self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
                    tmp1
                }
                Location::Imm64(_) => {
                    self.move_location(Size::S64, loc_a, Location::GPR(tmpg1))?;
                    self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
                    tmp1
                }
                _ => {
                    codegen_error!("singlepass f32_max unreachable");
                }
            };
            let src2 = match loc_b {
                Location::SIMD(x) => x,
                Location::GPR(_) | Location::Memory(_, _) => {
                    self.move_location(Size::S64, loc_b, Location::SIMD(tmp2))?;
                    tmp2
                }
                Location::Imm32(_) => {
                    self.move_location(Size::S32, loc_b, Location::GPR(tmpg1))?;
                    self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
                    tmp2
                }
                Location::Imm64(_) => {
                    self.move_location(Size::S64, loc_b, Location::GPR(tmpg1))?;
                    self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
                    tmp2
                }
                _ => {
                    codegen_error!("singlepass f32_max unreachable");
                }
            };

            let tmp_xmm1 = XMM::XMM8;
            let tmp_xmm2 = XMM::XMM9;
            let tmp_xmm3 = XMM::XMM10;

            self.move_location(Size::S32, Location::SIMD(src1), Location::GPR(tmpg1))?;
            self.move_location(Size::S32, Location::SIMD(src2), Location::GPR(tmpg2))?;
            self.assembler
                .emit_cmp(Size::S32, Location::GPR(tmpg2), Location::GPR(tmpg1))?;
            self.assembler
                .emit_vmaxss(src1, XMMOrMemory::XMM(src2), tmp_xmm1)?;
            let label1 = self.assembler.get_label();
            let label2 = self.assembler.get_label();
            self.assembler.emit_jmp(Condition::NotEqual, label1)?;
            self.assembler
                .emit_vmovaps(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2))?;
            self.assembler.emit_jmp(Condition::None, label2)?;
            self.emit_label(label1)?;
            // Load float +0.0 by zeroing the register.
            self.assembler
                .emit_vxorps(tmp_xmm2, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm2)?;
            self.emit_label(label2)?;
            self.assembler
                .emit_vcmpeqss(src1, XMMOrMemory::XMM(src2), tmp_xmm3)?;
            self.assembler.emit_vblendvps(
                tmp_xmm3,
                XMMOrMemory::XMM(tmp_xmm2),
                tmp_xmm1,
                tmp_xmm1,
            )?;
            self.assembler
                .emit_vcmpunordss(src1, XMMOrMemory::XMM(src2), src1)?;
            // Load the float canonical NaN.
            self.move_location(
                Size::S64,
                Location::Imm32(0x7FC0_0000), // Canonical NaN
                Location::GPR(tmpg1),
            )?;
            self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(src2))?;
            self.assembler
                .emit_vblendvps(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1)?;
            match ret {
                Location::SIMD(x) => {
                    self.assembler
                        .emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x))?;
                }
                Location::Memory(_, _) | Location::GPR(_) => {
                    self.move_location(Size::S64, Location::SIMD(src1), ret)?;
                }
                _ => {
                    codegen_error!("singlepass f32_max unreachable");
                }
            }

            self.release_gpr(tmpg2);
            self.release_gpr(tmpg1);
            self.release_simd(tmp2);
            self.release_simd(tmp1);
            Ok(())
        }
    }
    fn f32_add(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_relaxed_avx(AssemblerX64::emit_vaddss, loc_a, loc_b, ret)
    }
    fn f32_sub(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_relaxed_avx(AssemblerX64::emit_vsubss, loc_a, loc_b, ret)
    }
    fn f32_mul(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_relaxed_avx(AssemblerX64::emit_vmulss, loc_a, loc_b, ret)
    }
    fn f32_div(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_relaxed_avx(AssemblerX64::emit_vdivss, loc_a, loc_b, ret)
    }

    fn gen_std_trampoline(
        &self,
        sig: &FunctionType,
        calling_convention: CallingConvention,
    ) -> Result<FunctionBody, CompileError> {
        // The CPU feature here is irrelevant.
        let mut a = AssemblerX64::new(0, None)?;

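        // For illustration only (the exact code depends on the signature and
        // convention): for a `(i32, i32) -> i32` signature under System V, the
        // emitted trampoline is roughly
        //
        //     push r15
        //     push r14
        //     sub  rsp, 8          ; keep RSP 16-byte aligned at the call
        //     mov  r15, rsi        ; func_ptr
        //     mov  r14, rdx        ; args_rets
        //     mov  rsi, [r14]      ; arg 0 (arg i lives at [r14 + i * 16])
        //     mov  rdx, [r14 + 16] ; arg 1
        //     call r15
        //     add  rsp, 8
        //     mov  [r14], rax      ; write the single return value back
        //     pop  r14
        //     pop  r15
        //     ret
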
        // Calculate stack offset.
        let mut stack_offset: u32 = 0;
        for (i, _param) in sig.params().iter().enumerate() {
            if let Location::Memory(_, _) =
                self.get_simple_param_location(1 + i, calling_convention)
            {
                stack_offset += 8;
            }
        }
        let stack_padding: u32 = match calling_convention {
            CallingConvention::WindowsFastcall => 32,
            _ => 0,
        };

        // Align to 16 bytes. We push two 8-byte registers below, so here we need to ensure stack_offset % 16 == 8.
        if stack_offset % 16 != 8 {
            stack_offset += 8;
        }

        // Save the callee-saved registers we use.
        a.emit_push(Size::S64, Location::GPR(GPR::R15))?;
        a.emit_push(Size::S64, Location::GPR(GPR::R14))?;

        // Prepare stack space.
        a.emit_sub(
            Size::S64,
            Location::Imm32(stack_offset + stack_padding),
            Location::GPR(GPR::RSP),
        )?;

        // Arguments
        a.emit_mov(
            Size::S64,
            self.get_simple_param_location(1, calling_convention),
            Location::GPR(GPR::R15),
        )?; // func_ptr
        a.emit_mov(
            Size::S64,
            self.get_simple_param_location(2, calling_convention),
            Location::GPR(GPR::R14),
        )?; // args_rets

        // Move arguments to their locations.
        // `callee_vmctx` is already in the first argument register, so no need to move.
        {
            let mut n_stack_args: usize = 0;
            for (i, _param) in sig.params().iter().enumerate() {
                let src_loc = Location::Memory(GPR::R14, (i * 16) as _); // args_rets[i]
                let dst_loc = self.get_simple_param_location(1 + i, calling_convention);

                match dst_loc {
                    Location::GPR(_) => {
                        a.emit_mov(Size::S64, src_loc, dst_loc)?;
                    }
                    Location::Memory(_, _) => {
                        // `dst_loc` describes where the callee reads this argument;
                        // here we are writing it to our own outgoing argument area,
                        // so recompute the slot relative to RSP.
                        a.emit_mov(Size::S64, src_loc, Location::GPR(GPR::RAX))?;
                        a.emit_mov(
                            Size::S64,
                            Location::GPR(GPR::RAX),
                            Location::Memory(
                                GPR::RSP,
                                (stack_padding as usize + n_stack_args * 8) as _,
                            ),
                        )?;
                        n_stack_args += 1;
                    }
                    _ => codegen_error!("singlepass gen_std_trampoline unreachable"),
                }
            }
        }

        // Call.
        a.emit_call_location(Location::GPR(GPR::R15))?;

        // Restore stack.
        a.emit_add(
            Size::S64,
            Location::Imm32(stack_offset + stack_padding),
            Location::GPR(GPR::RSP),
        )?;

        // Write return value.
        if !sig.results().is_empty() {
            a.emit_mov(
                Size::S64,
                Location::GPR(GPR::RAX),
                Location::Memory(GPR::R14, 0),
            )?;
        }

        // Restore callee-saved registers.
        a.emit_pop(Size::S64, Location::GPR(GPR::R14))?;
        a.emit_pop(Size::S64, Location::GPR(GPR::R15))?;

        a.emit_ret()?;

        let mut body = a.finalize().unwrap();
        body.shrink_to_fit();
        Ok(FunctionBody {
            body,
            unwind_info: None,
        })
    }
    // Generates a dynamic import function call trampoline for a function type.
    fn gen_std_dynamic_import_trampoline(
        &self,
        vmoffsets: &VMOffsets,
        sig: &FunctionType,
        calling_convention: CallingConvention,
    ) -> Result<FunctionBody, CompileError> {
        // The CPU feature here is irrelevant.
        let mut a = AssemblerX64::new(0, None)?;

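        // The dynamic trampoline marshals every argument into a value array on
        // the stack: one 16-byte slot per value, with the payload in the low
        // 8 bytes and the high 8 bytes zeroed. A pointer to that array is then
        // passed to the dynamic function's host-side entry point.
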
        // Allocate argument array.
        let stack_offset: usize = 16 * std::cmp::max(sig.params().len(), sig.results().len()) + 8; // 16 bytes per value + 8 bytes of SysV call padding
        let stack_padding: usize = match calling_convention {
            CallingConvention::WindowsFastcall => 32,
            _ => 0,
        };
        a.emit_sub(
            Size::S64,
            Location::Imm32((stack_offset + stack_padding) as _),
            Location::GPR(GPR::RSP),
        )?;

        // Copy arguments.
        if !sig.params().is_empty() {
            let mut argalloc = ArgumentRegisterAllocator::default();
            argalloc.next(Type::I64, calling_convention).unwrap(); // skip VMContext

            let mut stack_param_count: usize = 0;

            for (i, ty) in sig.params().iter().enumerate() {
                let source_loc = match argalloc.next(*ty, calling_convention)? {
                    Some(X64Register::GPR(gpr)) => Location::GPR(gpr),
                    Some(X64Register::XMM(xmm)) => Location::SIMD(xmm),
                    None => {
                        a.emit_mov(
                            Size::S64,
                            Location::Memory(
                                GPR::RSP,
                                (stack_padding * 2 + stack_offset + 8 + stack_param_count * 8) as _,
                            ),
                            Location::GPR(GPR::RAX),
                        )?;
                        stack_param_count += 1;
                        Location::GPR(GPR::RAX)
                    }
                };
                a.emit_mov(
                    Size::S64,
                    source_loc,
                    Location::Memory(GPR::RSP, (stack_padding + i * 16) as _),
                )?;

                // Zero the upper 64 bits of the slot.
                a.emit_mov(
                    Size::S64,
                    Location::Imm32(0),
                    Location::Memory(GPR::RSP, (stack_padding + i * 16 + 8) as _),
                )?;
            }
        }

        match calling_convention {
            CallingConvention::WindowsFastcall => {
                // Load target address.
                a.emit_mov(
                    Size::S64,
                    Location::Memory(
                        GPR::RCX,
                        vmoffsets.vmdynamicfunction_import_context_address() as i32,
                    ),
                    Location::GPR(GPR::RAX),
                )?;
                // Load values array.
                a.emit_lea(
                    Size::S64,
                    Location::Memory(GPR::RSP, stack_padding as i32),
                    Location::GPR(GPR::RDX),
                )?;
            }
            _ => {
                // Load target address.
                a.emit_mov(
                    Size::S64,
                    Location::Memory(
                        GPR::RDI,
                        vmoffsets.vmdynamicfunction_import_context_address() as i32,
                    ),
                    Location::GPR(GPR::RAX),
                )?;
                // Load values array.
                a.emit_mov(Size::S64, Location::GPR(GPR::RSP), Location::GPR(GPR::RSI))?;
            }
        };

        // Call target.
        a.emit_call_location(Location::GPR(GPR::RAX))?;

        // Fetch return value.
        if !sig.results().is_empty() {
            assert_eq!(sig.results().len(), 1);
            a.emit_mov(
                Size::S64,
                Location::Memory(GPR::RSP, stack_padding as i32),
                Location::GPR(GPR::RAX),
            )?;
        }

        // Release values array.
        a.emit_add(
            Size::S64,
            Location::Imm32((stack_offset + stack_padding) as _),
            Location::GPR(GPR::RSP),
        )?;

        // Return.
        a.emit_ret()?;

        let mut body = a.finalize().unwrap();
        body.shrink_to_fit();
        Ok(FunctionBody {
            body,
            unwind_info: None,
        })
    }
    // Singlepass calls import functions through a trampoline.
    fn gen_import_call_trampoline(
        &self,
        vmoffsets: &VMOffsets,
        index: FunctionIndex,
        sig: &FunctionType,
        calling_convention: CallingConvention,
    ) -> Result<CustomSection, CompileError> {
        // The CPU feature here is irrelevant.
        let mut a = AssemblerX64::new(0, None)?;

        // TODO: ARM entry trampoline is not emitted.

        // Singlepass internally treats all arguments as integers.
        // The Windows Fastcall convention requires floating point arguments to
        // be passed in XMM registers, but only for the first four arguments;
        // that is the only adjustment needed, all other arguments stay where
        // they are.
        // The System V convention requires all floating point arguments to be
        // passed in XMM registers. Translation is expensive, so only do it if
        // needed.
        if sig
            .params()
            .iter()
            .any(|&x| x == Type::F32 || x == Type::F64)
        {
            match calling_convention {
                CallingConvention::WindowsFastcall => {
                    let mut param_locations: Vec<Location> = vec![];
                    static PARAM_REGS: &[GPR] = &[GPR::RDX, GPR::R8, GPR::R9];
                    #[allow(clippy::needless_range_loop)]
                    for i in 0..sig.params().len() {
                        let loc = match i {
                            0..=2 => Location::GPR(PARAM_REGS[i]),
                            _ => Location::Memory(GPR::RSP, 32 + 8 + ((i - 3) * 8) as i32), // will not be used anyway
                        };
                        param_locations.push(loc);
                    }

                    // Copy float arguments from GPRs to XMM registers.
                    let mut argalloc = ArgumentRegisterAllocator::default();
                    for (i, ty) in sig.params().iter().enumerate() {
                        let prev_loc = param_locations[i];
                        match argalloc.next(*ty, calling_convention)? {
                            Some(X64Register::GPR(_gpr)) => continue,
                            Some(X64Register::XMM(xmm)) => {
                                a.emit_mov(Size::S64, prev_loc, Location::SIMD(xmm))?
                            }
                            None => continue,
                        };
                    }
                }
                _ => {
                    let mut param_locations = vec![];

                    // Allocate stack space for arguments.
                    let stack_offset: i32 = if sig.params().len() > 5 {
                        5 * 8
                    } else {
                        (sig.params().len() as i32) * 8
                    };
                    if stack_offset > 0 {
                        a.emit_sub(
                            Size::S64,
                            Location::Imm32(stack_offset as u32),
                            Location::GPR(GPR::RSP),
                        )?;
                    }

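                    // Spilling everything first avoids a register-shuffle
                    // hazard: rearranging in place could read a GPR after it
                    // has already been overwritten with another argument
                    // (for example, an integer argument moving into a register
                    // that still holds a not-yet-relocated float argument).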
                    // Store all arguments to the stack to prevent overwrite.
                    static PARAM_REGS: &[GPR] = &[GPR::RSI, GPR::RDX, GPR::RCX, GPR::R8, GPR::R9];
                    #[allow(clippy::needless_range_loop)]
                    for i in 0..sig.params().len() {
                        let loc = match i {
                            0..=4 => {
                                let loc = Location::Memory(GPR::RSP, (i * 8) as i32);
                                a.emit_mov(Size::S64, Location::GPR(PARAM_REGS[i]), loc)?;
                                loc
                            }
                            _ => {
                                Location::Memory(GPR::RSP, stack_offset + 8 + ((i - 5) * 8) as i32)
                            }
                        };
                        param_locations.push(loc);
                    }

                    // Copy arguments.
                    let mut argalloc = ArgumentRegisterAllocator::default();
                    argalloc.next(Type::I64, calling_convention)?.unwrap(); // skip VMContext
                    let mut caller_stack_offset: i32 = 0;
                    for (i, ty) in sig.params().iter().enumerate() {
                        let prev_loc = param_locations[i];
                        let targ = match argalloc.next(*ty, calling_convention)? {
                            Some(X64Register::GPR(gpr)) => Location::GPR(gpr),
                            Some(X64Register::XMM(xmm)) => Location::SIMD(xmm),
                            None => {
                                // No register can be allocated. Put this argument on the stack.
                                //
                                // We never use fewer registers than the original call did, so the
                                // caller's frame always has enough space for the rearranged
                                // arguments, and the "backward" copy between slots of the caller's
                                // argument region is always safe.
                                a.emit_mov(Size::S64, prev_loc, Location::GPR(GPR::RAX))?;
                                a.emit_mov(
                                    Size::S64,
                                    Location::GPR(GPR::RAX),
                                    Location::Memory(
                                        GPR::RSP,
                                        stack_offset + 8 + caller_stack_offset,
                                    ),
                                )?;
                                caller_stack_offset += 8;
                                continue;
                            }
                        };
                        a.emit_mov(Size::S64, prev_loc, targ)?;
                    }

                    // Restore stack pointer.
                    if stack_offset > 0 {
                        a.emit_add(
                            Size::S64,
                            Location::Imm32(stack_offset as u32),
                            Location::GPR(GPR::RSP),
                        )?;
                    }
                }
            }
        }

        // Emits a tail call trampoline that loads the address of the target import function
        // from Ctx and jumps to it.

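        // For illustration, under System V the emitted sequence is roughly
        //
        //     mov rax, [rdi + offset]     ; imported function body pointer
        //     mov rdi, [rdi + offset + 8] ; swap in the target's vmctx
        //     jmp rax
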
        let offset = vmoffsets.vmctx_vmfunction_import(index);

        match calling_convention {
            CallingConvention::WindowsFastcall => {
                a.emit_mov(
                    Size::S64,
                    Location::Memory(GPR::RCX, offset as i32), // function pointer
                    Location::GPR(GPR::RAX),
                )?;
                a.emit_mov(
                    Size::S64,
                    Location::Memory(GPR::RCX, offset as i32 + 8), // target vmctx
                    Location::GPR(GPR::RCX),
                )?;
            }
            _ => {
                a.emit_mov(
                    Size::S64,
                    Location::Memory(GPR::RDI, offset as i32), // function pointer
                    Location::GPR(GPR::RAX),
                )?;
                a.emit_mov(
                    Size::S64,
                    Location::Memory(GPR::RDI, offset as i32 + 8), // target vmctx
                    Location::GPR(GPR::RDI),
                )?;
            }
        }
        a.emit_host_redirection(GPR::RAX)?;

        let mut contents = a.finalize().unwrap();
        contents.shrink_to_fit();
        let section_body = SectionBody::new_with_vec(contents);

        Ok(CustomSection {
            protection: CustomSectionProtection::ReadExecute,
            alignment: None,
            bytes: section_body,
            relocations: vec![],
        })
    }
    #[cfg(feature = "unwind")]
    fn gen_dwarf_unwind_info(&mut self, code_len: usize) -> Option<UnwindInstructions> {
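        // Lower the machine-independent unwind ops recorded during codegen to
        // DWARF CFI: PushFP becomes a CFA offset plus a saved-RBP rule,
        // DefineNewFrame re-bases the CFA on RBP, and SaveRegister records
        // where a callee-saved register was spilled relative to RBP.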
        let mut instructions = vec![];
        for &(instruction_offset, ref inst) in &self.unwind_ops {
            let instruction_offset = instruction_offset as u32;
            match *inst {
                UnwindOps::PushFP { up_to_sp } => {
                    instructions.push((
                        instruction_offset,
                        CallFrameInstruction::CfaOffset(up_to_sp as i32),
                    ));
                    instructions.push((
                        instruction_offset,
                        CallFrameInstruction::Offset(X86_64::RBP, -(up_to_sp as i32)),
                    ));
                }
                UnwindOps::DefineNewFrame => {
                    instructions.push((
                        instruction_offset,
                        CallFrameInstruction::CfaRegister(X86_64::RBP),
                    ));
                }
                UnwindOps::SaveRegister { reg, bp_neg_offset } => instructions.push((
                    instruction_offset,
                    CallFrameInstruction::Offset(reg.dwarf_index(), -bp_neg_offset),
                )),
                UnwindOps::Push2Regs { .. } => unimplemented!(),
            }
        }
        Some(UnwindInstructions {
            instructions,
            len: code_len as u32,
        })
    }
    #[cfg(not(feature = "unwind"))]
    fn gen_dwarf_unwind_info(&mut self, _code_len: usize) -> Option<UnwindInstructions> {
        None
    }

    #[cfg(feature = "unwind")]
    fn gen_windows_unwind_info(&mut self, _code_len: usize) -> Option<Vec<u8>> {
        let unwind_info = create_unwind_info_from_insts(&self.unwind_ops);
        if let Some(unwind) = unwind_info {
            let sz = unwind.emit_size();
            let mut tbl = vec![0; sz];
            unwind.emit(&mut tbl);
            Some(tbl)
        } else {
            None
        }
    }

    #[cfg(not(feature = "unwind"))]
    fn gen_windows_unwind_info(&mut self, _code_len: usize) -> Option<Vec<u8>> {
        None
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use enumset::enum_set;
    use std::str::FromStr;
    use wasmer_types::target::{CpuFeature, Target, Triple};

    fn test_move_location(machine: &mut MachineX86_64) -> Result<(), CompileError> {
        machine.move_location_for_native(
            Size::S64,
            Location::GPR(GPR::RAX),
            Location::GPR(GPR::RCX),
        )?;
        machine.move_location_for_native(
            Size::S64,
            Location::GPR(GPR::RAX),
            Location::Memory(GPR::RDX, 10),
        )?;
        machine.move_location_for_native(
            Size::S64,
            Location::GPR(GPR::RAX),
            Location::Memory(GPR::RDX, -10),
        )?;
        machine.move_location_for_native(
            Size::S64,
            Location::Memory(GPR::RDX, 10),
            Location::GPR(GPR::RAX),
        )?;
        machine.move_location_for_native(
            Size::S64,
            Location::Imm64(50),
            Location::GPR(GPR::RAX),
        )?;
        machine.move_location_for_native(
            Size::S64,
            Location::Imm32(50),
            Location::GPR(GPR::RAX),
        )?;
        machine.move_location_for_native(Size::S64, Location::Imm8(50), Location::GPR(GPR::RAX))?;

        machine.move_location_for_native(
            Size::S32,
            Location::GPR(GPR::RAX),
            Location::GPR(GPR::RCX),
        )?;
        machine.move_location_for_native(
            Size::S32,
            Location::GPR(GPR::RAX),
            Location::Memory(GPR::RDX, 10),
        )?;
        machine.move_location_for_native(
            Size::S32,
            Location::GPR(GPR::RAX),
            Location::Memory(GPR::RDX, -10),
        )?;
        machine.move_location_for_native(
            Size::S32,
            Location::Memory(GPR::RDX, 10),
            Location::GPR(GPR::RAX),
        )?;
        machine.move_location_for_native(
            Size::S32,
            Location::Imm32(50),
            Location::GPR(GPR::RAX),
        )?;
        machine.move_location_for_native(Size::S32, Location::Imm8(50), Location::GPR(GPR::RAX))?;

        machine.move_location_for_native(
            Size::S16,
            Location::GPR(GPR::RAX),
            Location::GPR(GPR::RCX),
        )?;
        machine.move_location_for_native(
            Size::S16,
            Location::GPR(GPR::RAX),
            Location::Memory(GPR::RDX, 10),
        )?;
        machine.move_location_for_native(
            Size::S16,
            Location::GPR(GPR::RAX),
            Location::Memory(GPR::RDX, -10),
        )?;
        machine.move_location_for_native(
            Size::S16,
            Location::Memory(GPR::RDX, 10),
            Location::GPR(GPR::RAX),
        )?;
        machine.move_location_for_native(Size::S16, Location::Imm8(50), Location::GPR(GPR::RAX))?;

        machine.move_location_for_native(
            Size::S8,
            Location::GPR(GPR::RAX),
            Location::GPR(GPR::RCX),
        )?;
        machine.move_location_for_native(
            Size::S8,
            Location::GPR(GPR::RAX),
            Location::Memory(GPR::RDX, 10),
        )?;
        machine.move_location_for_native(
            Size::S8,
            Location::GPR(GPR::RAX),
            Location::Memory(GPR::RDX, -10),
        )?;
        machine.move_location_for_native(
            Size::S8,
            Location::Memory(GPR::RDX, 10),
            Location::GPR(GPR::RAX),
        )?;
        machine.move_location_for_native(Size::S8, Location::Imm8(50), Location::GPR(GPR::RAX))?;

        machine.move_location_for_native(
            Size::S64,
            Location::SIMD(XMM::XMM0),
            Location::GPR(GPR::RAX),
        )?;
        machine.move_location_for_native(
            Size::S64,
            Location::SIMD(XMM::XMM0),
            Location::Memory(GPR::RDX, -10),
        )?;
        machine.move_location_for_native(
            Size::S64,
            Location::GPR(GPR::RAX),
            Location::SIMD(XMM::XMM0),
        )?;
        machine.move_location_for_native(
            Size::S64,
            Location::Memory(GPR::RDX, -10),
            Location::SIMD(XMM::XMM0),
        )?;

        Ok(())
    }

    fn test_move_location_extended(
        machine: &mut MachineX86_64,
        signed: bool,
        sized: Size,
    ) -> Result<(), CompileError> {
        machine.move_location_extend(
            sized,
            signed,
            Location::GPR(GPR::RAX),
            Size::S64,
            Location::GPR(GPR::RCX),
        )?;
        machine.move_location_extend(
            sized,
            signed,
            Location::GPR(GPR::RAX),
            Size::S64,
            Location::Memory(GPR::RCX, 10),
        )?;
        machine.move_location_extend(
            sized,
            signed,
            Location::Memory(GPR::RAX, 10),
            Size::S64,
            Location::GPR(GPR::RCX),
        )?;
        if sized != Size::S32 {
            machine.move_location_extend(
                sized,
                signed,
                Location::GPR(GPR::RAX),
                Size::S32,
                Location::GPR(GPR::RCX),
            )?;
            machine.move_location_extend(
                sized,
                signed,
                Location::GPR(GPR::RAX),
                Size::S32,
                Location::Memory(GPR::RCX, 10),
            )?;
            machine.move_location_extend(
                sized,
                signed,
                Location::Memory(GPR::RAX, 10),
                Size::S32,
                Location::GPR(GPR::RCX),
            )?;
        }

        Ok(())
    }

    fn test_binop_op(
        machine: &mut MachineX86_64,
        op: fn(&mut MachineX86_64, Location, Location, Location) -> Result<(), CompileError>,
    ) -> Result<(), CompileError> {
        op(
            machine,
            Location::GPR(GPR::RDX),
            Location::GPR(GPR::RDX),
            Location::GPR(GPR::RAX),
        )?;
        op(
            machine,
            Location::GPR(GPR::RDX),
            Location::Imm32(10),
            Location::GPR(GPR::RAX),
        )?;
        op(
            machine,
            Location::GPR(GPR::RAX),
            Location::GPR(GPR::RAX),
            Location::GPR(GPR::RAX),
        )?;
        op(
            machine,
            Location::Imm32(10),
            Location::GPR(GPR::RDX),
            Location::GPR(GPR::RAX),
        )?;
        op(
            machine,
            Location::GPR(GPR::RAX),
            Location::GPR(GPR::RDX),
            Location::Memory(GPR::RAX, 10),
        )?;
        op(
            machine,
            Location::GPR(GPR::RAX),
            Location::Memory(GPR::RDX, 16),
            Location::Memory(GPR::RAX, 10),
        )?;
        op(
            machine,
            Location::Memory(GPR::RAX, 0),
            Location::Memory(GPR::RDX, 16),
            Location::Memory(GPR::RAX, 10),
        )?;

        Ok(())
    }

    fn test_float_binop_op(
        machine: &mut MachineX86_64,
        op: fn(&mut MachineX86_64, Location, Location, Location) -> Result<(), CompileError>,
    ) -> Result<(), CompileError> {
        op(
            machine,
            Location::SIMD(XMM::XMM3),
            Location::SIMD(XMM::XMM2),
            Location::SIMD(XMM::XMM0),
        )?;
        op(
            machine,
            Location::SIMD(XMM::XMM0),
            Location::SIMD(XMM::XMM2),
            Location::SIMD(XMM::XMM0),
        )?;
        op(
            machine,
            Location::SIMD(XMM::XMM0),
            Location::SIMD(XMM::XMM0),
            Location::SIMD(XMM::XMM0),
        )?;
        op(
            machine,
            Location::Memory(GPR::RBP, 0),
            Location::SIMD(XMM::XMM2),
            Location::SIMD(XMM::XMM0),
        )?;
        op(
            machine,
            Location::Memory(GPR::RBP, 0),
            Location::Memory(GPR::RDX, 10),
            Location::SIMD(XMM::XMM0),
        )?;
        op(
            machine,
            Location::Memory(GPR::RBP, 0),
            Location::Memory(GPR::RDX, 16),
            Location::Memory(GPR::RAX, 32),
        )?;
        op(
            machine,
            Location::SIMD(XMM::XMM0),
            Location::Memory(GPR::RDX, 16),
            Location::Memory(GPR::RAX, 32),
        )?;
        op(
            machine,
            Location::SIMD(XMM::XMM0),
            Location::SIMD(XMM::XMM1),
            Location::Memory(GPR::RAX, 32),
        )?;

        Ok(())
    }

    fn test_float_cmp_op(
        machine: &mut MachineX86_64,
        op: fn(&mut MachineX86_64, Location, Location, Location) -> Result<(), CompileError>,
    ) -> Result<(), CompileError> {
        op(
            machine,
            Location::SIMD(XMM::XMM3),
            Location::SIMD(XMM::XMM2),
            Location::GPR(GPR::RAX),
        )?;
        op(
            machine,
            Location::SIMD(XMM::XMM0),
            Location::SIMD(XMM::XMM0),
            Location::GPR(GPR::RAX),
        )?;
        op(
            machine,
            Location::Memory(GPR::RBP, 0),
            Location::SIMD(XMM::XMM2),
            Location::GPR(GPR::RAX),
        )?;
        op(
            machine,
            Location::Memory(GPR::RBP, 0),
            Location::Memory(GPR::RDX, 10),
            Location::GPR(GPR::RAX),
        )?;
        op(
            machine,
            Location::Memory(GPR::RBP, 0),
            Location::Memory(GPR::RDX, 16),
            Location::Memory(GPR::RAX, 32),
        )?;
        op(
            machine,
            Location::SIMD(XMM::XMM0),
            Location::Memory(GPR::RDX, 16),
            Location::Memory(GPR::RAX, 32),
        )?;
        op(
            machine,
            Location::SIMD(XMM::XMM0),
            Location::SIMD(XMM::XMM1),
            Location::Memory(GPR::RAX, 32),
        )?;

        Ok(())
    }

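    // The two tests below run the same operation matrix; they differ only in
    // the CPU feature handed to the Machine, which switches the assembler
    // between VEX-encoded (AVX) and legacy SSE 4.2 instructions.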
    #[test]
    fn tests_avx() -> Result<(), CompileError> {
        let set = enum_set!(CpuFeature::AVX);
        let target = Target::new(Triple::from_str("x86_64-linux-gnu").unwrap(), set);
        let mut machine = MachineX86_64::new(Some(target))?;

        test_move_location(&mut machine)?;
        test_move_location_extended(&mut machine, false, Size::S8)?;
        test_move_location_extended(&mut machine, false, Size::S16)?;
        test_move_location_extended(&mut machine, false, Size::S32)?;
        test_move_location_extended(&mut machine, true, Size::S8)?;
        test_move_location_extended(&mut machine, true, Size::S16)?;
        test_move_location_extended(&mut machine, true, Size::S32)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_add32)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_add64)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_sub32)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_sub64)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_and32)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_and64)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_xor32)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_xor64)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_or32)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_or64)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_mul32)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_mul64)?;
        test_float_binop_op(&mut machine, MachineX86_64::f32_add)?;
        test_float_binop_op(&mut machine, MachineX86_64::f32_sub)?;
        test_float_binop_op(&mut machine, MachineX86_64::f32_mul)?;
        test_float_binop_op(&mut machine, MachineX86_64::f32_div)?;
        test_float_cmp_op(&mut machine, MachineX86_64::f32_cmp_eq)?;
        test_float_cmp_op(&mut machine, MachineX86_64::f32_cmp_lt)?;
        test_float_cmp_op(&mut machine, MachineX86_64::f32_cmp_le)?;

        Ok(())
    }

    #[test]
    fn tests_sse42() -> Result<(), CompileError> {
        let set = enum_set!(CpuFeature::SSE42);
        let target = Target::new(Triple::from_str("x86_64-linux-gnu").unwrap(), set);
        let mut machine = MachineX86_64::new(Some(target))?;

        test_move_location(&mut machine)?;
        test_move_location_extended(&mut machine, false, Size::S8)?;
        test_move_location_extended(&mut machine, false, Size::S16)?;
        test_move_location_extended(&mut machine, false, Size::S32)?;
        test_move_location_extended(&mut machine, true, Size::S8)?;
        test_move_location_extended(&mut machine, true, Size::S16)?;
        test_move_location_extended(&mut machine, true, Size::S32)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_add32)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_add64)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_sub32)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_sub64)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_and32)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_and64)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_xor32)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_xor64)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_or32)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_or64)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_mul32)?;
        test_binop_op(&mut machine, MachineX86_64::emit_binop_mul64)?;
        test_float_binop_op(&mut machine, MachineX86_64::f32_add)?;
        test_float_binop_op(&mut machine, MachineX86_64::f32_sub)?;
        test_float_binop_op(&mut machine, MachineX86_64::f32_mul)?;
        test_float_binop_op(&mut machine, MachineX86_64::f32_div)?;
        test_float_cmp_op(&mut machine, MachineX86_64::f32_cmp_eq)?;
        test_float_cmp_op(&mut machine, MachineX86_64::f32_cmp_lt)?;
        test_float_cmp_op(&mut machine, MachineX86_64::f32_cmp_le)?;

        Ok(())
    }
}