wasmer_compiler_singlepass/
machine_x64.rs

1#[cfg(feature = "unwind")]
2use crate::unwind_winx64::create_unwind_info_from_insts;
3use crate::{
4    codegen_error,
5    common_decl::*,
6    emitter_x64::*,
7    location::{Location as AbstractLocation, Reg},
8    machine::*,
9    unwind::{UnwindInstructions, UnwindOps, UnwindRegister},
10    x64_decl::{ArgumentRegisterAllocator, GPR, X64Register, XMM},
11};
12use dynasmrt::{DynasmError, VecAssembler, x64::X64Relocation};
13#[cfg(feature = "unwind")]
14use gimli::{X86_64, write::CallFrameInstruction};
15use std::{
16    collections::HashMap,
17    ops::{Deref, DerefMut},
18};
19use wasmer_compiler::{
20    types::{
21        address_map::InstructionAddressMap,
22        function::FunctionBody,
23        relocation::{Relocation, RelocationKind, RelocationTarget},
24        section::{CustomSection, CustomSectionProtection, SectionBody},
25    },
26    wasmparser::MemArg,
27};
28use wasmer_types::{
29    CompileError, FunctionIndex, FunctionType, SourceLoc, TrapCode, TrapInformation, Type,
30    VMOffsets,
31    target::{CallingConvention, CpuFeature, Target},
32};
33
/// Assembler that accumulates emitted bytes into a `Vec`, using x86-64 relocations.
type Assembler = VecAssembler<X64Relocation>;
35
pub struct AssemblerX64 {
    /// The actual inner assembler that collects the emitted machine code.
    pub inner: Assembler,
    /// The SIMD instruction set to emit for on the target.
    /// Currently only supports SSE 4.2 and AVX.
    pub simd_arch: CpuFeature,
    /// Full target CPU description, when one was supplied at construction.
    pub target: Option<Target>,
}
45
46impl AssemblerX64 {
47    fn new(baseaddr: usize, target: Option<Target>) -> Result<Self, CompileError> {
48        let simd_arch = target.as_ref().map_or_else(
49            || Ok(CpuFeature::SSE42),
50            |target| {
51                if target.cpu_features().contains(CpuFeature::AVX) {
52                    Ok(CpuFeature::AVX)
53                } else if target.cpu_features().contains(CpuFeature::SSE42) {
54                    Ok(CpuFeature::SSE42)
55                } else {
56                    Err(CompileError::UnsupportedTarget(
57                        "x86_64 without AVX or SSE 4.2, use -m avx to enable".to_string(),
58                    ))
59                }
60            },
61        )?;
62
63        Ok(Self {
64            inner: Assembler::new(baseaddr),
65            simd_arch,
66            target,
67        })
68    }
69
70    fn finalize(self) -> Result<Vec<u8>, DynasmError> {
71        self.inner.finalize()
72    }
73}
74
// Deref to the inner assembler so emit helpers can be called directly on
// `AssemblerX64`.
impl Deref for AssemblerX64 {
    type Target = Assembler;

    fn deref(&self) -> &Self::Target {
        &self.inner
    }
}
82
// Mutable counterpart of the `Deref` impl above.
impl DerefMut for AssemblerX64 {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.inner
    }
}
88
/// Concrete operand location for x86-64: general-purpose `GPR`s and `XMM` SIMD registers.
type Location = AbstractLocation<GPR, XMM>;
90
/// Code generator state for the x86-64 singlepass backend.
pub struct MachineX86_64 {
    /// Underlying assembler the machine emits into.
    assembler: AssemblerX64,
    /// Tracking of currently-used general-purpose registers
    /// (presumably one bit per register — see acquire/release helpers).
    used_gprs: u32,
    /// Tracking of currently-used SIMD (XMM) registers.
    used_simd: u32,
    /// Trap information collected while emitting code.
    trap_table: TrapTable,
    /// Map from byte offset into wasm function to range of native instructions.
    ///
    /// Ordered by increasing InstructionAddressMap::srcloc.
    instructions_address_map: Vec<InstructionAddressMap>,
    /// The source location for the current operator.
    src_loc: u32,
    /// Vector of unwind operations with offset
    unwind_ops: Vec<(usize, UnwindOps<GPR, XMM>)>,
}
105
/// Registers used for the first N function return values.
///
/// NOTE: The register set must be disjoint from pick_gpr registers, so that
/// return values are never clobbered by temporaries!
pub(crate) const X86_64_RETURN_VALUE_REGISTERS: [GPR; 2] = [GPR::RAX, GPR::RDX];
109
110impl MachineX86_64 {
111    pub fn new(target: Option<Target>) -> Result<Self, CompileError> {
112        let assembler = AssemblerX64::new(0, target)?;
113        Ok(MachineX86_64 {
114            assembler,
115            used_gprs: 0,
116            used_simd: 0,
117            trap_table: TrapTable::default(),
118            instructions_address_map: vec![],
119            src_loc: 0,
120            unwind_ops: vec![],
121        })
122    }
    /// Emits the two-operand instruction `op` on `src`/`dst`, first rewriting
    /// operand combinations that x86 cannot encode directly (memory-to-memory,
    /// immediate destinations, 64-bit immediate sources, ...) by staging the
    /// values through temporary GPRs.
    pub fn emit_relaxed_binop(
        &mut self,
        op: fn(&mut AssemblerX64, Size, Location, Location) -> Result<(), CompileError>,
        sz: Size,
        src: Location,
        dst: Location,
    ) -> Result<(), CompileError> {
        // How the operand pair must be adjusted before `op` can encode it.
        enum RelaxMode {
            Direct,
            SrcToGPR,
            DstToGPR,
            BothToGPR,
        }
        let mode = match (src, dst) {
            // `imul` reg, reg encodes directly; any other operand shape for
            // `imul` is staged fully through registers (next arm).
            (Location::GPR(_), Location::GPR(_))
                if std::ptr::eq(op as *const u8, AssemblerX64::emit_imul as *const u8) =>
            {
                RelaxMode::Direct
            }
            _ if std::ptr::eq(op as *const u8, AssemblerX64::emit_imul as *const u8) => {
                RelaxMode::BothToGPR
            }

            // x86 has no memory-to-memory encoding.
            (Location::Memory(_, _), Location::Memory(_, _)) => RelaxMode::SrcToGPR,
            (Location::Imm64(_), Location::Imm64(_)) | (Location::Imm64(_), Location::Imm32(_)) => {
                RelaxMode::BothToGPR
            }
            // Immediate destinations cannot be encoded; operate on a GPR copy.
            (_, Location::Imm32(_)) | (_, Location::Imm64(_)) => RelaxMode::DstToGPR,
            (Location::Imm64(_), Location::Memory(_, _)) => RelaxMode::SrcToGPR,
            // 64-bit immediates only encode directly for `mov reg, imm64`; for
            // every other instruction they are materialized in a GPR first.
            (Location::Imm64(_), Location::GPR(_))
                if (op as *const u8 != AssemblerX64::emit_mov as *const u8) =>
            {
                RelaxMode::SrcToGPR
            }
            (_, Location::SIMD(_)) => RelaxMode::SrcToGPR,
            _ => RelaxMode::Direct,
        };

        match mode {
            // Stage the source in a scratch GPR, then operate on `dst` directly.
            RelaxMode::SrcToGPR => {
                let temp = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.move_location(sz, src, Location::GPR(temp))?;
                op(&mut self.assembler, sz, Location::GPR(temp), dst)?;
                self.release_gpr(temp);
            }
            // Operate on a scratch copy of the destination.
            // NOTE(review): the result is NOT written back to `dst` here —
            // this mode is reached for immediate destinations, so it appears
            // intended for flag-setting ops (e.g. cmp); confirm with callers.
            RelaxMode::DstToGPR => {
                let temp = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.move_location(sz, dst, Location::GPR(temp))?;
                op(&mut self.assembler, sz, src, Location::GPR(temp))?;
                self.release_gpr(temp);
            }
            // Stage both operands in scratch GPRs, then write the result back
            // only when `dst` is a real (register/memory) location.
            RelaxMode::BothToGPR => {
                let temp_src = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                let temp_dst = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.move_location(sz, src, Location::GPR(temp_src))?;
                self.move_location(sz, dst, Location::GPR(temp_dst))?;
                op(
                    &mut self.assembler,
                    sz,
                    Location::GPR(temp_src),
                    Location::GPR(temp_dst),
                )?;
                match dst {
                    Location::Memory(_, _) | Location::GPR(_) => {
                        self.move_location(sz, Location::GPR(temp_dst), dst)?;
                    }
                    _ => {}
                }
                self.release_gpr(temp_dst);
                self.release_gpr(temp_src);
            }
            // Operands are already encodable as-is.
            RelaxMode::Direct => {
                op(&mut self.assembler, sz, src, dst)?;
            }
        }
        Ok(())
    }
208    pub fn emit_relaxed_zx_sx(
209        &mut self,
210        op: fn(&mut AssemblerX64, Size, Location, Size, Location) -> Result<(), CompileError>,
211        sz_src: Size,
212        src: Location,
213        sz_dst: Size,
214        dst: Location,
215    ) -> Result<(), CompileError> {
216        match src {
217            Location::Imm32(_) | Location::Imm64(_) => {
218                let tmp_src = self.acquire_temp_gpr().ok_or_else(|| {
219                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
220                })?;
221                self.assembler
222                    .emit_mov(Size::S64, src, Location::GPR(tmp_src))?;
223                let src = Location::GPR(tmp_src);
224
225                match dst {
226                    Location::Imm32(_) | Location::Imm64(_) => unreachable!(),
227                    Location::Memory(_, _) => {
228                        let tmp_dst = self.acquire_temp_gpr().ok_or_else(|| {
229                            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
230                        })?;
231                        op(
232                            &mut self.assembler,
233                            sz_src,
234                            src,
235                            sz_dst,
236                            Location::GPR(tmp_dst),
237                        )?;
238                        self.move_location(Size::S64, Location::GPR(tmp_dst), dst)?;
239
240                        self.release_gpr(tmp_dst);
241                    }
242                    Location::GPR(_) => {
243                        op(&mut self.assembler, sz_src, src, sz_dst, dst)?;
244                    }
245                    _ => {
246                        codegen_error!("singlepass emit_relaxed_zx_sx unreachable");
247                    }
248                };
249
250                self.release_gpr(tmp_src);
251            }
252            Location::GPR(_) | Location::Memory(_, _) => {
253                match dst {
254                    Location::Imm32(_) | Location::Imm64(_) => unreachable!(),
255                    Location::Memory(_, _) => {
256                        let tmp_dst = self.acquire_temp_gpr().ok_or_else(|| {
257                            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
258                        })?;
259                        op(
260                            &mut self.assembler,
261                            sz_src,
262                            src,
263                            sz_dst,
264                            Location::GPR(tmp_dst),
265                        )?;
266                        self.move_location(Size::S64, Location::GPR(tmp_dst), dst)?;
267
268                        self.release_gpr(tmp_dst);
269                    }
270                    Location::GPR(_) => {
271                        op(&mut self.assembler, sz_src, src, sz_dst, dst)?;
272                    }
273                    _ => {
274                        codegen_error!("singlepass emit_relaxed_zx_sx unreachable");
275                    }
276                };
277            }
278            _ => {
279                codegen_error!("singlepass emit_relaxed_zx_sx unreachable");
280            }
281        }
282        Ok(())
283    }
284    /// I32 binary operation with both operands popped from the virtual stack.
285    fn emit_binop_i32(
286        &mut self,
287        f: fn(&mut AssemblerX64, Size, Location, Location) -> Result<(), CompileError>,
288        loc_a: Location,
289        loc_b: Location,
290        ret: Location,
291    ) -> Result<(), CompileError> {
292        if loc_a != ret {
293            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
294                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
295            })?;
296            self.emit_relaxed_mov(Size::S32, loc_a, Location::GPR(tmp))?;
297            self.emit_relaxed_binop(f, Size::S32, loc_b, Location::GPR(tmp))?;
298            self.emit_relaxed_mov(Size::S32, Location::GPR(tmp), ret)?;
299            self.release_gpr(tmp);
300        } else {
301            self.emit_relaxed_binop(f, Size::S32, loc_b, ret)?;
302        }
303        Ok(())
304    }
305    /// I64 binary operation with both operands popped from the virtual stack.
306    fn emit_binop_i64(
307        &mut self,
308        f: fn(&mut AssemblerX64, Size, Location, Location) -> Result<(), CompileError>,
309        loc_a: Location,
310        loc_b: Location,
311        ret: Location,
312    ) -> Result<(), CompileError> {
313        if loc_a != ret {
314            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
315                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
316            })?;
317            self.emit_relaxed_mov(Size::S64, loc_a, Location::GPR(tmp))?;
318            self.emit_relaxed_binop(f, Size::S64, loc_b, Location::GPR(tmp))?;
319            self.emit_relaxed_mov(Size::S64, Location::GPR(tmp), ret)?;
320            self.release_gpr(tmp);
321        } else {
322            self.emit_relaxed_binop(f, Size::S64, loc_b, ret)?;
323        }
324        Ok(())
325    }
326    /// I64 comparison with.
327    fn emit_cmpop_i64_dynamic_b(
328        &mut self,
329        c: Condition,
330        loc_a: Location,
331        loc_b: Location,
332        ret: Location,
333    ) -> Result<(), CompileError> {
334        match ret {
335            Location::GPR(x) => {
336                self.emit_relaxed_cmp(Size::S64, loc_b, loc_a)?;
337                self.assembler.emit_set(c, x)?;
338                self.assembler
339                    .emit_and(Size::S32, Location::Imm32(0xff), Location::GPR(x))?;
340            }
341            Location::Memory(_, _) => {
342                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
343                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
344                })?;
345                self.emit_relaxed_cmp(Size::S64, loc_b, loc_a)?;
346                self.assembler.emit_set(c, tmp)?;
347                self.assembler
348                    .emit_and(Size::S32, Location::Imm32(0xff), Location::GPR(tmp))?;
349                self.move_location(Size::S32, Location::GPR(tmp), ret)?;
350                self.release_gpr(tmp);
351            }
352            _ => {
353                codegen_error!("singlepass emit_cmpop_i64_dynamic_b unreachable");
354            }
355        }
356        Ok(())
357    }
358    /// I64 shift with both operands popped from the virtual stack.
359    fn emit_shift_i64(
360        &mut self,
361        f: fn(&mut AssemblerX64, Size, Location, Location) -> Result<(), CompileError>,
362        loc_a: Location,
363        loc_b: Location,
364        ret: Location,
365    ) -> Result<(), CompileError> {
366        self.assembler
367            .emit_mov(Size::S64, loc_b, Location::GPR(GPR::RCX))?;
368
369        if loc_a != ret {
370            self.emit_relaxed_mov(Size::S64, loc_a, ret)?;
371        }
372
373        f(&mut self.assembler, Size::S64, Location::GPR(GPR::RCX), ret)
374    }
375    /// Moves `loc` to a valid location for `div`/`idiv`.
376    fn emit_relaxed_xdiv(
377        &mut self,
378        op: fn(&mut AssemblerX64, Size, Location) -> Result<(), CompileError>,
379        sz: Size,
380        loc: Location,
381        integer_division_by_zero: Label,
382    ) -> Result<usize, CompileError> {
383        self.assembler.emit_cmp(sz, Location::Imm32(0), loc)?;
384        self.assembler
385            .emit_jmp(Condition::Equal, integer_division_by_zero)?;
386
387        match loc {
388            Location::Imm64(_) | Location::Imm32(_) => {
389                self.move_location(sz, loc, Location::GPR(GPR::RCX))?; // must not be used during div (rax, rdx)
390                let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow);
391                op(&mut self.assembler, sz, Location::GPR(GPR::RCX))?;
392                self.mark_instruction_address_end(offset);
393                Ok(offset)
394            }
395            _ => {
396                let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow);
397                op(&mut self.assembler, sz, loc)?;
398                self.mark_instruction_address_end(offset);
399                Ok(offset)
400            }
401        }
402    }
403    /// I32 comparison with.
404    fn emit_cmpop_i32_dynamic_b(
405        &mut self,
406        c: Condition,
407        loc_a: Location,
408        loc_b: Location,
409        ret: Location,
410    ) -> Result<(), CompileError> {
411        match ret {
412            Location::GPR(x) => {
413                self.emit_relaxed_cmp(Size::S32, loc_b, loc_a)?;
414                self.assembler.emit_set(c, x)?;
415                self.assembler
416                    .emit_and(Size::S32, Location::Imm32(0xff), Location::GPR(x))?;
417            }
418            Location::Memory(_, _) => {
419                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
420                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
421                })?;
422                self.emit_relaxed_cmp(Size::S32, loc_b, loc_a)?;
423                self.assembler.emit_set(c, tmp)?;
424                self.assembler
425                    .emit_and(Size::S32, Location::Imm32(0xff), Location::GPR(tmp))?;
426                self.move_location(Size::S32, Location::GPR(tmp), ret)?;
427                self.release_gpr(tmp);
428            }
429            _ => {
430                codegen_error!("singlepass emit_cmpop_i32_dynamic_b unreachable");
431            }
432        }
433        Ok(())
434    }
435    /// I32 shift with both operands popped from the virtual stack.
436    fn emit_shift_i32(
437        &mut self,
438        f: fn(&mut AssemblerX64, Size, Location, Location) -> Result<(), CompileError>,
439        loc_a: Location,
440        loc_b: Location,
441        ret: Location,
442    ) -> Result<(), CompileError> {
443        self.assembler
444            .emit_mov(Size::S32, loc_b, Location::GPR(GPR::RCX))?;
445
446        if loc_a != ret {
447            self.emit_relaxed_mov(Size::S32, loc_a, ret)?;
448        }
449
450        f(&mut self.assembler, Size::S32, Location::GPR(GPR::RCX), ret)
451    }
452
    /// Emits address computation and guards for a wasm linear-memory access,
    /// then calls `cb` with the GPR holding the final (native) address.
    ///
    /// - `addr`: wasm-level address operand.
    /// - `memarg`: static offset/alignment info from the wasm instruction.
    /// - `check_alignment`: when true, misaligned accesses jump to `unaligned_atomic`.
    /// - `value_size`: size in bytes of the value being accessed.
    /// - `need_check`: when true, a bounds check against the memory limit is emitted.
    /// - `imported_memories`: imported memories need one extra pointer indirection.
    /// - `offset`: offset of the memory definition inside the vmctx.
    #[allow(clippy::too_many_arguments)]
    fn memory_op<F: FnOnce(&mut Self, GPR) -> Result<(), CompileError>>(
        &mut self,
        addr: Location,
        memarg: &MemArg,
        check_alignment: bool,
        value_size: usize,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
        cb: F,
    ) -> Result<(), CompileError> {
        // This function has been re-written to use only 2 temporary registers
        // instead of 3, without compromising on performance.
        // The number of memory moves should be equivalent to the previous
        // 3-temp-regs version. Register pressure is high on x86_64, and this
        // is needed to be able to use instructions that need RAX, like
        // cmpxchg for example.
        let tmp_addr = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp2 = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;

        // Reusing `tmp_addr` for temporary indirection here, since it's not used before the last reference to `{base,bound}_loc`.
        let base_loc = if imported_memories {
            // Imported memories require one level of indirection.
            self.emit_relaxed_binop(
                AssemblerX64::emit_mov,
                Size::S64,
                Location::Memory(self.get_vmctx_reg(), offset),
                Location::GPR(tmp2),
            )?;
            Location::Memory(tmp2, 0)
        } else {
            Location::Memory(self.get_vmctx_reg(), offset)
        };

        // Load base into temporary register.
        self.assembler
            .emit_mov(Size::S64, base_loc, Location::GPR(tmp2))?;

        // Load effective address.
        // `base_loc` and `bound_loc` become INVALID after this line, because
        // `tmp_addr` might be reused.
        self.assembler
            .emit_mov(Size::S32, addr, Location::GPR(tmp_addr))?;

        // Add the static offset from the wasm instruction to the address.
        if memarg.offset != 0 {
            self.assembler.emit_add(
                Size::S32,
                Location::Imm32(memarg.offset as u32),
                Location::GPR(tmp_addr),
            )?;

            // Trap if offset calculation overflowed.
            self.assembler.emit_jmp(Condition::Carry, heap_access_oob)?;
        }

        if need_check {
            let bound_loc = if imported_memories {
                // Imported memories require one level of indirection.
                self.emit_relaxed_binop(
                    AssemblerX64::emit_mov,
                    Size::S64,
                    Location::Memory(self.get_vmctx_reg(), offset),
                    Location::GPR(tmp2),
                )?;
                Location::Memory(tmp2, 8)
            } else {
                Location::Memory(self.get_vmctx_reg(), offset + 8)
            };
            self.assembler
                .emit_mov(Size::S64, bound_loc, Location::GPR(tmp2))?;

            // We will compare against the upper bound limit without having
            // added the "temp_base" value, as it's a constant.
            // tmp2 = bound - value_size (the last valid start address).
            self.assembler.emit_lea(
                Size::S64,
                Location::Memory(tmp2, -(value_size as i32)),
                Location::GPR(tmp2),
            )?;
            // Trap if the end address of the requested area is above that of the linear memory.
            self.assembler
                .emit_cmp(Size::S64, Location::GPR(tmp2), Location::GPR(tmp_addr))?;

            // `tmp_bound` is inclusive. So trap only if `tmp_addr > tmp_bound`.
            self.assembler.emit_jmp(Condition::Above, heap_access_oob)?;
        }
        // Get back base_loc, as it might have been destroyed by the upper
        // memory test above (tmp2 was reused for the bound).
        let base_loc = if imported_memories {
            // Imported memories require one level of indirection.
            self.emit_relaxed_binop(
                AssemblerX64::emit_mov,
                Size::S64,
                Location::Memory(self.get_vmctx_reg(), offset),
                Location::GPR(tmp2),
            )?;
            Location::Memory(tmp2, 0)
        } else {
            Location::Memory(self.get_vmctx_reg(), offset)
        };
        // Wasm linear memory -> real memory
        self.assembler
            .emit_add(Size::S64, base_loc, Location::GPR(tmp_addr))?;

        self.release_gpr(tmp2);

        // Optional alignment check: trap when (addr & (align - 1)) != 0.
        let align = value_size as u32;
        if check_alignment && align != 1 {
            let tmp_aligncheck = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            self.assembler.emit_mov(
                Size::S32,
                Location::GPR(tmp_addr),
                Location::GPR(tmp_aligncheck),
            )?;
            self.assembler.emit_and(
                Size::S64,
                Location::Imm32(align - 1),
                Location::GPR(tmp_aligncheck),
            )?;
            self.assembler
                .emit_jmp(Condition::NotEqual, unaligned_atomic)?;
            self.release_gpr(tmp_aligncheck);
        }
        // Emit the actual access and tag its native range with the OOB trap code.
        let begin = self.assembler.get_offset().0;
        cb(self, tmp_addr)?;
        let end = self.assembler.get_offset().0;
        self.mark_address_range_with_trap_code(TrapCode::HeapAccessOutOfBounds, begin, end);

        self.release_gpr(tmp_addr);
        Ok(())
    }
590
    /// Emits an atomic read-modify-write loop based on `lock cmpxchg`.
    ///
    /// The current memory value is loaded into RAX (`compare`) and copied to
    /// `ret` (the value observed before the update); `cb` is expected to
    /// compute the replacement into the `value` register; `lock cmpxchg`
    /// retries the whole sequence until the swap succeeds.
    #[allow(clippy::too_many_arguments)]
    fn emit_compare_and_swap<F: FnOnce(&mut Self, GPR, GPR) -> Result<(), CompileError>>(
        &mut self,
        loc: Location,
        target: Location,
        ret: Location,
        memarg: &MemArg,
        value_size: usize,
        memory_sz: Size,
        stack_sz: Size,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
        cb: F,
    ) -> Result<(), CompileError> {
        if memory_sz > stack_sz {
            codegen_error!("singlepass emit_compare_and_swap unreachable");
        }

        // `cmpxchg` implicitly compares against RAX, so reserve it for the
        // "expected" value.
        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
        // Scratch register for the new value, chosen so it cannot alias `loc`.
        let value = if loc == Location::GPR(GPR::R14) {
            GPR::R13
        } else {
            GPR::R14
        };
        // Preserve the previous contents of the scratch register across the loop.
        self.assembler.emit_push(Size::S64, Location::GPR(value))?;

        self.move_location(stack_sz, loc, Location::GPR(value))?;

        let retry = self.assembler.get_label();
        self.emit_label(retry)?;

        self.memory_op(
            target,
            memarg,
            true,
            value_size,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                // Load current value, expose it as the result, let `cb` derive
                // the replacement, then attempt the atomic swap.
                this.load_address(memory_sz, Location::GPR(compare), Location::Memory(addr, 0))?;
                this.move_location(stack_sz, Location::GPR(compare), ret)?;
                cb(this, compare, value)?;
                this.assembler.emit_lock_cmpxchg(
                    memory_sz,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )
            },
        )?;

        // cmpxchg leaves ZF clear when the swap failed; retry in that case.
        self.assembler.emit_jmp(Condition::NotEqual, retry)?;

        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
        self.release_gpr(compare);
        Ok(())
    }
653
    // Checks for underflow/overflow/nan.
    //
    // Compares `reg` against `lower_bound` / `upper_bound` and against itself
    // (NaN never compares equal to itself), jumping to the corresponding
    // label; ends with an unconditional jump to `succeed_label` when all
    // checks pass. Bounds are loaded via their raw bit patterns through a GPR.
    #[allow(clippy::too_many_arguments)]
    fn emit_f32_int_conv_check(
        &mut self,
        reg: XMM,
        lower_bound: f32,
        upper_bound: f32,
        underflow_label: Label,
        overflow_label: Label,
        nan_label: Label,
        succeed_label: Label,
    ) -> Result<(), CompileError> {
        // Use the raw bit patterns so the constants can travel GPR -> XMM.
        let lower_bound = f32::to_bits(lower_bound);
        let upper_bound = f32::to_bits(upper_bound);

        let tmp = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_x = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        // Underflow: compare against the lower bound; a non-zero mask means
        // the value is out of range below.
        self.move_location(Size::S32, Location::Imm32(lower_bound), Location::GPR(tmp))?;
        self.move_location(Size::S32, Location::GPR(tmp), Location::SIMD(tmp_x))?;
        self.assembler
            .emit_vcmpless(reg, XMMOrMemory::XMM(tmp_x), tmp_x)?;
        self.move_location(Size::S32, Location::SIMD(tmp_x), Location::GPR(tmp))?;
        self.assembler
            .emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp))?;
        self.assembler
            .emit_jmp(Condition::NotEqual, underflow_label)?;

        // Overflow: same scheme against the upper bound.
        self.move_location(Size::S32, Location::Imm32(upper_bound), Location::GPR(tmp))?;
        self.move_location(Size::S32, Location::GPR(tmp), Location::SIMD(tmp_x))?;
        self.assembler
            .emit_vcmpgess(reg, XMMOrMemory::XMM(tmp_x), tmp_x)?;
        self.move_location(Size::S32, Location::SIMD(tmp_x), Location::GPR(tmp))?;
        self.assembler
            .emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp))?;
        self.assembler
            .emit_jmp(Condition::NotEqual, overflow_label)?;

        // NaN: `reg == reg` yields a zero mask only for NaN.
        self.assembler
            .emit_vcmpeqss(reg, XMMOrMemory::XMM(reg), tmp_x)?;
        self.move_location(Size::S32, Location::SIMD(tmp_x), Location::GPR(tmp))?;
        self.assembler
            .emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp))?;
        self.assembler.emit_jmp(Condition::Equal, nan_label)?;

        // All checks passed: the value is convertible.
        self.assembler.emit_jmp(Condition::None, succeed_label)?;

        self.release_simd(tmp_x);
        self.release_gpr(tmp);
        Ok(())
    }
712
713    // Checks for underflow/overflow/nan before IxxTrunc{U/S}F32.
714    fn emit_f32_int_conv_check_trap(
715        &mut self,
716        reg: XMM,
717        lower_bound: f32,
718        upper_bound: f32,
719    ) -> Result<(), CompileError> {
720        let trap_overflow = self.assembler.get_label();
721        let trap_badconv = self.assembler.get_label();
722        let end = self.assembler.get_label();
723
724        self.emit_f32_int_conv_check(
725            reg,
726            lower_bound,
727            upper_bound,
728            trap_overflow,
729            trap_overflow,
730            trap_badconv,
731            end,
732        )?;
733
734        self.emit_label(trap_overflow)?;
735
736        self.emit_illegal_op_internal(TrapCode::IntegerOverflow)?;
737
738        self.emit_label(trap_badconv)?;
739
740        self.emit_illegal_op_internal(TrapCode::BadConversionToInteger)?;
741
742        self.emit_label(end)?;
743        Ok(())
744    }
745    #[allow(clippy::too_many_arguments)]
746    fn emit_f32_int_conv_check_sat<
747        F1: FnOnce(&mut Self) -> Result<(), CompileError>,
748        F2: FnOnce(&mut Self) -> Result<(), CompileError>,
749        F3: FnOnce(&mut Self) -> Result<(), CompileError>,
750        F4: FnOnce(&mut Self) -> Result<(), CompileError>,
751    >(
752        &mut self,
753        reg: XMM,
754        lower_bound: f32,
755        upper_bound: f32,
756        underflow_cb: F1,
757        overflow_cb: F2,
758        nan_cb: Option<F3>,
759        convert_cb: F4,
760    ) -> Result<(), CompileError> {
761        // As an optimization nan_cb is optional, and when set to None we turn
762        // use 'underflow' as the 'nan' label. This is useful for callers who
763        // set the return value to zero for both underflow and nan.
764
765        let underflow = self.assembler.get_label();
766        let overflow = self.assembler.get_label();
767        let nan = if nan_cb.is_some() {
768            self.assembler.get_label()
769        } else {
770            underflow
771        };
772        let convert = self.assembler.get_label();
773        let end = self.assembler.get_label();
774
775        self.emit_f32_int_conv_check(
776            reg,
777            lower_bound,
778            upper_bound,
779            underflow,
780            overflow,
781            nan,
782            convert,
783        )?;
784
785        self.emit_label(underflow)?;
786        underflow_cb(self)?;
787        self.assembler.emit_jmp(Condition::None, end)?;
788
789        self.emit_label(overflow)?;
790        overflow_cb(self)?;
791        self.assembler.emit_jmp(Condition::None, end)?;
792
793        if let Some(cb) = nan_cb {
794            self.emit_label(nan)?;
795            cb(self)?;
796            self.assembler.emit_jmp(Condition::None, end)?;
797        }
798
799        self.emit_label(convert)?;
800        convert_cb(self)?;
801        self.emit_label(end)
802    }
    // Checks for underflow/overflow/nan.
    //
    // Emits a range/NaN dispatch for the f64 value in `reg`:
    //   * at/below `lower_bound` -> jump to `underflow_label`
    //   * at/above `upper_bound` -> jump to `overflow_label`
    //   * NaN                    -> jump to `nan_label`
    //   * otherwise              -> jump to `succeed_label`
    // Callers choose the bounds (the GEF64_*/LEF64_* constants) so that these
    // comparisons exactly delimit the convertible range for the target type.
    #[allow(clippy::too_many_arguments)]
    fn emit_f64_int_conv_check(
        &mut self,
        reg: XMM,
        lower_bound: f64,
        upper_bound: f64,
        underflow_label: Label,
        overflow_label: Label,
        nan_label: Label,
        succeed_label: Label,
    ) -> Result<(), CompileError> {
        // The bounds are materialized as raw bit patterns through a GPR,
        // since x86 has no load-f64-immediate into an XMM register.
        let lower_bound = f64::to_bits(lower_bound);
        let upper_bound = f64::to_bits(upper_bound);

        let tmp = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_x = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        // Underflow: build the lower bound in tmp_x, compare, and branch on a
        // non-zero comparison mask.
        self.move_location(Size::S64, Location::Imm64(lower_bound), Location::GPR(tmp))?;
        self.move_location(Size::S64, Location::GPR(tmp), Location::SIMD(tmp_x))?;
        self.assembler
            .emit_vcmplesd(reg, XMMOrMemory::XMM(tmp_x), tmp_x)?;
        self.move_location(Size::S32, Location::SIMD(tmp_x), Location::GPR(tmp))?;
        self.assembler
            .emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp))?;
        self.assembler
            .emit_jmp(Condition::NotEqual, underflow_label)?;

        // Overflow: same shape as above with the upper bound and `>=`.
        self.move_location(Size::S64, Location::Imm64(upper_bound), Location::GPR(tmp))?;
        self.move_location(Size::S64, Location::GPR(tmp), Location::SIMD(tmp_x))?;
        self.assembler
            .emit_vcmpgesd(reg, XMMOrMemory::XMM(tmp_x), tmp_x)?;
        self.move_location(Size::S32, Location::SIMD(tmp_x), Location::GPR(tmp))?;
        self.assembler
            .emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp))?;
        self.assembler
            .emit_jmp(Condition::NotEqual, overflow_label)?;

        // NaN: `reg == reg` is false only for NaN, so a zero mask means NaN.
        self.assembler
            .emit_vcmpeqsd(reg, XMMOrMemory::XMM(reg), tmp_x)?;
        self.move_location(Size::S32, Location::SIMD(tmp_x), Location::GPR(tmp))?;
        self.assembler
            .emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp))?;
        self.assembler.emit_jmp(Condition::Equal, nan_label)?;

        // In range: continue at the caller-provided success label.
        self.assembler.emit_jmp(Condition::None, succeed_label)?;

        // NOTE: releasing temporaries is a compile-time register-allocator
        // operation (no code is emitted), so doing it after the jumps is fine.
        self.release_simd(tmp_x);
        self.release_gpr(tmp);
        Ok(())
    }
861    // Checks for underflow/overflow/nan before IxxTrunc{U/S}F64.. return offset/len for trap_overflow and trap_badconv
862    fn emit_f64_int_conv_check_trap(
863        &mut self,
864        reg: XMM,
865        lower_bound: f64,
866        upper_bound: f64,
867    ) -> Result<(), CompileError> {
868        let trap_overflow = self.assembler.get_label();
869        let trap_badconv = self.assembler.get_label();
870        let end = self.assembler.get_label();
871
872        self.emit_f64_int_conv_check(
873            reg,
874            lower_bound,
875            upper_bound,
876            trap_overflow,
877            trap_overflow,
878            trap_badconv,
879            end,
880        )?;
881
882        self.emit_label(trap_overflow)?;
883        self.emit_illegal_op_internal(TrapCode::IntegerOverflow)?;
884
885        self.emit_label(trap_badconv)?;
886        self.emit_illegal_op_internal(TrapCode::BadConversionToInteger)?;
887
888        self.emit_label(end)
889    }
890    #[allow(clippy::too_many_arguments)]
891    fn emit_f64_int_conv_check_sat<
892        F1: FnOnce(&mut Self) -> Result<(), CompileError>,
893        F2: FnOnce(&mut Self) -> Result<(), CompileError>,
894        F3: FnOnce(&mut Self) -> Result<(), CompileError>,
895        F4: FnOnce(&mut Self) -> Result<(), CompileError>,
896    >(
897        &mut self,
898        reg: XMM,
899        lower_bound: f64,
900        upper_bound: f64,
901        underflow_cb: F1,
902        overflow_cb: F2,
903        nan_cb: Option<F3>,
904        convert_cb: F4,
905    ) -> Result<(), CompileError> {
906        // As an optimization nan_cb is optional, and when set to None we turn
907        // use 'underflow' as the 'nan' label. This is useful for callers who
908        // set the return value to zero for both underflow and nan.
909
910        let underflow = self.assembler.get_label();
911        let overflow = self.assembler.get_label();
912        let nan = if nan_cb.is_some() {
913            self.assembler.get_label()
914        } else {
915            underflow
916        };
917        let convert = self.assembler.get_label();
918        let end = self.assembler.get_label();
919
920        self.emit_f64_int_conv_check(
921            reg,
922            lower_bound,
923            upper_bound,
924            underflow,
925            overflow,
926            nan,
927            convert,
928        )?;
929
930        self.emit_label(underflow)?;
931        underflow_cb(self)?;
932        self.assembler.emit_jmp(Condition::None, end)?;
933
934        self.emit_label(overflow)?;
935        overflow_cb(self)?;
936        self.assembler.emit_jmp(Condition::None, end)?;
937
938        if let Some(cb) = nan_cb {
939            self.emit_label(nan)?;
940            cb(self)?;
941            self.assembler.emit_jmp(Condition::None, end)?;
942        }
943
944        self.emit_label(convert)?;
945        convert_cb(self)?;
946        self.emit_label(end)
947    }
948    /// Moves `src1` and `src2` to valid locations and possibly adds a layer of indirection for `dst` for AVX instructions.
949    fn emit_relaxed_avx(
950        &mut self,
951        op: fn(&mut AssemblerX64, XMM, XMMOrMemory, XMM) -> Result<(), CompileError>,
952        src1: Location,
953        src2: Location,
954        dst: Location,
955    ) -> Result<(), CompileError> {
956        self.emit_relaxed_avx_base(
957            |this, src1, src2, dst| op(&mut this.assembler, src1, src2, dst),
958            src1,
959            src2,
960            dst,
961        )
962    }
963
    /// Moves `src1` and `src2` to valid locations and possibly adds a layer of indirection for `dst` for AVX instructions.
    ///
    /// AVX scalar ops take an XMM first source, an XMM-or-memory second
    /// source, and an XMM destination. So: `src1` is staged into a temp XMM
    /// unless it already is one (immediates travel through a temp GPR first);
    /// `src2` may stay in memory; a non-XMM `dst` receives the result through
    /// a temp XMM followed by a store.
    fn emit_relaxed_avx_base<
        F: FnOnce(&mut Self, XMM, XMMOrMemory, XMM) -> Result<(), CompileError>,
    >(
        &mut self,
        op: F,
        src1: Location,
        src2: Location,
        dst: Location,
    ) -> Result<(), CompileError> {
        // Temporaries are acquired up front even though a given operand shape
        // may not use all of them.
        let tmp1 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp2 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp3 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmpg = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;

        // First source must end up in an XMM register.
        let src1 = match src1 {
            Location::SIMD(x) => x,
            Location::GPR(_) | Location::Memory(_, _) => {
                self.assembler
                    .emit_mov(Size::S64, src1, Location::SIMD(tmp1))?;
                tmp1
            }
            Location::Imm32(_) => {
                // Immediates cannot be moved into an XMM directly: GPR first.
                self.assembler
                    .emit_mov(Size::S32, src1, Location::GPR(tmpg))?;
                self.move_location(Size::S32, Location::GPR(tmpg), Location::SIMD(tmp1))?;
                tmp1
            }
            Location::Imm64(_) => {
                self.assembler
                    .emit_mov(Size::S64, src1, Location::GPR(tmpg))?;
                self.move_location(Size::S64, Location::GPR(tmpg), Location::SIMD(tmp1))?;
                tmp1
            }
            _ => {
                codegen_error!("singlepass emit_relaxed_avx_base unreachable")
            }
        };

        // Second source may be an XMM register or stay as a memory operand.
        let src2 = match src2 {
            Location::SIMD(x) => XMMOrMemory::XMM(x),
            Location::Memory(base, disp) => XMMOrMemory::Memory(base, disp),
            Location::GPR(_) => {
                self.assembler
                    .emit_mov(Size::S64, src2, Location::SIMD(tmp2))?;
                XMMOrMemory::XMM(tmp2)
            }
            Location::Imm32(_) => {
                self.assembler
                    .emit_mov(Size::S32, src2, Location::GPR(tmpg))?;
                self.move_location(Size::S32, Location::GPR(tmpg), Location::SIMD(tmp2))?;
                XMMOrMemory::XMM(tmp2)
            }
            Location::Imm64(_) => {
                self.assembler
                    .emit_mov(Size::S64, src2, Location::GPR(tmpg))?;
                self.move_location(Size::S64, Location::GPR(tmpg), Location::SIMD(tmp2))?;
                XMMOrMemory::XMM(tmp2)
            }
            _ => {
                codegen_error!("singlepass emit_relaxed_avx_base unreachable")
            }
        };

        // Destination: write directly into an XMM, or compute into a temp
        // XMM and store it out for GPR/memory destinations.
        match dst {
            Location::SIMD(x) => {
                op(self, src1, src2, x)?;
            }
            Location::Memory(_, _) | Location::GPR(_) => {
                op(self, src1, src2, tmp3)?;
                self.assembler
                    .emit_mov(Size::S64, Location::SIMD(tmp3), dst)?;
            }
            _ => {
                codegen_error!("singlepass emit_relaxed_avx_base unreachable")
            }
        }

        self.release_gpr(tmpg);
        self.release_simd(tmp3);
        self.release_simd(tmp2);
        self.release_simd(tmp1);
        Ok(())
    }
1056
    /// f64 -> u64 saturating truncation (`i64.trunc_sat_f64_u`):
    /// values below range and NaN become 0, values above range become
    /// u64::MAX, in-range values truncate toward zero.
    fn convert_i64_f64_u_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
        self.emit_f64_int_conv_check_sat(
            tmp_in,
            GEF64_LT_U64_MIN,
            LEF64_GT_U64_MAX,
            // Below range: saturate to 0.
            |this| {
                this.assembler
                    .emit_mov(Size::S64, Location::Imm64(0), Location::GPR(tmp_out))
            },
            // Above range: saturate to u64::MAX.
            |this| {
                this.assembler.emit_mov(
                    Size::S64,
                    Location::Imm64(u64::MAX),
                    Location::GPR(tmp_out),
                )
            },
            // No dedicated NaN callback: NaN shares the underflow path (0).
            None::<fn(this: &mut Self) -> Result<(), CompileError>>,
            |this| {
                if this.assembler.arch_has_itruncf() {
                    this.assembler.arch_emit_i64_trunc_uf64(tmp_in, tmp_out)
                } else {
                    // No unsigned 64-bit truncation on this path, so compute
                    // both a direct signed truncation and a 2^63-biased one,
                    // then select the right candidate based on the input's
                    // magnitude.
                    let tmp = this.acquire_temp_gpr().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                    })?;
                    let tmp_x1 = this.acquire_temp_simd().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
                    })?;
                    let tmp_x2 = this.acquire_temp_simd().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
                    })?;

                    // tmp_x1 := 2^63 as f64 (raw bit pattern).
                    this.assembler.emit_mov(
                        Size::S64,
                        Location::Imm64(4890909195324358656u64),
                        Location::GPR(tmp),
                    )?; //double 9.2233720368547758E+18
                    this.assembler.emit_mov(
                        Size::S64,
                        Location::GPR(tmp),
                        Location::SIMD(tmp_x1),
                    )?;
                    // tmp_x2 keeps a copy of the original input.
                    this.assembler.emit_mov(
                        Size::S64,
                        Location::SIMD(tmp_in),
                        Location::SIMD(tmp_x2),
                    )?;
                    // Truncate (input - 2^63) ...
                    this.assembler
                        .emit_vsubsd(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in)?;
                    this.assembler
                        .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
                    // ... and flip the top bit back in: `tmp` now holds the
                    // candidate result for inputs >= 2^63.
                    this.assembler.emit_mov(
                        Size::S64,
                        Location::Imm64(0x8000000000000000u64),
                        Location::GPR(tmp),
                    )?;
                    this.assembler.emit_xor(
                        Size::S64,
                        Location::GPR(tmp_out),
                        Location::GPR(tmp),
                    )?;
                    // tmp_out := direct signed truncation of the original
                    // input — the valid candidate for inputs < 2^63.
                    this.assembler
                        .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out)?;
                    // Compare input against 2^63 and pick the biased
                    // candidate when the input is at/above it.
                    this.assembler
                        .emit_ucomisd(XMMOrMemory::XMM(tmp_x1), tmp_x2)?;
                    this.assembler.emit_cmovae_gpr_64(tmp, tmp_out)?;

                    this.release_simd(tmp_x2);
                    this.release_simd(tmp_x1);
                    this.release_gpr(tmp);
                    Ok(())
                }
            },
        )?;

        self.assembler
            .emit_mov(Size::S64, Location::GPR(tmp_out), ret)?;
        self.release_simd(tmp_in);
        self.release_gpr(tmp_out);
        Ok(())
    }
    /// f64 -> u64 trapping truncation (`i64.trunc_f64_u`): out-of-range
    /// values trap with IntegerOverflow, NaN with BadConversionToInteger.
    fn convert_i64_f64_u_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        if self.assembler.arch_has_itruncf() {
            // Native unsigned truncation instruction available on this arch.
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
            self.assembler.arch_emit_i64_trunc_uf64(tmp_in, tmp_out)?;
            self.emit_relaxed_mov(Size::S64, Location::GPR(tmp_out), ret)?;
            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        } else {
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?; // xmm2

            // Trap on out-of-range/NaN first; what follows assumes an
            // in-range input.
            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
            self.emit_f64_int_conv_check_trap(tmp_in, GEF64_LT_U64_MIN, LEF64_GT_U64_MAX)?;

            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?; // r15
            let tmp_x1 = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?; // xmm1
            let tmp_x2 = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?; // xmm3

            // Same 2^63-bias trick as the saturating variant: compute both a
            // direct signed truncation and a biased one, select by magnitude.
            // tmp_x1 := 2^63 as f64 (raw bit pattern).
            self.move_location(
                Size::S64,
                Location::Imm64(4890909195324358656u64),
                Location::GPR(tmp),
            )?; //double 9.2233720368547758E+18
            self.move_location(Size::S64, Location::GPR(tmp), Location::SIMD(tmp_x1))?;
            // tmp_x2 keeps a copy of the original input.
            self.move_location(Size::S64, Location::SIMD(tmp_in), Location::SIMD(tmp_x2))?;
            // Truncate (input - 2^63), then flip the top bit back in: `tmp`
            // holds the candidate result for inputs >= 2^63.
            self.assembler
                .emit_vsubsd(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in)?;
            self.assembler
                .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
            self.move_location(
                Size::S64,
                Location::Imm64(0x8000000000000000u64),
                Location::GPR(tmp),
            )?;
            self.assembler
                .emit_xor(Size::S64, Location::GPR(tmp_out), Location::GPR(tmp))?;
            // tmp_out := direct truncation — valid for inputs < 2^63.
            self.assembler
                .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out)?;
            // Pick the biased candidate when the input is at/above 2^63.
            self.assembler
                .emit_ucomisd(XMMOrMemory::XMM(tmp_x1), tmp_x2)?;
            self.assembler.emit_cmovae_gpr_64(tmp, tmp_out)?;
            self.move_location(Size::S64, Location::GPR(tmp_out), ret)?;

            self.release_simd(tmp_x2);
            self.release_simd(tmp_x1);
            self.release_gpr(tmp);
            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        }
        Ok(())
    }
1212    fn convert_i64_f64_s_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
1213        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1214            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1215        })?;
1216        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1217            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1218        })?;
1219
1220        self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
1221        self.emit_f64_int_conv_check_sat(
1222            tmp_in,
1223            GEF64_LT_I64_MIN,
1224            LEF64_GT_I64_MAX,
1225            |this| {
1226                this.assembler.emit_mov(
1227                    Size::S64,
1228                    Location::Imm64(i64::MIN as u64),
1229                    Location::GPR(tmp_out),
1230                )
1231            },
1232            |this| {
1233                this.assembler.emit_mov(
1234                    Size::S64,
1235                    Location::Imm64(i64::MAX as u64),
1236                    Location::GPR(tmp_out),
1237                )
1238            },
1239            Some(|this: &mut Self| {
1240                this.assembler
1241                    .emit_mov(Size::S64, Location::Imm64(0), Location::GPR(tmp_out))
1242            }),
1243            |this| {
1244                if this.assembler.arch_has_itruncf() {
1245                    this.assembler.arch_emit_i64_trunc_sf64(tmp_in, tmp_out)
1246                } else {
1247                    this.assembler
1248                        .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)
1249                }
1250            },
1251        )?;
1252
1253        self.assembler
1254            .emit_mov(Size::S64, Location::GPR(tmp_out), ret)?;
1255        self.release_simd(tmp_in);
1256        self.release_gpr(tmp_out);
1257        Ok(())
1258    }
1259    fn convert_i64_f64_s_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
1260        if self.assembler.arch_has_itruncf() {
1261            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1262                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1263            })?;
1264            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1265                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1266            })?;
1267            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
1268            self.assembler.arch_emit_i64_trunc_sf64(tmp_in, tmp_out)?;
1269            self.emit_relaxed_mov(Size::S64, Location::GPR(tmp_out), ret)?;
1270            self.release_simd(tmp_in);
1271            self.release_gpr(tmp_out);
1272        } else {
1273            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1274                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1275            })?;
1276            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1277                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1278            })?;
1279
1280            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
1281            self.emit_f64_int_conv_check_trap(tmp_in, GEF64_LT_I64_MIN, LEF64_GT_I64_MAX)?;
1282
1283            self.assembler
1284                .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
1285            self.move_location(Size::S64, Location::GPR(tmp_out), ret)?;
1286
1287            self.release_simd(tmp_in);
1288            self.release_gpr(tmp_out);
1289        }
1290        Ok(())
1291    }
1292    fn convert_i32_f64_s_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
1293        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1294            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1295        })?;
1296        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1297            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1298        })?;
1299
1300        let real_in = match loc {
1301            Location::Imm32(_) | Location::Imm64(_) => {
1302                self.move_location(Size::S64, loc, Location::GPR(tmp_out))?;
1303                self.move_location(Size::S64, Location::GPR(tmp_out), Location::SIMD(tmp_in))?;
1304                tmp_in
1305            }
1306            Location::SIMD(x) => x,
1307            _ => {
1308                self.move_location(Size::S64, loc, Location::SIMD(tmp_in))?;
1309                tmp_in
1310            }
1311        };
1312
1313        self.emit_f64_int_conv_check_sat(
1314            real_in,
1315            GEF64_LT_I32_MIN,
1316            LEF64_GT_I32_MAX,
1317            |this| {
1318                this.assembler.emit_mov(
1319                    Size::S32,
1320                    Location::Imm32(i32::MIN as u32),
1321                    Location::GPR(tmp_out),
1322                )
1323            },
1324            |this| {
1325                this.assembler.emit_mov(
1326                    Size::S32,
1327                    Location::Imm32(i32::MAX as u32),
1328                    Location::GPR(tmp_out),
1329                )
1330            },
1331            Some(|this: &mut Self| {
1332                this.assembler
1333                    .emit_mov(Size::S32, Location::Imm32(0), Location::GPR(tmp_out))
1334            }),
1335            |this| {
1336                if this.assembler.arch_has_itruncf() {
1337                    this.assembler.arch_emit_i32_trunc_sf64(tmp_in, tmp_out)
1338                } else {
1339                    this.assembler
1340                        .emit_cvttsd2si_32(XMMOrMemory::XMM(real_in), tmp_out)
1341                }
1342            },
1343        )?;
1344
1345        self.assembler
1346            .emit_mov(Size::S32, Location::GPR(tmp_out), ret)?;
1347        self.release_simd(tmp_in);
1348        self.release_gpr(tmp_out);
1349        Ok(())
1350    }
    /// f64 -> i32 trapping truncation (`i32.trunc_f64_s`): out-of-range
    /// values trap with IntegerOverflow, NaN with BadConversionToInteger.
    fn convert_i32_f64_s_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        if self.assembler.arch_has_itruncf() {
            // Native truncation instruction handles the trap semantics.
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
            self.assembler.arch_emit_i32_trunc_sf64(tmp_in, tmp_out)?;
            self.emit_relaxed_mov(Size::S32, Location::GPR(tmp_out), ret)?;
            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        } else {
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;

            // Stage the operand into an XMM register, unless it already lives
            // in one — then it is used in place.
            let real_in = match loc {
                Location::Imm32(_) | Location::Imm64(_) => {
                    self.move_location(Size::S64, loc, Location::GPR(tmp_out))?;
                    self.move_location(Size::S64, Location::GPR(tmp_out), Location::SIMD(tmp_in))?;
                    tmp_in
                }
                Location::SIMD(x) => x,
                _ => {
                    self.move_location(Size::S64, loc, Location::SIMD(tmp_in))?;
                    tmp_in
                }
            };

            // Trap on out-of-range/NaN, then truncate the in-range value.
            self.emit_f64_int_conv_check_trap(real_in, GEF64_LT_I32_MIN, LEF64_GT_I32_MAX)?;

            self.assembler
                .emit_cvttsd2si_32(XMMOrMemory::XMM(real_in), tmp_out)?;
            self.move_location(Size::S32, Location::GPR(tmp_out), ret)?;

            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        }
        Ok(())
    }
    /// f64 -> u32 saturating truncation (`i32.trunc_sat_f64_u`):
    /// values below range and NaN become 0, values above range u32::MAX.
    fn convert_i32_f64_u_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
        self.emit_f64_int_conv_check_sat(
            tmp_in,
            GEF64_LT_U32_MIN,
            LEF64_GT_U32_MAX,
            // Below range: saturate to 0.
            |this| {
                this.assembler
                    .emit_mov(Size::S32, Location::Imm32(0), Location::GPR(tmp_out))
            },
            // Above range: saturate to u32::MAX.
            |this| {
                this.assembler.emit_mov(
                    Size::S32,
                    Location::Imm32(u32::MAX),
                    Location::GPR(tmp_out),
                )
            },
            // No dedicated NaN callback: NaN shares the underflow path (0).
            None::<fn(this: &mut Self) -> Result<(), CompileError>>,
            |this| {
                if this.assembler.arch_has_itruncf() {
                    this.assembler.arch_emit_i32_trunc_uf64(tmp_in, tmp_out)
                } else {
                    // 64-bit signed truncation is used on purpose: the range
                    // check guarantees the value fits in u32, which is well
                    // inside i64 range, and only the low 32 bits are stored.
                    this.assembler
                        .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)
                }
            },
        )?;

        self.assembler
            .emit_mov(Size::S32, Location::GPR(tmp_out), ret)?;
        self.release_simd(tmp_in);
        self.release_gpr(tmp_out);
        Ok(())
    }
    /// f64 -> u32 trapping truncation (`i32.trunc_f64_u`): out-of-range
    /// values trap with IntegerOverflow, NaN with BadConversionToInteger.
    fn convert_i32_f64_u_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        if self.assembler.arch_has_itruncf() {
            // Native unsigned truncation instruction available on this arch.
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
            self.assembler.arch_emit_i32_trunc_uf64(tmp_in, tmp_out)?;
            self.emit_relaxed_mov(Size::S32, Location::GPR(tmp_out), ret)?;
            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        } else {
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;

            // Trap on out-of-range/NaN first, then truncate.
            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
            self.emit_f64_int_conv_check_trap(tmp_in, GEF64_LT_U32_MIN, LEF64_GT_U32_MAX)?;

            // 64-bit signed truncation is used on purpose: the range check
            // guarantees the value fits in u32 (well inside i64 range), and
            // only the low 32 bits are stored.
            self.assembler
                .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
            self.move_location(Size::S32, Location::GPR(tmp_out), ret)?;

            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        }
        Ok(())
    }
    /// f32 -> u64 saturating truncation (`i64.trunc_sat_f32_u`):
    /// values below range and NaN become 0, values above range u64::MAX.
    fn convert_i64_f32_u_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
        self.emit_f32_int_conv_check_sat(
            tmp_in,
            GEF32_LT_U64_MIN,
            LEF32_GT_U64_MAX,
            // Below range: saturate to 0.
            |this| {
                this.assembler
                    .emit_mov(Size::S64, Location::Imm64(0), Location::GPR(tmp_out))
            },
            // Above range: saturate to u64::MAX.
            |this| {
                this.assembler.emit_mov(
                    Size::S64,
                    Location::Imm64(u64::MAX),
                    Location::GPR(tmp_out),
                )
            },
            // No dedicated NaN callback: NaN shares the underflow path (0).
            None::<fn(this: &mut Self) -> Result<(), CompileError>>,
            |this| {
                if this.assembler.arch_has_itruncf() {
                    this.assembler.arch_emit_i64_trunc_uf32(tmp_in, tmp_out)
                } else {
                    // f32 version of the 2^63-bias trick: compute both a
                    // direct signed truncation and a biased one, then select
                    // by the input's magnitude.
                    let tmp = this.acquire_temp_gpr().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                    })?;
                    let tmp_x1 = this.acquire_temp_simd().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
                    })?;
                    let tmp_x2 = this.acquire_temp_simd().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
                    })?;

                    // tmp_x1 := 2^63 as f32 (raw bit pattern).
                    this.assembler.emit_mov(
                        Size::S32,
                        Location::Imm32(1593835520u32),
                        Location::GPR(tmp),
                    )?; //float 9.22337203E+18
                    this.assembler.emit_mov(
                        Size::S32,
                        Location::GPR(tmp),
                        Location::SIMD(tmp_x1),
                    )?;
                    // tmp_x2 keeps a copy of the original input.
                    this.assembler.emit_mov(
                        Size::S32,
                        Location::SIMD(tmp_in),
                        Location::SIMD(tmp_x2),
                    )?;
                    // Truncate (input - 2^63) ...
                    this.assembler
                        .emit_vsubss(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in)?;
                    this.assembler
                        .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
                    // ... and flip the top bit back in: `tmp` now holds the
                    // candidate result for inputs >= 2^63.
                    this.assembler.emit_mov(
                        Size::S64,
                        Location::Imm64(0x8000000000000000u64),
                        Location::GPR(tmp),
                    )?;
                    this.assembler.emit_xor(
                        Size::S64,
                        Location::GPR(tmp_out),
                        Location::GPR(tmp),
                    )?;
                    // tmp_out := direct signed truncation of the original
                    // input — the valid candidate for inputs < 2^63.
                    this.assembler
                        .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out)?;
                    // Pick the biased candidate when input is at/above 2^63.
                    this.assembler
                        .emit_ucomiss(XMMOrMemory::XMM(tmp_x1), tmp_x2)?;
                    this.assembler.emit_cmovae_gpr_64(tmp, tmp_out)?;

                    this.release_simd(tmp_x2);
                    this.release_simd(tmp_x1);
                    this.release_gpr(tmp);
                    Ok(())
                }
            },
        )?;

        self.assembler
            .emit_mov(Size::S64, Location::GPR(tmp_out), ret)?;
        self.release_simd(tmp_in);
        self.release_gpr(tmp_out);
        Ok(())
    }
    /// i64.trunc_f32_u: truncate an f32 to an unsigned 64-bit integer,
    /// trapping on NaN or out-of-range inputs.
    fn convert_i64_f32_u_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        if self.assembler.arch_has_itruncf() {
            // Target has a native float -> unsigned-int truncation; use it.
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
            self.assembler.arch_emit_i64_trunc_uf32(tmp_in, tmp_out)?;
            self.emit_relaxed_mov(Size::S64, Location::GPR(tmp_out), ret)?;
            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        } else {
            // x86 CVTTSS2SI is signed-only, so emulate the unsigned truncation
            // with the classic 2^63-bias trick after the range/NaN trap check.
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?; // xmm2

            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
            // Traps (instead of saturating) when outside the u64 range or NaN.
            self.emit_f32_int_conv_check_trap(tmp_in, GEF32_LT_U64_MIN, LEF32_GT_U64_MAX)?;

            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?; // r15
            let tmp_x1 = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?; // xmm1
            let tmp_x2 = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?; // xmm3

            // tmp_x1 = 2^63 as f32 (bit pattern 0x5F000000).
            self.move_location(
                Size::S32,
                Location::Imm32(1593835520u32),
                Location::GPR(tmp),
            )?; //float 9.22337203E+18
            self.move_location(Size::S32, Location::GPR(tmp), Location::SIMD(tmp_x1))?;
            // Keep a copy of the original input in tmp_x2.
            self.move_location(Size::S32, Location::SIMD(tmp_in), Location::SIMD(tmp_x2))?;
            // Biased path: truncate (input - 2^63), then flip the sign bit back in.
            self.assembler
                .emit_vsubss(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in)?;
            self.assembler
                .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
            self.move_location(
                Size::S64,
                Location::Imm64(0x8000000000000000u64),
                Location::GPR(tmp),
            )?;
            self.assembler.emit_xor(
                Size::S64,
                Location::GPR(tmp_out),
                Location::GPR(tmp),
            )?;
            // Unbiased path: plain signed truncation of the original value.
            self.assembler
                .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out)?;
            // Compare the original against 2^63 and pick the biased result
            // when the input is large enough to need it.
            self.assembler
                .emit_ucomiss(XMMOrMemory::XMM(tmp_x1), tmp_x2)?;
            self.assembler.emit_cmovae_gpr_64(tmp, tmp_out)?;
            self.move_location(Size::S64, Location::GPR(tmp_out), ret)?;

            self.release_simd(tmp_x2);
            self.release_simd(tmp_x1);
            self.release_gpr(tmp);
            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        }
        Ok(())
    }
    /// i64.trunc_sat_f32_s: saturating truncation of an f32 to a signed
    /// 64-bit integer (underflow -> i64::MIN, overflow -> i64::MAX, NaN -> 0).
    fn convert_i64_f32_s_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
        self.emit_f32_int_conv_check_sat(
            tmp_in,
            GEF32_LT_I64_MIN,
            LEF32_GT_I64_MAX,
            // Underflow: clamp to i64::MIN.
            |this| {
                this.assembler.emit_mov(
                    Size::S64,
                    Location::Imm64(i64::MIN as u64),
                    Location::GPR(tmp_out),
                )
            },
            // Overflow: clamp to i64::MAX.
            |this| {
                this.assembler.emit_mov(
                    Size::S64,
                    Location::Imm64(i64::MAX as u64),
                    Location::GPR(tmp_out),
                )
            },
            // NaN: result is 0.
            Some(|this: &mut Self| {
                this.assembler
                    .emit_mov(Size::S64, Location::Imm64(0), Location::GPR(tmp_out))
            }),
            // In-range: a single signed truncation suffices.
            |this| {
                if this.assembler.arch_has_itruncf() {
                    this.assembler.arch_emit_i64_trunc_sf32(tmp_in, tmp_out)
                } else {
                    this.assembler
                        .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)
                }
            },
        )?;

        self.assembler
            .emit_mov(Size::S64, Location::GPR(tmp_out), ret)?;
        self.release_simd(tmp_in);
        self.release_gpr(tmp_out);
        Ok(())
    }
    /// i64.trunc_f32_s: truncate an f32 to a signed 64-bit integer, trapping
    /// on NaN or out-of-range inputs.
    fn convert_i64_f32_s_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        if self.assembler.arch_has_itruncf() {
            // Native truncation available; no explicit range check needed here.
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
            self.assembler.arch_emit_i64_trunc_sf32(tmp_in, tmp_out)?;
            self.emit_relaxed_mov(Size::S64, Location::GPR(tmp_out), ret)?;
            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        } else {
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;

            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
            // Trap on NaN / out-of-range, then do the plain signed truncation.
            self.emit_f32_int_conv_check_trap(tmp_in, GEF32_LT_I64_MIN, LEF32_GT_I64_MAX)?;
            self.assembler
                .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
            self.move_location(Size::S64, Location::GPR(tmp_out), ret)?;

            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        }
        Ok(())
    }
    /// i32.trunc_sat_f32_s: saturating truncation of an f32 to a signed
    /// 32-bit integer (underflow -> i32::MIN, overflow -> i32::MAX, NaN -> 0).
    fn convert_i32_f32_s_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
        self.emit_f32_int_conv_check_sat(
            tmp_in,
            GEF32_LT_I32_MIN,
            LEF32_GT_I32_MAX,
            // Underflow: clamp to i32::MIN.
            |this| {
                this.assembler.emit_mov(
                    Size::S32,
                    Location::Imm32(i32::MIN as u32),
                    Location::GPR(tmp_out),
                )
            },
            // Overflow: clamp to i32::MAX.
            |this| {
                this.assembler.emit_mov(
                    Size::S32,
                    Location::Imm32(i32::MAX as u32),
                    Location::GPR(tmp_out),
                )
            },
            // NaN: result is 0.
            Some(|this: &mut Self| {
                this.assembler
                    .emit_mov(Size::S32, Location::Imm32(0), Location::GPR(tmp_out))
            }),
            // In-range: plain 32-bit signed truncation.
            |this| {
                if this.assembler.arch_has_itruncf() {
                    this.assembler.arch_emit_i32_trunc_sf32(tmp_in, tmp_out)
                } else {
                    this.assembler
                        .emit_cvttss2si_32(XMMOrMemory::XMM(tmp_in), tmp_out)
                }
            },
        )?;

        self.assembler
            .emit_mov(Size::S32, Location::GPR(tmp_out), ret)?;
        self.release_simd(tmp_in);
        self.release_gpr(tmp_out);
        Ok(())
    }
    /// i32.trunc_f32_s: truncate an f32 to a signed 32-bit integer, trapping
    /// on NaN or out-of-range inputs.
    fn convert_i32_f32_s_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        if self.assembler.arch_has_itruncf() {
            // Native truncation available.
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
            self.assembler.arch_emit_i32_trunc_sf32(tmp_in, tmp_out)?;
            self.emit_relaxed_mov(Size::S32, Location::GPR(tmp_out), ret)?;
            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        } else {
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;

            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
            // Trap on NaN / out-of-range, then truncate.
            self.emit_f32_int_conv_check_trap(tmp_in, GEF32_LT_I32_MIN, LEF32_GT_I32_MAX)?;

            self.assembler
                .emit_cvttss2si_32(XMMOrMemory::XMM(tmp_in), tmp_out)?;
            self.move_location(Size::S32, Location::GPR(tmp_out), ret)?;

            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        }
        Ok(())
    }
    /// i32.trunc_sat_f32_u: saturating truncation of an f32 to an unsigned
    /// 32-bit integer (underflow/NaN -> 0, overflow -> u32::MAX).
    fn convert_i32_f32_u_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
        self.emit_f32_int_conv_check_sat(
            tmp_in,
            GEF32_LT_U32_MIN,
            LEF32_GT_U32_MAX,
            // Underflow: clamp to 0 (no NaN callback is supplied, so NaN also
            // ends up here via the range check).
            |this| {
                this.assembler
                    .emit_mov(Size::S32, Location::Imm32(0), Location::GPR(tmp_out))
            },
            // Overflow: clamp to u32::MAX.
            |this| {
                this.assembler.emit_mov(
                    Size::S32,
                    Location::Imm32(u32::MAX),
                    Location::GPR(tmp_out),
                )
            },
            None::<fn(this: &mut Self) -> Result<(), CompileError>>,
            // In-range: use the 64-bit signed truncation so every u32 value
            // fits in the signed destination range.
            |this| {
                if this.assembler.arch_has_itruncf() {
                    this.assembler.arch_emit_i32_trunc_uf32(tmp_in, tmp_out)
                } else {
                    this.assembler
                        .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)
                }
            },
        )?;

        self.assembler
            .emit_mov(Size::S32, Location::GPR(tmp_out), ret)?;
        self.release_simd(tmp_in);
        self.release_gpr(tmp_out);
        Ok(())
    }
    /// i32.trunc_f32_u: truncate an f32 to an unsigned 32-bit integer,
    /// trapping on NaN or out-of-range inputs.
    fn convert_i32_f32_u_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        if self.assembler.arch_has_itruncf() {
            // Native truncation available.
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
            self.assembler.arch_emit_i32_trunc_uf32(tmp_in, tmp_out)?;
            self.emit_relaxed_mov(Size::S32, Location::GPR(tmp_out), ret)?;
            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        } else {
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
            // Trap on NaN / out-of-range for u32.
            self.emit_f32_int_conv_check_trap(tmp_in, GEF32_LT_U32_MIN, LEF32_GT_U32_MAX)?;

            // 64-bit signed truncation covers the full u32 range.
            self.assembler
                .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
            self.move_location(Size::S32, Location::GPR(tmp_out), ret)?;

            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        }
        Ok(())
    }
1856
    /// Emit an atomic exchange between `src` and `dst`, letting the relaxed
    /// binop helper stage operands through temporaries when the addressing
    /// modes require it.
    fn emit_relaxed_atomic_xchg(
        &mut self,
        sz: Size,
        src: Location,
        dst: Location,
    ) -> Result<(), CompileError> {
        self.emit_relaxed_binop(AssemblerX64::emit_xchg, sz, src, dst)
    }
1865
1866    fn used_gprs_contains(&self, r: &GPR) -> bool {
1867        self.used_gprs & (1 << r.into_index()) != 0
1868    }
1869    fn used_simd_contains(&self, r: &XMM) -> bool {
1870        self.used_simd & (1 << r.into_index()) != 0
1871    }
1872    fn used_gprs_insert(&mut self, r: GPR) {
1873        self.used_gprs |= 1 << r.into_index();
1874    }
1875    fn used_simd_insert(&mut self, r: XMM) {
1876        self.used_simd |= 1 << r.into_index();
1877    }
1878    fn used_gprs_remove(&mut self, r: &GPR) -> bool {
1879        let ret = self.used_gprs_contains(r);
1880        self.used_gprs &= !(1 << r.into_index());
1881        ret
1882    }
1883    fn used_simd_remove(&mut self, r: &XMM) -> bool {
1884        let ret = self.used_simd_contains(r);
1885        self.used_simd &= !(1 << r.into_index());
1886        ret
1887    }
1888    fn emit_unwind_op(&mut self, op: UnwindOps<GPR, XMM>) -> Result<(), CompileError> {
1889        self.unwind_ops.push((self.get_offset().0, op));
1890        Ok(())
1891    }
    /// Emit a `ud1` with the trap code encoded in its payload byte, so the
    /// trap handler can recover which trap fired from the instruction bytes.
    fn emit_illegal_op_internal(&mut self, trap: TrapCode) -> Result<(), CompileError> {
        let v = trap as u8;
        self.assembler.emit_ud1_payload(v)
    }
1896
    // logic
    /// Bitwise XOR of `source` into `dest`. `_flags` is accepted for interface
    /// parity but ignored: x86 `xor` always updates the flags.
    fn location_xor(
        &mut self,
        size: Size,
        source: Location,
        dest: Location,
        _flags: bool,
    ) -> Result<(), CompileError> {
        self.assembler.emit_xor(size, source, dest)
    }
    /// Bitwise OR of `source` into `dest`. `_flags` is accepted for interface
    /// parity but ignored: x86 `or` always updates the flags.
    fn location_or(
        &mut self,
        size: Size,
        source: Location,
        dest: Location,
        _flags: bool,
    ) -> Result<(), CompileError> {
        self.assembler.emit_or(size, source, dest)
    }
    /// Load `size` bytes from the memory operand `mem` into the GPR `reg`,
    /// zeroing the upper bits for sub-32-bit loads. Only GPR <- memory forms
    /// are supported; anything else is a codegen error.
    fn load_address(
        &mut self,
        size: Size,
        reg: Location,
        mem: Location,
    ) -> Result<(), CompileError> {
        match reg {
            Location::GPR(_) => {
                match mem {
                    Location::Memory(_, _) | Location::Memory2(_, _, _, _) => {
                        // Memory moves with size < 32b do not zero upper bits.
                        if size < Size::S32 {
                            self.assembler.emit_xor(Size::S32, reg, reg)?;
                        }
                        self.assembler.emit_mov(size, mem, reg)?;
                    }
                    _ => codegen_error!("singlepass load_address unreachable"),
                }
            }
            _ => codegen_error!("singlepass load_address unreachable"),
        }
        Ok(())
    }
1939
    /// Move `source` into `dest` with an extension from `size_val` to
    /// `size_op` (sign- or zero-extending per `signed`), then negate it.
    fn location_neg(
        &mut self,
        size_val: Size, // size of src
        signed: bool,
        source: Location,
        size_op: Size,
        dest: Location,
    ) -> Result<(), CompileError> {
        self.move_location_extend(size_val, signed, source, size_op, dest)?;
        // NOTE(review): the negation is done at `size_val`, not `size_op`;
        // if the two ever differ, the extended upper bits of `dest` are left
        // un-negated — confirm callers always pass matching sizes.
        self.assembler.emit_neg(size_val, dest)
    }
1951
    /// Zero-extend `src` (`sz_src`) into `dst` (`sz_dst`), staging through
    /// temporaries when operand forms require it.
    fn emit_relaxed_zero_extension(
        &mut self,
        sz_src: Size,
        src: Location,
        sz_dst: Size,
        dst: Location,
    ) -> Result<(), CompileError> {
        if (sz_src == Size::S32 || sz_src == Size::S64) && sz_dst == Size::S64 {
            // A plain 32-bit mov already zeroes the upper 32 bits on x86-64,
            // so no explicit movzx is needed for the 32->64 case.
            self.emit_relaxed_binop(AssemblerX64::emit_mov, sz_src, src, dst)
        } else {
            self.emit_relaxed_zx_sx(AssemblerX64::emit_movzx, sz_src, src, sz_dst, dst)
        }
    }
1965}
1966
1967impl Machine for MachineX86_64 {
1968    type GPR = GPR;
1969    type SIMD = XMM;
    /// Current emission offset of the underlying assembler.
    fn assembler_get_offset(&self) -> Offset {
        self.assembler.get_offset()
    }
1973
    /// Register pinned to hold the VM context pointer (R15 on x86-64).
    fn get_vmctx_reg(&self) -> GPR {
        GPR::R15
    }
1977
1978    fn get_used_gprs(&self) -> Vec<GPR> {
1979        GPR::iterator()
1980            .filter(|x| self.used_gprs & (1 << x.into_index()) != 0)
1981            .cloned()
1982            .collect()
1983    }
1984
1985    fn get_used_simd(&self) -> Vec<XMM> {
1986        XMM::iterator()
1987            .filter(|x| self.used_simd & (1 << x.into_index()) != 0)
1988            .cloned()
1989            .collect()
1990    }
1991
1992    fn pick_gpr(&self) -> Option<GPR> {
1993        use GPR::*;
1994        static REGS: &[GPR] = &[RSI, RDI, R8, R9, R10, R11];
1995        for r in REGS {
1996            if !self.used_gprs_contains(r) {
1997                return Some(*r);
1998            }
1999        }
2000        None
2001    }
2002
2003    // Picks an unused general purpose register for internal temporary use.
2004    fn pick_temp_gpr(&self) -> Option<GPR> {
2005        use GPR::*;
2006        static REGS: &[GPR] = &[RAX, RCX, RDX];
2007        for r in REGS {
2008            if !self.used_gprs_contains(r) {
2009                return Some(*r);
2010            }
2011        }
2012        None
2013    }
2014
2015    fn acquire_temp_gpr(&mut self) -> Option<GPR> {
2016        let gpr = self.pick_temp_gpr();
2017        if let Some(x) = gpr {
2018            self.used_gprs_insert(x);
2019        }
2020        gpr
2021    }
2022
    /// Release a GPR back to the pool; panics if it was not marked in use
    /// (double-release is a compiler bug).
    fn release_gpr(&mut self, gpr: GPR) {
        assert!(self.used_gprs_remove(&gpr));
    }
2026
    /// Reserve a specific GPR that must currently be free; panics if it is
    /// already in use. Returns the same register for call-chaining.
    fn reserve_unused_temp_gpr(&mut self, gpr: GPR) -> GPR {
        assert!(!self.used_gprs_contains(&gpr));
        self.used_gprs_insert(gpr);
        gpr
    }
2032
    /// Unconditionally mark a GPR as in use (no check for prior use).
    fn reserve_gpr(&mut self, gpr: GPR) {
        self.used_gprs_insert(gpr);
    }
2036
2037    fn push_used_gpr(&mut self, used_gprs: &[GPR]) -> Result<usize, CompileError> {
2038        for r in used_gprs.iter() {
2039            self.assembler.emit_push(Size::S64, Location::GPR(*r))?;
2040        }
2041        Ok(used_gprs.len() * 8)
2042    }
2043    fn pop_used_gpr(&mut self, used_gprs: &[GPR]) -> Result<(), CompileError> {
2044        for r in used_gprs.iter().rev() {
2045            self.assembler.emit_pop(Size::S64, Location::GPR(*r))?;
2046        }
2047        Ok(())
2048    }
2049
2050    // Picks an unused XMM register.
2051    fn pick_simd(&self) -> Option<XMM> {
2052        use XMM::*;
2053        static REGS: &[XMM] = &[XMM3, XMM4, XMM5, XMM6, XMM7];
2054        for r in REGS {
2055            if !self.used_simd_contains(r) {
2056                return Some(*r);
2057            }
2058        }
2059        None
2060    }
2061
2062    // Picks an unused XMM register for internal temporary use.
2063    fn pick_temp_simd(&self) -> Option<XMM> {
2064        use XMM::*;
2065        static REGS: &[XMM] = &[XMM0, XMM1, XMM2];
2066        for r in REGS {
2067            if !self.used_simd_contains(r) {
2068                return Some(*r);
2069            }
2070        }
2071        None
2072    }
2073
2074    // Acquires a temporary XMM register.
2075    fn acquire_temp_simd(&mut self) -> Option<XMM> {
2076        let simd = self.pick_temp_simd();
2077        if let Some(x) = simd {
2078            self.used_simd_insert(x);
2079        }
2080        simd
2081    }
2082
    /// Unconditionally mark an XMM register as in use (no check for prior use).
    fn reserve_simd(&mut self, simd: XMM) {
        self.used_simd_insert(simd);
    }
2086
    // Releases a temporary XMM register; panics if it was not marked in use
    // (double-release is a compiler bug).
    fn release_simd(&mut self, simd: XMM) {
        assert!(self.used_simd_remove(&simd));
    }
2091
    /// Spill the listed XMM registers to a freshly reserved stack area and
    /// return the bytes reserved. Only the low 64 bits of each register are
    /// saved (one `S64` slot per register).
    fn push_used_simd(&mut self, used_xmms: &[XMM]) -> Result<usize, CompileError> {
        self.extend_stack((used_xmms.len() * 8) as u32)?;

        for (i, r) in used_xmms.iter().enumerate() {
            self.move_location(
                Size::S64,
                Location::SIMD(*r),
                Location::Memory(GPR::RSP, (i * 8) as i32),
            )?;
        }

        Ok(used_xmms.len() * 8)
    }
2105    fn pop_used_simd(&mut self, used_xmms: &[XMM]) -> Result<(), CompileError> {
2106        for (i, r) in used_xmms.iter().enumerate() {
2107            self.move_location(
2108                Size::S64,
2109                Location::Memory(GPR::RSP, (i * 8) as i32),
2110                Location::SIMD(*r),
2111            )?;
2112        }
2113        self.assembler.emit_add(
2114            Size::S64,
2115            Location::Imm32((used_xmms.len() * 8) as u32),
2116            Location::GPR(GPR::RSP),
2117        )
2118    }
2119
    /// Set the current Wasm source location (byte offset into the module);
    /// used to tag subsequently emitted instructions in the address map.
    fn set_srcloc(&mut self, offset: u32) {
        self.src_loc = offset;
    }
    /// Marks every code offset in `[begin, end)` as trapping with `code`,
    /// and records an address-map entry starting at `begin`.
    fn mark_address_range_with_trap_code(&mut self, code: TrapCode, begin: usize, end: usize) {
        for i in begin..end {
            self.trap_table.offset_to_code.insert(i, code);
        }
        self.mark_instruction_address_end(begin);
    }
2131
    /// Marks the current emission offset as trappable with trap code `code`,
    /// and records an address-map entry for it.
    fn mark_address_with_trap_code(&mut self, code: TrapCode) {
        let offset = self.assembler.get_offset().0;
        self.trap_table.offset_to_code.insert(offset, code);
        self.mark_instruction_address_end(offset);
    }
    /// Marks the instruction at the current offset as trappable with trap
    /// code `code` and returns that "begin" offset (no address-map entry is
    /// recorded here; the caller does that after emission).
    fn mark_instruction_with_trap_code(&mut self, code: TrapCode) -> usize {
        let offset = self.assembler.get_offset().0;
        self.trap_table.offset_to_code.insert(offset, code);
        offset
    }
    /// Pushes an address-map entry covering `begin` up to the current
    /// emission offset, tagged with the current Wasm source location.
    fn mark_instruction_address_end(&mut self, begin: usize) {
        self.instructions_address_map.push(InstructionAddressMap {
            srcloc: SourceLoc::new(self.src_loc),
            code_offset: begin,
            code_len: self.assembler.get_offset().0 - begin,
        });
    }
2153
    /// Register a StackOverflow trap at code offset 0, so a fault during the
    /// function prologue's stack probe is reported as stack exhaustion.
    fn insert_stackoverflow(&mut self) {
        let offset = 0;
        self.trap_table
            .offset_to_code
            .insert(offset, TrapCode::StackOverflow);
        self.mark_instruction_address_end(offset);
    }
2162
2163    /// Get all current TrapInformation
2164    fn collect_trap_information(&self) -> Vec<TrapInformation> {
2165        self.trap_table
2166            .offset_to_code
2167            .clone()
2168            .into_iter()
2169            .map(|(offset, code)| TrapInformation {
2170                code_offset: offset as u32,
2171                trap_code: code,
2172            })
2173            .collect()
2174    }
2175
    /// Snapshot of the instruction address map accumulated so far.
    fn instructions_address_map(&self) -> Vec<InstructionAddressMap> {
        self.instructions_address_map.clone()
    }
2179
    // Memory location for a local on the stack: addressed downward from RBP
    // by the (positive) `stack_offset`.
    fn local_on_stack(&mut self, stack_offset: i32) -> Location {
        Location::Memory(GPR::RBP, -stack_offset)
    }
2184
    // Return a rounded stack adjustement value (must be multiple of 16bytes on ARM64 for example)
    // On x86-64 no rounding is required, so this is the identity.
    fn round_stack_adjust(&self, value: usize) -> usize {
        value
    }
2189
    /// Grow the stack by `delta_stack_offset` bytes (sub rsp, imm).
    fn extend_stack(&mut self, delta_stack_offset: u32) -> Result<(), CompileError> {
        self.assembler.emit_sub(
            Size::S64,
            Location::Imm32(delta_stack_offset),
            Location::GPR(GPR::RSP),
        )
    }
2197
    /// Shrink the stack by `delta_stack_offset` bytes (add rsp, imm);
    /// the inverse of `extend_stack`.
    fn truncate_stack(&mut self, delta_stack_offset: u32) -> Result<(), CompileError> {
        self.assembler.emit_add(
            Size::S64,
            Location::Imm32(delta_stack_offset),
            Location::GPR(GPR::RSP),
        )
    }
2205
    // push a value on the stack for a native call
    /// Move `loc` to `dest` for a native call. Imm64 and memory sources
    /// cannot be moved to memory in one x86 instruction, so they are staged
    /// through a register.
    fn move_location_for_native(
        &mut self,
        _size: Size,
        loc: Location,
        dest: Location,
    ) -> Result<(), CompileError> {
        match loc {
            Location::Imm64(_) | Location::Memory(_, _) | Location::Memory2(_, _, _, _) => {
                let tmp = self.pick_temp_gpr();
                if let Some(x) = tmp {
                    self.assembler.emit_mov(Size::S64, loc, Location::GPR(x))?;
                    self.assembler.emit_mov(Size::S64, Location::GPR(x), dest)
                } else {
                    // No free temp register: stage through RAX while keeping
                    // its value — park RAX in dest, load the source into RAX,
                    // then exchange so dest holds the value and RAX is restored.
                    self.assembler
                        .emit_mov(Size::S64, Location::GPR(GPR::RAX), dest)?;
                    self.assembler
                        .emit_mov(Size::S64, loc, Location::GPR(GPR::RAX))?;
                    self.assembler
                        .emit_xchg(Size::S64, Location::GPR(GPR::RAX), dest)
                }
            }
            _ => self.assembler.emit_mov(Size::S64, loc, dest),
        }
    }
2231
    // Zero a location that is 32bits
    // (a 32-bit immediate mov also zeroes the upper half of a 64-bit slot).
    fn zero_location(&mut self, size: Size, location: Location) -> Result<(), CompileError> {
        self.assembler.emit_mov(size, Location::Imm32(0), location)
    }
2236
    // GPR Reg used for local pointer on the stack (frame pointer).
    fn local_pointer(&self) -> GPR {
        GPR::RBP
    }
2241
    // Determine whether a local should be allocated on the stack.
    // The first four locals live in callee-saved registers (see
    // `get_local_location`); everything past index 3 goes to the stack.
    fn is_local_on_stack(&self, idx: usize) -> bool {
        idx > 3
    }
2246
    // Determine a local's location.
    /// Locals 0-3 are pinned to callee-saved registers; later locals are
    /// 8-byte stack slots below RBP, past the callee-saved spill area.
    fn get_local_location(&self, idx: usize, callee_saved_regs_size: usize) -> Location {
        // Use callee-saved registers for the first locals.
        match idx {
            0 => Location::GPR(GPR::R12),
            1 => Location::GPR(GPR::R13),
            2 => Location::GPR(GPR::R14),
            3 => Location::GPR(GPR::RBX),
            _ => Location::Memory(GPR::RBP, -(((idx - 3) * 8 + callee_saved_regs_size) as i32)),
        }
    }
    // Move a local to the stack
    /// Store `location` into the stack slot at `RBP - stack_offset`, and if
    /// the source is a register, record an unwind op so the unwinder knows
    /// where that register was saved.
    fn move_local(&mut self, stack_offset: i32, location: Location) -> Result<(), CompileError> {
        self.assembler.emit_mov(
            Size::S64,
            location,
            Location::Memory(GPR::RBP, -stack_offset),
        )?;
        match location {
            Location::GPR(x) => self.emit_unwind_op(UnwindOps::SaveRegister {
                reg: UnwindRegister::GPR(x),
                bp_neg_offset: stack_offset,
            }),
            Location::SIMD(x) => self.emit_unwind_op(UnwindOps::SaveRegister {
                reg: UnwindRegister::FPR(x),
                bp_neg_offset: stack_offset,
            }),
            // Immediates/memory sources need no unwind bookkeeping.
            _ => Ok(()),
        }
    }
2277
2278    // List of register to save, depending on the CallingConvention
2279    fn list_to_save(&self, calling_convention: CallingConvention) -> Vec<Location> {
2280        match calling_convention {
2281            CallingConvention::WindowsFastcall => {
2282                vec![Location::GPR(GPR::RDI), Location::GPR(GPR::RSI)]
2283            }
2284            _ => vec![],
2285        }
2286    }
2287
    /// Get registers for first N function call parameters.
    /// Windows fastcall passes 4 integer args in registers; System V passes 6.
    fn get_param_registers(&self, calling_convention: CallingConvention) -> &'static [Self::GPR] {
        match calling_convention {
            CallingConvention::WindowsFastcall => &[GPR::RCX, GPR::RDX, GPR::R8, GPR::R9],
            _ => &[GPR::RDI, GPR::RSI, GPR::RDX, GPR::RCX, GPR::R8, GPR::R9],
        }
    }
2295
2296    // Get param location
2297    fn get_param_location(
2298        &self,
2299        idx: usize,
2300        _sz: Size,
2301        stack_location: &mut usize,
2302        calling_convention: CallingConvention,
2303    ) -> Location {
2304        self.get_param_registers(calling_convention)
2305            .get(idx)
2306            .map_or_else(
2307                || {
2308                    let loc = Location::Memory(GPR::RSP, *stack_location as i32);
2309                    *stack_location += 8;
2310                    loc
2311                },
2312                |reg| Location::GPR(*reg),
2313            )
2314    }
2315    // Get call param location
2316    fn get_call_param_location(
2317        &self,
2318        return_slots: usize,
2319        idx: usize,
2320        _sz: Size,
2321        _stack_location: &mut usize,
2322        calling_convention: CallingConvention,
2323    ) -> Location {
2324        let register_params = self.get_param_registers(calling_convention);
2325        let return_values_memory_size =
2326            8 * return_slots.saturating_sub(X86_64_RETURN_VALUE_REGISTERS.len());
2327        match calling_convention {
2328            CallingConvention::WindowsFastcall => register_params.get(idx).map_or_else(
2329                || {
2330                    Location::Memory(
2331                        GPR::RBP,
2332                        (32 + 16 + return_values_memory_size + (idx - register_params.len()) * 8)
2333                            as i32,
2334                    )
2335                },
2336                |reg| Location::GPR(*reg),
2337            ),
2338            _ => register_params.get(idx).map_or_else(
2339                || {
2340                    Location::Memory(
2341                        GPR::RBP,
2342                        (16 + return_values_memory_size + (idx - register_params.len()) * 8) as i32,
2343                    )
2344                },
2345                |reg| Location::GPR(*reg),
2346            ),
2347        }
2348    }
2349
    /// Get the register for the `idx`-th parameter, with no stack fallback.
    ///
    /// Unlike `get_param_location`, this indexes directly into the
    /// convention's parameter-register list and panics when `idx` exceeds the
    /// register count, so callers must stay within it.
    fn get_simple_param_location(
        &self,
        idx: usize,
        calling_convention: CallingConvention,
    ) -> Self::GPR {
        self.get_param_registers(calling_convention)[idx]
    }
2357
2358    /// Get return value location (to build a call, using SP for stack return values).
2359    fn get_return_value_location(
2360        &self,
2361        idx: usize,
2362        stack_location: &mut usize,
2363        calling_convention: CallingConvention,
2364    ) -> Location {
2365        X86_64_RETURN_VALUE_REGISTERS.get(idx).map_or_else(
2366            || {
2367                let stack_padding = match calling_convention {
2368                    CallingConvention::WindowsFastcall => 32,
2369                    _ => 0,
2370                };
2371                let loc = Location::Memory(GPR::RSP, *stack_location as i32 + stack_padding);
2372                *stack_location += 8;
2373                loc
2374            },
2375            |reg| Location::GPR(*reg),
2376        )
2377    }
2378
2379    /// Get return value location (from a call, using FP for stack return values).
2380    fn get_call_return_value_location(
2381        &self,
2382        idx: usize,
2383        calling_convention: CallingConvention,
2384    ) -> Location {
2385        X86_64_RETURN_VALUE_REGISTERS.get(idx).map_or_else(
2386            || {
2387                let stack_padding = match calling_convention {
2388                    CallingConvention::WindowsFastcall => 32,
2389                    _ => 0,
2390                };
2391                Location::Memory(
2392                    GPR::RBP,
2393                    (16 + stack_padding + (idx - X86_64_RETURN_VALUE_REGISTERS.len()) * 8) as i32,
2394                )
2395            },
2396            |reg| Location::GPR(*reg),
2397        )
2398    }
2399
2400    // move a location to another
2401    fn move_location(
2402        &mut self,
2403        size: Size,
2404        source: Location,
2405        dest: Location,
2406    ) -> Result<(), CompileError> {
2407        match source {
2408            Location::GPR(_) => self.assembler.emit_mov(size, source, dest),
2409            Location::Memory(_, _) => match dest {
2410                Location::GPR(_) | Location::SIMD(_) => self.assembler.emit_mov(size, source, dest),
2411                Location::Memory(_, _) | Location::Memory2(_, _, _, _) => {
2412                    let tmp = self.pick_temp_gpr().ok_or_else(|| {
2413                        CompileError::Codegen("singlepass can't pick a temp gpr".to_owned())
2414                    })?;
2415                    self.assembler.emit_mov(size, source, Location::GPR(tmp))?;
2416                    self.assembler.emit_mov(size, Location::GPR(tmp), dest)
2417                }
2418                _ => codegen_error!("singlepass move_location unreachable"),
2419            },
2420            Location::Memory2(_, _, _, _) => match dest {
2421                Location::GPR(_) | Location::SIMD(_) => self.assembler.emit_mov(size, source, dest),
2422                Location::Memory(_, _) | Location::Memory2(_, _, _, _) => {
2423                    let tmp = self.pick_temp_gpr().ok_or_else(|| {
2424                        CompileError::Codegen("singlepass can't pick a temp gpr".to_owned())
2425                    })?;
2426                    self.assembler.emit_mov(size, source, Location::GPR(tmp))?;
2427                    self.assembler.emit_mov(size, Location::GPR(tmp), dest)
2428                }
2429                _ => codegen_error!("singlepass move_location unreachable"),
2430            },
2431            Location::Imm8(_) | Location::Imm32(_) | Location::Imm64(_) => match dest {
2432                Location::GPR(_) | Location::SIMD(_) => self.assembler.emit_mov(size, source, dest),
2433                Location::Memory(_, _) | Location::Memory2(_, _, _, _) => {
2434                    let tmp = self.pick_temp_gpr().ok_or_else(|| {
2435                        CompileError::Codegen("singlepass can't pick a temp gpr".to_owned())
2436                    })?;
2437                    self.assembler.emit_mov(size, source, Location::GPR(tmp))?;
2438                    self.assembler.emit_mov(size, Location::GPR(tmp), dest)
2439                }
2440                _ => codegen_error!("singlepass move_location unreachable"),
2441            },
2442            Location::SIMD(_) => self.assembler.emit_mov(size, source, dest),
2443            _ => codegen_error!("singlepass move_location unreachable"),
2444        }
2445    }
2446    // move a location to another
2447    fn move_location_extend(
2448        &mut self,
2449        size_val: Size,
2450        signed: bool,
2451        source: Location,
2452        size_op: Size,
2453        dest: Location,
2454    ) -> Result<(), CompileError> {
2455        let dst = match dest {
2456            Location::Memory(_, _) | Location::Memory2(_, _, _, _) => {
2457                Location::GPR(self.acquire_temp_gpr().ok_or_else(|| {
2458                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
2459                })?)
2460            }
2461            Location::GPR(_) | Location::SIMD(_) => dest,
2462            _ => codegen_error!("singlepass move_location_extend unreachable"),
2463        };
2464        match source {
2465            Location::GPR(_)
2466            | Location::Memory(_, _)
2467            | Location::Memory2(_, _, _, _)
2468            | Location::Imm32(_)
2469            | Location::Imm64(_) => match size_val {
2470                Size::S32 | Size::S64 => self.assembler.emit_mov(size_val, source, dst),
2471                Size::S16 | Size::S8 => {
2472                    if signed {
2473                        self.assembler.emit_movsx(size_val, source, size_op, dst)
2474                    } else {
2475                        self.assembler.emit_movzx(size_val, source, size_op, dst)
2476                    }
2477                }
2478            },
2479            _ => panic!(
2480                "unimplemented move_location_extend({size_val:?}, {signed}, {source:?}, {size_op:?}, {dest:?}"
2481            ),
2482        }?;
2483        if dst != dest {
2484            self.assembler.emit_mov(size_op, dst, dest)?;
2485            match dst {
2486                Location::GPR(x) => self.release_gpr(x),
2487                _ => codegen_error!("singlepass move_location_extend unreachable"),
2488            };
2489        }
2490        Ok(())
2491    }
2492
    // Init the stack loc counter
    //
    // Zeroes `init_stack_loc_cnt` consecutive 8-byte stack slots starting at
    // `last_stack_loc` using `rep stosq` (RCX = repeat count, RAX = fill
    // value, RDI = destination pointer). Clobbers RCX, RAX and RDI.
    fn init_stack_loc(
        &mut self,
        init_stack_loc_cnt: u64,
        last_stack_loc: Location,
    ) -> Result<(), CompileError> {
        // Since these assemblies take up to 24 bytes, if more than 2 slots are initialized, then they are smaller.
        // RCX = number of quadwords to store.
        self.assembler.emit_mov(
            Size::S64,
            Location::Imm64(init_stack_loc_cnt),
            Location::GPR(GPR::RCX),
        )?;
        // RAX = 0, the value stored into each slot.
        self.assembler
            .emit_xor(Size::S64, Location::GPR(GPR::RAX), Location::GPR(GPR::RAX))?;
        // RDI = address of the first slot to fill.
        self.assembler
            .emit_lea(Size::S64, last_stack_loc, Location::GPR(GPR::RDI))?;
        self.assembler.emit_rep_stosq()
    }
2511    // Restore save_area
2512    fn restore_saved_area(&mut self, saved_area_offset: i32) -> Result<(), CompileError> {
2513        self.assembler.emit_lea(
2514            Size::S64,
2515            Location::Memory(GPR::RBP, -saved_area_offset),
2516            Location::GPR(GPR::RSP),
2517        )
2518    }
2519    // Pop a location
2520    fn pop_location(&mut self, location: Location) -> Result<(), CompileError> {
2521        self.assembler.emit_pop(Size::S64, location)
2522    }
2523
2524    // assembler finalize
2525    fn assembler_finalize(
2526        self,
2527        assembly_comments: HashMap<usize, AssemblyComment>,
2528    ) -> Result<FinalizedAssembly, CompileError> {
2529        Ok(FinalizedAssembly {
2530            body: self.assembler.finalize().map_err(|e| {
2531                CompileError::Codegen(format!("Assembler failed finalization with: {e:?}"))
2532            })?,
2533            assembly_comments,
2534        })
2535    }
2536
    /// Current byte offset into the code buffer being emitted.
    fn get_offset(&self) -> Offset {
        self.assembler.get_offset()
    }

    /// Finalize per-function state in the underlying assembler.
    fn finalize_function(&mut self) -> Result<(), CompileError> {
        self.assembler.finalize_function()?;
        Ok(())
    }
2545
    fn emit_function_prolog(&mut self) -> Result<(), CompileError> {
        // Standard frame setup: push RBP, then RBP = RSP. Each machine step is
        // paired with an unwind op so unwinders can walk the half-built frame;
        // the interleaving order is load-bearing.
        self.emit_push(Size::S64, Location::GPR(GPR::RBP))?;
        // After the push, saved RBP + return address occupy 16 bytes above RSP.
        self.emit_unwind_op(UnwindOps::PushFP { up_to_sp: 16 })?;
        self.move_location(Size::S64, Location::GPR(GPR::RSP), Location::GPR(GPR::RBP))?;
        self.emit_unwind_op(UnwindOps::DefineNewFrame)
    }

    fn emit_function_epilog(&mut self) -> Result<(), CompileError> {
        // Undo the prolog: RSP = RBP, then restore the caller's RBP.
        self.move_location(Size::S64, Location::GPR(GPR::RBP), Location::GPR(GPR::RSP))?;
        self.emit_pop(Size::S64, Location::GPR(GPR::RBP))
    }
2557
2558    fn emit_function_return_float(&mut self) -> Result<(), CompileError> {
2559        self.move_location(
2560            Size::S64,
2561            Location::GPR(GPR::RAX),
2562            Location::SIMD(XMM::XMM0),
2563        )
2564    }
2565
    /// Replace a NaN value with the canonical quiet-NaN bit pattern, leaving
    /// non-NaN inputs untouched.
    ///
    /// Strategy: an unordered self-compare (`vcmpunordss`/`sd`) yields an
    /// all-ones mask exactly when the value is NaN; `vblendvps`/`pd` then
    /// selects the canonical NaN where the mask is set and the original value
    /// elsewhere.
    fn canonicalize_nan(
        &mut self,
        sz: Size,
        input: Location,
        output: Location,
    ) -> Result<(), CompileError> {
        // tmp1 = working value, tmp2 = NaN mask, tmp3 = canonical NaN constant.
        let tmp1 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp2 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp3 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        self.emit_relaxed_mov(sz, input, Location::SIMD(tmp1))?;
        // Scratch GPR used only to materialize the canonical NaN constant.
        let tmpg1 = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;

        match sz {
            Size::S32 => {
                // tmp2 = all-ones if tmp1 is NaN (value unordered with itself).
                self.assembler
                    .emit_vcmpunordss(tmp1, XMMOrMemory::XMM(tmp1), tmp2)?;
                self.move_location(
                    Size::S32,
                    Location::Imm32(0x7FC0_0000), // Canonical NaN
                    Location::GPR(tmpg1),
                )?;
                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp3))?;
                // tmp1 = mask ? tmp3 (canonical NaN) : tmp1 (original value).
                self.assembler
                    .emit_vblendvps(tmp2, XMMOrMemory::XMM(tmp3), tmp1, tmp1)?;
            }
            Size::S64 => {
                self.assembler
                    .emit_vcmpunordsd(tmp1, XMMOrMemory::XMM(tmp1), tmp2)?;
                self.move_location(
                    Size::S64,
                    Location::Imm64(0x7FF8_0000_0000_0000), // Canonical NaN
                    Location::GPR(tmpg1),
                )?;
                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp3))?;
                self.assembler
                    .emit_vblendvpd(tmp2, XMMOrMemory::XMM(tmp3), tmp1, tmp1)?;
            }
            _ => codegen_error!("singlepass canonicalize_nan unreachable"),
        }

        self.emit_relaxed_mov(sz, Location::SIMD(tmp1), output)?;

        // Release temporaries in reverse order of acquisition.
        self.release_gpr(tmpg1);
        self.release_simd(tmp3);
        self.release_simd(tmp2);
        self.release_simd(tmp1);
        Ok(())
    }
2623
    /// Emit a trapping instruction that carries `trap` encoded in its own
    /// bytes, so the trap handler can recover the trap code straight from the
    /// faulting opcode.
    fn emit_illegal_op(&mut self, trap: TrapCode) -> Result<(), CompileError> {
        // code below is kept as a reference on how to emit illegal op with trap info
        // without an Undefined opcode with payload
        /*
        let offset = self.assembler.get_offset().0;
        self.trap_table
        .offset_to_code
        .insert(offset, trap);
        self.assembler.emit_ud2();
        self.mark_instruction_address_end(offset);*/
        let v = trap as u8;
        // payload needs to be between 0-15
        // this will emit an 40 0F B9 Cx opcode, with x the payload
        let offset = self.assembler.get_offset().0;
        self.assembler.emit_ud1_payload(v)?;
        // Record this instruction's span for the instruction address map.
        self.mark_instruction_address_end(offset);
        Ok(())
    }
    /// Allocate a fresh, not-yet-bound label.
    fn get_label(&mut self) -> Label {
        self.assembler.new_dynamic_label()
    }
    /// Bind `label` to the current code offset.
    fn emit_label(&mut self, label: Label) -> Result<(), CompileError> {
        self.assembler.emit_label(label)
    }
    /// Register used to hold an indirect call target.
    fn get_gpr_for_call(&self) -> GPR {
        GPR::RAX
    }
    /// Emit an indirect `call` through `reg`.
    fn emit_call_register(&mut self, reg: GPR) -> Result<(), CompileError> {
        self.assembler.emit_call_register(reg)
    }
    /// Emit a direct `call` to `label`.
    fn emit_call_label(&mut self, label: Label) -> Result<(), CompileError> {
        self.assembler.emit_call_label(label)
    }
2657
    /// Emit an indirect call routed through an architecture-specific
    /// trampoline (delegated entirely to the assembler).
    fn arch_emit_indirect_call_with_trampoline(
        &mut self,
        location: Location,
    ) -> Result<(), CompileError> {
        self.assembler
            .arch_emit_indirect_call_with_trampoline(location)
    }

    /// Emit a breakpoint instruction for debugging.
    fn emit_debug_breakpoint(&mut self) -> Result<(), CompileError> {
        self.assembler.emit_bkpt()
    }

    /// Emit a call whose target is described by `location`.
    fn emit_call_location(&mut self, location: Location) -> Result<(), CompileError> {
        self.assembler.emit_call_location(location)
    }
    /// `dest += source`. `_flags` is ignored: x86 `add` always updates the
    /// flags register anyway.
    fn location_add(
        &mut self,
        size: Size,
        source: Location,
        dest: Location,
        _flags: bool,
    ) -> Result<(), CompileError> {
        self.assembler.emit_add(size, source, dest)
    }
    /// Compare `dest` against `source`, setting the CPU flags.
    fn location_cmp(
        &mut self,
        size: Size,
        source: Location,
        dest: Location,
    ) -> Result<(), CompileError> {
        self.assembler.emit_cmp(size, source, dest)
    }

    // unconditional jmp
    fn jmp_unconditional(&mut self, label: Label) -> Result<(), CompileError> {
        self.assembler.emit_jmp(Condition::None, label)
    }
2695
2696    fn jmp_on_condition(
2697        &mut self,
2698        cond: UnsignedCondition,
2699        size: Size,
2700        loc_a: AbstractLocation<Self::GPR, Self::SIMD>,
2701        loc_b: AbstractLocation<Self::GPR, Self::SIMD>,
2702        label: Label,
2703    ) -> Result<(), CompileError> {
2704        self.assembler.emit_cmp(size, loc_b, loc_a)?;
2705        let cond = match cond {
2706            UnsignedCondition::Equal => Condition::Equal,
2707            UnsignedCondition::NotEqual => Condition::NotEqual,
2708            UnsignedCondition::Above => Condition::Above,
2709            UnsignedCondition::AboveEqual => Condition::AboveEqual,
2710            UnsignedCondition::Below => Condition::Below,
2711            UnsignedCondition::BelowEqual => Condition::BelowEqual,
2712        };
2713        self.assembler.emit_jmp(cond, label)
2714    }
2715
    // jmp table
    //
    // Jumps into a table of equally-sized jump instructions located at
    // `label`: computes `label + cond * jmp_instr_size` and jumps there.
    fn emit_jmp_to_jumptable(&mut self, label: Label, cond: Location) -> Result<(), CompileError> {
        // Two scratch registers: tmp1 for the table base, tmp2 for the index.
        let tmp1 = self
            .pick_temp_gpr()
            .ok_or_else(|| CompileError::Codegen("singlepass can't pick a temp gpr".to_owned()))?;
        self.reserve_gpr(tmp1);
        let tmp2 = self
            .pick_temp_gpr()
            .ok_or_else(|| CompileError::Codegen("singlepass can't pick a temp gpr".to_owned()))?;
        self.reserve_gpr(tmp2);

        // tmp1 = address of the jump table, tmp2 = table index.
        self.assembler.emit_lea_label(label, Location::GPR(tmp1))?;
        self.move_location(Size::S32, cond, Location::GPR(tmp2))?;

        // Scale the index by the fixed size of one table entry (a jmp).
        let instr_size = self.assembler.get_jmp_instr_size();
        self.assembler
            .emit_imul_imm32_gpr64(instr_size as _, tmp2)?;
        self.assembler
            .emit_add(Size::S64, Location::GPR(tmp1), Location::GPR(tmp2))?;
        self.assembler.emit_jmp_location(Location::GPR(tmp2))?;
        self.release_gpr(tmp2);
        self.release_gpr(tmp1);
        Ok(())
    }
2740
2741    fn align_for_loop(&mut self) -> Result<(), CompileError> {
2742        // Pad with NOPs to the next 16-byte boundary.
2743        // Here we don't use the dynasm `.align 16` attribute because it pads the alignment with single-byte nops
2744        // which may lead to efficiency problems.
2745        match self.assembler.get_offset().0 % 16 {
2746            0 => {}
2747            x => {
2748                self.assembler.emit_nop_n(16 - x)?;
2749            }
2750        }
2751        assert_eq!(self.assembler.get_offset().0 % 16, 0);
2752        Ok(())
2753    }
2754
    /// Emit `ret`.
    fn emit_ret(&mut self) -> Result<(), CompileError> {
        self.assembler.emit_ret()
    }

    /// Push `loc` onto the machine stack.
    fn emit_push(&mut self, size: Size, loc: Location) -> Result<(), CompileError> {
        self.assembler.emit_push(size, loc)
    }
    /// Pop the top of the machine stack into `loc`.
    fn emit_pop(&mut self, size: Size, loc: Location) -> Result<(), CompileError> {
        self.assembler.emit_pop(size, loc)
    }

    /// No instruction is emitted: x86_64's memory ordering is strong enough
    /// that no explicit fence is needed here.
    fn emit_memory_fence(&mut self) -> Result<(), CompileError> {
        // nothing on x86_64
        Ok(())
    }
2770
    /// Multiply `gpr` in place by the immediate `imm32`.
    ///
    /// Only `Size::S64` is implemented; any other size is a codegen error.
    fn emit_imul_imm32(&mut self, size: Size, imm32: u32, gpr: GPR) -> Result<(), CompileError> {
        match size {
            Size::S64 => self.assembler.emit_imul_imm32_gpr64(imm32, gpr),
            _ => {
                codegen_error!("singlepass emit_imul_imm32 unreachable");
            }
        }
    }
2779
    // relaxed binop based...
    //
    // "Relaxed" emitters accept arbitrary Location operand pairs and rely on
    // emit_relaxed_binop / emit_relaxed_zx_sx to insert temporaries when the
    // raw instruction does not support the combination.
    fn emit_relaxed_mov(
        &mut self,
        sz: Size,
        src: Location,
        dst: Location,
    ) -> Result<(), CompileError> {
        self.emit_relaxed_binop(AssemblerX64::emit_mov, sz, src, dst)
    }
    /// `cmp` accepting any src/dst Location pair.
    fn emit_relaxed_cmp(
        &mut self,
        sz: Size,
        src: Location,
        dst: Location,
    ) -> Result<(), CompileError> {
        self.emit_relaxed_binop(AssemblerX64::emit_cmp, sz, src, dst)
    }

    /// Sign-extend `src` (`sz_src`) into `dst` (`sz_dst`) via `movsx`,
    /// accepting any Location pair.
    fn emit_relaxed_sign_extension(
        &mut self,
        sz_src: Size,
        src: Location,
        sz_dst: Size,
        dst: Location,
    ) -> Result<(), CompileError> {
        self.emit_relaxed_zx_sx(AssemblerX64::emit_movsx, sz_src, src, sz_dst, dst)
    }
2807
    /// 32-bit integer addition via the generic i32 binop helper.
    fn emit_binop_add32(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_binop_i32(AssemblerX64::emit_add, loc_a, loc_b, ret)
    }
    /// 32-bit integer subtraction via the generic i32 binop helper.
    fn emit_binop_sub32(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_binop_i32(AssemblerX64::emit_sub, loc_a, loc_b, ret)
    }
    /// 32-bit integer multiplication via the generic i32 binop helper.
    fn emit_binop_mul32(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_binop_i32(AssemblerX64::emit_imul, loc_a, loc_b, ret)
    }
    /// Unsigned 32-bit division: `ret = loc_a / loc_b`.
    ///
    /// Returns the instruction offset produced by `emit_relaxed_xdiv` (used
    /// for trap bookkeeping). Clobbers RAX and RDX.
    fn emit_binop_udiv32(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
        integer_division_by_zero: Label,
    ) -> Result<usize, CompileError> {
        // We assume that RAX and RDX are temporary registers here.
        // `div` divides RDX:RAX, so RDX must be zeroed for an unsigned divide.
        self.assembler
            .emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX))?;
        self.assembler
            .emit_xor(Size::S32, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX))?;
        let offset = self.emit_relaxed_xdiv(
            AssemblerX64::emit_div,
            Size::S32,
            loc_b,
            integer_division_by_zero,
        )?;
        // Quotient lands in RAX.
        self.assembler
            .emit_mov(Size::S32, Location::GPR(GPR::RAX), ret)?;
        Ok(offset)
    }
    /// Signed 32-bit division: `ret = loc_a / loc_b`.
    ///
    /// Returns the instruction offset produced by `emit_relaxed_xdiv` (used
    /// for trap bookkeeping). Clobbers RAX and RDX.
    ///
    /// NOTE(review): `_integer_overflow` is unused — presumably the hardware
    /// `idiv` fault on `i32::MIN / -1` is handled through the same trap path;
    /// confirm against the trap-handler side.
    fn emit_binop_sdiv32(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
        integer_division_by_zero: Label,
        _integer_overflow: Label,
    ) -> Result<usize, CompileError> {
        // We assume that RAX and RDX are temporary registers here.
        self.assembler
            .emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX))?;
        // cdq sign-extends EAX into EDX:EAX for the signed divide.
        self.assembler.emit_cdq()?;
        let offset = self.emit_relaxed_xdiv(
            AssemblerX64::emit_idiv,
            Size::S32,
            loc_b,
            integer_division_by_zero,
        )?;
        // Quotient lands in RAX.
        self.assembler
            .emit_mov(Size::S32, Location::GPR(GPR::RAX), ret)?;
        Ok(offset)
    }
    /// Unsigned 32-bit remainder: `ret = loc_a % loc_b`.
    ///
    /// Same setup as `emit_binop_udiv32`, but the result is taken from RDX
    /// (remainder) instead of RAX (quotient). Clobbers RAX and RDX.
    fn emit_binop_urem32(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
        integer_division_by_zero: Label,
    ) -> Result<usize, CompileError> {
        // We assume that RAX and RDX are temporary registers here.
        self.assembler
            .emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX))?;
        // `div` divides RDX:RAX, so RDX must be zeroed first.
        self.assembler
            .emit_xor(Size::S32, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX))?;
        let offset = self.emit_relaxed_xdiv(
            AssemblerX64::emit_div,
            Size::S32,
            loc_b,
            integer_division_by_zero,
        )?;
        // Remainder lands in RDX.
        self.assembler
            .emit_mov(Size::S32, Location::GPR(GPR::RDX), ret)?;
        Ok(offset)
    }
    /// Signed 32-bit remainder: `ret = loc_a % loc_b`.
    ///
    /// `i32::MIN % -1` is special-cased to 0 before reaching `idiv`, which
    /// would otherwise fault on that overflowing operand pair even though the
    /// mathematical remainder is 0. Clobbers RAX and RDX.
    fn emit_binop_srem32(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
        integer_division_by_zero: Label,
    ) -> Result<usize, CompileError> {
        // We assume that RAX and RDX are temporary registers here.
        let normal_path = self.assembler.get_label();
        let end = self.assembler.get_label();

        // If loc_a == i32::MIN (0x80000000) and loc_b == -1 (0xffffffff),
        // short-circuit to ret = 0 and skip the divide entirely.
        self.emit_relaxed_cmp(Size::S32, Location::Imm32(0x80000000), loc_a)?;
        self.assembler.emit_jmp(Condition::NotEqual, normal_path)?;
        self.emit_relaxed_cmp(Size::S32, Location::Imm32(0xffffffff), loc_b)?;
        self.assembler.emit_jmp(Condition::NotEqual, normal_path)?;
        self.move_location(Size::S32, Location::Imm32(0), ret)?;
        self.assembler.emit_jmp(Condition::None, end)?;

        self.emit_label(normal_path)?;
        self.assembler
            .emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX))?;
        // cdq sign-extends EAX into EDX:EAX for the signed divide.
        self.assembler.emit_cdq()?;
        let offset = self.emit_relaxed_xdiv(
            AssemblerX64::emit_idiv,
            Size::S32,
            loc_b,
            integer_division_by_zero,
        )?;
        // Remainder lands in RDX.
        self.assembler
            .emit_mov(Size::S32, Location::GPR(GPR::RDX), ret)?;

        self.emit_label(end)?;
        Ok(offset)
    }
    /// Bitwise 32-bit AND via the generic i32 binop helper.
    fn emit_binop_and32(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_binop_i32(AssemblerX64::emit_and, loc_a, loc_b, ret)
    }
    /// Bitwise 32-bit OR via the generic i32 binop helper.
    fn emit_binop_or32(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_binop_i32(AssemblerX64::emit_or, loc_a, loc_b, ret)
    }
    /// Bitwise 32-bit XOR via the generic i32 binop helper.
    fn emit_binop_xor32(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_binop_i32(AssemblerX64::emit_xor, loc_a, loc_b, ret)
    }
    /// i32 signed `a >= b` comparison.
    fn i32_cmp_ge_s(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_cmpop_i32_dynamic_b(Condition::GreaterEqual, loc_a, loc_b, ret)
    }
    /// i32 signed `a > b` comparison.
    fn i32_cmp_gt_s(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_cmpop_i32_dynamic_b(Condition::Greater, loc_a, loc_b, ret)
    }
    /// i32 signed `a <= b` comparison.
    fn i32_cmp_le_s(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_cmpop_i32_dynamic_b(Condition::LessEqual, loc_a, loc_b, ret)
    }
    /// i32 signed `a < b` comparison.
    fn i32_cmp_lt_s(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_cmpop_i32_dynamic_b(Condition::Less, loc_a, loc_b, ret)
    }
    /// i32 unsigned `a >= b` comparison.
    fn i32_cmp_ge_u(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_cmpop_i32_dynamic_b(Condition::AboveEqual, loc_a, loc_b, ret)
    }
    /// i32 unsigned `a > b` comparison.
    fn i32_cmp_gt_u(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_cmpop_i32_dynamic_b(Condition::Above, loc_a, loc_b, ret)
    }
    /// i32 unsigned `a <= b` comparison.
    fn i32_cmp_le_u(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_cmpop_i32_dynamic_b(Condition::BelowEqual, loc_a, loc_b, ret)
    }
    /// i32 unsigned `a < b` comparison.
    fn i32_cmp_lt_u(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_cmpop_i32_dynamic_b(Condition::Below, loc_a, loc_b, ret)
    }
    /// i32 `a != b` comparison.
    fn i32_cmp_ne(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_cmpop_i32_dynamic_b(Condition::NotEqual, loc_a, loc_b, ret)
    }
    /// i32 `a == b` comparison.
    fn i32_cmp_eq(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_cmpop_i32_dynamic_b(Condition::Equal, loc_a, loc_b, ret)
    }
    /// Count leading zeros of a 32-bit value (wasm `i32.clz`).
    fn i32_clz(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        // Materialize the operand in a GPR; immediates/memory go through a temp.
        let src = match loc {
            Location::Imm32(_) | Location::Memory(_, _) => {
                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.move_location(Size::S32, loc, Location::GPR(tmp))?;
                tmp
            }
            Location::GPR(reg) => reg,
            _ => {
                codegen_error!("singlepass i32_clz unreachable");
            }
        };
        // Compute into a GPR; spill to memory afterwards if `ret` is memory.
        let dst = match ret {
            Location::Memory(_, _) => self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?,
            Location::GPR(reg) => reg,
            _ => {
                codegen_error!("singlepass i32_clz unreachable");
            }
        };

        if self.assembler.arch_has_xzcnt() {
            // lzcnt computes clz directly, including for a zero input.
            self.assembler
                .arch_emit_lzcnt(Size::S32, Location::GPR(src), Location::GPR(dst))?;
        } else {
            // Fallback: bsr finds the index of the highest set bit (its result
            // is undefined for input 0), and for 0 <= bsr(x) <= 31,
            // `bsr(x) ^ 31` == `31 - bsr(x)` is the leading-zero count.
            // A zero input bypasses bsr and yields 32.
            let zero_path = self.assembler.get_label();
            let end = self.assembler.get_label();

            // NOTE(review): this tests the full 64-bit register; it assumes
            // the upper 32 bits of a 32-bit value are clear — TODO confirm
            // that invariant holds for all callers.
            self.assembler.emit_test_gpr_64(src)?;
            self.assembler.emit_jmp(Condition::Equal, zero_path)?;
            self.assembler
                .emit_bsr(Size::S32, Location::GPR(src), Location::GPR(dst))?;
            self.assembler
                .emit_xor(Size::S32, Location::Imm32(31), Location::GPR(dst))?;
            self.assembler.emit_jmp(Condition::None, end)?;
            self.emit_label(zero_path)?;
            self.move_location(Size::S32, Location::Imm32(32), Location::GPR(dst))?;
            self.emit_label(end)?;
        }
        // Release the temp used for the source, if any.
        match loc {
            Location::Imm32(_) | Location::Memory(_, _) => {
                self.release_gpr(src);
            }
            _ => {}
        };
        // Spill the result to memory and release the temp, if needed.
        if let Location::Memory(_, _) = ret {
            self.move_location(Size::S32, Location::GPR(dst), ret)?;
            self.release_gpr(dst);
        };
        Ok(())
    }
    /// Count trailing zeros of a 32-bit value (wasm `i32.ctz`).
    fn i32_ctz(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        // Materialize the operand in a GPR; immediates/memory go through a temp.
        let src = match loc {
            Location::Imm32(_) | Location::Memory(_, _) => {
                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.move_location(Size::S32, loc, Location::GPR(tmp))?;
                tmp
            }
            Location::GPR(reg) => reg,
            _ => {
                codegen_error!("singlepass i32_ctz unreachable");
            }
        };
        // Compute into a GPR; spill to memory afterwards if `ret` is memory.
        let dst = match ret {
            Location::Memory(_, _) => self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?,
            Location::GPR(reg) => reg,
            _ => {
                codegen_error!("singlepass i32_ctz unreachable");
            }
        };

        if self.assembler.arch_has_xzcnt() {
            // tzcnt computes ctz directly, including for a zero input.
            self.assembler
                .arch_emit_tzcnt(Size::S32, Location::GPR(src), Location::GPR(dst))?;
        } else {
            // Fallback: bsf finds the index of the lowest set bit, which is
            // exactly the trailing-zero count — but its result is undefined
            // for input 0, so a zero input bypasses bsf and yields 32.
            let zero_path = self.assembler.get_label();
            let end = self.assembler.get_label();

            // NOTE(review): tests the full 64-bit register; assumes the upper
            // 32 bits of a 32-bit value are clear — TODO confirm.
            self.assembler.emit_test_gpr_64(src)?;
            self.assembler.emit_jmp(Condition::Equal, zero_path)?;
            self.assembler
                .emit_bsf(Size::S32, Location::GPR(src), Location::GPR(dst))?;
            self.assembler.emit_jmp(Condition::None, end)?;
            self.emit_label(zero_path)?;
            self.move_location(Size::S32, Location::Imm32(32), Location::GPR(dst))?;
            self.emit_label(end)?;
        }

        // Release the temp used for the source, if any.
        match loc {
            Location::Imm32(_) | Location::Memory(_, _) => {
                self.release_gpr(src);
            }
            _ => {}
        };
        // Spill the result to memory and release the temp, if needed.
        if let Location::Memory(_, _) = ret {
            self.move_location(Size::S32, Location::GPR(dst), ret)?;
            self.release_gpr(dst);
        };
        Ok(())
    }
    /// Emit an i32 population count (`i32.popcnt`) via the hardware
    /// `popcnt` instruction.
    ///
    /// `popcnt` takes a register/memory source but requires a register
    /// destination, so a scratch GPR is used whenever `ret` is a memory
    /// location; an immediate source is first materialized in a register.
    fn i32_popcnt(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        match loc {
            // Immediate source: `popcnt` cannot encode an immediate operand,
            // so move it into a temporary register first.
            Location::Imm32(_) => {
                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.move_location(Size::S32, loc, Location::GPR(tmp))?;
                if let Location::Memory(_, _) = ret {
                    // Memory destination: count into a scratch register,
                    // then store it out.
                    let out_tmp = self.acquire_temp_gpr().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                    })?;
                    self.assembler.emit_popcnt(
                        Size::S32,
                        Location::GPR(tmp),
                        Location::GPR(out_tmp),
                    )?;
                    self.move_location(Size::S32, Location::GPR(out_tmp), ret)?;
                    self.release_gpr(out_tmp);
                } else {
                    self.assembler
                        .emit_popcnt(Size::S32, Location::GPR(tmp), ret)?;
                }
                self.release_gpr(tmp);
            }
            // Register or memory source can feed `popcnt` directly; only the
            // destination may need a scratch register.
            Location::Memory(_, _) | Location::GPR(_) => {
                if let Location::Memory(_, _) = ret {
                    let out_tmp = self.acquire_temp_gpr().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                    })?;
                    self.assembler
                        .emit_popcnt(Size::S32, loc, Location::GPR(out_tmp))?;
                    self.move_location(Size::S32, Location::GPR(out_tmp), ret)?;
                    self.release_gpr(out_tmp);
                } else {
                    self.assembler.emit_popcnt(Size::S32, loc, ret)?;
                }
            }
            _ => {
                codegen_error!("singlepass i32_popcnt unreachable");
            }
        }
        Ok(())
    }
3186    fn i32_shl(
3187        &mut self,
3188        loc_a: Location,
3189        loc_b: Location,
3190        ret: Location,
3191    ) -> Result<(), CompileError> {
3192        self.emit_shift_i32(AssemblerX64::emit_shl, loc_a, loc_b, ret)
3193    }
3194    fn i32_shr(
3195        &mut self,
3196        loc_a: Location,
3197        loc_b: Location,
3198        ret: Location,
3199    ) -> Result<(), CompileError> {
3200        self.emit_shift_i32(AssemblerX64::emit_shr, loc_a, loc_b, ret)
3201    }
3202    fn i32_sar(
3203        &mut self,
3204        loc_a: Location,
3205        loc_b: Location,
3206        ret: Location,
3207    ) -> Result<(), CompileError> {
3208        self.emit_shift_i32(AssemblerX64::emit_sar, loc_a, loc_b, ret)
3209    }
3210    fn i32_rol(
3211        &mut self,
3212        loc_a: Location,
3213        loc_b: Location,
3214        ret: Location,
3215    ) -> Result<(), CompileError> {
3216        self.emit_shift_i32(AssemblerX64::emit_rol, loc_a, loc_b, ret)
3217    }
3218    fn i32_ror(
3219        &mut self,
3220        loc_a: Location,
3221        loc_b: Location,
3222        ret: Location,
3223    ) -> Result<(), CompileError> {
3224        self.emit_shift_i32(AssemblerX64::emit_ror, loc_a, loc_b, ret)
3225    }
3226    fn i32_load(
3227        &mut self,
3228        addr: Location,
3229        memarg: &MemArg,
3230        ret: Location,
3231        need_check: bool,
3232        imported_memories: bool,
3233        offset: i32,
3234        heap_access_oob: Label,
3235        unaligned_atomic: Label,
3236    ) -> Result<(), CompileError> {
3237        self.memory_op(
3238            addr,
3239            memarg,
3240            false,
3241            4,
3242            need_check,
3243            imported_memories,
3244            offset,
3245            heap_access_oob,
3246            unaligned_atomic,
3247            |this, addr| {
3248                this.emit_relaxed_binop(
3249                    AssemblerX64::emit_mov,
3250                    Size::S32,
3251                    Location::Memory(addr, 0),
3252                    ret,
3253                )
3254            },
3255        )
3256    }
3257    fn i32_load_8u(
3258        &mut self,
3259        addr: Location,
3260        memarg: &MemArg,
3261        ret: Location,
3262        need_check: bool,
3263        imported_memories: bool,
3264        offset: i32,
3265        heap_access_oob: Label,
3266        unaligned_atomic: Label,
3267    ) -> Result<(), CompileError> {
3268        self.memory_op(
3269            addr,
3270            memarg,
3271            false,
3272            1,
3273            need_check,
3274            imported_memories,
3275            offset,
3276            heap_access_oob,
3277            unaligned_atomic,
3278            |this, addr| {
3279                this.emit_relaxed_zx_sx(
3280                    AssemblerX64::emit_movzx,
3281                    Size::S8,
3282                    Location::Memory(addr, 0),
3283                    Size::S32,
3284                    ret,
3285                )
3286            },
3287        )
3288    }
3289    fn i32_load_8s(
3290        &mut self,
3291        addr: Location,
3292        memarg: &MemArg,
3293        ret: Location,
3294        need_check: bool,
3295        imported_memories: bool,
3296        offset: i32,
3297        heap_access_oob: Label,
3298        unaligned_atomic: Label,
3299    ) -> Result<(), CompileError> {
3300        self.memory_op(
3301            addr,
3302            memarg,
3303            false,
3304            1,
3305            need_check,
3306            imported_memories,
3307            offset,
3308            heap_access_oob,
3309            unaligned_atomic,
3310            |this, addr| {
3311                this.emit_relaxed_zx_sx(
3312                    AssemblerX64::emit_movsx,
3313                    Size::S8,
3314                    Location::Memory(addr, 0),
3315                    Size::S32,
3316                    ret,
3317                )
3318            },
3319        )
3320    }
3321    fn i32_load_16u(
3322        &mut self,
3323        addr: Location,
3324        memarg: &MemArg,
3325        ret: Location,
3326        need_check: bool,
3327        imported_memories: bool,
3328        offset: i32,
3329        heap_access_oob: Label,
3330        unaligned_atomic: Label,
3331    ) -> Result<(), CompileError> {
3332        self.memory_op(
3333            addr,
3334            memarg,
3335            false,
3336            2,
3337            need_check,
3338            imported_memories,
3339            offset,
3340            heap_access_oob,
3341            unaligned_atomic,
3342            |this, addr| {
3343                this.emit_relaxed_zx_sx(
3344                    AssemblerX64::emit_movzx,
3345                    Size::S16,
3346                    Location::Memory(addr, 0),
3347                    Size::S32,
3348                    ret,
3349                )
3350            },
3351        )
3352    }
3353    fn i32_load_16s(
3354        &mut self,
3355        addr: Location,
3356        memarg: &MemArg,
3357        ret: Location,
3358        need_check: bool,
3359        imported_memories: bool,
3360        offset: i32,
3361        heap_access_oob: Label,
3362        unaligned_atomic: Label,
3363    ) -> Result<(), CompileError> {
3364        self.memory_op(
3365            addr,
3366            memarg,
3367            false,
3368            2,
3369            need_check,
3370            imported_memories,
3371            offset,
3372            heap_access_oob,
3373            unaligned_atomic,
3374            |this, addr| {
3375                this.emit_relaxed_zx_sx(
3376                    AssemblerX64::emit_movsx,
3377                    Size::S16,
3378                    Location::Memory(addr, 0),
3379                    Size::S32,
3380                    ret,
3381                )
3382            },
3383        )
3384    }
3385    fn i32_atomic_load(
3386        &mut self,
3387        addr: Location,
3388        memarg: &MemArg,
3389        ret: Location,
3390        need_check: bool,
3391        imported_memories: bool,
3392        offset: i32,
3393        heap_access_oob: Label,
3394        unaligned_atomic: Label,
3395    ) -> Result<(), CompileError> {
3396        self.memory_op(
3397            addr,
3398            memarg,
3399            true,
3400            4,
3401            need_check,
3402            imported_memories,
3403            offset,
3404            heap_access_oob,
3405            unaligned_atomic,
3406            |this, addr| this.emit_relaxed_mov(Size::S32, Location::Memory(addr, 0), ret),
3407        )
3408    }
3409    fn i32_atomic_load_8u(
3410        &mut self,
3411        addr: Location,
3412        memarg: &MemArg,
3413        ret: Location,
3414        need_check: bool,
3415        imported_memories: bool,
3416        offset: i32,
3417        heap_access_oob: Label,
3418        unaligned_atomic: Label,
3419    ) -> Result<(), CompileError> {
3420        self.memory_op(
3421            addr,
3422            memarg,
3423            true,
3424            1,
3425            need_check,
3426            imported_memories,
3427            offset,
3428            heap_access_oob,
3429            unaligned_atomic,
3430            |this, addr| {
3431                this.emit_relaxed_zero_extension(
3432                    Size::S8,
3433                    Location::Memory(addr, 0),
3434                    Size::S32,
3435                    ret,
3436                )
3437            },
3438        )
3439    }
3440    fn i32_atomic_load_16u(
3441        &mut self,
3442        addr: Location,
3443        memarg: &MemArg,
3444        ret: Location,
3445        need_check: bool,
3446        imported_memories: bool,
3447        offset: i32,
3448        heap_access_oob: Label,
3449        unaligned_atomic: Label,
3450    ) -> Result<(), CompileError> {
3451        self.memory_op(
3452            addr,
3453            memarg,
3454            true,
3455            2,
3456            need_check,
3457            imported_memories,
3458            offset,
3459            heap_access_oob,
3460            unaligned_atomic,
3461            |this, addr| {
3462                this.emit_relaxed_zero_extension(
3463                    Size::S16,
3464                    Location::Memory(addr, 0),
3465                    Size::S32,
3466                    ret,
3467                )
3468            },
3469        )
3470    }
3471    fn i32_save(
3472        &mut self,
3473        target_value: Location,
3474        memarg: &MemArg,
3475        target_addr: Location,
3476        need_check: bool,
3477        imported_memories: bool,
3478        offset: i32,
3479        heap_access_oob: Label,
3480        unaligned_atomic: Label,
3481    ) -> Result<(), CompileError> {
3482        self.memory_op(
3483            target_addr,
3484            memarg,
3485            false,
3486            4,
3487            need_check,
3488            imported_memories,
3489            offset,
3490            heap_access_oob,
3491            unaligned_atomic,
3492            |this, addr| {
3493                this.emit_relaxed_binop(
3494                    AssemblerX64::emit_mov,
3495                    Size::S32,
3496                    target_value,
3497                    Location::Memory(addr, 0),
3498                )
3499            },
3500        )
3501    }
3502    fn i32_save_8(
3503        &mut self,
3504        target_value: Location,
3505        memarg: &MemArg,
3506        target_addr: Location,
3507        need_check: bool,
3508        imported_memories: bool,
3509        offset: i32,
3510        heap_access_oob: Label,
3511        unaligned_atomic: Label,
3512    ) -> Result<(), CompileError> {
3513        self.memory_op(
3514            target_addr,
3515            memarg,
3516            false,
3517            1,
3518            need_check,
3519            imported_memories,
3520            offset,
3521            heap_access_oob,
3522            unaligned_atomic,
3523            |this, addr| {
3524                this.emit_relaxed_binop(
3525                    AssemblerX64::emit_mov,
3526                    Size::S8,
3527                    target_value,
3528                    Location::Memory(addr, 0),
3529                )
3530            },
3531        )
3532    }
3533    fn i32_save_16(
3534        &mut self,
3535        target_value: Location,
3536        memarg: &MemArg,
3537        target_addr: Location,
3538        need_check: bool,
3539        imported_memories: bool,
3540        offset: i32,
3541        heap_access_oob: Label,
3542        unaligned_atomic: Label,
3543    ) -> Result<(), CompileError> {
3544        self.memory_op(
3545            target_addr,
3546            memarg,
3547            false,
3548            2,
3549            need_check,
3550            imported_memories,
3551            offset,
3552            heap_access_oob,
3553            unaligned_atomic,
3554            |this, addr| {
3555                this.emit_relaxed_binop(
3556                    AssemblerX64::emit_mov,
3557                    Size::S16,
3558                    target_value,
3559                    Location::Memory(addr, 0),
3560                )
3561            },
3562        )
3563    }
    // x86_64 has a strong memory model, so coherency between all threads (cores) is guaranteed,
    // and an aligned move to or from memory is guaranteed to be atomic,
    // so storing/loading an atomic is a simple mov on x86_64
3567    fn i32_atomic_save(
3568        &mut self,
3569        value: Location,
3570        memarg: &MemArg,
3571        target_addr: Location,
3572        need_check: bool,
3573        imported_memories: bool,
3574        offset: i32,
3575        heap_access_oob: Label,
3576        unaligned_atomic: Label,
3577    ) -> Result<(), CompileError> {
3578        self.memory_op(
3579            target_addr,
3580            memarg,
3581            true,
3582            4,
3583            need_check,
3584            imported_memories,
3585            offset,
3586            heap_access_oob,
3587            unaligned_atomic,
3588            |this, addr| {
3589                this.emit_relaxed_binop(
3590                    AssemblerX64::emit_mov,
3591                    Size::S32,
3592                    value,
3593                    Location::Memory(addr, 0),
3594                )
3595            },
3596        )
3597    }
3598    fn i32_atomic_save_8(
3599        &mut self,
3600        value: Location,
3601        memarg: &MemArg,
3602        target_addr: Location,
3603        need_check: bool,
3604        imported_memories: bool,
3605        offset: i32,
3606        heap_access_oob: Label,
3607        unaligned_atomic: Label,
3608    ) -> Result<(), CompileError> {
3609        self.memory_op(
3610            target_addr,
3611            memarg,
3612            true,
3613            1,
3614            need_check,
3615            imported_memories,
3616            offset,
3617            heap_access_oob,
3618            unaligned_atomic,
3619            |this, addr| {
3620                this.emit_relaxed_binop(
3621                    AssemblerX64::emit_mov,
3622                    Size::S8,
3623                    value,
3624                    Location::Memory(addr, 0),
3625                )
3626            },
3627        )
3628    }
3629    fn i32_atomic_save_16(
3630        &mut self,
3631        value: Location,
3632        memarg: &MemArg,
3633        target_addr: Location,
3634        need_check: bool,
3635        imported_memories: bool,
3636        offset: i32,
3637        heap_access_oob: Label,
3638        unaligned_atomic: Label,
3639    ) -> Result<(), CompileError> {
3640        self.memory_op(
3641            target_addr,
3642            memarg,
3643            true,
3644            2,
3645            need_check,
3646            imported_memories,
3647            offset,
3648            heap_access_oob,
3649            unaligned_atomic,
3650            |this, addr| {
3651                this.emit_relaxed_binop(
3652                    AssemblerX64::emit_mov,
3653                    Size::S16,
3654                    value,
3655                    Location::Memory(addr, 0),
3656                )
3657            },
3658        )
3659    }
3660    // i32 atomic Add with i32
3661    fn i32_atomic_add(
3662        &mut self,
3663        loc: Location,
3664        target: Location,
3665        memarg: &MemArg,
3666        ret: Location,
3667        need_check: bool,
3668        imported_memories: bool,
3669        offset: i32,
3670        heap_access_oob: Label,
3671        unaligned_atomic: Label,
3672    ) -> Result<(), CompileError> {
3673        let value = self.acquire_temp_gpr().ok_or_else(|| {
3674            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3675        })?;
3676        self.move_location(Size::S32, loc, Location::GPR(value))?;
3677        self.memory_op(
3678            target,
3679            memarg,
3680            true,
3681            4,
3682            need_check,
3683            imported_memories,
3684            offset,
3685            heap_access_oob,
3686            unaligned_atomic,
3687            |this, addr| {
3688                this.assembler.emit_lock_xadd(
3689                    Size::S32,
3690                    Location::GPR(value),
3691                    Location::Memory(addr, 0),
3692                )
3693            },
3694        )?;
3695        self.move_location(Size::S32, Location::GPR(value), ret)?;
3696        self.release_gpr(value);
3697        Ok(())
3698    }
3699    // i32 atomic Add with u8
3700    fn i32_atomic_add_8u(
3701        &mut self,
3702        loc: Location,
3703        target: Location,
3704        memarg: &MemArg,
3705        ret: Location,
3706        need_check: bool,
3707        imported_memories: bool,
3708        offset: i32,
3709        heap_access_oob: Label,
3710        unaligned_atomic: Label,
3711    ) -> Result<(), CompileError> {
3712        let value = self.acquire_temp_gpr().ok_or_else(|| {
3713            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3714        })?;
3715        self.move_location_extend(Size::S8, false, loc, Size::S32, Location::GPR(value))?;
3716        self.memory_op(
3717            target,
3718            memarg,
3719            true,
3720            1,
3721            need_check,
3722            imported_memories,
3723            offset,
3724            heap_access_oob,
3725            unaligned_atomic,
3726            |this, addr| {
3727                this.assembler.emit_lock_xadd(
3728                    Size::S8,
3729                    Location::GPR(value),
3730                    Location::Memory(addr, 0),
3731                )
3732            },
3733        )?;
3734        self.move_location(Size::S32, Location::GPR(value), ret)?;
3735        self.release_gpr(value);
3736        Ok(())
3737    }
3738    // i32 atomic Add with u16
3739    fn i32_atomic_add_16u(
3740        &mut self,
3741        loc: Location,
3742        target: Location,
3743        memarg: &MemArg,
3744        ret: Location,
3745        need_check: bool,
3746        imported_memories: bool,
3747        offset: i32,
3748        heap_access_oob: Label,
3749        unaligned_atomic: Label,
3750    ) -> Result<(), CompileError> {
3751        let value = self.acquire_temp_gpr().ok_or_else(|| {
3752            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3753        })?;
3754        self.move_location_extend(Size::S16, false, loc, Size::S32, Location::GPR(value))?;
3755        self.memory_op(
3756            target,
3757            memarg,
3758            true,
3759            2,
3760            need_check,
3761            imported_memories,
3762            offset,
3763            heap_access_oob,
3764            unaligned_atomic,
3765            |this, addr| {
3766                this.assembler.emit_lock_xadd(
3767                    Size::S16,
3768                    Location::GPR(value),
3769                    Location::Memory(addr, 0),
3770                )
3771            },
3772        )?;
3773        self.move_location(Size::S32, Location::GPR(value), ret)?;
3774        self.release_gpr(value);
3775        Ok(())
3776    }
3777    // i32 atomic Sub with i32
3778    fn i32_atomic_sub(
3779        &mut self,
3780        loc: Location,
3781        target: Location,
3782        memarg: &MemArg,
3783        ret: Location,
3784        need_check: bool,
3785        imported_memories: bool,
3786        offset: i32,
3787        heap_access_oob: Label,
3788        unaligned_atomic: Label,
3789    ) -> Result<(), CompileError> {
3790        let value = self.acquire_temp_gpr().ok_or_else(|| {
3791            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3792        })?;
3793        self.location_neg(Size::S32, false, loc, Size::S32, Location::GPR(value))?;
3794        self.memory_op(
3795            target,
3796            memarg,
3797            true,
3798            4,
3799            need_check,
3800            imported_memories,
3801            offset,
3802            heap_access_oob,
3803            unaligned_atomic,
3804            |this, addr| {
3805                this.assembler.emit_lock_xadd(
3806                    Size::S32,
3807                    Location::GPR(value),
3808                    Location::Memory(addr, 0),
3809                )
3810            },
3811        )?;
3812        self.move_location(Size::S32, Location::GPR(value), ret)?;
3813        self.release_gpr(value);
3814        Ok(())
3815    }
3816    // i32 atomic Sub with u8
3817    fn i32_atomic_sub_8u(
3818        &mut self,
3819        loc: Location,
3820        target: Location,
3821        memarg: &MemArg,
3822        ret: Location,
3823        need_check: bool,
3824        imported_memories: bool,
3825        offset: i32,
3826        heap_access_oob: Label,
3827        unaligned_atomic: Label,
3828    ) -> Result<(), CompileError> {
3829        let value = self.acquire_temp_gpr().ok_or_else(|| {
3830            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3831        })?;
3832        self.location_neg(Size::S8, false, loc, Size::S32, Location::GPR(value))?;
3833        self.memory_op(
3834            target,
3835            memarg,
3836            true,
3837            1,
3838            need_check,
3839            imported_memories,
3840            offset,
3841            heap_access_oob,
3842            unaligned_atomic,
3843            |this, addr| {
3844                this.assembler.emit_lock_xadd(
3845                    Size::S8,
3846                    Location::GPR(value),
3847                    Location::Memory(addr, 0),
3848                )
3849            },
3850        )?;
3851        self.move_location(Size::S32, Location::GPR(value), ret)?;
3852        self.release_gpr(value);
3853        Ok(())
3854    }
3855    // i32 atomic Sub with u16
3856    fn i32_atomic_sub_16u(
3857        &mut self,
3858        loc: Location,
3859        target: Location,
3860        memarg: &MemArg,
3861        ret: Location,
3862        need_check: bool,
3863        imported_memories: bool,
3864        offset: i32,
3865        heap_access_oob: Label,
3866        unaligned_atomic: Label,
3867    ) -> Result<(), CompileError> {
3868        let value = self.acquire_temp_gpr().ok_or_else(|| {
3869            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3870        })?;
3871        self.location_neg(Size::S16, false, loc, Size::S32, Location::GPR(value))?;
3872        self.memory_op(
3873            target,
3874            memarg,
3875            true,
3876            2,
3877            need_check,
3878            imported_memories,
3879            offset,
3880            heap_access_oob,
3881            unaligned_atomic,
3882            |this, addr| {
3883                this.assembler.emit_lock_xadd(
3884                    Size::S16,
3885                    Location::GPR(value),
3886                    Location::Memory(addr, 0),
3887                )
3888            },
3889        )?;
3890        self.move_location(Size::S32, Location::GPR(value), ret)?;
3891        self.release_gpr(value);
3892        Ok(())
3893    }
3894    // i32 atomic And with i32
3895    fn i32_atomic_and(
3896        &mut self,
3897        loc: Location,
3898        target: Location,
3899        memarg: &MemArg,
3900        ret: Location,
3901        need_check: bool,
3902        imported_memories: bool,
3903        offset: i32,
3904        heap_access_oob: Label,
3905        unaligned_atomic: Label,
3906    ) -> Result<(), CompileError> {
3907        self.emit_compare_and_swap(
3908            loc,
3909            target,
3910            ret,
3911            memarg,
3912            4,
3913            Size::S32,
3914            Size::S32,
3915            need_check,
3916            imported_memories,
3917            offset,
3918            heap_access_oob,
3919            unaligned_atomic,
3920            |this, src, dst| {
3921                this.assembler
3922                    .emit_and(Size::S32, Location::GPR(src), Location::GPR(dst))
3923            },
3924        )
3925    }
3926    // i32 atomic And with u8
3927    fn i32_atomic_and_8u(
3928        &mut self,
3929        loc: Location,
3930        target: Location,
3931        memarg: &MemArg,
3932        ret: Location,
3933        need_check: bool,
3934        imported_memories: bool,
3935        offset: i32,
3936        heap_access_oob: Label,
3937        unaligned_atomic: Label,
3938    ) -> Result<(), CompileError> {
3939        self.emit_compare_and_swap(
3940            loc,
3941            target,
3942            ret,
3943            memarg,
3944            1,
3945            Size::S8,
3946            Size::S32,
3947            need_check,
3948            imported_memories,
3949            offset,
3950            heap_access_oob,
3951            unaligned_atomic,
3952            |this, src, dst| {
3953                this.assembler
3954                    .emit_and(Size::S32, Location::GPR(src), Location::GPR(dst))
3955            },
3956        )
3957    }
3958    // i32 atomic And with u16
3959    fn i32_atomic_and_16u(
3960        &mut self,
3961        loc: Location,
3962        target: Location,
3963        memarg: &MemArg,
3964        ret: Location,
3965        need_check: bool,
3966        imported_memories: bool,
3967        offset: i32,
3968        heap_access_oob: Label,
3969        unaligned_atomic: Label,
3970    ) -> Result<(), CompileError> {
3971        self.emit_compare_and_swap(
3972            loc,
3973            target,
3974            ret,
3975            memarg,
3976            2,
3977            Size::S16,
3978            Size::S32,
3979            need_check,
3980            imported_memories,
3981            offset,
3982            heap_access_oob,
3983            unaligned_atomic,
3984            |this, src, dst| {
3985                this.assembler
3986                    .emit_and(Size::S32, Location::GPR(src), Location::GPR(dst))
3987            },
3988        )
3989    }
3990    // i32 atomic Or with i32
3991    fn i32_atomic_or(
3992        &mut self,
3993        loc: Location,
3994        target: Location,
3995        memarg: &MemArg,
3996        ret: Location,
3997        need_check: bool,
3998        imported_memories: bool,
3999        offset: i32,
4000        heap_access_oob: Label,
4001        unaligned_atomic: Label,
4002    ) -> Result<(), CompileError> {
4003        self.emit_compare_and_swap(
4004            loc,
4005            target,
4006            ret,
4007            memarg,
4008            4,
4009            Size::S32,
4010            Size::S32,
4011            need_check,
4012            imported_memories,
4013            offset,
4014            heap_access_oob,
4015            unaligned_atomic,
4016            |this, src, dst| {
4017                this.assembler
4018                    .emit_or(Size::S32, Location::GPR(src), Location::GPR(dst))
4019            },
4020        )
4021    }
4022    // i32 atomic Or with u8
4023    fn i32_atomic_or_8u(
4024        &mut self,
4025        loc: Location,
4026        target: Location,
4027        memarg: &MemArg,
4028        ret: Location,
4029        need_check: bool,
4030        imported_memories: bool,
4031        offset: i32,
4032        heap_access_oob: Label,
4033        unaligned_atomic: Label,
4034    ) -> Result<(), CompileError> {
4035        self.emit_compare_and_swap(
4036            loc,
4037            target,
4038            ret,
4039            memarg,
4040            1,
4041            Size::S8,
4042            Size::S32,
4043            need_check,
4044            imported_memories,
4045            offset,
4046            heap_access_oob,
4047            unaligned_atomic,
4048            |this, src, dst| {
4049                this.assembler
4050                    .emit_or(Size::S32, Location::GPR(src), Location::GPR(dst))
4051            },
4052        )
4053    }
4054    // i32 atomic Or with u16
4055    fn i32_atomic_or_16u(
4056        &mut self,
4057        loc: Location,
4058        target: Location,
4059        memarg: &MemArg,
4060        ret: Location,
4061        need_check: bool,
4062        imported_memories: bool,
4063        offset: i32,
4064        heap_access_oob: Label,
4065        unaligned_atomic: Label,
4066    ) -> Result<(), CompileError> {
4067        self.emit_compare_and_swap(
4068            loc,
4069            target,
4070            ret,
4071            memarg,
4072            2,
4073            Size::S16,
4074            Size::S32,
4075            need_check,
4076            imported_memories,
4077            offset,
4078            heap_access_oob,
4079            unaligned_atomic,
4080            |this, src, dst| {
4081                this.assembler
4082                    .emit_or(Size::S32, Location::GPR(src), Location::GPR(dst))
4083            },
4084        )
4085    }
4086    // i32 atomic Xor with i32
4087    fn i32_atomic_xor(
4088        &mut self,
4089        loc: Location,
4090        target: Location,
4091        memarg: &MemArg,
4092        ret: Location,
4093        need_check: bool,
4094        imported_memories: bool,
4095        offset: i32,
4096        heap_access_oob: Label,
4097        unaligned_atomic: Label,
4098    ) -> Result<(), CompileError> {
4099        self.emit_compare_and_swap(
4100            loc,
4101            target,
4102            ret,
4103            memarg,
4104            4,
4105            Size::S32,
4106            Size::S32,
4107            need_check,
4108            imported_memories,
4109            offset,
4110            heap_access_oob,
4111            unaligned_atomic,
4112            |this, src, dst| {
4113                this.assembler
4114                    .emit_xor(Size::S32, Location::GPR(src), Location::GPR(dst))
4115            },
4116        )
4117    }
4118    // i32 atomic Xor with u8
4119    fn i32_atomic_xor_8u(
4120        &mut self,
4121        loc: Location,
4122        target: Location,
4123        memarg: &MemArg,
4124        ret: Location,
4125        need_check: bool,
4126        imported_memories: bool,
4127        offset: i32,
4128        heap_access_oob: Label,
4129        unaligned_atomic: Label,
4130    ) -> Result<(), CompileError> {
4131        self.emit_compare_and_swap(
4132            loc,
4133            target,
4134            ret,
4135            memarg,
4136            1,
4137            Size::S8,
4138            Size::S32,
4139            need_check,
4140            imported_memories,
4141            offset,
4142            heap_access_oob,
4143            unaligned_atomic,
4144            |this, src, dst| {
4145                this.assembler
4146                    .emit_xor(Size::S32, Location::GPR(src), Location::GPR(dst))
4147            },
4148        )
4149    }
4150    // i32 atomic Xor with u16
4151    fn i32_atomic_xor_16u(
4152        &mut self,
4153        loc: Location,
4154        target: Location,
4155        memarg: &MemArg,
4156        ret: Location,
4157        need_check: bool,
4158        imported_memories: bool,
4159        offset: i32,
4160        heap_access_oob: Label,
4161        unaligned_atomic: Label,
4162    ) -> Result<(), CompileError> {
4163        self.emit_compare_and_swap(
4164            loc,
4165            target,
4166            ret,
4167            memarg,
4168            2,
4169            Size::S16,
4170            Size::S32,
4171            need_check,
4172            imported_memories,
4173            offset,
4174            heap_access_oob,
4175            unaligned_atomic,
4176            |this, src, dst| {
4177                this.assembler
4178                    .emit_xor(Size::S32, Location::GPR(src), Location::GPR(dst))
4179            },
4180        )
4181    }
4182    // i32 atomic Exchange with i32
4183    fn i32_atomic_xchg(
4184        &mut self,
4185        loc: Location,
4186        target: Location,
4187        memarg: &MemArg,
4188        ret: Location,
4189        need_check: bool,
4190        imported_memories: bool,
4191        offset: i32,
4192        heap_access_oob: Label,
4193        unaligned_atomic: Label,
4194    ) -> Result<(), CompileError> {
4195        let value = self.acquire_temp_gpr().ok_or_else(|| {
4196            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
4197        })?;
4198        self.move_location(Size::S32, loc, Location::GPR(value))?;
4199        self.memory_op(
4200            target,
4201            memarg,
4202            true,
4203            4,
4204            need_check,
4205            imported_memories,
4206            offset,
4207            heap_access_oob,
4208            unaligned_atomic,
4209            |this, addr| {
4210                this.assembler
4211                    .emit_xchg(Size::S32, Location::GPR(value), Location::Memory(addr, 0))
4212            },
4213        )?;
4214        self.move_location(Size::S32, Location::GPR(value), ret)?;
4215        self.release_gpr(value);
4216        Ok(())
4217    }
4218    // i32 atomic Exchange with u8
4219    fn i32_atomic_xchg_8u(
4220        &mut self,
4221        loc: Location,
4222        target: Location,
4223        memarg: &MemArg,
4224        ret: Location,
4225        need_check: bool,
4226        imported_memories: bool,
4227        offset: i32,
4228        heap_access_oob: Label,
4229        unaligned_atomic: Label,
4230    ) -> Result<(), CompileError> {
4231        let value = self.acquire_temp_gpr().ok_or_else(|| {
4232            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
4233        })?;
4234        self.assembler
4235            .emit_movzx(Size::S8, loc, Size::S32, Location::GPR(value))?;
4236        self.memory_op(
4237            target,
4238            memarg,
4239            true,
4240            1,
4241            need_check,
4242            imported_memories,
4243            offset,
4244            heap_access_oob,
4245            unaligned_atomic,
4246            |this, addr| {
4247                this.assembler
4248                    .emit_xchg(Size::S8, Location::GPR(value), Location::Memory(addr, 0))
4249            },
4250        )?;
4251        self.move_location(Size::S32, Location::GPR(value), ret)?;
4252        self.release_gpr(value);
4253        Ok(())
4254    }
4255    // i32 atomic Exchange with u16
4256    fn i32_atomic_xchg_16u(
4257        &mut self,
4258        loc: Location,
4259        target: Location,
4260        memarg: &MemArg,
4261        ret: Location,
4262        need_check: bool,
4263        imported_memories: bool,
4264        offset: i32,
4265        heap_access_oob: Label,
4266        unaligned_atomic: Label,
4267    ) -> Result<(), CompileError> {
4268        let value = self.acquire_temp_gpr().ok_or_else(|| {
4269            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
4270        })?;
4271        self.assembler
4272            .emit_movzx(Size::S16, loc, Size::S32, Location::GPR(value))?;
4273        self.memory_op(
4274            target,
4275            memarg,
4276            true,
4277            2,
4278            need_check,
4279            imported_memories,
4280            offset,
4281            heap_access_oob,
4282            unaligned_atomic,
4283            |this, addr| {
4284                this.assembler
4285                    .emit_xchg(Size::S16, Location::GPR(value), Location::Memory(addr, 0))
4286            },
4287        )?;
4288        self.move_location(Size::S32, Location::GPR(value), ret)?;
4289        self.release_gpr(value);
4290        Ok(())
4291    }
    // i32 atomic Compare-Exchange with i32 (`i32.atomic.rmw.cmpxchg`)
    fn i32_atomic_cmpxchg(
        &mut self,
        new: Location,
        cmp: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        // CMPXCHG requires the comparand in RAX, so reserve it explicitly.
        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
        // Choose a register for the replacement value out of R12/R13/R14,
        // avoiding whichever of them `cmp` or `new` already occupies.
        let value = if cmp == Location::GPR(GPR::R14) {
            if new == Location::GPR(GPR::R13) {
                GPR::R12
            } else {
                GPR::R13
            }
        } else {
            GPR::R14
        };
        // The chosen register is not formally acquired, so preserve its
        // current contents across the operation with a push/pop pair.
        self.assembler.emit_push(Size::S64, Location::GPR(value))?;
        self.assembler
            .emit_mov(Size::S32, cmp, Location::GPR(compare))?;
        self.assembler
            .emit_mov(Size::S32, new, Location::GPR(value))?;

        self.memory_op(
            target,
            memarg,
            true,
            4,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                // LOCK CMPXCHG leaves the previous memory value in EAX
                // (on success EAX already held it; on failure it is loaded),
                // which is exactly the value the wasm op returns.
                this.assembler.emit_lock_cmpxchg(
                    Size::S32,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )?;
                this.assembler
                    .emit_mov(Size::S32, Location::GPR(compare), ret)
            },
        )?;
        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
        self.release_gpr(compare);
        Ok(())
    }
    // i32 atomic Compare-Exchange with u8 (`i32.atomic.rmw8.cmpxchg_u`)
    fn i32_atomic_cmpxchg_8u(
        &mut self,
        new: Location,
        cmp: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        // CMPXCHG requires the comparand in RAX, so reserve it explicitly.
        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
        // Choose a register for the replacement value out of R12/R13/R14,
        // avoiding whichever of them `cmp` or `new` already occupies.
        let value = if cmp == Location::GPR(GPR::R14) {
            if new == Location::GPR(GPR::R13) {
                GPR::R12
            } else {
                GPR::R13
            }
        } else {
            GPR::R14
        };
        // The chosen register is not formally acquired, so preserve its
        // current contents across the operation with a push/pop pair.
        self.assembler.emit_push(Size::S64, Location::GPR(value))?;
        self.assembler
            .emit_mov(Size::S32, cmp, Location::GPR(compare))?;
        self.assembler
            .emit_mov(Size::S32, new, Location::GPR(value))?;

        self.memory_op(
            target,
            memarg,
            true,
            1,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                // Byte-sized LOCK CMPXCHG leaves the previous memory byte
                // in AL; zero-extend it to produce the 32-bit wasm result.
                this.assembler.emit_lock_cmpxchg(
                    Size::S8,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )?;
                this.assembler
                    .emit_movzx(Size::S8, Location::GPR(compare), Size::S32, ret)
            },
        )?;
        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
        self.release_gpr(compare);
        Ok(())
    }
    // i32 atomic Compare-Exchange with u16 (`i32.atomic.rmw16.cmpxchg_u`)
    fn i32_atomic_cmpxchg_16u(
        &mut self,
        new: Location,
        cmp: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        // CMPXCHG requires the comparand in RAX, so reserve it explicitly.
        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
        // Choose a register for the replacement value out of R12/R13/R14,
        // avoiding whichever of them `cmp` or `new` already occupies.
        let value = if cmp == Location::GPR(GPR::R14) {
            if new == Location::GPR(GPR::R13) {
                GPR::R12
            } else {
                GPR::R13
            }
        } else {
            GPR::R14
        };
        // The chosen register is not formally acquired, so preserve its
        // current contents across the operation with a push/pop pair.
        self.assembler.emit_push(Size::S64, Location::GPR(value))?;
        self.assembler
            .emit_mov(Size::S32, cmp, Location::GPR(compare))?;
        self.assembler
            .emit_mov(Size::S32, new, Location::GPR(value))?;

        self.memory_op(
            target,
            memarg,
            true,
            2,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                // Word-sized LOCK CMPXCHG leaves the previous memory word
                // in AX; zero-extend it to produce the 32-bit wasm result.
                this.assembler.emit_lock_cmpxchg(
                    Size::S16,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )?;
                this.assembler
                    .emit_movzx(Size::S16, Location::GPR(compare), Size::S32, ret)
            },
        )?;
        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
        self.release_gpr(compare);
        Ok(())
    }
4454
    /// Emits a near call whose real target is filled in later via relocation.
    ///
    /// A `call` to the immediately-following label is emitted (opcode E8
    /// followed by a rel32 field), and an `X86CallPCRel4` relocation is
    /// recorded pointing at the rel32 field — one byte past the E8 opcode.
    /// The addend of -4 accounts for rel32 being relative to the address of
    /// the *next* instruction (4 bytes past the field itself).
    fn emit_call_with_reloc(
        &mut self,
        _calling_convention: CallingConvention,
        reloc_target: RelocationTarget,
    ) -> Result<Vec<Relocation>, CompileError> {
        let mut relocations = vec![];
        let next = self.get_label();
        let reloc_at = self.assembler.get_offset().0 + 1; // skip E8
        self.assembler.emit_call_label(next)?;
        self.emit_label(next)?;
        relocations.push(Relocation {
            kind: RelocationKind::X86CallPCRel4,
            reloc_target,
            offset: reloc_at as u32,
            addend: -4,
        });
        Ok(relocations)
    }
4473
4474    fn emit_binop_add64(
4475        &mut self,
4476        loc_a: Location,
4477        loc_b: Location,
4478        ret: Location,
4479    ) -> Result<(), CompileError> {
4480        self.emit_binop_i64(AssemblerX64::emit_add, loc_a, loc_b, ret)
4481    }
4482    fn emit_binop_sub64(
4483        &mut self,
4484        loc_a: Location,
4485        loc_b: Location,
4486        ret: Location,
4487    ) -> Result<(), CompileError> {
4488        self.emit_binop_i64(AssemblerX64::emit_sub, loc_a, loc_b, ret)
4489    }
4490    fn emit_binop_mul64(
4491        &mut self,
4492        loc_a: Location,
4493        loc_b: Location,
4494        ret: Location,
4495    ) -> Result<(), CompileError> {
4496        self.emit_binop_i64(AssemblerX64::emit_imul, loc_a, loc_b, ret)
4497    }
4498    fn emit_binop_udiv64(
4499        &mut self,
4500        loc_a: Location,
4501        loc_b: Location,
4502        ret: Location,
4503        integer_division_by_zero: Label,
4504    ) -> Result<usize, CompileError> {
4505        // We assume that RAX and RDX are temporary registers here.
4506        self.assembler
4507            .emit_mov(Size::S64, loc_a, Location::GPR(GPR::RAX))?;
4508        self.assembler
4509            .emit_xor(Size::S64, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX))?;
4510        let offset = self.emit_relaxed_xdiv(
4511            AssemblerX64::emit_div,
4512            Size::S64,
4513            loc_b,
4514            integer_division_by_zero,
4515        )?;
4516        self.assembler
4517            .emit_mov(Size::S64, Location::GPR(GPR::RAX), ret)?;
4518        Ok(offset)
4519    }
4520    fn emit_binop_sdiv64(
4521        &mut self,
4522        loc_a: Location,
4523        loc_b: Location,
4524        ret: Location,
4525        integer_division_by_zero: Label,
4526        _integer_overflow: Label,
4527    ) -> Result<usize, CompileError> {
4528        // We assume that RAX and RDX are temporary registers here.
4529        self.assembler
4530            .emit_mov(Size::S64, loc_a, Location::GPR(GPR::RAX))?;
4531        self.assembler.emit_cqo()?;
4532        let offset = self.emit_relaxed_xdiv(
4533            AssemblerX64::emit_idiv,
4534            Size::S64,
4535            loc_b,
4536            integer_division_by_zero,
4537        )?;
4538        self.assembler
4539            .emit_mov(Size::S64, Location::GPR(GPR::RAX), ret)?;
4540        Ok(offset)
4541    }
4542    fn emit_binop_urem64(
4543        &mut self,
4544        loc_a: Location,
4545        loc_b: Location,
4546        ret: Location,
4547        integer_division_by_zero: Label,
4548    ) -> Result<usize, CompileError> {
4549        // We assume that RAX and RDX are temporary registers here.
4550        self.assembler
4551            .emit_mov(Size::S64, loc_a, Location::GPR(GPR::RAX))?;
4552        self.assembler
4553            .emit_xor(Size::S64, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX))?;
4554        let offset = self.emit_relaxed_xdiv(
4555            AssemblerX64::emit_div,
4556            Size::S64,
4557            loc_b,
4558            integer_division_by_zero,
4559        )?;
4560        self.assembler
4561            .emit_mov(Size::S64, Location::GPR(GPR::RDX), ret)?;
4562        Ok(offset)
4563    }
    /// `i64.rem_s`: signed 64-bit remainder.
    ///
    /// `i64::MIN % -1` would raise a hardware #DE via IDIV even though the
    /// wasm result is defined as 0, so that single combination is detected
    /// up front and short-circuited to 0 without dividing.
    fn emit_binop_srem64(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
        integer_division_by_zero: Label,
    ) -> Result<usize, CompileError> {
        // We assume that RAX and RDX are temporary registers here.
        let normal_path = self.assembler.get_label();
        let end = self.assembler.get_label();

        // If loc_a != i64::MIN or loc_b != -1, take the normal IDIV path.
        self.emit_relaxed_cmp(Size::S64, Location::Imm64(0x8000000000000000u64), loc_a)?;
        self.assembler.emit_jmp(Condition::NotEqual, normal_path)?;
        self.emit_relaxed_cmp(Size::S64, Location::Imm64(0xffffffffffffffffu64), loc_b)?;
        self.assembler.emit_jmp(Condition::NotEqual, normal_path)?;
        // i64::MIN % -1 == 0 by the wasm spec.
        self.move_location(Size::S64, Location::Imm64(0), ret)?;
        self.assembler.emit_jmp(Condition::None, end)?;

        self.emit_label(normal_path)?;
        self.assembler
            .emit_mov(Size::S64, loc_a, Location::GPR(GPR::RAX))?;
        // Sign-extend RAX into RDX to form the 128-bit dividend.
        self.assembler.emit_cqo()?;
        let offset = self.emit_relaxed_xdiv(
            AssemblerX64::emit_idiv,
            Size::S64,
            loc_b,
            integer_division_by_zero,
        )?;
        // The remainder lands in RDX.
        self.assembler
            .emit_mov(Size::S64, Location::GPR(GPR::RDX), ret)?;

        self.emit_label(end)?;
        Ok(offset)
    }
4598    fn emit_binop_and64(
4599        &mut self,
4600        loc_a: Location,
4601        loc_b: Location,
4602        ret: Location,
4603    ) -> Result<(), CompileError> {
4604        self.emit_binop_i64(AssemblerX64::emit_and, loc_a, loc_b, ret)
4605    }
4606    fn emit_binop_or64(
4607        &mut self,
4608        loc_a: Location,
4609        loc_b: Location,
4610        ret: Location,
4611    ) -> Result<(), CompileError> {
4612        self.emit_binop_i64(AssemblerX64::emit_or, loc_a, loc_b, ret)
4613    }
4614    fn emit_binop_xor64(
4615        &mut self,
4616        loc_a: Location,
4617        loc_b: Location,
4618        ret: Location,
4619    ) -> Result<(), CompileError> {
4620        self.emit_binop_i64(AssemblerX64::emit_xor, loc_a, loc_b, ret)
4621    }
4622    fn i64_cmp_ge_s(
4623        &mut self,
4624        loc_a: Location,
4625        loc_b: Location,
4626        ret: Location,
4627    ) -> Result<(), CompileError> {
4628        self.emit_cmpop_i64_dynamic_b(Condition::GreaterEqual, loc_a, loc_b, ret)
4629    }
4630    fn i64_cmp_gt_s(
4631        &mut self,
4632        loc_a: Location,
4633        loc_b: Location,
4634        ret: Location,
4635    ) -> Result<(), CompileError> {
4636        self.emit_cmpop_i64_dynamic_b(Condition::Greater, loc_a, loc_b, ret)
4637    }
4638    fn i64_cmp_le_s(
4639        &mut self,
4640        loc_a: Location,
4641        loc_b: Location,
4642        ret: Location,
4643    ) -> Result<(), CompileError> {
4644        self.emit_cmpop_i64_dynamic_b(Condition::LessEqual, loc_a, loc_b, ret)
4645    }
4646    fn i64_cmp_lt_s(
4647        &mut self,
4648        loc_a: Location,
4649        loc_b: Location,
4650        ret: Location,
4651    ) -> Result<(), CompileError> {
4652        self.emit_cmpop_i64_dynamic_b(Condition::Less, loc_a, loc_b, ret)
4653    }
4654    fn i64_cmp_ge_u(
4655        &mut self,
4656        loc_a: Location,
4657        loc_b: Location,
4658        ret: Location,
4659    ) -> Result<(), CompileError> {
4660        self.emit_cmpop_i64_dynamic_b(Condition::AboveEqual, loc_a, loc_b, ret)
4661    }
4662    fn i64_cmp_gt_u(
4663        &mut self,
4664        loc_a: Location,
4665        loc_b: Location,
4666        ret: Location,
4667    ) -> Result<(), CompileError> {
4668        self.emit_cmpop_i64_dynamic_b(Condition::Above, loc_a, loc_b, ret)
4669    }
4670    fn i64_cmp_le_u(
4671        &mut self,
4672        loc_a: Location,
4673        loc_b: Location,
4674        ret: Location,
4675    ) -> Result<(), CompileError> {
4676        self.emit_cmpop_i64_dynamic_b(Condition::BelowEqual, loc_a, loc_b, ret)
4677    }
4678    fn i64_cmp_lt_u(
4679        &mut self,
4680        loc_a: Location,
4681        loc_b: Location,
4682        ret: Location,
4683    ) -> Result<(), CompileError> {
4684        self.emit_cmpop_i64_dynamic_b(Condition::Below, loc_a, loc_b, ret)
4685    }
4686    fn i64_cmp_ne(
4687        &mut self,
4688        loc_a: Location,
4689        loc_b: Location,
4690        ret: Location,
4691    ) -> Result<(), CompileError> {
4692        self.emit_cmpop_i64_dynamic_b(Condition::NotEqual, loc_a, loc_b, ret)
4693    }
4694    fn i64_cmp_eq(
4695        &mut self,
4696        loc_a: Location,
4697        loc_b: Location,
4698        ret: Location,
4699    ) -> Result<(), CompileError> {
4700        self.emit_cmpop_i64_dynamic_b(Condition::Equal, loc_a, loc_b, ret)
4701    }
4702    fn i64_clz(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
4703        let src = match loc {
4704            Location::Imm64(_) | Location::Imm32(_) | Location::Memory(_, _) => {
4705                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
4706                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
4707                })?;
4708                self.move_location(Size::S64, loc, Location::GPR(tmp))?;
4709                tmp
4710            }
4711            Location::GPR(reg) => reg,
4712            _ => {
4713                codegen_error!("singlepass i64_clz unreachable");
4714            }
4715        };
4716        let dst = match ret {
4717            Location::Memory(_, _) => self.acquire_temp_gpr().ok_or_else(|| {
4718                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
4719            })?,
4720            Location::GPR(reg) => reg,
4721            _ => {
4722                codegen_error!("singlepass i64_clz unreachable");
4723            }
4724        };
4725
4726        if self.assembler.arch_has_xzcnt() {
4727            self.assembler
4728                .arch_emit_lzcnt(Size::S64, Location::GPR(src), Location::GPR(dst))?;
4729        } else {
4730            let zero_path = self.assembler.get_label();
4731            let end = self.assembler.get_label();
4732
4733            self.assembler.emit_test_gpr_64(src)?;
4734            self.assembler.emit_jmp(Condition::Equal, zero_path)?;
4735            self.assembler
4736                .emit_bsr(Size::S64, Location::GPR(src), Location::GPR(dst))?;
4737            self.assembler
4738                .emit_xor(Size::S64, Location::Imm32(63), Location::GPR(dst))?;
4739            self.assembler.emit_jmp(Condition::None, end)?;
4740            self.emit_label(zero_path)?;
4741            self.move_location(Size::S64, Location::Imm32(64), Location::GPR(dst))?;
4742            self.emit_label(end)?;
4743        }
4744        match loc {
4745            Location::Imm64(_) | Location::Memory(_, _) => {
4746                self.release_gpr(src);
4747            }
4748            _ => {}
4749        };
4750        if let Location::Memory(_, _) = ret {
4751            self.move_location(Size::S64, Location::GPR(dst), ret)?;
4752            self.release_gpr(dst);
4753        };
4754        Ok(())
4755    }
    /// `i64.ctz`: count trailing zeros of a 64-bit value.
    ///
    /// Uses TZCNT when available; otherwise falls back to BSF with an
    /// explicit zero check (BSF's destination is undefined for a zero
    /// source, and ctz(0) must be 64).
    fn i64_ctz(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        // BSF/TZCNT need a register source: copy immediates and memory
        // operands into a scratch GPR first.
        let src = match loc {
            Location::Imm64(_) | Location::Imm32(_) | Location::Memory(_, _) => {
                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.move_location(Size::S64, loc, Location::GPR(tmp))?;
                tmp
            }
            Location::GPR(reg) => reg,
            _ => {
                codegen_error!("singlepass i64_ctz unreachable");
            }
        };
        // The result must be computed in a register; spill to `ret`
        // afterwards when `ret` is a memory location.
        let dst = match ret {
            Location::Memory(_, _) => self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?,
            Location::GPR(reg) => reg,
            _ => {
                codegen_error!("singlepass i64_ctz unreachable");
            }
        };

        if self.assembler.arch_has_xzcnt() {
            // TZCNT handles a zero source directly.
            self.assembler
                .arch_emit_tzcnt(Size::S64, Location::GPR(src), Location::GPR(dst))?;
        } else {
            // Zero source takes a separate path that produces 64.
            let zero_path = self.assembler.get_label();
            let end = self.assembler.get_label();

            self.assembler.emit_test_gpr_64(src)?;
            self.assembler.emit_jmp(Condition::Equal, zero_path)?;
            self.assembler
                .emit_bsf(Size::S64, Location::GPR(src), Location::GPR(dst))?;
            self.assembler.emit_jmp(Condition::None, end)?;
            self.emit_label(zero_path)?;
            self.move_location(Size::S64, Location::Imm64(64), Location::GPR(dst))?;
            self.emit_label(end)?;
        }

        // Release any scratch registers acquired above.
        match loc {
            Location::Imm64(_) | Location::Imm32(_) | Location::Memory(_, _) => {
                self.release_gpr(src);
            }
            _ => {}
        };
        if let Location::Memory(_, _) = ret {
            self.move_location(Size::S64, Location::GPR(dst), ret)?;
            self.release_gpr(dst);
        };
        Ok(())
    }
    /// `i64.popcnt`: count set bits of a 64-bit value.
    ///
    /// POPCNT cannot take an immediate source and needs a register
    /// destination, so immediates are staged in a temp GPR and memory
    /// destinations go through a temp GPR as well.
    fn i64_popcnt(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        match loc {
            // Immediate source: materialize it in a scratch register first.
            Location::Imm64(_) | Location::Imm32(_) => {
                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.move_location(Size::S64, loc, Location::GPR(tmp))?;
                if let Location::Memory(_, _) = ret {
                    // Memory destination: compute into a scratch register,
                    // then spill.
                    let out_tmp = self.acquire_temp_gpr().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                    })?;
                    self.assembler.emit_popcnt(
                        Size::S64,
                        Location::GPR(tmp),
                        Location::GPR(out_tmp),
                    )?;
                    self.move_location(Size::S64, Location::GPR(out_tmp), ret)?;
                    self.release_gpr(out_tmp);
                } else {
                    self.assembler
                        .emit_popcnt(Size::S64, Location::GPR(tmp), ret)?;
                }
                self.release_gpr(tmp);
            }
            // Register or memory source: POPCNT can read it directly.
            Location::Memory(_, _) | Location::GPR(_) => {
                if let Location::Memory(_, _) = ret {
                    // Memory destination: compute into a scratch register,
                    // then spill.
                    let out_tmp = self.acquire_temp_gpr().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                    })?;
                    self.assembler
                        .emit_popcnt(Size::S64, loc, Location::GPR(out_tmp))?;
                    self.move_location(Size::S64, Location::GPR(out_tmp), ret)?;
                    self.release_gpr(out_tmp);
                } else {
                    self.assembler.emit_popcnt(Size::S64, loc, ret)?;
                }
            }
            _ => {
                codegen_error!("singlepass i64_popcnt unreachable");
            }
        }
        Ok(())
    }
4852    fn i64_shl(
4853        &mut self,
4854        loc_a: Location,
4855        loc_b: Location,
4856        ret: Location,
4857    ) -> Result<(), CompileError> {
4858        self.emit_shift_i64(AssemblerX64::emit_shl, loc_a, loc_b, ret)
4859    }
4860    fn i64_shr(
4861        &mut self,
4862        loc_a: Location,
4863        loc_b: Location,
4864        ret: Location,
4865    ) -> Result<(), CompileError> {
4866        self.emit_shift_i64(AssemblerX64::emit_shr, loc_a, loc_b, ret)
4867    }
4868    fn i64_sar(
4869        &mut self,
4870        loc_a: Location,
4871        loc_b: Location,
4872        ret: Location,
4873    ) -> Result<(), CompileError> {
4874        self.emit_shift_i64(AssemblerX64::emit_sar, loc_a, loc_b, ret)
4875    }
4876    fn i64_rol(
4877        &mut self,
4878        loc_a: Location,
4879        loc_b: Location,
4880        ret: Location,
4881    ) -> Result<(), CompileError> {
4882        self.emit_shift_i64(AssemblerX64::emit_rol, loc_a, loc_b, ret)
4883    }
4884    fn i64_ror(
4885        &mut self,
4886        loc_a: Location,
4887        loc_b: Location,
4888        ret: Location,
4889    ) -> Result<(), CompileError> {
4890        self.emit_shift_i64(AssemblerX64::emit_ror, loc_a, loc_b, ret)
4891    }
4892    fn i64_load(
4893        &mut self,
4894        addr: Location,
4895        memarg: &MemArg,
4896        ret: Location,
4897        need_check: bool,
4898        imported_memories: bool,
4899        offset: i32,
4900        heap_access_oob: Label,
4901        unaligned_atomic: Label,
4902    ) -> Result<(), CompileError> {
4903        self.memory_op(
4904            addr,
4905            memarg,
4906            false,
4907            8,
4908            need_check,
4909            imported_memories,
4910            offset,
4911            heap_access_oob,
4912            unaligned_atomic,
4913            |this, addr| {
4914                this.emit_relaxed_binop(
4915                    AssemblerX64::emit_mov,
4916                    Size::S64,
4917                    Location::Memory(addr, 0),
4918                    ret,
4919                )
4920            },
4921        )
4922    }
4923    fn i64_load_8u(
4924        &mut self,
4925        addr: Location,
4926        memarg: &MemArg,
4927        ret: Location,
4928        need_check: bool,
4929        imported_memories: bool,
4930        offset: i32,
4931        heap_access_oob: Label,
4932        unaligned_atomic: Label,
4933    ) -> Result<(), CompileError> {
4934        self.memory_op(
4935            addr,
4936            memarg,
4937            false,
4938            1,
4939            need_check,
4940            imported_memories,
4941            offset,
4942            heap_access_oob,
4943            unaligned_atomic,
4944            |this, addr| {
4945                this.emit_relaxed_zx_sx(
4946                    AssemblerX64::emit_movzx,
4947                    Size::S8,
4948                    Location::Memory(addr, 0),
4949                    Size::S64,
4950                    ret,
4951                )
4952            },
4953        )
4954    }
4955    fn i64_load_8s(
4956        &mut self,
4957        addr: Location,
4958        memarg: &MemArg,
4959        ret: Location,
4960        need_check: bool,
4961        imported_memories: bool,
4962        offset: i32,
4963        heap_access_oob: Label,
4964        unaligned_atomic: Label,
4965    ) -> Result<(), CompileError> {
4966        self.memory_op(
4967            addr,
4968            memarg,
4969            false,
4970            1,
4971            need_check,
4972            imported_memories,
4973            offset,
4974            heap_access_oob,
4975            unaligned_atomic,
4976            |this, addr| {
4977                this.emit_relaxed_zx_sx(
4978                    AssemblerX64::emit_movsx,
4979                    Size::S8,
4980                    Location::Memory(addr, 0),
4981                    Size::S64,
4982                    ret,
4983                )
4984            },
4985        )
4986    }
    /// i64.load16_u: read two bytes at the effective address and zero-extend
    /// (`movzx`) them into the 64-bit `ret` (2-byte, non-atomic access).
    fn i64_load_16u(
        &mut self,
        addr: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            addr,
            memarg,
            false,
            2,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.emit_relaxed_zx_sx(
                    AssemblerX64::emit_movzx,
                    Size::S16,
                    Location::Memory(addr, 0),
                    Size::S64,
                    ret,
                )
            },
        )
    }
    /// i64.load16_s: read two bytes at the effective address and sign-extend
    /// (`movsx`) them into the 64-bit `ret` (2-byte, non-atomic access).
    fn i64_load_16s(
        &mut self,
        addr: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            addr,
            memarg,
            false,
            2,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.emit_relaxed_zx_sx(
                    AssemblerX64::emit_movsx,
                    Size::S16,
                    Location::Memory(addr, 0),
                    Size::S64,
                    ret,
                )
            },
        )
    }
5051    fn i64_load_32u(
5052        &mut self,
5053        addr: Location,
5054        memarg: &MemArg,
5055        ret: Location,
5056        need_check: bool,
5057        imported_memories: bool,
5058        offset: i32,
5059        heap_access_oob: Label,
5060        unaligned_atomic: Label,
5061    ) -> Result<(), CompileError> {
5062        self.memory_op(
5063            addr,
5064            memarg,
5065            false,
5066            4,
5067            need_check,
5068            imported_memories,
5069            offset,
5070            heap_access_oob,
5071            unaligned_atomic,
5072            |this, addr| {
5073                match ret {
5074                    Location::GPR(_) => {}
5075                    Location::Memory(base, offset) => {
5076                        this.assembler.emit_mov(
5077                            Size::S32,
5078                            Location::Imm32(0),
5079                            Location::Memory(base, offset + 4),
5080                        )?; // clear upper bits
5081                    }
5082                    _ => {
5083                        codegen_error!("singlepass i64_load_32u unreacahble");
5084                    }
5085                }
5086                this.emit_relaxed_binop(
5087                    AssemblerX64::emit_mov,
5088                    Size::S32,
5089                    Location::Memory(addr, 0),
5090                    ret,
5091                )
5092            },
5093        )
5094    }
    /// i64.load32_s: read four bytes at the effective address and sign-extend
    /// (`movsx`) them into the 64-bit `ret` (4-byte, non-atomic access).
    fn i64_load_32s(
        &mut self,
        addr: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            addr,
            memarg,
            false,
            4,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.emit_relaxed_zx_sx(
                    AssemblerX64::emit_movsx,
                    Size::S32,
                    Location::Memory(addr, 0),
                    Size::S64,
                    ret,
                )
            },
        )
    }
    /// i64.atomic.load: 8-byte load. Passes `true` to `memory_op` (the flag
    /// the non-atomic variants pass as `false` — presumably enabling the
    /// alignment check that jumps to `unaligned_atomic`; aligned 64-bit x86
    /// loads are naturally atomic, so a plain mov suffices).
    fn i64_atomic_load(
        &mut self,
        addr: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            addr,
            memarg,
            true,
            8,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| this.emit_relaxed_mov(Size::S64, Location::Memory(addr, 0), ret),
        )
    }
    /// i64.atomic.load8_u: 1-byte atomic load, zero-extended into the 64-bit
    /// `ret`.
    fn i64_atomic_load_8u(
        &mut self,
        addr: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            addr,
            memarg,
            true,
            1,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.emit_relaxed_zero_extension(
                    Size::S8,
                    Location::Memory(addr, 0),
                    Size::S64,
                    ret,
                )
            },
        )
    }
    /// i64.atomic.load16_u: 2-byte atomic load, zero-extended into the 64-bit
    /// `ret`.
    fn i64_atomic_load_16u(
        &mut self,
        addr: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            addr,
            memarg,
            true,
            2,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.emit_relaxed_zero_extension(
                    Size::S16,
                    Location::Memory(addr, 0),
                    Size::S64,
                    ret,
                )
            },
        )
    }
    /// i64.atomic.load32_u: 4-byte atomic load, zero-extended into the 64-bit
    /// `ret`. If `ret` is a memory slot, the upper dword is zeroed first since
    /// a 32-bit store to memory does not touch the adjacent 4 bytes (a GPR
    /// destination needs no clearing — 32-bit moves zero the register's top).
    fn i64_atomic_load_32u(
        &mut self,
        addr: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            addr,
            memarg,
            true,
            4,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                match ret {
                    Location::GPR(_) => {}
                    Location::Memory(base, offset) => {
                        this.move_location(
                            Size::S32,
                            Location::Imm32(0),
                            Location::Memory(base, offset + 4),
                        )?; // clear upper bits
                    }
                    _ => {
                        codegen_error!("singlepass i64_atomic_load_32u unreachable");
                    }
                }
                this.emit_relaxed_zero_extension(
                    Size::S32,
                    Location::Memory(addr, 0),
                    Size::S64,
                    ret,
                )
            },
        )
    }
    /// i64.store: write the full 8 bytes of `target_value` to the effective
    /// address (8-byte, non-atomic access).
    fn i64_save(
        &mut self,
        target_value: Location,
        memarg: &MemArg,
        target_addr: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            target_addr,
            memarg,
            false,
            8,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.emit_relaxed_binop(
                    AssemblerX64::emit_mov,
                    Size::S64,
                    target_value,
                    Location::Memory(addr, 0),
                )
            },
        )
    }
    /// i64.store8: write the low byte of `target_value` to the effective
    /// address (1-byte, non-atomic access).
    fn i64_save_8(
        &mut self,
        target_value: Location,
        memarg: &MemArg,
        target_addr: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            target_addr,
            memarg,
            false,
            1,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.emit_relaxed_binop(
                    AssemblerX64::emit_mov,
                    Size::S8,
                    target_value,
                    Location::Memory(addr, 0),
                )
            },
        )
    }
    /// i64.store16: write the low 2 bytes of `target_value` to the effective
    /// address (2-byte, non-atomic access).
    fn i64_save_16(
        &mut self,
        target_value: Location,
        memarg: &MemArg,
        target_addr: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            target_addr,
            memarg,
            false,
            2,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.emit_relaxed_binop(
                    AssemblerX64::emit_mov,
                    Size::S16,
                    target_value,
                    Location::Memory(addr, 0),
                )
            },
        )
    }
    /// i64.store32: write the low 4 bytes of `target_value` to the effective
    /// address (4-byte, non-atomic access).
    fn i64_save_32(
        &mut self,
        target_value: Location,
        memarg: &MemArg,
        target_addr: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            target_addr,
            memarg,
            false,
            4,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.emit_relaxed_binop(
                    AssemblerX64::emit_mov,
                    Size::S32,
                    target_value,
                    Location::Memory(addr, 0),
                )
            },
        )
    }
    /// i64.atomic.store: 8-byte atomic store, emitted as an `xchg` (which is
    /// implicitly locked on x86 and gives the required seq-cst ordering).
    fn i64_atomic_save(
        &mut self,
        value: Location,
        memarg: &MemArg,
        target_addr: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            target_addr,
            memarg,
            true,
            8,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| this.emit_relaxed_atomic_xchg(Size::S64, value, Location::Memory(addr, 0)),
        )
    }
    /// i64.atomic.store8: 1-byte atomic store via `xchg`.
    fn i64_atomic_save_8(
        &mut self,
        value: Location,
        memarg: &MemArg,
        target_addr: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            target_addr,
            memarg,
            true,
            1,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| this.emit_relaxed_atomic_xchg(Size::S8, value, Location::Memory(addr, 0)),
        )
    }
    /// i64.atomic.store16: 2-byte atomic store via `xchg`.
    fn i64_atomic_save_16(
        &mut self,
        value: Location,
        memarg: &MemArg,
        target_addr: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            target_addr,
            memarg,
            true,
            2,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| this.emit_relaxed_atomic_xchg(Size::S16, value, Location::Memory(addr, 0)),
        )
    }
5453    fn i64_atomic_save_32(
5454        &mut self,
5455        value: Location,
5456        memarg: &MemArg,
5457        target_addr: Location,
5458        need_check: bool,
5459        imported_memories: bool,
5460        offset: i32,
5461        heap_access_oob: Label,
5462        unaligned_atomic: Label,
5463    ) -> Result<(), CompileError> {
5464        self.memory_op(
5465            target_addr,
5466            memarg,
5467            true,
5468            2,
5469            need_check,
5470            imported_memories,
5471            offset,
5472            heap_access_oob,
5473            unaligned_atomic,
5474            |this, addr| this.emit_relaxed_atomic_xchg(Size::S32, value, Location::Memory(addr, 0)),
5475        )
5476    }
5477    // i64 atomic Add with i64
5478    fn i64_atomic_add(
5479        &mut self,
5480        loc: Location,
5481        target: Location,
5482        memarg: &MemArg,
5483        ret: Location,
5484        need_check: bool,
5485        imported_memories: bool,
5486        offset: i32,
5487        heap_access_oob: Label,
5488        unaligned_atomic: Label,
5489    ) -> Result<(), CompileError> {
5490        let value = self.acquire_temp_gpr().ok_or_else(|| {
5491            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
5492        })?;
5493        self.move_location(Size::S64, loc, Location::GPR(value))?;
5494        self.memory_op(
5495            target,
5496            memarg,
5497            true,
5498            8,
5499            need_check,
5500            imported_memories,
5501            offset,
5502            heap_access_oob,
5503            unaligned_atomic,
5504            |this, addr| {
5505                this.assembler.emit_lock_xadd(
5506                    Size::S64,
5507                    Location::GPR(value),
5508                    Location::Memory(addr, 0),
5509                )
5510            },
5511        )?;
5512        self.move_location(Size::S64, Location::GPR(value), ret)?;
5513        self.release_gpr(value);
5514        Ok(())
5515    }
5516    // i64 atomic Add with u8
5517    fn i64_atomic_add_8u(
5518        &mut self,
5519        loc: Location,
5520        target: Location,
5521        memarg: &MemArg,
5522        ret: Location,
5523        need_check: bool,
5524        imported_memories: bool,
5525        offset: i32,
5526        heap_access_oob: Label,
5527        unaligned_atomic: Label,
5528    ) -> Result<(), CompileError> {
5529        let value = self.acquire_temp_gpr().ok_or_else(|| {
5530            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
5531        })?;
5532        self.move_location_extend(Size::S8, false, loc, Size::S64, Location::GPR(value))?;
5533        self.memory_op(
5534            target,
5535            memarg,
5536            true,
5537            1,
5538            need_check,
5539            imported_memories,
5540            offset,
5541            heap_access_oob,
5542            unaligned_atomic,
5543            |this, addr| {
5544                this.assembler.emit_lock_xadd(
5545                    Size::S8,
5546                    Location::GPR(value),
5547                    Location::Memory(addr, 0),
5548                )
5549            },
5550        )?;
5551        self.move_location(Size::S64, Location::GPR(value), ret)?;
5552        self.release_gpr(value);
5553        Ok(())
5554    }
5555    // i64 atomic Add with u16
5556    fn i64_atomic_add_16u(
5557        &mut self,
5558        loc: Location,
5559        target: Location,
5560        memarg: &MemArg,
5561        ret: Location,
5562        need_check: bool,
5563        imported_memories: bool,
5564        offset: i32,
5565        heap_access_oob: Label,
5566        unaligned_atomic: Label,
5567    ) -> Result<(), CompileError> {
5568        let value = self.acquire_temp_gpr().ok_or_else(|| {
5569            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
5570        })?;
5571        self.move_location_extend(Size::S16, false, loc, Size::S64, Location::GPR(value))?;
5572        self.memory_op(
5573            target,
5574            memarg,
5575            true,
5576            2,
5577            need_check,
5578            imported_memories,
5579            offset,
5580            heap_access_oob,
5581            unaligned_atomic,
5582            |this, addr| {
5583                this.assembler.emit_lock_xadd(
5584                    Size::S16,
5585                    Location::GPR(value),
5586                    Location::Memory(addr, 0),
5587                )
5588            },
5589        )?;
5590        self.move_location(Size::S64, Location::GPR(value), ret)?;
5591        self.release_gpr(value);
5592        Ok(())
5593    }
5594    // i64 atomic Add with u32
5595    fn i64_atomic_add_32u(
5596        &mut self,
5597        loc: Location,
5598        target: Location,
5599        memarg: &MemArg,
5600        ret: Location,
5601        need_check: bool,
5602        imported_memories: bool,
5603        offset: i32,
5604        heap_access_oob: Label,
5605        unaligned_atomic: Label,
5606    ) -> Result<(), CompileError> {
5607        let value = self.acquire_temp_gpr().ok_or_else(|| {
5608            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
5609        })?;
5610        self.move_location_extend(Size::S32, false, loc, Size::S64, Location::GPR(value))?;
5611        self.memory_op(
5612            target,
5613            memarg,
5614            true,
5615            4,
5616            need_check,
5617            imported_memories,
5618            offset,
5619            heap_access_oob,
5620            unaligned_atomic,
5621            |this, addr| {
5622                this.assembler.emit_lock_xadd(
5623                    Size::S32,
5624                    Location::GPR(value),
5625                    Location::Memory(addr, 0),
5626                )
5627            },
5628        )?;
5629        self.move_location(Size::S64, Location::GPR(value), ret)?;
5630        self.release_gpr(value);
5631        Ok(())
5632    }
5633    // i64 atomic Sub with i64
5634    fn i64_atomic_sub(
5635        &mut self,
5636        loc: Location,
5637        target: Location,
5638        memarg: &MemArg,
5639        ret: Location,
5640        need_check: bool,
5641        imported_memories: bool,
5642        offset: i32,
5643        heap_access_oob: Label,
5644        unaligned_atomic: Label,
5645    ) -> Result<(), CompileError> {
5646        let value = self.acquire_temp_gpr().ok_or_else(|| {
5647            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
5648        })?;
5649        self.location_neg(Size::S64, false, loc, Size::S64, Location::GPR(value))?;
5650        self.memory_op(
5651            target,
5652            memarg,
5653            true,
5654            8,
5655            need_check,
5656            imported_memories,
5657            offset,
5658            heap_access_oob,
5659            unaligned_atomic,
5660            |this, addr| {
5661                this.assembler.emit_lock_xadd(
5662                    Size::S64,
5663                    Location::GPR(value),
5664                    Location::Memory(addr, 0),
5665                )
5666            },
5667        )?;
5668        self.move_location(Size::S64, Location::GPR(value), ret)?;
5669        self.release_gpr(value);
5670        Ok(())
5671    }
5672    // i64 atomic Sub with u8
5673    fn i64_atomic_sub_8u(
5674        &mut self,
5675        loc: Location,
5676        target: Location,
5677        memarg: &MemArg,
5678        ret: Location,
5679        need_check: bool,
5680        imported_memories: bool,
5681        offset: i32,
5682        heap_access_oob: Label,
5683        unaligned_atomic: Label,
5684    ) -> Result<(), CompileError> {
5685        let value = self.acquire_temp_gpr().ok_or_else(|| {
5686            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
5687        })?;
5688        self.location_neg(Size::S8, false, loc, Size::S64, Location::GPR(value))?;
5689        self.memory_op(
5690            target,
5691            memarg,
5692            true,
5693            1,
5694            need_check,
5695            imported_memories,
5696            offset,
5697            heap_access_oob,
5698            unaligned_atomic,
5699            |this, addr| {
5700                this.assembler.emit_lock_xadd(
5701                    Size::S8,
5702                    Location::GPR(value),
5703                    Location::Memory(addr, 0),
5704                )
5705            },
5706        )?;
5707        self.move_location(Size::S64, Location::GPR(value), ret)?;
5708        self.release_gpr(value);
5709        Ok(())
5710    }
5711    // i64 atomic Sub with u16
5712    fn i64_atomic_sub_16u(
5713        &mut self,
5714        loc: Location,
5715        target: Location,
5716        memarg: &MemArg,
5717        ret: Location,
5718        need_check: bool,
5719        imported_memories: bool,
5720        offset: i32,
5721        heap_access_oob: Label,
5722        unaligned_atomic: Label,
5723    ) -> Result<(), CompileError> {
5724        let value = self.acquire_temp_gpr().ok_or_else(|| {
5725            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
5726        })?;
5727        self.location_neg(Size::S16, false, loc, Size::S64, Location::GPR(value))?;
5728        self.memory_op(
5729            target,
5730            memarg,
5731            true,
5732            2,
5733            need_check,
5734            imported_memories,
5735            offset,
5736            heap_access_oob,
5737            unaligned_atomic,
5738            |this, addr| {
5739                this.assembler.emit_lock_xadd(
5740                    Size::S16,
5741                    Location::GPR(value),
5742                    Location::Memory(addr, 0),
5743                )
5744            },
5745        )?;
5746        self.move_location(Size::S64, Location::GPR(value), ret)?;
5747        self.release_gpr(value);
5748        Ok(())
5749    }
5750    // i64 atomic Sub with u32
5751    fn i64_atomic_sub_32u(
5752        &mut self,
5753        loc: Location,
5754        target: Location,
5755        memarg: &MemArg,
5756        ret: Location,
5757        need_check: bool,
5758        imported_memories: bool,
5759        offset: i32,
5760        heap_access_oob: Label,
5761        unaligned_atomic: Label,
5762    ) -> Result<(), CompileError> {
5763        let value = self.acquire_temp_gpr().ok_or_else(|| {
5764            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
5765        })?;
5766        self.location_neg(Size::S32, false, loc, Size::S64, Location::GPR(value))?;
5767        self.memory_op(
5768            target,
5769            memarg,
5770            true,
5771            4,
5772            need_check,
5773            imported_memories,
5774            offset,
5775            heap_access_oob,
5776            unaligned_atomic,
5777            |this, addr| {
5778                this.assembler.emit_lock_xadd(
5779                    Size::S32,
5780                    Location::GPR(value),
5781                    Location::Memory(addr, 0),
5782                )
5783            },
5784        )?;
5785        self.move_location(Size::S64, Location::GPR(value), ret)?;
5786        self.release_gpr(value);
5787        Ok(())
5788    }
    // i64 atomic And with i64
    /// i64.atomic.rmw.and: 64-bit fetch-AND through the generic
    /// compare-and-swap loop; the closure combines the two staged registers
    /// with `and` to produce the replacement value, and `ret` receives the
    /// previous memory contents.
    fn i64_atomic_and(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.emit_compare_and_swap(
            loc,
            target,
            ret,
            memarg,
            8,
            Size::S64,
            Size::S64,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, src, dst| {
                this.assembler
                    .emit_and(Size::S64, Location::GPR(src), Location::GPR(dst))
            },
        )
    }
    // i64 atomic And with u8
    /// i64.atomic.rmw8.and_u: 1-byte fetch-AND via the compare-and-swap loop
    /// (memory size S8, result width S64).
    fn i64_atomic_and_8u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.emit_compare_and_swap(
            loc,
            target,
            ret,
            memarg,
            1,
            Size::S8,
            Size::S64,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, src, dst| {
                this.assembler
                    .emit_and(Size::S64, Location::GPR(src), Location::GPR(dst))
            },
        )
    }
    // i64 atomic And with u16
    /// i64.atomic.rmw16.and_u: 2-byte fetch-AND via the compare-and-swap loop
    /// (memory size S16, result width S64).
    fn i64_atomic_and_16u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.emit_compare_and_swap(
            loc,
            target,
            ret,
            memarg,
            2,
            Size::S16,
            Size::S64,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, src, dst| {
                this.assembler
                    .emit_and(Size::S64, Location::GPR(src), Location::GPR(dst))
            },
        )
    }
    // i64 atomic And with u32
    /// i64.atomic.rmw32.and_u: 4-byte fetch-AND via the compare-and-swap loop
    /// (memory size S32, result width S64).
    fn i64_atomic_and_32u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.emit_compare_and_swap(
            loc,
            target,
            ret,
            memarg,
            4,
            Size::S32,
            Size::S64,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, src, dst| {
                this.assembler
                    .emit_and(Size::S64, Location::GPR(src), Location::GPR(dst))
            },
        )
    }
    // i64 atomic Or with i64
    /// i64.atomic.rmw.or: 64-bit fetch-OR via the compare-and-swap loop; the
    /// closure ORs the staged registers to form the replacement value.
    fn i64_atomic_or(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.emit_compare_and_swap(
            loc,
            target,
            ret,
            memarg,
            8,
            Size::S64,
            Size::S64,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, src, dst| {
                this.location_or(Size::S64, Location::GPR(src), Location::GPR(dst), false)
            },
        )
    }
    // i64 atomic Or with u8
    /// i64.atomic.rmw8.or_u: 1-byte fetch-OR via the compare-and-swap loop
    /// (memory size S8, result width S64).
    fn i64_atomic_or_8u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.emit_compare_and_swap(
            loc,
            target,
            ret,
            memarg,
            1,
            Size::S8,
            Size::S64,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, src, dst| {
                this.location_or(Size::S64, Location::GPR(src), Location::GPR(dst), false)
            },
        )
    }
    // i64 atomic Or with u16
    /// i64.atomic.rmw16.or_u: 2-byte fetch-OR via the compare-and-swap loop
    /// (memory size S16, result width S64).
    fn i64_atomic_or_16u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.emit_compare_and_swap(
            loc,
            target,
            ret,
            memarg,
            2,
            Size::S16,
            Size::S64,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, src, dst| {
                this.location_or(Size::S64, Location::GPR(src), Location::GPR(dst), false)
            },
        )
    }
    // i64 atomic Or with u32
    /// i64.atomic.rmw32.or_u: 4-byte fetch-OR via the compare-and-swap loop
    /// (memory size S32, result width S64).
    fn i64_atomic_or_32u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.emit_compare_and_swap(
            loc,
            target,
            ret,
            memarg,
            4,
            Size::S32,
            Size::S64,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, src, dst| {
                this.location_or(Size::S64, Location::GPR(src), Location::GPR(dst), false)
            },
        )
    }
6041    // i64 atomic xor with i64
6042    fn i64_atomic_xor(
6043        &mut self,
6044        loc: Location,
6045        target: Location,
6046        memarg: &MemArg,
6047        ret: Location,
6048        need_check: bool,
6049        imported_memories: bool,
6050        offset: i32,
6051        heap_access_oob: Label,
6052        unaligned_atomic: Label,
6053    ) -> Result<(), CompileError> {
6054        self.emit_compare_and_swap(
6055            loc,
6056            target,
6057            ret,
6058            memarg,
6059            8,
6060            Size::S64,
6061            Size::S64,
6062            need_check,
6063            imported_memories,
6064            offset,
6065            heap_access_oob,
6066            unaligned_atomic,
6067            |this, src, dst| {
6068                this.location_xor(Size::S64, Location::GPR(src), Location::GPR(dst), false)
6069            },
6070        )
6071    }
6072    // i64 atomic xor with u8
6073    fn i64_atomic_xor_8u(
6074        &mut self,
6075        loc: Location,
6076        target: Location,
6077        memarg: &MemArg,
6078        ret: Location,
6079        need_check: bool,
6080        imported_memories: bool,
6081        offset: i32,
6082        heap_access_oob: Label,
6083        unaligned_atomic: Label,
6084    ) -> Result<(), CompileError> {
6085        self.emit_compare_and_swap(
6086            loc,
6087            target,
6088            ret,
6089            memarg,
6090            1,
6091            Size::S8,
6092            Size::S64,
6093            need_check,
6094            imported_memories,
6095            offset,
6096            heap_access_oob,
6097            unaligned_atomic,
6098            |this, src, dst| {
6099                this.location_xor(Size::S64, Location::GPR(src), Location::GPR(dst), false)
6100            },
6101        )
6102    }
6103    // i64 atomic xor with u16
6104    fn i64_atomic_xor_16u(
6105        &mut self,
6106        loc: Location,
6107        target: Location,
6108        memarg: &MemArg,
6109        ret: Location,
6110        need_check: bool,
6111        imported_memories: bool,
6112        offset: i32,
6113        heap_access_oob: Label,
6114        unaligned_atomic: Label,
6115    ) -> Result<(), CompileError> {
6116        self.emit_compare_and_swap(
6117            loc,
6118            target,
6119            ret,
6120            memarg,
6121            2,
6122            Size::S16,
6123            Size::S64,
6124            need_check,
6125            imported_memories,
6126            offset,
6127            heap_access_oob,
6128            unaligned_atomic,
6129            |this, src, dst| {
6130                this.location_xor(Size::S64, Location::GPR(src), Location::GPR(dst), false)
6131            },
6132        )
6133    }
6134    // i64 atomic xor with u32
6135    fn i64_atomic_xor_32u(
6136        &mut self,
6137        loc: Location,
6138        target: Location,
6139        memarg: &MemArg,
6140        ret: Location,
6141        need_check: bool,
6142        imported_memories: bool,
6143        offset: i32,
6144        heap_access_oob: Label,
6145        unaligned_atomic: Label,
6146    ) -> Result<(), CompileError> {
6147        self.emit_compare_and_swap(
6148            loc,
6149            target,
6150            ret,
6151            memarg,
6152            4,
6153            Size::S32,
6154            Size::S64,
6155            need_check,
6156            imported_memories,
6157            offset,
6158            heap_access_oob,
6159            unaligned_atomic,
6160            |this, src, dst| {
6161                this.location_xor(Size::S64, Location::GPR(src), Location::GPR(dst), false)
6162            },
6163        )
6164    }
6165    // i64 atomic Exchange with i64
6166    fn i64_atomic_xchg(
6167        &mut self,
6168        loc: Location,
6169        target: Location,
6170        memarg: &MemArg,
6171        ret: Location,
6172        need_check: bool,
6173        imported_memories: bool,
6174        offset: i32,
6175        heap_access_oob: Label,
6176        unaligned_atomic: Label,
6177    ) -> Result<(), CompileError> {
6178        let value = self.acquire_temp_gpr().ok_or_else(|| {
6179            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
6180        })?;
6181        self.move_location(Size::S64, loc, Location::GPR(value))?;
6182        self.memory_op(
6183            target,
6184            memarg,
6185            true,
6186            8,
6187            need_check,
6188            imported_memories,
6189            offset,
6190            heap_access_oob,
6191            unaligned_atomic,
6192            |this, addr| {
6193                this.assembler
6194                    .emit_xchg(Size::S64, Location::GPR(value), Location::Memory(addr, 0))
6195            },
6196        )?;
6197        self.move_location(Size::S64, Location::GPR(value), ret)?;
6198        self.release_gpr(value);
6199        Ok(())
6200    }
6201    // i64 atomic Exchange with u8
6202    fn i64_atomic_xchg_8u(
6203        &mut self,
6204        loc: Location,
6205        target: Location,
6206        memarg: &MemArg,
6207        ret: Location,
6208        need_check: bool,
6209        imported_memories: bool,
6210        offset: i32,
6211        heap_access_oob: Label,
6212        unaligned_atomic: Label,
6213    ) -> Result<(), CompileError> {
6214        let value = self.acquire_temp_gpr().ok_or_else(|| {
6215            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
6216        })?;
6217        self.assembler
6218            .emit_movzx(Size::S8, loc, Size::S64, Location::GPR(value))?;
6219        self.memory_op(
6220            target,
6221            memarg,
6222            true,
6223            1,
6224            need_check,
6225            imported_memories,
6226            offset,
6227            heap_access_oob,
6228            unaligned_atomic,
6229            |this, addr| {
6230                this.assembler
6231                    .emit_xchg(Size::S8, Location::GPR(value), Location::Memory(addr, 0))
6232            },
6233        )?;
6234        self.move_location(Size::S64, Location::GPR(value), ret)?;
6235        self.release_gpr(value);
6236        Ok(())
6237    }
6238    // i64 atomic Exchange with u16
6239    fn i64_atomic_xchg_16u(
6240        &mut self,
6241        loc: Location,
6242        target: Location,
6243        memarg: &MemArg,
6244        ret: Location,
6245        need_check: bool,
6246        imported_memories: bool,
6247        offset: i32,
6248        heap_access_oob: Label,
6249        unaligned_atomic: Label,
6250    ) -> Result<(), CompileError> {
6251        let value = self.acquire_temp_gpr().ok_or_else(|| {
6252            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
6253        })?;
6254        self.assembler
6255            .emit_movzx(Size::S16, loc, Size::S64, Location::GPR(value))?;
6256        self.memory_op(
6257            target,
6258            memarg,
6259            true,
6260            2,
6261            need_check,
6262            imported_memories,
6263            offset,
6264            heap_access_oob,
6265            unaligned_atomic,
6266            |this, addr| {
6267                this.assembler
6268                    .emit_xchg(Size::S16, Location::GPR(value), Location::Memory(addr, 0))
6269            },
6270        )?;
6271        self.move_location(Size::S64, Location::GPR(value), ret)?;
6272        self.release_gpr(value);
6273        Ok(())
6274    }
6275    // i64 atomic Exchange with u32
6276    fn i64_atomic_xchg_32u(
6277        &mut self,
6278        loc: Location,
6279        target: Location,
6280        memarg: &MemArg,
6281        ret: Location,
6282        need_check: bool,
6283        imported_memories: bool,
6284        offset: i32,
6285        heap_access_oob: Label,
6286        unaligned_atomic: Label,
6287    ) -> Result<(), CompileError> {
6288        let value = self.acquire_temp_gpr().ok_or_else(|| {
6289            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
6290        })?;
6291        self.assembler
6292            .emit_movzx(Size::S32, loc, Size::S64, Location::GPR(value))?;
6293        self.memory_op(
6294            target,
6295            memarg,
6296            true,
6297            4,
6298            need_check,
6299            imported_memories,
6300            offset,
6301            heap_access_oob,
6302            unaligned_atomic,
6303            |this, addr| {
6304                this.assembler
6305                    .emit_xchg(Size::S32, Location::GPR(value), Location::Memory(addr, 0))
6306            },
6307        )?;
6308        self.move_location(Size::S64, Location::GPR(value), ret)?;
6309        self.release_gpr(value);
6310        Ok(())
6311    }
    // i64 atomic CompareExchange with i64
    fn i64_atomic_cmpxchg(
        &mut self,
        new: Location,
        cmp: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        // CMPXCHG implicitly compares against (and writes the old memory
        // value into) RAX, so RAX is claimed to hold the `cmp` operand.
        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
        // Pick a scratch register for `new` that does not alias the `cmp`
        // or `new` locations themselves: R14, else R13, else R12.
        let value = if cmp == Location::GPR(GPR::R14) {
            if new == Location::GPR(GPR::R13) {
                GPR::R12
            } else {
                GPR::R13
            }
        } else {
            GPR::R14
        };
        // Preserve the scratch register's previous contents across the op.
        self.assembler.emit_push(Size::S64, Location::GPR(value))?;
        self.assembler
            .emit_mov(Size::S64, cmp, Location::GPR(compare))?;
        self.assembler
            .emit_mov(Size::S64, new, Location::GPR(value))?;

        self.memory_op(
            target,
            memarg,
            true,
            8,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                // lock cmpxchg [addr], value: stores `value` iff [addr]==RAX;
                // either way RAX ends up holding the old memory contents,
                // which is what the wasm instruction returns.
                this.assembler.emit_lock_cmpxchg(
                    Size::S64,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )?;
                this.assembler
                    .emit_mov(Size::S64, Location::GPR(compare), ret)
            },
        )?;
        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
        self.release_gpr(compare);
        Ok(())
    }
    // i64 atomic CompareExchange with u8
    fn i64_atomic_cmpxchg_8u(
        &mut self,
        new: Location,
        cmp: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        // CMPXCHG implicitly compares against (and writes the old memory
        // value into) RAX, so RAX is claimed to hold the `cmp` operand.
        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
        // Pick a scratch register for `new` that does not alias the `cmp`
        // or `new` locations themselves: R14, else R13, else R12.
        let value = if cmp == Location::GPR(GPR::R14) {
            if new == Location::GPR(GPR::R13) {
                GPR::R12
            } else {
                GPR::R13
            }
        } else {
            GPR::R14
        };
        // Preserve the scratch register's previous contents across the op.
        self.assembler.emit_push(Size::S64, Location::GPR(value))?;
        self.assembler
            .emit_mov(Size::S64, cmp, Location::GPR(compare))?;
        self.assembler
            .emit_mov(Size::S64, new, Location::GPR(value))?;

        self.memory_op(
            target,
            memarg,
            true,
            1,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                // Byte-sized lock cmpxchg; afterwards AL holds the old byte.
                this.assembler.emit_lock_cmpxchg(
                    Size::S8,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )?;
                // Zero-extend the old byte into the 64-bit wasm result.
                this.assembler
                    .emit_movzx(Size::S8, Location::GPR(compare), Size::S64, ret)
            },
        )?;
        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
        self.release_gpr(compare);
        Ok(())
    }
    // i64 atomic CompareExchange with u16
    fn i64_atomic_cmpxchg_16u(
        &mut self,
        new: Location,
        cmp: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        // CMPXCHG implicitly compares against (and writes the old memory
        // value into) RAX, so RAX is claimed to hold the `cmp` operand.
        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
        // Pick a scratch register for `new` that does not alias the `cmp`
        // or `new` locations themselves: R14, else R13, else R12.
        let value = if cmp == Location::GPR(GPR::R14) {
            if new == Location::GPR(GPR::R13) {
                GPR::R12
            } else {
                GPR::R13
            }
        } else {
            GPR::R14
        };
        // Preserve the scratch register's previous contents across the op.
        self.assembler.emit_push(Size::S64, Location::GPR(value))?;
        self.assembler
            .emit_mov(Size::S64, cmp, Location::GPR(compare))?;
        self.assembler
            .emit_mov(Size::S64, new, Location::GPR(value))?;

        self.memory_op(
            target,
            memarg,
            true,
            2,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                // 16-bit lock cmpxchg; afterwards AX holds the old value.
                this.assembler.emit_lock_cmpxchg(
                    Size::S16,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )?;
                // Zero-extend the old 16 bits into the 64-bit wasm result.
                this.assembler
                    .emit_movzx(Size::S16, Location::GPR(compare), Size::S64, ret)
            },
        )?;
        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
        self.release_gpr(compare);
        Ok(())
    }
    // i64 atomic CompareExchange with u32
    fn i64_atomic_cmpxchg_32u(
        &mut self,
        new: Location,
        cmp: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        // CMPXCHG implicitly compares against (and writes the old memory
        // value into) RAX, so RAX is claimed to hold the `cmp` operand.
        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
        // Pick a scratch register for `new` that does not alias the `cmp`
        // or `new` locations themselves: R14, else R13, else R12.
        let value = if cmp == Location::GPR(GPR::R14) {
            if new == Location::GPR(GPR::R13) {
                GPR::R12
            } else {
                GPR::R13
            }
        } else {
            GPR::R14
        };
        // Preserve the scratch register's previous contents across the op.
        self.assembler.emit_push(Size::S64, Location::GPR(value))?;
        self.assembler
            .emit_mov(Size::S64, cmp, Location::GPR(compare))?;
        self.assembler
            .emit_mov(Size::S64, new, Location::GPR(value))?;

        self.memory_op(
            target,
            memarg,
            true,
            4,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                // 32-bit lock cmpxchg; afterwards EAX holds the old value.
                this.assembler.emit_lock_cmpxchg(
                    Size::S32,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )?;
                // A 32-bit mov zero-extends into the 64-bit destination on
                // x86-64, producing the zero-extended wasm result.
                this.assembler
                    .emit_mov(Size::S32, Location::GPR(compare), ret)
            },
        )?;
        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
        self.release_gpr(compare);
        Ok(())
    }
6528
6529    fn f32_load(
6530        &mut self,
6531        addr: Location,
6532        memarg: &MemArg,
6533        ret: Location,
6534        need_check: bool,
6535        imported_memories: bool,
6536        offset: i32,
6537        heap_access_oob: Label,
6538        unaligned_atomic: Label,
6539    ) -> Result<(), CompileError> {
6540        self.memory_op(
6541            addr,
6542            memarg,
6543            false,
6544            4,
6545            need_check,
6546            imported_memories,
6547            offset,
6548            heap_access_oob,
6549            unaligned_atomic,
6550            |this, addr| {
6551                this.emit_relaxed_binop(
6552                    AssemblerX64::emit_mov,
6553                    Size::S32,
6554                    Location::Memory(addr, 0),
6555                    ret,
6556                )
6557            },
6558        )
6559    }
6560    fn f32_save(
6561        &mut self,
6562        target_value: Location,
6563        memarg: &MemArg,
6564        target_addr: Location,
6565        canonicalize: bool,
6566        need_check: bool,
6567        imported_memories: bool,
6568        offset: i32,
6569        heap_access_oob: Label,
6570        unaligned_atomic: Label,
6571    ) -> Result<(), CompileError> {
6572        self.memory_op(
6573            target_addr,
6574            memarg,
6575            false,
6576            4,
6577            need_check,
6578            imported_memories,
6579            offset,
6580            heap_access_oob,
6581            unaligned_atomic,
6582            |this, addr| {
6583                if !canonicalize {
6584                    this.emit_relaxed_binop(
6585                        AssemblerX64::emit_mov,
6586                        Size::S32,
6587                        target_value,
6588                        Location::Memory(addr, 0),
6589                    )
6590                } else {
6591                    this.canonicalize_nan(Size::S32, target_value, Location::Memory(addr, 0))
6592                }
6593            },
6594        )
6595    }
6596    fn f64_load(
6597        &mut self,
6598        addr: Location,
6599        memarg: &MemArg,
6600        ret: Location,
6601        need_check: bool,
6602        imported_memories: bool,
6603        offset: i32,
6604        heap_access_oob: Label,
6605        unaligned_atomic: Label,
6606    ) -> Result<(), CompileError> {
6607        self.memory_op(
6608            addr,
6609            memarg,
6610            false,
6611            8,
6612            need_check,
6613            imported_memories,
6614            offset,
6615            heap_access_oob,
6616            unaligned_atomic,
6617            |this, addr| {
6618                this.emit_relaxed_binop(
6619                    AssemblerX64::emit_mov,
6620                    Size::S64,
6621                    Location::Memory(addr, 0),
6622                    ret,
6623                )
6624            },
6625        )
6626    }
6627    fn f64_save(
6628        &mut self,
6629        target_value: Location,
6630        memarg: &MemArg,
6631        target_addr: Location,
6632        canonicalize: bool,
6633        need_check: bool,
6634        imported_memories: bool,
6635        offset: i32,
6636        heap_access_oob: Label,
6637        unaligned_atomic: Label,
6638    ) -> Result<(), CompileError> {
6639        self.memory_op(
6640            target_addr,
6641            memarg,
6642            false,
6643            8,
6644            need_check,
6645            imported_memories,
6646            offset,
6647            heap_access_oob,
6648            unaligned_atomic,
6649            |this, addr| {
6650                if !canonicalize {
6651                    this.emit_relaxed_binop(
6652                        AssemblerX64::emit_mov,
6653                        Size::S64,
6654                        target_value,
6655                        Location::Memory(addr, 0),
6656                    )
6657                } else {
6658                    this.canonicalize_nan(Size::S64, target_value, Location::Memory(addr, 0))
6659                }
6660            },
6661        )
6662    }
6663
    // Produce an f64 in `ret` from the 64-bit integer in `loc`
    // (signed or unsigned per `signed`).
    fn convert_f64_i64(
        &mut self,
        loc: Location,
        signed: bool,
        ret: Location,
    ) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp_in = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        if self.assembler.arch_has_fconverti() {
            // Target exposes native int->float converts for both
            // signednesses; use them directly.
            self.emit_relaxed_mov(Size::S64, loc, Location::GPR(tmp_in))?;
            if signed {
                self.assembler.arch_emit_f64_convert_si64(tmp_in, tmp_out)?;
            } else {
                self.assembler.arch_emit_f64_convert_ui64(tmp_in, tmp_out)?;
            }
            self.emit_relaxed_mov(Size::S64, Location::SIMD(tmp_out), ret)?;
        } else if signed {
            // x86 CVTSI2SD converts a signed 64-bit integer directly.
            self.assembler
                .emit_mov(Size::S64, loc, Location::GPR(tmp_in))?;
            self.assembler
                .emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            self.move_location(Size::S64, Location::SIMD(tmp_out), ret)?;
        } else {
            // Unsigned u64 -> f64: x86 only has a *signed* 64-bit convert.
            // If the sign bit is clear, the signed convert is already
            // correct; otherwise compute (x >> 1) | (x & 1) (halve, keeping
            // the low bit so rounding is preserved), convert that, and
            // double the result with an addition.
            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;

            let do_convert = self.assembler.get_label();
            let end_convert = self.assembler.get_label();

            self.assembler
                .emit_mov(Size::S64, loc, Location::GPR(tmp_in))?;
            // Sign flag decides whether the value fits a signed i64.
            self.assembler.emit_test_gpr_64(tmp_in)?;
            self.assembler.emit_jmp(Condition::Signed, do_convert)?;
            // Fast path: top bit clear, plain signed convert.
            self.assembler
                .emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            self.assembler.emit_jmp(Condition::None, end_convert)?;
            self.emit_label(do_convert)?;
            // Slow path: tmp = x & 1; tmp_in = (x >> 1) | tmp.
            self.move_location(Size::S64, Location::GPR(tmp_in), Location::GPR(tmp))?;
            self.assembler
                .emit_and(Size::S64, Location::Imm32(1), Location::GPR(tmp))?;
            self.assembler
                .emit_shr(Size::S64, Location::Imm8(1), Location::GPR(tmp_in))?;
            self.assembler
                .emit_or(Size::S64, Location::GPR(tmp), Location::GPR(tmp_in))?;
            self.assembler
                .emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            // Double the halved result back to the full magnitude.
            self.assembler
                .emit_vaddsd(tmp_out, XMMOrMemory::XMM(tmp_out), tmp_out)?;
            self.emit_label(end_convert)?;
            self.move_location(Size::S64, Location::SIMD(tmp_out), ret)?;

            self.release_gpr(tmp);
        }
        self.release_gpr(tmp_in);
        self.release_simd(tmp_out);
        Ok(())
    }
    // Produce an f64 in `ret` from the 32-bit integer in `loc`
    // (signed or unsigned per `signed`).
    fn convert_f64_i32(
        &mut self,
        loc: Location,
        signed: bool,
        ret: Location,
    ) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp_in = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        if self.assembler.arch_has_fconverti() {
            // Target exposes native int->float converts for both
            // signednesses; use them directly.
            self.emit_relaxed_mov(Size::S32, loc, Location::GPR(tmp_in))?;
            if signed {
                self.assembler.arch_emit_f64_convert_si32(tmp_in, tmp_out)?;
            } else {
                self.assembler.arch_emit_f64_convert_ui32(tmp_in, tmp_out)?;
            }
            self.emit_relaxed_mov(Size::S64, Location::SIMD(tmp_out), ret)?;
        } else {
            // A 32-bit mov zero-extends into the full 64-bit register on
            // x86-64, so the unsigned case can reuse the *signed* 64-bit
            // convert on the zero-extended value.
            self.assembler
                .emit_mov(Size::S32, loc, Location::GPR(tmp_in))?;
            if signed {
                self.assembler
                    .emit_vcvtsi2sd_32(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            } else {
                self.assembler
                    .emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            }
            self.move_location(Size::S64, Location::SIMD(tmp_out), ret)?;
        }
        self.release_gpr(tmp_in);
        self.release_simd(tmp_out);
        Ok(())
    }
    // Produce an f32 in `ret` from the 64-bit integer in `loc`
    // (signed or unsigned per `signed`).
    fn convert_f32_i64(
        &mut self,
        loc: Location,
        signed: bool,
        ret: Location,
    ) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp_in = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        if self.assembler.arch_has_fconverti() {
            // Target exposes native int->float converts for both
            // signednesses; use them directly.
            self.emit_relaxed_mov(Size::S64, loc, Location::GPR(tmp_in))?;
            if signed {
                self.assembler.arch_emit_f32_convert_si64(tmp_in, tmp_out)?;
            } else {
                self.assembler.arch_emit_f32_convert_ui64(tmp_in, tmp_out)?;
            }
            self.emit_relaxed_mov(Size::S32, Location::SIMD(tmp_out), ret)?;
        } else if signed {
            // x86 CVTSI2SS converts a signed 64-bit integer directly.
            self.assembler
                .emit_mov(Size::S64, loc, Location::GPR(tmp_in))?;
            self.assembler
                .emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            self.move_location(Size::S32, Location::SIMD(tmp_out), ret)?;
        } else {
            // Unsigned u64 -> f32: x86 only has a *signed* 64-bit convert.
            // If the sign bit is clear, the signed convert is already
            // correct; otherwise compute (x >> 1) | (x & 1) (halve, keeping
            // the low bit so rounding is preserved), convert that, and
            // double the result with an addition.
            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;

            let do_convert = self.assembler.get_label();
            let end_convert = self.assembler.get_label();

            self.assembler
                .emit_mov(Size::S64, loc, Location::GPR(tmp_in))?;
            // Sign flag decides whether the value fits a signed i64.
            self.assembler.emit_test_gpr_64(tmp_in)?;
            self.assembler.emit_jmp(Condition::Signed, do_convert)?;
            // Fast path: top bit clear, plain signed convert.
            self.assembler
                .emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            self.assembler.emit_jmp(Condition::None, end_convert)?;
            self.emit_label(do_convert)?;
            // Slow path: tmp = x & 1; tmp_in = (x >> 1) | tmp.
            self.move_location(Size::S64, Location::GPR(tmp_in), Location::GPR(tmp))?;
            self.assembler
                .emit_and(Size::S64, Location::Imm32(1), Location::GPR(tmp))?;
            self.assembler
                .emit_shr(Size::S64, Location::Imm8(1), Location::GPR(tmp_in))?;
            self.assembler
                .emit_or(Size::S64, Location::GPR(tmp), Location::GPR(tmp_in))?;
            self.assembler
                .emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            // Double the halved result back to the full magnitude.
            self.assembler
                .emit_vaddss(tmp_out, XMMOrMemory::XMM(tmp_out), tmp_out)?;
            self.emit_label(end_convert)?;
            self.move_location(Size::S32, Location::SIMD(tmp_out), ret)?;

            self.release_gpr(tmp);
        }
        self.release_gpr(tmp_in);
        self.release_simd(tmp_out);
        Ok(())
    }
    // Produce an f32 in `ret` from the 32-bit integer in `loc`
    // (signed or unsigned per `signed`).
    fn convert_f32_i32(
        &mut self,
        loc: Location,
        signed: bool,
        ret: Location,
    ) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp_in = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        if self.assembler.arch_has_fconverti() {
            // Target exposes native int->float converts for both
            // signednesses; use them directly.
            self.emit_relaxed_mov(Size::S32, loc, Location::GPR(tmp_in))?;
            if signed {
                self.assembler.arch_emit_f32_convert_si32(tmp_in, tmp_out)?;
            } else {
                self.assembler.arch_emit_f32_convert_ui32(tmp_in, tmp_out)?;
            }
            self.emit_relaxed_mov(Size::S32, Location::SIMD(tmp_out), ret)?;
        } else {
            // A 32-bit mov zero-extends into the full 64-bit register on
            // x86-64, so the unsigned case can reuse the *signed* 64-bit
            // convert on the zero-extended value.
            self.assembler
                .emit_mov(Size::S32, loc, Location::GPR(tmp_in))?;
            if signed {
                self.assembler
                    .emit_vcvtsi2ss_32(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            } else {
                self.assembler
                    .emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            }
            self.move_location(Size::S32, Location::SIMD(tmp_out), ret)?;
        }
        self.release_gpr(tmp_in);
        self.release_simd(tmp_out);
        Ok(())
    }
6860    fn convert_i64_f64(
6861        &mut self,
6862        loc: Location,
6863        ret: Location,
6864        signed: bool,
6865        sat: bool,
6866    ) -> Result<(), CompileError> {
6867        match (signed, sat) {
6868            (false, true) => self.convert_i64_f64_u_s(loc, ret),
6869            (false, false) => self.convert_i64_f64_u_u(loc, ret),
6870            (true, true) => self.convert_i64_f64_s_s(loc, ret),
6871            (true, false) => self.convert_i64_f64_s_u(loc, ret),
6872        }
6873    }
6874    fn convert_i32_f64(
6875        &mut self,
6876        loc: Location,
6877        ret: Location,
6878        signed: bool,
6879        sat: bool,
6880    ) -> Result<(), CompileError> {
6881        match (signed, sat) {
6882            (false, true) => self.convert_i32_f64_u_s(loc, ret),
6883            (false, false) => self.convert_i32_f64_u_u(loc, ret),
6884            (true, true) => self.convert_i32_f64_s_s(loc, ret),
6885            (true, false) => self.convert_i32_f64_s_u(loc, ret),
6886        }
6887    }
6888    fn convert_i64_f32(
6889        &mut self,
6890        loc: Location,
6891        ret: Location,
6892        signed: bool,
6893        sat: bool,
6894    ) -> Result<(), CompileError> {
6895        match (signed, sat) {
6896            (false, true) => self.convert_i64_f32_u_s(loc, ret),
6897            (false, false) => self.convert_i64_f32_u_u(loc, ret),
6898            (true, true) => self.convert_i64_f32_s_s(loc, ret),
6899            (true, false) => self.convert_i64_f32_s_u(loc, ret),
6900        }
6901    }
6902    fn convert_i32_f32(
6903        &mut self,
6904        loc: Location,
6905        ret: Location,
6906        signed: bool,
6907        sat: bool,
6908    ) -> Result<(), CompileError> {
6909        match (signed, sat) {
6910            (false, true) => self.convert_i32_f32_u_s(loc, ret),
6911            (false, false) => self.convert_i32_f32_u_u(loc, ret),
6912            (true, true) => self.convert_i32_f32_s_s(loc, ret),
6913            (true, false) => self.convert_i32_f32_s_u(loc, ret),
6914        }
6915    }
6916    fn convert_f64_f32(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
6917        self.emit_relaxed_avx(AssemblerX64::emit_vcvtss2sd, loc, loc, ret)
6918    }
6919    fn convert_f32_f64(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
6920        self.emit_relaxed_avx(AssemblerX64::emit_vcvtsd2ss, loc, loc, ret)
6921    }
6922    fn f64_neg(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
6923        if self.assembler.arch_has_fneg() {
6924            let tmp = self.acquire_temp_simd().ok_or_else(|| {
6925                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
6926            })?;
6927            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp))?;
6928            self.assembler.arch_emit_f64_neg(tmp, tmp)?;
6929            self.emit_relaxed_mov(Size::S64, Location::SIMD(tmp), ret)?;
6930            self.release_simd(tmp);
6931        } else {
6932            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
6933                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
6934            })?;
6935            self.move_location(Size::S64, loc, Location::GPR(tmp))?;
6936            self.assembler.emit_btc_gpr_imm8_64(63, tmp)?;
6937            self.move_location(Size::S64, Location::GPR(tmp), ret)?;
6938            self.release_gpr(tmp);
6939        }
6940        Ok(())
6941    }
6942    fn f64_abs(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
6943        let tmp = self.acquire_temp_gpr().ok_or_else(|| {
6944            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
6945        })?;
6946        let c = self.acquire_temp_gpr().ok_or_else(|| {
6947            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
6948        })?;
6949
6950        self.move_location(Size::S64, loc, Location::GPR(tmp))?;
6951        self.move_location(
6952            Size::S64,
6953            Location::Imm64(0x7fffffffffffffffu64),
6954            Location::GPR(c),
6955        )?;
6956        self.assembler
6957            .emit_and(Size::S64, Location::GPR(c), Location::GPR(tmp))?;
6958        self.move_location(Size::S64, Location::GPR(tmp), ret)?;
6959
6960        self.release_gpr(c);
6961        self.release_gpr(tmp);
6962        Ok(())
6963    }
6964    fn emit_i64_copysign(&mut self, tmp1: GPR, tmp2: GPR) -> Result<(), CompileError> {
6965        let c = self.acquire_temp_gpr().ok_or_else(|| {
6966            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
6967        })?;
6968
6969        self.move_location(
6970            Size::S64,
6971            Location::Imm64(0x7fffffffffffffffu64),
6972            Location::GPR(c),
6973        )?;
6974        self.assembler
6975            .emit_and(Size::S64, Location::GPR(c), Location::GPR(tmp1))?;
6976
6977        self.move_location(
6978            Size::S64,
6979            Location::Imm64(0x8000000000000000u64),
6980            Location::GPR(c),
6981        )?;
6982        self.assembler
6983            .emit_and(Size::S64, Location::GPR(c), Location::GPR(tmp2))?;
6984
6985        self.assembler
6986            .emit_or(Size::S64, Location::GPR(tmp2), Location::GPR(tmp1))?;
6987
6988        self.release_gpr(c);
6989        Ok(())
6990    }
6991    fn f64_sqrt(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
6992        self.emit_relaxed_avx(AssemblerX64::emit_vsqrtsd, loc, loc, ret)
6993    }
6994    fn f64_trunc(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
6995        self.emit_relaxed_avx(AssemblerX64::emit_vroundsd_trunc, loc, loc, ret)
6996    }
6997    fn f64_ceil(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
6998        self.emit_relaxed_avx(AssemblerX64::emit_vroundsd_ceil, loc, loc, ret)
6999    }
7000    fn f64_floor(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7001        self.emit_relaxed_avx(AssemblerX64::emit_vroundsd_floor, loc, loc, ret)
7002    }
7003    fn f64_nearest(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7004        self.emit_relaxed_avx(AssemblerX64::emit_vroundsd_nearest, loc, loc, ret)
7005    }
7006    fn f64_cmp_ge(
7007        &mut self,
7008        loc_a: Location,
7009        loc_b: Location,
7010        ret: Location,
7011    ) -> Result<(), CompileError> {
7012        self.emit_relaxed_avx(AssemblerX64::emit_vcmpgesd, loc_a, loc_b, ret)?;
7013        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7014    }
7015    fn f64_cmp_gt(
7016        &mut self,
7017        loc_a: Location,
7018        loc_b: Location,
7019        ret: Location,
7020    ) -> Result<(), CompileError> {
7021        self.emit_relaxed_avx(AssemblerX64::emit_vcmpgtsd, loc_a, loc_b, ret)?;
7022        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7023    }
7024    fn f64_cmp_le(
7025        &mut self,
7026        loc_a: Location,
7027        loc_b: Location,
7028        ret: Location,
7029    ) -> Result<(), CompileError> {
7030        self.emit_relaxed_avx(AssemblerX64::emit_vcmplesd, loc_a, loc_b, ret)?;
7031        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7032    }
7033    fn f64_cmp_lt(
7034        &mut self,
7035        loc_a: Location,
7036        loc_b: Location,
7037        ret: Location,
7038    ) -> Result<(), CompileError> {
7039        self.emit_relaxed_avx(AssemblerX64::emit_vcmpltsd, loc_a, loc_b, ret)?;
7040        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7041    }
7042    fn f64_cmp_ne(
7043        &mut self,
7044        loc_a: Location,
7045        loc_b: Location,
7046        ret: Location,
7047    ) -> Result<(), CompileError> {
7048        self.emit_relaxed_avx(AssemblerX64::emit_vcmpneqsd, loc_a, loc_b, ret)?;
7049        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7050    }
7051    fn f64_cmp_eq(
7052        &mut self,
7053        loc_a: Location,
7054        loc_b: Location,
7055        ret: Location,
7056    ) -> Result<(), CompileError> {
7057        self.emit_relaxed_avx(AssemblerX64::emit_vcmpeqsd, loc_a, loc_b, ret)?;
7058        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7059    }
    fn f64_min(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        // Wasm-compliant f64 minimum: besides the plain vminsd result, this
        // sequence forces min(-0.0, +0.0) to -0.0 and forces any NaN input
        // to produce the canonical NaN.
        // Canonicalize the result to differentiate arithmetic NaNs from canonical NaNs.
        let tmp1 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp2 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmpg1 = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmpg2 = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;

        // Materialize operand A into an XMM register (immediates go through a GPR).
        let src1 = match loc_a {
            Location::SIMD(x) => x,
            Location::GPR(_) | Location::Memory(_, _) => {
                self.move_location(Size::S64, loc_a, Location::SIMD(tmp1))?;
                tmp1
            }
            Location::Imm32(_) => {
                self.move_location(Size::S32, loc_a, Location::GPR(tmpg1))?;
                self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
                tmp1
            }
            Location::Imm64(_) => {
                self.move_location(Size::S64, loc_a, Location::GPR(tmpg1))?;
                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
                tmp1
            }
            _ => {
                codegen_error!("singlepass f64_min unreachable");
            }
        };
        // Materialize operand B the same way.
        let src2 = match loc_b {
            Location::SIMD(x) => x,
            Location::GPR(_) | Location::Memory(_, _) => {
                self.move_location(Size::S64, loc_b, Location::SIMD(tmp2))?;
                tmp2
            }
            Location::Imm32(_) => {
                self.move_location(Size::S32, loc_b, Location::GPR(tmpg1))?;
                self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
                tmp2
            }
            Location::Imm64(_) => {
                self.move_location(Size::S64, loc_b, Location::GPR(tmpg1))?;
                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
                tmp2
            }
            _ => {
                codegen_error!("singlepass f64_min unreachable");
            }
        };

        // Fixed scratch XMM registers outside the allocator's pool.
        let tmp_xmm1 = XMM::XMM8;
        let tmp_xmm2 = XMM::XMM9;
        let tmp_xmm3 = XMM::XMM10;

        // Integer-compare the raw bit patterns of the two operands; this
        // distinguishes bitwise-identical inputs from the +0.0/-0.0 pair,
        // which compare equal as floats but differ in bits.
        self.move_location(Size::S64, Location::SIMD(src1), Location::GPR(tmpg1))?;
        self.move_location(Size::S64, Location::SIMD(src2), Location::GPR(tmpg2))?;
        self.assembler
            .emit_cmp(Size::S64, Location::GPR(tmpg2), Location::GPR(tmpg1))?;
        // tmp_xmm1 = vminsd(src1, src2) — the raw hardware minimum.
        self.assembler
            .emit_vminsd(src1, XMMOrMemory::XMM(src2), tmp_xmm1)?;
        let label1 = self.assembler.get_label();
        let label2 = self.assembler.get_label();
        self.assembler.emit_jmp(Condition::NotEqual, label1)?;
        // Bit patterns equal: the float-equal fallback is simply the min itself.
        self.assembler
            .emit_vmovapd(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2))?;
        self.assembler.emit_jmp(Condition::None, label2)?;
        self.emit_label(label1)?;
        // Bit patterns differ: if the floats still compare equal (the
        // +0.0/-0.0 case), the correct minimum is -0.0.
        // load float -0.0
        self.move_location(
            Size::S64,
            Location::Imm64(0x8000_0000_0000_0000), // Negative zero
            Location::GPR(tmpg1),
        )?;
        self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp_xmm2))?;
        self.emit_label(label2)?;
        // Where src1 == src2 (ordered equal), replace the min with tmp_xmm2.
        self.assembler
            .emit_vcmpeqsd(src1, XMMOrMemory::XMM(src2), tmp_xmm3)?;
        self.assembler
            .emit_vblendvpd(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1)?;
        // src1 becomes an all-ones mask if either operand is NaN (unordered).
        self.assembler
            .emit_vcmpunordsd(src1, XMMOrMemory::XMM(src2), src1)?;
        // load float canonical nan
        self.move_location(
            Size::S64,
            Location::Imm64(0x7FF8_0000_0000_0000), // Canonical NaN
            Location::GPR(tmpg1),
        )?;
        self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(src2))?;
        // Select the canonical NaN where unordered, else the adjusted minimum.
        self.assembler
            .emit_vblendvpd(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1)?;
        match ret {
            Location::SIMD(x) => {
                // NOTE(review): uses vmovaps (single-precision form) on f64
                // data — moves the whole register either way, but f64_max uses
                // vmovapd here; confirm the asymmetry is intentional.
                self.assembler
                    .emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x))?;
            }
            Location::Memory(_, _) | Location::GPR(_) => {
                self.move_location(Size::S64, Location::SIMD(src1), ret)?;
            }
            _ => {
                codegen_error!("singlepass f64_min unreachable");
            }
        }

        self.release_gpr(tmpg2);
        self.release_gpr(tmpg1);
        self.release_simd(tmp2);
        self.release_simd(tmp1);
        Ok(())
    }
    fn f64_max(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        // Wasm-compliant f64 maximum: besides the plain vmaxsd result, this
        // sequence forces max(-0.0, +0.0) to +0.0 and forces any NaN input
        // to produce the canonical NaN.
        // Canonicalize the result to differentiate arithmetic NaNs from canonical NaNs.
        let tmp1 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp2 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmpg1 = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmpg2 = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;

        // Materialize operand A into an XMM register (immediates go through a GPR).
        let src1 = match loc_a {
            Location::SIMD(x) => x,
            Location::GPR(_) | Location::Memory(_, _) => {
                self.move_location(Size::S64, loc_a, Location::SIMD(tmp1))?;
                tmp1
            }
            Location::Imm32(_) => {
                self.move_location(Size::S32, loc_a, Location::GPR(tmpg1))?;
                self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
                tmp1
            }
            Location::Imm64(_) => {
                self.move_location(Size::S64, loc_a, Location::GPR(tmpg1))?;
                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
                tmp1
            }
            _ => {
                codegen_error!("singlepass f64_max unreachable");
            }
        };
        // Materialize operand B the same way.
        let src2 = match loc_b {
            Location::SIMD(x) => x,
            Location::GPR(_) | Location::Memory(_, _) => {
                self.move_location(Size::S64, loc_b, Location::SIMD(tmp2))?;
                tmp2
            }
            Location::Imm32(_) => {
                self.move_location(Size::S32, loc_b, Location::GPR(tmpg1))?;
                self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
                tmp2
            }
            Location::Imm64(_) => {
                self.move_location(Size::S64, loc_b, Location::GPR(tmpg1))?;
                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
                tmp2
            }
            _ => {
                codegen_error!("singlepass f64_max unreachable");
            }
        };

        // Fixed scratch XMM registers outside the allocator's pool.
        let tmp_xmm1 = XMM::XMM8;
        let tmp_xmm2 = XMM::XMM9;
        let tmp_xmm3 = XMM::XMM10;

        // Integer-compare the raw bit patterns of the two operands; this
        // distinguishes bitwise-identical inputs from the +0.0/-0.0 pair,
        // which compare equal as floats but differ in bits.
        self.move_location(Size::S64, Location::SIMD(src1), Location::GPR(tmpg1))?;
        self.move_location(Size::S64, Location::SIMD(src2), Location::GPR(tmpg2))?;
        self.assembler
            .emit_cmp(Size::S64, Location::GPR(tmpg2), Location::GPR(tmpg1))?;
        // tmp_xmm1 = vmaxsd(src1, src2) — the raw hardware maximum.
        self.assembler
            .emit_vmaxsd(src1, XMMOrMemory::XMM(src2), tmp_xmm1)?;
        let label1 = self.assembler.get_label();
        let label2 = self.assembler.get_label();
        self.assembler.emit_jmp(Condition::NotEqual, label1)?;
        // Bit patterns equal: the float-equal fallback is simply the max itself.
        self.assembler
            .emit_vmovapd(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2))?;
        self.assembler.emit_jmp(Condition::None, label2)?;
        self.emit_label(label1)?;
        // Bit patterns differ: if the floats still compare equal (the
        // +0.0/-0.0 case), the correct maximum is +0.0 — xor zeroes tmp_xmm2.
        self.assembler
            .emit_vxorpd(tmp_xmm2, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm2)?;
        self.emit_label(label2)?;
        // Where src1 == src2 (ordered equal), replace the max with tmp_xmm2.
        self.assembler
            .emit_vcmpeqsd(src1, XMMOrMemory::XMM(src2), tmp_xmm3)?;
        self.assembler
            .emit_vblendvpd(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1)?;
        // src1 becomes an all-ones mask if either operand is NaN (unordered).
        self.assembler
            .emit_vcmpunordsd(src1, XMMOrMemory::XMM(src2), src1)?;
        // load float canonical nan
        self.move_location(
            Size::S64,
            Location::Imm64(0x7FF8_0000_0000_0000), // Canonical NaN
            Location::GPR(tmpg1),
        )?;
        self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(src2))?;
        // Select the canonical NaN where unordered, else the adjusted maximum.
        self.assembler
            .emit_vblendvpd(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1)?;
        match ret {
            Location::SIMD(x) => {
                self.assembler
                    .emit_vmovapd(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x))?;
            }
            Location::Memory(_, _) | Location::GPR(_) => {
                self.move_location(Size::S64, Location::SIMD(src1), ret)?;
            }
            _ => {
                codegen_error!("singlepass f64_max unreachable");
            }
        }

        self.release_gpr(tmpg2);
        self.release_gpr(tmpg1);
        self.release_simd(tmp2);
        self.release_simd(tmp1);
        Ok(())
    }
7295    fn f64_add(
7296        &mut self,
7297        loc_a: Location,
7298        loc_b: Location,
7299        ret: Location,
7300    ) -> Result<(), CompileError> {
7301        self.emit_relaxed_avx(AssemblerX64::emit_vaddsd, loc_a, loc_b, ret)
7302    }
7303    fn f64_sub(
7304        &mut self,
7305        loc_a: Location,
7306        loc_b: Location,
7307        ret: Location,
7308    ) -> Result<(), CompileError> {
7309        self.emit_relaxed_avx(AssemblerX64::emit_vsubsd, loc_a, loc_b, ret)
7310    }
7311    fn f64_mul(
7312        &mut self,
7313        loc_a: Location,
7314        loc_b: Location,
7315        ret: Location,
7316    ) -> Result<(), CompileError> {
7317        self.emit_relaxed_avx(AssemblerX64::emit_vmulsd, loc_a, loc_b, ret)
7318    }
7319    fn f64_div(
7320        &mut self,
7321        loc_a: Location,
7322        loc_b: Location,
7323        ret: Location,
7324    ) -> Result<(), CompileError> {
7325        self.emit_relaxed_avx(AssemblerX64::emit_vdivsd, loc_a, loc_b, ret)
7326    }
7327    fn f32_neg(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7328        if self.assembler.arch_has_fneg() {
7329            let tmp = self.acquire_temp_simd().ok_or_else(|| {
7330                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
7331            })?;
7332            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp))?;
7333            self.assembler.arch_emit_f32_neg(tmp, tmp)?;
7334            self.emit_relaxed_mov(Size::S32, Location::SIMD(tmp), ret)?;
7335            self.release_simd(tmp);
7336        } else {
7337            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
7338                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
7339            })?;
7340            self.move_location(Size::S32, loc, Location::GPR(tmp))?;
7341            self.assembler.emit_btc_gpr_imm8_32(31, tmp)?;
7342            self.move_location(Size::S32, Location::GPR(tmp), ret)?;
7343            self.release_gpr(tmp);
7344        }
7345        Ok(())
7346    }
7347    fn f32_abs(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7348        let tmp = self.acquire_temp_gpr().ok_or_else(|| {
7349            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
7350        })?;
7351        self.move_location(Size::S32, loc, Location::GPR(tmp))?;
7352        self.assembler.emit_and(
7353            Size::S32,
7354            Location::Imm32(0x7fffffffu32),
7355            Location::GPR(tmp),
7356        )?;
7357        self.move_location(Size::S32, Location::GPR(tmp), ret)?;
7358        self.release_gpr(tmp);
7359        Ok(())
7360    }
7361    fn emit_i32_copysign(&mut self, tmp1: GPR, tmp2: GPR) -> Result<(), CompileError> {
7362        self.assembler.emit_and(
7363            Size::S32,
7364            Location::Imm32(0x7fffffffu32),
7365            Location::GPR(tmp1),
7366        )?;
7367        self.assembler.emit_and(
7368            Size::S32,
7369            Location::Imm32(0x80000000u32),
7370            Location::GPR(tmp2),
7371        )?;
7372        self.assembler
7373            .emit_or(Size::S32, Location::GPR(tmp2), Location::GPR(tmp1))
7374    }
7375    fn f32_sqrt(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7376        self.emit_relaxed_avx(AssemblerX64::emit_vsqrtss, loc, loc, ret)
7377    }
7378    fn f32_trunc(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7379        self.emit_relaxed_avx(AssemblerX64::emit_vroundss_trunc, loc, loc, ret)
7380    }
7381    fn f32_ceil(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7382        self.emit_relaxed_avx(AssemblerX64::emit_vroundss_ceil, loc, loc, ret)
7383    }
7384    fn f32_floor(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7385        self.emit_relaxed_avx(AssemblerX64::emit_vroundss_floor, loc, loc, ret)
7386    }
7387    fn f32_nearest(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7388        self.emit_relaxed_avx(AssemblerX64::emit_vroundss_nearest, loc, loc, ret)
7389    }
7390    fn f32_cmp_ge(
7391        &mut self,
7392        loc_a: Location,
7393        loc_b: Location,
7394        ret: Location,
7395    ) -> Result<(), CompileError> {
7396        self.emit_relaxed_avx(AssemblerX64::emit_vcmpgess, loc_a, loc_b, ret)?;
7397        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7398    }
7399    fn f32_cmp_gt(
7400        &mut self,
7401        loc_a: Location,
7402        loc_b: Location,
7403        ret: Location,
7404    ) -> Result<(), CompileError> {
7405        self.emit_relaxed_avx(AssemblerX64::emit_vcmpgtss, loc_a, loc_b, ret)?;
7406        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7407    }
7408    fn f32_cmp_le(
7409        &mut self,
7410        loc_a: Location,
7411        loc_b: Location,
7412        ret: Location,
7413    ) -> Result<(), CompileError> {
7414        self.emit_relaxed_avx(AssemblerX64::emit_vcmpless, loc_a, loc_b, ret)?;
7415        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7416    }
7417    fn f32_cmp_lt(
7418        &mut self,
7419        loc_a: Location,
7420        loc_b: Location,
7421        ret: Location,
7422    ) -> Result<(), CompileError> {
7423        self.emit_relaxed_avx(AssemblerX64::emit_vcmpltss, loc_a, loc_b, ret)?;
7424        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7425    }
7426    fn f32_cmp_ne(
7427        &mut self,
7428        loc_a: Location,
7429        loc_b: Location,
7430        ret: Location,
7431    ) -> Result<(), CompileError> {
7432        self.emit_relaxed_avx(AssemblerX64::emit_vcmpneqss, loc_a, loc_b, ret)?;
7433        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7434    }
7435    fn f32_cmp_eq(
7436        &mut self,
7437        loc_a: Location,
7438        loc_b: Location,
7439        ret: Location,
7440    ) -> Result<(), CompileError> {
7441        self.emit_relaxed_avx(AssemblerX64::emit_vcmpeqss, loc_a, loc_b, ret)?;
7442        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7443    }
    fn f32_min(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        // Wasm-compliant f32 minimum: besides the plain vminss result, this
        // sequence forces min(-0.0, +0.0) to -0.0 and forces any NaN input
        // to produce the canonical NaN.
        // Canonicalize the result to differentiate arithmetic NaNs from canonical NaNs.
        let tmp1 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp2 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmpg1 = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmpg2 = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;

        // Materialize operand A into an XMM register (immediates go through a GPR).
        let src1 = match loc_a {
            Location::SIMD(x) => x,
            Location::GPR(_) | Location::Memory(_, _) => {
                self.move_location(Size::S64, loc_a, Location::SIMD(tmp1))?;
                tmp1
            }
            Location::Imm32(_) => {
                self.move_location(Size::S32, loc_a, Location::GPR(tmpg1))?;
                self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
                tmp1
            }
            Location::Imm64(_) => {
                self.move_location(Size::S64, loc_a, Location::GPR(tmpg1))?;
                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
                tmp1
            }
            _ => {
                codegen_error!("singlepass f32_min unreachable");
            }
        };
        // Materialize operand B the same way.
        let src2 = match loc_b {
            Location::SIMD(x) => x,
            Location::GPR(_) | Location::Memory(_, _) => {
                self.move_location(Size::S64, loc_b, Location::SIMD(tmp2))?;
                tmp2
            }
            Location::Imm32(_) => {
                self.move_location(Size::S32, loc_b, Location::GPR(tmpg1))?;
                self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
                tmp2
            }
            Location::Imm64(_) => {
                self.move_location(Size::S64, loc_b, Location::GPR(tmpg1))?;
                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
                tmp2
            }
            _ => {
                codegen_error!("singlepass f32_min unreachable");
            }
        };

        // Fixed scratch XMM registers outside the allocator's pool.
        let tmp_xmm1 = XMM::XMM8;
        let tmp_xmm2 = XMM::XMM9;
        let tmp_xmm3 = XMM::XMM10;

        // Integer-compare the raw 32-bit patterns of the two operands; this
        // distinguishes bitwise-identical inputs from the +0.0/-0.0 pair,
        // which compare equal as floats but differ in bits.
        self.move_location(Size::S32, Location::SIMD(src1), Location::GPR(tmpg1))?;
        self.move_location(Size::S32, Location::SIMD(src2), Location::GPR(tmpg2))?;
        self.assembler
            .emit_cmp(Size::S32, Location::GPR(tmpg2), Location::GPR(tmpg1))?;
        // tmp_xmm1 = vminss(src1, src2) — the raw hardware minimum.
        self.assembler
            .emit_vminss(src1, XMMOrMemory::XMM(src2), tmp_xmm1)?;
        let label1 = self.assembler.get_label();
        let label2 = self.assembler.get_label();
        self.assembler.emit_jmp(Condition::NotEqual, label1)?;
        // Bit patterns equal: the float-equal fallback is simply the min itself.
        self.assembler
            .emit_vmovaps(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2))?;
        self.assembler.emit_jmp(Condition::None, label2)?;
        self.emit_label(label1)?;
        // Bit patterns differ: if the floats still compare equal (the
        // +0.0/-0.0 case), the correct minimum is -0.0.
        // NOTE(review): Size::S64 paired with a 32-bit immediate / f32 data
        // here and below — presumably benign for GPR<->XMM moves, but the f64
        // variant uses matching sizes; confirm against move_location.
        // load float -0.0
        self.move_location(
            Size::S64,
            Location::Imm32(0x8000_0000), // Negative zero
            Location::GPR(tmpg1),
        )?;
        self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp_xmm2))?;
        self.emit_label(label2)?;
        // Where src1 == src2 (ordered equal), replace the min with tmp_xmm2.
        self.assembler
            .emit_vcmpeqss(src1, XMMOrMemory::XMM(src2), tmp_xmm3)?;
        self.assembler
            .emit_vblendvps(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1)?;
        // src1 becomes an all-ones mask if either operand is NaN (unordered).
        self.assembler
            .emit_vcmpunordss(src1, XMMOrMemory::XMM(src2), src1)?;
        // load float canonical nan
        self.move_location(
            Size::S64,
            Location::Imm32(0x7FC0_0000), // Canonical NaN
            Location::GPR(tmpg1),
        )?;
        self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(src2))?;
        // Select the canonical NaN where unordered, else the adjusted minimum.
        self.assembler
            .emit_vblendvps(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1)?;
        match ret {
            Location::SIMD(x) => {
                self.assembler
                    .emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x))?;
            }
            Location::Memory(_, _) | Location::GPR(_) => {
                self.move_location(Size::S64, Location::SIMD(src1), ret)?;
            }
            _ => {
                codegen_error!("singlepass f32_min unreachable");
            }
        }

        self.release_gpr(tmpg2);
        self.release_gpr(tmpg1);
        self.release_simd(tmp2);
        self.release_simd(tmp1);
        Ok(())
    }
7564    fn f32_max(
7565        &mut self,
7566        loc_a: Location,
7567        loc_b: Location,
7568        ret: Location,
7569    ) -> Result<(), CompileError> {
7570        // Canonicalize the result to differentiate arithmetic NaNs from canonical NaNs.
7571        let tmp1 = self.acquire_temp_simd().ok_or_else(|| {
7572            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
7573        })?;
7574        let tmp2 = self.acquire_temp_simd().ok_or_else(|| {
7575            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
7576        })?;
7577        let tmpg1 = self.acquire_temp_gpr().ok_or_else(|| {
7578            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
7579        })?;
7580        let tmpg2 = self.acquire_temp_gpr().ok_or_else(|| {
7581            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
7582        })?;
7583
7584        let src1 = match loc_a {
7585            Location::SIMD(x) => x,
7586            Location::GPR(_) | Location::Memory(_, _) => {
7587                self.move_location(Size::S64, loc_a, Location::SIMD(tmp1))?;
7588                tmp1
7589            }
7590            Location::Imm32(_) => {
7591                self.move_location(Size::S32, loc_a, Location::GPR(tmpg1))?;
7592                self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
7593                tmp1
7594            }
7595            Location::Imm64(_) => {
7596                self.move_location(Size::S64, loc_a, Location::GPR(tmpg1))?;
7597                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
7598                tmp1
7599            }
7600            _ => {
7601                codegen_error!("singlepass f32_max unreachable");
7602            }
7603        };
7604        let src2 = match loc_b {
7605            Location::SIMD(x) => x,
7606            Location::GPR(_) | Location::Memory(_, _) => {
7607                self.move_location(Size::S64, loc_b, Location::SIMD(tmp2))?;
7608                tmp2
7609            }
7610            Location::Imm32(_) => {
7611                self.move_location(Size::S32, loc_b, Location::GPR(tmpg1))?;
7612                self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
7613                tmp2
7614            }
7615            Location::Imm64(_) => {
7616                self.move_location(Size::S64, loc_b, Location::GPR(tmpg1))?;
7617                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
7618                tmp2
7619            }
7620            _ => {
7621                codegen_error!("singlepass f32_max unreachable");
7622            }
7623        };
7624
7625        let tmp_xmm1 = XMM::XMM8;
7626        let tmp_xmm2 = XMM::XMM9;
7627        let tmp_xmm3 = XMM::XMM10;
7628
7629        self.move_location(Size::S32, Location::SIMD(src1), Location::GPR(tmpg1))?;
7630        self.move_location(Size::S32, Location::SIMD(src2), Location::GPR(tmpg2))?;
7631        self.assembler
7632            .emit_cmp(Size::S32, Location::GPR(tmpg2), Location::GPR(tmpg1))?;
7633        self.assembler
7634            .emit_vmaxss(src1, XMMOrMemory::XMM(src2), tmp_xmm1)?;
7635        let label1 = self.assembler.get_label();
7636        let label2 = self.assembler.get_label();
7637        self.assembler.emit_jmp(Condition::NotEqual, label1)?;
7638        self.assembler
7639            .emit_vmovaps(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2))?;
7640        self.assembler.emit_jmp(Condition::None, label2)?;
7641        self.emit_label(label1)?;
7642        self.assembler
7643            .emit_vxorps(tmp_xmm2, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm2)?;
7644        self.emit_label(label2)?;
7645        self.assembler
7646            .emit_vcmpeqss(src1, XMMOrMemory::XMM(src2), tmp_xmm3)?;
7647        self.assembler
7648            .emit_vblendvps(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1)?;
7649        self.assembler
7650            .emit_vcmpunordss(src1, XMMOrMemory::XMM(src2), src1)?;
7651        // load float canonical nan
7652        self.move_location(
7653            Size::S64,
7654            Location::Imm32(0x7FC0_0000), // Canonical NaN
7655            Location::GPR(tmpg1),
7656        )?;
7657        self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(src2))?;
7658        self.assembler
7659            .emit_vblendvps(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1)?;
7660        match ret {
7661            Location::SIMD(x) => {
7662                self.assembler
7663                    .emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x))?;
7664            }
7665            Location::Memory(_, _) | Location::GPR(_) => {
7666                self.move_location(Size::S64, Location::SIMD(src1), ret)?;
7667            }
7668            _ => {
7669                codegen_error!("singlepass f32_max unreachable");
7670            }
7671        }
7672
7673        self.release_gpr(tmpg2);
7674        self.release_gpr(tmpg1);
7675        self.release_simd(tmp2);
7676        self.release_simd(tmp1);
7677        Ok(())
7678    }
7679    fn f32_add(
7680        &mut self,
7681        loc_a: Location,
7682        loc_b: Location,
7683        ret: Location,
7684    ) -> Result<(), CompileError> {
7685        self.emit_relaxed_avx(AssemblerX64::emit_vaddss, loc_a, loc_b, ret)
7686    }
7687    fn f32_sub(
7688        &mut self,
7689        loc_a: Location,
7690        loc_b: Location,
7691        ret: Location,
7692    ) -> Result<(), CompileError> {
7693        self.emit_relaxed_avx(AssemblerX64::emit_vsubss, loc_a, loc_b, ret)
7694    }
7695    fn f32_mul(
7696        &mut self,
7697        loc_a: Location,
7698        loc_b: Location,
7699        ret: Location,
7700    ) -> Result<(), CompileError> {
7701        self.emit_relaxed_avx(AssemblerX64::emit_vmulss, loc_a, loc_b, ret)
7702    }
7703    fn f32_div(
7704        &mut self,
7705        loc_a: Location,
7706        loc_b: Location,
7707        ret: Location,
7708    ) -> Result<(), CompileError> {
7709        self.emit_relaxed_avx(AssemblerX64::emit_vdivss, loc_a, loc_b, ret)
7710    }
7711
7712    fn gen_std_trampoline(
7713        &self,
7714        sig: &FunctionType,
7715        calling_convention: CallingConvention,
7716    ) -> Result<FunctionBody, CompileError> {
7717        // the cpu feature here is irrelevant
7718        let mut a = AssemblerX64::new(0, None)?;
7719
7720        // Calculate stack offset (+1 for the vmctx argument we are going to pass).
7721        let stack_params = (0..sig.params().len() + 1)
7722            .filter(|&i| {
7723                self.get_param_registers(calling_convention)
7724                    .get(i)
7725                    .is_none()
7726            })
7727            .count();
7728        let stack_return_slots = sig
7729            .results()
7730            .len()
7731            .saturating_sub(X86_64_RETURN_VALUE_REGISTERS.len());
7732
7733        // Stack slots are not shared in between function params and return values.
7734        let mut stack_offset = 8 * (stack_params + stack_return_slots) as u32;
7735        let stack_padding: u32 = match calling_convention {
7736            CallingConvention::WindowsFastcall => 32,
7737            _ => 0,
7738        };
7739
7740        // Align to 16 bytes. We push two 8-byte registers below, so here we need to ensure stack_offset % 16 == 8.
7741        if stack_offset % 16 != 8 {
7742            stack_offset += 8;
7743        }
7744
7745        // Used callee-saved registers
7746        a.emit_push(Size::S64, Location::GPR(GPR::R15))?;
7747        a.emit_push(Size::S64, Location::GPR(GPR::R14))?;
7748
7749        // Prepare stack space.
7750        a.emit_sub(
7751            Size::S64,
7752            Location::Imm32(stack_offset + stack_padding),
7753            Location::GPR(GPR::RSP),
7754        )?;
7755
7756        // Arguments
7757        a.emit_mov(
7758            Size::S64,
7759            Location::GPR(self.get_simple_param_location(1, calling_convention)),
7760            Location::GPR(GPR::R15),
7761        )?; // func_ptr
7762        a.emit_mov(
7763            Size::S64,
7764            Location::GPR(self.get_simple_param_location(2, calling_convention)),
7765            Location::GPR(GPR::R14),
7766        )?; // args_rets
7767
7768        // Move arguments to their locations.
7769        // `callee_vmctx` is already in the first argument register, so no need to move.
7770        {
7771            let mut n_stack_args = 0u32;
7772            for (i, _param) in sig.params().iter().enumerate() {
7773                let src_loc = Location::Memory(GPR::R14, (i * 16) as _); // args_rets[i]
7774                let dst_loc = self.get_param_registers(calling_convention).get(1 + i);
7775
7776                match dst_loc {
7777                    Some(&gpr) => {
7778                        a.emit_mov(Size::S64, src_loc, Location::GPR(gpr))?;
7779                    }
7780                    None => {
7781                        // This location is for reading arguments but we are writing arguments here.
7782                        // So recalculate it.
7783                        a.emit_mov(Size::S64, src_loc, Location::GPR(GPR::RAX))?;
7784                        a.emit_mov(
7785                            Size::S64,
7786                            Location::GPR(GPR::RAX),
7787                            Location::Memory(
7788                                GPR::RSP,
7789                                (stack_padding + (n_stack_args + stack_return_slots as u32) * 8)
7790                                    as _,
7791                            ),
7792                        )?;
7793                        n_stack_args += 1;
7794                    }
7795                }
7796            }
7797        }
7798
7799        // Call.
7800        a.emit_call_location(Location::GPR(GPR::R15))?;
7801
7802        // Write return values.
7803        let mut n_stack_return_slots: usize = 0;
7804        for i in 0..sig.results().len() {
7805            let src = if let Some(&reg) = X86_64_RETURN_VALUE_REGISTERS.get(i) {
7806                Location::GPR(reg)
7807            } else {
7808                let loc = Location::GPR(GPR::R15);
7809                a.emit_mov(
7810                    Size::S64,
7811                    Location::Memory(
7812                        GPR::RSP,
7813                        (stack_padding + (n_stack_return_slots as u32 * 8)) as _,
7814                    ),
7815                    loc,
7816                )?;
7817                n_stack_return_slots += 1;
7818                loc
7819            };
7820            a.emit_mov(Size::S64, src, Location::Memory(GPR::R14, (i * 16) as _))?;
7821        }
7822
7823        // Restore stack.
7824        a.emit_add(
7825            Size::S64,
7826            Location::Imm32(stack_offset + stack_padding),
7827            Location::GPR(GPR::RSP),
7828        )?;
7829
7830        // Restore callee-saved registers.
7831        a.emit_pop(Size::S64, Location::GPR(GPR::R14))?;
7832        a.emit_pop(Size::S64, Location::GPR(GPR::R15))?;
7833
7834        a.emit_ret()?;
7835
7836        let mut body = a.finalize().unwrap();
7837        body.shrink_to_fit();
7838
7839        Ok(FunctionBody {
7840            body,
7841            unwind_info: None,
7842        })
7843    }
7844    // Generates dynamic import function call trampoline for a function type.
7845    fn gen_std_dynamic_import_trampoline(
7846        &self,
7847        vmoffsets: &VMOffsets,
7848        sig: &FunctionType,
7849        calling_convention: CallingConvention,
7850    ) -> Result<FunctionBody, CompileError> {
7851        // the cpu feature here is irrelevant
7852        let mut a = AssemblerX64::new(0, None)?;
7853
7854        // Allocate argument array.
7855        let stack_offset: usize = 16 * std::cmp::max(sig.params().len(), sig.results().len()) + 8; // 16 bytes each + 8 bytes sysv call padding
7856        let stack_padding: usize = match calling_convention {
7857            CallingConvention::WindowsFastcall => 32,
7858            _ => 0,
7859        };
7860        a.emit_sub(
7861            Size::S64,
7862            Location::Imm32((stack_offset + stack_padding) as _),
7863            Location::GPR(GPR::RSP),
7864        )?;
7865
7866        // Copy arguments.
7867        if !sig.params().is_empty() {
7868            let mut argalloc = ArgumentRegisterAllocator::default();
7869            argalloc.next(Type::I64, calling_convention).unwrap(); // skip VMContext
7870
7871            let mut stack_param_count: usize = 0;
7872
7873            for (i, ty) in sig.params().iter().enumerate() {
7874                let source_loc = match argalloc.next(*ty, calling_convention)? {
7875                    Some(X64Register::GPR(gpr)) => Location::GPR(gpr),
7876                    Some(X64Register::XMM(xmm)) => Location::SIMD(xmm),
7877                    None => {
7878                        a.emit_mov(
7879                            Size::S64,
7880                            Location::Memory(
7881                                GPR::RSP,
7882                                (stack_padding * 2 + stack_offset + 8 + stack_param_count * 8) as _,
7883                            ),
7884                            Location::GPR(GPR::RAX),
7885                        )?;
7886                        stack_param_count += 1;
7887                        Location::GPR(GPR::RAX)
7888                    }
7889                };
7890                a.emit_mov(
7891                    Size::S64,
7892                    source_loc,
7893                    Location::Memory(GPR::RSP, (stack_padding + i * 16) as _),
7894                )?;
7895
7896                // Zero upper 64 bits.
7897                a.emit_mov(
7898                    Size::S64,
7899                    Location::Imm32(0),
7900                    Location::Memory(GPR::RSP, (stack_padding + i * 16 + 8) as _),
7901                )?;
7902            }
7903        }
7904
7905        match calling_convention {
7906            CallingConvention::WindowsFastcall => {
7907                // Load target address.
7908                a.emit_mov(
7909                    Size::S64,
7910                    Location::Memory(
7911                        GPR::RCX,
7912                        vmoffsets.vmdynamicfunction_import_context_address() as i32,
7913                    ),
7914                    Location::GPR(GPR::RAX),
7915                )?;
7916                // Load values array.
7917                a.emit_lea(
7918                    Size::S64,
7919                    Location::Memory(GPR::RSP, stack_padding as i32),
7920                    Location::GPR(GPR::RDX),
7921                )?;
7922            }
7923            _ => {
7924                // Load target address.
7925                a.emit_mov(
7926                    Size::S64,
7927                    Location::Memory(
7928                        GPR::RDI,
7929                        vmoffsets.vmdynamicfunction_import_context_address() as i32,
7930                    ),
7931                    Location::GPR(GPR::RAX),
7932                )?;
7933                // Load values array.
7934                a.emit_mov(Size::S64, Location::GPR(GPR::RSP), Location::GPR(GPR::RSI))?;
7935            }
7936        };
7937
7938        // Call target.
7939        a.emit_call_location(Location::GPR(GPR::RAX))?;
7940
7941        // Fetch return value.
7942        if !sig.results().is_empty() {
7943            assert_eq!(sig.results().len(), 1);
7944            a.emit_mov(
7945                Size::S64,
7946                Location::Memory(GPR::RSP, stack_padding as i32),
7947                Location::GPR(GPR::RAX),
7948            )?;
7949        }
7950
7951        // Release values array.
7952        a.emit_add(
7953            Size::S64,
7954            Location::Imm32((stack_offset + stack_padding) as _),
7955            Location::GPR(GPR::RSP),
7956        )?;
7957
7958        // Return.
7959        a.emit_ret()?;
7960
7961        let mut body = a.finalize().unwrap();
7962        body.shrink_to_fit();
7963        Ok(FunctionBody {
7964            body,
7965            unwind_info: None,
7966        })
7967    }
7968    // Singlepass calls import functions through a trampoline.
7969    fn gen_import_call_trampoline(
7970        &self,
7971        vmoffsets: &VMOffsets,
7972        index: FunctionIndex,
7973        sig: &FunctionType,
7974        calling_convention: CallingConvention,
7975    ) -> Result<CustomSection, CompileError> {
7976        // the cpu feature here is irrelevant
7977        let mut a = AssemblerX64::new(0, None)?;
7978
7979        // TODO: ARM entry trampoline is not emitted.
7980
7981        // Singlepass internally treats all arguments as integers
7982        // For the standard Windows calling convention requires
7983        //  floating point arguments to be passed in XMM registers for the 4 first arguments only
7984        //  That's the only change to do, other arguments are not to be changed
7985        // For the standard System V calling convention requires
7986        //  floating point arguments to be passed in XMM registers.
7987        //  Translation is expensive, so only do it if needed.
7988        if sig
7989            .params()
7990            .iter()
7991            .any(|&x| x == Type::F32 || x == Type::F64)
7992        {
7993            match calling_convention {
7994                CallingConvention::WindowsFastcall => {
7995                    let mut param_locations: Vec<Location> = vec![];
7996                    static PARAM_REGS: &[GPR] = &[GPR::RDX, GPR::R8, GPR::R9];
7997                    #[allow(clippy::needless_range_loop)]
7998                    for i in 0..sig.params().len() {
7999                        let loc = match i {
8000                            0..=2 => Location::GPR(PARAM_REGS[i]),
8001                            _ => Location::Memory(GPR::RSP, 32 + 8 + ((i - 3) * 8) as i32), // will not be used anyway
8002                        };
8003                        param_locations.push(loc);
8004                    }
8005
8006                    // Copy Float arguments to XMM from GPR.
8007                    let mut argalloc = ArgumentRegisterAllocator::default();
8008                    for (i, ty) in sig.params().iter().enumerate() {
8009                        let prev_loc = param_locations[i];
8010                        match argalloc.next(*ty, calling_convention)? {
8011                            Some(X64Register::GPR(_gpr)) => continue,
8012                            Some(X64Register::XMM(xmm)) => {
8013                                a.emit_mov(Size::S64, prev_loc, Location::SIMD(xmm))?
8014                            }
8015                            None => continue,
8016                        };
8017                    }
8018                }
8019                _ => {
8020                    let mut param_locations = vec![];
8021
8022                    // Allocate stack space for arguments.
8023                    let stack_offset: i32 = if sig.params().len() > 5 {
8024                        5 * 8
8025                    } else {
8026                        (sig.params().len() as i32) * 8
8027                    };
8028                    if stack_offset > 0 {
8029                        a.emit_sub(
8030                            Size::S64,
8031                            Location::Imm32(stack_offset as u32),
8032                            Location::GPR(GPR::RSP),
8033                        )?;
8034                    }
8035
8036                    // Store all arguments to the stack to prevent overwrite.
8037                    static PARAM_REGS: &[GPR] = &[GPR::RSI, GPR::RDX, GPR::RCX, GPR::R8, GPR::R9];
8038                    #[allow(clippy::needless_range_loop)]
8039                    for i in 0..sig.params().len() {
8040                        let loc = match i {
8041                            0..=4 => {
8042                                let loc = Location::Memory(GPR::RSP, (i * 8) as i32);
8043                                a.emit_mov(Size::S64, Location::GPR(PARAM_REGS[i]), loc)?;
8044                                loc
8045                            }
8046                            _ => {
8047                                Location::Memory(GPR::RSP, stack_offset + 8 + ((i - 5) * 8) as i32)
8048                            }
8049                        };
8050                        param_locations.push(loc);
8051                    }
8052
8053                    // Copy arguments.
8054                    let mut argalloc = ArgumentRegisterAllocator::default();
8055                    argalloc.next(Type::I64, calling_convention)?.unwrap(); // skip VMContext
8056                    let mut caller_stack_offset: i32 = 0;
8057                    for (i, ty) in sig.params().iter().enumerate() {
8058                        let prev_loc = param_locations[i];
8059                        let targ = match argalloc.next(*ty, calling_convention)? {
8060                            Some(X64Register::GPR(gpr)) => Location::GPR(gpr),
8061                            Some(X64Register::XMM(xmm)) => Location::SIMD(xmm),
8062                            None => {
8063                                // No register can be allocated. Put this argument on the stack.
8064                                //
8065                                // Since here we never use fewer registers than by the original call, on the caller's frame
8066                                // we always have enough space to store the rearranged arguments, and the copy "backward" between different
8067                                // slots in the caller argument region will always work.
8068                                a.emit_mov(Size::S64, prev_loc, Location::GPR(GPR::RAX))?;
8069                                a.emit_mov(
8070                                    Size::S64,
8071                                    Location::GPR(GPR::RAX),
8072                                    Location::Memory(
8073                                        GPR::RSP,
8074                                        stack_offset + 8 + caller_stack_offset,
8075                                    ),
8076                                )?;
8077                                caller_stack_offset += 8;
8078                                continue;
8079                            }
8080                        };
8081                        a.emit_mov(Size::S64, prev_loc, targ)?;
8082                    }
8083
8084                    // Restore stack pointer.
8085                    if stack_offset > 0 {
8086                        a.emit_add(
8087                            Size::S64,
8088                            Location::Imm32(stack_offset as u32),
8089                            Location::GPR(GPR::RSP),
8090                        )?;
8091                    }
8092                }
8093            }
8094        }
8095
8096        // Emits a tail call trampoline that loads the address of the target import function
8097        // from Ctx and jumps to it.
8098
8099        let offset = vmoffsets.vmctx_vmfunction_import(index);
8100
8101        match calling_convention {
8102            CallingConvention::WindowsFastcall => {
8103                a.emit_mov(
8104                    Size::S64,
8105                    Location::Memory(GPR::RCX, offset as i32), // function pointer
8106                    Location::GPR(GPR::RAX),
8107                )?;
8108                a.emit_mov(
8109                    Size::S64,
8110                    Location::Memory(GPR::RCX, offset as i32 + 8), // target vmctx
8111                    Location::GPR(GPR::RCX),
8112                )?;
8113            }
8114            _ => {
8115                a.emit_mov(
8116                    Size::S64,
8117                    Location::Memory(GPR::RDI, offset as i32), // function pointer
8118                    Location::GPR(GPR::RAX),
8119                )?;
8120                a.emit_mov(
8121                    Size::S64,
8122                    Location::Memory(GPR::RDI, offset as i32 + 8), // target vmctx
8123                    Location::GPR(GPR::RDI),
8124                )?;
8125            }
8126        }
8127        a.emit_host_redirection(GPR::RAX)?;
8128
8129        let mut contents = a.finalize().unwrap();
8130        contents.shrink_to_fit();
8131        let section_body = SectionBody::new_with_vec(contents);
8132
8133        Ok(CustomSection {
8134            protection: CustomSectionProtection::ReadExecute,
8135            alignment: None,
8136            bytes: section_body,
8137            relocations: vec![],
8138        })
8139    }
8140    #[cfg(feature = "unwind")]
8141    fn gen_dwarf_unwind_info(&mut self, code_len: usize) -> Option<UnwindInstructions> {
8142        let mut instructions = vec![];
8143        for &(instruction_offset, ref inst) in &self.unwind_ops {
8144            let instruction_offset = instruction_offset as u32;
8145            match *inst {
8146                UnwindOps::PushFP { up_to_sp } => {
8147                    instructions.push((
8148                        instruction_offset,
8149                        CallFrameInstruction::CfaOffset(up_to_sp as i32),
8150                    ));
8151                    instructions.push((
8152                        instruction_offset,
8153                        CallFrameInstruction::Offset(X86_64::RBP, -(up_to_sp as i32)),
8154                    ));
8155                }
8156                UnwindOps::DefineNewFrame => {
8157                    instructions.push((
8158                        instruction_offset,
8159                        CallFrameInstruction::CfaRegister(X86_64::RBP),
8160                    ));
8161                }
8162                UnwindOps::SaveRegister { reg, bp_neg_offset } => instructions.push((
8163                    instruction_offset,
8164                    CallFrameInstruction::Offset(reg.dwarf_index(), -bp_neg_offset),
8165                )),
8166                UnwindOps::Push2Regs { .. } => unimplemented!(),
8167            }
8168        }
8169        Some(UnwindInstructions {
8170            instructions,
8171            len: code_len as u32,
8172        })
8173    }
    #[cfg(not(feature = "unwind"))]
    fn gen_dwarf_unwind_info(&mut self, _code_len: usize) -> Option<UnwindInstructions> {
        // Unwind support compiled out: no DWARF CFI is produced.
        None
    }
8178
8179    #[cfg(feature = "unwind")]
8180    fn gen_windows_unwind_info(&mut self, _code_len: usize) -> Option<Vec<u8>> {
8181        let unwind_info = create_unwind_info_from_insts(&self.unwind_ops);
8182        if let Some(unwind) = unwind_info {
8183            let sz = unwind.emit_size();
8184            let mut tbl = vec![0; sz];
8185            unwind.emit(&mut tbl);
8186            Some(tbl)
8187        } else {
8188            None
8189        }
8190    }
8191
    #[cfg(not(feature = "unwind"))]
    fn gen_windows_unwind_info(&mut self, _code_len: usize) -> Option<Vec<u8>> {
        // Unwind support compiled out: no Windows unwind table is produced.
        None
    }
8196}
8197
8198#[cfg(test)]
8199mod test {
8200    use super::*;
8201    use enumset::enum_set;
8202    use std::str::FromStr;
8203    use wasmer_types::target::{CpuFeature, Target, Triple};
8204
8205    fn test_move_location(machine: &mut MachineX86_64) -> Result<(), CompileError> {
8206        machine.move_location_for_native(
8207            Size::S64,
8208            Location::GPR(GPR::RAX),
8209            Location::GPR(GPR::RCX),
8210        )?;
8211        machine.move_location_for_native(
8212            Size::S64,
8213            Location::GPR(GPR::RAX),
8214            Location::Memory(GPR::RDX, 10),
8215        )?;
8216        machine.move_location_for_native(
8217            Size::S64,
8218            Location::GPR(GPR::RAX),
8219            Location::Memory(GPR::RDX, -10),
8220        )?;
8221        machine.move_location_for_native(
8222            Size::S64,
8223            Location::Memory(GPR::RDX, 10),
8224            Location::GPR(GPR::RAX),
8225        )?;
8226        machine.move_location_for_native(
8227            Size::S64,
8228            Location::Imm64(50),
8229            Location::GPR(GPR::RAX),
8230        )?;
8231        machine.move_location_for_native(
8232            Size::S64,
8233            Location::Imm32(50),
8234            Location::GPR(GPR::RAX),
8235        )?;
8236        machine.move_location_for_native(Size::S64, Location::Imm8(50), Location::GPR(GPR::RAX))?;
8237
8238        machine.move_location_for_native(
8239            Size::S32,
8240            Location::GPR(GPR::RAX),
8241            Location::GPR(GPR::RCX),
8242        )?;
8243        machine.move_location_for_native(
8244            Size::S32,
8245            Location::GPR(GPR::RAX),
8246            Location::Memory(GPR::RDX, 10),
8247        )?;
8248        machine.move_location_for_native(
8249            Size::S32,
8250            Location::GPR(GPR::RAX),
8251            Location::Memory(GPR::RDX, -10),
8252        )?;
8253        machine.move_location_for_native(
8254            Size::S32,
8255            Location::Memory(GPR::RDX, 10),
8256            Location::GPR(GPR::RAX),
8257        )?;
8258        machine.move_location_for_native(
8259            Size::S32,
8260            Location::Imm32(50),
8261            Location::GPR(GPR::RAX),
8262        )?;
8263        machine.move_location_for_native(Size::S32, Location::Imm8(50), Location::GPR(GPR::RAX))?;
8264
8265        machine.move_location_for_native(
8266            Size::S16,
8267            Location::GPR(GPR::RAX),
8268            Location::GPR(GPR::RCX),
8269        )?;
8270        machine.move_location_for_native(
8271            Size::S16,
8272            Location::GPR(GPR::RAX),
8273            Location::Memory(GPR::RDX, 10),
8274        )?;
8275        machine.move_location_for_native(
8276            Size::S16,
8277            Location::GPR(GPR::RAX),
8278            Location::Memory(GPR::RDX, -10),
8279        )?;
8280        machine.move_location_for_native(
8281            Size::S16,
8282            Location::Memory(GPR::RDX, 10),
8283            Location::GPR(GPR::RAX),
8284        )?;
8285        machine.move_location_for_native(Size::S16, Location::Imm8(50), Location::GPR(GPR::RAX))?;
8286
8287        machine.move_location_for_native(
8288            Size::S8,
8289            Location::GPR(GPR::RAX),
8290            Location::GPR(GPR::RCX),
8291        )?;
8292        machine.move_location_for_native(
8293            Size::S8,
8294            Location::GPR(GPR::RAX),
8295            Location::Memory(GPR::RDX, 10),
8296        )?;
8297        machine.move_location_for_native(
8298            Size::S8,
8299            Location::GPR(GPR::RAX),
8300            Location::Memory(GPR::RDX, -10),
8301        )?;
8302        machine.move_location_for_native(
8303            Size::S8,
8304            Location::Memory(GPR::RDX, 10),
8305            Location::GPR(GPR::RAX),
8306        )?;
8307        machine.move_location_for_native(Size::S8, Location::Imm8(50), Location::GPR(GPR::RAX))?;
8308
8309        machine.move_location_for_native(
8310            Size::S64,
8311            Location::SIMD(XMM::XMM0),
8312            Location::GPR(GPR::RAX),
8313        )?;
8314        machine.move_location_for_native(
8315            Size::S64,
8316            Location::SIMD(XMM::XMM0),
8317            Location::Memory(GPR::RDX, -10),
8318        )?;
8319        machine.move_location_for_native(
8320            Size::S64,
8321            Location::GPR(GPR::RAX),
8322            Location::SIMD(XMM::XMM0),
8323        )?;
8324        machine.move_location_for_native(
8325            Size::S64,
8326            Location::Memory(GPR::RDX, -10),
8327            Location::SIMD(XMM::XMM0),
8328        )?;
8329
8330        Ok(())
8331    }
8332
8333    fn test_move_location_extended(
8334        machine: &mut MachineX86_64,
8335        signed: bool,
8336        sized: Size,
8337    ) -> Result<(), CompileError> {
8338        machine.move_location_extend(
8339            sized,
8340            signed,
8341            Location::GPR(GPR::RAX),
8342            Size::S64,
8343            Location::GPR(GPR::RCX),
8344        )?;
8345        machine.move_location_extend(
8346            sized,
8347            signed,
8348            Location::GPR(GPR::RAX),
8349            Size::S64,
8350            Location::Memory(GPR::RCX, 10),
8351        )?;
8352        machine.move_location_extend(
8353            sized,
8354            signed,
8355            Location::Memory(GPR::RAX, 10),
8356            Size::S64,
8357            Location::GPR(GPR::RCX),
8358        )?;
8359        if sized != Size::S32 {
8360            machine.move_location_extend(
8361                sized,
8362                signed,
8363                Location::GPR(GPR::RAX),
8364                Size::S32,
8365                Location::GPR(GPR::RCX),
8366            )?;
8367            machine.move_location_extend(
8368                sized,
8369                signed,
8370                Location::GPR(GPR::RAX),
8371                Size::S32,
8372                Location::Memory(GPR::RCX, 10),
8373            )?;
8374            machine.move_location_extend(
8375                sized,
8376                signed,
8377                Location::Memory(GPR::RAX, 10),
8378                Size::S32,
8379                Location::GPR(GPR::RCX),
8380            )?;
8381        }
8382
8383        Ok(())
8384    }
8385
8386    fn test_binop_op(
8387        machine: &mut MachineX86_64,
8388        op: fn(&mut MachineX86_64, Location, Location, Location) -> Result<(), CompileError>,
8389    ) -> Result<(), CompileError> {
8390        op(
8391            machine,
8392            Location::GPR(GPR::RDX),
8393            Location::GPR(GPR::RDX),
8394            Location::GPR(GPR::RAX),
8395        )?;
8396        op(
8397            machine,
8398            Location::GPR(GPR::RDX),
8399            Location::Imm32(10),
8400            Location::GPR(GPR::RAX),
8401        )?;
8402        op(
8403            machine,
8404            Location::GPR(GPR::RAX),
8405            Location::GPR(GPR::RAX),
8406            Location::GPR(GPR::RAX),
8407        )?;
8408        op(
8409            machine,
8410            Location::Imm32(10),
8411            Location::GPR(GPR::RDX),
8412            Location::GPR(GPR::RAX),
8413        )?;
8414        op(
8415            machine,
8416            Location::GPR(GPR::RAX),
8417            Location::GPR(GPR::RDX),
8418            Location::Memory(GPR::RAX, 10),
8419        )?;
8420        op(
8421            machine,
8422            Location::GPR(GPR::RAX),
8423            Location::Memory(GPR::RDX, 16),
8424            Location::Memory(GPR::RAX, 10),
8425        )?;
8426        op(
8427            machine,
8428            Location::Memory(GPR::RAX, 0),
8429            Location::Memory(GPR::RDX, 16),
8430            Location::Memory(GPR::RAX, 10),
8431        )?;
8432
8433        Ok(())
8434    }
8435
8436    fn test_float_binop_op(
8437        machine: &mut MachineX86_64,
8438        op: fn(&mut MachineX86_64, Location, Location, Location) -> Result<(), CompileError>,
8439    ) -> Result<(), CompileError> {
8440        op(
8441            machine,
8442            Location::SIMD(XMM::XMM3),
8443            Location::SIMD(XMM::XMM2),
8444            Location::SIMD(XMM::XMM0),
8445        )?;
8446        op(
8447            machine,
8448            Location::SIMD(XMM::XMM0),
8449            Location::SIMD(XMM::XMM2),
8450            Location::SIMD(XMM::XMM0),
8451        )?;
8452        op(
8453            machine,
8454            Location::SIMD(XMM::XMM0),
8455            Location::SIMD(XMM::XMM0),
8456            Location::SIMD(XMM::XMM0),
8457        )?;
8458        op(
8459            machine,
8460            Location::Memory(GPR::RBP, 0),
8461            Location::SIMD(XMM::XMM2),
8462            Location::SIMD(XMM::XMM0),
8463        )?;
8464        op(
8465            machine,
8466            Location::Memory(GPR::RBP, 0),
8467            Location::Memory(GPR::RDX, 10),
8468            Location::SIMD(XMM::XMM0),
8469        )?;
8470        op(
8471            machine,
8472            Location::Memory(GPR::RBP, 0),
8473            Location::Memory(GPR::RDX, 16),
8474            Location::Memory(GPR::RAX, 32),
8475        )?;
8476        op(
8477            machine,
8478            Location::SIMD(XMM::XMM0),
8479            Location::Memory(GPR::RDX, 16),
8480            Location::Memory(GPR::RAX, 32),
8481        )?;
8482        op(
8483            machine,
8484            Location::SIMD(XMM::XMM0),
8485            Location::SIMD(XMM::XMM1),
8486            Location::Memory(GPR::RAX, 32),
8487        )?;
8488
8489        Ok(())
8490    }
8491
8492    fn test_float_cmp_op(
8493        machine: &mut MachineX86_64,
8494        op: fn(&mut MachineX86_64, Location, Location, Location) -> Result<(), CompileError>,
8495    ) -> Result<(), CompileError> {
8496        op(
8497            machine,
8498            Location::SIMD(XMM::XMM3),
8499            Location::SIMD(XMM::XMM2),
8500            Location::GPR(GPR::RAX),
8501        )?;
8502        op(
8503            machine,
8504            Location::SIMD(XMM::XMM0),
8505            Location::SIMD(XMM::XMM0),
8506            Location::GPR(GPR::RAX),
8507        )?;
8508        op(
8509            machine,
8510            Location::Memory(GPR::RBP, 0),
8511            Location::SIMD(XMM::XMM2),
8512            Location::GPR(GPR::RAX),
8513        )?;
8514        op(
8515            machine,
8516            Location::Memory(GPR::RBP, 0),
8517            Location::Memory(GPR::RDX, 10),
8518            Location::GPR(GPR::RAX),
8519        )?;
8520        op(
8521            machine,
8522            Location::Memory(GPR::RBP, 0),
8523            Location::Memory(GPR::RDX, 16),
8524            Location::Memory(GPR::RAX, 32),
8525        )?;
8526        op(
8527            machine,
8528            Location::SIMD(XMM::XMM0),
8529            Location::Memory(GPR::RDX, 16),
8530            Location::Memory(GPR::RAX, 32),
8531        )?;
8532        op(
8533            machine,
8534            Location::SIMD(XMM::XMM0),
8535            Location::SIMD(XMM::XMM1),
8536            Location::Memory(GPR::RAX, 32),
8537        )?;
8538
8539        Ok(())
8540    }
8541
8542    #[test]
8543    fn tests_avx() -> Result<(), CompileError> {
8544        let set = enum_set!(CpuFeature::AVX);
8545        let target = Target::new(Triple::from_str("x86_64-linux-gnu").unwrap(), set);
8546        let mut machine = MachineX86_64::new(Some(target))?;
8547
8548        test_move_location(&mut machine)?;
8549        test_move_location_extended(&mut machine, false, Size::S8)?;
8550        test_move_location_extended(&mut machine, false, Size::S16)?;
8551        test_move_location_extended(&mut machine, false, Size::S32)?;
8552        test_move_location_extended(&mut machine, true, Size::S8)?;
8553        test_move_location_extended(&mut machine, true, Size::S16)?;
8554        test_move_location_extended(&mut machine, true, Size::S32)?;
8555        test_binop_op(&mut machine, MachineX86_64::emit_binop_add32)?;
8556        test_binop_op(&mut machine, MachineX86_64::emit_binop_add64)?;
8557        test_binop_op(&mut machine, MachineX86_64::emit_binop_sub32)?;
8558        test_binop_op(&mut machine, MachineX86_64::emit_binop_sub64)?;
8559        test_binop_op(&mut machine, MachineX86_64::emit_binop_and32)?;
8560        test_binop_op(&mut machine, MachineX86_64::emit_binop_and64)?;
8561        test_binop_op(&mut machine, MachineX86_64::emit_binop_xor32)?;
8562        test_binop_op(&mut machine, MachineX86_64::emit_binop_xor64)?;
8563        test_binop_op(&mut machine, MachineX86_64::emit_binop_or32)?;
8564        test_binop_op(&mut machine, MachineX86_64::emit_binop_or64)?;
8565        test_binop_op(&mut machine, MachineX86_64::emit_binop_mul32)?;
8566        test_binop_op(&mut machine, MachineX86_64::emit_binop_mul64)?;
8567        test_float_binop_op(&mut machine, MachineX86_64::f32_add)?;
8568        test_float_binop_op(&mut machine, MachineX86_64::f32_sub)?;
8569        test_float_binop_op(&mut machine, MachineX86_64::f32_mul)?;
8570        test_float_binop_op(&mut machine, MachineX86_64::f32_div)?;
8571        test_float_cmp_op(&mut machine, MachineX86_64::f32_cmp_eq)?;
8572        test_float_cmp_op(&mut machine, MachineX86_64::f32_cmp_lt)?;
8573        test_float_cmp_op(&mut machine, MachineX86_64::f32_cmp_le)?;
8574
8575        Ok(())
8576    }
8577
8578    #[test]
8579    fn tests_sse42() -> Result<(), CompileError> {
8580        let set = enum_set!(CpuFeature::SSE42);
8581        let target = Target::new(Triple::from_str("x86_64-linux-gnu").unwrap(), set);
8582        let mut machine = MachineX86_64::new(Some(target))?;
8583
8584        test_move_location(&mut machine)?;
8585        test_move_location_extended(&mut machine, false, Size::S8)?;
8586        test_move_location_extended(&mut machine, false, Size::S16)?;
8587        test_move_location_extended(&mut machine, false, Size::S32)?;
8588        test_move_location_extended(&mut machine, true, Size::S8)?;
8589        test_move_location_extended(&mut machine, true, Size::S16)?;
8590        test_move_location_extended(&mut machine, true, Size::S32)?;
8591        test_binop_op(&mut machine, MachineX86_64::emit_binop_add32)?;
8592        test_binop_op(&mut machine, MachineX86_64::emit_binop_add64)?;
8593        test_binop_op(&mut machine, MachineX86_64::emit_binop_sub32)?;
8594        test_binop_op(&mut machine, MachineX86_64::emit_binop_sub64)?;
8595        test_binop_op(&mut machine, MachineX86_64::emit_binop_and32)?;
8596        test_binop_op(&mut machine, MachineX86_64::emit_binop_and64)?;
8597        test_binop_op(&mut machine, MachineX86_64::emit_binop_xor32)?;
8598        test_binop_op(&mut machine, MachineX86_64::emit_binop_xor64)?;
8599        test_binop_op(&mut machine, MachineX86_64::emit_binop_or32)?;
8600        test_binop_op(&mut machine, MachineX86_64::emit_binop_or64)?;
8601        test_binop_op(&mut machine, MachineX86_64::emit_binop_mul32)?;
8602        test_binop_op(&mut machine, MachineX86_64::emit_binop_mul64)?;
8603        test_float_binop_op(&mut machine, MachineX86_64::f32_add)?;
8604        test_float_binop_op(&mut machine, MachineX86_64::f32_sub)?;
8605        test_float_binop_op(&mut machine, MachineX86_64::f32_mul)?;
8606        test_float_binop_op(&mut machine, MachineX86_64::f32_div)?;
8607        test_float_cmp_op(&mut machine, MachineX86_64::f32_cmp_eq)?;
8608        test_float_cmp_op(&mut machine, MachineX86_64::f32_cmp_lt)?;
8609        test_float_cmp_op(&mut machine, MachineX86_64::f32_cmp_le)?;
8610
8611        Ok(())
8612    }
8613}