// wasmer_compiler_singlepass/machine_x64.rs

1#[cfg(feature = "unwind")]
2use crate::unwind_winx64::create_unwind_info_from_insts;
3use crate::{
4    codegen_error,
5    common_decl::*,
6    emitter_x64::*,
7    location::{Location as AbstractLocation, Reg},
8    machine::*,
9    unwind::{UnwindInstructions, UnwindOps, UnwindRegister},
10    x64_decl::{ArgumentRegisterAllocator, GPR, X64Register, XMM},
11};
12use dynasmrt::{DynasmError, VecAssembler, x64::X64Relocation};
13use fixedbitset::FixedBitSet;
14#[cfg(feature = "unwind")]
15use gimli::{X86_64, write::CallFrameInstruction};
16use std::{
17    collections::HashMap,
18    ops::{Deref, DerefMut},
19};
20use wasmer_compiler::{
21    types::{
22        address_map::InstructionAddressMap,
23        function::FunctionBody,
24        relocation::{Relocation, RelocationKind, RelocationTarget},
25        section::{CustomSection, CustomSectionProtection, SectionBody},
26    },
27    wasmparser::MemArg,
28};
29use wasmer_types::{
30    CompileError, FunctionIndex, FunctionType, SourceLoc, TrapCode, TrapInformation, Type,
31    VMOffsets,
32    target::{CallingConvention, CpuFeature, Target},
33};
34
/// Relocatable x86-64 assembler backed by an in-memory byte vector.
type Assembler = VecAssembler<X64Relocation>;
36
pub struct AssemblerX64 {
    /// The actual inner assembler emitting the machine code.
    pub inner: Assembler,
    /// The SIMD instruction set used on the target.
    /// Currently only SSE 4.2 and AVX are supported.
    pub simd_arch: CpuFeature,
    /// Full target description, when one was provided at construction.
    pub target: Option<Target>,
}
46
47impl AssemblerX64 {
48    fn new(baseaddr: usize, target: Option<Target>) -> Result<Self, CompileError> {
49        let simd_arch = target.as_ref().map_or_else(
50            || Ok(CpuFeature::SSE42),
51            |target| {
52                if target.cpu_features().contains(CpuFeature::AVX) {
53                    Ok(CpuFeature::AVX)
54                } else if target.cpu_features().contains(CpuFeature::SSE42) {
55                    Ok(CpuFeature::SSE42)
56                } else {
57                    Err(CompileError::UnsupportedTarget(
58                        "x86_64 without AVX or SSE 4.2, use -m avx to enable".to_string(),
59                    ))
60                }
61            },
62        )?;
63
64        Ok(Self {
65            inner: Assembler::new(baseaddr),
66            simd_arch,
67            target,
68        })
69    }
70
71    fn finalize(self) -> Result<Vec<u8>, DynasmError> {
72        self.inner.finalize()
73    }
74}
75
/// Forward immutable access so an `AssemblerX64` can be used wherever the
/// inner `Assembler` is expected.
impl Deref for AssemblerX64 {
    type Target = Assembler;

    fn deref(&self) -> &Self::Target {
        &self.inner
    }
}
83
/// Forward mutable access to the inner `Assembler`.
impl DerefMut for AssemblerX64 {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.inner
    }
}
89
/// Machine-level value location specialized for x86-64 registers (GPR/XMM).
type Location = AbstractLocation<GPR, XMM>;
91
pub struct MachineX86_64 {
    /// Underlying x86-64 instruction emitter.
    assembler: AssemblerX64,
    /// Allocation bitmap for general-purpose registers (bit set = in use).
    used_gprs: FixedBitSet,
    /// Allocation bitmap for SIMD (XMM) registers (bit set = in use).
    used_simd: FixedBitSet,
    /// Trap information collected while emitting (see `TrapTable`).
    trap_table: TrapTable,
    /// Map from byte offset into wasm function to range of native instructions.
    ///
    /// Ordered by increasing InstructionAddressMap::srcloc.
    instructions_address_map: Vec<InstructionAddressMap>,
    /// The source location for the current operator.
    src_loc: u32,
    /// Vector of unwind operations with the code offset they apply at.
    unwind_ops: Vec<(usize, UnwindOps<GPR, XMM>)>,
}
106
/// Registers holding the first N function return values.
/// NOTE: The register set must be disjoint from pick_gpr registers!
pub(crate) const X86_64_RETURN_VALUE_REGISTERS: [GPR; 2] = [GPR::RAX, GPR::RDX];
110
111impl MachineX86_64 {
112    pub fn new(target: Option<Target>) -> Result<Self, CompileError> {
113        let assembler = AssemblerX64::new(0, target)?;
114        Ok(MachineX86_64 {
115            assembler,
116            used_gprs: FixedBitSet::with_capacity(16),
117            used_simd: FixedBitSet::with_capacity(16),
118            trap_table: TrapTable::default(),
119            instructions_address_map: vec![],
120            src_loc: 0,
121            unwind_ops: vec![],
122        })
123    }
    /// Emits a two-operand binary instruction, relaxing operand constraints.
    ///
    /// x86 encodings do not allow arbitrary src/dst combinations (e.g.
    /// memory-to-memory), so operands may first be staged through temporary
    /// GPRs depending on the combination of locations.
    pub fn emit_relaxed_binop(
        &mut self,
        op: fn(&mut AssemblerX64, Size, Location, Location) -> Result<(), CompileError>,
        sz: Size,
        src: Location,
        dst: Location,
    ) -> Result<(), CompileError> {
        // How the operands must be staged before calling `op`.
        enum RelaxMode {
            Direct,
            SrcToGPR,
            DstToGPR,
            BothToGPR,
        }
        // NOTE(review): several arms identify `op` by raw function-pointer
        // equality; this relies on the compiler not duplicating or merging
        // those functions across codegen units — confirm this is guaranteed.
        let mode = match (src, dst) {
            // imul with two registers encodes directly.
            (Location::GPR(_), Location::GPR(_))
                if std::ptr::eq(op as *const u8, AssemblerX64::emit_imul as *const u8) =>
            {
                RelaxMode::Direct
            }
            // Any other imul operand combination goes through registers.
            _ if std::ptr::eq(op as *const u8, AssemblerX64::emit_imul as *const u8) => {
                RelaxMode::BothToGPR
            }

            // Memory-to-memory is never encodable.
            (Location::Memory(_, _), Location::Memory(_, _)) => RelaxMode::SrcToGPR,
            (Location::Imm64(_), Location::Imm64(_)) | (Location::Imm64(_), Location::Imm32(_)) => {
                RelaxMode::BothToGPR
            }
            // An immediate destination must be staged in a register.
            (_, Location::Imm32(_)) | (_, Location::Imm64(_)) => RelaxMode::DstToGPR,
            // 64-bit immediates cannot be used as a direct source operand
            // (except by mov, handled by the guard below).
            (Location::Imm64(_), Location::Memory(_, _)) => RelaxMode::SrcToGPR,
            (Location::Imm64(_), Location::GPR(_))
                if (op as *const u8 != AssemblerX64::emit_mov as *const u8) =>
            {
                RelaxMode::SrcToGPR
            }
            (_, Location::SIMD(_)) => RelaxMode::SrcToGPR,
            _ => RelaxMode::Direct,
        };

        match mode {
            RelaxMode::SrcToGPR => {
                let temp = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.move_location(sz, src, Location::GPR(temp))?;
                op(&mut self.assembler, sz, Location::GPR(temp), dst)?;
                self.release_gpr(temp);
            }
            RelaxMode::DstToGPR => {
                let temp = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.move_location(sz, dst, Location::GPR(temp))?;
                op(&mut self.assembler, sz, src, Location::GPR(temp))?;
                self.release_gpr(temp);
            }
            RelaxMode::BothToGPR => {
                let temp_src = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                let temp_dst = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.move_location(sz, src, Location::GPR(temp_src))?;
                self.move_location(sz, dst, Location::GPR(temp_dst))?;
                op(
                    &mut self.assembler,
                    sz,
                    Location::GPR(temp_src),
                    Location::GPR(temp_dst),
                )?;
                // Write the result back only when the destination is writable.
                match dst {
                    Location::Memory(_, _) | Location::GPR(_) => {
                        self.move_location(sz, Location::GPR(temp_dst), dst)?;
                    }
                    _ => {}
                }
                self.release_gpr(temp_dst);
                self.release_gpr(temp_src);
            }
            RelaxMode::Direct => {
                op(&mut self.assembler, sz, src, dst)?;
            }
        }
        Ok(())
    }
209    pub fn emit_relaxed_zx_sx(
210        &mut self,
211        op: fn(&mut AssemblerX64, Size, Location, Size, Location) -> Result<(), CompileError>,
212        sz_src: Size,
213        src: Location,
214        sz_dst: Size,
215        dst: Location,
216    ) -> Result<(), CompileError> {
217        match src {
218            Location::Imm32(_) | Location::Imm64(_) => {
219                let tmp_src = self.acquire_temp_gpr().ok_or_else(|| {
220                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
221                })?;
222                self.assembler
223                    .emit_mov(Size::S64, src, Location::GPR(tmp_src))?;
224                let src = Location::GPR(tmp_src);
225
226                match dst {
227                    Location::Imm32(_) | Location::Imm64(_) => unreachable!(),
228                    Location::Memory(_, _) => {
229                        let tmp_dst = self.acquire_temp_gpr().ok_or_else(|| {
230                            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
231                        })?;
232                        op(
233                            &mut self.assembler,
234                            sz_src,
235                            src,
236                            sz_dst,
237                            Location::GPR(tmp_dst),
238                        )?;
239                        self.move_location(Size::S64, Location::GPR(tmp_dst), dst)?;
240
241                        self.release_gpr(tmp_dst);
242                    }
243                    Location::GPR(_) => {
244                        op(&mut self.assembler, sz_src, src, sz_dst, dst)?;
245                    }
246                    _ => {
247                        codegen_error!("singlepass emit_relaxed_zx_sx unreachable");
248                    }
249                };
250
251                self.release_gpr(tmp_src);
252            }
253            Location::GPR(_) | Location::Memory(_, _) => {
254                match dst {
255                    Location::Imm32(_) | Location::Imm64(_) => unreachable!(),
256                    Location::Memory(_, _) => {
257                        let tmp_dst = self.acquire_temp_gpr().ok_or_else(|| {
258                            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
259                        })?;
260                        op(
261                            &mut self.assembler,
262                            sz_src,
263                            src,
264                            sz_dst,
265                            Location::GPR(tmp_dst),
266                        )?;
267                        self.move_location(Size::S64, Location::GPR(tmp_dst), dst)?;
268
269                        self.release_gpr(tmp_dst);
270                    }
271                    Location::GPR(_) => {
272                        op(&mut self.assembler, sz_src, src, sz_dst, dst)?;
273                    }
274                    _ => {
275                        codegen_error!("singlepass emit_relaxed_zx_sx unreachable");
276                    }
277                };
278            }
279            _ => {
280                codegen_error!("singlepass emit_relaxed_zx_sx unreachable");
281            }
282        }
283        Ok(())
284    }
285    /// I32 binary operation with both operands popped from the virtual stack.
286    fn emit_binop_i32(
287        &mut self,
288        f: fn(&mut AssemblerX64, Size, Location, Location) -> Result<(), CompileError>,
289        loc_a: Location,
290        loc_b: Location,
291        ret: Location,
292    ) -> Result<(), CompileError> {
293        if loc_a != ret {
294            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
295                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
296            })?;
297            self.emit_relaxed_mov(Size::S32, loc_a, Location::GPR(tmp))?;
298            self.emit_relaxed_binop(f, Size::S32, loc_b, Location::GPR(tmp))?;
299            self.emit_relaxed_mov(Size::S32, Location::GPR(tmp), ret)?;
300            self.release_gpr(tmp);
301        } else {
302            self.emit_relaxed_binop(f, Size::S32, loc_b, ret)?;
303        }
304        Ok(())
305    }
306    /// I64 binary operation with both operands popped from the virtual stack.
307    fn emit_binop_i64(
308        &mut self,
309        f: fn(&mut AssemblerX64, Size, Location, Location) -> Result<(), CompileError>,
310        loc_a: Location,
311        loc_b: Location,
312        ret: Location,
313    ) -> Result<(), CompileError> {
314        if loc_a != ret {
315            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
316                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
317            })?;
318            self.emit_relaxed_mov(Size::S64, loc_a, Location::GPR(tmp))?;
319            self.emit_relaxed_binop(f, Size::S64, loc_b, Location::GPR(tmp))?;
320            self.emit_relaxed_mov(Size::S64, Location::GPR(tmp), ret)?;
321            self.release_gpr(tmp);
322        } else {
323            self.emit_relaxed_binop(f, Size::S64, loc_b, ret)?;
324        }
325        Ok(())
326    }
327    /// I64 comparison with.
328    fn emit_cmpop_i64_dynamic_b(
329        &mut self,
330        c: Condition,
331        loc_a: Location,
332        loc_b: Location,
333        ret: Location,
334    ) -> Result<(), CompileError> {
335        match ret {
336            Location::GPR(x) => {
337                self.emit_relaxed_cmp(Size::S64, loc_b, loc_a)?;
338                self.assembler.emit_set(c, x)?;
339                self.assembler
340                    .emit_and(Size::S32, Location::Imm32(0xff), Location::GPR(x))?;
341            }
342            Location::Memory(_, _) => {
343                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
344                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
345                })?;
346                self.emit_relaxed_cmp(Size::S64, loc_b, loc_a)?;
347                self.assembler.emit_set(c, tmp)?;
348                self.assembler
349                    .emit_and(Size::S32, Location::Imm32(0xff), Location::GPR(tmp))?;
350                self.move_location(Size::S32, Location::GPR(tmp), ret)?;
351                self.release_gpr(tmp);
352            }
353            _ => {
354                codegen_error!("singlepass emit_cmpop_i64_dynamic_b unreachable");
355            }
356        }
357        Ok(())
358    }
    /// I64 shift with both operands popped from the virtual stack.
    ///
    /// x86 shift instructions take their count in CL, so `loc_b` is staged in
    /// RCX first. NOTE: `loc_b` must be read before `ret` is written, since
    /// the two locations may alias.
    fn emit_shift_i64(
        &mut self,
        f: fn(&mut AssemblerX64, Size, Location, Location) -> Result<(), CompileError>,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        // Shift count goes into RCX (CL).
        self.assembler
            .emit_mov(Size::S64, loc_b, Location::GPR(GPR::RCX))?;

        // Bring the shifted value into the destination if it is not there yet.
        if loc_a != ret {
            self.emit_relaxed_mov(Size::S64, loc_a, ret)?;
        }

        f(&mut self.assembler, Size::S64, Location::GPR(GPR::RCX), ret)
    }
376    /// Moves `loc` to a valid location for `div`/`idiv`.
377    fn emit_relaxed_xdiv(
378        &mut self,
379        op: fn(&mut AssemblerX64, Size, Location) -> Result<(), CompileError>,
380        sz: Size,
381        loc: Location,
382        integer_division_by_zero: Label,
383    ) -> Result<usize, CompileError> {
384        self.assembler.emit_cmp(sz, Location::Imm32(0), loc)?;
385        self.assembler
386            .emit_jmp(Condition::Equal, integer_division_by_zero)?;
387
388        match loc {
389            Location::Imm64(_) | Location::Imm32(_) => {
390                self.move_location(sz, loc, Location::GPR(GPR::RCX))?; // must not be used during div (rax, rdx)
391                let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow);
392                op(&mut self.assembler, sz, Location::GPR(GPR::RCX))?;
393                self.mark_instruction_address_end(offset);
394                Ok(offset)
395            }
396            _ => {
397                let offset = self.mark_instruction_with_trap_code(TrapCode::IntegerOverflow);
398                op(&mut self.assembler, sz, loc)?;
399                self.mark_instruction_address_end(offset);
400                Ok(offset)
401            }
402        }
403    }
404    /// I32 comparison with.
405    fn emit_cmpop_i32_dynamic_b(
406        &mut self,
407        c: Condition,
408        loc_a: Location,
409        loc_b: Location,
410        ret: Location,
411    ) -> Result<(), CompileError> {
412        match ret {
413            Location::GPR(x) => {
414                self.emit_relaxed_cmp(Size::S32, loc_b, loc_a)?;
415                self.assembler.emit_set(c, x)?;
416                self.assembler
417                    .emit_and(Size::S32, Location::Imm32(0xff), Location::GPR(x))?;
418            }
419            Location::Memory(_, _) => {
420                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
421                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
422                })?;
423                self.emit_relaxed_cmp(Size::S32, loc_b, loc_a)?;
424                self.assembler.emit_set(c, tmp)?;
425                self.assembler
426                    .emit_and(Size::S32, Location::Imm32(0xff), Location::GPR(tmp))?;
427                self.move_location(Size::S32, Location::GPR(tmp), ret)?;
428                self.release_gpr(tmp);
429            }
430            _ => {
431                codegen_error!("singlepass emit_cmpop_i32_dynamic_b unreachable");
432            }
433        }
434        Ok(())
435    }
    /// I32 shift with both operands popped from the virtual stack.
    ///
    /// x86 shift instructions take their count in CL, so `loc_b` is staged in
    /// RCX first. NOTE: `loc_b` must be read before `ret` is written, since
    /// the two locations may alias.
    fn emit_shift_i32(
        &mut self,
        f: fn(&mut AssemblerX64, Size, Location, Location) -> Result<(), CompileError>,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        // Shift count goes into RCX (CL).
        self.assembler
            .emit_mov(Size::S32, loc_b, Location::GPR(GPR::RCX))?;

        // Bring the shifted value into the destination if it is not there yet.
        if loc_a != ret {
            self.emit_relaxed_mov(Size::S32, loc_a, ret)?;
        }

        f(&mut self.assembler, Size::S32, Location::GPR(GPR::RCX), ret)
    }
453
    #[allow(clippy::too_many_arguments)]
    /// Computes the native effective address for a wasm linear-memory access
    /// and invokes `cb` with the GPR holding that address.
    ///
    /// Emits the bounds check (when `need_check`) jumping to `heap_access_oob`
    /// and the alignment check (when `check_alignment` and `value_size > 1`)
    /// jumping to `unaligned_atomic`. The instructions emitted by `cb` are
    /// recorded as potentially trapping with `HeapAccessOutOfBounds`.
    fn memory_op<F: FnOnce(&mut Self, GPR) -> Result<(), CompileError>>(
        &mut self,
        addr: Location,
        memarg: &MemArg,
        check_alignment: bool,
        value_size: usize,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
        cb: F,
    ) -> Result<(), CompileError> {
        // This function has been rewritten to use only 2 temporary registers
        // instead of 3 without compromising performance. The number of memory
        // moves should be equivalent to the previous 3-temp-regs version.
        // Register pressure is high on x86_64, and this is needed to be able
        // to use instructions that need RAX, like cmpxchg for example.
        let tmp_addr = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp2 = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;

        // `tmp2` serves as the indirection register; any `Location` built on
        // it is only valid until `tmp2` is next clobbered, so it is reloaded
        // from the vmctx each time it is needed.
        let base_loc = if imported_memories {
            // Imported memories require one level of indirection.
            self.emit_relaxed_binop(
                AssemblerX64::emit_mov,
                Size::S64,
                Location::Memory(self.get_vmctx_reg(), offset),
                Location::GPR(tmp2),
            )?;
            Location::Memory(tmp2, 0)
        } else {
            Location::Memory(self.get_vmctx_reg(), offset)
        };

        // Load the memory base pointer into `tmp2`.
        self.assembler
            .emit_mov(Size::S64, base_loc, Location::GPR(tmp2))?;

        // Load the 32-bit wasm address into `tmp_addr`.
        self.assembler
            .emit_mov(Size::S32, addr, Location::GPR(tmp_addr))?;

        // Add the static offset from the memory immediate.
        // NOTE(review): `memarg.offset` is truncated with `as u32` here; this
        // assumes 32-bit linear memories — confirm for 64-bit memories.
        if memarg.offset != 0 {
            self.assembler.emit_add(
                Size::S32,
                Location::Imm32(memarg.offset as u32),
                Location::GPR(tmp_addr),
            )?;

            // Trap if offset calculation overflowed.
            self.assembler.emit_jmp(Condition::Carry, heap_access_oob)?;
        }

        if need_check {
            let bound_loc = if imported_memories {
                // Imported memories require one level of indirection.
                self.emit_relaxed_binop(
                    AssemblerX64::emit_mov,
                    Size::S64,
                    Location::Memory(self.get_vmctx_reg(), offset),
                    Location::GPR(tmp2),
                )?;
                Location::Memory(tmp2, 8)
            } else {
                Location::Memory(self.get_vmctx_reg(), offset + 8)
            };
            self.assembler
                .emit_mov(Size::S64, bound_loc, Location::GPR(tmp2))?;

            // Compare against `bound - value_size` so the base does not need
            // to be added to both sides first.
            self.assembler.emit_lea(
                Size::S64,
                Location::Memory(tmp2, -(value_size as i32)),
                Location::GPR(tmp2),
            )?;
            // Trap if the end address of the requested area is above that of the linear memory.
            self.assembler
                .emit_cmp(Size::S64, Location::GPR(tmp2), Location::GPR(tmp_addr))?;

            // The bound in `tmp2` is inclusive. So trap only if `tmp_addr > tmp2`.
            self.assembler.emit_jmp(Condition::Above, heap_access_oob)?;
        }
        // Reload `base_loc`: `tmp2` may have been clobbered by the bounds check above.
        let base_loc = if imported_memories {
            // Imported memories require one level of indirection.
            self.emit_relaxed_binop(
                AssemblerX64::emit_mov,
                Size::S64,
                Location::Memory(self.get_vmctx_reg(), offset),
                Location::GPR(tmp2),
            )?;
            Location::Memory(tmp2, 0)
        } else {
            Location::Memory(self.get_vmctx_reg(), offset)
        };
        // Wasm linear memory -> real memory
        self.assembler
            .emit_add(Size::S64, base_loc, Location::GPR(tmp_addr))?;

        self.release_gpr(tmp2);

        // Alignment check for atomics: trap if the address is not a multiple
        // of the access size.
        let align = value_size as u32;
        if check_alignment && align != 1 {
            let tmp_aligncheck = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            self.assembler.emit_mov(
                Size::S32,
                Location::GPR(tmp_addr),
                Location::GPR(tmp_aligncheck),
            )?;
            self.assembler.emit_and(
                Size::S64,
                Location::Imm32(align - 1),
                Location::GPR(tmp_aligncheck),
            )?;
            self.assembler
                .emit_jmp(Condition::NotEqual, unaligned_atomic)?;
            self.release_gpr(tmp_aligncheck);
        }
        // Everything `cb` emits may fault at `[tmp_addr]`; record the range as
        // a potential out-of-bounds heap access.
        let begin = self.assembler.get_offset().0;
        cb(self, tmp_addr)?;
        let end = self.assembler.get_offset().0;
        self.mark_address_range_with_trap_code(TrapCode::HeapAccessOutOfBounds, begin, end);

        self.release_gpr(tmp_addr);
        Ok(())
    }
591
    #[allow(clippy::too_many_arguments)]
    /// Emits an atomic read-modify-write over linear memory using a
    /// `lock cmpxchg` retry loop.
    ///
    /// The current memory value is loaded into `compare` (RAX), `cb` computes
    /// the replacement into `value`, and `lock cmpxchg` publishes it only if
    /// memory still holds `compare`; otherwise the loop retries. The old value
    /// is written to `ret`.
    fn emit_compare_and_swap<F: FnOnce(&mut Self, GPR, GPR) -> Result<(), CompileError>>(
        &mut self,
        loc: Location,
        target: Location,
        ret: Location,
        memarg: &MemArg,
        value_size: usize,
        memory_sz: Size,
        stack_sz: Size,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
        cb: F,
    ) -> Result<(), CompileError> {
        if memory_sz > stack_sz {
            codegen_error!("singlepass emit_compare_and_swap unreachable");
        }

        // `cmpxchg` implicitly compares against RAX, so reserve it.
        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
        // Pick a scratch register for the new value, avoiding a collision
        // with `loc` when it already lives in R14.
        let value = if loc == Location::GPR(GPR::R14) {
            GPR::R13
        } else {
            GPR::R14
        };
        // The chosen register may hold a live value; preserve it on the stack.
        self.assembler.emit_push(Size::S64, Location::GPR(value))?;

        self.move_location(stack_sz, loc, Location::GPR(value))?;

        let retry = self.assembler.get_label();
        self.emit_label(retry)?;

        self.memory_op(
            target,
            memarg,
            true,
            value_size,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                // Load the current value, expose it as the return value, let
                // `cb` compute the replacement, then attempt the swap.
                this.load_address(memory_sz, Location::GPR(compare), Location::Memory(addr, 0))?;
                this.move_location(stack_sz, Location::GPR(compare), ret)?;
                cb(this, compare, value)?;
                this.assembler.emit_lock_cmpxchg(
                    memory_sz,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )
            },
        )?;

        // cmpxchg clears ZF when the exchange failed (memory changed): retry.
        self.assembler.emit_jmp(Condition::NotEqual, retry)?;

        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
        self.release_gpr(compare);
        Ok(())
    }
654
    // Checks for underflow/overflow/nan: branches to the matching label, or to
    // `succeed_label` when `reg` is safely convertible to the target integer.
    #[allow(clippy::too_many_arguments)]
    fn emit_f32_int_conv_check(
        &mut self,
        reg: XMM,
        lower_bound: f32,
        upper_bound: f32,
        underflow_label: Label,
        overflow_label: Label,
        nan_label: Label,
        succeed_label: Label,
    ) -> Result<(), CompileError> {
        // The bounds are materialized through a GPR as raw f32 bit patterns.
        let lower_bound = f32::to_bits(lower_bound);
        let upper_bound = f32::to_bits(upper_bound);

        let tmp = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_x = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        // Underflow: compare `reg` against the lower bound; a non-zero
        // comparison mask (moved into `tmp`) takes the underflow branch.
        self.move_location(Size::S32, Location::Imm32(lower_bound), Location::GPR(tmp))?;
        self.move_location(Size::S32, Location::GPR(tmp), Location::SIMD(tmp_x))?;
        self.assembler
            .emit_vcmpless(reg, XMMOrMemory::XMM(tmp_x), tmp_x)?;
        self.move_location(Size::S32, Location::SIMD(tmp_x), Location::GPR(tmp))?;
        self.assembler
            .emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp))?;
        self.assembler
            .emit_jmp(Condition::NotEqual, underflow_label)?;

        // Overflow: same scheme against the upper bound.
        self.move_location(Size::S32, Location::Imm32(upper_bound), Location::GPR(tmp))?;
        self.move_location(Size::S32, Location::GPR(tmp), Location::SIMD(tmp_x))?;
        self.assembler
            .emit_vcmpgess(reg, XMMOrMemory::XMM(tmp_x), tmp_x)?;
        self.move_location(Size::S32, Location::SIMD(tmp_x), Location::GPR(tmp))?;
        self.assembler
            .emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp))?;
        self.assembler
            .emit_jmp(Condition::NotEqual, overflow_label)?;

        // NaN: `reg == reg` is false only for NaN, yielding a zero mask.
        self.assembler
            .emit_vcmpeqss(reg, XMMOrMemory::XMM(reg), tmp_x)?;
        self.move_location(Size::S32, Location::SIMD(tmp_x), Location::GPR(tmp))?;
        self.assembler
            .emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp))?;
        self.assembler.emit_jmp(Condition::Equal, nan_label)?;

        // All checks passed: unconditionally continue at the success label.
        self.assembler.emit_jmp(Condition::None, succeed_label)?;

        self.release_simd(tmp_x);
        self.release_gpr(tmp);
        Ok(())
    }
713
714    // Checks for underflow/overflow/nan before IxxTrunc{U/S}F32.
715    fn emit_f32_int_conv_check_trap(
716        &mut self,
717        reg: XMM,
718        lower_bound: f32,
719        upper_bound: f32,
720    ) -> Result<(), CompileError> {
721        let trap_overflow = self.assembler.get_label();
722        let trap_badconv = self.assembler.get_label();
723        let end = self.assembler.get_label();
724
725        self.emit_f32_int_conv_check(
726            reg,
727            lower_bound,
728            upper_bound,
729            trap_overflow,
730            trap_overflow,
731            trap_badconv,
732            end,
733        )?;
734
735        self.emit_label(trap_overflow)?;
736
737        self.emit_illegal_op_internal(TrapCode::IntegerOverflow)?;
738
739        self.emit_label(trap_badconv)?;
740
741        self.emit_illegal_op_internal(TrapCode::BadConversionToInteger)?;
742
743        self.emit_label(end)?;
744        Ok(())
745    }
746    #[allow(clippy::too_many_arguments)]
747    fn emit_f32_int_conv_check_sat<
748        F1: FnOnce(&mut Self) -> Result<(), CompileError>,
749        F2: FnOnce(&mut Self) -> Result<(), CompileError>,
750        F3: FnOnce(&mut Self) -> Result<(), CompileError>,
751        F4: FnOnce(&mut Self) -> Result<(), CompileError>,
752    >(
753        &mut self,
754        reg: XMM,
755        lower_bound: f32,
756        upper_bound: f32,
757        underflow_cb: F1,
758        overflow_cb: F2,
759        nan_cb: Option<F3>,
760        convert_cb: F4,
761    ) -> Result<(), CompileError> {
762        // As an optimization nan_cb is optional, and when set to None we turn
763        // use 'underflow' as the 'nan' label. This is useful for callers who
764        // set the return value to zero for both underflow and nan.
765
766        let underflow = self.assembler.get_label();
767        let overflow = self.assembler.get_label();
768        let nan = if nan_cb.is_some() {
769            self.assembler.get_label()
770        } else {
771            underflow
772        };
773        let convert = self.assembler.get_label();
774        let end = self.assembler.get_label();
775
776        self.emit_f32_int_conv_check(
777            reg,
778            lower_bound,
779            upper_bound,
780            underflow,
781            overflow,
782            nan,
783            convert,
784        )?;
785
786        self.emit_label(underflow)?;
787        underflow_cb(self)?;
788        self.assembler.emit_jmp(Condition::None, end)?;
789
790        self.emit_label(overflow)?;
791        overflow_cb(self)?;
792        self.assembler.emit_jmp(Condition::None, end)?;
793
794        if let Some(cb) = nan_cb {
795            self.emit_label(nan)?;
796            cb(self)?;
797            self.assembler.emit_jmp(Condition::None, end)?;
798        }
799
800        self.emit_label(convert)?;
801        convert_cb(self)?;
802        self.emit_label(end)
803    }
    /// Classifies the f64 in `reg` for integer conversion and branches to one
    /// of the four labels:
    /// * `underflow_label` when `reg <= lower_bound`,
    /// * `overflow_label` when `reg >= upper_bound`,
    /// * `nan_label` when `reg` is NaN (the `reg == reg` self-compare fails),
    /// * `succeed_label` when the value is safely convertible.
    ///
    /// The bounds are materialized as raw f64 bit patterns through a temp GPR.
    /// Uses one temp GPR and one temp XMM; both are released before returning.
    #[allow(clippy::too_many_arguments)]
    fn emit_f64_int_conv_check(
        &mut self,
        reg: XMM,
        lower_bound: f64,
        upper_bound: f64,
        underflow_label: Label,
        overflow_label: Label,
        nan_label: Label,
        succeed_label: Label,
    ) -> Result<(), CompileError> {
        let lower_bound = f64::to_bits(lower_bound);
        let upper_bound = f64::to_bits(upper_bound);

        let tmp = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_x = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        // Underflow: jump if reg <= lower_bound. vcmplesd leaves an all-ones
        // mask in tmp_x on a true compare; its low 32 bits are tested below.
        self.move_location(Size::S64, Location::Imm64(lower_bound), Location::GPR(tmp))?;
        self.move_location(Size::S64, Location::GPR(tmp), Location::SIMD(tmp_x))?;
        self.assembler
            .emit_vcmplesd(reg, XMMOrMemory::XMM(tmp_x), tmp_x)?;
        self.move_location(Size::S32, Location::SIMD(tmp_x), Location::GPR(tmp))?;
        self.assembler
            .emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp))?;
        self.assembler
            .emit_jmp(Condition::NotEqual, underflow_label)?;

        // Overflow: jump if reg >= upper_bound (same mask technique).
        self.move_location(Size::S64, Location::Imm64(upper_bound), Location::GPR(tmp))?;
        self.move_location(Size::S64, Location::GPR(tmp), Location::SIMD(tmp_x))?;
        self.assembler
            .emit_vcmpgesd(reg, XMMOrMemory::XMM(tmp_x), tmp_x)?;
        self.move_location(Size::S32, Location::SIMD(tmp_x), Location::GPR(tmp))?;
        self.assembler
            .emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp))?;
        self.assembler
            .emit_jmp(Condition::NotEqual, overflow_label)?;

        // NaN: an ordered equality compare of reg with itself produces a zero
        // mask exactly when reg is NaN.
        self.assembler
            .emit_vcmpeqsd(reg, XMMOrMemory::XMM(reg), tmp_x)?;
        self.move_location(Size::S32, Location::SIMD(tmp_x), Location::GPR(tmp))?;
        self.assembler
            .emit_cmp(Size::S32, Location::Imm32(0), Location::GPR(tmp))?;
        self.assembler.emit_jmp(Condition::Equal, nan_label)?;

        self.assembler.emit_jmp(Condition::None, succeed_label)?;

        // Releasing after the jumps is fine: these are compile-time register
        // allocator slots, not runtime state.
        self.release_simd(tmp_x);
        self.release_gpr(tmp);
        Ok(())
    }
862    // Checks for underflow/overflow/nan before IxxTrunc{U/S}F64.. return offset/len for trap_overflow and trap_badconv
863    fn emit_f64_int_conv_check_trap(
864        &mut self,
865        reg: XMM,
866        lower_bound: f64,
867        upper_bound: f64,
868    ) -> Result<(), CompileError> {
869        let trap_overflow = self.assembler.get_label();
870        let trap_badconv = self.assembler.get_label();
871        let end = self.assembler.get_label();
872
873        self.emit_f64_int_conv_check(
874            reg,
875            lower_bound,
876            upper_bound,
877            trap_overflow,
878            trap_overflow,
879            trap_badconv,
880            end,
881        )?;
882
883        self.emit_label(trap_overflow)?;
884        self.emit_illegal_op_internal(TrapCode::IntegerOverflow)?;
885
886        self.emit_label(trap_badconv)?;
887        self.emit_illegal_op_internal(TrapCode::BadConversionToInteger)?;
888
889        self.emit_label(end)
890    }
    /// Saturating-conversion skeleton for f64 sources: runs the range/NaN
    /// classification, then stitches the caller-provided callbacks in as the
    /// `underflow`/`overflow`/`nan`/`convert` branches, each jumping to a
    /// shared `end` label afterwards.
    #[allow(clippy::too_many_arguments)]
    fn emit_f64_int_conv_check_sat<
        F1: FnOnce(&mut Self) -> Result<(), CompileError>,
        F2: FnOnce(&mut Self) -> Result<(), CompileError>,
        F3: FnOnce(&mut Self) -> Result<(), CompileError>,
        F4: FnOnce(&mut Self) -> Result<(), CompileError>,
    >(
        &mut self,
        reg: XMM,
        lower_bound: f64,
        upper_bound: f64,
        underflow_cb: F1,
        overflow_cb: F2,
        nan_cb: Option<F3>,
        convert_cb: F4,
    ) -> Result<(), CompileError> {
        // As an optimization nan_cb is optional; when it is None we reuse
        // 'underflow' as the 'nan' label. This is useful for callers who
        // set the return value to zero for both underflow and nan.

        let underflow = self.assembler.get_label();
        let overflow = self.assembler.get_label();
        let nan = if nan_cb.is_some() {
            self.assembler.get_label()
        } else {
            underflow
        };
        let convert = self.assembler.get_label();
        let end = self.assembler.get_label();

        self.emit_f64_int_conv_check(
            reg,
            lower_bound,
            upper_bound,
            underflow,
            overflow,
            nan,
            convert,
        )?;

        self.emit_label(underflow)?;
        underflow_cb(self)?;
        self.assembler.emit_jmp(Condition::None, end)?;

        self.emit_label(overflow)?;
        overflow_cb(self)?;
        self.assembler.emit_jmp(Condition::None, end)?;

        // Only emit a distinct NaN block when the caller supplied one;
        // otherwise the NaN case already jumps to the underflow block.
        if let Some(cb) = nan_cb {
            self.emit_label(nan)?;
            cb(self)?;
            self.assembler.emit_jmp(Condition::None, end)?;
        }

        self.emit_label(convert)?;
        convert_cb(self)?;
        self.emit_label(end)
    }
949    /// Moves `src1` and `src2` to valid locations and possibly adds a layer of indirection for `dst` for AVX instructions.
950    fn emit_relaxed_avx(
951        &mut self,
952        op: fn(&mut AssemblerX64, XMM, XMMOrMemory, XMM) -> Result<(), CompileError>,
953        src1: Location,
954        src2: Location,
955        dst: Location,
956    ) -> Result<(), CompileError> {
957        self.emit_relaxed_avx_base(
958            |this, src1, src2, dst| op(&mut this.assembler, src1, src2, dst),
959            src1,
960            src2,
961            dst,
962        )
963    }
964
    /// Moves `src1` and `src2` to valid locations and possibly adds a layer of indirection for `dst` for AVX instructions.
    ///
    /// `op` is invoked with `src1` in an XMM register, `src2` as XMM-or-memory
    /// and an XMM destination. Immediates are routed through a temp GPR; a
    /// non-XMM `dst` receives the result via a temp XMM. Uses three temp XMMs
    /// and one temp GPR, all released before returning.
    fn emit_relaxed_avx_base<
        F: FnOnce(&mut Self, XMM, XMMOrMemory, XMM) -> Result<(), CompileError>,
    >(
        &mut self,
        op: F,
        src1: Location,
        src2: Location,
        dst: Location,
    ) -> Result<(), CompileError> {
        let tmp1 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp2 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp3 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmpg = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;

        // The first operand must live in an XMM register: copy GPR/memory
        // values directly, and stage immediates through the temp GPR.
        let src1 = match src1 {
            Location::SIMD(x) => x,
            Location::GPR(_) | Location::Memory(_, _) => {
                self.assembler
                    .emit_mov(Size::S64, src1, Location::SIMD(tmp1))?;
                tmp1
            }
            Location::Imm32(_) => {
                self.assembler
                    .emit_mov(Size::S32, src1, Location::GPR(tmpg))?;
                self.move_location(Size::S32, Location::GPR(tmpg), Location::SIMD(tmp1))?;
                tmp1
            }
            Location::Imm64(_) => {
                self.assembler
                    .emit_mov(Size::S64, src1, Location::GPR(tmpg))?;
                self.move_location(Size::S64, Location::GPR(tmpg), Location::SIMD(tmp1))?;
                tmp1
            }
            _ => {
                codegen_error!("singlepass emit_relaxed_avx_base unreachable")
            }
        };

        // The second operand may stay in memory (AVX accepts an m64 source);
        // GPR values and immediates are staged into tmp2.
        let src2 = match src2 {
            Location::SIMD(x) => XMMOrMemory::XMM(x),
            Location::Memory(base, disp) => XMMOrMemory::Memory(base, disp),
            Location::GPR(_) => {
                self.assembler
                    .emit_mov(Size::S64, src2, Location::SIMD(tmp2))?;
                XMMOrMemory::XMM(tmp2)
            }
            Location::Imm32(_) => {
                self.assembler
                    .emit_mov(Size::S32, src2, Location::GPR(tmpg))?;
                self.move_location(Size::S32, Location::GPR(tmpg), Location::SIMD(tmp2))?;
                XMMOrMemory::XMM(tmp2)
            }
            Location::Imm64(_) => {
                self.assembler
                    .emit_mov(Size::S64, src2, Location::GPR(tmpg))?;
                self.move_location(Size::S64, Location::GPR(tmpg), Location::SIMD(tmp2))?;
                XMMOrMemory::XMM(tmp2)
            }
            _ => {
                codegen_error!("singlepass emit_relaxed_avx_base unreachable")
            }
        };

        // Produce the result directly into an XMM destination, or via tmp3
        // followed by a store when dst is a GPR or memory.
        match dst {
            Location::SIMD(x) => {
                op(self, src1, src2, x)?;
            }
            Location::Memory(_, _) | Location::GPR(_) => {
                op(self, src1, src2, tmp3)?;
                self.assembler
                    .emit_mov(Size::S64, Location::SIMD(tmp3), dst)?;
            }
            _ => {
                codegen_error!("singlepass emit_relaxed_avx_base unreachable")
            }
        }

        self.release_gpr(tmpg);
        self.release_simd(tmp3);
        self.release_simd(tmp2);
        self.release_simd(tmp1);
        Ok(())
    }
1057
    /// Saturating `i64.trunc_sat_f64_u`: converts the f64 in `loc` into an
    /// unsigned 64-bit integer stored at `ret`, clamping out-of-range inputs
    /// to `0`/`u64::MAX`. NaN shares the underflow callback (`nan_cb` is
    /// `None`) and therefore also yields zero.
    fn convert_i64_f64_u_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
        self.emit_f64_int_conv_check_sat(
            tmp_in,
            GEF64_LT_U64_MIN,
            LEF64_GT_U64_MAX,
            // Underflow (and NaN, see above): result is 0.
            |this| {
                this.assembler
                    .emit_mov(Size::S64, Location::Imm64(0), Location::GPR(tmp_out))
            },
            // Overflow: saturate to u64::MAX.
            |this| {
                this.assembler.emit_mov(
                    Size::S64,
                    Location::Imm64(u64::MAX),
                    Location::GPR(tmp_out),
                )
            },
            None::<fn(this: &mut Self) -> Result<(), CompileError>>,
            // In range: x86-64 only has a *signed* f64->i64 truncation, so the
            // unsigned conversion is synthesized: truncate (x - 2^63) with the
            // sign bit XOR-ed back in, truncate x directly, and select the
            // biased result when x >= 2^63 (cmovae after ucomisd).
            |this| {
                if this.assembler.arch_has_itruncf() {
                    this.assembler.arch_emit_i64_trunc_uf64(tmp_in, tmp_out)
                } else {
                    let tmp = this.acquire_temp_gpr().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                    })?;
                    let tmp_x1 = this.acquire_temp_simd().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
                    })?;
                    let tmp_x2 = this.acquire_temp_simd().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
                    })?;

                    // tmp_x1 = 2^63 as f64 (bit pattern 0x43E0000000000000).
                    this.assembler.emit_mov(
                        Size::S64,
                        Location::Imm64(4890909195324358656u64),
                        Location::GPR(tmp),
                    )?; //double 9.2233720368547758E+18
                    this.assembler.emit_mov(
                        Size::S64,
                        Location::GPR(tmp),
                        Location::SIMD(tmp_x1),
                    )?;
                    // Keep an untouched copy of the input in tmp_x2.
                    this.assembler.emit_mov(
                        Size::S64,
                        Location::SIMD(tmp_in),
                        Location::SIMD(tmp_x2),
                    )?;
                    // tmp_out = trunc(x - 2^63).
                    this.assembler
                        .emit_vsubsd(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in)?;
                    this.assembler
                        .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
                    // tmp = trunc(x - 2^63) ^ 2^63 (restore the high bit).
                    this.assembler.emit_mov(
                        Size::S64,
                        Location::Imm64(0x8000000000000000u64),
                        Location::GPR(tmp),
                    )?;
                    this.assembler.emit_xor(
                        Size::S64,
                        Location::GPR(tmp_out),
                        Location::GPR(tmp),
                    )?;
                    // tmp_out = trunc(x), valid when x < 2^63.
                    this.assembler
                        .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out)?;
                    // Select the biased value when x >= 2^63.
                    this.assembler
                        .emit_ucomisd(XMMOrMemory::XMM(tmp_x1), tmp_x2)?;
                    this.assembler.emit_cmovae_gpr_64(tmp, tmp_out)?;

                    this.release_simd(tmp_x2);
                    this.release_simd(tmp_x1);
                    this.release_gpr(tmp);
                    Ok(())
                }
            },
        )?;

        self.assembler
            .emit_mov(Size::S64, Location::GPR(tmp_out), ret)?;
        self.release_simd(tmp_in);
        self.release_gpr(tmp_out);
        Ok(())
    }
    /// Trapping `i64.trunc_f64_u`: converts the f64 in `loc` into an unsigned
    /// 64-bit integer at `ret`, trapping on out-of-range inputs or NaN.
    fn convert_i64_f64_u_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        if self.assembler.arch_has_itruncf() {
            // The target provides a native checked unsigned truncation.
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
            self.assembler.arch_emit_i64_trunc_uf64(tmp_in, tmp_out)?;
            self.emit_relaxed_mov(Size::S64, Location::GPR(tmp_out), ret)?;
            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        } else {
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?; // xmm2

            // Trap on out-of-range/NaN first; afterwards the value is known
            // to fit in [0, u64::MAX].
            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
            self.emit_f64_int_conv_check_trap(tmp_in, GEF64_LT_U64_MIN, LEF64_GT_U64_MAX)?;

            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?; // r15
            let tmp_x1 = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?; // xmm1
            let tmp_x2 = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?; // xmm3

            // x86-64 only has a *signed* f64->i64 truncation: truncate
            // (x - 2^63) with the sign bit XOR-ed back in, truncate x
            // directly, then select the biased value when x >= 2^63.
            self.move_location(
                Size::S64,
                Location::Imm64(4890909195324358656u64),
                Location::GPR(tmp),
            )?; //double 9.2233720368547758E+18
            self.move_location(Size::S64, Location::GPR(tmp), Location::SIMD(tmp_x1))?;
            self.move_location(Size::S64, Location::SIMD(tmp_in), Location::SIMD(tmp_x2))?;
            self.assembler
                .emit_vsubsd(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in)?;
            self.assembler
                .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
            self.move_location(
                Size::S64,
                Location::Imm64(0x8000000000000000u64),
                Location::GPR(tmp),
            )?;
            self.assembler
                .emit_xor(Size::S64, Location::GPR(tmp_out), Location::GPR(tmp))?;
            self.assembler
                .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out)?;
            self.assembler
                .emit_ucomisd(XMMOrMemory::XMM(tmp_x1), tmp_x2)?;
            self.assembler.emit_cmovae_gpr_64(tmp, tmp_out)?;
            self.move_location(Size::S64, Location::GPR(tmp_out), ret)?;

            self.release_simd(tmp_x2);
            self.release_simd(tmp_x1);
            self.release_gpr(tmp);
            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        }
        Ok(())
    }
1213    fn convert_i64_f64_s_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
1214        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1215            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1216        })?;
1217        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1218            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1219        })?;
1220
1221        self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
1222        self.emit_f64_int_conv_check_sat(
1223            tmp_in,
1224            GEF64_LT_I64_MIN,
1225            LEF64_GT_I64_MAX,
1226            |this| {
1227                this.assembler.emit_mov(
1228                    Size::S64,
1229                    Location::Imm64(i64::MIN as u64),
1230                    Location::GPR(tmp_out),
1231                )
1232            },
1233            |this| {
1234                this.assembler.emit_mov(
1235                    Size::S64,
1236                    Location::Imm64(i64::MAX as u64),
1237                    Location::GPR(tmp_out),
1238                )
1239            },
1240            Some(|this: &mut Self| {
1241                this.assembler
1242                    .emit_mov(Size::S64, Location::Imm64(0), Location::GPR(tmp_out))
1243            }),
1244            |this| {
1245                if this.assembler.arch_has_itruncf() {
1246                    this.assembler.arch_emit_i64_trunc_sf64(tmp_in, tmp_out)
1247                } else {
1248                    this.assembler
1249                        .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)
1250                }
1251            },
1252        )?;
1253
1254        self.assembler
1255            .emit_mov(Size::S64, Location::GPR(tmp_out), ret)?;
1256        self.release_simd(tmp_in);
1257        self.release_gpr(tmp_out);
1258        Ok(())
1259    }
1260    fn convert_i64_f64_s_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
1261        if self.assembler.arch_has_itruncf() {
1262            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1263                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1264            })?;
1265            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1266                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1267            })?;
1268            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
1269            self.assembler.arch_emit_i64_trunc_sf64(tmp_in, tmp_out)?;
1270            self.emit_relaxed_mov(Size::S64, Location::GPR(tmp_out), ret)?;
1271            self.release_simd(tmp_in);
1272            self.release_gpr(tmp_out);
1273        } else {
1274            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1275                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1276            })?;
1277            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1278                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1279            })?;
1280
1281            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
1282            self.emit_f64_int_conv_check_trap(tmp_in, GEF64_LT_I64_MIN, LEF64_GT_I64_MAX)?;
1283
1284            self.assembler
1285                .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
1286            self.move_location(Size::S64, Location::GPR(tmp_out), ret)?;
1287
1288            self.release_simd(tmp_in);
1289            self.release_gpr(tmp_out);
1290        }
1291        Ok(())
1292    }
1293    fn convert_i32_f64_s_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
1294        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1295            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1296        })?;
1297        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1298            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1299        })?;
1300
1301        let real_in = match loc {
1302            Location::Imm32(_) | Location::Imm64(_) => {
1303                self.move_location(Size::S64, loc, Location::GPR(tmp_out))?;
1304                self.move_location(Size::S64, Location::GPR(tmp_out), Location::SIMD(tmp_in))?;
1305                tmp_in
1306            }
1307            Location::SIMD(x) => x,
1308            _ => {
1309                self.move_location(Size::S64, loc, Location::SIMD(tmp_in))?;
1310                tmp_in
1311            }
1312        };
1313
1314        self.emit_f64_int_conv_check_sat(
1315            real_in,
1316            GEF64_LT_I32_MIN,
1317            LEF64_GT_I32_MAX,
1318            |this| {
1319                this.assembler.emit_mov(
1320                    Size::S32,
1321                    Location::Imm32(i32::MIN as u32),
1322                    Location::GPR(tmp_out),
1323                )
1324            },
1325            |this| {
1326                this.assembler.emit_mov(
1327                    Size::S32,
1328                    Location::Imm32(i32::MAX as u32),
1329                    Location::GPR(tmp_out),
1330                )
1331            },
1332            Some(|this: &mut Self| {
1333                this.assembler
1334                    .emit_mov(Size::S32, Location::Imm32(0), Location::GPR(tmp_out))
1335            }),
1336            |this| {
1337                if this.assembler.arch_has_itruncf() {
1338                    this.assembler.arch_emit_i32_trunc_sf64(tmp_in, tmp_out)
1339                } else {
1340                    this.assembler
1341                        .emit_cvttsd2si_32(XMMOrMemory::XMM(real_in), tmp_out)
1342                }
1343            },
1344        )?;
1345
1346        self.assembler
1347            .emit_mov(Size::S32, Location::GPR(tmp_out), ret)?;
1348        self.release_simd(tmp_in);
1349        self.release_gpr(tmp_out);
1350        Ok(())
1351    }
1352    fn convert_i32_f64_s_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
1353        if self.assembler.arch_has_itruncf() {
1354            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1355                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1356            })?;
1357            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1358                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1359            })?;
1360            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
1361            self.assembler.arch_emit_i32_trunc_sf64(tmp_in, tmp_out)?;
1362            self.emit_relaxed_mov(Size::S32, Location::GPR(tmp_out), ret)?;
1363            self.release_simd(tmp_in);
1364            self.release_gpr(tmp_out);
1365        } else {
1366            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1367                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1368            })?;
1369            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1370                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1371            })?;
1372
1373            let real_in = match loc {
1374                Location::Imm32(_) | Location::Imm64(_) => {
1375                    self.move_location(Size::S64, loc, Location::GPR(tmp_out))?;
1376                    self.move_location(Size::S64, Location::GPR(tmp_out), Location::SIMD(tmp_in))?;
1377                    tmp_in
1378                }
1379                Location::SIMD(x) => x,
1380                _ => {
1381                    self.move_location(Size::S64, loc, Location::SIMD(tmp_in))?;
1382                    tmp_in
1383                }
1384            };
1385
1386            self.emit_f64_int_conv_check_trap(real_in, GEF64_LT_I32_MIN, LEF64_GT_I32_MAX)?;
1387
1388            self.assembler
1389                .emit_cvttsd2si_32(XMMOrMemory::XMM(real_in), tmp_out)?;
1390            self.move_location(Size::S32, Location::GPR(tmp_out), ret)?;
1391
1392            self.release_simd(tmp_in);
1393            self.release_gpr(tmp_out);
1394        }
1395        Ok(())
1396    }
1397    fn convert_i32_f64_u_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
1398        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1399            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1400        })?;
1401        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1402            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1403        })?;
1404
1405        self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
1406        self.emit_f64_int_conv_check_sat(
1407            tmp_in,
1408            GEF64_LT_U32_MIN,
1409            LEF64_GT_U32_MAX,
1410            |this| {
1411                this.assembler
1412                    .emit_mov(Size::S32, Location::Imm32(0), Location::GPR(tmp_out))
1413            },
1414            |this| {
1415                this.assembler.emit_mov(
1416                    Size::S32,
1417                    Location::Imm32(u32::MAX),
1418                    Location::GPR(tmp_out),
1419                )
1420            },
1421            None::<fn(this: &mut Self) -> Result<(), CompileError>>,
1422            |this| {
1423                if this.assembler.arch_has_itruncf() {
1424                    this.assembler.arch_emit_i32_trunc_uf64(tmp_in, tmp_out)
1425                } else {
1426                    this.assembler
1427                        .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)
1428                }
1429            },
1430        )?;
1431
1432        self.assembler
1433            .emit_mov(Size::S32, Location::GPR(tmp_out), ret)?;
1434        self.release_simd(tmp_in);
1435        self.release_gpr(tmp_out);
1436        Ok(())
1437    }
1438    fn convert_i32_f64_u_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
1439        if self.assembler.arch_has_itruncf() {
1440            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1441                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1442            })?;
1443            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1444                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1445            })?;
1446            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
1447            self.assembler.arch_emit_i32_trunc_uf64(tmp_in, tmp_out)?;
1448            self.emit_relaxed_mov(Size::S32, Location::GPR(tmp_out), ret)?;
1449            self.release_simd(tmp_in);
1450            self.release_gpr(tmp_out);
1451        } else {
1452            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1453                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1454            })?;
1455            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1456                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1457            })?;
1458
1459            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp_in))?;
1460            self.emit_f64_int_conv_check_trap(tmp_in, GEF64_LT_U32_MIN, LEF64_GT_U32_MAX)?;
1461
1462            self.assembler
1463                .emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
1464            self.move_location(Size::S32, Location::GPR(tmp_out), ret)?;
1465
1466            self.release_simd(tmp_in);
1467            self.release_gpr(tmp_out);
1468        }
1469        Ok(())
1470    }
    /// Saturating `i64.trunc_sat_f32_u`: converts the f32 in `loc` into an
    /// unsigned 64-bit integer stored at `ret`, clamping out-of-range inputs
    /// to `0`/`u64::MAX`. NaN shares the underflow callback (`nan_cb` is
    /// `None`) and therefore also yields zero.
    fn convert_i64_f32_u_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
        self.emit_f32_int_conv_check_sat(
            tmp_in,
            GEF32_LT_U64_MIN,
            LEF32_GT_U64_MAX,
            // Underflow (and NaN, see above): result is 0.
            |this| {
                this.assembler
                    .emit_mov(Size::S64, Location::Imm64(0), Location::GPR(tmp_out))
            },
            // Overflow: saturate to u64::MAX.
            |this| {
                this.assembler.emit_mov(
                    Size::S64,
                    Location::Imm64(u64::MAX),
                    Location::GPR(tmp_out),
                )
            },
            None::<fn(this: &mut Self) -> Result<(), CompileError>>,
            // In range: x86-64 only has a *signed* f32->i64 truncation, so the
            // unsigned conversion is synthesized: truncate (x - 2^63) with the
            // sign bit XOR-ed back in, truncate x directly, and select the
            // biased result when x >= 2^63 (cmovae after ucomiss).
            |this| {
                if this.assembler.arch_has_itruncf() {
                    this.assembler.arch_emit_i64_trunc_uf32(tmp_in, tmp_out)
                } else {
                    let tmp = this.acquire_temp_gpr().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                    })?;
                    let tmp_x1 = this.acquire_temp_simd().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
                    })?;
                    let tmp_x2 = this.acquire_temp_simd().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
                    })?;

                    // tmp_x1 = 2^63 as f32 (bit pattern 0x5F000000).
                    this.assembler.emit_mov(
                        Size::S32,
                        Location::Imm32(1593835520u32),
                        Location::GPR(tmp),
                    )?; //float 9.22337203E+18
                    this.assembler.emit_mov(
                        Size::S32,
                        Location::GPR(tmp),
                        Location::SIMD(tmp_x1),
                    )?;
                    // Keep an untouched copy of the input in tmp_x2.
                    this.assembler.emit_mov(
                        Size::S32,
                        Location::SIMD(tmp_in),
                        Location::SIMD(tmp_x2),
                    )?;
                    // tmp_out = trunc(x - 2^63).
                    this.assembler
                        .emit_vsubss(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in)?;
                    this.assembler
                        .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
                    // tmp = trunc(x - 2^63) ^ 2^63 (restore the high bit).
                    this.assembler.emit_mov(
                        Size::S64,
                        Location::Imm64(0x8000000000000000u64),
                        Location::GPR(tmp),
                    )?;
                    this.assembler.emit_xor(
                        Size::S64,
                        Location::GPR(tmp_out),
                        Location::GPR(tmp),
                    )?;
                    // tmp_out = trunc(x), valid when x < 2^63.
                    this.assembler
                        .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out)?;
                    // Select the biased value when x >= 2^63.
                    this.assembler
                        .emit_ucomiss(XMMOrMemory::XMM(tmp_x1), tmp_x2)?;
                    this.assembler.emit_cmovae_gpr_64(tmp, tmp_out)?;

                    this.release_simd(tmp_x2);
                    this.release_simd(tmp_x1);
                    this.release_gpr(tmp);
                    Ok(())
                }
            },
        )?;

        self.assembler
            .emit_mov(Size::S64, Location::GPR(tmp_out), ret)?;
        self.release_simd(tmp_in);
        self.release_gpr(tmp_out);
        Ok(())
    }
    /// Trapping f32 -> unsigned i64 truncation: loads the f32 from `loc`,
    /// traps on NaN / out-of-range input, and stores the result in `ret`.
    fn convert_i64_f32_u_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        if self.assembler.arch_has_itruncf() {
            // Fast path: the target provides a native unsigned-truncation op.
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?;
            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
            self.assembler.arch_emit_i64_trunc_uf32(tmp_in, tmp_out)?;
            self.emit_relaxed_mov(Size::S64, Location::GPR(tmp_out), ret)?;
            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        } else {
            // Software path: `cvttss2si` only produces *signed* results, so
            // values >= 2^63 are handled by converting `value - 2^63` and
            // OR-ing the top bit back in afterwards (via xor with 1<<63).
            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;
            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?; // xmm2

            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
            // Trap if the value is outside the representable u64 range (or NaN).
            self.emit_f32_int_conv_check_trap(tmp_in, GEF32_LT_U64_MIN, LEF32_GT_U64_MAX)?;

            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?; // r15
            let tmp_x1 = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?; // xmm1
            let tmp_x2 = self.acquire_temp_simd().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
            })?; // xmm3

            // 1593835520 is the f32 bit pattern of 2^63 (9.223372e18).
            self.move_location(
                Size::S32,
                Location::Imm32(1593835520u32),
                Location::GPR(tmp),
            )?; //float 9.22337203E+18
            // tmp_x1 = 2^63 as f32; tmp_x2 = copy of the original input.
            self.move_location(Size::S32, Location::GPR(tmp), Location::SIMD(tmp_x1))?;
            self.move_location(Size::S32, Location::SIMD(tmp_in), Location::SIMD(tmp_x2))?;
            // tmp_in = input - 2^63, then convert; meaningful when input >= 2^63.
            self.assembler
                .emit_vsubss(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in)?;
            self.assembler
                .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
            // tmp = converted(input - 2^63) XOR (1<<63): the "large value" result.
            self.move_location(
                Size::S64,
                Location::Imm64(0x8000000000000000u64),
                Location::GPR(tmp),
            )?;
            self.assembler
                .emit_xor(Size::S64, Location::GPR(tmp_out), Location::GPR(tmp))?;
            // tmp_out = direct signed conversion of the original input
            // (valid when input < 2^63).
            self.assembler
                .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out)?;
            // Select: if input >= 2^63 (AE after ucomiss), take the biased result.
            self.assembler
                .emit_ucomiss(XMMOrMemory::XMM(tmp_x1), tmp_x2)?;
            self.assembler.emit_cmovae_gpr_64(tmp, tmp_out)?;
            self.move_location(Size::S64, Location::GPR(tmp_out), ret)?;

            self.release_simd(tmp_x2);
            self.release_simd(tmp_x1);
            self.release_gpr(tmp);
            self.release_simd(tmp_in);
            self.release_gpr(tmp_out);
        }
        Ok(())
    }
    /// Saturating f32 -> signed i64 truncation: out-of-range input clamps to
    /// i64::MIN / i64::MAX, NaN produces 0; result is stored in `ret`.
    fn convert_i64_f32_s_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
        self.emit_f32_int_conv_check_sat(
            tmp_in,
            GEF32_LT_I64_MIN,
            LEF32_GT_I64_MAX,
            // Below range: saturate to i64::MIN.
            |this| {
                this.assembler.emit_mov(
                    Size::S64,
                    Location::Imm64(i64::MIN as u64),
                    Location::GPR(tmp_out),
                )
            },
            // Above range: saturate to i64::MAX.
            |this| {
                this.assembler.emit_mov(
                    Size::S64,
                    Location::Imm64(i64::MAX as u64),
                    Location::GPR(tmp_out),
                )
            },
            // NaN: produce 0.
            Some(|this: &mut Self| {
                this.assembler
                    .emit_mov(Size::S64, Location::Imm64(0), Location::GPR(tmp_out))
            }),
            // In range: a plain signed truncation suffices.
            |this| {
                if this.assembler.arch_has_itruncf() {
                    this.assembler.arch_emit_i64_trunc_sf32(tmp_in, tmp_out)
                } else {
                    this.assembler
                        .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)
                }
            },
        )?;

        self.assembler
            .emit_mov(Size::S64, Location::GPR(tmp_out), ret)?;
        self.release_simd(tmp_in);
        self.release_gpr(tmp_out);
        Ok(())
    }
1673    fn convert_i64_f32_s_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
1674        if self.assembler.arch_has_itruncf() {
1675            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1676                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1677            })?;
1678            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1679                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1680            })?;
1681            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
1682            self.assembler.arch_emit_i64_trunc_sf32(tmp_in, tmp_out)?;
1683            self.emit_relaxed_mov(Size::S64, Location::GPR(tmp_out), ret)?;
1684            self.release_simd(tmp_in);
1685            self.release_gpr(tmp_out);
1686        } else {
1687            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1688                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1689            })?;
1690            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1691                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1692            })?;
1693
1694            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
1695            self.emit_f32_int_conv_check_trap(tmp_in, GEF32_LT_I64_MIN, LEF32_GT_I64_MAX)?;
1696            self.assembler
1697                .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
1698            self.move_location(Size::S64, Location::GPR(tmp_out), ret)?;
1699
1700            self.release_simd(tmp_in);
1701            self.release_gpr(tmp_out);
1702        }
1703        Ok(())
1704    }
    /// Saturating f32 -> signed i32 truncation: out-of-range input clamps to
    /// i32::MIN / i32::MAX, NaN produces 0; result is stored in `ret`.
    fn convert_i32_f32_s_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
        self.emit_f32_int_conv_check_sat(
            tmp_in,
            GEF32_LT_I32_MIN,
            LEF32_GT_I32_MAX,
            // Below range: saturate to i32::MIN.
            |this| {
                this.assembler.emit_mov(
                    Size::S32,
                    Location::Imm32(i32::MIN as u32),
                    Location::GPR(tmp_out),
                )
            },
            // Above range: saturate to i32::MAX.
            |this| {
                this.assembler.emit_mov(
                    Size::S32,
                    Location::Imm32(i32::MAX as u32),
                    Location::GPR(tmp_out),
                )
            },
            // NaN: produce 0.
            Some(|this: &mut Self| {
                this.assembler
                    .emit_mov(Size::S32, Location::Imm32(0), Location::GPR(tmp_out))
            }),
            // In range: plain signed 32-bit truncation.
            |this| {
                if this.assembler.arch_has_itruncf() {
                    this.assembler.arch_emit_i32_trunc_sf32(tmp_in, tmp_out)
                } else {
                    this.assembler
                        .emit_cvttss2si_32(XMMOrMemory::XMM(tmp_in), tmp_out)
                }
            },
        )?;

        self.assembler
            .emit_mov(Size::S32, Location::GPR(tmp_out), ret)?;
        self.release_simd(tmp_in);
        self.release_gpr(tmp_out);
        Ok(())
    }
1752    fn convert_i32_f32_s_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
1753        if self.assembler.arch_has_itruncf() {
1754            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1755                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1756            })?;
1757            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1758                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1759            })?;
1760            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
1761            self.assembler.arch_emit_i32_trunc_sf32(tmp_in, tmp_out)?;
1762            self.emit_relaxed_mov(Size::S32, Location::GPR(tmp_out), ret)?;
1763            self.release_simd(tmp_in);
1764            self.release_gpr(tmp_out);
1765        } else {
1766            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1767                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1768            })?;
1769            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1770                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1771            })?;
1772
1773            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
1774            self.emit_f32_int_conv_check_trap(tmp_in, GEF32_LT_I32_MIN, LEF32_GT_I32_MAX)?;
1775
1776            self.assembler
1777                .emit_cvttss2si_32(XMMOrMemory::XMM(tmp_in), tmp_out)?;
1778            self.move_location(Size::S32, Location::GPR(tmp_out), ret)?;
1779
1780            self.release_simd(tmp_in);
1781            self.release_gpr(tmp_out);
1782        }
1783        Ok(())
1784    }
    /// Saturating f32 -> unsigned i32 truncation: below-range input clamps to
    /// 0, above-range input clamps to u32::MAX; result is stored in `ret`.
    fn convert_i32_f32_u_s(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
        self.emit_f32_int_conv_check_sat(
            tmp_in,
            GEF32_LT_U32_MIN,
            LEF32_GT_U32_MAX,
            // Below range: saturate to 0.
            |this| {
                this.assembler
                    .emit_mov(Size::S32, Location::Imm32(0), Location::GPR(tmp_out))
            },
            // Above range: saturate to u32::MAX.
            |this| {
                this.assembler.emit_mov(
                    Size::S32,
                    Location::Imm32(u32::MAX),
                    Location::GPR(tmp_out),
                )
            },
            // No dedicated NaN handler is installed here (`None`).
            None::<fn(this: &mut Self) -> Result<(), CompileError>>,
            |this| {
                if this.assembler.arch_has_itruncf() {
                    this.assembler.arch_emit_i32_trunc_uf32(tmp_in, tmp_out)
                } else {
                    // A 64-bit signed conversion covers the entire u32 range.
                    this.assembler
                        .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)
                }
            },
        )?;

        self.assembler
            .emit_mov(Size::S32, Location::GPR(tmp_out), ret)?;
        self.release_simd(tmp_in);
        self.release_gpr(tmp_out);
        Ok(())
    }
1825    fn convert_i32_f32_u_u(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
1826        if self.assembler.arch_has_itruncf() {
1827            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1828                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1829            })?;
1830            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1831                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1832            })?;
1833            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
1834            self.assembler.arch_emit_i32_trunc_uf32(tmp_in, tmp_out)?;
1835            self.emit_relaxed_mov(Size::S32, Location::GPR(tmp_out), ret)?;
1836            self.release_simd(tmp_in);
1837            self.release_gpr(tmp_out);
1838        } else {
1839            let tmp_out = self.acquire_temp_gpr().ok_or_else(|| {
1840                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
1841            })?;
1842            let tmp_in = self.acquire_temp_simd().ok_or_else(|| {
1843                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
1844            })?;
1845            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp_in))?;
1846            self.emit_f32_int_conv_check_trap(tmp_in, GEF32_LT_U32_MIN, LEF32_GT_U32_MAX)?;
1847
1848            self.assembler
1849                .emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out)?;
1850            self.move_location(Size::S32, Location::GPR(tmp_out), ret)?;
1851
1852            self.release_simd(tmp_in);
1853            self.release_gpr(tmp_out);
1854        }
1855        Ok(())
1856    }
1857
    /// Emit an atomic exchange of `src` and `dst`, relaxing operand
    /// combinations that `xchg` cannot encode directly.
    fn emit_relaxed_atomic_xchg(
        &mut self,
        sz: Size,
        src: Location,
        dst: Location,
    ) -> Result<(), CompileError> {
        self.emit_relaxed_binop(AssemblerX64::emit_xchg, sz, src, dst)
    }
1866
1867    fn used_gprs_contains(&self, r: &GPR) -> bool {
1868        self.used_gprs.contains(r.into_index())
1869    }
1870    fn used_simd_contains(&self, r: &XMM) -> bool {
1871        self.used_simd.contains(r.into_index())
1872    }
    /// Mark GPR `r` as in use in the GPR bitset.
    fn used_gprs_insert(&mut self, r: GPR) {
        self.used_gprs.insert(r.into_index());
    }
    /// Mark SIMD register `r` as in use in the SIMD bitset.
    fn used_simd_insert(&mut self, r: XMM) {
        self.used_simd.insert(r.into_index());
    }
1879    fn used_gprs_remove(&mut self, r: &GPR) -> bool {
1880        let ret = self.used_gprs_contains(r);
1881        self.used_gprs.set(r.into_index(), false);
1882        ret
1883    }
1884    fn used_simd_remove(&mut self, r: &XMM) -> bool {
1885        let ret = self.used_simd_contains(r);
1886        self.used_simd.set(r.into_index(), false);
1887        ret
1888    }
1889    fn emit_unwind_op(&mut self, op: UnwindOps<GPR, XMM>) -> Result<(), CompileError> {
1890        self.unwind_ops.push((self.get_offset().0, op));
1891        Ok(())
1892    }
1893    fn emit_illegal_op_internal(&mut self, trap: TrapCode) -> Result<(), CompileError> {
1894        let v = trap as u8;
1895        self.assembler.emit_ud1_payload(v)
1896    }
1897
1898    // logic
    /// Bitwise XOR of `source` into `dest`.
    /// `_flags` is ignored: `xor` always updates RFLAGS on x86-64.
    fn location_xor(
        &mut self,
        size: Size,
        source: Location,
        dest: Location,
        _flags: bool,
    ) -> Result<(), CompileError> {
        self.assembler.emit_xor(size, source, dest)
    }
    /// Bitwise OR of `source` into `dest`.
    /// `_flags` is ignored: `or` always updates RFLAGS on x86-64.
    fn location_or(
        &mut self,
        size: Size,
        source: Location,
        dest: Location,
        _flags: bool,
    ) -> Result<(), CompileError> {
        self.assembler.emit_or(size, source, dest)
    }
1917    fn load_address(
1918        &mut self,
1919        size: Size,
1920        reg: Location,
1921        mem: Location,
1922    ) -> Result<(), CompileError> {
1923        match reg {
1924            Location::GPR(_) => {
1925                match mem {
1926                    Location::Memory(_, _) | Location::Memory2(_, _, _, _) => {
1927                        // Memory moves with size < 32b do not zero upper bits.
1928                        if size < Size::S32 {
1929                            self.assembler.emit_xor(Size::S32, reg, reg)?;
1930                        }
1931                        self.assembler.emit_mov(size, mem, reg)?;
1932                    }
1933                    _ => codegen_error!("singlepass load_address unreachable"),
1934                }
1935            }
1936            _ => codegen_error!("singlepass load_address unreachable"),
1937        }
1938        Ok(())
1939    }
1940
    /// Negate `source` into `dest`: first move/extend the value into `dest`,
    /// then emit a two's-complement `neg`.
    fn location_neg(
        &mut self,
        size_val: Size, // size of src
        signed: bool,
        source: Location,
        size_op: Size,
        dest: Location,
    ) -> Result<(), CompileError> {
        self.move_location_extend(size_val, signed, source, size_op, dest)?;
        // NOTE(review): `neg` is emitted with `size_val` (the pre-extension
        // size), not `size_op` — confirm this is intentional for widened
        // operands.
        self.assembler.emit_neg(size_val, dest)
    }
1952
1953    fn emit_relaxed_zero_extension(
1954        &mut self,
1955        sz_src: Size,
1956        src: Location,
1957        sz_dst: Size,
1958        dst: Location,
1959    ) -> Result<(), CompileError> {
1960        if (sz_src == Size::S32 || sz_src == Size::S64) && sz_dst == Size::S64 {
1961            self.emit_relaxed_binop(AssemblerX64::emit_mov, sz_src, src, dst)
1962        } else {
1963            self.emit_relaxed_zx_sx(AssemblerX64::emit_movzx, sz_src, src, sz_dst, dst)
1964        }
1965    }
1966}
1967
1968impl Machine for MachineX86_64 {
1969    type GPR = GPR;
1970    type SIMD = XMM;
1971
    /// Current byte offset of the underlying assembler.
    fn assembler_get_offset(&self) -> Offset {
        self.assembler.get_offset()
    }
1975
    /// The register reserved for the `vmctx` pointer (R15 on this backend).
    fn get_vmctx_reg(&self) -> GPR {
        GPR::R15
    }
1979
1980    fn get_used_gprs(&self) -> Vec<GPR> {
1981        GPR::iterator()
1982            .filter(|x| self.used_gprs.contains(x.into_index()))
1983            .cloned()
1984            .collect()
1985    }
1986
1987    fn get_used_simd(&self) -> Vec<XMM> {
1988        XMM::iterator()
1989            .filter(|x| self.used_simd.contains(x.into_index()))
1990            .cloned()
1991            .collect()
1992    }
1993
1994    fn pick_gpr(&self) -> Option<GPR> {
1995        use GPR::*;
1996        static REGS: &[GPR] = &[R8, R9, R10, R11];
1997        for r in REGS {
1998            if !self.used_gprs_contains(r) {
1999                return Some(*r);
2000            }
2001        }
2002        None
2003    }
2004
2005    fn pick_temp_gpr(&self) -> Option<GPR> {
2006        use GPR::*;
2007        static REGS: &[GPR] = &[RAX, RCX, RDX, RDI, RSI];
2008        for r in REGS {
2009            if !self.used_gprs_contains(r) {
2010                return Some(*r);
2011            }
2012        }
2013        None
2014    }
2015
2016    fn acquire_temp_gpr(&mut self) -> Option<GPR> {
2017        let gpr = self.pick_temp_gpr();
2018        if let Some(x) = gpr {
2019            self.used_gprs_insert(x);
2020        }
2021        gpr
2022    }
2023
    /// Return `gpr` to the free pool; panics if it was not marked used.
    fn release_gpr(&mut self, gpr: GPR) {
        assert!(self.used_gprs_remove(&gpr));
    }
2027
    /// Claim a specific GPR that must currently be free, and return it;
    /// panics if the register is already in use.
    fn reserve_unused_temp_gpr(&mut self, gpr: GPR) -> GPR {
        assert!(!self.used_gprs_contains(&gpr));
        self.used_gprs_insert(gpr);
        gpr
    }
2033
    /// Mark `gpr` as in use without any availability check.
    fn reserve_gpr(&mut self, gpr: GPR) {
        self.used_gprs_insert(gpr);
    }
2037
2038    fn push_used_gpr(&mut self, used_gprs: &[GPR]) -> Result<usize, CompileError> {
2039        for r in used_gprs.iter() {
2040            self.assembler.emit_push(Size::S64, Location::GPR(*r))?;
2041        }
2042        Ok(used_gprs.len() * 8)
2043    }
2044
2045    fn pop_used_gpr(&mut self, used_gprs: &[GPR]) -> Result<(), CompileError> {
2046        for r in used_gprs.iter().rev() {
2047            self.assembler.emit_pop(Size::S64, Location::GPR(*r))?;
2048        }
2049        Ok(())
2050    }
2051
2052    fn pick_simd(&self) -> Option<XMM> {
2053        use XMM::*;
2054        static REGS: &[XMM] = &[XMM3, XMM4, XMM5, XMM6, XMM7];
2055        for r in REGS {
2056            if !self.used_simd_contains(r) {
2057                return Some(*r);
2058            }
2059        }
2060        None
2061    }
2062
2063    fn pick_temp_simd(&self) -> Option<XMM> {
2064        use XMM::*;
2065        static REGS: &[XMM] = &[XMM0, XMM1, XMM2];
2066        for r in REGS {
2067            if !self.used_simd_contains(r) {
2068                return Some(*r);
2069            }
2070        }
2071        None
2072    }
2073
2074    fn acquire_temp_simd(&mut self) -> Option<XMM> {
2075        let simd = self.pick_temp_simd();
2076        if let Some(x) = simd {
2077            self.used_simd_insert(x);
2078        }
2079        simd
2080    }
2081
    /// Mark `simd` as in use without any availability check.
    fn reserve_simd(&mut self, simd: XMM) {
        self.used_simd_insert(simd);
    }
2085
    /// Return `simd` to the free pool; panics if it was not marked used.
    fn release_simd(&mut self, simd: XMM) {
        assert!(self.used_simd_remove(&simd));
    }
2089
2090    fn push_used_simd(&mut self, used_xmms: &[XMM]) -> Result<usize, CompileError> {
2091        self.extend_stack((used_xmms.len() * 8) as u32)?;
2092
2093        for (i, r) in used_xmms.iter().enumerate() {
2094            self.move_location(
2095                Size::S64,
2096                Location::SIMD(*r),
2097                Location::Memory(GPR::RSP, (i * 8) as i32),
2098            )?;
2099        }
2100
2101        Ok(used_xmms.len() * 8)
2102    }
2103
2104    fn pop_used_simd(&mut self, used_xmms: &[XMM]) -> Result<(), CompileError> {
2105        for (i, r) in used_xmms.iter().enumerate() {
2106            self.move_location(
2107                Size::S64,
2108                Location::Memory(GPR::RSP, (i * 8) as i32),
2109                Location::SIMD(*r),
2110            )?;
2111        }
2112        self.assembler.emit_add(
2113            Size::S64,
2114            Location::Imm32((used_xmms.len() * 8) as u32),
2115            Location::GPR(GPR::RSP),
2116        )
2117    }
2118
    /// Record the current source offset used for instruction address maps.
    fn set_srcloc(&mut self, offset: u32) {
        self.src_loc = offset;
    }
2122
2123    fn mark_address_range_with_trap_code(&mut self, code: TrapCode, begin: usize, end: usize) {
2124        for i in begin..end {
2125            self.trap_table.offset_to_code.insert(i, code);
2126        }
2127        self.mark_instruction_address_end(begin);
2128    }
2129
2130    fn mark_address_with_trap_code(&mut self, code: TrapCode) {
2131        let offset = self.assembler.get_offset().0;
2132        self.trap_table.offset_to_code.insert(offset, code);
2133        self.mark_instruction_address_end(offset);
2134    }
2135
2136    fn mark_instruction_with_trap_code(&mut self, code: TrapCode) -> usize {
2137        let offset = self.assembler.get_offset().0;
2138        self.trap_table.offset_to_code.insert(offset, code);
2139        offset
2140    }
2141
2142    fn mark_instruction_address_end(&mut self, begin: usize) {
2143        self.instructions_address_map.push(InstructionAddressMap {
2144            srcloc: SourceLoc::new(self.src_loc),
2145            code_offset: begin,
2146            code_len: self.assembler.get_offset().0 - begin,
2147        });
2148    }
2149
2150    fn insert_stackoverflow(&mut self) {
2151        let offset = 0;
2152        self.trap_table
2153            .offset_to_code
2154            .insert(offset, TrapCode::StackOverflow);
2155        self.mark_instruction_address_end(offset);
2156    }
2157
2158    fn collect_trap_information(&self) -> Vec<TrapInformation> {
2159        self.trap_table
2160            .offset_to_code
2161            .clone()
2162            .into_iter()
2163            .map(|(offset, code)| TrapInformation {
2164                code_offset: offset as u32,
2165                trap_code: code,
2166            })
2167            .collect()
2168    }
2169
    /// Snapshot of the code-offset -> source-location entries collected so far.
    fn instructions_address_map(&self) -> Vec<InstructionAddressMap> {
        self.instructions_address_map.clone()
    }
2173
    /// Stack slot located `stack_offset` bytes below the frame pointer.
    fn local_on_stack(&mut self, stack_offset: i32) -> Location {
        Location::Memory(GPR::RBP, -stack_offset)
    }
2177
    /// Returned unchanged: no alignment rounding of stack adjustments is
    /// applied on this backend.
    fn round_stack_adjust(&self, value: usize) -> usize {
        value
    }
2181
    /// Grow the stack by `delta_stack_offset` bytes (`sub rsp, imm`).
    fn extend_stack(&mut self, delta_stack_offset: u32) -> Result<(), CompileError> {
        self.assembler.emit_sub(
            Size::S64,
            Location::Imm32(delta_stack_offset),
            Location::GPR(GPR::RSP),
        )
    }
2189
    /// Shrink the stack by `delta_stack_offset` bytes (`add rsp, imm`).
    fn truncate_stack(&mut self, delta_stack_offset: u32) -> Result<(), CompileError> {
        self.assembler.emit_add(
            Size::S64,
            Location::Imm32(delta_stack_offset),
            Location::GPR(GPR::RSP),
        )
    }
2197
    /// Move `loc` into `dest` for native-ABI call setup, always as a 64-bit
    /// move. Immediate/memory sources are staged through a scratch register
    /// because x86 has no memory-to-memory moves.
    fn move_location_for_native(
        &mut self,
        _size: Size,
        loc: Location,
        dest: Location,
    ) -> Result<(), CompileError> {
        match loc {
            Location::Imm64(_) | Location::Memory(_, _) | Location::Memory2(_, _, _, _) => {
                let tmp = self.pick_temp_gpr();
                if let Some(x) = tmp {
                    self.assembler.emit_mov(Size::S64, loc, Location::GPR(x))?;
                    self.assembler.emit_mov(Size::S64, Location::GPR(x), dest)
                } else {
                    // No scratch register free: park RAX's value in `dest`,
                    // load the source into RAX, then exchange — this ends
                    // with `dest` = `loc` and RAX restored to its old value.
                    self.assembler
                        .emit_mov(Size::S64, Location::GPR(GPR::RAX), dest)?;
                    self.assembler
                        .emit_mov(Size::S64, loc, Location::GPR(GPR::RAX))?;
                    self.assembler
                        .emit_xchg(Size::S64, Location::GPR(GPR::RAX), dest)
                }
            }
            _ => self.assembler.emit_mov(Size::S64, loc, dest),
        }
    }
2222
    /// Write an immediate zero of the given size into `location`.
    fn zero_location(&mut self, size: Size, location: Location) -> Result<(), CompileError> {
        self.assembler.emit_mov(size, Location::Imm32(0), location)
    }
2226
    /// The frame-pointer register used to address locals (RBP).
    fn local_pointer(&self) -> GPR {
        GPR::RBP
    }
2230
    /// Locals 0..=3 live in callee-saved registers (see `get_local_location`);
    /// any higher index is a stack slot.
    fn is_local_on_stack(&self, idx: usize) -> bool {
        idx > 3
    }
2234
2235    fn get_local_location(&self, idx: usize, callee_saved_regs_size: usize) -> Location {
2236        // Use callee-saved registers for the first locals.
2237        match idx {
2238            0 => Location::GPR(GPR::R12),
2239            1 => Location::GPR(GPR::R13),
2240            2 => Location::GPR(GPR::R14),
2241            3 => Location::GPR(GPR::RBX),
2242            _ => Location::Memory(GPR::RBP, -(((idx - 3) * 8 + callee_saved_regs_size) as i32)),
2243        }
2244    }
2245
2246    fn move_local(&mut self, stack_offset: i32, location: Location) -> Result<(), CompileError> {
2247        self.assembler.emit_mov(
2248            Size::S64,
2249            location,
2250            Location::Memory(GPR::RBP, -stack_offset),
2251        )?;
2252        match location {
2253            Location::GPR(x) => self.emit_unwind_op(UnwindOps::SaveRegister {
2254                reg: UnwindRegister::GPR(x),
2255                bp_neg_offset: stack_offset,
2256            }),
2257            Location::SIMD(x) => self.emit_unwind_op(UnwindOps::SaveRegister {
2258                reg: UnwindRegister::FPR(x),
2259                bp_neg_offset: stack_offset,
2260            }),
2261            _ => Ok(()),
2262        }
2263    }
2264
2265    fn list_to_save(&self, calling_convention: CallingConvention) -> Vec<Location> {
2266        match calling_convention {
2267            CallingConvention::WindowsFastcall => {
2268                vec![Location::GPR(GPR::RDI), Location::GPR(GPR::RSI)]
2269            }
2270            _ => vec![],
2271        }
2272    }
2273
    /// Integer argument registers in ABI order for `calling_convention`.
    fn get_param_registers(&self, calling_convention: CallingConvention) -> &'static [Self::GPR] {
        match calling_convention {
            CallingConvention::WindowsFastcall => &[GPR::RCX, GPR::RDX, GPR::R8, GPR::R9],
            _ => &[GPR::RDI, GPR::RSI, GPR::RDX, GPR::RCX, GPR::R8, GPR::R9],
        }
    }
2280
2281    fn get_param_location(
2282        &self,
2283        idx: usize,
2284        _sz: Size,
2285        stack_location: &mut usize,
2286        calling_convention: CallingConvention,
2287    ) -> Location {
2288        self.get_param_registers(calling_convention)
2289            .get(idx)
2290            .map_or_else(
2291                || {
2292                    let loc = Location::Memory(GPR::RSP, *stack_location as i32);
2293                    *stack_location += 8;
2294                    loc
2295                },
2296                |reg| Location::GPR(*reg),
2297            )
2298    }
2299
2300    fn get_call_param_location(
2301        &self,
2302        return_slots: usize,
2303        idx: usize,
2304        _sz: Size,
2305        _stack_location: &mut usize,
2306        calling_convention: CallingConvention,
2307    ) -> Location {
2308        let register_params = self.get_param_registers(calling_convention);
2309        let return_values_memory_size =
2310            8 * return_slots.saturating_sub(X86_64_RETURN_VALUE_REGISTERS.len());
2311        match calling_convention {
2312            CallingConvention::WindowsFastcall => register_params.get(idx).map_or_else(
2313                || {
2314                    Location::Memory(
2315                        GPR::RBP,
2316                        (32 + 16 + return_values_memory_size + (idx - register_params.len()) * 8)
2317                            as i32,
2318                    )
2319                },
2320                |reg| Location::GPR(*reg),
2321            ),
2322            _ => register_params.get(idx).map_or_else(
2323                || {
2324                    Location::Memory(
2325                        GPR::RBP,
2326                        (16 + return_values_memory_size + (idx - register_params.len()) * 8) as i32,
2327                    )
2328                },
2329                |reg| Location::GPR(*reg),
2330            ),
2331        }
2332    }
2333
    /// Return the GPR carrying the `idx`-th parameter under
    /// `calling_convention`. Panics if `idx` exceeds the register-parameter
    /// count — callers must only use register-passed indices here.
    fn get_simple_param_location(
        &self,
        idx: usize,
        calling_convention: CallingConvention,
    ) -> Self::GPR {
        self.get_param_registers(calling_convention)[idx]
    }
2341
    /// No-op on x86-64: register parameters need no width adjustment here
    /// (hook exists for architectures that must normalize narrow values).
    fn adjust_gpr_param_location(
        &mut self,
        _register: Self::GPR,
        _size: Size,
    ) -> Result<(), CompileError> {
        Ok(())
    }
2349
2350    fn get_return_value_location(
2351        &self,
2352        idx: usize,
2353        stack_location: &mut usize,
2354        calling_convention: CallingConvention,
2355    ) -> Location {
2356        X86_64_RETURN_VALUE_REGISTERS.get(idx).map_or_else(
2357            || {
2358                let stack_padding = match calling_convention {
2359                    CallingConvention::WindowsFastcall => 32,
2360                    _ => 0,
2361                };
2362                let loc = Location::Memory(GPR::RSP, *stack_location as i32 + stack_padding);
2363                *stack_location += 8;
2364                loc
2365            },
2366            |reg| Location::GPR(*reg),
2367        )
2368    }
2369
2370    fn get_call_return_value_location(
2371        &self,
2372        idx: usize,
2373        calling_convention: CallingConvention,
2374    ) -> Location {
2375        X86_64_RETURN_VALUE_REGISTERS.get(idx).map_or_else(
2376            || {
2377                let stack_padding = match calling_convention {
2378                    CallingConvention::WindowsFastcall => 32,
2379                    _ => 0,
2380                };
2381                Location::Memory(
2382                    GPR::RBP,
2383                    (16 + stack_padding + (idx - X86_64_RETURN_VALUE_REGISTERS.len()) * 8) as i32,
2384                )
2385            },
2386            |reg| Location::GPR(*reg),
2387        )
2388    }
2389
2390    fn move_location(
2391        &mut self,
2392        size: Size,
2393        source: Location,
2394        dest: Location,
2395    ) -> Result<(), CompileError> {
2396        match source {
2397            Location::GPR(_) => self.assembler.emit_mov(size, source, dest),
2398            Location::Memory(_, _) => match dest {
2399                Location::GPR(_) | Location::SIMD(_) => self.assembler.emit_mov(size, source, dest),
2400                Location::Memory(_, _) | Location::Memory2(_, _, _, _) => {
2401                    let tmp = self.pick_temp_gpr().ok_or_else(|| {
2402                        CompileError::Codegen("singlepass can't pick a temp gpr".to_owned())
2403                    })?;
2404                    self.assembler.emit_mov(size, source, Location::GPR(tmp))?;
2405                    self.assembler.emit_mov(size, Location::GPR(tmp), dest)
2406                }
2407                _ => codegen_error!("singlepass move_location unreachable"),
2408            },
2409            Location::Memory2(_, _, _, _) => match dest {
2410                Location::GPR(_) | Location::SIMD(_) => self.assembler.emit_mov(size, source, dest),
2411                Location::Memory(_, _) | Location::Memory2(_, _, _, _) => {
2412                    let tmp = self.pick_temp_gpr().ok_or_else(|| {
2413                        CompileError::Codegen("singlepass can't pick a temp gpr".to_owned())
2414                    })?;
2415                    self.assembler.emit_mov(size, source, Location::GPR(tmp))?;
2416                    self.assembler.emit_mov(size, Location::GPR(tmp), dest)
2417                }
2418                _ => codegen_error!("singlepass move_location unreachable"),
2419            },
2420            Location::Imm8(_) | Location::Imm32(_) | Location::Imm64(_) => match dest {
2421                Location::GPR(_) | Location::SIMD(_) => self.assembler.emit_mov(size, source, dest),
2422                Location::Memory(_, _) | Location::Memory2(_, _, _, _) => {
2423                    let tmp = self.pick_temp_gpr().ok_or_else(|| {
2424                        CompileError::Codegen("singlepass can't pick a temp gpr".to_owned())
2425                    })?;
2426                    self.assembler.emit_mov(size, source, Location::GPR(tmp))?;
2427                    self.assembler.emit_mov(size, Location::GPR(tmp), dest)
2428                }
2429                _ => codegen_error!("singlepass move_location unreachable"),
2430            },
2431            Location::SIMD(_) => self.assembler.emit_mov(size, source, dest),
2432            _ => codegen_error!("singlepass move_location unreachable"),
2433        }
2434    }
2435
2436    fn move_location_extend(
2437        &mut self,
2438        size_val: Size,
2439        signed: bool,
2440        source: Location,
2441        size_op: Size,
2442        dest: Location,
2443    ) -> Result<(), CompileError> {
2444        let dst = match dest {
2445            Location::Memory(_, _) | Location::Memory2(_, _, _, _) => {
2446                Location::GPR(self.acquire_temp_gpr().ok_or_else(|| {
2447                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
2448                })?)
2449            }
2450            Location::GPR(_) | Location::SIMD(_) => dest,
2451            _ => codegen_error!("singlepass move_location_extend unreachable"),
2452        };
2453        match source {
2454            Location::GPR(_)
2455            | Location::Memory(_, _)
2456            | Location::Memory2(_, _, _, _)
2457            | Location::Imm32(_)
2458            | Location::Imm64(_) => match size_val {
2459                Size::S32 | Size::S64 => self.assembler.emit_mov(size_val, source, dst),
2460                Size::S16 | Size::S8 => {
2461                    if signed {
2462                        self.assembler.emit_movsx(size_val, source, size_op, dst)
2463                    } else {
2464                        self.assembler.emit_movzx(size_val, source, size_op, dst)
2465                    }
2466                }
2467            },
2468            _ => panic!(
2469                "unimplemented move_location_extend({size_val:?}, {signed}, {source:?}, {size_op:?}, {dest:?}"
2470            ),
2471        }?;
2472        if dst != dest {
2473            self.assembler.emit_mov(size_op, dst, dest)?;
2474            match dst {
2475                Location::GPR(x) => self.release_gpr(x),
2476                _ => codegen_error!("singlepass move_location_extend unreachable"),
2477            };
2478        }
2479        Ok(())
2480    }
2481
    /// Zero-initialize `init_stack_loc_cnt` consecutive 8-byte stack slots,
    /// the lowest of which is `last_stack_loc`, using `rep stosq`.
    ///
    /// Clobbers RCX (count), RAX (fill value 0), and RDI (destination) —
    /// the fixed register operands `rep stosq` requires.
    fn init_stack_loc(
        &mut self,
        init_stack_loc_cnt: u64,
        last_stack_loc: Location,
    ) -> Result<(), CompileError> {
        // Since these assemblies take up to 24 bytes, if more than 2 slots are initialized, then they are smaller.
        self.assembler.emit_mov(
            Size::S64,
            Location::Imm64(init_stack_loc_cnt),
            Location::GPR(GPR::RCX),
        )?;
        self.assembler
            .emit_xor(Size::S64, Location::GPR(GPR::RAX), Location::GPR(GPR::RAX))?;
        self.assembler
            .emit_lea(Size::S64, last_stack_loc, Location::GPR(GPR::RDI))?;
        self.assembler.emit_rep_stosq()
    }
2499
    /// Reset RSP to the top of the register-save area at RBP - `saved_area_offset`
    /// (via `lea`, which leaves flags untouched unlike `sub`/`add`).
    fn restore_saved_area(&mut self, saved_area_offset: i32) -> Result<(), CompileError> {
        self.assembler.emit_lea(
            Size::S64,
            Location::Memory(GPR::RBP, -saved_area_offset),
            Location::GPR(GPR::RSP),
        )
    }
2507
    /// Pop a 64-bit value off the stack into `location`.
    fn pop_location(&mut self, location: Location) -> Result<(), CompileError> {
        self.assembler.emit_pop(Size::S64, location)
    }
2511
2512    fn assembler_finalize(
2513        self,
2514        assembly_comments: HashMap<usize, AssemblyComment>,
2515    ) -> Result<FinalizedAssembly, CompileError> {
2516        Ok(FinalizedAssembly {
2517            body: self.assembler.finalize().map_err(|e| {
2518                CompileError::Codegen(format!("Assembler failed finalization with: {e:?}"))
2519            })?,
2520            assembly_comments,
2521        })
2522    }
2523
    /// Current byte offset of the assembler's emission cursor.
    fn get_offset(&self) -> Offset {
        self.assembler.get_offset()
    }
2527
    /// Run the assembler's per-function finalization pass.
    fn finalize_function(&mut self) -> Result<(), CompileError> {
        self.assembler.finalize_function()?;
        Ok(())
    }
2532
    /// Emit the standard frame prologue: push RBP, then RBP = RSP.
    ///
    /// Each instruction is paired with its unwind op immediately after being
    /// emitted so unwind offsets line up with the code.
    fn emit_function_prolog(&mut self) -> Result<(), CompileError> {
        self.emit_push(Size::S64, Location::GPR(GPR::RBP))?;
        self.emit_unwind_op(UnwindOps::PushFP { up_to_sp: 16 })?;
        self.move_location(Size::S64, Location::GPR(GPR::RSP), Location::GPR(GPR::RBP))?;
        self.emit_unwind_op(UnwindOps::DefineNewFrame)
    }
2539
    /// Emit the frame epilogue: RSP = RBP, then pop RBP (mirror of the prolog).
    fn emit_function_epilog(&mut self) -> Result<(), CompileError> {
        self.move_location(Size::S64, Location::GPR(GPR::RBP), Location::GPR(GPR::RSP))?;
        self.emit_pop(Size::S64, Location::GPR(GPR::RBP))
    }
2544
    /// Move a float return value from RAX (where this backend materializes
    /// it) into XMM0, the ABI float-return register.
    fn emit_function_return_float(&mut self) -> Result<(), CompileError> {
        self.move_location(
            Size::S64,
            Location::GPR(GPR::RAX),
            Location::SIMD(XMM::XMM0),
        )
    }
2552
    /// Replace any NaN in `input` with the canonical quiet NaN bit pattern,
    /// writing the result to `output`; non-NaN values pass through unchanged.
    ///
    /// Strategy: `vcmpunord*` builds an all-ones mask where the value is NaN,
    /// then `vblendv*` selects the canonical NaN (tmp3) under that mask and
    /// the original value otherwise. Needs three SIMD temps and one GPR temp,
    /// all released before returning.
    fn canonicalize_nan(
        &mut self,
        sz: Size,
        input: Location,
        output: Location,
    ) -> Result<(), CompileError> {
        let tmp1 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp2 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp3 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;

        self.emit_relaxed_mov(sz, input, Location::SIMD(tmp1))?;
        let tmpg1 = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;

        match sz {
            Size::S32 => {
                // tmp2 = mask of NaN lanes (unordered compare of tmp1 with itself).
                self.assembler
                    .emit_vcmpunordss(tmp1, XMMOrMemory::XMM(tmp1), tmp2)?;
                self.move_location(
                    Size::S32,
                    Location::Imm32(0x7FC0_0000), // Canonical NaN
                    Location::GPR(tmpg1),
                )?;
                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp3))?;
                // tmp1 = mask ? canonical NaN : original value.
                self.assembler
                    .emit_vblendvps(tmp2, XMMOrMemory::XMM(tmp3), tmp1, tmp1)?;
            }
            Size::S64 => {
                self.assembler
                    .emit_vcmpunordsd(tmp1, XMMOrMemory::XMM(tmp1), tmp2)?;
                self.move_location(
                    Size::S64,
                    Location::Imm64(0x7FF8_0000_0000_0000), // Canonical NaN
                    Location::GPR(tmpg1),
                )?;
                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp3))?;
                self.assembler
                    .emit_vblendvpd(tmp2, XMMOrMemory::XMM(tmp3), tmp1, tmp1)?;
            }
            _ => codegen_error!("singlepass canonicalize_nan unreachable"),
        }

        self.emit_relaxed_mov(sz, Location::SIMD(tmp1), output)?;

        self.release_gpr(tmpg1);
        self.release_simd(tmp3);
        self.release_simd(tmp2);
        self.release_simd(tmp1);
        Ok(())
    }
2610
    /// Emit a trapping instruction that encodes `trap` directly in its
    /// opcode bytes, so the trap code can be recovered from the faulting
    /// instruction without a side-table lookup.
    fn emit_illegal_op(&mut self, trap: TrapCode) -> Result<(), CompileError> {
        // code below is kept as a reference on how to emit illegal op with trap info
        // without an Undefined opcode with payload
        /*
        let offset = self.assembler.get_offset().0;
        self.trap_table
        .offset_to_code
        .insert(offset, trap);
        self.assembler.emit_ud2();
        self.mark_instruction_address_end(offset);*/
        let v = trap as u8;
        // payload needs to be between 0-15
        // this will emit an 40 0F B9 Cx opcode, with x the payload
        let offset = self.assembler.get_offset().0;
        self.assembler.emit_ud1_payload(v)?;
        self.mark_instruction_address_end(offset);
        Ok(())
    }
2629
    /// Allocate a fresh dynamic label from the assembler.
    fn get_label(&mut self) -> Label {
        self.assembler.new_dynamic_label()
    }
2633
    /// Bind `label` at the current emission offset.
    fn emit_label(&mut self, label: Label) -> Result<(), CompileError> {
        self.assembler.emit_label(label)
    }
2637
    /// Scratch register used to hold an indirect call target (RAX on x86-64).
    fn get_gpr_for_call(&self) -> GPR {
        GPR::RAX
    }
2641
    /// Emit an indirect `call` through `reg`.
    fn emit_call_register(&mut self, reg: GPR) -> Result<(), CompileError> {
        self.assembler.emit_call_register(reg)
    }
2645
    /// Emit a direct `call` to `label`.
    fn emit_call_label(&mut self, label: Label) -> Result<(), CompileError> {
        self.assembler.emit_call_label(label)
    }
2649
    /// Emit an indirect call to `location` via an architecture-specific
    /// trampoline (delegated to the assembler).
    fn arch_emit_indirect_call_with_trampoline(
        &mut self,
        location: Location,
    ) -> Result<(), CompileError> {
        self.assembler
            .arch_emit_indirect_call_with_trampoline(location)
    }
2657
    /// Emit a debugger breakpoint instruction.
    fn emit_debug_breakpoint(&mut self) -> Result<(), CompileError> {
        self.assembler.emit_bkpt()
    }
2661
    /// Emit an indirect `call` through an arbitrary operand `location`.
    fn emit_call_location(&mut self, location: Location) -> Result<(), CompileError> {
        self.assembler.emit_call_location(location)
    }
2665
    /// Emit `dest += source`. `_flags` is ignored: x86 `add` always sets
    /// flags anyway.
    fn location_add(
        &mut self,
        size: Size,
        source: Location,
        dest: Location,
        _flags: bool,
    ) -> Result<(), CompileError> {
        self.assembler.emit_add(size, source, dest)
    }
2675
    /// Emit `cmp` of `source` against `dest`, setting flags only.
    fn location_cmp(
        &mut self,
        size: Size,
        source: Location,
        dest: Location,
    ) -> Result<(), CompileError> {
        self.assembler.emit_cmp(size, source, dest)
    }
2684
    /// Emit an unconditional jump to `label`.
    fn jmp_unconditional(&mut self, label: Label) -> Result<(), CompileError> {
        self.assembler.emit_jmp(Condition::None, label)
    }
2688
2689    fn jmp_on_condition(
2690        &mut self,
2691        cond: UnsignedCondition,
2692        size: Size,
2693        loc_a: AbstractLocation<Self::GPR, Self::SIMD>,
2694        loc_b: AbstractLocation<Self::GPR, Self::SIMD>,
2695        label: Label,
2696    ) -> Result<(), CompileError> {
2697        self.assembler.emit_cmp(size, loc_b, loc_a)?;
2698        let cond = match cond {
2699            UnsignedCondition::Equal => Condition::Equal,
2700            UnsignedCondition::NotEqual => Condition::NotEqual,
2701            UnsignedCondition::Above => Condition::Above,
2702            UnsignedCondition::AboveEqual => Condition::AboveEqual,
2703            UnsignedCondition::Below => Condition::Below,
2704            UnsignedCondition::BelowEqual => Condition::BelowEqual,
2705        };
2706        self.assembler.emit_jmp(cond, label)
2707    }
2708
    /// Emit an indirect jump into a jump table starting at `label`, indexed
    /// by the 32-bit value in `cond`.
    ///
    /// Computes `label + cond * jmp_instr_size` in tmp2 and jumps there.
    /// Both temporaries are explicitly reserved (so the second pick cannot
    /// return the first) and released before returning.
    fn emit_jmp_to_jumptable(&mut self, label: Label, cond: Location) -> Result<(), CompileError> {
        let tmp1 = self
            .pick_temp_gpr()
            .ok_or_else(|| CompileError::Codegen("singlepass can't pick a temp gpr".to_owned()))?;
        self.reserve_gpr(tmp1);
        let tmp2 = self
            .pick_temp_gpr()
            .ok_or_else(|| CompileError::Codegen("singlepass can't pick a temp gpr".to_owned()))?;
        self.reserve_gpr(tmp2);

        // tmp1 = address of the table; tmp2 = index.
        self.assembler.emit_lea_label(label, Location::GPR(tmp1))?;
        self.move_location(Size::S32, cond, Location::GPR(tmp2))?;

        // Scale the index by the (fixed) size of one table jump instruction.
        let instr_size = self.assembler.get_jmp_instr_size();
        self.assembler
            .emit_imul_imm32_gpr64(instr_size as _, tmp2)?;
        self.assembler
            .emit_add(Size::S64, Location::GPR(tmp1), Location::GPR(tmp2))?;
        self.assembler.emit_jmp_location(Location::GPR(tmp2))?;
        self.release_gpr(tmp2);
        self.release_gpr(tmp1);
        Ok(())
    }
2732
2733    fn align_for_loop(&mut self) -> Result<(), CompileError> {
2734        // Pad with NOPs to the next 16-byte boundary.
2735        // Here we don't use the dynasm `.align 16` attribute because it pads the alignment with single-byte nops
2736        // which may lead to efficiency problems.
2737        match self.assembler.get_offset().0 % 16 {
2738            0 => {}
2739            x => {
2740                self.assembler.emit_nop_n(16 - x)?;
2741            }
2742        }
2743        assert_eq!(self.assembler.get_offset().0 % 16, 0);
2744        Ok(())
2745    }
2746
    /// Emit a `ret` instruction.
    fn emit_ret(&mut self) -> Result<(), CompileError> {
        self.assembler.emit_ret()
    }
2750
    /// Push `loc` onto the machine stack.
    fn emit_push(&mut self, size: Size, loc: Location) -> Result<(), CompileError> {
        self.assembler.emit_push(size, loc)
    }
2754
    /// Pop from the machine stack into `loc`.
    fn emit_pop(&mut self, size: Size, loc: Location) -> Result<(), CompileError> {
        self.assembler.emit_pop(size, loc)
    }
2758
    /// Memory fence: a no-op here, relying on x86-64's strong (TSO) memory
    /// ordering for the accesses this backend emits.
    fn emit_memory_fence(&mut self) -> Result<(), CompileError> {
        // nothing on x86_64
        Ok(())
    }
2763
2764    fn emit_imul_imm32(&mut self, size: Size, imm32: u32, gpr: GPR) -> Result<(), CompileError> {
2765        match size {
2766            Size::S64 => self.assembler.emit_imul_imm32_gpr64(imm32, gpr),
2767            _ => {
2768                codegen_error!("singlepass emit_imul_imm32 unreachable");
2769            }
2770        }
2771    }
2772
    /// `mov` with operand constraints relaxed (temporaries inserted as needed).
    fn emit_relaxed_mov(
        &mut self,
        sz: Size,
        src: Location,
        dst: Location,
    ) -> Result<(), CompileError> {
        self.emit_relaxed_binop(AssemblerX64::emit_mov, sz, src, dst)
    }
2781
    /// `cmp` with operand constraints relaxed (temporaries inserted as needed).
    fn emit_relaxed_cmp(
        &mut self,
        sz: Size,
        src: Location,
        dst: Location,
    ) -> Result<(), CompileError> {
        self.emit_relaxed_binop(AssemblerX64::emit_cmp, sz, src, dst)
    }
2790
    /// Sign-extend `src` (`sz_src` wide) into `dst` (`sz_dst` wide), with
    /// operand constraints relaxed via temporaries.
    fn emit_relaxed_sign_extension(
        &mut self,
        sz_src: Size,
        src: Location,
        sz_dst: Size,
        dst: Location,
    ) -> Result<(), CompileError> {
        self.emit_relaxed_zx_sx(AssemblerX64::emit_movsx, sz_src, src, sz_dst, dst)
    }
2800
    /// i32.add: `ret = loc_a + loc_b` (wrapping).
    fn emit_binop_add32(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_binop_i32(AssemblerX64::emit_add, loc_a, loc_b, ret)
    }
2809
    /// i32.sub: `ret = loc_a - loc_b` (wrapping).
    fn emit_binop_sub32(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_binop_i32(AssemblerX64::emit_sub, loc_a, loc_b, ret)
    }
2818
    /// i32.mul: `ret = loc_a * loc_b` (wrapping).
    fn emit_binop_mul32(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_binop_i32(AssemblerX64::emit_imul, loc_a, loc_b, ret)
    }
2827
    /// i32.div_u: unsigned 32-bit division of `loc_a` by `loc_b` into `ret`.
    ///
    /// Follows the x86 `div` register protocol: dividend in EDX:EAX (EDX
    /// zeroed for unsigned), quotient lands in EAX. Returns the code offset
    /// recorded for the division-by-zero trap metadata.
    fn emit_binop_udiv32(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
        integer_division_by_zero: Label,
    ) -> Result<usize, CompileError> {
        // We assume that RAX and RDX are temporary registers here.
        self.assembler
            .emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX))?;
        self.assembler
            .emit_xor(Size::S32, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX))?;
        let offset = self.emit_relaxed_xdiv(
            AssemblerX64::emit_div,
            Size::S32,
            loc_b,
            integer_division_by_zero,
        )?;
        self.assembler
            .emit_mov(Size::S32, Location::GPR(GPR::RAX), ret)?;
        Ok(offset)
    }
2850
    /// i32.div_s: signed 32-bit division of `loc_a` by `loc_b` into `ret`.
    ///
    /// `cdq` sign-extends EAX into EDX:EAX before `idiv`; the quotient is in
    /// EAX. Returns the code offset recorded for division-by-zero trap
    /// metadata. `_integer_overflow` is unused: `idiv` itself faults on
    /// INT_MIN / -1.
    fn emit_binop_sdiv32(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
        integer_division_by_zero: Label,
        _integer_overflow: Label,
    ) -> Result<usize, CompileError> {
        // We assume that RAX and RDX are temporary registers here.
        self.assembler
            .emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX))?;
        self.assembler.emit_cdq()?;
        let offset = self.emit_relaxed_xdiv(
            AssemblerX64::emit_idiv,
            Size::S32,
            loc_b,
            integer_division_by_zero,
        )?;
        self.assembler
            .emit_mov(Size::S32, Location::GPR(GPR::RAX), ret)?;
        Ok(offset)
    }
2873
    /// i32.rem_u: unsigned 32-bit remainder of `loc_a` by `loc_b` into `ret`.
    ///
    /// Same `div` protocol as udiv32, but the remainder is taken from EDX.
    /// Returns the code offset recorded for division-by-zero trap metadata.
    fn emit_binop_urem32(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
        integer_division_by_zero: Label,
    ) -> Result<usize, CompileError> {
        // We assume that RAX and RDX are temporary registers here.
        self.assembler
            .emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX))?;
        self.assembler
            .emit_xor(Size::S32, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX))?;
        let offset = self.emit_relaxed_xdiv(
            AssemblerX64::emit_div,
            Size::S32,
            loc_b,
            integer_division_by_zero,
        )?;
        self.assembler
            .emit_mov(Size::S32, Location::GPR(GPR::RDX), ret)?;
        Ok(offset)
    }
2896
    /// i32.rem_s: signed 32-bit remainder of `loc_a` by `loc_b` into `ret`.
    ///
    /// `idiv` would fault on INT_MIN / -1, but Wasm defines
    /// `i32::MIN rem -1 == 0`, so that exact operand pair is special-cased
    /// to 0 before reaching the divide. Remainder comes out of EDX. Returns
    /// the code offset recorded for division-by-zero trap metadata.
    fn emit_binop_srem32(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
        integer_division_by_zero: Label,
    ) -> Result<usize, CompileError> {
        // We assume that RAX and RDX are temporary registers here.
        let normal_path = self.assembler.get_label();
        let end = self.assembler.get_label();

        // If loc_a == i32::MIN && loc_b == -1, short-circuit to ret = 0.
        self.emit_relaxed_cmp(Size::S32, Location::Imm32(0x80000000), loc_a)?;
        self.assembler.emit_jmp(Condition::NotEqual, normal_path)?;
        self.emit_relaxed_cmp(Size::S32, Location::Imm32(0xffffffff), loc_b)?;
        self.assembler.emit_jmp(Condition::NotEqual, normal_path)?;
        self.move_location(Size::S32, Location::Imm32(0), ret)?;
        self.assembler.emit_jmp(Condition::None, end)?;

        self.emit_label(normal_path)?;
        self.assembler
            .emit_mov(Size::S32, loc_a, Location::GPR(GPR::RAX))?;
        self.assembler.emit_cdq()?;
        let offset = self.emit_relaxed_xdiv(
            AssemblerX64::emit_idiv,
            Size::S32,
            loc_b,
            integer_division_by_zero,
        )?;
        self.assembler
            .emit_mov(Size::S32, Location::GPR(GPR::RDX), ret)?;

        self.emit_label(end)?;
        Ok(offset)
    }
2931
    /// i32.and: `ret = loc_a & loc_b`.
    fn emit_binop_and32(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_binop_i32(AssemblerX64::emit_and, loc_a, loc_b, ret)
    }
2940
    /// i32.or: `ret = loc_a | loc_b`.
    fn emit_binop_or32(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_binop_i32(AssemblerX64::emit_or, loc_a, loc_b, ret)
    }
2949
    /// i32.xor: `ret = loc_a ^ loc_b`.
    fn emit_binop_xor32(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_binop_i32(AssemblerX64::emit_xor, loc_a, loc_b, ret)
    }
2958
    /// i32 signed `>=`: `ret = (loc_a >= loc_b) as i32`.
    fn i32_cmp_ge_s(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_cmpop_i32_dynamic_b(Condition::GreaterEqual, loc_a, loc_b, ret)
    }
2967
    /// i32 signed `>`: `ret = (loc_a > loc_b) as i32`.
    fn i32_cmp_gt_s(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_cmpop_i32_dynamic_b(Condition::Greater, loc_a, loc_b, ret)
    }
2976
    /// i32 signed `<=`: `ret = (loc_a <= loc_b) as i32`.
    fn i32_cmp_le_s(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_cmpop_i32_dynamic_b(Condition::LessEqual, loc_a, loc_b, ret)
    }
2985
    /// i32 signed `<`: `ret = (loc_a < loc_b) as i32`.
    fn i32_cmp_lt_s(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_cmpop_i32_dynamic_b(Condition::Less, loc_a, loc_b, ret)
    }
2994
    /// i32 unsigned `>=`: `ret = (loc_a >= loc_b) as i32`.
    fn i32_cmp_ge_u(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_cmpop_i32_dynamic_b(Condition::AboveEqual, loc_a, loc_b, ret)
    }
3003
    /// i32 unsigned `>`: `ret = (loc_a > loc_b) as i32`.
    fn i32_cmp_gt_u(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_cmpop_i32_dynamic_b(Condition::Above, loc_a, loc_b, ret)
    }
3012
    /// i32 unsigned `<=`: `ret = (loc_a <= loc_b) as i32`.
    fn i32_cmp_le_u(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_cmpop_i32_dynamic_b(Condition::BelowEqual, loc_a, loc_b, ret)
    }
3021
    /// i32 unsigned `<`: `ret = (loc_a < loc_b) as i32`.
    fn i32_cmp_lt_u(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_cmpop_i32_dynamic_b(Condition::Below, loc_a, loc_b, ret)
    }
3030
    /// i32 `!=`: `ret = (loc_a != loc_b) as i32`.
    fn i32_cmp_ne(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_cmpop_i32_dynamic_b(Condition::NotEqual, loc_a, loc_b, ret)
    }
3039
    /// i32 `==`: `ret = (loc_a == loc_b) as i32`.
    fn i32_cmp_eq(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        self.emit_cmpop_i32_dynamic_b(Condition::Equal, loc_a, loc_b, ret)
    }
3048
    /// i32.clz: count leading zeros of `loc` into `ret` (clz(0) == 32).
    ///
    /// Uses `lzcnt` when available; otherwise falls back to `bsr` with an
    /// explicit zero check, since BSR leaves the destination undefined for a
    /// zero source. The BSR result (index of highest set bit) is converted
    /// to a leading-zero count via `xor 31`.
    fn i32_clz(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        // Materialize the operand in a GPR if it is an immediate or memory.
        let src = match loc {
            Location::Imm32(_) | Location::Memory(_, _) => {
                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.move_location(Size::S32, loc, Location::GPR(tmp))?;
                tmp
            }
            Location::GPR(reg) => reg,
            _ => {
                codegen_error!("singlepass i32_clz unreachable");
            }
        };
        // Compute into a GPR; memory results are stored afterwards.
        let dst = match ret {
            Location::Memory(_, _) => self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?,
            Location::GPR(reg) => reg,
            _ => {
                codegen_error!("singlepass i32_clz unreachable");
            }
        };

        if self.assembler.arch_has_xzcnt() {
            self.assembler
                .arch_emit_lzcnt(Size::S32, Location::GPR(src), Location::GPR(dst))?;
        } else {
            let zero_path = self.assembler.get_label();
            let end = self.assembler.get_label();

            // NOTE(review): the zero test checks the full 64-bit register for
            // a 32-bit operation — relies on the upper 32 bits of `src` being
            // clear; confirm against callers.
            self.assembler.emit_test_gpr_64(src)?;
            self.assembler.emit_jmp(Condition::Equal, zero_path)?;
            self.assembler
                .emit_bsr(Size::S32, Location::GPR(src), Location::GPR(dst))?;
            // clz = 31 - bsr, i.e. bsr XOR 31 for values in 0..=31.
            self.assembler
                .emit_xor(Size::S32, Location::Imm32(31), Location::GPR(dst))?;
            self.assembler.emit_jmp(Condition::None, end)?;
            self.emit_label(zero_path)?;
            self.move_location(Size::S32, Location::Imm32(32), Location::GPR(dst))?;
            self.emit_label(end)?;
        }
        match loc {
            Location::Imm32(_) | Location::Memory(_, _) => {
                self.release_gpr(src);
            }
            _ => {}
        };
        if let Location::Memory(_, _) = ret {
            self.move_location(Size::S32, Location::GPR(dst), ret)?;
            self.release_gpr(dst);
        };
        Ok(())
    }
3103
    /// i32.ctz: count trailing zeros of `loc` into `ret` (ctz(0) == 32).
    ///
    /// Uses `tzcnt` when available; otherwise falls back to `bsf` with an
    /// explicit zero check, since BSF leaves the destination undefined for a
    /// zero source.
    fn i32_ctz(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        // Materialize the operand in a GPR if it is an immediate or memory.
        let src = match loc {
            Location::Imm32(_) | Location::Memory(_, _) => {
                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.move_location(Size::S32, loc, Location::GPR(tmp))?;
                tmp
            }
            Location::GPR(reg) => reg,
            _ => {
                codegen_error!("singlepass i32_ctz unreachable");
            }
        };
        // Compute into a GPR; memory results are stored afterwards.
        let dst = match ret {
            Location::Memory(_, _) => self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?,
            Location::GPR(reg) => reg,
            _ => {
                codegen_error!("singlepass i32_ctz unreachable");
            }
        };

        if self.assembler.arch_has_xzcnt() {
            self.assembler
                .arch_emit_tzcnt(Size::S32, Location::GPR(src), Location::GPR(dst))?;
        } else {
            let zero_path = self.assembler.get_label();
            let end = self.assembler.get_label();

            // NOTE(review): the zero test checks the full 64-bit register for
            // a 32-bit operation — relies on the upper 32 bits of `src` being
            // clear; confirm against callers.
            self.assembler.emit_test_gpr_64(src)?;
            self.assembler.emit_jmp(Condition::Equal, zero_path)?;
            self.assembler
                .emit_bsf(Size::S32, Location::GPR(src), Location::GPR(dst))?;
            self.assembler.emit_jmp(Condition::None, end)?;
            self.emit_label(zero_path)?;
            self.move_location(Size::S32, Location::Imm32(32), Location::GPR(dst))?;
            self.emit_label(end)?;
        }

        match loc {
            Location::Imm32(_) | Location::Memory(_, _) => {
                self.release_gpr(src);
            }
            _ => {}
        };
        if let Location::Memory(_, _) = ret {
            self.move_location(Size::S32, Location::GPR(dst), ret)?;
            self.release_gpr(dst);
        };
        Ok(())
    }
3157
    /// i32.popcnt: count set bits of `loc` into `ret` via the `popcnt`
    /// instruction.
    ///
    /// `popcnt` cannot take an immediate source or a memory destination, so
    /// temporaries are introduced for those cases and released afterwards.
    fn i32_popcnt(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        match loc {
            Location::Imm32(_) => {
                // Stage the immediate in a register first.
                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.move_location(Size::S32, loc, Location::GPR(tmp))?;
                if let Location::Memory(_, _) = ret {
                    let out_tmp = self.acquire_temp_gpr().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                    })?;
                    self.assembler.emit_popcnt(
                        Size::S32,
                        Location::GPR(tmp),
                        Location::GPR(out_tmp),
                    )?;
                    self.move_location(Size::S32, Location::GPR(out_tmp), ret)?;
                    self.release_gpr(out_tmp);
                } else {
                    self.assembler
                        .emit_popcnt(Size::S32, Location::GPR(tmp), ret)?;
                }
                self.release_gpr(tmp);
            }
            Location::Memory(_, _) | Location::GPR(_) => {
                if let Location::Memory(_, _) = ret {
                    // popcnt's destination must be a register; spill after.
                    let out_tmp = self.acquire_temp_gpr().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                    })?;
                    self.assembler
                        .emit_popcnt(Size::S32, loc, Location::GPR(out_tmp))?;
                    self.move_location(Size::S32, Location::GPR(out_tmp), ret)?;
                    self.release_gpr(out_tmp);
                } else {
                    self.assembler.emit_popcnt(Size::S32, loc, ret)?;
                }
            }
            _ => {
                codegen_error!("singlepass i32_popcnt unreachable");
            }
        }
        Ok(())
    }
3201
    fn i32_shl(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        // Wasm `i32.shl`: delegate to the shared 32-bit shift helper,
        // parameterised by the SHL emitter.
        self.emit_shift_i32(AssemblerX64::emit_shl, loc_a, loc_b, ret)
    }
3210
    fn i32_shr(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        // Wasm `i32.shr_u`: delegate to the shared 32-bit shift helper,
        // parameterised by the logical-shift-right (SHR) emitter.
        self.emit_shift_i32(AssemblerX64::emit_shr, loc_a, loc_b, ret)
    }
3219
    fn i32_sar(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        // Wasm `i32.shr_s`: delegate to the shared 32-bit shift helper,
        // parameterised by the arithmetic-shift-right (SAR) emitter.
        self.emit_shift_i32(AssemblerX64::emit_sar, loc_a, loc_b, ret)
    }
3228
    fn i32_rol(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        // Wasm `i32.rotl`: delegate to the shared 32-bit shift helper,
        // parameterised by the rotate-left (ROL) emitter.
        self.emit_shift_i32(AssemblerX64::emit_rol, loc_a, loc_b, ret)
    }
3237
    fn i32_ror(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        // Wasm `i32.rotr`: delegate to the shared 32-bit shift helper,
        // parameterised by the rotate-right (ROR) emitter.
        self.emit_shift_i32(AssemblerX64::emit_ror, loc_a, loc_b, ret)
    }
3246
3247    fn i32_load(
3248        &mut self,
3249        addr: Location,
3250        memarg: &MemArg,
3251        ret: Location,
3252        need_check: bool,
3253        imported_memories: bool,
3254        offset: i32,
3255        heap_access_oob: Label,
3256        unaligned_atomic: Label,
3257    ) -> Result<(), CompileError> {
3258        self.memory_op(
3259            addr,
3260            memarg,
3261            false,
3262            4,
3263            need_check,
3264            imported_memories,
3265            offset,
3266            heap_access_oob,
3267            unaligned_atomic,
3268            |this, addr| {
3269                this.emit_relaxed_binop(
3270                    AssemblerX64::emit_mov,
3271                    Size::S32,
3272                    Location::Memory(addr, 0),
3273                    ret,
3274                )
3275            },
3276        )
3277    }
3278
3279    fn i32_load_8u(
3280        &mut self,
3281        addr: Location,
3282        memarg: &MemArg,
3283        ret: Location,
3284        need_check: bool,
3285        imported_memories: bool,
3286        offset: i32,
3287        heap_access_oob: Label,
3288        unaligned_atomic: Label,
3289    ) -> Result<(), CompileError> {
3290        self.memory_op(
3291            addr,
3292            memarg,
3293            false,
3294            1,
3295            need_check,
3296            imported_memories,
3297            offset,
3298            heap_access_oob,
3299            unaligned_atomic,
3300            |this, addr| {
3301                this.emit_relaxed_zx_sx(
3302                    AssemblerX64::emit_movzx,
3303                    Size::S8,
3304                    Location::Memory(addr, 0),
3305                    Size::S32,
3306                    ret,
3307                )
3308            },
3309        )
3310    }
3311
3312    fn i32_load_8s(
3313        &mut self,
3314        addr: Location,
3315        memarg: &MemArg,
3316        ret: Location,
3317        need_check: bool,
3318        imported_memories: bool,
3319        offset: i32,
3320        heap_access_oob: Label,
3321        unaligned_atomic: Label,
3322    ) -> Result<(), CompileError> {
3323        self.memory_op(
3324            addr,
3325            memarg,
3326            false,
3327            1,
3328            need_check,
3329            imported_memories,
3330            offset,
3331            heap_access_oob,
3332            unaligned_atomic,
3333            |this, addr| {
3334                this.emit_relaxed_zx_sx(
3335                    AssemblerX64::emit_movsx,
3336                    Size::S8,
3337                    Location::Memory(addr, 0),
3338                    Size::S32,
3339                    ret,
3340                )
3341            },
3342        )
3343    }
3344
3345    fn i32_load_16u(
3346        &mut self,
3347        addr: Location,
3348        memarg: &MemArg,
3349        ret: Location,
3350        need_check: bool,
3351        imported_memories: bool,
3352        offset: i32,
3353        heap_access_oob: Label,
3354        unaligned_atomic: Label,
3355    ) -> Result<(), CompileError> {
3356        self.memory_op(
3357            addr,
3358            memarg,
3359            false,
3360            2,
3361            need_check,
3362            imported_memories,
3363            offset,
3364            heap_access_oob,
3365            unaligned_atomic,
3366            |this, addr| {
3367                this.emit_relaxed_zx_sx(
3368                    AssemblerX64::emit_movzx,
3369                    Size::S16,
3370                    Location::Memory(addr, 0),
3371                    Size::S32,
3372                    ret,
3373                )
3374            },
3375        )
3376    }
3377
3378    fn i32_load_16s(
3379        &mut self,
3380        addr: Location,
3381        memarg: &MemArg,
3382        ret: Location,
3383        need_check: bool,
3384        imported_memories: bool,
3385        offset: i32,
3386        heap_access_oob: Label,
3387        unaligned_atomic: Label,
3388    ) -> Result<(), CompileError> {
3389        self.memory_op(
3390            addr,
3391            memarg,
3392            false,
3393            2,
3394            need_check,
3395            imported_memories,
3396            offset,
3397            heap_access_oob,
3398            unaligned_atomic,
3399            |this, addr| {
3400                this.emit_relaxed_zx_sx(
3401                    AssemblerX64::emit_movsx,
3402                    Size::S16,
3403                    Location::Memory(addr, 0),
3404                    Size::S32,
3405                    ret,
3406                )
3407            },
3408        )
3409    }
3410
3411    fn i32_atomic_load(
3412        &mut self,
3413        addr: Location,
3414        memarg: &MemArg,
3415        ret: Location,
3416        need_check: bool,
3417        imported_memories: bool,
3418        offset: i32,
3419        heap_access_oob: Label,
3420        unaligned_atomic: Label,
3421    ) -> Result<(), CompileError> {
3422        self.memory_op(
3423            addr,
3424            memarg,
3425            true,
3426            4,
3427            need_check,
3428            imported_memories,
3429            offset,
3430            heap_access_oob,
3431            unaligned_atomic,
3432            |this, addr| this.emit_relaxed_mov(Size::S32, Location::Memory(addr, 0), ret),
3433        )
3434    }
3435
3436    fn i32_atomic_load_8u(
3437        &mut self,
3438        addr: Location,
3439        memarg: &MemArg,
3440        ret: Location,
3441        need_check: bool,
3442        imported_memories: bool,
3443        offset: i32,
3444        heap_access_oob: Label,
3445        unaligned_atomic: Label,
3446    ) -> Result<(), CompileError> {
3447        self.memory_op(
3448            addr,
3449            memarg,
3450            true,
3451            1,
3452            need_check,
3453            imported_memories,
3454            offset,
3455            heap_access_oob,
3456            unaligned_atomic,
3457            |this, addr| {
3458                this.emit_relaxed_zero_extension(
3459                    Size::S8,
3460                    Location::Memory(addr, 0),
3461                    Size::S32,
3462                    ret,
3463                )
3464            },
3465        )
3466    }
3467
3468    fn i32_atomic_load_16u(
3469        &mut self,
3470        addr: Location,
3471        memarg: &MemArg,
3472        ret: Location,
3473        need_check: bool,
3474        imported_memories: bool,
3475        offset: i32,
3476        heap_access_oob: Label,
3477        unaligned_atomic: Label,
3478    ) -> Result<(), CompileError> {
3479        self.memory_op(
3480            addr,
3481            memarg,
3482            true,
3483            2,
3484            need_check,
3485            imported_memories,
3486            offset,
3487            heap_access_oob,
3488            unaligned_atomic,
3489            |this, addr| {
3490                this.emit_relaxed_zero_extension(
3491                    Size::S16,
3492                    Location::Memory(addr, 0),
3493                    Size::S32,
3494                    ret,
3495                )
3496            },
3497        )
3498    }
3499
3500    fn i32_save(
3501        &mut self,
3502        target_value: Location,
3503        memarg: &MemArg,
3504        target_addr: Location,
3505        need_check: bool,
3506        imported_memories: bool,
3507        offset: i32,
3508        heap_access_oob: Label,
3509        unaligned_atomic: Label,
3510    ) -> Result<(), CompileError> {
3511        self.memory_op(
3512            target_addr,
3513            memarg,
3514            false,
3515            4,
3516            need_check,
3517            imported_memories,
3518            offset,
3519            heap_access_oob,
3520            unaligned_atomic,
3521            |this, addr| {
3522                this.emit_relaxed_binop(
3523                    AssemblerX64::emit_mov,
3524                    Size::S32,
3525                    target_value,
3526                    Location::Memory(addr, 0),
3527                )
3528            },
3529        )
3530    }
3531
3532    fn i32_save_8(
3533        &mut self,
3534        target_value: Location,
3535        memarg: &MemArg,
3536        target_addr: Location,
3537        need_check: bool,
3538        imported_memories: bool,
3539        offset: i32,
3540        heap_access_oob: Label,
3541        unaligned_atomic: Label,
3542    ) -> Result<(), CompileError> {
3543        self.memory_op(
3544            target_addr,
3545            memarg,
3546            false,
3547            1,
3548            need_check,
3549            imported_memories,
3550            offset,
3551            heap_access_oob,
3552            unaligned_atomic,
3553            |this, addr| {
3554                this.emit_relaxed_binop(
3555                    AssemblerX64::emit_mov,
3556                    Size::S8,
3557                    target_value,
3558                    Location::Memory(addr, 0),
3559                )
3560            },
3561        )
3562    }
3563
3564    fn i32_save_16(
3565        &mut self,
3566        target_value: Location,
3567        memarg: &MemArg,
3568        target_addr: Location,
3569        need_check: bool,
3570        imported_memories: bool,
3571        offset: i32,
3572        heap_access_oob: Label,
3573        unaligned_atomic: Label,
3574    ) -> Result<(), CompileError> {
3575        self.memory_op(
3576            target_addr,
3577            memarg,
3578            false,
3579            2,
3580            need_check,
3581            imported_memories,
3582            offset,
3583            heap_access_oob,
3584            unaligned_atomic,
3585            |this, addr| {
3586                this.emit_relaxed_binop(
3587                    AssemblerX64::emit_mov,
3588                    Size::S16,
3589                    target_value,
3590                    Location::Memory(addr, 0),
3591                )
3592            },
3593        )
3594    }
3595
3596    fn i32_atomic_save(
3597        &mut self,
3598        value: Location,
3599        memarg: &MemArg,
3600        target_addr: Location,
3601        need_check: bool,
3602        imported_memories: bool,
3603        offset: i32,
3604        heap_access_oob: Label,
3605        unaligned_atomic: Label,
3606    ) -> Result<(), CompileError> {
3607        self.memory_op(
3608            target_addr,
3609            memarg,
3610            true,
3611            4,
3612            need_check,
3613            imported_memories,
3614            offset,
3615            heap_access_oob,
3616            unaligned_atomic,
3617            |this, addr| {
3618                this.emit_relaxed_binop(
3619                    AssemblerX64::emit_mov,
3620                    Size::S32,
3621                    value,
3622                    Location::Memory(addr, 0),
3623                )
3624            },
3625        )
3626    }
3627
3628    fn i32_atomic_save_8(
3629        &mut self,
3630        value: Location,
3631        memarg: &MemArg,
3632        target_addr: Location,
3633        need_check: bool,
3634        imported_memories: bool,
3635        offset: i32,
3636        heap_access_oob: Label,
3637        unaligned_atomic: Label,
3638    ) -> Result<(), CompileError> {
3639        self.memory_op(
3640            target_addr,
3641            memarg,
3642            true,
3643            1,
3644            need_check,
3645            imported_memories,
3646            offset,
3647            heap_access_oob,
3648            unaligned_atomic,
3649            |this, addr| {
3650                this.emit_relaxed_binop(
3651                    AssemblerX64::emit_mov,
3652                    Size::S8,
3653                    value,
3654                    Location::Memory(addr, 0),
3655                )
3656            },
3657        )
3658    }
3659
3660    fn i32_atomic_save_16(
3661        &mut self,
3662        value: Location,
3663        memarg: &MemArg,
3664        target_addr: Location,
3665        need_check: bool,
3666        imported_memories: bool,
3667        offset: i32,
3668        heap_access_oob: Label,
3669        unaligned_atomic: Label,
3670    ) -> Result<(), CompileError> {
3671        self.memory_op(
3672            target_addr,
3673            memarg,
3674            true,
3675            2,
3676            need_check,
3677            imported_memories,
3678            offset,
3679            heap_access_oob,
3680            unaligned_atomic,
3681            |this, addr| {
3682                this.emit_relaxed_binop(
3683                    AssemblerX64::emit_mov,
3684                    Size::S16,
3685                    value,
3686                    Location::Memory(addr, 0),
3687                )
3688            },
3689        )
3690    }
3691
3692    fn i32_atomic_add(
3693        &mut self,
3694        loc: Location,
3695        target: Location,
3696        memarg: &MemArg,
3697        ret: Location,
3698        need_check: bool,
3699        imported_memories: bool,
3700        offset: i32,
3701        heap_access_oob: Label,
3702        unaligned_atomic: Label,
3703    ) -> Result<(), CompileError> {
3704        let value = self.acquire_temp_gpr().ok_or_else(|| {
3705            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3706        })?;
3707        self.move_location(Size::S32, loc, Location::GPR(value))?;
3708        self.memory_op(
3709            target,
3710            memarg,
3711            true,
3712            4,
3713            need_check,
3714            imported_memories,
3715            offset,
3716            heap_access_oob,
3717            unaligned_atomic,
3718            |this, addr| {
3719                this.assembler.emit_lock_xadd(
3720                    Size::S32,
3721                    Location::GPR(value),
3722                    Location::Memory(addr, 0),
3723                )
3724            },
3725        )?;
3726        self.move_location(Size::S32, Location::GPR(value), ret)?;
3727        self.release_gpr(value);
3728        Ok(())
3729    }
3730
3731    fn i32_atomic_add_8u(
3732        &mut self,
3733        loc: Location,
3734        target: Location,
3735        memarg: &MemArg,
3736        ret: Location,
3737        need_check: bool,
3738        imported_memories: bool,
3739        offset: i32,
3740        heap_access_oob: Label,
3741        unaligned_atomic: Label,
3742    ) -> Result<(), CompileError> {
3743        let value = self.acquire_temp_gpr().ok_or_else(|| {
3744            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3745        })?;
3746        self.move_location_extend(Size::S8, false, loc, Size::S32, Location::GPR(value))?;
3747        self.memory_op(
3748            target,
3749            memarg,
3750            true,
3751            1,
3752            need_check,
3753            imported_memories,
3754            offset,
3755            heap_access_oob,
3756            unaligned_atomic,
3757            |this, addr| {
3758                this.assembler.emit_lock_xadd(
3759                    Size::S8,
3760                    Location::GPR(value),
3761                    Location::Memory(addr, 0),
3762                )
3763            },
3764        )?;
3765        self.move_location(Size::S32, Location::GPR(value), ret)?;
3766        self.release_gpr(value);
3767        Ok(())
3768    }
3769
3770    fn i32_atomic_add_16u(
3771        &mut self,
3772        loc: Location,
3773        target: Location,
3774        memarg: &MemArg,
3775        ret: Location,
3776        need_check: bool,
3777        imported_memories: bool,
3778        offset: i32,
3779        heap_access_oob: Label,
3780        unaligned_atomic: Label,
3781    ) -> Result<(), CompileError> {
3782        let value = self.acquire_temp_gpr().ok_or_else(|| {
3783            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3784        })?;
3785        self.move_location_extend(Size::S16, false, loc, Size::S32, Location::GPR(value))?;
3786        self.memory_op(
3787            target,
3788            memarg,
3789            true,
3790            2,
3791            need_check,
3792            imported_memories,
3793            offset,
3794            heap_access_oob,
3795            unaligned_atomic,
3796            |this, addr| {
3797                this.assembler.emit_lock_xadd(
3798                    Size::S16,
3799                    Location::GPR(value),
3800                    Location::Memory(addr, 0),
3801                )
3802            },
3803        )?;
3804        self.move_location(Size::S32, Location::GPR(value), ret)?;
3805        self.release_gpr(value);
3806        Ok(())
3807    }
3808
3809    fn i32_atomic_sub(
3810        &mut self,
3811        loc: Location,
3812        target: Location,
3813        memarg: &MemArg,
3814        ret: Location,
3815        need_check: bool,
3816        imported_memories: bool,
3817        offset: i32,
3818        heap_access_oob: Label,
3819        unaligned_atomic: Label,
3820    ) -> Result<(), CompileError> {
3821        let value = self.acquire_temp_gpr().ok_or_else(|| {
3822            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3823        })?;
3824        self.location_neg(Size::S32, false, loc, Size::S32, Location::GPR(value))?;
3825        self.memory_op(
3826            target,
3827            memarg,
3828            true,
3829            4,
3830            need_check,
3831            imported_memories,
3832            offset,
3833            heap_access_oob,
3834            unaligned_atomic,
3835            |this, addr| {
3836                this.assembler.emit_lock_xadd(
3837                    Size::S32,
3838                    Location::GPR(value),
3839                    Location::Memory(addr, 0),
3840                )
3841            },
3842        )?;
3843        self.move_location(Size::S32, Location::GPR(value), ret)?;
3844        self.release_gpr(value);
3845        Ok(())
3846    }
3847
3848    fn i32_atomic_sub_8u(
3849        &mut self,
3850        loc: Location,
3851        target: Location,
3852        memarg: &MemArg,
3853        ret: Location,
3854        need_check: bool,
3855        imported_memories: bool,
3856        offset: i32,
3857        heap_access_oob: Label,
3858        unaligned_atomic: Label,
3859    ) -> Result<(), CompileError> {
3860        let value = self.acquire_temp_gpr().ok_or_else(|| {
3861            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3862        })?;
3863        self.location_neg(Size::S8, false, loc, Size::S32, Location::GPR(value))?;
3864        self.memory_op(
3865            target,
3866            memarg,
3867            true,
3868            1,
3869            need_check,
3870            imported_memories,
3871            offset,
3872            heap_access_oob,
3873            unaligned_atomic,
3874            |this, addr| {
3875                this.assembler.emit_lock_xadd(
3876                    Size::S8,
3877                    Location::GPR(value),
3878                    Location::Memory(addr, 0),
3879                )
3880            },
3881        )?;
3882        self.move_location(Size::S32, Location::GPR(value), ret)?;
3883        self.release_gpr(value);
3884        Ok(())
3885    }
3886
3887    fn i32_atomic_sub_16u(
3888        &mut self,
3889        loc: Location,
3890        target: Location,
3891        memarg: &MemArg,
3892        ret: Location,
3893        need_check: bool,
3894        imported_memories: bool,
3895        offset: i32,
3896        heap_access_oob: Label,
3897        unaligned_atomic: Label,
3898    ) -> Result<(), CompileError> {
3899        let value = self.acquire_temp_gpr().ok_or_else(|| {
3900            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
3901        })?;
3902        self.location_neg(Size::S16, false, loc, Size::S32, Location::GPR(value))?;
3903        self.memory_op(
3904            target,
3905            memarg,
3906            true,
3907            2,
3908            need_check,
3909            imported_memories,
3910            offset,
3911            heap_access_oob,
3912            unaligned_atomic,
3913            |this, addr| {
3914                this.assembler.emit_lock_xadd(
3915                    Size::S16,
3916                    Location::GPR(value),
3917                    Location::Memory(addr, 0),
3918                )
3919            },
3920        )?;
3921        self.move_location(Size::S32, Location::GPR(value), ret)?;
3922        self.release_gpr(value);
3923        Ok(())
3924    }
3925
3926    fn i32_atomic_and(
3927        &mut self,
3928        loc: Location,
3929        target: Location,
3930        memarg: &MemArg,
3931        ret: Location,
3932        need_check: bool,
3933        imported_memories: bool,
3934        offset: i32,
3935        heap_access_oob: Label,
3936        unaligned_atomic: Label,
3937    ) -> Result<(), CompileError> {
3938        self.emit_compare_and_swap(
3939            loc,
3940            target,
3941            ret,
3942            memarg,
3943            4,
3944            Size::S32,
3945            Size::S32,
3946            need_check,
3947            imported_memories,
3948            offset,
3949            heap_access_oob,
3950            unaligned_atomic,
3951            |this, src, dst| {
3952                this.assembler
3953                    .emit_and(Size::S32, Location::GPR(src), Location::GPR(dst))
3954            },
3955        )
3956    }
3957
3958    fn i32_atomic_and_8u(
3959        &mut self,
3960        loc: Location,
3961        target: Location,
3962        memarg: &MemArg,
3963        ret: Location,
3964        need_check: bool,
3965        imported_memories: bool,
3966        offset: i32,
3967        heap_access_oob: Label,
3968        unaligned_atomic: Label,
3969    ) -> Result<(), CompileError> {
3970        self.emit_compare_and_swap(
3971            loc,
3972            target,
3973            ret,
3974            memarg,
3975            1,
3976            Size::S8,
3977            Size::S32,
3978            need_check,
3979            imported_memories,
3980            offset,
3981            heap_access_oob,
3982            unaligned_atomic,
3983            |this, src, dst| {
3984                this.assembler
3985                    .emit_and(Size::S32, Location::GPR(src), Location::GPR(dst))
3986            },
3987        )
3988    }
3989
3990    fn i32_atomic_and_16u(
3991        &mut self,
3992        loc: Location,
3993        target: Location,
3994        memarg: &MemArg,
3995        ret: Location,
3996        need_check: bool,
3997        imported_memories: bool,
3998        offset: i32,
3999        heap_access_oob: Label,
4000        unaligned_atomic: Label,
4001    ) -> Result<(), CompileError> {
4002        self.emit_compare_and_swap(
4003            loc,
4004            target,
4005            ret,
4006            memarg,
4007            2,
4008            Size::S16,
4009            Size::S32,
4010            need_check,
4011            imported_memories,
4012            offset,
4013            heap_access_oob,
4014            unaligned_atomic,
4015            |this, src, dst| {
4016                this.assembler
4017                    .emit_and(Size::S32, Location::GPR(src), Location::GPR(dst))
4018            },
4019        )
4020    }
4021
4022    fn i32_atomic_or(
4023        &mut self,
4024        loc: Location,
4025        target: Location,
4026        memarg: &MemArg,
4027        ret: Location,
4028        need_check: bool,
4029        imported_memories: bool,
4030        offset: i32,
4031        heap_access_oob: Label,
4032        unaligned_atomic: Label,
4033    ) -> Result<(), CompileError> {
4034        self.emit_compare_and_swap(
4035            loc,
4036            target,
4037            ret,
4038            memarg,
4039            4,
4040            Size::S32,
4041            Size::S32,
4042            need_check,
4043            imported_memories,
4044            offset,
4045            heap_access_oob,
4046            unaligned_atomic,
4047            |this, src, dst| {
4048                this.assembler
4049                    .emit_or(Size::S32, Location::GPR(src), Location::GPR(dst))
4050            },
4051        )
4052    }
4053
4054    fn i32_atomic_or_8u(
4055        &mut self,
4056        loc: Location,
4057        target: Location,
4058        memarg: &MemArg,
4059        ret: Location,
4060        need_check: bool,
4061        imported_memories: bool,
4062        offset: i32,
4063        heap_access_oob: Label,
4064        unaligned_atomic: Label,
4065    ) -> Result<(), CompileError> {
4066        self.emit_compare_and_swap(
4067            loc,
4068            target,
4069            ret,
4070            memarg,
4071            1,
4072            Size::S8,
4073            Size::S32,
4074            need_check,
4075            imported_memories,
4076            offset,
4077            heap_access_oob,
4078            unaligned_atomic,
4079            |this, src, dst| {
4080                this.assembler
4081                    .emit_or(Size::S32, Location::GPR(src), Location::GPR(dst))
4082            },
4083        )
4084    }
4085
4086    fn i32_atomic_or_16u(
4087        &mut self,
4088        loc: Location,
4089        target: Location,
4090        memarg: &MemArg,
4091        ret: Location,
4092        need_check: bool,
4093        imported_memories: bool,
4094        offset: i32,
4095        heap_access_oob: Label,
4096        unaligned_atomic: Label,
4097    ) -> Result<(), CompileError> {
4098        self.emit_compare_and_swap(
4099            loc,
4100            target,
4101            ret,
4102            memarg,
4103            2,
4104            Size::S16,
4105            Size::S32,
4106            need_check,
4107            imported_memories,
4108            offset,
4109            heap_access_oob,
4110            unaligned_atomic,
4111            |this, src, dst| {
4112                this.assembler
4113                    .emit_or(Size::S32, Location::GPR(src), Location::GPR(dst))
4114            },
4115        )
4116    }
4117
4118    fn i32_atomic_xor(
4119        &mut self,
4120        loc: Location,
4121        target: Location,
4122        memarg: &MemArg,
4123        ret: Location,
4124        need_check: bool,
4125        imported_memories: bool,
4126        offset: i32,
4127        heap_access_oob: Label,
4128        unaligned_atomic: Label,
4129    ) -> Result<(), CompileError> {
4130        self.emit_compare_and_swap(
4131            loc,
4132            target,
4133            ret,
4134            memarg,
4135            4,
4136            Size::S32,
4137            Size::S32,
4138            need_check,
4139            imported_memories,
4140            offset,
4141            heap_access_oob,
4142            unaligned_atomic,
4143            |this, src, dst| {
4144                this.assembler
4145                    .emit_xor(Size::S32, Location::GPR(src), Location::GPR(dst))
4146            },
4147        )
4148    }
4149
4150    fn i32_atomic_xor_8u(
4151        &mut self,
4152        loc: Location,
4153        target: Location,
4154        memarg: &MemArg,
4155        ret: Location,
4156        need_check: bool,
4157        imported_memories: bool,
4158        offset: i32,
4159        heap_access_oob: Label,
4160        unaligned_atomic: Label,
4161    ) -> Result<(), CompileError> {
4162        self.emit_compare_and_swap(
4163            loc,
4164            target,
4165            ret,
4166            memarg,
4167            1,
4168            Size::S8,
4169            Size::S32,
4170            need_check,
4171            imported_memories,
4172            offset,
4173            heap_access_oob,
4174            unaligned_atomic,
4175            |this, src, dst| {
4176                this.assembler
4177                    .emit_xor(Size::S32, Location::GPR(src), Location::GPR(dst))
4178            },
4179        )
4180    }
4181
4182    fn i32_atomic_xor_16u(
4183        &mut self,
4184        loc: Location,
4185        target: Location,
4186        memarg: &MemArg,
4187        ret: Location,
4188        need_check: bool,
4189        imported_memories: bool,
4190        offset: i32,
4191        heap_access_oob: Label,
4192        unaligned_atomic: Label,
4193    ) -> Result<(), CompileError> {
4194        self.emit_compare_and_swap(
4195            loc,
4196            target,
4197            ret,
4198            memarg,
4199            2,
4200            Size::S16,
4201            Size::S32,
4202            need_check,
4203            imported_memories,
4204            offset,
4205            heap_access_oob,
4206            unaligned_atomic,
4207            |this, src, dst| {
4208                this.assembler
4209                    .emit_xor(Size::S32, Location::GPR(src), Location::GPR(dst))
4210            },
4211        )
4212    }
4213
4214    fn i32_atomic_xchg(
4215        &mut self,
4216        loc: Location,
4217        target: Location,
4218        memarg: &MemArg,
4219        ret: Location,
4220        need_check: bool,
4221        imported_memories: bool,
4222        offset: i32,
4223        heap_access_oob: Label,
4224        unaligned_atomic: Label,
4225    ) -> Result<(), CompileError> {
4226        let value = self.acquire_temp_gpr().ok_or_else(|| {
4227            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
4228        })?;
4229        self.move_location(Size::S32, loc, Location::GPR(value))?;
4230        self.memory_op(
4231            target,
4232            memarg,
4233            true,
4234            4,
4235            need_check,
4236            imported_memories,
4237            offset,
4238            heap_access_oob,
4239            unaligned_atomic,
4240            |this, addr| {
4241                this.assembler
4242                    .emit_xchg(Size::S32, Location::GPR(value), Location::Memory(addr, 0))
4243            },
4244        )?;
4245        self.move_location(Size::S32, Location::GPR(value), ret)?;
4246        self.release_gpr(value);
4247        Ok(())
4248    }
4249
4250    fn i32_atomic_xchg_8u(
4251        &mut self,
4252        loc: Location,
4253        target: Location,
4254        memarg: &MemArg,
4255        ret: Location,
4256        need_check: bool,
4257        imported_memories: bool,
4258        offset: i32,
4259        heap_access_oob: Label,
4260        unaligned_atomic: Label,
4261    ) -> Result<(), CompileError> {
4262        let value = self.acquire_temp_gpr().ok_or_else(|| {
4263            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
4264        })?;
4265        self.assembler
4266            .emit_movzx(Size::S8, loc, Size::S32, Location::GPR(value))?;
4267        self.memory_op(
4268            target,
4269            memarg,
4270            true,
4271            1,
4272            need_check,
4273            imported_memories,
4274            offset,
4275            heap_access_oob,
4276            unaligned_atomic,
4277            |this, addr| {
4278                this.assembler
4279                    .emit_xchg(Size::S8, Location::GPR(value), Location::Memory(addr, 0))
4280            },
4281        )?;
4282        self.move_location(Size::S32, Location::GPR(value), ret)?;
4283        self.release_gpr(value);
4284        Ok(())
4285    }
4286
4287    fn i32_atomic_xchg_16u(
4288        &mut self,
4289        loc: Location,
4290        target: Location,
4291        memarg: &MemArg,
4292        ret: Location,
4293        need_check: bool,
4294        imported_memories: bool,
4295        offset: i32,
4296        heap_access_oob: Label,
4297        unaligned_atomic: Label,
4298    ) -> Result<(), CompileError> {
4299        let value = self.acquire_temp_gpr().ok_or_else(|| {
4300            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
4301        })?;
4302        self.assembler
4303            .emit_movzx(Size::S16, loc, Size::S32, Location::GPR(value))?;
4304        self.memory_op(
4305            target,
4306            memarg,
4307            true,
4308            2,
4309            need_check,
4310            imported_memories,
4311            offset,
4312            heap_access_oob,
4313            unaligned_atomic,
4314            |this, addr| {
4315                this.assembler
4316                    .emit_xchg(Size::S16, Location::GPR(value), Location::Memory(addr, 0))
4317            },
4318        )?;
4319        self.move_location(Size::S32, Location::GPR(value), ret)?;
4320        self.release_gpr(value);
4321        Ok(())
4322    }
4323
    /// `i32.atomic.rmw.cmpxchg`: if the 32-bit word at the effective address
    /// equals `cmp`, store `new` there; `ret` receives the value that was
    /// observed in memory (the comparand on success).
    fn i32_atomic_cmpxchg(
        &mut self,
        new: Location,
        cmp: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        // CMPXCHG requires the comparand in RAX.
        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
        // Pick a scratch register for `new` that cannot alias the registers
        // the `cmp`/`new` operands may currently occupy.
        let value = if cmp == Location::GPR(GPR::R14) {
            if new == Location::GPR(GPR::R13) {
                GPR::R12
            } else {
                GPR::R13
            }
        } else {
            GPR::R14
        };
        // The scratch register is not formally allocated, so save and restore
        // whatever it currently holds around the operation.
        self.assembler.emit_push(Size::S64, Location::GPR(value))?;
        self.assembler
            .emit_mov(Size::S32, cmp, Location::GPR(compare))?;
        self.assembler
            .emit_mov(Size::S32, new, Location::GPR(value))?;

        self.memory_op(
            target,
            memarg,
            true,
            4,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.assembler.emit_lock_cmpxchg(
                    Size::S32,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )?;
                // After LOCK CMPXCHG, RAX holds the value read from memory.
                this.assembler
                    .emit_mov(Size::S32, Location::GPR(compare), ret)
            },
        )?;
        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
        self.release_gpr(compare);
        Ok(())
    }
4377
    /// `i32.atomic.rmw8.cmpxchg_u`: byte-wide compare-and-exchange; the byte
    /// observed in memory is zero-extended to 32 bits into `ret`.
    fn i32_atomic_cmpxchg_8u(
        &mut self,
        new: Location,
        cmp: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        // CMPXCHG requires the comparand in RAX.
        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
        // Pick a scratch register for `new` that cannot alias the registers
        // the `cmp`/`new` operands may currently occupy.
        let value = if cmp == Location::GPR(GPR::R14) {
            if new == Location::GPR(GPR::R13) {
                GPR::R12
            } else {
                GPR::R13
            }
        } else {
            GPR::R14
        };
        // The scratch register is not formally allocated, so save and restore
        // whatever it currently holds around the operation.
        self.assembler.emit_push(Size::S64, Location::GPR(value))?;
        self.assembler
            .emit_mov(Size::S32, cmp, Location::GPR(compare))?;
        self.assembler
            .emit_mov(Size::S32, new, Location::GPR(value))?;

        self.memory_op(
            target,
            memarg,
            true,
            1,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.assembler.emit_lock_cmpxchg(
                    Size::S8,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )?;
                // Zero-extend the observed byte (in RAX) into the result.
                this.assembler
                    .emit_movzx(Size::S8, Location::GPR(compare), Size::S32, ret)
            },
        )?;
        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
        self.release_gpr(compare);
        Ok(())
    }
4431
    /// `i32.atomic.rmw16.cmpxchg_u`: 16-bit compare-and-exchange; the halfword
    /// observed in memory is zero-extended to 32 bits into `ret`.
    fn i32_atomic_cmpxchg_16u(
        &mut self,
        new: Location,
        cmp: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        // CMPXCHG requires the comparand in RAX.
        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
        // Pick a scratch register for `new` that cannot alias the registers
        // the `cmp`/`new` operands may currently occupy.
        let value = if cmp == Location::GPR(GPR::R14) {
            if new == Location::GPR(GPR::R13) {
                GPR::R12
            } else {
                GPR::R13
            }
        } else {
            GPR::R14
        };
        // The scratch register is not formally allocated, so save and restore
        // whatever it currently holds around the operation.
        self.assembler.emit_push(Size::S64, Location::GPR(value))?;
        self.assembler
            .emit_mov(Size::S32, cmp, Location::GPR(compare))?;
        self.assembler
            .emit_mov(Size::S32, new, Location::GPR(value))?;

        self.memory_op(
            target,
            memarg,
            true,
            2,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.assembler.emit_lock_cmpxchg(
                    Size::S16,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )?;
                // Zero-extend the observed halfword (in RAX) into the result.
                this.assembler
                    .emit_movzx(Size::S16, Location::GPR(compare), Size::S32, ret)
            },
        )?;
        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
        self.release_gpr(compare);
        Ok(())
    }
4485
4486    fn emit_call_with_reloc(
4487        &mut self,
4488        _calling_convention: CallingConvention,
4489        reloc_target: RelocationTarget,
4490    ) -> Result<Vec<Relocation>, CompileError> {
4491        let mut relocations = vec![];
4492        let next = self.get_label();
4493        let reloc_at = self.assembler.get_offset().0 + 1; // skip E8
4494        self.assembler.emit_call_label(next)?;
4495        self.emit_label(next)?;
4496        relocations.push(Relocation {
4497            kind: RelocationKind::X86CallPCRel4,
4498            reloc_target,
4499            offset: reloc_at as u32,
4500            addend: -4,
4501        });
4502        Ok(relocations)
4503    }
4504
4505    fn emit_binop_add64(
4506        &mut self,
4507        loc_a: Location,
4508        loc_b: Location,
4509        ret: Location,
4510    ) -> Result<(), CompileError> {
4511        self.emit_binop_i64(AssemblerX64::emit_add, loc_a, loc_b, ret)
4512    }
4513
4514    fn emit_binop_sub64(
4515        &mut self,
4516        loc_a: Location,
4517        loc_b: Location,
4518        ret: Location,
4519    ) -> Result<(), CompileError> {
4520        self.emit_binop_i64(AssemblerX64::emit_sub, loc_a, loc_b, ret)
4521    }
4522
4523    fn emit_binop_mul64(
4524        &mut self,
4525        loc_a: Location,
4526        loc_b: Location,
4527        ret: Location,
4528    ) -> Result<(), CompileError> {
4529        self.emit_binop_i64(AssemblerX64::emit_imul, loc_a, loc_b, ret)
4530    }
4531
4532    fn emit_binop_udiv64(
4533        &mut self,
4534        loc_a: Location,
4535        loc_b: Location,
4536        ret: Location,
4537        integer_division_by_zero: Label,
4538    ) -> Result<usize, CompileError> {
4539        // We assume that RAX and RDX are temporary registers here.
4540        self.assembler
4541            .emit_mov(Size::S64, loc_a, Location::GPR(GPR::RAX))?;
4542        self.assembler
4543            .emit_xor(Size::S64, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX))?;
4544        let offset = self.emit_relaxed_xdiv(
4545            AssemblerX64::emit_div,
4546            Size::S64,
4547            loc_b,
4548            integer_division_by_zero,
4549        )?;
4550        self.assembler
4551            .emit_mov(Size::S64, Location::GPR(GPR::RAX), ret)?;
4552        Ok(offset)
4553    }
4554
4555    fn emit_binop_sdiv64(
4556        &mut self,
4557        loc_a: Location,
4558        loc_b: Location,
4559        ret: Location,
4560        integer_division_by_zero: Label,
4561        _integer_overflow: Label,
4562    ) -> Result<usize, CompileError> {
4563        // We assume that RAX and RDX are temporary registers here.
4564        self.assembler
4565            .emit_mov(Size::S64, loc_a, Location::GPR(GPR::RAX))?;
4566        self.assembler.emit_cqo()?;
4567        let offset = self.emit_relaxed_xdiv(
4568            AssemblerX64::emit_idiv,
4569            Size::S64,
4570            loc_b,
4571            integer_division_by_zero,
4572        )?;
4573        self.assembler
4574            .emit_mov(Size::S64, Location::GPR(GPR::RAX), ret)?;
4575        Ok(offset)
4576    }
4577
4578    fn emit_binop_urem64(
4579        &mut self,
4580        loc_a: Location,
4581        loc_b: Location,
4582        ret: Location,
4583        integer_division_by_zero: Label,
4584    ) -> Result<usize, CompileError> {
4585        // We assume that RAX and RDX are temporary registers here.
4586        self.assembler
4587            .emit_mov(Size::S64, loc_a, Location::GPR(GPR::RAX))?;
4588        self.assembler
4589            .emit_xor(Size::S64, Location::GPR(GPR::RDX), Location::GPR(GPR::RDX))?;
4590        let offset = self.emit_relaxed_xdiv(
4591            AssemblerX64::emit_div,
4592            Size::S64,
4593            loc_b,
4594            integer_division_by_zero,
4595        )?;
4596        self.assembler
4597            .emit_mov(Size::S64, Location::GPR(GPR::RDX), ret)?;
4598        Ok(offset)
4599    }
4600
    /// Signed 64-bit remainder (wasm `i64.rem_s`). Returns the code offset of
    /// the division instruction for trap bookkeeping.
    fn emit_binop_srem64(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
        integer_division_by_zero: Label,
    ) -> Result<usize, CompileError> {
        // We assume that RAX and RDX are temporary registers here.
        let normal_path = self.assembler.get_label();
        let end = self.assembler.get_label();

        // Special-case i64::MIN % -1: the quotient overflows IDIV, but wasm
        // defines this remainder as 0, so short-circuit past the division.
        self.emit_relaxed_cmp(Size::S64, Location::Imm64(0x8000000000000000u64), loc_a)?;
        self.assembler.emit_jmp(Condition::NotEqual, normal_path)?;
        self.emit_relaxed_cmp(Size::S64, Location::Imm64(0xffffffffffffffffu64), loc_b)?;
        self.assembler.emit_jmp(Condition::NotEqual, normal_path)?;
        self.move_location(Size::S64, Location::Imm64(0), ret)?;
        self.assembler.emit_jmp(Condition::None, end)?;

        self.emit_label(normal_path)?;
        self.assembler
            .emit_mov(Size::S64, loc_a, Location::GPR(GPR::RAX))?;
        // Sign-extend the dividend into RDX:RAX for IDIV.
        self.assembler.emit_cqo()?;
        let offset = self.emit_relaxed_xdiv(
            AssemblerX64::emit_idiv,
            Size::S64,
            loc_b,
            integer_division_by_zero,
        )?;
        // IDIV leaves the remainder in RDX.
        self.assembler
            .emit_mov(Size::S64, Location::GPR(GPR::RDX), ret)?;

        self.emit_label(end)?;
        Ok(offset)
    }
4635
4636    fn emit_binop_and64(
4637        &mut self,
4638        loc_a: Location,
4639        loc_b: Location,
4640        ret: Location,
4641    ) -> Result<(), CompileError> {
4642        self.emit_binop_i64(AssemblerX64::emit_and, loc_a, loc_b, ret)
4643    }
4644
4645    fn emit_binop_or64(
4646        &mut self,
4647        loc_a: Location,
4648        loc_b: Location,
4649        ret: Location,
4650    ) -> Result<(), CompileError> {
4651        self.emit_binop_i64(AssemblerX64::emit_or, loc_a, loc_b, ret)
4652    }
4653
4654    fn emit_binop_xor64(
4655        &mut self,
4656        loc_a: Location,
4657        loc_b: Location,
4658        ret: Location,
4659    ) -> Result<(), CompileError> {
4660        self.emit_binop_i64(AssemblerX64::emit_xor, loc_a, loc_b, ret)
4661    }
4662
4663    fn i64_cmp_ge_s(
4664        &mut self,
4665        loc_a: Location,
4666        loc_b: Location,
4667        ret: Location,
4668    ) -> Result<(), CompileError> {
4669        self.emit_cmpop_i64_dynamic_b(Condition::GreaterEqual, loc_a, loc_b, ret)
4670    }
4671
4672    fn i64_cmp_gt_s(
4673        &mut self,
4674        loc_a: Location,
4675        loc_b: Location,
4676        ret: Location,
4677    ) -> Result<(), CompileError> {
4678        self.emit_cmpop_i64_dynamic_b(Condition::Greater, loc_a, loc_b, ret)
4679    }
4680
4681    fn i64_cmp_le_s(
4682        &mut self,
4683        loc_a: Location,
4684        loc_b: Location,
4685        ret: Location,
4686    ) -> Result<(), CompileError> {
4687        self.emit_cmpop_i64_dynamic_b(Condition::LessEqual, loc_a, loc_b, ret)
4688    }
4689
4690    fn i64_cmp_lt_s(
4691        &mut self,
4692        loc_a: Location,
4693        loc_b: Location,
4694        ret: Location,
4695    ) -> Result<(), CompileError> {
4696        self.emit_cmpop_i64_dynamic_b(Condition::Less, loc_a, loc_b, ret)
4697    }
4698
4699    fn i64_cmp_ge_u(
4700        &mut self,
4701        loc_a: Location,
4702        loc_b: Location,
4703        ret: Location,
4704    ) -> Result<(), CompileError> {
4705        self.emit_cmpop_i64_dynamic_b(Condition::AboveEqual, loc_a, loc_b, ret)
4706    }
4707
4708    fn i64_cmp_gt_u(
4709        &mut self,
4710        loc_a: Location,
4711        loc_b: Location,
4712        ret: Location,
4713    ) -> Result<(), CompileError> {
4714        self.emit_cmpop_i64_dynamic_b(Condition::Above, loc_a, loc_b, ret)
4715    }
4716
4717    fn i64_cmp_le_u(
4718        &mut self,
4719        loc_a: Location,
4720        loc_b: Location,
4721        ret: Location,
4722    ) -> Result<(), CompileError> {
4723        self.emit_cmpop_i64_dynamic_b(Condition::BelowEqual, loc_a, loc_b, ret)
4724    }
4725
4726    fn i64_cmp_lt_u(
4727        &mut self,
4728        loc_a: Location,
4729        loc_b: Location,
4730        ret: Location,
4731    ) -> Result<(), CompileError> {
4732        self.emit_cmpop_i64_dynamic_b(Condition::Below, loc_a, loc_b, ret)
4733    }
4734
4735    fn i64_cmp_ne(
4736        &mut self,
4737        loc_a: Location,
4738        loc_b: Location,
4739        ret: Location,
4740    ) -> Result<(), CompileError> {
4741        self.emit_cmpop_i64_dynamic_b(Condition::NotEqual, loc_a, loc_b, ret)
4742    }
4743
4744    fn i64_cmp_eq(
4745        &mut self,
4746        loc_a: Location,
4747        loc_b: Location,
4748        ret: Location,
4749    ) -> Result<(), CompileError> {
4750        self.emit_cmpop_i64_dynamic_b(Condition::Equal, loc_a, loc_b, ret)
4751    }
4752
4753    fn i64_clz(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
4754        let src = match loc {
4755            Location::Imm64(_) | Location::Imm32(_) | Location::Memory(_, _) => {
4756                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
4757                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
4758                })?;
4759                self.move_location(Size::S64, loc, Location::GPR(tmp))?;
4760                tmp
4761            }
4762            Location::GPR(reg) => reg,
4763            _ => {
4764                codegen_error!("singlepass i64_clz unreachable");
4765            }
4766        };
4767        let dst = match ret {
4768            Location::Memory(_, _) => self.acquire_temp_gpr().ok_or_else(|| {
4769                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
4770            })?,
4771            Location::GPR(reg) => reg,
4772            _ => {
4773                codegen_error!("singlepass i64_clz unreachable");
4774            }
4775        };
4776
4777        if self.assembler.arch_has_xzcnt() {
4778            self.assembler
4779                .arch_emit_lzcnt(Size::S64, Location::GPR(src), Location::GPR(dst))?;
4780        } else {
4781            let zero_path = self.assembler.get_label();
4782            let end = self.assembler.get_label();
4783
4784            self.assembler.emit_test_gpr_64(src)?;
4785            self.assembler.emit_jmp(Condition::Equal, zero_path)?;
4786            self.assembler
4787                .emit_bsr(Size::S64, Location::GPR(src), Location::GPR(dst))?;
4788            self.assembler
4789                .emit_xor(Size::S64, Location::Imm32(63), Location::GPR(dst))?;
4790            self.assembler.emit_jmp(Condition::None, end)?;
4791            self.emit_label(zero_path)?;
4792            self.move_location(Size::S64, Location::Imm32(64), Location::GPR(dst))?;
4793            self.emit_label(end)?;
4794        }
4795        match loc {
4796            Location::Imm64(_) | Location::Memory(_, _) => {
4797                self.release_gpr(src);
4798            }
4799            _ => {}
4800        };
4801        if let Location::Memory(_, _) = ret {
4802            self.move_location(Size::S64, Location::GPR(dst), ret)?;
4803            self.release_gpr(dst);
4804        };
4805        Ok(())
4806    }
4807
    /// Count trailing zeros of a 64-bit value (wasm `i64.ctz`); an input of 0
    /// yields 64.
    fn i64_ctz(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        // Materialize non-register sources into a scratch register.
        let src = match loc {
            Location::Imm64(_) | Location::Imm32(_) | Location::Memory(_, _) => {
                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.move_location(Size::S64, loc, Location::GPR(tmp))?;
                tmp
            }
            Location::GPR(reg) => reg,
            _ => {
                codegen_error!("singlepass i64_ctz unreachable");
            }
        };
        // Compute into a scratch register when the destination is memory.
        let dst = match ret {
            Location::Memory(_, _) => self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?,
            Location::GPR(reg) => reg,
            _ => {
                codegen_error!("singlepass i64_ctz unreachable");
            }
        };

        if self.assembler.arch_has_xzcnt() {
            // TZCNT defines the zero-input case itself (returns the operand width).
            self.assembler
                .arch_emit_tzcnt(Size::S64, Location::GPR(src), Location::GPR(dst))?;
        } else {
            // BSF leaves its destination undefined for a zero input, so test
            // for zero first and load the defined wasm result (64) explicitly.
            let zero_path = self.assembler.get_label();
            let end = self.assembler.get_label();

            self.assembler.emit_test_gpr_64(src)?;
            self.assembler.emit_jmp(Condition::Equal, zero_path)?;
            self.assembler
                .emit_bsf(Size::S64, Location::GPR(src), Location::GPR(dst))?;
            self.assembler.emit_jmp(Condition::None, end)?;
            self.emit_label(zero_path)?;
            self.move_location(Size::S64, Location::Imm64(64), Location::GPR(dst))?;
            self.emit_label(end)?;
        }

        // Release the scratch source for every case that acquired one above.
        match loc {
            Location::Imm64(_) | Location::Imm32(_) | Location::Memory(_, _) => {
                self.release_gpr(src);
            }
            _ => {}
        };
        if let Location::Memory(_, _) = ret {
            self.move_location(Size::S64, Location::GPR(dst), ret)?;
            self.release_gpr(dst);
        };
        Ok(())
    }
4861
    /// Population count of a 64-bit value (wasm `i64.popcnt`) via the POPCNT
    /// instruction.
    fn i64_popcnt(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
        match loc {
            // POPCNT cannot take an immediate source: stage it in a scratch GPR.
            Location::Imm64(_) | Location::Imm32(_) => {
                let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                    CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                })?;
                self.move_location(Size::S64, loc, Location::GPR(tmp))?;
                if let Location::Memory(_, _) = ret {
                    // POPCNT's destination must be a register; spill afterwards.
                    let out_tmp = self.acquire_temp_gpr().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                    })?;
                    self.assembler.emit_popcnt(
                        Size::S64,
                        Location::GPR(tmp),
                        Location::GPR(out_tmp),
                    )?;
                    self.move_location(Size::S64, Location::GPR(out_tmp), ret)?;
                    self.release_gpr(out_tmp);
                } else {
                    self.assembler
                        .emit_popcnt(Size::S64, Location::GPR(tmp), ret)?;
                }
                self.release_gpr(tmp);
            }
            // Memory/register sources feed POPCNT directly.
            Location::Memory(_, _) | Location::GPR(_) => {
                if let Location::Memory(_, _) = ret {
                    // POPCNT's destination must be a register; spill afterwards.
                    let out_tmp = self.acquire_temp_gpr().ok_or_else(|| {
                        CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
                    })?;
                    self.assembler
                        .emit_popcnt(Size::S64, loc, Location::GPR(out_tmp))?;
                    self.move_location(Size::S64, Location::GPR(out_tmp), ret)?;
                    self.release_gpr(out_tmp);
                } else {
                    self.assembler.emit_popcnt(Size::S64, loc, ret)?;
                }
            }
            _ => {
                codegen_error!("singlepass i64_popcnt unreachable");
            }
        }
        Ok(())
    }
4905
4906    fn i64_shl(
4907        &mut self,
4908        loc_a: Location,
4909        loc_b: Location,
4910        ret: Location,
4911    ) -> Result<(), CompileError> {
4912        self.emit_shift_i64(AssemblerX64::emit_shl, loc_a, loc_b, ret)
4913    }
4914
4915    fn i64_shr(
4916        &mut self,
4917        loc_a: Location,
4918        loc_b: Location,
4919        ret: Location,
4920    ) -> Result<(), CompileError> {
4921        self.emit_shift_i64(AssemblerX64::emit_shr, loc_a, loc_b, ret)
4922    }
4923
4924    fn i64_sar(
4925        &mut self,
4926        loc_a: Location,
4927        loc_b: Location,
4928        ret: Location,
4929    ) -> Result<(), CompileError> {
4930        self.emit_shift_i64(AssemblerX64::emit_sar, loc_a, loc_b, ret)
4931    }
4932
4933    fn i64_rol(
4934        &mut self,
4935        loc_a: Location,
4936        loc_b: Location,
4937        ret: Location,
4938    ) -> Result<(), CompileError> {
4939        self.emit_shift_i64(AssemblerX64::emit_rol, loc_a, loc_b, ret)
4940    }
4941
4942    fn i64_ror(
4943        &mut self,
4944        loc_a: Location,
4945        loc_b: Location,
4946        ret: Location,
4947    ) -> Result<(), CompileError> {
4948        self.emit_shift_i64(AssemblerX64::emit_ror, loc_a, loc_b, ret)
4949    }
4950
4951    fn i64_load(
4952        &mut self,
4953        addr: Location,
4954        memarg: &MemArg,
4955        ret: Location,
4956        need_check: bool,
4957        imported_memories: bool,
4958        offset: i32,
4959        heap_access_oob: Label,
4960        unaligned_atomic: Label,
4961    ) -> Result<(), CompileError> {
4962        self.memory_op(
4963            addr,
4964            memarg,
4965            false,
4966            8,
4967            need_check,
4968            imported_memories,
4969            offset,
4970            heap_access_oob,
4971            unaligned_atomic,
4972            |this, addr| {
4973                this.emit_relaxed_binop(
4974                    AssemblerX64::emit_mov,
4975                    Size::S64,
4976                    Location::Memory(addr, 0),
4977                    ret,
4978                )
4979            },
4980        )
4981    }
4982
4983    fn i64_load_8u(
4984        &mut self,
4985        addr: Location,
4986        memarg: &MemArg,
4987        ret: Location,
4988        need_check: bool,
4989        imported_memories: bool,
4990        offset: i32,
4991        heap_access_oob: Label,
4992        unaligned_atomic: Label,
4993    ) -> Result<(), CompileError> {
4994        self.memory_op(
4995            addr,
4996            memarg,
4997            false,
4998            1,
4999            need_check,
5000            imported_memories,
5001            offset,
5002            heap_access_oob,
5003            unaligned_atomic,
5004            |this, addr| {
5005                this.emit_relaxed_zx_sx(
5006                    AssemblerX64::emit_movzx,
5007                    Size::S8,
5008                    Location::Memory(addr, 0),
5009                    Size::S64,
5010                    ret,
5011                )
5012            },
5013        )
5014    }
5015
5016    fn i64_load_8s(
5017        &mut self,
5018        addr: Location,
5019        memarg: &MemArg,
5020        ret: Location,
5021        need_check: bool,
5022        imported_memories: bool,
5023        offset: i32,
5024        heap_access_oob: Label,
5025        unaligned_atomic: Label,
5026    ) -> Result<(), CompileError> {
5027        self.memory_op(
5028            addr,
5029            memarg,
5030            false,
5031            1,
5032            need_check,
5033            imported_memories,
5034            offset,
5035            heap_access_oob,
5036            unaligned_atomic,
5037            |this, addr| {
5038                this.emit_relaxed_zx_sx(
5039                    AssemblerX64::emit_movsx,
5040                    Size::S8,
5041                    Location::Memory(addr, 0),
5042                    Size::S64,
5043                    ret,
5044                )
5045            },
5046        )
5047    }
5048
5049    fn i64_load_16u(
5050        &mut self,
5051        addr: Location,
5052        memarg: &MemArg,
5053        ret: Location,
5054        need_check: bool,
5055        imported_memories: bool,
5056        offset: i32,
5057        heap_access_oob: Label,
5058        unaligned_atomic: Label,
5059    ) -> Result<(), CompileError> {
5060        self.memory_op(
5061            addr,
5062            memarg,
5063            false,
5064            2,
5065            need_check,
5066            imported_memories,
5067            offset,
5068            heap_access_oob,
5069            unaligned_atomic,
5070            |this, addr| {
5071                this.emit_relaxed_zx_sx(
5072                    AssemblerX64::emit_movzx,
5073                    Size::S16,
5074                    Location::Memory(addr, 0),
5075                    Size::S64,
5076                    ret,
5077                )
5078            },
5079        )
5080    }
5081
5082    fn i64_load_16s(
5083        &mut self,
5084        addr: Location,
5085        memarg: &MemArg,
5086        ret: Location,
5087        need_check: bool,
5088        imported_memories: bool,
5089        offset: i32,
5090        heap_access_oob: Label,
5091        unaligned_atomic: Label,
5092    ) -> Result<(), CompileError> {
5093        self.memory_op(
5094            addr,
5095            memarg,
5096            false,
5097            2,
5098            need_check,
5099            imported_memories,
5100            offset,
5101            heap_access_oob,
5102            unaligned_atomic,
5103            |this, addr| {
5104                this.emit_relaxed_zx_sx(
5105                    AssemblerX64::emit_movsx,
5106                    Size::S16,
5107                    Location::Memory(addr, 0),
5108                    Size::S64,
5109                    ret,
5110                )
5111            },
5112        )
5113    }
5114
5115    fn i64_load_32u(
5116        &mut self,
5117        addr: Location,
5118        memarg: &MemArg,
5119        ret: Location,
5120        need_check: bool,
5121        imported_memories: bool,
5122        offset: i32,
5123        heap_access_oob: Label,
5124        unaligned_atomic: Label,
5125    ) -> Result<(), CompileError> {
5126        self.memory_op(
5127            addr,
5128            memarg,
5129            false,
5130            4,
5131            need_check,
5132            imported_memories,
5133            offset,
5134            heap_access_oob,
5135            unaligned_atomic,
5136            |this, addr| {
5137                match ret {
5138                    Location::GPR(_) => {}
5139                    Location::Memory(base, offset) => {
5140                        this.assembler.emit_mov(
5141                            Size::S32,
5142                            Location::Imm32(0),
5143                            Location::Memory(base, offset + 4),
5144                        )?; // clear upper bits
5145                    }
5146                    _ => {
5147                        codegen_error!("singlepass i64_load_32u unreacahble");
5148                    }
5149                }
5150                this.emit_relaxed_binop(
5151                    AssemblerX64::emit_mov,
5152                    Size::S32,
5153                    Location::Memory(addr, 0),
5154                    ret,
5155                )
5156            },
5157        )
5158    }
5159
    /// Emit `i64.load32_s`: load 4 bytes from linear memory and sign-extend
    /// the value to 64 bits into `ret` (via MOVSX/MOVSXD).
    fn i64_load_32s(
        &mut self,
        addr: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            addr,
            memarg,
            false, // non-atomic access: no alignment check
            4,     // access width in bytes, used for the bounds check
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.emit_relaxed_zx_sx(
                    AssemblerX64::emit_movsx,
                    Size::S32,
                    Location::Memory(addr, 0),
                    Size::S64,
                    ret,
                )
            },
        )
    }
5192
    /// Emit `i64.atomic.load`: 8-byte atomic load. A naturally aligned 64-bit
    /// `mov` is atomic on x86-64, so a plain relaxed mov suffices; `memory_op`
    /// is passed `true`/`8` so the 8-byte alignment is verified (jumping to
    /// `unaligned_atomic` on violation).
    fn i64_atomic_load(
        &mut self,
        addr: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            addr,
            memarg,
            true,
            8,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| this.emit_relaxed_mov(Size::S64, Location::Memory(addr, 0), ret),
        )
    }
5217
    /// Emit `i64.atomic.load8_u`: atomic 1-byte load, zero-extended to 64 bits.
    /// Single-byte accesses are always naturally aligned, but the atomic
    /// alignment-check flag is still passed for uniformity with the other
    /// atomic loads.
    fn i64_atomic_load_8u(
        &mut self,
        addr: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            addr,
            memarg,
            true,
            1,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.emit_relaxed_zero_extension(
                    Size::S8,
                    Location::Memory(addr, 0),
                    Size::S64,
                    ret,
                )
            },
        )
    }
5249
    /// Emit `i64.atomic.load16_u`: atomic 2-byte load, zero-extended to 64
    /// bits. `memory_op` checks 2-byte alignment (jumping to
    /// `unaligned_atomic` on violation); an aligned 16-bit load is atomic on
    /// x86-64.
    fn i64_atomic_load_16u(
        &mut self,
        addr: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            addr,
            memarg,
            true,
            2,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.emit_relaxed_zero_extension(
                    Size::S16,
                    Location::Memory(addr, 0),
                    Size::S64,
                    ret,
                )
            },
        )
    }
5281
    /// Emit `i64.atomic.load32_u`: atomic 4-byte load, zero-extended to 64
    /// bits. As in `i64_load_32u`, a memory destination needs its upper 32
    /// bits cleared explicitly, since only register destinations get the
    /// implicit zero-extension of 32-bit moves.
    fn i64_atomic_load_32u(
        &mut self,
        addr: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            addr,
            memarg,
            true, // atomic: verify 4-byte alignment
            4,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                match ret {
                    // A 32-bit mov into a GPR zero-extends on its own.
                    Location::GPR(_) => {}
                    Location::Memory(base, offset) => {
                        this.move_location(
                            Size::S32,
                            Location::Imm32(0),
                            Location::Memory(base, offset + 4),
                        )?; // clear upper bits
                    }
                    _ => {
                        codegen_error!("singlepass i64_atomic_load_32u unreachable");
                    }
                }
                this.emit_relaxed_zero_extension(
                    Size::S32,
                    Location::Memory(addr, 0),
                    Size::S64,
                    ret,
                )
            },
        )
    }
5326
    /// Emit `i64.store`: store all 8 bytes of `target_value` to linear memory
    /// at `target_addr + memarg.offset`.
    fn i64_save(
        &mut self,
        target_value: Location,
        memarg: &MemArg,
        target_addr: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            target_addr,
            memarg,
            false, // non-atomic store: no alignment check
            8,     // access width in bytes, used for the bounds check
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.emit_relaxed_binop(
                    AssemblerX64::emit_mov,
                    Size::S64,
                    target_value,
                    Location::Memory(addr, 0),
                )
            },
        )
    }
5358
    /// Emit `i64.store8`: store the low byte of `target_value` to linear
    /// memory at `target_addr + memarg.offset`.
    fn i64_save_8(
        &mut self,
        target_value: Location,
        memarg: &MemArg,
        target_addr: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            target_addr,
            memarg,
            false, // non-atomic store: no alignment check
            1,     // access width in bytes, used for the bounds check
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.emit_relaxed_binop(
                    AssemblerX64::emit_mov,
                    Size::S8,
                    target_value,
                    Location::Memory(addr, 0),
                )
            },
        )
    }
5390
    /// Emit `i64.store16`: store the low 2 bytes of `target_value` to linear
    /// memory at `target_addr + memarg.offset`.
    fn i64_save_16(
        &mut self,
        target_value: Location,
        memarg: &MemArg,
        target_addr: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            target_addr,
            memarg,
            false, // non-atomic store: no alignment check
            2,     // access width in bytes, used for the bounds check
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.emit_relaxed_binop(
                    AssemblerX64::emit_mov,
                    Size::S16,
                    target_value,
                    Location::Memory(addr, 0),
                )
            },
        )
    }
5422
    /// Emit `i64.store32`: store the low 4 bytes of `target_value` to linear
    /// memory at `target_addr + memarg.offset`.
    fn i64_save_32(
        &mut self,
        target_value: Location,
        memarg: &MemArg,
        target_addr: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            target_addr,
            memarg,
            false, // non-atomic store: no alignment check
            4,     // access width in bytes, used for the bounds check
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.emit_relaxed_binop(
                    AssemblerX64::emit_mov,
                    Size::S32,
                    target_value,
                    Location::Memory(addr, 0),
                )
            },
        )
    }
5454
    /// Emit `i64.atomic.store`: atomic 8-byte store implemented with `xchg`
    /// (whose implicit lock gives the store sequentially-consistent
    /// semantics). `memory_op` verifies 8-byte alignment.
    fn i64_atomic_save(
        &mut self,
        value: Location,
        memarg: &MemArg,
        target_addr: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            target_addr,
            memarg,
            true,
            8,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| this.emit_relaxed_atomic_xchg(Size::S64, value, Location::Memory(addr, 0)),
        )
    }
5479
    /// Emit `i64.atomic.store8`: atomic 1-byte store of the low byte of
    /// `value`, implemented with `xchg` for sequentially-consistent ordering.
    fn i64_atomic_save_8(
        &mut self,
        value: Location,
        memarg: &MemArg,
        target_addr: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            target_addr,
            memarg,
            true,
            1,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| this.emit_relaxed_atomic_xchg(Size::S8, value, Location::Memory(addr, 0)),
        )
    }
5504
    /// Emit `i64.atomic.store16`: atomic 2-byte store of the low 2 bytes of
    /// `value`, implemented with `xchg` for sequentially-consistent ordering.
    /// `memory_op` verifies 2-byte alignment.
    fn i64_atomic_save_16(
        &mut self,
        value: Location,
        memarg: &MemArg,
        target_addr: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.memory_op(
            target_addr,
            memarg,
            true,
            2,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| this.emit_relaxed_atomic_xchg(Size::S16, value, Location::Memory(addr, 0)),
        )
    }
5529
5530    fn i64_atomic_save_32(
5531        &mut self,
5532        value: Location,
5533        memarg: &MemArg,
5534        target_addr: Location,
5535        need_check: bool,
5536        imported_memories: bool,
5537        offset: i32,
5538        heap_access_oob: Label,
5539        unaligned_atomic: Label,
5540    ) -> Result<(), CompileError> {
5541        self.memory_op(
5542            target_addr,
5543            memarg,
5544            true,
5545            2,
5546            need_check,
5547            imported_memories,
5548            offset,
5549            heap_access_oob,
5550            unaligned_atomic,
5551            |this, addr| this.emit_relaxed_atomic_xchg(Size::S32, value, Location::Memory(addr, 0)),
5552        )
5553    }
5554
    /// Emit `i64.atomic.rmw.add`: atomically add `loc` to the 8 bytes at
    /// `target + memarg.offset` and return the *previous* memory value in
    /// `ret`. Uses `lock xadd`, which leaves the old value in the source
    /// register.
    fn i64_atomic_add(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        // Temp register holds the addend going in and the old value coming out.
        let value = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        self.move_location(Size::S64, loc, Location::GPR(value))?;
        self.memory_op(
            target,
            memarg,
            true, // atomic: verify 8-byte alignment
            8,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.assembler.emit_lock_xadd(
                    Size::S64,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )
            },
        )?;
        // After xadd, `value` contains the pre-add memory contents.
        self.move_location(Size::S64, Location::GPR(value), ret)?;
        self.release_gpr(value);
        Ok(())
    }
5593
    /// Emit `i64.atomic.rmw8.add_u`: atomically add the low byte of `loc` to
    /// the byte at `target + memarg.offset`; `ret` receives the previous byte
    /// zero-extended to 64 bits (the addend is zero-extended up front so the
    /// temp's upper bits are already clear).
    fn i64_atomic_add_8u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        let value = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        self.move_location_extend(Size::S8, false, loc, Size::S64, Location::GPR(value))?;
        self.memory_op(
            target,
            memarg,
            true,
            1,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.assembler.emit_lock_xadd(
                    Size::S8,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )
            },
        )?;
        // `value` now holds the old byte; upper 56 bits are still zero.
        self.move_location(Size::S64, Location::GPR(value), ret)?;
        self.release_gpr(value);
        Ok(())
    }
5632
    /// Emit `i64.atomic.rmw16.add_u`: atomically add the low 2 bytes of `loc`
    /// to memory at `target + memarg.offset`; `ret` receives the previous
    /// 16-bit value zero-extended to 64 bits.
    fn i64_atomic_add_16u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        let value = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        self.move_location_extend(Size::S16, false, loc, Size::S64, Location::GPR(value))?;
        self.memory_op(
            target,
            memarg,
            true, // atomic: verify 2-byte alignment
            2,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.assembler.emit_lock_xadd(
                    Size::S16,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )
            },
        )?;
        // `value` now holds the old 16-bit value; upper bits are still zero.
        self.move_location(Size::S64, Location::GPR(value), ret)?;
        self.release_gpr(value);
        Ok(())
    }
5671
    /// Emit `i64.atomic.rmw32.add_u`: atomically add the low 4 bytes of `loc`
    /// to memory at `target + memarg.offset`; `ret` receives the previous
    /// 32-bit value zero-extended to 64 bits.
    fn i64_atomic_add_32u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        let value = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        self.move_location_extend(Size::S32, false, loc, Size::S64, Location::GPR(value))?;
        self.memory_op(
            target,
            memarg,
            true, // atomic: verify 4-byte alignment
            4,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.assembler.emit_lock_xadd(
                    Size::S32,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )
            },
        )?;
        // `value` now holds the old 32-bit value; upper bits are still zero.
        self.move_location(Size::S64, Location::GPR(value), ret)?;
        self.release_gpr(value);
        Ok(())
    }
5710
    /// Emit `i64.atomic.rmw.sub`: atomically subtract `loc` from the 8 bytes
    /// at `target + memarg.offset`, returning the previous value in `ret`.
    /// Implemented as `lock xadd` with the negated operand, since x86 has no
    /// locked-subtract-and-fetch form.
    fn i64_atomic_sub(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        let value = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        // value = -loc, so that adding it subtracts.
        self.location_neg(Size::S64, false, loc, Size::S64, Location::GPR(value))?;
        self.memory_op(
            target,
            memarg,
            true, // atomic: verify 8-byte alignment
            8,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.assembler.emit_lock_xadd(
                    Size::S64,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )
            },
        )?;
        // xadd left the pre-subtraction memory contents in `value`.
        self.move_location(Size::S64, Location::GPR(value), ret)?;
        self.release_gpr(value);
        Ok(())
    }
5749
    /// Emit `i64.atomic.rmw8.sub_u`: atomically subtract the low byte of
    /// `loc` from the byte at `target + memarg.offset`; `ret` receives the
    /// previous byte zero-extended to 64 bits. Subtraction is done by
    /// `lock xadd` of the negated operand.
    fn i64_atomic_sub_8u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        let value = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        // value = -(low byte of loc).
        self.location_neg(Size::S8, false, loc, Size::S64, Location::GPR(value))?;
        self.memory_op(
            target,
            memarg,
            true,
            1,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.assembler.emit_lock_xadd(
                    Size::S8,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )
            },
        )?;
        self.move_location(Size::S64, Location::GPR(value), ret)?;
        self.release_gpr(value);
        Ok(())
    }
5788
    /// Emit `i64.atomic.rmw16.sub_u`: atomically subtract the low 2 bytes of
    /// `loc` from memory at `target + memarg.offset`; `ret` receives the
    /// previous 16-bit value zero-extended to 64 bits. Subtraction is done by
    /// `lock xadd` of the negated operand.
    fn i64_atomic_sub_16u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        let value = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        // value = -(low 16 bits of loc).
        self.location_neg(Size::S16, false, loc, Size::S64, Location::GPR(value))?;
        self.memory_op(
            target,
            memarg,
            true, // atomic: verify 2-byte alignment
            2,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.assembler.emit_lock_xadd(
                    Size::S16,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )
            },
        )?;
        self.move_location(Size::S64, Location::GPR(value), ret)?;
        self.release_gpr(value);
        Ok(())
    }
5827
    /// Emit `i64.atomic.rmw32.sub_u`: atomically subtract the low 4 bytes of
    /// `loc` from memory at `target + memarg.offset`; `ret` receives the
    /// previous 32-bit value zero-extended to 64 bits. Subtraction is done by
    /// `lock xadd` of the negated operand.
    fn i64_atomic_sub_32u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        let value = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        // value = -(low 32 bits of loc).
        self.location_neg(Size::S32, false, loc, Size::S64, Location::GPR(value))?;
        self.memory_op(
            target,
            memarg,
            true, // atomic: verify 4-byte alignment
            4,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, addr| {
                this.assembler.emit_lock_xadd(
                    Size::S32,
                    Location::GPR(value),
                    Location::Memory(addr, 0),
                )
            },
        )?;
        self.move_location(Size::S64, Location::GPR(value), ret)?;
        self.release_gpr(value);
        Ok(())
    }
5866
    /// Emit `i64.atomic.rmw.and`: atomically AND `loc` into the 8 bytes at
    /// `target + memarg.offset`, returning the previous value in `ret`.
    /// x86 has no fetch-and form of AND, so this goes through the generic
    /// compare-and-swap retry loop with AND as the update step.
    fn i64_atomic_and(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.emit_compare_and_swap(
            loc,
            target,
            ret,
            memarg,
            8,
            Size::S64,
            Size::S64,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, src, dst| {
                this.assembler
                    .emit_and(Size::S64, Location::GPR(src), Location::GPR(dst))
            },
        )
    }
5898
    /// Emit `i64.atomic.rmw8.and_u`: atomically AND the low byte of `loc`
    /// into the byte at `target + memarg.offset`; `ret` receives the previous
    /// byte zero-extended to 64 bits. Implemented via the CAS retry loop;
    /// the AND runs at 64 bits on the already-extended operands, which leaves
    /// the low byte result identical.
    fn i64_atomic_and_8u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.emit_compare_and_swap(
            loc,
            target,
            ret,
            memarg,
            1,
            Size::S8,
            Size::S64,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, src, dst| {
                this.assembler
                    .emit_and(Size::S64, Location::GPR(src), Location::GPR(dst))
            },
        )
    }
5930
    /// Emit `i64.atomic.rmw16.and_u`: atomically AND the low 2 bytes of `loc`
    /// into memory at `target + memarg.offset`; `ret` receives the previous
    /// 16-bit value zero-extended to 64 bits. Implemented via the CAS retry
    /// loop with a 64-bit AND as the update step.
    fn i64_atomic_and_16u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.emit_compare_and_swap(
            loc,
            target,
            ret,
            memarg,
            2,
            Size::S16,
            Size::S64,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, src, dst| {
                this.assembler
                    .emit_and(Size::S64, Location::GPR(src), Location::GPR(dst))
            },
        )
    }
5962
    /// Emit `i64.atomic.rmw32.and_u`: atomically AND the low 4 bytes of `loc`
    /// into memory at `target + memarg.offset`; `ret` receives the previous
    /// 32-bit value zero-extended to 64 bits. Implemented via the CAS retry
    /// loop with a 64-bit AND as the update step.
    fn i64_atomic_and_32u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.emit_compare_and_swap(
            loc,
            target,
            ret,
            memarg,
            4,
            Size::S32,
            Size::S64,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, src, dst| {
                this.assembler
                    .emit_and(Size::S64, Location::GPR(src), Location::GPR(dst))
            },
        )
    }
5994
    /// Emit `i64.atomic.rmw.or`: atomically OR `loc` into the 8 bytes at
    /// `target + memarg.offset`, returning the previous value in `ret`.
    /// Implemented via the generic compare-and-swap retry loop with OR as the
    /// update step.
    fn i64_atomic_or(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.emit_compare_and_swap(
            loc,
            target,
            ret,
            memarg,
            8,
            Size::S64,
            Size::S64,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, src, dst| {
                this.location_or(Size::S64, Location::GPR(src), Location::GPR(dst), false)
            },
        )
    }
6025
    /// Emit `i64.atomic.rmw8.or_u`: atomically OR the low byte of `loc` into
    /// the byte at `target + memarg.offset`; `ret` receives the previous byte
    /// zero-extended to 64 bits. Implemented via the CAS retry loop with a
    /// 64-bit OR as the update step.
    fn i64_atomic_or_8u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.emit_compare_and_swap(
            loc,
            target,
            ret,
            memarg,
            1,
            Size::S8,
            Size::S64,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, src, dst| {
                this.location_or(Size::S64, Location::GPR(src), Location::GPR(dst), false)
            },
        )
    }
6056
    /// Emit `i64.atomic.rmw16.or_u`: atomically OR the low 2 bytes of `loc`
    /// into memory at `target + memarg.offset`; `ret` receives the previous
    /// 16-bit value zero-extended to 64 bits. Implemented via the CAS retry
    /// loop with a 64-bit OR as the update step.
    fn i64_atomic_or_16u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.emit_compare_and_swap(
            loc,
            target,
            ret,
            memarg,
            2,
            Size::S16,
            Size::S64,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, src, dst| {
                this.location_or(Size::S64, Location::GPR(src), Location::GPR(dst), false)
            },
        )
    }
6087
    /// Emit `i64.atomic.rmw32.or_u`: atomically OR the low 4 bytes of `loc`
    /// into memory at `target + memarg.offset`; `ret` receives the previous
    /// 32-bit value zero-extended to 64 bits. Implemented via the CAS retry
    /// loop with a 64-bit OR as the update step.
    fn i64_atomic_or_32u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.emit_compare_and_swap(
            loc,
            target,
            ret,
            memarg,
            4,
            Size::S32,
            Size::S64,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, src, dst| {
                this.location_or(Size::S64, Location::GPR(src), Location::GPR(dst), false)
            },
        )
    }
6118
    /// Emit `i64.atomic.rmw.xor`: atomically XOR `loc` into the 8 bytes at
    /// `target + memarg.offset`, returning the previous value in `ret`.
    /// Implemented via the generic compare-and-swap retry loop with XOR as
    /// the update step.
    fn i64_atomic_xor(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.emit_compare_and_swap(
            loc,
            target,
            ret,
            memarg,
            8,
            Size::S64,
            Size::S64,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, src, dst| {
                this.location_xor(Size::S64, Location::GPR(src), Location::GPR(dst), false)
            },
        )
    }
6149
    /// Emit `i64.atomic.rmw8.xor_u`: atomically XOR the low byte of `loc`
    /// into the byte at `target + memarg.offset`; `ret` receives the previous
    /// byte zero-extended to 64 bits. Implemented via the CAS retry loop with
    /// a 64-bit XOR as the update step.
    fn i64_atomic_xor_8u(
        &mut self,
        loc: Location,
        target: Location,
        memarg: &MemArg,
        ret: Location,
        need_check: bool,
        imported_memories: bool,
        offset: i32,
        heap_access_oob: Label,
        unaligned_atomic: Label,
    ) -> Result<(), CompileError> {
        self.emit_compare_and_swap(
            loc,
            target,
            ret,
            memarg,
            1,
            Size::S8,
            Size::S64,
            need_check,
            imported_memories,
            offset,
            heap_access_oob,
            unaligned_atomic,
            |this, src, dst| {
                this.location_xor(Size::S64, Location::GPR(src), Location::GPR(dst), false)
            },
        )
    }
6180
6181    fn i64_atomic_xor_16u(
6182        &mut self,
6183        loc: Location,
6184        target: Location,
6185        memarg: &MemArg,
6186        ret: Location,
6187        need_check: bool,
6188        imported_memories: bool,
6189        offset: i32,
6190        heap_access_oob: Label,
6191        unaligned_atomic: Label,
6192    ) -> Result<(), CompileError> {
6193        self.emit_compare_and_swap(
6194            loc,
6195            target,
6196            ret,
6197            memarg,
6198            2,
6199            Size::S16,
6200            Size::S64,
6201            need_check,
6202            imported_memories,
6203            offset,
6204            heap_access_oob,
6205            unaligned_atomic,
6206            |this, src, dst| {
6207                this.location_xor(Size::S64, Location::GPR(src), Location::GPR(dst), false)
6208            },
6209        )
6210    }
6211
6212    fn i64_atomic_xor_32u(
6213        &mut self,
6214        loc: Location,
6215        target: Location,
6216        memarg: &MemArg,
6217        ret: Location,
6218        need_check: bool,
6219        imported_memories: bool,
6220        offset: i32,
6221        heap_access_oob: Label,
6222        unaligned_atomic: Label,
6223    ) -> Result<(), CompileError> {
6224        self.emit_compare_and_swap(
6225            loc,
6226            target,
6227            ret,
6228            memarg,
6229            4,
6230            Size::S32,
6231            Size::S64,
6232            need_check,
6233            imported_memories,
6234            offset,
6235            heap_access_oob,
6236            unaligned_atomic,
6237            |this, src, dst| {
6238                this.location_xor(Size::S64, Location::GPR(src), Location::GPR(dst), false)
6239            },
6240        )
6241    }
6242
6243    fn i64_atomic_xchg(
6244        &mut self,
6245        loc: Location,
6246        target: Location,
6247        memarg: &MemArg,
6248        ret: Location,
6249        need_check: bool,
6250        imported_memories: bool,
6251        offset: i32,
6252        heap_access_oob: Label,
6253        unaligned_atomic: Label,
6254    ) -> Result<(), CompileError> {
6255        let value = self.acquire_temp_gpr().ok_or_else(|| {
6256            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
6257        })?;
6258        self.move_location(Size::S64, loc, Location::GPR(value))?;
6259        self.memory_op(
6260            target,
6261            memarg,
6262            true,
6263            8,
6264            need_check,
6265            imported_memories,
6266            offset,
6267            heap_access_oob,
6268            unaligned_atomic,
6269            |this, addr| {
6270                this.assembler
6271                    .emit_xchg(Size::S64, Location::GPR(value), Location::Memory(addr, 0))
6272            },
6273        )?;
6274        self.move_location(Size::S64, Location::GPR(value), ret)?;
6275        self.release_gpr(value);
6276        Ok(())
6277    }
6278
6279    fn i64_atomic_xchg_8u(
6280        &mut self,
6281        loc: Location,
6282        target: Location,
6283        memarg: &MemArg,
6284        ret: Location,
6285        need_check: bool,
6286        imported_memories: bool,
6287        offset: i32,
6288        heap_access_oob: Label,
6289        unaligned_atomic: Label,
6290    ) -> Result<(), CompileError> {
6291        let value = self.acquire_temp_gpr().ok_or_else(|| {
6292            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
6293        })?;
6294        self.assembler
6295            .emit_movzx(Size::S8, loc, Size::S64, Location::GPR(value))?;
6296        self.memory_op(
6297            target,
6298            memarg,
6299            true,
6300            1,
6301            need_check,
6302            imported_memories,
6303            offset,
6304            heap_access_oob,
6305            unaligned_atomic,
6306            |this, addr| {
6307                this.assembler
6308                    .emit_xchg(Size::S8, Location::GPR(value), Location::Memory(addr, 0))
6309            },
6310        )?;
6311        self.move_location(Size::S64, Location::GPR(value), ret)?;
6312        self.release_gpr(value);
6313        Ok(())
6314    }
6315
6316    fn i64_atomic_xchg_16u(
6317        &mut self,
6318        loc: Location,
6319        target: Location,
6320        memarg: &MemArg,
6321        ret: Location,
6322        need_check: bool,
6323        imported_memories: bool,
6324        offset: i32,
6325        heap_access_oob: Label,
6326        unaligned_atomic: Label,
6327    ) -> Result<(), CompileError> {
6328        let value = self.acquire_temp_gpr().ok_or_else(|| {
6329            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
6330        })?;
6331        self.assembler
6332            .emit_movzx(Size::S16, loc, Size::S64, Location::GPR(value))?;
6333        self.memory_op(
6334            target,
6335            memarg,
6336            true,
6337            2,
6338            need_check,
6339            imported_memories,
6340            offset,
6341            heap_access_oob,
6342            unaligned_atomic,
6343            |this, addr| {
6344                this.assembler
6345                    .emit_xchg(Size::S16, Location::GPR(value), Location::Memory(addr, 0))
6346            },
6347        )?;
6348        self.move_location(Size::S64, Location::GPR(value), ret)?;
6349        self.release_gpr(value);
6350        Ok(())
6351    }
6352
6353    fn i64_atomic_xchg_32u(
6354        &mut self,
6355        loc: Location,
6356        target: Location,
6357        memarg: &MemArg,
6358        ret: Location,
6359        need_check: bool,
6360        imported_memories: bool,
6361        offset: i32,
6362        heap_access_oob: Label,
6363        unaligned_atomic: Label,
6364    ) -> Result<(), CompileError> {
6365        let value = self.acquire_temp_gpr().ok_or_else(|| {
6366            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
6367        })?;
6368        self.assembler
6369            .emit_movzx(Size::S32, loc, Size::S64, Location::GPR(value))?;
6370        self.memory_op(
6371            target,
6372            memarg,
6373            true,
6374            4,
6375            need_check,
6376            imported_memories,
6377            offset,
6378            heap_access_oob,
6379            unaligned_atomic,
6380            |this, addr| {
6381                this.assembler
6382                    .emit_xchg(Size::S32, Location::GPR(value), Location::Memory(addr, 0))
6383            },
6384        )?;
6385        self.move_location(Size::S64, Location::GPR(value), ret)?;
6386        self.release_gpr(value);
6387        Ok(())
6388    }
6389
6390    fn i64_atomic_cmpxchg(
6391        &mut self,
6392        new: Location,
6393        cmp: Location,
6394        target: Location,
6395        memarg: &MemArg,
6396        ret: Location,
6397        need_check: bool,
6398        imported_memories: bool,
6399        offset: i32,
6400        heap_access_oob: Label,
6401        unaligned_atomic: Label,
6402    ) -> Result<(), CompileError> {
6403        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
6404        let value = if cmp == Location::GPR(GPR::R14) {
6405            if new == Location::GPR(GPR::R13) {
6406                GPR::R12
6407            } else {
6408                GPR::R13
6409            }
6410        } else {
6411            GPR::R14
6412        };
6413        self.assembler.emit_push(Size::S64, Location::GPR(value))?;
6414        self.assembler
6415            .emit_mov(Size::S64, cmp, Location::GPR(compare))?;
6416        self.assembler
6417            .emit_mov(Size::S64, new, Location::GPR(value))?;
6418
6419        self.memory_op(
6420            target,
6421            memarg,
6422            true,
6423            8,
6424            need_check,
6425            imported_memories,
6426            offset,
6427            heap_access_oob,
6428            unaligned_atomic,
6429            |this, addr| {
6430                this.assembler.emit_lock_cmpxchg(
6431                    Size::S64,
6432                    Location::GPR(value),
6433                    Location::Memory(addr, 0),
6434                )?;
6435                this.assembler
6436                    .emit_mov(Size::S64, Location::GPR(compare), ret)
6437            },
6438        )?;
6439        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
6440        self.release_gpr(compare);
6441        Ok(())
6442    }
6443
6444    fn i64_atomic_cmpxchg_8u(
6445        &mut self,
6446        new: Location,
6447        cmp: Location,
6448        target: Location,
6449        memarg: &MemArg,
6450        ret: Location,
6451        need_check: bool,
6452        imported_memories: bool,
6453        offset: i32,
6454        heap_access_oob: Label,
6455        unaligned_atomic: Label,
6456    ) -> Result<(), CompileError> {
6457        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
6458        let value = if cmp == Location::GPR(GPR::R14) {
6459            if new == Location::GPR(GPR::R13) {
6460                GPR::R12
6461            } else {
6462                GPR::R13
6463            }
6464        } else {
6465            GPR::R14
6466        };
6467        self.assembler.emit_push(Size::S64, Location::GPR(value))?;
6468        self.assembler
6469            .emit_mov(Size::S64, cmp, Location::GPR(compare))?;
6470        self.assembler
6471            .emit_mov(Size::S64, new, Location::GPR(value))?;
6472
6473        self.memory_op(
6474            target,
6475            memarg,
6476            true,
6477            1,
6478            need_check,
6479            imported_memories,
6480            offset,
6481            heap_access_oob,
6482            unaligned_atomic,
6483            |this, addr| {
6484                this.assembler.emit_lock_cmpxchg(
6485                    Size::S8,
6486                    Location::GPR(value),
6487                    Location::Memory(addr, 0),
6488                )?;
6489                this.assembler
6490                    .emit_movzx(Size::S8, Location::GPR(compare), Size::S64, ret)
6491            },
6492        )?;
6493        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
6494        self.release_gpr(compare);
6495        Ok(())
6496    }
6497
6498    fn i64_atomic_cmpxchg_16u(
6499        &mut self,
6500        new: Location,
6501        cmp: Location,
6502        target: Location,
6503        memarg: &MemArg,
6504        ret: Location,
6505        need_check: bool,
6506        imported_memories: bool,
6507        offset: i32,
6508        heap_access_oob: Label,
6509        unaligned_atomic: Label,
6510    ) -> Result<(), CompileError> {
6511        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
6512        let value = if cmp == Location::GPR(GPR::R14) {
6513            if new == Location::GPR(GPR::R13) {
6514                GPR::R12
6515            } else {
6516                GPR::R13
6517            }
6518        } else {
6519            GPR::R14
6520        };
6521        self.assembler.emit_push(Size::S64, Location::GPR(value))?;
6522        self.assembler
6523            .emit_mov(Size::S64, cmp, Location::GPR(compare))?;
6524        self.assembler
6525            .emit_mov(Size::S64, new, Location::GPR(value))?;
6526
6527        self.memory_op(
6528            target,
6529            memarg,
6530            true,
6531            2,
6532            need_check,
6533            imported_memories,
6534            offset,
6535            heap_access_oob,
6536            unaligned_atomic,
6537            |this, addr| {
6538                this.assembler.emit_lock_cmpxchg(
6539                    Size::S16,
6540                    Location::GPR(value),
6541                    Location::Memory(addr, 0),
6542                )?;
6543                this.assembler
6544                    .emit_movzx(Size::S16, Location::GPR(compare), Size::S64, ret)
6545            },
6546        )?;
6547        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
6548        self.release_gpr(compare);
6549        Ok(())
6550    }
6551
6552    fn i64_atomic_cmpxchg_32u(
6553        &mut self,
6554        new: Location,
6555        cmp: Location,
6556        target: Location,
6557        memarg: &MemArg,
6558        ret: Location,
6559        need_check: bool,
6560        imported_memories: bool,
6561        offset: i32,
6562        heap_access_oob: Label,
6563        unaligned_atomic: Label,
6564    ) -> Result<(), CompileError> {
6565        let compare = self.reserve_unused_temp_gpr(GPR::RAX);
6566        let value = if cmp == Location::GPR(GPR::R14) {
6567            if new == Location::GPR(GPR::R13) {
6568                GPR::R12
6569            } else {
6570                GPR::R13
6571            }
6572        } else {
6573            GPR::R14
6574        };
6575        self.assembler.emit_push(Size::S64, Location::GPR(value))?;
6576        self.assembler
6577            .emit_mov(Size::S64, cmp, Location::GPR(compare))?;
6578        self.assembler
6579            .emit_mov(Size::S64, new, Location::GPR(value))?;
6580
6581        self.memory_op(
6582            target,
6583            memarg,
6584            true,
6585            4,
6586            need_check,
6587            imported_memories,
6588            offset,
6589            heap_access_oob,
6590            unaligned_atomic,
6591            |this, addr| {
6592                this.assembler.emit_lock_cmpxchg(
6593                    Size::S32,
6594                    Location::GPR(value),
6595                    Location::Memory(addr, 0),
6596                )?;
6597                this.assembler
6598                    .emit_mov(Size::S32, Location::GPR(compare), ret)
6599            },
6600        )?;
6601        self.assembler.emit_pop(Size::S64, Location::GPR(value))?;
6602        self.release_gpr(compare);
6603        Ok(())
6604    }
6605
6606    fn f32_load(
6607        &mut self,
6608        addr: Location,
6609        memarg: &MemArg,
6610        ret: Location,
6611        need_check: bool,
6612        imported_memories: bool,
6613        offset: i32,
6614        heap_access_oob: Label,
6615        unaligned_atomic: Label,
6616    ) -> Result<(), CompileError> {
6617        self.memory_op(
6618            addr,
6619            memarg,
6620            false,
6621            4,
6622            need_check,
6623            imported_memories,
6624            offset,
6625            heap_access_oob,
6626            unaligned_atomic,
6627            |this, addr| {
6628                this.emit_relaxed_binop(
6629                    AssemblerX64::emit_mov,
6630                    Size::S32,
6631                    Location::Memory(addr, 0),
6632                    ret,
6633                )
6634            },
6635        )
6636    }
6637
6638    fn f32_save(
6639        &mut self,
6640        target_value: Location,
6641        memarg: &MemArg,
6642        target_addr: Location,
6643        canonicalize: bool,
6644        need_check: bool,
6645        imported_memories: bool,
6646        offset: i32,
6647        heap_access_oob: Label,
6648        unaligned_atomic: Label,
6649    ) -> Result<(), CompileError> {
6650        self.memory_op(
6651            target_addr,
6652            memarg,
6653            false,
6654            4,
6655            need_check,
6656            imported_memories,
6657            offset,
6658            heap_access_oob,
6659            unaligned_atomic,
6660            |this, addr| {
6661                if !canonicalize {
6662                    this.emit_relaxed_binop(
6663                        AssemblerX64::emit_mov,
6664                        Size::S32,
6665                        target_value,
6666                        Location::Memory(addr, 0),
6667                    )
6668                } else {
6669                    this.canonicalize_nan(Size::S32, target_value, Location::Memory(addr, 0))
6670                }
6671            },
6672        )
6673    }
6674
6675    fn f64_load(
6676        &mut self,
6677        addr: Location,
6678        memarg: &MemArg,
6679        ret: Location,
6680        need_check: bool,
6681        imported_memories: bool,
6682        offset: i32,
6683        heap_access_oob: Label,
6684        unaligned_atomic: Label,
6685    ) -> Result<(), CompileError> {
6686        self.memory_op(
6687            addr,
6688            memarg,
6689            false,
6690            8,
6691            need_check,
6692            imported_memories,
6693            offset,
6694            heap_access_oob,
6695            unaligned_atomic,
6696            |this, addr| {
6697                this.emit_relaxed_binop(
6698                    AssemblerX64::emit_mov,
6699                    Size::S64,
6700                    Location::Memory(addr, 0),
6701                    ret,
6702                )
6703            },
6704        )
6705    }
6706
6707    fn f64_save(
6708        &mut self,
6709        target_value: Location,
6710        memarg: &MemArg,
6711        target_addr: Location,
6712        canonicalize: bool,
6713        need_check: bool,
6714        imported_memories: bool,
6715        offset: i32,
6716        heap_access_oob: Label,
6717        unaligned_atomic: Label,
6718    ) -> Result<(), CompileError> {
6719        self.memory_op(
6720            target_addr,
6721            memarg,
6722            false,
6723            8,
6724            need_check,
6725            imported_memories,
6726            offset,
6727            heap_access_oob,
6728            unaligned_atomic,
6729            |this, addr| {
6730                if !canonicalize {
6731                    this.emit_relaxed_binop(
6732                        AssemblerX64::emit_mov,
6733                        Size::S64,
6734                        target_value,
6735                        Location::Memory(addr, 0),
6736                    )
6737                } else {
6738                    this.canonicalize_nan(Size::S64, target_value, Location::Memory(addr, 0))
6739                }
6740            },
6741        )
6742    }
6743
    /// Convert an i64 to an f64 (`f64.convert_i64_s` / `f64.convert_i64_u`).
    ///
    /// `signed` selects signed vs. unsigned interpretation of the 64-bit
    /// source in `loc`; the f64 result is written to `ret`.
    fn convert_f64_i64(
        &mut self,
        loc: Location,
        signed: bool,
        ret: Location,
    ) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp_in = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        if self.assembler.arch_has_fconverti() {
            // Target provides native int->float converters; use them directly.
            self.emit_relaxed_mov(Size::S64, loc, Location::GPR(tmp_in))?;
            if signed {
                self.assembler.arch_emit_f64_convert_si64(tmp_in, tmp_out)?;
            } else {
                self.assembler.arch_emit_f64_convert_ui64(tmp_in, tmp_out)?;
            }
            self.emit_relaxed_mov(Size::S64, Location::SIMD(tmp_out), ret)?;
        } else if signed {
            // Signed: CVTSI2SD covers the full i64 range in one instruction.
            self.assembler
                .emit_mov(Size::S64, loc, Location::GPR(tmp_in))?;
            self.assembler
                .emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            self.move_location(Size::S64, Location::SIMD(tmp_out), ret)?;
        } else {
            // Unsigned: x86 (pre-AVX-512) has no u64->f64 instruction.
            // Values below 2^63 (sign bit clear) go through the signed
            // convert unchanged; larger values take the halving path below.
            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;

            let do_convert = self.assembler.get_label();
            let end_convert = self.assembler.get_label();

            self.assembler
                .emit_mov(Size::S64, loc, Location::GPR(tmp_in))?;
            // TEST sets SF from bit 63; jump if the value is >= 2^63.
            self.assembler.emit_test_gpr_64(tmp_in)?;
            self.assembler.emit_jmp(Condition::Signed, do_convert)?;
            self.assembler
                .emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            self.assembler.emit_jmp(Condition::None, end_convert)?;
            self.emit_label(do_convert)?;
            // Value >= 2^63: halve it so it fits the signed range, OR the
            // dropped low bit back in (so round-to-nearest still sees it),
            // convert as signed, then double the result (tmp_out + tmp_out).
            self.move_location(Size::S64, Location::GPR(tmp_in), Location::GPR(tmp))?;
            self.assembler
                .emit_and(Size::S64, Location::Imm32(1), Location::GPR(tmp))?;
            self.assembler
                .emit_shr(Size::S64, Location::Imm8(1), Location::GPR(tmp_in))?;
            self.assembler
                .emit_or(Size::S64, Location::GPR(tmp), Location::GPR(tmp_in))?;
            self.assembler
                .emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            self.assembler
                .emit_vaddsd(tmp_out, XMMOrMemory::XMM(tmp_out), tmp_out)?;
            self.emit_label(end_convert)?;
            self.move_location(Size::S64, Location::SIMD(tmp_out), ret)?;

            self.release_gpr(tmp);
        }
        self.release_gpr(tmp_in);
        self.release_simd(tmp_out);
        Ok(())
    }
6806
    /// Convert an i32 to an f64 (`f64.convert_i32_s` / `f64.convert_i32_u`).
    ///
    /// `signed` selects signed vs. unsigned interpretation of the 32-bit
    /// source in `loc`; the f64 result is written to `ret`.
    fn convert_f64_i32(
        &mut self,
        loc: Location,
        signed: bool,
        ret: Location,
    ) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp_in = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        if self.assembler.arch_has_fconverti() {
            // Target provides native int->float converters; use them directly.
            self.emit_relaxed_mov(Size::S32, loc, Location::GPR(tmp_in))?;
            if signed {
                self.assembler.arch_emit_f64_convert_si32(tmp_in, tmp_out)?;
            } else {
                self.assembler.arch_emit_f64_convert_ui32(tmp_in, tmp_out)?;
            }
            self.emit_relaxed_mov(Size::S64, Location::SIMD(tmp_out), ret)?;
        } else {
            // The 32-bit mov zero-extends `loc` into the full 64-bit register.
            self.assembler
                .emit_mov(Size::S32, loc, Location::GPR(tmp_in))?;
            if signed {
                self.assembler
                    .emit_vcvtsi2sd_32(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            } else {
                // Unsigned: every u32 fits exactly in a signed 64-bit integer,
                // so the zero-extended value can use the 64-bit signed convert.
                self.assembler
                    .emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            }
            self.move_location(Size::S64, Location::SIMD(tmp_out), ret)?;
        }
        self.release_gpr(tmp_in);
        self.release_simd(tmp_out);
        Ok(())
    }
6843
    /// Convert an i64 to an f32 (`f32.convert_i64_s` / `f32.convert_i64_u`).
    ///
    /// `signed` selects signed vs. unsigned interpretation of the 64-bit
    /// source in `loc`; the f32 result is written to `ret`.
    fn convert_f32_i64(
        &mut self,
        loc: Location,
        signed: bool,
        ret: Location,
    ) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp_in = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        if self.assembler.arch_has_fconverti() {
            // Target provides native int->float converters; use them directly.
            self.emit_relaxed_mov(Size::S64, loc, Location::GPR(tmp_in))?;
            if signed {
                self.assembler.arch_emit_f32_convert_si64(tmp_in, tmp_out)?;
            } else {
                self.assembler.arch_emit_f32_convert_ui64(tmp_in, tmp_out)?;
            }
            self.emit_relaxed_mov(Size::S32, Location::SIMD(tmp_out), ret)?;
        } else if signed {
            // Signed: CVTSI2SS covers the full i64 range in one instruction.
            self.assembler
                .emit_mov(Size::S64, loc, Location::GPR(tmp_in))?;
            self.assembler
                .emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            self.move_location(Size::S32, Location::SIMD(tmp_out), ret)?;
        } else {
            // Unsigned: same scheme as convert_f64_i64 — values below 2^63
            // convert directly; larger ones are halved, converted, doubled.
            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
            })?;

            let do_convert = self.assembler.get_label();
            let end_convert = self.assembler.get_label();

            self.assembler
                .emit_mov(Size::S64, loc, Location::GPR(tmp_in))?;
            // TEST sets SF from bit 63; jump if the value is >= 2^63.
            self.assembler.emit_test_gpr_64(tmp_in)?;
            self.assembler.emit_jmp(Condition::Signed, do_convert)?;
            self.assembler
                .emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            self.assembler.emit_jmp(Condition::None, end_convert)?;
            self.emit_label(do_convert)?;
            // Halve, keep the dropped low bit for correct rounding, convert
            // as signed, then double the result (tmp_out + tmp_out).
            self.move_location(Size::S64, Location::GPR(tmp_in), Location::GPR(tmp))?;
            self.assembler
                .emit_and(Size::S64, Location::Imm32(1), Location::GPR(tmp))?;
            self.assembler
                .emit_shr(Size::S64, Location::Imm8(1), Location::GPR(tmp_in))?;
            self.assembler
                .emit_or(Size::S64, Location::GPR(tmp), Location::GPR(tmp_in))?;
            self.assembler
                .emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            self.assembler
                .emit_vaddss(tmp_out, XMMOrMemory::XMM(tmp_out), tmp_out)?;
            self.emit_label(end_convert)?;
            self.move_location(Size::S32, Location::SIMD(tmp_out), ret)?;

            self.release_gpr(tmp);
        }
        self.release_gpr(tmp_in);
        self.release_simd(tmp_out);
        Ok(())
    }
6906
    /// Convert an i32 to an f32 (`f32.convert_i32_s` / `f32.convert_i32_u`).
    ///
    /// `signed` selects signed vs. unsigned interpretation of the 32-bit
    /// source in `loc`; the f32 result is written to `ret`.
    fn convert_f32_i32(
        &mut self,
        loc: Location,
        signed: bool,
        ret: Location,
    ) -> Result<(), CompileError> {
        let tmp_out = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp_in = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        if self.assembler.arch_has_fconverti() {
            // Target provides native int->float converters; use them directly.
            self.emit_relaxed_mov(Size::S32, loc, Location::GPR(tmp_in))?;
            if signed {
                self.assembler.arch_emit_f32_convert_si32(tmp_in, tmp_out)?;
            } else {
                self.assembler.arch_emit_f32_convert_ui32(tmp_in, tmp_out)?;
            }
            self.emit_relaxed_mov(Size::S32, Location::SIMD(tmp_out), ret)?;
        } else {
            // The 32-bit mov zero-extends `loc` into the full 64-bit register.
            self.assembler
                .emit_mov(Size::S32, loc, Location::GPR(tmp_in))?;
            if signed {
                self.assembler
                    .emit_vcvtsi2ss_32(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            } else {
                // Unsigned: every u32 fits exactly in a signed 64-bit integer,
                // so the zero-extended value can use the 64-bit signed convert.
                self.assembler
                    .emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out)?;
            }
            self.move_location(Size::S32, Location::SIMD(tmp_out), ret)?;
        }
        self.release_gpr(tmp_in);
        self.release_simd(tmp_out);
        Ok(())
    }
6943
6944    fn convert_i64_f64(
6945        &mut self,
6946        loc: Location,
6947        ret: Location,
6948        signed: bool,
6949        sat: bool,
6950    ) -> Result<(), CompileError> {
6951        match (signed, sat) {
6952            (false, true) => self.convert_i64_f64_u_s(loc, ret),
6953            (false, false) => self.convert_i64_f64_u_u(loc, ret),
6954            (true, true) => self.convert_i64_f64_s_s(loc, ret),
6955            (true, false) => self.convert_i64_f64_s_u(loc, ret),
6956        }
6957    }
6958
6959    fn convert_i32_f64(
6960        &mut self,
6961        loc: Location,
6962        ret: Location,
6963        signed: bool,
6964        sat: bool,
6965    ) -> Result<(), CompileError> {
6966        match (signed, sat) {
6967            (false, true) => self.convert_i32_f64_u_s(loc, ret),
6968            (false, false) => self.convert_i32_f64_u_u(loc, ret),
6969            (true, true) => self.convert_i32_f64_s_s(loc, ret),
6970            (true, false) => self.convert_i32_f64_s_u(loc, ret),
6971        }
6972    }
6973
6974    fn convert_i64_f32(
6975        &mut self,
6976        loc: Location,
6977        ret: Location,
6978        signed: bool,
6979        sat: bool,
6980    ) -> Result<(), CompileError> {
6981        match (signed, sat) {
6982            (false, true) => self.convert_i64_f32_u_s(loc, ret),
6983            (false, false) => self.convert_i64_f32_u_u(loc, ret),
6984            (true, true) => self.convert_i64_f32_s_s(loc, ret),
6985            (true, false) => self.convert_i64_f32_s_u(loc, ret),
6986        }
6987    }
6988
6989    fn convert_i32_f32(
6990        &mut self,
6991        loc: Location,
6992        ret: Location,
6993        signed: bool,
6994        sat: bool,
6995    ) -> Result<(), CompileError> {
6996        match (signed, sat) {
6997            (false, true) => self.convert_i32_f32_u_s(loc, ret),
6998            (false, false) => self.convert_i32_f32_u_u(loc, ret),
6999            (true, true) => self.convert_i32_f32_s_s(loc, ret),
7000            (true, false) => self.convert_i32_f32_s_u(loc, ret),
7001        }
7002    }
7003
7004    fn convert_f64_f32(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7005        self.emit_relaxed_avx(AssemblerX64::emit_vcvtss2sd, loc, loc, ret)
7006    }
7007
7008    fn convert_f32_f64(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7009        self.emit_relaxed_avx(AssemblerX64::emit_vcvtsd2ss, loc, loc, ret)
7010    }
7011
7012    fn f64_neg(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7013        if self.assembler.arch_has_fneg() {
7014            let tmp = self.acquire_temp_simd().ok_or_else(|| {
7015                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
7016            })?;
7017            self.emit_relaxed_mov(Size::S64, loc, Location::SIMD(tmp))?;
7018            self.assembler.arch_emit_f64_neg(tmp, tmp)?;
7019            self.emit_relaxed_mov(Size::S64, Location::SIMD(tmp), ret)?;
7020            self.release_simd(tmp);
7021        } else {
7022            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
7023                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
7024            })?;
7025            self.move_location(Size::S64, loc, Location::GPR(tmp))?;
7026            self.assembler.emit_btc_gpr_imm8_64(63, tmp)?;
7027            self.move_location(Size::S64, Location::GPR(tmp), ret)?;
7028            self.release_gpr(tmp);
7029        }
7030        Ok(())
7031    }
7032
7033    fn f64_abs(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7034        let tmp = self.acquire_temp_gpr().ok_or_else(|| {
7035            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
7036        })?;
7037        let c = self.acquire_temp_gpr().ok_or_else(|| {
7038            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
7039        })?;
7040
7041        self.move_location(Size::S64, loc, Location::GPR(tmp))?;
7042        self.move_location(
7043            Size::S64,
7044            Location::Imm64(0x7fffffffffffffffu64),
7045            Location::GPR(c),
7046        )?;
7047        self.assembler
7048            .emit_and(Size::S64, Location::GPR(c), Location::GPR(tmp))?;
7049        self.move_location(Size::S64, Location::GPR(tmp), ret)?;
7050
7051        self.release_gpr(c);
7052        self.release_gpr(tmp);
7053        Ok(())
7054    }
7055
7056    fn emit_i64_copysign(&mut self, tmp1: GPR, tmp2: GPR) -> Result<(), CompileError> {
7057        let c = self.acquire_temp_gpr().ok_or_else(|| {
7058            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
7059        })?;
7060
7061        self.move_location(
7062            Size::S64,
7063            Location::Imm64(0x7fffffffffffffffu64),
7064            Location::GPR(c),
7065        )?;
7066        self.assembler
7067            .emit_and(Size::S64, Location::GPR(c), Location::GPR(tmp1))?;
7068
7069        self.move_location(
7070            Size::S64,
7071            Location::Imm64(0x8000000000000000u64),
7072            Location::GPR(c),
7073        )?;
7074        self.assembler
7075            .emit_and(Size::S64, Location::GPR(c), Location::GPR(tmp2))?;
7076
7077        self.assembler
7078            .emit_or(Size::S64, Location::GPR(tmp2), Location::GPR(tmp1))?;
7079
7080        self.release_gpr(c);
7081        Ok(())
7082    }
7083
7084    fn f64_sqrt(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7085        self.emit_relaxed_avx(AssemblerX64::emit_vsqrtsd, loc, loc, ret)
7086    }
7087
7088    fn f64_trunc(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7089        self.emit_relaxed_avx(AssemblerX64::emit_vroundsd_trunc, loc, loc, ret)
7090    }
7091
7092    fn f64_ceil(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7093        self.emit_relaxed_avx(AssemblerX64::emit_vroundsd_ceil, loc, loc, ret)
7094    }
7095
7096    fn f64_floor(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7097        self.emit_relaxed_avx(AssemblerX64::emit_vroundsd_floor, loc, loc, ret)
7098    }
7099
7100    fn f64_nearest(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7101        self.emit_relaxed_avx(AssemblerX64::emit_vroundsd_nearest, loc, loc, ret)
7102    }
7103
7104    fn f64_cmp_ge(
7105        &mut self,
7106        loc_a: Location,
7107        loc_b: Location,
7108        ret: Location,
7109    ) -> Result<(), CompileError> {
7110        self.emit_relaxed_avx(AssemblerX64::emit_vcmpgesd, loc_a, loc_b, ret)?;
7111        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7112    }
7113
7114    fn f64_cmp_gt(
7115        &mut self,
7116        loc_a: Location,
7117        loc_b: Location,
7118        ret: Location,
7119    ) -> Result<(), CompileError> {
7120        self.emit_relaxed_avx(AssemblerX64::emit_vcmpgtsd, loc_a, loc_b, ret)?;
7121        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7122    }
7123
7124    fn f64_cmp_le(
7125        &mut self,
7126        loc_a: Location,
7127        loc_b: Location,
7128        ret: Location,
7129    ) -> Result<(), CompileError> {
7130        self.emit_relaxed_avx(AssemblerX64::emit_vcmplesd, loc_a, loc_b, ret)?;
7131        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7132    }
7133
7134    fn f64_cmp_lt(
7135        &mut self,
7136        loc_a: Location,
7137        loc_b: Location,
7138        ret: Location,
7139    ) -> Result<(), CompileError> {
7140        self.emit_relaxed_avx(AssemblerX64::emit_vcmpltsd, loc_a, loc_b, ret)?;
7141        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7142    }
7143
7144    fn f64_cmp_ne(
7145        &mut self,
7146        loc_a: Location,
7147        loc_b: Location,
7148        ret: Location,
7149    ) -> Result<(), CompileError> {
7150        self.emit_relaxed_avx(AssemblerX64::emit_vcmpneqsd, loc_a, loc_b, ret)?;
7151        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7152    }
7153
7154    fn f64_cmp_eq(
7155        &mut self,
7156        loc_a: Location,
7157        loc_b: Location,
7158        ret: Location,
7159    ) -> Result<(), CompileError> {
7160        self.emit_relaxed_avx(AssemblerX64::emit_vcmpeqsd, loc_a, loc_b, ret)?;
7161        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7162    }
7163
    fn f64_min(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        // f64 minimum with extra fix-ups beyond bare VMINSD: the code below
        // returns -0.0 when the operands are the -0.0/+0.0 pair, and a
        // canonical NaN when either operand is unordered.
        // Canonicalize the result to differentiate arithmetic NaNs from canonical NaNs.
        let tmp1 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp2 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmpg1 = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmpg2 = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;

        // Materialize loc_a in an XMM register; immediates are staged through tmpg1.
        let src1 = match loc_a {
            Location::SIMD(x) => x,
            Location::GPR(_) | Location::Memory(_, _) => {
                self.move_location(Size::S64, loc_a, Location::SIMD(tmp1))?;
                tmp1
            }
            Location::Imm32(_) => {
                self.move_location(Size::S32, loc_a, Location::GPR(tmpg1))?;
                self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
                tmp1
            }
            Location::Imm64(_) => {
                self.move_location(Size::S64, loc_a, Location::GPR(tmpg1))?;
                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
                tmp1
            }
            _ => {
                codegen_error!("singlepass f64_min unreachable");
            }
        };
        // Same for loc_b, staged into tmp2.
        let src2 = match loc_b {
            Location::SIMD(x) => x,
            Location::GPR(_) | Location::Memory(_, _) => {
                self.move_location(Size::S64, loc_b, Location::SIMD(tmp2))?;
                tmp2
            }
            Location::Imm32(_) => {
                self.move_location(Size::S32, loc_b, Location::GPR(tmpg1))?;
                self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
                tmp2
            }
            Location::Imm64(_) => {
                self.move_location(Size::S64, loc_b, Location::GPR(tmpg1))?;
                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
                tmp2
            }
            _ => {
                codegen_error!("singlepass f64_min unreachable");
            }
        };

        // Hard-coded scratch XMMs. NOTE(review): assumes XMM8-XMM10 never alias
        // src1/src2 or the acquired temps here — confirm against the SIMD allocator.
        let tmp_xmm1 = XMM::XMM8;
        let tmp_xmm2 = XMM::XMM9;
        let tmp_xmm3 = XMM::XMM10;

        // Compare the raw 64-bit patterns; bit-unequal but float-equal operands
        // (checked below) can only be the -0.0/+0.0 pair.
        self.move_location(Size::S64, Location::SIMD(src1), Location::GPR(tmpg1))?;
        self.move_location(Size::S64, Location::SIMD(src2), Location::GPR(tmpg2))?;
        self.assembler
            .emit_cmp(Size::S64, Location::GPR(tmpg2), Location::GPR(tmpg1))?;
        // tmp_xmm1 = vminsd(src1, src2): the "normal" result.
        self.assembler
            .emit_vminsd(src1, XMMOrMemory::XMM(src2), tmp_xmm1)?;
        let label1 = self.assembler.get_label();
        let label2 = self.assembler.get_label();
        self.assembler.emit_jmp(Condition::NotEqual, label1)?;
        // Identical bit patterns: the "operands equal" fallback is the min itself.
        self.assembler
            .emit_vmovapd(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2))?;
        self.assembler.emit_jmp(Condition::None, label2)?;
        self.emit_label(label1)?;
        // Differing bit patterns: if the floats still compare equal it is the
        // -0.0/+0.0 pair, whose minimum is -0.0.
        // load float -0.0
        self.move_location(
            Size::S64,
            Location::Imm64(0x8000_0000_0000_0000), // Negative zero
            Location::GPR(tmpg1),
        )?;
        self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp_xmm2))?;
        self.emit_label(label2)?;
        // Select tmp_xmm2 where src1 == src2 (float compare), else the vminsd result.
        self.assembler
            .emit_vcmpeqsd(src1, XMMOrMemory::XMM(src2), tmp_xmm3)?;
        self.assembler
            .emit_vblendvpd(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1)?;
        // Unordered (either operand NaN): overwrite with the canonical NaN.
        self.assembler
            .emit_vcmpunordsd(src1, XMMOrMemory::XMM(src2), src1)?;
        // load float canonical nan
        self.move_location(
            Size::S64,
            Location::Imm64(0x7FF8_0000_0000_0000), // Canonical NaN
            Location::GPR(tmpg1),
        )?;
        self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(src2))?;
        self.assembler
            .emit_vblendvpd(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1)?;
        match ret {
            Location::SIMD(x) => {
                // NOTE(review): f64_max uses emit_vmovapd for this move; vmovaps is
                // an equivalent register-to-register move, so the difference is
                // cosmetic only — consider unifying.
                self.assembler
                    .emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x))?;
            }
            Location::Memory(_, _) | Location::GPR(_) => {
                self.move_location(Size::S64, Location::SIMD(src1), ret)?;
            }
            _ => {
                codegen_error!("singlepass f64_min unreachable");
            }
        }

        self.release_gpr(tmpg2);
        self.release_gpr(tmpg1);
        self.release_simd(tmp2);
        self.release_simd(tmp1);
        Ok(())
    }
7284
    fn f64_max(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        // f64 maximum with extra fix-ups beyond bare VMAXSD: the code below
        // returns +0.0 when the operands are the -0.0/+0.0 pair, and a
        // canonical NaN when either operand is unordered.
        // Canonicalize the result to differentiate arithmetic NaNs from canonical NaNs.
        let tmp1 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp2 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmpg1 = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmpg2 = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;

        // Materialize loc_a in an XMM register; immediates are staged through tmpg1.
        let src1 = match loc_a {
            Location::SIMD(x) => x,
            Location::GPR(_) | Location::Memory(_, _) => {
                self.move_location(Size::S64, loc_a, Location::SIMD(tmp1))?;
                tmp1
            }
            Location::Imm32(_) => {
                self.move_location(Size::S32, loc_a, Location::GPR(tmpg1))?;
                self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
                tmp1
            }
            Location::Imm64(_) => {
                self.move_location(Size::S64, loc_a, Location::GPR(tmpg1))?;
                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
                tmp1
            }
            _ => {
                codegen_error!("singlepass f64_max unreachable");
            }
        };
        // Same for loc_b, staged into tmp2.
        let src2 = match loc_b {
            Location::SIMD(x) => x,
            Location::GPR(_) | Location::Memory(_, _) => {
                self.move_location(Size::S64, loc_b, Location::SIMD(tmp2))?;
                tmp2
            }
            Location::Imm32(_) => {
                self.move_location(Size::S32, loc_b, Location::GPR(tmpg1))?;
                self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
                tmp2
            }
            Location::Imm64(_) => {
                self.move_location(Size::S64, loc_b, Location::GPR(tmpg1))?;
                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
                tmp2
            }
            _ => {
                codegen_error!("singlepass f64_max unreachable");
            }
        };

        // Hard-coded scratch XMMs. NOTE(review): assumes XMM8-XMM10 never alias
        // src1/src2 or the acquired temps here — confirm against the SIMD allocator.
        let tmp_xmm1 = XMM::XMM8;
        let tmp_xmm2 = XMM::XMM9;
        let tmp_xmm3 = XMM::XMM10;

        // Compare the raw 64-bit patterns; bit-unequal but float-equal operands
        // (checked below) can only be the -0.0/+0.0 pair.
        self.move_location(Size::S64, Location::SIMD(src1), Location::GPR(tmpg1))?;
        self.move_location(Size::S64, Location::SIMD(src2), Location::GPR(tmpg2))?;
        self.assembler
            .emit_cmp(Size::S64, Location::GPR(tmpg2), Location::GPR(tmpg1))?;
        // tmp_xmm1 = vmaxsd(src1, src2): the "normal" result.
        self.assembler
            .emit_vmaxsd(src1, XMMOrMemory::XMM(src2), tmp_xmm1)?;
        let label1 = self.assembler.get_label();
        let label2 = self.assembler.get_label();
        self.assembler.emit_jmp(Condition::NotEqual, label1)?;
        // Identical bit patterns: the "operands equal" fallback is the max itself.
        self.assembler
            .emit_vmovapd(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2))?;
        self.assembler.emit_jmp(Condition::None, label2)?;
        self.emit_label(label1)?;
        // Differing bit patterns: if the floats still compare equal it is the
        // -0.0/+0.0 pair, whose maximum is +0.0 (xor zeroes the register).
        self.assembler
            .emit_vxorpd(tmp_xmm2, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm2)?;
        self.emit_label(label2)?;
        // Select tmp_xmm2 where src1 == src2 (float compare), else the vmaxsd result.
        self.assembler
            .emit_vcmpeqsd(src1, XMMOrMemory::XMM(src2), tmp_xmm3)?;
        self.assembler
            .emit_vblendvpd(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1)?;
        // Unordered (either operand NaN): overwrite with the canonical NaN.
        self.assembler
            .emit_vcmpunordsd(src1, XMMOrMemory::XMM(src2), src1)?;
        // load float canonical nan
        self.move_location(
            Size::S64,
            Location::Imm64(0x7FF8_0000_0000_0000), // Canonical NaN
            Location::GPR(tmpg1),
        )?;
        self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(src2))?;
        self.assembler
            .emit_vblendvpd(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1)?;
        match ret {
            Location::SIMD(x) => {
                self.assembler
                    .emit_vmovapd(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x))?;
            }
            Location::Memory(_, _) | Location::GPR(_) => {
                self.move_location(Size::S64, Location::SIMD(src1), ret)?;
            }
            _ => {
                codegen_error!("singlepass f64_max unreachable");
            }
        }

        self.release_gpr(tmpg2);
        self.release_gpr(tmpg1);
        self.release_simd(tmp2);
        self.release_simd(tmp1);
        Ok(())
    }
7400
7401    fn f64_add(
7402        &mut self,
7403        loc_a: Location,
7404        loc_b: Location,
7405        ret: Location,
7406    ) -> Result<(), CompileError> {
7407        self.emit_relaxed_avx(AssemblerX64::emit_vaddsd, loc_a, loc_b, ret)
7408    }
7409
7410    fn f64_sub(
7411        &mut self,
7412        loc_a: Location,
7413        loc_b: Location,
7414        ret: Location,
7415    ) -> Result<(), CompileError> {
7416        self.emit_relaxed_avx(AssemblerX64::emit_vsubsd, loc_a, loc_b, ret)
7417    }
7418
7419    fn f64_mul(
7420        &mut self,
7421        loc_a: Location,
7422        loc_b: Location,
7423        ret: Location,
7424    ) -> Result<(), CompileError> {
7425        self.emit_relaxed_avx(AssemblerX64::emit_vmulsd, loc_a, loc_b, ret)
7426    }
7427
7428    fn f64_div(
7429        &mut self,
7430        loc_a: Location,
7431        loc_b: Location,
7432        ret: Location,
7433    ) -> Result<(), CompileError> {
7434        self.emit_relaxed_avx(AssemblerX64::emit_vdivsd, loc_a, loc_b, ret)
7435    }
7436
7437    fn f32_neg(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7438        if self.assembler.arch_has_fneg() {
7439            let tmp = self.acquire_temp_simd().ok_or_else(|| {
7440                CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
7441            })?;
7442            self.emit_relaxed_mov(Size::S32, loc, Location::SIMD(tmp))?;
7443            self.assembler.arch_emit_f32_neg(tmp, tmp)?;
7444            self.emit_relaxed_mov(Size::S32, Location::SIMD(tmp), ret)?;
7445            self.release_simd(tmp);
7446        } else {
7447            let tmp = self.acquire_temp_gpr().ok_or_else(|| {
7448                CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
7449            })?;
7450            self.move_location(Size::S32, loc, Location::GPR(tmp))?;
7451            self.assembler.emit_btc_gpr_imm8_32(31, tmp)?;
7452            self.move_location(Size::S32, Location::GPR(tmp), ret)?;
7453            self.release_gpr(tmp);
7454        }
7455        Ok(())
7456    }
7457
7458    fn f32_abs(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7459        let tmp = self.acquire_temp_gpr().ok_or_else(|| {
7460            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
7461        })?;
7462        self.move_location(Size::S32, loc, Location::GPR(tmp))?;
7463        self.assembler.emit_and(
7464            Size::S32,
7465            Location::Imm32(0x7fffffffu32),
7466            Location::GPR(tmp),
7467        )?;
7468        self.move_location(Size::S32, Location::GPR(tmp), ret)?;
7469        self.release_gpr(tmp);
7470        Ok(())
7471    }
7472
7473    fn emit_i32_copysign(&mut self, tmp1: GPR, tmp2: GPR) -> Result<(), CompileError> {
7474        self.assembler.emit_and(
7475            Size::S32,
7476            Location::Imm32(0x7fffffffu32),
7477            Location::GPR(tmp1),
7478        )?;
7479        self.assembler.emit_and(
7480            Size::S32,
7481            Location::Imm32(0x80000000u32),
7482            Location::GPR(tmp2),
7483        )?;
7484        self.assembler
7485            .emit_or(Size::S32, Location::GPR(tmp2), Location::GPR(tmp1))
7486    }
7487
7488    fn f32_sqrt(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7489        self.emit_relaxed_avx(AssemblerX64::emit_vsqrtss, loc, loc, ret)
7490    }
7491
7492    fn f32_trunc(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7493        self.emit_relaxed_avx(AssemblerX64::emit_vroundss_trunc, loc, loc, ret)
7494    }
7495
7496    fn f32_ceil(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7497        self.emit_relaxed_avx(AssemblerX64::emit_vroundss_ceil, loc, loc, ret)
7498    }
7499
7500    fn f32_floor(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7501        self.emit_relaxed_avx(AssemblerX64::emit_vroundss_floor, loc, loc, ret)
7502    }
7503
7504    fn f32_nearest(&mut self, loc: Location, ret: Location) -> Result<(), CompileError> {
7505        self.emit_relaxed_avx(AssemblerX64::emit_vroundss_nearest, loc, loc, ret)
7506    }
7507
7508    fn f32_cmp_ge(
7509        &mut self,
7510        loc_a: Location,
7511        loc_b: Location,
7512        ret: Location,
7513    ) -> Result<(), CompileError> {
7514        self.emit_relaxed_avx(AssemblerX64::emit_vcmpgess, loc_a, loc_b, ret)?;
7515        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7516    }
7517
7518    fn f32_cmp_gt(
7519        &mut self,
7520        loc_a: Location,
7521        loc_b: Location,
7522        ret: Location,
7523    ) -> Result<(), CompileError> {
7524        self.emit_relaxed_avx(AssemblerX64::emit_vcmpgtss, loc_a, loc_b, ret)?;
7525        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7526    }
7527
7528    fn f32_cmp_le(
7529        &mut self,
7530        loc_a: Location,
7531        loc_b: Location,
7532        ret: Location,
7533    ) -> Result<(), CompileError> {
7534        self.emit_relaxed_avx(AssemblerX64::emit_vcmpless, loc_a, loc_b, ret)?;
7535        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7536    }
7537
7538    fn f32_cmp_lt(
7539        &mut self,
7540        loc_a: Location,
7541        loc_b: Location,
7542        ret: Location,
7543    ) -> Result<(), CompileError> {
7544        self.emit_relaxed_avx(AssemblerX64::emit_vcmpltss, loc_a, loc_b, ret)?;
7545        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7546    }
7547
7548    fn f32_cmp_ne(
7549        &mut self,
7550        loc_a: Location,
7551        loc_b: Location,
7552        ret: Location,
7553    ) -> Result<(), CompileError> {
7554        self.emit_relaxed_avx(AssemblerX64::emit_vcmpneqss, loc_a, loc_b, ret)?;
7555        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7556    }
7557
7558    fn f32_cmp_eq(
7559        &mut self,
7560        loc_a: Location,
7561        loc_b: Location,
7562        ret: Location,
7563    ) -> Result<(), CompileError> {
7564        self.emit_relaxed_avx(AssemblerX64::emit_vcmpeqss, loc_a, loc_b, ret)?;
7565        self.assembler.emit_and(Size::S32, Location::Imm32(1), ret)
7566    }
7567
    fn f32_min(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        // f32 minimum with extra fix-ups beyond bare VMINSS: the code below
        // returns -0.0 when the operands are the -0.0/+0.0 pair, and a
        // canonical NaN when either operand is unordered.
        // Canonicalize the result to differentiate arithmetic NaNs from canonical NaNs.
        let tmp1 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp2 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmpg1 = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmpg2 = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;

        // Materialize loc_a in an XMM register; immediates are staged through tmpg1.
        let src1 = match loc_a {
            Location::SIMD(x) => x,
            Location::GPR(_) | Location::Memory(_, _) => {
                self.move_location(Size::S64, loc_a, Location::SIMD(tmp1))?;
                tmp1
            }
            Location::Imm32(_) => {
                self.move_location(Size::S32, loc_a, Location::GPR(tmpg1))?;
                self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
                tmp1
            }
            Location::Imm64(_) => {
                self.move_location(Size::S64, loc_a, Location::GPR(tmpg1))?;
                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
                tmp1
            }
            _ => {
                codegen_error!("singlepass f32_min unreachable");
            }
        };
        // Same for loc_b, staged into tmp2.
        let src2 = match loc_b {
            Location::SIMD(x) => x,
            Location::GPR(_) | Location::Memory(_, _) => {
                self.move_location(Size::S64, loc_b, Location::SIMD(tmp2))?;
                tmp2
            }
            Location::Imm32(_) => {
                self.move_location(Size::S32, loc_b, Location::GPR(tmpg1))?;
                self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
                tmp2
            }
            Location::Imm64(_) => {
                self.move_location(Size::S64, loc_b, Location::GPR(tmpg1))?;
                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
                tmp2
            }
            _ => {
                codegen_error!("singlepass f32_min unreachable");
            }
        };

        // Hard-coded scratch XMMs. NOTE(review): assumes XMM8-XMM10 never alias
        // src1/src2 or the acquired temps here — confirm against the SIMD allocator.
        let tmp_xmm1 = XMM::XMM8;
        let tmp_xmm2 = XMM::XMM9;
        let tmp_xmm3 = XMM::XMM10;

        // Compare the raw 32-bit patterns; bit-unequal but float-equal operands
        // (checked below) can only be the -0.0/+0.0 pair.
        self.move_location(Size::S32, Location::SIMD(src1), Location::GPR(tmpg1))?;
        self.move_location(Size::S32, Location::SIMD(src2), Location::GPR(tmpg2))?;
        self.assembler
            .emit_cmp(Size::S32, Location::GPR(tmpg2), Location::GPR(tmpg1))?;
        // tmp_xmm1 = vminss(src1, src2): the "normal" result.
        self.assembler
            .emit_vminss(src1, XMMOrMemory::XMM(src2), tmp_xmm1)?;
        let label1 = self.assembler.get_label();
        let label2 = self.assembler.get_label();
        self.assembler.emit_jmp(Condition::NotEqual, label1)?;
        // Identical bit patterns: the "operands equal" fallback is the min itself.
        self.assembler
            .emit_vmovaps(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2))?;
        self.assembler.emit_jmp(Condition::None, label2)?;
        self.emit_label(label1)?;
        // Differing bit patterns: if the floats still compare equal it is the
        // -0.0/+0.0 pair, whose minimum is -0.0.
        // load float -0.0
        // NOTE(review): Size::S64 is used with a 32-bit immediate here (and for
        // the NaN load below) while the operand moves above use Size::S32 —
        // presumably fine since scalar f32 ops ignore the upper lanes, but
        // confirm Size::S32 was not intended.
        self.move_location(
            Size::S64,
            Location::Imm32(0x8000_0000), // Negative zero
            Location::GPR(tmpg1),
        )?;
        self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp_xmm2))?;
        self.emit_label(label2)?;
        // Select tmp_xmm2 where src1 == src2 (float compare), else the vminss result.
        self.assembler
            .emit_vcmpeqss(src1, XMMOrMemory::XMM(src2), tmp_xmm3)?;
        self.assembler
            .emit_vblendvps(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1)?;
        // Unordered (either operand NaN): overwrite with the canonical NaN.
        self.assembler
            .emit_vcmpunordss(src1, XMMOrMemory::XMM(src2), src1)?;
        // load float canonical nan
        self.move_location(
            Size::S64,
            Location::Imm32(0x7FC0_0000), // Canonical NaN
            Location::GPR(tmpg1),
        )?;
        self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(src2))?;
        self.assembler
            .emit_vblendvps(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1)?;
        match ret {
            Location::SIMD(x) => {
                self.assembler
                    .emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x))?;
            }
            Location::Memory(_, _) | Location::GPR(_) => {
                self.move_location(Size::S64, Location::SIMD(src1), ret)?;
            }
            _ => {
                codegen_error!("singlepass f32_min unreachable");
            }
        }

        self.release_gpr(tmpg2);
        self.release_gpr(tmpg1);
        self.release_simd(tmp2);
        self.release_simd(tmp1);
        Ok(())
    }
7688
    fn f32_max(
        &mut self,
        loc_a: Location,
        loc_b: Location,
        ret: Location,
    ) -> Result<(), CompileError> {
        // f32 maximum with extra fix-ups beyond bare VMAXSS: the code below
        // returns +0.0 when the operands are the -0.0/+0.0 pair, and a
        // canonical NaN when either operand is unordered.
        // Canonicalize the result to differentiate arithmetic NaNs from canonical NaNs.
        let tmp1 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmp2 = self.acquire_temp_simd().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp simd".to_owned())
        })?;
        let tmpg1 = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;
        let tmpg2 = self.acquire_temp_gpr().ok_or_else(|| {
            CompileError::Codegen("singlepass cannot acquire temp gpr".to_owned())
        })?;

        // Materialize loc_a in an XMM register; immediates are staged through tmpg1.
        let src1 = match loc_a {
            Location::SIMD(x) => x,
            Location::GPR(_) | Location::Memory(_, _) => {
                self.move_location(Size::S64, loc_a, Location::SIMD(tmp1))?;
                tmp1
            }
            Location::Imm32(_) => {
                self.move_location(Size::S32, loc_a, Location::GPR(tmpg1))?;
                self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
                tmp1
            }
            Location::Imm64(_) => {
                self.move_location(Size::S64, loc_a, Location::GPR(tmpg1))?;
                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp1))?;
                tmp1
            }
            _ => {
                codegen_error!("singlepass f32_max unreachable");
            }
        };
        // Same for loc_b, staged into tmp2.
        let src2 = match loc_b {
            Location::SIMD(x) => x,
            Location::GPR(_) | Location::Memory(_, _) => {
                self.move_location(Size::S64, loc_b, Location::SIMD(tmp2))?;
                tmp2
            }
            Location::Imm32(_) => {
                self.move_location(Size::S32, loc_b, Location::GPR(tmpg1))?;
                self.move_location(Size::S32, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
                tmp2
            }
            Location::Imm64(_) => {
                self.move_location(Size::S64, loc_b, Location::GPR(tmpg1))?;
                self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(tmp2))?;
                tmp2
            }
            _ => {
                codegen_error!("singlepass f32_max unreachable");
            }
        };

        // Hard-coded scratch XMMs. NOTE(review): assumes XMM8-XMM10 never alias
        // src1/src2 or the acquired temps here — confirm against the SIMD allocator.
        let tmp_xmm1 = XMM::XMM8;
        let tmp_xmm2 = XMM::XMM9;
        let tmp_xmm3 = XMM::XMM10;

        // Compare the raw 32-bit patterns; bit-unequal but float-equal operands
        // (checked below) can only be the -0.0/+0.0 pair.
        self.move_location(Size::S32, Location::SIMD(src1), Location::GPR(tmpg1))?;
        self.move_location(Size::S32, Location::SIMD(src2), Location::GPR(tmpg2))?;
        self.assembler
            .emit_cmp(Size::S32, Location::GPR(tmpg2), Location::GPR(tmpg1))?;
        // tmp_xmm1 = vmaxss(src1, src2): the "normal" result.
        self.assembler
            .emit_vmaxss(src1, XMMOrMemory::XMM(src2), tmp_xmm1)?;
        let label1 = self.assembler.get_label();
        let label2 = self.assembler.get_label();
        self.assembler.emit_jmp(Condition::NotEqual, label1)?;
        // Identical bit patterns: the "operands equal" fallback is the max itself.
        self.assembler
            .emit_vmovaps(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2))?;
        self.assembler.emit_jmp(Condition::None, label2)?;
        self.emit_label(label1)?;
        // Differing bit patterns: if the floats still compare equal it is the
        // -0.0/+0.0 pair, whose maximum is +0.0 (xor zeroes the register).
        self.assembler
            .emit_vxorps(tmp_xmm2, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm2)?;
        self.emit_label(label2)?;
        // Select tmp_xmm2 where src1 == src2 (float compare), else the vmaxss result.
        self.assembler
            .emit_vcmpeqss(src1, XMMOrMemory::XMM(src2), tmp_xmm3)?;
        self.assembler
            .emit_vblendvps(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1)?;
        // Unordered (either operand NaN): overwrite with the canonical NaN.
        self.assembler
            .emit_vcmpunordss(src1, XMMOrMemory::XMM(src2), src1)?;
        // load float canonical nan
        // NOTE(review): Size::S64 with a 32-bit immediate — presumably fine since
        // scalar f32 ops ignore the upper lanes; confirm Size::S32 was not intended.
        self.move_location(
            Size::S64,
            Location::Imm32(0x7FC0_0000), // Canonical NaN
            Location::GPR(tmpg1),
        )?;
        self.move_location(Size::S64, Location::GPR(tmpg1), Location::SIMD(src2))?;
        self.assembler
            .emit_vblendvps(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1)?;
        match ret {
            Location::SIMD(x) => {
                self.assembler
                    .emit_vmovaps(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x))?;
            }
            Location::Memory(_, _) | Location::GPR(_) => {
                self.move_location(Size::S64, Location::SIMD(src1), ret)?;
            }
            _ => {
                codegen_error!("singlepass f32_max unreachable");
            }
        }

        self.release_gpr(tmpg2);
        self.release_gpr(tmpg1);
        self.release_simd(tmp2);
        self.release_simd(tmp1);
        Ok(())
    }
7804
7805    fn f32_add(
7806        &mut self,
7807        loc_a: Location,
7808        loc_b: Location,
7809        ret: Location,
7810    ) -> Result<(), CompileError> {
7811        self.emit_relaxed_avx(AssemblerX64::emit_vaddss, loc_a, loc_b, ret)
7812    }
7813
7814    fn f32_sub(
7815        &mut self,
7816        loc_a: Location,
7817        loc_b: Location,
7818        ret: Location,
7819    ) -> Result<(), CompileError> {
7820        self.emit_relaxed_avx(AssemblerX64::emit_vsubss, loc_a, loc_b, ret)
7821    }
7822
7823    fn f32_mul(
7824        &mut self,
7825        loc_a: Location,
7826        loc_b: Location,
7827        ret: Location,
7828    ) -> Result<(), CompileError> {
7829        self.emit_relaxed_avx(AssemblerX64::emit_vmulss, loc_a, loc_b, ret)
7830    }
7831
7832    fn f32_div(
7833        &mut self,
7834        loc_a: Location,
7835        loc_b: Location,
7836        ret: Location,
7837    ) -> Result<(), CompileError> {
7838        self.emit_relaxed_avx(AssemblerX64::emit_vdivss, loc_a, loc_b, ret)
7839    }
7840
7841    fn gen_std_trampoline(
7842        &self,
7843        sig: &FunctionType,
7844        calling_convention: CallingConvention,
7845    ) -> Result<FunctionBody, CompileError> {
7846        // the cpu feature here is irrelevant
7847        let mut a = AssemblerX64::new(0, None)?;
7848
7849        // Calculate stack offset (+1 for the vmctx argument we are going to pass).
7850        let stack_params = (0..sig.params().len() + 1)
7851            .filter(|&i| {
7852                self.get_param_registers(calling_convention)
7853                    .get(i)
7854                    .is_none()
7855            })
7856            .count();
7857        let stack_return_slots = sig
7858            .results()
7859            .len()
7860            .saturating_sub(X86_64_RETURN_VALUE_REGISTERS.len());
7861
7862        // Stack slots are not shared in between function params and return values.
7863        let mut stack_offset = 8 * (stack_params + stack_return_slots) as u32;
7864        let stack_padding: u32 = match calling_convention {
7865            CallingConvention::WindowsFastcall => 32,
7866            _ => 0,
7867        };
7868
7869        // Align to 16 bytes. We push two 8-byte registers below, so here we need to ensure stack_offset % 16 == 8.
7870        if stack_offset % 16 != 8 {
7871            stack_offset += 8;
7872        }
7873
7874        // Used callee-saved registers
7875        a.emit_push(Size::S64, Location::GPR(GPR::R15))?;
7876        a.emit_push(Size::S64, Location::GPR(GPR::R14))?;
7877
7878        // Prepare stack space.
7879        a.emit_sub(
7880            Size::S64,
7881            Location::Imm32(stack_offset + stack_padding),
7882            Location::GPR(GPR::RSP),
7883        )?;
7884
7885        // Arguments
7886        a.emit_mov(
7887            Size::S64,
7888            Location::GPR(self.get_simple_param_location(1, calling_convention)),
7889            Location::GPR(GPR::R15),
7890        )?; // func_ptr
7891        a.emit_mov(
7892            Size::S64,
7893            Location::GPR(self.get_simple_param_location(2, calling_convention)),
7894            Location::GPR(GPR::R14),
7895        )?; // args_rets
7896
7897        // Move arguments to their locations.
7898        // `callee_vmctx` is already in the first argument register, so no need to move.
7899        {
7900            let mut n_stack_args = 0u32;
7901            for (i, _param) in sig.params().iter().enumerate() {
7902                let src_loc = Location::Memory(GPR::R14, (i * 16) as _); // args_rets[i]
7903                let dst_loc = self.get_param_registers(calling_convention).get(1 + i);
7904
7905                match dst_loc {
7906                    Some(&gpr) => {
7907                        a.emit_mov(Size::S64, src_loc, Location::GPR(gpr))?;
7908                    }
7909                    None => {
7910                        // This location is for reading arguments but we are writing arguments here.
7911                        // So recalculate it.
7912                        a.emit_mov(Size::S64, src_loc, Location::GPR(GPR::RAX))?;
7913                        a.emit_mov(
7914                            Size::S64,
7915                            Location::GPR(GPR::RAX),
7916                            Location::Memory(
7917                                GPR::RSP,
7918                                (stack_padding + (n_stack_args + stack_return_slots as u32) * 8)
7919                                    as _,
7920                            ),
7921                        )?;
7922                        n_stack_args += 1;
7923                    }
7924                }
7925            }
7926        }
7927
7928        // Call.
7929        a.emit_call_location(Location::GPR(GPR::R15))?;
7930
7931        // Write return values.
7932        let mut n_stack_return_slots: usize = 0;
7933        for i in 0..sig.results().len() {
7934            let src = if let Some(&reg) = X86_64_RETURN_VALUE_REGISTERS.get(i) {
7935                Location::GPR(reg)
7936            } else {
7937                let loc = Location::GPR(GPR::R15);
7938                a.emit_mov(
7939                    Size::S64,
7940                    Location::Memory(
7941                        GPR::RSP,
7942                        (stack_padding + (n_stack_return_slots as u32 * 8)) as _,
7943                    ),
7944                    loc,
7945                )?;
7946                n_stack_return_slots += 1;
7947                loc
7948            };
7949            a.emit_mov(Size::S64, src, Location::Memory(GPR::R14, (i * 16) as _))?;
7950        }
7951
7952        // Restore stack.
7953        a.emit_add(
7954            Size::S64,
7955            Location::Imm32(stack_offset + stack_padding),
7956            Location::GPR(GPR::RSP),
7957        )?;
7958
7959        // Restore callee-saved registers.
7960        a.emit_pop(Size::S64, Location::GPR(GPR::R14))?;
7961        a.emit_pop(Size::S64, Location::GPR(GPR::R15))?;
7962
7963        a.emit_ret()?;
7964
7965        let mut body = a.finalize().unwrap();
7966        body.shrink_to_fit();
7967
7968        Ok(FunctionBody {
7969            body,
7970            unwind_info: None,
7971        })
7972    }
7973    // Generates dynamic import function call trampoline for a function type.
7974
7975    fn gen_std_dynamic_import_trampoline(
7976        &self,
7977        vmoffsets: &VMOffsets,
7978        sig: &FunctionType,
7979        calling_convention: CallingConvention,
7980    ) -> Result<FunctionBody, CompileError> {
7981        // the cpu feature here is irrelevant
7982        let mut a = AssemblerX64::new(0, None)?;
7983
7984        // Allocate argument array.
7985        let stack_offset: usize = 16 * std::cmp::max(sig.params().len(), sig.results().len()) + 8; // 16 bytes each + 8 bytes sysv call padding
7986        let stack_padding: usize = match calling_convention {
7987            CallingConvention::WindowsFastcall => 32,
7988            _ => 0,
7989        };
7990        a.emit_sub(
7991            Size::S64,
7992            Location::Imm32((stack_offset + stack_padding) as _),
7993            Location::GPR(GPR::RSP),
7994        )?;
7995
7996        // Copy arguments.
7997        if !sig.params().is_empty() {
7998            let mut argalloc = ArgumentRegisterAllocator::default();
7999            argalloc.next(Type::I64, calling_convention).unwrap(); // skip VMContext
8000
8001            let mut stack_param_count: usize = 0;
8002
8003            for (i, ty) in sig.params().iter().enumerate() {
8004                let source_loc = match argalloc.next(*ty, calling_convention)? {
8005                    Some(X64Register::GPR(gpr)) => Location::GPR(gpr),
8006                    Some(X64Register::XMM(xmm)) => Location::SIMD(xmm),
8007                    None => {
8008                        a.emit_mov(
8009                            Size::S64,
8010                            Location::Memory(
8011                                GPR::RSP,
8012                                (stack_padding * 2 + stack_offset + 8 + stack_param_count * 8) as _,
8013                            ),
8014                            Location::GPR(GPR::RAX),
8015                        )?;
8016                        stack_param_count += 1;
8017                        Location::GPR(GPR::RAX)
8018                    }
8019                };
8020                a.emit_mov(
8021                    Size::S64,
8022                    source_loc,
8023                    Location::Memory(GPR::RSP, (stack_padding + i * 16) as _),
8024                )?;
8025
8026                // Zero upper 64 bits.
8027                a.emit_mov(
8028                    Size::S64,
8029                    Location::Imm32(0),
8030                    Location::Memory(GPR::RSP, (stack_padding + i * 16 + 8) as _),
8031                )?;
8032            }
8033        }
8034
8035        match calling_convention {
8036            CallingConvention::WindowsFastcall => {
8037                // Load target address.
8038                a.emit_mov(
8039                    Size::S64,
8040                    Location::Memory(
8041                        GPR::RCX,
8042                        vmoffsets.vmdynamicfunction_import_context_address() as i32,
8043                    ),
8044                    Location::GPR(GPR::RAX),
8045                )?;
8046                // Load values array.
8047                a.emit_lea(
8048                    Size::S64,
8049                    Location::Memory(GPR::RSP, stack_padding as i32),
8050                    Location::GPR(GPR::RDX),
8051                )?;
8052            }
8053            _ => {
8054                // Load target address.
8055                a.emit_mov(
8056                    Size::S64,
8057                    Location::Memory(
8058                        GPR::RDI,
8059                        vmoffsets.vmdynamicfunction_import_context_address() as i32,
8060                    ),
8061                    Location::GPR(GPR::RAX),
8062                )?;
8063                // Load values array.
8064                a.emit_mov(Size::S64, Location::GPR(GPR::RSP), Location::GPR(GPR::RSI))?;
8065            }
8066        };
8067
8068        // Call target.
8069        a.emit_call_location(Location::GPR(GPR::RAX))?;
8070
8071        // Fetch return value.
8072        if !sig.results().is_empty() {
8073            assert_eq!(sig.results().len(), 1);
8074            a.emit_mov(
8075                Size::S64,
8076                Location::Memory(GPR::RSP, stack_padding as i32),
8077                Location::GPR(GPR::RAX),
8078            )?;
8079        }
8080
8081        // Release values array.
8082        a.emit_add(
8083            Size::S64,
8084            Location::Imm32((stack_offset + stack_padding) as _),
8085            Location::GPR(GPR::RSP),
8086        )?;
8087
8088        // Return.
8089        a.emit_ret()?;
8090
8091        let mut body = a.finalize().unwrap();
8092        body.shrink_to_fit();
8093        Ok(FunctionBody {
8094            body,
8095            unwind_info: None,
8096        })
8097    }
8098    // Singlepass calls import functions through a trampoline.
8099
8100    fn gen_import_call_trampoline(
8101        &self,
8102        vmoffsets: &VMOffsets,
8103        index: FunctionIndex,
8104        sig: &FunctionType,
8105        calling_convention: CallingConvention,
8106    ) -> Result<CustomSection, CompileError> {
8107        // the cpu feature here is irrelevant
8108        let mut a = AssemblerX64::new(0, None)?;
8109
8110        // TODO: ARM entry trampoline is not emitted.
8111
8112        // Singlepass internally treats all arguments as integers
8113        // For the standard Windows calling convention requires
8114        //  floating point arguments to be passed in XMM registers for the 4 first arguments only
8115        //  That's the only change to do, other arguments are not to be changed
8116        // For the standard System V calling convention requires
8117        //  floating point arguments to be passed in XMM registers.
8118        //  Translation is expensive, so only do it if needed.
8119        if sig
8120            .params()
8121            .iter()
8122            .any(|&x| x == Type::F32 || x == Type::F64)
8123        {
8124            match calling_convention {
8125                CallingConvention::WindowsFastcall => {
8126                    let mut param_locations: Vec<Location> = vec![];
8127                    static PARAM_REGS: &[GPR] = &[GPR::RDX, GPR::R8, GPR::R9];
8128                    #[allow(clippy::needless_range_loop)]
8129                    for i in 0..sig.params().len() {
8130                        let loc = match i {
8131                            0..=2 => Location::GPR(PARAM_REGS[i]),
8132                            _ => Location::Memory(GPR::RSP, 32 + 8 + ((i - 3) * 8) as i32), // will not be used anyway
8133                        };
8134                        param_locations.push(loc);
8135                    }
8136
8137                    // Copy Float arguments to XMM from GPR.
8138                    let mut argalloc = ArgumentRegisterAllocator::default();
8139                    for (i, ty) in sig.params().iter().enumerate() {
8140                        let prev_loc = param_locations[i];
8141                        match argalloc.next(*ty, calling_convention)? {
8142                            Some(X64Register::GPR(_gpr)) => continue,
8143                            Some(X64Register::XMM(xmm)) => {
8144                                a.emit_mov(Size::S64, prev_loc, Location::SIMD(xmm))?
8145                            }
8146                            None => continue,
8147                        };
8148                    }
8149                }
8150                _ => {
8151                    let mut param_locations = vec![];
8152
8153                    // Allocate stack space for arguments.
8154                    let stack_offset: i32 = if sig.params().len() > 5 {
8155                        5 * 8
8156                    } else {
8157                        (sig.params().len() as i32) * 8
8158                    };
8159                    if stack_offset > 0 {
8160                        a.emit_sub(
8161                            Size::S64,
8162                            Location::Imm32(stack_offset as u32),
8163                            Location::GPR(GPR::RSP),
8164                        )?;
8165                    }
8166
8167                    // Store all arguments to the stack to prevent overwrite.
8168                    static PARAM_REGS: &[GPR] = &[GPR::RSI, GPR::RDX, GPR::RCX, GPR::R8, GPR::R9];
8169                    #[allow(clippy::needless_range_loop)]
8170                    for i in 0..sig.params().len() {
8171                        let loc = match i {
8172                            0..=4 => {
8173                                let loc = Location::Memory(GPR::RSP, (i * 8) as i32);
8174                                a.emit_mov(Size::S64, Location::GPR(PARAM_REGS[i]), loc)?;
8175                                loc
8176                            }
8177                            _ => {
8178                                Location::Memory(GPR::RSP, stack_offset + 8 + ((i - 5) * 8) as i32)
8179                            }
8180                        };
8181                        param_locations.push(loc);
8182                    }
8183
8184                    // Copy arguments.
8185                    let mut argalloc = ArgumentRegisterAllocator::default();
8186                    argalloc.next(Type::I64, calling_convention)?.unwrap(); // skip VMContext
8187                    let mut caller_stack_offset: i32 = 0;
8188                    for (i, ty) in sig.params().iter().enumerate() {
8189                        let prev_loc = param_locations[i];
8190                        let targ = match argalloc.next(*ty, calling_convention)? {
8191                            Some(X64Register::GPR(gpr)) => Location::GPR(gpr),
8192                            Some(X64Register::XMM(xmm)) => Location::SIMD(xmm),
8193                            None => {
8194                                // No register can be allocated. Put this argument on the stack.
8195                                //
8196                                // Since here we never use fewer registers than by the original call, on the caller's frame
8197                                // we always have enough space to store the rearranged arguments, and the copy "backward" between different
8198                                // slots in the caller argument region will always work.
8199                                a.emit_mov(Size::S64, prev_loc, Location::GPR(GPR::RAX))?;
8200                                a.emit_mov(
8201                                    Size::S64,
8202                                    Location::GPR(GPR::RAX),
8203                                    Location::Memory(
8204                                        GPR::RSP,
8205                                        stack_offset + 8 + caller_stack_offset,
8206                                    ),
8207                                )?;
8208                                caller_stack_offset += 8;
8209                                continue;
8210                            }
8211                        };
8212                        a.emit_mov(Size::S64, prev_loc, targ)?;
8213                    }
8214
8215                    // Restore stack pointer.
8216                    if stack_offset > 0 {
8217                        a.emit_add(
8218                            Size::S64,
8219                            Location::Imm32(stack_offset as u32),
8220                            Location::GPR(GPR::RSP),
8221                        )?;
8222                    }
8223                }
8224            }
8225        }
8226
8227        // Emits a tail call trampoline that loads the address of the target import function
8228        // from Ctx and jumps to it.
8229
8230        let offset = vmoffsets.vmctx_vmfunction_import(index);
8231
8232        match calling_convention {
8233            CallingConvention::WindowsFastcall => {
8234                a.emit_mov(
8235                    Size::S64,
8236                    Location::Memory(GPR::RCX, offset as i32), // function pointer
8237                    Location::GPR(GPR::RAX),
8238                )?;
8239                a.emit_mov(
8240                    Size::S64,
8241                    Location::Memory(GPR::RCX, offset as i32 + 8), // target vmctx
8242                    Location::GPR(GPR::RCX),
8243                )?;
8244            }
8245            _ => {
8246                a.emit_mov(
8247                    Size::S64,
8248                    Location::Memory(GPR::RDI, offset as i32), // function pointer
8249                    Location::GPR(GPR::RAX),
8250                )?;
8251                a.emit_mov(
8252                    Size::S64,
8253                    Location::Memory(GPR::RDI, offset as i32 + 8), // target vmctx
8254                    Location::GPR(GPR::RDI),
8255                )?;
8256            }
8257        }
8258        a.emit_host_redirection(GPR::RAX)?;
8259
8260        let mut contents = a.finalize().unwrap();
8261        contents.shrink_to_fit();
8262        let section_body = SectionBody::new_with_vec(contents);
8263
8264        Ok(CustomSection {
8265            protection: CustomSectionProtection::ReadExecute,
8266            alignment: None,
8267            bytes: section_body,
8268            relocations: vec![],
8269        })
8270    }
8271
8272    #[cfg(feature = "unwind")]
8273    fn gen_dwarf_unwind_info(&mut self, code_len: usize) -> Option<UnwindInstructions> {
8274        let mut instructions = vec![];
8275        for &(instruction_offset, ref inst) in &self.unwind_ops {
8276            let instruction_offset = instruction_offset as u32;
8277            match *inst {
8278                UnwindOps::PushFP { up_to_sp } => {
8279                    instructions.push((
8280                        instruction_offset,
8281                        CallFrameInstruction::CfaOffset(up_to_sp as i32),
8282                    ));
8283                    instructions.push((
8284                        instruction_offset,
8285                        CallFrameInstruction::Offset(X86_64::RBP, -(up_to_sp as i32)),
8286                    ));
8287                }
8288                UnwindOps::DefineNewFrame => {
8289                    instructions.push((
8290                        instruction_offset,
8291                        CallFrameInstruction::CfaRegister(X86_64::RBP),
8292                    ));
8293                }
8294                UnwindOps::SaveRegister { reg, bp_neg_offset } => instructions.push((
8295                    instruction_offset,
8296                    CallFrameInstruction::Offset(reg.dwarf_index(), -bp_neg_offset),
8297                )),
8298                UnwindOps::Push2Regs { .. } | UnwindOps::SubtractFP { .. } => unimplemented!(),
8299            }
8300        }
8301        Some(UnwindInstructions {
8302            instructions,
8303            len: code_len as u32,
8304        })
8305    }
8306    #[cfg(not(feature = "unwind"))]
8307
8308    fn gen_dwarf_unwind_info(&mut self, _code_len: usize) -> Option<UnwindInstructions> {
8309        None
8310    }
8311
8312    #[cfg(feature = "unwind")]
8313    fn gen_windows_unwind_info(&mut self, _code_len: usize) -> Option<Vec<u8>> {
8314        let unwind_info = create_unwind_info_from_insts(&self.unwind_ops);
8315        if let Some(unwind) = unwind_info {
8316            let sz = unwind.emit_size();
8317            let mut tbl = vec![0; sz];
8318            unwind.emit(&mut tbl);
8319            Some(tbl)
8320        } else {
8321            None
8322        }
8323    }
8324
8325    #[cfg(not(feature = "unwind"))]
8326
8327    fn gen_windows_unwind_info(&mut self, _code_len: usize) -> Option<Vec<u8>> {
8328        None
8329    }
8330}
8331
8332#[cfg(test)]
8333mod test {
8334    use super::*;
8335    use enumset::enum_set;
8336    use std::str::FromStr;
8337    use wasmer_types::target::{CpuFeature, Target, Triple};
8338
8339    fn test_move_location(machine: &mut MachineX86_64) -> Result<(), CompileError> {
8340        machine.move_location_for_native(
8341            Size::S64,
8342            Location::GPR(GPR::RAX),
8343            Location::GPR(GPR::RCX),
8344        )?;
8345        machine.move_location_for_native(
8346            Size::S64,
8347            Location::GPR(GPR::RAX),
8348            Location::Memory(GPR::RDX, 10),
8349        )?;
8350        machine.move_location_for_native(
8351            Size::S64,
8352            Location::GPR(GPR::RAX),
8353            Location::Memory(GPR::RDX, -10),
8354        )?;
8355        machine.move_location_for_native(
8356            Size::S64,
8357            Location::Memory(GPR::RDX, 10),
8358            Location::GPR(GPR::RAX),
8359        )?;
8360        machine.move_location_for_native(
8361            Size::S64,
8362            Location::Imm64(50),
8363            Location::GPR(GPR::RAX),
8364        )?;
8365        machine.move_location_for_native(
8366            Size::S64,
8367            Location::Imm32(50),
8368            Location::GPR(GPR::RAX),
8369        )?;
8370        machine.move_location_for_native(Size::S64, Location::Imm8(50), Location::GPR(GPR::RAX))?;
8371
8372        machine.move_location_for_native(
8373            Size::S32,
8374            Location::GPR(GPR::RAX),
8375            Location::GPR(GPR::RCX),
8376        )?;
8377        machine.move_location_for_native(
8378            Size::S32,
8379            Location::GPR(GPR::RAX),
8380            Location::Memory(GPR::RDX, 10),
8381        )?;
8382        machine.move_location_for_native(
8383            Size::S32,
8384            Location::GPR(GPR::RAX),
8385            Location::Memory(GPR::RDX, -10),
8386        )?;
8387        machine.move_location_for_native(
8388            Size::S32,
8389            Location::Memory(GPR::RDX, 10),
8390            Location::GPR(GPR::RAX),
8391        )?;
8392        machine.move_location_for_native(
8393            Size::S32,
8394            Location::Imm32(50),
8395            Location::GPR(GPR::RAX),
8396        )?;
8397        machine.move_location_for_native(Size::S32, Location::Imm8(50), Location::GPR(GPR::RAX))?;
8398
8399        machine.move_location_for_native(
8400            Size::S16,
8401            Location::GPR(GPR::RAX),
8402            Location::GPR(GPR::RCX),
8403        )?;
8404        machine.move_location_for_native(
8405            Size::S16,
8406            Location::GPR(GPR::RAX),
8407            Location::Memory(GPR::RDX, 10),
8408        )?;
8409        machine.move_location_for_native(
8410            Size::S16,
8411            Location::GPR(GPR::RAX),
8412            Location::Memory(GPR::RDX, -10),
8413        )?;
8414        machine.move_location_for_native(
8415            Size::S16,
8416            Location::Memory(GPR::RDX, 10),
8417            Location::GPR(GPR::RAX),
8418        )?;
8419        machine.move_location_for_native(Size::S16, Location::Imm8(50), Location::GPR(GPR::RAX))?;
8420
8421        machine.move_location_for_native(
8422            Size::S8,
8423            Location::GPR(GPR::RAX),
8424            Location::GPR(GPR::RCX),
8425        )?;
8426        machine.move_location_for_native(
8427            Size::S8,
8428            Location::GPR(GPR::RAX),
8429            Location::Memory(GPR::RDX, 10),
8430        )?;
8431        machine.move_location_for_native(
8432            Size::S8,
8433            Location::GPR(GPR::RAX),
8434            Location::Memory(GPR::RDX, -10),
8435        )?;
8436        machine.move_location_for_native(
8437            Size::S8,
8438            Location::Memory(GPR::RDX, 10),
8439            Location::GPR(GPR::RAX),
8440        )?;
8441        machine.move_location_for_native(Size::S8, Location::Imm8(50), Location::GPR(GPR::RAX))?;
8442
8443        machine.move_location_for_native(
8444            Size::S64,
8445            Location::SIMD(XMM::XMM0),
8446            Location::GPR(GPR::RAX),
8447        )?;
8448        machine.move_location_for_native(
8449            Size::S64,
8450            Location::SIMD(XMM::XMM0),
8451            Location::Memory(GPR::RDX, -10),
8452        )?;
8453        machine.move_location_for_native(
8454            Size::S64,
8455            Location::GPR(GPR::RAX),
8456            Location::SIMD(XMM::XMM0),
8457        )?;
8458        machine.move_location_for_native(
8459            Size::S64,
8460            Location::Memory(GPR::RDX, -10),
8461            Location::SIMD(XMM::XMM0),
8462        )?;
8463
8464        Ok(())
8465    }
8466
8467    fn test_move_location_extended(
8468        machine: &mut MachineX86_64,
8469        signed: bool,
8470        sized: Size,
8471    ) -> Result<(), CompileError> {
8472        machine.move_location_extend(
8473            sized,
8474            signed,
8475            Location::GPR(GPR::RAX),
8476            Size::S64,
8477            Location::GPR(GPR::RCX),
8478        )?;
8479        machine.move_location_extend(
8480            sized,
8481            signed,
8482            Location::GPR(GPR::RAX),
8483            Size::S64,
8484            Location::Memory(GPR::RCX, 10),
8485        )?;
8486        machine.move_location_extend(
8487            sized,
8488            signed,
8489            Location::Memory(GPR::RAX, 10),
8490            Size::S64,
8491            Location::GPR(GPR::RCX),
8492        )?;
8493        if sized != Size::S32 {
8494            machine.move_location_extend(
8495                sized,
8496                signed,
8497                Location::GPR(GPR::RAX),
8498                Size::S32,
8499                Location::GPR(GPR::RCX),
8500            )?;
8501            machine.move_location_extend(
8502                sized,
8503                signed,
8504                Location::GPR(GPR::RAX),
8505                Size::S32,
8506                Location::Memory(GPR::RCX, 10),
8507            )?;
8508            machine.move_location_extend(
8509                sized,
8510                signed,
8511                Location::Memory(GPR::RAX, 10),
8512                Size::S32,
8513                Location::GPR(GPR::RCX),
8514            )?;
8515        }
8516
8517        Ok(())
8518    }
8519
8520    fn test_binop_op(
8521        machine: &mut MachineX86_64,
8522        op: fn(&mut MachineX86_64, Location, Location, Location) -> Result<(), CompileError>,
8523    ) -> Result<(), CompileError> {
8524        op(
8525            machine,
8526            Location::GPR(GPR::RDX),
8527            Location::GPR(GPR::RDX),
8528            Location::GPR(GPR::RAX),
8529        )?;
8530        op(
8531            machine,
8532            Location::GPR(GPR::RDX),
8533            Location::Imm32(10),
8534            Location::GPR(GPR::RAX),
8535        )?;
8536        op(
8537            machine,
8538            Location::GPR(GPR::RAX),
8539            Location::GPR(GPR::RAX),
8540            Location::GPR(GPR::RAX),
8541        )?;
8542        op(
8543            machine,
8544            Location::Imm32(10),
8545            Location::GPR(GPR::RDX),
8546            Location::GPR(GPR::RAX),
8547        )?;
8548        op(
8549            machine,
8550            Location::GPR(GPR::RAX),
8551            Location::GPR(GPR::RDX),
8552            Location::Memory(GPR::RAX, 10),
8553        )?;
8554        op(
8555            machine,
8556            Location::GPR(GPR::RAX),
8557            Location::Memory(GPR::RDX, 16),
8558            Location::Memory(GPR::RAX, 10),
8559        )?;
8560        op(
8561            machine,
8562            Location::Memory(GPR::RAX, 0),
8563            Location::Memory(GPR::RDX, 16),
8564            Location::Memory(GPR::RAX, 10),
8565        )?;
8566
8567        Ok(())
8568    }
8569
8570    fn test_float_binop_op(
8571        machine: &mut MachineX86_64,
8572        op: fn(&mut MachineX86_64, Location, Location, Location) -> Result<(), CompileError>,
8573    ) -> Result<(), CompileError> {
8574        op(
8575            machine,
8576            Location::SIMD(XMM::XMM3),
8577            Location::SIMD(XMM::XMM2),
8578            Location::SIMD(XMM::XMM0),
8579        )?;
8580        op(
8581            machine,
8582            Location::SIMD(XMM::XMM0),
8583            Location::SIMD(XMM::XMM2),
8584            Location::SIMD(XMM::XMM0),
8585        )?;
8586        op(
8587            machine,
8588            Location::SIMD(XMM::XMM0),
8589            Location::SIMD(XMM::XMM0),
8590            Location::SIMD(XMM::XMM0),
8591        )?;
8592        op(
8593            machine,
8594            Location::Memory(GPR::RBP, 0),
8595            Location::SIMD(XMM::XMM2),
8596            Location::SIMD(XMM::XMM0),
8597        )?;
8598        op(
8599            machine,
8600            Location::Memory(GPR::RBP, 0),
8601            Location::Memory(GPR::RDX, 10),
8602            Location::SIMD(XMM::XMM0),
8603        )?;
8604        op(
8605            machine,
8606            Location::Memory(GPR::RBP, 0),
8607            Location::Memory(GPR::RDX, 16),
8608            Location::Memory(GPR::RAX, 32),
8609        )?;
8610        op(
8611            machine,
8612            Location::SIMD(XMM::XMM0),
8613            Location::Memory(GPR::RDX, 16),
8614            Location::Memory(GPR::RAX, 32),
8615        )?;
8616        op(
8617            machine,
8618            Location::SIMD(XMM::XMM0),
8619            Location::SIMD(XMM::XMM1),
8620            Location::Memory(GPR::RAX, 32),
8621        )?;
8622
8623        Ok(())
8624    }
8625
8626    fn test_float_cmp_op(
8627        machine: &mut MachineX86_64,
8628        op: fn(&mut MachineX86_64, Location, Location, Location) -> Result<(), CompileError>,
8629    ) -> Result<(), CompileError> {
8630        op(
8631            machine,
8632            Location::SIMD(XMM::XMM3),
8633            Location::SIMD(XMM::XMM2),
8634            Location::GPR(GPR::RAX),
8635        )?;
8636        op(
8637            machine,
8638            Location::SIMD(XMM::XMM0),
8639            Location::SIMD(XMM::XMM0),
8640            Location::GPR(GPR::RAX),
8641        )?;
8642        op(
8643            machine,
8644            Location::Memory(GPR::RBP, 0),
8645            Location::SIMD(XMM::XMM2),
8646            Location::GPR(GPR::RAX),
8647        )?;
8648        op(
8649            machine,
8650            Location::Memory(GPR::RBP, 0),
8651            Location::Memory(GPR::RDX, 10),
8652            Location::GPR(GPR::RAX),
8653        )?;
8654        op(
8655            machine,
8656            Location::Memory(GPR::RBP, 0),
8657            Location::Memory(GPR::RDX, 16),
8658            Location::Memory(GPR::RAX, 32),
8659        )?;
8660        op(
8661            machine,
8662            Location::SIMD(XMM::XMM0),
8663            Location::Memory(GPR::RDX, 16),
8664            Location::Memory(GPR::RAX, 32),
8665        )?;
8666        op(
8667            machine,
8668            Location::SIMD(XMM::XMM0),
8669            Location::SIMD(XMM::XMM1),
8670            Location::Memory(GPR::RAX, 32),
8671        )?;
8672
8673        Ok(())
8674    }
8675
8676    #[test]
8677    fn tests_avx() -> Result<(), CompileError> {
8678        let set = enum_set!(CpuFeature::AVX);
8679        let target = Target::new(Triple::from_str("x86_64-linux-gnu").unwrap(), set);
8680        let mut machine = MachineX86_64::new(Some(target))?;
8681
8682        test_move_location(&mut machine)?;
8683        test_move_location_extended(&mut machine, false, Size::S8)?;
8684        test_move_location_extended(&mut machine, false, Size::S16)?;
8685        test_move_location_extended(&mut machine, false, Size::S32)?;
8686        test_move_location_extended(&mut machine, true, Size::S8)?;
8687        test_move_location_extended(&mut machine, true, Size::S16)?;
8688        test_move_location_extended(&mut machine, true, Size::S32)?;
8689        test_binop_op(&mut machine, MachineX86_64::emit_binop_add32)?;
8690        test_binop_op(&mut machine, MachineX86_64::emit_binop_add64)?;
8691        test_binop_op(&mut machine, MachineX86_64::emit_binop_sub32)?;
8692        test_binop_op(&mut machine, MachineX86_64::emit_binop_sub64)?;
8693        test_binop_op(&mut machine, MachineX86_64::emit_binop_and32)?;
8694        test_binop_op(&mut machine, MachineX86_64::emit_binop_and64)?;
8695        test_binop_op(&mut machine, MachineX86_64::emit_binop_xor32)?;
8696        test_binop_op(&mut machine, MachineX86_64::emit_binop_xor64)?;
8697        test_binop_op(&mut machine, MachineX86_64::emit_binop_or32)?;
8698        test_binop_op(&mut machine, MachineX86_64::emit_binop_or64)?;
8699        test_binop_op(&mut machine, MachineX86_64::emit_binop_mul32)?;
8700        test_binop_op(&mut machine, MachineX86_64::emit_binop_mul64)?;
8701        test_float_binop_op(&mut machine, MachineX86_64::f32_add)?;
8702        test_float_binop_op(&mut machine, MachineX86_64::f32_sub)?;
8703        test_float_binop_op(&mut machine, MachineX86_64::f32_mul)?;
8704        test_float_binop_op(&mut machine, MachineX86_64::f32_div)?;
8705        test_float_cmp_op(&mut machine, MachineX86_64::f32_cmp_eq)?;
8706        test_float_cmp_op(&mut machine, MachineX86_64::f32_cmp_lt)?;
8707        test_float_cmp_op(&mut machine, MachineX86_64::f32_cmp_le)?;
8708
8709        Ok(())
8710    }
8711
8712    #[test]
8713    fn tests_sse42() -> Result<(), CompileError> {
8714        let set = enum_set!(CpuFeature::SSE42);
8715        let target = Target::new(Triple::from_str("x86_64-linux-gnu").unwrap(), set);
8716        let mut machine = MachineX86_64::new(Some(target))?;
8717
8718        test_move_location(&mut machine)?;
8719        test_move_location_extended(&mut machine, false, Size::S8)?;
8720        test_move_location_extended(&mut machine, false, Size::S16)?;
8721        test_move_location_extended(&mut machine, false, Size::S32)?;
8722        test_move_location_extended(&mut machine, true, Size::S8)?;
8723        test_move_location_extended(&mut machine, true, Size::S16)?;
8724        test_move_location_extended(&mut machine, true, Size::S32)?;
8725        test_binop_op(&mut machine, MachineX86_64::emit_binop_add32)?;
8726        test_binop_op(&mut machine, MachineX86_64::emit_binop_add64)?;
8727        test_binop_op(&mut machine, MachineX86_64::emit_binop_sub32)?;
8728        test_binop_op(&mut machine, MachineX86_64::emit_binop_sub64)?;
8729        test_binop_op(&mut machine, MachineX86_64::emit_binop_and32)?;
8730        test_binop_op(&mut machine, MachineX86_64::emit_binop_and64)?;
8731        test_binop_op(&mut machine, MachineX86_64::emit_binop_xor32)?;
8732        test_binop_op(&mut machine, MachineX86_64::emit_binop_xor64)?;
8733        test_binop_op(&mut machine, MachineX86_64::emit_binop_or32)?;
8734        test_binop_op(&mut machine, MachineX86_64::emit_binop_or64)?;
8735        test_binop_op(&mut machine, MachineX86_64::emit_binop_mul32)?;
8736        test_binop_op(&mut machine, MachineX86_64::emit_binop_mul64)?;
8737        test_float_binop_op(&mut machine, MachineX86_64::f32_add)?;
8738        test_float_binop_op(&mut machine, MachineX86_64::f32_sub)?;
8739        test_float_binop_op(&mut machine, MachineX86_64::f32_mul)?;
8740        test_float_binop_op(&mut machine, MachineX86_64::f32_div)?;
8741        test_float_cmp_op(&mut machine, MachineX86_64::f32_cmp_eq)?;
8742        test_float_cmp_op(&mut machine, MachineX86_64::f32_cmp_lt)?;
8743        test_float_cmp_op(&mut machine, MachineX86_64::f32_cmp_le)?;
8744
8745        Ok(())
8746    }
8747}