wasmer_compiler_singlepass/
emitter_x64.rs

1use crate::{
2    codegen_error, common_decl::Size, location::Location as AbstractLocation,
3    machine_x64::AssemblerX64,
4};
5pub use crate::{
6    location::Multiplier,
7    machine::{Label, Offset},
8    x64_decl::{GPR, XMM},
9};
10use dynasm::dynasm;
11use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi};
12use wasmer_types::{CompileError, target::CpuFeature};
13
/// Local shadow of `dynasm::dynasm!` that pins the target architecture to x64.
///
/// The `dynasm!` proc-macro tries to auto-detect the architecture from
/// `target_arch`, but a proc-macro only sees the `target_arch` it was built
/// for, which is always the host, even when cross-compiling.
///
/// The first argument is an expression exposing an `inner` field, which is
/// handed to `dynasm::dynasm!` as the assembler; everything after the first
/// `;` is forwarded unchanged, prefixed by an `.arch x64` directive.
macro_rules! dynasm {
    ($a:expr_2021 ; $($tt:tt)*) => {
        dynasm::dynasm!(
            $a.inner
            ; .arch x64
            ; $($tt)*
        )
    };
}
27
28pub type Location = AbstractLocation<GPR, XMM>;
29
/// Condition selector passed to [`EmitterX64::emit_jmp`] and
/// [`EmitterX64::emit_set`].
///
/// The variant names follow the usual x86 condition-code vocabulary; which
/// instruction each variant selects is decided by the `EmitterX64`
/// implementation, which is not part of this definition.
/// NOTE(review): `None` presumably means "unconditional" — confirm against
/// the `emit_jmp` implementation.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Condition {
    None,
    Above,
    AboveEqual,
    Below,
    BelowEqual,
    Greater,
    GreaterEqual,
    Less,
    LessEqual,
    Equal,
    NotEqual,
    Signed,
    Carry,
}
46
47#[derive(Copy, Clone, Debug, Eq, PartialEq)]
48#[allow(dead_code, clippy::upper_case_acronyms)]
49pub enum XMMOrMemory {
50    XMM(XMM),
51    Memory(GPR, i32),
52}
53
54#[derive(Copy, Clone, Debug)]
55#[allow(dead_code, clippy::upper_case_acronyms)]
56pub enum GPROrMemory {
57    GPR(GPR),
58    Memory(GPR, i32),
59}
60
/// Floating-point width selector.
///
/// `move_src_to_dst` uses it to choose between the `movss` (`Single`) and
/// `movsd` (`Double`) register moves.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Precision {
    /// Single precision (`movss`).
    Single,
    /// Double precision (`movsd`).
    Double,
}
65
66#[allow(unused)]
67pub trait EmitterX64 {
68    fn get_simd_arch(&self) -> Option<&CpuFeature>;
69    fn get_label(&mut self) -> Label;
70    fn get_offset(&self) -> Offset;
71    fn get_jmp_instr_size(&self) -> u8;
72
73    fn finalize_function(&mut self) -> Result<(), CompileError> {
74        Ok(())
75    }
76
77    fn emit_u64(&mut self, x: u64) -> Result<(), CompileError>;
78    fn emit_bytes(&mut self, bytes: &[u8]) -> Result<(), CompileError>;
79
80    fn emit_label(&mut self, label: Label) -> Result<(), CompileError>;
81
82    fn emit_nop(&mut self) -> Result<(), CompileError>;
83
84    /// A high-level assembler method. Emits an instruction sequence of length `n` that is functionally
85    /// equivalent to a `nop` instruction, without guarantee about the underlying implementation.
86    fn emit_nop_n(&mut self, n: usize) -> Result<(), CompileError>;
87
88    fn emit_mov(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
89    fn emit_lea(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
90    fn emit_lea_label(&mut self, label: Label, dst: Location) -> Result<(), CompileError>;
91    fn emit_cdq(&mut self) -> Result<(), CompileError>;
92    fn emit_cqo(&mut self) -> Result<(), CompileError>;
93    fn emit_xor(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
94    fn emit_jmp(&mut self, condition: Condition, label: Label) -> Result<(), CompileError>;
95    fn emit_jmp_location(&mut self, loc: Location) -> Result<(), CompileError>;
96    fn emit_set(&mut self, condition: Condition, dst: GPR) -> Result<(), CompileError>;
97    fn emit_push(&mut self, sz: Size, src: Location) -> Result<(), CompileError>;
98    fn emit_pop(&mut self, sz: Size, dst: Location) -> Result<(), CompileError>;
99    fn emit_cmp(&mut self, sz: Size, left: Location, right: Location) -> Result<(), CompileError>;
100    fn emit_add(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
101    fn emit_sub(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
102    fn emit_neg(&mut self, sz: Size, value: Location) -> Result<(), CompileError>;
103    fn emit_imul(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
104    fn emit_imul_imm32_gpr64(&mut self, src: u32, dst: GPR) -> Result<(), CompileError>;
105    fn emit_div(&mut self, sz: Size, divisor: Location) -> Result<(), CompileError>;
106    fn emit_idiv(&mut self, sz: Size, divisor: Location) -> Result<(), CompileError>;
107    fn emit_shl(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
108    fn emit_shr(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
109    fn emit_sar(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
110    fn emit_rol(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
111    fn emit_ror(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
112    fn emit_and(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
113    fn emit_test(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
114    fn emit_or(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
115    fn emit_bsr(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
116    fn emit_bsf(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
117    fn emit_popcnt(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
118    fn emit_movzx(
119        &mut self,
120        sz_src: Size,
121        src: Location,
122        sz_dst: Size,
123        dst: Location,
124    ) -> Result<(), CompileError>;
125    fn emit_movsx(
126        &mut self,
127        sz_src: Size,
128        src: Location,
129        sz_dst: Size,
130        dst: Location,
131    ) -> Result<(), CompileError>;
132    fn emit_xchg(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
133    fn emit_lock_xadd(
134        &mut self,
135        sz: Size,
136        src: Location,
137        dst: Location,
138    ) -> Result<(), CompileError>;
139    fn emit_lock_cmpxchg(
140        &mut self,
141        sz: Size,
142        src: Location,
143        dst: Location,
144    ) -> Result<(), CompileError>;
145    fn emit_rep_stosq(&mut self) -> Result<(), CompileError>;
146
147    fn emit_btc_gpr_imm8_32(&mut self, src: u8, dst: GPR) -> Result<(), CompileError>;
148    fn emit_btc_gpr_imm8_64(&mut self, src: u8, dst: GPR) -> Result<(), CompileError>;
149
150    fn emit_cmovae_gpr_32(&mut self, src: GPR, dst: GPR) -> Result<(), CompileError>;
151    fn emit_cmovae_gpr_64(&mut self, src: GPR, dst: GPR) -> Result<(), CompileError>;
152
153    fn emit_vmovaps(&mut self, src: XMMOrMemory, dst: XMMOrMemory) -> Result<(), CompileError>;
154    fn emit_vmovapd(&mut self, src: XMMOrMemory, dst: XMMOrMemory) -> Result<(), CompileError>;
155    fn emit_vxorps(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
156    fn emit_vxorpd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
157
158    fn emit_vaddss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
159    fn emit_vaddsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
160    fn emit_vsubss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
161    fn emit_vsubsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
162    fn emit_vmulss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
163    fn emit_vmulsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
164    fn emit_vdivss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
165    fn emit_vdivsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
166    fn emit_vmaxss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
167    fn emit_vmaxsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
168    fn emit_vminss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
169    fn emit_vminsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
170
171    fn emit_vcmpeqss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM)
172    -> Result<(), CompileError>;
173    fn emit_vcmpeqsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM)
174    -> Result<(), CompileError>;
175
176    fn emit_vcmpneqss(
177        &mut self,
178        src1: XMM,
179        src2: XMMOrMemory,
180        dst: XMM,
181    ) -> Result<(), CompileError>;
182    fn emit_vcmpneqsd(
183        &mut self,
184        src1: XMM,
185        src2: XMMOrMemory,
186        dst: XMM,
187    ) -> Result<(), CompileError>;
188
189    fn emit_vcmpltss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM)
190    -> Result<(), CompileError>;
191    fn emit_vcmpltsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM)
192    -> Result<(), CompileError>;
193
194    fn emit_vcmpless(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM)
195    -> Result<(), CompileError>;
196    fn emit_vcmplesd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM)
197    -> Result<(), CompileError>;
198
199    fn emit_vcmpgtss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM)
200    -> Result<(), CompileError>;
201    fn emit_vcmpgtsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM)
202    -> Result<(), CompileError>;
203
204    fn emit_vcmpgess(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM)
205    -> Result<(), CompileError>;
206    fn emit_vcmpgesd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM)
207    -> Result<(), CompileError>;
208
209    fn emit_vcmpunordss(
210        &mut self,
211        src1: XMM,
212        src2: XMMOrMemory,
213        dst: XMM,
214    ) -> Result<(), CompileError>;
215    fn emit_vcmpunordsd(
216        &mut self,
217        src1: XMM,
218        src2: XMMOrMemory,
219        dst: XMM,
220    ) -> Result<(), CompileError>;
221
222    fn emit_vcmpordss(
223        &mut self,
224        src1: XMM,
225        src2: XMMOrMemory,
226        dst: XMM,
227    ) -> Result<(), CompileError>;
228    fn emit_vcmpordsd(
229        &mut self,
230        src1: XMM,
231        src2: XMMOrMemory,
232        dst: XMM,
233    ) -> Result<(), CompileError>;
234
235    fn emit_vsqrtss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
236    fn emit_vsqrtsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
237
238    fn emit_vroundss_nearest(
239        &mut self,
240        src1: XMM,
241        src2: XMMOrMemory,
242        dst: XMM,
243    ) -> Result<(), CompileError>;
244    fn emit_vroundss_floor(
245        &mut self,
246        src1: XMM,
247        src2: XMMOrMemory,
248        dst: XMM,
249    ) -> Result<(), CompileError>;
250    fn emit_vroundss_ceil(
251        &mut self,
252        src1: XMM,
253        src2: XMMOrMemory,
254        dst: XMM,
255    ) -> Result<(), CompileError>;
256    fn emit_vroundss_trunc(
257        &mut self,
258        src1: XMM,
259        src2: XMMOrMemory,
260        dst: XMM,
261    ) -> Result<(), CompileError>;
262    fn emit_vroundsd_nearest(
263        &mut self,
264        src1: XMM,
265        src2: XMMOrMemory,
266        dst: XMM,
267    ) -> Result<(), CompileError>;
268    fn emit_vroundsd_floor(
269        &mut self,
270        src1: XMM,
271        src2: XMMOrMemory,
272        dst: XMM,
273    ) -> Result<(), CompileError>;
274    fn emit_vroundsd_ceil(
275        &mut self,
276        src1: XMM,
277        src2: XMMOrMemory,
278        dst: XMM,
279    ) -> Result<(), CompileError>;
280    fn emit_vroundsd_trunc(
281        &mut self,
282        src1: XMM,
283        src2: XMMOrMemory,
284        dst: XMM,
285    ) -> Result<(), CompileError>;
286
287    fn emit_vcvtss2sd(
288        &mut self,
289        src1: XMM,
290        src2: XMMOrMemory,
291        dst: XMM,
292    ) -> Result<(), CompileError>;
293    fn emit_vcvtsd2ss(
294        &mut self,
295        src1: XMM,
296        src2: XMMOrMemory,
297        dst: XMM,
298    ) -> Result<(), CompileError>;
299
300    fn emit_ucomiss(&mut self, src: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
301    fn emit_ucomisd(&mut self, src: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
302
303    fn emit_cvttss2si_32(&mut self, src: XMMOrMemory, dst: GPR) -> Result<(), CompileError>;
304    fn emit_cvttss2si_64(&mut self, src: XMMOrMemory, dst: GPR) -> Result<(), CompileError>;
305    fn emit_cvttsd2si_32(&mut self, src: XMMOrMemory, dst: GPR) -> Result<(), CompileError>;
306    fn emit_cvttsd2si_64(&mut self, src: XMMOrMemory, dst: GPR) -> Result<(), CompileError>;
307
308    fn emit_vcvtsi2ss_32(
309        &mut self,
310        src1: XMM,
311        src2: GPROrMemory,
312        dst: XMM,
313    ) -> Result<(), CompileError>;
314    fn emit_vcvtsi2ss_64(
315        &mut self,
316        src1: XMM,
317        src2: GPROrMemory,
318        dst: XMM,
319    ) -> Result<(), CompileError>;
320    fn emit_vcvtsi2sd_32(
321        &mut self,
322        src1: XMM,
323        src2: GPROrMemory,
324        dst: XMM,
325    ) -> Result<(), CompileError>;
326    fn emit_vcvtsi2sd_64(
327        &mut self,
328        src1: XMM,
329        src2: GPROrMemory,
330        dst: XMM,
331    ) -> Result<(), CompileError>;
332
333    fn emit_vblendvps(
334        &mut self,
335        src1: XMM,
336        src2: XMMOrMemory,
337        mask: XMM,
338        dst: XMM,
339    ) -> Result<(), CompileError>;
340    fn emit_vblendvpd(
341        &mut self,
342        src1: XMM,
343        src2: XMMOrMemory,
344        mask: XMM,
345        dst: XMM,
346    ) -> Result<(), CompileError>;
347
348    fn emit_test_gpr_64(&mut self, reg: GPR) -> Result<(), CompileError>;
349
350    fn emit_ud2(&mut self) -> Result<(), CompileError>;
351    fn emit_ud1_payload(&mut self, payload: u8) -> Result<(), CompileError>;
352    fn emit_ret(&mut self) -> Result<(), CompileError>;
353    fn emit_call_label(&mut self, label: Label) -> Result<(), CompileError>;
354    fn emit_call_location(&mut self, loc: Location) -> Result<(), CompileError>;
355
356    fn emit_call_register(&mut self, reg: GPR) -> Result<(), CompileError>;
357
358    fn emit_bkpt(&mut self) -> Result<(), CompileError>;
359
360    fn emit_host_redirection(&mut self, target: GPR) -> Result<(), CompileError>;
361
362    fn arch_has_itruncf(&self) -> bool {
363        false
364    }
365    fn arch_emit_i32_trunc_sf32(&mut self, _src: XMM, _dst: GPR) -> Result<(), CompileError> {
366        codegen_error!("singplepass arch_emit_i32_trunc_sf32 unimplemented")
367    }
368    fn arch_emit_i32_trunc_sf64(&mut self, _src: XMM, _dst: GPR) -> Result<(), CompileError> {
369        codegen_error!("singplepass arch_emit_i32_trunc_sf64 unimplemented")
370    }
371    fn arch_emit_i32_trunc_uf32(&mut self, _src: XMM, _dst: GPR) -> Result<(), CompileError> {
372        codegen_error!("singplepass arch_emit_i32_trunc_uf32 unimplemented")
373    }
374    fn arch_emit_i32_trunc_uf64(&mut self, _src: XMM, _dst: GPR) -> Result<(), CompileError> {
375        codegen_error!("singplepass arch_emit_i32_trunc_uf64 unimplemented")
376    }
377    fn arch_emit_i64_trunc_sf32(&mut self, _src: XMM, _dst: GPR) -> Result<(), CompileError> {
378        codegen_error!("singplepass arch_emit_i64_trunc_sf32 unimplemented")
379    }
380    fn arch_emit_i64_trunc_sf64(&mut self, _src: XMM, _dst: GPR) -> Result<(), CompileError> {
381        codegen_error!("singplepass arch_emit_i64_trunc_sf64 unimplemented")
382    }
383    fn arch_emit_i64_trunc_uf32(&mut self, _src: XMM, _dst: GPR) -> Result<(), CompileError> {
384        codegen_error!("singplepass arch_emit_i64_trunc_uf32 unimplemented")
385    }
386    fn arch_emit_i64_trunc_uf64(&mut self, _src: XMM, _dst: GPR) -> Result<(), CompileError> {
387        codegen_error!("singplepass arch_emit_i64_trunc_uf64 unimplemented")
388    }
389
390    fn arch_has_fconverti(&self) -> bool {
391        false
392    }
393    fn arch_emit_f32_convert_si32(&mut self, _src: GPR, _dst: XMM) -> Result<(), CompileError> {
394        codegen_error!("singlepass arch_emit_f32_convert_si32 unimplemented")
395    }
396    fn arch_emit_f32_convert_si64(&mut self, _src: GPR, _dst: XMM) -> Result<(), CompileError> {
397        codegen_error!("singlepass arch_emit_f32_convert_si64 unimplemented")
398    }
399    fn arch_emit_f32_convert_ui32(&mut self, _src: GPR, _dst: XMM) -> Result<(), CompileError> {
400        codegen_error!("singlepass arch_emit_f32_convert_ui32 unimplemented")
401    }
402    fn arch_emit_f32_convert_ui64(&mut self, _src: GPR, _dst: XMM) -> Result<(), CompileError> {
403        codegen_error!("singlepass arch_emit_f32_convert_ui64 unimplemented")
404    }
405    fn arch_emit_f64_convert_si32(&mut self, _src: GPR, _dst: XMM) -> Result<(), CompileError> {
406        codegen_error!("singlepass arch_emit_f64_convert_si32 unimplemented")
407    }
408    fn arch_emit_f64_convert_si64(&mut self, _src: GPR, _dst: XMM) -> Result<(), CompileError> {
409        codegen_error!("singlepass arch_emit_f64_convert_si64 unimplemented")
410    }
411    fn arch_emit_f64_convert_ui32(&mut self, _src: GPR, _dst: XMM) -> Result<(), CompileError> {
412        codegen_error!("singlepass arch_emit_f64_convert_ui32 unimplemented")
413    }
414    fn arch_emit_f64_convert_ui64(&mut self, _src: GPR, _dst: XMM) -> Result<(), CompileError> {
415        codegen_error!("singlepass arch_emit_f64_convert_ui64 unimplemented")
416    }
417
418    fn arch_has_fneg(&self) -> bool {
419        false
420    }
421    fn arch_emit_f32_neg(&mut self, _src: XMM, _dst: XMM) -> Result<(), CompileError> {
422        codegen_error!("singlepass arch_emit_f32_neg unimplemented")
423    }
424    fn arch_emit_f64_neg(&mut self, _src: XMM, _dst: XMM) -> Result<(), CompileError> {
425        codegen_error!("singlepass arch_emit_f64_neg unimplemented")
426    }
427
428    fn arch_has_xzcnt(&self) -> bool {
429        false
430    }
431    fn arch_emit_lzcnt(
432        &mut self,
433        _sz: Size,
434        _src: Location,
435        _dst: Location,
436    ) -> Result<(), CompileError> {
437        codegen_error!("singlepass arch_emit_lzcnt unimplemented")
438    }
439    fn arch_emit_tzcnt(
440        &mut self,
441        _sz: Size,
442        _src: Location,
443        _dst: Location,
444    ) -> Result<(), CompileError> {
445        codegen_error!("singlepass arch_emit_tzcnt unimplemented")
446    }
447
448    fn arch_supports_canonicalize_nan(&self) -> bool {
449        true
450    }
451
452    fn arch_requires_indirect_call_trampoline(&self) -> bool {
453        false
454    }
455
456    fn arch_emit_indirect_call_with_trampoline(
457        &mut self,
458        _loc: Location,
459    ) -> Result<(), CompileError> {
460        codegen_error!("singlepass arch_emit_indirect_call_with_trampoline unimplemented")
461    }
462
463    // Emits entry trampoline just before the real function.
464    fn arch_emit_entry_trampoline(&mut self) -> Result<(), CompileError> {
465        Ok(())
466    }
467
468    // Byte offset from the beginning of a `mov Imm64, GPR` instruction to the imm64 value.
469    // Required to support emulation on Aarch64.
470    fn arch_mov64_imm_offset(&self) -> Result<usize, CompileError> {
471        codegen_error!("singlepass arch_mov64_imm_offset unimplemented")
472    }
473}
474
/// Emits the one-operand instruction `$ins` on a general-purpose register.
///
/// * `(Size::S32, Location::GPR(_))` uses the `Rd` (32-bit) register form.
/// * `(Size::S64, Location::GPR(_))` uses the `Rq` (64-bit) register form.
/// * Any other `($sz, $loc)` pair evaluates the `$otherwise` block.
macro_rules! unop_gpr {
    ($ins:ident, $assembler:tt, $sz:expr_2021, $loc:expr_2021, $otherwise:block) => {
        match ($sz, $loc) {
            (Size::S32, Location::GPR(loc)) => {
                dynasm!($assembler ; $ins Rd(loc));
            },
            (Size::S64, Location::GPR(loc)) => {
                dynasm!($assembler ; $ins Rq(loc));
            },
            _ => $otherwise
        }
    };
}
488
/// Emits the one-operand instruction `$ins` on a memory operand.
///
/// * `(Size::S32, Location::Memory(base, disp))` emits `$ins DWORD [base + disp]`.
/// * `(Size::S64, Location::Memory(base, disp))` emits `$ins QWORD [base + disp]`.
/// * Any other `($sz, $loc)` pair evaluates the `$otherwise` block.
macro_rules! unop_mem {
    ($ins:ident, $assembler:tt, $sz:expr_2021, $loc:expr_2021, $otherwise:block) => {
        match ($sz, $loc) {
            (Size::S32, Location::Memory(loc, disp)) => {
                dynasm!($assembler ; $ins DWORD [Rq(loc) + disp] );
            },
            (Size::S64, Location::Memory(loc, disp)) => {
                dynasm!($assembler ; $ins QWORD [Rq(loc) + disp] );
            },
            _ => $otherwise
        }
    };
}
502
/// Emits the one-operand instruction `$ins` on either a register or a memory
/// operand: tries `unop_gpr!` first and falls back to `unop_mem!`, which in
/// turn falls back to `$otherwise`.
///
/// `$sz` and `$loc` are expanded into both nested `match` scrutinees, so they
/// may be evaluated twice; pass side-effect-free expressions.
macro_rules! unop_gpr_or_mem {
    ($ins:ident, $assembler:tt, $sz:expr_2021, $loc:expr_2021, $otherwise:block) => {
        unop_gpr!($ins, $assembler, $sz, $loc, {
            unop_mem!($ins, $assembler, $sz, $loc, $otherwise)
        })
    };
}
510
/// Emits the two-operand instruction `$ins` with a 32-bit immediate source and
/// a general-purpose register destination.
///
/// Handles `Location::Imm32` -> `Location::GPR` for `Size::S32` (`Rd` form)
/// and `Size::S64` (`Rq` form); the immediate is passed as `src as i32`.
/// Any other combination evaluates the `$otherwise` block.
macro_rules! binop_imm32_gpr {
    ($ins:ident, $assembler:tt, $sz:expr_2021, $src:expr_2021, $dst:expr_2021, $otherwise:block) => {
        match ($sz, $src, $dst) {
            (Size::S32, Location::Imm32(src), Location::GPR(dst)) => {
                dynasm!($assembler ; $ins Rd(dst), src as i32); // IMM32_2GPR
            },
            (Size::S64, Location::Imm32(src), Location::GPR(dst)) => {
                dynasm!($assembler ; $ins Rq(dst), src as i32); // IMM32_2GPR
            },
            _ => $otherwise
        }
    };
}
524
/// Emits the two-operand instruction `$ins` with a 32-bit immediate source and
/// a memory destination.
///
/// Handles `Location::Imm32` -> `Location::Memory(base, disp)` for
/// `Size::S32` (`DWORD [base + disp]`) and `Size::S64`
/// (`QWORD [base + disp]`); the immediate is passed as `src as i32`.
/// Any other combination evaluates the `$otherwise` block.
macro_rules! binop_imm32_mem {
    ($ins:ident, $assembler:tt, $sz:expr_2021, $src:expr_2021, $dst:expr_2021, $otherwise:block) => {
        match ($sz, $src, $dst) {
            (Size::S32, Location::Imm32(src), Location::Memory(dst, disp)) => {
                dynasm!($assembler ; $ins DWORD [Rq(dst) + disp], src as i32); // IMM32_2MEM
            },
            (Size::S64, Location::Imm32(src), Location::Memory(dst, disp)) => {
                dynasm!($assembler ; $ins QWORD [Rq(dst) + disp], src as i32); // IMM32_2MEM
            },
            _ => $otherwise
        }
    };
}
538
/// Emits the two-operand instruction `$ins` with a 64-bit immediate source and
/// a 64-bit general-purpose register destination.
///
/// Only `(Size::S64, Location::Imm64, Location::GPR)` is handled; the
/// immediate is passed as `QWORD src as i64`. Any other combination evaluates
/// the `$otherwise` block.
macro_rules! binop_imm64_gpr {
    ($ins:ident, $assembler:tt, $sz:expr_2021, $src:expr_2021, $dst:expr_2021, $otherwise:block) => {
        match ($sz, $src, $dst) {
            (Size::S64, Location::Imm64(src), Location::GPR(dst)) => {
                dynasm!($assembler ; $ins Rq(dst), QWORD src as i64); // IMM64_2GPR
            },
            _ => $otherwise
        }
    };
}
549
/// Emits the two-operand instruction `$ins` between two general-purpose
/// registers.
///
/// Handles `Location::GPR` -> `Location::GPR` for `Size::S32` (`Rd` forms)
/// and `Size::S64` (`Rq` forms); the destination is the first assembly
/// operand. Any other combination evaluates the `$otherwise` block.
macro_rules! binop_gpr_gpr {
    ($ins:ident, $assembler:tt, $sz:expr_2021, $src:expr_2021, $dst:expr_2021, $otherwise:block) => {
        match ($sz, $src, $dst) {
            (Size::S32, Location::GPR(src), Location::GPR(dst)) => {
                dynasm!($assembler ; $ins Rd(dst), Rd(src)); // GPR2GPR
            },
            (Size::S64, Location::GPR(src), Location::GPR(dst)) => {
                dynasm!($assembler ; $ins Rq(dst), Rq(src)); // GPR2GPR
            },
            _ => $otherwise
        }
    };
}
563
/// Emits the two-operand instruction `$ins` with a general-purpose register
/// source and a memory destination.
///
/// Handles `Location::GPR` -> `Location::Memory(base, disp)` for `Size::S32`
/// and `Size::S64`. The memory operand carries no explicit size keyword; the
/// only width information in the emitted operands is the source register form
/// (`Rd` or `Rq`). Any other combination evaluates the `$otherwise` block.
macro_rules! binop_gpr_mem {
    ($ins:ident, $assembler:tt, $sz:expr_2021, $src:expr_2021, $dst:expr_2021, $otherwise:block) => {
        match ($sz, $src, $dst) {
            (Size::S32, Location::GPR(src), Location::Memory(dst, disp)) => {
                dynasm!($assembler ; $ins [Rq(dst) + disp], Rd(src)); // GPR2MEM
            },
            (Size::S64, Location::GPR(src), Location::Memory(dst, disp)) => {
                dynasm!($assembler ; $ins [Rq(dst) + disp], Rq(src)); // GPR2MEM
            },
            _ => $otherwise
        }
    };
}
577
/// Emits the two-operand instruction `$ins` with a memory source and a
/// general-purpose register destination.
///
/// Handles `Location::Memory(base, disp)` -> `Location::GPR` for `Size::S32`
/// and `Size::S64`. The memory operand carries no explicit size keyword; the
/// only width information in the emitted operands is the destination register
/// form (`Rd` or `Rq`). Any other combination evaluates the `$otherwise` block.
macro_rules! binop_mem_gpr {
    ($ins:ident, $assembler:tt, $sz:expr_2021, $src:expr_2021, $dst:expr_2021, $otherwise:block) => {
        match ($sz, $src, $dst) {
            (Size::S32, Location::Memory(src, disp), Location::GPR(dst)) => {
                dynasm!($assembler ; $ins Rd(dst), [Rq(src) + disp]); // MEM2GPR
            },
            (Size::S64, Location::Memory(src, disp), Location::GPR(dst)) => {
                dynasm!($assembler ; $ins Rq(dst), [Rq(src) + disp]); // MEM2GPR
            },
            _ => $otherwise
        }
    };
}
591
/// Emits the two-operand integer instruction `$ins` for every non-floating-point
/// operand combination supported by the helper macros, tried in this order:
/// imm32 -> GPR, imm32 -> memory, GPR -> GPR, GPR -> memory, memory -> GPR.
/// If none matches, `$otherwise` is evaluated.
///
/// 64-bit immediates are not covered (`binop_imm64_gpr!` is not part of the
/// chain). `$sz`, `$src` and `$dst` are expanded into each nested `match`
/// scrutinee, so they may be evaluated several times; pass side-effect-free
/// expressions.
macro_rules! binop_all_nofp {
    ($ins:ident, $assembler:tt, $sz:expr_2021, $src:expr_2021, $dst:expr_2021, $otherwise:block) => {
        binop_imm32_gpr!($ins, $assembler, $sz, $src, $dst, {
            binop_imm32_mem!($ins, $assembler, $sz, $src, $dst, {
                binop_gpr_gpr!($ins, $assembler, $sz, $src, $dst, {
                    binop_gpr_mem!($ins, $assembler, $sz, $src, $dst, {
                        binop_mem_gpr!($ins, $assembler, $sz, $src, $dst, $otherwise)
                    })
                })
            })
        })
    };
}
605
/// Emits the shift/rotate-style instruction `$ins`, whose count operand is
/// either the `cl` register or an 8-bit immediate.
///
/// For both `Size::S32` and `Size::S64` the accepted combinations are:
/// * `Location::GPR(GPR::RCX)` as source (emitted as `cl`) with a GPR or
///   memory destination;
/// * `Location::Imm8(imm)` as source (emitted as `imm as i8`) with a GPR or
///   memory destination.
///
/// A GPR source other than `RCX`, or any other combination, evaluates the
/// `$otherwise` block.
macro_rules! binop_shift {
    ($ins:ident, $assembler:tt, $sz:expr_2021, $src:expr_2021, $dst:expr_2021, $otherwise:block) => {
        match ($sz, $src, $dst) {
            // 32-bit destinations.
            (Size::S32, Location::GPR(GPR::RCX), Location::GPR(dst)) => {
                dynasm!($assembler ; $ins Rd(dst), cl);
            },
            (Size::S32, Location::GPR(GPR::RCX), Location::Memory(dst, disp)) => {
                dynasm!($assembler ; $ins DWORD [Rq(dst) + disp], cl);
            },
            (Size::S32, Location::Imm8(imm), Location::GPR(dst)) => {
                dynasm!($assembler ; $ins Rd(dst), imm as i8);
            },
            (Size::S32, Location::Imm8(imm), Location::Memory(dst, disp)) => {
                dynasm!($assembler ; $ins DWORD [Rq(dst) + disp], imm as i8);
            },
            // 64-bit destinations.
            (Size::S64, Location::GPR(GPR::RCX), Location::GPR(dst)) => {
                dynasm!($assembler ; $ins Rq(dst), cl);
            },
            (Size::S64, Location::GPR(GPR::RCX), Location::Memory(dst, disp)) => {
                dynasm!($assembler ; $ins QWORD [Rq(dst) + disp], cl);
            },
            (Size::S64, Location::Imm8(imm), Location::GPR(dst)) => {
                dynasm!($assembler ; $ins Rq(dst), imm as i8);
            },
            (Size::S64, Location::Imm8(imm), Location::Memory(dst, disp)) => {
                dynasm!($assembler ; $ins QWORD [Rq(dst) + disp], imm as i8);
            },
            _ => $otherwise
        }
    }
}
637
/// Emits the jump-style instruction `$ins` targeting the dynamic label
/// `$label` (dynasm's `=>label` operand syntax).
macro_rules! jmp_op {
    ($ins:ident, $assembler:tt, $label:ident) => {
        dynasm!($assembler ; $ins =>$label)
    }
}
643
644/// Move a single or double precision XMM value to another if src and destination
645/// are not the same.
646///
647/// TODO: Can we assume data is aligned and packed? If so, this function isn't necessary
648/// TODO: as we can use [`EmitterX64::emit_vmovaps`] and [`EmitterX64::emit_vmovadp`]
649/// TODO: instead
650fn move_src_to_dst(emitter: &mut AssemblerX64, precision: Precision, src: XMM, dst: XMM) {
651    if src == dst {
652        return;
653    }
654    match precision {
655        Precision::Single => match src {
656            XMM::XMM0 => dynasm!(emitter ; movss Rx(dst), xmm0),
657            XMM::XMM1 => dynasm!(emitter ; movss Rx(dst), xmm1),
658            XMM::XMM2 => dynasm!(emitter ; movss Rx(dst), xmm2),
659            XMM::XMM3 => dynasm!(emitter ; movss Rx(dst), xmm3),
660            XMM::XMM4 => dynasm!(emitter ; movss Rx(dst), xmm4),
661            XMM::XMM5 => dynasm!(emitter ; movss Rx(dst), xmm5),
662            XMM::XMM6 => dynasm!(emitter ; movss Rx(dst), xmm6),
663            XMM::XMM7 => dynasm!(emitter ; movss Rx(dst), xmm7),
664            XMM::XMM8 => dynasm!(emitter ; movss Rx(dst), xmm8),
665            XMM::XMM9 => dynasm!(emitter ; movss Rx(dst), xmm9),
666            XMM::XMM10 => dynasm!(emitter ; movss Rx(dst), xmm10),
667            XMM::XMM11 => dynasm!(emitter ; movss Rx(dst), xmm11),
668            XMM::XMM12 => dynasm!(emitter ; movss Rx(dst), xmm12),
669            XMM::XMM13 => dynasm!(emitter ; movss Rx(dst), xmm13),
670            XMM::XMM14 => dynasm!(emitter ; movss Rx(dst), xmm14),
671            XMM::XMM15 => dynasm!(emitter ; movss Rx(dst), xmm15),
672        },
673        Precision::Double => match src {
674            XMM::XMM0 => dynasm!(emitter ; movsd Rx(dst), xmm0),
675            XMM::XMM1 => dynasm!(emitter ; movsd Rx(dst), xmm1),
676            XMM::XMM2 => dynasm!(emitter ; movsd Rx(dst), xmm2),
677            XMM::XMM3 => dynasm!(emitter ; movsd Rx(dst), xmm3),
678            XMM::XMM4 => dynasm!(emitter ; movsd Rx(dst), xmm4),
679            XMM::XMM5 => dynasm!(emitter ; movsd Rx(dst), xmm5),
680            XMM::XMM6 => dynasm!(emitter ; movsd Rx(dst), xmm6),
681            XMM::XMM7 => dynasm!(emitter ; movsd Rx(dst), xmm7),
682            XMM::XMM8 => dynasm!(emitter ; movsd Rx(dst), xmm8),
683            XMM::XMM9 => dynasm!(emitter ; movsd Rx(dst), xmm9),
684            XMM::XMM10 => dynasm!(emitter ; movsd Rx(dst), xmm10),
685            XMM::XMM11 => dynasm!(emitter ; movsd Rx(dst), xmm11),
686            XMM::XMM12 => dynasm!(emitter ; movsd Rx(dst), xmm12),
687            XMM::XMM13 => dynasm!(emitter ; movsd Rx(dst), xmm13),
688            XMM::XMM14 => dynasm!(emitter ; movsd Rx(dst), xmm14),
689            XMM::XMM15 => dynasm!(emitter ; movsd Rx(dst), xmm15),
690        },
691    }
692}
693
/// Emits the three-operand AVX instruction `$ins dst, src1, src2`.
///
/// `$src2` is an `XMMOrMemory` (register, or `[base + disp]` memory operand);
/// `$dst` and the register form of `$src2` are passed as dynamic `Rx(..)`
/// registers, while `$src1` is dispatched to a statically named register.
macro_rules! avx_fn {
    ($ins:ident, $emitter:ident, $src1:ident, $src2:ident, $dst:ident) => {
        // Dynasm bug: AVX instructions are not encoded correctly.
        // (Hence the exhaustive match on `$src1` with literal register names.)
        match $src2 {
            XMMOrMemory::XMM(x) => match $src1 {
                XMM::XMM0 => dynasm!($emitter ; $ins Rx($dst), xmm0, Rx(x)),
                XMM::XMM1 => dynasm!($emitter ; $ins Rx($dst), xmm1, Rx(x)),
                XMM::XMM2 => dynasm!($emitter ; $ins Rx($dst), xmm2, Rx(x)),
                XMM::XMM3 => dynasm!($emitter ; $ins Rx($dst), xmm3, Rx(x)),
                XMM::XMM4 => dynasm!($emitter ; $ins Rx($dst), xmm4, Rx(x)),
                XMM::XMM5 => dynasm!($emitter ; $ins Rx($dst), xmm5, Rx(x)),
                XMM::XMM6 => dynasm!($emitter ; $ins Rx($dst), xmm6, Rx(x)),
                XMM::XMM7 => dynasm!($emitter ; $ins Rx($dst), xmm7, Rx(x)),
                XMM::XMM8 => dynasm!($emitter ; $ins Rx($dst), xmm8, Rx(x)),
                XMM::XMM9 => dynasm!($emitter ; $ins Rx($dst), xmm9, Rx(x)),
                XMM::XMM10 => dynasm!($emitter ; $ins Rx($dst), xmm10, Rx(x)),
                XMM::XMM11 => dynasm!($emitter ; $ins Rx($dst), xmm11, Rx(x)),
                XMM::XMM12 => dynasm!($emitter ; $ins Rx($dst), xmm12, Rx(x)),
                XMM::XMM13 => dynasm!($emitter ; $ins Rx($dst), xmm13, Rx(x)),
                XMM::XMM14 => dynasm!($emitter ; $ins Rx($dst), xmm14, Rx(x)),
                XMM::XMM15 => dynasm!($emitter ; $ins Rx($dst), xmm15, Rx(x)),
            },
            XMMOrMemory::Memory(base, disp) => match $src1 {
                XMM::XMM0 => dynasm!($emitter ; $ins Rx($dst), xmm0, [Rq(base) + disp]),
                XMM::XMM1 => dynasm!($emitter ; $ins Rx($dst), xmm1, [Rq(base) + disp]),
                XMM::XMM2 => dynasm!($emitter ; $ins Rx($dst), xmm2, [Rq(base) + disp]),
                XMM::XMM3 => dynasm!($emitter ; $ins Rx($dst), xmm3, [Rq(base) + disp]),
                XMM::XMM4 => dynasm!($emitter ; $ins Rx($dst), xmm4, [Rq(base) + disp]),
                XMM::XMM5 => dynasm!($emitter ; $ins Rx($dst), xmm5, [Rq(base) + disp]),
                XMM::XMM6 => dynasm!($emitter ; $ins Rx($dst), xmm6, [Rq(base) + disp]),
                XMM::XMM7 => dynasm!($emitter ; $ins Rx($dst), xmm7, [Rq(base) + disp]),
                XMM::XMM8 => dynasm!($emitter ; $ins Rx($dst), xmm8, [Rq(base) + disp]),
                XMM::XMM9 => dynasm!($emitter ; $ins Rx($dst), xmm9, [Rq(base) + disp]),
                XMM::XMM10 => dynasm!($emitter ; $ins Rx($dst), xmm10, [Rq(base) + disp]),
                XMM::XMM11 => dynasm!($emitter ; $ins Rx($dst), xmm11, [Rq(base) + disp]),
                XMM::XMM12 => dynasm!($emitter ; $ins Rx($dst), xmm12, [Rq(base) + disp]),
                XMM::XMM13 => dynasm!($emitter ; $ins Rx($dst), xmm13, [Rq(base) + disp]),
                XMM::XMM14 => dynasm!($emitter ; $ins Rx($dst), xmm14, [Rq(base) + disp]),
                XMM::XMM15 => dynasm!($emitter ; $ins Rx($dst), xmm15, [Rq(base) + disp]),
            },
        }
    }
}
737
/// Emits the two-operand SSE instruction `$ins` for a three-operand
/// `dst = src1 <op> src2` request.
///
/// Two invocation forms exist:
/// * `sse_fn!(ins, emitter, precision, src1, src2, dst)`
/// * `sse_fn!(ins, mode, emitter, precision, src1, src2, dst)`, which passes
///   `mode` as a trailing operand of the instruction.
///
/// NOTE(review): in the first form, when `src2` is the same register as
/// `dst`, the emitted instruction is `ins dst, src1`, i.e. the operands are
/// swapped relative to the request. That is only equivalent for commutative
/// instructions — confirm which instructions are routed through this macro.
macro_rules! sse_fn {
    ($ins:ident, $emitter:ident, $precision:expr_2021, $src1:ident, $src2:ident, $dst:ident) => {
        match $src2 {
            // `dst` already holds the second operand: combine it with `src1`
            // in place instead of overwriting it with a copy of `src1`.
            XMMOrMemory::XMM(rhs) if rhs == $dst => {
                dynasm!($emitter ; $ins Rx($dst), Rx($src1))
            }
            XMMOrMemory::XMM(rhs) => {
                move_src_to_dst($emitter, $precision, $src1, $dst);
                dynasm!($emitter ; $ins Rx($dst), Rx(rhs))
            }
            XMMOrMemory::Memory(addr, offset) => {
                move_src_to_dst($emitter, $precision, $src1, $dst);
                dynasm!($emitter ; $ins Rx($dst), [Rq(addr) + offset])
            }
        }
    };
    ($ins:ident, $mode:expr_2021, $emitter:ident, $precision:expr_2021, $src1:ident, $src2:ident, $dst:ident) => {{
        // Both operand kinds start by copying `src1` into `dst`.
        move_src_to_dst($emitter, $precision, $src1, $dst);
        match $src2 {
            XMMOrMemory::XMM(rhs) => {
                dynasm!($emitter ; $ins Rx($dst), Rx(rhs), $mode)
            }
            XMMOrMemory::Memory(addr, offset) => {
                dynasm!($emitter ; $ins Rx($dst), [Rq(addr) + offset], $mode)
            }
        }
    }};
}
768
/// Emits the three-operand AVX instruction `$ins` with a 64-bit integer
/// source (`src2`, either a general-purpose register or a `QWORD` memory
/// operand), an XMM first source (`src1`) and an XMM destination (`dst`).
///
/// `src1` is dispatched to a literal register name (`xmm0`..`xmm15`), while
/// `dst` and `src2` are passed to dynasm as dynamic registers.
macro_rules! avx_i2f_64_fn {
    ($ins:ident, $emitter:ident, $src1:ident, $src2:ident, $dst:ident) => {
        match $src2 {
            GPROrMemory::GPR(int_reg) => match $src1 {
                XMM::XMM0 => dynasm!($emitter ; $ins Rx($dst), xmm0, Rq(int_reg)),
                XMM::XMM1 => dynasm!($emitter ; $ins Rx($dst), xmm1, Rq(int_reg)),
                XMM::XMM2 => dynasm!($emitter ; $ins Rx($dst), xmm2, Rq(int_reg)),
                XMM::XMM3 => dynasm!($emitter ; $ins Rx($dst), xmm3, Rq(int_reg)),
                XMM::XMM4 => dynasm!($emitter ; $ins Rx($dst), xmm4, Rq(int_reg)),
                XMM::XMM5 => dynasm!($emitter ; $ins Rx($dst), xmm5, Rq(int_reg)),
                XMM::XMM6 => dynasm!($emitter ; $ins Rx($dst), xmm6, Rq(int_reg)),
                XMM::XMM7 => dynasm!($emitter ; $ins Rx($dst), xmm7, Rq(int_reg)),
                XMM::XMM8 => dynasm!($emitter ; $ins Rx($dst), xmm8, Rq(int_reg)),
                XMM::XMM9 => dynasm!($emitter ; $ins Rx($dst), xmm9, Rq(int_reg)),
                XMM::XMM10 => dynasm!($emitter ; $ins Rx($dst), xmm10, Rq(int_reg)),
                XMM::XMM11 => dynasm!($emitter ; $ins Rx($dst), xmm11, Rq(int_reg)),
                XMM::XMM12 => dynasm!($emitter ; $ins Rx($dst), xmm12, Rq(int_reg)),
                XMM::XMM13 => dynasm!($emitter ; $ins Rx($dst), xmm13, Rq(int_reg)),
                XMM::XMM14 => dynasm!($emitter ; $ins Rx($dst), xmm14, Rq(int_reg)),
                XMM::XMM15 => dynasm!($emitter ; $ins Rx($dst), xmm15, Rq(int_reg)),
            },
            GPROrMemory::Memory(addr, offset) => match $src1 {
                XMM::XMM0 => dynasm!($emitter ; $ins Rx($dst), xmm0, QWORD [Rq(addr) + offset]),
                XMM::XMM1 => dynasm!($emitter ; $ins Rx($dst), xmm1, QWORD [Rq(addr) + offset]),
                XMM::XMM2 => dynasm!($emitter ; $ins Rx($dst), xmm2, QWORD [Rq(addr) + offset]),
                XMM::XMM3 => dynasm!($emitter ; $ins Rx($dst), xmm3, QWORD [Rq(addr) + offset]),
                XMM::XMM4 => dynasm!($emitter ; $ins Rx($dst), xmm4, QWORD [Rq(addr) + offset]),
                XMM::XMM5 => dynasm!($emitter ; $ins Rx($dst), xmm5, QWORD [Rq(addr) + offset]),
                XMM::XMM6 => dynasm!($emitter ; $ins Rx($dst), xmm6, QWORD [Rq(addr) + offset]),
                XMM::XMM7 => dynasm!($emitter ; $ins Rx($dst), xmm7, QWORD [Rq(addr) + offset]),
                XMM::XMM8 => dynasm!($emitter ; $ins Rx($dst), xmm8, QWORD [Rq(addr) + offset]),
                XMM::XMM9 => dynasm!($emitter ; $ins Rx($dst), xmm9, QWORD [Rq(addr) + offset]),
                XMM::XMM10 => dynasm!($emitter ; $ins Rx($dst), xmm10, QWORD [Rq(addr) + offset]),
                XMM::XMM11 => dynasm!($emitter ; $ins Rx($dst), xmm11, QWORD [Rq(addr) + offset]),
                XMM::XMM12 => dynasm!($emitter ; $ins Rx($dst), xmm12, QWORD [Rq(addr) + offset]),
                XMM::XMM13 => dynasm!($emitter ; $ins Rx($dst), xmm13, QWORD [Rq(addr) + offset]),
                XMM::XMM14 => dynasm!($emitter ; $ins Rx($dst), xmm14, QWORD [Rq(addr) + offset]),
                XMM::XMM15 => dynasm!($emitter ; $ins Rx($dst), xmm15, QWORD [Rq(addr) + offset]),
            },
        }
    }
}
811
/// SSE counterpart of the 64-bit integer-to-float AVX helper: copies `src1`
/// into `dst`, then emits the two-operand `$ins dst, src2`, where `src2` is
/// a 64-bit general-purpose register or a `QWORD` memory operand.
macro_rules! sse_i2f_64_fn {
    ($ins:ident, $emitter:ident, $precision:expr_2021, $src1:ident, $src2:ident, $dst:ident) => {{
        // The copy is needed for both operand kinds, so do it once up front.
        move_src_to_dst($emitter, $precision, $src1, $dst);
        match $src2 {
            GPROrMemory::GPR(int_reg) => {
                dynasm!($emitter ; $ins Rx($dst), Rq(int_reg))
            }
            GPROrMemory::Memory(addr, offset) => {
                dynasm!($emitter ; $ins Rx($dst), QWORD [Rq(addr) + offset])
            }
        }
    }}
}
826
/// Emits the three-operand AVX instruction `$ins` with a 32-bit integer
/// source (`src2`, either a general-purpose register or a `DWORD` memory
/// operand), an XMM first source (`src1`) and an XMM destination (`dst`).
///
/// `src1` is dispatched to a literal register name (`xmm0`..`xmm15`), while
/// `dst` and `src2` are passed to dynasm as dynamic registers.
macro_rules! avx_i2f_32_fn {
    ($ins:ident, $emitter:ident, $src1:ident, $src2:ident, $dst:ident) => {
        match $src2 {
            GPROrMemory::GPR(int_reg) => match $src1 {
                XMM::XMM0 => dynasm!($emitter ; $ins Rx($dst), xmm0, Rd(int_reg)),
                XMM::XMM1 => dynasm!($emitter ; $ins Rx($dst), xmm1, Rd(int_reg)),
                XMM::XMM2 => dynasm!($emitter ; $ins Rx($dst), xmm2, Rd(int_reg)),
                XMM::XMM3 => dynasm!($emitter ; $ins Rx($dst), xmm3, Rd(int_reg)),
                XMM::XMM4 => dynasm!($emitter ; $ins Rx($dst), xmm4, Rd(int_reg)),
                XMM::XMM5 => dynasm!($emitter ; $ins Rx($dst), xmm5, Rd(int_reg)),
                XMM::XMM6 => dynasm!($emitter ; $ins Rx($dst), xmm6, Rd(int_reg)),
                XMM::XMM7 => dynasm!($emitter ; $ins Rx($dst), xmm7, Rd(int_reg)),
                XMM::XMM8 => dynasm!($emitter ; $ins Rx($dst), xmm8, Rd(int_reg)),
                XMM::XMM9 => dynasm!($emitter ; $ins Rx($dst), xmm9, Rd(int_reg)),
                XMM::XMM10 => dynasm!($emitter ; $ins Rx($dst), xmm10, Rd(int_reg)),
                XMM::XMM11 => dynasm!($emitter ; $ins Rx($dst), xmm11, Rd(int_reg)),
                XMM::XMM12 => dynasm!($emitter ; $ins Rx($dst), xmm12, Rd(int_reg)),
                XMM::XMM13 => dynasm!($emitter ; $ins Rx($dst), xmm13, Rd(int_reg)),
                XMM::XMM14 => dynasm!($emitter ; $ins Rx($dst), xmm14, Rd(int_reg)),
                XMM::XMM15 => dynasm!($emitter ; $ins Rx($dst), xmm15, Rd(int_reg)),
            },
            GPROrMemory::Memory(addr, offset) => match $src1 {
                XMM::XMM0 => dynasm!($emitter ; $ins Rx($dst), xmm0, DWORD [Rq(addr) + offset]),
                XMM::XMM1 => dynasm!($emitter ; $ins Rx($dst), xmm1, DWORD [Rq(addr) + offset]),
                XMM::XMM2 => dynasm!($emitter ; $ins Rx($dst), xmm2, DWORD [Rq(addr) + offset]),
                XMM::XMM3 => dynasm!($emitter ; $ins Rx($dst), xmm3, DWORD [Rq(addr) + offset]),
                XMM::XMM4 => dynasm!($emitter ; $ins Rx($dst), xmm4, DWORD [Rq(addr) + offset]),
                XMM::XMM5 => dynasm!($emitter ; $ins Rx($dst), xmm5, DWORD [Rq(addr) + offset]),
                XMM::XMM6 => dynasm!($emitter ; $ins Rx($dst), xmm6, DWORD [Rq(addr) + offset]),
                XMM::XMM7 => dynasm!($emitter ; $ins Rx($dst), xmm7, DWORD [Rq(addr) + offset]),
                XMM::XMM8 => dynasm!($emitter ; $ins Rx($dst), xmm8, DWORD [Rq(addr) + offset]),
                XMM::XMM9 => dynasm!($emitter ; $ins Rx($dst), xmm9, DWORD [Rq(addr) + offset]),
                XMM::XMM10 => dynasm!($emitter ; $ins Rx($dst), xmm10, DWORD [Rq(addr) + offset]),
                XMM::XMM11 => dynasm!($emitter ; $ins Rx($dst), xmm11, DWORD [Rq(addr) + offset]),
                XMM::XMM12 => dynasm!($emitter ; $ins Rx($dst), xmm12, DWORD [Rq(addr) + offset]),
                XMM::XMM13 => dynasm!($emitter ; $ins Rx($dst), xmm13, DWORD [Rq(addr) + offset]),
                XMM::XMM14 => dynasm!($emitter ; $ins Rx($dst), xmm14, DWORD [Rq(addr) + offset]),
                XMM::XMM15 => dynasm!($emitter ; $ins Rx($dst), xmm15, DWORD [Rq(addr) + offset]),
            },
        }
    }
}
869
/// SSE counterpart of the 32-bit integer-to-float AVX helper: copies `src1`
/// into `dst`, then emits the two-operand `$ins dst, src2`, where `src2` is
/// a 32-bit general-purpose register or a `DWORD` memory operand.
macro_rules! sse_i2f_32_fn {
    ($ins:ident, $emitter:ident, $precision:expr_2021, $src1:ident, $src2:ident, $dst:ident) => {
        match $src2 {
            GPROrMemory::GPR(x) => {
                move_src_to_dst($emitter, $precision, $src1, $dst);
                // The instruction must target `dst` (as the memory arm and
                // the 64-bit variant do); writing to `src1` would leave the
                // result out of `dst` and clobber the caller's source.
                dynasm!($emitter; $ins Rx($dst), Rd(x))
            },
            GPROrMemory::Memory(base, disp) => {
                move_src_to_dst($emitter, $precision, $src1, $dst);
                dynasm!($emitter; $ins Rx($dst), DWORD [Rq(base) + disp])
            }
        }
    }
}
884
/// Emits the AVX rounding instruction `$ins dst, src1, src2, mode`, where
/// `src2` is either an XMM register or a memory operand and `mode` is passed
/// through as the trailing operand.
macro_rules! avx_round_fn {
    ($ins:ident, $mode:expr_2021, $emitter:ident, $src1:ident, $src2:ident, $dst:ident) => {
        match $src2 {
            XMMOrMemory::XMM(rhs) => {
                dynasm!($emitter ; $ins Rx($dst), Rx($src1), Rx(rhs), $mode)
            }
            XMMOrMemory::Memory(addr, offset) => {
                dynasm!($emitter ; $ins Rx($dst), Rx($src1), [Rq(addr) + offset], $mode)
            }
        }
    }
}
893
/// SSE counterpart of the AVX rounding helper: emits the two-operand
/// `$ins dst, src2, mode`, where `src2` is an XMM register or a memory
/// operand and `mode` is passed through as the trailing operand.
///
/// For a register `src2`, `src1` is first copied into `dst` unless `dst` is
/// itself the register being rounded.
macro_rules! sse_round_fn {
    ($ins:ident, $mode:expr_2021, $emitter:ident, $precision:expr_2021, $src1:ident, $src2:ident, $dst:ident) => {
        match $src2 {
            XMMOrMemory::XMM(x) => {
                // Skip the copy when `dst` already is the value being
                // rounded; copying `src1` over it would destroy the input.
                if x != $dst {
                    move_src_to_dst($emitter, $precision, $src1, $dst);
                }
                // Destination operand comes first so that the rounded value
                // lands in `dst` rather than overwriting the source `x`.
                // (Identical encoding to the old form whenever `x == dst`.)
                dynasm!($emitter ; $ins Rx($dst), Rx(x), $mode)
            }
            XMMOrMemory::Memory(base, disp) => {
                dynasm!($emitter ; $ins Rx($dst), [Rq(base) + disp], $mode)
            },
        }
    }
}
909
910impl EmitterX64 for AssemblerX64 {
911    fn get_simd_arch(&self) -> Option<&CpuFeature> {
912        self.simd_arch.as_ref()
913    }
914
915    fn get_label(&mut self) -> DynamicLabel {
916        self.new_dynamic_label()
917    }
918
919    fn get_offset(&self) -> AssemblyOffset {
920        self.offset()
921    }
922
923    fn get_jmp_instr_size(&self) -> u8 {
924        5
925    }
926
927    fn finalize_function(&mut self) -> Result<(), CompileError> {
928        dynasm!(
929            self
930            ; const_neg_one_32:
931            ; .i32 -1
932            ; const_zero_32:
933            ; .i32  0
934            ; const_pos_one_32:
935            ; .i32 1
936        );
937        Ok(())
938    }
939
940    fn arch_has_xzcnt(&self) -> bool {
941        match &self.target {
942            Some(target) => {
943                target.cpu_features().contains(CpuFeature::LZCNT)
944                    && target.cpu_features().contains(CpuFeature::BMI1)
945            }
946            None => false,
947        }
948    }
949
950    fn arch_emit_lzcnt(
951        &mut self,
952        sz: Size,
953        src: Location,
954        dst: Location,
955    ) -> Result<(), CompileError> {
956        binop_gpr_gpr!(lzcnt, self, sz, src, dst, {
957            binop_mem_gpr!(lzcnt, self, sz, src, dst, {
958                codegen_error!("singlepass cannot emit lzcnt")
959            })
960        });
961        Ok(())
962    }
963
964    fn arch_emit_tzcnt(
965        &mut self,
966        sz: Size,
967        src: Location,
968        dst: Location,
969    ) -> Result<(), CompileError> {
970        binop_gpr_gpr!(tzcnt, self, sz, src, dst, {
971            binop_mem_gpr!(tzcnt, self, sz, src, dst, {
972                codegen_error!("singlepass cannot emit tzcnt")
973            })
974        });
975        Ok(())
976    }
977
978    fn emit_u64(&mut self, x: u64) -> Result<(), CompileError> {
979        self.push_u64(x);
980        Ok(())
981    }
982
983    fn emit_bytes(&mut self, bytes: &[u8]) -> Result<(), CompileError> {
984        for &b in bytes {
985            self.push(b);
986        }
987        Ok(())
988    }
989
990    fn emit_label(&mut self, label: Label) -> Result<(), CompileError> {
991        dynasm!(self ; => label);
992        Ok(())
993    }
994
995    fn emit_nop(&mut self) -> Result<(), CompileError> {
996        dynasm!(self ; nop);
997        Ok(())
998    }
999
1000    fn emit_nop_n(&mut self, mut n: usize) -> Result<(), CompileError> {
1001        /*
1002            1      90H                            NOP
1003            2      66 90H                         66 NOP
1004            3      0F 1F 00H                      NOP DWORD ptr [EAX]
1005            4      0F 1F 40 00H                   NOP DWORD ptr [EAX + 00H]
1006            5      0F 1F 44 00 00H                NOP DWORD ptr [EAX + EAX*1 + 00H]
1007            6      66 0F 1F 44 00 00H             NOP DWORD ptr [AX + AX*1 + 00H]
1008            7      0F 1F 80 00 00 00 00H          NOP DWORD ptr [EAX + 00000000H]
1009            8      0F 1F 84 00 00 00 00 00H       NOP DWORD ptr [AX + AX*1 + 00000000H]
1010            9      66 0F 1F 84 00 00 00 00 00H    NOP DWORD ptr [AX + AX*1 + 00000000H]
1011        */
1012        while n >= 9 {
1013            n -= 9;
1014            self.emit_bytes(&[0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00])?;
1015            // 9-byte nop
1016        }
1017        let seq: &[u8] = match n {
1018            0 => &[],
1019            1 => &[0x90],
1020            2 => &[0x66, 0x90],
1021            3 => &[0x0f, 0x1f, 0x00],
1022            4 => &[0x0f, 0x1f, 0x40, 0x00],
1023            5 => &[0x0f, 0x1f, 0x44, 0x00, 0x00],
1024            6 => &[0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00],
1025            7 => &[0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00],
1026            8 => &[0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00],
1027            _ => codegen_error!("singlepass emit_nop_n unreachable"),
1028        };
1029        self.emit_bytes(seq)
1030    }
1031
1032    fn emit_mov(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1033        // fast path
1034        if let (Location::Imm32(0), Location::GPR(x)) = (src, dst) {
1035            dynasm!(self ; xor Rd(x), Rd(x));
1036            return Ok(());
1037        }
1038
1039        binop_all_nofp!(mov, self, sz, src, dst, {
1040            binop_imm64_gpr!(mov, self, sz, src, dst, {
1041                match (sz, src, dst) {
1042                    (Size::S8, Location::GPR(src), Location::Memory(dst, disp)) => {
1043                        dynasm!(self ; mov [Rq(dst) + disp], Rb(src));
1044                    }
1045                    (Size::S8, Location::Memory(src, disp), Location::GPR(dst)) => {
1046                        dynasm!(self ; mov Rb(dst), [Rq(src) + disp]);
1047                    }
1048                    (Size::S8, Location::Imm32(src), Location::GPR(dst)) => {
1049                        dynasm!(self ; mov Rb(dst), src as i8);
1050                    }
1051                    (Size::S8, Location::Imm64(src), Location::GPR(dst)) => {
1052                        dynasm!(self ; mov Rb(dst), src as i8);
1053                    }
1054                    (Size::S8, Location::Imm32(src), Location::Memory(dst, disp)) => {
1055                        dynasm!(self ; mov BYTE [Rq(dst) + disp], src as i8);
1056                    }
1057                    (Size::S8, Location::Imm64(src), Location::Memory(dst, disp)) => {
1058                        dynasm!(self ; mov BYTE [Rq(dst) + disp], src as i8);
1059                    }
1060                    (Size::S16, Location::GPR(src), Location::Memory(dst, disp)) => {
1061                        dynasm!(self ; mov [Rq(dst) + disp], Rw(src));
1062                    }
1063                    (Size::S16, Location::Memory(src, disp), Location::GPR(dst)) => {
1064                        dynasm!(self ; mov Rw(dst), [Rq(src) + disp]);
1065                    }
1066                    (Size::S16, Location::Imm32(src), Location::GPR(dst)) => {
1067                        dynasm!(self ; mov Rw(dst), src as i16);
1068                    }
1069                    (Size::S16, Location::Imm64(src), Location::GPR(dst)) => {
1070                        dynasm!(self ; mov Rw(dst), src as i16);
1071                    }
1072                    (Size::S16, Location::Imm32(src), Location::Memory(dst, disp)) => {
1073                        dynasm!(self ; mov WORD [Rq(dst) + disp], src as i16);
1074                    }
1075                    (Size::S16, Location::Imm64(src), Location::Memory(dst, disp)) => {
1076                        dynasm!(self ; mov WORD [Rq(dst) + disp], src as i16);
1077                    }
1078                    (Size::S32, Location::Imm64(src), Location::GPR(dst)) => {
1079                        dynasm!(self ; mov Rd(dst), src as i32);
1080                    }
1081                    (Size::S32, Location::Imm64(src), Location::Memory(dst, disp)) => {
1082                        dynasm!(self ; mov DWORD [Rq(dst) + disp], src as i32);
1083                    }
1084                    (Size::S32, Location::GPR(src), Location::SIMD(dst)) => {
1085                        dynasm!(self ; movd Rx(dst), Rd(src));
1086                    }
1087                    (Size::S32, Location::SIMD(src), Location::GPR(dst)) => {
1088                        dynasm!(self ; movd Rd(dst), Rx(src));
1089                    }
1090                    (Size::S32, Location::Memory(src, disp), Location::SIMD(dst)) => {
1091                        dynasm!(self ; movd Rx(dst), [Rq(src) + disp]);
1092                    }
1093                    (Size::S32, Location::SIMD(src), Location::Memory(dst, disp)) => {
1094                        dynasm!(self ; movd [Rq(dst) + disp], Rx(src));
1095                    }
1096                    (Size::S64, Location::Imm64(src), Location::GPR(dst)) => {
1097                        dynasm!(self ; mov Rd(dst), src as i32);
1098                    }
1099                    (Size::S64, Location::Imm32(src), Location::GPR(dst)) => {
1100                        dynasm!(self ; mov Rd(dst), src as i32);
1101                    }
1102                    (Size::S64, Location::Imm8(src), Location::GPR(dst)) => {
1103                        dynasm!(self ; mov Rd(dst), src as i32);
1104                    }
1105
1106                    (Size::S64, Location::GPR(src), Location::SIMD(dst)) => {
1107                        dynasm!(self ; movq Rx(dst), Rq(src));
1108                    }
1109                    (Size::S64, Location::SIMD(src), Location::GPR(dst)) => {
1110                        dynasm!(self ; movq Rq(dst), Rx(src));
1111                    }
1112                    (Size::S64, Location::Memory(src, disp), Location::SIMD(dst)) => {
1113                        dynasm!(self ; movq Rx(dst), [Rq(src) + disp]);
1114                    }
1115                    (Size::S64, Location::SIMD(src), Location::Memory(dst, disp)) => {
1116                        dynasm!(self ; movq [Rq(dst) + disp], Rx(src));
1117                    }
1118                    (_, Location::SIMD(src), Location::SIMD(dst)) => {
1119                        dynasm!(self ; movq Rx(dst), Rx(src));
1120                    }
1121
1122                    _ => codegen_error!("singlepass can't emit MOV {:?} {:?} {:?}", sz, src, dst),
1123                }
1124            })
1125        });
1126        Ok(())
1127    }
1128    fn emit_lea(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1129        match (sz, src, dst) {
1130            (Size::S32, Location::Memory(src, disp), Location::GPR(dst)) => {
1131                dynasm!(self ; lea Rd(dst), [Rq(src) + disp]);
1132            }
1133            (Size::S64, Location::Memory(src, disp), Location::GPR(dst)) => {
1134                dynasm!(self ; lea Rq(dst), [Rq(src) + disp]);
1135            }
1136            (Size::S32, Location::Memory2(src1, src2, mult, disp), Location::GPR(dst)) => {
1137                match mult {
1138                    Multiplier::Zero => dynasm!(self ; lea Rd(dst), [Rq(src1) + disp]),
1139                    Multiplier::One => {
1140                        dynasm!(self ; lea Rd(dst), [Rq(src1) + Rq(src2) + disp])
1141                    }
1142                    Multiplier::Two => {
1143                        dynasm!(self ; lea Rd(dst), [Rq(src1) + Rq(src2) * 2 + disp])
1144                    }
1145                    Multiplier::Four => {
1146                        dynasm!(self ; lea Rd(dst), [Rq(src1) + Rq(src2) * 4 + disp])
1147                    }
1148                    Multiplier::Height => {
1149                        dynasm!(self ; lea Rd(dst), [Rq(src1) + Rq(src2) * 8 + disp])
1150                    }
1151                };
1152            }
1153            (Size::S64, Location::Memory2(src1, src2, mult, disp), Location::GPR(dst)) => {
1154                match mult {
1155                    Multiplier::Zero => dynasm!(self ; lea Rq(dst), [Rq(src1) + disp]),
1156                    Multiplier::One => {
1157                        dynasm!(self ; lea Rq(dst), [Rq(src1) + Rq(src2) + disp])
1158                    }
1159                    Multiplier::Two => {
1160                        dynasm!(self ; lea Rq(dst), [Rq(src1) + Rq(src2) * 2 + disp])
1161                    }
1162                    Multiplier::Four => {
1163                        dynasm!(self ; lea Rq(dst), [Rq(src1) + Rq(src2) * 4 + disp])
1164                    }
1165                    Multiplier::Height => {
1166                        dynasm!(self ; lea Rq(dst), [Rq(src1) + Rq(src2) * 8 + disp])
1167                    }
1168                };
1169            }
1170            _ => codegen_error!("singlepass can't emit LEA {:?} {:?} {:?}", sz, src, dst),
1171        }
1172        Ok(())
1173    }
1174    fn emit_lea_label(&mut self, label: Label, dst: Location) -> Result<(), CompileError> {
1175        match dst {
1176            Location::GPR(x) => {
1177                dynasm!(self ; lea Rq(x), [=>label]);
1178            }
1179            _ => codegen_error!("singlepass can't emit LEA label={:?} {:?}", label, dst),
1180        }
1181        Ok(())
1182    }
1183    fn emit_cdq(&mut self) -> Result<(), CompileError> {
1184        dynasm!(self ; cdq);
1185        Ok(())
1186    }
1187    fn emit_cqo(&mut self) -> Result<(), CompileError> {
1188        dynasm!(self ; cqo);
1189        Ok(())
1190    }
1191    fn emit_xor(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1192        binop_all_nofp!(xor, self, sz, src, dst, {
1193            codegen_error!("singlepass can't emit XOR {:?} {:?} {:?}", sz, src, dst)
1194        });
1195        Ok(())
1196    }
1197    fn emit_jmp(&mut self, condition: Condition, label: Label) -> Result<(), CompileError> {
1198        match condition {
1199            Condition::None => jmp_op!(jmp, self, label),
1200            Condition::Above => jmp_op!(ja, self, label),
1201            Condition::AboveEqual => jmp_op!(jae, self, label),
1202            Condition::Below => jmp_op!(jb, self, label),
1203            Condition::BelowEqual => jmp_op!(jbe, self, label),
1204            Condition::Greater => jmp_op!(jg, self, label),
1205            Condition::GreaterEqual => jmp_op!(jge, self, label),
1206            Condition::Less => jmp_op!(jl, self, label),
1207            Condition::LessEqual => jmp_op!(jle, self, label),
1208            Condition::Equal => jmp_op!(je, self, label),
1209            Condition::NotEqual => jmp_op!(jne, self, label),
1210            Condition::Signed => jmp_op!(js, self, label),
1211            Condition::Carry => jmp_op!(jc, self, label),
1212        }
1213        Ok(())
1214    }
1215    fn emit_jmp_location(&mut self, loc: Location) -> Result<(), CompileError> {
1216        match loc {
1217            Location::GPR(x) => dynasm!(self ; jmp Rq(x)),
1218            Location::Memory(base, disp) => dynasm!(self ; jmp QWORD [Rq(base) + disp]),
1219            _ => codegen_error!("singlepass can't emit JMP {:?}", loc),
1220        }
1221        Ok(())
1222    }
1223    fn emit_set(&mut self, condition: Condition, dst: GPR) -> Result<(), CompileError> {
1224        match condition {
1225            Condition::Above => dynasm!(self ; seta Rb(dst)),
1226            Condition::AboveEqual => dynasm!(self ; setae Rb(dst)),
1227            Condition::Below => dynasm!(self ; setb Rb(dst)),
1228            Condition::BelowEqual => dynasm!(self ; setbe Rb(dst)),
1229            Condition::Greater => dynasm!(self ; setg Rb(dst)),
1230            Condition::GreaterEqual => dynasm!(self ; setge Rb(dst)),
1231            Condition::Less => dynasm!(self ; setl Rb(dst)),
1232            Condition::LessEqual => dynasm!(self ; setle Rb(dst)),
1233            Condition::Equal => dynasm!(self ; sete Rb(dst)),
1234            Condition::NotEqual => dynasm!(self ; setne Rb(dst)),
1235            Condition::Signed => dynasm!(self ; sets Rb(dst)),
1236            Condition::Carry => dynasm!(self ; setc Rb(dst)),
1237            _ => codegen_error!("singlepass can't emit SET {:?} {:?}", condition, dst),
1238        }
1239        Ok(())
1240    }
1241    fn emit_push(&mut self, sz: Size, src: Location) -> Result<(), CompileError> {
1242        match (sz, src) {
1243            (Size::S64, Location::Imm32(src)) => dynasm!(self ; push src as i32),
1244            (Size::S64, Location::GPR(src)) => dynasm!(self ; push Rq(src)),
1245            (Size::S64, Location::Memory(src, disp)) => {
1246                dynasm!(self ; push QWORD [Rq(src) + disp])
1247            }
1248            _ => codegen_error!("singlepass can't emit PUSH {:?} {:?}", sz, src),
1249        }
1250        Ok(())
1251    }
1252    fn emit_pop(&mut self, sz: Size, dst: Location) -> Result<(), CompileError> {
1253        match (sz, dst) {
1254            (Size::S64, Location::GPR(dst)) => dynasm!(self ; pop Rq(dst)),
1255            (Size::S64, Location::Memory(dst, disp)) => {
1256                dynasm!(self ; pop QWORD [Rq(dst) + disp])
1257            }
1258            _ => codegen_error!("singlepass can't emit POP {:?} {:?}", sz, dst),
1259        }
1260        Ok(())
1261    }
1262    fn emit_cmp(&mut self, sz: Size, left: Location, right: Location) -> Result<(), CompileError> {
1263        // Constant elimination for comparison between consts.
1264        //
1265        // Only needed for `emit_cmp`, since other binary operators actually write to `right` and `right` must
1266        // be a writable location for them.
1267        let consts = match (left, right) {
1268            (Location::Imm32(x), Location::Imm32(y)) => Some((x as i32 as i64, y as i32 as i64)),
1269            (Location::Imm32(x), Location::Imm64(y)) => Some((x as i32 as i64, y as i64)),
1270            (Location::Imm64(x), Location::Imm32(y)) => Some((x as i64, y as i32 as i64)),
1271            (Location::Imm64(x), Location::Imm64(y)) => Some((x as i64, y as i64)),
1272            _ => None,
1273        };
1274        use std::cmp::Ordering;
1275        match consts {
1276            Some((x, y)) => match x.cmp(&y) {
1277                Ordering::Less => dynasm!(self ; cmp DWORD [>const_neg_one_32], 0),
1278                Ordering::Equal => dynasm!(self ; cmp DWORD [>const_zero_32], 0),
1279                Ordering::Greater => dynasm!(self ; cmp DWORD [>const_pos_one_32], 0),
1280            },
1281            None => binop_all_nofp!(cmp, self, sz, left, right, {
1282                codegen_error!("singlepass can't emit CMP {:?} {:?} {:?}", sz, left, right);
1283            }),
1284        }
1285        Ok(())
1286    }
1287    fn emit_add(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1288        // Fast path
1289        if let Location::Imm32(0) = src {
1290            return Ok(());
1291        }
1292        binop_all_nofp!(add, self, sz, src, dst, {
1293            codegen_error!("singlepass can't emit ADD {:?} {:?} {:?}", sz, src, dst)
1294        });
1295        Ok(())
1296    }
1297    fn emit_sub(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1298        // Fast path
1299        if let Location::Imm32(0) = src {
1300            return Ok(());
1301        }
1302        binop_all_nofp!(sub, self, sz, src, dst, {
1303            codegen_error!("singlepass can't emit SUB {:?} {:?} {:?}", sz, src, dst)
1304        });
1305        Ok(())
1306    }
1307    fn emit_neg(&mut self, sz: Size, value: Location) -> Result<(), CompileError> {
1308        match (sz, value) {
1309            (Size::S8, Location::GPR(value)) => dynasm!(self ; neg Rb(value)),
1310            (Size::S8, Location::Memory(value, disp)) => {
1311                dynasm!(self ; neg [Rq(value) + disp])
1312            }
1313            (Size::S16, Location::GPR(value)) => dynasm!(self ; neg Rw(value)),
1314            (Size::S16, Location::Memory(value, disp)) => {
1315                dynasm!(self ; neg [Rq(value) + disp])
1316            }
1317            (Size::S32, Location::GPR(value)) => dynasm!(self ; neg Rd(value)),
1318            (Size::S32, Location::Memory(value, disp)) => {
1319                dynasm!(self ; neg [Rq(value) + disp])
1320            }
1321            (Size::S64, Location::GPR(value)) => dynasm!(self ; neg Rq(value)),
1322            (Size::S64, Location::Memory(value, disp)) => {
1323                dynasm!(self ; neg [Rq(value) + disp])
1324            }
1325            _ => codegen_error!("singlepass can't emit NEG {:?} {:?}", sz, value),
1326        }
1327        Ok(())
1328    }
1329    fn emit_imul(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1330        binop_gpr_gpr!(imul, self, sz, src, dst, {
1331            binop_mem_gpr!(imul, self, sz, src, dst, {
1332                codegen_error!("singlepass can't emit IMUL {:?} {:?} {:?}", sz, src, dst)
1333            })
1334        });
1335        Ok(())
1336    }
1337    fn emit_imul_imm32_gpr64(&mut self, src: u32, dst: GPR) -> Result<(), CompileError> {
1338        dynasm!(self ; imul Rq(dst), Rq(dst), src as i32);
1339        Ok(())
1340    }
1341    fn emit_div(&mut self, sz: Size, divisor: Location) -> Result<(), CompileError> {
1342        unop_gpr_or_mem!(div, self, sz, divisor, {
1343            codegen_error!("singlepass can't emit DIV {:?} {:?}", sz, divisor)
1344        });
1345        Ok(())
1346    }
1347    fn emit_idiv(&mut self, sz: Size, divisor: Location) -> Result<(), CompileError> {
1348        unop_gpr_or_mem!(idiv, self, sz, divisor, {
1349            codegen_error!("singlepass can't emit IDIV {:?} {:?}", sz, divisor)
1350        });
1351        Ok(())
1352    }
1353    fn emit_shl(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1354        binop_shift!(shl, self, sz, src, dst, {
1355            codegen_error!("singlepass can't emit SHL {:?} {:?} {:?}", sz, src, dst)
1356        });
1357        Ok(())
1358    }
1359    fn emit_shr(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1360        binop_shift!(shr, self, sz, src, dst, {
1361            codegen_error!("singlepass can't emit SHR {:?} {:?} {:?}", sz, src, dst)
1362        });
1363        Ok(())
1364    }
1365    fn emit_sar(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1366        binop_shift!(sar, self, sz, src, dst, {
1367            codegen_error!("singlepass can't emit SAR {:?} {:?} {:?}", sz, src, dst)
1368        });
1369        Ok(())
1370    }
    /// Emits a `rol` of operand size `sz`, with `src` as the rotate amount operand and `dst`
    /// as the rotated operand (operand roles as accepted by `binop_shift!`, which is defined
    /// outside this view).
    ///
    /// A combination the macro does not accept is reported through `codegen_error!`.
    fn emit_rol(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
        binop_shift!(rol, self, sz, src, dst, {
            codegen_error!("singlepass can't emit ROL {:?} {:?} {:?}", sz, src, dst)
        });
        Ok(())
    }
    /// Emits a `ror` of operand size `sz`, with `src` as the rotate amount operand and `dst`
    /// as the rotated operand (operand roles as accepted by `binop_shift!`, which is defined
    /// outside this view).
    ///
    /// A combination the macro does not accept is reported through `codegen_error!`.
    fn emit_ror(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
        binop_shift!(ror, self, sz, src, dst, {
            codegen_error!("singlepass can't emit ROR {:?} {:?} {:?}", sz, src, dst)
        });
        Ok(())
    }
    /// Emits an `and` of operand size `sz` with `src` and `dst` as its operands.
    ///
    /// Operand matching is delegated to `binop_all_nofp!` (defined outside this view); a
    /// combination it does not accept is reported through `codegen_error!`.
    fn emit_and(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
        binop_all_nofp!(and, self, sz, src, dst, {
            codegen_error!("singlepass can't emit AND {:?} {:?} {:?}", sz, src, dst)
        });
        Ok(())
    }
    /// Emits a `test` of operand size `sz` with `src` and `dst` as its operands.
    ///
    /// Operand matching is delegated to `binop_all_nofp!` (defined outside this view); a
    /// combination it does not accept is reported through `codegen_error!`.
    fn emit_test(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
        binop_all_nofp!(test, self, sz, src, dst, {
            codegen_error!("singlepass can't emit TEST {:?} {:?} {:?}", sz, src, dst)
        });
        Ok(())
    }
    /// Emits an `or` of operand size `sz` with `src` and `dst` as its operands.
    ///
    /// Operand matching is delegated to `binop_all_nofp!` (defined outside this view); a
    /// combination it does not accept is reported through `codegen_error!`.
    fn emit_or(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
        binop_all_nofp!(or, self, sz, src, dst, {
            codegen_error!("singlepass can't emit OR {:?} {:?} {:?}", sz, src, dst)
        });
        Ok(())
    }
    /// Emits a `bsr` (bit scan reverse) of operand size `sz` with `src` and `dst` as operands.
    ///
    /// The operand combination is tried against `binop_gpr_gpr!` first and `binop_mem_gpr!`
    /// second (presumably the register/register and memory/register forms; both macros are
    /// defined outside this view). Anything else is reported through `codegen_error!`.
    fn emit_bsr(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
        binop_gpr_gpr!(bsr, self, sz, src, dst, {
            binop_mem_gpr!(bsr, self, sz, src, dst, {
                codegen_error!("singlepass can't emit BSR {:?} {:?} {:?}", sz, src, dst)
            })
        });
        Ok(())
    }
    /// Emits a `bsf` (bit scan forward) of operand size `sz` with `src` and `dst` as operands.
    ///
    /// The operand combination is tried against `binop_gpr_gpr!` first and `binop_mem_gpr!`
    /// second (presumably the register/register and memory/register forms; both macros are
    /// defined outside this view). Anything else is reported through `codegen_error!`.
    fn emit_bsf(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
        binop_gpr_gpr!(bsf, self, sz, src, dst, {
            binop_mem_gpr!(bsf, self, sz, src, dst, {
                codegen_error!("singlepass can't emit BSF {:?} {:?} {:?}", sz, src, dst)
            })
        });
        Ok(())
    }
    /// Emits a `popcnt` of operand size `sz` with `src` and `dst` as operands.
    ///
    /// The operand combination is tried against `binop_gpr_gpr!` first and `binop_mem_gpr!`
    /// second (presumably the register/register and memory/register forms; both macros are
    /// defined outside this view). Anything else is reported through `codegen_error!`.
    fn emit_popcnt(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
        binop_gpr_gpr!(popcnt, self, sz, src, dst, {
            binop_mem_gpr!(popcnt, self, sz, src, dst, {
                codegen_error!("singlepass can't emit POPCNT {:?} {:?} {:?}", sz, src, dst)
            })
        });
        Ok(())
    }
1425    fn emit_movzx(
1426        &mut self,
1427        sz_src: Size,
1428        src: Location,
1429        sz_dst: Size,
1430        dst: Location,
1431    ) -> Result<(), CompileError> {
1432        match (sz_src, src, sz_dst, dst) {
1433            (Size::S8, Location::GPR(src), Size::S32, Location::GPR(dst)) => {
1434                dynasm!(self ; movzx Rd(dst), Rb(src));
1435            }
1436            (Size::S16, Location::GPR(src), Size::S32, Location::GPR(dst)) => {
1437                dynasm!(self ; movzx Rd(dst), Rw(src));
1438            }
1439            (Size::S8, Location::Memory(src, disp), Size::S32, Location::GPR(dst)) => {
1440                dynasm!(self ; movzx Rd(dst), BYTE [Rq(src) + disp]);
1441            }
1442            (Size::S16, Location::Memory(src, disp), Size::S32, Location::GPR(dst)) => {
1443                dynasm!(self ; movzx Rd(dst), WORD [Rq(src) + disp]);
1444            }
1445            (Size::S16, Location::Imm32(imm), Size::S32, Location::GPR(dst)) => {
1446                dynasm!(self ; mov Rd(dst), imm as i32);
1447            }
1448            (Size::S8, Location::GPR(src), Size::S64, Location::GPR(dst)) => {
1449                dynasm!(self ; movzx Rq(dst), Rb(src));
1450            }
1451            (Size::S16, Location::GPR(src), Size::S64, Location::GPR(dst)) => {
1452                dynasm!(self ; movzx Rq(dst), Rw(src));
1453            }
1454            (Size::S8, Location::Memory(src, disp), Size::S64, Location::GPR(dst)) => {
1455                dynasm!(self ; movzx Rq(dst), BYTE [Rq(src) + disp]);
1456            }
1457            (Size::S16, Location::Memory(src, disp), Size::S64, Location::GPR(dst)) => {
1458                dynasm!(self ; movzx Rq(dst), WORD [Rq(src) + disp]);
1459            }
1460            (Size::S32, Location::GPR(src), Size::S64, Location::GPR(dst)) => {
1461                if src != dst {
1462                    dynasm!(self ; mov Rd(dst), Rd(src));
1463                }
1464            }
1465            (Size::S32, Location::Memory(src, disp), Size::S64, Location::GPR(dst)) => {
1466                dynasm!(self ; mov Rd(dst), DWORD [Rq(src) + disp]);
1467            }
1468            (Size::S8, Location::Imm32(imm), Size::S32, Location::GPR(dst)) => {
1469                dynasm!(self ; mov Rq(dst), imm as i32);
1470            }
1471            (Size::S16, Location::Imm32(imm), Size::S64, Location::GPR(dst)) => {
1472                dynasm!(self ; mov Rq(dst), imm as i32);
1473            }
1474            (Size::S32, Location::Imm32(imm), Size::S64, Location::GPR(dst)) => {
1475                dynasm!(self ; mov Rq(dst), imm as i32);
1476            }
1477            (Size::S8, Location::Imm64(imm), Size::S32, Location::GPR(dst)) => {
1478                dynasm!(self ; mov Rq(dst), imm as i32);
1479            }
1480            (Size::S16, Location::Imm64(imm), Size::S64, Location::GPR(dst)) => {
1481                dynasm!(self ; mov Rq(dst), imm as i32);
1482            }
1483            (Size::S32, Location::Imm64(imm), Size::S64, Location::GPR(dst)) => {
1484                dynasm!(self ; mov Rq(dst), imm as i32);
1485            }
1486            _ => {
1487                codegen_error!(
1488                    "singlepass can't emit MOVZX {:?} {:?} {:?} {:?}",
1489                    sz_src,
1490                    src,
1491                    sz_dst,
1492                    dst
1493                )
1494            }
1495        }
1496        Ok(())
1497    }
1498    fn emit_movsx(
1499        &mut self,
1500        sz_src: Size,
1501        src: Location,
1502        sz_dst: Size,
1503        dst: Location,
1504    ) -> Result<(), CompileError> {
1505        match (sz_src, src, sz_dst, dst) {
1506            (Size::S8, Location::GPR(src), Size::S32, Location::GPR(dst)) => {
1507                dynasm!(self ; movsx Rd(dst), Rb(src));
1508            }
1509            (Size::S16, Location::GPR(src), Size::S32, Location::GPR(dst)) => {
1510                dynasm!(self ; movsx Rd(dst), Rw(src));
1511            }
1512            (Size::S8, Location::Memory(src, disp), Size::S32, Location::GPR(dst)) => {
1513                dynasm!(self ; movsx Rd(dst), BYTE [Rq(src) + disp]);
1514            }
1515            (Size::S16, Location::Memory(src, disp), Size::S32, Location::GPR(dst)) => {
1516                dynasm!(self ; movsx Rd(dst), WORD [Rq(src) + disp]);
1517            }
1518            (Size::S8, Location::GPR(src), Size::S64, Location::GPR(dst)) => {
1519                dynasm!(self ; movsx Rq(dst), Rb(src));
1520            }
1521            (Size::S16, Location::GPR(src), Size::S64, Location::GPR(dst)) => {
1522                dynasm!(self ; movsx Rq(dst), Rw(src));
1523            }
1524            (Size::S32, Location::GPR(src), Size::S64, Location::GPR(dst)) => {
1525                dynasm!(self ; movsx Rq(dst), Rd(src));
1526            }
1527            (Size::S8, Location::Memory(src, disp), Size::S64, Location::GPR(dst)) => {
1528                dynasm!(self ; movsx Rq(dst), BYTE [Rq(src) + disp]);
1529            }
1530            (Size::S16, Location::Memory(src, disp), Size::S64, Location::GPR(dst)) => {
1531                dynasm!(self ; movsx Rq(dst), WORD [Rq(src) + disp]);
1532            }
1533            (Size::S32, Location::Memory(src, disp), Size::S64, Location::GPR(dst)) => {
1534                dynasm!(self ; movsx Rq(dst), DWORD [Rq(src) + disp]);
1535            }
1536            _ => {
1537                codegen_error!(
1538                    "singlepass can't emit MOVSX {:?} {:?} {:?} {:?}",
1539                    sz_src,
1540                    src,
1541                    sz_dst,
1542                    dst
1543                )
1544            }
1545        }
1546        Ok(())
1547    }
1548
1549    fn emit_xchg(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1550        match (sz, src, dst) {
1551            (Size::S8, Location::GPR(src), Location::GPR(dst)) => {
1552                dynasm!(self ; xchg Rb(dst), Rb(src));
1553            }
1554            (Size::S16, Location::GPR(src), Location::GPR(dst)) => {
1555                dynasm!(self ; xchg Rw(dst), Rw(src));
1556            }
1557            (Size::S32, Location::GPR(src), Location::GPR(dst)) => {
1558                dynasm!(self ; xchg Rd(dst), Rd(src));
1559            }
1560            (Size::S64, Location::GPR(src), Location::GPR(dst)) => {
1561                dynasm!(self ; xchg Rq(dst), Rq(src));
1562            }
1563            (Size::S8, Location::Memory(src, disp), Location::GPR(dst)) => {
1564                dynasm!(self ; xchg Rb(dst), [Rq(src) + disp]);
1565            }
1566            (Size::S8, Location::GPR(src), Location::Memory(dst, disp)) => {
1567                dynasm!(self ; xchg [Rq(dst) + disp], Rb(src));
1568            }
1569            (Size::S16, Location::Memory(src, disp), Location::GPR(dst)) => {
1570                dynasm!(self ; xchg Rw(dst), [Rq(src) + disp]);
1571            }
1572            (Size::S16, Location::GPR(src), Location::Memory(dst, disp)) => {
1573                dynasm!(self ; xchg [Rq(dst) + disp], Rw(src));
1574            }
1575            (Size::S32, Location::Memory(src, disp), Location::GPR(dst)) => {
1576                dynasm!(self ; xchg Rd(dst), [Rq(src) + disp]);
1577            }
1578            (Size::S32, Location::GPR(src), Location::Memory(dst, disp)) => {
1579                dynasm!(self ; xchg [Rq(dst) + disp], Rd(src));
1580            }
1581            (Size::S64, Location::Memory(src, disp), Location::GPR(dst)) => {
1582                dynasm!(self ; xchg Rq(dst), [Rq(src) + disp]);
1583            }
1584            (Size::S64, Location::GPR(src), Location::Memory(dst, disp)) => {
1585                dynasm!(self ; xchg [Rq(dst) + disp], Rq(src));
1586            }
1587            _ => codegen_error!("singlepass can't emit XCHG {:?} {:?} {:?}", sz, src, dst),
1588        }
1589        Ok(())
1590    }
1591
1592    fn emit_lock_xadd(
1593        &mut self,
1594        sz: Size,
1595        src: Location,
1596        dst: Location,
1597    ) -> Result<(), CompileError> {
1598        match (sz, src, dst) {
1599            (Size::S8, Location::GPR(src), Location::Memory(dst, disp)) => {
1600                dynasm!(self ; lock xadd [Rq(dst) + disp], Rb(src));
1601            }
1602            (Size::S16, Location::GPR(src), Location::Memory(dst, disp)) => {
1603                dynasm!(self ; lock xadd [Rq(dst) + disp], Rw(src));
1604            }
1605            (Size::S32, Location::GPR(src), Location::Memory(dst, disp)) => {
1606                dynasm!(self ; lock xadd [Rq(dst) + disp], Rd(src));
1607            }
1608            (Size::S64, Location::GPR(src), Location::Memory(dst, disp)) => {
1609                dynasm!(self ; lock xadd [Rq(dst) + disp], Rq(src));
1610            }
1611            _ => codegen_error!(
1612                "singlepass can't emit LOCK XADD {:?} {:?} {:?}",
1613                sz,
1614                src,
1615                dst
1616            ),
1617        }
1618        Ok(())
1619    }
1620
1621    fn emit_lock_cmpxchg(
1622        &mut self,
1623        sz: Size,
1624        src: Location,
1625        dst: Location,
1626    ) -> Result<(), CompileError> {
1627        match (sz, src, dst) {
1628            (Size::S8, Location::GPR(src), Location::Memory(dst, disp)) => {
1629                dynasm!(self ; lock cmpxchg [Rq(dst) + disp], Rb(src));
1630            }
1631            (Size::S16, Location::GPR(src), Location::Memory(dst, disp)) => {
1632                dynasm!(self ; lock cmpxchg [Rq(dst) + disp], Rw(src));
1633            }
1634            (Size::S32, Location::GPR(src), Location::Memory(dst, disp)) => {
1635                dynasm!(self ; lock cmpxchg [Rq(dst) + disp], Rd(src));
1636            }
1637            (Size::S64, Location::GPR(src), Location::Memory(dst, disp)) => {
1638                dynasm!(self ; lock cmpxchg [Rq(dst) + disp], Rq(src));
1639            }
1640            _ => codegen_error!(
1641                "singlepass can't emit LOCK CMPXCHG {:?} {:?} {:?}",
1642                sz,
1643                src,
1644                dst
1645            ),
1646        }
1647        Ok(())
1648    }
1649
    /// Emits `rep stosq`.
    ///
    /// The instruction takes no explicit operands here; its implicit operands (per the x86
    /// ISA: count, stored value and destination pointer registers) must already have been
    /// set up by the caller.
    fn emit_rep_stosq(&mut self) -> Result<(), CompileError> {
        dynasm!(self ; rep stosq);
        Ok(())
    }
    /// Emits `btc` (bit test and complement) on the 32-bit view of `dst`, with the bit index
    /// `src` encoded as an 8-bit immediate.
    fn emit_btc_gpr_imm8_32(&mut self, src: u8, dst: GPR) -> Result<(), CompileError> {
        dynasm!(self ; btc Rd(dst), BYTE src as i8);
        Ok(())
    }
1658
    /// Emits `btc` (bit test and complement) on the 64-bit register `dst`, with the bit index
    /// `src` encoded as an 8-bit immediate.
    fn emit_btc_gpr_imm8_64(&mut self, src: u8, dst: GPR) -> Result<(), CompileError> {
        dynasm!(self ; btc Rq(dst), BYTE src as i8);
        Ok(())
    }
1663
    /// Emits `cmovae dst, src` on the 32-bit views of both registers (conditional move taken
    /// on the "above or equal" condition).
    fn emit_cmovae_gpr_32(&mut self, src: GPR, dst: GPR) -> Result<(), CompileError> {
        dynasm!(self ; cmovae Rd(dst), Rd(src));
        Ok(())
    }
1668
    /// Emits `cmovae dst, src` on the full 64-bit registers (conditional move taken on the
    /// "above or equal" condition).
    fn emit_cmovae_gpr_64(&mut self, src: GPR, dst: GPR) -> Result<(), CompileError> {
        dynasm!(self ; cmovae Rq(dst), Rq(src));
        Ok(())
    }
1673
1674    fn emit_vmovaps(&mut self, src: XMMOrMemory, dst: XMMOrMemory) -> Result<(), CompileError> {
1675        match (src, dst) {
1676            (XMMOrMemory::XMM(src), XMMOrMemory::XMM(dst)) => {
1677                dynasm!(self ; movaps Rx(dst), Rx(src))
1678            }
1679            (XMMOrMemory::Memory(base, disp), XMMOrMemory::XMM(dst)) => {
1680                dynasm!(self ; movaps Rx(dst), [Rq(base) + disp])
1681            }
1682            (XMMOrMemory::XMM(src), XMMOrMemory::Memory(base, disp)) => {
1683                dynasm!(self ; movaps [Rq(base) + disp], Rx(src))
1684            }
1685            _ => codegen_error!("singlepass can't emit VMOVAPS {:?} {:?}", src, dst),
1686        };
1687        Ok(())
1688    }
1689
1690    fn emit_vmovapd(&mut self, src: XMMOrMemory, dst: XMMOrMemory) -> Result<(), CompileError> {
1691        match (src, dst) {
1692            (XMMOrMemory::XMM(src), XMMOrMemory::XMM(dst)) => {
1693                dynasm!(self ; movapd Rx(dst), Rx(src))
1694            }
1695            (XMMOrMemory::Memory(base, disp), XMMOrMemory::XMM(dst)) => {
1696                dynasm!(self ; movapd Rx(dst), [Rq(base) + disp])
1697            }
1698            (XMMOrMemory::XMM(src), XMMOrMemory::Memory(base, disp)) => {
1699                dynasm!(self ; movapd [Rq(base) + disp], Rx(src))
1700            }
1701            _ => codegen_error!("singlepass can't emit VMOVAPD {:?} {:?}", src, dst),
1702        };
1703        Ok(())
1704    }
1705    fn emit_vxorps(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1706        match self.get_simd_arch() {
1707            Some(CpuFeature::AVX) => avx_fn!(vxorps, self, src1, src2, dst),
1708            Some(CpuFeature::SSE42) => sse_fn!(xorps, self, Precision::Single, src1, src2, dst),
1709            _ => {}
1710        }
1711        Ok(())
1712    }
1713    fn emit_vxorpd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1714        match self.get_simd_arch() {
1715            Some(CpuFeature::AVX) => avx_fn!(vxorpd, self, src1, src2, dst),
1716            Some(CpuFeature::SSE42) => sse_fn!(xorpd, self, Precision::Double, src1, src2, dst),
1717            _ => {}
1718        }
1719        Ok(())
1720    }
1721    fn emit_vaddss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1722        match self.get_simd_arch() {
1723            Some(CpuFeature::AVX) => avx_fn!(vaddss, self, src1, src2, dst),
1724            Some(CpuFeature::SSE42) => sse_fn!(addss, self, Precision::Single, src1, src2, dst),
1725            _ => {}
1726        }
1727        Ok(())
1728    }
1729    fn emit_vaddsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1730        match self.get_simd_arch() {
1731            Some(CpuFeature::AVX) => avx_fn!(vaddsd, self, src1, src2, dst),
1732            Some(CpuFeature::SSE42) => sse_fn!(addsd, self, Precision::Double, src1, src2, dst),
1733            _ => {}
1734        }
1735        Ok(())
1736    }
1737    fn emit_vsubss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1738        match self.get_simd_arch() {
1739            Some(CpuFeature::AVX) => avx_fn!(vsubss, self, src1, src2, dst),
1740            Some(CpuFeature::SSE42) => sse_fn!(subss, self, Precision::Single, src1, src2, dst),
1741            _ => {}
1742        }
1743        Ok(())
1744    }
1745    fn emit_vsubsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1746        match self.get_simd_arch() {
1747            Some(CpuFeature::AVX) => avx_fn!(vsubsd, self, src1, src2, dst),
1748            Some(CpuFeature::SSE42) => sse_fn!(subsd, self, Precision::Double, src1, src2, dst),
1749            _ => {}
1750        }
1751        Ok(())
1752    }
1753    fn emit_vmulss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1754        match self.get_simd_arch() {
1755            Some(CpuFeature::AVX) => avx_fn!(vmulss, self, src1, src2, dst),
1756            Some(CpuFeature::SSE42) => sse_fn!(mulss, self, Precision::Single, src1, src2, dst),
1757            _ => {}
1758        }
1759        Ok(())
1760    }
1761    fn emit_vmulsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1762        match self.get_simd_arch() {
1763            Some(CpuFeature::AVX) => avx_fn!(vmulsd, self, src1, src2, dst),
1764            Some(CpuFeature::SSE42) => sse_fn!(mulsd, self, Precision::Double, src1, src2, dst),
1765            _ => {}
1766        }
1767        Ok(())
1768    }
1769    fn emit_vdivss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1770        match self.get_simd_arch() {
1771            Some(CpuFeature::AVX) => avx_fn!(vdivss, self, src1, src2, dst),
1772            Some(CpuFeature::SSE42) => sse_fn!(divss, self, Precision::Single, src1, src2, dst),
1773            _ => {}
1774        }
1775        Ok(())
1776    }
1777    fn emit_vdivsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1778        match self.get_simd_arch() {
1779            Some(CpuFeature::AVX) => avx_fn!(vdivsd, self, src1, src2, dst),
1780            Some(CpuFeature::SSE42) => sse_fn!(divsd, self, Precision::Double, src1, src2, dst),
1781            _ => {}
1782        }
1783        Ok(())
1784    }
1785    fn emit_vmaxss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1786        match self.get_simd_arch() {
1787            Some(CpuFeature::AVX) => avx_fn!(vmaxss, self, src1, src2, dst),
1788            Some(CpuFeature::SSE42) => sse_fn!(maxss, self, Precision::Single, src1, src2, dst),
1789            _ => {}
1790        }
1791        Ok(())
1792    }
1793    fn emit_vmaxsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1794        match self.get_simd_arch() {
1795            Some(CpuFeature::AVX) => avx_fn!(vmaxsd, self, src1, src2, dst),
1796            Some(CpuFeature::SSE42) => sse_fn!(maxsd, self, Precision::Double, src1, src2, dst),
1797            _ => {}
1798        }
1799        Ok(())
1800    }
1801    fn emit_vminss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1802        match self.get_simd_arch() {
1803            Some(CpuFeature::AVX) => avx_fn!(vminss, self, src1, src2, dst),
1804            Some(CpuFeature::SSE42) => sse_fn!(minss, self, Precision::Single, src1, src2, dst),
1805            _ => {}
1806        }
1807        Ok(())
1808    }
1809    fn emit_vminsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1810        match self.get_simd_arch() {
1811            Some(CpuFeature::AVX) => avx_fn!(vminsd, self, src1, src2, dst),
1812            Some(CpuFeature::SSE42) => sse_fn!(minsd, self, Precision::Double, src1, src2, dst),
1813            _ => {}
1814        }
1815        Ok(())
1816    }
1817    fn emit_vcmpeqss(
1818        &mut self,
1819        src1: XMM,
1820        src2: XMMOrMemory,
1821        dst: XMM,
1822    ) -> Result<(), CompileError> {
1823        match self.get_simd_arch() {
1824            Some(CpuFeature::AVX) => avx_fn!(vcmpeqss, self, src1, src2, dst),
1825            Some(CpuFeature::SSE42) => sse_fn!(cmpss, 0, self, Precision::Single, src1, src2, dst),
1826            _ => {}
1827        }
1828        Ok(())
1829    }
1830    fn emit_vcmpeqsd(
1831        &mut self,
1832        src1: XMM,
1833        src2: XMMOrMemory,
1834        dst: XMM,
1835    ) -> Result<(), CompileError> {
1836        match self.get_simd_arch() {
1837            Some(CpuFeature::AVX) => avx_fn!(vcmpeqsd, self, src1, src2, dst),
1838            Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 0, self, Precision::Double, src1, src2, dst),
1839            _ => {}
1840        }
1841        Ok(())
1842    }
1843    fn emit_vcmpneqss(
1844        &mut self,
1845        src1: XMM,
1846        src2: XMMOrMemory,
1847        dst: XMM,
1848    ) -> Result<(), CompileError> {
1849        match self.get_simd_arch() {
1850            Some(CpuFeature::AVX) => avx_fn!(vcmpneqss, self, src1, src2, dst),
1851            Some(CpuFeature::SSE42) => sse_fn!(cmpss, 4, self, Precision::Single, src1, src2, dst),
1852            _ => {}
1853        }
1854        Ok(())
1855    }
1856    fn emit_vcmpneqsd(
1857        &mut self,
1858        src1: XMM,
1859        src2: XMMOrMemory,
1860        dst: XMM,
1861    ) -> Result<(), CompileError> {
1862        match self.get_simd_arch() {
1863            Some(CpuFeature::AVX) => avx_fn!(vcmpneqsd, self, src1, src2, dst),
1864            Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 4, self, Precision::Double, src1, src2, dst),
1865            _ => {}
1866        }
1867        Ok(())
1868    }
1869    fn emit_vcmpltss(
1870        &mut self,
1871        src1: XMM,
1872        src2: XMMOrMemory,
1873        dst: XMM,
1874    ) -> Result<(), CompileError> {
1875        match self.get_simd_arch() {
1876            Some(CpuFeature::AVX) => avx_fn!(vcmpltss, self, src1, src2, dst),
1877            Some(CpuFeature::SSE42) => sse_fn!(cmpss, 1, self, Precision::Single, src1, src2, dst),
1878            _ => {}
1879        }
1880        Ok(())
1881    }
1882    fn emit_vcmpltsd(
1883        &mut self,
1884        src1: XMM,
1885        src2: XMMOrMemory,
1886        dst: XMM,
1887    ) -> Result<(), CompileError> {
1888        match self.get_simd_arch() {
1889            Some(CpuFeature::AVX) => avx_fn!(vcmpltsd, self, src1, src2, dst),
1890            Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 1, self, Precision::Double, src1, src2, dst),
1891            _ => {}
1892        }
1893        Ok(())
1894    }
1895    fn emit_vcmpless(
1896        &mut self,
1897        src1: XMM,
1898        src2: XMMOrMemory,
1899        dst: XMM,
1900    ) -> Result<(), CompileError> {
1901        match self.get_simd_arch() {
1902            Some(CpuFeature::AVX) => avx_fn!(vcmpless, self, src1, src2, dst),
1903            Some(CpuFeature::SSE42) => sse_fn!(cmpss, 2, self, Precision::Single, src1, src2, dst),
1904            _ => {}
1905        }
1906        Ok(())
1907    }
1908    fn emit_vcmplesd(
1909        &mut self,
1910        src1: XMM,
1911        src2: XMMOrMemory,
1912        dst: XMM,
1913    ) -> Result<(), CompileError> {
1914        match self.get_simd_arch() {
1915            Some(CpuFeature::AVX) => avx_fn!(vcmplesd, self, src1, src2, dst),
1916            Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 2, self, Precision::Double, src1, src2, dst),
1917            _ => {}
1918        }
1919        Ok(())
1920    }
1921    fn emit_vcmpgtss(
1922        &mut self,
1923        src1: XMM,
1924        src2: XMMOrMemory,
1925        dst: XMM,
1926    ) -> Result<(), CompileError> {
1927        match self.get_simd_arch() {
1928            Some(CpuFeature::AVX) => avx_fn!(vcmpgtss, self, src1, src2, dst),
1929            Some(CpuFeature::SSE42) => sse_fn!(cmpss, 6, self, Precision::Single, src1, src2, dst),
1930            _ => {}
1931        }
1932        Ok(())
1933    }
1934    fn emit_vcmpgtsd(
1935        &mut self,
1936        src1: XMM,
1937        src2: XMMOrMemory,
1938        dst: XMM,
1939    ) -> Result<(), CompileError> {
1940        match self.get_simd_arch() {
1941            Some(CpuFeature::AVX) => avx_fn!(vcmpgtsd, self, src1, src2, dst),
1942            Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 6, self, Precision::Double, src1, src2, dst),
1943            _ => {}
1944        }
1945        Ok(())
1946    }
1947    fn emit_vcmpgess(
1948        &mut self,
1949        src1: XMM,
1950        src2: XMMOrMemory,
1951        dst: XMM,
1952    ) -> Result<(), CompileError> {
1953        match self.get_simd_arch() {
1954            Some(CpuFeature::AVX) => avx_fn!(vcmpgess, self, src1, src2, dst),
1955            Some(CpuFeature::SSE42) => sse_fn!(cmpss, 5, self, Precision::Single, src1, src2, dst),
1956            _ => {}
1957        }
1958        Ok(())
1959    }
1960    fn emit_vcmpgesd(
1961        &mut self,
1962        src1: XMM,
1963        src2: XMMOrMemory,
1964        dst: XMM,
1965    ) -> Result<(), CompileError> {
1966        match self.get_simd_arch() {
1967            Some(CpuFeature::AVX) => avx_fn!(vcmpgesd, self, src1, src2, dst),
1968            Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 5, self, Precision::Double, src1, src2, dst),
1969            _ => {}
1970        }
1971        Ok(())
1972    }
1973    fn emit_vcmpunordss(
1974        &mut self,
1975        src1: XMM,
1976        src2: XMMOrMemory,
1977        dst: XMM,
1978    ) -> Result<(), CompileError> {
1979        match self.get_simd_arch() {
1980            Some(CpuFeature::AVX) => avx_fn!(vcmpunordss, self, src1, src2, dst),
1981            Some(CpuFeature::SSE42) => sse_fn!(cmpss, 3, self, Precision::Single, src1, src2, dst),
1982            _ => {}
1983        }
1984        Ok(())
1985    }
1986    fn emit_vcmpunordsd(
1987        &mut self,
1988        src1: XMM,
1989        src2: XMMOrMemory,
1990        dst: XMM,
1991    ) -> Result<(), CompileError> {
1992        match self.get_simd_arch() {
1993            Some(CpuFeature::AVX) => avx_fn!(vcmpunordsd, self, src1, src2, dst),
1994            Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 3, self, Precision::Double, src1, src2, dst),
1995            _ => {}
1996        }
1997        Ok(())
1998    }
1999    fn emit_vcmpordss(
2000        &mut self,
2001        src1: XMM,
2002        src2: XMMOrMemory,
2003        dst: XMM,
2004    ) -> Result<(), CompileError> {
2005        match self.get_simd_arch() {
2006            Some(CpuFeature::AVX) => avx_fn!(vcmpordss, self, src1, src2, dst),
2007            Some(CpuFeature::SSE42) => sse_fn!(cmpss, 7, self, Precision::Single, src1, src2, dst),
2008            _ => {}
2009        }
2010        Ok(())
2011    }
2012    fn emit_vcmpordsd(
2013        &mut self,
2014        src1: XMM,
2015        src2: XMMOrMemory,
2016        dst: XMM,
2017    ) -> Result<(), CompileError> {
2018        match self.get_simd_arch() {
2019            Some(CpuFeature::AVX) => avx_fn!(vcmpordsd, self, src1, src2, dst),
2020            Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 7, self, Precision::Double, src1, src2, dst),
2021            _ => {}
2022        }
2023        Ok(())
2024    }
2025    fn emit_vsqrtss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
2026        match self.get_simd_arch() {
2027            Some(CpuFeature::AVX) => avx_fn!(vsqrtss, self, src1, src2, dst),
2028            Some(CpuFeature::SSE42) => sse_fn!(sqrtss, self, Precision::Single, src1, src2, dst),
2029            _ => {}
2030        }
2031        Ok(())
2032    }
2033    fn emit_vsqrtsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
2034        match self.get_simd_arch() {
2035            Some(CpuFeature::AVX) => avx_fn!(vsqrtsd, self, src1, src2, dst),
2036            Some(CpuFeature::SSE42) => sse_fn!(sqrtsd, self, Precision::Double, src1, src2, dst),
2037            _ => {}
2038        }
2039        Ok(())
2040    }
2041    fn emit_vcvtss2sd(
2042        &mut self,
2043        src1: XMM,
2044        src2: XMMOrMemory,
2045        dst: XMM,
2046    ) -> Result<(), CompileError> {
2047        match self.get_simd_arch() {
2048            Some(CpuFeature::AVX) => avx_fn!(vcvtss2sd, self, src1, src2, dst),
2049            Some(CpuFeature::SSE42) => sse_fn!(cvtss2sd, self, Precision::Single, src1, src2, dst),
2050            _ => {}
2051        }
2052        Ok(())
2053    }
2054    fn emit_vcvtsd2ss(
2055        &mut self,
2056        src1: XMM,
2057        src2: XMMOrMemory,
2058        dst: XMM,
2059    ) -> Result<(), CompileError> {
2060        match self.get_simd_arch() {
2061            Some(CpuFeature::AVX) => avx_fn!(vcvtsd2ss, self, src1, src2, dst),
2062            Some(CpuFeature::SSE42) => sse_fn!(cvtsd2ss, self, Precision::Double, src1, src2, dst),
2063            _ => {}
2064        }
2065        Ok(())
2066    }
2067    fn emit_vroundss_nearest(
2068        &mut self,
2069        src1: XMM,
2070        src2: XMMOrMemory,
2071        dst: XMM,
2072    ) -> Result<(), CompileError> {
2073        match self.get_simd_arch() {
2074            Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 0, self, src1, src2, dst),
2075            Some(CpuFeature::SSE42) => {
2076                sse_round_fn!(roundss, 0, self, Precision::Single, src1, src2, dst)
2077            }
2078            _ => {}
2079        }
2080        Ok(())
2081    }
2082    fn emit_vroundsd_nearest(
2083        &mut self,
2084        src1: XMM,
2085        src2: XMMOrMemory,
2086        dst: XMM,
2087    ) -> Result<(), CompileError> {
2088        match self.get_simd_arch() {
2089            Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 0, self, src1, src2, dst),
2090            Some(CpuFeature::SSE42) => {
2091                sse_round_fn!(roundsd, 0, self, Precision::Double, src1, src2, dst)
2092            }
2093            _ => {}
2094        }
2095        Ok(())
2096    }
2097    fn emit_vroundss_floor(
2098        &mut self,
2099        src1: XMM,
2100        src2: XMMOrMemory,
2101        dst: XMM,
2102    ) -> Result<(), CompileError> {
2103        match self.get_simd_arch() {
2104            Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 1, self, src1, src2, dst),
2105            Some(CpuFeature::SSE42) => {
2106                sse_round_fn!(roundss, 1, self, Precision::Single, src1, src2, dst)
2107            }
2108            _ => {}
2109        }
2110        Ok(())
2111    }
2112    fn emit_vroundsd_floor(
2113        &mut self,
2114        src1: XMM,
2115        src2: XMMOrMemory,
2116        dst: XMM,
2117    ) -> Result<(), CompileError> {
2118        match self.get_simd_arch() {
2119            Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 1, self, src1, src2, dst),
2120            Some(CpuFeature::SSE42) => {
2121                sse_round_fn!(roundsd, 1, self, Precision::Double, src1, src2, dst)
2122            }
2123            _ => {}
2124        }
2125        Ok(())
2126    }
2127    fn emit_vroundss_ceil(
2128        &mut self,
2129        src1: XMM,
2130        src2: XMMOrMemory,
2131        dst: XMM,
2132    ) -> Result<(), CompileError> {
2133        match self.get_simd_arch() {
2134            Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 2, self, src1, src2, dst),
2135            Some(CpuFeature::SSE42) => {
2136                sse_round_fn!(roundss, 2, self, Precision::Single, src1, src2, dst)
2137            }
2138            _ => {}
2139        }
2140        Ok(())
2141    }
2142    fn emit_vroundsd_ceil(
2143        &mut self,
2144        src1: XMM,
2145        src2: XMMOrMemory,
2146        dst: XMM,
2147    ) -> Result<(), CompileError> {
2148        match self.get_simd_arch() {
2149            Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 2, self, src1, src2, dst),
2150            Some(CpuFeature::SSE42) => {
2151                sse_round_fn!(roundsd, 2, self, Precision::Double, src1, src2, dst)
2152            }
2153            _ => {}
2154        }
2155        Ok(())
2156    }
2157    fn emit_vroundss_trunc(
2158        &mut self,
2159        src1: XMM,
2160        src2: XMMOrMemory,
2161        dst: XMM,
2162    ) -> Result<(), CompileError> {
2163        match self.get_simd_arch() {
2164            Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 3, self, src1, src2, dst),
2165            Some(CpuFeature::SSE42) => {
2166                sse_round_fn!(roundss, 3, self, Precision::Single, src1, src2, dst)
2167            }
2168            _ => {}
2169        }
2170        Ok(())
2171    }
2172    fn emit_vroundsd_trunc(
2173        &mut self,
2174        src1: XMM,
2175        src2: XMMOrMemory,
2176        dst: XMM,
2177    ) -> Result<(), CompileError> {
2178        match self.get_simd_arch() {
2179            Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 3, self, src1, src2, dst),
2180            Some(CpuFeature::SSE42) => {
2181                sse_round_fn!(roundsd, 3, self, Precision::Double, src1, src2, dst)
2182            }
2183            _ => {}
2184        }
2185        Ok(())
2186    }
2187    fn emit_vcvtsi2ss_32(
2188        &mut self,
2189        src1: XMM,
2190        src2: GPROrMemory,
2191        dst: XMM,
2192    ) -> Result<(), CompileError> {
2193        match self.get_simd_arch() {
2194            Some(CpuFeature::AVX) => avx_i2f_32_fn!(vcvtsi2ss, self, src1, src2, dst),
2195            Some(CpuFeature::SSE42) => {
2196                sse_i2f_32_fn!(cvtsi2ss, self, Precision::Single, src1, src2, dst)
2197            }
2198            _ => {}
2199        }
2200        Ok(())
2201    }
2202    fn emit_vcvtsi2sd_32(
2203        &mut self,
2204        src1: XMM,
2205        src2: GPROrMemory,
2206        dst: XMM,
2207    ) -> Result<(), CompileError> {
2208        match self.get_simd_arch() {
2209            Some(CpuFeature::AVX) => avx_i2f_32_fn!(vcvtsi2sd, self, src1, src2, dst),
2210            Some(CpuFeature::SSE42) => {
2211                sse_i2f_32_fn!(cvtsi2sd, self, Precision::Double, src1, src2, dst)
2212            }
2213            _ => {}
2214        }
2215        Ok(())
2216    }
2217    fn emit_vcvtsi2ss_64(
2218        &mut self,
2219        src1: XMM,
2220        src2: GPROrMemory,
2221        dst: XMM,
2222    ) -> Result<(), CompileError> {
2223        match self.get_simd_arch() {
2224            Some(CpuFeature::AVX) => avx_i2f_64_fn!(vcvtsi2ss, self, src1, src2, dst),
2225            Some(CpuFeature::SSE42) => {
2226                sse_i2f_64_fn!(cvtsi2ss, self, Precision::Single, src1, src2, dst)
2227            }
2228            _ => {}
2229        }
2230        Ok(())
2231    }
2232    fn emit_vcvtsi2sd_64(
2233        &mut self,
2234        src1: XMM,
2235        src2: GPROrMemory,
2236        dst: XMM,
2237    ) -> Result<(), CompileError> {
2238        match self.get_simd_arch() {
2239            Some(CpuFeature::AVX) => avx_i2f_64_fn!(vcvtsi2sd, self, src1, src2, dst),
2240            Some(CpuFeature::SSE42) => {
2241                sse_i2f_64_fn!(cvtsi2sd, self, Precision::Double, src1, src2, dst)
2242            }
2243            _ => {}
2244        }
2245        Ok(())
2246    }
2247
2248    fn emit_vblendvps(
2249        &mut self,
2250        src1: XMM,
2251        src2: XMMOrMemory,
2252        mask: XMM,
2253        dst: XMM,
2254    ) -> Result<(), CompileError> {
2255        // this implementation works only for sse 4.1 and greater
2256        match self.get_simd_arch() {
2257            Some(CpuFeature::AVX) => match src2 {
2258                XMMOrMemory::XMM(src2) => {
2259                    // TODO: this argument order does not match the documentation??
2260                    dynasm!( self; vblendvps Rx(dst), Rx(mask), Rx(src2), Rx(src1))
2261                }
2262                XMMOrMemory::Memory(base, disp) => {
2263                    dynasm!( self; vblendvps Rx(dst), Rx(mask), [Rq(base) + disp], Rx(src1))
2264                }
2265            },
2266            Some(CpuFeature::SSE42) => match src2 {
2267                XMMOrMemory::XMM(src2) => {
2268                    move_src_to_dst(self, Precision::Single, src1, dst);
2269                    dynasm!( self; blendvps Rx(dst), Rx(src2))
2270                }
2271                XMMOrMemory::Memory(base, disp) => {
2272                    move_src_to_dst(self, Precision::Single, src1, dst);
2273                    dynasm!( self; blendvps Rx(dst), [Rq(base) + disp])
2274                }
2275            },
2276            _ => {}
2277        }
2278        Ok(())
2279    }
2280
2281    fn emit_vblendvpd(
2282        &mut self,
2283        src1: XMM,
2284        src2: XMMOrMemory,
2285        mask: XMM,
2286        dst: XMM,
2287    ) -> Result<(), CompileError> {
2288        // this implementation works only for sse 4.1 and greater
2289        match self.get_simd_arch() {
2290            Some(CpuFeature::AVX) => match src2 {
2291                XMMOrMemory::XMM(src2) => {
2292                    // TODO: this argument order does not match the documentation??
2293                    dynasm!( self; vblendvpd Rx(dst), Rx(mask), Rx(src2), Rx(src1))
2294                }
2295                XMMOrMemory::Memory(base, disp) => {
2296                    dynasm!( self; vblendvpd Rx(dst), Rx(mask), [Rq(base) + disp], Rx(src1))
2297                }
2298            },
2299            Some(CpuFeature::SSE42) => match src2 {
2300                XMMOrMemory::XMM(src2) => {
2301                    move_src_to_dst(self, Precision::Double, src1, dst);
2302                    dynasm!( self; blendvpd Rx(dst), Rx(src2))
2303                }
2304                XMMOrMemory::Memory(base, disp) => {
2305                    move_src_to_dst(self, Precision::Double, src1, dst);
2306                    dynasm!( self; blendvpd Rx(dst), [Rq(base) + disp])
2307                }
2308            },
2309            _ => {}
2310        }
2311        Ok(())
2312    }
2313
2314    fn emit_ucomiss(&mut self, src: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
2315        match src {
2316            XMMOrMemory::XMM(x) => dynasm!(self ; ucomiss Rx(dst), Rx(x)),
2317            XMMOrMemory::Memory(base, disp) => {
2318                dynasm!(self ; ucomiss Rx(dst), [Rq(base) + disp])
2319            }
2320        }
2321        Ok(())
2322    }
2323
2324    fn emit_ucomisd(&mut self, src: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
2325        match src {
2326            XMMOrMemory::XMM(x) => dynasm!(self ; ucomisd Rx(dst), Rx(x)),
2327            XMMOrMemory::Memory(base, disp) => {
2328                dynasm!(self ; ucomisd Rx(dst), [Rq(base) + disp])
2329            }
2330        }
2331        Ok(())
2332    }
2333
2334    fn emit_cvttss2si_32(&mut self, src: XMMOrMemory, dst: GPR) -> Result<(), CompileError> {
2335        match src {
2336            XMMOrMemory::XMM(x) => dynasm!(self ; cvttss2si Rd(dst), Rx(x)),
2337            XMMOrMemory::Memory(base, disp) => {
2338                dynasm!(self ; cvttss2si Rd(dst), [Rq(base) + disp])
2339            }
2340        }
2341        Ok(())
2342    }
2343
2344    fn emit_cvttss2si_64(&mut self, src: XMMOrMemory, dst: GPR) -> Result<(), CompileError> {
2345        match src {
2346            XMMOrMemory::XMM(x) => dynasm!(self ; cvttss2si Rq(dst), Rx(x)),
2347            XMMOrMemory::Memory(base, disp) => {
2348                dynasm!(self ; cvttss2si Rq(dst), [Rq(base) + disp])
2349            }
2350        }
2351        Ok(())
2352    }
2353
2354    fn emit_cvttsd2si_32(&mut self, src: XMMOrMemory, dst: GPR) -> Result<(), CompileError> {
2355        match src {
2356            XMMOrMemory::XMM(x) => dynasm!(self ; cvttsd2si Rd(dst), Rx(x)),
2357            XMMOrMemory::Memory(base, disp) => {
2358                dynasm!(self ; cvttsd2si Rd(dst), [Rq(base) + disp])
2359            }
2360        }
2361        Ok(())
2362    }
2363
2364    fn emit_cvttsd2si_64(&mut self, src: XMMOrMemory, dst: GPR) -> Result<(), CompileError> {
2365        match src {
2366            XMMOrMemory::XMM(x) => dynasm!(self ; cvttsd2si Rq(dst), Rx(x)),
2367            XMMOrMemory::Memory(base, disp) => {
2368                dynasm!(self ; cvttsd2si Rq(dst), [Rq(base) + disp])
2369            }
2370        }
2371        Ok(())
2372    }
2373
2374    fn emit_test_gpr_64(&mut self, reg: GPR) -> Result<(), CompileError> {
2375        dynasm!(self ; test Rq(reg), Rq(reg));
2376        Ok(())
2377    }
2378
2379    fn emit_ud2(&mut self) -> Result<(), CompileError> {
2380        dynasm!(self ; ud2);
2381        Ok(())
2382    }
2383    #[allow(clippy::useless_conversion)]
2384    fn emit_ud1_payload(&mut self, payload: u8) -> Result<(), CompileError> {
2385        assert!(payload & 0xf0 == 0);
2386        let reg1 = (payload >> 3) & 1;
2387        let reg2 = payload & 7;
2388
2389        dynasm!(self ; ud1 Rd(reg1), Rd(reg2));
2390        Ok(())
2391    }
2392    fn emit_ret(&mut self) -> Result<(), CompileError> {
2393        dynasm!(self ; ret);
2394        Ok(())
2395    }
2396
2397    fn emit_call_label(&mut self, label: Label) -> Result<(), CompileError> {
2398        dynasm!(self ; call =>label);
2399        Ok(())
2400    }
2401    fn emit_call_location(&mut self, loc: Location) -> Result<(), CompileError> {
2402        match loc {
2403            Location::GPR(x) => dynasm!(self ; call Rq(x)),
2404            Location::Memory(base, disp) => dynasm!(self ; call QWORD [Rq(base) + disp]),
2405            _ => codegen_error!("singlepass can't emit CALL {:?}", loc),
2406        }
2407        Ok(())
2408    }
2409
2410    fn emit_call_register(&mut self, reg: GPR) -> Result<(), CompileError> {
2411        dynasm!(self ; call Rq(reg));
2412        Ok(())
2413    }
2414
2415    fn emit_bkpt(&mut self) -> Result<(), CompileError> {
2416        dynasm!(self ; int3);
2417        Ok(())
2418    }
2419
2420    fn emit_host_redirection(&mut self, target: GPR) -> Result<(), CompileError> {
2421        self.emit_jmp_location(Location::GPR(target))
2422    }
2423
2424    fn arch_mov64_imm_offset(&self) -> Result<usize, CompileError> {
2425        Ok(2)
2426    }
2427}