wasmer_compiler_singlepass/
emitter_x64.rs

1use crate::{
2    codegen_error, common_decl::Size, location::Location as AbstractLocation,
3    machine_x64::AssemblerX64,
4};
5pub use crate::{
6    location::Multiplier,
7    machine::{Label, Offset},
8    x64_decl::{GPR, XMM},
9};
10use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi};
11use wasmer_types::{CompileError, target::CpuFeature};
12
/// File-local wrapper around `dynasm::dynasm!` that always assembles for x64.
///
/// The upstream proc-macro picks its default architecture from `target_arch`, but a
/// proc-macro only ever sees the `target_arch` of the host it runs on, so that default
/// is wrong when cross-compiling. This wrapper injects an explicit `.arch x64`
/// directive and targets the `inner` field of the assembler expression given first.
macro_rules! dynasm {
    ($asm:expr_2021 ; $($body:tt)*) => {
        dynasm::dynasm!(
            $asm.inner
            ; .arch x64
            ; $($body)*
        )
    };
}
26
/// Operand location for the x64 backend: the generic `AbstractLocation` instantiated
/// with [`GPR`] and [`XMM`] as its two register types.
pub type Location = AbstractLocation<GPR, XMM>;
28
/// Condition selector passed to [`EmitterX64::emit_jmp`] and [`EmitterX64::emit_set`].
///
/// The variant names follow the usual x86 condition-code vocabulary. How each one is
/// encoded is decided by the trait implementors, which are not part of this
/// declaration.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Condition {
    /// No condition attached (presumably an unconditional jump — confirm against
    /// the `emit_jmp` implementation).
    None,
    /// "Above".
    Above,
    /// "Above or equal".
    AboveEqual,
    /// "Below".
    Below,
    /// "Below or equal".
    BelowEqual,
    /// "Greater".
    Greater,
    /// "Greater or equal".
    GreaterEqual,
    /// "Less".
    Less,
    /// "Less or equal".
    LessEqual,
    /// "Equal".
    Equal,
    /// "Not equal".
    NotEqual,
    /// "Signed".
    Signed,
    /// "Carry".
    Carry,
}
45
46#[derive(Copy, Clone, Debug, Eq, PartialEq)]
47#[allow(dead_code, clippy::upper_case_acronyms)]
48pub enum XMMOrMemory {
49    XMM(XMM),
50    Memory(GPR, i32),
51}
52
53#[derive(Copy, Clone, Debug)]
54#[allow(dead_code, clippy::upper_case_acronyms)]
55pub enum GPROrMemory {
56    GPR(GPR),
57    Memory(GPR, i32),
58}
59
/// Floating-point width selector for the scalar SSE helpers in this file.
///
/// `move_src_to_dst` emits `movss` for [`Precision::Single`] and `movsd` for
/// [`Precision::Double`].
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum Precision {
    /// Single precision (the `ss` instruction forms).
    Single,
    /// Double precision (the `sd` instruction forms).
    Double,
}
64
65#[allow(unused)]
66pub trait EmitterX64 {
67    fn get_simd_arch(&self) -> Option<&CpuFeature>;
68    fn get_label(&mut self) -> Label;
69    fn get_offset(&self) -> Offset;
70    fn get_jmp_instr_size(&self) -> u8;
71
72    fn finalize_function(&mut self) -> Result<(), CompileError> {
73        Ok(())
74    }
75
76    fn emit_u64(&mut self, x: u64) -> Result<(), CompileError>;
77    fn emit_bytes(&mut self, bytes: &[u8]) -> Result<(), CompileError>;
78
79    fn emit_label(&mut self, label: Label) -> Result<(), CompileError>;
80
81    fn emit_nop(&mut self) -> Result<(), CompileError>;
82
83    /// A high-level assembler method. Emits an instruction sequence of length `n` that is functionally
84    /// equivalent to a `nop` instruction, without guarantee about the underlying implementation.
85    fn emit_nop_n(&mut self, n: usize) -> Result<(), CompileError>;
86
87    fn emit_mov(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
88    fn emit_lea(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
89    fn emit_lea_label(&mut self, label: Label, dst: Location) -> Result<(), CompileError>;
90    fn emit_cdq(&mut self) -> Result<(), CompileError>;
91    fn emit_cqo(&mut self) -> Result<(), CompileError>;
92    fn emit_xor(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
93    fn emit_jmp(&mut self, condition: Condition, label: Label) -> Result<(), CompileError>;
94    fn emit_jmp_location(&mut self, loc: Location) -> Result<(), CompileError>;
95    fn emit_set(&mut self, condition: Condition, dst: GPR) -> Result<(), CompileError>;
96    fn emit_push(&mut self, sz: Size, src: Location) -> Result<(), CompileError>;
97    fn emit_pop(&mut self, sz: Size, dst: Location) -> Result<(), CompileError>;
98    fn emit_cmp(&mut self, sz: Size, left: Location, right: Location) -> Result<(), CompileError>;
99    fn emit_add(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
100    fn emit_sub(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
101    fn emit_neg(&mut self, sz: Size, value: Location) -> Result<(), CompileError>;
102    fn emit_imul(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
103    fn emit_imul_imm32_gpr64(&mut self, src: u32, dst: GPR) -> Result<(), CompileError>;
104    fn emit_div(&mut self, sz: Size, divisor: Location) -> Result<(), CompileError>;
105    fn emit_idiv(&mut self, sz: Size, divisor: Location) -> Result<(), CompileError>;
106    fn emit_shl(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
107    fn emit_shr(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
108    fn emit_sar(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
109    fn emit_rol(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
110    fn emit_ror(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
111    fn emit_and(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
112    fn emit_test(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
113    fn emit_or(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
114    fn emit_bsr(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
115    fn emit_bsf(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
116    fn emit_popcnt(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
117    fn emit_movzx(
118        &mut self,
119        sz_src: Size,
120        src: Location,
121        sz_dst: Size,
122        dst: Location,
123    ) -> Result<(), CompileError>;
124    fn emit_movsx(
125        &mut self,
126        sz_src: Size,
127        src: Location,
128        sz_dst: Size,
129        dst: Location,
130    ) -> Result<(), CompileError>;
131    fn emit_xchg(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError>;
132    fn emit_lock_xadd(
133        &mut self,
134        sz: Size,
135        src: Location,
136        dst: Location,
137    ) -> Result<(), CompileError>;
138    fn emit_lock_cmpxchg(
139        &mut self,
140        sz: Size,
141        src: Location,
142        dst: Location,
143    ) -> Result<(), CompileError>;
144    fn emit_rep_stosq(&mut self) -> Result<(), CompileError>;
145
146    fn emit_btc_gpr_imm8_32(&mut self, src: u8, dst: GPR) -> Result<(), CompileError>;
147    fn emit_btc_gpr_imm8_64(&mut self, src: u8, dst: GPR) -> Result<(), CompileError>;
148
149    fn emit_cmovae_gpr_32(&mut self, src: GPR, dst: GPR) -> Result<(), CompileError>;
150    fn emit_cmovae_gpr_64(&mut self, src: GPR, dst: GPR) -> Result<(), CompileError>;
151
152    fn emit_vmovaps(&mut self, src: XMMOrMemory, dst: XMMOrMemory) -> Result<(), CompileError>;
153    fn emit_vmovapd(&mut self, src: XMMOrMemory, dst: XMMOrMemory) -> Result<(), CompileError>;
154    fn emit_vxorps(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
155    fn emit_vxorpd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
156
157    fn emit_vaddss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
158    fn emit_vaddsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
159    fn emit_vsubss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
160    fn emit_vsubsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
161    fn emit_vmulss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
162    fn emit_vmulsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
163    fn emit_vdivss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
164    fn emit_vdivsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
165    fn emit_vmaxss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
166    fn emit_vmaxsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
167    fn emit_vminss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
168    fn emit_vminsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
169
170    fn emit_vcmpeqss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM)
171    -> Result<(), CompileError>;
172    fn emit_vcmpeqsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM)
173    -> Result<(), CompileError>;
174
175    fn emit_vcmpneqss(
176        &mut self,
177        src1: XMM,
178        src2: XMMOrMemory,
179        dst: XMM,
180    ) -> Result<(), CompileError>;
181    fn emit_vcmpneqsd(
182        &mut self,
183        src1: XMM,
184        src2: XMMOrMemory,
185        dst: XMM,
186    ) -> Result<(), CompileError>;
187
188    fn emit_vcmpltss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM)
189    -> Result<(), CompileError>;
190    fn emit_vcmpltsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM)
191    -> Result<(), CompileError>;
192
193    fn emit_vcmpless(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM)
194    -> Result<(), CompileError>;
195    fn emit_vcmplesd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM)
196    -> Result<(), CompileError>;
197
198    fn emit_vcmpgtss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM)
199    -> Result<(), CompileError>;
200    fn emit_vcmpgtsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM)
201    -> Result<(), CompileError>;
202
203    fn emit_vcmpgess(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM)
204    -> Result<(), CompileError>;
205    fn emit_vcmpgesd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM)
206    -> Result<(), CompileError>;
207
208    fn emit_vcmpunordss(
209        &mut self,
210        src1: XMM,
211        src2: XMMOrMemory,
212        dst: XMM,
213    ) -> Result<(), CompileError>;
214    fn emit_vcmpunordsd(
215        &mut self,
216        src1: XMM,
217        src2: XMMOrMemory,
218        dst: XMM,
219    ) -> Result<(), CompileError>;
220
221    fn emit_vcmpordss(
222        &mut self,
223        src1: XMM,
224        src2: XMMOrMemory,
225        dst: XMM,
226    ) -> Result<(), CompileError>;
227    fn emit_vcmpordsd(
228        &mut self,
229        src1: XMM,
230        src2: XMMOrMemory,
231        dst: XMM,
232    ) -> Result<(), CompileError>;
233
234    fn emit_vsqrtss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
235    fn emit_vsqrtsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
236
237    fn emit_vroundss_nearest(
238        &mut self,
239        src1: XMM,
240        src2: XMMOrMemory,
241        dst: XMM,
242    ) -> Result<(), CompileError>;
243    fn emit_vroundss_floor(
244        &mut self,
245        src1: XMM,
246        src2: XMMOrMemory,
247        dst: XMM,
248    ) -> Result<(), CompileError>;
249    fn emit_vroundss_ceil(
250        &mut self,
251        src1: XMM,
252        src2: XMMOrMemory,
253        dst: XMM,
254    ) -> Result<(), CompileError>;
255    fn emit_vroundss_trunc(
256        &mut self,
257        src1: XMM,
258        src2: XMMOrMemory,
259        dst: XMM,
260    ) -> Result<(), CompileError>;
261    fn emit_vroundsd_nearest(
262        &mut self,
263        src1: XMM,
264        src2: XMMOrMemory,
265        dst: XMM,
266    ) -> Result<(), CompileError>;
267    fn emit_vroundsd_floor(
268        &mut self,
269        src1: XMM,
270        src2: XMMOrMemory,
271        dst: XMM,
272    ) -> Result<(), CompileError>;
273    fn emit_vroundsd_ceil(
274        &mut self,
275        src1: XMM,
276        src2: XMMOrMemory,
277        dst: XMM,
278    ) -> Result<(), CompileError>;
279    fn emit_vroundsd_trunc(
280        &mut self,
281        src1: XMM,
282        src2: XMMOrMemory,
283        dst: XMM,
284    ) -> Result<(), CompileError>;
285
286    fn emit_vcvtss2sd(
287        &mut self,
288        src1: XMM,
289        src2: XMMOrMemory,
290        dst: XMM,
291    ) -> Result<(), CompileError>;
292    fn emit_vcvtsd2ss(
293        &mut self,
294        src1: XMM,
295        src2: XMMOrMemory,
296        dst: XMM,
297    ) -> Result<(), CompileError>;
298
299    fn emit_ucomiss(&mut self, src: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
300    fn emit_ucomisd(&mut self, src: XMMOrMemory, dst: XMM) -> Result<(), CompileError>;
301
302    fn emit_cvttss2si_32(&mut self, src: XMMOrMemory, dst: GPR) -> Result<(), CompileError>;
303    fn emit_cvttss2si_64(&mut self, src: XMMOrMemory, dst: GPR) -> Result<(), CompileError>;
304    fn emit_cvttsd2si_32(&mut self, src: XMMOrMemory, dst: GPR) -> Result<(), CompileError>;
305    fn emit_cvttsd2si_64(&mut self, src: XMMOrMemory, dst: GPR) -> Result<(), CompileError>;
306
307    fn emit_vcvtsi2ss_32(
308        &mut self,
309        src1: XMM,
310        src2: GPROrMemory,
311        dst: XMM,
312    ) -> Result<(), CompileError>;
313    fn emit_vcvtsi2ss_64(
314        &mut self,
315        src1: XMM,
316        src2: GPROrMemory,
317        dst: XMM,
318    ) -> Result<(), CompileError>;
319    fn emit_vcvtsi2sd_32(
320        &mut self,
321        src1: XMM,
322        src2: GPROrMemory,
323        dst: XMM,
324    ) -> Result<(), CompileError>;
325    fn emit_vcvtsi2sd_64(
326        &mut self,
327        src1: XMM,
328        src2: GPROrMemory,
329        dst: XMM,
330    ) -> Result<(), CompileError>;
331
332    fn emit_vblendvps(
333        &mut self,
334        src1: XMM,
335        src2: XMMOrMemory,
336        mask: XMM,
337        dst: XMM,
338    ) -> Result<(), CompileError>;
339    fn emit_vblendvpd(
340        &mut self,
341        src1: XMM,
342        src2: XMMOrMemory,
343        mask: XMM,
344        dst: XMM,
345    ) -> Result<(), CompileError>;
346
347    fn emit_test_gpr_64(&mut self, reg: GPR) -> Result<(), CompileError>;
348
349    fn emit_ud2(&mut self) -> Result<(), CompileError>;
350    fn emit_ud1_payload(&mut self, payload: u8) -> Result<(), CompileError>;
351    fn emit_ret(&mut self) -> Result<(), CompileError>;
352    fn emit_call_label(&mut self, label: Label) -> Result<(), CompileError>;
353    fn emit_call_location(&mut self, loc: Location) -> Result<(), CompileError>;
354
355    fn emit_call_register(&mut self, reg: GPR) -> Result<(), CompileError>;
356
357    fn emit_bkpt(&mut self) -> Result<(), CompileError>;
358
359    fn emit_host_redirection(&mut self, target: GPR) -> Result<(), CompileError>;
360
361    fn arch_has_itruncf(&self) -> bool {
362        false
363    }
364    fn arch_emit_i32_trunc_sf32(&mut self, _src: XMM, _dst: GPR) -> Result<(), CompileError> {
365        codegen_error!("singplepass arch_emit_i32_trunc_sf32 unimplemented")
366    }
367    fn arch_emit_i32_trunc_sf64(&mut self, _src: XMM, _dst: GPR) -> Result<(), CompileError> {
368        codegen_error!("singplepass arch_emit_i32_trunc_sf64 unimplemented")
369    }
370    fn arch_emit_i32_trunc_uf32(&mut self, _src: XMM, _dst: GPR) -> Result<(), CompileError> {
371        codegen_error!("singplepass arch_emit_i32_trunc_uf32 unimplemented")
372    }
373    fn arch_emit_i32_trunc_uf64(&mut self, _src: XMM, _dst: GPR) -> Result<(), CompileError> {
374        codegen_error!("singplepass arch_emit_i32_trunc_uf64 unimplemented")
375    }
376    fn arch_emit_i64_trunc_sf32(&mut self, _src: XMM, _dst: GPR) -> Result<(), CompileError> {
377        codegen_error!("singplepass arch_emit_i64_trunc_sf32 unimplemented")
378    }
379    fn arch_emit_i64_trunc_sf64(&mut self, _src: XMM, _dst: GPR) -> Result<(), CompileError> {
380        codegen_error!("singplepass arch_emit_i64_trunc_sf64 unimplemented")
381    }
382    fn arch_emit_i64_trunc_uf32(&mut self, _src: XMM, _dst: GPR) -> Result<(), CompileError> {
383        codegen_error!("singplepass arch_emit_i64_trunc_uf32 unimplemented")
384    }
385    fn arch_emit_i64_trunc_uf64(&mut self, _src: XMM, _dst: GPR) -> Result<(), CompileError> {
386        codegen_error!("singplepass arch_emit_i64_trunc_uf64 unimplemented")
387    }
388
389    fn arch_has_fconverti(&self) -> bool {
390        false
391    }
392    fn arch_emit_f32_convert_si32(&mut self, _src: GPR, _dst: XMM) -> Result<(), CompileError> {
393        codegen_error!("singlepass arch_emit_f32_convert_si32 unimplemented")
394    }
395    fn arch_emit_f32_convert_si64(&mut self, _src: GPR, _dst: XMM) -> Result<(), CompileError> {
396        codegen_error!("singlepass arch_emit_f32_convert_si64 unimplemented")
397    }
398    fn arch_emit_f32_convert_ui32(&mut self, _src: GPR, _dst: XMM) -> Result<(), CompileError> {
399        codegen_error!("singlepass arch_emit_f32_convert_ui32 unimplemented")
400    }
401    fn arch_emit_f32_convert_ui64(&mut self, _src: GPR, _dst: XMM) -> Result<(), CompileError> {
402        codegen_error!("singlepass arch_emit_f32_convert_ui64 unimplemented")
403    }
404    fn arch_emit_f64_convert_si32(&mut self, _src: GPR, _dst: XMM) -> Result<(), CompileError> {
405        codegen_error!("singlepass arch_emit_f64_convert_si32 unimplemented")
406    }
407    fn arch_emit_f64_convert_si64(&mut self, _src: GPR, _dst: XMM) -> Result<(), CompileError> {
408        codegen_error!("singlepass arch_emit_f64_convert_si64 unimplemented")
409    }
410    fn arch_emit_f64_convert_ui32(&mut self, _src: GPR, _dst: XMM) -> Result<(), CompileError> {
411        codegen_error!("singlepass arch_emit_f64_convert_ui32 unimplemented")
412    }
413    fn arch_emit_f64_convert_ui64(&mut self, _src: GPR, _dst: XMM) -> Result<(), CompileError> {
414        codegen_error!("singlepass arch_emit_f64_convert_ui64 unimplemented")
415    }
416
417    fn arch_has_fneg(&self) -> bool {
418        false
419    }
420    fn arch_emit_f32_neg(&mut self, _src: XMM, _dst: XMM) -> Result<(), CompileError> {
421        codegen_error!("singlepass arch_emit_f32_neg unimplemented")
422    }
423    fn arch_emit_f64_neg(&mut self, _src: XMM, _dst: XMM) -> Result<(), CompileError> {
424        codegen_error!("singlepass arch_emit_f64_neg unimplemented")
425    }
426
427    fn arch_has_xzcnt(&self) -> bool {
428        false
429    }
430    fn arch_emit_lzcnt(
431        &mut self,
432        _sz: Size,
433        _src: Location,
434        _dst: Location,
435    ) -> Result<(), CompileError> {
436        codegen_error!("singlepass arch_emit_lzcnt unimplemented")
437    }
438    fn arch_emit_tzcnt(
439        &mut self,
440        _sz: Size,
441        _src: Location,
442        _dst: Location,
443    ) -> Result<(), CompileError> {
444        codegen_error!("singlepass arch_emit_tzcnt unimplemented")
445    }
446
447    fn arch_emit_indirect_call_with_trampoline(
448        &mut self,
449        _loc: Location,
450    ) -> Result<(), CompileError> {
451        codegen_error!("singlepass arch_emit_indirect_call_with_trampoline unimplemented")
452    }
453
454    // Emits entry trampoline just before the real function.
455    fn arch_emit_entry_trampoline(&mut self) -> Result<(), CompileError> {
456        Ok(())
457    }
458
459    // Byte offset from the beginning of a `mov Imm64, GPR` instruction to the imm64 value.
460    // Required to support emulation on Aarch64.
461    fn arch_mov64_imm_offset(&self) -> Result<usize, CompileError> {
462        codegen_error!("singlepass arch_mov64_imm_offset unimplemented")
463    }
464}
465
/// Emits the one-operand instruction `$op` on a general-purpose register.
///
/// Covers `Location::GPR` at `Size::S32` (as `Rd`) and `Size::S64` (as `Rq`). Any
/// other size/location pair evaluates the `$fallback` block instead.
macro_rules! unop_gpr {
    ($op:ident, $asm:tt, $size:expr_2021, $operand:expr_2021, $fallback:block) => {
        match ($size, $operand) {
            (Size::S64, Location::GPR(reg)) => {
                dynasm!($asm ; $op Rq(reg));
            },
            (Size::S32, Location::GPR(reg)) => {
                dynasm!($asm ; $op Rd(reg));
            },
            _ => $fallback
        }
    };
}
479
/// Emits the one-operand instruction `$op` on a memory operand.
///
/// Covers `Location::Memory(base, offset)` at `Size::S32` (`DWORD`) and `Size::S64`
/// (`QWORD`), addressing `[base + offset]`. Any other size/location pair evaluates
/// the `$fallback` block instead.
macro_rules! unop_mem {
    ($op:ident, $asm:tt, $size:expr_2021, $operand:expr_2021, $fallback:block) => {
        match ($size, $operand) {
            (Size::S64, Location::Memory(base, offset)) => {
                dynasm!($asm ; $op QWORD [Rq(base) + offset] );
            },
            (Size::S32, Location::Memory(base, offset)) => {
                dynasm!($asm ; $op DWORD [Rq(base) + offset] );
            },
            _ => $fallback
        }
    };
}
493
/// One-operand instruction on either a register or a memory operand.
///
/// Tries `unop_gpr!` first. Its fallback is `unop_mem!`, whose own fallback is the
/// caller-supplied `$fallback` block.
macro_rules! unop_gpr_or_mem {
    ($op:ident, $asm:tt, $size:expr_2021, $operand:expr_2021, $fallback:block) => {
        unop_gpr!($op, $asm, $size, $operand, {
            unop_mem!($op, $asm, $size, $operand, $fallback)
        })
    };
}
501
/// Two-operand instruction `$op` with a 32-bit immediate source and a register
/// destination.
///
/// Covers `Location::Imm32` -> `Location::GPR` at `Size::S32` and `Size::S64`. The
/// immediate is passed to the assembler as `i32` in both cases. Anything else
/// evaluates `$fallback`.
macro_rules! binop_imm32_gpr {
    ($op:ident, $asm:tt, $size:expr_2021, $from:expr_2021, $to:expr_2021, $fallback:block) => {
        match ($size, $from, $to) {
            (Size::S64, Location::Imm32(imm), Location::GPR(reg)) => {
                dynasm!($asm ; $op Rq(reg), imm as i32); // imm32 -> 64-bit GPR
            },
            (Size::S32, Location::Imm32(imm), Location::GPR(reg)) => {
                dynasm!($asm ; $op Rd(reg), imm as i32); // imm32 -> 32-bit GPR
            },
            _ => $fallback
        }
    };
}
515
/// Two-operand instruction `$op` with a 32-bit immediate source and a memory
/// destination.
///
/// Covers `Location::Imm32` -> `Location::Memory(base, offset)` at `Size::S32`
/// (`DWORD`) and `Size::S64` (`QWORD`). The immediate is passed as `i32`. Anything
/// else evaluates `$fallback`.
macro_rules! binop_imm32_mem {
    ($op:ident, $asm:tt, $size:expr_2021, $from:expr_2021, $to:expr_2021, $fallback:block) => {
        match ($size, $from, $to) {
            (Size::S64, Location::Imm32(imm), Location::Memory(base, offset)) => {
                dynasm!($asm ; $op QWORD [Rq(base) + offset], imm as i32);
            },
            (Size::S32, Location::Imm32(imm), Location::Memory(base, offset)) => {
                dynasm!($asm ; $op DWORD [Rq(base) + offset], imm as i32);
            },
            _ => $fallback
        }
    };
}
529
/// Two-operand instruction `$op` with a 64-bit immediate source and a 64-bit register
/// destination.
///
/// Only `Size::S64` with `Location::Imm64` -> `Location::GPR` is handled; the
/// immediate is passed as a `QWORD` `i64`. Anything else evaluates `$fallback`.
macro_rules! binop_imm64_gpr {
    ($op:ident, $asm:tt, $size:expr_2021, $from:expr_2021, $to:expr_2021, $fallback:block) => {
        match ($size, $from, $to) {
            (Size::S64, Location::Imm64(imm), Location::GPR(reg)) => {
                dynasm!($asm ; $op Rq(reg), QWORD imm as i64); // imm64 -> 64-bit GPR
            },
            _ => $fallback
        }
    };
}
540
/// Two-operand instruction `$op` between two general-purpose registers.
///
/// Covers `Location::GPR` -> `Location::GPR` at `Size::S32` (`Rd`) and `Size::S64`
/// (`Rq`). The destination is written first in the assembly operand list. Anything
/// else evaluates `$fallback`.
macro_rules! binop_gpr_gpr {
    ($op:ident, $asm:tt, $size:expr_2021, $from:expr_2021, $to:expr_2021, $fallback:block) => {
        match ($size, $from, $to) {
            (Size::S64, Location::GPR(from_reg), Location::GPR(to_reg)) => {
                dynasm!($asm ; $op Rq(to_reg), Rq(from_reg)); // 64-bit register to register
            },
            (Size::S32, Location::GPR(from_reg), Location::GPR(to_reg)) => {
                dynasm!($asm ; $op Rd(to_reg), Rd(from_reg)); // 32-bit register to register
            },
            _ => $fallback
        }
    };
}
554
/// Two-operand instruction `$op` with a register source and a memory destination.
///
/// Covers `Location::GPR` -> `Location::Memory(base, offset)` at `Size::S32` and
/// `Size::S64`. The memory operand carries no explicit size keyword; only the source
/// register is spelled as `Rd` or `Rq`. Anything else evaluates `$fallback`.
macro_rules! binop_gpr_mem {
    ($op:ident, $asm:tt, $size:expr_2021, $from:expr_2021, $to:expr_2021, $fallback:block) => {
        match ($size, $from, $to) {
            (Size::S64, Location::GPR(reg), Location::Memory(base, offset)) => {
                dynasm!($asm ; $op [Rq(base) + offset], Rq(reg)); // 64-bit register to memory
            },
            (Size::S32, Location::GPR(reg), Location::Memory(base, offset)) => {
                dynasm!($asm ; $op [Rq(base) + offset], Rd(reg)); // 32-bit register to memory
            },
            _ => $fallback
        }
    };
}
568
/// Two-operand instruction `$op` with a memory source and a register destination.
///
/// Covers `Location::Memory(base, offset)` -> `Location::GPR` at `Size::S32` and
/// `Size::S64`. The memory operand carries no explicit size keyword; only the
/// destination register is spelled as `Rd` or `Rq`. Anything else evaluates
/// `$fallback`.
macro_rules! binop_mem_gpr {
    ($op:ident, $asm:tt, $size:expr_2021, $from:expr_2021, $to:expr_2021, $fallback:block) => {
        match ($size, $from, $to) {
            (Size::S64, Location::Memory(base, offset), Location::GPR(reg)) => {
                dynasm!($asm ; $op Rq(reg), [Rq(base) + offset]); // memory to 64-bit register
            },
            (Size::S32, Location::Memory(base, offset), Location::GPR(reg)) => {
                dynasm!($asm ; $op Rd(reg), [Rq(base) + offset]); // memory to 32-bit register
            },
            _ => $fallback
        }
    };
}
582
/// Two-operand integer instruction over every non-floating-point operand shape.
///
/// Chains the narrower helpers, each acting as the previous one's fallback, in this
/// order: imm32 -> GPR, imm32 -> memory, GPR -> GPR, GPR -> memory, memory -> GPR.
/// `$fallback` runs only if none of them matches.
macro_rules! binop_all_nofp {
    ($op:ident, $asm:tt, $size:expr_2021, $from:expr_2021, $to:expr_2021, $fallback:block) => {
        binop_imm32_gpr!($op, $asm, $size, $from, $to, {
            binop_imm32_mem!($op, $asm, $size, $from, $to, {
                binop_gpr_gpr!($op, $asm, $size, $from, $to, {
                    binop_gpr_mem!($op, $asm, $size, $from, $to, {
                        binop_mem_gpr!($op, $asm, $size, $from, $to, $fallback)
                    })
                })
            })
        })
    };
}
596
/// Shift/rotate-style instruction `$op` whose count comes from `cl` or an 8-bit
/// immediate.
///
/// The count operand must be `Location::GPR(GPR::RCX)` (emitted as `cl`) or
/// `Location::Imm8` (emitted as `i8`). The destination may be a register or a memory
/// operand at `Size::S32` or `Size::S64`. Anything else evaluates `$fallback`.
macro_rules! binop_shift {
    ($op:ident, $asm:tt, $size:expr_2021, $count:expr_2021, $target:expr_2021, $fallback:block) => {
        match ($size, $count, $target) {
            // Count held in CL.
            (Size::S32, Location::GPR(GPR::RCX), Location::GPR(reg)) => {
                dynasm!($asm ; $op Rd(reg), cl);
            },
            (Size::S64, Location::GPR(GPR::RCX), Location::GPR(reg)) => {
                dynasm!($asm ; $op Rq(reg), cl);
            },
            (Size::S32, Location::GPR(GPR::RCX), Location::Memory(base, offset)) => {
                dynasm!($asm ; $op DWORD [Rq(base) + offset], cl);
            },
            (Size::S64, Location::GPR(GPR::RCX), Location::Memory(base, offset)) => {
                dynasm!($asm ; $op QWORD [Rq(base) + offset], cl);
            },
            // Count given as an 8-bit immediate.
            (Size::S32, Location::Imm8(count), Location::GPR(reg)) => {
                dynasm!($asm ; $op Rd(reg), count as i8);
            },
            (Size::S64, Location::Imm8(count), Location::GPR(reg)) => {
                dynasm!($asm ; $op Rq(reg), count as i8);
            },
            (Size::S32, Location::Imm8(count), Location::Memory(base, offset)) => {
                dynasm!($asm ; $op DWORD [Rq(base) + offset], count as i8);
            },
            (Size::S64, Location::Imm8(count), Location::Memory(base, offset)) => {
                dynasm!($asm ; $op QWORD [Rq(base) + offset], count as i8);
            },
            _ => $fallback
        }
    }
}
628
/// Emits the jump-style instruction `$op` targeting the dynamic label `$target`
/// (dynasm's `=>label` operand form).
macro_rules! jmp_op {
    ($op:ident, $asm:tt, $target:ident) => {
        dynasm!($asm ; $op =>$target)
    }
}
634
635/// Move a single or double precision XMM value to another if src and destination
636/// are not the same.
637///
638/// TODO: Can we assume data is aligned and packed? If so, this function isn't necessary
639/// TODO: as we can use [`EmitterX64::emit_vmovaps`] and [`EmitterX64::emit_vmovadp`]
640/// TODO: instead
641fn move_src_to_dst(emitter: &mut AssemblerX64, precision: Precision, src: XMM, dst: XMM) {
642    if src == dst {
643        return;
644    }
645    match precision {
646        Precision::Single => match src {
647            XMM::XMM0 => dynasm!(emitter ; movss Rx(dst), xmm0),
648            XMM::XMM1 => dynasm!(emitter ; movss Rx(dst), xmm1),
649            XMM::XMM2 => dynasm!(emitter ; movss Rx(dst), xmm2),
650            XMM::XMM3 => dynasm!(emitter ; movss Rx(dst), xmm3),
651            XMM::XMM4 => dynasm!(emitter ; movss Rx(dst), xmm4),
652            XMM::XMM5 => dynasm!(emitter ; movss Rx(dst), xmm5),
653            XMM::XMM6 => dynasm!(emitter ; movss Rx(dst), xmm6),
654            XMM::XMM7 => dynasm!(emitter ; movss Rx(dst), xmm7),
655            XMM::XMM8 => dynasm!(emitter ; movss Rx(dst), xmm8),
656            XMM::XMM9 => dynasm!(emitter ; movss Rx(dst), xmm9),
657            XMM::XMM10 => dynasm!(emitter ; movss Rx(dst), xmm10),
658            XMM::XMM11 => dynasm!(emitter ; movss Rx(dst), xmm11),
659            XMM::XMM12 => dynasm!(emitter ; movss Rx(dst), xmm12),
660            XMM::XMM13 => dynasm!(emitter ; movss Rx(dst), xmm13),
661            XMM::XMM14 => dynasm!(emitter ; movss Rx(dst), xmm14),
662            XMM::XMM15 => dynasm!(emitter ; movss Rx(dst), xmm15),
663        },
664        Precision::Double => match src {
665            XMM::XMM0 => dynasm!(emitter ; movsd Rx(dst), xmm0),
666            XMM::XMM1 => dynasm!(emitter ; movsd Rx(dst), xmm1),
667            XMM::XMM2 => dynasm!(emitter ; movsd Rx(dst), xmm2),
668            XMM::XMM3 => dynasm!(emitter ; movsd Rx(dst), xmm3),
669            XMM::XMM4 => dynasm!(emitter ; movsd Rx(dst), xmm4),
670            XMM::XMM5 => dynasm!(emitter ; movsd Rx(dst), xmm5),
671            XMM::XMM6 => dynasm!(emitter ; movsd Rx(dst), xmm6),
672            XMM::XMM7 => dynasm!(emitter ; movsd Rx(dst), xmm7),
673            XMM::XMM8 => dynasm!(emitter ; movsd Rx(dst), xmm8),
674            XMM::XMM9 => dynasm!(emitter ; movsd Rx(dst), xmm9),
675            XMM::XMM10 => dynasm!(emitter ; movsd Rx(dst), xmm10),
676            XMM::XMM11 => dynasm!(emitter ; movsd Rx(dst), xmm11),
677            XMM::XMM12 => dynasm!(emitter ; movsd Rx(dst), xmm12),
678            XMM::XMM13 => dynasm!(emitter ; movsd Rx(dst), xmm13),
679            XMM::XMM14 => dynasm!(emitter ; movsd Rx(dst), xmm14),
680            XMM::XMM15 => dynasm!(emitter ; movsd Rx(dst), xmm15),
681        },
682    }
683}
684
/// Emits the three-operand AVX instruction `$op dst, src1, src2`.
///
/// `$rhs` (an `XMMOrMemory`) may be a register or a `[base + offset]` memory operand.
/// `$lhs` (an `XMM`) is matched exhaustively so that it can be written as a static
/// register name in every arm.
macro_rules! avx_fn {
    ($op:ident, $asm:ident, $lhs:ident, $rhs:ident, $out:ident) => {
        // Dynasm bug: AVX instructions are not encoded correctly.
        // (Hence the first source register is spelled statically per arm.)
        match $rhs {
            XMMOrMemory::XMM(rhs_reg) => match $lhs {
                XMM::XMM0 => dynasm!($asm ; $op Rx($out), xmm0, Rx(rhs_reg)),
                XMM::XMM1 => dynasm!($asm ; $op Rx($out), xmm1, Rx(rhs_reg)),
                XMM::XMM2 => dynasm!($asm ; $op Rx($out), xmm2, Rx(rhs_reg)),
                XMM::XMM3 => dynasm!($asm ; $op Rx($out), xmm3, Rx(rhs_reg)),
                XMM::XMM4 => dynasm!($asm ; $op Rx($out), xmm4, Rx(rhs_reg)),
                XMM::XMM5 => dynasm!($asm ; $op Rx($out), xmm5, Rx(rhs_reg)),
                XMM::XMM6 => dynasm!($asm ; $op Rx($out), xmm6, Rx(rhs_reg)),
                XMM::XMM7 => dynasm!($asm ; $op Rx($out), xmm7, Rx(rhs_reg)),
                XMM::XMM8 => dynasm!($asm ; $op Rx($out), xmm8, Rx(rhs_reg)),
                XMM::XMM9 => dynasm!($asm ; $op Rx($out), xmm9, Rx(rhs_reg)),
                XMM::XMM10 => dynasm!($asm ; $op Rx($out), xmm10, Rx(rhs_reg)),
                XMM::XMM11 => dynasm!($asm ; $op Rx($out), xmm11, Rx(rhs_reg)),
                XMM::XMM12 => dynasm!($asm ; $op Rx($out), xmm12, Rx(rhs_reg)),
                XMM::XMM13 => dynasm!($asm ; $op Rx($out), xmm13, Rx(rhs_reg)),
                XMM::XMM14 => dynasm!($asm ; $op Rx($out), xmm14, Rx(rhs_reg)),
                XMM::XMM15 => dynasm!($asm ; $op Rx($out), xmm15, Rx(rhs_reg)),
            },
            XMMOrMemory::Memory(base_reg, offset) => match $lhs {
                XMM::XMM0 => dynasm!($asm ; $op Rx($out), xmm0, [Rq(base_reg) + offset]),
                XMM::XMM1 => dynasm!($asm ; $op Rx($out), xmm1, [Rq(base_reg) + offset]),
                XMM::XMM2 => dynasm!($asm ; $op Rx($out), xmm2, [Rq(base_reg) + offset]),
                XMM::XMM3 => dynasm!($asm ; $op Rx($out), xmm3, [Rq(base_reg) + offset]),
                XMM::XMM4 => dynasm!($asm ; $op Rx($out), xmm4, [Rq(base_reg) + offset]),
                XMM::XMM5 => dynasm!($asm ; $op Rx($out), xmm5, [Rq(base_reg) + offset]),
                XMM::XMM6 => dynasm!($asm ; $op Rx($out), xmm6, [Rq(base_reg) + offset]),
                XMM::XMM7 => dynasm!($asm ; $op Rx($out), xmm7, [Rq(base_reg) + offset]),
                XMM::XMM8 => dynasm!($asm ; $op Rx($out), xmm8, [Rq(base_reg) + offset]),
                XMM::XMM9 => dynasm!($asm ; $op Rx($out), xmm9, [Rq(base_reg) + offset]),
                XMM::XMM10 => dynasm!($asm ; $op Rx($out), xmm10, [Rq(base_reg) + offset]),
                XMM::XMM11 => dynasm!($asm ; $op Rx($out), xmm11, [Rq(base_reg) + offset]),
                XMM::XMM12 => dynasm!($asm ; $op Rx($out), xmm12, [Rq(base_reg) + offset]),
                XMM::XMM13 => dynasm!($asm ; $op Rx($out), xmm13, [Rq(base_reg) + offset]),
                XMM::XMM14 => dynasm!($asm ; $op Rx($out), xmm14, [Rq(base_reg) + offset]),
                XMM::XMM15 => dynasm!($asm ; $op Rx($out), xmm15, [Rq(base_reg) + offset]),
            },
        }
    }
}
728
/// Lowers a three-operand request (`$src1`, `$src2`, `$dst`) onto a
/// two-operand SSE instruction `$ins`.
///
/// Two forms are accepted:
/// * 6 arguments: emits `$ins dst, src`.
/// * 7 arguments (extra `$mode` after `$ins`): emits `$ins dst, src, $mode`.
///
/// `move_src_to_dst` is defined elsewhere in this file; presumably it copies
/// `$src1` into `$dst` so that the two-operand instruction can work in place.
macro_rules! sse_fn {
    ($ins:ident, $emitter:ident, $precision:expr_2021, $src1:ident, $src2:ident, $dst:ident) => {
        match $src2 {
            // The second source already lives in the destination register:
            // combine it with `$src1` in place instead of overwriting it.
            // NOTE(review): this swaps the operand order of `$ins`, which is
            // only equivalent for commutative operations -- confirm callers.
            XMMOrMemory::XMM(rhs) if rhs == $dst => {
                dynasm!($emitter ; $ins Rx($dst), Rx($src1))
            }
            XMMOrMemory::XMM(rhs) => {
                move_src_to_dst($emitter, $precision, $src1, $dst);
                dynasm!($emitter ; $ins Rx($dst), Rx(rhs))
            }
            XMMOrMemory::Memory(mem_base, mem_disp) => {
                move_src_to_dst($emitter, $precision, $src1, $dst);
                dynasm!($emitter ; $ins Rx($dst), [Rq(mem_base) + mem_disp])
            }
        }
    };
    ($ins:ident, $mode:expr_2021, $emitter:ident, $precision:expr_2021, $src1:ident, $src2:ident, $dst:ident) => {
        match $src2 {
            XMMOrMemory::XMM(rhs) => {
                move_src_to_dst($emitter, $precision, $src1, $dst);
                dynasm!($emitter ; $ins Rx($dst), Rx(rhs), $mode)
            }
            XMMOrMemory::Memory(mem_base, mem_disp) => {
                move_src_to_dst($emitter, $precision, $src1, $dst);
                dynasm!($emitter ; $ins Rx($dst), [Rq(mem_base) + mem_disp], $mode)
            }
        }
    };
}
759
/// Emits the three-operand AVX instruction `$ins` whose last operand is a
/// 64-bit integer source (a 64-bit GPR or a `QWORD` memory location).
///
/// `$src1` is matched variant by variant so that the middle operand is written
/// as a literal `xmmN` register name; `$dst` is passed dynamically via `Rx`.
macro_rules! avx_i2f_64_fn {
    ($ins:ident, $emitter:ident, $src1:ident, $src2:ident, $dst:ident) => {
        match $src2 {
            GPROrMemory::GPR(int_reg) => match $src1 {
                XMM::XMM0 => dynasm!($emitter ; $ins Rx($dst), xmm0, Rq(int_reg)),
                XMM::XMM1 => dynasm!($emitter ; $ins Rx($dst), xmm1, Rq(int_reg)),
                XMM::XMM2 => dynasm!($emitter ; $ins Rx($dst), xmm2, Rq(int_reg)),
                XMM::XMM3 => dynasm!($emitter ; $ins Rx($dst), xmm3, Rq(int_reg)),
                XMM::XMM4 => dynasm!($emitter ; $ins Rx($dst), xmm4, Rq(int_reg)),
                XMM::XMM5 => dynasm!($emitter ; $ins Rx($dst), xmm5, Rq(int_reg)),
                XMM::XMM6 => dynasm!($emitter ; $ins Rx($dst), xmm6, Rq(int_reg)),
                XMM::XMM7 => dynasm!($emitter ; $ins Rx($dst), xmm7, Rq(int_reg)),
                XMM::XMM8 => dynasm!($emitter ; $ins Rx($dst), xmm8, Rq(int_reg)),
                XMM::XMM9 => dynasm!($emitter ; $ins Rx($dst), xmm9, Rq(int_reg)),
                XMM::XMM10 => dynasm!($emitter ; $ins Rx($dst), xmm10, Rq(int_reg)),
                XMM::XMM11 => dynasm!($emitter ; $ins Rx($dst), xmm11, Rq(int_reg)),
                XMM::XMM12 => dynasm!($emitter ; $ins Rx($dst), xmm12, Rq(int_reg)),
                XMM::XMM13 => dynasm!($emitter ; $ins Rx($dst), xmm13, Rq(int_reg)),
                XMM::XMM14 => dynasm!($emitter ; $ins Rx($dst), xmm14, Rq(int_reg)),
                XMM::XMM15 => dynasm!($emitter ; $ins Rx($dst), xmm15, Rq(int_reg)),
            },
            GPROrMemory::Memory(mem_base, mem_disp) => match $src1 {
                XMM::XMM0 => dynasm!($emitter ; $ins Rx($dst), xmm0, QWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM1 => dynasm!($emitter ; $ins Rx($dst), xmm1, QWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM2 => dynasm!($emitter ; $ins Rx($dst), xmm2, QWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM3 => dynasm!($emitter ; $ins Rx($dst), xmm3, QWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM4 => dynasm!($emitter ; $ins Rx($dst), xmm4, QWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM5 => dynasm!($emitter ; $ins Rx($dst), xmm5, QWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM6 => dynasm!($emitter ; $ins Rx($dst), xmm6, QWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM7 => dynasm!($emitter ; $ins Rx($dst), xmm7, QWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM8 => dynasm!($emitter ; $ins Rx($dst), xmm8, QWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM9 => dynasm!($emitter ; $ins Rx($dst), xmm9, QWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM10 => dynasm!($emitter ; $ins Rx($dst), xmm10, QWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM11 => dynasm!($emitter ; $ins Rx($dst), xmm11, QWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM12 => dynasm!($emitter ; $ins Rx($dst), xmm12, QWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM13 => dynasm!($emitter ; $ins Rx($dst), xmm13, QWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM14 => dynasm!($emitter ; $ins Rx($dst), xmm14, QWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM15 => dynasm!($emitter ; $ins Rx($dst), xmm15, QWORD [Rq(mem_base) + mem_disp]),
            },
        }
    }
}
802
/// SSE counterpart of `avx_i2f_64_fn`: emits the two-operand instruction
/// `$ins` with `$dst` as destination and a 64-bit integer source (a 64-bit
/// GPR or a `QWORD` memory location).
///
/// `move_src_to_dst` (defined elsewhere in this file) is invoked on
/// `$src1`/`$dst` before the instruction in every case.
macro_rules! sse_i2f_64_fn {
    ($ins:ident, $emitter:ident, $precision:expr_2021, $src1:ident, $src2:ident, $dst:ident) => {{
        // Both operand kinds need the same preparation, so do it once up front.
        move_src_to_dst($emitter, $precision, $src1, $dst);
        match $src2 {
            GPROrMemory::GPR(int_reg) => {
                dynasm!($emitter ; $ins Rx($dst), Rq(int_reg))
            }
            GPROrMemory::Memory(mem_base, mem_disp) => {
                dynasm!($emitter ; $ins Rx($dst), QWORD [Rq(mem_base) + mem_disp])
            }
        }
    }}
}
817
/// Emits the three-operand AVX instruction `$ins` whose last operand is a
/// 32-bit integer source (a 32-bit GPR or a `DWORD` memory location).
///
/// `$src1` is matched variant by variant so that the middle operand is written
/// as a literal `xmmN` register name; `$dst` is passed dynamically via `Rx`.
macro_rules! avx_i2f_32_fn {
    ($ins:ident, $emitter:ident, $src1:ident, $src2:ident, $dst:ident) => {
        match $src2 {
            GPROrMemory::GPR(int_reg) => match $src1 {
                XMM::XMM0 => dynasm!($emitter ; $ins Rx($dst), xmm0, Rd(int_reg)),
                XMM::XMM1 => dynasm!($emitter ; $ins Rx($dst), xmm1, Rd(int_reg)),
                XMM::XMM2 => dynasm!($emitter ; $ins Rx($dst), xmm2, Rd(int_reg)),
                XMM::XMM3 => dynasm!($emitter ; $ins Rx($dst), xmm3, Rd(int_reg)),
                XMM::XMM4 => dynasm!($emitter ; $ins Rx($dst), xmm4, Rd(int_reg)),
                XMM::XMM5 => dynasm!($emitter ; $ins Rx($dst), xmm5, Rd(int_reg)),
                XMM::XMM6 => dynasm!($emitter ; $ins Rx($dst), xmm6, Rd(int_reg)),
                XMM::XMM7 => dynasm!($emitter ; $ins Rx($dst), xmm7, Rd(int_reg)),
                XMM::XMM8 => dynasm!($emitter ; $ins Rx($dst), xmm8, Rd(int_reg)),
                XMM::XMM9 => dynasm!($emitter ; $ins Rx($dst), xmm9, Rd(int_reg)),
                XMM::XMM10 => dynasm!($emitter ; $ins Rx($dst), xmm10, Rd(int_reg)),
                XMM::XMM11 => dynasm!($emitter ; $ins Rx($dst), xmm11, Rd(int_reg)),
                XMM::XMM12 => dynasm!($emitter ; $ins Rx($dst), xmm12, Rd(int_reg)),
                XMM::XMM13 => dynasm!($emitter ; $ins Rx($dst), xmm13, Rd(int_reg)),
                XMM::XMM14 => dynasm!($emitter ; $ins Rx($dst), xmm14, Rd(int_reg)),
                XMM::XMM15 => dynasm!($emitter ; $ins Rx($dst), xmm15, Rd(int_reg)),
            },
            GPROrMemory::Memory(mem_base, mem_disp) => match $src1 {
                XMM::XMM0 => dynasm!($emitter ; $ins Rx($dst), xmm0, DWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM1 => dynasm!($emitter ; $ins Rx($dst), xmm1, DWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM2 => dynasm!($emitter ; $ins Rx($dst), xmm2, DWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM3 => dynasm!($emitter ; $ins Rx($dst), xmm3, DWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM4 => dynasm!($emitter ; $ins Rx($dst), xmm4, DWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM5 => dynasm!($emitter ; $ins Rx($dst), xmm5, DWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM6 => dynasm!($emitter ; $ins Rx($dst), xmm6, DWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM7 => dynasm!($emitter ; $ins Rx($dst), xmm7, DWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM8 => dynasm!($emitter ; $ins Rx($dst), xmm8, DWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM9 => dynasm!($emitter ; $ins Rx($dst), xmm9, DWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM10 => dynasm!($emitter ; $ins Rx($dst), xmm10, DWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM11 => dynasm!($emitter ; $ins Rx($dst), xmm11, DWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM12 => dynasm!($emitter ; $ins Rx($dst), xmm12, DWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM13 => dynasm!($emitter ; $ins Rx($dst), xmm13, DWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM14 => dynasm!($emitter ; $ins Rx($dst), xmm14, DWORD [Rq(mem_base) + mem_disp]),
                XMM::XMM15 => dynasm!($emitter ; $ins Rx($dst), xmm15, DWORD [Rq(mem_base) + mem_disp]),
            },
        }
    }
}
860
/// SSE counterpart of `avx_i2f_32_fn`: emits the two-operand instruction
/// `$ins` with `$dst` as destination and a 32-bit integer source (a 32-bit
/// GPR or a `DWORD` memory location).
///
/// `move_src_to_dst` (defined elsewhere in this file) is invoked on
/// `$src1`/`$dst` before the instruction in every case.
macro_rules! sse_i2f_32_fn {
    ($ins:ident, $emitter:ident, $precision:expr_2021, $src1:ident, $src2:ident, $dst:ident) => {
        match $src2 {
            GPROrMemory::GPR(x) => {
                move_src_to_dst($emitter, $precision, $src1, $dst);
                // The result must land in `$dst` (as in the memory arm below
                // and in `sse_i2f_64_fn`); writing to `$src1` here would leave
                // `$dst` holding only the copied, unconverted value.
                dynasm!($emitter; $ins Rx($dst), Rd(x))
            },
            GPROrMemory::Memory(base, disp) => {
                move_src_to_dst($emitter, $precision, $src1, $dst);
                dynasm!($emitter; $ins Rx($dst), DWORD [Rq(base) + disp])
            }
        }
    }
}
875
/// Emits the AVX rounding instruction `$ins` in its four-operand form
/// `$ins dst, src1, src2, $mode`, where `src2` is either an XMM register or a
/// memory location and `$mode` is the immediate passed through unchanged.
macro_rules! avx_round_fn {
    ($ins:ident, $mode:expr_2021, $emitter:ident, $src1:ident, $src2:ident, $dst:ident) => {
        match $src2 {
            XMMOrMemory::XMM(rhs) => {
                dynasm!($emitter ; $ins Rx($dst), Rx($src1), Rx(rhs), $mode)
            }
            XMMOrMemory::Memory(mem_base, mem_disp) => {
                dynasm!($emitter ; $ins Rx($dst), Rx($src1), [Rq(mem_base) + mem_disp], $mode)
            }
        }
    }
}
884
/// SSE counterpart of `avx_round_fn`: emits the two-operand rounding
/// instruction `$ins dst, src2, $mode`, where `src2` is an XMM register or a
/// memory location and `$mode` is the immediate passed through unchanged.
///
/// In the register case `move_src_to_dst` (defined elsewhere in this file) is
/// invoked on `$src1`/`$dst` first, unless `src2` already is `$dst`: in that
/// case the call is skipped so the value about to be rounded is not
/// overwritten.
macro_rules! sse_round_fn {
    ($ins:ident, $mode:expr_2021, $emitter:ident, $precision:expr_2021, $src1:ident, $src2:ident, $dst:ident) => {
        match $src2 {
            XMMOrMemory::XMM(x) => {
                if x != $dst {
                    move_src_to_dst($emitter, $precision, $src1, $dst);
                }
                // SSE rounding instructions write their FIRST operand, so the
                // destination must be `$dst` and the source `x` -- the same
                // direction as the memory arm below and the AVX form.
                dynasm!($emitter ; $ins Rx($dst), Rx(x), $mode)
            }
            XMMOrMemory::Memory(base, disp) => {
                dynasm!($emitter ; $ins Rx($dst), [Rq(base) + disp], $mode)
            },
        }
    }
}
900
901impl EmitterX64 for AssemblerX64 {
902    fn get_simd_arch(&self) -> Option<&CpuFeature> {
903        Some(&self.simd_arch)
904    }
905
906    fn get_label(&mut self) -> DynamicLabel {
907        self.new_dynamic_label()
908    }
909
910    fn get_offset(&self) -> AssemblyOffset {
911        self.offset()
912    }
913
914    fn get_jmp_instr_size(&self) -> u8 {
915        5
916    }
917
918    fn finalize_function(&mut self) -> Result<(), CompileError> {
919        dynasm!(
920            self
921            ; const_neg_one_32:
922            ; .i32 -1
923            ; const_zero_32:
924            ; .i32  0
925            ; const_pos_one_32:
926            ; .i32 1
927        );
928        Ok(())
929    }
930
931    fn arch_has_xzcnt(&self) -> bool {
932        match &self.target {
933            Some(target) => {
934                target.cpu_features().contains(CpuFeature::LZCNT)
935                    && target.cpu_features().contains(CpuFeature::BMI1)
936            }
937            None => false,
938        }
939    }
940
941    fn arch_emit_lzcnt(
942        &mut self,
943        sz: Size,
944        src: Location,
945        dst: Location,
946    ) -> Result<(), CompileError> {
947        binop_gpr_gpr!(lzcnt, self, sz, src, dst, {
948            binop_mem_gpr!(lzcnt, self, sz, src, dst, {
949                codegen_error!("singlepass cannot emit lzcnt")
950            })
951        });
952        Ok(())
953    }
954
955    fn arch_emit_tzcnt(
956        &mut self,
957        sz: Size,
958        src: Location,
959        dst: Location,
960    ) -> Result<(), CompileError> {
961        binop_gpr_gpr!(tzcnt, self, sz, src, dst, {
962            binop_mem_gpr!(tzcnt, self, sz, src, dst, {
963                codegen_error!("singlepass cannot emit tzcnt")
964            })
965        });
966        Ok(())
967    }
968
969    fn emit_u64(&mut self, x: u64) -> Result<(), CompileError> {
970        self.push_u64(x);
971        Ok(())
972    }
973
974    fn emit_bytes(&mut self, bytes: &[u8]) -> Result<(), CompileError> {
975        for &b in bytes {
976            self.push(b);
977        }
978        Ok(())
979    }
980
981    fn emit_label(&mut self, label: Label) -> Result<(), CompileError> {
982        dynasm!(self ; => label);
983        Ok(())
984    }
985
986    fn emit_nop(&mut self) -> Result<(), CompileError> {
987        dynasm!(self ; nop);
988        Ok(())
989    }
990
991    fn emit_nop_n(&mut self, mut n: usize) -> Result<(), CompileError> {
992        /*
993            1      90H                            NOP
994            2      66 90H                         66 NOP
995            3      0F 1F 00H                      NOP DWORD ptr [EAX]
996            4      0F 1F 40 00H                   NOP DWORD ptr [EAX + 00H]
997            5      0F 1F 44 00 00H                NOP DWORD ptr [EAX + EAX*1 + 00H]
998            6      66 0F 1F 44 00 00H             NOP DWORD ptr [AX + AX*1 + 00H]
999            7      0F 1F 80 00 00 00 00H          NOP DWORD ptr [EAX + 00000000H]
1000            8      0F 1F 84 00 00 00 00 00H       NOP DWORD ptr [AX + AX*1 + 00000000H]
1001            9      66 0F 1F 84 00 00 00 00 00H    NOP DWORD ptr [AX + AX*1 + 00000000H]
1002        */
1003        while n >= 9 {
1004            n -= 9;
1005            self.emit_bytes(&[0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00])?;
1006            // 9-byte nop
1007        }
1008        let seq: &[u8] = match n {
1009            0 => &[],
1010            1 => &[0x90],
1011            2 => &[0x66, 0x90],
1012            3 => &[0x0f, 0x1f, 0x00],
1013            4 => &[0x0f, 0x1f, 0x40, 0x00],
1014            5 => &[0x0f, 0x1f, 0x44, 0x00, 0x00],
1015            6 => &[0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00],
1016            7 => &[0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00],
1017            8 => &[0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00],
1018            _ => codegen_error!("singlepass emit_nop_n unreachable"),
1019        };
1020        self.emit_bytes(seq)
1021    }
1022
1023    fn emit_mov(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1024        // fast path
1025        if let (Location::Imm32(0), Location::GPR(x)) = (src, dst) {
1026            dynasm!(self ; xor Rd(x), Rd(x));
1027            return Ok(());
1028        }
1029
1030        binop_all_nofp!(mov, self, sz, src, dst, {
1031            binop_imm64_gpr!(mov, self, sz, src, dst, {
1032                match (sz, src, dst) {
1033                    (Size::S8, Location::GPR(src), Location::Memory(dst, disp)) => {
1034                        dynasm!(self ; mov [Rq(dst) + disp], Rb(src));
1035                    }
1036                    (Size::S8, Location::Memory(src, disp), Location::GPR(dst)) => {
1037                        dynasm!(self ; mov Rb(dst), [Rq(src) + disp]);
1038                    }
1039                    (Size::S8, Location::Imm32(src), Location::GPR(dst)) => {
1040                        dynasm!(self ; mov Rb(dst), src as i8);
1041                    }
1042                    (Size::S8, Location::Imm64(src), Location::GPR(dst)) => {
1043                        dynasm!(self ; mov Rb(dst), src as i8);
1044                    }
1045                    (Size::S8, Location::Imm32(src), Location::Memory(dst, disp)) => {
1046                        dynasm!(self ; mov BYTE [Rq(dst) + disp], src as i8);
1047                    }
1048                    (Size::S8, Location::Imm64(src), Location::Memory(dst, disp)) => {
1049                        dynasm!(self ; mov BYTE [Rq(dst) + disp], src as i8);
1050                    }
1051                    (Size::S16, Location::GPR(src), Location::Memory(dst, disp)) => {
1052                        dynasm!(self ; mov [Rq(dst) + disp], Rw(src));
1053                    }
1054                    (Size::S16, Location::Memory(src, disp), Location::GPR(dst)) => {
1055                        dynasm!(self ; mov Rw(dst), [Rq(src) + disp]);
1056                    }
1057                    (Size::S16, Location::Imm32(src), Location::GPR(dst)) => {
1058                        dynasm!(self ; mov Rw(dst), src as i16);
1059                    }
1060                    (Size::S16, Location::Imm64(src), Location::GPR(dst)) => {
1061                        dynasm!(self ; mov Rw(dst), src as i16);
1062                    }
1063                    (Size::S16, Location::Imm32(src), Location::Memory(dst, disp)) => {
1064                        dynasm!(self ; mov WORD [Rq(dst) + disp], src as i16);
1065                    }
1066                    (Size::S16, Location::Imm64(src), Location::Memory(dst, disp)) => {
1067                        dynasm!(self ; mov WORD [Rq(dst) + disp], src as i16);
1068                    }
1069                    (Size::S32, Location::Imm64(src), Location::GPR(dst)) => {
1070                        dynasm!(self ; mov Rd(dst), src as i32);
1071                    }
1072                    (Size::S32, Location::Imm64(src), Location::Memory(dst, disp)) => {
1073                        dynasm!(self ; mov DWORD [Rq(dst) + disp], src as i32);
1074                    }
1075                    (Size::S32, Location::GPR(src), Location::SIMD(dst)) => {
1076                        dynasm!(self ; movd Rx(dst), Rd(src));
1077                    }
1078                    (Size::S32, Location::SIMD(src), Location::GPR(dst)) => {
1079                        dynasm!(self ; movd Rd(dst), Rx(src));
1080                    }
1081                    (Size::S32, Location::Memory(src, disp), Location::SIMD(dst)) => {
1082                        dynasm!(self ; movd Rx(dst), [Rq(src) + disp]);
1083                    }
1084                    (Size::S32, Location::SIMD(src), Location::Memory(dst, disp)) => {
1085                        dynasm!(self ; movd [Rq(dst) + disp], Rx(src));
1086                    }
1087                    (Size::S64, Location::Imm64(src), Location::GPR(dst)) => {
1088                        dynasm!(self ; mov Rd(dst), src as i32);
1089                    }
1090                    (Size::S64, Location::Imm32(src), Location::GPR(dst)) => {
1091                        dynasm!(self ; mov Rd(dst), src as i32);
1092                    }
1093                    (Size::S64, Location::Imm8(src), Location::GPR(dst)) => {
1094                        dynasm!(self ; mov Rd(dst), src as i32);
1095                    }
1096
1097                    (Size::S64, Location::GPR(src), Location::SIMD(dst)) => {
1098                        dynasm!(self ; movq Rx(dst), Rq(src));
1099                    }
1100                    (Size::S64, Location::SIMD(src), Location::GPR(dst)) => {
1101                        dynasm!(self ; movq Rq(dst), Rx(src));
1102                    }
1103                    (Size::S64, Location::Memory(src, disp), Location::SIMD(dst)) => {
1104                        dynasm!(self ; movq Rx(dst), [Rq(src) + disp]);
1105                    }
1106                    (Size::S64, Location::SIMD(src), Location::Memory(dst, disp)) => {
1107                        dynasm!(self ; movq [Rq(dst) + disp], Rx(src));
1108                    }
1109                    (_, Location::SIMD(src), Location::SIMD(dst)) => {
1110                        dynasm!(self ; movq Rx(dst), Rx(src));
1111                    }
1112
1113                    _ => codegen_error!("singlepass can't emit MOV {:?} {:?} {:?}", sz, src, dst),
1114                }
1115            })
1116        });
1117        Ok(())
1118    }
1119    fn emit_lea(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1120        match (sz, src, dst) {
1121            (Size::S32, Location::Memory(src, disp), Location::GPR(dst)) => {
1122                dynasm!(self ; lea Rd(dst), [Rq(src) + disp]);
1123            }
1124            (Size::S64, Location::Memory(src, disp), Location::GPR(dst)) => {
1125                dynasm!(self ; lea Rq(dst), [Rq(src) + disp]);
1126            }
1127            (Size::S32, Location::Memory2(src1, src2, mult, disp), Location::GPR(dst)) => {
1128                match mult {
1129                    Multiplier::Zero => dynasm!(self ; lea Rd(dst), [Rq(src1) + disp]),
1130                    Multiplier::One => {
1131                        dynasm!(self ; lea Rd(dst), [Rq(src1) + Rq(src2) + disp])
1132                    }
1133                    Multiplier::Two => {
1134                        dynasm!(self ; lea Rd(dst), [Rq(src1) + Rq(src2) * 2 + disp])
1135                    }
1136                    Multiplier::Four => {
1137                        dynasm!(self ; lea Rd(dst), [Rq(src1) + Rq(src2) * 4 + disp])
1138                    }
1139                    Multiplier::Height => {
1140                        dynasm!(self ; lea Rd(dst), [Rq(src1) + Rq(src2) * 8 + disp])
1141                    }
1142                };
1143            }
1144            (Size::S64, Location::Memory2(src1, src2, mult, disp), Location::GPR(dst)) => {
1145                match mult {
1146                    Multiplier::Zero => dynasm!(self ; lea Rq(dst), [Rq(src1) + disp]),
1147                    Multiplier::One => {
1148                        dynasm!(self ; lea Rq(dst), [Rq(src1) + Rq(src2) + disp])
1149                    }
1150                    Multiplier::Two => {
1151                        dynasm!(self ; lea Rq(dst), [Rq(src1) + Rq(src2) * 2 + disp])
1152                    }
1153                    Multiplier::Four => {
1154                        dynasm!(self ; lea Rq(dst), [Rq(src1) + Rq(src2) * 4 + disp])
1155                    }
1156                    Multiplier::Height => {
1157                        dynasm!(self ; lea Rq(dst), [Rq(src1) + Rq(src2) * 8 + disp])
1158                    }
1159                };
1160            }
1161            _ => codegen_error!("singlepass can't emit LEA {:?} {:?} {:?}", sz, src, dst),
1162        }
1163        Ok(())
1164    }
1165    fn emit_lea_label(&mut self, label: Label, dst: Location) -> Result<(), CompileError> {
1166        match dst {
1167            Location::GPR(x) => {
1168                dynasm!(self ; lea Rq(x), [=>label]);
1169            }
1170            _ => codegen_error!("singlepass can't emit LEA label={:?} {:?}", label, dst),
1171        }
1172        Ok(())
1173    }
1174    fn emit_cdq(&mut self) -> Result<(), CompileError> {
1175        dynasm!(self ; cdq);
1176        Ok(())
1177    }
1178    fn emit_cqo(&mut self) -> Result<(), CompileError> {
1179        dynasm!(self ; cqo);
1180        Ok(())
1181    }
1182    fn emit_xor(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1183        binop_all_nofp!(xor, self, sz, src, dst, {
1184            codegen_error!("singlepass can't emit XOR {:?} {:?} {:?}", sz, src, dst)
1185        });
1186        Ok(())
1187    }
1188    fn emit_jmp(&mut self, condition: Condition, label: Label) -> Result<(), CompileError> {
1189        match condition {
1190            Condition::None => jmp_op!(jmp, self, label),
1191            Condition::Above => jmp_op!(ja, self, label),
1192            Condition::AboveEqual => jmp_op!(jae, self, label),
1193            Condition::Below => jmp_op!(jb, self, label),
1194            Condition::BelowEqual => jmp_op!(jbe, self, label),
1195            Condition::Greater => jmp_op!(jg, self, label),
1196            Condition::GreaterEqual => jmp_op!(jge, self, label),
1197            Condition::Less => jmp_op!(jl, self, label),
1198            Condition::LessEqual => jmp_op!(jle, self, label),
1199            Condition::Equal => jmp_op!(je, self, label),
1200            Condition::NotEqual => jmp_op!(jne, self, label),
1201            Condition::Signed => jmp_op!(js, self, label),
1202            Condition::Carry => jmp_op!(jc, self, label),
1203        }
1204        Ok(())
1205    }
1206    fn emit_jmp_location(&mut self, loc: Location) -> Result<(), CompileError> {
1207        match loc {
1208            Location::GPR(x) => dynasm!(self ; jmp Rq(x)),
1209            Location::Memory(base, disp) => dynasm!(self ; jmp QWORD [Rq(base) + disp]),
1210            _ => codegen_error!("singlepass can't emit JMP {:?}", loc),
1211        }
1212        Ok(())
1213    }
1214    fn emit_set(&mut self, condition: Condition, dst: GPR) -> Result<(), CompileError> {
1215        match condition {
1216            Condition::Above => dynasm!(self ; seta Rb(dst)),
1217            Condition::AboveEqual => dynasm!(self ; setae Rb(dst)),
1218            Condition::Below => dynasm!(self ; setb Rb(dst)),
1219            Condition::BelowEqual => dynasm!(self ; setbe Rb(dst)),
1220            Condition::Greater => dynasm!(self ; setg Rb(dst)),
1221            Condition::GreaterEqual => dynasm!(self ; setge Rb(dst)),
1222            Condition::Less => dynasm!(self ; setl Rb(dst)),
1223            Condition::LessEqual => dynasm!(self ; setle Rb(dst)),
1224            Condition::Equal => dynasm!(self ; sete Rb(dst)),
1225            Condition::NotEqual => dynasm!(self ; setne Rb(dst)),
1226            Condition::Signed => dynasm!(self ; sets Rb(dst)),
1227            Condition::Carry => dynasm!(self ; setc Rb(dst)),
1228            _ => codegen_error!("singlepass can't emit SET {:?} {:?}", condition, dst),
1229        }
1230        Ok(())
1231    }
1232    fn emit_push(&mut self, sz: Size, src: Location) -> Result<(), CompileError> {
1233        match (sz, src) {
1234            (Size::S64, Location::Imm32(src)) => dynasm!(self ; push src as i32),
1235            (Size::S64, Location::GPR(src)) => dynasm!(self ; push Rq(src)),
1236            (Size::S64, Location::Memory(src, disp)) => {
1237                dynasm!(self ; push QWORD [Rq(src) + disp])
1238            }
1239            _ => codegen_error!("singlepass can't emit PUSH {:?} {:?}", sz, src),
1240        }
1241        Ok(())
1242    }
1243    fn emit_pop(&mut self, sz: Size, dst: Location) -> Result<(), CompileError> {
1244        match (sz, dst) {
1245            (Size::S64, Location::GPR(dst)) => dynasm!(self ; pop Rq(dst)),
1246            (Size::S64, Location::Memory(dst, disp)) => {
1247                dynasm!(self ; pop QWORD [Rq(dst) + disp])
1248            }
1249            _ => codegen_error!("singlepass can't emit POP {:?} {:?}", sz, dst),
1250        }
1251        Ok(())
1252    }
1253
1254    /// Emit a CMP instruction that compares `left` against `right`.
1255    ///
1256    /// Note: callers sometimes pass operands in the opposite order compared
1257    /// to other binary operators. This function performs the comparison as
1258    /// provided (i.e. it emits `cmp left, right` semantics).
1259    fn emit_cmp(&mut self, sz: Size, left: Location, right: Location) -> Result<(), CompileError> {
1260        // Constant elimination for comparison between consts.
1261        //
1262        // Only needed for `emit_cmp`, since other binary operators actually write to `right` and `right` must
1263        // be a writable location for them.
1264        let consts = match (left, right) {
1265            (Location::Imm32(x), Location::Imm32(y)) => Some((x as i32 as i64, y as i32 as i64)),
1266            (Location::Imm32(x), Location::Imm64(y)) => Some((x as i32 as i64, y as i64)),
1267            (Location::Imm64(x), Location::Imm32(y)) => Some((x as i64, y as i32 as i64)),
1268            (Location::Imm64(x), Location::Imm64(y)) => Some((x as i64, y as i64)),
1269            _ => None,
1270        };
1271        use std::cmp::Ordering;
1272        match consts {
1273            Some((x, y)) => match x.cmp(&y) {
1274                Ordering::Less => dynasm!(self ; cmp DWORD [>const_neg_one_32], 0),
1275                Ordering::Equal => dynasm!(self ; cmp DWORD [>const_zero_32], 0),
1276                Ordering::Greater => dynasm!(self ; cmp DWORD [>const_pos_one_32], 0),
1277            },
1278            None => binop_all_nofp!(cmp, self, sz, left, right, {
1279                codegen_error!("singlepass can't emit CMP {:?} {:?} {:?}", sz, left, right);
1280            }),
1281        }
1282        Ok(())
1283    }
1284    fn emit_add(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1285        // Fast path
1286        if let Location::Imm32(0) = src {
1287            return Ok(());
1288        }
1289        binop_all_nofp!(add, self, sz, src, dst, {
1290            codegen_error!("singlepass can't emit ADD {:?} {:?} {:?}", sz, src, dst)
1291        });
1292        Ok(())
1293    }
1294    fn emit_sub(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1295        // Fast path
1296        if let Location::Imm32(0) = src {
1297            return Ok(());
1298        }
1299        binop_all_nofp!(sub, self, sz, src, dst, {
1300            codegen_error!("singlepass can't emit SUB {:?} {:?} {:?}", sz, src, dst)
1301        });
1302        Ok(())
1303    }
1304    fn emit_neg(&mut self, sz: Size, value: Location) -> Result<(), CompileError> {
1305        match (sz, value) {
1306            (Size::S8, Location::GPR(value)) => dynasm!(self ; neg Rb(value)),
1307            (Size::S8, Location::Memory(value, disp)) => {
1308                dynasm!(self ; neg [Rq(value) + disp])
1309            }
1310            (Size::S16, Location::GPR(value)) => dynasm!(self ; neg Rw(value)),
1311            (Size::S16, Location::Memory(value, disp)) => {
1312                dynasm!(self ; neg [Rq(value) + disp])
1313            }
1314            (Size::S32, Location::GPR(value)) => dynasm!(self ; neg Rd(value)),
1315            (Size::S32, Location::Memory(value, disp)) => {
1316                dynasm!(self ; neg [Rq(value) + disp])
1317            }
1318            (Size::S64, Location::GPR(value)) => dynasm!(self ; neg Rq(value)),
1319            (Size::S64, Location::Memory(value, disp)) => {
1320                dynasm!(self ; neg [Rq(value) + disp])
1321            }
1322            _ => codegen_error!("singlepass can't emit NEG {:?} {:?}", sz, value),
1323        }
1324        Ok(())
1325    }
1326    fn emit_imul(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1327        binop_gpr_gpr!(imul, self, sz, src, dst, {
1328            binop_mem_gpr!(imul, self, sz, src, dst, {
1329                codegen_error!("singlepass can't emit IMUL {:?} {:?} {:?}", sz, src, dst)
1330            })
1331        });
1332        Ok(())
1333    }
1334    fn emit_imul_imm32_gpr64(&mut self, src: u32, dst: GPR) -> Result<(), CompileError> {
1335        dynasm!(self ; imul Rq(dst), Rq(dst), src as i32);
1336        Ok(())
1337    }
1338    fn emit_div(&mut self, sz: Size, divisor: Location) -> Result<(), CompileError> {
1339        unop_gpr_or_mem!(div, self, sz, divisor, {
1340            codegen_error!("singlepass can't emit DIV {:?} {:?}", sz, divisor)
1341        });
1342        Ok(())
1343    }
1344    fn emit_idiv(&mut self, sz: Size, divisor: Location) -> Result<(), CompileError> {
1345        unop_gpr_or_mem!(idiv, self, sz, divisor, {
1346            codegen_error!("singlepass can't emit IDIV {:?} {:?}", sz, divisor)
1347        });
1348        Ok(())
1349    }
1350    fn emit_shl(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1351        binop_shift!(shl, self, sz, src, dst, {
1352            codegen_error!("singlepass can't emit SHL {:?} {:?} {:?}", sz, src, dst)
1353        });
1354        Ok(())
1355    }
1356    fn emit_shr(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1357        binop_shift!(shr, self, sz, src, dst, {
1358            codegen_error!("singlepass can't emit SHR {:?} {:?} {:?}", sz, src, dst)
1359        });
1360        Ok(())
1361    }
1362    fn emit_sar(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1363        binop_shift!(sar, self, sz, src, dst, {
1364            codegen_error!("singlepass can't emit SAR {:?} {:?} {:?}", sz, src, dst)
1365        });
1366        Ok(())
1367    }
1368    fn emit_rol(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1369        binop_shift!(rol, self, sz, src, dst, {
1370            codegen_error!("singlepass can't emit ROL {:?} {:?} {:?}", sz, src, dst)
1371        });
1372        Ok(())
1373    }
1374    fn emit_ror(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1375        binop_shift!(ror, self, sz, src, dst, {
1376            codegen_error!("singlepass can't emit ROR {:?} {:?} {:?}", sz, src, dst)
1377        });
1378        Ok(())
1379    }
1380    fn emit_and(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1381        binop_all_nofp!(and, self, sz, src, dst, {
1382            codegen_error!("singlepass can't emit AND {:?} {:?} {:?}", sz, src, dst)
1383        });
1384        Ok(())
1385    }
1386    fn emit_test(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1387        binop_all_nofp!(test, self, sz, src, dst, {
1388            codegen_error!("singlepass can't emit TEST {:?} {:?} {:?}", sz, src, dst)
1389        });
1390        Ok(())
1391    }
1392    fn emit_or(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1393        binop_all_nofp!(or, self, sz, src, dst, {
1394            codegen_error!("singlepass can't emit OR {:?} {:?} {:?}", sz, src, dst)
1395        });
1396        Ok(())
1397    }
1398    fn emit_bsr(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1399        binop_gpr_gpr!(bsr, self, sz, src, dst, {
1400            binop_mem_gpr!(bsr, self, sz, src, dst, {
1401                codegen_error!("singlepass can't emit BSR {:?} {:?} {:?}", sz, src, dst)
1402            })
1403        });
1404        Ok(())
1405    }
1406    fn emit_bsf(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1407        binop_gpr_gpr!(bsf, self, sz, src, dst, {
1408            binop_mem_gpr!(bsf, self, sz, src, dst, {
1409                codegen_error!("singlepass can't emit BSF {:?} {:?} {:?}", sz, src, dst)
1410            })
1411        });
1412        Ok(())
1413    }
1414    fn emit_popcnt(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1415        binop_gpr_gpr!(popcnt, self, sz, src, dst, {
1416            binop_mem_gpr!(popcnt, self, sz, src, dst, {
1417                codegen_error!("singlepass can't emit POPCNT {:?} {:?} {:?}", sz, src, dst)
1418            })
1419        });
1420        Ok(())
1421    }
1422    fn emit_movzx(
1423        &mut self,
1424        sz_src: Size,
1425        src: Location,
1426        sz_dst: Size,
1427        dst: Location,
1428    ) -> Result<(), CompileError> {
1429        match (sz_src, src, sz_dst, dst) {
1430            (Size::S8, Location::GPR(src), Size::S32, Location::GPR(dst)) => {
1431                dynasm!(self ; movzx Rd(dst), Rb(src));
1432            }
1433            (Size::S16, Location::GPR(src), Size::S32, Location::GPR(dst)) => {
1434                dynasm!(self ; movzx Rd(dst), Rw(src));
1435            }
1436            (Size::S8, Location::Memory(src, disp), Size::S32, Location::GPR(dst)) => {
1437                dynasm!(self ; movzx Rd(dst), BYTE [Rq(src) + disp]);
1438            }
1439            (Size::S16, Location::Memory(src, disp), Size::S32, Location::GPR(dst)) => {
1440                dynasm!(self ; movzx Rd(dst), WORD [Rq(src) + disp]);
1441            }
1442            (Size::S16, Location::Imm32(imm), Size::S32, Location::GPR(dst)) => {
1443                dynasm!(self ; mov Rd(dst), imm as i32);
1444            }
1445            (Size::S8, Location::GPR(src), Size::S64, Location::GPR(dst)) => {
1446                dynasm!(self ; movzx Rq(dst), Rb(src));
1447            }
1448            (Size::S16, Location::GPR(src), Size::S64, Location::GPR(dst)) => {
1449                dynasm!(self ; movzx Rq(dst), Rw(src));
1450            }
1451            (Size::S8, Location::Memory(src, disp), Size::S64, Location::GPR(dst)) => {
1452                dynasm!(self ; movzx Rq(dst), BYTE [Rq(src) + disp]);
1453            }
1454            (Size::S16, Location::Memory(src, disp), Size::S64, Location::GPR(dst)) => {
1455                dynasm!(self ; movzx Rq(dst), WORD [Rq(src) + disp]);
1456            }
1457            (Size::S32, Location::GPR(src), Size::S64, Location::GPR(dst)) => {
1458                if src != dst {
1459                    dynasm!(self ; mov Rd(dst), Rd(src));
1460                }
1461            }
1462            (Size::S32, Location::Memory(src, disp), Size::S64, Location::GPR(dst)) => {
1463                dynasm!(self ; mov Rd(dst), DWORD [Rq(src) + disp]);
1464            }
1465            (Size::S8, Location::Imm32(imm), Size::S32, Location::GPR(dst)) => {
1466                dynasm!(self ; mov Rq(dst), imm as i32);
1467            }
1468            (Size::S16, Location::Imm32(imm), Size::S64, Location::GPR(dst)) => {
1469                dynasm!(self ; mov Rq(dst), imm as i32);
1470            }
1471            (Size::S32, Location::Imm32(imm), Size::S64, Location::GPR(dst)) => {
1472                dynasm!(self ; mov Rq(dst), imm as i32);
1473            }
1474            (Size::S8, Location::Imm64(imm), Size::S32, Location::GPR(dst)) => {
1475                dynasm!(self ; mov Rq(dst), imm as i32);
1476            }
1477            (Size::S16, Location::Imm64(imm), Size::S64, Location::GPR(dst)) => {
1478                dynasm!(self ; mov Rq(dst), imm as i32);
1479            }
1480            (Size::S32, Location::Imm64(imm), Size::S64, Location::GPR(dst)) => {
1481                dynasm!(self ; mov Rq(dst), imm as i32);
1482            }
1483            _ => {
1484                codegen_error!(
1485                    "singlepass can't emit MOVZX {:?} {:?} {:?} {:?}",
1486                    sz_src,
1487                    src,
1488                    sz_dst,
1489                    dst
1490                )
1491            }
1492        }
1493        Ok(())
1494    }
1495    fn emit_movsx(
1496        &mut self,
1497        sz_src: Size,
1498        src: Location,
1499        sz_dst: Size,
1500        dst: Location,
1501    ) -> Result<(), CompileError> {
1502        match (sz_src, src, sz_dst, dst) {
1503            (Size::S8, Location::GPR(src), Size::S32, Location::GPR(dst)) => {
1504                dynasm!(self ; movsx Rd(dst), Rb(src));
1505            }
1506            (Size::S16, Location::GPR(src), Size::S32, Location::GPR(dst)) => {
1507                dynasm!(self ; movsx Rd(dst), Rw(src));
1508            }
1509            (Size::S8, Location::Memory(src, disp), Size::S32, Location::GPR(dst)) => {
1510                dynasm!(self ; movsx Rd(dst), BYTE [Rq(src) + disp]);
1511            }
1512            (Size::S16, Location::Memory(src, disp), Size::S32, Location::GPR(dst)) => {
1513                dynasm!(self ; movsx Rd(dst), WORD [Rq(src) + disp]);
1514            }
1515            (Size::S8, Location::GPR(src), Size::S64, Location::GPR(dst)) => {
1516                dynasm!(self ; movsx Rq(dst), Rb(src));
1517            }
1518            (Size::S16, Location::GPR(src), Size::S64, Location::GPR(dst)) => {
1519                dynasm!(self ; movsx Rq(dst), Rw(src));
1520            }
1521            (Size::S32, Location::GPR(src), Size::S64, Location::GPR(dst)) => {
1522                dynasm!(self ; movsx Rq(dst), Rd(src));
1523            }
1524            (Size::S8, Location::Memory(src, disp), Size::S64, Location::GPR(dst)) => {
1525                dynasm!(self ; movsx Rq(dst), BYTE [Rq(src) + disp]);
1526            }
1527            (Size::S16, Location::Memory(src, disp), Size::S64, Location::GPR(dst)) => {
1528                dynasm!(self ; movsx Rq(dst), WORD [Rq(src) + disp]);
1529            }
1530            (Size::S32, Location::Memory(src, disp), Size::S64, Location::GPR(dst)) => {
1531                dynasm!(self ; movsx Rq(dst), DWORD [Rq(src) + disp]);
1532            }
1533            _ => {
1534                codegen_error!(
1535                    "singlepass can't emit MOVSX {:?} {:?} {:?} {:?}",
1536                    sz_src,
1537                    src,
1538                    sz_dst,
1539                    dst
1540                )
1541            }
1542        }
1543        Ok(())
1544    }
1545
1546    fn emit_xchg(&mut self, sz: Size, src: Location, dst: Location) -> Result<(), CompileError> {
1547        match (sz, src, dst) {
1548            (Size::S8, Location::GPR(src), Location::GPR(dst)) => {
1549                dynasm!(self ; xchg Rb(dst), Rb(src));
1550            }
1551            (Size::S16, Location::GPR(src), Location::GPR(dst)) => {
1552                dynasm!(self ; xchg Rw(dst), Rw(src));
1553            }
1554            (Size::S32, Location::GPR(src), Location::GPR(dst)) => {
1555                dynasm!(self ; xchg Rd(dst), Rd(src));
1556            }
1557            (Size::S64, Location::GPR(src), Location::GPR(dst)) => {
1558                dynasm!(self ; xchg Rq(dst), Rq(src));
1559            }
1560            (Size::S8, Location::Memory(src, disp), Location::GPR(dst)) => {
1561                dynasm!(self ; xchg Rb(dst), [Rq(src) + disp]);
1562            }
1563            (Size::S8, Location::GPR(src), Location::Memory(dst, disp)) => {
1564                dynasm!(self ; xchg [Rq(dst) + disp], Rb(src));
1565            }
1566            (Size::S16, Location::Memory(src, disp), Location::GPR(dst)) => {
1567                dynasm!(self ; xchg Rw(dst), [Rq(src) + disp]);
1568            }
1569            (Size::S16, Location::GPR(src), Location::Memory(dst, disp)) => {
1570                dynasm!(self ; xchg [Rq(dst) + disp], Rw(src));
1571            }
1572            (Size::S32, Location::Memory(src, disp), Location::GPR(dst)) => {
1573                dynasm!(self ; xchg Rd(dst), [Rq(src) + disp]);
1574            }
1575            (Size::S32, Location::GPR(src), Location::Memory(dst, disp)) => {
1576                dynasm!(self ; xchg [Rq(dst) + disp], Rd(src));
1577            }
1578            (Size::S64, Location::Memory(src, disp), Location::GPR(dst)) => {
1579                dynasm!(self ; xchg Rq(dst), [Rq(src) + disp]);
1580            }
1581            (Size::S64, Location::GPR(src), Location::Memory(dst, disp)) => {
1582                dynasm!(self ; xchg [Rq(dst) + disp], Rq(src));
1583            }
1584            _ => codegen_error!("singlepass can't emit XCHG {:?} {:?} {:?}", sz, src, dst),
1585        }
1586        Ok(())
1587    }
1588
1589    fn emit_lock_xadd(
1590        &mut self,
1591        sz: Size,
1592        src: Location,
1593        dst: Location,
1594    ) -> Result<(), CompileError> {
1595        match (sz, src, dst) {
1596            (Size::S8, Location::GPR(src), Location::Memory(dst, disp)) => {
1597                dynasm!(self ; lock xadd [Rq(dst) + disp], Rb(src));
1598            }
1599            (Size::S16, Location::GPR(src), Location::Memory(dst, disp)) => {
1600                dynasm!(self ; lock xadd [Rq(dst) + disp], Rw(src));
1601            }
1602            (Size::S32, Location::GPR(src), Location::Memory(dst, disp)) => {
1603                dynasm!(self ; lock xadd [Rq(dst) + disp], Rd(src));
1604            }
1605            (Size::S64, Location::GPR(src), Location::Memory(dst, disp)) => {
1606                dynasm!(self ; lock xadd [Rq(dst) + disp], Rq(src));
1607            }
1608            _ => codegen_error!(
1609                "singlepass can't emit LOCK XADD {:?} {:?} {:?}",
1610                sz,
1611                src,
1612                dst
1613            ),
1614        }
1615        Ok(())
1616    }
1617
1618    fn emit_lock_cmpxchg(
1619        &mut self,
1620        sz: Size,
1621        src: Location,
1622        dst: Location,
1623    ) -> Result<(), CompileError> {
1624        match (sz, src, dst) {
1625            (Size::S8, Location::GPR(src), Location::Memory(dst, disp)) => {
1626                dynasm!(self ; lock cmpxchg [Rq(dst) + disp], Rb(src));
1627            }
1628            (Size::S16, Location::GPR(src), Location::Memory(dst, disp)) => {
1629                dynasm!(self ; lock cmpxchg [Rq(dst) + disp], Rw(src));
1630            }
1631            (Size::S32, Location::GPR(src), Location::Memory(dst, disp)) => {
1632                dynasm!(self ; lock cmpxchg [Rq(dst) + disp], Rd(src));
1633            }
1634            (Size::S64, Location::GPR(src), Location::Memory(dst, disp)) => {
1635                dynasm!(self ; lock cmpxchg [Rq(dst) + disp], Rq(src));
1636            }
1637            _ => codegen_error!(
1638                "singlepass can't emit LOCK CMPXCHG {:?} {:?} {:?}",
1639                sz,
1640                src,
1641                dst
1642            ),
1643        }
1644        Ok(())
1645    }
1646
1647    fn emit_rep_stosq(&mut self) -> Result<(), CompileError> {
1648        dynasm!(self ; rep stosq);
1649        Ok(())
1650    }
1651    fn emit_btc_gpr_imm8_32(&mut self, src: u8, dst: GPR) -> Result<(), CompileError> {
1652        dynasm!(self ; btc Rd(dst), BYTE src as i8);
1653        Ok(())
1654    }
1655
1656    fn emit_btc_gpr_imm8_64(&mut self, src: u8, dst: GPR) -> Result<(), CompileError> {
1657        dynasm!(self ; btc Rq(dst), BYTE src as i8);
1658        Ok(())
1659    }
1660
1661    fn emit_cmovae_gpr_32(&mut self, src: GPR, dst: GPR) -> Result<(), CompileError> {
1662        dynasm!(self ; cmovae Rd(dst), Rd(src));
1663        Ok(())
1664    }
1665
1666    fn emit_cmovae_gpr_64(&mut self, src: GPR, dst: GPR) -> Result<(), CompileError> {
1667        dynasm!(self ; cmovae Rq(dst), Rq(src));
1668        Ok(())
1669    }
1670
1671    fn emit_vmovaps(&mut self, src: XMMOrMemory, dst: XMMOrMemory) -> Result<(), CompileError> {
1672        match (src, dst) {
1673            (XMMOrMemory::XMM(src), XMMOrMemory::XMM(dst)) => {
1674                dynasm!(self ; movaps Rx(dst), Rx(src))
1675            }
1676            (XMMOrMemory::Memory(base, disp), XMMOrMemory::XMM(dst)) => {
1677                dynasm!(self ; movaps Rx(dst), [Rq(base) + disp])
1678            }
1679            (XMMOrMemory::XMM(src), XMMOrMemory::Memory(base, disp)) => {
1680                dynasm!(self ; movaps [Rq(base) + disp], Rx(src))
1681            }
1682            _ => codegen_error!("singlepass can't emit VMOVAPS {:?} {:?}", src, dst),
1683        };
1684        Ok(())
1685    }
1686
1687    fn emit_vmovapd(&mut self, src: XMMOrMemory, dst: XMMOrMemory) -> Result<(), CompileError> {
1688        match (src, dst) {
1689            (XMMOrMemory::XMM(src), XMMOrMemory::XMM(dst)) => {
1690                dynasm!(self ; movapd Rx(dst), Rx(src))
1691            }
1692            (XMMOrMemory::Memory(base, disp), XMMOrMemory::XMM(dst)) => {
1693                dynasm!(self ; movapd Rx(dst), [Rq(base) + disp])
1694            }
1695            (XMMOrMemory::XMM(src), XMMOrMemory::Memory(base, disp)) => {
1696                dynasm!(self ; movapd [Rq(base) + disp], Rx(src))
1697            }
1698            _ => codegen_error!("singlepass can't emit VMOVAPD {:?} {:?}", src, dst),
1699        };
1700        Ok(())
1701    }
1702    fn emit_vxorps(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1703        match self.get_simd_arch() {
1704            Some(CpuFeature::AVX) => avx_fn!(vxorps, self, src1, src2, dst),
1705            Some(CpuFeature::SSE42) => sse_fn!(xorps, self, Precision::Single, src1, src2, dst),
1706            _ => {}
1707        }
1708        Ok(())
1709    }
1710    fn emit_vxorpd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1711        match self.get_simd_arch() {
1712            Some(CpuFeature::AVX) => avx_fn!(vxorpd, self, src1, src2, dst),
1713            Some(CpuFeature::SSE42) => sse_fn!(xorpd, self, Precision::Double, src1, src2, dst),
1714            _ => {}
1715        }
1716        Ok(())
1717    }
1718    fn emit_vaddss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1719        match self.get_simd_arch() {
1720            Some(CpuFeature::AVX) => avx_fn!(vaddss, self, src1, src2, dst),
1721            Some(CpuFeature::SSE42) => sse_fn!(addss, self, Precision::Single, src1, src2, dst),
1722            _ => {}
1723        }
1724        Ok(())
1725    }
1726    fn emit_vaddsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1727        match self.get_simd_arch() {
1728            Some(CpuFeature::AVX) => avx_fn!(vaddsd, self, src1, src2, dst),
1729            Some(CpuFeature::SSE42) => sse_fn!(addsd, self, Precision::Double, src1, src2, dst),
1730            _ => {}
1731        }
1732        Ok(())
1733    }
1734    fn emit_vsubss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1735        match self.get_simd_arch() {
1736            Some(CpuFeature::AVX) => avx_fn!(vsubss, self, src1, src2, dst),
1737            Some(CpuFeature::SSE42) => sse_fn!(subss, self, Precision::Single, src1, src2, dst),
1738            _ => {}
1739        }
1740        Ok(())
1741    }
1742    fn emit_vsubsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1743        match self.get_simd_arch() {
1744            Some(CpuFeature::AVX) => avx_fn!(vsubsd, self, src1, src2, dst),
1745            Some(CpuFeature::SSE42) => sse_fn!(subsd, self, Precision::Double, src1, src2, dst),
1746            _ => {}
1747        }
1748        Ok(())
1749    }
1750    fn emit_vmulss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1751        match self.get_simd_arch() {
1752            Some(CpuFeature::AVX) => avx_fn!(vmulss, self, src1, src2, dst),
1753            Some(CpuFeature::SSE42) => sse_fn!(mulss, self, Precision::Single, src1, src2, dst),
1754            _ => {}
1755        }
1756        Ok(())
1757    }
1758    fn emit_vmulsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1759        match self.get_simd_arch() {
1760            Some(CpuFeature::AVX) => avx_fn!(vmulsd, self, src1, src2, dst),
1761            Some(CpuFeature::SSE42) => sse_fn!(mulsd, self, Precision::Double, src1, src2, dst),
1762            _ => {}
1763        }
1764        Ok(())
1765    }
1766    fn emit_vdivss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1767        match self.get_simd_arch() {
1768            Some(CpuFeature::AVX) => avx_fn!(vdivss, self, src1, src2, dst),
1769            Some(CpuFeature::SSE42) => sse_fn!(divss, self, Precision::Single, src1, src2, dst),
1770            _ => {}
1771        }
1772        Ok(())
1773    }
1774    fn emit_vdivsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1775        match self.get_simd_arch() {
1776            Some(CpuFeature::AVX) => avx_fn!(vdivsd, self, src1, src2, dst),
1777            Some(CpuFeature::SSE42) => sse_fn!(divsd, self, Precision::Double, src1, src2, dst),
1778            _ => {}
1779        }
1780        Ok(())
1781    }
1782    fn emit_vmaxss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1783        match self.get_simd_arch() {
1784            Some(CpuFeature::AVX) => avx_fn!(vmaxss, self, src1, src2, dst),
1785            Some(CpuFeature::SSE42) => sse_fn!(maxss, self, Precision::Single, src1, src2, dst),
1786            _ => {}
1787        }
1788        Ok(())
1789    }
1790    fn emit_vmaxsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1791        match self.get_simd_arch() {
1792            Some(CpuFeature::AVX) => avx_fn!(vmaxsd, self, src1, src2, dst),
1793            Some(CpuFeature::SSE42) => sse_fn!(maxsd, self, Precision::Double, src1, src2, dst),
1794            _ => {}
1795        }
1796        Ok(())
1797    }
1798    fn emit_vminss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1799        match self.get_simd_arch() {
1800            Some(CpuFeature::AVX) => avx_fn!(vminss, self, src1, src2, dst),
1801            Some(CpuFeature::SSE42) => sse_fn!(minss, self, Precision::Single, src1, src2, dst),
1802            _ => {}
1803        }
1804        Ok(())
1805    }
1806    fn emit_vminsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
1807        match self.get_simd_arch() {
1808            Some(CpuFeature::AVX) => avx_fn!(vminsd, self, src1, src2, dst),
1809            Some(CpuFeature::SSE42) => sse_fn!(minsd, self, Precision::Double, src1, src2, dst),
1810            _ => {}
1811        }
1812        Ok(())
1813    }
1814    fn emit_vcmpeqss(
1815        &mut self,
1816        src1: XMM,
1817        src2: XMMOrMemory,
1818        dst: XMM,
1819    ) -> Result<(), CompileError> {
1820        match self.get_simd_arch() {
1821            Some(CpuFeature::AVX) => avx_fn!(vcmpeqss, self, src1, src2, dst),
1822            Some(CpuFeature::SSE42) => sse_fn!(cmpss, 0, self, Precision::Single, src1, src2, dst),
1823            _ => {}
1824        }
1825        Ok(())
1826    }
1827    fn emit_vcmpeqsd(
1828        &mut self,
1829        src1: XMM,
1830        src2: XMMOrMemory,
1831        dst: XMM,
1832    ) -> Result<(), CompileError> {
1833        match self.get_simd_arch() {
1834            Some(CpuFeature::AVX) => avx_fn!(vcmpeqsd, self, src1, src2, dst),
1835            Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 0, self, Precision::Double, src1, src2, dst),
1836            _ => {}
1837        }
1838        Ok(())
1839    }
1840    fn emit_vcmpneqss(
1841        &mut self,
1842        src1: XMM,
1843        src2: XMMOrMemory,
1844        dst: XMM,
1845    ) -> Result<(), CompileError> {
1846        match self.get_simd_arch() {
1847            Some(CpuFeature::AVX) => avx_fn!(vcmpneqss, self, src1, src2, dst),
1848            Some(CpuFeature::SSE42) => sse_fn!(cmpss, 4, self, Precision::Single, src1, src2, dst),
1849            _ => {}
1850        }
1851        Ok(())
1852    }
1853    fn emit_vcmpneqsd(
1854        &mut self,
1855        src1: XMM,
1856        src2: XMMOrMemory,
1857        dst: XMM,
1858    ) -> Result<(), CompileError> {
1859        match self.get_simd_arch() {
1860            Some(CpuFeature::AVX) => avx_fn!(vcmpneqsd, self, src1, src2, dst),
1861            Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 4, self, Precision::Double, src1, src2, dst),
1862            _ => {}
1863        }
1864        Ok(())
1865    }
1866    fn emit_vcmpltss(
1867        &mut self,
1868        src1: XMM,
1869        src2: XMMOrMemory,
1870        dst: XMM,
1871    ) -> Result<(), CompileError> {
1872        match self.get_simd_arch() {
1873            Some(CpuFeature::AVX) => avx_fn!(vcmpltss, self, src1, src2, dst),
1874            Some(CpuFeature::SSE42) => sse_fn!(cmpss, 1, self, Precision::Single, src1, src2, dst),
1875            _ => {}
1876        }
1877        Ok(())
1878    }
1879    fn emit_vcmpltsd(
1880        &mut self,
1881        src1: XMM,
1882        src2: XMMOrMemory,
1883        dst: XMM,
1884    ) -> Result<(), CompileError> {
1885        match self.get_simd_arch() {
1886            Some(CpuFeature::AVX) => avx_fn!(vcmpltsd, self, src1, src2, dst),
1887            Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 1, self, Precision::Double, src1, src2, dst),
1888            _ => {}
1889        }
1890        Ok(())
1891    }
1892    fn emit_vcmpless(
1893        &mut self,
1894        src1: XMM,
1895        src2: XMMOrMemory,
1896        dst: XMM,
1897    ) -> Result<(), CompileError> {
1898        match self.get_simd_arch() {
1899            Some(CpuFeature::AVX) => avx_fn!(vcmpless, self, src1, src2, dst),
1900            Some(CpuFeature::SSE42) => sse_fn!(cmpss, 2, self, Precision::Single, src1, src2, dst),
1901            _ => {}
1902        }
1903        Ok(())
1904    }
1905    fn emit_vcmplesd(
1906        &mut self,
1907        src1: XMM,
1908        src2: XMMOrMemory,
1909        dst: XMM,
1910    ) -> Result<(), CompileError> {
1911        match self.get_simd_arch() {
1912            Some(CpuFeature::AVX) => avx_fn!(vcmplesd, self, src1, src2, dst),
1913            Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 2, self, Precision::Double, src1, src2, dst),
1914            _ => {}
1915        }
1916        Ok(())
1917    }
1918    fn emit_vcmpgtss(
1919        &mut self,
1920        src1: XMM,
1921        src2: XMMOrMemory,
1922        dst: XMM,
1923    ) -> Result<(), CompileError> {
1924        match self.get_simd_arch() {
1925            Some(CpuFeature::AVX) => avx_fn!(vcmpgtss, self, src1, src2, dst),
1926            Some(CpuFeature::SSE42) => sse_fn!(cmpss, 6, self, Precision::Single, src1, src2, dst),
1927            _ => {}
1928        }
1929        Ok(())
1930    }
1931    fn emit_vcmpgtsd(
1932        &mut self,
1933        src1: XMM,
1934        src2: XMMOrMemory,
1935        dst: XMM,
1936    ) -> Result<(), CompileError> {
1937        match self.get_simd_arch() {
1938            Some(CpuFeature::AVX) => avx_fn!(vcmpgtsd, self, src1, src2, dst),
1939            Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 6, self, Precision::Double, src1, src2, dst),
1940            _ => {}
1941        }
1942        Ok(())
1943    }
1944    fn emit_vcmpgess(
1945        &mut self,
1946        src1: XMM,
1947        src2: XMMOrMemory,
1948        dst: XMM,
1949    ) -> Result<(), CompileError> {
1950        match self.get_simd_arch() {
1951            Some(CpuFeature::AVX) => avx_fn!(vcmpgess, self, src1, src2, dst),
1952            Some(CpuFeature::SSE42) => sse_fn!(cmpss, 5, self, Precision::Single, src1, src2, dst),
1953            _ => {}
1954        }
1955        Ok(())
1956    }
1957    fn emit_vcmpgesd(
1958        &mut self,
1959        src1: XMM,
1960        src2: XMMOrMemory,
1961        dst: XMM,
1962    ) -> Result<(), CompileError> {
1963        match self.get_simd_arch() {
1964            Some(CpuFeature::AVX) => avx_fn!(vcmpgesd, self, src1, src2, dst),
1965            Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 5, self, Precision::Double, src1, src2, dst),
1966            _ => {}
1967        }
1968        Ok(())
1969    }
1970    fn emit_vcmpunordss(
1971        &mut self,
1972        src1: XMM,
1973        src2: XMMOrMemory,
1974        dst: XMM,
1975    ) -> Result<(), CompileError> {
1976        match self.get_simd_arch() {
1977            Some(CpuFeature::AVX) => avx_fn!(vcmpunordss, self, src1, src2, dst),
1978            Some(CpuFeature::SSE42) => sse_fn!(cmpss, 3, self, Precision::Single, src1, src2, dst),
1979            _ => {}
1980        }
1981        Ok(())
1982    }
1983    fn emit_vcmpunordsd(
1984        &mut self,
1985        src1: XMM,
1986        src2: XMMOrMemory,
1987        dst: XMM,
1988    ) -> Result<(), CompileError> {
1989        match self.get_simd_arch() {
1990            Some(CpuFeature::AVX) => avx_fn!(vcmpunordsd, self, src1, src2, dst),
1991            Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 3, self, Precision::Double, src1, src2, dst),
1992            _ => {}
1993        }
1994        Ok(())
1995    }
1996    fn emit_vcmpordss(
1997        &mut self,
1998        src1: XMM,
1999        src2: XMMOrMemory,
2000        dst: XMM,
2001    ) -> Result<(), CompileError> {
2002        match self.get_simd_arch() {
2003            Some(CpuFeature::AVX) => avx_fn!(vcmpordss, self, src1, src2, dst),
2004            Some(CpuFeature::SSE42) => sse_fn!(cmpss, 7, self, Precision::Single, src1, src2, dst),
2005            _ => {}
2006        }
2007        Ok(())
2008    }
2009    fn emit_vcmpordsd(
2010        &mut self,
2011        src1: XMM,
2012        src2: XMMOrMemory,
2013        dst: XMM,
2014    ) -> Result<(), CompileError> {
2015        match self.get_simd_arch() {
2016            Some(CpuFeature::AVX) => avx_fn!(vcmpordsd, self, src1, src2, dst),
2017            Some(CpuFeature::SSE42) => sse_fn!(cmpsd, 7, self, Precision::Double, src1, src2, dst),
2018            _ => {}
2019        }
2020        Ok(())
2021    }
2022    fn emit_vsqrtss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
2023        match self.get_simd_arch() {
2024            Some(CpuFeature::AVX) => avx_fn!(vsqrtss, self, src1, src2, dst),
2025            Some(CpuFeature::SSE42) => sse_fn!(sqrtss, self, Precision::Single, src1, src2, dst),
2026            _ => {}
2027        }
2028        Ok(())
2029    }
2030    fn emit_vsqrtsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
2031        match self.get_simd_arch() {
2032            Some(CpuFeature::AVX) => avx_fn!(vsqrtsd, self, src1, src2, dst),
2033            Some(CpuFeature::SSE42) => sse_fn!(sqrtsd, self, Precision::Double, src1, src2, dst),
2034            _ => {}
2035        }
2036        Ok(())
2037    }
2038    fn emit_vcvtss2sd(
2039        &mut self,
2040        src1: XMM,
2041        src2: XMMOrMemory,
2042        dst: XMM,
2043    ) -> Result<(), CompileError> {
2044        match self.get_simd_arch() {
2045            Some(CpuFeature::AVX) => avx_fn!(vcvtss2sd, self, src1, src2, dst),
2046            Some(CpuFeature::SSE42) => sse_fn!(cvtss2sd, self, Precision::Single, src1, src2, dst),
2047            _ => {}
2048        }
2049        Ok(())
2050    }
2051    fn emit_vcvtsd2ss(
2052        &mut self,
2053        src1: XMM,
2054        src2: XMMOrMemory,
2055        dst: XMM,
2056    ) -> Result<(), CompileError> {
2057        match self.get_simd_arch() {
2058            Some(CpuFeature::AVX) => avx_fn!(vcvtsd2ss, self, src1, src2, dst),
2059            Some(CpuFeature::SSE42) => sse_fn!(cvtsd2ss, self, Precision::Double, src1, src2, dst),
2060            _ => {}
2061        }
2062        Ok(())
2063    }
2064    fn emit_vroundss_nearest(
2065        &mut self,
2066        src1: XMM,
2067        src2: XMMOrMemory,
2068        dst: XMM,
2069    ) -> Result<(), CompileError> {
2070        match self.get_simd_arch() {
2071            Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 0, self, src1, src2, dst),
2072            Some(CpuFeature::SSE42) => {
2073                sse_round_fn!(roundss, 0, self, Precision::Single, src1, src2, dst)
2074            }
2075            _ => {}
2076        }
2077        Ok(())
2078    }
2079    fn emit_vroundsd_nearest(
2080        &mut self,
2081        src1: XMM,
2082        src2: XMMOrMemory,
2083        dst: XMM,
2084    ) -> Result<(), CompileError> {
2085        match self.get_simd_arch() {
2086            Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 0, self, src1, src2, dst),
2087            Some(CpuFeature::SSE42) => {
2088                sse_round_fn!(roundsd, 0, self, Precision::Double, src1, src2, dst)
2089            }
2090            _ => {}
2091        }
2092        Ok(())
2093    }
2094    fn emit_vroundss_floor(
2095        &mut self,
2096        src1: XMM,
2097        src2: XMMOrMemory,
2098        dst: XMM,
2099    ) -> Result<(), CompileError> {
2100        match self.get_simd_arch() {
2101            Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 1, self, src1, src2, dst),
2102            Some(CpuFeature::SSE42) => {
2103                sse_round_fn!(roundss, 1, self, Precision::Single, src1, src2, dst)
2104            }
2105            _ => {}
2106        }
2107        Ok(())
2108    }
2109    fn emit_vroundsd_floor(
2110        &mut self,
2111        src1: XMM,
2112        src2: XMMOrMemory,
2113        dst: XMM,
2114    ) -> Result<(), CompileError> {
2115        match self.get_simd_arch() {
2116            Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 1, self, src1, src2, dst),
2117            Some(CpuFeature::SSE42) => {
2118                sse_round_fn!(roundsd, 1, self, Precision::Double, src1, src2, dst)
2119            }
2120            _ => {}
2121        }
2122        Ok(())
2123    }
2124    fn emit_vroundss_ceil(
2125        &mut self,
2126        src1: XMM,
2127        src2: XMMOrMemory,
2128        dst: XMM,
2129    ) -> Result<(), CompileError> {
2130        match self.get_simd_arch() {
2131            Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 2, self, src1, src2, dst),
2132            Some(CpuFeature::SSE42) => {
2133                sse_round_fn!(roundss, 2, self, Precision::Single, src1, src2, dst)
2134            }
2135            _ => {}
2136        }
2137        Ok(())
2138    }
2139    fn emit_vroundsd_ceil(
2140        &mut self,
2141        src1: XMM,
2142        src2: XMMOrMemory,
2143        dst: XMM,
2144    ) -> Result<(), CompileError> {
2145        match self.get_simd_arch() {
2146            Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 2, self, src1, src2, dst),
2147            Some(CpuFeature::SSE42) => {
2148                sse_round_fn!(roundsd, 2, self, Precision::Double, src1, src2, dst)
2149            }
2150            _ => {}
2151        }
2152        Ok(())
2153    }
2154    fn emit_vroundss_trunc(
2155        &mut self,
2156        src1: XMM,
2157        src2: XMMOrMemory,
2158        dst: XMM,
2159    ) -> Result<(), CompileError> {
2160        match self.get_simd_arch() {
2161            Some(CpuFeature::AVX) => avx_round_fn!(vroundss, 3, self, src1, src2, dst),
2162            Some(CpuFeature::SSE42) => {
2163                sse_round_fn!(roundss, 3, self, Precision::Single, src1, src2, dst)
2164            }
2165            _ => {}
2166        }
2167        Ok(())
2168    }
2169    fn emit_vroundsd_trunc(
2170        &mut self,
2171        src1: XMM,
2172        src2: XMMOrMemory,
2173        dst: XMM,
2174    ) -> Result<(), CompileError> {
2175        match self.get_simd_arch() {
2176            Some(CpuFeature::AVX) => avx_round_fn!(vroundsd, 3, self, src1, src2, dst),
2177            Some(CpuFeature::SSE42) => {
2178                sse_round_fn!(roundsd, 3, self, Precision::Double, src1, src2, dst)
2179            }
2180            _ => {}
2181        }
2182        Ok(())
2183    }
2184    fn emit_vcvtsi2ss_32(
2185        &mut self,
2186        src1: XMM,
2187        src2: GPROrMemory,
2188        dst: XMM,
2189    ) -> Result<(), CompileError> {
2190        match self.get_simd_arch() {
2191            Some(CpuFeature::AVX) => avx_i2f_32_fn!(vcvtsi2ss, self, src1, src2, dst),
2192            Some(CpuFeature::SSE42) => {
2193                sse_i2f_32_fn!(cvtsi2ss, self, Precision::Single, src1, src2, dst)
2194            }
2195            _ => {}
2196        }
2197        Ok(())
2198    }
2199    fn emit_vcvtsi2sd_32(
2200        &mut self,
2201        src1: XMM,
2202        src2: GPROrMemory,
2203        dst: XMM,
2204    ) -> Result<(), CompileError> {
2205        match self.get_simd_arch() {
2206            Some(CpuFeature::AVX) => avx_i2f_32_fn!(vcvtsi2sd, self, src1, src2, dst),
2207            Some(CpuFeature::SSE42) => {
2208                sse_i2f_32_fn!(cvtsi2sd, self, Precision::Double, src1, src2, dst)
2209            }
2210            _ => {}
2211        }
2212        Ok(())
2213    }
2214    fn emit_vcvtsi2ss_64(
2215        &mut self,
2216        src1: XMM,
2217        src2: GPROrMemory,
2218        dst: XMM,
2219    ) -> Result<(), CompileError> {
2220        match self.get_simd_arch() {
2221            Some(CpuFeature::AVX) => avx_i2f_64_fn!(vcvtsi2ss, self, src1, src2, dst),
2222            Some(CpuFeature::SSE42) => {
2223                sse_i2f_64_fn!(cvtsi2ss, self, Precision::Single, src1, src2, dst)
2224            }
2225            _ => {}
2226        }
2227        Ok(())
2228    }
2229    fn emit_vcvtsi2sd_64(
2230        &mut self,
2231        src1: XMM,
2232        src2: GPROrMemory,
2233        dst: XMM,
2234    ) -> Result<(), CompileError> {
2235        match self.get_simd_arch() {
2236            Some(CpuFeature::AVX) => avx_i2f_64_fn!(vcvtsi2sd, self, src1, src2, dst),
2237            Some(CpuFeature::SSE42) => {
2238                sse_i2f_64_fn!(cvtsi2sd, self, Precision::Double, src1, src2, dst)
2239            }
2240            _ => {}
2241        }
2242        Ok(())
2243    }
2244
2245    fn emit_vblendvps(
2246        &mut self,
2247        src1: XMM,
2248        src2: XMMOrMemory,
2249        mask: XMM,
2250        dst: XMM,
2251    ) -> Result<(), CompileError> {
2252        // this implementation works only for sse 4.1 and greater
2253        match self.get_simd_arch() {
2254            Some(CpuFeature::AVX) => match src2 {
2255                XMMOrMemory::XMM(src2) => {
2256                    // TODO: this argument order does not match the documentation??
2257                    dynasm!( self; vblendvps Rx(dst), Rx(mask), Rx(src2), Rx(src1))
2258                }
2259                XMMOrMemory::Memory(base, disp) => {
2260                    dynasm!( self; vblendvps Rx(dst), Rx(mask), [Rq(base) + disp], Rx(src1))
2261                }
2262            },
2263            Some(CpuFeature::SSE42) => match src2 {
2264                XMMOrMemory::XMM(src2) => {
2265                    move_src_to_dst(self, Precision::Single, src1, dst);
2266                    dynasm!( self; blendvps Rx(dst), Rx(src2))
2267                }
2268                XMMOrMemory::Memory(base, disp) => {
2269                    move_src_to_dst(self, Precision::Single, src1, dst);
2270                    dynasm!( self; blendvps Rx(dst), [Rq(base) + disp])
2271                }
2272            },
2273            _ => {}
2274        }
2275        Ok(())
2276    }
2277
2278    fn emit_vblendvpd(
2279        &mut self,
2280        src1: XMM,
2281        src2: XMMOrMemory,
2282        mask: XMM,
2283        dst: XMM,
2284    ) -> Result<(), CompileError> {
2285        // this implementation works only for sse 4.1 and greater
2286        match self.get_simd_arch() {
2287            Some(CpuFeature::AVX) => match src2 {
2288                XMMOrMemory::XMM(src2) => {
2289                    // TODO: this argument order does not match the documentation??
2290                    dynasm!( self; vblendvpd Rx(dst), Rx(mask), Rx(src2), Rx(src1))
2291                }
2292                XMMOrMemory::Memory(base, disp) => {
2293                    dynasm!( self; vblendvpd Rx(dst), Rx(mask), [Rq(base) + disp], Rx(src1))
2294                }
2295            },
2296            Some(CpuFeature::SSE42) => match src2 {
2297                XMMOrMemory::XMM(src2) => {
2298                    move_src_to_dst(self, Precision::Double, src1, dst);
2299                    dynasm!( self; blendvpd Rx(dst), Rx(src2))
2300                }
2301                XMMOrMemory::Memory(base, disp) => {
2302                    move_src_to_dst(self, Precision::Double, src1, dst);
2303                    dynasm!( self; blendvpd Rx(dst), [Rq(base) + disp])
2304                }
2305            },
2306            _ => {}
2307        }
2308        Ok(())
2309    }
2310
2311    fn emit_ucomiss(&mut self, src: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
2312        match src {
2313            XMMOrMemory::XMM(x) => dynasm!(self ; ucomiss Rx(dst), Rx(x)),
2314            XMMOrMemory::Memory(base, disp) => {
2315                dynasm!(self ; ucomiss Rx(dst), [Rq(base) + disp])
2316            }
2317        }
2318        Ok(())
2319    }
2320
2321    fn emit_ucomisd(&mut self, src: XMMOrMemory, dst: XMM) -> Result<(), CompileError> {
2322        match src {
2323            XMMOrMemory::XMM(x) => dynasm!(self ; ucomisd Rx(dst), Rx(x)),
2324            XMMOrMemory::Memory(base, disp) => {
2325                dynasm!(self ; ucomisd Rx(dst), [Rq(base) + disp])
2326            }
2327        }
2328        Ok(())
2329    }
2330
2331    fn emit_cvttss2si_32(&mut self, src: XMMOrMemory, dst: GPR) -> Result<(), CompileError> {
2332        match src {
2333            XMMOrMemory::XMM(x) => dynasm!(self ; cvttss2si Rd(dst), Rx(x)),
2334            XMMOrMemory::Memory(base, disp) => {
2335                dynasm!(self ; cvttss2si Rd(dst), [Rq(base) + disp])
2336            }
2337        }
2338        Ok(())
2339    }
2340
2341    fn emit_cvttss2si_64(&mut self, src: XMMOrMemory, dst: GPR) -> Result<(), CompileError> {
2342        match src {
2343            XMMOrMemory::XMM(x) => dynasm!(self ; cvttss2si Rq(dst), Rx(x)),
2344            XMMOrMemory::Memory(base, disp) => {
2345                dynasm!(self ; cvttss2si Rq(dst), [Rq(base) + disp])
2346            }
2347        }
2348        Ok(())
2349    }
2350
2351    fn emit_cvttsd2si_32(&mut self, src: XMMOrMemory, dst: GPR) -> Result<(), CompileError> {
2352        match src {
2353            XMMOrMemory::XMM(x) => dynasm!(self ; cvttsd2si Rd(dst), Rx(x)),
2354            XMMOrMemory::Memory(base, disp) => {
2355                dynasm!(self ; cvttsd2si Rd(dst), [Rq(base) + disp])
2356            }
2357        }
2358        Ok(())
2359    }
2360
2361    fn emit_cvttsd2si_64(&mut self, src: XMMOrMemory, dst: GPR) -> Result<(), CompileError> {
2362        match src {
2363            XMMOrMemory::XMM(x) => dynasm!(self ; cvttsd2si Rq(dst), Rx(x)),
2364            XMMOrMemory::Memory(base, disp) => {
2365                dynasm!(self ; cvttsd2si Rq(dst), [Rq(base) + disp])
2366            }
2367        }
2368        Ok(())
2369    }
2370
2371    fn emit_test_gpr_64(&mut self, reg: GPR) -> Result<(), CompileError> {
2372        dynasm!(self ; test Rq(reg), Rq(reg));
2373        Ok(())
2374    }
2375
2376    fn emit_ud2(&mut self) -> Result<(), CompileError> {
2377        dynasm!(self ; ud2);
2378        Ok(())
2379    }
2380    #[allow(clippy::useless_conversion)]
2381    fn emit_ud1_payload(&mut self, payload: u8) -> Result<(), CompileError> {
2382        assert!(payload & 0xf0 == 0);
2383        let reg1 = (payload >> 3) & 1;
2384        let reg2 = payload & 7;
2385
2386        dynasm!(self ; ud1 Rd(reg1), Rd(reg2));
2387        Ok(())
2388    }
2389    fn emit_ret(&mut self) -> Result<(), CompileError> {
2390        dynasm!(self ; ret);
2391        Ok(())
2392    }
2393
2394    fn emit_call_label(&mut self, label: Label) -> Result<(), CompileError> {
2395        dynasm!(self ; call =>label);
2396        Ok(())
2397    }
2398    fn emit_call_location(&mut self, loc: Location) -> Result<(), CompileError> {
2399        match loc {
2400            Location::GPR(x) => dynasm!(self ; call Rq(x)),
2401            Location::Memory(base, disp) => dynasm!(self ; call QWORD [Rq(base) + disp]),
2402            _ => codegen_error!("singlepass can't emit CALL {:?}", loc),
2403        }
2404        Ok(())
2405    }
2406
2407    fn emit_call_register(&mut self, reg: GPR) -> Result<(), CompileError> {
2408        dynasm!(self ; call Rq(reg));
2409        Ok(())
2410    }
2411
2412    fn emit_bkpt(&mut self) -> Result<(), CompileError> {
2413        dynasm!(self ; int3);
2414        Ok(())
2415    }
2416
2417    fn emit_host_redirection(&mut self, target: GPR) -> Result<(), CompileError> {
2418        self.emit_jmp_location(Location::GPR(target))
2419    }
2420
2421    fn arch_mov64_imm_offset(&self) -> Result<usize, CompileError> {
2422        Ok(2)
2423    }
2424}