diff --git a/cranelift/assembler-x64/meta/src/dsl.rs b/cranelift/assembler-x64/meta/src/dsl.rs index 29ae52bf5d15..29aa5a2ca539 100644 --- a/cranelift/assembler-x64/meta/src/dsl.rs +++ b/cranelift/assembler-x64/meta/src/dsl.rs @@ -8,10 +8,12 @@ mod encoding; mod features; pub mod format; -pub use encoding::{rex, vex}; -pub use encoding::{Encoding, Group1Prefix, Group2Prefix, Group3Prefix, Group4Prefix, Opcodes, Prefixes, Rex}; +pub use encoding::{ + rex, vex, Encoding, Group1Prefix, Group2Prefix, Group3Prefix, Group4Prefix, Opcodes, Prefixes, Rex, Vex, VexLength, + VexMMMMM, VexPP, +}; pub use features::{Feature, Features, ALL_FEATURES}; -pub use format::{align, fmt, r, rw, sxl, sxq, sxw}; +pub use format::{align, fmt, r, rw, sxl, sxq, sxw, w}; pub use format::{Extension, Format, Location, Mutability, Operand, OperandKind}; /// Abbreviated constructor for an x64 instruction. diff --git a/cranelift/assembler-x64/meta/src/dsl/encoding.rs b/cranelift/assembler-x64/meta/src/dsl/encoding.rs index fa89fd0bcc67..dcaa80c429b0 100644 --- a/cranelift/assembler-x64/meta/src/dsl/encoding.rs +++ b/cranelift/assembler-x64/meta/src/dsl/encoding.rs @@ -32,8 +32,20 @@ pub fn rex(opcode: impl Into) -> Rex { /// An abbreviated constructor for VEX-encoded instructions. #[must_use] -pub fn vex() -> Vex { - Vex {} +pub fn vex(opcode: impl Into) -> Vex { + Vex { + opcodes: opcode.into(), + w: false, + r: false, + wig: false, + rxb: 0, + length: VexLength::default(), + mmmmm: VexMMMMM::None, + pp: VexPP::None, + reg: 0x00, + vvvv: None, + imm: None, + } } /// Enumerate the ways x64 encodes instructions. 
@@ -48,7 +60,7 @@ impl Encoding { pub fn validate(&self, operands: &[Operand]) { match self { Encoding::Rex(rex) => rex.validate(operands), - Encoding::Vex(vex) => vex.validate(), + Encoding::Vex(vex) => vex.validate(operands), } } } @@ -57,7 +69,7 @@ impl fmt::Display for Encoding { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Encoding::Rex(rex) => write!(f, "{rex}"), - Encoding::Vex(_vex) => todo!(), + Encoding::Vex(vex) => write!(f, "{vex}"), } } } @@ -383,6 +395,23 @@ impl Prefixes { pub fn is_empty(&self) -> bool { self.group1.is_none() && self.group2.is_none() && self.group3.is_none() && self.group4.is_none() } + + pub fn bits(&self) -> u8 { + let mut bits = 0; + if self.group1.is_some() { + bits |= 0b0001; + } + if self.group2.is_some() { + bits |= 0b0010; + } + if self.group3.is_some() { + bits |= 0b0100; + } + if self.group4.is_some() { + bits |= 0b1000; + } + bits + } } pub enum Group1Prefix { @@ -559,7 +588,7 @@ pub enum Imm { } impl Imm { - fn bits(&self) -> u8 { + fn bits(&self) -> u16 { match self { Imm::None => 0, Imm::ib => 8, @@ -582,10 +611,128 @@ impl fmt::Display for Imm { } } -pub struct Vex {} +pub struct Vex { + pub opcodes: Opcodes, + pub w: bool, + pub r: bool, + pub wig: bool, + pub rxb: u8, + pub length: VexLength, + pub mmmmm: VexMMMMM, + pub pp: VexPP, + pub reg: u8, + pub vvvv: Option, + pub imm: Option, +} + +#[derive(PartialEq)] +pub enum VexPP { + None, + /// Operand size override -- here, denoting "16-bit operation". + _66, + /// REPNE, but no specific meaning here -- is just an opcode extension. + _F2, + /// REP/REPE, but no specific meaning here -- is just an opcode extension. 
+ _F3, +} + +impl fmt::Display for VexPP { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + VexPP::None => write!(f, "None"), + VexPP::_66 => write!(f, "_66"), + VexPP::_F3 => write!(f, "_F3"), + VexPP::_F2 => write!(f, "_F2"), + } + } +} + +#[derive(PartialEq)] +pub enum VexMMMMM { + None, + _OF, + /// The `0F 3A` opcode map. + _OF3A, + /// The `0F 38` opcode map. + _OF38, +} + +impl fmt::Display for VexMMMMM { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + VexMMMMM::None => write!(f, "None"), + VexMMMMM::_OF => write!(f, "_0F"), + VexMMMMM::_OF3A => write!(f, "_0F3A"), + VexMMMMM::_OF38 => write!(f, "_0F38"), + } + } +} + +pub enum VexLength { + _128, + _256, +} + +impl VexLength { + /// Encode the `L` bit. + pub fn bits(&self) -> u8 { + match self { + Self::_128 => 0b0, + Self::_256 => 0b1, + } + } +} + +impl Default for VexLength { + fn default() -> Self { + Self::_128 + } +} + +/// Describe the register index to use. This wrapper is a type-safe way to pass +/// around the registers defined in `inst/regs.rs`. 
+#[derive(Debug, Copy, Clone, Default)] +pub struct Register(u8); +impl From for Register { + fn from(reg: u8) -> Self { + debug_assert!(reg < 16); + Self(reg) + } +} +impl Into for Register { + fn into(self) -> u8 { + self.0 + } +} impl Vex { - fn validate(&self) { - todo!() + pub fn length(self, length: VexLength) -> Self { + Self { length, ..self } + } + pub fn pp(self, pp: VexPP) -> Self { + Self { pp, ..self } + } + pub fn mmmmm(self, mmmmm: VexMMMMM) -> Self { + Self { mmmmm, ..self } + } + + fn validate(&self, _operands: &[Operand]) {} +} + +impl From for Encoding { + fn from(vex: Vex) -> Encoding { + Encoding::Vex(vex) + } +} + +impl fmt::Display for Vex { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "VEX")?; + match self.length { + VexLength::_128 => write!(f, ".128")?, + VexLength::_256 => write!(f, ".256")?, + } + write!(f, " {:#04x}", self.opcodes.primary)?; + Ok(()) } } diff --git a/cranelift/assembler-x64/meta/src/dsl/format.rs b/cranelift/assembler-x64/meta/src/dsl/format.rs index e4aae3571a50..e940bc03a546 100644 --- a/cranelift/assembler-x64/meta/src/dsl/format.rs +++ b/cranelift/assembler-x64/meta/src/dsl/format.rs @@ -49,6 +49,16 @@ pub fn r(op: impl Into) -> Operand { op } +#[must_use] +pub fn w(location: Location) -> Operand { + Operand { + location, + mutability: Mutability::Write, + extension: Extension::None, + align: false, + } +} + /// An abbreviated constructor for a memory operand that requires alignment. pub fn align(location: Location) -> Operand { assert!(location.uses_memory()); @@ -236,8 +246,6 @@ pub enum Location { r32, r64, - xmm, - rm8, rm16, rm32, @@ -248,26 +256,41 @@ pub enum Location { m16, m32, m64, + xmm1, + xmm2, + xmm3, + ymm1, + ymm2, + ymm3, + zmm1, + zmm2, + zmm3, + + xmm_m128, + ymm_m256, + zmm_m512, } impl Location { /// Return the number of bits accessed. 
#[must_use] - pub fn bits(&self) -> u8 { + pub fn bits(&self) -> u16 { use Location::*; match self { al | cl | imm8 | r8 | rm8 | m8 => 8, ax | imm16 | r16 | rm16 | m16 => 16, eax | imm32 | r32 | rm32 | m32 => 32, rax | r64 | rm64 | m64 => 64, - xmm | rm128 => 128, + rm128 | xmm1 | xmm2 | xmm3 | xmm_m128 => 128, + ymm1 | ymm2 | ymm3 | ymm_m256 => 256, + zmm1 | zmm2 | zmm3 | zmm_m512 => 512, } } /// Return the number of bytes accessed, for convenience. #[must_use] - pub fn bytes(&self) -> u8 { - self.bits() / 8 + pub fn bytes(&self) -> u16 { + self.bits() / 8 } /// Return `true` if the location accesses memory; `false` otherwise. @@ -275,8 +298,9 @@ impl Location { pub fn uses_memory(&self) -> bool { use Location::*; match self { - al | cl | ax | eax | rax | imm8 | imm16 | imm32 | r8 | r16 | r32 | r64 | xmm => false, - rm8 | rm16 | rm32 | rm64 | rm128 | m8 | m16 | m32 | m64 => true, + al | cl | ax | eax | rax | imm8 | imm16 | imm32 | r8 | r16 | r32 | r64 | xmm1 | xmm2 | xmm3 | ymm1 + | ymm2 | ymm3 | zmm1 | zmm2 | zmm3 => false, + rm8 | rm16 | rm32 | rm64 | rm128 | m8 | m16 | m32 | m64 | xmm_m128 | ymm_m256 | zmm_m512 => true, } } @@ -286,9 +310,9 @@ impl Location { pub fn uses_register(&self) -> bool { use Location::*; match self { - imm8 | imm16 | imm32 => false, - al | ax | eax | rax | cl | r8 | r16 | r32 | r64 | xmm | rm8 | rm16 | rm32 | rm64 | rm128 | m8 | m16 - | m32 | m64 => true, + imm8 | imm16 | imm32 => false, + al | ax | eax | rax | cl | r8 | r16 | r32 | r64 | rm8 | rm16 | rm32 | rm64 | rm128 | m8 | m16 | m32 | m64 + | xmm1 | xmm2 | xmm3 | ymm1 | ymm2 | ymm3 | zmm1 | zmm2 | zmm3 | xmm_m128 | ymm_m256 | zmm_m512 => true, } } @@ -299,8 +323,10 @@ impl Location { match self { al | ax | eax | rax | cl => OperandKind::FixedReg(*self), imm8 | imm16 | imm32 => OperandKind::Imm(*self), - r8 | r16 | r32 | r64 | xmm => OperandKind::Reg(*self), - rm8 | rm16 | rm32 | rm64 | rm128 => OperandKind::RegMem(*self), + r8 | r16 | r32 | r64 | xmm1 | xmm2 | xmm3 | ymm1 | ymm2 
| ymm3 | zmm1 | zmm2 | zmm3 => { + OperandKind::Reg(*self) + } + rm8 | rm16 | rm32 | rm64 | rm128 | xmm_m128 | ymm_m256 | zmm_m512 => OperandKind::RegMem(*self), m8 | m16 | m32 | m64 => OperandKind::Mem(*self), } } @@ -326,8 +352,6 @@ impl core::fmt::Display for Location { r32 => write!(f, "r32"), r64 => write!(f, "r64"), - xmm => write!(f, "xmm"), - rm8 => write!(f, "rm8"), rm16 => write!(f, "rm16"), rm32 => write!(f, "rm32"), @@ -338,6 +362,21 @@ impl core::fmt::Display for Location { m16 => write!(f, "m16"), m32 => write!(f, "m32"), m64 => write!(f, "m64"), + xmm1 => write!(f, "xmm1"), + xmm2 => write!(f, "xmm2"), + xmm3 => write!(f, "xmm3"), + + ymm1 => write!(f, "ymm1"), + ymm2 => write!(f, "ymm2"), + ymm3 => write!(f, "ymm3"), + + zmm1 => write!(f, "zmm1"), + zmm2 => write!(f, "zmm2"), + zmm3 => write!(f, "zmm3"), + + xmm_m128 => write!(f, "xmm_m128"), + ymm_m256 => write!(f, "ymm_m256"), + zmm_m512 => write!(f, "zmm_m512"), } } } @@ -368,6 +407,7 @@ pub enum OperandKind { pub enum Mutability { Read, ReadWrite, + Write, } impl Mutability { @@ -377,6 +417,7 @@ impl Mutability { pub fn is_read(&self) -> bool { match self { Mutability::Read | Mutability::ReadWrite => true, + Mutability::Write => false, } } @@ -386,7 +427,7 @@ impl Mutability { pub fn is_write(&self) -> bool { match self { Mutability::Read => false, - Mutability::ReadWrite => true, + Mutability::ReadWrite | Mutability::Write => true, } } } @@ -402,6 +443,7 @@ impl core::fmt::Display for Mutability { match self { Self::Read => write!(f, "r"), Self::ReadWrite => write!(f, "rw"), + Self::Write => write!(f, "w"), } } } diff --git a/cranelift/assembler-x64/meta/src/generate/format.rs b/cranelift/assembler-x64/meta/src/generate/format.rs index cb4833e0781b..d2b4c1bed555 100644 --- a/cranelift/assembler-x64/meta/src/generate/format.rs +++ b/cranelift/assembler-x64/meta/src/generate/format.rs @@ -1,6 +1,5 @@ //! Generate format-related Rust code; this also includes generation of encoding //! Rust code. 
- use super::{fmtln, Formatter}; use crate::dsl; @@ -32,6 +31,10 @@ impl dsl::Format { self.generate_immediate(f); } + pub fn generate_vex_encoding(&self, f: &mut Formatter, vex: &dsl::Vex) { + self.generate_vex(f, vex); + } + /// `buf.put1(...);` fn generate_prefixes(&self, f: &mut Formatter, rex: &dsl::Rex) { if !rex.opcodes.prefixes.is_empty() { @@ -137,6 +140,22 @@ impl dsl::Format { } } + fn generate_vex(&self, f: &mut Formatter, vex: &dsl::Vex) { + f.empty_line(); + f.comment("Emit VEX prefix."); + fmtln!(f, "let vex: VexInstruction = VexInstruction::default();"); + fmtln!(f, "let mut vex = vex.opcode(0x{:0x});", vex.opcodes.primary); + fmtln!(f, "vex.reg = self.xmm1.enc();"); + fmtln!(f, "match &self.xmm_m128 {{"); + fmtln!(f, "XmmMem::Xmm(r) => {{vex.rm = XmmMem::Xmm(r.clone());}}"); + fmtln!(f, "XmmMem::Mem(m) => {{vex.rm = XmmMem::Mem(m.clone());}}"); + fmtln!(f, "}}"); + fmtln!(f, "vex.vvvv = Some(self.xmm2.enc());"); + fmtln!(f, "vex.prefix = LegacyPrefix::{};", vex.pp.to_string()); + fmtln!(f, "vex.map = OpcodeMap::{};", vex.mmmmm.to_string()); + fmtln!(f, "vex.encode(buf, off);"); + } + fn generate_modrm_byte(&self, f: &mut Formatter, rex: &dsl::Rex) { use dsl::OperandKind::{FixedReg, Imm, Mem, Reg, RegMem}; diff --git a/cranelift/assembler-x64/meta/src/generate/inst.rs b/cranelift/assembler-x64/meta/src/generate/inst.rs index cc4f409d2298..9d1791180253 100644 --- a/cranelift/assembler-x64/meta/src/generate/inst.rs +++ b/cranelift/assembler-x64/meta/src/generate/inst.rs @@ -114,7 +114,7 @@ impl dsl::Inst { match &self.encoding { dsl::Encoding::Rex(rex) => self.format.generate_rex_encoding(f, rex), - dsl::Encoding::Vex(_) => todo!(), + dsl::Encoding::Vex(vex) => self.format.generate_vex_encoding(f, vex), } }); } diff --git a/cranelift/assembler-x64/meta/src/generate/operand.rs b/cranelift/assembler-x64/meta/src/generate/operand.rs index c698ee279ebd..6981cc69630e 100644 --- a/cranelift/assembler-x64/meta/src/generate/operand.rs +++ 
b/cranelift/assembler-x64/meta/src/generate/operand.rs @@ -23,8 +23,12 @@ impl dsl::Operand { } r8 | r16 | r32 | r64 => format!("Gpr", self.mutability.generate_camel_case()), rm8 | rm16 | rm32 | rm64 => format!("GprMem", self.mutability.generate_camel_case()), - xmm => format!("Xmm", self.mutability.generate_camel_case()), - rm128 => format!("XmmMem", self.mutability.generate_camel_case()), + xmm1 | xmm2 | xmm3 | ymm1 | ymm2 | ymm3 | zmm1 | zmm2 | zmm3 => { + format!("Xmm", self.mutability.generate_camel_case()) + } + xmm_m128 | ymm_m256 | zmm_m512 | rm128 => { + format!("XmmMem", self.mutability.generate_camel_case()) + } m8 | m16 | m32 | m64 => format!("Amode"), } } @@ -37,7 +41,9 @@ impl dsl::Location { use dsl::Location::*; match self { al | ax | eax | rax | cl | r8 | r16 | r32 | r64 | rm8 | rm16 | rm32 | rm64 => Some("Gpr"), - xmm | rm128 => Some("Xmm"), + xmm1 | xmm2 | xmm3 | ymm1 | ymm2 | ymm3 | zmm1 | zmm2 | zmm3 | xmm_m128 | ymm_m256 | zmm_m512 | rm128 => { + Some("Xmm") + } // Do not generate a register class for memory-only access or // immediates. 
imm8 | imm16 | imm32 | m8 | m16 | m32 | m64 => None, @@ -66,7 +72,8 @@ impl dsl::Location { Some(size) => format!("self.{self}.to_string({size})"), None => unreachable!(), }, - xmm | rm128 | m8 | m16 | m32 | m64 => format!("self.{self}.to_string()"), + xmm1 | xmm2 | xmm3 | ymm1 | ymm2 | ymm3 | zmm1 | zmm2 | zmm3 | xmm_m128 | ymm_m256 | zmm_m512 | rm128 + | m8 | m16 | m32 | m64 => format!("self.{self}.to_string()"), } } @@ -83,7 +90,7 @@ impl dsl::Location { m8 | m16 | m32 | m64 => { panic!("no need to generate a size for memory-only access") } - xmm | rm128 => { + xmm1 | xmm2 | xmm3 | ymm1 | ymm2 | ymm3 | zmm1 | zmm2 | zmm3 | xmm_m128 | ymm_m256 | zmm_m512 | rm128 => { panic!("no need to generate a size for XMM-sized access") } } @@ -96,6 +103,7 @@ impl dsl::Mutability { match self { dsl::Mutability::Read => "Read", dsl::Mutability::ReadWrite => "ReadWrite", + dsl::Mutability::Write => "Write", } } @@ -104,6 +112,7 @@ impl dsl::Mutability { match self { dsl::Mutability::Read => "read", dsl::Mutability::ReadWrite => "read_write", + dsl::Mutability::Write => "write", } } } diff --git a/cranelift/assembler-x64/meta/src/instructions/add.rs b/cranelift/assembler-x64/meta/src/instructions/add.rs index ff9453c4942e..c12683569127 100644 --- a/cranelift/assembler-x64/meta/src/instructions/add.rs +++ b/cranelift/assembler-x64/meta/src/instructions/add.rs @@ -62,7 +62,7 @@ pub fn list() -> Vec { inst("lock_adcl", fmt("MR", [rw(m32), r(r32)]), rex([0xf0, 0x11]).r(), _64b | compat), inst("lock_adcq", fmt("MR", [rw(m64), r(r64)]), rex([0xf0, 0x11]).w().r(), _64b), // Vector instructions. 
- inst("addps", fmt("A", [rw(xmm), r(align(rm128))]), rex([0x0F, 0x58]).r(), _64b | compat | sse), - inst("addpd", fmt("A", [rw(xmm), r(align(rm128))]), rex([0x66, 0x0F, 0x58]).r(), _64b | compat | sse), + inst("addps", fmt("A", [rw(xmm1), r(align(rm128))]), rex([0x0F, 0x58]).r(), _64b | compat | sse), + inst("addpd", fmt("A", [rw(xmm1), r(align(rm128))]), rex([0x66, 0x0F, 0x58]).r(), _64b | compat | sse), ] } diff --git a/cranelift/assembler-x64/meta/src/instructions/addpd.rs b/cranelift/assembler-x64/meta/src/instructions/addpd.rs new file mode 100644 index 000000000000..b70d2febdf68 --- /dev/null +++ b/cranelift/assembler-x64/meta/src/instructions/addpd.rs @@ -0,0 +1,15 @@ +use crate::dsl::{ + align, fmt, inst, r, rex, rw, vex, w, Feature::*, Inst, Location::*, VexLength::*, VexMMMMM::*, VexPP::*, +}; + +pub fn list() -> Vec { + vec![ + inst("addpd", fmt("A", [rw(xmm1), r(align(rm128))]), rex([0x66, 0x0F, 0x58]).r(), _64b | compat | sse), + inst( + "vaddpd", + fmt("B", [w(xmm1), r(xmm2), r(xmm_m128)]), + vex(0x58).length(_128).pp(_66).mmmmm(_OF), + _64b | compat | sse, + ), + ] +} diff --git a/cranelift/assembler-x64/meta/src/instructions/addps.rs b/cranelift/assembler-x64/meta/src/instructions/addps.rs new file mode 100644 index 000000000000..81fb966ada01 --- /dev/null +++ b/cranelift/assembler-x64/meta/src/instructions/addps.rs @@ -0,0 +1,13 @@ +use crate::dsl::{align, fmt, inst, r, rex, rw, vex, w, Feature::*, Inst, Location::*, VexLength::*, VexMMMMM::*}; + +pub fn list() -> Vec { + vec![ + inst("addps", fmt("A", [rw(xmm1), r(align(rm128))]), rex([0x0F, 0x58]).r(), _64b | compat | sse), + inst( + "vaddps", + fmt("B", [w(xmm1), r(xmm2), r(xmm_m128)]), + vex(0x58).length(_128).mmmmm(_OF), + _64b | compat | sse, + ), + ] +} diff --git a/cranelift/assembler-x64/meta/src/instructions/and.rs b/cranelift/assembler-x64/meta/src/instructions/and.rs index ee418f97fda2..d2a3ce216652 100644 --- a/cranelift/assembler-x64/meta/src/instructions/and.rs +++ 
b/cranelift/assembler-x64/meta/src/instructions/and.rs @@ -40,7 +40,7 @@ pub fn list() -> Vec { inst("lock_andl", fmt("MR", [rw(m32), r(r32)]), rex([0xf0, 0x21]).r(), _64b | compat), inst("lock_andq", fmt("MR", [rw(m64), r(r64)]), rex([0xf0, 0x21]).w().r(), _64b), // Vector instructions. - inst("andps", fmt("A", [rw(xmm), r(align(rm128))]), rex([0x0F, 0x54]).r(), _64b | compat | sse), - inst("andpd", fmt("A", [rw(xmm), r(align(rm128))]), rex([0x66, 0x0F, 0x54]).r(), _64b | compat | sse), + inst("andps", fmt("A", [rw(xmm1), r(align(rm128))]), rex([0x0F, 0x54]).r(), _64b | compat | sse), + inst("andpd", fmt("A", [rw(xmm1), r(align(rm128))]), rex([0x66, 0x0F, 0x54]).r(), _64b | compat | sse), ] } diff --git a/cranelift/assembler-x64/meta/src/instructions/or.rs b/cranelift/assembler-x64/meta/src/instructions/or.rs index 79fb1bb2cabd..76a20569b239 100644 --- a/cranelift/assembler-x64/meta/src/instructions/or.rs +++ b/cranelift/assembler-x64/meta/src/instructions/or.rs @@ -33,7 +33,7 @@ pub fn list() -> Vec { inst("lock_orl", fmt("MR", [rw(m32), r(r32)]), rex([0xf0, 0x09]).r(), _64b | compat), inst("lock_orq", fmt("MR", [rw(m64), r(r64)]), rex([0xf0, 0x09]).w().r(), _64b), // Vector instructions. 
- inst("orps", fmt("A", [rw(xmm), r(align(rm128))]), rex([0x0F, 0x56]).r(), _64b | compat | sse), - inst("orpd", fmt("A", [rw(xmm), r(align(rm128))]), rex([0x66, 0x0F, 0x56]).r(), _64b | compat | sse), + inst("orps", fmt("A", [rw(xmm1), r(align(rm128))]), rex([0x0F, 0x56]).r(), _64b | compat | sse), + inst("orpd", fmt("A", [rw(xmm1), r(align(rm128))]), rex([0x66, 0x0F, 0x56]).r(), _64b | compat | sse), ] } diff --git a/cranelift/assembler-x64/meta/src/instructions/sub.rs b/cranelift/assembler-x64/meta/src/instructions/sub.rs index 8e72c58fa2d0..6f78f8d2e721 100644 --- a/cranelift/assembler-x64/meta/src/instructions/sub.rs +++ b/cranelift/assembler-x64/meta/src/instructions/sub.rs @@ -62,7 +62,7 @@ pub fn list() -> Vec { inst("lock_sbbl", fmt("MR", [rw(m32), r(r32)]), rex([0xf0, 0x19]).r(), _64b | compat), inst("lock_sbbq", fmt("MR", [rw(m64), r(r64)]), rex([0xf0, 0x19]).w().r(), _64b), // Vector instructions. - inst("subps", fmt("A", [rw(xmm), r(align(rm128))]), rex([0x0F, 0x5C]).r(), _64b | compat | sse), - inst("subpd", fmt("A", [rw(xmm), r(align(rm128))]), rex([0x66, 0x0F, 0x5C]).r(), _64b | compat | sse), + inst("subps", fmt("A", [rw(xmm1), r(align(rm128))]), rex([0x0F, 0x5C]).r(), _64b | compat | sse), + inst("subpd", fmt("A", [rw(xmm1), r(align(rm128))]), rex([0x66, 0x0F, 0x5C]).r(), _64b | compat | sse), ] } diff --git a/cranelift/assembler-x64/meta/src/instructions/xor.rs b/cranelift/assembler-x64/meta/src/instructions/xor.rs index 77098b897b5a..0f0a843195c7 100644 --- a/cranelift/assembler-x64/meta/src/instructions/xor.rs +++ b/cranelift/assembler-x64/meta/src/instructions/xor.rs @@ -33,7 +33,7 @@ pub fn list() -> Vec { inst("lock_xorl", fmt("MR", [rw(m32), r(r32)]), rex([0xf0, 0x31]).r(), _64b | compat), inst("lock_xorq", fmt("MR", [rw(m64), r(r64)]), rex([0xf0, 0x31]).w().r(), _64b), // Vector instructions. 
- inst("xorps", fmt("A", [rw(xmm), r(align(rm128))]), rex([0x0F, 0x57]).r(), _64b | compat | sse), - inst("xorpd", fmt("A", [rw(xmm), r(align(rm128))]), rex([0x66, 0x0F, 0x57]).r(), _64b | compat | sse), + inst("xorps", fmt("A", [rw(xmm1), r(align(rm128))]), rex([0x0F, 0x57]).r(), _64b | compat | sse), + inst("xorpd", fmt("A", [rw(xmm1), r(align(rm128))]), rex([0x66, 0x0F, 0x57]).r(), _64b | compat | sse), ] } diff --git a/cranelift/assembler-x64/src/api.rs b/cranelift/assembler-x64/src/api.rs index 48d744405154..58e315b076e0 100644 --- a/cranelift/assembler-x64/src/api.rs +++ b/cranelift/assembler-x64/src/api.rs @@ -116,6 +116,9 @@ pub trait Registers { /// An x64 SSE register that may be read and written. type ReadWriteXmm: AsReg; + + /// An x64 SSE register that may be written. + type WriteXmm: AsReg; } /// Describe how to interact with an external register type. @@ -169,10 +172,14 @@ pub trait RegisterVisitor { fn read_xmm(&mut self, reg: &mut R::ReadXmm); /// Visit a read-write SSE register. fn read_write_xmm(&mut self, reg: &mut R::ReadWriteXmm); + + fn write_xmm(&mut self, reg: &mut R::WriteXmm); /// Visit a read-only fixed SSE register; this register can be modified /// in-place but must emit as the hardware encoding `enc`. fn fixed_read_xmm(&mut self, reg: &mut R::ReadXmm, enc: u8); /// Visit a read-write fixed SSE register; this register can be modified /// in-place but must emit as the hardware encoding `enc`. 
fn fixed_read_write_xmm(&mut self, reg: &mut R::ReadWriteXmm, enc: u8); + + fn fixed_write_xmm(&mut self, reg: &mut R::WriteXmm, enc: u8); } diff --git a/cranelift/assembler-x64/src/fuzz.rs b/cranelift/assembler-x64/src/fuzz.rs index b349f7072ae0..6b8a1c4ba534 100644 --- a/cranelift/assembler-x64/src/fuzz.rs +++ b/cranelift/assembler-x64/src/fuzz.rs @@ -177,6 +177,7 @@ impl Registers for FuzzRegs { type ReadWriteGpr = FuzzReg; type ReadXmm = FuzzReg; type ReadWriteXmm = FuzzReg; + type WriteXmm = FuzzReg; } /// A simple `u8` register type for fuzzing only. @@ -242,6 +243,7 @@ pub trait RegistersArbitrary: ReadWriteGpr: for<'a> Arbitrary<'a>, ReadXmm: for<'a> Arbitrary<'a>, ReadWriteXmm: for<'a> Arbitrary<'a>, + WriteXmm: for<'a> Arbitrary<'a>, > { } @@ -253,6 +255,7 @@ where R::ReadWriteGpr: for<'a> Arbitrary<'a>, R::ReadXmm: for<'a> Arbitrary<'a>, R::ReadWriteXmm: for<'a> Arbitrary<'a>, + R::WriteXmm: for<'a> Arbitrary<'a>, { } @@ -274,6 +277,6 @@ mod test { .budget_ms(1_000); // This will run the `roundtrip` fuzzer for one second. To repeatably - // test a single input, append `.seed(0x)`. + // test a single input, append `.seed(0x)`. } } diff --git a/cranelift/assembler-x64/src/lib.rs b/cranelift/assembler-x64/src/lib.rs index 135c2a4a10fe..994880dcbe20 100644 --- a/cranelift/assembler-x64/src/lib.rs +++ b/cranelift/assembler-x64/src/lib.rs @@ -15,6 +15,7 @@ //! type ReadWriteGpr = u8; //! type ReadXmm = u8; //! type ReadWriteXmm = u8; +//! type WriteXmm = u8; //! } //! //! 
// Then, build one of the `AND` instructions; this one operates on an @@ -49,8 +50,9 @@ mod fixed; pub mod gpr; mod imm; pub mod inst; -mod mem; -mod rex; +pub mod mem; +pub mod rex; +pub mod vex; pub mod xmm; #[cfg(any(test, feature = "fuzz"))] diff --git a/cranelift/assembler-x64/src/mem.rs b/cranelift/assembler-x64/src/mem.rs index eca13e63195e..b9e015e3b031 100644 --- a/cranelift/assembler-x64/src/mem.rs +++ b/cranelift/assembler-x64/src/mem.rs @@ -297,6 +297,9 @@ impl XmmMem { XmmMem::Mem(amode) => amode.to_string(), } } + pub fn default() -> Self { + XmmMem::Xmm(R::new(0)) + } } /// Emit the ModRM/SIB/displacement sequence for a memory operand. diff --git a/cranelift/assembler-x64/src/vex.rs b/cranelift/assembler-x64/src/vex.rs new file mode 100644 index 000000000000..c53e0e839af9 --- /dev/null +++ b/cranelift/assembler-x64/src/vex.rs @@ -0,0 +1,259 @@ +//! Encoding logic for VEX instructions. +use super::rex; +use super::XmmMem; +use crate::api::{AsReg, CodeSink, KnownOffsetTable, Registers}; +use crate::mem::emit_modrm_sib_disp; +use crate::Amode; + +/// Allows using the same opcode byte in different "opcode maps" to allow for more instruction +/// encodings. See appendix A in the Intel Software Developer's Manual, volume 2A, for more details. +#[derive(PartialEq)] +pub enum OpcodeMap { + None, + _0F, + _0F38, + _0F3A, +} + +impl OpcodeMap { + /// Normally the opcode map is specified as bytes in the instruction, but some x64 encoding + /// formats pack this information as bits in a prefix (e.g. VEX / EVEX). + pub fn bits(&self) -> u8 { + match self { + OpcodeMap::None => 0b00, + OpcodeMap::_0F => 0b01, + OpcodeMap::_0F38 => 0b10, + OpcodeMap::_0F3A => 0b11, + } + } +} + +impl Default for OpcodeMap { + fn default() -> Self { + Self::None + } +} + +/// We may need to include one or more legacy prefix bytes before the REX prefix. This enum +/// covers only the small set of possibilities that we actually need. 
+#[derive(PartialEq)] +pub enum LegacyPrefix { + /// No prefix bytes. + None, + /// Operand Size Override -- here, denoting "16-bit operation". + _66, + /// The Lock prefix. + _F0, + /// Operand size override and Lock. + _66F0, + /// REPNE, but no specific meaning here -- is just an opcode extension. + _F2, + /// REP/REPE, but no specific meaning here -- is just an opcode extension. + _F3, + /// Operand size override and same effect as F3. + _66F3, +} + +impl LegacyPrefix { + /// Emit the legacy prefix as bits (e.g. for EVEX instructions). + #[inline(always)] + pub(crate) fn bits(&self) -> u8 { + match self { + Self::None => 0b00, + Self::_66 => 0b01, + Self::_F3 => 0b10, + Self::_F2 => 0b11, + _ => panic!( + "VEX and EVEX bits can only be extracted from single prefixes: None, 66, F3, F2" + ), + } + } +} + +impl Default for LegacyPrefix { + fn default() -> Self { + Self::None + } +} + +pub struct VexInstruction { + pub length: VexVectorLength, + pub prefix: LegacyPrefix, + pub map: OpcodeMap, + pub opcode: u8, + pub w: bool, + pub reg: u8, + pub rm: XmmMem, + pub vvvv: Option, + pub imm: Option, +} + +impl Default for VexInstruction { + fn default() -> Self { + Self { + length: VexVectorLength::default(), + prefix: LegacyPrefix::None, + map: OpcodeMap::None, + opcode: 0x00, + w: false, + reg: 0x00, + rm: XmmMem::default(), + vvvv: None, + imm: None, + } + } +} + +impl VexInstruction { + /// The R bit in encoded format (inverted). + #[inline(always)] + fn r_bit(&self) -> u8 { + (!(self.reg >> 3)) & 1 + } + + /// The X bit in encoded format (inverted). + #[inline(always)] + fn x_bit(&self) -> u8 { + let reg = match &self.rm { + XmmMem::Xmm(_xmm) => 0, + XmmMem::Mem(Amode::ImmReg { .. }) => 0, + XmmMem::Mem(Amode::ImmRegRegShift { index, .. }) => index.enc(), + XmmMem::Mem(Amode::RipRelative { .. }) => 0, + }; + + !(reg >> 3) & 1 + } + + /// The B bit in encoded format (inverted). 
+ #[inline(always)] + fn b_bit(&self) -> u8 { + let reg = match &self.rm { + XmmMem::Xmm(xmm) => (*xmm).enc(), + XmmMem::Mem(Amode::ImmReg { base, .. }) => base.enc(), + XmmMem::Mem(Amode::ImmRegRegShift { base, .. }) => base.enc(), + XmmMem::Mem(Amode::RipRelative { .. }) => 0, + }; + + !(reg >> 3) & 1 + } + + /// Is the 2 byte prefix available for this instruction? + /// We essentially just check if we need any of the bits that are only available + /// in the 3 byte instruction + #[inline(always)] + fn use_2byte_prefix(&self) -> bool { + // These bits are only represented on the 3 byte prefix, so their presence + // implies the use of the 3 byte prefix + self.b_bit() == 1 && self.x_bit() == 1 && + // The presence of W1 in the opcode column implies the opcode must be encoded using the + // 3-byte form of the VEX prefix. + self.w == false && + // The presence of 0F3A and 0F38 in the opcode column implies that opcode can only be + // encoded by the three-byte form of VEX + !(self.map == OpcodeMap::_0F3A || self.map == OpcodeMap::_0F38) + } + + /// The last byte of the 2byte and 3byte prefixes is mostly the same, share the common + /// encoding logic here. 
+ #[inline(always)] + fn prefix_last_byte(&self) -> u8 { + let vvvv = self.vvvv.map(|r| r.into()).unwrap_or(0x00); + + let mut byte = 0x00; + byte |= self.prefix.bits(); + byte |= self.length.bits() << 2; + byte |= ((!vvvv) & 0xF) << 3; + byte + } + + /// Encode the 2 byte prefix + #[inline(always)] + fn encode_2byte_prefix(&self, sink: &mut CS) { + // 2 bytes: + // +-----+ +-------------------+ + // | C5h | | R | vvvv | L | pp | + // +-----+ +-------------------+ + + let last_byte = self.prefix_last_byte() | (self.r_bit() << 7); + + sink.put1(0xC5); + sink.put1(last_byte); + } + + /// Encode the 3 byte prefix + #[inline(always)] + fn encode_3byte_prefix(&self, sink: &mut CS) { + // 3 bytes: + // +-----+ +--------------+ +-------------------+ + // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp | + // +-----+ +--------------+ +-------------------+ + let mut second_byte = 0x00; + second_byte |= self.map.bits(); // m-mmmm field + second_byte |= self.b_bit() << 5; + second_byte |= self.x_bit() << 6; + second_byte |= self.r_bit() << 7; + + let w_bit = self.w as u8; + let last_byte = self.prefix_last_byte() | (w_bit << 7); + + sink.put1(0xC4); + sink.put1(second_byte); + sink.put1(last_byte); + } + + /// Emit the VEX-encoded instruction to the provided buffer. + pub fn encode(&self, sink: &mut impl CodeSink, off: &impl KnownOffsetTable) { + // 2/3 byte prefix + if self.use_2byte_prefix() { + self.encode_2byte_prefix(sink); + } else { + self.encode_3byte_prefix(sink); + } + + // 1 Byte Opcode + sink.put1(self.opcode); + + match &self.rm { + // Not all instructions use Reg as a reg, some use it as an extension + // of the opcode. + XmmMem::Xmm(xmm) => { + let rm: u8 = (*xmm).enc(); + sink.put1(rex::encode_modrm(3, self.reg & 7, rm & 7)); + } + // For address-based modes reuse the logic from the `rex` module + // for the modrm and trailing bytes since VEX uses the same + // encoding. 
+ XmmMem::Mem(amode) => { + let bytes_at_end = if self.imm.is_some() { 1 } else { 0 }; + emit_modrm_sib_disp(sink, off, self.reg & 7, amode, bytes_at_end, None); + } + } + + // Optional 1 Byte imm + if let Some(imm) = self.imm { + sink.put1(imm); + } + } +} + +/// The VEX format allows choosing a vector length in the `L` bit. +pub enum VexVectorLength { + V128, + V256, +} + +impl VexVectorLength { + /// Encode the `L` bit. + fn bits(&self) -> u8 { + match self { + Self::V128 => 0b0, + Self::V256 => 0b1, + } + } +} + +impl Default for VexVectorLength { + fn default() -> Self { + Self::V128 + } +} diff --git a/cranelift/codegen/meta/src/gen_asm.rs b/cranelift/codegen/meta/src/gen_asm.rs index 6286c07f7554..51bfe7f7370e 100644 --- a/cranelift/codegen/meta/src/gen_asm.rs +++ b/cranelift/codegen/meta/src/gen_asm.rs @@ -40,21 +40,39 @@ pub fn rust_convert_isle_to_assembler(op: &Operand) -> &'static str { OperandKind::Reg(r) => match (r.bits(), op.mutability) { (128, Mutability::Read) => "cranelift_assembler_x64::Xmm::new", (128, Mutability::ReadWrite) => "self.convert_xmm_to_assembler_read_write_xmm", + (128, Mutability::Write) => "self.convert_xmm_to_assembler_write_xmm", + (256, Mutability::Read) => "cranelift_assembler_x64::Ymm::new", + (256, Mutability::ReadWrite) => "self.convert_ymm_to_assembler_read_write_ymm", + (256, Mutability::Write) => "self.convert_ymm_to_assembler_write_ymm", + (512, Mutability::Read) => "cranelift_assembler_x64::Zmm::new", + (512, Mutability::ReadWrite) => "self.convert_zmm_to_assembler_read_write_zmm", + (512, Mutability::Write) => "self.convert_zmm_to_assembler_write_zmm", (_, Mutability::Read) => "cranelift_assembler_x64::Gpr::new", (_, Mutability::ReadWrite) => "self.convert_gpr_to_assembler_read_write_gpr", + (_, Mutability::Write) => unreachable!(), }, OperandKind::FixedReg(r) => match (r.bits(), op.mutability) { (128, Mutability::Read) => "cranelift_assembler_x64::Fixed", (128, Mutability::ReadWrite) => 
"self.convert_xmm_to_assembler_fixed_read_write_xmm", + (256, Mutability::Read) => "cranelift_assembler_x64::Fixed", + (256, Mutability::ReadWrite) => "self.convert_ymm_to_assembler_fixed_read_write_ymm", + (512, Mutability::Read) => "cranelift_assembler_x64::Fixed", + (512, Mutability::ReadWrite) => "self.convert_zmm_to_assembler_fixed_read_write_zmm", (_, Mutability::Read) => "cranelift_assembler_x64::Fixed", (_, Mutability::ReadWrite) => "self.convert_gpr_to_assembler_fixed_read_write_gpr", + (_, Mutability::Write) => unreachable!(), }, OperandKind::Mem(_) => "self.convert_amode_to_assembler_amode", OperandKind::RegMem(r) => match (r.bits(), op.mutability) { (128, Mutability::Read) => "self.convert_xmm_mem_to_assembler_read_xmm_mem", (128, Mutability::ReadWrite) => "self.convert_xmm_mem_to_assembler_read_write_xmm_mem", + (128, Mutability::Write) => "self.convert_xmm_mem_to_assembler_write_xmm_mem", + (256, Mutability::Read) => "self.convert_ymm_mem_to_assembler_read_ymm_mem", + (256, Mutability::ReadWrite) => "self.convert_ymm_mem_to_assembler_read_write_ymm_mem", + (256, Mutability::Write) => "self.convert_ymm_mem_to_assembler_write_ymm_mem", (_, Mutability::Read) => "self.convert_gpr_mem_to_assembler_read_gpr_mem", (_, Mutability::ReadWrite) => "self.convert_gpr_mem_to_assembler_read_write_gpr_mem", + (_, Mutability::Write) => unreachable!(), }, OperandKind::Imm(loc) => match (op.extension.is_sign_extended(), loc.bits()) { (true, 8) => "cranelift_assembler_x64::Simm8::new", @@ -116,7 +134,7 @@ pub fn generate_macro_inst_fn(f: &mut Formatter, inst: &Inst) { [] => fmtln!(f, "SideEffectNoResult::Inst(inst)"), [one] => match one.mutability { Read => unreachable!(), - ReadWrite => match one.location.kind() { + ReadWrite | Write => match one.location.kind() { OperandKind::Imm(_) => unreachable!(), // One read/write register output? Output the instruction // and that register. 
@@ -152,6 +170,7 @@ pub fn generate_macro_inst_fn(f: &mut Formatter, inst: &Inst) { }); } }, + //Write => todo!(), }, _ => panic!("instruction has more than one result"), } @@ -299,7 +318,7 @@ pub fn isle_constructors(format: &Format) -> Vec { [] => unimplemented!("if you truly need this (and not a `SideEffect*`), add a `NoReturn` variant to `AssemblerOutputs`"), [one] => match one.mutability { Read => unreachable!(), - ReadWrite => match one.location.kind() { + ReadWrite | Write => match one.location.kind() { Imm(_) => unreachable!(), // One read/write register output? Output the instruction // and that register. @@ -315,7 +334,7 @@ pub fn isle_constructors(format: &Format) -> Vec { 128 => vec![IsleConstructor::RetXmm, IsleConstructor::RetMemorySideEffect], _ => vec![IsleConstructor::RetGpr, IsleConstructor::RetMemorySideEffect], }, - } + }, }, other => panic!("unsupported number of write operands {other:?}"), } diff --git a/cranelift/codegen/src/isa/x64/inst/external.rs b/cranelift/codegen/src/isa/x64/inst/external.rs index b89f95e56c9b..93e0e418bac6 100644 --- a/cranelift/codegen/src/isa/x64/inst/external.rs +++ b/cranelift/codegen/src/isa/x64/inst/external.rs @@ -17,6 +17,7 @@ impl asm::Registers for CraneliftRegisters { type ReadWriteGpr = PairedGpr; type ReadXmm = Xmm; type ReadWriteXmm = PairedXmm; + type WriteXmm = Xmm; } /// A pair of registers, one for reading and one for writing. @@ -202,6 +203,14 @@ impl<'a, T: OperandVisitor> asm::RegisterVisitor for Regallo self.collector .reg_fixed_def(write, fixed_reg(enc, RegClass::Float)); } + + fn write_xmm(&mut self, _reg: &mut Xmm) { + todo!() + } + + fn fixed_write_xmm(&mut self, _reg: &mut Xmm, _enc: u8) { + todo!() + } } /// A helper for building a fixed register from its hardware encoding.