Skip to content

Commit 8d853cc

Browse files
authored
x64: convert some SSE shift instructions (#10762)
1 parent 0ca5cc7 commit 8d853cc

File tree

17 files changed

+130
-251
lines changed

17 files changed

+130
-251
lines changed

cranelift/assembler-x64/meta/src/generate/format.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,12 @@ impl dsl::Format {
8585
fmtln!(f, "let dst = self.{dst}.enc();");
8686
fmtln!(f, "let rex = RexPrefix::with_digit(digit, dst, {bits});");
8787
}
88+
[Reg(dst), Imm(_)] => {
89+
let digit = rex.digit.unwrap();
90+
fmtln!(f, "let digit = 0x{digit:x};");
91+
fmtln!(f, "let dst = self.{dst}.enc();");
92+
fmtln!(f, "let rex = RexPrefix::two_op(digit, dst, {bits});");
93+
}
8894
[Mem(dst), Imm(_)] | [RegMem(dst), Imm(_)] | [RegMem(dst)] => {
8995
let digit = rex.digit.unwrap();
9096
fmtln!(f, "let digit = 0x{digit:x};");
@@ -124,6 +130,11 @@ impl dsl::Format {
124130
[FixedReg(_), Imm(_)] => {
125131
// No need to emit a ModRM byte: we know the register used.
126132
}
133+
[Reg(reg), Imm(_)] => {
134+
let digit = rex.digit.unwrap();
135+
fmtln!(f, "let digit = 0x{digit:x};");
136+
fmtln!(f, "self.{reg}.encode_modrm(buf, digit);");
137+
}
127138
[Mem(mem), Imm(_)] | [RegMem(mem), Imm(_)] | [RegMem(mem)] => {
128139
let digit = rex.digit.unwrap();
129140
fmtln!(f, "let digit = 0x{digit:x};");

cranelift/assembler-x64/meta/src/instructions.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ mod add;
44
mod and;
55
mod neg;
66
mod or;
7-
mod shld;
7+
mod shift;
88
mod sub;
99
mod xor;
1010

@@ -15,9 +15,9 @@ pub fn list() -> Vec<Inst> {
1515
let mut all = vec![];
1616
all.extend(add::list());
1717
all.extend(and::list());
18-
all.extend(or::list());
1918
all.extend(neg::list());
20-
all.extend(shld::list());
19+
all.extend(or::list());
20+
all.extend(shift::list());
2121
all.extend(sub::list());
2222
all.extend(xor::list());
2323
all
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
use crate::dsl::{align, fmt, inst, r, rex, rw};
2+
use crate::dsl::{Feature::*, Inst, Location::*};
3+
4+
#[rustfmt::skip] // Keeps instructions on a single line.
5+
pub fn list() -> Vec<Inst> {
6+
vec![
7+
inst("shldw", fmt("MRI", [rw(rm16), r(r16), r(imm8)]), rex([0x66, 0x0F, 0xA4]).ib(), _64b | compat),
8+
inst("shldw", fmt("MRC", [rw(rm16), r(r16), r(cl)]), rex([0x66, 0x0F, 0xA5]).ib(), _64b | compat),
9+
inst("shldl", fmt("MRI", [rw(rm32), r(r32), r(imm8)]), rex([0x0F, 0xA4]).ib(), _64b | compat),
10+
inst("shldq", fmt("MRI", [rw(rm64), r(r64), r(imm8)]), rex([0x0F, 0xA4]).ib().w(), _64b),
11+
inst("shldl", fmt("MRC", [rw(rm32), r(r32), r(cl)]), rex([0x0F, 0xA5]).ib(), _64b | compat),
12+
inst("shldq", fmt("MRC", [rw(rm64), r(r64), r(cl)]), rex([0x0F, 0xA5]).ib().w(), _64b),
13+
// Vector instructions (shift left).
14+
inst("psllw", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xF1]).r(), _64b | compat | sse2),
15+
inst("psllw", fmt("B", [rw(xmm), r(imm8)]), rex([0x66, 0x0F, 0x71]).digit(6).ib(), _64b | compat | sse2),
16+
inst("pslld", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xF2]).r(), _64b | compat | sse2),
17+
inst("pslld", fmt("B", [rw(xmm), r(imm8)]), rex([0x66, 0x0F, 0x72]).digit(6).ib(), _64b | compat | sse2),
18+
inst("psllq", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xF3]).r(), _64b | compat | sse2),
19+
inst("psllq", fmt("B", [rw(xmm), r(imm8)]), rex([0x66, 0x0F, 0x73]).digit(6).ib(), _64b | compat | sse2),
20+
// Vector instructions (shift right).
21+
inst("psraw", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xE1]).r(), _64b | compat | sse2),
22+
inst("psraw", fmt("B", [rw(xmm), r(imm8)]), rex([0x66, 0x0F, 0x71]).digit(4).ib(), _64b | compat | sse2),
23+
inst("psrad", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xE2]).r(), _64b | compat | sse2),
24+
inst("psrad", fmt("B", [rw(xmm), r(imm8)]), rex([0x66, 0x0F, 0x72]).digit(4).ib(), _64b | compat | sse2),
25+
inst("psrlw", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xD1]).r(), _64b | compat | sse2),
26+
inst("psrlw", fmt("B", [rw(xmm), r(imm8)]), rex([0x66, 0x0F, 0x71]).digit(2).ib(), _64b | compat | sse2),
27+
inst("psrld", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xD2]).r(), _64b | compat | sse2),
28+
inst("psrld", fmt("B", [rw(xmm), r(imm8)]), rex([0x66, 0x0F, 0x72]).digit(2).ib(), _64b | compat | sse2),
29+
inst("psrlq", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xD3]).r(), _64b | compat | sse2),
30+
inst("psrlq", fmt("B", [rw(xmm), r(imm8)]), rex([0x66, 0x0F, 0x73]).digit(2).ib(), _64b | compat | sse2),
31+
]
32+
}

cranelift/assembler-x64/meta/src/instructions/shld.rs

Lines changed: 0 additions & 14 deletions
This file was deleted.

cranelift/assembler-x64/src/xmm.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//! Xmm register operands; see [`Xmm`].
22
3-
use crate::AsReg;
3+
use crate::{rex::encode_modrm, AsReg, CodeSink};
44

55
/// An x64 SSE register (e.g., `%xmm0`).
66
#[derive(Clone, Copy, Debug)]
@@ -28,6 +28,11 @@ impl<R: AsReg> Xmm<R> {
2828
pub fn to_string(&self) -> String {
2929
self.0.to_string(None)
3030
}
31+
32+
/// Emit this register as the `r/m` field of a ModR/M byte.
33+
pub(crate) fn encode_modrm(&self, sink: &mut impl CodeSink, enc_reg: u8) {
34+
sink.put1(encode_modrm(0b11, enc_reg & 0b111, self.enc() & 0b111));
35+
}
3136
}
3237

3338
impl<R: AsReg> AsRef<R> for Xmm<R> {

cranelift/codegen/src/isa/x64/inst.isle

Lines changed: 28 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -196,12 +196,6 @@
196196
(num_bits Imm8Gpr)
197197
(dst WritableGpr))
198198

199-
;; Arithmetic SIMD shifts.
200-
(XmmRmiReg (opcode SseOpcode)
201-
(src1 Xmm)
202-
(src2 XmmMemAlignedImm)
203-
(dst WritableXmm))
204-
205199
;; Integer comparisons/tests: cmp or test (b w l q) (reg addr imm) reg.
206200
(CmpRmiR (size OperandSize) ;; 1, 2, 4, or 8
207201
(opcode CmpOpcode)
@@ -980,14 +974,6 @@
980974
Pmuludq
981975
Pshufb
982976
Pshufd
983-
Psllw
984-
Pslld
985-
Psllq
986-
Psraw
987-
Psrad
988-
Psrlw
989-
Psrld
990-
Psrlq
991977
Ptest
992978
Punpckhbw
993979
Punpckhwd
@@ -1839,6 +1825,8 @@
18391825

18401826
(decl is_imm8 (u8) GprMemImm)
18411827
(extern extractor is_imm8 is_imm8)
1828+
(decl is_imm8_xmm (u8) XmmMemImm)
1829+
(extern extractor is_imm8_xmm is_imm8_xmm)
18421830
(decl is_simm8 (i8) GprMemImm)
18431831
(extern extractor is_simm8 is_simm8)
18441832
(decl is_imm16 (u16) GprMemImm)
@@ -1853,7 +1841,7 @@
18531841
(extern extractor is_gpr is_gpr)
18541842
(decl is_gpr_mem (GprMem) GprMemImm)
18551843
(extern extractor is_gpr_mem is_gpr_mem)
1856-
(decl is_xmm_mem (XmmMem) XmmMem)
1844+
(decl is_xmm_mem (XmmMem) XmmMemImm)
18571845
(extern extractor is_xmm_mem is_xmm_mem)
18581846
(decl is_xmm (Xmm) XmmMem)
18591847
(extern extractor is_xmm is_xmm)
@@ -2132,16 +2120,6 @@
21322120
(_ Unit (emit (MInst.XmmUnaryRmRImmEvex op src dst imm))))
21332121
dst))
21342122

2135-
;; Helper for creating `MInst.XmmRmiXmm` instructions.
2136-
(decl xmm_rmi_xmm (SseOpcode Xmm XmmMemAlignedImm) Xmm)
2137-
(rule (xmm_rmi_xmm op src1 src2)
2138-
(let ((dst WritableXmm (temp_writable_xmm))
2139-
(_ Unit (emit (MInst.XmmRmiReg op
2140-
src1
2141-
src2
2142-
dst))))
2143-
dst))
2144-
21452123
;; Helper for creating `MInst.XmmToGprImm` instructions.
21462124
(decl xmm_to_gpr_imm (SseOpcode Xmm u8) Gpr)
21472125
(rule (xmm_to_gpr_imm op src imm)
@@ -3609,7 +3587,7 @@
36093587
(rule 1 (x64_orpd src1 src2)
36103588
(if-let true (use_avx))
36113589
(xmm_rmir_vex (AvxOpcode.Vorpd) src1 src2))
3612-
(rule 0 (x64_orpd src1 (is_xmm_mem src2)) (x64_orpd_a src1 src2))
3590+
(rule 0 (x64_orpd src1 src2) (x64_orpd_a src1 src2))
36133591

36143592
;; Helper for creating `pxor` instructions.
36153593
(decl x64_pxor (Xmm XmmMem) Xmm)
@@ -4392,67 +4370,67 @@
43924370

43934371
;; Helper for creating `psllw` instructions.
43944372
(decl x64_psllw (Xmm XmmMemImm) Xmm)
4395-
(rule 0 (x64_psllw src1 src2)
4396-
(xmm_rmi_xmm (SseOpcode.Psllw) src1 src2))
4397-
(rule 1 (x64_psllw src1 src2)
4373+
(rule 2 (x64_psllw src1 src2)
43984374
(if-let true (use_avx))
43994375
(xmm_rmir_vex (AvxOpcode.Vpsllw) src1 src2))
4376+
(rule 1 (x64_psllw src1 (is_xmm_mem src2)) (x64_psllw_a src1 src2))
4377+
(rule 0 (x64_psllw src1 (is_imm8_xmm src2)) (x64_psllw_b src1 src2))
44004378

44014379
;; Helper for creating `pslld` instructions.
44024380
(decl x64_pslld (Xmm XmmMemImm) Xmm)
4403-
(rule 0 (x64_pslld src1 src2)
4404-
(xmm_rmi_xmm (SseOpcode.Pslld) src1 src2))
4405-
(rule 1 (x64_pslld src1 src2)
4381+
(rule 2 (x64_pslld src1 src2)
44064382
(if-let true (use_avx))
44074383
(xmm_rmir_vex (AvxOpcode.Vpslld) src1 src2))
4384+
(rule 1 (x64_pslld src1 (is_xmm_mem src2)) (x64_pslld_a src1 src2))
4385+
(rule 0 (x64_pslld src1 (is_imm8_xmm src2)) (x64_pslld_b src1 src2))
44084386

44094387
;; Helper for creating `psllq` instructions.
44104388
(decl x64_psllq (Xmm XmmMemImm) Xmm)
4411-
(rule 0 (x64_psllq src1 src2)
4412-
(xmm_rmi_xmm (SseOpcode.Psllq) src1 src2))
4413-
(rule 1 (x64_psllq src1 src2)
4389+
(rule 2 (x64_psllq src1 src2)
44144390
(if-let true (use_avx))
44154391
(xmm_rmir_vex (AvxOpcode.Vpsllq) src1 src2))
4392+
(rule 1 (x64_psllq src1 (is_xmm_mem src2)) (x64_psllq_a src1 src2))
4393+
(rule 0 (x64_psllq src1 (is_imm8_xmm src2)) (x64_psllq_b src1 src2))
44164394

44174395
;; Helper for creating `psrlw` instructions.
44184396
(decl x64_psrlw (Xmm XmmMemImm) Xmm)
4419-
(rule 0 (x64_psrlw src1 src2)
4420-
(xmm_rmi_xmm (SseOpcode.Psrlw) src1 src2))
4421-
(rule 1 (x64_psrlw src1 src2)
4397+
(rule 2 (x64_psrlw src1 src2)
44224398
(if-let true (use_avx))
44234399
(xmm_rmir_vex (AvxOpcode.Vpsrlw) src1 src2))
4400+
(rule 1 (x64_psrlw src1 (is_xmm_mem src2)) (x64_psrlw_a src1 src2))
4401+
(rule 0 (x64_psrlw src1 (is_imm8_xmm src2)) (x64_psrlw_b src1 src2))
44244402

44254403
;; Helper for creating `psrld` instructions.
44264404
(decl x64_psrld (Xmm XmmMemImm) Xmm)
4427-
(rule 0 (x64_psrld src1 src2)
4428-
(xmm_rmi_xmm (SseOpcode.Psrld) src1 src2))
4429-
(rule 1 (x64_psrld src1 src2)
4405+
(rule 2 (x64_psrld src1 src2)
44304406
(if-let true (use_avx))
44314407
(xmm_rmir_vex (AvxOpcode.Vpsrld) src1 src2))
4408+
(rule 1 (x64_psrld src1 (is_xmm_mem src2)) (x64_psrld_a src1 src2))
4409+
(rule 0 (x64_psrld src1 (is_imm8_xmm src2)) (x64_psrld_b src1 src2))
44324410

44334411
;; Helper for creating `psrlq` instructions.
44344412
(decl x64_psrlq (Xmm XmmMemImm) Xmm)
4435-
(rule 0 (x64_psrlq src1 src2)
4436-
(xmm_rmi_xmm (SseOpcode.Psrlq) src1 src2))
4437-
(rule 1 (x64_psrlq src1 src2)
4413+
(rule 2 (x64_psrlq src1 src2)
44384414
(if-let true (use_avx))
44394415
(xmm_rmir_vex (AvxOpcode.Vpsrlq) src1 src2))
4416+
(rule 1 (x64_psrlq src1 (is_xmm_mem src2)) (x64_psrlq_a src1 src2))
4417+
(rule 0 (x64_psrlq src1 (is_imm8_xmm src2)) (x64_psrlq_b src1 src2))
44404418

44414419
;; Helper for creating `psraw` instructions.
44424420
(decl x64_psraw (Xmm XmmMemImm) Xmm)
4443-
(rule 0 (x64_psraw src1 src2)
4444-
(xmm_rmi_xmm (SseOpcode.Psraw) src1 src2))
4445-
(rule 1 (x64_psraw src1 src2)
4421+
(rule 2 (x64_psraw src1 src2)
44464422
(if-let true (use_avx))
44474423
(xmm_rmir_vex (AvxOpcode.Vpsraw) src1 src2))
4424+
(rule 1 (x64_psraw src1 (is_xmm_mem src2)) (x64_psraw_a src1 src2))
4425+
(rule 0 (x64_psraw src1 (is_imm8_xmm src2)) (x64_psraw_b src1 src2))
44484426

44494427
;; Helper for creating `psrad` instructions.
44504428
(decl x64_psrad (Xmm XmmMemImm) Xmm)
4451-
(rule 0 (x64_psrad src1 src2)
4452-
(xmm_rmi_xmm (SseOpcode.Psrad) src1 src2))
4453-
(rule 1 (x64_psrad src1 src2)
4429+
(rule 2 (x64_psrad src1 src2)
44544430
(if-let true (use_avx))
44554431
(xmm_rmir_vex (AvxOpcode.Vpsrad) src1 src2))
4432+
(rule 1 (x64_psrad src1 (is_xmm_mem src2)) (x64_psrad_a src1 src2))
4433+
(rule 0 (x64_psrad src1 (is_imm8_xmm src2)) (x64_psrad_b src1 src2))
44564434

44574435
;; Helper for creating `vpsraq` instructions.
44584436
(decl x64_vpsraq (Xmm XmmMem) Xmm)

cranelift/codegen/src/isa/x64/inst/args.rs

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1055,14 +1055,6 @@ pub enum SseOpcode {
10551055
Pmuludq,
10561056
Pshufb,
10571057
Pshufd,
1058-
Psllw,
1059-
Pslld,
1060-
Psllq,
1061-
Psraw,
1062-
Psrad,
1063-
Psrlw,
1064-
Psrld,
1065-
Psrlq,
10661058
Ptest,
10671059
Punpckhbw,
10681060
Punpckhwd,
@@ -1181,14 +1173,6 @@ impl SseOpcode {
11811173
| SseOpcode::Pmullw
11821174
| SseOpcode::Pmuludq
11831175
| SseOpcode::Pshufd
1184-
| SseOpcode::Psllw
1185-
| SseOpcode::Pslld
1186-
| SseOpcode::Psllq
1187-
| SseOpcode::Psraw
1188-
| SseOpcode::Psrad
1189-
| SseOpcode::Psrlw
1190-
| SseOpcode::Psrld
1191-
| SseOpcode::Psrlq
11921176
| SseOpcode::Punpckhbw
11931177
| SseOpcode::Punpckhwd
11941178
| SseOpcode::Punpcklbw
@@ -1403,14 +1387,6 @@ impl fmt::Debug for SseOpcode {
14031387
SseOpcode::Pmuludq => "pmuludq",
14041388
SseOpcode::Pshufb => "pshufb",
14051389
SseOpcode::Pshufd => "pshufd",
1406-
SseOpcode::Psllw => "psllw",
1407-
SseOpcode::Pslld => "pslld",
1408-
SseOpcode::Psllq => "psllq",
1409-
SseOpcode::Psraw => "psraw",
1410-
SseOpcode::Psrad => "psrad",
1411-
SseOpcode::Psrlw => "psrlw",
1412-
SseOpcode::Psrld => "psrld",
1413-
SseOpcode::Psrlq => "psrlq",
14141390
SseOpcode::Ptest => "ptest",
14151391
SseOpcode::Punpckhbw => "punpckhbw",
14161392
SseOpcode::Punpckhwd => "punpckhwd",

cranelift/codegen/src/isa/x64/inst/emit.rs

Lines changed: 0 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1078,62 +1078,6 @@ pub(crate) fn emit(
10781078
}
10791079
}
10801080

1081-
Inst::XmmRmiReg {
1082-
opcode,
1083-
src1,
1084-
src2,
1085-
dst,
1086-
} => {
1087-
let src1 = src1.to_reg();
1088-
let dst = dst.to_reg().to_reg();
1089-
debug_assert_eq!(src1, dst);
1090-
let rex = RexFlags::clear_w();
1091-
let prefix = LegacyPrefixes::_66;
1092-
let src2 = src2.clone().to_reg_mem_imm();
1093-
if let RegMemImm::Imm { simm32 } = src2 {
1094-
let (opcode_bytes, reg_digit) = match opcode {
1095-
SseOpcode::Psllw => (0x0F71, 6),
1096-
SseOpcode::Pslld => (0x0F72, 6),
1097-
SseOpcode::Psllq => (0x0F73, 6),
1098-
SseOpcode::Psraw => (0x0F71, 4),
1099-
SseOpcode::Psrad => (0x0F72, 4),
1100-
SseOpcode::Psrlw => (0x0F71, 2),
1101-
SseOpcode::Psrld => (0x0F72, 2),
1102-
SseOpcode::Psrlq => (0x0F73, 2),
1103-
_ => panic!("invalid opcode: {opcode}"),
1104-
};
1105-
let dst_enc = reg_enc(dst);
1106-
emit_std_enc_enc(sink, prefix, opcode_bytes, 2, reg_digit, dst_enc, rex);
1107-
let imm = (simm32)
1108-
.try_into()
1109-
.expect("the immediate must be convertible to a u8");
1110-
sink.put1(imm);
1111-
} else {
1112-
let opcode_bytes = match opcode {
1113-
SseOpcode::Psllw => 0x0FF1,
1114-
SseOpcode::Pslld => 0x0FF2,
1115-
SseOpcode::Psllq => 0x0FF3,
1116-
SseOpcode::Psraw => 0x0FE1,
1117-
SseOpcode::Psrad => 0x0FE2,
1118-
SseOpcode::Psrlw => 0x0FD1,
1119-
SseOpcode::Psrld => 0x0FD2,
1120-
SseOpcode::Psrlq => 0x0FD3,
1121-
_ => panic!("invalid opcode: {opcode}"),
1122-
};
1123-
1124-
match src2 {
1125-
RegMemImm::Reg { reg } => {
1126-
emit_std_reg_reg(sink, prefix, opcode_bytes, 2, dst, reg, rex);
1127-
}
1128-
RegMemImm::Mem { addr } => {
1129-
let addr = &addr.finalize(state.frame_layout(), sink).clone();
1130-
emit_std_reg_mem(sink, prefix, opcode_bytes, 2, dst, addr, rex, 0);
1131-
}
1132-
RegMemImm::Imm { .. } => unreachable!(),
1133-
}
1134-
};
1135-
}
1136-
11371081
Inst::CmpRmiR {
11381082
size,
11391083
src1: reg_g,

0 commit comments

Comments
 (0)