Skip to content

Commit 5bbc446

Browse files
committed
x64: convert some SSE cvt* instructions
x86 CPUs have a set of conversions of the form: `cvt{from}2{to}`, where `from` and `to` can be various XMM-held types (e.g., `ss`, `ps`, `si`, etc.). These also have their truncating versions, `cvtt*`. This change defines all of the instructions Cranelift needs (there are a few more) and wires them up in ISLE and in various emitted sequences. For these sequences, this chooses to factor out the choice of instruction into a closure since we can no longer pass around an opcode.
1 parent a66afef commit 5bbc446

File tree

13 files changed

+153
-297
lines changed

13 files changed

+153
-297
lines changed

cranelift/assembler-x64/meta/src/instructions.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
33
mod add;
44
mod and;
5+
mod cvt;
56
mod neg;
67
mod or;
78
mod shld;
@@ -15,8 +16,9 @@ pub fn list() -> Vec<Inst> {
1516
let mut all = vec![];
1617
all.extend(add::list());
1718
all.extend(and::list());
18-
all.extend(or::list());
19+
all.extend(cvt::list());
1920
all.extend(neg::list());
21+
all.extend(or::list());
2022
all.extend(shld::list());
2123
all.extend(sub::list());
2224
all.extend(xor::list());
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
use crate::dsl::{align, fmt, inst, r, rex, rw, w};
2+
use crate::dsl::{Feature::*, Inst, Location::*};
3+
4+
#[rustfmt::skip] // Keeps instructions on a single line.
5+
pub fn list() -> Vec<Inst> {
6+
vec![
7+
// From 32-bit floating point.
8+
inst("cvtps2pd", fmt("A", [w(xmm), r(xmm_m64)]), rex([0x0F, 0x5A]).r(), _64b | compat | sse2),
9+
inst("cvttps2dq", fmt("A", [w(xmm), r(align(xmm_m128))]), rex([0xF3, 0x0F, 0x5B]).r(), _64b | compat | sse2),
10+
inst("cvtss2sd", fmt("A", [rw(xmm), r(xmm_m32)]), rex([0xF3, 0x0F, 0x5A]).r(), _64b | compat | sse2),
11+
inst("cvtss2si", fmt("A", [w(r32), r(xmm_m32)]), rex([0xF3, 0x0F, 0x2D]).r(), _64b | compat | sse),
12+
inst("cvtss2si", fmt("AQ", [w(r64), r(xmm_m32)]), rex([0xF3, 0x0F, 0x2D]).w().r(), _64b | sse),
13+
inst("cvttss2si", fmt("A", [w(r32), r(xmm_m32)]), rex([0xF3, 0x0F, 0x2C]).r(), _64b | compat | sse),
14+
inst("cvttss2si", fmt("AQ", [w(r64), r(xmm_m32)]), rex([0xF3, 0x0F, 0x2C]).w().r(), _64b | sse),
15+
// From 64-bit floating point.
16+
inst("cvtpd2ps", fmt("A", [w(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x5A]).r(), _64b | compat | sse2),
17+
inst("cvttpd2dq", fmt("A", [w(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xE6]).r(), _64b | compat | sse2),
18+
inst("cvtsd2ss", fmt("A", [rw(xmm), r(xmm_m64)]), rex([0xF2, 0x0F, 0x5A]).r(), _64b | compat | sse2),
19+
inst("cvtsd2si", fmt("A", [w(r32), r(xmm_m64)]), rex([0xF2, 0x0F, 0x2D]).r(), _64b | compat | sse2),
20+
inst("cvtsd2si", fmt("AQ", [w(r64), r(xmm_m64)]), rex([0xF2, 0x0F, 0x2D]).w().r(), _64b | sse2),
21+
inst("cvttsd2si", fmt("A", [w(r32), r(xmm_m64)]), rex([0xF2, 0x0F, 0x2C]).r(), _64b | compat | sse2),
22+
inst("cvttsd2si", fmt("AQ", [w(r64), r(xmm_m64)]), rex([0xF2, 0x0F, 0x2C]).w().r(), _64b | sse2),
23+
// From signed 32-bit integer.
24+
inst("cvtdq2ps", fmt("A", [w(xmm), r(align(xmm_m128))]), rex([0x0F, 0x5B]).r(), _64b | compat | sse2),
25+
inst("cvtdq2pd", fmt("A", [w(xmm), r(xmm_m64)]), rex([0xF3, 0x0F, 0xE6]).r(), _64b | compat | sse2),
26+
inst("cvtsi2ssl", fmt("A", [rw(xmm), r(rm32)]), rex([0xF3, 0x0F, 0x2A]).r(), _64b | compat | sse),
27+
inst("cvtsi2ssq", fmt("A", [rw(xmm), r(rm64)]), rex([0xF3, 0x0F, 0x2A]).w().r(), _64b | sse),
28+
inst("cvtsi2sdl", fmt("A", [rw(xmm), r(rm32)]), rex([0xF2, 0x0F, 0x2A]).r(), _64b | compat | sse2),
29+
inst("cvtsi2sdq", fmt("A", [rw(xmm), r(rm64)]), rex([0xF2, 0x0F, 0x2A]).w().r(), _64b | sse2),
30+
]
31+
}

cranelift/codegen/src/isa/x64/inst.isle

Lines changed: 14 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -448,11 +448,6 @@
448448
;; Note that this is special in that `src1` is an xmm/float register
449449
;; while `src2` is a general purpose register as this is converting an
450450
;; integer in a gpr to an equivalent float in an xmm reg.
451-
(CvtIntToFloat (op SseOpcode)
452-
(src1 Xmm)
453-
(src2 GprMem)
454-
(dst WritableXmm)
455-
(src2_size OperandSize))
456451
(CvtIntToFloatVex (op AvxOpcode)
457452
(src1 Xmm)
458453
(src2 GprMem)
@@ -874,20 +869,6 @@
874869
Cmppd
875870
Cmpss
876871
Cmpsd
877-
Cvtdq2ps
878-
Cvtdq2pd
879-
Cvtpd2ps
880-
Cvtps2pd
881-
Cvtsd2ss
882-
Cvtsd2si
883-
Cvtsi2ss
884-
Cvtsi2sd
885-
Cvtss2si
886-
Cvtss2sd
887-
Cvttpd2dq
888-
Cvttps2dq
889-
Cvttss2si
890-
Cvttsd2si
891872
Divps
892873
Divpd
893874
Divss
@@ -2234,12 +2215,6 @@
22342215
(_ Unit (emit (MInst.UnaryRmRImmVex size op src dst imm))))
22352216
dst))
22362217

2237-
(decl cvt_int_to_float (SseOpcode Xmm GprMem OperandSize) Xmm)
2238-
(rule (cvt_int_to_float op src1 src2 size)
2239-
(let ((dst WritableXmm (temp_writable_xmm))
2240-
(_ Unit (emit (MInst.CvtIntToFloat op src1 src2 dst size))))
2241-
dst))
2242-
22432218
(decl cvt_int_to_float_vex (AvxOpcode Xmm GprMem OperandSize) Xmm)
22442219
(rule (cvt_int_to_float_vex op src1 src2 size)
22452220
(let ((dst WritableXmm (temp_writable_xmm))
@@ -4847,79 +4822,77 @@
48474822
;;
48484823
;; NB: see `x64_sqrtss` for why this has two args (same reasoning, different op)
48494824
(decl x64_cvtss2sd (Xmm XmmMem) Xmm)
4850-
(rule (x64_cvtss2sd x y) (xmm_rm_r_unaligned (SseOpcode.Cvtss2sd) x y))
48514825
(rule 1 (x64_cvtss2sd x y)
48524826
(if-let true (use_avx))
48534827
(xmm_rmir_vex (AvxOpcode.Vcvtss2sd) x y))
4828+
(rule 0 (x64_cvtss2sd x y) (x64_cvtss2sd_a x y))
48544829

48554830
;; Helper for creating `cvtsd2ss` instructions.
48564831
;;
48574832
;; NB: see `x64_sqrtss` for why this has two args (same reasoning, different op)
48584833
(decl x64_cvtsd2ss (Xmm XmmMem) Xmm)
4859-
(rule (x64_cvtsd2ss x y) (xmm_rm_r_unaligned (SseOpcode.Cvtsd2ss) x y))
48604834
(rule 1 (x64_cvtsd2ss x y)
48614835
(if-let true (use_avx))
48624836
(xmm_rmir_vex (AvxOpcode.Vcvtsd2ss) x y))
4837+
(rule 0 (x64_cvtsd2ss x y) (x64_cvtsd2ss_a x y))
48634838

48644839
;; Helper for creating `cvtdq2ps` instructions.
48654840
(decl x64_cvtdq2ps (XmmMem) Xmm)
4866-
(rule (x64_cvtdq2ps x) (xmm_unary_rm_r (SseOpcode.Cvtdq2ps) x))
48674841
(rule 1 (x64_cvtdq2ps x)
48684842
(if-let true (use_avx))
48694843
(xmm_unary_rm_r_vex (AvxOpcode.Vcvtdq2ps) x))
4844+
(rule (x64_cvtdq2ps x) (x64_cvtdq2ps_a x))
48704845

48714846
;; Helper for creating `cvtps2pd` instructions.
48724847
(decl x64_cvtps2pd (XmmMem) Xmm)
4873-
(rule (x64_cvtps2pd x) (xmm_unary_rm_r (SseOpcode.Cvtps2pd) x))
48744848
(rule 1 (x64_cvtps2pd x)
48754849
(if-let true (use_avx))
48764850
(xmm_unary_rm_r_vex (AvxOpcode.Vcvtps2pd) x))
4851+
(rule 0 (x64_cvtps2pd x) (x64_cvtps2pd_a x))
48774852

48784853
;; Helper for creating `cvtpd2ps` instructions.
48794854
(decl x64_cvtpd2ps (XmmMem) Xmm)
4880-
(rule (x64_cvtpd2ps x) (xmm_unary_rm_r (SseOpcode.Cvtpd2ps) x))
48814855
(rule 1 (x64_cvtpd2ps x)
48824856
(if-let true (use_avx))
48834857
(xmm_unary_rm_r_vex (AvxOpcode.Vcvtpd2ps) x))
4858+
(rule 0 (x64_cvtpd2ps x) (x64_cvtpd2ps_a x))
48844859

48854860
;; Helper for creating `cvtdq2pd` instructions.
48864861
(decl x64_cvtdq2pd (XmmMem) Xmm)
4887-
(rule (x64_cvtdq2pd x) (xmm_unary_rm_r (SseOpcode.Cvtdq2pd) x))
48884862
(rule 1 (x64_cvtdq2pd x)
48894863
(if-let true (use_avx))
48904864
(xmm_unary_rm_r_vex (AvxOpcode.Vcvtdq2pd) x))
4865+
(rule 0 (x64_cvtdq2pd x) (x64_cvtdq2pd_a x))
48914866

48924867
;; Helper for creating `cvtsi2ss` instructions.
48934868
(decl x64_cvtsi2ss (Type Xmm GprMem) Xmm)
4894-
(rule (x64_cvtsi2ss ty x y)
4895-
(cvt_int_to_float (SseOpcode.Cvtsi2ss) x y (raw_operand_size_of_type ty)))
4896-
(rule 1 (x64_cvtsi2ss ty x y)
4869+
(rule 2 (x64_cvtsi2ss ty x y)
48974870
(if-let true (use_avx))
48984871
(cvt_int_to_float_vex (AvxOpcode.Vcvtsi2ss) x y (raw_operand_size_of_type ty)))
4872+
(rule 1 (x64_cvtsi2ss $I32 x y) (x64_cvtsi2ssl_a x y))
4873+
(rule 0 (x64_cvtsi2ss $I64 x y) (x64_cvtsi2ssq_a x y))
48994874

49004875
;; Helper for creating `cvtsi2sd` instructions.
49014876
(decl x64_cvtsi2sd (Type Xmm GprMem) Xmm)
4902-
(rule (x64_cvtsi2sd ty x y)
4903-
(cvt_int_to_float (SseOpcode.Cvtsi2sd) x y (raw_operand_size_of_type ty)))
4904-
(rule 1 (x64_cvtsi2sd ty x y)
4877+
(rule 2 (x64_cvtsi2sd ty x y)
49054878
(if-let true (use_avx))
49064879
(cvt_int_to_float_vex (AvxOpcode.Vcvtsi2sd) x y (raw_operand_size_of_type ty)))
4880+
(rule 1 (x64_cvtsi2sd $I32 x y) (x64_cvtsi2sdl_a x y))
4881+
(rule 0 (x64_cvtsi2sd $I64 x y) (x64_cvtsi2sdq_a x y))
49074882

49084883
;; Helper for creating `cvttps2dq` instructions.
49094884
(decl x64_cvttps2dq (XmmMem) Xmm)
4910-
(rule (x64_cvttps2dq x)
4911-
(xmm_unary_rm_r (SseOpcode.Cvttps2dq) x))
49124885
(rule 1 (x64_cvttps2dq x)
49134886
(if-let true (use_avx))
49144887
(xmm_unary_rm_r_vex (AvxOpcode.Vcvttps2dq) x))
4888+
(rule 0 (x64_cvttps2dq x) (x64_cvttps2dq_a x))
49154889

49164890
;; Helper for creating `cvttpd2dq` instructions.
49174891
(decl x64_cvttpd2dq (XmmMem) Xmm)
4918-
(rule (x64_cvttpd2dq x)
4919-
(xmm_unary_rm_r (SseOpcode.Cvttpd2dq) x))
49204892
(rule 1 (x64_cvttpd2dq x)
49214893
(if-let true (use_avx))
49224894
(xmm_unary_rm_r_vex (AvxOpcode.Vcvttpd2dq) x))
4895+
(rule 0 (x64_cvttpd2dq x) (x64_cvttpd2dq_a x))
49234896

49244897
;; Helpers for creating `pcmpeq*` instructions.
49254898
(decl x64_pcmpeq (Type Xmm XmmMem) Xmm)

cranelift/codegen/src/isa/x64/inst/args.rs

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -949,20 +949,6 @@ pub enum SseOpcode {
949949
Cmppd,
950950
Cmpss,
951951
Cmpsd,
952-
Cvtdq2ps,
953-
Cvtdq2pd,
954-
Cvtpd2ps,
955-
Cvtps2pd,
956-
Cvtsd2ss,
957-
Cvtsd2si,
958-
Cvtsi2ss,
959-
Cvtsi2sd,
960-
Cvtss2si,
961-
Cvtss2sd,
962-
Cvttpd2dq,
963-
Cvttps2dq,
964-
Cvttss2si,
965-
Cvttsd2si,
966952
Divps,
967953
Divpd,
968954
Divss,
@@ -1102,9 +1088,6 @@ impl SseOpcode {
11021088
SseOpcode::Comiss
11031089
| SseOpcode::Cmpps
11041090
| SseOpcode::Cmpss
1105-
| SseOpcode::Cvtsi2ss
1106-
| SseOpcode::Cvtss2si
1107-
| SseOpcode::Cvttss2si
11081091
| SseOpcode::Divps
11091092
| SseOpcode::Divss
11101093
| SseOpcode::Maxps
@@ -1130,17 +1113,6 @@ impl SseOpcode {
11301113
SseOpcode::Cmppd
11311114
| SseOpcode::Cmpsd
11321115
| SseOpcode::Comisd
1133-
| SseOpcode::Cvtdq2ps
1134-
| SseOpcode::Cvtdq2pd
1135-
| SseOpcode::Cvtpd2ps
1136-
| SseOpcode::Cvtps2pd
1137-
| SseOpcode::Cvtsd2ss
1138-
| SseOpcode::Cvtsd2si
1139-
| SseOpcode::Cvtsi2sd
1140-
| SseOpcode::Cvtss2sd
1141-
| SseOpcode::Cvttpd2dq
1142-
| SseOpcode::Cvttps2dq
1143-
| SseOpcode::Cvttsd2si
11441116
| SseOpcode::Divpd
11451117
| SseOpcode::Divsd
11461118
| SseOpcode::Maxpd
@@ -1297,20 +1269,6 @@ impl fmt::Debug for SseOpcode {
12971269
SseOpcode::Cmpsd => "cmpsd",
12981270
SseOpcode::Comiss => "comiss",
12991271
SseOpcode::Comisd => "comisd",
1300-
SseOpcode::Cvtdq2ps => "cvtdq2ps",
1301-
SseOpcode::Cvtdq2pd => "cvtdq2pd",
1302-
SseOpcode::Cvtpd2ps => "cvtpd2ps",
1303-
SseOpcode::Cvtps2pd => "cvtps2pd",
1304-
SseOpcode::Cvtsd2ss => "cvtsd2ss",
1305-
SseOpcode::Cvtsd2si => "cvtsd2si",
1306-
SseOpcode::Cvtsi2ss => "cvtsi2ss",
1307-
SseOpcode::Cvtsi2sd => "cvtsi2sd",
1308-
SseOpcode::Cvtss2si => "cvtss2si",
1309-
SseOpcode::Cvtss2sd => "cvtss2sd",
1310-
SseOpcode::Cvttpd2dq => "cvttpd2dq",
1311-
SseOpcode::Cvttps2dq => "cvttps2dq",
1312-
SseOpcode::Cvttss2si => "cvttss2si",
1313-
SseOpcode::Cvttsd2si => "cvttsd2si",
13141272
SseOpcode::Divps => "divps",
13151273
SseOpcode::Divpd => "divpd",
13161274
SseOpcode::Divss => "divss",

0 commit comments

Comments
 (0)