Skip to content

x64: convert more logical operations #10753

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cranelift/assembler-x64/meta/src/instructions/and.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,5 +45,7 @@ pub fn list() -> Vec<Inst> {
inst("andpd", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x54]).r(), _64b | compat | sse2),
inst("andnps", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x0F, 0x55]).r(), _64b | compat | sse),
inst("andnpd", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x55]).r(), _64b | compat | sse2),
inst("pand", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xDB]).r(), _64b | compat | sse2),
inst("pandn", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xDF]).r(), _64b | compat | sse2),
]
}
1 change: 1 addition & 0 deletions cranelift/assembler-x64/meta/src/instructions/or.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,6 @@ pub fn list() -> Vec<Inst> {
// Vector instructions.
inst("orps", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x0F, 0x56]).r(), _64b | compat | sse),
inst("orpd", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x56]).r(), _64b | compat | sse2),
inst("por", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xEB]).r(), _64b | compat | sse2),
]
}
1 change: 1 addition & 0 deletions cranelift/assembler-x64/meta/src/instructions/xor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,6 @@ pub fn list() -> Vec<Inst> {
// Vector instructions.
inst("xorps", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x0F, 0x57]).r(), _64b | compat | sse),
inst("xorpd", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0x57]).r(), _64b | compat | sse2),
inst("pxor", fmt("A", [rw(xmm), r(align(xmm_m128))]), rex([0x66, 0x0F, 0xEF]).r(), _64b | compat | sse2),
]
}
24 changes: 8 additions & 16 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -936,8 +936,6 @@
Packusdw
Packuswb
Palignr
Pand
Pandn
Pavgb
Pavgw
Pblendvb
Expand Down Expand Up @@ -990,7 +988,6 @@
Pmulld
Pmullw
Pmuludq
Por
Pshufb
Pshufd
Psllw
Expand All @@ -1006,7 +1003,6 @@
Punpckhwd
Punpcklbw
Punpcklwd
Pxor
Rcpss
Roundps
Roundpd
Expand Down Expand Up @@ -3585,11 +3581,10 @@

;; Helper for creating `pand` instructions.
(decl x64_pand (Xmm XmmMem) Xmm)
(rule 0 (x64_pand src1 src2)
(xmm_rm_r (SseOpcode.Pand) src1 src2))
(rule 1 (x64_pand src1 src2)
(if-let true (use_avx))
(xmm_rmir_vex (AvxOpcode.Vpand) src1 src2))
(rule 0 (x64_pand src1 src2) (x64_pand_a src1 src2))

;; Helper for creating `andps` instructions.
(decl x64_andps (Xmm XmmMem) Xmm)
Expand All @@ -3607,11 +3602,10 @@

;; Helper for creating `por` instructions.
(decl x64_por (Xmm XmmMem) Xmm)
(rule 0 (x64_por src1 src2)
(xmm_rm_r (SseOpcode.Por) src1 src2))
(rule 1 (x64_por src1 src2)
(if-let true (use_avx))
(xmm_rmir_vex (AvxOpcode.Vpor) src1 src2))
(rule 0 (x64_por src1 src2) (x64_por_a src1 src2))

;; Helper for creating `orps` instructions.
(decl x64_orps (Xmm XmmMem) Xmm)
Expand All @@ -3629,11 +3623,10 @@

;; Helper for creating `pxor` instructions.
(decl x64_pxor (Xmm XmmMem) Xmm)
(rule 0 (x64_pxor src1 src2)
(xmm_rm_r (SseOpcode.Pxor) src1 src2))
(rule 1 (x64_pxor src1 src2)
(if-let true (use_avx))
(xmm_rmir_vex (AvxOpcode.Vpxor) src1 src2))
(rule 0 (x64_pxor src1 src2) (x64_pxor_a src1 src2))

;; Helper for creating `xorps` instructions.
(decl x64_xorps (Xmm XmmMem) Xmm)
Expand Down Expand Up @@ -3793,25 +3786,24 @@

;; Helper for creating `pandn` instructions.
(decl x64_pandn (Xmm XmmMem) Xmm)
(rule 0 (x64_pandn src1 src2)
(xmm_rm_r (SseOpcode.Pandn) src1 src2))
(rule 1 (x64_pandn src1 src2)
(if-let true (use_avx))
(xmm_rmir_vex (AvxOpcode.Vpandn) src1 src2))
(rule 0 (x64_pandn src1 src2) (x64_pandn_a src1 src2))

;; Helper for creating `addss` instructions.
(decl x64_addss (Xmm XmmMem) Xmm)
(rule 1 (x64_addss src1 src2)
(if-let true (use_avx))
(xmm_rmir_vex (AvxOpcode.Vaddss) src1 src2))
(rule (x64_addss src1 src2) (x64_addss_a src1 src2))
(rule 0 (x64_addss src1 src2) (x64_addss_a src1 src2))

;; Helper for creating `addsd` instructions.
(decl x64_addsd (Xmm XmmMem) Xmm)
(rule 1 (x64_addsd src1 src2)
(if-let true (use_avx))
(xmm_rmir_vex (AvxOpcode.Vaddsd) src1 src2))
(rule (x64_addsd src1 src2) (x64_addsd_a src1 src2))
(rule 0 (x64_addsd src1 src2) (x64_addsd_a src1 src2))

;; Helper for creating `addps` instructions.
(decl x64_addps (Xmm XmmMem) Xmm)
Expand All @@ -3832,14 +3824,14 @@
(rule 1 (x64_subss src1 src2)
(if-let true (use_avx))
(xmm_rmir_vex (AvxOpcode.Vsubss) src1 src2))
(rule (x64_subss src1 src2) (x64_subss_a src1 src2))
(rule 0 (x64_subss src1 src2) (x64_subss_a src1 src2))

;; Helper for creating `subsd` instructions.
(decl x64_subsd (Xmm XmmMem) Xmm)
(rule 1 (x64_subsd src1 src2)
(if-let true (use_avx))
(xmm_rmir_vex (AvxOpcode.Vsubsd) src1 src2))
(rule (x64_subsd src1 src2) (x64_subsd_a src1 src2))
(rule 0 (x64_subsd src1 src2) (x64_subsd_a src1 src2))

;; Helper for creating `subps` instructions.
(decl x64_subps (Xmm XmmMem) Xmm)
Expand Down
12 changes: 0 additions & 12 deletions cranelift/codegen/src/isa/x64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1001,8 +1001,6 @@ pub enum SseOpcode {
Packusdw,
Packuswb,
Palignr,
Pand,
Pandn,
Pavgb,
Pavgw,
Pblendvb,
Expand Down Expand Up @@ -1055,7 +1053,6 @@ pub enum SseOpcode {
Pmulld,
Pmullw,
Pmuludq,
Por,
Pshufb,
Pshufd,
Psllw,
Expand All @@ -1071,7 +1068,6 @@ pub enum SseOpcode {
Punpckhwd,
Punpcklbw,
Punpcklwd,
Pxor,
Rcpss,
Roundps,
Roundpd,
Expand Down Expand Up @@ -1164,8 +1160,6 @@ impl SseOpcode {
| SseOpcode::Packssdw
| SseOpcode::Packsswb
| SseOpcode::Packuswb
| SseOpcode::Pand
| SseOpcode::Pandn
| SseOpcode::Pavgb
| SseOpcode::Pavgw
| SseOpcode::Pcmpeqb
Expand All @@ -1186,7 +1180,6 @@ impl SseOpcode {
| SseOpcode::Pmulhuw
| SseOpcode::Pmullw
| SseOpcode::Pmuludq
| SseOpcode::Por
| SseOpcode::Pshufd
| SseOpcode::Psllw
| SseOpcode::Pslld
Expand All @@ -1200,7 +1193,6 @@ impl SseOpcode {
| SseOpcode::Punpckhwd
| SseOpcode::Punpcklbw
| SseOpcode::Punpcklwd
| SseOpcode::Pxor
| SseOpcode::Sqrtpd
| SseOpcode::Sqrtsd
| SseOpcode::Ucomisd
Expand Down Expand Up @@ -1357,8 +1349,6 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Packusdw => "packusdw",
SseOpcode::Packuswb => "packuswb",
SseOpcode::Palignr => "palignr",
SseOpcode::Pand => "pand",
SseOpcode::Pandn => "pandn",
SseOpcode::Pavgb => "pavgb",
SseOpcode::Pavgw => "pavgw",
SseOpcode::Pblendvb => "pblendvb",
Expand Down Expand Up @@ -1411,7 +1401,6 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Pmulld => "pmulld",
SseOpcode::Pmullw => "pmullw",
SseOpcode::Pmuludq => "pmuludq",
SseOpcode::Por => "por",
SseOpcode::Pshufb => "pshufb",
SseOpcode::Pshufd => "pshufd",
SseOpcode::Psllw => "psllw",
Expand All @@ -1427,7 +1416,6 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Punpckhwd => "punpckhwd",
SseOpcode::Punpcklbw => "punpcklbw",
SseOpcode::Punpcklwd => "punpcklwd",
SseOpcode::Pxor => "pxor",
SseOpcode::Rcpss => "rcpss",
SseOpcode::Roundps => "roundps",
SseOpcode::Roundpd => "roundpd",
Expand Down
4 changes: 0 additions & 4 deletions cranelift/codegen/src/isa/x64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2228,8 +2228,6 @@ pub(crate) fn emit(
SseOpcode::Packusdw => (LegacyPrefixes::_66, 0x0F382B, 3),
SseOpcode::Packuswb => (LegacyPrefixes::_66, 0x0F67, 2),
SseOpcode::Pmaddubsw => (LegacyPrefixes::_66, 0x0F3804, 3),
SseOpcode::Pand => (LegacyPrefixes::_66, 0x0FDB, 2),
SseOpcode::Pandn => (LegacyPrefixes::_66, 0x0FDF, 2),
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
SseOpcode::Pcmpeqb => (LegacyPrefixes::_66, 0x0F74, 2),
Expand Down Expand Up @@ -2260,7 +2258,6 @@ pub(crate) fn emit(
SseOpcode::Pmulld => (LegacyPrefixes::_66, 0x0F3840, 3),
SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2),
SseOpcode::Pmuludq => (LegacyPrefixes::_66, 0x0FF4, 2),
SseOpcode::Por => (LegacyPrefixes::_66, 0x0FEB, 2),
SseOpcode::Pshufb => (LegacyPrefixes::_66, 0x0F3800, 3),
SseOpcode::Punpckhbw => (LegacyPrefixes::_66, 0x0F68, 2),
SseOpcode::Punpckhwd => (LegacyPrefixes::_66, 0x0F69, 2),
Expand All @@ -2270,7 +2267,6 @@ pub(crate) fn emit(
SseOpcode::Punpcklqdq => (LegacyPrefixes::_66, 0x0F6C, 2),
SseOpcode::Punpckhdq => (LegacyPrefixes::_66, 0x0F6A, 2),
SseOpcode::Punpckhqdq => (LegacyPrefixes::_66, 0x0F6D, 2),
SseOpcode::Pxor => (LegacyPrefixes::_66, 0x0FEF, 2),
SseOpcode::Unpcklps => (LegacyPrefixes::None, 0x0F14, 2),
SseOpcode::Unpckhps => (LegacyPrefixes::None, 0x0F15, 2),
SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F10, 2),
Expand Down
6 changes: 0 additions & 6 deletions cranelift/codegen/src/isa/x64/inst/emit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3423,12 +3423,6 @@ fn test_x64_emit() {
"pminud %xmm2, %xmm3, %xmm2",
));

insns.push((
Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2),
"66410FEFD3",
"pxor %xmm2, %xmm11, %xmm2",
));

insns.push((
Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::reg(xmm11), w_xmm2),
"66410F3800D3",
Expand Down
16 changes: 8 additions & 8 deletions cranelift/filetests/filetests/isa/x64/fcvt.clif
Original file line number Diff line number Diff line change
Expand Up @@ -1074,9 +1074,9 @@ block0(v0: f32x4):
; subps %xmm7, %xmm0
; cmpps $2, %xmm7, %xmm0, %xmm7
; cvttps2dq %xmm0, %xmm0
; pxor %xmm0, %xmm7, %xmm0
; pxor %xmm7, %xmm0
; uninit %xmm1
; pxor %xmm1, %xmm1, %xmm1
; pxor %xmm1, %xmm1
; pmaxsd %xmm0, %xmm1, %xmm0
; paddd %xmm6, %xmm0
; movq %rbp, %rsp
Expand Down Expand Up @@ -1118,12 +1118,12 @@ block0(v0: f32x4):
; movdqa %xmm0, %xmm4
; cmpps $0, %xmm4, %xmm0, %xmm4
; andps %xmm4, %xmm0
; pxor %xmm4, %xmm0, %xmm4
; pxor %xmm0, %xmm4
; cvttps2dq %xmm0, %xmm1
; movdqa %xmm1, %xmm0
; pand %xmm0, %xmm4, %xmm0
; pand %xmm4, %xmm0
; psrad %xmm0, $31, %xmm0
; pxor %xmm0, %xmm1, %xmm0
; pxor %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
Expand Down Expand Up @@ -1157,12 +1157,12 @@ block0(v0: i64x2):
; movq %rsp, %rbp
; block0:
; movdqa %xmm0, %xmm7
; pand %xmm7, const(0), %xmm7
; pand (%rip), %xmm7
; movdqa %xmm7, %xmm1
; por %xmm1, const(1), %xmm1
; por (%rip), %xmm1
; movdqa %xmm1, %xmm7
; psrlq %xmm0, $32, %xmm0
; por %xmm0, const(2), %xmm0
; por (%rip), %xmm0
; subpd (%rip), %xmm0
; movdqa %xmm0, %xmm1
; movdqa %xmm7, %xmm0
Expand Down
2 changes: 1 addition & 1 deletion cranelift/filetests/filetests/isa/x64/shuffle.clif
Original file line number Diff line number Diff line change
Expand Up @@ -626,7 +626,7 @@ block0(v0: i8x16, v1: i8x16):
; movq %rsp, %rbp
; block0:
; uninit %xmm4
; pxor %xmm4, %xmm4, %xmm4
; pxor %xmm4, %xmm4
; pshufb %xmm0, %xmm4, %xmm0
; movq %rbp, %rsp
; popq %rbp
Expand Down
12 changes: 6 additions & 6 deletions cranelift/filetests/filetests/isa/x64/simd-bitselect.clif
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,12 @@ block0(v0: i8x16, v1: i8x16, v2: i32x4):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; pand %xmm0, %xmm2, %xmm0
; pand %xmm2, %xmm0
; movdqa %xmm0, %xmm7
; pandn %xmm2, %xmm1, %xmm2
; pandn %xmm1, %xmm2
; movdqa %xmm7, %xmm1
; movdqa %xmm2, %xmm0
; por %xmm0, %xmm1, %xmm0
; por %xmm1, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
Expand Down Expand Up @@ -216,9 +216,9 @@ block0(v0: i8x16, v1: i8x16):
; movdqa %xmm0, %xmm7
; movdqu const(0), %xmm0
; movdqa %xmm7, %xmm2
; pand %xmm2, %xmm0, %xmm2
; pandn %xmm0, %xmm1, %xmm0
; por %xmm0, %xmm2, %xmm0
; pand %xmm0, %xmm2
; pandn %xmm1, %xmm0
; por %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
Expand Down
Loading
Loading