Skip to content

Commit 44343c7

Browse files
bmkessler and bradfitz
authored and committed
cmd/compile: add signed divisibility by power of 2 rules
For powers of two (c=1<<k), the divisibility check x%c == 0 can be made just by checking the trailing zeroes via a mask x&(c-1) == 0, even for signed integers. This avoids division fix-ups when only a divisibility check is needed. To apply this rule, we match on the fixed-up version of the division. This is necessary because the mod and division rewrite rules are already applied during the initial opt pass. The speedup on amd64 due to elimination of unnecessary fix-up code is ~55%: name old time/op new time/op delta DivconstI64-4 2.08ns ± 0% 2.09ns ± 1% ~ (p=0.730 n=5+5) DivisiblePow2constI64-4 1.78ns ± 1% 0.81ns ± 1% -54.66% (p=0.008 n=5+5) DivconstU64-4 2.08ns ± 0% 2.08ns ± 0% ~ (p=0.683 n=5+5) DivconstI32-4 1.53ns ± 0% 1.53ns ± 1% ~ (p=0.968 n=4+5) DivisiblePow2constI32-4 1.79ns ± 1% 0.81ns ± 1% -54.97% (p=0.008 n=5+5) DivconstU32-4 1.78ns ± 1% 1.80ns ± 2% ~ (p=0.206 n=5+5) DivconstI16-4 1.54ns ± 2% 1.54ns ± 0% ~ (p=0.238 n=5+4) DivisiblePow2constI16-4 1.78ns ± 0% 0.81ns ± 1% -54.72% (p=0.000 n=4+5) DivconstU16-4 1.00ns ± 5% 1.01ns ± 1% ~ (p=0.119 n=5+5) DivconstI8-4 1.54ns ± 0% 1.54ns ± 2% ~ (p=0.571 n=4+5) DivisiblePow2constI8-4 1.78ns ± 0% 0.82ns ± 8% -53.71% (p=0.008 n=5+5) DivconstU8-4 0.93ns ± 1% 0.93ns ± 1% ~ (p=0.643 n=5+5) A follow-up CL will address the general case of x%c == 0 for signed integers. Updates #15806 Change-Id: Iabadbbe369b6e0998c8ce85d038ebc236142e42a Reviewed-on: https://go-review.googlesource.com/c/go/+/173557 Run-TryBot: Brian Kessler <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Keith Randall <[email protected]>
1 parent 2693b42 commit 44343c7

File tree

5 files changed

+1910
-9
lines changed

5 files changed

+1910
-9
lines changed

src/cmd/compile/internal/gc/testdata/arith_test.go

Lines changed: 291 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
package main
88

99
import (
10+
"math"
1011
"runtime"
1112
"testing"
1213
)
@@ -924,6 +925,7 @@ func TestArithmetic(t *testing.T) {
924925
testShiftRemoval(t)
925926
testShiftedOps(t)
926927
testDivFixUp(t)
928+
testDivisibleSignedPow2(t)
927929
}
928930

929931
// testDivFixUp ensures that signed division fix-ups are being generated.
@@ -952,3 +954,292 @@ func testDivFixUp(t *testing.T) {
952954
g64 = z % int64(i)
953955
}
954956
}
957+
958+
// divisible_int8_2to1 reports whether x is divisible by 1<<1, written with a
// constant divisor as x%(1<<1) == 0. It is marked noinline so that calls to it
// are not inlined into the caller.
//
//go:noinline
959+
func divisible_int8_2to1(x int8) bool {
960+
return x%(1<<1) == 0
961+
}
962+
963+
// divisible_int8_2to2 reports whether x is divisible by 1<<2, written with a
// constant divisor as x%(1<<2) == 0. It is marked noinline so that calls to it
// are not inlined into the caller.
//
//go:noinline
964+
func divisible_int8_2to2(x int8) bool {
965+
return x%(1<<2) == 0
966+
}
967+
968+
// divisible_int8_2to3 reports whether x is divisible by 1<<3, written with a
// constant divisor as x%(1<<3) == 0. It is marked noinline so that calls to it
// are not inlined into the caller.
//
//go:noinline
969+
func divisible_int8_2to3(x int8) bool {
970+
return x%(1<<3) == 0
971+
}
972+
973+
// divisible_int8_2to4 reports whether x is divisible by 1<<4, written with a
// constant divisor as x%(1<<4) == 0. It is marked noinline so that calls to it
// are not inlined into the caller.
//
//go:noinline
974+
func divisible_int8_2to4(x int8) bool {
975+
return x%(1<<4) == 0
976+
}
977+
978+
// divisible_int8_2to5 reports whether x is divisible by 1<<5, written with a
// constant divisor as x%(1<<5) == 0. It is marked noinline so that calls to it
// are not inlined into the caller.
//
//go:noinline
979+
func divisible_int8_2to5(x int8) bool {
980+
return x%(1<<5) == 0
981+
}
982+
983+
// divisible_int8_2to6 reports whether x is divisible by 1<<6, written with a
// constant divisor as x%(1<<6) == 0. It is marked noinline so that calls to it
// are not inlined into the caller.
//
//go:noinline
984+
func divisible_int8_2to6(x int8) bool {
985+
return x%(1<<6) == 0
986+
}
987+
988+
// divisible_int16_2to1 reports whether x is divisible by 1<<1, written with a
// constant divisor as x%(1<<1) == 0. It is marked noinline so that calls to it
// are not inlined into the caller.
//
//go:noinline
989+
func divisible_int16_2to1(x int16) bool {
990+
return x%(1<<1) == 0
991+
}
992+
993+
// divisible_int16_2to2 reports whether x is divisible by 1<<2, written with a
// constant divisor as x%(1<<2) == 0. It is marked noinline so that calls to it
// are not inlined into the caller.
//
//go:noinline
994+
func divisible_int16_2to2(x int16) bool {
995+
return x%(1<<2) == 0
996+
}
997+
998+
// divisible_int16_2to3 reports whether x is divisible by 1<<3, written with a
// constant divisor as x%(1<<3) == 0. It is marked noinline so that calls to it
// are not inlined into the caller.
//
//go:noinline
999+
func divisible_int16_2to3(x int16) bool {
1000+
return x%(1<<3) == 0
1001+
}
1002+
1003+
// divisible_int16_2to4 reports whether x is divisible by 1<<4, written with a
// constant divisor as x%(1<<4) == 0. It is marked noinline so that calls to it
// are not inlined into the caller.
//
//go:noinline
1004+
func divisible_int16_2to4(x int16) bool {
1005+
return x%(1<<4) == 0
1006+
}
1007+
1008+
// divisible_int16_2to5 reports whether x is divisible by 1<<5, written with a
// constant divisor as x%(1<<5) == 0. It is marked noinline so that calls to it
// are not inlined into the caller.
//
//go:noinline
1009+
func divisible_int16_2to5(x int16) bool {
1010+
return x%(1<<5) == 0
1011+
}
1012+
1013+
// divisible_int16_2to6 reports whether x is divisible by 1<<6, written with a
// constant divisor as x%(1<<6) == 0. It is marked noinline so that calls to it
// are not inlined into the caller.
//
//go:noinline
1014+
func divisible_int16_2to6(x int16) bool {
1015+
return x%(1<<6) == 0
1016+
}
1017+
1018+
// divisible_int16_2to7 reports whether x is divisible by 1<<7, written with a
// constant divisor as x%(1<<7) == 0. It is marked noinline so that calls to it
// are not inlined into the caller.
//
//go:noinline
1019+
func divisible_int16_2to7(x int16) bool {
1020+
return x%(1<<7) == 0
1021+
}
1022+
1023+
// divisible_int16_2to8 reports whether x is divisible by 1<<8, written with a
// constant divisor as x%(1<<8) == 0. It is marked noinline so that calls to it
// are not inlined into the caller.
//
//go:noinline
1024+
func divisible_int16_2to8(x int16) bool {
1025+
return x%(1<<8) == 0
1026+
}
1027+
1028+
// divisible_int16_2to9 reports whether x is divisible by 1<<9, written with a
// constant divisor as x%(1<<9) == 0. It is marked noinline so that calls to it
// are not inlined into the caller.
//
//go:noinline
1029+
func divisible_int16_2to9(x int16) bool {
1030+
return x%(1<<9) == 0
1031+
}
1032+
1033+
// divisible_int16_2to10 reports whether x is divisible by 1<<10, written with
// a constant divisor as x%(1<<10) == 0. It is marked noinline so that calls to
// it are not inlined into the caller.
//
//go:noinline
1034+
func divisible_int16_2to10(x int16) bool {
1035+
return x%(1<<10) == 0
1036+
}
1037+
1038+
// divisible_int16_2to11 reports whether x is divisible by 1<<11, written with
// a constant divisor as x%(1<<11) == 0. It is marked noinline so that calls to
// it are not inlined into the caller.
//
//go:noinline
1039+
func divisible_int16_2to11(x int16) bool {
1040+
return x%(1<<11) == 0
1041+
}
1042+
1043+
// divisible_int16_2to12 reports whether x is divisible by 1<<12, written with
// a constant divisor as x%(1<<12) == 0. It is marked noinline so that calls to
// it are not inlined into the caller.
//
//go:noinline
1044+
func divisible_int16_2to12(x int16) bool {
1045+
return x%(1<<12) == 0
1046+
}
1047+
1048+
// divisible_int16_2to13 reports whether x is divisible by 1<<13, written with
// a constant divisor as x%(1<<13) == 0. It is marked noinline so that calls to
// it are not inlined into the caller.
//
//go:noinline
1049+
func divisible_int16_2to13(x int16) bool {
1050+
return x%(1<<13) == 0
1051+
}
1052+
1053+
// divisible_int16_2to14 reports whether x is divisible by 1<<14, written with
// a constant divisor as x%(1<<14) == 0. It is marked noinline so that calls to
// it are not inlined into the caller.
//
//go:noinline
1054+
func divisible_int16_2to14(x int16) bool {
1055+
return x%(1<<14) == 0
1056+
}
1057+
1058+
// divisible_int32_2to4 reports whether x is divisible by 1<<4, written with a
// constant divisor as x%(1<<4) == 0. It is marked noinline so that calls to it
// are not inlined into the caller.
//
//go:noinline
1059+
func divisible_int32_2to4(x int32) bool {
1060+
return x%(1<<4) == 0
1061+
}
1062+
1063+
// divisible_int32_2to15 reports whether x is divisible by 1<<15, written with
// a constant divisor as x%(1<<15) == 0. It is marked noinline so that calls to
// it are not inlined into the caller.
//
//go:noinline
1064+
func divisible_int32_2to15(x int32) bool {
1065+
return x%(1<<15) == 0
1066+
}
1067+
1068+
// divisible_int32_2to26 reports whether x is divisible by 1<<26, written with
// a constant divisor as x%(1<<26) == 0. It is marked noinline so that calls to
// it are not inlined into the caller.
//
//go:noinline
1069+
func divisible_int32_2to26(x int32) bool {
1070+
return x%(1<<26) == 0
1071+
}
1072+
1073+
// divisible_int64_2to4 reports whether x is divisible by 1<<4, written with a
// constant divisor as x%(1<<4) == 0. It is marked noinline so that calls to it
// are not inlined into the caller.
//
//go:noinline
1074+
func divisible_int64_2to4(x int64) bool {
1075+
return x%(1<<4) == 0
1076+
}
1077+
1078+
// divisible_int64_2to15 reports whether x is divisible by 1<<15, written with
// a constant divisor as x%(1<<15) == 0. It is marked noinline so that calls to
// it are not inlined into the caller.
//
//go:noinline
1079+
func divisible_int64_2to15(x int64) bool {
1080+
return x%(1<<15) == 0
1081+
}
1082+
1083+
// divisible_int64_2to26 reports whether x is divisible by 1<<26, written with
// a constant divisor as x%(1<<26) == 0. It is marked noinline so that calls to
// it are not inlined into the caller.
//
//go:noinline
1084+
func divisible_int64_2to26(x int64) bool {
1085+
return x%(1<<26) == 0
1086+
}
1087+
1088+
// divisible_int64_2to34 reports whether x is divisible by 1<<34, written with
// a constant divisor as x%(1<<34) == 0. It is marked noinline so that calls to
// it are not inlined into the caller.
//
//go:noinline
1089+
func divisible_int64_2to34(x int64) bool {
1090+
return x%(1<<34) == 0
1091+
}
1092+
1093+
// divisible_int64_2to48 reports whether x is divisible by 1<<48, written with
// a constant divisor as x%(1<<48) == 0. It is marked noinline so that calls to
// it are not inlined into the caller.
//
//go:noinline
1094+
func divisible_int64_2to48(x int64) bool {
1095+
return x%(1<<48) == 0
1096+
}
1097+
1098+
// divisible_int64_2to57 reports whether x is divisible by 1<<57, written with
// a constant divisor as x%(1<<57) == 0. It is marked noinline so that calls to
// it are not inlined into the caller.
//
//go:noinline
1099+
func divisible_int64_2to57(x int64) bool {
1100+
return x%(1<<57) == 0
1101+
}
1102+
1103+
// testDivisibleSignedPow2 confirms that x%(1<<k)==0 is rewritten correctly.
// Each divisible_* helper (which uses a constant divisor) is compared against
// the same check computed here with a divisor read from a variable; any
// mismatch is reported through t.Errorf.
1104+
func testDivisibleSignedPow2(t *testing.T) {
1105+
// i is int64 so the loop bounds below can reach MaxInt8/MaxInt16 inclusive
// without the counter wrapping around.
var i int64
1106+
// pow2[k] holds 1<<k for k = 0..14; the expected results take their divisor from here.
var pow2 = []int64{
1107+
1,
1108+
1 << 1,
1109+
1 << 2,
1110+
1 << 3,
1111+
1 << 4,
1112+
1 << 5,
1113+
1 << 6,
1114+
1 << 7,
1115+
1 << 8,
1116+
1 << 9,
1117+
1 << 10,
1118+
1 << 11,
1119+
1 << 12,
1120+
1 << 13,
1121+
1 << 14,
1122+
}
1123+
// exhaustive test for int8: every value in [MinInt8, MaxInt8] against k = 1..6
1124+
for i = math.MinInt8; i <= math.MaxInt8; i++ {
1125+
if want, got := int8(i)%int8(pow2[1]) == 0, divisible_int8_2to1(int8(i)); got != want {
1126+
t.Errorf("divisible_int8_2to1(%d) = %v want %v", i, got, want)
1127+
}
1128+
if want, got := int8(i)%int8(pow2[2]) == 0, divisible_int8_2to2(int8(i)); got != want {
1129+
t.Errorf("divisible_int8_2to2(%d) = %v want %v", i, got, want)
1130+
}
1131+
if want, got := int8(i)%int8(pow2[3]) == 0, divisible_int8_2to3(int8(i)); got != want {
1132+
t.Errorf("divisible_int8_2to3(%d) = %v want %v", i, got, want)
1133+
}
1134+
if want, got := int8(i)%int8(pow2[4]) == 0, divisible_int8_2to4(int8(i)); got != want {
1135+
t.Errorf("divisible_int8_2to4(%d) = %v want %v", i, got, want)
1136+
}
1137+
if want, got := int8(i)%int8(pow2[5]) == 0, divisible_int8_2to5(int8(i)); got != want {
1138+
t.Errorf("divisible_int8_2to5(%d) = %v want %v", i, got, want)
1139+
}
1140+
if want, got := int8(i)%int8(pow2[6]) == 0, divisible_int8_2to6(int8(i)); got != want {
1141+
t.Errorf("divisible_int8_2to6(%d) = %v want %v", i, got, want)
1142+
}
1143+
}
1144+
// exhaustive test for int16: every value in [MinInt16, MaxInt16] against k = 1..14
1145+
for i = math.MinInt16; i <= math.MaxInt16; i++ {
1146+
if want, got := int16(i)%int16(pow2[1]) == 0, divisible_int16_2to1(int16(i)); got != want {
1147+
t.Errorf("divisible_int16_2to1(%d) = %v want %v", i, got, want)
1148+
}
1149+
if want, got := int16(i)%int16(pow2[2]) == 0, divisible_int16_2to2(int16(i)); got != want {
1150+
t.Errorf("divisible_int16_2to2(%d) = %v want %v", i, got, want)
1151+
}
1152+
if want, got := int16(i)%int16(pow2[3]) == 0, divisible_int16_2to3(int16(i)); got != want {
1153+
t.Errorf("divisible_int16_2to3(%d) = %v want %v", i, got, want)
1154+
}
1155+
if want, got := int16(i)%int16(pow2[4]) == 0, divisible_int16_2to4(int16(i)); got != want {
1156+
t.Errorf("divisible_int16_2to4(%d) = %v want %v", i, got, want)
1157+
}
1158+
if want, got := int16(i)%int16(pow2[5]) == 0, divisible_int16_2to5(int16(i)); got != want {
1159+
t.Errorf("divisible_int16_2to5(%d) = %v want %v", i, got, want)
1160+
}
1161+
if want, got := int16(i)%int16(pow2[6]) == 0, divisible_int16_2to6(int16(i)); got != want {
1162+
t.Errorf("divisible_int16_2to6(%d) = %v want %v", i, got, want)
1163+
}
1164+
if want, got := int16(i)%int16(pow2[7]) == 0, divisible_int16_2to7(int16(i)); got != want {
1165+
t.Errorf("divisible_int16_2to7(%d) = %v want %v", i, got, want)
1166+
}
1167+
if want, got := int16(i)%int16(pow2[8]) == 0, divisible_int16_2to8(int16(i)); got != want {
1168+
t.Errorf("divisible_int16_2to8(%d) = %v want %v", i, got, want)
1169+
}
1170+
if want, got := int16(i)%int16(pow2[9]) == 0, divisible_int16_2to9(int16(i)); got != want {
1171+
t.Errorf("divisible_int16_2to9(%d) = %v want %v", i, got, want)
1172+
}
1173+
if want, got := int16(i)%int16(pow2[10]) == 0, divisible_int16_2to10(int16(i)); got != want {
1174+
t.Errorf("divisible_int16_2to10(%d) = %v want %v", i, got, want)
1175+
}
1176+
if want, got := int16(i)%int16(pow2[11]) == 0, divisible_int16_2to11(int16(i)); got != want {
1177+
t.Errorf("divisible_int16_2to11(%d) = %v want %v", i, got, want)
1178+
}
1179+
if want, got := int16(i)%int16(pow2[12]) == 0, divisible_int16_2to12(int16(i)); got != want {
1180+
t.Errorf("divisible_int16_2to12(%d) = %v want %v", i, got, want)
1181+
}
1182+
if want, got := int16(i)%int16(pow2[13]) == 0, divisible_int16_2to13(int16(i)); got != want {
1183+
t.Errorf("divisible_int16_2to13(%d) = %v want %v", i, got, want)
1184+
}
1185+
if want, got := int16(i)%int16(pow2[14]) == 0, divisible_int16_2to14(int16(i)); got != want {
1186+
t.Errorf("divisible_int16_2to14(%d) = %v want %v", i, got, want)
1187+
}
1188+
}
1189+
// spot check for int32 and int64
1190+
// The divisors are held in variables and used below to compute the expected results.
var (
1191+
two4 int64 = 1 << 4
1192+
two15 int64 = 1 << 15
1193+
two26 int64 = 1 << 26
1194+
two34 int64 = 1 << 34
1195+
two48 int64 = 1 << 48
1196+
two57 int64 = 1 << 57
1197+
)
1198+
// One row per power: a multiple, a non-multiple, a negative multiple and a
// negative non-multiple of that power.
var xs = []int64{two4, two4 + 3, -3 * two4, -3*two4 + 1,
1199+
two15, two15 + 3, -3 * two15, -3*two15 + 1,
1200+
two26, two26 + 37, -5 * two26, -5*two26 + 2,
1201+
two34, two34 + 356, -7 * two34, -7*two34 + 13,
1202+
two48, two48 + 3000, -12 * two48, -12*two48 + 1111,
1203+
two57, two57 + 397654, -15 * two57, -15*two57 + 11234,
1204+
}
1205+
for _, x := range xs {
1206+
// Only values that survive a round trip through int32 are checked against the int32 helpers.
if int64(int32(x)) == x {
1207+
if want, got := int32(x)%int32(two4) == 0, divisible_int32_2to4(int32(x)); got != want {
1208+
t.Errorf("divisible_int32_2to4(%d) = %v want %v", x, got, want)
1209+
}
1210+
1211+
if want, got := int32(x)%int32(two15) == 0, divisible_int32_2to15(int32(x)); got != want {
1212+
t.Errorf("divisible_int32_2to15(%d) = %v want %v", x, got, want)
1213+
}
1214+
1215+
if want, got := int32(x)%int32(two26) == 0, divisible_int32_2to26(int32(x)); got != want {
1216+
t.Errorf("divisible_int32_2to26(%d) = %v want %v", x, got, want)
1217+
}
1218+
}
1219+
// spot check for int64: every x is checked against every int64 helper
1220+
if want, got := x%two4 == 0, divisible_int64_2to4(x); got != want {
1221+
t.Errorf("divisible_int64_2to4(%d) = %v want %v", x, got, want)
1222+
}
1223+
1224+
if want, got := x%two15 == 0, divisible_int64_2to15(x); got != want {
1225+
t.Errorf("divisible_int64_2to15(%d) = %v want %v", x, got, want)
1226+
}
1227+
1228+
if want, got := x%two26 == 0, divisible_int64_2to26(x); got != want {
1229+
t.Errorf("divisible_int64_2to26(%d) = %v want %v", x, got, want)
1230+
}
1231+
1232+
if want, got := x%two34 == 0, divisible_int64_2to34(x); got != want {
1233+
t.Errorf("divisible_int64_2to34(%d) = %v want %v", x, got, want)
1234+
}
1235+
1236+
if want, got := x%two48 == 0, divisible_int64_2to48(x); got != want {
1237+
t.Errorf("divisible_int64_2to48(%d) = %v want %v", x, got, want)
1238+
}
1239+
1240+
if want, got := x%two57 == 0, divisible_int64_2to57(x); got != want {
1241+
t.Errorf("divisible_int64_2to57(%d) = %v want %v", x, got, want)
1242+
}
1243+
1244+
}
1245+
}

src/cmd/compile/internal/ssa/gen/generic.rules

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,6 +1163,41 @@
11631163
// Unsigned 64-bit x%c with constant c > 0 (x not itself a constant, umagicOK(64,c) holding)
// becomes x - (x/c)*c.
(Mod64u <t> x (Const64 [c])) && x.Op != OpConst64 && c > 0 && umagicOK(64,c)
11641164
-> (Sub64 x (Mul64 <t> (Div64u <t> x (Const64 <t> [c])) (Const64 <t> [c])))
11651165

1166+
// Divisibility checks for signed integers by a power-of-two constant are a simple mask.
1167+
// However, we must match against the rewritten n%c == 0 -> n - c*(n/c) == 0 -> n == c*(n/c)
1168+
// where n/c contains fixup code to handle signed n.
1169+
// 8-bit case: matches n == ((n + fixup) >> k) << k, where fixup is the sign
// (n >> 7, arithmetic) shifted right unsigned by kbar = 8-k, for 0 < k < 7,
// and replaces it with n & (1<<k - 1) == 0.
(Eq8 n (Lsh8x64
1170+
(Rsh8x64
1171+
(Add8 <t> n (Rsh8Ux64 <t> (Rsh8x64 <t> n (Const64 <typ.UInt64> [ 7])) (Const64 <typ.UInt64> [kbar])))
1172+
(Const64 <typ.UInt64> [k]))
1173+
(Const64 <typ.UInt64> [k]))
1174+
) && k > 0 && k < 7 && kbar == 8 - k
1175+
-> (Eq8 (And8 <t> n (Const8 <t> [int64(1<<uint(k)-1)])) (Const8 <t> [0]))
1176+
1177+
// 16-bit case: matches n == ((n + fixup) >> k) << k, where fixup is the sign
// (n >> 15, arithmetic) shifted right unsigned by kbar = 16-k, for 0 < k < 15,
// and replaces it with n & (1<<k - 1) == 0.
(Eq16 n (Lsh16x64
1178+
(Rsh16x64
1179+
(Add16 <t> n (Rsh16Ux64 <t> (Rsh16x64 <t> n (Const64 <typ.UInt64> [15])) (Const64 <typ.UInt64> [kbar])))
1180+
(Const64 <typ.UInt64> [k]))
1181+
(Const64 <typ.UInt64> [k]))
1182+
) && k > 0 && k < 15 && kbar == 16 - k
1183+
-> (Eq16 (And16 <t> n (Const16 <t> [int64(1<<uint(k)-1)])) (Const16 <t> [0]))
1184+
1185+
// 32-bit case: matches n == ((n + fixup) >> k) << k, where fixup is the sign
// (n >> 31, arithmetic) shifted right unsigned by kbar = 32-k, for 0 < k < 31,
// and replaces it with n & (1<<k - 1) == 0.
(Eq32 n (Lsh32x64
1186+
(Rsh32x64
1187+
(Add32 <t> n (Rsh32Ux64 <t> (Rsh32x64 <t> n (Const64 <typ.UInt64> [31])) (Const64 <typ.UInt64> [kbar])))
1188+
(Const64 <typ.UInt64> [k]))
1189+
(Const64 <typ.UInt64> [k]))
1190+
) && k > 0 && k < 31 && kbar == 32 - k
1191+
-> (Eq32 (And32 <t> n (Const32 <t> [int64(1<<uint(k)-1)])) (Const32 <t> [0]))
1192+
1193+
// 64-bit case: matches n == ((n + fixup) >> k) << k, where fixup is the sign
// (n >> 63, arithmetic) shifted right unsigned by kbar = 64-k, for 0 < k < 63,
// and replaces it with n & (1<<k - 1) == 0.
(Eq64 n (Lsh64x64
1194+
(Rsh64x64
1195+
(Add64 <t> n (Rsh64Ux64 <t> (Rsh64x64 <t> n (Const64 <typ.UInt64> [63])) (Const64 <typ.UInt64> [kbar])))
1196+
(Const64 <typ.UInt64> [k]))
1197+
(Const64 <typ.UInt64> [k]))
1198+
) && k > 0 && k < 63 && kbar == 64 - k
1199+
-> (Eq64 (And64 <t> n (Const64 <t> [int64(1<<uint(k)-1)])) (Const64 <t> [0]))
1200+
11661201
// Comparing a difference against zero becomes a direct comparison of the
// operands (x-y == 0 -> x == y, x-y != 0 -> x != y), applied only when the
// subtraction has a single use.
(Eq(8|16|32|64) s:(Sub(8|16|32|64) x y) (Const(8|16|32|64) [0])) && s.Uses == 1 -> (Eq(8|16|32|64) x y)
11671202
(Neq(8|16|32|64) s:(Sub(8|16|32|64) x y) (Const(8|16|32|64) [0])) && s.Uses == 1 -> (Neq(8|16|32|64) x y)
11681203

0 commit comments

Comments
 (0)