Skip to content

Commit a52b44f

Browse files
Ilya Rezvov authored and V8 LUCI CQ committed
[wasm-simd] Prototype relaxed integer Dot product instructions
Prototype the instructions in the interpreter and on Arm64. Details of the instruction lowerings on all relevant architectures can be found at: WebAssembly/relaxed-simd#52 Bug: v8:12908 Change-Id: If8ffb82c38042191c67c9b5c23a231877d4f2159 Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3679848 Reviewed-by: Clemens Backes <[email protected]> Commit-Queue: Ilya Rezvov <[email protected]> Reviewed-by: Deepti Gandluri <[email protected]> Cr-Commit-Position: refs/heads/main@{#80924}
1 parent 90c80f7 commit a52b44f

20 files changed

+248
-2
lines changed

src/compiler/backend/arm64/code-generator-arm64.cc

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2488,6 +2488,31 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
24882488
__ Addp(i.OutputSimd128Register().V4S(), tmp1, tmp2);
24892489
break;
24902490
}
2491+
// Arm64 lowering of the i16x8 dot product of two i8x16 inputs.
// Inputs 0 and 1 are the two SIMD operands; the result goes to the 8H view
// of the output register. Two 8H scratch registers hold the products.
case kArm64I16x8DotI8x16S: {
2492+
UseScratchRegisterScope scope(tasm());
2493+
VRegister lhs = i.InputSimd128Register(0);
2494+
VRegister rhs = i.InputSimd128Register(1);
2495+
VRegister tmp1 = scope.AcquireV(kFormat8H);
2496+
VRegister tmp2 = scope.AcquireV(kFormat8H);
2497+
// Signed widening multiplies: Smull on the V8B views, Smull2 on the V16B
// views, each writing one 8H scratch register.
__ Smull(tmp1, lhs.V8B(), rhs.V8B());
2498+
__ Smull2(tmp2, lhs.V16B(), rhs.V16B());
2499+
// Pairwise add of the two product vectors into the 8H output.
__ Addp(i.OutputSimd128Register().V8H(), tmp1, tmp2);
2500+
break;
2501+
}
2502+
// Arm64 lowering of the i32x4 dot product of two i8x16 inputs with an
// accumulator. Inputs 0 and 1 are multiplied; input 2 is added (as 4S) to
// the reduced products, and the sum is written to the 4S output.
case kArm64I32x4DotI8x16AddS: {
2503+
UseScratchRegisterScope scope(tasm());
2504+
VRegister lhs = i.InputSimd128Register(0);
2505+
VRegister rhs = i.InputSimd128Register(1);
2506+
VRegister tmp1 = scope.AcquireV(kFormat8H);
2507+
VRegister tmp2 = scope.AcquireV(kFormat8H);
2508+
// Same first steps as kArm64I16x8DotI8x16S, but the pairwise sum is kept in
// tmp1 instead of going to the output.
__ Smull(tmp1, lhs.V8B(), rhs.V8B());
2509+
__ Smull2(tmp2, lhs.V16B(), rhs.V16B());
2510+
__ Addp(tmp1, tmp1, tmp2);
2511+
// Second pairwise reduction, this time from the 8H view into the 4S view of
// the same scratch register.
__ Saddlp(tmp1.V4S(), tmp1);
2512+
// Add the accumulator operand (input 2).
__ Add(i.OutputSimd128Register().V4S(), tmp1.V4S(),
2513+
i.InputSimd128Register(2).V4S());
2514+
break;
2515+
}
24912516
case kArm64IExtractLaneU: {
24922517
VectorFormat f = VectorFormatFillQ(LaneSizeField::decode(opcode));
24932518
__ Umov(i.OutputRegister32(), i.InputSimd128Register(0).Format(f),

src/compiler/backend/arm64/instruction-codes-arm64.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,8 @@ namespace compiler {
276276
V(Arm64IGeU) \
277277
V(Arm64I32x4BitMask) \
278278
V(Arm64I32x4DotI16x8S) \
279+
V(Arm64I16x8DotI8x16S) \
280+
V(Arm64I32x4DotI8x16AddS) \
279281
V(Arm64I32x4TruncSatF64x2SZero) \
280282
V(Arm64I32x4TruncSatF64x2UZero) \
281283
V(Arm64IExtractLaneU) \

src/compiler/backend/arm64/instruction-scheduler-arm64.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
225225
case kArm64IGeU:
226226
case kArm64I32x4BitMask:
227227
case kArm64I32x4DotI16x8S:
228+
case kArm64I16x8DotI8x16S:
229+
case kArm64I32x4DotI8x16AddS:
228230
case kArm64I32x4TruncSatF64x2SZero:
229231
case kArm64I32x4TruncSatF64x2UZero:
230232
case kArm64IExtractLaneU:

src/compiler/backend/arm64/instruction-selector-arm64.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3538,6 +3538,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
35383538
#define SIMD_BINOP_LIST(V) \
35393539
V(I32x4Mul, kArm64I32x4Mul) \
35403540
V(I32x4DotI16x8S, kArm64I32x4DotI16x8S) \
3541+
V(I16x8DotI8x16I7x16S, kArm64I16x8DotI8x16S) \
35413542
V(I16x8SConvertI32x4, kArm64I16x8SConvertI32x4) \
35423543
V(I16x8Mul, kArm64I16x8Mul) \
35433544
V(I16x8UConvertI32x4, kArm64I16x8UConvertI32x4) \
@@ -3724,6 +3725,13 @@ void InstructionSelector::VisitS128Zero(Node* node) {
37243725
Emit(kArm64S128Zero, g.DefineAsRegister(node));
37253726
}
37263727

3728+
// Arm64 instruction selection for I32x4DotI8x16I7x16AddS: emits
// kArm64I32x4DotI8x16AddS with the node defined as a register and all three
// value inputs (two multiplicands, then the accumulator) used as registers.
void InstructionSelector::VisitI32x4DotI8x16I7x16AddS(Node* node) {
3729+
Arm64OperandGenerator g(this);
3730+
Emit(
3731+
kArm64I32x4DotI8x16AddS, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)),
3732+
g.UseRegister(node->InputAt(1)), g.UseRegister(node->InputAt(2)));
3733+
}
3734+
37273735
#define SIMD_VISIT_EXTRACT_LANE(Type, T, Sign, LaneSize) \
37283736
void InstructionSelector::Visit##Type##ExtractLane##Sign(Node* node) { \
37293737
VisitRRI(this, \

src/compiler/backend/instruction-selector.cc

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2372,6 +2372,10 @@ void InstructionSelector::VisitNode(Node* node) {
23722372
return MarkAsSimd128(node), VisitI32x4RelaxedTruncF32x4U(node);
23732373
case IrOpcode::kI16x8RelaxedQ15MulRS:
23742374
return MarkAsSimd128(node), VisitI16x8RelaxedQ15MulRS(node);
2375+
case IrOpcode::kI16x8DotI8x16I7x16S:
2376+
return MarkAsSimd128(node), VisitI16x8DotI8x16I7x16S(node);
2377+
case IrOpcode::kI32x4DotI8x16I7x16AddS:
2378+
return MarkAsSimd128(node), VisitI32x4DotI8x16I7x16AddS(node);
23752379
default:
23762380
FATAL("Unexpected operator #%d:%s @ node #%d", node->opcode(),
23772381
node->op()->mnemonic(), node->id());
@@ -2830,6 +2834,16 @@ void InstructionSelector::VisitI16x8RelaxedQ15MulRS(Node* node) {
28302834
}
28312835
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM
28322836

2837+
#if !V8_TARGET_ARCH_ARM64
2838+
// Generic stub compiled when the target is not Arm64 (see the enclosing
// #if !V8_TARGET_ARCH_ARM64): the operation has no lowering there yet.
void InstructionSelector::VisitI16x8DotI8x16I7x16S(Node* node) {
2839+
UNIMPLEMENTED();
2840+
}
2841+
2842+
// Generic stub compiled when the target is not Arm64 (see the enclosing
// #if !V8_TARGET_ARCH_ARM64): the operation has no lowering there yet.
void InstructionSelector::VisitI32x4DotI8x16I7x16AddS(Node* node) {
2843+
UNIMPLEMENTED();
2844+
}
2845+
#endif // !V8_TARGET_ARCH_ARM64
2846+
28332847
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
28342848

28352849
void InstructionSelector::VisitParameter(Node* node) {

src/compiler/machine-operator.cc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -608,7 +608,9 @@ std::ostream& operator<<(std::ostream& os, TruncateKind kind) {
608608
V(I32x4RelaxedTruncF32x4U, Operator::kNoProperties, 1, 0, 1) \
609609
V(I32x4RelaxedTruncF64x2SZero, Operator::kNoProperties, 1, 0, 1) \
610610
V(I32x4RelaxedTruncF64x2UZero, Operator::kNoProperties, 1, 0, 1) \
611-
V(I16x8RelaxedQ15MulRS, Operator::kCommutative, 2, 0, 1)
611+
V(I16x8RelaxedQ15MulRS, Operator::kCommutative, 2, 0, 1) \
612+
V(I16x8DotI8x16I7x16S, Operator::kCommutative, 2, 0, 1) \
613+
V(I32x4DotI8x16I7x16AddS, Operator::kNoProperties, 3, 0, 1)
612614

613615
// The format is:
614616
// V(Name, properties, value_input_count, control_input_count, output_count)

src/compiler/machine-operator.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -925,6 +925,8 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
925925
const Operator* I32x4RelaxedTruncF64x2SZero();
926926
const Operator* I32x4RelaxedTruncF64x2UZero();
927927
const Operator* I16x8RelaxedQ15MulRS();
928+
const Operator* I16x8DotI8x16I7x16S();
929+
const Operator* I32x4DotI8x16I7x16AddS();
928930

929931
// load [base + index]
930932
const Operator* Load(LoadRepresentation rep);

src/compiler/opcodes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1001,6 +1001,8 @@
10011001
V(I32x4RelaxedTruncF64x2SZero) \
10021002
V(I32x4RelaxedTruncF64x2UZero) \
10031003
V(I16x8RelaxedQ15MulRS) \
1004+
V(I16x8DotI8x16I7x16S) \
1005+
V(I32x4DotI8x16I7x16AddS) \
10041006
V(I8x16Shuffle) \
10051007
V(V128AnyTrue) \
10061008
V(I64x2AllTrue) \

src/compiler/wasm-compiler.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4476,6 +4476,12 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
44764476
case wasm::kExprI16x8RelaxedQ15MulRS:
44774477
return graph()->NewNode(mcgraph()->machine()->I16x8RelaxedQ15MulRS(),
44784478
inputs[0], inputs[1]);
4479+
case wasm::kExprI16x8DotI8x16I7x16S:
4480+
return graph()->NewNode(mcgraph()->machine()->I16x8DotI8x16I7x16S(),
4481+
inputs[0], inputs[1]);
4482+
case wasm::kExprI32x4DotI8x16I7x16AddS:
4483+
return graph()->NewNode(mcgraph()->machine()->I32x4DotI8x16I7x16AddS(),
4484+
inputs[0], inputs[1], inputs[2]);
44794485
case wasm::kExprI16x8Abs:
44804486
return graph()->NewNode(mcgraph()->machine()->I16x8Abs(), inputs[0]);
44814487
case wasm::kExprI16x8BitMask:

src/wasm/baseline/arm/liftoff-assembler-arm.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3510,6 +3510,19 @@ void LiftoffAssembler::emit_i16x8_relaxed_q15mulr_s(LiftoffRegister dst,
35103510
liftoff::GetSimd128Register(src2));
35113511
}
35123512

3513+
// Arm Liftoff: no code is emitted for this operation; the function only
// calls bailout() with reason kSimd and its own name as the detail string.
void LiftoffAssembler::emit_i16x8_dot_i8x16_i7x16_s(LiftoffRegister dst,
3514+
LiftoffRegister lhs,
3515+
LiftoffRegister rhs) {
3516+
bailout(kSimd, "emit_i16x8_dot_i8x16_i7x16_s");
3517+
}
3518+
3519+
// Arm Liftoff: no code is emitted for this operation; the function only
// calls bailout() with reason kSimd and its own name as the detail string.
void LiftoffAssembler::emit_i32x4_dot_i8x16_i7x16_add_s(LiftoffRegister dst,
3520+
LiftoffRegister lhs,
3521+
LiftoffRegister rhs,
3522+
LiftoffRegister acc) {
3523+
bailout(kSimd, "emit_i32x4_dot_i8x16_i7x16_add_s");
3524+
}
3525+
35133526
void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
35143527
LiftoffRegister lhs,
35153528
LiftoffRegister rhs,

src/wasm/baseline/arm64/liftoff-assembler-arm64.h

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3175,6 +3175,31 @@ void LiftoffAssembler::emit_i16x8_relaxed_q15mulr_s(LiftoffRegister dst,
31753175
Sqrdmulh(dst.fp().V8H(), src1.fp().V8H(), src2.fp().V8H());
31763176
}
31773177

3178+
// Arm64 Liftoff emitter for the i16x8 dot product of two i8x16 operands.
// Uses the same Smull / Smull2 / Addp sequence as the kArm64I16x8DotI8x16S
// case in the Arm64 code generator.
//   dst: receives the result (8H view).
//   lhs, rhs: the two operands to multiply.
void LiftoffAssembler::emit_i16x8_dot_i8x16_i7x16_s(LiftoffRegister dst,
3179+
LiftoffRegister lhs,
3180+
LiftoffRegister rhs) {
3181+
UseScratchRegisterScope scope(this);
3182+
VRegister tmp1 = scope.AcquireV(kFormat8H);
3183+
VRegister tmp2 = scope.AcquireV(kFormat8H);
3184+
// Signed widening multiplies into the two 8H scratch registers.
Smull(tmp1, lhs.fp().V8B(), rhs.fp().V8B());
3185+
Smull2(tmp2, lhs.fp().V16B(), rhs.fp().V16B());
3186+
// Pairwise add of the products into dst.
Addp(dst.fp().V8H(), tmp1, tmp2);
3187+
}
3188+
3189+
// Arm64 Liftoff emitter for the i32x4 dot product of two i8x16 operands with
// an accumulator. Uses the same instruction sequence as the
// kArm64I32x4DotI8x16AddS case in the Arm64 code generator.
//   dst: receives the result (4S view).
//   lhs, rhs: the two operands to multiply.
//   acc: accumulator added (as 4S) to the reduced products.
void LiftoffAssembler::emit_i32x4_dot_i8x16_i7x16_add_s(LiftoffRegister dst,
3190+
LiftoffRegister lhs,
3191+
LiftoffRegister rhs,
3192+
LiftoffRegister acc) {
3193+
UseScratchRegisterScope scope(this);
3194+
VRegister tmp1 = scope.AcquireV(kFormat8H);
3195+
VRegister tmp2 = scope.AcquireV(kFormat8H);
3196+
// Signed widening multiplies, then a pairwise add kept in tmp1.
Smull(tmp1, lhs.fp().V8B(), rhs.fp().V8B());
3197+
Smull2(tmp2, lhs.fp().V16B(), rhs.fp().V16B());
3198+
Addp(tmp1, tmp1, tmp2);
3199+
// Second pairwise reduction from the 8H view into the 4S view of tmp1.
Saddlp(tmp1.V4S(), tmp1);
3200+
// dst is only written here, after all reads of lhs and rhs.
Add(dst.fp().V4S(), tmp1.V4S(), acc.fp().V4S());
3201+
}
3202+
31783203
void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
31793204
LiftoffRegister src) {
31803205
Abs(dst.fp().V4S(), src.fp().V4S());

src/wasm/baseline/ia32/liftoff-assembler-ia32.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3657,6 +3657,19 @@ void LiftoffAssembler::emit_i16x8_relaxed_q15mulr_s(LiftoffRegister dst,
36573657
bailout(kRelaxedSimd, "emit_i16x8_relaxed_q15mulr_s");
36583658
}
36593659

3660+
// ia32 Liftoff: no code is emitted for this operation; the function only
// calls bailout() with reason kSimd and its own name as the detail string.
// NOTE(review): the neighbouring emit_i16x8_relaxed_q15mulr_s stub in this
// file bails out with kRelaxedSimd - confirm kSimd is the intended reason.
void LiftoffAssembler::emit_i16x8_dot_i8x16_i7x16_s(LiftoffRegister dst,
3661+
LiftoffRegister lhs,
3662+
LiftoffRegister rhs) {
3663+
bailout(kSimd, "emit_i16x8_dot_i8x16_i7x16_s");
3664+
}
3665+
3666+
// ia32 Liftoff: no code is emitted for this operation; the function only
// calls bailout() with reason kSimd and its own name as the detail string.
// NOTE(review): the neighbouring emit_i16x8_relaxed_q15mulr_s stub in this
// file bails out with kRelaxedSimd - confirm kSimd is the intended reason.
void LiftoffAssembler::emit_i32x4_dot_i8x16_i7x16_add_s(LiftoffRegister dst,
3667+
LiftoffRegister lhs,
3668+
LiftoffRegister rhs,
3669+
LiftoffRegister acc) {
3670+
bailout(kSimd, "emit_i32x4_dot_i8x16_i7x16_add_s");
3671+
}
3672+
36603673
void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
36613674
LiftoffRegister src) {
36623675
if (dst.fp() == src.fp()) {

src/wasm/baseline/liftoff-assembler.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,6 +1259,13 @@ class LiftoffAssembler : public TurboAssembler {
12591259
inline void emit_i16x8_relaxed_q15mulr_s(LiftoffRegister dst,
12601260
LiftoffRegister src1,
12611261
LiftoffRegister src2);
1262+
// Platform-specific emitter for kExprI16x8DotI8x16I7x16S; LiftoffCompiler
// dispatches to it as a binary S128 -> S128 operation.
inline void emit_i16x8_dot_i8x16_i7x16_s(LiftoffRegister dst,
1263+
LiftoffRegister src1,
1264+
LiftoffRegister src2);
1265+
// Platform-specific emitter for kExprI32x4DotI8x16I7x16AddS; takes the two
// multiplicands plus an accumulator and is called directly from
// LiftoffCompiler.
inline void emit_i32x4_dot_i8x16_i7x16_add_s(LiftoffRegister dst,
1266+
LiftoffRegister src1,
1267+
LiftoffRegister src2,
1268+
LiftoffRegister acc);
12621269
inline void emit_i32x4_neg(LiftoffRegister dst, LiftoffRegister src);
12631270
inline void emit_i32x4_alltrue(LiftoffRegister dst, LiftoffRegister src);
12641271
inline void emit_i32x4_bitmask(LiftoffRegister dst, LiftoffRegister src);

src/wasm/baseline/liftoff-compiler.cc

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4102,6 +4102,22 @@ class LiftoffCompiler {
41024102
case wasm::kExprI32x4RelaxedTruncF64x2UZero:
41034103
return EmitUnOp<kS128, kS128>(
41044104
&LiftoffAssembler::emit_i32x4_relaxed_trunc_f64x2_u_zero);
4105+
case wasm::kExprI16x8DotI8x16I7x16S:
4106+
return EmitBinOp<kS128, kS128>(
4107+
&LiftoffAssembler::emit_i16x8_dot_i8x16_i7x16_s);
4108+
case wasm::kExprI32x4DotI8x16I7x16AddS: {
4109+
// There is no helper for an instruction with 3 SIMD operands
4110+
// and we do not expect to add any more, so inlining it here.
4111+
static constexpr RegClass res_rc = reg_class_for(kS128);
4112+
// Operands come off the value stack in reverse order: acc, then rhs, then
// lhs. Registers already popped are passed in the list argument of the
// later PopToRegister calls (presumably so they are not handed out again).
LiftoffRegister acc = __ PopToRegister();
4113+
LiftoffRegister rhs = __ PopToRegister(LiftoffRegList{acc});
4114+
LiftoffRegister lhs = __ PopToRegister(LiftoffRegList{rhs, acc});
4115+
LiftoffRegister dst = __ GetUnusedRegister(res_rc, {lhs, rhs, acc}, {});
4116+
4117+
// Emit the operation and push its S128 result.
__ emit_i32x4_dot_i8x16_i7x16_add_s(dst, lhs, rhs, acc);
4118+
__ PushRegister(kS128, dst);
4119+
return;
4120+
}
41054121
default:
41064122
unsupported(decoder, kSimd, "simd");
41074123
}

src/wasm/baseline/x64/liftoff-assembler-x64.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3231,6 +3231,19 @@ void LiftoffAssembler::emit_i16x8_relaxed_q15mulr_s(LiftoffRegister dst,
32313231
bailout(kRelaxedSimd, "emit_i16x8_relaxed_q15mulr_s");
32323232
}
32333233

3234+
// x64 Liftoff: no code is emitted for this operation; the function only
// calls bailout() with reason kSimd and its own name as the detail string.
// NOTE(review): the neighbouring emit_i16x8_relaxed_q15mulr_s stub in this
// file bails out with kRelaxedSimd - confirm kSimd is the intended reason.
void LiftoffAssembler::emit_i16x8_dot_i8x16_i7x16_s(LiftoffRegister dst,
3235+
LiftoffRegister lhs,
3236+
LiftoffRegister rhs) {
3237+
bailout(kSimd, "emit_i16x8_dot_i8x16_i7x16_s");
3238+
}
3239+
3240+
// x64 Liftoff: no code is emitted for this operation; the function only
// calls bailout() with reason kSimd and its own name as the detail string.
// NOTE(review): the neighbouring emit_i16x8_relaxed_q15mulr_s stub in this
// file bails out with kRelaxedSimd - confirm kSimd is the intended reason.
void LiftoffAssembler::emit_i32x4_dot_i8x16_i7x16_add_s(LiftoffRegister dst,
3241+
LiftoffRegister lhs,
3242+
LiftoffRegister rhs,
3243+
LiftoffRegister acc) {
3244+
bailout(kSimd, "emit_i32x4_dot_i8x16_i7x16_add_s");
3245+
}
3246+
32343247
void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
32353248
LiftoffRegister src) {
32363249
if (dst.fp() == src.fp()) {

src/wasm/wasm-opcodes-inl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,8 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
377377
CASE_I32x4_OP(RelaxedTruncF64x2SZero, "relaxed_trunc_f64x2_s_zero");
378378
CASE_I32x4_OP(RelaxedTruncF64x2UZero, "relaxed_trunc_f64x2_u_zero");
379379
CASE_I16x8_OP(RelaxedQ15MulRS, "relaxed_q15mulr_s")
380+
CASE_I16x8_OP(DotI8x16I7x16S, "dot_i8x16_i7x16_s")
381+
CASE_I32x4_OP(DotI8x16I7x16AddS, "dot_i8x16_i7x16_add_s")
380382

381383
// Atomic operations.
382384
CASE_OP(AtomicNotify, "atomic.notify")

src/wasm/wasm-opcodes.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -541,7 +541,9 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
541541
V(F32x4RelaxedMax, 0xfd10e, s_ss) \
542542
V(F64x2RelaxedMin, 0xfd10f, s_ss) \
543543
V(F64x2RelaxedMax, 0xfd110, s_ss) \
544-
V(I16x8RelaxedQ15MulRS, 0xfd111, s_ss)
544+
V(I16x8RelaxedQ15MulRS, 0xfd111, s_ss) \
545+
V(I16x8DotI8x16I7x16S, 0xfd112, s_ss) \
546+
V(I32x4DotI8x16I7x16AddS, 0xfd113, s_sss)
545547

546548
#define FOREACH_SIMD_1_OPERAND_1_PARAM_OPCODE(V) \
547549
V(I8x16ExtractLaneS, 0xfd15, _) \

test/cctest/wasm/test-run-wasm-relaxed-simd.cc

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,8 +411,66 @@ WASM_RELAXED_SIMD_TEST(I16x8RelaxedQ15MulRS) {
411411
RunI16x8BinOpTest<int16_t>(execution_tier, kExprI16x8RelaxedQ15MulRS,
412412
SaturateRoundingQMul<int16_t>);
413413
}
414+
414415
#endif // V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_ARM
415416

417+
#if V8_TARGET_ARCH_ARM64
418+
// Checks kExprI16x8DotI8x16I7x16S on splatted inputs: both i8 parameters are
// splatted to i8x16 vectors, the op's result is stored in global 0, and every
// one of the 8 result lanes is compared against the scalar expectation.
WASM_RELAXED_SIMD_TEST(I16x8DotI8x16I7x16S) {
419+
WasmRunner<int32_t, int8_t, int8_t> r(execution_tier);
420+
int16_t* g = r.builder().template AddGlobal<int16_t>(kWasmS128);
421+
// value1/value2 are the indices of the two i8 parameters.
byte value1 = 0, value2 = 1;
422+
byte temp1 = r.AllocateLocal(kWasmS128);
423+
byte temp2 = r.AllocateLocal(kWasmS128);
424+
BUILD(r, WASM_LOCAL_SET(temp1, WASM_SIMD_I8x16_SPLAT(WASM_LOCAL_GET(value1))),
425+
WASM_LOCAL_SET(temp2, WASM_SIMD_I8x16_SPLAT(WASM_LOCAL_GET(value2))),
426+
WASM_GLOBAL_SET(
427+
0, WASM_SIMD_BINOP(kExprI16x8DotI8x16I7x16S, WASM_LOCAL_GET(temp1),
428+
WASM_LOCAL_GET(temp2))),
429+
WASM_ONE);
430+
431+
for (int8_t x : compiler::ValueHelper::GetVector<int8_t>()) {
432+
for (int8_t y : compiler::ValueHelper::GetVector<int8_t>()) {
433+
// The second operand is masked to its low 7 bits before the call.
r.Call(x, y & 0x7F);
434+
// * 2 because each output lane is (x*y) + (x*y) = 2*x*y
435+
int16_t expected = base::MulWithWraparound(x * (y & 0x7F), 2);
436+
for (int i = 0; i < 8; i++) {
437+
CHECK_EQ(expected, LANE(g, i));
438+
}
439+
}
440+
}
441+
}
442+
443+
// Checks kExprI32x4DotI8x16I7x16AddS on splatted inputs: the two i8
// parameters are splatted to i8x16 vectors and the i32 parameter to an i32x4
// accumulator; the op's result is stored in global 0 and each of the 4 result
// lanes is compared against the scalar expectation.
WASM_RELAXED_SIMD_TEST(I32x4DotI8x16I7x16AddS) {
444+
WasmRunner<int32_t, int8_t, int8_t, int32_t> r(execution_tier);
445+
int32_t* g = r.builder().template AddGlobal<int32_t>(kWasmS128);
446+
// value1..value3 are the indices of the three parameters.
byte value1 = 0, value2 = 1, value3 = 2;
447+
byte temp1 = r.AllocateLocal(kWasmS128);
448+
byte temp2 = r.AllocateLocal(kWasmS128);
449+
byte temp3 = r.AllocateLocal(kWasmS128);
450+
BUILD(
451+
r, WASM_LOCAL_SET(temp1, WASM_SIMD_I8x16_SPLAT(WASM_LOCAL_GET(value1))),
452+
WASM_LOCAL_SET(temp2, WASM_SIMD_I8x16_SPLAT(WASM_LOCAL_GET(value2))),
453+
WASM_LOCAL_SET(temp3, WASM_SIMD_I32x4_SPLAT(WASM_LOCAL_GET(value3))),
454+
WASM_GLOBAL_SET(0, WASM_SIMD_TERNOP(
455+
kExprI32x4DotI8x16I7x16AddS, WASM_LOCAL_GET(temp1),
456+
WASM_LOCAL_GET(temp2), WASM_LOCAL_GET(temp3))),
457+
WASM_ONE);
458+
459+
for (int8_t x : compiler::ValueHelper::GetVector<int8_t>()) {
460+
for (int8_t y : compiler::ValueHelper::GetVector<int8_t>()) {
461+
for (int32_t z : compiler::ValueHelper::GetVector<int32_t>()) {
462+
// The second operand is masked to its low 7 bits before the call.
r.Call(x, y & 0x7F, z);
463+
// Each i32 lane sums four identical products (both inputs are splats),
// hence 4*x*y, plus the accumulator z.
int32_t expected = base::AddWithWraparound(
464+
base::MulWithWraparound(x * (y & 0x7F), 4), z);
465+
for (int i = 0; i < 4; i++) {
466+
CHECK_EQ(expected, LANE(g, i));
467+
}
468+
}
469+
}
470+
}
471+
}
472+
#endif // V8_TARGET_ARCH_ARM64
473+
416474
#undef WASM_RELAXED_SIMD_TEST
417475
} // namespace test_run_wasm_relaxed_simd
418476
} // namespace wasm

test/common/wasm/wasm-interpreter.cc

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2782,6 +2782,39 @@ class WasmInterpreterInternals {
27822782
*len += 16;
27832783
return true;
27842784
}
2785+
// Interpreter semantics for i16x8 dot product of two i8x16 vectors: result
// lane i is the sum of the products of input lanes 2*i and 2*i+1.
case kExprI16x8DotI8x16I7x16S: {
2786+
// Operands are popped in reverse order: v2 was pushed last.
int16 v2 = Pop().to_s128().to_i8x16();
2787+
int16 v1 = Pop().to_s128().to_i8x16();
2788+
int8 res;
2789+
for (size_t i = 0; i < 8; i++) {
2790+
int16_t lo = (v1.val[LANE(i * 2, v1)] * v2.val[LANE(i * 2, v2)]);
2791+
int16_t hi =
2792+
(v1.val[LANE(i * 2 + 1, v1)] * v2.val[LANE(i * 2 + 1, v2)]);
2793+
// The two 16-bit products are combined with wraparound addition.
res.val[LANE(i, res)] = base::AddWithWraparound(lo, hi);
2794+
}
2795+
Push(WasmValue(Simd128(res)));
2796+
return true;
2797+
}
2798+
// Interpreter semantics for i32x4 dot product of two i8x16 vectors with an
// accumulator: result lane i is the sum of the products of input lanes
// 4*i .. 4*i+3, plus lane i of the accumulator.
case kExprI32x4DotI8x16I7x16AddS: {
2799+
// Operands are popped in reverse order: accumulator first, then v2, v1.
int4 v3 = Pop().to_s128().to_i32x4();
2800+
int16 v2 = Pop().to_s128().to_i8x16();
2801+
int16 v1 = Pop().to_s128().to_i8x16();
2802+
int4 res;
2803+
for (size_t i = 0; i < 4; i++) {
2804+
int32_t a = (v1.val[LANE(i * 4, v1)] * v2.val[LANE(i * 4, v2)]);
2805+
int32_t b =
2806+
(v1.val[LANE(i * 4 + 1, v1)] * v2.val[LANE(i * 4 + 1, v2)]);
2807+
int32_t c =
2808+
(v1.val[LANE(i * 4 + 2, v1)] * v2.val[LANE(i * 4 + 2, v2)]);
2809+
int32_t d =
2810+
(v1.val[LANE(i * 4 + 3, v1)] * v2.val[LANE(i * 4 + 3, v2)]);
2811+
int32_t acc = v3.val[LANE(i, v3)];
2812+
// a + b + c + d should not wrap
// Only the final add with the accumulator uses wraparound arithmetic.
2813+
res.val[LANE(i, res)] = base::AddWithWraparound(a + b + c + d, acc);
2814+
}
2815+
Push(WasmValue(Simd128(res)));
2816+
return true;
2817+
}
27852818
case kExprI8x16RelaxedSwizzle:
27862819
case kExprI8x16Swizzle: {
27872820
int16 v2 = Pop().to_s128().to_i8x16();

0 commit comments

Comments
 (0)