Skip to content

Commit 58572e0

Browse files
Setting enable-compiler-stick-unstick default, enable also if saturation is on (#2881)
* Turn on enable-compiler-stick-unstick default, enable it also if nnpa-saturation is on Signed-off-by: Alexandre Eichenberger <[email protected]>
1 parent 67ea9b5 commit 58572e0

9 files changed

+299
-7
lines changed

src/Accelerators/NNPA/Compiler/NNPACompilerOptions.cpp

+5-2
Original file line numberDiff line numberDiff line change
@@ -49,11 +49,13 @@ llvm::cl::opt<bool> nnpaEnableZHighDecomposeStickUnstick(
4949
"Default is false."),
5050
llvm::cl::init(false), llvm::cl::cat(OnnxMlirOptions));
5151

52+
// Enabled by default now; could also enable it only if parallel is on, as parallel
53+
// stick/unstick is quite a bit faster than sequential.
5254
llvm::cl::opt<bool> nnpaEnableCompilerStickUnstick(
5355
"enable-compiler-stick-unstick",
5456
llvm::cl::desc("[Experimental feature] Enable the compiler generate some "
55-
"stick/unstick code. Default is false."),
56-
llvm::cl::init(false), llvm::cl::cat(OnnxMlirOptions));
57+
"stick/unstick code. Default is true."),
58+
llvm::cl::init(true), llvm::cl::cat(OnnxMlirOptions));
5759

5860
llvm::cl::opt<bool> nnpaEnableScalarBcastBinary(
5961
"nnpa-enable-scalar-bcast-binary",
@@ -93,6 +95,7 @@ llvm::cl::opt<NNPAPlacementHeuristic> nnpaPlacementHeuristic{
9395

9496
llvm::cl::opt<bool> nnpaEnableSaturation("nnpa-saturation",
9597
llvm::cl::desc("Enable saturating f32 values before stickify them."
98+
"This option turns enable-compiler-stick-unstick on."
9699
"Default is false."),
97100
llvm::cl::init(false), llvm::cl::cat(OnnxMlirCommonOptions));
98101

src/Accelerators/NNPA/Compiler/NNPACompilerUtils.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ namespace onnx_mlir {
4848

4949
void configurePassesNNPA() {
5050
configureOnnxToZHighLoweringPass(optReport == OptReport::NNPAUnsupportedOps);
51+
// Compiler-generated sticks support saturation, so force their usage.
52+
// TODO: remove this if zDNN adds support for saturation.
53+
if (nnpaEnableSaturation)
54+
nnpaEnableCompilerStickUnstick = true;
5155
}
5256

5357
void addONNXToZHighPasses(mlir::PassManager &pm) {

src/Conversion/ONNXToKrnl/Math/Elementwise.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -588,7 +588,7 @@ template <>
588588
double analyzeSimdFor<ONNXHardSigmoidOp>(
589589
Type t, Operation *op, int64_t &von, int64_t &son) {
590590
return simdAnalysis(
591-
{GenericOps::ArithmeticGop, GenericOps::MulGop}, {2, 1}, t, von, son);
591+
{GenericOps::ArithmeticGop, GenericOps::MulGop}, {3, 1}, t, von, son);
592592
}
593593

594594
template <>

test/mlir/accelerators/nnpa/driver/ccfd.mlir

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: ccfd=$(dirname %s)/ccfd.onnx && curl -L https://github.com/IBM/ai-on-z-fraud-detection/raw/main/onnx%20models/ccf_lstm_static_tf2onnx_OS_new.onnx -o ${ccfd} && onnx-mlir --mcpu=z16 --maccel=NNPA --EmitMLIR --printIR -tag="test" ${ccfd} | FileCheck %s && rm -rf ${ccfd}
1+
// RUN: ccfd=$(dirname %s)/ccfd.onnx && curl -L https://github.com/IBM/ai-on-z-fraud-detection/raw/main/onnx%20models/ccf_lstm_static_tf2onnx_OS_new.onnx -o ${ccfd} && onnx-mlir --mcpu=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" ${ccfd} | FileCheck %s && rm -rf ${ccfd}
22

33
// COM: This test is to check regression on the IBM CCFD model.
44
// COM: We expect that there is only one zlow.stick for the input and one zlow.unstick for the output.

test/mlir/accelerators/nnpa/driver/data-transformation-on-ztensor-num2.mlir

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --EmitMLIR --printIR -tag="test" %s | FileCheck %s
1+
// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" %s | FileCheck %s
22

33
// -----
44

test/mlir/accelerators/nnpa/driver/data-transformation-on-ztensor.mlir

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --EmitMLIR --printIR -tag="test" %s | FileCheck %s
1+
// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" %s | FileCheck %s
22

33
// -----
44

test/mlir/accelerators/nnpa/driver/softmax-matmul-in-attention-layer.mlir

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --EmitMLIR --printIR %s | FileCheck %s
1+
// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR %s | FileCheck %s
22

33
// Check whether the compiler can remove unstick/stick so that the output of zdnn softmax is passed directly to zdnn matmul.
44
func.func @softmax_matmul(%arg0: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {

test/mlir/accelerators/nnpa/transform/fold-std-alloc.mlir

+2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
// RUN: onnx-mlir-opt --mcpu=z16 --maccel=NNPA --fold-std-alloc %s -split-input-file | FileCheck %s
22

3+
// -----
4+
35
func.func @should_fold() -> memref<3xi64> {
46
%c0 = arith.constant 0 : index
57
%c1 = arith.constant 1 : index

test/mlir/accelerators/nnpa/transform/zlow-stick-unstick-expansion.mlir

+283
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)