Setting enable-compiler-stick-unstick default, enable also if saturation is on (#2881)

AlexandreEichenberger · web-flow · commit 58572e00c380 · 2024-07-23T09:46:57.000-04:00
* Turn on enable-compiler-stick-unstick by default; also enable it if nnpa-saturation is on
Signed-off-by: Alexandre Eichenberger <alexe@us.ibm.com>
diff --git a/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.cpp b/src/Accelerators/NNPA/Compiler/NNPACompilerOptions.cpp
@@ -49,11 +49,13 @@ llvm::cl::opt<bool> nnpaEnableZHighDecomposeStickUnstick(
         "Default is false."),
     llvm::cl::init(false), llvm::cl::cat(OnnxMlirOptions));
 
+// Enabled default now, could also enable it only if parallel is on as parallel
+// stick/unstick is quite a bit faster than sequential.
 llvm::cl::opt<bool> nnpaEnableCompilerStickUnstick(
     "enable-compiler-stick-unstick",
     llvm::cl::desc("[Experimental feature] Enable the compiler generate some "
-                   "stick/unstick code. Default is false."),
-    llvm::cl::init(false), llvm::cl::cat(OnnxMlirOptions));
+                   "stick/unstick code. Default is true."),
+    llvm::cl::init(true), llvm::cl::cat(OnnxMlirOptions));
 
 llvm::cl::opt<bool> nnpaEnableScalarBcastBinary(
     "nnpa-enable-scalar-bcast-binary",
@@ -93,6 +95,7 @@ llvm::cl::opt<NNPAPlacementHeuristic> nnpaPlacementHeuristic{
 
 llvm::cl::opt<bool> nnpaEnableSaturation("nnpa-saturation",
     llvm::cl::desc("Enable saturating f32 values before stickify them."
+                   "This option turns enable-compiler-stick-unstick on."
                    "Default is false."),
     llvm::cl::init(false), llvm::cl::cat(OnnxMlirCommonOptions));
 
diff --git a/src/Accelerators/NNPA/Compiler/NNPACompilerUtils.cpp b/src/Accelerators/NNPA/Compiler/NNPACompilerUtils.cpp
@@ -48,6 +48,10 @@ namespace onnx_mlir {
 
 void configurePassesNNPA() {
   configureOnnxToZHighLoweringPass(optReport == OptReport::NNPAUnsupportedOps);
+  // Compiler generated sticks supports saturation, so force its usage.
+  // TODO: remove this if zDNN adds support for saturation.
+  if (nnpaEnableSaturation)
+    nnpaEnableCompilerStickUnstick = true;
 }
 
 void addONNXToZHighPasses(mlir::PassManager &pm) {
diff --git a/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp b/src/Conversion/ONNXToKrnl/Math/Elementwise.cpp
@@ -588,7 +588,7 @@ template <>
 double analyzeSimdFor<ONNXHardSigmoidOp>(
     Type t, Operation *op, int64_t &von, int64_t &son) {
   return simdAnalysis(
-      {GenericOps::ArithmeticGop, GenericOps::MulGop}, {2, 1}, t, von, son);
+      {GenericOps::ArithmeticGop, GenericOps::MulGop}, {3, 1}, t, von, son);
 }
 
 template <>
diff --git a/test/mlir/accelerators/nnpa/driver/ccfd.mlir b/test/mlir/accelerators/nnpa/driver/ccfd.mlir
@@ -1,4 +1,4 @@
-// RUN: ccfd=$(dirname %s)/ccfd.onnx && curl -L https://github.com/IBM/ai-on-z-fraud-detection/raw/main/onnx%20models/ccf_lstm_static_tf2onnx_OS_new.onnx -o ${ccfd} && onnx-mlir --mcpu=z16 --maccel=NNPA --EmitMLIR --printIR -tag="test" ${ccfd} | FileCheck %s && rm -rf ${ccfd}
+// RUN: ccfd=$(dirname %s)/ccfd.onnx && curl -L https://github.com/IBM/ai-on-z-fraud-detection/raw/main/onnx%20models/ccf_lstm_static_tf2onnx_OS_new.onnx -o ${ccfd} && onnx-mlir --mcpu=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" ${ccfd} | FileCheck %s && rm -rf ${ccfd}
 
 // COM: This test is to check regression on the IBM CCFD model.
 // COM: We expect that there are only one zlow.stick for the input and one zlow.unstick for the output.
diff --git a/test/mlir/accelerators/nnpa/driver/data-transformation-on-ztensor-num2.mlir b/test/mlir/accelerators/nnpa/driver/data-transformation-on-ztensor-num2.mlir
@@ -1,4 +1,4 @@
-// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --EmitMLIR --printIR -tag="test" %s | FileCheck %s
+// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" %s | FileCheck %s
 
 // -----
 
diff --git a/test/mlir/accelerators/nnpa/driver/data-transformation-on-ztensor.mlir b/test/mlir/accelerators/nnpa/driver/data-transformation-on-ztensor.mlir
@@ -1,4 +1,4 @@
-// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --EmitMLIR --printIR -tag="test" %s | FileCheck %s
+// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" %s | FileCheck %s
 
 // -----
 
diff --git a/test/mlir/accelerators/nnpa/driver/softmax-matmul-in-attention-layer.mlir b/test/mlir/accelerators/nnpa/driver/softmax-matmul-in-attention-layer.mlir
@@ -1,4 +1,4 @@
-// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --EmitMLIR --printIR %s | FileCheck %s
+// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR %s | FileCheck %s
 
 // Check whether the compiler can remove unstick/stick so that the output of zdnn softmax is passed directly to zdnn matmul.
 func.func @softmax_matmul(%arg0: tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
diff --git a/test/mlir/accelerators/nnpa/transform/fold-std-alloc.mlir b/test/mlir/accelerators/nnpa/transform/fold-std-alloc.mlir
@@ -1,5 +1,7 @@
 // RUN: onnx-mlir-opt --mcpu=z16 --maccel=NNPA --fold-std-alloc %s -split-input-file | FileCheck %s
 
+// -----
+
 func.func @should_fold() -> memref<3xi64> {
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
diff --git a/test/mlir/accelerators/nnpa/transform/zlow-stick-unstick-expansion.mlir b/test/mlir/accelerators/nnpa/transform/zlow-stick-unstick-expansion.mlir

Original file line number	Diff line number	Diff line change
`@@ -48,6 +48,10 @@ namespace onnx_mlir {`
`48`	`48`
`49`	`49`	`void configurePassesNNPA() {`
`50`	`50`	`configureOnnxToZHighLoweringPass(optReport == OptReport::NNPAUnsupportedOps);`
	`51`	`+ // Compiler generated sticks supports saturation, so force its usage.`
	`52`	`+ // TODO: remove this if zDNN adds support for saturation.`
	`53`	`+ if (nnpaEnableSaturation)`
	`54`	`+ nnpaEnableCompilerStickUnstick = true;`
`51`	`55`	`}`
`52`	`56`
`53`	`57`	`void addONNXToZHighPasses(mlir::PassManager &pm) {`
Original file line number	Diff line number	Diff line change
`@@ -588,7 +588,7 @@ template <>`
`588`	`588`	`double analyzeSimdFor<ONNXHardSigmoidOp>(`
`589`	`589`	`Type t, Operation *op, int64_t &von, int64_t &son) {`
`590`	`590`	`return simdAnalysis(`
`591`		`- {GenericOps::ArithmeticGop, GenericOps::MulGop}, {2, 1}, t, von, son);`
	`591`	`+ {GenericOps::ArithmeticGop, GenericOps::MulGop}, {3, 1}, t, von, son);`
`592`	`592`	`}`
`593`	`593`
`594`	`594`	`template <>`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-// RUN: ccfd=$(dirname %s)/ccfd.onnx && curl -L https://github.com/IBM/ai-on-z-fraud-detection/raw/main/onnx%20models/ccf_lstm_static_tf2onnx_OS_new.onnx -o ${ccfd} && onnx-mlir --mcpu=z16 --maccel=NNPA --EmitMLIR --printIR -tag="test" ${ccfd} \| FileCheck %s && rm -rf ${ccfd}`
	`1`	`+// RUN: ccfd=$(dirname %s)/ccfd.onnx && curl -L https://github.com/IBM/ai-on-z-fraud-detection/raw/main/onnx%20models/ccf_lstm_static_tf2onnx_OS_new.onnx -o ${ccfd} && onnx-mlir --mcpu=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" ${ccfd} \| FileCheck %s && rm -rf ${ccfd}`
`2`	`2`
`3`	`3`	`// COM: This test is to check regression on the IBM CCFD model.`
`4`	`4`	`// COM: We expect that there are only one zlow.stick for the input and one zlow.unstick for the output.`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --EmitMLIR --printIR -tag="test" %s \| FileCheck %s`
	`1`	`+// RUN: onnx-mlir --mcpu=z16 --maccel=NNPA --enable-compiler-stick-unstick=false --EmitMLIR --printIR -tag="test" %s \| FileCheck %s`
`2`	`2`
`3`	`3`	`// -----`
`4`	`4`