Commit 076492a
Enhance shape inference for ONNX Reshape (#3122)
* Add a special case in shape inference for reshape

Signed-off-by: Tung D. Le <[email protected]>
1 parent 55e335e

File tree: 3 files changed, +177 -5 lines changed

src/Dialect/ONNX/ONNXOps/Tensor/Reshape.cpp

Lines changed: 75 additions & 1 deletion
@@ -48,12 +48,83 @@ LogicalResult ONNXReshapeOpShapeHelper::computeShape() {
   // - -1: the output dim is calculated from the other output dims. No more
   // than one dim in the output has value -1.

+  // Shape inference can be simplified if there is a bijection between a set of
+  // unknown dimensions in data and unknown dimensions in shape. In such a case,
+  // there is no need to include these unknown dimensions in computing the
+  // dimension at position of -1, which increases the chance that the dim value
+  // at position of -1 can be a static value.
+  //
+  // For example,
+  // - data is tensor<1x?x2048xf32>,
+  // - shape is tensor<4xi64> of [1, dim_1_of_data, -1, 64]
+  // In this case, the 2nd dimension of data is unknown but it is the same as
+  // the 2nd value in shape. So to compute the output dim at position of -1,
+  // we just do 2048/64, that is 32. Without this simplification, the output
+  // dim at position of -1 would be unknown at compile time.
+  std::set<int64_t> dataIgnoredDims, outputIgnoredDims;
+  SmallVector<Value> shapeDimVals;
+  if (areDimsFromConcat(shape)) {
+    getDims(shape, shapeDimVals);
+    Value refData = data;
+
+    // Get the input A of MatMul that is the producer of "data" if applicable.
+    // This is a special case to handle a pattern found in the IBM
+    // granite-3.1-2b-instruct model:
+    // clang-format off
+    // %0 = onnx.Constant dense<1.000000e+00> : tensor<2048x2048xf32>
+    // %1 = onnx.Constant dense<64> : tensor<1xi64>
+    // %2 = onnx.Constant dense<-1> : tensor<1xi64>
+    // %3 = "onnx.Dim"(%arg0) {axis = 0 : si64} : (tensor<?x?x2048xf32>) -> tensor<1xi64>
+    // %4 = "onnx.Dim"(%arg0) {axis = 1 : si64} : (tensor<?x?x2048xf32>) -> tensor<1xi64>
+    // %5 = "onnx.MatMul"(%arg0, %0) : (tensor<?x?x2048xf32>, tensor<2048x2048xf32>) -> tensor<?x?x2048xf32>
+    // %6 = "onnx.Concat"(%3, %4, %2, %1) {axis = 0 : si64} : (tensor<1xi64>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor<4xi64>
+    // %7 = "onnx.Reshape"(%5, %6) {allowzero = 0 : si64} : (tensor<?x?x2048xf32>, tensor<4xi64>) -> tensor<?x?x?x64xf32>
+    // clang-format on
+    // This special handling is not encouraged to be used widely. Since there
+    // is no good mechanism to handle this situation in a systematic way
+    // (e.g. using dynamic dimension analysis), we handle it here.
+    ONNXMatMulOp mmOp = data.getDefiningOp<ONNXMatMulOp>();
+    bool fromMatMul = false;
+    if (mmOp && isRankedShapedType(mmOp.getB().getType()) &&
+        getRank(mmOp.getB().getType()) == 2) {
+      refData = mmOp.getA();
+      fromMatMul = true;
+    }
+
+    // Find the bijective mapping.
+    // We do not compute the actual mapping; just storing the source and
+    // target sets is enough if the map exists.
+    bool isBijective = true;
+    for (int64_t i = 0; i < outputRank; ++i) {
+      Value dim = shapeDimVals[i];
+      if (auto dimOp = dim.getDefiningOp<ONNXDimOp>()) {
+        if (dimOp.getData() != refData)
+          continue;
+        int64_t axis = dimOp.getAxis();
+        if (auto search = dataIgnoredDims.find(axis);
+            search != dataIgnoredDims.end())
+          isBijective = false;
+        if (fromMatMul && axis == getRank(refData.getType()) - 1)
+          isBijective = false;
+        outputIgnoredDims.insert(i);
+        dataIgnoredDims.insert(axis);
+      }
+    }
+    if (!isBijective) {
+      outputIgnoredDims.clear();
+      dataIgnoredDims.clear();
+    }
+  }
+
   // Compute the total number of elements using the input data operand.
   // dataRank will be 0 if Data is unranked tensor.
   // The number of element will not be computed
   IndexExpr numOfElements = LitIE(1);
-  for (unsigned i = 0; i < dataRank; ++i)
+  for (unsigned i = 0; i < dataRank; ++i) {
+    if (auto search = dataIgnoredDims.find(i); search != dataIgnoredDims.end())
+      continue;
     numOfElements = numOfElements * createIE->getShapeAsDim(data, i);
+  }

   // Compute the total number of elements from the shape values.
   IndexExpr numOfElementsFromShape = LitIE(1);
@@ -74,6 +145,9 @@ LogicalResult ONNXReshapeOpShapeHelper::computeShape() {

     // dimShape == -1: use 1 to compute the number of elements to avoid
     // negative value.
+    if (auto search = outputIgnoredDims.find(i);
+        search != outputIgnoredDims.end())
+      continue;
     dim = dim.selectOrSelf(dim == -1, LitIE(1));
     numOfElementsFromShape = numOfElementsFromShape * dim;
   }
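To make the arithmetic behind the new bijection check concrete, here is a minimal stand-alone C++ sketch. It is not onnx-mlir code: it uses plain STL containers instead of IndexExpr, Value, and ONNXDimOp; the function name inferMinusOneDim and its encoding conventions are invented for illustration; and it omits the MatMul special case (where the reduction axis must additionally break the bijection). Dynamic data dims that reappear verbatim in the shape operand are cancelled on both sides, and the -1 slot is resolved from the remaining static sizes.

#include <cstdint>
#include <iostream>
#include <set>
#include <vector>

// Hypothetical stand-in for the committed logic. Conventions:
// - dataShape: -1 marks a dynamic dimension.
// - shapeVals: the reshape target; -1 is the slot to infer.
// - fromDataAxis[i] >= 0 means output dim i is onnx.Dim(data, axis);
//   its runtime value is unknown, but it equals that data dim exactly.
// Returns the inferred value of the -1 slot, or -1 if still dynamic.
int64_t inferMinusOneDim(const std::vector<int64_t> &dataShape,
    const std::vector<int64_t> &shapeVals,
    const std::vector<int64_t> &fromDataAxis) {
  std::set<int64_t> dataIgnored, outputIgnored;
  bool isBijective = true;
  for (size_t i = 0; i < shapeVals.size(); ++i) {
    int64_t axis = fromDataAxis[i];
    if (axis < 0)
      continue; // not an onnx.Dim of data
    if (!dataIgnored.insert(axis).second)
      isBijective = false; // same data axis used twice: no bijection
    outputIgnored.insert((int64_t)i);
  }
  if (!isBijective)
    return -1; // sketch gives up here (the committed code instead clears
               // the sets and falls back to the general path)

  // Product of the remaining (hopefully static) data dims.
  int64_t numElems = 1;
  for (size_t i = 0; i < dataShape.size(); ++i) {
    if (dataIgnored.count((int64_t)i))
      continue; // cancelled by the bijection
    if (dataShape[i] < 0)
      return -1; // a leftover dynamic dim: cannot resolve statically
    numElems *= dataShape[i];
  }
  // Divide by the remaining static output dims.
  for (size_t i = 0; i < shapeVals.size(); ++i) {
    if (outputIgnored.count((int64_t)i) || shapeVals[i] == -1)
      continue; // cancelled, or the slot we are solving for
    numElems /= shapeVals[i];
  }
  return numElems;
}

int main() {
  // The granite example: data is tensor<?x?x2048xf32>,
  // shape is [Dim(data, 0), Dim(data, 1), -1, 64].
  std::vector<int64_t> data = {-1, -1, 2048};
  std::vector<int64_t> shape = {0, 0, -1, 64}; // 0 = placeholder for Dim slots
  std::vector<int64_t> fromAxis = {0, 1, -1, -1};
  std::cout << inferMinusOneDim(data, shape, fromAxis) << "\n"; // prints 32
  return 0;
}

Compiled and run, this prints 32, matching the tensor<?x?x32x64xf32> result type in the tests below.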

test/mlir/onnx/onnx_canonicalization.mlir

Lines changed: 3 additions & 3 deletions
@@ -406,8 +406,8 @@ func.func @test_reshape_fusion3(%arg0: tensor<?x4x2x2xf32>) -> tensor<?x2x?xf32>
 // CHECK-DAG: [[VAR_1_:%.+]] = onnx.Constant dense<2> : tensor<1xi64>
 // CHECK-DAG: [[VAR_2_:%.+]] = "onnx.Dim"([[PARAM_0_]]) {axis = 0 : si64} : (tensor<?x4x2x2xf32>) -> tensor<1xi64>
 // CHECK: [[VAR_3_:%.+]] = "onnx.Concat"([[VAR_2_]], [[VAR_1_]], [[VAR_0_]]) {axis = 0 : si64} : (tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor<3xi64>
-// CHECK: [[VAR_4_:%.+]] = "onnx.Reshape"([[PARAM_0_]], [[VAR_3_]]) {allowzero = 0 : si64} : (tensor<?x4x2x2xf32>, tensor<3xi64>) -> tensor<?x2x?xf32>
-// CHECK: onnx.Return [[VAR_4_]] : tensor<?x2x?xf32>
+// CHECK: [[VAR_4_:%.+]] = "onnx.Reshape"([[PARAM_0_]], [[VAR_3_]]) {allowzero = 0 : si64} : (tensor<?x4x2x2xf32>, tensor<3xi64>) -> tensor<?x2x8xf32>
+// CHECK: onnx.Return [[VAR_4_]] : tensor<?x2x8xf32>
 // CHECK: }
 }

@@ -1952,4 +1952,4 @@ func.func @test_reorder_relu_maxpool(%arg0: tensor<1x64x32x32xf32>) -> tensor<1x
 // CHECK: [[VAR_1_:%.+]] = "onnx.Relu"([[VAR_0_]]) : (tensor<*xf32>) -> tensor<1x64x16x16xf32>
 // CHECK-NEXT: return [[VAR_1_]] : tensor<1x64x16x16xf32>
 // CHECK: }
-}
+}
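The retyped result above follows from the same cancellation, and the numbers can be checked by hand: data is tensor<?x4x2x2xf32>, the target shape is [Dim(data, 0), 2, -1], so dim 0 cancels on both sides and 4 * 2 * 2 / 2 = 8 fills the -1 slot. A trivial check, with the values hardcoded from the test above rather than read from any IR:

#include <cassert>

int main() {
  // test_reshape_fusion3: data ?x4x2x2, shape [Dim(data,0), 2, -1].
  // Dim 0 is matched bijectively, so only static dims remain.
  long knownDataElems = 4 * 2 * 2; // static dims of data
  long knownOutDims = 2;           // static dims of shape, minus the -1 slot
  assert(knownDataElems / knownOutDims == 8); // hence tensor<?x2x8xf32>
  return 0;
}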

test/mlir/onnx/onnx_shape_inference.mlir

Lines changed: 99 additions & 1 deletion
@@ -900,6 +900,104 @@ onnx.Return %0 : tensor<*xf16>
 // CHECK: onnx.Return [[VAR_1_]] : tensor<4x?x3xf16>
 // CHECK: }

+// -----
+
+func.func @test_reshape_dim(%arg0: tensor<?x?x2048xf32>) -> tensor<?x?x?x64xf32> {
+  %1 = onnx.Constant dense<64> : tensor<1xi64>
+  %2 = onnx.Constant dense<-1> : tensor<1xi64>
+  %3 = "onnx.Dim"(%arg0) {axis = 0 : si64} : (tensor<?x?x2048xf32>) -> tensor<1xi64>
+  %4 = "onnx.Dim"(%arg0) {axis = 1 : si64} : (tensor<?x?x2048xf32>) -> tensor<1xi64>
+  %5 = "onnx.Concat"(%3, %4, %2, %1) {axis = 0 : si64} : (tensor<1xi64>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor<4xi64>
+  %6 = "onnx.Reshape"(%arg0, %5) {allowzero = 0 : si64} : (tensor<?x?x2048xf32>, tensor<4xi64>) -> tensor<?x?x?x64xf32>
+  return %6 : tensor<?x?x?x64xf32>
+
+// CHECK-LABEL: func.func @test_reshape_dim
+// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<?x?x2048xf32>) -> tensor<?x?x32x64xf32> {
+// CHECK-DAG: [[VAR_0_:%.+]] = onnx.Constant dense<64> : tensor<1xi64>
+// CHECK-DAG: [[VAR_1_:%.+]] = onnx.Constant dense<-1> : tensor<1xi64>
+// CHECK-DAG: [[VAR_2_:%.+]] = "onnx.Dim"([[PARAM_0_]]) {axis = 0 : si64} : (tensor<?x?x2048xf32>) -> tensor<1xi64>
+// CHECK-DAG: [[VAR_3_:%.+]] = "onnx.Dim"([[PARAM_0_]]) {axis = 1 : si64} : (tensor<?x?x2048xf32>) -> tensor<1xi64>
+// CHECK: [[VAR_4_:%.+]] = "onnx.Concat"([[VAR_2_]], [[VAR_3_]], [[VAR_1_]], [[VAR_0_]]) {axis = 0 : si64} : (tensor<1xi64>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor<4xi64>
+// CHECK: [[VAR_5_:%.+]] = "onnx.Reshape"([[PARAM_0_]], [[VAR_4_]]) {allowzero = 0 : si64} : (tensor<?x?x2048xf32>, tensor<4xi64>) -> tensor<?x?x32x64xf32>
+// CHECK: return [[VAR_5_]] : tensor<?x?x32x64xf32>
+// CHECK: }
+}
+
+// -----
+
+func.func @test_reshape_dim_bijective_at_last_dim(%arg0: tensor<?x?x2048xf32>) -> tensor<?x?x64x?xf32> {
+  %1 = onnx.Constant dense<64> : tensor<1xi64>
+  %2 = onnx.Constant dense<-1> : tensor<1xi64>
+  %3 = "onnx.Dim"(%arg0) {axis = 0 : si64} : (tensor<?x?x2048xf32>) -> tensor<1xi64>
+  %4 = "onnx.Dim"(%arg0) {axis = 1 : si64} : (tensor<?x?x2048xf32>) -> tensor<1xi64>
+  %5 = "onnx.Concat"(%4, %2, %1, %3) {axis = 0 : si64} : (tensor<1xi64>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor<4xi64>
+  %6 = "onnx.Reshape"(%arg0, %5) {allowzero = 0 : si64} : (tensor<?x?x2048xf32>, tensor<4xi64>) -> tensor<?x?x64x?xf32>
+  return %6 : tensor<?x?x64x?xf32>
+
+// CHECK-LABEL: func.func @test_reshape_dim_bijective_at_last_dim
+// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<?x?x2048xf32>) -> tensor<?x32x64x?xf32> {
+// CHECK-DAG: [[VAR_0_:%.+]] = onnx.Constant dense<64> : tensor<1xi64>
+// CHECK-DAG: [[VAR_1_:%.+]] = onnx.Constant dense<-1> : tensor<1xi64>
+// CHECK-DAG: [[VAR_2_:%.+]] = "onnx.Dim"([[PARAM_0_]]) {axis = 0 : si64} : (tensor<?x?x2048xf32>) -> tensor<1xi64>
+// CHECK-DAG: [[VAR_3_:%.+]] = "onnx.Dim"([[PARAM_0_]]) {axis = 1 : si64} : (tensor<?x?x2048xf32>) -> tensor<1xi64>
+// CHECK: [[VAR_4_:%.+]] = "onnx.Concat"([[VAR_3_]], [[VAR_1_]], [[VAR_0_]], [[VAR_2_]]) {axis = 0 : si64} : (tensor<1xi64>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor<4xi64>
+// CHECK: [[VAR_5_:%.+]] = "onnx.Reshape"([[PARAM_0_]], [[VAR_4_]]) {allowzero = 0 : si64} : (tensor<?x?x2048xf32>, tensor<4xi64>) -> tensor<?x32x64x?xf32>
+// CHECK: return [[VAR_5_]] : tensor<?x32x64x?xf32>
+// CHECK: }
+}
+
+// -----
+
+// COM: This pattern is found in the IBM granite-3.1-2b-instruct model.
+func.func @test_reshape_matmul_dim(%arg0: tensor<?x?x2048xf32>) -> tensor<?x?x?x64xf32> {
+  %0 = onnx.Constant dense<1.000000e+00> : tensor<2048x2048xf32>
+  %1 = onnx.Constant dense<64> : tensor<1xi64>
+  %2 = onnx.Constant dense<-1> : tensor<1xi64>
+  %3 = "onnx.Dim"(%arg0) {axis = 0 : si64} : (tensor<?x?x2048xf32>) -> tensor<1xi64>
+  %4 = "onnx.Dim"(%arg0) {axis = 1 : si64} : (tensor<?x?x2048xf32>) -> tensor<1xi64>
+  %5 = "onnx.MatMul"(%arg0, %0) : (tensor<?x?x2048xf32>, tensor<2048x2048xf32>) -> tensor<?x?x2048xf32>
+  %6 = "onnx.Concat"(%3, %4, %2, %1) {axis = 0 : si64} : (tensor<1xi64>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor<4xi64>
+  %7 = "onnx.Reshape"(%5, %6) {allowzero = 0 : si64} : (tensor<?x?x2048xf32>, tensor<4xi64>) -> tensor<?x?x?x64xf32>
+  return %7 : tensor<?x?x?x64xf32>
+
+// CHECK-LABEL: func.func @test_reshape_matmul_dim
+// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<?x?x2048xf32>) -> tensor<?x?x32x64xf32> {
+// CHECK-DAG: [[VAR_0_:%.+]] = onnx.Constant dense<1.000000e+00> : tensor<2048x2048xf32>
+// CHECK-DAG: [[VAR_1_:%.+]] = onnx.Constant dense<64> : tensor<1xi64>
+// CHECK-DAG: [[VAR_2_:%.+]] = onnx.Constant dense<-1> : tensor<1xi64>
+// CHECK-DAG: [[VAR_3_:%.+]] = "onnx.Dim"([[PARAM_0_]]) {axis = 0 : si64} : (tensor<?x?x2048xf32>) -> tensor<1xi64>
+// CHECK-DAG: [[VAR_4_:%.+]] = "onnx.Dim"([[PARAM_0_]]) {axis = 1 : si64} : (tensor<?x?x2048xf32>) -> tensor<1xi64>
+// CHECK-NOT: separator of consecutive DAGs
+// CHECK-DAG: [[VAR_5_:%.+]] = "onnx.MatMul"([[PARAM_0_]], [[VAR_0_]]) : (tensor<?x?x2048xf32>, tensor<2048x2048xf32>) -> tensor<?x?x2048xf32>
+// CHECK-DAG: [[VAR_6_:%.+]] = "onnx.Concat"([[VAR_3_]], [[VAR_4_]], [[VAR_2_]], [[VAR_1_]]) {axis = 0 : si64} : (tensor<1xi64>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor<4xi64>
+// CHECK: [[VAR_7_:%.+]] = "onnx.Reshape"([[VAR_5_]], [[VAR_6_]]) {allowzero = 0 : si64} : (tensor<?x?x2048xf32>, tensor<4xi64>) -> tensor<?x?x32x64xf32>
+// CHECK: return [[VAR_7_]] : tensor<?x?x32x64xf32>
+// CHECK: }
+}
+
+// -----
+
+func.func @test_reshape_dim_not_bijection(%arg0: tensor<?x?x2048xf32>) -> tensor<?x?x?x64xf32> {
+  %1 = onnx.Constant dense<64> : tensor<1xi64>
+  %2 = onnx.Constant dense<-1> : tensor<1xi64>
+  %3 = "onnx.Dim"(%arg0) {axis = 0 : si64} : (tensor<?x?x2048xf32>) -> tensor<1xi64>
+  %4 = "onnx.Concat"(%3, %3, %2, %1) {axis = 0 : si64} : (tensor<1xi64>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor<4xi64>
+  %5 = "onnx.Reshape"(%arg0, %4) {allowzero = 0 : si64} : (tensor<?x?x2048xf32>, tensor<4xi64>) -> tensor<?x?x?x64xf32>
+  return %5 : tensor<?x?x?x64xf32>
+
+// CHECK-LABEL: func.func @test_reshape_dim_not_bijection
+// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<?x?x2048xf32>) -> tensor<?x?x?x64xf32> {
+// CHECK-DAG: [[VAR_0_:%.+]] = onnx.Constant dense<64> : tensor<1xi64>
+// CHECK-DAG: [[VAR_1_:%.+]] = onnx.Constant dense<-1> : tensor<1xi64>
+// CHECK-DAG: [[VAR_2_:%.+]] = "onnx.Dim"([[PARAM_0_]]) {axis = 0 : si64} : (tensor<?x?x2048xf32>) -> tensor<1xi64>
+// CHECK: [[VAR_3_:%.+]] = "onnx.Concat"([[VAR_2_]], [[VAR_2_]], [[VAR_1_]], [[VAR_0_]]) {axis = 0 : si64} : (tensor<1xi64>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor<4xi64>
+// CHECK: [[VAR_4_:%.+]] = "onnx.Reshape"([[PARAM_0_]], [[VAR_3_]]) {allowzero = 0 : si64} : (tensor<?x?x2048xf32>, tensor<4xi64>) -> tensor<?x?x?x64xf32>
+// CHECK: return [[VAR_4_]] : tensor<?x?x?x64xf32>
+// CHECK: }
+}
+
+// -----
+
 //===----------------------------------------------------------------------===//
 /// Test the flatten op inference.
 //===----------------------------------------------------------------------===//

@@ -3910,4 +4008,4 @@ func.func @test_grid_sample_dim_shape3(%arg0: tensor<?x?x?x?xf32>, %arg1: tensor
 // CHECK: return [[GRID]] : tensor<?x?x10x20xf32>
 // CHECK: }
 return %0 : tensor<*xf32>
-}
+}
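The last test above is the negative case: the same onnx.Dim result feeds two output slots, so the mapping is not one-to-one and the result type stays fully dynamic. Below is a small hypothetical sketch of just the duplicate-axis detection; the encoding and variable names are illustrative, not onnx-mlir APIs.

#include <cstdint>
#include <iostream>
#include <set>
#include <vector>

int main() {
  // test_reshape_dim_not_bijection: shape is
  // [Dim(data,0), Dim(data,0), -1, 64], so data axis 0 is the source
  // of two output dims.
  std::vector<int64_t> fromDataAxis = {0, 0, -1, -1}; // -1 = not a Dim op
  std::set<int64_t> seen;
  bool isBijective = true;
  for (int64_t axis : fromDataAxis) {
    if (axis < 0)
      continue;
    if (!seen.insert(axis).second)
      isBijective = false; // duplicate source axis breaks the bijection
  }
  // Prints "not bijective": the ignored-dim sets are cleared and the
  // -1 dim stays dynamic, as the unchanged tensor<?x?x?x64xf32> shows.
  std::cout << (isBijective ? "bijective" : "not bijective") << "\n";
  return 0;
}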
