@@ -28,82 +28,6 @@ using namespace mlir;
 
 namespace onnx_mlir {
 
-// Check whether the input, x, can be reused as the output buffer.
-bool isBufferReusable(Value x, MemRefType outputType) {
-  if (!x.hasOneUse())
-    return false;
-
-  Type xType = x.getType();
-  auto inputType = dyn_cast<ShapedType>(xType);
-  if (!inputType)
-    return false;
-  // Currently, only static shapes can be reused.
-  // ToFix: use DimAnalysis to handle dynamic shapes.
-  if (!hasStaticShape(inputType))
-    return false;
-  if (!hasStaticShape(outputType))
-    return false;
-
-  // Currently, reuse requires that the shapes be the same.
-  // ToFix: if the shapes differ, memref.cast can be used.
-  if (getRank(inputType) != getRank(outputType))
-    return false;
-  for (int64_t i = 0; i < getRank(inputType); i++) {
-    if (inputType.getShape()[i] != outputType.getShape()[i])
-      return false;
-  }
-
-  // ToFix: the SIMD padding is not checked. We did not record whether the
-  // memref is padded or not; the padding could be recorded as an attribute
-  // on the memref, or may not be needed at all.
-  return true;
-}
-
-// Traverse the operands to find a candidate for buffer reuse.
-// Return -1 if no candidate is found.
-int whichBufferToReuse(ValueRange values, MemRefType outputType) {
-  for (size_t i = 0; i < values.size(); i++) {
-    if (isBufferReusable(values[i], outputType))
-      return i;
-  }
-  return -1;
-}
-
-// Allocate a memref (as before) if no input buffer can be reused.
-// The default VL = 0 is used for non-SIMD allocation.
-Value allocOrReuse(MemRefBuilder &create, Operation *op,
-    ValueRange generatedOperands, MemRefType outputMemRefType, DimsExprRef dims,
-    int64_t alignment, int64_t VL = 0);
-
-Value allocOrReuse(MemRefBuilder &create, Operation *op,
-    ValueRange generatedOperands, MemRefType outputMemRefType, DimsExprRef dims,
-    int64_t alignment, int64_t VL) {
-
-  int indexToReuse = -1;
-  // By default, enableKrnlBufferReuse is false. Simply allocate a memref.
-  if (enableKrnlBufferReuse) {
-    // Take care to use op->getOperands() to check the number of uses:
-    // after buffer reuse, the number of uses of the transformed values,
-    // generatedOperands, will increase.
-    indexToReuse = whichBufferToReuse(op->getOperands(), outputMemRefType);
-  }
-
-  if (indexToReuse != -1) {
-    int size = getSizeInBytes(outputMemRefType);
-    LLVM_DEBUG({
-      llvm::dbgs() << "malloc_size " << size << "\n";
-      op->dump();
-    });
-    return generatedOperands[indexToReuse];
-  } else {
-    if (VL == 0)
-      return create.alignedAlloc(outputMemRefType, dims, alignment);
-    else
-      return create.alignedAllocWithSimdPadding(
-          outputMemRefType, dims, VL, alignment);
-  }
-}
-
 // =============================================================================
 /// Emit post-processing for variadic element-wise ops.
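The deleted reuse test boils down to three conditions: the candidate value has exactly one use, both shapes are static, and rank and extents match exactly. A minimal self-contained sketch of that decision, using a plain vector of extents as a stand-in for MLIR's ShapedType (all names below are illustrative, not onnx-mlir or MLIR APIs):

#include <cstdint>
#include <vector>

// Illustrative stand-in for an MLIR shaped value (hypothetical type,
// not part of onnx-mlir).
constexpr int64_t kDynamicDim = -1; // marks a dynamic extent
struct ShapeInfo {
  std::vector<int64_t> dims;
  bool hasOneUse = true; // mimics mlir::Value::hasOneUse()
};

static bool hasStaticShape(const ShapeInfo &s) {
  for (int64_t d : s.dims)
    if (d == kDynamicDim)
      return false;
  return true;
}

// Mirrors the deleted isBufferReusable: single use, static shapes,
// identical rank and extents.
static bool isBufferReusable(const ShapeInfo &in, const ShapeInfo &out) {
  if (!in.hasOneUse)
    return false;
  if (!hasStaticShape(in) || !hasStaticShape(out))
    return false;
  return in.dims == out.dims; // same rank and same extent per dimension
}

The one-use requirement is what makes reuse safe: once the producer's buffer is handed to the consumer as its output, any remaining reader of the original value would observe overwritten data.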
@@ -1399,14 +1323,14 @@ static LogicalResult getPartiallyFlattenedSimdCode(
   IndexExprScope allocScope(create.vec, shapeHelper->getScope());
   DimsExpr outputDims;
   getIndexExprList<SymbolIndexExpr>(shapeHelper->getOutputDims(), outputDims);
-  // Reuse the buffer from the input, or alloc memory with padding for SIMD.
+  // Alloc memory with padding for SIMD.
   // For the moment, it's ok to go here; if we truly have partial flattening
   // of the simd code, then we only do it with static memref sizes that are
   // multiples of VL * unrollVL, so there should be no padding anyway. This
   // will change if we do partial flattening with non-multiples of VL *
   // unrollVL.
-  Value alloc = allocOrReuse(
-      create.mem, op, operands, outputMemRefType, outputDims, alignment, VL);
+  Value alloc = create.mem.alignedAllocWithSimdPadding(
+      outputMemRefType, outputDims, VL, alignment);
   // Create flat inputs in the last innerDinNum dims.
   llvm::SmallVector<Value, 4> flatOperands;
   for (Value oper : operands) {
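The comment in this hunk argues that static sizes which are multiples of VL * unrollVL incur no padding. The arithmetic behind that claim, as a rough model of the padding policy (an assumption for illustration, not alignedAllocWithSimdPadding's actual implementation):

#include <cstdint>

// Rough model (assumption): round the flattened element count up to a
// multiple of the vector length VL, so that a full vector access at the
// tail of the buffer stays within the allocation.
static int64_t paddedElementCount(int64_t totalElements, int64_t VL) {
  return ((totalElements + VL - 1) / VL) * VL;
}

For example, 100 elements at VL = 8 pad to 104, while 96 elements stay at 96; any size already a multiple of VL (and a fortiori of VL * unrollVL) is unchanged, which is why the static case described above needs no padding.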
@@ -2051,9 +1975,8 @@ struct ONNXElementwiseUnaryOpLowering
   outputMemRefType = opFusionHelper.getOutputType(outputMemRefType);
 
   // Insert an allocation for the result of this operation.
-  Value alloc = allocOrReuse(create.mem, op, operands, outputMemRefType,
-      shapeHelper.getOutputDims(), alignment);
-  ;
+  Value alloc = create.mem.alignedAlloc(
+      outputMemRefType, shapeHelper.getOutputDims(), alignment);
 
   // Only create krnl.iterate if one of the operands is not a scalar tensor.
   if (!isScalar) {
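This hunk and the two identical ones below revert to the plain aligned allocation that allocOrReuse previously used as its fallback. As a standard-C++ model of what an aligned allocation provides (illustrative only; the real lowering emits a memref allocation carrying an alignment attribute):

#include <cstdlib>

// Standard-C++ sketch of an aligned allocation (not the Krnl lowering
// itself). std::aligned_alloc requires the byte size to be a multiple of
// the alignment, so round it up first.
static void *allocAligned(std::size_t bytes, std::size_t alignment) {
  std::size_t rounded = ((bytes + alignment - 1) / alignment) * alignment;
  return std::aligned_alloc(alignment, rounded);
}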
@@ -2233,9 +2156,8 @@ struct ONNXElementwiseBinaryOpLowering
   outputMemRefType = opFusionHelper.getOutputType(outputMemRefType);
 
   // Insert an allocation and deallocation for the result of this operation.
-  Value alloc = allocOrReuse(create.mem, op, operands, outputMemRefType,
-      shapeHelper.getOutputDims(), alignment);
-  ;
+  Value alloc = create.mem.alignedAlloc(
+      outputMemRefType, shapeHelper.getOutputDims(), alignment);
 
   // Only create krnl.iterate if one of the operands is not a scalar tensor.
   if (!isScalar) {
@@ -2409,9 +2331,8 @@ struct ONNXElementwiseVariadicOpLowering
   outputMemRefType = opFusionHelper.getOutputType(outputMemRefType);
 
   // Insert an allocation and deallocation for the result of this operation.
-  Value alloc = allocOrReuse(create.mem, op, operands, outputMemRefType,
-      shapeHelper.getOutputDims(), alignment);
-  ;
+  Value alloc = create.mem.alignedAlloc(
+      outputMemRefType, shapeHelper.getOutputDims(), alignment);
 
   // Only create krnl.iterate if one of the operands is not a scalar tensor.
   if (!isScalar) {