Remove disabled alternative code versions (#if/#elif/#else variants) from QuantizeLinear lowering

AlexandreEichenberger · AlexandreEichenberger · commit 79a8875e0eb1 · 2024-09-25T13:54:19.000-04:00
Signed-off-by: Alexandre Eichenberger <alexe@us.ibm.com>
diff --git a/src/Conversion/ONNXToKrnl/Quantization/QuantizeLinear.cpp b/src/Conversion/ONNXToKrnl/Quantization/QuantizeLinear.cpp
@@ -71,7 +71,6 @@ void emitQuantizationLinearScalarParameters(ConversionPatternRewriter &rewriter,
   DimsExpr outputAF;
   outputAF.emplace_back(zero);
 
-#if 1
   Type inputElementType = inputType.getElementType();
   unsigned inputWidth;
   if (isa<Float32Type>(inputElementType))
@@ -111,127 +110,6 @@ void emitQuantizationLinearScalarParameters(ConversionPatternRewriter &rewriter,
         return res;
       }});
 
-#elif 1
-  // hi alex: test with 2 loops for easier debugging
-  // Allocate output buffers (same type as input).
-  MemRefType flatBufferType = llvm::cast<MemRefType>(flatInput.getType());
-  Value flatBuffer = create.mem.alignedAlloc(flatBufferType, flatInputDims);
-  DimsExpr bufferAF;
-  bufferAF.emplace_back(zero);
-
-  create.krnl.simdIterateIE(simdLb, simdUb, totVL, simdOnly, enableParallel,
-      {flatInput}, {inputAF}, {flatBuffer}, {bufferAF},
-      {[&](const KrnlBuilder &kb, ArrayRef<Value> inputVals, int64_t VL) {
-        MultiDialectBuilder<MathBuilder> create(kb);
-        Value x = inputVals[0];
-        // Scale
-        Value scaleX = create.math.div(x, scale);
-        // Round
-        Value roundX = create.math.round(scaleX);
-        // Adjust
-        Value adjustX;
-        if (hasZeroPoint)
-          adjustX = create.math.add(roundX, zeroPoint);
-        else
-          adjustX = roundX;
-        // Saturate: use max into a min.
-        Value saturateX = create.math.clip(adjustX, qMin, qMax);
-        // Old approach.
-        // return create.math.cast(quantizedElementType, saturateX);
-        return saturateX;
-      }});
-
-  // Need transient types.
-  Type inputElementType = flatBufferType.getElementType();
-  unsigned inputWidth;
-  if (isa<Float32Type>(inputElementType))
-    inputWidth = 32;
-  else if (isa<Float64Type>(inputElementType))
-    inputWidth = 64;
-  else
-    llvm_unreachable("unsupported input type");
-  IntegerType quantizedIntType = cast<IntegerType>(quantizedElementType);
-  bool isSignless = quantizedIntType.isSignless();
-  bool isSigned = quantizedIntType.isSigned();
-  Type quantizedElementTypeSameSizeAsInput =
-      rewriter.getIntegerType(inputWidth, isSignless || isSigned);
-
-  create.krnl.simdIterateIE(simdLb, simdUb, totVL, simdOnly, enableParallel,
-      {flatBuffer}, {bufferAF}, {flatAlloc}, {outputAF},
-      {[&](const KrnlBuilder &kb, ArrayRef<Value> inputVals, int64_t VL) {
-        MultiDialectBuilder<KrnlBuilder, VectorBuilder, MathBuilder> create(kb);
-        // Convert float* to int*/uint* where * is 32 (64?)
-        Value input = inputVals[0];
-        Value quantizedSameSizeAsInput =
-            create.math.cast(quantizedElementTypeSameSizeAsInput, input);
-    // Convert int32/uint32 to int*/unint* where * is 8, 16...
-#if 0
-        // Code get normalized to the code below
-        unsigned quantizedWidth = quantizedIntType.getWidth();
-        unsigned currWidth = inputWidth;
-        Value qVal = quantizedSameSizeAsInput;
-        while (currWidth > quantizedWidth) {
-          currWidth = currWidth / 2;
-          Type qType =
-              rewriter.getIntegerType(currWidth, isSignless || isSigned);
-          qVal = create.math.cast(qType, qVal);
-        }
-#else
-        Value qVal =
-            create.math.cast(quantizedElementType, quantizedSameSizeAsInput);
-#endif
-        return qVal;
-      }});
-
-#else
-  // faster than original loop on z16, takes 124us for 64k vals
-  // Allocate output buffers.
-  MemRefType flatBufferType = llvm::cast<MemRefType>(flatInput.getType());
-  Value flatBuffer = create.mem.alignedAlloc(flatBufferType, flatInputDims);
-  DimsExpr bufferAF;
-  bufferAF.emplace_back(zero);
-
-  create.krnl.simdIterateIE(simdLb, simdUb, totVL, simdOnly, enableParallel,
-      {flatInput}, {inputAF}, {flatBuffer}, {bufferAF},
-      {[&](const KrnlBuilder &kb, ArrayRef<Value> inputVals, int64_t VL) {
-        MultiDialectBuilder<MathBuilder> create(kb);
-        Value x = inputVals[0];
-        // Scale
-        Value scaleX = create.math.div(x, scale);
-        // Round
-        Value roundX = create.math.round(scaleX);
-        // Adjust
-        Value adjustX;
-        if (hasZeroPoint)
-          adjustX = create.math.add(roundX, zeroPoint);
-        else
-          adjustX = roundX;
-        // Saturate: use max into a min.
-        Value saturateX = create.math.clip(adjustX, qMin, qMax);
-        // Old approach.
-        // return create.math.cast(quantizedElementType, saturateX);
-        return saturateX;
-      }});
-
-  // A second loop that performs scalar float to int performs better than the
-  // compiler's attempt to generate SIMD conversion code. This might not hold
-  // with all data types, but is definitely noticeable with uint8.
-  //
-  // Investigate further: we might save the vector to a buffer on the fly
-  // (avoiding a second loop as below), and then reload each value as scalar and
-  // then saved them as scalar (thus avoiding the insert/extract SIMD operations
-  // that also do not perform well). We can have a SIMD buffer in memory for the
-  // non-quantized and quantized simd values, but then we also need to privatize
-  // it, which is also not easy in this scheme. So ignore this for now.
-  create.krnl.forLoopIE(simdLb, simdUb, 1, enableParallel,
-      [&](const KrnlBuilder &kb, ValueRange loopInd) {
-        MultiDialectBuilder<KrnlBuilder, MemRefBuilder, MathBuilder> create(kb);
-        Value buffVal = create.krnl.loadIE(flatBuffer, {zero}, {loopInd[0]});
-        Value res = create.math.cast(quantizedElementType, buffVal);
-        create.krnl.storeIE(res, flatAlloc, {zero}, {loopInd[0]});
-      });
-#endif
-
   if (totVL > 1)
     onnxToKrnlSimdReport(op, /*successful*/ true, totVL,
         simdLoopStaticTripCount, "quantizationLinear whole tensor");