@@ -742,6 +742,10 @@ static LogicalResult interpretOperation(Operation *op, OpBuilder &builder,
742
742
<< parallelOp << " \n " );
743
743
// ToFix: handle multiple parallel loops
744
744
ValueRange loopRefs = parallelOp.getLoops ();
745
+ Value numThreads = parallelOp.getNumThreads ();
746
+ StringAttr procBind = parallelOp.getProcBindAttr ();
747
+ bool needParallelClause =
748
+ numThreads || (procBind && procBind.getValue ().size () > 0 );
745
749
746
750
// Obtain the reference to the loop that needs to be parallelized
747
751
for (Value loopRef : loopRefs) {
@@ -778,6 +782,23 @@ static LogicalResult interpretOperation(Operation *op, OpBuilder &builder,
778
782
parallelLoop.getRegion ().takeBody (loopToParallel.getRegion ());
779
783
Operation *yieldOp = ¶llelLoop.getBody ()->back ();
780
784
yieldOp->setOperands (reducedValues);
785
+ if (needParallelClause) {
786
+ // Use clause only for the first one (expected the outermost one).
787
+ // Ideally, we would generate here a single, multi-dimensional
788
+ // AffineParallelOp, and we would not need to reset the flag.
789
+ needParallelClause = false ;
790
+ // Current approach: insert after yield and then move before it.
791
+ PatternRewriter::InsertionGuard insertGuard (builder);
792
+ builder.setInsertionPointAfter (yieldOp);
793
+ // Get induction variable.
794
+ ValueRange optionalLoopIndices = parallelLoop.getIVs ();
795
+ assert (optionalLoopIndices.size () >= 1 &&
796
+ " expected at least one loop index" );
797
+ Value parallelLoopIndex = optionalLoopIndices[0 ];
798
+ Operation *newOp = opBuilder.create <KrnlParallelClauseOp>(
799
+ loc, parallelLoopIndex, numThreads, procBind);
800
+ newOp->moveBefore (yieldOp);
801
+ }
781
802
// Replace the affine.forOp with affine.parallelOp in loopRefToTop
782
803
loopRefToOp[loopRef] = parallelLoop;
783
804
loopToParallel.erase ();
@@ -975,6 +996,7 @@ void ConvertKrnlToAffinePass::runOnOperation() {
975
996
target.addIllegalOp <KrnlCopyToBufferOp>();
976
997
target.addIllegalOp <KrnlCopyFromBufferOp>();
977
998
target.addIllegalOp <KrnlPrefetchOp>();
999
+ target.addLegalOp <KrnlParallelClauseOp>();
978
1000
target.addLegalOp <AffineYieldOp>();
979
1001
target.addLegalOp <AffineLoadOp>();
980
1002
target.addLegalOp <AffineStoreOp>();
0 commit comments