diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp index 302bf4273805..fa5094678268 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp @@ -201,13 +201,13 @@ static void tileAndDistributeToWorkgroup( funcPassManager.addPass(createTileAndDistributeToWorkgroupsPass( kNumMaxParallelDims, linalg::DistributionMethod::CyclicNumProcsEqNumIters)); + funcPassManager.addPass(createCSEPass()); + if (convertToDpsOptions) { + funcPassManager.addPass( + createConvertToDestinationPassingStylePass(*convertToDpsOptions)); + } } - funcPassManager.addPass(createCSEPass()); - if (convertToDpsOptions) { - funcPassManager.addPass( - createConvertToDestinationPassingStylePass(*convertToDpsOptions)); - } // TODO(#16421): Disable decomposition due to failure in bufferization. // funcPassManager.addPass( // IREE::LinalgExt::createTileAndDecomposeAttentionPass()); diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir index bd12d38bbe95..4e5218207bbb 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/pipeline_tile_and_fuse.mlir @@ -1089,6 +1089,7 @@ hal.executable public @main { // Verify that the write does not get hoisted out of the single threaded // for loop. -// CHECK: vector.transfer_write %{{.*}}, %[[B2]]{{.*}} memref<10x1xf32, #hal.descriptor_type> -// CHECK-NEXT: } +// CHECK: vector.transfer_write %{{.*}}, %[[B2]]{{.*}} memref<10x1xf32, #hal.descriptor_type> +// CHECK-NEXT: } +// CHECK-NEXT: } {mapping = [#iree_codegen.workgroup_mapping]} // CHECK-NEXT: return