@@ -45,109 +45,6 @@ namespace linalg {
 /// when used on distributed loops with memref semantics!
 void hoistRedundantVectorTransfers(func::FuncOp func);
 
-/// Greedily hoist redundant subset extract/insert operations on tensors
-/// outside of `forOp`. The logic follows:
-/// 1. Look for a write walking back from the `forOp` yield.
-/// 2. Check the uses of the matching block argument and look for a matching
-///    read (i.e. extract_slice or transfer_read) with matching indices.
-/// 3. In the case of a transfer_write, we can bypass other non-conflicting
-///    operations and find more hoisting opportunities.
-/// 4. Hoist the read/write pair and update the tensor SSA links.
-///
-/// Return the unmodified `forOp` if no hoisting occurred.
-/// Return a new scf::ForOp if hoisting on tensors occurred.
-///
-/// After this transformation the returned scf::ForOp may have unused
-/// arguments that can be removed by application of canonicalization patterns.
-///
-/// Example:
-/// ========
-/// IR resembling:
-///
-/// ```
-/// %0 = scf.for %i = %l to %u step %s iter_args(%a0 = %t0) -> (tensor<10xf32>) {
-///   %1 = scf.for %j = %l to %u step %s iter_args(%a6 = %a0) -> (tensor<10xf32>) {
-///     %e = tensor.extract_slice %a6[%i][%sz][1] : tensor<10xf32> to tensor<?xf32>
-///     %r = vector.transfer_read %e[%c0], %cst : tensor<?xf32>, vector<4xf32>
-///     %u = "some_use"(%r) : (vector<4xf32>) -> vector<4xf32>
-///     %w = vector.transfer_write %u, %e[%c0] : vector<4xf32>, tensor<?xf32>
-///     %st = tensor.insert_slice %w into %a6[%i][%sz][1]
-///         : tensor<?xf32> into tensor<10xf32>
-///     scf.yield %st : tensor<10xf32>
-///   }
-///   scf.yield %1 : tensor<10xf32>
-/// }
-/// ```
-///
-/// Progressively hoists to:
-///
-/// ```
-/// %0 = scf.for %i = %l to %u step %s iter_args(%a0 = %t0) -> (tensor<10xf32>) {
-///   %e = tensor.extract_slice %a0[%i][%sz][1] : tensor<10xf32> to tensor<?xf32>
-///   %1:2 = scf.for %j = %l to %u step %s iter_args(%a6 = %a0, %a7 = %e)
-///       -> (tensor<10xf32>, tensor<?xf32>) {
-///     %r = vector.transfer_read %a7[%c0], %cst : tensor<?xf32>, vector<4xf32>
-///     %u = "some_use"(%r) : (vector<4xf32>) -> vector<4xf32>
-///     %w = vector.transfer_write %u, %a7[%c0] : vector<4xf32>, tensor<?xf32>
-///     scf.yield %a6, %w : tensor<10xf32>, tensor<?xf32>
-///   }
-///   %st = tensor.insert_slice %1#1 into %1#0[%i][%sz][1]
-///       : tensor<?xf32> into tensor<10xf32>
-///   scf.yield %st : tensor<10xf32>
-/// }
-/// ```
-///
-/// and
-///
-/// ```
-/// %0 = scf.for %i = %l to %u step %s iter_args(%a0 = %t0) -> (tensor<10xf32>) {
-///   %e = tensor.extract_slice %a0[%i][%sz][1] : tensor<10xf32> to tensor<?xf32>
-///   %r = vector.transfer_read %e[%c0], %cst : tensor<?xf32>, vector<4xf32>
-///   %1:3 = scf.for %j = %l to %u step %s
-///       iter_args(%a6 = %a0, %a7 = %e, %a8 = %r)
-///       -> (tensor<10xf32>, tensor<?xf32>, vector<4xf32>) {
-///     %u = "some_use"(%a8) : (vector<4xf32>) -> vector<4xf32>
-///     scf.yield %a6, %a7, %u : tensor<10xf32>, tensor<?xf32>, vector<4xf32>
-///   }
-///   %w = vector.transfer_write %1#2, %1#1[%c0] : vector<4xf32>, tensor<?xf32>
-///   %st = tensor.insert_slice %w into %1#0[%i][%sz][1]
-///       : tensor<?xf32> into tensor<10xf32>
-///   scf.yield %st : tensor<10xf32>
-/// }
-/// ```
-///
-/// It can then canonicalize to:
-///
-/// ```
-/// %0 = scf.for %i = %l to %u step %s iter_args(%a0 = %t0) -> (tensor<10xf32>) {
-///   %e = tensor.extract_slice %a0[%i][%sz][1] : tensor<10xf32> to tensor<?xf32>
-///   %r = vector.transfer_read %e[%c0], %cst : tensor<?xf32>, vector<4xf32>
-///   %1 = scf.for %j = %l to %u step %s iter_args(%a7 = %r)
-///       -> (vector<4xf32>) {
-///     %u = "some_use"(%a7) : (vector<4xf32>) -> vector<4xf32>
-///     scf.yield %u : vector<4xf32>
-///   }
-///   %w = vector.transfer_write %1, %e[%c0] : vector<4xf32>, tensor<?xf32>
-///   %st = tensor.insert_slice %w into %a0[%i][%sz][1]
-///       : tensor<?xf32> into tensor<10xf32>
-///   scf.yield %st : tensor<10xf32>
-/// }
-/// ```
-///
-// TODO: This should be further generalized along a few different axes:
-//   - Loops other than scf.ForOp that operate on tensors (both sequential
-//     and parallel loops).
-//   - Subset extract/insert pairs other than tensor.extract/insert_slice
-//     and vector.transfer_read/write.
-//   - A more general areSubsetDisjoint analysis/interface to work across
-//     all subset op types and allow bypassing non-WAW-conflicting
-//     operations in more cases.
-scf::ForOp hoistRedundantSubsetExtractInsert(RewriterBase &rewriter,
-                                             scf::ForOp forOp);
-
-/// Call into `hoistRedundantSubsetExtractInsert` without a RewriterBase.
-// TODO: obsolete and should be retired.
-void hoistRedundantVectorTransfersOnTensor(func::FuncOp func);
-
 } // namespace linalg
 } // namespace mlir
 
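For reference, a minimal sketch of how the removed entry point was typically driven from a pass. Only `hoistRedundantSubsetExtractInsert` comes from this header; the wrapper function, the up-front loop collection, and the choice to visit only top-level loops are illustrative assumptions, not part of this patch:

```cpp
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/Transforms/Hoisting.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/PatternMatch.h"
#include "llvm/ADT/SmallVector.h"

// Hypothetical driver: applies the hoisting to every top-level scf.for in
// `func`. Loops are collected up front because a successful hoist replaces
// the loop with a new scf::ForOp, which would invalidate an in-flight walk.
static void hoistSubsetsOnTensors(mlir::func::FuncOp func) {
  mlir::IRRewriter rewriter(func.getContext());
  llvm::SmallVector<mlir::scf::ForOp> loops;
  for (mlir::scf::ForOp forOp : func.getBody().getOps<mlir::scf::ForOp>())
    loops.push_back(forOp);
  for (mlir::scf::ForOp forOp : loops)
    // Returns `forOp` unchanged if nothing was hoisted, or the new loop.
    (void)mlir::linalg::hoistRedundantSubsetExtractInsert(rewriter, forOp);
}
```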