- Notifications
You must be signed in to change notification settings - Fork 15.3k
Closed
Bug
Copy link
Labels
Description
Hi, I used MLIR to lower a linalg program to gpu dialect, and it crashed unexpectedly. Please see the following example, repro with mlir-opt --convert-parallel-loops-to-gpu .
module { func.func @main() { %c5 = arith.constant 5 : index %c1 = arith.constant 1 : index %c16 = arith.constant 16 : index %c32 = arith.constant 32 : index %c4 = arith.constant 4 : index %c124 = arith.constant 124 : index %c2 = arith.constant 2 : index %c8 = arith.constant 8 : index %c0 = arith.constant 0 : index %alloc = memref.alloc() : memref<8x128x128x4xf32> %alloc_0 = memref.alloc() : memref<32x5x5x4xf32> %alloc_1 = memref.alloc() : memref<8x124x124x32xf32> scf.parallel (%arg0, %arg1, %arg2, %arg3) = (%c0, %c0, %c0, %c0) to (%c8, %c124, %c124, %c32) step (%c2, %c4, %c8, %c16) { %0 = arith.addi %arg0, %c2 : index scf.parallel (%arg4) = (%arg0) to (%0) step (%c1) { %1 = arith.addi %arg1, %c4 : index scf.parallel (%arg5) = (%arg1) to (%1) step (%c1) { %2 = arith.addi %arg2, %c8 : index %3 = arith.minsi %2, %c124 : index scf.parallel (%arg6) = (%arg2) to (%3) step (%c1) { %4 = arith.addi %arg3, %c16 : index scf.parallel (%arg7) = (%arg3) to (%4) step (%c1) { scf.for %arg8 = %c0 to %c5 step %c1 { scf.for %arg9 = %c0 to %c5 step %c1 { scf.for %arg10 = %c0 to %c4 step %c1 { %5 = arith.addi %arg5, %arg8 : index %6 = arith.addi %arg6, %arg9 : index %7 = memref.load %alloc[%arg4, %5, %6, %arg10] : memref<8x128x128x4xf32> %8 = memref.load %alloc_0[%arg7, %arg8, %arg9, %arg10] : memref<32x5x5x4xf32> %9 = memref.load %alloc_1[%arg4, %arg5, %arg6, %arg7] : memref<8x124x124x32xf32> %10 = arith.mulf %7, %8 : f32 %11 = arith.addf %9, %10 : f32 memref.store %11, %alloc_1[%arg4, %arg5, %arg6, %arg7] : memref<8x124x124x32xf32> } } } scf.reduce } {mapping = [#gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>]} scf.reduce } {mapping = [#gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>]} scf.reduce } {mapping = [#gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>]} scf.reduce } {mapping = [#gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)>]} scf.reduce } {mapping = [#gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>, #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>, #gpu.loop_dim_map<processor = block_z, map = (d0) -> (d0), bound = (d0) -> (d0)>, #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>]} return } } Original test case starts from linalg:
func.func @main() { %in_buf = memref.alloc() : memref<8x128x128x4xf32> %filter_buf = memref.alloc() : memref<32x5x5x4xf32> %out_buf = memref.alloc() : memref<8x124x124x32xf32> linalg.generic { indexing_maps = [ affine_map<(n, oh, ow, oc, kh, kw, ic) -> (n, oh + kh, ow + kw, ic)>, affine_map<(n, oh, ow, oc, kh, kw, ic) -> (oc, kh, kw, ic)>, affine_map<(n, oh, ow, oc, kh, kw, ic) -> (n, oh, ow, oc)> ], iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction"] } ins(%in_buf, %filter_buf : memref<8x128x128x4xf32>, memref<32x5x5x4xf32>) outs(%out_buf : memref<8x124x124x32xf32>) { ^bb0(%in: f32, %filt: f32, %out: f32): %prod = arith.mulf %in, %filt : f32 %sum = arith.addf %out, %prod : f32 linalg.yield %sum : f32 } return }Crash Backtrace
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace and instructions to reproduce the bug. Stack dump: 0. Program arguments: mlir-opt --convert-linalg-to-affine-loops --affine-loop-tile=tile-sizes=2,4,8,16,32,64 --affine-parallelize --lower-affine --canonicalize --gpu-map-parallel-loops --convert-parallel-loops-to-gpu main.mlir #0 0x0000564e5b293508 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x1b3f508) #1 0x0000564e5b290c35 llvm::sys::RunSignalHandlers() (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x1b3cc35) #2 0x0000564e5b294611 SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0 #3 0x00007f5475e45330 (/lib/x86_64-linux-gnu/libc.so.6+0x45330) #4 0x0000564e6280d8f3 mlir::detail::OperandStorage::OperandStorage(mlir::Operation*, mlir::OpOperand*, mlir::ValueRange) (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x90b98f3) #5 0x0000564e627ff88c mlir::Operation::create(mlir::Location, mlir::OperationName, mlir::TypeRange, mlir::ValueRange, mlir::DictionaryAttr, mlir::OpaqueProperties, mlir::BlockRange, unsigned int) (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x90ab88c) #6 0x0000564e627ff157 mlir::Operation::create(mlir::Location, mlir::OperationName, mlir::TypeRange, mlir::ValueRange, mlir::NamedAttrList&&, mlir::OpaqueProperties, mlir::BlockRange, mlir::RegionRange) (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x90ab157) #7 0x0000564e627ff014 mlir::Operation::create(mlir::OperationState const&) (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x90ab014) #8 0x0000564e627418f0 mlir::OpBuilder::create(mlir::OperationState const&) (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x8fed8f0) #9 0x0000564e620f02e2 mlir::affine::AffineApplyOp::create(mlir::OpBuilder&, mlir::Location, mlir::AffineMap, mlir::ValueRange) (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x899c2e2) #10 0x0000564e5cc2d861 processParallelLoop(mlir::scf::ParallelOp, mlir::gpu::LaunchOp, mlir::IRMapping&, llvm::SmallVectorImpl<mlir::Operation*>&, llvm::DenseMap<mlir::gpu::Processor, mlir::Value, llvm::DenseMapInfo<mlir::gpu::Processor, void>, llvm::detail::DenseMapPair<mlir::gpu::Processor, mlir::Value>>&, mlir::PatternRewriter&) SCFToGPU.cpp:0:0 #11 0x0000564e5cc2c8c3 (anonymous namespace)::ParallelToGpuLaunchLowering::matchAndRewrite(mlir::scf::ParallelOp, mlir::PatternRewriter&) const SCFToGPU.cpp:0:0 #12 0x0000564e624a739e void llvm::function_ref<void ()>::callback_fn<mlir::PatternApplicator::matchAndRewrite(mlir::Operation*, mlir::PatternRewriter&, llvm::function_ref<bool (mlir::Pattern const&)>, llvm::function_ref<void (mlir::Pattern const&)>, llvm::function_ref<llvm::LogicalResult (mlir::Pattern const&)>)::$_0>(long) PatternApplicator.cpp:0:0 #13 0x0000564e624a38eb mlir::PatternApplicator::matchAndRewrite(mlir::Operation*, mlir::PatternRewriter&, llvm::function_ref<bool (mlir::Pattern const&)>, llvm::function_ref<void (mlir::Pattern const&)>, llvm::function_ref<llvm::LogicalResult (mlir::Pattern const&)>) (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x8d4f8eb) #14 0x0000564e6245e927 (anonymous namespace)::OperationLegalizer::legalize(mlir::Operation*) DialectConversion.cpp:0:0 #15 0x0000564e6245dc6d mlir::OperationConverter::convert(mlir::Operation*) (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x8d09c6d) #16 0x0000564e6245ecdc mlir::OperationConverter::convertOperations(llvm::ArrayRef<mlir::Operation*>) (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x8d0acdc) #17 0x0000564e6246ddd4 void llvm::function_ref<void ()>::callback_fn<applyConversion(llvm::ArrayRef<mlir::Operation*>, mlir::ConversionTarget const&, mlir::FrozenRewritePatternSet const&, mlir::ConversionConfig, (anonymous namespace)::OpConversionMode)::$_0>(long) DialectConversion.cpp:0:0 #18 0x0000564e624645df applyConversion(llvm::ArrayRef<mlir::Operation*>, mlir::ConversionTarget const&, mlir::FrozenRewritePatternSet const&, mlir::ConversionConfig, (anonymous namespace)::OpConversionMode) DialectConversion.cpp:0:0 #19 0x0000564e624646bb mlir::applyPartialConversion(mlir::Operation*, mlir::ConversionTarget const&, mlir::FrozenRewritePatternSet const&, mlir::ConversionConfig) (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x8d106bb) #20 0x0000564e5cc29c19 (anonymous namespace)::ParallelLoopToGpuPass::runOnOperation() SCFToGPUPass.cpp:0:0 #21 0x0000564e62502bc1 mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int) (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x8daebc1) #22 0x0000564e62503c07 mlir::detail::OpToOpPassAdaptor::runPipeline(mlir::OpPassManager&, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int, mlir::PassInstrumentor*, mlir::PassInstrumentation::PipelineParentInfo const*) (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x8dafc07) #23 0x0000564e6250b1a3 mlir::PassManager::runPasses(mlir::Operation*, mlir::AnalysisManager) (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x8db71a3) #24 0x0000564e6250a7b2 mlir::PassManager::run(mlir::Operation*) (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x8db67b2) #25 0x0000564e5b338603 performActions(llvm::raw_ostream&, std::shared_ptr<llvm::SourceMgr> const&, mlir::MLIRContext*, mlir::MlirOptMainConfig const&) MlirOptMain.cpp:0:0 #26 0x0000564e5b337864 llvm::LogicalResult llvm::function_ref<llvm::LogicalResult (std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::MemoryBufferRef const&, llvm::raw_ostream&)>::callback_fn<mlir::MlirOptMain(llvm::raw_ostream&, std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, mlir::DialectRegistry&, mlir::MlirOptMainConfig const&)::$_0>(long, std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::MemoryBufferRef const&, llvm::raw_ostream&) MlirOptMain.cpp:0:0 #27 0x0000564e6283f928 mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::function_ref<llvm::LogicalResult (std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::MemoryBufferRef const&, llvm::raw_ostream&)>, llvm::raw_ostream&, llvm::StringRef, llvm::StringRef) (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x90eb928) #28 0x0000564e5b32cdac mlir::MlirOptMain(llvm::raw_ostream&, std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, mlir::DialectRegistry&, mlir::MlirOptMainConfig const&) (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x1bd8dac) #29 0x0000564e5b32d0e6 mlir::MlirOptMain(int, char**, llvm::StringRef, llvm::StringRef, mlir::DialectRegistry&) (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x1bd90e6) #30 0x0000564e5b32d2f2 mlir::MlirOptMain(int, char**, llvm::StringRef, mlir::DialectRegistry&) (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x1bd92f2) #31 0x0000564e5b27b24f main (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x1b2724f) #32 0x00007f5475e2a1ca __libc_start_call_main ./csu/../sysdeps/nptl/libc_start_call_main.h:74:3 #33 0x00007f5475e2a28b call_init ./csu/../csu/libc-start.c:128:20 #34 0x00007f5475e2a28b __libc_start_main ./csu/../csu/libc-start.c:347:5 #35 0x0000564e5b27ad75 _start (/mnt/raid/home/rainyday/bin/mlir/bin/mlir-opt+0x1b26d75) Segmentation faultUsed MLIR command
mlir-opt --convert-linalg-to-affine-loops \ --affine-loop-tile="tile-sizes=2,4,8,16,32,64" \ --affine-parallelize \ --lower-affine \ --canonicalize \ --gpu-map-parallel-loops \ --convert-parallel-loops-to-gpu \ main.mlirMy mlir-opt version is:
LLVM (http://llvm.org/): LLVM version 22.0.0git Optimized build with assertions.