[mlir][NVVM] Add no-rollback option to NVVM lowering passes (#168477)
Add pass options that run the lowerings to NVVM without pattern rollback. Disabling rollback makes the dialect conversions easier to debug and improves performance and memory usage.
This commit is contained in:
parent 35a95fe9e9
commit 951ab04d6c
@ -628,6 +628,8 @@ def ConvertGpuOpsToNVVMOps : Pass<"convert-gpu-to-nvvm", "gpu::GPUModuleOp"> {
|
||||
/*default=*/"false",
|
||||
"Replace memref arguments in GPU functions with bare pointers. "
|
||||
"All memrefs must have static shape.">,
|
||||
Option<"allowPatternRollback", "allow-pattern-rollback", "bool", "true",
|
||||
"Experimental performance flag to disallow pattern rollback">,
|
||||
ListOption<"allowedDialects", "allowed-dialects", "std::string",
|
||||
"Run conversion patterns of only the specified dialects">,
|
||||
];
|
||||
|
||||
@ -58,6 +58,10 @@ struct GPUToNVVMPipelineOptions
|
||||
"Whether to use the bareptr calling convention on the host (warning "
|
||||
"this should be false until the GPU layering is fixed)"),
|
||||
llvm::cl::init(false)};
|
||||
PassOptions::Option<bool> allowPatternRollback{
|
||||
*this, "allow-pattern-rollback",
|
||||
llvm::cl::desc("Allow pattern rollback during dialect conversion"),
|
||||
llvm::cl::init(true)};
|
||||
};
|
||||
|
||||
// Options for the gpu to xevm pipeline.
|
||||
|
||||
@ -419,7 +419,10 @@ struct LowerGpuOpsToNVVMOpsPass final
|
||||
if (this->hasRedux)
|
||||
populateGpuSubgroupReduceOpLoweringPattern(converter, llvmPatterns);
|
||||
configureGpuToNVVMConversionLegality(target);
|
||||
-    if (failed(applyPartialConversion(m, target, std::move(llvmPatterns))))
+    ConversionConfig config;
+    config.allowPatternRollback = allowPatternRollback;
+    if (failed(
+            applyPartialConversion(m, target, std::move(llvmPatterns), config)))
|
||||
signalPassFailure();
|
||||
}
|
||||
};
|
||||
|
||||
@ -72,6 +72,7 @@ void buildGpuPassPipeline(OpPassManager &pm,
|
||||
ConvertGpuOpsToNVVMOpsOptions opt;
|
||||
opt.useBarePtrCallConv = options.kernelUseBarePtrCallConv;
|
||||
opt.indexBitwidth = options.indexBitWidth;
|
||||
opt.allowPatternRollback = options.allowPatternRollback;
|
||||
pm.addNestedPass<gpu::GPUModuleOp>(createConvertGpuOpsToNVVMOps(opt));
|
||||
pm.addNestedPass<gpu::GPUModuleOp>(createCanonicalizerPass());
|
||||
pm.addNestedPass<gpu::GPUModuleOp>(createCSEPass());
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
// RUN: mlir-opt %s -convert-gpu-to-nvvm='has-redux=1' -split-input-file | FileCheck %s
|
||||
// RUN: mlir-opt %s -convert-gpu-to-nvvm='has-redux=1 allow-pattern-rollback=0' -split-input-file | FileCheck %s
|
||||
// RUN: mlir-opt %s -convert-gpu-to-nvvm='has-redux=1 allowed-dialects=func,arith,cf' -split-input-file | FileCheck %s
|
||||
// RUN: mlir-opt %s -convert-gpu-to-nvvm='has-redux=1 use-bare-ptr-memref-call-conv=1' -split-input-file | FileCheck %s --check-prefix=CHECK-BARE
|
||||
// RUN: mlir-opt %s -transform-interpreter | FileCheck %s
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
// RUN: mlir-opt %s -convert-gpu-to-nvvm | FileCheck %s
|
||||
// RUN: mlir-opt %s -convert-gpu-to-nvvm="allow-pattern-rollback=0" | FileCheck %s
|
||||
// RUN: mlir-opt %s -convert-gpu-to-nvvm='use-bare-ptr-memref-call-conv=1' \
|
||||
// RUN: | FileCheck %s --check-prefix=BARE
|
||||
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
// RUN: mlir-opt --convert-gpu-to-nvvm --split-input-file %s | FileCheck %s
|
||||
// RUN: mlir-opt --convert-gpu-to-nvvm="allow-pattern-rollback=0" --split-input-file %s | FileCheck %s
|
||||
// RUN: mlir-opt --convert-gpu-to-nvvm="index-bitwidth=32" --split-input-file %s | FileCheck --check-prefix=CHECK32 %s
|
||||
|
||||
gpu.module @test_module {
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
// RUN: mlir-opt %s \
|
||||
-// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline \
+// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="allow-pattern-rollback=0" \
|
||||
// RUN: | mlir-runner \
|
||||
// RUN: --shared-libs=%mlir_cuda_runtime \
|
||||
// RUN: --shared-libs=%mlir_runner_utils \
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
// RUN: mlir-opt %s \
|
||||
-// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
+// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \
|
||||
// RUN: | mlir-runner \
|
||||
// RUN: --shared-libs=%mlir_cuda_runtime \
|
||||
// RUN: --shared-libs=%mlir_runner_utils \
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
// RUN: mlir-opt %s \
|
||||
-// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
+// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \
|
||||
// RUN: | mlir-runner \
|
||||
// RUN: --shared-libs=%mlir_cuda_runtime \
|
||||
// RUN: --shared-libs=%mlir_runner_utils \
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
// RUN: mlir-opt %s \
|
||||
-// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
+// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \
|
||||
// RUN: | mlir-runner \
|
||||
// RUN: --shared-libs=%mlir_cuda_runtime \
|
||||
// RUN: --shared-libs=%mlir_runner_utils \
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
// RUN: mlir-opt %s \
|
||||
-// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
+// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \
|
||||
// RUN: | mlir-runner \
|
||||
// RUN: --shared-libs=%mlir_cuda_runtime \
|
||||
// RUN: --shared-libs=%mlir_runner_utils \
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
// RUN: mlir-opt %s \
|
||||
-// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
+// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \
|
||||
// RUN: | mlir-runner \
|
||||
// RUN: --shared-libs=%mlir_cuda_runtime \
|
||||
// RUN: --shared-libs=%mlir_runner_utils \
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
// RUN: mlir-opt %s \
|
||||
-// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
+// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \
|
||||
// RUN: | mlir-runner \
|
||||
// RUN: --shared-libs=%mlir_cuda_runtime \
|
||||
// RUN: --shared-libs=%mlir_runner_utils \
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
// RUN: mlir-opt %s \
|
||||
-// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
+// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \
|
||||
// RUN: | mlir-runner \
|
||||
// RUN: --shared-libs=%mlir_cuda_runtime \
|
||||
// RUN: --shared-libs=%mlir_runner_utils \
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
-// RUN: mlir-opt %s -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
+// RUN: mlir-opt %s -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \
|
||||
// RUN: | mlir-runner \
|
||||
// RUN: --shared-libs=%mlir_cuda_runtime \
|
||||
// RUN: --shared-libs=%mlir_runner_utils \
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
// RUN: mlir-opt %s \
|
||||
-// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-chip=sm_80 ptxas-cmd-options='-v --register-usage-level=8'" -debug-only=serialize-to-binary \
+// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-chip=sm_80 ptxas-cmd-options='-v --register-usage-level=8' allow-pattern-rollback=0" -debug-only=serialize-to-binary \
|
||||
// RUN: 2>&1 | FileCheck %s
|
||||
|
||||
func.func @host_function(%arg0 : f32, %arg1 : memref<?xf32>) {
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
// increment a global atomic counter and wait for the counter to reach 2.
|
||||
//
|
||||
// RUN: mlir-opt %s \
|
||||
-// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
+// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \
|
||||
// RUN: | env CUDA_MODULE_LOADING=EAGER mlir-runner \
|
||||
// RUN: --shared-libs=%mlir_cuda_runtime \
|
||||
// RUN: --shared-libs=%mlir_runner_utils \
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
// RUN: mlir-opt %s \
|
||||
-// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline -debug-only=serialize-to-isa \
+// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="allow-pattern-rollback=0" -debug-only=serialize-to-isa \
|
||||
// RUN: 2>&1 | FileCheck %s
|
||||
|
||||
// CHECK-LABEL: Generated by LLVM NVPTX Back-End
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
// RUN: mlir-opt %s \
|
||||
-// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline -debug-only=dump-sass \
+// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="allow-pattern-rollback=0" -debug-only=dump-sass \
|
||||
// RUN: 2>&1 | FileCheck %s
|
||||
|
||||
// CHECK: MOV
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
// RUN: mlir-opt %s \
|
||||
-// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
+// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \
|
||||
// RUN: | mlir-runner \
|
||||
// RUN: --shared-libs=%mlir_cuda_runtime \
|
||||
// RUN: --shared-libs=%mlir_runner_utils \
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
// RUN: mlir-opt %s \
|
||||
-// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
+// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \
|
||||
// RUN: | mlir-runner \
|
||||
// RUN: --shared-libs=%mlir_cuda_runtime \
|
||||
// RUN: --shared-libs=%mlir_runner_utils \
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
// RUN: mlir-opt %s \
|
||||
-// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
+// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \
|
||||
// RUN: | mlir-runner \
|
||||
// RUN: --shared-libs=%mlir_cuda_runtime \
|
||||
// RUN: --shared-libs=%mlir_runner_utils \
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
// RUN: mlir-opt %s \
|
||||
-// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
+// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \
|
||||
// RUN: | mlir-runner \
|
||||
// RUN: --shared-libs=%mlir_cuda_runtime \
|
||||
// RUN: --shared-libs=%mlir_runner_utils \
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
// RUN: mlir-opt %s \
|
||||
-// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
+// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format allow-pattern-rollback=0" \
|
||||
// RUN: | mlir-runner \
|
||||
// RUN: --shared-libs=%mlir_cuda_runtime \
|
||||
// RUN: --shared-libs=%mlir_runner_utils \
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user