[mlir][sparse] add GPU num threads to sparsifier options (#189078)
This change adds a `gpu-num-threads` option to the sparsifier. This allows users to specify the number of threads used for GPU codegen, similar to the `num-threads` option in the `-sparse-gpu-codegen` pass.
This commit is contained in:
parent
24b6ee90c1
commit
fbf484009c
@ -151,6 +151,13 @@ struct SparsifierOptions : public PassPipelineOptions<SparsifierOptions> {
|
||||
desc("Enables GPU acceleration by means of direct library calls (like "
|
||||
"cuSPARSE)")};
|
||||
|
||||
/// Specifies the number of threads used for GPU codegen; a value of 0 selects direct library calls instead.
|
||||
PassOptions::Option<unsigned> gpuNumThreads{
|
||||
*this, "gpu-num-threads",
|
||||
desc("Number of threads for GPU codegen. Setting this to 0 enables "
|
||||
"direct library calls instead."),
|
||||
init(1024)};
|
||||
|
||||
/// Projects out the options for `createSparsificationPass`.
|
||||
SparsificationOptions sparsificationOptions() const {
|
||||
return SparsificationOptions(parallelization, emitStrategy,
|
||||
|
||||
@ -53,7 +53,8 @@ void mlir::sparse_tensor::buildSparsifier(OpPassManager &pm,
|
||||
// GPU code generation.
|
||||
const bool gpuCodegen = options.gpuTriple.hasValue();
|
||||
if (gpuCodegen) {
|
||||
pm.addPass(createSparseGPUCodegenPass());
|
||||
pm.addPass(createSparseGPUCodegenPass(options.gpuNumThreads,
|
||||
options.enableRuntimeLibrary));
|
||||
pm.addNestedPass<gpu::GPUModuleOp>(createStripDebugInfoPass());
|
||||
pm.addNestedPass<gpu::GPUModuleOp>(createSCFToControlFlowPass());
|
||||
pm.addNestedPass<gpu::GPUModuleOp>(createConvertGpuOpsToNVVMOps());
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user