[mlir][sparse] add GPU num threads to sparsifier options (#189078)

This change adds a `gpu-num-threads` option to the sparsifier. This
allows users to specify the number of threads used for GPU codegen,
similar to the `num-threads` option in the `-sparse-gpu-codegen` pass.
This commit is contained in:
Vito Secona 2026-04-02 00:42:26 +07:00 committed by GitHub
parent 24b6ee90c1
commit fbf484009c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 9 additions and 1 deletions

View File

@ -151,6 +151,13 @@ struct SparsifierOptions : public PassPipelineOptions<SparsifierOptions> {
desc("Enables GPU acceleration by means of direct library calls (like "
"cuSPARSE)")};
/// Pipeline option `gpu-num-threads`: the number of threads to use for GPU
/// codegen. Defaults to 1024. Per the option description, setting it to 0
/// enables direct library calls instead. The sparsifier pipeline passes this
/// value to `createSparseGPUCodegenPass`.
PassOptions::Option<unsigned> gpuNumThreads{
*this, "gpu-num-threads",
desc("Number of threads for GPU codegen. Setting this to 0 enables "
"direct library calls instead."),
init(1024)};
/// Projects out the options for `createSparsificationPass`.
SparsificationOptions sparsificationOptions() const {
return SparsificationOptions(parallelization, emitStrategy,

View File

@ -53,7 +53,8 @@ void mlir::sparse_tensor::buildSparsifier(OpPassManager &pm,
// GPU code generation.
const bool gpuCodegen = options.gpuTriple.hasValue();
if (gpuCodegen) {
pm.addPass(createSparseGPUCodegenPass());
pm.addPass(createSparseGPUCodegenPass(options.gpuNumThreads,
options.enableRuntimeLibrary));
pm.addNestedPass<gpu::GPUModuleOp>(createStripDebugInfoPass());
pm.addNestedPass<gpu::GPUModuleOp>(createSCFToControlFlowPass());
pm.addNestedPass<gpu::GPUModuleOp>(createConvertGpuOpsToNVVMOps());