diff --git a/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h index 2e76985e92e1..6bb46299738d 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h @@ -151,6 +151,13 @@ struct SparsifierOptions : public PassPipelineOptions { desc("Enables GPU acceleration by means of direct library calls (like " "cuSPARSE)")}; + /// This option is used to specify the number of threads of GPU codegen. + PassOptions::Option gpuNumThreads{ + *this, "gpu-num-threads", + desc("Number of threads for GPU codegen. Setting this to 0 enables " + "direct library calls instead."), + init(1024)}; + /// Projects out the options for `createSparsificationPass`. SparsificationOptions sparsificationOptions() const { return SparsificationOptions(parallelization, emitStrategy, diff --git a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp index dabbea1bdec6..d85966ec88f9 100644 --- a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp +++ b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp @@ -53,7 +53,8 @@ void mlir::sparse_tensor::buildSparsifier(OpPassManager &pm, // GPU code generation. const bool gpuCodegen = options.gpuTriple.hasValue(); if (gpuCodegen) { - pm.addPass(createSparseGPUCodegenPass()); + pm.addPass(createSparseGPUCodegenPass(options.gpuNumThreads, + options.enableRuntimeLibrary)); pm.addNestedPass(createStripDebugInfoPass()); pm.addNestedPass(createSCFToControlFlowPass()); pm.addNestedPass(createConvertGpuOpsToNVVMOps());