diff --git a/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h b/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h
index 2e76985e92e1..6bb46299738d 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h
@@ -151,6 +151,13 @@ struct SparsifierOptions : public PassPipelineOptions<SparsifierOptions> {
       desc("Enables GPU acceleration by means of direct library calls (like "
            "cuSPARSE)")};
 
+  /// Number of threads to use for GPU codegen; 0 selects direct library calls.
+  PassOptions::Option<unsigned> gpuNumThreads{
+      *this, "gpu-num-threads",
+      desc("Number of threads for GPU codegen. Setting this to 0 enables "
+           "direct library calls instead."),
+      init(1024)};
+
   /// Projects out the options for `createSparsificationPass`.
   SparsificationOptions sparsificationOptions() const {
     return SparsificationOptions(parallelization, emitStrategy,
diff --git a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
index dabbea1bdec6..d85966ec88f9 100644
--- a/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp
@@ -53,7 +53,8 @@ void mlir::sparse_tensor::buildSparsifier(OpPassManager &pm,
   // GPU code generation.
   const bool gpuCodegen = options.gpuTriple.hasValue();
   if (gpuCodegen) {
-    pm.addPass(createSparseGPUCodegenPass());
+    pm.addPass(createSparseGPUCodegenPass(options.gpuNumThreads,
+                                          options.enableRuntimeLibrary));
     pm.addNestedPass<gpu::GPUModuleOp>(createStripDebugInfoPass());
     pm.addNestedPass<gpu::GPUModuleOp>(createSCFToControlFlowPass());
     pm.addNestedPass<gpu::GPUModuleOp>(createConvertGpuOpsToNVVMOps());