diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td index f6ae871eb993..63fc8476c08d 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td @@ -249,7 +249,7 @@ def OpenACC_ParWidthOp }]; let arguments = (ins Optional:$launchArg, OpenACC_GPUParallelDimAttr:$par_dim); - let results = (outs OpenACC_ParWidthType:$output); + let results = (outs Index:$output); let assemblyFormat = [{ ($launchArg^)? attr-dict }]; @@ -284,10 +284,10 @@ def OpenACC_ComputeRegionOp The operation is `IsolatedFromAbove`: all values used inside the region must be explicitly captured. Values are captured in two ways: - - Launch arguments (`launch`): Results of operations that define - the parallel launch configuration. These are `!acc.par_width`-typed - and become block arguments representing the parallel width for each - dimension. + - Launch arguments (`launch`): Results of `acc.par_width` + operations that define the parallel launch configuration. These + become `index`-typed block arguments representing the parallel + width for each dimension. - Input arguments (`ins`): Arbitrary values captured from outside the region (data pointers, scalars, etc.). These become block @@ -316,7 +316,7 @@ def OpenACC_ComputeRegionOp ``` }]; - let arguments = (ins Variadic:$launchArgs, + let arguments = (ins Variadic:$launchArgs, Variadic:$inputArgs, Optional:$stream, StrAttr:$origin, diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td index bba385e69c0f..117272693d62 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td @@ -33,12 +33,4 @@ def OpenACC_DeclareTokenType : OpenACC_Type<"DeclareToken", "declare_token"> { }]; } -def OpenACC_ParWidthType : OpenACC_Type<"ParWidth", "par_width"> { - let summary = "parallel width token type"; - let description = [{ - Represents a type that is consumed by a compute region in order to - capture its parallelism dimensions arguments. - }]; -} - #endif // OPENACC_OPS_TYPES diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsCG.h b/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsCG.h index f72d08085874..bb132f5d02e8 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsCG.h +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCUtilsCG.h @@ -38,7 +38,10 @@ std::optional getDataLayout(Operation *op, /// /// Creates a new `acc.compute_region` with the given launch arguments and /// origin string, then clones the operations from `regionToClone` into its -/// body. Multi-block regions are wrapped with `scf.execute_region`. +/// body. Launch operands should be `acc.par_width` results (`index`); the +/// region entry block gets matching `index` block arguments first, then +/// arguments for each `ins` operand. Multi-block regions are wrapped with +/// `scf.execute_region`. /// /// The `mapping` is used and updated during cloning, allowing callers to /// track value correspondences. Optional `output`, `kernelFuncName`, diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp index 7b1cfaa04880..04f8c848c728 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp @@ -455,6 +455,11 @@ BlockArgument ComputeRegionOp::gpuParWidth(gpu::Processor processor) { } LogicalResult ComputeRegionOp::verify() { + for (auto op : getLaunchArgs()) + if (!op.getDefiningOp()) + return emitOpError( + "launch arguments must be results of acc.par_width operations"); + unsigned expectedBlockArgs = getLaunchArgs().size() + getInputArgs().size(); unsigned actualBlockArgs = getRegion().front().getNumArguments(); if (expectedBlockArgs != actualBlockArgs) @@ -531,9 +536,9 @@ ParseResult ComputeRegionOp::parse(OpAsmParser &parser, if (succeeded(parser.parseOptionalKeyword("launch"))) { if (parser.parseAssignmentList(regionArgs, launchOperands)) return failure(); - auto parWidthType = acc::ParWidthType::get(builder.getContext()); + Type indexType = builder.getIndexType(); for (size_t i = 0; i < regionArgs.size(); ++i) - types.push_back(parWidthType); + types.push_back(indexType); } if (succeeded(parser.parseOptionalKeyword("ins"))) { diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCComputeLowering.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCComputeLowering.cpp index db504afafc22..e0b0acff57ca 100644 --- a/mlir/lib/Dialect/OpenACC/Transforms/ACCComputeLowering.cpp +++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCComputeLowering.cpp @@ -25,7 +25,9 @@ // 1. Compute constructs: acc.parallel, acc.serial, and acc.kernels are // replaced by acc.kernel_environment containing a single acc.compute_region. // Launch arguments (num_gangs, num_workers, vector_length) become -// acc.par_width ops and are passed as compute_region launch operands. +// acc.par_width ops (each result is `index`) and are passed as +// compute_region launch operands (still required to be acc.par_width +// results by the compute_region verifier). // // 2. acc.loop: Converted according to context and attributes: // - Unstructured: body wrapped in scf.execute_region. diff --git a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsCG.cpp b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsCG.cpp index c5fa50642cad..d18522c3c440 100644 --- a/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsCG.cpp +++ b/mlir/lib/Dialect/OpenACC/Utils/OpenACCUtilsCG.cpp @@ -78,10 +78,10 @@ ComputeRegionOp buildComputeRegion(Location loc, ValueRange launchArgs, assert(mapKeys.size() == inputArgs.size() && "inputArgsToMap must have same size as inputArgs when provided"); - auto parWidthType = ParWidthType::get(rewriter.getContext()); + Type indexType = rewriter.getIndexType(); Block *entryBlock = rewriter.createBlock(&computeRegion.getRegion()); for (size_t i = 0; i < launchArgs.size(); ++i) - entryBlock->addArgument(parWidthType, loc); + entryBlock->addArgument(indexType, loc); for (Value input : inputArgs) entryBlock->addArgument(input.getType(), loc); for (size_t i = 0; i < inputArgs.size(); ++i) diff --git a/mlir/test/Dialect/OpenACC/invalid-cg.mlir b/mlir/test/Dialect/OpenACC/invalid-cg.mlir index f788e6c03bcc..d218bc505a5e 100644 --- a/mlir/test/Dialect/OpenACC/invalid-cg.mlir +++ b/mlir/test/Dialect/OpenACC/invalid-cg.mlir @@ -22,9 +22,8 @@ scf.parallel (%iv) = (%c0_2) to (%c4_2) step (%c1_2) { // ----- -// expected-note@+1 {{prior use here}} %c32 = arith.constant 32 : index -// expected-error@+1 {{use of value '%c32' expects different type than prior uses: '!acc.par_width' vs 'index'}} +// expected-error@+1 {{'acc.compute_region' op launch arguments must be results of acc.par_width operations}} acc.compute_region launch(%arg0 = %c32) { acc.yield } {origin = "acc.parallel"} @@ -38,4 +37,4 @@ acc.compute_region launch(%arg0 = %c32) { "acc.compute_region"(%w) <{operandSegmentSizes = array}> ({ ^bb0(%arg0: index, %extra: index): "acc.yield"() : () -> () -}) {origin = "acc.parallel"} : (!acc.par_width) -> () +}) {origin = "acc.parallel"} : (index) -> () diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp index 2940145e40c7..e7e5974ed5c7 100644 --- a/mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp +++ b/mlir/unittests/Dialect/OpenACC/OpenACCUtilsCGTest.cpp @@ -14,6 +14,7 @@ #include "mlir/Dialect/OpenACC/OpenACC.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/IRMapping.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/OwningOpRef.h" @@ -145,6 +146,10 @@ TEST_F(OpenACCUtilsCGTest, buildComputeRegionWithLaunchArgs) { EXPECT_EQ(cr.getOrigin(), ParallelOp::getOperationName()); EXPECT_EQ(cr.getLaunchArgs().size(), 1u); EXPECT_EQ(cr.getLaunchArgs()[0], pw.getResult()); + EXPECT_TRUE(llvm::isa(pw.getResult().getType())); + ASSERT_FALSE(cr.getRegion().empty()); + EXPECT_TRUE( + llvm::isa(cr.getRegion().front().getArgument(0).getType())); func::ReturnOp::create(rewriter, loc); }