[mlir][acc] Use index for acc.par_width results (#187734)

When acc.par_width was introduced in
https://github.com/llvm/llvm-project/pull/184864,
there was a discussion on whether to use index or create a new type for
the output of the operation. It was decided to create a new type, but
this means that launch arguments cannot be used directly in the region
(for example, as loop bounds) without a conversion from the new type to
index.
To avoid these casts (and the need to introduce an actual operation to
perform them), simply restore acc.par_width to produce a result of
index type. This allows its result to be used directly in
acc.compute_region.
This commit is contained in:
Razvan Lupusoru 2026-03-20 10:36:16 -07:00 committed by GitHub
parent 65d84ea127
commit 5717524c02
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 29 additions and 23 deletions

View File

@ -249,7 +249,7 @@ def OpenACC_ParWidthOp
}];
let arguments = (ins Optional<Index>:$launchArg,
OpenACC_GPUParallelDimAttr:$par_dim);
let results = (outs OpenACC_ParWidthType:$output);
let results = (outs Index:$output);
let assemblyFormat = [{
($launchArg^)? attr-dict
}];
@ -284,10 +284,10 @@ def OpenACC_ComputeRegionOp
The operation is `IsolatedFromAbove`: all values used inside the
region must be explicitly captured. Values are captured in two ways:
- Launch arguments (`launch`): Results of operations that define
the parallel launch configuration. These are `!acc.par_width`-typed
and become block arguments representing the parallel width for each
dimension.
- Launch arguments (`launch`): Results of `acc.par_width`
operations that define the parallel launch configuration. These
become `index`-typed block arguments representing the parallel
width for each dimension.
- Input arguments (`ins`): Arbitrary values captured from outside
the region (data pointers, scalars, etc.). These become block
@ -316,7 +316,7 @@ def OpenACC_ComputeRegionOp
```
}];
let arguments = (ins Variadic<OpenACC_ParWidthType>:$launchArgs,
let arguments = (ins Variadic<Index>:$launchArgs,
Variadic<AnyType>:$inputArgs,
Optional<OpenACC_GPUAsyncTokenType>:$stream,
StrAttr:$origin,

View File

@ -33,12 +33,4 @@ def OpenACC_DeclareTokenType : OpenACC_Type<"DeclareToken", "declare_token"> {
}];
}
def OpenACC_ParWidthType : OpenACC_Type<"ParWidth", "par_width"> {
let summary = "parallel width token type";
let description = [{
Represents a type that is consumed by a compute region in order to
capture its parallelism dimensions arguments.
}];
}
#endif // OPENACC_OPS_TYPES

View File

@ -38,7 +38,10 @@ std::optional<DataLayout> getDataLayout(Operation *op,
///
/// Creates a new `acc.compute_region` with the given launch arguments and
/// origin string, then clones the operations from `regionToClone` into its
/// body. Multi-block regions are wrapped with `scf.execute_region`.
/// body. Launch operands should be `acc.par_width` results (`index`); the
/// region entry block gets matching `index` block arguments first, then
/// arguments for each `ins` operand. Multi-block regions are wrapped with
/// `scf.execute_region`.
///
/// The `mapping` is used and updated during cloning, allowing callers to
/// track value correspondences. Optional `output`, `kernelFuncName`,

View File

@ -455,6 +455,11 @@ BlockArgument ComputeRegionOp::gpuParWidth(gpu::Processor processor) {
}
LogicalResult ComputeRegionOp::verify() {
for (auto op : getLaunchArgs())
if (!op.getDefiningOp<acc::ParWidthOp>())
return emitOpError(
"launch arguments must be results of acc.par_width operations");
unsigned expectedBlockArgs = getLaunchArgs().size() + getInputArgs().size();
unsigned actualBlockArgs = getRegion().front().getNumArguments();
if (expectedBlockArgs != actualBlockArgs)
@ -531,9 +536,9 @@ ParseResult ComputeRegionOp::parse(OpAsmParser &parser,
if (succeeded(parser.parseOptionalKeyword("launch"))) {
if (parser.parseAssignmentList(regionArgs, launchOperands))
return failure();
auto parWidthType = acc::ParWidthType::get(builder.getContext());
Type indexType = builder.getIndexType();
for (size_t i = 0; i < regionArgs.size(); ++i)
types.push_back(parWidthType);
types.push_back(indexType);
}
if (succeeded(parser.parseOptionalKeyword("ins"))) {

View File

@ -25,7 +25,9 @@
// 1. Compute constructs: acc.parallel, acc.serial, and acc.kernels are
// replaced by acc.kernel_environment containing a single acc.compute_region.
// Launch arguments (num_gangs, num_workers, vector_length) become
// acc.par_width ops and are passed as compute_region launch operands.
// acc.par_width ops (each result is `index`) and are passed as
// compute_region launch operands (still required to be acc.par_width
// results by the compute_region verifier).
//
// 2. acc.loop: Converted according to context and attributes:
// - Unstructured: body wrapped in scf.execute_region.

View File

@ -78,10 +78,10 @@ ComputeRegionOp buildComputeRegion(Location loc, ValueRange launchArgs,
assert(mapKeys.size() == inputArgs.size() &&
"inputArgsToMap must have same size as inputArgs when provided");
auto parWidthType = ParWidthType::get(rewriter.getContext());
Type indexType = rewriter.getIndexType();
Block *entryBlock = rewriter.createBlock(&computeRegion.getRegion());
for (size_t i = 0; i < launchArgs.size(); ++i)
entryBlock->addArgument(parWidthType, loc);
entryBlock->addArgument(indexType, loc);
for (Value input : inputArgs)
entryBlock->addArgument(input.getType(), loc);
for (size_t i = 0; i < inputArgs.size(); ++i)

View File

@ -22,9 +22,8 @@ scf.parallel (%iv) = (%c0_2) to (%c4_2) step (%c1_2) {
// -----
// expected-note@+1 {{prior use here}}
%c32 = arith.constant 32 : index
// expected-error@+1 {{use of value '%c32' expects different type than prior uses: '!acc.par_width' vs 'index'}}
// expected-error@+1 {{'acc.compute_region' op launch arguments must be results of acc.par_width operations}}
acc.compute_region launch(%arg0 = %c32) {
acc.yield
} {origin = "acc.parallel"}
@ -38,4 +37,4 @@ acc.compute_region launch(%arg0 = %c32) {
"acc.compute_region"(%w) <{operandSegmentSizes = array<i32: 1, 0, 0>}> ({
^bb0(%arg0: index, %extra: index):
"acc.yield"() : () -> ()
}) {origin = "acc.parallel"} : (!acc.par_width) -> ()
}) {origin = "acc.parallel"} : (index) -> ()

View File

@ -14,6 +14,7 @@
#include "mlir/Dialect/OpenACC/OpenACC.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/OwningOpRef.h"
@ -145,6 +146,10 @@ TEST_F(OpenACCUtilsCGTest, buildComputeRegionWithLaunchArgs) {
EXPECT_EQ(cr.getOrigin(), ParallelOp::getOperationName());
EXPECT_EQ(cr.getLaunchArgs().size(), 1u);
EXPECT_EQ(cr.getLaunchArgs()[0], pw.getResult());
EXPECT_TRUE(llvm::isa<IndexType>(pw.getResult().getType()));
ASSERT_FALSE(cr.getRegion().empty());
EXPECT_TRUE(
llvm::isa<IndexType>(cr.getRegion().front().getArgument(0).getType()));
func::ReturnOp::create(rewriter, loc);
}