[mlir][acc] Use index for acc.par_width results (#187734)
When acc.par_width was introduced in https://github.com/llvm/llvm-project/pull/184864, there was a discussion on whether the result of the operation should use `index` or a newly created type. It was decided to create a new type, but this means that launch arguments cannot be used directly in the region (for example, as loop bounds) without a conversion from the new type to `index`. To avoid these casts (and the introduction of an actual operation to perform them), restore acc.par_width to produce a result of `index` type. This allows its result to be used directly in acc.compute_region.
This commit is contained in:
parent
65d84ea127
commit
5717524c02
@ -249,7 +249,7 @@ def OpenACC_ParWidthOp
|
||||
}];
|
||||
let arguments = (ins Optional<Index>:$launchArg,
|
||||
OpenACC_GPUParallelDimAttr:$par_dim);
|
||||
let results = (outs OpenACC_ParWidthType:$output);
|
||||
let results = (outs Index:$output);
|
||||
let assemblyFormat = [{
|
||||
($launchArg^)? attr-dict
|
||||
}];
|
||||
@ -284,10 +284,10 @@ def OpenACC_ComputeRegionOp
|
||||
The operation is `IsolatedFromAbove`: all values used inside the
|
||||
region must be explicitly captured. Values are captured in two ways:
|
||||
|
||||
- Launch arguments (`launch`): Results of operations that define
|
||||
the parallel launch configuration. These are `!acc.par_width`-typed
|
||||
and become block arguments representing the parallel width for each
|
||||
dimension.
|
||||
- Launch arguments (`launch`): Results of `acc.par_width`
|
||||
operations that define the parallel launch configuration. These
|
||||
become `index`-typed block arguments representing the parallel
|
||||
width for each dimension.
|
||||
|
||||
- Input arguments (`ins`): Arbitrary values captured from outside
|
||||
the region (data pointers, scalars, etc.). These become block
|
||||
@ -316,7 +316,7 @@ def OpenACC_ComputeRegionOp
|
||||
```
|
||||
}];
|
||||
|
||||
let arguments = (ins Variadic<OpenACC_ParWidthType>:$launchArgs,
|
||||
let arguments = (ins Variadic<Index>:$launchArgs,
|
||||
Variadic<AnyType>:$inputArgs,
|
||||
Optional<OpenACC_GPUAsyncTokenType>:$stream,
|
||||
StrAttr:$origin,
|
||||
|
||||
@ -33,12 +33,4 @@ def OpenACC_DeclareTokenType : OpenACC_Type<"DeclareToken", "declare_token"> {
|
||||
}];
|
||||
}
|
||||
|
||||
def OpenACC_ParWidthType : OpenACC_Type<"ParWidth", "par_width"> {
|
||||
let summary = "parallel width token type";
|
||||
let description = [{
|
||||
Represents a type that is consumed by a compute region in order to
|
||||
capture its parallelism dimensions arguments.
|
||||
}];
|
||||
}
|
||||
|
||||
#endif // OPENACC_OPS_TYPES
|
||||
|
||||
@ -38,7 +38,10 @@ std::optional<DataLayout> getDataLayout(Operation *op,
|
||||
///
|
||||
/// Creates a new `acc.compute_region` with the given launch arguments and
|
||||
/// origin string, then clones the operations from `regionToClone` into its
|
||||
/// body. Multi-block regions are wrapped with `scf.execute_region`.
|
||||
/// body. Launch operands should be `acc.par_width` results (`index`); the
|
||||
/// region entry block gets matching `index` block arguments first, then
|
||||
/// arguments for each `ins` operand. Multi-block regions are wrapped with
|
||||
/// `scf.execute_region`.
|
||||
///
|
||||
/// The `mapping` is used and updated during cloning, allowing callers to
|
||||
/// track value correspondences. Optional `output`, `kernelFuncName`,
|
||||
|
||||
@ -455,6 +455,11 @@ BlockArgument ComputeRegionOp::gpuParWidth(gpu::Processor processor) {
|
||||
}
|
||||
|
||||
LogicalResult ComputeRegionOp::verify() {
|
||||
for (auto op : getLaunchArgs())
|
||||
if (!op.getDefiningOp<acc::ParWidthOp>())
|
||||
return emitOpError(
|
||||
"launch arguments must be results of acc.par_width operations");
|
||||
|
||||
unsigned expectedBlockArgs = getLaunchArgs().size() + getInputArgs().size();
|
||||
unsigned actualBlockArgs = getRegion().front().getNumArguments();
|
||||
if (expectedBlockArgs != actualBlockArgs)
|
||||
@ -531,9 +536,9 @@ ParseResult ComputeRegionOp::parse(OpAsmParser &parser,
|
||||
if (succeeded(parser.parseOptionalKeyword("launch"))) {
|
||||
if (parser.parseAssignmentList(regionArgs, launchOperands))
|
||||
return failure();
|
||||
auto parWidthType = acc::ParWidthType::get(builder.getContext());
|
||||
Type indexType = builder.getIndexType();
|
||||
for (size_t i = 0; i < regionArgs.size(); ++i)
|
||||
types.push_back(parWidthType);
|
||||
types.push_back(indexType);
|
||||
}
|
||||
|
||||
if (succeeded(parser.parseOptionalKeyword("ins"))) {
|
||||
|
||||
@ -25,7 +25,9 @@
|
||||
// 1. Compute constructs: acc.parallel, acc.serial, and acc.kernels are
|
||||
// replaced by acc.kernel_environment containing a single acc.compute_region.
|
||||
// Launch arguments (num_gangs, num_workers, vector_length) become
|
||||
// acc.par_width ops and are passed as compute_region launch operands.
|
||||
// acc.par_width ops (each result is `index`) and are passed as
|
||||
// compute_region launch operands (still required to be acc.par_width
|
||||
// results by the compute_region verifier).
|
||||
//
|
||||
// 2. acc.loop: Converted according to context and attributes:
|
||||
// - Unstructured: body wrapped in scf.execute_region.
|
||||
|
||||
@ -78,10 +78,10 @@ ComputeRegionOp buildComputeRegion(Location loc, ValueRange launchArgs,
|
||||
assert(mapKeys.size() == inputArgs.size() &&
|
||||
"inputArgsToMap must have same size as inputArgs when provided");
|
||||
|
||||
auto parWidthType = ParWidthType::get(rewriter.getContext());
|
||||
Type indexType = rewriter.getIndexType();
|
||||
Block *entryBlock = rewriter.createBlock(&computeRegion.getRegion());
|
||||
for (size_t i = 0; i < launchArgs.size(); ++i)
|
||||
entryBlock->addArgument(parWidthType, loc);
|
||||
entryBlock->addArgument(indexType, loc);
|
||||
for (Value input : inputArgs)
|
||||
entryBlock->addArgument(input.getType(), loc);
|
||||
for (size_t i = 0; i < inputArgs.size(); ++i)
|
||||
|
||||
@ -22,9 +22,8 @@ scf.parallel (%iv) = (%c0_2) to (%c4_2) step (%c1_2) {
|
||||
|
||||
// -----
|
||||
|
||||
// expected-note@+1 {{prior use here}}
|
||||
%c32 = arith.constant 32 : index
|
||||
// expected-error@+1 {{use of value '%c32' expects different type than prior uses: '!acc.par_width' vs 'index'}}
|
||||
// expected-error@+1 {{'acc.compute_region' op launch arguments must be results of acc.par_width operations}}
|
||||
acc.compute_region launch(%arg0 = %c32) {
|
||||
acc.yield
|
||||
} {origin = "acc.parallel"}
|
||||
@ -38,4 +37,4 @@ acc.compute_region launch(%arg0 = %c32) {
|
||||
"acc.compute_region"(%w) <{operandSegmentSizes = array<i32: 1, 0, 0>}> ({
|
||||
^bb0(%arg0: index, %extra: index):
|
||||
"acc.yield"() : () -> ()
|
||||
}) {origin = "acc.parallel"} : (!acc.par_width) -> ()
|
||||
}) {origin = "acc.parallel"} : (index) -> ()
|
||||
|
||||
@ -14,6 +14,7 @@
|
||||
#include "mlir/Dialect/OpenACC/OpenACC.h"
|
||||
#include "mlir/Dialect/SCF/IR/SCF.h"
|
||||
#include "mlir/IR/BuiltinOps.h"
|
||||
#include "mlir/IR/BuiltinTypes.h"
|
||||
#include "mlir/IR/IRMapping.h"
|
||||
#include "mlir/IR/MLIRContext.h"
|
||||
#include "mlir/IR/OwningOpRef.h"
|
||||
@ -145,6 +146,10 @@ TEST_F(OpenACCUtilsCGTest, buildComputeRegionWithLaunchArgs) {
|
||||
EXPECT_EQ(cr.getOrigin(), ParallelOp::getOperationName());
|
||||
EXPECT_EQ(cr.getLaunchArgs().size(), 1u);
|
||||
EXPECT_EQ(cr.getLaunchArgs()[0], pw.getResult());
|
||||
EXPECT_TRUE(llvm::isa<IndexType>(pw.getResult().getType()));
|
||||
ASSERT_FALSE(cr.getRegion().empty());
|
||||
EXPECT_TRUE(
|
||||
llvm::isa<IndexType>(cr.getRegion().front().getArgument(0).getType()));
|
||||
|
||||
func::ReturnOp::create(rewriter, loc);
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user