wmma intrinsics have a large number of combinations, ideally we want to be able to target all the different variants. To avoid a combinatorial explosion in the number of mlir op we use attributes to represent the different variation of load/store/mma ops. We also can generate with tablegen helpers to know which combinations are available. Using this we can avoid having too hardcode a path for specific shapes and can support more types. This patch also adds boiler plates for tf32 op support. Differential Revision: https://reviews.llvm.org/D112689
263 lines
11 KiB
C++
263 lines
11 KiB
C++
//===- NVVMDialect.cpp - NVVM IR Ops and Dialect registration -------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This file defines the types and operation details for the NVVM IR dialect in
|
|
// MLIR, and the LLVM IR dialect. It also registers the dialect.
|
|
//
|
|
// The NVVM dialect only contains GPU specific additions on top of the general
|
|
// LLVM dialect.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
|
|
|
|
#include "mlir/IR/Builders.h"
|
|
#include "mlir/IR/BuiltinTypes.h"
|
|
#include "mlir/IR/MLIRContext.h"
|
|
#include "mlir/IR/Operation.h"
|
|
#include "mlir/IR/OperationSupport.h"
|
|
#include "llvm/AsmParser/Parser.h"
|
|
#include "llvm/IR/Attributes.h"
|
|
#include "llvm/IR/Function.h"
|
|
#include "llvm/IR/Type.h"
|
|
#include "llvm/Support/SourceMgr.h"
|
|
|
|
using namespace mlir;
|
|
using namespace NVVM;
|
|
|
|
#include "mlir/Dialect/LLVMIR/NVVMOpsDialect.cpp.inc"
|
|
#include "mlir/Dialect/LLVMIR/NVVMOpsEnums.cpp.inc"
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Printing/parsing for NVVM ops
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
static void printNVVMIntrinsicOp(OpAsmPrinter &p, Operation *op) {
|
|
p << " " << op->getOperands();
|
|
if (op->getNumResults() > 0)
|
|
p << " : " << op->getResultTypes();
|
|
}
|
|
|
|
// <operation> ::=
|
|
// `llvm.nvvm.shfl.sync.bfly %dst, %val, %offset, %clamp_and_mask`
|
|
// ({return_value_and_is_valid})? : result_type
|
|
static ParseResult parseNVVMShflSyncBflyOp(OpAsmParser &parser,
|
|
OperationState &result) {
|
|
SmallVector<OpAsmParser::OperandType, 8> ops;
|
|
Type resultType;
|
|
if (parser.parseOperandList(ops) ||
|
|
parser.parseOptionalAttrDict(result.attributes) ||
|
|
parser.parseColonType(resultType) ||
|
|
parser.addTypeToList(resultType, result.types))
|
|
return failure();
|
|
|
|
for (auto &attr : result.attributes) {
|
|
if (attr.first != "return_value_and_is_valid")
|
|
continue;
|
|
auto structType = resultType.dyn_cast<LLVM::LLVMStructType>();
|
|
if (structType && !structType.getBody().empty())
|
|
resultType = structType.getBody()[0];
|
|
break;
|
|
}
|
|
|
|
auto int32Ty = IntegerType::get(parser.getContext(), 32);
|
|
return parser.resolveOperands(ops, {int32Ty, resultType, int32Ty, int32Ty},
|
|
parser.getNameLoc(), result.operands);
|
|
}
|
|
|
|
// <operation> ::= `llvm.nvvm.vote.ballot.sync %mask, %pred` : result_type
|
|
static ParseResult parseNVVMVoteBallotOp(OpAsmParser &parser,
|
|
OperationState &result) {
|
|
MLIRContext *context = parser.getContext();
|
|
auto int32Ty = IntegerType::get(context, 32);
|
|
auto int1Ty = IntegerType::get(context, 1);
|
|
|
|
SmallVector<OpAsmParser::OperandType, 8> ops;
|
|
Type type;
|
|
return failure(parser.parseOperandList(ops) ||
|
|
parser.parseOptionalAttrDict(result.attributes) ||
|
|
parser.parseColonType(type) ||
|
|
parser.addTypeToList(type, result.types) ||
|
|
parser.resolveOperands(ops, {int32Ty, int1Ty},
|
|
parser.getNameLoc(), result.operands));
|
|
}
|
|
|
|
static LogicalResult verify(MmaOp op) {
|
|
MLIRContext *context = op.getContext();
|
|
auto f16Ty = Float16Type::get(context);
|
|
auto f16x2Ty = LLVM::getFixedVectorType(f16Ty, 2);
|
|
auto f32Ty = Float32Type::get(context);
|
|
auto f16x2x4StructTy = LLVM::LLVMStructType::getLiteral(
|
|
context, {f16x2Ty, f16x2Ty, f16x2Ty, f16x2Ty});
|
|
auto f32x8StructTy = LLVM::LLVMStructType::getLiteral(
|
|
context, {f32Ty, f32Ty, f32Ty, f32Ty, f32Ty, f32Ty, f32Ty, f32Ty});
|
|
|
|
SmallVector<Type, 12> operandTypes(op.getOperandTypes().begin(),
|
|
op.getOperandTypes().end());
|
|
if (operandTypes != SmallVector<Type, 8>(8, f16x2Ty) &&
|
|
operandTypes != SmallVector<Type, 12>{f16x2Ty, f16x2Ty, f16x2Ty, f16x2Ty,
|
|
f32Ty, f32Ty, f32Ty, f32Ty, f32Ty,
|
|
f32Ty, f32Ty, f32Ty}) {
|
|
return op.emitOpError(
|
|
"expected operands to be 4 <halfx2>s followed by either "
|
|
"4 <halfx2>s or 8 floats");
|
|
}
|
|
if (op.getType() != f32x8StructTy && op.getType() != f16x2x4StructTy) {
|
|
return op.emitOpError("expected result type to be a struct of either 4 "
|
|
"<halfx2>s or 8 floats");
|
|
}
|
|
|
|
auto alayout = op->getAttrOfType<StringAttr>("alayout");
|
|
auto blayout = op->getAttrOfType<StringAttr>("blayout");
|
|
|
|
if (!(alayout && blayout) ||
|
|
!(alayout.getValue() == "row" || alayout.getValue() == "col") ||
|
|
!(blayout.getValue() == "row" || blayout.getValue() == "col")) {
|
|
return op.emitOpError(
|
|
"alayout and blayout attributes must be set to either "
|
|
"\"row\" or \"col\"");
|
|
}
|
|
|
|
if (operandTypes == SmallVector<Type, 12>{f16x2Ty, f16x2Ty, f16x2Ty, f16x2Ty,
|
|
f32Ty, f32Ty, f32Ty, f32Ty, f32Ty,
|
|
f32Ty, f32Ty, f32Ty} &&
|
|
op.getType() == f32x8StructTy && alayout.getValue() == "row" &&
|
|
blayout.getValue() == "col") {
|
|
return success();
|
|
}
|
|
return op.emitOpError("unimplemented mma.sync variant");
|
|
}
|
|
|
|
std::pair<mlir::Type, unsigned>
|
|
inferMMAType(NVVM::MMATypes type, NVVM::MMAFrag frag, MLIRContext *context) {
|
|
unsigned numberElements = 0;
|
|
Type elementType;
|
|
OpBuilder builder(context);
|
|
Type f16x2 = VectorType::get(2, builder.getF16Type());
|
|
if (type == NVVM::MMATypes::f16) {
|
|
elementType = f16x2;
|
|
if (frag == NVVM::MMAFrag::a || frag == NVVM::MMAFrag::b)
|
|
numberElements = 8;
|
|
else
|
|
numberElements = 4;
|
|
} else if (type == NVVM::MMATypes::f32) {
|
|
elementType = builder.getF32Type();
|
|
numberElements = 8;
|
|
} else if (type == NVVM::MMATypes::tf32) {
|
|
elementType = builder.getI32Type();
|
|
numberElements = 4;
|
|
}
|
|
assert(numberElements != 0 && elementType != nullptr);
|
|
return std::make_pair(elementType, numberElements);
|
|
}
|
|
|
|
static LogicalResult verify(NVVM::WMMALoadOp op) {
|
|
unsigned addressSpace =
|
|
op.ptr().getType().cast<LLVM::LLVMPointerType>().getAddressSpace();
|
|
if (addressSpace != 0 && addressSpace != 1 && addressSpace != 3)
|
|
return op.emitOpError("expected source pointer in memory "
|
|
"space 0, 1, 3");
|
|
|
|
if (NVVM::WMMALoadOp::getIntrinsicID(op.m(), op.n(), op.k(), op.layout(),
|
|
op.eltype(), op.frag()) == 0)
|
|
return op.emitOpError() << "invalid attribute combination";
|
|
std::pair<Type, unsigned> typeInfo =
|
|
inferMMAType(op.eltype(), op.frag(), op.getContext());
|
|
Type dstType = LLVM::LLVMStructType::getLiteral(
|
|
op.getContext(), SmallVector<Type, 8>(typeInfo.second, typeInfo.first));
|
|
if (op.getType() != dstType)
|
|
return op.emitOpError("expected destination type is a structure of ")
|
|
<< typeInfo.second << " elements of type " << typeInfo.first;
|
|
return success();
|
|
}
|
|
|
|
static LogicalResult verify(NVVM::WMMAStoreOp op) {
|
|
unsigned addressSpace =
|
|
op.ptr().getType().cast<LLVM::LLVMPointerType>().getAddressSpace();
|
|
if (addressSpace != 0 && addressSpace != 1 && addressSpace != 3)
|
|
return op.emitOpError("expected operands to be a source pointer in memory "
|
|
"space 0, 1, 3");
|
|
|
|
if (NVVM::WMMAStoreOp::getIntrinsicID(op.m(), op.n(), op.k(), op.layout(),
|
|
op.eltype()) == 0)
|
|
return op.emitOpError() << "invalid attribute combination";
|
|
std::pair<Type, unsigned> typeInfo =
|
|
inferMMAType(op.eltype(), NVVM::MMAFrag::c, op.getContext());
|
|
if (op.args().size() != typeInfo.second)
|
|
return op.emitOpError()
|
|
<< "expected " << typeInfo.second << " data operands";
|
|
if (llvm::any_of(op.args(), [&typeInfo](Value operands) {
|
|
return operands.getType() != typeInfo.first;
|
|
}))
|
|
return op.emitOpError()
|
|
<< "expected data operands of type " << typeInfo.first;
|
|
return success();
|
|
}
|
|
|
|
static LogicalResult verify(NVVM::WMMAMmaOp op) {
|
|
if (NVVM::WMMAMmaOp::getIntrinsicID(op.m(), op.n(), op.k(), op.layoutA(),
|
|
op.layoutB(), op.eltypeA(),
|
|
op.eltypeB()) == 0)
|
|
return op.emitOpError() << "invalid attribute combination";
|
|
std::pair<Type, unsigned> typeInfoA =
|
|
inferMMAType(op.eltypeA(), NVVM::MMAFrag::a, op.getContext());
|
|
std::pair<Type, unsigned> typeInfoB =
|
|
inferMMAType(op.eltypeA(), NVVM::MMAFrag::b, op.getContext());
|
|
std::pair<Type, unsigned> typeInfoC =
|
|
inferMMAType(op.eltypeB(), NVVM::MMAFrag::c, op.getContext());
|
|
SmallVector<Type, 32> arguments;
|
|
arguments.append(typeInfoA.second, typeInfoA.first);
|
|
arguments.append(typeInfoB.second, typeInfoB.first);
|
|
arguments.append(typeInfoC.second, typeInfoC.first);
|
|
unsigned numArgs = arguments.size();
|
|
if (op.args().size() != numArgs)
|
|
return op.emitOpError() << "expected " << numArgs << " arguments";
|
|
for (unsigned i = 0; i < numArgs; i++) {
|
|
if (op.args()[i].getType() != arguments[i])
|
|
return op.emitOpError()
|
|
<< "expected argument " << i << " to be of type " << arguments[i];
|
|
}
|
|
Type dstType = LLVM::LLVMStructType::getLiteral(
|
|
op.getContext(), SmallVector<Type, 8>(typeInfoC.second, typeInfoC.first));
|
|
if (op.getType() != dstType)
|
|
return op.emitOpError("expected destination type is a structure of ")
|
|
<< typeInfoC.second << " elements of type " << typeInfoC.first;
|
|
return success();
|
|
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// NVVMDialect initialization, type parsing, and registration.
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// TODO: This should be the llvm.nvvm dialect once this is supported.
|
|
void NVVMDialect::initialize() {
|
|
addOperations<
|
|
#define GET_OP_LIST
|
|
#include "mlir/Dialect/LLVMIR/NVVMOps.cpp.inc"
|
|
>();
|
|
|
|
// Support unknown operations because not all NVVM operations are
|
|
// registered.
|
|
allowUnknownOperations();
|
|
}
|
|
|
|
LogicalResult NVVMDialect::verifyOperationAttribute(Operation *op,
|
|
NamedAttribute attr) {
|
|
// Kernel function attribute should be attached to functions.
|
|
if (attr.first == NVVMDialect::getKernelFuncAttrName()) {
|
|
if (!isa<LLVM::LLVMFuncOp>(op)) {
|
|
return op->emitError() << "'" << NVVMDialect::getKernelFuncAttrName()
|
|
<< "' attribute attached to unexpected op";
|
|
}
|
|
}
|
|
return success();
|
|
}
|
|
|
|
#define GET_OP_CLASSES
|
|
#include "mlir/Dialect/LLVMIR/NVVMOps.cpp.inc"
|