
Fixes #141840. This PR implements support for the `memcpy` intrinsic in the DXIL CBuffer Access pass with the following restrictions: - The CBuffer Access must be the `src` operand of `memcpy` and must be direct (i.e., not a GEP) - The type of the CBuffer Access must be an Array Type. These restrictions greatly simplify the implementation of `memcpy` yet still cover the known uses in DML shaders. Furthermore, to prevent errors like #141840 from occurring silently again, this PR adds error reporting for unsupported users of globals in the DXIL CBuffer Access pass.
347 lines
12 KiB
C++
347 lines
12 KiB
C++
//===- DXILCBufferAccess.cpp - Translate CBuffer Loads --------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "DXILCBufferAccess.h"
|
|
#include "DirectX.h"
|
|
#include "llvm/Frontend/HLSL/CBuffer.h"
|
|
#include "llvm/Frontend/HLSL/HLSLResource.h"
|
|
#include "llvm/IR/IRBuilder.h"
|
|
#include "llvm/IR/IntrinsicInst.h"
|
|
#include "llvm/IR/IntrinsicsDirectX.h"
|
|
#include "llvm/InitializePasses.h"
|
|
#include "llvm/Pass.h"
|
|
#include "llvm/Support/FormatVariadic.h"
|
|
#include "llvm/Transforms/Utils/Local.h"
|
|
|
|
#define DEBUG_TYPE "dxil-cbuffer-access"
|
|
using namespace llvm;
|
|
|
|
namespace {
|
|
/// Helper for building a `load.cbufferrow` intrinsic given a simple type.
|
|
struct CBufferRowIntrin {
|
|
Intrinsic::ID IID;
|
|
Type *RetTy;
|
|
unsigned int EltSize;
|
|
unsigned int NumElts;
|
|
|
|
CBufferRowIntrin(const DataLayout &DL, Type *Ty) {
|
|
assert(Ty == Ty->getScalarType() && "Expected scalar type");
|
|
|
|
switch (DL.getTypeSizeInBits(Ty)) {
|
|
case 16:
|
|
IID = Intrinsic::dx_resource_load_cbufferrow_8;
|
|
RetTy = StructType::get(Ty, Ty, Ty, Ty, Ty, Ty, Ty, Ty);
|
|
EltSize = 2;
|
|
NumElts = 8;
|
|
break;
|
|
case 32:
|
|
IID = Intrinsic::dx_resource_load_cbufferrow_4;
|
|
RetTy = StructType::get(Ty, Ty, Ty, Ty);
|
|
EltSize = 4;
|
|
NumElts = 4;
|
|
break;
|
|
case 64:
|
|
IID = Intrinsic::dx_resource_load_cbufferrow_2;
|
|
RetTy = StructType::get(Ty, Ty);
|
|
EltSize = 8;
|
|
NumElts = 2;
|
|
break;
|
|
default:
|
|
llvm_unreachable("Only 16, 32, and 64 bit types supported");
|
|
}
|
|
}
|
|
};
|
|
|
|
// Helper for creating CBuffer handles and loading data from them
|
|
struct CBufferResource {
|
|
GlobalVariable *GVHandle;
|
|
GlobalVariable *Member;
|
|
size_t MemberOffset;
|
|
|
|
LoadInst *Handle;
|
|
|
|
CBufferResource(GlobalVariable *GVHandle, GlobalVariable *Member,
|
|
size_t MemberOffset)
|
|
: GVHandle(GVHandle), Member(Member), MemberOffset(MemberOffset) {}
|
|
|
|
const DataLayout &getDataLayout() { return GVHandle->getDataLayout(); }
|
|
Type *getValueType() { return Member->getValueType(); }
|
|
iterator_range<ConstantDataSequential::user_iterator> users() {
|
|
return Member->users();
|
|
}
|
|
|
|
/// Get the byte offset of a Pointer-typed Value * `Val` relative to Member.
|
|
/// `Val` can either be Member itself, or a GEP of a constant offset from
|
|
/// Member
|
|
size_t getOffsetForCBufferGEP(Value *Val) {
|
|
assert(isa<PointerType>(Val->getType()) &&
|
|
"Expected a pointer-typed value");
|
|
|
|
if (Val == Member)
|
|
return 0;
|
|
|
|
if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
|
|
// Since we should always have a constant offset, we should only ever have
|
|
// a single GEP of indirection from the Global.
|
|
assert(GEP->getPointerOperand() == Member &&
|
|
"Indirect access to resource handle");
|
|
|
|
const DataLayout &DL = getDataLayout();
|
|
APInt ConstantOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
|
|
bool Success = GEP->accumulateConstantOffset(DL, ConstantOffset);
|
|
(void)Success;
|
|
assert(Success && "Offsets into cbuffer globals must be constant");
|
|
|
|
if (auto *ATy = dyn_cast<ArrayType>(Member->getValueType()))
|
|
ConstantOffset =
|
|
hlsl::translateCBufArrayOffset(DL, ConstantOffset, ATy);
|
|
|
|
return ConstantOffset.getZExtValue();
|
|
}
|
|
|
|
llvm_unreachable("Expected Val to be a GlobalVariable or GEP");
|
|
}
|
|
|
|
/// Create a handle for this cbuffer resource using the IRBuilder `Builder`
|
|
/// and sets the handle as the current one to use for subsequent calls to
|
|
/// `loadValue`
|
|
void createAndSetCurrentHandle(IRBuilder<> &Builder) {
|
|
Handle = Builder.CreateLoad(GVHandle->getValueType(), GVHandle,
|
|
GVHandle->getName());
|
|
}
|
|
|
|
/// Load a value of type `Ty` at offset `Offset` using the handle from the
|
|
/// last call to `createAndSetCurrentHandle`
|
|
Value *loadValue(IRBuilder<> &Builder, Type *Ty, size_t Offset,
|
|
const Twine &Name = "") {
|
|
assert(Handle &&
|
|
"Expected a handle for this cbuffer global resource to be created "
|
|
"before loading a value from it");
|
|
const DataLayout &DL = getDataLayout();
|
|
|
|
size_t TargetOffset = MemberOffset + Offset;
|
|
CBufferRowIntrin Intrin(DL, Ty->getScalarType());
|
|
// The cbuffer consists of some number of 16-byte rows.
|
|
unsigned int CurrentRow = TargetOffset / hlsl::CBufferRowSizeInBytes;
|
|
unsigned int CurrentIndex =
|
|
(TargetOffset % hlsl::CBufferRowSizeInBytes) / Intrin.EltSize;
|
|
|
|
auto *CBufLoad = Builder.CreateIntrinsic(
|
|
Intrin.RetTy, Intrin.IID,
|
|
{Handle, ConstantInt::get(Builder.getInt32Ty(), CurrentRow)}, nullptr,
|
|
Name + ".load");
|
|
auto *Elt = Builder.CreateExtractValue(CBufLoad, {CurrentIndex++},
|
|
Name + ".extract");
|
|
|
|
Value *Result = nullptr;
|
|
unsigned int Remaining =
|
|
((DL.getTypeSizeInBits(Ty) / 8) / Intrin.EltSize) - 1;
|
|
|
|
if (Remaining == 0) {
|
|
// We only have a single element, so we're done.
|
|
Result = Elt;
|
|
|
|
// However, if we loaded a <1 x T>, then we need to adjust the type here.
|
|
if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
|
|
assert(VT->getNumElements() == 1 &&
|
|
"Can't have multiple elements here");
|
|
Result = Builder.CreateInsertElement(PoisonValue::get(VT), Result,
|
|
Builder.getInt32(0), Name);
|
|
}
|
|
return Result;
|
|
}
|
|
|
|
// Walk each element and extract it, wrapping to new rows as needed.
|
|
SmallVector<Value *> Extracts{Elt};
|
|
while (Remaining--) {
|
|
CurrentIndex %= Intrin.NumElts;
|
|
|
|
if (CurrentIndex == 0)
|
|
CBufLoad = Builder.CreateIntrinsic(
|
|
Intrin.RetTy, Intrin.IID,
|
|
{Handle, ConstantInt::get(Builder.getInt32Ty(), ++CurrentRow)},
|
|
nullptr, Name + ".load");
|
|
|
|
Extracts.push_back(Builder.CreateExtractValue(CBufLoad, {CurrentIndex++},
|
|
Name + ".extract"));
|
|
}
|
|
|
|
// Finally, we build up the original loaded value.
|
|
Result = PoisonValue::get(Ty);
|
|
for (int I = 0, E = Extracts.size(); I < E; ++I)
|
|
Result =
|
|
Builder.CreateInsertElement(Result, Extracts[I], Builder.getInt32(I),
|
|
Name + formatv(".upto{}", I));
|
|
return Result;
|
|
}
|
|
};
|
|
|
|
} // namespace
|
|
|
|
/// Replace load via cbuffer global with a load from the cbuffer handle itself.
|
|
static void replaceLoad(LoadInst *LI, CBufferResource &CBR,
|
|
SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
|
|
size_t Offset = CBR.getOffsetForCBufferGEP(LI->getPointerOperand());
|
|
IRBuilder<> Builder(LI);
|
|
CBR.createAndSetCurrentHandle(Builder);
|
|
Value *Result = CBR.loadValue(Builder, LI->getType(), Offset, LI->getName());
|
|
LI->replaceAllUsesWith(Result);
|
|
DeadInsts.push_back(LI);
|
|
}
|
|
|
|
/// This function recursively copies N array elements from the cbuffer resource
|
|
/// CBR to the MemCpy Destination. Recursion is used to unravel multidimensional
|
|
/// arrays into a sequence of scalar/vector extracts and stores.
|
|
static void copyArrayElemsForMemCpy(IRBuilder<> &Builder, MemCpyInst *MCI,
|
|
CBufferResource &CBR, ArrayType *ArrTy,
|
|
size_t ArrOffset, size_t N,
|
|
const Twine &Name = "") {
|
|
const DataLayout &DL = MCI->getDataLayout();
|
|
Type *ElemTy = ArrTy->getElementType();
|
|
size_t ElemTySize = DL.getTypeAllocSize(ElemTy);
|
|
for (unsigned I = 0; I < N; ++I) {
|
|
size_t Offset = ArrOffset + I * ElemTySize;
|
|
|
|
// Recursively copy nested arrays
|
|
if (ArrayType *ElemArrTy = dyn_cast<ArrayType>(ElemTy)) {
|
|
copyArrayElemsForMemCpy(Builder, MCI, CBR, ElemArrTy, Offset,
|
|
ElemArrTy->getNumElements(), Name);
|
|
continue;
|
|
}
|
|
|
|
// Load CBuffer value and store it in Dest
|
|
APInt CBufArrayOffset(
|
|
DL.getIndexTypeSizeInBits(MCI->getSource()->getType()), Offset);
|
|
CBufArrayOffset =
|
|
hlsl::translateCBufArrayOffset(DL, CBufArrayOffset, ArrTy);
|
|
Value *CBufferVal =
|
|
CBR.loadValue(Builder, ElemTy, CBufArrayOffset.getZExtValue(), Name);
|
|
Value *GEP =
|
|
Builder.CreateInBoundsGEP(Builder.getInt8Ty(), MCI->getDest(),
|
|
{Builder.getInt32(Offset)}, Name + ".dest");
|
|
Builder.CreateStore(CBufferVal, GEP, MCI->isVolatile());
|
|
}
|
|
}
|
|
|
|
/// Replace memcpy from a cbuffer global with a memcpy from the cbuffer handle
|
|
/// itself. Assumes the cbuffer global is an array, and the length of bytes to
|
|
/// copy is divisible by array element allocation size.
|
|
/// The memcpy source must also be a direct cbuffer global reference, not a GEP.
|
|
static void replaceMemCpy(MemCpyInst *MCI, CBufferResource &CBR) {
|
|
|
|
ArrayType *ArrTy = dyn_cast<ArrayType>(CBR.getValueType());
|
|
assert(ArrTy && "MemCpy lowering is only supported for array types");
|
|
|
|
// This assumption vastly simplifies the implementation
|
|
if (MCI->getSource() != CBR.Member)
|
|
reportFatalUsageError(
|
|
"Expected MemCpy source to be a cbuffer global variable");
|
|
|
|
ConstantInt *Length = dyn_cast<ConstantInt>(MCI->getLength());
|
|
uint64_t ByteLength = Length->getZExtValue();
|
|
|
|
// If length to copy is zero, no memcpy is needed
|
|
if (ByteLength == 0) {
|
|
MCI->eraseFromParent();
|
|
return;
|
|
}
|
|
|
|
const DataLayout &DL = CBR.getDataLayout();
|
|
|
|
Type *ElemTy = ArrTy->getElementType();
|
|
size_t ElemSize = DL.getTypeAllocSize(ElemTy);
|
|
assert(ByteLength % ElemSize == 0 &&
|
|
"Length of bytes to MemCpy must be divisible by allocation size of "
|
|
"source/destination array elements");
|
|
size_t ElemsToCpy = ByteLength / ElemSize;
|
|
|
|
IRBuilder<> Builder(MCI);
|
|
CBR.createAndSetCurrentHandle(Builder);
|
|
|
|
copyArrayElemsForMemCpy(Builder, MCI, CBR, ArrTy, 0, ElemsToCpy,
|
|
"memcpy." + MCI->getDest()->getName() + "." +
|
|
MCI->getSource()->getName());
|
|
|
|
MCI->eraseFromParent();
|
|
}
|
|
|
|
/// Rewrite every access to the cbuffer member global of \p CBR so that it goes
/// through the cbuffer handle instead.
///
/// Users are visited with a worklist: loads and memcpys are lowered directly,
/// GEPs (instructions or constant expressions) are looked through to reach
/// their own users. Any other kind of user is not supported and is reported
/// as a fatal usage error so that it cannot be skipped silently in builds
/// without assertions.
static void replaceAccessesWithHandle(CBufferResource &CBR) {
  // Replaced loads are collected here and deleted together at the end.
  SmallVector<WeakTrackingVH> DeadInsts;

  SmallVector<User *> ToProcess{CBR.users()};
  while (!ToProcess.empty()) {
    User *Cur = ToProcess.pop_back_val();

    // If we have a load instruction, replace the access.
    if (auto *LI = dyn_cast<LoadInst>(Cur)) {
      replaceLoad(LI, CBR, DeadInsts);
      continue;
    }

    // If we have a memcpy instruction, replace it with multiple accesses and
    // subsequent stores to the destination
    if (auto *MCI = dyn_cast<MemCpyInst>(Cur)) {
      replaceMemCpy(MCI, CBR);
      continue;
    }

    // Otherwise, walk users looking for a load...
    if (isa<GetElementPtrInst>(Cur) || isa<GEPOperator>(Cur)) {
      ToProcess.append(Cur->user_begin(), Cur->user_end());
      continue;
    }

    reportFatalUsageError("Unexpected user of Global");
  }
  RecursivelyDeleteTriviallyDeadInstructions(DeadInsts);
}
|
|
|
|
/// Lower all cbuffer member accesses in \p M to accesses through the cbuffer
/// handles, then detach the member globals from the module and erase the
/// cbuffer metadata.
///
/// \returns false if the module has no cbuffer metadata; true otherwise, even
/// if no individual access ended up being rewritten.
static bool replaceCBufferAccesses(Module &M) {
  std::optional<hlsl::CBufferMetadata> CBufMD = hlsl::CBufferMetadata::get(M);
  if (!CBufMD.has_value())
    return false;

  for (const hlsl::CBufferMapping &Mapping : *CBufMD) {
    for (const hlsl::CBufferMember &Member : Mapping.Members) {
      CBufferResource Resource(Mapping.Handle, Member.GV, Member.Offset);
      replaceAccessesWithHandle(Resource);
      // The member global is unlinked from the module here, not deleted.
      Member.GV->removeFromParent();
    }
  }

  CBufMD->eraseFromModule();
  return true;
}
|
|
|
|
/// New pass manager entry point. Reports all analyses as preserved when the
/// module was left untouched, and none as preserved otherwise.
PreservedAnalyses DXILCBufferAccess::run(Module &M, ModuleAnalysisManager &AM) {
  if (replaceCBufferAccesses(M))
    return PreservedAnalyses::none();
  return PreservedAnalyses::all();
}
|
|
|
|
namespace {
|
|
class DXILCBufferAccessLegacy : public ModulePass {
|
|
public:
|
|
bool runOnModule(Module &M) override { return replaceCBufferAccesses(M); }
|
|
StringRef getPassName() const override { return "DXIL CBuffer Access"; }
|
|
DXILCBufferAccessLegacy() : ModulePass(ID) {}
|
|
|
|
static char ID; // Pass identification.
|
|
};
|
|
char DXILCBufferAccessLegacy::ID = 0;
|
|
} // end anonymous namespace
|
|
|
|
// Register the legacy pass, using DEBUG_TYPE ("dxil-cbuffer-access") as its
// argument string and "DXIL CBuffer Access" as its description.
// NOTE(review): the two trailing `false` arguments are presumably the
// macro's CFG-only and is-analysis flags -- confirm against PassSupport.h.
INITIALIZE_PASS(DXILCBufferAccessLegacy, DEBUG_TYPE, "DXIL CBuffer Access",
|
|
false, false)
|
|
|
|
/// Factory for the legacy pass manager version of the DXIL CBuffer Access
/// pass. Returns a newly heap-allocated pass object as a raw pointer.
ModulePass *llvm::createDXILCBufferAccessLegacyPass() {
  ModulePass *LegacyPass = new DXILCBufferAccessLegacy();
  return LegacyPass;
}
|