[DirectX] Flatten arrays (#114332)
- The relevant piece is `DXILFlattenArrays.cpp`.
- The Load and Store instruction visits exist only to find GetElementPtr constant expressions and split them into instructions.
- Allocas needed to be replaced with flattened allocas.
- Global arrays were similar to allocas. The only interesting piece here is around initializers.
- Most of the work went into building correct GEP chains. The approach here was a recursive strategy via `recursivelyCollectGEPs`.
- All intermediary GEPs get marked for deletion and only the leaf GEPs get updated with the new index.

Fixes [89646](https://github.com/llvm/llvm-project/issues/89646)
This commit is contained in:
parent
de6d48d05d
commit
5ac624c823
@ -22,6 +22,7 @@ add_llvm_target(DirectXCodeGen
|
|||||||
DXContainerGlobals.cpp
|
DXContainerGlobals.cpp
|
||||||
DXILDataScalarization.cpp
|
DXILDataScalarization.cpp
|
||||||
DXILFinalizeLinkage.cpp
|
DXILFinalizeLinkage.cpp
|
||||||
|
DXILFlattenArrays.cpp
|
||||||
DXILIntrinsicExpansion.cpp
|
DXILIntrinsicExpansion.cpp
|
||||||
DXILOpBuilder.cpp
|
DXILOpBuilder.cpp
|
||||||
DXILOpLowering.cpp
|
DXILOpLowering.cpp
|
||||||
|
443
llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
Normal file
443
llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
Normal file
@ -0,0 +1,443 @@
|
|||||||
|
//===- DXILFlattenArrays.cpp - Flattens DXIL Arrays-----------------------===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
///
|
||||||
|
/// \file This file contains a pass to flatten arrays for the DirectX Backend.
|
||||||
|
///
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#include "DXILFlattenArrays.h"
|
||||||
|
#include "DirectX.h"
|
||||||
|
#include "llvm/ADT/PostOrderIterator.h"
|
||||||
|
#include "llvm/ADT/STLExtras.h"
|
||||||
|
#include "llvm/Analysis/DXILResource.h"
|
||||||
|
#include "llvm/IR/BasicBlock.h"
|
||||||
|
#include "llvm/IR/DerivedTypes.h"
|
||||||
|
#include "llvm/IR/IRBuilder.h"
|
||||||
|
#include "llvm/IR/InstVisitor.h"
|
||||||
|
#include "llvm/IR/ReplaceConstant.h"
|
||||||
|
#include "llvm/Support/Casting.h"
|
||||||
|
#include "llvm/Transforms/Utils/Local.h"
|
||||||
|
#include <cassert>
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
|
#define DEBUG_TYPE "dxil-flatten-arrays"
|
||||||
|
|
||||||
|
using namespace llvm;
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
class DXILFlattenArraysLegacy : public ModulePass {
|
||||||
|
|
||||||
|
public:
|
||||||
|
bool runOnModule(Module &M) override;
|
||||||
|
DXILFlattenArraysLegacy() : ModulePass(ID) {}
|
||||||
|
|
||||||
|
void getAnalysisUsage(AnalysisUsage &AU) const override;
|
||||||
|
static char ID; // Pass identification.
|
||||||
|
};
|
||||||
|
|
||||||
|
struct GEPData {
|
||||||
|
ArrayType *ParentArrayType;
|
||||||
|
Value *ParendOperand;
|
||||||
|
SmallVector<Value *> Indices;
|
||||||
|
SmallVector<uint64_t> Dims;
|
||||||
|
bool AllIndicesAreConstInt;
|
||||||
|
};
|
||||||
|
|
||||||
|
class DXILFlattenArraysVisitor
|
||||||
|
: public InstVisitor<DXILFlattenArraysVisitor, bool> {
|
||||||
|
public:
|
||||||
|
DXILFlattenArraysVisitor() {}
|
||||||
|
bool visit(Function &F);
|
||||||
|
// InstVisitor methods. They return true if the instruction was scalarized,
|
||||||
|
// false if nothing changed.
|
||||||
|
bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
|
||||||
|
bool visitAllocaInst(AllocaInst &AI);
|
||||||
|
bool visitInstruction(Instruction &I) { return false; }
|
||||||
|
bool visitSelectInst(SelectInst &SI) { return false; }
|
||||||
|
bool visitICmpInst(ICmpInst &ICI) { return false; }
|
||||||
|
bool visitFCmpInst(FCmpInst &FCI) { return false; }
|
||||||
|
bool visitUnaryOperator(UnaryOperator &UO) { return false; }
|
||||||
|
bool visitBinaryOperator(BinaryOperator &BO) { return false; }
|
||||||
|
bool visitCastInst(CastInst &CI) { return false; }
|
||||||
|
bool visitBitCastInst(BitCastInst &BCI) { return false; }
|
||||||
|
bool visitInsertElementInst(InsertElementInst &IEI) { return false; }
|
||||||
|
bool visitExtractElementInst(ExtractElementInst &EEI) { return false; }
|
||||||
|
bool visitShuffleVectorInst(ShuffleVectorInst &SVI) { return false; }
|
||||||
|
bool visitPHINode(PHINode &PHI) { return false; }
|
||||||
|
bool visitLoadInst(LoadInst &LI);
|
||||||
|
bool visitStoreInst(StoreInst &SI);
|
||||||
|
bool visitCallInst(CallInst &ICI) { return false; }
|
||||||
|
bool visitFreezeInst(FreezeInst &FI) { return false; }
|
||||||
|
static bool isMultiDimensionalArray(Type *T);
|
||||||
|
static std::pair<unsigned, Type *> getElementCountAndType(Type *ArrayTy);
|
||||||
|
|
||||||
|
private:
|
||||||
|
SmallVector<WeakTrackingVH> PotentiallyDeadInstrs;
|
||||||
|
DenseMap<GetElementPtrInst *, GEPData> GEPChainMap;
|
||||||
|
bool finish();
|
||||||
|
ConstantInt *genConstFlattenIndices(ArrayRef<Value *> Indices,
|
||||||
|
ArrayRef<uint64_t> Dims,
|
||||||
|
IRBuilder<> &Builder);
|
||||||
|
Value *genInstructionFlattenIndices(ArrayRef<Value *> Indices,
|
||||||
|
ArrayRef<uint64_t> Dims,
|
||||||
|
IRBuilder<> &Builder);
|
||||||
|
void
|
||||||
|
recursivelyCollectGEPs(GetElementPtrInst &CurrGEP,
|
||||||
|
ArrayType *FlattenedArrayType, Value *PtrOperand,
|
||||||
|
unsigned &GEPChainUseCount,
|
||||||
|
SmallVector<Value *> Indices = SmallVector<Value *>(),
|
||||||
|
SmallVector<uint64_t> Dims = SmallVector<uint64_t>(),
|
||||||
|
bool AllIndicesAreConstInt = true);
|
||||||
|
bool visitGetElementPtrInstInGEPChain(GetElementPtrInst &GEP);
|
||||||
|
bool visitGetElementPtrInstInGEPChainBase(GEPData &GEPInfo,
|
||||||
|
GetElementPtrInst &GEP);
|
||||||
|
};
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
bool DXILFlattenArraysVisitor::finish() {
|
||||||
|
RecursivelyDeleteTriviallyDeadInstructionsPermissive(PotentiallyDeadInstrs);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true when \p T is an array type whose elements are arrays too.
bool DXILFlattenArraysVisitor::isMultiDimensionalArray(Type *T) {
  auto *OuterTy = dyn_cast<ArrayType>(T);
  return OuterTy && isa<ArrayType>(OuterTy->getElementType());
}
|
||||||
|
|
||||||
|
std::pair<unsigned, Type *>
|
||||||
|
DXILFlattenArraysVisitor::getElementCountAndType(Type *ArrayTy) {
|
||||||
|
unsigned TotalElements = 1;
|
||||||
|
Type *CurrArrayTy = ArrayTy;
|
||||||
|
while (auto *InnerArrayTy = dyn_cast<ArrayType>(CurrArrayTy)) {
|
||||||
|
TotalElements *= InnerArrayTy->getNumElements();
|
||||||
|
CurrArrayTy = InnerArrayTy->getElementType();
|
||||||
|
}
|
||||||
|
return std::make_pair(TotalElements, CurrArrayTy);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Folds a list of constant indices into a single flat index.
///
/// \param Indices one ConstantInt per array level, outermost level first.
/// \param Dims    element count of each level; parallel to \p Indices.
/// \param Builder used only to materialize the resulting i32 constant.
/// \returns the i32 constant sum of Indices[I] * product(Dims[I+1..]).
ConstantInt *DXILFlattenArraysVisitor::genConstFlattenIndices(
    ArrayRef<Value *> Indices, ArrayRef<uint64_t> Dims, IRBuilder<> &Builder) {
  assert(Indices.size() == Dims.size() &&
         "Indices and dimensions should be the same");
  unsigned FlatIndex = 0;
  unsigned Multiplier = 1;

  // Walk from the innermost level outwards so Multiplier is always the
  // number of elements spanned by one step at the current level. Counting
  // down with size_t avoids narrowing the container size to int.
  for (size_t I = Indices.size(); I-- > 0;) {
    auto *CIndex = dyn_cast<ConstantInt>(Indices[I]);
    assert(CIndex && "This function expects all indices to be ConstantInt");
    FlatIndex += CIndex->getZExtValue() * Multiplier;
    Multiplier *= Dims[I];
  }
  return Builder.getInt32(FlatIndex);
}
|
||||||
|
|
||||||
|
/// Emits the arithmetic that computes a flat index from per-level indices.
///
/// \param Indices one index value per array level, outermost level first.
/// \param Dims    element count of each level; parallel to \p Indices.
/// \param Builder insertion point for the generated mul/add instructions.
/// \returns Indices[0] unchanged when there is a single index; otherwise the
/// value of the final add in the generated mul/add sequence.
Value *DXILFlattenArraysVisitor::genInstructionFlattenIndices(
    ArrayRef<Value *> Indices, ArrayRef<uint64_t> Dims, IRBuilder<> &Builder) {
  // Same precondition as genConstFlattenIndices; without it a short Dims
  // would be read out of bounds in the loop below.
  assert(Indices.size() == Dims.size() &&
         "Indices and dimensions should be the same");
  if (Indices.size() == 1)
    return Indices[0];

  // NOTE(review): the constants below are i32, which presumes every index is
  // an i32 value - confirm for inputs that use wider index types.
  Value *FlatIndex = Builder.getInt32(0);
  unsigned Multiplier = 1;

  // Innermost level first, so Multiplier is the stride of the current level.
  for (size_t I = Indices.size(); I-- > 0;) {
    Value *VMultiplier = Builder.getInt32(Multiplier);
    Value *ScaledIndex = Builder.CreateMul(Indices[I], VMultiplier);
    FlatIndex = Builder.CreateAdd(FlatIndex, ScaledIndex);
    Multiplier *= Dims[I];
  }
  return FlatIndex;
}
|
||||||
|
|
||||||
|
/// Expands a GetElementPtr constant expression used as an operand of \p LI
/// into instructions so that it can be handled as a regular GEP.
///
/// Only the first such operand is processed. Returns true when the expansion
/// modified the IR, false when no operand qualified or nothing was changed.
bool DXILFlattenArraysVisitor::visitLoadInst(LoadInst &LI) {
  for (Value *Operand : LI.operands()) {
    auto *CE = dyn_cast<ConstantExpr>(Operand);
    if (!CE || CE->getOpcode() != Instruction::GetElementPtr)
      continue;
    // Report the rewrite to the caller: the expansion replaces constant
    // expression uses with newly inserted instructions.
    return convertUsersOfConstantsToInstructions(CE,
                                                 /*RestrictToFunc=*/nullptr,
                                                 /*RemoveDeadConstants=*/false,
                                                 /*IncludeSelf=*/true);
  }
  return false;
}
|
||||||
|
|
||||||
|
/// Expands a GetElementPtr constant expression used as an operand of \p SI
/// into instructions so that it can be handled as a regular GEP.
///
/// Only the first such operand is processed. Returns true when the expansion
/// modified the IR, false when no operand qualified or nothing was changed.
bool DXILFlattenArraysVisitor::visitStoreInst(StoreInst &SI) {
  for (Value *Operand : SI.operands()) {
    auto *CE = dyn_cast<ConstantExpr>(Operand);
    if (!CE || CE->getOpcode() != Instruction::GetElementPtr)
      continue;
    // Report the rewrite to the caller: the expansion replaces constant
    // expression uses with newly inserted instructions.
    return convertUsersOfConstantsToInstructions(CE,
                                                 /*RestrictToFunc=*/nullptr,
                                                 /*RemoveDeadConstants=*/false,
                                                 /*IncludeSelf=*/true);
  }
  return false;
}
|
||||||
|
|
||||||
|
/// Replaces an alloca of a multi-dimensional array with an alloca of the
/// equivalent one-dimensional array, named "<old name>.flat" and carrying the
/// original alignment. Returns false for any other alloca.
bool DXILFlattenArraysVisitor::visitAllocaInst(AllocaInst &AI) {
  Type *AllocatedTy = AI.getAllocatedType();
  if (!isMultiDimensionalArray(AllocatedTy))
    return false;

  std::pair<unsigned, Type *> CountAndElemTy =
      getElementCountAndType(AllocatedTy);
  ArrayType *FlatTy =
      ArrayType::get(CountAndElemTy.second, CountAndElemTy.first);

  // The replacement is created immediately before the original alloca.
  IRBuilder<> Builder(&AI);
  AllocaInst *FlatAlloca =
      Builder.CreateAlloca(FlatTy, nullptr, AI.getName() + ".flat");
  FlatAlloca->setAlignment(AI.getAlign());

  AI.replaceAllUsesWith(FlatAlloca);
  AI.eraseFromParent();
  return true;
}
|
||||||
|
|
||||||
|
/// Follows the GEP users of \p CurrGEP and records, for every GEP that ends a
/// chain, the data needed to rewrite it against \p FlattenedArrayType.
///
/// \p Indices and \p Dims are taken by value on purpose: each branch of the
/// chain extends its own copy. \p GEPChainUseCount is shared by the whole
/// walk and is incremented once per nested GEP user that is followed.
void DXILFlattenArraysVisitor::recursivelyCollectGEPs(
    GetElementPtrInst &CurrGEP, ArrayType *FlattenedArrayType,
    Value *PtrOperand, unsigned &GEPChainUseCount, SmallVector<Value *> Indices,
    SmallVector<uint64_t> Dims, bool AllIndicesAreConstInt) {
  Type *SrcElemTy = CurrGEP.getSourceElementType();
  assert(isa<ArrayType>(SrcElemTy));

  // Only the last index operand of the GEP contributes to the flat index.
  Value *LastIdx = CurrGEP.getOperand(CurrGEP.getNumOperands() - 1);
  if (!isa<ConstantInt>(LastIdx))
    AllIndicesAreConstInt = false;
  Indices.push_back(LastIdx);
  Dims.push_back(cast<ArrayType>(SrcElemTy)->getNumElements());

  // A GEP over a one-dimensional array ends the chain.
  if (!isMultiDimensionalArray(SrcElemTy)) {
    assert(GEPChainUseCount < FlattenedArrayType->getNumElements());
    GEPChainMap.insert({&CurrGEP,
                        {FlattenedArrayType, PtrOperand, std::move(Indices),
                         std::move(Dims), AllIndicesAreConstInt}});
    return;
  }

  bool HasNestedGEPUser = false;
  for (User *U : CurrGEP.users()) {
    auto *NestedGEP = dyn_cast<GetElementPtrInst>(U);
    if (!NestedGEP)
      continue;
    ++GEPChainUseCount;
    recursivelyCollectGEPs(*NestedGEP, FlattenedArrayType, PtrOperand,
                           GEPChainUseCount, Indices, Dims,
                           AllIndicesAreConstInt);
    HasNestedGEPUser = true;
  }

  // Covers a chain that stops at a GEP which still indexes a
  // multi-dimensional array: record it here because no nested GEP will.
  if (GEPChainUseCount > 0 && !HasNestedGEPUser) {
    GEPChainMap.insert({&CurrGEP,
                        {FlattenedArrayType, PtrOperand, std::move(Indices),
                         std::move(Dims), AllIndicesAreConstInt}});
  }
}
|
||||||
|
|
||||||
|
/// Rewrites \p GEP using the chain data previously recorded for it in
/// GEPChainMap. \p GEP must have an entry in the map.
bool DXILFlattenArraysVisitor::visitGetElementPtrInstInGEPChain(
    GetElementPtrInst &GEP) {
  auto It = GEPChainMap.find(&GEP);
  assert(It != GEPChainMap.end() && "GEP has no recorded chain data");
  GEPData GEPInfo = std::move(It->second);
  // The rewrite below erases GEP; drop its entry first so the map never holds
  // a key that points at a destroyed instruction.
  GEPChainMap.erase(It);
  return visitGetElementPtrInstInGEPChainBase(GEPInfo, GEP);
}
|
||||||
|
/// Replaces \p GEP with a single GEP into the flattened array described by
/// \p GEPInfo, named "<old name>.flat", then erases \p GEP.
///
/// \returns true; the instruction is always rewritten.
bool DXILFlattenArraysVisitor::visitGetElementPtrInstInGEPChainBase(
    GEPData &GEPInfo, GetElementPtrInst &GEP) {
  IRBuilder<> Builder(&GEP);
  Value *FlatIndex;
  if (GEPInfo.AllIndicesAreConstInt)
    FlatIndex = genConstFlattenIndices(GEPInfo.Indices, GEPInfo.Dims, Builder);
  else
    FlatIndex =
        genInstructionFlattenIndices(GEPInfo.Indices, GEPInfo.Dims, Builder);

  ArrayType *FlattenedArrayType = GEPInfo.ParentArrayType;
  // The source element type is the whole flattened array, so the first GEP
  // index steps over entire arrays and only the second one selects an
  // element. A lone FlatIndex would address FlatIndex arrays past the base
  // pointer rather than element FlatIndex of the array.
  Value *GEPIndices[] = {Builder.getInt32(0), FlatIndex};
  Value *FlatGEP =
      Builder.CreateGEP(FlattenedArrayType, GEPInfo.ParendOperand, GEPIndices,
                        GEP.getName() + ".flat", GEP.isInBounds());

  GEP.replaceAllUsesWith(FlatGEP);
  GEP.eraseFromParent();
  return true;
}
|
||||||
|
|
||||||
|
/// Entry point for every GEP instruction.
///
/// A GEP already recorded in GEPChainMap is rewritten from that record. A GEP
/// whose source element type is a multi-dimensional array starts a new chain
/// walk; if the walk finds no nested GEP the instruction is rewritten right
/// away, otherwise it is queued for dead-instruction cleanup. Any other GEP
/// is left alone.
///
/// NOTE(review): only the last index operand of a GEP is consulted, so GEPs
/// with more than two indices or a non-zero leading index look like they
/// would be flattened incorrectly - confirm.
bool DXILFlattenArraysVisitor::visitGetElementPtrInst(GetElementPtrInst &GEP) {
  if (GEPChainMap.find(&GEP) != GEPChainMap.end())
    return visitGetElementPtrInstInGEPChain(GEP);

  Type *SrcElemTy = GEP.getSourceElementType();
  if (!isMultiDimensionalArray(SrcElemTy))
    return false;

  auto *SrcArrayTy = cast<ArrayType>(SrcElemTy);
  std::pair<unsigned, Type *> CountAndElemTy =
      getElementCountAndType(SrcArrayTy);
  ArrayType *FlattenedArrayType =
      ArrayType::get(CountAndElemTy.second, CountAndElemTy.first);
  Value *RootPtr = GEP.getPointerOperand();

  unsigned GEPChainUseCount = 0;
  recursivelyCollectGEPs(GEP, FlattenedArrayType, RootPtr, GEPChainUseCount);

  // GEPChainUseCount counts nested GEPs found by the recursion, which is not
  // the same thing as GEP.hasNUses(0).
  if (GEPChainUseCount != 0) {
    // A GEP further down the chain takes over; this one may become dead.
    PotentiallyDeadInstrs.emplace_back(&GEP);
    return false;
  }

  // No nested GEP will update this chain later, so rewrite it here.
  SmallVector<Value *> OnlyIndex({GEP.getOperand(GEP.getNumOperands() - 1)});
  SmallVector<uint64_t> OnlyDim({SrcArrayTy->getNumElements()});
  bool IndexIsConstInt = isa<ConstantInt>(OnlyIndex[0]);
  GEPData GEPInfo{FlattenedArrayType, RootPtr, std::move(OnlyIndex),
                  std::move(OnlyDim), IndexIsConstInt};
  return visitGetElementPtrInstInGEPChainBase(GEPInfo, GEP);
}
|
||||||
|
|
||||||
|
/// Visits all instructions of \p F, block by block in reverse post-order, and
/// then runs finish(). Returns true if any instruction hook returned true.
bool DXILFlattenArraysVisitor::visit(Function &F) {
  bool Changed = false;
  ReversePostOrderTraversal<Function *> RPOT(&F);
  for (BasicBlock *BB : make_early_inc_range(RPOT)) {
    // Early-increment iteration: hooks may erase the instruction they visit.
    for (Instruction &I : make_early_inc_range(*BB)) {
      if (InstVisitor::visit(I))
        Changed = true;
    }
  }
  finish();
  return Changed;
}
|
||||||
|
|
||||||
|
/// Appends the scalar leaves of the (possibly nested) array constant \p Init
/// to \p Elements, in order. A non-array constant is appended as is.
static void collectElements(Constant *Init,
                            SmallVectorImpl<Constant *> &Elements) {
  // Base case: If Init is not an array, add it directly to the vector.
  auto *ArrayTy = dyn_cast<ArrayType>(Init->getType());
  if (!ArrayTy) {
    Elements.push_back(Init);
    return;
  }

  // Recursive case: Process each element in the array. getAggregateElement
  // is used instead of matching concrete constant classes so that nested
  // zeroinitializer, undef and poison sub-arrays are expanded as well,
  // alongside ConstantArray and ConstantDataArray.
  for (unsigned I = 0, E = ArrayTy->getNumElements(); I != E; ++I) {
    Constant *Element = Init->getAggregateElement(I);
    if (!Element)
      llvm_unreachable(
          "Expected an array initializer whose elements can be extracted!");
    collectElements(Element, Elements);
  }
}
|
||||||
|
|
||||||
|
/// Builds the initializer for a flattened global.
///
/// \param Init          initializer of the original global.
/// \param OrigType      value type of the original global.
/// \param FlattenedType one-dimensional array type of the new global.
/// \param Ctx           unused.
/// \returns a constant of type \p FlattenedType, or \p Init itself when
/// \p OrigType is not an array type.
static Constant *transformInitializer(Constant *Init, Type *OrigType,
                                      ArrayType *FlattenedType,
                                      LLVMContext &Ctx) {
  // Handle ConstantAggregateZero (zero-initialized constants)
  if (isa<ConstantAggregateZero>(Init))
    return ConstantAggregateZero::get(FlattenedType);

  // Handle PoisonValue before UndefValue: poison is a kind of UndefValue, and
  // testing for undef alone would turn a poison initializer into undef.
  if (isa<PoisonValue>(Init))
    return PoisonValue::get(FlattenedType);

  // Handle UndefValue (undefined constants)
  if (isa<UndefValue>(Init))
    return UndefValue::get(FlattenedType);

  if (!isa<ArrayType>(OrigType))
    return Init;

  SmallVector<Constant *> FlattenedElements;
  collectElements(Init, FlattenedElements);
  assert(FlattenedType->getNumElements() == FlattenedElements.size() &&
         "The number of collected elements should match the FlattenedType");
  return ConstantArray::get(FlattenedType, FlattenedElements);
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
flattenGlobalArrays(Module &M,
|
||||||
|
DenseMap<GlobalVariable *, GlobalVariable *> &GlobalMap) {
|
||||||
|
LLVMContext &Ctx = M.getContext();
|
||||||
|
for (GlobalVariable &G : M.globals()) {
|
||||||
|
Type *OrigType = G.getValueType();
|
||||||
|
if (!DXILFlattenArraysVisitor::isMultiDimensionalArray(OrigType))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
ArrayType *ArrType = cast<ArrayType>(OrigType);
|
||||||
|
auto [TotalElements, BaseType] =
|
||||||
|
DXILFlattenArraysVisitor::getElementCountAndType(ArrType);
|
||||||
|
ArrayType *FattenedArrayType = ArrayType::get(BaseType, TotalElements);
|
||||||
|
|
||||||
|
// Create a new global variable with the updated type
|
||||||
|
// Note: Initializer is set via transformInitializer
|
||||||
|
GlobalVariable *NewGlobal =
|
||||||
|
new GlobalVariable(M, FattenedArrayType, G.isConstant(), G.getLinkage(),
|
||||||
|
/*Initializer=*/nullptr, G.getName() + ".1dim", &G,
|
||||||
|
G.getThreadLocalMode(), G.getAddressSpace(),
|
||||||
|
G.isExternallyInitialized());
|
||||||
|
|
||||||
|
// Copy relevant attributes
|
||||||
|
NewGlobal->setUnnamedAddr(G.getUnnamedAddr());
|
||||||
|
if (G.getAlignment() > 0) {
|
||||||
|
NewGlobal->setAlignment(G.getAlign());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (G.hasInitializer()) {
|
||||||
|
Constant *Init = G.getInitializer();
|
||||||
|
Constant *NewInit =
|
||||||
|
transformInitializer(Init, OrigType, FattenedArrayType, Ctx);
|
||||||
|
NewGlobal->setInitializer(NewInit);
|
||||||
|
}
|
||||||
|
GlobalMap[&G] = NewGlobal;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Runs the whole transform on \p M: creates flattened globals, rewrites the
/// body of every defined function, then replaces and erases the original
/// multi-dimensional globals.
///
/// \returns true if a visited function reported a change or a global was
/// replaced.
static bool flattenArrays(Module &M) {
  bool MadeChange = false;
  DXILFlattenArraysVisitor Impl;
  DenseMap<GlobalVariable *, GlobalVariable *> GlobalMap;
  flattenGlobalArrays(M, GlobalMap);
  for (auto &F : make_early_inc_range(M.functions())) {
    // A declaration has no entry block to traverse. This also covers
    // intrinsics, which are never defined in the module.
    if (F.isDeclaration())
      continue;
    MadeChange |= Impl.visit(F);
  }
  // Swap the globals only after all function bodies have been rewritten.
  for (auto &[Old, New] : GlobalMap) {
    Old->replaceAllUsesWith(New);
    Old->eraseFromParent();
    MadeChange = true;
  }
  return MadeChange;
}
|
||||||
|
|
||||||
|
/// New pass-manager entry point. Preserves every analysis when the module was
/// left untouched, and only DXILResourceAnalysis otherwise.
PreservedAnalyses DXILFlattenArrays::run(Module &M, ModuleAnalysisManager &) {
  if (flattenArrays(M)) {
    PreservedAnalyses PA;
    PA.preserve<DXILResourceAnalysis>();
    return PA;
  }
  return PreservedAnalyses::all();
}
|
||||||
|
|
||||||
|
bool DXILFlattenArraysLegacy::runOnModule(Module &M) {
|
||||||
|
return flattenArrays(M);
|
||||||
|
}
|
||||||
|
|
||||||
|
void DXILFlattenArraysLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
|
||||||
|
AU.addPreserved<DXILResourceWrapperPass>();
|
||||||
|
}
|
||||||
|
|
||||||
|
char DXILFlattenArraysLegacy::ID = 0;
|
||||||
|
|
||||||
|
INITIALIZE_PASS_BEGIN(DXILFlattenArraysLegacy, DEBUG_TYPE,
|
||||||
|
"DXIL Array Flattener", false, false)
|
||||||
|
INITIALIZE_PASS_END(DXILFlattenArraysLegacy, DEBUG_TYPE, "DXIL Array Flattener",
|
||||||
|
false, false)
|
||||||
|
|
||||||
|
ModulePass *llvm::createDXILFlattenArraysLegacyPass() {
|
||||||
|
return new DXILFlattenArraysLegacy();
|
||||||
|
}
|
23
llvm/lib/Target/DirectX/DXILFlattenArrays.h
Normal file
23
llvm/lib/Target/DirectX/DXILFlattenArrays.h
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
//===- DXILFlattenArrays.h - Perform flattening of DXIL Arrays -*- C++ -*-===//
|
||||||
|
//
|
||||||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||||
|
//
|
||||||
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
#ifndef LLVM_TARGET_DIRECTX_DXILFLATTENARRAYS_H
|
||||||
|
#define LLVM_TARGET_DIRECTX_DXILFLATTENARRAYS_H
|
||||||
|
|
||||||
|
#include "llvm/IR/PassManager.h"
|
||||||
|
|
||||||
|
namespace llvm {
|
||||||
|
|
||||||
|
/// A pass that transforms multidimensional arrays into one-dimensional arrays.
|
||||||
|
/// New pass-manager pass that rewrites multidimensional arrays as
/// one-dimensional arrays.
class DXILFlattenArrays : public PassInfoMixin<DXILFlattenArrays> {
public:
  /// Runs the transform over module \p M.
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
};
|
||||||
|
} // namespace llvm
|
||||||
|
|
||||||
|
#endif // LLVM_TARGET_DIRECTX_DXILFLATTENARRAYS_H
|
@ -40,6 +40,12 @@ void initializeDXILDataScalarizationLegacyPass(PassRegistry &);
|
|||||||
/// Pass to scalarize llvm global data into a DXIL legal form
|
/// Pass to scalarize llvm global data into a DXIL legal form
|
||||||
ModulePass *createDXILDataScalarizationLegacyPass();
|
ModulePass *createDXILDataScalarizationLegacyPass();
|
||||||
|
|
||||||
|
/// Initializer for DXIL Array Flatten Pass
|
||||||
|
void initializeDXILFlattenArraysLegacyPass(PassRegistry &);
|
||||||
|
|
||||||
|
/// Pass to flatten arrays into a one dimensional DXIL legal form
|
||||||
|
ModulePass *createDXILFlattenArraysLegacyPass();
|
||||||
|
|
||||||
/// Initializer for DXILOpLowering
|
/// Initializer for DXILOpLowering
|
||||||
void initializeDXILOpLoweringLegacyPass(PassRegistry &);
|
void initializeDXILOpLoweringLegacyPass(PassRegistry &);
|
||||||
|
|
||||||
|
@ -24,6 +24,7 @@ MODULE_ANALYSIS("dxil-resource-md", DXILResourceMDAnalysis())
|
|||||||
#define MODULE_PASS(NAME, CREATE_PASS)
|
#define MODULE_PASS(NAME, CREATE_PASS)
|
||||||
#endif
|
#endif
|
||||||
MODULE_PASS("dxil-data-scalarization", DXILDataScalarization())
|
MODULE_PASS("dxil-data-scalarization", DXILDataScalarization())
|
||||||
|
MODULE_PASS("dxil-flatten-arrays", DXILFlattenArrays())
|
||||||
MODULE_PASS("dxil-intrinsic-expansion", DXILIntrinsicExpansion())
|
MODULE_PASS("dxil-intrinsic-expansion", DXILIntrinsicExpansion())
|
||||||
MODULE_PASS("dxil-op-lower", DXILOpLowering())
|
MODULE_PASS("dxil-op-lower", DXILOpLowering())
|
||||||
MODULE_PASS("dxil-pretty-printer", DXILPrettyPrinterPass(dbgs()))
|
MODULE_PASS("dxil-pretty-printer", DXILPrettyPrinterPass(dbgs()))
|
||||||
|
@ -13,6 +13,7 @@
|
|||||||
|
|
||||||
#include "DirectXTargetMachine.h"
|
#include "DirectXTargetMachine.h"
|
||||||
#include "DXILDataScalarization.h"
|
#include "DXILDataScalarization.h"
|
||||||
|
#include "DXILFlattenArrays.h"
|
||||||
#include "DXILIntrinsicExpansion.h"
|
#include "DXILIntrinsicExpansion.h"
|
||||||
#include "DXILOpLowering.h"
|
#include "DXILOpLowering.h"
|
||||||
#include "DXILPrettyPrinter.h"
|
#include "DXILPrettyPrinter.h"
|
||||||
@ -48,6 +49,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
|
|||||||
auto *PR = PassRegistry::getPassRegistry();
|
auto *PR = PassRegistry::getPassRegistry();
|
||||||
initializeDXILIntrinsicExpansionLegacyPass(*PR);
|
initializeDXILIntrinsicExpansionLegacyPass(*PR);
|
||||||
initializeDXILDataScalarizationLegacyPass(*PR);
|
initializeDXILDataScalarizationLegacyPass(*PR);
|
||||||
|
initializeDXILFlattenArraysLegacyPass(*PR);
|
||||||
initializeScalarizerLegacyPassPass(*PR);
|
initializeScalarizerLegacyPassPass(*PR);
|
||||||
initializeDXILPrepareModulePass(*PR);
|
initializeDXILPrepareModulePass(*PR);
|
||||||
initializeEmbedDXILPassPass(*PR);
|
initializeEmbedDXILPassPass(*PR);
|
||||||
@ -91,6 +93,7 @@ public:
|
|||||||
addPass(createDXILDataScalarizationLegacyPass());
|
addPass(createDXILDataScalarizationLegacyPass());
|
||||||
ScalarizerPassOptions DxilScalarOptions;
|
ScalarizerPassOptions DxilScalarOptions;
|
||||||
DxilScalarOptions.ScalarizeLoadStore = true;
|
DxilScalarOptions.ScalarizeLoadStore = true;
|
||||||
|
addPass(createDXILFlattenArraysLegacyPass());
|
||||||
addPass(createScalarizerPass(DxilScalarOptions));
|
addPass(createScalarizerPass(DxilScalarOptions));
|
||||||
addPass(createDXILOpLoweringLegacyPass());
|
addPass(createDXILOpLoweringLegacyPass());
|
||||||
addPass(createDXILFinalizeLinkageLegacyPass());
|
addPass(createDXILFinalizeLinkageLegacyPass());
|
||||||
|
188
llvm/test/CodeGen/DirectX/flatten-array.ll
Normal file
188
llvm/test/CodeGen/DirectX/flatten-array.ll
Normal file
@ -0,0 +1,188 @@
|
|||||||
|
|
||||||
|
; RUN: opt -S -dxil-flatten-arrays %s | FileCheck %s
|
||||||
|
|
||||||
|
; CHECK-LABEL: alloca_2d_test
|
||||||
|
define void @alloca_2d_test () {
|
||||||
|
; CHECK-NEXT: alloca [9 x i32], align 4
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
;
|
||||||
|
%1 = alloca [3 x [3 x i32]], align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: alloca_3d_test
|
||||||
|
define void @alloca_3d_test () {
|
||||||
|
; CHECK-NEXT: alloca [8 x i32], align 4
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
;
|
||||||
|
%1 = alloca [2 x[2 x [2 x i32]]], align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: alloca_4d_test
|
||||||
|
define void @alloca_4d_test () {
|
||||||
|
; CHECK-NEXT: alloca [16 x i32], align 4
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
;
|
||||||
|
%1 = alloca [2x[2 x[2 x [2 x i32]]]], align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: gep_2d_test
|
||||||
|
define void @gep_2d_test () {
|
||||||
|
; CHECK: [[a:%.*]] = alloca [9 x i32], align 4
|
||||||
|
; CHECK-COUNT-9: getelementptr inbounds [9 x i32], ptr [[a]], i32 {{[0-8]}}
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
%1 = alloca [3 x [3 x i32]], align 4
|
||||||
|
%g2d0 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* %1, i32 0, i32 0
|
||||||
|
%g1d_1 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d0, i32 0, i32 0
|
||||||
|
%g1d_2 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d0, i32 0, i32 1
|
||||||
|
%g1d_3 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d0, i32 0, i32 2
|
||||||
|
%g2d1 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* %1, i32 0, i32 1
|
||||||
|
%g1d1_1 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d1, i32 0, i32 0
|
||||||
|
%g1d1_2 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d1, i32 0, i32 1
|
||||||
|
%g1d1_3 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d1, i32 0, i32 2
|
||||||
|
%g2d2 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* %1, i32 0, i32 2
|
||||||
|
%g1d2_1 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d2, i32 0, i32 0
|
||||||
|
%g1d2_2 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d2, i32 0, i32 1
|
||||||
|
%g1d2_3 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d2, i32 0, i32 2
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: gep_3d_test
|
||||||
|
define void @gep_3d_test () {
|
||||||
|
; CHECK: [[a:%.*]] = alloca [8 x i32], align 4
|
||||||
|
; CHECK-COUNT-8: getelementptr inbounds [8 x i32], ptr [[a]], i32 {{[0-7]}}
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
%1 = alloca [2 x[2 x [2 x i32]]], align 4
|
||||||
|
%g3d0 = getelementptr inbounds [2 x[2 x [2 x i32]]], [2 x[2 x [2 x i32]]]* %1, i32 0, i32 0
|
||||||
|
%g2d0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d0, i32 0, i32 0
|
||||||
|
%g1d_1 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0, i32 0, i32 0
|
||||||
|
%g1d_2 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0, i32 0, i32 1
|
||||||
|
%g2d1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d0, i32 0, i32 1
|
||||||
|
%g1d1_1 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d1, i32 0, i32 0
|
||||||
|
%g1d1_2 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d1, i32 0, i32 1
|
||||||
|
%g3d1 = getelementptr inbounds [2 x[2 x [2 x i32]]], [2 x[2 x [2 x i32]]]* %1, i32 0, i32 1
|
||||||
|
%g2d2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d1, i32 0, i32 0
|
||||||
|
%g1d2_1 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d2, i32 0, i32 0
|
||||||
|
%g1d2_2 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d2, i32 0, i32 1
|
||||||
|
%g2d3 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d1, i32 0, i32 1
|
||||||
|
%g1d3_1 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d3, i32 0, i32 0
|
||||||
|
%g1d3_2 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d3, i32 0, i32 1
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: gep_4d_test
|
||||||
|
define void @gep_4d_test () {
|
||||||
|
; CHECK: [[a:%.*]] = alloca [16 x i32], align 4
|
||||||
|
; CHECK-COUNT-16: getelementptr inbounds [16 x i32], ptr [[a]], i32 {{[0-9]|1[0-5]}}
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
%1 = alloca [2x[2 x[2 x [2 x i32]]]], align 4
|
||||||
|
%g4d0 = getelementptr inbounds [2x[2 x[2 x [2 x i32]]]], [2x[2 x[2 x [2 x i32]]]]* %1, i32 0, i32 0
|
||||||
|
%g3d0 = getelementptr inbounds [2 x[2 x [2 x i32]]], [2 x[2 x [2 x i32]]]* %g4d0, i32 0, i32 0
|
||||||
|
%g2d0_0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d0, i32 0, i32 0
|
||||||
|
%g1d_0 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_0, i32 0, i32 0
|
||||||
|
%g1d_1 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_0, i32 0, i32 1
|
||||||
|
%g2d0_1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d0, i32 0, i32 1
|
||||||
|
%g1d_2 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_1, i32 0, i32 0
|
||||||
|
%g1d_3 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_1, i32 0, i32 1
|
||||||
|
%g3d1 = getelementptr inbounds [2 x[2 x [2 x i32]]], [2 x[2 x [2 x i32]]]* %g4d0, i32 0, i32 1
|
||||||
|
%g2d0_2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d1, i32 0, i32 0
|
||||||
|
%g1d_4 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_2, i32 0, i32 0
|
||||||
|
%g1d_5 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_2, i32 0, i32 1
|
||||||
|
%g2d1_2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d1, i32 0, i32 1
|
||||||
|
%g1d_6 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d1_2, i32 0, i32 0
|
||||||
|
%g1d_7 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d1_2, i32 0, i32 1
|
||||||
|
%g4d1 = getelementptr inbounds [2x[2 x[2 x [2 x i32]]]], [2x[2 x[2 x [2 x i32]]]]* %1, i32 0, i32 1
|
||||||
|
%g3d0_1 = getelementptr inbounds [2 x[2 x [2 x i32]]], [2 x[2 x [2 x i32]]]* %g4d1, i32 0, i32 0
|
||||||
|
%g2d0_3 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d0_1, i32 0, i32 0
|
||||||
|
%g1d_8 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_3, i32 0, i32 0
|
||||||
|
%g1d_9 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_3, i32 0, i32 1
|
||||||
|
%g2d0_4 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d0_1, i32 0, i32 1
|
||||||
|
%g1d_10 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_4, i32 0, i32 0
|
||||||
|
%g1d_11 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_4, i32 0, i32 1
|
||||||
|
%g3d1_1 = getelementptr inbounds [2 x[2 x [2 x i32]]], [2 x[2 x [2 x i32]]]* %g4d1, i32 0, i32 1
|
||||||
|
%g2d0_5 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d1_1, i32 0, i32 0
|
||||||
|
%g1d_12 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_5, i32 0, i32 0
|
||||||
|
%g1d_13 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_5, i32 0, i32 1
|
||||||
|
%g2d1_3 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d1_1, i32 0, i32 1
|
||||||
|
%g1d_14 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d1_3, i32 0, i32 0
|
||||||
|
%g1d_15 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d1_3, i32 0, i32 1
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
; Test globals shared by the @global_* functions below: @a is a 2 x 3 x 4 array
; of i32 whose initializer lists the values 0 through 23 in order; @b has the
; same type and is zero-initialized.
@a = internal global [2 x [3 x [4 x i32]]] [[3 x [4 x i32]] [[4 x i32] [i32 0, i32 1, i32 2, i32 3],
|
||||||
|
[4 x i32] [i32 4, i32 5, i32 6, i32 7],
|
||||||
|
[4 x i32] [i32 8, i32 9, i32 10, i32 11]],
|
||||||
|
[3 x [4 x i32]] [[4 x i32] [i32 12, i32 13, i32 14, i32 15],
|
||||||
|
[4 x i32] [i32 16, i32 17, i32 18, i32 19],
|
||||||
|
[4 x i32] [i32 20, i32 21, i32 22, i32 23]]], align 4
|
||||||
|
|
||||||
|
@b = internal global [2 x [3 x [4 x i32]]] zeroinitializer, align 16
|
||||||
|
|
||||||
|
; Walks @a with a three-step GEP chain using the constant indices 0, 1 and 2,
; then loads the addressed i32. The expectations below require the chain to
; collapse into one GEP on @a.1dim with flat index 6, followed by the load.
define void @global_gep_load() {
|
||||||
|
; CHECK: [[GEP_PTR:%.*]] = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 6
|
||||||
|
; CHECK: load i32, ptr [[GEP_PTR]], align 4
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
%1 = getelementptr inbounds [2 x [3 x [4 x i32]]], [2 x [3 x [4 x i32]]]* @a, i32 0, i32 0
|
||||||
|
%2 = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]]* %1, i32 0, i32 1
|
||||||
|
%3 = getelementptr inbounds [4 x i32], [4 x i32]* %2, i32 0, i32 2
|
||||||
|
%4 = load i32, i32* %3, align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Same three-step GEP chain on @a as @global_gep_load, but indexed by the
; runtime arguments %row, %col and %timeIndex. The expectations below require
; an explicit mul/add sequence (%timeIndex * 1 + %col * 4 + %row * 12) feeding
; a single GEP on @a.1dim, with none of the original multi-dimensional GEPs
; left before the load.
define void @global_gep_load_index(i32 %row, i32 %col, i32 %timeIndex) {
|
||||||
|
; CHECK-LABEL: define void @global_gep_load_index(
|
||||||
|
; CHECK-SAME: i32 [[ROW:%.*]], i32 [[COL:%.*]], i32 [[TIMEINDEX:%.*]]) {
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TIMEINDEX]], 1
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP1]]
|
||||||
|
; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[COL]], 4
|
||||||
|
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]]
|
||||||
|
; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[ROW]], 12
|
||||||
|
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP4]], [[TMP5]]
|
||||||
|
; CHECK-NEXT: [[DOTFLAT:%.*]] = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 [[TMP6]]
|
||||||
|
; CHECK-NOT: getelementptr inbounds [2 x [3 x [4 x i32]]]{{.*}}
|
||||||
|
; CHECK-NOT: getelementptr inbounds [3 x [4 x i32]]{{.*}}
|
||||||
|
; CHECK-NOT: getelementptr inbounds [4 x i32]{{.*}}
|
||||||
|
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTFLAT]], align 4
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
;
|
||||||
|
%1 = getelementptr inbounds [2 x [3 x [4 x i32]]], [2 x [3 x [4 x i32]]]* @a, i32 0, i32 %row
|
||||||
|
%2 = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]]* %1, i32 0, i32 %col
|
||||||
|
%3 = getelementptr inbounds [4 x i32], [4 x i32]* %2, i32 0, i32 %timeIndex
|
||||||
|
%4 = load i32, i32* %3, align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; GEP chain on @a that indexes only the two outer dimensions (%row, %col) and
; then loads an i32 through the resulting pointer, i.e. the innermost [4 x i32]
; level is never indexed. The expectations below require a single flat GEP on
; @a.1dim computed as %col * 1 + %row * 3.
; NOTE(review): @global_gep_load_index scales %col by 4 and %row by 12 for the
; same array; the multipliers 1 and 3 expected here look like they ignore the
; innermost dimension - confirm this is the intended addressing.
define void @global_incomplete_gep_chain(i32 %row, i32 %col) {
|
||||||
|
; CHECK-LABEL: define void @global_incomplete_gep_chain(
|
||||||
|
; CHECK-SAME: i32 [[ROW:%.*]], i32 [[COL:%.*]]) {
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[COL]], 1
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP1]]
|
||||||
|
; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[ROW]], 3
|
||||||
|
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]]
|
||||||
|
; CHECK-NEXT: [[DOTFLAT:%.*]] = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 [[TMP4]]
|
||||||
|
; CHECK-NOT: getelementptr inbounds [2 x [3 x [4 x i32]]]{{.*}}
|
||||||
|
; CHECK-NOT: getelementptr inbounds [3 x [4 x i32]]{{.*}}
|
||||||
|
; CHECK-NOT: getelementptr inbounds [4 x i32]{{.*}}
|
||||||
|
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTFLAT]], align 4
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
;
|
||||||
|
%1 = getelementptr inbounds [2 x [3 x [4 x i32]]], [2 x [3 x [4 x i32]]]* @a, i32 0, i32 %row
|
||||||
|
%2 = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]]* %1, i32 0, i32 %col
|
||||||
|
%4 = load i32, i32* %2, align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Store counterpart of @global_gep_load: walks the zero-initialized global @b
; with the constant indices 1, 0 and 1 and stores the value 1 there. The
; expectations below require a single GEP on @b.1dim with flat index 13
; feeding the store.
define void @global_gep_store() {
|
||||||
|
; CHECK: [[GEP_PTR:%.*]] = getelementptr inbounds [24 x i32], ptr @b.1dim, i32 13
|
||||||
|
; CHECK: store i32 1, ptr [[GEP_PTR]], align 4
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
%1 = getelementptr inbounds [2 x [3 x [4 x i32]]], [2 x [3 x [4 x i32]]]* @b, i32 0, i32 1
|
||||||
|
%2 = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]]* %1, i32 0, i32 0
|
||||||
|
%3 = getelementptr inbounds [4 x i32], [4 x i32]* %2, i32 0, i32 1
|
||||||
|
store i32 1, i32* %3, align 4
|
||||||
|
ret void
|
||||||
|
}
|
@ -9,6 +9,7 @@
|
|||||||
; CHECK-NEXT: ModulePass Manager
|
; CHECK-NEXT: ModulePass Manager
|
||||||
; CHECK-NEXT: DXIL Intrinsic Expansion
|
; CHECK-NEXT: DXIL Intrinsic Expansion
|
||||||
; CHECK-NEXT: DXIL Data Scalarization
|
; CHECK-NEXT: DXIL Data Scalarization
|
||||||
|
; CHECK-NEXT: DXIL Array Flattener
|
||||||
; CHECK-NEXT: FunctionPass Manager
|
; CHECK-NEXT: FunctionPass Manager
|
||||||
; CHECK-NEXT: Dominator Tree Construction
|
; CHECK-NEXT: Dominator Tree Construction
|
||||||
; CHECK-NEXT: Scalarize vector operations
|
; CHECK-NEXT: Scalarize vector operations
|
||||||
|
153
llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
Normal file
153
llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
Normal file
@ -0,0 +1,153 @@
|
|||||||
|
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool llc --version 5
|
||||||
|
; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s
|
||||||
|
|
||||||
|
; Make sure we can load groupshared, static vectors and arrays of vectors
; Inputs: two addrspace(3) globals holding an array of vectors and a single
; vector, one internal global array of vectors with a constant initializer
; (values 1 through 12), and one addrspace(3) two-dimensional array of vectors.
|
||||||
|
|
||||||
|
@"arrayofVecData" = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16
|
||||||
|
@"vecData" = external addrspace(3) global <4 x i32>, align 4
|
||||||
|
@staticArrayOfVecData = internal global [3 x <4 x i32>] [<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>], align 4
|
||||||
|
@"groushared2dArrayofVectors" = local_unnamed_addr addrspace(3) global [3 x [ 3 x <4 x i32>]] zeroinitializer, align 16
|
||||||
|
|
||||||
|
; CHECK: @arrayofVecData.scalarized.1dim = local_unnamed_addr addrspace(3) global [6 x float] zeroinitializer, align 16
|
||||||
|
; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], align 4
|
||||||
|
; CHECK: @staticArrayOfVecData.scalarized.1dim = internal global [12 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12], align 4
|
||||||
|
; CHECK: @groushared2dArrayofVectors.scalarized.1dim = local_unnamed_addr addrspace(3) global [36 x i32] zeroinitializer, align 16
|
||||||
|
|
||||||
|
; CHECK-NOT: @arrayofVecData
|
||||||
|
; CHECK-NOT: @arrayofVecData.scalarized
|
||||||
|
; CHECK-NOT: @vecData
|
||||||
|
; CHECK-NOT: @staticArrayOfVecData
|
||||||
|
; CHECK-NOT: @staticArrayOfVecData.scalarized
|
||||||
|
; CHECK-NOT: @groushared2dArrayofVectors
|
||||||
|
; CHECK-NOT: @groushared2dArrayofVectors.scalarized
|
||||||
|
|
||||||
|
|
||||||
|
; Loads elements 0 and 1 of @arrayofVecData through constant GEP expressions
; and returns their element-wise sum. The expectations below require eight
; scalar i32 loads from @arrayofVecData.scalarized.1dim, four scalar adds and
; an insertelement chain rebuilding the <4 x i32> result.
; NOTE(review): @arrayofVecData is declared as [2 x <3 x float>], while the
; constant GEPs in this function type it as [2 x <4 x i32>] - confirm the
; mismatch is intentional.
; NOTE(review): the expected address of the second element still goes through
; a GEP typed [2 x [3 x float]] rather than the flattened type - confirm.
define <4 x i32> @load_array_vec_test() #0 {
|
||||||
|
; CHECK-LABEL: define <4 x i32> @load_array_vec_test(
|
||||||
|
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr addrspace(3) @arrayofVecData.scalarized.1dim to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[TMP1]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1) to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[TMP3]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP5:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 2) to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 3) to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP9:%.*]] = bitcast ptr addrspace(3) @arrayofVecData.scalarized.1dim to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [2 x [3 x float]], ptr addrspace(3) [[TMP9]], i32 0, i32 1
|
||||||
|
; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(3) [[TMP11]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP13:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[DOTI12:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP13]], i32 1
|
||||||
|
; CHECK-NEXT: [[DOTI13:%.*]] = load i32, ptr addrspace(3) [[DOTI12]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP14:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[DOTI24:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP14]], i32 2
|
||||||
|
; CHECK-NEXT: [[DOTI25:%.*]] = load i32, ptr addrspace(3) [[DOTI24]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP15:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[DOTI36:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP15]], i32 3
|
||||||
|
; CHECK-NEXT: [[DOTI37:%.*]] = load i32, ptr addrspace(3) [[DOTI36]], align 4
|
||||||
|
; CHECK-NEXT: [[DOTI08:%.*]] = add i32 [[TMP2]], [[TMP12]]
|
||||||
|
; CHECK-NEXT: [[DOTI19:%.*]] = add i32 [[TMP4]], [[DOTI13]]
|
||||||
|
; CHECK-NEXT: [[DOTI210:%.*]] = add i32 [[TMP6]], [[DOTI25]]
|
||||||
|
; CHECK-NEXT: [[DOTI311:%.*]] = add i32 [[TMP8]], [[DOTI37]]
|
||||||
|
; CHECK-NEXT: [[DOTUPTO015:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI08]], i64 0
|
||||||
|
; CHECK-NEXT: [[DOTUPTO116:%.*]] = insertelement <4 x i32> [[DOTUPTO015]], i32 [[DOTI19]], i64 1
|
||||||
|
; CHECK-NEXT: [[DOTUPTO217:%.*]] = insertelement <4 x i32> [[DOTUPTO116]], i32 [[DOTI210]], i64 2
|
||||||
|
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[DOTUPTO217]], i32 [[DOTI311]], i64 3
|
||||||
|
; CHECK-NEXT: ret <4 x i32> [[TMP16]]
|
||||||
|
;
|
||||||
|
%1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 0), align 4
|
||||||
|
%2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 1), align 4
|
||||||
|
%3 = add <4 x i32> %1, %2
|
||||||
|
ret <4 x i32> %3
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
; Loads the single <4 x i32> global @vecData and returns it. The expectations
; below require four scalar i32 loads from @vecData.scalarized (element 0
; directly, elements 1 to 3 via constant i32 GEPs) and an insertelement chain
; rebuilding the vector.
define <4 x i32> @load_vec_test() #0 {
|
||||||
|
; CHECK-LABEL: define <4 x i32> @load_vec_test(
|
||||||
|
; CHECK-SAME: ) #[[ATTR0]] {
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr addrspace(3) @vecData.scalarized to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[TMP1]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 1) to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[TMP3]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP5:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 2) to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 3) to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4
|
||||||
|
; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
|
||||||
|
; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x i32> [[DOTUPTO0]], i32 [[TMP4]], i64 1
|
||||||
|
; CHECK-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x i32> [[DOTUPTO1]], i32 [[TMP6]], i64 2
|
||||||
|
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[DOTUPTO2]], i32 [[TMP8]], i64 3
|
||||||
|
; CHECK-NEXT: ret <4 x i32> [[TMP9]]
|
||||||
|
;
|
||||||
|
%1 = load <4 x i32>, <4 x i32> addrspace(3)* @"vecData", align 4
|
||||||
|
ret <4 x i32> %1
|
||||||
|
}
|
||||||
|
|
||||||
|
; Loads the <4 x i32> element of @staticArrayOfVecData selected by the runtime
; argument %index and returns it. The expectations below require one GEP on
; @staticArrayOfVecData.scalarized.1dim, four scalar i32 loads at offsets 0 to
; 3 from that address, and an insertelement chain rebuilding the vector.
; NOTE(review): the expected flat GEP uses %index directly, with no multiply by
; the four scalars per vector element - confirm that this addressing is
; intended.
define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
|
||||||
|
; CHECK-LABEL: define <4 x i32> @load_static_array_of_vec_test(
|
||||||
|
; CHECK-SAME: i32 [[INDEX:%.*]]) #[[ATTR0]] {
|
||||||
|
; CHECK-NEXT: [[DOTFLAT:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 [[INDEX]]
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr [[DOTFLAT]] to ptr
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr [[DOTFLAT]] to ptr
|
||||||
|
; CHECK-NEXT: [[DOTFLAT_I1:%.*]] = getelementptr i32, ptr [[TMP3]], i32 1
|
||||||
|
; CHECK-NEXT: [[DOTI1:%.*]] = load i32, ptr [[DOTFLAT_I1]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP4:%.*]] = bitcast ptr [[DOTFLAT]] to ptr
|
||||||
|
; CHECK-NEXT: [[DOTFLAT_I2:%.*]] = getelementptr i32, ptr [[TMP4]], i32 2
|
||||||
|
; CHECK-NEXT: [[DOTI2:%.*]] = load i32, ptr [[DOTFLAT_I2]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP5:%.*]] = bitcast ptr [[DOTFLAT]] to ptr
|
||||||
|
; CHECK-NEXT: [[DOTFLAT_I3:%.*]] = getelementptr i32, ptr [[TMP5]], i32 3
|
||||||
|
; CHECK-NEXT: [[DOTI3:%.*]] = load i32, ptr [[DOTFLAT_I3]], align 4
|
||||||
|
; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
|
||||||
|
; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x i32> [[DOTUPTO0]], i32 [[DOTI1]], i64 1
|
||||||
|
; CHECK-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x i32> [[DOTUPTO1]], i32 [[DOTI2]], i64 2
|
||||||
|
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[DOTUPTO2]], i32 [[DOTI3]], i64 3
|
||||||
|
; CHECK-NEXT: ret <4 x i32> [[TMP6]]
|
||||||
|
;
|
||||||
|
%3 = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* @staticArrayOfVecData, i32 0, i32 %index
|
||||||
|
%4 = load <4 x i32>, <4 x i32>* %3, align 4
|
||||||
|
ret <4 x i32> %4
|
||||||
|
}
|
||||||
|
|
||||||
|
; Loads elements [0][0] and [1][1] of the two-dimensional vector array
; @groushared2dArrayofVectors through constant GEP expressions and returns
; their element-wise sum. The expectations below require eight scalar i32
; loads based on @groushared2dArrayofVectors.scalarized.1dim, four scalar adds
; and an insertelement chain rebuilding the <4 x i32> result.
; NOTE(review): the expected address of element [1][1] still goes through a GEP
; typed [3 x [3 x [4 x i32]]] with indices 0, 1, 1 rather than a flat index -
; confirm this is the intended output.
define <4 x i32> @multid_load_test() #0 {
|
||||||
|
; CHECK-LABEL: define <4 x i32> @multid_load_test(
|
||||||
|
; CHECK-SAME: ) #[[ATTR0]] {
|
||||||
|
; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[TMP1]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1) to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[TMP3]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP5:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 2) to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 3) to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP9:%.*]] = bitcast ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [3 x [3 x [4 x i32]]], ptr addrspace(3) [[TMP9]], i32 0, i32 1, i32 1
|
||||||
|
; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(3) [[TMP11]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP13:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[DOTI12:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP13]], i32 1
|
||||||
|
; CHECK-NEXT: [[DOTI13:%.*]] = load i32, ptr addrspace(3) [[DOTI12]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP14:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[DOTI24:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP14]], i32 2
|
||||||
|
; CHECK-NEXT: [[DOTI25:%.*]] = load i32, ptr addrspace(3) [[DOTI24]], align 4
|
||||||
|
; CHECK-NEXT: [[TMP15:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
|
||||||
|
; CHECK-NEXT: [[DOTI36:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP15]], i32 3
|
||||||
|
; CHECK-NEXT: [[DOTI37:%.*]] = load i32, ptr addrspace(3) [[DOTI36]], align 4
|
||||||
|
; CHECK-NEXT: [[DOTI08:%.*]] = add i32 [[TMP2]], [[TMP12]]
|
||||||
|
; CHECK-NEXT: [[DOTI19:%.*]] = add i32 [[TMP4]], [[DOTI13]]
|
||||||
|
; CHECK-NEXT: [[DOTI210:%.*]] = add i32 [[TMP6]], [[DOTI25]]
|
||||||
|
; CHECK-NEXT: [[DOTI311:%.*]] = add i32 [[TMP8]], [[DOTI37]]
|
||||||
|
; CHECK-NEXT: [[DOTUPTO015:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI08]], i64 0
|
||||||
|
; CHECK-NEXT: [[DOTUPTO116:%.*]] = insertelement <4 x i32> [[DOTUPTO015]], i32 [[DOTI19]], i64 1
|
||||||
|
; CHECK-NEXT: [[DOTUPTO217:%.*]] = insertelement <4 x i32> [[DOTUPTO116]], i32 [[DOTI210]], i64 2
|
||||||
|
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[DOTUPTO217]], i32 [[DOTI311]], i64 3
|
||||||
|
; CHECK-NEXT: ret <4 x i32> [[TMP16]]
|
||||||
|
;
|
||||||
|
%1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 0, i32 0), align 4
|
||||||
|
%2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 1, i32 1), align 4
|
||||||
|
%3 = add <4 x i32> %1, %2
|
||||||
|
ret <4 x i32> %3
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { convergent norecurse nounwind "hlsl.export"}
|
30
llvm/test/CodeGen/DirectX/llc-vector-store-scalarize.ll
Normal file
30
llvm/test/CodeGen/DirectX/llc-vector-store-scalarize.ll
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
; RUN: opt -S -passes='dxil-data-scalarization,scalarizer<load-store>,dxil-op-lower' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
|
||||||
|
|
||||||
|
; Make sure we can store groupshared, static vectors and arrays of vectors
; Inputs: an addrspace(3) array of two <3 x float> vectors and an external
; addrspace(3) <4 x i32> vector; both are written by the functions below.
|
||||||
|
|
||||||
|
@"arrayofVecData" = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16
|
||||||
|
@"vecData" = external addrspace(3) global <4 x i32>, align 4
|
||||||
|
|
||||||
|
; CHECK: @arrayofVecData.scalarized = local_unnamed_addr addrspace(3) global [2 x [3 x float]] zeroinitializer, align 16
|
||||||
|
; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], align 4
|
||||||
|
; CHECK-NOT: @arrayofVecData
|
||||||
|
; CHECK-NOT: @vecData
|
||||||
|
|
||||||
|
; CHECK-LABEL: store_array_vec_test
|
||||||
|
; Stores two constant <3 x float> vectors: the first at @arrayofVecData itself
; and the second through an i8-typed constant GEP at byte offset 16 from it.
; The expectations below require exactly six scalar float stores to the
; scalarized global, immediately followed by the return.
define void @store_array_vec_test () local_unnamed_addr #0 {
|
||||||
|
; CHECK-COUNT-6: store float {{1|2|3|4|6}}.000000e+00, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align {{4|8|16}}
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
store <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, ptr addrspace(3) @"arrayofVecData", align 16
|
||||||
|
store <3 x float> <float 2.000000e+00, float 4.000000e+00, float 6.000000e+00>, ptr addrspace(3) getelementptr inbounds (i8, ptr addrspace(3) @"arrayofVecData", i32 16), align 16
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK-LABEL: store_vec_test
|
||||||
|
; Stores the <4 x i32> argument %inputVec to the global @vecData. The
; expectations below require exactly four scalar i32 stores of the argument's
; elements to @vecData.scalarized, immediately followed by the return.
define void @store_vec_test(<4 x i32> %inputVec) #0 {
|
||||||
|
; CHECK-COUNT-4: store i32 %inputVec.{{.*}}, ptr addrspace(3) {{(@vecData.scalarized|getelementptr \(i32, ptr addrspace\(3\) @vecData.scalarized, i32 .*\)|%.*)}}, align 4
|
||||||
|
; CHECK-NEXT: ret void
|
||||||
|
store <4 x i32> %inputVec, <4 x i32> addrspace(3)* @"vecData", align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { convergent norecurse nounwind "hlsl.export"}
|
@ -1,12 +1,15 @@
|
|||||||
; RUN: opt -S -passes='dxil-data-scalarization,function(scalarizer<load-store>),dxil-op-lower' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
|
|
||||||
; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s
|
; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s
|
||||||
|
|
||||||
; Make sure we don't touch arrays without vectors and that can recurse multiple-dimension arrays of vectors
|
; Make sure we don't touch arrays without vectors and that we can recurse into and flatten multi-dimensional arrays of vectors
|
||||||
|
|
||||||
@staticArray = internal global [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4
|
@staticArray = internal global [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4
|
||||||
@"groushared3dArrayofVectors" = local_unnamed_addr addrspace(3) global [3 x [3 x [3 x <4 x i32>]]] zeroinitializer, align 16
|
@"groushared3dArrayofVectors" = local_unnamed_addr addrspace(3) global [3 x [3 x [3 x <4 x i32>]]] zeroinitializer, align 16
|
||||||
|
|
||||||
; CHECK: @staticArray
|
; CHECK: @staticArray
|
||||||
; CHECK-NOT: @staticArray.scalarized
|
; CHECK-NOT: @staticArray.scalarized
|
||||||
; CHECK: @groushared3dArrayofVectors.scalarized = local_unnamed_addr addrspace(3) global [3 x [3 x [3 x [4 x i32]]]] zeroinitializer, align 16
|
; CHECK-NOT: @staticArray.scalarized.1dim
|
||||||
|
; CHECK-NOT: @staticArray.1dim
|
||||||
|
|
||||||
|
; CHECK: @groushared3dArrayofVectors.scalarized.1dim = local_unnamed_addr addrspace(3) global [108 x i32] zeroinitializer, align 16
|
||||||
|
; CHECK-NOT: @groushared3dArrayofVectors.scalarized
|
||||||
; CHECK-NOT: @groushared3dArrayofVectors
|
; CHECK-NOT: @groushared3dArrayofVectors
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
; RUN: opt -S -passes='dxil-data-scalarization,function(scalarizer<load-store>),dxil-op-lower' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
|
; RUN: opt -S -passes='dxil-data-scalarization,function(scalarizer<load-store>),dxil-op-lower' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
|
||||||
; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s
|
|
||||||
|
|
||||||
; Make sure we can load groupshared, static vectors and arrays of vectors
|
; Make sure we can load groupshared, static vectors and arrays of vectors
|
||||||
|
|
||||||
@ -22,7 +21,6 @@
|
|||||||
; CHECK-LABEL: load_array_vec_test
|
; CHECK-LABEL: load_array_vec_test
|
||||||
define <4 x i32> @load_array_vec_test() #0 {
|
define <4 x i32> @load_array_vec_test() #0 {
|
||||||
; CHECK-COUNT-8: load i32, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align 4
|
; CHECK-COUNT-8: load i32, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align 4
|
||||||
; CHECK-NOT: load i32, ptr addrspace(3) {{.*}}, align 4
|
|
||||||
%1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 0), align 4
|
%1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 0), align 4
|
||||||
%2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 1), align 4
|
%2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 1), align 4
|
||||||
%3 = add <4 x i32> %1, %2
|
%3 = add <4 x i32> %1, %2
|
||||||
@ -32,7 +30,6 @@ define <4 x i32> @load_array_vec_test() #0 {
|
|||||||
; CHECK-LABEL: load_vec_test
|
; CHECK-LABEL: load_vec_test
|
||||||
define <4 x i32> @load_vec_test() #0 {
|
define <4 x i32> @load_vec_test() #0 {
|
||||||
; CHECK-COUNT-4: load i32, ptr addrspace(3) {{(@vecData.scalarized|getelementptr \(i32, ptr addrspace\(3\) @vecData.scalarized, i32 .*\)|%.*)}}, align {{.*}}
|
; CHECK-COUNT-4: load i32, ptr addrspace(3) {{(@vecData.scalarized|getelementptr \(i32, ptr addrspace\(3\) @vecData.scalarized, i32 .*\)|%.*)}}, align {{.*}}
|
||||||
; CHECK-NOT: load i32, ptr addrspace(3) {{.*}}, align 4
|
|
||||||
%1 = load <4 x i32>, <4 x i32> addrspace(3)* @"vecData", align 4
|
%1 = load <4 x i32>, <4 x i32> addrspace(3)* @"vecData", align 4
|
||||||
ret <4 x i32> %1
|
ret <4 x i32> %1
|
||||||
}
|
}
|
||||||
@ -41,7 +38,6 @@ define <4 x i32> @load_vec_test() #0 {
|
|||||||
define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
|
define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
|
||||||
; CHECK: getelementptr [3 x [4 x i32]], ptr @staticArrayOfVecData.scalarized, i32 0, i32 %index
|
; CHECK: getelementptr [3 x [4 x i32]], ptr @staticArrayOfVecData.scalarized, i32 0, i32 %index
|
||||||
; CHECK-COUNT-4: load i32, ptr {{.*}}, align 4
|
; CHECK-COUNT-4: load i32, ptr {{.*}}, align 4
|
||||||
; CHECK-NOT: load i32, ptr {{.*}}, align 4
|
|
||||||
%3 = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* @staticArrayOfVecData, i32 0, i32 %index
|
%3 = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* @staticArrayOfVecData, i32 0, i32 %index
|
||||||
%4 = load <4 x i32>, <4 x i32>* %3, align 4
|
%4 = load <4 x i32>, <4 x i32>* %3, align 4
|
||||||
ret <4 x i32> %4
|
ret <4 x i32> %4
|
||||||
@ -50,7 +46,6 @@ define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
|
|||||||
; CHECK-LABEL: multid_load_test
|
; CHECK-LABEL: multid_load_test
|
||||||
define <4 x i32> @multid_load_test() #0 {
|
define <4 x i32> @multid_load_test() #0 {
|
||||||
; CHECK-COUNT-8: load i32, ptr addrspace(3) {{(.*@groushared2dArrayofVectors.scalarized.*|%.*)}}, align 4
|
; CHECK-COUNT-8: load i32, ptr addrspace(3) {{(.*@groushared2dArrayofVectors.scalarized.*|%.*)}}, align 4
|
||||||
; CHECK-NOT: load i32, ptr addrspace(3) {{.*}}, align 4
|
|
||||||
%1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 0, i32 0), align 4
|
%1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 0, i32 0), align 4
|
||||||
%2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 1, i32 1), align 4
|
%2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 1, i32 1), align 4
|
||||||
%3 = add <4 x i32> %1, %2
|
%3 = add <4 x i32> %1, %2
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
; RUN: opt -S -passes='dxil-data-scalarization,scalarizer<load-store>,dxil-op-lower' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
|
|
||||||
; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s
|
; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s
|
||||||
|
|
||||||
; Make sure we can store groupshared, static vectors and arrays of vectors
|
; Make sure we can store groupshared, static vectors and arrays of vectors
|
||||||
@ -6,15 +5,17 @@
|
|||||||
@"arrayofVecData" = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16
|
@"arrayofVecData" = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16
|
||||||
@"vecData" = external addrspace(3) global <4 x i32>, align 4
|
@"vecData" = external addrspace(3) global <4 x i32>, align 4
|
||||||
|
|
||||||
; CHECK: @arrayofVecData.scalarized = local_unnamed_addr addrspace(3) global [2 x [3 x float]] zeroinitializer, align 16
|
|
||||||
|
; CHECK: @arrayofVecData.scalarized.1dim = local_unnamed_addr addrspace(3) global [6 x float] zeroinitializer, align 16
|
||||||
; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], align 4
|
; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], align 4
|
||||||
; CHECK-NOT: @arrayofVecData
|
; CHECK-NOT: @arrayofVecData
|
||||||
|
; CHECK-NOT: @arrayofVecData.scalarized
|
||||||
; CHECK-NOT: @vecData
|
; CHECK-NOT: @vecData
|
||||||
|
|
||||||
; CHECK-LABEL: store_array_vec_test
|
; CHECK-LABEL: store_array_vec_test
|
||||||
define void @store_array_vec_test () local_unnamed_addr #0 {
|
define void @store_array_vec_test () local_unnamed_addr #0 {
|
||||||
; CHECK-COUNT-6: store float {{1|2|3|4|6}}.000000e+00, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align {{4|8|16}}
|
; CHECK-COUNT-6: store float {{1|2|3|4|6}}.000000e+00, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.1dim.*|%.*)}}, align {{4|8|16}}
|
||||||
; CHECK-NOT: store float {{1|2|3|4|6}}.000000e+00, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align {{4|8|16}}
|
; CHECK-NEXT: ret void
|
||||||
store <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, ptr addrspace(3) @"arrayofVecData", align 16
|
store <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, ptr addrspace(3) @"arrayofVecData", align 16
|
||||||
store <3 x float> <float 2.000000e+00, float 4.000000e+00, float 6.000000e+00>, ptr addrspace(3) getelementptr inbounds (i8, ptr addrspace(3) @"arrayofVecData", i32 16), align 16
|
store <3 x float> <float 2.000000e+00, float 4.000000e+00, float 6.000000e+00>, ptr addrspace(3) getelementptr inbounds (i8, ptr addrspace(3) @"arrayofVecData", i32 16), align 16
|
||||||
ret void
|
ret void
|
||||||
@ -23,7 +24,7 @@ define void @store_array_vec_test () local_unnamed_addr #0 {
|
|||||||
; CHECK-LABEL: store_vec_test
|
; CHECK-LABEL: store_vec_test
|
||||||
define void @store_vec_test(<4 x i32> %inputVec) #0 {
|
define void @store_vec_test(<4 x i32> %inputVec) #0 {
|
||||||
; CHECK-COUNT-4: store i32 %inputVec.{{.*}}, ptr addrspace(3) {{(@vecData.scalarized|getelementptr \(i32, ptr addrspace\(3\) @vecData.scalarized, i32 .*\)|%.*)}}, align 4
|
; CHECK-COUNT-4: store i32 %inputVec.{{.*}}, ptr addrspace(3) {{(@vecData.scalarized|getelementptr \(i32, ptr addrspace\(3\) @vecData.scalarized, i32 .*\)|%.*)}}, align 4
|
||||||
; CHECK-NOT: store i32 %inputVec.{{.*}}, ptr addrspace(3)
|
; CHECK-NEXT: ret void
|
||||||
store <4 x i32> %inputVec, <4 x i32> addrspace(3)* @"vecData", align 4
|
store <4 x i32> %inputVec, <4 x i32> addrspace(3)* @"vecData", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user