[DirectX] Flatten arrays (#114332)
- The relevant piece is `DXILFlattenArrays.cpp`. - The Load and Store instruction visits exist only to find `GetElementPtr` constant expressions and split them into instructions. - Allocas needed to be replaced with flattened allocas. - Global arrays were handled similarly to allocas; the only interesting piece there is around initializers. - Most of the work went into building correct GEP chains. The approach is a recursive strategy via `recursivelyCollectGEPs`. - All intermediary GEPs are marked for deletion and only the leaf GEPs are rewritten with the new flattened index. fixes [89646](https://github.com/llvm/llvm-project/issues/89646)
This commit is contained in:
parent
de6d48d05d
commit
5ac624c823
@ -22,6 +22,7 @@ add_llvm_target(DirectXCodeGen
|
||||
DXContainerGlobals.cpp
|
||||
DXILDataScalarization.cpp
|
||||
DXILFinalizeLinkage.cpp
|
||||
DXILFlattenArrays.cpp
|
||||
DXILIntrinsicExpansion.cpp
|
||||
DXILOpBuilder.cpp
|
||||
DXILOpLowering.cpp
|
||||
|
443
llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
Normal file
443
llvm/lib/Target/DirectX/DXILFlattenArrays.cpp
Normal file
@ -0,0 +1,443 @@
|
||||
//===- DXILFlattenArrays.cpp - Flattens DXIL Arrays-----------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===---------------------------------------------------------------------===//
|
||||
///
|
||||
/// \file This file contains a pass to flatten arrays for the DirectX Backend.
|
||||
///
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "DXILFlattenArrays.h"
|
||||
#include "DirectX.h"
|
||||
#include "llvm/ADT/PostOrderIterator.h"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/Analysis/DXILResource.h"
|
||||
#include "llvm/IR/BasicBlock.h"
|
||||
#include "llvm/IR/DerivedTypes.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/InstVisitor.h"
|
||||
#include "llvm/IR/ReplaceConstant.h"
|
||||
#include "llvm/Support/Casting.h"
|
||||
#include "llvm/Transforms/Utils/Local.h"
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <utility>
|
||||
|
||||
#define DEBUG_TYPE "dxil-flatten-arrays"
|
||||
|
||||
using namespace llvm;
|
||||
namespace {
|
||||
|
||||
// Legacy pass-manager wrapper around the array-flattening transform; the
// actual work is shared with the new-PM pass via flattenArrays().
class DXILFlattenArraysLegacy : public ModulePass {

public:
  bool runOnModule(Module &M) override;
  DXILFlattenArraysLegacy() : ModulePass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override;
  static char ID; // Pass identification.
};
|
||||
|
||||
// Bookkeeping for one GEP chain: everything needed to rebuild the whole chain
// as a single GEP into the flattened one-dimensional array.
struct GEPData {
  ArrayType *ParentArrayType; // Flattened 1-D array type to GEP into.
  Value *ParendOperand;       // Root pointer of the chain. (NOTE(review): name
                              // is a typo for "ParentOperand"; kept as-is.)
  SmallVector<Value *> Indices; // One trailing index collected per chain level.
  SmallVector<uint64_t> Dims;   // Element count of each level's array type.
  bool AllIndicesAreConstInt;   // True iff every entry in Indices is a
                                // ConstantInt.
};
|
||||
|
||||
// InstVisitor that rewrites multi-dimensional array allocas and the GEP chains
// addressing them into one-dimensional equivalents. Each visit method returns
// true iff it changed the IR.
class DXILFlattenArraysVisitor
    : public InstVisitor<DXILFlattenArraysVisitor, bool> {
public:
  DXILFlattenArraysVisitor() {}
  bool visit(Function &F);
  // InstVisitor methods. They return true if the instruction was scalarized,
  // false if nothing changed.
  bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
  bool visitAllocaInst(AllocaInst &AI);
  bool visitInstruction(Instruction &I) { return false; }
  bool visitSelectInst(SelectInst &SI) { return false; }
  bool visitICmpInst(ICmpInst &ICI) { return false; }
  bool visitFCmpInst(FCmpInst &FCI) { return false; }
  bool visitUnaryOperator(UnaryOperator &UO) { return false; }
  bool visitBinaryOperator(BinaryOperator &BO) { return false; }
  bool visitCastInst(CastInst &CI) { return false; }
  bool visitBitCastInst(BitCastInst &BCI) { return false; }
  bool visitInsertElementInst(InsertElementInst &IEI) { return false; }
  bool visitExtractElementInst(ExtractElementInst &EEI) { return false; }
  bool visitShuffleVectorInst(ShuffleVectorInst &SVI) { return false; }
  bool visitPHINode(PHINode &PHI) { return false; }
  // Loads and stores are visited only to split GetElementPtr constant
  // expressions among their operands into instructions; they always report
  // "no change".
  bool visitLoadInst(LoadInst &LI);
  bool visitStoreInst(StoreInst &SI);
  bool visitCallInst(CallInst &ICI) { return false; }
  bool visitFreezeInst(FreezeInst &FI) { return false; }
  // True when T is an array whose element type is itself an array.
  static bool isMultiDimensionalArray(Type *T);
  // Returns {total element count, innermost element type} of a (possibly
  // nested) array type.
  static std::pair<unsigned, Type *> getElementCountAndType(Type *ArrayTy);

private:
  // Intermediary chain GEPs queued for cleanup in finish().
  SmallVector<WeakTrackingVH> PotentiallyDeadInstrs;
  // Maps each recorded leaf GEP to the data needed to rebuild it flat.
  DenseMap<GetElementPtrInst *, GEPData> GEPChainMap;
  bool finish();
  // Flattening of an all-constant index list into a single i32 constant.
  ConstantInt *genConstFlattenIndices(ArrayRef<Value *> Indices,
                                      ArrayRef<uint64_t> Dims,
                                      IRBuilder<> &Builder);
  // Flattening via emitted mul/add instructions for dynamic indices.
  Value *genInstructionFlattenIndices(ArrayRef<Value *> Indices,
                                      ArrayRef<uint64_t> Dims,
                                      IRBuilder<> &Builder);
  void
  recursivelyCollectGEPs(GetElementPtrInst &CurrGEP,
                         ArrayType *FlattenedArrayType, Value *PtrOperand,
                         unsigned &GEPChainUseCount,
                         SmallVector<Value *> Indices = SmallVector<Value *>(),
                         SmallVector<uint64_t> Dims = SmallVector<uint64_t>(),
                         bool AllIndicesAreConstInt = true);
  bool visitGetElementPtrInstInGEPChain(GetElementPtrInst &GEP);
  bool visitGetElementPtrInstInGEPChainBase(GEPData &GEPInfo,
                                            GetElementPtrInst &GEP);
};
|
||||
} // namespace
|
||||
|
||||
// Delete any queued intermediary GEPs that became trivially dead once the
// leaf GEPs of each chain were rewritten. Always reports success.
bool DXILFlattenArraysVisitor::finish() {
  RecursivelyDeleteTriviallyDeadInstructionsPermissive(PotentiallyDeadInstrs);
  return true;
}
|
||||
|
||||
/// Returns true when \p T is an array whose elements are themselves arrays.
bool DXILFlattenArraysVisitor::isMultiDimensionalArray(Type *T) {
  auto *Outer = dyn_cast<ArrayType>(T);
  return Outer != nullptr && isa<ArrayType>(Outer->getElementType());
}
|
||||
|
||||
std::pair<unsigned, Type *>
|
||||
DXILFlattenArraysVisitor::getElementCountAndType(Type *ArrayTy) {
|
||||
unsigned TotalElements = 1;
|
||||
Type *CurrArrayTy = ArrayTy;
|
||||
while (auto *InnerArrayTy = dyn_cast<ArrayType>(CurrArrayTy)) {
|
||||
TotalElements *= InnerArrayTy->getNumElements();
|
||||
CurrArrayTy = InnerArrayTy->getElementType();
|
||||
}
|
||||
return std::make_pair(TotalElements, CurrArrayTy);
|
||||
}
|
||||
|
||||
// Collapse a per-dimension list of constant indices into the single row-major
// flat index of the equivalent one-dimensional array.
//
// \param Indices One index per array dimension; all must be ConstantInt.
// \param Dims    Element count of each corresponding dimension.
// \param Builder Used only to materialize the resulting i32 constant.
// \returns The flattened index as an i32 ConstantInt.
ConstantInt *DXILFlattenArraysVisitor::genConstFlattenIndices(
    ArrayRef<Value *> Indices, ArrayRef<uint64_t> Dims, IRBuilder<> &Builder) {
  assert(Indices.size() == Dims.size() &&
         "Indices and dimensions should be the same");
  unsigned FlatIndex = 0;
  unsigned Multiplier = 1;

  // Walk from the innermost dimension outward; each index is scaled by the
  // product of the sizes of all dimensions inside it.
  for (int I = Indices.size() - 1; I >= 0; --I) {
    unsigned DimSize = Dims[I];
    ConstantInt *CIndex = dyn_cast<ConstantInt>(Indices[I]);
    assert(CIndex && "This function expects all indices to be ConstantInt");
    FlatIndex += CIndex->getZExtValue() * Multiplier;
    Multiplier *= DimSize;
  }
  return Builder.getInt32(FlatIndex);
}
|
||||
|
||||
// Emit mul/add instructions that compute the row-major flat index for a
// per-dimension index list when at least one index is not a compile-time
// constant. Returns the final index Value.
Value *DXILFlattenArraysVisitor::genInstructionFlattenIndices(
    ArrayRef<Value *> Indices, ArrayRef<uint64_t> Dims, IRBuilder<> &Builder) {
  // A single dimension needs no arithmetic.
  if (Indices.size() == 1)
    return Indices[0];

  Value *FlatIndex = Builder.getInt32(0);
  unsigned Stride = 1;

  // Innermost dimension first: FlatIndex = sum(Indices[I] * Stride_I), where
  // Stride_I is the product of all inner dimension sizes.
  for (int I = Indices.size() - 1; I >= 0; --I) {
    Value *Scaled = Builder.CreateMul(Indices[I], Builder.getInt32(Stride));
    FlatIndex = Builder.CreateAdd(FlatIndex, Scaled);
    Stride *= Dims[I];
  }
  return FlatIndex;
}
|
||||
|
||||
// Loads are visited only to find GEP constant expressions among the operands
// and split them into real instructions so the GEP visitor can rewrite them.
// Always returns false: the load itself is never changed here.
bool DXILFlattenArraysVisitor::visitLoadInst(LoadInst &LI) {
  for (Use &U : LI.operands()) {
    auto *CE = dyn_cast<ConstantExpr>(U.get());
    if (!CE || CE->getOpcode() != Instruction::GetElementPtr)
      continue;
    convertUsersOfConstantsToInstructions(CE,
                                          /*RestrictToFunc=*/nullptr,
                                          /*RemoveDeadConstants=*/false,
                                          /*IncludeSelf=*/true);
    return false;
  }
  return false;
}
|
||||
|
||||
// Stores are visited only to find GEP constant expressions among the operands
// (value or pointer) and split them into real instructions for later
// rewriting. Always returns false: the store itself is never changed here.
bool DXILFlattenArraysVisitor::visitStoreInst(StoreInst &SI) {
  for (Use &U : SI.operands()) {
    auto *CE = dyn_cast<ConstantExpr>(U.get());
    if (!CE || CE->getOpcode() != Instruction::GetElementPtr)
      continue;
    convertUsersOfConstantsToInstructions(CE,
                                          /*RestrictToFunc=*/nullptr,
                                          /*RemoveDeadConstants=*/false,
                                          /*IncludeSelf=*/true);
    return false;
  }
  return false;
}
|
||||
|
||||
// Replace an alloca of a multi-dimensional array with an alloca of the
// equivalent one-dimensional array (same base type, same total element count,
// same alignment). Uses are redirected wholesale; the GEP chains feeding off
// the alloca are rewritten separately by the GEP visitor.
bool DXILFlattenArraysVisitor::visitAllocaInst(AllocaInst &AI) {
  if (!isMultiDimensionalArray(AI.getAllocatedType()))
    return false;

  ArrayType *ArrType = cast<ArrayType>(AI.getAllocatedType());
  IRBuilder<> Builder(&AI);
  auto [TotalElements, BaseType] = getElementCountAndType(ArrType);

  // (Renamed from the misspelled "FattenedArrayType" for consistency with the
  // rest of the file.)
  ArrayType *FlattenedArrayType = ArrayType::get(BaseType, TotalElements);
  AllocaInst *FlatAlloca =
      Builder.CreateAlloca(FlattenedArrayType, nullptr, AI.getName() + ".flat");
  FlatAlloca->setAlignment(AI.getAlign());
  AI.replaceAllUsesWith(FlatAlloca);
  AI.eraseFromParent();
  return true;
}
|
||||
|
||||
// Walk the chain of GEPs rooted at PtrOperand, accumulating one trailing index
// and one dimension size per level. Leaf GEPs — those whose source type is a
// 1-D array, or mid-chain GEPs with no further GEP users — are recorded in
// GEPChainMap for later rewriting against FlattenedArrayType; intermediate
// GEPs are only traversed. GEPChainUseCount is incremented once per chain link
// so the caller can tell whether the root GEP had any GEP users at all.
//
// Indices/Dims are deliberately taken by value: each recursive branch gets its
// own copy of the prefix collected so far.
void DXILFlattenArraysVisitor::recursivelyCollectGEPs(
    GetElementPtrInst &CurrGEP, ArrayType *FlattenedArrayType,
    Value *PtrOperand, unsigned &GEPChainUseCount, SmallVector<Value *> Indices,
    SmallVector<uint64_t> Dims, bool AllIndicesAreConstInt) {
  // Only the trailing index of each GEP selects within the current array
  // dimension; record it along with that dimension's size.
  Value *LastIndex = CurrGEP.getOperand(CurrGEP.getNumOperands() - 1);
  AllIndicesAreConstInt &= isa<ConstantInt>(LastIndex);
  Indices.push_back(LastIndex);
  assert(isa<ArrayType>(CurrGEP.getSourceElementType()));
  Dims.push_back(
      cast<ArrayType>(CurrGEP.getSourceElementType())->getNumElements());
  bool IsMultiDimArr = isMultiDimensionalArray(CurrGEP.getSourceElementType());
  if (!IsMultiDimArr) {
    // Reached a 1-D array: this is the leaf of the chain.
    assert(GEPChainUseCount < FlattenedArrayType->getNumElements());
    GEPChainMap.insert(
        {&CurrGEP,
         {FlattenedArrayType, PtrOperand, std::move(Indices), std::move(Dims),
          AllIndicesAreConstInt}});
    return;
  }
  bool GepUses = false;
  for (auto *User : CurrGEP.users()) {
    if (GetElementPtrInst *NestedGEP = dyn_cast<GetElementPtrInst>(User)) {
      recursivelyCollectGEPs(*NestedGEP, FlattenedArrayType, PtrOperand,
                             ++GEPChainUseCount, Indices, Dims,
                             AllIndicesAreConstInt);
      GepUses = true;
    }
  }
  // This case is just in case the GEP chain doesn't end with a 1-D array:
  // a mid-chain GEP with no GEP users becomes a leaf itself.
  // (IsMultiDimArr is necessarily true here — the !IsMultiDimArr path returned
  // above — so only the use counts matter.)
  if (GEPChainUseCount > 0 && !GepUses) {
    GEPChainMap.insert(
        {&CurrGEP,
         {FlattenedArrayType, PtrOperand, std::move(Indices), std::move(Dims),
          AllIndicesAreConstInt}});
  }
}
|
||||
|
||||
// Rewrite a GEP that was previously recorded in GEPChainMap.
bool DXILFlattenArraysVisitor::visitGetElementPtrInstInGEPChain(
    GetElementPtrInst &GEP) {
  // Look the entry up by iterator instead of copying the whole GEPData (its
  // index/dimension vectors) out of the map.
  auto It = GEPChainMap.find(&GEP);
  assert(It != GEPChainMap.end() && "GEP must have been collected");
  return visitGetElementPtrInstInGEPChainBase(It->second, GEP);
}
|
||||
// Replace the leaf GEP of a chain with a single GEP into the flattened 1-D
// array, using the indices and dimensions collected for the chain. The old
// GEP is RAUW'd and erased; always returns true.
bool DXILFlattenArraysVisitor::visitGetElementPtrInstInGEPChainBase(
    GEPData &GEPInfo, GetElementPtrInst &GEP) {
  IRBuilder<> Builder(&GEP);
  // Fold the multi-dimensional indices into one flat index — as a constant
  // when possible, otherwise via emitted arithmetic.
  Value *FlatIndex =
      GEPInfo.AllIndicesAreConstInt
          ? genConstFlattenIndices(GEPInfo.Indices, GEPInfo.Dims, Builder)
          : genInstructionFlattenIndices(GEPInfo.Indices, GEPInfo.Dims,
                                         Builder);

  Value *FlatGEP = Builder.CreateGEP(GEPInfo.ParentArrayType,
                                     GEPInfo.ParendOperand, FlatIndex,
                                     GEP.getName() + ".flat", GEP.isInBounds());

  GEP.replaceAllUsesWith(FlatGEP);
  GEP.eraseFromParent();
  return true;
}
|
||||
|
||||
// Entry point for GEP rewriting. A GEP already recorded as a chain leaf is
// rewritten directly; otherwise, a GEP rooted at a multi-dimensional array
// starts a chain collection. Returns true iff the IR changed.
bool DXILFlattenArraysVisitor::visitGetElementPtrInst(GetElementPtrInst &GEP) {
  auto It = GEPChainMap.find(&GEP);
  if (It != GEPChainMap.end())
    return visitGetElementPtrInstInGEPChain(GEP);
  if (!isMultiDimensionalArray(GEP.getSourceElementType()))
    return false;

  ArrayType *ArrType = cast<ArrayType>(GEP.getSourceElementType());
  IRBuilder<> Builder(&GEP);
  auto [TotalElements, BaseType] = getElementCountAndType(ArrType);
  ArrayType *FlattenedArrayType = ArrayType::get(BaseType, TotalElements);

  Value *PtrOperand = GEP.getPointerOperand();

  unsigned GEPChainUseCount = 0;
  recursivelyCollectGEPs(GEP, FlattenedArrayType, PtrOperand, GEPChainUseCount);

  // NOTE: hasNUses(0) is not the same as GEPChainUseCount == 0.
  // Here recursion is used to get the length of the GEP chain.
  // Handle zero uses here because there won't be an update via
  // a child in the chain later.
  if (GEPChainUseCount == 0) {
    SmallVector<Value *> Indices({GEP.getOperand(GEP.getNumOperands() - 1)});
    SmallVector<uint64_t> Dims({ArrType->getNumElements()});
    bool AllIndicesAreConstInt = isa<ConstantInt>(Indices[0]);
    // (Removed a no-op std::move on the ArrayType pointer.)
    GEPData GEPInfo{FlattenedArrayType, PtrOperand, std::move(Indices),
                    std::move(Dims), AllIndicesAreConstInt};
    return visitGetElementPtrInstInGEPChainBase(GEPInfo, GEP);
  }

  // This GEP fed a chain; the leaf rewrites will orphan it, so queue it for
  // cleanup in finish().
  PotentiallyDeadInstrs.emplace_back(&GEP);
  return false;
}
|
||||
|
||||
// Visit every instruction of \p F in reverse post-order, then delete the
// intermediary GEPs that became dead. Early-inc iteration tolerates the
// visitors erasing instructions as we go. Returns true iff anything changed.
bool DXILFlattenArraysVisitor::visit(Function &F) {
  bool Changed = false;
  ReversePostOrderTraversal<Function *> RPOT(&F);
  for (BasicBlock *BB : make_early_inc_range(RPOT))
    for (Instruction &I : make_early_inc_range(*BB))
      Changed |= InstVisitor::visit(I);
  finish();
  return Changed;
}
|
||||
|
||||
// Recursively append every scalar element of a (possibly nested) array
// initializer to \p Elements, in row-major order.
static void collectElements(Constant *Init,
                            SmallVectorImpl<Constant *> &Elements) {
  // Base case: a non-array constant is a leaf element.
  if (!isa<ArrayType>(Init->getType())) {
    Elements.push_back(Init);
    return;
  }

  // Recursive case: process each element in the array.
  if (auto *ArrayConstant = dyn_cast<ConstantArray>(Init)) {
    for (unsigned I = 0; I < ArrayConstant->getNumOperands(); ++I) {
      collectElements(ArrayConstant->getOperand(I), Elements);
    }
  } else if (auto *DataArrayConstant = dyn_cast<ConstantDataArray>(Init)) {
    for (unsigned I = 0; I < DataArrayConstant->getNumElements(); ++I) {
      collectElements(DataArrayConstant->getElementAsConstant(I), Elements);
    }
  } else if (auto *AggZero = dyn_cast<ConstantAggregateZero>(Init)) {
    // A zeroinitializer nested inside an otherwise explicit initializer
    // (e.g. [2 x [2 x i32]] [[2 x i32] zeroinitializer, ...]) previously hit
    // llvm_unreachable; expand it into the right number of zero elements.
    for (unsigned I = 0; I < AggZero->getElementCount().getFixedValue(); ++I)
      collectElements(AggZero->getElementValue(I), Elements);
  } else if (auto *Undef = dyn_cast<UndefValue>(Init)) {
    // Likewise for a nested undef sub-aggregate.
    for (unsigned I = 0; I < Undef->getNumElements(); ++I)
      collectElements(Undef->getElementValue(I), Elements);
  } else {
    llvm_unreachable(
        "Expected a ConstantArray or ConstantDataArray for array initializer!");
  }
}
|
||||
|
||||
// Rebuild \p Init, an initializer for a global of type \p OrigType, as an
// initializer for the flattened 1-D type \p FlattenedType.
// NOTE(review): Ctx is currently unused; signature kept for callers.
static Constant *transformInitializer(Constant *Init, Type *OrigType,
                                      ArrayType *FlattenedType,
                                      LLVMContext &Ctx) {
  // Handle ConstantAggregateZero (zero-initialized constants): a flat
  // zeroinitializer of the new type is equivalent.
  if (isa<ConstantAggregateZero>(Init))
    return ConstantAggregateZero::get(FlattenedType);

  // Handle UndefValue (undefined constants) the same way.
  if (isa<UndefValue>(Init))
    return UndefValue::get(FlattenedType);

  // Non-array initializers pass through untouched.
  if (!isa<ArrayType>(OrigType))
    return Init;

  // Otherwise gather every scalar element and rebuild a 1-D array constant.
  SmallVector<Constant *> FlattenedElements;
  collectElements(Init, FlattenedElements);
  assert(FlattenedType->getNumElements() == FlattenedElements.size() &&
         "The number of collected elements should match the FlattenedType");
  return ConstantArray::get(FlattenedType, FlattenedElements);
}
|
||||
|
||||
static void
|
||||
flattenGlobalArrays(Module &M,
|
||||
DenseMap<GlobalVariable *, GlobalVariable *> &GlobalMap) {
|
||||
LLVMContext &Ctx = M.getContext();
|
||||
for (GlobalVariable &G : M.globals()) {
|
||||
Type *OrigType = G.getValueType();
|
||||
if (!DXILFlattenArraysVisitor::isMultiDimensionalArray(OrigType))
|
||||
continue;
|
||||
|
||||
ArrayType *ArrType = cast<ArrayType>(OrigType);
|
||||
auto [TotalElements, BaseType] =
|
||||
DXILFlattenArraysVisitor::getElementCountAndType(ArrType);
|
||||
ArrayType *FattenedArrayType = ArrayType::get(BaseType, TotalElements);
|
||||
|
||||
// Create a new global variable with the updated type
|
||||
// Note: Initializer is set via transformInitializer
|
||||
GlobalVariable *NewGlobal =
|
||||
new GlobalVariable(M, FattenedArrayType, G.isConstant(), G.getLinkage(),
|
||||
/*Initializer=*/nullptr, G.getName() + ".1dim", &G,
|
||||
G.getThreadLocalMode(), G.getAddressSpace(),
|
||||
G.isExternallyInitialized());
|
||||
|
||||
// Copy relevant attributes
|
||||
NewGlobal->setUnnamedAddr(G.getUnnamedAddr());
|
||||
if (G.getAlignment() > 0) {
|
||||
NewGlobal->setAlignment(G.getAlign());
|
||||
}
|
||||
|
||||
if (G.hasInitializer()) {
|
||||
Constant *Init = G.getInitializer();
|
||||
Constant *NewInit =
|
||||
transformInitializer(Init, OrigType, FattenedArrayType, Ctx);
|
||||
NewGlobal->setInitializer(NewInit);
|
||||
}
|
||||
GlobalMap[&G] = NewGlobal;
|
||||
}
|
||||
}
|
||||
|
||||
// Shared driver for both pass-manager entry points: flatten global arrays
// first, then rewrite each non-intrinsic function, then RAUW and erase the
// replaced globals. Returns true iff the module changed.
static bool flattenArrays(Module &M) {
  bool Changed = false;
  DXILFlattenArraysVisitor Visitor;
  DenseMap<GlobalVariable *, GlobalVariable *> ReplacedGlobals;
  flattenGlobalArrays(M, ReplacedGlobals);
  for (Function &F : make_early_inc_range(M.functions())) {
    if (F.isIntrinsic())
      continue;
    Changed |= Visitor.visit(F);
  }
  for (auto &[OldGV, NewGV] : ReplacedGlobals) {
    OldGV->replaceAllUsesWith(NewGV);
    OldGV->eraseFromParent();
    Changed = true;
  }
  return Changed;
}
|
||||
|
||||
// New pass-manager entry point. When nothing changed, everything is
// preserved; otherwise only the DXIL resource analysis survives.
PreservedAnalyses DXILFlattenArrays::run(Module &M, ModuleAnalysisManager &) {
  if (!flattenArrays(M))
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserve<DXILResourceAnalysis>();
  return PA;
}
|
||||
|
||||
// Legacy pass entry point; defers to the shared flattenArrays() driver.
bool DXILFlattenArraysLegacy::runOnModule(Module &M) {
  return flattenArrays(M);
}
|
||||
|
||||
// Flattening arrays does not invalidate the DXIL resource analysis, so mark
// it preserved for the legacy pass manager.
void DXILFlattenArraysLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addPreserved<DXILResourceWrapperPass>();
}
|
||||
|
||||
char DXILFlattenArraysLegacy::ID = 0;
|
||||
|
||||
INITIALIZE_PASS_BEGIN(DXILFlattenArraysLegacy, DEBUG_TYPE,
|
||||
"DXIL Array Flattener", false, false)
|
||||
INITIALIZE_PASS_END(DXILFlattenArraysLegacy, DEBUG_TYPE, "DXIL Array Flattener",
|
||||
false, false)
|
||||
|
||||
ModulePass *llvm::createDXILFlattenArraysLegacyPass() {
|
||||
return new DXILFlattenArraysLegacy();
|
||||
}
|
23
llvm/lib/Target/DirectX/DXILFlattenArrays.h
Normal file
23
llvm/lib/Target/DirectX/DXILFlattenArrays.h
Normal file
@ -0,0 +1,23 @@
|
||||
//===- DXILFlattenArrays.h - Perform flattening of DXIL Arrays -*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TARGET_DIRECTX_DXILFLATTENARRAYS_H
|
||||
#define LLVM_TARGET_DIRECTX_DXILFLATTENARRAYS_H
|
||||
|
||||
#include "llvm/IR/PassManager.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/// A pass that transforms multidimensional arrays into one-dimensional arrays.
class DXILFlattenArrays : public PassInfoMixin<DXILFlattenArrays> {
public:
  /// Flatten all multi-dimensional array globals and allocas in \p M.
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
};
|
||||
} // namespace llvm
|
||||
|
||||
#endif // LLVM_TARGET_DIRECTX_DXILFLATTENARRAYS_H
|
@ -40,6 +40,12 @@ void initializeDXILDataScalarizationLegacyPass(PassRegistry &);
|
||||
/// Pass to scalarize llvm global data into a DXIL legal form
|
||||
ModulePass *createDXILDataScalarizationLegacyPass();
|
||||
|
||||
/// Initializer for DXIL Array Flatten Pass
|
||||
void initializeDXILFlattenArraysLegacyPass(PassRegistry &);
|
||||
|
||||
/// Pass to flatten arrays into a one dimensional DXIL legal form
|
||||
ModulePass *createDXILFlattenArraysLegacyPass();
|
||||
|
||||
/// Initializer for DXILOpLowering
|
||||
void initializeDXILOpLoweringLegacyPass(PassRegistry &);
|
||||
|
||||
|
@ -24,6 +24,7 @@ MODULE_ANALYSIS("dxil-resource-md", DXILResourceMDAnalysis())
|
||||
#define MODULE_PASS(NAME, CREATE_PASS)
|
||||
#endif
|
||||
MODULE_PASS("dxil-data-scalarization", DXILDataScalarization())
|
||||
MODULE_PASS("dxil-flatten-arrays", DXILFlattenArrays())
|
||||
MODULE_PASS("dxil-intrinsic-expansion", DXILIntrinsicExpansion())
|
||||
MODULE_PASS("dxil-op-lower", DXILOpLowering())
|
||||
MODULE_PASS("dxil-pretty-printer", DXILPrettyPrinterPass(dbgs()))
|
||||
|
@ -13,6 +13,7 @@
|
||||
|
||||
#include "DirectXTargetMachine.h"
|
||||
#include "DXILDataScalarization.h"
|
||||
#include "DXILFlattenArrays.h"
|
||||
#include "DXILIntrinsicExpansion.h"
|
||||
#include "DXILOpLowering.h"
|
||||
#include "DXILPrettyPrinter.h"
|
||||
@ -48,6 +49,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeDirectXTarget() {
|
||||
auto *PR = PassRegistry::getPassRegistry();
|
||||
initializeDXILIntrinsicExpansionLegacyPass(*PR);
|
||||
initializeDXILDataScalarizationLegacyPass(*PR);
|
||||
initializeDXILFlattenArraysLegacyPass(*PR);
|
||||
initializeScalarizerLegacyPassPass(*PR);
|
||||
initializeDXILPrepareModulePass(*PR);
|
||||
initializeEmbedDXILPassPass(*PR);
|
||||
@ -91,6 +93,7 @@ public:
|
||||
addPass(createDXILDataScalarizationLegacyPass());
|
||||
ScalarizerPassOptions DxilScalarOptions;
|
||||
DxilScalarOptions.ScalarizeLoadStore = true;
|
||||
addPass(createDXILFlattenArraysLegacyPass());
|
||||
addPass(createScalarizerPass(DxilScalarOptions));
|
||||
addPass(createDXILOpLoweringLegacyPass());
|
||||
addPass(createDXILFinalizeLinkageLegacyPass());
|
||||
|
188
llvm/test/CodeGen/DirectX/flatten-array.ll
Normal file
188
llvm/test/CodeGen/DirectX/flatten-array.ll
Normal file
@ -0,0 +1,188 @@
|
||||
|
||||
; RUN: opt -S -dxil-flatten-arrays %s | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: alloca_2d_test
|
||||
define void @alloca_2d_test () {
|
||||
; CHECK-NEXT: alloca [9 x i32], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%1 = alloca [3 x [3 x i32]], align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: alloca_3d_test
|
||||
define void @alloca_3d_test () {
|
||||
; CHECK-NEXT: alloca [8 x i32], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%1 = alloca [2 x[2 x [2 x i32]]], align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: alloca_4d_test
|
||||
define void @alloca_4d_test () {
|
||||
; CHECK-NEXT: alloca [16 x i32], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%1 = alloca [2x[2 x[2 x [2 x i32]]]], align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: gep_2d_test
|
||||
define void @gep_2d_test () {
|
||||
; CHECK: [[a:%.*]] = alloca [9 x i32], align 4
|
||||
; CHECK-COUNT-9: getelementptr inbounds [9 x i32], ptr [[a]], i32 {{[0-8]}}
|
||||
; CHECK-NEXT: ret void
|
||||
%1 = alloca [3 x [3 x i32]], align 4
|
||||
%g2d0 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* %1, i32 0, i32 0
|
||||
%g1d_1 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d0, i32 0, i32 0
|
||||
%g1d_2 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d0, i32 0, i32 1
|
||||
%g1d_3 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d0, i32 0, i32 2
|
||||
%g2d1 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* %1, i32 0, i32 1
|
||||
%g1d1_1 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d1, i32 0, i32 0
|
||||
%g1d1_2 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d1, i32 0, i32 1
|
||||
%g1d1_3 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d1, i32 0, i32 2
|
||||
%g2d2 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* %1, i32 0, i32 2
|
||||
%g1d2_1 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d2, i32 0, i32 0
|
||||
%g1d2_2 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d2, i32 0, i32 1
|
||||
%g1d2_3 = getelementptr inbounds [3 x i32], [3 x i32]* %g2d2, i32 0, i32 2
|
||||
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: gep_3d_test
|
||||
define void @gep_3d_test () {
|
||||
; CHECK: [[a:%.*]] = alloca [8 x i32], align 4
|
||||
; CHECK-COUNT-8: getelementptr inbounds [8 x i32], ptr [[a]], i32 {{[0-7]}}
|
||||
; CHECK-NEXT: ret void
|
||||
%1 = alloca [2 x[2 x [2 x i32]]], align 4
|
||||
%g3d0 = getelementptr inbounds [2 x[2 x [2 x i32]]], [2 x[2 x [2 x i32]]]* %1, i32 0, i32 0
|
||||
%g2d0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d0, i32 0, i32 0
|
||||
%g1d_1 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0, i32 0, i32 0
|
||||
%g1d_2 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0, i32 0, i32 1
|
||||
%g2d1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d0, i32 0, i32 1
|
||||
%g1d1_1 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d1, i32 0, i32 0
|
||||
%g1d1_2 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d1, i32 0, i32 1
|
||||
%g3d1 = getelementptr inbounds [2 x[2 x [2 x i32]]], [2 x[2 x [2 x i32]]]* %1, i32 0, i32 1
|
||||
%g2d2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d1, i32 0, i32 0
|
||||
%g1d2_1 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d2, i32 0, i32 0
|
||||
%g1d2_2 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d2, i32 0, i32 1
|
||||
%g2d3 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d1, i32 0, i32 1
|
||||
%g1d3_1 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d3, i32 0, i32 0
|
||||
%g1d3_2 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d3, i32 0, i32 1
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: gep_4d_test
|
||||
define void @gep_4d_test () {
|
||||
; CHECK: [[a:%.*]] = alloca [16 x i32], align 4
|
||||
; CHECK-COUNT-16: getelementptr inbounds [16 x i32], ptr [[a]], i32 {{[0-9]|1[0-5]}}
|
||||
; CHECK-NEXT: ret void
|
||||
%1 = alloca [2x[2 x[2 x [2 x i32]]]], align 4
|
||||
%g4d0 = getelementptr inbounds [2x[2 x[2 x [2 x i32]]]], [2x[2 x[2 x [2 x i32]]]]* %1, i32 0, i32 0
|
||||
%g3d0 = getelementptr inbounds [2 x[2 x [2 x i32]]], [2 x[2 x [2 x i32]]]* %g4d0, i32 0, i32 0
|
||||
%g2d0_0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d0, i32 0, i32 0
|
||||
%g1d_0 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_0, i32 0, i32 0
|
||||
%g1d_1 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_0, i32 0, i32 1
|
||||
%g2d0_1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d0, i32 0, i32 1
|
||||
%g1d_2 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_1, i32 0, i32 0
|
||||
%g1d_3 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_1, i32 0, i32 1
|
||||
%g3d1 = getelementptr inbounds [2 x[2 x [2 x i32]]], [2 x[2 x [2 x i32]]]* %g4d0, i32 0, i32 1
|
||||
%g2d0_2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d1, i32 0, i32 0
|
||||
%g1d_4 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_2, i32 0, i32 0
|
||||
%g1d_5 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_2, i32 0, i32 1
|
||||
%g2d1_2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d1, i32 0, i32 1
|
||||
%g1d_6 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d1_2, i32 0, i32 0
|
||||
%g1d_7 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d1_2, i32 0, i32 1
|
||||
%g4d1 = getelementptr inbounds [2x[2 x[2 x [2 x i32]]]], [2x[2 x[2 x [2 x i32]]]]* %1, i32 0, i32 1
|
||||
%g3d0_1 = getelementptr inbounds [2 x[2 x [2 x i32]]], [2 x[2 x [2 x i32]]]* %g4d1, i32 0, i32 0
|
||||
%g2d0_3 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d0_1, i32 0, i32 0
|
||||
%g1d_8 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_3, i32 0, i32 0
|
||||
%g1d_9 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_3, i32 0, i32 1
|
||||
%g2d0_4 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d0_1, i32 0, i32 1
|
||||
%g1d_10 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_4, i32 0, i32 0
|
||||
%g1d_11 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_4, i32 0, i32 1
|
||||
%g3d1_1 = getelementptr inbounds [2 x[2 x [2 x i32]]], [2 x[2 x [2 x i32]]]* %g4d1, i32 0, i32 1
|
||||
%g2d0_5 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d1_1, i32 0, i32 0
|
||||
%g1d_12 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_5, i32 0, i32 0
|
||||
%g1d_13 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d0_5, i32 0, i32 1
|
||||
%g2d1_3 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %g3d1_1, i32 0, i32 1
|
||||
%g1d_14 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d1_3, i32 0, i32 0
|
||||
%g1d_15 = getelementptr inbounds [2 x i32], [2 x i32]* %g2d1_3, i32 0, i32 1
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
@a = internal global [2 x [3 x [4 x i32]]] [[3 x [4 x i32]] [[4 x i32] [i32 0, i32 1, i32 2, i32 3],
|
||||
[4 x i32] [i32 4, i32 5, i32 6, i32 7],
|
||||
[4 x i32] [i32 8, i32 9, i32 10, i32 11]],
|
||||
[3 x [4 x i32]] [[4 x i32] [i32 12, i32 13, i32 14, i32 15],
|
||||
[4 x i32] [i32 16, i32 17, i32 18, i32 19],
|
||||
[4 x i32] [i32 20, i32 21, i32 22, i32 23]]], align 4
|
||||
|
||||
@b = internal global [2 x [3 x [4 x i32]]] zeroinitializer, align 16
|
||||
|
||||
define void @global_gep_load() {
|
||||
; CHECK: [[GEP_PTR:%.*]] = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 6
|
||||
; CHECK: load i32, ptr [[GEP_PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
%1 = getelementptr inbounds [2 x [3 x [4 x i32]]], [2 x [3 x [4 x i32]]]* @a, i32 0, i32 0
|
||||
%2 = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]]* %1, i32 0, i32 1
|
||||
%3 = getelementptr inbounds [4 x i32], [4 x i32]* %2, i32 0, i32 2
|
||||
%4 = load i32, i32* %3, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @global_gep_load_index(i32 %row, i32 %col, i32 %timeIndex) {
|
||||
; CHECK-LABEL: define void @global_gep_load_index(
|
||||
; CHECK-SAME: i32 [[ROW:%.*]], i32 [[COL:%.*]], i32 [[TIMEINDEX:%.*]]) {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TIMEINDEX]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[COL]], 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[ROW]], 12
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP4]], [[TMP5]]
|
||||
; CHECK-NEXT: [[DOTFLAT:%.*]] = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 [[TMP6]]
|
||||
; CHECK-NOT: getelementptr inbounds [2 x [3 x [4 x i32]]]{{.*}}
|
||||
; CHECK-NOT: getelementptr inbounds [3 x [4 x i32]]{{.*}}
|
||||
; CHECK-NOT: getelementptr inbounds [4 x i32]{{.*}}
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTFLAT]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%1 = getelementptr inbounds [2 x [3 x [4 x i32]]], [2 x [3 x [4 x i32]]]* @a, i32 0, i32 %row
|
||||
%2 = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]]* %1, i32 0, i32 %col
|
||||
%3 = getelementptr inbounds [4 x i32], [4 x i32]* %2, i32 0, i32 %timeIndex
|
||||
%4 = load i32, i32* %3, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @global_incomplete_gep_chain(i32 %row, i32 %col) {
|
||||
; CHECK-LABEL: define void @global_incomplete_gep_chain(
|
||||
; CHECK-SAME: i32 [[ROW:%.*]], i32 [[COL:%.*]]) {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[COL]], 1
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = add i32 0, [[TMP1]]
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[ROW]], 3
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP2]], [[TMP3]]
|
||||
; CHECK-NEXT: [[DOTFLAT:%.*]] = getelementptr inbounds [24 x i32], ptr @a.1dim, i32 [[TMP4]]
|
||||
; CHECK-NOT: getelementptr inbounds [2 x [3 x [4 x i32]]]{{.*}}
|
||||
; CHECK-NOT: getelementptr inbounds [3 x [4 x i32]]{{.*}}
|
||||
; CHECK-NOT: getelementptr inbounds [4 x i32]{{.*}}
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTFLAT]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%1 = getelementptr inbounds [2 x [3 x [4 x i32]]], [2 x [3 x [4 x i32]]]* @a, i32 0, i32 %row
|
||||
%2 = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]]* %1, i32 0, i32 %col
|
||||
%4 = load i32, i32* %2, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @global_gep_store() {
|
||||
; CHECK: [[GEP_PTR:%.*]] = getelementptr inbounds [24 x i32], ptr @b.1dim, i32 13
|
||||
; CHECK: store i32 1, ptr [[GEP_PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
%1 = getelementptr inbounds [2 x [3 x [4 x i32]]], [2 x [3 x [4 x i32]]]* @b, i32 0, i32 1
|
||||
%2 = getelementptr inbounds [3 x [4 x i32]], [3 x [4 x i32]]* %1, i32 0, i32 0
|
||||
%3 = getelementptr inbounds [4 x i32], [4 x i32]* %2, i32 0, i32 1
|
||||
store i32 1, i32* %3, align 4
|
||||
ret void
|
||||
}
|
@ -9,6 +9,7 @@
|
||||
; CHECK-NEXT: ModulePass Manager
|
||||
; CHECK-NEXT: DXIL Intrinsic Expansion
|
||||
; CHECK-NEXT: DXIL Data Scalarization
|
||||
; CHECK-NEXT: DXIL Array Flattener
|
||||
; CHECK-NEXT: FunctionPass Manager
|
||||
; CHECK-NEXT: Dominator Tree Construction
|
||||
; CHECK-NEXT: Scalarize vector operations
|
||||
|
153
llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
Normal file
153
llvm/test/CodeGen/DirectX/llc-vector-load-scalarize.ll
Normal file
@ -0,0 +1,153 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool llc --version 5
|
||||
; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s
|
||||
|
||||
; Make sure we can load groupshared, static vectors and arrays of vectors
|
||||
|
||||
@"arrayofVecData" = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16
|
||||
@"vecData" = external addrspace(3) global <4 x i32>, align 4
|
||||
@staticArrayOfVecData = internal global [3 x <4 x i32>] [<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>], align 4
|
||||
@"groushared2dArrayofVectors" = local_unnamed_addr addrspace(3) global [3 x [ 3 x <4 x i32>]] zeroinitializer, align 16
|
||||
|
||||
; CHECK: @arrayofVecData.scalarized.1dim = local_unnamed_addr addrspace(3) global [6 x float] zeroinitializer, align 16
|
||||
; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], align 4
|
||||
; CHECK: @staticArrayOfVecData.scalarized.1dim = internal global [12 x i32] [i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12], align 4
|
||||
; CHECK: @groushared2dArrayofVectors.scalarized.1dim = local_unnamed_addr addrspace(3) global [36 x i32] zeroinitializer, align 16
|
||||
|
||||
; CHECK-NOT: @arrayofVecData
|
||||
; CHECK-NOT: @arrayofVecData.scalarized
|
||||
; CHECK-NOT: @vecData
|
||||
; CHECK-NOT: @staticArrayOfVecData
|
||||
; CHECK-NOT: @staticArrayOfVecData.scalarized
|
||||
; CHECK-NOT: @groushared2dArrayofVectors
|
||||
; CHECK-NOT: @groushared2dArrayofVectors.scalarized
|
||||
|
||||
|
||||
define <4 x i32> @load_array_vec_test() #0 {
|
||||
; CHECK-LABEL: define <4 x i32> @load_array_vec_test(
|
||||
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr addrspace(3) @arrayofVecData.scalarized.1dim to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[TMP1]], align 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 1) to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[TMP3]], align 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 2) to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @arrayofVecData.scalarized.1dim, i32 3) to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = bitcast ptr addrspace(3) @arrayofVecData.scalarized.1dim to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [2 x [3 x float]], ptr addrspace(3) [[TMP9]], i32 0, i32 1
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(3) [[TMP11]], align 4
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[DOTI12:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP13]], i32 1
|
||||
; CHECK-NEXT: [[DOTI13:%.*]] = load i32, ptr addrspace(3) [[DOTI12]], align 4
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[DOTI24:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP14]], i32 2
|
||||
; CHECK-NEXT: [[DOTI25:%.*]] = load i32, ptr addrspace(3) [[DOTI24]], align 4
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[DOTI36:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP15]], i32 3
|
||||
; CHECK-NEXT: [[DOTI37:%.*]] = load i32, ptr addrspace(3) [[DOTI36]], align 4
|
||||
; CHECK-NEXT: [[DOTI08:%.*]] = add i32 [[TMP2]], [[TMP12]]
|
||||
; CHECK-NEXT: [[DOTI19:%.*]] = add i32 [[TMP4]], [[DOTI13]]
|
||||
; CHECK-NEXT: [[DOTI210:%.*]] = add i32 [[TMP6]], [[DOTI25]]
|
||||
; CHECK-NEXT: [[DOTI311:%.*]] = add i32 [[TMP8]], [[DOTI37]]
|
||||
; CHECK-NEXT: [[DOTUPTO015:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI08]], i64 0
|
||||
; CHECK-NEXT: [[DOTUPTO116:%.*]] = insertelement <4 x i32> [[DOTUPTO015]], i32 [[DOTI19]], i64 1
|
||||
; CHECK-NEXT: [[DOTUPTO217:%.*]] = insertelement <4 x i32> [[DOTUPTO116]], i32 [[DOTI210]], i64 2
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[DOTUPTO217]], i32 [[DOTI311]], i64 3
|
||||
; CHECK-NEXT: ret <4 x i32> [[TMP16]]
|
||||
;
|
||||
%1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 0), align 4
|
||||
%2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 1), align 4
|
||||
%3 = add <4 x i32> %1, %2
|
||||
ret <4 x i32> %3
|
||||
}
|
||||
|
||||
|
||||
define <4 x i32> @load_vec_test() #0 {
|
||||
; CHECK-LABEL: define <4 x i32> @load_vec_test(
|
||||
; CHECK-SAME: ) #[[ATTR0]] {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr addrspace(3) @vecData.scalarized to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[TMP1]], align 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 1) to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[TMP3]], align 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 2) to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @vecData.scalarized, i32 3) to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4
|
||||
; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
|
||||
; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x i32> [[DOTUPTO0]], i32 [[TMP4]], i64 1
|
||||
; CHECK-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x i32> [[DOTUPTO1]], i32 [[TMP6]], i64 2
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[DOTUPTO2]], i32 [[TMP8]], i64 3
|
||||
; CHECK-NEXT: ret <4 x i32> [[TMP9]]
|
||||
;
|
||||
%1 = load <4 x i32>, <4 x i32> addrspace(3)* @"vecData", align 4
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
||||
define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
|
||||
; CHECK-LABEL: define <4 x i32> @load_static_array_of_vec_test(
|
||||
; CHECK-SAME: i32 [[INDEX:%.*]]) #[[ATTR0]] {
|
||||
; CHECK-NEXT: [[DOTFLAT:%.*]] = getelementptr [12 x i32], ptr @staticArrayOfVecData.scalarized.1dim, i32 [[INDEX]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr [[DOTFLAT]] to ptr
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr [[DOTFLAT]] to ptr
|
||||
; CHECK-NEXT: [[DOTFLAT_I1:%.*]] = getelementptr i32, ptr [[TMP3]], i32 1
|
||||
; CHECK-NEXT: [[DOTI1:%.*]] = load i32, ptr [[DOTFLAT_I1]], align 4
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = bitcast ptr [[DOTFLAT]] to ptr
|
||||
; CHECK-NEXT: [[DOTFLAT_I2:%.*]] = getelementptr i32, ptr [[TMP4]], i32 2
|
||||
; CHECK-NEXT: [[DOTI2:%.*]] = load i32, ptr [[DOTFLAT_I2]], align 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast ptr [[DOTFLAT]] to ptr
|
||||
; CHECK-NEXT: [[DOTFLAT_I3:%.*]] = getelementptr i32, ptr [[TMP5]], i32 3
|
||||
; CHECK-NEXT: [[DOTI3:%.*]] = load i32, ptr [[DOTFLAT_I3]], align 4
|
||||
; CHECK-NEXT: [[DOTUPTO0:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
|
||||
; CHECK-NEXT: [[DOTUPTO1:%.*]] = insertelement <4 x i32> [[DOTUPTO0]], i32 [[DOTI1]], i64 1
|
||||
; CHECK-NEXT: [[DOTUPTO2:%.*]] = insertelement <4 x i32> [[DOTUPTO1]], i32 [[DOTI2]], i64 2
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[DOTUPTO2]], i32 [[DOTI3]], i64 3
|
||||
; CHECK-NEXT: ret <4 x i32> [[TMP6]]
|
||||
;
|
||||
%3 = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* @staticArrayOfVecData, i32 0, i32 %index
|
||||
%4 = load <4 x i32>, <4 x i32>* %3, align 4
|
||||
ret <4 x i32> %4
|
||||
}
|
||||
|
||||
define <4 x i32> @multid_load_test() #0 {
|
||||
; CHECK-LABEL: define <4 x i32> @multid_load_test(
|
||||
; CHECK-SAME: ) #[[ATTR0]] {
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = bitcast ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(3) [[TMP1]], align 4
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 1) to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr addrspace(3) [[TMP3]], align 4
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 2) to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(3) [[TMP5]], align 4
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = bitcast ptr addrspace(3) getelementptr (i32, ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim, i32 3) to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr addrspace(3) [[TMP7]], align 4
|
||||
; CHECK-NEXT: [[TMP9:%.*]] = bitcast ptr addrspace(3) @groushared2dArrayofVectors.scalarized.1dim to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [3 x [3 x [4 x i32]]], ptr addrspace(3) [[TMP9]], i32 0, i32 1, i32 1
|
||||
; CHECK-NEXT: [[TMP11:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(3) [[TMP11]], align 4
|
||||
; CHECK-NEXT: [[TMP13:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[DOTI12:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP13]], i32 1
|
||||
; CHECK-NEXT: [[DOTI13:%.*]] = load i32, ptr addrspace(3) [[DOTI12]], align 4
|
||||
; CHECK-NEXT: [[TMP14:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[DOTI24:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP14]], i32 2
|
||||
; CHECK-NEXT: [[DOTI25:%.*]] = load i32, ptr addrspace(3) [[DOTI24]], align 4
|
||||
; CHECK-NEXT: [[TMP15:%.*]] = bitcast ptr addrspace(3) [[TMP10]] to ptr addrspace(3)
|
||||
; CHECK-NEXT: [[DOTI36:%.*]] = getelementptr i32, ptr addrspace(3) [[TMP15]], i32 3
|
||||
; CHECK-NEXT: [[DOTI37:%.*]] = load i32, ptr addrspace(3) [[DOTI36]], align 4
|
||||
; CHECK-NEXT: [[DOTI08:%.*]] = add i32 [[TMP2]], [[TMP12]]
|
||||
; CHECK-NEXT: [[DOTI19:%.*]] = add i32 [[TMP4]], [[DOTI13]]
|
||||
; CHECK-NEXT: [[DOTI210:%.*]] = add i32 [[TMP6]], [[DOTI25]]
|
||||
; CHECK-NEXT: [[DOTI311:%.*]] = add i32 [[TMP8]], [[DOTI37]]
|
||||
; CHECK-NEXT: [[DOTUPTO015:%.*]] = insertelement <4 x i32> poison, i32 [[DOTI08]], i64 0
|
||||
; CHECK-NEXT: [[DOTUPTO116:%.*]] = insertelement <4 x i32> [[DOTUPTO015]], i32 [[DOTI19]], i64 1
|
||||
; CHECK-NEXT: [[DOTUPTO217:%.*]] = insertelement <4 x i32> [[DOTUPTO116]], i32 [[DOTI210]], i64 2
|
||||
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[DOTUPTO217]], i32 [[DOTI311]], i64 3
|
||||
; CHECK-NEXT: ret <4 x i32> [[TMP16]]
|
||||
;
|
||||
%1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 0, i32 0), align 4
|
||||
%2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 1, i32 1), align 4
|
||||
%3 = add <4 x i32> %1, %2
|
||||
ret <4 x i32> %3
|
||||
}
|
||||
|
||||
attributes #0 = { convergent norecurse nounwind "hlsl.export"}
|
30
llvm/test/CodeGen/DirectX/llc-vector-store-scalarize.ll
Normal file
30
llvm/test/CodeGen/DirectX/llc-vector-store-scalarize.ll
Normal file
@ -0,0 +1,30 @@
|
||||
; RUN: opt -S -passes='dxil-data-scalarization,scalarizer<load-store>,dxil-op-lower' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
|
||||
|
||||
; Make sure we can store groupshared, static vectors and arrays of vectors
|
||||
|
||||
@"arrayofVecData" = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16
|
||||
@"vecData" = external addrspace(3) global <4 x i32>, align 4
|
||||
|
||||
; CHECK: @arrayofVecData.scalarized = local_unnamed_addr addrspace(3) global [2 x [3 x float]] zeroinitializer, align 16
|
||||
; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], align 4
|
||||
; CHECK-NOT: @arrayofVecData
|
||||
; CHECK-NOT: @vecData
|
||||
|
||||
; CHECK-LABEL: store_array_vec_test
|
||||
define void @store_array_vec_test () local_unnamed_addr #0 {
|
||||
; CHECK-COUNT-6: store float {{1|2|3|4|6}}.000000e+00, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align {{4|8|16}}
|
||||
; CHECK-NEXT: ret void
|
||||
store <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, ptr addrspace(3) @"arrayofVecData", align 16
|
||||
store <3 x float> <float 2.000000e+00, float 4.000000e+00, float 6.000000e+00>, ptr addrspace(3) getelementptr inbounds (i8, ptr addrspace(3) @"arrayofVecData", i32 16), align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: store_vec_test
|
||||
define void @store_vec_test(<4 x i32> %inputVec) #0 {
|
||||
; CHECK-COUNT-4: store i32 %inputVec.{{.*}}, ptr addrspace(3) {{(@vecData.scalarized|getelementptr \(i32, ptr addrspace\(3\) @vecData.scalarized, i32 .*\)|%.*)}}, align 4
|
||||
; CHECK-NEXT: ret void
|
||||
store <4 x i32> %inputVec, <4 x i32> addrspace(3)* @"vecData", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { convergent norecurse nounwind "hlsl.export"}
|
@ -1,12 +1,15 @@
|
||||
; RUN: opt -S -passes='dxil-data-scalarization,function(scalarizer<load-store>),dxil-op-lower' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
|
||||
; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s
|
||||
|
||||
; Make sure we don't touch arrays without vectors and that can recurse multiple-dimension arrays of vectors
|
||||
; Make sure we don't touch arrays without vectors and that can recurse and flatten multiple-dimension arrays of vectors
|
||||
|
||||
@staticArray = internal global [4 x i32] [i32 1, i32 2, i32 3, i32 4], align 4
|
||||
@"groushared3dArrayofVectors" = local_unnamed_addr addrspace(3) global [3 x [3 x [3 x <4 x i32>]]] zeroinitializer, align 16
|
||||
|
||||
; CHECK @staticArray
|
||||
; CHECK-NOT: @staticArray.scalarized
|
||||
; CHECK: @groushared3dArrayofVectors.scalarized = local_unnamed_addr addrspace(3) global [3 x [3 x [3 x [4 x i32]]]] zeroinitializer, align 16
|
||||
; CHECK-NOT: @staticArray.scalarized.1dim
|
||||
; CHECK-NOT: @staticArray.1dim
|
||||
|
||||
; CHECK: @groushared3dArrayofVectors.scalarized.1dim = local_unnamed_addr addrspace(3) global [108 x i32] zeroinitializer, align 16
|
||||
; CHECK-NOT: @groushared3dArrayofVectors.scalarized
|
||||
; CHECK-NOT: @groushared3dArrayofVectors
|
||||
|
@ -1,5 +1,4 @@
|
||||
; RUN: opt -S -passes='dxil-data-scalarization,function(scalarizer<load-store>),dxil-op-lower' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
|
||||
; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s
|
||||
|
||||
; Make sure we can load groupshared, static vectors and arrays of vectors
|
||||
|
||||
@ -22,7 +21,6 @@
|
||||
; CHECK-LABEL: load_array_vec_test
|
||||
define <4 x i32> @load_array_vec_test() #0 {
|
||||
; CHECK-COUNT-8: load i32, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align 4
|
||||
; CHECK-NOT: load i32, ptr addrspace(3) {{.*}}, align 4
|
||||
%1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 0), align 4
|
||||
%2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 1), align 4
|
||||
%3 = add <4 x i32> %1, %2
|
||||
@ -32,7 +30,6 @@ define <4 x i32> @load_array_vec_test() #0 {
|
||||
; CHECK-LABEL: load_vec_test
|
||||
define <4 x i32> @load_vec_test() #0 {
|
||||
; CHECK-COUNT-4: load i32, ptr addrspace(3) {{(@vecData.scalarized|getelementptr \(i32, ptr addrspace\(3\) @vecData.scalarized, i32 .*\)|%.*)}}, align {{.*}}
|
||||
; CHECK-NOT: load i32, ptr addrspace(3) {{.*}}, align 4
|
||||
%1 = load <4 x i32>, <4 x i32> addrspace(3)* @"vecData", align 4
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
@ -41,7 +38,6 @@ define <4 x i32> @load_vec_test() #0 {
|
||||
define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
|
||||
; CHECK: getelementptr [3 x [4 x i32]], ptr @staticArrayOfVecData.scalarized, i32 0, i32 %index
|
||||
; CHECK-COUNT-4: load i32, ptr {{.*}}, align 4
|
||||
; CHECK-NOT: load i32, ptr {{.*}}, align 4
|
||||
%3 = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* @staticArrayOfVecData, i32 0, i32 %index
|
||||
%4 = load <4 x i32>, <4 x i32>* %3, align 4
|
||||
ret <4 x i32> %4
|
||||
@ -50,7 +46,6 @@ define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
|
||||
; CHECK-LABEL: multid_load_test
|
||||
define <4 x i32> @multid_load_test() #0 {
|
||||
; CHECK-COUNT-8: load i32, ptr addrspace(3) {{(.*@groushared2dArrayofVectors.scalarized.*|%.*)}}, align 4
|
||||
; CHECK-NOT: load i32, ptr addrspace(3) {{.*}}, align 4
|
||||
%1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 0, i32 0), align 4
|
||||
%2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 1, i32 1), align 4
|
||||
%3 = add <4 x i32> %1, %2
|
||||
|
@ -1,4 +1,3 @@
|
||||
; RUN: opt -S -passes='dxil-data-scalarization,scalarizer<load-store>,dxil-op-lower' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
|
||||
; RUN: llc %s -mtriple=dxil-pc-shadermodel6.3-library --filetype=asm -o - | FileCheck %s
|
||||
|
||||
; Make sure we can store groupshared, static vectors and arrays of vectors
|
||||
@ -6,15 +5,17 @@
|
||||
@"arrayofVecData" = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16
|
||||
@"vecData" = external addrspace(3) global <4 x i32>, align 4
|
||||
|
||||
; CHECK: @arrayofVecData.scalarized = local_unnamed_addr addrspace(3) global [2 x [3 x float]] zeroinitializer, align 16
|
||||
|
||||
; CHECK: @arrayofVecData.scalarized.1dim = local_unnamed_addr addrspace(3) global [6 x float] zeroinitializer, align 16
|
||||
; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], align 4
|
||||
; CHECK-NOT: @arrayofVecData
|
||||
; CHECK-NOT: @arrayofVecData.scalarized
|
||||
; CHECK-NOT: @vecData
|
||||
|
||||
; CHECK-LABEL: store_array_vec_test
|
||||
define void @store_array_vec_test () local_unnamed_addr #0 {
|
||||
; CHECK-COUNT-6: store float {{1|2|3|4|6}}.000000e+00, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align {{4|8|16}}
|
||||
; CHECK-NOT: store float {{1|2|3|4|6}}.000000e+00, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align {{4|8|16}}
|
||||
; CHECK-COUNT-6: store float {{1|2|3|4|6}}.000000e+00, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.1dim.*|%.*)}}, align {{4|8|16}}
|
||||
; CHECK-NEXT: ret void
|
||||
store <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, ptr addrspace(3) @"arrayofVecData", align 16
|
||||
store <3 x float> <float 2.000000e+00, float 4.000000e+00, float 6.000000e+00>, ptr addrspace(3) getelementptr inbounds (i8, ptr addrspace(3) @"arrayofVecData", i32 16), align 16
|
||||
ret void
|
||||
@ -23,7 +24,7 @@ define void @store_array_vec_test () local_unnamed_addr #0 {
|
||||
; CHECK-LABEL: store_vec_test
|
||||
define void @store_vec_test(<4 x i32> %inputVec) #0 {
|
||||
; CHECK-COUNT-4: store i32 %inputVec.{{.*}}, ptr addrspace(3) {{(@vecData.scalarized|getelementptr \(i32, ptr addrspace\(3\) @vecData.scalarized, i32 .*\)|%.*)}}, align 4
|
||||
; CHECK-NOT: store i32 %inputVec.{{.*}}, ptr addrspace(3)
|
||||
; CHECK-NEXT: ret void
|
||||
store <4 x i32> %inputVec, <4 x i32> addrspace(3)* @"vecData", align 4
|
||||
ret void
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user