[HLSL] Codegen for indexing of sub-arrays of multi-dimensional resource arrays
Closes #145426
This commit is contained in:
parent
eb3d88423d
commit
75a7511da9
@ -18,6 +18,7 @@
|
||||
#include "CodeGenModule.h"
|
||||
#include "TargetInfo.h"
|
||||
#include "clang/AST/ASTContext.h"
|
||||
#include "clang/AST/Attrs.inc"
|
||||
#include "clang/AST/Decl.h"
|
||||
#include "clang/AST/RecursiveASTVisitor.h"
|
||||
#include "clang/AST/Type.h"
|
||||
@ -36,6 +37,7 @@
|
||||
#include "llvm/Support/Alignment.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/FormatVariadic.h"
|
||||
#include <cstdint>
|
||||
|
||||
using namespace clang;
|
||||
using namespace CodeGen;
|
||||
@ -190,6 +192,71 @@ static void createResourceCtorArgs(CodeGenModule &CGM, CXXConstructorDecl *CD,
|
||||
Args.add(RValue::get(NameStr), AST.getPointerType(AST.CharTy.withConst()));
|
||||
}
|
||||
|
||||
// Initializes local resource array variable. For multi-dimensional arrays it
|
||||
// calls itself recursively to initialize its sub-arrays. The Index used in the
|
||||
// resource constructor calls will begin at StartIndex and will be incremented
|
||||
// for each array element. The last last used resource Index is returned to the
|
||||
// caller.
|
||||
static Value *initializeLocalResourceArray(
|
||||
CodeGenFunction &CGF, AggValueSlot &ValueSlot,
|
||||
const ConstantArrayType *ArrayTy, CXXConstructorDecl *CD,
|
||||
llvm::Value *Range, llvm::Value *StartIndex, StringRef ResourceName,
|
||||
HLSLResourceBindingAttr *RBA, HLSLVkBindingAttr *VkBinding,
|
||||
ArrayRef<llvm::Value *> PrevGEPIndices, SourceLocation ArraySubsExprLoc) {
|
||||
|
||||
llvm::IntegerType *IntTy = CGF.CGM.IntTy;
|
||||
llvm::Value *Index = StartIndex;
|
||||
llvm::Value *One = llvm::ConstantInt::get(IntTy, 1);
|
||||
uint64_t ArraySize = ArrayTy->getSExtSize();
|
||||
QualType ElemType = ArrayTy->getElementType();
|
||||
Address TmpArrayAddr = ValueSlot.getAddress();
|
||||
|
||||
// Add additional index to the getelementptr call indices.
|
||||
// This index will be updated for each array element in the loops below.
|
||||
SmallVector<llvm::Value *> GEPIndices(PrevGEPIndices);
|
||||
GEPIndices.push_back(llvm::ConstantInt::get(IntTy, 0));
|
||||
|
||||
// array of arrays - recursively initialize the sub-arrays
|
||||
if (ElemType->isArrayType()) {
|
||||
const ConstantArrayType *SubArrayTy = cast<ConstantArrayType>(ElemType);
|
||||
for (uint64_t I = 0; I < ArraySize; I++) {
|
||||
if (I > 0) {
|
||||
Index = CGF.Builder.CreateAdd(Index, One);
|
||||
GEPIndices.back() = llvm::ConstantInt::get(IntTy, I);
|
||||
}
|
||||
// recursively initialize the sub-array
|
||||
Index = initializeLocalResourceArray(
|
||||
CGF, ValueSlot, SubArrayTy, CD, Range, Index, ResourceName, RBA,
|
||||
VkBinding, GEPIndices, ArraySubsExprLoc);
|
||||
}
|
||||
return Index;
|
||||
}
|
||||
|
||||
// array of resources - initialize each resource in the array
|
||||
llvm::Type *Ty = CGF.ConvertTypeForMem(ElemType);
|
||||
CharUnits ElemSize = CD->getASTContext().getTypeSizeInChars(ElemType);
|
||||
CharUnits Align =
|
||||
TmpArrayAddr.getAlignment().alignmentOfArrayElement(ElemSize);
|
||||
|
||||
for (uint64_t I = 0; I < ArraySize; I++) {
|
||||
if (I > 0) {
|
||||
Index = CGF.Builder.CreateAdd(Index, One);
|
||||
GEPIndices.back() = llvm::ConstantInt::get(IntTy, I);
|
||||
}
|
||||
Address ThisAddress =
|
||||
CGF.Builder.CreateGEP(TmpArrayAddr, GEPIndices, Ty, Align);
|
||||
llvm::Value *ThisPtr = CGF.getAsNaturalPointerTo(ThisAddress, ElemType);
|
||||
|
||||
CallArgList Args;
|
||||
createResourceCtorArgs(CGF.CGM, CD, ThisPtr, Range, Index, ResourceName,
|
||||
RBA, VkBinding, Args);
|
||||
CGF.EmitCXXConstructorCall(CD, Ctor_Complete, false, false, ThisAddress,
|
||||
Args, ValueSlot.mayOverlap(), ArraySubsExprLoc,
|
||||
ValueSlot.isSanitizerChecked());
|
||||
}
|
||||
return Index;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
llvm::Type *
|
||||
@ -802,16 +869,14 @@ std::optional<LValue> CGHLSLRuntime::emitResourceArraySubscriptExpr(
|
||||
ArraySubsExpr->getType()->isHLSLResourceRecordArray() &&
|
||||
"expected resource array subscript expression");
|
||||
|
||||
// let clang codegen handle local resource array subscripts
|
||||
const VarDecl *ArrayDecl = dyn_cast<VarDecl>(getArrayDecl(ArraySubsExpr));
|
||||
// Let clang codegen handle local resource array subscripts,
|
||||
// or when the subscript references an opaque expression (as part of
|
||||
// ArrayInitLoopExpr AST node).
|
||||
const VarDecl *ArrayDecl =
|
||||
dyn_cast_or_null<VarDecl>(getArrayDecl(ArraySubsExpr));
|
||||
if (!ArrayDecl || !ArrayDecl->hasGlobalStorage())
|
||||
return std::nullopt;
|
||||
|
||||
if (ArraySubsExpr->getType()->isArrayType())
|
||||
// FIXME: this is not yet implemented (llvm/llvm-project#145426)
|
||||
llvm_unreachable(
|
||||
"indexing of sub-arrays of multidimensional arrays not supported yet");
|
||||
|
||||
// get the resource array type
|
||||
ASTContext &AST = ArrayDecl->getASTContext();
|
||||
const Type *ResArrayTy = ArrayDecl->getType().getTypePtr();
|
||||
@ -832,26 +897,30 @@ std::optional<LValue> CGHLSLRuntime::emitResourceArraySubscriptExpr(
|
||||
CGM.IntTy, AST.getConstantArrayElementCount(ArrayTy));
|
||||
SubIndex = CGF.Builder.CreateMul(SubIndex, Multiplier);
|
||||
}
|
||||
|
||||
Index = Index ? CGF.Builder.CreateAdd(Index, SubIndex) : SubIndex;
|
||||
ASE = dyn_cast<ArraySubscriptExpr>(ASE->getBase()->IgnoreParenImpCasts());
|
||||
}
|
||||
|
||||
// find binding info for the resource array (for implicit binding
|
||||
// an HLSLResourceBindingAttr should have been added by SemaHLSL)
|
||||
QualType ResourceTy = ArraySubsExpr->getType();
|
||||
HLSLVkBindingAttr *VkBinding = ArrayDecl->getAttr<HLSLVkBindingAttr>();
|
||||
HLSLResourceBindingAttr *RBA = ArrayDecl->getAttr<HLSLResourceBindingAttr>();
|
||||
assert((VkBinding || RBA) && "resource array must have a binding attribute");
|
||||
|
||||
// Find the individual resource type
|
||||
QualType ResultTy = ArraySubsExpr->getType();
|
||||
QualType ResourceTy =
|
||||
ResultTy->isArrayType() ? AST.getBaseElementType(ResultTy) : ResultTy;
|
||||
|
||||
// lookup the resource class constructor based on the resource type and
|
||||
// binding
|
||||
CXXConstructorDecl *CD = findResourceConstructorDecl(
|
||||
AST, ResourceTy, VkBinding || RBA->hasRegisterSlot());
|
||||
|
||||
// create a temporary variable for the resource class instance (we need to
|
||||
// create a temporary variable for the result, which is either going
|
||||
// to be a single resource instance or a local array of resources (we need to
|
||||
// return an LValue)
|
||||
RawAddress TmpVar = CGF.CreateMemTemp(ResourceTy);
|
||||
RawAddress TmpVar = CGF.CreateMemTemp(ResultTy);
|
||||
if (CGF.EmitLifetimeStart(TmpVar.getPointer()))
|
||||
CGF.pushFullExprCleanup<CodeGenFunction::CallLifetimeEnd>(
|
||||
NormalEHLifetimeMarker, TmpVar);
|
||||
@ -860,26 +929,35 @@ std::optional<LValue> CGHLSLRuntime::emitResourceArraySubscriptExpr(
|
||||
TmpVar, Qualifiers(), AggValueSlot::IsDestructed_t(true),
|
||||
AggValueSlot::DoesNotNeedGCBarriers, AggValueSlot::IsAliased_t(false),
|
||||
AggValueSlot::DoesNotOverlap);
|
||||
|
||||
Address ThisAddress = ValueSlot.getAddress();
|
||||
llvm::Value *ThisPtr = CGF.getAsNaturalPointerTo(
|
||||
ThisAddress, CD->getThisType()->getPointeeType());
|
||||
Address TmpVarAddress = ValueSlot.getAddress();
|
||||
|
||||
// get total array size (= range size)
|
||||
llvm::Value *Range =
|
||||
llvm::ConstantInt::get(CGM.IntTy, getTotalArraySize(AST, ResArrayTy));
|
||||
|
||||
// assemble the constructor parameters
|
||||
CallArgList Args;
|
||||
createResourceCtorArgs(CGM, CD, ThisPtr, Range, Index, ArrayDecl->getName(),
|
||||
RBA, VkBinding, Args);
|
||||
// if the result of the subscript operation is a single resource - call the
|
||||
// constructor
|
||||
if (ResultTy == ResourceTy) {
|
||||
QualType ThisType = CD->getThisType()->getPointeeType();
|
||||
llvm::Value *ThisPtr = CGF.getAsNaturalPointerTo(TmpVarAddress, ThisType);
|
||||
|
||||
// call the constructor
|
||||
CGF.EmitCXXConstructorCall(CD, Ctor_Complete, false, false, ThisAddress, Args,
|
||||
ValueSlot.mayOverlap(),
|
||||
ArraySubsExpr->getExprLoc(),
|
||||
ValueSlot.isSanitizerChecked());
|
||||
|
||||
return CGF.MakeAddrLValue(TmpVar, ArraySubsExpr->getType(),
|
||||
AlignmentSource::Decl);
|
||||
// assemble the constructor parameters
|
||||
CallArgList Args;
|
||||
createResourceCtorArgs(CGM, CD, ThisPtr, Range, Index, ArrayDecl->getName(),
|
||||
RBA, VkBinding, Args);
|
||||
// call the constructor
|
||||
CGF.EmitCXXConstructorCall(CD, Ctor_Complete, false, false, TmpVarAddress,
|
||||
Args, ValueSlot.mayOverlap(),
|
||||
ArraySubsExpr->getExprLoc(),
|
||||
ValueSlot.isSanitizerChecked());
|
||||
} else {
|
||||
// result of the subscript operation is a local resource array
|
||||
const ConstantArrayType *ArrayTy =
|
||||
cast<ConstantArrayType>(ResultTy.getTypePtr());
|
||||
initializeLocalResourceArray(CGF, ValueSlot, ArrayTy, CD, Range, Index,
|
||||
ArrayDecl->getName(), RBA, VkBinding,
|
||||
{llvm::ConstantInt::get(CGM.IntTy, 0)},
|
||||
ArraySubsExpr->getExprLoc());
|
||||
}
|
||||
return CGF.MakeAddrLValue(TmpVar, ResultTy, AlignmentSource::Decl);
|
||||
}
|
||||
|
||||
@ -0,0 +1,102 @@
|
||||
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-compute -finclude-default-header \
|
||||
// RUN: -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
|
||||
|
||||
// CHECK: @[[BufA:.*]] = private unnamed_addr constant [2 x i8] c"A\00", align 1
|
||||
|
||||
RWBuffer<float> A[5][4][3][2] : register(u10, space2);
|
||||
RWStructuredBuffer<float> Out;
|
||||
|
||||
float foo(RWBuffer<float> Arr[3][2]) {
|
||||
return Arr[1][0][0];
|
||||
}
|
||||
|
||||
// NOTE:
|
||||
// - _ZN4hlsl8RWBufferIfEC1EjjijPKc is the constructor call for explicit binding
|
||||
// (has "jjij" in the mangled name) and the arguments are (register, space, range_size, index, name).
|
||||
// - _ZN4hlsl8RWBufferIfEixEj is the subscript operator for RWBuffer<float>
|
||||
|
||||
// CHECK: define internal void @_Z4mainj(i32 noundef %GI)
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: %[[GI_alloca:.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %Sub = alloca [3 x [2 x %"class.hlsl::RWBuffer"]], align 4
|
||||
// CHECK-NEXT: %[[Tmp0:.*]] = alloca [3 x [2 x %"class.hlsl::RWBuffer"]], align 4
|
||||
// CHECK-NEXT: %a = alloca float, align 4
|
||||
// CHECK-NEXT: %b = alloca float, align 4
|
||||
// CHECK-NEXT: %[[Tmp1:.*]] = alloca [3 x [2 x %"class.hlsl::RWBuffer"]], align 4
|
||||
// CHECK-NEXT: %[[Tmp2:.*]] = alloca [3 x [2 x %"class.hlsl::RWBuffer"]], align 4
|
||||
// CHECK-NEXT: store i32 %GI, ptr %[[GI_alloca]], align 4
|
||||
[numthreads(4,1,1)]
|
||||
void main(uint GI : SV_GroupThreadID) {
|
||||
// Codegen for "A[4][1]" - create local array [[Tmp0]] of size 3 x 2 and initialize
|
||||
// each element by a call to the resource constructor
|
||||
// The resource index for A[4][1][0][0] is 102 = 4 * (4 * 3 * 2) + 1 * (3 * 2)
|
||||
// (index in the resource array as if it was flattened)
|
||||
// CHECK-NEXT: %[[Ptr_Tmp0_0_0:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %tmp, i32 0, i32 0, i32 0
|
||||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_0_0]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef 102, ptr noundef @A.str) #6
|
||||
// CHECK-NEXT: %[[Ptr_Tmp0_0_1:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %tmp, i32 0, i32 0, i32 1
|
||||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_0_1]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef 103, ptr noundef @A.str) #6
|
||||
// CHECK-NEXT: %[[Ptr_Tmp0_1_0:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %tmp, i32 0, i32 1, i32 0
|
||||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_1_0]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef 104, ptr noundef @A.str) #6
|
||||
// CHECK-NEXT: %[[Ptr_Tmp0_1_1:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %tmp, i32 0, i32 1, i32 1
|
||||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_1_1]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef 105, ptr noundef @A.str) #6
|
||||
// CHECK-NEXT: %[[Ptr_Tmp0_2_0:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %tmp, i32 0, i32 2, i32 0
|
||||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_2_0]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef 106, ptr noundef @A.str) #6
|
||||
// CHECK-NEXT: %[[Ptr_Tmp0_2_1:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %tmp, i32 0, i32 2, i32 1
|
||||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_2_1]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef 107, ptr noundef @A.str) #6
|
||||
// After this Tmp0 values are copied to %Sub using the standard array loop initialization
|
||||
// (generated from ArrayInitLoopExpr AST node)
|
||||
RWBuffer<float> Sub[3][2] = A[4][1];
|
||||
|
||||
// CHECK: %[[Ptr_Sub_2:.*]] = getelementptr inbounds [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %Sub, i32 0, i32 2
|
||||
// CHECK: %[[Ptr_Sub_2_1:.*]] = getelementptr inbounds [2 x %"class.hlsl::RWBuffer"], ptr %[[Ptr_Sub_2]], i32 0, i32 1
|
||||
// CHECK-NEXT: %[[BufPtr:.*]] = call {{.*}} ptr @_ZN4hlsl8RWBufferIfEixEj(ptr {{.*}} %[[Ptr_Sub_2_1]], i32 noundef 0)
|
||||
// CHECK-NEXT: %[[Sub_2_1_0_Value:.*]] = load float, ptr %[[BufPtr]], align 4
|
||||
// CHECK-NEXT: store float %[[Sub_2_1_0_Value]], ptr %a, align 4
|
||||
float a = Sub[2][1][0];
|
||||
|
||||
// Codegen for "foo(A[2][GI])" - create local array [[Tmp2]] of size 3 x 2 and initialize
|
||||
// each element by a call to the resource constructor with dynamic index, and then
|
||||
// copy-in the array as an argument of "foo"
|
||||
|
||||
// Calculate the resource index for A[2][GI][0][0] (index in the resource array as if it was flattened)
|
||||
// The index is 2 * (4 * 3 * 2) + GI * (3 * 2) = 48 + GI * 6
|
||||
// CHECK: %[[GI:.*]] = load i32, ptr %[[GI_alloca]], align 4
|
||||
// CHECK-NEXT: %[[Index_A_2_GI_Tmp:.*]] = mul i32 %[[GI]], 6
|
||||
// CHECK-NEXT: %[[Index_A_2_GI_0_0:.*]] = add i32 %[[Index_A_2_GI_Tmp]], 48
|
||||
|
||||
// A[2][GI][0][0]
|
||||
// CHECK-NEXT: %[[Ptr_Tmp2_0_0:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %[[Tmp2]], i32 0, i32 0, i32 0
|
||||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_0_0]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef %[[Index_A_2_GI_0_0]], ptr noundef @A.str)
|
||||
|
||||
// A[2][GI][0][1]
|
||||
// CHECK-NEXT: %[[Index_A_2_GI_0_1:.*]] = add i32 %[[Index_A_2_GI_0_0]], 1
|
||||
// CHECK-NEXT: %[[Ptr_Tmp2_0_1:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %[[Tmp2]], i32 0, i32 0, i32 1
|
||||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_0_1]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef %[[Index_A_2_GI_0_1]], ptr noundef @A.str)
|
||||
|
||||
// A[2][GI][1][0]
|
||||
// CHECK-NEXT: %[[Index_A_2_GI_1_0:.*]] = add i32 %[[Index_A_2_GI_0_1]], 1
|
||||
// CHECK-NEXT: %[[Ptr_Tmp2_1_0:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %[[Tmp2]], i32 0, i32 1, i32 0
|
||||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_1_0]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef %[[Index_A_2_GI_1_0]], ptr noundef @A.str)
|
||||
|
||||
// A[2][GI][1][1]
|
||||
// CHECK-NEXT: %[[Index_A_2_GI_1_1:.*]] = add i32 %[[Index_A_2_GI_1_0]], 1
|
||||
// CHECK-NEXT: %[[Ptr_Tmp2_1_1:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %[[Tmp2]], i32 0, i32 1, i32 1
|
||||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_1_1]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef %[[Index_A_2_GI_1_1]], ptr noundef @A.str)
|
||||
|
||||
// A[2][GI][2][0]
|
||||
// CHECK-NEXT: %[[Index_A_2_GI_2_0:.*]] = add i32 %[[Index_A_2_GI_1_1]], 1
|
||||
// CHECK-NEXT: %[[Ptr_Tmp2_2_0:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %[[Tmp2]], i32 0, i32 2, i32 0
|
||||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_2_0]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef %[[Index_A_2_GI_2_0]], ptr noundef @A.str)
|
||||
|
||||
// A[2][GI][2][1]
|
||||
// CHECK-NEXT: %[[Index_A_2_GI_2_1:.*]] = add i32 %[[Index_A_2_GI_2_0]], 1
|
||||
// CHECK-NEXT: %[[Ptr_Tmp2_2_1:.*]] = getelementptr [3 x [2 x %"class.hlsl::RWBuffer"]], ptr %[[Tmp2]], i32 0, i32 2, i32 1
|
||||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_2_1]], i32 noundef 10, i32 noundef 2, i32 noundef 120, i32 noundef %[[Index_A_2_GI_2_1]], ptr noundef @A.str)
|
||||
|
||||
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Tmp1]], ptr align 4 %[[Tmp2]], i32 24, i1 false)
|
||||
// CHECK-NEXT: %[[FooReturned:.*]] = call {{.*}} float @_Z3fooA3_A2_N4hlsl8RWBufferIfEE(ptr noundef byval([3 x [2 x %"class.hlsl::RWBuffer"]]) align 4 %[[Tmp1]])
|
||||
// CHECK-NEXT: store float %[[FooReturned]], ptr %b, align 4
|
||||
float b = foo(A[2][GI]);
|
||||
|
||||
Out[0] = a + b;
|
||||
}
|
||||
@ -0,0 +1,62 @@
|
||||
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-compute -finclude-default-header \
|
||||
// RUN: -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
|
||||
|
||||
// CHECK: @[[BufA:.*]] = private unnamed_addr constant [2 x i8] c"A\00", align 1
|
||||
|
||||
RWBuffer<float> A[4][2] : register(u10, space2);
|
||||
RWStructuredBuffer<float> Out;
|
||||
|
||||
float foo(RWBuffer<float> Arr[2]) {
|
||||
return Arr[1][0];
|
||||
}
|
||||
|
||||
// NOTE:
|
||||
// - _ZN4hlsl8RWBufferIfEC1EjjijPKc is the constructor call for explicit binding
|
||||
// (has "jjij" in the mangled name) and the arguments are (register, space, range_size, index, name).
|
||||
// - _ZN4hlsl8RWBufferIfEixEj is the subscript operator for RWBuffer<float>
|
||||
|
||||
// CHECK: define internal void @_Z4mainj(i32 noundef %GI)
|
||||
// CHECK-NEXT: entry:
|
||||
// CHECK-NEXT: %[[GI_alloca:.*]] = alloca i32, align 4
|
||||
// CHECK-NEXT: %Sub = alloca [2 x %"class.hlsl::RWBuffer"], align 4
|
||||
// CHECK-NEXT: %[[Tmp0:.*]] = alloca [2 x %"class.hlsl::RWBuffer"], align 4
|
||||
// CHECK-NEXT: %a = alloca float, align 4
|
||||
// CHECK-NEXT: %b = alloca float, align 4
|
||||
// CHECK-NEXT: %[[Tmp1:.*]] = alloca [2 x %"class.hlsl::RWBuffer"], align 4
|
||||
// CHECK-NEXT: %[[Tmp2:.*]] = alloca [2 x %"class.hlsl::RWBuffer"], align 4
|
||||
// CHECK-NEXT: store i32 %GI, ptr %[[GI_alloca]], align 4
|
||||
[numthreads(4,1,1)]
|
||||
void main(uint GI : SV_GroupThreadID) {
|
||||
// Codegen for "A[3]" - create local array [[Tmp0]] of size 2 and initialize
|
||||
// each element by a call to the resource constructor
|
||||
// CHECK-NEXT: %[[Ptr_Tmp0_0:.*]] = getelementptr [2 x %"class.hlsl::RWBuffer"], ptr %[[Tmp0]], i32 0, i32 0
|
||||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_0]], i32 noundef 10, i32 noundef 2, i32 noundef 8, i32 noundef 6, ptr noundef @A.str)
|
||||
// CHECK-NEXT: %[[Ptr_Tmp0_1:.*]] = getelementptr [2 x %"class.hlsl::RWBuffer"], ptr %[[Tmp0]], i32 0, i32 1
|
||||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp0_1]], i32 noundef 10, i32 noundef 2, i32 noundef 8, i32 noundef 7, ptr noundef @A.str)
|
||||
// After this Tmp0 values are copied to %Sub using the standard array loop initialization
|
||||
// (generated from ArrayInitLoopExpr AST node)
|
||||
RWBuffer<float> Sub[2] = A[3];
|
||||
|
||||
// CHECK: %[[Ptr_Sub_1:.*]] = getelementptr inbounds [2 x %"class.hlsl::RWBuffer"], ptr %Sub, i32 0, i32 1
|
||||
// CHECK-NEXT: %[[BufPtr:.*]] = call {{.*}} ptr @_ZN4hlsl8RWBufferIfEixEj(ptr {{.*}} %[[Ptr_Sub_1]], i32 noundef 0)
|
||||
// CHECK-NEXT: %[[Sub_1_0_Value:.*]] = load float, ptr %[[BufPtr]], align 4
|
||||
// CHECK-NEXT: store float %[[Sub_1_0_Value]], ptr %a, align 4
|
||||
float a = Sub[1][0];
|
||||
|
||||
// Codegen for "foo(A[GI])" - create local array [[Tmp2]] of size 2 and initialize
|
||||
// each element by a call to the resource constructor with dynamic index, and then
|
||||
// copy-in the array as an argument of "foo"
|
||||
// CHECK: %[[GI:.*]] = load i32, ptr %[[GI_alloca]], align 4
|
||||
// CHECK-NEXT: %[[Index_A_GI_0:.*]] = mul i32 %[[GI]], 2
|
||||
// CHECK-NEXT: %[[Ptr_Tmp2_GI_0:.*]] = getelementptr [2 x %"class.hlsl::RWBuffer"], ptr %[[Tmp2]], i32 0, i32 0
|
||||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_GI_0]], i32 noundef 10, i32 noundef 2, i32 noundef 8, i32 noundef %[[Index_A_GI_0]], ptr noundef @A.str)
|
||||
// CHECK-NEXT: %[[Index_A_GI_1:.*]] = add i32 %[[Index_A_GI_0]], 1
|
||||
// CHECK-NEXT: %[[Ptr_Tmp2_GI_1:.*]] = getelementptr [2 x %"class.hlsl::RWBuffer"], ptr %[[Tmp2]], i32 0, i32 1
|
||||
// CHECK-NEXT: call void @_ZN4hlsl8RWBufferIfEC1EjjijPKc(ptr {{.*}} %[[Ptr_Tmp2_GI_1]], i32 noundef 10, i32 noundef 2, i32 noundef 8, i32 noundef %[[Index_A_GI_1]], ptr noundef @A.str)
|
||||
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 %[[Tmp1]], ptr align 4 %[[Tmp2]], i32 8, i1 false)
|
||||
// CHECK-NEXT: %[[FooReturned:.*]] = call {{.*}} float @_Z3fooA2_N4hlsl8RWBufferIfEE(ptr noundef byval([2 x %"class.hlsl::RWBuffer"]) align 4 %[[Tmp1]])
|
||||
// CHECK-NEXT: store float %[[FooReturned]], ptr %b, align 4
|
||||
float b = foo(A[GI]);
|
||||
|
||||
Out[0] = a + b;
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user