[HLSL] Avoid putting the byval attribute on out and inout parameters (#150495)

Fixes #148063 by preventing the ByVal attribute from being placed on out
and inout function parameters which causes them to be eliminated by the
Dead Store Elimination (DSE) pass.
This commit is contained in:
Deric C. 2025-07-24 13:54:00 -07:00 committed by GitHub
parent d9952a7a5f
commit ed9a1027a4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 28 additions and 20 deletions

View File

@ -2852,20 +2852,28 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
if (AI.getInReg())
Attrs.addAttribute(llvm::Attribute::InReg);
// Depending on the ABI, this may be either a byval or a dead_on_return
// argument.
if (AI.getIndirectByVal()) {
Attrs.addByValAttr(getTypes().ConvertTypeForMem(ParamType));
} else {
// Add dead_on_return when the object's lifetime ends in the callee.
// This includes trivially-destructible objects, as well as objects
// whose destruction / clean-up is carried out within the callee (e.g.,
// Obj-C ARC-managed structs, MSVC callee-destroyed objects).
if (!ParamType.isDestructedType() || !ParamType->isRecordType() ||
ParamType->castAs<RecordType>()
->getDecl()
->isParamDestroyedInCallee())
Attrs.addAttribute(llvm::Attribute::DeadOnReturn);
// HLSL out and inout parameters must not be marked with ByVal or
// DeadOnReturn attributes because stores to these parameters by the
// callee are visible to the caller.
if (auto ParamABI = FI.getExtParameterInfo(ArgNo).getABI();
ParamABI != ParameterABI::HLSLOut &&
ParamABI != ParameterABI::HLSLInOut) {
// Depending on the ABI, this may be either a byval or a dead_on_return
// argument.
if (AI.getIndirectByVal()) {
Attrs.addByValAttr(getTypes().ConvertTypeForMem(ParamType));
} else {
// Add dead_on_return when the object's lifetime ends in the callee.
// This includes trivially-destructible objects, as well as objects
// whose destruction / clean-up is carried out within the callee
// (e.g., Obj-C ARC-managed structs, MSVC callee-destroyed objects).
if (!ParamType.isDestructedType() || !ParamType->isRecordType() ||
ParamType->castAs<RecordType>()
->getDecl()
->isParamDestroyedInCallee())
Attrs.addAttribute(llvm::Attribute::DeadOnReturn);
}
}
auto *Decl = ParamType->getAsRecordDecl();

View File

@ -11,7 +11,7 @@ void increment(inout int Arr[2]) {
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
// CHECK-NEXT: call void @{{.*}}increment{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]])
// CHECK-NEXT: call void @{{.*}}increment{{.*}}(ptr noalias noundef align 4 [[Tmp]])
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
@ -32,7 +32,7 @@ void fn2(out int Arr[2]) {
// CHECK: [[A:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
// CHECK-NEXT: call void @{{.*}}fn2{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]])
// CHECK-NEXT: call void @{{.*}}fn2{{.*}}(ptr noalias noundef align 4 [[Tmp]])
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
@ -56,7 +56,7 @@ void nestedCall(inout int Arr[2], uint index) {
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
// CHECK-NEXT: call void @{{.*}}nestedCall{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]], i32 noundef 0)
// CHECK-NEXT: call void @{{.*}}nestedCall{{.*}}(ptr noalias noundef align 4 [[Tmp]], i32 noundef 0)
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 1
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
@ -70,7 +70,7 @@ export int arrayCall3() {
// CHECK-LABEL: outerCall
// CHECK: [[Tmp:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 %{{.*}}, i32 8, i1 false)
// CHECK-NEXT: call void {{.*}}increment{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]])
// CHECK-NEXT: call void {{.*}}increment{{.*}}(ptr noalias noundef align 4 [[Tmp]])
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 {{.*}}, ptr align 4 [[Tmp]], i32 8, i1 false)
// CHECK-NEXT: ret void
void outerCall(inout int Arr[2]) {
@ -82,7 +82,7 @@ void outerCall(inout int Arr[2]) {
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
// CHECK-NEXT: call void @{{.*}}outerCall{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]])
// CHECK-NEXT: call void @{{.*}}outerCall{{.*}}(ptr noalias noundef align 4 [[Tmp]])
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4
@ -110,7 +110,7 @@ void outerCall2(inout int Arr[2]) {
// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 @{{.*}}, i32 8, i1 false)
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false)
// CHECK-NEXT: call void @{{.*}}outerCall2{{.*}}(ptr noalias noundef byval([2 x i32]) align 4 [[Tmp]])
// CHECK-NEXT: call void @{{.*}}outerCall2{{.*}}(ptr noalias noundef align 4 [[Tmp]])
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 [[Tmp]], i32 8, i1 false)
// CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x i32], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[B:%.*]] = load i32, ptr [[Idx]], align 4