Farzon Lotfi 6457aee5b7
[DirectX] Bug fix for Data Scalarization crash (#118426)
Two bugs here. First calling `Inst->getFunction()` has undefined
behavior if the instruction is not tracked to a function. I suspect the
`replaceAllUsesWith` was leaving the GEPs in a weird ghost parent
situation. I switched up the visitor to be able to `eraseFromParent` as
part of visiting and then everything started working.

The second bug was in `DXILFlattenArrays.cpp`. I was unaware that you
can have multidimensional arrays of `zeroinitializer`, and `undef` so
fixed up the initializer to handle these two cases.

fixes #117273
2024-12-18 16:33:49 -05:00

62 lines
3.6 KiB
LLVM

; RUN: opt -S -passes='dxil-data-scalarization,function(scalarizer<load-store>),dxil-op-lower' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s
; Make sure we can load groupshared, static vectors and arrays of vectors
@arrayofVecData = local_unnamed_addr addrspace(3) global [2 x <3 x float>] zeroinitializer, align 16
@vecData = external addrspace(3) global <4 x i32>, align 4
@staticArrayOfVecData = internal global [3 x <4 x i32>] [<4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>, <4 x i32> <i32 9, i32 10, i32 11, i32 12>], align 4
@groushared2dArrayofVectors = local_unnamed_addr addrspace(3) global [3 x [ 3 x <4 x i32>]] zeroinitializer, align 16
; CHECK: @arrayofVecData.scalarized = local_unnamed_addr addrspace(3) global [2 x [3 x float]] zeroinitializer, align 16
; CHECK: @vecData.scalarized = external addrspace(3) global [4 x i32], align 4
; CHECK: @staticArrayOfVecData.scalarized = internal global [3 x [4 x i32]] {{\[}}[4 x i32] [i32 1, i32 2, i32 3, i32 4], [4 x i32] [i32 5, i32 6, i32 7, i32 8], [4 x i32] [i32 9, i32 10, i32 11, i32 12]], align 4
; CHECK: @groushared2dArrayofVectors.scalarized = local_unnamed_addr addrspace(3) global [3 x [3 x [4 x i32]]] zeroinitializer, align 16
; CHECK-NOT: @arrayofVecData
; CHECK-NOT: @vecData
; CHECK-NOT: @staticArrayOfVecData
; CHECK-NOT: @groushared2dArrayofVectors
; CHECK-LABEL: load_array_vec_test
define <3 x float> @load_array_vec_test() #0 {
; CHECK-COUNT-6: load float, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align 4
%1 = load <3 x float>, <3 x float> addrspace(3)* getelementptr inbounds ([2 x <3 x float>], [2 x <3 x float>] addrspace(3)* @"arrayofVecData", i32 0, i32 0), align 4
%2 = load <3 x float>, <3 x float> addrspace(3)* getelementptr inbounds ([2 x <3 x float>], [2 x <3 x float>] addrspace(3)* @"arrayofVecData", i32 0, i32 1), align 4
%3 = fadd <3 x float> %1, %2
ret <3 x float> %3
}
; CHECK-LABEL: load_vec_test
define <4 x i32> @load_vec_test() #0 {
; CHECK-COUNT-4: load i32, ptr addrspace(3) {{(@vecData.scalarized|getelementptr \(i32, ptr addrspace\(3\) @vecData.scalarized, i32 .*\)|%.*)}}, align {{.*}}
%1 = load <4 x i32>, <4 x i32> addrspace(3)* @"vecData", align 4
ret <4 x i32> %1
}
; CHECK-LABEL: load_static_array_of_vec_test
define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 {
; CHECK: getelementptr inbounds [3 x [4 x i32]], ptr @staticArrayOfVecData.scalarized, i32 0, i32 %index
; CHECK: load i32, ptr {{.*}}, align 4
; CHECK: getelementptr i32, ptr {{.*}}, i32 1
; CHECK: load i32, ptr {{.*}}, align 4
; CHECK: getelementptr i32, ptr {{.*}}, i32 2
; CHECK: load i32, ptr {{.*}}, align 4
; CHECK: getelementptr i32, ptr {{.*}}, i32 3
; CHECK: load i32, ptr {{.*}}, align 4
%3 = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* @staticArrayOfVecData, i32 0, i32 %index
%4 = load <4 x i32>, <4 x i32>* %3, align 4
ret <4 x i32> %4
}
; CHECK-LABEL: multid_load_test
define <4 x i32> @multid_load_test() #0 {
; CHECK-COUNT-8: load i32, ptr addrspace(3) {{(.*@groushared2dArrayofVectors.scalarized.*|%.*)}}, align 4
%1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 0, i32 0), align 4
%2 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 1, i32 1), align 4
%3 = add <4 x i32> %1, %2
ret <4 x i32> %3
}
attributes #0 = { convergent norecurse nounwind "hlsl.export"}