Fixes #185518. The SPIR-V backend does not handle the lowering of `shufflevector` instructions on vectors with more than 4 elements. This PR changes the codegen of matrix init lists to directly emit vectors with elements in column-major order when the default matrix memory layout is column-major, as opposed to emitting them in linear/row-major order followed by a vector shuffle. While an alternative fix could be to change the default depth of [`canEvaluateShuffled`](https://github.com/llvm/llvm-project/blob/main/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp#L1865-L1866) to 16 in `InstCombineVectorOps.cpp` to eliminate the vector shuffle for vectors of up to 16 elements in size (to handle 4x4 matrices), that change would have broader impacts than just HLSL, which does not seem necessary for the scope of this issue (which concerns only matrix initializer list codegen). Another alternative fix would be to extend the `shufflevector` lowering in the SPIR-V backend to support vectors of more than 4 elements. However, again, this goes beyond the scope of matrix initializer list codegen, which is so far the only case where a vector shuffle of a vector with more than 4 elements has appeared. Assisted-by: claude-opus-4.6
160 lines
7.2 KiB
HLSL
160 lines
7.2 KiB
HLSL
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
|
|
// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
|
|
|
|
|
|
// Aggregate pairing a 2x2 boolean matrix with a scalar float. fn3 and fn6
// use it to exercise a matrix initializer list nested inside a struct
// initializer; their expected IR copies 20 bytes for the whole object.
struct S {
|
|
bool2x2 bM;
|
|
float f;
|
|
};
|
|
|
|
// CHECK-LABEL: define hidden noundef i1 @_Z3fn1v(
|
|
// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i1, align 4
|
|
// CHECK-NEXT: [[B:%.*]] = alloca [2 x <2 x i32>], align 4
|
|
// CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[B]], align 4
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[B]], align 4
|
|
// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0
|
|
// CHECK-NEXT: store i32 [[MATRIXEXT]], ptr [[RETVAL]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i1, ptr [[RETVAL]], align 4
|
|
// CHECK-NEXT: ret i1 [[TMP1]]
|
|
//
|
|
// Initializes a bool2x2 from an all-true initializer list and returns the
// element at row 0, column 0. The expected IR above stores the matrix as a
// single <4 x i32> splat of 1 and reads vector element 0.
bool fn1() {
|
|
bool2x2 B = {true,true,true,true};
|
|
return B[0][0];
|
|
}
|
|
|
|
// CHECK-LABEL: define hidden noundef <4 x i1> @_Z3fn2b(
|
|
// CHECK-SAME: i1 noundef [[V:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x i1>, align 4
|
|
// CHECK-NEXT: [[V_ADDR:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: [[A:%.*]] = alloca [2 x <2 x i32>], align 4
|
|
// CHECK-NEXT: [[STOREDV:%.*]] = zext i1 [[V]] to i32
|
|
// CHECK-NEXT: store i32 [[STOREDV]], ptr [[V_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[V_ADDR]], align 4
|
|
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i32 [[TMP0]] to i1
|
|
// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x i1> poison, i1 [[LOADEDV]], i32 0
|
|
// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x i1> [[VECINIT]], i1 true, i32 2
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[V_ADDR]], align 4
|
|
// CHECK-NEXT: [[LOADEDV2:%.*]] = trunc i32 [[TMP1]] to i1
|
|
// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x i1> [[VECINIT1]], i1 [[LOADEDV2]], i32 1
|
|
// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <4 x i1> [[VECINIT3]], i1 false, i32 3
|
|
// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i1> [[VECINIT4]] to <4 x i32>
|
|
// CHECK-NEXT: store <4 x i32> [[TMP2]], ptr [[A]], align 4
|
|
// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[A]], align 4
|
|
// CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[RETVAL]], align 4
|
|
// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i1>, ptr [[RETVAL]], align 4
|
|
// CHECK-NEXT: ret <4 x i1> [[TMP4]]
|
|
//
|
|
// Builds a bool2x2 from an initializer list that mixes the runtime argument V
// with constants, then returns the matrix by value. The list {V, true, V,
// false} is inserted at vector elements 0, 2, 1, 3 respectively in the
// expected IR above, i.e. the entries land directly in their column-major
// positions with no shuffle afterwards.
bool2x2 fn2(bool V) {
|
|
bool2x2 A = {V, true, V, false};
|
|
return A;
|
|
}
|
|
|
|
// CHECK-LABEL: define hidden noundef i1 @_Z3fn3v(
|
|
// CHECK-SAME: ) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i1, align 4
|
|
// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
|
|
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[S]], ptr align 1 @__const._Z3fn3v.s, i32 20, i1 false)
|
|
// CHECK-NEXT: [[BM:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[BM]], align 1
|
|
// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0
|
|
// CHECK-NEXT: store i32 [[MATRIXEXT]], ptr [[RETVAL]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i1, ptr [[RETVAL]], align 4
|
|
// CHECK-NEXT: ret i1 [[TMP1]]
|
|
//
|
|
// Initializes an S whose matrix member comes from a nested, all-constant
// initializer list, then returns s.bM[0][0]. The expected IR above copies the
// object from the constant global @__const._Z3fn3v.s (whose name includes the
// local name `s`) and extracts vector element 0 of the matrix member.
bool fn3() {
|
|
S s = {{true,true,false,false}, 1.0};
|
|
return s.bM[0][0];
|
|
}
|
|
|
|
// CHECK-LABEL: define hidden noundef i1 @_Z3fn4v(
|
|
// CHECK-SAME: ) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i1, align 4
|
|
// CHECK-NEXT: [[ARR:%.*]] = alloca [2 x [2 x <2 x i32>]], align 4
|
|
// CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[ARR]], align 4
|
|
// CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[ARR]], i32 1
|
|
// CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[ARRAYINIT_ELEMENT]], align 4
|
|
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [2 x <2 x i32>]], ptr [[ARR]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 4
|
|
// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1
|
|
// CHECK-NEXT: store i32 [[MATRIXEXT]], ptr [[RETVAL]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load i1, ptr [[RETVAL]], align 4
|
|
// CHECK-NEXT: ret i1 [[TMP1]]
|
|
//
|
|
// Initializes a two-element array of bool2x2 (first all true, second all
// false) and returns row 1, column 0 of the first matrix. The expected IR
// above stores each matrix as one <4 x i32> constant and reads vector
// element 1 of array element 0.
bool fn4() {
|
|
bool2x2 Arr[2] = {{true,true,true,true}, {false,false,false,false}};
|
|
return Arr[0][1][0];
|
|
}
|
|
|
|
// CHECK-LABEL: define hidden void @_Z3fn5v(
|
|
// CHECK-SAME: ) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[M:%.*]] = alloca [2 x <2 x i32>], align 4
|
|
// CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[M]], align 4
|
|
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr <4 x i32>, ptr [[M]], i32 0, i32 3
|
|
// CHECK-NEXT: store i32 0, ptr [[TMP0]], align 4
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
// Initializes an all-true bool2x2 and then overwrites row 1, column 1 with
// false. The expected IR above performs the element write as a store of i32 0
// to vector element 3 of the matrix storage.
void fn5() {
|
|
bool2x2 M = {true,true,true,true};
|
|
M[1][1] = false;
|
|
}
|
|
|
|
// CHECK-LABEL: define hidden void @_Z3fn6v(
|
|
// CHECK-SAME: ) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[V:%.*]] = alloca i32, align 4
|
|
// CHECK-NEXT: [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 1
|
|
// CHECK-NEXT: store i32 0, ptr [[V]], align 4
|
|
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[S]], ptr align 1 @__const._Z3fn6v.s, i32 20, i1 false)
|
|
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[V]], align 4
|
|
// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i32 [[TMP0]] to i1
|
|
// CHECK-NEXT: [[BM:%.*]] = getelementptr inbounds nuw [[STRUCT_S]], ptr [[S]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[LOADEDV]] to i32
|
|
// CHECK-NEXT: [[TMP2:%.*]] = getelementptr <4 x i32>, ptr [[BM]], i32 0, i32 1
|
|
// CHECK-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
// Initializes an S from a constant initializer list and then assigns the
// local bool V to row 1, column 0 of its matrix member. In the expected IR
// above, V is loaded, truncated to i1, widened back to i32 and stored to
// vector element 1 of s.bM.
void fn6() {
|
|
bool V = false;
|
|
S s = {{true,true,false,false}, 1.0};
|
|
s.bM[1][0] = V;
|
|
}
|
|
|
|
// CHECK-LABEL: define hidden void @_Z3fn7v(
|
|
// CHECK-SAME: ) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[ARR:%.*]] = alloca [2 x [2 x <2 x i32>]], align 4
|
|
// CHECK-NEXT: store <4 x i32> splat (i32 1), ptr [[ARR]], align 4
|
|
// CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[ARR]], i32 1
|
|
// CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[ARRAYINIT_ELEMENT]], align 4
|
|
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [2 x <2 x i32>]], ptr [[ARR]], i32 0, i32 0
|
|
// CHECK-NEXT: [[TMP0:%.*]] = getelementptr <4 x i32>, ptr [[ARRAYIDX]], i32 0, i32 1
|
|
// CHECK-NEXT: store i32 0, ptr [[TMP0]], align 4
|
|
// CHECK-NEXT: ret void
|
|
//
|
|
// Initializes a two-element array of bool2x2 (first all true, second all
// false) and then writes false to row 1, column 0 of the first matrix. The
// expected IR above stores i32 0 to vector element 1 of array element 0.
void fn7() {
|
|
bool2x2 Arr[2] = {{true,true,true,true}, {false,false,false,false}};
|
|
Arr[0][1][0] = false;
|
|
}
|
|
|
|
// CHECK-LABEL: define hidden noundef <16 x i1> @_Z3fn8u11matrix_typeILm4ELm4EbE(
|
|
// CHECK-SAME: <16 x i1> noundef [[M:%.*]]) #[[ATTR0]] {
|
|
// CHECK-NEXT: [[ENTRY:.*:]]
|
|
// CHECK-NEXT: [[RETVAL:%.*]] = alloca <16 x i1>, align 4
|
|
// CHECK-NEXT: [[M_ADDR:%.*]] = alloca [4 x <4 x i32>], align 4
|
|
// CHECK-NEXT: [[TMP0:%.*]] = zext <16 x i1> [[M]] to <16 x i32>
|
|
// CHECK-NEXT: store <16 x i32> [[TMP0]], ptr [[M_ADDR]], align 4
|
|
// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[M_ADDR]], align 4
|
|
// CHECK-NEXT: store <16 x i32> [[TMP1]], ptr [[RETVAL]], align 4
|
|
// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i1>, ptr [[RETVAL]], align 4
|
|
// CHECK-NEXT: ret <16 x i1> [[TMP2]]
|
|
//
|
|
// Returns its bool4x4 argument unchanged; no initializer list is involved.
// The expected IR above widens the incoming <16 x i1> to <16 x i32> for the
// parameter's stack slot, then reloads it and returns a <16 x i1>.
bool4x4 fn8(bool4x4 m) {
|
|
return m;
|
|
}
|