llvm-project/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
Matt Arsenault ab6b48b711 DAG: Avoid stack lowering if bitcast has an illegal vector result type
A bitcast of <10 x i32> to <5 x i64> was ending up on the
stack. Instead of doing that, handle the case where the new type
doesn't evenly divide but the elements do. Extract the individual
elements and pad with undef.

Avoids stack usage for bitcasts involving <5 x i64>. In some of these
cases, later optimizations actually eliminated the stack objects but
left behind the unused temporary stack object to final emission.

Fixes: SWDEV-377548
2023-01-15 12:37:14 -05:00

859 lines
28 KiB
LLVM

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; This test mainly checks that the compiler doesn't crash; some functions additionally carry instruction or scratch-size checks.
; FUNC-LABEL: {{^}}v32i8_to_v8i32:
; amdgpu_ps function: loads <32 x i8> through the unnamed inreg addrspace(4)
; pointer argument (%0), reinterprets the value as <8 x i32>, and returns 0.0
; when element 1 is non-zero and 1.0 otherwise.
; NOTE(review): attribute group #0 is referenced here, but its definition is
; not visible in this part of the file.
define amdgpu_ps float @v32i8_to_v8i32(ptr addrspace(4) inreg) #0 {
entry:
%1 = load <32 x i8>, ptr addrspace(4) %0
%2 = bitcast <32 x i8> %1 to <8 x i32>
%3 = extractelement <8 x i32> %2, i32 1
%4 = icmp ne i32 %3, 0
; Note the operand order: a non-zero element selects 0.0.
%5 = select i1 %4, float 0.0, float 1.0
ret float %5
}
; FUNC-LABEL: {{^}}i8ptr_v16i8ptr:
; SI: s_endpgm
; Kernel: copies one <16 x i8> value from %in to %out.
; NOTE(review): the body contains no bitcast instruction; presumably the name
; refers to pointer casts that existed before the test used opaque `ptr`
; types -- confirm against the file history.
define amdgpu_kernel void @i8ptr_v16i8ptr(ptr addrspace(1) %out, ptr addrspace(1) %in) {
entry:
%0 = load <16 x i8>, ptr addrspace(1) %in
store <16 x i8> %0, ptr addrspace(1) %out
ret void
}
; Kernel: float -> <2 x i16> bitcast.
; Loads a float, adds 1.0, reinterprets the result as <2 x i16>, adds 2 to
; each lane and stores the vector to %out.
; NOTE(review): the arithmetic on both sides of the cast presumably keeps the
; bitcast from being folded into the load/store -- confirm intent.
define amdgpu_kernel void @f32_to_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load float, ptr addrspace(1) %in, align 4
%fadd32 = fadd float %load, 1.0
%bc = bitcast float %fadd32 to <2 x i16>
%add.bitcast = add <2 x i16> %bc, <i16 2, i16 2>
store <2 x i16> %add.bitcast, ptr addrspace(1) %out
ret void
}
; Kernel: <2 x i16> -> float bitcast.
; Loads <2 x i16>, adds 2 to each lane, reinterprets the vector as a float,
; adds 1.0 and stores the float to %out.
define amdgpu_kernel void @v2i16_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <2 x i16>, ptr addrspace(1) %in, align 4
%add.v2i16 = add <2 x i16> %load, <i16 2, i16 2>
%bc = bitcast <2 x i16> %add.v2i16 to float
%fadd.bitcast = fadd float %bc, 1.0
store float %fadd.bitcast, ptr addrspace(1) %out
ret void
}
; Kernel: float -> <2 x half> bitcast.
; Loads a float, adds 1.0, reinterprets the result as <2 x half>, adds 2.0 to
; each lane and stores the vector to %out.
define amdgpu_kernel void @f32_to_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load float, ptr addrspace(1) %in, align 4
%fadd32 = fadd float %load, 1.0
%bc = bitcast float %fadd32 to <2 x half>
%add.bitcast = fadd <2 x half> %bc, <half 2.0, half 2.0>
store <2 x half> %add.bitcast, ptr addrspace(1) %out
ret void
}
; Kernel: <2 x half> -> float bitcast.
; Loads <2 x half>, adds 2.0 to each lane, reinterprets the vector as a float,
; adds 1.0 and stores the float to %out.
define amdgpu_kernel void @v2f16_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <2 x half>, ptr addrspace(1) %in, align 4
%add.v2f16 = fadd <2 x half> %load, <half 2.0, half 2.0>
%bc = bitcast <2 x half> %add.v2f16 to float
%fadd.bitcast = fadd float %bc, 1.0
store float %fadd.bitcast, ptr addrspace(1) %out
ret void
}
; Kernel: <4 x i8> -> i32 bitcast.
; Loads <4 x i8> from %in, reinterprets it as an i32 and stores it to %out.
; Unlike the neighbouring tests there is no arithmetic around the cast.
define amdgpu_kernel void @v4i8_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <4 x i8>, ptr addrspace(1) %in, align 4
%bc = bitcast <4 x i8> %load to i32
store i32 %bc, ptr addrspace(1) %out, align 4
ret void
}
; Kernel: i32 -> <4 x i8> bitcast.
; Loads an i32 from %in, reinterprets it as <4 x i8> and stores it to %out.
; There is no arithmetic around the cast.
define amdgpu_kernel void @i32_to_v4i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load i32, ptr addrspace(1) %in, align 4
%bc = bitcast i32 %load to <4 x i8>
store <4 x i8> %bc, ptr addrspace(1) %out, align 4
ret void
}
; FUNC-LABEL: {{^}}bitcast_v2i32_to_f64:
; SI: s_endpgm
; Kernel: <2 x i32> -> double bitcast.
; Loads <2 x i32>, adds <4, 9> lane-wise, reinterprets the pair as a double,
; adds 1.0 and stores the double to %out.
define amdgpu_kernel void @bitcast_v2i32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%val = load <2 x i32>, ptr addrspace(1) %in, align 8
%add = add <2 x i32> %val, <i32 4, i32 9>
%bc = bitcast <2 x i32> %add to double
%fadd.bc = fadd double %bc, 1.0
store double %fadd.bc, ptr addrspace(1) %out, align 8
ret void
}
; FUNC-LABEL: {{^}}bitcast_f64_to_v2i32:
; SI: s_endpgm
; Kernel: double -> <2 x i32> bitcast.
; Loads a double, adds 4.0, reinterprets the result as <2 x i32> and stores
; the vector to %out. No arithmetic follows the cast.
define amdgpu_kernel void @bitcast_f64_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%val = load double, ptr addrspace(1) %in, align 8
%add = fadd double %val, 4.0
%bc = bitcast double %add to <2 x i32>
store <2 x i32> %bc, ptr addrspace(1) %out, align 8
ret void
}
; FUNC-LABEL: {{^}}bitcast_v2i64_to_v2f64:
; Kernel: <2 x i64> -> <2 x double> bitcast feeding a phi.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    bitcasts the kernel argument %value.
;   end:   stores zeroinitializer (path from %entry) or the cast (path from %if).
define amdgpu_kernel void @bitcast_v2i64_to_v2f64(i32 %cond, ptr addrspace(1) %out, <2 x i64> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%cast = bitcast <2 x i64> %value to <2 x double>
br label %end
end:
%phi = phi <2 x double> [zeroinitializer, %entry], [%cast, %if]
store <2 x double> %phi, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v2f64_to_v2i64:
; Kernel: <2 x double> -> <2 x i64> bitcast feeding a phi.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    bitcasts the kernel argument %value.
;   end:   stores zeroinitializer (path from %entry) or the cast (path from %if).
define amdgpu_kernel void @bitcast_v2f64_to_v2i64(i32 %cond, ptr addrspace(1) %out, <2 x double> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%cast = bitcast <2 x double> %value to <2 x i64>
br label %end
end:
%phi = phi <2 x i64> [zeroinitializer, %entry], [%cast, %if]
store <2 x i64> %phi, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}v4i16_to_f64:
; Kernel: <4 x i16> -> double bitcast.
; Loads <4 x i16>, adds 4 to each lane, reinterprets the vector as a double,
; adds 1.0 and stores the double to %out.
define amdgpu_kernel void @v4i16_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <4 x i16>, ptr addrspace(1) %in, align 4
%add.v4i16 = add <4 x i16> %load, <i16 4, i16 4, i16 4, i16 4>
%bc = bitcast <4 x i16> %add.v4i16 to double
%fadd.bitcast = fadd double %bc, 1.0
store double %fadd.bitcast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}v4f16_to_f64:
; Kernel: <4 x half> -> double bitcast.
; Loads <4 x half>, adds 4.0 to each lane, reinterprets the vector as a
; double, adds 1.0 and stores the double to %out.
define amdgpu_kernel void @v4f16_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <4 x half>, ptr addrspace(1) %in, align 4
%add.v4half = fadd <4 x half> %load, <half 4.0, half 4.0, half 4.0, half 4.0>
%bc = bitcast <4 x half> %add.v4half to double
%fadd.bitcast = fadd double %bc, 1.0
store double %fadd.bitcast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}f64_to_v4f16:
; Kernel: double -> <4 x half> bitcast.
; Loads a double (with only 4-byte alignment), adds 1.0, reinterprets the
; result as <4 x half>, adds 2.0 to each lane and stores the vector to %out.
define amdgpu_kernel void @f64_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load double, ptr addrspace(1) %in, align 4
; Despite the name, %fadd32 is a 64-bit (double) add.
%fadd32 = fadd double %load, 1.0
%bc = bitcast double %fadd32 to <4 x half>
%add.bitcast = fadd <4 x half> %bc, <half 2.0, half 2.0, half 2.0, half 2.0>
store <4 x half> %add.bitcast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}f64_to_v4i16:
; Kernel: double -> <4 x i16> bitcast.
; Loads a double (with only 4-byte alignment), adds 1.0, reinterprets the
; result as <4 x i16>, adds 2 to each lane and stores the vector to %out.
define amdgpu_kernel void @f64_to_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load double, ptr addrspace(1) %in, align 4
; Despite the name, %fadd32 is a 64-bit (double) add.
%fadd32 = fadd double %load, 1.0
%bc = bitcast double %fadd32 to <4 x i16>
%add.bitcast = add <4 x i16> %bc, <i16 2, i16 2, i16 2, i16 2>
store <4 x i16> %add.bitcast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}v4i16_to_i64:
; Kernel: <4 x i16> -> i64 bitcast.
; Loads <4 x i16>, adds 4 to each lane, reinterprets the vector as an i64,
; adds 1 and stores the i64 to %out.
define amdgpu_kernel void @v4i16_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <4 x i16>, ptr addrspace(1) %in, align 4
%add.v4i16 = add <4 x i16> %load, <i16 4, i16 4, i16 4, i16 4>
%bc = bitcast <4 x i16> %add.v4i16 to i64
%add.bitcast = add i64 %bc, 1
store i64 %add.bitcast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}v4f16_to_i64:
; Kernel: <4 x half> -> i64 bitcast.
; Loads <4 x half>, adds 4.0 to each lane, reinterprets the vector as an i64,
; adds 1 and stores the i64 to %out.
define amdgpu_kernel void @v4f16_to_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <4 x half>, ptr addrspace(1) %in, align 4
%add.v4half = fadd <4 x half> %load, <half 4.0, half 4.0, half 4.0, half 4.0>
%bc = bitcast <4 x half> %add.v4half to i64
%add.bitcast = add i64 %bc, 1
store i64 %add.bitcast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_i64_to_v4i16:
; Kernel: i64 -> <4 x i16> bitcast.
; Loads an i64, adds 4, reinterprets the result as <4 x i16>, adds the
; per-lane constants <1, 2, 3, 4> and stores the vector to %out.
define amdgpu_kernel void @bitcast_i64_to_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%val = load i64, ptr addrspace(1) %in, align 8
%add = add i64 %val, 4
%bc = bitcast i64 %add to <4 x i16>
%add.v4i16 = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>
store <4 x i16> %add.v4i16, ptr addrspace(1) %out, align 8
ret void
}
; FUNC-LABEL: {{^}}bitcast_i64_to_v4f16:
; Kernel: i64 -> <4 x half> bitcast.
; Loads an i64, adds 4, reinterprets the result as <4 x half>, adds the
; per-lane constants <1.0, 2.0, 4.0, 8.0> and stores the vector to %out.
define amdgpu_kernel void @bitcast_i64_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%val = load i64, ptr addrspace(1) %in, align 8
%add = add i64 %val, 4
%bc = bitcast i64 %add to <4 x half>
; Despite the name, %add.v4i16 is a <4 x half> floating-point add.
%add.v4i16 = fadd <4 x half> %bc, <half 1.0, half 2.0, half 4.0, half 8.0>
store <4 x half> %add.v4i16, ptr addrspace(1) %out, align 8
ret void
}
; FUNC-LABEL: {{^}}v4i16_to_v2f32:
; Kernel: <4 x i16> -> <2 x float> bitcast.
; Loads <4 x i16>, adds 4 to each lane, reinterprets the vector as
; <2 x float>, adds 1.0 to each lane and stores the result to %out.
define amdgpu_kernel void @v4i16_to_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <4 x i16>, ptr addrspace(1) %in, align 4
%add.v4i16 = add <4 x i16> %load, <i16 4, i16 4, i16 4, i16 4>
%bc = bitcast <4 x i16> %add.v4i16 to <2 x float>
%fadd.bitcast = fadd <2 x float> %bc, <float 1.0, float 1.0>
store <2 x float> %fadd.bitcast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}v4f16_to_v2f32:
; Kernel: <4 x half> -> <2 x float> bitcast.
; Loads <4 x half>, adds 4.0 to each lane, reinterprets the vector as
; <2 x float>, adds 1.0 to each lane and stores the result to %out.
define amdgpu_kernel void @v4f16_to_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <4 x half>, ptr addrspace(1) %in, align 4
%add.v4half = fadd <4 x half> %load, <half 4.0, half 4.0, half 4.0, half 4.0>
%bc = bitcast <4 x half> %add.v4half to <2 x float>
%fadd.bitcast = fadd <2 x float> %bc, <float 1.0, float 1.0>
store <2 x float> %fadd.bitcast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}v2f32_to_v4i16:
; Kernel: <2 x float> -> <4 x i16> bitcast.
; Loads <2 x float>, adds <2.0, 4.0> lane-wise, reinterprets the vector as
; <4 x i16>, adds <1, 2, 3, 4> lane-wise and stores the result to %out.
define amdgpu_kernel void @v2f32_to_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <2 x float>, ptr addrspace(1) %in, align 4
%add.v2f32 = fadd <2 x float> %load, <float 2.0, float 4.0>
%bc = bitcast <2 x float> %add.v2f32 to <4 x i16>
%add.bitcast = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>
store <4 x i16> %add.bitcast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}v2f32_to_v4f16:
; Kernel: <2 x float> -> <4 x half> bitcast.
; Loads <2 x float>, adds <2.0, 4.0> lane-wise, reinterprets the vector as
; <4 x half>, adds <1.0, 2.0, 4.0, 8.0> lane-wise and stores the result.
define amdgpu_kernel void @v2f32_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <2 x float>, ptr addrspace(1) %in, align 4
%add.v2f32 = fadd <2 x float> %load, <float 2.0, float 4.0>
%bc = bitcast <2 x float> %add.v2f32 to <4 x half>
%add.bitcast = fadd <4 x half> %bc, <half 1.0, half 2.0, half 4.0, half 8.0>
store <4 x half> %add.bitcast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}v4i16_to_v2i32:
; Kernel: <4 x i16> -> <2 x i32> bitcast.
; Loads <4 x i16>, adds 4 to each lane, reinterprets the vector as
; <2 x i32>, adds 1 to each lane and stores the result to %out.
define amdgpu_kernel void @v4i16_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <4 x i16>, ptr addrspace(1) %in, align 4
%add.v4i16 = add <4 x i16> %load, <i16 4, i16 4, i16 4, i16 4>
%bc = bitcast <4 x i16> %add.v4i16 to <2 x i32>
%add.bitcast = add <2 x i32> %bc, <i32 1, i32 1>
store <2 x i32> %add.bitcast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}v4f16_to_v2i32:
; Kernel: <4 x half> -> <2 x i32> bitcast.
; Loads <4 x half>, adds 4.0 to each lane, reinterprets the vector as
; <2 x i32>, adds 1 to each lane and stores the result to %out.
define amdgpu_kernel void @v4f16_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <4 x half>, ptr addrspace(1) %in, align 4
%add.v4half = fadd <4 x half> %load, <half 4.0, half 4.0, half 4.0, half 4.0>
%bc = bitcast <4 x half> %add.v4half to <2 x i32>
%add.bitcast = add <2 x i32> %bc, <i32 1, i32 1>
store <2 x i32> %add.bitcast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}v2i32_to_v4i16:
; Kernel: <2 x i32> -> <4 x i16> bitcast.
; Loads <2 x i32>, adds <2, 4> lane-wise, reinterprets the vector as
; <4 x i16>, adds <1, 2, 3, 4> lane-wise and stores the result to %out.
define amdgpu_kernel void @v2i32_to_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <2 x i32>, ptr addrspace(1) %in, align 4
%add.v2i32 = add <2 x i32> %load, <i32 2, i32 4>
%bc = bitcast <2 x i32> %add.v2i32 to <4 x i16>
%add.bitcast = add <4 x i16> %bc, <i16 1, i16 2, i16 3, i16 4>
store <4 x i16> %add.bitcast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}v2i32_to_v4f16:
; Kernel: <2 x i32> -> <4 x half> bitcast.
; Loads <2 x i32>, adds <2, 4> lane-wise, reinterprets the vector as
; <4 x half>, adds <1.0, 2.0, 4.0, 8.0> lane-wise and stores the result.
define amdgpu_kernel void @v2i32_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
%load = load <2 x i32>, ptr addrspace(1) %in, align 4
%add.v2i32 = add <2 x i32> %load, <i32 2, i32 4>
%bc = bitcast <2 x i32> %add.v2i32 to <4 x half>
%add.bitcast = fadd <4 x half> %bc, <half 1.0, half 2.0, half 4.0, half 8.0>
store <4 x half> %add.bitcast, ptr addrspace(1) %out
ret void
}
declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32 immarg)
; FUNC-LABEL: {{^}}bitcast_v4f32_to_v2i64:
; SI: s_buffer_load_dwordx4
; Non-kernel function: <4 x float> -> <2 x i64> bitcast of an intrinsic result.
; Calls llvm.amdgcn.s.buffer.load.v4f32 with an undef resource descriptor and
; offset 0, reinterprets the loaded <4 x float> as <2 x i64>, and returns the
; lane-wise unsigned quotient of that value divided by %arg.
; The instruction check above uses the same prefix as the rest of the file: a
; check written under a prefix that no RUN line passes to FileCheck is
; silently ignored, so it would never verify the buffer load.
define <2 x i64> @bitcast_v4f32_to_v2i64(<2 x i64> %arg) {
%val = call <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32> undef, i32 0, i32 0)
%cast = bitcast <4 x float> %val to <2 x i64>
%div = udiv <2 x i64> %cast, %arg
ret <2 x i64> %div
}
declare half @llvm.canonicalize.f16(half)
; FUNC-LABEL: {{^}}bitcast_f32_to_v1i32:
; Kernel: float -> <1 x i32> bitcast of a value derived from a constant.
; Canonicalizes the half constant 0xH03F0 (call carries the arcp and afn
; fast-math flags), extends it to float, reinterprets the float as a
; single-element i32 vector, extracts element 0 and stores it to %out.
define amdgpu_kernel void @bitcast_f32_to_v1i32(ptr addrspace(1) %out) {
%f16 = call arcp afn half @llvm.canonicalize.f16(half 0xH03F0)
%f32 = fpext half %f16 to float
%v = bitcast float %f32 to <1 x i32>
%v1 = extractelement <1 x i32> %v, i32 0
store i32 %v1, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v4i64_to_v16i16:
; Kernel: <4 x i64> -> <16 x i16> bitcast inside a self-looping block.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    %phi_value is zeroinitializer when entered from %entry and %value
;          on the back edge; it is bitcast, and the block branches back to
;          itself when %cond == 1, otherwise to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v4i64_to_v16i16(i32 %cond, ptr addrspace(1) %out, <4 x i64> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <4 x i64> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <4 x i64> %phi_value to <16 x i16>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <16 x i16> [zeroinitializer, %entry], [%cast, %if]
store <16 x i16> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v4f64_to_v16f16:
; Kernel: <4 x double> -> <16 x half> bitcast inside a self-looping block.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    %phi_value is zeroinitializer when entered from %entry and %value
;          on the back edge; it is bitcast, and the block branches back to
;          itself when %cond == 1, otherwise to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v4f64_to_v16f16(i32 %cond, ptr addrspace(1) %out, <4 x double> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <4 x double> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <4 x double> %phi_value to <16 x half>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <16 x half> [zeroinitializer, %entry], [%cast, %if]
store <16 x half> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v16i16_to_v4i64:
; Kernel: <16 x i16> -> <4 x i64> bitcast inside a self-looping block.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    %phi_value is zeroinitializer when entered from %entry and %value
;          on the back edge; it is bitcast, and the block branches back to
;          itself when %cond == 1, otherwise to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v16i16_to_v4i64(i32 %cond, ptr addrspace(1) %out, <16 x i16> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <16 x i16> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <16 x i16> %phi_value to <4 x i64>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <4 x i64> [zeroinitializer, %entry], [%cast, %if]
store <4 x i64> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v16f16_to_v4f64:
; Kernel: <16 x half> -> <4 x double> bitcast inside a self-looping block.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    %phi_value is zeroinitializer when entered from %entry and %value
;          on the back edge; it is bitcast, and the block branches back to
;          itself when %cond == 1, otherwise to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v16f16_to_v4f64(i32 %cond, ptr addrspace(1) %out, <16 x half> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <16 x half> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <16 x half> %phi_value to <4 x double>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <4 x double> [zeroinitializer, %entry], [%cast, %if]
store <4 x double> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v20f16_to_v5f64:
; SI: ScratchSize: 0
; Kernel: <20 x half> -> <5 x double> bitcast inside a self-looping block.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v20f16_to_v5f64(i32 %cond, ptr addrspace(1) %out, <20 x half> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <20 x half> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <20 x half> %phi_value to <5 x double>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <5 x double> [zeroinitializer, %entry], [%cast, %if]
store <5 x double> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v10f32_to_v5f64:
; SI: ScratchSize: 0
; Kernel: <10 x float> -> <5 x double> bitcast inside a self-looping block.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v10f32_to_v5f64(i32 %cond, ptr addrspace(1) %out, <10 x float> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <10 x float> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <10 x float> %phi_value to <5 x double>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <5 x double> [zeroinitializer, %entry], [%cast, %if]
store <5 x double> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v10i32_to_v5f64:
; SI: ScratchSize: 0
; Kernel: <10 x i32> -> <5 x double> bitcast inside a self-looping block.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v10i32_to_v5f64(i32 %cond, ptr addrspace(1) %out, <10 x i32> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <10 x i32> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <10 x i32> %phi_value to <5 x double>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <5 x double> [zeroinitializer, %entry], [%cast, %if]
store <5 x double> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v10f32_to_v5i64:
; SI: ScratchSize: 0
; Kernel: <10 x float> -> <5 x i64> bitcast inside a self-looping block.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v10f32_to_v5i64(i32 %cond, ptr addrspace(1) %out, <10 x float> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <10 x float> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <10 x float> %phi_value to <5 x i64>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <5 x i64> [zeroinitializer, %entry], [%cast, %if]
store <5 x i64> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v10i32_to_v5i64:
; SI: ScratchSize: 0
; Kernel: <10 x i32> -> <5 x i64> bitcast inside a self-looping block.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v10i32_to_v5i64(i32 %cond, ptr addrspace(1) %out, <10 x i32> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <10 x i32> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <10 x i32> %phi_value to <5 x i64>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <5 x i64> [zeroinitializer, %entry], [%cast, %if]
store <5 x i64> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v40i8_to_v5f64:
; SI: ScratchSize: 0
; Kernel: <40 x i8> -> <5 x double> bitcast inside a self-looping block.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v40i8_to_v5f64(i32 %cond, ptr addrspace(1) %out, <40 x i8> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <40 x i8> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <40 x i8> %phi_value to <5 x double>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <5 x double> [zeroinitializer, %entry], [%cast, %if]
store <5 x double> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v40i8_to_v5i64:
; SI: ScratchSize: 0
; Kernel: <40 x i8> -> <5 x i64> bitcast inside a self-looping block.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v40i8_to_v5i64(i32 %cond, ptr addrspace(1) %out, <40 x i8> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <40 x i8> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <40 x i8> %phi_value to <5 x i64>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <5 x i64> [zeroinitializer, %entry], [%cast, %if]
store <5 x i64> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v5f64_to_v10f32:
; SI: ScratchSize: 0
; Kernel: <5 x double> -> <10 x float> bitcast inside a self-looping block.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v5f64_to_v10f32(i32 %cond, ptr addrspace(1) %out, <5 x double> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <5 x double> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <5 x double> %phi_value to <10 x float>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <10 x float> [zeroinitializer, %entry], [%cast, %if]
store <10 x float> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v5f64_to_v10i32:
; SI: ScratchSize: 0
; Kernel: <5 x double> -> <10 x i32> bitcast inside a self-looping block.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v5f64_to_v10i32(i32 %cond, ptr addrspace(1) %out, <5 x double> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <5 x double> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <5 x double> %phi_value to <10 x i32>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <10 x i32> [zeroinitializer, %entry], [%cast, %if]
store <10 x i32> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v5i64_to_v10f32:
; SI: ScratchSize: 0
; Kernel: <5 x i64> -> <10 x float> bitcast inside a self-looping block.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v5i64_to_v10f32(i32 %cond, ptr addrspace(1) %out, <5 x i64> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <5 x i64> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <5 x i64> %phi_value to <10 x float>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <10 x float> [zeroinitializer, %entry], [%cast, %if]
store <10 x float> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v5i64_to_v10i32:
; SI: ScratchSize: 0
; Kernel: <5 x i64> -> <10 x i32> bitcast inside a self-looping block.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v5i64_to_v10i32(i32 %cond, ptr addrspace(1) %out, <5 x i64> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <5 x i64> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <5 x i64> %phi_value to <10 x i32>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <10 x i32> [zeroinitializer, %entry], [%cast, %if]
store <10 x i32> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v6f64_to_v12i32:
; SI: ScratchSize: 0
; Kernel: <6 x double> -> <12 x i32> bitcast inside a self-looping block.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v6f64_to_v12i32(i32 %cond, ptr addrspace(1) %out, <6 x double> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <6 x double> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <6 x double> %phi_value to <12 x i32>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <12 x i32> [zeroinitializer, %entry], [%cast, %if]
store <12 x i32> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v6f64_to_v12f32:
; SI: ScratchSize: 0
; Kernel: <6 x double> -> <12 x float> bitcast inside a self-looping block.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v6f64_to_v12f32(i32 %cond, ptr addrspace(1) %out, <6 x double> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <6 x double> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <6 x double> %phi_value to <12 x float>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <12 x float> [zeroinitializer, %entry], [%cast, %if]
store <12 x float> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v12i32_to_v6i64:
; SI: ScratchSize: 0
; Kernel: <12 x i32> -> <6 x i64> bitcast inside a self-looping block.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v12i32_to_v6i64(i32 %cond, ptr addrspace(1) %out, <12 x i32> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <12 x i32> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <12 x i32> %phi_value to <6 x i64>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <6 x i64> [zeroinitializer, %entry], [%cast, %if]
store <6 x i64> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v12i32_to_v6f64:
; SI: ScratchSize: 0
; Kernel: <12 x i32> -> <6 x double> bitcast inside a self-looping block.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v12i32_to_v6f64(i32 %cond, ptr addrspace(1) %out, <12 x i32> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <12 x i32> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <12 x i32> %phi_value to <6 x double>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <6 x double> [zeroinitializer, %entry], [%cast, %if]
store <6 x double> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v6i64_to_v12i32:
; SI: ScratchSize: 0
; Kernel: <6 x i64> -> <12 x i32> bitcast inside a self-looping block.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v6i64_to_v12i32(i32 %cond, ptr addrspace(1) %out, <6 x i64> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <6 x i64> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <6 x i64> %phi_value to <12 x i32>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <12 x i32> [zeroinitializer, %entry], [%cast, %if]
store <12 x i32> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v7i64_to_v14i32:
; SI: ScratchSize: 0
; Kernel: <7 x i64> -> <14 x i32> bitcast inside a self-looping block.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v7i64_to_v14i32(i32 %cond, ptr addrspace(1) %out, <7 x i64> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <7 x i64> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <7 x i64> %phi_value to <14 x i32>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <14 x i32> [zeroinitializer, %entry], [%cast, %if]
store <14 x i32> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v7f64_to_v14i32:
; SI: ScratchSize: 0
; Kernel: <7 x double> -> <14 x i32> bitcast inside a self-looping block.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v7f64_to_v14i32(i32 %cond, ptr addrspace(1) %out, <7 x double> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <7 x double> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <7 x double> %phi_value to <14 x i32>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <14 x i32> [zeroinitializer, %entry], [%cast, %if]
store <14 x i32> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v9i64_to_v18i32:
; SI: ScratchSize: 0
; Kernel: <9 x i64> -> <18 x i32> bitcast inside a self-looping block.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v9i64_to_v18i32(i32 %cond, ptr addrspace(1) %out, <9 x i64> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <9 x i64> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <9 x i64> %phi_value to <18 x i32>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <18 x i32> [zeroinitializer, %entry], [%cast, %if]
store <18 x i32> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v10i64_to_v20i32:
; SI: ScratchSize: 0
; Kernel: <10 x i64> -> <20 x i32> bitcast inside a self-looping block.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v10i64_to_v20i32(i32 %cond, ptr addrspace(1) %out, <10 x i64> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <10 x i64> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <10 x i64> %phi_value to <20 x i32>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <20 x i32> [zeroinitializer, %entry], [%cast, %if]
store <20 x i32> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v11i64_to_v20i32:
; SI: ScratchSize: 0
; Kernel: <11 x i64> -> <22 x i32> bitcast inside a self-looping block.
; NOTE(review): the function name says v20i32, but the cast, phi and store
; below all use <22 x i32>; the name understates the destination width.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v11i64_to_v20i32(i32 %cond, ptr addrspace(1) %out, <11 x i64> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <11 x i64> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <11 x i64> %phi_value to <22 x i32>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <22 x i32> [zeroinitializer, %entry], [%cast, %if]
store <22 x i32> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v12i64_to_v22i32:
; SI: ScratchSize: 0
; Kernel: <12 x i64> -> <24 x i32> bitcast inside a self-looping block.
; NOTE(review): the function name says v22i32, but the cast, phi and store
; below all use <24 x i32>; the name understates the destination width.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v12i64_to_v22i32(i32 %cond, ptr addrspace(1) %out, <12 x i64> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <12 x i64> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <12 x i64> %phi_value to <24 x i32>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <24 x i32> [zeroinitializer, %entry], [%cast, %if]
store <24 x i32> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v13i64_to_v24i32:
; SI: ScratchSize: 0
; Kernel: <13 x i64> -> <26 x i32> bitcast inside a self-looping block.
; NOTE(review): the function name says v24i32, but the cast, phi and store
; below all use <26 x i32>; the name understates the destination width.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v13i64_to_v24i32(i32 %cond, ptr addrspace(1) %out, <13 x i64> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <13 x i64> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <13 x i64> %phi_value to <26 x i32>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <26 x i32> [zeroinitializer, %entry], [%cast, %if]
store <26 x i32> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v14i64_to_v26i32:
; SI: ScratchSize: 0
; Kernel: <14 x i64> -> <28 x i32> bitcast inside a self-looping block.
; NOTE(review): the function name says v26i32, but the cast, phi and store
; below all use <28 x i32>; the name understates the destination width.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v14i64_to_v26i32(i32 %cond, ptr addrspace(1) %out, <14 x i64> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <14 x i64> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <14 x i64> %phi_value to <28 x i32>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <28 x i32> [zeroinitializer, %entry], [%cast, %if]
store <28 x i32> %phi_cast, ptr addrspace(1) %out
ret void
}
; FUNC-LABEL: {{^}}bitcast_v15i64_to_v26i32:
; SI: ScratchSize: 0
; Kernel: <15 x i64> -> <30 x i32> bitcast inside a self-looping block.
; NOTE(review): the function name says v26i32, but the cast, phi and store
; below all use <30 x i32>; the name understates the destination width.
; The check line above this function expects zero scratch (stack) usage.
;   entry: branches to %if when %cond == 0, otherwise straight to %end.
;   if:    phi of zeroinitializer (from %entry) / %value (back edge), then the
;          bitcast; loops to itself when %cond == 1, otherwise goes to %end.
;   end:   stores zeroinitializer (from %entry) or the cast (from %if).
define amdgpu_kernel void @bitcast_v15i64_to_v26i32(i32 %cond, ptr addrspace(1) %out, <15 x i64> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
if:
%phi_value = phi <15 x i64> [zeroinitializer, %entry], [%value, %if]
%cast = bitcast <15 x i64> %phi_value to <30 x i32>
%cmp1 = icmp eq i32 %cond, 1
br i1 %cmp1, label %if, label %end
end:
%phi_cast = phi <30 x i32> [zeroinitializer, %entry], [%cast, %if]
store <30 x i32> %phi_cast, ptr addrspace(1) %out
ret void
}