AMDGPU: Replace tests using undef in shufflevector with poison (#130899)
This commit is contained in:
parent 2fbddfbdc0
commit b76e396990
@@ -12,7 +12,7 @@ define void @value_finder_bug(ptr addrspace(5) %store_ptr, ptr addrspace(4) %ptr
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%vec = load <4 x float>, ptr addrspace(4) %ptr, align 4
|
||||
%vec.3 = extractelement <4 x float> %vec, i32 3
|
||||
%shuffle = shufflevector <4 x float> %vec, <4 x float> undef, <2 x i32> <i32 2, i32 undef>
|
||||
%shuffle = shufflevector <4 x float> %vec, <4 x float> poison, <2 x i32> <i32 2, i32 poison>
|
||||
%new_vec = insertelement <2 x float> %shuffle, float %vec.3, i32 1
|
||||
store <2 x float> %new_vec, ptr addrspace(5) %store_ptr, align 8
|
||||
ret void
|
||||
|
@@ -870,10 +870,10 @@ define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) {
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
entry:
|
||||
%insert = insertelement <8 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>, double %val, i32 %idx
|
||||
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
||||
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
||||
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
||||
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
||||
%vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
|
||||
%vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
|
||||
%vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
|
||||
%vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
|
||||
store volatile <2 x double> %vec.0, ptr addrspace(1) undef
|
||||
store volatile <2 x double> %vec.1, ptr addrspace(1) undef
|
||||
store volatile <2 x double> %vec.2, ptr addrspace(1) undef
|
||||
@@ -1081,10 +1081,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, do
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
|
||||
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
||||
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
||||
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
||||
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
||||
%vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
|
||||
%vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
|
||||
%vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
|
||||
%vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
|
||||
store volatile <2 x double> %vec.0, ptr addrspace(1) undef
|
||||
store volatile <2 x double> %vec.1, ptr addrspace(1) undef
|
||||
store volatile <2 x double> %vec.2, ptr addrspace(1) undef
|
||||
@@ -1229,10 +1229,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_v_s(<8 x double> inreg %vec, do
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
|
||||
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
||||
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
||||
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
||||
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
||||
%vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
|
||||
%vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
|
||||
%vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
|
||||
%vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
|
||||
store volatile <2 x double> %vec.0, ptr addrspace(1) undef
|
||||
store volatile <2 x double> %vec.1, ptr addrspace(1) undef
|
||||
store volatile <2 x double> %vec.2, ptr addrspace(1) undef
|
||||
@@ -1289,10 +1289,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_s_s(<8 x double> %vec, double i
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
|
||||
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
||||
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
||||
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
||||
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
||||
%vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
|
||||
%vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
|
||||
%vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
|
||||
%vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
|
||||
store volatile <2 x double> %vec.0, ptr addrspace(1) undef
|
||||
store volatile <2 x double> %vec.1, ptr addrspace(1) undef
|
||||
store volatile <2 x double> %vec.2, ptr addrspace(1) undef
|
||||
@@ -1494,10 +1494,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_v_v(<8 x double> inreg %vec, do
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
|
||||
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
||||
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
||||
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
||||
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
||||
%vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
|
||||
%vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
|
||||
%vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
|
||||
%vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
|
||||
store volatile <2 x double> %vec.0, ptr addrspace(1) undef
|
||||
store volatile <2 x double> %vec.1, ptr addrspace(1) undef
|
||||
store volatile <2 x double> %vec.2, ptr addrspace(1) undef
|
||||
@@ -1617,10 +1617,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_s_v(<8 x double> %vec, double i
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
|
||||
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
||||
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
||||
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
||||
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
||||
%vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
|
||||
%vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
|
||||
%vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
|
||||
%vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
|
||||
store volatile <2 x double> %vec.0, ptr addrspace(1) undef
|
||||
store volatile <2 x double> %vec.1, ptr addrspace(1) undef
|
||||
store volatile <2 x double> %vec.2, ptr addrspace(1) undef
|
||||
@@ -1677,10 +1677,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_s(<8 x double> %vec, double %
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
|
||||
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
||||
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
||||
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
||||
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
||||
%vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
|
||||
%vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
|
||||
%vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
|
||||
%vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
|
||||
store volatile <2 x double> %vec.0, ptr addrspace(1) undef
|
||||
store volatile <2 x double> %vec.1, ptr addrspace(1) undef
|
||||
store volatile <2 x double> %vec.2, ptr addrspace(1) undef
|
||||
@@ -1794,10 +1794,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v(<8 x double> %vec, double %
|
||||
; GFX11-NEXT: s_endpgm
|
||||
entry:
|
||||
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
|
||||
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
||||
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
||||
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
||||
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
||||
%vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
|
||||
%vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
|
||||
%vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
|
||||
%vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
|
||||
store volatile <2 x double> %vec.0, ptr addrspace(1) undef
|
||||
store volatile <2 x double> %vec.1, ptr addrspace(1) undef
|
||||
store volatile <2 x double> %vec.2, ptr addrspace(1) undef
|
||||
@@ -2401,10 +2401,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_s_s_add_1(<8 x double> inreg %v
|
||||
entry:
|
||||
%idx.add = add i32 %idx, 1
|
||||
%insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
|
||||
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
||||
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
||||
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
||||
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
||||
%vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
|
||||
%vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
|
||||
%vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
|
||||
%vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
|
||||
store volatile <2 x double> %vec.0, ptr addrspace(1) undef
|
||||
store volatile <2 x double> %vec.1, ptr addrspace(1) undef
|
||||
store volatile <2 x double> %vec.2, ptr addrspace(1) undef
|
||||
@@ -2525,10 +2525,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v_add_1(<8 x double> %vec, do
|
||||
entry:
|
||||
%idx.add = add i32 %idx, 1
|
||||
%insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
|
||||
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
||||
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
|
||||
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
|
||||
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
|
||||
%vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
|
||||
%vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
|
||||
%vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
|
||||
%vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
|
||||
store volatile <2 x double> %vec.0, ptr addrspace(1) undef
|
||||
store volatile <2 x double> %vec.1, ptr addrspace(1) undef
|
||||
store volatile <2 x double> %vec.2, ptr addrspace(1) undef
|
||||
|
@@ -1519,7 +1519,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
|
||||
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
|
||||
; CHECK-NEXT: S_ENDPGM 0
|
||||
%load = load <2 x i64>, ptr addrspace(1) null
|
||||
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 poison>, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
|
||||
call void @external_void_func_v3i64(<3 x i64> %val)
|
||||
ret void
|
||||
|
@@ -322,7 +322,7 @@ define i32 @v_sdot2_shuffle10_a(<2 x i16> %a, <2 x i16> %b, i32 %c) {
|
||||
; GFX10-NEXT: v_alignbit_b32 v0, v0, v0, 16
|
||||
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%shuf.a = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
|
||||
%shuf.a = shufflevector <2 x i16> %a, <2 x i16> poison, <2 x i32> <i32 1, i32 0>
|
||||
%r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %shuf.a, <2 x i16> %b, i32 %c, i1 false)
|
||||
ret i32 %r
|
||||
}
|
||||
@@ -349,7 +349,7 @@ define i32 @v_sdot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) {
|
||||
; GFX10-NEXT: v_alignbit_b32 v1, v1, v1, 16
|
||||
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%shuf.b = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
|
||||
%shuf.b = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> <i32 1, i32 0>
|
||||
%r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %shuf.b, i32 %c, i1 false)
|
||||
ret i32 %r
|
||||
}
|
||||
|
@@ -306,7 +306,7 @@ define i32 @v_udot2_shuffle10_a(<2 x i16> %a, <2 x i16> %b, i32 %c) {
|
||||
; GFX10-NEXT: v_alignbit_b32 v0, v0, v0, 16
|
||||
; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%shuf.a = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
|
||||
%shuf.a = shufflevector <2 x i16> %a, <2 x i16> poison, <2 x i32> <i32 1, i32 0>
|
||||
%r = call i32 @llvm.amdgcn.udot2(<2 x i16> %shuf.a, <2 x i16> %b, i32 %c, i1 false)
|
||||
ret i32 %r
|
||||
}
|
||||
@@ -332,7 +332,7 @@ define i32 @v_udot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) {
|
||||
; GFX10-NEXT: v_alignbit_b32 v1, v1, v1, 16
|
||||
; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
|
||||
; GFX10-NEXT: s_setpc_b64 s[30:31]
|
||||
%shuf.b = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
|
||||
%shuf.b = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> <i32 1, i32 0>
|
||||
%r = call i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %shuf.b, i32 %c, i1 false)
|
||||
ret i32 %r
|
||||
}
|
||||
|
@@ -121,7 +121,7 @@ define amdgpu_ps i32 @s_trunc_v2i32_to_v2i16(<2 x i32> inreg %src) {
|
||||
; ; FIXME: G_INSERT mishandled
|
||||
; define <2 x i32> @v_trunc_v3i32_to_v3i16(<3 x i32> %src) {
|
||||
; %trunc = trunc <3 x i32> %src to <3 x i16>
|
||||
; %ext = shufflevector <3 x i16> %trunc, <3 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; %ext = shufflevector <3 x i16> %trunc, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; %cast = bitcast <4 x i16> %ext to <2 x i32>
|
||||
; ret <2 x i32> %cast
|
||||
; }
|
||||
@@ -129,7 +129,7 @@ define amdgpu_ps i32 @s_trunc_v2i32_to_v2i16(<2 x i32> inreg %src) {
|
||||
; ; FIXME: G_INSERT mishandled
|
||||
; define amdgpu_ps <2 x i32> @s_trunc_v3i32_to_v3i16(<3 x i32> inreg %src) {
|
||||
; %trunc = trunc <3 x i32> %src to <3 x i16>
|
||||
; %ext = shufflevector <3 x i16> %trunc, <3 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; %ext = shufflevector <3 x i16> %trunc, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; %cast = bitcast <4 x i16> %ext to <2 x i32>
|
||||
; ret <2 x i32> %cast
|
||||
; }
|
||||
|
@@ -426,7 +426,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<8 x half> %A, <8 x
|
||||
; GFX12-NEXT: s_endpgm
|
||||
bb:
|
||||
%C = load <16 x half>, ptr %Caddr
|
||||
%C_shuffle = shufflevector <16 x half> %C, <16 x half> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
|
||||
%C_shuffle = shufflevector <16 x half> %C, <16 x half> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
|
||||
%fneg.C_shuffle = fneg <8 x half> %C_shuffle
|
||||
%res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> %fneg.C_shuffle , i1 0)
|
||||
store <8 x half> %res, ptr addrspace(1) %out
|
||||
|
@@ -381,7 +381,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<4 x half> %A, <4 x
|
||||
; GFX12-NEXT: s_endpgm
|
||||
bb:
|
||||
%C = load <8 x half>, ptr %Caddr
|
||||
%C_shuffle = shufflevector <8 x half> %C, <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%C_shuffle = shufflevector <8 x half> %C, <8 x half> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%fneg.C_shuffle = fneg <4 x half> %C_shuffle
|
||||
%res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half> %A, <4 x half> %B, <4 x half> %fneg.C_shuffle , i1 0)
|
||||
store <4 x half> %res, ptr addrspace(1) %out
|
||||
|
@@ -9,7 +9,7 @@ define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 {
|
||||
main_body:
|
||||
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
|
||||
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
|
||||
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
|
||||
%tmp4 = extractelement <4 x float> %tmp3, i32 0
|
||||
store volatile float %tmp4, ptr addrspace(1) undef
|
||||
@@ -25,7 +25,7 @@ define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 {
|
||||
main_body:
|
||||
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
|
||||
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
|
||||
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 poison, i32 poison>
|
||||
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
|
||||
%tmp4 = extractelement <4 x float> %tmp3, i32 1
|
||||
store volatile float %tmp4, ptr addrspace(1) undef
|
||||
@@ -41,7 +41,7 @@ define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 {
|
||||
main_body:
|
||||
%tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
|
||||
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
|
||||
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
|
||||
%tmp4 = extractelement <4 x float> %tmp3, i32 0
|
||||
store volatile float %tmp4, ptr addrspace(1) undef
|
||||
@@ -57,7 +57,7 @@ define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 {
|
||||
main_body:
|
||||
%tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
|
||||
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
|
||||
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 poison, i32 poison>
|
||||
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
|
||||
%tmp4 = extractelement <4 x float> %tmp3, i32 1
|
||||
store volatile float %tmp4, ptr addrspace(1) undef
|
||||
@@ -68,7 +68,7 @@ define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 {
|
||||
main_body:
|
||||
%tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
|
||||
%tmp1 = bitcast <4 x float> %tmp to <4 x i32>
|
||||
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
|
||||
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
|
||||
%tmp4 = extractelement <4 x float> %tmp3, i32 0
|
||||
store volatile float %tmp4, ptr addrspace(1) undef
|
||||
|
@@ -477,7 +477,7 @@ entry:
|
||||
|
||||
then:
|
||||
%x.1 = insertelement <5 x double> <double 3.140000e+00, double poison, double poison, double poison, double poison>, double %x, i32 %idx
|
||||
%0 = shufflevector <5 x double> %x.1, <5 x double> <double poison, double poison, double poison, double 6.140000e+00, double 9.900000e+00>, <5 x i32> <i32 0, i32 1, i32 undef, i32 8, i32 9>
|
||||
%0 = shufflevector <5 x double> %x.1, <5 x double> <double poison, double poison, double poison, double 6.140000e+00, double 9.900000e+00>, <5 x i32> <i32 0, i32 1, i32 poison, i32 8, i32 9>
|
||||
%x.4 = insertelement <5 x double> %0, double %x, i64 2
|
||||
br label %finally
|
||||
|
||||
|
@@ -18,7 +18,7 @@ define amdgpu_hs void @_amdgpu_hs_main(i32 inreg %arg, i32 inreg %arg1, i32 inre
|
||||
.beginls: ; preds = %.entry
|
||||
%tmp15 = extractelement <6 x i32> %arg8, i32 3
|
||||
%.0.vec.insert.i = insertelement <2 x i32> poison, i32 %tmp15, i32 0
|
||||
%.4.vec.insert.i = shufflevector <2 x i32> %.0.vec.insert.i, <2 x i32> undef, <2 x i32> <i32 0, i32 3>
|
||||
%.4.vec.insert.i = shufflevector <2 x i32> %.0.vec.insert.i, <2 x i32> poison, <2 x i32> <i32 0, i32 3>
|
||||
%tmp16 = bitcast <2 x i32> %.4.vec.insert.i to i64
|
||||
br label %.endls
|
||||
|
||||
|
@@ -49,7 +49,7 @@ main_body:
|
||||
%buf1.int = ptrtoint ptr addrspace(8) %buf1 to i128
|
||||
%buf1.vec = bitcast i128 %buf1.int to <4 x i32>
|
||||
call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> %buf1.vec, ptr addrspace(8) %buf2, i32 0, i32 0, i32 0)
|
||||
%shuffled = shufflevector <2 x ptr addrspace(8)> %buffers, <2 x ptr addrspace(8)> undef, <2 x i32> <i32 1, i32 0>
|
||||
%shuffled = shufflevector <2 x ptr addrspace(8)> %buffers, <2 x ptr addrspace(8)> poison, <2 x i32> <i32 1, i32 0>
|
||||
%somewhere.next = getelementptr <2 x ptr addrspace(8)>, ptr addrspace(1) %somewhere, i64 1
|
||||
store <2 x ptr addrspace(8)> %shuffled, ptr addrspace(1) %somewhere.next
|
||||
ret void
|
||||
|
@@ -45,7 +45,7 @@ bb1789: ; preds = %bb1750
|
||||
%i1879 = bitcast <3 x i32> %i1878 to <3 x float>
|
||||
%i1881 = fmul reassoc nnan nsz arcp contract afn <3 x float> %i1540, %i1879
|
||||
%i1882 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> poison, i32 poison, i32 0)
|
||||
%i1883 = shufflevector <3 x i32> %i1882, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
|
||||
%i1883 = shufflevector <3 x i32> %i1882, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
|
||||
%i1884 = bitcast <4 x i32> %i1883 to <4 x float>
|
||||
%i1885 = shufflevector <4 x float> %i1884, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%i1886 = insertelement <3 x i32> poison, i32 %i1819, i64 0
|
||||
@@ -57,7 +57,7 @@ bb1789: ; preds = %bb1750
|
||||
%i1892 = fmul reassoc nnan nsz arcp contract afn <3 x float> %i1885, %i1891
|
||||
%i1893 = fmul reassoc nnan nsz arcp contract afn <3 x float> %i1892, %i1881
|
||||
%i1894 = bitcast <3 x float> %i1893 to <3 x i32>
|
||||
%i1895 = shufflevector <3 x i32> %i1894, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
|
||||
%i1895 = shufflevector <3 x i32> %i1894, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
|
||||
%i1896 = insertelement <4 x i32> %i1895, i32 %i1819, i64 3
|
||||
br label %bb1897
|
||||
|
||||
@@ -70,7 +70,7 @@ bb1897: ; preds = %bb1789, %bb1787
|
||||
%i1901 = bitcast <3 x i32> %i1900 to <3 x float>
|
||||
%i1902 = fadd reassoc nnan nsz arcp contract afn <3 x float> %i1901, %i1899
|
||||
%i1903 = bitcast <3 x float> %i1902 to <3 x i32>
|
||||
%i1907 = shufflevector <3 x i32> %i1903, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
|
||||
%i1907 = shufflevector <3 x i32> %i1903, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
|
||||
%i1908 = shufflevector <4 x i32> %i1907, <4 x i32> %__llpc_global_proxy_r11.19, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
|
||||
%i1914 = shufflevector <4 x i32> %i1908, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
|
||||
%__llpc_global_proxy_r3.12.vec.extract2358 = extractelement <2 x i32> zeroinitializer, i64 1
|
||||
|
@@ -168,7 +168,7 @@ define void @undef_lo2_v4i16(<2 x i16> %arg0) {
|
||||
; GFX8-NEXT: ; use v[0:1]
|
||||
; GFX8-NEXT: ;;#ASMEND
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%undef.lo = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
|
||||
%undef.lo = shufflevector <2 x i16> %arg0, <2 x i16> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
|
||||
call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.lo);
|
||||
ret void
|
||||
}
|
||||
@@ -193,7 +193,7 @@ define void @undef_lo2_v4f16(<2 x half> %arg0) {
|
||||
; GFX8-NEXT: ; use v[0:1]
|
||||
; GFX8-NEXT: ;;#ASMEND
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%undef.lo = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
|
||||
%undef.lo = shufflevector <2 x half> %arg0, <2 x half> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
|
||||
call void asm sideeffect "; use $0", "v"(<4 x half> %undef.lo);
|
||||
ret void
|
||||
}
|
||||
@@ -348,7 +348,7 @@ define void @undef_hi2_v4i16(<2 x i16> %arg0) {
|
||||
; GFX8-NEXT: ; use v[0:1]
|
||||
; GFX8-NEXT: ;;#ASMEND
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%undef.hi = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%undef.hi = shufflevector <2 x i16> %arg0, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.hi);
|
||||
ret void
|
||||
}
|
||||
@@ -369,7 +369,7 @@ define void @undef_hi2_v4f16(<2 x half> %arg0) {
|
||||
; GFX8-NEXT: ; use v[0:1]
|
||||
; GFX8-NEXT: ;;#ASMEND
|
||||
; GFX8-NEXT: s_setpc_b64 s[30:31]
|
||||
%undef.hi = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%undef.hi = shufflevector <2 x half> %arg0, <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
|
||||
ret void
|
||||
}
|
||||
|
@@ -1454,7 +1454,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
|
||||
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
|
||||
; HSA-NEXT: s_endpgm
|
||||
%load = load <2 x i64>, ptr addrspace(1) null
|
||||
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 poison>, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
|
||||
call void @external_void_func_v3i64(<3 x i64> %val)
|
||||
ret void
|
||||
|
@@ -1293,7 +1293,7 @@ define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_shuf(ptr addrspace(1) %o
|
||||
%out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
|
||||
%a = load <2 x half>, ptr addrspace(1) %gep0
|
||||
%add = fadd <2 x half> %a, <half 1.0, half 1.0>
|
||||
%shuf = shufflevector <2 x half> %add, <2 x half> undef, <2 x i32> <i32 1, i32 0>
|
||||
%shuf = shufflevector <2 x half> %add, <2 x half> poison, <2 x i32> <i32 1, i32 0>
|
||||
|
||||
%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %shuf, <2 x half> zeroinitializer)
|
||||
%clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
|
||||
|
@@ -3728,7 +3728,7 @@ define amdgpu_kernel void @v_clamp_v2f16_shuffle(ptr addrspace(1) %out, ptr addr
|
||||
%gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
|
||||
%out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
|
||||
%a = load <2 x half>, ptr addrspace(1) %gep0
|
||||
%shuf = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 0>
|
||||
%shuf = shufflevector <2 x half> %a, <2 x half> poison, <2 x i32> <i32 1, i32 0>
|
||||
%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %shuf, <2 x half> zeroinitializer)
|
||||
%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
|
||||
|
||||
|
@@ -14,7 +14,7 @@ bb:
|
||||
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep1 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg, i32 %lid
|
||||
%load = load <4 x i32>, ptr addrspace(1) %gep1, align 16
|
||||
%shuffle = shufflevector <4 x i32> %load, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
|
||||
%shuffle = shufflevector <4 x i32> %load, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
|
||||
%gep2 = getelementptr inbounds <2 x i32>, ptr addrspace(1) %arg1, i32 %lid
|
||||
store <2 x i32> %shuffle, ptr addrspace(1) %gep2, align 8
|
||||
ret void
|
||||
@@ -27,9 +27,9 @@ bb:
|
||||
define amdgpu_kernel void @test_vector_creation() #0 {
|
||||
entry:
|
||||
%tmp231 = load <4 x i16>, ptr addrspace(1) undef, align 2
|
||||
%vext466 = shufflevector <4 x i16> %tmp231, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%vecinit467 = shufflevector <8 x i16> undef, <8 x i16> %vext466, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 10, i32 11, i32 undef, i32 undef>
|
||||
%vecinit471 = shufflevector <8 x i16> %vecinit467, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
|
||||
%vext466 = shufflevector <4 x i16> %tmp231, <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%vecinit467 = shufflevector <8 x i16> poison, <8 x i16> %vext466, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison>
|
||||
%vecinit471 = shufflevector <8 x i16> %vecinit467, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
|
||||
store <8 x i16> %vecinit471, ptr addrspace(1) undef, align 16
|
||||
ret void
|
||||
}
|
||||
|
@ -23,12 +23,12 @@ for.body: ; preds = %for.body, %entry
|
||||
%i.01 = phi i32 [ 0, %entry ], [ %tmp19, %for.body ]
|
||||
%vecload2 = load <8 x i32>, ptr addrspace(1) %src, align 32
|
||||
%0 = bitcast <8 x i32> %vecload2 to <32 x i8>
|
||||
%tmp5 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%tmp8 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%tmp5 = shufflevector <32 x i8> %0, <32 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%tmp8 = shufflevector <32 x i8> %0, <32 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%tmp9 = add nsw <8 x i8> %tmp5, %tmp8
|
||||
%tmp12 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
|
||||
%tmp12 = shufflevector <32 x i8> %0, <32 x i8> poison, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
|
||||
%tmp13 = add nsw <8 x i8> %tmp9, %tmp12
|
||||
%tmp16 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
||||
%tmp16 = shufflevector <32 x i8> %0, <32 x i8> poison, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
||||
%tmp17 = add nsw <8 x i8> %tmp13, %tmp16
|
||||
%scevgep = getelementptr <8 x i8>, ptr addrspace(1) %result, i32 %i.01
|
||||
%1 = bitcast <8 x i8> %tmp17 to <2 x i32>
|
||||
|
@ -3,7 +3,7 @@
|
||||
; CHECK: s_waitcnt
|
||||
define <2 x i16> @main(<2 x float>) #0 {
|
||||
%2 = bitcast <2 x float> %0 to <4 x i16>
|
||||
%3 = shufflevector <4 x i16> %2, <4 x i16> undef, <2 x i32> <i32 0, i32 undef>
|
||||
%3 = shufflevector <4 x i16> %2, <4 x i16> poison, <2 x i32> <i32 0, i32 poison>
|
||||
%4 = extractelement <4 x i16> %2, i32 0
|
||||
%5 = insertelement <2 x i16> %3, i16 %4, i32 0
|
||||
ret <2 x i16> %5
|
||||
|
@ -289,8 +289,8 @@ define amdgpu_kernel void @test_concat_v16i16(ptr addrspace(1) %out, <16 x i16>
|
||||
define amdgpu_kernel void @concat_vector_crash(ptr addrspace(1) %out, ptr addrspace(1) %in) {
|
||||
bb:
|
||||
%tmp = load <2 x float>, ptr addrspace(1) %in, align 4
|
||||
%tmp1 = shufflevector <2 x float> %tmp, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%tmp2 = shufflevector <8 x float> undef, <8 x float> %tmp1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
|
||||
%tmp1 = shufflevector <2 x float> %tmp, <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%tmp2 = shufflevector <8 x float> poison, <8 x float> %tmp1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
|
||||
store <8 x float> %tmp2, ptr addrspace(1) %out, align 32
|
||||
ret void
|
||||
}
|
||||
@ -301,8 +301,8 @@ define amdgpu_kernel void @concat_vector_crash2(ptr addrspace(1) %out, ptr addrs
|
||||
%tmp = load i32, ptr addrspace(1) %in, align 1
|
||||
%tmp1 = trunc i32 %tmp to i24
|
||||
%tmp2 = bitcast i24 %tmp1 to <3 x i8>
|
||||
%tmp3 = shufflevector <3 x i8> %tmp2, <3 x i8> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef>
|
||||
%tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 7, i8 8>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 15>
|
||||
%tmp3 = shufflevector <3 x i8> %tmp2, <3 x i8> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison>
|
||||
%tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 7, i8 8>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 15>
|
||||
store <8 x i8> %tmp4, ptr addrspace(1) %out, align 8
|
||||
ret void
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ define amdgpu_ps void @main(i32 %in1, i32 inreg %arg) local_unnamed_addr {
|
||||
|
||||
bb:
|
||||
%__llpc_global_proxy_r5.12.vec.insert = insertelement <4 x i32> poison, i32 %in1, i32 3
|
||||
%tmp3 = shufflevector <4 x i32> %__llpc_global_proxy_r5.12.vec.insert, <4 x i32> undef, <3 x i32> <i32 undef, i32 undef, i32 1>
|
||||
%tmp3 = shufflevector <4 x i32> %__llpc_global_proxy_r5.12.vec.insert, <4 x i32> poison, <3 x i32> <i32 poison, i32 poison, i32 1>
|
||||
%tmp4 = bitcast <3 x i32> %tmp3 to <3 x float>
|
||||
%a2.i123 = extractelement <3 x float> %tmp4, i32 2
|
||||
%tmp5 = bitcast float %a2.i123 to i32
|
||||
@ -26,7 +26,7 @@ bb:
|
||||
|
||||
bb12:
|
||||
%__llpc_global_proxy_r2.0 = phi <4 x i32> [ %__llpc_global_proxy_r2.0.vec.insert196, %bb ], [ poison, %.entry ]
|
||||
%tmp6 = shufflevector <4 x i32> %__llpc_global_proxy_r2.0, <4 x i32> undef, <3 x i32> <i32 1, i32 2, i32 3>
|
||||
%tmp6 = shufflevector <4 x i32> %__llpc_global_proxy_r2.0, <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3>
|
||||
%tmp7 = bitcast <3 x i32> %tmp6 to <3 x float>
|
||||
%a0.i = extractelement <3 x float> %tmp7, i32 0
|
||||
ret void
|
||||
|
@ -146,7 +146,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
|
||||
%0 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
|
||||
%.i2243 = extractelement <3 x float> %0, i32 2
|
||||
%1 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 0, i32 0)
|
||||
%2 = shufflevector <3 x i32> %1, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
|
||||
%2 = shufflevector <3 x i32> %1, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
|
||||
%3 = bitcast <4 x i32> %2 to <4 x float>
|
||||
%.i2248 = extractelement <4 x float> %3, i32 2
|
||||
%.i2249 = fmul reassoc nnan nsz arcp contract afn float %.i2243, %.i2248
|
||||
@ -159,17 +159,17 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
|
||||
%.i0364 = extractelement <2 x float> %7, i32 0
|
||||
%8 = call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
|
||||
%9 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 112, i32 0)
|
||||
%10 = shufflevector <3 x i32> %9, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
|
||||
%10 = shufflevector <3 x i32> %9, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
|
||||
%11 = bitcast <4 x i32> %10 to <4 x float>
|
||||
%.i2360 = extractelement <4 x float> %11, i32 2
|
||||
%.i2363 = fmul reassoc nnan nsz arcp contract afn float %.i2360, %8
|
||||
%12 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 96, i32 0)
|
||||
%13 = shufflevector <3 x i32> %12, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
|
||||
%13 = shufflevector <3 x i32> %12, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
|
||||
%14 = bitcast <4 x i32> %13 to <4 x float>
|
||||
%.i2367 = extractelement <4 x float> %14, i32 2
|
||||
%.i2370 = fmul reassoc nnan nsz arcp contract afn float %.i0364, %.i2367
|
||||
%15 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 32, i32 0)
|
||||
%16 = shufflevector <3 x i32> %15, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
|
||||
%16 = shufflevector <3 x i32> %15, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
|
||||
%17 = bitcast <4 x i32> %16 to <4 x float>
|
||||
%.i2373 = extractelement <4 x float> %17, i32 2
|
||||
%.i2376 = fsub reassoc nnan nsz arcp contract afn float %.i2373, %.i2370
|
||||
@ -212,12 +212,12 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
|
||||
%.i2466 = fmul reassoc nnan nsz arcp contract afn float %.i2465, %43
|
||||
%.i2469 = fmul reassoc nnan nsz arcp contract afn float %.i2415, %.i2466
|
||||
%45 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 64, i32 0)
|
||||
%46 = shufflevector <3 x i32> %45, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
|
||||
%46 = shufflevector <3 x i32> %45, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
|
||||
%47 = bitcast <4 x i32> %46 to <4 x float>
|
||||
%.i2476 = extractelement <4 x float> %47, i32 2
|
||||
%.i2479 = fmul reassoc nnan nsz arcp contract afn float %.i2476, %18
|
||||
%48 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 80, i32 0)
|
||||
%49 = shufflevector <3 x i32> %48, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
|
||||
%49 = shufflevector <3 x i32> %48, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
|
||||
%50 = bitcast <4 x i32> %49 to <4 x float>
|
||||
%.i2482 = extractelement <4 x float> %50, i32 2
|
||||
%.i2485 = fsub reassoc nnan nsz arcp contract afn float %.i2482, %.i2479
|
||||
@ -230,7 +230,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
|
||||
%.i2522 = fadd reassoc nnan nsz arcp contract afn float %.i2521, %.i2516
|
||||
%.i2525 = fmul reassoc nnan nsz arcp contract afn float %.i2522, %43
|
||||
%52 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 16, i32 0)
|
||||
%53 = shufflevector <3 x i32> %52, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
|
||||
%53 = shufflevector <3 x i32> %52, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
|
||||
%54 = bitcast <4 x i32> %53 to <4 x float>
|
||||
%.i2530 = extractelement <4 x float> %54, i32 2
|
||||
%.i2531 = fmul reassoc nnan nsz arcp contract afn float %.i2333, %.i2530
|
||||
|
@ -16,7 +16,7 @@ entry:
|
||||
%sint = load i32, ptr addrspace(1) %in
|
||||
%conv = sitofp i32 %sint to float
|
||||
%0 = insertelement <4 x float> poison, float %conv, i32 0
|
||||
%splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
%splat = shufflevector <4 x float> %0, <4 x float> poison, <4 x i32> zeroinitializer
|
||||
store <4 x float> %splat, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
@ -30,7 +30,7 @@ entry:
|
||||
%uint = load i32, ptr addrspace(1) %in
|
||||
%conv = uitofp i32 %uint to float
|
||||
%0 = insertelement <4 x float> poison, float %conv, i32 0
|
||||
%splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
%splat = shufflevector <4 x float> %0, <4 x float> poison, <4 x i32> zeroinitializer
|
||||
store <4 x float> %splat, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ bb:
|
||||
%tmp = load i32, ptr addrspace(1) undef, align 4
|
||||
%tmp1 = load <4 x float>, ptr addrspace(1) undef, align 16
|
||||
%tmp2 = sext i32 %tmp to i64
|
||||
%tmp3 = shufflevector <4 x float> undef, <4 x float> %tmp1, <2 x i32> <i32 3, i32 7>
|
||||
%tmp3 = shufflevector <4 x float> poison, <4 x float> %tmp1, <2 x i32> <i32 3, i32 7>
|
||||
%tmp4 = call float @barney() #2
|
||||
%tmp9 = getelementptr inbounds %struct.wombat, ptr addrspace(1) %arg, i64 %tmp2, i32 2, i64 0
|
||||
%tmp10 = load i32, ptr addrspace(1) %tmp9, align 4
|
||||
@ -53,7 +53,7 @@ bb28: ; preds = %bb25, %bb21
|
||||
%tmp45 = fadd float undef, undef
|
||||
%tmp46 = fdiv float %tmp44, %tmp45
|
||||
%tmp47 = insertelement <4 x float> poison, float %tmp46, i32 0
|
||||
%tmp48 = shufflevector <4 x float> %tmp47, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
%tmp48 = shufflevector <4 x float> %tmp47, <4 x float> poison, <4 x i32> zeroinitializer
|
||||
%tmp49 = fsub <4 x float> %tmp48, %tmp40
|
||||
%tmp50 = extractelement <4 x float> %tmp41, i32 1
|
||||
%tmp51 = extractelement <4 x float> %tmp42, i32 2
|
||||
@ -71,7 +71,7 @@ bb28: ; preds = %bb25, %bb21
|
||||
call void @llvm.dbg.value(metadata <4 x float> %tmp29, metadata !3, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #2, !dbg !5
|
||||
%tmp59 = bitcast i64 %tmp35 to <2 x float>
|
||||
%tmp60 = insertelement <2 x float> poison, float %tmp58, i32 0
|
||||
%tmp61 = shufflevector <2 x float> %tmp60, <2 x float> undef, <2 x i32> zeroinitializer
|
||||
%tmp61 = shufflevector <2 x float> %tmp60, <2 x float> poison, <2 x i32> zeroinitializer
|
||||
%tmp62 = fmul <2 x float> %tmp61, undef
|
||||
%tmp63 = fsub <2 x float> %tmp62, %tmp59
|
||||
%tmp64 = extractelement <2 x float> %tmp63, i64 0
|
||||
|
@ -26,7 +26,7 @@ entry:
|
||||
%m_scaleMotion = getelementptr inbounds %struct.ShapeData, ptr addrspace(1) %call, i64 0, i32 4
|
||||
%tmp2 = load <4 x float>, ptr addrspace(1) %m_scaleMotion, align 16
|
||||
%splat.splatinsert = insertelement <4 x float> poison, float %time, i32 0
|
||||
%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
|
||||
%tmp3 = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %tmp2, <4 x float> %splat.splat, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>)
|
||||
%tmp4 = load <4 x float>, ptr addrspace(1) %call, align 16
|
||||
%m_quaternion = getelementptr inbounds %struct.ShapeData, ptr addrspace(1) %call, i64 0, i32 1
|
||||
@ -61,8 +61,8 @@ entry:
|
||||
%tmp24 = insertelement <4 x float> %tmp23, float %tmp19, i32 1
|
||||
%tmp25 = insertelement <4 x float> %tmp24, float %tmp22, i32 2
|
||||
%tmp26 = extractelement <4 x float> %tmp5, i64 3
|
||||
%splat.splat.i8.i = shufflevector <4 x float> %tmp5, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
%splat.splat2.i9.i = shufflevector <4 x float> %tmp10, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
%splat.splat.i8.i = shufflevector <4 x float> %tmp5, <4 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
%splat.splat2.i9.i = shufflevector <4 x float> %tmp10, <4 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
%mul3.i10.i = fmul <4 x float> %tmp5, %splat.splat2.i9.i
|
||||
%tmp27 = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %splat.splat.i8.i, <4 x float> %tmp10, <4 x float> %mul3.i10.i)
|
||||
%add.i11.i = fadd <4 x float> %tmp27, %tmp25
|
||||
@ -94,7 +94,7 @@ entry:
|
||||
%tmp52 = insertelement <4 x float> <float poison, float poison, float poison, float 0.000000e+00>, float %tmp44, i32 0
|
||||
%tmp53 = insertelement <4 x float> %tmp52, float %tmp48, i32 1
|
||||
%tmp54 = insertelement <4 x float> %tmp53, float %tmp51, i32 2
|
||||
%splat.splat.i.i = shufflevector <4 x float> %tmp39, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
%splat.splat.i.i = shufflevector <4 x float> %tmp39, <4 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
|
||||
%tmp55 = extractelement <4 x float> %tmp5, i32 3
|
||||
%mul3.i.i = fmul <4 x float> %splat.splat.i8.i, %tmp39
|
||||
%tmp56 = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %splat.splat.i.i, <4 x float> %vecinit5.i.i, <4 x float> %mul3.i.i)
|
||||
@ -113,12 +113,12 @@ entry:
|
||||
%tmp66 = extractelement <4 x float> %tmp1, i64 3
|
||||
%mul3 = fmul float %tmp66, %time
|
||||
%tmp67 = insertelement <4 x float> %tmp1, float 0.000000e+00, i32 3
|
||||
%tmp68 = shufflevector <4 x float> %tmp67, <4 x float> %tmp1, <4 x i32> <i32 0, i32 5, i32 undef, i32 3>
|
||||
%tmp68 = shufflevector <4 x float> %tmp67, <4 x float> %tmp1, <4 x i32> <i32 0, i32 5, i32 poison, i32 3>
|
||||
%vecinit3.i.i = shufflevector <4 x float> %tmp68, <4 x float> %tmp1, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
|
||||
%tmp69 = fcmp oeq <4 x float> %vecinit3.i.i, zeroinitializer
|
||||
%tmp70 = sext <4 x i1> %tmp69 to <4 x i32>
|
||||
%tmp71 = shufflevector <4 x i32> %tmp70, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
|
||||
%tmp72 = shufflevector <4 x i32> %tmp70, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
|
||||
%tmp71 = shufflevector <4 x i32> %tmp70, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
|
||||
%tmp72 = shufflevector <4 x i32> %tmp70, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
|
||||
%tmp73 = and <2 x i32> %tmp71, %tmp72
|
||||
%tmp74 = extractelement <2 x i32> %tmp73, i64 0
|
||||
%tmp75 = extractelement <2 x i32> %tmp73, i64 1
|
||||
@ -205,7 +205,7 @@ bb141: ; preds = %bb109, %bb98, %bb96
|
||||
%tmp143 = phi float [ %tmp95, %bb86 ], [ %tmp140, %bb109 ], [ %tmp107, %bb98 ], [ %tmp84, %bb96 ]
|
||||
%tmp144 = tail call float @llvm.amdgcn.rsq.f32(float %tmp143)
|
||||
%tmp145 = insertelement <4 x float> poison, float %tmp144, i32 0
|
||||
%tmp146 = shufflevector <4 x float> %tmp145, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
%tmp146 = shufflevector <4 x float> %tmp145, <4 x float> poison, <4 x i32> zeroinitializer
|
||||
%tmp147 = fmul <4 x float> %tmp142, %tmp146
|
||||
br label %qtSet.exit
|
||||
|
||||
|
@ -331,7 +331,7 @@ if:
|
||||
|
||||
endif:
|
||||
%r = phi <3 x i32> [ %v, %entry ], [ %u, %if ]
|
||||
%r.ext = shufflevector <3 x i32> %r, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%r.ext = shufflevector <3 x i32> %r, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
call void asm sideeffect "; reg use $0", "s"(<4 x i32> %r.ext) #0
|
||||
ret void
|
||||
}
|
||||
|
@ -156,7 +156,7 @@ F:
|
||||
|
||||
exit:
|
||||
%m = phi <8 x i16> [ %t, %T ], [ %f, %F ]
|
||||
%v2 = shufflevector <8 x i16> %m, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
|
||||
%v2 = shufflevector <8 x i16> %m, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
|
||||
%b2 = icmp sgt <4 x i16> %v2, <i16 -1, i16 -1, i16 -1, i16 -1>
|
||||
%r2 = select <4 x i1> %b2, <4 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768>, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>
|
||||
ret <4 x i16> %r2
|
||||
@ -317,7 +317,7 @@ F:
|
||||
|
||||
exit:
|
||||
%m = phi <8 x i16> [ %t, %T ], [ %f, %F ]
|
||||
%v2 = shufflevector <8 x i16> %m, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
%v2 = shufflevector <8 x i16> %m, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
%b2 = icmp sgt <4 x i16> %v2, <i16 -1, i16 -1, i16 -1, i16 -1>
|
||||
%r2 = select <4 x i1> %b2, <4 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768>, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>
|
||||
ret <4 x i16> %r2
|
||||
@ -482,7 +482,7 @@ F:
|
||||
|
||||
exit:
|
||||
%m = phi <8 x half> [ %t, %T ], [ %f, %F ]
|
||||
%v2 = shufflevector <8 x half> %m, <8 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
|
||||
%v2 = shufflevector <8 x half> %m, <8 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
|
||||
%b2 = fcmp ugt <4 x half> %v2, <half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800>
|
||||
%r2 = select <4 x i1> %b2, <4 x half> <half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900>, <4 x half> <half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00>
|
||||
ret <4 x half> %r2
|
||||
@ -685,7 +685,7 @@ F:
|
||||
|
||||
exit:
|
||||
%m = phi <16 x i16> [ %t, %T ], [ %f, %F ]
|
||||
%v2 = shufflevector <16 x i16> %m, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
|
||||
%v2 = shufflevector <16 x i16> %m, <16 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
|
||||
%b2 = icmp sgt <4 x i16> %v2, <i16 -1, i16 -1, i16 -1, i16 -1>
|
||||
%r2 = select <4 x i1> %b2, <4 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768>, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>
|
||||
ret <4 x i16> %r2
|
||||
@ -890,7 +890,7 @@ F:
|
||||
|
||||
exit:
|
||||
%m = phi <16 x i16> [ %t, %T ], [ %f, %F ]
|
||||
%v2 = shufflevector <16 x i16> %m, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
%v2 = shufflevector <16 x i16> %m, <16 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
%b2 = icmp sgt <4 x i16> %v2, <i16 -1, i16 -1, i16 -1, i16 -1>
|
||||
%r2 = select <4 x i1> %b2, <4 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768>, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>
|
||||
ret <4 x i16> %r2
|
||||
@ -1099,7 +1099,7 @@ F:
|
||||
|
||||
exit:
|
||||
%m = phi <16 x half> [ %t, %T ], [ %f, %F ]
|
||||
%v2 = shufflevector <16 x half> %m, <16 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
|
||||
%v2 = shufflevector <16 x half> %m, <16 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
|
||||
%b2 = fcmp ugt <4 x half> %v2, <half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800>
|
||||
%r2 = select <4 x i1> %b2, <4 x half> <half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900>, <4 x half> <half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00>
|
||||
ret <4 x half> %r2
|
||||
@ -1184,8 +1184,8 @@ define <8 x i16> @large_vector(ptr addrspace(3) %p, i32 %idxp) {
|
||||
%x.7 = load i16, ptr addrspace(3) %p.7, align 2
|
||||
%v3 = insertelement <8 x i16> %v3p, i16 %x.7, i32 1
|
||||
|
||||
%z.1 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%z.2 = shufflevector <8 x i16> %z.1, <8 x i16> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 undef, i32 undef>
|
||||
%z.1 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%z.2 = shufflevector <8 x i16> %z.1, <8 x i16> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
|
||||
%z.3 = shufflevector <8 x i16> %z.2, <8 x i16> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
|
||||
ret <8 x i16> %z.3
|
||||
}
|
||||
@ -1464,7 +1464,7 @@ F:
|
||||
|
||||
exit:
|
||||
%m = phi <16 x i16> [ %t, %T ], [ %f, %F ]
|
||||
%v2 = shufflevector <16 x i16> %m, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%v2 = shufflevector <16 x i16> %m, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%b2 = icmp ugt <8 x i16> %v2, <i16 u0x3800, i16 u0x3800, i16 u0x3800, i16 u0x3800, i16 u0x3800, i16 u0x3800, i16 u0x3800, i16 u0x3800>
|
||||
%r2 = select <8 x i1> %b2, <8 x i16> <i16 u0x3900, i16 u0x3900, i16 u0x3900, i16 u0x3900, i16 u0x3900, i16 u0x3900, i16 u0x3900, i16 u0x3900>, <8 x i16> <i16 u0x3D00, i16 u0x3D00, i16 u0x3D00, i16 u0x3D00, i16 u0x3D00, i16 u0x3D00, i16 u0x3D00, i16 u0x3D00>
|
||||
ret <8 x i16> %r2
|
||||
@ -1755,7 +1755,7 @@ F:
|
||||
|
||||
exit:
|
||||
%m = phi <16 x half> [ %t, %T ], [ %f, %F ]
|
||||
%v2 = shufflevector <16 x half> %m, <16 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%v2 = shufflevector <16 x half> %m, <16 x half> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%b2 = fcmp ugt <8 x half> %v2, <half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800>
|
||||
%r2 = select <8 x i1> %b2, <8 x half> <half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900>, <8 x half> <half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00>
|
||||
ret <8 x half> %r2
|
||||
|
@ -13,7 +13,7 @@ define <3 x i32> @quux() {
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 1
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
bb:
|
||||
%tmp = shufflevector <4 x i8> <i8 1, i8 2, i8 3, i8 4>, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%tmp = shufflevector <4 x i8> <i8 1, i8 2, i8 3, i8 4>, <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%tmp1 = extractelement <3 x i8> %tmp, i64 0
|
||||
%tmp2 = zext i8 %tmp1 to i32
|
||||
%tmp3 = insertelement <3 x i32> poison, i32 %tmp2, i32 0
|
||||
|
@ -90,7 +90,7 @@ F:
|
||||
|
||||
exit:
|
||||
%m = phi <8 x i16> [ %t, %T ], [ %f, %F ]
|
||||
%v2 = shufflevector <8 x i16> %m, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
|
||||
%v2 = shufflevector <8 x i16> %m, <8 x i16> poison, <2 x i32> <i32 0, i32 1>
|
||||
%b2 = icmp sgt <2 x i16> %v2, <i16 -1, i16 -1>
|
||||
%r2 = select <2 x i1> %b2, <2 x i16> <i16 -32768, i16 -32768>, <2 x i16> <i16 -1, i16 -1>
|
||||
ret <2 x i16> %r2
|
||||
@ -161,7 +161,7 @@ F:
|
||||
|
||||
exit:
|
||||
%m = phi <8 x i64> [ %t, %T ], [ %f, %F ]
|
||||
%v2 = shufflevector <8 x i64> %m, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
|
||||
%v2 = shufflevector <8 x i64> %m, <8 x i64> poison, <2 x i32> <i32 0, i32 1>
|
||||
%b2 = icmp sgt <2 x i64> %v2, <i64 -1, i64 -1>
|
||||
%r2 = select <2 x i1> %b2, <2 x i64> <i64 -32768, i64 -32768>, <2 x i64> <i64 -1, i64 -1>
|
||||
ret <2 x i64> %r2
|
||||
@ -238,7 +238,7 @@ F:
|
||||
|
||||
exit:
|
||||
%m = phi <8 x i64> [ %t, %T ], [ %f, %F ]
|
||||
%v2 = shufflevector <8 x i64> %m, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%v2 = shufflevector <8 x i64> %m, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%b2 = icmp sgt <4 x i64> %v2, <i64 -1, i64 -1, i64 -1, i64 -1>
|
||||
%r2 = select <4 x i1> %b2, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768>, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>
|
||||
ret <4 x i64> %r2
|
||||
@ -342,7 +342,7 @@ F:
|
||||
|
||||
exit:
|
||||
%m = phi <16 x i64> [ %t, %T ], [ %f, %F ]
|
||||
%v2 = shufflevector <16 x i64> %m, <16 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%v2 = shufflevector <16 x i64> %m, <16 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%b2 = icmp sgt <8 x i64> %v2, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
|
||||
%r2 = select <8 x i1> %b2, <8 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768>, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
|
||||
ret <8 x i64> %r2
|
||||
@ -413,7 +413,7 @@ F:
|
||||
|
||||
exit:
|
||||
%m = phi <8 x double> [ %t, %T ], [ %f, %F ]
|
||||
%v2 = shufflevector <8 x double> %m, <8 x double> undef, <2 x i32> <i32 0, i32 1>
|
||||
%v2 = shufflevector <8 x double> %m, <8 x double> poison, <2 x i32> <i32 0, i32 1>
|
||||
%b2 = fcmp ogt <2 x double> %v2, <double -1.0, double -1.0>
|
||||
%r2 = select <2 x i1> %b2, <2 x double> <double -2.0, double -2.0>, <2 x double> <double -1.0, double -1.0>
|
||||
ret <2 x double> %r2
|
||||
@ -490,7 +490,7 @@ F:
|
||||
|
||||
exit:
|
||||
%m = phi <8 x double> [ %t, %T ], [ %f, %F ]
|
||||
%v2 = shufflevector <8 x double> %m, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%v2 = shufflevector <8 x double> %m, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%b2 = fcmp ogt <4 x double> %v2, <double -1.0, double -1.0, double -1.0, double -1.0>
|
||||
%r2 = select <4 x i1> %b2, <4 x double> <double -2.0, double -2.0, double -2.0, double -2.0>, <4 x double> <double -1.0, double -1.0, double -1.0, double -1.0>
|
||||
ret <4 x double> %r2
|
||||
@ -594,7 +594,7 @@ F:
|
||||
|
||||
exit:
|
||||
%m = phi <16 x double> [ %t, %T ], [ %f, %F ]
|
||||
%v2 = shufflevector <16 x double> %m, <16 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%v2 = shufflevector <16 x double> %m, <16 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
%b2 = fcmp ogt <8 x double> %v2, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
|
||||
%r2 = select <8 x i1> %b2, <8 x double> <double -2.0, double -2.0, double -2.0, double -2.0, double -2.0, double -2.0, double -2.0, double -2.0>, <8 x double> <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
|
||||
ret <8 x double> %r2
|
||||
|
@ -27,7 +27,7 @@ define amdgpu_hs void @main(ptr addrspace(6) inreg %arg) {
|
||||
main_body:
|
||||
%tmp25 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) undef, i32 undef, i32 0, i32 0)
|
||||
%tmp27 = bitcast <4 x float> %tmp25 to <16 x i8>
|
||||
%tmp28 = shufflevector <16 x i8> %tmp27, <16 x i8> undef, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
|
||||
%tmp28 = shufflevector <16 x i8> %tmp27, <16 x i8> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
|
||||
%tmp29 = bitcast <12 x i8> %tmp28 to <3 x i32>
|
||||
call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %tmp29, ptr addrspace(8) undef, i32 undef, i32 0, i32 0) #3
|
||||
ret void
|
||||
|
@ -39,7 +39,7 @@ bb14: ; preds = %bb14, %bb11
|
||||
%tmp25 = load float, ptr addrspace(4) %tmp24, align 4
|
||||
%tmp26 = fptrunc float %tmp25 to half
|
||||
%tmp27 = insertelement <4 x half> poison, half %tmp26, i32 0
|
||||
%tmp28 = shufflevector <4 x half> %tmp27, <4 x half> undef, <4 x i32> zeroinitializer
|
||||
%tmp28 = shufflevector <4 x half> %tmp27, <4 x half> poison, <4 x i32> zeroinitializer
|
||||
%vec.A.0 = extractelement <4 x half> %tmp21, i32 0
|
||||
%vec.B.0 = extractelement <4 x half> %tmp28, i32 0
|
||||
%vec.C.0 = extractelement <4 x half> %tmp15, i32 0
|
||||
|
@ -1585,7 +1585,7 @@ define amdgpu_kernel void @fnge_select_f32_multi_use_regression(float %.i2369) {
|
||||
|
||||
bb: ; preds = %.entry
|
||||
%i2 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> zeroinitializer, i32 1, i32 0)
|
||||
%i3 = shufflevector <2 x i32> %i2, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
%i3 = shufflevector <2 x i32> %i2, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
|
||||
%i4 = bitcast <4 x i32> %i3 to <4 x float>
|
||||
%.i0753 = extractelement <4 x float> %i4, i64 0
|
||||
br label %bb5
|
||||
|
@ -1841,7 +1841,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 {
|
||||
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
|
||||
%load = load <2 x i64>, ptr addrspace(1) null
|
||||
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 poison>, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
|
||||
call amdgpu_gfx void @external_void_func_v3i64(<3 x i64> %val)
|
||||
ret void
|
||||
@ -10439,7 +10439,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 {
|
||||
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
|
||||
%load = load <2 x i64>, ptr addrspace(4) null
|
||||
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 poison>, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
|
||||
call amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> inreg %val)
|
||||
ret void
|
||||
|
@ -48,6 +48,6 @@ define <4 x half> @shuffle_v4f16_234u(ptr addrspace(1) %arg0, ptr addrspace(1) %
|
||||
; NOXNACK-NEXT: s_setpc_b64 s[30:31]
|
||||
%val0 = load <4 x half>, ptr addrspace(1) %arg0
|
||||
%val1 = load <4 x half>, ptr addrspace(1) %arg1
|
||||
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 2, i32 3, i32 4, i32 undef>
|
||||
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 2, i32 3, i32 4, i32 poison>
|
||||
ret <4 x half> %shuffle
|
||||
}
|
||||
|
@ -36,7 +36,7 @@ main_body:
|
||||
define amdgpu_ps <2 x float> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
|
||||
main_body:
|
||||
%tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
%ext = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%ext = shufflevector <3 x half> %tex, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%r = bitcast <4 x half> %ext to <2 x float>
|
||||
ret <2 x float> %r
|
||||
}
|
||||
@ -90,7 +90,7 @@ main_body:
|
||||
; Test function: loads a <3 x half> value with llvm.amdgcn.image.load.3d,
; widens it to <4 x half>, and returns those bits as <2 x float>.
;   %rsrc      - <8 x i32> resource operand (inreg)
;   %s, %t, %r - i32 coordinates passed straight to the intrinsic
define amdgpu_ps <2 x float> @image_load_3d_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
|
||||
main_body:
|
||||
%tex = call <3 x half> @llvm.amdgcn.image.load.3d.v3f16.i32(i32 7, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
; The next two %ext lines are the before/after pair of this diff hunk: the
; commit replaces the undef second operand with poison. Only one of them
; exists in the actual test file.
; Mask indices 0-2 pick the three lanes of %tex; index 3 picks element 0 of the
; second operand, so lane 3 of %ext is the undef/poison placeholder.
%ext = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%ext = shufflevector <3 x half> %tex, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
; Reinterpret the four half lanes as two floats to match the return type.
%res = bitcast <4 x half> %ext to <2 x float>
|
||||
ret <2 x float> %res
|
||||
}
|
||||
@ -129,7 +129,7 @@ main_body:
|
||||
; Test function: takes <2 x float> %in, reinterprets it as <4 x half>, keeps
; the first three lanes, and stores them with llvm.amdgcn.image.store.2d.
;   %rsrc  - <8 x i32> resource operand (inreg)
;   %s, %t - i32 coordinates passed straight to the intrinsic
define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x float> %in) {
|
||||
main_body:
|
||||
%r = bitcast <2 x float> %in to <4 x half>
|
||||
; The next two %data lines are the before/after pair of this diff hunk: the
; commit replaces the undef second operand with poison. Only one of them
; exists in the actual test file.
; The mask <0, 1, 2> reads only from %r, so the second operand is never
; selected; the shuffle simply narrows <4 x half> to <3 x half>.
%data = shufflevector <4 x half> %r, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%data = shufflevector <4 x half> %r, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %data, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
@ -315,7 +315,7 @@ define amdgpu_ps <2 x float> @image_sample_b_2d_v3f16(<8 x i32> inreg %rsrc, <4
|
||||
; GFX12-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
%tex = call <3 x half> @llvm.amdgcn.image.sample.b.2d.v3f16.f32.f32(i32 7, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
|
||||
%tex_wide = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%tex_wide = shufflevector <3 x half> %tex, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%r = bitcast <4 x half> %tex_wide to <2 x float>
|
||||
ret <2 x float> %r
|
||||
}
|
||||
@ -410,7 +410,7 @@ define amdgpu_ps <4 x float> @image_sample_b_2d_v3f16_tfe(<8 x i32> inreg %rsrc,
|
||||
main_body:
|
||||
%tex = call {<3 x half>,i32} @llvm.amdgcn.image.sample.b.2d.v3f16i32.f32.f32(i32 7, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0)
|
||||
%tex.vec = extractvalue {<3 x half>, i32} %tex, 0
|
||||
%tex.vec_wide = shufflevector <3 x half> %tex.vec, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%tex.vec_wide = shufflevector <3 x half> %tex.vec, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%tex.err = extractvalue {<3 x half>, i32} %tex, 1
|
||||
%tex.vecf = bitcast <4 x half> %tex.vec_wide to <2 x float>
|
||||
%tex.vecf.0 = extractelement <2 x float> %tex.vecf, i32 0
|
||||
|
@ -2394,7 +2394,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc,
|
||||
; GFX12-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
|
||||
%out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 0, i32 1>
|
||||
ret <2 x float> %out
|
||||
}
|
||||
|
||||
@ -2436,7 +2436,7 @@ define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc,
|
||||
; GFX12-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
%out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%out = shufflevector <4 x float> %r, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
ret <3 x float> %out
|
||||
}
|
||||
|
||||
@ -2478,7 +2478,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc,
|
||||
; GFX12-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
|
||||
%out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 1, i32 2>
|
||||
ret <2 x float> %out
|
||||
}
|
||||
|
||||
@ -2520,7 +2520,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc,
|
||||
; GFX12-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 3>
|
||||
%out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 0, i32 3>
|
||||
ret <2 x float> %out
|
||||
}
|
||||
|
||||
@ -2562,7 +2562,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc,
|
||||
; GFX12-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 3>
|
||||
%out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 1, i32 3>
|
||||
ret <2 x float> %out
|
||||
}
|
||||
|
||||
@ -2604,7 +2604,7 @@ define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc,
|
||||
; GFX12-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
%out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
|
||||
%out = shufflevector <4 x float> %r, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
|
||||
ret <3 x float> %out
|
||||
}
|
||||
|
||||
@ -2667,7 +2667,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg
|
||||
; GFX12-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
|
||||
%out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 0, i32 1>
|
||||
ret <2 x float> %out
|
||||
}
|
||||
|
||||
@ -2709,7 +2709,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg
|
||||
; GFX12-NEXT: ; return to shader part epilog
|
||||
main_body:
|
||||
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
|
||||
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
|
||||
%out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 1, i32 2>
|
||||
ret <2 x float> %out
|
||||
}
|
||||
|
||||
|
@ -47,7 +47,7 @@ main_body:
|
||||
; PACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
|
||||
; Test kernel: narrows the <4 x half> argument %data to its first three lanes
; and stores them with llvm.amdgcn.raw.buffer.store.format.v3f16, using
; %voffset as the third intrinsic operand.
define amdgpu_kernel void @buffer_store_format_d16_xyz(<4 x i32> %rsrc, <4 x half> %data, i32 %voffset) {
|
||||
main_body:
|
||||
; The next two %data_subvec lines are the before/after pair of this diff hunk:
; the commit replaces the undef second operand with poison. Only one of them
; exists in the actual test file.
; The mask <0, 1, 2> reads only from %data, so the second operand is never
; selected.
%data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
@ -47,7 +47,7 @@ main_body:
|
||||
; PACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
|
||||
; Test kernel: narrows the <4 x half> argument %data to its first three lanes
; and stores them with llvm.amdgcn.raw.ptr.buffer.store.format.v3f16; the
; resource is passed as a ptr addrspace(8) and %voffset is the third operand.
define amdgpu_kernel void @buffer_store_format_d16_xyz(ptr addrspace(8) %rsrc, <4 x half> %data, i32 %voffset) {
|
||||
main_body:
|
||||
; The next two %data_subvec lines are the before/after pair of this diff hunk:
; the commit replaces the undef second operand with poison. Only one of them
; exists in the actual test file.
; The mask <0, 1, 2> reads only from %data, so the second operand is never
; selected.
%data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
call void @llvm.amdgcn.raw.ptr.buffer.store.format.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
@ -144,7 +144,7 @@ define amdgpu_kernel void @tbuffer_store_d16_xyz(ptr addrspace(8) %rsrc, <4 x ha
|
||||
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
|
||||
; GFX11-PACKED-NEXT: s_endpgm
|
||||
main_body:
|
||||
%data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
call void @llvm.amdgcn.raw.ptr.tbuffer.store.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 33, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
@ -191,7 +191,7 @@ define amdgpu_kernel void @tbuffer_store_d16_xyz(<4 x i32> %rsrc, <4 x half> %da
|
||||
; GFX12-PACKED-GISEL-NEXT: tbuffer_store_d16_format_xyzw v[0:1], off, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM]
|
||||
; GFX12-PACKED-GISEL-NEXT: s_endpgm
|
||||
main_body:
|
||||
%data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
call void @llvm.amdgcn.raw.tbuffer.store.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
@ -47,7 +47,7 @@ main_body:
|
||||
; PACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
|
||||
; Test kernel: narrows the <4 x half> argument %data to its first three lanes
; and stores them with llvm.amdgcn.struct.buffer.store.format.v3f16, using
; %index as the third intrinsic operand.
define amdgpu_kernel void @buffer_store_format_d16_xyz(<4 x i32> %rsrc, <4 x half> %data, i32 %index) {
|
||||
main_body:
|
||||
; The next two %data_subvec lines are the before/after pair of this diff hunk:
; the commit replaces the undef second operand with poison. Only one of them
; exists in the actual test file.
; The mask <0, 1, 2> reads only from %data, so the second operand is never
; selected.
%data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
call void @llvm.amdgcn.struct.buffer.store.format.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
@ -77,7 +77,7 @@ define amdgpu_kernel void @buffer_store_format_d16_xyz(ptr addrspace(8) %rsrc, <
|
||||
; PACKED-NEXT: buffer_store_format_d16_xyz v[0:1], v2, s[0:3], 0 idxen
|
||||
; PACKED-NEXT: s_endpgm
|
||||
main_body:
|
||||
%data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
call void @llvm.amdgcn.struct.ptr.buffer.store.format.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 %index, i32 0, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
@ -160,7 +160,7 @@ define amdgpu_kernel void @tbuffer_store_d16_xyz(ptr addrspace(8) %rsrc, <4 x ha
|
||||
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xyz v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
|
||||
; GFX11-PACKED-NEXT: s_endpgm
|
||||
main_body:
|
||||
%data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
call void @llvm.amdgcn.struct.ptr.tbuffer.store.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
@ -211,7 +211,7 @@ define amdgpu_kernel void @tbuffer_store_d16_xyz(<4 x i32> %rsrc, <4 x half> %da
|
||||
; GFX12-PACKED-GISEL-NEXT: tbuffer_store_d16_format_xyzw v[0:1], v2, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM] idxen
|
||||
; GFX12-PACKED-GISEL-NEXT: s_endpgm
|
||||
main_body:
|
||||
%data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
call void @llvm.amdgcn.struct.tbuffer.store.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
@ -22,7 +22,7 @@ define amdgpu_vs void @test(ptr addrspace(8) inreg %arg1, ptr addrspace(3) %arg2
|
||||
; CHECK-NEXT: s_endpgm
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float undef, float undef, float undef, float undef, i1 false, i1 false)
|
||||
%var1 = load <6 x float>, ptr addrspace(3) %arg2, align 4
|
||||
%var2 = shufflevector <6 x float> %var1, <6 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%var2 = shufflevector <6 x float> %var1, <6 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %var2, ptr addrspace(8) %arg1, i32 0, i32 0, i32 0, i32 126, i32 0)
|
||||
ret void
|
||||
}
|
||||
@ -52,9 +52,9 @@ define amdgpu_vs void @test_2(ptr addrspace(8) inreg %arg1, i32 %arg2, i32 inreg
|
||||
; CHECK-NEXT: tbuffer_store_format_xyzw v[2:5], v0, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:16 glc slc
|
||||
; CHECK-NEXT: s_endpgm
|
||||
%load = load <8 x float>, ptr addrspace(3) %arg4, align 4
|
||||
%vec1 = shufflevector <8 x float> %load, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%vec1 = shufflevector <8 x float> %load, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %vec1, ptr addrspace(8) %arg1, i32 %arg2, i32 0, i32 %arg3, i32 77, i32 3)
|
||||
%vec2 = shufflevector <8 x float> %load, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
%vec2 = shufflevector <8 x float> %load, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||
call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %vec2, ptr addrspace(8) %arg1, i32 %arg2, i32 16, i32 %arg3, i32 77, i32 3)
|
||||
ret void
|
||||
}
|
||||
@ -102,17 +102,17 @@ define amdgpu_vs void @test_3(i32 inreg %arg1, i32 inreg %arg2, ptr addrspace(8)
|
||||
; CHECK-NEXT: tbuffer_store_format_xy v[0:1], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_UINT] idxen offset:256 glc slc
|
||||
; CHECK-NEXT: s_endpgm
|
||||
%load1 = load <6 x float>, ptr addrspace(3) %arg5, align 4
|
||||
%vec11 = shufflevector <6 x float> %load1, <6 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%vec11 = shufflevector <6 x float> %load1, <6 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %vec11, ptr addrspace(8) %arg3, i32 %arg1, i32 264, i32 %arg2, i32 77, i32 3)
|
||||
%vec12 = shufflevector <6 x float> %load1, <6 x float> undef, <2 x i32> <i32 4, i32 5>
|
||||
%vec12 = shufflevector <6 x float> %load1, <6 x float> poison, <2 x i32> <i32 4, i32 5>
|
||||
call void @llvm.amdgcn.struct.ptr.tbuffer.store.v2f32(<2 x float> %vec12, ptr addrspace(8) %arg3, i32 %arg1, i32 280, i32 %arg2, i32 64, i32 3)
|
||||
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float undef, float undef, float undef, float undef, i1 false, i1 false)
|
||||
|
||||
%load2 = load <6 x float>, ptr addrspace(3) %arg6, align 4
|
||||
%vec21 = shufflevector <6 x float> %load2, <6 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%vec21 = shufflevector <6 x float> %load2, <6 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %vec21, ptr addrspace(8) %arg3, i32 %arg1, i32 240, i32 %arg2, i32 77, i32 3)
|
||||
%vec22 = shufflevector <6 x float> %load2, <6 x float> undef, <2 x i32> <i32 4, i32 5>
|
||||
%vec22 = shufflevector <6 x float> %load2, <6 x float> poison, <2 x i32> <i32 4, i32 5>
|
||||
call void @llvm.amdgcn.struct.ptr.tbuffer.store.v2f32(<2 x float> %vec22, ptr addrspace(8) %arg3, i32 %arg1, i32 256, i32 %arg2, i32 64, i32 3)
|
||||
|
||||
ret void
|
||||
|
@ -36,7 +36,7 @@ bb1: ; preds = %bb3, %bb
|
||||
%i3 = fmul float %i2, 1.000000e+00
|
||||
%i4 = fmul nsz <3 x float> %arg, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
|
||||
%i5 = insertelement <3 x float> poison, float %i3, i32 0
|
||||
%i6 = shufflevector <3 x float> %i5, <3 x float> undef, <3 x i32> zeroinitializer
|
||||
%i6 = shufflevector <3 x float> %i5, <3 x float> poison, <3 x i32> zeroinitializer
|
||||
%i7 = fmul <3 x float> %i4, %i6
|
||||
%i8 = fcmp oeq float %i3, 0.000000e+00
|
||||
br i1 %i8, label %bb3, label %bb2
|
||||
|
@ -456,9 +456,9 @@ define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1,
|
||||
; GISEL-CI-NEXT: v_mad_f32 v0, v4, v0, v1
|
||||
; GISEL-CI-NEXT: v_mac_f32_e32 v1, v5, v2
|
||||
; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
|
||||
%src0.shuf = shufflevector <2 x half> %src0, <2 x half> undef, <2 x i32> <i32 1, i32 0>
|
||||
%src1.shuf = shufflevector <2 x half> %src1, <2 x half> undef, <2 x i32> <i32 0, i32 1>
|
||||
%src2.shuf = shufflevector <2 x half> %src2, <2 x half> undef, <2 x i32> <i32 1, i32 1>
|
||||
%src0.shuf = shufflevector <2 x half> %src0, <2 x half> poison, <2 x i32> <i32 1, i32 0>
|
||||
%src1.shuf = shufflevector <2 x half> %src1, <2 x half> poison, <2 x i32> <i32 0, i32 1>
|
||||
%src2.shuf = shufflevector <2 x half> %src2, <2 x half> poison, <2 x i32> <i32 1, i32 1>
|
||||
%src0.ext = fpext <2 x half> %src0.shuf to <2 x float>
|
||||
%src1.ext = fpext <2 x half> %src1.shuf to <2 x float>
|
||||
%src2.ext = fpext <2 x half> %src2.shuf to <2 x float>
|
||||
|
@ -795,8 +795,8 @@ bb:
|
||||
br i1 %cmp, label %bb11, label %bb7
|
||||
|
||||
bb11:
|
||||
%tmp14 = shufflevector <2 x i32> %arg1, <2 x i32> undef, <2 x i32> zeroinitializer
|
||||
%tmp16 = shufflevector <2 x i32> %arg2, <2 x i32> undef, <2 x i32> zeroinitializer
|
||||
%tmp14 = shufflevector <2 x i32> %arg1, <2 x i32> poison, <2 x i32> zeroinitializer
|
||||
%tmp16 = shufflevector <2 x i32> %arg2, <2 x i32> poison, <2 x i32> zeroinitializer
|
||||
%tmp17 = shl <2 x i32> %tmp14, <i32 8, i32 8>
|
||||
%tmp18 = ashr <2 x i32> %tmp17, <i32 8, i32 8>
|
||||
%tmp19 = shl <2 x i32> %tmp16, <i32 8, i32 8>
|
||||
|
@ -486,7 +486,7 @@ bb:
|
||||
%neg.scalar0 = fsub float -0.0, %scalar0
|
||||
|
||||
%neg.scalar0.vec = insertelement <2 x float> poison, float %neg.scalar0, i32 0
|
||||
%neg.scalar0.broadcast = shufflevector <2 x float> %neg.scalar0.vec, <2 x float> undef, <2 x i32> zeroinitializer
|
||||
%neg.scalar0.broadcast = shufflevector <2 x float> %neg.scalar0.vec, <2 x float> poison, <2 x i32> zeroinitializer
|
||||
|
||||
%result = fadd <2 x float> %vec0, %neg.scalar0.broadcast
|
||||
store <2 x float> %result, ptr addrspace(1) %out, align 4
|
||||
@ -526,7 +526,7 @@ bb:
|
||||
%vec0 = load volatile <2 x float>, ptr addrspace(3) %lds, align 8
|
||||
%lds.gep1 = getelementptr inbounds <2 x float>, ptr addrspace(3) %lds, i32 1
|
||||
%vec1 = load volatile <2 x float>, ptr addrspace(3) %lds.gep1, align 8
|
||||
%vec1.swap = shufflevector <2 x float> %vec1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
|
||||
%vec1.swap = shufflevector <2 x float> %vec1, <2 x float> poison, <2 x i32> <i32 1, i32 0>
|
||||
%result = fadd <2 x float> %vec0, %vec1.swap
|
||||
store <2 x float> %result, ptr addrspace(1) %out, align 8
|
||||
ret void
|
||||
@ -543,7 +543,7 @@ bb:
|
||||
%f32 = load volatile float, ptr addrspace(3) undef, align 8
|
||||
%vec1 = load volatile <2 x float>, ptr addrspace(3) %lds.gep1, align 8
|
||||
%vec1.neg = fsub <2 x float> <float -0.0, float -0.0>, %vec1
|
||||
%vec1.neg.swap = shufflevector <2 x float> %vec1.neg, <2 x float> undef, <2 x i32> <i32 1, i32 0>
|
||||
%vec1.neg.swap = shufflevector <2 x float> %vec1.neg, <2 x float> poison, <2 x i32> <i32 1, i32 0>
|
||||
%result = fadd <2 x float> %vec0, %vec1.neg.swap
|
||||
store <2 x float> %result, ptr addrspace(1) %out, align 8
|
||||
ret void
|
||||
@ -598,7 +598,7 @@ bb:
|
||||
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
||||
%gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i32 %tid
|
||||
%in.1 = load <4 x float>, ptr addrspace(1) %gep
|
||||
%shuf = shufflevector <4 x float> %in.1, <4 x float> undef, <4 x i32> zeroinitializer
|
||||
%shuf = shufflevector <4 x float> %in.1, <4 x float> poison, <4 x i32> zeroinitializer
|
||||
%add.1 = fadd <4 x float> %in.1, %shuf
|
||||
store <4 x float> %add.1, ptr addrspace(1) %gep
|
||||
ret void
|
||||
|
@ -24,7 +24,7 @@ bb:
|
||||
%scalar0 = load volatile half, ptr addrspace(3) %arg2, align 2
|
||||
|
||||
%scalar0.vec = insertelement <2 x half> poison, half %scalar0, i32 0
|
||||
%scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer
|
||||
%scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> poison, <2 x i32> zeroinitializer
|
||||
|
||||
%result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %scalar0.broadcast)
|
||||
store <2 x half> %result, ptr addrspace(1) %out, align 4
|
||||
@ -55,7 +55,7 @@ bb:
|
||||
%scalar0 = load volatile half, ptr addrspace(3) %arg2, align 2
|
||||
|
||||
%scalar0.vec = insertelement <2 x half> poison, half %scalar0, i32 0
|
||||
%scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer
|
||||
%scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> poison, <2 x i32> zeroinitializer
|
||||
%neg.scalar0.broadcast = fsub <2 x half> <half -0.0, half -0.0>, %scalar0.broadcast
|
||||
|
||||
%result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.scalar0.broadcast)
|
||||
@ -88,7 +88,7 @@ bb:
|
||||
|
||||
%neg.scalar0 = fsub half -0.0, %scalar0
|
||||
%neg.scalar0.vec = insertelement <2 x half> poison, half %neg.scalar0, i32 0
|
||||
%neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer
|
||||
%neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> poison, <2 x i32> zeroinitializer
|
||||
|
||||
%result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.scalar0.broadcast)
|
||||
store <2 x half> %result, ptr addrspace(1) %out, align 4
|
||||
@ -120,7 +120,7 @@ bb:
|
||||
|
||||
%neg.scalar0 = fsub half -0.0, %scalar0
|
||||
%neg.scalar0.vec = insertelement <2 x half> poison, half %neg.scalar0, i32 0
|
||||
%neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer
|
||||
%neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> poison, <2 x i32> zeroinitializer
|
||||
%neg.neg.scalar0.broadcast = fsub <2 x half> <half -0.0, half -0.0>, %neg.scalar0.broadcast
|
||||
|
||||
%result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.neg.scalar0.broadcast)
|
||||
@ -212,7 +212,7 @@ bb:
|
||||
%neg.scalar0.bc = bitcast half %neg.scalar0 to i16
|
||||
|
||||
%neg.scalar0.vec = insertelement <2 x i16> poison, i16 %neg.scalar0.bc, i32 0
|
||||
%neg.scalar0.broadcast = shufflevector <2 x i16> %neg.scalar0.vec, <2 x i16> undef, <2 x i32> zeroinitializer
|
||||
%neg.scalar0.broadcast = shufflevector <2 x i16> %neg.scalar0.vec, <2 x i16> poison, <2 x i32> zeroinitializer
|
||||
|
||||
%result = add <2 x i16> %vec0, %neg.scalar0.broadcast
|
||||
store <2 x i16> %result, ptr addrspace(1) %out, align 4
|
||||
@ -318,7 +318,7 @@ bb:
|
||||
%vec2 = load volatile <2 x half>, ptr addrspace(3) %lds.gep2, align 4
|
||||
|
||||
%vec2.fneg = fsub <2 x half> <half -0.0, half -0.0>, %vec2
|
||||
%vec2.fneg.elt1.broadcast = shufflevector <2 x half> %vec2.fneg, <2 x half> undef, <2 x i32> <i32 1, i32 1>
|
||||
%vec2.fneg.elt1.broadcast = shufflevector <2 x half> %vec2.fneg, <2 x half> poison, <2 x i32> <i32 1, i32 1>
|
||||
|
||||
%result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %vec2.fneg.elt1.broadcast)
|
||||
store <2 x half> %result, ptr addrspace(1) %out, align 4
|
||||
@ -377,7 +377,7 @@ bb:
|
||||
%vec0 = load volatile <2 x i16>, ptr addrspace(3) %lds, align 4
|
||||
%vec1 = load volatile <2 x i16>, ptr addrspace(3) %lds.gep1, align 4
|
||||
|
||||
%vec1.elt1.broadcast = shufflevector <2 x i16> %vec1, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
|
||||
%vec1.elt1.broadcast = shufflevector <2 x i16> %vec1, <2 x i16> poison, <2 x i32> <i32 1, i32 1>
|
||||
%result = add <2 x i16> %vec0, %vec1.elt1.broadcast
|
||||
|
||||
store <2 x i16> %result, ptr addrspace(1) %out, align 4
|
||||
@ -407,7 +407,7 @@ bb:
|
||||
%vec1 = load volatile <2 x half>, ptr addrspace(3) %lds.gep1, align 4
|
||||
%vec2 = load volatile <2 x half>, ptr addrspace(3) %lds.gep2, align 4
|
||||
|
||||
%vec2.elt1.broadcast = shufflevector <2 x half> %vec2, <2 x half> undef, <2 x i32> <i32 1, i32 1>
|
||||
%vec2.elt1.broadcast = shufflevector <2 x half> %vec2, <2 x half> poison, <2 x i32> <i32 1, i32 1>
|
||||
|
||||
%result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %vec2.elt1.broadcast)
|
||||
|
||||
@ -471,7 +471,7 @@ bb:
|
||||
%vec1 = load volatile <2 x half>, ptr addrspace(3) %lds.gep1, align 4
|
||||
%vec2 = load volatile <2 x half>, ptr addrspace(3) %lds.gep2, align 4
|
||||
|
||||
%vec2.swap = shufflevector <2 x half> %vec2, <2 x half> undef, <2 x i32> <i32 1, i32 0>
|
||||
%vec2.swap = shufflevector <2 x half> %vec2, <2 x half> poison, <2 x i32> <i32 1, i32 0>
|
||||
%result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %vec2.swap)
|
||||
|
||||
store <2 x half> %result, ptr addrspace(1) %out, align 4
|
||||
@ -502,7 +502,7 @@ bb:
|
||||
%vec2 = load volatile <2 x half>, ptr addrspace(3) %lds.gep2, align 4
|
||||
%neg.vec2 = fsub <2 x half> <half -0.0, half -0.0>, %vec2
|
||||
|
||||
%neg.vec2.swap = shufflevector <2 x half> %neg.vec2, <2 x half> undef, <2 x i32> <i32 1, i32 0>
|
||||
%neg.vec2.swap = shufflevector <2 x half> %neg.vec2, <2 x half> poison, <2 x i32> <i32 1, i32 0>
|
||||
%result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.vec2.swap)
|
||||
|
||||
store <2 x half> %result, ptr addrspace(1) %out, align 4
|
||||
@ -678,7 +678,7 @@ bb:
|
||||
%f32 = load volatile float, ptr addrspace(3) undef, align 4
|
||||
%neg.f32 = fsub float -0.0, %f32
|
||||
%bc = bitcast float %neg.f32 to <2 x half>
|
||||
%shuf = shufflevector <2 x half> %bc, <2 x half> undef, <2 x i32> <i32 1, i32 0>
|
||||
%shuf = shufflevector <2 x half> %bc, <2 x half> poison, <2 x i32> <i32 1, i32 0>
|
||||
%result = fadd <2 x half> %vec0, %shuf
|
||||
store <2 x half> %result, ptr addrspace(1) %out, align 4
|
||||
ret void
|
||||
|
@ -342,7 +342,7 @@ define hidden void @shuffle7330ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 7, i32 3, i32 3, i32 0>
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 7, i32 3, i32 3, i32 0>
|
||||
store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
|
||||
ret void
|
||||
}
|
||||
@ -367,7 +367,7 @@ define hidden void @shuffle5341ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 5, i32 3, i32 4, i32 1>
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 5, i32 3, i32 4, i32 1>
|
||||
store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
|
||||
ret void
|
||||
}
|
||||
@ -393,7 +393,7 @@ define hidden void @shuffle6106ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 6, i32 1, i32 0, i32 6>
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 6, i32 1, i32 0, i32 6>
|
||||
store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
|
||||
ret void
|
||||
}
|
||||
@ -420,7 +420,7 @@ define hidden void @shuffle4327ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 4, i32 3, i32 2, i32 7>
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 4, i32 3, i32 2, i32 7>
|
||||
store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
|
||||
ret void
|
||||
}
|
||||
@ -446,7 +446,7 @@ define hidden void @shuffle3263ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 6, i32 3>
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 6, i32 3>
|
||||
store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
|
||||
ret void
|
||||
}
|
||||
@ -472,7 +472,7 @@ define hidden void @shuffle2763ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 2, i32 7, i32 6, i32 3>
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 2, i32 7, i32 6, i32 3>
|
||||
store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
|
||||
ret void
|
||||
}
|
||||
@ -498,7 +498,7 @@ define hidden void @shuffle1327ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 7>
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 1, i32 3, i32 2, i32 7>
|
||||
store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
|
||||
ret void
|
||||
}
|
||||
@ -524,7 +524,7 @@ define hidden void @shuffle0605ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 0, i32 6, i32 0, i32 5>
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 0, i32 6, i32 0, i32 5>
|
||||
store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
|
||||
ret void
|
||||
}
|
||||
@ -554,7 +554,7 @@ define hidden void @insertUsesOr(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
|
||||
%vec1 = load <4 x i8>, ptr addrspace(1) %in1, align 4
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
|
||||
%vecins = insertelement <4 x i8> %shuffle0_0, i8 %elt, i32 1
|
||||
store <4 x i8> %vecins, ptr addrspace(1) %out0
|
||||
ret void
|
||||
@ -598,7 +598,7 @@ define hidden void @addUsesOr(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i8 %
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
|
||||
%vec1 = load <4 x i8>, ptr addrspace(1) %in1, align 4
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 7, i32 0, i32 6, i32 3>
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 7, i32 0, i32 6, i32 3>
|
||||
%added = add <4 x i8> %shuffle0_0, %vec1
|
||||
store <4 x i8> %added, ptr addrspace(1) %out0
|
||||
ret void
|
||||
@ -783,7 +783,7 @@ define hidden void @add_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i8 %el
|
||||
%gep1 = getelementptr <4 x i8>, ptr addrspace(1) %in1, i32 %tid
|
||||
%vec0 = load <4 x i8>, ptr addrspace(1) %gep0, align 4
|
||||
%vec1 = load <4 x i8>, ptr addrspace(1) %gep1, align 4
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
|
||||
%vecins = add <4 x i8> %shuffle0_0, %vec1
|
||||
store <4 x i8> %vecins, ptr addrspace(1) %out0
|
||||
ret void
|
||||
@ -835,7 +835,7 @@ define hidden void @add_store(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i8 %
|
||||
; GFX9-NEXT: s_setpc_b64 s[30:31]
|
||||
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
|
||||
%vec1 = load <4 x i8>, ptr addrspace(1) %in1, align 4
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
|
||||
%vecins = add <4 x i8> %shuffle0_0, %vec1
|
||||
store <4 x i8> %vecins, ptr addrspace(1) %out0
|
||||
store <4 x i8> %shuffle0_0, ptr addrspace(1) %out1
|
||||
@ -903,7 +903,7 @@ define hidden void @add_store_div_16(ptr addrspace(1) %in0, ptr addrspace(1) %in
|
||||
%gep1 = getelementptr <4 x i8>, ptr addrspace(1) %in1, i32 %tid
|
||||
%vec0 = load <4 x i8>, ptr addrspace(1) %gep0, align 4
|
||||
%vec1 = load <4 x i8>, ptr addrspace(1) %gep1, align 4
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
|
||||
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
|
||||
%vecins = add <4 x i8> %shuffle0_0, %vec1
|
||||
store <4 x i8> %vecins, ptr addrspace(1) %out0
|
||||
store <4 x i8> %shuffle0_0, ptr addrspace(1) %out1
|
||||
|
@ -10,9 +10,9 @@
|
||||
; VI-NEXT: v_add_f16_e32
|
||||
define half @reduction_fadd_v4f16(<4 x half> %vec4) {
|
||||
entry:
|
||||
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
|
||||
%bin.rdx = fadd <4 x half> %vec4, %rdx.shuf
|
||||
%rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
|
||||
%bin.rdx2 = fadd <4 x half> %bin.rdx, %rdx.shuf1
|
||||
%res = extractelement <4 x half> %bin.rdx2, i32 0
|
||||
ret half %res
|
||||
@ -30,9 +30,9 @@ entry:
|
||||
; VI-NEXT: s_setpc_b64
|
||||
define half @reduction_fsub_v4f16(<4 x half> %vec4) {
|
||||
entry:
|
||||
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
|
||||
%bin.rdx = fsub <4 x half> %vec4, %rdx.shuf
|
||||
%rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
|
||||
%bin.rdx2 = fsub <4 x half> %bin.rdx, %rdx.shuf1
|
||||
%res = extractelement <4 x half> %bin.rdx2, i32 0
|
||||
ret half %res
|
||||
@ -52,9 +52,9 @@ entry:
|
||||
; VI-NEXT: s_setpc_b64
|
||||
define half @reduction_fsub_v4f16_preserve_fmf(<4 x half> %vec4) {
|
||||
entry:
|
||||
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
|
||||
%bin.rdx = fsub nsz <4 x half> %vec4, %rdx.shuf
|
||||
%rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
|
||||
%bin.rdx2 = fsub nsz <4 x half> %bin.rdx, %rdx.shuf1
|
||||
%res = extractelement <4 x half> %bin.rdx2, i32 0
|
||||
%neg.res = fsub half -0.0, %res
|
||||
@ -70,9 +70,9 @@ entry:
|
||||
; VI-NEXT: v_mul_f16_e32
|
||||
define half @reduction_fmul_half4(<4 x half> %vec4) {
|
||||
entry:
|
||||
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
|
||||
%bin.rdx = fmul <4 x half> %vec4, %rdx.shuf
|
||||
%rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
|
||||
%bin.rdx2 = fmul <4 x half> %bin.rdx, %rdx.shuf1
|
||||
%res = extractelement <4 x half> %bin.rdx2, i32 0
|
||||
ret half %res
|
||||
@ -87,9 +87,9 @@ entry:
|
||||
; VI-NEXT: v_add_u16_e32
|
||||
define i16 @reduction_v4i16(<4 x i16> %vec4) {
|
||||
entry:
|
||||
%rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
%rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
|
||||
%bin.rdx = add <4 x i16> %vec4, %rdx.shuf
|
||||
%rdx.shuf1 = shufflevector <4 x i16> %bin.rdx, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf1 = shufflevector <4 x i16> %bin.rdx, <4 x i16> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
|
||||
%bin.rdx2 = add <4 x i16> %bin.rdx, %rdx.shuf1
|
||||
%res = extractelement <4 x i16> %bin.rdx2, i32 0
|
||||
ret i16 %res
|
||||
@ -111,11 +111,11 @@ entry:
|
||||
|
||||
define half @reduction_half8(<8 x half> %vec8) {
|
||||
entry:
|
||||
%rdx.shuf = shufflevector <8 x half> %vec8, <8 x half> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf = shufflevector <8 x half> %vec8, <8 x half> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%bin.rdx = fadd <8 x half> %vec8, %rdx.shuf
|
||||
%rdx.shuf1 = shufflevector <8 x half> %bin.rdx, <8 x half> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf1 = shufflevector <8 x half> %bin.rdx, <8 x half> poison, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%bin.rdx2 = fadd <8 x half> %bin.rdx, %rdx.shuf1
|
||||
%rdx.shuf3 = shufflevector <8 x half> %bin.rdx2, <8 x half> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf3 = shufflevector <8 x half> %bin.rdx2, <8 x half> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%bin.rdx4 = fadd <8 x half> %bin.rdx2, %rdx.shuf3
|
||||
%res = extractelement <8 x half> %bin.rdx4, i32 0
|
||||
ret half %res
|
||||
@ -137,11 +137,11 @@ entry:
|
||||
|
||||
define i16 @reduction_v8i16(<8 x i16> %vec8) {
|
||||
entry:
|
||||
%rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%bin.rdx = add <8 x i16> %vec8, %rdx.shuf
|
||||
%rdx.shuf1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%bin.rdx2 = add <8 x i16> %bin.rdx, %rdx.shuf1
|
||||
%rdx.shuf3 = shufflevector <8 x i16> %bin.rdx2, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf3 = shufflevector <8 x i16> %bin.rdx2, <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%bin.rdx4 = add <8 x i16> %bin.rdx2, %rdx.shuf3
|
||||
%res = extractelement <8 x i16> %bin.rdx4, i32 0
|
||||
ret i16 %res
|
||||
@ -175,13 +175,13 @@ entry:
|
||||
|
||||
define half @reduction_half16(<16 x half> %vec16) {
|
||||
entry:
|
||||
%rdx.shuf = shufflevector <16 x half> %vec16, <16 x half> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf = shufflevector <16 x half> %vec16, <16 x half> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%bin.rdx = fadd <16 x half> %vec16, %rdx.shuf
|
||||
%rdx.shuf1 = shufflevector <16 x half> %bin.rdx, <16 x half> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf1 = shufflevector <16 x half> %bin.rdx, <16 x half> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%bin.rdx2 = fadd <16 x half> %bin.rdx, %rdx.shuf1
|
||||
%rdx.shuf3 = shufflevector <16 x half> %bin.rdx2, <16 x half> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf3 = shufflevector <16 x half> %bin.rdx2, <16 x half> poison, <16 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%bin.rdx4 = fadd <16 x half> %bin.rdx2, %rdx.shuf3
|
||||
%rdx.shuf5 = shufflevector <16 x half> %bin.rdx4, <16 x half> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf5 = shufflevector <16 x half> %bin.rdx4, <16 x half> poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%bin.rdx6 = fadd <16 x half> %bin.rdx4, %rdx.shuf5
|
||||
%res = extractelement <16 x half> %bin.rdx6, i32 0
|
||||
ret half %res
|
||||
@ -196,10 +196,10 @@ entry:
|
||||
; VI-NEXT: v_min_u16_e32
|
||||
define i16 @reduction_min_v4i16(<4 x i16> %vec4) {
|
||||
entry:
|
||||
%rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
%rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
|
||||
%rdx.minmax.cmp = icmp ult <4 x i16> %vec4, %rdx.shuf
|
||||
%rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i16> %vec4, <4 x i16> %rdx.shuf
|
||||
%rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
|
||||
%rdx.minmax.cmp2 = icmp ult <4 x i16> %rdx.minmax.select, %rdx.shuf1
|
||||
%rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x i16> %rdx.minmax.select, <4 x i16> %rdx.shuf1
|
||||
%res = extractelement <4 x i16> %rdx.minmax.select3, i32 0
|
||||
@ -221,13 +221,13 @@ entry:
|
||||
; VI-NEXT: v_min_u16_e32
|
||||
define i16 @reduction_umin_v8i16(<8 x i16> %vec8) {
|
||||
entry:
|
||||
%rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%rdx.minmax.cmp = icmp ult <8 x i16> %vec8, %rdx.shuf
|
||||
%rdx.minmax.select = select <8 x i1> %rdx.minmax.cmp, <8 x i16> %vec8, <8 x i16> %rdx.shuf
|
||||
%rdx.shuf1 = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf1 = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%rdx.minmax.cmp2 = icmp ult <8 x i16> %rdx.minmax.select, %rdx.shuf1
|
||||
%rdx.minmax.select3 = select <8 x i1> %rdx.minmax.cmp2, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf1
|
||||
%rdx.shuf4 = shufflevector <8 x i16> %rdx.minmax.select3, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf4 = shufflevector <8 x i16> %rdx.minmax.select3, <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%rdx.minmax.cmp5 = icmp ult <8 x i16> %rdx.minmax.select3, %rdx.shuf4
|
||||
%rdx.minmax.select6 = select <8 x i1> %rdx.minmax.cmp5, <8 x i16> %rdx.minmax.select3, <8 x i16> %rdx.shuf4
|
||||
%res = extractelement <8 x i16> %rdx.minmax.select6, i32 0
|
||||
@ -301,16 +301,16 @@ entry:
|
||||
; VI-NEXT: v_min_i16_e32
|
||||
define i16 @reduction_smin_v16i16(<16 x i16> %vec16) {
|
||||
entry:
|
||||
%rdx.shuf = shufflevector <16 x i16> %vec16, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf = shufflevector <16 x i16> %vec16, <16 x i16> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%rdx.minmax.cmp = icmp slt <16 x i16> %vec16, %rdx.shuf
|
||||
%rdx.minmax.select = select <16 x i1> %rdx.minmax.cmp, <16 x i16> %vec16, <16 x i16> %rdx.shuf
|
||||
%rdx.shuf1 = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf1 = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%rdx.minmax.cmp2 = icmp slt <16 x i16> %rdx.minmax.select, %rdx.shuf1
|
||||
%rdx.minmax.select3 = select <16 x i1> %rdx.minmax.cmp2, <16 x i16> %rdx.minmax.select, <16 x i16> %rdx.shuf1
|
||||
%rdx.shuf4 = shufflevector <16 x i16> %rdx.minmax.select3, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf4 = shufflevector <16 x i16> %rdx.minmax.select3, <16 x i16> poison, <16 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%rdx.minmax.cmp5 = icmp slt <16 x i16> %rdx.minmax.select3, %rdx.shuf4
|
||||
%rdx.minmax.select6 = select <16 x i1> %rdx.minmax.cmp5, <16 x i16> %rdx.minmax.select3, <16 x i16> %rdx.shuf4
|
||||
%rdx.shuf7 = shufflevector <16 x i16> %rdx.minmax.select6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf7 = shufflevector <16 x i16> %rdx.minmax.select6, <16 x i16> poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%rdx.minmax.cmp8 = icmp slt <16 x i16> %rdx.minmax.select6, %rdx.shuf7
|
||||
%rdx.minmax.select9 = select <16 x i1> %rdx.minmax.cmp8, <16 x i16> %rdx.minmax.select6, <16 x i16> %rdx.shuf7
|
||||
%res = extractelement <16 x i16> %rdx.minmax.select9, i32 0
|
||||
@ -404,10 +404,10 @@ entry:
|
||||
; VI-NEXT: v_max_u16_e32
|
||||
define i16 @reduction_umax_v4i16(<4 x i16> %vec4) {
|
||||
entry:
|
||||
%rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
%rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
|
||||
%rdx.minmax.cmp = icmp ugt <4 x i16> %vec4, %rdx.shuf
|
||||
%rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i16> %vec4, <4 x i16> %rdx.shuf
|
||||
%rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
|
||||
%rdx.minmax.cmp2 = icmp ugt <4 x i16> %rdx.minmax.select, %rdx.shuf1
|
||||
%rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x i16> %rdx.minmax.select, <4 x i16> %rdx.shuf1
|
||||
%res = extractelement <4 x i16> %rdx.minmax.select3, i32 0
|
||||
@ -423,10 +423,10 @@ entry:
|
||||
; VI-NEXT: v_max_i16_e32
|
||||
define i16 @reduction_smax_v4i16(<4 x i16> %vec4) #0 {
|
||||
entry:
|
||||
%rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
%rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
|
||||
%rdx.minmax.cmp = icmp sgt <4 x i16> %vec4, %rdx.shuf
|
||||
%rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i16> %vec4, <4 x i16> %rdx.shuf
|
||||
%rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
|
||||
%rdx.minmax.cmp2 = icmp sgt <4 x i16> %rdx.minmax.select, %rdx.shuf1
|
||||
%rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x i16> %rdx.minmax.select, <4 x i16> %rdx.shuf1
|
||||
%res = extractelement <4 x i16> %rdx.minmax.select3, i32 0
|
||||
@ -451,9 +451,9 @@ entry:
|
||||
; VI: v_max_f16_e32 v0, [[MAX1]], [[MAX0]]
|
||||
define half @reduction_maxnum_v4f16(<4 x half> %vec4) {
|
||||
entry:
|
||||
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
|
||||
%rdx.minmax = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %vec4, <4 x half> %rdx.shuf)
|
||||
%rdx.shuf1 = shufflevector <4 x half> %rdx.minmax, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf1 = shufflevector <4 x half> %rdx.minmax, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
|
||||
%rdx.minmax3 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %rdx.minmax, <4 x half> %rdx.shuf1)
|
||||
%res = extractelement <4 x half> %rdx.minmax3, i32 0
|
||||
ret half %res
|
||||
@ -476,9 +476,9 @@ entry:
|
||||
; VI: v_min_f16_e32 v0, [[MAX1]], [[MAX0]]
|
||||
define half @reduction_minnum_v4f16(<4 x half> %vec4) {
|
||||
entry:
|
||||
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
|
||||
%rdx.minmax = call <4 x half> @llvm.minnum.v4f16(<4 x half> %vec4, <4 x half> %rdx.shuf)
|
||||
%rdx.shuf1 = shufflevector <4 x half> %rdx.minmax, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf1 = shufflevector <4 x half> %rdx.minmax, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
|
||||
%rdx.minmax3 = call <4 x half> @llvm.minnum.v4f16(<4 x half> %rdx.minmax, <4 x half> %rdx.shuf1)
|
||||
%res = extractelement <4 x half> %rdx.minmax3, i32 0
|
||||
ret half %res
|
||||
@ -513,10 +513,10 @@ entry:
|
||||
; VI: v_max_f16_e32 v0, [[MAX1]], [[MAX0]]
|
||||
define half @reduction_fast_max_pattern_v4f16(<4 x half> %vec4) {
|
||||
entry:
|
||||
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
|
||||
%rdx.minmax.cmp = fcmp nnan nsz ogt <4 x half> %vec4, %rdx.shuf
|
||||
%rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf
|
||||
%rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
|
||||
%rdx.minmax.cmp2 = fcmp nnan nsz ogt <4 x half> %rdx.minmax.select, %rdx.shuf1
|
||||
%rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1
|
||||
%res = extractelement <4 x half> %rdx.minmax.select3, i32 0
|
||||
@ -552,10 +552,10 @@ entry:
|
||||
; VI: v_min_f16_e32 v0, [[MAX1]], [[MAX0]]
|
||||
define half @reduction_fast_min_pattern_v4f16(<4 x half> %vec4) {
|
||||
entry:
|
||||
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
|
||||
%rdx.minmax.cmp = fcmp nnan nsz olt <4 x half> %vec4, %rdx.shuf
|
||||
%rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf
|
||||
%rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
%rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
|
||||
%rdx.minmax.cmp2 = fcmp nnan nsz olt <4 x half> %rdx.minmax.select, %rdx.shuf1
|
||||
%rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1
|
||||
%res = extractelement <4 x half> %rdx.minmax.select3, i32 0
|
||||
|
@ -67,7 +67,7 @@ define amdgpu_kernel void @scalar_to_vector_v2i32(ptr addrspace(1) %out, ptr add
|
||||
; GFX9-NEXT: s_endpgm
|
||||
%tmp1 = load i32, ptr addrspace(1) %in, align 4
|
||||
%bc = bitcast i32 %tmp1 to <2 x i16>
|
||||
%tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp2 = shufflevector <2 x i16> %bc, <2 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
store <4 x i16> %tmp2, ptr addrspace(1) %out, align 8
|
||||
ret void
|
||||
}
|
||||
@ -135,7 +135,7 @@ define amdgpu_kernel void @scalar_to_vector_v2f32(ptr addrspace(1) %out, ptr add
|
||||
; GFX9-NEXT: s_endpgm
|
||||
%tmp1 = load float, ptr addrspace(1) %in, align 4
|
||||
%bc = bitcast float %tmp1 to <2 x i16>
|
||||
%tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp2 = shufflevector <2 x i16> %bc, <2 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
|
||||
store <4 x i16> %tmp2, ptr addrspace(1) %out, align 8
|
||||
ret void
|
||||
}
|
||||
@ -193,7 +193,7 @@ define amdgpu_kernel void @scalar_to_vector_v4i16() {
|
||||
bb:
|
||||
%tmp = load <2 x i8>, ptr addrspace(1) undef, align 1
|
||||
%tmp1 = shufflevector <2 x i8> %tmp, <2 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 0, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
|
||||
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> poison, <8 x i32> <i32 0, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
|
||||
store <8 x i8> %tmp2, ptr addrspace(1) undef, align 8
|
||||
ret void
|
||||
}
|
||||
@ -262,7 +262,7 @@ bb:
|
||||
%load = load half, ptr addrspace(1) undef, align 1
|
||||
%tmp = bitcast half %load to <2 x i8>
|
||||
%tmp1 = shufflevector <2 x i8> %tmp, <2 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 0, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
|
||||
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> poison, <8 x i32> <i32 0, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
|
||||
store <8 x i8> %tmp2, ptr addrspace(1) undef, align 8
|
||||
ret void
|
||||
}
|
||||
@ -275,7 +275,7 @@ bb:
|
||||
; %tmp1 = load i32, ptr addrspace(1) %in, align 4
|
||||
; %bc = bitcast i32 %tmp1 to <4 x i8>
|
||||
|
||||
; %tmp2 = shufflevector <4 x i8> %bc, <4 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
; %tmp2 = shufflevector <4 x i8> %bc, <4 x i8> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
; store <8 x i8> %tmp2, ptr addrspace(1) %out, align 4
|
||||
; ret void
|
||||
; }
|
||||
|
@ -56,13 +56,13 @@ declare <2 x float> @_Z3cosDv2_f(<2 x float>)
|
||||
define amdgpu_kernel void @test_sincos_v3(ptr addrspace(1) nocapture %a) {
|
||||
entry:
|
||||
%loadVec4 = load <4 x float>, ptr addrspace(1) %a, align 16
|
||||
%extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
|
||||
%call = call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4)
|
||||
%extractVec6 = shufflevector <3 x float> %call, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
|
||||
%extractVec6 = shufflevector <3 x float> %call, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
|
||||
store <4 x float> %extractVec6, ptr addrspace(1) %a, align 16
|
||||
%call11 = call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4)
|
||||
%arrayidx12 = getelementptr inbounds <3 x float>, ptr addrspace(1) %a, i64 1
|
||||
%extractVec13 = shufflevector <3 x float> %call11, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
|
||||
%extractVec13 = shufflevector <3 x float> %call11, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
|
||||
store <4 x float> %extractVec13, ptr addrspace(1) %arrayidx12, align 16
|
||||
ret void
|
||||
}
|
||||
|
@ -52,7 +52,7 @@ define hidden <4 x float> @split_v4f32_multi_arg(<4 x float> %arg0, <2 x float>
|
||||
; GCN: .cfi_endproc
|
||||
call void @llvm.dbg.value(metadata <4 x float> %arg0, metadata !29, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !31
|
||||
call void @llvm.dbg.value(metadata <2 x float> %arg1, metadata !30, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !31
|
||||
%tmp = shufflevector <2 x float> %arg1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>, !dbg !32
|
||||
%tmp = shufflevector <2 x float> %arg1, <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>, !dbg !32
|
||||
%add = fadd <4 x float> %tmp, %arg0, !dbg !33
|
||||
ret <4 x float> %add, !dbg !34
|
||||
}
|
||||
|
@ -15,8 +15,8 @@ entry:
|
||||
define protected amdgpu_kernel void @short2_char4(ptr addrspace(1) %out) {
|
||||
entry:
|
||||
%tmp = load <2 x i16>, ptr addrspace(1) undef, align 4
|
||||
%vecinit = shufflevector <2 x i16> %tmp, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
%vecinit2 = shufflevector <4 x i16> %vecinit, <4 x i16> <i16 undef, i16 undef, i16 0, i16 0>, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
|
||||
%vecinit = shufflevector <2 x i16> %tmp, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
|
||||
%vecinit2 = shufflevector <4 x i16> %vecinit, <4 x i16> <i16 poison, i16 poison, i16 0, i16 0>, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
|
||||
%tmp1 = trunc <4 x i16> %vecinit2 to <4 x i8>
|
||||
store <4 x i8> %tmp1, ptr addrspace(1) %out, align 4
|
||||
ret void
|
||||
@ -27,8 +27,8 @@ entry:
|
||||
define protected amdgpu_kernel void @short4_char8(ptr addrspace(1) %out) {
|
||||
entry:
|
||||
%tmp = load <4 x i16>, ptr addrspace(1) undef, align 8
|
||||
%vecinit = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%vecinit2 = shufflevector <8 x i16> %vecinit, <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 0, i16 0, i16 0>, <8 x i32> <i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7>
|
||||
%vecinit = shufflevector <4 x i16> %tmp, <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%vecinit2 = shufflevector <8 x i16> %vecinit, <8 x i16> <i16 poison, i16 poison, i16 poison, i16 poison, i16 0, i16 0, i16 0, i16 0>, <8 x i32> <i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7>
|
||||
%tmp1 = trunc <8 x i16> %vecinit2 to <8 x i8>
|
||||
store <8 x i8> %tmp1, ptr addrspace(1) %out, align 8
|
||||
ret void
|
||||
@ -39,8 +39,8 @@ entry:
|
||||
define protected amdgpu_kernel void @short8_char16(ptr addrspace(1) %out) {
|
||||
entry:
|
||||
%tmp = load <8 x i16>, ptr addrspace(1) undef, align 16
|
||||
%vecinit = shufflevector <8 x i16> %tmp, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%vecinit2 = shufflevector <16 x i16> %vecinit, <16 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <16 x i32> <i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7>
|
||||
%vecinit = shufflevector <8 x i16> %tmp, <8 x i16> poison, <16 x i32> <i32 0, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%vecinit2 = shufflevector <16 x i16> %vecinit, <16 x i16> <i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <16 x i32> <i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7>
|
||||
%tmp1 = trunc <16 x i16> %vecinit2 to <16 x i8>
|
||||
store <16 x i8> %tmp1, ptr addrspace(1) %out, align 16
|
||||
ret void
|
||||
@ -52,8 +52,8 @@ entry:
|
||||
define protected amdgpu_kernel void @short16_char32(ptr addrspace(1) %out) {
|
||||
entry:
|
||||
%tmp = load <16 x i16>, ptr addrspace(1) undef, align 32
|
||||
%vecinit = shufflevector <16 x i16> %tmp, <16 x i16> undef, <32 x i32> <i32 0, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%vecinit2 = shufflevector <32 x i16> %vecinit, <32 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 1, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <32 x i32> <i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7>
|
||||
%vecinit = shufflevector <16 x i16> %tmp, <16 x i16> poison, <32 x i32> <i32 0, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
%vecinit2 = shufflevector <32 x i16> %vecinit, <32 x i16> <i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 0, i16 1, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 poison, i16 poison, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <32 x i32> <i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7>
|
||||
%tmp1 = trunc <32 x i16> %vecinit2 to <32 x i8>
|
||||
store <32 x i8> %tmp1, ptr addrspace(1) %out, align 32
|
||||
ret void
|
||||
|
@ -27,7 +27,7 @@ define <4 x half> @shuffle_v4f16_23uu(ptr addrspace(1) %arg0, ptr addrspace(1) %
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
%val0 = load <4 x half>, ptr addrspace(1) %arg0
|
||||
%val1 = load <4 x half>, ptr addrspace(1) %arg1
|
||||
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
|
||||
ret <4 x half> %shuffle
|
||||
}
|
||||
|
||||
@ -74,7 +74,7 @@ define <4 x half> @shuffle_v4f16_234u(ptr addrspace(1) %arg0, ptr addrspace(1) %
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
%val0 = load <4 x half>, ptr addrspace(1) %arg0
|
||||
%val1 = load <4 x half>, ptr addrspace(1) %arg1
|
||||
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 2, i32 3, i32 4, i32 undef>
|
||||
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 2, i32 3, i32 4, i32 poison>
|
||||
ret <4 x half> %shuffle
|
||||
}
|
||||
|
||||
@ -101,7 +101,7 @@ define <4 x half> @shuffle_v4f16_u1u3(ptr addrspace(1) %arg0, ptr addrspace(1) %
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
%val0 = load <4 x half>, ptr addrspace(1) %arg0
|
||||
%val1 = load <4 x half>, ptr addrspace(1) %arg1
|
||||
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 undef, i32 1, i32 undef, i32 3>
|
||||
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 poison, i32 1, i32 poison, i32 3>
|
||||
ret <4 x half> %shuffle
|
||||
}
|
||||
|
||||
@ -140,7 +140,7 @@ define <4 x half> @shuffle_v4f16_u3u1(ptr addrspace(1) %arg0, ptr addrspace(1) %
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
%val0 = load <4 x half>, ptr addrspace(1) %arg0
|
||||
%val1 = load <4 x half>, ptr addrspace(1) %arg1
|
||||
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 undef, i32 3, i32 undef, i32 1>
|
||||
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 poison, i32 3, i32 poison, i32 1>
|
||||
ret <4 x half> %shuffle
|
||||
}
|
||||
|
||||
@ -167,7 +167,7 @@ define <4 x half> @shuffle_v4f16_u3uu(ptr addrspace(1) %arg0, ptr addrspace(1) %
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
%val0 = load <4 x half>, ptr addrspace(1) %arg0
|
||||
%val1 = load <4 x half>, ptr addrspace(1) %arg1
|
||||
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
|
||||
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
|
||||
ret <4 x half> %shuffle
|
||||
}
|
||||
|
||||
@ -216,7 +216,7 @@ define <4 x half> @shuffle_v4f16_3u6u(ptr addrspace(1) %arg0, ptr addrspace(1) %
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
%val0 = load <4 x half>, ptr addrspace(1) %arg0
|
||||
%val1 = load <4 x half>, ptr addrspace(1) %arg1
|
||||
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 undef, i32 6, i32 undef>
|
||||
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 poison, i32 6, i32 poison>
|
||||
ret <4 x half> %shuffle
|
||||
}
|
||||
|
||||
@ -265,7 +265,7 @@ define <4 x half> @shuffle_v4f16_3uu7(ptr addrspace(1) %arg0, ptr addrspace(1) %
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
%val0 = load <4 x half>, ptr addrspace(1) %arg0
|
||||
%val1 = load <4 x half>, ptr addrspace(1) %arg1
|
||||
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 undef, i32 undef, i32 7>
|
||||
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 poison, i32 poison, i32 7>
|
||||
ret <4 x half> %shuffle
|
||||
}
|
||||
|
||||
@ -312,7 +312,7 @@ define <4 x half> @shuffle_v4f16_35u5(ptr addrspace(1) %arg0, ptr addrspace(1) %
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
%val0 = load <4 x half>, ptr addrspace(1) %arg0
|
||||
%val1 = load <4 x half>, ptr addrspace(1) %arg1
|
||||
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 5, i32 undef, i32 5>
|
||||
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 5, i32 poison, i32 5>
|
||||
ret <4 x half> %shuffle
|
||||
}
|
||||
|
||||
@ -364,7 +364,7 @@ define <4 x half> @shuffle_v4f16_357u(ptr addrspace(1) %arg0, ptr addrspace(1) %
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
%val0 = load <4 x half>, ptr addrspace(1) %arg0
|
||||
%val1 = load <4 x half>, ptr addrspace(1) %arg1
|
||||
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 5, i32 7, i32 undef>
|
||||
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 5, i32 7, i32 poison>
|
||||
ret <4 x half> %shuffle
|
||||
}
|
||||
|
||||
@ -1881,21 +1881,21 @@ entry:
|
||||
%tmp15 = load <4 x half>, ptr addrspace(1) %arrayidx1, align 8
|
||||
%arrayidx2 = getelementptr inbounds <4 x half>, ptr addrspace(1) %C, i64 %tmp12
|
||||
%tmp16 = load <4 x half>, ptr addrspace(1) %arrayidx2, align 8
|
||||
%tmp17 = shufflevector <4 x half> %tmp14, <4 x half> undef, <2 x i32> zeroinitializer
|
||||
%tmp18 = shufflevector <4 x half> %tmp15, <4 x half> undef, <2 x i32> <i32 0, i32 1>
|
||||
%tmp19 = shufflevector <4 x half> %tmp16, <4 x half> undef, <2 x i32> <i32 0, i32 1>
|
||||
%tmp17 = shufflevector <4 x half> %tmp14, <4 x half> poison, <2 x i32> zeroinitializer
|
||||
%tmp18 = shufflevector <4 x half> %tmp15, <4 x half> poison, <2 x i32> <i32 0, i32 1>
|
||||
%tmp19 = shufflevector <4 x half> %tmp16, <4 x half> poison, <2 x i32> <i32 0, i32 1>
|
||||
%tmp20 = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %tmp17, <2 x half> %tmp18, <2 x half> %tmp19)
|
||||
%tmp21 = shufflevector <4 x half> %tmp14, <4 x half> undef, <2 x i32> <i32 1, i32 1>
|
||||
%tmp22 = shufflevector <4 x half> %tmp15, <4 x half> undef, <2 x i32> <i32 2, i32 3>
|
||||
%tmp21 = shufflevector <4 x half> %tmp14, <4 x half> poison, <2 x i32> <i32 1, i32 1>
|
||||
%tmp22 = shufflevector <4 x half> %tmp15, <4 x half> poison, <2 x i32> <i32 2, i32 3>
|
||||
%tmp23 = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %tmp21, <2 x half> %tmp22, <2 x half> %tmp20)
|
||||
%tmp24 = shufflevector <2 x half> %tmp23, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
%tmp24 = shufflevector <2 x half> %tmp23, <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
|
||||
%tmp25 = shufflevector <4 x half> %tmp24, <4 x half> %tmp16, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
|
||||
%tmp26 = shufflevector <4 x half> %tmp14, <4 x half> undef, <2 x i32> <i32 2, i32 2>
|
||||
%tmp27 = shufflevector <4 x half> %tmp25, <4 x half> undef, <2 x i32> <i32 2, i32 3>
|
||||
%tmp26 = shufflevector <4 x half> %tmp14, <4 x half> poison, <2 x i32> <i32 2, i32 2>
|
||||
%tmp27 = shufflevector <4 x half> %tmp25, <4 x half> poison, <2 x i32> <i32 2, i32 3>
|
||||
%tmp28 = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %tmp26, <2 x half> %tmp18, <2 x half> %tmp27)
|
||||
%tmp29 = shufflevector <4 x half> %tmp14, <4 x half> undef, <2 x i32> <i32 3, i32 3>
|
||||
%tmp29 = shufflevector <4 x half> %tmp14, <4 x half> poison, <2 x i32> <i32 3, i32 3>
|
||||
%tmp30 = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %tmp29, <2 x half> %tmp22, <2 x half> %tmp28)
|
||||
%tmp31 = shufflevector <2 x half> %tmp30, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
%tmp31 = shufflevector <2 x half> %tmp30, <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
|
||||
%tmp32 = shufflevector <4 x half> %tmp25, <4 x half> %tmp31, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
||||
store <4 x half> %tmp32, ptr addrspace(1) %arrayidx2, align 8
|
||||
ret void
|
||||
@ -2006,7 +2006,7 @@ define amdgpu_kernel void @shuffle_scalar_load_v8i32_0123(ptr addrspace(4) %in,
|
||||
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[2:3]
|
||||
; GFX11-NEXT: s_endpgm
|
||||
%ld8 = load <8 x i32>, ptr addrspace(4) %in, align 16
|
||||
%id = shufflevector <8 x i32> %ld8, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%id = shufflevector <8 x i32> %ld8, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
store <4 x i32> %id, ptr addrspace(1) %out, align 8
|
||||
ret void
|
||||
}
|
||||
@ -2052,7 +2052,7 @@ define <2 x half> @low16bits_v2f16(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
|
||||
entry:
|
||||
%0 = load <2 x half>, ptr addrspace(1) %x0, align 4
|
||||
%1 = load <2 x half>, ptr addrspace(1) %x1, align 4
|
||||
%vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 0, i32 undef>
|
||||
%vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 0, i32 poison>
|
||||
%vy1.2.vec.insert = shufflevector <2 x half> %vy1.0.vec.insert, <2 x half> %1, <2 x i32> <i32 0, i32 2>
|
||||
ret <2 x half> %vy1.2.vec.insert
|
||||
}
|
||||
@ -2098,7 +2098,7 @@ define <2 x half> @hi16bits_v2f16(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
|
||||
entry:
|
||||
%0 = load <2 x half>, ptr addrspace(1) %x0, align 4
|
||||
%1 = load <2 x half>, ptr addrspace(1) %x1, align 4
|
||||
%vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 1, i32 undef>
|
||||
%vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 1, i32 poison>
|
||||
%vy1.2.vec.insert = shufflevector <2 x half> %vy1.0.vec.insert, <2 x half> %1, <2 x i32> <i32 0, i32 3>
|
||||
ret <2 x half> %vy1.2.vec.insert
|
||||
}
|
||||
@ -2144,7 +2144,7 @@ define <2 x half> @low16hi16bits_v2f16(ptr addrspace(1) %x0, ptr addrspace(1) %x
|
||||
entry:
|
||||
%0 = load <2 x half>, ptr addrspace(1) %x0, align 4
|
||||
%1 = load <2 x half>, ptr addrspace(1) %x1, align 4
|
||||
%vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 0, i32 undef>
|
||||
%vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 0, i32 poison>
|
||||
%vy1.2.vec.insert = shufflevector <2 x half> %vy1.0.vec.insert, <2 x half> %1, <2 x i32> <i32 0, i32 3>
|
||||
ret <2 x half> %vy1.2.vec.insert
|
||||
}
|
||||
@ -2179,7 +2179,7 @@ define <2 x half> @hi16low16bits_v2bf16(ptr addrspace(1) %x0, ptr addrspace(1) %
|
||||
entry:
|
||||
%0 = load <2 x half>, ptr addrspace(1) %x0, align 4
|
||||
%1 = load <2 x half>, ptr addrspace(1) %x1, align 4
|
||||
%vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 1, i32 undef>
|
||||
%vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 1, i32 poison>
|
||||
%vy1.2.vec.insert = shufflevector <2 x half> %vy1.0.vec.insert, <2 x half> %1, <2 x i32> <i32 0, i32 2>
|
||||
ret <2 x half> %vy1.2.vec.insert
|
||||
}
|
||||
@ -2225,7 +2225,7 @@ define <2 x i16> @i16_low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
|
||||
entry:
|
||||
%0 = load <2 x i16>, ptr addrspace(1) %x0, align 4
|
||||
%1 = load <2 x i16>, ptr addrspace(1) %x1, align 4
|
||||
%vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 0, i32 undef>
|
||||
%vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 0, i32 poison>
|
||||
%vy1.2.vec.insert = shufflevector <2 x i16> %vy1.0.vec.insert, <2 x i16> %1, <2 x i32> <i32 0, i32 2>
|
||||
ret <2 x i16> %vy1.2.vec.insert
|
||||
}
|
||||
@ -2271,7 +2271,7 @@ define <2 x i16> @i16_low16hi16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1)
|
||||
entry:
|
||||
%0 = load <2 x i16>, ptr addrspace(1) %x0, align 4
|
||||
%1 = load <2 x i16>, ptr addrspace(1) %x1, align 4
|
||||
%vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 0, i32 undef>
|
||||
%vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 0, i32 poison>
|
||||
%vy1.2.vec.insert = shufflevector <2 x i16> %vy1.0.vec.insert, <2 x i16> %1, <2 x i32> <i32 0, i32 3>
|
||||
ret <2 x i16> %vy1.2.vec.insert
|
||||
}
|
||||
@ -2306,7 +2306,7 @@ define <2 x i16> @i16_hi16low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1)
|
||||
entry:
|
||||
%0 = load <2 x i16>, ptr addrspace(1) %x0, align 4
|
||||
%1 = load <2 x i16>, ptr addrspace(1) %x1, align 4
|
||||
%vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 1, i32 undef>
|
||||
%vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
|
||||
%vy1.2.vec.insert = shufflevector <2 x i16> %vy1.0.vec.insert, <2 x i16> %1, <2 x i32> <i32 0, i32 2>
|
||||
ret <2 x i16> %vy1.2.vec.insert
|
||||
}
|
||||
@ -2352,7 +2352,7 @@ define <2 x i16> @i16_hi16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
|
||||
entry:
|
||||
%0 = load <2 x i16>, ptr addrspace(1) %x0, align 4
|
||||
%1 = load <2 x i16>, ptr addrspace(1) %x1, align 4
|
||||
%vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 1, i32 undef>
|
||||
%vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
|
||||
%vy1.2.vec.insert = shufflevector <2 x i16> %vy1.0.vec.insert, <2 x i16> %1, <2 x i32> <i32 0, i32 3>
|
||||
ret <2 x i16> %vy1.2.vec.insert
|
||||
}
|
||||
@ -3020,7 +3020,7 @@ define <4 x bfloat> @shuffle_v4bf16_23uu(ptr addrspace(1) %arg0, ptr addrspace(1
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
|
||||
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
|
||||
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
|
||||
ret <4 x bfloat> %shuffle
|
||||
}
|
||||
|
||||
@ -3067,7 +3067,7 @@ define <4 x bfloat> @shuffle_v4bf16_234u(ptr addrspace(1) %arg0, ptr addrspace(1
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
|
||||
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
|
||||
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 2, i32 3, i32 4, i32 undef>
|
||||
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 2, i32 3, i32 4, i32 poison>
|
||||
ret <4 x bfloat> %shuffle
|
||||
}
|
||||
|
||||
@ -3094,7 +3094,7 @@ define <4 x bfloat> @shuffle_v4bf16_u1u3(ptr addrspace(1) %arg0, ptr addrspace(1
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
|
||||
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
|
||||
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 undef, i32 1, i32 undef, i32 3>
|
||||
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 poison, i32 1, i32 poison, i32 3>
|
||||
ret <4 x bfloat> %shuffle
|
||||
}
|
||||
|
||||
@ -3133,7 +3133,7 @@ define <4 x bfloat> @shuffle_v4bf16_u3u1(ptr addrspace(1) %arg0, ptr addrspace(1
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
|
||||
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
|
||||
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 undef, i32 3, i32 undef, i32 1>
|
||||
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 poison, i32 3, i32 poison, i32 1>
|
||||
ret <4 x bfloat> %shuffle
|
||||
}
|
||||
|
||||
@ -3160,7 +3160,7 @@ define <4 x bfloat> @shuffle_v4bf16_u3uu(ptr addrspace(1) %arg0, ptr addrspace(1
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
|
||||
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
|
||||
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
|
||||
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
|
||||
ret <4 x bfloat> %shuffle
|
||||
}
|
||||
|
||||
@ -3209,7 +3209,7 @@ define <4 x bfloat> @shuffle_v4bf16_3u6u(ptr addrspace(1) %arg0, ptr addrspace(1
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
|
||||
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
|
||||
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 undef, i32 6, i32 undef>
|
||||
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 poison, i32 6, i32 poison>
|
||||
ret <4 x bfloat> %shuffle
|
||||
}
|
||||
|
||||
@ -3258,7 +3258,7 @@ define <4 x bfloat> @shuffle_v4bf16_3uu7(ptr addrspace(1) %arg0, ptr addrspace(1
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
|
||||
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
|
||||
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 undef, i32 undef, i32 7>
|
||||
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 poison, i32 poison, i32 7>
|
||||
ret <4 x bfloat> %shuffle
|
||||
}
|
||||
|
||||
@ -3305,7 +3305,7 @@ define <4 x bfloat> @shuffle_v4bf16_35u5(ptr addrspace(1) %arg0, ptr addrspace(1
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
|
||||
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
|
||||
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 5, i32 undef, i32 5>
|
||||
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 5, i32 poison, i32 5>
|
||||
ret <4 x bfloat> %shuffle
|
||||
}
|
||||
|
||||
@ -3357,7 +3357,7 @@ define <4 x bfloat> @shuffle_v4bf16_357u(ptr addrspace(1) %arg0, ptr addrspace(1
|
||||
; GFX11-NEXT: s_setpc_b64 s[30:31]
|
||||
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
|
||||
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
|
||||
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 5, i32 7, i32 undef>
|
||||
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 5, i32 7, i32 poison>
|
||||
ret <4 x bfloat> %shuffle
|
||||
}
|
||||
|
||||
@ -5059,21 +5059,21 @@ entry:
|
||||
%tmp15 = load <4 x bfloat>, ptr addrspace(1) %arrayidx1, align 8
|
||||
%arrayidx2 = getelementptr inbounds <4 x bfloat>, ptr addrspace(1) %C, i64 %tmp12
|
||||
%tmp16 = load <4 x bfloat>, ptr addrspace(1) %arrayidx2, align 8
|
||||
%tmp17 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> undef, <2 x i32> zeroinitializer
|
||||
%tmp18 = shufflevector <4 x bfloat> %tmp15, <4 x bfloat> undef, <2 x i32> <i32 0, i32 1>
|
||||
%tmp19 = shufflevector <4 x bfloat> %tmp16, <4 x bfloat> undef, <2 x i32> <i32 0, i32 1>
|
||||
%tmp17 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> poison, <2 x i32> zeroinitializer
|
||||
%tmp18 = shufflevector <4 x bfloat> %tmp15, <4 x bfloat> poison, <2 x i32> <i32 0, i32 1>
|
||||
%tmp19 = shufflevector <4 x bfloat> %tmp16, <4 x bfloat> poison, <2 x i32> <i32 0, i32 1>
|
||||
%tmp20 = tail call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %tmp17, <2 x bfloat> %tmp18, <2 x bfloat> %tmp19)
|
||||
%tmp21 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> undef, <2 x i32> <i32 1, i32 1>
|
||||
%tmp22 = shufflevector <4 x bfloat> %tmp15, <4 x bfloat> undef, <2 x i32> <i32 2, i32 3>
|
||||
%tmp21 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> poison, <2 x i32> <i32 1, i32 1>
|
||||
%tmp22 = shufflevector <4 x bfloat> %tmp15, <4 x bfloat> poison, <2 x i32> <i32 2, i32 3>
|
||||
%tmp23 = tail call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %tmp21, <2 x bfloat> %tmp22, <2 x bfloat> %tmp20)
|
||||
%tmp24 = shufflevector <2 x bfloat> %tmp23, <2 x bfloat> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
%tmp24 = shufflevector <2 x bfloat> %tmp23, <2 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
|
||||
%tmp25 = shufflevector <4 x bfloat> %tmp24, <4 x bfloat> %tmp16, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
|
||||
%tmp26 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> undef, <2 x i32> <i32 2, i32 2>
|
||||
%tmp27 = shufflevector <4 x bfloat> %tmp25, <4 x bfloat> undef, <2 x i32> <i32 2, i32 3>
|
||||
%tmp26 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> poison, <2 x i32> <i32 2, i32 2>
|
||||
%tmp27 = shufflevector <4 x bfloat> %tmp25, <4 x bfloat> poison, <2 x i32> <i32 2, i32 3>
|
||||
%tmp28 = tail call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %tmp26, <2 x bfloat> %tmp18, <2 x bfloat> %tmp27)
|
||||
%tmp29 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> undef, <2 x i32> <i32 3, i32 3>
|
||||
%tmp29 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> poison, <2 x i32> <i32 3, i32 3>
|
||||
%tmp30 = tail call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %tmp29, <2 x bfloat> %tmp22, <2 x bfloat> %tmp28)
|
||||
%tmp31 = shufflevector <2 x bfloat> %tmp30, <2 x bfloat> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
|
||||
%tmp31 = shufflevector <2 x bfloat> %tmp30, <2 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
|
||||
%tmp32 = shufflevector <4 x bfloat> %tmp25, <4 x bfloat> %tmp31, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
||||
store <4 x bfloat> %tmp32, ptr addrspace(1) %arrayidx2, align 8
|
||||
ret void
|
||||
@ -5172,7 +5172,7 @@ define <2 x bfloat> @low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
|
||||
entry:
|
||||
%0 = load <2 x bfloat>, ptr addrspace(1) %x0, align 4
|
||||
%1 = load <2 x bfloat>, ptr addrspace(1) %x1, align 4
|
||||
%vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 0, i32 undef>
|
||||
%vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 0, i32 poison>
|
||||
%vy1.2.vec.insert = shufflevector <2 x bfloat> %vy1.0.vec.insert, <2 x bfloat> %1, <2 x i32> <i32 0, i32 2>
|
||||
ret <2 x bfloat> %vy1.2.vec.insert
|
||||
}
|
||||
@ -5218,7 +5218,7 @@ define <2 x bfloat> @hi16bits_v2bf16(ptr addrspace(1) %x0, ptr addrspace(1) %x1)
|
||||
entry:
|
||||
%0 = load <2 x bfloat>, ptr addrspace(1) %x0, align 4
|
||||
%1 = load <2 x bfloat>, ptr addrspace(1) %x1, align 4
|
||||
%vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 1, i32 undef>
|
||||
%vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 1, i32 poison>
|
||||
%vy1.2.vec.insert = shufflevector <2 x bfloat> %vy1.0.vec.insert, <2 x bfloat> %1, <2 x i32> <i32 0, i32 3>
|
||||
ret <2 x bfloat> %vy1.2.vec.insert
|
||||
}
|
||||
@ -5264,7 +5264,7 @@ define <2 x bfloat> @low16hi16bits_v2bf16(ptr addrspace(1) %x0, ptr addrspace(1)
|
||||
entry:
|
||||
%0 = load <2 x bfloat>, ptr addrspace(1) %x0, align 4
|
||||
%1 = load <2 x bfloat>, ptr addrspace(1) %x1, align 4
|
||||
%vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 0, i32 undef>
|
||||
%vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 0, i32 poison>
|
||||
%vy1.2.vec.insert = shufflevector <2 x bfloat> %vy1.0.vec.insert, <2 x bfloat> %1, <2 x i32> <i32 0, i32 3>
|
||||
ret <2 x bfloat> %vy1.2.vec.insert
|
||||
}
|
||||
@ -5299,7 +5299,7 @@ define <2 x bfloat> @hi16low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
|
||||
entry:
|
||||
%0 = load <2 x bfloat>, ptr addrspace(1) %x0, align 4
|
||||
%1 = load <2 x bfloat>, ptr addrspace(1) %x1, align 4
|
||||
%vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 1, i32 undef>
|
||||
%vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 1, i32 poison>
|
||||
%vy1.2.vec.insert = shufflevector <2 x bfloat> %vy1.0.vec.insert, <2 x bfloat> %1, <2 x i32> <i32 0, i32 2>
|
||||
ret <2 x bfloat> %vy1.2.vec.insert
|
||||
}
|
||||
|
@ -543,12 +543,12 @@ if.then9: ; preds = %entry
|
||||
sw.bb: ; preds = %if.then9
|
||||
%i17 = load i8, ptr addrspace(1) null, align 1
|
||||
%i18 = insertelement <4 x i8> zeroinitializer, i8 %i17, i64 0
|
||||
%a.sroa.0.0.vecblend = shufflevector <4 x i8> %i18, <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
|
||||
%a.sroa.0.0.vecblend = shufflevector <4 x i8> %i18, <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 0, i32 poison>
|
||||
br label %sw.bb18
|
||||
|
||||
sw.bb18: ; preds = %sw.bb, %if.then9
|
||||
%a.sroa.0.0 = phi <4 x i8> [ %a.sroa.0.0.vecblend, %sw.bb ], [ poison, %if.then9 ]
|
||||
%a.sroa.0.0.vec.extract61 = shufflevector <4 x i8> %a.sroa.0.0, <4 x i8> zeroinitializer, <3 x i32> <i32 undef, i32 1, i32 undef>
|
||||
%a.sroa.0.0.vec.extract61 = shufflevector <4 x i8> %a.sroa.0.0, <4 x i8> zeroinitializer, <3 x i32> <i32 poison, i32 1, i32 poison>
|
||||
%i19 = insertelement <3 x i8> %a.sroa.0.0.vec.extract61, i8 0, i64 0
|
||||
%i20 = select <3 x i1> zeroinitializer, <3 x i8> zeroinitializer, <3 x i8> %i19
|
||||
%i21 = extractelement <3 x i8> %i20, i64 1
|
||||
|
@ -421,7 +421,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<8 x half> %A, <8 x
|
||||
; GFX12-NEXT: s_endpgm
|
||||
bb:
|
||||
%C = load <16 x half>, ptr %Caddr
|
||||
%C_shuffle = shufflevector <16 x half> %C, <16 x half> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
|
||||
%C_shuffle = shufflevector <16 x half> %C, <16 x half> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
|
||||
%fneg.C_shuffle = fneg <8 x half> %C_shuffle
|
||||
%res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> %fneg.C_shuffle , i1 0)
|
||||
store <8 x half> %res, ptr addrspace(1) %out
|
||||
|
@ -378,7 +378,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<4 x half> %A, <4 x
|
||||
; GFX12-NEXT: s_endpgm
|
||||
bb:
|
||||
%C = load <8 x half>, ptr %Caddr
|
||||
%C_shuffle = shufflevector <8 x half> %C, <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%C_shuffle = shufflevector <8 x half> %C, <8 x half> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
|
||||
%fneg.C_shuffle = fneg <4 x half> %C_shuffle
|
||||
%res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half> %A, <4 x half> %B, <4 x half> %fneg.C_shuffle , i1 0)
|
||||
store <4 x half> %res, ptr addrspace(1) %out
|
||||
|
@ -12,9 +12,9 @@ define amdgpu_cs void @xyz () {
|
||||
br label %loop
|
||||
loop:
|
||||
%ld = load <8 x float>, ptr addrspace(5) null, align 32
|
||||
%in_shuffle = shufflevector <8 x float> %ld, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%in_shuffle = shufflevector <8 x float> %ld, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
%wmma = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v16f16(<16 x half> undef, <16 x half> undef, <4 x float> %in_shuffle)
|
||||
%out_shuffle = shufflevector <4 x float> %wmma, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
%out_shuffle = shufflevector <4 x float> %wmma, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
|
||||
store <8 x float> %out_shuffle, ptr addrspace(5) null, align 32
|
||||
br i1 false, label %.exit, label %loop
|
||||
.exit:
|
||||
|
Loading…
x
Reference in New Issue
Block a user