AMDGPU: Replace tests using undef in shufflevector with poison (#130899)

This commit is contained in:
Matt Arsenault 2025-03-12 20:45:02 +07:00 committed by GitHub
parent 2fbddfbdc0
commit b76e396990
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
63 changed files with 305 additions and 305 deletions

View File

@ -12,7 +12,7 @@ define void @value_finder_bug(ptr addrspace(5) %store_ptr, ptr addrspace(4) %ptr
; GFX10-NEXT: s_setpc_b64 s[30:31]
%vec = load <4 x float>, ptr addrspace(4) %ptr, align 4
%vec.3 = extractelement <4 x float> %vec, i32 3
%shuffle = shufflevector <4 x float> %vec, <4 x float> undef, <2 x i32> <i32 2, i32 undef>
%shuffle = shufflevector <4 x float> %vec, <4 x float> poison, <2 x i32> <i32 2, i32 poison>
%new_vec = insertelement <2 x float> %shuffle, float %vec.3, i32 1
store <2 x float> %new_vec, ptr addrspace(5) %store_ptr, align 8
ret void

View File

@ -870,10 +870,10 @@ define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) {
; GFX11-NEXT: s_setpc_b64 s[30:31]
entry:
%insert = insertelement <8 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>, double %val, i32 %idx
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
%vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
%vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
%vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
%vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
store volatile <2 x double> %vec.0, ptr addrspace(1) undef
store volatile <2 x double> %vec.1, ptr addrspace(1) undef
store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@ -1081,10 +1081,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, do
; GFX11-NEXT: s_endpgm
entry:
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
%vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
%vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
%vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
%vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
store volatile <2 x double> %vec.0, ptr addrspace(1) undef
store volatile <2 x double> %vec.1, ptr addrspace(1) undef
store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@ -1229,10 +1229,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_v_s(<8 x double> inreg %vec, do
; GFX11-NEXT: s_endpgm
entry:
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
%vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
%vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
%vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
%vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
store volatile <2 x double> %vec.0, ptr addrspace(1) undef
store volatile <2 x double> %vec.1, ptr addrspace(1) undef
store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@ -1289,10 +1289,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_s_s(<8 x double> %vec, double i
; GFX11-NEXT: s_endpgm
entry:
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
%vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
%vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
%vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
%vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
store volatile <2 x double> %vec.0, ptr addrspace(1) undef
store volatile <2 x double> %vec.1, ptr addrspace(1) undef
store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@ -1494,10 +1494,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_v_v(<8 x double> inreg %vec, do
; GFX11-NEXT: s_endpgm
entry:
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
%vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
%vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
%vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
%vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
store volatile <2 x double> %vec.0, ptr addrspace(1) undef
store volatile <2 x double> %vec.1, ptr addrspace(1) undef
store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@ -1617,10 +1617,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_s_v(<8 x double> %vec, double i
; GFX11-NEXT: s_endpgm
entry:
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
%vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
%vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
%vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
%vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
store volatile <2 x double> %vec.0, ptr addrspace(1) undef
store volatile <2 x double> %vec.1, ptr addrspace(1) undef
store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@ -1677,10 +1677,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_s(<8 x double> %vec, double %
; GFX11-NEXT: s_endpgm
entry:
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
%vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
%vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
%vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
%vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
store volatile <2 x double> %vec.0, ptr addrspace(1) undef
store volatile <2 x double> %vec.1, ptr addrspace(1) undef
store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@ -1794,10 +1794,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v(<8 x double> %vec, double %
; GFX11-NEXT: s_endpgm
entry:
%insert = insertelement <8 x double> %vec, double %val, i32 %idx
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
%vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
%vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
%vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
%vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
store volatile <2 x double> %vec.0, ptr addrspace(1) undef
store volatile <2 x double> %vec.1, ptr addrspace(1) undef
store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@ -2401,10 +2401,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_s_s_add_1(<8 x double> inreg %v
entry:
%idx.add = add i32 %idx, 1
%insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
%vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
%vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
%vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
%vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
store volatile <2 x double> %vec.0, ptr addrspace(1) undef
store volatile <2 x double> %vec.1, ptr addrspace(1) undef
store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@ -2525,10 +2525,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v_add_1(<8 x double> %vec, do
entry:
%idx.add = add i32 %idx, 1
%insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
%vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
%vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
%vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
%vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
%vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
%vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
%vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
%vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
store volatile <2 x double> %vec.0, ptr addrspace(1) undef
store volatile <2 x double> %vec.1, ptr addrspace(1) undef
store volatile <2 x double> %vec.2, ptr addrspace(1) undef

View File

@ -1519,7 +1519,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc
; CHECK-NEXT: S_ENDPGM 0
%load = load <2 x i64>, ptr addrspace(1) null
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 poison>, <3 x i32> <i32 0, i32 1, i32 2>
call void @external_void_func_v3i64(<3 x i64> %val)
ret void

View File

@ -322,7 +322,7 @@ define i32 @v_sdot2_shuffle10_a(<2 x i16> %a, <2 x i16> %b, i32 %c) {
; GFX10-NEXT: v_alignbit_b32 v0, v0, v0, 16
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%shuf.a = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
%shuf.a = shufflevector <2 x i16> %a, <2 x i16> poison, <2 x i32> <i32 1, i32 0>
%r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %shuf.a, <2 x i16> %b, i32 %c, i1 false)
ret i32 %r
}
@ -349,7 +349,7 @@ define i32 @v_sdot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) {
; GFX10-NEXT: v_alignbit_b32 v1, v1, v1, 16
; GFX10-NEXT: v_dot2_i32_i16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%shuf.b = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
%shuf.b = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> <i32 1, i32 0>
%r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %shuf.b, i32 %c, i1 false)
ret i32 %r
}

View File

@ -306,7 +306,7 @@ define i32 @v_udot2_shuffle10_a(<2 x i16> %a, <2 x i16> %b, i32 %c) {
; GFX10-NEXT: v_alignbit_b32 v0, v0, v0, 16
; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%shuf.a = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
%shuf.a = shufflevector <2 x i16> %a, <2 x i16> poison, <2 x i32> <i32 1, i32 0>
%r = call i32 @llvm.amdgcn.udot2(<2 x i16> %shuf.a, <2 x i16> %b, i32 %c, i1 false)
ret i32 %r
}
@ -332,7 +332,7 @@ define i32 @v_udot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) {
; GFX10-NEXT: v_alignbit_b32 v1, v1, v1, 16
; GFX10-NEXT: v_dot2_u32_u16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
%shuf.b = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
%shuf.b = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> <i32 1, i32 0>
%r = call i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %shuf.b, i32 %c, i1 false)
ret i32 %r
}

View File

@ -121,7 +121,7 @@ define amdgpu_ps i32 @s_trunc_v2i32_to_v2i16(<2 x i32> inreg %src) {
; ; FIXME: G_INSERT mishandled
; define <2 x i32> @v_trunc_v3i32_to_v3i16(<3 x i32> %src) {
; %trunc = trunc <3 x i32> %src to <3 x i16>
; %ext = shufflevector <3 x i16> %trunc, <3 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; %ext = shufflevector <3 x i16> %trunc, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; %cast = bitcast <4 x i16> %ext to <2 x i32>
; ret <2 x i32> %cast
; }
@ -129,7 +129,7 @@ define amdgpu_ps i32 @s_trunc_v2i32_to_v2i16(<2 x i32> inreg %src) {
; ; FIXME: G_INSERT mishandled
; define amdgpu_ps <2 x i32> @s_trunc_v3i32_to_v3i16(<3 x i32> inreg %src) {
; %trunc = trunc <3 x i32> %src to <3 x i16>
; %ext = shufflevector <3 x i16> %trunc, <3 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; %ext = shufflevector <3 x i16> %trunc, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; %cast = bitcast <4 x i16> %ext to <2 x i32>
; ret <2 x i32> %cast
; }

View File

@ -426,7 +426,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<8 x half> %A, <8 x
; GFX12-NEXT: s_endpgm
bb:
%C = load <16 x half>, ptr %Caddr
%C_shuffle = shufflevector <16 x half> %C, <16 x half> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%C_shuffle = shufflevector <16 x half> %C, <16 x half> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%fneg.C_shuffle = fneg <8 x half> %C_shuffle
%res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> %fneg.C_shuffle , i1 0)
store <8 x half> %res, ptr addrspace(1) %out

View File

@ -381,7 +381,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<4 x half> %A, <4 x
; GFX12-NEXT: s_endpgm
bb:
%C = load <8 x half>, ptr %Caddr
%C_shuffle = shufflevector <8 x half> %C, <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%C_shuffle = shufflevector <8 x half> %C, <8 x half> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%fneg.C_shuffle = fneg <4 x half> %C_shuffle
%res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half> %A, <4 x half> %B, <4 x half> %fneg.C_shuffle , i1 0)
store <4 x half> %res, ptr addrspace(1) %out

View File

@ -9,7 +9,7 @@ define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 {
main_body:
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
%tmp4 = extractelement <4 x float> %tmp3, i32 0
store volatile float %tmp4, ptr addrspace(1) undef
@ -25,7 +25,7 @@ define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 {
main_body:
%tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 poison, i32 poison>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
%tmp4 = extractelement <4 x float> %tmp3, i32 1
store volatile float %tmp4, ptr addrspace(1) undef
@ -41,7 +41,7 @@ define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 {
main_body:
%tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
%tmp4 = extractelement <4 x float> %tmp3, i32 0
store volatile float %tmp4, ptr addrspace(1) undef
@ -57,7 +57,7 @@ define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 {
main_body:
%tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp1 = bitcast <2 x float> %tmp to <2 x i32>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
%tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 poison, i32 poison>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
%tmp4 = extractelement <4 x float> %tmp3, i32 1
store volatile float %tmp4, ptr addrspace(1) undef
@ -68,7 +68,7 @@ define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 {
main_body:
%tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
%tmp1 = bitcast <4 x float> %tmp to <4 x i32>
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
%tmp4 = extractelement <4 x float> %tmp3, i32 0
store volatile float %tmp4, ptr addrspace(1) undef

View File

@ -477,7 +477,7 @@ entry:
then:
%x.1 = insertelement <5 x double> <double 3.140000e+00, double poison, double poison, double poison, double poison>, double %x, i32 %idx
%0 = shufflevector <5 x double> %x.1, <5 x double> <double poison, double poison, double poison, double 6.140000e+00, double 9.900000e+00>, <5 x i32> <i32 0, i32 1, i32 undef, i32 8, i32 9>
%0 = shufflevector <5 x double> %x.1, <5 x double> <double poison, double poison, double poison, double 6.140000e+00, double 9.900000e+00>, <5 x i32> <i32 0, i32 1, i32 poison, i32 8, i32 9>
%x.4 = insertelement <5 x double> %0, double %x, i64 2
br label %finally

View File

@ -18,7 +18,7 @@ define amdgpu_hs void @_amdgpu_hs_main(i32 inreg %arg, i32 inreg %arg1, i32 inre
.beginls: ; preds = %.entry
%tmp15 = extractelement <6 x i32> %arg8, i32 3
%.0.vec.insert.i = insertelement <2 x i32> poison, i32 %tmp15, i32 0
%.4.vec.insert.i = shufflevector <2 x i32> %.0.vec.insert.i, <2 x i32> undef, <2 x i32> <i32 0, i32 3>
%.4.vec.insert.i = shufflevector <2 x i32> %.0.vec.insert.i, <2 x i32> poison, <2 x i32> <i32 0, i32 3>
%tmp16 = bitcast <2 x i32> %.4.vec.insert.i to i64
br label %.endls

View File

@ -49,7 +49,7 @@ main_body:
%buf1.int = ptrtoint ptr addrspace(8) %buf1 to i128
%buf1.vec = bitcast i128 %buf1.int to <4 x i32>
call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> %buf1.vec, ptr addrspace(8) %buf2, i32 0, i32 0, i32 0)
%shuffled = shufflevector <2 x ptr addrspace(8)> %buffers, <2 x ptr addrspace(8)> undef, <2 x i32> <i32 1, i32 0>
%shuffled = shufflevector <2 x ptr addrspace(8)> %buffers, <2 x ptr addrspace(8)> poison, <2 x i32> <i32 1, i32 0>
%somewhere.next = getelementptr <2 x ptr addrspace(8)>, ptr addrspace(1) %somewhere, i64 1
store <2 x ptr addrspace(8)> %shuffled, ptr addrspace(1) %somewhere.next
ret void

View File

@ -45,7 +45,7 @@ bb1789: ; preds = %bb1750
%i1879 = bitcast <3 x i32> %i1878 to <3 x float>
%i1881 = fmul reassoc nnan nsz arcp contract afn <3 x float> %i1540, %i1879
%i1882 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> poison, i32 poison, i32 0)
%i1883 = shufflevector <3 x i32> %i1882, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
%i1883 = shufflevector <3 x i32> %i1882, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
%i1884 = bitcast <4 x i32> %i1883 to <4 x float>
%i1885 = shufflevector <4 x float> %i1884, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
%i1886 = insertelement <3 x i32> poison, i32 %i1819, i64 0
@ -57,7 +57,7 @@ bb1789: ; preds = %bb1750
%i1892 = fmul reassoc nnan nsz arcp contract afn <3 x float> %i1885, %i1891
%i1893 = fmul reassoc nnan nsz arcp contract afn <3 x float> %i1892, %i1881
%i1894 = bitcast <3 x float> %i1893 to <3 x i32>
%i1895 = shufflevector <3 x i32> %i1894, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
%i1895 = shufflevector <3 x i32> %i1894, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
%i1896 = insertelement <4 x i32> %i1895, i32 %i1819, i64 3
br label %bb1897
@ -70,7 +70,7 @@ bb1897: ; preds = %bb1789, %bb1787
%i1901 = bitcast <3 x i32> %i1900 to <3 x float>
%i1902 = fadd reassoc nnan nsz arcp contract afn <3 x float> %i1901, %i1899
%i1903 = bitcast <3 x float> %i1902 to <3 x i32>
%i1907 = shufflevector <3 x i32> %i1903, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
%i1907 = shufflevector <3 x i32> %i1903, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
%i1908 = shufflevector <4 x i32> %i1907, <4 x i32> %__llpc_global_proxy_r11.19, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
%i1914 = shufflevector <4 x i32> %i1908, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
%__llpc_global_proxy_r3.12.vec.extract2358 = extractelement <2 x i32> zeroinitializer, i64 1

View File

@ -168,7 +168,7 @@ define void @undef_lo2_v4i16(<2 x i16> %arg0) {
; GFX8-NEXT: ; use v[0:1]
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
%undef.lo = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
%undef.lo = shufflevector <2 x i16> %arg0, <2 x i16> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.lo);
ret void
}
@ -193,7 +193,7 @@ define void @undef_lo2_v4f16(<2 x half> %arg0) {
; GFX8-NEXT: ; use v[0:1]
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
%undef.lo = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
%undef.lo = shufflevector <2 x half> %arg0, <2 x half> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
call void asm sideeffect "; use $0", "v"(<4 x half> %undef.lo);
ret void
}
@ -348,7 +348,7 @@ define void @undef_hi2_v4i16(<2 x i16> %arg0) {
; GFX8-NEXT: ; use v[0:1]
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
%undef.hi = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%undef.hi = shufflevector <2 x i16> %arg0, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.hi);
ret void
}
@ -369,7 +369,7 @@ define void @undef_hi2_v4f16(<2 x half> %arg0) {
; GFX8-NEXT: ; use v[0:1]
; GFX8-NEXT: ;;#ASMEND
; GFX8-NEXT: s_setpc_b64 s[30:31]
%undef.hi = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%undef.hi = shufflevector <2 x half> %arg0, <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
ret void
}

View File

@ -1454,7 +1454,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
; HSA-NEXT: s_swappc_b64 s[30:31], s[8:9]
; HSA-NEXT: s_endpgm
%load = load <2 x i64>, ptr addrspace(1) null
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 poison>, <3 x i32> <i32 0, i32 1, i32 2>
call void @external_void_func_v3i64(<3 x i64> %val)
ret void

View File

@ -1293,7 +1293,7 @@ define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_shuf(ptr addrspace(1) %o
%out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
%a = load <2 x half>, ptr addrspace(1) %gep0
%add = fadd <2 x half> %a, <half 1.0, half 1.0>
%shuf = shufflevector <2 x half> %add, <2 x half> undef, <2 x i32> <i32 1, i32 0>
%shuf = shufflevector <2 x half> %add, <2 x half> poison, <2 x i32> <i32 1, i32 0>
%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %shuf, <2 x half> zeroinitializer)
%clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)

View File

@ -3728,7 +3728,7 @@ define amdgpu_kernel void @v_clamp_v2f16_shuffle(ptr addrspace(1) %out, ptr addr
%gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
%out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
%a = load <2 x half>, ptr addrspace(1) %gep0
%shuf = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 0>
%shuf = shufflevector <2 x half> %a, <2 x half> poison, <2 x i32> <i32 1, i32 0>
%max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %shuf, <2 x half> zeroinitializer)
%med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)

View File

@ -14,7 +14,7 @@ bb:
%lid = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep1 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg, i32 %lid
%load = load <4 x i32>, ptr addrspace(1) %gep1, align 16
%shuffle = shufflevector <4 x i32> %load, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
%shuffle = shufflevector <4 x i32> %load, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
%gep2 = getelementptr inbounds <2 x i32>, ptr addrspace(1) %arg1, i32 %lid
store <2 x i32> %shuffle, ptr addrspace(1) %gep2, align 8
ret void
@ -27,9 +27,9 @@ bb:
define amdgpu_kernel void @test_vector_creation() #0 {
entry:
%tmp231 = load <4 x i16>, ptr addrspace(1) undef, align 2
%vext466 = shufflevector <4 x i16> %tmp231, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
%vecinit467 = shufflevector <8 x i16> undef, <8 x i16> %vext466, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 10, i32 11, i32 undef, i32 undef>
%vecinit471 = shufflevector <8 x i16> %vecinit467, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
%vext466 = shufflevector <4 x i16> %tmp231, <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
%vecinit467 = shufflevector <8 x i16> poison, <8 x i16> %vext466, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison>
%vecinit471 = shufflevector <8 x i16> %vecinit467, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
store <8 x i16> %vecinit471, ptr addrspace(1) undef, align 16
ret void
}

View File

@ -23,12 +23,12 @@ for.body: ; preds = %for.body, %entry
%i.01 = phi i32 [ 0, %entry ], [ %tmp19, %for.body ]
%vecload2 = load <8 x i32>, ptr addrspace(1) %src, align 32
%0 = bitcast <8 x i32> %vecload2 to <32 x i8>
%tmp5 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%tmp8 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%tmp5 = shufflevector <32 x i8> %0, <32 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%tmp8 = shufflevector <32 x i8> %0, <32 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%tmp9 = add nsw <8 x i8> %tmp5, %tmp8
%tmp12 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
%tmp12 = shufflevector <32 x i8> %0, <32 x i8> poison, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
%tmp13 = add nsw <8 x i8> %tmp9, %tmp12
%tmp16 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%tmp16 = shufflevector <32 x i8> %0, <32 x i8> poison, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
%tmp17 = add nsw <8 x i8> %tmp13, %tmp16
%scevgep = getelementptr <8 x i8>, ptr addrspace(1) %result, i32 %i.01
%1 = bitcast <8 x i8> %tmp17 to <2 x i32>

View File

@ -3,7 +3,7 @@
; CHECK: s_waitcnt
define <2 x i16> @main(<2 x float>) #0 {
%2 = bitcast <2 x float> %0 to <4 x i16>
%3 = shufflevector <4 x i16> %2, <4 x i16> undef, <2 x i32> <i32 0, i32 undef>
%3 = shufflevector <4 x i16> %2, <4 x i16> poison, <2 x i32> <i32 0, i32 poison>
%4 = extractelement <4 x i16> %2, i32 0
%5 = insertelement <2 x i16> %3, i16 %4, i32 0
ret <2 x i16> %5

View File

@ -289,8 +289,8 @@ define amdgpu_kernel void @test_concat_v16i16(ptr addrspace(1) %out, <16 x i16>
define amdgpu_kernel void @concat_vector_crash(ptr addrspace(1) %out, ptr addrspace(1) %in) {
bb:
%tmp = load <2 x float>, ptr addrspace(1) %in, align 4
%tmp1 = shufflevector <2 x float> %tmp, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%tmp2 = shufflevector <8 x float> undef, <8 x float> %tmp1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
%tmp1 = shufflevector <2 x float> %tmp, <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%tmp2 = shufflevector <8 x float> poison, <8 x float> %tmp1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
store <8 x float> %tmp2, ptr addrspace(1) %out, align 32
ret void
}
@ -301,8 +301,8 @@ define amdgpu_kernel void @concat_vector_crash2(ptr addrspace(1) %out, ptr addrs
%tmp = load i32, ptr addrspace(1) %in, align 1
%tmp1 = trunc i32 %tmp to i24
%tmp2 = bitcast i24 %tmp1 to <3 x i8>
%tmp3 = shufflevector <3 x i8> %tmp2, <3 x i8> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef>
%tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 7, i8 8>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 15>
%tmp3 = shufflevector <3 x i8> %tmp2, <3 x i8> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison>
%tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 7, i8 8>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 15>
store <8 x i8> %tmp4, ptr addrspace(1) %out, align 8
ret void
}

View File

@ -17,7 +17,7 @@ define amdgpu_ps void @main(i32 %in1, i32 inreg %arg) local_unnamed_addr {
bb:
%__llpc_global_proxy_r5.12.vec.insert = insertelement <4 x i32> poison, i32 %in1, i32 3
%tmp3 = shufflevector <4 x i32> %__llpc_global_proxy_r5.12.vec.insert, <4 x i32> undef, <3 x i32> <i32 undef, i32 undef, i32 1>
%tmp3 = shufflevector <4 x i32> %__llpc_global_proxy_r5.12.vec.insert, <4 x i32> poison, <3 x i32> <i32 poison, i32 poison, i32 1>
%tmp4 = bitcast <3 x i32> %tmp3 to <3 x float>
%a2.i123 = extractelement <3 x float> %tmp4, i32 2
%tmp5 = bitcast float %a2.i123 to i32
@ -26,7 +26,7 @@ bb:
bb12:
%__llpc_global_proxy_r2.0 = phi <4 x i32> [ %__llpc_global_proxy_r2.0.vec.insert196, %bb ], [ poison, %.entry ]
%tmp6 = shufflevector <4 x i32> %__llpc_global_proxy_r2.0, <4 x i32> undef, <3 x i32> <i32 1, i32 2, i32 3>
%tmp6 = shufflevector <4 x i32> %__llpc_global_proxy_r2.0, <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3>
%tmp7 = bitcast <3 x i32> %tmp6 to <3 x float>
%a0.i = extractelement <3 x float> %tmp7, i32 0
ret void

View File

@ -146,7 +146,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
%0 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
%.i2243 = extractelement <3 x float> %0, i32 2
%1 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 0, i32 0)
%2 = shufflevector <3 x i32> %1, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
%2 = shufflevector <3 x i32> %1, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
%3 = bitcast <4 x i32> %2 to <4 x float>
%.i2248 = extractelement <4 x float> %3, i32 2
%.i2249 = fmul reassoc nnan nsz arcp contract afn float %.i2243, %.i2248
@ -159,17 +159,17 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
%.i0364 = extractelement <2 x float> %7, i32 0
%8 = call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
%9 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 112, i32 0)
%10 = shufflevector <3 x i32> %9, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
%10 = shufflevector <3 x i32> %9, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
%11 = bitcast <4 x i32> %10 to <4 x float>
%.i2360 = extractelement <4 x float> %11, i32 2
%.i2363 = fmul reassoc nnan nsz arcp contract afn float %.i2360, %8
%12 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 96, i32 0)
%13 = shufflevector <3 x i32> %12, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
%13 = shufflevector <3 x i32> %12, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
%14 = bitcast <4 x i32> %13 to <4 x float>
%.i2367 = extractelement <4 x float> %14, i32 2
%.i2370 = fmul reassoc nnan nsz arcp contract afn float %.i0364, %.i2367
%15 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 32, i32 0)
%16 = shufflevector <3 x i32> %15, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
%16 = shufflevector <3 x i32> %15, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
%17 = bitcast <4 x i32> %16 to <4 x float>
%.i2373 = extractelement <4 x float> %17, i32 2
%.i2376 = fsub reassoc nnan nsz arcp contract afn float %.i2373, %.i2370
@ -212,12 +212,12 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
%.i2466 = fmul reassoc nnan nsz arcp contract afn float %.i2465, %43
%.i2469 = fmul reassoc nnan nsz arcp contract afn float %.i2415, %.i2466
%45 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 64, i32 0)
%46 = shufflevector <3 x i32> %45, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
%46 = shufflevector <3 x i32> %45, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
%47 = bitcast <4 x i32> %46 to <4 x float>
%.i2476 = extractelement <4 x float> %47, i32 2
%.i2479 = fmul reassoc nnan nsz arcp contract afn float %.i2476, %18
%48 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 80, i32 0)
%49 = shufflevector <3 x i32> %48, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
%49 = shufflevector <3 x i32> %48, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
%50 = bitcast <4 x i32> %49 to <4 x float>
%.i2482 = extractelement <4 x float> %50, i32 2
%.i2485 = fsub reassoc nnan nsz arcp contract afn float %.i2482, %.i2479
@ -230,7 +230,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
%.i2522 = fadd reassoc nnan nsz arcp contract afn float %.i2521, %.i2516
%.i2525 = fmul reassoc nnan nsz arcp contract afn float %.i2522, %43
%52 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 16, i32 0)
%53 = shufflevector <3 x i32> %52, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
%53 = shufflevector <3 x i32> %52, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
%54 = bitcast <4 x i32> %53 to <4 x float>
%.i2530 = extractelement <4 x float> %54, i32 2
%.i2531 = fmul reassoc nnan nsz arcp contract afn float %.i2333, %.i2530

View File

@ -16,7 +16,7 @@ entry:
%sint = load i32, ptr addrspace(1) %in
%conv = sitofp i32 %sint to float
%0 = insertelement <4 x float> poison, float %conv, i32 0
%splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
%splat = shufflevector <4 x float> %0, <4 x float> poison, <4 x i32> zeroinitializer
store <4 x float> %splat, ptr addrspace(1) %out
ret void
}
@ -30,7 +30,7 @@ entry:
%uint = load i32, ptr addrspace(1) %in
%conv = uitofp i32 %uint to float
%0 = insertelement <4 x float> poison, float %conv, i32 0
%splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
%splat = shufflevector <4 x float> %0, <4 x float> poison, <4 x i32> zeroinitializer
store <4 x float> %splat, ptr addrspace(1) %out
ret void
}

View File

@ -7,7 +7,7 @@ bb:
%tmp = load i32, ptr addrspace(1) undef, align 4
%tmp1 = load <4 x float>, ptr addrspace(1) undef, align 16
%tmp2 = sext i32 %tmp to i64
%tmp3 = shufflevector <4 x float> undef, <4 x float> %tmp1, <2 x i32> <i32 3, i32 7>
%tmp3 = shufflevector <4 x float> poison, <4 x float> %tmp1, <2 x i32> <i32 3, i32 7>
%tmp4 = call float @barney() #2
%tmp9 = getelementptr inbounds %struct.wombat, ptr addrspace(1) %arg, i64 %tmp2, i32 2, i64 0
%tmp10 = load i32, ptr addrspace(1) %tmp9, align 4
@ -53,7 +53,7 @@ bb28: ; preds = %bb25, %bb21
%tmp45 = fadd float undef, undef
%tmp46 = fdiv float %tmp44, %tmp45
%tmp47 = insertelement <4 x float> poison, float %tmp46, i32 0
%tmp48 = shufflevector <4 x float> %tmp47, <4 x float> undef, <4 x i32> zeroinitializer
%tmp48 = shufflevector <4 x float> %tmp47, <4 x float> poison, <4 x i32> zeroinitializer
%tmp49 = fsub <4 x float> %tmp48, %tmp40
%tmp50 = extractelement <4 x float> %tmp41, i32 1
%tmp51 = extractelement <4 x float> %tmp42, i32 2
@ -71,7 +71,7 @@ bb28: ; preds = %bb25, %bb21
call void @llvm.dbg.value(metadata <4 x float> %tmp29, metadata !3, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #2, !dbg !5
%tmp59 = bitcast i64 %tmp35 to <2 x float>
%tmp60 = insertelement <2 x float> poison, float %tmp58, i32 0
%tmp61 = shufflevector <2 x float> %tmp60, <2 x float> undef, <2 x i32> zeroinitializer
%tmp61 = shufflevector <2 x float> %tmp60, <2 x float> poison, <2 x i32> zeroinitializer
%tmp62 = fmul <2 x float> %tmp61, undef
%tmp63 = fsub <2 x float> %tmp62, %tmp59
%tmp64 = extractelement <2 x float> %tmp63, i64 0

View File

@ -26,7 +26,7 @@ entry:
%m_scaleMotion = getelementptr inbounds %struct.ShapeData, ptr addrspace(1) %call, i64 0, i32 4
%tmp2 = load <4 x float>, ptr addrspace(1) %m_scaleMotion, align 16
%splat.splatinsert = insertelement <4 x float> poison, float %time, i32 0
%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
%splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
%tmp3 = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %tmp2, <4 x float> %splat.splat, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>)
%tmp4 = load <4 x float>, ptr addrspace(1) %call, align 16
%m_quaternion = getelementptr inbounds %struct.ShapeData, ptr addrspace(1) %call, i64 0, i32 1
@ -61,8 +61,8 @@ entry:
%tmp24 = insertelement <4 x float> %tmp23, float %tmp19, i32 1
%tmp25 = insertelement <4 x float> %tmp24, float %tmp22, i32 2
%tmp26 = extractelement <4 x float> %tmp5, i64 3
%splat.splat.i8.i = shufflevector <4 x float> %tmp5, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
%splat.splat2.i9.i = shufflevector <4 x float> %tmp10, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
%splat.splat.i8.i = shufflevector <4 x float> %tmp5, <4 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
%splat.splat2.i9.i = shufflevector <4 x float> %tmp10, <4 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
%mul3.i10.i = fmul <4 x float> %tmp5, %splat.splat2.i9.i
%tmp27 = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %splat.splat.i8.i, <4 x float> %tmp10, <4 x float> %mul3.i10.i)
%add.i11.i = fadd <4 x float> %tmp27, %tmp25
@ -94,7 +94,7 @@ entry:
%tmp52 = insertelement <4 x float> <float poison, float poison, float poison, float 0.000000e+00>, float %tmp44, i32 0
%tmp53 = insertelement <4 x float> %tmp52, float %tmp48, i32 1
%tmp54 = insertelement <4 x float> %tmp53, float %tmp51, i32 2
%splat.splat.i.i = shufflevector <4 x float> %tmp39, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
%splat.splat.i.i = shufflevector <4 x float> %tmp39, <4 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
%tmp55 = extractelement <4 x float> %tmp5, i32 3
%mul3.i.i = fmul <4 x float> %splat.splat.i8.i, %tmp39
%tmp56 = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %splat.splat.i.i, <4 x float> %vecinit5.i.i, <4 x float> %mul3.i.i)
@ -113,12 +113,12 @@ entry:
%tmp66 = extractelement <4 x float> %tmp1, i64 3
%mul3 = fmul float %tmp66, %time
%tmp67 = insertelement <4 x float> %tmp1, float 0.000000e+00, i32 3
%tmp68 = shufflevector <4 x float> %tmp67, <4 x float> %tmp1, <4 x i32> <i32 0, i32 5, i32 undef, i32 3>
%tmp68 = shufflevector <4 x float> %tmp67, <4 x float> %tmp1, <4 x i32> <i32 0, i32 5, i32 poison, i32 3>
%vecinit3.i.i = shufflevector <4 x float> %tmp68, <4 x float> %tmp1, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
%tmp69 = fcmp oeq <4 x float> %vecinit3.i.i, zeroinitializer
%tmp70 = sext <4 x i1> %tmp69 to <4 x i32>
%tmp71 = shufflevector <4 x i32> %tmp70, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%tmp72 = shufflevector <4 x i32> %tmp70, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
%tmp71 = shufflevector <4 x i32> %tmp70, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
%tmp72 = shufflevector <4 x i32> %tmp70, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
%tmp73 = and <2 x i32> %tmp71, %tmp72
%tmp74 = extractelement <2 x i32> %tmp73, i64 0
%tmp75 = extractelement <2 x i32> %tmp73, i64 1
@ -205,7 +205,7 @@ bb141: ; preds = %bb109, %bb98, %bb96
%tmp143 = phi float [ %tmp95, %bb86 ], [ %tmp140, %bb109 ], [ %tmp107, %bb98 ], [ %tmp84, %bb96 ]
%tmp144 = tail call float @llvm.amdgcn.rsq.f32(float %tmp143)
%tmp145 = insertelement <4 x float> poison, float %tmp144, i32 0
%tmp146 = shufflevector <4 x float> %tmp145, <4 x float> undef, <4 x i32> zeroinitializer
%tmp146 = shufflevector <4 x float> %tmp145, <4 x float> poison, <4 x i32> zeroinitializer
%tmp147 = fmul <4 x float> %tmp142, %tmp146
br label %qtSet.exit

View File

@ -331,7 +331,7 @@ if:
endif:
%r = phi <3 x i32> [ %v, %entry ], [ %u, %if ]
%r.ext = shufflevector <3 x i32> %r, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%r.ext = shufflevector <3 x i32> %r, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
call void asm sideeffect "; reg use $0", "s"(<4 x i32> %r.ext) #0
ret void
}

View File

@ -156,7 +156,7 @@ F:
exit:
%m = phi <8 x i16> [ %t, %T ], [ %f, %F ]
%v2 = shufflevector <8 x i16> %m, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
%v2 = shufflevector <8 x i16> %m, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
%b2 = icmp sgt <4 x i16> %v2, <i16 -1, i16 -1, i16 -1, i16 -1>
%r2 = select <4 x i1> %b2, <4 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768>, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>
ret <4 x i16> %r2
@ -317,7 +317,7 @@ F:
exit:
%m = phi <8 x i16> [ %t, %T ], [ %f, %F ]
%v2 = shufflevector <8 x i16> %m, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%v2 = shufflevector <8 x i16> %m, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%b2 = icmp sgt <4 x i16> %v2, <i16 -1, i16 -1, i16 -1, i16 -1>
%r2 = select <4 x i1> %b2, <4 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768>, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>
ret <4 x i16> %r2
@ -482,7 +482,7 @@ F:
exit:
%m = phi <8 x half> [ %t, %T ], [ %f, %F ]
%v2 = shufflevector <8 x half> %m, <8 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
%v2 = shufflevector <8 x half> %m, <8 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
%b2 = fcmp ugt <4 x half> %v2, <half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800>
%r2 = select <4 x i1> %b2, <4 x half> <half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900>, <4 x half> <half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00>
ret <4 x half> %r2
@ -685,7 +685,7 @@ F:
exit:
%m = phi <16 x i16> [ %t, %T ], [ %f, %F ]
%v2 = shufflevector <16 x i16> %m, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
%v2 = shufflevector <16 x i16> %m, <16 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
%b2 = icmp sgt <4 x i16> %v2, <i16 -1, i16 -1, i16 -1, i16 -1>
%r2 = select <4 x i1> %b2, <4 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768>, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>
ret <4 x i16> %r2
@ -890,7 +890,7 @@ F:
exit:
%m = phi <16 x i16> [ %t, %T ], [ %f, %F ]
%v2 = shufflevector <16 x i16> %m, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%v2 = shufflevector <16 x i16> %m, <16 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%b2 = icmp sgt <4 x i16> %v2, <i16 -1, i16 -1, i16 -1, i16 -1>
%r2 = select <4 x i1> %b2, <4 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768>, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>
ret <4 x i16> %r2
@ -1099,7 +1099,7 @@ F:
exit:
%m = phi <16 x half> [ %t, %T ], [ %f, %F ]
%v2 = shufflevector <16 x half> %m, <16 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
%v2 = shufflevector <16 x half> %m, <16 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
%b2 = fcmp ugt <4 x half> %v2, <half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800>
%r2 = select <4 x i1> %b2, <4 x half> <half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900>, <4 x half> <half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00>
ret <4 x half> %r2
@ -1184,8 +1184,8 @@ define <8 x i16> @large_vector(ptr addrspace(3) %p, i32 %idxp) {
%x.7 = load i16, ptr addrspace(3) %p.7, align 2
%v3 = insertelement <8 x i16> %v3p, i16 %x.7, i32 1
%z.1 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
%z.2 = shufflevector <8 x i16> %z.1, <8 x i16> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 undef, i32 undef>
%z.1 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
%z.2 = shufflevector <8 x i16> %z.1, <8 x i16> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
%z.3 = shufflevector <8 x i16> %z.2, <8 x i16> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
ret <8 x i16> %z.3
}
@ -1464,7 +1464,7 @@ F:
exit:
%m = phi <16 x i16> [ %t, %T ], [ %f, %F ]
%v2 = shufflevector <16 x i16> %m, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%v2 = shufflevector <16 x i16> %m, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%b2 = icmp ugt <8 x i16> %v2, <i16 u0x3800, i16 u0x3800, i16 u0x3800, i16 u0x3800, i16 u0x3800, i16 u0x3800, i16 u0x3800, i16 u0x3800>
%r2 = select <8 x i1> %b2, <8 x i16> <i16 u0x3900, i16 u0x3900, i16 u0x3900, i16 u0x3900, i16 u0x3900, i16 u0x3900, i16 u0x3900, i16 u0x3900>, <8 x i16> <i16 u0x3D00, i16 u0x3D00, i16 u0x3D00, i16 u0x3D00, i16 u0x3D00, i16 u0x3D00, i16 u0x3D00, i16 u0x3D00>
ret <8 x i16> %r2
@ -1755,7 +1755,7 @@ F:
exit:
%m = phi <16 x half> [ %t, %T ], [ %f, %F ]
%v2 = shufflevector <16 x half> %m, <16 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%v2 = shufflevector <16 x half> %m, <16 x half> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%b2 = fcmp ugt <8 x half> %v2, <half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800>
%r2 = select <8 x i1> %b2, <8 x half> <half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900>, <8 x half> <half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00>
ret <8 x half> %r2

View File

@ -13,7 +13,7 @@ define <3 x i32> @quux() {
; CHECK-NEXT: v_mov_b32_e32 v2, 1
; CHECK-NEXT: s_setpc_b64 s[30:31]
bb:
%tmp = shufflevector <4 x i8> <i8 1, i8 2, i8 3, i8 4>, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
%tmp = shufflevector <4 x i8> <i8 1, i8 2, i8 3, i8 4>, <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
%tmp1 = extractelement <3 x i8> %tmp, i64 0
%tmp2 = zext i8 %tmp1 to i32
%tmp3 = insertelement <3 x i32> poison, i32 %tmp2, i32 0

View File

@ -90,7 +90,7 @@ F:
exit:
%m = phi <8 x i16> [ %t, %T ], [ %f, %F ]
%v2 = shufflevector <8 x i16> %m, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
%v2 = shufflevector <8 x i16> %m, <8 x i16> poison, <2 x i32> <i32 0, i32 1>
%b2 = icmp sgt <2 x i16> %v2, <i16 -1, i16 -1>
%r2 = select <2 x i1> %b2, <2 x i16> <i16 -32768, i16 -32768>, <2 x i16> <i16 -1, i16 -1>
ret <2 x i16> %r2
@ -161,7 +161,7 @@ F:
exit:
%m = phi <8 x i64> [ %t, %T ], [ %f, %F ]
%v2 = shufflevector <8 x i64> %m, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
%v2 = shufflevector <8 x i64> %m, <8 x i64> poison, <2 x i32> <i32 0, i32 1>
%b2 = icmp sgt <2 x i64> %v2, <i64 -1, i64 -1>
%r2 = select <2 x i1> %b2, <2 x i64> <i64 -32768, i64 -32768>, <2 x i64> <i64 -1, i64 -1>
ret <2 x i64> %r2
@ -238,7 +238,7 @@ F:
exit:
%m = phi <8 x i64> [ %t, %T ], [ %f, %F ]
%v2 = shufflevector <8 x i64> %m, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%v2 = shufflevector <8 x i64> %m, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%b2 = icmp sgt <4 x i64> %v2, <i64 -1, i64 -1, i64 -1, i64 -1>
%r2 = select <4 x i1> %b2, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768>, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>
ret <4 x i64> %r2
@ -342,7 +342,7 @@ F:
exit:
%m = phi <16 x i64> [ %t, %T ], [ %f, %F ]
%v2 = shufflevector <16 x i64> %m, <16 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%v2 = shufflevector <16 x i64> %m, <16 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%b2 = icmp sgt <8 x i64> %v2, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
%r2 = select <8 x i1> %b2, <8 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768>, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
ret <8 x i64> %r2
@ -413,7 +413,7 @@ F:
exit:
%m = phi <8 x double> [ %t, %T ], [ %f, %F ]
%v2 = shufflevector <8 x double> %m, <8 x double> undef, <2 x i32> <i32 0, i32 1>
%v2 = shufflevector <8 x double> %m, <8 x double> poison, <2 x i32> <i32 0, i32 1>
%b2 = fcmp ogt <2 x double> %v2, <double -1.0, double -1.0>
%r2 = select <2 x i1> %b2, <2 x double> <double -2.0, double -2.0>, <2 x double> <double -1.0, double -1.0>
ret <2 x double> %r2
@ -490,7 +490,7 @@ F:
exit:
%m = phi <8 x double> [ %t, %T ], [ %f, %F ]
%v2 = shufflevector <8 x double> %m, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%v2 = shufflevector <8 x double> %m, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%b2 = fcmp ogt <4 x double> %v2, <double -1.0, double -1.0, double -1.0, double -1.0>
%r2 = select <4 x i1> %b2, <4 x double> <double -2.0, double -2.0, double -2.0, double -2.0>, <4 x double> <double -1.0, double -1.0, double -1.0, double -1.0>
ret <4 x double> %r2
@ -594,7 +594,7 @@ F:
exit:
%m = phi <16 x double> [ %t, %T ], [ %f, %F ]
%v2 = shufflevector <16 x double> %m, <16 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%v2 = shufflevector <16 x double> %m, <16 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%b2 = fcmp ogt <8 x double> %v2, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
%r2 = select <8 x i1> %b2, <8 x double> <double -2.0, double -2.0, double -2.0, double -2.0, double -2.0, double -2.0, double -2.0, double -2.0>, <8 x double> <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
ret <8 x double> %r2

View File

@ -27,7 +27,7 @@ define amdgpu_hs void @main(ptr addrspace(6) inreg %arg) {
main_body:
%tmp25 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) undef, i32 undef, i32 0, i32 0)
%tmp27 = bitcast <4 x float> %tmp25 to <16 x i8>
%tmp28 = shufflevector <16 x i8> %tmp27, <16 x i8> undef, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
%tmp28 = shufflevector <16 x i8> %tmp27, <16 x i8> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
%tmp29 = bitcast <12 x i8> %tmp28 to <3 x i32>
call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %tmp29, ptr addrspace(8) undef, i32 undef, i32 0, i32 0) #3
ret void

View File

@ -39,7 +39,7 @@ bb14: ; preds = %bb14, %bb11
%tmp25 = load float, ptr addrspace(4) %tmp24, align 4
%tmp26 = fptrunc float %tmp25 to half
%tmp27 = insertelement <4 x half> poison, half %tmp26, i32 0
%tmp28 = shufflevector <4 x half> %tmp27, <4 x half> undef, <4 x i32> zeroinitializer
%tmp28 = shufflevector <4 x half> %tmp27, <4 x half> poison, <4 x i32> zeroinitializer
%vec.A.0 = extractelement <4 x half> %tmp21, i32 0
%vec.B.0 = extractelement <4 x half> %tmp28, i32 0
%vec.C.0 = extractelement <4 x half> %tmp15, i32 0

View File

@ -1585,7 +1585,7 @@ define amdgpu_kernel void @fnge_select_f32_multi_use_regression(float %.i2369) {
bb: ; preds = %.entry
%i2 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> zeroinitializer, i32 1, i32 0)
%i3 = shufflevector <2 x i32> %i2, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%i3 = shufflevector <2 x i32> %i2, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
%i4 = bitcast <4 x i32> %i3 to <4 x float>
%.i0753 = extractelement <4 x float> %i4, i64 0
br label %bb5

View File

@ -1841,7 +1841,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 {
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%load = load <2 x i64>, ptr addrspace(1) null
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 poison>, <3 x i32> <i32 0, i32 1, i32 2>
call amdgpu_gfx void @external_void_func_v3i64(<3 x i64> %val)
ret void
@ -10439,7 +10439,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 {
; GFX10-SCRATCH-NEXT: s_waitcnt vmcnt(0)
; GFX10-SCRATCH-NEXT: s_setpc_b64 s[30:31]
%load = load <2 x i64>, ptr addrspace(4) null
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
%val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 poison>, <3 x i32> <i32 0, i32 1, i32 2>
call amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> inreg %val)
ret void

View File

@ -48,6 +48,6 @@ define <4 x half> @shuffle_v4f16_234u(ptr addrspace(1) %arg0, ptr addrspace(1) %
; NOXNACK-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, ptr addrspace(1) %arg0
%val1 = load <4 x half>, ptr addrspace(1) %arg1
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 2, i32 3, i32 4, i32 undef>
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 2, i32 3, i32 4, i32 poison>
ret <4 x half> %shuffle
}

View File

@ -36,7 +36,7 @@ main_body:
define amdgpu_ps <2 x float> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
main_body:
%tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
%ext = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%ext = shufflevector <3 x half> %tex, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%r = bitcast <4 x half> %ext to <2 x float>
ret <2 x float> %r
}
@ -90,7 +90,7 @@ main_body:
define amdgpu_ps <2 x float> @image_load_3d_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
main_body:
%tex = call <3 x half> @llvm.amdgcn.image.load.3d.v3f16.i32(i32 7, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
%ext = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%ext = shufflevector <3 x half> %tex, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%res = bitcast <4 x half> %ext to <2 x float>
ret <2 x float> %res
}
@ -129,7 +129,7 @@ main_body:
define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x float> %in) {
main_body:
%r = bitcast <2 x float> %in to <4 x half>
%data = shufflevector <4 x half> %r, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
%data = shufflevector <4 x half> %r, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %data, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
ret void
}

View File

@ -315,7 +315,7 @@ define amdgpu_ps <2 x float> @image_sample_b_2d_v3f16(<8 x i32> inreg %rsrc, <4
; GFX12-NEXT: ; return to shader part epilog
main_body:
%tex = call <3 x half> @llvm.amdgcn.image.sample.b.2d.v3f16.f32.f32(i32 7, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
%tex_wide = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%tex_wide = shufflevector <3 x half> %tex, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%r = bitcast <4 x half> %tex_wide to <2 x float>
ret <2 x float> %r
}
@ -410,7 +410,7 @@ define amdgpu_ps <4 x float> @image_sample_b_2d_v3f16_tfe(<8 x i32> inreg %rsrc,
main_body:
%tex = call {<3 x half>,i32} @llvm.amdgcn.image.sample.b.2d.v3f16i32.f32.f32(i32 7, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0)
%tex.vec = extractvalue {<3 x half>, i32} %tex, 0
%tex.vec_wide = shufflevector <3 x half> %tex.vec, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%tex.vec_wide = shufflevector <3 x half> %tex.vec, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%tex.err = extractvalue {<3 x half>, i32} %tex, 1
%tex.vecf = bitcast <4 x half> %tex.vec_wide to <2 x float>
%tex.vecf.0 = extractelement <2 x float> %tex.vecf, i32 0

View File

@ -2394,7 +2394,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc,
; GFX12-NEXT: ; return to shader part epilog
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
%out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 0, i32 1>
ret <2 x float> %out
}
@ -2436,7 +2436,7 @@ define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc,
; GFX12-NEXT: ; return to shader part epilog
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
%out = shufflevector <4 x float> %r, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
ret <3 x float> %out
}
@ -2478,7 +2478,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc,
; GFX12-NEXT: ; return to shader part epilog
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
%out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 1, i32 2>
ret <2 x float> %out
}
@ -2520,7 +2520,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc,
; GFX12-NEXT: ; return to shader part epilog
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 3>
%out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 0, i32 3>
ret <2 x float> %out
}
@ -2562,7 +2562,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc,
; GFX12-NEXT: ; return to shader part epilog
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 3>
%out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 1, i32 3>
ret <2 x float> %out
}
@ -2604,7 +2604,7 @@ define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc,
; GFX12-NEXT: ; return to shader part epilog
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
%out = shufflevector <4 x float> %r, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
ret <3 x float> %out
}
@ -2667,7 +2667,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg
; GFX12-NEXT: ; return to shader part epilog
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
%out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 0, i32 1>
ret <2 x float> %out
}
@ -2709,7 +2709,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg
; GFX12-NEXT: ; return to shader part epilog
main_body:
%r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
%out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
%out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 1, i32 2>
ret <2 x float> %out
}

View File

@ -47,7 +47,7 @@ main_body:
; PACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_kernel void @buffer_store_format_d16_xyz(<4 x i32> %rsrc, <4 x half> %data, i32 %voffset) {
main_body:
%data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
%data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
call void @llvm.amdgcn.raw.buffer.store.format.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
ret void
}

View File

@ -47,7 +47,7 @@ main_body:
; PACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
define amdgpu_kernel void @buffer_store_format_d16_xyz(ptr addrspace(8) %rsrc, <4 x half> %data, i32 %voffset) {
main_body:
%data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
%data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
call void @llvm.amdgcn.raw.ptr.buffer.store.format.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
ret void
}

View File

@ -144,7 +144,7 @@ define amdgpu_kernel void @tbuffer_store_d16_xyz(ptr addrspace(8) %rsrc, <4 x ha
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
; GFX11-PACKED-NEXT: s_endpgm
main_body:
%data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
%data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
call void @llvm.amdgcn.raw.ptr.tbuffer.store.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 33, i32 0)
ret void
}

View File

@ -191,7 +191,7 @@ define amdgpu_kernel void @tbuffer_store_d16_xyz(<4 x i32> %rsrc, <4 x half> %da
; GFX12-PACKED-GISEL-NEXT: tbuffer_store_d16_format_xyzw v[0:1], off, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM]
; GFX12-PACKED-GISEL-NEXT: s_endpgm
main_body:
%data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
%data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
call void @llvm.amdgcn.raw.tbuffer.store.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0)
ret void
}

View File

@ -47,7 +47,7 @@ main_body:
; PACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
define amdgpu_kernel void @buffer_store_format_d16_xyz(<4 x i32> %rsrc, <4 x half> %data, i32 %index) {
main_body:
%data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
%data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
call void @llvm.amdgcn.struct.buffer.store.format.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
ret void
}

View File

@ -77,7 +77,7 @@ define amdgpu_kernel void @buffer_store_format_d16_xyz(ptr addrspace(8) %rsrc, <
; PACKED-NEXT: buffer_store_format_d16_xyz v[0:1], v2, s[0:3], 0 idxen
; PACKED-NEXT: s_endpgm
main_body:
%data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
%data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
call void @llvm.amdgcn.struct.ptr.buffer.store.format.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 %index, i32 0, i32 0, i32 0)
ret void
}

View File

@ -160,7 +160,7 @@ define amdgpu_kernel void @tbuffer_store_d16_xyz(ptr addrspace(8) %rsrc, <4 x ha
; GFX11-PACKED-NEXT: tbuffer_store_d16_format_xyz v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
; GFX11-PACKED-NEXT: s_endpgm
main_body:
%data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
%data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
call void @llvm.amdgcn.struct.ptr.tbuffer.store.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
ret void
}

View File

@ -211,7 +211,7 @@ define amdgpu_kernel void @tbuffer_store_d16_xyz(<4 x i32> %rsrc, <4 x half> %da
; GFX12-PACKED-GISEL-NEXT: tbuffer_store_d16_format_xyzw v[0:1], v2, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM] idxen
; GFX12-PACKED-GISEL-NEXT: s_endpgm
main_body:
%data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
%data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
call void @llvm.amdgcn.struct.tbuffer.store.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
ret void
}

View File

@ -22,7 +22,7 @@ define amdgpu_vs void @test(ptr addrspace(8) inreg %arg1, ptr addrspace(3) %arg2
; CHECK-NEXT: s_endpgm
call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float undef, float undef, float undef, float undef, i1 false, i1 false)
%var1 = load <6 x float>, ptr addrspace(3) %arg2, align 4
%var2 = shufflevector <6 x float> %var1, <6 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%var2 = shufflevector <6 x float> %var1, <6 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %var2, ptr addrspace(8) %arg1, i32 0, i32 0, i32 0, i32 126, i32 0)
ret void
}
@ -52,9 +52,9 @@ define amdgpu_vs void @test_2(ptr addrspace(8) inreg %arg1, i32 %arg2, i32 inreg
; CHECK-NEXT: tbuffer_store_format_xyzw v[2:5], v0, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:16 glc slc
; CHECK-NEXT: s_endpgm
%load = load <8 x float>, ptr addrspace(3) %arg4, align 4
%vec1 = shufflevector <8 x float> %load, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%vec1 = shufflevector <8 x float> %load, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %vec1, ptr addrspace(8) %arg1, i32 %arg2, i32 0, i32 %arg3, i32 77, i32 3)
%vec2 = shufflevector <8 x float> %load, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%vec2 = shufflevector <8 x float> %load, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %vec2, ptr addrspace(8) %arg1, i32 %arg2, i32 16, i32 %arg3, i32 77, i32 3)
ret void
}
@ -102,17 +102,17 @@ define amdgpu_vs void @test_3(i32 inreg %arg1, i32 inreg %arg2, ptr addrspace(8)
; CHECK-NEXT: tbuffer_store_format_xy v[0:1], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_UINT] idxen offset:256 glc slc
; CHECK-NEXT: s_endpgm
%load1 = load <6 x float>, ptr addrspace(3) %arg5, align 4
%vec11 = shufflevector <6 x float> %load1, <6 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%vec11 = shufflevector <6 x float> %load1, <6 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %vec11, ptr addrspace(8) %arg3, i32 %arg1, i32 264, i32 %arg2, i32 77, i32 3)
%vec12 = shufflevector <6 x float> %load1, <6 x float> undef, <2 x i32> <i32 4, i32 5>
%vec12 = shufflevector <6 x float> %load1, <6 x float> poison, <2 x i32> <i32 4, i32 5>
call void @llvm.amdgcn.struct.ptr.tbuffer.store.v2f32(<2 x float> %vec12, ptr addrspace(8) %arg3, i32 %arg1, i32 280, i32 %arg2, i32 64, i32 3)
call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float undef, float undef, float undef, float undef, i1 false, i1 false)
%load2 = load <6 x float>, ptr addrspace(3) %arg6, align 4
%vec21 = shufflevector <6 x float> %load2, <6 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%vec21 = shufflevector <6 x float> %load2, <6 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %vec21, ptr addrspace(8) %arg3, i32 %arg1, i32 240, i32 %arg2, i32 77, i32 3)
%vec22 = shufflevector <6 x float> %load2, <6 x float> undef, <2 x i32> <i32 4, i32 5>
%vec22 = shufflevector <6 x float> %load2, <6 x float> poison, <2 x i32> <i32 4, i32 5>
call void @llvm.amdgcn.struct.ptr.tbuffer.store.v2f32(<2 x float> %vec22, ptr addrspace(8) %arg3, i32 %arg1, i32 256, i32 %arg2, i32 64, i32 3)
ret void

View File

@ -36,7 +36,7 @@ bb1: ; preds = %bb3, %bb
%i3 = fmul float %i2, 1.000000e+00
%i4 = fmul nsz <3 x float> %arg, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
%i5 = insertelement <3 x float> poison, float %i3, i32 0
%i6 = shufflevector <3 x float> %i5, <3 x float> undef, <3 x i32> zeroinitializer
%i6 = shufflevector <3 x float> %i5, <3 x float> poison, <3 x i32> zeroinitializer
%i7 = fmul <3 x float> %i4, %i6
%i8 = fcmp oeq float %i3, 0.000000e+00
br i1 %i8, label %bb3, label %bb2

View File

@ -456,9 +456,9 @@ define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1,
; GISEL-CI-NEXT: v_mad_f32 v0, v4, v0, v1
; GISEL-CI-NEXT: v_mac_f32_e32 v1, v5, v2
; GISEL-CI-NEXT: s_setpc_b64 s[30:31]
%src0.shuf = shufflevector <2 x half> %src0, <2 x half> undef, <2 x i32> <i32 1, i32 0>
%src1.shuf = shufflevector <2 x half> %src1, <2 x half> undef, <2 x i32> <i32 0, i32 1>
%src2.shuf = shufflevector <2 x half> %src2, <2 x half> undef, <2 x i32> <i32 1, i32 1>
%src0.shuf = shufflevector <2 x half> %src0, <2 x half> poison, <2 x i32> <i32 1, i32 0>
%src1.shuf = shufflevector <2 x half> %src1, <2 x half> poison, <2 x i32> <i32 0, i32 1>
%src2.shuf = shufflevector <2 x half> %src2, <2 x half> poison, <2 x i32> <i32 1, i32 1>
%src0.ext = fpext <2 x half> %src0.shuf to <2 x float>
%src1.ext = fpext <2 x half> %src1.shuf to <2 x float>
%src2.ext = fpext <2 x half> %src2.shuf to <2 x float>

View File

@ -795,8 +795,8 @@ bb:
br i1 %cmp, label %bb11, label %bb7
bb11:
%tmp14 = shufflevector <2 x i32> %arg1, <2 x i32> undef, <2 x i32> zeroinitializer
%tmp16 = shufflevector <2 x i32> %arg2, <2 x i32> undef, <2 x i32> zeroinitializer
%tmp14 = shufflevector <2 x i32> %arg1, <2 x i32> poison, <2 x i32> zeroinitializer
%tmp16 = shufflevector <2 x i32> %arg2, <2 x i32> poison, <2 x i32> zeroinitializer
%tmp17 = shl <2 x i32> %tmp14, <i32 8, i32 8>
%tmp18 = ashr <2 x i32> %tmp17, <i32 8, i32 8>
%tmp19 = shl <2 x i32> %tmp16, <i32 8, i32 8>

View File

@ -486,7 +486,7 @@ bb:
%neg.scalar0 = fsub float -0.0, %scalar0
%neg.scalar0.vec = insertelement <2 x float> poison, float %neg.scalar0, i32 0
%neg.scalar0.broadcast = shufflevector <2 x float> %neg.scalar0.vec, <2 x float> undef, <2 x i32> zeroinitializer
%neg.scalar0.broadcast = shufflevector <2 x float> %neg.scalar0.vec, <2 x float> poison, <2 x i32> zeroinitializer
%result = fadd <2 x float> %vec0, %neg.scalar0.broadcast
store <2 x float> %result, ptr addrspace(1) %out, align 4
@ -526,7 +526,7 @@ bb:
%vec0 = load volatile <2 x float>, ptr addrspace(3) %lds, align 8
%lds.gep1 = getelementptr inbounds <2 x float>, ptr addrspace(3) %lds, i32 1
%vec1 = load volatile <2 x float>, ptr addrspace(3) %lds.gep1, align 8
%vec1.swap = shufflevector <2 x float> %vec1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
%vec1.swap = shufflevector <2 x float> %vec1, <2 x float> poison, <2 x i32> <i32 1, i32 0>
%result = fadd <2 x float> %vec0, %vec1.swap
store <2 x float> %result, ptr addrspace(1) %out, align 8
ret void
@ -543,7 +543,7 @@ bb:
%f32 = load volatile float, ptr addrspace(3) undef, align 8
%vec1 = load volatile <2 x float>, ptr addrspace(3) %lds.gep1, align 8
%vec1.neg = fsub <2 x float> <float -0.0, float -0.0>, %vec1
%vec1.neg.swap = shufflevector <2 x float> %vec1.neg, <2 x float> undef, <2 x i32> <i32 1, i32 0>
%vec1.neg.swap = shufflevector <2 x float> %vec1.neg, <2 x float> poison, <2 x i32> <i32 1, i32 0>
%result = fadd <2 x float> %vec0, %vec1.neg.swap
store <2 x float> %result, ptr addrspace(1) %out, align 8
ret void
@ -598,7 +598,7 @@ bb:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i32 %tid
%in.1 = load <4 x float>, ptr addrspace(1) %gep
%shuf = shufflevector <4 x float> %in.1, <4 x float> undef, <4 x i32> zeroinitializer
%shuf = shufflevector <4 x float> %in.1, <4 x float> poison, <4 x i32> zeroinitializer
%add.1 = fadd <4 x float> %in.1, %shuf
store <4 x float> %add.1, ptr addrspace(1) %gep
ret void

View File

@ -24,7 +24,7 @@ bb:
%scalar0 = load volatile half, ptr addrspace(3) %arg2, align 2
%scalar0.vec = insertelement <2 x half> poison, half %scalar0, i32 0
%scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer
%scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> poison, <2 x i32> zeroinitializer
%result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %scalar0.broadcast)
store <2 x half> %result, ptr addrspace(1) %out, align 4
@ -55,7 +55,7 @@ bb:
%scalar0 = load volatile half, ptr addrspace(3) %arg2, align 2
%scalar0.vec = insertelement <2 x half> poison, half %scalar0, i32 0
%scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer
%scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> poison, <2 x i32> zeroinitializer
%neg.scalar0.broadcast = fsub <2 x half> <half -0.0, half -0.0>, %scalar0.broadcast
%result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.scalar0.broadcast)
@ -88,7 +88,7 @@ bb:
%neg.scalar0 = fsub half -0.0, %scalar0
%neg.scalar0.vec = insertelement <2 x half> poison, half %neg.scalar0, i32 0
%neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer
%neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> poison, <2 x i32> zeroinitializer
%result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.scalar0.broadcast)
store <2 x half> %result, ptr addrspace(1) %out, align 4
@ -120,7 +120,7 @@ bb:
%neg.scalar0 = fsub half -0.0, %scalar0
%neg.scalar0.vec = insertelement <2 x half> poison, half %neg.scalar0, i32 0
%neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer
%neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> poison, <2 x i32> zeroinitializer
%neg.neg.scalar0.broadcast = fsub <2 x half> <half -0.0, half -0.0>, %neg.scalar0.broadcast
%result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.neg.scalar0.broadcast)
@ -212,7 +212,7 @@ bb:
%neg.scalar0.bc = bitcast half %neg.scalar0 to i16
%neg.scalar0.vec = insertelement <2 x i16> poison, i16 %neg.scalar0.bc, i32 0
%neg.scalar0.broadcast = shufflevector <2 x i16> %neg.scalar0.vec, <2 x i16> undef, <2 x i32> zeroinitializer
%neg.scalar0.broadcast = shufflevector <2 x i16> %neg.scalar0.vec, <2 x i16> poison, <2 x i32> zeroinitializer
%result = add <2 x i16> %vec0, %neg.scalar0.broadcast
store <2 x i16> %result, ptr addrspace(1) %out, align 4
@ -318,7 +318,7 @@ bb:
%vec2 = load volatile <2 x half>, ptr addrspace(3) %lds.gep2, align 4
%vec2.fneg = fsub <2 x half> <half -0.0, half -0.0>, %vec2
%vec2.fneg.elt1.broadcast = shufflevector <2 x half> %vec2.fneg, <2 x half> undef, <2 x i32> <i32 1, i32 1>
%vec2.fneg.elt1.broadcast = shufflevector <2 x half> %vec2.fneg, <2 x half> poison, <2 x i32> <i32 1, i32 1>
%result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %vec2.fneg.elt1.broadcast)
store <2 x half> %result, ptr addrspace(1) %out, align 4
@ -377,7 +377,7 @@ bb:
%vec0 = load volatile <2 x i16>, ptr addrspace(3) %lds, align 4
%vec1 = load volatile <2 x i16>, ptr addrspace(3) %lds.gep1, align 4
%vec1.elt1.broadcast = shufflevector <2 x i16> %vec1, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
%vec1.elt1.broadcast = shufflevector <2 x i16> %vec1, <2 x i16> poison, <2 x i32> <i32 1, i32 1>
%result = add <2 x i16> %vec0, %vec1.elt1.broadcast
store <2 x i16> %result, ptr addrspace(1) %out, align 4
@ -407,7 +407,7 @@ bb:
%vec1 = load volatile <2 x half>, ptr addrspace(3) %lds.gep1, align 4
%vec2 = load volatile <2 x half>, ptr addrspace(3) %lds.gep2, align 4
%vec2.elt1.broadcast = shufflevector <2 x half> %vec2, <2 x half> undef, <2 x i32> <i32 1, i32 1>
%vec2.elt1.broadcast = shufflevector <2 x half> %vec2, <2 x half> poison, <2 x i32> <i32 1, i32 1>
%result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %vec2.elt1.broadcast)
@ -471,7 +471,7 @@ bb:
%vec1 = load volatile <2 x half>, ptr addrspace(3) %lds.gep1, align 4
%vec2 = load volatile <2 x half>, ptr addrspace(3) %lds.gep2, align 4
%vec2.swap = shufflevector <2 x half> %vec2, <2 x half> undef, <2 x i32> <i32 1, i32 0>
%vec2.swap = shufflevector <2 x half> %vec2, <2 x half> poison, <2 x i32> <i32 1, i32 0>
%result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %vec2.swap)
store <2 x half> %result, ptr addrspace(1) %out, align 4
@ -502,7 +502,7 @@ bb:
%vec2 = load volatile <2 x half>, ptr addrspace(3) %lds.gep2, align 4
%neg.vec2 = fsub <2 x half> <half -0.0, half -0.0>, %vec2
%neg.vec2.swap = shufflevector <2 x half> %neg.vec2, <2 x half> undef, <2 x i32> <i32 1, i32 0>
%neg.vec2.swap = shufflevector <2 x half> %neg.vec2, <2 x half> poison, <2 x i32> <i32 1, i32 0>
%result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.vec2.swap)
store <2 x half> %result, ptr addrspace(1) %out, align 4
@ -678,7 +678,7 @@ bb:
%f32 = load volatile float, ptr addrspace(3) undef, align 4
%neg.f32 = fsub float -0.0, %f32
%bc = bitcast float %neg.f32 to <2 x half>
%shuf = shufflevector <2 x half> %bc, <2 x half> undef, <2 x i32> <i32 1, i32 0>
%shuf = shufflevector <2 x half> %bc, <2 x half> poison, <2 x i32> <i32 1, i32 0>
%result = fadd <2 x half> %vec0, %shuf
store <2 x half> %result, ptr addrspace(1) %out, align 4
ret void

View File

@ -342,7 +342,7 @@ define hidden void @shuffle7330ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 7, i32 3, i32 3, i32 0>
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 7, i32 3, i32 3, i32 0>
store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
ret void
}
@ -367,7 +367,7 @@ define hidden void @shuffle5341ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 5, i32 3, i32 4, i32 1>
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 5, i32 3, i32 4, i32 1>
store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
ret void
}
@ -393,7 +393,7 @@ define hidden void @shuffle6106ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 6, i32 1, i32 0, i32 6>
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 6, i32 1, i32 0, i32 6>
store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
ret void
}
@ -420,7 +420,7 @@ define hidden void @shuffle4327ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 4, i32 3, i32 2, i32 7>
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 4, i32 3, i32 2, i32 7>
store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
ret void
}
@ -446,7 +446,7 @@ define hidden void @shuffle3263ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 6, i32 3>
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 6, i32 3>
store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
ret void
}
@ -472,7 +472,7 @@ define hidden void @shuffle2763ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 2, i32 7, i32 6, i32 3>
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 2, i32 7, i32 6, i32 3>
store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
ret void
}
@ -498,7 +498,7 @@ define hidden void @shuffle1327ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 7>
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 1, i32 3, i32 2, i32 7>
store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
ret void
}
@ -524,7 +524,7 @@ define hidden void @shuffle0605ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 0, i32 6, i32 0, i32 5>
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 0, i32 6, i32 0, i32 5>
store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
ret void
}
@ -554,7 +554,7 @@ define hidden void @insertUsesOr(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
%vec1 = load <4 x i8>, ptr addrspace(1) %in1, align 4
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
%vecins = insertelement <4 x i8> %shuffle0_0, i8 %elt, i32 1
store <4 x i8> %vecins, ptr addrspace(1) %out0
ret void
@ -598,7 +598,7 @@ define hidden void @addUsesOr(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i8 %
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
%vec1 = load <4 x i8>, ptr addrspace(1) %in1, align 4
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 7, i32 0, i32 6, i32 3>
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 7, i32 0, i32 6, i32 3>
%added = add <4 x i8> %shuffle0_0, %vec1
store <4 x i8> %added, ptr addrspace(1) %out0
ret void
@ -783,7 +783,7 @@ define hidden void @add_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i8 %el
%gep1 = getelementptr <4 x i8>, ptr addrspace(1) %in1, i32 %tid
%vec0 = load <4 x i8>, ptr addrspace(1) %gep0, align 4
%vec1 = load <4 x i8>, ptr addrspace(1) %gep1, align 4
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
%vecins = add <4 x i8> %shuffle0_0, %vec1
store <4 x i8> %vecins, ptr addrspace(1) %out0
ret void
@ -835,7 +835,7 @@ define hidden void @add_store(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i8 %
; GFX9-NEXT: s_setpc_b64 s[30:31]
%vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
%vec1 = load <4 x i8>, ptr addrspace(1) %in1, align 4
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
%vecins = add <4 x i8> %shuffle0_0, %vec1
store <4 x i8> %vecins, ptr addrspace(1) %out0
store <4 x i8> %shuffle0_0, ptr addrspace(1) %out1
@ -903,7 +903,7 @@ define hidden void @add_store_div_16(ptr addrspace(1) %in0, ptr addrspace(1) %in
%gep1 = getelementptr <4 x i8>, ptr addrspace(1) %in1, i32 %tid
%vec0 = load <4 x i8>, ptr addrspace(1) %gep0, align 4
%vec1 = load <4 x i8>, ptr addrspace(1) %gep1, align 4
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
%shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
%vecins = add <4 x i8> %shuffle0_0, %vec1
store <4 x i8> %vecins, ptr addrspace(1) %out0
store <4 x i8> %shuffle0_0, ptr addrspace(1) %out1

View File

@ -10,9 +10,9 @@
; VI-NEXT: v_add_f16_e32
define half @reduction_fadd_v4f16(<4 x half> %vec4) {
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
%bin.rdx = fadd <4 x half> %vec4, %rdx.shuf
%rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%bin.rdx2 = fadd <4 x half> %bin.rdx, %rdx.shuf1
%res = extractelement <4 x half> %bin.rdx2, i32 0
ret half %res
@ -30,9 +30,9 @@ entry:
; VI-NEXT: s_setpc_b64
define half @reduction_fsub_v4f16(<4 x half> %vec4) {
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
%bin.rdx = fsub <4 x half> %vec4, %rdx.shuf
%rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%bin.rdx2 = fsub <4 x half> %bin.rdx, %rdx.shuf1
%res = extractelement <4 x half> %bin.rdx2, i32 0
ret half %res
@ -52,9 +52,9 @@ entry:
; VI-NEXT: s_setpc_b64
define half @reduction_fsub_v4f16_preserve_fmf(<4 x half> %vec4) {
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
%bin.rdx = fsub nsz <4 x half> %vec4, %rdx.shuf
%rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%bin.rdx2 = fsub nsz <4 x half> %bin.rdx, %rdx.shuf1
%res = extractelement <4 x half> %bin.rdx2, i32 0
%neg.res = fsub half -0.0, %res
@ -70,9 +70,9 @@ entry:
; VI-NEXT: v_mul_f16_e32
define half @reduction_fmul_half4(<4 x half> %vec4) {
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
%bin.rdx = fmul <4 x half> %vec4, %rdx.shuf
%rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%bin.rdx2 = fmul <4 x half> %bin.rdx, %rdx.shuf1
%res = extractelement <4 x half> %bin.rdx2, i32 0
ret half %res
@ -87,9 +87,9 @@ entry:
; VI-NEXT: v_add_u16_e32
define i16 @reduction_v4i16(<4 x i16> %vec4) {
entry:
%rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
%bin.rdx = add <4 x i16> %vec4, %rdx.shuf
%rdx.shuf1 = shufflevector <4 x i16> %bin.rdx, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%rdx.shuf1 = shufflevector <4 x i16> %bin.rdx, <4 x i16> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%bin.rdx2 = add <4 x i16> %bin.rdx, %rdx.shuf1
%res = extractelement <4 x i16> %bin.rdx2, i32 0
ret i16 %res
@ -111,11 +111,11 @@ entry:
define half @reduction_half8(<8 x half> %vec8) {
entry:
%rdx.shuf = shufflevector <8 x half> %vec8, <8 x half> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
%rdx.shuf = shufflevector <8 x half> %vec8, <8 x half> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
%bin.rdx = fadd <8 x half> %vec8, %rdx.shuf
%rdx.shuf1 = shufflevector <8 x half> %bin.rdx, <8 x half> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%rdx.shuf1 = shufflevector <8 x half> %bin.rdx, <8 x half> poison, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%bin.rdx2 = fadd <8 x half> %bin.rdx, %rdx.shuf1
%rdx.shuf3 = shufflevector <8 x half> %bin.rdx2, <8 x half> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%rdx.shuf3 = shufflevector <8 x half> %bin.rdx2, <8 x half> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%bin.rdx4 = fadd <8 x half> %bin.rdx2, %rdx.shuf3
%res = extractelement <8 x half> %bin.rdx4, i32 0
ret half %res
@ -137,11 +137,11 @@ entry:
define i16 @reduction_v8i16(<8 x i16> %vec8) {
entry:
%rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
%rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
%bin.rdx = add <8 x i16> %vec8, %rdx.shuf
%rdx.shuf1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%rdx.shuf1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%bin.rdx2 = add <8 x i16> %bin.rdx, %rdx.shuf1
%rdx.shuf3 = shufflevector <8 x i16> %bin.rdx2, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%rdx.shuf3 = shufflevector <8 x i16> %bin.rdx2, <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%bin.rdx4 = add <8 x i16> %bin.rdx2, %rdx.shuf3
%res = extractelement <8 x i16> %bin.rdx4, i32 0
ret i16 %res
@ -175,13 +175,13 @@ entry:
define half @reduction_half16(<16 x half> %vec16) {
entry:
%rdx.shuf = shufflevector <16 x half> %vec16, <16 x half> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%rdx.shuf = shufflevector <16 x half> %vec16, <16 x half> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%bin.rdx = fadd <16 x half> %vec16, %rdx.shuf
%rdx.shuf1 = shufflevector <16 x half> %bin.rdx, <16 x half> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%rdx.shuf1 = shufflevector <16 x half> %bin.rdx, <16 x half> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%bin.rdx2 = fadd <16 x half> %bin.rdx, %rdx.shuf1
%rdx.shuf3 = shufflevector <16 x half> %bin.rdx2, <16 x half> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%rdx.shuf3 = shufflevector <16 x half> %bin.rdx2, <16 x half> poison, <16 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%bin.rdx4 = fadd <16 x half> %bin.rdx2, %rdx.shuf3
%rdx.shuf5 = shufflevector <16 x half> %bin.rdx4, <16 x half> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%rdx.shuf5 = shufflevector <16 x half> %bin.rdx4, <16 x half> poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%bin.rdx6 = fadd <16 x half> %bin.rdx4, %rdx.shuf5
%res = extractelement <16 x half> %bin.rdx6, i32 0
ret half %res
@ -196,10 +196,10 @@ entry:
; VI-NEXT: v_min_u16_e32
define i16 @reduction_min_v4i16(<4 x i16> %vec4) {
entry:
%rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
%rdx.minmax.cmp = icmp ult <4 x i16> %vec4, %rdx.shuf
%rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i16> %vec4, <4 x i16> %rdx.shuf
%rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%rdx.minmax.cmp2 = icmp ult <4 x i16> %rdx.minmax.select, %rdx.shuf1
%rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x i16> %rdx.minmax.select, <4 x i16> %rdx.shuf1
%res = extractelement <4 x i16> %rdx.minmax.select3, i32 0
@ -221,13 +221,13 @@ entry:
; VI-NEXT: v_min_u16_e32
define i16 @reduction_umin_v8i16(<8 x i16> %vec8) {
entry:
%rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
%rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
%rdx.minmax.cmp = icmp ult <8 x i16> %vec8, %rdx.shuf
%rdx.minmax.select = select <8 x i1> %rdx.minmax.cmp, <8 x i16> %vec8, <8 x i16> %rdx.shuf
%rdx.shuf1 = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%rdx.shuf1 = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%rdx.minmax.cmp2 = icmp ult <8 x i16> %rdx.minmax.select, %rdx.shuf1
%rdx.minmax.select3 = select <8 x i1> %rdx.minmax.cmp2, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf1
%rdx.shuf4 = shufflevector <8 x i16> %rdx.minmax.select3, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%rdx.shuf4 = shufflevector <8 x i16> %rdx.minmax.select3, <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%rdx.minmax.cmp5 = icmp ult <8 x i16> %rdx.minmax.select3, %rdx.shuf4
%rdx.minmax.select6 = select <8 x i1> %rdx.minmax.cmp5, <8 x i16> %rdx.minmax.select3, <8 x i16> %rdx.shuf4
%res = extractelement <8 x i16> %rdx.minmax.select6, i32 0
@ -301,16 +301,16 @@ entry:
; VI-NEXT: v_min_i16_e32
define i16 @reduction_smin_v16i16(<16 x i16> %vec16) {
entry:
%rdx.shuf = shufflevector <16 x i16> %vec16, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%rdx.shuf = shufflevector <16 x i16> %vec16, <16 x i16> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%rdx.minmax.cmp = icmp slt <16 x i16> %vec16, %rdx.shuf
%rdx.minmax.select = select <16 x i1> %rdx.minmax.cmp, <16 x i16> %vec16, <16 x i16> %rdx.shuf
%rdx.shuf1 = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%rdx.shuf1 = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%rdx.minmax.cmp2 = icmp slt <16 x i16> %rdx.minmax.select, %rdx.shuf1
%rdx.minmax.select3 = select <16 x i1> %rdx.minmax.cmp2, <16 x i16> %rdx.minmax.select, <16 x i16> %rdx.shuf1
%rdx.shuf4 = shufflevector <16 x i16> %rdx.minmax.select3, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%rdx.shuf4 = shufflevector <16 x i16> %rdx.minmax.select3, <16 x i16> poison, <16 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%rdx.minmax.cmp5 = icmp slt <16 x i16> %rdx.minmax.select3, %rdx.shuf4
%rdx.minmax.select6 = select <16 x i1> %rdx.minmax.cmp5, <16 x i16> %rdx.minmax.select3, <16 x i16> %rdx.shuf4
%rdx.shuf7 = shufflevector <16 x i16> %rdx.minmax.select6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%rdx.shuf7 = shufflevector <16 x i16> %rdx.minmax.select6, <16 x i16> poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%rdx.minmax.cmp8 = icmp slt <16 x i16> %rdx.minmax.select6, %rdx.shuf7
%rdx.minmax.select9 = select <16 x i1> %rdx.minmax.cmp8, <16 x i16> %rdx.minmax.select6, <16 x i16> %rdx.shuf7
%res = extractelement <16 x i16> %rdx.minmax.select9, i32 0
@ -404,10 +404,10 @@ entry:
; VI-NEXT: v_max_u16_e32
define i16 @reduction_umax_v4i16(<4 x i16> %vec4) {
entry:
%rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
%rdx.minmax.cmp = icmp ugt <4 x i16> %vec4, %rdx.shuf
%rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i16> %vec4, <4 x i16> %rdx.shuf
%rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%rdx.minmax.cmp2 = icmp ugt <4 x i16> %rdx.minmax.select, %rdx.shuf1
%rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x i16> %rdx.minmax.select, <4 x i16> %rdx.shuf1
%res = extractelement <4 x i16> %rdx.minmax.select3, i32 0
@ -423,10 +423,10 @@ entry:
; VI-NEXT: v_max_i16_e32
define i16 @reduction_smax_v4i16(<4 x i16> %vec4) #0 {
entry:
%rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
%rdx.minmax.cmp = icmp sgt <4 x i16> %vec4, %rdx.shuf
%rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i16> %vec4, <4 x i16> %rdx.shuf
%rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%rdx.minmax.cmp2 = icmp sgt <4 x i16> %rdx.minmax.select, %rdx.shuf1
%rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x i16> %rdx.minmax.select, <4 x i16> %rdx.shuf1
%res = extractelement <4 x i16> %rdx.minmax.select3, i32 0
@ -451,9 +451,9 @@ entry:
; VI: v_max_f16_e32 v0, [[MAX1]], [[MAX0]]
define half @reduction_maxnum_v4f16(<4 x half> %vec4) {
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
%rdx.minmax = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %vec4, <4 x half> %rdx.shuf)
%rdx.shuf1 = shufflevector <4 x half> %rdx.minmax, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%rdx.shuf1 = shufflevector <4 x half> %rdx.minmax, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%rdx.minmax3 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %rdx.minmax, <4 x half> %rdx.shuf1)
%res = extractelement <4 x half> %rdx.minmax3, i32 0
ret half %res
@ -476,9 +476,9 @@ entry:
; VI: v_min_f16_e32 v0, [[MAX1]], [[MAX0]]
define half @reduction_minnum_v4f16(<4 x half> %vec4) {
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
%rdx.minmax = call <4 x half> @llvm.minnum.v4f16(<4 x half> %vec4, <4 x half> %rdx.shuf)
%rdx.shuf1 = shufflevector <4 x half> %rdx.minmax, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%rdx.shuf1 = shufflevector <4 x half> %rdx.minmax, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%rdx.minmax3 = call <4 x half> @llvm.minnum.v4f16(<4 x half> %rdx.minmax, <4 x half> %rdx.shuf1)
%res = extractelement <4 x half> %rdx.minmax3, i32 0
ret half %res
@ -513,10 +513,10 @@ entry:
; VI: v_max_f16_e32 v0, [[MAX1]], [[MAX0]]
define half @reduction_fast_max_pattern_v4f16(<4 x half> %vec4) {
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
%rdx.minmax.cmp = fcmp nnan nsz ogt <4 x half> %vec4, %rdx.shuf
%rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf
%rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%rdx.minmax.cmp2 = fcmp nnan nsz ogt <4 x half> %rdx.minmax.select, %rdx.shuf1
%rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1
%res = extractelement <4 x half> %rdx.minmax.select3, i32 0
@ -552,10 +552,10 @@ entry:
; VI: v_min_f16_e32 v0, [[MAX1]], [[MAX0]]
define half @reduction_fast_min_pattern_v4f16(<4 x half> %vec4) {
entry:
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
%rdx.minmax.cmp = fcmp nnan nsz olt <4 x half> %vec4, %rdx.shuf
%rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf
%rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
%rdx.minmax.cmp2 = fcmp nnan nsz olt <4 x half> %rdx.minmax.select, %rdx.shuf1
%rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1
%res = extractelement <4 x half> %rdx.minmax.select3, i32 0

View File

@ -67,7 +67,7 @@ define amdgpu_kernel void @scalar_to_vector_v2i32(ptr addrspace(1) %out, ptr add
; GFX9-NEXT: s_endpgm
%tmp1 = load i32, ptr addrspace(1) %in, align 4
%bc = bitcast i32 %tmp1 to <2 x i16>
%tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp2 = shufflevector <2 x i16> %bc, <2 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
store <4 x i16> %tmp2, ptr addrspace(1) %out, align 8
ret void
}
@ -135,7 +135,7 @@ define amdgpu_kernel void @scalar_to_vector_v2f32(ptr addrspace(1) %out, ptr add
; GFX9-NEXT: s_endpgm
%tmp1 = load float, ptr addrspace(1) %in, align 4
%bc = bitcast float %tmp1 to <2 x i16>
%tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
%tmp2 = shufflevector <2 x i16> %bc, <2 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
store <4 x i16> %tmp2, ptr addrspace(1) %out, align 8
ret void
}
@ -193,7 +193,7 @@ define amdgpu_kernel void @scalar_to_vector_v4i16() {
bb:
%tmp = load <2 x i8>, ptr addrspace(1) undef, align 1
%tmp1 = shufflevector <2 x i8> %tmp, <2 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 0, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> poison, <8 x i32> <i32 0, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
store <8 x i8> %tmp2, ptr addrspace(1) undef, align 8
ret void
}
@ -262,7 +262,7 @@ bb:
%load = load half, ptr addrspace(1) undef, align 1
%tmp = bitcast half %load to <2 x i8>
%tmp1 = shufflevector <2 x i8> %tmp, <2 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 0, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
%tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> poison, <8 x i32> <i32 0, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
store <8 x i8> %tmp2, ptr addrspace(1) undef, align 8
ret void
}
@ -275,7 +275,7 @@ bb:
; %tmp1 = load i32, ptr addrspace(1) %in, align 4
; %bc = bitcast i32 %tmp1 to <4 x i8>
; %tmp2 = shufflevector <4 x i8> %bc, <4 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; %tmp2 = shufflevector <4 x i8> %bc, <4 x i8> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
; store <8 x i8> %tmp2, ptr addrspace(1) %out, align 4
; ret void
; }

View File

@ -56,13 +56,13 @@ declare <2 x float> @_Z3cosDv2_f(<2 x float>)
define amdgpu_kernel void @test_sincos_v3(ptr addrspace(1) nocapture %a) {
entry:
%loadVec4 = load <4 x float>, ptr addrspace(1) %a, align 16
%extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
%extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
%call = call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4)
%extractVec6 = shufflevector <3 x float> %call, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
%extractVec6 = shufflevector <3 x float> %call, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
store <4 x float> %extractVec6, ptr addrspace(1) %a, align 16
%call11 = call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4)
%arrayidx12 = getelementptr inbounds <3 x float>, ptr addrspace(1) %a, i64 1
%extractVec13 = shufflevector <3 x float> %call11, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
%extractVec13 = shufflevector <3 x float> %call11, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
store <4 x float> %extractVec13, ptr addrspace(1) %arrayidx12, align 16
ret void
}

View File

@ -52,7 +52,7 @@ define hidden <4 x float> @split_v4f32_multi_arg(<4 x float> %arg0, <2 x float>
; GCN: .cfi_endproc
call void @llvm.dbg.value(metadata <4 x float> %arg0, metadata !29, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !31
call void @llvm.dbg.value(metadata <2 x float> %arg1, metadata !30, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !31
%tmp = shufflevector <2 x float> %arg1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>, !dbg !32
%tmp = shufflevector <2 x float> %arg1, <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>, !dbg !32
%add = fadd <4 x float> %tmp, %arg0, !dbg !33
ret <4 x float> %add, !dbg !34
}

View File

@ -15,8 +15,8 @@ entry:
define protected amdgpu_kernel void @short2_char4(ptr addrspace(1) %out) {
entry:
%tmp = load <2 x i16>, ptr addrspace(1) undef, align 4
%vecinit = shufflevector <2 x i16> %tmp, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%vecinit2 = shufflevector <4 x i16> %vecinit, <4 x i16> <i16 undef, i16 undef, i16 0, i16 0>, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
%vecinit = shufflevector <2 x i16> %tmp, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
%vecinit2 = shufflevector <4 x i16> %vecinit, <4 x i16> <i16 poison, i16 poison, i16 0, i16 0>, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
%tmp1 = trunc <4 x i16> %vecinit2 to <4 x i8>
store <4 x i8> %tmp1, ptr addrspace(1) %out, align 4
ret void
@ -27,8 +27,8 @@ entry:
define protected amdgpu_kernel void @short4_char8(ptr addrspace(1) %out) {
entry:
%tmp = load <4 x i16>, ptr addrspace(1) undef, align 8
%vecinit = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
%vecinit2 = shufflevector <8 x i16> %vecinit, <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 0, i16 0, i16 0>, <8 x i32> <i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7>
%vecinit = shufflevector <4 x i16> %tmp, <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
%vecinit2 = shufflevector <8 x i16> %vecinit, <8 x i16> <i16 poison, i16 poison, i16 poison, i16 poison, i16 0, i16 0, i16 0, i16 0>, <8 x i32> <i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7>
%tmp1 = trunc <8 x i16> %vecinit2 to <8 x i8>
store <8 x i8> %tmp1, ptr addrspace(1) %out, align 8
ret void
@ -39,8 +39,8 @@ entry:
define protected amdgpu_kernel void @short8_char16(ptr addrspace(1) %out) {
entry:
%tmp = load <8 x i16>, ptr addrspace(1) undef, align 16
%vecinit = shufflevector <8 x i16> %tmp, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%vecinit2 = shufflevector <16 x i16> %vecinit, <16 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <16 x i32> <i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7>
%vecinit = shufflevector <8 x i16> %tmp, <8 x i16> poison, <16 x i32> <i32 0, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%vecinit2 = shufflevector <16 x i16> %vecinit, <16 x i16> <i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <16 x i32> <i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7>
%tmp1 = trunc <16 x i16> %vecinit2 to <16 x i8>
store <16 x i8> %tmp1, ptr addrspace(1) %out, align 16
ret void
@ -52,8 +52,8 @@ entry:
define protected amdgpu_kernel void @short16_char32(ptr addrspace(1) %out) {
entry:
%tmp = load <16 x i16>, ptr addrspace(1) undef, align 32
%vecinit = shufflevector <16 x i16> %tmp, <16 x i16> undef, <32 x i32> <i32 0, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%vecinit2 = shufflevector <32 x i16> %vecinit, <32 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 1, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <32 x i32> <i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7>
%vecinit = shufflevector <16 x i16> %tmp, <16 x i16> poison, <32 x i32> <i32 0, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%vecinit2 = shufflevector <32 x i16> %vecinit, <32 x i16> <i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 0, i16 1, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 poison, i16 poison, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <32 x i32> <i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7>
%tmp1 = trunc <32 x i16> %vecinit2 to <32 x i8>
store <32 x i8> %tmp1, ptr addrspace(1) %out, align 32
ret void

View File

@ -27,7 +27,7 @@ define <4 x half> @shuffle_v4f16_23uu(ptr addrspace(1) %arg0, ptr addrspace(1) %
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, ptr addrspace(1) %arg0
%val1 = load <4 x half>, ptr addrspace(1) %arg1
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
ret <4 x half> %shuffle
}
@ -74,7 +74,7 @@ define <4 x half> @shuffle_v4f16_234u(ptr addrspace(1) %arg0, ptr addrspace(1) %
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, ptr addrspace(1) %arg0
%val1 = load <4 x half>, ptr addrspace(1) %arg1
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 2, i32 3, i32 4, i32 undef>
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 2, i32 3, i32 4, i32 poison>
ret <4 x half> %shuffle
}
@ -101,7 +101,7 @@ define <4 x half> @shuffle_v4f16_u1u3(ptr addrspace(1) %arg0, ptr addrspace(1) %
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, ptr addrspace(1) %arg0
%val1 = load <4 x half>, ptr addrspace(1) %arg1
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 undef, i32 1, i32 undef, i32 3>
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 poison, i32 1, i32 poison, i32 3>
ret <4 x half> %shuffle
}
@ -140,7 +140,7 @@ define <4 x half> @shuffle_v4f16_u3u1(ptr addrspace(1) %arg0, ptr addrspace(1) %
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, ptr addrspace(1) %arg0
%val1 = load <4 x half>, ptr addrspace(1) %arg1
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 undef, i32 3, i32 undef, i32 1>
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 poison, i32 3, i32 poison, i32 1>
ret <4 x half> %shuffle
}
@ -167,7 +167,7 @@ define <4 x half> @shuffle_v4f16_u3uu(ptr addrspace(1) %arg0, ptr addrspace(1) %
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, ptr addrspace(1) %arg0
%val1 = load <4 x half>, ptr addrspace(1) %arg1
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
ret <4 x half> %shuffle
}
@ -216,7 +216,7 @@ define <4 x half> @shuffle_v4f16_3u6u(ptr addrspace(1) %arg0, ptr addrspace(1) %
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, ptr addrspace(1) %arg0
%val1 = load <4 x half>, ptr addrspace(1) %arg1
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 undef, i32 6, i32 undef>
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 poison, i32 6, i32 poison>
ret <4 x half> %shuffle
}
@ -265,7 +265,7 @@ define <4 x half> @shuffle_v4f16_3uu7(ptr addrspace(1) %arg0, ptr addrspace(1) %
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, ptr addrspace(1) %arg0
%val1 = load <4 x half>, ptr addrspace(1) %arg1
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 undef, i32 undef, i32 7>
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 poison, i32 poison, i32 7>
ret <4 x half> %shuffle
}
@ -312,7 +312,7 @@ define <4 x half> @shuffle_v4f16_35u5(ptr addrspace(1) %arg0, ptr addrspace(1) %
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, ptr addrspace(1) %arg0
%val1 = load <4 x half>, ptr addrspace(1) %arg1
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 5, i32 undef, i32 5>
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 5, i32 poison, i32 5>
ret <4 x half> %shuffle
}
@ -364,7 +364,7 @@ define <4 x half> @shuffle_v4f16_357u(ptr addrspace(1) %arg0, ptr addrspace(1) %
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x half>, ptr addrspace(1) %arg0
%val1 = load <4 x half>, ptr addrspace(1) %arg1
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 5, i32 7, i32 undef>
%shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 5, i32 7, i32 poison>
ret <4 x half> %shuffle
}
@ -1881,21 +1881,21 @@ entry:
%tmp15 = load <4 x half>, ptr addrspace(1) %arrayidx1, align 8
%arrayidx2 = getelementptr inbounds <4 x half>, ptr addrspace(1) %C, i64 %tmp12
%tmp16 = load <4 x half>, ptr addrspace(1) %arrayidx2, align 8
%tmp17 = shufflevector <4 x half> %tmp14, <4 x half> undef, <2 x i32> zeroinitializer
%tmp18 = shufflevector <4 x half> %tmp15, <4 x half> undef, <2 x i32> <i32 0, i32 1>
%tmp19 = shufflevector <4 x half> %tmp16, <4 x half> undef, <2 x i32> <i32 0, i32 1>
%tmp17 = shufflevector <4 x half> %tmp14, <4 x half> poison, <2 x i32> zeroinitializer
%tmp18 = shufflevector <4 x half> %tmp15, <4 x half> poison, <2 x i32> <i32 0, i32 1>
%tmp19 = shufflevector <4 x half> %tmp16, <4 x half> poison, <2 x i32> <i32 0, i32 1>
%tmp20 = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %tmp17, <2 x half> %tmp18, <2 x half> %tmp19)
%tmp21 = shufflevector <4 x half> %tmp14, <4 x half> undef, <2 x i32> <i32 1, i32 1>
%tmp22 = shufflevector <4 x half> %tmp15, <4 x half> undef, <2 x i32> <i32 2, i32 3>
%tmp21 = shufflevector <4 x half> %tmp14, <4 x half> poison, <2 x i32> <i32 1, i32 1>
%tmp22 = shufflevector <4 x half> %tmp15, <4 x half> poison, <2 x i32> <i32 2, i32 3>
%tmp23 = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %tmp21, <2 x half> %tmp22, <2 x half> %tmp20)
%tmp24 = shufflevector <2 x half> %tmp23, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%tmp24 = shufflevector <2 x half> %tmp23, <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
%tmp25 = shufflevector <4 x half> %tmp24, <4 x half> %tmp16, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
%tmp26 = shufflevector <4 x half> %tmp14, <4 x half> undef, <2 x i32> <i32 2, i32 2>
%tmp27 = shufflevector <4 x half> %tmp25, <4 x half> undef, <2 x i32> <i32 2, i32 3>
%tmp26 = shufflevector <4 x half> %tmp14, <4 x half> poison, <2 x i32> <i32 2, i32 2>
%tmp27 = shufflevector <4 x half> %tmp25, <4 x half> poison, <2 x i32> <i32 2, i32 3>
%tmp28 = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %tmp26, <2 x half> %tmp18, <2 x half> %tmp27)
%tmp29 = shufflevector <4 x half> %tmp14, <4 x half> undef, <2 x i32> <i32 3, i32 3>
%tmp29 = shufflevector <4 x half> %tmp14, <4 x half> poison, <2 x i32> <i32 3, i32 3>
%tmp30 = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %tmp29, <2 x half> %tmp22, <2 x half> %tmp28)
%tmp31 = shufflevector <2 x half> %tmp30, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%tmp31 = shufflevector <2 x half> %tmp30, <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
%tmp32 = shufflevector <4 x half> %tmp25, <4 x half> %tmp31, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
store <4 x half> %tmp32, ptr addrspace(1) %arrayidx2, align 8
ret void
@ -2006,7 +2006,7 @@ define amdgpu_kernel void @shuffle_scalar_load_v8i32_0123(ptr addrspace(4) %in,
; GFX11-NEXT: global_store_b128 v4, v[0:3], s[2:3]
; GFX11-NEXT: s_endpgm
%ld8 = load <8 x i32>, ptr addrspace(4) %in, align 16
%id = shufflevector <8 x i32> %ld8, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%id = shufflevector <8 x i32> %ld8, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
store <4 x i32> %id, ptr addrspace(1) %out, align 8
ret void
}
@ -2052,7 +2052,7 @@ define <2 x half> @low16bits_v2f16(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
entry:
%0 = load <2 x half>, ptr addrspace(1) %x0, align 4
%1 = load <2 x half>, ptr addrspace(1) %x1, align 4
%vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 0, i32 undef>
%vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 0, i32 poison>
%vy1.2.vec.insert = shufflevector <2 x half> %vy1.0.vec.insert, <2 x half> %1, <2 x i32> <i32 0, i32 2>
ret <2 x half> %vy1.2.vec.insert
}
@ -2098,7 +2098,7 @@ define <2 x half> @hi16bits_v2f16(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
entry:
%0 = load <2 x half>, ptr addrspace(1) %x0, align 4
%1 = load <2 x half>, ptr addrspace(1) %x1, align 4
%vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 1, i32 undef>
%vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 1, i32 poison>
%vy1.2.vec.insert = shufflevector <2 x half> %vy1.0.vec.insert, <2 x half> %1, <2 x i32> <i32 0, i32 3>
ret <2 x half> %vy1.2.vec.insert
}
@ -2144,7 +2144,7 @@ define <2 x half> @low16hi16bits_v2f16(ptr addrspace(1) %x0, ptr addrspace(1) %x
entry:
%0 = load <2 x half>, ptr addrspace(1) %x0, align 4
%1 = load <2 x half>, ptr addrspace(1) %x1, align 4
%vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 0, i32 undef>
%vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 0, i32 poison>
%vy1.2.vec.insert = shufflevector <2 x half> %vy1.0.vec.insert, <2 x half> %1, <2 x i32> <i32 0, i32 3>
ret <2 x half> %vy1.2.vec.insert
}
@ -2179,7 +2179,7 @@ define <2 x half> @hi16low16bits_v2bf16(ptr addrspace(1) %x0, ptr addrspace(1) %
entry:
%0 = load <2 x half>, ptr addrspace(1) %x0, align 4
%1 = load <2 x half>, ptr addrspace(1) %x1, align 4
%vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 1, i32 undef>
%vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 1, i32 poison>
%vy1.2.vec.insert = shufflevector <2 x half> %vy1.0.vec.insert, <2 x half> %1, <2 x i32> <i32 0, i32 2>
ret <2 x half> %vy1.2.vec.insert
}
@ -2225,7 +2225,7 @@ define <2 x i16> @i16_low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
entry:
%0 = load <2 x i16>, ptr addrspace(1) %x0, align 4
%1 = load <2 x i16>, ptr addrspace(1) %x1, align 4
%vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 0, i32 undef>
%vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 0, i32 poison>
%vy1.2.vec.insert = shufflevector <2 x i16> %vy1.0.vec.insert, <2 x i16> %1, <2 x i32> <i32 0, i32 2>
ret <2 x i16> %vy1.2.vec.insert
}
@ -2271,7 +2271,7 @@ define <2 x i16> @i16_low16hi16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1)
entry:
%0 = load <2 x i16>, ptr addrspace(1) %x0, align 4
%1 = load <2 x i16>, ptr addrspace(1) %x1, align 4
%vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 0, i32 undef>
%vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 0, i32 poison>
%vy1.2.vec.insert = shufflevector <2 x i16> %vy1.0.vec.insert, <2 x i16> %1, <2 x i32> <i32 0, i32 3>
ret <2 x i16> %vy1.2.vec.insert
}
@ -2306,7 +2306,7 @@ define <2 x i16> @i16_hi16low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1)
entry:
%0 = load <2 x i16>, ptr addrspace(1) %x0, align 4
%1 = load <2 x i16>, ptr addrspace(1) %x1, align 4
%vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 1, i32 undef>
%vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%vy1.2.vec.insert = shufflevector <2 x i16> %vy1.0.vec.insert, <2 x i16> %1, <2 x i32> <i32 0, i32 2>
ret <2 x i16> %vy1.2.vec.insert
}
@ -2352,7 +2352,7 @@ define <2 x i16> @i16_hi16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
entry:
%0 = load <2 x i16>, ptr addrspace(1) %x0, align 4
%1 = load <2 x i16>, ptr addrspace(1) %x1, align 4
%vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 1, i32 undef>
%vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
%vy1.2.vec.insert = shufflevector <2 x i16> %vy1.0.vec.insert, <2 x i16> %1, <2 x i32> <i32 0, i32 3>
ret <2 x i16> %vy1.2.vec.insert
}
@ -3020,7 +3020,7 @@ define <4 x bfloat> @shuffle_v4bf16_23uu(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
ret <4 x bfloat> %shuffle
}
@ -3067,7 +3067,7 @@ define <4 x bfloat> @shuffle_v4bf16_234u(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 2, i32 3, i32 4, i32 undef>
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 2, i32 3, i32 4, i32 poison>
ret <4 x bfloat> %shuffle
}
@ -3094,7 +3094,7 @@ define <4 x bfloat> @shuffle_v4bf16_u1u3(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 undef, i32 1, i32 undef, i32 3>
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 poison, i32 1, i32 poison, i32 3>
ret <4 x bfloat> %shuffle
}
@ -3133,7 +3133,7 @@ define <4 x bfloat> @shuffle_v4bf16_u3u1(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 undef, i32 3, i32 undef, i32 1>
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 poison, i32 3, i32 poison, i32 1>
ret <4 x bfloat> %shuffle
}
@ -3160,7 +3160,7 @@ define <4 x bfloat> @shuffle_v4bf16_u3uu(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
ret <4 x bfloat> %shuffle
}
@ -3209,7 +3209,7 @@ define <4 x bfloat> @shuffle_v4bf16_3u6u(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 undef, i32 6, i32 undef>
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 poison, i32 6, i32 poison>
ret <4 x bfloat> %shuffle
}
@ -3258,7 +3258,7 @@ define <4 x bfloat> @shuffle_v4bf16_3uu7(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 undef, i32 undef, i32 7>
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 poison, i32 poison, i32 7>
ret <4 x bfloat> %shuffle
}
@ -3305,7 +3305,7 @@ define <4 x bfloat> @shuffle_v4bf16_35u5(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 5, i32 undef, i32 5>
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 5, i32 poison, i32 5>
ret <4 x bfloat> %shuffle
}
@ -3357,7 +3357,7 @@ define <4 x bfloat> @shuffle_v4bf16_357u(ptr addrspace(1) %arg0, ptr addrspace(1
; GFX11-NEXT: s_setpc_b64 s[30:31]
%val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
%val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 5, i32 7, i32 undef>
%shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 5, i32 7, i32 poison>
ret <4 x bfloat> %shuffle
}
@ -5059,21 +5059,21 @@ entry:
%tmp15 = load <4 x bfloat>, ptr addrspace(1) %arrayidx1, align 8
%arrayidx2 = getelementptr inbounds <4 x bfloat>, ptr addrspace(1) %C, i64 %tmp12
%tmp16 = load <4 x bfloat>, ptr addrspace(1) %arrayidx2, align 8
%tmp17 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> undef, <2 x i32> zeroinitializer
%tmp18 = shufflevector <4 x bfloat> %tmp15, <4 x bfloat> undef, <2 x i32> <i32 0, i32 1>
%tmp19 = shufflevector <4 x bfloat> %tmp16, <4 x bfloat> undef, <2 x i32> <i32 0, i32 1>
%tmp17 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> poison, <2 x i32> zeroinitializer
%tmp18 = shufflevector <4 x bfloat> %tmp15, <4 x bfloat> poison, <2 x i32> <i32 0, i32 1>
%tmp19 = shufflevector <4 x bfloat> %tmp16, <4 x bfloat> poison, <2 x i32> <i32 0, i32 1>
%tmp20 = tail call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %tmp17, <2 x bfloat> %tmp18, <2 x bfloat> %tmp19)
%tmp21 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> undef, <2 x i32> <i32 1, i32 1>
%tmp22 = shufflevector <4 x bfloat> %tmp15, <4 x bfloat> undef, <2 x i32> <i32 2, i32 3>
%tmp21 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> poison, <2 x i32> <i32 1, i32 1>
%tmp22 = shufflevector <4 x bfloat> %tmp15, <4 x bfloat> poison, <2 x i32> <i32 2, i32 3>
%tmp23 = tail call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %tmp21, <2 x bfloat> %tmp22, <2 x bfloat> %tmp20)
%tmp24 = shufflevector <2 x bfloat> %tmp23, <2 x bfloat> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%tmp24 = shufflevector <2 x bfloat> %tmp23, <2 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
%tmp25 = shufflevector <4 x bfloat> %tmp24, <4 x bfloat> %tmp16, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
%tmp26 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> undef, <2 x i32> <i32 2, i32 2>
%tmp27 = shufflevector <4 x bfloat> %tmp25, <4 x bfloat> undef, <2 x i32> <i32 2, i32 3>
%tmp26 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> poison, <2 x i32> <i32 2, i32 2>
%tmp27 = shufflevector <4 x bfloat> %tmp25, <4 x bfloat> poison, <2 x i32> <i32 2, i32 3>
%tmp28 = tail call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %tmp26, <2 x bfloat> %tmp18, <2 x bfloat> %tmp27)
%tmp29 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> undef, <2 x i32> <i32 3, i32 3>
%tmp29 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> poison, <2 x i32> <i32 3, i32 3>
%tmp30 = tail call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %tmp29, <2 x bfloat> %tmp22, <2 x bfloat> %tmp28)
%tmp31 = shufflevector <2 x bfloat> %tmp30, <2 x bfloat> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
%tmp31 = shufflevector <2 x bfloat> %tmp30, <2 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
%tmp32 = shufflevector <4 x bfloat> %tmp25, <4 x bfloat> %tmp31, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
store <4 x bfloat> %tmp32, ptr addrspace(1) %arrayidx2, align 8
ret void
@ -5172,7 +5172,7 @@ define <2 x bfloat> @low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
entry:
%0 = load <2 x bfloat>, ptr addrspace(1) %x0, align 4
%1 = load <2 x bfloat>, ptr addrspace(1) %x1, align 4
%vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 0, i32 undef>
%vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 0, i32 poison>
%vy1.2.vec.insert = shufflevector <2 x bfloat> %vy1.0.vec.insert, <2 x bfloat> %1, <2 x i32> <i32 0, i32 2>
ret <2 x bfloat> %vy1.2.vec.insert
}
@ -5218,7 +5218,7 @@ define <2 x bfloat> @hi16bits_v2bf16(ptr addrspace(1) %x0, ptr addrspace(1) %x1)
entry:
%0 = load <2 x bfloat>, ptr addrspace(1) %x0, align 4
%1 = load <2 x bfloat>, ptr addrspace(1) %x1, align 4
%vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 1, i32 undef>
%vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 1, i32 poison>
%vy1.2.vec.insert = shufflevector <2 x bfloat> %vy1.0.vec.insert, <2 x bfloat> %1, <2 x i32> <i32 0, i32 3>
ret <2 x bfloat> %vy1.2.vec.insert
}
@ -5264,7 +5264,7 @@ define <2 x bfloat> @low16hi16bits_v2bf16(ptr addrspace(1) %x0, ptr addrspace(1)
entry:
%0 = load <2 x bfloat>, ptr addrspace(1) %x0, align 4
%1 = load <2 x bfloat>, ptr addrspace(1) %x1, align 4
%vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 0, i32 undef>
%vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 0, i32 poison>
%vy1.2.vec.insert = shufflevector <2 x bfloat> %vy1.0.vec.insert, <2 x bfloat> %1, <2 x i32> <i32 0, i32 3>
ret <2 x bfloat> %vy1.2.vec.insert
}
@ -5299,7 +5299,7 @@ define <2 x bfloat> @hi16low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
entry:
%0 = load <2 x bfloat>, ptr addrspace(1) %x0, align 4
%1 = load <2 x bfloat>, ptr addrspace(1) %x1, align 4
%vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 1, i32 undef>
%vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 1, i32 poison>
%vy1.2.vec.insert = shufflevector <2 x bfloat> %vy1.0.vec.insert, <2 x bfloat> %1, <2 x i32> <i32 0, i32 2>
ret <2 x bfloat> %vy1.2.vec.insert
}

View File

@ -543,12 +543,12 @@ if.then9: ; preds = %entry
sw.bb: ; preds = %if.then9
%i17 = load i8, ptr addrspace(1) null, align 1
%i18 = insertelement <4 x i8> zeroinitializer, i8 %i17, i64 0
%a.sroa.0.0.vecblend = shufflevector <4 x i8> %i18, <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
%a.sroa.0.0.vecblend = shufflevector <4 x i8> %i18, <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 0, i32 poison>
br label %sw.bb18
sw.bb18: ; preds = %sw.bb, %if.then9
%a.sroa.0.0 = phi <4 x i8> [ %a.sroa.0.0.vecblend, %sw.bb ], [ poison, %if.then9 ]
%a.sroa.0.0.vec.extract61 = shufflevector <4 x i8> %a.sroa.0.0, <4 x i8> zeroinitializer, <3 x i32> <i32 undef, i32 1, i32 undef>
%a.sroa.0.0.vec.extract61 = shufflevector <4 x i8> %a.sroa.0.0, <4 x i8> zeroinitializer, <3 x i32> <i32 poison, i32 1, i32 poison>
%i19 = insertelement <3 x i8> %a.sroa.0.0.vec.extract61, i8 0, i64 0
%i20 = select <3 x i1> zeroinitializer, <3 x i8> zeroinitializer, <3 x i8> %i19
%i21 = extractelement <3 x i8> %i20, i64 1

View File

@ -421,7 +421,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<8 x half> %A, <8 x
; GFX12-NEXT: s_endpgm
bb:
%C = load <16 x half>, ptr %Caddr
%C_shuffle = shufflevector <16 x half> %C, <16 x half> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%C_shuffle = shufflevector <16 x half> %C, <16 x half> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%fneg.C_shuffle = fneg <8 x half> %C_shuffle
%res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> %fneg.C_shuffle , i1 0)
store <8 x half> %res, ptr addrspace(1) %out

View File

@ -378,7 +378,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<4 x half> %A, <4 x
; GFX12-NEXT: s_endpgm
bb:
%C = load <8 x half>, ptr %Caddr
%C_shuffle = shufflevector <8 x half> %C, <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%C_shuffle = shufflevector <8 x half> %C, <8 x half> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%fneg.C_shuffle = fneg <4 x half> %C_shuffle
%res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half> %A, <4 x half> %B, <4 x half> %fneg.C_shuffle , i1 0)
store <4 x half> %res, ptr addrspace(1) %out

View File

@ -12,9 +12,9 @@ define amdgpu_cs void @xyz () {
br label %loop
loop:
%ld = load <8 x float>, ptr addrspace(5) null, align 32
%in_shuffle = shufflevector <8 x float> %ld, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%in_shuffle = shufflevector <8 x float> %ld, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%wmma = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v16f16(<16 x half> undef, <16 x half> undef, <4 x float> %in_shuffle)
%out_shuffle = shufflevector <4 x float> %wmma, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
%out_shuffle = shufflevector <4 x float> %wmma, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
store <8 x float> %out_shuffle, ptr addrspace(5) null, align 32
br i1 false, label %.exit, label %loop
.exit: