AMDGPU: Replace tests using undef in shufflevector with poison (#130899)

2025-03-12 20:45:02 +07:00 · 2025-03-12 20:45:02 +07:00 · b76e396990
commit b76e396990
parent 2fbddfbdc0
63 changed files with 305 additions and 305 deletions
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll
@ -12,7 +12,7 @@ define void @value_finder_bug(ptr addrspace(5) %store_ptr, ptr addrspace(4) %ptr
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %vec = load <4 x float>, ptr addrspace(4) %ptr, align 4
  %vec.3 = extractelement <4 x float> %vec, i32 3
-  %shuffle = shufflevector <4 x float> %vec, <4 x float> undef, <2 x i32> <i32 2, i32 undef>
+  %shuffle = shufflevector <4 x float> %vec, <4 x float> poison, <2 x i32> <i32 2, i32 poison>
  %new_vec = insertelement <2 x float> %shuffle, float %vec.3, i32 1
  store <2 x float> %new_vec, ptr addrspace(5) %store_ptr, align 8
  ret void
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
@ -870,10 +870,10 @@ define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) {
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
 entry:
  %insert = insertelement <8 x double> <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>, double %val, i32 %idx
-  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
-  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
-  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
-  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
+  %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
+  %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
+  %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
+  %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
  store volatile <2 x double> %vec.0, ptr addrspace(1) undef
  store volatile <2 x double> %vec.1, ptr addrspace(1) undef
  store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@ -1081,10 +1081,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_s_v(<8 x double> inreg %vec, do
 ; GFX11-NEXT:    s_endpgm
 entry:
  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
-  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
-  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
-  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
-  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
+  %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
+  %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
+  %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
+  %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
  store volatile <2 x double> %vec.0, ptr addrspace(1) undef
  store volatile <2 x double> %vec.1, ptr addrspace(1) undef
  store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@ -1229,10 +1229,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_v_s(<8 x double> inreg %vec, do
 ; GFX11-NEXT:    s_endpgm
 entry:
  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
-  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
-  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
-  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
-  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
+  %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
+  %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
+  %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
+  %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
  store volatile <2 x double> %vec.0, ptr addrspace(1) undef
  store volatile <2 x double> %vec.1, ptr addrspace(1) undef
  store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@ -1289,10 +1289,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_s_s(<8 x double> %vec, double i
 ; GFX11-NEXT:    s_endpgm
 entry:
  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
-  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
-  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
-  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
-  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
+  %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
+  %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
+  %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
+  %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
  store volatile <2 x double> %vec.0, ptr addrspace(1) undef
  store volatile <2 x double> %vec.1, ptr addrspace(1) undef
  store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@ -1494,10 +1494,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_v_v(<8 x double> inreg %vec, do
 ; GFX11-NEXT:    s_endpgm
 entry:
  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
-  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
-  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
-  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
-  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
+  %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
+  %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
+  %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
+  %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
  store volatile <2 x double> %vec.0, ptr addrspace(1) undef
  store volatile <2 x double> %vec.1, ptr addrspace(1) undef
  store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@ -1617,10 +1617,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_s_v(<8 x double> %vec, double i
 ; GFX11-NEXT:    s_endpgm
 entry:
  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
-  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
-  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
-  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
-  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
+  %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
+  %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
+  %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
+  %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
  store volatile <2 x double> %vec.0, ptr addrspace(1) undef
  store volatile <2 x double> %vec.1, ptr addrspace(1) undef
  store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@ -1677,10 +1677,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_s(<8 x double> %vec, double %
 ; GFX11-NEXT:    s_endpgm
 entry:
  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
-  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
-  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
-  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
-  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
+  %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
+  %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
+  %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
+  %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
  store volatile <2 x double> %vec.0, ptr addrspace(1) undef
  store volatile <2 x double> %vec.1, ptr addrspace(1) undef
  store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@ -1794,10 +1794,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v(<8 x double> %vec, double %
 ; GFX11-NEXT:    s_endpgm
 entry:
  %insert = insertelement <8 x double> %vec, double %val, i32 %idx
-  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
-  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
-  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
-  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
+  %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
+  %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
+  %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
+  %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
  store volatile <2 x double> %vec.0, ptr addrspace(1) undef
  store volatile <2 x double> %vec.1, ptr addrspace(1) undef
  store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@ -2401,10 +2401,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_s_s_s_add_1(<8 x double> inreg %v
 entry:
  %idx.add = add i32 %idx, 1
  %insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
-  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
-  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
-  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
-  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
+  %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
+  %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
+  %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
+  %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
  store volatile <2 x double> %vec.0, ptr addrspace(1) undef
  store volatile <2 x double> %vec.1, ptr addrspace(1) undef
  store volatile <2 x double> %vec.2, ptr addrspace(1) undef
@ -2525,10 +2525,10 @@ define amdgpu_ps void @dyn_insertelement_v8f64_v_v_v_add_1(<8 x double> %vec, do
 entry:
  %idx.add = add i32 %idx, 1
  %insert = insertelement <8 x double> %vec, double %val, i32 %idx.add
-  %vec.0 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 0, i32 1>
-  %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 2, i32 3>
-  %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 4, i32 5>
-  %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> <i32 6, i32 7>
+  %vec.0 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 0, i32 1>
+  %vec.1 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 2, i32 3>
+  %vec.2 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 4, i32 5>
+  %vec.3 = shufflevector <8 x double> %insert, <8 x double> poison, <2 x i32> <i32 6, i32 7>
  store volatile <2 x double> %vec.0, ptr addrspace(1) undef
  store volatile <2 x double> %vec.1, ptr addrspace(1) undef
  store volatile <2 x double> %vec.2, ptr addrspace(1) undef
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@ -1519,7 +1519,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
  ; CHECK-NEXT:   S_ENDPGM 0
  %load = load <2 x i64>, ptr addrspace(1) null
-  %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
+  %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 poison>, <3 x i32> <i32 0, i32 1, i32 2>

  call void @external_void_func_v3i64(<3 x i64> %val)
  ret void
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll
@ -322,7 +322,7 @@ define i32 @v_sdot2_shuffle10_a(<2 x i16> %a, <2 x i16> %b, i32 %c) {
 ; GFX10-NEXT:    v_alignbit_b32 v0, v0, v0, 16
 ; GFX10-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
-  %shuf.a = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+  %shuf.a = shufflevector <2 x i16> %a, <2 x i16> poison, <2 x i32> <i32 1, i32 0>
  %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %shuf.a, <2 x i16> %b, i32 %c, i1 false)
  ret i32 %r
 }
@ -349,7 +349,7 @@ define i32 @v_sdot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) {
 ; GFX10-NEXT:    v_alignbit_b32 v1, v1, v1, 16
 ; GFX10-NEXT:    v_dot2_i32_i16 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
-  %shuf.b = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+  %shuf.b = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> <i32 1, i32 0>
  %r = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %shuf.b, i32 %c, i1 false)
  ret i32 %r
 }
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll
@ -306,7 +306,7 @@ define i32 @v_udot2_shuffle10_a(<2 x i16> %a, <2 x i16> %b, i32 %c) {
 ; GFX10-NEXT:    v_alignbit_b32 v0, v0, v0, 16
 ; GFX10-NEXT:    v_dot2_u32_u16 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
-  %shuf.a = shufflevector <2 x i16> %a, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+  %shuf.a = shufflevector <2 x i16> %a, <2 x i16> poison, <2 x i32> <i32 1, i32 0>
  %r = call i32 @llvm.amdgcn.udot2(<2 x i16> %shuf.a, <2 x i16> %b, i32 %c, i1 false)
  ret i32 %r
 }
@ -332,7 +332,7 @@ define i32 @v_udot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) {
 ; GFX10-NEXT:    v_alignbit_b32 v1, v1, v1, 16
 ; GFX10-NEXT:    v_dot2_u32_u16 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
-  %shuf.b = shufflevector <2 x i16> %b, <2 x i16> undef, <2 x i32> <i32 1, i32 0>
+  %shuf.b = shufflevector <2 x i16> %b, <2 x i16> poison, <2 x i32> <i32 1, i32 0>
  %r = call i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %shuf.b, i32 %c, i1 false)
  ret i32 %r
 }
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/trunc.ll
@ -121,7 +121,7 @@ define amdgpu_ps i32 @s_trunc_v2i32_to_v2i16(<2 x i32> inreg %src) {
 ; ; FIXME: G_INSERT mishandled
 ; define <2 x i32> @v_trunc_v3i32_to_v3i16(<3 x i32> %src) {
 ;   %trunc = trunc <3 x i32> %src to <3 x i16>
-;   %ext = shufflevector <3 x i16> %trunc, <3 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+;   %ext = shufflevector <3 x i16> %trunc, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ;   %cast = bitcast <4 x i16> %ext to <2 x i32>
 ;   ret <2 x i32> %cast
 ; }
@ -129,7 +129,7 @@ define amdgpu_ps i32 @s_trunc_v2i32_to_v2i16(<2 x i32> inreg %src) {
 ; ; FIXME: G_INSERT mishandled
 ; define amdgpu_ps <2 x i32> @s_trunc_v3i32_to_v3i16(<3 x i32> inreg %src) {
 ;   %trunc = trunc <3 x i32> %src to <3 x i16>
-;   %ext = shufflevector <3 x i16> %trunc, <3 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+;   %ext = shufflevector <3 x i16> %trunc, <3 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ;   %cast = bitcast <4 x i16> %ext to <2 x i32>
 ;   ret <2 x i32> %cast
 ; }
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
@ -426,7 +426,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<8 x half> %A, <8 x
 ; GFX12-NEXT:    s_endpgm
 bb:
  %C = load <16 x half>, ptr %Caddr
-  %C_shuffle = shufflevector <16 x half> %C, <16 x half> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %C_shuffle = shufflevector <16 x half> %C, <16 x half> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %fneg.C_shuffle = fneg <8 x half> %C_shuffle
  %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> %fneg.C_shuffle , i1 0)
  store <8 x half> %res, ptr addrspace(1) %out
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
@ -381,7 +381,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<4 x half> %A, <4 x
 ; GFX12-NEXT:    s_endpgm
 bb:
  %C = load <8 x half>, ptr %Caddr
-  %C_shuffle = shufflevector <8 x half> %C, <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %C_shuffle = shufflevector <8 x half> %C, <8 x half> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %fneg.C_shuffle = fneg <4 x half> %C_shuffle
  %res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half> %A, <4 x half> %B, <4 x half> %fneg.C_shuffle , i1 0)
  store <4 x half> %res, ptr addrspace(1) %out
--- a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
@ -9,7 +9,7 @@ define amdgpu_ps void @adjust_writemask_crash_0_nochain() #0 {
 main_body:
  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
  %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
-  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
  %tmp4 = extractelement <4 x float> %tmp3, i32 0
  store volatile float %tmp4, ptr addrspace(1) undef
@ -25,7 +25,7 @@ define amdgpu_ps void @adjust_writemask_crash_1_nochain() #0 {
 main_body:
  %tmp = call <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
  %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
-  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
+  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 poison, i32 poison>
  %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
  %tmp4 = extractelement <4 x float> %tmp3, i32 1
  store volatile float %tmp4, ptr addrspace(1) undef
@ -41,7 +41,7 @@ define amdgpu_ps void @adjust_writemask_crash_0_chain() #0 {
 main_body:
  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
  %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
-  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
  %tmp4 = extractelement <4 x float> %tmp3, i32 0
  store volatile float %tmp4, ptr addrspace(1) undef
@ -57,7 +57,7 @@ define amdgpu_ps void @adjust_writemask_crash_1_chain() #0 {
 main_body:
  %tmp = call <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32 3, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
  %tmp1 = bitcast <2 x float> %tmp to <2 x i32>
-  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
+  %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 poison, i32 poison>
  %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
  %tmp4 = extractelement <4 x float> %tmp3, i32 1
  store volatile float %tmp4, ptr addrspace(1) undef
@ -68,7 +68,7 @@ define amdgpu_ps void @adjust_writemask_crash_0_v4() #0 {
 main_body:
  %tmp = call <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32 5, float undef, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
  %tmp1 = bitcast <4 x float> %tmp to <4 x i32>
-  %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %tmp3 = bitcast <4 x i32> %tmp2 to <4 x float>
  %tmp4 = extractelement <4 x float> %tmp3, i32 0
  store volatile float %tmp4, ptr addrspace(1) undef
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-break-large-phis-heuristics.ll
@ -477,7 +477,7 @@ entry:

 then:
  %x.1 = insertelement <5 x double> <double 3.140000e+00, double poison, double poison, double poison, double poison>, double %x, i32 %idx
-  %0 = shufflevector <5 x double> %x.1, <5 x double> <double poison, double poison, double poison, double 6.140000e+00, double 9.900000e+00>, <5 x i32> <i32 0, i32 1, i32 undef, i32 8, i32 9>
+  %0 = shufflevector <5 x double> %x.1, <5 x double> <double poison, double poison, double poison, double 6.140000e+00, double 9.900000e+00>, <5 x i32> <i32 0, i32 1, i32 poison, i32 8, i32 9>
  %x.4 = insertelement <5 x double> %0, double %x, i64 2
  br label %finally

--- a/llvm/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal_scratch_mergedshader.ll
@ -18,7 +18,7 @@ define amdgpu_hs void @_amdgpu_hs_main(i32 inreg %arg, i32 inreg %arg1, i32 inre
 .beginls:                                         ; preds = %.entry
  %tmp15 = extractelement <6 x i32> %arg8, i32 3
  %.0.vec.insert.i = insertelement <2 x i32> poison, i32 %tmp15, i32 0
-  %.4.vec.insert.i = shufflevector <2 x i32> %.0.vec.insert.i, <2 x i32> undef, <2 x i32> <i32 0, i32 3>
+  %.4.vec.insert.i = shufflevector <2 x i32> %.0.vec.insert.i, <2 x i32> poison, <2 x i32> <i32 0, i32 3>
  %tmp16 = bitcast <2 x i32> %.4.vec.insert.i to i64
  br label %.endls

--- a/llvm/test/CodeGen/AMDGPU/buffer-rsrc-ptr-ops.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-rsrc-ptr-ops.ll
@ -49,7 +49,7 @@ main_body:
  %buf1.int = ptrtoint ptr addrspace(8) %buf1 to i128
  %buf1.vec = bitcast i128 %buf1.int to <4 x i32>
  call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> %buf1.vec, ptr addrspace(8) %buf2, i32 0, i32 0, i32 0)
-  %shuffled = shufflevector <2 x ptr addrspace(8)> %buffers, <2 x ptr addrspace(8)> undef, <2 x i32> <i32 1, i32 0>
+  %shuffled = shufflevector <2 x ptr addrspace(8)> %buffers, <2 x ptr addrspace(8)> poison, <2 x i32> <i32 1, i32 0>
  %somewhere.next = getelementptr <2 x ptr addrspace(8)>, ptr addrspace(1) %somewhere, i64 1
  store <2 x ptr addrspace(8)> %shuffled, ptr addrspace(1) %somewhere.next
  ret void
--- a/llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll
+++ b/llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll
@ -45,7 +45,7 @@ bb1789:                                           ; preds = %bb1750
  %i1879 = bitcast <3 x i32> %i1878 to <3 x float>
  %i1881 = fmul reassoc nnan nsz arcp contract afn <3 x float> %i1540, %i1879
  %i1882 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> poison, i32 poison, i32 0)
-  %i1883 = shufflevector <3 x i32> %i1882, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %i1883 = shufflevector <3 x i32> %i1882, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  %i1884 = bitcast <4 x i32> %i1883 to <4 x float>
  %i1885 = shufflevector <4 x float> %i1884, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %i1886 = insertelement <3 x i32> poison, i32 %i1819, i64 0
@ -57,7 +57,7 @@ bb1789:                                           ; preds = %bb1750
  %i1892 = fmul reassoc nnan nsz arcp contract afn <3 x float> %i1885, %i1891
  %i1893 = fmul reassoc nnan nsz arcp contract afn <3 x float> %i1892, %i1881
  %i1894 = bitcast <3 x float> %i1893 to <3 x i32>
-  %i1895 = shufflevector <3 x i32> %i1894, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %i1895 = shufflevector <3 x i32> %i1894, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  %i1896 = insertelement <4 x i32> %i1895, i32 %i1819, i64 3
  br label %bb1897

@ -70,7 +70,7 @@ bb1897:                                           ; preds = %bb1789, %bb1787
  %i1901 = bitcast <3 x i32> %i1900 to <3 x float>
  %i1902 = fadd reassoc nnan nsz arcp contract afn <3 x float> %i1901, %i1899
  %i1903 = bitcast <3 x float> %i1902 to <3 x i32>
-  %i1907 = shufflevector <3 x i32> %i1903, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %i1907 = shufflevector <3 x i32> %i1903, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  %i1908 = shufflevector <4 x i32> %i1907, <4 x i32> %__llpc_global_proxy_r11.19, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  %i1914 = shufflevector <4 x i32> %i1908, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
  %__llpc_global_proxy_r3.12.vec.extract2358 = extractelement <2 x i32> zeroinitializer, i64 1
--- a/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll
@ -168,7 +168,7 @@ define void @undef_lo2_v4i16(<2 x i16> %arg0) {
 ; GFX8-NEXT:    ; use v[0:1]
 ; GFX8-NEXT:    ;;#ASMEND
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
-  %undef.lo = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
+  %undef.lo = shufflevector <2 x i16> %arg0, <2 x i16> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
  call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.lo);
  ret void
 }
@ -193,7 +193,7 @@ define void @undef_lo2_v4f16(<2 x half> %arg0) {
 ; GFX8-NEXT:    ; use v[0:1]
 ; GFX8-NEXT:    ;;#ASMEND
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
-  %undef.lo = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
+  %undef.lo = shufflevector <2 x half> %arg0, <2 x half> poison, <4 x i32> <i32 1, i32 1, i32 2, i32 3>
  call void asm sideeffect "; use $0", "v"(<4 x half> %undef.lo);
  ret void
 }
@ -348,7 +348,7 @@ define void @undef_hi2_v4i16(<2 x i16> %arg0) {
 ; GFX8-NEXT:    ; use v[0:1]
 ; GFX8-NEXT:    ;;#ASMEND
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
-  %undef.hi = shufflevector <2 x i16> %arg0, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %undef.hi = shufflevector <2 x i16> %arg0, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  call void asm sideeffect "; use $0", "v"(<4 x i16> %undef.hi);
  ret void
 }
@ -369,7 +369,7 @@ define void @undef_hi2_v4f16(<2 x half> %arg0) {
 ; GFX8-NEXT:    ; use v[0:1]
 ; GFX8-NEXT:    ;;#ASMEND
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
-  %undef.hi = shufflevector <2 x half> %arg0, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %undef.hi = shufflevector <2 x half> %arg0, <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  call void asm sideeffect "; use $0", "v"(<4 x half> %undef.hi);
  ret void
 }
--- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
@ -1454,7 +1454,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 {
 ; HSA-NEXT:    s_swappc_b64 s[30:31], s[8:9]
 ; HSA-NEXT:    s_endpgm
  %load = load <2 x i64>, ptr addrspace(1) null
-  %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
+  %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 poison>, <3 x i32> <i32 0, i32 1, i32 2>

  call void @external_void_func_v3i64(<3 x i64> %val)
  ret void
--- a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
@ -1293,7 +1293,7 @@ define amdgpu_kernel void @v_clamp_add_src_v2f16_denorm_shuf(ptr addrspace(1) %o
  %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
  %a = load <2 x half>, ptr addrspace(1) %gep0
  %add = fadd <2 x half> %a, <half 1.0, half 1.0>
-  %shuf = shufflevector <2 x half> %add, <2 x half> undef, <2 x i32> <i32 1, i32 0>
+  %shuf = shufflevector <2 x half> %add, <2 x half> poison, <2 x i32> <i32 1, i32 0>

  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %shuf, <2 x half> zeroinitializer)
  %clamp = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
--- a/llvm/test/CodeGen/AMDGPU/clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp.ll
@ -3728,7 +3728,7 @@ define amdgpu_kernel void @v_clamp_v2f16_shuffle(ptr addrspace(1) %out, ptr addr
  %gep0 = getelementptr <2 x half>, ptr addrspace(1) %aptr, i32 %tid
  %out.gep = getelementptr <2 x half>, ptr addrspace(1) %out, i32 %tid
  %a = load <2 x half>, ptr addrspace(1) %gep0
-  %shuf = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 0>
+  %shuf = shufflevector <2 x half> %a, <2 x half> poison, <2 x i32> <i32 1, i32 0>
  %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %shuf, <2 x half> zeroinitializer)
  %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)

--- a/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll
@ -14,7 +14,7 @@ bb:
  %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep1 = getelementptr inbounds <4 x i32>, ptr addrspace(1) %arg, i32 %lid
  %load = load <4 x i32>, ptr addrspace(1) %gep1, align 16
-  %shuffle = shufflevector <4 x i32> %load, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
+  %shuffle = shufflevector <4 x i32> %load, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
  %gep2 = getelementptr inbounds <2 x i32>, ptr addrspace(1) %arg1, i32 %lid
  store <2 x i32> %shuffle, ptr addrspace(1) %gep2, align 8
  ret void
@ -27,9 +27,9 @@ bb:
 define amdgpu_kernel void @test_vector_creation() #0 {
 entry:
  %tmp231 = load <4 x i16>, ptr addrspace(1) undef, align 2
-  %vext466 = shufflevector <4 x i16> %tmp231, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-  %vecinit467 = shufflevector <8 x i16> undef, <8 x i16> %vext466, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 10, i32 11, i32 undef, i32 undef>
-  %vecinit471 = shufflevector <8 x i16> %vecinit467, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+  %vext466 = shufflevector <4 x i16> %tmp231, <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+  %vecinit467 = shufflevector <8 x i16> poison, <8 x i16> %vext466, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison>
+  %vecinit471 = shufflevector <8 x i16> %vecinit467, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
  store <8 x i16> %vecinit471, ptr addrspace(1) undef, align 16
  ret void
 }
--- a/llvm/test/CodeGen/AMDGPU/combine_vloads.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine_vloads.ll
@ -23,12 +23,12 @@ for.body:                                         ; preds = %for.body, %entry
  %i.01 = phi i32 [ 0, %entry ], [ %tmp19, %for.body ]
  %vecload2 = load <8 x i32>, ptr addrspace(1) %src, align 32
  %0 = bitcast <8 x i32> %vecload2 to <32 x i8>
-  %tmp5 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %tmp8 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  %tmp5 = shufflevector <32 x i8> %0, <32 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %tmp8 = shufflevector <32 x i8> %0, <32 x i8> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp9 = add nsw <8 x i8> %tmp5, %tmp8
-  %tmp12 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+  %tmp12 = shufflevector <32 x i8> %0, <32 x i8> poison, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  %tmp13 = add nsw <8 x i8> %tmp9, %tmp12
-  %tmp16 = shufflevector <32 x i8> %0, <32 x i8> undef, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+  %tmp16 = shufflevector <32 x i8> %0, <32 x i8> poison, <8 x i32> <i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %tmp17 = add nsw <8 x i8> %tmp13, %tmp16
  %scevgep = getelementptr <8 x i8>, ptr addrspace(1) %result, i32 %i.01
  %1 = bitcast <8 x i8> %tmp17 to <2 x i32>
--- a/llvm/test/CodeGen/AMDGPU/computeKnownBits-scalar-to-vector-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/computeKnownBits-scalar-to-vector-crash.ll
@ -3,7 +3,7 @@
 ; CHECK: s_waitcnt
 define <2 x i16> @main(<2 x float>) #0 {
  %2 = bitcast <2 x float> %0 to <4 x i16>
-  %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <2 x i32> <i32 0, i32 undef>
+  %3 = shufflevector <4 x i16> %2, <4 x i16> poison, <2 x i32> <i32 0, i32 poison>
  %4 = extractelement <4 x i16> %2, i32 0
  %5 = insertelement <2 x i16> %3, i16 %4, i32 0
  ret <2 x i16> %5
--- a/llvm/test/CodeGen/AMDGPU/concat_vectors.ll
+++ b/llvm/test/CodeGen/AMDGPU/concat_vectors.ll
@ -289,8 +289,8 @@ define amdgpu_kernel void @test_concat_v16i16(ptr addrspace(1) %out, <16 x i16>
 define amdgpu_kernel void @concat_vector_crash(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 bb:
  %tmp = load <2 x float>, ptr addrspace(1) %in, align 4
-  %tmp1 = shufflevector <2 x float> %tmp, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %tmp2 = shufflevector <8 x float> undef, <8 x float> %tmp1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+  %tmp1 = shufflevector <2 x float> %tmp, <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %tmp2 = shufflevector <8 x float> poison, <8 x float> %tmp1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
  store <8 x float> %tmp2, ptr addrspace(1) %out, align 32
  ret void
 }
@ -301,8 +301,8 @@ define amdgpu_kernel void @concat_vector_crash2(ptr addrspace(1) %out, ptr addrs
  %tmp = load i32, ptr addrspace(1) %in, align 1
  %tmp1 = trunc i32 %tmp to i24
  %tmp2 = bitcast i24 %tmp1 to <3 x i8>
-  %tmp3 = shufflevector <3 x i8> %tmp2, <3 x i8> undef, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 undef, i32 undef>
-  %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 7, i8 8>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 15>
+  %tmp3 = shufflevector <3 x i8> %tmp2, <3 x i8> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison>
+  %tmp4 = shufflevector <8 x i8> %tmp3, <8 x i8> <i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 7, i8 8>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 14, i32 15>
  store <8 x i8> %tmp4, ptr addrspace(1) %out, align 8
  ret void
 }
--- a/llvm/test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcomb-shuffle-vecextend-non2.ll
@ -17,7 +17,7 @@ define amdgpu_ps void @main(i32 %in1, i32 inreg %arg) local_unnamed_addr {

 bb:
  %__llpc_global_proxy_r5.12.vec.insert = insertelement <4 x i32> poison, i32 %in1, i32 3
-  %tmp3 = shufflevector <4 x i32> %__llpc_global_proxy_r5.12.vec.insert, <4 x i32> undef, <3 x i32> <i32 undef, i32 undef, i32 1>
+  %tmp3 = shufflevector <4 x i32> %__llpc_global_proxy_r5.12.vec.insert, <4 x i32> poison, <3 x i32> <i32 poison, i32 poison, i32 1>
  %tmp4 = bitcast <3 x i32> %tmp3 to <3 x float>
  %a2.i123 = extractelement <3 x float> %tmp4, i32 2
  %tmp5 = bitcast float %a2.i123 to i32
@ -26,7 +26,7 @@ bb:

 bb12:
  %__llpc_global_proxy_r2.0 = phi <4 x i32> [ %__llpc_global_proxy_r2.0.vec.insert196, %bb ], [ poison, %.entry ]
-  %tmp6 = shufflevector <4 x i32> %__llpc_global_proxy_r2.0, <4 x i32> undef, <3 x i32> <i32 1, i32 2, i32 3>
+  %tmp6 = shufflevector <4 x i32> %__llpc_global_proxy_r2.0, <4 x i32> poison, <3 x i32> <i32 1, i32 2, i32 3>
  %tmp7 = bitcast <3 x i32> %tmp6 to <3 x float>
  %a0.i = extractelement <3 x float> %tmp7, i32 0
  ret void
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
@ -146,7 +146,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
  %0 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
  %.i2243 = extractelement <3 x float> %0, i32 2
  %1 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 0, i32 0)
-  %2 = shufflevector <3 x i32> %1, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %2 = shufflevector <3 x i32> %1, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  %3 = bitcast <4 x i32> %2 to <4 x float>
  %.i2248 = extractelement <4 x float> %3, i32 2
  %.i2249 = fmul reassoc nnan nsz arcp contract afn float %.i2243, %.i2248
@ -159,17 +159,17 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
  %.i0364 = extractelement <2 x float> %7, i32 0
  %8 = call float @llvm.amdgcn.image.sample.2d.f32.f32(i32 1, float undef, float undef, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
  %9 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 112, i32 0)
-  %10 = shufflevector <3 x i32> %9, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %10 = shufflevector <3 x i32> %9, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  %11 = bitcast <4 x i32> %10 to <4 x float>
  %.i2360 = extractelement <4 x float> %11, i32 2
  %.i2363 = fmul reassoc nnan nsz arcp contract afn float %.i2360, %8
  %12 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 96, i32 0)
-  %13 = shufflevector <3 x i32> %12, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %13 = shufflevector <3 x i32> %12, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  %14 = bitcast <4 x i32> %13 to <4 x float>
  %.i2367 = extractelement <4 x float> %14, i32 2
  %.i2370 = fmul reassoc nnan nsz arcp contract afn float %.i0364, %.i2367
  %15 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 32, i32 0)
-  %16 = shufflevector <3 x i32> %15, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %16 = shufflevector <3 x i32> %15, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  %17 = bitcast <4 x i32> %16 to <4 x float>
  %.i2373 = extractelement <4 x float> %17, i32 2
  %.i2376 = fsub reassoc nnan nsz arcp contract afn float %.i2373, %.i2370
@ -212,12 +212,12 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
  %.i2466 = fmul reassoc nnan nsz arcp contract afn float %.i2465, %43
  %.i2469 = fmul reassoc nnan nsz arcp contract afn float %.i2415, %.i2466
  %45 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 64, i32 0)
-  %46 = shufflevector <3 x i32> %45, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %46 = shufflevector <3 x i32> %45, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  %47 = bitcast <4 x i32> %46 to <4 x float>
  %.i2476 = extractelement <4 x float> %47, i32 2
  %.i2479 = fmul reassoc nnan nsz arcp contract afn float %.i2476, %18
  %48 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 80, i32 0)
-  %49 = shufflevector <3 x i32> %48, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %49 = shufflevector <3 x i32> %48, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  %50 = bitcast <4 x i32> %49 to <4 x float>
  %.i2482 = extractelement <4 x float> %50, i32 2
  %.i2485 = fsub reassoc nnan nsz arcp contract afn float %.i2482, %.i2479
@ -230,7 +230,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
  %.i2522 = fadd reassoc nnan nsz arcp contract afn float %.i2521, %.i2516
  %.i2525 = fmul reassoc nnan nsz arcp contract afn float %.i2522, %43
  %52 = call <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32> undef, i32 16, i32 0)
-  %53 = shufflevector <3 x i32> %52, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %53 = shufflevector <3 x i32> %52, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  %54 = bitcast <4 x i32> %53 to <4 x float>
  %.i2530 = extractelement <4 x float> %54, i32 2
  %.i2531 = fmul reassoc nnan nsz arcp contract afn float %.i2333, %.i2530
--- a/llvm/test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombiner-bug-illegal-vec4-int-to-fp.ll
@ -16,7 +16,7 @@ entry:
  %sint = load i32, ptr addrspace(1) %in
  %conv = sitofp i32 %sint to float
  %0 = insertelement <4 x float> poison, float %conv, i32 0
-  %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
+  %splat = shufflevector <4 x float> %0, <4 x float> poison, <4 x i32> zeroinitializer
  store <4 x float> %splat, ptr addrspace(1) %out
  ret void
 }
@ -30,7 +30,7 @@ entry:
  %uint = load i32, ptr addrspace(1) %in
  %conv = uitofp i32 %uint to float
  %0 = insertelement <4 x float> poison, float %conv, i32 0
-  %splat = shufflevector <4 x float> %0, <4 x float> undef, <4 x i32> zeroinitializer
+  %splat = shufflevector <4 x float> %0, <4 x float> poison, <4 x i32> zeroinitializer
  store <4 x float> %splat, ptr addrspace(1) %out
  ret void
 }
--- a/llvm/test/CodeGen/AMDGPU/debug-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/debug-value.ll
@ -7,7 +7,7 @@ bb:
  %tmp = load i32, ptr addrspace(1) undef, align 4
  %tmp1 = load <4 x float>, ptr addrspace(1) undef, align 16
  %tmp2 = sext i32 %tmp to i64
-  %tmp3 = shufflevector <4 x float> undef, <4 x float> %tmp1, <2 x i32> <i32 3, i32 7>
+  %tmp3 = shufflevector <4 x float> poison, <4 x float> %tmp1, <2 x i32> <i32 3, i32 7>
  %tmp4 = call float @barney() #2
  %tmp9 = getelementptr inbounds %struct.wombat, ptr addrspace(1) %arg, i64 %tmp2, i32 2, i64 0
  %tmp10 = load i32, ptr addrspace(1) %tmp9, align 4
@ -53,7 +53,7 @@ bb28:                                             ; preds = %bb25, %bb21
  %tmp45 = fadd float undef, undef
  %tmp46 = fdiv float %tmp44, %tmp45
  %tmp47 = insertelement <4 x float> poison, float %tmp46, i32 0
-  %tmp48 = shufflevector <4 x float> %tmp47, <4 x float> undef, <4 x i32> zeroinitializer
+  %tmp48 = shufflevector <4 x float> %tmp47, <4 x float> poison, <4 x i32> zeroinitializer
  %tmp49 = fsub <4 x float> %tmp48, %tmp40
  %tmp50 = extractelement <4 x float> %tmp41, i32 1
  %tmp51 = extractelement <4 x float> %tmp42, i32 2
@ -71,7 +71,7 @@ bb28:                                             ; preds = %bb25, %bb21
  call void @llvm.dbg.value(metadata <4 x float> %tmp29, metadata !3, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)) #2, !dbg !5
  %tmp59 = bitcast i64 %tmp35 to <2 x float>
  %tmp60 = insertelement <2 x float> poison, float %tmp58, i32 0
-  %tmp61 = shufflevector <2 x float> %tmp60, <2 x float> undef, <2 x i32> zeroinitializer
+  %tmp61 = shufflevector <2 x float> %tmp60, <2 x float> poison, <2 x i32> zeroinitializer
  %tmp62 = fmul <2 x float> %tmp61, undef
  %tmp63 = fsub <2 x float> %tmp62, %tmp59
  %tmp64 = extractelement <2 x float> %tmp63, i64 0
--- a/llvm/test/CodeGen/AMDGPU/debug-value2.ll
+++ b/llvm/test/CodeGen/AMDGPU/debug-value2.ll
@ -26,7 +26,7 @@ entry:
  %m_scaleMotion = getelementptr inbounds %struct.ShapeData, ptr addrspace(1) %call, i64 0, i32 4
  %tmp2 = load <4 x float>, ptr addrspace(1) %m_scaleMotion, align 16
  %splat.splatinsert = insertelement <4 x float> poison, float %time, i32 0
-  %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
+  %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
  %tmp3 = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %tmp2, <4 x float> %splat.splat, <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>)
  %tmp4 = load <4 x float>, ptr addrspace(1) %call, align 16
  %m_quaternion = getelementptr inbounds %struct.ShapeData, ptr addrspace(1) %call, i64 0, i32 1
@ -61,8 +61,8 @@ entry:
  %tmp24 = insertelement <4 x float> %tmp23, float %tmp19, i32 1
  %tmp25 = insertelement <4 x float> %tmp24, float %tmp22, i32 2
  %tmp26 = extractelement <4 x float> %tmp5, i64 3
-  %splat.splat.i8.i = shufflevector <4 x float> %tmp5, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-  %splat.splat2.i9.i = shufflevector <4 x float> %tmp10, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %splat.splat.i8.i = shufflevector <4 x float> %tmp5, <4 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %splat.splat2.i9.i = shufflevector <4 x float> %tmp10, <4 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %mul3.i10.i = fmul <4 x float> %tmp5, %splat.splat2.i9.i
  %tmp27 = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %splat.splat.i8.i, <4 x float> %tmp10, <4 x float> %mul3.i10.i)
  %add.i11.i = fadd <4 x float> %tmp27, %tmp25
@ -94,7 +94,7 @@ entry:
  %tmp52 = insertelement <4 x float> <float poison, float poison, float poison, float 0.000000e+00>, float %tmp44, i32 0
  %tmp53 = insertelement <4 x float> %tmp52, float %tmp48, i32 1
  %tmp54 = insertelement <4 x float> %tmp53, float %tmp51, i32 2
-  %splat.splat.i.i = shufflevector <4 x float> %tmp39, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %splat.splat.i.i = shufflevector <4 x float> %tmp39, <4 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  %tmp55 = extractelement <4 x float> %tmp5, i32 3
  %mul3.i.i = fmul <4 x float> %splat.splat.i8.i, %tmp39
  %tmp56 = tail call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %splat.splat.i.i, <4 x float> %vecinit5.i.i, <4 x float> %mul3.i.i)
@ -113,12 +113,12 @@ entry:
  %tmp66 = extractelement <4 x float> %tmp1, i64 3
  %mul3 = fmul float %tmp66, %time
  %tmp67 = insertelement <4 x float> %tmp1, float 0.000000e+00, i32 3
-  %tmp68 = shufflevector <4 x float> %tmp67, <4 x float> %tmp1, <4 x i32> <i32 0, i32 5, i32 undef, i32 3>
+  %tmp68 = shufflevector <4 x float> %tmp67, <4 x float> %tmp1, <4 x i32> <i32 0, i32 5, i32 poison, i32 3>
  %vecinit3.i.i = shufflevector <4 x float> %tmp68, <4 x float> %tmp1, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  %tmp69 = fcmp oeq <4 x float> %vecinit3.i.i, zeroinitializer
  %tmp70 = sext <4 x i1> %tmp69 to <4 x i32>
-  %tmp71 = shufflevector <4 x i32> %tmp70, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
-  %tmp72 = shufflevector <4 x i32> %tmp70, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+  %tmp71 = shufflevector <4 x i32> %tmp70, <4 x i32> poison, <2 x i32> <i32 2, i32 3>
+  %tmp72 = shufflevector <4 x i32> %tmp70, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
  %tmp73 = and <2 x i32> %tmp71, %tmp72
  %tmp74 = extractelement <2 x i32> %tmp73, i64 0
  %tmp75 = extractelement <2 x i32> %tmp73, i64 1
@ -205,7 +205,7 @@ bb141:                                            ; preds = %bb109, %bb98, %bb96
  %tmp143 = phi float [ %tmp95, %bb86 ], [ %tmp140, %bb109 ], [ %tmp107, %bb98 ], [ %tmp84, %bb96 ]
  %tmp144 = tail call float @llvm.amdgcn.rsq.f32(float %tmp143)
  %tmp145 = insertelement <4 x float> poison, float %tmp144, i32 0
-  %tmp146 = shufflevector <4 x float> %tmp145, <4 x float> undef, <4 x i32> zeroinitializer
+  %tmp146 = shufflevector <4 x float> %tmp145, <4 x float> poison, <4 x i32> zeroinitializer
  %tmp147 = fmul <4 x float> %tmp142, %tmp146
  br label %qtSet.exit

--- a/llvm/test/CodeGen/AMDGPU/early-if-convert.ll
+++ b/llvm/test/CodeGen/AMDGPU/early-if-convert.ll
@ -331,7 +331,7 @@ if:

 endif:
  %r = phi <3 x i32> [ %v, %entry ], [ %u, %if ]
-  %r.ext = shufflevector <3 x i32> %r, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %r.ext = shufflevector <3 x i32> %r, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  call void asm sideeffect "; reg use $0", "s"(<4 x i32> %r.ext) #0
  ret void
 }
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
@ -156,7 +156,7 @@ F:

 exit:
  %m = phi <8 x i16> [ %t, %T ], [ %f, %F ]
-  %v2 = shufflevector <8 x i16> %m, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+  %v2 = shufflevector <8 x i16> %m, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
  %b2 = icmp sgt <4 x i16> %v2, <i16 -1, i16 -1, i16 -1, i16 -1>
  %r2 = select <4 x i1> %b2, <4 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768>, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>
  ret <4 x i16> %r2
@ -317,7 +317,7 @@ F:

 exit:
  %m = phi <8 x i16> [ %t, %T ], [ %f, %F ]
-  %v2 = shufflevector <8 x i16> %m, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %v2 = shufflevector <8 x i16> %m, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %b2 = icmp sgt <4 x i16> %v2, <i16 -1, i16 -1, i16 -1, i16 -1>
  %r2 = select <4 x i1> %b2, <4 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768>, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>
  ret <4 x i16> %r2
@ -482,7 +482,7 @@ F:

 exit:
  %m = phi <8 x half> [ %t, %T ], [ %f, %F ]
-  %v2 = shufflevector <8 x half> %m, <8 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+  %v2 = shufflevector <8 x half> %m, <8 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
  %b2 = fcmp ugt <4 x half> %v2, <half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800>
  %r2 = select <4 x i1> %b2, <4 x half> <half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900>, <4 x half> <half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00>
  ret <4 x half> %r2
@ -685,7 +685,7 @@ F:

 exit:
  %m = phi <16 x i16> [ %t, %T ], [ %f, %F ]
-  %v2 = shufflevector <16 x i16> %m, <16 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+  %v2 = shufflevector <16 x i16> %m, <16 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
  %b2 = icmp sgt <4 x i16> %v2, <i16 -1, i16 -1, i16 -1, i16 -1>
  %r2 = select <4 x i1> %b2, <4 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768>, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>
  ret <4 x i16> %r2
@ -890,7 +890,7 @@ F:

 exit:
  %m = phi <16 x i16> [ %t, %T ], [ %f, %F ]
-  %v2 = shufflevector <16 x i16> %m, <16 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %v2 = shufflevector <16 x i16> %m, <16 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %b2 = icmp sgt <4 x i16> %v2, <i16 -1, i16 -1, i16 -1, i16 -1>
  %r2 = select <4 x i1> %b2, <4 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768>, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>
  ret <4 x i16> %r2
@ -1099,7 +1099,7 @@ F:

 exit:
  %m = phi <16 x half> [ %t, %T ], [ %f, %F ]
-  %v2 = shufflevector <16 x half> %m, <16 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+  %v2 = shufflevector <16 x half> %m, <16 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
  %b2 = fcmp ugt <4 x half> %v2, <half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800>
  %r2 = select <4 x i1> %b2, <4 x half> <half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900>, <4 x half> <half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00>
  ret <4 x half> %r2
@ -1184,8 +1184,8 @@ define <8 x i16> @large_vector(ptr addrspace(3) %p, i32 %idxp) {
  %x.7 = load i16, ptr addrspace(3) %p.7, align 2
  %v3 = insertelement <8 x i16> %v3p, i16 %x.7, i32 1

-  %z.1 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
-  %z.2 = shufflevector <8 x i16> %z.1, <8 x i16> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 undef, i32 undef>
+  %z.1 = shufflevector <8 x i16> %v0, <8 x i16> %v1, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 poison, i32 poison, i32 poison, i32 poison>
+  %z.2 = shufflevector <8 x i16> %z.1, <8 x i16> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
  %z.3 = shufflevector <8 x i16> %z.2, <8 x i16> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
  ret <8 x i16> %z.3
 }
@ -1464,7 +1464,7 @@ F:

 exit:
  %m = phi <16 x i16> [ %t, %T ], [ %f, %F ]
-  %v2 = shufflevector <16 x i16> %m, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v2 = shufflevector <16 x i16> %m, <16 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %b2 = icmp ugt <8 x i16> %v2, <i16 u0x3800, i16 u0x3800, i16 u0x3800, i16 u0x3800, i16 u0x3800, i16 u0x3800, i16 u0x3800, i16 u0x3800>
  %r2 = select <8 x i1> %b2, <8 x i16> <i16 u0x3900, i16 u0x3900, i16 u0x3900, i16 u0x3900, i16 u0x3900, i16 u0x3900, i16 u0x3900, i16 u0x3900>, <8 x i16> <i16 u0x3D00, i16 u0x3D00, i16 u0x3D00, i16 u0x3D00, i16 u0x3D00, i16 u0x3D00, i16 u0x3D00, i16 u0x3D00>
  ret <8 x i16> %r2
@ -1755,7 +1755,7 @@ F:

 exit:
  %m = phi <16 x half> [ %t, %T ], [ %f, %F ]
-  %v2 = shufflevector <16 x half> %m, <16 x half> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v2 = shufflevector <16 x half> %m, <16 x half> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %b2 = fcmp ugt <8 x half> %v2, <half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800>
  %r2 = select <8 x i1> %b2, <8 x half> <half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900>, <8 x half> <half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00>
  ret <8 x half> %r2
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector-equal-length.ll
@ -13,7 +13,7 @@ define <3 x i32> @quux() {
 ; CHECK-NEXT:    v_mov_b32_e32 v2, 1
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
 bb:
-  %tmp = shufflevector <4 x i8> <i8 1, i8 2, i8 3, i8 4>, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %tmp = shufflevector <4 x i8> <i8 1, i8 2, i8 3, i8 4>, <4 x i8> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %tmp1 = extractelement <3 x i8> %tmp, i64 0
  %tmp2 = zext i8 %tmp1 to i32
  %tmp3 = insertelement <3 x i32> poison, i32 %tmp2, i32 0
--- a/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector.ll
@ -90,7 +90,7 @@ F:

 exit:
  %m = phi <8 x i16> [ %t, %T ], [ %f, %F ]
-  %v2 = shufflevector <8 x i16> %m, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
+  %v2 = shufflevector <8 x i16> %m, <8 x i16> poison, <2 x i32> <i32 0, i32 1>
  %b2 = icmp sgt <2 x i16> %v2, <i16 -1, i16 -1>
  %r2 = select <2 x i1> %b2, <2 x i16> <i16 -32768, i16 -32768>, <2 x i16> <i16 -1, i16 -1>
  ret <2 x i16> %r2
@ -161,7 +161,7 @@ F:

 exit:
  %m = phi <8 x i64> [ %t, %T ], [ %f, %F ]
-  %v2 = shufflevector <8 x i64> %m, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
+  %v2 = shufflevector <8 x i64> %m, <8 x i64> poison, <2 x i32> <i32 0, i32 1>
  %b2 = icmp sgt <2 x i64> %v2, <i64 -1, i64 -1>
  %r2 = select <2 x i1> %b2, <2 x i64> <i64 -32768, i64 -32768>, <2 x i64> <i64 -1, i64 -1>
  ret <2 x i64> %r2
@ -238,7 +238,7 @@ F:

 exit:
  %m = phi <8 x i64> [ %t, %T ], [ %f, %F ]
-  %v2 = shufflevector <8 x i64> %m, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %v2 = shufflevector <8 x i64> %m, <8 x i64> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %b2 = icmp sgt <4 x i64> %v2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %r2 = select <4 x i1> %b2, <4 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768>, <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %r2
@ -342,7 +342,7 @@ F:

 exit:
  %m = phi <16 x i64> [ %t, %T ], [ %f, %F ]
-  %v2 = shufflevector <16 x i64> %m, <16 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v2 = shufflevector <16 x i64> %m, <16 x i64> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %b2 = icmp sgt <8 x i64> %v2, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
  %r2 = select <8 x i1> %b2, <8 x i64> <i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768, i64 -32768>, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
  ret <8 x i64> %r2
@ -413,7 +413,7 @@ F:

 exit:
  %m = phi <8 x double> [ %t, %T ], [ %f, %F ]
-  %v2 = shufflevector <8 x double> %m, <8 x double> undef, <2 x i32> <i32 0, i32 1>
+  %v2 = shufflevector <8 x double> %m, <8 x double> poison, <2 x i32> <i32 0, i32 1>
  %b2 = fcmp ogt <2 x double> %v2, <double -1.0, double -1.0>
  %r2 = select <2 x i1> %b2, <2 x double> <double -2.0, double -2.0>, <2 x double> <double -1.0, double -1.0>
  ret <2 x double> %r2
@ -490,7 +490,7 @@ F:

 exit:
  %m = phi <8 x double> [ %t, %T ], [ %f, %F ]
-  %v2 = shufflevector <8 x double> %m, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %v2 = shufflevector <8 x double> %m, <8 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %b2 = fcmp ogt <4 x double> %v2, <double -1.0, double -1.0, double -1.0, double -1.0>
  %r2 = select <4 x i1> %b2, <4 x double> <double -2.0, double -2.0, double -2.0, double -2.0>, <4 x double> <double -1.0, double -1.0, double -1.0, double -1.0>
  ret <4 x double> %r2
@ -594,7 +594,7 @@ F:

 exit:
  %m = phi <16 x double> [ %t, %T ], [ %f, %F ]
-  %v2 = shufflevector <16 x double> %m, <16 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %v2 = shufflevector <16 x double> %m, <16 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %b2 = fcmp ogt <8 x double> %v2, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
  %r2 = select <8 x i1> %b2, <8 x double> <double -2.0, double -2.0, double -2.0, double -2.0, double -2.0, double -2.0, double -2.0, double -2.0>, <8 x double> <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
  ret <8 x double> %r2
--- a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
@ -27,7 +27,7 @@ define amdgpu_hs void @main(ptr addrspace(6) inreg %arg) {
 main_body:
  %tmp25 = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) undef, i32 undef, i32 0, i32 0)
  %tmp27 = bitcast <4 x float> %tmp25 to <16 x i8>
-  %tmp28 = shufflevector <16 x i8> %tmp27, <16 x i8> undef, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
+  %tmp28 = shufflevector <16 x i8> %tmp27, <16 x i8> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  %tmp29 = bitcast <12 x i8> %tmp28 to <3 x i32>
  call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %tmp29, ptr addrspace(8) undef, i32 undef, i32 0, i32 0) #3
  ret void
--- a/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
@ -39,7 +39,7 @@ bb14:                                             ; preds = %bb14, %bb11
  %tmp25 = load float, ptr addrspace(4) %tmp24, align 4
  %tmp26 = fptrunc float %tmp25 to half
  %tmp27 = insertelement <4 x half> poison, half %tmp26, i32 0
-  %tmp28 = shufflevector <4 x half> %tmp27, <4 x half> undef, <4 x i32> zeroinitializer
+  %tmp28 = shufflevector <4 x half> %tmp27, <4 x half> poison, <4 x i32> zeroinitializer
  %vec.A.0 = extractelement <4 x half> %tmp21, i32 0
  %vec.B.0 = extractelement <4 x half> %tmp28, i32 0
  %vec.C.0 = extractelement <4 x half> %tmp15, i32 0
--- a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
@ -1585,7 +1585,7 @@ define amdgpu_kernel void @fnge_select_f32_multi_use_regression(float %.i2369) {

 bb:                                               ; preds = %.entry
  %i2 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> zeroinitializer, i32 1, i32 0)
-  %i3 = shufflevector <2 x i32> %i2, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %i3 = shufflevector <2 x i32> %i2, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %i4 = bitcast <4 x i32> %i3 to <4 x float>
  %.i0753 = extractelement <4 x float> %i4, i64 0
  br label %bb5
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
@ -1841,7 +1841,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64() #0 {
 ; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
  %load = load <2 x i64>, ptr addrspace(1) null
-  %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
+  %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 poison>, <3 x i32> <i32 0, i32 1, i32 2>

  call amdgpu_gfx void @external_void_func_v3i64(<3 x i64> %val)
  ret void
@ -10439,7 +10439,7 @@ define amdgpu_gfx void @test_call_external_void_func_v3i64_inreg() #0 {
 ; GFX10-SCRATCH-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-SCRATCH-NEXT:    s_setpc_b64 s[30:31]
  %load = load <2 x i64>, ptr addrspace(4) null
-  %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2>
+  %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 poison>, <3 x i32> <i32 0, i32 1, i32 2>

  call amdgpu_gfx void @external_void_func_v3i64_inreg(<3 x i64> inreg %val)
  ret void
--- a/llvm/test/CodeGen/AMDGPU/greedy-reverse-local-assignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/greedy-reverse-local-assignment.ll
@ -48,6 +48,6 @@ define <4 x half> @shuffle_v4f16_234u(ptr addrspace(1) %arg0, ptr addrspace(1) %
 ; NOXNACK-NEXT:    s_setpc_b64 s[30:31]
  %val0 = load <4 x half>, ptr addrspace(1) %arg0
  %val1 = load <4 x half>, ptr addrspace(1) %arg1
-  %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 2, i32 3, i32 4, i32 undef>
+  %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 2, i32 3, i32 4, i32 poison>
  ret <4 x half> %shuffle
 }
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
@ -36,7 +36,7 @@ main_body:
 define amdgpu_ps <2 x float> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
 main_body:
  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
-  %ext = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %ext = shufflevector <3 x half> %tex, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %r = bitcast <4 x half> %ext to <2 x float>
  ret <2 x float> %r
 }
@ -90,7 +90,7 @@ main_body:
 define amdgpu_ps <2 x float> @image_load_3d_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
 main_body:
  %tex = call <3 x half> @llvm.amdgcn.image.load.3d.v3f16.i32(i32 7, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
-  %ext = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %ext = shufflevector <3 x half> %tex, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = bitcast <4 x half> %ext to <2 x float>
  ret <2 x float> %res
 }
@ -129,7 +129,7 @@ main_body:
 define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x float> %in) {
 main_body:
  %r = bitcast <2 x float> %in to <4 x half>
-  %data = shufflevector <4 x half> %r, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %data = shufflevector <4 x half> %r, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %data, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
 }
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
@ -315,7 +315,7 @@ define amdgpu_ps <2 x float> @image_sample_b_2d_v3f16(<8 x i32> inreg %rsrc, <4
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
  %tex = call <3 x half> @llvm.amdgcn.image.sample.b.2d.v3f16.f32.f32(i32 7, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  %tex_wide = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %tex_wide = shufflevector <3 x half> %tex, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %r = bitcast <4 x half> %tex_wide to <2 x float>
  ret <2 x float> %r
 }
@ -410,7 +410,7 @@ define amdgpu_ps <4 x float> @image_sample_b_2d_v3f16_tfe(<8 x i32> inreg %rsrc,
 main_body:
  %tex = call {<3 x half>,i32} @llvm.amdgcn.image.sample.b.2d.v3f16i32.f32.f32(i32 7, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0)
  %tex.vec = extractvalue {<3 x half>, i32} %tex, 0
-  %tex.vec_wide = shufflevector <3 x half> %tex.vec, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %tex.vec_wide = shufflevector <3 x half> %tex.vec, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %tex.err = extractvalue {<3 x half>, i32} %tex, 1
  %tex.vecf = bitcast <4 x half> %tex.vec_wide to <2 x float>
  %tex.vecf.0 = extractelement <2 x float> %tex.vecf, i32 0
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
@ -2394,7 +2394,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc,
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %out
 }

@ -2436,7 +2436,7 @@ define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc,
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %out = shufflevector <4 x float> %r, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
  ret <3 x float> %out
 }

@ -2478,7 +2478,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc,
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+  %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %out
 }

@ -2520,7 +2520,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc,
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 3>
+  %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 0, i32 3>
  ret <2 x float> %out
 }

@ -2562,7 +2562,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc,
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 3>
+  %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 1, i32 3>
  ret <2 x float> %out
 }

@ -2604,7 +2604,7 @@ define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc,
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3>
+  %out = shufflevector <4 x float> %r, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
  ret <3 x float> %out
 }

@ -2667,7 +2667,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+  %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 0, i32 1>
  ret <2 x float> %out
 }

@ -2709,7 +2709,7 @@ define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg
 ; GFX12-NEXT:    ; return to shader part epilog
 main_body:
  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2>
+  %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 1, i32 2>
  ret <2 x float> %out
 }

--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll
@ -47,7 +47,7 @@ main_body:
 ; PACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
 define amdgpu_kernel void @buffer_store_format_d16_xyz(<4 x i32> %rsrc, <4 x half> %data, i32 %voffset) {
 main_body:
-  %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  call void @llvm.amdgcn.raw.buffer.store.format.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
  ret void
 }
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll
@ -47,7 +47,7 @@ main_body:
 ; PACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 offen
 define amdgpu_kernel void @buffer_store_format_d16_xyz(ptr addrspace(8) %rsrc, <4 x half> %data, i32 %voffset) {
 main_body:
-  %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  call void @llvm.amdgcn.raw.ptr.buffer.store.format.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
  ret void
 }
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.d16.ll
@ -144,7 +144,7 @@ define amdgpu_kernel void @tbuffer_store_d16_xyz(ptr addrspace(8) %rsrc, <4 x ha
 ; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM]
 ; GFX11-PACKED-NEXT:    s_endpgm
 main_body:
-  %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  call void @llvm.amdgcn.raw.ptr.tbuffer.store.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 33, i32 0)
  ret void
 }
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll
@ -191,7 +191,7 @@ define amdgpu_kernel void @tbuffer_store_d16_xyz(<4 x i32> %rsrc, <4 x half> %da
 ; GFX12-PACKED-GISEL-NEXT:    tbuffer_store_d16_format_xyzw v[0:1], off, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM]
 ; GFX12-PACKED-GISEL-NEXT:    s_endpgm
 main_body:
-  %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  call void @llvm.amdgcn.raw.tbuffer.store.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0)
  ret void
 }
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll
@ -47,7 +47,7 @@ main_body:
 ; PACKED: buffer_store_format_d16_xyz v[[[LO]]:[[HI]]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0 idxen
 define amdgpu_kernel void @buffer_store_format_d16_xyz(<4 x i32> %rsrc, <4 x half> %data, i32 %index) {
 main_body:
-  %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  call void @llvm.amdgcn.struct.buffer.store.format.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
  ret void
 }
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.d16.ll
@ -77,7 +77,7 @@ define amdgpu_kernel void @buffer_store_format_d16_xyz(ptr addrspace(8) %rsrc, <
 ; PACKED-NEXT:    buffer_store_format_d16_xyz v[0:1], v2, s[0:3], 0 idxen
 ; PACKED-NEXT:    s_endpgm
 main_body:
-  %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  call void @llvm.amdgcn.struct.ptr.buffer.store.format.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 %index, i32 0, i32 0, i32 0)
  ret void
 }
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.d16.ll
@ -160,7 +160,7 @@ define amdgpu_kernel void @tbuffer_store_d16_xyz(ptr addrspace(8) %rsrc, <4 x ha
 ; GFX11-PACKED-NEXT:    tbuffer_store_d16_format_xyz v[0:1], v2, s[0:3], 0 format:[BUF_FMT_10_10_10_2_SNORM] idxen
 ; GFX11-PACKED-NEXT:    s_endpgm
 main_body:
-  %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  call void @llvm.amdgcn.struct.ptr.tbuffer.store.v3f16(<3 x half> %data_subvec, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
  ret void
 }
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll
@ -211,7 +211,7 @@ define amdgpu_kernel void @tbuffer_store_d16_xyz(<4 x i32> %rsrc, <4 x half> %da
 ; GFX12-PACKED-GISEL-NEXT:    tbuffer_store_d16_format_xyzw v[0:1], v2, s[0:3], null format:[BUF_FMT_10_10_10_2_SNORM] idxen
 ; GFX12-PACKED-GISEL-NEXT:    s_endpgm
 main_body:
-  %data_subvec = shufflevector <4 x half> %data, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %data_subvec = shufflevector <4 x half> %data, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  call void @llvm.amdgcn.struct.tbuffer.store.v3f16(<3 x half> %data_subvec, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
  ret void
 }
--- a/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll
@ -22,7 +22,7 @@ define amdgpu_vs void @test(ptr addrspace(8) inreg %arg1, ptr addrspace(3) %arg2
 ; CHECK-NEXT:    s_endpgm
  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float undef, float undef, float undef, float undef, i1 false, i1 false)
  %var1 = load <6 x float>, ptr addrspace(3) %arg2, align 4
-  %var2 = shufflevector <6 x float> %var1, <6 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %var2 = shufflevector <6 x float> %var1, <6 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %var2, ptr addrspace(8) %arg1, i32 0, i32 0, i32 0, i32 126, i32 0)
  ret void
 }
@ -52,9 +52,9 @@ define amdgpu_vs void @test_2(ptr addrspace(8) inreg %arg1, i32 %arg2, i32 inreg
 ; CHECK-NEXT:    tbuffer_store_format_xyzw v[2:5], v0, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:16 glc slc
 ; CHECK-NEXT:    s_endpgm
  %load = load <8 x float>, ptr addrspace(3) %arg4, align 4
-  %vec1 = shufflevector <8 x float> %load, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %vec1 = shufflevector <8 x float> %load, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %vec1, ptr addrspace(8) %arg1, i32 %arg2, i32 0, i32 %arg3, i32 77, i32 3)
-  %vec2 = shufflevector <8 x float> %load, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %vec2 = shufflevector <8 x float> %load, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %vec2, ptr addrspace(8) %arg1, i32 %arg2, i32 16, i32 %arg3, i32 77, i32 3)
  ret void
 }
@ -102,17 +102,17 @@ define amdgpu_vs void @test_3(i32 inreg %arg1, i32 inreg %arg2, ptr addrspace(8)
 ; CHECK-NEXT:    tbuffer_store_format_xy v[0:1], v9, s[4:7], s1 format:[BUF_DATA_FORMAT_INVALID,BUF_NUM_FORMAT_UINT] idxen offset:256 glc slc
 ; CHECK-NEXT:    s_endpgm
  %load1 = load <6 x float>, ptr addrspace(3) %arg5, align 4
-  %vec11 = shufflevector <6 x float> %load1, <6 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %vec11 = shufflevector <6 x float> %load1, <6 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %vec11, ptr addrspace(8) %arg3, i32 %arg1, i32 264, i32 %arg2, i32 77, i32 3)
-  %vec12 = shufflevector <6 x float> %load1, <6 x float> undef, <2 x i32> <i32 4, i32 5>
+  %vec12 = shufflevector <6 x float> %load1, <6 x float> poison, <2 x i32> <i32 4, i32 5>
  call void @llvm.amdgcn.struct.ptr.tbuffer.store.v2f32(<2 x float> %vec12, ptr addrspace(8) %arg3, i32 %arg1, i32 280, i32 %arg2, i32 64, i32 3)

  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float undef, float undef, float undef, float undef, i1 false, i1 false)

  %load2 = load <6 x float>, ptr addrspace(3) %arg6, align 4
-  %vec21 = shufflevector <6 x float> %load2, <6 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %vec21 = shufflevector <6 x float> %load2, <6 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float> %vec21, ptr addrspace(8) %arg3, i32 %arg1, i32 240, i32 %arg2, i32 77, i32 3)
-  %vec22 = shufflevector <6 x float> %load2, <6 x float> undef, <2 x i32> <i32 4, i32 5>
+  %vec22 = shufflevector <6 x float> %load2, <6 x float> poison, <2 x i32> <i32 4, i32 5>
  call void @llvm.amdgcn.struct.ptr.tbuffer.store.v2f32(<2 x float> %vec22, ptr addrspace(8) %arg3, i32 %arg1, i32 256, i32 %arg2, i32 64, i32 3)

  ret void
--- a/llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop-live-out-copy-undef-subrange.ll
@ -36,7 +36,7 @@ bb1:                                              ; preds = %bb3, %bb
  %i3 = fmul float %i2, 1.000000e+00
  %i4 = fmul nsz <3 x float> %arg, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
  %i5 = insertelement <3 x float> poison, float %i3, i32 0
-  %i6 = shufflevector <3 x float> %i5, <3 x float> undef, <3 x i32> zeroinitializer
+  %i6 = shufflevector <3 x float> %i5, <3 x float> poison, <3 x i32> zeroinitializer
  %i7 = fmul <3 x float> %i4, %i6
  %i8 = fcmp oeq float %i3, 0.000000e+00
  br i1 %i8, label %bb3, label %bb2
--- a/llvm/test/CodeGen/AMDGPU/mad-mix.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix.ll
@ -456,9 +456,9 @@ define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1,
 ; GISEL-CI-NEXT:    v_mad_f32 v0, v4, v0, v1
 ; GISEL-CI-NEXT:    v_mac_f32_e32 v1, v5, v2
 ; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
-  %src0.shuf = shufflevector <2 x half> %src0, <2 x half> undef, <2 x i32> <i32 1, i32 0>
-  %src1.shuf = shufflevector <2 x half> %src1, <2 x half> undef, <2 x i32> <i32 0, i32 1>
-  %src2.shuf = shufflevector <2 x half> %src2, <2 x half> undef, <2 x i32> <i32 1, i32 1>
+  %src0.shuf = shufflevector <2 x half> %src0, <2 x half> poison, <2 x i32> <i32 1, i32 0>
+  %src1.shuf = shufflevector <2 x half> %src1, <2 x half> poison, <2 x i32> <i32 0, i32 1>
+  %src2.shuf = shufflevector <2 x half> %src2, <2 x half> poison, <2 x i32> <i32 1, i32 1>
  %src0.ext = fpext <2 x half> %src0.shuf to <2 x float>
  %src1.ext = fpext <2 x half> %src1.shuf to <2 x float>
  %src2.ext = fpext <2 x half> %src2.shuf to <2 x float>
--- a/llvm/test/CodeGen/AMDGPU/mul_int24.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul_int24.ll
@ -795,8 +795,8 @@ bb:
  br i1 %cmp, label %bb11, label %bb7

 bb11:
-  %tmp14 = shufflevector <2 x i32> %arg1, <2 x i32> undef, <2 x i32> zeroinitializer
-  %tmp16 = shufflevector <2 x i32> %arg2, <2 x i32> undef, <2 x i32> zeroinitializer
+  %tmp14 = shufflevector <2 x i32> %arg1, <2 x i32> poison, <2 x i32> zeroinitializer
+  %tmp16 = shufflevector <2 x i32> %arg2, <2 x i32> poison, <2 x i32> zeroinitializer
  %tmp17 = shl <2 x i32> %tmp14, <i32 8, i32 8>
  %tmp18 = ashr <2 x i32> %tmp17, <i32 8, i32 8>
  %tmp19 = shl <2 x i32> %tmp16, <i32 8, i32 8>
--- a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll
+++ b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll
@ -486,7 +486,7 @@ bb:
  %neg.scalar0 = fsub float -0.0, %scalar0

  %neg.scalar0.vec = insertelement <2 x float> poison, float %neg.scalar0, i32 0
-  %neg.scalar0.broadcast = shufflevector <2 x float> %neg.scalar0.vec, <2 x float> undef, <2 x i32> zeroinitializer
+  %neg.scalar0.broadcast = shufflevector <2 x float> %neg.scalar0.vec, <2 x float> poison, <2 x i32> zeroinitializer

  %result = fadd <2 x float> %vec0, %neg.scalar0.broadcast
  store <2 x float> %result, ptr addrspace(1) %out, align 4
@ -526,7 +526,7 @@ bb:
  %vec0 = load volatile <2 x float>, ptr addrspace(3) %lds, align 8
  %lds.gep1 = getelementptr inbounds <2 x float>, ptr addrspace(3) %lds, i32 1
  %vec1 = load volatile <2 x float>, ptr addrspace(3) %lds.gep1, align 8
-  %vec1.swap = shufflevector <2 x float> %vec1, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+  %vec1.swap = shufflevector <2 x float> %vec1, <2 x float> poison, <2 x i32> <i32 1, i32 0>
  %result = fadd <2 x float> %vec0, %vec1.swap
  store <2 x float> %result, ptr addrspace(1) %out, align 8
  ret void
@ -543,7 +543,7 @@ bb:
  %f32 = load volatile float, ptr addrspace(3) undef, align 8
  %vec1 = load volatile <2 x float>, ptr addrspace(3) %lds.gep1, align 8
  %vec1.neg = fsub <2 x float> <float -0.0, float -0.0>, %vec1
-  %vec1.neg.swap = shufflevector <2 x float> %vec1.neg, <2 x float> undef, <2 x i32> <i32 1, i32 0>
+  %vec1.neg.swap = shufflevector <2 x float> %vec1.neg, <2 x float> poison, <2 x i32> <i32 1, i32 0>
  %result = fadd <2 x float> %vec0, %vec1.neg.swap
  store <2 x float> %result, ptr addrspace(1) %out, align 8
  ret void
@ -598,7 +598,7 @@ bb:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i32 %tid
  %in.1 = load <4 x float>, ptr addrspace(1) %gep
-  %shuf = shufflevector <4 x float> %in.1, <4 x float> undef, <4 x i32> zeroinitializer
+  %shuf = shufflevector <4 x float> %in.1, <4 x float> poison, <4 x i32> zeroinitializer
  %add.1 = fadd <4 x float> %in.1, %shuf
  store <4 x float> %add.1, ptr addrspace(1) %gep
  ret void
--- a/llvm/test/CodeGen/AMDGPU/packed-op-sel.ll
+++ b/llvm/test/CodeGen/AMDGPU/packed-op-sel.ll
@ -24,7 +24,7 @@ bb:
  %scalar0 = load volatile half, ptr addrspace(3) %arg2, align 2

  %scalar0.vec = insertelement <2 x half> poison, half %scalar0, i32 0
-  %scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer
+  %scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> poison, <2 x i32> zeroinitializer

  %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %scalar0.broadcast)
  store <2 x half> %result, ptr addrspace(1) %out, align 4
@ -55,7 +55,7 @@ bb:
  %scalar0 = load volatile half, ptr addrspace(3) %arg2, align 2

  %scalar0.vec = insertelement <2 x half> poison, half %scalar0, i32 0
-  %scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer
+  %scalar0.broadcast = shufflevector <2 x half> %scalar0.vec, <2 x half> poison, <2 x i32> zeroinitializer
  %neg.scalar0.broadcast = fsub <2 x half> <half -0.0, half -0.0>, %scalar0.broadcast

  %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.scalar0.broadcast)
@ -88,7 +88,7 @@ bb:

  %neg.scalar0 = fsub half -0.0, %scalar0
  %neg.scalar0.vec = insertelement <2 x half> poison, half %neg.scalar0, i32 0
-  %neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer
+  %neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> poison, <2 x i32> zeroinitializer

  %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.scalar0.broadcast)
  store <2 x half> %result, ptr addrspace(1) %out, align 4
@ -120,7 +120,7 @@ bb:

  %neg.scalar0 = fsub half -0.0, %scalar0
  %neg.scalar0.vec = insertelement <2 x half> poison, half %neg.scalar0, i32 0
-  %neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> undef, <2 x i32> zeroinitializer
+  %neg.scalar0.broadcast = shufflevector <2 x half> %neg.scalar0.vec, <2 x half> poison, <2 x i32> zeroinitializer
  %neg.neg.scalar0.broadcast = fsub <2 x half> <half -0.0, half -0.0>, %neg.scalar0.broadcast

  %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.neg.scalar0.broadcast)
@ -212,7 +212,7 @@ bb:
  %neg.scalar0.bc = bitcast half %neg.scalar0 to i16

  %neg.scalar0.vec = insertelement <2 x i16> poison, i16 %neg.scalar0.bc, i32 0
-  %neg.scalar0.broadcast = shufflevector <2 x i16> %neg.scalar0.vec, <2 x i16> undef, <2 x i32> zeroinitializer
+  %neg.scalar0.broadcast = shufflevector <2 x i16> %neg.scalar0.vec, <2 x i16> poison, <2 x i32> zeroinitializer

  %result = add <2 x i16> %vec0, %neg.scalar0.broadcast
  store <2 x i16> %result, ptr addrspace(1) %out, align 4
@ -318,7 +318,7 @@ bb:
  %vec2 = load volatile <2 x half>, ptr addrspace(3) %lds.gep2, align 4

  %vec2.fneg = fsub <2 x half> <half -0.0, half -0.0>, %vec2
-  %vec2.fneg.elt1.broadcast = shufflevector <2 x half> %vec2.fneg, <2 x half> undef, <2 x i32> <i32 1, i32 1>
+  %vec2.fneg.elt1.broadcast = shufflevector <2 x half> %vec2.fneg, <2 x half> poison, <2 x i32> <i32 1, i32 1>

  %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %vec2.fneg.elt1.broadcast)
  store <2 x half> %result, ptr addrspace(1) %out, align 4
@ -377,7 +377,7 @@ bb:
  %vec0 = load volatile <2 x i16>, ptr addrspace(3) %lds, align 4
  %vec1 = load volatile <2 x i16>, ptr addrspace(3) %lds.gep1, align 4

-  %vec1.elt1.broadcast = shufflevector <2 x i16> %vec1, <2 x i16> undef, <2 x i32> <i32 1, i32 1>
+  %vec1.elt1.broadcast = shufflevector <2 x i16> %vec1, <2 x i16> poison, <2 x i32> <i32 1, i32 1>
  %result = add <2 x i16> %vec0, %vec1.elt1.broadcast

  store <2 x i16> %result, ptr addrspace(1) %out, align 4
@ -407,7 +407,7 @@ bb:
  %vec1 = load volatile <2 x half>, ptr addrspace(3) %lds.gep1, align 4
  %vec2 = load volatile <2 x half>, ptr addrspace(3) %lds.gep2, align 4

-  %vec2.elt1.broadcast = shufflevector <2 x half> %vec2, <2 x half> undef, <2 x i32> <i32 1, i32 1>
+  %vec2.elt1.broadcast = shufflevector <2 x half> %vec2, <2 x half> poison, <2 x i32> <i32 1, i32 1>

  %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %vec2.elt1.broadcast)

@ -471,7 +471,7 @@ bb:
  %vec1 = load volatile <2 x half>, ptr addrspace(3) %lds.gep1, align 4
  %vec2 = load volatile <2 x half>, ptr addrspace(3) %lds.gep2, align 4

-  %vec2.swap = shufflevector <2 x half> %vec2, <2 x half> undef, <2 x i32> <i32 1, i32 0>
+  %vec2.swap = shufflevector <2 x half> %vec2, <2 x half> poison, <2 x i32> <i32 1, i32 0>
  %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %vec2.swap)

  store <2 x half> %result, ptr addrspace(1) %out, align 4
@ -502,7 +502,7 @@ bb:
  %vec2 = load volatile <2 x half>, ptr addrspace(3) %lds.gep2, align 4
  %neg.vec2 = fsub <2 x half> <half -0.0, half -0.0>, %vec2

-  %neg.vec2.swap = shufflevector <2 x half> %neg.vec2, <2 x half> undef, <2 x i32> <i32 1, i32 0>
+  %neg.vec2.swap = shufflevector <2 x half> %neg.vec2, <2 x half> poison, <2 x i32> <i32 1, i32 0>
  %result = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %vec0, <2 x half> %vec1, <2 x half> %neg.vec2.swap)

  store <2 x half> %result, ptr addrspace(1) %out, align 4
@ -678,7 +678,7 @@ bb:
  %f32 = load volatile float, ptr addrspace(3) undef, align 4
  %neg.f32 = fsub float -0.0, %f32
  %bc = bitcast float %neg.f32 to <2 x half>
-  %shuf = shufflevector <2 x half> %bc, <2 x half> undef, <2 x i32> <i32 1, i32 0>
+  %shuf = shufflevector <2 x half> %bc, <2 x half> poison, <2 x i32> <i32 1, i32 0>
  %result = fadd <2 x half> %vec0, %shuf
  store <2 x half> %result, ptr addrspace(1) %out, align 4
  ret void
--- a/llvm/test/CodeGen/AMDGPU/permute_i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
@ -342,7 +342,7 @@ define hidden void @shuffle7330ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
-  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 7, i32 3, i32 3, i32 0>
+  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 7, i32 3, i32 3, i32 0>
  store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
  ret void
 }
@ -367,7 +367,7 @@ define hidden void @shuffle5341ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
-  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 5, i32 3, i32 4, i32 1>
+  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 5, i32 3, i32 4, i32 1>
  store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
  ret void
 }
@ -393,7 +393,7 @@ define hidden void @shuffle6106ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
-  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 6, i32 1, i32 0, i32 6>
+  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 6, i32 1, i32 0, i32 6>
  store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
  ret void
 }
@ -420,7 +420,7 @@ define hidden void @shuffle4327ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
-  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 4, i32 3, i32 2, i32 7>
+  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 4, i32 3, i32 2, i32 7>
  store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
  ret void
 }
@ -446,7 +446,7 @@ define hidden void @shuffle3263ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
-  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 6, i32 3>
+  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 3, i32 2, i32 6, i32 3>
  store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
  ret void
 }
@ -472,7 +472,7 @@ define hidden void @shuffle2763ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
-  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 2, i32 7, i32 6, i32 3>
+  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 2, i32 7, i32 6, i32 3>
  store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
  ret void
 }
@ -498,7 +498,7 @@ define hidden void @shuffle1327ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
-  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 7>
+  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 1, i32 3, i32 2, i32 7>
  store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
  ret void
 }
@ -524,7 +524,7 @@ define hidden void @shuffle0605ud2(ptr addrspace(1) %in0, ptr addrspace(1) %out0
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
-  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 0, i32 6, i32 0, i32 5>
+  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 0, i32 6, i32 0, i32 5>
  store <4 x i8> %shuffle0_0, ptr addrspace(1) %out0, align 4
  ret void
 }
@ -554,7 +554,7 @@ define hidden void @insertUsesOr(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
  %vec1 = load <4 x i8>, ptr addrspace(1) %in1, align 4
-  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
+  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
  %vecins = insertelement <4 x i8> %shuffle0_0, i8 %elt, i32 1
  store <4 x i8> %vecins, ptr addrspace(1) %out0
  ret void
@ -598,7 +598,7 @@ define hidden void @addUsesOr(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i8 %
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
  %vec1 = load <4 x i8>, ptr addrspace(1) %in1, align 4
-  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 7, i32 0, i32 6, i32 3>
+  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 7, i32 0, i32 6, i32 3>
  %added = add <4 x i8> %shuffle0_0, %vec1
  store <4 x i8> %added, ptr addrspace(1) %out0
  ret void
@ -783,7 +783,7 @@ define hidden void @add_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i8 %el
  %gep1 = getelementptr <4 x i8>, ptr addrspace(1) %in1, i32 %tid
  %vec0 = load <4 x i8>, ptr addrspace(1) %gep0, align 4
  %vec1 = load <4 x i8>, ptr addrspace(1) %gep1, align 4
-  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
+  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
  %vecins = add <4 x i8> %shuffle0_0, %vec1
  store <4 x i8> %vecins, ptr addrspace(1) %out0
  ret void
@ -835,7 +835,7 @@ define hidden void @add_store(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i8 %
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4
  %vec1 = load <4 x i8>, ptr addrspace(1) %in1, align 4
-  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
+  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
  %vecins = add <4 x i8> %shuffle0_0, %vec1
  store <4 x i8> %vecins, ptr addrspace(1) %out0
  store <4 x i8> %shuffle0_0, ptr addrspace(1) %out1
@ -903,7 +903,7 @@ define hidden void @add_store_div_16(ptr addrspace(1) %in0, ptr addrspace(1) %in
  %gep1 = getelementptr <4 x i8>, ptr addrspace(1) %in1, i32 %tid
  %vec0 = load <4 x i8>, ptr addrspace(1) %gep0, align 4
  %vec1 = load <4 x i8>, ptr addrspace(1) %gep1, align 4
-  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
+  %shuffle0_0 = shufflevector <4 x i8> %vec0, <4 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 4>
  %vecins = add <4 x i8> %shuffle0_0, %vec1
  store <4 x i8> %vecins, ptr addrspace(1) %out0
  store <4 x i8> %shuffle0_0, ptr addrspace(1) %out1
--- a/llvm/test/CodeGen/AMDGPU/reduction.ll
+++ b/llvm/test/CodeGen/AMDGPU/reduction.ll
@ -10,9 +10,9 @@
 ; VI-NEXT: v_add_f16_e32
 define half @reduction_fadd_v4f16(<4 x half> %vec4) {
 entry:
-  %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
  %bin.rdx = fadd <4 x half> %vec4, %rdx.shuf
-  %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %bin.rdx2 = fadd <4 x half> %bin.rdx, %rdx.shuf1
  %res = extractelement <4 x half> %bin.rdx2, i32 0
  ret half %res
@ -30,9 +30,9 @@ entry:
 ; VI-NEXT: s_setpc_b64
 define half @reduction_fsub_v4f16(<4 x half> %vec4) {
 entry:
-  %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
  %bin.rdx = fsub <4 x half> %vec4, %rdx.shuf
-  %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %bin.rdx2 = fsub <4 x half> %bin.rdx, %rdx.shuf1
  %res = extractelement <4 x half> %bin.rdx2, i32 0
  ret half %res
@ -52,9 +52,9 @@ entry:
 ; VI-NEXT: s_setpc_b64
 define half @reduction_fsub_v4f16_preserve_fmf(<4 x half> %vec4) {
 entry:
-  %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
  %bin.rdx = fsub nsz <4 x half> %vec4, %rdx.shuf
-  %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %bin.rdx2 = fsub nsz <4 x half> %bin.rdx, %rdx.shuf1
  %res = extractelement <4 x half> %bin.rdx2, i32 0
  %neg.res = fsub half -0.0, %res
@ -70,9 +70,9 @@ entry:
 ; VI-NEXT: v_mul_f16_e32
 define half @reduction_fmul_half4(<4 x half> %vec4) {
 entry:
-  %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
  %bin.rdx = fmul <4 x half> %vec4, %rdx.shuf
-  %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf1 = shufflevector <4 x half> %bin.rdx, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %bin.rdx2 = fmul <4 x half> %bin.rdx, %rdx.shuf1
  %res = extractelement <4 x half> %bin.rdx2, i32 0
  ret half %res
@ -87,9 +87,9 @@ entry:
 ; VI-NEXT: v_add_u16_e32
 define i16 @reduction_v4i16(<4 x i16> %vec4) {
 entry:
-  %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
  %bin.rdx = add <4 x i16> %vec4, %rdx.shuf
-  %rdx.shuf1 = shufflevector <4 x i16> %bin.rdx, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf1 = shufflevector <4 x i16> %bin.rdx, <4 x i16> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %bin.rdx2 = add <4 x i16> %bin.rdx, %rdx.shuf1
  %res = extractelement <4 x i16> %bin.rdx2, i32 0
  ret i16 %res
@ -111,11 +111,11 @@ entry:

 define half @reduction_half8(<8 x half> %vec8) {
 entry:
-  %rdx.shuf = shufflevector <8 x half> %vec8, <8 x half> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf = shufflevector <8 x half> %vec8, <8 x half> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
  %bin.rdx = fadd <8 x half> %vec8, %rdx.shuf
-  %rdx.shuf1 = shufflevector <8 x half> %bin.rdx, <8 x half> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf1 = shufflevector <8 x half> %bin.rdx, <8 x half> poison, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %bin.rdx2 = fadd <8 x half> %bin.rdx, %rdx.shuf1
-  %rdx.shuf3 = shufflevector <8 x half> %bin.rdx2, <8 x half> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf3 = shufflevector <8 x half> %bin.rdx2, <8 x half> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %bin.rdx4 = fadd <8 x half> %bin.rdx2, %rdx.shuf3
  %res = extractelement <8 x half> %bin.rdx4, i32 0
  ret half %res
@ -137,11 +137,11 @@ entry:

 define i16 @reduction_v8i16(<8 x i16> %vec8) {
 entry:
-  %rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
  %bin.rdx = add <8 x i16> %vec8, %rdx.shuf
-  %rdx.shuf1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %bin.rdx2 = add <8 x i16> %bin.rdx, %rdx.shuf1
-  %rdx.shuf3 = shufflevector <8 x i16> %bin.rdx2, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf3 = shufflevector <8 x i16> %bin.rdx2, <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %bin.rdx4 = add <8 x i16> %bin.rdx2, %rdx.shuf3
  %res = extractelement <8 x i16> %bin.rdx4, i32 0
  ret i16 %res
@ -175,13 +175,13 @@ entry:

 define half @reduction_half16(<16 x half> %vec16) {
 entry:
-  %rdx.shuf = shufflevector <16 x half> %vec16, <16 x half> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf = shufflevector <16 x half> %vec16, <16 x half> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %bin.rdx = fadd <16 x half> %vec16, %rdx.shuf
-  %rdx.shuf1 = shufflevector <16 x half> %bin.rdx, <16 x half> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf1 = shufflevector <16 x half> %bin.rdx, <16 x half> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %bin.rdx2 = fadd <16 x half> %bin.rdx, %rdx.shuf1
-  %rdx.shuf3 = shufflevector <16 x half> %bin.rdx2, <16 x half> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf3 = shufflevector <16 x half> %bin.rdx2, <16 x half> poison, <16 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %bin.rdx4 = fadd <16 x half> %bin.rdx2, %rdx.shuf3
-  %rdx.shuf5 = shufflevector <16 x half> %bin.rdx4, <16 x half> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf5 = shufflevector <16 x half> %bin.rdx4, <16 x half> poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %bin.rdx6 = fadd <16 x half> %bin.rdx4, %rdx.shuf5
  %res = extractelement <16 x half> %bin.rdx6, i32 0
  ret half %res
@ -196,10 +196,10 @@ entry:
 ; VI-NEXT: v_min_u16_e32
 define i16 @reduction_min_v4i16(<4 x i16> %vec4) {
 entry:
-  %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
  %rdx.minmax.cmp = icmp ult <4 x i16> %vec4, %rdx.shuf
  %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i16> %vec4, <4 x i16> %rdx.shuf
-  %rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %rdx.minmax.cmp2 = icmp ult <4 x i16> %rdx.minmax.select, %rdx.shuf1
  %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x i16> %rdx.minmax.select, <4 x i16> %rdx.shuf1
  %res = extractelement <4 x i16> %rdx.minmax.select3, i32 0
@ -221,13 +221,13 @@ entry:
 ; VI-NEXT: v_min_u16_e32
 define i16 @reduction_umin_v8i16(<8 x i16> %vec8) {
 entry:
-  %rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf = shufflevector <8 x i16> %vec8, <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
  %rdx.minmax.cmp = icmp ult <8 x i16> %vec8, %rdx.shuf
  %rdx.minmax.select = select <8 x i1> %rdx.minmax.cmp, <8 x i16> %vec8, <8 x i16> %rdx.shuf
-  %rdx.shuf1 = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf1 = shufflevector <8 x i16> %rdx.minmax.select, <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %rdx.minmax.cmp2 = icmp ult <8 x i16> %rdx.minmax.select, %rdx.shuf1
  %rdx.minmax.select3 = select <8 x i1> %rdx.minmax.cmp2, <8 x i16> %rdx.minmax.select, <8 x i16> %rdx.shuf1
-  %rdx.shuf4 = shufflevector <8 x i16> %rdx.minmax.select3, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf4 = shufflevector <8 x i16> %rdx.minmax.select3, <8 x i16> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %rdx.minmax.cmp5 = icmp ult <8 x i16> %rdx.minmax.select3, %rdx.shuf4
  %rdx.minmax.select6 = select <8 x i1> %rdx.minmax.cmp5, <8 x i16> %rdx.minmax.select3, <8 x i16> %rdx.shuf4
  %res = extractelement <8 x i16> %rdx.minmax.select6, i32 0
@ -301,16 +301,16 @@ entry:
 ; VI-NEXT: v_min_i16_e32
 define i16 @reduction_smin_v16i16(<16 x i16> %vec16) {
 entry:
-  %rdx.shuf = shufflevector <16 x i16> %vec16, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf = shufflevector <16 x i16> %vec16, <16 x i16> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %rdx.minmax.cmp = icmp slt <16 x i16> %vec16, %rdx.shuf
  %rdx.minmax.select = select <16 x i1> %rdx.minmax.cmp, <16 x i16> %vec16, <16 x i16> %rdx.shuf
-  %rdx.shuf1 = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf1 = shufflevector <16 x i16> %rdx.minmax.select, <16 x i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %rdx.minmax.cmp2 = icmp slt <16 x i16> %rdx.minmax.select, %rdx.shuf1
  %rdx.minmax.select3 = select <16 x i1> %rdx.minmax.cmp2, <16 x i16> %rdx.minmax.select, <16 x i16> %rdx.shuf1
-  %rdx.shuf4 = shufflevector <16 x i16> %rdx.minmax.select3, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf4 = shufflevector <16 x i16> %rdx.minmax.select3, <16 x i16> poison, <16 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %rdx.minmax.cmp5 = icmp slt <16 x i16> %rdx.minmax.select3, %rdx.shuf4
  %rdx.minmax.select6 = select <16 x i1> %rdx.minmax.cmp5, <16 x i16> %rdx.minmax.select3, <16 x i16> %rdx.shuf4
-  %rdx.shuf7 = shufflevector <16 x i16> %rdx.minmax.select6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf7 = shufflevector <16 x i16> %rdx.minmax.select6, <16 x i16> poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %rdx.minmax.cmp8 = icmp slt <16 x i16> %rdx.minmax.select6, %rdx.shuf7
  %rdx.minmax.select9 = select <16 x i1> %rdx.minmax.cmp8, <16 x i16> %rdx.minmax.select6, <16 x i16> %rdx.shuf7
  %res = extractelement <16 x i16> %rdx.minmax.select9, i32 0
@ -404,10 +404,10 @@ entry:
 ; VI-NEXT: v_max_u16_e32
 define i16 @reduction_umax_v4i16(<4 x i16> %vec4) {
 entry:
-  %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
  %rdx.minmax.cmp = icmp ugt <4 x i16> %vec4, %rdx.shuf
  %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i16> %vec4, <4 x i16> %rdx.shuf
-  %rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %rdx.minmax.cmp2 = icmp ugt <4 x i16> %rdx.minmax.select, %rdx.shuf1
  %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x i16> %rdx.minmax.select, <4 x i16> %rdx.shuf1
  %res = extractelement <4 x i16> %rdx.minmax.select3, i32 0
@ -423,10 +423,10 @@ entry:
 ; VI-NEXT: v_max_i16_e32
 define i16 @reduction_smax_v4i16(<4 x i16> %vec4) #0 {
 entry:
-  %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %rdx.shuf = shufflevector <4 x i16> %vec4, <4 x i16> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
  %rdx.minmax.cmp = icmp sgt <4 x i16> %vec4, %rdx.shuf
  %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x i16> %vec4, <4 x i16> %rdx.shuf
-  %rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf1 = shufflevector <4 x i16> %rdx.minmax.select, <4 x i16> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %rdx.minmax.cmp2 = icmp sgt <4 x i16> %rdx.minmax.select, %rdx.shuf1
  %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x i16> %rdx.minmax.select, <4 x i16> %rdx.shuf1
  %res = extractelement <4 x i16> %rdx.minmax.select3, i32 0
@ -451,9 +451,9 @@ entry:
 ; VI: v_max_f16_e32 v0, [[MAX1]], [[MAX0]]
 define half @reduction_maxnum_v4f16(<4 x half> %vec4) {
 entry:
-  %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
  %rdx.minmax = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %vec4, <4 x half> %rdx.shuf)
-  %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %rdx.minmax3 = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %rdx.minmax, <4 x half> %rdx.shuf1)
  %res = extractelement <4 x half> %rdx.minmax3, i32 0
  ret half %res
@ -476,9 +476,9 @@ entry:
 ; VI: v_min_f16_e32 v0, [[MAX1]], [[MAX0]]
 define half @reduction_minnum_v4f16(<4 x half> %vec4) {
 entry:
-  %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
  %rdx.minmax = call <4 x half> @llvm.minnum.v4f16(<4 x half> %vec4, <4 x half> %rdx.shuf)
-  %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %rdx.minmax3 = call <4 x half> @llvm.minnum.v4f16(<4 x half> %rdx.minmax, <4 x half> %rdx.shuf1)
  %res = extractelement <4 x half> %rdx.minmax3, i32 0
  ret half %res
@ -513,10 +513,10 @@ entry:
 ; VI: v_max_f16_e32 v0, [[MAX1]], [[MAX0]]
 define half @reduction_fast_max_pattern_v4f16(<4 x half> %vec4) {
 entry:
-  %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
  %rdx.minmax.cmp = fcmp nnan nsz ogt <4 x half> %vec4, %rdx.shuf
  %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf
-  %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %rdx.minmax.cmp2 = fcmp nnan nsz ogt <4 x half> %rdx.minmax.select, %rdx.shuf1
  %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1
  %res = extractelement <4 x half> %rdx.minmax.select3, i32 0
@ -552,10 +552,10 @@ entry:
 ; VI: v_min_f16_e32 v0, [[MAX1]], [[MAX0]]
 define half @reduction_fast_min_pattern_v4f16(<4 x half> %vec4) {
 entry:
-  %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> poison, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
  %rdx.minmax.cmp = fcmp nnan nsz olt <4 x half> %vec4, %rdx.shuf
  %rdx.minmax.select = select <4 x i1> %rdx.minmax.cmp, <4 x half> %vec4, <4 x half> %rdx.shuf
-  %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+  %rdx.shuf1 = shufflevector <4 x half> %rdx.minmax.select, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
  %rdx.minmax.cmp2 = fcmp nnan nsz olt <4 x half> %rdx.minmax.select, %rdx.shuf1
  %rdx.minmax.select3 = select <4 x i1> %rdx.minmax.cmp2, <4 x half> %rdx.minmax.select, <4 x half> %rdx.shuf1
  %res = extractelement <4 x half> %rdx.minmax.select3, i32 0
--- a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
+++ b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.ll
@ -67,7 +67,7 @@ define amdgpu_kernel void @scalar_to_vector_v2i32(ptr addrspace(1) %out, ptr add
 ; GFX9-NEXT:    s_endpgm
  %tmp1 = load i32, ptr addrspace(1) %in, align 4
  %bc = bitcast i32 %tmp1 to <2 x i16>
-  %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  store <4 x i16> %tmp2, ptr addrspace(1) %out, align 8
  ret void
 }
@ -135,7 +135,7 @@ define amdgpu_kernel void @scalar_to_vector_v2f32(ptr addrspace(1) %out, ptr add
 ; GFX9-NEXT:    s_endpgm
  %tmp1 = load float, ptr addrspace(1) %in, align 4
  %bc = bitcast float %tmp1 to <2 x i16>
-  %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %tmp2 = shufflevector <2 x i16> %bc, <2 x i16> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  store <4 x i16> %tmp2, ptr addrspace(1) %out, align 8
  ret void
 }
@ -193,7 +193,7 @@ define amdgpu_kernel void @scalar_to_vector_v4i16() {
 bb:
  %tmp = load <2 x i8>, ptr addrspace(1) undef, align 1
  %tmp1 = shufflevector <2 x i8> %tmp, <2 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 0, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
+  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> poison, <8 x i32> <i32 0, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
  store <8 x i8> %tmp2, ptr addrspace(1) undef, align 8
  ret void
 }
@ -262,7 +262,7 @@ bb:
  %load = load half, ptr addrspace(1) undef, align 1
  %tmp = bitcast half %load to <2 x i8>
  %tmp1 = shufflevector <2 x i8> %tmp, <2 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
-  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> <i32 0, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
+  %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> poison, <8 x i32> <i32 0, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9>
  store <8 x i8> %tmp2, ptr addrspace(1) undef, align 8
  ret void
 }
@ -275,7 +275,7 @@ bb:
 ;   %tmp1 = load i32, ptr addrspace(1) %in, align 4
 ;   %bc = bitcast i32 %tmp1 to <4 x i8>

-;   %tmp2 = shufflevector <4 x i8> %bc, <4 x i8> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+;   %tmp2 = shufflevector <4 x i8> %bc, <4 x i8> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 ;   store <8 x i8> %tmp2, ptr addrspace(1) %out, align 4
 ;   ret void
 ; }
--- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
+++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls.ll
@ -56,13 +56,13 @@ declare <2 x float> @_Z3cosDv2_f(<2 x float>)
 define amdgpu_kernel void @test_sincos_v3(ptr addrspace(1) nocapture %a) {
 entry:
  %loadVec4 = load <4 x float>, ptr addrspace(1) %a, align 16
-  %extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+  %extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %call = call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4)
-  %extractVec6 = shufflevector <3 x float> %call, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %extractVec6 = shufflevector <3 x float> %call, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  store <4 x float> %extractVec6, ptr addrspace(1) %a, align 16
  %call11 = call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4)
  %arrayidx12 = getelementptr inbounds <3 x float>, ptr addrspace(1) %a, i64 1
-  %extractVec13 = shufflevector <3 x float> %call11, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  %extractVec13 = shufflevector <3 x float> %call11, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
  store <4 x float> %extractVec13, ptr addrspace(1) %arrayidx12, align 16
  ret void
 }
--- a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
@ -52,7 +52,7 @@ define hidden <4 x float> @split_v4f32_multi_arg(<4 x float> %arg0, <2 x float>
 ; GCN:         .cfi_endproc
  call void @llvm.dbg.value(metadata <4 x float> %arg0, metadata !29, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !31
  call void @llvm.dbg.value(metadata <2 x float> %arg1, metadata !30, metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !31
-  %tmp = shufflevector <2 x float> %arg1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>, !dbg !32
+  %tmp = shufflevector <2 x float> %arg1, <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>, !dbg !32
  %add = fadd <4 x float> %tmp, %arg0, !dbg !33
  ret <4 x float> %add, !dbg !34
 }
--- a/llvm/test/CodeGen/AMDGPU/trunc-store-vec-i16-to-i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/trunc-store-vec-i16-to-i8.ll
@ -15,8 +15,8 @@ entry:
 define protected amdgpu_kernel void @short2_char4(ptr addrspace(1) %out) {
 entry:
  %tmp = load <2 x i16>, ptr addrspace(1) undef, align 4
-  %vecinit = shufflevector <2 x i16> %tmp, <2 x i16> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-  %vecinit2 = shufflevector <4 x i16> %vecinit, <4 x i16> <i16 undef, i16 undef, i16 0, i16 0>, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+  %vecinit = shufflevector <2 x i16> %tmp, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+  %vecinit2 = shufflevector <4 x i16> %vecinit, <4 x i16> <i16 poison, i16 poison, i16 0, i16 0>, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  %tmp1 = trunc <4 x i16> %vecinit2 to <4 x i8>
  store <4 x i8> %tmp1, ptr addrspace(1) %out, align 4
  ret void
@ -27,8 +27,8 @@ entry:
 define protected amdgpu_kernel void @short4_char8(ptr addrspace(1) %out) {
 entry:
  %tmp = load <4 x i16>, ptr addrspace(1) undef, align 8
-  %vecinit = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-  %vecinit2 = shufflevector <8 x i16> %vecinit, <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 0, i16 0, i16 0>, <8 x i32> <i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7>
+  %vecinit = shufflevector <4 x i16> %tmp, <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
+  %vecinit2 = shufflevector <8 x i16> %vecinit, <8 x i16> <i16 poison, i16 poison, i16 poison, i16 poison, i16 0, i16 0, i16 0, i16 0>, <8 x i32> <i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7>
  %tmp1 = trunc <8 x i16> %vecinit2 to <8 x i8>
  store <8 x i8> %tmp1, ptr addrspace(1) %out, align 8
  ret void
@ -39,8 +39,8 @@ entry:
 define protected amdgpu_kernel void @short8_char16(ptr addrspace(1) %out) {
 entry:
  %tmp = load <8 x i16>, ptr addrspace(1) undef, align 16
-  %vecinit = shufflevector <8 x i16> %tmp, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %vecinit2 = shufflevector <16 x i16> %vecinit, <16 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <16 x i32> <i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7>
+  %vecinit = shufflevector <8 x i16> %tmp, <8 x i16> poison, <16 x i32> <i32 0, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %vecinit2 = shufflevector <16 x i16> %vecinit, <16 x i16> <i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <16 x i32> <i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7>
  %tmp1 = trunc <16 x i16> %vecinit2 to <16 x i8>
  store <16 x i8> %tmp1, ptr addrspace(1) %out, align 16
  ret void
@ -52,8 +52,8 @@ entry:
 define protected amdgpu_kernel void @short16_char32(ptr addrspace(1) %out) {
 entry:
  %tmp = load <16 x i16>, ptr addrspace(1) undef, align 32
-  %vecinit = shufflevector <16 x i16> %tmp, <16 x i16> undef, <32 x i32> <i32 0, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-  %vecinit2 = shufflevector <32 x i16> %vecinit, <32 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 1, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <32 x i32> <i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7>
+  %vecinit = shufflevector <16 x i16> %tmp, <16 x i16> poison, <32 x i32> <i32 0, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 poison, i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+  %vecinit2 = shufflevector <32 x i16> %vecinit, <32 x i16> <i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 poison, i16 0, i16 1, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 poison, i16 poison, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <32 x i32> <i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7, i32 0, i32 1, i32 6, i32 7>
  %tmp1 = trunc <32 x i16> %vecinit2 to <32 x i8>
  store <32 x i8> %tmp1, ptr addrspace(1) %out, align 32
  ret void
--- a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll
@ -27,7 +27,7 @@ define <4 x half> @shuffle_v4f16_23uu(ptr addrspace(1) %arg0, ptr addrspace(1) %
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val0 = load <4 x half>, ptr addrspace(1) %arg0
  %val1 = load <4 x half>, ptr addrspace(1) %arg1
-  %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
  ret <4 x half> %shuffle
 }

@ -74,7 +74,7 @@ define <4 x half> @shuffle_v4f16_234u(ptr addrspace(1) %arg0, ptr addrspace(1) %
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val0 = load <4 x half>, ptr addrspace(1) %arg0
  %val1 = load <4 x half>, ptr addrspace(1) %arg1
-  %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 2, i32 3, i32 4, i32 undef>
+  %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 2, i32 3, i32 4, i32 poison>
  ret <4 x half> %shuffle
 }

@ -101,7 +101,7 @@ define <4 x half> @shuffle_v4f16_u1u3(ptr addrspace(1) %arg0, ptr addrspace(1) %
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val0 = load <4 x half>, ptr addrspace(1) %arg0
  %val1 = load <4 x half>, ptr addrspace(1) %arg1
-  %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 undef, i32 1, i32 undef, i32 3>
+  %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 poison, i32 1, i32 poison, i32 3>
  ret <4 x half> %shuffle
 }

@ -140,7 +140,7 @@ define <4 x half> @shuffle_v4f16_u3u1(ptr addrspace(1) %arg0, ptr addrspace(1) %
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val0 = load <4 x half>, ptr addrspace(1) %arg0
  %val1 = load <4 x half>, ptr addrspace(1) %arg1
-  %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 undef, i32 3, i32 undef, i32 1>
+  %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 poison, i32 3, i32 poison, i32 1>
  ret <4 x half> %shuffle
 }

@ -167,7 +167,7 @@ define <4 x half> @shuffle_v4f16_u3uu(ptr addrspace(1) %arg0, ptr addrspace(1) %
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val0 = load <4 x half>, ptr addrspace(1) %arg0
  %val1 = load <4 x half>, ptr addrspace(1) %arg1
-  %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
+  %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
  ret <4 x half> %shuffle
 }

@ -216,7 +216,7 @@ define <4 x half> @shuffle_v4f16_3u6u(ptr addrspace(1) %arg0, ptr addrspace(1) %
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val0 = load <4 x half>, ptr addrspace(1) %arg0
  %val1 = load <4 x half>, ptr addrspace(1) %arg1
-  %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 undef, i32 6, i32 undef>
+  %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 poison, i32 6, i32 poison>
  ret <4 x half> %shuffle
 }

@ -265,7 +265,7 @@ define <4 x half> @shuffle_v4f16_3uu7(ptr addrspace(1) %arg0, ptr addrspace(1) %
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val0 = load <4 x half>, ptr addrspace(1) %arg0
  %val1 = load <4 x half>, ptr addrspace(1) %arg1
-  %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 undef, i32 undef, i32 7>
+  %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 poison, i32 poison, i32 7>
  ret <4 x half> %shuffle
 }

@ -312,7 +312,7 @@ define <4 x half> @shuffle_v4f16_35u5(ptr addrspace(1) %arg0, ptr addrspace(1) %
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val0 = load <4 x half>, ptr addrspace(1) %arg0
  %val1 = load <4 x half>, ptr addrspace(1) %arg1
-  %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 5, i32 undef, i32 5>
+  %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 5, i32 poison, i32 5>
  ret <4 x half> %shuffle
 }

@ -364,7 +364,7 @@ define <4 x half> @shuffle_v4f16_357u(ptr addrspace(1) %arg0, ptr addrspace(1) %
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val0 = load <4 x half>, ptr addrspace(1) %arg0
  %val1 = load <4 x half>, ptr addrspace(1) %arg1
-  %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 5, i32 7, i32 undef>
+  %shuffle = shufflevector <4 x half> %val0, <4 x half> %val1, <4 x i32> <i32 3, i32 5, i32 7, i32 poison>
  ret <4 x half> %shuffle
 }

@ -1881,21 +1881,21 @@ entry:
  %tmp15 = load <4 x half>, ptr addrspace(1) %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds <4 x half>, ptr addrspace(1) %C, i64 %tmp12
  %tmp16 = load <4 x half>, ptr addrspace(1) %arrayidx2, align 8
-  %tmp17 = shufflevector <4 x half> %tmp14, <4 x half> undef, <2 x i32> zeroinitializer
-  %tmp18 = shufflevector <4 x half> %tmp15, <4 x half> undef, <2 x i32> <i32 0, i32 1>
-  %tmp19 = shufflevector <4 x half> %tmp16, <4 x half> undef, <2 x i32> <i32 0, i32 1>
+  %tmp17 = shufflevector <4 x half> %tmp14, <4 x half> poison, <2 x i32> zeroinitializer
+  %tmp18 = shufflevector <4 x half> %tmp15, <4 x half> poison, <2 x i32> <i32 0, i32 1>
+  %tmp19 = shufflevector <4 x half> %tmp16, <4 x half> poison, <2 x i32> <i32 0, i32 1>
  %tmp20 = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %tmp17, <2 x half> %tmp18, <2 x half> %tmp19)
-  %tmp21 = shufflevector <4 x half> %tmp14, <4 x half> undef, <2 x i32> <i32 1, i32 1>
-  %tmp22 = shufflevector <4 x half> %tmp15, <4 x half> undef, <2 x i32> <i32 2, i32 3>
+  %tmp21 = shufflevector <4 x half> %tmp14, <4 x half> poison, <2 x i32> <i32 1, i32 1>
+  %tmp22 = shufflevector <4 x half> %tmp15, <4 x half> poison, <2 x i32> <i32 2, i32 3>
  %tmp23 = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %tmp21, <2 x half> %tmp22, <2 x half> %tmp20)
-  %tmp24 = shufflevector <2 x half> %tmp23, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %tmp24 = shufflevector <2 x half> %tmp23, <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %tmp25 = shufflevector <4 x half> %tmp24, <4 x half> %tmp16, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-  %tmp26 = shufflevector <4 x half> %tmp14, <4 x half> undef, <2 x i32> <i32 2, i32 2>
-  %tmp27 = shufflevector <4 x half> %tmp25, <4 x half> undef, <2 x i32> <i32 2, i32 3>
+  %tmp26 = shufflevector <4 x half> %tmp14, <4 x half> poison, <2 x i32> <i32 2, i32 2>
+  %tmp27 = shufflevector <4 x half> %tmp25, <4 x half> poison, <2 x i32> <i32 2, i32 3>
  %tmp28 = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %tmp26, <2 x half> %tmp18, <2 x half> %tmp27)
-  %tmp29 = shufflevector <4 x half> %tmp14, <4 x half> undef, <2 x i32> <i32 3, i32 3>
+  %tmp29 = shufflevector <4 x half> %tmp14, <4 x half> poison, <2 x i32> <i32 3, i32 3>
  %tmp30 = tail call <2 x half> @llvm.fma.v2f16(<2 x half> %tmp29, <2 x half> %tmp22, <2 x half> %tmp28)
-  %tmp31 = shufflevector <2 x half> %tmp30, <2 x half> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %tmp31 = shufflevector <2 x half> %tmp30, <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %tmp32 = shufflevector <4 x half> %tmp25, <4 x half> %tmp31, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  store <4 x half> %tmp32, ptr addrspace(1) %arrayidx2, align 8
  ret void
@ -2006,7 +2006,7 @@ define amdgpu_kernel void @shuffle_scalar_load_v8i32_0123(ptr addrspace(4) %in,
 ; GFX11-NEXT:    global_store_b128 v4, v[0:3], s[2:3]
 ; GFX11-NEXT:    s_endpgm
  %ld8 = load <8 x i32>, ptr addrspace(4) %in, align 16
-  %id = shufflevector <8 x i32> %ld8, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %id = shufflevector <8 x i32> %ld8, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  store <4 x i32> %id, ptr addrspace(1) %out, align 8
  ret void
 }
@ -2052,7 +2052,7 @@ define <2 x half> @low16bits_v2f16(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
 entry:
  %0 = load <2 x half>, ptr addrspace(1) %x0, align 4
  %1 = load <2 x half>, ptr addrspace(1) %x1, align 4
-  %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 0, i32 undef>
+  %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 0, i32 poison>
  %vy1.2.vec.insert = shufflevector <2 x half> %vy1.0.vec.insert, <2 x half> %1, <2 x i32> <i32 0, i32 2>
  ret <2 x half> %vy1.2.vec.insert
 }
@ -2098,7 +2098,7 @@ define <2 x half> @hi16bits_v2f16(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
 entry:
  %0 = load <2 x half>, ptr addrspace(1) %x0, align 4
  %1 = load <2 x half>, ptr addrspace(1) %x1, align 4
-  %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 1, i32 undef>
+  %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 1, i32 poison>
  %vy1.2.vec.insert = shufflevector <2 x half> %vy1.0.vec.insert, <2 x half> %1, <2 x i32> <i32 0, i32 3>
  ret <2 x half> %vy1.2.vec.insert
 }
@ -2144,7 +2144,7 @@ define <2 x half> @low16hi16bits_v2f16(ptr addrspace(1) %x0, ptr addrspace(1) %x
 entry:
  %0 = load <2 x half>, ptr addrspace(1) %x0, align 4
  %1 = load <2 x half>, ptr addrspace(1) %x1, align 4
-  %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 0, i32 undef>
+  %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 0, i32 poison>
  %vy1.2.vec.insert = shufflevector <2 x half> %vy1.0.vec.insert, <2 x half> %1, <2 x i32> <i32 0, i32 3>
  ret <2 x half> %vy1.2.vec.insert
 }
@ -2179,7 +2179,7 @@ define <2 x half> @hi16low16bits_v2bf16(ptr addrspace(1) %x0, ptr addrspace(1) %
 entry:
  %0 = load <2 x half>, ptr addrspace(1) %x0, align 4
  %1 = load <2 x half>, ptr addrspace(1) %x1, align 4
-  %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 1, i32 undef>
+  %vy1.0.vec.insert = shufflevector <2 x half> %0, <2 x half> poison, <2 x i32> <i32 1, i32 poison>
  %vy1.2.vec.insert = shufflevector <2 x half> %vy1.0.vec.insert, <2 x half> %1, <2 x i32> <i32 0, i32 2>
  ret <2 x half> %vy1.2.vec.insert
 }
@ -2225,7 +2225,7 @@ define <2 x i16> @i16_low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
 entry:
  %0 = load <2 x i16>, ptr addrspace(1) %x0, align 4
  %1 = load <2 x i16>, ptr addrspace(1) %x1, align 4
-  %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 0, i32 undef>
+  %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 0, i32 poison>
  %vy1.2.vec.insert = shufflevector <2 x i16> %vy1.0.vec.insert, <2 x i16> %1, <2 x i32> <i32 0, i32 2>
  ret <2 x i16> %vy1.2.vec.insert
 }
@ -2271,7 +2271,7 @@ define <2 x i16> @i16_low16hi16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1)
 entry:
  %0 = load <2 x i16>, ptr addrspace(1) %x0, align 4
  %1 = load <2 x i16>, ptr addrspace(1) %x1, align 4
-  %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 0, i32 undef>
+  %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 0, i32 poison>
  %vy1.2.vec.insert = shufflevector <2 x i16> %vy1.0.vec.insert, <2 x i16> %1, <2 x i32> <i32 0, i32 3>
  ret <2 x i16> %vy1.2.vec.insert
 }
@ -2306,7 +2306,7 @@ define <2 x i16> @i16_hi16low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1)
 entry:
  %0 = load <2 x i16>, ptr addrspace(1) %x0, align 4
  %1 = load <2 x i16>, ptr addrspace(1) %x1, align 4
-  %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 1, i32 undef>
+  %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
  %vy1.2.vec.insert = shufflevector <2 x i16> %vy1.0.vec.insert, <2 x i16> %1, <2 x i32> <i32 0, i32 2>
  ret <2 x i16> %vy1.2.vec.insert
 }
@ -2352,7 +2352,7 @@ define <2 x i16> @i16_hi16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
 entry:
  %0 = load <2 x i16>, ptr addrspace(1) %x0, align 4
  %1 = load <2 x i16>, ptr addrspace(1) %x1, align 4
-  %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 1, i32 undef>
+  %vy1.0.vec.insert = shufflevector <2 x i16> %0, <2 x i16> poison, <2 x i32> <i32 1, i32 poison>
  %vy1.2.vec.insert = shufflevector <2 x i16> %vy1.0.vec.insert, <2 x i16> %1, <2 x i32> <i32 0, i32 3>
  ret <2 x i16> %vy1.2.vec.insert
 }
@ -3020,7 +3020,7 @@ define <4 x bfloat> @shuffle_v4bf16_23uu(ptr addrspace(1) %arg0, ptr addrspace(1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
  %val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
-  %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+  %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 2, i32 3, i32 poison, i32 poison>
  ret <4 x bfloat> %shuffle
 }

@ -3067,7 +3067,7 @@ define <4 x bfloat> @shuffle_v4bf16_234u(ptr addrspace(1) %arg0, ptr addrspace(1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
  %val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
-  %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 2, i32 3, i32 4, i32 undef>
+  %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 2, i32 3, i32 4, i32 poison>
  ret <4 x bfloat> %shuffle
 }

@ -3094,7 +3094,7 @@ define <4 x bfloat> @shuffle_v4bf16_u1u3(ptr addrspace(1) %arg0, ptr addrspace(1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
  %val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
-  %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 undef, i32 1, i32 undef, i32 3>
+  %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 poison, i32 1, i32 poison, i32 3>
  ret <4 x bfloat> %shuffle
 }

@ -3133,7 +3133,7 @@ define <4 x bfloat> @shuffle_v4bf16_u3u1(ptr addrspace(1) %arg0, ptr addrspace(1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
  %val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
-  %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 undef, i32 3, i32 undef, i32 1>
+  %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 poison, i32 3, i32 poison, i32 1>
  ret <4 x bfloat> %shuffle
 }

@ -3160,7 +3160,7 @@ define <4 x bfloat> @shuffle_v4bf16_u3uu(ptr addrspace(1) %arg0, ptr addrspace(1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
  %val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
-  %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 undef, i32 3, i32 undef, i32 undef>
+  %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
  ret <4 x bfloat> %shuffle
 }

@ -3209,7 +3209,7 @@ define <4 x bfloat> @shuffle_v4bf16_3u6u(ptr addrspace(1) %arg0, ptr addrspace(1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
  %val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
-  %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 undef, i32 6, i32 undef>
+  %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 poison, i32 6, i32 poison>
  ret <4 x bfloat> %shuffle
 }

@ -3258,7 +3258,7 @@ define <4 x bfloat> @shuffle_v4bf16_3uu7(ptr addrspace(1) %arg0, ptr addrspace(1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
  %val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
-  %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 undef, i32 undef, i32 7>
+  %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 poison, i32 poison, i32 7>
  ret <4 x bfloat> %shuffle
 }

@ -3305,7 +3305,7 @@ define <4 x bfloat> @shuffle_v4bf16_35u5(ptr addrspace(1) %arg0, ptr addrspace(1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
  %val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
-  %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 5, i32 undef, i32 5>
+  %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 5, i32 poison, i32 5>
  ret <4 x bfloat> %shuffle
 }

@ -3357,7 +3357,7 @@ define <4 x bfloat> @shuffle_v4bf16_357u(ptr addrspace(1) %arg0, ptr addrspace(1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val0 = load <4 x bfloat>, ptr addrspace(1) %arg0
  %val1 = load <4 x bfloat>, ptr addrspace(1) %arg1
-  %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 5, i32 7, i32 undef>
+  %shuffle = shufflevector <4 x bfloat> %val0, <4 x bfloat> %val1, <4 x i32> <i32 3, i32 5, i32 7, i32 poison>
  ret <4 x bfloat> %shuffle
 }

@ -5059,21 +5059,21 @@ entry:
  %tmp15 = load <4 x bfloat>, ptr addrspace(1) %arrayidx1, align 8
  %arrayidx2 = getelementptr inbounds <4 x bfloat>, ptr addrspace(1) %C, i64 %tmp12
  %tmp16 = load <4 x bfloat>, ptr addrspace(1) %arrayidx2, align 8
-  %tmp17 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> undef, <2 x i32> zeroinitializer
-  %tmp18 = shufflevector <4 x bfloat> %tmp15, <4 x bfloat> undef, <2 x i32> <i32 0, i32 1>
-  %tmp19 = shufflevector <4 x bfloat> %tmp16, <4 x bfloat> undef, <2 x i32> <i32 0, i32 1>
+  %tmp17 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> poison, <2 x i32> zeroinitializer
+  %tmp18 = shufflevector <4 x bfloat> %tmp15, <4 x bfloat> poison, <2 x i32> <i32 0, i32 1>
+  %tmp19 = shufflevector <4 x bfloat> %tmp16, <4 x bfloat> poison, <2 x i32> <i32 0, i32 1>
  %tmp20 = tail call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %tmp17, <2 x bfloat> %tmp18, <2 x bfloat> %tmp19)
-  %tmp21 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> undef, <2 x i32> <i32 1, i32 1>
-  %tmp22 = shufflevector <4 x bfloat> %tmp15, <4 x bfloat> undef, <2 x i32> <i32 2, i32 3>
+  %tmp21 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> poison, <2 x i32> <i32 1, i32 1>
+  %tmp22 = shufflevector <4 x bfloat> %tmp15, <4 x bfloat> poison, <2 x i32> <i32 2, i32 3>
  %tmp23 = tail call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %tmp21, <2 x bfloat> %tmp22, <2 x bfloat> %tmp20)
-  %tmp24 = shufflevector <2 x bfloat> %tmp23, <2 x bfloat> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %tmp24 = shufflevector <2 x bfloat> %tmp23, <2 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %tmp25 = shufflevector <4 x bfloat> %tmp24, <4 x bfloat> %tmp16, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-  %tmp26 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> undef, <2 x i32> <i32 2, i32 2>
-  %tmp27 = shufflevector <4 x bfloat> %tmp25, <4 x bfloat> undef, <2 x i32> <i32 2, i32 3>
+  %tmp26 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> poison, <2 x i32> <i32 2, i32 2>
+  %tmp27 = shufflevector <4 x bfloat> %tmp25, <4 x bfloat> poison, <2 x i32> <i32 2, i32 3>
  %tmp28 = tail call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %tmp26, <2 x bfloat> %tmp18, <2 x bfloat> %tmp27)
-  %tmp29 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> undef, <2 x i32> <i32 3, i32 3>
+  %tmp29 = shufflevector <4 x bfloat> %tmp14, <4 x bfloat> poison, <2 x i32> <i32 3, i32 3>
  %tmp30 = tail call <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat> %tmp29, <2 x bfloat> %tmp22, <2 x bfloat> %tmp28)
-  %tmp31 = shufflevector <2 x bfloat> %tmp30, <2 x bfloat> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+  %tmp31 = shufflevector <2 x bfloat> %tmp30, <2 x bfloat> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %tmp32 = shufflevector <4 x bfloat> %tmp25, <4 x bfloat> %tmp31, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  store <4 x bfloat> %tmp32, ptr addrspace(1) %arrayidx2, align 8
  ret void
@ -5172,7 +5172,7 @@ define <2 x bfloat> @low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
 entry:
  %0 = load <2 x bfloat>, ptr addrspace(1) %x0, align 4
  %1 = load <2 x bfloat>, ptr addrspace(1) %x1, align 4
-  %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 0, i32 undef>
+  %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 0, i32 poison>
  %vy1.2.vec.insert = shufflevector <2 x bfloat> %vy1.0.vec.insert, <2 x bfloat> %1, <2 x i32> <i32 0, i32 2>
  ret <2 x bfloat> %vy1.2.vec.insert
 }
@ -5218,7 +5218,7 @@ define <2 x bfloat> @hi16bits_v2bf16(ptr addrspace(1) %x0, ptr addrspace(1) %x1)
 entry:
  %0 = load <2 x bfloat>, ptr addrspace(1) %x0, align 4
  %1 = load <2 x bfloat>, ptr addrspace(1) %x1, align 4
-  %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 1, i32 undef>
+  %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 1, i32 poison>
  %vy1.2.vec.insert = shufflevector <2 x bfloat> %vy1.0.vec.insert, <2 x bfloat> %1, <2 x i32> <i32 0, i32 3>
  ret <2 x bfloat> %vy1.2.vec.insert
 }
@ -5264,7 +5264,7 @@ define <2 x bfloat> @low16hi16bits_v2bf16(ptr addrspace(1) %x0, ptr addrspace(1)
 entry:
  %0 = load <2 x bfloat>, ptr addrspace(1) %x0, align 4
  %1 = load <2 x bfloat>, ptr addrspace(1) %x1, align 4
-  %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 0, i32 undef>
+  %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 0, i32 poison>
  %vy1.2.vec.insert = shufflevector <2 x bfloat> %vy1.0.vec.insert, <2 x bfloat> %1, <2 x i32> <i32 0, i32 3>
  ret <2 x bfloat> %vy1.2.vec.insert
 }
@ -5299,7 +5299,7 @@ define <2 x bfloat> @hi16low16bits(ptr addrspace(1) %x0, ptr addrspace(1) %x1) {
 entry:
  %0 = load <2 x bfloat>, ptr addrspace(1) %x0, align 4
  %1 = load <2 x bfloat>, ptr addrspace(1) %x1, align 4
-  %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 1, i32 undef>
+  %vy1.0.vec.insert = shufflevector <2 x bfloat> %0, <2 x bfloat> poison, <2 x i32> <i32 1, i32 poison>
  %vy1.2.vec.insert = shufflevector <2 x bfloat> %vy1.0.vec.insert, <2 x bfloat> %1, <2 x i32> <i32 0, i32 2>
  ret <2 x bfloat> %vy1.2.vec.insert
 }
--- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
@ -543,12 +543,12 @@ if.then9:                                         ; preds = %entry
 sw.bb:                                            ; preds = %if.then9
  %i17 = load i8, ptr addrspace(1) null, align 1
  %i18 = insertelement <4 x i8> zeroinitializer, i8 %i17, i64 0
-  %a.sroa.0.0.vecblend = shufflevector <4 x i8> %i18, <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
+  %a.sroa.0.0.vecblend = shufflevector <4 x i8> %i18, <4 x i8> zeroinitializer, <4 x i32> <i32 0, i32 0, i32 0, i32 poison>
  br label %sw.bb18

 sw.bb18:                                          ; preds = %sw.bb, %if.then9
  %a.sroa.0.0 = phi <4 x i8> [ %a.sroa.0.0.vecblend, %sw.bb ], [ poison, %if.then9 ]
-  %a.sroa.0.0.vec.extract61 = shufflevector <4 x i8> %a.sroa.0.0, <4 x i8> zeroinitializer, <3 x i32> <i32 undef, i32 1, i32 undef>
+  %a.sroa.0.0.vec.extract61 = shufflevector <4 x i8> %a.sroa.0.0, <4 x i8> zeroinitializer, <3 x i32> <i32 poison, i32 1, i32 poison>
  %i19 = insertelement <3 x i8> %a.sroa.0.0.vec.extract61, i8 0, i64 0
  %i20 = select <3 x i1> zeroinitializer, <3 x i8> zeroinitializer, <3 x i8> %i19
  %i21 = extractelement <3 x i8> %i20, i64 1
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
@ -421,7 +421,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<8 x half> %A, <8 x
 ; GFX12-NEXT:    s_endpgm
 bb:
  %C = load <16 x half>, ptr %Caddr
-  %C_shuffle = shufflevector <16 x half> %C, <16 x half> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %C_shuffle = shufflevector <16 x half> %C, <16 x half> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %fneg.C_shuffle = fneg <8 x half> %C_shuffle
  %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half> %A, <8 x half> %B, <8 x half> %fneg.C_shuffle , i1 0)
  store <8 x half> %res, ptr addrspace(1) %out
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
@ -378,7 +378,7 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_negC_pack(<4 x half> %A, <4 x
 ; GFX12-NEXT:    s_endpgm
 bb:
  %C = load <8 x half>, ptr %Caddr
-  %C_shuffle = shufflevector <8 x half> %C, <8 x half> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %C_shuffle = shufflevector <8 x half> %C, <8 x half> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %fneg.C_shuffle = fneg <4 x half> %C_shuffle
  %res = call <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half> %A, <4 x half> %B, <4 x half> %fneg.C_shuffle , i1 0)
  store <4 x half> %res, ptr addrspace(1) %out
--- a/llvm/test/CodeGen/AMDGPU/wmma_modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma_modifiers.ll
@ -12,9 +12,9 @@ define amdgpu_cs void @xyz () {
  br label %loop
 loop:
  %ld = load <8 x float>, ptr addrspace(5) null, align 32
-  %in_shuffle = shufflevector <8 x float> %ld, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %in_shuffle = shufflevector <8 x float> %ld, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %wmma = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v16f16(<16 x half> undef, <16 x half> undef, <4 x float> %in_shuffle)
-  %out_shuffle = shufflevector <4 x float> %wmma, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+  %out_shuffle = shufflevector <4 x float> %wmma, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
  store <8 x float> %out_shuffle, ptr addrspace(5) null, align 32
  br i1 false, label %.exit, label %loop
 .exit: