[LLVM][IR] Use splat syntax when printing Constant[Data]Vector. (#112548)

2024-11-06 11:53:33 +00:00 · 2024-11-06 11:53:33 +00:00 · 38fffa630e
commit 38fffa630e
parent 5a062191f7
1095 changed files with 17404 additions and 17114 deletions
--- a/clang/test/CodeGen/PowerPC/altivec.c
+++ b/clang/test/CodeGen/PowerPC/altivec.c
@ -12,8 +12,8 @@
 // RUN: %clang -S -emit-llvm -maltivec -mabi=vec-default -mcpu=pwr8 --target=powerpc64-unknown-aix -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE
 // Check initialization

-vector int test0 = (vector int)(1);       // CHECK: @test0 ={{.*}} global <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-vector float test1 = (vector float)(1.0); // CHECK: @test1 ={{.*}} global <4 x float> <float 1.000000e+{{0+}}, float 1.000000e+{{0+}}, float 1.000000e+{{0+}}, float 1.000000e+{{0+}}>
+vector int test0 = (vector int)(1);       // CHECK: @test0 ={{.*}} global <4 x i32> splat (i32 1)
+vector float test1 = (vector float)(1.0); // CHECK: @test1 ={{.*}} global <4 x float> splat (float 1.000000e+{{0+}})

 // CHECK-BE: @v1 ={{.*}} global <16 x i8> <i8 0, i8 0, i8 0, i8 1, i8 0, i8 0, i8 0, i8 2, i8 0, i8 0, i8 0, i8 3, i8 0, i8 0, i8 0, i8 4>
 // CHECK-LE: @v1 ={{.*}} global <16 x i8> <i8 1, i8 0, i8 0, i8 0, i8 2, i8 0, i8 0, i8 0, i8 3, i8 0, i8 0, i8 0, i8 4, i8 0, i8 0, i8 0>
@ -32,8 +32,8 @@ void test2(void)
 {
  vector int vi;
  vector float vf;
-  vi = (vector int)(1);             // CHECK: <i32 1, i32 1, i32 1, i32 1>
-  vf = (vector float)(1.0);         // CHECK: <float 1.000000e+{{0+}}, float 1.000000e+{{0+}}, float 1.000000e+{{0+}}, float 1.000000e+{{0+}}>
+  vi = (vector int)(1);             // CHECK: splat (i32 1)
+  vf = (vector float)(1.0);         // CHECK: splat (float 1.000000e+{{0+}})
  vi = (vector int)(1, 2, 3, 4);    // CHECK: <i32 1, i32 2, i32 3, i32 4>
  vi = (vector int)(1, 2, 3, 4, 5); // CHECK: <i32 1, i32 2, i32 3, i32 4>

@ -46,9 +46,9 @@ void test2(void)
 // Check pre/post increment/decrement
 void test3(void) {
  vector int vi;
-  vi++;                                    // CHECK: add <4 x i32> {{.*}} <i32 1, i32 1, i32 1, i32 1>
+  vi++;                                    // CHECK: add <4 x i32> {{.*}} splat (i32 1)
  vector unsigned int vui;
-  --vui;                                   // CHECK: add <4 x i32> {{.*}} <i32 -1, i32 -1, i32 -1, i32 -1>
+  --vui;                                   // CHECK: add <4 x i32> {{.*}} splat (i32 -1)
  vector float vf;
-  vf++;                                    // CHECK: fadd <4 x float> {{.*}} <float 1.000000e+{{0+}}, float 1.000000e+{{0+}}, float 1.000000e+{{0+}}, float 1.000000e+{{0+}}>
+  vf++;                                    // CHECK: fadd <4 x float> {{.*}} splat (float 1.000000e+{{0+}})
 }
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-altivec.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-altivec.c
@ -92,10 +92,10 @@ void test1() {
 // CHECK-LE: @llvm.ppc.altivec.vmaxsw

  vf = vec_abs(vf);
-// CHECK: and <4 x i32> {{.*}}, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+// CHECK: and <4 x i32> {{.*}}, splat (i32 2147483647)
 // CHECK: store <4 x float> %{{.*}}, ptr @vf
 // CHECK-LE: bitcast <4 x float> %{{.*}} to <4 x i32>
-// CHECK-LE: and <4 x i32> {{.*}}, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+// CHECK-LE: and <4 x i32> {{.*}}, splat (i32 2147483647)
 // CHECK-LE: bitcast <4 x i32> %{{.*}} to <4 x float>
 // CHECK-LE: store <4 x float> %{{.*}}, ptr @vf

@ -3502,39 +3502,39 @@ void test6() {

  /* vec_sl */
  res_vsc = vec_sl(vsc, vuc);
-// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8)
 // CHECK: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
-// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8)
 // CHECK-LE: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]

  res_vuc = vec_sl(vuc, vuc);
-// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8)
 // CHECK: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
-// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8)
 // CHECK-LE: shl <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]

  res_vs  = vec_sl(vs, vus);
-// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16)
 // CHECK: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
-// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16)
 // CHECK-LE: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]

  res_vus = vec_sl(vus, vus);
-// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16)
 // CHECK: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
-// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16)
 // CHECK-LE: shl <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]

  res_vi  = vec_sl(vi, vui);
-// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32)
 // CHECK: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
-// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32)
 // CHECK-LE: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]

  res_vui = vec_sl(vui, vui);
-// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32)
 // CHECK: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
-// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32)
 // CHECK-LE: shl <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]

  res_vsc = vec_vslb(vsc, vuc);
@ -4351,75 +4351,75 @@ void test6() {

  /* vec_sr */
  res_vsc = vec_sr(vsc, vuc);
-// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8)
 // CHECK: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
-// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8)
 // CHECK-LE: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]

  res_vuc = vec_sr(vuc, vuc);
-// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8)
 // CHECK: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
-// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8)
 // CHECK-LE: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]

  res_vs  = vec_sr(vs, vus);
-// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16)
 // CHECK: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
-// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16)
 // CHECK-LE: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]

  res_vus = vec_sr(vus, vus);
-// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16)
 // CHECK: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
-// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16)
 // CHECK-LE: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]

  res_vi  = vec_sr(vi, vui);
-// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32)
 // CHECK: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
-// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32)
 // CHECK-LE: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]

  res_vui = vec_sr(vui, vui);
-// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32)
 // CHECK: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
-// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32)
 // CHECK-LE: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]

  res_vsc = vec_vsrb(vsc, vuc);
-// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8)
 // CHECK: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
-// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8)
 // CHECK-LE: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]

  res_vuc = vec_vsrb(vuc, vuc);
-// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8)
 // CHECK: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]
-// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <16 x i8> {{[0-9a-zA-Z%.]+}}, splat (i8 8)
 // CHECK-LE: lshr <16 x i8> {{[0-9a-zA-Z%.]+}}, [[UREM]]

  res_vs  = vec_vsrh(vs, vus);
-// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16)
 // CHECK: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
-// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16)
 // CHECK-LE: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]

  res_vus = vec_vsrh(vus, vus);
-// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16)
 // CHECK: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]
-// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <8 x i16> {{[0-9a-zA-Z%.]+}}, splat (i16 16)
 // CHECK-LE: lshr <8 x i16> {{[0-9a-zA-Z%.]+}}, [[UREM]]

  res_vi  = vec_vsrw(vi, vui);
-// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32)
 // CHECK: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
-// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32)
 // CHECK-LE: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]

  res_vui = vec_vsrw(vui, vui);
-// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32)
 // CHECK: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]
-// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, <i32 32, i32 32, i32 32, i32 32>
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <4 x i32> {{[0-9a-zA-Z%.]+}}, splat (i32 32)
 // CHECK-LE: lshr <4 x i32> {{[0-9a-zA-Z%.]+}}, [[UREM]]

  /* vec_sra */
@ -5623,21 +5623,21 @@ void test6() {

  res_vi = vec_sube(vi, vi, vi);
 // CHECK: and <4 x i32>
-// CHECK: xor <4 x i32> {{%[0-9]+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: xor <4 x i32> {{%[0-9]+}}, splat (i32 -1)
 // CHECK: add <4 x i32>
 // CHECK: add <4 x i32>
 // CHECK-LE: and <4 x i32>
-// CHECK-LE: xor <4 x i32> {{%[0-9]+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: xor <4 x i32> {{%[0-9]+}}, splat (i32 -1)
 // CHECK-LE: add <4 x i32>
 // CHECK-LE: add <4 x i32>

  res_vui = vec_sube(vui, vui, vui);
 // CHECK: and <4 x i32>
-// CHECK: xor <4 x i32> {{%[0-9]+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: xor <4 x i32> {{%[0-9]+}}, splat (i32 -1)
 // CHECK: add <4 x i32>
 // CHECK: add <4 x i32>
 // CHECK-LE: and <4 x i32>
-// CHECK-LE: xor <4 x i32> {{%[0-9]+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: xor <4 x i32> {{%[0-9]+}}, splat (i32 -1)
 // CHECK-LE: add <4 x i32>
 // CHECK-LE: add <4 x i32>

@ -9407,7 +9407,7 @@ void test8() {
 // CHECK: store <16 x i8> <i8 1, i8 0, i8 3, i8 2, i8 5, i8 4, i8 7, i8 6, i8 9, i8 8, i8 11, i8 10, i8 13, i8 12, i8 15, i8 14>, ptr {{%.+}}, align 16
 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
 // CHECK-LE: store <16 x i8> <i8 1, i8 0, i8 3, i8 2, i8 5, i8 4, i8 7, i8 6, i8 9, i8 8, i8 11, i8 10, i8 13, i8 12, i8 15, i8 14>, ptr {{%.+}}, align 16
-// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16
 // CHECK-LE: xor <16 x i8>
 // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})

@ -9415,7 +9415,7 @@ void test8() {
 // CHECK: store <16 x i8> <i8 1, i8 0, i8 3, i8 2, i8 5, i8 4, i8 7, i8 6, i8 9, i8 8, i8 11, i8 10, i8 13, i8 12, i8 15, i8 14>, ptr {{%.+}}, align 16
 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
 // CHECK-LE: store <16 x i8> <i8 1, i8 0, i8 3, i8 2, i8 5, i8 4, i8 7, i8 6, i8 9, i8 8, i8 11, i8 10, i8 13, i8 12, i8 15, i8 14>, ptr {{%.+}}, align 16
-// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16
 // CHECK-LE: xor <16 x i8>
 // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})

@ -9423,7 +9423,7 @@ void test8() {
 // CHECK: store <16 x i8> <i8 1, i8 0, i8 3, i8 2, i8 5, i8 4, i8 7, i8 6, i8 9, i8 8, i8 11, i8 10, i8 13, i8 12, i8 15, i8 14>, ptr {{%.+}}, align 16
 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
 // CHECK-LE: store <16 x i8> <i8 1, i8 0, i8 3, i8 2, i8 5, i8 4, i8 7, i8 6, i8 9, i8 8, i8 11, i8 10, i8 13, i8 12, i8 15, i8 14>, ptr {{%.+}}, align 16
-// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16
 // CHECK-LE: xor <16 x i8>
 // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})

@ -9431,7 +9431,7 @@ void test8() {
 // CHECK: store <16 x i8> <i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 11, i8 10, i8 9, i8 8, i8 15, i8 14, i8 13, i8 12>, ptr {{%.+}}, align 16
 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
 // CHECK-LE: store <16 x i8> <i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 11, i8 10, i8 9, i8 8, i8 15, i8 14, i8 13, i8 12>, ptr {{%.+}}, align 16
-// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16
 // CHECK-LE: xor <16 x i8>
 // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})

@ -9439,7 +9439,7 @@ void test8() {
 // CHECK: store <16 x i8> <i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 11, i8 10, i8 9, i8 8, i8 15, i8 14, i8 13, i8 12>, ptr {{%.+}}, align 16
 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
 // CHECK-LE: store <16 x i8> <i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 11, i8 10, i8 9, i8 8, i8 15, i8 14, i8 13, i8 12>, ptr {{%.+}}, align 16
-// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16
 // CHECK-LE: xor <16 x i8>
 // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})

@ -9447,7 +9447,7 @@ void test8() {
 // CHECK: store <16 x i8> <i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 11, i8 10, i8 9, i8 8, i8 15, i8 14, i8 13, i8 12>, ptr {{%.+}}, align 16
 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
 // CHECK-LE: store <16 x i8> <i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 11, i8 10, i8 9, i8 8, i8 15, i8 14, i8 13, i8 12>, ptr {{%.+}}, align 16
-// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16
 // CHECK-LE: xor <16 x i8>
 // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})

@ -9455,7 +9455,7 @@ void test8() {
 // CHECK: store <16 x i8> <i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 11, i8 10, i8 9, i8 8, i8 15, i8 14, i8 13, i8 12>, ptr {{%.+}}, align 16
 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
 // CHECK-LE: store <16 x i8> <i8 3, i8 2, i8 1, i8 0, i8 7, i8 6, i8 5, i8 4, i8 11, i8 10, i8 9, i8 8, i8 15, i8 14, i8 13, i8 12>, ptr {{%.+}}, align 16
-// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16
 // CHECK-LE: xor <16 x i8>
 // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
 }
@ -9529,10 +9529,10 @@ void test10() {
 vector float test_rsqrtf(vector float a, vector float b) {
  // CHECK-LABEL: test_rsqrtf
  // CHECK: call fast <4 x float> @llvm.sqrt.v4f32
-  // CHECK: fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  // CHECK: fdiv fast <4 x float> splat (float 1.000000e+00)
  // CHECK-LE-LABEL: test_rsqrtf
  // CHECK-LE: call fast <4 x float> @llvm.sqrt.v4f32
-  // CHECK-LE: fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  // CHECK-LE: fdiv fast <4 x float> splat (float 1.000000e+00)
  return vec_rsqrt(a);
 }

--- a/clang/test/CodeGen/PowerPC/builtins-ppc-fastmath.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-fastmath.c
@ -42,7 +42,7 @@ vector double test_flags_recipdivd() {
 // CHECK-LABEL: @test_flags_rsqrtf(
 // CHECK:    [[TMP0:%.*]] = load <4 x float>, ptr @a, align 16
 // CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
-// CHECK-NEXT:    [[RSQRT:%.*]] = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[TMP1]]
+// CHECK-NEXT:    [[RSQRT:%.*]] = fdiv fast <4 x float> splat (float 1.000000e+00), [[TMP1]]
 // CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr @b, align 16
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <4 x float> [[RSQRT]], [[TMP2]]
 // CHECK-NEXT:    ret <4 x float> [[ADD]]
@ -54,7 +54,7 @@ vector float test_flags_rsqrtf() {
 // CHECK-LABEL: @test_flags_rsqrtd(
 // CHECK:    [[TMP0:%.*]] = load <2 x double>, ptr @d, align 16
 // CHECK-NEXT:    [[TMP1:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP0]])
-// CHECK-NEXT:    [[RSQRT:%.*]] = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, [[TMP1]]
+// CHECK-NEXT:    [[RSQRT:%.*]] = fdiv fast <2 x double> splat (double 1.000000e+00), [[TMP1]]
 // CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr @e, align 16
 // CHECK-NEXT:    [[ADD:%.*]] = fadd <2 x double> [[RSQRT]], [[TMP2]]
 // CHECK-NEXT:    ret <2 x double> [[ADD]]
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c
@ -1535,17 +1535,17 @@ vector unsigned long long test_vec_extracth_ul(void) {
 }

 vector signed int test_vec_vec_splati_si(void) {
-  // CHECK: ret <4 x i32> <i32 -17, i32 -17, i32 -17, i32 -17>
+  // CHECK: ret <4 x i32> splat (i32 -17)
  return vec_splati(-17);
 }

 vector unsigned int test_vec_vec_splati_ui(void) {
-  // CHECK: ret <4 x i32> <i32 16, i32 16, i32 16, i32 16>
+  // CHECK: ret <4 x i32> splat (i32 16)
  return vec_splati(16U);
 }

 vector float test_vec_vec_splati_f(void) {
-  // CHECK: ret <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  // CHECK: ret <4 x float> splat (float 1.000000e+00)
  return vec_splati(1.0f);
 }

@ -1863,7 +1863,7 @@ vector bool __int128 test_vec_cmpeq_bool_int128(void) {
 vector bool __int128 test_vec_cmpne_s128(void) {
  // CHECK-LABEL: @test_vec_cmpne_s128(
  // CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpequq(<1 x i128>
-  // CHECK-NEXT: %not.i = xor <1 x i128> %4, <i128 -1>
+  // CHECK-NEXT: %not.i = xor <1 x i128> %4, splat (i128 -1)
  // CHECK-NEXT: ret <1 x i128> %not.i
  return vec_cmpne(vsi128a, vsi128b);
 }
@ -1871,7 +1871,7 @@ vector bool __int128 test_vec_cmpne_s128(void) {
 vector bool __int128 test_vec_cmpne_u128(void) {
  // CHECK-LABEL: @test_vec_cmpne_u128(
  // CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpequq(<1 x i128>
-  // CHECK-NEXT: xor <1 x i128> %4, <i128 -1>
+  // CHECK-NEXT: xor <1 x i128> %4, splat (i128 -1)
  // CHECK-NEXT: ret <1 x i128>
  return vec_cmpne(vui128a, vui128b);
 }
@ -1879,7 +1879,7 @@ vector bool __int128 test_vec_cmpne_u128(void) {
 vector bool __int128 test_vec_cmpne_bool_int128(void) {
  // CHECK-LABEL: @test_vec_cmpne_bool_int128(
  // CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpequq(<1 x i128>
-  // CHECK-NEXT: xor <1 x i128> %4, <i128 -1>
+  // CHECK-NEXT: xor <1 x i128> %4, splat (i128 -1)
  // CHECK-NEXT: ret <1 x i128>
  return vec_cmpne(vbi128a, vbi128b);
 }
@ -1915,7 +1915,7 @@ vector bool __int128 test_vec_cmplt_u128(void) {
 vector bool __int128 test_vec_cmpge_s128(void) {
  // CHECK-LABEL: @test_vec_cmpge_s128(
  // CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpgtsq(<1 x i128>
-  // CHECK-NEXT: xor <1 x i128> %6, <i128 -1>
+  // CHECK-NEXT: xor <1 x i128> %6, splat (i128 -1)
  // CHECK-NEXT: ret <1 x i128>
  return vec_cmpge(vsi128a, vsi128b);
 }
@ -1923,7 +1923,7 @@ vector bool __int128 test_vec_cmpge_s128(void) {
 vector bool __int128 test_vec_cmpge_u128(void) {
  // CHECK-LABEL: @test_vec_cmpge_u128(
  // CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpgtuq(<1 x i128>
-  // CHECK-NEXT: xor <1 x i128> %6, <i128 -1>
+  // CHECK-NEXT: xor <1 x i128> %6, splat (i128 -1)
  // CHECK-NEXT: ret <1 x i128>
  return vec_cmpge(vui128a, vui128b);
 }
@ -1931,7 +1931,7 @@ vector bool __int128 test_vec_cmpge_u128(void) {
 vector bool __int128 test_vec_cmple_s128(void) {
  // CHECK-LABEL: @test_vec_cmple_s128(
  // CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpgtsq(<1 x i128>
-  // CHECK-NEXT: xor <1 x i128> %8, <i128 -1>
+  // CHECK-NEXT: xor <1 x i128> %8, splat (i128 -1)
  // CHECK-NEXT: ret <1 x i128>
  return vec_cmple(vsi128a, vsi128b);
 }
@ -1939,7 +1939,7 @@ vector bool __int128 test_vec_cmple_s128(void) {
 vector bool __int128 test_vec_cmple_u128(void) {
  // CHECK-LABEL: @test_vec_cmple_u128(
  // CHECK: call <1 x i128> @llvm.ppc.altivec.vcmpgtuq(<1 x i128>
-  // CHECK-NEXT: xor <1 x i128> %8, <i128 -1>
+  // CHECK-NEXT: xor <1 x i128> %8, splat (i128 -1)
  // CHECK-NEXT: ret <1 x i128>
  return vec_cmple(vui128a, vui128b);
 }
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-p8vector.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-p8vector.c
@ -675,15 +675,15 @@ void test1() {

  /* vec_sr */
  res_vsll = vec_sr(vsll, vull);
-// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, <i64 64, i64 64>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, splat (i64 64)
 // CHECK: lshr <2 x i64> {{[0-9a-zA-Z%.]+}}, [[UREM]]
-// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, <i64 64, i64 64>
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, splat (i64 64)
 // CHECK-LE: lshr <2 x i64> {{[0-9a-zA-Z%.]+}}, [[UREM]]

  res_vull = vec_sr(vull, vull);
-// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, <i64 64, i64 64>
+// CHECK: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, splat (i64 64)
 // CHECK: lshr <2 x i64> {{[0-9a-zA-Z%.]+}}, [[UREM]]
-// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, <i64 64, i64 64>
+// CHECK-LE: [[UREM:[0-9a-zA-Z%.]+]] = urem <2 x i64> {{[0-9a-zA-Z%.]+}}, splat (i64 64)
 // CHECK-LE: lshr <2 x i64> {{[0-9a-zA-Z%.]+}}, [[UREM]]

  /* vec_sra */
@ -831,299 +831,299 @@ void test1() {
  /* vec_nand */
  res_vsc = vec_nand(vsc, vsc);
 // CHECK: [[T1:%.+]] = and <16 x i8>
-// CHECK: xor <16 x i8> [[T1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: xor <16 x i8> [[T1]], splat (i8 -1)
 // CHECK-LE: [[T1:%.+]] = and <16 x i8>
-// CHECK-LE: xor <16 x i8> [[T1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-LE: xor <16 x i8> [[T1]], splat (i8 -1)
 // CHECK-PPC: error: call to undeclared function 'vec_nand'

  res_vbc = vec_nand(vbc, vbc);
 // CHECK: [[T1:%.+]] = and <16 x i8>
-// CHECK: xor <16 x i8> [[T1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: xor <16 x i8> [[T1]], splat (i8 -1)
 // CHECK-LE: [[T1:%.+]] = and <16 x i8>
-// CHECK-LE: xor <16 x i8> [[T1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-LE: xor <16 x i8> [[T1]], splat (i8 -1)

  res_vuc = vec_nand(vuc, vuc);
 // CHECK: [[T1:%.+]] = and <16 x i8>
-// CHECK: xor <16 x i8> [[T1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: xor <16 x i8> [[T1]], splat (i8 -1)
 // CHECK-LE: [[T1:%.+]] = and <16 x i8>
-// CHECK-LE: xor <16 x i8> [[T1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-LE: xor <16 x i8> [[T1]], splat (i8 -1)

  res_vss = vec_nand(vss, vss);
 // CHECK: [[T1:%.+]] = and <8 x i16>
-// CHECK: xor <8 x i16> [[T1]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: xor <8 x i16> [[T1]], splat (i16 -1)
 // CHECK-LE: [[T1:%.+]] = and <8 x i16>
-// CHECK-LE: xor <8 x i16> [[T1]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-LE: xor <8 x i16> [[T1]], splat (i16 -1)

  res_vbs = vec_nand(vbs, vbs);
 // CHECK: [[T1:%.+]] = and <8 x i16>
-// CHECK: xor <8 x i16> [[T1]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: xor <8 x i16> [[T1]], splat (i16 -1)
 // CHECK-LE: [[T1:%.+]] = and <8 x i16>
-// CHECK-LE: xor <8 x i16> [[T1]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-LE: xor <8 x i16> [[T1]], splat (i16 -1)

  res_vus = vec_nand(vus, vus);
 // CHECK: [[T1:%.+]] = and <8 x i16>
-// CHECK: xor <8 x i16> [[T1]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: xor <8 x i16> [[T1]], splat (i16 -1)
 // CHECK-LE: [[T1:%.+]] = and <8 x i16>
-// CHECK-LE: xor <8 x i16> [[T1]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-LE: xor <8 x i16> [[T1]], splat (i16 -1)

  res_vsi = vec_nand(vsi, vsi);
 // CHECK: [[T1:%.+]] = and <4 x i32>
-// CHECK: xor <4 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: xor <4 x i32> [[T1]], splat (i32 -1)
 // CHECK-LE: [[T1:%.+]] = and <4 x i32>
-// CHECK-LE: xor <4 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: xor <4 x i32> [[T1]], splat (i32 -1)

  res_vbi = vec_nand(vbi, vbi);
 // CHECK: [[T1:%.+]] = and <4 x i32>
-// CHECK: xor <4 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: xor <4 x i32> [[T1]], splat (i32 -1)
 // CHECK-LE: [[T1:%.+]] = and <4 x i32>
-// CHECK-LE: xor <4 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: xor <4 x i32> [[T1]], splat (i32 -1)

  res_vui = vec_nand(vui, vui);
 // CHECK: [[T1:%.+]] = and <4 x i32>
-// CHECK: xor <4 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: xor <4 x i32> [[T1]], splat (i32 -1)
 // CHECK-LE: [[T1:%.+]] = and <4 x i32>
-// CHECK-LE: xor <4 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: xor <4 x i32> [[T1]], splat (i32 -1)

  res_vf = vec_nand(vfa, vfa);
 // CHECK: [[T1:%.+]] = and <4 x i32>
-// CHECK: xor <4 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: xor <4 x i32> [[T1]], splat (i32 -1)
 // CHECK-LE: [[T1:%.+]] = and <4 x i32>
-// CHECK-LE: xor <4 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: xor <4 x i32> [[T1]], splat (i32 -1)

  res_vsll = vec_nand(vsll, vsll);
 // CHECK: [[T1:%.+]] = and <2 x i64>
-// CHECK: xor <2 x i64> [[T1]], <i64 -1, i64 -1>
+// CHECK: xor <2 x i64> [[T1]], splat (i64 -1)
 // CHECK-LE: [[T1:%.+]] = and <2 x i64>
-// CHECK-LE: xor <2 x i64> [[T1]], <i64 -1, i64 -1>
+// CHECK-LE: xor <2 x i64> [[T1]], splat (i64 -1)

  res_vbll = vec_nand(vbll, vbll);
 // CHECK: [[T1:%.+]] = and <2 x i64>
-// CHECK: xor <2 x i64> [[T1]], <i64 -1, i64 -1>
+// CHECK: xor <2 x i64> [[T1]], splat (i64 -1)
 // CHECK-LE: [[T1:%.+]] = and <2 x i64>
-// CHECK-LE: xor <2 x i64> [[T1]], <i64 -1, i64 -1>
+// CHECK-LE: xor <2 x i64> [[T1]], splat (i64 -1)

  res_vull = vec_nand(vull, vull);
 // CHECK: [[T1:%.+]] = and <2 x i64>
-// CHECK: xor <2 x i64> [[T1]], <i64 -1, i64 -1>
+// CHECK: xor <2 x i64> [[T1]], splat (i64 -1)
 // CHECK-LE: [[T1:%.+]] = and <2 x i64>
-// CHECK-LE: xor <2 x i64> [[T1]], <i64 -1, i64 -1>
+// CHECK-LE: xor <2 x i64> [[T1]], splat (i64 -1)

  res_vd = vec_nand(vda, vda);
 // CHECK: [[T1:%.+]] = and <2 x i64>
-// CHECK: xor <2 x i64> [[T1]], <i64 -1, i64 -1>
+// CHECK: xor <2 x i64> [[T1]], splat (i64 -1)
 // CHECK-LE: [[T1:%.+]] = and <2 x i64>
-// CHECK-LE: xor <2 x i64> [[T1]], <i64 -1, i64 -1>
+// CHECK-LE: xor <2 x i64> [[T1]], splat (i64 -1)

  res_vf = vec_nand(vfa, vfa);
 // CHECK: [[T1:%.+]] = and <4 x i32>
-// CHECK: xor <4 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: xor <4 x i32> [[T1]], splat (i32 -1)
 // CHECK-LE: [[T1:%.+]] = and <4 x i32>
-// CHECK-LE: xor <4 x i32> [[T1]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: xor <4 x i32> [[T1]], splat (i32 -1)

  /* vec_orc */
  res_vsc = vec_orc(vsc, vsc);
-// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1)
 // CHECK: or <16 x i8> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1)
 // CHECK-LE: or <16 x i8> {{%.+}}, [[T1]]
 // CHECK-PPC: error: call to undeclared function 'vec_orc'

  res_vsc = vec_orc(vsc, vbc);
-// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1)
 // CHECK: or <16 x i8> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1)
 // CHECK-LE: or <16 x i8> {{%.+}}, [[T1]]

  res_vsc = vec_orc(vbc, vsc);
-// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1)
 // CHECK: or <16 x i8> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1)
 // CHECK-LE: or <16 x i8> {{%.+}}, [[T1]]

  res_vuc = vec_orc(vuc, vuc);
-// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1)
 // CHECK: or <16 x i8> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1)
 // CHECK-LE: or <16 x i8> {{%.+}}, [[T1]]

  res_vuc = vec_orc(vuc, vbc);
-// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1)
 // CHECK: or <16 x i8> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1)
 // CHECK-LE: or <16 x i8> {{%.+}}, [[T1]]

  res_vuc = vec_orc(vbc, vuc);
-// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1)
 // CHECK: or <16 x i8> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1)
 // CHECK-LE: or <16 x i8> {{%.+}}, [[T1]]

  res_vbc = vec_orc(vbc, vbc);
-// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1)
 // CHECK: or <16 x i8> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-LE: [[T1:%.+]] = xor <16 x i8> {{%.+}}, splat (i8 -1)
 // CHECK-LE: or <16 x i8> {{%.+}}, [[T1]]

  res_vss = vec_orc(vss, vss);
-// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1)
 // CHECK: or <8 x i16> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1)
 // CHECK-LE: or <8 x i16> {{%.+}}, [[T1]]

  res_vss = vec_orc(vss, vbs);
-// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1)
 // CHECK: or <8 x i16> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1)
 // CHECK-LE: or <8 x i16> {{%.+}}, [[T1]]

  res_vss = vec_orc(vbs, vss);
-// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1)
 // CHECK: or <8 x i16> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1)
 // CHECK-LE: or <8 x i16> {{%.+}}, [[T1]]

  res_vus = vec_orc(vus, vus);
-// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1)
 // CHECK: or <8 x i16> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1)
 // CHECK-LE: or <8 x i16> {{%.+}}, [[T1]]

  res_vus = vec_orc(vus, vbs);
-// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1)
 // CHECK: or <8 x i16> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1)
 // CHECK-LE: or <8 x i16> {{%.+}}, [[T1]]

  res_vus = vec_orc(vbs, vus);
-// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1)
 // CHECK: or <8 x i16> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1)
 // CHECK-LE: or <8 x i16> {{%.+}}, [[T1]]

  res_vbs = vec_orc(vbs, vbs);
-// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1)
 // CHECK: or <8 x i16> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-LE: [[T1:%.+]] = xor <8 x i16> {{%.+}}, splat (i16 -1)
 // CHECK-LE: or <8 x i16> {{%.+}}, [[T1]]

  res_vsi = vec_orc(vsi, vsi);
-// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1)
 // CHECK: or <4 x i32> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1)
 // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]]

  res_vsi = vec_orc(vsi, vbi);
-// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1)
 // CHECK: or <4 x i32> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1)
 // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]]

  res_vsi = vec_orc(vbi, vsi);
-// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1)
 // CHECK: or <4 x i32> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1)
 // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]]

  res_vui = vec_orc(vui, vui);
-// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1)
 // CHECK: or <4 x i32> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1)
 // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]]

  res_vui = vec_orc(vui, vbi);
-// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1)
 // CHECK: or <4 x i32> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1)
 // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]]

  res_vui = vec_orc(vbi, vui);
-// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1)
 // CHECK: or <4 x i32> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1)
 // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]]

  res_vbi = vec_orc(vbi, vbi);
-// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1)
 // CHECK: or <4 x i32> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1)
 // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]]

  res_vf = vec_orc(vbi, vfa);
-// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1)
 // CHECK: or <4 x i32> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1)
 // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]]

  res_vf = vec_orc(vfa, vbi);
-// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1)
 // CHECK: or <4 x i32> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1)
 // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]]

  res_vf = vec_orc(vfa, vfb);
-// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1)
 // CHECK: or <4 x i32> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-LE: [[T1:%.+]] = xor <4 x i32> {{%.+}}, splat (i32 -1)
 // CHECK-LE: or <4 x i32> {{%.+}}, [[T1]]

  res_vsll = vec_orc(vsll, vsll);
-// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1)
 // CHECK: or <2 x i64> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1)
 // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]]

  res_vsll = vec_orc(vsll, vbll);
-// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1)
 // CHECK: or <2 x i64> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1)
 // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]]

  res_vsll = vec_orc(vbll, vsll);
-// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1)
 // CHECK: or <2 x i64> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1)
 // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]]

  res_vull = vec_orc(vull, vull);
-// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1)
 // CHECK: or <2 x i64> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1)
 // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]]

  res_vull = vec_orc(vull, vbll);
-// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1)
 // CHECK: or <2 x i64> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1)
 // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]]

  res_vull = vec_orc(vbll, vull);
-// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1)
 // CHECK: or <2 x i64> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1)
 // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]]

  res_vbll = vec_orc(vbll, vbll);
-// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1)
 // CHECK: or <2 x i64> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1)
 // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]]

  res_vd = vec_orc(vbll, vda);
-// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1)
 // CHECK: or <2 x i64> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1)
 // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]]

  res_vd = vec_orc(vda, vbll);
-// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1)
 // CHECK: or <2 x i64> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1)
 // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]]

  res_vd = vec_orc(vda, vdb);
-// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1)
 // CHECK: or <2 x i64> {{%.+}}, [[T1]]
-// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, <i64 -1, i64 -1>
+// CHECK-LE: [[T1:%.+]] = xor <2 x i64> {{%.+}}, splat (i64 -1)
 // CHECK-LE: or <2 x i64> {{%.+}}, [[T1]]

  /* vec_sub */
@ -1250,7 +1250,7 @@ vector unsigned int test_vec_addec_unsigned (vector unsigned int a, vector unsig
 vector signed int test_vec_subec_signed (vector signed int a, vector signed int b, vector signed int c) {
  return vec_subec(a, b, c);
 // CHECK-LABEL: @test_vec_subec_signed
-// CHECK: xor <4 x i32> {{%[0-9]+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: xor <4 x i32> {{%[0-9]+}}, splat (i32 -1)
 // CHECK: ret <4 x i32>
 }

@ -1258,7 +1258,7 @@ vector unsigned int test_vec_subec_unsigned (vector unsigned int a, vector unsig
  return vec_subec(a, b, c);

 // CHECK-LABEL: @test_vec_subec_unsigned
-// CHECK: xor <4 x i32> {{%[0-9]+}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK: xor <4 x i32> {{%[0-9]+}}, splat (i32 -1)
 // CHECK: ret <4 x i32>
 }

--- a/clang/test/CodeGen/PowerPC/builtins-ppc-quadword.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-quadword.c
@ -216,7 +216,7 @@ void test1() {
 // CHECK: store <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, ptr {{%.+}}, align 16
 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
 // CHECK-LE: store <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, ptr {{%.+}}, align 16
-// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16
 // CHECK-LE: xor <16 x i8>
 // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
 // CHECK_PPC: error: call to 'vec_revb' is ambiguous
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c
@ -240,11 +240,11 @@ void test1() {

  res_vd = vec_andc(vbll, vd);
 // CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
-// CHECK: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1>
+// CHECK: xor <2 x i64> %{{[0-9]*}}, splat (i64 -1)
 // CHECK: and <2 x i64>
 // CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
 // CHECK-LE: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
-// CHECK-LE: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1>
+// CHECK-LE: xor <2 x i64> %{{[0-9]*}}, splat (i64 -1)
 // CHECK-LE: and <2 x i64>
 // CHECK-LE: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>

@ -254,11 +254,11 @@ void test1() {

  res_vd = vec_andc(vd, vbll);
 // CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
-// CHECK: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1>
+// CHECK: xor <2 x i64> %{{[0-9]*}}, splat (i64 -1)
 // CHECK: and <2 x i64>
 // CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>
 // CHECK-LE: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
-// CHECK-LE: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1>
+// CHECK-LE: xor <2 x i64> %{{[0-9]*}}, splat (i64 -1)
 // CHECK-LE: and <2 x i64>
 // CHECK-LE: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>

@ -267,7 +267,7 @@ void test1() {

  res_vd = vec_andc(vd, vd);
 // CHECK: bitcast <2 x double> %{{[0-9]*}} to <2 x i64>
-// CHECK: xor <2 x i64> %{{[0-9]*}}, <i64 -1, i64 -1>
+// CHECK: xor <2 x i64> %{{[0-9]*}}, splat (i64 -1)
 // CHECK: and <2 x i64>
 // CHECK: bitcast <2 x i64> %{{[0-9]*}} to <2 x double>

@ -927,10 +927,10 @@ void test1() {
  res_vd = vec_nor(vd, vd);
 // CHECK: bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
 // CHECK: [[OR:%.+]] = or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
-// CHECK-NEXT: xor <2 x i64> [[OR]], <i64 -1, i64 -1>
+// CHECK-NEXT: xor <2 x i64> [[OR]], splat (i64 -1)
 // CHECK-LE: bitcast <2 x double> %{{[0-9]+}} to <2 x i64>
 // CHECK-LE: [[OR:%.+]] = or <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
-// CHECK-LE-NEXT: xor <2 x i64> [[OR]], <i64 -1, i64 -1>
+// CHECK-LE-NEXT: xor <2 x i64> [[OR]], splat (i64 -1)

  /* vec_or */
  res_vsll = vec_or(vsll, vsll);
@ -1037,12 +1037,12 @@ void test1() {
 // CHECK-LE: call void @dummy()

  res_vd = vec_sel(vd, vd, vbll);
-// CHECK: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
+// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1)
 // CHECK: and <2 x i64> %{{[0-9]+}},
 // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
 // CHECK: or <2 x i64>
 // CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>
-// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
+// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1)
 // CHECK-LE: and <2 x i64> %{{[0-9]+}},
 // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
 // CHECK-LE: or <2 x i64>
@ -1053,73 +1053,73 @@ void test1() {
 // CHECK-LE: call void @dummy()

  res_vd = vec_sel(vd, vd, vull);
-// CHECK: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
+// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1)
 // CHECK: and <2 x i64> %{{[0-9]+}},
 // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
 // CHECK: or <2 x i64>
 // CHECK: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>
-// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
+// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1)
 // CHECK-LE: and <2 x i64> %{{[0-9]+}},
 // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
 // CHECK-LE: or <2 x i64>
 // CHECK-LE: bitcast <2 x i64> %{{[0-9]+}} to <2 x double>

  res_vbll = vec_sel(vbll, vbll, vbll);
-// CHECK: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
+// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1)
 // CHECK: and <2 x i64> %{{[0-9]+}},
 // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
 // CHECK: or <2 x i64>
-// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
+// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1)
 // CHECK-LE: and <2 x i64> %{{[0-9]+}},
 // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
 // CHECK-LE: or <2 x i64>

  res_vbll = vec_sel(vbll, vbll, vull);
-// CHECK: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
+// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1)
 // CHECK: and <2 x i64> %{{[0-9]+}},
 // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
 // CHECK: or <2 x i64>
-// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
+// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1)
 // CHECK-LE: and <2 x i64> %{{[0-9]+}},
 // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
 // CHECK-LE: or <2 x i64>

  res_vsll = vec_sel(vsll, vsll, vbll);
-// CHECK: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
+// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1)
 // CHECK: and <2 x i64> %{{[0-9]+}},
 // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
 // CHECK: or <2 x i64>
-// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
+// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1)
 // CHECK-LE: and <2 x i64> %{{[0-9]+}},
 // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
 // CHECK-LE: or <2 x i64>

  res_vsll = vec_sel(vsll, vsll, vull);
-// CHECK: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
+// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1)
 // CHECK: and <2 x i64> %{{[0-9]+}},
 // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
 // CHECK: or <2 x i64>
-// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
+// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1)
 // CHECK-LE: and <2 x i64> %{{[0-9]+}},
 // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
 // CHECK-LE: or <2 x i64>

  res_vull = vec_sel(vull, vull, vbll);
-// CHECK: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
+// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1)
 // CHECK: and <2 x i64> %{{[0-9]+}},
 // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
 // CHECK: or <2 x i64>
-// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
+// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1)
 // CHECK-LE: and <2 x i64> %{{[0-9]+}},
 // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
 // CHECK-LE: or <2 x i64>

  res_vull = vec_sel(vull, vull, vull);
-// CHECK: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
+// CHECK: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1)
 // CHECK: and <2 x i64> %{{[0-9]+}},
 // CHECK: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
 // CHECK: or <2 x i64>
-// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, <i64 -1, i64 -1>
+// CHECK-LE: xor <2 x i64> %{{[0-9]+}}, splat (i64 -1)
 // CHECK-LE: and <2 x i64> %{{[0-9]+}},
 // CHECK-LE: and <2 x i64> %{{[0-9]+}}, %{{[0-9]+}}
 // CHECK-LE: or <2 x i64>
@ -1282,16 +1282,16 @@ void test1() {
 // CHECK-LE: fptosi <2 x double> %{{.*}} to <2 x i64>

  res_vsll = vec_ctsl(vf, 3);
-  // CHECK: fmul <4 x float> {{%.*}}, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
+  // CHECK: fmul <4 x float> {{%.*}}, splat (float 8.000000e+00)
  // CHECK: call <2 x i64> @llvm.ppc.vsx.xvcvspsxds(<4 x float>
-  // CHECK-LE: fmul <4 x float> {{%.*}}, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
+  // CHECK-LE: fmul <4 x float> {{%.*}}, splat (float 8.000000e+00)
  // CHECK-LE:  shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
  // CHECK-LE: call <2 x i64> @llvm.ppc.vsx.xvcvspsxds(<4 x float>

  res_vsll = vec_ctsl(vd, 3);
-  // CHECK: fmul <2 x double> {{%.*}}, <double 8.000000e+00, double 8.000000e+00>
+  // CHECK: fmul <2 x double> {{%.*}}, splat (double 8.000000e+00)
  // CHECK: fptosi <2 x double> {{%.*}} to <2 x i64>
-  // CHECK-LE: fmul <2 x double> {{%.*}}, <double 8.000000e+00, double 8.000000e+00>
+  // CHECK-LE: fmul <2 x double> {{%.*}}, splat (double 8.000000e+00)
  // CHECK-LE: fptosi <2 x double> {{%.*}} to <2 x i64>

  res_vull = vec_ctu(vd, 0);
@ -1307,16 +1307,16 @@ void test1() {
 // CHECK-LE: fptoui <2 x double> %{{.*}} to <2 x i64>

  res_vull = vec_ctul(vf, 3);
-  // CHECK: fmul <4 x float> {{%.*}}, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
+  // CHECK: fmul <4 x float> {{%.*}}, splat (float 8.000000e+00)
  // CHECK: call <2 x i64> @llvm.ppc.vsx.xvcvspuxds(<4 x float>
-  // CHECK-LE: fmul <4 x float> {{%.*}}, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
+  // CHECK-LE: fmul <4 x float> {{%.*}}, splat (float 8.000000e+00)
  // CHECK-LE:  shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
  // CHECK-LE: call <2 x i64> @llvm.ppc.vsx.xvcvspuxds(<4 x float>

  res_vull = vec_ctul(vd, 3);
-  // CHECK: fmul <2 x double> {{%.*}}, <double 8.000000e+00, double 8.000000e+00>
+  // CHECK: fmul <2 x double> {{%.*}}, splat (double 8.000000e+00)
  // CHECK: fptoui <2 x double> {{%.*}} to <2 x i64>
-  // CHECK-LE: fmul <2 x double> {{%.*}}, <double 8.000000e+00, double 8.000000e+00>
+  // CHECK-LE: fmul <2 x double> {{%.*}}, splat (double 8.000000e+00)
  // CHECK-LE: fptoui <2 x double> {{%.*}} to <2 x i64>

  res_vf = vec_ctf(vsll, 0);
@ -1345,29 +1345,29 @@ void test1() {

  res_vd = vec_ctd(vsll, 2);
 // CHECK: sitofp <2 x i64> %{{.*}} to <2 x double>
-// CHECK: fmul <2 x double> {{.*}} <double 2.500000e-01, double 2.500000e-01>
+// CHECK: fmul <2 x double> {{.*}} splat (double 2.500000e-01)
 // CHECK-LE: sitofp <2 x i64> %{{.*}} to <2 x double>
-// CHECK-LE: fmul <2 x double> {{.*}} <double 2.500000e-01, double 2.500000e-01>
+// CHECK-LE: fmul <2 x double> {{.*}} splat (double 2.500000e-01)

  res_vd = vec_ctd(vull, 2);
 // CHECK: uitofp <2 x i64> %{{.*}} to <2 x double>
-// CHECK: fmul <2 x double> {{.*}} <double 2.500000e-01, double 2.500000e-01>
+// CHECK: fmul <2 x double> {{.*}} splat (double 2.500000e-01)
 // CHECK-LE: uitofp <2 x i64> %{{.*}} to <2 x double>
-// CHECK-LE: fmul <2 x double> {{.*}} <double 2.500000e-01, double 2.500000e-01>
+// CHECK-LE: fmul <2 x double> {{.*}} splat (double 2.500000e-01)

  res_vd = vec_ctd(vsi, 2);
 // CHECK: call <2 x double> @llvm.ppc.vsx.xvcvsxwdp(<4 x i32>
-// CHECK: fmul <2 x double> {{.*}} <double 2.500000e-01, double 2.500000e-01>
+// CHECK: fmul <2 x double> {{.*}} splat (double 2.500000e-01)
 // CHECK-LE: vperm
 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.xvcvsxwdp(<4 x i32>
-// CHECK-LE: fmul <2 x double> {{.*}} <double 2.500000e-01, double 2.500000e-01>
+// CHECK-LE: fmul <2 x double> {{.*}} splat (double 2.500000e-01)

  res_vd = vec_ctd(vui, 2);
 // CHECK: call <2 x double> @llvm.ppc.vsx.xvcvuxwdp(<4 x i32>
-// CHECK: fmul <2 x double> {{.*}} <double 2.500000e-01, double 2.500000e-01>
+// CHECK: fmul <2 x double> {{.*}} splat (double 2.500000e-01)
 // CHECK-LE: vperm
 // CHECK-LE: call <2 x double> @llvm.ppc.vsx.xvcvuxwdp(<4 x i32>
-// CHECK-LE: fmul <2 x double> {{.*}} <double 2.500000e-01, double 2.500000e-01>
+// CHECK-LE: fmul <2 x double> {{.*}} splat (double 2.500000e-01)

  res_vsll = vec_signed(vd);
 // CHECK: fptosi <2 x double>
@ -1781,7 +1781,7 @@ void test1() {
 // CHECK: store <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>, ptr {{%.+}}, align 16
 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
 // CHECK-LE: store <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>, ptr {{%.+}}, align 16
-// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16
 // CHECK-LE: xor <16 x i8>
 // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})

@ -1789,7 +1789,7 @@ void test1() {
 // CHECK: store <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>, ptr {{%.+}}, align 16
 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
 // CHECK-LE: store <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>, ptr {{%.+}}, align 16
-// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16
 // CHECK-LE: xor <16 x i8>
 // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})

@ -1797,7 +1797,7 @@ void test1() {
 // CHECK: store <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>, ptr {{%.+}}, align 16
 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
 // CHECK-LE: store <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>, ptr {{%.+}}, align 16
-// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16
 // CHECK-LE: xor <16 x i8>
 // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})

@ -1805,7 +1805,7 @@ void test1() {
 // CHECK: store <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>, ptr {{%.+}}, align 16
 // CHECK: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})
 // CHECK-LE: store <16 x i8> <i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8>, ptr {{%.+}}, align 16
-// CHECK-LE: store <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, ptr {{%.+}}, align 16
+// CHECK-LE: store <16 x i8> splat (i8 -1), ptr {{%.+}}, align 16
 // CHECK-LE: xor <16 x i8>
 // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> {{%.+}}, <4 x i32> {{%.+}}, <16 x i8> {{%.+}})

@ -2352,10 +2352,10 @@ vector double test_recipdivd(vector double a, vector double b) {
 vector double test_rsqrtd(vector double a, vector double b) {
  // CHECK-LABEL: test_rsqrtd
  // CHECK: call fast <2 x double> @llvm.sqrt.v2f64
-  // CHECK: fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>
+  // CHECK: fdiv fast <2 x double> splat (double 1.000000e+00),
  // CHECK-LE-LABEL: test_rsqrtd
  // CHECK-LE: call fast <2 x double> @llvm.sqrt.v2f64
-  // CHECK-LE: fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>
+  // CHECK-LE: fdiv fast <2 x double> splat (double 1.000000e+00)
  return vec_rsqrt(a);
 }

@ -2448,7 +2448,7 @@ void test_p8overloads_backwards_compat() {
  // CHECK: and <4 x i32>
  // CHECK: or <4 x i32>
  // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
-  // CHECK: xor <2 x i64> {{%.*}}, <i64 -1, i64 -1>
+  // CHECK: xor <2 x i64> {{%.*}}, splat (i64 -1)
  // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtsw
  // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
  // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw
@ -2456,7 +2456,7 @@ void test_p8overloads_backwards_compat() {
  // CHECK-LE: and <4 x i32>
  // CHECK-LE: or <4 x i32>
  // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
-  // CHECK-LE: xor <2 x i64> {{%.*}}, <i64 -1, i64 -1>
+  // CHECK-LE: xor <2 x i64> {{%.*}}, splat (i64 -1)
  res_vbll = vec_cmpge(vull, vull);
  // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
  // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpequw
@ -2464,14 +2464,14 @@ void test_p8overloads_backwards_compat() {
  // CHECK: and <4 x i32>
  // CHECK: or <4 x i32>
  // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
-  // CHECK: xor <2 x i64> {{%.*}}, <i64 -1, i64 -1>
+  // CHECK: xor <2 x i64> {{%.*}}, splat (i64 -1)
  // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
  // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw
  // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
  // CHECK-LE: and <4 x i32>
  // CHECK-LE: or <4 x i32>
  // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
-  // CHECK-LE: xor <2 x i64> {{%.*}}, <i64 -1, i64 -1>
+  // CHECK-LE: xor <2 x i64> {{%.*}}, splat (i64 -1)
  dummy();
  // CHECK: call void @dummy()
  // CHECK-LE: call void @dummy()
@ -2516,7 +2516,7 @@ void test_p8overloads_backwards_compat() {
  // CHECK: and <4 x i32>
  // CHECK: or <4 x i32>
  // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
-  // CHECK: xor <2 x i64> {{%.*}}, <i64 -1, i64 -1>
+  // CHECK: xor <2 x i64> {{%.*}}, splat (i64 -1)
  // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtsw
  // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
  // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw
@ -2524,7 +2524,7 @@ void test_p8overloads_backwards_compat() {
  // CHECK-LE: and <4 x i32>
  // CHECK-LE: or <4 x i32>
  // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
-  // CHECK-LE: xor <2 x i64> {{%.*}}, <i64 -1, i64 -1>
+  // CHECK-LE: xor <2 x i64> {{%.*}}, splat (i64 -1)
  res_vbll = vec_cmple(vull, vull);
  // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
  // CHECK: call <4 x i32> @llvm.ppc.altivec.vcmpequw
@ -2532,20 +2532,20 @@ void test_p8overloads_backwards_compat() {
  // CHECK: and <4 x i32>
  // CHECK: or <4 x i32>
  // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
-  // CHECK: xor <2 x i64> {{%.*}}, <i64 -1, i64 -1>
+  // CHECK: xor <2 x i64> {{%.*}}, splat (i64 -1)
  // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpgtuw
  // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vcmpequw
  // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
  // CHECK-LE: and <4 x i32>
  // CHECK-LE: or <4 x i32>
  // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
-  // CHECK-LE: xor <2 x i64> {{%.*}}, <i64 -1, i64 -1>
+  // CHECK-LE: xor <2 x i64> {{%.*}}, splat (i64 -1)
  dummy();
  // CHECK: call void @dummy()
  // CHECK-LE: call void @dummy()

  res_vsll = vec_sl(vsll, vull);
-  // CHECK: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
+  // CHECK: urem <2 x i64> {{%.*}}, splat (i64 64)
  // CHECK: call <4 x i32> @llvm.ppc.altivec.vslo
  // CHECK: call <4 x i32> @llvm.ppc.altivec.vsl
  // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> <i32 1, i32 0>
@ -2553,7 +2553,7 @@ void test_p8overloads_backwards_compat() {
  // CHECK: call <4 x i32> @llvm.ppc.altivec.vslo
  // CHECK: call <4 x i32> @llvm.ppc.altivec.vsl
  // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> <i32 1, i32 3>
-  // CHECK-LE: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
+  // CHECK-LE: urem <2 x i64> {{%.*}}, splat (i64 64)
  // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vslo
  // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsl
  // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> <i32 1, i32 0>
@ -2562,7 +2562,7 @@ void test_p8overloads_backwards_compat() {
  // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsl
  // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> <i32 0, i32 2>
  res_vull = vec_sl(vull, vull);
-  // CHECK: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
+  // CHECK: urem <2 x i64> {{%.*}}, splat (i64 64)
  // CHECK: call <4 x i32> @llvm.ppc.altivec.vslo
  // CHECK: call <4 x i32> @llvm.ppc.altivec.vsl
  // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> <i32 1, i32 0>
@ -2570,7 +2570,7 @@ void test_p8overloads_backwards_compat() {
  // CHECK: call <4 x i32> @llvm.ppc.altivec.vslo
  // CHECK: call <4 x i32> @llvm.ppc.altivec.vsl
  // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> <i32 1, i32 3>
-  // CHECK-LE: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
+  // CHECK-LE: urem <2 x i64> {{%.*}}, splat (i64 64)
  // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vslo
  // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsl
  // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> <i32 1, i32 0>
@ -2583,7 +2583,7 @@ void test_p8overloads_backwards_compat() {
  // CHECK-LE: call void @dummy()

  res_vsll = vec_sr(vsll, vull);
-  // CHECK: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
+  // CHECK: urem <2 x i64> {{%.*}}, splat (i64 64)
  // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> <i32 1, i32 0>
  // CHECK: call <4 x i32> @llvm.ppc.altivec.vsro
  // CHECK: call <4 x i32> @llvm.ppc.altivec.vsr
@ -2591,7 +2591,7 @@ void test_p8overloads_backwards_compat() {
  // CHECK: call <4 x i32> @llvm.ppc.altivec.vsro
  // CHECK: call <4 x i32> @llvm.ppc.altivec.vsr
  // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> <i32 0, i32 2>
-  // CHECK-LE: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
+  // CHECK-LE: urem <2 x i64> {{%.*}}, splat (i64 64)
  // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> <i32 1, i32 0>
  // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsro
  // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsr
@ -2600,7 +2600,7 @@ void test_p8overloads_backwards_compat() {
  // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsr
  // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> <i32 1, i32 3>
  res_vull = vec_sr(vull, vull);
-  // CHECK: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
+  // CHECK: urem <2 x i64> {{%.*}}, splat (i64 64)
  // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> <i32 1, i32 0>
  // CHECK: call <4 x i32> @llvm.ppc.altivec.vsro
  // CHECK: call <4 x i32> @llvm.ppc.altivec.vsr
@ -2608,7 +2608,7 @@ void test_p8overloads_backwards_compat() {
  // CHECK: call <4 x i32> @llvm.ppc.altivec.vsro
  // CHECK: call <4 x i32> @llvm.ppc.altivec.vsr
  // CHECK: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> <i32 0, i32 2>
-  // CHECK-LE: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
+  // CHECK-LE: urem <2 x i64> {{%.*}}, splat (i64 64)
  // CHECK-LE: shufflevector <2 x i64> {{%.*}}, <2 x i64> {{%.*}}, <2 x i32> <i32 1, i32 0>
  // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsro
  // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vsr
@ -2621,14 +2621,14 @@ void test_p8overloads_backwards_compat() {
  // CHECK-LE: call void @dummy()

  res_vsll = vec_sra(vsll, vull);
-  // CHECK: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
+  // CHECK: urem <2 x i64> {{%.*}}, splat (i64 64)
  // CHECK: ashr <2 x i64>
-  // CHECK-LE: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
+  // CHECK-LE: urem <2 x i64> {{%.*}}, splat (i64 64)
  // CHECK-LE: ashr <2 x i64>
  res_vull = vec_sra(vull, vull);
-  // CHECK: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
+  // CHECK: urem <2 x i64> {{%.*}}, splat (i64 64)
  // CHECK: ashr <2 x i64>
-  // CHECK-LE: urem <2 x i64> {{%.*}}, <i64 64, i64 64>
+  // CHECK-LE: urem <2 x i64> {{%.*}}, splat (i64 64)
  // CHECK-LE: ashr <2 x i64>

  /* ----------------------- predicates --------------------------- */
--- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat.c
+++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat.c
@ -27,32 +27,32 @@ void test() {
  res_vf = vec_ctf(vsll, 4);
 // CHECK:         [[TMP0:%.*]] = load <2 x i64>, ptr @vsll, align 16
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.ppc.vsx.xvcvsxdsp(<2 x i64> [[TMP0]])
-// CHECK-NEXT:    fmul <4 x float> [[TMP1]], <float 6.250000e-02, float 6.250000e-02, float 6.250000e-02, float 6.250000e-02>
+// CHECK-NEXT:    fmul <4 x float> [[TMP1]], splat (float 6.250000e-02)
 // NOCOMPAT:      [[TMP0:%.*]] = load <2 x i64>, ptr @vsll, align 16
 // NOCOMPAT-NEXT: [[CONV:%.*]] = sitofp <2 x i64> [[TMP0]] to <2 x double>
-// NOCOMPAT-NEXT: fmul <2 x double> [[CONV]], <double 6.250000e-02, double 6.250000e-02>
+// NOCOMPAT-NEXT: fmul <2 x double> [[CONV]], splat (double 6.250000e-02)

  res_vf = vec_ctf(vull, 4);
 // CHECK:         [[TMP2:%.*]] = load <2 x i64>, ptr @vull, align 16
 // CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.ppc.vsx.xvcvuxdsp(<2 x i64> [[TMP2]])
-// CHECK-NEXT:    fmul <4 x float> [[TMP3]], <float 6.250000e-02, float 6.250000e-02, float 6.250000e-02, float 6.250000e-02>
+// CHECK-NEXT:    fmul <4 x float> [[TMP3]], splat (float 6.250000e-02)
 // NOCOMPAT:      [[TMP2:%.*]] = load <2 x i64>, ptr @vull, align 16
 // NOCOMPAT-NEXT: [[CONV1:%.*]] = uitofp <2 x i64> [[TMP2]] to <2 x double>
-// NOCOMPAT-NEXT: fmul <2 x double> [[CONV1]], <double 6.250000e-02, double 6.250000e-02>
+// NOCOMPAT-NEXT: fmul <2 x double> [[CONV1]], splat (double 6.250000e-02)

  res_vsll = vec_cts(vd, 4);
 // CHECK:         [[TMP4:%.*]] = load <2 x double>, ptr @vd, align 16
-// CHECK-NEXT:    fmul <2 x double> [[TMP4]], <double 1.600000e+01, double 1.600000e+01>
+// CHECK-NEXT:    fmul <2 x double> [[TMP4]], splat (double 1.600000e+01)
 // CHECK:         call <4 x i32> @llvm.ppc.vsx.xvcvdpsxws(<2 x double>
 // NOCOMPAT:      [[TMP4:%.*]] = load <2 x double>, ptr @vd, align 16
-// NOCOMPAT-NEXT: fmul <2 x double> [[TMP4]], <double 1.600000e+01, double 1.600000e+01>
+// NOCOMPAT-NEXT: fmul <2 x double> [[TMP4]], splat (double 1.600000e+01)

  res_vull = vec_ctu(vd, 4);
 // CHECK:         [[TMP8:%.*]] = load <2 x double>, ptr @vd, align 16
-// CHECK-NEXT:    fmul <2 x double> [[TMP8]], <double 1.600000e+01, double 1.600000e+01>
+// CHECK-NEXT:    fmul <2 x double> [[TMP8]], splat (double 1.600000e+01)
 // CHECK:         call <4 x i32> @llvm.ppc.vsx.xvcvdpuxws(<2 x double>
 // NOCOMPAT:      [[TMP7:%.*]] = load <2 x double>, ptr @vd, align 16
-// NOCOMPAT-NEXT: fmul <2 x double> [[TMP7]], <double 1.600000e+01, double 1.600000e+01>
+// NOCOMPAT-NEXT: fmul <2 x double> [[TMP7]], splat (double 1.600000e+01)

  res_vd = vec_round(vd);
 // CHECK:         call double @llvm.ppc.readflm()
--- a/clang/test/CodeGen/PowerPC/ppc-emmintrin.c
+++ b/clang/test/CodeGen/PowerPC/ppc-emmintrin.c
@ -438,7 +438,7 @@ test_converts() {
 // CHECK-LABEL: define available_externally <2 x double> @_mm_cvtepi32_pd
 // CHECK: call <2 x i64> @vec_unpackh(int vector[4])
 // CHECK: %[[CONV:[0-9a-zA-Z_.]+]] = sitofp <2 x i64> %{{[0-9a-zA-Z_.]+}} to <2 x double>
-// CHECK: fmul <2 x double> %[[CONV]], <double 1.000000e+00, double 1.000000e+00>
+// CHECK: fmul <2 x double> %[[CONV]], splat (double 1.000000e+00)

 // CHECK-LABEL: define available_externally <4 x float> @_mm_cvtepi32_ps
 // CHECK: call <4 x float> @llvm.ppc.altivec.vcfsx(<4 x i32> %{{[0-9a-zA-Z_.]+}}, i32 0)
@ -466,7 +466,7 @@ test_converts() {
 // CHECK: call <2 x i64> @vec_splats(unsigned long long)
 // CHECK: call <2 x i64> @vec_unpackl(int vector[4])
 // CHECK: %[[CONV:[0-9a-zA-Z_.]+]] = sitofp <2 x i64> %{{[0-9a-zA-Z._]+}} to <2 x double>
-// CHECK: fmul <2 x double> %[[CONV]], <double 1.000000e+00, double 1.000000e+00>
+// CHECK: fmul <2 x double> %[[CONV]], splat (double 1.000000e+00)

 // CHECK-LABEL: define available_externally <2 x i64> @_mm_cvtps_epi32
 // CHECK: call <4 x float> @vec_rint(float vector[4])
@ -1084,23 +1084,23 @@ test_sll() {
 // CHECK-LABEL: @test_sll

 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sll_epi16
-// CHECK: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: store <8 x i16> splat (i16 15), ptr %{{[0-9a-zA-Z_.]+}}, align 16
 // CHECK-LE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)
 // CHECK-BE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)
-// CHECK: call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+// CHECK: call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef splat (i16 15))
 // CHECK: call <8 x i16> @vec_sl(unsigned short vector[8], unsigned short vector[8])
 // CHECK: call <8 x i16> @vec_sel(unsigned short vector[8], unsigned short vector[8], bool vector[8])

 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sll_epi32
 // CHECK-LE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
 // CHECK-BE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 1)
-// CHECK: call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef <i32 32, i32 32, i32 32, i32 32>)
+// CHECK: call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef {{[0-9a-zA-Z_%.]+}}, <4 x i32> noundef splat (i32 32))
 // CHECK: call <4 x i32> @vec_sl(unsigned int vector[4], unsigned int vector[4])
 // CHECK: call <4 x i32> @vec_sel(unsigned int vector[4], unsigned int vector[4], bool vector[4])

 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sll_epi64
 // CHECK: call <2 x i64> @vec_splat(unsigned long long vector[2], unsigned int)(<2 x i64> noundef {{[0-9a-zA-Z_%.]+}}, i32 noundef zeroext 0)
-// CHECK: call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef {{[0-9a-zA-Z_%.]+}}, <2 x i64> noundef <i64 64, i64 64>)
+// CHECK: call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef {{[0-9a-zA-Z_%.]+}}, <2 x i64> noundef splat (i64 64))
 // CHECK: call <2 x i64> @vec_sl(unsigned long long vector[2], unsigned long long vector[2])
 // CHECK: call <2 x i64> @vec_sel(unsigned long long vector[2], unsigned long long vector[2], bool vector[2])

@ -1179,21 +1179,21 @@ test_sra() {
 // CHECK-LABEL: @test_sra

 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sra_epi16
-// CHECK: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: store <8 x i16> splat (i16 15), ptr %{{[0-9a-zA-Z_.]+}}, align 16
 // CHECK-LE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
 // CHECK-BE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 3)
-// CHECK: call <8 x i16> @vec_min(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+// CHECK: call <8 x i16> @vec_min(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef splat (i16 15))
 // CHECK: call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8])

 // CHECK-LABEL: define available_externally <2 x i64> @_mm_sra_epi32
-// CHECK: store <4 x i32> <i32 31, i32 31, i32 31, i32 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: store <4 x i32> splat (i32 31), ptr %{{[0-9a-zA-Z_.]+}}, align 16
 // CHECK-LE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
 // CHECK-BE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 1)
-// CHECK: call <4 x i32> @vec_min(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 31, i32 31, i32 31, i32 31>)
+// CHECK: call <4 x i32> @vec_min(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 31))
 // CHECK: call <4 x i32> @vec_sra(int vector[4], unsigned int vector[4])

 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srai_epi16
-// CHECK: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: store <8 x i16> splat (i16 15), ptr %{{[0-9a-zA-Z_.]+}}, align 16
 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 16
 // CHECK: br i1 %[[CMP]]
 // CHECK: call i1 @llvm.is.constant
@ -1204,7 +1204,7 @@ test_sra() {
 // CHECK: call <8 x i16> @vec_sra(short vector[8], unsigned short vector[8])

 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srai_epi32
-// CHECK: store <4 x i32> <i32 31, i32 31, i32 31, i32 31>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: store <4 x i32> splat (i32 31), ptr %{{[0-9a-zA-Z_.]+}}, align 16
 // CHECK: %[[CMP:[0-9a-zA-Z_.]+]] = icmp slt i32 %{{[0-9a-zA-Z_.]+}}, 32
 // CHECK: br i1 %[[CMP]]
 // CHECK: call i1 @llvm.is.constant
@ -1230,23 +1230,23 @@ test_srl() {
 // CHECK-LABEL: @test_srl

 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srl_epi16
-// CHECK: store <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>, ptr %{{[0-9a-zA-Z_.]+}}, align 16
+// CHECK: store <8 x i16> splat (i16 15), ptr %{{[0-9a-zA-Z_.]+}}, align 16
 // CHECK-LE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
 // CHECK-BE: call <8 x i16> @vec_splat(unsigned short vector[8], unsigned int)(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 3)
-// CHECK: call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+// CHECK: call <8 x i16> @vec_cmple(unsigned short vector[8], unsigned short vector[8])(<8 x i16> noundef %{{[0-9a-zA-Z_.]+}}, <8 x i16> noundef splat (i16 15))
 // CHECK: call <8 x i16> @vec_sr(unsigned short vector[8], unsigned short vector[8])
 // CHECK: call <8 x i16> @vec_sel(unsigned short vector[8], unsigned short vector[8], bool vector[8])

 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srl_epi32
 // CHECK-LE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
 // CHECK-BE: call <4 x i32> @vec_splat(unsigned int vector[4], unsigned int)(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 1)
-// CHECK: call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 32, i32 32, i32 32, i32 32>)
+// CHECK: call <4 x i32> @vec_cmplt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 32))
 // CHECK: call <4 x i32> @vec_sr(unsigned int vector[4], unsigned int vector[4])
 // CHECK: call <4 x i32> @vec_sel(unsigned int vector[4], unsigned int vector[4], bool vector[4])

 // CHECK-LABEL: define available_externally <2 x i64> @_mm_srl_epi64
 // CHECK: call <2 x i64> @vec_splat(unsigned long long vector[2], unsigned int)(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, i32 noundef zeroext 0)
-// CHECK: call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef <i64 64, i64 64>)
+// CHECK: call <2 x i64> @vec_cmplt(unsigned long long vector[2], unsigned long long vector[2])(<2 x i64> noundef %{{[0-9a-zA-Z_.]+}}, <2 x i64> noundef splat (i64 64))
 // CHECK: call <2 x i64> @vec_sr(unsigned long long vector[2], unsigned long long vector[2])
 // CHECK: call <2 x i64> @vec_sel(unsigned long long vector[2], unsigned long long vector[2], bool vector[2])

--- a/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c
+++ b/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c
@ -218,30 +218,30 @@ test_cmp() {
 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpord_ps
 // CHECK: call <4 x float> @vec_abs(float vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})
 // CHECK: call <4 x float> @vec_abs(float vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}})
-// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
-// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef splat (i32 2139095040), <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef splat (i32 2139095040), <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
 // CHECK: call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])

 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpord_ss
 // CHECK: call <4 x float> @vec_abs(float vector[4])
 // CHECK: call <4 x float> @vec_abs(float vector[4])
-// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
-// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef splat (i32 2139095040), <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
+// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef splat (i32 2139095040), <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
 // CHECK: call <4 x i32> @vec_and(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef %{{[0-9a-zA-Z_.]+}})
 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpunord_ps
 // CHECK: call <4 x float> @vec_abs(float vector[4])
 // CHECK: call <4 x float> @vec_abs(float vector[4])
-// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
-// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
+// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 2139095040))
+// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 2139095040))
 // CHECK: call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4])

 // CHECK-LABEL: define available_externally <4 x float> @_mm_cmpunord_ss
 // CHECK: call <4 x float> @vec_abs(float vector[4])
 // CHECK: call <4 x float> @vec_abs(float vector[4])
-// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
-// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>)
+// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 2139095040))
+// CHECK: call <4 x i32> @vec_cmpgt(unsigned int vector[4], unsigned int vector[4])(<4 x i32> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef splat (i32 2139095040))
 // CHECK: call <4 x i32> @vec_or(unsigned int vector[4], unsigned int vector[4])
 // CHECK: call <4 x float> @vec_sel(float vector[4], float vector[4], unsigned int vector[4])(<4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x float> noundef %{{[0-9a-zA-Z_.]+}}, <4 x i32> noundef <i32 -1, i32 0, i32 0, i32 0>)

--- a/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init-no-parentheses.c
+++ b/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init-no-parentheses.c
@ -51,7 +51,7 @@ void test_vector_bool_pixel_init_no_parentheses(void) {
  vbi8_1 = (vector bool char)'a';
  // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned char'
  // GCC-ERR: error: invalid conversion between vector type '__vector __bool unsigned char' (vector of 16 'unsigned char' values) and integer type 'int' of different size
-  // XL: <i8 97, i8 97, i8 97, i8 97, i8 97, i8 97, i8 97, i8 97, i8 97, i8 97, i8 97, i8 97, i8 97, i8 97, i8 97, i8 97>
+  // XL: splat (i8 97)
  char c = 'c';
  vbi8_2 = (vector bool char)c;
  // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned char'
@ -64,7 +64,7 @@ void test_vector_bool_pixel_init_no_parentheses(void) {
  vbi16_1 = (vector bool short)5;
  // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned short'
  // GCC-ERR: error: invalid conversion between vector type '__vector __bool unsigned short' (vector of 8 'unsigned short' values) and integer type 'int' of different size
-  // XL: <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+  // XL: splat (i16 5)
  short si16 = 55;
  vbi16_2 = (vector bool short)si16;
  // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned short'
@ -77,7 +77,7 @@ void test_vector_bool_pixel_init_no_parentheses(void) {
  vbi32_1 = (vector bool int)9;
  // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned int'
  // GCC-ERR: error: invalid conversion between vector type '__vector __bool unsigned int' (vector of 4 'unsigned int' values) and integer type 'int' of different size
-  // XL: <i32 9, i32 9, i32 9, i32 9>
+  // XL: splat (i32 9)
  int si32 = 99;
  vbi32_2 = (vector bool int)si32;
  // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned int'
@ -90,7 +90,7 @@ void test_vector_bool_pixel_init_no_parentheses(void) {
  vbi64_1 = (vector bool long long)13;
  // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned long long'
  // GCC-ERR: error: invalid conversion between vector type '__vector __bool unsigned long long' (vector of 2 'unsigned long long' values) and integer type 'int' of different size
-  // XL: <i64 13, i64 13>
+  // XL: splat (i64 13)
  long long si64 = 1313;
  vbi64_2 = (vector bool long long)si64;
  // MIXED-ERR: error: invalid conversion between vector type '__vector __bool unsigned long long'
@ -103,5 +103,5 @@ void test_vector_bool_pixel_init_no_parentheses(void) {
  p1 = (vector pixel)1;
  // MIXED-ERR: error: invalid conversion between vector type '__vector __pixel '
  // GCC-ERR: error: invalid conversion between vector type '__vector __pixel ' (vector of 8 'unsigned short' values) and integer type 'int' of different size
-  // XL: <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  // XL: splat (i16 1)
 }
--- a/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init.c
+++ b/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init.c
@ -50,7 +50,7 @@ void test_vector_bool_pixel_init(void) {
  // vector bool char initialization
  vbi8_1 = (vector bool char)('a');
  // MIXED: <i8 97, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
-  // XL: <i8 97, i8 97, i8 97, i8 97, i8 97, i8 97, i8 97, i8 97, i8 97, i8 97, i8 97, i8 97, i8 97, i8 97, i8 97, i8 97>
+  // XL: splat (i8 97)
  // GCC: error: invalid conversion between vector type '__vector __bool unsigned char' (vector of 16 'unsigned char' values) and integer type 'unsigned char' of different size
  char c = 'c';
  vbi8_2 = (vector bool char)(c);
@ -64,7 +64,7 @@ void test_vector_bool_pixel_init(void) {
  // vector bool short initialization
  vbi16_1 = (vector bool short)(5);
  // MIXED: <i16 5, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
-  // XL: <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+  // XL: splat (i16 5)
  // GCC: error: invalid conversion between vector type '__vector __bool unsigned short' (vector of 8 'unsigned short' values) and integer type 'unsigned short' of different size
  short si16 = 55;
  vbi16_2 = (vector bool short)(si16);
@ -78,7 +78,7 @@ void test_vector_bool_pixel_init(void) {
  // vector bool int initialization
  vbi32_1 = (vector bool int)(9);
  // MIXED: <i32 9, i32 0, i32 0, i32 0>
-  // XL: <i32 9, i32 9, i32 9, i32 9>
+  // XL: splat (i32 9)
  // GCC: error: invalid conversion between vector type '__vector __bool unsigned int' (vector of 4 'unsigned int' values) and integer type 'unsigned int' of different size
  int si32 = 99;
  vbi32_2 = (vector bool int)(si32);
@ -92,7 +92,7 @@ void test_vector_bool_pixel_init(void) {
  // vector bool long long initialization
  vbi64_1 = (vector bool long long)(13);
  // MIXED: <i64 13, i64 0>
-  // XL: <i64 13, i64 13>
+  // XL: splat (i64 13)
  // GCC: error: invalid conversion between vector type '__vector __bool unsigned long long' (vector of 2 'unsigned long long' values) and integer type 'unsigned long long' of different size
  long long si64 = 1313;
  vbi64_2 = (vector bool long long)(si64);
@ -106,6 +106,6 @@ void test_vector_bool_pixel_init(void) {
  // vector pixel initialization
  p1 = (vector pixel)(1);
  // MIXED: <i16 1, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
-  // XL: <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  // XL: splat (i16 1)
  // GCC: error: invalid conversion between vector type '__vector __pixel ' (vector of 8 'unsigned short' values) and integer type 'unsigned short' of different size
 }
--- a/clang/test/CodeGen/RISCV/rvv-vls-bitwise-ops.c
+++ b/clang/test/CodeGen/RISCV/rvv-vls-bitwise-ops.c
@ -331,7 +331,7 @@ fixed_uint64m1_t xor_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
 // CHECK-LABEL: @not_i8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[NOT:%.*]] = xor <32 x i8> [[A]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    [[NOT:%.*]] = xor <32 x i8> [[A]], splat (i8 -1)
 // CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
@ -342,7 +342,7 @@ fixed_int8m1_t not_i8(fixed_int8m1_t a) {
 // CHECK-LABEL: @not_i16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[NOT:%.*]] = xor <16 x i16> [[A]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-NEXT:    [[NOT:%.*]] = xor <16 x i16> [[A]], splat (i16 -1)
 // CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
@ -353,7 +353,7 @@ fixed_int16m1_t not_i16(fixed_int16m1_t a) {
 // CHECK-LABEL: @not_i32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[NOT:%.*]] = xor <8 x i32> [[A]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT:    [[NOT:%.*]] = xor <8 x i32> [[A]], splat (i32 -1)
 // CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
@ -364,7 +364,7 @@ fixed_int32m1_t not_i32(fixed_int32m1_t a) {
 // CHECK-LABEL: @not_i64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[NOT:%.*]] = xor <4 x i64> [[A]], <i64 -1, i64 -1, i64 -1, i64 -1>
+// CHECK-NEXT:    [[NOT:%.*]] = xor <4 x i64> [[A]], splat (i64 -1)
 // CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
@ -375,7 +375,7 @@ fixed_int64m1_t not_i64(fixed_int64m1_t a) {
 // CHECK-LABEL: @not_u8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[NOT:%.*]] = xor <32 x i8> [[A]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    [[NOT:%.*]] = xor <32 x i8> [[A]], splat (i8 -1)
 // CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i8> [[CAST_SCALABLE]]
 //
@ -386,7 +386,7 @@ fixed_uint8m1_t not_u8(fixed_uint8m1_t a) {
 // CHECK-LABEL: @not_u16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[NOT:%.*]] = xor <16 x i16> [[A]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-NEXT:    [[NOT:%.*]] = xor <16 x i16> [[A]], splat (i16 -1)
 // CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i16> [[CAST_SCALABLE]]
 //
@ -397,7 +397,7 @@ fixed_uint16m1_t not_u16(fixed_uint16m1_t a) {
 // CHECK-LABEL: @not_u32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[NOT:%.*]] = xor <8 x i32> [[A]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT:    [[NOT:%.*]] = xor <8 x i32> [[A]], splat (i32 -1)
 // CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i32> [[CAST_SCALABLE]]
 //
@ -408,7 +408,7 @@ fixed_uint32m1_t not_u32(fixed_uint32m1_t a) {
 // CHECK-LABEL: @not_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[NOT:%.*]] = xor <4 x i64> [[A]], <i64 -1, i64 -1, i64 -1, i64 -1>
+// CHECK-NEXT:    [[NOT:%.*]] = xor <4 x i64> [[A]], splat (i64 -1)
 // CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 1 x i64> [[CAST_SCALABLE]]
 //
--- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-constrained.c
@ -238,19 +238,19 @@ void test_float(void) {
  // (emulated)
  vd = vec_ctd(vsl, 1);
  // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %{{.*}}, metadata !{{.*}})
-  // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> <double 5.000000e-01, double 5.000000e-01>, metadata !{{.*}})
+  // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> splat (double 5.000000e-01), metadata !{{.*}})
  // (emulated)
  vd = vec_ctd(vul, 1);
  // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %{{.*}}, metadata !{{.*}})
-  // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> <double 5.000000e-01, double 5.000000e-01>, metadata !{{.*}})
+  // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> splat (double 5.000000e-01), metadata !{{.*}})
  // (emulated)
  vd = vec_ctd(vsl, 31);
  // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %{{.*}}, metadata !{{.*}})
-  // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> <double 0x3E00000000000000, double 0x3E00000000000000>, metadata !{{.*}})
+  // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> splat (double 0x3E00000000000000), metadata !{{.*}})
  // (emulated)
  vd = vec_ctd(vul, 31);
  // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %{{.*}}, metadata !{{.*}})
-  // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> <double 0x3E00000000000000, double 0x3E00000000000000>, metadata !{{.*}})
+  // CHECK: call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> [[VAL]], <2 x double> splat (double 0x3E00000000000000), metadata !{{.*}})
  // (emulated)

  vsl = vec_ctsl(vd, 0);
@ -260,19 +260,19 @@ void test_float(void) {
  // CHECK: call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> %{{.*}}, metadata !{{.*}})
  // (emulated)
  vsl = vec_ctsl(vd, 1);
-  // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> {{.*}}, <2 x double> <double 2.000000e+00, double 2.000000e+00>, metadata !{{.*}})
+  // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> {{.*}}, <2 x double> splat (double 2.000000e+00), metadata !{{.*}})
  // CHECK: call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double> [[VAL]], metadata !{{.*}})
  // (emulated)
  vul = vec_ctul(vd, 1);
-  // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> <double 2.000000e+00, double 2.000000e+00>, metadata !{{.*}})
+  // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> splat (double 2.000000e+00), metadata !{{.*}})
  // CHECK: call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> [[VAL]], metadata !{{.*}})
  // (emulated)
  vsl = vec_ctsl(vd, 31);
-  // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> <double 0x41E0000000000000, double 0x41E0000000000000>, metadata !{{.*}})
+  // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> splat (double 0x41E0000000000000), metadata !{{.*}})
  // CHECK: call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double> [[VAL]], metadata !{{.*}})
  // (emulated)
  vul = vec_ctul(vd, 31);
-  // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> <double 0x41E0000000000000, double 0x41E0000000000000>, metadata !{{.*}})
+  // CHECK: [[VAL:%[^ ]+]] = tail call <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double> %{{.*}}, <2 x double> splat (double 0x41E0000000000000), metadata !{{.*}})
  // CHECK: call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double> [[VAL]], metadata !{{.*}})
  // (emulated)

--- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
@ -701,32 +701,32 @@ void test_core(void) {
  vuc = vec_genmask(0x8000);
  // CHECK: <16 x i8> <i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
  vuc = vec_genmask(0xffff);
-  // CHECK: <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  // CHECK: <16 x i8> splat (i8 -1)

  vuc = vec_genmasks_8(0, 7);
-  // CHECK: <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  // CHECK: <16 x i8> splat (i8 -1)
  vuc = vec_genmasks_8(1, 4);
-  // CHECK: <16 x i8> <i8 120, i8 120, i8 120, i8 120, i8 120, i8 120, i8 120, i8 120, i8 120, i8 120, i8 120, i8 120, i8 120, i8 120, i8 120, i8 120>
+  // CHECK: <16 x i8> splat (i8 120)
  vuc = vec_genmasks_8(6, 2);
-  // CHECK: <16 x i8> <i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29, i8 -29>
+  // CHECK: <16 x i8> splat (i8 -29)
  vus = vec_genmasks_16(0, 15);
-  // CHECK: <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+  // CHECK: <8 x i16> splat (i16 -1)
  vus = vec_genmasks_16(2, 11);
-  // CHECK: <8 x i16> <i16 16368, i16 16368, i16 16368, i16 16368, i16 16368, i16 16368, i16 16368, i16 16368>
+  // CHECK: <8 x i16> splat (i16 16368)
  vus = vec_genmasks_16(9, 2);
-  // CHECK:  <8 x i16> <i16 -8065, i16 -8065, i16 -8065, i16 -8065, i16 -8065, i16 -8065, i16 -8065, i16 -8065>
+  // CHECK:  <8 x i16> splat (i16 -8065)
  vui = vec_genmasks_32(0, 31);
-  // CHECK: <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: <4 x i32> splat (i32 -1)
  vui = vec_genmasks_32(7, 20);
-  // CHECK: <4 x i32> <i32 33552384, i32 33552384, i32 33552384, i32 33552384>
+  // CHECK: <4 x i32> splat (i32 33552384)
  vui = vec_genmasks_32(25, 4);
-  // CHECK: <4 x i32> <i32 -134217601, i32 -134217601, i32 -134217601, i32 -134217601>
+  // CHECK: <4 x i32> splat (i32 -134217601)
  vul = vec_genmasks_64(0, 63);
-  // CHECK: <2 x i64> <i64 -1, i64 -1>
+  // CHECK: <2 x i64> splat (i64 -1)
  vul = vec_genmasks_64(3, 40);
-  // CHECK: <2 x i64> <i64 2305843009205305344, i64 2305843009205305344>
+  // CHECK: <2 x i64> splat (i64 2305843009205305344)
  vul = vec_genmasks_64(30, 11);
-  // CHECK: <2 x i64> <i64 -4503582447501313, i64 -4503582447501313>
+  // CHECK: <2 x i64> splat (i64 -4503582447501313)

  vsc = vec_splat(vsc, 0);
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer
@ -808,37 +808,37 @@ void test_core(void) {
  // CHECK-ASM: vrepg

  vsc = vec_splat_s8(-128);
-  // CHECK: <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
+  // CHECK: <16 x i8> splat (i8 -128)
  vsc = vec_splat_s8(127);
-  // CHECK: <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>
+  // CHECK: <16 x i8> splat (i8 127)
  vuc = vec_splat_u8(1);
-  // CHECK: <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  // CHECK: <16 x i8> splat (i8 1)
  vuc = vec_splat_u8(254);
-  // CHECK: <16 x i8> <i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2>
+  // CHECK: <16 x i8> splat (i8 -2)
  vss = vec_splat_s16(-32768);
-  // CHECK: <8 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
+  // CHECK: <8 x i16> splat (i16 -32768)
  vss = vec_splat_s16(32767);
-  // CHECK: <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>
+  // CHECK: <8 x i16> splat (i16 32767)
  vus = vec_splat_u16(1);
-  // CHECK: <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  // CHECK: <8 x i16> splat (i16 1)
  vus = vec_splat_u16(65534);
-  // CHECK: <8 x i16> <i16 -2, i16 -2, i16 -2, i16 -2, i16 -2, i16 -2, i16 -2, i16 -2>
+  // CHECK: <8 x i16> splat (i16 -2)
  vsi = vec_splat_s32(-32768);
-  // CHECK: <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+  // CHECK: <4 x i32> splat (i32 -32768)
  vsi = vec_splat_s32(32767);
-  // CHECK: <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
+  // CHECK: <4 x i32> splat (i32 32767)
  vui = vec_splat_u32(-32768);
-  // CHECK: <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
+  // CHECK: <4 x i32> splat (i32 -32768)
  vui = vec_splat_u32(32767);
-  // CHECK: <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
+  // CHECK: <4 x i32> splat (i32 32767)
  vsl = vec_splat_s64(-32768);
-  // CHECK: <2 x i64> <i64 -32768, i64 -32768>
+  // CHECK: <2 x i64> splat (i64 -32768)
  vsl = vec_splat_s64(32767);
-  // CHECK: <2 x i64> <i64 32767, i64 32767>
+  // CHECK: <2 x i64> splat (i64 32767)
  vul = vec_splat_u64(-32768);
-  // CHECK: <2 x i64> <i64 -32768, i64 -32768>
+  // CHECK: <2 x i64> splat (i64 -32768)
  vul = vec_splat_u64(32767);
-  // CHECK: <2 x i64> <i64 32767, i64 32767>
+  // CHECK: <2 x i64> splat (i64 32767)

  vsc = vec_splats(sc);
  // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> poison, <16 x i32> zeroinitializer
@ -4471,19 +4471,19 @@ void test_float(void) {
  // (emulated)
  vd = vec_ctd(vsl, 1);
  // CHECK: [[VAL:%[^ ]+]] = sitofp <2 x i64> %{{.*}} to <2 x double>
-  // CHECK: fmul <2 x double> [[VAL]], <double 5.000000e-01, double 5.000000e-01>
+  // CHECK: fmul <2 x double> [[VAL]], splat (double 5.000000e-01)
  // (emulated)
  vd = vec_ctd(vul, 1);
  // CHECK: [[VAL:%[^ ]+]] = uitofp <2 x i64> %{{.*}} to <2 x double>
-  // CHECK: fmul <2 x double> [[VAL]], <double 5.000000e-01, double 5.000000e-01>
+  // CHECK: fmul <2 x double> [[VAL]], splat (double 5.000000e-01)
  // (emulated)
  vd = vec_ctd(vsl, 31);
  // CHECK: [[VAL:%[^ ]+]] = sitofp <2 x i64> %{{.*}} to <2 x double>
-  // CHECK: fmul <2 x double> [[VAL]], <double 0x3E00000000000000, double 0x3E00000000000000>
+  // CHECK: fmul <2 x double> [[VAL]], splat (double 0x3E00000000000000)
  // (emulated)
  vd = vec_ctd(vul, 31);
  // CHECK: [[VAL:%[^ ]+]] = uitofp <2 x i64> %{{.*}} to <2 x double>
-  // CHECK: fmul <2 x double> [[VAL]], <double 0x3E00000000000000, double 0x3E00000000000000>
+  // CHECK: fmul <2 x double> [[VAL]], splat (double 0x3E00000000000000)
  // (emulated)

  vsl = vec_ctsl(vd, 0);
@ -4493,19 +4493,19 @@ void test_float(void) {
  // CHECK: fptoui <2 x double> %{{.*}} to <2 x i64>
  // (emulated)
  vsl = vec_ctsl(vd, 1);
-  // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, <double 2.000000e+00, double 2.000000e+00>
+  // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, splat (double 2.000000e+00)
  // CHECK: fptosi <2 x double> [[VAL]] to <2 x i64>
  // (emulated)
  vul = vec_ctul(vd, 1);
-  // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, <double 2.000000e+00, double 2.000000e+00>
+  // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, splat (double 2.000000e+00)
  // CHECK: fptoui <2 x double> [[VAL]] to <2 x i64>
  // (emulated)
  vsl = vec_ctsl(vd, 31);
-  // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, <double 0x41E0000000000000, double 0x41E0000000000000>
+  // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, splat (double 0x41E0000000000000)
  // CHECK: fptosi <2 x double> [[VAL]] to <2 x i64>
  // (emulated)
  vul = vec_ctul(vd, 31);
-  // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, <double 0x41E0000000000000, double 0x41E0000000000000>
+  // CHECK: [[VAL:%[^ ]+]] = fmul <2 x double> %{{.*}}, splat (double 0x41E0000000000000)
  // CHECK: fptoui <2 x double> [[VAL]] to <2 x i64>
  // (emulated)

--- a/clang/test/CodeGen/SystemZ/zvector.c
+++ b/clang/test/CodeGen/SystemZ/zvector.c
@ -124,31 +124,31 @@ void test_neg(void) {

 // CHECK-LABEL: define{{.*}} void @test_preinc() #0 {
 // CHECK:   [[TMP0:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8
-// CHECK:   [[INC:%.*]] = add <16 x i8> [[TMP0]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+// CHECK:   [[INC:%.*]] = add <16 x i8> [[TMP0]], splat (i8 1)
 // CHECK:   store volatile <16 x i8> [[INC]], ptr @sc2, align 8
 // CHECK:   [[TMP1:%.*]] = load volatile <16 x i8>, ptr @uc2, align 8
-// CHECK:   [[INC1:%.*]] = add <16 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+// CHECK:   [[INC1:%.*]] = add <16 x i8> [[TMP1]], splat (i8 1)
 // CHECK:   store volatile <16 x i8> [[INC1]], ptr @uc2, align 8
 // CHECK:   [[TMP2:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8
-// CHECK:   [[INC2:%.*]] = add <8 x i16> [[TMP2]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+// CHECK:   [[INC2:%.*]] = add <8 x i16> [[TMP2]], splat (i16 1)
 // CHECK:   store volatile <8 x i16> [[INC2]], ptr @ss2, align 8
 // CHECK:   [[TMP3:%.*]] = load volatile <8 x i16>, ptr @us2, align 8
-// CHECK:   [[INC3:%.*]] = add <8 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+// CHECK:   [[INC3:%.*]] = add <8 x i16> [[TMP3]], splat (i16 1)
 // CHECK:   store volatile <8 x i16> [[INC3]], ptr @us2, align 8
 // CHECK:   [[TMP4:%.*]] = load volatile <4 x i32>, ptr @si2, align 8
-// CHECK:   [[INC4:%.*]] = add <4 x i32> [[TMP4]], <i32 1, i32 1, i32 1, i32 1>
+// CHECK:   [[INC4:%.*]] = add <4 x i32> [[TMP4]], splat (i32 1)
 // CHECK:   store volatile <4 x i32> [[INC4]], ptr @si2, align 8
 // CHECK:   [[TMP5:%.*]] = load volatile <4 x i32>, ptr @ui2, align 8
-// CHECK:   [[INC5:%.*]] = add <4 x i32> [[TMP5]], <i32 1, i32 1, i32 1, i32 1>
+// CHECK:   [[INC5:%.*]] = add <4 x i32> [[TMP5]], splat (i32 1)
 // CHECK:   store volatile <4 x i32> [[INC5]], ptr @ui2, align 8
 // CHECK:   [[TMP6:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8
-// CHECK:   [[INC6:%.*]] = add <2 x i64> [[TMP6]], <i64 1, i64 1>
+// CHECK:   [[INC6:%.*]] = add <2 x i64> [[TMP6]], splat (i64 1)
 // CHECK:   store volatile <2 x i64> [[INC6]], ptr @sl2, align 8
 // CHECK:   [[TMP7:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8
-// CHECK:   [[INC7:%.*]] = add <2 x i64> [[TMP7]], <i64 1, i64 1>
+// CHECK:   [[INC7:%.*]] = add <2 x i64> [[TMP7]], splat (i64 1)
 // CHECK:   store volatile <2 x i64> [[INC7]], ptr @ul2, align 8
 // CHECK:   [[TMP8:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK:   [[INC8:%.*]] = fadd <2 x double> [[TMP8]], <double 1.000000e+00, double 1.000000e+00>
+// CHECK:   [[INC8:%.*]] = fadd <2 x double> [[TMP8]], splat (double 1.000000e+00)
 // CHECK:   store volatile <2 x double> [[INC8]], ptr @fd2, align 8
 // CHECK:   ret void
 void test_preinc(void) {
@ -170,31 +170,31 @@ void test_preinc(void) {

 // CHECK-LABEL: define{{.*}} void @test_postinc() #0 {
 // CHECK:   [[TMP0:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8
-// CHECK:   [[INC:%.*]] = add <16 x i8> [[TMP0]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+// CHECK:   [[INC:%.*]] = add <16 x i8> [[TMP0]], splat (i8 1)
 // CHECK:   store volatile <16 x i8> [[INC]], ptr @sc2, align 8
 // CHECK:   [[TMP1:%.*]] = load volatile <16 x i8>, ptr @uc2, align 8
-// CHECK:   [[INC1:%.*]] = add <16 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+// CHECK:   [[INC1:%.*]] = add <16 x i8> [[TMP1]], splat (i8 1)
 // CHECK:   store volatile <16 x i8> [[INC1]], ptr @uc2, align 8
 // CHECK:   [[TMP2:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8
-// CHECK:   [[INC2:%.*]] = add <8 x i16> [[TMP2]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+// CHECK:   [[INC2:%.*]] = add <8 x i16> [[TMP2]], splat (i16 1)
 // CHECK:   store volatile <8 x i16> [[INC2]], ptr @ss2, align 8
 // CHECK:   [[TMP3:%.*]] = load volatile <8 x i16>, ptr @us2, align 8
-// CHECK:   [[INC3:%.*]] = add <8 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+// CHECK:   [[INC3:%.*]] = add <8 x i16> [[TMP3]], splat (i16 1)
 // CHECK:   store volatile <8 x i16> [[INC3]], ptr @us2, align 8
 // CHECK:   [[TMP4:%.*]] = load volatile <4 x i32>, ptr @si2, align 8
-// CHECK:   [[INC4:%.*]] = add <4 x i32> [[TMP4]], <i32 1, i32 1, i32 1, i32 1>
+// CHECK:   [[INC4:%.*]] = add <4 x i32> [[TMP4]], splat (i32 1)
 // CHECK:   store volatile <4 x i32> [[INC4]], ptr @si2, align 8
 // CHECK:   [[TMP5:%.*]] = load volatile <4 x i32>, ptr @ui2, align 8
-// CHECK:   [[INC5:%.*]] = add <4 x i32> [[TMP5]], <i32 1, i32 1, i32 1, i32 1>
+// CHECK:   [[INC5:%.*]] = add <4 x i32> [[TMP5]], splat (i32 1)
 // CHECK:   store volatile <4 x i32> [[INC5]], ptr @ui2, align 8
 // CHECK:   [[TMP6:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8
-// CHECK:   [[INC6:%.*]] = add <2 x i64> [[TMP6]], <i64 1, i64 1>
+// CHECK:   [[INC6:%.*]] = add <2 x i64> [[TMP6]], splat (i64 1)
 // CHECK:   store volatile <2 x i64> [[INC6]], ptr @sl2, align 8
 // CHECK:   [[TMP7:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8
-// CHECK:   [[INC7:%.*]] = add <2 x i64> [[TMP7]], <i64 1, i64 1>
+// CHECK:   [[INC7:%.*]] = add <2 x i64> [[TMP7]], splat (i64 1)
 // CHECK:   store volatile <2 x i64> [[INC7]], ptr @ul2, align 8
 // CHECK:   [[TMP8:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK:   [[INC8:%.*]] = fadd <2 x double> [[TMP8]], <double 1.000000e+00, double 1.000000e+00>
+// CHECK:   [[INC8:%.*]] = fadd <2 x double> [[TMP8]], splat (double 1.000000e+00)
 // CHECK:   store volatile <2 x double> [[INC8]], ptr @fd2, align 8
 // CHECK:   ret void
 void test_postinc(void) {
@ -216,31 +216,31 @@ void test_postinc(void) {

 // CHECK-LABEL: define{{.*}} void @test_predec() #0 {
 // CHECK:   [[TMP0:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8
-// CHECK:   [[DEC:%.*]] = add <16 x i8> [[TMP0]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK:   [[DEC:%.*]] = add <16 x i8> [[TMP0]], splat (i8 -1)
 // CHECK:   store volatile <16 x i8> [[DEC]], ptr @sc2, align 8
 // CHECK:   [[TMP1:%.*]] = load volatile <16 x i8>, ptr @uc2, align 8
-// CHECK:   [[DEC1:%.*]] = add <16 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK:   [[DEC1:%.*]] = add <16 x i8> [[TMP1]], splat (i8 -1)
 // CHECK:   store volatile <16 x i8> [[DEC1]], ptr @uc2, align 8
 // CHECK:   [[TMP2:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8
-// CHECK:   [[DEC2:%.*]] = add <8 x i16> [[TMP2]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:   [[DEC2:%.*]] = add <8 x i16> [[TMP2]], splat (i16 -1)
 // CHECK:   store volatile <8 x i16> [[DEC2]], ptr @ss2, align 8
 // CHECK:   [[TMP3:%.*]] = load volatile <8 x i16>, ptr @us2, align 8
-// CHECK:   [[DEC3:%.*]] = add <8 x i16> [[TMP3]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:   [[DEC3:%.*]] = add <8 x i16> [[TMP3]], splat (i16 -1)
 // CHECK:   store volatile <8 x i16> [[DEC3]], ptr @us2, align 8
 // CHECK:   [[TMP4:%.*]] = load volatile <4 x i32>, ptr @si2, align 8
-// CHECK:   [[DEC4:%.*]] = add <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK:   [[DEC4:%.*]] = add <4 x i32> [[TMP4]], splat (i32 -1)
 // CHECK:   store volatile <4 x i32> [[DEC4]], ptr @si2, align 8
 // CHECK:   [[TMP5:%.*]] = load volatile <4 x i32>, ptr @ui2, align 8
-// CHECK:   [[DEC5:%.*]] = add <4 x i32> [[TMP5]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK:   [[DEC5:%.*]] = add <4 x i32> [[TMP5]], splat (i32 -1)
 // CHECK:   store volatile <4 x i32> [[DEC5]], ptr @ui2, align 8
 // CHECK:   [[TMP6:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8
-// CHECK:   [[DEC6:%.*]] = add <2 x i64> [[TMP6]], <i64 -1, i64 -1>
+// CHECK:   [[DEC6:%.*]] = add <2 x i64> [[TMP6]], splat (i64 -1)
 // CHECK:   store volatile <2 x i64> [[DEC6]], ptr @sl2, align 8
 // CHECK:   [[TMP7:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8
-// CHECK:   [[DEC7:%.*]] = add <2 x i64> [[TMP7]], <i64 -1, i64 -1>
+// CHECK:   [[DEC7:%.*]] = add <2 x i64> [[TMP7]], splat (i64 -1)
 // CHECK:   store volatile <2 x i64> [[DEC7]], ptr @ul2, align 8
 // CHECK:   [[TMP8:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK:   [[DEC8:%.*]] = fadd <2 x double> [[TMP8]], <double -1.000000e+00, double -1.000000e+00>
+// CHECK:   [[DEC8:%.*]] = fadd <2 x double> [[TMP8]], splat (double -1.000000e+00)
 // CHECK:   store volatile <2 x double> [[DEC8]], ptr @fd2, align 8
 // CHECK:   ret void
 void test_predec(void) {
@ -262,31 +262,31 @@ void test_predec(void) {

 // CHECK-LABEL: define{{.*}} void @test_postdec() #0 {
 // CHECK:   [[TMP0:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8
-// CHECK:   [[DEC:%.*]] = add <16 x i8> [[TMP0]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK:   [[DEC:%.*]] = add <16 x i8> [[TMP0]], splat (i8 -1)
 // CHECK:   store volatile <16 x i8> [[DEC]], ptr @sc2, align 8
 // CHECK:   [[TMP1:%.*]] = load volatile <16 x i8>, ptr @uc2, align 8
-// CHECK:   [[DEC1:%.*]] = add <16 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK:   [[DEC1:%.*]] = add <16 x i8> [[TMP1]], splat (i8 -1)
 // CHECK:   store volatile <16 x i8> [[DEC1]], ptr @uc2, align 8
 // CHECK:   [[TMP2:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8
-// CHECK:   [[DEC2:%.*]] = add <8 x i16> [[TMP2]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:   [[DEC2:%.*]] = add <8 x i16> [[TMP2]], splat (i16 -1)
 // CHECK:   store volatile <8 x i16> [[DEC2]], ptr @ss2, align 8
 // CHECK:   [[TMP3:%.*]] = load volatile <8 x i16>, ptr @us2, align 8
-// CHECK:   [[DEC3:%.*]] = add <8 x i16> [[TMP3]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:   [[DEC3:%.*]] = add <8 x i16> [[TMP3]], splat (i16 -1)
 // CHECK:   store volatile <8 x i16> [[DEC3]], ptr @us2, align 8
 // CHECK:   [[TMP4:%.*]] = load volatile <4 x i32>, ptr @si2, align 8
-// CHECK:   [[DEC4:%.*]] = add <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK:   [[DEC4:%.*]] = add <4 x i32> [[TMP4]], splat (i32 -1)
 // CHECK:   store volatile <4 x i32> [[DEC4]], ptr @si2, align 8
 // CHECK:   [[TMP5:%.*]] = load volatile <4 x i32>, ptr @ui2, align 8
-// CHECK:   [[DEC5:%.*]] = add <4 x i32> [[TMP5]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK:   [[DEC5:%.*]] = add <4 x i32> [[TMP5]], splat (i32 -1)
 // CHECK:   store volatile <4 x i32> [[DEC5]], ptr @ui2, align 8
 // CHECK:   [[TMP6:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8
-// CHECK:   [[DEC6:%.*]] = add <2 x i64> [[TMP6]], <i64 -1, i64 -1>
+// CHECK:   [[DEC6:%.*]] = add <2 x i64> [[TMP6]], splat (i64 -1)
 // CHECK:   store volatile <2 x i64> [[DEC6]], ptr @sl2, align 8
 // CHECK:   [[TMP7:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8
-// CHECK:   [[DEC7:%.*]] = add <2 x i64> [[TMP7]], <i64 -1, i64 -1>
+// CHECK:   [[DEC7:%.*]] = add <2 x i64> [[TMP7]], splat (i64 -1)
 // CHECK:   store volatile <2 x i64> [[DEC7]], ptr @ul2, align 8
 // CHECK:   [[TMP8:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK:   [[DEC8:%.*]] = fadd <2 x double> [[TMP8]], <double -1.000000e+00, double -1.000000e+00>
+// CHECK:   [[DEC8:%.*]] = fadd <2 x double> [[TMP8]], splat (double -1.000000e+00)
 // CHECK:   store volatile <2 x double> [[DEC8]], ptr @fd2, align 8
 // CHECK:   ret void
 void test_postdec(void) {
@ -1086,40 +1086,40 @@ void test_rem_assign(void) {

 // CHECK-LABEL: define{{.*}} void @test_not() #0 {
 // CHECK:   [[TMP0:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8
-// CHECK:   [[NEG:%.*]] = xor <16 x i8> [[TMP0]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK:   [[NEG:%.*]] = xor <16 x i8> [[TMP0]], splat (i8 -1)
 // CHECK:   store volatile <16 x i8> [[NEG]], ptr @sc, align 8
 // CHECK:   [[TMP1:%.*]] = load volatile <16 x i8>, ptr @uc2, align 8
-// CHECK:   [[NEG1:%.*]] = xor <16 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK:   [[NEG1:%.*]] = xor <16 x i8> [[TMP1]], splat (i8 -1)
 // CHECK:   store volatile <16 x i8> [[NEG1]], ptr @uc, align 8
 // CHECK:   [[TMP2:%.*]] = load volatile <16 x i8>, ptr @bc2, align 8
-// CHECK:   [[NEG2:%.*]] = xor <16 x i8> [[TMP2]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK:   [[NEG2:%.*]] = xor <16 x i8> [[TMP2]], splat (i8 -1)
 // CHECK:   store volatile <16 x i8> [[NEG2]], ptr @bc, align 8
 // CHECK:   [[TMP3:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8
-// CHECK:   [[NEG3:%.*]] = xor <8 x i16> [[TMP3]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:   [[NEG3:%.*]] = xor <8 x i16> [[TMP3]], splat (i16 -1)
 // CHECK:   store volatile <8 x i16> [[NEG3]], ptr @ss, align 8
 // CHECK:   [[TMP4:%.*]] = load volatile <8 x i16>, ptr @us2, align 8
-// CHECK:   [[NEG4:%.*]] = xor <8 x i16> [[TMP4]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:   [[NEG4:%.*]] = xor <8 x i16> [[TMP4]], splat (i16 -1)
 // CHECK:   store volatile <8 x i16> [[NEG4]], ptr @us, align 8
 // CHECK:   [[TMP5:%.*]] = load volatile <8 x i16>, ptr @bs2, align 8
-// CHECK:   [[NEG5:%.*]] = xor <8 x i16> [[TMP5]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:   [[NEG5:%.*]] = xor <8 x i16> [[TMP5]], splat (i16 -1)
 // CHECK:   store volatile <8 x i16> [[NEG5]], ptr @bs, align 8
 // CHECK:   [[TMP6:%.*]] = load volatile <4 x i32>, ptr @si2, align 8
-// CHECK:   [[NEG6:%.*]] = xor <4 x i32> [[TMP6]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK:   [[NEG6:%.*]] = xor <4 x i32> [[TMP6]], splat (i32 -1)
 // CHECK:   store volatile <4 x i32> [[NEG6]], ptr @si, align 8
 // CHECK:   [[TMP7:%.*]] = load volatile <4 x i32>, ptr @ui2, align 8
-// CHECK:   [[NEG7:%.*]] = xor <4 x i32> [[TMP7]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK:   [[NEG7:%.*]] = xor <4 x i32> [[TMP7]], splat (i32 -1)
 // CHECK:   store volatile <4 x i32> [[NEG7]], ptr @ui, align 8
 // CHECK:   [[TMP8:%.*]] = load volatile <4 x i32>, ptr @bi2, align 8
-// CHECK:   [[NEG8:%.*]] = xor <4 x i32> [[TMP8]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK:   [[NEG8:%.*]] = xor <4 x i32> [[TMP8]], splat (i32 -1)
 // CHECK:   store volatile <4 x i32> [[NEG8]], ptr @bi, align 8
 // CHECK:   [[TMP9:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8
-// CHECK:   [[NEG9:%.*]] = xor <2 x i64> [[TMP9]], <i64 -1, i64 -1>
+// CHECK:   [[NEG9:%.*]] = xor <2 x i64> [[TMP9]], splat (i64 -1)
 // CHECK:   store volatile <2 x i64> [[NEG9]], ptr @sl, align 8
 // CHECK:   [[TMP10:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8
-// CHECK:   [[NEG10:%.*]] = xor <2 x i64> [[TMP10]], <i64 -1, i64 -1>
+// CHECK:   [[NEG10:%.*]] = xor <2 x i64> [[TMP10]], splat (i64 -1)
 // CHECK:   store volatile <2 x i64> [[NEG10]], ptr @ul, align 8
 // CHECK:   [[TMP11:%.*]] = load volatile <2 x i64>, ptr @bl2, align 8
-// CHECK:   [[NEG11:%.*]] = xor <2 x i64> [[TMP11]], <i64 -1, i64 -1>
+// CHECK:   [[NEG11:%.*]] = xor <2 x i64> [[TMP11]], splat (i64 -1)
 // CHECK:   store volatile <2 x i64> [[NEG11]], ptr @bl, align 8
 // CHECK:   ret void
 void test_not(void) {
@ -1932,7 +1932,7 @@ void test_xor_assign(void) {
 // CHECK:   [[SHL2:%.*]] = shl <16 x i8> [[TMP4]], [[SH_PROM]]
 // CHECK:   store volatile <16 x i8> [[SHL2]], ptr @sc, align 8
 // CHECK:   [[TMP6:%.*]] = load volatile <16 x i8>, ptr @sc, align 8
-// CHECK:   [[SHL3:%.*]] = shl <16 x i8> [[TMP6]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+// CHECK:   [[SHL3:%.*]] = shl <16 x i8> [[TMP6]], splat (i8 5)
 // CHECK:   store volatile <16 x i8> [[SHL3]], ptr @sc, align 8
 // CHECK:   [[TMP7:%.*]] = load volatile <16 x i8>, ptr @uc, align 8
 // CHECK:   [[TMP8:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8
@ -1950,7 +1950,7 @@ void test_xor_assign(void) {
 // CHECK:   [[SHL9:%.*]] = shl <16 x i8> [[TMP11]], [[SH_PROM8]]
 // CHECK:   store volatile <16 x i8> [[SHL9]], ptr @uc, align 8
 // CHECK:   [[TMP13:%.*]] = load volatile <16 x i8>, ptr @uc, align 8
-// CHECK:   [[SHL10:%.*]] = shl <16 x i8> [[TMP13]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+// CHECK:   [[SHL10:%.*]] = shl <16 x i8> [[TMP13]], splat (i8 5)
 // CHECK:   store volatile <16 x i8> [[SHL10]], ptr @uc, align 8
 // CHECK:   [[TMP14:%.*]] = load volatile <8 x i16>, ptr @ss, align 8
 // CHECK:   [[TMP15:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8
@ -1968,7 +1968,7 @@ void test_xor_assign(void) {
 // CHECK:   [[SHL16:%.*]] = shl <8 x i16> [[TMP18]], [[SH_PROM15]]
 // CHECK:   store volatile <8 x i16> [[SHL16]], ptr @ss, align 8
 // CHECK:   [[TMP20:%.*]] = load volatile <8 x i16>, ptr @ss, align 8
-// CHECK:   [[SHL17:%.*]] = shl <8 x i16> [[TMP20]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+// CHECK:   [[SHL17:%.*]] = shl <8 x i16> [[TMP20]], splat (i16 5)
 // CHECK:   store volatile <8 x i16> [[SHL17]], ptr @ss, align 8
 // CHECK:   [[TMP21:%.*]] = load volatile <8 x i16>, ptr @us, align 8
 // CHECK:   [[TMP22:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8
@ -1986,7 +1986,7 @@ void test_xor_assign(void) {
 // CHECK:   [[SHL23:%.*]] = shl <8 x i16> [[TMP25]], [[SH_PROM22]]
 // CHECK:   store volatile <8 x i16> [[SHL23]], ptr @us, align 8
 // CHECK:   [[TMP27:%.*]] = load volatile <8 x i16>, ptr @us, align 8
-// CHECK:   [[SHL24:%.*]] = shl <8 x i16> [[TMP27]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+// CHECK:   [[SHL24:%.*]] = shl <8 x i16> [[TMP27]], splat (i16 5)
 // CHECK:   store volatile <8 x i16> [[SHL24]], ptr @us, align 8
 // CHECK:   [[TMP28:%.*]] = load volatile <4 x i32>, ptr @si, align 8
 // CHECK:   [[TMP29:%.*]] = load volatile <4 x i32>, ptr @si2, align 8
@ -2003,7 +2003,7 @@ void test_xor_assign(void) {
 // CHECK:   [[SHL29:%.*]] = shl <4 x i32> [[TMP32]], [[SPLAT_SPLAT28]]
 // CHECK:   store volatile <4 x i32> [[SHL29]], ptr @si, align 8
 // CHECK:   [[TMP34:%.*]] = load volatile <4 x i32>, ptr @si, align 8
-// CHECK:   [[SHL30:%.*]] = shl <4 x i32> [[TMP34]], <i32 5, i32 5, i32 5, i32 5>
+// CHECK:   [[SHL30:%.*]] = shl <4 x i32> [[TMP34]], splat (i32 5)
 // CHECK:   store volatile <4 x i32> [[SHL30]], ptr @si, align 8
 // CHECK:   [[TMP35:%.*]] = load volatile <4 x i32>, ptr @ui, align 8
 // CHECK:   [[TMP36:%.*]] = load volatile <4 x i32>, ptr @si2, align 8
@ -2020,7 +2020,7 @@ void test_xor_assign(void) {
 // CHECK:   [[SHL35:%.*]] = shl <4 x i32> [[TMP39]], [[SPLAT_SPLAT34]]
 // CHECK:   store volatile <4 x i32> [[SHL35]], ptr @ui, align 8
 // CHECK:   [[TMP41:%.*]] = load volatile <4 x i32>, ptr @ui, align 8
-// CHECK:   [[SHL36:%.*]] = shl <4 x i32> [[TMP41]], <i32 5, i32 5, i32 5, i32 5>
+// CHECK:   [[SHL36:%.*]] = shl <4 x i32> [[TMP41]], splat (i32 5)
 // CHECK:   store volatile <4 x i32> [[SHL36]], ptr @ui, align 8
 // CHECK:   [[TMP42:%.*]] = load volatile <2 x i64>, ptr @sl, align 8
 // CHECK:   [[TMP43:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8
@ -2038,7 +2038,7 @@ void test_xor_assign(void) {
 // CHECK:   [[SHL42:%.*]] = shl <2 x i64> [[TMP46]], [[SH_PROM41]]
 // CHECK:   store volatile <2 x i64> [[SHL42]], ptr @sl, align 8
 // CHECK:   [[TMP48:%.*]] = load volatile <2 x i64>, ptr @sl, align 8
-// CHECK:   [[SHL43:%.*]] = shl <2 x i64> [[TMP48]], <i64 5, i64 5>
+// CHECK:   [[SHL43:%.*]] = shl <2 x i64> [[TMP48]], splat (i64 5)
 // CHECK:   store volatile <2 x i64> [[SHL43]], ptr @sl, align 8
 // CHECK:   [[TMP49:%.*]] = load volatile <2 x i64>, ptr @ul, align 8
 // CHECK:   [[TMP50:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8
@ -2056,7 +2056,7 @@ void test_xor_assign(void) {
 // CHECK:   [[SHL49:%.*]] = shl <2 x i64> [[TMP53]], [[SH_PROM48]]
 // CHECK:   store volatile <2 x i64> [[SHL49]], ptr @ul, align 8
 // CHECK:   [[TMP55:%.*]] = load volatile <2 x i64>, ptr @ul, align 8
-// CHECK:   [[SHL50:%.*]] = shl <2 x i64> [[TMP55]], <i64 5, i64 5>
+// CHECK:   [[SHL50:%.*]] = shl <2 x i64> [[TMP55]], splat (i64 5)
 // CHECK:   store volatile <2 x i64> [[SHL50]], ptr @ul, align 8
 // CHECK:   ret void
 void test_sl(void) {
@ -2115,7 +2115,7 @@ void test_sl(void) {
 // CHECK:   [[SHL2:%.*]] = shl <16 x i8> [[TMP5]], [[SH_PROM]]
 // CHECK:   store volatile <16 x i8> [[SHL2]], ptr @sc, align 8
 // CHECK:   [[TMP6:%.*]] = load volatile <16 x i8>, ptr @sc, align 8
-// CHECK:   [[SHL3:%.*]] = shl <16 x i8> [[TMP6]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+// CHECK:   [[SHL3:%.*]] = shl <16 x i8> [[TMP6]], splat (i8 5)
 // CHECK:   store volatile <16 x i8> [[SHL3]], ptr @sc, align 8
 // CHECK:   [[TMP7:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8
 // CHECK:   [[TMP8:%.*]] = load volatile <16 x i8>, ptr @uc, align 8
@ -2133,7 +2133,7 @@ void test_sl(void) {
 // CHECK:   [[SHL9:%.*]] = shl <16 x i8> [[TMP12]], [[SH_PROM8]]
 // CHECK:   store volatile <16 x i8> [[SHL9]], ptr @uc, align 8
 // CHECK:   [[TMP13:%.*]] = load volatile <16 x i8>, ptr @uc, align 8
-// CHECK:   [[SHL10:%.*]] = shl <16 x i8> [[TMP13]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+// CHECK:   [[SHL10:%.*]] = shl <16 x i8> [[TMP13]], splat (i8 5)
 // CHECK:   store volatile <16 x i8> [[SHL10]], ptr @uc, align 8
 // CHECK:   [[TMP14:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8
 // CHECK:   [[TMP15:%.*]] = load volatile <8 x i16>, ptr @ss, align 8
@ -2151,7 +2151,7 @@ void test_sl(void) {
 // CHECK:   [[SHL16:%.*]] = shl <8 x i16> [[TMP19]], [[SH_PROM15]]
 // CHECK:   store volatile <8 x i16> [[SHL16]], ptr @ss, align 8
 // CHECK:   [[TMP20:%.*]] = load volatile <8 x i16>, ptr @ss, align 8
-// CHECK:   [[SHL17:%.*]] = shl <8 x i16> [[TMP20]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+// CHECK:   [[SHL17:%.*]] = shl <8 x i16> [[TMP20]], splat (i16 5)
 // CHECK:   store volatile <8 x i16> [[SHL17]], ptr @ss, align 8
 // CHECK:   [[TMP21:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8
 // CHECK:   [[TMP22:%.*]] = load volatile <8 x i16>, ptr @us, align 8
@ -2169,7 +2169,7 @@ void test_sl(void) {
 // CHECK:   [[SHL23:%.*]] = shl <8 x i16> [[TMP26]], [[SH_PROM22]]
 // CHECK:   store volatile <8 x i16> [[SHL23]], ptr @us, align 8
 // CHECK:   [[TMP27:%.*]] = load volatile <8 x i16>, ptr @us, align 8
-// CHECK:   [[SHL24:%.*]] = shl <8 x i16> [[TMP27]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+// CHECK:   [[SHL24:%.*]] = shl <8 x i16> [[TMP27]], splat (i16 5)
 // CHECK:   store volatile <8 x i16> [[SHL24]], ptr @us, align 8
 // CHECK:   [[TMP28:%.*]] = load volatile <4 x i32>, ptr @si2, align 8
 // CHECK:   [[TMP29:%.*]] = load volatile <4 x i32>, ptr @si, align 8
@ -2186,7 +2186,7 @@ void test_sl(void) {
 // CHECK:   [[SHL29:%.*]] = shl <4 x i32> [[TMP33]], [[SPLAT_SPLAT28]]
 // CHECK:   store volatile <4 x i32> [[SHL29]], ptr @si, align 8
 // CHECK:   [[TMP34:%.*]] = load volatile <4 x i32>, ptr @si, align 8
-// CHECK:   [[SHL30:%.*]] = shl <4 x i32> [[TMP34]], <i32 5, i32 5, i32 5, i32 5>
+// CHECK:   [[SHL30:%.*]] = shl <4 x i32> [[TMP34]], splat (i32 5)
 // CHECK:   store volatile <4 x i32> [[SHL30]], ptr @si, align 8
 // CHECK:   [[TMP35:%.*]] = load volatile <4 x i32>, ptr @si2, align 8
 // CHECK:   [[TMP36:%.*]] = load volatile <4 x i32>, ptr @ui, align 8
@ -2203,7 +2203,7 @@ void test_sl(void) {
 // CHECK:   [[SHL35:%.*]] = shl <4 x i32> [[TMP40]], [[SPLAT_SPLAT34]]
 // CHECK:   store volatile <4 x i32> [[SHL35]], ptr @ui, align 8
 // CHECK:   [[TMP41:%.*]] = load volatile <4 x i32>, ptr @ui, align 8
-// CHECK:   [[SHL36:%.*]] = shl <4 x i32> [[TMP41]], <i32 5, i32 5, i32 5, i32 5>
+// CHECK:   [[SHL36:%.*]] = shl <4 x i32> [[TMP41]], splat (i32 5)
 // CHECK:   store volatile <4 x i32> [[SHL36]], ptr @ui, align 8
 // CHECK:   [[TMP42:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8
 // CHECK:   [[TMP43:%.*]] = load volatile <2 x i64>, ptr @sl, align 8
@ -2221,7 +2221,7 @@ void test_sl(void) {
 // CHECK:   [[SHL42:%.*]] = shl <2 x i64> [[TMP47]], [[SH_PROM41]]
 // CHECK:   store volatile <2 x i64> [[SHL42]], ptr @sl, align 8
 // CHECK:   [[TMP48:%.*]] = load volatile <2 x i64>, ptr @sl, align 8
-// CHECK:   [[SHL43:%.*]] = shl <2 x i64> [[TMP48]], <i64 5, i64 5>
+// CHECK:   [[SHL43:%.*]] = shl <2 x i64> [[TMP48]], splat (i64 5)
 // CHECK:   store volatile <2 x i64> [[SHL43]], ptr @sl, align 8
 // CHECK:   [[TMP49:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8
 // CHECK:   [[TMP50:%.*]] = load volatile <2 x i64>, ptr @ul, align 8
@ -2239,7 +2239,7 @@ void test_sl(void) {
 // CHECK:   [[SHL49:%.*]] = shl <2 x i64> [[TMP54]], [[SH_PROM48]]
 // CHECK:   store volatile <2 x i64> [[SHL49]], ptr @ul, align 8
 // CHECK:   [[TMP55:%.*]] = load volatile <2 x i64>, ptr @ul, align 8
-// CHECK:   [[SHL50:%.*]] = shl <2 x i64> [[TMP55]], <i64 5, i64 5>
+// CHECK:   [[SHL50:%.*]] = shl <2 x i64> [[TMP55]], splat (i64 5)
 // CHECK:   store volatile <2 x i64> [[SHL50]], ptr @ul, align 8
 // CHECK:   ret void
 void test_sl_assign(void) {
@ -2298,7 +2298,7 @@ void test_sl_assign(void) {
 // CHECK:   [[SHR2:%.*]] = ashr <16 x i8> [[TMP4]], [[SH_PROM]]
 // CHECK:   store volatile <16 x i8> [[SHR2]], ptr @sc, align 8
 // CHECK:   [[TMP6:%.*]] = load volatile <16 x i8>, ptr @sc, align 8
-// CHECK:   [[SHR3:%.*]] = ashr <16 x i8> [[TMP6]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+// CHECK:   [[SHR3:%.*]] = ashr <16 x i8> [[TMP6]], splat (i8 5)
 // CHECK:   store volatile <16 x i8> [[SHR3]], ptr @sc, align 8
 // CHECK:   [[TMP7:%.*]] = load volatile <16 x i8>, ptr @uc, align 8
 // CHECK:   [[TMP8:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8
@ -2316,7 +2316,7 @@ void test_sl_assign(void) {
 // CHECK:   [[SHR9:%.*]] = lshr <16 x i8> [[TMP11]], [[SH_PROM8]]
 // CHECK:   store volatile <16 x i8> [[SHR9]], ptr @uc, align 8
 // CHECK:   [[TMP13:%.*]] = load volatile <16 x i8>, ptr @uc, align 8
-// CHECK:   [[SHR10:%.*]] = lshr <16 x i8> [[TMP13]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+// CHECK:   [[SHR10:%.*]] = lshr <16 x i8> [[TMP13]], splat (i8 5)
 // CHECK:   store volatile <16 x i8> [[SHR10]], ptr @uc, align 8
 // CHECK:   [[TMP14:%.*]] = load volatile <8 x i16>, ptr @ss, align 8
 // CHECK:   [[TMP15:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8
@ -2334,7 +2334,7 @@ void test_sl_assign(void) {
 // CHECK:   [[SHR16:%.*]] = ashr <8 x i16> [[TMP18]], [[SH_PROM15]]
 // CHECK:   store volatile <8 x i16> [[SHR16]], ptr @ss, align 8
 // CHECK:   [[TMP20:%.*]] = load volatile <8 x i16>, ptr @ss, align 8
-// CHECK:   [[SHR17:%.*]] = ashr <8 x i16> [[TMP20]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+// CHECK:   [[SHR17:%.*]] = ashr <8 x i16> [[TMP20]], splat (i16 5)
 // CHECK:   store volatile <8 x i16> [[SHR17]], ptr @ss, align 8
 // CHECK:   [[TMP21:%.*]] = load volatile <8 x i16>, ptr @us, align 8
 // CHECK:   [[TMP22:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8
@ -2352,7 +2352,7 @@ void test_sl_assign(void) {
 // CHECK:   [[SHR23:%.*]] = lshr <8 x i16> [[TMP25]], [[SH_PROM22]]
 // CHECK:   store volatile <8 x i16> [[SHR23]], ptr @us, align 8
 // CHECK:   [[TMP27:%.*]] = load volatile <8 x i16>, ptr @us, align 8
-// CHECK:   [[SHR24:%.*]] = lshr <8 x i16> [[TMP27]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+// CHECK:   [[SHR24:%.*]] = lshr <8 x i16> [[TMP27]], splat (i16 5)
 // CHECK:   store volatile <8 x i16> [[SHR24]], ptr @us, align 8
 // CHECK:   [[TMP28:%.*]] = load volatile <4 x i32>, ptr @si, align 8
 // CHECK:   [[TMP29:%.*]] = load volatile <4 x i32>, ptr @si2, align 8
@ -2369,7 +2369,7 @@ void test_sl_assign(void) {
 // CHECK:   [[SHR29:%.*]] = ashr <4 x i32> [[TMP32]], [[SPLAT_SPLAT28]]
 // CHECK:   store volatile <4 x i32> [[SHR29]], ptr @si, align 8
 // CHECK:   [[TMP34:%.*]] = load volatile <4 x i32>, ptr @si, align 8
-// CHECK:   [[SHR30:%.*]] = ashr <4 x i32> [[TMP34]], <i32 5, i32 5, i32 5, i32 5>
+// CHECK:   [[SHR30:%.*]] = ashr <4 x i32> [[TMP34]], splat (i32 5)
 // CHECK:   store volatile <4 x i32> [[SHR30]], ptr @si, align 8
 // CHECK:   [[TMP35:%.*]] = load volatile <4 x i32>, ptr @ui, align 8
 // CHECK:   [[TMP36:%.*]] = load volatile <4 x i32>, ptr @si2, align 8
@ -2386,7 +2386,7 @@ void test_sl_assign(void) {
 // CHECK:   [[SHR35:%.*]] = lshr <4 x i32> [[TMP39]], [[SPLAT_SPLAT34]]
 // CHECK:   store volatile <4 x i32> [[SHR35]], ptr @ui, align 8
 // CHECK:   [[TMP41:%.*]] = load volatile <4 x i32>, ptr @ui, align 8
-// CHECK:   [[SHR36:%.*]] = lshr <4 x i32> [[TMP41]], <i32 5, i32 5, i32 5, i32 5>
+// CHECK:   [[SHR36:%.*]] = lshr <4 x i32> [[TMP41]], splat (i32 5)
 // CHECK:   store volatile <4 x i32> [[SHR36]], ptr @ui, align 8
 // CHECK:   [[TMP42:%.*]] = load volatile <2 x i64>, ptr @sl, align 8
 // CHECK:   [[TMP43:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8
@ -2404,7 +2404,7 @@ void test_sl_assign(void) {
 // CHECK:   [[SHR42:%.*]] = ashr <2 x i64> [[TMP46]], [[SH_PROM41]]
 // CHECK:   store volatile <2 x i64> [[SHR42]], ptr @sl, align 8
 // CHECK:   [[TMP48:%.*]] = load volatile <2 x i64>, ptr @sl, align 8
-// CHECK:   [[SHR43:%.*]] = ashr <2 x i64> [[TMP48]], <i64 5, i64 5>
+// CHECK:   [[SHR43:%.*]] = ashr <2 x i64> [[TMP48]], splat (i64 5)
 // CHECK:   store volatile <2 x i64> [[SHR43]], ptr @sl, align 8
 // CHECK:   [[TMP49:%.*]] = load volatile <2 x i64>, ptr @ul, align 8
 // CHECK:   [[TMP50:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8
@ -2422,7 +2422,7 @@ void test_sl_assign(void) {
 // CHECK:   [[SHR49:%.*]] = lshr <2 x i64> [[TMP53]], [[SH_PROM48]]
 // CHECK:   store volatile <2 x i64> [[SHR49]], ptr @ul, align 8
 // CHECK:   [[TMP55:%.*]] = load volatile <2 x i64>, ptr @ul, align 8
-// CHECK:   [[SHR50:%.*]] = lshr <2 x i64> [[TMP55]], <i64 5, i64 5>
+// CHECK:   [[SHR50:%.*]] = lshr <2 x i64> [[TMP55]], splat (i64 5)
 // CHECK:   store volatile <2 x i64> [[SHR50]], ptr @ul, align 8
 // CHECK:   ret void
 void test_sr(void) {
@ -2481,7 +2481,7 @@ void test_sr(void) {
 // CHECK:   [[SHR2:%.*]] = ashr <16 x i8> [[TMP5]], [[SH_PROM]]
 // CHECK:   store volatile <16 x i8> [[SHR2]], ptr @sc, align 8
 // CHECK:   [[TMP6:%.*]] = load volatile <16 x i8>, ptr @sc, align 8
-// CHECK:   [[SHR3:%.*]] = ashr <16 x i8> [[TMP6]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+// CHECK:   [[SHR3:%.*]] = ashr <16 x i8> [[TMP6]], splat (i8 5)
 // CHECK:   store volatile <16 x i8> [[SHR3]], ptr @sc, align 8
 // CHECK:   [[TMP7:%.*]] = load volatile <16 x i8>, ptr @sc2, align 8
 // CHECK:   [[TMP8:%.*]] = load volatile <16 x i8>, ptr @uc, align 8
@ -2499,7 +2499,7 @@ void test_sr(void) {
 // CHECK:   [[SHR9:%.*]] = lshr <16 x i8> [[TMP12]], [[SH_PROM8]]
 // CHECK:   store volatile <16 x i8> [[SHR9]], ptr @uc, align 8
 // CHECK:   [[TMP13:%.*]] = load volatile <16 x i8>, ptr @uc, align 8
-// CHECK:   [[SHR10:%.*]] = lshr <16 x i8> [[TMP13]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+// CHECK:   [[SHR10:%.*]] = lshr <16 x i8> [[TMP13]], splat (i8 5)
 // CHECK:   store volatile <16 x i8> [[SHR10]], ptr @uc, align 8
 // CHECK:   [[TMP14:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8
 // CHECK:   [[TMP15:%.*]] = load volatile <8 x i16>, ptr @ss, align 8
@ -2517,7 +2517,7 @@ void test_sr(void) {
 // CHECK:   [[SHR16:%.*]] = ashr <8 x i16> [[TMP19]], [[SH_PROM15]]
 // CHECK:   store volatile <8 x i16> [[SHR16]], ptr @ss, align 8
 // CHECK:   [[TMP20:%.*]] = load volatile <8 x i16>, ptr @ss, align 8
-// CHECK:   [[SHR17:%.*]] = ashr <8 x i16> [[TMP20]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+// CHECK:   [[SHR17:%.*]] = ashr <8 x i16> [[TMP20]], splat (i16 5)
 // CHECK:   store volatile <8 x i16> [[SHR17]], ptr @ss, align 8
 // CHECK:   [[TMP21:%.*]] = load volatile <8 x i16>, ptr @ss2, align 8
 // CHECK:   [[TMP22:%.*]] = load volatile <8 x i16>, ptr @us, align 8
@ -2535,7 +2535,7 @@ void test_sr(void) {
 // CHECK:   [[SHR23:%.*]] = lshr <8 x i16> [[TMP26]], [[SH_PROM22]]
 // CHECK:   store volatile <8 x i16> [[SHR23]], ptr @us, align 8
 // CHECK:   [[TMP27:%.*]] = load volatile <8 x i16>, ptr @us, align 8
-// CHECK:   [[SHR24:%.*]] = lshr <8 x i16> [[TMP27]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+// CHECK:   [[SHR24:%.*]] = lshr <8 x i16> [[TMP27]], splat (i16 5)
 // CHECK:   store volatile <8 x i16> [[SHR24]], ptr @us, align 8
 // CHECK:   [[TMP28:%.*]] = load volatile <4 x i32>, ptr @si2, align 8
 // CHECK:   [[TMP29:%.*]] = load volatile <4 x i32>, ptr @si, align 8
@ -2552,7 +2552,7 @@ void test_sr(void) {
 // CHECK:   [[SHR29:%.*]] = ashr <4 x i32> [[TMP33]], [[SPLAT_SPLAT28]]
 // CHECK:   store volatile <4 x i32> [[SHR29]], ptr @si, align 8
 // CHECK:   [[TMP34:%.*]] = load volatile <4 x i32>, ptr @si, align 8
-// CHECK:   [[SHR30:%.*]] = ashr <4 x i32> [[TMP34]], <i32 5, i32 5, i32 5, i32 5>
+// CHECK:   [[SHR30:%.*]] = ashr <4 x i32> [[TMP34]], splat (i32 5)
 // CHECK:   store volatile <4 x i32> [[SHR30]], ptr @si, align 8
 // CHECK:   [[TMP35:%.*]] = load volatile <4 x i32>, ptr @si2, align 8
 // CHECK:   [[TMP36:%.*]] = load volatile <4 x i32>, ptr @ui, align 8
@ -2569,7 +2569,7 @@ void test_sr(void) {
 // CHECK:   [[SHR35:%.*]] = lshr <4 x i32> [[TMP40]], [[SPLAT_SPLAT34]]
 // CHECK:   store volatile <4 x i32> [[SHR35]], ptr @ui, align 8
 // CHECK:   [[TMP41:%.*]] = load volatile <4 x i32>, ptr @ui, align 8
-// CHECK:   [[SHR36:%.*]] = lshr <4 x i32> [[TMP41]], <i32 5, i32 5, i32 5, i32 5>
+// CHECK:   [[SHR36:%.*]] = lshr <4 x i32> [[TMP41]], splat (i32 5)
 // CHECK:   store volatile <4 x i32> [[SHR36]], ptr @ui, align 8
 // CHECK:   [[TMP42:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8
 // CHECK:   [[TMP43:%.*]] = load volatile <2 x i64>, ptr @sl, align 8
@ -2587,7 +2587,7 @@ void test_sr(void) {
 // CHECK:   [[SHR42:%.*]] = ashr <2 x i64> [[TMP47]], [[SH_PROM41]]
 // CHECK:   store volatile <2 x i64> [[SHR42]], ptr @sl, align 8
 // CHECK:   [[TMP48:%.*]] = load volatile <2 x i64>, ptr @sl, align 8
-// CHECK:   [[SHR43:%.*]] = ashr <2 x i64> [[TMP48]], <i64 5, i64 5>
+// CHECK:   [[SHR43:%.*]] = ashr <2 x i64> [[TMP48]], splat (i64 5)
 // CHECK:   store volatile <2 x i64> [[SHR43]], ptr @sl, align 8
 // CHECK:   [[TMP49:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8
 // CHECK:   [[TMP50:%.*]] = load volatile <2 x i64>, ptr @ul, align 8
@ -2605,7 +2605,7 @@ void test_sr(void) {
 // CHECK:   [[SHR49:%.*]] = lshr <2 x i64> [[TMP54]], [[SH_PROM48]]
 // CHECK:   store volatile <2 x i64> [[SHR49]], ptr @ul, align 8
 // CHECK:   [[TMP55:%.*]] = load volatile <2 x i64>, ptr @ul, align 8
-// CHECK:   [[SHR50:%.*]] = lshr <2 x i64> [[TMP55]], <i64 5, i64 5>
+// CHECK:   [[SHR50:%.*]] = lshr <2 x i64> [[TMP55]], splat (i64 5)
 // CHECK:   store volatile <2 x i64> [[SHR50]], ptr @ul, align 8
 // CHECK:   ret void
 void test_sr_assign(void) {
--- a/clang/test/CodeGen/SystemZ/zvector2.c
+++ b/clang/test/CodeGen/SystemZ/zvector2.c
@ -32,7 +32,7 @@ void test_preinc (void)
 {
 // CHECK-LABEL: test_preinc
 // CHECK: [[VAL:%[^ ]+]] = load volatile <4 x float>, ptr @ff2
-// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], splat (float 1.000000e+00)
  ++ff2;
 }

@ -40,7 +40,7 @@ void test_postinc (void)
 {
 // CHECK-LABEL: test_postinc
 // CHECK: [[VAL:%[^ ]+]] = load volatile <4 x float>, ptr @ff2
-// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], splat (float 1.000000e+00)
  ff2++;
 }

@ -48,7 +48,7 @@ void test_predec (void)
 {
 // CHECK-LABEL: test_predec
 // CHECK: [[VAL:%[^ ]+]] = load volatile <4 x float>, ptr @ff2
-// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>
+// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], splat (float -1.000000e+00)
  --ff2;
 }

@ -56,7 +56,7 @@ void test_postdec (void)
 {
 // CHECK-LABEL: test_postdec
 // CHECK: [[VAL:%[^ ]+]] = load volatile <4 x float>, ptr @ff2
-// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>
+// CHECK: %{{.*}} = fadd <4 x float> [[VAL]], splat (float -1.000000e+00)
  ff2--;
 }

--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@ -53,14 +53,14 @@ __m256 test_mm256_and_ps(__m256 A, __m256 B) {

 __m256d test_mm256_andnot_pd(__m256d A, __m256d B) {
  // CHECK-LABEL: test_mm256_andnot_pd
-  // CHECK: xor <4 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: xor <4 x i64> %{{.*}}, splat (i64 -1)
  // CHECK: and <4 x i64>
  return _mm256_andnot_pd(A, B);
 }

 __m256 test_mm256_andnot_ps(__m256 A, __m256 B) {
  // CHECK-LABEL: test_mm256_andnot_ps
-  // CHECK: xor <8 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: xor <8 x i32> %{{.*}}, splat (i32 -1)
  // CHECK: and <8 x i32>
  return _mm256_andnot_ps(A, B);
 }
--- a/clang/test/CodeGen/X86/avx10_2bf16-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2bf16-builtins.c
@ -274,7 +274,7 @@ __m256bh test_mm256_loadu_pbh(void *p) {

 __m128bh test_mm_load_sbh(void const *A) {
  // CHECK-LABEL: test_mm_load_sbh
-  // CHECK: %{{.*}} = call <8 x bfloat> @llvm.masked.load.v8bf16.p0(ptr %{{.*}}, i32 1, <8 x i1> bitcast (<1 x i8> <i8 1> to <8 x i1>), <8 x bfloat> %{{.*}})
+  // CHECK: %{{.*}} = call <8 x bfloat> @llvm.masked.load.v8bf16.p0(ptr %{{.*}}, i32 1, <8 x i1> bitcast (<1 x i8> splat (i8 1) to <8 x i1>), <8 x bfloat> %{{.*}})
  return _mm_load_sbh(A);
 }

--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@ -869,18 +869,18 @@ __m256i test_mm256_mpsadbw_epu8(__m256i x, __m256i y) {

 __m256i test_mm256_mul_epi32(__m256i a, __m256i b) {
  // CHECK-LABEL: test_mm256_mul_epi32
-  // CHECK: shl <4 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32>
-  // CHECK: ashr <4 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32>
-  // CHECK: shl <4 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32>
-  // CHECK: ashr <4 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32>
+  // CHECK: shl <4 x i64> %{{.*}}, splat (i64 32)
+  // CHECK: ashr <4 x i64> %{{.*}}, splat (i64 32)
+  // CHECK: shl <4 x i64> %{{.*}}, splat (i64 32)
+  // CHECK: ashr <4 x i64> %{{.*}}, splat (i64 32)
  // CHECK: mul <4 x i64> %{{.*}}, %{{.*}}
  return _mm256_mul_epi32(a, b);
 }

 __m256i test_mm256_mul_epu32(__m256i a, __m256i b) {
  // CHECK-LABEL: test_mm256_mul_epu32
-  // CHECK: and <4 x i64> %{{.*}}, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
-  // CHECK: and <4 x i64> %{{.*}}, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+  // CHECK: and <4 x i64> %{{.*}}, splat (i64 4294967295)
+  // CHECK: and <4 x i64> %{{.*}}, splat (i64 4294967295)
  // CHECK: mul <4 x i64> %{{.*}}, %{{.*}}
  return _mm256_mul_epu32(a, b);
 }
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@ -7,14 +7,14 @@
 __mmask32 test_knot_mask32(__mmask32 a) {
  // CHECK-LABEL: @test_knot_mask32
  // CHECK: [[IN:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
-  // CHECK: [[NOT:%.*]] = xor <32 x i1> [[IN]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  // CHECK: [[NOT:%.*]] = xor <32 x i1> [[IN]], splat (i1 true)
  return _knot_mask32(a);
 }

 __mmask64 test_knot_mask64(__mmask64 a) {
  // CHECK-LABEL: @test_knot_mask64
  // CHECK: [[IN:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
-  // CHECK: [[NOT:%.*]] = xor <64 x i1> [[IN]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  // CHECK: [[NOT:%.*]] = xor <64 x i1> [[IN]], splat (i1 true)
  return _knot_mask64(a);
 }

@ -42,7 +42,7 @@ __mmask32 test_kandn_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D,
  // CHECK-LABEL: @test_kandn_mask32
  // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
-  // CHECK: [[NOT:%.*]] = xor <32 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  // CHECK: [[NOT:%.*]] = xor <32 x i1> [[LHS]], splat (i1 true)
  // CHECK: [[RES:%.*]] = and <32 x i1> [[NOT]], [[RHS]]
  return _mm512_mask_cmpneq_epu16_mask(_kandn_mask32(_mm512_cmpneq_epu16_mask(__A, __B),
                                                     _mm512_cmpneq_epu16_mask(__C, __D)),
@ -53,7 +53,7 @@ __mmask64 test_kandn_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D,
  // CHECK-LABEL: @test_kandn_mask64
  // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
  // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
-  // CHECK: [[NOT:%.*]] = xor <64 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  // CHECK: [[NOT:%.*]] = xor <64 x i1> [[LHS]], splat (i1 true)
  // CHECK: [[RES:%.*]] = and <64 x i1> [[NOT]], [[RHS]]
  return _mm512_mask_cmpneq_epu8_mask(_kandn_mask64(_mm512_cmpneq_epu8_mask(__A, __B),
                                                    _mm512_cmpneq_epu8_mask(__C, __D)),
@ -84,7 +84,7 @@ __mmask32 test_kxnor_mask32(__m512i __A, __m512i __B, __m512i __C, __m512i __D,
  // CHECK-LABEL: @test_kxnor_mask32
  // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
  // CHECK: [[RHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
-  // CHECK: [[NOT:%.*]] = xor <32 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  // CHECK: [[NOT:%.*]] = xor <32 x i1> [[LHS]], splat (i1 true)
  // CHECK: [[RES:%.*]] = xor <32 x i1> [[NOT]], [[RHS]]
  return _mm512_mask_cmpneq_epu16_mask(_kxnor_mask32(_mm512_cmpneq_epu16_mask(__A, __B),
                                                     _mm512_cmpneq_epu16_mask(__C, __D)),
@ -95,7 +95,7 @@ __mmask64 test_kxnor_mask64(__m512i __A, __m512i __B, __m512i __C, __m512i __D,
  // CHECK-LABEL: @test_kxnor_mask64
  // CHECK: [[LHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
  // CHECK: [[RHS:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
-  // CHECK: [[NOT:%.*]] = xor <64 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  // CHECK: [[NOT:%.*]] = xor <64 x i1> [[LHS]], splat (i1 true)
  // CHECK: [[RES:%.*]] = xor <64 x i1> [[NOT]], [[RHS]]
  return _mm512_mask_cmpneq_epu8_mask(_kxnor_mask64(_mm512_cmpneq_epu8_mask(__A, __B),
                                                    _mm512_cmpneq_epu8_mask(__C, __D)),
--- a/clang/test/CodeGen/X86/avx512dq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512dq-builtins.c
@ -6,7 +6,7 @@
 __mmask8 test_knot_mask8(__mmask8 a) {
  // CHECK-LABEL: @test_knot_mask8
  // CHECK: [[IN:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
-  // CHECK: [[NOT:%.*]] = xor <8 x i1> [[IN]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  // CHECK: [[NOT:%.*]] = xor <8 x i1> [[IN]], splat (i1 true)
  return _knot_mask8(a);
 }

@ -24,7 +24,7 @@ __mmask8 test_kandn_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __
  // CHECK-LABEL: @test_kandn_mask8
  // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
  // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
-  // CHECK: [[NOT:%.*]] = xor <8 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  // CHECK: [[NOT:%.*]] = xor <8 x i1> [[LHS]], splat (i1 true)
  // CHECK: [[RES:%.*]] = and <8 x i1> [[NOT]], [[RHS]]
  return _mm512_mask_cmpneq_epu64_mask(_kandn_mask8(_mm512_cmpneq_epu64_mask(__A, __B),
                                                    _mm512_cmpneq_epu64_mask(__C, __D)),
@ -45,7 +45,7 @@ __mmask8 test_kxnor_mask8(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __
  // CHECK-LABEL: @test_kxnor_mask8
  // CHECK: [[LHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
  // CHECK: [[RHS:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
-  // CHECK: [[NOT:%.*]] = xor <8 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  // CHECK: [[NOT:%.*]] = xor <8 x i1> [[LHS]], splat (i1 true)
  // CHECK: [[RES:%.*]] = xor <8 x i1> [[NOT]], [[RHS]]
  return _mm512_mask_cmpneq_epu64_mask(_kxnor_mask8(_mm512_cmpneq_epu64_mask(__A, __B),
                                                    _mm512_cmpneq_epu64_mask(__C, __D)),
@ -388,14 +388,14 @@ __m512 test_mm512_maskz_and_ps (__mmask16 __U, __m512 __A, __m512 __B) {

 __m512d test_mm512_andnot_pd (__m512d __A, __m512d __B) {
  // CHECK-LABEL: @test_mm512_andnot_pd
-  // CHECK: xor <8 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: xor <8 x i64> %{{.*}}, splat (i64 -1)
  // CHECK: and <8 x i64>
  return (__m512d) _mm512_andnot_pd(__A, __B);
 }

 __m512d test_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
  // CHECK-LABEL: @test_mm512_mask_andnot_pd
-  // CHECK: xor <8 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: xor <8 x i64> %{{.*}}, splat (i64 -1)
  // CHECK: and <8 x i64> %{{.*}}, %{{.*}}
  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
  return (__m512d) _mm512_mask_andnot_pd(__W, __U, __A, __B);
@ -403,7 +403,7 @@ __m512d test_mm512_mask_andnot_pd (__m512d __W, __mmask8 __U, __m512d __A, __m51

 __m512d test_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) {
  // CHECK-LABEL: @test_mm512_maskz_andnot_pd
-  // CHECK: xor <8 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: xor <8 x i64> %{{.*}}, splat (i64 -1)
  // CHECK: and <8 x i64> %{{.*}}, %{{.*}}
  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
  return (__m512d) _mm512_maskz_andnot_pd(__U, __A, __B);
@ -411,14 +411,14 @@ __m512d test_mm512_maskz_andnot_pd (__mmask8 __U, __m512d __A, __m512d __B) {

 __m512 test_mm512_andnot_ps (__m512 __A, __m512 __B) {
  // CHECK-LABEL: @test_mm512_andnot_ps
-  // CHECK: xor <16 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: xor <16 x i32> %{{.*}}, splat (i32 -1)
  // CHECK: and <16 x i32>
  return (__m512) _mm512_andnot_ps(__A, __B);
 }

 __m512 test_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
  // CHECK-LABEL: @test_mm512_mask_andnot_ps
-  // CHECK: xor <16 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: xor <16 x i32> %{{.*}}, splat (i32 -1)
  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
  return (__m512) _mm512_mask_andnot_ps(__W, __U, __A, __B);
@ -426,7 +426,7 @@ __m512 test_mm512_mask_andnot_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512

 __m512 test_mm512_maskz_andnot_ps (__mmask16 __U, __m512 __A, __m512 __B) {
  // CHECK-LABEL: @test_mm512_maskz_andnot_ps
-  // CHECK: xor <16 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: xor <16 x i32> %{{.*}}, splat (i32 -1)
  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
  return (__m512) _mm512_maskz_andnot_ps(__U, __A, __B);
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@ -440,7 +440,7 @@ __mmask16 test_mm512_knot(__mmask16 a)
 {
  // CHECK-LABEL: test_mm512_knot
  // CHECK: [[IN:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
-  // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], splat (i1 true)
  // CHECK: bitcast <16 x i1> [[NOT]] to i16
  return _mm512_knot(a);
 }
@ -2844,7 +2844,7 @@ __m512i test_mm512_xor_epi64(__m512i __src,__mmask8 __k, __m512i __a, __m512i __

 __m512i test_mm512_maskz_andnot_epi32 (__mmask16 __k,__m512i __A, __m512i __B){
  // CHECK-LABEL: test_mm512_maskz_andnot_epi32
-  // CHECK: xor <16 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: xor <16 x i32> %{{.*}}, splat (i32 -1)
  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
  return _mm512_maskz_andnot_epi32(__k,__A,__B);
@ -2853,7 +2853,7 @@ __m512i test_mm512_maskz_andnot_epi32 (__mmask16 __k,__m512i __A, __m512i __B){
 __m512i test_mm512_mask_andnot_epi32 (__mmask16 __k,__m512i __A, __m512i __B,
                                      __m512i __src) {
  // CHECK-LABEL: test_mm512_mask_andnot_epi32
-  // CHECK: xor <16 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: xor <16 x i32> %{{.*}}, splat (i32 -1)
  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
  return _mm512_mask_andnot_epi32(__src,__k,__A,__B);
@ -2863,7 +2863,7 @@ __m512i test_mm512_andnot_si512(__m512i __A, __m512i __B)
 {
  //CHECK-LABEL: test_mm512_andnot_si512
  //CHECK: load {{.*}}%__A.addr.i, align 64
-  //CHECK: %not.i = xor{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+  //CHECK: %not.i = xor{{.*}}, splat (i64 -1)
  //CHECK: load {{.*}}%__B.addr.i, align 64
  //CHECK: and <8 x i64> %not.i,{{.*}}

@ -2872,14 +2872,14 @@ __m512i test_mm512_andnot_si512(__m512i __A, __m512i __B)

 __m512i test_mm512_andnot_epi32(__m512i __A, __m512i __B) {
  // CHECK-LABEL: test_mm512_andnot_epi32
-  // CHECK: xor <16 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: xor <16 x i32> %{{.*}}, splat (i32 -1)
  // CHECK: and <16 x i32> %{{.*}}, %{{.*}}
  return _mm512_andnot_epi32(__A,__B);
 }

 __m512i test_mm512_maskz_andnot_epi64 (__mmask8 __k,__m512i __A, __m512i __B) {
  // CHECK-LABEL: test_mm512_maskz_andnot_epi64
-  // CHECK: xor <8 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: xor <8 x i64> %{{.*}}, splat (i64 -1)
  // CHECK: and <8 x i64> %{{.*}}, %{{.*}}
  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
  return _mm512_maskz_andnot_epi64(__k,__A,__B);
@ -2888,7 +2888,7 @@ __m512i test_mm512_maskz_andnot_epi64 (__mmask8 __k,__m512i __A, __m512i __B) {
 __m512i test_mm512_mask_andnot_epi64 (__mmask8 __k,__m512i __A, __m512i __B, 
                                      __m512i __src) {
  //CHECK-LABEL: test_mm512_mask_andnot_epi64
-  // CHECK: xor <8 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: xor <8 x i64> %{{.*}}, splat (i64 -1)
  // CHECK: and <8 x i64> %{{.*}}, %{{.*}}
  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
  return _mm512_mask_andnot_epi64(__src,__k,__A,__B);
@ -2896,7 +2896,7 @@ __m512i test_mm512_mask_andnot_epi64 (__mmask8 __k,__m512i __A, __m512i __B,

 __m512i test_mm512_andnot_epi64(__m512i __A, __m512i __B) {
  //CHECK-LABEL: test_mm512_andnot_epi64
-  // CHECK: xor <8 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: xor <8 x i64> %{{.*}}, splat (i64 -1)
  // CHECK: and <8 x i64> %{{.*}}, %{{.*}}
  return _mm512_andnot_epi64(__A,__B);
 }
@ -2987,20 +2987,20 @@ __m512i test_mm512_add_epi64(__m512i __A, __m512i __B) {

 __m512i test_mm512_mul_epi32(__m512i __A, __m512i __B) {
  //CHECK-LABEL: test_mm512_mul_epi32
-  //CHECK: shl <8 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
-  //CHECK: ashr <8 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
-  //CHECK: shl <8 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
-  //CHECK: ashr <8 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
+  //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32)
+  //CHECK: ashr <8 x i64> %{{.*}}, splat (i64 32)
+  //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32)
+  //CHECK: ashr <8 x i64> %{{.*}}, splat (i64 32)
  //CHECK: mul <8 x i64> %{{.*}}, %{{.*}}
  return _mm512_mul_epi32(__A,__B);
 }

 __m512i test_mm512_maskz_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B) {
  //CHECK-LABEL: test_mm512_maskz_mul_epi32
-  //CHECK: shl <8 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
-  //CHECK: ashr <8 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
-  //CHECK: shl <8 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
-  //CHECK: ashr <8 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
+  //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32)
+  //CHECK: ashr <8 x i64> %{{.*}}, splat (i64 32)
+  //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32)
+  //CHECK: ashr <8 x i64> %{{.*}}, splat (i64 32)
  //CHECK: mul <8 x i64> %{{.*}}, %{{.*}}
  //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
  return _mm512_maskz_mul_epi32(__k,__A,__B);
@ -3008,10 +3008,10 @@ __m512i test_mm512_maskz_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B) {

 __m512i test_mm512_mask_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) {
  //CHECK-LABEL: test_mm512_mask_mul_epi32
-  //CHECK: shl <8 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
-  //CHECK: ashr <8 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
-  //CHECK: shl <8 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
-  //CHECK: ashr <8 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
+  //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32)
+  //CHECK: ashr <8 x i64> %{{.*}}, splat (i64 32)
+  //CHECK: shl <8 x i64> %{{.*}}, splat (i64 32)
+  //CHECK: ashr <8 x i64> %{{.*}}, splat (i64 32)
  //CHECK: mul <8 x i64> %{{.*}}, %{{.*}}
  //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
  return _mm512_mask_mul_epi32(__src,__k,__A,__B);
@ -3019,16 +3019,16 @@ __m512i test_mm512_mask_mul_epi32 (__mmask8 __k,__m512i __A, __m512i __B, __m512

 __m512i test_mm512_mul_epu32 (__m512i __A, __m512i __B) {
  //CHECK-LABEL: test_mm512_mul_epu32
-  //CHECK: and <8 x i64> %{{.*}}, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
-  //CHECK: and <8 x i64> %{{.*}}, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+  //CHECK: and <8 x i64> %{{.*}}, splat (i64 4294967295)
+  //CHECK: and <8 x i64> %{{.*}}, splat (i64 4294967295)
  //CHECK: mul <8 x i64> %{{.*}}, %{{.*}}
  return _mm512_mul_epu32(__A,__B);
 }

 __m512i test_mm512_maskz_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B) {
  //CHECK-LABEL: test_mm512_maskz_mul_epu32
-  //CHECK: and <8 x i64> %{{.*}}, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
-  //CHECK: and <8 x i64> %{{.*}}, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+  //CHECK: and <8 x i64> %{{.*}}, splat (i64 4294967295)
+  //CHECK: and <8 x i64> %{{.*}}, splat (i64 4294967295)
  //CHECK: mul <8 x i64> %{{.*}}, %{{.*}}
  //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
  return _mm512_maskz_mul_epu32(__k,__A,__B);
@ -3036,8 +3036,8 @@ __m512i test_mm512_maskz_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B) {

 __m512i test_mm512_mask_mul_epu32 (__mmask8 __k,__m512i __A, __m512i __B, __m512i __src) {
  //CHECK-LABEL: test_mm512_mask_mul_epu32
-  //CHECK: and <8 x i64> %{{.*}}, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
-  //CHECK: and <8 x i64> %{{.*}}, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+  //CHECK: and <8 x i64> %{{.*}}, splat (i64 4294967295)
+  //CHECK: and <8 x i64> %{{.*}}, splat (i64 4294967295)
  //CHECK: mul <8 x i64> %{{.*}}, %{{.*}}
  //CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
  return _mm512_mask_mul_epu32(__src,__k,__A,__B);
@ -8299,7 +8299,7 @@ __mmask16 test_mm512_kandn(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _
  // CHECK-LABEL: test_mm512_kandn
  // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
  // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
-  // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], splat (i1 true)
  // CHECK: [[RES:%.*]] = and <16 x i1> [[NOT]], [[RHS]]
  // CHECK: bitcast <16 x i1> {{.*}} to i16
  return _mm512_mask_cmpneq_epu32_mask(_mm512_kandn(_mm512_cmpneq_epu32_mask(__A, __B),
@ -8405,7 +8405,7 @@ __mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, _
  // CHECK-LABEL: test_mm512_kxnor
  // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
  // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
-  // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], splat (i1 true)
  // CHECK: [[RES:%.*]] = xor <16 x i1> [[NOT]], [[RHS]]
  // CHECK: bitcast <16 x i1> {{.*}} to i16
  return _mm512_mask_cmpneq_epu32_mask(_mm512_kxnor(_mm512_cmpneq_epu32_mask(__A, __B),
@ -8427,7 +8427,7 @@ __mmask16 test_mm512_kxor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __
 __mmask16 test_knot_mask16(__mmask16 a) {
  // CHECK-LABEL: test_knot_mask16
  // CHECK: [[IN:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
-  // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  // CHECK: [[NOT:%.*]] = xor <16 x i1> [[IN]], splat (i1 true)
  // CHECK: bitcast <16 x i1> [[NOT]] to i16
  return _knot_mask16(a);
 }
@ -8447,7 +8447,7 @@ __mmask16 test_kandn_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D,
  // CHECK-LABEL: test_kandn_mask16
  // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
  // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
-  // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], splat (i1 true)
  // CHECK: [[RES:%.*]] = and <16 x i1> [[NOT]], [[RHS]]
  // CHECK: bitcast <16 x i1> {{.*}} to i16
  return _mm512_mask_cmpneq_epu32_mask(_kandn_mask16(_mm512_cmpneq_epu32_mask(__A, __B),
@ -8470,7 +8470,7 @@ __mmask16 test_kxnor_mask16(__m512i __A, __m512i __B, __m512i __C, __m512i __D,
  // CHECK-LABEL: test_kxnor_mask16
  // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
  // CHECK: [[RHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
-  // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+  // CHECK: [[NOT:%.*]] = xor <16 x i1> [[LHS]], splat (i1 true)
  // CHECK: [[RES:%.*]] = xor <16 x i1> [[NOT]], [[RHS]]
  // CHECK: bitcast <16 x i1> {{.*}} to i16
  return _mm512_mask_cmpneq_epu32_mask(_kxnor_mask16(_mm512_cmpneq_epu32_mask(__A, __B),
--- a/clang/test/CodeGen/X86/avx512vbmi2-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c
@ -88,121 +88,121 @@ __m512i test_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const* __P) {

 __m512i test_mm512_mask_shldi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) {
  // CHECK-LABEL: @test_mm512_mask_shldi_epi64
-  // CHECK: @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> <i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47>)
+  // CHECK: @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 47))
  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
  return _mm512_mask_shldi_epi64(__S, __U, __A, __B, 47);
 }

 __m512i test_mm512_maskz_shldi_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
  // CHECK-LABEL: @test_mm512_maskz_shldi_epi64
-  // CHECK: @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>)
+  // CHECK: @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 63))
  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
  return _mm512_maskz_shldi_epi64(__U, __A, __B, 63);
 }

 __m512i test_mm512_shldi_epi64(__m512i __A, __m512i __B) {
  // CHECK-LABEL: @test_mm512_shldi_epi64
-  // CHECK: @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>)
+  // CHECK: @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 31))
  return _mm512_shldi_epi64(__A, __B, 31);
 }

 __m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
  // CHECK-LABEL: @test_mm512_mask_shldi_epi32
-  // CHECK: @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
+  // CHECK: @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 7))
  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
  return _mm512_mask_shldi_epi32(__S, __U, __A, __B, 7);
 }

 __m512i test_mm512_maskz_shldi_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
  // CHECK-LABEL: @test_mm512_maskz_shldi_epi32
-  // CHECK: @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
+  // CHECK: @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 15))
  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
  return _mm512_maskz_shldi_epi32(__U, __A, __B, 15);
 }

 __m512i test_mm512_shldi_epi32(__m512i __A, __m512i __B) {
  // CHECK-LABEL: @test_mm512_shldi_epi32
-  // CHECK: @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
+  // CHECK: @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 31))
  return _mm512_shldi_epi32(__A, __B, 31);
 }

 __m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) {
  // CHECK-LABEL: @test_mm512_mask_shldi_epi16
-  // CHECK: @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+  // CHECK: @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 3))
  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
  return _mm512_mask_shldi_epi16(__S, __U, __A, __B, 3);
 }

 __m512i test_mm512_maskz_shldi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
  // CHECK-LABEL: @test_mm512_maskz_shldi_epi16
-  // CHECK: @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
+  // CHECK: @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 7))
  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
  return _mm512_maskz_shldi_epi16(__U, __A, __B, 7);
 }

 __m512i test_mm512_shldi_epi16(__m512i __A, __m512i __B) {
  // CHECK-LABEL: @test_mm512_shldi_epi16
-  // CHECK: @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+  // CHECK: @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 15))
  return _mm512_shldi_epi16(__A, __B, 15);
 }

 __m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) {
  // CHECK-LABEL: @test_mm512_mask_shrdi_epi64
-  // CHECK: @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> <i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47>)
+  // CHECK: @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 47))
  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
  return _mm512_mask_shrdi_epi64(__S, __U, __A, __B, 47);
 }

 __m512i test_mm512_maskz_shrdi_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
  // CHECK-LABEL: @test_mm512_maskz_shrdi_epi64
-  // CHECK: @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>)
+  // CHECK: @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 63))
  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
  return _mm512_maskz_shrdi_epi64(__U, __A, __B, 63);
 }

 __m512i test_mm512_shrdi_epi64(__m512i __A, __m512i __B) {
  // CHECK-LABEL: @test_mm512_shrdi_epi64
-  // CHECK: @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>)
+  // CHECK: @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 31))
  return _mm512_shrdi_epi64(__A, __B, 31);
 }

 __m512i test_mm512_mask_shrdi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
  // CHECK-LABEL: @test_mm512_mask_shrdi_epi32
-  // CHECK: @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
+  // CHECK: @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 7))
  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
  return _mm512_mask_shrdi_epi32(__S, __U, __A, __B, 7);
 }

 __m512i test_mm512_maskz_shrdi_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
  // CHECK-LABEL: @test_mm512_maskz_shrdi_epi32
-  // CHECK: @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
+  // CHECK: @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 15))
  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
  return _mm512_maskz_shrdi_epi32(__U, __A, __B, 15);
 }

 __m512i test_mm512_shrdi_epi32(__m512i __A, __m512i __B) {
  // CHECK-LABEL: @test_mm512_shrdi_epi32
-  // CHECK: @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
+  // CHECK: @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 31))
  return _mm512_shrdi_epi32(__A, __B, 31);
 }

 __m512i test_mm512_mask_shrdi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) {
  // CHECK-LABEL: @test_mm512_mask_shrdi_epi16
-  // CHECK: @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+  // CHECK: @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 3))
  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
  return _mm512_mask_shrdi_epi16(__S, __U, __A, __B, 3);
 }

 __m512i test_mm512_maskz_shrdi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
  // CHECK-LABEL: @test_mm512_maskz_shrdi_epi16
-  // CHECK: @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
+  // CHECK: @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 15))
  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
  return _mm512_maskz_shrdi_epi16(__U, __A, __B, 15);
 }

 __m512i test_mm512_shrdi_epi16(__m512i __A, __m512i __B) {
  // CHECK-LABEL: @test_mm512_shrdi_epi16
-  // CHECK: @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>)
+  // CHECK: @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 31))
  return _mm512_shrdi_epi16(__A, __B, 31);
 }

--- a/clang/test/CodeGen/X86/avx512vl-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vl-builtins.c
@ -726,10 +726,10 @@ __m128i test_mm_maskz_sub_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {
 __m256i test_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,
           __m256i __Y) {
  //CHECK-LABEL: @test_mm256_mask_mul_epi32
-  //CHECK: shl <4 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32>
-  //CHECK: ashr <4 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32>
-  //CHECK: shl <4 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32>
-  //CHECK: ashr <4 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32>
+  //CHECK: shl <4 x i64> %{{.*}}, splat (i64 32)
+  //CHECK: ashr <4 x i64> %{{.*}}, splat (i64 32)
+  //CHECK: shl <4 x i64> %{{.*}}, splat (i64 32)
+  //CHECK: ashr <4 x i64> %{{.*}}, splat (i64 32)
  //CHECK: mul <4 x i64> %{{.*}}, %{{.*}}
  //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
  return _mm256_mask_mul_epi32(__W, __M, __X, __Y);
@ -737,10 +737,10 @@ __m256i test_mm256_mask_mul_epi32 (__m256i __W, __mmask8 __M, __m256i __X,

 __m256i test_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) {
  //CHECK-LABEL: @test_mm256_maskz_mul_epi32
-  //CHECK: shl <4 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32>
-  //CHECK: ashr <4 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32>
-  //CHECK: shl <4 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32>
-  //CHECK: ashr <4 x i64> %{{.*}}, <i64 32, i64 32, i64 32, i64 32>
+  //CHECK: shl <4 x i64> %{{.*}}, splat (i64 32)
+  //CHECK: ashr <4 x i64> %{{.*}}, splat (i64 32)
+  //CHECK: shl <4 x i64> %{{.*}}, splat (i64 32)
+  //CHECK: ashr <4 x i64> %{{.*}}, splat (i64 32)
  //CHECK: mul <4 x i64> %{{.*}}, %{{.*}}
  //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
  return _mm256_maskz_mul_epi32(__M, __X, __Y);
@ -750,10 +750,10 @@ __m256i test_mm256_maskz_mul_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) {
 __m128i test_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,
        __m128i __Y) {
  //CHECK-LABEL: @test_mm_mask_mul_epi32
-  //CHECK: shl <2 x i64> %{{.*}}, <i64 32, i64 32>
-  //CHECK: ashr <2 x i64> %{{.*}}, <i64 32, i64 32>
-  //CHECK: shl <2 x i64> %{{.*}}, <i64 32, i64 32>
-  //CHECK: ashr <2 x i64> %{{.*}}, <i64 32, i64 32>
+  //CHECK: shl <2 x i64> %{{.*}}, splat (i64 32)
+  //CHECK: ashr <2 x i64> %{{.*}}, splat (i64 32)
+  //CHECK: shl <2 x i64> %{{.*}}, splat (i64 32)
+  //CHECK: ashr <2 x i64> %{{.*}}, splat (i64 32)
  //CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
  //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
  return _mm_mask_mul_epi32(__W, __M, __X, __Y);
@ -761,10 +761,10 @@ __m128i test_mm_mask_mul_epi32 (__m128i __W, __mmask8 __M, __m128i __X,

 __m128i test_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y) {
  //CHECK-LABEL: @test_mm_maskz_mul_epi32
-  //CHECK: shl <2 x i64> %{{.*}}, <i64 32, i64 32>
-  //CHECK: ashr <2 x i64> %{{.*}}, <i64 32, i64 32>
-  //CHECK: shl <2 x i64> %{{.*}}, <i64 32, i64 32>
-  //CHECK: ashr <2 x i64> %{{.*}}, <i64 32, i64 32>
+  //CHECK: shl <2 x i64> %{{.*}}, splat (i64 32)
+  //CHECK: ashr <2 x i64> %{{.*}}, splat (i64 32)
+  //CHECK: shl <2 x i64> %{{.*}}, splat (i64 32)
+  //CHECK: ashr <2 x i64> %{{.*}}, splat (i64 32)
  //CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
  //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
  return _mm_maskz_mul_epi32(__M, __X, __Y);
@ -773,8 +773,8 @@ __m128i test_mm_maskz_mul_epi32 (__mmask8 __M, __m128i __X, __m128i __Y) {
 __m256i test_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,
           __m256i __Y) {
  //CHECK-LABEL: @test_mm256_mask_mul_epu32
-  //CHECK: and <4 x i64> %{{.*}}, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
-  //CHECK: and <4 x i64> %{{.*}}, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+  //CHECK: and <4 x i64> %{{.*}}, splat (i64 4294967295)
+  //CHECK: and <4 x i64> %{{.*}}, splat (i64 4294967295)
  //CHECK: mul <4 x i64> %{{.*}}, %{{.*}}
  //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
  return _mm256_mask_mul_epu32(__W, __M, __X, __Y);
@ -782,8 +782,8 @@ __m256i test_mm256_mask_mul_epu32 (__m256i __W, __mmask8 __M, __m256i __X,

 __m256i test_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y) {
  //CHECK-LABEL: @test_mm256_maskz_mul_epu32
-  //CHECK: and <4 x i64> %{{.*}}, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
-  //CHECK: and <4 x i64> %{{.*}}, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
+  //CHECK: and <4 x i64> %{{.*}}, splat (i64 4294967295)
+  //CHECK: and <4 x i64> %{{.*}}, splat (i64 4294967295)
  //CHECK: mul <4 x i64> %{{.*}}, %{{.*}}
  //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
  return _mm256_maskz_mul_epu32(__M, __X, __Y);
@ -792,8 +792,8 @@ __m256i test_mm256_maskz_mul_epu32 (__mmask8 __M, __m256i __X, __m256i __Y) {
 __m128i test_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,
        __m128i __Y) {
  //CHECK-LABEL: @test_mm_mask_mul_epu32
-  //CHECK: and <2 x i64> %{{.*}}, <i64 4294967295, i64 4294967295>
-  //CHECK: and <2 x i64> %{{.*}}, <i64 4294967295, i64 4294967295>
+  //CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295)
+  //CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295)
  //CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
  //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
  return _mm_mask_mul_epu32(__W, __M, __X, __Y);
@ -801,8 +801,8 @@ __m128i test_mm_mask_mul_epu32 (__m128i __W, __mmask8 __M, __m128i __X,

 __m128i test_mm_maskz_mul_epu32 (__mmask8 __M, __m128i __X, __m128i __Y) {
  //CHECK-LABEL: @test_mm_maskz_mul_epu32
-  //CHECK: and <2 x i64> %{{.*}}, <i64 4294967295, i64 4294967295>
-  //CHECK: and <2 x i64> %{{.*}}, <i64 4294967295, i64 4294967295>
+  //CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295)
+  //CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295)
  //CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
  //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
  return _mm_maskz_mul_epu32(__M, __X, __Y);
@ -880,14 +880,14 @@ __m128i test_mm_maskz_and_epi32 (__mmask8 __U, __m128i __A, __m128i __B) {

 __m256i test_mm256_andnot_epi32 (__m256i __A, __m256i __B) {
  //CHECK-LABEL: @test_mm256_andnot_epi32
-  //CHECK: xor <8 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  //CHECK: xor <8 x i32> %{{.*}}, splat (i32 -1)
  //CHECK: and <8 x i32> %{{.*}}, %{{.*}}
  return _mm256_andnot_epi32(__A, __B);
 }

 __m256i test_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  //CHECK-LABEL: @test_mm256_mask_andnot_epi32
-  //CHECK: xor <8 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  //CHECK: xor <8 x i32> %{{.*}}, splat (i32 -1)
  //CHECK: and <8 x i32> %{{.*}}, %{{.*}}
  //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
  return _mm256_mask_andnot_epi32(__W, __U, __A, __B);
@ -895,7 +895,7 @@ __m256i test_mm256_mask_andnot_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __

 __m256i test_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B) {
  //CHECK-LABEL: @test_mm256_maskz_andnot_epi32
-  //CHECK: xor <8 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  //CHECK: xor <8 x i32> %{{.*}}, splat (i32 -1)
  //CHECK: and <8 x i32> %{{.*}}, %{{.*}}
  //CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
  return _mm256_maskz_andnot_epi32(__U, __A, __B);
@ -903,14 +903,14 @@ __m256i test_mm256_maskz_andnot_epi32 (__mmask8 __U, __m256i __A, __m256i __B) {

 __m128i test_mm_andnot_epi32 (__m128i __A, __m128i __B) {
  //CHECK-LABEL: @test_mm_andnot_epi32
-  //CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+  //CHECK: xor <4 x i32> %{{.*}}, splat (i32 -1)
  //CHECK: and <4 x i32> %{{.*}}, %{{.*}}
  return _mm_andnot_epi32(__A, __B);
 }

 __m128i test_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  //CHECK-LABEL: @test_mm_mask_andnot_epi32
-  //CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+  //CHECK: xor <4 x i32> %{{.*}}, splat (i32 -1)
  //CHECK: and <4 x i32> %{{.*}}, %{{.*}}
  //CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
  return _mm_mask_andnot_epi32(__W, __U, __A, __B);
@ -918,7 +918,7 @@ __m128i test_mm_mask_andnot_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m12

 __m128i test_mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B) {
  //CHECK-LABEL: @test_mm_maskz_andnot_epi32
-  //CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+  //CHECK: xor <4 x i32> %{{.*}}, splat (i32 -1)
  //CHECK: and <4 x i32> %{{.*}}, %{{.*}}
  //CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
  return _mm_maskz_andnot_epi32(__U, __A, __B);
@ -1046,14 +1046,14 @@ __m128i test_mm_maskz_and_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {

 __m256i test_mm256_andnot_epi64 (__m256i __A, __m256i __B) {
  //CHECK-LABEL: @test_mm256_andnot_epi64
-  //CHECK: xor <4 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1>
+  //CHECK: xor <4 x i64> %{{.*}}, splat (i64 -1)
  //CHECK: and <4 x i64> %{{.*}}, %{{.*}}
  return _mm256_andnot_epi64(__A, __B);
 }

 __m256i test_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  //CHECK-LABEL: @test_mm256_mask_andnot_epi64
-  //CHECK: xor <4 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1>
+  //CHECK: xor <4 x i64> %{{.*}}, splat (i64 -1)
  //CHECK: and <4 x i64> %{{.*}}, %{{.*}}
  //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
  return _mm256_mask_andnot_epi64(__W, __U, __A, __B);
@ -1061,7 +1061,7 @@ __m256i test_mm256_mask_andnot_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __

 __m256i test_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B) {
  //CHECK-LABEL: @test_mm256_maskz_andnot_epi64
-  //CHECK: xor <4 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1>
+  //CHECK: xor <4 x i64> %{{.*}}, splat (i64 -1)
  //CHECK: and <4 x i64> %{{.*}}, %{{.*}}
  //CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
  return _mm256_maskz_andnot_epi64(__U, __A, __B);
@ -1069,14 +1069,14 @@ __m256i test_mm256_maskz_andnot_epi64 (__mmask8 __U, __m256i __A, __m256i __B) {

 __m128i test_mm_andnot_epi64 (__m128i __A, __m128i __B) {
  //CHECK-LABEL: @test_mm_andnot_epi64
-  //CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
+  //CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1)
  //CHECK: and <2 x i64> %{{.*}}, %{{.*}}
  return _mm_andnot_epi64(__A, __B);
 }

 __m128i test_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  //CHECK-LABEL: @test_mm_mask_andnot_epi64
-  //CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
+  //CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1)
  //CHECK: and <2 x i64> %{{.*}}, %{{.*}}
  //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
  return _mm_mask_andnot_epi64(__W,__U, __A, __B);
@ -1084,7 +1084,7 @@ __m128i test_mm_mask_andnot_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m12

 __m128i test_mm_maskz_andnot_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {
  //CHECK-LABEL: @test_mm_maskz_andnot_epi64
-  //CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
+  //CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1)
  //CHECK: and <2 x i64> %{{.*}}, %{{.*}}
  //CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
  return _mm_maskz_andnot_epi64(__U, __A, __B);
--- a/clang/test/CodeGen/X86/avx512vldq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c
@ -45,7 +45,7 @@ __m128i test_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {

 __m256d test_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  // CHECK-LABEL: @test_mm256_mask_andnot_pd
-  // CHECK: xor <4 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: xor <4 x i64> %{{.*}}, splat (i64 -1)
  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
  return (__m256d) _mm256_mask_andnot_pd ( __W, __U, __A, __B);
@ -53,7 +53,7 @@ __m256d test_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m25

 __m256d test_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {
  // CHECK-LABEL: @test_mm256_maskz_andnot_pd
-  // CHECK: xor <4 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: xor <4 x i64> %{{.*}}, splat (i64 -1)
  // CHECK: and <4 x i64> %{{.*}}, %{{.*}}
  // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
  return (__m256d) _mm256_maskz_andnot_pd (__U, __A, __B);
@ -61,7 +61,7 @@ __m256d test_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {

 __m128d test_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  // CHECK-LABEL: @test_mm_mask_andnot_pd
-  // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
+  // CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1)
  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
  return (__m128d) _mm_mask_andnot_pd ( __W, __U, __A, __B);
@ -69,7 +69,7 @@ __m128d test_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d

 __m128d test_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {
  // CHECK-LABEL: @test_mm_maskz_andnot_pd
-  // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
+  // CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1)
  // CHECK: and <2 x i64> %{{.*}}, %{{.*}}
  // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
  return (__m128d) _mm_maskz_andnot_pd (__U, __A, __B);
@ -77,7 +77,7 @@ __m128d test_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {

 __m256 test_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
  // CHECK-LABEL: @test_mm256_mask_andnot_ps
-  // CHECK: xor <8 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: xor <8 x i32> %{{.*}}, splat (i32 -1)
  // CHECK: and <8 x i32> %{{.*}}, %{{.*}}
  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
  return (__m256) _mm256_mask_andnot_ps ( __W, __U, __A, __B);
@ -85,7 +85,7 @@ __m256 test_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 _

 __m256 test_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {
  // CHECK-LABEL: @test_mm256_maskz_andnot_ps
-  // CHECK: xor <8 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: xor <8 x i32> %{{.*}}, splat (i32 -1)
  // CHECK: and <8 x i32> %{{.*}}, %{{.*}}
  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
  return (__m256) _mm256_maskz_andnot_ps (__U, __A, __B);
@ -93,7 +93,7 @@ __m256 test_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {

 __m128 test_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  // CHECK-LABEL: @test_mm_mask_andnot_ps
-  // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: xor <4 x i32> %{{.*}}, splat (i32 -1)
  // CHECK: and <4 x i32> %{{.*}}, %{{.*}}
  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
  return (__m128) _mm_mask_andnot_ps ( __W, __U, __A, __B);
@ -101,7 +101,7 @@ __m128 test_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)

 __m128 test_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) {
  // CHECK-LABEL: @test_mm_maskz_andnot_ps
-  // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: xor <4 x i32> %{{.*}}, splat (i32 -1)
  // CHECK: and <4 x i32> %{{.*}}, %{{.*}}
  // CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
  return (__m128) _mm_maskz_andnot_ps (__U, __A, __B);
--- a/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c
@ -172,241 +172,241 @@ __m256i test_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const* __P) {

 __m256i test_mm256_mask_shldi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
  // CHECK-LABEL: @test_mm256_mask_shldi_epi64
-  // CHECK: @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> <i64 47, i64 47, i64 47, i64 47>)
+  // CHECK: @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 47))
  // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
  return _mm256_mask_shldi_epi64(__S, __U, __A, __B, 47);
 }

 __m256i test_mm256_maskz_shldi_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
  // CHECK-LABEL: @test_mm256_maskz_shldi_epi64
-  // CHECK: @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> <i64 63, i64 63, i64 63, i64 63>)
+  // CHECK: @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 63))
  // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
  return _mm256_maskz_shldi_epi64(__U, __A, __B, 63);
 }

 __m256i test_mm256_shldi_epi64(__m256i __A, __m256i __B) {
  // CHECK-LABEL: @test_mm256_shldi_epi64
-  // CHECK: @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> <i64 31, i64 31, i64 31, i64 31>)
+  // CHECK: @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 31))
  return _mm256_shldi_epi64(__A, __B, 31);
 }

 __m128i test_mm_mask_shldi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
  // CHECK-LABEL: @test_mm_mask_shldi_epi64
-  // CHECK: @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> <i64 47, i64 47>)
+  // CHECK: @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 47))
  // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
  return _mm_mask_shldi_epi64(__S, __U, __A, __B, 47);
 }

 __m128i test_mm_maskz_shldi_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
  // CHECK-LABEL: @test_mm_maskz_shldi_epi64
-  // CHECK: @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> <i64 63, i64 63>)
+  // CHECK: @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 63))
  // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
  return _mm_maskz_shldi_epi64(__U, __A, __B, 63);
 }

 __m128i test_mm_shldi_epi64(__m128i __A, __m128i __B) {
  // CHECK-LABEL: @test_mm_shldi_epi64
-  // CHECK: @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> <i64 31, i64 31>)
+  // CHECK: @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 31))
  return _mm_shldi_epi64(__A, __B, 31);
 }

 __m256i test_mm256_mask_shldi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
  // CHECK-LABEL: @test_mm256_mask_shldi_epi32
-  // CHECK: @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
+  // CHECK: @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 7))
  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
  return _mm256_mask_shldi_epi32(__S, __U, __A, __B, 7);
 }

 __m256i test_mm256_maskz_shldi_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
  // CHECK-LABEL: @test_mm256_maskz_shldi_epi32
-  // CHECK: @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
+  // CHECK: @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 15))
  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
  return _mm256_maskz_shldi_epi32(__U, __A, __B, 15);
 }

 __m256i test_mm256_shldi_epi32(__m256i __A, __m256i __B) {
  // CHECK-LABEL: @test_mm256_shldi_epi32
-  // CHECK: @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
+  // CHECK: @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 31))
  return _mm256_shldi_epi32(__A, __B, 31);
 }

 __m128i test_mm_mask_shldi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
  // CHECK-LABEL: @test_mm_mask_shldi_epi32
-  // CHECK: @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 7, i32 7, i32 7, i32 7>)
+  // CHECK: @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 7))
  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
  return _mm_mask_shldi_epi32(__S, __U, __A, __B, 7);
 }

 __m128i test_mm_maskz_shldi_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
  // CHECK-LABEL: @test_mm_maskz_shldi_epi32
-  // CHECK: @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+  // CHECK: @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 15))
  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
  return _mm_maskz_shldi_epi32(__U, __A, __B, 15);
 }

 __m128i test_mm_shldi_epi32(__m128i __A, __m128i __B) {
  // CHECK-LABEL: @test_mm_shldi_epi32
-  // CHECK: @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 31, i32 31, i32 31, i32 31>)
+  // CHECK: @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 31))
  return _mm_shldi_epi32(__A, __B, 31);
 }

 __m256i test_mm256_mask_shldi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) {
  // CHECK-LABEL: @test_mm256_mask_shldi_epi16
-  // CHECK: @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+  // CHECK: @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 3))
  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
  return _mm256_mask_shldi_epi16(__S, __U, __A, __B, 3);
 }

 __m256i test_mm256_maskz_shldi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
  // CHECK-LABEL: @test_mm256_maskz_shldi_epi16
-  // CHECK: @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
+  // CHECK: @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 7))
  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
  return _mm256_maskz_shldi_epi16(__U, __A, __B, 7);
 }

 __m256i test_mm256_shldi_epi16(__m256i __A, __m256i __B) {
  // CHECK-LABEL: @test_mm256_shldi_epi16
-  // CHECK: @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>)
+  // CHECK: @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 31))
  return _mm256_shldi_epi16(__A, __B, 31);
 }

 __m128i test_mm_mask_shldi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
  // CHECK-LABEL: @test_mm_mask_shldi_epi16
-  // CHECK: @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+  // CHECK: @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 3))
  // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
  return _mm_mask_shldi_epi16(__S, __U, __A, __B, 3);
 }

 __m128i test_mm_maskz_shldi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
  // CHECK-LABEL: @test_mm_maskz_shldi_epi16
-  // CHECK: @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
+  // CHECK: @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 7))
  // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
  return _mm_maskz_shldi_epi16(__U, __A, __B, 7);
 }

 __m128i test_mm_shldi_epi16(__m128i __A, __m128i __B) {
  // CHECK-LABEL: @test_mm_shldi_epi16
-  // CHECK: @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>)
+  // CHECK: @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 31))
  return _mm_shldi_epi16(__A, __B, 31);
 }

 __m256i test_mm256_mask_shrdi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
  // CHECK-LABEL: @test_mm256_mask_shrdi_epi64
-  // CHECK: @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> <i64 47, i64 47, i64 47, i64 47>)
+  // CHECK: @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 47))
  // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
  return _mm256_mask_shrdi_epi64(__S, __U, __A, __B, 47);
 }

 __m256i test_mm256_maskz_shrdi_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
  // CHECK-LABEL: @test_mm256_maskz_shrdi_epi64
-  // CHECK: @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> <i64 63, i64 63, i64 63, i64 63>)
+  // CHECK: @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 63))
  // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
  return _mm256_maskz_shrdi_epi64(__U, __A, __B, 63);
 }

 __m256i test_mm256_shrdi_epi64(__m256i __A, __m256i __B) {
  // CHECK-LABEL: @test_mm256_shrdi_epi64
-  // CHECK: @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> <i64 31, i64 31, i64 31, i64 31>)
+  // CHECK: @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 31)
  return _mm256_shrdi_epi64(__A, __B, 31);
 }

 __m128i test_mm_mask_shrdi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
  // CHECK-LABEL: @test_mm_mask_shrdi_epi64
-  // CHECK: @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> <i64 47, i64 47>)
+  // CHECK: @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 47))
  // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
  return _mm_mask_shrdi_epi64(__S, __U, __A, __B, 47);
 }

 __m128i test_mm_maskz_shrdi_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
  // CHECK-LABEL: @test_mm_maskz_shrdi_epi64
-  // CHECK: @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> <i64 63, i64 63>)
+  // CHECK: @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 63))
  // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
  return _mm_maskz_shrdi_epi64(__U, __A, __B, 63);
 }

 __m128i test_mm_shrdi_epi64(__m128i __A, __m128i __B) {
  // CHECK-LABEL: @test_mm_shrdi_epi64
-  // CHECK: @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> <i64 31, i64 31>)
+  // CHECK: @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 31))
  return _mm_shrdi_epi64(__A, __B, 31);
 }

 __m256i test_mm256_mask_shrdi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) {
  // CHECK-LABEL: @test_mm256_mask_shrdi_epi32
-  // CHECK: @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
+  // CHECK: @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 7))
  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
  return _mm256_mask_shrdi_epi32(__S, __U, __A, __B, 7);
 }

 __m256i test_mm256_maskz_shrdi_epi32(__mmask8 __U, __m256i __A, __m256i __B) {
  // CHECK-LABEL: @test_mm256_maskz_shrdi_epi32
-  // CHECK: @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
+  // CHECK: @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 15))
  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
  return _mm256_maskz_shrdi_epi32(__U, __A, __B, 15);
 }

 __m256i test_mm256_shrdi_epi32(__m256i __A, __m256i __B) {
  // CHECK-LABEL: @test_mm256_shrdi_epi32
-  // CHECK: @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
+  // CHECK: @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 31)
  return _mm256_shrdi_epi32(__A, __B, 31);
 }

 __m128i test_mm_mask_shrdi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
  // CHECK-LABEL: @test_mm_mask_shrdi_epi32
-  // CHECK: @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 7, i32 7, i32 7, i32 7>)
+  // CHECK: @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 7))
  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
  return _mm_mask_shrdi_epi32(__S, __U, __A, __B, 7);
 }

 __m128i test_mm_maskz_shrdi_epi32(__mmask8 __U, __m128i __A, __m128i __B) {
  // CHECK-LABEL: @test_mm_maskz_shrdi_epi32
-  // CHECK: @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
+  // CHECK: @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 15))
  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
  return _mm_maskz_shrdi_epi32(__U, __A, __B, 15);
 }

 __m128i test_mm_shrdi_epi32(__m128i __A, __m128i __B) {
  // CHECK-LABEL: @test_mm_shrdi_epi32
-  // CHECK: @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 31, i32 31, i32 31, i32 31>)
+  // CHECK: @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 31))
  return _mm_shrdi_epi32(__A, __B, 31);
 }

 __m256i test_mm256_mask_shrdi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) {
  // CHECK-LABEL: @test_mm256_mask_shrdi_epi16
-  // CHECK: @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+  // CHECK: @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 3))
  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
  return _mm256_mask_shrdi_epi16(__S, __U, __A, __B, 3);
 }

 __m256i test_mm256_maskz_shrdi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
  // CHECK-LABEL: @test_mm256_maskz_shrdi_epi16
-  // CHECK: @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
+  // CHECK: @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 7))
  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
  return _mm256_maskz_shrdi_epi16(__U, __A, __B, 7);
 }

 __m256i test_mm256_shrdi_epi16(__m256i __A, __m256i __B) {
  // CHECK-LABEL: @test_mm256_shrdi_epi16
-  // CHECK: @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>
+  // CHECK: @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 31))
  return _mm256_shrdi_epi16(__A, __B, 31);
 }

 __m128i test_mm_mask_shrdi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) {
  // CHECK-LABEL: @test_mm_mask_shrdi_epi16
-  // CHECK: @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+  // CHECK: @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 3))
  // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
  return _mm_mask_shrdi_epi16(__S, __U, __A, __B, 3);
 }

 __m128i test_mm_maskz_shrdi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
  // CHECK-LABEL: @test_mm_maskz_shrdi_epi16
-  // CHECK: @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
+  // CHECK: @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 7))
  // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
  return _mm_maskz_shrdi_epi16(__U, __A, __B, 7);
 }

 __m128i test_mm_shrdi_epi16(__m128i __A, __m128i __B) {
  // CHECK-LABEL: @test_mm_shrdi_epi16
-  // CHECK: @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>)
+  // CHECK: @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 31))
  return _mm_shrdi_epi16(__A, __B, 31);
 }

--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@ -86,7 +86,7 @@ __m64 test_mm_and_si64(__m64 a, __m64 b) {

 __m64 test_mm_andnot_si64(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_andnot_si64
-  // CHECK: [[TMP:%.*]] = xor <1 x i64> {{%.*}}, <i64 -1>
+  // CHECK: [[TMP:%.*]] = xor <1 x i64> {{%.*}}, splat (i64 -1)
  // CHECK: and <1 x i64> [[TMP]], {{%.*}}
  return _mm_andnot_si64(a, b);
 }
@ -333,8 +333,8 @@ int test_mm_movemask_pi8(__m64 a) {

 __m64 test_mm_mul_su32(__m64 a, __m64 b) {
  // CHECK-LABEL: test_mm_mul_su32
-  // CHECK: and <2 x i64> {{%.*}}, <i64 4294967295, i64 4294967295>
-  // CHECK: and <2 x i64> {{%.*}}, <i64 4294967295, i64 4294967295>
+  // CHECK: and <2 x i64> {{%.*}}, splat (i64 4294967295)
+  // CHECK: and <2 x i64> {{%.*}}, splat (i64 4294967295)
  // CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
  return _mm_mul_su32(a, b);
 }
--- a/clang/test/CodeGen/X86/sse-builtins.c
+++ b/clang/test/CodeGen/X86/sse-builtins.c
@ -35,7 +35,7 @@ TEST_CONSTEXPR(match_m128(_mm_and_ps((__m128){-4.0f, -5.0f, +6.0f, +7.0f}, (__m1

 __m128 test_mm_andnot_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_andnot_ps
-  // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: xor <4 x i32> %{{.*}}, splat (i32 -1)
  // CHECK: and <4 x i32>
  return _mm_andnot_ps(A, B);
 }
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@ -99,7 +99,7 @@ __m128i test_mm_and_si128(__m128i A, __m128i B) {

 __m128d test_mm_andnot_pd(__m128d A, __m128d B) {
  // CHECK-LABEL: test_mm_andnot_pd
-  // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
+  // CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1)
  // CHECK: and <2 x i64>
  return _mm_andnot_pd(A, B);
 }
@ -107,7 +107,7 @@ TEST_CONSTEXPR(match_m128d(_mm_andnot_pd((__m128d){+1.0, -3.0}, (__m128d){+0.0,

 __m128i test_mm_andnot_si128(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_andnot_si128
-  // CHECK: xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
+  // CHECK: xor <2 x i64> %{{.*}}, splat (i64 -1)
  // CHECK: and <2 x i64>
  return _mm_andnot_si128(A, B);
 }
@ -906,8 +906,8 @@ int test_mm_movemask_pd(__m128d A) {

 __m128i test_mm_mul_epu32(__m128i A, __m128i B) {
  // CHECK-LABEL: test_mm_mul_epu32
-  // CHECK: and <2 x i64> %{{.*}}, <i64 4294967295, i64 4294967295>
-  // CHECK: and <2 x i64> %{{.*}}, <i64 4294967295, i64 4294967295>
+  // CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295)
+  // CHECK: and <2 x i64> %{{.*}}, splat (i64 4294967295)
  // CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
  return _mm_mul_epu32(A, B);
 }
--- a/clang/test/CodeGen/X86/sse41-builtins.c
+++ b/clang/test/CodeGen/X86/sse41-builtins.c
@ -312,10 +312,10 @@ __m128i test_mm_mpsadbw_epu8(__m128i x, __m128i y) {

 __m128i test_mm_mul_epi32(__m128i x, __m128i y) {
  // CHECK-LABEL: test_mm_mul_epi32
-  // CHECK: shl <2 x i64> %{{.*}}, <i64 32, i64 32>
-  // CHECK: ashr <2 x i64> %{{.*}}, <i64 32, i64 32>
-  // CHECK: shl <2 x i64> %{{.*}}, <i64 32, i64 32>
-  // CHECK: ashr <2 x i64> %{{.*}}, <i64 32, i64 32>
+  // CHECK: shl <2 x i64> %{{.*}}, splat (i64 32)
+  // CHECK: ashr <2 x i64> %{{.*}}, splat (i64 32)
+  // CHECK: shl <2 x i64> %{{.*}}, splat (i64 32)
+  // CHECK: ashr <2 x i64> %{{.*}}, splat (i64 32)
  // CHECK: mul <2 x i64> %{{.*}}, %{{.*}}
  return _mm_mul_epi32(x, y);
 }
--- a/clang/test/CodeGen/X86/xop-builtins-cmp.c
+++ b/clang/test/CodeGen/X86/xop-builtins-cmp.c
@ -408,48 +408,48 @@ __m128i test_mm_comfalse_epi64(__m128i a, __m128i b) {

 __m128i test_mm_comtrue_epu8(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_comtrue_epu8
-  // CHECK: ret <2 x i64> <i64 -1, i64 -1>
+  // CHECK: ret <2 x i64> splat (i64 -1)
  return _mm_comtrue_epu8(a, b);
 }

 __m128i test_mm_comtrue_epu16(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_comtrue_epu16
-  // CHECK: ret <2 x i64> <i64 -1, i64 -1>
+  // CHECK: ret <2 x i64> splat (i64 -1)
  return _mm_comtrue_epu16(a, b);
 }

 __m128i test_mm_comtrue_epu32(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_comtrue_epu32
-  // CHECK: ret <2 x i64> <i64 -1, i64 -1>
+  // CHECK: ret <2 x i64> splat (i64 -1)
  return _mm_comtrue_epu32(a, b);
 }

 __m128i test_mm_comtrue_epu64(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_comtrue_epu64
-  // CHECK: ret <2 x i64> <i64 -1, i64 -1>
+  // CHECK: ret <2 x i64> splat (i64 -1)
  return _mm_comtrue_epu64(a, b);
 }

 __m128i test_mm_comtrue_epi8(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_comtrue_epi8
-  // CHECK: ret <2 x i64> <i64 -1, i64 -1>
+  // CHECK: ret <2 x i64> splat (i64 -1)
  return _mm_comtrue_epi8(a, b);
 }

 __m128i test_mm_comtrue_epi16(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_comtrue_epi16
-  // CHECK: ret <2 x i64> <i64 -1, i64 -1>
+  // CHECK: ret <2 x i64> splat (i64 -1)
  return _mm_comtrue_epi16(a, b);
 }

 __m128i test_mm_comtrue_epi32(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_comtrue_epi32
-  // CHECK: ret <2 x i64> <i64 -1, i64 -1>
+  // CHECK: ret <2 x i64> splat (i64 -1)
  return _mm_comtrue_epi32(a, b);
 }

 __m128i test_mm_comtrue_epi64(__m128i a, __m128i b) {
  // CHECK-LABEL: test_mm_comtrue_epi64
-  // CHECK: ret <2 x i64> <i64 -1, i64 -1>
+  // CHECK: ret <2 x i64> splat (i64 -1)
  return _mm_comtrue_epi64(a, b);
 }
--- a/clang/test/CodeGen/X86/xop-builtins.c
+++ b/clang/test/CodeGen/X86/xop-builtins.c
@ -173,7 +173,7 @@ __m128i test_mm_hsubq_epi32(__m128i a) {
 __m128i test_mm_cmov_si128(__m128i a, __m128i b, __m128i c) {
  // CHECK-LABEL: test_mm_cmov_si128
  // CHECK: [[AND:%.*]] = and <2 x i64> %{{.*}}, %{{.*}}
-  // CHECK: [[NEG:%.*]] = xor <2 x i64> %{{.*}}, <i64 -1, i64 -1>
+  // CHECK: [[NEG:%.*]] = xor <2 x i64> %{{.*}}, splat (i64 -1)
  // CHECK-NEXT: [[ANDN:%.*]] = and <2 x i64> %{{.*}}, [[NEG]]
  // CHECK-NEXT: %{{.*}} = or <2 x i64> [[AND]], [[ANDN]]
  return _mm_cmov_si128(a, b, c);
@ -182,7 +182,7 @@ __m128i test_mm_cmov_si128(__m128i a, __m128i b, __m128i c) {
 __m256i test_mm256_cmov_si256(__m256i a, __m256i b, __m256i c) {
  // CHECK-LABEL: test_mm256_cmov_si256
  // CHECK: [[AND:%.*]] = and <4 x i64> %{{.*}}, %{{.*}}
-  // CHECK: [[NEG:%.*]] = xor <4 x i64> %{{.*}}, <i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: [[NEG:%.*]] = xor <4 x i64> %{{.*}}, splat (i64 -1)
  // CHECK-NEXT: [[ANDN:%.*]] = and <4 x i64> %{{.*}}, [[NEG]]
  // CHECK-NEXT: %{{.*}} = or <4 x i64> [[AND]], [[ANDN]]
  return _mm256_cmov_si256(a, b, c);
@ -220,25 +220,25 @@ __m128i test_mm_rot_epi64(__m128i a, __m128i b) {

 __m128i test_mm_roti_epi8(__m128i a) {
  // CHECK-LABEL: test_mm_roti_epi8
-  // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+  // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> splat (i8 1))
  return _mm_roti_epi8(a, 1);
 }

 __m128i test_mm_roti_epi16(__m128i a) {
  // CHECK-LABEL: test_mm_roti_epi16
-  // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> <i16 50, i16 50, i16 50, i16 50, i16 50, i16 50, i16 50, i16 50>)
+  // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 50))
  return _mm_roti_epi16(a, 50);
 }

 __m128i test_mm_roti_epi32(__m128i a) {
  // CHECK-LABEL: test_mm_roti_epi32
-  // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 226, i32 226, i32 226, i32 226>)
+  // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 226))
  return _mm_roti_epi32(a, -30);
 }

 __m128i test_mm_roti_epi64(__m128i a) {
  // CHECK-LABEL: test_mm_roti_epi64
-  // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> <i64 100, i64 100>)
+  // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 100))
  return _mm_roti_epi64(a, 100);
 }

--- a/clang/test/CodeGen/aarch64-neon-3v.c
+++ b/clang/test/CodeGen/aarch64-neon-3v.c
@ -341,7 +341,7 @@ uint64x2_t test_veorq_u64(uint64x2_t a, uint64x2_t b) {
 }

 // CHECK-LABEL: define{{.*}} <8 x i8> @test_vbic_s8(<8 x i8> noundef %a, <8 x i8> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1)
 // CHECK:   [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]]
 // CHECK:   ret <8 x i8> [[AND_I]]
 int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) {
@ -349,7 +349,7 @@ int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) {
 }

 // CHECK-LABEL: define{{.*}} <16 x i8> @test_vbicq_s8(<16 x i8> noundef %a, <16 x i8> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1)
 // CHECK:   [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
 // CHECK:   ret <16 x i8> [[AND_I]]
 int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) {
@ -357,7 +357,7 @@ int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) {
 }

 // CHECK-LABEL: define{{.*}} <4 x i16> @test_vbic_s16(<4 x i16> noundef %a, <4 x i16> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1)
 // CHECK:   [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]]
 // CHECK:   ret <4 x i16> [[AND_I]]
 int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) {
@ -365,7 +365,7 @@ int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) {
 }

 // CHECK-LABEL: define{{.*}} <8 x i16> @test_vbicq_s16(<8 x i16> noundef %a, <8 x i16> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1)
 // CHECK:   [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
 // CHECK:   ret <8 x i16> [[AND_I]]
 int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) {
@ -373,7 +373,7 @@ int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) {
 }

 // CHECK-LABEL: define{{.*}} <2 x i32> @test_vbic_s32(<2 x i32> noundef %a, <2 x i32> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1)
 // CHECK:   [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]]
 // CHECK:   ret <2 x i32> [[AND_I]]
 int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) {
@ -381,7 +381,7 @@ int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) {
 }

 // CHECK-LABEL: define{{.*}} <4 x i32> @test_vbicq_s32(<4 x i32> noundef %a, <4 x i32> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1)
 // CHECK:   [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
 // CHECK:   ret <4 x i32> [[AND_I]]
 int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) {
@ -389,7 +389,7 @@ int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) {
 }

 // CHECK-LABEL: define{{.*}} <1 x i64> @test_vbic_s64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1)
 // CHECK:   [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]]
 // CHECK:   ret <1 x i64> [[AND_I]]
 int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) {
@ -397,7 +397,7 @@ int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) {
 }

 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vbicq_s64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1)
 // CHECK:   [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
 // CHECK:   ret <2 x i64> [[AND_I]]
 int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) {
@ -405,7 +405,7 @@ int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) {
 }

 // CHECK-LABEL: define{{.*}} <8 x i8> @test_vbic_u8(<8 x i8> noundef %a, <8 x i8> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1)
 // CHECK:   [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]]
 // CHECK:   ret <8 x i8> [[AND_I]]
 uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) {
@ -413,7 +413,7 @@ uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) {
 }

 // CHECK-LABEL: define{{.*}} <16 x i8> @test_vbicq_u8(<16 x i8> noundef %a, <16 x i8> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1)
 // CHECK:   [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
 // CHECK:   ret <16 x i8> [[AND_I]]
 uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) {
@ -421,7 +421,7 @@ uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) {
 }

 // CHECK-LABEL: define{{.*}} <4 x i16> @test_vbic_u16(<4 x i16> noundef %a, <4 x i16> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1)
 // CHECK:   [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]]
 // CHECK:   ret <4 x i16> [[AND_I]]
 uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) {
@ -429,7 +429,7 @@ uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) {
 }

 // CHECK-LABEL: define{{.*}} <8 x i16> @test_vbicq_u16(<8 x i16> noundef %a, <8 x i16> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1)
 // CHECK:   [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
 // CHECK:   ret <8 x i16> [[AND_I]]
 uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) {
@ -437,7 +437,7 @@ uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) {
 }

 // CHECK-LABEL: define{{.*}} <2 x i32> @test_vbic_u32(<2 x i32> noundef %a, <2 x i32> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1)
 // CHECK:   [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]]
 // CHECK:   ret <2 x i32> [[AND_I]]
 uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) {
@ -445,7 +445,7 @@ uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) {
 }

 // CHECK-LABEL: define{{.*}} <4 x i32> @test_vbicq_u32(<4 x i32> noundef %a, <4 x i32> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1)
 // CHECK:   [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
 // CHECK:   ret <4 x i32> [[AND_I]]
 uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) {
@ -453,7 +453,7 @@ uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) {
 }

 // CHECK-LABEL: define{{.*}} <1 x i64> @test_vbic_u64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1)
 // CHECK:   [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]]
 // CHECK:   ret <1 x i64> [[AND_I]]
 uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) {
@ -461,7 +461,7 @@ uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) {
 }

 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vbicq_u64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1)
 // CHECK:   [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
 // CHECK:   ret <2 x i64> [[AND_I]]
 uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) {
@ -469,7 +469,7 @@ uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) {
 }

 // CHECK-LABEL: define{{.*}} <8 x i8> @test_vorn_s8(<8 x i8> noundef %a, <8 x i8> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1)
 // CHECK:   [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]]
 // CHECK:   ret <8 x i8> [[OR_I]]
 int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) {
@ -477,7 +477,7 @@ int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) {
 }

 // CHECK-LABEL: define{{.*}} <16 x i8> @test_vornq_s8(<16 x i8> noundef %a, <16 x i8> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1)
 // CHECK:   [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
 // CHECK:   ret <16 x i8> [[OR_I]]
 int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) {
@ -485,7 +485,7 @@ int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) {
 }

 // CHECK-LABEL: define{{.*}} <4 x i16> @test_vorn_s16(<4 x i16> noundef %a, <4 x i16> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1)
 // CHECK:   [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]]
 // CHECK:   ret <4 x i16> [[OR_I]]
 int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) {
@ -493,7 +493,7 @@ int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) {
 }

 // CHECK-LABEL: define{{.*}} <8 x i16> @test_vornq_s16(<8 x i16> noundef %a, <8 x i16> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1)
 // CHECK:   [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
 // CHECK:   ret <8 x i16> [[OR_I]]
 int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) {
@ -501,7 +501,7 @@ int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) {
 }

 // CHECK-LABEL: define{{.*}} <2 x i32> @test_vorn_s32(<2 x i32> noundef %a, <2 x i32> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1)
 // CHECK:   [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]]
 // CHECK:   ret <2 x i32> [[OR_I]]
 int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) {
@ -509,7 +509,7 @@ int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) {
 }

 // CHECK-LABEL: define{{.*}} <4 x i32> @test_vornq_s32(<4 x i32> noundef %a, <4 x i32> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1)
 // CHECK:   [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
 // CHECK:   ret <4 x i32> [[OR_I]]
 int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) {
@ -517,7 +517,7 @@ int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) {
 }

 // CHECK-LABEL: define{{.*}} <1 x i64> @test_vorn_s64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1)
 // CHECK:   [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]]
 // CHECK:   ret <1 x i64> [[OR_I]]
 int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) {
@ -525,7 +525,7 @@ int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) {
 }

 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vornq_s64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1)
 // CHECK:   [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
 // CHECK:   ret <2 x i64> [[OR_I]]
 int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) {
@ -533,7 +533,7 @@ int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) {
 }

 // CHECK-LABEL: define{{.*}} <8 x i8> @test_vorn_u8(<8 x i8> noundef %a, <8 x i8> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %b, splat (i8 -1)
 // CHECK:   [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]]
 // CHECK:   ret <8 x i8> [[OR_I]]
 uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) {
@ -541,7 +541,7 @@ uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) {
 }

 // CHECK-LABEL: define{{.*}} <16 x i8> @test_vornq_u8(<16 x i8> noundef %a, <16 x i8> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %b, splat (i8 -1)
 // CHECK:   [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
 // CHECK:   ret <16 x i8> [[OR_I]]
 uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) {
@ -549,7 +549,7 @@ uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) {
 }

 // CHECK-LABEL: define{{.*}} <4 x i16> @test_vorn_u16(<4 x i16> noundef %a, <4 x i16> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %b, splat (i16 -1)
 // CHECK:   [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]]
 // CHECK:   ret <4 x i16> [[OR_I]]
 uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) {
@ -557,7 +557,7 @@ uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) {
 }

 // CHECK-LABEL: define{{.*}} <8 x i16> @test_vornq_u16(<8 x i16> noundef %a, <8 x i16> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %b, splat (i16 -1)
 // CHECK:   [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
 // CHECK:   ret <8 x i16> [[OR_I]]
 uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) {
@ -565,7 +565,7 @@ uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) {
 }

 // CHECK-LABEL: define{{.*}} <2 x i32> @test_vorn_u32(<2 x i32> noundef %a, <2 x i32> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %b, splat (i32 -1)
 // CHECK:   [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]]
 // CHECK:   ret <2 x i32> [[OR_I]]
 uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) {
@ -573,7 +573,7 @@ uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) {
 }

 // CHECK-LABEL: define{{.*}} <4 x i32> @test_vornq_u32(<4 x i32> noundef %a, <4 x i32> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %b, splat (i32 -1)
 // CHECK:   [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
 // CHECK:   ret <4 x i32> [[OR_I]]
 uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) {
@ -581,7 +581,7 @@ uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) {
 }

 // CHECK-LABEL: define{{.*}} <1 x i64> @test_vorn_u64(<1 x i64> noundef %a, <1 x i64> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <1 x i64> %b, splat (i64 -1)
 // CHECK:   [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]]
 // CHECK:   ret <1 x i64> [[OR_I]]
 uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) {
@ -589,7 +589,7 @@ uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) {
 }

 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vornq_u64(<2 x i64> noundef %a, <2 x i64> noundef %b) #0 {
-// CHECK:   [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <2 x i64> %b, splat (i64 -1)
 // CHECK:   [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
 // CHECK:   ret <2 x i64> [[OR_I]]
 uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) {
--- a/clang/test/CodeGen/aarch64-neon-intrinsics.c
+++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c
--- a/clang/test/CodeGen/aarch64-neon-misc.c
+++ b/clang/test/CodeGen/aarch64-neon-misc.c
@ -1668,98 +1668,98 @@ poly8x16_t test_vcntq_p8(poly8x16_t a) {
 }

 // CHECK-LABEL: @test_vmvn_s8(
-// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %a, splat (i8 -1)
 // CHECK:   ret <8 x i8> [[NEG_I]]
 int8x8_t test_vmvn_s8(int8x8_t a) {
  return vmvn_s8(a);
 }

 // CHECK-LABEL: @test_vmvnq_s8(
-// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %a, splat (i8 -1)
 // CHECK:   ret <16 x i8> [[NEG_I]]
 int8x16_t test_vmvnq_s8(int8x16_t a) {
  return vmvnq_s8(a);
 }

 // CHECK-LABEL: @test_vmvn_s16(
-// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %a, splat (i16 -1)
 // CHECK:   ret <4 x i16> [[NEG_I]]
 int16x4_t test_vmvn_s16(int16x4_t a) {
  return vmvn_s16(a);
 }

 // CHECK-LABEL: @test_vmvnq_s16(
-// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %a, splat (i16 -1)
 // CHECK:   ret <8 x i16> [[NEG_I]]
 int16x8_t test_vmvnq_s16(int16x8_t a) {
  return vmvnq_s16(a);
 }

 // CHECK-LABEL: @test_vmvn_s32(
-// CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %a, splat (i32 -1)
 // CHECK:   ret <2 x i32> [[NEG_I]]
 int32x2_t test_vmvn_s32(int32x2_t a) {
  return vmvn_s32(a);
 }

 // CHECK-LABEL: @test_vmvnq_s32(
-// CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %a, splat (i32 -1)
 // CHECK:   ret <4 x i32> [[NEG_I]]
 int32x4_t test_vmvnq_s32(int32x4_t a) {
  return vmvnq_s32(a);
 }

 // CHECK-LABEL: @test_vmvn_u8(
-// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %a, splat (i8 -1)
 // CHECK:   ret <8 x i8> [[NEG_I]]
 uint8x8_t test_vmvn_u8(uint8x8_t a) {
  return vmvn_u8(a);
 }

 // CHECK-LABEL: @test_vmvnq_u8(
-// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %a, splat (i8 -1)
 // CHECK:   ret <16 x i8> [[NEG_I]]
 uint8x16_t test_vmvnq_u8(uint8x16_t a) {
  return vmvnq_u8(a);
 }

 // CHECK-LABEL: @test_vmvn_u16(
-// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %a, splat (i16 -1)
 // CHECK:   ret <4 x i16> [[NEG_I]]
 uint16x4_t test_vmvn_u16(uint16x4_t a) {
  return vmvn_u16(a);
 }

 // CHECK-LABEL: @test_vmvnq_u16(
-// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %a, splat (i16 -1)
 // CHECK:   ret <8 x i16> [[NEG_I]]
 uint16x8_t test_vmvnq_u16(uint16x8_t a) {
  return vmvnq_u16(a);
 }

 // CHECK-LABEL: @test_vmvn_u32(
-// CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %a, splat (i32 -1)
 // CHECK:   ret <2 x i32> [[NEG_I]]
 uint32x2_t test_vmvn_u32(uint32x2_t a) {
  return vmvn_u32(a);
 }

 // CHECK-LABEL: @test_vmvnq_u32(
-// CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %a, splat (i32 -1)
 // CHECK:   ret <4 x i32> [[NEG_I]]
 uint32x4_t test_vmvnq_u32(uint32x4_t a) {
  return vmvnq_u32(a);
 }

 // CHECK-LABEL: @test_vmvn_p8(
-// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %a, splat (i8 -1)
 // CHECK:   ret <8 x i8> [[NEG_I]]
 poly8x8_t test_vmvn_p8(poly8x8_t a) {
  return vmvn_p8(a);
 }

 // CHECK-LABEL: @test_vmvnq_p8(
-// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %a, splat (i8 -1)
 // CHECK:   ret <16 x i8> [[NEG_I]]
 poly8x16_t test_vmvnq_p8(poly8x16_t a) {
  return vmvnq_p8(a);
@ -2076,7 +2076,7 @@ uint32x4_t test_vqmovn_high_u64(uint32x2_t a, uint64x2_t b) {

 // CHECK-LABEL: @test_vshll_n_s8(
 // CHECK:   [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
-// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 8)
 // CHECK:   ret <8 x i16> [[VSHLL_N]]
 int16x8_t test_vshll_n_s8(int8x8_t a) {
  return vshll_n_s8(a, 8);
@ -2086,7 +2086,7 @@ int16x8_t test_vshll_n_s8(int8x8_t a) {
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
-// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
 // CHECK:   ret <4 x i32> [[VSHLL_N]]
 int32x4_t test_vshll_n_s16(int16x4_t a) {
  return vshll_n_s16(a, 16);
@ -2096,7 +2096,7 @@ int32x4_t test_vshll_n_s16(int16x4_t a) {
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 // CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
-// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 32, i64 32>
+// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 32)
 // CHECK:   ret <2 x i64> [[VSHLL_N]]
 int64x2_t test_vshll_n_s32(int32x2_t a) {
  return vshll_n_s32(a, 32);
@ -2104,7 +2104,7 @@ int64x2_t test_vshll_n_s32(int32x2_t a) {

 // CHECK-LABEL: @test_vshll_n_u8(
 // CHECK:   [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
-// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 8)
 // CHECK:   ret <8 x i16> [[VSHLL_N]]
 uint16x8_t test_vshll_n_u8(uint8x8_t a) {
  return vshll_n_u8(a, 8);
@ -2114,7 +2114,7 @@ uint16x8_t test_vshll_n_u8(uint8x8_t a) {
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
-// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
 // CHECK:   ret <4 x i32> [[VSHLL_N]]
 uint32x4_t test_vshll_n_u16(uint16x4_t a) {
  return vshll_n_u16(a, 16);
@ -2124,7 +2124,7 @@ uint32x4_t test_vshll_n_u16(uint16x4_t a) {
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 // CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
-// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 32, i64 32>
+// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 32)
 // CHECK:   ret <2 x i64> [[VSHLL_N]]
 uint64x2_t test_vshll_n_u32(uint32x2_t a) {
  return vshll_n_u32(a, 32);
@ -2133,7 +2133,7 @@ uint64x2_t test_vshll_n_u32(uint32x2_t a) {
 // CHECK-LABEL: @test_vshll_high_n_s8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
-// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 8)
 // CHECK:   ret <8 x i16> [[VSHLL_N]]
 int16x8_t test_vshll_high_n_s8(int8x16_t a) {
  return vshll_high_n_s8(a, 8);
@ -2144,7 +2144,7 @@ int16x8_t test_vshll_high_n_s8(int8x16_t a) {
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
-// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
 // CHECK:   ret <4 x i32> [[VSHLL_N]]
 int32x4_t test_vshll_high_n_s16(int16x8_t a) {
  return vshll_high_n_s16(a, 16);
@ -2155,7 +2155,7 @@ int32x4_t test_vshll_high_n_s16(int16x8_t a) {
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 // CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
-// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 32, i64 32>
+// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 32)
 // CHECK:   ret <2 x i64> [[VSHLL_N]]
 int64x2_t test_vshll_high_n_s32(int32x4_t a) {
  return vshll_high_n_s32(a, 32);
@ -2164,7 +2164,7 @@ int64x2_t test_vshll_high_n_s32(int32x4_t a) {
 // CHECK-LABEL: @test_vshll_high_n_u8(
 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK:   [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16>
-// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], splat (i16 8)
 // CHECK:   ret <8 x i16> [[VSHLL_N]]
 uint16x8_t test_vshll_high_n_u8(uint8x16_t a) {
  return vshll_high_n_u8(a, 8);
@ -2175,7 +2175,7 @@ uint16x8_t test_vshll_high_n_u8(uint8x16_t a) {
 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
 // CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
-// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
 // CHECK:   ret <4 x i32> [[VSHLL_N]]
 uint32x4_t test_vshll_high_n_u16(uint16x8_t a) {
  return vshll_high_n_u16(a, 16);
@ -2186,7 +2186,7 @@ uint32x4_t test_vshll_high_n_u16(uint16x8_t a) {
 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8>
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
 // CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
-// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 32, i64 32>
+// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], splat (i64 32)
 // CHECK:   ret <2 x i64> [[VSHLL_N]]
 uint64x2_t test_vshll_high_n_u32(uint32x4_t a) {
  return vshll_high_n_u32(a, 32);
--- a/clang/test/CodeGen/aarch64-neon-shifts.c
+++ b/clang/test/CodeGen/aarch64-neon-shifts.c
@ -7,13 +7,13 @@

 uint8x8_t test_shift_vshr(uint8x8_t a) {
  // CHECK-LABEL: test_shift_vshr
-  // CHECK: %{{.*}} = lshr <8 x i8> %a, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+  // CHECK: %{{.*}} = lshr <8 x i8> %a, splat (i8 5)
  return vshr_n_u8(a, 5);
 }

 int8x8_t test_shift_vshr_smax(int8x8_t a) {
  // CHECK-LABEL: test_shift_vshr_smax
-  // CHECK: %{{.*}} = ashr <8 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  // CHECK: %{{.*}} = ashr <8 x i8> %a, splat (i8 7)
  return vshr_n_s8(a, 8);
 }

@ -25,14 +25,14 @@ uint8x8_t test_shift_vshr_umax(uint8x8_t a) {

 uint8x8_t test_shift_vsra(uint8x8_t a, uint8x8_t b) {
  // CHECK-LABEL: test_shift_vsra
-  // CHECK: %[[SHR:.*]] = lshr <8 x i8> %b, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+  // CHECK: %[[SHR:.*]] = lshr <8 x i8> %b, splat (i8 5)
  // CHECK: %{{.*}} = add <8 x i8> %a, %[[SHR]]
  return vsra_n_u8(a, b, 5);
 }

 int8x8_t test_shift_vsra_smax(int8x8_t a, int8x8_t b) {
  // CHECK-LABEL: test_shift_vsra_smax
-  // CHECK: %[[SHR:.*]] = ashr <8 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  // CHECK: %[[SHR:.*]] = ashr <8 x i8> %b, splat (i8 7)
  // CHECK: %{{.*}} = add <8 x i8> %a, %[[SHR]]
  return vsra_n_s8(a, b, 8);
 }
--- a/clang/test/CodeGen/aarch64-neon-tbl.c
+++ b/clang/test/CodeGen/aarch64-neon-tbl.c
@ -245,10 +245,10 @@ int8x16_t test_vqtbl4q_s8(int8x16x4_t a, int8x16_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK-NEXT:    [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]])
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 8)
 // CHECK-NEXT:    [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
 // CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]]
-// CHECK-NEXT:    [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], splat (i8 -1)
 // CHECK-NEXT:    [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]]
 // CHECK-NEXT:    [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]]
 // CHECK-NEXT:    ret <8 x i8> [[VTBX_I]]
@ -296,10 +296,10 @@ int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
 // CHECK-NEXT:    [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK-NEXT:    [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK-NEXT:    [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]])
-// CHECK-NEXT:    [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
+// CHECK-NEXT:    [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 24)
 // CHECK-NEXT:    [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
 // CHECK-NEXT:    [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]]
-// CHECK-NEXT:    [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], splat (i8 -1)
 // CHECK-NEXT:    [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]]
 // CHECK-NEXT:    [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]]
 // CHECK-NEXT:    ret <8 x i8> [[VTBX_I]]
@ -725,10 +725,10 @@ uint8x16_t test_vqtbl4q_u8(uint8x16x4_t a, uint8x16_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK-NEXT:    [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]])
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 8)
 // CHECK-NEXT:    [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
 // CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]]
-// CHECK-NEXT:    [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], splat (i8 -1)
 // CHECK-NEXT:    [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]]
 // CHECK-NEXT:    [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]]
 // CHECK-NEXT:    ret <8 x i8> [[VTBX_I]]
@ -776,10 +776,10 @@ uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
 // CHECK-NEXT:    [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK-NEXT:    [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK-NEXT:    [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]])
-// CHECK-NEXT:    [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
+// CHECK-NEXT:    [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 24)
 // CHECK-NEXT:    [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
 // CHECK-NEXT:    [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]]
-// CHECK-NEXT:    [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], splat (i8 -1)
 // CHECK-NEXT:    [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]]
 // CHECK-NEXT:    [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]]
 // CHECK-NEXT:    ret <8 x i8> [[VTBX_I]]
@ -1205,10 +1205,10 @@ poly8x16_t test_vqtbl4q_p8(poly8x16x4_t a, uint8x16_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[VTBL1_I:%.*]] = shufflevector <8 x i8> [[B]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK-NEXT:    [[VTBL11_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[VTBL1_I]], <8 x i8> [[C]])
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 8)
 // CHECK-NEXT:    [[TMP1:%.*]] = sext <8 x i1> [[TMP0]] to <8 x i8>
 // CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[A]]
-// CHECK-NEXT:    [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    [[TMP3:%.*]] = xor <8 x i8> [[TMP1]], splat (i8 -1)
 // CHECK-NEXT:    [[TMP4:%.*]] = and <8 x i8> [[TMP3]], [[VTBL11_I]]
 // CHECK-NEXT:    [[VTBX_I:%.*]] = or <8 x i8> [[TMP2]], [[TMP4]]
 // CHECK-NEXT:    ret <8 x i8> [[VTBX_I]]
@ -1256,10 +1256,10 @@ poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
 // CHECK-NEXT:    [[VTBL2_I:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK-NEXT:    [[VTBL25_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 // CHECK-NEXT:    [[VTBL26_I:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[VTBL2_I]], <16 x i8> [[VTBL25_I]], <8 x i8> [[C]])
-// CHECK-NEXT:    [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], <i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24, i8 24>
+// CHECK-NEXT:    [[TMP4:%.*]] = icmp uge <8 x i8> [[C]], splat (i8 24)
 // CHECK-NEXT:    [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i8>
 // CHECK-NEXT:    [[TMP6:%.*]] = and <8 x i8> [[TMP5]], [[A]]
-// CHECK-NEXT:    [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    [[TMP7:%.*]] = xor <8 x i8> [[TMP5]], splat (i8 -1)
 // CHECK-NEXT:    [[TMP8:%.*]] = and <8 x i8> [[TMP7]], [[VTBL26_I]]
 // CHECK-NEXT:    [[VTBX_I:%.*]] = or <8 x i8> [[TMP6]], [[TMP8]]
 // CHECK-NEXT:    ret <8 x i8> [[VTBX_I]]
--- a/clang/test/CodeGen/aarch64-poly64.c
+++ b/clang/test/CodeGen/aarch64-poly64.c
@ -42,7 +42,7 @@ uint64x2_t test_vtstq_p64(poly64x2_t a, poly64x2_t b) {

 // CHECK-LABEL: define{{.*}} <1 x i64> @test_vbsl_p64(<1 x i64> noundef %a, <1 x i64> noundef %b, <1 x i64> noundef %c) #0 {
 // CHECK:   [[VBSL3_I:%.*]] = and <1 x i64> %a, %b
-// CHECK:   [[TMP3:%.*]] = xor <1 x i64> %a, <i64 -1>
+// CHECK:   [[TMP3:%.*]] = xor <1 x i64> %a, splat (i64 -1)
 // CHECK:   [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %c
 // CHECK:   [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
 // CHECK:   ret <1 x i64> [[VBSL5_I]]
@ -52,7 +52,7 @@ poly64x1_t test_vbsl_p64(poly64x1_t a, poly64x1_t b, poly64x1_t c) {

 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vbslq_p64(<2 x i64> noundef %a, <2 x i64> noundef %b, <2 x i64> noundef %c) #0 {
 // CHECK:   [[VBSL3_I:%.*]] = and <2 x i64> %a, %b
-// CHECK:   [[TMP3:%.*]] = xor <2 x i64> %a, <i64 -1, i64 -1>
+// CHECK:   [[TMP3:%.*]] = xor <2 x i64> %a, splat (i64 -1)
 // CHECK:   [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %c
 // CHECK:   [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
 // CHECK:   ret <2 x i64> [[VBSL5_I]]
--- a/clang/test/CodeGen/aarch64-sve-vls-bitwise-ops.c
+++ b/clang/test/CodeGen/aarch64-sve-vls-bitwise-ops.c
@ -370,7 +370,7 @@ fixed_uint64_t xor_u64(fixed_uint64_t a, fixed_uint64_t b) {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A_COERCE:%.*]] = bitcast <vscale x 16 x i1> [[TMP0:%.*]] to <vscale x 2 x i8>
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i8> @llvm.vector.extract.v8i8.nxv2i8(<vscale x 2 x i8> [[A_COERCE]], i64 0)
-// CHECK-NEXT:    [[NOT:%.*]] = xor <8 x i8> [[A]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    [[NOT:%.*]] = xor <8 x i8> [[A]], splat (i8 -1)
 // CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> [[NOT]], i64 0)
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <vscale x 2 x i8> [[CAST_SCALABLE]] to <vscale x 16 x i1>
 // CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP1]]
@ -382,7 +382,7 @@ fixed_bool_t neg_bool(fixed_bool_t a) {
 // CHECK-LABEL: @neg_i8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[NOT:%.*]] = xor <64 x i8> [[A]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    [[NOT:%.*]] = xor <64 x i8> [[A]], splat (i8 -1)
 // CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CAST_SCALABLE]]
 //
@ -393,7 +393,7 @@ fixed_int8_t neg_i8(fixed_int8_t a) {
 // CHECK-LABEL: @neg_i16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[NOT:%.*]] = xor <32 x i16> [[A]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-NEXT:    [[NOT:%.*]] = xor <32 x i16> [[A]], splat (i16 -1)
 // CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CAST_SCALABLE]]
 //
@ -404,7 +404,7 @@ fixed_int16_t neg_i16(fixed_int16_t a) {
 // CHECK-LABEL: @neg_i32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[NOT:%.*]] = xor <16 x i32> [[A]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT:    [[NOT:%.*]] = xor <16 x i32> [[A]], splat (i32 -1)
 // CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
 //
@ -415,7 +415,7 @@ fixed_int32_t neg_i32(fixed_int32_t a) {
 // CHECK-LABEL: @neg_i64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[NOT:%.*]] = xor <8 x i64> [[A]], <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+// CHECK-NEXT:    [[NOT:%.*]] = xor <8 x i64> [[A]], splat (i64 -1)
 // CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CAST_SCALABLE]]
 //
@ -426,7 +426,7 @@ fixed_int64_t neg_i64(fixed_int64_t a) {
 // CHECK-LABEL: @neg_u8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <64 x i8> @llvm.vector.extract.v64i8.nxv16i8(<vscale x 16 x i8> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[NOT:%.*]] = xor <64 x i8> [[A]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    [[NOT:%.*]] = xor <64 x i8> [[A]], splat (i8 -1)
 // CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v64i8(<vscale x 16 x i8> undef, <64 x i8> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 16 x i8> [[CAST_SCALABLE]]
 //
@ -437,7 +437,7 @@ fixed_uint8_t neg_u8(fixed_uint8_t a) {
 // CHECK-LABEL: @neg_u16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <32 x i16> @llvm.vector.extract.v32i16.nxv8i16(<vscale x 8 x i16> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[NOT:%.*]] = xor <32 x i16> [[A]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-NEXT:    [[NOT:%.*]] = xor <32 x i16> [[A]], splat (i16 -1)
 // CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v32i16(<vscale x 8 x i16> undef, <32 x i16> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 8 x i16> [[CAST_SCALABLE]]
 //
@ -448,7 +448,7 @@ fixed_uint16_t neg_u16(fixed_uint16_t a) {
 // CHECK-LABEL: @neg_u32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <16 x i32> @llvm.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[NOT:%.*]] = xor <16 x i32> [[A]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT:    [[NOT:%.*]] = xor <16 x i32> [[A]], splat (i32 -1)
 // CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> undef, <16 x i32> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 4 x i32> [[CAST_SCALABLE]]
 //
@ -459,7 +459,7 @@ fixed_uint32_t neg_u32(fixed_uint32_t a) {
 // CHECK-LABEL: @neg_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A:%.*]] = call <8 x i64> @llvm.vector.extract.v8i64.nxv2i64(<vscale x 2 x i64> [[A_COERCE:%.*]], i64 0)
-// CHECK-NEXT:    [[NOT:%.*]] = xor <8 x i64> [[A]], <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+// CHECK-NEXT:    [[NOT:%.*]] = xor <8 x i64> [[A]], splat (i64 -1)
 // CHECK-NEXT:    [[CAST_SCALABLE:%.*]] = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v8i64(<vscale x 2 x i64> undef, <8 x i64> [[NOT]], i64 0)
 // CHECK-NEXT:    ret <vscale x 2 x i64> [[CAST_SCALABLE]]
 //
--- a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-generic.c
+++ b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-generic.c
@ -21,7 +21,7 @@
 // CHECK-NEXT:    [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
 // CHECK-NEXT:    [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
 // CHECK-NEXT:    [[VBSL3_I:%.*]] = and <4 x i16> [[A]], [[VBSL1_I]]
-// CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i16> [[A]], <i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-NEXT:    [[TMP3:%.*]] = xor <4 x i16> [[A]], splat (i16 -1)
 // CHECK-NEXT:    [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]]
 // CHECK-NEXT:    [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]]
 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <4 x half>
@ -40,7 +40,7 @@ float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) {
 // CHECK-NEXT:    [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
 // CHECK-NEXT:    [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
 // CHECK-NEXT:    [[VBSL3_I:%.*]] = and <8 x i16> [[A]], [[VBSL1_I]]
-// CHECK-NEXT:    [[TMP3:%.*]] = xor <8 x i16> [[A]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-NEXT:    [[TMP3:%.*]] = xor <8 x i16> [[A]], splat (i16 -1)
 // CHECK-NEXT:    [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]]
 // CHECK-NEXT:    [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]]
 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[VBSL5_I]] to <8 x half>
--- a/clang/test/CodeGen/arm-bf16-convert-intrinsics.c
+++ b/clang/test/CodeGen/arm-bf16-convert-intrinsics.c
@ -30,7 +30,7 @@
 // CHECK-A64-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I]], align 8
 // CHECK-A64-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
 // CHECK-A64-NEXT:    [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
-// CHECK-A64-NEXT:    [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK-A64-NEXT:    [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
 // CHECK-A64-NEXT:    store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_836_I]], align 16
 // CHECK-A64-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I]], align 16
 // CHECK-A64-NEXT:    ret <4 x float> [[TMP3]]
@ -43,7 +43,7 @@
 // CHECK-A32-HARDFP-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I]], align 8
 // CHECK-A32-HARDFP-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
 // CHECK-A32-HARDFP-NEXT:    [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
-// CHECK-A32-HARDFP-NEXT:    [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK-A32-HARDFP-NEXT:    [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
 // CHECK-A32-HARDFP-NEXT:    store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_836_I]], align 8
 // CHECK-A32-HARDFP-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I]], align 8
 // CHECK-A32-HARDFP-NEXT:    ret <4 x float> [[TMP3]]
@ -65,7 +65,7 @@
 // CHECK-A32-SOFTFP-NEXT:    [[TMP1:%.*]] = load <4 x i16>, ptr [[__REINT_836_I]], align 8
 // CHECK-A32-SOFTFP-NEXT:    [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8>
 // CHECK-A32-SOFTFP-NEXT:    [[TMP3:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
-// CHECK-A32-SOFTFP-NEXT:    [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP3]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK-A32-SOFTFP-NEXT:    [[VSHLL_N_I:%.*]] = shl <4 x i32> [[TMP3]], splat (i32 16)
 // CHECK-A32-SOFTFP-NEXT:    store <4 x i32> [[VSHLL_N_I]], ptr [[__REINT1_836_I]], align 8
 // CHECK-A32-SOFTFP-NEXT:    [[TMP4:%.*]] = load <4 x float>, ptr [[__REINT1_836_I]], align 8
 // CHECK-A32-SOFTFP-NEXT:    ret <4 x float> [[TMP4]]
@ -83,7 +83,7 @@ float32x4_t test_vcvt_f32_bf16(bfloat16x4_t a) {
 // CHECK-A64-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8
 // CHECK-A64-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
 // CHECK-A64-NEXT:    [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
-// CHECK-A64-NEXT:    [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK-A64-NEXT:    [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
 // CHECK-A64-NEXT:    store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 16
 // CHECK-A64-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 16
 // CHECK-A64-NEXT:    ret <4 x float> [[TMP3]]
@ -97,7 +97,7 @@ float32x4_t test_vcvt_f32_bf16(bfloat16x4_t a) {
 // CHECK-A32-HARDFP-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8
 // CHECK-A32-HARDFP-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
 // CHECK-A32-HARDFP-NEXT:    [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
-// CHECK-A32-HARDFP-NEXT:    [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK-A32-HARDFP-NEXT:    [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
 // CHECK-A32-HARDFP-NEXT:    store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 8
 // CHECK-A32-HARDFP-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 8
 // CHECK-A32-HARDFP-NEXT:    ret <4 x float> [[TMP3]]
@ -138,7 +138,7 @@ float32x4_t test_vcvt_f32_bf16(bfloat16x4_t a) {
 // CHECK-A32-SOFTFP-NEXT:    [[TMP5:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8
 // CHECK-A32-SOFTFP-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
 // CHECK-A32-SOFTFP-NEXT:    [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32>
-// CHECK-A32-SOFTFP-NEXT:    [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP7]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK-A32-SOFTFP-NEXT:    [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP7]], splat (i32 16)
 // CHECK-A32-SOFTFP-NEXT:    store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 8
 // CHECK-A32-SOFTFP-NEXT:    [[TMP8:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 8
 // CHECK-A32-SOFTFP-NEXT:    ret <4 x float> [[TMP8]]
@ -156,7 +156,7 @@ float32x4_t test_vcvtq_low_f32_bf16(bfloat16x8_t a) {
 // CHECK-A64-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8
 // CHECK-A64-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
 // CHECK-A64-NEXT:    [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
-// CHECK-A64-NEXT:    [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK-A64-NEXT:    [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
 // CHECK-A64-NEXT:    store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 16
 // CHECK-A64-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 16
 // CHECK-A64-NEXT:    ret <4 x float> [[TMP3]]
@ -170,7 +170,7 @@ float32x4_t test_vcvtq_low_f32_bf16(bfloat16x8_t a) {
 // CHECK-A32-HARDFP-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8
 // CHECK-A32-HARDFP-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
 // CHECK-A32-HARDFP-NEXT:    [[TMP2:%.*]] = sext <4 x i16> [[TMP0]] to <4 x i32>
-// CHECK-A32-HARDFP-NEXT:    [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK-A32-HARDFP-NEXT:    [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP2]], splat (i32 16)
 // CHECK-A32-HARDFP-NEXT:    store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 8
 // CHECK-A32-HARDFP-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 8
 // CHECK-A32-HARDFP-NEXT:    ret <4 x float> [[TMP3]]
@ -211,7 +211,7 @@ float32x4_t test_vcvtq_low_f32_bf16(bfloat16x8_t a) {
 // CHECK-A32-SOFTFP-NEXT:    [[TMP5:%.*]] = load <4 x i16>, ptr [[__REINT_836_I_I]], align 8
 // CHECK-A32-SOFTFP-NEXT:    [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
 // CHECK-A32-SOFTFP-NEXT:    [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32>
-// CHECK-A32-SOFTFP-NEXT:    [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP7]], <i32 16, i32 16, i32 16, i32 16>
+// CHECK-A32-SOFTFP-NEXT:    [[VSHLL_N_I_I:%.*]] = shl <4 x i32> [[TMP7]], splat (i32 16)
 // CHECK-A32-SOFTFP-NEXT:    store <4 x i32> [[VSHLL_N_I_I]], ptr [[__REINT1_836_I_I]], align 8
 // CHECK-A32-SOFTFP-NEXT:    [[TMP8:%.*]] = load <4 x float>, ptr [[__REINT1_836_I_I]], align 8
 // CHECK-A32-SOFTFP-NEXT:    ret <4 x float> [[TMP8]]
--- a/clang/test/CodeGen/arm-mve-intrinsics/absneg.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c
@ -84,7 +84,7 @@ int32x4_t test_vabsq_s32(int32x4_t a)

 // CHECK-LABEL: @test_vmvnq_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], splat (i8 -1)
 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
 int8x16_t test_vmvnq_s8(int8x16_t a)
@ -98,7 +98,7 @@ int8x16_t test_vmvnq_s8(int8x16_t a)

 // CHECK-LABEL: @test_vmvnq_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = xor <8 x i16> [[A:%.*]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <8 x i16> [[A:%.*]], splat (i16 -1)
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
 //
 int16x8_t test_vmvnq_s16(int16x8_t a)
@ -112,7 +112,7 @@ int16x8_t test_vmvnq_s16(int16x8_t a)

 // CHECK-LABEL: @test_vmvnq_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = xor <4 x i32> [[A:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <4 x i32> [[A:%.*]], splat (i32 -1)
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 int32x4_t test_vmvnq_s32(int32x4_t a)
@ -126,7 +126,7 @@ int32x4_t test_vmvnq_s32(int32x4_t a)

 // CHECK-LABEL: @test_vmvnq_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], splat (i8 -1)
 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
 uint8x16_t test_vmvnq_u8(uint8x16_t a)
@ -140,7 +140,7 @@ uint8x16_t test_vmvnq_u8(uint8x16_t a)

 // CHECK-LABEL: @test_vmvnq_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = xor <8 x i16> [[A:%.*]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <8 x i16> [[A:%.*]], splat (i16 -1)
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
 //
 uint16x8_t test_vmvnq_u16(uint16x8_t a)
@ -154,7 +154,7 @@ uint16x8_t test_vmvnq_u16(uint16x8_t a)

 // CHECK-LABEL: @test_vmvnq_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = xor <4 x i32> [[A:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <4 x i32> [[A:%.*]], splat (i32 -1)
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 uint32x4_t test_vmvnq_u32(uint32x4_t a)
@ -431,9 +431,9 @@ int32x4_t test_vnegq_s32(int32x4_t a)
 // CHECK-LABEL: @test_vqabsq_s8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt <16 x i8> [[A:%.*]], zeroinitializer
-// CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <16 x i8> [[A]], <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
+// CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <16 x i8> [[A]], splat (i8 -128)
 // CHECK-NEXT:    [[TMP2:%.*]] = sub <16 x i8> zeroinitializer, [[A]]
-// CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>, <16 x i8> [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> splat (i8 127), <16 x i8> [[TMP2]]
 // CHECK-NEXT:    [[TMP4:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> [[A]], <16 x i8> [[TMP3]]
 // CHECK-NEXT:    ret <16 x i8> [[TMP4]]
 //
@ -449,9 +449,9 @@ int8x16_t test_vqabsq_s8(int8x16_t a)
 // CHECK-LABEL: @test_vqabsq_s16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt <8 x i16> [[A:%.*]], zeroinitializer
-// CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> [[A]], <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
+// CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> [[A]], splat (i16 -32768)
 // CHECK-NEXT:    [[TMP2:%.*]] = sub <8 x i16> zeroinitializer, [[A]]
-// CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>, <8 x i16> [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> splat (i16 32767), <8 x i16> [[TMP2]]
 // CHECK-NEXT:    [[TMP4:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[A]], <8 x i16> [[TMP3]]
 // CHECK-NEXT:    ret <8 x i16> [[TMP4]]
 //
@ -467,9 +467,9 @@ int16x8_t test_vqabsq_s16(int16x8_t a)
 // CHECK-LABEL: @test_vqabsq_s32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt <4 x i32> [[A:%.*]], zeroinitializer
-// CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <4 x i32> [[A]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+// CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <4 x i32> [[A]], splat (i32 -2147483648)
 // CHECK-NEXT:    [[TMP2:%.*]] = sub <4 x i32> zeroinitializer, [[A]]
-// CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 2147483647), <4 x i32> [[TMP2]]
 // CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[A]], <4 x i32> [[TMP3]]
 // CHECK-NEXT:    ret <4 x i32> [[TMP4]]
 //
@ -484,9 +484,9 @@ int32x4_t test_vqabsq_s32(int32x4_t a)

 // CHECK-LABEL: @test_vqnegq_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <16 x i8> [[A:%.*]], splat (i8 -128)
 // CHECK-NEXT:    [[TMP1:%.*]] = sub <16 x i8> zeroinitializer, [[A]]
-// CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127>, <16 x i8> [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP0]], <16 x i8> splat (i8 127), <16 x i8> [[TMP1]]
 // CHECK-NEXT:    ret <16 x i8> [[TMP2]]
 //
 int8x16_t test_vqnegq_s8(int8x16_t a)
@ -500,9 +500,9 @@ int8x16_t test_vqnegq_s8(int8x16_t a)

 // CHECK-LABEL: @test_vqnegq_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <8 x i16> [[A:%.*]], splat (i16 -32768)
 // CHECK-NEXT:    [[TMP1:%.*]] = sub <8 x i16> zeroinitializer, [[A]]
-// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>, <8 x i16> [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> splat (i16 32767), <8 x i16> [[TMP1]]
 // CHECK-NEXT:    ret <8 x i16> [[TMP2]]
 //
 int16x8_t test_vqnegq_s16(int16x8_t a)
@ -516,9 +516,9 @@ int16x8_t test_vqnegq_s16(int16x8_t a)

 // CHECK-LABEL: @test_vqnegq_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
+// CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[A:%.*]], splat (i32 -2147483648)
 // CHECK-NEXT:    [[TMP1:%.*]] = sub <4 x i32> zeroinitializer, [[A]]
-// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>, <4 x i32> [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> splat (i32 2147483647), <4 x i32> [[TMP1]]
 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 //
 int32x4_t test_vqnegq_s32(int32x4_t a)
--- a/clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c
@ -8,7 +8,7 @@

 // CHECK-LABEL: @test_vbicq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], <i16 11007, i16 11007, i16 11007, i16 11007, i16 11007, i16 11007, i16 11007, i16 11007>
+// CHECK-NEXT:    [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], splat (i16 11007)
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
 //
 int16x8_t test_vbicq_n_s16(int16x8_t a)
@ -22,7 +22,7 @@ int16x8_t test_vbicq_n_s16(int16x8_t a)

 // CHECK-LABEL: @test_vbicq_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], <i32 -252, i32 -252, i32 -252, i32 -252>
+// CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 -252)
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 int32x4_t test_vbicq_n_s32(int32x4_t a)
@ -36,7 +36,7 @@ int32x4_t test_vbicq_n_s32(int32x4_t a)

 // CHECK-LABEL: @test_vbicq_n_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], <i16 -243, i16 -243, i16 -243, i16 -243, i16 -243, i16 -243, i16 -243, i16 -243>
+// CHECK-NEXT:    [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], splat (i16 -243)
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
 //
 uint16x8_t test_vbicq_n_u16(uint16x8_t a)
@ -50,7 +50,7 @@ uint16x8_t test_vbicq_n_u16(uint16x8_t a)

 // CHECK-LABEL: @test_vbicq_n_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], <i32 -8193, i32 -8193, i32 -8193, i32 -8193>
+// CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 -8193)
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 uint32x4_t test_vbicq_n_u32(uint32x4_t a)
@ -64,7 +64,7 @@ uint32x4_t test_vbicq_n_u32(uint32x4_t a)

 // CHECK-LABEL: @test_vorrq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], <i16 195, i16 195, i16 195, i16 195, i16 195, i16 195, i16 195, i16 195>
+// CHECK-NEXT:    [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], splat (i16 195)
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
 //
 int16x8_t test_vorrq_n_s16(int16x8_t a)
@ -78,7 +78,7 @@ int16x8_t test_vorrq_n_s16(int16x8_t a)

 // CHECK-LABEL: @test_vorrq_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], <i32 65536, i32 65536, i32 65536, i32 65536>
+// CHECK-NEXT:    [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], splat (i32 65536)
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 int32x4_t test_vorrq_n_s32(int32x4_t a)
@ -92,7 +92,7 @@ int32x4_t test_vorrq_n_s32(int32x4_t a)

 // CHECK-LABEL: @test_vorrq_n_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], <i16 -4096, i16 -4096, i16 -4096, i16 -4096, i16 -4096, i16 -4096, i16 -4096, i16 -4096>
+// CHECK-NEXT:    [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], splat (i16 -4096)
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
 //
 uint16x8_t test_vorrq_n_u16(uint16x8_t a)
@ -106,7 +106,7 @@ uint16x8_t test_vorrq_n_u16(uint16x8_t a)

 // CHECK-LABEL: @test_vorrq_n_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], <i32 8978432, i32 8978432, i32 8978432, i32 8978432>
+// CHECK-NEXT:    [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], splat (i32 8978432)
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 uint32x4_t test_vorrq_n_u32(uint32x4_t a)
@ -120,7 +120,7 @@ uint32x4_t test_vorrq_n_u32(uint32x4_t a)

 // CHECK-LABEL: @test_vmvnq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret <8 x i16> <i16 27391, i16 27391, i16 27391, i16 27391, i16 27391, i16 27391, i16 27391, i16 27391>
+// CHECK-NEXT:    ret <8 x i16> splat (i16 27391)
 //
 int16x8_t test_vmvnq_n_s16()
 {
@ -129,7 +129,7 @@ int16x8_t test_vmvnq_n_s16()

 // CHECK-LABEL: @test_vmvnq_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret <4 x i32> <i32 -5570561, i32 -5570561, i32 -5570561, i32 -5570561>
+// CHECK-NEXT:    ret <4 x i32> splat (i32 -5570561)
 //
 int32x4_t test_vmvnq_n_s32()
 {
@ -138,7 +138,7 @@ int32x4_t test_vmvnq_n_s32()

 // CHECK-LABEL: @test_vmvnq_n_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret <8 x i16> <i16 -18689, i16 -18689, i16 -18689, i16 -18689, i16 -18689, i16 -18689, i16 -18689, i16 -18689>
+// CHECK-NEXT:    ret <8 x i16> splat (i16 -18689)
 //
 uint16x8_t test_vmvnq_n_u16()
 {
@ -147,7 +147,7 @@ uint16x8_t test_vmvnq_n_u16()

 // CHECK-LABEL: @test_vmvnq_n_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret <4 x i32> <i32 1023410175, i32 1023410175, i32 1023410175, i32 1023410175>
+// CHECK-NEXT:    ret <4 x i32> splat (i32 1023410175)
 //
 uint32x4_t test_vmvnq_n_u32()
 {
@ -158,7 +158,7 @@ uint32x4_t test_vmvnq_n_u32()
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A:%.*]], <i16 -11265, i16 -11265, i16 -11265, i16 -11265, i16 -11265, i16 -11265, i16 -11265, i16 -11265>
+// CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A:%.*]], splat (i16 -11265)
 // CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]]
 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 //
@ -175,7 +175,7 @@ int16x8_t test_vbicq_m_n_s16(int16x8_t a, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A:%.*]], <i32 -13893633, i32 -13893633, i32 -13893633, i32 -13893633>
+// CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 -13893633)
 // CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]]
 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
@ -192,7 +192,7 @@ int32x4_t test_vbicq_m_n_s32(int32x4_t a, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A:%.*]], <i16 -37, i16 -37, i16 -37, i16 -37, i16 -37, i16 -37, i16 -37, i16 -37>
+// CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A:%.*]], splat (i16 -37)
 // CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]]
 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 //
@ -209,7 +209,7 @@ uint16x8_t test_vbicq_m_n_u16(uint16x8_t a, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A:%.*]], <i32 -1644167169, i32 -1644167169, i32 -1644167169, i32 -1644167169>
+// CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 -1644167169)
 // CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]]
 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
@ -226,7 +226,7 @@ uint32x4_t test_vbicq_m_n_u32(uint32x4_t a, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = or <8 x i16> [[A:%.*]], <i16 13568, i16 13568, i16 13568, i16 13568, i16 13568, i16 13568, i16 13568, i16 13568>
+// CHECK-NEXT:    [[TMP2:%.*]] = or <8 x i16> [[A:%.*]], splat (i16 13568)
 // CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]]
 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 //
@ -243,7 +243,7 @@ int16x8_t test_vorrq_m_n_s16(int16x8_t a, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> [[A:%.*]], <i32 654311424, i32 654311424, i32 654311424, i32 654311424>
+// CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> [[A:%.*]], splat (i32 654311424)
 // CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]]
 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
@ -260,7 +260,7 @@ int32x4_t test_vorrq_m_n_s32(int32x4_t a, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = or <8 x i16> [[A:%.*]], <i16 175, i16 175, i16 175, i16 175, i16 175, i16 175, i16 175, i16 175>
+// CHECK-NEXT:    [[TMP2:%.*]] = or <8 x i16> [[A:%.*]], splat (i16 175)
 // CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]]
 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
 //
@ -277,7 +277,7 @@ uint16x8_t test_vorrq_m_n_u16(uint16x8_t a, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> [[A:%.*]], <i32 89, i32 89, i32 89, i32 89>
+// CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> [[A:%.*]], splat (i32 89)
 // CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]]
 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 //
@ -294,7 +294,7 @@ uint32x4_t test_vorrq_m_n_u32(uint32x4_t a, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> <i16 -3841, i16 -3841, i16 -3841, i16 -3841, i16 -3841, i16 -3841, i16 -3841, i16 -3841>, <8 x i16> [[INACTIVE:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> splat (i16 -3841), <8 x i16> [[INACTIVE:%.*]]
 // CHECK-NEXT:    ret <8 x i16> [[TMP2]]
 //
 int16x8_t test_vmvnq_m_n_s16(int16x8_t inactive, mve_pred16_t p)
@ -310,7 +310,7 @@ int16x8_t test_vmvnq_m_n_s16(int16x8_t inactive, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -18945, i32 -18945, i32 -18945, i32 -18945>, <4 x i32> [[INACTIVE:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 -18945), <4 x i32> [[INACTIVE:%.*]]
 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 //
 int32x4_t test_vmvnq_m_n_s32(int32x4_t inactive, mve_pred16_t p)
@ -326,7 +326,7 @@ int32x4_t test_vmvnq_m_n_s32(int32x4_t inactive, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> <i16 23295, i16 23295, i16 23295, i16 23295, i16 23295, i16 23295, i16 23295, i16 23295>, <8 x i16> [[INACTIVE:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> splat (i16 23295), <8 x i16> [[INACTIVE:%.*]]
 // CHECK-NEXT:    ret <8 x i16> [[TMP2]]
 //
 uint16x8_t test_vmvnq_m_n_u16(uint16x8_t inactive, mve_pred16_t p)
@ -342,7 +342,7 @@ uint16x8_t test_vmvnq_m_n_u16(uint16x8_t inactive, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -63489, i32 -63489, i32 -63489, i32 -63489>, <4 x i32> [[INACTIVE:%.*]]
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 -63489), <4 x i32> [[INACTIVE:%.*]]
 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 //
 uint32x4_t test_vmvnq_m_n_u32(uint32x4_t inactive, mve_pred16_t p)
@ -358,7 +358,7 @@ uint32x4_t test_vmvnq_m_n_u32(uint32x4_t inactive, mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> <i16 767, i16 767, i16 767, i16 767, i16 767, i16 767, i16 767, i16 767>, <8 x i16> undef
+// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> splat (i16 767), <8 x i16> undef
 // CHECK-NEXT:    ret <8 x i16> [[TMP2]]
 //
 int16x8_t test_vmvnq_x_n_s16(mve_pred16_t p)
@ -370,7 +370,7 @@ int16x8_t test_vmvnq_x_n_s16(mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -12189697, i32 -12189697, i32 -12189697, i32 -12189697>, <4 x i32> undef
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 -12189697), <4 x i32> undef
 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 //
 int32x4_t test_vmvnq_x_n_s32(mve_pred16_t p)
@ -382,7 +382,7 @@ int32x4_t test_vmvnq_x_n_s32(mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> <i16 -21505, i16 -21505, i16 -21505, i16 -21505, i16 -21505, i16 -21505, i16 -21505, i16 -21505>, <8 x i16> undef
+// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> splat (i16 -21505), <8 x i16> undef
 // CHECK-NEXT:    ret <8 x i16> [[TMP2]]
 //
 uint16x8_t test_vmvnq_x_n_u16(mve_pred16_t p)
@ -394,7 +394,7 @@ uint16x8_t test_vmvnq_x_n_u16(mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
-// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -4865, i32 -4865, i32 -4865, i32 -4865>, <4 x i32> undef
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> splat (i32 -4865), <4 x i32> undef
 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
 //
 uint32x4_t test_vmvnq_x_n_u32(mve_pred16_t p)
--- a/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp
+++ b/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp
@ -8,7 +8,7 @@

 // CHECK-LABEL: @_Z16test_vbicq_n_s1617__simd128_int16_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], <i16 11007, i16 11007, i16 11007, i16 11007, i16 11007, i16 11007, i16 11007, i16 11007>
+// CHECK-NEXT:    [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], splat (i16 11007)
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
 //
 int16x8_t test_vbicq_n_s16(int16x8_t a)
@ -22,7 +22,7 @@ int16x8_t test_vbicq_n_s16(int16x8_t a)

 // CHECK-LABEL: @_Z16test_vbicq_n_u3218__simd128_uint32_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], <i32 -8193, i32 -8193, i32 -8193, i32 -8193>
+// CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], splat (i32 -8193)
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 uint32x4_t test_vbicq_n_u32(uint32x4_t a)
@ -36,7 +36,7 @@ uint32x4_t test_vbicq_n_u32(uint32x4_t a)

 // CHECK-LABEL: @_Z16test_vorrq_n_s3217__simd128_int32_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], <i32 65536, i32 65536, i32 65536, i32 65536>
+// CHECK-NEXT:    [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], splat (i32 65536)
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 int32x4_t test_vorrq_n_s32(int32x4_t a)
@ -50,7 +50,7 @@ int32x4_t test_vorrq_n_s32(int32x4_t a)

 // CHECK-LABEL: @_Z16test_vorrq_n_u1618__simd128_uint16_t(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], <i16 -4096, i16 -4096, i16 -4096, i16 -4096, i16 -4096, i16 -4096, i16 -4096, i16 -4096>
+// CHECK-NEXT:    [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], splat (i16 -4096)
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
 //
 uint16x8_t test_vorrq_n_u16(uint16x8_t a)
--- a/clang/test/CodeGen/arm-mve-intrinsics/vbicq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vbicq.c
@ -8,7 +8,7 @@

 // CHECK-LABEL: @test_vbicq_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = xor <16 x i8> [[B:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <16 x i8> [[B:%.*]], splat (i8 -1)
 // CHECK-NEXT:    [[TMP1:%.*]] = and <16 x i8> [[A:%.*]], [[TMP0]]
 // CHECK-NEXT:    ret <16 x i8> [[TMP1]]
 //
@ -23,7 +23,7 @@ uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b)

 // CHECK-LABEL: @test_vbicq_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = xor <8 x i16> [[B:%.*]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <8 x i16> [[B:%.*]], splat (i16 -1)
 // CHECK-NEXT:    [[TMP1:%.*]] = and <8 x i16> [[A:%.*]], [[TMP0]]
 // CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 //
@ -38,7 +38,7 @@ int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b)

 // CHECK-LABEL: @test_vbicq_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = xor <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <4 x i32> [[B:%.*]], splat (i32 -1)
 // CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[A:%.*]], [[TMP0]]
 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //
@ -55,7 +55,7 @@ uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32>
-// CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], splat (i32 -1)
 // CHECK-NEXT:    [[TMP3:%.*]] = and <4 x i32> [[TMP0]], [[TMP2]]
 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <4 x float>
 // CHECK-NEXT:    ret <4 x float> [[TMP4]]
--- a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
@ -8,7 +8,7 @@

 // CHECK-LABEL: @test_vshlq_n_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+// CHECK-NEXT:    [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], splat (i8 5)
 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
 int8x16_t test_vshlq_n_s8(int8x16_t a)
@ -22,7 +22,7 @@ int8x16_t test_vshlq_n_s8(int8x16_t a)

 // CHECK-LABEL: @test_vshlq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
+// CHECK-NEXT:    [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], splat (i16 5)
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
 //
 int16x8_t test_vshlq_n_s16(int16x8_t a)
@ -36,7 +36,7 @@ int16x8_t test_vshlq_n_s16(int16x8_t a)

 // CHECK-LABEL: @test_vshlq_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], <i32 18, i32 18, i32 18, i32 18>
+// CHECK-NEXT:    [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], splat (i32 18)
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 int32x4_t test_vshlq_n_s32(int32x4_t a)
@ -92,7 +92,7 @@ int32x4_t test_vshlq_n_s32_trivial(int32x4_t a)

 // CHECK-LABEL: @test_vshlq_n_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
+// CHECK-NEXT:    [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], splat (i8 3)
 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
 uint8x16_t test_vshlq_n_u8(uint8x16_t a)
@ -106,7 +106,7 @@ uint8x16_t test_vshlq_n_u8(uint8x16_t a)

 // CHECK-LABEL: @test_vshlq_n_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+// CHECK-NEXT:    [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], splat (i16 11)
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
 //
 uint16x8_t test_vshlq_n_u16(uint16x8_t a)
@ -120,7 +120,7 @@ uint16x8_t test_vshlq_n_u16(uint16x8_t a)

 // CHECK-LABEL: @test_vshlq_n_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], <i32 7, i32 7, i32 7, i32 7>
+// CHECK-NEXT:    [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], splat (i32 7)
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 uint32x4_t test_vshlq_n_u32(uint32x4_t a)
@ -176,7 +176,7 @@ uint32x4_t test_vshlq_n_u32_trivial(uint32x4_t a)

 // CHECK-LABEL: @test_vshrq_n_s8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>
+// CHECK-NEXT:    [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], splat (i8 4)
 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
 int8x16_t test_vshrq_n_s8(int8x16_t a)
@ -190,7 +190,7 @@ int8x16_t test_vshrq_n_s8(int8x16_t a)

 // CHECK-LABEL: @test_vshrq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
+// CHECK-NEXT:    [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], splat (i16 10)
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
 //
 int16x8_t test_vshrq_n_s16(int16x8_t a)
@ -204,7 +204,7 @@ int16x8_t test_vshrq_n_s16(int16x8_t a)

 // CHECK-LABEL: @test_vshrq_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], <i32 19, i32 19, i32 19, i32 19>
+// CHECK-NEXT:    [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], splat (i32 19)
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 int32x4_t test_vshrq_n_s32(int32x4_t a)
@ -218,7 +218,7 @@ int32x4_t test_vshrq_n_s32(int32x4_t a)

 // CHECK-LABEL: @test_vshrq_n_s8_trivial(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+// CHECK-NEXT:    [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], splat (i8 7)
 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
 int8x16_t test_vshrq_n_s8_trivial(int8x16_t a)
@ -232,7 +232,7 @@ int8x16_t test_vshrq_n_s8_trivial(int8x16_t a)

 // CHECK-LABEL: @test_vshrq_n_s16_trivial(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+// CHECK-NEXT:    [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], splat (i16 15)
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
 //
 int16x8_t test_vshrq_n_s16_trivial(int16x8_t a)
@ -246,7 +246,7 @@ int16x8_t test_vshrq_n_s16_trivial(int16x8_t a)

 // CHECK-LABEL: @test_vshrq_n_s32_trivial(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], <i32 31, i32 31, i32 31, i32 31>
+// CHECK-NEXT:    [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], splat (i32 31)
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 int32x4_t test_vshrq_n_s32_trivial(int32x4_t a)
@ -260,7 +260,7 @@ int32x4_t test_vshrq_n_s32_trivial(int32x4_t a)

 // CHECK-LABEL: @test_vshrq_n_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = lshr <16 x i8> [[A:%.*]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+// CHECK-NEXT:    [[TMP0:%.*]] = lshr <16 x i8> [[A:%.*]], splat (i8 1)
 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
 //
 uint8x16_t test_vshrq_n_u8(uint8x16_t a)
@ -274,7 +274,7 @@ uint8x16_t test_vshrq_n_u8(uint8x16_t a)

 // CHECK-LABEL: @test_vshrq_n_u16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = lshr <8 x i16> [[A:%.*]], <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
+// CHECK-NEXT:    [[TMP0:%.*]] = lshr <8 x i16> [[A:%.*]], splat (i16 10)
 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
 //
 uint16x8_t test_vshrq_n_u16(uint16x8_t a)
@ -288,7 +288,7 @@ uint16x8_t test_vshrq_n_u16(uint16x8_t a)

 // CHECK-LABEL: @test_vshrq_n_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = lshr <4 x i32> [[A:%.*]], <i32 10, i32 10, i32 10, i32 10>
+// CHECK-NEXT:    [[TMP0:%.*]] = lshr <4 x i32> [[A:%.*]], splat (i32 10)
 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
 //
 uint32x4_t test_vshrq_n_u32(uint32x4_t a)
--- a/clang/test/CodeGen/arm-mve-intrinsics/vornq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vornq.c
@ -8,7 +8,7 @@

 // CHECK-LABEL: @test_vornq_u8(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = xor <16 x i8> [[B:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <16 x i8> [[B:%.*]], splat (i8 -1)
 // CHECK-NEXT:    [[TMP1:%.*]] = or <16 x i8> [[A:%.*]], [[TMP0]]
 // CHECK-NEXT:    ret <16 x i8> [[TMP1]]
 //
@ -23,7 +23,7 @@ uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b)

 // CHECK-LABEL: @test_vornq_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = xor <8 x i16> [[B:%.*]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <8 x i16> [[B:%.*]], splat (i16 -1)
 // CHECK-NEXT:    [[TMP1:%.*]] = or <8 x i16> [[A:%.*]], [[TMP0]]
 // CHECK-NEXT:    ret <8 x i16> [[TMP1]]
 //
@ -38,7 +38,7 @@ int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b)

 // CHECK-LABEL: @test_vornq_u32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = xor <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <4 x i32> [[B:%.*]], splat (i32 -1)
 // CHECK-NEXT:    [[TMP1:%.*]] = or <4 x i32> [[A:%.*]], [[TMP0]]
 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 //
@ -55,7 +55,7 @@ uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
 // CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[B:%.*]] to <4 x i32>
-// CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], splat (i32 -1)
 // CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[TMP0]], [[TMP2]]
 // CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <4 x float>
 // CHECK-NEXT:    ret <4 x float> [[TMP4]]
--- a/clang/test/CodeGen/arm-neon-shifts.c
+++ b/clang/test/CodeGen/arm-neon-shifts.c
@ -9,13 +9,13 @@

 uint8x8_t test_shift_vshr(uint8x8_t a) {
  // CHECK-LABEL: test_shift_vshr
-  // CHECK: %{{.*}} = lshr <8 x i8> %a, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+  // CHECK: %{{.*}} = lshr <8 x i8> %a, splat (i8 5)
  return vshr_n_u8(a, 5);
 }

 int8x8_t test_shift_vshr_smax(int8x8_t a) {
  // CHECK-LABEL: test_shift_vshr_smax
-  // CHECK: %{{.*}} = ashr <8 x i8> %a, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  // CHECK: %{{.*}} = ashr <8 x i8> %a, splat (i8 7)
  return vshr_n_s8(a, 8);
 }

@ -27,14 +27,14 @@ uint8x8_t test_shift_vshr_umax(uint8x8_t a) {

 uint8x8_t test_shift_vsra(uint8x8_t a, uint8x8_t b) {
  // CHECK-LABEL: test_shift_vsra
-  // CHECK: %[[SHR:.*]] = lshr <8 x i8> %b, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5>
+  // CHECK: %[[SHR:.*]] = lshr <8 x i8> %b, splat (i8 5)
  // CHECK: %{{.*}} = add <8 x i8> %a, %[[SHR]]
  return vsra_n_u8(a, b, 5);
 }

 int8x8_t test_shift_vsra_smax(int8x8_t a, int8x8_t b) {
  // CHECK-LABEL: test_shift_vsra_smax
-  // CHECK: %[[SHR:.*]] = ashr <8 x i8> %b, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+  // CHECK: %[[SHR:.*]] = ashr <8 x i8> %b, splat (i8 7)
  // CHECK: %{{.*}} = add <8 x i8> %a, %[[SHR]]
  return vsra_n_s8(a, b, 8);
 }
--- a/clang/test/CodeGen/arm_neon_intrinsics.c
+++ b/clang/test/CodeGen/arm_neon_intrinsics.c
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@ -962,7 +962,7 @@ void test_builtin_elementwise_copysign(float f1, float f2, double d1, double d2,
  f1 = __builtin_elementwise_copysign(2.0f, f1);

  // CHECK:      [[V2F64:%.+]] = load <2 x double>, ptr %v2f64.addr, align 16
-  // CHECK-NEXT: call <2 x double> @llvm.copysign.v2f64(<2 x double> <double 1.000000e+00, double 1.000000e+00>, <2 x double> [[V2F64]])
+  // CHECK-NEXT: call <2 x double> @llvm.copysign.v2f64(<2 x double> splat (double 1.000000e+00), <2 x double> [[V2F64]])
  v2f64 = __builtin_elementwise_copysign((double2)1.0, v2f64);
 }

@ -1035,7 +1035,7 @@ void test_builtin_elementwise_fma(float f32, double f64,

  // CHECK:      [[V2F16_0:%.+]] = load <2 x half>, ptr %v2f16.addr
  // CHECK-NEXT: [[V2F16_1:%.+]] = load <2 x half>, ptr %v2f16.addr
-  // CHECK-NEXT: call <2 x half> @llvm.fma.v2f16(<2 x half> [[V2F16_0]], <2 x half> [[V2F16_1]], <2 x half> <half 0xH4400, half 0xH4400>)
+  // CHECK-NEXT: call <2 x half> @llvm.fma.v2f16(<2 x half> [[V2F16_0]], <2 x half> [[V2F16_1]], <2 x half> splat (half 0xH4400))
  half2 tmp2_v2f16 = __builtin_elementwise_fma(v2f16, v2f16, (half2)4.0);

 }
--- a/clang/test/CodeGen/builtins-nvptx.c
+++ b/clang/test/CodeGen/builtins-nvptx.c
@ -999,13 +999,13 @@ __device__ void nvvm_cvt_sm89() {
  // CHECK_PTX81_SM89: call i16 @llvm.nvvm.ff.to.e5m2x2.rn.relu(float 1.000000e+00, float 1.000000e+00)
  __nvvm_ff_to_e5m2x2_rn_relu(1.0f, 1.0f);

-  // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e4m3x2.rn(<2 x half> <half 0xH3C00, half 0xH3C00>)
+  // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e4m3x2.rn(<2 x half> splat (half 0xH3C00))
  __nvvm_f16x2_to_e4m3x2_rn({1.0f16, 1.0f16});
-  // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e4m3x2.rn.relu(<2 x half> <half 0xH3C00, half 0xH3C00>)
+  // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e4m3x2.rn.relu(<2 x half> splat (half 0xH3C00))
  __nvvm_f16x2_to_e4m3x2_rn_relu({1.0f16, 1.0f16});
-  // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e5m2x2.rn(<2 x half> <half 0xH3C00, half 0xH3C00>)
+  // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e5m2x2.rn(<2 x half> splat (half 0xH3C00))
  __nvvm_f16x2_to_e5m2x2_rn({1.0f16, 1.0f16});
-  // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e5m2x2.rn.relu(<2 x half> <half 0xH3C00, half 0xH3C00>)
+  // CHECK_PTX81_SM89: call i16 @llvm.nvvm.f16x2.to.e5m2x2.rn.relu(<2 x half> splat (half 0xH3C00))
  __nvvm_f16x2_to_e5m2x2_rn_relu({1.0f16, 1.0f16});

  // CHECK_PTX81_SM89: call <2 x half> @llvm.nvvm.e4m3x2.to.f16x2.rn(i16 18504)
@ -1035,12 +1035,12 @@ __device__ void nvvm_abs_neg_bf16_bf16x2_sm80() {

  // CHECK_PTX70_SM80: call bfloat @llvm.nvvm.abs.bf16(bfloat 0xR3DCD)
  __nvvm_abs_bf16(BF16);
-  // CHECK_PTX70_SM80: call <2 x bfloat> @llvm.nvvm.abs.bf16x2(<2 x bfloat> <bfloat 0xR3DCD, bfloat 0xR3DCD>)
+  // CHECK_PTX70_SM80: call <2 x bfloat> @llvm.nvvm.abs.bf16x2(<2 x bfloat> splat (bfloat 0xR3DCD))
  __nvvm_abs_bf16x2(BF16X2);

  // CHECK_PTX70_SM80: call bfloat @llvm.nvvm.neg.bf16(bfloat 0xR3DCD)
  __nvvm_neg_bf16(BF16);
-  // CHECK_PTX70_SM80: call <2 x bfloat> @llvm.nvvm.neg.bf16x2(<2 x bfloat> <bfloat 0xR3DCD, bfloat 0xR3DCD>)
+  // CHECK_PTX70_SM80: call <2 x bfloat> @llvm.nvvm.neg.bf16x2(<2 x bfloat> splat (bfloat 0xR3DCD))
  __nvvm_neg_bf16x2(BF16X2);
 #endif
  // CHECK: ret void
--- a/clang/test/CodeGen/builtinshufflevector2.c
+++ b/clang/test/CodeGen/builtinshufflevector2.c
@ -5,7 +5,7 @@ typedef unsigned int uint4 __attribute__((ext_vector_type(4)));

 // CHECK-LABEL: define {{.*}}void @clang_shufflevector_v_v(
 void clang_shufflevector_v_v( float4* A, float4 x, uint4 mask ) {
-// CHECK: [[MASK:%.*]] = and <4 x i32> {{%.*}}, <i32 3, i32 3, i32 3, i32 3>
+// CHECK: [[MASK:%.*]] = and <4 x i32> {{%.*}}, splat (i32 3)
 // CHECK: [[I:%.*]] = extractelement <4 x i32> [[MASK]], i{{[0-9]+}} 0
 // CHECK: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i{{[0-9]+}} [[I]]
 //
--- a/clang/test/CodeGen/const-init.c
+++ b/clang/test/CodeGen/const-init.c
@ -139,7 +139,7 @@ void g28(void) {
  typedef long long v1i64 __attribute((vector_size(8)));
  typedef short v12i16 __attribute((vector_size(24)));
  typedef long double v2f80 __attribute((vector_size(24)));
-  // CHECK: @g28.a = internal global <1 x i64> <i64 10>
+  // CHECK: @g28.a = internal global <1 x i64> splat (i64 10)
  // @g28.b = internal global <12 x i16> <i16 0, i16 0, i16 0, i16 -32768, i16 16383, i16 0, i16 0, i16 0, i16 0, i16 -32768, i16 16384, i16 0>
  // @g28.c = internal global <2 x x86_fp80> <x86_fp80 0xK3FFF8000000000000000, x86_fp80 0xK40008000000000000000>, align 32
  static v1i64 a = (v1i64)10LL;
--- a/clang/test/CodeGen/matrix-type-operators.c
+++ b/clang/test/CodeGen/matrix-type-operators.c
@ -790,7 +790,7 @@ void multiply_compound_int_matrix_scalar_ull(ix9x3_t b, unsigned long long s) {
 // CHECK-NEXT:    store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4
 // NOOPT-NEXT:    [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}}
 // OPT-NEXT:      [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
-// CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
+// CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[MAT]], splat (float 2.500000e+00)
 // CHECK-NEXT:    store <6 x float> [[RES]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    ret void
 //
@ -804,7 +804,7 @@ void multiply_float_matrix_constant(fx2x3_t a) {
 // CHECK-NEXT:    store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4
 // NOOPT-NEXT:    [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}}
 // OPT-NEXT:      [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
-// CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
+// CHECK-NEXT:    [[RES:%.*]] = fmul <6 x float> [[MAT]], splat (float 2.500000e+00)
 // CHECK-NEXT:    store <6 x float> [[RES]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    ret void
 void multiply_compound_float_matrix_constant(fx2x3_t a) {
@ -817,7 +817,7 @@ void multiply_compound_float_matrix_constant(fx2x3_t a) {
 // CHECK-NEXT:    store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4
 // NOOPT-NEXT:    [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}}
 // OPT-NEXT:      [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
-// CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, [[MAT]]
+// CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> splat (i32 5), [[MAT]]
 // CHECK-NEXT:    store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    ret void
 //
@ -831,7 +831,7 @@ void multiply_int_matrix_constant(ix9x3_t a) {
 // CHECK-NEXT:    store <27 x i32> [[A:%.*]], ptr [[A_ADDR]], align 4
 // NOOPT-NEXT:    [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4{{$}}
 // OPT-NEXT:      [[MAT:%.*]] = load <27 x i32>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
-// CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[MAT]], <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
+// CHECK-NEXT:    [[RES:%.*]] = mul <27 x i32> [[MAT]], splat (i32 5)
 // CHECK-NEXT:    store <27 x i32> [[RES]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    ret void
 //
@ -939,7 +939,7 @@ void divide_ull_matrix_scalar_ull(ullx4x2_t b, unsigned long long s) {
 // CHECK-NEXT:    store <6 x float> [[A:%.*]], ptr [[A_ADDR]], align 4
 // NOOPT-NEXT:    [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4{{$}}
 // OPT-NEXT:      [[MAT:%.*]] = load <6 x float>, ptr [[A_ADDR]], align 4, !tbaa !{{[0-9]+}}{{$}}
-// CHECK-NEXT:    [[RES:%.*]] = fdiv <6 x float> [[MAT]], <float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00, float 2.500000e+00>
+// CHECK-NEXT:    [[RES:%.*]] = fdiv <6 x float> [[MAT]], splat (float 2.500000e+00)
 // CHECK-NEXT:    store <6 x float> [[RES]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    ret void
 //
--- a/clang/test/CodeGen/neon-immediate-ubsan.c
+++ b/clang/test/CodeGen/neon-immediate-ubsan.c
@ -21,6 +21,6 @@
 int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
  // CHECK-LABEL: @test_vqrshrn_n_s64
  // CHECK-AARCH64: call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> {{.*}}, i32 1)
-  // CHECK-ARMV7: call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> {{.*}}, <2 x i64> <i64 -1, i64 -1>)
+  // CHECK-ARMV7: call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> {{.*}}, <2 x i64> splat (i64 -1))
  return vqrshrn_n_s64(a, 0 + 1);
 }
--- a/clang/test/CodeGen/nofpclass.c
+++ b/clang/test/CodeGen/nofpclass.c
@ -103,7 +103,7 @@ float defined_func_f32(float a, float b, float c) {
 // CFINITEONLY-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16
 // CFINITEONLY-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[C_ADDR]], align 16
 // CFINITEONLY-NEXT:    [[TMP3:%.*]] = call nnan ninf <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
-// CFINITEONLY-NEXT:    [[ADD:%.*]] = fadd nnan ninf <2 x double> [[TMP3]], <double 4.000000e+00, double 4.000000e+00>
+// CFINITEONLY-NEXT:    [[ADD:%.*]] = fadd nnan ninf <2 x double> [[TMP3]], splat (double 4.000000e+00)
 // CFINITEONLY-NEXT:    ret <2 x double> [[ADD]]
 //
 // CLFINITEONLY: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
@ -111,7 +111,7 @@ float defined_func_f32(float a, float b, float c) {
 // CLFINITEONLY-SAME: (<2 x double> noundef nofpclass(nan inf) [[A:%.*]], <2 x double> noundef nofpclass(nan inf) [[B:%.*]], <2 x double> noundef nofpclass(nan inf) [[C:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
 // CLFINITEONLY-NEXT:  entry:
 // CLFINITEONLY-NEXT:    [[TMP0:%.*]] = tail call nnan ninf <2 x double> @llvm.fma.v2f64(<2 x double> [[A]], <2 x double> [[B]], <2 x double> [[C]])
-// CLFINITEONLY-NEXT:    [[ADD:%.*]] = fadd nnan ninf <2 x double> [[TMP0]], <double 4.000000e+00, double 4.000000e+00>
+// CLFINITEONLY-NEXT:    [[ADD:%.*]] = fadd nnan ninf <2 x double> [[TMP0]], splat (double 4.000000e+00)
 // CLFINITEONLY-NEXT:    ret <2 x double> [[ADD]]
 //
 // NONANS: Function Attrs: noinline nounwind optnone
@ -128,7 +128,7 @@ float defined_func_f32(float a, float b, float c) {
 // NONANS-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16
 // NONANS-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[C_ADDR]], align 16
 // NONANS-NEXT:    [[TMP3:%.*]] = call nnan <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
-// NONANS-NEXT:    [[ADD:%.*]] = fadd nnan <2 x double> [[TMP3]], <double 4.000000e+00, double 4.000000e+00>
+// NONANS-NEXT:    [[ADD:%.*]] = fadd nnan <2 x double> [[TMP3]], splat (double 4.000000e+00)
 // NONANS-NEXT:    ret <2 x double> [[ADD]]
 //
 // NOINFS: Function Attrs: noinline nounwind optnone
@ -145,7 +145,7 @@ float defined_func_f32(float a, float b, float c) {
 // NOINFS-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16
 // NOINFS-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[C_ADDR]], align 16
 // NOINFS-NEXT:    [[TMP3:%.*]] = call ninf <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
-// NOINFS-NEXT:    [[ADD:%.*]] = fadd ninf <2 x double> [[TMP3]], <double 4.000000e+00, double 4.000000e+00>
+// NOINFS-NEXT:    [[ADD:%.*]] = fadd ninf <2 x double> [[TMP3]], splat (double 4.000000e+00)
 // NOINFS-NEXT:    ret <2 x double> [[ADD]]
 //
 double2 defined_func_v2f64(double2 a, double2 b, double2 c) {
--- a/clang/test/CodeGen/ppc-vec_ct-truncate.c
+++ b/clang/test/CodeGen/ppc-vec_ct-truncate.c
@ -30,11 +30,11 @@ void test(void) {

  res_vsi = vec_cts(a1, 31);
  //  CHECK:       [[TMP0:%.*]] = load <2 x double>, ptr @a1, align 16
-  //  CHECK-NEXT:  fmul <2 x double> [[TMP0]], <double 0x41E0000000000000, double 0x41E0000000000000>
+  //  CHECK-NEXT:  fmul <2 x double> [[TMP0]], splat (double 0x41E0000000000000)

  res_vsi = vec_cts(a1, 500);
  // CHECK:        [[TMP4:%.*]] = load <2 x double>, ptr @a1, align 16
-  // CHECK-NEXT:   fmul <2 x double> [[TMP4]], <double 0x4130000000000000, double 0x4130000000000000>
+  // CHECK-NEXT:   fmul <2 x double> [[TMP4]], splat (double 0x4130000000000000)

  res_vsi = vec_ctu(vf1, 31);
  // CHECK:        [[TMP8:%.*]] = load <4 x float>, ptr @vf1, align 16
@ -46,19 +46,19 @@ void test(void) {

  res_vull = vec_ctul(vf1, 31);
  // CHECK:        [[TMP12:%.*]] = load <4 x float>, ptr @vf1, align 16
-  // CHECK-NEXT:   fmul <4 x float> [[TMP12]], <float 0x41E0000000000000, float 0x41E0000000000000, float 0x41E0000000000000, float 0x41E0000000000000>
+  // CHECK-NEXT:   fmul <4 x float> [[TMP12]], splat (float 0x41E0000000000000)

  res_vull = vec_ctul(vf1, 500);
  // CHECK:        [[TMP21:%.*]] = load <4 x float>, ptr @vf1, align 16
-  // CHECK-NEXT:   fmul <4 x float> [[TMP21]], <float 0x4130000000000000, float 0x4130000000000000, float 0x4130000000000000, float 0x4130000000000000>
+  // CHECK-NEXT:   fmul <4 x float> [[TMP21]], splat (float 0x4130000000000000)

  res_vsll = vec_ctsl(vf1, 31);
  // CHECK:        [[TMP30:%.*]] = load <4 x float>, ptr @vf1, align 16
-  // CHECK-NEXT:   fmul <4 x float> [[TMP30]], <float 0x41E0000000000000, float 0x41E0000000000000, float 0x41E0000000000000, float 0x41E0000000000000>
+  // CHECK-NEXT:   fmul <4 x float> [[TMP30]], splat (float 0x41E0000000000000)

  res_vsll = vec_ctsl(vf1, 500);
  // CHECK:        [[TMP39:%.*]] = load <4 x float>, ptr @vf1, align 16
-  // CHECK-NEXT:   fmul <4 x float> [[TMP39]], <float 0x4130000000000000, float 0x4130000000000000, float 0x4130000000000000, float 0x4130000000000000>
+  // CHECK-NEXT:   fmul <4 x float> [[TMP39]], splat (float 0x4130000000000000)

  res_vf = vec_ctf(vsi1, 31);
  // CHECK:        [[TMP48:%.*]] = load <4 x i32>, ptr @vsi1, align 16
@ -71,10 +71,10 @@ void test(void) {
  res_vd = vec_ctd(vsi1, 31);
  // CHECK:        [[TMP53:%.*]] = load <4 x i32>, ptr @vsi1, align 16
  // CHECK:        [[TMP83:%.*]] = call <2 x double> @llvm.ppc.vsx.xvcvsxwdp(<4 x i32> [[TMP82:%.*]])
-  // CHECK-NEXT:   fmul <2 x double> [[TMP83]], <double 0x3E00000000000000, double 0x3E00000000000000>
+  // CHECK-NEXT:   fmul <2 x double> [[TMP83]], splat (double 0x3E00000000000000)

  res_vd = vec_ctd(vsi1, 500);
  // CHECK:        [[TMP84:%.*]] = load <4 x i32>, ptr @vsi1, align 16
  // CHECK:        [[TMP115:%.*]] = call <2 x double> @llvm.ppc.vsx.xvcvsxwdp(<4 x i32> [[TMP114:%.*]])
-  // CHECK-NEXT:   fmul <2 x double> [[TMP115]], <double 0x3EB0000000000000, double 0x3EB0000000000000>
+  // CHECK-NEXT:   fmul <2 x double> [[TMP115]], splat (double 0x3EB0000000000000)
 }
--- a/clang/test/CodeGen/variadic-nvptx.c
+++ b/clang/test/CodeGen/variadic-nvptx.c
@ -39,7 +39,7 @@ extern void varargs_simple(int, ...);
 // CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[A]], i32 0, i32 2
 // CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
 // CHECK-NEXT:    call void (i32, ...) @varargs_simple(i32 noundef 0, i32 [[TMP7]], i8 [[TMP9]], i32 [[TMP11]]) #[[ATTR3]]
-// CHECK-NEXT:    store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr [[V]], align 16
+// CHECK-NEXT:    store <4 x i32> splat (i32 1), ptr [[V]], align 16
 // CHECK-NEXT:    [[TMP12:%.*]] = load <4 x i32>, ptr [[V]], align 16
 // CHECK-NEXT:    call void (i32, ...) @varargs_simple(i32 noundef 0, <4 x i32> noundef [[TMP12]]) #[[ATTR3]]
 // CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON_0]], ptr [[T]], i32 0, i32 0
--- a/clang/test/CodeGen/vecshift.c
+++ b/clang/test/CodeGen/vecshift.c
@ -51,22 +51,22 @@ vector_uint4 vui4;
 void foo(void) {
  vc8 = 1 << vc8;
 // CHECK: [[t0:%.+]] = load <8 x i8>, ptr {{@.+}},
-// CHECK: shl <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, [[t0]]
+// CHECK: shl <8 x i8> splat (i8 1), [[t0]]
  vuc8 = 1 << vuc8;
 // CHECK: [[t1:%.+]] = load <8 x i8>, ptr {{@.+}},
-// CHECK: shl <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, [[t1]]
+// CHECK: shl <8 x i8> splat (i8 1), [[t1]]
  vi8 = 1 << vi8;
 // CHECK: [[t2:%.+]] = load <8 x i32>, ptr {{@.+}},
-// CHECK: shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[t2]]
+// CHECK: shl <8 x i32> splat (i32 1), [[t2]]
  vui8 = 1 << vui8;
 // CHECK: [[t3:%.+]] = load <8 x i32>, ptr {{@.+}},
-// CHECK: shl <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, [[t3]]
+// CHECK: shl <8 x i32> splat (i32 1), [[t3]]
  vs8 = 1 << vs8;
 // CHECK: [[t4:%.+]] = load <8 x i16>, ptr {{@.+}},
-// CHECK: shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, [[t4]]
+// CHECK: shl <8 x i16> splat (i16 1), [[t4]]
  vus8 = 1 << vus8;
 // CHECK: [[t5:%.+]] = load <8 x i16>, ptr {{@.+}},
-// CHECK: shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, [[t5]]
+// CHECK: shl <8 x i16> splat (i16 1), [[t5]]

  vc8 = c << vc8;
 // CHECK: [[t6:%.+]] = load i8, ptr @c,
@ -104,7 +104,7 @@ void foo(void) {
 // CHECK: shl <8 x i16> [[splat_splat20]], [[t15]]
  vus8 = 1 << vus8;
 // CHECK: [[t16:%.+]] = load <8 x i16>, ptr {{@.+}},
-// CHECK: [[shl22:%.+]] = shl <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, [[t16]]
+// CHECK: [[shl22:%.+]] = shl <8 x i16> splat (i16 1), [[t16]]

 vc8 = vc8 << vc8;
 // CHECK: [[t17:%.+]] = load <8 x i8>, ptr {{@.+}},
--- a/clang/test/CodeGen/vector-scalar.c
+++ b/clang/test/CodeGen/vector-scalar.c
@ -5,28 +5,28 @@
 typedef unsigned char uchar4  __attribute__ ((vector_size (4)));

 // CHECK: @add2
-// CHECK: add <4 x i8> {{.*}}, <i8 2, i8 2, i8 2, i8 2>
+// CHECK: add <4 x i8> {{.*}}, splat (i8 2)
 uchar4 add2(uchar4 v)
 {
  return v + 2;
 }

 // CHECK: @sub2
-// CHECK: sub <4 x i8> {{.*}}, <i8 2, i8 2, i8 2, i8 2>
+// CHECK: sub <4 x i8> {{.*}}, splat (i8 2)
 uchar4 sub2(uchar4 v)
 {
  return v - 2;
 }

 // CHECK: @mul2
-// CHECK: mul <4 x i8> {{.*}}, <i8 2, i8 2, i8 2, i8 2>
+// CHECK: mul <4 x i8> {{.*}}, splat (i8 2)
 uchar4 mul2(uchar4 v)
 {
  return v * 2;
 }

 // CHECK: @div2
-// CHECK: udiv <4 x i8> {{.*}}, <i8 2, i8 2, i8 2, i8 2>
+// CHECK: udiv <4 x i8> {{.*}}, splat (i8 2)
 uchar4 div2(uchar4 v)
 {
  return v / 2;
@ -35,7 +35,7 @@ uchar4 div2(uchar4 v)
 typedef __attribute__(( ext_vector_type(4) )) unsigned char uchar4_ext;

 // CHECK: @div3_ext
-// CHECK: udiv <4 x i8> %{{.*}}, <i8 3, i8 3, i8 3, i8 3>
+// CHECK: udiv <4 x i8> %{{.*}}, splat (i8 3)
 uchar4_ext div3_ext(uchar4_ext v)
 {
  return v / 3;
--- a/clang/test/CodeGenCXX/auto-var-init.cpp
+++ b/clang/test/CodeGenCXX/auto-var-init.cpp
@ -1581,7 +1581,7 @@ TEST_UNINIT(intvec16, int  __attribute__((vector_size(16))));
 // CHECK:       %uninit = alloca <4 x i32>, align
 // CHECK-NEXT:  call void @{{.*}}used{{.*}}%uninit)
 // PATTERN-LABEL: @test_intvec16_uninit()
-// PATTERN: store <4 x i32> <i32 [[I32]], i32 [[I32]], i32 [[I32]], i32 [[I32]]>, ptr %uninit, align 16, !annotation [[AUTO_INIT]]
+// PATTERN: store <4 x i32> splat (i32 [[I32]]), ptr %uninit, align 16, !annotation [[AUTO_INIT]]
 // ZERO-LABEL: @test_intvec16_uninit()
 // ZERO: store <4 x i32> zeroinitializer, ptr %uninit, align 16, !annotation [[AUTO_INIT]]

@ -1595,7 +1595,7 @@ TEST_BRACES(intvec16, int  __attribute__((vector_size(16))));
 TEST_CUSTOM(intvec16, int  __attribute__((vector_size(16))), { 0x44444444, 0x44444444, 0x44444444, 0x44444444 });
 // CHECK-LABEL: @test_intvec16_custom()
 // CHECK:       %custom = alloca <4 x i32>, align [[ALIGN:[0-9]*]]
-// CHECK-NEXT:  store <4 x i32> <i32 1145324612, i32 1145324612, i32 1145324612, i32 1145324612>, ptr %custom, align [[ALIGN]]
+// CHECK-NEXT:  store <4 x i32> splat (i32 1145324612), ptr %custom, align [[ALIGN]]
 // CHECK-NOT:   !annotation
 // CHECK-NEXT:  call void @{{.*}}used{{.*}}%custom)

@ -1604,7 +1604,7 @@ TEST_UNINIT(longlongvec32, long long  __attribute__((vector_size(32))));
 // CHECK:       %uninit = alloca <4 x i64>, align
 // CHECK-NEXT:  call void @{{.*}}used{{.*}}%uninit)
 // PATTERN-LABEL: @test_longlongvec32_uninit()
-// PATTERN: store <4 x i64> <i64 [[I64]], i64 [[I64]], i64 [[I64]], i64 [[I64]]>, ptr %uninit, align 32, !annotation [[AUTO_INIT]]
+// PATTERN: store <4 x i64> splat (i64 [[I64]]), ptr %uninit, align 32, !annotation [[AUTO_INIT]]
 // ZERO-LABEL: @test_longlongvec32_uninit()
 // ZERO: store <4 x i64> zeroinitializer, ptr %uninit, align 32, !annotation [[AUTO_INIT]]

@ -1618,7 +1618,7 @@ TEST_BRACES(longlongvec32, long long  __attribute__((vector_size(32))));
 TEST_CUSTOM(longlongvec32, long long  __attribute__((vector_size(32))), { 0x3333333333333333, 0x3333333333333333, 0x3333333333333333, 0x3333333333333333 });
 // CHECK-LABEL: @test_longlongvec32_custom()
 // CHECK:       %custom = alloca <4 x i64>, align [[ALIGN:[0-9]*]]
-// CHECK-NEXT:  store <4 x i64> <i64 3689348814741910323, i64 3689348814741910323, i64 3689348814741910323, i64 3689348814741910323>, ptr %custom, align [[ALIGN]]
+// CHECK-NEXT:  store <4 x i64> splat (i64 3689348814741910323), ptr %custom, align [[ALIGN]]
 // CHECK-NEXT:  call void @{{.*}}used{{.*}}%custom)

 TEST_UNINIT(floatvec16, float  __attribute__((vector_size(16))));
@ -1626,7 +1626,7 @@ TEST_UNINIT(floatvec16, float  __attribute__((vector_size(16))));
 // CHECK:       %uninit = alloca <4 x float>, align
 // CHECK-NEXT:  call void @{{.*}}used{{.*}}%uninit)
 // PATTERN-LABEL: @test_floatvec16_uninit()
-// PATTERN: store <4 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>, ptr %uninit, align 16, !annotation [[AUTO_INIT]]
+// PATTERN: store <4 x float> splat (float 0xFFFFFFFFE0000000), ptr %uninit, align 16, !annotation [[AUTO_INIT]]
 // ZERO-LABEL: @test_floatvec16_uninit()
 // ZERO: store <4 x float> zeroinitializer, ptr %uninit, align 16, !annotation [[AUTO_INIT]]

@ -1640,7 +1640,7 @@ TEST_BRACES(floatvec16, float  __attribute__((vector_size(16))));
 TEST_CUSTOM(floatvec16, float  __attribute__((vector_size(16))), { 3.1415926535897932384626433, 3.1415926535897932384626433, 3.1415926535897932384626433, 3.1415926535897932384626433 });
 // CHECK-LABEL: @test_floatvec16_custom()
 // CHECK:       %custom = alloca <4 x float>, align [[ALIGN:[0-9]*]]
-// CHECK-NEXT:  store <4 x float> <float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000, float 0x400921FB60000000>, ptr %custom, align [[ALIGN]]
+// CHECK-NEXT:  store <4 x float> splat (float 0x400921FB60000000), ptr %custom, align [[ALIGN]]
 // CHECK-NOT:   !annotation
 // CHECK-NEXT:  call void @{{.*}}used{{.*}}%custom)

@ -1649,7 +1649,7 @@ TEST_UNINIT(doublevec32, double  __attribute__((vector_size(32))));
 // CHECK:       %uninit = alloca <4 x double>, align
 // CHECK-NEXT:  call void @{{.*}}used{{.*}}%uninit)
 // PATTERN-LABEL: @test_doublevec32_uninit()
-// PATTERN: store <4 x double> <double 0xFFFFFFFFFFFFFFFF, double 0xFFFFFFFFFFFFFFFF, double 0xFFFFFFFFFFFFFFFF, double 0xFFFFFFFFFFFFFFFF>, ptr %uninit, align 32, !annotation [[AUTO_INIT]]
+// PATTERN: store <4 x double> splat (double 0xFFFFFFFFFFFFFFFF), ptr %uninit, align 32, !annotation [[AUTO_INIT]]
 // ZERO-LABEL: @test_doublevec32_uninit()
 // ZERO: store <4 x double> zeroinitializer, ptr %uninit, align 32, !annotation [[AUTO_INIT]]

@ -1663,7 +1663,7 @@ TEST_BRACES(doublevec32, double  __attribute__((vector_size(32))));
 TEST_CUSTOM(doublevec32, double  __attribute__((vector_size(32))), { 3.1415926535897932384626433, 3.1415926535897932384626433, 3.1415926535897932384626433, 3.1415926535897932384626433 });
 // CHECK-LABEL: @test_doublevec32_custom()
 // CHECK:       %custom = alloca <4 x double>, align [[ALIGN:[0-9]*]]
-// CHECK-NEXT:  store <4 x double> <double 0x400921FB54442D18, double 0x400921FB54442D18, double 0x400921FB54442D18, double 0x400921FB54442D18>, ptr %custom, align [[ALIGN]]
+// CHECK-NEXT:  store <4 x double> splat (double 0x400921FB54442D18), ptr %custom, align [[ALIGN]]
 // CHECK-NOT:   !annotation
 // CHECK-NEXT:  call void @{{.*}}used{{.*}}%custom)

@ -1673,7 +1673,7 @@ TEST_UNINIT(doublevec24, double  __attribute__((vector_size(24))));
 // CHECK:       %uninit = alloca <3 x double>, align
 // CHECK-NEXT:  call void @{{.*}}used{{.*}}%uninit)
 // PATTERN-LABEL: @test_doublevec24_uninit()
-// PATTERN: store <3 x double> <double 0xFFFFFFFFFFFFFFFF, double 0xFFFFFFFFFFFFFFFF, double 0xFFFFFFFFFFFFFFFF>, ptr %uninit, align 32, !annotation [[AUTO_INIT]]
+// PATTERN: store <3 x double> splat (double 0xFFFFFFFFFFFFFFFF), ptr %uninit, align 32, !annotation [[AUTO_INIT]]
 // ZERO-LABEL: @test_doublevec24_uninit()
 // ZERO: store <3 x double> zeroinitializer, ptr %uninit, align 32, !annotation [[AUTO_INIT]]

@ -1683,7 +1683,7 @@ TEST_UNINIT(longdoublevec32, long double  __attribute__((vector_size(sizeof(long
 // CHECK:       %uninit = alloca <2 x x86_fp80>, align
 // CHECK-NEXT:  call void @{{.*}}used{{.*}}%uninit)
 // PATTERN-LABEL: @test_longdoublevec32_uninit()
-// PATTERN: store <2 x x86_fp80> <x86_fp80 0xKFFFFFFFFFFFFFFFFFFFF, x86_fp80 0xKFFFFFFFFFFFFFFFFFFFF>, ptr %uninit, align 32, !annotation [[AUTO_INIT]]
+// PATTERN: store <2 x x86_fp80> splat (x86_fp80 0xKFFFFFFFFFFFFFFFFFFFF), ptr %uninit, align 32, !annotation [[AUTO_INIT]]
 // ZERO-LABEL: @test_longdoublevec32_uninit()
 // ZERO: store <2 x x86_fp80> zeroinitializer, ptr %uninit, align 32, !annotation [[AUTO_INIT]]

--- a/clang/test/CodeGenCXX/ext-int.cpp
+++ b/clang/test/CodeGenCXX/ext-int.cpp
@ -450,38 +450,38 @@ void ShiftBitIntByConstant(uint16_t4 Ext) {
 // LIN32: define dso_local void @_Z21ShiftBitIntByConstantDv4_DU16_(i64 %
 // WIN: define dso_local void @"?ShiftBitIntByConstant@@YAXT?$__vector@U?$_UBitInt@$0BA@@__clang@@$03@__clang@@@Z"(<4 x i16>
  Ext << 7;
-  // CHECK: shl <4 x i16> %{{.+}}, <i16 7, i16 7, i16 7, i16 7>
+  // CHECK: shl <4 x i16> %{{.+}}, splat (i16 7)
  Ext >> 7;
-  // CHECK: lshr <4 x i16> %{{.+}}, <i16 7, i16 7, i16 7, i16 7>
+  // CHECK: lshr <4 x i16> %{{.+}}, splat (i16 7)
  Ext << -7;
-  // CHECK: shl <4 x i16> %{{.+}}, <i16 -7, i16 -7, i16 -7, i16 -7>
+  // CHECK: shl <4 x i16> %{{.+}}, splat (i16 -7)
  Ext >> -7;
-  // CHECK: lshr <4 x i16> %{{.+}}, <i16 -7, i16 -7, i16 -7, i16 -7>
+  // CHECK: lshr <4 x i16> %{{.+}}, splat (i16 -7)

  // UB in C/C++, Defined in OpenCL.
  Ext << 29;
-  // CHECK: shl <4 x i16> %{{.+}}, <i16 29, i16 29, i16 29, i16 29>
+  // CHECK: shl <4 x i16> %{{.+}}, splat (i16 29)
  Ext >> 29;
-  // CHECK: lshr <4 x i16> %{{.+}}, <i16 29, i16 29, i16 29, i16 29>
+  // CHECK: lshr <4 x i16> %{{.+}}, splat (i16 29)
 }
 void ShiftBitIntByConstant(vint32_t8 Ext) {
 // LIN64: define{{.*}} void @_Z21ShiftBitIntByConstantDv8_DB32_(ptr byval(<8 x i32>) align 32 %
 // LIN32: define dso_local void @_Z21ShiftBitIntByConstantDv8_DB32_(<8 x i32> %
 // WIN: define dso_local void @"?ShiftBitIntByConstant@@YAXT?$__vector@U?$_BitInt@$0CA@@__clang@@$07@__clang@@@Z"(<8 x i32>
  Ext << 7;
-  // CHECK: shl <8 x i32> %{{.+}}, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  // CHECK: shl <8 x i32> %{{.+}}, splat (i32 7)
  Ext >> 7;
-  // CHECK: ashr <8 x i32> %{{.+}}, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+  // CHECK: ashr <8 x i32> %{{.+}}, splat (i32 7)
  Ext << -7;
-  // CHECK: shl <8 x i32> %{{.+}}, <i32 -7, i32 -7, i32 -7, i32 -7, i32 -7, i32 -7, i32 -7, i32 -7>
+  // CHECK: shl <8 x i32> %{{.+}}, splat (i32 -7)
  Ext >> -7;
-  // CHECK: ashr <8 x i32> %{{.+}}, <i32 -7, i32 -7, i32 -7, i32 -7, i32 -7, i32 -7, i32 -7, i32 -7>
+  // CHECK: ashr <8 x i32> %{{.+}}, splat (i32 -7)

  // UB in C/C++, Defined in OpenCL.
  Ext << 29;
-  // CHECK: shl <8 x i32> %{{.+}}, <i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29>
+  // CHECK: shl <8 x i32> %{{.+}}, splat (i32 29)
  Ext >> 29;
-  // CHECK: ashr <8 x i32> %{{.+}}, <i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29, i32 29>
+  // CHECK: ashr <8 x i32> %{{.+}}, splat (i32 29)
 }

 void ConstantShiftByBitInt(_BitInt(28) Ext, _BitInt(65) LargeExt) {
--- a/clang/test/CodeGenCXX/ext-vector-type-conditional.cpp
+++ b/clang/test/CodeGenCXX/ext-vector-type-conditional.cpp
@ -41,7 +41,7 @@ void TwoVectorOps() {
  // CHECK: [[RHS:%.+]] = load <2 x i32>
  // CHECK: [[NEG:%.+]] = icmp slt <2 x i32> [[COND]], zeroinitializer
  // CHECK: [[SEXT:%.+]] = sext <2 x i1> [[NEG]] to <2 x i32>
-  // CHECK: [[XOR:%.+]] = xor <2 x i32> [[SEXT]], <i32 -1, i32 -1>
+  // CHECK: [[XOR:%.+]] = xor <2 x i32> [[SEXT]], splat (i32 -1)
  // CHECK: [[RHS_AND:%.+]] = and <2 x i32> [[RHS]], [[XOR]]
  // CHECK: [[LHS_AND:%.+]] = and <2 x i32> [[LHS]], [[SEXT]]
  // CHECK: = or <2 x i32> [[RHS_AND]], [[LHS_AND]]
@ -52,7 +52,7 @@ void TwoVectorOps() {
  // CHECK: [[RHS:%.+]] = load <2 x float>
  // CHECK: [[NEG:%.+]] = icmp slt <2 x i32> [[COND]], zeroinitializer
  // CHECK: [[SEXT:%.+]] = sext <2 x i1> [[NEG]] to <2 x i32>
-  // CHECK: [[XOR:%.+]] = xor <2 x i32> [[SEXT]], <i32 -1, i32 -1>
+  // CHECK: [[XOR:%.+]] = xor <2 x i32> [[SEXT]], splat (i32 -1)
  // CHECK: [[RHS_EXT:%.+]] = bitcast <2 x float> [[RHS]] to <2 x i32>
  // CHECK: [[LHS_EXT:%.+]] = bitcast <2 x float> [[LHS]] to <2 x i32>
  // CHECK: [[RHS_AND:%.+]] = and <2 x i32> [[RHS_EXT]], [[XOR]]
@ -66,7 +66,7 @@ void TwoVectorOps() {
  // CHECK: [[RHS:%.+]] = load <2 x double>
  // CHECK: [[NEG:%.+]] = icmp slt <2 x i64> [[COND]], zeroinitializer
  // CHECK: [[SEXT:%.+]] = sext <2 x i1> [[NEG]] to <2 x i64>
-  // CHECK: [[XOR:%.+]] = xor <2 x i64> [[SEXT]], <i64 -1, i64 -1>
+  // CHECK: [[XOR:%.+]] = xor <2 x i64> [[SEXT]], splat (i64 -1)
  // CHECK: [[RHS_EXT:%.+]] = bitcast <2 x double> [[RHS]] to <2 x i64>
  // CHECK: [[LHS_EXT:%.+]] = bitcast <2 x double> [[LHS]] to <2 x i64>
  // CHECK: [[RHS_AND:%.+]] = and <2 x i64> [[RHS_EXT]], [[XOR]]
@ -87,7 +87,7 @@ void TwoScalarOps() {
  // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i16> [[RHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
  // CHECK: [[NEG:%.+]] = icmp slt <4 x i16> [[COND]], zeroinitializer
  // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i16>
-  // CHECK: [[XOR:%.+]] = xor <4 x i16> [[SEXT]], <i16 -1, i16 -1, i16 -1, i16 -1>
+  // CHECK: [[XOR:%.+]] = xor <4 x i16> [[SEXT]], splat (i16 -1)
  // CHECK: [[RHS_AND:%.+]] = and <4 x i16> [[RHS_SPLAT]], [[XOR]]
  // CHECK: [[LHS_AND:%.+]] = and <4 x i16> [[LHS_SPLAT]], [[SEXT]]
  // CHECK: = or <4 x i16> [[RHS_AND]], [[LHS_AND]]
@ -102,7 +102,7 @@ void TwoScalarOps() {
  // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i16> [[RHS_SPLAT_INSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
  // CHECK: [[NEG:%.+]] = icmp slt <4 x i16> [[COND]], zeroinitializer
  // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i16>
-  // CHECK: [[XOR:%.+]] = xor <4 x i16> [[SEXT]], <i16 -1, i16 -1, i16 -1, i16 -1>
+  // CHECK: [[XOR:%.+]] = xor <4 x i16> [[SEXT]], splat (i16 -1)
  // CHECK: [[RHS_AND:%.+]] = and <4 x i16> [[RHS_SPLAT]], [[XOR]]
  // CHECK: [[LHS_AND:%.+]] = and <4 x i16> [[LHS_SPLAT]], [[SEXT]]
  // CHECK: = or <4 x i16> [[RHS_AND]], [[LHS_AND]]
@ -119,7 +119,7 @@ void TwoScalarOps() {
  // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i32> [[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
  // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer
  // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32>
-  // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], <i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], splat (i32 -1)
  // CHECK: [[RHS_AND:%.+]] = and <4 x i32> [[RHS_SPLAT]], [[XOR]]
  // CHECK: [[LHS_AND:%.+]] = and <4 x i32> [[LHS_SPLAT]], [[SEXT]]
  // CHECK: = or <4 x i32> [[RHS_AND]], [[LHS_AND]]
@ -135,7 +135,7 @@ void TwoScalarOps() {
  // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x float> [[RHS_SPLAT_INSERT]], <4 x float> poison, <4 x i32> zeroinitializer
  // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer
  // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32>
-  // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], <i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], splat (i32 -1)
  // CHECK: [[RHS_CAST:%.+]] = bitcast <4 x float> [[RHS_SPLAT]] to <4 x i32>
  // CHECK: [[LHS_CAST:%.+]] = bitcast <4 x float> [[LHS_SPLAT]] to <4 x i32>
  // CHECK: [[RHS_AND:%.+]] = and <4 x i32> [[RHS_CAST]], [[XOR]]
@ -153,7 +153,7 @@ void TwoScalarOps() {
  // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x double> [[RHS_SPLAT_INSERT]], <4 x double> poison, <4 x i32> zeroinitializer
  // CHECK: [[NEG:%.+]] = icmp slt <4 x i64> [[COND]], zeroinitializer
  // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i64>
-  // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], <i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], splat (i64 -1)
  // CHECK: [[RHS_CAST:%.+]] = bitcast <4 x double> [[RHS_SPLAT]] to <4 x i64>
  // CHECK: [[LHS_CAST:%.+]] = bitcast <4 x double> [[LHS_SPLAT]] to <4 x i64>
  // CHECK: [[RHS_AND:%.+]] = and <4 x i64> [[RHS_CAST]], [[XOR]]
@ -171,7 +171,7 @@ void TwoScalarOps() {
  // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i32> [[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
  // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer
  // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32>
-  // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], <i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], splat (i32 -1)
  // CHECK: [[RHS_AND:%.+]] = and <4 x i32> [[RHS_SPLAT]], [[XOR]]
  // CHECK: [[LHS_AND:%.+]] = and <4 x i32> [[LHS_SPLAT]], [[SEXT]]
  // CHECK: = or <4 x i32> [[RHS_AND]], [[LHS_AND]]
@ -187,7 +187,7 @@ void OneScalarOp() {
  // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i32> [[RHS_SPLAT_INSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
  // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer
  // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32>
-  // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], <i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], splat (i32 -1)
  // CHECK: [[RHS_AND:%.+]] = and <4 x i32> [[RHS_SPLAT]], [[XOR]]
  // CHECK: [[LHS_AND:%.+]] = and <4 x i32> [[LHS]], [[SEXT]]
  // CHECK: = or <4 x i32> [[RHS_AND]], [[LHS_AND]]
@ -197,8 +197,8 @@ void OneScalarOp() {
  // CHECK: [[LHS:%.+]] = load <4 x i32>
  // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer
  // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32>
-  // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], <i32 -1, i32 -1, i32 -1, i32 -1>
-  // CHECK: [[RHS_AND:%.+]] = and <4 x i32> <i32 5, i32 5, i32 5, i32 5>, [[XOR]]
+  // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], splat (i32 -1)
+  // CHECK: [[RHS_AND:%.+]] = and <4 x i32> splat (i32 5), [[XOR]]
  // CHECK: [[LHS_AND:%.+]] = and <4 x i32> [[LHS]], [[SEXT]]
  // CHECK: = or <4 x i32> [[RHS_AND]], [[LHS_AND]]

@ -210,7 +210,7 @@ void OneScalarOp() {
  // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x float> [[RHS_SPLAT_INSERT]], <4 x float> poison, <4 x i32> zeroinitializer
  // CHECK: [[NEG:%.+]] = icmp slt <4 x i32> [[COND]], zeroinitializer
  // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i32>
-  // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], <i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: [[XOR:%.+]] = xor <4 x i32> [[SEXT]], splat (i32 -1)
  // CHECK: [[RHS_CAST:%.+]] = bitcast <4 x float> [[RHS_SPLAT]] to <4 x i32>
  // CHECK: [[LHS_CAST:%.+]] = bitcast <4 x float> [[LHS]] to <4 x i32>
  // CHECK: [[RHS_AND:%.+]] = and <4 x i32> [[RHS_CAST]], [[XOR]]
@ -222,9 +222,9 @@ void OneScalarOp() {
  // CHECK: [[LHS:%.+]] = load <4 x double>
  // CHECK: [[NEG:%.+]] = icmp slt <4 x i64> [[COND]], zeroinitializer
  // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i64>
-  // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], <i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], splat (i64 -1)
  // CHECK: [[LHS_CAST:%.+]] = bitcast <4 x double> [[LHS]] to <4 x i64>
-  // CHECK: [[RHS_AND:%.+]] = and <4 x i64> <i64 4618441417868443648, i64 4618441417868443648, i64 4618441417868443648, i64 4618441417868443648>, [[XOR]]
+  // CHECK: [[RHS_AND:%.+]] = and <4 x i64> splat (i64 4618441417868443648), [[XOR]]
  // CHECK: [[LHS_AND:%.+]] = and <4 x i64> [[LHS_CAST]], [[SEXT]]
  // CHECK: = or <4 x i64> [[RHS_AND]], [[LHS_AND]]

@ -233,8 +233,8 @@ void OneScalarOp() {
  // CHECK: [[LHS:%.+]] = load <4 x i64>
  // CHECK: [[NEG:%.+]] = icmp slt <4 x i64> [[COND]], zeroinitializer
  // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i64>
-  // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], <i64 -1, i64 -1, i64 -1, i64 -1>
-  // CHECK: [[RHS_AND:%.+]] = and <4 x i64> <i64 6, i64 6, i64 6, i64 6>, [[XOR]]
+  // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], splat (i64 -1)
+  // CHECK: [[RHS_AND:%.+]] = and <4 x i64> splat (i64 6), [[XOR]]
  // CHECK: [[LHS_AND:%.+]] = and <4 x i64> [[LHS]], [[SEXT]]
  // CHECK: [[OR:%.+]] = or <4 x i64> [[RHS_AND]], [[LHS_AND]]

@ -247,7 +247,7 @@ void OneScalarOp() {
  // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i64> [[RHS_SPLAT_INSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
  // CHECK: [[NEG:%.+]] = icmp slt <4 x i64> [[COND]], zeroinitializer
  // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i64>
-  // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], <i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], splat (i64 -1)
  // CHECK: [[RHS_AND:%.+]] = and <4 x i64> [[RHS_SPLAT]], [[XOR]]
  // CHECK: [[LHS_AND:%.+]] = and <4 x i64> [[LHS]], [[SEXT]]
  // CHECK: [[OR:%.+]] = or <4 x i64> [[RHS_AND]], [[LHS_AND]]
@ -260,7 +260,7 @@ void OneScalarOp() {
  // CHECK: [[RHS_SPLAT:%.+]] = shufflevector <4 x i64> [[RHS_SPLAT_INSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
  // CHECK: [[NEG:%.+]] = icmp slt <4 x i64> [[COND]], zeroinitializer
  // CHECK: [[SEXT:%.+]] = sext <4 x i1> [[NEG]] to <4 x i64>
-  // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], <i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: [[XOR:%.+]] = xor <4 x i64> [[SEXT]], splat (i64 -1)
  // CHECK: [[RHS_AND:%.+]] = and <4 x i64> [[RHS_SPLAT]], [[XOR]]
  // CHECK: [[LHS_AND:%.+]] = and <4 x i64> [[LHS]], [[SEXT]]
  // CHECK: [[OR:%.+]] = or <4 x i64> [[RHS_AND]], [[LHS_AND]]
--- a/clang/test/CodeGenCXX/matrix-type-builtins.cpp
+++ b/clang/test/CodeGenCXX/matrix-type-builtins.cpp
@ -57,7 +57,7 @@ void test_transpose_rvalue() {
  // CHECK-NEXT:  entry:
  // CHECK-NEXT:    [[M_T_ADDR:%.*]] = alloca [9 x float], align 4
  // CHECK-NEXT:    [[CALL_RES:%.*]] = call noundef <9 x float> @_Z10get_matrixv()
-  // CHECK-NEXT:    [[ADD:%.*]] = fadd <9 x float> [[CALL_RES]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+  // CHECK-NEXT:    [[ADD:%.*]] = fadd <9 x float> [[CALL_RES]], splat (float 2.000000e+00)
  // CHECK-NEXT:    [[M_T:%.*]] = call <9 x float> @llvm.matrix.transpose.v9f32(<9 x float> [[ADD]], i32 3, i32 3)
  // CHECK-NEXT:    store <9 x float> [[M_T]], ptr [[M_T_ADDR]], align 4
  matrix_t<float, 3, 3> m_t = __builtin_matrix_transpose(get_matrix() + 2.0);
--- a/clang/test/CodeGenCXX/matrix-type-operators.cpp
+++ b/clang/test/CodeGenCXX/matrix-type-operators.cpp
@ -407,7 +407,7 @@ void test_constexpr2(matrix_type<int, 5, 5> &m) {
  // NOOPT:         [[MAT:%.*]] = load <25 x i32>, ptr {{.*}}, align 4{{$}}
  // OPT:           [[MAT:%.*]] = load <25 x i32>, ptr {{.*}}, align 4, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    [[SUB:%.*]] = sub <25 x i32> [[IM]], [[MAT]]
-  // CHECK-NEXT:    [[SUB2:%.*]] = add <25 x i32> [[SUB]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  // CHECK-NEXT:    [[SUB2:%.*]] = add <25 x i32> [[SUB]], splat (i32 1)
  // NOOPT-NEXT:    [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8{{$}}
  // OPT-NEXT:      [[MAT_ADDR:%.*]] = load ptr, ptr %m.addr, align 8, !tbaa !{{[0-9]+}}{{$}}
  // CHECK-NEXT:    store <25 x i32> [[SUB2]], ptr [[MAT_ADDR]], align 4
--- a/clang/test/CodeGenCXX/vector-size-conditional.cpp
+++ b/clang/test/CodeGenCXX/vector-size-conditional.cpp
@ -146,7 +146,7 @@ void OneScalarOp() {
  // CHECK: %[[COND:.+]] = load <4 x i32>
  // CHECK: %[[LHS:.+]] = load <4 x i32>
  // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer
-  // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS]], <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+  // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS]], <4 x i32> splat (i32 5)

  four_ints ?: some_float;
  // CHECK: %[[COND:.+]] = load <4 x i32>
@ -161,7 +161,7 @@ void OneScalarOp() {
  // CHECK: %[[COND:.+]] = load <4 x i32>
  // CHECK: %[[LHS:.+]] = load <4 x i32>
  // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i32> %[[COND]], zeroinitializer
-  // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS]], <4 x i32> <i32 5, i32 5, i32 5, i32 5>
+  // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i32> %[[LHS]], <4 x i32> splat (i32 5)

  four_ints ? some_float : four_ints;
  // CHECK: %[[COND:.+]] = load <4 x i32>
@ -186,19 +186,19 @@ void OneScalarOp() {
  // CHECK: %[[COND:.+]] = load <4 x i64>
  // CHECK: %[[LHS:.+]] = load <4 x double>
  // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i64> %[[COND]], zeroinitializer
-  // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x double> %[[LHS]], <4 x double> <double 6.{{.+}}, double 6.{{.+}}, double 6.{{.+}}>
+  // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x double> %[[LHS]], <4 x double> splat (double 6.{{.+}})

  four_ll ? four_ll : 6.0;
  // CHECK: %[[COND:.+]] = load <4 x i64>
  // CHECK: %[[LHS:.+]] = load <4 x i64>
  // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i64> %[[COND]], zeroinitializer
-  // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i64> %[[LHS]], <4 x i64> <i64 6, i64 6, i64 6, i64 6>
+  // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i64> %[[LHS]], <4 x i64> splat (i64 6)

  four_ll ? four_ll : 6;
  // CHECK: %[[COND:.+]] = load <4 x i64>
  // CHECK: %[[LHS:.+]] = load <4 x i64>
  // CHECK: %[[NEZERO:.+]] = icmp ne <4 x i64> %[[COND]], zeroinitializer
-  // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i64> %[[LHS]], <4 x i64> <i64 6, i64 6, i64 6, i64 6>
+  // CHECK: %[[SELECT:.+]] = select <4 x i1> %[[NEZERO]], <4 x i64> %[[LHS]], <4 x i64> splat (i64 6)

  four_ll ? four_ll : some_int;
  // CHECK: %[[COND:.+]] = load <4 x i64>
--- a/clang/test/CodeGenCXX/vector-splat-conversion.cpp
+++ b/clang/test/CodeGenCXX/vector-splat-conversion.cpp
@ -22,28 +22,28 @@ typedef __attribute__((__ext_vector_type__(4))) __int128 bigint4;

 // CHECK-LABEL: define{{.*}} void @_Z14BoolConversionv
 void BoolConversion() {
-  // CHECK: store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: store <4 x i32> splat (i32 -1)
  int4 intsT = (int4)true;
  // CHECK: store <4 x i32> zeroinitializer
  int4 intsF = (int4)false;
-  // CHECK: store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>
+  // CHECK: store <4 x float> splat (float -1.000000e+00)
  float4 floatsT = (float4)true;
  // CHECK: store <4 x float> zeroinitializer
  float4 floatsF = (float4)false;
-  // CHECK: store <4 x i128> <i128 -1, i128 -1, i128 -1, i128 -1>
+  // CHECK: store <4 x i128> splat (i128 -1)
  bigint4 bigintsT = (bigint4)true;
  // CHECK: store <4 x i128> zeroinitializer
  bigint4 bigintsF = (bigint4)false;

-  // CHECK: store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: store <4 x i32> splat (i32 -1)
  constexpr int4 cIntsT = (int4)true;
  // CHECK: store <4 x i32> zeroinitializer
  constexpr int4 cIntsF = (int4)false;
-  // CHECK: store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>
+  // CHECK: store <4 x float> splat (float -1.000000e+00)
  constexpr float4 cFloatsT = (float4)true;
  // CHECK: store <4 x float> zeroinitializer
  constexpr float4 cFloatsF = (float4)false;
-  // CHECK: store <4 x i128> <i128 -1, i128 -1, i128 -1, i128 -1>
+  // CHECK: store <4 x i128> splat (i128 -1)
  constexpr bigint4 cBigintsT = (bigint4)true;
  // CHECK: store <4 x i128> zeroinitializer
  constexpr bigint4 cBigintsF = (bigint4)false;
--- a/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/standard_conversion_sequences.hlsl
@ -3,7 +3,7 @@
 // CHECK-LABEL: f3_to_d4
 // CHECK: [[f3:%.*]] = alloca <3 x float>
 // CHECK: [[d4:%.*]] = alloca <4 x double>
-// CHECK: store <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, ptr [[f3]]
+// CHECK: store <3 x float> splat (float 1.000000e+00), ptr [[f3]]
 // CHECK: [[vecf3:%.*]] = load <3 x float>, ptr [[f3]]
 // CHECK: [[vecf4:%.*]] = shufflevector <3 x float> [[vecf3]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
 // CHECK: [[vecd4:%.*]] = fpext <4 x float> [[vecf4]] to <4 x double>
@ -16,7 +16,7 @@ void f3_to_d4() {
 // CHECK-LABEL: f3_to_f2
 // CHECK: [[f3:%.*]] = alloca <3 x float>
 // CHECK: [[f2:%.*]] = alloca <2 x float>
-// CHECK: store <3 x float> <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>, ptr [[f3]]
+// CHECK: store <3 x float> splat (float 2.000000e+00), ptr [[f3]]
 // CHECK: [[vecf3:%.*]] = load <3 x float>, ptr [[f3]]
 // CHECK: [[vecf2:%.*]] = shufflevector <3 x float> [[vecf3]], <3 x float> poison, <2 x i32> <i32 0, i32 1>
 // CHECK: store <2 x float> [[vecf2]], ptr [[f2]]
@ -28,7 +28,7 @@ void f3_to_f2() {
 // CHECK-LABEL: d4_to_f2
 // CHECK: [[d4:%.*]] = alloca <4 x double>
 // CHECK: [[f2:%.*]] = alloca <2 x float>
-// CHECK: store <4 x double> <double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00>, ptr [[d4]]
+// CHECK: store <4 x double> splat (double 3.000000e+00), ptr [[d4]]
 // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]]
 // CHECK: [[vecf4:%.*]] = fptrunc <4 x double> [[vecd4]] to <4 x float>
 // CHECK: [[vecf2:%.*]] = shufflevector <4 x float> [[vecf4]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
@ -41,7 +41,7 @@ void d4_to_f2() {
 // CHECK-LABEL: f2_to_i2
 // CHECK: [[f2:%.*]] = alloca <2 x float>
 // CHECK: [[i2:%.*]] = alloca <2 x i32>
-// CHECK: store <2 x float> <float 4.000000e+00, float 4.000000e+00>, ptr [[f2]]
+// CHECK: store <2 x float> splat (float 4.000000e+00), ptr [[f2]]
 // CHECK: [[vecf2:%.*]] = load <2 x float>, ptr [[f2]]
 // CHECK: [[veci2:%.*]] = fptosi <2 x float> [[vecf2]] to <2 x i32>
 // CHECK: store <2 x i32> [[veci2]], ptr [[i2]]
@ -53,7 +53,7 @@ void f2_to_i2() {
 // CHECK-LABEL: d4_to_i2
 // CHECK: [[f4:%.*]] = alloca <4 x double>
 // CHECK: [[i2:%.*]] = alloca <2 x i32>
-// CHECK: store <4 x double> <double 5.000000e+00, double 5.000000e+00, double 5.000000e+00, double 5.000000e+00>, ptr [[d4]]
+// CHECK: store <4 x double> splat (double 5.000000e+00), ptr [[d4]]
 // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]]
 // CHECK: [[veci4:%.*]] = fptosi <4 x double> [[vecd4]] to <4 x i32>
 // CHECK: [[veci2:%.*]] = shufflevector <4 x i32> [[veci4]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
@ -66,7 +66,7 @@ void d4_to_i2() {
 // CHECK-LABEL: d4_to_l4
 // CHECK: [[d4:%.*]] = alloca <4 x double>
 // CHECK: [[l4:%.*]] = alloca <4 x i64>
-// CHECK: store <4 x double> <double 6.000000e+00, double 6.000000e+00, double 6.000000e+00, double 6.000000e+00>, ptr [[d4]]
+// CHECK: store <4 x double> splat (double 6.000000e+00), ptr [[d4]]
 // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]]
 // CHECK: [[vecl4:%.*]] = fptosi <4 x double> [[vecd4]] to <4 x i64>
 // CHECK: store <4 x i64> [[vecl4]], ptr [[l4]]
@ -79,7 +79,7 @@ void d4_to_l4() {
 // CHECK-LABEL: l4_to_i2
 // CHECK: [[l4:%.*]] = alloca <4 x i64>
 // CHECK: [[i2:%.*]] = alloca <2 x i32>
-// CHECK: store <4 x i64> <i64 7, i64 7, i64 7, i64 7>, ptr [[l4]]
+// CHECK: store <4 x i64> splat (i64 7), ptr [[l4]]
 // CHECK: [[vecl4:%.*]] = load <4 x i64>, ptr [[l4]]
 // CHECK: [[veci4:%.*]] = trunc <4 x i64> [[vecl4]] to <4 x i32>
 // CHECK: [[veci2:%.*]] = shufflevector <4 x i32> [[veci4]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
@ -92,7 +92,7 @@ void l4_to_i2() {
 // CHECK-LABEL: i2_to_b2
 // CHECK: [[l2:%.*]] = alloca <2 x i32>
 // CHECK: [[b2:%.*]] = alloca i8
-// CHECK: store <2 x i32> <i32 8, i32 8>, ptr [[i2]]
+// CHECK: store <2 x i32> splat (i32 8), ptr [[i2]]
 // CHECK: [[veci2:%.*]] = load <2 x i32>, ptr [[i2]]
 // CHECK: [[vecb2:%.*]] = icmp ne <2 x i32> [[veci2]], zeroinitializer
 // CHECK: [[vecb8:%.*]] = shufflevector <2 x i1> [[vecb2]], <2 x i1> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
@ -106,7 +106,7 @@ void i2_to_b2() {
 // CHECK-LABEL: d4_to_b2
 // CHECK: [[d4:%.*]] = alloca <4 x double>
 // CHECK: [[b2:%.*]] = alloca i8
-// CHECK: store <4 x double> <double 9.000000e+00, double 9.000000e+00, double 9.000000e+00, double 9.000000e+00>, ptr [[d4]]
+// CHECK: store <4 x double> splat (double 9.000000e+00), ptr [[d4]]
 // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]]
 // CHECK: [[vecb4:%.*]] = fcmp une <4 x double> [[vecd4]], zeroinitializer
 // CHECK: [[vecd2:%.*]] = shufflevector <4 x i1> [[vecb4]], <4 x i1> poison, <2 x i32> <i32 0, i32 1>
@ -121,7 +121,7 @@ void d4_to_b2() {
 // CHECK-LABEL: d4_to_d1
 // CHECK: [[d4:%.*]] = alloca <4 x double>
 // CHECK: [[d1:%.*]] = alloca <1 x double>
-// CHECK: store <4 x double> <double 9.000000e+00, double 9.000000e+00, double 9.000000e+00, double 9.000000e+00>, ptr [[d4]]
+// CHECK: store <4 x double> splat (double 9.000000e+00), ptr [[d4]]
 // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]]
 // CHECK: [[vecd1:%.*]] = shufflevector <4 x double> [[vecd4]], <4 x double> poison, <1 x i32> zeroinitializer
 // CHECK: store <1 x double> [[vecd1]], ptr [[d1:%.*]], align 8
@ -133,7 +133,7 @@ void d4_to_d1() {
 // CHECK-LABEL: d4_to_dScalar
 // CHECK: [[d4:%.*]] = alloca <4 x double>
 // CHECK: [[d:%.*]] = alloca double
-// CHECK: store <4 x double> <double 9.000000e+00, double 9.000000e+00, double 9.000000e+00, double 9.000000e+00>, ptr [[d4]]
+// CHECK: store <4 x double> splat (double 9.000000e+00), ptr [[d4]]
 // CHECK: [[vecd4:%.*]] = load <4 x double>, ptr [[d4]]
 // CHECK: [[d4x:%.*]] = extractelement <4 x double> [[vecd4]], i32 0
 // CHECK: store double [[d4x]], ptr [[d]]
--- a/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/ScalarSwizzles.hlsl
@ -20,7 +20,7 @@ float4 ToFourFloats(float V){

 // CHECK-LABEL: FillOne
 // CHECK: [[vec1Ptr:%.*]] = alloca <1 x i32>, align 4
-// CHECK: store <1 x i32> <i32 1>, ptr [[vec1Ptr]], align 4
+// CHECK: store <1 x i32> splat (i32 1), ptr [[vec1Ptr]], align 4
 // CHECK: [[vec1:%.*]] = load <1 x i32>, ptr [[vec1Ptr]], align 4
 // CHECK: [[vec2:%.*]] = shufflevector <1 x i32> [[vec1]], <1 x i32> poison, <2 x i32> zeroinitializer
 // CHECK: ret <2 x i32> [[vec2]]
@ -30,7 +30,7 @@ int2 FillOne(){

 // CHECK-LABEL: FillOneUnsigned
 // CHECK: [[vec1Ptr:%.*]] = alloca <1 x i32>, align 4
-// CHECK: store <1 x i32> <i32 1>, ptr [[vec1Ptr]], align 4
+// CHECK: store <1 x i32> splat (i32 1), ptr [[vec1Ptr]], align 4
 // CHECK: [[vec1:%.*]] = load <1 x i32>, ptr [[vec1Ptr]], align 4
 // CHECK: [[vec3:%.*]] = shufflevector <1 x i32> [[vec1]], <1 x i32> poison, <3 x i32> zeroinitializer
 // CHECK: ret <3 x i32> [[vec3]]
@ -40,7 +40,7 @@ uint3 FillOneUnsigned(){

 // CHECK-LABEL: FillOneUnsignedLong
 // CHECK: [[vec1Ptr:%.*]] = alloca <1 x i64>, align 8
-// CHECK: store <1 x i64> <i64 1>, ptr [[vec1Ptr]], align 8
+// CHECK: store <1 x i64> splat (i64 1), ptr [[vec1Ptr]], align 8
 // CHECK: [[vec1:%.*]] = load <1 x i64>, ptr [[vec1Ptr]], align 8
 // CHECK: [[vec4:%.*]] = shufflevector <1 x i64> [[vec1]], <1 x i64> poison, <4 x i32> zeroinitializer
 // CHECK: ret <4 x i64> [[vec4]]
@ -50,7 +50,7 @@ vector<uint64_t,4> FillOneUnsignedLong(){

 // CHECK-LABEL: FillTwoPointFive
 // CHECK: [[vec1Ptr:%.*]] = alloca <1 x double>, align 8
-// CHECK: store <1 x double> <double 2.500000e+00>, ptr [[vec1Ptr]], align 8
+// CHECK: store <1 x double> splat (double 2.500000e+00), ptr [[vec1Ptr]], align 8
 // CHECK: [[vec1:%.*]] = load <1 x double>, ptr [[vec1Ptr]], align 8
 // CHECK: [[vec2:%.*]] = shufflevector <1 x double> [[vec1]], <1 x double> poison, <2 x i32> zeroinitializer
 // CHECK: ret <2 x double> [[vec2]]
@ -60,7 +60,7 @@ double2 FillTwoPointFive(){

 // CHECK-LABEL: FillOneHalf
 // CHECK: [[vec1Ptr:%.*]] = alloca <1 x double>, align 8
-// CHECK: store <1 x double> <double 5.000000e-01>, ptr [[vec1Ptr]], align 8
+// CHECK: store <1 x double> splat (double 5.000000e-01), ptr [[vec1Ptr]], align 8
 // CHECK: [[vec1:%.*]] = load <1 x double>, ptr [[vec1Ptr]], align 8
 // CHECK: [[vec3:%.*]] = shufflevector <1 x double> [[vec1]], <1 x double> poison, <3 x i32> zeroinitializer
 // CHECK: ret <3 x double> [[vec3]]
@ -70,7 +70,7 @@ double3 FillOneHalf(){

 // CHECK-LABEL: FillTwoPointFiveFloat
 // CHECK: [[vec1Ptr:%.*]] = alloca <1 x float>, align 4
-// CHECK: store <1 x float> <float 2.500000e+00>, ptr [[vec1Ptr]], align 4
+// CHECK: store <1 x float> splat (float 2.500000e+00), ptr [[vec1Ptr]], align 4
 // CHECK: [[vec1:%.*]] = load <1 x float>, ptr [[vec1Ptr]], align 4
 // CHECK: [[vec4:%.*]] = shufflevector <1 x float> [[vec1]], <1 x float> poison, <4 x i32> zeroinitializer
 // CHECK: ret <4 x float> [[vec4]]
@ -83,7 +83,7 @@ float4 FillTwoPointFiveFloat(){

 // CHECK-LABEL: FillOneHalfFloat
 // CHECK: [[vec1Ptr:%.*]] = alloca <1 x float>, align 4
-// CHECK: store <1 x float> <float 5.000000e-01>, ptr [[vec1Ptr]], align 4
+// CHECK: store <1 x float> splat (float 5.000000e-01), ptr [[vec1Ptr]], align 4
 // CHECK: [[vec1:%.*]] = load <1 x float>, ptr [[vec1Ptr]], align 4
 // CHECK: [[el0:%.*]] = extractelement <1 x float> [[vec1]], i32 0
 // CHECK: [[vec1Splat:%.*]] = insertelement <1 x float> poison, float [[el0]], i64 0
@ -116,7 +116,7 @@ float2 HowManyFloats(float V) {

 // CHECK-LABEL: AllRighty
 // CHECK: [[Tmp:%.*]] = alloca <1 x double>, align 8
-// CHECK: store <1 x double> <double 1.000000e+00>, ptr [[Tmp]], align 8
+// CHECK: store <1 x double> splat (double 1.000000e+00), ptr [[Tmp]], align 8
 // CHECK: [[vec1:%.*]] = load <1 x double>, ptr [[Tmp]], align 8
 // CHECK: [[vec3:%.*]] = shufflevector <1 x double> [[vec1]], <1 x double> poison, <3 x i32> zeroinitializer
 // CHECK: [[vec3f:%.*]] = fptrunc <3 x double> [[vec3]] to <3 x float>
@ -128,7 +128,7 @@ float3 AllRighty() {

 // CHECK-LABEL: AllRighty2
 // CHECK: [[vec1Ptr:%.*]] = alloca <1 x float>, align 4
-// CHECK: store <1 x float> <float 1.000000e+00>, ptr [[vec1Ptr]], align 4
+// CHECK: store <1 x float> splat (float 1.000000e+00), ptr [[vec1Ptr]], align 4
 // CHECK: [[vec1:%.*]] = load <1 x float>, ptr [[vec1Ptr]], align 4
 // CHECK: [[vec3:%.*]] = shufflevector <1 x float> [[vec1]], <1 x float> poison, <3 x i32>
 // CHECK: ret <3 x float> [[vec3]]
--- a/clang/test/CodeGenHLSL/builtins/rcp.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/rcp.hlsl
@ -25,31 +25,31 @@ half test_rcp_half(half p0) { return rcp(p0); }

 // DXIL_NATIVE_HALF: define noundef <2 x half> @
 // SPIR_NATIVE_HALF: define spir_func noundef <2 x half> @
-// NATIVE_HALF: %hlsl.rcp = fdiv <2 x half> <half  0xH3C00, half  0xH3C00>, %{{.*}} 
+// NATIVE_HALF: %hlsl.rcp = fdiv <2 x half> splat (half  0xH3C00), %{{.*}} 
 // NATIVE_HALF: ret <2 x half> %hlsl.rcp
 // DXIL_NO_HALF: define noundef <2 x float> @
 // SPIR_NO_HALF: define spir_func noundef <2 x float> @
-// NO_HALF: %hlsl.rcp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %{{.*}}
+// NO_HALF: %hlsl.rcp = fdiv <2 x float> splat (float 1.000000e+00), %{{.*}}
 // NO_HALF: ret <2 x float> %hlsl.rcp
 half2 test_rcp_half2(half2 p0) { return rcp(p0); }

 // DXIL_NATIVE_HALF: define noundef <3 x half> @
 // SPIR_NATIVE_HALF: define spir_func noundef <3 x half> @
-// NATIVE_HALF: %hlsl.rcp = fdiv <3 x half> <half  0xH3C00, half  0xH3C00, half  0xH3C00>, %{{.*}} 
+// NATIVE_HALF: %hlsl.rcp = fdiv <3 x half> splat (half  0xH3C00), %{{.*}} 
 // NATIVE_HALF: ret <3 x half> %hlsl.rcp
 // DXIL_NO_HALF: define noundef <3 x float> @
 // SPIR_NO_HALF: define spir_func noundef <3 x float> @
-// NO_HALF: %hlsl.rcp = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %{{.*}}
+// NO_HALF: %hlsl.rcp = fdiv <3 x float> splat (float 1.000000e+00), %{{.*}}
 // NO_HALF: ret <3 x float> %hlsl.rcp
 half3 test_rcp_half3(half3 p0) { return rcp(p0); }

 // DXIL_NATIVE_HALF: define noundef <4 x half> @
 // SPIR_NATIVE_HALF: define spir_func noundef <4 x half> @
-// NATIVE_HALF: %hlsl.rcp = fdiv <4 x half> <half  0xH3C00, half  0xH3C00, half  0xH3C00, half  0xH3C00>, %{{.*}} 
+// NATIVE_HALF: %hlsl.rcp = fdiv <4 x half> splat (half  0xH3C00), %{{.*}} 
 // NATIVE_HALF: ret <4 x half> %hlsl.rcp
 // DXIL_NO_HALF: define noundef <4 x float> @
 // SPIR_NO_HALF: define spir_func noundef <4 x float> @
-// NO_HALF: %hlsl.rcp = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %{{.*}}
+// NO_HALF: %hlsl.rcp = fdiv <4 x float> splat (float 1.000000e+00), %{{.*}}
 // NO_HALF: ret <4 x float> %hlsl.rcp
 half4 test_rcp_half4(half4 p0) { return rcp(p0); }

@ -61,19 +61,19 @@ float test_rcp_float(float p0) { return rcp(p0); }

 // DXIL_CHECK: define noundef <2 x float> @
 // SPIR_CHECK: define spir_func noundef <2 x float> @
-// CHECK: %hlsl.rcp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %{{.*}}
+// CHECK: %hlsl.rcp = fdiv <2 x float> splat (float 1.000000e+00), %{{.*}}
 // CHECK: ret <2 x float> %hlsl.rcp
 float2 test_rcp_float2(float2 p0) { return rcp(p0); }

 // DXIL_CHECK: define noundef <3 x float> @
 // SPIR_CHECK: define spir_func noundef <3 x float> @
-// CHECK: %hlsl.rcp = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %{{.*}}
+// CHECK: %hlsl.rcp = fdiv <3 x float> splat (float 1.000000e+00), %{{.*}}
 // CHECK: ret <3 x float> %hlsl.rcp
 float3 test_rcp_float3(float3 p0) { return rcp(p0); }

 // DXIL_CHECK: define noundef <4 x float> @
 // SPIR_CHECK: define spir_func noundef <4 x float> @
-// CHECK: %hlsl.rcp = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %{{.*}}
+// CHECK: %hlsl.rcp = fdiv <4 x float> splat (float 1.000000e+00), %{{.*}}
 // CHECK: ret <4 x float> %hlsl.rcp
 float4 test_rcp_float4(float4 p0) { return rcp(p0); }

@ -85,18 +85,18 @@ double test_rcp_double(double p0) { return rcp(p0); }

 // DXIL_CHECK: define noundef <2 x double> @
 // SPIR_CHECK: define spir_func noundef <2 x double> @
-// CHECK: %hlsl.rcp = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, %{{.*}}
+// CHECK: %hlsl.rcp = fdiv <2 x double> splat (double 1.000000e+00), %{{.*}}
 // CHECK: ret <2 x double> %hlsl.rcp
 double2 test_rcp_double2(double2 p0) { return rcp(p0); }

 // DXIL_CHECK: define noundef <3 x double> @
 // SPIR_CHECK: define spir_func noundef <3 x double> @
-// CHECK: %hlsl.rcp = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %{{.*}}
+// CHECK: %hlsl.rcp = fdiv <3 x double> splat (double 1.000000e+00), %{{.*}}
 // CHECK: ret <3 x double> %hlsl.rcp
 double3 test_rcp_double3(double3 p0) { return rcp(p0); }

 // DXIL_CHECK: define noundef <4 x double> @
 // SPIR_CHECK: define spir_func noundef <4 x double> @
-// CHECK: %hlsl.rcp = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %{{.*}}
+// CHECK: %hlsl.rcp = fdiv <4 x double> splat (double 1.000000e+00), %{{.*}}
 // CHECK: ret <4 x double> %hlsl.rcp
 double4 test_rcp_double4(double4 p0) { return rcp(p0); }
--- a/clang/test/CodeGenHLSL/builtins/sign.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/sign.hlsl
@ -121,17 +121,17 @@ int test_sign_uint16_t(uint16_t p0) { return sign(p0); }

 // NATIVE_HALF: define [[FNATTRS]] <2 x i32> @
 // NATIVE_HALF: [[CMP:%.*]] = icmp eq <2 x i16> [[ARG:%.*]], zeroinitializer
-// NATIVE_HALF: %hlsl.sign = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> <i32 1, i32 1>
+// NATIVE_HALF: %hlsl.sign = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 1)
 int2 test_sign_uint16_t2(uint16_t2 p0) { return sign(p0); }

 // NATIVE_HALF: define [[FNATTRS]] <3 x i32> @
 // NATIVE_HALF: [[CMP:%.*]] = icmp eq <3 x i16> [[ARG:%.*]], zeroinitializer
-// NATIVE_HALF: %hlsl.sign = select <3 x i1> [[CMP]], <3 x i32> zeroinitializer, <3 x i32> <i32 1, i32 1, i32 1>
+// NATIVE_HALF: %hlsl.sign = select <3 x i1> [[CMP]], <3 x i32> zeroinitializer, <3 x i32> splat (i32 1)
 int3 test_sign_uint16_t3(uint16_t3 p0) { return sign(p0); }

 // NATIVE_HALF: define [[FNATTRS]] <4 x i32> @
 // NATIVE_HALF: [[CMP:%.*]] = icmp eq <4 x i16> [[ARG:%.*]], zeroinitializer
-// NATIVE_HALF: %hlsl.sign = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// NATIVE_HALF: %hlsl.sign = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 1)
 int4 test_sign_uint16_t4(uint16_t4 p0) { return sign(p0); }
 #endif // __HLSL_ENABLE_16_BIT

@ -164,17 +164,17 @@ int test_sign_uint(uint p0) { return sign(p0); }

 // CHECK: define [[FNATTRS]] <2 x i32> @
 // CHECK: [[CMP:%.*]] = icmp eq <2 x i32> [[ARG:%.*]], zeroinitializer
-// CHECK: %hlsl.sign = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> <i32 1, i32 1>
+// CHECK: %hlsl.sign = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 1)
 int2 test_sign_uint2(uint2 p0) { return sign(p0); }

 // CHECK: define [[FNATTRS]] <3 x i32> @
 // CHECK: [[CMP:%.*]] = icmp eq <3 x i32> [[ARG:%.*]], zeroinitializer
-// CHECK: %hlsl.sign = select <3 x i1> [[CMP]], <3 x i32> zeroinitializer, <3 x i32> <i32 1, i32 1, i32 1>
+// CHECK: %hlsl.sign = select <3 x i1> [[CMP]], <3 x i32> zeroinitializer, <3 x i32> splat (i32 1)
 int3 test_sign_uint3(uint3 p0) { return sign(p0); }

 // CHECK: define [[FNATTRS]] <4 x i32> @
 // CHECK: [[CMP:%.*]] = icmp eq <4 x i32> [[ARG:%.*]], zeroinitializer
-// CHECK: %hlsl.sign = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: %hlsl.sign = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 1)
 int4 test_sign_uint4(uint4 p0) { return sign(p0); }


@ -206,15 +206,15 @@ int test_sign_uint64_t(uint64_t p0) { return sign(p0); }

 // CHECK: define [[FNATTRS]] <2 x i32> @
 // CHECK: [[CMP:%.*]] = icmp eq <2 x i64> [[ARG:%.*]], zeroinitializer
-// CHECK: %hlsl.sign = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> <i32 1, i32 1>
+// CHECK: %hlsl.sign = select <2 x i1> [[CMP]], <2 x i32> zeroinitializer, <2 x i32> splat (i32 1)
 int2 test_sign_uint64_t2(uint64_t2 p0) { return sign(p0); }

 // CHECK: define [[FNATTRS]] <3 x i32> @
 // CHECK: [[CMP:%.*]] = icmp eq <3 x i64> [[ARG:%.*]], zeroinitializer
-// CHECK: %hlsl.sign = select <3 x i1> [[CMP]], <3 x i32> zeroinitializer, <3 x i32> <i32 1, i32 1, i32 1>
+// CHECK: %hlsl.sign = select <3 x i1> [[CMP]], <3 x i32> zeroinitializer, <3 x i32> splat (i32 1)
 int3 test_sign_uint64_t3(uint64_t3 p0) { return sign(p0); }

 // CHECK: define [[FNATTRS]] <4 x i32> @
 // CHECK: [[CMP:%.*]] = icmp eq <4 x i64> [[ARG:%.*]], zeroinitializer
-// CHECK: %hlsl.sign = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+// CHECK: %hlsl.sign = select <4 x i1> [[CMP]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 1)
 int4 test_sign_uint64_t4(uint64_t4 p0) { return sign(p0); }
--- a/clang/test/CodeGenOpenCL/bool_cast.cl
+++ b/clang/test/CodeGenOpenCL/bool_cast.cl
@ -22,12 +22,12 @@ void kernel ker() {

  uchar4 vc;
  vc = (uchar4)true;
-// CHECK: store <4 x i8> <i8 -1, i8 -1, i8 -1, i8 -1>, ptr %vc, align 4
+// CHECK: store <4 x i8> splat (i8 -1), ptr %vc, align 4
  unsigned char c;
  c = (unsigned char)true;
 // CHECK: store i8 1, ptr %c, align 1

  float4 vf;
  vf = (float4)true;
-// CHECK: store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>
+// CHECK: store <4 x float> splat (float -1.000000e+00)
 }
--- a/clang/test/CodeGenOpenCL/logical-ops.cl
+++ b/clang/test/CodeGenOpenCL/logical-ops.cl
@ -10,23 +10,23 @@ typedef double double4 __attribute((ext_vector_type(4)));

 // CHECK: floatops
 kernel void floatops(global int4 *out, global float4 *fout) {
-  // CHECK: store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: store <4 x i32> splat (i32 -1)
  out[0] = (float4)(1, 1, 1, 1) && 1.0f;
  // CHECK: store <4 x i32> zeroinitializer
  out[1] = (float4)(0, 0, 0, 0) && (float4)(0, 0, 0, 0);

-  // CHECK: store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: store <4 x i32> splat (i32 -1)
  out[2] = (float4)(0, 0, 0, 0) || (float4)(1, 1, 1, 1);
  // CHECK: store <4 x i32> zeroinitializer
  out[3] = (float4)(0, 0, 0, 0) || 0.0f;

-  // CHECK: store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+  // CHECK: store <4 x i32> splat (i32 -1)
  out[4] = !(float4)(0, 0, 0, 0);
  // CHECK: store <4 x i32> zeroinitializer
  out[5] = !(float4)(1, 2, 3, 4);
  // CHECK: store <4 x i32> <i32 -1, i32 0, i32 -1, i32 0>
  out[6] = !(float4)(0, 1, 0, 1);
-  // CHECK: store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
+  // CHECK: store <4 x float> splat (float 1.000000e+00)
  fout[0] = (float4)(!0.0f);
  // CHECK: store <4 x float> zeroinitializer
  fout[1] = (float4)(!1.0f);
@ -34,23 +34,23 @@ kernel void floatops(global int4 *out, global float4 *fout) {

 // CHECK: doubleops
 kernel void doubleops(global long4 *out, global double4 *dout) {
-  // CHECK: store <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: store <4 x i64> splat (i64 -1)
  out[0] = (double4)(1, 1, 1, 1) && 1.0;
  // CHECK: store <4 x i64> zeroinitializer
  out[1] = (double4)(0, 0, 0, 0) && (double4)(0, 0, 0, 0);

-  // CHECK: store <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: store <4 x i64> splat (i64 -1)
  out[2] = (double4)(0, 0, 0, 0) || (double4)(1, 1, 1, 1);
  // CHECK: store <4 x i64> zeroinitializer
  out[3] = (double4)(0, 0, 0, 0) || 0.0f;

-  // CHECK: store <4 x i64> <i64 -1, i64 -1, i64 -1, i64 -1>
+  // CHECK: store <4 x i64> splat (i64 -1)
  out[4] = !(double4)(0, 0, 0, 0);
  // CHECK: store <4 x i64> zeroinitializer
  out[5] = !(double4)(1, 2, 3, 4);
  // CHECK: store <4 x i64> <i64 -1, i64 0, i64 -1, i64 0>
  out[6] = !(double4)(0, 1, 0, 1);
-  // CHECK: store <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
+  // CHECK: store <4 x double> splat (double 1.000000e+00)
  dout[0] = (double4)(!0.0f);
  // CHECK: store <4 x double> zeroinitializer
  dout[1] = (double4)(!1.0f);
--- a/clang/test/CodeGenOpenCL/partial_initializer.cl
+++ b/clang/test/CodeGenOpenCL/partial_initializer.cl
@ -21,7 +21,7 @@ StrucTy GS = {1, 2};
 // CHECK: @GV1 ={{.*}} addrspace(1) global <4 x i32> <i32 1, i32 2, i32 3, i32 4>, align 16
 int4 GV1 = (int4)((int2)(1,2),3,4);

-// CHECK: @GV2 ={{.*}} addrspace(1) global <4 x i32> <i32 1, i32 1, i32 1, i32 1>, align 16
+// CHECK: @GV2 ={{.*}} addrspace(1) global <4 x i32> splat (i32 1), align 16
 int4 GV2 = (int4)(1);

 // CHECK: @__const.f.S = private unnamed_addr addrspace(2) constant %struct.StrucTy { i32 1, i32 2, i32 0 }, align 4
@ -57,7 +57,7 @@ void f(void) {
  // CHECK: store <4 x i32> %[[v7]], ptr %[[V1]], align 16
  int4 V1 = (int4)((int2)(1,2),3,4);

-  // CHECK: store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %[[V2]], align 16
+  // CHECK: store <4 x i32> splat (i32 1), ptr %[[V2]], align 16
  int4 V2 = (int4)(1);
 }

--- a/clang/test/CodeGenOpenCL/shifts.cl
+++ b/clang/test/CodeGenOpenCL/shifts.cl
@ -47,7 +47,7 @@ typedef __attribute__((ext_vector_type(4))) int int4;

 //OPT: @vectorVectorTest
 int4 vectorVectorTest(int4 a,int4 b) {
-  //OPT: [[VM:%.+]] = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
+  //OPT: [[VM:%.+]] = and <4 x i32> %b, splat (i32 31)
  //OPT-NEXT: [[VC:%.+]] = shl <4 x i32> %a, [[VM]]
  int4 c = a << b;
  //OPT-NEXT: [[VF:%.+]] = add <4 x i32> [[VC]], <i32 2, i32 4, i32 16, i32 8>
@ -62,10 +62,10 @@ int4 vectorVectorTest(int4 a,int4 b) {
 int4 vectorScalarTest(int4 a,int b) {
  //NOOPT: [[SP0:%.+]] = insertelement <4 x i32> poison
  //NOOPT: [[SP1:%.+]] = shufflevector <4 x i32> [[SP0]], <4 x i32> poison, <4 x i32> zeroinitializer
-  //NOOPT: [[VSM:%.+]] = and <4 x i32> [[SP1]], <i32 31, i32 31, i32 31, i32 31>
+  //NOOPT: [[VSM:%.+]] = and <4 x i32> [[SP1]], splat (i32 31)
  //NOOPT: [[VSC:%.+]] = shl <4 x i32> [[VSS:%.+]], [[VSM]]
  int4 c = a << b;
-  //NOOPT: [[VSF:%.+]] = shl <4 x i32> [[VSC1:%.+]], <i32 2, i32 2, i32 2, i32 2>
+  //NOOPT: [[VSF:%.+]] = shl <4 x i32> [[VSC1:%.+]], splat (i32 2)
  //NOOPT: [[VSA:%.+]] = add <4 x i32> [[VSC2:%.+]], [[VSF]]
  int4 d = {1, 1, 1, 1};
  int4 f = c + (d << 34);
--- a/clang/test/CodeGenOpenCL/vector_literals.cl
+++ b/clang/test/CodeGenOpenCL/vector_literals.cl
@ -48,7 +48,7 @@ void vector_literals_valid() {
  //CHECK: shufflevector <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>, <4 x i32> %{{.+}}, <4 x i32> <i32 0, i32 4, i32 5, i32 6>
  int4 a_1_3 = (int4)(1, (int3)(2, 3, 4));

-  //CHECK: store <4 x i32> <i32 1, i32 1, i32 1, i32 1>, ptr %a
+  //CHECK: store <4 x i32> splat (i32 1), ptr %a
  int4 a = (int4)(1);

  //CHECK: load <4 x i32>, ptr %a
@ -60,7 +60,7 @@ void vector_literals_valid() {
  //CHECK: shufflevector <8 x i32> %{{.+}}, <8 x i32> %{{.+}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  int8 b = (int8)(1, 2, a.xy, a);

-  //CHECK: store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, ptr %V2
+  //CHECK: store <4 x float> splat (float 1.000000e+00), ptr %V2
  float4 V2 = (float4)(1);
 }

--- a/clang/test/Headers/__clang_hip_math_deprecated.hip
+++ b/clang/test/Headers/__clang_hip_math_deprecated.hip
@ -21,7 +21,7 @@ extern "C" __device__ _Float16 test_rcpf16_wrapper(_Float16 x) {

 // CHECK-LABEL: @test_rcp2f16_wrapper(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[DIV_I:%.*]] = fdiv contract <2 x half> <half 0xH3C00, half 0xH3C00>, [[X:%.*]]
+// CHECK-NEXT:    [[DIV_I:%.*]] = fdiv contract <2 x half> splat (half 0xH3C00), [[X:%.*]]
 // CHECK-NEXT:    ret <2 x half> [[DIV_I]]
 //
 extern "C" __device__ __2f16 test_rcp2f16_wrapper(__2f16 x) {
--- a/clang/test/Headers/wasm.c
+++ b/clang/test/Headers/wasm.c
@ -482,7 +482,7 @@ v128_t test_f64x2_const(void) {

 // CHECK-LABEL: @test_i8x16_const_splat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret <4 x i32> <i32 707406378, i32 707406378, i32 707406378, i32 707406378>
+// CHECK-NEXT:    ret <4 x i32> splat (i32 707406378)
 //
 v128_t test_i8x16_const_splat(void) {
  return wasm_i8x16_const_splat(42);
@ -490,7 +490,7 @@ v128_t test_i8x16_const_splat(void) {

 // CHECK-LABEL: @test_u8x16_const_splat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret <4 x i32> <i32 707406378, i32 707406378, i32 707406378, i32 707406378>
+// CHECK-NEXT:    ret <4 x i32> splat (i32 707406378)
 //
 v128_t test_u8x16_const_splat(void) {
  return wasm_u8x16_const_splat(42);
@ -498,7 +498,7 @@ v128_t test_u8x16_const_splat(void) {

 // CHECK-LABEL: @test_i16x8_const_splat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret <4 x i32> <i32 2752554, i32 2752554, i32 2752554, i32 2752554>
+// CHECK-NEXT:    ret <4 x i32> splat (i32 2752554)
 //
 v128_t test_i16x8_const_splat(void) {
  return wasm_i16x8_const_splat(42);
@ -506,7 +506,7 @@ v128_t test_i16x8_const_splat(void) {

 // CHECK-LABEL: @test_u16x8_const_splat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret <4 x i32> <i32 2752554, i32 2752554, i32 2752554, i32 2752554>
+// CHECK-NEXT:    ret <4 x i32> splat (i32 2752554)
 //
 v128_t test_u16x8_const_splat(void) {
  return wasm_u16x8_const_splat(42);
@ -514,7 +514,7 @@ v128_t test_u16x8_const_splat(void) {

 // CHECK-LABEL: @test_i32x4_const_splat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret <4 x i32> <i32 42, i32 42, i32 42, i32 42>
+// CHECK-NEXT:    ret <4 x i32> splat (i32 42)
 //
 v128_t test_i32x4_const_splat(void) {
  return wasm_i32x4_const_splat(42);
@ -522,7 +522,7 @@ v128_t test_i32x4_const_splat(void) {

 // CHECK-LABEL: @test_u32x4_const_splat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret <4 x i32> <i32 42, i32 42, i32 42, i32 42>
+// CHECK-NEXT:    ret <4 x i32> splat (i32 42)
 //
 v128_t test_u32x4_const_splat(void) {
  return wasm_u32x4_const_splat(42);
@ -546,7 +546,7 @@ v128_t test_u64x2_const_splat(void) {

 // CHECK-LABEL: @test_f32x4_const_splat(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    ret <4 x i32> <i32 1109917696, i32 1109917696, i32 1109917696, i32 1109917696>
+// CHECK-NEXT:    ret <4 x i32> splat (i32 1109917696)
 //
 v128_t test_f32x4_const_splat(void) {
  return wasm_f32x4_const_splat(42);
@ -1462,7 +1462,7 @@ v128_t test_f64x2_ge(v128_t a, v128_t b) {

 // CHECK-LABEL: @test_v128_not(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[NOT_I:%.*]] = xor <4 x i32> [[A:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT:    [[NOT_I:%.*]] = xor <4 x i32> [[A:%.*]], splat (i32 -1)
 // CHECK-NEXT:    ret <4 x i32> [[NOT_I]]
 //
 v128_t test_v128_not(v128_t a) {
@ -1498,7 +1498,7 @@ v128_t test_v128_xor(v128_t a, v128_t b) {

 // CHECK-LABEL: @test_v128_andnot(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[NOT_I:%.*]] = xor <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+// CHECK-NEXT:    [[NOT_I:%.*]] = xor <4 x i32> [[B:%.*]], splat (i32 -1)
 // CHECK-NEXT:    [[AND_I:%.*]] = and <4 x i32> [[A:%.*]], [[NOT_I]]
 // CHECK-NEXT:    ret <4 x i32> [[AND_I]]
 //
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@ -1689,6 +1689,23 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
  if (isa<ConstantVector>(CV) || isa<ConstantDataVector>(CV)) {
    auto *CVVTy = cast<FixedVectorType>(CV->getType());
    Type *ETy = CVVTy->getElementType();
+
+    // Use the same shorthand for splat vector (i.e. "splat(Ty val)") as is
+    // permitted on IR input to reduce the output changes when enabling
+    // UseConstant{Int,FP}ForFixedLengthSplat.
+    // TODO: Remove this block when the UseConstant{Int,FP}ForFixedLengthSplat
+    // options are removed.
+    if (auto *SplatVal = CV->getSplatValue()) {
+      if (isa<ConstantInt>(SplatVal) || isa<ConstantFP>(SplatVal)) {
+        Out << "splat (";
+        WriterCtx.TypePrinter->print(ETy, Out);
+        Out << ' ';
+        WriteAsOperandInternal(Out, SplatVal, WriterCtx);
+        Out << ')';
+        return;
+      }
+    }
+
    Out << '<';
    WriterCtx.TypePrinter->print(ETy, Out);
    Out << ' ';
--- a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
@ -70,15 +70,15 @@ define i32 @fsub(i32 %arg) {
 define i32 @fneg_idiom(i32 %arg) {
 ; CHECK-LABEL: 'fneg_idiom'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F16 = fsub half 0xH8000, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <4 x half> splat (half 0xH8000), undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <8 x half> splat (half 0xH8000), undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <2 x float> splat (float -0.000000e+00), undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = fsub double -0.000000e+00, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
  %F16 = fsub half -0.0, undef
--- a/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/arith-widening.ll
@ -2091,11 +2091,11 @@ define void @extmulv16(<16 x i8> %i8, <16 x i16> %i16, <16 x i32> %i32, <16 x i6
 define void @extmul_const(<8 x i8> %i8, <8 x i16> %i16, <8 x i32> %i32, <8 x i64> %i64)  {
 ; CHECK-LABEL: 'extmul_const'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sl1_8_16 = sext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <8 x i16> %sl1_8_16, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %asl_8_16 = mul <8 x i16> %sl1_8_16, splat (i16 10)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16 = zext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <8 x i16> %zl1_8_16, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %azl_8_16 = mul <8 x i16> %zl1_8_16, splat (i16 10)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: %zl1_8_16b = zext <8 x i8> %i8 to <8 x i16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and = and <8 x i16> %sl1_8_16, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %and = and <8 x i16> %sl1_8_16, splat (i16 255)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %aal_8_16 = mul <8 x i16> %zl1_8_16b, %and
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
--- a/llvm/test/Analysis/CostModel/AArch64/div.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/div.ll
@ -192,21 +192,21 @@ define i32 @sdiv_uniformconst() {
 ; CHECK-LABEL: 'sdiv_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, 7
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = sdiv i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
  %I128 = sdiv i128 undef, 7
@ -238,21 +238,21 @@ define i32 @udiv_uniformconst() {
 ; CHECK-LABEL: 'udiv_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = udiv i128 undef, 7
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
  %I128 = udiv i128 undef, 7
@ -376,21 +376,21 @@ define i32 @sdiv_uniformconstpow2() {
 ; CHECK-LABEL: 'sdiv_uniformconstpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, 16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I64 = sdiv i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 30 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I32 = sdiv i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I16 = sdiv i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 204 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 51 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 102 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 204 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %I8 = sdiv i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 396 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 99 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 198 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 396 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
  %I128 = sdiv i128 undef, 16
@ -422,21 +422,21 @@ define i32 @udiv_uniformconstpow2() {
 ; CHECK-LABEL: 'udiv_uniformconstpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = udiv i128 undef, 16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
  %I128 = udiv i128 undef, 16
@ -560,21 +560,21 @@ define i32 @sdiv_uniformconstnegpow2() {
 ; CHECK-LABEL: 'sdiv_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = sdiv i128 undef, -16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = sdiv i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
  %I128 = sdiv i128 undef, -16
@ -606,21 +606,21 @@ define i32 @udiv_uniformconstnegpow2() {
 ; CHECK-LABEL: 'udiv_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %I128 = udiv i128 undef, -16
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = udiv i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
  %I128 = udiv i128 undef, -16
--- a/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/div_cte.ll
@ -7,7 +7,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

 define <16 x i8> @sdiv8xi16(<16 x i8> %x) {
 ; CHECK-LABEL: 'sdiv8xi16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <16 x i8> %x, splat (i8 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %div
 ;
  %div = sdiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
@ -16,7 +16,7 @@ define <16 x i8> @sdiv8xi16(<16 x i8> %x) {

 define <8 x i16> @sdiv16xi8(<8 x i16> %x) {
 ; CHECK-LABEL: 'sdiv16xi8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <8 x i16> %x, splat (i16 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %div
 ;
  %div = sdiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
@ -25,7 +25,7 @@ define <8 x i16> @sdiv16xi8(<8 x i16> %x) {

 define <4 x i32> @sdiv32xi4(<4 x i32> %x) {
 ; CHECK-LABEL: 'sdiv32xi4'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = sdiv <4 x i32> %x, splat (i32 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div
 ;
  %div = sdiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
@ -34,7 +34,7 @@ define <4 x i32> @sdiv32xi4(<4 x i32> %x) {

 define <16 x i8> @udiv8xi16(<16 x i8> %x) {
 ; CHECK-LABEL: 'udiv8xi16'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <16 x i8> %x, splat (i8 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %div
 ;
  %div = udiv <16 x i8> %x, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
@ -43,7 +43,7 @@ define <16 x i8> @udiv8xi16(<16 x i8> %x) {

 define <8 x i16> @udiv16xi8(<8 x i16> %x) {
 ; CHECK-LABEL: 'udiv16xi8'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <8 x i16> %x, splat (i16 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %div
 ;
  %div = udiv <8 x i16> %x, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
@ -52,7 +52,7 @@ define <8 x i16> @udiv16xi8(<8 x i16> %x) {

 define <4 x i32> @udiv32xi4(<4 x i32> %x) {
 ; CHECK-LABEL: 'udiv32xi4'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %div = udiv <4 x i32> %x, splat (i32 9)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %div
 ;
  %div = udiv <4 x i32> %x, <i32 9, i32 9, i32 9, i32 9>
--- a/llvm/test/Analysis/CostModel/AArch64/fshl.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshl.ll
@ -96,7 +96,7 @@ declare i19 @llvm.fshl.i19(i19, i19, i19)

 define <16 x i8> @fshl_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: 'fshl_v16i8_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshl
 ;
 entry:
@ -128,7 +128,7 @@ declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)

 define <8 x i16> @fshl_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: 'fshl_v8i16_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshl
 ;
 entry:
@ -160,7 +160,7 @@ declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)

 define <4 x i32> @fshl_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: 'fshl_v4i32_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshl
 ;
 entry:
@ -192,7 +192,7 @@ declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

 define <2 x i64> @fshl_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: 'fshl_v2i64_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshl
 ;
 entry:
--- a/llvm/test/Analysis/CostModel/AArch64/fshr.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/fshr.ll
@ -96,7 +96,7 @@ declare i19 @llvm.fshr.i19(i19, i19, i19)

 define <16 x i8> @fshr_v16i8_3rd_arg_vec_const_all_lanes_same(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: 'fshr_v16i8_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <16 x i8> @llvm.fshr.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> splat (i8 3))
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %fshr
 ;
 entry:
@ -128,7 +128,7 @@ declare <16 x i8> @llvm.fshr.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)

 define <8 x i16> @fshr_v8i16_3rd_arg_vec_const_all_lanes_same(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: 'fshr_v8i16_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %fshr = tail call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> splat (i16 3))
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %fshr
 ;
 entry:
@ -160,7 +160,7 @@ declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)

 define <4 x i32> @fshr_v4i32_3rd_arg_vec_const_all_lanes_same(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: 'fshr_v4i32_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> splat (i32 3))
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %fshr
 ;
 entry:
@ -192,7 +192,7 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

 define <2 x i64> @fshr_v2i64_3rd_arg_vec_const_all_lanes_same(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-LABEL: 'fshr_v2i64_3rd_arg_vec_const_all_lanes_same'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %fshr = tail call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> splat (i64 1))
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %fshr
 ;
 entry:
--- a/llvm/test/Analysis/CostModel/AArch64/logicalop.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/logicalop.ll
@ -31,14 +31,14 @@ define void @vecop() {
 ; CHECK-THROUGHPUT-LABEL: 'vecop'
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-SIZE-LABEL: 'vecop'
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
--- a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
@ -190,19 +190,19 @@ declare <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr>, i32 immarg, <4 x i1>,
 define <4 x i8> @gather_load_4xi8_constant_mask(<4 x ptr> %ptrs) {
 ; CHECK:         gather_load_4xi8_constant_mask
 ; CHECK-NEON-LABEL: 'gather_load_4xi8_constant_mask'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i8> undef)
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv
 ;
 ; CHECK-SVE-128-LABEL: 'gather_load_4xi8_constant_mask'
-; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i8> undef)
 ; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv
 ;
 ; CHECK-SVE-256-LABEL: 'gather_load_4xi8_constant_mask'
-; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i8> undef)
 ; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv
 ;
 ; CHECK-SVE-512-LABEL: 'gather_load_4xi8_constant_mask'
-; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i8> undef)
 ; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i8> %lv
 ;
  %lv = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
@ -235,19 +235,19 @@ declare void @llvm.masked.scatter.v4i8.v4p0(<4 x i8>, <4 x ptr>, i32 immarg, <4
 define void @scatter_store_4xi8_constant_mask(<4 x i8> %val, <4 x ptr> %ptrs) {
 ; CHECK:         scatter_store_4xi8_constant_mask
 ; CHECK-NEON-LABEL: 'scatter_store_4xi8_constant_mask'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-SVE-128-LABEL: 'scatter_store_4xi8_constant_mask'
-; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
 ; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-SVE-256-LABEL: 'scatter_store_4xi8_constant_mask'
-; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
 ; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-SVE-512-LABEL: 'scatter_store_4xi8_constant_mask'
-; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
 ; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
  call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
@ -280,19 +280,19 @@ declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32 immarg, <4 x i1>
 define <4 x i32> @gather_load_4xi32_constant_mask(<4 x ptr> %ptrs) {
 ; CHECK:         gather_load_4xi32_constant_mask
 ; CHECK-NEON-LABEL: 'gather_load_4xi32_constant_mask'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i32> undef)
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv
 ;
 ; CHECK-SVE-128-LABEL: 'gather_load_4xi32_constant_mask'
-; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i32> undef)
 ; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv
 ;
 ; CHECK-SVE-256-LABEL: 'gather_load_4xi32_constant_mask'
-; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i32> undef)
 ; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv
 ;
 ; CHECK-SVE-512-LABEL: 'gather_load_4xi32_constant_mask'
-; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true), <4 x i32> undef)
 ; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %lv
 ;
  %lv = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
@ -325,19 +325,19 @@ declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32 immarg, <
 define void @scatter_store_4xi32_constant_mask(<4 x i32> %val, <4 x ptr> %ptrs) {
 ; CHECK:         scatter_store_4xi32_constant_mask
 ; CHECK-NEON-LABEL: 'scatter_store_4xi32_constant_mask'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-SVE-128-LABEL: 'scatter_store_4xi32_constant_mask'
-; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
 ; CHECK-SVE-128-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-SVE-256-LABEL: 'scatter_store_4xi32_constant_mask'
-; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
 ; CHECK-SVE-256-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-SVE-512-LABEL: 'scatter_store_4xi32_constant_mask'
-; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> splat (i1 true))
 ; CHECK-SVE-512-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
  call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %val, <4 x ptr> %ptrs, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
--- a/llvm/test/Analysis/CostModel/AArch64/rem.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/rem.ll
@ -178,21 +178,21 @@ define i32 @urem_const() {
 define i32 @srem_uniformconst() {
 ; CHECK-LABEL: 'srem_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = srem i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
  %I64 = srem i64 undef, 7
@ -221,21 +221,21 @@ define i32 @srem_uniformconst() {
 define i32 @urem_uniformconst() {
 ; CHECK-LABEL: 'urem_uniformconst'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = urem i64 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, <i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, 7
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
  %I64 = urem i64 undef, 7
@ -350,21 +350,21 @@ define i32 @urem_constpow2() {
 define i32 @srem_uniformconstpow2() {
 ; CHECK-LABEL: 'srem_uniformconstpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I64 = srem i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I32 = srem i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I16 = srem i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 352 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 352 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %I8 = srem i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 352 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 704 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 176 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 352 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 704 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
  %I64 = srem i64 undef, 16
@ -393,21 +393,21 @@ define i32 @srem_uniformconstpow2() {
 define i32 @urem_uniformconstpow2() {
 ; CHECK-LABEL: 'urem_uniformconstpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = urem i64 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, 16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
  %I64 = urem i64 undef, 16
@ -522,21 +522,21 @@ define i32 @urem_constnegpow2() {
 define i32 @srem_uniformconstnegpow2() {
 ; CHECK-LABEL: 'srem_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = srem i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = srem i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = srem i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = srem i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
  %I64 = srem i64 undef, -16
@ -565,21 +565,21 @@ define i32 @srem_uniformconstnegpow2() {
 define i32 @urem_uniformconstnegpow2() {
 ; CHECK-LABEL: 'urem_uniformconstnegpow2'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %I64 = urem i64 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 26 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 52 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 104 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I32 = urem i32 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I16 = urem i16 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %I8 = urem i8 undef, -16
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 224 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 448 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
  %I64 = urem i64 undef, -16
--- a/llvm/test/Analysis/CostModel/AMDGPU/div.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/div.ll
@ -337,59 +337,59 @@ define i32 @udiv_const() {
 define i32 @sdiv_uniformconst() {
 ; FAST-LABEL: 'sdiv_uniformconst'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'sdiv_uniformconst'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'sdiv_uniformconst'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 7, i64 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
  %I64 = sdiv i64 undef, 7
@ -418,59 +418,59 @@ define i32 @sdiv_uniformconst() {
 define i32 @udiv_uniformconst() {
 ; FAST-LABEL: 'udiv_uniformconst'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'udiv_uniformconst'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'udiv_uniformconst'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 7, i64 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
  %I64 = udiv i64 undef, 7
@ -661,59 +661,59 @@ define i32 @udiv_constpow2() {
 define i32 @sdiv_uniformconstpow2() {
 ; FAST-LABEL: 'sdiv_uniformconstpow2'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'sdiv_uniformconstpow2'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'sdiv_uniformconstpow2'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 16, i64 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
  %I64 = sdiv i64 undef, 16
@ -742,59 +742,59 @@ define i32 @sdiv_uniformconstpow2() {
 define i32 @udiv_uniformconstpow2() {
 ; FAST-LABEL: 'udiv_uniformconstpow2'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'udiv_uniformconstpow2'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'udiv_uniformconstpow2'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 16, i64 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
  %I64 = udiv i64 undef, 16
@ -985,59 +985,59 @@ define i32 @udiv_constnegpow2() {
 define i32 @sdiv_uniformconstnegpow2() {
 ; FAST-LABEL: 'sdiv_uniformconstnegpow2'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -16, i64 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'sdiv_uniformconstnegpow2'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -16, i64 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'sdiv_uniformconstnegpow2'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, <i64 -16, i64 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
  %I64 = sdiv i64 undef, -16
@ -1066,59 +1066,59 @@ define i32 @sdiv_uniformconstnegpow2() {
 define i32 @udiv_uniformconstnegpow2() {
 ; FAST-LABEL: 'udiv_uniformconstnegpow2'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 -16, i64 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'udiv_uniformconstnegpow2'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 -16, i64 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'udiv_uniformconstnegpow2'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = udiv i64 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, <i64 -16, i64 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, splat (i64 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = udiv <4 x i64> undef, splat (i64 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = udiv <8 x i64> undef, splat (i64 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = udiv i32 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = udiv <4 x i32> undef, splat (i32 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = udiv <8 x i32> undef, splat (i32 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = udiv <16 x i32> undef, splat (i32 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = udiv i16 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = udiv <8 x i16> undef, splat (i16 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = udiv <16 x i16> undef, splat (i16 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = udiv <32 x i16> undef, splat (i16 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = udiv i8 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = udiv <16 x i8> undef, splat (i8 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = udiv <32 x i8> undef, splat (i8 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = udiv <64 x i8> undef, splat (i8 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
  %I64 = udiv i64 undef, -16
--- a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
@ -314,146 +314,146 @@ define amdgpu_kernel void @fdiv_f16_f32ftzdaz() #1 {
 define amdgpu_kernel void @rcp_ieee() #0 {
 ; CIFASTF64-LABEL: 'rcp_ieee'
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; CISLOWF64-LABEL: 'rcp_ieee'
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SIFASTF64-LABEL: 'rcp_ieee'
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SISLOWF64-LABEL: 'rcp_ieee'
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f16 = fdiv half 0xH3C00, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 112 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FP16-LABEL: 'rcp_ieee'
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 14 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 28 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 42 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 56 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 70 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; CI-SIZE-LABEL: 'rcp_ieee'
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half 0xH3C00, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SI-SIZE-LABEL: 'rcp_ieee'
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f16 = fdiv half 0xH3C00, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; FP16-SIZE-LABEL: 'rcp_ieee'
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 60 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
  %f16 = fdiv half 1.0, undef
@ -477,146 +477,146 @@ define amdgpu_kernel void @rcp_ieee() #0 {
 define amdgpu_kernel void @rcp_ftzdaz() #1 {
 ; CIFASTF64-LABEL: 'rcp_ftzdaz'
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 576 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; CIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; CISLOWF64-LABEL: 'rcp_ftzdaz'
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; CISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SIFASTF64-LABEL: 'rcp_ftzdaz'
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 27 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 54 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 81 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 108 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 648 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; SIFASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SISLOWF64-LABEL: 'rcp_ftzdaz'
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 41 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 82 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 123 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 164 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 984 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; SISLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; FP16-LABEL: 'rcp_ftzdaz'
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f16 = fdiv half 0xH3C00, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 38 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 76 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 114 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 152 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 912 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; CI-SIZE-LABEL: 'rcp_ftzdaz'
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; CI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SI-SIZE-LABEL: 'rcp_ftzdaz'
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 25 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 50 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 75 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 100 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 600 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; SI-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; FP16-SIZE-LABEL: 'rcp_ftzdaz'
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f16 = fdiv half 0xH3C00, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> <half 0xH3C00, half 0xH3C00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> <half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00, half 0xH3C00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f16 = fdiv <2 x half> splat (half 0xH3C00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v3f16 = fdiv <3 x half> splat (half 0xH3C00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = fdiv <4 x half> splat (half 0xH3C00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5f16 = fdiv <5 x half> splat (half 0xH3C00), undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = fdiv float 1.000000e+00, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = fdiv <2 x float> splat (float 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = fdiv <3 x float> splat (float 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = fdiv <4 x float> splat (float 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = fdiv <5 x float> splat (float 1.000000e+00), undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %f64 = fdiv double 1.000000e+00, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> <double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %v2f64 = fdiv <2 x double> splat (double 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 66 for instruction: %v3f64 = fdiv <3 x double> splat (double 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 88 for instruction: %v4f64 = fdiv <4 x double> splat (double 1.000000e+00), undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 528 for instruction: %v5f64 = fdiv <5 x double> splat (double 1.000000e+00), undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
  %f16 = fdiv half 1.0, undef
--- a/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fneg.ll
@ -139,24 +139,24 @@ define void @fneg_f64() {
 define i32 @fneg_idiom(i32 %arg) {
 ; CHECK-LABEL: 'fneg_idiom'
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %F64 = fsub double -0.000000e+00, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SIZE-LABEL: 'fneg_idiom'
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = fsub float -0.000000e+00, undef
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = fsub <4 x float> splat (float -0.000000e+00), undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = fsub <8 x float> splat (float -0.000000e+00), undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = fsub <16 x float> splat (float -0.000000e+00), undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = fsub double -0.000000e+00, undef
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, undef
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
-; SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8F64 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = fsub <2 x double> splat (double -0.000000e+00), undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = fsub <4 x double> splat (double -0.000000e+00), undef
+; SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8F64 = fsub <8 x double> splat (double -0.000000e+00), undef
 ; SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
  %F32 = fsub float -0.0, undef
--- a/llvm/test/Analysis/CostModel/AMDGPU/logicalop.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/logicalop.ll
@ -32,22 +32,22 @@ define void @vecop() {
 ; CHECK-THROUGHPUT-LABEL: 'vecop'
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2sand = select <2 x i1> undef, <2 x i1> undef, <2 x i1> zeroinitializer
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2band = and <2 x i1> undef, undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2sor = select <2 x i1> undef, <2 x i1> <i1 true, i1 true>, <2 x i1> undef
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2sor = select <2 x i1> undef, <2 x i1> splat (i1 true), <2 x i1> undef
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2bor = or <2 x i1> undef, undef
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4band = and <4 x i1> undef, undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4bor = or <4 x i1> undef, undef
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; CHECK-SIZE-LABEL: 'vecop'
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2sand = select <2 x i1> undef, <2 x i1> undef, <2 x i1> zeroinitializer
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2band = and <2 x i1> undef, undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2sor = select <2 x i1> undef, <2 x i1> <i1 true, i1 true>, <2 x i1> undef
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2sor = select <2 x i1> undef, <2 x i1> splat (i1 true), <2 x i1> undef
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2bor = or <2 x i1> undef, undef
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4band = and <4 x i1> undef, undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4bor = or <4 x i1> undef, undef
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
--- a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
@ -215,78 +215,78 @@ define i32 @mul_constpow2() {
 define i32 @mul_uniformconstpow2() {
 ; SLOW16-LABEL: 'mul_uniformconstpow2'
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, 16
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, 16
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, 16
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, 16
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; FAST16-LABEL: 'mul_uniformconstpow2'
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, 16
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, 16
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, 16
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, 16
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW16-SIZE-LABEL: 'mul_uniformconstpow2'
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, 16
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, 16
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, 16
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, 16
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; FAST16-SIZE-LABEL: 'mul_uniformconstpow2'
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, 16
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, <i64 16, i64 16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 16)
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, 16
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 16)
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, 16
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 16)
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, 16
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 16)
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
  %I64 = mul i64 undef, 16
@ -415,78 +415,78 @@ define i32 @mul_constnegpow2() {
 define i32 @mul_uniformconstnegpow2() {
 ; SLOW16-LABEL: 'mul_uniformconstnegpow2'
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, -16
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, -16
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, -16
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 136 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, -16
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW16-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SLOW16-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; SLOW16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; FAST16-LABEL: 'mul_uniformconstnegpow2'
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %I64 = mul i64 undef, -16
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = mul i32 undef, -16
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, -16
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, -16
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 264 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW16-SIZE-LABEL: 'mul_uniformconstnegpow2'
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, -16
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 68 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; SLOW16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; FAST16-SIZE-LABEL: 'mul_uniformconstnegpow2'
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %I64 = mul i64 undef, -16
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, <i64 -16, i64 -16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V2i64 = mul <2 x i64> undef, splat (i64 -16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V4i64 = mul <4 x i64> undef, splat (i64 -16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 288 for instruction: %V8i64 = mul <8 x i64> undef, splat (i64 -16)
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I32 = mul i32 undef, -16
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4i32 = mul <4 x i32> undef, splat (i32 -16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = mul <8 x i32> undef, splat (i32 -16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = mul <16 x i32> undef, splat (i32 -16)
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, splat (i16 -16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, splat (i16 -16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, splat (i16 -16)
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, splat (i8 -16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, splat (i8 -16)
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 132 for instruction: %V64i8 = mul <64 x i8> undef, splat (i8 -16)
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
  %I64 = mul i64 undef, -16
--- a/llvm/test/Analysis/CostModel/AMDGPU/rem.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/rem.ll
@ -337,59 +337,59 @@ define i32 @urem_const() {
 define i32 @srem_uniformconst() {
 ; FAST-LABEL: 'srem_uniformconst'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, <i64 7, i64 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'srem_uniformconst'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, <i64 7, i64 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'srem_uniformconst'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, <i64 7, i64 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
  %I64 = srem i64 undef, 7
@ -418,59 +418,59 @@ define i32 @srem_uniformconst() {
 define i32 @urem_uniformconst() {
 ; FAST-LABEL: 'urem_uniformconst'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, <i64 7, i64 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 7
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'urem_uniformconst'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, <i64 7, i64 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, 7
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'urem_uniformconst'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, <i64 7, i64 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, <i64 7, i64 7, i64 7, i64 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, <i32 7, i32 7, i32 7, i32 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 7
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 7)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 7)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
  %I64 = urem i64 undef, 7
@ -661,59 +661,59 @@ define i32 @urem_constpow2() {
 define i32 @srem_uniformconstpow2() {
 ; FAST-LABEL: 'srem_uniformconstpow2'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'srem_uniformconstpow2'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'srem_uniformconstpow2'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, <i64 16, i64 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
  %I64 = srem i64 undef, 16
@ -742,59 +742,59 @@ define i32 @srem_uniformconstpow2() {
 define i32 @urem_uniformconstpow2() {
 ; FAST-LABEL: 'urem_uniformconstpow2'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'urem_uniformconstpow2'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, 16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'urem_uniformconstpow2'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, <i64 16, i64 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, <i64 16, i64 16, i64 16, i64 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16, i64 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, <i32 16, i32 16, i32 16, i32 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, 16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
  %I64 = urem i64 undef, 16
@ -985,59 +985,59 @@ define i32 @urem_constnegpow2() {
 define i32 @srem_uniformconstnegpow2() {
 ; FAST-LABEL: 'srem_uniformconstnegpow2'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -16, i64 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'srem_uniformconstnegpow2'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = srem i64 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -16, i64 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'srem_uniformconstnegpow2'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = srem i64 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, <i64 -16, i64 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = srem i32 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = srem i16 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = srem i8 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
  %I64 = srem i64 undef, -16
@ -1066,59 +1066,59 @@ define i32 @srem_uniformconstnegpow2() {
 define i32 @urem_uniformconstnegpow2() {
 ; FAST-LABEL: 'urem_uniformconstnegpow2'
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -16, i64 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 31 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 63 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, -16
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'urem_uniformconstnegpow2'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 23 for instruction: %I64 = urem i64 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -16, i64 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 46 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 92 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 184 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I32 = urem i32 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I16 = urem i16 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 266 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %I8 = urem i8 undef, -16
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 256 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
+; SLOW-NEXT:  Cost Model: Found an estimated cost of 522 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; ALL-SIZE-LABEL: 'urem_uniformconstnegpow2'
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I64 = urem i64 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, <i64 -16, i64 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, <i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16, i64 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = urem <2 x i64> undef, splat (i64 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = urem <4 x i64> undef, splat (i64 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i64 = urem <8 x i64> undef, splat (i64 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I32 = urem i32 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, <i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16, i32 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = urem <4 x i32> undef, splat (i32 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = urem <8 x i32> undef, splat (i32 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = urem <16 x i32> undef, splat (i32 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = urem i16 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = urem <8 x i16> undef, splat (i16 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = urem <16 x i16> undef, splat (i16 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i16 = urem <32 x i16> undef, splat (i16 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = urem i8 undef, -16
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
-; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V16i8 = urem <16 x i8> undef, splat (i8 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V32i8 = urem <32 x i8> undef, splat (i8 -16)
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V64i8 = urem <64 x i8> undef, splat (i8 -16)
 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
  %I64 = urem i64 undef, -16
--- a/llvm/test/Analysis/CostModel/ARM/divrem.ll
+++ b/llvm/test/Analysis/CostModel/ARM/divrem.ll
@ -1031,98 +1031,98 @@ define void @vf64() {

 define void @vi8_2() {
 ; CHECK-NEON-LABEL: 'vi8_2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, <i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, <i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, <i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, <i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2)
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi8_2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i8> undef, <i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i8> undef, <i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i8> undef, <i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i8> undef, <i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2)
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi8_2'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2)
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi8_2'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2)
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi8_2'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, <i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, <i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e1 = sdiv <8 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %e2 = udiv <8 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i8> undef, splat (i8 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i8> undef, splat (i8 2)
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
  %t1 = sdiv <2 x i8> undef, <i8 2, i8 2>
@ -1146,98 +1146,98 @@ define void @vi8_2() {

 define void @vi16_2() {
 ; CHECK-NEON-LABEL: 'vi16_2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, <i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, <i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, <i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, <i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2)
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi16_2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i16> undef, <i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i16> undef, <i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i16> undef, <i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i16> undef, <i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2)
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi16_2'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2)
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi16_2'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2)
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi16_2'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, <i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, <i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f1 = sdiv <4 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %f2 = udiv <4 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i16> undef, splat (i16 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i16> undef, splat (i16 2)
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
  %t1 = sdiv <2 x i16> undef, <i16 2, i16 2>
@ -1261,98 +1261,98 @@ define void @vi16_2() {

 define void @vi32_2() {
 ; CHECK-NEON-LABEL: 'vi32_2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, <i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i32> undef, <i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, <i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, <i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2)
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi32_2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i32> undef, <i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i32> undef, <i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i32> undef, <i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i32> undef, <i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2)
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi32_2'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2)
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi32_2'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2)
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi32_2'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, <i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, <i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i32> undef, splat (i32 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i32> undef, splat (i32 2)
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
  %t1 = sdiv <2 x i32> undef, <i32 2, i32 2>
@ -1376,98 +1376,98 @@ define void @vi32_2() {

 define void @vi64_2() {
 ; CHECK-NEON-LABEL: 'vi64_2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, <i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, <i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, <i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, <i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2)
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vi64_2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %t1 = sdiv <2 x i64> undef, <i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %t2 = udiv <2 x i64> undef, <i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %t3 = srem <2 x i64> undef, <i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %t4 = urem <2 x i64> undef, <i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %f1 = sdiv <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %f2 = udiv <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %f3 = srem <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %f4 = urem <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %e1 = sdiv <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %e2 = udiv <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %s1 = sdiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %s2 = udiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %s3 = srem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %s4 = urem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 36 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 72 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 144 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2)
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vi64_2'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t2 = udiv <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f1 = sdiv <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f2 = udiv <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f3 = srem <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f4 = urem <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e1 = sdiv <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e2 = udiv <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e3 = srem <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e4 = urem <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s1 = sdiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2)
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vi64_2'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t2 = udiv <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f1 = sdiv <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f2 = udiv <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f3 = srem <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f4 = urem <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e1 = sdiv <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e2 = udiv <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e3 = srem <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e4 = urem <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s1 = sdiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2)
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vi64_2'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, <i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, <i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t1 = sdiv <2 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t2 = udiv <2 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t3 = srem <2 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %t4 = urem <2 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f1 = sdiv <4 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f2 = udiv <4 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f3 = srem <4 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %f4 = urem <4 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e1 = sdiv <8 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e2 = udiv <8 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e3 = srem <8 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 160 for instruction: %e4 = urem <8 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s1 = sdiv <16 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s2 = udiv <16 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s3 = srem <16 x i64> undef, splat (i64 2)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %s4 = urem <16 x i64> undef, splat (i64 2)
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
  %t1 = sdiv <2 x i64> undef, <i64 2, i64 2>
@ -1491,48 +1491,48 @@ define void @vi64_2() {

 define void @vf16_2() {
 ; CHECK-NEON-LABEL: 'vf16_2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, <half 0xH4000, half 0xH4000>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x half> undef, <half 0xH4000, half 0xH4000>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000)
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf16_2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, <half 0xH4000, half 0xH4000>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x half> undef, <half 0xH4000, half 0xH4000>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000)
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf16_2'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, <half 0xH4000, half 0xH4000>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x half> undef, <half 0xH4000, half 0xH4000>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000)
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf16_2'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, <half 0xH4000, half 0xH4000>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x half> undef, <half 0xH4000, half 0xH4000>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000)
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf16_2'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, <half 0xH4000, half 0xH4000>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x half> undef, <half 0xH4000, half 0xH4000>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x half> undef, <half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000, half 0xH4000>
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x half> undef, splat (half 0xH4000)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x half> undef, splat (half 0xH4000)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x half> undef, splat (half 0xH4000)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x half> undef, splat (half 0xH4000)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x half> undef, splat (half 0xH4000)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x half> undef, splat (half 0xH4000)
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
  %1 = fdiv <2 x half> undef, <half 2., half 2.>
@ -1546,48 +1546,48 @@ define void @vf16_2() {

 define void @vf32_2() {
 ; CHECK-NEON-LABEL: 'vf32_2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf32_2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf32_2'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf32_2'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf32_2'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x float> undef, <float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x float> undef, <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = fdiv <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = frem <2 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %3 = fdiv <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %4 = frem <4 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %5 = fdiv <8 x float> undef, splat (float 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %6 = frem <8 x float> undef, splat (float 2.000000e+00)
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
  %1 = fdiv <2 x float> undef, <float 2., float 2.>
@ -1601,48 +1601,48 @@ define void @vf32_2() {

 define void @vf64_2() {
 ; CHECK-NEON-LABEL: 'vf64_2'
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %2 = frem <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %3 = fdiv <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = frem <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %6 = frem <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
 ; CHECK-NEON-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-MVE-LABEL: 'vf64_2'
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
 ; CHECK-MVE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-V8M-MAIN-LABEL: 'vf64_2'
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
 ; CHECK-V8M-MAIN-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8M-BASE-LABEL: 'vf64_2'
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
 ; CHECK-V8M-BASE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-V8R-LABEL: 'vf64_2'
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %2 = frem <2 x double> undef, <double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %3 = fdiv <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = frem <4 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
-; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %6 = frem <8 x double> undef, <double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00, double 2.000000e+00>
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = fdiv <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %2 = frem <2 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %3 = fdiv <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %4 = frem <4 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %5 = fdiv <8 x double> undef, splat (double 2.000000e+00)
+; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %6 = frem <8 x double> undef, splat (double 2.000000e+00)
 ; CHECK-V8R-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
  %1 = fdiv <2 x double> undef, <double 2., double 2.>
--- a/llvm/test/Analysis/CostModel/ARM/logicalop.ll
+++ b/llvm/test/Analysis/CostModel/ARM/logicalop.ll
@ -80,56 +80,56 @@ define void @vecop() {
 ; CHECK-MVE-RECIP-LABEL: 'vecop'
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef
+; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %bor = or <4 x i1> undef, undef
 ; CHECK-MVE-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-NEON-RECIP-LABEL: 'vecop'
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef
+; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef
 ; CHECK-NEON-RECIP-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-THUMB1-RECIP-LABEL: 'vecop'
 ; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
 ; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef
+; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
 ; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef
 ; CHECK-THUMB1-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-THUMB2-RECIP-LABEL: 'vecop'
 ; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
 ; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef
+; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
 ; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef
 ; CHECK-THUMB2-RECIP-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-MVE-SIZE-LABEL: 'vecop'
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef
+; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef
 ; CHECK-MVE-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-NEON-SIZE-LABEL: 'vecop'
 ; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
 ; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef
+; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
 ; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef
 ; CHECK-NEON-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-THUMB1-SIZE-LABEL: 'vecop'
 ; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
 ; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef
+; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
 ; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef
 ; CHECK-THUMB1-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; CHECK-THUMB2-SIZE-LABEL: 'vecop'
 ; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
 ; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef
+; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
 ; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef
 ; CHECK-THUMB2-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
--- a/llvm/test/Analysis/CostModel/PowerPC/logicalop.ll
+++ b/llvm/test/Analysis/CostModel/PowerPC/logicalop.ll
@ -33,7 +33,7 @@ define void @vecop() {
 ; CHECK-SIZE-LABEL: 'vecop'
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
--- a/llvm/test/Analysis/CostModel/RISCV/arith-int.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/arith-int.ll
@ -1173,9 +1173,9 @@ define void @add_of_constant() {
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = add <4 x i32> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = add <4 x i32> zeroinitializer, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = add <2 x i64> zeroinitializer, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = add <2 x i64> <i64 1, i64 1>, undef
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = add <4 x i32> <i32 4096, i32 4096, i32 4096, i32 4096>, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = add <4 x i32> splat (i32 1), undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = add <2 x i64> splat (i64 1), undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = add <4 x i32> splat (i32 4096), undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %8 = add <4 x i32> <i32 1, i32 1, i32 2, i32 1>, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = add <4 x i32> <i32 2, i32 1, i32 1, i32 1>, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = add <4 x i32> <i32 0, i32 1, i32 2, i32 3>, undef
@ -1191,9 +1191,9 @@ define void @add_of_constant() {
 ; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = add <4 x i32> undef, undef
 ; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = add <4 x i32> zeroinitializer, undef
 ; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = add <2 x i64> zeroinitializer, undef
-; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, undef
-; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = add <2 x i64> <i64 1, i64 1>, undef
-; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = add <4 x i32> <i32 4096, i32 4096, i32 4096, i32 4096>, undef
+; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %5 = add <4 x i32> splat (i32 1), undef
+; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %6 = add <2 x i64> splat (i64 1), undef
+; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %7 = add <4 x i32> splat (i32 4096), undef
 ; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %8 = add <4 x i32> <i32 1, i32 1, i32 2, i32 1>, undef
 ; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %9 = add <4 x i32> <i32 2, i32 1, i32 1, i32 1>, undef
 ; SIFIVE-X280-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %10 = add <4 x i32> <i32 0, i32 1, i32 2, i32 3>, undef
--- a/llvm/test/Analysis/CostModel/RISCV/logicalop.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/logicalop.ll
@ -38,14 +38,14 @@ define void @vecop() {
 ; CHECK-THROUGHPUT-LABEL: 'vecop'
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef
+; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %bor = or <4 x i1> undef, undef
 ; CHECK-THROUGHPUT-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-SIZE-LABEL: 'vecop'
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sand = select <4 x i1> undef, <4 x i1> undef, <4 x i1> zeroinitializer
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %band = and <4 x i1> undef, undef
-; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> undef
+; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %sor = select <4 x i1> undef, <4 x i1> splat (i1 true), <4 x i1> undef
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %bor = or <4 x i1> undef, undef
 ; CHECK-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
--- a/Show More
+++ b/Show More