[PowerPC] extend smaller splats into bigger splats (with fix) (#142194)
On pwr9, xxspltib is a byte splat covering the range -128 to 127; following it with a vector sign-extend instruction produces splats with i16, i32, or i64 elements. On pwr8, vspltisw followed by a vector sign-extend (vupklsw) can be used to make splats of i64 elements in the range -16 to 15. Add a check for P8 (hasP8Altivec) to make sure the 64-bit vector ops are available.
This commit is contained in:
parent
01671ff849
commit
5d6218d311
@ -9672,7 +9672,24 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
||||
}
|
||||
}
|
||||
|
||||
if (!BVNIsConstantSplat || SplatBitSize > 32) {
|
||||
bool IsSplat64 = false;
|
||||
uint64_t SplatBits = 0;
|
||||
int32_t SextVal = 0;
|
||||
if (BVNIsConstantSplat && SplatBitSize <= 64) {
|
||||
SplatBits = APSplatBits.getZExtValue();
|
||||
if (SplatBitSize <= 32) {
|
||||
SextVal = SignExtend32(SplatBits, SplatBitSize);
|
||||
} else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
|
||||
int64_t Splat64Val = static_cast<int64_t>(SplatBits);
|
||||
bool P9Vector = Subtarget.hasP9Vector();
|
||||
int32_t Hi = P9Vector ? 127 : 15;
|
||||
int32_t Lo = P9Vector ? -128 : -16;
|
||||
IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
|
||||
SextVal = static_cast<int32_t>(SplatBits);
|
||||
}
|
||||
}
|
||||
|
||||
if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
|
||||
unsigned NewOpcode = PPCISD::LD_SPLAT;
|
||||
|
||||
// Handle load-and-splat patterns as we have instructions that will do this
|
||||
@ -9758,7 +9775,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
uint64_t SplatBits = APSplatBits.getZExtValue();
|
||||
uint64_t SplatUndef = APSplatUndef.getZExtValue();
|
||||
unsigned SplatSize = SplatBitSize / 8;
|
||||
|
||||
@ -9793,13 +9809,43 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
||||
dl);
|
||||
|
||||
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
|
||||
int32_t SextVal = SignExtend32(SplatBits, SplatBitSize);
|
||||
if (SextVal >= -16 && SextVal <= 15)
|
||||
return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
|
||||
dl);
|
||||
// Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
|
||||
if (SextVal >= -16 && SextVal <= 15) {
|
||||
// SplatSize may be 1, 2, 4, or 8. Use size 4 instead of 8 for the splat to
|
||||
// generate a splat word with extend for size 8.
|
||||
unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
|
||||
SDValue Res =
|
||||
getCanonicalConstSplat(SextVal, UseSize, Op.getValueType(), DAG, dl);
|
||||
if (SplatSize != 8)
|
||||
return Res;
|
||||
return BuildIntrinsicOp(Intrinsic::ppc_altivec_vupklsw, Res, DAG, dl);
|
||||
}
|
||||
|
||||
// Two instruction sequences.
|
||||
|
||||
if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
|
||||
SDValue C = DAG.getConstant((unsigned char)SextVal, dl, MVT::i32);
|
||||
SmallVector<SDValue, 16> Ops(16, C);
|
||||
SDValue BV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
|
||||
unsigned IID;
|
||||
switch (SplatSize) {
|
||||
default:
|
||||
llvm_unreachable("Unexpected type for vector constant.");
|
||||
case 2:
|
||||
IID = Intrinsic::ppc_altivec_vupklsb;
|
||||
break;
|
||||
case 4:
|
||||
IID = Intrinsic::ppc_altivec_vextsb2w;
|
||||
break;
|
||||
case 8:
|
||||
IID = Intrinsic::ppc_altivec_vextsb2d;
|
||||
break;
|
||||
}
|
||||
SDValue Extend = BuildIntrinsicOp(IID, BV, DAG, dl);
|
||||
return DAG.getBitcast(Op->getValueType(0), Extend);
|
||||
}
|
||||
assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
|
||||
|
||||
// If this value is in the range [-32,30] and is even, use:
|
||||
// VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
|
||||
// If this value is in the range [17,31] and is odd, use:
|
||||
|
@ -3713,30 +3713,26 @@ entry:
|
||||
define <2 x i64> @spltConst1ll() {
|
||||
; P9BE-LABEL: spltConst1ll:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: addis r3, r2, .LCPI65_0@toc@ha
|
||||
; P9BE-NEXT: addi r3, r3, .LCPI65_0@toc@l
|
||||
; P9BE-NEXT: lxv v2, 0(r3)
|
||||
; P9BE-NEXT: vspltisw v2, 1
|
||||
; P9BE-NEXT: vupklsw v2, v2
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
; P9LE-LABEL: spltConst1ll:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: addis r3, r2, .LCPI65_0@toc@ha
|
||||
; P9LE-NEXT: addi r3, r3, .LCPI65_0@toc@l
|
||||
; P9LE-NEXT: lxv v2, 0(r3)
|
||||
; P9LE-NEXT: vspltisw v2, 1
|
||||
; P9LE-NEXT: vupklsw v2, v2
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
; P8BE-LABEL: spltConst1ll:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: addis r3, r2, .LCPI65_0@toc@ha
|
||||
; P8BE-NEXT: addi r3, r3, .LCPI65_0@toc@l
|
||||
; P8BE-NEXT: lxvd2x v2, 0, r3
|
||||
; P8BE-NEXT: vspltisw v2, 1
|
||||
; P8BE-NEXT: vupklsw v2, v2
|
||||
; P8BE-NEXT: blr
|
||||
;
|
||||
; P8LE-LABEL: spltConst1ll:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: addis r3, r2, .LCPI65_0@toc@ha
|
||||
; P8LE-NEXT: addi r3, r3, .LCPI65_0@toc@l
|
||||
; P8LE-NEXT: lxvd2x v2, 0, r3
|
||||
; P8LE-NEXT: vspltisw v2, 1
|
||||
; P8LE-NEXT: vupklsw v2, v2
|
||||
; P8LE-NEXT: blr
|
||||
entry:
|
||||
ret <2 x i64> <i64 1, i64 1>
|
||||
@ -4173,30 +4169,26 @@ entry:
|
||||
define <2 x i64> @spltCnstConvftoll() {
|
||||
; P9BE-LABEL: spltCnstConvftoll:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: addis r3, r2, .LCPI78_0@toc@ha
|
||||
; P9BE-NEXT: addi r3, r3, .LCPI78_0@toc@l
|
||||
; P9BE-NEXT: lxv v2, 0(r3)
|
||||
; P9BE-NEXT: vspltisw v2, 4
|
||||
; P9BE-NEXT: vupklsw v2, v2
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
; P9LE-LABEL: spltCnstConvftoll:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: addis r3, r2, .LCPI78_0@toc@ha
|
||||
; P9LE-NEXT: addi r3, r3, .LCPI78_0@toc@l
|
||||
; P9LE-NEXT: lxv v2, 0(r3)
|
||||
; P9LE-NEXT: vspltisw v2, 4
|
||||
; P9LE-NEXT: vupklsw v2, v2
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
; P8BE-LABEL: spltCnstConvftoll:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: addis r3, r2, .LCPI78_0@toc@ha
|
||||
; P8BE-NEXT: addi r3, r3, .LCPI78_0@toc@l
|
||||
; P8BE-NEXT: lxvd2x v2, 0, r3
|
||||
; P8BE-NEXT: vspltisw v2, 4
|
||||
; P8BE-NEXT: vupklsw v2, v2
|
||||
; P8BE-NEXT: blr
|
||||
;
|
||||
; P8LE-LABEL: spltCnstConvftoll:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: addis r3, r2, .LCPI78_0@toc@ha
|
||||
; P8LE-NEXT: addi r3, r3, .LCPI78_0@toc@l
|
||||
; P8LE-NEXT: lxvd2x v2, 0, r3
|
||||
; P8LE-NEXT: vspltisw v2, 4
|
||||
; P8LE-NEXT: vupklsw v2, v2
|
||||
; P8LE-NEXT: blr
|
||||
entry:
|
||||
ret <2 x i64> <i64 4, i64 4>
|
||||
@ -4526,30 +4518,26 @@ entry:
|
||||
define <2 x i64> @spltCnstConvdtoll() {
|
||||
; P9BE-LABEL: spltCnstConvdtoll:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: addis r3, r2, .LCPI87_0@toc@ha
|
||||
; P9BE-NEXT: addi r3, r3, .LCPI87_0@toc@l
|
||||
; P9BE-NEXT: lxv v2, 0(r3)
|
||||
; P9BE-NEXT: vspltisw v2, 4
|
||||
; P9BE-NEXT: vupklsw v2, v2
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
; P9LE-LABEL: spltCnstConvdtoll:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: addis r3, r2, .LCPI87_0@toc@ha
|
||||
; P9LE-NEXT: addi r3, r3, .LCPI87_0@toc@l
|
||||
; P9LE-NEXT: lxv v2, 0(r3)
|
||||
; P9LE-NEXT: vspltisw v2, 4
|
||||
; P9LE-NEXT: vupklsw v2, v2
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
; P8BE-LABEL: spltCnstConvdtoll:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: addis r3, r2, .LCPI87_0@toc@ha
|
||||
; P8BE-NEXT: addi r3, r3, .LCPI87_0@toc@l
|
||||
; P8BE-NEXT: lxvd2x v2, 0, r3
|
||||
; P8BE-NEXT: vspltisw v2, 4
|
||||
; P8BE-NEXT: vupklsw v2, v2
|
||||
; P8BE-NEXT: blr
|
||||
;
|
||||
; P8LE-LABEL: spltCnstConvdtoll:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: addis r3, r2, .LCPI87_0@toc@ha
|
||||
; P8LE-NEXT: addi r3, r3, .LCPI87_0@toc@l
|
||||
; P8LE-NEXT: lxvd2x v2, 0, r3
|
||||
; P8LE-NEXT: vspltisw v2, 4
|
||||
; P8LE-NEXT: vupklsw v2, v2
|
||||
; P8LE-NEXT: blr
|
||||
entry:
|
||||
ret <2 x i64> <i64 4, i64 4>
|
||||
@ -4879,30 +4867,26 @@ entry:
|
||||
define <2 x i64> @spltConst1ull() {
|
||||
; P9BE-LABEL: spltConst1ull:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: addis r3, r2, .LCPI97_0@toc@ha
|
||||
; P9BE-NEXT: addi r3, r3, .LCPI97_0@toc@l
|
||||
; P9BE-NEXT: lxv v2, 0(r3)
|
||||
; P9BE-NEXT: vspltisw v2, 1
|
||||
; P9BE-NEXT: vupklsw v2, v2
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
; P9LE-LABEL: spltConst1ull:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: addis r3, r2, .LCPI97_0@toc@ha
|
||||
; P9LE-NEXT: addi r3, r3, .LCPI97_0@toc@l
|
||||
; P9LE-NEXT: lxv v2, 0(r3)
|
||||
; P9LE-NEXT: vspltisw v2, 1
|
||||
; P9LE-NEXT: vupklsw v2, v2
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
; P8BE-LABEL: spltConst1ull:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: addis r3, r2, .LCPI97_0@toc@ha
|
||||
; P8BE-NEXT: addi r3, r3, .LCPI97_0@toc@l
|
||||
; P8BE-NEXT: lxvd2x v2, 0, r3
|
||||
; P8BE-NEXT: vspltisw v2, 1
|
||||
; P8BE-NEXT: vupklsw v2, v2
|
||||
; P8BE-NEXT: blr
|
||||
;
|
||||
; P8LE-LABEL: spltConst1ull:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: addis r3, r2, .LCPI97_0@toc@ha
|
||||
; P8LE-NEXT: addi r3, r3, .LCPI97_0@toc@l
|
||||
; P8LE-NEXT: lxvd2x v2, 0, r3
|
||||
; P8LE-NEXT: vspltisw v2, 1
|
||||
; P8LE-NEXT: vupklsw v2, v2
|
||||
; P8LE-NEXT: blr
|
||||
entry:
|
||||
ret <2 x i64> <i64 1, i64 1>
|
||||
@ -5339,30 +5323,26 @@ entry:
|
||||
define <2 x i64> @spltCnstConvftoull() {
|
||||
; P9BE-LABEL: spltCnstConvftoull:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: addis r3, r2, .LCPI110_0@toc@ha
|
||||
; P9BE-NEXT: addi r3, r3, .LCPI110_0@toc@l
|
||||
; P9BE-NEXT: lxv v2, 0(r3)
|
||||
; P9BE-NEXT: vspltisw v2, 4
|
||||
; P9BE-NEXT: vupklsw v2, v2
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
; P9LE-LABEL: spltCnstConvftoull:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: addis r3, r2, .LCPI110_0@toc@ha
|
||||
; P9LE-NEXT: addi r3, r3, .LCPI110_0@toc@l
|
||||
; P9LE-NEXT: lxv v2, 0(r3)
|
||||
; P9LE-NEXT: vspltisw v2, 4
|
||||
; P9LE-NEXT: vupklsw v2, v2
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
; P8BE-LABEL: spltCnstConvftoull:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: addis r3, r2, .LCPI110_0@toc@ha
|
||||
; P8BE-NEXT: addi r3, r3, .LCPI110_0@toc@l
|
||||
; P8BE-NEXT: lxvd2x v2, 0, r3
|
||||
; P8BE-NEXT: vspltisw v2, 4
|
||||
; P8BE-NEXT: vupklsw v2, v2
|
||||
; P8BE-NEXT: blr
|
||||
;
|
||||
; P8LE-LABEL: spltCnstConvftoull:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: addis r3, r2, .LCPI110_0@toc@ha
|
||||
; P8LE-NEXT: addi r3, r3, .LCPI110_0@toc@l
|
||||
; P8LE-NEXT: lxvd2x v2, 0, r3
|
||||
; P8LE-NEXT: vspltisw v2, 4
|
||||
; P8LE-NEXT: vupklsw v2, v2
|
||||
; P8LE-NEXT: blr
|
||||
entry:
|
||||
ret <2 x i64> <i64 4, i64 4>
|
||||
@ -5692,30 +5672,26 @@ entry:
|
||||
define <2 x i64> @spltCnstConvdtoull() {
|
||||
; P9BE-LABEL: spltCnstConvdtoull:
|
||||
; P9BE: # %bb.0: # %entry
|
||||
; P9BE-NEXT: addis r3, r2, .LCPI119_0@toc@ha
|
||||
; P9BE-NEXT: addi r3, r3, .LCPI119_0@toc@l
|
||||
; P9BE-NEXT: lxv v2, 0(r3)
|
||||
; P9BE-NEXT: vspltisw v2, 4
|
||||
; P9BE-NEXT: vupklsw v2, v2
|
||||
; P9BE-NEXT: blr
|
||||
;
|
||||
; P9LE-LABEL: spltCnstConvdtoull:
|
||||
; P9LE: # %bb.0: # %entry
|
||||
; P9LE-NEXT: addis r3, r2, .LCPI119_0@toc@ha
|
||||
; P9LE-NEXT: addi r3, r3, .LCPI119_0@toc@l
|
||||
; P9LE-NEXT: lxv v2, 0(r3)
|
||||
; P9LE-NEXT: vspltisw v2, 4
|
||||
; P9LE-NEXT: vupklsw v2, v2
|
||||
; P9LE-NEXT: blr
|
||||
;
|
||||
; P8BE-LABEL: spltCnstConvdtoull:
|
||||
; P8BE: # %bb.0: # %entry
|
||||
; P8BE-NEXT: addis r3, r2, .LCPI119_0@toc@ha
|
||||
; P8BE-NEXT: addi r3, r3, .LCPI119_0@toc@l
|
||||
; P8BE-NEXT: lxvd2x v2, 0, r3
|
||||
; P8BE-NEXT: vspltisw v2, 4
|
||||
; P8BE-NEXT: vupklsw v2, v2
|
||||
; P8BE-NEXT: blr
|
||||
;
|
||||
; P8LE-LABEL: spltCnstConvdtoull:
|
||||
; P8LE: # %bb.0: # %entry
|
||||
; P8LE-NEXT: addis r3, r2, .LCPI119_0@toc@ha
|
||||
; P8LE-NEXT: addi r3, r3, .LCPI119_0@toc@l
|
||||
; P8LE-NEXT: lxvd2x v2, 0, r3
|
||||
; P8LE-NEXT: vspltisw v2, 4
|
||||
; P8LE-NEXT: vupklsw v2, v2
|
||||
; P8LE-NEXT: blr
|
||||
entry:
|
||||
ret <2 x i64> <i64 4, i64 4>
|
||||
|
@ -271,8 +271,7 @@ define <2 x i64> @test1_v2i64(<2 x i64> %a) {
|
||||
ret <2 x i64> %tmp.1
|
||||
}
|
||||
; CHECK-LABEL: test1_v2i64:
|
||||
; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
|
||||
; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
|
||||
; CHECK: vupklsw v[[REG1:[0-9]+]], v{{[0-9]+}}
|
||||
; CHECK-NOT: vmul
|
||||
; CHECK-NEXT: vsld v{{[0-9]+}}, v2, v[[REG2]]
|
||||
|
||||
@ -282,8 +281,7 @@ define <2 x i64> @test2_v2i64(<2 x i64> %a) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test2_v2i64:
|
||||
; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
|
||||
; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
|
||||
; CHECK: vupklsw v[[REG1:[0-9]+]], v{{[0-9]+}}
|
||||
; CHECK-NOT: vmul
|
||||
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
|
||||
; CHECK-NEXT: vaddudm v{{[0-9]+}}, v2, v[[REG3]]
|
||||
@ -294,8 +292,7 @@ define <2 x i64> @test3_v2i64(<2 x i64> %a) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test3_v2i64:
|
||||
; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
|
||||
; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
|
||||
; CHECK: vupklsw v[[REG1:[0-9]+]], v{{[0-9]+}}
|
||||
; CHECK-NOT: vmul
|
||||
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
|
||||
; CHECK-NEXT: vsubudm v{{[0-9]+}}, v[[REG3]], v2
|
||||
@ -308,8 +305,7 @@ define <2 x i64> @test4_v2i64(<2 x i64> %a) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test4_v2i64:
|
||||
; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
|
||||
; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
|
||||
; CHECK: vupklsw v[[REG1:[0-9]+]], v{{[0-9]+}}
|
||||
; CHECK-NOT: vmul
|
||||
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
|
||||
; CHECK-P8-NEXT: xxlxor v[[REG4:[0-9]+]],
|
||||
@ -322,8 +318,7 @@ define <2 x i64> @test5_v2i64(<2 x i64> %a) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test5_v2i64:
|
||||
; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
|
||||
; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
|
||||
; CHECK: vupklsw v[[REG1:[0-9]+]], v{{[0-9]+}}
|
||||
; CHECK-NOT: vmul
|
||||
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
|
||||
; CHECK-NEXT: vaddudm v[[REG4:[0-9]+]], v2, v[[REG3]]
|
||||
@ -337,8 +332,7 @@ define <2 x i64> @test6_v2i64(<2 x i64> %a) {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test6_v2i64:
|
||||
; CHECK-P8: lxvd2x v[[REG1:[0-9]+]], 0, r{{[0-9]+}}
|
||||
; CHECK-P9: lxv v[[REG2:[0-9]+]], 0(r{{[0-9]+}})
|
||||
; CHECK: vupklsw v[[REG1:[0-9]+]], v{{[0-9]+}}
|
||||
; CHECK-NOT: vmul
|
||||
; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
|
||||
; CHECK-NEXT: vsubudm v{{[0-9]+}}, v2, v[[REG3]]
|
||||
|
@ -105,9 +105,8 @@ define dso_local <2 x double> @testDoubleToDoubleNaNFail() local_unnamed_addr {
|
||||
;
|
||||
; CHECK-NOPREFIX-LABEL: testDoubleToDoubleNaNFail:
|
||||
; CHECK-NOPREFIX: # %bb.0: # %entry
|
||||
; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI2_0@toc@ha
|
||||
; CHECK-NOPREFIX-NEXT: addi r3, r3, .LCPI2_0@toc@l
|
||||
; CHECK-NOPREFIX-NEXT: lxv vs34, 0(r3)
|
||||
; CHECK-NOPREFIX-NEXT: vspltisw v2, -16
|
||||
; CHECK-NOPREFIX-NEXT: vupklsw v2, v2
|
||||
; CHECK-NOPREFIX-NEXT: blr
|
||||
;
|
||||
; CHECK-BE-LABEL: testDoubleToDoubleNaNFail:
|
||||
|
@ -22,10 +22,10 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
|
||||
; P9LE-NEXT: lfdx 0, 3, 4
|
||||
; P9LE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
|
||||
; P9LE-NEXT: xxlxor 2, 2, 2
|
||||
; P9LE-NEXT: vspltisw 4, 8
|
||||
; P9LE-NEXT: xxspltib 4, 16
|
||||
; P9LE-NEXT: lxsd 3, 4(5)
|
||||
; P9LE-NEXT: addi 3, 3, .LCPI0_0@toc@l
|
||||
; P9LE-NEXT: vadduwm 4, 4, 4
|
||||
; P9LE-NEXT: vextsb2w 4, 4
|
||||
; P9LE-NEXT: lxv 1, 0(3)
|
||||
; P9LE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
|
||||
; P9LE-NEXT: addi 3, 3, .LCPI0_1@toc@l
|
||||
@ -45,10 +45,10 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
|
||||
; P9BE-NEXT: lxsdx 2, 3, 4
|
||||
; P9BE-NEXT: addis 3, 2, .LCPI0_0@toc@ha
|
||||
; P9BE-NEXT: xxlxor 1, 1, 1
|
||||
; P9BE-NEXT: vspltisw 4, 8
|
||||
; P9BE-NEXT: xxspltib 4, 16
|
||||
; P9BE-NEXT: lxsd 3, 4(5)
|
||||
; P9BE-NEXT: addi 3, 3, .LCPI0_0@toc@l
|
||||
; P9BE-NEXT: vadduwm 4, 4, 4
|
||||
; P9BE-NEXT: vextsb2w 4, 4
|
||||
; P9BE-NEXT: lxv 0, 0(3)
|
||||
; P9BE-NEXT: addis 3, 2, .LCPI0_1@toc@ha
|
||||
; P9BE-NEXT: addi 3, 3, .LCPI0_1@toc@l
|
||||
@ -68,11 +68,11 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
|
||||
; P9BE-AIX-NEXT: lxsdx 2, 3, 4
|
||||
; P9BE-AIX-NEXT: ld 3, L..C0(2) # %const.0
|
||||
; P9BE-AIX-NEXT: xxlxor 1, 1, 1
|
||||
; P9BE-AIX-NEXT: vspltisw 4, 8
|
||||
; P9BE-AIX-NEXT: xxspltib 4, 16
|
||||
; P9BE-AIX-NEXT: lxsd 3, 4(5)
|
||||
; P9BE-AIX-NEXT: lxv 0, 0(3)
|
||||
; P9BE-AIX-NEXT: ld 3, L..C1(2) # %const.1
|
||||
; P9BE-AIX-NEXT: vadduwm 4, 4, 4
|
||||
; P9BE-AIX-NEXT: vextsb2w 4, 4
|
||||
; P9BE-AIX-NEXT: xxperm 2, 1, 0
|
||||
; P9BE-AIX-NEXT: lxv 0, 0(3)
|
||||
; P9BE-AIX-NEXT: xxperm 3, 3, 0
|
||||
@ -89,10 +89,10 @@ define void @test64(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
|
||||
; P9BE-AIX32-NEXT: lxvwsx 0, 3, 4
|
||||
; P9BE-AIX32-NEXT: li 3, 4
|
||||
; P9BE-AIX32-NEXT: xxlxor 2, 2, 2
|
||||
; P9BE-AIX32-NEXT: vspltisw 4, 8
|
||||
; P9BE-AIX32-NEXT: xxspltib 4, 16
|
||||
; P9BE-AIX32-NEXT: lxvwsx 1, 5, 3
|
||||
; P9BE-AIX32-NEXT: lwz 3, L..C0(2) # %const.0
|
||||
; P9BE-AIX32-NEXT: vadduwm 4, 4, 4
|
||||
; P9BE-AIX32-NEXT: vextsb2w 4, 4
|
||||
; P9BE-AIX32-NEXT: xxmrghw 2, 0, 1
|
||||
; P9BE-AIX32-NEXT: lxv 0, 0(3)
|
||||
; P9BE-AIX32-NEXT: li 3, 8
|
||||
@ -137,11 +137,11 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
|
||||
; P9LE-NEXT: lxsiwzx 2, 3, 4
|
||||
; P9LE-NEXT: addis 3, 2, .LCPI1_0@toc@ha
|
||||
; P9LE-NEXT: xxlxor 0, 0, 0
|
||||
; P9LE-NEXT: vspltisw 4, 8
|
||||
; P9LE-NEXT: xxspltib 4, 16
|
||||
; P9LE-NEXT: addi 3, 3, .LCPI1_0@toc@l
|
||||
; P9LE-NEXT: lxv 1, 0(3)
|
||||
; P9LE-NEXT: li 3, 4
|
||||
; P9LE-NEXT: vadduwm 4, 4, 4
|
||||
; P9LE-NEXT: vextsb2w 4, 4
|
||||
; P9LE-NEXT: lxsiwzx 3, 5, 3
|
||||
; P9LE-NEXT: xxperm 2, 0, 1
|
||||
; P9LE-NEXT: xxperm 3, 0, 1
|
||||
@ -158,11 +158,11 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
|
||||
; P9BE-NEXT: lxsiwzx 2, 3, 4
|
||||
; P9BE-NEXT: addis 3, 2, .LCPI1_0@toc@ha
|
||||
; P9BE-NEXT: xxlxor 0, 0, 0
|
||||
; P9BE-NEXT: vspltisw 4, 8
|
||||
; P9BE-NEXT: xxspltib 4, 16
|
||||
; P9BE-NEXT: addi 3, 3, .LCPI1_0@toc@l
|
||||
; P9BE-NEXT: lxv 1, 0(3)
|
||||
; P9BE-NEXT: li 3, 4
|
||||
; P9BE-NEXT: vadduwm 4, 4, 4
|
||||
; P9BE-NEXT: vextsb2w 4, 4
|
||||
; P9BE-NEXT: lxsiwzx 3, 5, 3
|
||||
; P9BE-NEXT: xxperm 2, 0, 1
|
||||
; P9BE-NEXT: xxperm 3, 0, 1
|
||||
@ -179,10 +179,10 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
|
||||
; P9BE-AIX-NEXT: lxsiwzx 2, 3, 4
|
||||
; P9BE-AIX-NEXT: ld 3, L..C2(2) # %const.0
|
||||
; P9BE-AIX-NEXT: xxlxor 0, 0, 0
|
||||
; P9BE-AIX-NEXT: vspltisw 4, 8
|
||||
; P9BE-AIX-NEXT: xxspltib 4, 16
|
||||
; P9BE-AIX-NEXT: lxv 1, 0(3)
|
||||
; P9BE-AIX-NEXT: li 3, 4
|
||||
; P9BE-AIX-NEXT: vadduwm 4, 4, 4
|
||||
; P9BE-AIX-NEXT: vextsb2w 4, 4
|
||||
; P9BE-AIX-NEXT: lxsiwzx 3, 5, 3
|
||||
; P9BE-AIX-NEXT: xxperm 2, 0, 1
|
||||
; P9BE-AIX-NEXT: xxperm 3, 0, 1
|
||||
@ -199,10 +199,10 @@ define void @test32(ptr nocapture readonly %pix2, i32 signext %i_pix2) {
|
||||
; P9BE-AIX32-NEXT: lxsiwzx 2, 3, 4
|
||||
; P9BE-AIX32-NEXT: lwz 3, L..C2(2) # %const.0
|
||||
; P9BE-AIX32-NEXT: xxlxor 0, 0, 0
|
||||
; P9BE-AIX32-NEXT: vspltisw 4, 8
|
||||
; P9BE-AIX32-NEXT: xxspltib 4, 16
|
||||
; P9BE-AIX32-NEXT: lxv 1, 0(3)
|
||||
; P9BE-AIX32-NEXT: li 3, 4
|
||||
; P9BE-AIX32-NEXT: vadduwm 4, 4, 4
|
||||
; P9BE-AIX32-NEXT: vextsb2w 4, 4
|
||||
; P9BE-AIX32-NEXT: lxsiwzx 3, 5, 3
|
||||
; P9BE-AIX32-NEXT: xxperm 2, 0, 1
|
||||
; P9BE-AIX32-NEXT: xxperm 3, 0, 1
|
||||
|
50
llvm/test/CodeGen/PowerPC/splat-extend.ll
Normal file
50
llvm/test/CodeGen/PowerPC/splat-extend.ll
Normal file
@ -0,0 +1,50 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
|
||||
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
|
||||
; RUN: FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-aix-xcoff \
|
||||
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
|
||||
; RUN: FileCheck %s
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc-aix-xcoff \
|
||||
; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
|
||||
; RUN: FileCheck %s
|
||||
|
||||
define dso_local noundef <8 x i16> @v103s() local_unnamed_addr {
|
||||
; CHECK-LABEL: v103s:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltib v2, 103
|
||||
; CHECK-NEXT: vupklsb v2, v2
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
ret <8 x i16> splat (i16 103)
|
||||
}
|
||||
|
||||
define dso_local noundef <2 x i64> @v103l() local_unnamed_addr {
|
||||
; CHECK-LABEL: v103l:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltib v2, 103
|
||||
; CHECK-NEXT: vextsb2d v2, v2
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
ret <2 x i64> splat (i64 103)
|
||||
}
|
||||
|
||||
define dso_local noundef <4 x i32> @v103i() local_unnamed_addr {
|
||||
; CHECK-LABEL: v103i:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: xxspltib v2, 103
|
||||
; CHECK-NEXT: vextsb2w v2, v2
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
ret <4 x i32> splat (i32 103)
|
||||
}
|
||||
|
||||
define dso_local noundef <2 x i64> @v11l() local_unnamed_addr {
|
||||
; CHECK-LABEL: v11l:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: vspltisw v2, -11
|
||||
; CHECK-NEXT: vupklsw v2, v2
|
||||
; CHECK-NEXT: blr
|
||||
entry:
|
||||
ret <2 x i64> splat (i64 -11)
|
||||
}
|
@ -16,9 +16,8 @@ define <2 x i64> @test_add(<2 x i64> %x, <2 x i64> %y) nounwind {
|
||||
define <2 x i64> @increment_by_one(<2 x i64> %x) nounwind {
|
||||
; VSX-LABEL: increment_by_one:
|
||||
; VSX: # %bb.0:
|
||||
; VSX-NEXT: addis 3, 2, .LCPI1_0@toc@ha
|
||||
; VSX-NEXT: addi 3, 3, .LCPI1_0@toc@l
|
||||
; VSX-NEXT: lxvd2x 35, 0, 3
|
||||
; VSX-NEXT: vspltisw 3, 1
|
||||
; VSX-NEXT: vupklsw 3, 3
|
||||
; VSX-NEXT: vaddudm 2, 2, 3
|
||||
; VSX-NEXT: blr
|
||||
;
|
||||
|
@ -144,9 +144,8 @@ entry:
|
||||
define <2 x i64> @test_none(<2 x i64> %m) {
|
||||
; CHECK-P9-LABEL: test_none:
|
||||
; CHECK-P9: # %bb.0: # %entry
|
||||
; CHECK-P9-NEXT: addis 3, 2, .LCPI5_0@toc@ha
|
||||
; CHECK-P9-NEXT: addi 3, 3, .LCPI5_0@toc@l
|
||||
; CHECK-P9-NEXT: lxv 35, 0(3)
|
||||
; CHECK-P9-NEXT: xxspltib 35, 16
|
||||
; CHECK-P9-NEXT: vextsb2d 3, 3
|
||||
; CHECK-P9-NEXT: vsld 2, 2, 3
|
||||
; CHECK-P9-NEXT: vsrad 2, 2, 3
|
||||
; CHECK-P9-NEXT: blr
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user