[X86][BreakFalseDeps] Using reverse order for undef register selection (#137569)

BreakFalseDeps picks the best register for undef operands when
instructions have a false dependency. The problem is that if the
instruction is close to the beginning of the function,
ReachingDefAnalysis is overly optimistic about the unused registers,
which results in collisions with registers just defined in the caller.

This patch changes the selection of the undef register to use reverse
order, which reduces the probability of register collisions between
caller and callee. It brings improvements in some of our internal
benchmarks with negligible effect on others.
Phoebe Wang 2025-06-11 22:08:20 +08:00 committed by GitHub
parent 6fb2a80189
commit 76e14deb4a
44 changed files with 973 additions and 927 deletions
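
Before the diff, a minimal sketch of the mechanism (hypothetical code
with plain integers standing in for physical registers, not the actual
BreakFalseDeps implementation): scanning the allocation order forward
tends to pick xmm0/xmm1, which the caller has most likely just written,
while scanning the same order in reverse starts from the least-used end.

#include <cstdint>
#include <vector>

using MCPhysReg = uint16_t;

// Pick the first allocatable register from the raw allocation order.
// Reverse = true walks the same list from the highest-numbered register,
// which is rarely live at the call boundary.
static MCPhysReg pickUndefReg(const std::vector<MCPhysReg> &RawOrder,
                              const std::vector<bool> &Reserved,
                              bool Reverse) {
  auto FirstFree = [&](auto Begin, auto End) -> MCPhysReg {
    for (auto It = Begin; It != End; ++It)
      if (!Reserved[*It]) // skip registers reserved by the target
        return *It;
    return 0; // no allocatable candidate
  };
  return Reverse ? FirstFree(RawOrder.rbegin(), RawOrder.rend())
                 : FirstFree(RawOrder.begin(), RawOrder.end());
}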

View File

@ -50,6 +50,8 @@ class RegisterClassInfo {
// entry is valid when its tag matches.
unsigned Tag = 0;
bool Reverse = false;
const MachineFunction *MF = nullptr;
const TargetRegisterInfo *TRI = nullptr;
@ -86,9 +88,11 @@ class RegisterClassInfo {
public:
LLVM_ABI RegisterClassInfo();
/// runOnFunction - Prepare to answer questions about MF. This must be called
/// runOnFunction - Prepare to answer questions about MF. Rev indicates
/// whether to use the reversed raw order when computing the register
/// order. This must be called
/// before any other methods are used.
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF);
LLVM_ABI void runOnMachineFunction(const MachineFunction &MF,
bool Rev = false);
/// getNumAllocatableRegs - Returns the number of actually allocatable
/// registers in RC in the current function.

View File

@ -68,7 +68,7 @@ public:
const bool CoveredBySubRegs;
const unsigned *SuperClasses;
const uint16_t SuperClassesSize;
ArrayRef<MCPhysReg> (*OrderFunc)(const MachineFunction&);
ArrayRef<MCPhysReg> (*OrderFunc)(const MachineFunction &, bool Rev);
/// Return the register class ID number.
unsigned getID() const { return MC->getID(); }
@ -199,8 +199,9 @@ public:
/// other criteria.
///
/// By default, this method returns all registers in the class.
ArrayRef<MCPhysReg> getRawAllocationOrder(const MachineFunction &MF) const {
return OrderFunc ? OrderFunc(MF) : getRegisters();
ArrayRef<MCPhysReg> getRawAllocationOrder(const MachineFunction &MF,
bool Rev = false) const {
return OrderFunc ? OrderFunc(MF, Rev) : getRegisters();
}
/// Returns the combination of all lane masks of register in this class.
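
Targets with a custom allocation order now receive the flag too. A
hypothetical callback matching the new OrderFunc signature (in-tree
callbacks are generated by TableGen from AltOrders/AltOrderSelect,
shown next):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/MC/MCRegister.h"

using namespace llvm;

// Placeholder register numbers; returns an alternative order when Rev
// is set and the default order otherwise.
static ArrayRef<MCPhysReg> exampleRawOrder(const MachineFunction &MF,
                                           bool Rev) {
  static const MCPhysReg DefaultOrder[] = {0, 1, 2, 3};
  static const MCPhysReg RevOrder[] = {2, 3, 0, 1};
  return Rev ? ArrayRef<MCPhysReg>(RevOrder)
             : ArrayRef<MCPhysReg>(DefaultOrder);
}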

View File

@ -314,7 +314,7 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
// to use in a given machine function. The code will be inserted in a
// function like this:
//
// static inline unsigned f(const MachineFunction &MF) { ... }
// static inline unsigned f(const MachineFunction &MF, bool Rev) { ... }
//
// The function should return 0 to select the default order defined by
// MemberList, 1 to select the first AltOrders entry and so on.
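
Each X86 class updated below uses "return Rev;" as its AltOrderSelect
body, so the emitted selector has this shape (a sketch of the generated
code, not a verbatim copy):

// Rev == false -> 0: default order from MemberList.
// Rev == true  -> 1: first AltOrders entry.
static inline unsigned f(const MachineFunction &MF, bool Rev) {
  return Rev;
}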

View File

@ -285,7 +285,7 @@ bool BreakFalseDeps::runOnMachineFunction(MachineFunction &mf) {
TRI = MF->getSubtarget().getRegisterInfo();
RDA = &getAnalysis<ReachingDefAnalysis>();
RegClassInfo.runOnMachineFunction(mf);
RegClassInfo.runOnMachineFunction(mf, /*Rev=*/true);
LLVM_DEBUG(dbgs() << "********** BREAK FALSE DEPENDENCIES **********\n");

View File

@ -39,14 +39,16 @@ StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"),
RegisterClassInfo::RegisterClassInfo() = default;
void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf,
bool Rev) {
bool Update = false;
MF = &mf;
auto &STI = MF->getSubtarget();
// Allocate new array the first time we see a new target.
if (STI.getRegisterInfo() != TRI) {
if (STI.getRegisterInfo() != TRI || Reverse != Rev) {
Reverse = Rev;
TRI = STI.getRegisterInfo();
RegClass.reset(new RCInfo[TRI->getNumRegClasses()]);
Update = true;
@ -142,7 +144,12 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
// FIXME: Once targets reserve registers instead of removing them from the
// allocation order, we can simply use begin/end here.
ArrayRef<MCPhysReg> RawOrder = RC->getRawAllocationOrder(*MF);
ArrayRef<MCPhysReg> RawOrder = RC->getRawAllocationOrder(*MF, Reverse);
std::vector<MCPhysReg> ReverseOrder;
if (Reverse) {
llvm::append_range(ReverseOrder, reverse(RawOrder));
RawOrder = ArrayRef<MCPhysReg>(ReverseOrder);
}
for (unsigned PhysReg : RawOrder) {
// Remove reserved registers from the allocation order.
if (Reserved.test(PhysReg))
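
A worked example of the reversal step, assuming X86's FR32X with Rev set
(see the .td changes below): the alternative raw order is
(XMM16..XMM31, XMM0..XMM15), so the reversed copy walked here is
(XMM15..XMM0, XMM31..XMM16) and XMM15 becomes the first candidate.

#include <algorithm>
#include <cstdint>
#include <vector>

// Sketch: with RawOrder = {16..31, 0..15} (numbers standing in for
// XMM16..XMM31, XMM0..XMM15), the reversed walk is {15..0, 31..16}.
std::vector<uint16_t> reversedWalk(std::vector<uint16_t> RawOrder) {
  std::reverse(RawOrder.begin(), RawOrder.end());
  return RawOrder;
}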

View File

@ -806,17 +806,37 @@ def VR512_0_15 : RegisterClass<"X86", [v16f32, v8f64, v64i8, v32i16, v16i32, v8i
512, (sequence "ZMM%u", 0, 15)>;
// Scalar AVX-512 floating point registers.
def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>;
def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)> {
let AltOrders = [(add (sequence "XMM%u", 16, 31), (sequence "XMM%u", 0, 15))];
let AltOrderSelect = [{
return Rev;
}];
}
def FR64X : RegisterClass<"X86", [f64], 64, (add FR32X)>;
def FR64X : RegisterClass<"X86", [f64], 64, (add FR32X)> {
let AltOrders = [(add (sequence "XMM%u", 16, 31), (sequence "XMM%u", 0, 15))];
let AltOrderSelect = [{
return Rev;
}];
}
def FR16X : RegisterClass<"X86", [f16], 16, (add FR32X)> {let Size = 32;}
// Extended VR128 and VR256 for AVX-512 instructions
def VR128X : RegisterClass<"X86", [v4f32, v2f64, v8f16, v8bf16, v16i8, v8i16, v4i32, v2i64, f128],
128, (add FR32X)>;
128, (add FR32X)> {
let AltOrders = [(add (sequence "XMM%u", 16, 31), (sequence "XMM%u", 0, 15))];
let AltOrderSelect = [{
return Rev;
}];
}
def VR256X : RegisterClass<"X86", [v8f32, v4f64, v16f16, v16bf16, v32i8, v16i16, v8i32, v4i64],
256, (sequence "YMM%u", 0, 31)>;
256, (sequence "YMM%u", 0, 31)> {
let AltOrders = [(add (sequence "YMM%u", 16, 31), (sequence "YMM%u", 0, 15))];
let AltOrderSelect = [{
return Rev;
}];
}
// Mask registers
def VK1 : RegisterClass<"X86", [v1i1], 16, (sequence "K%u", 0, 7)> {let Size = 16;}
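
Combined with the reversal in RegisterClassInfo::compute() above, these
alternative orders make XMM15 (YMM15 for VR256X) the first candidate for
FR32X, FR64X, VR128X and VR256X under Rev, which is why the updated
tests below pick %xmm15. FR16X keeps its plain XMM0-31 order, so its
reversed walk starts at XMM31, matching the %xmm31 picks in the FP16
tests.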

View File

@ -108,7 +108,7 @@ define <2 x double> @fpext01(<2 x double> %a0, <4 x float> %a1) nounwind {
define double @funcA(ptr nocapture %e) nounwind uwtable readonly ssp {
; CHECK-LABEL: funcA:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0
; CHECK-NEXT: vcvtsi2sdq (%rdi), %xmm15, %xmm0
; CHECK-NEXT: retq
%tmp1 = load i64, ptr %e, align 8
%conv = sitofp i64 %tmp1 to double
@ -118,7 +118,7 @@ define double @funcA(ptr nocapture %e) nounwind uwtable readonly ssp {
define double @funcB(ptr nocapture %e) nounwind uwtable readonly ssp {
; CHECK-LABEL: funcB:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0
; CHECK-NEXT: vcvtsi2sdl (%rdi), %xmm15, %xmm0
; CHECK-NEXT: retq
%tmp1 = load i32, ptr %e, align 4
%conv = sitofp i32 %tmp1 to double
@ -128,7 +128,7 @@ define double @funcB(ptr nocapture %e) nounwind uwtable readonly ssp {
define float @funcC(ptr nocapture %e) nounwind uwtable readonly ssp {
; CHECK-LABEL: funcC:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0
; CHECK-NEXT: vcvtsi2ssl (%rdi), %xmm15, %xmm0
; CHECK-NEXT: retq
%tmp1 = load i32, ptr %e, align 4
%conv = sitofp i32 %tmp1 to float
@ -138,7 +138,7 @@ define float @funcC(ptr nocapture %e) nounwind uwtable readonly ssp {
define float @funcD(ptr nocapture %e) nounwind uwtable readonly ssp {
; CHECK-LABEL: funcD:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0
; CHECK-NEXT: vcvtsi2ssq (%rdi), %xmm15, %xmm0
; CHECK-NEXT: retq
%tmp1 = load i64, ptr %e, align 8
%conv = sitofp i64 %tmp1 to float
@ -183,7 +183,7 @@ declare float @llvm.floor.f32(float %p)
define float @floor_f32_load(ptr %aptr) optsize {
; CHECK-LABEL: floor_f32_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundss $9, (%rdi), %xmm0, %xmm0
; CHECK-NEXT: vroundss $9, (%rdi), %xmm15, %xmm0
; CHECK-NEXT: retq
%a = load float, ptr %aptr
%res = call float @llvm.floor.f32(float %a)
@ -193,7 +193,7 @@ define float @floor_f32_load(ptr %aptr) optsize {
define float @floor_f32_load_pgso(ptr %aptr) !prof !14 {
; CHECK-LABEL: floor_f32_load_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundss $9, (%rdi), %xmm0, %xmm0
; CHECK-NEXT: vroundss $9, (%rdi), %xmm15, %xmm0
; CHECK-NEXT: retq
%a = load float, ptr %aptr
%res = call float @llvm.floor.f32(float %a)
@ -203,7 +203,7 @@ define float @floor_f32_load_pgso(ptr %aptr) !prof !14 {
define double @nearbyint_f64_load(ptr %aptr) optsize {
; CHECK-LABEL: nearbyint_f64_load:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundsd $12, (%rdi), %xmm0, %xmm0
; CHECK-NEXT: vroundsd $12, (%rdi), %xmm15, %xmm0
; CHECK-NEXT: retq
%a = load double, ptr %aptr
%res = call double @llvm.nearbyint.f64(double %a)
@ -213,7 +213,7 @@ define double @nearbyint_f64_load(ptr %aptr) optsize {
define double @nearbyint_f64_load_pgso(ptr %aptr) !prof !14 {
; CHECK-LABEL: nearbyint_f64_load_pgso:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundsd $12, (%rdi), %xmm0, %xmm0
; CHECK-NEXT: vroundsd $12, (%rdi), %xmm15, %xmm0
; CHECK-NEXT: retq
%a = load double, ptr %aptr
%res = call double @llvm.nearbyint.f64(double %a)

View File

@ -22,27 +22,27 @@ define <8 x double> @sltof864(<8 x i64> %a) {
; NODQ: # %bb.0:
; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; NODQ-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; NODQ-NEXT: vmovq %xmm1, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm0
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
@ -66,14 +66,14 @@ define <4 x double> @slto4f64(<4 x i64> %a) {
; NODQ: # %bb.0:
; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm1
; NODQ-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; NODQ-NEXT: vmovq %xmm1, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; NODQ-NEXT: retq
@ -97,9 +97,9 @@ define <2 x double> @slto2f64(<2 x i64> %a) {
; NODQ-LABEL: slto2f64:
; NODQ: # %bb.0:
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; NODQ-NEXT: retq
;
@ -123,9 +123,9 @@ define <2 x float> @sltof2f32(<2 x i64> %a) {
; NODQ-LABEL: sltof2f32:
; NODQ: # %bb.0:
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; NODQ-NEXT: retq
;
@ -148,12 +148,12 @@ define <2 x float> @sltof2f32(<2 x i64> %a) {
define <4 x float> @slto4f32_mem(ptr %a) {
; NODQ-LABEL: slto4f32_mem:
; NODQ: # %bb.0:
; NODQ-NEXT: vcvtsi2ssq 8(%rdi), %xmm0, %xmm0
; NODQ-NEXT: vcvtsi2ssq (%rdi), %xmm1, %xmm1
; NODQ-NEXT: vcvtsi2ssq 8(%rdi), %xmm15, %xmm0
; NODQ-NEXT: vcvtsi2ssq (%rdi), %xmm15, %xmm1
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; NODQ-NEXT: vcvtsi2ssq 16(%rdi), %xmm2, %xmm1
; NODQ-NEXT: vcvtsi2ssq 16(%rdi), %xmm15, %xmm1
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; NODQ-NEXT: vcvtsi2ssq 24(%rdi), %xmm2, %xmm1
; NODQ-NEXT: vcvtsi2ssq 24(%rdi), %xmm15, %xmm1
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; NODQ-NEXT: retq
;
@ -246,16 +246,16 @@ define <4 x float> @slto4f32(<4 x i64> %a) {
; NODQ-LABEL: slto4f32:
; NODQ: # %bb.0:
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; NODQ-NEXT: vzeroupper
; NODQ-NEXT: retq
@ -281,16 +281,16 @@ define <4 x float> @ulto4f32(<4 x i64> %a) {
; NODQ-LABEL: ulto4f32:
; NODQ: # %bb.0:
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; NODQ-NEXT: vzeroupper
; NODQ-NEXT: retq
@ -316,16 +316,16 @@ define <4 x float> @ulto4f32_nneg(<4 x i64> %a) {
; NODQ-LABEL: ulto4f32_nneg:
; NODQ: # %bb.0:
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; NODQ-NEXT: vzeroupper
; NODQ-NEXT: retq
@ -864,7 +864,7 @@ define <2 x double> @f32tof64_inreg(<2 x double> %a0, <4 x float> %a1) nounwind
define double @sltof64_load(ptr nocapture %e) {
; ALL-LABEL: sltof64_load:
; ALL: # %bb.0: # %entry
; ALL-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0
; ALL-NEXT: vcvtsi2sdq (%rdi), %xmm15, %xmm0
; ALL-NEXT: retq
entry:
%tmp1 = load i64, ptr %e, align 8
@ -875,7 +875,7 @@ entry:
define double @sitof64_load(ptr %e) {
; ALL-LABEL: sitof64_load:
; ALL: # %bb.0: # %entry
; ALL-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0
; ALL-NEXT: vcvtsi2sdl (%rdi), %xmm15, %xmm0
; ALL-NEXT: retq
entry:
%tmp1 = load i32, ptr %e, align 4
@ -886,7 +886,7 @@ entry:
define float @sitof32_load(ptr %e) {
; ALL-LABEL: sitof32_load:
; ALL: # %bb.0: # %entry
; ALL-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0
; ALL-NEXT: vcvtsi2ssl (%rdi), %xmm15, %xmm0
; ALL-NEXT: retq
entry:
%tmp1 = load i32, ptr %e, align 4
@ -897,7 +897,7 @@ entry:
define float @sltof32_load(ptr %e) {
; ALL-LABEL: sltof32_load:
; ALL: # %bb.0: # %entry
; ALL-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0
; ALL-NEXT: vcvtsi2ssq (%rdi), %xmm15, %xmm0
; ALL-NEXT: retq
entry:
%tmp1 = load i64, ptr %e, align 8
@ -990,28 +990,28 @@ define <8 x float> @slto8f32(<8 x i64> %a) {
; NODQ: # %bb.0:
; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm1
; NODQ-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; NODQ-NEXT: vmovq %xmm1, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm0
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; NODQ-NEXT: retq
@ -1034,54 +1034,54 @@ define <16 x float> @slto16f32(<16 x i64> %a) {
; NODQ: # %bb.0:
; NODQ-NEXT: vextracti32x4 $2, %zmm1, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
; NODQ-NEXT: vextracti32x4 $3, %zmm1, %xmm3
; NODQ-NEXT: vmovq %xmm3, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm4
; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
; NODQ-NEXT: vpextrq $1, %xmm3, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
; NODQ-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; NODQ-NEXT: vmovq %xmm1, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm4
; NODQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; NODQ-NEXT: vextracti128 $1, %ymm1, %xmm1
; NODQ-NEXT: vmovq %xmm1, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm4
; NODQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; NODQ-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NODQ-NEXT: vextractf32x4 $2, %zmm0, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm2
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
; NODQ-NEXT: vextractf32x4 $3, %zmm0, %xmm3
; NODQ-NEXT: vmovq %xmm3, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm4
; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
; NODQ-NEXT: vpextrq $1, %xmm3, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm3
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm4
; NODQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm4
; NODQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm0
; NODQ-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
@ -1109,27 +1109,27 @@ define <8 x double> @slto8f64(<8 x i64> %a) {
; NODQ: # %bb.0:
; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; NODQ-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; NODQ-NEXT: vmovq %xmm1, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm0
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
@ -1153,53 +1153,53 @@ define <16 x double> @slto16f64(<16 x i64> %a) {
; NODQ: # %bb.0:
; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NODQ-NEXT: vpextrq $1, %xmm3, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm4, %xmm4
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm4
; NODQ-NEXT: vmovq %xmm3, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; NODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm3
; NODQ-NEXT: vpextrq $1, %xmm3, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm4
; NODQ-NEXT: vmovq %xmm3, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; NODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm4
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm0
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; NODQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; NODQ-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; NODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
; NODQ-NEXT: vextracti32x4 $3, %zmm1, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm2
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; NODQ-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-NEXT: vextracti32x4 $2, %zmm1, %xmm3
; NODQ-NEXT: vpextrq $1, %xmm3, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm4
; NODQ-NEXT: vmovq %xmm3, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; NODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; NODQ-NEXT: vextracti128 $1, %ymm1, %xmm3
; NODQ-NEXT: vpextrq $1, %xmm3, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm4
; NODQ-NEXT: vmovq %xmm3, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; NODQ-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
; NODQ-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm4
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm4
; NODQ-NEXT: vmovq %xmm1, %rax
; NODQ-NEXT: vcvtsi2sd %rax, %xmm5, %xmm1
; NODQ-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; NODQ-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
; NODQ-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; NODQ-NEXT: vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
@ -1225,28 +1225,28 @@ define <8 x float> @ulto8f32(<8 x i64> %a) {
; NODQ: # %bb.0:
; NODQ-NEXT: vextracti32x4 $2, %zmm0, %xmm1
; NODQ-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; NODQ-NEXT: vmovq %xmm1, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm3, %xmm1
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm3, %xmm3
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm3
; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm4, %xmm2
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm4, %xmm2
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm4, %xmm3
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm3
; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm4, %xmm3
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm3
; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm4, %xmm0
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
; NODQ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; NODQ-NEXT: retq
@ -1269,54 +1269,54 @@ define <16 x float> @ulto16f32(<16 x i64> %a) {
; NODQ: # %bb.0:
; NODQ-NEXT: vextracti32x4 $2, %zmm1, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm3, %xmm3
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm4, %xmm2
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
; NODQ-NEXT: vextracti32x4 $3, %zmm1, %xmm3
; NODQ-NEXT: vmovq %xmm3, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm4, %xmm4
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm4
; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
; NODQ-NEXT: vpextrq $1, %xmm3, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm3
; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
; NODQ-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm3
; NODQ-NEXT: vmovq %xmm1, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm4
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm4
; NODQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; NODQ-NEXT: vextracti128 $1, %ymm1, %xmm1
; NODQ-NEXT: vmovq %xmm1, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm4
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm4
; NODQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; NODQ-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm1
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NODQ-NEXT: vextractf32x4 $2, %zmm0, %xmm2
; NODQ-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm3
; NODQ-NEXT: vmovq %xmm2, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm2
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
; NODQ-NEXT: vextractf32x4 $3, %zmm0, %xmm3
; NODQ-NEXT: vmovq %xmm3, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm4
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm4
; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
; NODQ-NEXT: vpextrq $1, %xmm3, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm3
; NODQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm3
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm3
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm4
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm4
; NODQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm4
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm4
; NODQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtusi2ss %rax, %xmm5, %xmm0
; NODQ-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
; NODQ-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
@ -1498,7 +1498,7 @@ define i32 @fptoui(float %a) nounwind {
define float @uitof32(i32 %a) nounwind {
; ALL-LABEL: uitof32:
; ALL: # %bb.0:
; ALL-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
; ALL-NEXT: vcvtusi2ss %edi, %xmm15, %xmm0
; ALL-NEXT: retq
%b = uitofp i32 %a to float
ret float %b
@ -1507,7 +1507,7 @@ define float @uitof32(i32 %a) nounwind {
define double @uitof64(i32 %a) nounwind {
; ALL-LABEL: uitof64:
; ALL: # %bb.0:
; ALL-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0
; ALL-NEXT: vcvtusi2sd %edi, %xmm15, %xmm0
; ALL-NEXT: retq
%b = uitofp i32 %a to double
ret double %b

View File

@ -1221,17 +1221,17 @@ define dso_local x86_regcallcc i32 @test_argRetMixTypes(double, float, i8 signex
; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; X32-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; X32-NEXT: vcvtsi2sd %eax, %xmm2, %xmm1
; X32-NEXT: vcvtsi2sd %eax, %xmm3, %xmm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X32-NEXT: vcvtsi2sd %ecx, %xmm2, %xmm1
; X32-NEXT: vcvtsi2sd %ecx, %xmm3, %xmm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X32-NEXT: vmovd %edx, %xmm1
; X32-NEXT: vpinsrd $1, %edi, %xmm1, %xmm1
; X32-NEXT: vcvtqq2pd %ymm1, %ymm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X32-NEXT: vcvtsi2sd %esi, %xmm2, %xmm1
; X32-NEXT: vcvtsi2sd %esi, %xmm3, %xmm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X32-NEXT: vcvtsi2sdl (%ebx), %xmm2, %xmm1
; X32-NEXT: vcvtsi2sdl (%ebx), %xmm3, %xmm1
; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; X32-NEXT: vcvttsd2si %xmm0, %eax
; X32-NEXT: popl %ebx
@ -1242,15 +1242,15 @@ define dso_local x86_regcallcc i32 @test_argRetMixTypes(double, float, i8 signex
; WIN64: # %bb.0:
; WIN64-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; WIN64-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; WIN64-NEXT: vcvtsi2sd %eax, %xmm2, %xmm1
; WIN64-NEXT: vcvtsi2sd %eax, %xmm7, %xmm1
; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; WIN64-NEXT: vcvtsi2sd %ecx, %xmm2, %xmm1
; WIN64-NEXT: vcvtsi2sd %ecx, %xmm7, %xmm1
; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; WIN64-NEXT: vcvtsi2sd %rdx, %xmm2, %xmm1
; WIN64-NEXT: vcvtsi2sd %rdx, %xmm7, %xmm1
; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; WIN64-NEXT: vcvtsi2sd %edi, %xmm2, %xmm1
; WIN64-NEXT: vcvtsi2sd %edi, %xmm7, %xmm1
; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; WIN64-NEXT: vcvtsi2sdl (%rsi), %xmm2, %xmm1
; WIN64-NEXT: vcvtsi2sdl (%rsi), %xmm7, %xmm1
; WIN64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; WIN64-NEXT: vcvttsd2si %xmm0, %eax
; WIN64-NEXT: retq
@ -1259,15 +1259,15 @@ define dso_local x86_regcallcc i32 @test_argRetMixTypes(double, float, i8 signex
; LINUXOSX64: # %bb.0:
; LINUXOSX64-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; LINUXOSX64-NEXT: vcvtsi2sd %eax, %xmm2, %xmm1
; LINUXOSX64-NEXT: vcvtsi2sd %eax, %xmm7, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; LINUXOSX64-NEXT: vcvtsi2sd %ecx, %xmm2, %xmm1
; LINUXOSX64-NEXT: vcvtsi2sd %ecx, %xmm7, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; LINUXOSX64-NEXT: vcvtsi2sd %rdx, %xmm2, %xmm1
; LINUXOSX64-NEXT: vcvtsi2sd %rdx, %xmm7, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; LINUXOSX64-NEXT: vcvtsi2sd %edi, %xmm2, %xmm1
; LINUXOSX64-NEXT: vcvtsi2sd %edi, %xmm7, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; LINUXOSX64-NEXT: vcvtsi2sdl (%rsi), %xmm2, %xmm1
; LINUXOSX64-NEXT: vcvtsi2sdl (%rsi), %xmm7, %xmm1
; LINUXOSX64-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; LINUXOSX64-NEXT: vcvttsd2si %xmm0, %eax
; LINUXOSX64-NEXT: retq

View File

@ -412,7 +412,7 @@ define double @extload_f16_f64(ptr %x) {
define float @extload_f16_f32_optsize(ptr %x) optsize {
; X64-LABEL: extload_f16_f32_optsize:
; X64: # %bb.0:
; X64-NEXT: vcvtsh2ss (%rdi), %xmm0, %xmm0
; X64-NEXT: vcvtsh2ss (%rdi), %xmm15, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: extload_f16_f32_optsize:
@ -420,7 +420,7 @@ define float @extload_f16_f32_optsize(ptr %x) optsize {
; X86-NEXT: pushl %eax
; X86-NEXT: .cfi_def_cfa_offset 8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsh2ss (%eax), %xmm0, %xmm0
; X86-NEXT: vcvtsh2ss (%eax), %xmm7, %xmm0
; X86-NEXT: vmovss %xmm0, (%esp)
; X86-NEXT: flds (%esp)
; X86-NEXT: popl %eax
@ -434,7 +434,7 @@ define float @extload_f16_f32_optsize(ptr %x) optsize {
define double @extload_f16_f64_optsize(ptr %x) optsize {
; X64-LABEL: extload_f16_f64_optsize:
; X64: # %bb.0:
; X64-NEXT: vcvtsh2sd (%rdi), %xmm0, %xmm0
; X64-NEXT: vcvtsh2sd (%rdi), %xmm15, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: extload_f16_f64_optsize:
@ -447,7 +447,7 @@ define double @extload_f16_f64_optsize(ptr %x) optsize {
; X86-NEXT: andl $-8, %esp
; X86-NEXT: subl $8, %esp
; X86-NEXT: movl 8(%ebp), %eax
; X86-NEXT: vcvtsh2sd (%eax), %xmm0, %xmm0
; X86-NEXT: vcvtsh2sd (%eax), %xmm7, %xmm0
; X86-NEXT: vmovsd %xmm0, (%esp)
; X86-NEXT: fldl (%esp)
; X86-NEXT: movl %ebp, %esp
@ -559,13 +559,13 @@ define half @s8_to_half(i8 %x) {
; X64-LABEL: s8_to_half:
; X64: # %bb.0:
; X64-NEXT: movsbl %dil, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: vcvtsi2sh %eax, %xmm31, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: s8_to_half:
; X86: # %bb.0:
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: vcvtsi2sh %eax, %xmm7, %xmm0
; X86-NEXT: retl
%a = sitofp i8 %x to half
ret half %a
@ -575,13 +575,13 @@ define half @s16_to_half(i16 %x) {
; X64-LABEL: s16_to_half:
; X64: # %bb.0:
; X64-NEXT: movswl %di, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: vcvtsi2sh %eax, %xmm31, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: s16_to_half:
; X86: # %bb.0:
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: vcvtsi2sh %eax, %xmm7, %xmm0
; X86-NEXT: retl
%a = sitofp i16 %x to half
ret half %a
@ -590,12 +590,12 @@ define half @s16_to_half(i16 %x) {
define half @s32_to_half(i32 %x) {
; X64-LABEL: s32_to_half:
; X64: # %bb.0:
; X64-NEXT: vcvtsi2sh %edi, %xmm0, %xmm0
; X64-NEXT: vcvtsi2sh %edi, %xmm31, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: s32_to_half:
; X86: # %bb.0:
; X86-NEXT: vcvtsi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: vcvtsi2shl {{[0-9]+}}(%esp), %xmm7, %xmm0
; X86-NEXT: retl
%a = sitofp i32 %x to half
ret half %a
@ -604,7 +604,7 @@ define half @s32_to_half(i32 %x) {
define half @s64_to_half(i64 %x) {
; X64-LABEL: s64_to_half:
; X64: # %bb.0:
; X64-NEXT: vcvtsi2sh %rdi, %xmm0, %xmm0
; X64-NEXT: vcvtsi2sh %rdi, %xmm31, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: s64_to_half:
@ -644,13 +644,13 @@ define half @u8_to_half(i8 %x) {
; X64-LABEL: u8_to_half:
; X64: # %bb.0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: vcvtsi2sh %eax, %xmm31, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: u8_to_half:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: vcvtsi2sh %eax, %xmm7, %xmm0
; X86-NEXT: retl
%a = uitofp i8 %x to half
ret half %a
@ -660,13 +660,13 @@ define half @u16_to_half(i16 %x) {
; X64-LABEL: u16_to_half:
; X64: # %bb.0:
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: vcvtsi2sh %eax, %xmm31, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: u16_to_half:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: vcvtsi2sh %eax, %xmm7, %xmm0
; X86-NEXT: retl
%a = uitofp i16 %x to half
ret half %a
@ -675,12 +675,12 @@ define half @u16_to_half(i16 %x) {
define half @u32_to_half(i32 %x) {
; X64-LABEL: u32_to_half:
; X64: # %bb.0:
; X64-NEXT: vcvtusi2sh %edi, %xmm0, %xmm0
; X64-NEXT: vcvtusi2sh %edi, %xmm31, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: u32_to_half:
; X86: # %bb.0:
; X86-NEXT: vcvtusi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: vcvtusi2shl {{[0-9]+}}(%esp), %xmm7, %xmm0
; X86-NEXT: retl
%a = uitofp i32 %x to half
ret half %a
@ -689,7 +689,7 @@ define half @u32_to_half(i32 %x) {
define half @u64_to_half(i64 %x) {
; X64-LABEL: u64_to_half:
; X64: # %bb.0:
; X64-NEXT: vcvtusi2sh %rdi, %xmm0, %xmm0
; X64-NEXT: vcvtusi2sh %rdi, %xmm31, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: u64_to_half:

View File

@ -16,14 +16,14 @@ define <4 x half> @vector_sint32ToHalf(<4 x i32> %int32) {
; CHECK-LABEL: vector_sint32ToHalf:
; CHECK: # %bb.0:
; CHECK-NEXT: vextractps $3, %xmm0, %eax
; CHECK-NEXT: vcvtsi2sh %eax, %xmm1, %xmm1
; CHECK-NEXT: vcvtsi2sh %eax, %xmm31, %xmm1
; CHECK-NEXT: vextractps $2, %xmm0, %eax
; CHECK-NEXT: vcvtsi2sh %eax, %xmm2, %xmm2
; CHECK-NEXT: vcvtsi2sh %eax, %xmm31, %xmm2
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; CHECK-NEXT: vextractps $1, %xmm0, %eax
; CHECK-NEXT: vcvtsi2sh %eax, %xmm3, %xmm2
; CHECK-NEXT: vcvtsi2sh %eax, %xmm31, %xmm2
; CHECK-NEXT: vmovd %xmm0, %eax
; CHECK-NEXT: vcvtsi2sh %eax, %xmm3, %xmm0
; CHECK-NEXT: vcvtsi2sh %eax, %xmm31, %xmm0
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; CHECK-NEXT: retq
@ -36,32 +36,32 @@ define <8 x half> @vector_sint16ToHalf(<8 x i16> %int16) {
; CHECK: # %bb.0:
; CHECK-NEXT: vpextrw $7, %xmm0, %eax
; CHECK-NEXT: cwtl
; CHECK-NEXT: vcvtsi2sh %eax, %xmm1, %xmm1
; CHECK-NEXT: vcvtsi2sh %eax, %xmm31, %xmm1
; CHECK-NEXT: vpextrw $6, %xmm0, %eax
; CHECK-NEXT: cwtl
; CHECK-NEXT: vcvtsi2sh %eax, %xmm2, %xmm2
; CHECK-NEXT: vcvtsi2sh %eax, %xmm31, %xmm2
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; CHECK-NEXT: vpextrw $5, %xmm0, %eax
; CHECK-NEXT: cwtl
; CHECK-NEXT: vcvtsi2sh %eax, %xmm3, %xmm2
; CHECK-NEXT: vcvtsi2sh %eax, %xmm31, %xmm2
; CHECK-NEXT: vpextrw $4, %xmm0, %eax
; CHECK-NEXT: cwtl
; CHECK-NEXT: vcvtsi2sh %eax, %xmm3, %xmm3
; CHECK-NEXT: vcvtsi2sh %eax, %xmm31, %xmm3
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; CHECK-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-NEXT: vpextrw $3, %xmm0, %eax
; CHECK-NEXT: cwtl
; CHECK-NEXT: vcvtsi2sh %eax, %xmm4, %xmm2
; CHECK-NEXT: vcvtsi2sh %eax, %xmm31, %xmm2
; CHECK-NEXT: vpextrw $2, %xmm0, %eax
; CHECK-NEXT: cwtl
; CHECK-NEXT: vcvtsi2sh %eax, %xmm4, %xmm3
; CHECK-NEXT: vcvtsi2sh %eax, %xmm31, %xmm3
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; CHECK-NEXT: vpextrw $1, %xmm0, %eax
; CHECK-NEXT: cwtl
; CHECK-NEXT: vcvtsi2sh %eax, %xmm4, %xmm3
; CHECK-NEXT: vcvtsi2sh %eax, %xmm31, %xmm3
; CHECK-NEXT: vmovw %xmm0, %eax
; CHECK-NEXT: cwtl
; CHECK-NEXT: vcvtsi2sh %eax, %xmm4, %xmm0
; CHECK-NEXT: vcvtsi2sh %eax, %xmm31, %xmm0
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
@ -97,25 +97,25 @@ define <8 x half> @vector_uint16ToHalf(<8 x i16> %int16) {
; CHECK-LABEL: vector_uint16ToHalf:
; CHECK: # %bb.0:
; CHECK-NEXT: vpextrw $7, %xmm0, %eax
; CHECK-NEXT: vcvtsi2sh %eax, %xmm1, %xmm1
; CHECK-NEXT: vcvtsi2sh %eax, %xmm31, %xmm1
; CHECK-NEXT: vpextrw $6, %xmm0, %eax
; CHECK-NEXT: vcvtsi2sh %eax, %xmm2, %xmm2
; CHECK-NEXT: vcvtsi2sh %eax, %xmm31, %xmm2
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; CHECK-NEXT: vpextrw $5, %xmm0, %eax
; CHECK-NEXT: vcvtsi2sh %eax, %xmm3, %xmm2
; CHECK-NEXT: vcvtsi2sh %eax, %xmm31, %xmm2
; CHECK-NEXT: vpextrw $4, %xmm0, %eax
; CHECK-NEXT: vcvtsi2sh %eax, %xmm3, %xmm3
; CHECK-NEXT: vcvtsi2sh %eax, %xmm31, %xmm3
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; CHECK-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-NEXT: vpextrw $3, %xmm0, %eax
; CHECK-NEXT: vcvtsi2sh %eax, %xmm4, %xmm2
; CHECK-NEXT: vcvtsi2sh %eax, %xmm31, %xmm2
; CHECK-NEXT: vpextrw $2, %xmm0, %eax
; CHECK-NEXT: vcvtsi2sh %eax, %xmm4, %xmm3
; CHECK-NEXT: vcvtsi2sh %eax, %xmm31, %xmm3
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; CHECK-NEXT: vpextrw $1, %xmm0, %eax
; CHECK-NEXT: vcvtsi2sh %eax, %xmm4, %xmm3
; CHECK-NEXT: vcvtsi2sh %eax, %xmm31, %xmm3
; CHECK-NEXT: vpextrw $0, %xmm0, %eax
; CHECK-NEXT: vcvtsi2sh %eax, %xmm4, %xmm0
; CHECK-NEXT: vcvtsi2sh %eax, %xmm31, %xmm0
; CHECK-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
; CHECK-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]

View File

@ -36,7 +36,7 @@ define dso_local float @t2(ptr nocapture %x) nounwind readonly ssp optsize {
;
; AVX-LABEL: t2:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsd2ss (%rcx), %xmm0, %xmm0
; AVX-NEXT: vcvtsd2ss (%rcx), %xmm5, %xmm0
; AVX-NEXT: retq
entry:
%0 = load double, ptr %x, align 8
@ -93,7 +93,7 @@ define dso_local float @squirtf_size(ptr %x) nounwind optsize {
;
; AVX-LABEL: squirtf_size:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vsqrtss (%rcx), %xmm0, %xmm0
; AVX-NEXT: vsqrtss (%rcx), %xmm5, %xmm0
; AVX-NEXT: retq
entry:
%z = load float, ptr %x
@ -114,7 +114,7 @@ define dso_local double @squirt_size(ptr %x) nounwind optsize {
;
; AVX-LABEL: squirt_size:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vsqrtsd (%rcx), %xmm0, %xmm0
; AVX-NEXT: vsqrtsd (%rcx), %xmm5, %xmm0
; AVX-NEXT: retq
entry:
%z = load double, ptr %x
@ -199,8 +199,8 @@ define dso_local float @loopdep1(i32 %m) nounwind uwtable readnone ssp {
; AVX1-NEXT: .p2align 4
; AVX1-NEXT: .LBB6_3: # %for.body
; AVX1-NEXT: # =>This Inner Loop Header: Depth=1
; AVX1-NEXT: vcvtsi2ss %eax, %xmm4, %xmm2
; AVX1-NEXT: vcvtsi2ss %ecx, %xmm4, %xmm3
; AVX1-NEXT: vcvtsi2ss %eax, %xmm5, %xmm2
; AVX1-NEXT: vcvtsi2ss %ecx, %xmm5, %xmm3
; AVX1-NEXT: vaddss %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vaddss %xmm3, %xmm1, %xmm1
; AVX1-NEXT: incl %eax
@ -226,9 +226,9 @@ define dso_local float @loopdep1(i32 %m) nounwind uwtable readnone ssp {
; AVX512VL-NEXT: .p2align 4
; AVX512VL-NEXT: .LBB6_3: # %for.body
; AVX512VL-NEXT: # =>This Inner Loop Header: Depth=1
; AVX512VL-NEXT: vcvtsi2ss %eax, %xmm3, %xmm2
; AVX512VL-NEXT: vcvtsi2ss %eax, %xmm5, %xmm2
; AVX512VL-NEXT: vaddss %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vcvtsi2ss %ecx, %xmm3, %xmm2
; AVX512VL-NEXT: vcvtsi2ss %ecx, %xmm5, %xmm2
; AVX512VL-NEXT: vaddss %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: incl %eax
; AVX512VL-NEXT: decl %ecx
@ -358,8 +358,8 @@ define i64 @loopdep2(ptr nocapture %x, ptr nocapture %y) nounwind {
; AVX-NEXT: .p2align 4
; AVX-NEXT: .LBB7_1: # %loop
; AVX-NEXT: # =>This Inner Loop Header: Depth=1
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vcvtsi2sd %rcx, %xmm1, %xmm0
; AVX-NEXT: vxorps %xmm5, %xmm5, %xmm5
; AVX-NEXT: vcvtsi2sd %rcx, %xmm5, %xmm0
; AVX-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX-NEXT: #APP
; AVX-NEXT: #NO_APP
@ -566,8 +566,8 @@ define dso_local void @loopdep3() {
; AVX-NEXT: .LBB8_2: # %for.body3
; AVX-NEXT: # Parent Loop BB8_1 Depth=1
; AVX-NEXT: # => This Inner Loop Header: Depth=2
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2sdl (%r11), %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm5, %xmm5, %xmm5
; AVX-NEXT: vcvtsi2sdl (%r11), %xmm5, %xmm0
; AVX-NEXT: vmulsd (%rsi,%rdx), %xmm0, %xmm0
; AVX-NEXT: vmulsd (%rsi,%r8), %xmm0, %xmm0
; AVX-NEXT: vmulsd (%rsi,%r9), %xmm0, %xmm0
@ -761,8 +761,8 @@ define dso_local double @inlineasmdep(i64 %arg) {
; AVX-NEXT: #NO_APP
; AVX-NEXT: #APP
; AVX-NEXT: #NO_APP
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2sd %rcx, %xmm0, %xmm0
; AVX-NEXT: vxorps %xmm3, %xmm3, %xmm3
; AVX-NEXT: vcvtsi2sd %rcx, %xmm3, %xmm0
; AVX-NEXT: vmovaps (%rsp), %xmm6 # 16-byte Reload
; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 16-byte Reload
; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload

View File

@ -16,7 +16,7 @@ define void @runcont(ptr %source) nounwind {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB0_1: ## %bb
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vcvtsi2ssl (%eax,%edx,4), %xmm2, %xmm1
; CHECK-NEXT: vcvtsi2ssl (%eax,%edx,4), %xmm7, %xmm1
; CHECK-NEXT: vaddss %xmm0, %xmm1, %xmm0
; CHECK-NEXT: incl %edx
; CHECK-NEXT: cmpl %edx, %ecx

View File

@ -78,7 +78,7 @@ define double @single_to_double_rm_optsize(ptr %x) optsize {
;
; AVX-LABEL: single_to_double_rm_optsize:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtss2sd (%rdi), %xmm0, %xmm0
; AVX-NEXT: vcvtss2sd (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%0 = load float, ptr %x, align 4
@ -112,7 +112,7 @@ define float @double_to_single_rm_optsize(ptr %x) optsize {
;
; AVX-LABEL: double_to_single_rm_optsize:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0
; AVX-NEXT: vcvtsd2ss (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%0 = load double, ptr %x, align 8

View File

@ -12,7 +12,7 @@ define double @long_to_double_rr(i64 %a) {
;
; AVX-LABEL: long_to_double_rr:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2sd %rdi, %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%0 = sitofp i64 %a to double
@ -27,7 +27,7 @@ define double @long_to_double_rm(ptr %a) {
;
; AVX-LABEL: long_to_double_rm:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0
; AVX-NEXT: vcvtsi2sdq (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%0 = load i64, ptr %a
@ -43,7 +43,7 @@ define double @long_to_double_rm_optsize(ptr %a) optsize {
;
; AVX-LABEL: long_to_double_rm_optsize:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0
; AVX-NEXT: vcvtsi2sdq (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%0 = load i64, ptr %a
@ -59,7 +59,7 @@ define float @long_to_float_rr(i64 %a) {
;
; AVX-LABEL: long_to_float_rr:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ss %rdi, %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%0 = sitofp i64 %a to float
@ -74,7 +74,7 @@ define float @long_to_float_rm(ptr %a) {
;
; AVX-LABEL: long_to_float_rm:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ssq (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%0 = load i64, ptr %a
@ -90,7 +90,7 @@ define float @long_to_float_rm_optsize(ptr %a) optsize {
;
; AVX-LABEL: long_to_float_rm_optsize:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ssq (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%0 = load i64, ptr %a

View File

@ -15,7 +15,7 @@ define double @int_to_double_rr(i32 %a) {
;
; AVX-LABEL: int_to_double_rr:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2sd %edi, %xmm15, %xmm0
; AVX-NEXT: retq
;
; SSE2_X86-LABEL: int_to_double_rr:
@ -44,7 +44,7 @@ define double @int_to_double_rr(i32 %a) {
; AVX_X86-NEXT: .cfi_def_cfa_register %ebp
; AVX_X86-NEXT: andl $-8, %esp
; AVX_X86-NEXT: subl $8, %esp
; AVX_X86-NEXT: vcvtsi2sdl 8(%ebp), %xmm0, %xmm0
; AVX_X86-NEXT: vcvtsi2sdl 8(%ebp), %xmm7, %xmm0
; AVX_X86-NEXT: vmovsd %xmm0, (%esp)
; AVX_X86-NEXT: fldl (%esp)
; AVX_X86-NEXT: movl %ebp, %esp
@ -64,7 +64,7 @@ define double @int_to_double_rm(ptr %a) {
;
; AVX-LABEL: int_to_double_rm:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0
; AVX-NEXT: vcvtsi2sdl (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
;
; SSE2_X86-LABEL: int_to_double_rm:
@ -95,7 +95,7 @@ define double @int_to_double_rm(ptr %a) {
; AVX_X86-NEXT: andl $-8, %esp
; AVX_X86-NEXT: subl $8, %esp
; AVX_X86-NEXT: movl 8(%ebp), %eax
; AVX_X86-NEXT: vcvtsi2sdl (%eax), %xmm0, %xmm0
; AVX_X86-NEXT: vcvtsi2sdl (%eax), %xmm7, %xmm0
; AVX_X86-NEXT: vmovsd %xmm0, (%esp)
; AVX_X86-NEXT: fldl (%esp)
; AVX_X86-NEXT: movl %ebp, %esp
@ -116,7 +116,7 @@ define double @int_to_double_rm_optsize(ptr %a) optsize {
;
; AVX-LABEL: int_to_double_rm_optsize:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0
; AVX-NEXT: vcvtsi2sdl (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
;
; SSE2_X86-LABEL: int_to_double_rm_optsize:
@ -147,7 +147,7 @@ define double @int_to_double_rm_optsize(ptr %a) optsize {
; AVX_X86-NEXT: andl $-8, %esp
; AVX_X86-NEXT: subl $8, %esp
; AVX_X86-NEXT: movl 8(%ebp), %eax
; AVX_X86-NEXT: vcvtsi2sdl (%eax), %xmm0, %xmm0
; AVX_X86-NEXT: vcvtsi2sdl (%eax), %xmm7, %xmm0
; AVX_X86-NEXT: vmovsd %xmm0, (%esp)
; AVX_X86-NEXT: fldl (%esp)
; AVX_X86-NEXT: movl %ebp, %esp
@ -168,7 +168,7 @@ define float @int_to_float_rr(i32 %a) {
;
; AVX-LABEL: int_to_float_rr:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ss %edi, %xmm15, %xmm0
; AVX-NEXT: retq
;
; SSE2_X86-LABEL: int_to_float_rr:
@ -186,7 +186,7 @@ define float @int_to_float_rr(i32 %a) {
; AVX_X86: # %bb.0: # %entry
; AVX_X86-NEXT: pushl %eax
; AVX_X86-NEXT: .cfi_def_cfa_offset 8
; AVX_X86-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX_X86-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm7, %xmm0
; AVX_X86-NEXT: vmovss %xmm0, (%esp)
; AVX_X86-NEXT: flds (%esp)
; AVX_X86-NEXT: popl %eax
@ -205,7 +205,7 @@ define float @int_to_float_rm(ptr %a) {
;
; AVX-LABEL: int_to_float_rm:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ssl (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
;
; SSE2_X86-LABEL: int_to_float_rm:
@ -225,7 +225,7 @@ define float @int_to_float_rm(ptr %a) {
; AVX_X86-NEXT: pushl %eax
; AVX_X86-NEXT: .cfi_def_cfa_offset 8
; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX_X86-NEXT: vcvtsi2ssl (%eax), %xmm0, %xmm0
; AVX_X86-NEXT: vcvtsi2ssl (%eax), %xmm7, %xmm0
; AVX_X86-NEXT: vmovss %xmm0, (%esp)
; AVX_X86-NEXT: flds (%esp)
; AVX_X86-NEXT: popl %eax
@ -245,7 +245,7 @@ define float @int_to_float_rm_optsize(ptr %a) optsize {
;
; AVX-LABEL: int_to_float_rm_optsize:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ssl (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
;
; SSE2_X86-LABEL: int_to_float_rm_optsize:
@ -265,7 +265,7 @@ define float @int_to_float_rm_optsize(ptr %a) optsize {
; AVX_X86-NEXT: pushl %eax
; AVX_X86-NEXT: .cfi_def_cfa_offset 8
; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX_X86-NEXT: vcvtsi2ssl (%eax), %xmm0, %xmm0
; AVX_X86-NEXT: vcvtsi2ssl (%eax), %xmm7, %xmm0
; AVX_X86-NEXT: vmovss %xmm0, (%esp)
; AVX_X86-NEXT: flds (%esp)
; AVX_X86-NEXT: popl %eax

View File

@ -5,7 +5,7 @@
define double @long_to_double_rr(i64 %a) {
; ALL-LABEL: long_to_double_rr:
; ALL: # %bb.0: # %entry
; ALL-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0
; ALL-NEXT: vcvtusi2sd %rdi, %xmm15, %xmm0
; ALL-NEXT: retq
entry:
%0 = uitofp i64 %a to double
@ -15,7 +15,7 @@ entry:
define double @long_to_double_rm(ptr %a) {
; ALL-LABEL: long_to_double_rm:
; ALL: # %bb.0: # %entry
; ALL-NEXT: vcvtusi2sdq (%rdi), %xmm0, %xmm0
; ALL-NEXT: vcvtusi2sdq (%rdi), %xmm15, %xmm0
; ALL-NEXT: retq
entry:
%0 = load i64, ptr %a
@ -26,7 +26,7 @@ entry:
define double @long_to_double_rm_optsize(ptr %a) optsize {
; ALL-LABEL: long_to_double_rm_optsize:
; ALL: # %bb.0: # %entry
; ALL-NEXT: vcvtusi2sdq (%rdi), %xmm0, %xmm0
; ALL-NEXT: vcvtusi2sdq (%rdi), %xmm15, %xmm0
; ALL-NEXT: retq
entry:
%0 = load i64, ptr %a
@ -37,7 +37,7 @@ entry:
define float @long_to_float_rr(i64 %a) {
; ALL-LABEL: long_to_float_rr:
; ALL: # %bb.0: # %entry
; ALL-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0
; ALL-NEXT: vcvtusi2ss %rdi, %xmm15, %xmm0
; ALL-NEXT: retq
entry:
%0 = uitofp i64 %a to float
@ -47,7 +47,7 @@ entry:
define float @long_to_float_rm(ptr %a) {
; ALL-LABEL: long_to_float_rm:
; ALL: # %bb.0: # %entry
; ALL-NEXT: vcvtusi2ssq (%rdi), %xmm0, %xmm0
; ALL-NEXT: vcvtusi2ssq (%rdi), %xmm15, %xmm0
; ALL-NEXT: retq
entry:
%0 = load i64, ptr %a
@ -58,7 +58,7 @@ entry:
define float @long_to_float_rm_optsize(ptr %a) optsize {
; ALL-LABEL: long_to_float_rm_optsize:
; ALL: # %bb.0: # %entry
; ALL-NEXT: vcvtusi2ssq (%rdi), %xmm0, %xmm0
; ALL-NEXT: vcvtusi2ssq (%rdi), %xmm15, %xmm0
; ALL-NEXT: retq
entry:
%0 = load i64, ptr %a

View File

@ -6,7 +6,7 @@
define double @int_to_double_rr(i32 %a) {
; AVX-LABEL: int_to_double_rr:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0
; AVX-NEXT: vcvtusi2sd %edi, %xmm15, %xmm0
; AVX-NEXT: retq
;
; AVX_X86-LABEL: int_to_double_rr:
@ -18,7 +18,7 @@ define double @int_to_double_rr(i32 %a) {
; AVX_X86-NEXT: .cfi_def_cfa_register %ebp
; AVX_X86-NEXT: andl $-8, %esp
; AVX_X86-NEXT: subl $8, %esp
; AVX_X86-NEXT: vcvtusi2sdl 8(%ebp), %xmm0, %xmm0
; AVX_X86-NEXT: vcvtusi2sdl 8(%ebp), %xmm7, %xmm0
; AVX_X86-NEXT: vmovsd %xmm0, (%esp)
; AVX_X86-NEXT: fldl (%esp)
; AVX_X86-NEXT: movl %ebp, %esp
@ -33,7 +33,7 @@ entry:
define double @int_to_double_rm(ptr %a) {
; AVX-LABEL: int_to_double_rm:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtusi2sdl (%rdi), %xmm0, %xmm0
; AVX-NEXT: vcvtusi2sdl (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
;
; AVX_X86-LABEL: int_to_double_rm:
@ -46,7 +46,7 @@ define double @int_to_double_rm(ptr %a) {
; AVX_X86-NEXT: andl $-8, %esp
; AVX_X86-NEXT: subl $8, %esp
; AVX_X86-NEXT: movl 8(%ebp), %eax
; AVX_X86-NEXT: vcvtusi2sdl (%eax), %xmm0, %xmm0
; AVX_X86-NEXT: vcvtusi2sdl (%eax), %xmm7, %xmm0
; AVX_X86-NEXT: vmovsd %xmm0, (%esp)
; AVX_X86-NEXT: fldl (%esp)
; AVX_X86-NEXT: movl %ebp, %esp
@ -62,7 +62,7 @@ entry:
define double @int_to_double_rm_optsize(ptr %a) optsize {
; AVX-LABEL: int_to_double_rm_optsize:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtusi2sdl (%rdi), %xmm0, %xmm0
; AVX-NEXT: vcvtusi2sdl (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
;
; AVX_X86-LABEL: int_to_double_rm_optsize:
@ -75,7 +75,7 @@ define double @int_to_double_rm_optsize(ptr %a) optsize {
; AVX_X86-NEXT: andl $-8, %esp
; AVX_X86-NEXT: subl $8, %esp
; AVX_X86-NEXT: movl 8(%ebp), %eax
; AVX_X86-NEXT: vcvtusi2sdl (%eax), %xmm0, %xmm0
; AVX_X86-NEXT: vcvtusi2sdl (%eax), %xmm7, %xmm0
; AVX_X86-NEXT: vmovsd %xmm0, (%esp)
; AVX_X86-NEXT: fldl (%esp)
; AVX_X86-NEXT: movl %ebp, %esp
@ -91,14 +91,14 @@ entry:
define float @int_to_float_rr(i32 %a) {
; AVX-LABEL: int_to_float_rr:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
; AVX-NEXT: vcvtusi2ss %edi, %xmm15, %xmm0
; AVX-NEXT: retq
;
; AVX_X86-LABEL: int_to_float_rr:
; AVX_X86: # %bb.0: # %entry
; AVX_X86-NEXT: pushl %eax
; AVX_X86-NEXT: .cfi_def_cfa_offset 8
; AVX_X86-NEXT: vcvtusi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX_X86-NEXT: vcvtusi2ssl {{[0-9]+}}(%esp), %xmm7, %xmm0
; AVX_X86-NEXT: vmovss %xmm0, (%esp)
; AVX_X86-NEXT: flds (%esp)
; AVX_X86-NEXT: popl %eax
@ -112,7 +112,7 @@ entry:
define float @int_to_float_rm(ptr %a) {
; AVX-LABEL: int_to_float_rm:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0
; AVX-NEXT: vcvtusi2ssl (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
;
; AVX_X86-LABEL: int_to_float_rm:
@ -120,7 +120,7 @@ define float @int_to_float_rm(ptr %a) {
; AVX_X86-NEXT: pushl %eax
; AVX_X86-NEXT: .cfi_def_cfa_offset 8
; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX_X86-NEXT: vcvtusi2ssl (%eax), %xmm0, %xmm0
; AVX_X86-NEXT: vcvtusi2ssl (%eax), %xmm7, %xmm0
; AVX_X86-NEXT: vmovss %xmm0, (%esp)
; AVX_X86-NEXT: flds (%esp)
; AVX_X86-NEXT: popl %eax
@ -135,7 +135,7 @@ entry:
define float @int_to_float_rm_optsize(ptr %a) optsize {
; AVX-LABEL: int_to_float_rm_optsize:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0
; AVX-NEXT: vcvtusi2ssl (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
;
; AVX_X86-LABEL: int_to_float_rm_optsize:
@ -143,7 +143,7 @@ define float @int_to_float_rm_optsize(ptr %a) optsize {
; AVX_X86-NEXT: pushl %eax
; AVX_X86-NEXT: .cfi_def_cfa_offset 8
; AVX_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; AVX_X86-NEXT: vcvtusi2ssl (%eax), %xmm0, %xmm0
; AVX_X86-NEXT: vcvtusi2ssl (%eax), %xmm7, %xmm0
; AVX_X86-NEXT: vmovss %xmm0, (%esp)
; AVX_X86-NEXT: flds (%esp)
; AVX_X86-NEXT: popl %eax

@ -399,11 +399,11 @@ define i1 @PR140534(i32 %a0, i32 %a1, i32 %a2) {
; AVX1-LABEL: PR140534:
; AVX1: # %bb.0:
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm0, %xmm0
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX1-NEXT: movl %esi, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX1-NEXT: movl %edx, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX1-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vcmpltsd %xmm2, %xmm1, %xmm2
; AVX1-NEXT: vcmpltsd %xmm0, %xmm1, %xmm0
@ -414,9 +414,9 @@ define i1 @PR140534(i32 %a0, i32 %a1, i32 %a2) {
;
; AVX512-LABEL: PR140534:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0
; AVX512-NEXT: vcvtusi2sd %esi, %xmm1, %xmm1
; AVX512-NEXT: vcvtusi2sd %edx, %xmm2, %xmm2
; AVX512-NEXT: vcvtusi2sd %edi, %xmm15, %xmm0
; AVX512-NEXT: vcvtusi2sd %esi, %xmm15, %xmm1
; AVX512-NEXT: vcvtusi2sd %edx, %xmm15, %xmm2
; AVX512-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX512-NEXT: vcmpltsd %xmm2, %xmm1, %k0
; AVX512-NEXT: vcmpltsd %xmm0, %xmm1, %k1

@ -887,14 +887,14 @@ define double @fmul_pow_shl_cnt_fail_maybe_non_pow2(i64 %v, i64 %cnt) nounwind {
; CHECK-NO-FASTFMA-NEXT: movq %rsi, %rcx
; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-NO-FASTFMA-NEXT: shlq %cl, %rdi
; CHECK-NO-FASTFMA-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vcvtusi2sd %rdi, %xmm15, %xmm0
; CHECK-NO-FASTFMA-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: retq
;
; CHECK-FMA-LABEL: fmul_pow_shl_cnt_fail_maybe_non_pow2:
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: shlxq %rsi, %rdi, %rax
; CHECK-FMA-NEXT: vcvtusi2sd %rax, %xmm0, %xmm0
; CHECK-FMA-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0
; CHECK-FMA-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-FMA-NEXT: retq
%shl = shl nuw i64 %v, %cnt
@ -927,9 +927,9 @@ define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nou
; CHECK-AVX2-NEXT: vpmovsxbq {{.*#+}} xmm1 = [2,2]
; CHECK-AVX2-NEXT: vpsllvq %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT: vpextrq $1, %xmm0, %rax
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; CHECK-AVX2-NEXT: vmovq %xmm0, %rax
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; CHECK-AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.5E+1,1.5E+1,1.5E+1,1.5E+1]
; CHECK-AVX2-NEXT: vmulps %xmm1, %xmm0, %xmm0
@ -940,9 +940,9 @@ define <2 x float> @fmul_pow_shl_cnt_vec_fail_expensive_cast(<2 x i64> %cnt) nou
; CHECK-NO-FASTFMA-NEXT: vpmovsxbq {{.*#+}} xmm1 = [2,2]
; CHECK-NO-FASTFMA-NEXT: vpsllvq %xmm0, %xmm1, %xmm0
; CHECK-NO-FASTFMA-NEXT: vpextrq $1, %xmm0, %rax
; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1
; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; CHECK-NO-FASTFMA-NEXT: vmovq %xmm0, %rax
; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; CHECK-NO-FASTFMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; CHECK-NO-FASTFMA-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.5E+1,1.5E+1,1.5E+1,1.5E+1]
; CHECK-NO-FASTFMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
@ -1108,13 +1108,13 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind {
; CHECK-AVX2-NEXT: vpsllvd %ymm0, %ymm1, %ymm0
; CHECK-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-AVX2-NEXT: vpextrw $2, %xmm0, %eax
; CHECK-AVX2-NEXT: vcvtsi2ss %eax, %xmm2, %xmm0
; CHECK-AVX2-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-AVX2-NEXT: vzeroupper
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
; CHECK-AVX2-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-AVX2-NEXT: vpextrw $0, %xmm0, %eax
; CHECK-AVX2-NEXT: vcvtsi2ss %eax, %xmm2, %xmm0
; CHECK-AVX2-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-AVX2-NEXT: vzeroupper
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
@ -1201,7 +1201,7 @@ define double @fmul_pow_shl_cnt_fail_maybe_bad_exp(i64 %cnt) nounwind {
; CHECK-NO-FASTFMA-NEXT: movl $1, %eax
; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-NO-FASTFMA-NEXT: shlq %cl, %rax
; CHECK-NO-FASTFMA-NEXT: vcvtusi2sd %rax, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0
; CHECK-NO-FASTFMA-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: retq
;
@ -1209,7 +1209,7 @@ define double @fmul_pow_shl_cnt_fail_maybe_bad_exp(i64 %cnt) nounwind {
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: movl $1, %eax
; CHECK-FMA-NEXT: shlxq %rdi, %rax, %rax
; CHECK-FMA-NEXT: vcvtusi2sd %rax, %xmm0, %xmm0
; CHECK-FMA-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0
; CHECK-FMA-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-FMA-NEXT: retq
%shl = shl nuw i64 1, %cnt
@ -1317,11 +1317,11 @@ define float @fdiv_pow_shl_cnt_fail_maybe_z(i64 %cnt) nounwind {
; CHECK-AVX2-NEXT: testq %rax, %rax
; CHECK-AVX2-NEXT: js .LBB23_1
; CHECK-AVX2-NEXT: # %bb.2:
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; CHECK-AVX2-NEXT: jmp .LBB23_3
; CHECK-AVX2-NEXT: .LBB23_1:
; CHECK-AVX2-NEXT: shrq %rax
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; CHECK-AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm0
; CHECK-AVX2-NEXT: .LBB23_3:
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
@ -1334,7 +1334,7 @@ define float @fdiv_pow_shl_cnt_fail_maybe_z(i64 %cnt) nounwind {
; CHECK-NO-FASTFMA-NEXT: movl $8, %eax
; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-NO-FASTFMA-NEXT: shlq %cl, %rax
; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %rax, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; CHECK-NO-FASTFMA-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NO-FASTFMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-NO-FASTFMA-NEXT: retq
@ -1343,7 +1343,7 @@ define float @fdiv_pow_shl_cnt_fail_maybe_z(i64 %cnt) nounwind {
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: movl $8, %eax
; CHECK-FMA-NEXT: shlxq %rdi, %rax, %rax
; CHECK-FMA-NEXT: vcvtusi2ss %rax, %xmm0, %xmm0
; CHECK-FMA-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; CHECK-FMA-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
@ -1371,7 +1371,7 @@ define float @fdiv_pow_shl_cnt_fail_neg_int(i64 %cnt) nounwind {
; CHECK-AVX2-NEXT: movl $8, %eax
; CHECK-AVX2-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-AVX2-NEXT: shlq %cl, %rax
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX2-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT: retq
@ -1382,7 +1382,7 @@ define float @fdiv_pow_shl_cnt_fail_neg_int(i64 %cnt) nounwind {
; CHECK-NO-FASTFMA-NEXT: movl $8, %eax
; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $rcx
; CHECK-NO-FASTFMA-NEXT: shlq %cl, %rax
; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; CHECK-NO-FASTFMA-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NO-FASTFMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-NO-FASTFMA-NEXT: retq
@ -1391,7 +1391,7 @@ define float @fdiv_pow_shl_cnt_fail_neg_int(i64 %cnt) nounwind {
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: movl $8, %eax
; CHECK-FMA-NEXT: shlxq %rdi, %rax, %rax
; CHECK-FMA-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; CHECK-FMA-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; CHECK-FMA-NEXT: vmovss {{.*#+}} xmm1 = [-9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq
@ -1451,7 +1451,7 @@ define half @fdiv_pow_shl_cnt_fail_out_of_bounds(i32 %cnt) nounwind {
; CHECK-AVX2-NEXT: movl $1, %eax
; CHECK-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-AVX2-NEXT: shll %cl, %eax
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm1 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
@ -1466,7 +1466,7 @@ define half @fdiv_pow_shl_cnt_fail_out_of_bounds(i32 %cnt) nounwind {
; CHECK-NO-FASTFMA-NEXT: movl $1, %eax
; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NO-FASTFMA-NEXT: shll %cl, %eax
; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %eax, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0
; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vcvtph2ps %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vmovss {{.*#+}} xmm1 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
@ -1478,7 +1478,7 @@ define half @fdiv_pow_shl_cnt_fail_out_of_bounds(i32 %cnt) nounwind {
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: movl $1, %eax
; CHECK-FMA-NEXT: shlxl %edi, %eax, %eax
; CHECK-FMA-NEXT: vcvtusi2ss %eax, %xmm0, %xmm0
; CHECK-FMA-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0
; CHECK-FMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-FMA-NEXT: vcvtph2ps %xmm0, %xmm0
; CHECK-FMA-NEXT: vmovss {{.*#+}} xmm1 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
@ -1562,7 +1562,7 @@ define half @fdiv_pow_shl_cnt_fail_out_of_bound2(i16 %cnt) nounwind {
; CHECK-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-AVX2-NEXT: shll %cl, %eax
; CHECK-AVX2-NEXT: movzwl %ax, %eax
; CHECK-AVX2-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT
; CHECK-AVX2-NEXT: callq __extendhfsf2@PLT
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm1 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
@ -1578,7 +1578,7 @@ define half @fdiv_pow_shl_cnt_fail_out_of_bound2(i16 %cnt) nounwind {
; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NO-FASTFMA-NEXT: shll %cl, %eax
; CHECK-NO-FASTFMA-NEXT: movzwl %ax, %eax
; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-NO-FASTFMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vcvtph2ps %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vmovss {{.*#+}} xmm1 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
@ -1591,7 +1591,7 @@ define half @fdiv_pow_shl_cnt_fail_out_of_bound2(i16 %cnt) nounwind {
; CHECK-FMA-NEXT: movl $1, %eax
; CHECK-FMA-NEXT: shlxl %edi, %eax, %eax
; CHECK-FMA-NEXT: movzwl %ax, %eax
; CHECK-FMA-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-FMA-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-FMA-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-FMA-NEXT: vcvtph2ps %xmm0, %xmm0
; CHECK-FMA-NEXT: vmovss {{.*#+}} xmm1 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0]
@ -1648,7 +1648,7 @@ define float @fdiv_pow_shl_cnt32_out_of_bounds2(i32 %cnt) nounwind {
; CHECK-AVX2-NEXT: movl $1, %eax
; CHECK-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-AVX2-NEXT: shll %cl, %eax
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm1 = [1.00974148E-28,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX2-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-AVX2-NEXT: retq
@ -1659,7 +1659,7 @@ define float @fdiv_pow_shl_cnt32_out_of_bounds2(i32 %cnt) nounwind {
; CHECK-NO-FASTFMA-NEXT: movl $1, %eax
; CHECK-NO-FASTFMA-NEXT: # kill: def $cl killed $cl killed $ecx
; CHECK-NO-FASTFMA-NEXT: shll %cl, %eax
; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %eax, %xmm0, %xmm0
; CHECK-NO-FASTFMA-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0
; CHECK-NO-FASTFMA-NEXT: vmovss {{.*#+}} xmm1 = [1.00974148E-28,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NO-FASTFMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-NO-FASTFMA-NEXT: retq
@ -1668,7 +1668,7 @@ define float @fdiv_pow_shl_cnt32_out_of_bounds2(i32 %cnt) nounwind {
; CHECK-FMA: # %bb.0:
; CHECK-FMA-NEXT: movl $1, %eax
; CHECK-FMA-NEXT: shlxl %edi, %eax, %eax
; CHECK-FMA-NEXT: vcvtusi2ss %eax, %xmm0, %xmm0
; CHECK-FMA-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0
; CHECK-FMA-NEXT: vmovss {{.*#+}} xmm1 = [1.00974148E-28,0.0E+0,0.0E+0,0.0E+0]
; CHECK-FMA-NEXT: vdivss %xmm0, %xmm1, %xmm0
; CHECK-FMA-NEXT: retq

@ -89,7 +89,7 @@ define float @rcpss_size(ptr %a) optsize {
;
; AVX-LABEL: rcpss_size:
; AVX: # %bb.0:
; AVX-NEXT: vrcpss (%rdi), %xmm0, %xmm0
; AVX-NEXT: vrcpss (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
%ld = load float, ptr %a
%ins = insertelement <4 x float> undef, float %ld, i32 0
@ -106,7 +106,7 @@ define <4 x float> @rcpss_full_size(ptr %a) optsize {
;
; AVX-LABEL: rcpss_full_size:
; AVX: # %bb.0:
; AVX-NEXT: vrcpss (%rdi), %xmm0, %xmm0
; AVX-NEXT: vrcpss (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
%ld = load <4 x float>, ptr %a
%res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %ld)
@ -121,7 +121,7 @@ define float @rcpss_pgso(ptr %a) !prof !14 {
;
; AVX-LABEL: rcpss_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vrcpss (%rdi), %xmm0, %xmm0
; AVX-NEXT: vrcpss (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
%ld = load float, ptr %a
%ins = insertelement <4 x float> undef, float %ld, i32 0
@ -138,7 +138,7 @@ define <4 x float> @rcpss_full_pgso(ptr %a) !prof !14 {
;
; AVX-LABEL: rcpss_full_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vrcpss (%rdi), %xmm0, %xmm0
; AVX-NEXT: vrcpss (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
%ld = load <4 x float>, ptr %a
%res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %ld)
@ -153,7 +153,7 @@ define float @rsqrtss_size(ptr %a) optsize {
;
; AVX-LABEL: rsqrtss_size:
; AVX: # %bb.0:
; AVX-NEXT: vrsqrtss (%rdi), %xmm0, %xmm0
; AVX-NEXT: vrsqrtss (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
%ld = load float, ptr %a
%ins = insertelement <4 x float> undef, float %ld, i32 0
@ -170,7 +170,7 @@ define <4 x float> @rsqrtss_full_size(ptr %a) optsize {
;
; AVX-LABEL: rsqrtss_full_size:
; AVX: # %bb.0:
; AVX-NEXT: vrsqrtss (%rdi), %xmm0, %xmm0
; AVX-NEXT: vrsqrtss (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
%ld = load <4 x float>, ptr %a
%res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %ld)
@ -185,7 +185,7 @@ define float @rsqrtss_pgso(ptr %a) !prof !14 {
;
; AVX-LABEL: rsqrtss_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vrsqrtss (%rdi), %xmm0, %xmm0
; AVX-NEXT: vrsqrtss (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
%ld = load float, ptr %a
%ins = insertelement <4 x float> undef, float %ld, i32 0
@ -202,7 +202,7 @@ define <4 x float> @rsqrtss_full_pgso(ptr %a) !prof !14 {
;
; AVX-LABEL: rsqrtss_full_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vrsqrtss (%rdi), %xmm0, %xmm0
; AVX-NEXT: vrsqrtss (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
%ld = load <4 x float>, ptr %a
%res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %ld)
@ -217,7 +217,7 @@ define float @sqrtss_size(ptr %a) optsize{
;
; AVX-LABEL: sqrtss_size:
; AVX: # %bb.0:
; AVX-NEXT: vsqrtss (%rdi), %xmm0, %xmm0
; AVX-NEXT: vsqrtss (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
%ld = load float, ptr %a
%ins = insertelement <4 x float> undef, float %ld, i32 0
@ -268,7 +268,7 @@ define float @sqrtss_pgso(ptr %a) !prof !14 {
;
; AVX-LABEL: sqrtss_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vsqrtss (%rdi), %xmm0, %xmm0
; AVX-NEXT: vsqrtss (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
%ld = load float, ptr %a
%ins = insertelement <4 x float> undef, float %ld, i32 0
@ -319,7 +319,7 @@ define double @sqrtsd_size(ptr %a) optsize {
;
; AVX-LABEL: sqrtsd_size:
; AVX: # %bb.0:
; AVX-NEXT: vsqrtsd (%rdi), %xmm0, %xmm0
; AVX-NEXT: vsqrtsd (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
%ld = load double, ptr %a
%ins = insertelement <2 x double> undef, double %ld, i32 0
@ -370,7 +370,7 @@ define double @sqrtsd_pgso(ptr %a) !prof !14 {
;
; AVX-LABEL: sqrtsd_pgso:
; AVX: # %bb.0:
; AVX-NEXT: vsqrtsd (%rdi), %xmm0, %xmm0
; AVX-NEXT: vsqrtsd (%rdi), %xmm15, %xmm0
; AVX-NEXT: retq
%ld = load double, ptr %a
%ins = insertelement <2 x double> undef, double %ld, i32 0

@ -2018,7 +2018,7 @@ define double @sifdb(i8 %x) #0 {
; AVX-LABEL: sifdb:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movsbl %dil, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2sd %eax, %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.sitofp.f64.i8(i8 %x,
@ -2062,7 +2062,7 @@ define double @sifdw(i16 %x) #0 {
; AVX-LABEL: sifdw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movswl %di, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2sd %eax, %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.sitofp.f64.i16(i16 %x,
@ -2103,7 +2103,7 @@ define double @sifdi(i32 %x) #0 {
;
; AVX-LABEL: sifdi:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2sd %edi, %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %x,
@ -2147,7 +2147,7 @@ define float @siffb(i8 %x) #0 {
; AVX-LABEL: siffb:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movsbl %dil, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.sitofp.f32.i8(i8 %x,
@ -2191,7 +2191,7 @@ define float @siffw(i16 %x) #0 {
; AVX-LABEL: siffw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movswl %di, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.sitofp.f32.i16(i16 %x,
@ -2232,7 +2232,7 @@ define float @siffi(i32 %x) #0 {
;
; AVX-LABEL: siffi:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ss %edi, %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %x,
@ -2267,7 +2267,7 @@ define double @sifdl(i64 %x) #0 {
;
; AVX-LABEL: sifdl:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2sd %rdi, %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %x,
@ -2302,7 +2302,7 @@ define float @siffl(i64 %x) #0 {
;
; AVX-LABEL: siffl:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ss %rdi, %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.sitofp.f32.i64(i64 %x,
@ -2349,7 +2349,7 @@ define double @uifdb(i8 %x) #0 {
; AVX-LABEL: uifdb:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movzbl %dil, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2sd %eax, %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.uitofp.f64.i8(i8 %x,
@ -2393,7 +2393,7 @@ define double @uifdw(i16 %x) #0 {
; AVX-LABEL: uifdw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2sd %eax, %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.uitofp.f64.i16(i16 %x,
@ -2440,12 +2440,12 @@ define double @uifdi(i32 %x) #0 {
; AVX1-LABEL: uifdi:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm0, %xmm0
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: uifdi:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0
; AVX512-NEXT: vcvtusi2sd %edi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %x,
@ -2518,7 +2518,7 @@ define double @uifdl(i64 %x) #0 {
; AVX1-NEXT: orq %rax, %rcx
; AVX1-NEXT: testq %rdi, %rdi
; AVX1-NEXT: cmovnsq %rdi, %rcx
; AVX1-NEXT: vcvtsi2sd %rcx, %xmm0, %xmm0
; AVX1-NEXT: vcvtsi2sd %rcx, %xmm15, %xmm0
; AVX1-NEXT: jns .LBB48_2
; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddsd %xmm0, %xmm0, %xmm0
@ -2527,7 +2527,7 @@ define double @uifdl(i64 %x) #0 {
;
; AVX512-LABEL: uifdl:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0
; AVX512-NEXT: vcvtusi2sd %rdi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%result = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %x,
@ -2571,7 +2571,7 @@ define float @uiffb(i8 %x) #0 {
; AVX-LABEL: uiffb:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movzbl %dil, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.uitofp.f32.i8(i8 %x,
@ -2615,7 +2615,7 @@ define float @uiffw(i16 %x) #0 {
; AVX-LABEL: uiffw:
; AVX: # %bb.0: # %entry
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.uitofp.f32.i16(i16 %x,
@ -2662,12 +2662,12 @@ define float @uiffi(i32 %x) #0 {
; AVX1-LABEL: uiffi:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: uiffi:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
; AVX512-NEXT: vcvtusi2ss %edi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %x,
@ -2740,7 +2740,7 @@ define float @uiffl(i64 %x) #0 {
; AVX1-NEXT: orq %rax, %rcx
; AVX1-NEXT: testq %rdi, %rdi
; AVX1-NEXT: cmovnsq %rdi, %rcx
; AVX1-NEXT: vcvtsi2ss %rcx, %xmm0, %xmm0
; AVX1-NEXT: vcvtsi2ss %rcx, %xmm15, %xmm0
; AVX1-NEXT: jns .LBB52_2
; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
@ -2749,7 +2749,7 @@ define float @uiffl(i64 %x) #0 {
;
; AVX512-LABEL: uiffl:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0
; AVX512-NEXT: vcvtusi2ss %rdi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%result = call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %x,

@ -33,7 +33,7 @@ define half @sitofp_i1tof16(i1 %x) #0 {
; AVX-NEXT: andb $1, %dil
; AVX-NEXT: negb %dil
; AVX-NEXT: movsbl %dil, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@ -45,7 +45,7 @@ define half @sitofp_i1tof16(i1 %x) #0 {
; X86-NEXT: andb $1, %al
; X86-NEXT: negb %al
; X86-NEXT: movsbl %al, %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: vcvtsi2sh %eax, %xmm7, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_i1tof16:
@ -53,7 +53,7 @@ define half @sitofp_i1tof16(i1 %x) #0 {
; X64-NEXT: andb $1, %dil
; X64-NEXT: negb %dil
; X64-NEXT: movsbl %dil, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: vcvtsi2sh %eax, %xmm31, %xmm0
; X64-NEXT: retq
%result = call half @llvm.experimental.constrained.sitofp.f16.i1(i1 %x,
metadata !"round.dynamic",
@ -74,7 +74,7 @@ define half @sitofp_i8tof16(i8 %x) #0 {
; AVX-LABEL: sitofp_i8tof16:
; AVX: # %bb.0:
; AVX-NEXT: movsbl %dil, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@ -83,13 +83,13 @@ define half @sitofp_i8tof16(i8 %x) #0 {
; X86-LABEL: sitofp_i8tof16:
; X86: # %bb.0:
; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: vcvtsi2sh %eax, %xmm7, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_i8tof16:
; X64: # %bb.0:
; X64-NEXT: movsbl %dil, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: vcvtsi2sh %eax, %xmm31, %xmm0
; X64-NEXT: retq
%result = call half @llvm.experimental.constrained.sitofp.f16.i8(i8 %x,
metadata !"round.dynamic",
@ -110,7 +110,7 @@ define half @sitofp_i16tof16(i16 %x) #0 {
; AVX-LABEL: sitofp_i16tof16:
; AVX: # %bb.0:
; AVX-NEXT: movswl %di, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@ -119,13 +119,13 @@ define half @sitofp_i16tof16(i16 %x) #0 {
; X86-LABEL: sitofp_i16tof16:
; X86: # %bb.0:
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: vcvtsi2sh %eax, %xmm7, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_i16tof16:
; X64: # %bb.0:
; X64-NEXT: movswl %di, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: vcvtsi2sh %eax, %xmm31, %xmm0
; X64-NEXT: retq
%result = call half @llvm.experimental.constrained.sitofp.f16.i16(i16 %x,
metadata !"round.dynamic",
@ -144,7 +144,7 @@ define half @sitofp_i32tof16(i32 %x) #0 {
;
; AVX-LABEL: sitofp_i32tof16:
; AVX: # %bb.0:
; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ss %edi, %xmm15, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@ -152,12 +152,12 @@ define half @sitofp_i32tof16(i32 %x) #0 {
;
; X86-LABEL: sitofp_i32tof16:
; X86: # %bb.0:
; X86-NEXT: vcvtsi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: vcvtsi2shl {{[0-9]+}}(%esp), %xmm7, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: sitofp_i32tof16:
; X64: # %bb.0:
; X64-NEXT: vcvtsi2sh %edi, %xmm0, %xmm0
; X64-NEXT: vcvtsi2sh %edi, %xmm31, %xmm0
; X64-NEXT: retq
%result = call half @llvm.experimental.constrained.sitofp.f16.i32(i32 %x,
metadata !"round.dynamic",
@ -176,7 +176,7 @@ define half @sitofp_i64tof16(i64 %x) #0 {
;
; AVX-LABEL: sitofp_i64tof16:
; AVX: # %bb.0:
; AVX-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ss %rdi, %xmm15, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@ -190,7 +190,7 @@ define half @sitofp_i64tof16(i64 %x) #0 {
;
; X64-LABEL: sitofp_i64tof16:
; X64: # %bb.0:
; X64-NEXT: vcvtsi2sh %rdi, %xmm0, %xmm0
; X64-NEXT: vcvtsi2sh %rdi, %xmm31, %xmm0
; X64-NEXT: retq
%result = call half @llvm.experimental.constrained.sitofp.f16.i64(i64 %x,
metadata !"round.dynamic",
@ -211,7 +211,7 @@ define half @uitofp_i1tof16(i1 %x) #0 {
; AVX-LABEL: uitofp_i1tof16:
; AVX: # %bb.0:
; AVX-NEXT: andl $1, %edi
; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ss %edi, %xmm15, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@ -222,13 +222,13 @@ define half @uitofp_i1tof16(i1 %x) #0 {
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: andb $1, %al
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: vcvtsi2sh %eax, %xmm7, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_i1tof16:
; X64: # %bb.0:
; X64-NEXT: andl $1, %edi
; X64-NEXT: vcvtsi2sh %edi, %xmm0, %xmm0
; X64-NEXT: vcvtsi2sh %edi, %xmm31, %xmm0
; X64-NEXT: retq
%result = call half @llvm.experimental.constrained.uitofp.f16.i1(i1 %x,
metadata !"round.dynamic",
@ -249,7 +249,7 @@ define half @uitofp_i8tof16(i8 %x) #0 {
; AVX-LABEL: uitofp_i8tof16:
; AVX: # %bb.0:
; AVX-NEXT: movzbl %dil, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@ -258,13 +258,13 @@ define half @uitofp_i8tof16(i8 %x) #0 {
; X86-LABEL: uitofp_i8tof16:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: vcvtsi2sh %eax, %xmm7, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_i8tof16:
; X64: # %bb.0:
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: vcvtsi2sh %eax, %xmm31, %xmm0
; X64-NEXT: retq
%result = call half @llvm.experimental.constrained.uitofp.f16.i8(i8 %x,
metadata !"round.dynamic",
@ -285,7 +285,7 @@ define half @uitofp_i16tof16(i16 %x) #0 {
; AVX-LABEL: uitofp_i16tof16:
; AVX: # %bb.0:
; AVX-NEXT: movzwl %di, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@ -294,13 +294,13 @@ define half @uitofp_i16tof16(i16 %x) #0 {
; X86-LABEL: uitofp_i16tof16:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X86-NEXT: vcvtsi2sh %eax, %xmm7, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_i16tof16:
; X64: # %bb.0:
; X64-NEXT: movzwl %di, %eax
; X64-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; X64-NEXT: vcvtsi2sh %eax, %xmm31, %xmm0
; X64-NEXT: retq
%result = call half @llvm.experimental.constrained.uitofp.f16.i16(i16 %x,
metadata !"round.dynamic",
@ -321,7 +321,7 @@ define half @uitofp_i32tof16(i32 %x) #0 {
; F16C-LABEL: uitofp_i32tof16:
; F16C: # %bb.0:
; F16C-NEXT: movl %edi, %eax
; F16C-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; F16C-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
; F16C-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@ -329,7 +329,7 @@ define half @uitofp_i32tof16(i32 %x) #0 {
;
; AVX512-LABEL: uitofp_i32tof16:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
; AVX512-NEXT: vcvtusi2ss %edi, %xmm15, %xmm0
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@ -337,12 +337,12 @@ define half @uitofp_i32tof16(i32 %x) #0 {
;
; X86-LABEL: uitofp_i32tof16:
; X86: # %bb.0:
; X86-NEXT: vcvtusi2shl {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: vcvtusi2shl {{[0-9]+}}(%esp), %xmm7, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: uitofp_i32tof16:
; X64: # %bb.0:
; X64-NEXT: vcvtusi2sh %edi, %xmm0, %xmm0
; X64-NEXT: vcvtusi2sh %edi, %xmm31, %xmm0
; X64-NEXT: retq
%result = call half @llvm.experimental.constrained.uitofp.f16.i32(i32 %x,
metadata !"round.dynamic",
@ -381,7 +381,7 @@ define half @uitofp_i64tof16(i64 %x) #0 {
; F16C-NEXT: orq %rax, %rcx
; F16C-NEXT: testq %rdi, %rdi
; F16C-NEXT: cmovnsq %rdi, %rcx
; F16C-NEXT: vcvtsi2ss %rcx, %xmm0, %xmm0
; F16C-NEXT: vcvtsi2ss %rcx, %xmm15, %xmm0
; F16C-NEXT: jns .LBB9_2
; F16C-NEXT: # %bb.1:
; F16C-NEXT: vaddss %xmm0, %xmm0, %xmm0
@ -393,7 +393,7 @@ define half @uitofp_i64tof16(i64 %x) #0 {
;
; AVX512-LABEL: uitofp_i64tof16:
; AVX512: # %bb.0:
; AVX512-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0
; AVX512-NEXT: vcvtusi2ss %rdi, %xmm15, %xmm0
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@ -407,7 +407,7 @@ define half @uitofp_i64tof16(i64 %x) #0 {
;
; X64-LABEL: uitofp_i64tof16:
; X64: # %bb.0:
; X64-NEXT: vcvtusi2sh %rdi, %xmm0, %xmm0
; X64-NEXT: vcvtusi2sh %rdi, %xmm31, %xmm0
; X64-NEXT: retq
%result = call half @llvm.experimental.constrained.uitofp.f16.i64(i64 %x,
metadata !"round.dynamic",

@ -62,7 +62,7 @@ define float @sitofp_i1tof32(i1 %x) #0 {
; AVX-X86-NEXT: andb $1, %al
; AVX-X86-NEXT: negb %al
; AVX-X86-NEXT: movsbl %al, %eax
; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm7, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
@ -75,7 +75,7 @@ define float @sitofp_i1tof32(i1 %x) #0 {
; AVX-X64-NEXT: andb $1, %dil
; AVX-X64-NEXT: negb %dil
; AVX-X64-NEXT: movsbl %dil, %eax
; AVX-X64-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-X64-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: sitofp_i1tof32:
@ -123,7 +123,7 @@ define float @sitofp_i8tof32(i8 %x) #0 {
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm7, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
@ -134,7 +134,7 @@ define float @sitofp_i8tof32(i8 %x) #0 {
; AVX-X64-LABEL: sitofp_i8tof32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: movsbl %dil, %eax
; AVX-X64-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-X64-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: sitofp_i8tof32:
@ -179,7 +179,7 @@ define float @sitofp_i16tof32(i16 %x) #0 {
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm7, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
@ -190,7 +190,7 @@ define float @sitofp_i16tof32(i16 %x) #0 {
; AVX-X64-LABEL: sitofp_i16tof32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: movswl %di, %eax
; AVX-X64-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-X64-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: sitofp_i16tof32:
@ -232,7 +232,7 @@ define float @sitofp_i32tof32(i32 %x) #0 {
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm7, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
@ -242,7 +242,7 @@ define float @sitofp_i32tof32(i32 %x) #0 {
;
; AVX-X64-LABEL: sitofp_i32tof32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-X64-NEXT: vcvtsi2ss %edi, %xmm15, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: sitofp_i32tof32:
@ -294,7 +294,7 @@ define float @sitofp_i64tof32(i64 %x) #0 {
;
; AVX-X64-LABEL: sitofp_i64tof32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX-X64-NEXT: vcvtsi2ss %rdi, %xmm15, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: sitofp_i64tof32:
@ -337,7 +337,7 @@ define float @uitofp_i1tof32(i1 %x) #0 {
; AVX-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT: andb $1, %al
; AVX-X86-NEXT: movzbl %al, %eax
; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm7, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
@ -348,7 +348,7 @@ define float @uitofp_i1tof32(i1 %x) #0 {
; AVX-X64-LABEL: uitofp_i1tof32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: andl $1, %edi
; AVX-X64-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-X64-NEXT: vcvtsi2ss %edi, %xmm15, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: uitofp_i1tof32:
@ -395,7 +395,7 @@ define float @uitofp_i8tof32(i8 %x) #0 {
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm7, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
@ -406,7 +406,7 @@ define float @uitofp_i8tof32(i8 %x) #0 {
; AVX-X64-LABEL: uitofp_i8tof32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: movzbl %dil, %eax
; AVX-X64-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-X64-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: uitofp_i8tof32:
@ -451,7 +451,7 @@ define float @uitofp_i16tof32(i16 %x) #0 {
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: .cfi_def_cfa_offset 8
; AVX-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm7, %xmm0
; AVX-X86-NEXT: vmovss %xmm0, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: wait
@ -462,7 +462,7 @@ define float @uitofp_i16tof32(i16 %x) #0 {
; AVX-X64-LABEL: uitofp_i16tof32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: movzwl %di, %eax
; AVX-X64-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-X64-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: uitofp_i16tof32:
@ -534,14 +534,14 @@ define float @uitofp_i32tof32(i32 %x) #0 {
; AVX1-X64-LABEL: uitofp_i32tof32:
; AVX1-X64: # %bb.0:
; AVX1-X64-NEXT: movl %edi, %eax
; AVX1-X64-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; AVX1-X64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX1-X64-NEXT: retq
;
; AVX512-X86-LABEL: uitofp_i32tof32:
; AVX512-X86: # %bb.0:
; AVX512-X86-NEXT: pushl %eax
; AVX512-X86-NEXT: .cfi_def_cfa_offset 8
; AVX512-X86-NEXT: vcvtusi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512-X86-NEXT: vcvtusi2ssl {{[0-9]+}}(%esp), %xmm7, %xmm0
; AVX512-X86-NEXT: vmovss %xmm0, (%esp)
; AVX512-X86-NEXT: flds (%esp)
; AVX512-X86-NEXT: wait
@ -551,7 +551,7 @@ define float @uitofp_i32tof32(i32 %x) #0 {
;
; AVX512-X64-LABEL: uitofp_i32tof32:
; AVX512-X64: # %bb.0:
; AVX512-X64-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
; AVX512-X64-NEXT: vcvtusi2ss %edi, %xmm15, %xmm0
; AVX512-X64-NEXT: retq
;
; X87-LABEL: uitofp_i32tof32:
@ -656,7 +656,7 @@ define float @uitofp_i64tof32(i64 %x) #0 {
; AVX1-X64-NEXT: orq %rax, %rcx
; AVX1-X64-NEXT: testq %rdi, %rdi
; AVX1-X64-NEXT: cmovnsq %rdi, %rcx
; AVX1-X64-NEXT: vcvtsi2ss %rcx, %xmm0, %xmm0
; AVX1-X64-NEXT: vcvtsi2ss %rcx, %xmm15, %xmm0
; AVX1-X64-NEXT: jns .LBB9_2
; AVX1-X64-NEXT: # %bb.1:
; AVX1-X64-NEXT: vaddss %xmm0, %xmm0, %xmm0
@ -665,7 +665,7 @@ define float @uitofp_i64tof32(i64 %x) #0 {
;
; AVX512-X64-LABEL: uitofp_i64tof32:
; AVX512-X64: # %bb.0:
; AVX512-X64-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0
; AVX512-X64-NEXT: vcvtusi2ss %rdi, %xmm15, %xmm0
; AVX512-X64-NEXT: retq
;
; X87-LABEL: uitofp_i64tof32:
@ -733,7 +733,7 @@ define double @sitofp_i8tof64(i8 %x) #0 {
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: movsbl 8(%ebp), %eax
; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm7, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
@ -745,7 +745,7 @@ define double @sitofp_i8tof64(i8 %x) #0 {
; AVX-X64-LABEL: sitofp_i8tof64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: movsbl %dil, %eax
; AVX-X64-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-X64-NEXT: vcvtsi2sd %eax, %xmm15, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: sitofp_i8tof64:
@ -801,7 +801,7 @@ define double @sitofp_i16tof64(i16 %x) #0 {
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: movswl 8(%ebp), %eax
; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm7, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
@ -813,7 +813,7 @@ define double @sitofp_i16tof64(i16 %x) #0 {
; AVX-X64-LABEL: sitofp_i16tof64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: movswl %di, %eax
; AVX-X64-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-X64-NEXT: vcvtsi2sd %eax, %xmm15, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: sitofp_i16tof64:
@ -866,7 +866,7 @@ define double @sitofp_i32tof64(i32 %x) #0 {
; AVX-X86-NEXT: .cfi_def_cfa_register %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vcvtsi2sdl 8(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT: vcvtsi2sdl 8(%ebp), %xmm7, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
@ -877,7 +877,7 @@ define double @sitofp_i32tof64(i32 %x) #0 {
;
; AVX-X64-LABEL: sitofp_i32tof64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
; AVX-X64-NEXT: vcvtsi2sd %edi, %xmm15, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: sitofp_i32tof64:
@ -941,7 +941,7 @@ define double @sitofp_i64tof64(i64 %x) #0 {
;
; AVX-X64-LABEL: sitofp_i64tof64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0
; AVX-X64-NEXT: vcvtsi2sd %rdi, %xmm15, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: sitofp_i64tof64:
@ -995,7 +995,7 @@ define double @uitofp_i1tof64(i1 %x) #0 {
; AVX-X86-NEXT: movzbl 8(%ebp), %eax
; AVX-X86-NEXT: andb $1, %al
; AVX-X86-NEXT: movzbl %al, %eax
; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm7, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
@ -1007,7 +1007,7 @@ define double @uitofp_i1tof64(i1 %x) #0 {
; AVX-X64-LABEL: uitofp_i1tof64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: andl $1, %edi
; AVX-X64-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
; AVX-X64-NEXT: vcvtsi2sd %edi, %xmm15, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: uitofp_i1tof64:
@ -1065,7 +1065,7 @@ define double @uitofp_i8tof64(i8 %x) #0 {
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: movzbl 8(%ebp), %eax
; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm7, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
@ -1077,7 +1077,7 @@ define double @uitofp_i8tof64(i8 %x) #0 {
; AVX-X64-LABEL: uitofp_i8tof64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: movzbl %dil, %eax
; AVX-X64-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-X64-NEXT: vcvtsi2sd %eax, %xmm15, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: uitofp_i8tof64:
@ -1133,7 +1133,7 @@ define double @uitofp_i16tof64(i16 %x) #0 {
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: movzwl 8(%ebp), %eax
; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm7, %xmm0
; AVX-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: wait
@ -1145,7 +1145,7 @@ define double @uitofp_i16tof64(i16 %x) #0 {
; AVX-X64-LABEL: uitofp_i16tof64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: movzwl %di, %eax
; AVX-X64-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-X64-NEXT: vcvtsi2sd %eax, %xmm15, %xmm0
; AVX-X64-NEXT: retq
;
; X87-LABEL: uitofp_i16tof64:
@ -1217,7 +1217,7 @@ define double @uitofp_i32tof64(i32 %x) #0 {
; AVX1-X64-LABEL: uitofp_i32tof64:
; AVX1-X64: # %bb.0:
; AVX1-X64-NEXT: movl %edi, %eax
; AVX1-X64-NEXT: vcvtsi2sd %rax, %xmm0, %xmm0
; AVX1-X64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX1-X64-NEXT: retq
;
; AVX512-X86-LABEL: uitofp_i32tof64:
@ -1229,7 +1229,7 @@ define double @uitofp_i32tof64(i32 %x) #0 {
; AVX512-X86-NEXT: .cfi_def_cfa_register %ebp
; AVX512-X86-NEXT: andl $-8, %esp
; AVX512-X86-NEXT: subl $8, %esp
; AVX512-X86-NEXT: vcvtusi2sdl 8(%ebp), %xmm0, %xmm0
; AVX512-X86-NEXT: vcvtusi2sdl 8(%ebp), %xmm7, %xmm0
; AVX512-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX512-X86-NEXT: fldl (%esp)
; AVX512-X86-NEXT: wait
@ -1240,7 +1240,7 @@ define double @uitofp_i32tof64(i32 %x) #0 {
;
; AVX512-X64-LABEL: uitofp_i32tof64:
; AVX512-X64: # %bb.0:
; AVX512-X64-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0
; AVX512-X64-NEXT: vcvtusi2sd %edi, %xmm15, %xmm0
; AVX512-X64-NEXT: retq
;
; X87-LABEL: uitofp_i32tof64:
@ -1345,7 +1345,7 @@ define double @uitofp_i64tof64(i64 %x) #0 {
; AVX1-X64-NEXT: orq %rax, %rcx
; AVX1-X64-NEXT: testq %rdi, %rdi
; AVX1-X64-NEXT: cmovnsq %rdi, %rcx
; AVX1-X64-NEXT: vcvtsi2sd %rcx, %xmm0, %xmm0
; AVX1-X64-NEXT: vcvtsi2sd %rcx, %xmm15, %xmm0
; AVX1-X64-NEXT: jns .LBB18_2
; AVX1-X64-NEXT: # %bb.1:
; AVX1-X64-NEXT: vaddsd %xmm0, %xmm0, %xmm0
@ -1354,7 +1354,7 @@ define double @uitofp_i64tof64(i64 %x) #0 {
;
; AVX512-X64-LABEL: uitofp_i64tof64:
; AVX512-X64: # %bb.0:
; AVX512-X64-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0
; AVX512-X64-NEXT: vcvtusi2sd %rdi, %xmm15, %xmm0
; AVX512-X64-NEXT: retq
;
; X87-LABEL: uitofp_i64tof64:

@ -37,7 +37,7 @@ define half @fceil32(half %f) #0 {
;
; X86-LABEL: fceil32:
; X86: # %bb.0:
; X86-NEXT: vrndscalesh $10, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: vrndscalesh $10, {{[0-9]+}}(%esp), %xmm7, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: fceil32:
@ -73,7 +73,7 @@ define half @ffloor32(half %f) #0 {
;
; X86-LABEL: ffloor32:
; X86: # %bb.0:
; X86-NEXT: vrndscalesh $9, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: vrndscalesh $9, {{[0-9]+}}(%esp), %xmm7, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: ffloor32:
@ -109,7 +109,7 @@ define half @ftrunc32(half %f) #0 {
;
; X86-LABEL: ftrunc32:
; X86: # %bb.0:
; X86-NEXT: vrndscalesh $11, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: vrndscalesh $11, {{[0-9]+}}(%esp), %xmm7, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: ftrunc32:
@ -145,7 +145,7 @@ define half @frint32(half %f) #0 {
;
; X86-LABEL: frint32:
; X86: # %bb.0:
; X86-NEXT: vrndscalesh $4, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: vrndscalesh $4, {{[0-9]+}}(%esp), %xmm7, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: frint32:
@ -182,7 +182,7 @@ define half @fnearbyint32(half %f) #0 {
;
; X86-LABEL: fnearbyint32:
; X86: # %bb.0:
; X86-NEXT: vrndscalesh $12, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: vrndscalesh $12, {{[0-9]+}}(%esp), %xmm7, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: fnearbyint32:
@ -219,7 +219,7 @@ define half @froundeven16(half %f) #0 {
;
; X86-LABEL: froundeven16:
; X86: # %bb.0:
; X86-NEXT: vrndscalesh $8, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-NEXT: vrndscalesh $8, {{[0-9]+}}(%esp), %xmm7, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: froundeven16:

@ -514,7 +514,7 @@ define double @trunc_signed_f64_no_fast_math(double %x) {
; X64-AVX1-LABEL: trunc_signed_f64_no_fast_math:
; X64-AVX1: # %bb.0:
; X64-AVX1-NEXT: vcvttsd2si %xmm0, %rax
; X64-AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm0
; X64-AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; X64-AVX1-NEXT: retq
;
; X86-AVX1-LABEL: trunc_signed_f64_no_fast_math:
@ -695,7 +695,7 @@ define float @trunc_unsigned_f32_disable_via_intrinsic(float %x) #0 {
; X64-AVX1-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-AVX1-NEXT: movl $-1, %eax
; X64-AVX1-NEXT: cmovbel %ecx, %eax
; X64-AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; X64-AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; X64-AVX1-NEXT: retq
;
; X86-AVX1-LABEL: trunc_unsigned_f32_disable_via_intrinsic:
@ -752,7 +752,7 @@ define double @trunc_signed_f64_disable_via_intrinsic(double %x) #0 {
; X64-AVX1-NEXT: xorl %eax, %eax
; X64-AVX1-NEXT: vucomisd %xmm0, %xmm0
; X64-AVX1-NEXT: cmovnpq %rcx, %rax
; X64-AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm0
; X64-AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; X64-AVX1-NEXT: retq
;
; X86-AVX1-LABEL: trunc_signed_f64_disable_via_intrinsic:

@ -262,7 +262,7 @@ define void @test_sitofp_i64(i64 %a, ptr %p) #0 {
;
; BWON-F16C-LABEL: test_sitofp_i64:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm15, %xmm0
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rsi)
; BWON-F16C-NEXT: retq
@ -385,14 +385,14 @@ define void @test_uitofp_i64(i64 %a, ptr %p) #0 {
; BWON-F16C-NEXT: testq %rdi, %rdi
; BWON-F16C-NEXT: js .LBB10_1
; BWON-F16C-NEXT: # %bb.2:
; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm15, %xmm0
; BWON-F16C-NEXT: jmp .LBB10_3
; BWON-F16C-NEXT: .LBB10_1:
; BWON-F16C-NEXT: movq %rdi, %rax
; BWON-F16C-NEXT: shrq %rax
; BWON-F16C-NEXT: andl $1, %edi
; BWON-F16C-NEXT: orq %rax, %rdi
; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm15, %xmm0
; BWON-F16C-NEXT: vaddss %xmm0, %xmm0, %xmm0
; BWON-F16C-NEXT: .LBB10_3:
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
@ -843,7 +843,7 @@ define float @test_sitofp_fadd_i32(i32 %a, ptr %b) #0 {
; BWON-F16C-LABEL: test_sitofp_fadd_i32:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: vpinsrw $0, (%rsi), %xmm0, %xmm0
; BWON-F16C-NEXT: vcvtsi2ss %edi, %xmm1, %xmm1
; BWON-F16C-NEXT: vcvtsi2ss %edi, %xmm15, %xmm1
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0

@ -33,7 +33,7 @@ define double @test_ui64_to_double(i64 %x) {
;
; AVX512-LABEL: test_ui64_to_double:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0
; AVX512-NEXT: vcvtusi2sd %rdi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%conv = uitofp i64 %x to double
@ -49,7 +49,7 @@ define double @test_ui32_to_double(i32 %x) {
;
; AVX512-LABEL: test_ui32_to_double:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0
; AVX512-NEXT: vcvtusi2sd %edi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%conv = uitofp i32 %x to double
@ -64,12 +64,12 @@ define double @test_ui16_to_double(i16 zeroext %x) {
;
; SDAG-AVX512-LABEL: test_ui16_to_double:
; SDAG-AVX512: # %bb.0: # %entry
; SDAG-AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
; SDAG-AVX512-NEXT: vcvtsi2sd %edi, %xmm15, %xmm0
; SDAG-AVX512-NEXT: retq
;
; GISEL-AVX512-LABEL: test_ui16_to_double:
; GISEL-AVX512: # %bb.0: # %entry
; GISEL-AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0
; GISEL-AVX512-NEXT: vcvtusi2sd %edi, %xmm15, %xmm0
; GISEL-AVX512-NEXT: retq
entry:
%conv = uitofp i16 %x to double
@ -84,12 +84,12 @@ define double @test_ui8_to_double(i8 zeroext %x) {
;
; SDAG-AVX512-LABEL: test_ui8_to_double:
; SDAG-AVX512: # %bb.0: # %entry
; SDAG-AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
; SDAG-AVX512-NEXT: vcvtsi2sd %edi, %xmm15, %xmm0
; SDAG-AVX512-NEXT: retq
;
; GISEL-AVX512-LABEL: test_ui8_to_double:
; GISEL-AVX512: # %bb.0: # %entry
; GISEL-AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0
; GISEL-AVX512-NEXT: vcvtusi2sd %edi, %xmm15, %xmm0
; GISEL-AVX512-NEXT: retq
entry:
%conv = uitofp i8 %x to double
@ -135,7 +135,7 @@ define float @test_ui64_to_float(i64 %x) {
;
; AVX512-LABEL: test_ui64_to_float:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0
; AVX512-NEXT: vcvtusi2ss %rdi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%conv = uitofp i64 %x to float
@ -151,7 +151,7 @@ define float @test_ui32_to_float(i32 %x) {
;
; AVX512-LABEL: test_ui32_to_float:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
; AVX512-NEXT: vcvtusi2ss %edi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%conv = uitofp i32 %x to float
@ -166,12 +166,12 @@ define float @test_ui16_to_float(i16 zeroext %x) {
;
; SDAG-AVX512-LABEL: test_ui16_to_float:
; SDAG-AVX512: # %bb.0: # %entry
; SDAG-AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; SDAG-AVX512-NEXT: vcvtsi2ss %edi, %xmm15, %xmm0
; SDAG-AVX512-NEXT: retq
;
; GISEL-AVX512-LABEL: test_ui16_to_float:
; GISEL-AVX512: # %bb.0: # %entry
; GISEL-AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
; GISEL-AVX512-NEXT: vcvtusi2ss %edi, %xmm15, %xmm0
; GISEL-AVX512-NEXT: retq
entry:
%conv = uitofp i16 %x to float
@ -186,12 +186,12 @@ define float @test_ui8_to_float(i8 zeroext %x) {
;
; SDAG-AVX512-LABEL: test_ui8_to_float:
; SDAG-AVX512: # %bb.0: # %entry
; SDAG-AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; SDAG-AVX512-NEXT: vcvtsi2ss %edi, %xmm15, %xmm0
; SDAG-AVX512-NEXT: retq
;
; GISEL-AVX512-LABEL: test_ui8_to_float:
; GISEL-AVX512: # %bb.0: # %entry
; GISEL-AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
; GISEL-AVX512-NEXT: vcvtusi2ss %edi, %xmm15, %xmm0
; GISEL-AVX512-NEXT: retq
entry:
%conv = uitofp i8 %x to float
@ -206,7 +206,7 @@ define double @test_si64_to_double(i64 %x) {
;
; AVX512-LABEL: test_si64_to_double:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0
; AVX512-NEXT: vcvtsi2sd %rdi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%conv = sitofp i64 %x to double
@ -221,7 +221,7 @@ define double @test_si32_to_double(i32 %x) {
;
; AVX512-LABEL: test_si32_to_double:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
; AVX512-NEXT: vcvtsi2sd %edi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%conv = sitofp i32 %x to double
@ -236,7 +236,7 @@ define double @test_si16_to_double(i16 signext %x) {
;
; AVX512-LABEL: test_si16_to_double:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
; AVX512-NEXT: vcvtsi2sd %edi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%conv = sitofp i16 %x to double
@ -251,7 +251,7 @@ define double @test_si8_to_double(i8 signext %x) {
;
; AVX512-LABEL: test_si8_to_double:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
; AVX512-NEXT: vcvtsi2sd %edi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%conv = sitofp i8 %x to double
@ -270,7 +270,7 @@ define double @test_si31_to_double(i31 %x) {
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: addl %edi, %edi
; AVX512-NEXT: sarl %edi
; AVX512-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
; AVX512-NEXT: vcvtsi2sd %edi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%conv = sitofp i31 %x to double
@ -289,7 +289,7 @@ define double @test_si33_to_double(i33 %x) {
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: shlq $31, %rdi
; AVX512-NEXT: sarq $31, %rdi
; AVX512-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0
; AVX512-NEXT: vcvtsi2sd %rdi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%conv = sitofp i33 %x to double
@ -304,7 +304,7 @@ define float @test_si64_to_float(i64 %x) {
;
; AVX512-LABEL: test_si64_to_float:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX512-NEXT: vcvtsi2ss %rdi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%conv = sitofp i64 %x to float
@ -319,7 +319,7 @@ define float @test_si32_to_float(i32 %x) {
;
; AVX512-LABEL: test_si32_to_float:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; AVX512-NEXT: vcvtsi2ss %edi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%conv = sitofp i32 %x to float
@ -334,7 +334,7 @@ define float @test_si16_to_float(i16 signext %x) {
;
; AVX512-LABEL: test_si16_to_float:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; AVX512-NEXT: vcvtsi2ss %edi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%conv = sitofp i16 %x to float
@ -349,7 +349,7 @@ define float @test_si8_to_float(i8 signext %x) {
;
; AVX512-LABEL: test_si8_to_float:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; AVX512-NEXT: vcvtsi2ss %edi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%conv = sitofp i8 %x to float
@ -368,7 +368,7 @@ define float @test_si31_to_float(i31 %x) {
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: addl %edi, %edi
; AVX512-NEXT: sarl %edi
; AVX512-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; AVX512-NEXT: vcvtsi2ss %edi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%conv = sitofp i31 %x to float
@ -387,7 +387,7 @@ define float @test_si33_to_float(i33 %x) {
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: shlq $31, %rdi
; AVX512-NEXT: sarq $31, %rdi
; AVX512-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX512-NEXT: vcvtsi2ss %rdi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%conv = sitofp i33 %x to float

@ -124,7 +124,7 @@ define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 {
; AVX-NEXT: fldt 16(%rbp)
; AVX-NEXT: fld %st(0)
; AVX-NEXT: fisttpl -4(%rbp)
; AVX-NEXT: vcvtsi2sdl -4(%rbp), %xmm0, %xmm0
; AVX-NEXT: vcvtsi2sdl -4(%rbp), %xmm15, %xmm0
; AVX-NEXT: vmovsd %xmm0, -48(%rbp)
; AVX-NEXT: vmovsd %xmm0, -24(%rbp)
; AVX-NEXT: fsubl -24(%rbp)
@ -132,7 +132,7 @@ define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 {
; AVX-NEXT: fmul %st, %st(1)
; AVX-NEXT: fld %st(1)
; AVX-NEXT: fisttpl -8(%rbp)
; AVX-NEXT: vcvtsi2sdl -8(%rbp), %xmm1, %xmm0
; AVX-NEXT: vcvtsi2sdl -8(%rbp), %xmm15, %xmm0
; AVX-NEXT: vmovsd %xmm0, -40(%rbp)
; AVX-NEXT: vmovsd %xmm0, -16(%rbp)
; AVX-NEXT: fxch %st(1)

@ -5,7 +5,7 @@ define double @foo(ptr nocapture readonly) #0 {
; CHECK-LABEL: foo:
; CHECK: ## %bb.0:
; CHECK-NEXT: movq (%rax), %rax
; CHECK-NEXT: vcvtsi2sd %rax, %xmm0, %xmm1
; CHECK-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; CHECK-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1}

@ -17,7 +17,7 @@ define dso_local float @_Z3fn2v() {
; CHECK-NEXT: cmpl $0, c(%rip)
; CHECK-NEXT: je .LBB0_2
; CHECK-NEXT: # %bb.1: # %if.then
; CHECK-NEXT: vcvtsi2ssl b(%rip), %xmm1, %xmm1
; CHECK-NEXT: vcvtsi2ssl b(%rip), %xmm15, %xmm1
; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
; CHECK-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1}

@ -221,12 +221,12 @@ define float @test11(ptr %xptr) nounwind optsize {
;
; CHECK-AVX-LABEL: test11:
; CHECK-AVX: ## %bb.0:
; CHECK-AVX-NEXT: vroundss $11, (%rdi), %xmm0, %xmm0
; CHECK-AVX-NEXT: vroundss $11, (%rdi), %xmm15, %xmm0
; CHECK-AVX-NEXT: retq
;
; CHECK-AVX512-LABEL: test11:
; CHECK-AVX512: ## %bb.0:
; CHECK-AVX512-NEXT: vroundss $11, (%rdi), %xmm0, %xmm0
; CHECK-AVX512-NEXT: vroundss $11, (%rdi), %xmm15, %xmm0
; CHECK-AVX512-NEXT: retq
%x = load float, ptr %xptr
%call = tail call float @truncf(float %x) nounwind readnone
@ -241,12 +241,12 @@ define double @test12(ptr %xptr) nounwind optsize {
;
; CHECK-AVX-LABEL: test12:
; CHECK-AVX: ## %bb.0:
; CHECK-AVX-NEXT: vroundsd $11, (%rdi), %xmm0, %xmm0
; CHECK-AVX-NEXT: vroundsd $11, (%rdi), %xmm15, %xmm0
; CHECK-AVX-NEXT: retq
;
; CHECK-AVX512-LABEL: test12:
; CHECK-AVX512: ## %bb.0:
; CHECK-AVX512-NEXT: vroundsd $11, (%rdi), %xmm0, %xmm0
; CHECK-AVX512-NEXT: vroundsd $11, (%rdi), %xmm15, %xmm0
; CHECK-AVX512-NEXT: retq
%x = load double, ptr %xptr
%call = tail call double @trunc(double %x) nounwind readnone
@ -261,12 +261,12 @@ define float @test11_pgso(ptr %xptr) nounwind !prof !14 {
;
; CHECK-AVX-LABEL: test11_pgso:
; CHECK-AVX: ## %bb.0:
; CHECK-AVX-NEXT: vroundss $11, (%rdi), %xmm0, %xmm0
; CHECK-AVX-NEXT: vroundss $11, (%rdi), %xmm15, %xmm0
; CHECK-AVX-NEXT: retq
;
; CHECK-AVX512-LABEL: test11_pgso:
; CHECK-AVX512: ## %bb.0:
; CHECK-AVX512-NEXT: vroundss $11, (%rdi), %xmm0, %xmm0
; CHECK-AVX512-NEXT: vroundss $11, (%rdi), %xmm15, %xmm0
; CHECK-AVX512-NEXT: retq
%x = load float, ptr %xptr
%call = tail call float @truncf(float %x) nounwind readnone
@ -281,12 +281,12 @@ define double @test12_pgso(ptr %xptr) nounwind !prof !14 {
;
; CHECK-AVX-LABEL: test12_pgso:
; CHECK-AVX: ## %bb.0:
; CHECK-AVX-NEXT: vroundsd $11, (%rdi), %xmm0, %xmm0
; CHECK-AVX-NEXT: vroundsd $11, (%rdi), %xmm15, %xmm0
; CHECK-AVX-NEXT: retq
;
; CHECK-AVX512-LABEL: test12_pgso:
; CHECK-AVX512: ## %bb.0:
; CHECK-AVX512-NEXT: vroundsd $11, (%rdi), %xmm0, %xmm0
; CHECK-AVX512-NEXT: vroundsd $11, (%rdi), %xmm15, %xmm0
; CHECK-AVX512-NEXT: retq
%x = load double, ptr %xptr
%call = tail call double @trunc(double %x) nounwind readnone


@ -18,7 +18,7 @@ define float @u32_to_f(i32 %a) nounwind {
; AVX512_32-LABEL: u32_to_f:
; AVX512_32: # %bb.0:
; AVX512_32-NEXT: pushl %eax
; AVX512_32-NEXT: vcvtusi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512_32-NEXT: vcvtusi2ssl {{[0-9]+}}(%esp), %xmm7, %xmm0
; AVX512_32-NEXT: vmovss %xmm0, (%esp)
; AVX512_32-NEXT: flds (%esp)
; AVX512_32-NEXT: popl %eax
@ -26,7 +26,7 @@ define float @u32_to_f(i32 %a) nounwind {
;
; AVX512_64-LABEL: u32_to_f:
; AVX512_64: # %bb.0:
; AVX512_64-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
; AVX512_64-NEXT: vcvtusi2ss %edi, %xmm15, %xmm0
; AVX512_64-NEXT: retq
;
; SSE2_32-LABEL: u32_to_f:
@ -84,7 +84,7 @@ define float @s32_to_f(i32 %a) nounwind {
; AVX512_32-LABEL: s32_to_f:
; AVX512_32: # %bb.0:
; AVX512_32-NEXT: pushl %eax
; AVX512_32-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX512_32-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm7, %xmm0
; AVX512_32-NEXT: vmovss %xmm0, (%esp)
; AVX512_32-NEXT: flds (%esp)
; AVX512_32-NEXT: popl %eax
@ -92,7 +92,7 @@ define float @s32_to_f(i32 %a) nounwind {
;
; AVX512_64-LABEL: s32_to_f:
; AVX512_64: # %bb.0:
; AVX512_64-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; AVX512_64-NEXT: vcvtsi2ss %edi, %xmm15, %xmm0
; AVX512_64-NEXT: retq
;
; SSE_32-LABEL: s32_to_f:
@ -128,7 +128,7 @@ define double @u32_to_d(i32 %a) nounwind {
; AVX512_32-NEXT: movl %esp, %ebp
; AVX512_32-NEXT: andl $-8, %esp
; AVX512_32-NEXT: subl $8, %esp
; AVX512_32-NEXT: vcvtusi2sdl 8(%ebp), %xmm0, %xmm0
; AVX512_32-NEXT: vcvtusi2sdl 8(%ebp), %xmm7, %xmm0
; AVX512_32-NEXT: vmovsd %xmm0, (%esp)
; AVX512_32-NEXT: fldl (%esp)
; AVX512_32-NEXT: movl %ebp, %esp
@ -137,7 +137,7 @@ define double @u32_to_d(i32 %a) nounwind {
;
; AVX512_64-LABEL: u32_to_d:
; AVX512_64: # %bb.0:
; AVX512_64-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0
; AVX512_64-NEXT: vcvtusi2sd %edi, %xmm15, %xmm0
; AVX512_64-NEXT: retq
;
; SSE2_32-LABEL: u32_to_d:
@ -199,7 +199,7 @@ define double @s32_to_d(i32 %a) nounwind {
; AVX512_32-NEXT: movl %esp, %ebp
; AVX512_32-NEXT: andl $-8, %esp
; AVX512_32-NEXT: subl $8, %esp
; AVX512_32-NEXT: vcvtsi2sdl 8(%ebp), %xmm0, %xmm0
; AVX512_32-NEXT: vcvtsi2sdl 8(%ebp), %xmm7, %xmm0
; AVX512_32-NEXT: vmovsd %xmm0, (%esp)
; AVX512_32-NEXT: fldl (%esp)
; AVX512_32-NEXT: movl %ebp, %esp
@ -208,7 +208,7 @@ define double @s32_to_d(i32 %a) nounwind {
;
; AVX512_64-LABEL: s32_to_d:
; AVX512_64: # %bb.0:
; AVX512_64-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
; AVX512_64-NEXT: vcvtsi2sd %edi, %xmm15, %xmm0
; AVX512_64-NEXT: retq
;
; SSE2_32-LABEL: s32_to_d:
@ -308,7 +308,7 @@ define float @u64_to_f(i64 %a) nounwind {
;
; AVX512_64-LABEL: u64_to_f:
; AVX512_64: # %bb.0:
; AVX512_64-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0
; AVX512_64-NEXT: vcvtusi2ss %rdi, %xmm15, %xmm0
; AVX512_64-NEXT: retq
;
; AVX512DQ_32-LABEL: u64_to_f:
@ -437,7 +437,7 @@ define float @s64_to_f(i64 %a) nounwind {
;
; AVX512_64-LABEL: s64_to_f:
; AVX512_64: # %bb.0:
; AVX512_64-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX512_64-NEXT: vcvtsi2ss %rdi, %xmm15, %xmm0
; AVX512_64-NEXT: retq
;
; AVX512DQ_32-LABEL: s64_to_f:
@ -502,7 +502,7 @@ define float @s64_to_f_2(i64 %a) nounwind {
; AVX512_64-LABEL: s64_to_f_2:
; AVX512_64: # %bb.0:
; AVX512_64-NEXT: addq $5, %rdi
; AVX512_64-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX512_64-NEXT: vcvtsi2ss %rdi, %xmm15, %xmm0
; AVX512_64-NEXT: retq
;
; AVX512DQ_32-LABEL: s64_to_f_2:
@ -626,7 +626,7 @@ define double @u64_to_d(i64 %a) nounwind {
;
; AVX512_64-LABEL: u64_to_d:
; AVX512_64: # %bb.0:
; AVX512_64-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0
; AVX512_64-NEXT: vcvtusi2sd %rdi, %xmm15, %xmm0
; AVX512_64-NEXT: retq
;
; AVX512DQ_32-LABEL: u64_to_d:
@ -748,7 +748,7 @@ define double @u64_to_d_optsize(i64 %a) nounwind optsize {
;
; AVX512_64-LABEL: u64_to_d_optsize:
; AVX512_64: # %bb.0:
; AVX512_64-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0
; AVX512_64-NEXT: vcvtusi2sd %rdi, %xmm15, %xmm0
; AVX512_64-NEXT: retq
;
; AVX512DQ_32-LABEL: u64_to_d_optsize:
@ -869,7 +869,7 @@ define double @s64_to_d(i64 %a) nounwind {
;
; AVX512_64-LABEL: s64_to_d:
; AVX512_64: # %bb.0:
; AVX512_64-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0
; AVX512_64-NEXT: vcvtsi2sd %rdi, %xmm15, %xmm0
; AVX512_64-NEXT: retq
;
; AVX512DQ_32-LABEL: s64_to_d:
@ -955,7 +955,7 @@ define double @s64_to_d_2(i64 %a) nounwind {
; AVX512_64-LABEL: s64_to_d_2:
; AVX512_64: # %bb.0:
; AVX512_64-NEXT: addq $5, %rdi
; AVX512_64-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0
; AVX512_64-NEXT: vcvtsi2sd %rdi, %xmm15, %xmm0
; AVX512_64-NEXT: retq
;
; AVX512DQ_32-LABEL: s64_to_d_2:


@ -17,13 +17,13 @@ define float @uint8ToFloat(i8 %int8) {
; CHECK-NO_FP16-LABEL: uint8ToFloat:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: movzbl %dil, %eax
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: uint8ToFloat:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: movzbl %dil, %eax
; CHECK-WITH_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-WITH_FP16-NEXT: retq
%fp32 = uitofp i8 %int8 to float
ret float %fp32
@ -62,14 +62,14 @@ define half @uint8ToHalf(i8 %int8) {
; CHECK-NO_FP16-LABEL: uint8ToHalf:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: movzbl %dil, %eax
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: uint8ToHalf:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: movzbl %dil, %eax
; CHECK-WITH_FP16-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT: vcvtsi2sh %eax, %xmm31, %xmm0
; CHECK-WITH_FP16-NEXT: retq
%fp32 = uitofp i8 %int8 to half
ret half %fp32
@ -111,13 +111,13 @@ define float @sint8ToFloat(i8 %int8) {
; CHECK-NO_FP16-LABEL: sint8ToFloat:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: movsbl %dil, %eax
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: sint8ToFloat:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: movsbl %dil, %eax
; CHECK-WITH_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-WITH_FP16-NEXT: retq
%fp32 = sitofp i8 %int8 to float
ret float %fp32
@ -143,14 +143,14 @@ define half @sint8ToHalf(i8 %int8) {
; CHECK-NO_FP16-LABEL: sint8ToHalf:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: movsbl %dil, %eax
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: sint8ToHalf:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: movsbl %dil, %eax
; CHECK-WITH_FP16-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT: vcvtsi2sh %eax, %xmm31, %xmm0
; CHECK-WITH_FP16-NEXT: retq
%fp32 = sitofp i8 %int8 to half
ret half %fp32
@ -184,13 +184,13 @@ define float @uint16ToFloat(i16 %int16) {
; CHECK-NO_FP16-LABEL: uint16ToFloat:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: movzwl %di, %eax
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: uint16ToFloat:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: movzwl %di, %eax
; CHECK-WITH_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-WITH_FP16-NEXT: retq
%fp32 = uitofp i16 %int16 to float
ret float %fp32
@ -216,14 +216,14 @@ define half @uint16ToHalf(i16 %int16) {
; CHECK-NO_FP16-LABEL: uint16ToHalf:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: movzwl %di, %eax
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: uint16ToHalf:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: movzwl %di, %eax
; CHECK-WITH_FP16-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT: vcvtsi2sh %eax, %xmm31, %xmm0
; CHECK-WITH_FP16-NEXT: retq
%fp32 = uitofp i16 %int16 to half
ret half %fp32
@ -249,13 +249,13 @@ define float @sint16ToFloat(i16 %int16) {
; CHECK-NO_FP16-LABEL: sint16ToFloat:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: movswl %di, %eax
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: sint16ToFloat:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: movswl %di, %eax
; CHECK-WITH_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-WITH_FP16-NEXT: retq
%fp32 = sitofp i16 %int16 to float
ret float %fp32
@ -281,14 +281,14 @@ define half @sint16ToHalf(i16 %int16) {
; CHECK-NO_FP16-LABEL: sint16ToHalf:
; CHECK-NO_FP16: # %bb.0:
; CHECK-NO_FP16-NEXT: movswl %di, %eax
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; CHECK-NO_FP16-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-NO_FP16-NEXT: retq
;
; CHECK-WITH_FP16-LABEL: sint16ToHalf:
; CHECK-WITH_FP16: # %bb.0:
; CHECK-WITH_FP16-NEXT: movswl %di, %eax
; CHECK-WITH_FP16-NEXT: vcvtsi2sh %eax, %xmm0, %xmm0
; CHECK-WITH_FP16-NEXT: vcvtsi2sh %eax, %xmm31, %xmm0
; CHECK-WITH_FP16-NEXT: retq
%fp32 = sitofp i16 %int16 to half
ret half %fp32

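Which register the reversed scan lands on depends on how many registers the encoding can name: 32-bit mode has only xmm0-xmm7 (hence the xmm7 picks above), VEX in 64-bit mode reaches xmm15, and the EVEX-encoded vcvtsi2sh can address up to xmm31. A trivial illustration of that mapping (hypothetical helper, not part of any LLVM API):

// Index of the last allocatable XMM register, i.e. the first candidate
// the reversed order visits. A sketch under the stated assumptions only.
unsigned lastXmmIndex(bool Is64Bit, bool HasEVEX) {
  if (!Is64Bit)
    return 7;                // x86-32: xmm0-xmm7 only
  return HasEVEX ? 31 : 15;  // EVEX encodings add xmm16-xmm31
}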

@ -1,21 +1,35 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64
define <4 x i64> @autogen_SD88863() {
; CHECK-LABEL: autogen_SD88863:
; CHECK: # %bb.0: # %BB
; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm0[0,1]
; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT: movb $1, %al
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: # %CF
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB0_1
; CHECK-NEXT: # %bb.2: # %CF240
; CHECK-NEXT: ret{{[l|q]}}
; X86-LABEL: autogen_SD88863:
; X86: # %bb.0: # %BB
; X86-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm7[0,1]
; X86-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X86-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[3]
; X86-NEXT: movb $1, %al
; X86-NEXT: .p2align 4
; X86-NEXT: .LBB0_1: # %CF
; X86-NEXT: # =>This Inner Loop Header: Depth=1
; X86-NEXT: testb %al, %al
; X86-NEXT: jne .LBB0_1
; X86-NEXT: # %bb.2: # %CF240
; X86-NEXT: retl
;
; X64-LABEL: autogen_SD88863:
; X64: # %bb.0: # %BB
; X64-NEXT: vperm2f128 {{.*#+}} ymm0 = zero,zero,ymm15[0,1]
; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; X64-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[3],ymm1[3]
; X64-NEXT: movb $1, %al
; X64-NEXT: .p2align 4
; X64-NEXT: .LBB0_1: # %CF
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: testb %al, %al
; X64-NEXT: jne .LBB0_1
; X64-NEXT: # %bb.2: # %CF240
; X64-NEXT: retq
BB:
%I26 = insertelement <4 x i64> undef, i64 undef, i32 2
br label %CF


@ -23,7 +23,7 @@ define float @float_to_int_to_float_mem_f32_i32(ptr %p) #0 {
; AVX-LABEL: float_to_int_to_float_mem_f32_i32:
; AVX: # %bb.0:
; AVX-NEXT: vcvttss2si (%rdi), %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; AVX-NEXT: retq
%x = load <4 x float>, ptr %p, align 16
%fptosi = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %x)
@ -42,7 +42,7 @@ define float @float_to_int_to_float_reg_f32_i32(<4 x float> %x) #0 {
; AVX-LABEL: float_to_int_to_float_reg_f32_i32:
; AVX: # %bb.0:
; AVX-NEXT: vcvttss2si %xmm0, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm1, %xmm0
; AVX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; AVX-NEXT: retq
%fptosi = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %x)
%sitofp = sitofp i32 %fptosi to float
@ -59,7 +59,7 @@ define float @float_to_int_to_float_mem_f32_i64(ptr %p) #0 {
; AVX-LABEL: float_to_int_to_float_mem_f32_i64:
; AVX: # %bb.0:
; AVX-NEXT: vcvttss2si (%rdi), %rax
; AVX-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX-NEXT: retq
%x = load <4 x float>, ptr %p, align 16
%fptosi = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %x)
@ -78,7 +78,7 @@ define float @float_to_int_to_float_reg_f32_i64(<4 x float> %x) #0 {
; AVX-LABEL: float_to_int_to_float_reg_f32_i64:
; AVX: # %bb.0:
; AVX-NEXT: vcvttss2si %xmm0, %rax
; AVX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm0
; AVX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX-NEXT: retq
%fptosi = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %x)
%sitofp = sitofp i64 %fptosi to float
@ -95,7 +95,7 @@ define double @float_to_int_to_float_mem_f64_i32(ptr %p) #0 {
; AVX-LABEL: float_to_int_to_float_mem_f64_i32:
; AVX: # %bb.0:
; AVX-NEXT: vcvttsd2si (%rdi), %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2sd %eax, %xmm15, %xmm0
; AVX-NEXT: retq
%x = load <2 x double>, ptr %p, align 16
%fptosi = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %x)
@ -114,7 +114,7 @@ define double @float_to_int_to_float_reg_f64_i32(<2 x double> %x) #0 {
; AVX-LABEL: float_to_int_to_float_reg_f64_i32:
; AVX: # %bb.0:
; AVX-NEXT: vcvttsd2si %xmm0, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm1, %xmm0
; AVX-NEXT: vcvtsi2sd %eax, %xmm15, %xmm0
; AVX-NEXT: retq
%fptosi = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %x)
%sitofp = sitofp i32 %fptosi to double
@ -131,7 +131,7 @@ define double @float_to_int_to_float_mem_f64_i64(ptr %p) #0 {
; AVX-LABEL: float_to_int_to_float_mem_f64_i64:
; AVX: # %bb.0:
; AVX-NEXT: vcvttsd2si (%rdi), %rax
; AVX-NEXT: vcvtsi2sd %rax, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX-NEXT: retq
%x = load <2 x double>, ptr %p, align 16
%fptosi = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %x)
@ -150,7 +150,7 @@ define double @float_to_int_to_float_reg_f64_i64(<2 x double> %x) #0 {
; AVX-LABEL: float_to_int_to_float_reg_f64_i64:
; AVX: # %bb.0:
; AVX-NEXT: vcvttsd2si %xmm0, %rax
; AVX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm0
; AVX-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX-NEXT: retq
%fptosi = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %x)
%sitofp = sitofp i64 %fptosi to double


@ -782,7 +782,7 @@ define <2 x double> @test_x86_sse2_cvtss2sd_load_optsize(<2 x double> %a0, ptr %
; X86-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-AVX1: ## %bb.0:
; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT: vcvtss2sd (%eax), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x08]
; X86-AVX1-NEXT: vcvtss2sd (%eax), %xmm7, %xmm1 ## encoding: [0xc5,0xc2,0x5a,0x08]
; X86-AVX1-NEXT: vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
; X86-AVX1-NEXT: ## xmm0 = xmm1[0],xmm0[1]
; X86-AVX1-NEXT: retl ## encoding: [0xc3]
@ -790,7 +790,7 @@ define <2 x double> @test_x86_sse2_cvtss2sd_load_optsize(<2 x double> %a0, ptr %
; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-AVX512: ## %bb.0:
; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT: vcvtss2sd (%eax), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x08]
; X86-AVX512-NEXT: vcvtss2sd (%eax), %xmm7, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xc2,0x5a,0x08]
; X86-AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
; X86-AVX512-NEXT: ## xmm0 = xmm1[0],xmm0[1]
; X86-AVX512-NEXT: retl ## encoding: [0xc3]
@ -804,14 +804,14 @@ define <2 x double> @test_x86_sse2_cvtss2sd_load_optsize(<2 x double> %a0, ptr %
;
; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-AVX1: ## %bb.0:
; X64-AVX1-NEXT: vcvtss2sd (%rdi), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x0f]
; X64-AVX1-NEXT: vcvtss2sd (%rdi), %xmm15, %xmm1 ## encoding: [0xc5,0x82,0x5a,0x0f]
; X64-AVX1-NEXT: vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
; X64-AVX1-NEXT: ## xmm0 = xmm1[0],xmm0[1]
; X64-AVX1-NEXT: retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-AVX512: ## %bb.0:
; X64-AVX512-NEXT: vcvtss2sd (%rdi), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x0f]
; X64-AVX512-NEXT: vcvtss2sd (%rdi), %xmm15, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0x82,0x5a,0x0f]
; X64-AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
; X64-AVX512-NEXT: ## xmm0 = xmm1[0],xmm0[1]
; X64-AVX512-NEXT: retq ## encoding: [0xc3]


@ -990,7 +990,7 @@ define double @stack_fold_cvtsi2sd(i32 %a0) {
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtsi2sdl {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: vcvtsi2sdl {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
@ -1034,7 +1034,7 @@ define <2 x double> @stack_fold_cvtsi2sd_int(i32 %a0) {
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtsi2sdl {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: vcvtsi2sdl {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
@ -1080,7 +1080,7 @@ define double @stack_fold_cvtsi642sd(i64 %a0) {
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtsi2sdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: vcvtsi2sdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
@ -1124,7 +1124,7 @@ define <2 x double> @stack_fold_cvtsi642sd_int(i64 %a0) {
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtsi2sdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: vcvtsi2sdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
@ -1170,7 +1170,7 @@ define float @stack_fold_cvtsi2ss(i32 %a0) {
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtsi2ssl {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: vcvtsi2ssl {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
@ -1214,7 +1214,7 @@ define <4 x float> @stack_fold_cvtsi2ss_int(i32 %a0) {
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtsi2ssl {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: vcvtsi2ssl {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: popq %rbx
@ -1261,7 +1261,7 @@ define float @stack_fold_cvtsi642ss(i64 %a0) {
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtsi2ssq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: vcvtsi2ssq {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
@ -1305,7 +1305,7 @@ define <4 x float> @stack_fold_cvtsi642ss_int(i64 %a0) {
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vcvtsi2ssq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: vcvtsi2ssq {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; CHECK-NEXT: popq %rbx
@ -2861,8 +2861,8 @@ define double @stack_fold_roundsd(double %a0) optsize {
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vroundsd $9, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: vxorps %xmm15, %xmm15, %xmm15
; CHECK-NEXT: vroundsd $9, {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call double @llvm.floor.f64(double %a0)
@ -2876,7 +2876,7 @@ define double @stack_fold_roundsd_minsize(double %a0) minsize {
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vroundsd $9, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: vroundsd $9, {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call double @llvm.floor.f64(double %a0)
@ -2908,8 +2908,8 @@ define float @stack_fold_roundss(float %a0) optsize {
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vroundss $9, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: vxorps %xmm15, %xmm15, %xmm15
; CHECK-NEXT: vroundss $9, {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call float @llvm.floor.f32(float %a0)
@ -3106,8 +3106,8 @@ define double @stack_fold_sqrtsd(double %a0) optsize {
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vsqrtsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: vxorps %xmm15, %xmm15, %xmm15
; CHECK-NEXT: vsqrtsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call double @llvm.sqrt.f64(double %a0)
@ -3124,8 +3124,8 @@ define float @stack_fold_sqrtss(float %a0) optsize {
; CHECK-NEXT: #APP
; CHECK-NEXT: nop
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: vsqrtss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: vxorps %xmm15, %xmm15, %xmm15
; CHECK-NEXT: vsqrtss {{[-0-9]+}}(%r{{[sb]}}p), %xmm15, %xmm0 # 4-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call float @llvm.sqrt.f32(float %a0)

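The optsize and minsize stack-folding checks above also exercise the companion mechanism: the inline asm clobbers every candidate register, so renaming the undef operand alone is not enough, and the pass breaks the dependency outright by zeroing the scratch register (the vxorps) before the folded-load vroundsd/vsqrtsd reads it; the reversed order moves both sides of that idiom to xmm15, leaving xmm0 free for the return value. A hedged sketch of that selection logic, with hypothetical names rather than the pass's real interface:

#include <functional>
#include <vector>

// Sketch only (names hypothetical): walk the reversed order looking for a
// register whose last definition is at least MinClearance instructions
// away; if none qualifies, keep the first candidate and request a zeroing
// idiom (vxorps) so the folded read starts from a clean register.
unsigned pickUndefReg(const std::vector<unsigned> &ReversedOrder,
                      const std::function<unsigned(unsigned)> &Clearance,
                      unsigned MinClearance, bool &NeedZeroIdiom) {
  for (unsigned Reg : ReversedOrder)
    if (Clearance(Reg) >= MinClearance) {
      NeedZeroIdiom = false;
      return Reg;
    }
  NeedZeroIdiom = true;
  return ReversedOrder.front();
}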

@ -227,9 +227,9 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; AVX-64-LABEL: sitofp_v2i64_v2f32:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX-64-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX-64-NEXT: vmovq %xmm0, %rax
; AVX-64-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; AVX-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX-64-NEXT: retq
;
@ -246,9 +246,9 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; AVX512DQ-64-LABEL: sitofp_v2i64_v2f32:
; AVX512DQ-64: # %bb.0:
; AVX512DQ-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512DQ-64-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512DQ-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX512DQ-64-NEXT: vmovq %xmm0, %rax
; AVX512DQ-64-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512DQ-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX512DQ-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512DQ-64-NEXT: retq
;
@ -439,9 +439,9 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; AVX1-64-NEXT: vpor %xmm1, %xmm2, %xmm1
; AVX1-64-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
; AVX1-64-NEXT: vpextrq $1, %xmm1, %rax
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX1-64-NEXT: vmovq %xmm1, %rax
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX1-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
; AVX1-64-NEXT: vaddps %xmm1, %xmm1, %xmm2
; AVX1-64-NEXT: vpxor %xmm3, %xmm3, %xmm3
@ -453,18 +453,18 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; AVX512F-64-LABEL: uitofp_v2i64_v2f32:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; AVX512F-64-NEXT: vmovq %xmm0, %rax
; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; AVX512F-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512F-64-NEXT: retq
;
; AVX512VL-64-LABEL: uitofp_v2i64_v2f32:
; AVX512VL-64: # %bb.0:
; AVX512VL-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; AVX512VL-64-NEXT: vmovq %xmm0, %rax
; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; AVX512VL-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512VL-64-NEXT: retq
;
@ -481,9 +481,9 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 {
; AVX512DQ-64-LABEL: uitofp_v2i64_v2f32:
; AVX512DQ-64: # %bb.0:
; AVX512DQ-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512DQ-64-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512DQ-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; AVX512DQ-64-NEXT: vmovq %xmm0, %rax
; AVX512DQ-64-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512DQ-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; AVX512DQ-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512DQ-64-NEXT: retq
;
@ -1237,9 +1237,9 @@ define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; AVX-64-LABEL: sitofp_v2i64_v2f64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX-64-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; AVX-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX-64-NEXT: vmovq %xmm0, %rax
; AVX-64-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
; AVX-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-64-NEXT: retq
;
@ -1439,7 +1439,7 @@ define <2 x double> @uitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; AVX1-64-NEXT: orq %rcx, %rdx
; AVX1-64-NEXT: testq %rax, %rax
; AVX1-64-NEXT: cmovnsq %rax, %rdx
; AVX1-64-NEXT: vcvtsi2sd %rdx, %xmm1, %xmm1
; AVX1-64-NEXT: vcvtsi2sd %rdx, %xmm15, %xmm1
; AVX1-64-NEXT: jns .LBB21_2
; AVX1-64-NEXT: # %bb.1:
; AVX1-64-NEXT: vaddsd %xmm1, %xmm1, %xmm1
@ -1452,7 +1452,7 @@ define <2 x double> @uitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; AVX1-64-NEXT: orq %rcx, %rdx
; AVX1-64-NEXT: testq %rax, %rax
; AVX1-64-NEXT: cmovnsq %rax, %rdx
; AVX1-64-NEXT: vcvtsi2sd %rdx, %xmm2, %xmm0
; AVX1-64-NEXT: vcvtsi2sd %rdx, %xmm15, %xmm0
; AVX1-64-NEXT: jns .LBB21_4
; AVX1-64-NEXT: # %bb.3:
; AVX1-64-NEXT: vaddsd %xmm0, %xmm0, %xmm0
@ -1463,18 +1463,18 @@ define <2 x double> @uitofp_v2i64_v2f64(<2 x i64> %x) #0 {
; AVX512F-64-LABEL: uitofp_v2i64_v2f64:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT: vcvtusi2sd %rax, %xmm1, %xmm1
; AVX512F-64-NEXT: vcvtusi2sd %rax, %xmm15, %xmm1
; AVX512F-64-NEXT: vmovq %xmm0, %rax
; AVX512F-64-NEXT: vcvtusi2sd %rax, %xmm2, %xmm0
; AVX512F-64-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0
; AVX512F-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-64-NEXT: retq
;
; AVX512VL-64-LABEL: uitofp_v2i64_v2f64:
; AVX512VL-64: # %bb.0:
; AVX512VL-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT: vcvtusi2sd %rax, %xmm1, %xmm1
; AVX512VL-64-NEXT: vcvtusi2sd %rax, %xmm15, %xmm1
; AVX512VL-64-NEXT: vmovq %xmm0, %rax
; AVX512VL-64-NEXT: vcvtusi2sd %rax, %xmm2, %xmm0
; AVX512VL-64-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0
; AVX512VL-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-64-NEXT: retq
;


@ -670,14 +670,14 @@ define <4 x double> @sitofp_v4i64_v4f64(<4 x i64> %x) #0 {
; AVX1-64: # %bb.0:
; AVX1-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-64-NEXT: vpextrq $1, %xmm1, %rax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX1-64-NEXT: vmovq %xmm1, %rax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX1-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX1-64-NEXT: vmovq %xmm0, %rax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX1-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-64-NEXT: retq
@ -686,14 +686,14 @@ define <4 x double> @sitofp_v4i64_v4f64(<4 x i64> %x) #0 {
; AVX2-64: # %bb.0:
; AVX2-64-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-64-NEXT: vpextrq $1, %xmm1, %rax
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX2-64-NEXT: vmovq %xmm1, %rax
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX2-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX2-64-NEXT: vmovq %xmm0, %rax
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX2-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-64-NEXT: retq
@ -702,14 +702,14 @@ define <4 x double> @sitofp_v4i64_v4f64(<4 x i64> %x) #0 {
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-64-NEXT: vpextrq $1, %xmm1, %rax
; AVX512F-64-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512F-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX512F-64-NEXT: vmovq %xmm1, %rax
; AVX512F-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; AVX512F-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX512F-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX512F-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX512F-64-NEXT: vmovq %xmm0, %rax
; AVX512F-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX512F-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX512F-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-64-NEXT: retq
@ -718,14 +718,14 @@ define <4 x double> @sitofp_v4i64_v4f64(<4 x i64> %x) #0 {
; AVX512VL-64: # %bb.0:
; AVX512VL-64-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512VL-64-NEXT: vpextrq $1, %xmm1, %rax
; AVX512VL-64-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512VL-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX512VL-64-NEXT: vmovq %xmm1, %rax
; AVX512VL-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; AVX512VL-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX512VL-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512VL-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX512VL-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX512VL-64-NEXT: vmovq %xmm0, %rax
; AVX512VL-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX512VL-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX512VL-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512VL-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-64-NEXT: retq
@ -802,26 +802,26 @@ define <4 x double> @uitofp_v4i64_v4f64(<4 x i64> %x) #0 {
; AVX1-64: # %bb.0:
; AVX1-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-64-NEXT: vpextrd $2, %xmm1, %eax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX1-64-NEXT: vmovd %xmm1, %eax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; AVX1-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-64-NEXT: vextractps $2, %xmm0, %eax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; AVX1-64-NEXT: vmovq %xmm0, %rax
; AVX1-64-NEXT: movl %eax, %eax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm4
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm4
; AVX1-64-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
; AVX1-64-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-64-NEXT: vpextrd $3, %xmm1, %eax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; AVX1-64-NEXT: vpextrd $1, %xmm1, %eax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm1
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX1-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; AVX1-64-NEXT: vpextrd $3, %xmm0, %eax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; AVX1-64-NEXT: vpextrd $1, %xmm0, %eax
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm0
; AVX1-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX1-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; AVX1-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-64-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
@ -832,28 +832,28 @@ define <4 x double> @uitofp_v4i64_v4f64(<4 x i64> %x) #0 {
; AVX2-64: # %bb.0:
; AVX2-64-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX2-64-NEXT: vextractps $3, %xmm1, %eax
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX2-64-NEXT: vextractps $1, %xmm1, %eax
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; AVX2-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX2-64-NEXT: vextractps $3, %xmm0, %eax
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; AVX2-64-NEXT: vextractps $1, %xmm0, %eax
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm4
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm4
; AVX2-64-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
; AVX2-64-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX2-64-NEXT: vbroadcastsd {{.*#+}} ymm3 = [4.294967296E+9,4.294967296E+9,4.294967296E+9,4.294967296E+9]
; AVX2-64-NEXT: vmulpd %ymm3, %ymm2, %ymm2
; AVX2-64-NEXT: vextractps $2, %xmm1, %eax
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; AVX2-64-NEXT: vmovd %xmm1, %eax
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm1
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX2-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; AVX2-64-NEXT: vextractps $2, %xmm0, %eax
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; AVX2-64-NEXT: vmovq %xmm0, %rax
; AVX2-64-NEXT: movl %eax, %eax
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm5, %xmm0
; AVX2-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX2-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; AVX2-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-64-NEXT: vaddpd %ymm0, %ymm2, %ymm0
@ -863,14 +863,14 @@ define <4 x double> @uitofp_v4i64_v4f64(<4 x i64> %x) #0 {
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-64-NEXT: vpextrq $1, %xmm1, %rax
; AVX512F-64-NEXT: vcvtusi2sd %rax, %xmm2, %xmm2
; AVX512F-64-NEXT: vcvtusi2sd %rax, %xmm15, %xmm2
; AVX512F-64-NEXT: vmovq %xmm1, %rax
; AVX512F-64-NEXT: vcvtusi2sd %rax, %xmm3, %xmm1
; AVX512F-64-NEXT: vcvtusi2sd %rax, %xmm15, %xmm1
; AVX512F-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT: vcvtusi2sd %rax, %xmm3, %xmm2
; AVX512F-64-NEXT: vcvtusi2sd %rax, %xmm15, %xmm2
; AVX512F-64-NEXT: vmovq %xmm0, %rax
; AVX512F-64-NEXT: vcvtusi2sd %rax, %xmm3, %xmm0
; AVX512F-64-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0
; AVX512F-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-64-NEXT: retq
@ -879,14 +879,14 @@ define <4 x double> @uitofp_v4i64_v4f64(<4 x i64> %x) #0 {
; AVX512VL-64: # %bb.0:
; AVX512VL-64-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512VL-64-NEXT: vpextrq $1, %xmm1, %rax
; AVX512VL-64-NEXT: vcvtusi2sd %rax, %xmm2, %xmm2
; AVX512VL-64-NEXT: vcvtusi2sd %rax, %xmm15, %xmm2
; AVX512VL-64-NEXT: vmovq %xmm1, %rax
; AVX512VL-64-NEXT: vcvtusi2sd %rax, %xmm3, %xmm1
; AVX512VL-64-NEXT: vcvtusi2sd %rax, %xmm15, %xmm1
; AVX512VL-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512VL-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT: vcvtusi2sd %rax, %xmm3, %xmm2
; AVX512VL-64-NEXT: vcvtusi2sd %rax, %xmm15, %xmm2
; AVX512VL-64-NEXT: vmovq %xmm0, %rax
; AVX512VL-64-NEXT: vcvtusi2sd %rax, %xmm3, %xmm0
; AVX512VL-64-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0
; AVX512VL-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512VL-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-64-NEXT: retq
@ -947,16 +947,16 @@ define <4 x float> @sitofp_v4i64_v4f32(<4 x i64> %x) #0 {
; AVX1-64-LABEL: sitofp_v4i64_v4f32:
; AVX1-64: # %bb.0:
; AVX1-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX1-64-NEXT: vmovq %xmm0, %rax
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX1-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX1-64-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-64-NEXT: vmovq %xmm0, %rax
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX1-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX1-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX1-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX1-64-NEXT: vzeroupper
; AVX1-64-NEXT: retq
@ -964,16 +964,16 @@ define <4 x float> @sitofp_v4i64_v4f32(<4 x i64> %x) #0 {
; AVX2-64-LABEL: sitofp_v4i64_v4f32:
; AVX2-64: # %bb.0:
; AVX2-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX2-64-NEXT: vmovq %xmm0, %rax
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX2-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX2-64-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-64-NEXT: vmovq %xmm0, %rax
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX2-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX2-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX2-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX2-64-NEXT: vzeroupper
; AVX2-64-NEXT: retq
@ -981,16 +981,16 @@ define <4 x float> @sitofp_v4i64_v4f32(<4 x i64> %x) #0 {
; AVX512F-64-LABEL: sitofp_v4i64_v4f32:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX512F-64-NEXT: vmovq %xmm0, %rax
; AVX512F-64-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; AVX512F-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX512F-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512F-64-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-64-NEXT: vmovq %xmm0, %rax
; AVX512F-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; AVX512F-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX512F-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512F-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
; AVX512F-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX512F-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512F-64-NEXT: vzeroupper
; AVX512F-64-NEXT: retq
@ -998,16 +998,16 @@ define <4 x float> @sitofp_v4i64_v4f32(<4 x i64> %x) #0 {
; AVX512VL-64-LABEL: sitofp_v4i64_v4f32:
; AVX512VL-64: # %bb.0:
; AVX512VL-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX512VL-64-NEXT: vmovq %xmm0, %rax
; AVX512VL-64-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; AVX512VL-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX512VL-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512VL-64-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512VL-64-NEXT: vmovq %xmm0, %rax
; AVX512VL-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; AVX512VL-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX512VL-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512VL-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
; AVX512VL-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX512VL-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512VL-64-NEXT: vzeroupper
; AVX512VL-64-NEXT: retq
@ -1092,16 +1092,16 @@ define <4 x float> @uitofp_v4i64_v4f32(<4 x i64> %x) #0 {
; AVX1-64-NEXT: vorpd %ymm3, %ymm1, %ymm1
; AVX1-64-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
; AVX1-64-NEXT: vpextrq $1, %xmm1, %rax
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; AVX1-64-NEXT: vmovq %xmm1, %rax
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm4
; AVX1-64-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; AVX1-64-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-64-NEXT: vmovq %xmm1, %rax
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm4
; AVX1-64-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; AVX1-64-NEXT: vpextrq $1, %xmm1, %rax
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1
; AVX1-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX1-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
; AVX1-64-NEXT: vaddps %xmm1, %xmm1, %xmm3
; AVX1-64-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
@ -1117,16 +1117,16 @@ define <4 x float> @uitofp_v4i64_v4f32(<4 x i64> %x) #0 {
; AVX2-64-NEXT: vpor %ymm1, %ymm2, %ymm1
; AVX2-64-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
; AVX2-64-NEXT: vpextrq $1, %xmm1, %rax
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX2-64-NEXT: vmovq %xmm1, %rax
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; AVX2-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; AVX2-64-NEXT: vextracti128 $1, %ymm1, %xmm1
; AVX2-64-NEXT: vmovq %xmm1, %rax
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; AVX2-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; AVX2-64-NEXT: vpextrq $1, %xmm1, %rax
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm1
; AVX2-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX2-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
; AVX2-64-NEXT: vaddps %xmm1, %xmm1, %xmm2
; AVX2-64-NEXT: vextracti128 $1, %ymm0, %xmm3
@ -1138,16 +1138,16 @@ define <4 x float> @uitofp_v4i64_v4f32(<4 x i64> %x) #0 {
; AVX512F-64-LABEL: uitofp_v4i64_v4f32:
; AVX512F-64: # %bb.0:
; AVX512F-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; AVX512F-64-NEXT: vmovq %xmm0, %rax
; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; AVX512F-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512F-64-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-64-NEXT: vmovq %xmm0, %rax
; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2
; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; AVX512F-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512F-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0
; AVX512F-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; AVX512F-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512F-64-NEXT: vzeroupper
; AVX512F-64-NEXT: retq
@ -1155,16 +1155,16 @@ define <4 x float> @uitofp_v4i64_v4f32(<4 x i64> %x) #0 {
; AVX512VL-64-LABEL: uitofp_v4i64_v4f32:
; AVX512VL-64: # %bb.0:
; AVX512VL-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; AVX512VL-64-NEXT: vmovq %xmm0, %rax
; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; AVX512VL-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512VL-64-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512VL-64-NEXT: vmovq %xmm0, %rax
; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2
; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; AVX512VL-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512VL-64-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0
; AVX512VL-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; AVX512VL-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512VL-64-NEXT: vzeroupper
; AVX512VL-64-NEXT: retq


@ -323,27 +323,27 @@ define <8 x double> @sitofp_v8i64_v8f64(<8 x i64> %x) #0 {
; NODQ-64: # %bb.0:
; NODQ-64-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; NODQ-64-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; NODQ-64-NEXT: vmovq %xmm1, %rax
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; NODQ-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; NODQ-64-NEXT: vextracti32x4 $2, %zmm0, %xmm2
; NODQ-64-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; NODQ-64-NEXT: vmovq %xmm2, %rax
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; NODQ-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-64-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; NODQ-64-NEXT: vextracti128 $1, %ymm0, %xmm2
; NODQ-64-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; NODQ-64-NEXT: vmovq %xmm2, %rax
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; NODQ-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-64-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; NODQ-64-NEXT: vmovq %xmm0, %rax
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm0
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; NODQ-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; NODQ-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; NODQ-64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
@ -452,27 +452,27 @@ define <8 x double> @uitofp_v8i64_v8f64(<8 x i64> %x) #0 {
; NODQ-64: # %bb.0:
; NODQ-64-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; NODQ-64-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm2, %xmm2
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm15, %xmm2
; NODQ-64-NEXT: vmovq %xmm1, %rax
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm3, %xmm1
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm15, %xmm1
; NODQ-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; NODQ-64-NEXT: vextracti32x4 $2, %zmm0, %xmm2
; NODQ-64-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm3, %xmm3
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm15, %xmm3
; NODQ-64-NEXT: vmovq %xmm2, %rax
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm4, %xmm2
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm15, %xmm2
; NODQ-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-64-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; NODQ-64-NEXT: vextracti128 $1, %ymm0, %xmm2
; NODQ-64-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm4, %xmm3
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm15, %xmm3
; NODQ-64-NEXT: vmovq %xmm2, %rax
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm4, %xmm2
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm15, %xmm2
; NODQ-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-64-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm4, %xmm3
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm15, %xmm3
; NODQ-64-NEXT: vmovq %xmm0, %rax
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm4, %xmm0
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0
; NODQ-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; NODQ-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; NODQ-64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
@ -548,28 +548,28 @@ define <8 x float> @sitofp_v8i64_v8f32(<8 x i64> %x) #0 {
; NODQ-64: # %bb.0:
; NODQ-64-NEXT: vextracti32x4 $2, %zmm0, %xmm1
; NODQ-64-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; NODQ-64-NEXT: vmovq %xmm1, %rax
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; NODQ-64-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NODQ-64-NEXT: vmovq %xmm2, %rax
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
; NODQ-64-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; NODQ-64-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; NODQ-64-NEXT: vmovq %xmm0, %rax
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; NODQ-64-NEXT: vextracti128 $1, %ymm0, %xmm0
; NODQ-64-NEXT: vmovq %xmm0, %rax
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; NODQ-64-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm0
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
; NODQ-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; NODQ-64-NEXT: retq
@ -675,28 +675,28 @@ define <8 x float> @uitofp_v8i64_v8f32(<8 x i64> %x) #0 {
; NODQ-64: # %bb.0:
; NODQ-64-NEXT: vextracti32x4 $2, %zmm0, %xmm1
; NODQ-64-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; NODQ-64-NEXT: vmovq %xmm1, %rax
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm1
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; NODQ-64-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NODQ-64-NEXT: vmovq %xmm2, %rax
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm3
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm3
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
; NODQ-64-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm4, %xmm2
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; NODQ-64-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm4, %xmm2
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; NODQ-64-NEXT: vmovq %xmm0, %rax
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm4, %xmm3
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm3
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; NODQ-64-NEXT: vextracti128 $1, %ymm0, %xmm0
; NODQ-64-NEXT: vmovq %xmm0, %rax
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm4, %xmm3
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm3
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; NODQ-64-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm4, %xmm0
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
; NODQ-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; NODQ-64-NEXT: retq


@ -126,27 +126,27 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
; VEX-LABEL: sitofp_2i64_to_2f64:
; VEX: # %bb.0:
; VEX-NEXT: vpextrq $1, %xmm0, %rax
; VEX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; VEX-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
; VEX-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_2i64_to_2f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_2i64_to_2f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: retq
;
@ -352,14 +352,14 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
@ -368,14 +368,14 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT: vpextrq $1, %xmm1, %rax
; AVX2-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX2-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX2-NEXT: vmovq %xmm1, %rax
; AVX2-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; AVX2-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
; AVX2-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX2-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX2-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
@ -384,14 +384,14 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
; AVX512F: # %bb.0:
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX512F-NEXT: vmovq %xmm1, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
@ -400,14 +400,14 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512VL-NEXT: vpextrq $1, %xmm1, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX512VL-NEXT: vmovq %xmm1, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX512VL-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
@ -1247,27 +1247,27 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
; VEX-LABEL: sitofp_2i64_to_4f32:
; VEX: # %bb.0:
; VEX-NEXT: vpextrq $1, %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; VEX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_2i64_to_4f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_2i64_to_4f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512VL-NEXT: retq
;
@ -1316,27 +1316,27 @@ define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) {
; VEX-LABEL: sitofp_2i64_to_4f32_zero:
; VEX: # %bb.0:
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; VEX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; VEX-NEXT: vpextrq $1, %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_2i64_to_4f32_zero:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_2i64_to_4f32_zero:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVX512VL-NEXT: retq
;
@ -1383,27 +1383,27 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; VEX-LABEL: sitofp_4i64_to_4f32_undef:
; VEX: # %bb.0:
; VEX-NEXT: vpextrq $1, %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; VEX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; VEX-NEXT: vmovq %xmm0, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; VEX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_4i64_to_4f32_undef:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_4i64_to_4f32_undef:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512VL-NEXT: retq
;
@ -1581,16 +1581,16 @@ define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) {
; AVX1-LABEL: sitofp_4i64_to_4f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -1598,16 +1598,16 @@ define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) {
; AVX2-LABEL: sitofp_4i64_to_4f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
@ -1615,16 +1615,16 @@ define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) {
; AVX512F-LABEL: sitofp_4i64_to_4f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
@ -1632,16 +1632,16 @@ define <4 x float> @sitofp_4i64_to_4f32(<4 x i64> %a) {
; AVX512VL-LABEL: sitofp_4i64_to_4f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -1892,9 +1892,9 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
; VEX-NEXT: vpor %xmm1, %xmm2, %xmm1
; VEX-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
; VEX-NEXT: vpextrq $1, %xmm1, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; VEX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; VEX-NEXT: vmovq %xmm1, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
; VEX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
; VEX-NEXT: vaddps %xmm1, %xmm1, %xmm2
; VEX-NEXT: vpxor %xmm3, %xmm3, %xmm3
@ -1906,18 +1906,18 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
; AVX512F-LABEL: uitofp_2i64_to_4f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_2i64_to_4f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512VL-NEXT: retq
;
@ -2007,9 +2007,9 @@ define <4 x float> @uitofp_2i64_to_2f32(<2 x i64> %a) {
; VEX-NEXT: vpor %xmm1, %xmm2, %xmm1
; VEX-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
; VEX-NEXT: vpextrq $1, %xmm1, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; VEX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; VEX-NEXT: vmovq %xmm1, %rax
; VEX-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
; VEX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
; VEX-NEXT: vaddps %xmm1, %xmm1, %xmm2
; VEX-NEXT: vpxor %xmm3, %xmm3, %xmm3
@ -2022,18 +2022,18 @@ define <4 x float> @uitofp_2i64_to_2f32(<2 x i64> %a) {
; AVX512F-LABEL: uitofp_2i64_to_2f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_2i64_to_2f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],zero,zero
; AVX512VL-NEXT: retq
;
@ -2125,9 +2125,9 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; AVX1-NEXT: vmovaps %xmm0, %xmm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm1
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
@ -2148,16 +2148,16 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vpextrq $1, %xmm1, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX2-NEXT: vmovq %xmm1, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
; AVX2-NEXT: vmovq %xmm1, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; AVX2-NEXT: vpextrq $1, %xmm1, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm1
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2
; AVX2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
@ -2168,18 +2168,18 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
; AVX512F-LABEL: uitofp_4i64_to_4f32_undef:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_4i64_to_4f32_undef:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
; AVX512VL-NEXT: retq
;
@ -2494,16 +2494,16 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
; AVX1-NEXT: vorpd %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm4
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm4
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm3
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
@ -2519,16 +2519,16 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vpextrq $1, %xmm1, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX2-NEXT: vmovq %xmm1, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
; AVX2-NEXT: vmovq %xmm1, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; AVX2-NEXT: vpextrq $1, %xmm1, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm1
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3
@ -2540,16 +2540,16 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
; AVX512F-LABEL: uitofp_4i64_to_4f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
@ -2557,16 +2557,16 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) {
; AVX512VL-LABEL: uitofp_4i64_to_4f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovq %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0
; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -2836,22 +2836,22 @@ define <2 x double> @sitofp_load_2i64_to_2f64(ptr%a) {
;
; VEX-LABEL: sitofp_load_2i64_to_2f64:
; VEX: # %bb.0:
; VEX-NEXT: vcvtsi2sdq 8(%rdi), %xmm0, %xmm0
; VEX-NEXT: vcvtsi2sdq (%rdi), %xmm1, %xmm1
; VEX-NEXT: vcvtsi2sdq 8(%rdi), %xmm15, %xmm0
; VEX-NEXT: vcvtsi2sdq (%rdi), %xmm15, %xmm1
; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_load_2i64_to_2f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvtsi2sdq 8(%rdi), %xmm0, %xmm0
; AVX512F-NEXT: vcvtsi2sdq (%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vcvtsi2sdq 8(%rdi), %xmm15, %xmm0
; AVX512F-NEXT: vcvtsi2sdq (%rdi), %xmm15, %xmm1
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_load_2i64_to_2f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtsi2sdq 8(%rdi), %xmm0, %xmm0
; AVX512VL-NEXT: vcvtsi2sdq (%rdi), %xmm1, %xmm1
; AVX512VL-NEXT: vcvtsi2sdq 8(%rdi), %xmm15, %xmm0
; AVX512VL-NEXT: vcvtsi2sdq (%rdi), %xmm15, %xmm1
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: retq
;
@ -3011,33 +3011,33 @@ define <4 x double> @sitofp_load_4i64_to_4f64(ptr%a) {
;
; VEX-LABEL: sitofp_load_4i64_to_4f64:
; VEX: # %bb.0:
; VEX-NEXT: vcvtsi2sdq 24(%rdi), %xmm0, %xmm0
; VEX-NEXT: vcvtsi2sdq 16(%rdi), %xmm1, %xmm1
; VEX-NEXT: vcvtsi2sdq 24(%rdi), %xmm15, %xmm0
; VEX-NEXT: vcvtsi2sdq 16(%rdi), %xmm15, %xmm1
; VEX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; VEX-NEXT: vcvtsi2sdq 8(%rdi), %xmm2, %xmm1
; VEX-NEXT: vcvtsi2sdq (%rdi), %xmm2, %xmm2
; VEX-NEXT: vcvtsi2sdq 8(%rdi), %xmm15, %xmm1
; VEX-NEXT: vcvtsi2sdq (%rdi), %xmm15, %xmm2
; VEX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; VEX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_load_4i64_to_4f64:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvtsi2sdq 24(%rdi), %xmm0, %xmm0
; AVX512F-NEXT: vcvtsi2sdq 16(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vcvtsi2sdq 24(%rdi), %xmm15, %xmm0
; AVX512F-NEXT: vcvtsi2sdq 16(%rdi), %xmm15, %xmm1
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT: vcvtsi2sdq 8(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vcvtsi2sdq (%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vcvtsi2sdq 8(%rdi), %xmm15, %xmm1
; AVX512F-NEXT: vcvtsi2sdq (%rdi), %xmm15, %xmm2
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_load_4i64_to_4f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtsi2sdq 24(%rdi), %xmm0, %xmm0
; AVX512VL-NEXT: vcvtsi2sdq 16(%rdi), %xmm1, %xmm1
; AVX512VL-NEXT: vcvtsi2sdq 24(%rdi), %xmm15, %xmm0
; AVX512VL-NEXT: vcvtsi2sdq 16(%rdi), %xmm15, %xmm1
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: vcvtsi2sdq 8(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vcvtsi2sdq (%rdi), %xmm2, %xmm2
; AVX512VL-NEXT: vcvtsi2sdq 8(%rdi), %xmm15, %xmm1
; AVX512VL-NEXT: vcvtsi2sdq (%rdi), %xmm15, %xmm2
; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
@ -3776,34 +3776,34 @@ define <4 x float> @sitofp_load_4i64_to_4f32(ptr%a) {
;
; VEX-LABEL: sitofp_load_4i64_to_4f32:
; VEX: # %bb.0:
; VEX-NEXT: vcvtsi2ssq 8(%rdi), %xmm0, %xmm0
; VEX-NEXT: vcvtsi2ssq (%rdi), %xmm1, %xmm1
; VEX-NEXT: vcvtsi2ssq 8(%rdi), %xmm15, %xmm0
; VEX-NEXT: vcvtsi2ssq (%rdi), %xmm15, %xmm1
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; VEX-NEXT: vcvtsi2ssq 16(%rdi), %xmm2, %xmm1
; VEX-NEXT: vcvtsi2ssq 16(%rdi), %xmm15, %xmm1
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; VEX-NEXT: vcvtsi2ssq 24(%rdi), %xmm2, %xmm1
; VEX-NEXT: vcvtsi2ssq 24(%rdi), %xmm15, %xmm1
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_load_4i64_to_4f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvtsi2ssq 8(%rdi), %xmm0, %xmm0
; AVX512F-NEXT: vcvtsi2ssq (%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vcvtsi2ssq 8(%rdi), %xmm15, %xmm0
; AVX512F-NEXT: vcvtsi2ssq (%rdi), %xmm15, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512F-NEXT: vcvtsi2ssq 16(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vcvtsi2ssq 16(%rdi), %xmm15, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512F-NEXT: vcvtsi2ssq 24(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vcvtsi2ssq 24(%rdi), %xmm15, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_load_4i64_to_4f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtsi2ssq 8(%rdi), %xmm0, %xmm0
; AVX512VL-NEXT: vcvtsi2ssq (%rdi), %xmm1, %xmm1
; AVX512VL-NEXT: vcvtsi2ssq 8(%rdi), %xmm15, %xmm0
; AVX512VL-NEXT: vcvtsi2ssq (%rdi), %xmm15, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512VL-NEXT: vcvtsi2ssq 16(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vcvtsi2ssq 16(%rdi), %xmm15, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512VL-NEXT: vcvtsi2ssq 24(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vcvtsi2ssq 24(%rdi), %xmm15, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512VL-NEXT: retq
;
@ -3938,57 +3938,57 @@ define <8 x float> @sitofp_load_8i64_to_8f32(ptr%a) {
;
; VEX-LABEL: sitofp_load_8i64_to_8f32:
; VEX: # %bb.0:
; VEX-NEXT: vcvtsi2ssq 40(%rdi), %xmm0, %xmm0
; VEX-NEXT: vcvtsi2ssq 32(%rdi), %xmm1, %xmm1
; VEX-NEXT: vcvtsi2ssq 40(%rdi), %xmm15, %xmm0
; VEX-NEXT: vcvtsi2ssq 32(%rdi), %xmm15, %xmm1
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; VEX-NEXT: vcvtsi2ssq 48(%rdi), %xmm2, %xmm1
; VEX-NEXT: vcvtsi2ssq 48(%rdi), %xmm15, %xmm1
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; VEX-NEXT: vcvtsi2ssq 56(%rdi), %xmm2, %xmm1
; VEX-NEXT: vcvtsi2ssq 56(%rdi), %xmm15, %xmm1
; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; VEX-NEXT: vcvtsi2ssq 8(%rdi), %xmm2, %xmm1
; VEX-NEXT: vcvtsi2ssq (%rdi), %xmm2, %xmm2
; VEX-NEXT: vcvtsi2ssq 8(%rdi), %xmm15, %xmm1
; VEX-NEXT: vcvtsi2ssq (%rdi), %xmm15, %xmm2
; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; VEX-NEXT: vcvtsi2ssq 16(%rdi), %xmm3, %xmm2
; VEX-NEXT: vcvtsi2ssq 16(%rdi), %xmm15, %xmm2
; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; VEX-NEXT: vcvtsi2ssq 24(%rdi), %xmm3, %xmm2
; VEX-NEXT: vcvtsi2ssq 24(%rdi), %xmm15, %xmm2
; VEX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; VEX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: sitofp_load_8i64_to_8f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvtsi2ssq 40(%rdi), %xmm0, %xmm0
; AVX512F-NEXT: vcvtsi2ssq 32(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vcvtsi2ssq 40(%rdi), %xmm15, %xmm0
; AVX512F-NEXT: vcvtsi2ssq 32(%rdi), %xmm15, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512F-NEXT: vcvtsi2ssq 48(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vcvtsi2ssq 48(%rdi), %xmm15, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512F-NEXT: vcvtsi2ssq 56(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vcvtsi2ssq 56(%rdi), %xmm15, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512F-NEXT: vcvtsi2ssq 8(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vcvtsi2ssq (%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vcvtsi2ssq 8(%rdi), %xmm15, %xmm1
; AVX512F-NEXT: vcvtsi2ssq (%rdi), %xmm15, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512F-NEXT: vcvtsi2ssq 16(%rdi), %xmm3, %xmm2
; AVX512F-NEXT: vcvtsi2ssq 16(%rdi), %xmm15, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512F-NEXT: vcvtsi2ssq 24(%rdi), %xmm3, %xmm2
; AVX512F-NEXT: vcvtsi2ssq 24(%rdi), %xmm15, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: sitofp_load_8i64_to_8f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtsi2ssq 40(%rdi), %xmm0, %xmm0
; AVX512VL-NEXT: vcvtsi2ssq 32(%rdi), %xmm1, %xmm1
; AVX512VL-NEXT: vcvtsi2ssq 40(%rdi), %xmm15, %xmm0
; AVX512VL-NEXT: vcvtsi2ssq 32(%rdi), %xmm15, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512VL-NEXT: vcvtsi2ssq 48(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vcvtsi2ssq 48(%rdi), %xmm15, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512VL-NEXT: vcvtsi2ssq 56(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vcvtsi2ssq 56(%rdi), %xmm15, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512VL-NEXT: vcvtsi2ssq 8(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vcvtsi2ssq (%rdi), %xmm2, %xmm2
; AVX512VL-NEXT: vcvtsi2ssq 8(%rdi), %xmm15, %xmm1
; AVX512VL-NEXT: vcvtsi2ssq (%rdi), %xmm15, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512VL-NEXT: vcvtsi2ssq 16(%rdi), %xmm3, %xmm2
; AVX512VL-NEXT: vcvtsi2ssq 16(%rdi), %xmm15, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512VL-NEXT: vcvtsi2ssq 24(%rdi), %xmm3, %xmm2
; AVX512VL-NEXT: vcvtsi2ssq 24(%rdi), %xmm15, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
@ -4235,16 +4235,16 @@ define <4 x float> @uitofp_load_4i64_to_4f32(ptr%a) {
; AVX1-NEXT: vorpd %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm4
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm4
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm3
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
@ -4261,16 +4261,16 @@ define <4 x float> @uitofp_load_4i64_to_4f32(ptr%a) {
; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
; AVX2-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vpextrq $1, %xmm1, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX2-NEXT: vmovq %xmm1, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1
; AVX2-NEXT: vmovq %xmm1, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; AVX2-NEXT: vpextrq $1, %xmm1, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm4, %xmm1
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1,2],xmm1[0]
; AVX2-NEXT: vaddps %xmm1, %xmm1, %xmm2
; AVX2-NEXT: vpackssdw 16(%rdi), %xmm0, %xmm0
@ -4280,23 +4280,23 @@ define <4 x float> @uitofp_load_4i64_to_4f32(ptr%a) {
;
; AVX512F-LABEL: uitofp_load_4i64_to_4f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvtusi2ssq 8(%rdi), %xmm0, %xmm0
; AVX512F-NEXT: vcvtusi2ssq (%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vcvtusi2ssq 8(%rdi), %xmm15, %xmm0
; AVX512F-NEXT: vcvtusi2ssq (%rdi), %xmm15, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512F-NEXT: vcvtusi2ssq 16(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vcvtusi2ssq 16(%rdi), %xmm15, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512F-NEXT: vcvtusi2ssq 24(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vcvtusi2ssq 24(%rdi), %xmm15, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_load_4i64_to_4f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtusi2ssq 8(%rdi), %xmm0, %xmm0
; AVX512VL-NEXT: vcvtusi2ssq (%rdi), %xmm1, %xmm1
; AVX512VL-NEXT: vcvtusi2ssq 8(%rdi), %xmm15, %xmm0
; AVX512VL-NEXT: vcvtusi2ssq (%rdi), %xmm15, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512VL-NEXT: vcvtusi2ssq 16(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vcvtusi2ssq 16(%rdi), %xmm15, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512VL-NEXT: vcvtusi2ssq 24(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vcvtusi2ssq 24(%rdi), %xmm15, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512VL-NEXT: retq
;
@ -4664,16 +4664,16 @@ define <8 x float> @uitofp_load_8i64_to_8f32(ptr%a) {
; AVX1-NEXT: vorps %ymm3, %ymm4, %ymm3
; AVX1-NEXT: vblendvpd %ymm1, %ymm3, %ymm1, %ymm3
; AVX1-NEXT: vpextrq $1, %xmm3, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm4
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm4
; AVX1-NEXT: vmovq %xmm3, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm6
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm6
; AVX1-NEXT: vinsertps {{.*#+}} xmm4 = xmm6[0],xmm4[0],xmm6[2,3]
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3
; AVX1-NEXT: vmovq %xmm3, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm6
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm6
; AVX1-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
; AVX1-NEXT: vpextrq $1, %xmm3, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm3
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0,1,2],xmm3[0]
; AVX1-NEXT: vaddps %xmm3, %xmm3, %xmm4
; AVX1-NEXT: vpackssdw %xmm5, %xmm1, %xmm1
@ -4686,16 +4686,16 @@ define <8 x float> @uitofp_load_8i64_to_8f32(ptr%a) {
; AVX1-NEXT: vorps %ymm2, %ymm3, %ymm2
; AVX1-NEXT: vblendvpd %ymm0, %ymm2, %ymm0, %ymm2
; AVX1-NEXT: vpextrq $1, %xmm2, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm3
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; AVX1-NEXT: vmovq %xmm2, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm5
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm5
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm5[0],xmm3[0],xmm5[2,3]
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT: vmovq %xmm2, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm5
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm5
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm5[0],xmm3[3]
; AVX1-NEXT: vpextrq $1, %xmm2, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm7, %xmm2
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[0]
; AVX1-NEXT: vaddps %xmm2, %xmm2, %xmm3
; AVX1-NEXT: vpackssdw %xmm4, %xmm0, %xmm0
@ -4713,16 +4713,16 @@ define <8 x float> @uitofp_load_8i64_to_8f32(ptr%a) {
; AVX2-NEXT: vpor %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vblendvpd %ymm1, %ymm3, %ymm1, %ymm3
; AVX2-NEXT: vpextrq $1, %xmm3, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm4
; AVX2-NEXT: vmovq %xmm3, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm5, %xmm5
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm5
; AVX2-NEXT: vinsertps {{.*#+}} xmm4 = xmm5[0],xmm4[0],xmm5[2,3]
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm3
; AVX2-NEXT: vmovq %xmm3, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm5
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm5
; AVX2-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0],xmm4[3]
; AVX2-NEXT: vpextrq $1, %xmm3, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm3
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; AVX2-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0,1,2],xmm3[0]
; AVX2-NEXT: vaddps %xmm3, %xmm3, %xmm4
; AVX2-NEXT: vpackssdw 48(%rdi), %xmm1, %xmm1
@ -4732,16 +4732,16 @@ define <8 x float> @uitofp_load_8i64_to_8f32(ptr%a) {
; AVX2-NEXT: vpor %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vblendvpd %ymm0, %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpextrq $1, %xmm2, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm3
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; AVX2-NEXT: vmovq %xmm2, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm4
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm4
; AVX2-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2
; AVX2-NEXT: vmovq %xmm2, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm4
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm4
; AVX2-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; AVX2-NEXT: vpextrq $1, %xmm2, %rax
; AVX2-NEXT: vcvtsi2ss %rax, %xmm6, %xmm2
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[0]
; AVX2-NEXT: vaddps %xmm2, %xmm2, %xmm3
; AVX2-NEXT: vpackssdw 16(%rdi), %xmm0, %xmm0
@ -4751,38 +4751,38 @@ define <8 x float> @uitofp_load_8i64_to_8f32(ptr%a) {
;
; AVX512F-LABEL: uitofp_load_8i64_to_8f32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvtusi2ssq 40(%rdi), %xmm0, %xmm0
; AVX512F-NEXT: vcvtusi2ssq 32(%rdi), %xmm1, %xmm1
; AVX512F-NEXT: vcvtusi2ssq 40(%rdi), %xmm15, %xmm0
; AVX512F-NEXT: vcvtusi2ssq 32(%rdi), %xmm15, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512F-NEXT: vcvtusi2ssq 48(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vcvtusi2ssq 48(%rdi), %xmm15, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512F-NEXT: vcvtusi2ssq 56(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vcvtusi2ssq 56(%rdi), %xmm15, %xmm1
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512F-NEXT: vcvtusi2ssq 8(%rdi), %xmm2, %xmm1
; AVX512F-NEXT: vcvtusi2ssq (%rdi), %xmm2, %xmm2
; AVX512F-NEXT: vcvtusi2ssq 8(%rdi), %xmm15, %xmm1
; AVX512F-NEXT: vcvtusi2ssq (%rdi), %xmm15, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512F-NEXT: vcvtusi2ssq 16(%rdi), %xmm3, %xmm2
; AVX512F-NEXT: vcvtusi2ssq 16(%rdi), %xmm15, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512F-NEXT: vcvtusi2ssq 24(%rdi), %xmm3, %xmm2
; AVX512F-NEXT: vcvtusi2ssq 24(%rdi), %xmm15, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: uitofp_load_8i64_to_8f32:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvtusi2ssq 40(%rdi), %xmm0, %xmm0
; AVX512VL-NEXT: vcvtusi2ssq 32(%rdi), %xmm1, %xmm1
; AVX512VL-NEXT: vcvtusi2ssq 40(%rdi), %xmm15, %xmm0
; AVX512VL-NEXT: vcvtusi2ssq 32(%rdi), %xmm15, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX512VL-NEXT: vcvtusi2ssq 48(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vcvtusi2ssq 48(%rdi), %xmm15, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
; AVX512VL-NEXT: vcvtusi2ssq 56(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vcvtusi2ssq 56(%rdi), %xmm15, %xmm1
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
; AVX512VL-NEXT: vcvtusi2ssq 8(%rdi), %xmm2, %xmm1
; AVX512VL-NEXT: vcvtusi2ssq (%rdi), %xmm2, %xmm2
; AVX512VL-NEXT: vcvtusi2ssq 8(%rdi), %xmm15, %xmm1
; AVX512VL-NEXT: vcvtusi2ssq (%rdi), %xmm15, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512VL-NEXT: vcvtusi2ssq 16(%rdi), %xmm3, %xmm2
; AVX512VL-NEXT: vcvtusi2ssq 16(%rdi), %xmm15, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512VL-NEXT: vcvtusi2ssq 24(%rdi), %xmm3, %xmm2
; AVX512VL-NEXT: vcvtusi2ssq 24(%rdi), %xmm15, %xmm2
; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; AVX512VL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
@ -5148,7 +5148,7 @@ define float @extract0_sitofp_v4i32_f32i_multiuse1(<4 x i32> %x) nounwind {
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX-NEXT: incl %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm1, %xmm1
; AVX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm1
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%e = extractelement <4 x i32> %x, i32 0
@ -5207,7 +5207,7 @@ define float @extract0_uitofp_v4i32_f32(<4 x i32> %x) nounwind {
; VEX-LABEL: extract0_uitofp_v4i32_f32:
; VEX: # %bb.0:
; VEX-NEXT: vmovd %xmm0, %eax
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm0
; VEX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: extract0_uitofp_v4i32_f32:
@ -5251,7 +5251,7 @@ define double @extract0_uitofp_v4i32_f64(<4 x i32> %x) nounwind {
; VEX-LABEL: extract0_uitofp_v4i32_f64:
; VEX: # %bb.0:
; VEX-NEXT: vmovd %xmm0, %eax
; VEX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm0
; VEX-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: extract0_uitofp_v4i32_f64:
@ -5348,7 +5348,7 @@ define float @extract3_uitofp_v4i32_f32(<4 x i32> %x) nounwind {
; VEX-LABEL: extract3_uitofp_v4i32_f32:
; VEX: # %bb.0:
; VEX-NEXT: vextractps $3, %xmm0, %eax
; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm0
; VEX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: extract3_uitofp_v4i32_f32:
@ -5402,7 +5402,7 @@ define double @extract3_uitofp_v4i32_f64(<4 x i32> %x) nounwind {
; VEX-LABEL: extract3_uitofp_v4i32_f64:
; VEX: # %bb.0:
; VEX-NEXT: vextractps $3, %xmm0, %eax
; VEX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm0
; VEX-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: extract3_uitofp_v4i32_f64:

@ -1,4 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s
; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512f < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
@ -6504,7 +6504,7 @@ define <1 x double> @constrained_vector_sitofp_v1f64_v1i32(<1 x i32> %x) #0 {
;
; AVX-LABEL: constrained_vector_sitofp_v1f64_v1i32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2sd %edi, %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%result = call <1 x double>
@ -6522,7 +6522,7 @@ define <1 x float> @constrained_vector_sitofp_v1f32_v1i32(<1 x i32> %x) #0 {
;
; AVX-LABEL: constrained_vector_sitofp_v1f32_v1i32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ss %edi, %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%result = call <1 x float>
@ -6540,7 +6540,7 @@ define <1 x double> @constrained_vector_sitofp_v1f64_v1i64(<1 x i64> %x) #0 {
;
; AVX-LABEL: constrained_vector_sitofp_v1f64_v1i64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2sd %rdi, %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%result = call <1 x double>
@ -6558,7 +6558,7 @@ define <1 x float> @constrained_vector_sitofp_v1f32_v1i64(<1 x i64> %x) #0 {
;
; AVX-LABEL: constrained_vector_sitofp_v1f32_v1i64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; AVX-NEXT: vcvtsi2ss %rdi, %xmm15, %xmm0
; AVX-NEXT: retq
entry:
%result = call <1 x float>
@ -6622,18 +6622,18 @@ define <2 x double> @constrained_vector_sitofp_v2f64_v2i64(<2 x i64> %x) #0 {
; AVX1-LABEL: constrained_vector_sitofp_v2f64_v2i64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: retq
;
; AVX512F-LABEL: constrained_vector_sitofp_v2f64_v2i64:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: retq
;
@ -6668,9 +6668,9 @@ define <2 x float> @constrained_vector_sitofp_v2f32_v2i64(<2 x i64> %x) #0 {
; AVX-LABEL: constrained_vector_sitofp_v2f32_v2i64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vpextrq $1, %xmm0, %rax
; AVX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX-NEXT: vmovq %xmm0, %rax
; AVX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; AVX-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX-NEXT: retq
entry:
@ -6703,12 +6703,12 @@ define <3 x double> @constrained_vector_sitofp_v3f64_v3i32(<3 x i32> %x) #0 {
; AVX-LABEL: constrained_vector_sitofp_v3f64_v3i32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vextractps $1, %xmm0, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm1, %xmm1
; AVX-NEXT: vcvtsi2sd %eax, %xmm15, %xmm1
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm2, %xmm2
; AVX-NEXT: vcvtsi2sd %eax, %xmm15, %xmm2
; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX-NEXT: vpextrd $2, %xmm0, %eax
; AVX-NEXT: vcvtsi2sd %eax, %xmm3, %xmm0
; AVX-NEXT: vcvtsi2sd %eax, %xmm15, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: retq
entry:
@ -6740,12 +6740,12 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
; AVX-LABEL: constrained_vector_sitofp_v3f32_v3i32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: vextractps $1, %xmm0, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm1, %xmm1
; AVX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm1
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm2, %xmm2
; AVX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm2
; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX-NEXT: vpextrd $2, %xmm0, %eax
; AVX-NEXT: vcvtsi2ss %eax, %xmm3, %xmm0
; AVX-NEXT: vcvtsi2ss %eax, %xmm15, %xmm0
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX-NEXT: retq
entry:
@ -6770,26 +6770,26 @@ define <3 x double> @constrained_vector_sitofp_v3f64_v3i64(<3 x i64> %x) #0 {
; AVX1-LABEL: constrained_vector_sitofp_v3f64_v3i64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_sitofp_v3f64_v3i64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
; AVX512-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; AVX512-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX512-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512-NEXT: retq
entry:
@ -6814,13 +6814,13 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; AVX1-LABEL: constrained_vector_sitofp_v3f32_v3i64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -6828,13 +6828,13 @@ define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; AVX512-LABEL: constrained_vector_sitofp_v3f32_v3i64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
; AVX512-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; AVX512-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
; AVX512-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -6910,14 +6910,14 @@ define <4 x double> @constrained_vector_sitofp_v4f64_v4i64(<4 x i64> %x) #0 {
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
@ -6926,14 +6926,14 @@ define <4 x double> @constrained_vector_sitofp_v4f64_v4i64(<4 x i64> %x) #0 {
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX512F-NEXT: vmovq %xmm1, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX512F-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
@ -6977,16 +6977,16 @@ define <4 x float> @constrained_vector_sitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; AVX1-LABEL: constrained_vector_sitofp_v4f32_v4i64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@ -6994,16 +6994,16 @@ define <4 x float> @constrained_vector_sitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; AVX512F-LABEL: constrained_vector_sitofp_v4f32_v4i64:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
; AVX512F-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
@ -7033,12 +7033,12 @@ define <1 x double> @constrained_vector_uitofp_v1f64_v1i32(<1 x i32> %x) #0 {
; AVX1-LABEL: constrained_vector_uitofp_v1f64_v1i32:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm0, %xmm0
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_uitofp_v1f64_v1i32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0
; AVX512-NEXT: vcvtusi2sd %edi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%result = call <1 x double>
@ -7058,12 +7058,12 @@ define <1 x float> @constrained_vector_uitofp_v1f32_v1i32(<1 x i32> %x) #0 {
; AVX1-LABEL: constrained_vector_uitofp_v1f32_v1i32:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: movl %edi, %eax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_uitofp_v1f32_v1i32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0
; AVX512-NEXT: vcvtusi2ss %edi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%result = call <1 x float>
@ -7099,7 +7099,7 @@ define <1 x double> @constrained_vector_uitofp_v1f64_v1i64(<1 x i64> %x) #0 {
; AVX1-NEXT: orq %rax, %rcx
; AVX1-NEXT: testq %rdi, %rdi
; AVX1-NEXT: cmovnsq %rdi, %rcx
; AVX1-NEXT: vcvtsi2sd %rcx, %xmm0, %xmm0
; AVX1-NEXT: vcvtsi2sd %rcx, %xmm15, %xmm0
; AVX1-NEXT: jns .LBB175_2
; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddsd %xmm0, %xmm0, %xmm0
@ -7108,7 +7108,7 @@ define <1 x double> @constrained_vector_uitofp_v1f64_v1i64(<1 x i64> %x) #0 {
;
; AVX512-LABEL: constrained_vector_uitofp_v1f64_v1i64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0
; AVX512-NEXT: vcvtusi2sd %rdi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%result = call <1 x double>
@ -7144,7 +7144,7 @@ define <1 x float> @constrained_vector_uitofp_v1f32_v1i64(<1 x i64> %x) #0 {
; AVX1-NEXT: orq %rax, %rcx
; AVX1-NEXT: testq %rdi, %rdi
; AVX1-NEXT: cmovnsq %rdi, %rcx
; AVX1-NEXT: vcvtsi2ss %rcx, %xmm0, %xmm0
; AVX1-NEXT: vcvtsi2ss %rcx, %xmm15, %xmm0
; AVX1-NEXT: jns .LBB176_2
; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
@ -7153,7 +7153,7 @@ define <1 x float> @constrained_vector_uitofp_v1f32_v1i64(<1 x i64> %x) #0 {
;
; AVX512-LABEL: constrained_vector_uitofp_v1f32_v1i64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0
; AVX512-NEXT: vcvtusi2ss %rdi, %xmm15, %xmm0
; AVX512-NEXT: retq
entry:
%result = call <1 x float>
@ -7279,7 +7279,7 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 {
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: testq %rax, %rax
; AVX1-NEXT: cmovnsq %rax, %rdx
; AVX1-NEXT: vcvtsi2sd %rdx, %xmm1, %xmm1
; AVX1-NEXT: vcvtsi2sd %rdx, %xmm15, %xmm1
; AVX1-NEXT: jns .LBB179_2
; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddsd %xmm1, %xmm1, %xmm1
@ -7292,7 +7292,7 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 {
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: testq %rax, %rax
; AVX1-NEXT: cmovnsq %rax, %rdx
; AVX1-NEXT: vcvtsi2sd %rdx, %xmm2, %xmm0
; AVX1-NEXT: vcvtsi2sd %rdx, %xmm15, %xmm0
; AVX1-NEXT: jns .LBB179_4
; AVX1-NEXT: # %bb.3:
; AVX1-NEXT: vaddsd %xmm0, %xmm0, %xmm0
@ -7303,9 +7303,9 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 {
; AVX512F-LABEL: constrained_vector_uitofp_v2f64_v2i64:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtusi2sd %rax, %xmm1, %xmm1
; AVX512F-NEXT: vcvtusi2sd %rax, %xmm15, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2sd %rax, %xmm2, %xmm0
; AVX512F-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: retq
;
@ -7367,9 +7367,9 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 {
; AVX1-NEXT: vpor %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero
; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
@ -7381,9 +7381,9 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 {
; AVX512-LABEL: constrained_vector_uitofp_v2f32_v2i64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
; AVX512-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0
; AVX512-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX512-NEXT: retq
entry:
@ -7416,24 +7416,24 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i32(<3 x i32> %x) #0 {
; AVX1-LABEL: constrained_vector_uitofp_v3f64_v3i32:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractps $1, %xmm0, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vpextrd $2, %xmm0, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_uitofp_v3f64_v3i32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vextractps $1, %xmm0, %eax
; AVX512-NEXT: vcvtusi2sd %eax, %xmm1, %xmm1
; AVX512-NEXT: vcvtusi2sd %eax, %xmm15, %xmm1
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vcvtusi2sd %eax, %xmm2, %xmm2
; AVX512-NEXT: vcvtusi2sd %eax, %xmm15, %xmm2
; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-NEXT: vpextrd $2, %xmm0, %eax
; AVX512-NEXT: vcvtusi2sd %eax, %xmm3, %xmm0
; AVX512-NEXT: vcvtusi2sd %eax, %xmm15, %xmm0
; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512-NEXT: retq
entry:
@ -7465,24 +7465,24 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 {
; AVX1-LABEL: constrained_vector_uitofp_v3f32_v3i32:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractps $1, %xmm0, %eax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX1-NEXT: vmovd %xmm0, %eax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm2
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX1-NEXT: vpextrd $2, %xmm0, %eax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_uitofp_v3f32_v3i32:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vextractps $1, %xmm0, %eax
; AVX512-NEXT: vcvtusi2ss %eax, %xmm1, %xmm1
; AVX512-NEXT: vcvtusi2ss %eax, %xmm15, %xmm1
; AVX512-NEXT: vmovd %xmm0, %eax
; AVX512-NEXT: vcvtusi2ss %eax, %xmm2, %xmm2
; AVX512-NEXT: vcvtusi2ss %eax, %xmm15, %xmm2
; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512-NEXT: vpextrd $2, %xmm0, %eax
; AVX512-NEXT: vcvtusi2ss %eax, %xmm3, %xmm0
; AVX512-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0
; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX512-NEXT: retq
entry:
@ -7547,7 +7547,7 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: testq %rax, %rax
; AVX1-NEXT: cmovnsq %rax, %rdx
; AVX1-NEXT: vcvtsi2sd %rdx, %xmm1, %xmm1
; AVX1-NEXT: vcvtsi2sd %rdx, %xmm15, %xmm1
; AVX1-NEXT: jns .LBB183_2
; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddsd %xmm1, %xmm1, %xmm1
@ -7560,7 +7560,7 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: testq %rax, %rax
; AVX1-NEXT: cmovnsq %rax, %rdx
; AVX1-NEXT: vcvtsi2sd %rdx, %xmm2, %xmm2
; AVX1-NEXT: vcvtsi2sd %rdx, %xmm15, %xmm2
; AVX1-NEXT: jns .LBB183_4
; AVX1-NEXT: # %bb.3:
; AVX1-NEXT: vaddsd %xmm2, %xmm2, %xmm2
@ -7575,7 +7575,7 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: testq %rax, %rax
; AVX1-NEXT: cmovnsq %rax, %rdx
; AVX1-NEXT: vcvtsi2sd %rdx, %xmm3, %xmm0
; AVX1-NEXT: vcvtsi2sd %rdx, %xmm15, %xmm0
; AVX1-NEXT: jns .LBB183_6
; AVX1-NEXT: # %bb.5:
; AVX1-NEXT: vaddsd %xmm0, %xmm0, %xmm0
@ -7586,13 +7586,13 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 {
; AVX512-LABEL: constrained_vector_uitofp_v3f64_v3i64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
; AVX512-NEXT: vcvtusi2sd %rax, %xmm1, %xmm1
; AVX512-NEXT: vcvtusi2sd %rax, %xmm15, %xmm1
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vcvtusi2sd %rax, %xmm2, %xmm2
; AVX512-NEXT: vcvtusi2sd %rax, %xmm15, %xmm2
; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vcvtusi2sd %rax, %xmm3, %xmm0
; AVX512-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0
; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX512-NEXT: retq
entry:
@ -7657,7 +7657,7 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: testq %rax, %rax
; AVX1-NEXT: cmovnsq %rax, %rdx
; AVX1-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1
; AVX1-NEXT: vcvtsi2ss %rdx, %xmm15, %xmm1
; AVX1-NEXT: jns .LBB184_2
; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
@ -7670,7 +7670,7 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: testq %rax, %rax
; AVX1-NEXT: cmovnsq %rax, %rdx
; AVX1-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm2
; AVX1-NEXT: vcvtsi2ss %rdx, %xmm15, %xmm2
; AVX1-NEXT: jns .LBB184_4
; AVX1-NEXT: # %bb.3:
; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
@ -7685,7 +7685,7 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: testq %rax, %rax
; AVX1-NEXT: cmovnsq %rax, %rdx
; AVX1-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm0
; AVX1-NEXT: vcvtsi2ss %rdx, %xmm15, %xmm0
; AVX1-NEXT: jns .LBB184_6
; AVX1-NEXT: # %bb.5:
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
@ -7697,13 +7697,13 @@ define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; AVX512-LABEL: constrained_vector_uitofp_v3f32_v3i64:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
; AVX512-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
; AVX512-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0
; AVX512-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@ -7860,26 +7860,26 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 {
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpextrd $2, %xmm1, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm2
; AVX1-NEXT: vmovd %xmm1, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0]
; AVX1-NEXT: vextractps $2, %xmm0, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: movl %eax, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm4, %xmm4
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm4
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0]
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vpextrd $3, %xmm1, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; AVX1-NEXT: vpextrd $1, %xmm1, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm5, %xmm1
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm1
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; AVX1-NEXT: vpextrd $3, %xmm0, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm3
; AVX1-NEXT: vpextrd $1, %xmm0, %eax
; AVX1-NEXT: vcvtsi2sd %rax, %xmm5, %xmm0
; AVX1-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
@ -7890,14 +7890,14 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 {
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpextrq $1, %xmm1, %rax
; AVX512F-NEXT: vcvtusi2sd %rax, %xmm2, %xmm2
; AVX512F-NEXT: vcvtusi2sd %rax, %xmm15, %xmm2
; AVX512F-NEXT: vmovq %xmm1, %rax
; AVX512F-NEXT: vcvtusi2sd %rax, %xmm3, %xmm1
; AVX512F-NEXT: vcvtusi2sd %rax, %xmm15, %xmm1
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtusi2sd %rax, %xmm3, %xmm2
; AVX512F-NEXT: vcvtusi2sd %rax, %xmm15, %xmm2
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2sd %rax, %xmm3, %xmm0
; AVX512F-NEXT: vcvtusi2sd %rax, %xmm15, %xmm0
; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
@ -7991,16 +7991,16 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; AVX1-NEXT: vorpd %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm3
; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm4
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm4
; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1
; AVX1-NEXT: vcvtsi2ss %rax, %xmm15, %xmm1
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm3
; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
@ -8011,16 +8011,16 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; AVX512F-LABEL: constrained_vector_uitofp_v4f32_v4i64:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm15, %xmm1
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vmovq %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm15, %xmm2
; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0
; AVX512F-NEXT: vcvtusi2ss %rax, %xmm15, %xmm0
; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
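The pattern across these test updates is uniform: every undef source operand of the scalar converts (vcvtsi2ss/sd, vcvtusi2ss/sd) now reads %xmm15 instead of a low register such as %xmm1–%xmm5. Below is a minimal, self-contained sketch of why walking the allocation order in reverse changes the pick — this is not the LLVM sources; the register numbers and the "clobbered" set are invented for the demo:

```cpp
#include <array>
#include <cstdio>
#include <vector>

int main() {
  // xmm0..xmm15 in raw allocation order.
  std::array<int, 16> Order;
  for (int I = 0; I < 16; ++I)
    Order[I] = I;

  std::vector<bool> Clobbered(16, false);
  Clobbered[0] = true; // xmm0 holds the incoming argument
  // Assume xmm1..xmm3 were just written by the caller, but an
  // over-optimistic reaching-def analysis does not see that.

  // Pick the first register not known to be clobbered.
  auto PickFirstFree = [&](auto Begin, auto End) {
    for (auto It = Begin; It != End; ++It)
      if (!Clobbered[*It])
        return *It;
    return -1;
  };

  int Fwd = PickFirstFree(Order.begin(), Order.end());   // -> 1  (xmm1)
  int Rev = PickFirstFree(Order.rbegin(), Order.rend()); // -> 15 (xmm15)
  std::printf("forward pick: xmm%d, reverse pick: xmm%d\n", Fwd, Rev);
}
```

Low registers are the ones most likely to have been written in the caller right before the call, so scanning from the top of the order makes a caller/callee collision much less probable.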

View File

@ -1403,10 +1403,10 @@ void RegisterInfoEmitter::runTargetDesc(raw_ostream &OS) {
for (const auto &RC : RegisterClasses) {
if (!RC.AltOrderSelect.empty()) {
OS << "\nstatic inline unsigned " << RC.getName()
<< "AltOrderSelect(const MachineFunction &MF) {" << RC.AltOrderSelect
<< "}\n\n"
<< "AltOrderSelect(const MachineFunction &MF, bool Rev) {"
<< RC.AltOrderSelect << "}\n\n"
<< "static ArrayRef<MCPhysReg> " << RC.getName()
<< "GetRawAllocationOrder(const MachineFunction &MF) {\n";
<< "GetRawAllocationOrder(const MachineFunction &MF, bool Rev) {\n";
for (unsigned oi = 1, oe = RC.getNumOrders(); oi != oe; ++oi) {
ArrayRef<const Record *> Elems = RC.getOrder(oi);
if (!Elems.empty()) {
@ -1426,8 +1426,8 @@ void RegisterInfoEmitter::runTargetDesc(raw_ostream &OS) {
else
OS << "),\n ArrayRef(AltOrder" << oi;
OS << ")\n };\n const unsigned Select = " << RC.getName()
<< "AltOrderSelect(MF);\n assert(Select < " << RC.getNumOrders()
<< ");\n return Order[Select];\n}\n";
<< "AltOrderSelect(MF, Rev);\n assert(Select < "
<< RC.getNumOrders() << ");\n return Order[Select];\n}\n";
}
}
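With the extra `bool Rev` parameter threaded through the emitter, the generated per-class helpers take roughly the following shape. This is a hand-written approximation under stated assumptions: `Foo` is a placeholder register-class name, `std::span` (C++20) stands in for `ArrayRef`, and the selector body — which in a real target comes from the class's `AltOrderSelect` fragment and may now consult `Rev` — is invented for illustration:

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <span>

using MCPhysReg = uint16_t;
struct MachineFunction {}; // opaque stand-in for the sketch

// Approximation of the generated selector; the real body is supplied by
// the target's AltOrderSelect TableGen fragment.
static inline unsigned FooAltOrderSelect(const MachineFunction &MF, bool Rev) {
  (void)MF;
  return Rev ? 1 : 0; // invented policy for illustration
}

// Approximation of the generated order getter: choose between the default
// member order and the alternative orders via the selector's result.
static std::span<const MCPhysReg>
FooGetRawAllocationOrder(const MachineFunction &MF, bool Rev) {
  static const MCPhysReg Order0[] = {0, 1, 2, 3};    // default member order
  static const MCPhysReg AltOrder1[] = {3, 2, 1, 0}; // one alternative order
  const std::span<const MCPhysReg> Order[] = {Order0, AltOrder1};
  const unsigned Select = FooAltOrderSelect(MF, Rev);
  assert(Select < 2);
  return Order[Select];
}

int main() {
  MachineFunction MF;
  std::printf("first candidate, fwd: %u, rev: %u\n",
              (unsigned)FooGetRawAllocationOrder(MF, false).front(),
              (unsigned)FooGetRawAllocationOrder(MF, true).front());
}
```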