[SLP] Cast a demoted operand back to its source type before a bitcast to FP

When the operand of a vectorized bitcast has an entry in MinBWs and the
result type is floating point, vectorizeTree() now int-casts the operand
vector to the cast's original source type (widened to E->Scalars.size()
lanes), using the signedness stored in the MinBWs entry. The new test expects
the <4 x i16> operand to be zero-extended to <4 x i64> ahead of the bitcast
to <4 x double>.

NOTE(review): the insertelement and fmul CHECK lines reached this patch
without their constant vector operand, so they are matched with {{.*}} here.
Re-run utils/update_test_checks.py to restore the exact constants.

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5dd7909c7ee9..dd70e35b0eee 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -20583,6 +20583,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       } else if (VecOpcode == Instruction::SIToFP && SrcIt != MinBWs.end() &&
                  !SrcIt->second.second) {
         VecOpcode = Instruction::UIToFP;
+      } else if (VecOpcode == Instruction::BitCast && SrcIt != MinBWs.end() &&
+                 ScalarTy->isFPOrFPVectorTy()) {
+        // The operand entry is in MinBWs, so InVec may be narrower than the
+        // scalar bitcast's source type. Bring it back to that type, using the
+        // signedness recorded in MinBWs, before it is cast to FP.
+        Type *OrigSrcScalarTy = CI->getSrcTy();
+        auto *OrigSrcVectorTy =
+            getWidenedType(OrigSrcScalarTy, E->Scalars.size());
+        InVec =
+            Builder.CreateIntCast(InVec, OrigSrcVectorTy, SrcIt->second.second);
       }
       Value *V = (VecOpcode != ShuffleOrOp && VecOpcode == Instruction::BitCast)
                      ? InVec
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbw-bitcast-to-fp.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbw-bitcast-to-fp.ll
new file mode 100644
index 000000000000..714dee4cb3a0
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/minbw-bitcast-to-fp.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define i16 @test(i16 %conv11) {
+; CHECK-LABEL: define i16 @test(
+; CHECK-SAME: i16 [[CONV11:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i16> {{.*}}, i16 [[CONV11]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i16> [[TMP0]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i64>
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to <4 x double>
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul <4 x double> [[TMP3]], {{.*}}
+; CHECK-NEXT:    [[TMP5:%.*]] = fcmp uno <4 x double> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i16> zeroinitializer, <4 x i16> zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = or <4 x i16> [[TMP6]], [[TMP1]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> [[TMP7]])
+; CHECK-NEXT:    ret i16 [[TMP8]]
+;
+entry:
+  %0 = add i64 0, 0
+  %1 = bitcast i64 %0 to double
+  %mul.i.i.17 = fmul double 0.000000e+00, %1
+  %2 = or i64 0, 0
+  %3 = bitcast i64 %2 to double
+  %mul.i.i.18 = fmul double 0.000000e+00, %3
+  %4 = fcmp uno double %mul.i.i.18, 0.000000e+00
+  %5 = select i1 %4, i16 0, i16 0
+  %6 = trunc i64 %2 to i16
+  %7 = or i16 %5, %6
+  %8 = fcmp uno double %mul.i.i.17, 0.000000e+00
+  %9 = select i1 %8, i16 0, i16 0
+  %10 = trunc i64 %0 to i16
+  %11 = or i16 %9, %10
+  %12 = or i64 0, 0
+  %13 = bitcast i64 %12 to double
+  %14 = fcmp uno double %13, 0.000000e+00
+  %15 = select i1 %14, i16 0, i16 0
+  %16 = trunc i64 %12 to i16
+  %17 = zext i16 %conv11 to i64
+  %18 = or i64 %17, 0
+  %19 = bitcast i64 %18 to double
+  %mul.i.i.21 = fmul double 0.000000e+00, %19
+  %20 = fcmp uno double %mul.i.i.21, 0.000000e+00
+  %21 = select i1 %20, i16 0, i16 0
+  %22 = trunc i64 %18 to i16
+  %23 = or i16 %21, %22
+  %24 = or i16 %15, %16
+  %conv140.i.18 = xor i16 %7, %11
+  %conv140.i.20 = xor i16 %24, %conv140.i.18
+  %conv140.i.21 = xor i16 %23, %conv140.i.20
+  ret i16 %conv140.i.21
+}