[SLP]Cast incoming value to a proper type for int nodes, bitcasted to fp

Before bitcasting the value to an FP type, check whether its type was reduced during minbitwidth analysis and, if so, restore the original source type so that a correct bitcast operation is generated. Fixes #178884
2026-01-30 08:45:39 -08:00 · 2026-01-30 08:45:39 -08:00 · b73122d5b7
commit b73122d5b7
parent 5d01a0ad3d
2 changed files with 59 additions and 0 deletions
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@ -20583,6 +20583,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
      } else if (VecOpcode == Instruction::SIToFP && SrcIt != MinBWs.end() &&
                 !SrcIt->second.second) {
        VecOpcode = Instruction::UIToFP;
+      } else if (VecOpcode == Instruction::BitCast && SrcIt != MinBWs.end() &&
+                 ScalarTy->isFPOrFPVectorTy()) {
+        Type *OrigSrcScalarTy = CI->getSrcTy();
+        auto *OrigSrcVectorTy =
+            getWidenedType(OrigSrcScalarTy, E->Scalars.size());
+        InVec =
+            Builder.CreateIntCast(InVec, OrigSrcVectorTy, SrcIt->second.second);
      }
      Value *V = (VecOpcode != ShuffleOrOp && VecOpcode == Instruction::BitCast)
                     ? InVec
--- a/llvm/test/Transforms/SLPVectorizer/X86/minbw-bitcast-to-fp.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/minbw-bitcast-to-fp.ll
@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define i16 @test(i16 %conv11) {
+; CHECK-LABEL: define i16 @test(
+; CHECK-SAME: i16 [[CONV11:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 poison>, i16 [[CONV11]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i16> [[TMP0]], zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i64>
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to <4 x double>
+; CHECK-NEXT:    [[TMP4:%.*]] = fmul <4 x double> [[TMP3]], <double 0.000000e+00, double 0.000000e+00, double 1.000000e+00, double 0.000000e+00>
+; CHECK-NEXT:    [[TMP5:%.*]] = fcmp uno <4 x double> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i16> zeroinitializer, <4 x i16> zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = or <4 x i16> [[TMP6]], [[TMP1]]
+; CHECK-NEXT:    [[TMP8:%.*]] = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> [[TMP7]])
+; CHECK-NEXT:    ret i16 [[TMP8]]
+;
+entry:
+  %0 = add i64 0, 0 ; first of four i64 values that each feed both a bitcast-to-double and a trunc-to-i16
+  %1 = bitcast i64 %0 to double ; reinterprets the 64 integer bits as a double
+  %mul.i.i.17 = fmul double 0.000000e+00, %1
+  %2 = or i64 0, 0 ; second i64 value
+  %3 = bitcast i64 %2 to double
+  %mul.i.i.18 = fmul double 0.000000e+00, %3
+  %4 = fcmp uno double %mul.i.i.18, 0.000000e+00
+  %5 = select i1 %4, i16 0, i16 0
+  %6 = trunc i64 %2 to i16 ; the same i64 is also consumed as an i16
+  %7 = or i16 %5, %6
+  %8 = fcmp uno double %mul.i.i.17, 0.000000e+00
+  %9 = select i1 %8, i16 0, i16 0
+  %10 = trunc i64 %0 to i16
+  %11 = or i16 %9, %10
+  %12 = or i64 0, 0 ; third i64 value
+  %13 = bitcast i64 %12 to double
+  %14 = fcmp uno double %13, 0.000000e+00 ; this chain compares the bitcast result directly, with no fmul in between
+  %15 = select i1 %14, i16 0, i16 0
+  %16 = trunc i64 %12 to i16
+  %17 = zext i16 %conv11 to i64 ; the only place the function argument is used
+  %18 = or i64 %17, 0 ; fourth i64 value
+  %19 = bitcast i64 %18 to double
+  %mul.i.i.21 = fmul double 0.000000e+00, %19
+  %20 = fcmp uno double %mul.i.i.21, 0.000000e+00
+  %21 = select i1 %20, i16 0, i16 0
+  %22 = trunc i64 %18 to i16
+  %23 = or i16 %21, %22
+  %24 = or i16 %15, %16
+  %conv140.i.18 = xor i16 %7, %11 ; the four i16 results are folded together with xor
+  %conv140.i.20 = xor i16 %24, %conv140.i.18
+  %conv140.i.21 = xor i16 %23, %conv140.i.20
+  ret i16 %conv140.i.21 ; NOTE(review): expected output above does the integer add on <4 x i16>, then zext to <4 x i64> before the vector bitcast - presumably the restored source type; confirm against the pass
+}