[SLP] Cast incoming value to a proper type for int nodes bitcasted to fp

Before casting the value to an FP type, we need to check whether the type was
reduced during minbitwidth analysis and, if so, restore the original
source type to generate a correct bitcast operation.

Fixes #178884
This commit is contained in:
Alexey Bataev 2026-01-30 08:45:39 -08:00
parent 5d01a0ad3d
commit b73122d5b7
2 changed files with 59 additions and 0 deletions

View File

@ -20583,6 +20583,13 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
} else if (VecOpcode == Instruction::SIToFP && SrcIt != MinBWs.end() &&
!SrcIt->second.second) {
VecOpcode = Instruction::UIToFP;
} else if (VecOpcode == Instruction::BitCast && SrcIt != MinBWs.end() &&
ScalarTy->isFPOrFPVectorTy()) {
Type *OrigSrcScalarTy = CI->getSrcTy();
auto *OrigSrcVectorTy =
getWidenedType(OrigSrcScalarTy, E->Scalars.size());
InVec =
Builder.CreateIntCast(InVec, OrigSrcVectorTy, SrcIt->second.second);
}
Value *V = (VecOpcode != ShuffleOrOp && VecOpcode == Instruction::BitCast)
? InVec

View File

@ -0,0 +1,52 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
; Regression test for issue #178884.
; The scalar code bitcasts i64 values to double, while the same i64 values are
; also truncated to i16. In the expected output the integer lanes are computed
; as <4 x i16>, so they must be extended back to <4 x i64> before being bitcast
; to <4 x double>; the assertions below pin that zext + bitcast sequence.
define i16 @test(i16 %conv11) {
; CHECK-LABEL: define i16 @test(
; CHECK-SAME: i16 [[CONV11:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 poison>, i16 [[CONV11]], i32 3
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i16> [[TMP0]], zeroinitializer
; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i64>
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i64> [[TMP2]] to <4 x double>
; CHECK-NEXT: [[TMP4:%.*]] = fmul <4 x double> [[TMP3]], <double 0.000000e+00, double 0.000000e+00, double 1.000000e+00, double 0.000000e+00>
; CHECK-NEXT: [[TMP5:%.*]] = fcmp uno <4 x double> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i16> zeroinitializer, <4 x i16> zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = or <4 x i16> [[TMP6]], [[TMP1]]
; CHECK-NEXT: [[TMP8:%.*]] = call i16 @llvm.vector.reduce.xor.v4i16(<4 x i16> [[TMP7]])
; CHECK-NEXT: ret i16 [[TMP8]]
;
entry:
%0 = add i64 0, 0
%1 = bitcast i64 %0 to double
%mul.i.i.17 = fmul double 0.000000e+00, %1
%2 = or i64 0, 0
%3 = bitcast i64 %2 to double
%mul.i.i.18 = fmul double 0.000000e+00, %3
%4 = fcmp uno double %mul.i.i.18, 0.000000e+00
%5 = select i1 %4, i16 0, i16 0
%6 = trunc i64 %2 to i16
%7 = or i16 %5, %6
%8 = fcmp uno double %mul.i.i.17, 0.000000e+00
%9 = select i1 %8, i16 0, i16 0
%10 = trunc i64 %0 to i16
%11 = or i16 %9, %10
%12 = or i64 0, 0
%13 = bitcast i64 %12 to double
%14 = fcmp uno double %13, 0.000000e+00
%15 = select i1 %14, i16 0, i16 0
%16 = trunc i64 %12 to i16
; The only chain fed by the function argument: %conv11 is zero-extended to i64,
; bitcast to double, and also truncated back to i16.
%17 = zext i16 %conv11 to i64
%18 = or i64 %17, 0
%19 = bitcast i64 %18 to double
%mul.i.i.21 = fmul double 0.000000e+00, %19
%20 = fcmp uno double %mul.i.i.21, 0.000000e+00
%21 = select i1 %20, i16 0, i16 0
%22 = trunc i64 %18 to i16
%23 = or i16 %21, %22
%24 = or i16 %15, %16
; xor chain over the four i16 results; the expected output replaces it with a
; single llvm.vector.reduce.xor call.
%conv140.i.18 = xor i16 %7, %11
%conv140.i.20 = xor i16 %24, %conv140.i.18
%conv140.i.21 = xor i16 %23, %conv140.i.20
ret i16 %conv140.i.21
}