[ValueTracking] ComputeNumSignBitsImpl - add basic handling of BITCAST nodes (#127218)

When a wider scalar/vector type containing all sign bits is bitcast to a
narrower vector type, we can deduce that the resulting narrow elements
will also be all sign bits. This matches existing behavior in
SelectionDAG and helps optimize cases involving SSE intrinsics where
sign-extended values are bitcast between different vector types.

The current implementation fails to recognize that an arithmetic right
shift is redundant when applied to elements that are already known to be
all sign bits. This PR improves ComputeNumSignBitsImpl to track this
information through bitcasts, enabling the optimization of such cases.

```
%ext = sext <1 x i1> %cmp to <1 x i8>
%sub = bitcast <1 x i8> %ext to <4 x i2>
%sra = ashr <4 x i2> %sub, <i2 1, i2 1, i2 1, i2 1>
; Can be simplified to just:
%sub = bitcast <1 x i8> %ext to <4 x i2>
```

Closes #87624
This commit is contained in:
Narayan 2025-03-06 14:00:36 +05:30 committed by GitHub
parent 1987d18012
commit 6311e3fcc8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 86 additions and 0 deletions

View File

@ -3987,6 +3987,31 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
if (auto *U = dyn_cast<Operator>(V)) {
switch (Operator::getOpcode(V)) {
default: break;
case Instruction::BitCast: {
Value *Src = U->getOperand(0);
Type *SrcTy = Src->getType();
// Skip if the source type is not an integer or integer vector type
// This ensures we only process integer-like types
if (!SrcTy->isIntOrIntVectorTy())
break;
unsigned SrcBits = SrcTy->getScalarSizeInBits();
// Bitcast 'large element' scalar/vector to 'small element' vector.
if ((SrcBits % TyBits) != 0)
break;
// Only proceed if the destination type is a fixed-size vector
if (isa<FixedVectorType>(Ty)) {
// Fast case - sign splat can be simply split across the small elements.
// This works for both vector and scalar sources
Tmp = ComputeNumSignBits(Src, Depth + 1, Q);
if (Tmp == SrcBits)
return TyBits;
}
break;
}
case Instruction::SExt:
Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
return ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q) +

View File

@ -0,0 +1,61 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=instcombine -S < %s | FileCheck %s
; Case 1: Vector to Vector bitcast
define <4 x i2> @test_vector_to_vector(<1 x i8> %a0, <1 x i8> %a1) {
; CHECK-LABEL: define <4 x i2> @test_vector_to_vector(
; CHECK-SAME: <1 x i8> [[A0:%.*]], <1 x i8> [[A1:%.*]]) {
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <1 x i8> [[A0]], [[A1]]
; CHECK-NEXT: [[EXT:%.*]] = sext <1 x i1> [[CMP]] to <1 x i8>
; CHECK-NEXT: [[SUB:%.*]] = bitcast <1 x i8> [[EXT]] to <4 x i2>
; CHECK-NEXT: ret <4 x i2> [[SUB]]
;
%cmp = icmp sgt <1 x i8> %a0, %a1
%ext = sext <1 x i1> %cmp to <1 x i8>
%sub = bitcast <1 x i8> %ext to <4 x i2>
%sra = ashr <4 x i2> %sub, <i2 1, i2 1, i2 1, i2 1>
ret <4 x i2> %sra
}
; Case 2: Scalar to Vector bitcast
define <2 x i16> @test_scalar_to_vector(i1 %cond) {
; CHECK-LABEL: define <2 x i16> @test_scalar_to_vector(
; CHECK-SAME: i1 [[COND:%.*]]) {
; CHECK-NEXT: [[EXT:%.*]] = sext i1 [[COND]] to i32
; CHECK-NEXT: [[BC:%.*]] = bitcast i32 [[EXT]] to <2 x i16>
; CHECK-NEXT: ret <2 x i16> [[BC]]
;
%ext = sext i1 %cond to i32
%bc = bitcast i32 %ext to <2 x i16>
%sra = ashr <2 x i16> %bc, <i16 8, i16 8>
ret <2 x i16> %sra
}
; Case 3: Multiple right shifts
define <8 x i8> @test_multiple_shifts(i1 %cond) {
; CHECK-LABEL: define <8 x i8> @test_multiple_shifts(
; CHECK-SAME: i1 [[COND:%.*]]) {
; CHECK-NEXT: [[EXT:%.*]] = sext i1 [[COND]] to i64
; CHECK-NEXT: [[BC:%.*]] = bitcast i64 [[EXT]] to <8 x i8>
; CHECK-NEXT: ret <8 x i8> [[BC]]
;
%ext = sext i1 %cond to i64
%bc = bitcast i64 %ext to <8 x i8>
%sra1 = ashr <8 x i8> %bc, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 1>
%sra2 = ashr <8 x i8> %sra1, <i8 2, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
ret <8 x i8> %sra2
}
; (Negative) Case 4: Test with non-sign-extended source
define <4 x i8> @test_non_sign_extended(i32 %val) {
; CHECK-LABEL: define <4 x i8> @test_non_sign_extended(
; CHECK-SAME: i32 [[VAL:%.*]]) {
; CHECK-NEXT: [[BC:%.*]] = bitcast i32 [[VAL]] to <4 x i8>
; CHECK-NEXT: [[SRA:%.*]] = ashr <4 x i8> [[BC]], splat (i8 1)
; CHECK-NEXT: ret <4 x i8> [[SRA]]
;
%bc = bitcast i32 %val to <4 x i8>
%sra = ashr <4 x i8> %bc, <i8 1, i8 1, i8 1, i8 1>
ret <4 x i8> %sra
}