From b830bcfde3bafb0db4e733a5f3f7913e2fcf6b22 Mon Sep 17 00:00:00 2001
From: addmisol <218448340+addmisol@users.noreply.github.com>
Date: Wed, 25 Feb 2026 15:25:09 +0530
Subject: [PATCH] [AMDGPU] Fix compute num sign bits unsigned underflow
 (#182723)

Fixes #182677

The `BFE_I32` case in `ComputeNumSignBitsForTargetNode` was not masking
the width operand with `& 0x1f`, unlike other BFE operations in the same
file. Since the hardware instruction only uses the low 5 bits of the
width field, values >= 32 passed via `@llvm.amdgcn.sbfe.i32` caused
unsigned integer underflow in the calculation:

      unsigned SignBits = 32 - Width->getZExtValue() + 1;

When width > 33, the unsigned subtraction wraps around, producing an
incorrect SignBits value. When width == 33, SignBits becomes 0,
violating the expected return range of [1, BitWidth]. This led to
assertion failures and miscompilation where subsequent BFE narrowing
operations were incorrectly eliminated.

This patch:
- Masks the width value with `& 0x1f` to match hardware behavior
- Does not special-case width == 0 (after masking): the masked expression
  `32 - (width & 0x1f) + 1` then evaluates to 33, not 32 sign bits
- Adds regression tests for width values >= 32
---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp |  2 +-
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll  | 40 +++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 3f9f95f1c4a6..09c5d19ee5ab 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -5981,7 +5981,7 @@ unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
     if (!Width)
       return 1;
 
-    unsigned SignBits = 32 - Width->getZExtValue() + 1;
+    unsigned SignBits = 32 - (Width->getZExtValue() & 0x1f) + 1;
     if (!isNullConstant(Op.getOperand(1)))
       return SignBits;
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll
index e441d9a85cb4..4b659c3e4288 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll
@@ -550,6 +550,46 @@ define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(ptr addrspace(1) %out, pt
   ret void
 }
 
+; Check that sbfe width operands >= 32 are masked with 0x1f before use.
+; Regression test for issue #182677, where the missing mask caused an
+; unsigned underflow in ComputeNumSignBitsForTargetNode.
+
+; GCN-LABEL: {{^}}bfe_i32_width_33:
+; GCN-NOT: {{[^@]}}bfe
+; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; GCN: buffer_store_dword [[VREG]],
+; GCN: s_endpgm
+define amdgpu_kernel void @bfe_i32_width_33(ptr addrspace(1) %out) #0 {
+  ; Width 33 & 0x1f = 1: 1 bit from offset 0; source is 0, so the result is 0.
+  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 33)
+  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}bfe_i32_width_64:
+; GCN-NOT: {{[^@]}}bfe
+; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; GCN: buffer_store_dword [[VREG]],
+; GCN: s_endpgm
+define amdgpu_kernel void @bfe_i32_width_64(ptr addrspace(1) %out) #0 {
+  ; Width 64 & 0x1f = 0: a zero-width field of source 255, expected result 0.
+  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 0, i32 64)
+  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}bfe_i32_width_32:
+; GCN-NOT: {{[^@]}}bfe
+; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
+; GCN: buffer_store_dword [[VREG]],
+; GCN: s_endpgm
+define amdgpu_kernel void @bfe_i32_width_32(ptr addrspace(1) %out) #0 {
+  ; Width 32 & 0x1f = 0: a zero-width field of source 123, expected result 0.
+  %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 123, i32 0, i32 32)
+  store i32 %bfe_i32, ptr addrspace(1) %out, align 4
+  ret void
+}
+
 declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) #1
 
 attributes #0 = { nounwind }