Fix EXTEND_VECTOR_INREG widening when input > result size (#177095)

This patch fixes an LLVM crash on AMDGPU that occurred when compiling
valid code involving non-power-of-two vector sizes. During type
legalization, LLVM widened an EXTEND_VECTOR_INREG operation by first
widening the input vector, which could make the input larger than the
result and trigger an assertion failure.

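The fix still widens the input, but when the widened input ends up larger
than the result it builds the node with a result type widened to the same
bit width and then extracts the original result type as a subvector at
index 0, so no node with an invalid size mismatch is ever created. I've
added a regression test that previously crashed and now passes.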

Fixes #176966.

---------

Co-authored-by: Natalia Kokoromyti <knatalia@yost-cm-01-imme.stanford.edu>
This commit is contained in:
nataliakokoromyti 2026-01-22 00:33:46 -08:00 committed by GitHub
parent fa0071baab
commit efe9b436e0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 50 additions and 2 deletions

View File

@ -7702,8 +7702,31 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
}
/// Widen the vector operand of an *_EXTEND_VECTOR_INREG node.
///
/// The operand is replaced by its widened form. If the widened operand is
/// still no larger (in bits) than the node's result type, the node is simply
/// rebuilt with the original result type. Otherwise the node is rebuilt with a
/// result type that has the same element type and the same total bit width as
/// the widened operand, and the original result type is extracted from it as a
/// subvector starting at element 0.
///
/// \param N The EXTEND_VECTOR_INREG-style node whose operand 0 is being
///          widened; its opcode is reused unchanged for the new node.
/// \returns A value of type N->getValueType(0).
SDValue DAGTypeLegalizer::WidenVecOp_EXTEND_VECTOR_INREG(SDNode *N) {
  SDLoc DL(N);
  EVT ResVT = N->getValueType(0);

  // Widen the input as requested by the legalizer.
  SDValue WideInOp = GetWidenedVector(N->getOperand(0));
  EVT WideInVT = WideInOp.getValueType();

  // Simple case: if the widened input is still smaller than or equal to the
  // result, just use it directly.
  if (WideInVT.getSizeInBits() <= ResVT.getSizeInBits())
    return DAG.getNode(N->getOpcode(), DL, ResVT, WideInOp);

  // EXTEND_VECTOR_INREG requires input bits <= result bits. Widening made the
  // input larger than the original result, so widen the result to the same
  // bit width and then extract back down to the requested type.
  EVT ResEltVT = ResVT.getVectorElementType();
  unsigned EltBits = ResEltVT.getSizeInBits();
  assert((WideInVT.getSizeInBits() % EltBits) == 0 &&
         "Widened input size must be a multiple of result element size");

  unsigned WideNumElts = WideInVT.getSizeInBits() / EltBits;
  EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), ResEltVT, WideNumElts);

  SDValue WideRes = DAG.getNode(N->getOpcode(), DL, WideResVT, WideInOp);
  // The original result occupies the low elements of the widened result.
  return DAG.getExtractSubvector(DL, ResVT, WideRes, 0);
}
SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {

View File

@ -0,0 +1,25 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 %s -o - | FileCheck %s
;
; Regression test for https://github.com/llvm/llvm-project/issues/176966
; Ensures we don't crash in DAG type legalization when widening
; EXTEND_VECTOR_INREG where the widened input becomes larger than the result.
; Builds a <32 x i8> from a <6 x i8> argument with a shuffle whose mask has a
; single defined entry: result lane 20 takes element 5 of %input. Every other
; result lane is poison.
define <32 x i8> @pr176966_extend_vector_inreg(<6 x i8> %input) {
; CHECK-LABEL: pr176966_extend_vector_inreg:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v20, v5
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
; Mask indices 0-5 refer to %input and 6-11 to the zeroinitializer operand;
; only index 5 is used, and it is placed at position 20 of the 32-wide result.
%shuffle = shufflevector <6 x i8> %input, <6 x i8> zeroinitializer,
<32 x i32> <i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison,
i32 5, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison>
ret <32 x i8> %shuffle
}