Fix EXTEND_VECTOR_INREG widening when input > result size (#177095)

This patch fixes an LLVM crash on AMDGPU that occurred when compiling valid code involving non-power-of-two vector sizes. During type legalization, LLVM widened the operand of an EXTEND_VECTOR_INREG node without adjusting the result type, which could make the input larger than the result and trigger an assertion failure. The fix still widens the input, but when the widened input is larger than the result it also widens the result type to the same total size and then extracts the original result as a subvector starting at element 0, so there is no invalid size mismatch. I've added a regression test that previously crashed and now compiles successfully. Fixes #176966. --------- Co-authored-by: Natalia Kokoromyti <knatalia@yost-cm-01-imme.stanford.edu>
2026-01-22 00:33:46 -08:00 · 2026-01-22 00:33:46 -08:00 · efe9b436e0
commit efe9b436e0
parent fa0071baab
2 changed files with 50 additions and 2 deletions
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@ -7702,8 +7702,31 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
 }

 SDValue DAGTypeLegalizer::WidenVecOp_EXTEND_VECTOR_INREG(SDNode *N) {
-  SDValue InOp = GetWidenedVector(N->getOperand(0));
-  return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), InOp);
+  SDLoc DL(N);
+  EVT ResVT = N->getValueType(0);
+
+  // Operand 0 is the vector being widened; use its widened replacement.
+  SDValue WideInOp = GetWidenedVector(N->getOperand(0));
+  EVT WideInVT = WideInOp.getValueType();
+
+  // Simple case: the widened input is no larger in bits than the result, so
+  // re-emit the same opcode on it with the original result type.
+  if (WideInVT.getSizeInBits() <= ResVT.getSizeInBits())
+    return DAG.getNode(N->getOpcode(), DL, ResVT, WideInOp);
+
+  // EXTEND_VECTOR_INREG requires input bits <= result bits. Here the widened
+  // input is larger than the result, so build a result type with the same
+  // element type and total width as the input, then extract ResVT at index 0.
+  EVT ResEltVT = ResVT.getVectorElementType();
+  unsigned EltBits = ResEltVT.getSizeInBits();
+  assert((WideInVT.getSizeInBits() % EltBits) == 0 &&
+         "Widened input size must be a multiple of result element size");
+
+  unsigned WideNumElts = WideInVT.getSizeInBits() / EltBits;
+  EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), ResEltVT, WideNumElts);
+
+  SDValue WideRes = DAG.getNode(N->getOpcode(), DL, WideResVT, WideInOp);
+  return DAG.getExtractSubvector(DL, ResVT, WideRes, 0);
 }

 SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
--- a/llvm/test/CodeGen/AMDGPU/issue176966-extend-vector-inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/issue176966-extend-vector-inreg.ll
@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 %s -o - | FileCheck %s
+;
+; Regression test for https://github.com/llvm/llvm-project/issues/176966
+; Ensures we don't crash in DAG type legalization when widening
+; EXTEND_VECTOR_INREG where the widened input becomes larger than the result.
+
+define <32 x i8> @pr176966_extend_vector_inreg(<6 x i8> %input) {
+; CHECK-LABEL: pr176966_extend_vector_inreg:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v20, v5
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %shuffle = shufflevector <6 x i8> %input, <6 x i8> zeroinitializer,
+              <32 x i32> <i32 poison, i32 poison, i32 poison, i32 poison,
+                          i32 poison, i32 poison, i32 poison, i32 poison,
+                          i32 poison, i32 poison, i32 poison, i32 poison,
+                          i32 poison, i32 poison, i32 poison, i32 poison,
+                          i32 poison, i32 poison, i32 poison, i32 poison,
+                          i32 5, i32 poison, i32 poison, i32 poison,
+                          i32 poison, i32 poison, i32 poison, i32 poison,
+                          i32 poison, i32 poison, i32 poison, i32 poison>
+  ret <32 x i8> %shuffle
+}