This patch fixes an LLVM crash on AMDGPU that occurred when compiling valid code involving non-power-of-two vector sizes. During type legalization, LLVM widened an EXTEND_VECTOR_INREG operation by first widening the input vector, which could make the input larger than the result and trigger an assertion failure. The fix widens the result first and then extracts the needed portion, so the invalid size mismatch can no longer occur. A regression test that crashed before this change, and now passes, is included. Fixes #176966. --------- Co-authored-by: Natalia Kokoromyti <knatalia@yost-cm-01-imme.stanford.edu>
26 lines
1.3 KiB
LLVM
26 lines
1.3 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 %s -o - | FileCheck %s
|
|
;
|
|
; Regression test for https://github.com/llvm/llvm-project/issues/176966
|
|
; Ensures we don't crash in DAG type legalization when widening
|
|
; EXTEND_VECTOR_INREG where the widened input becomes larger than the result.
|
|
|
|
; Builds a <32 x i8> result from a <6 x i8> argument with a single shuffle:
; result element 20 takes source element 5, and every other result element is
; poison. The assertions that follow expect the generated code to be a
; waitcnt, one move from v5 to v20, and the return.
define <32 x i8> @pr176966_extend_vector_inreg(<6 x i8> %input) {
; CHECK-LABEL: pr176966_extend_vector_inreg:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v20, v5
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
  ; Mask layout: 8 lanes per row, lanes 0..31. Only lane 20 (row 3, fifth
  ; entry) is defined; no mask index refers to the zeroinitializer operand.
  %widened = shufflevector <6 x i8> %input, <6 x i8> zeroinitializer, <32 x i32> <
      i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
      i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
      i32 poison, i32 poison, i32 poison, i32 poison, i32 5, i32 poison, i32 poison, i32 poison,
      i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  ret <32 x i8> %widened
}
|