[AMDGPU] Improve llvm.amdgcn.wave.shuffle handling for pre-GFX8 (#174845)

Previously, GlobalISel still reported successful selection of the
intrinsic on GFX7 and earlier, even though the required ds_bpermute_b32
instruction is not supported on those targets. With this change,
GlobalISel properly reports a failure to select in that case. The tests
are updated accordingly.

Signed-off-by: Domenic Nutile <domenic.nutile@gmail.com>
This commit is contained in:
saxlungs 2026-01-07 15:48:11 -05:00 committed by GitHub
parent 31b93d6e38
commit 7bbaf2e16b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 12 additions and 4 deletions

View File

@ -3915,6 +3915,9 @@ bool AMDGPUInstructionSelector::selectWaveShuffleIntrin(
if (DstTy != LLT::scalar(32))
return false;
if (!Subtarget->supportsBPermute())
return false;
// If we can bpermute across the whole wave, then just do that
if (Subtarget->supportsWaveWideBPermute()) {
Register ShiftIdxReg = MRI->createVirtualRegister(DstRC);

View File

@ -1884,6 +1884,10 @@ public:
return RequiresWaitsBeforeSystemScopeStores;
}
/// \returns true when the subtarget generation is VOLCANIC_ISLANDS or later.
/// Instruction selection for llvm.amdgcn.wave.shuffle bails out when this is
/// false, because ds_bpermute_b32 is not supported on GFX7 and earlier.
bool supportsBPermute() const {
  const bool IsPreVolcanicIslands =
      getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
  return !IsPreVolcanicIslands;
}
bool supportsWaveWideBPermute() const {
return (getGeneration() <= AMDGPUSubtarget::GFX9 ||
getGeneration() == AMDGPUSubtarget::GFX12) ||

View File

@ -20,16 +20,17 @@
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-W64-GISEL %s
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX6-SDAG-ERR %s
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX6-GISEL-ERR %s
; RUN: not llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX6-GISEL-ERR %s
; GFX6-SDAG-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.ds.bpermute
; GFX6-GISEL-ERR: "Invalid opcode!"
; GFX6-GISEL-ERR: LLVM ERROR: cannot select: %10:vgpr_32(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.shuffle), %0:vgpr(s32), %1:vgpr(s32) (in function: test_wave_shuffle_float)
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX7-SDAG-ERR %s
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX7-GISEL-ERR %s
; RUN: not llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX7-GISEL-ERR %s
; GFX7-SDAG-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.ds.bpermute
; GFX7-GISEL-ERR: "Invalid opcode!"
; GFX7-GISEL-ERR: LLVM ERROR: cannot select: %10:vgpr_32(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.shuffle), %0:vgpr(s32), %1:vgpr(s32) (in function: test_wave_shuffle_float)
define float @test_wave_shuffle_float(float %val, i32 %idx) {