[AMDGPU] Improve llvm.amdgcn.wave.shuffle handling for pre-GFX8 (#174845)
Previously, GlobalISel still reported success when selecting the intrinsic on GFX7 and earlier, even though the required ds_bpermute_b32 instruction is not supported on those targets. With this change, GlobalISel correctly reports a selection failure in that case. The tests are updated accordingly. Signed-off-by: Domenic Nutile <domenic.nutile@gmail.com>
This commit is contained in:
parent
31b93d6e38
commit
7bbaf2e16b
@ -3915,6 +3915,9 @@ bool AMDGPUInstructionSelector::selectWaveShuffleIntrin(
|
||||
if (DstTy != LLT::scalar(32))
|
||||
return false;
|
||||
|
||||
if (!Subtarget->supportsBPermute())
|
||||
return false;
|
||||
|
||||
// If we can bpermute across the whole wave, then just do that
|
||||
if (Subtarget->supportsWaveWideBPermute()) {
|
||||
Register ShiftIdxReg = MRI->createVirtualRegister(DstRC);
|
||||
|
||||
@ -1884,6 +1884,10 @@ public:
|
||||
return RequiresWaitsBeforeSystemScopeStores;
|
||||
}
|
||||
|
||||
bool supportsBPermute() const {
|
||||
return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
|
||||
}
|
||||
|
||||
bool supportsWaveWideBPermute() const {
|
||||
return (getGeneration() <= AMDGPUSubtarget::GFX9 ||
|
||||
getGeneration() == AMDGPUSubtarget::GFX12) ||
|
||||
|
||||
@ -20,16 +20,17 @@
|
||||
; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-W64-GISEL %s
|
||||
|
||||
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX6-SDAG-ERR %s
|
||||
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX6-GISEL-ERR %s
|
||||
; RUN: not llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX6-GISEL-ERR %s
|
||||
|
||||
; GFX6-SDAG-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.ds.bpermute
|
||||
; GFX6-GISEL-ERR: "Invalid opcode!"
|
||||
; GFX6-GISEL-ERR: LLVM ERROR: cannot select: %10:vgpr_32(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.shuffle), %0:vgpr(s32), %1:vgpr(s32) (in function: test_wave_shuffle_float)
|
||||
|
||||
; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX7-SDAG-ERR %s
|
||||
; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX7-GISEL-ERR %s
|
||||
; RUN: not llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX7-GISEL-ERR %s
|
||||
|
||||
; GFX7-SDAG-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.ds.bpermute
|
||||
; GFX7-GISEL-ERR: "Invalid opcode!"
|
||||
; GFX7-GISEL-ERR: LLVM ERROR: cannot select: %10:vgpr_32(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.shuffle), %0:vgpr(s32), %1:vgpr(s32) (in function: test_wave_shuffle_float)
|
||||
|
||||
|
||||
|
||||
define float @test_wave_shuffle_float(float %val, i32 %idx) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user