diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 3093b9aaf174..15056a9a9cd6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -3915,6 +3915,9 @@ bool AMDGPUInstructionSelector::selectWaveShuffleIntrin( if (DstTy != LLT::scalar(32)) return false; + if (!Subtarget->supportsBPermute()) + return false; + // If we can bpermute across the whole wave, then just do that if (Subtarget->supportsWaveWideBPermute()) { Register ShiftIdxReg = MRI->createVirtualRegister(DstRC); diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 2d1e54bf5883..c7cac17ddbe0 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1884,6 +1884,10 @@ public: return RequiresWaitsBeforeSystemScopeStores; } + bool supportsBPermute() const { + return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS; + } + bool supportsWaveWideBPermute() const { return (getGeneration() <= AMDGPUSubtarget::GFX9 || getGeneration() == AMDGPUSubtarget::GFX12) || diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.shuffle.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.shuffle.ll index 9fc7ae205c7b..1f259ac96be4 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.shuffle.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.shuffle.ll @@ -20,16 +20,17 @@ ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-W64-GISEL %s ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX6-SDAG-ERR %s -; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX6-GISEL-ERR %s +; RUN: not llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX6-GISEL-ERR %s ; GFX6-SDAG-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.ds.bpermute -; GFX6-GISEL-ERR: "Invalid opcode!" +; GFX6-GISEL-ERR: LLVM ERROR: cannot select: %10:vgpr_32(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.shuffle), %0:vgpr(s32), %1:vgpr(s32) (in function: test_wave_shuffle_float) ; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX7-SDAG-ERR %s -; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX7-GISEL-ERR %s +; RUN: not llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=null %s 2>&1 | FileCheck -check-prefixes=GFX7-GISEL-ERR %s ; GFX7-SDAG-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.ds.bpermute -; GFX7-GISEL-ERR: "Invalid opcode!" +; GFX7-GISEL-ERR: LLVM ERROR: cannot select: %10:vgpr_32(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.wave.shuffle), %0:vgpr(s32), %1:vgpr(s32) (in function: test_wave_shuffle_float) + define float @test_wave_shuffle_float(float %val, i32 %idx) {