diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index 3579c79e19cb..46ff5342a7dd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -1272,6 +1272,7 @@ LLT RegBankLegalizeHelper::getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty) { return isAnyPtr(Ty, 128) ? Ty : LLT(); case SgprB64: case VgprB64: + case SgprB64_ReadFirstLane: case UniInVgprB64: if (Ty == LLT::scalar(64) || Ty == LLT::fixed_vector(2, 32) || Ty == LLT::fixed_vector(4, 16) || isAnyPtr(Ty, 64)) @@ -1725,14 +1726,15 @@ bool RegBankLegalizeHelper::applyMappingSrc( break; } case SgprB32_M0: - case SgprB32_ReadFirstLane: { + case SgprB32_ReadFirstLane: + case SgprB64_ReadFirstLane: { assert(Ty == getBTyFromID(MethodIDs[i], Ty)); if (RB == SgprRB) break; assert(RB == VgprRB); - Register NewSGPR32 = MRI.createVirtualRegister({SgprRB, Ty}); - buildReadFirstLane(B, NewSGPR32, Op.getReg(), RBI); - Op.setReg(NewSGPR32); + Register NewSGPR = MRI.createVirtualRegister({SgprRB, Ty}); + buildReadFirstLane(B, NewSGPR, Op.getReg(), RBI); + Op.setReg(NewSGPR); break; } // sgpr and vgpr scalars with extend diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 93ffa0d9a54c..5d628d91ade8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -1492,6 +1492,12 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, addRulesForIOpcs({amdgcn_s_ttracedata}).Any({{}, {{}, {IntrId, SgprB32_M0}}}); + addRulesForIOpcs({amdgcn_s_sleep_var}) + .Any({{}, {{}, {IntrId, SgprB32_ReadFirstLane}}}); + + addRulesForIOpcs({amdgcn_s_prefetch_data}) + .Any({{}, {{}, {IntrId, SgprB64_ReadFirstLane, SgprB32_ReadFirstLane}}}); + // This is "intrinsic lane mask" it was set to i32/i64 in llvm-ir. addRulesForIOpcs({amdgcn_end_cf}) .Any({{_, UniS32}, {{}, {IntrId, Sgpr32}}}) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h index 4bd037d97a9b..d8a88b0ee558 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h @@ -243,6 +243,7 @@ enum RegBankLLTMappingApplyID { // Src only modifiers: operand must be SGPR, if in VGPR, insert readfirstlane // to move to SGPR. SgprB32_ReadFirstLane, + SgprB64_ReadFirstLane, // Src only modifiers: extends Sgpr32AExt, diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.prefetch.data.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.prefetch.data.ll index a8bf9ef350fe..26d64aa1cdf4 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.prefetch.data.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.prefetch.data.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GCN,SDAG %s -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GCN,GISEL %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GCN,GISEL %s define amdgpu_ps void @prefetch_data_sgpr_base_sgpr_len(ptr addrspace(4) inreg %ptr, i32 inreg %len) { ; GCN-LABEL: prefetch_data_sgpr_base_sgpr_len: @@ -123,6 +123,27 @@ entry: ret void } +define amdgpu_ps void @prefetch_data_vgpr_base_vgpr_len(ptr addrspace(4) %ptr, i32 %len) { +; SDAG-LABEL: prefetch_data_vgpr_base_vgpr_len: +; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: v_readfirstlane_b32 s2, v2 +; SDAG-NEXT: v_readfirstlane_b32 s0, v0 +; SDAG-NEXT: v_readfirstlane_b32 s1, v1 +; SDAG-NEXT: s_prefetch_data s[0:1], 0x0, s2, 0 +; SDAG-NEXT: s_endpgm +; +; GISEL-LABEL: prefetch_data_vgpr_base_vgpr_len: +; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: v_readfirstlane_b32 s0, v0 +; GISEL-NEXT: v_readfirstlane_b32 s1, v1 +; GISEL-NEXT: v_readfirstlane_b32 s2, v2 +; GISEL-NEXT: s_prefetch_data s[0:1], 0x0, s2, 0 +; GISEL-NEXT: s_endpgm +entry: + tail call void @llvm.amdgcn.s.prefetch.data.p4(ptr addrspace(4) %ptr, i32 %len) + ret void +} + declare void @llvm.amdgcn.s.prefetch.data.p4(ptr addrspace(4) %ptr, i32 %len) declare void @llvm.amdgcn.s.prefetch.data.p1(ptr addrspace(1) %ptr, i32 %len) declare void @llvm.amdgcn.s.prefetch.data.p0(ptr %ptr, i32 %len) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll index 11c2df97cbb8..b5beb16111a9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -global-isel=0 < %s | FileCheck -check-prefixes=GCN %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -global-isel=1 < %s | FileCheck -check-prefixes=GCN %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -global-isel=1 -new-reg-bank-select < %s | FileCheck -check-prefixes=GCN %s declare void @llvm.amdgcn.s.sleep.var(i32)