diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index 7a681dff6f89..4fe1442f2786 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -1188,6 +1188,7 @@ LLT RegBankLegalizeHelper::getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty) { switch (ID) { case SgprB32: case VgprB32: + case SgprB32_M0: case UniInVgprB32: if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) || isAnyPtr(Ty, 32)) @@ -1640,6 +1641,16 @@ bool RegBankLegalizeHelper::applyMappingSrc( } break; } + case SgprB32_M0: { + assert(Ty == getBTyFromID(MethodIDs[i], Ty)); + if (RB == SgprRB) + break; + assert(RB == VgprRB); + Register NewSGPR32 = MRI.createVirtualRegister({SgprRB, Ty}); + buildReadFirstLane(B, NewSGPR32, Op.getReg(), RBI); + Op.setReg(NewSGPR32); + break; + } // sgpr and vgpr scalars with extend case Sgpr32AExt: { // Note: this ext allows S1, and it is meant to be combined away. diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 58ffc32148a1..f3264536006d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -1430,6 +1430,16 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Uni(S64, {{Sgpr64}, {IntrId, Vcc, Sgpr64}}) .Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}}); + addRulesForIOpcs({amdgcn_exp}) + .Any({{_, _, _, S32, S32, S32, S32}, + {{}, {IntrId, Imm, Imm, Vgpr32, Vgpr32, Vgpr32, Vgpr32}}}); + + addRulesForIOpcs({amdgcn_exp_row}) + .Any({{_, _, _, S32, S32, S32, S32, _, S32}, + {{}, + {IntrId, Imm, Imm, Vgpr32, Vgpr32, Vgpr32, Vgpr32, Imm, + SgprB32_M0}}}); + addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard) .Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}}); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h index 2a3a9411379c..c5634c53270a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h @@ -226,6 +226,10 @@ enum RegBankLLTMappingApplyID { SgprP0Call_WF, SgprP4Call_WF, + // Src only modifiers: for operands that must end up in M0. If divergent, + // readfirstlane to SGPR. The result can then be copied to M0 in ISel. + SgprB32_M0, + // Src only modifiers: extends Sgpr32AExt, Sgpr32AExtBoolInReg, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir index 313b0c5b6707..7d2a3c1c5f6f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir @@ -1,18 +1,26 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' %s -o - | FileCheck %s --- | - define void @exp_s() { - call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 1.0, float 1.0, float 1.0, float 1.0, i1 0, i1 0) + define amdgpu_ps void @exp_s(float inreg %v0, float inreg %v1, float inreg %v2, float inreg %v3) { + call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %v0, float %v1, float %v2, float %v3, i1 false, i1 false) ret void } - define void @exp_v() { - call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 1.0, float 1.0, float 1.0, float 1.0, i1 0, i1 0) + define amdgpu_ps void @exp_v(float %v0, float %v1, float %v2, float %v3) { + call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %v0, float %v1, float %v2, float %v3, i1 false, i1 false) + ret void + } + define amdgpu_ps void @exp_row_s(i32 inreg %row, float inreg %val) { + call void @llvm.amdgcn.exp.row.f32(i32 12, i32 1, float %val, float %val, float %val, float %val, i1 true, i32 %row) + ret void + } + define amdgpu_ps void @exp_row_v(float %val, i32 %row) { + call void @llvm.amdgcn.exp.row.f32(i32 12, i32 1, float %val, float %val, float %val, float %val, i1 true, i32 %row) ret void } declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) + declare void @llvm.amdgcn.exp.row.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i32) ... --- @@ -33,12 +41,12 @@ body: | ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32) - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0, 0 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0, 0 %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = COPY $sgpr2 %3:_(s32) = COPY $sgpr3 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), 0, 0, %0, %1, %2, %3, 0, 0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, %0(s32), %1(s32), %2(s32), %3(s32), 0, 0 ... --- name: exp_v @@ -54,10 +62,49 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3 - ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), 0, 0 + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), 0, 0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 %3:_(s32) = COPY $vgpr3 - G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), 0, 0, %0, %1, %2, %3, 0, 0 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, %0(s32), %1(s32), %2(s32), %3(s32), 0, 0 +... +--- +name: exp_row_s +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: exp_row_s + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.row), 12, 1, [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), -1, [[COPY]](s32) + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.row), 12, 1, %1(s32), %1(s32), %1(s32), %1(s32), -1, %0(s32) +... +--- +name: exp_row_v +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: exp_row_v + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32) + ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.row), 12, 1, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), -1, [[INTRINSIC_CONVERGENT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.row), 12, 1, %0(s32), %0(s32), %0(s32), %0(s32), -1, %1(s32) ... diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll index d2a29a94f5d0..fd4919480d8a 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll @@ -1,7 +1,11 @@ -; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX8,PREGFX11 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10,PREGFX11 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX11 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX11 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX8,PREGFX11 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX8,PREGFX11 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10,PREGFX11 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10,PREGFX11 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX11 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX11 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX11 %s +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX11 %s declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1 declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1 @@ -554,7 +558,7 @@ end: ; GFX8-DAG: v_mov_b32_e32 [[X:v[0-9]+]], s1 ; GFX8-DAG: v_mov_b32_e32 [[Y:v[0-9]+]], s0 ; GFX8-DAG: v_add_f32_e{{32|64}} [[Z0:v[0-9]+]] -; GFX8-DAG: v_sub_f32_e{{32|64}} [[Z1:v[0-9]+]] +; GFX8-DAG: v_{{sub|subrev}}_f32_e{{32|64}} [[Z1:v[0-9]+]] ; GFX8: {{exp|export}} param0, [[Y]], [[X]], [[Z0]], [[W0]]{{$}} ; GFX8-NEXT: {{exp|export}} param1, [[Y]], [[X]], [[Z1]], [[W1]] done{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.row.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.row.ll index 0451f7de5bf7..88c6383f8b5b 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.row.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.row.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11 -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11 +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefixes=GFX12 -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefixes=GFX12 +; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefixes=GFX12 declare void @llvm.amdgcn.exp.row.i32(i32, i32, i32, i32, i32, i32, i1, i32) declare void @llvm.amdgcn.exp.row.f32(i32, i32, float, float, float, float, i1, i32)