AMDGPU/GlobalISel: RegBankLegalize rules for amdgcn_exp/exp_row (#181956)

This commit is contained in:
vangthao95 2026-03-06 14:03:14 -08:00 committed by GitHub
parent dc8de1083e
commit d34f17904b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 93 additions and 17 deletions

View File

@ -1188,6 +1188,7 @@ LLT RegBankLegalizeHelper::getBTyFromID(RegBankLLTMappingApplyID ID, LLT Ty) {
switch (ID) {
case SgprB32:
case VgprB32:
case SgprB32_M0:
case UniInVgprB32:
if (Ty == LLT::scalar(32) || Ty == LLT::fixed_vector(2, 16) ||
isAnyPtr(Ty, 32))
@ -1640,6 +1641,16 @@ bool RegBankLegalizeHelper::applyMappingSrc(
}
break;
}
// Source operand that must end up in M0: make sure it lives in the SGPR bank.
case SgprB32_M0: {
// Sanity check: Ty must be what getBTyFromID returns for this mapping ID.
assert(Ty == getBTyFromID(MethodIDs[i], Ty));
// Operand is already in an SGPR; leave it untouched.
if (RB == SgprRB)
break;
// The only other bank expected here is VGPR.
assert(RB == VgprRB);
// Create a fresh SGPR of the same type, fill it via readfirstlane of the
// original register, and rewrite the operand to use the new SGPR.
Register NewSGPR32 = MRI.createVirtualRegister({SgprRB, Ty});
buildReadFirstLane(B, NewSGPR32, Op.getReg(), RBI);
Op.setReg(NewSGPR32);
break;
}
// sgpr and vgpr scalars with extend
case Sgpr32AExt: {
// Note: this ext allows S1, and it is meant to be combined away.

View File

@ -1430,6 +1430,16 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Uni(S64, {{Sgpr64}, {IntrId, Vcc, Sgpr64}})
.Uni(S32, {{Sgpr32}, {IntrId, Vcc, Sgpr32}});
// amdgcn_exp: no defs. After the intrinsic ID come two immediate operands,
// followed by four S32 source operands that are each mapped to Vgpr32.
addRulesForIOpcs({amdgcn_exp})
.Any({{_, _, _, S32, S32, S32, S32},
{{}, {IntrId, Imm, Imm, Vgpr32, Vgpr32, Vgpr32, Vgpr32}}});
// amdgcn_exp_row: same leading operands as amdgcn_exp, then one more
// immediate and a trailing S32 operand mapped with SgprB32_M0 (the
// applyMappingSrc handling for that ID inserts a readfirstlane when the
// operand is in a VGPR).
addRulesForIOpcs({amdgcn_exp_row})
.Any({{_, _, _, S32, S32, S32, S32, _, S32},
{{},
{IntrId, Imm, Imm, Vgpr32, Vgpr32, Vgpr32, Vgpr32, Imm,
SgprB32_M0}}});
addRulesForIOpcs({amdgcn_mbcnt_lo, amdgcn_mbcnt_hi}, Standard)
.Div(S32, {{}, {Vgpr32, None, Vgpr32, Vgpr32}});

View File

@ -226,6 +226,10 @@ enum RegBankLLTMappingApplyID {
SgprP0Call_WF,
SgprP4Call_WF,
// Src only modifiers: for operands that must end up in M0. If divergent,
// readfirstlane to SGPR. The result can then be copied to M0 in ISel.
SgprB32_M0,
// Src only modifiers: extends
Sgpr32AExt,
Sgpr32AExtBoolInReg,

View File

@ -1,18 +1,26 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass='amdgpu-regbankselect,amdgpu-regbanklegalize' %s -o - | FileCheck %s
--- |
define void @exp_s() {
call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 1.0, float 1.0, float 1.0, float 1.0, i1 0, i1 0)
define amdgpu_ps void @exp_s(float inreg %v0, float inreg %v1, float inreg %v2, float inreg %v3) {
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %v0, float %v1, float %v2, float %v3, i1 false, i1 false)
ret void
}
define void @exp_v() {
call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 1.0, float 1.0, float 1.0, float 1.0, i1 0, i1 0)
define amdgpu_ps void @exp_v(float %v0, float %v1, float %v2, float %v3) {
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %v0, float %v1, float %v2, float %v3, i1 false, i1 false)
ret void
}
define amdgpu_ps void @exp_row_s(i32 inreg %row, float inreg %val) {
call void @llvm.amdgcn.exp.row.f32(i32 12, i32 1, float %val, float %val, float %val, float %val, i1 true, i32 %row)
ret void
}
define amdgpu_ps void @exp_row_v(float %val, i32 %row) {
call void @llvm.amdgcn.exp.row.f32(i32 12, i32 1, float %val, float %val, float %val, float %val, i1 true, i32 %row)
ret void
}
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
declare void @llvm.amdgcn.exp.row.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i32)
...
---
@ -33,12 +41,12 @@ body: |
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32)
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0, 0
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), 0, 0
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
%3:_(s32) = COPY $sgpr3
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), 0, 0, %0, %1, %2, %3, 0, 0
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, %0(s32), %1(s32), %2(s32), %3(s32), 0, 0
...
---
name: exp_v
@ -54,10 +62,49 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY $vgpr3
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 0, [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), 0, 0
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), 0, 0
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(s32) = COPY $vgpr3
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), 0, 0, %0, %1, %2, %3, 0, 0
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, %0(s32), %1(s32), %2(s32), %3(s32), 0, 0
...
---
name: exp_row_s
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1
; CHECK-LABEL: name: exp_row_s
; CHECK: liveins: $sgpr0, $sgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.row), 12, 1, [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), -1, [[COPY]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.row), 12, 1, %1(s32), %1(s32), %1(s32), %1(s32), -1, %0(s32)
...
---
name: exp_row_v
legalized: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1
; CHECK-LABEL: name: exp_row_v
; CHECK: liveins: $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sgpr(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.row), 12, 1, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), -1, [[INTRINSIC_CONVERGENT]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.row), 12, 1, %0(s32), %0(s32), %0(s32), %0(s32), -1, %1(s32)
...

View File

@ -1,7 +1,11 @@
; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX8,PREGFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10,PREGFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX11 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX8,PREGFX11 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX8,PREGFX11 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10,PREGFX11 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10,PREGFX11 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX11 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX11 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX11 %s
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX11 %s
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
@ -554,7 +558,7 @@ end:
; GFX8-DAG: v_mov_b32_e32 [[X:v[0-9]+]], s1
; GFX8-DAG: v_mov_b32_e32 [[Y:v[0-9]+]], s0
; GFX8-DAG: v_add_f32_e{{32|64}} [[Z0:v[0-9]+]]
; GFX8-DAG: v_sub_f32_e{{32|64}} [[Z1:v[0-9]+]]
; GFX8-DAG: v_{{sub|subrev}}_f32_e{{32|64}} [[Z1:v[0-9]+]]
; GFX8: {{exp|export}} param0, [[Y]], [[X]], [[Z0]], [[W0]]{{$}}
; GFX8-NEXT: {{exp|export}} param1, [[Y]], [[X]], [[Z1]], [[W1]] done{{$}}

View File

@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s -check-prefixes=GFX11
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefixes=GFX12
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefixes=GFX12
; RUN: llc -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefixes=GFX12
declare void @llvm.amdgcn.exp.row.i32(i32, i32, i32, i32, i32, i32, i1, i32)
declare void @llvm.amdgcn.exp.row.f32(i32, i32, float, float, float, float, i1, i32)