[AMDGPU][GlobalISel] Add register bank legalization rules for amdgcn atomic fminmax num (#184564)

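This patch adds register bank legalization rules for the amdgcn global and
flat atomic fmin.num/fmax.num intrinsics in the AMDGPU GlobalISel pipeline.
The tests now run GlobalISel with -new-reg-bank-select and add cases where
the pointer is passed in SGPRs; the flat test additionally covers gfx1250.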
This commit is contained in:
Syadus Sefat 2026-03-10 14:53:32 -05:00 committed by GitHub
parent ab048ac6c0
commit b50cf35d57
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 221 additions and 56 deletions

View File

@ -1505,6 +1505,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
addRulesForIOpcs({amdgcn_global_atomic_ordered_add_b64})
.Any({{DivS64}, {{Vgpr64}, {IntrId, VgprP1, Vgpr64}}});
// Register-bank rules for the atomic fmin_num/fmax_num intrinsics. The min
// and max opcode of each address space share one rule, and the global and
// flat rules differ only in the pointer operand mapping: VgprP1 for the
// global pair, VgprP0 for the flat pair. Both the first (single-entry) list
// and the data slot of the second list use Vgpr32.
// NOTE(review): only a Div(S32, ...) rule is registered; presumably the
// result of these atomics is always treated as divergent, so no uniform
// rule is needed -- confirm.
addRulesForIOpcs(
{amdgcn_global_atomic_fmin_num, amdgcn_global_atomic_fmax_num}, Standard)
.Div(S32, {{Vgpr32}, {IntrId, VgprP1, Vgpr32}});
addRulesForIOpcs({amdgcn_flat_atomic_fmin_num, amdgcn_flat_atomic_fmax_num},
Standard)
.Div(S32, {{Vgpr32}, {IntrId, VgprP0, Vgpr32}});
addRulesForIOpcs({amdgcn_raw_buffer_load_lds})
.Any({{_}, {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Sgpr32}}});

View File

@ -1,53 +1,168 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefixes=GFX12,GFX12-SDAG
; RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefixes=GFX12,GFX12-GISEL
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "s_setreg_imm32_b32" --version 6
; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefixes=GCN,GFX12,GFX12-SDAG
; RUN: llc < %s -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefixes=GCN,GFX12,GFX12-GISEL
; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 | FileCheck %s -check-prefixes=GCN,GFX1250,GFX1250-SDAG
; RUN: llc < %s -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 | FileCheck %s -check-prefixes=GCN,GFX1250,GFX1250-GISEL
declare float @llvm.amdgcn.flat.atomic.fmin.num.f32.p1.f32(ptr %ptr, float %data)
declare float @llvm.amdgcn.flat.atomic.fmax.num.f32.p1.f32(ptr %ptr, float %data)
; No-return form of flat fmin.num: the intrinsic result %ret is unused and the
; function returns void, so the expected instruction has no destination VGPR.
; NOTE(review): this body carries two check sets for the same instructions
; (a GFX12 set using -NEXT and a GCN set without it) -- looks like
; pre-/post-change residue; confirm which prefix set the header enables.
define amdgpu_cs void @flat_atomic_fmin_num_f32_noret(ptr %ptr, float %data) {
; GFX12-LABEL: flat_atomic_fmin_num_f32_noret:
; GFX12: ; %bb.0:
; GFX12-NEXT: flat_atomic_min_num_f32 v[0:1], v2
; GFX12-NEXT: s_endpgm
; GCN-LABEL: flat_atomic_fmin_num_f32_noret:
; GCN: ; %bb.0:
; GCN: flat_atomic_min_num_f32 v[0:1], v2
; GCN: s_endpgm
%ret = call float @llvm.amdgcn.flat.atomic.fmin.num.f32.p1.f32(ptr %ptr, float %data)
ret void
}
; No-return form of flat fmax.num: the intrinsic result %ret is unused and the
; function returns void, so the expected instruction has no destination VGPR.
; NOTE(review): this body carries two check sets for the same instructions
; (a GFX12 set using -NEXT and a GCN set without it) -- looks like
; pre-/post-change residue; confirm which prefix set the header enables.
define amdgpu_cs void @flat_atomic_fmax_num_f32_noret(ptr %ptr, float %data) {
; GFX12-LABEL: flat_atomic_fmax_num_f32_noret:
; GFX12: ; %bb.0:
; GFX12-NEXT: flat_atomic_max_num_f32 v[0:1], v2
; GFX12-NEXT: s_endpgm
; GCN-LABEL: flat_atomic_fmax_num_f32_noret:
; GCN: ; %bb.0:
; GCN: flat_atomic_max_num_f32 v[0:1], v2
; GCN: s_endpgm
%ret = call float @llvm.amdgcn.flat.atomic.fmax.num.f32.p1.f32(ptr %ptr, float %data)
ret void
}
; Returning form of flat fmin.num: the intrinsic result is returned from the
; shader, so the expected instruction carries th:TH_ATOMIC_RETURN and is
; followed by a wait before the epilog. %data is the first argument (v0) and
; %ptr the second (v[1:2]); on gfx1250 the checks expect the pointer to be
; copied to a different VGPR pair before the atomic.
define amdgpu_cs float @flat_atomic_fmin_num_f32_rtn(float %data, ptr %ptr) {
; GFX12-LABEL: flat_atomic_fmin_num_f32_rtn:
; GFX12: ; %bb.0:
; GFX12: flat_atomic_min_num_f32 v0, v[1:2], v0 th:TH_ATOMIC_RETURN
; GFX12: s_wait_loadcnt_dscnt 0x0
; GFX12: ; return to shader part epilog
;
; GFX1250-SDAG-LABEL: flat_atomic_fmin_num_f32_rtn:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
; GFX1250-SDAG: flat_atomic_min_num_f32 v0, v[2:3], v0 th:TH_ATOMIC_RETURN
; GFX1250-SDAG: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG: ; return to shader part epilog
;
; GFX1250-GISEL-LABEL: flat_atomic_fmin_num_f32_rtn:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
; GFX1250-GISEL: flat_atomic_min_num_f32 v0, v[4:5], v0 th:TH_ATOMIC_RETURN
; GFX1250-GISEL: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL: ; return to shader part epilog
%ret = call float @llvm.amdgcn.flat.atomic.fmin.num.f32.p1.f32(ptr %ptr, float %data)
ret float %ret
}
; Returning form of flat fmax.num: the intrinsic result is returned from the
; shader, so the expected instruction carries th:TH_ATOMIC_RETURN and is
; followed by a wait before the epilog. %data is the first argument (v0) and
; %ptr the second (v[1:2]); on gfx1250 the checks expect the pointer to be
; copied to a different VGPR pair before the atomic.
define amdgpu_cs float @flat_atomic_fmax_num_f32_rtn(float %data, ptr %ptr) {
; GFX12-LABEL: flat_atomic_fmax_num_f32_rtn:
; GFX12: ; %bb.0:
; GFX12: flat_atomic_max_num_f32 v0, v[1:2], v0 th:TH_ATOMIC_RETURN
; GFX12: s_wait_loadcnt_dscnt 0x0
; GFX12: ; return to shader part epilog
;
; GFX1250-SDAG-LABEL: flat_atomic_fmax_num_f32_rtn:
; GFX1250-SDAG: ; %bb.0:
; GFX1250-SDAG: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
; GFX1250-SDAG: flat_atomic_max_num_f32 v0, v[2:3], v0 th:TH_ATOMIC_RETURN
; GFX1250-SDAG: s_wait_loadcnt_dscnt 0x0
; GFX1250-SDAG: ; return to shader part epilog
;
; GFX1250-GISEL-LABEL: flat_atomic_fmax_num_f32_rtn:
; GFX1250-GISEL: ; %bb.0:
; GFX1250-GISEL: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
; GFX1250-GISEL: flat_atomic_max_num_f32 v0, v[4:5], v0 th:TH_ATOMIC_RETURN
; GFX1250-GISEL: s_wait_loadcnt_dscnt 0x0
; GFX1250-GISEL: ; return to shader part epilog
%ret = call float @llvm.amdgcn.flat.atomic.fmax.num.f32.p1.f32(ptr %ptr, float %data)
ret float %ret
}
; No-return flat fmin.num with the pointer passed `inreg` (arrives in s[0:1]
; per the check lines). The gfx1200 checks expect the pointer to be copied
; into v[1:2] first (SDAG and GlobalISel differ only in the order of the two
; moves); the gfx1250 checks expect s[0:1] to be used directly together with
; a zeroed v1.
define amdgpu_ps void @flat_atomic_fmin_num_f32_noret_saddr(ptr inreg %ptr, float %data) {
; GFX12-SDAG-LABEL: flat_atomic_fmin_num_f32_noret_saddr:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, s1
; GFX12-SDAG: flat_atomic_min_num_f32 v[1:2], v0
; GFX12-SDAG: s_endpgm
;
; GFX12-GISEL-LABEL: flat_atomic_fmin_num_f32_noret_saddr:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX12-GISEL: flat_atomic_min_num_f32 v[1:2], v0
; GFX12-GISEL: s_endpgm
;
; GFX1250-LABEL: flat_atomic_fmin_num_f32_noret_saddr:
; GFX1250: ; %bb.0:
; GFX1250: v_mov_b32_e32 v1, 0
; GFX1250: flat_atomic_min_num_f32 v1, v0, s[0:1]
; GFX1250: s_endpgm
%ret = call float @llvm.amdgcn.flat.atomic.fmin.num.f32.p1.f32(ptr %ptr, float %data)
ret void
}
; No-return flat fmax.num with the pointer passed `inreg` (arrives in s[0:1]
; per the check lines). The gfx1200 checks expect the pointer to be copied
; into v[1:2] first (SDAG and GlobalISel differ only in the order of the two
; moves); the gfx1250 checks expect s[0:1] to be used directly together with
; a zeroed v1.
define amdgpu_ps void @flat_atomic_fmax_num_f32_noret_saddr(ptr inreg %ptr, float %data) {
; GFX12-SDAG-LABEL: flat_atomic_fmax_num_f32_noret_saddr:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, s1
; GFX12-SDAG: flat_atomic_max_num_f32 v[1:2], v0
; GFX12-SDAG: s_endpgm
;
; GFX12-GISEL-LABEL: flat_atomic_fmax_num_f32_noret_saddr:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX12-GISEL: flat_atomic_max_num_f32 v[1:2], v0
; GFX12-GISEL: s_endpgm
;
; GFX1250-LABEL: flat_atomic_fmax_num_f32_noret_saddr:
; GFX1250: ; %bb.0:
; GFX1250: v_mov_b32_e32 v1, 0
; GFX1250: flat_atomic_max_num_f32 v1, v0, s[0:1]
; GFX1250: s_endpgm
%ret = call float @llvm.amdgcn.flat.atomic.fmax.num.f32.p1.f32(ptr %ptr, float %data)
ret void
}
; Returning flat fmin.num with the pointer passed `inreg` (arrives in s[0:1]
; per the check lines). The intrinsic result is returned, so every expected
; form carries th:TH_ATOMIC_RETURN and waits before the epilog. The gfx1200
; checks copy the pointer into v[1:2]; the gfx1250 checks use s[0:1] directly
; together with a zeroed v1.
define amdgpu_ps float @flat_atomic_fmin_num_f32_rtn_saddr(ptr inreg %ptr, float %data) {
; GFX12-SDAG-LABEL: flat_atomic_fmin_num_f32_rtn_saddr:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, s1
; GFX12-SDAG: flat_atomic_min_num_f32 v0, v[1:2], v0 th:TH_ATOMIC_RETURN
; GFX12-SDAG: s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: flat_atomic_fmin_num_f32_rtn_saddr:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX12-GISEL: flat_atomic_min_num_f32 v0, v[1:2], v0 th:TH_ATOMIC_RETURN
; GFX12-GISEL: s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL: ; return to shader part epilog
;
; GFX1250-LABEL: flat_atomic_fmin_num_f32_rtn_saddr:
; GFX1250: ; %bb.0:
; GFX1250: v_mov_b32_e32 v1, 0
; GFX1250: flat_atomic_min_num_f32 v0, v1, v0, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250: s_wait_loadcnt_dscnt 0x0
; GFX1250: ; return to shader part epilog
%ret = call float @llvm.amdgcn.flat.atomic.fmin.num.f32.p1.f32(ptr %ptr, float %data)
ret float %ret
}
; Returning flat fmax.num with the pointer passed `inreg` (arrives in s[0:1]
; per the check lines). The intrinsic result is returned, so every expected
; form carries th:TH_ATOMIC_RETURN and waits before the epilog. The gfx1200
; checks copy the pointer into v[1:2]; the gfx1250 checks use s[0:1] directly
; together with a zeroed v1.
define amdgpu_ps float @flat_atomic_fmax_num_f32_rtn_saddr(ptr inreg %ptr, float %data) {
; GFX12-SDAG-LABEL: flat_atomic_fmax_num_f32_rtn_saddr:
; GFX12-SDAG: ; %bb.0:
; GFX12-SDAG: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, s1
; GFX12-SDAG: flat_atomic_max_num_f32 v0, v[1:2], v0 th:TH_ATOMIC_RETURN
; GFX12-SDAG: s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG: ; return to shader part epilog
;
; GFX12-GISEL-LABEL: flat_atomic_fmax_num_f32_rtn_saddr:
; GFX12-GISEL: ; %bb.0:
; GFX12-GISEL: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX12-GISEL: flat_atomic_max_num_f32 v0, v[1:2], v0 th:TH_ATOMIC_RETURN
; GFX12-GISEL: s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL: ; return to shader part epilog
;
; GFX1250-LABEL: flat_atomic_fmax_num_f32_rtn_saddr:
; GFX1250: ; %bb.0:
; GFX1250: v_mov_b32_e32 v1, 0
; GFX1250: flat_atomic_max_num_f32 v0, v1, v0, s[0:1] th:TH_ATOMIC_RETURN
; GFX1250: s_wait_loadcnt_dscnt 0x0
; GFX1250: ; return to shader part epilog
%ret = call float @llvm.amdgcn.flat.atomic.fmax.num.f32.p1.f32(ptr %ptr, float %data)
ret float %ret
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX12-GISEL: {{.*}}
; GFX12-SDAG: {{.*}}

View File

@ -1,51 +1,93 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefixes=GFX12,GFX12-SDAG
; RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefixes=GFX12,GFX12-GISEL
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "s_setreg_imm32_b32" --version 6
; RUN: llc < %s -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefixes=GCN
declare float @llvm.amdgcn.global.atomic.fmin.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
declare float @llvm.amdgcn.global.atomic.fmax.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
; No-return form of global fmin.num: the intrinsic result %ret is unused and
; the function returns void, so the expected instruction has no destination
; VGPR and takes the address from v[0:1] with `off`.
; NOTE(review): this body carries two check sets for the same instructions
; (a GFX12 set using -NEXT and a GCN set without it) -- looks like
; pre-/post-change residue; confirm which prefix set the header enables.
define amdgpu_cs void @global_atomic_fmin_num_f32_noret(ptr addrspace(1) %ptr, float %data) {
; GFX12-LABEL: global_atomic_fmin_num_f32_noret:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_atomic_min_num_f32 v[0:1], v2, off
; GFX12-NEXT: s_endpgm
; GCN-LABEL: global_atomic_fmin_num_f32_noret:
; GCN: ; %bb.0:
; GCN: global_atomic_min_num_f32 v[0:1], v2, off
; GCN: s_endpgm
%ret = call float @llvm.amdgcn.global.atomic.fmin.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
ret void
}
; No-return form of global fmax.num: the intrinsic result %ret is unused and
; the function returns void, so the expected instruction has no destination
; VGPR and takes the address from v[0:1] with `off`.
; NOTE(review): this body carries two check sets for the same instructions
; (a GFX12 set using -NEXT and a GCN set without it) -- looks like
; pre-/post-change residue; confirm which prefix set the header enables.
define amdgpu_cs void @global_atomic_fmax_num_f32_noret(ptr addrspace(1) %ptr, float %data) {
; GFX12-LABEL: global_atomic_fmax_num_f32_noret:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_atomic_max_num_f32 v[0:1], v2, off
; GFX12-NEXT: s_endpgm
; GCN-LABEL: global_atomic_fmax_num_f32_noret:
; GCN: ; %bb.0:
; GCN: global_atomic_max_num_f32 v[0:1], v2, off
; GCN: s_endpgm
%ret = call float @llvm.amdgcn.global.atomic.fmax.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
ret void
}
; Returning form of global fmax.num: the intrinsic result is stored to %out,
; so the expected code uses th:TH_ATOMIC_RETURN, waits on loadcnt, and then
; issues a global_store_b32 of v0 to v[3:4].
; NOTE(review): this body carries two check sets for the same instructions
; (a GFX12 set using -NEXT and a GCN set without it) -- looks like
; pre-/post-change residue; confirm which prefix set the header enables.
define amdgpu_cs void @global_atomic_fmax_num_f32_rtn(ptr addrspace(1) %ptr, float %data, ptr addrspace(1) %out) {
; GFX12-LABEL: global_atomic_fmax_num_f32_rtn:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_atomic_max_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b32 v[3:4], v0, off
; GFX12-NEXT: s_endpgm
; GCN-LABEL: global_atomic_fmax_num_f32_rtn:
; GCN: ; %bb.0:
; GCN: global_atomic_max_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN
; GCN: s_wait_loadcnt 0x0
; GCN: global_store_b32 v[3:4], v0, off
; GCN: s_endpgm
%ret = call float @llvm.amdgcn.global.atomic.fmax.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
store float %ret, ptr addrspace(1) %out
ret void
}
; Returning form of global fmin.num: the intrinsic result is stored to %out,
; so the expected code uses th:TH_ATOMIC_RETURN, waits on loadcnt, and then
; issues a global_store_b32 of v0 to v[3:4].
; NOTE(review): this body carries two check sets for the same instructions
; (a GFX12 set using -NEXT and a GCN set without it) -- looks like
; pre-/post-change residue; confirm which prefix set the header enables.
define amdgpu_cs void @global_atomic_fmin_num_f32_rtn(ptr addrspace(1) %ptr, float %data, ptr addrspace(1) %out) {
; GFX12-LABEL: global_atomic_fmin_num_f32_rtn:
; GFX12: ; %bb.0:
; GFX12-NEXT: global_atomic_min_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b32 v[3:4], v0, off
; GFX12-NEXT: s_endpgm
; GCN-LABEL: global_atomic_fmin_num_f32_rtn:
; GCN: ; %bb.0:
; GCN: global_atomic_min_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN
; GCN: s_wait_loadcnt 0x0
; GCN: global_store_b32 v[3:4], v0, off
; GCN: s_endpgm
%ret = call float @llvm.amdgcn.global.atomic.fmin.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
store float %ret, ptr addrspace(1) %out
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX12-GISEL: {{.*}}
; GFX12-SDAG: {{.*}}
; No-return global fmin.num with the pointer passed `inreg`. The checks
; expect the pointer to stay in s[0:1] and be used directly by the atomic,
; together with a zeroed v1; %data is in v0.
define amdgpu_ps void @global_atomic_fmin_num_f32_noret_saddr(ptr addrspace(1) inreg %ptr, float %data) {
; GCN-LABEL: global_atomic_fmin_num_f32_noret_saddr:
; GCN: ; %bb.0:
; GCN: v_mov_b32_e32 v1, 0
; GCN: global_atomic_min_num_f32 v1, v0, s[0:1]
; GCN: s_endpgm
%ret = call float @llvm.amdgcn.global.atomic.fmin.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
ret void
}
; No-return global fmax.num with the pointer passed `inreg`. The checks
; expect the pointer to stay in s[0:1] and be used directly by the atomic,
; together with a zeroed v1; %data is in v0.
define amdgpu_ps void @global_atomic_fmax_num_f32_noret_saddr(ptr addrspace(1) inreg %ptr, float %data) {
; GCN-LABEL: global_atomic_fmax_num_f32_noret_saddr:
; GCN: ; %bb.0:
; GCN: v_mov_b32_e32 v1, 0
; GCN: global_atomic_max_num_f32 v1, v0, s[0:1]
; GCN: s_endpgm
%ret = call float @llvm.amdgcn.global.atomic.fmax.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
ret void
}
; Returning global fmin.num with the pointer passed `inreg` and the result
; stored to %out. The checks expect the atomic to use s[0:1] directly with a
; zeroed v3 and th:TH_ATOMIC_RETURN, then a loadcnt wait, then a
; global_store_b32 of v0 to v[1:2].
define amdgpu_ps void @global_atomic_fmin_num_f32_rtn_saddr(ptr addrspace(1) inreg %ptr, float %data, ptr addrspace(1) %out) {
; GCN-LABEL: global_atomic_fmin_num_f32_rtn_saddr:
; GCN: ; %bb.0:
; GCN: v_mov_b32_e32 v3, 0
; GCN: global_atomic_min_num_f32 v0, v3, v0, s[0:1] th:TH_ATOMIC_RETURN
; GCN: s_wait_loadcnt 0x0
; GCN: global_store_b32 v[1:2], v0, off
; GCN: s_endpgm
%ret = call float @llvm.amdgcn.global.atomic.fmin.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
store float %ret, ptr addrspace(1) %out
ret void
}
; Returning global fmax.num with the pointer passed `inreg` and the result
; stored to %out. The checks expect the atomic to use s[0:1] directly with a
; zeroed v3 and th:TH_ATOMIC_RETURN, then a loadcnt wait, then a
; global_store_b32 of v0 to v[1:2].
define amdgpu_ps void @global_atomic_fmax_num_f32_rtn_saddr(ptr addrspace(1) inreg %ptr, float %data, ptr addrspace(1) %out) {
; GCN-LABEL: global_atomic_fmax_num_f32_rtn_saddr:
; GCN: ; %bb.0:
; GCN: v_mov_b32_e32 v3, 0
; GCN: global_atomic_max_num_f32 v0, v3, v0, s[0:1] th:TH_ATOMIC_RETURN
; GCN: s_wait_loadcnt 0x0
; GCN: global_store_b32 v[1:2], v0, off
; GCN: s_endpgm
%ret = call float @llvm.amdgcn.global.atomic.fmax.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
store float %ret, ptr addrspace(1) %out
ret void
}