[AMDGPU][GlobalISel] Add register bank legalization rules for amdgcn atomic fmin/fmax num (#184564)
This patch adds register bank legalization rules for amdgcn global/flat atomic fmin/fmax num operations in the AMDGPU GlobalISel pipeline.
This commit is contained in:
parent
ab048ac6c0
commit
b50cf35d57
@ -1505,6 +1505,14 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
|
||||
addRulesForIOpcs({amdgcn_global_atomic_ordered_add_b64})
|
||||
.Any({{DivS64}, {{Vgpr64}, {IntrId, VgprP1, Vgpr64}}});
|
||||
|
||||
addRulesForIOpcs(
|
||||
{amdgcn_global_atomic_fmin_num, amdgcn_global_atomic_fmax_num}, Standard)
|
||||
.Div(S32, {{Vgpr32}, {IntrId, VgprP1, Vgpr32}});
|
||||
|
||||
addRulesForIOpcs({amdgcn_flat_atomic_fmin_num, amdgcn_flat_atomic_fmax_num},
|
||||
Standard)
|
||||
.Div(S32, {{Vgpr32}, {IntrId, VgprP0, Vgpr32}});
|
||||
|
||||
addRulesForIOpcs({amdgcn_raw_buffer_load_lds})
|
||||
.Any({{_}, {{}, {IntrId, SgprV4S32, SgprP3, Imm, Vgpr32, Sgpr32}}});
|
||||
|
||||
|
||||
@ -1,53 +1,168 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefixes=GFX12,GFX12-SDAG
|
||||
; RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefixes=GFX12,GFX12-GISEL
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "s_setreg_imm32_b32" --version 6
|
||||
; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefixes=GCN,GFX12,GFX12-SDAG
|
||||
; RUN: llc < %s -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefixes=GCN,GFX12,GFX12-GISEL
|
||||
; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1250 | FileCheck %s -check-prefixes=GCN,GFX1250,GFX1250-SDAG
|
||||
; RUN: llc < %s -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1250 | FileCheck %s -check-prefixes=GCN,GFX1250,GFX1250-GISEL
|
||||
|
||||
declare float @llvm.amdgcn.flat.atomic.fmin.num.f32.p1.f32(ptr %ptr, float %data)
|
||||
declare float @llvm.amdgcn.flat.atomic.fmax.num.f32.p1.f32(ptr %ptr, float %data)
|
||||
|
||||
define amdgpu_cs void @flat_atomic_fmin_num_f32_noret(ptr %ptr, float %data) {
|
||||
; GFX12-LABEL: flat_atomic_fmin_num_f32_noret:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: flat_atomic_min_num_f32 v[0:1], v2
|
||||
; GFX12-NEXT: s_endpgm
|
||||
; GCN-LABEL: flat_atomic_fmin_num_f32_noret:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN: flat_atomic_min_num_f32 v[0:1], v2
|
||||
; GCN: s_endpgm
|
||||
%ret = call float @llvm.amdgcn.flat.atomic.fmin.num.f32.p1.f32(ptr %ptr, float %data)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_cs void @flat_atomic_fmax_num_f32_noret(ptr %ptr, float %data) {
|
||||
; GFX12-LABEL: flat_atomic_fmax_num_f32_noret:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: flat_atomic_max_num_f32 v[0:1], v2
|
||||
; GFX12-NEXT: s_endpgm
|
||||
; GCN-LABEL: flat_atomic_fmax_num_f32_noret:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN: flat_atomic_max_num_f32 v[0:1], v2
|
||||
; GCN: s_endpgm
|
||||
%ret = call float @llvm.amdgcn.flat.atomic.fmax.num.f32.p1.f32(ptr %ptr, float %data)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_cs float @flat_atomic_fmin_num_f32_rtn(ptr %ptr, float %data, ptr %out) {
|
||||
define amdgpu_cs float @flat_atomic_fmin_num_f32_rtn(float %data, ptr %ptr) {
|
||||
; GFX12-LABEL: flat_atomic_fmin_num_f32_rtn:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: flat_atomic_min_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
|
||||
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-NEXT: flat_store_b32 v[3:4], v0
|
||||
; GFX12-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-NEXT: ; return to shader part epilog
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12: flat_atomic_min_num_f32 v0, v[1:2], v0 th:TH_ATOMIC_RETURN
|
||||
; GFX12: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12: ; return to shader part epilog
|
||||
;
|
||||
; GFX1250-SDAG-LABEL: flat_atomic_fmin_num_f32_rtn:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
||||
; GFX1250-SDAG: flat_atomic_min_num_f32 v0, v[2:3], v0 th:TH_ATOMIC_RETURN
|
||||
; GFX1250-SDAG: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-SDAG: ; return to shader part epilog
|
||||
;
|
||||
; GFX1250-GISEL-LABEL: flat_atomic_fmin_num_f32_rtn:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
||||
; GFX1250-GISEL: flat_atomic_min_num_f32 v0, v[4:5], v0 th:TH_ATOMIC_RETURN
|
||||
; GFX1250-GISEL: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-GISEL: ; return to shader part epilog
|
||||
%ret = call float @llvm.amdgcn.flat.atomic.fmin.num.f32.p1.f32(ptr %ptr, float %data)
|
||||
store float %ret, ptr %out
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define amdgpu_cs float @flat_atomic_fmax_num_f32_rtn(ptr %ptr, float %data, ptr %out) {
|
||||
define amdgpu_cs float @flat_atomic_fmax_num_f32_rtn(float %data, ptr %ptr) {
|
||||
; GFX12-LABEL: flat_atomic_fmax_num_f32_rtn:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: flat_atomic_max_num_f32 v0, v[0:1], v2 th:TH_ATOMIC_RETURN
|
||||
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-NEXT: flat_store_b32 v[3:4], v0
|
||||
; GFX12-NEXT: s_wait_dscnt 0x0
|
||||
; GFX12-NEXT: ; return to shader part epilog
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12: flat_atomic_max_num_f32 v0, v[1:2], v0 th:TH_ATOMIC_RETURN
|
||||
; GFX12: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12: ; return to shader part epilog
|
||||
;
|
||||
; GFX1250-SDAG-LABEL: flat_atomic_fmax_num_f32_rtn:
|
||||
; GFX1250-SDAG: ; %bb.0:
|
||||
; GFX1250-SDAG: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
|
||||
; GFX1250-SDAG: flat_atomic_max_num_f32 v0, v[2:3], v0 th:TH_ATOMIC_RETURN
|
||||
; GFX1250-SDAG: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-SDAG: ; return to shader part epilog
|
||||
;
|
||||
; GFX1250-GISEL-LABEL: flat_atomic_fmax_num_f32_rtn:
|
||||
; GFX1250-GISEL: ; %bb.0:
|
||||
; GFX1250-GISEL: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
|
||||
; GFX1250-GISEL: flat_atomic_max_num_f32 v0, v[4:5], v0 th:TH_ATOMIC_RETURN
|
||||
; GFX1250-GISEL: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250-GISEL: ; return to shader part epilog
|
||||
%ret = call float @llvm.amdgcn.flat.atomic.fmax.num.f32.p1.f32(ptr %ptr, float %data)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define amdgpu_ps void @flat_atomic_fmin_num_f32_noret_saddr(ptr inreg %ptr, float %data) {
|
||||
; GFX12-SDAG-LABEL: flat_atomic_fmin_num_f32_noret_saddr:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, s1
|
||||
; GFX12-SDAG: flat_atomic_min_num_f32 v[1:2], v0
|
||||
; GFX12-SDAG: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: flat_atomic_fmin_num_f32_noret_saddr:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
|
||||
; GFX12-GISEL: flat_atomic_min_num_f32 v[1:2], v0
|
||||
; GFX12-GISEL: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: flat_atomic_fmin_num_f32_noret_saddr:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250: v_mov_b32_e32 v1, 0
|
||||
; GFX1250: flat_atomic_min_num_f32 v1, v0, s[0:1]
|
||||
; GFX1250: s_endpgm
|
||||
%ret = call float @llvm.amdgcn.flat.atomic.fmin.num.f32.p1.f32(ptr %ptr, float %data)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @flat_atomic_fmax_num_f32_noret_saddr(ptr inreg %ptr, float %data) {
|
||||
; GFX12-SDAG-LABEL: flat_atomic_fmax_num_f32_noret_saddr:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, s1
|
||||
; GFX12-SDAG: flat_atomic_max_num_f32 v[1:2], v0
|
||||
; GFX12-SDAG: s_endpgm
|
||||
;
|
||||
; GFX12-GISEL-LABEL: flat_atomic_fmax_num_f32_noret_saddr:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
|
||||
; GFX12-GISEL: flat_atomic_max_num_f32 v[1:2], v0
|
||||
; GFX12-GISEL: s_endpgm
|
||||
;
|
||||
; GFX1250-LABEL: flat_atomic_fmax_num_f32_noret_saddr:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250: v_mov_b32_e32 v1, 0
|
||||
; GFX1250: flat_atomic_max_num_f32 v1, v0, s[0:1]
|
||||
; GFX1250: s_endpgm
|
||||
%ret = call float @llvm.amdgcn.flat.atomic.fmax.num.f32.p1.f32(ptr %ptr, float %data)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps float @flat_atomic_fmin_num_f32_rtn_saddr(ptr inreg %ptr, float %data) {
|
||||
; GFX12-SDAG-LABEL: flat_atomic_fmin_num_f32_rtn_saddr:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, s1
|
||||
; GFX12-SDAG: flat_atomic_min_num_f32 v0, v[1:2], v0 th:TH_ATOMIC_RETURN
|
||||
; GFX12-SDAG: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-SDAG: ; return to shader part epilog
|
||||
;
|
||||
; GFX12-GISEL-LABEL: flat_atomic_fmin_num_f32_rtn_saddr:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
|
||||
; GFX12-GISEL: flat_atomic_min_num_f32 v0, v[1:2], v0 th:TH_ATOMIC_RETURN
|
||||
; GFX12-GISEL: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL: ; return to shader part epilog
|
||||
;
|
||||
; GFX1250-LABEL: flat_atomic_fmin_num_f32_rtn_saddr:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250: v_mov_b32_e32 v1, 0
|
||||
; GFX1250: flat_atomic_min_num_f32 v0, v1, v0, s[0:1] th:TH_ATOMIC_RETURN
|
||||
; GFX1250: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250: ; return to shader part epilog
|
||||
%ret = call float @llvm.amdgcn.flat.atomic.fmin.num.f32.p1.f32(ptr %ptr, float %data)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
define amdgpu_ps float @flat_atomic_fmax_num_f32_rtn_saddr(ptr inreg %ptr, float %data) {
|
||||
; GFX12-SDAG-LABEL: flat_atomic_fmax_num_f32_rtn_saddr:
|
||||
; GFX12-SDAG: ; %bb.0:
|
||||
; GFX12-SDAG: v_dual_mov_b32 v1, s0 :: v_dual_mov_b32 v2, s1
|
||||
; GFX12-SDAG: flat_atomic_max_num_f32 v0, v[1:2], v0 th:TH_ATOMIC_RETURN
|
||||
; GFX12-SDAG: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-SDAG: ; return to shader part epilog
|
||||
;
|
||||
; GFX12-GISEL-LABEL: flat_atomic_fmax_num_f32_rtn_saddr:
|
||||
; GFX12-GISEL: ; %bb.0:
|
||||
; GFX12-GISEL: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
|
||||
; GFX12-GISEL: flat_atomic_max_num_f32 v0, v[1:2], v0 th:TH_ATOMIC_RETURN
|
||||
; GFX12-GISEL: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX12-GISEL: ; return to shader part epilog
|
||||
;
|
||||
; GFX1250-LABEL: flat_atomic_fmax_num_f32_rtn_saddr:
|
||||
; GFX1250: ; %bb.0:
|
||||
; GFX1250: v_mov_b32_e32 v1, 0
|
||||
; GFX1250: flat_atomic_max_num_f32 v0, v1, v0, s[0:1] th:TH_ATOMIC_RETURN
|
||||
; GFX1250: s_wait_loadcnt_dscnt 0x0
|
||||
; GFX1250: ; return to shader part epilog
|
||||
%ret = call float @llvm.amdgcn.flat.atomic.fmax.num.f32.p1.f32(ptr %ptr, float %data)
|
||||
store float %ret, ptr %out
|
||||
ret float %ret
|
||||
}
|
||||
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||||
; GFX12-GISEL: {{.*}}
|
||||
; GFX12-SDAG: {{.*}}
|
||||
|
||||
@ -1,51 +1,93 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefixes=GFX12,GFX12-SDAG
|
||||
; RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefixes=GFX12,GFX12-GISEL
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "s_setreg_imm32_b32" --version 6
|
||||
; RUN: llc < %s -global-isel=1 -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1200 | FileCheck %s -check-prefixes=GCN
|
||||
|
||||
declare float @llvm.amdgcn.global.atomic.fmin.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
|
||||
declare float @llvm.amdgcn.global.atomic.fmax.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
|
||||
|
||||
define amdgpu_cs void @global_atomic_fmin_num_f32_noret(ptr addrspace(1) %ptr, float %data) {
|
||||
; GFX12-LABEL: global_atomic_fmin_num_f32_noret:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: global_atomic_min_num_f32 v[0:1], v2, off
|
||||
; GFX12-NEXT: s_endpgm
|
||||
; GCN-LABEL: global_atomic_fmin_num_f32_noret:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN: global_atomic_min_num_f32 v[0:1], v2, off
|
||||
; GCN: s_endpgm
|
||||
%ret = call float @llvm.amdgcn.global.atomic.fmin.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_cs void @global_atomic_fmax_num_f32_noret(ptr addrspace(1) %ptr, float %data) {
|
||||
; GFX12-LABEL: global_atomic_fmax_num_f32_noret:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: global_atomic_max_num_f32 v[0:1], v2, off
|
||||
; GFX12-NEXT: s_endpgm
|
||||
; GCN-LABEL: global_atomic_fmax_num_f32_noret:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN: global_atomic_max_num_f32 v[0:1], v2, off
|
||||
; GCN: s_endpgm
|
||||
%ret = call float @llvm.amdgcn.global.atomic.fmax.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_cs void @global_atomic_fmax_num_f32_rtn(ptr addrspace(1) %ptr, float %data, ptr addrspace(1) %out) {
|
||||
; GFX12-LABEL: global_atomic_fmax_num_f32_rtn:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: global_atomic_max_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN
|
||||
; GFX12-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-NEXT: global_store_b32 v[3:4], v0, off
|
||||
; GFX12-NEXT: s_endpgm
|
||||
; GCN-LABEL: global_atomic_fmax_num_f32_rtn:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN: global_atomic_max_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN
|
||||
; GCN: s_wait_loadcnt 0x0
|
||||
; GCN: global_store_b32 v[3:4], v0, off
|
||||
; GCN: s_endpgm
|
||||
%ret = call float @llvm.amdgcn.global.atomic.fmax.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
|
||||
store float %ret, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_cs void @global_atomic_fmin_num_f32_rtn(ptr addrspace(1) %ptr, float %data, ptr addrspace(1) %out) {
|
||||
; GFX12-LABEL: global_atomic_fmin_num_f32_rtn:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: global_atomic_min_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN
|
||||
; GFX12-NEXT: s_wait_loadcnt 0x0
|
||||
; GFX12-NEXT: global_store_b32 v[3:4], v0, off
|
||||
; GFX12-NEXT: s_endpgm
|
||||
; GCN-LABEL: global_atomic_fmin_num_f32_rtn:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN: global_atomic_min_num_f32 v0, v[0:1], v2, off th:TH_ATOMIC_RETURN
|
||||
; GCN: s_wait_loadcnt 0x0
|
||||
; GCN: global_store_b32 v[3:4], v0, off
|
||||
; GCN: s_endpgm
|
||||
%ret = call float @llvm.amdgcn.global.atomic.fmin.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
|
||||
store float %ret, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
|
||||
; GFX12-GISEL: {{.*}}
|
||||
; GFX12-SDAG: {{.*}}
|
||||
|
||||
define amdgpu_ps void @global_atomic_fmin_num_f32_noret_saddr(ptr addrspace(1) inreg %ptr, float %data) {
|
||||
; GCN-LABEL: global_atomic_fmin_num_f32_noret_saddr:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN: v_mov_b32_e32 v1, 0
|
||||
; GCN: global_atomic_min_num_f32 v1, v0, s[0:1]
|
||||
; GCN: s_endpgm
|
||||
%ret = call float @llvm.amdgcn.global.atomic.fmin.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @global_atomic_fmax_num_f32_noret_saddr(ptr addrspace(1) inreg %ptr, float %data) {
|
||||
; GCN-LABEL: global_atomic_fmax_num_f32_noret_saddr:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN: v_mov_b32_e32 v1, 0
|
||||
; GCN: global_atomic_max_num_f32 v1, v0, s[0:1]
|
||||
; GCN: s_endpgm
|
||||
%ret = call float @llvm.amdgcn.global.atomic.fmax.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @global_atomic_fmin_num_f32_rtn_saddr(ptr addrspace(1) inreg %ptr, float %data, ptr addrspace(1) %out) {
|
||||
; GCN-LABEL: global_atomic_fmin_num_f32_rtn_saddr:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN: v_mov_b32_e32 v3, 0
|
||||
; GCN: global_atomic_min_num_f32 v0, v3, v0, s[0:1] th:TH_ATOMIC_RETURN
|
||||
; GCN: s_wait_loadcnt 0x0
|
||||
; GCN: global_store_b32 v[1:2], v0, off
|
||||
; GCN: s_endpgm
|
||||
%ret = call float @llvm.amdgcn.global.atomic.fmin.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
|
||||
store float %ret, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_ps void @global_atomic_fmax_num_f32_rtn_saddr(ptr addrspace(1) inreg %ptr, float %data, ptr addrspace(1) %out) {
|
||||
; GCN-LABEL: global_atomic_fmax_num_f32_rtn_saddr:
|
||||
; GCN: ; %bb.0:
|
||||
; GCN: v_mov_b32_e32 v3, 0
|
||||
; GCN: global_atomic_max_num_f32 v0, v3, v0, s[0:1] th:TH_ATOMIC_RETURN
|
||||
; GCN: s_wait_loadcnt 0x0
|
||||
; GCN: global_store_b32 v[1:2], v0, off
|
||||
; GCN: s_endpgm
|
||||
%ret = call float @llvm.amdgcn.global.atomic.fmax.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
|
||||
store float %ret, ptr addrspace(1) %out
|
||||
ret void
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user