[AMDGPU] Move InferAddressSpacesPass to middle end optimization pipeline (#138604)
The pass will run twice in the non-LTO pipeline at `O1` or higher. In the LTO post-link pipeline, it will run once at `O2` or higher, since the inliner and SROA don't run at `O1`.
This commit is contained in:
parent
1651aa2943
commit
84a69a0f8f
@ -811,6 +811,22 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
|
||||
#define GET_PASS_REGISTRY "AMDGPUPassRegistry.def"
|
||||
#include "llvm/Passes/TargetPassRegistry.inc"
|
||||
|
||||
PB.registerScalarOptimizerLateEPCallback(
|
||||
[](FunctionPassManager &FPM, OptimizationLevel Level) {
|
||||
if (Level == OptimizationLevel::O0)
|
||||
return;
|
||||
|
||||
FPM.addPass(InferAddressSpacesPass());
|
||||
});
|
||||
|
||||
PB.registerVectorizerEndEPCallback(
|
||||
[](FunctionPassManager &FPM, OptimizationLevel Level) {
|
||||
if (Level == OptimizationLevel::O0)
|
||||
return;
|
||||
|
||||
FPM.addPass(InferAddressSpacesPass());
|
||||
});
|
||||
|
||||
PB.registerPipelineEarlySimplificationEPCallback(
|
||||
[](ModulePassManager &PM, OptimizationLevel Level,
|
||||
ThinOrFullLTOPhase Phase) {
|
||||
@ -908,6 +924,12 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
|
||||
if (EnableLowerModuleLDS)
|
||||
PM.addPass(AMDGPULowerModuleLDSPass(*this));
|
||||
if (Level != OptimizationLevel::O0) {
|
||||
// We only want to run this with O2 or higher since inliner and SROA
|
||||
// don't run in O1.
|
||||
if (Level != OptimizationLevel::O1) {
|
||||
PM.addPass(
|
||||
createModuleToFunctionPassAdaptor(InferAddressSpacesPass()));
|
||||
}
|
||||
// Do we really need internalization in LTO?
|
||||
if (InternalizeSymbols) {
|
||||
PM.addPass(InternalizePass(mustPreserveGV));
|
||||
@ -1265,9 +1287,6 @@ void AMDGPUPassConfig::addIRPasses() {
|
||||
addPass(createAMDGPULowerModuleLDSLegacyPass(&TM));
|
||||
}
|
||||
|
||||
if (TM.getOptLevel() > CodeGenOptLevel::None)
|
||||
addPass(createInferAddressSpacesPass());
|
||||
|
||||
// Run atomic optimizer before Atomic Expand
|
||||
if ((TM.getTargetTriple().isAMDGCN()) &&
|
||||
(TM.getOptLevel() >= CodeGenOptLevel::Less) &&
|
||||
@ -2010,9 +2029,6 @@ void AMDGPUCodeGenPassBuilder::addIRPasses(AddIRPass &addPass) const {
|
||||
if (EnableLowerModuleLDS)
|
||||
addPass(AMDGPULowerModuleLDSPass(TM));
|
||||
|
||||
if (TM.getOptLevel() > CodeGenOptLevel::None)
|
||||
addPass(InferAddressSpacesPass());
|
||||
|
||||
// Run atomic optimizer before Atomic Expand
|
||||
if (TM.getOptLevel() >= CodeGenOptLevel::Less &&
|
||||
(AMDGPUAtomicOptimizerStrategy != ScanOptions::None))
|
||||
|
@ -136,30 +136,35 @@ define void @constrained_if_register_class() {
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; CHECK-NEXT: s_cbranch_scc0 .LBB4_2
|
||||
; CHECK-NEXT: .LBB4_1: ; %bb12
|
||||
; CHECK-NEXT: ; %bb.1: ; %bb12
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
; CHECK-NEXT: .LBB4_2: ; %bb2
|
||||
; CHECK-NEXT: s_getpc_b64 s[4:5]
|
||||
; CHECK-NEXT: s_add_u32 s4, s4, const.ptr@gotpcrel32@lo+4
|
||||
; CHECK-NEXT: s_addc_u32 s5, s5, const.ptr@gotpcrel32@hi+12
|
||||
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 0
|
||||
; CHECK-NEXT: s_mov_b32 s6, -1
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: global_load_dword v0, v0, s[4:5]
|
||||
; CHECK-NEXT: s_mov_b32 s4, -1
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, 1.0, v0
|
||||
; CHECK-NEXT: s_cbranch_vccnz .LBB4_4
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, s4
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, s5
|
||||
; CHECK-NEXT: flat_load_dword v0, v[0:1]
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: v_cmp_ngt_f32_e32 vcc, 1.0, v0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
|
||||
; CHECK-NEXT: ; %bb.3: ; %bb7
|
||||
; CHECK-NEXT: s_mov_b32 s4, 0
|
||||
; CHECK-NEXT: .LBB4_4: ; %bb8
|
||||
; CHECK-NEXT: s_cmp_lg_u32 s4, 0
|
||||
; CHECK-NEXT: s_cbranch_scc1 .LBB4_1
|
||||
; CHECK-NEXT: s_mov_b32 s6, 0
|
||||
; CHECK-NEXT: ; %bb.4: ; %bb8
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], s6, 0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[4:5], s[6:7]
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB4_6
|
||||
; CHECK-NEXT: ; %bb.5: ; %bb11
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, 4.0
|
||||
; CHECK-NEXT: buffer_store_dword v0, v0, s[0:3], 0 offen
|
||||
; CHECK-NEXT: .LBB4_6: ; %Flow
|
||||
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: s_setpc_b64 s[30:31]
|
||||
bb:
|
||||
|
@ -361,7 +361,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: successors: %bb.37(0x40000000), %bb.36(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr36_sgpr37, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_SBYTE renamable $vgpr40_vgpr41, 1024, 0, implicit $exec :: (load (s8) from %ir.i21, addrspace 1)
|
||||
; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_SBYTE renamable $vgpr40_vgpr41, 1024, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i23)
|
||||
; GFX90A-NEXT: renamable $vgpr42 = V_ADD_CO_U32_e32 1024, $vgpr40, implicit-def $vcc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $sgpr38_sgpr39 = S_MOV_B64 -1
|
||||
@ -407,7 +407,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: successors: %bb.39(0x40000000), %bb.38(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr44_sgpr45, $sgpr42_sgpr43, $sgpr54_sgpr55, $sgpr52_sgpr53, $sgpr64_sgpr65, $sgpr50_sgpr51, $sgpr66_sgpr67
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE renamable $vgpr40_vgpr41, 2048, 0, implicit $exec :: (load (s8) from %ir.i28, addrspace 1)
|
||||
; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_UBYTE renamable $vgpr40_vgpr41, 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i30)
|
||||
; GFX90A-NEXT: renamable $vgpr44 = V_ADD_CO_U32_e32 2048, $vgpr40, implicit-def $vcc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr40_sgpr41 = S_MOV_B64 -1
|
||||
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = COPY renamable $sgpr36_sgpr37
|
||||
@ -460,7 +460,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: successors: %bb.41(0x40000000), %bb.40(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr17, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr46_sgpr47:0x000000000000000F, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr18_sgpr19, $sgpr44_sgpr45, $sgpr50_sgpr51, $sgpr58_sgpr59, $sgpr60_sgpr61, $sgpr54_sgpr55, $sgpr62_sgpr63, $sgpr52_sgpr53, $sgpr64_sgpr65, $sgpr66_sgpr67
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE renamable $vgpr40_vgpr41, 3072, 0, implicit $exec :: (load (s8) from %ir.i35, addrspace 1)
|
||||
; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_UBYTE renamable $vgpr40_vgpr41, 3072, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i37)
|
||||
; GFX90A-NEXT: renamable $vgpr56 = V_ADD_CO_U32_e32 3072, $vgpr40, implicit-def $vcc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 -1
|
||||
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = COPY renamable $sgpr36_sgpr37
|
||||
@ -512,7 +512,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: renamable $vgpr58 = V_ADD_CO_U32_e32 4096, $vgpr40, implicit-def $vcc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = COPY $vcc
|
||||
; GFX90A-NEXT: renamable $vgpr59, dead renamable $sgpr18_sgpr19 = V_ADDC_U32_e64 0, $vgpr41, killed $sgpr18_sgpr19, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE renamable $vgpr58_vgpr59, 0, 0, implicit $exec :: (load (s8) from %ir.i42, addrspace 1)
|
||||
; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_UBYTE renamable $vgpr58_vgpr59, 0, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i44)
|
||||
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $sgpr44_sgpr45 = S_MOV_B64 -1
|
||||
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = COPY renamable $sgpr36_sgpr37
|
||||
@ -610,7 +610,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: renamable $sgpr18_sgpr19 = COPY $vcc
|
||||
; GFX90A-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 4096, $vgpr40, implicit-def $vcc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr1, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr41, killed $vcc, 0, implicit $exec
|
||||
; GFX90A-NEXT: renamable $vgpr0 = GLOBAL_LOAD_UBYTE killed renamable $vgpr0_vgpr1, 1024, 0, implicit $exec :: (load (s8) from %ir.i49, addrspace 1)
|
||||
; GFX90A-NEXT: renamable $vgpr0 = FLAT_LOAD_UBYTE killed renamable $vgpr0_vgpr1, 1024, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i51)
|
||||
; GFX90A-NEXT: renamable $sgpr48_sgpr49 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $sgpr60_sgpr61 = S_MOV_B64 -1
|
||||
; GFX90A-NEXT: renamable $sgpr62_sgpr63 = COPY renamable $sgpr36_sgpr37
|
||||
@ -726,7 +726,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
|
||||
; GFX90A-NEXT: successors: %bb.52(0x40000000), %bb.55(0x40000000)
|
||||
; GFX90A-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr18, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr24_sgpr25, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47:0x000000000000000F, $sgpr48_sgpr49, $sgpr64_sgpr65, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x0000000000000003, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr54_sgpr55
|
||||
; GFX90A-NEXT: {{ $}}
|
||||
; GFX90A-NEXT: renamable $vgpr6 = GLOBAL_LOAD_UBYTE renamable $vgpr0_vgpr1, 2048, 0, implicit $exec :: (load (s8) from %ir.i74, addrspace 1)
|
||||
; GFX90A-NEXT: renamable $vgpr6 = FLAT_LOAD_UBYTE renamable $vgpr0_vgpr1, 2048, 0, implicit $exec, implicit $flat_scr :: (load (s8) from %ir.i76)
|
||||
; GFX90A-NEXT: renamable $vgpr4 = V_ADD_CO_U32_e32 2048, $vgpr0, implicit-def $vcc, implicit $exec
|
||||
; GFX90A-NEXT: renamable $sgpr50_sgpr51 = S_MOV_B64 0
|
||||
; GFX90A-NEXT: renamable $sgpr56_sgpr57 = S_MOV_B64 -1
|
||||
|
@ -4,19 +4,49 @@
|
||||
define protected amdgpu_kernel void @IllegalGEPConst(i32 %a, ptr addrspace(1) %b, double %c) {
|
||||
; CHECK-LABEL: IllegalGEPConst:
|
||||
; CHECK: ; %bb.0: ; %entry
|
||||
; CHECK-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; CHECK-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; CHECK-NEXT: s_load_dword s6, s[4:5], 0x24
|
||||
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 0
|
||||
; CHECK-NEXT: s_mov_b32 s14, -1
|
||||
; CHECK-NEXT: s_mov_b32 s15, 0xe00000
|
||||
; CHECK-NEXT: s_add_u32 s12, s12, s11
|
||||
; CHECK-NEXT: s_addc_u32 s13, s13, 0
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_ashr_i32 s7, s6, 31
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, s2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, s3
|
||||
; CHECK-NEXT: s_lshl_b64 s[2:3], s[6:7], 3
|
||||
; CHECK-NEXT: s_add_u32 s0, s0, s2
|
||||
; CHECK-NEXT: s_addc_u32 s1, s1, s3
|
||||
; CHECK-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] offset:-8
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: s_lshl_b64 s[6:7], s[6:7], 3
|
||||
; CHECK-NEXT: s_add_u32 s0, s0, s6
|
||||
; CHECK-NEXT: s_addc_u32 s1, s1, s7
|
||||
; CHECK-NEXT: s_add_u32 s0, s0, -8
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
|
||||
; CHECK-NEXT: s_addc_u32 s1, s1, -1
|
||||
; CHECK-NEXT: s_cmp_eq_u32 s1, s5
|
||||
; CHECK-NEXT: s_cselect_b64 s[4:5], -1, 0
|
||||
; CHECK-NEXT: s_andn2_b64 vcc, exec, s[4:5]
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], -1
|
||||
; CHECK-NEXT: s_cbranch_vccnz .LBB0_3
|
||||
; CHECK-NEXT: ; %bb.1: ; %Flow
|
||||
; CHECK-NEXT: s_andn2_b64 vcc, exec, s[4:5]
|
||||
; CHECK-NEXT: s_cbranch_vccz .LBB0_4
|
||||
; CHECK-NEXT: .LBB0_2: ; %atomicrmw.phi
|
||||
; CHECK-NEXT: s_endpgm
|
||||
; CHECK-NEXT: .LBB0_3: ; %atomicrmw.global
|
||||
; CHECK-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
|
||||
; CHECK-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
|
||||
; CHECK-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_wbinvl1_vol
|
||||
; CHECK-NEXT: s_cbranch_execnz .LBB0_2
|
||||
; CHECK-NEXT: .LBB0_4: ; %atomicrmw.private
|
||||
; CHECK-NEXT: s_cmp_lg_u64 s[0:1], 0
|
||||
; CHECK-NEXT: s_cselect_b32 s0, s0, -1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, s0
|
||||
; CHECK-NEXT: buffer_load_dword v0, v2, s[12:15], 0 offen
|
||||
; CHECK-NEXT: buffer_load_dword v1, v2, s[12:15], 0 offen offset:4
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3]
|
||||
; CHECK-NEXT: buffer_store_dword v0, v2, s[12:15], 0 offen
|
||||
; CHECK-NEXT: buffer_store_dword v1, v2, s[12:15], 0 offen offset:4
|
||||
; CHECK-NEXT: s_endpgm
|
||||
entry:
|
||||
%i = add nsw i32 %a, -1
|
||||
|
@ -166,7 +166,6 @@ define float @global_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory(pt
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -352,7 +351,6 @@ define float @global_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -544,7 +542,6 @@ define float @global_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_gr
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -730,7 +727,6 @@ define void @global_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory(p
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -911,7 +907,6 @@ define void @global_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -1097,7 +1092,6 @@ define void @global_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_neg__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -1290,7 +1284,6 @@ define float @global_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_system_atomic_fadd_ret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -1476,7 +1469,6 @@ define void @global_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_system_atomic_fadd_noret_f32__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -1678,7 +1670,6 @@ define float @global_agent_atomic_fadd_ret_f32_maybe_remote(ptr addrspace(1) %pt
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f32_maybe_remote:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -1881,7 +1872,6 @@ define float @global_agent_atomic_fadd_ret_f32_maybe_remote__amdgpu_ignore_denor
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f32_maybe_remote__amdgpu_ignore_denormal_mode:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -2078,7 +2068,6 @@ define void @global_agent_atomic_fadd_noret_f32_maybe_remote__amdgpu_ignore_deno
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f32_maybe_remote__amdgpu_ignore_denormal_mode:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -2264,7 +2253,6 @@ define float @global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory(p
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -2437,7 +2425,6 @@ define float @global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory__
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -2640,7 +2627,6 @@ define float @global_agent_atomic_fadd_ret_f32_amdgpu_ignore_denormal_mode(ptr a
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f32_amdgpu_ignore_denormal_mode:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -2837,7 +2823,6 @@ define void @global_agent_atomic_fadd_noret_f32_maybe_remote(ptr addrspace(1) %p
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f32_maybe_remote:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -3019,7 +3004,6 @@ define void @global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory(
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -3175,7 +3159,6 @@ define void @global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f32___amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -3371,7 +3354,6 @@ define void @global_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode(ptr
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f32_amdgpu_ignore_denormal_mode:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -3572,7 +3554,6 @@ define float @global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory(ptr addr
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -3766,7 +3747,6 @@ define void @global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory(ptr add
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -3966,7 +3946,6 @@ define float @global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -4160,7 +4139,6 @@ define void @global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_ignore_denormal_mode:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -4344,7 +4322,6 @@ define float @global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f32__amdgpu_no_remote_memory__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -4524,7 +4501,6 @@ define void @global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f32__amdgpu_no_remote_memory__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -4698,7 +4674,6 @@ define float @global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memo
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -4870,7 +4845,6 @@ define float @global_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fi
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -5048,7 +5022,6 @@ define float @global_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fi
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -5208,7 +5181,6 @@ define void @global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_mem
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -5363,7 +5335,6 @@ define void @global_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_f
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -5523,7 +5494,6 @@ define void @global_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_f
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_neg__ftz__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -5702,7 +5672,6 @@ define float @global_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_f
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_system_atomic_fadd_ret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -5862,7 +5831,6 @@ define void @global_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_system_atomic_fadd_noret_f32__offset12b_pos__ftz__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -6034,7 +6002,6 @@ define float @global_agent_atomic_fadd_ret_f32__offset12b_pos__ieee__amdgpu_no_f
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f32__offset12b_pos__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -6191,7 +6158,6 @@ define void @global_agent_atomic_fadd_noret_f32__offset12b_pos__ieee__amdgpu_no_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f32__offset12b_pos__ieee__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -6392,7 +6358,6 @@ define float @global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_remote_memory(ptr
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_remote_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -6586,7 +6551,6 @@ define void @global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_remote_memory(pt
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_remote_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -6756,7 +6720,6 @@ define float @global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memo
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f32__ftz__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -6910,7 +6873,6 @@ define void @global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_mem
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f32__ftz__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -7129,7 +7091,6 @@ define double @global_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory(p
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f64__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -7351,7 +7312,6 @@ define double @global_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -7578,7 +7538,6 @@ define double @global_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -7791,7 +7750,6 @@ define void @global_agent_atomic_fadd_noret_f64__amdgpu_no_fine_grained_memory(p
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f64__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -7997,7 +7955,6 @@ define void @global_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f64__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -8208,7 +8165,6 @@ define void @global_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f64__offset12b_neg__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -8629,7 +8585,6 @@ define half @global_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory(ptr
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v5, v2
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f16__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -9064,7 +9019,6 @@ define half @global_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v6, v4
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -9503,7 +9457,6 @@ define half @global_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_gra
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v6, v4
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -9915,7 +9868,6 @@ define void @global_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory(p
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f16__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -10334,7 +10286,6 @@ define void @global_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_g
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -10756,7 +10707,6 @@ define void @global_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_g
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f16__offset12b_neg__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -11089,7 +11039,6 @@ define half @global_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v4
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_f16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -11401,7 +11350,6 @@ define void @global_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_n
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_f16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -11832,7 +11780,6 @@ define half @global_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_gr
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v6, v4
|
||||
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_system_atomic_fadd_ret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -12259,7 +12206,6 @@ define void @global_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_system_atomic_fadd_noret_f16__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -12762,7 +12708,6 @@ define bfloat @global_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory(
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v5, v2
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_bf16__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -13275,7 +13220,6 @@ define bfloat @global_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v6, v4
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -13792,7 +13736,6 @@ define bfloat @global_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v6, v4
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -14278,7 +14221,6 @@ define void @global_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory(
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_bf16__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -14775,7 +14717,6 @@ define void @global_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -15275,7 +15216,6 @@ define void @global_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -15693,7 +15633,6 @@ define bfloat @global_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v4
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_bf16__offset12b_pos__align4__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -16090,7 +16029,6 @@ define void @global_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_bf16__offset12b__align4_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -16599,7 +16537,6 @@ define bfloat @global_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX7-NEXT: v_lshrrev_b32_e32 v0, v6, v4
|
||||
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_system_atomic_fadd_ret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -17104,7 +17041,6 @@ define void @global_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_system_atomic_fadd_noret_bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -17333,7 +17269,6 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_me
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -17565,7 +17500,6 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -17800,7 +17734,6 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -18014,7 +17947,6 @@ define void @global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -18225,7 +18157,6 @@ define void @global_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -18441,7 +18372,6 @@ define void @global_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_v2f16__offset12b_neg__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -18679,7 +18609,6 @@ define <2 x half> @global_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_system_atomic_fadd_ret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -18896,7 +18825,6 @@ define void @global_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fin
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_system_atomic_fadd_noret_v2f16__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -19140,7 +19068,6 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory(p
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_v2f16__amdgpu_no_remote_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -19377,7 +19304,6 @@ define void @global_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory(ptr a
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_remote_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -19606,7 +19532,6 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_me
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -19817,7 +19742,6 @@ define void @global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_v2f16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -20060,7 +19984,6 @@ define <2 x half> @global_agent_atomic_fadd_ret_v2f16__maybe_remote(ptr addrspac
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v2
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, v3
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_v2f16__maybe_remote:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -20297,7 +20220,6 @@ define void @global_agent_atomic_fadd_noret_v2f16__maybe_remote(ptr addrspace(1)
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_v2f16__maybe_remote:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -20682,7 +20604,6 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, v2
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -21062,7 +20983,6 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, v2
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -21445,7 +21365,6 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -21817,7 +21736,6 @@ define void @global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memor
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -22186,7 +22104,6 @@ define void @global_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fin
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -22560,7 +22477,6 @@ define void @global_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fin
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_v2bf16__offset12b_neg__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -22946,7 +22862,6 @@ define <2 x bfloat> @global_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, v2
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_system_atomic_fadd_ret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -23321,7 +23236,6 @@ define void @global_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fi
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_system_atomic_fadd_noret_v2bf16__offset12b_pos__amdgpu_no_fine_grained_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -23699,7 +23613,6 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memor
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, v2
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_remote_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -24068,7 +23981,6 @@ define void @global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory(ptr
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_remote_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -24445,7 +24357,6 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, v2
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -24814,7 +24725,6 @@ define void @global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memor
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_v2bf16__amdgpu_no_fine_grained_memory__amdgpu_no_remote_memory:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -25191,7 +25101,6 @@ define <2 x bfloat> @global_agent_atomic_fadd_ret_v2bf16__maybe_remote(ptr addrs
|
||||
; GFX7-NEXT: v_mov_b32_e32 v0, v3
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, v2
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_ret_v2bf16__maybe_remote:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -25560,7 +25469,6 @@ define void @global_agent_atomic_fadd_noret_v2bf16__maybe_remote(ptr addrspace(1
|
||||
; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end
|
||||
; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
|
||||
; GFX7-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; GFX6-LABEL: global_agent_atomic_fadd_noret_v2bf16__maybe_remote:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
@ -25609,255 +25517,6 @@ define void @global_agent_atomic_fadd_noret_v2bf16__maybe_remote(ptr addrspace(1
|
||||
ret void
|
||||
}
|
||||
|
||||
; --------------------------------------------------------------------
|
||||
; misc
|
||||
; --------------------------------------------------------------------
|
||||
|
||||
define amdgpu_kernel void @infer_as_before_atomic(ptr addrspace(4) %arg) #1 {
|
||||
; GFX12-LABEL: infer_as_before_atomic:
|
||||
; GFX12: ; %bb.0:
|
||||
; GFX12-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX12-NEXT: s_mov_b32 s1, exec_lo
|
||||
; GFX12-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX12-NEXT: v_cmpx_eq_u32_e32 0, v0
|
||||
; GFX12-NEXT: s_cbranch_execz .LBB92_2
|
||||
; GFX12-NEXT: ; %bb.1:
|
||||
; GFX12-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX12-NEXT: s_bcnt1_i32_b32 s0, s0
|
||||
; GFX12-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX12-NEXT: s_wait_alu 0xfffe
|
||||
; GFX12-NEXT: v_cvt_f32_ubyte0_e32 v1, s0
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
|
||||
; GFX12-NEXT: s_wait_kmcnt 0x0
|
||||
; GFX12-NEXT: global_atomic_add_f32 v0, v1, s[2:3] scope:SCOPE_DEV
|
||||
; GFX12-NEXT: .LBB92_2:
|
||||
; GFX12-NEXT: s_endpgm
|
||||
;
|
||||
; GFX942-LABEL: infer_as_before_atomic:
|
||||
; GFX942: ; %bb.0:
|
||||
; GFX942-NEXT: s_mov_b64 s[0:1], exec
|
||||
; GFX942-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX942-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0
|
||||
; GFX942-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX942-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX942-NEXT: s_cbranch_execz .LBB92_2
|
||||
; GFX942-NEXT: ; %bb.1:
|
||||
; GFX942-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
|
||||
; GFX942-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
|
||||
; GFX942-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX942-NEXT: v_cvt_f32_ubyte0_e32 v1, s0
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
|
||||
; GFX942-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX942-NEXT: global_atomic_add_f32 v0, v1, s[2:3]
|
||||
; GFX942-NEXT: .LBB92_2:
|
||||
; GFX942-NEXT: s_endpgm
|
||||
;
|
||||
; GFX11-LABEL: infer_as_before_atomic:
|
||||
; GFX11: ; %bb.0:
|
||||
; GFX11-NEXT: s_mov_b32 s0, exec_lo
|
||||
; GFX11-NEXT: s_mov_b32 s1, exec_lo
|
||||
; GFX11-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
|
||||
; GFX11-NEXT: v_cmpx_eq_u32_e32 0, v0
|
||||
; GFX11-NEXT: s_cbranch_execz .LBB92_2
|
||||
; GFX11-NEXT: ; %bb.1:
|
||||
; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x0
|
||||
; GFX11-NEXT: s_bcnt1_i32_b32 s0, s0
|
||||
; GFX11-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v1, s0
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
|
||||
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX11-NEXT: global_atomic_add_f32 v0, v1, s[2:3]
|
||||
; GFX11-NEXT: .LBB92_2:
|
||||
; GFX11-NEXT: s_endpgm
|
||||
;
|
||||
; GFX10-LABEL: infer_as_before_atomic:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_mov_b32 s3, exec_lo
|
||||
; GFX10-NEXT: s_mov_b32 s2, 0
|
||||
; GFX10-NEXT: v_mbcnt_lo_u32_b32 v0, s3, 0
|
||||
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
|
||||
; GFX10-NEXT: s_and_saveexec_b32 s0, vcc_lo
|
||||
; GFX10-NEXT: s_cbranch_execz .LBB92_3
|
||||
; GFX10-NEXT: ; %bb.1:
|
||||
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
|
||||
; GFX10-NEXT: s_bcnt1_i32_b32 s3, s3
|
||||
; GFX10-NEXT: v_mov_b32_e32 v3, 0
|
||||
; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v2, s3
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: s_load_dword s4, s[0:1], 0x0
|
||||
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, s4
|
||||
; GFX10-NEXT: .LBB92_2: ; %atomicrmw.start
|
||||
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX10-NEXT: v_add_f32_e32 v0, v1, v2
|
||||
; GFX10-NEXT: global_atomic_cmpswap v0, v3, v[0:1], s[0:1] glc
|
||||
; GFX10-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v0, v1
|
||||
; GFX10-NEXT: v_mov_b32_e32 v1, v0
|
||||
; GFX10-NEXT: s_or_b32 s2, vcc_lo, s2
|
||||
; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s2
|
||||
; GFX10-NEXT: s_cbranch_execnz .LBB92_2
|
||||
; GFX10-NEXT: .LBB92_3:
|
||||
; GFX10-NEXT: s_endpgm
|
||||
;
|
||||
; GFX90A-LABEL: infer_as_before_atomic:
|
||||
; GFX90A: ; %bb.0:
|
||||
; GFX90A-NEXT: s_mov_b64 s[0:1], exec
|
||||
; GFX90A-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX90A-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0
|
||||
; GFX90A-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX90A-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX90A-NEXT: s_cbranch_execz .LBB92_2
|
||||
; GFX90A-NEXT: ; %bb.1:
|
||||
; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
|
||||
; GFX90A-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
|
||||
; GFX90A-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX90A-NEXT: v_cvt_f32_ubyte0_e32 v1, s0
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
|
||||
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX90A-NEXT: global_atomic_add_f32 v0, v1, s[2:3]
|
||||
; GFX90A-NEXT: .LBB92_2:
|
||||
; GFX90A-NEXT: s_endpgm
|
||||
;
|
||||
; GFX908-LABEL: infer_as_before_atomic:
|
||||
; GFX908: ; %bb.0:
|
||||
; GFX908-NEXT: s_mov_b64 s[0:1], exec
|
||||
; GFX908-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX908-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0
|
||||
; GFX908-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX908-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX908-NEXT: s_cbranch_execz .LBB92_2
|
||||
; GFX908-NEXT: ; %bb.1:
|
||||
; GFX908-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
|
||||
; GFX908-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
|
||||
; GFX908-NEXT: v_mov_b32_e32 v0, 0
|
||||
; GFX908-NEXT: v_cvt_f32_ubyte0_e32 v1, s0
|
||||
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX908-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
|
||||
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX908-NEXT: global_atomic_add_f32 v0, v1, s[2:3]
|
||||
; GFX908-NEXT: .LBB92_2:
|
||||
; GFX908-NEXT: s_endpgm
|
||||
;
|
||||
; GFX8-LABEL: infer_as_before_atomic:
|
||||
; GFX8: ; %bb.0:
|
||||
; GFX8-NEXT: s_mov_b64 s[0:1], exec
|
||||
; GFX8-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; GFX8-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0
|
||||
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX8-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; GFX8-NEXT: s_cbranch_execz .LBB92_3
|
||||
; GFX8-NEXT: ; %bb.1:
|
||||
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x0
|
||||
; GFX8-NEXT: s_bcnt1_i32_b64 s5, s[0:1]
|
||||
; GFX8-NEXT: s_mov_b64 s[0:1], 0
|
||||
; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v4, s5
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: s_load_dword s4, s[2:3], 0x0
|
||||
; GFX8-NEXT: v_mov_b32_e32 v0, s2
|
||||
; GFX8-NEXT: v_mov_b32_e32 v1, s3
|
||||
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX8-NEXT: v_mov_b32_e32 v3, s4
|
||||
; GFX8-NEXT: .LBB92_2: ; %atomicrmw.start
|
||||
; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX8-NEXT: v_add_f32_e32 v2, v3, v4
|
||||
; GFX8-NEXT: flat_atomic_cmpswap v2, v[0:1], v[2:3] glc
|
||||
; GFX8-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, v2, v3
|
||||
; GFX8-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
|
||||
; GFX8-NEXT: v_mov_b32_e32 v3, v2
|
||||
; GFX8-NEXT: s_andn2_b64 exec, exec, s[0:1]
|
||||
; GFX8-NEXT: s_cbranch_execnz .LBB92_2
|
||||
; GFX8-NEXT: .LBB92_3:
|
||||
; GFX8-NEXT: s_endpgm
|
||||
;
|
||||
; GFX7-LABEL: infer_as_before_atomic:
|
||||
; GFX7: ; %bb.0:
|
||||
; GFX7-NEXT: s_mov_b64 s[2:3], exec
|
||||
; GFX7-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s2, 0
|
||||
; GFX7-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s3, v0
|
||||
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX7-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX7-NEXT: s_cbranch_execz .LBB92_3
|
||||
; GFX7-NEXT: ; %bb.1:
|
||||
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
|
||||
; GFX7-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
|
||||
; GFX7-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX7-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX7-NEXT: v_cvt_f32_ubyte0_e32 v2, s2
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
|
||||
; GFX7-NEXT: s_mov_b32 s2, -1
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: s_load_dword s6, s[0:1], 0x0
|
||||
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, s6
|
||||
; GFX7-NEXT: .LBB92_2: ; %atomicrmw.start
|
||||
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX7-NEXT: v_add_f32_e32 v0, v1, v2
|
||||
; GFX7-NEXT: v_mov_b32_e32 v4, v1
|
||||
; GFX7-NEXT: v_mov_b32_e32 v3, v0
|
||||
; GFX7-NEXT: buffer_atomic_cmpswap v[3:4], off, s[0:3], 0 glc
|
||||
; GFX7-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1
|
||||
; GFX7-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; GFX7-NEXT: v_mov_b32_e32 v1, v3
|
||||
; GFX7-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; GFX7-NEXT: s_cbranch_execnz .LBB92_2
|
||||
; GFX7-NEXT: .LBB92_3:
|
||||
; GFX7-NEXT: s_endpgm
|
||||
;
|
||||
; GFX6-LABEL: infer_as_before_atomic:
|
||||
; GFX6: ; %bb.0:
|
||||
; GFX6-NEXT: s_mov_b64 s[2:3], exec
|
||||
; GFX6-NEXT: v_mbcnt_lo_u32_b32_e64 v0, s2, 0
|
||||
; GFX6-NEXT: v_mbcnt_hi_u32_b32_e32 v0, s3, v0
|
||||
; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; GFX6-NEXT: s_and_saveexec_b64 s[0:1], vcc
|
||||
; GFX6-NEXT: s_cbranch_execz .LBB92_3
|
||||
; GFX6-NEXT: ; %bb.1:
|
||||
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
|
||||
; GFX6-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
|
||||
; GFX6-NEXT: s_mov_b64 s[4:5], 0
|
||||
; GFX6-NEXT: s_mov_b32 s3, 0xf000
|
||||
; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, s2
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
|
||||
; GFX6-NEXT: s_mov_b32 s2, -1
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: s_load_dword s6, s[0:1], 0x0
|
||||
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, s6
|
||||
; GFX6-NEXT: .LBB92_2: ; %atomicrmw.start
|
||||
; GFX6-NEXT: ; =>This Inner Loop Header: Depth=1
|
||||
; GFX6-NEXT: v_add_f32_e32 v0, v1, v2
|
||||
; GFX6-NEXT: s_waitcnt expcnt(0)
|
||||
; GFX6-NEXT: v_mov_b32_e32 v4, v1
|
||||
; GFX6-NEXT: v_mov_b32_e32 v3, v0
|
||||
; GFX6-NEXT: buffer_atomic_cmpswap v[3:4], off, s[0:3], 0 glc
|
||||
; GFX6-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v3, v1
|
||||
; GFX6-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
|
||||
; GFX6-NEXT: v_mov_b32_e32 v1, v3
|
||||
; GFX6-NEXT: s_andn2_b64 exec, exec, s[4:5]
|
||||
; GFX6-NEXT: s_cbranch_execnz .LBB92_2
|
||||
; GFX6-NEXT: .LBB92_3:
|
||||
; GFX6-NEXT: s_endpgm
|
||||
%load = load ptr, ptr addrspace(4) %arg
|
||||
%v = atomicrmw fadd ptr %load, float 1.0 syncscope("agent-one-as") monotonic, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
|
||||
|
||||
|
@ -31,28 +31,21 @@ entry:
|
||||
define protected amdgpu_kernel void @InferFadd(i32 %a, ptr addrspace(1) %b, double %c) {
|
||||
; CHECK-LABEL: InferFadd:
|
||||
; CHECK: ; %bb.0: ; %entry
|
||||
; CHECK-NEXT: s_mov_b64 s[0:1], exec
|
||||
; CHECK-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; CHECK-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB1_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: s_load_dword s2, s[4:5], 0x24
|
||||
; CHECK-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x2c
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 0
|
||||
; CHECK-NEXT: s_load_dword s6, s[4:5], 0x24
|
||||
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_ashr_i32 s3, s2, 31
|
||||
; CHECK-NEXT: s_lshl_b64 s[2:3], s[2:3], 3
|
||||
; CHECK-NEXT: s_add_u32 s2, s8, s2
|
||||
; CHECK-NEXT: s_addc_u32 s3, s9, s3
|
||||
; CHECK-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
|
||||
; CHECK-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
|
||||
; CHECK-NEXT: v_mul_f64 v[0:1], s[10:11], v[0:1]
|
||||
; CHECK-NEXT: global_atomic_add_f64 v2, v[0:1], s[2:3] offset:-8
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: s_ashr_i32 s7, s6, 31
|
||||
; CHECK-NEXT: v_mov_b32_e32 v0, s2
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, s3
|
||||
; CHECK-NEXT: s_lshl_b64 s[2:3], s[6:7], 3
|
||||
; CHECK-NEXT: s_add_u32 s0, s0, s2
|
||||
; CHECK-NEXT: s_addc_u32 s1, s1, s3
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, s1
|
||||
; CHECK-NEXT: v_add_co_u32_e64 v2, vcc, -8, s0
|
||||
; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, -1, v3, vcc
|
||||
; CHECK-NEXT: flat_atomic_add_f64 v[2:3], v[0:1]
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_wbinvl1_vol
|
||||
; CHECK-NEXT: .LBB1_2:
|
||||
; CHECK-NEXT: s_endpgm
|
||||
entry:
|
||||
%i = add nsw i32 %a, -1
|
||||
@ -66,35 +59,25 @@ entry:
|
||||
define protected amdgpu_kernel void @InferMixed(i32 %a, ptr addrspace(1) %b, double %c, ptr %d) {
|
||||
; CHECK-LABEL: InferMixed:
|
||||
; CHECK: ; %bb.0: ; %entry
|
||||
; CHECK-NEXT: s_load_dword s6, s[4:5], 0x24
|
||||
; CHECK-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0x3c
|
||||
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
|
||||
; CHECK-NEXT: s_mov_b64 s[6:7], exec
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_ashr_i32 s7, s6, 31
|
||||
; CHECK-NEXT: v_pk_mov_b32 v[0:1], s[8:9], s[8:9] op_sel:[0,1]
|
||||
; CHECK-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
|
||||
; CHECK-NEXT: s_lshl_b64 s[2:3], s[6:7], 3
|
||||
; CHECK-NEXT: s_add_u32 s0, s0, s2
|
||||
; CHECK-NEXT: s_addc_u32 s1, s1, s3
|
||||
; CHECK-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_wbinvl1_vol
|
||||
; CHECK-NEXT: v_mbcnt_lo_u32_b32 v0, s6, 0
|
||||
; CHECK-NEXT: v_mbcnt_hi_u32_b32 v0, s7, v0
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[8:9], vcc
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB2_2
|
||||
; CHECK-NEXT: ; %bb.1:
|
||||
; CHECK-NEXT: s_load_dword s4, s[4:5], 0x24
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 0
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_ashr_i32 s5, s4, 31
|
||||
; CHECK-NEXT: s_lshl_b64 s[4:5], s[4:5], 3
|
||||
; CHECK-NEXT: s_add_u32 s0, s0, s4
|
||||
; CHECK-NEXT: s_addc_u32 s1, s1, s5
|
||||
; CHECK-NEXT: s_bcnt1_i32_b64 s4, s[6:7]
|
||||
; CHECK-NEXT: v_cvt_f64_u32_e32 v[0:1], s4
|
||||
; CHECK-NEXT: v_mul_f64 v[0:1], s[2:3], v[0:1]
|
||||
; CHECK-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] offset:-7
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, s1
|
||||
; CHECK-NEXT: v_add_co_u32_e64 v0, vcc, -7, s0
|
||||
; CHECK-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
|
||||
; CHECK-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_wbinvl1_vol
|
||||
; CHECK-NEXT: .LBB2_2:
|
||||
; CHECK-NEXT: s_endpgm
|
||||
entry:
|
||||
%i = add nsw i32 %a, -1
|
||||
@ -115,8 +98,14 @@ bb1: ; preds = %entry
|
||||
define protected amdgpu_kernel void @InferPHI(i32 %a, ptr addrspace(1) %b, double %c) {
|
||||
; CHECK-LABEL: InferPHI:
|
||||
; CHECK: ; %bb.0: ; %entry
|
||||
; CHECK-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
|
||||
; CHECK-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
|
||||
; CHECK-NEXT: s_load_dword s6, s[4:5], 0x24
|
||||
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
|
||||
; CHECK-NEXT: s_mov_b32 s14, -1
|
||||
; CHECK-NEXT: s_mov_b32 s15, 0xe00000
|
||||
; CHECK-NEXT: s_add_u32 s12, s12, s11
|
||||
; CHECK-NEXT: s_addc_u32 s13, s13, 0
|
||||
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; CHECK-NEXT: s_ashr_i32 s7, s6, 31
|
||||
; CHECK-NEXT: s_lshl_b64 s[4:5], s[6:7], 3
|
||||
@ -133,21 +122,34 @@ define protected amdgpu_kernel void @InferPHI(i32 %a, ptr addrspace(1) %b, doubl
|
||||
; CHECK-NEXT: s_and_b64 vcc, exec, s[0:1]
|
||||
; CHECK-NEXT: s_cbranch_vccnz .LBB3_1
|
||||
; CHECK-NEXT: ; %bb.2: ; %bb1
|
||||
; CHECK-NEXT: s_mov_b64 s[0:1], exec
|
||||
; CHECK-NEXT: v_mbcnt_lo_u32_b32 v0, s0, 0
|
||||
; CHECK-NEXT: v_mbcnt_hi_u32_b32 v0, s1, v0
|
||||
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
|
||||
; CHECK-NEXT: s_and_saveexec_b64 s[6:7], vcc
|
||||
; CHECK-NEXT: s_cbranch_execz .LBB3_4
|
||||
; CHECK-NEXT: ; %bb.3:
|
||||
; CHECK-NEXT: s_bcnt1_i32_b64 s0, s[0:1]
|
||||
; CHECK-NEXT: v_cvt_f64_u32_e32 v[0:1], s0
|
||||
; CHECK-NEXT: v_mul_f64 v[0:1], s[2:3], v[0:1]
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 0
|
||||
; CHECK-NEXT: global_atomic_add_f64 v2, v[0:1], s[4:5]
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: s_mov_b64 s[0:1], src_private_base
|
||||
; CHECK-NEXT: s_cmp_eq_u32 s5, s1
|
||||
; CHECK-NEXT: s_cselect_b64 s[0:1], -1, 0
|
||||
; CHECK-NEXT: s_andn2_b64 vcc, exec, s[0:1]
|
||||
; CHECK-NEXT: s_mov_b64 s[0:1], -1
|
||||
; CHECK-NEXT: s_cbranch_vccnz .LBB3_5
|
||||
; CHECK-NEXT: ; %bb.3: ; %Flow
|
||||
; CHECK-NEXT: s_andn2_b64 vcc, exec, s[0:1]
|
||||
; CHECK-NEXT: s_cbranch_vccz .LBB3_6
|
||||
; CHECK-NEXT: .LBB3_4: ; %atomicrmw.phi
|
||||
; CHECK-NEXT: s_endpgm
|
||||
; CHECK-NEXT: .LBB3_5: ; %atomicrmw.global
|
||||
; CHECK-NEXT: v_pk_mov_b32 v[0:1], s[4:5], s[4:5] op_sel:[0,1]
|
||||
; CHECK-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
|
||||
; CHECK-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: buffer_wbinvl1_vol
|
||||
; CHECK-NEXT: .LBB3_4:
|
||||
; CHECK-NEXT: s_cbranch_execnz .LBB3_4
|
||||
; CHECK-NEXT: .LBB3_6: ; %atomicrmw.private
|
||||
; CHECK-NEXT: s_cmp_lg_u64 s[4:5], 0
|
||||
; CHECK-NEXT: s_cselect_b32 s0, s4, -1
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, s0
|
||||
; CHECK-NEXT: buffer_load_dword v0, v2, s[12:15], 0 offen
|
||||
; CHECK-NEXT: buffer_load_dword v1, v2, s[12:15], 0 offen offset:4
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: v_add_f64 v[0:1], v[0:1], s[2:3]
|
||||
; CHECK-NEXT: buffer_store_dword v0, v2, s[12:15], 0 offen
|
||||
; CHECK-NEXT: buffer_store_dword v1, v2, s[12:15], 0 offen offset:4
|
||||
; CHECK-NEXT: s_endpgm
|
||||
entry:
|
||||
%i = add nsw i32 %a, -1
|
||||
|
@ -188,7 +188,6 @@
|
||||
; GCN-O1-NEXT: AMDGPU Software lowering of LDS
|
||||
; GCN-O1-NEXT: Lower uses of LDS variables from non-kernel functions
|
||||
; GCN-O1-NEXT: FunctionPass Manager
|
||||
; GCN-O1-NEXT: Infer address spaces
|
||||
; GCN-O1-NEXT: Dominator Tree Construction
|
||||
; GCN-O1-NEXT: Cycle Info Analysis
|
||||
; GCN-O1-NEXT: Uniformity Analysis
|
||||
@ -471,7 +470,6 @@
|
||||
; GCN-O1-OPTS-NEXT: AMDGPU Software lowering of LDS
|
||||
; GCN-O1-OPTS-NEXT: Lower uses of LDS variables from non-kernel functions
|
||||
; GCN-O1-OPTS-NEXT: FunctionPass Manager
|
||||
; GCN-O1-OPTS-NEXT: Infer address spaces
|
||||
; GCN-O1-OPTS-NEXT: Dominator Tree Construction
|
||||
; GCN-O1-OPTS-NEXT: Cycle Info Analysis
|
||||
; GCN-O1-OPTS-NEXT: Uniformity Analysis
|
||||
@ -784,7 +782,6 @@
|
||||
; GCN-O2-NEXT: AMDGPU Software lowering of LDS
|
||||
; GCN-O2-NEXT: Lower uses of LDS variables from non-kernel functions
|
||||
; GCN-O2-NEXT: FunctionPass Manager
|
||||
; GCN-O2-NEXT: Infer address spaces
|
||||
; GCN-O2-NEXT: Dominator Tree Construction
|
||||
; GCN-O2-NEXT: Cycle Info Analysis
|
||||
; GCN-O2-NEXT: Uniformity Analysis
|
||||
@ -1101,7 +1098,6 @@
|
||||
; GCN-O3-NEXT: AMDGPU Software lowering of LDS
|
||||
; GCN-O3-NEXT: Lower uses of LDS variables from non-kernel functions
|
||||
; GCN-O3-NEXT: FunctionPass Manager
|
||||
; GCN-O3-NEXT: Infer address spaces
|
||||
; GCN-O3-NEXT: Dominator Tree Construction
|
||||
; GCN-O3-NEXT: Cycle Info Analysis
|
||||
; GCN-O3-NEXT: Uniformity Analysis
|
||||
|
@ -28,28 +28,24 @@ define amdgpu_kernel void @test_simple_indirect_call() {
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s12, s17
|
||||
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
|
||||
; GFX9-NEXT: s_mov_b32 s13, s15
|
||||
; GFX9-NEXT: s_mov_b32 s12, s14
|
||||
; GFX9-NEXT: s_load_dwordx2 s[14:15], s[4:5], 0x4
|
||||
; GFX9-NEXT: s_add_u32 s0, s0, s17
|
||||
; GFX9-NEXT: s_addc_u32 s1, s1, 0
|
||||
; GFX9-NEXT: s_mov_b32 s32, 0
|
||||
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
|
||||
; GFX9-NEXT: s_lshr_b32 s14, s14, 16
|
||||
; GFX9-NEXT: s_mul_i32 s14, s14, s15
|
||||
; GFX9-NEXT: v_mul_lo_u32 v3, s14, v0
|
||||
; GFX9-NEXT: s_getpc_b64 s[18:19]
|
||||
; GFX9-NEXT: s_add_u32 s18, s18, indirect@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s19, s19, indirect@rel32@hi+12
|
||||
; GFX9-NEXT: s_mov_b32 s14, s16
|
||||
; GFX9-NEXT: v_mad_u32_u24 v3, v1, s15, v3
|
||||
; GFX9-NEXT: v_add_lshl_u32 v5, v3, v2, 3
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, s18
|
||||
; GFX9-NEXT: s_mov_b32 s13, s15
|
||||
; GFX9-NEXT: s_mov_b32 s12, s14
|
||||
; GFX9-NEXT: s_mov_b64 s[14:15], src_private_base
|
||||
; GFX9-NEXT: v_mov_b32_e32 v5, s18
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 20, v2
|
||||
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 10, v1
|
||||
; GFX9-NEXT: v_mov_b32_e32 v4, s19
|
||||
; GFX9-NEXT: v_mov_b32_e32 v3, 0
|
||||
; GFX9-NEXT: v_mov_b32_e32 v4, s15
|
||||
; GFX9-NEXT: v_mov_b32_e32 v6, s19
|
||||
; GFX9-NEXT: v_or3_b32 v31, v0, v1, v2
|
||||
; GFX9-NEXT: ds_write_b64 v5, v[3:4]
|
||||
; GFX9-NEXT: s_mov_b32 s14, s16
|
||||
; GFX9-NEXT: s_movk_i32 s32, 0x400
|
||||
; GFX9-NEXT: flat_store_dwordx2 v[3:4], v[5:6]
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
%fptr = alloca ptr, addrspace(5)
|
||||
|
@ -20,9 +20,11 @@ define protected amdgpu_kernel void @foo(ptr addrspace(1) %arg, ptr addrspace(1)
|
||||
; CHECK-NEXT: v_mov_b32_e32 v1, 0
|
||||
; CHECK-NEXT: s_mov_b32 s32, 0
|
||||
; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19]
|
||||
; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], 0
|
||||
; CHECK-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:4
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: s_mov_b64 s[4:5], src_private_base
|
||||
; CHECK-NEXT: v_mov_b32_e32 v2, 0
|
||||
; CHECK-NEXT: v_mov_b32_e32 v3, s5
|
||||
; CHECK-NEXT: flat_load_dwordx2 v[2:3], v[2:3]
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
||||
; CHECK-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
|
||||
; CHECK-NEXT: s_endpgm
|
||||
bb:
|
||||
|
@ -23,20 +23,19 @@ define void @test() {
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET1:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @const.ptr, target-flags(amdgpu-gotprel32-hi) @const.ptr, implicit-def dead $scc
|
||||
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET1]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
|
||||
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM killed [[S_LOAD_DWORDX2_IMM1]], 0, 0 :: (invariant load (s64) from @const.ptr, addrspace 4)
|
||||
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR killed [[S_LOAD_DWORDX2_IMM2]], killed [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load (s32) from %ir.0, addrspace 1)
|
||||
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1092616192
|
||||
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY [[GLOBAL_LOAD_DWORD_SADDR]]
|
||||
; CHECK-NEXT: nofpexcept S_CMP_LT_F32 killed [[COPY]], killed [[S_MOV_B32_2]], implicit-def $scc, implicit $mode
|
||||
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit $scc
|
||||
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM2:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[S_LOAD_DWORDX2_IMM1]], 0, 0 :: (invariant load (s64) from @const.ptr, addrspace 4)
|
||||
; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM2]]
|
||||
; CHECK-NEXT: [[FLAT_LOAD_DWORD:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %ir.ptr)
|
||||
; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 1065353216
|
||||
; CHECK-NEXT: [[V_CMP_NLT_F32_e64_:%[0-9]+]]:sreg_32 = nofpexcept V_CMP_NLT_F32_e64 0, killed [[FLAT_LOAD_DWORD]], 0, killed [[S_MOV_B32_1]], 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1092616192, implicit $exec
|
||||
; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_NLT_F32_e64_]], %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
|
||||
; CHECK-NEXT: S_BRANCH %bb.2
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.2.bb2:
|
||||
; CHECK-NEXT: successors: %bb.4(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
|
||||
; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: S_BRANCH %bb.4
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.3.Flow1:
|
||||
@ -45,12 +44,13 @@ define void @test() {
|
||||
; CHECK-NEXT: S_BRANCH %bb.7
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.4.bb3:
|
||||
; CHECK-NEXT: successors: %bb.5(0x50000000), %bb.6(0x30000000)
|
||||
; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[PHI:%[0-9]+]]:sgpr_32 = PHI [[S_MOV_B32_1]], %bb.1, [[S_MOV_B32_3]], %bb.2
|
||||
; CHECK-NEXT: [[S_MOV_B32_4:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
|
||||
; CHECK-NEXT: nofpexcept S_CMP_NEQ_F32 [[PHI]], killed [[S_MOV_B32_4]], implicit-def $scc, implicit $mode
|
||||
; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit $scc
|
||||
; CHECK-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[V_MOV_B32_e32_]], %bb.1, [[V_MOV_B32_e32_1]], %bb.2
|
||||
; CHECK-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
|
||||
; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
|
||||
; CHECK-NEXT: [[V_CMP_EQ_F32_e64_:%[0-9]+]]:sreg_32 = nofpexcept V_CMP_EQ_F32_e64 0, [[PHI]], 0, killed [[S_MOV_B32_2]], 0, implicit $mode, implicit $exec
|
||||
; CHECK-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_EQ_F32_e64_]], %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
|
||||
; CHECK-NEXT: S_BRANCH %bb.5
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.5.bb4:
|
||||
@ -58,13 +58,14 @@ define void @test() {
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: [[SI_PC_ADD_REL_OFFSET2:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant2, target-flags(amdgpu-gotprel32-hi) @external_constant2, implicit-def dead $scc
|
||||
; CHECK-NEXT: [[S_LOAD_DWORDX2_IMM3:%[0-9]+]]:sreg_64_xexec_xnull = S_LOAD_DWORDX2_IMM killed [[SI_PC_ADD_REL_OFFSET2]], 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
|
||||
; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1082130432, implicit $exec
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR killed [[V_MOV_B32_e32_1]], killed [[V_MOV_B32_e32_2]], killed [[S_LOAD_DWORDX2_IMM3]], 0, 0, implicit $exec :: (store (s32) into @external_constant2, addrspace 1)
|
||||
; CHECK-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1082130432, implicit $exec
|
||||
; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR killed [[V_MOV_B32_e32_2]], killed [[V_MOV_B32_e32_3]], killed [[S_LOAD_DWORDX2_IMM3]], 0, 0, implicit $exec :: (store (s32) into @external_constant2, addrspace 1)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.6.Flow:
|
||||
; CHECK-NEXT: successors: %bb.3(0x80000000)
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: SI_END_CF [[SI_IF1]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
|
||||
; CHECK-NEXT: S_BRANCH %bb.3
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: bb.7.bb5:
|
||||
|
@ -0,0 +1,23 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=infer-address-spaces < %s | FileCheck %s
|
||||
|
||||
define amdgpu_kernel void @infer_as_before_atomic(ptr addrspace(4) %arg) #0 {
|
||||
; CHECK-LABEL: define amdgpu_kernel void @infer_as_before_atomic(
|
||||
; CHECK-SAME: ptr addrspace(4) [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[ARG]], align 8
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[LOAD]] to ptr addrspace(1)
|
||||
; CHECK-NEXT: [[V:%.*]] = atomicrmw fadd ptr addrspace(1) [[TMP1]], float 1.000000e+00 syncscope("agent-one-as") monotonic, align 4, !amdgpu.no.fine.grained.memory [[META0:![0-9]+]], !amdgpu.ignore.denormal.mode [[META0]]
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%load = load ptr, ptr addrspace(4) %arg
|
||||
%v = atomicrmw fadd ptr %load, float 1.0 syncscope("agent-one-as") monotonic, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
|
||||
|
||||
!0 = !{}
|
||||
|
||||
;.
|
||||
; CHECK: [[META0]] = !{}
|
||||
;.
|
@ -0,0 +1,60 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
|
||||
; RUN: opt -S -O1 -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck --check-prefix=INFER %s
|
||||
; RUN: opt -S -O2 -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck --check-prefix=INFER %s
|
||||
; RUN: opt -S -O3 -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck --check-prefix=INFER %s
|
||||
; RUN: opt -S -passes='lto<O1>' -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck --check-prefix=NO-INFER %s
|
||||
; RUN: opt -S -passes='lto<O2>' -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck --check-prefix=INFER %s
|
||||
; RUN: opt -S -passes='lto<O3>' -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck --check-prefix=INFER %s
|
||||
|
||||
%struct.data = type { ptr, i32 }
|
||||
|
||||
declare void @external_use(i32)
|
||||
|
||||
define internal void @callee(ptr %ptr_as0, i32 %val) {
|
||||
; NO-INFER-LABEL: define internal void @callee(
|
||||
; NO-INFER-SAME: ptr [[PTR_AS0:%.*]], i32 [[VAL:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
; NO-INFER-NEXT: [[LOADED:%.*]] = load i32, ptr [[PTR_AS0]], align 4
|
||||
; NO-INFER-NEXT: [[COMPUTED:%.*]] = add i32 [[LOADED]], [[VAL]]
|
||||
; NO-INFER-NEXT: store i32 [[COMPUTED]], ptr [[PTR_AS0]], align 4
|
||||
; NO-INFER-NEXT: call void @external_use(i32 [[COMPUTED]])
|
||||
; NO-INFER-NEXT: ret void
|
||||
;
|
||||
%loaded = load i32, ptr %ptr_as0, align 4
|
||||
%computed = add i32 %loaded, %val
|
||||
store i32 %computed, ptr %ptr_as0, align 4
|
||||
call void @external_use(i32 %computed)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @caller(ptr addrspace(1) %ptr_as1, i32 %value) {
|
||||
; INFER-LABEL: define void @caller(
|
||||
; INFER-SAME: ptr addrspace(1) captures(none) [[PTR_AS1:%.*]], i32 [[VALUE:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
|
||||
; INFER-NEXT: [[LOADED_I:%.*]] = load i32, ptr addrspace(1) [[PTR_AS1]], align 4
|
||||
; INFER-NEXT: [[COMPUTED_I:%.*]] = add i32 [[LOADED_I]], [[VALUE]]
|
||||
; INFER-NEXT: store i32 [[COMPUTED_I]], ptr addrspace(1) [[PTR_AS1]], align 4
|
||||
; INFER-NEXT: tail call void @external_use(i32 [[COMPUTED_I]])
|
||||
; INFER-NEXT: ret void
|
||||
;
|
||||
; NO-INFER-LABEL: define void @caller(
|
||||
; NO-INFER-SAME: ptr addrspace(1) [[PTR_AS1:%.*]], i32 [[VALUE:%.*]]) #[[ATTR1]] {
|
||||
; NO-INFER-NEXT: [[DATA:%.*]] = alloca [[STRUCT_DATA:%.*]], align 8, addrspace(5)
|
||||
; NO-INFER-NEXT: [[VAL_FIELD:%.*]] = getelementptr [[STRUCT_DATA]], ptr addrspace(5) [[DATA]], i32 0, i32 1
|
||||
; NO-INFER-NEXT: store i32 [[VALUE]], ptr addrspace(5) [[VAL_FIELD]], align 4
|
||||
; NO-INFER-NEXT: [[GENERIC_INPUT:%.*]] = addrspacecast ptr addrspace(1) [[PTR_AS1]] to ptr
|
||||
; NO-INFER-NEXT: store ptr [[GENERIC_INPUT]], ptr addrspace(5) [[DATA]], align 8
|
||||
; NO-INFER-NEXT: [[RETRIEVED_PTR:%.*]] = load ptr, ptr addrspace(5) [[DATA]], align 8
|
||||
; NO-INFER-NEXT: [[RETRIEVED_VAL:%.*]] = load i32, ptr addrspace(5) [[VAL_FIELD]], align 4
|
||||
; NO-INFER-NEXT: call void @callee(ptr [[RETRIEVED_PTR]], i32 [[RETRIEVED_VAL]])
|
||||
; NO-INFER-NEXT: ret void
|
||||
;
|
||||
%data = alloca %struct.data, align 8, addrspace(5)
|
||||
%ptr_field = getelementptr %struct.data, ptr addrspace(5) %data, i32 0, i32 0
|
||||
%val_field = getelementptr %struct.data, ptr addrspace(5) %data, i32 0, i32 1
|
||||
store i32 %value, ptr addrspace(5) %val_field, align 4
|
||||
%generic_input = addrspacecast ptr addrspace(1) %ptr_as1 to ptr
|
||||
store ptr %generic_input, ptr addrspace(5) %ptr_field, align 8
|
||||
%retrieved_ptr = load ptr, ptr addrspace(5) %ptr_field, align 8
|
||||
%retrieved_val = load i32, ptr addrspace(5) %val_field, align 4
|
||||
call void @callee(ptr %retrieved_ptr, i32 %retrieved_val)
|
||||
ret void
|
||||
}
|
2
llvm/test/Transforms/PhaseOrdering/AMDGPU/lit.local.cfg
Normal file
2
llvm/test/Transforms/PhaseOrdering/AMDGPU/lit.local.cfg
Normal file
@ -0,0 +1,2 @@
|
||||
if not "AMDGPU" in config.root.targets:
|
||||
config.unsupported = True
|
Loading…
x
Reference in New Issue
Block a user