llvm-project/llvm/test/CodeGen/AMDGPU/global_atomic_optimizer_fp_rtn.ll
Matt Arsenault 2502e3b7ba
IR: Promote "denormal-fp-math" to a first class attribute (#174293)
Convert "denormal-fp-math" and "denormal-fp-math-f32" into a first
class denormal_fpenv attribute. Previously the query for the effective
denormal mode involved two string attribute queries with parsing. I'm
introducing more uses of this, so it makes sense to convert this
to a more efficient encoding. The old representation was also awkward
since it was split across two separate attributes. The new encoding
just stores the default and float modes as bitfields, largely avoiding
the need to consider if the other mode is set.

The syntax in the common cases looks like this:
  `denormal_fpenv(preservesign,preservesign)`
  `denormal_fpenv(float: preservesign,preservesign)`
  `denormal_fpenv(dynamic,dynamic float: preservesign,preservesign)`

I wasn't sure about reusing the float type name instead of adding a
new keyword. It's parsed as a type but only accepts float. I'm also
debating switching the name to subnormal to match the current
preferred IEEE terminology (also used by nofpclass and other
contexts).

This has a behavior change when using the command flag debug
options to set the denormal mode. The behavior of the flag
ignored functions with an explicit attribute set, per
the default and f32 version. Now that these are one attribute,
the flag logic can't distinguish which of the two components
were explicitly set on the function. Only one test appeared to
rely on this behavior, so I just avoided using the flags in it.

This also does not perform all the code cleanups this enables.
In particular the attributor handling could be cleaned up.

I also guessed at how to support this in MLIR. I followed
MemoryEffects as a reference; it appears bitfields are expanded
into arguments to attributes, so the representation there is
a bit uglier with the 2 2-element fields flattened into 4 arguments.
2026-02-05 13:31:26 +00:00

1952 lines
151 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -passes='amdgpu-atomic-optimizer<strategy=iterative>,verify<domtree>' %s | FileCheck --check-prefixes=IR,IR-ITERATIVE %s
; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -passes='amdgpu-atomic-optimizer<strategy=dpp>,verify<domtree>' %s | FileCheck --check-prefixes=IR,IR-DPP %s
; Tests various combinations of uniform/divergent address and uniform/divergent value inputs of various types for atomic operations.
; Optimization remains same for Iterative and DPP strategies when value in uniform. These different scan/reduction
; strategies are valid for only divergent values. This optimization is valid for divergent addresses. Test also covers different scopes.
define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 {
; IR-LABEL: @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(
; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
; IR: 2:
; IR-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
; IR-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
; IR-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]])
; IR-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
; IR-NEXT: [[TMP11:%.*]] = uitofp i32 [[TMP10]] to float
; IR-NEXT: [[TMP12:%.*]] = fmul float [[VAL:%.*]], [[TMP11]]
; IR-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
; IR: 14:
; IR-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
; IR-NEXT: br label [[TMP16]]
; IR: 16:
; IR-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
; IR-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]])
; IR-NEXT: [[TMP19:%.*]] = uitofp i32 [[TMP8]] to float
; IR-NEXT: [[TMP20:%.*]] = fmul float [[VAL]], [[TMP19]]
; IR-NEXT: [[TMP21:%.*]] = fadd float [[TMP18]], [[TMP20]]
; IR-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]]
; IR-NEXT: br label [[TMP23]]
; IR: 23:
; IR-NEXT: [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
; IR-NEXT: ret float [[TMP24]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
ret float %result
}
define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) #0 {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
; IR-ITERATIVE: 2:
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
; IR-ITERATIVE: 10:
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("agent") monotonic, align 4
; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
; IR-ITERATIVE: 12:
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]])
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = fadd float [[TMP14]], [[TMP22:%.*]]
; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]]
; IR-ITERATIVE-NEXT: br label [[TMP17]]
; IR-ITERATIVE: 17:
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
; IR-ITERATIVE-NEXT: ret float [[TMP18]]
; IR-ITERATIVE: ComputeLoop:
; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]])
; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]])
; IR-ITERATIVE-NEXT: [[TMP23]] = fadd float [[ACCUMULATOR]], [[TMP21]]
; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]]
; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1
; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
; IR-ITERATIVE: ComputeEnd:
; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
;
; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
; IR-DPP: 2:
; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00)
; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP11:%.*]] = fadd float [[TMP9]], [[TMP10]]
; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP13:%.*]] = fadd float [[TMP11]], [[TMP12]]
; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP15:%.*]] = fadd float [[TMP13]], [[TMP14]]
; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP17:%.*]] = fadd float [[TMP15]], [[TMP16]]
; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP19:%.*]] = fadd float [[TMP17]], [[TMP18]]
; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP21:%.*]] = fadd float [[TMP19]], [[TMP20]]
; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63)
; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]])
; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
; IR-DPP: 26:
; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4
; IR-DPP-NEXT: br label [[TMP28]]
; IR-DPP: 28:
; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]])
; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]])
; IR-DPP-NEXT: [[TMP32:%.*]] = fadd float [[TMP30]], [[TMP31]]
; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]]
; IR-DPP-NEXT: br label [[TMP34]]
; IR-DPP: 34:
; IR-DPP-NEXT: [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
; IR-DPP-NEXT: ret float [[TMP35]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
ret float %result
}
define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1 {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7:[0-9]+]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
; IR-ITERATIVE: 2:
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
; IR-ITERATIVE: 14:
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("one-as") monotonic, align 4
; IR-ITERATIVE-NEXT: br label [[TMP16]]
; IR-ITERATIVE: 16:
; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]]
; IR-ITERATIVE-NEXT: br label [[TMP23]]
; IR-ITERATIVE: 23:
; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
; IR-ITERATIVE-NEXT: ret float [[TMP24]]
;
; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_strictfp(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8:[0-9]+]]
; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
; IR-DPP: 2:
; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
; IR-DPP: 14:
; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("one-as") monotonic, align 4
; IR-DPP-NEXT: br label [[TMP16]]
; IR-DPP: 16:
; IR-DPP-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]]
; IR-DPP-NEXT: br label [[TMP23]]
; IR-DPP: 23:
; IR-DPP-NEXT: [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
; IR-DPP-NEXT: ret float [[TMP24]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float %val) #1 {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
; IR-ITERATIVE: 2:
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
; IR-ITERATIVE: 10:
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("one-as") monotonic, align 4
; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
; IR-ITERATIVE: 12:
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]]
; IR-ITERATIVE-NEXT: br label [[TMP17]]
; IR-ITERATIVE: 17:
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
; IR-ITERATIVE-NEXT: ret float [[TMP18]]
; IR-ITERATIVE: ComputeLoop:
; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP23]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]]
; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1
; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
; IR-ITERATIVE: ComputeEnd:
; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
;
; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_strictfp(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
; IR-DPP: 2:
; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
; IR-DPP: 26:
; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("one-as") monotonic, align 4
; IR-DPP-NEXT: br label [[TMP28]]
; IR-DPP: 28:
; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP30]], float [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]]
; IR-DPP-NEXT: br label [[TMP34]]
; IR-DPP: 34:
; IR-DPP-NEXT: [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
; IR-DPP-NEXT: ret float [[TMP35]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 {
; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
; IR-ITERATIVE: 2:
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
; IR-ITERATIVE: 14:
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
; IR-ITERATIVE-NEXT: br label [[TMP16]]
; IR-ITERATIVE: 16:
; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]]
; IR-ITERATIVE-NEXT: br label [[TMP23]]
; IR-ITERATIVE: 23:
; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
; IR-ITERATIVE-NEXT: ret float [[TMP24]]
;
; IR-DPP-LABEL: @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
; IR-DPP: 2:
; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
; IR-DPP: 14:
; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
; IR-DPP-NEXT: br label [[TMP16]]
; IR-DPP: 16:
; IR-DPP-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]]
; IR-DPP-NEXT: br label [[TMP23]]
; IR-DPP: 23:
; IR-DPP-NEXT: [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
; IR-DPP-NEXT: ret float [[TMP24]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 {
; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
; IR-ITERATIVE: 2:
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
; IR-ITERATIVE: 10:
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("agent") monotonic, align 4
; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
; IR-ITERATIVE: 12:
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[TMP14]], float [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]]
; IR-ITERATIVE-NEXT: br label [[TMP17]]
; IR-ITERATIVE: 17:
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
; IR-ITERATIVE-NEXT: ret float [[TMP18]]
; IR-ITERATIVE: ComputeLoop:
; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP23]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]]
; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1
; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
; IR-ITERATIVE: ComputeEnd:
; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
;
; IR-DPP-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
; IR-DPP: 2:
; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
; IR-DPP: 26:
; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4
; IR-DPP-NEXT: br label [[TMP28]]
; IR-DPP: 28:
; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[TMP30]], float [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]]
; IR-DPP-NEXT: br label [[TMP34]]
; IR-DPP: 34:
; IR-DPP-NEXT: [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
; IR-DPP-NEXT: ret float [[TMP35]]
;
%result = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 {
; IR-LABEL: @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(
; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]]
; IR: 2:
; IR-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
; IR-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
; IR-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
; IR: 10:
; IR-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: br label [[TMP12]]
; IR: 12:
; IR-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
; IR-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]])
; IR-NEXT: [[TMP15:%.*]] = uitofp i32 [[TMP8]] to float
; IR-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], float 0x7FF8000000000000, float [[VAL]]
; IR-NEXT: [[TMP17:%.*]] = call float @llvm.minnum.f32(float [[TMP14]], float [[TMP16]])
; IR-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float [[TMP14]], float [[TMP17]]
; IR-NEXT: br label [[TMP19]]
; IR: 19:
; IR-NEXT: [[TMP20:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ]
; IR-NEXT: ret float [[TMP20]]
;
%result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) #0 {
; IR-ITERATIVE-LABEL: @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
; IR-ITERATIVE: 2:
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
; IR-ITERATIVE: 10:
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("agent") monotonic, align 4
; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
; IR-ITERATIVE: 12:
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]])
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.minnum.f32(float [[TMP14]], float [[TMP22:%.*]])
; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]]
; IR-ITERATIVE-NEXT: br label [[TMP17]]
; IR-ITERATIVE: 17:
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
; IR-ITERATIVE-NEXT: ret float [[TMP18]]
; IR-ITERATIVE: ComputeLoop:
; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]])
; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]])
; IR-ITERATIVE-NEXT: [[TMP23]] = call float @llvm.minnum.f32(float [[ACCUMULATOR]], float [[TMP21]])
; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]]
; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1
; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
; IR-ITERATIVE: ComputeEnd:
; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
;
; IR-DPP-LABEL: @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
; IR-DPP: 2:
; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float 0x7FF8000000000000)
; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP9]], i32 273, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.minnum.f32(float [[TMP9]], float [[TMP10]])
; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP11]], i32 274, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.minnum.f32(float [[TMP11]], float [[TMP12]])
; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP13]], i32 276, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.minnum.f32(float [[TMP13]], float [[TMP14]])
; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP15]], i32 280, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.minnum.f32(float [[TMP15]], float [[TMP16]])
; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP17]], i32 322, i32 10, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.minnum.f32(float [[TMP17]], float [[TMP18]])
; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP19]], i32 323, i32 12, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.minnum.f32(float [[TMP19]], float [[TMP20]])
; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP21]], i32 312, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63)
; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]])
; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
; IR-DPP: 26:
; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4
; IR-DPP-NEXT: br label [[TMP28]]
; IR-DPP: 28:
; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]])
; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]])
; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.minnum.f32(float [[TMP30]], float [[TMP31]])
; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]]
; IR-DPP-NEXT: br label [[TMP34]]
; IR-DPP: 34:
; IR-DPP-NEXT: [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
; IR-DPP-NEXT: ret float [[TMP35]]
;
%result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1{
; IR-ITERATIVE-LABEL: @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]]
; IR-ITERATIVE: 2:
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
; IR-ITERATIVE: 10:
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-ITERATIVE-NEXT: br label [[TMP12]]
; IR-ITERATIVE: 12:
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], float 0x7FF8000000000000, float [[VAL]]
; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float [[TMP14]], float [[TMP17]]
; IR-ITERATIVE-NEXT: br label [[TMP19]]
; IR-ITERATIVE: 19:
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ]
; IR-ITERATIVE-NEXT: ret float [[TMP20]]
;
; IR-DPP-LABEL: @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_strictfp(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]]
; IR-DPP: 2:
; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-DPP-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
; IR-DPP: 10:
; IR-DPP-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-DPP-NEXT: br label [[TMP12]]
; IR-DPP: 12:
; IR-DPP-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], float 0x7FF8000000000000, float [[VAL]]
; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float [[TMP14]], float [[TMP17]]
; IR-DPP-NEXT: br label [[TMP19]]
; IR-DPP: 19:
; IR-DPP-NEXT: [[TMP20:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ]
; IR-DPP-NEXT: ret float [[TMP20]]
;
%result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, float %val) #1{
; IR-ITERATIVE-LABEL: @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
; IR-ITERATIVE: 2:
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
; IR-ITERATIVE: 10:
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] syncscope("agent") monotonic, align 4
; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
; IR-ITERATIVE: 12:
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP22:%.*]], metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]]
; IR-ITERATIVE-NEXT: br label [[TMP17]]
; IR-ITERATIVE: 17:
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
; IR-ITERATIVE-NEXT: ret float [[TMP18]]
; IR-ITERATIVE: ComputeLoop:
; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP23]] = call float @llvm.experimental.constrained.maxnum.f32(float [[ACCUMULATOR]], float [[TMP21]], metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]]
; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1
; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
; IR-ITERATIVE: ComputeEnd:
; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
;
; IR-DPP-LABEL: @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_strictfp(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
; IR-DPP: 2:
; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float 0x7FF8000000000000) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP9]], float [[TMP10]], metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP11]], float [[TMP12]], metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP13]], float [[TMP14]], metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP15]], float [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP17]], float [[TMP18]], metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP19]], float [[TMP20]], metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF8000000000000, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
; IR-DPP: 26:
; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] syncscope("agent") monotonic, align 4
; IR-DPP-NEXT: br label [[TMP28]]
; IR-DPP: 28:
; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP30]], float [[TMP31]], metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]]
; IR-DPP-NEXT: br label [[TMP34]]
; IR-DPP: 34:
; IR-DPP-NEXT: [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
; IR-DPP-NEXT: ret float [[TMP35]]
;
%result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
; IR-ITERATIVE: 2:
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
; IR-ITERATIVE: 14:
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] monotonic, align 4
; IR-ITERATIVE-NEXT: br label [[TMP16]]
; IR-ITERATIVE: 16:
; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]]
; IR-ITERATIVE-NEXT: br label [[TMP23]]
; IR-ITERATIVE: 23:
; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
; IR-ITERATIVE-NEXT: ret float [[TMP24]]
;
; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
; IR-DPP: 2:
; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
; IR-DPP: 14:
; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] monotonic, align 4
; IR-DPP-NEXT: br label [[TMP16]]
; IR-DPP: 16:
; IR-DPP-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP17]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], float [[TMP18]], float [[TMP21]]
; IR-DPP-NEXT: br label [[TMP23]]
; IR-DPP: 23:
; IR-DPP-NEXT: [[TMP24:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
; IR-DPP-NEXT: ret float [[TMP24]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
ret float %result
}
define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
; IR-ITERATIVE: 2:
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
; IR-ITERATIVE: 10:
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP23:%.*]] monotonic, align 4
; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
; IR-ITERATIVE: 12:
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP13]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], float [[TMP14]], float [[TMP15]]
; IR-ITERATIVE-NEXT: br label [[TMP17]]
; IR-ITERATIVE: 17:
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
; IR-ITERATIVE-NEXT: ret float [[TMP18]]
; IR-ITERATIVE: ComputeLoop:
; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP22]] = call float @llvm.amdgcn.writelane.f32(float [[ACCUMULATOR]], i32 [[TMP20]], float [[OLDVALUEPHI]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP23]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]]
; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1
; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
; IR-ITERATIVE: ComputeEnd:
; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
;
; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
; IR-DPP: 2:
; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.set.inactive.f32(float [[VAL:%.*]], float -0.000000e+00) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP10:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP9]], float [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP13]], float [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP15]], float [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP17]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP19]], float [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[TMP21]], i32 63) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP23]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
; IR-DPP: 26:
; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP24]] monotonic, align 4
; IR-DPP-NEXT: br label [[TMP28]]
; IR-DPP: 28:
; IR-DPP-NEXT: [[TMP29:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
; IR-DPP-NEXT: [[TMP30:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP29]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP31:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP22]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP32:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP30]], float [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], float [[TMP30]], float [[TMP32]]
; IR-DPP-NEXT: br label [[TMP34]]
; IR-DPP: 34:
; IR-DPP-NEXT: [[TMP35:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
; IR-DPP-NEXT: ret float [[TMP35]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
ret float %result
}
define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float inreg %val) #0 {
; IR-LABEL: @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
ret float %result
}
define amdgpu_ps float @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float %val) #0 {
; IR-LABEL: @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
ret float %result
}
define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) %ptr, float inreg %val) #1 {
; IR-LABEL: @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) %ptr, float %val) #1 {
; IR-LABEL: @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) #2 {
; IR-LABEL: @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float %val) #2 {
; IR-LABEL: @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fmin_div_address_uni_value_agent_scope(ptr addrspace(1) %ptr, float inreg %val) #0 {
; IR-LABEL: @global_atomic_fmin_div_address_uni_value_agent_scope(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fmin_div_address_div_value_agent_scope(ptr addrspace(1) %ptr, float %val) #0 {
; IR-LABEL: @global_atomic_fmin_div_address_div_value_agent_scope(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) %ptr, float inreg %val) #1{
; IR-LABEL: @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_strictfp(ptr addrspace(1) %ptr, float %val) #1{
; IR-LABEL: @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
ret float %result
}
define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) #2 {
; IR-LABEL: @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
ret float %result
}
define amdgpu_ps float @global_atomic_fadd_div_address_div_value_system_scope_strictfp(ptr addrspace(1) %ptr, float %val) #2 {
; IR-LABEL: @global_atomic_fadd_div_address_div_value_system_scope_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
; IR-NEXT: ret float [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
ret float %result
}
define amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, double inreg %val) #0 {
; IR-LABEL: @global_atomic_fadd_double_uni_address_uni_value_agent_scope_unsafe(
; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
; IR: 2:
; IR-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
; IR-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
; IR-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]])
; IR-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
; IR-NEXT: [[TMP11:%.*]] = uitofp i32 [[TMP10]] to double
; IR-NEXT: [[TMP12:%.*]] = fmul double [[VAL:%.*]], [[TMP11]]
; IR-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
; IR: 14:
; IR-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] syncscope("agent") monotonic, align 4
; IR-NEXT: br label [[TMP16]]
; IR: 16:
; IR-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
; IR-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]])
; IR-NEXT: [[TMP19:%.*]] = uitofp i32 [[TMP8]] to double
; IR-NEXT: [[TMP20:%.*]] = fmul double [[VAL]], [[TMP19]]
; IR-NEXT: [[TMP21:%.*]] = fadd double [[TMP18]], [[TMP20]]
; IR-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]]
; IR-NEXT: br label [[TMP23]]
; IR: 23:
; IR-NEXT: [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
; IR-NEXT: ret double [[TMP24]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic, align 4
ret double %result
}
define amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_scope_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, double %val) #0 {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_div_value_scope_agent_scope_unsafe(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
; IR-ITERATIVE: 2:
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
; IR-ITERATIVE: 10:
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("agent") monotonic, align 4
; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
; IR-ITERATIVE: 12:
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]])
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = fadd double [[TMP14]], [[TMP22:%.*]]
; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]]
; IR-ITERATIVE-NEXT: br label [[TMP17]]
; IR-ITERATIVE: 17:
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
; IR-ITERATIVE-NEXT: ret double [[TMP18]]
; IR-ITERATIVE: ComputeLoop:
; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]])
; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]])
; IR-ITERATIVE-NEXT: [[TMP23]] = fadd double [[ACCUMULATOR]], [[TMP21]]
; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]]
; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1
; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
; IR-ITERATIVE: ComputeEnd:
; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
;
; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_div_value_scope_agent_scope_unsafe(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
; IR-DPP: 2:
; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00)
; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP11:%.*]] = fadd double [[TMP9]], [[TMP10]]
; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP13:%.*]] = fadd double [[TMP11]], [[TMP12]]
; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP15:%.*]] = fadd double [[TMP13]], [[TMP14]]
; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP17:%.*]] = fadd double [[TMP15]], [[TMP16]]
; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP19:%.*]] = fadd double [[TMP17]], [[TMP18]]
; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP21:%.*]] = fadd double [[TMP19]], [[TMP20]]
; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63)
; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]])
; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
; IR-DPP: 26:
; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] syncscope("agent") monotonic, align 4
; IR-DPP-NEXT: br label [[TMP28]]
; IR-DPP: 28:
; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]])
; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]])
; IR-DPP-NEXT: [[TMP32:%.*]] = fadd double [[TMP30]], [[TMP31]]
; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]]
; IR-DPP-NEXT: br label [[TMP34]]
; IR-DPP: 34:
; IR-DPP-NEXT: [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
; IR-DPP-NEXT: ret double [[TMP35]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic, align 4
ret double %result
}
define amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #1 {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
; IR-ITERATIVE: 2:
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
; IR-ITERATIVE: 14:
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] syncscope("one-as") monotonic, align 8
; IR-ITERATIVE-NEXT: br label [[TMP16]]
; IR-ITERATIVE: 16:
; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]]
; IR-ITERATIVE-NEXT: br label [[TMP23]]
; IR-ITERATIVE: 23:
; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
; IR-ITERATIVE-NEXT: ret double [[TMP24]]
;
; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_uni_value_one_as_scope_unsafe_strictfp(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
; IR-DPP: 2:
; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
; IR-DPP: 14:
; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] syncscope("one-as") monotonic, align 8
; IR-DPP-NEXT: br label [[TMP16]]
; IR-DPP: 16:
; IR-DPP-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]]
; IR-DPP-NEXT: br label [[TMP23]]
; IR-DPP: 23:
; IR-DPP-NEXT: [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
; IR-DPP-NEXT: ret double [[TMP24]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("one-as") monotonic
ret double %result
}
define amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double %val) #1 {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
; IR-ITERATIVE: 2:
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
; IR-ITERATIVE: 10:
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("one-as") monotonic, align 8
; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
; IR-ITERATIVE: 12:
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP14]], double [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]]
; IR-ITERATIVE-NEXT: br label [[TMP17]]
; IR-ITERATIVE: 17:
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
; IR-ITERATIVE-NEXT: ret double [[TMP18]]
; IR-ITERATIVE: ComputeLoop:
; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP23]] = call double @llvm.experimental.constrained.fadd.f64(double [[ACCUMULATOR]], double [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]]
; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1
; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
; IR-ITERATIVE: ComputeEnd:
; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
;
; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_div_value_one_as_scope_unsafe_strictfp(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
; IR-DPP: 2:
; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP9]], double [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP11]], double [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP13]], double [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP15]], double [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP17]], double [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP19]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
; IR-DPP: 26:
; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] syncscope("one-as") monotonic, align 8
; IR-DPP-NEXT: br label [[TMP28]]
; IR-DPP: 28:
; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP32:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP30]], double [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]]
; IR-DPP-NEXT: br label [[TMP34]]
; IR-DPP: 34:
; IR-DPP-NEXT: [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
; IR-DPP-NEXT: ret double [[TMP35]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("one-as") monotonic
ret double %result
}
define amdgpu_ps double @global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #2 {
; IR-ITERATIVE-LABEL: @global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
; IR-ITERATIVE: 2:
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
; IR-ITERATIVE: 14:
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] syncscope("agent") monotonic, align 8
; IR-ITERATIVE-NEXT: br label [[TMP16]]
; IR-ITERATIVE: 16:
; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]]
; IR-ITERATIVE-NEXT: br label [[TMP23]]
; IR-ITERATIVE: 23:
; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
; IR-ITERATIVE-NEXT: ret double [[TMP24]]
;
; IR-DPP-LABEL: @global_atomic_fsub_double_uni_address_uni_value_agent_scope_strictfp(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
; IR-DPP: 2:
; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
; IR-DPP: 14:
; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] syncscope("agent") monotonic, align 8
; IR-DPP-NEXT: br label [[TMP16]]
; IR-DPP: 16:
; IR-DPP-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]]
; IR-DPP-NEXT: br label [[TMP23]]
; IR-DPP: 23:
; IR-DPP-NEXT: [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
; IR-DPP-NEXT: ret double [[TMP24]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
ret double %result
}
define amdgpu_ps double @global_atomic_fsub_double_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, double %val) #2 {
; IR-ITERATIVE-LABEL: @global_atomic_fsub_double_uni_address_div_value_agent_scope_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
; IR-ITERATIVE: 2:
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
; IR-ITERATIVE: 10:
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("agent") monotonic, align 8
; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
; IR-ITERATIVE: 12:
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[TMP14]], double [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]]
; IR-ITERATIVE-NEXT: br label [[TMP17]]
; IR-ITERATIVE: 17:
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
; IR-ITERATIVE-NEXT: ret double [[TMP18]]
; IR-ITERATIVE: ComputeLoop:
; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP23]] = call double @llvm.experimental.constrained.fadd.f64(double [[ACCUMULATOR]], double [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]]
; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1
; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
; IR-ITERATIVE: ComputeEnd:
; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
;
; IR-DPP-LABEL: @global_atomic_fsub_double_uni_address_div_value_agent_scope_strictfp(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
; IR-DPP: 2:
; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP9]], double [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP11]], double [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP13]], double [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP15]], double [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP17]], double [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP19]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
; IR-DPP: 26:
; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] syncscope("agent") monotonic, align 8
; IR-DPP-NEXT: br label [[TMP28]]
; IR-DPP: 28:
; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP32:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[TMP30]], double [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]]
; IR-DPP-NEXT: br label [[TMP34]]
; IR-DPP: 34:
; IR-DPP-NEXT: [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
; IR-DPP-NEXT: ret double [[TMP35]]
;
%result = atomicrmw fsub ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
ret double %result
}
define amdgpu_ps double @global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, double inreg %val) #0 {
; IR-LABEL: @global_atomic_fmin_double_uni_address_uni_value_agent_scope_unsafe(
; IR-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
; IR-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]]
; IR: 2:
; IR-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
; IR-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
; IR-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
; IR: 10:
; IR-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8
; IR-NEXT: br label [[TMP12]]
; IR: 12:
; IR-NEXT: [[TMP13:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
; IR-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]])
; IR-NEXT: [[TMP15:%.*]] = uitofp i32 [[TMP8]] to double
; IR-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], double 0x7FF8000000000000, double [[VAL]]
; IR-NEXT: [[TMP17:%.*]] = call double @llvm.minnum.f64(double [[TMP14]], double [[TMP16]])
; IR-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], double [[TMP14]], double [[TMP17]]
; IR-NEXT: br label [[TMP19]]
; IR: 19:
; IR-NEXT: [[TMP20:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ]
; IR-NEXT: ret double [[TMP20]]
;
%result = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
ret double %result
}
define amdgpu_ps double @global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, double %val) #0 {
; IR-ITERATIVE-LABEL: @global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
; IR-ITERATIVE: 2:
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
; IR-ITERATIVE: 10:
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("agent") monotonic, align 8
; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
; IR-ITERATIVE: 12:
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]])
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.minnum.f64(double [[TMP14]], double [[TMP22:%.*]])
; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]]
; IR-ITERATIVE-NEXT: br label [[TMP17]]
; IR-ITERATIVE: 17:
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
; IR-ITERATIVE-NEXT: ret double [[TMP18]]
; IR-ITERATIVE: ComputeLoop:
; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]])
; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]])
; IR-ITERATIVE-NEXT: [[TMP23]] = call double @llvm.minnum.f64(double [[ACCUMULATOR]], double [[TMP21]])
; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]]
; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1
; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
; IR-ITERATIVE: ComputeEnd:
; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
;
; IR-DPP-LABEL: @global_atomic_fmin_double_uni_address_div_value_agent_scope_unsafe(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
; IR-DPP: 2:
; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double 0x7FF8000000000000)
; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP9]], i32 273, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.minnum.f64(double [[TMP9]], double [[TMP10]])
; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP11]], i32 274, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.minnum.f64(double [[TMP11]], double [[TMP12]])
; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP13]], i32 276, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.minnum.f64(double [[TMP13]], double [[TMP14]])
; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP15]], i32 280, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.minnum.f64(double [[TMP15]], double [[TMP16]])
; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP17]], i32 322, i32 10, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.minnum.f64(double [[TMP17]], double [[TMP18]])
; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP19]], i32 323, i32 12, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.minnum.f64(double [[TMP19]], double [[TMP20]])
; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP21]], i32 312, i32 15, i32 15, i1 false)
; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63)
; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]])
; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
; IR-DPP: 26:
; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] syncscope("agent") monotonic, align 8
; IR-DPP-NEXT: br label [[TMP28]]
; IR-DPP: 28:
; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]])
; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]])
; IR-DPP-NEXT: [[TMP32:%.*]] = call double @llvm.minnum.f64(double [[TMP30]], double [[TMP31]])
; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]]
; IR-DPP-NEXT: br label [[TMP34]]
; IR-DPP: 34:
; IR-DPP-NEXT: [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
; IR-DPP-NEXT: ret double [[TMP35]]
;
%result = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
ret double %result
}
define amdgpu_ps double @global_atomic__fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #1{
; IR-ITERATIVE-LABEL: @global_atomic__fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]]
; IR-ITERATIVE: 2:
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
; IR-ITERATIVE: 10:
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8
; IR-ITERATIVE-NEXT: br label [[TMP12]]
; IR-ITERATIVE: 12:
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], double 0x7FF8000000000000, double [[VAL]]
; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP14]], double [[TMP16]], metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], double [[TMP14]], double [[TMP17]]
; IR-ITERATIVE-NEXT: br label [[TMP19]]
; IR-ITERATIVE: 19:
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ]
; IR-ITERATIVE-NEXT: ret double [[TMP20]]
;
; IR-DPP-LABEL: @global_atomic__fmax_double_uni_address_uni_value_agent_scope_unsafe_strictfp(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP19:%.*]]
; IR-DPP: 2:
; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-DPP-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
; IR-DPP: 10:
; IR-DPP-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8
; IR-DPP-NEXT: br label [[TMP12]]
; IR-DPP: 12:
; IR-DPP-NEXT: [[TMP13:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP16:%.*]] = select i1 [[TMP9]], double 0x7FF8000000000000, double [[VAL]]
; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP14]], double [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], double [[TMP14]], double [[TMP17]]
; IR-DPP-NEXT: br label [[TMP19]]
; IR-DPP: 19:
; IR-DPP-NEXT: [[TMP20:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP18]], [[TMP12]] ]
; IR-DPP-NEXT: ret double [[TMP20]]
;
%result = atomicrmw fmax ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
ret double %result
}
define amdgpu_ps double @global_atomic__fmax_double_uni_address_div_value_agent_scope_unsafe_strictfp(ptr addrspace(1) inreg %ptr, double %val) #1{
; IR-ITERATIVE-LABEL: @global_atomic__fmax_double_uni_address_div_value_agent_scope_unsafe_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
; IR-ITERATIVE: 2:
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
; IR-ITERATIVE: 10:
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] syncscope("agent") monotonic, align 8
; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
; IR-ITERATIVE: 12:
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP14]], double [[TMP22:%.*]], metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]]
; IR-ITERATIVE-NEXT: br label [[TMP17]]
; IR-ITERATIVE: 17:
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
; IR-ITERATIVE-NEXT: ret double [[TMP18]]
; IR-ITERATIVE: ComputeLoop:
; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ 0x7FF8000000000000, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP23]] = call double @llvm.experimental.constrained.maxnum.f64(double [[ACCUMULATOR]], double [[TMP21]], metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]]
; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1
; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
; IR-ITERATIVE: ComputeEnd:
; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
;
; IR-DPP-LABEL: @global_atomic__fmax_double_uni_address_div_value_agent_scope_unsafe_strictfp(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
; IR-DPP: 2:
; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double 0x7FF8000000000000) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP9]], double [[TMP10]], metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP11]], double [[TMP12]], metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP13]], double [[TMP14]], metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP15]], double [[TMP16]], metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP17]], double [[TMP18]], metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP19]], double [[TMP20]], metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double 0x7FF8000000000000, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
; IR-DPP: 26:
; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] syncscope("agent") monotonic, align 8
; IR-DPP-NEXT: br label [[TMP28]]
; IR-DPP: 28:
; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP32:%.*]] = call double @llvm.experimental.constrained.maxnum.f64(double [[TMP30]], double [[TMP31]], metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]]
; IR-DPP-NEXT: br label [[TMP34]]
; IR-DPP: 34:
; IR-DPP-NEXT: [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
; IR-DPP-NEXT: ret double [[TMP35]]
;
%result = atomicrmw fmax ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
ret double %result
}
define amdgpu_ps double @global_atomic_fadd_double_uni_address_uni_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, double inreg %val) #2 {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_uni_value_system_scope_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
; IR-ITERATIVE: 2:
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
; IR-ITERATIVE: 14:
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] monotonic, align 4
; IR-ITERATIVE-NEXT: br label [[TMP16]]
; IR-ITERATIVE: 16:
; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]]
; IR-ITERATIVE-NEXT: br label [[TMP23]]
; IR-ITERATIVE: 23:
; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
; IR-ITERATIVE-NEXT: ret double [[TMP24]]
;
; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_uni_value_system_scope_strictfp(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP23:%.*]]
; IR-DPP: 2:
; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL:%.*]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
; IR-DPP: 14:
; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP12]] monotonic, align 4
; IR-DPP-NEXT: br label [[TMP16]]
; IR-DPP: 16:
; IR-DPP-NEXT: [[TMP17:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP17]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[VAL]], double [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP18]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP22:%.*]] = select i1 [[TMP13]], double [[TMP18]], double [[TMP21]]
; IR-DPP-NEXT: br label [[TMP23]]
; IR-DPP: 23:
; IR-DPP-NEXT: [[TMP24:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP22]], [[TMP16]] ]
; IR-DPP-NEXT: ret double [[TMP24]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, double %val monotonic, align 4
ret double %result
}
define amdgpu_ps double @global_atomic_fadd_double_uni_address_div_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, double %val) #2 {
; IR-ITERATIVE-LABEL: @global_atomic_fadd_double_uni_address_div_value_system_scope_strictfp(
; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
; IR-ITERATIVE: 2:
; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
; IR-ITERATIVE: 10:
; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP23:%.*]] monotonic, align 4
; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
; IR-ITERATIVE: 12:
; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi double [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP13]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP14]], double [[TMP22:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = select i1 [[TMP28:%.*]], double [[TMP14]], double [[TMP15]]
; IR-ITERATIVE-NEXT: br label [[TMP17]]
; IR-ITERATIVE: 17:
; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP16]], [[TMP12]] ]
; IR-ITERATIVE-NEXT: ret double [[TMP18]]
; IR-ITERATIVE: ComputeLoop:
; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi double [ -0.000000e+00, [[TMP2]] ], [ [[TMP23]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP22]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP26:%.*]], [[COMPUTELOOP]] ]
; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32
; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[VAL:%.*]], i32 [[TMP20]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP22]] = call double @llvm.amdgcn.writelane.f64(double [[ACCUMULATOR]], i32 [[TMP20]], double [[OLDVALUEPHI]]) #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP23]] = call double @llvm.experimental.constrained.fadd.f64(double [[ACCUMULATOR]], double [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = shl i64 1, [[TMP19]]
; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = xor i64 [[TMP24]], -1
; IR-ITERATIVE-NEXT: [[TMP26]] = and i64 [[ACTIVEBITS]], [[TMP25]]
; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = icmp eq i64 [[TMP26]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP27]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
; IR-ITERATIVE: ComputeEnd:
; IR-ITERATIVE-NEXT: [[TMP28]] = icmp eq i32 [[TMP8]], 0
; IR-ITERATIVE-NEXT: br i1 [[TMP28]], label [[TMP10]], label [[TMP12]]
;
; IR-DPP-LABEL: @global_atomic_fadd_double_uni_address_div_value_system_scope_strictfp(
; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP34:%.*]]
; IR-DPP: 2:
; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP9:%.*]] = call double @llvm.amdgcn.set.inactive.f64(double [[VAL:%.*]], double -0.000000e+00) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP10:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP9]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP9]], double [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP12:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP11]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP11]], double [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP14:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP13]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP13]], double [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP16:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP15]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP15]], double [[TMP16]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP18:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP17]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP17]], double [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP20:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP19]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP21:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP19]], double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP22:%.*]] = call double @llvm.amdgcn.update.dpp.f64(double -0.000000e+00, double [[TMP21]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP23:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[TMP21]], i32 63) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP24:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP23]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP25:%.*]] = icmp eq i32 [[TMP8]], 0
; IR-DPP-NEXT: br i1 [[TMP25]], label [[TMP26:%.*]], label [[TMP28:%.*]]
; IR-DPP: 26:
; IR-DPP-NEXT: [[TMP27:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[TMP24]] monotonic, align 4
; IR-DPP-NEXT: br label [[TMP28]]
; IR-DPP: 28:
; IR-DPP-NEXT: [[TMP29:%.*]] = phi double [ poison, [[TMP2]] ], [ [[TMP27]], [[TMP26]] ]
; IR-DPP-NEXT: [[TMP30:%.*]] = call double @llvm.amdgcn.readfirstlane.f64(double [[TMP29]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP31:%.*]] = call double @llvm.amdgcn.strict.wwm.f64(double [[TMP22]]) #[[ATTR8]]
; IR-DPP-NEXT: [[TMP32:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP30]], double [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
; IR-DPP-NEXT: [[TMP33:%.*]] = select i1 [[TMP25]], double [[TMP30]], double [[TMP32]]
; IR-DPP-NEXT: br label [[TMP34]]
; IR-DPP: 34:
; IR-DPP-NEXT: [[TMP35:%.*]] = phi double [ poison, [[TMP0:%.*]] ], [ [[TMP33]], [[TMP28]] ]
; IR-DPP-NEXT: ret double [[TMP35]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, double %val monotonic, align 4
ret double %result
}
define amdgpu_ps double @global_atomic_fadd_double_div_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr, double inreg %val) #0 {
; IR-LABEL: @global_atomic_fadd_double_div_address_uni_value_agent_scope_unsafe(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret double [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic, align 4
ret double %result
}
define amdgpu_ps double @global_atomic_fadd_double_div_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr, double %val) #0 {
; IR-LABEL: @global_atomic_fadd_double_div_address_div_value_agent_scope_unsafe(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 4
; IR-NEXT: ret double [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic, align 4
ret double %result
}
define amdgpu_ps double @global_atomic_fadd_double_div_address_uni_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) %ptr, double inreg %val) #1 {
; IR-LABEL: @global_atomic_fadd_double_div_address_uni_value_one_as_scope_unsafe_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("one-as") monotonic, align 8
; IR-NEXT: ret double [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("one-as") monotonic
ret double %result
}
define amdgpu_ps double @global_atomic_fadd_double_div_address_div_value_one_as_scope_unsafe_strictfp(ptr addrspace(1) %ptr, double %val) #1 {
; IR-LABEL: @global_atomic_fadd_double_div_address_div_value_one_as_scope_unsafe_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("one-as") monotonic, align 8
; IR-NEXT: ret double [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("one-as") monotonic
ret double %result
}
define amdgpu_ps double @global_atomic_fsub_double_div_address_uni_value_agent_scope_strictfp(ptr addrspace(1) %ptr, double inreg %val) #2 {
; IR-LABEL: @global_atomic_fsub_double_div_address_uni_value_agent_scope_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8
; IR-NEXT: ret double [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
ret double %result
}
define amdgpu_ps double @global_atomic_fsub_double_div_address_div_value_agent_scope_strictfp(ptr addrspace(1) %ptr, double %val) #2 {
; IR-LABEL: @global_atomic_fsub_double_div_address_div_value_agent_scope_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8
; IR-NEXT: ret double [[RESULT]]
;
%result = atomicrmw fsub ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
ret double %result
}
define amdgpu_ps double @global_atomic_fmin_double_div_address_uni_value_agent_scope(ptr addrspace(1) %ptr, double inreg %val) #0 {
; IR-LABEL: @global_atomic_fmin_double_div_address_uni_value_agent_scope(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8
; IR-NEXT: ret double [[RESULT]]
;
%result = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
ret double %result
}
define amdgpu_ps double @global_atomic_fmin_double_div_address_div_value_agent_scope(ptr addrspace(1) %ptr, double %val) #0 {
; IR-LABEL: @global_atomic_fmin_double_div_address_div_value_agent_scope(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8
; IR-NEXT: ret double [[RESULT]]
;
%result = atomicrmw fmin ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
ret double %result
}
define amdgpu_ps double @global_atomic__fmax_double_div_address_uni_value_agent_scope_unsafe_strictfp(ptr addrspace(1) %ptr, double inreg %val) #1{
; IR-LABEL: @global_atomic__fmax_double_div_address_uni_value_agent_scope_unsafe_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8
; IR-NEXT: ret double [[RESULT]]
;
%result = atomicrmw fmax ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
ret double %result
}
define amdgpu_ps double @global_atomic__fmax_double_div_address_div_value_agent_scope_unsafe_strictfp(ptr addrspace(1) %ptr, double %val) #1{
; IR-LABEL: @global_atomic__fmax_double_div_address_div_value_agent_scope_unsafe_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] syncscope("agent") monotonic, align 8
; IR-NEXT: ret double [[RESULT]]
;
%result = atomicrmw fmax ptr addrspace(1) %ptr, double %val syncscope("agent") monotonic
ret double %result
}
define amdgpu_ps double @global_atomic_fadd_double_div_address_uni_value_system_scope_strictfp(ptr addrspace(1) %ptr, double inreg %val) #2 {
; IR-LABEL: @global_atomic_fadd_double_div_address_uni_value_system_scope_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] monotonic, align 4
; IR-NEXT: ret double [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, double %val monotonic, align 4
ret double %result
}
define amdgpu_ps double @global_atomic_fadd_double_div_address_div_value_system_scope_strictfp(ptr addrspace(1) %ptr, double %val) #2 {
; IR-LABEL: @global_atomic_fadd_double_div_address_div_value_system_scope_strictfp(
; IR-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], double [[VAL:%.*]] monotonic, align 4
; IR-NEXT: ret double [[RESULT]]
;
%result = atomicrmw fadd ptr addrspace(1) %ptr, double %val monotonic, align 4
ret double %result
}
attributes #0 = { denormal_fpenv(float: preservesign) }
attributes #1 = { strictfp denormal_fpenv(float: preservesign) }
attributes #2 = { strictfp }