Alex MacLean 3a84a4e55d
Reland "[NVPTX] Unify and extend barrier{.cta} intrinsic support" (#141143)
Note: This relands #140615 adding a ".count" suffix to the non-".all"
variants.

Our current support for barrier intrinsics is confusing and
incomplete: multiple intrinsics map to the same instruction, and
intrinsic names do not clearly convey their semantics. Further, we
lack support for some variants. This change unifies the IR
representation into a single, consistently named set of intrinsics.

- llvm.nvvm.barrier.cta.sync.aligned.all(i32)
- llvm.nvvm.barrier.cta.sync.aligned.count(i32, i32)
- llvm.nvvm.barrier.cta.arrive.aligned.count(i32, i32)
- llvm.nvvm.barrier.cta.sync.all(i32)
- llvm.nvvm.barrier.cta.sync.count(i32, i32)
- llvm.nvvm.barrier.cta.arrive.count(i32, i32)

The following Auto-Upgrade rules are used to maintain compatibility with
IR using the legacy intrinsics:

* llvm.nvvm.barrier0 --> llvm.nvvm.barrier.cta.sync.aligned.all(0)
* llvm.nvvm.barrier.n --> llvm.nvvm.barrier.cta.sync.aligned.all(x)
* llvm.nvvm.bar.sync --> llvm.nvvm.barrier.cta.sync.aligned.all(x)
* llvm.nvvm.barrier --> llvm.nvvm.barrier.cta.sync.aligned.count(x, y)
* llvm.nvvm.barrier.sync --> llvm.nvvm.barrier.cta.sync.all(x)
* llvm.nvvm.barrier.sync.cnt --> llvm.nvvm.barrier.cta.sync.count(x, y)
2025-05-22 19:38:10 -07:00

172 lines
5.9 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
; RUN: opt -passes=function-attrs -S < %s | FileCheck %s
; @nonleaf is declared convergent, but its only call is a direct call to
; @leaf, which is defined in this module. The expected attribute set below
; does not contain convergent, i.e. the attribute is expected to be dropped.
define i32 @nonleaf() convergent {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; CHECK-LABEL: define {{[^@]+}}@nonleaf
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[A:%.*]] = call i32 @leaf()
; CHECK-NEXT: ret i32 [[A]]
;
%a = call i32 @leaf()
ret i32 %a
}
; @leaf is declared convergent but contains no calls at all; it just returns
; a constant. The expected attribute set below does not contain convergent.
define i32 @leaf() convergent {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; CHECK-LABEL: define {{[^@]+}}@leaf
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: ret i32 0
;
ret i32 0
}
; @k is an external declaration (no body in this file) marked convergent.
declare i32 @k() convergent
; @extern calls @k through a call site that is itself marked convergent.
; The expected attribute set keeps convergent on @extern.
define i32 @extern() convergent {
; CHECK: Function Attrs: convergent
; CHECK-LABEL: define {{[^@]+}}@extern
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[A:%.*]] = call i32 @k() #[[ATTR1]]
; CHECK-NEXT: ret i32 [[A]]
;
%a = call i32 @k() convergent
ret i32 %a
}
; Convergent should not be removed on the function here. Although the call is
; not explicitly convergent, it picks up the convergent attr from the callee.
; The expected output therefore keeps the function in the convergent-only
; attribute group, while the call site itself carries no attribute group.
define i32 @extern_non_convergent_call() convergent {
; CHECK: Function Attrs: convergent
; CHECK-LABEL: define {{[^@]+}}@extern_non_convergent_call
; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: [[A:%.*]] = call i32 @k()
; CHECK-NEXT: ret i32 [[A]]
;
%a = call i32 @k()
ret i32 %a
}
; Indirect call through the pointer argument %f, with the call site marked
; convergent. The expected attribute set keeps convergent on the function.
; The expected signature also shows %f annotated as readonly captures(none).
define i32 @indirect_convergent_call(ptr %f) convergent {
; CHECK: Function Attrs: convergent
; CHECK-LABEL: define {{[^@]+}}@indirect_convergent_call
; CHECK-SAME: (ptr readonly captures(none) [[F:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[A:%.*]] = call i32 [[F]]() #[[ATTR1]]
; CHECK-NEXT: ret i32 [[A]]
;
%a = call i32 %f() convergent
ret i32 %a
}
; Give indirect_non_convergent_call the norecurse attribute so we get a
; "Function Attrs" comment in the output.
; The indirect call through %f is not marked convergent, and the expected
; attribute set (norecurse only) no longer contains convergent.
define i32 @indirect_non_convergent_call(ptr %f) convergent norecurse {
; CHECK: Function Attrs: norecurse
; CHECK-LABEL: define {{[^@]+}}@indirect_non_convergent_call
; CHECK-SAME: (ptr readonly captures(none) [[F:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[A:%.*]] = call i32 [[F]]()
; CHECK-NEXT: ret i32 [[A]]
;
%a = call i32 %f()
ret i32 %a
}
; NVVM CTA barrier intrinsic, declared convergent; used by @intrinsic below.
declare void @llvm.nvvm.barrier.cta.sync.aligned.all(i32) convergent
; @intrinsic calls the barrier intrinsic through a call site that has no
; explicit convergent marker. The expected attribute set still contains
; convergent (together with norecurse and nounwind).
define i32 @intrinsic() convergent {
; Implicitly convergent, because the intrinsic is convergent.
; CHECK: Function Attrs: convergent norecurse nounwind
; CHECK-LABEL: define {{[^@]+}}@intrinsic
; CHECK-SAME: () #[[ATTR4:[0-9]+]] {
; CHECK-NEXT: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0)
; CHECK-NEXT: ret i32 0
;
call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0)
ret i32 0
}
; @recursive1 and @recursive2 call each other through call sites marked
; convergent, and neither calls anything else. The expected attribute set
; for @recursive1 does not contain convergent, although the call site in the
; expected body still carries the convergent attribute group.
define i32 @recursive1() convergent {
; CHECK: Function Attrs: nofree nosync nounwind memory(none)
; CHECK-LABEL: define {{[^@]+}}@recursive1
; CHECK-SAME: () #[[ATTR5:[0-9]+]] {
; CHECK-NEXT: [[A:%.*]] = call i32 @recursive2() #[[ATTR1]]
; CHECK-NEXT: ret i32 [[A]]
;
%a = call i32 @recursive2() convergent
ret i32 %a
}
; Second half of the mutually recursive pair: @recursive2 only calls
; @recursive1, through a call site marked convergent. The expected attribute
; set for @recursive2 does not contain convergent.
define i32 @recursive2() convergent {
; CHECK: Function Attrs: nofree nosync nounwind memory(none)
; CHECK-LABEL: define {{[^@]+}}@recursive2
; CHECK-SAME: () #[[ATTR5]] {
; CHECK-NEXT: [[A:%.*]] = call i32 @recursive1() #[[ATTR1]]
; CHECK-NEXT: ret i32 [[A]]
;
%a = call i32 @recursive1() convergent
ret i32 %a
}
; @noopt is marked optnone and noinline in addition to convergent. The
; expected attribute set is exactly the declared one (convergent noinline
; optnone). It calls @noopt_friend, which in turn calls @noopt.
define i32 @noopt() convergent optnone noinline {
; CHECK: Function Attrs: convergent noinline optnone
; CHECK-LABEL: define {{[^@]+}}@noopt
; CHECK-SAME: () #[[ATTR6:[0-9]+]] {
; CHECK-NEXT: [[A:%.*]] = call i32 @noopt_friend() #[[ATTR1]]
; CHECK-NEXT: ret i32 0
;
%a = call i32 @noopt_friend() convergent
ret i32 0
}
; A function which is mutually-recursive with a convergent, optnone function
; shouldn't have its convergent attribute stripped.
; Note that the call to @noopt below is not itself marked convergent; the
; expected attribute set for @noopt_friend nevertheless keeps convergent.
define i32 @noopt_friend() convergent {
; CHECK: Function Attrs: convergent
; CHECK-LABEL: define {{[^@]+}}@noopt_friend
; CHECK-SAME: () #[[ATTR1]] {
; CHECK-NEXT: [[A:%.*]] = call i32 @noopt()
; CHECK-NEXT: ret i32 0
;
%a = call i32 @noopt()
ret i32 0
}
; This function is expected to be stripped of its convergent attribute even
; though it is the callee of a controlled-convergence call (a call carrying a
; "convergencectrl" operand bundle, see @nonleaf_convergent in this file).
; This should be OK.
define i32 @leaf_noconvergent_used() convergent {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; CHECK-LABEL: define {{[^@]+}}@leaf_noconvergent_used
; CHECK-SAME: () #[[ATTR0]] {
; CHECK-NEXT: ret i32 0
;
ret i32 0
}
; @nonleaf_convergent obtains a convergence token from
; llvm.experimental.convergence.entry and passes it to the call of
; @leaf_noconvergent_used via a "convergencectrl" operand bundle. The
; expected attribute set keeps convergent on this function, alongside the
; other inferred attributes.
define i32 @nonleaf_convergent() convergent {
; CHECK: Function Attrs: convergent mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; CHECK-LABEL: define {{[^@]+}}@nonleaf_convergent
; CHECK-SAME: () #[[ATTR7:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = call token @llvm.experimental.convergence.entry()
; CHECK-NEXT: [[TMP2:%.*]] = call i32 @leaf_noconvergent_used() [ "convergencectrl"(token [[TMP1]]) ]
; CHECK-NEXT: ret i32 0
;
%1 = call token @llvm.experimental.convergence.entry()
%2 = call i32 @leaf_noconvergent_used() [ "convergencectrl"(token %1) ]
ret i32 0
}
; NOTE(review): attribute group #1 is referenced here but no
; "attributes #1 = ..." definition is visible in this file - confirm this is
; intentional.
declare token @llvm.experimental.convergence.entry() #1
;.
; CHECK: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR1]] = { convergent }
; CHECK: attributes #[[ATTR2]] = { norecurse }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nounwind }
; CHECK: attributes #[[ATTR4]] = { convergent norecurse nounwind }
; CHECK: attributes #[[ATTR5]] = { nofree nosync nounwind memory(none) }
; CHECK: attributes #[[ATTR6]] = { convergent noinline optnone }
; CHECK: attributes #[[ATTR7]] = { convergent mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR8:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) }
;.