Let's do the lowering of non-split into split barriers in a new IR pass, AMDGPULowerIntrinsics. That way, there is no code duplication between SelectionDAG and GlobalISel. This simplifies some upcoming extensions to the code.
81 lines
3.7 KiB
LLVM
81 lines
3.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes=amdgpu-lower-intrinsics -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -codegen-opt-level=0 | FileCheck --check-prefixes=CHECK,NOOPT %s
; RUN: opt < %s -passes=amdgpu-lower-intrinsics -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -codegen-opt-level=1 -mattr=+wavefrontsize32 | FileCheck --check-prefixes=CHECK,OPT-WAVE32 %s
; RUN: opt < %s -passes=amdgpu-lower-intrinsics -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -codegen-opt-level=1 -mattr=+wavefrontsize64 | FileCheck --check-prefixes=CHECK,OPT-WAVE64 %s

; External callee that takes the i1 produced by
; llvm.amdgcn.s.barrier.signal.isfirst in each kernel below, so that value has a
; use that shows up in the checked output.
declare void @foo(i1)

; Kernel with no "amdgpu-flat-work-group-size" attribute. The expectations are
; shared by all three opt invocations in this file, and they list the same
; signal, wait and signal.isfirst calls as the input, i.e. no rewrite is
; expected for this kernel.
define amdgpu_kernel void @barrier() {
; CHECK-LABEL: define amdgpu_kernel void @barrier(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier.signal(i32 -1)
; CHECK-NEXT: call void @llvm.amdgcn.s.barrier.wait(i16 -1)
; CHECK-NEXT: [[ISFIRST:%.*]] = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 -1)
; CHECK-NEXT: call void @foo(i1 [[ISFIRST]])
; CHECK-NEXT: ret void
;
  call void @llvm.amdgcn.s.barrier.signal(i32 -1)
  call void @llvm.amdgcn.s.barrier.wait(i16 -1)
  %isfirst = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 -1)
  call void @foo(i1 %isfirst)
  ret void
}

; Kernel carrying "amdgpu-flat-work-group-size"="32,32".
;  - With -codegen-opt-level=0 the expected output still contains the signal,
;    wait and signal.isfirst calls from the input.
;  - With -codegen-opt-level=1 the expected output is the same for
;    +wavefrontsize32 and +wavefrontsize64, namely a single
;    llvm.amdgcn.wave.barrier call followed by @foo receiving constant true.
;    Presumably this is because 32 work-items fit in one wave at either wave
;    size (to be confirmed against the pass implementation).
define amdgpu_kernel void @barrier_32threads() "amdgpu-flat-work-group-size"="32,32" {
; NOOPT-LABEL: define amdgpu_kernel void @barrier_32threads(
; NOOPT-SAME: ) #[[ATTR1:[0-9]+]] {
; NOOPT-NEXT: call void @llvm.amdgcn.s.barrier.signal(i32 -1)
; NOOPT-NEXT: call void @llvm.amdgcn.s.barrier.wait(i16 -1)
; NOOPT-NEXT: [[ISFIRST:%.*]] = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 -1)
; NOOPT-NEXT: call void @foo(i1 [[ISFIRST]])
; NOOPT-NEXT: ret void
;
; OPT-WAVE32-LABEL: define amdgpu_kernel void @barrier_32threads(
; OPT-WAVE32-SAME: ) #[[ATTR1:[0-9]+]] {
; OPT-WAVE32-NEXT: call void @llvm.amdgcn.wave.barrier()
; OPT-WAVE32-NEXT: call void @foo(i1 true)
; OPT-WAVE32-NEXT: ret void
;
; OPT-WAVE64-LABEL: define amdgpu_kernel void @barrier_32threads(
; OPT-WAVE64-SAME: ) #[[ATTR1:[0-9]+]] {
; OPT-WAVE64-NEXT: call void @llvm.amdgcn.wave.barrier()
; OPT-WAVE64-NEXT: call void @foo(i1 true)
; OPT-WAVE64-NEXT: ret void
;
  call void @llvm.amdgcn.s.barrier.signal(i32 -1)
  call void @llvm.amdgcn.s.barrier.wait(i16 -1)
  %isfirst = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 -1)
  call void @foo(i1 %isfirst)
  ret void
}

; Kernel carrying "amdgpu-flat-work-group-size"="64,64".
;  - With -codegen-opt-level=0 the expected output still contains the signal,
;    wait and signal.isfirst calls from the input.
;  - With -codegen-opt-level=1 and +wavefrontsize32 the expected output is also
;    unchanged. Presumably 64 work-items need more than one 32-wide wave, so the
;    barrier has to stay (to be confirmed against the pass implementation).
;  - With -codegen-opt-level=1 and +wavefrontsize64 the expected output is a
;    single llvm.amdgcn.wave.barrier call followed by @foo receiving constant
;    true.
define amdgpu_kernel void @barrier_64threads() "amdgpu-flat-work-group-size"="64,64" {
; NOOPT-LABEL: define amdgpu_kernel void @barrier_64threads(
; NOOPT-SAME: ) #[[ATTR2:[0-9]+]] {
; NOOPT-NEXT: call void @llvm.amdgcn.s.barrier.signal(i32 -1)
; NOOPT-NEXT: call void @llvm.amdgcn.s.barrier.wait(i16 -1)
; NOOPT-NEXT: [[ISFIRST:%.*]] = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 -1)
; NOOPT-NEXT: call void @foo(i1 [[ISFIRST]])
; NOOPT-NEXT: ret void
;
; OPT-WAVE32-LABEL: define amdgpu_kernel void @barrier_64threads(
; OPT-WAVE32-SAME: ) #[[ATTR2:[0-9]+]] {
; OPT-WAVE32-NEXT: call void @llvm.amdgcn.s.barrier.signal(i32 -1)
; OPT-WAVE32-NEXT: call void @llvm.amdgcn.s.barrier.wait(i16 -1)
; OPT-WAVE32-NEXT: [[ISFIRST:%.*]] = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 -1)
; OPT-WAVE32-NEXT: call void @foo(i1 [[ISFIRST]])
; OPT-WAVE32-NEXT: ret void
;
; OPT-WAVE64-LABEL: define amdgpu_kernel void @barrier_64threads(
; OPT-WAVE64-SAME: ) #[[ATTR2:[0-9]+]] {
; OPT-WAVE64-NEXT: call void @llvm.amdgcn.wave.barrier()
; OPT-WAVE64-NEXT: call void @foo(i1 true)
; OPT-WAVE64-NEXT: ret void
;
  call void @llvm.amdgcn.s.barrier.signal(i32 -1)
  call void @llvm.amdgcn.s.barrier.wait(i16 -1)
  %isfirst = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 -1)
  call void @foo(i1 %isfirst)
  ret void
}