; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s

; Loop body exceeds MaxAsyncMarkers on first iteration
; Preloop: 5 markers
; Loop body: 18 markers

; CHECK-LABEL: test_loop_exceeds_max_first_iteration:
; CHECK: ; wait_asyncmark(3)
; CHECK-NEXT: s_waitcnt vmcnt(3)

define void @test_loop_exceeds_max_first_iteration(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 %n, ptr addrspace(1) %out) {
; Issues 5 async load + asyncmark pairs before a loop, 18 more pairs inside
; the loop body, and then waits on asyncmark 3 at the loop exit before
; reading the LDS location back.
;
;   %in  - addrspace(1) source pointer passed to every async load
;   %lds - addrspace(3) destination pointer of every async load; also the
;          address read back in the exit block
;   %n   - loop bound: the body runs while %i < %n (signed compare)
;   %out - addrspace(1) pointer that receives the value read from %lds
entry:
  ; Preloop: 5 async LDS DMA operations
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  br label %loop_header

loop_header:
  ; %i starts at 0 and is incremented once per trip; leave the loop as soon
  ; as %i is no longer less than %n.
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop_body ]
  %i.next = add i32 %i, 1
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %loop_body, label %exit

loop_body:
  ; Loop body with 18 async operations
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  br label %loop_header

exit:
  ; Wait on asyncmark 3, then read %lds and copy the value to %out. The
  ; FileCheck directives for this function expect the wait to be emitted as
  ; vmcnt(3).
  call void @llvm.amdgcn.wait.asyncmark(i16 3)
  %lds_val = load i32, ptr addrspace(3) %lds
  store i32 %lds_val, ptr addrspace(1) %out
  ret void
}

; Loop body does not exceed MaxAsyncMarkers on first iteration
; Preloop: 5 markers
; Loop body: 5 markers

; CHECK-LABEL: test_loop_needs_more_iterations:
; CHECK: ; wait_asyncmark(3)
; CHECK-NEXT: s_waitcnt vmcnt(3)

define void @test_loop_needs_more_iterations(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 %n, ptr addrspace(1) %out) {
; Issues 5 async load + asyncmark pairs before a loop and 5 more pairs in
; each trip of the loop body, then waits on asyncmark 3 at the loop exit
; before reading the LDS location back.
;
;   %in  - addrspace(1) source pointer passed to every async load
;   %lds - addrspace(3) destination pointer of every async load; also the
;          address read back in the exit block
;   %n   - loop bound: the body runs while %i < %n (signed compare)
;   %out - addrspace(1) pointer that receives the value read from %lds
entry:
  ; Preloop: 5 async LDS DMA operations
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  br label %loop_header

loop_header:
  ; %i starts at 0 and is incremented once per trip; leave the loop as soon
  ; as %i is no longer less than %n.
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop_body ]
  %i.next = add i32 %i, 1
  %cmp = icmp slt i32 %i, %n
  br i1 %cmp, label %loop_body, label %exit

loop_body:
  ; Loop body with 5 async operations
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  br label %loop_header

exit:
  ; Wait on asyncmark 3, then read %lds and copy the value to %out. The
  ; FileCheck directives for this function expect the wait to be emitted as
  ; vmcnt(3).
  call void @llvm.amdgcn.wait.asyncmark(i16 3)
  %lds_val = load i32, ptr addrspace(3) %lds
  store i32 %lds_val, ptr addrspace(1) %out
  ret void
}

; Merge exceeds MaxAsyncMarkers

; CHECK-LABEL: max_when_merged:
; CHECK: ; wait_asyncmark(17)
; CHECK-NEXT: s_waitcnt vmcnt(15)

define void @max_when_merged(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 %n, ptr addrspace(1) %out) {
; Two-way branch whose arms issue different numbers of async load +
; asyncmark pairs (5 in %then, 18 in %else); the join block then waits on
; asyncmark 17 before reading the LDS location back.
;
;   %in  - addrspace(1) source pointer passed to every async load
;   %lds - addrspace(3) destination pointer of every async load; also the
;          address read back in the join block
;   %n   - selects the arm: %then is taken when 0 < %n (signed compare)
;   %out - addrspace(1) pointer that receives the value read from %lds
entry:
  %cmp = icmp slt i32 0, %n
  br i1 %cmp, label %then, label %else

then:
  ; 5 async operations
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  br label %endif

else:
  ; 18 async operations
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  br label %endif

endif:
  ; Wait on asyncmark 17, then read %lds and copy the value to %out.
  ; NOTE(review): the FileCheck directives for this function expect
  ; vmcnt(15) here rather than 17 - presumably because the marker state
  ; merged from the two predecessors is capped at MaxAsyncMarkers; confirm
  ; against the waitcnt insertion pass.
  call void @llvm.amdgcn.wait.asyncmark(i16 17)
  %lds_val = load i32, ptr addrspace(3) %lds
  store i32 %lds_val, ptr addrspace(1) %out
  ret void
}

; Straightline exceeds MaxAsyncMarkers

; CHECK-LABEL: no_max_in_straightline:
; CHECK: ; wait_asyncmark(17)
; CHECK-NEXT: s_waitcnt vmcnt(17)

define void @no_max_in_straightline(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 %n, ptr addrspace(1) %out) {
; Single basic block: 18 async load + asyncmark pairs issued back to back,
; followed by a wait on asyncmark 17 and a read of the LDS location.
;
;   %in  - addrspace(1) source pointer passed to every async load
;   %lds - addrspace(3) destination pointer of every async load; also the
;          address read back after the wait
;   %n   - not used by this function
;   %out - addrspace(1) pointer that receives the value read from %lds

  ; 18 async operations
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()
  call void @llvm.amdgcn.global.load.async.lds(ptr addrspace(1) %in, ptr addrspace(3) %lds, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.asyncmark()

  ; Wait on asyncmark 17, then read %lds and copy the value to %out. The
  ; FileCheck directives for this function expect the wait to be emitted as
  ; vmcnt(17), i.e. the requested count is kept as-is in straight-line code.
  call void @llvm.amdgcn.wait.asyncmark(i16 17)
  %lds_val = load i32, ptr addrspace(3) %lds
  store i32 %lds_val, ptr addrspace(1) %out
  ret void
}