; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-enable-uniform-intrinsic-combine=0 -O3 -S < %s | FileCheck %s -check-prefix=CURRENT-CHECK
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -amdgpu-uniform-intrinsic-combine -S < %s | FileCheck %s -check-prefix=PASS-CHECK
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -O3 -S < %s | FileCheck %s -check-prefix=O3-CHECK

; Waterfall-style loop whose exit test is `ballot(!done) == 0` on a 64-bit
; ballot. %done is false when coming from %entry and true when coming from
; %if, and %if is the only block that stores (i32 5 to %out).
;
; Expected output recorded by the assertions below:
;   * O3 with the combine disabled: a peeled copy of the loop remains, still
;     guarded by ballot.i32(i1 true) compared against 0;
;   * the pass run on its own: the exit branch is driven by a new
;     `xor i1 %not_done, true`; the ballot call and the icmp are still present;
;   * plain O3: only the store and the return remain.
define protected amdgpu_kernel void @trivial_waterfall_eq_zero(ptr addrspace(1) %out) {
; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero(
; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; CURRENT-CHECK-NEXT: [[ENTRY:.*:]]
; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true)
; CURRENT-CHECK-NEXT: [[IS_DONE_PEEL:%.*]] = icmp eq i32 [[TMP0]], 0
; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_PEEL]], label %[[EXIT:.*]], label %[[IF_PEEL:.*]]
; CURRENT-CHECK: [[IF_PEEL]]:
; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; CURRENT-CHECK-NEXT: br label %[[EXIT]]
; CURRENT-CHECK: [[EXIT]]:
; CURRENT-CHECK-NEXT: ret void
;
; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero(
; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
; PASS-CHECK-NEXT: [[ENTRY:.*]]:
; PASS-CHECK-NEXT: br label %[[WHILE:.*]]
; PASS-CHECK: [[WHILE]]:
; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true
; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[NOT_DONE]])
; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true
; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 [[BALLOT]], 0
; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF]]
; PASS-CHECK: [[IF]]:
; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; PASS-CHECK-NEXT: br label %[[WHILE]]
; PASS-CHECK: [[EXIT]]:
; PASS-CHECK-NEXT: ret void
;
; O3-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero(
; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
; O3-CHECK-NEXT: [[ENTRY:.*:]]
; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; O3-CHECK-NEXT: ret void
;
entry:
  br label %while

while:
  ; false on the first trip, true once %if has executed.
  %done = phi i1 [ false, %entry ], [ true, %if ]
  %not_done = xor i1 %done, true
  %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %not_done)
  %is_done = icmp eq i64 %ballot, 0 ; in this case is_done = !not_done
  br i1 %is_done, label %exit, label %if

if:
  store i32 5, ptr addrspace(1) %out
  br label %while

exit:
  ret void
}

; Same loop as @trivial_waterfall_eq_zero, but the exit compare is written with
; its operands swapped: `icmp eq i64 0, %ballot` (constant first).
;
; Expected output recorded by the assertions below:
;   * O3 with the combine disabled: a peeled copy of the loop remains, still
;     guarded by ballot.i32(i1 true) compared against 0;
;   * the pass run on its own: the exit branch is driven by a new
;     `xor i1 %not_done, true`; the ballot call and the constant-first icmp
;     are still present;
;   * plain O3: only the store and the return remain.
define protected amdgpu_kernel void @trivial_waterfall_eq_zero_swap_op(ptr addrspace(1) %out) {
; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero_swap_op(
; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CURRENT-CHECK-NEXT: [[ENTRY:.*:]]
; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true)
; CURRENT-CHECK-NEXT: [[IS_DONE_PEEL:%.*]] = icmp eq i32 [[TMP0]], 0
; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_PEEL]], label %[[EXIT:.*]], label %[[IF_PEEL:.*]]
; CURRENT-CHECK: [[IF_PEEL]]:
; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; CURRENT-CHECK-NEXT: br label %[[EXIT]]
; CURRENT-CHECK: [[EXIT]]:
; CURRENT-CHECK-NEXT: ret void
;
; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero_swap_op(
; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
; PASS-CHECK-NEXT: [[ENTRY:.*]]:
; PASS-CHECK-NEXT: br label %[[WHILE:.*]]
; PASS-CHECK: [[WHILE]]:
; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true
; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[NOT_DONE]])
; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true
; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 0, [[BALLOT]]
; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF]]
; PASS-CHECK: [[IF]]:
; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; PASS-CHECK-NEXT: br label %[[WHILE]]
; PASS-CHECK: [[EXIT]]:
; PASS-CHECK-NEXT: ret void
;
; O3-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero_swap_op(
; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
; O3-CHECK-NEXT: [[ENTRY:.*:]]
; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; O3-CHECK-NEXT: ret void
;
entry:
  br label %while

while:
  ; false on the first trip, true once %if has executed.
  %done = phi i1 [ false, %entry ], [ true, %if ]
  %not_done = xor i1 %done, true
  %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %not_done)
  %is_done = icmp eq i64 0, %ballot ; in this case is_done = !not_done
  br i1 %is_done, label %exit, label %if

if:
  store i32 5, ptr addrspace(1) %out
  br label %while

exit:
  ret void
}

; Waterfall-style loop whose exit test is `ballot(done) != 0` on a 64-bit
; ballot. %done is false when coming from %entry and true when coming from
; %if, and %if is the only block that stores (i32 5 to %out).
;
; NOTE(review): despite the name, the compare here has the constant as its
; first operand (`icmp ne i64 0, %ballot`), while
; @trivial_waterfall_ne_zero_swap has the ballot first. That is the reverse of
; the eq_zero / eq_zero_swap_op pair; confirm whether the names were intended
; the other way round.
;
; Expected output recorded by the assertions below:
;   * O3 with the combine disabled: the store is in the entry block, followed
;     by a loop that repeats while ballot.i32(i1 true) equals 0 and carries
;     loop metadata;
;   * the pass run on its own: the exit branch tests %done directly; the
;     ballot call and the icmp are still present;
;   * plain O3: only the store and the return remain.
define protected amdgpu_kernel void @trivial_waterfall_ne_zero(ptr addrspace(1) %out) {
; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero(
; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
; CURRENT-CHECK-NEXT: [[ENTRY:.*:]]
; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; CURRENT-CHECK-NEXT: br label %[[WHILE:.*]]
; CURRENT-CHECK: [[WHILE]]:
; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true)
; CURRENT-CHECK-NEXT: [[IS_DONE_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_NOT]], label %[[WHILE]], label %[[EXIT:.*]], !llvm.loop [[LOOP0:![0-9]+]]
; CURRENT-CHECK: [[EXIT]]:
; CURRENT-CHECK-NEXT: ret void
;
; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero(
; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
; PASS-CHECK-NEXT: [[ENTRY:.*]]:
; PASS-CHECK-NEXT: br label %[[WHILE:.*]]
; PASS-CHECK: [[WHILE]]:
; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[DONE]])
; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp ne i64 0, [[BALLOT]]
; PASS-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]]
; PASS-CHECK: [[IF]]:
; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; PASS-CHECK-NEXT: br label %[[WHILE]]
; PASS-CHECK: [[EXIT]]:
; PASS-CHECK-NEXT: ret void
;
; O3-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero(
; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
; O3-CHECK-NEXT: [[ENTRY:.*:]]
; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; O3-CHECK-NEXT: ret void
;
entry:
  br label %while

while:
  ; false on the first trip, true once %if has executed.
  %done = phi i1 [ false, %entry ], [ true, %if ]
  %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %done)
  %is_done = icmp ne i64 0, %ballot ; in this case is_done = done
  br i1 %is_done, label %exit, label %if

if:
  store i32 5, ptr addrspace(1) %out
  br label %while

exit:
  ret void
}

; Same loop as @trivial_waterfall_ne_zero with the other operand order on the
; exit compare: `icmp ne i64 %ballot, 0` (ballot first).
;
; NOTE(review): the "_swap" suffix is attached to the ballot-first form here,
; whereas in the eq_zero pair the "_swap_op" kernel is the constant-first one;
; confirm whether the names were intended the other way round.
;
; Expected output recorded by the assertions below:
;   * O3 with the combine disabled: the store is in the entry block, followed
;     by a loop that repeats while ballot.i32(i1 true) equals 0 and carries
;     loop metadata;
;   * the pass run on its own: the exit branch tests %done directly; the
;     ballot call and the icmp are still present;
;   * plain O3: only the store and the return remain.
define protected amdgpu_kernel void @trivial_waterfall_ne_zero_swap(ptr addrspace(1) %out) {
; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero_swap(
; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
; CURRENT-CHECK-NEXT: [[ENTRY:.*:]]
; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; CURRENT-CHECK-NEXT: br label %[[WHILE:.*]]
; CURRENT-CHECK: [[WHILE]]:
; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true)
; CURRENT-CHECK-NEXT: [[IS_DONE_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_NOT]], label %[[WHILE]], label %[[EXIT:.*]], !llvm.loop [[LOOP2:![0-9]+]]
; CURRENT-CHECK: [[EXIT]]:
; CURRENT-CHECK-NEXT: ret void
;
; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero_swap(
; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
; PASS-CHECK-NEXT: [[ENTRY:.*]]:
; PASS-CHECK-NEXT: br label %[[WHILE:.*]]
; PASS-CHECK: [[WHILE]]:
; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[DONE]])
; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp ne i64 [[BALLOT]], 0
; PASS-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]]
; PASS-CHECK: [[IF]]:
; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; PASS-CHECK-NEXT: br label %[[WHILE]]
; PASS-CHECK: [[EXIT]]:
; PASS-CHECK-NEXT: ret void
;
; O3-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero_swap(
; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
; O3-CHECK-NEXT: [[ENTRY:.*:]]
; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; O3-CHECK-NEXT: ret void
;
entry:
  br label %while

while:
  ; false on the first trip, true once %if has executed.
  %done = phi i1 [ false, %entry ], [ true, %if ]
  %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %done)
  %is_done = icmp ne i64 %ballot, 0 ; in this case is_done = done
  br i1 %is_done, label %exit, label %if

if:
  store i32 5, ptr addrspace(1) %out
  br label %while

exit:
  ret void
}

; Waterfall loop with an inner readfirstlane test on a constant.
; Each trip through %if reads the first active lane's copy of the constant 0
; and compares it with 0; the store of i32 5 to %out lives in %work, and
; %new_done is true only when control reached %tail through %work.
;
; Expected output recorded by the assertions below:
;   * O3 with the combine disabled: a peeled copy of the loop remains, guarded
;     by ballot.i32(i1 true) compared against 0, with the store in the peeled
;     %work block;
;   * the pass run on its own: the loop-exit branch is driven by a new
;     `xor i1 %not_done, true`; the ballot, the readfirstlane call and both
;     compares are still present;
;   * plain O3: only the store and the return remain.
define protected amdgpu_kernel void @trivial_uniform_waterfall(ptr addrspace(1) %out) {
; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_uniform_waterfall(
; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CURRENT-CHECK-NEXT: [[ENTRY:.*:]]
; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true)
; CURRENT-CHECK-NEXT: [[IS_DONE_PEEL:%.*]] = icmp eq i32 [[TMP0]], 0
; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_PEEL]], label %[[EXIT:.*]], label %[[WORK_PEEL:.*]]
; CURRENT-CHECK: [[WORK_PEEL]]:
; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; CURRENT-CHECK-NEXT: br label %[[EXIT]]
; CURRENT-CHECK: [[EXIT]]:
; CURRENT-CHECK-NEXT: ret void
;
; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_uniform_waterfall(
; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
; PASS-CHECK-NEXT: [[ENTRY:.*]]:
; PASS-CHECK-NEXT: br label %[[WHILE:.*]]
; PASS-CHECK: [[WHILE]]:
; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ [[NEW_DONE:%.*]], %[[TAIL:.*]] ]
; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true
; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[NOT_DONE]])
; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true
; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 [[BALLOT]], 0
; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF:.*]]
; PASS-CHECK: [[IF]]:
; PASS-CHECK-NEXT: [[FIRST_ACTIVE_ID:%.*]] = tail call noundef i32 @llvm.amdgcn.readfirstlane.i32(i32 0)
; PASS-CHECK-NEXT: [[IS_FIRST_ACTIVE_ID:%.*]] = icmp eq i32 0, [[FIRST_ACTIVE_ID]]
; PASS-CHECK-NEXT: br i1 [[IS_FIRST_ACTIVE_ID]], label %[[WORK:.*]], label %[[TAIL]]
; PASS-CHECK: [[WORK]]:
; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; PASS-CHECK-NEXT: br label %[[TAIL]]
; PASS-CHECK: [[TAIL]]:
; PASS-CHECK-NEXT: [[NEW_DONE]] = phi i1 [ true, %[[WORK]] ], [ false, %[[IF]] ]
; PASS-CHECK-NEXT: br label %[[WHILE]]
; PASS-CHECK: [[EXIT]]:
; PASS-CHECK-NEXT: ret void
;
; O3-CHECK-LABEL: define protected amdgpu_kernel void @trivial_uniform_waterfall(
; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
; O3-CHECK-NEXT: [[ENTRY:.*:]]
; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; O3-CHECK-NEXT: ret void
;
entry:
  br label %while

while:
  ; Loop-carried completion flag: false on entry, %new_done afterwards.
  %done = phi i1 [ false, %entry ], [ %new_done, %tail ]
  %not_done = xor i1 %done, true
  %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %not_done)
  %is_done = icmp eq i64 %ballot, 0
  br i1 %is_done, label %exit, label %if

if:
  ; The readfirstlane operand is the constant 0 in this "trivial" variant.
  %first_active_id = tail call noundef i32 @llvm.amdgcn.readfirstlane.i32(i32 0)
  %is_first_active_id = icmp eq i32 0, %first_active_id
  br i1 %is_first_active_id, label %work, label %tail

work:
  store i32 5, ptr addrspace(1) %out
  br label %tail

tail:
  ; true only when the store in %work was executed on this trip.
  %new_done = phi i1 [ true, %work ], [ false, %if ]
  br label %while

exit:
  ret void
}

; Waterfall loop keyed on a kernel argument.
; Same shape as @trivial_uniform_waterfall, except that the value passed to
; readfirstlane, and compared against its result, is the i32 kernel argument
; %mymask instead of a constant.
;
; Expected output recorded by the assertions below:
;   * O3 with the combine disabled: a peeled copy of the loop remains, guarded
;     by ballot.i32(i1 true) compared against 0, with the store in the peeled
;     %work block;
;   * the pass run on its own: the loop-exit branch is driven by a new
;     `xor i1 %not_done, true`; the ballot, the readfirstlane call on %mymask
;     and both compares are still present;
;   * plain O3: only the store and the return remain.
define protected amdgpu_kernel void @uniform_waterfall(ptr addrspace(1) %out, i32 %mymask) {
; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @uniform_waterfall(
; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) [[OUT:%.*]], i32 [[MYMASK:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CURRENT-CHECK-NEXT: [[ENTRY:.*:]]
; CURRENT-CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true)
; CURRENT-CHECK-NEXT: [[IS_DONE_PEEL:%.*]] = icmp eq i32 [[TMP0]], 0
; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_PEEL]], label %[[EXIT:.*]], label %[[WORK_PEEL:.*]]
; CURRENT-CHECK: [[WORK_PEEL]]:
; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; CURRENT-CHECK-NEXT: br label %[[EXIT]]
; CURRENT-CHECK: [[EXIT]]:
; CURRENT-CHECK-NEXT: ret void
;
; PASS-CHECK-LABEL: define protected amdgpu_kernel void @uniform_waterfall(
; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], i32 [[MYMASK:%.*]]) #[[ATTR0]] {
; PASS-CHECK-NEXT: [[ENTRY:.*]]:
; PASS-CHECK-NEXT: br label %[[WHILE:.*]]
; PASS-CHECK: [[WHILE]]:
; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ [[NEW_DONE:%.*]], %[[TAIL:.*]] ]
; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true
; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i64 @llvm.amdgcn.ballot.i64(i1 [[NOT_DONE]])
; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true
; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i64 [[BALLOT]], 0
; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF:.*]]
; PASS-CHECK: [[IF]]:
; PASS-CHECK-NEXT: [[FIRST_ACTIVE_ID:%.*]] = tail call noundef i32 @llvm.amdgcn.readfirstlane.i32(i32 [[MYMASK]])
; PASS-CHECK-NEXT: [[IS_FIRST_ACTIVE_ID:%.*]] = icmp eq i32 [[MYMASK]], [[FIRST_ACTIVE_ID]]
; PASS-CHECK-NEXT: br i1 [[IS_FIRST_ACTIVE_ID]], label %[[WORK:.*]], label %[[TAIL]]
; PASS-CHECK: [[WORK]]:
; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; PASS-CHECK-NEXT: br label %[[TAIL]]
; PASS-CHECK: [[TAIL]]:
; PASS-CHECK-NEXT: [[NEW_DONE]] = phi i1 [ true, %[[WORK]] ], [ false, %[[IF]] ]
; PASS-CHECK-NEXT: br label %[[WHILE]]
; PASS-CHECK: [[EXIT]]:
; PASS-CHECK-NEXT: ret void
;
; O3-CHECK-LABEL: define protected amdgpu_kernel void @uniform_waterfall(
; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]], i32 [[MYMASK:%.*]]) local_unnamed_addr #[[ATTR0]] {
; O3-CHECK-NEXT: [[ENTRY:.*:]]
; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; O3-CHECK-NEXT: ret void
;
entry:
  br label %while

while:
  ; Loop-carried completion flag: false on entry, %new_done afterwards.
  %done = phi i1 [ false, %entry ], [ %new_done, %tail ]
  %not_done = xor i1 %done, true
  %ballot = tail call i64 @llvm.amdgcn.ballot.i64(i1 %not_done)
  %is_done = icmp eq i64 %ballot, 0
  br i1 %is_done, label %exit, label %if

if:
  ; Compare this lane's %mymask with the value read from the first active lane.
  %first_active_id = tail call noundef i32 @llvm.amdgcn.readfirstlane.i32(i32 %mymask)
  %is_first_active_id = icmp eq i32 %mymask, %first_active_id
  br i1 %is_first_active_id, label %work, label %tail

work:
  store i32 5, ptr addrspace(1) %out
  br label %tail

tail:
  ; true only when the store in %work was executed on this trip.
  %new_done = phi i1 [ true, %work ], [ false, %if ]
  br label %while

exit:
  ret void
}

; 32-bit-ballot variant of @trivial_waterfall_eq_zero: the exit test is
; `ballot.i32(!done) == 0`. %done is false when coming from %entry and true
; when coming from %if, and %if is the only block that stores (i32 5 to %out).
;
; Expected output recorded by the assertions below:
;   * O3 with the combine disabled: a peeled copy of the loop remains, still
;     guarded by ballot.i32(i1 true) compared against 0;
;   * the pass run on its own: the exit branch is driven by a new
;     `xor i1 %not_done, true`; the ballot call and the icmp are still present;
;   * plain O3: only the store and the return remain.
define protected amdgpu_kernel void @trivial_waterfall_eq_zero_i32(ptr addrspace(1) %out) {
; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero_i32(
; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
; CURRENT-CHECK-NEXT: [[ENTRY:.*:]]
; CURRENT-CHECK-NEXT: [[BALLOT_PEEL:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true)
; CURRENT-CHECK-NEXT: [[IS_DONE_PEEL:%.*]] = icmp eq i32 [[BALLOT_PEEL]], 0
; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_PEEL]], label %[[EXIT:.*]], label %[[IF_PEEL:.*]]
; CURRENT-CHECK: [[IF_PEEL]]:
; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; CURRENT-CHECK-NEXT: br label %[[EXIT]]
; CURRENT-CHECK: [[EXIT]]:
; CURRENT-CHECK-NEXT: ret void
;
; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero_i32(
; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
; PASS-CHECK-NEXT: [[ENTRY:.*]]:
; PASS-CHECK-NEXT: br label %[[WHILE:.*]]
; PASS-CHECK: [[WHILE]]:
; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
; PASS-CHECK-NEXT: [[NOT_DONE:%.*]] = xor i1 [[DONE]], true
; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 [[NOT_DONE]])
; PASS-CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[NOT_DONE]], true
; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp eq i32 [[BALLOT]], 0
; PASS-CHECK-NEXT: br i1 [[TMP0]], label %[[EXIT:.*]], label %[[IF]]
; PASS-CHECK: [[IF]]:
; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; PASS-CHECK-NEXT: br label %[[WHILE]]
; PASS-CHECK: [[EXIT]]:
; PASS-CHECK-NEXT: ret void
;
; O3-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_eq_zero_i32(
; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
; O3-CHECK-NEXT: [[ENTRY:.*:]]
; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; O3-CHECK-NEXT: ret void
;
entry:
  br label %while

while:
  ; false on the first trip, true once %if has executed.
  %done = phi i1 [ false, %entry ], [ true, %if ]
  %not_done = xor i1 %done, true
  %ballot = tail call i32 @llvm.amdgcn.ballot.i32(i1 %not_done)
  %is_done = icmp eq i32 %ballot, 0 ; in this case is_done = !not_done
  br i1 %is_done, label %exit, label %if

if:
  store i32 5, ptr addrspace(1) %out
  br label %while

exit:
  ret void
}

; 32-bit-ballot variant of @trivial_waterfall_ne_zero: the exit test is
; `ballot.i32(done) != 0`, written constant-first (`icmp ne i32 0, %ballot`).
; %done is false when coming from %entry and true when coming from %if, and
; %if is the only block that stores (i32 5 to %out).
;
; NOTE(review): as in the i64 kernel of the same name, the constant is the
; first icmp operand here, unlike @trivial_waterfall_eq_zero_i32; confirm this
; operand order is the one intended for this case.
;
; Expected output recorded by the assertions below:
;   * O3 with the combine disabled: the store is in the entry block, followed
;     by a loop that repeats while ballot.i32(i1 true) equals 0 and carries
;     loop metadata;
;   * the pass run on its own: the exit branch tests %done directly; the
;     ballot call and the icmp are still present;
;   * plain O3: only the store and the return remain.
define protected amdgpu_kernel void @trivial_waterfall_ne_zero_i32(ptr addrspace(1) %out) {
; CURRENT-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero_i32(
; CURRENT-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR1]] {
; CURRENT-CHECK-NEXT: [[ENTRY:.*:]]
; CURRENT-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; CURRENT-CHECK-NEXT: br label %[[WHILE:.*]]
; CURRENT-CHECK: [[WHILE]]:
; CURRENT-CHECK-NEXT: [[BALLOT:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 true)
; CURRENT-CHECK-NEXT: [[IS_DONE_NOT:%.*]] = icmp eq i32 [[BALLOT]], 0
; CURRENT-CHECK-NEXT: br i1 [[IS_DONE_NOT]], label %[[WHILE]], label %[[EXIT:.*]], !llvm.loop [[LOOP3:![0-9]+]]
; CURRENT-CHECK: [[EXIT]]:
; CURRENT-CHECK-NEXT: ret void
;
; PASS-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero_i32(
; PASS-CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
; PASS-CHECK-NEXT: [[ENTRY:.*]]:
; PASS-CHECK-NEXT: br label %[[WHILE:.*]]
; PASS-CHECK: [[WHILE]]:
; PASS-CHECK-NEXT: [[DONE:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ true, %[[IF:.*]] ]
; PASS-CHECK-NEXT: [[BALLOT:%.*]] = tail call i32 @llvm.amdgcn.ballot.i32(i1 [[DONE]])
; PASS-CHECK-NEXT: [[IS_DONE:%.*]] = icmp ne i32 0, [[BALLOT]]
; PASS-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[IF]]
; PASS-CHECK: [[IF]]:
; PASS-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; PASS-CHECK-NEXT: br label %[[WHILE]]
; PASS-CHECK: [[EXIT]]:
; PASS-CHECK-NEXT: ret void
;
; O3-CHECK-LABEL: define protected amdgpu_kernel void @trivial_waterfall_ne_zero_i32(
; O3-CHECK-SAME: ptr addrspace(1) writeonly captures(none) initializes((0, 4)) [[OUT:%.*]]) local_unnamed_addr #[[ATTR0]] {
; O3-CHECK-NEXT: [[ENTRY:.*:]]
; O3-CHECK-NEXT: store i32 5, ptr addrspace(1) [[OUT]], align 4
; O3-CHECK-NEXT: ret void
;
entry:
  br label %while

while:
  ; false on the first trip, true once %if has executed.
  %done = phi i1 [ false, %entry ], [ true, %if ]
  %ballot = tail call i32 @llvm.amdgcn.ballot.i32(i1 %done)
  %is_done = icmp ne i32 0, %ballot ; in this case is_done = done
  br i1 %is_done, label %exit, label %if

if:
  store i32 5, ptr addrspace(1) %out
  br label %while

exit:
  ret void
}

; Declarations for every intrinsic called by the kernels in this file.
declare i64 @llvm.amdgcn.ballot.i64(i1)
declare i32 @llvm.amdgcn.ballot.i32(i1)
declare i32 @llvm.amdgcn.readfirstlane.i32(i32)
;.
; CURRENT-CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
; CURRENT-CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
; CURRENT-CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
; CURRENT-CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
;.