
This patch is intended to be the first of a series whose end goal is to adapt the atomic optimizer pass to support i64 and f64 operations (along with removing all unnecessary bitcasts). This legalizes 64-bit readlane, writelane and readfirstlane ops pre-ISel. --------- Co-authored-by: vikramRH <vikhegde@amd.com>
89 lines
4.1 KiB
LLVM
89 lines
4.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
|
|
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx908 -passes="amdgpu-atomic-optimizer,verify<domtree>" %s -S -o - | FileCheck %s
|
|
|
|
; Check we're properly adding an edge from ComputeEnd to the "End" block added by
|
|
; SplitBlockAndInsertIfThen
|
|
;
|
|
; If the edge is not added, domtree verification will fail.
|
|
|
|
; External callee with no body in this file. @ham uses its i32 results both as
; branch conditions and as the value that is fed to the atomic add.
declare i32 @quux()
|
|
|
|
; @ham: reproducer kernel. The atomicrmw in %bb7 sits inside the %bb4/%bb8 loop
; and adds %phi, a value built from @quux() results. The expected output listed
; inside the function shows that atomic rewritten into a ComputeLoop/ComputeEnd
; reduction over %phi, followed by a conditional branch around the single
; remaining atomicrmw.
define amdgpu_kernel void @ham(ptr addrspace(4) %arg) {
|
|
; CHECK-LABEL: define amdgpu_kernel void @ham(
|
|
; CHECK-SAME: ptr addrspace(4) [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: bb:
|
|
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @quux()
|
|
; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i32 [[CALL]], 0
|
|
; CHECK-NEXT: br i1 [[ICMP]], label [[BB1:%.*]], label [[BB3:%.*]]
|
|
; CHECK: bb1:
|
|
; CHECK-NEXT: [[CALL2:%.*]] = tail call i32 @quux()
|
|
; CHECK-NEXT: br label [[BB3]]
|
|
; CHECK: bb3:
|
|
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[CALL2]], [[BB1]] ], [ [[CALL]], [[BB:%.*]] ]
|
|
; CHECK-NEXT: br label [[BB4:%.*]]
|
|
; CHECK: bb4:
|
|
; CHECK-NEXT: [[CALL5:%.*]] = tail call i32 @quux()
|
|
; CHECK-NEXT: [[ICMP6:%.*]] = icmp eq i32 [[CALL5]], 0
|
|
; CHECK-NEXT: br i1 [[ICMP6]], label [[BB8:%.*]], label [[BB7:%.*]]
|
|
; CHECK: bb7:
|
|
; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[ARG]], align 8
|
|
; CHECK-NEXT: [[ADDRSPACECAST:%.*]] = addrspacecast ptr [[LOAD]] to ptr addrspace(1)
|
|
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
|
|
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
|
|
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0]], 32
|
|
; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
|
|
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP1]], i32 0)
|
|
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP3]], i32 [[TMP4]])
|
|
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
|
|
; CHECK-NEXT: br label [[COMPUTELOOP:%.*]]
|
|
; CHECK: 7:
|
|
; CHECK-NEXT: [[TMP8:%.*]] = atomicrmw add ptr addrspace(1) [[ADDRSPACECAST]], i32 [[TMP13:%.*]] syncscope("agent-one-as") monotonic, align 4
|
|
; CHECK-NEXT: br label [[TMP9:%.*]]
|
|
; CHECK: 9:
|
|
; CHECK-NEXT: br label [[BB8]]
|
|
; CHECK: bb8:
|
|
; CHECK-NEXT: br label [[BB4]]
|
|
; CHECK: ComputeLoop:
|
|
; CHECK-NEXT: [[ACCUMULATOR:%.*]] = phi i32 [ 0, [[BB7]] ], [ [[TMP13]], [[COMPUTELOOP]] ]
|
|
; CHECK-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP6]], [[BB7]] ], [ [[TMP16:%.*]], [[COMPUTELOOP]] ]
|
|
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
|
|
; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP10]] to i32
|
|
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[PHI]], i32 [[TMP11]])
|
|
; CHECK-NEXT: [[TMP13]] = add i32 [[ACCUMULATOR]], [[TMP12]]
|
|
; CHECK-NEXT: [[TMP14:%.*]] = shl i64 1, [[TMP10]]
|
|
; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP14]], -1
|
|
; CHECK-NEXT: [[TMP16]] = and i64 [[ACTIVEBITS]], [[TMP15]]
|
|
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[TMP16]], 0
|
|
; CHECK-NEXT: br i1 [[TMP17]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]]
|
|
; CHECK: ComputeEnd:
|
|
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP5]], 0
|
|
; CHECK-NEXT: br i1 [[TMP18]], label [[TMP7:%.*]], label [[TMP9]]
|
|
;
|
|
; Entry: call @quux() and take %bb1 only when it returned 0.
bb:
|
|
%call = tail call i32 @quux()
|
|
%icmp = icmp eq i32 %call, 0
|
|
br i1 %icmp, label %bb1, label %bb3
|
|
|
|
; Second @quux() call, reached only from %bb.
bb1: ; preds = %bb
|
|
%call2 = tail call i32 @quux()
|
|
br label %bb3
|
|
|
|
; Merge point: %phi is %call2 when coming through %bb1, otherwise %call.
; %phi is the operand of the atomic add in %bb7.
bb3: ; preds = %bb1, %bb
|
|
%phi = phi i32 [ %call2, %bb1 ], [ %call, %bb ]
|
|
br label %bb4
|
|
|
|
; Loop header, entered from %bb3 and re-entered from %bb8.
; A zero result from @quux() skips %bb7 and goes straight to %bb8.
bb4: ; preds = %bb8, %bb3
|
|
%call5 = tail call i32 @quux()
|
|
%icmp6 = icmp eq i32 %call5, 0
|
|
br i1 %icmp6, label %bb8, label %bb7
|
|
|
|
; Load a pointer through %arg, cast it to addrspace(1), and atomically add %phi
; to it. This is the atomicrmw that the pass under test rewrites.
bb7: ; preds = %bb4
|
|
%load = load ptr, ptr addrspace(4) %arg, align 8
|
|
%addrspacecast = addrspacecast ptr %load to ptr addrspace(1)
|
|
%atomicrmw = atomicrmw add ptr addrspace(1) %addrspacecast, i32 %phi syncscope("agent-one-as") monotonic, align 4
|
|
br label %bb8
|
|
|
|
; Latch: always branches back to %bb4, so the function contains no return.
bb8: ; preds = %bb7, %bb4
|
|
br label %bb4
|
|
}
|