llvm-project/llvm/test/CodeGen/AMDGPU/atomic_optimization_split_dt_update.ll
Vikram Hegde 5feb32ba92
[AMDGPU] Extend readlane, writelane and readfirstlane intrinsic lowering for generic types (#89217)
This patch is intended to be the first of a series with end goal to
adapt atomic optimizer pass to support i64 and f64 operations (along
with removing all unnecessary bitcasts). This legalizes 64 bit readlane,
writelane and readfirstlane ops pre-ISel

---------

Co-authored-by: vikramRH <vikhegde@amd.com>
2024-06-25 14:35:19 +05:30

89 lines
4.1 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx908 -passes="amdgpu-atomic-optimizer,verify<domtree>" %s -S -o - | FileCheck %s
; Check we're properly adding an edge from ComputeEnd to the "End" block added by
; SplitBlockAndInsertIfThen
;
; If the edge is not added, domtree verification will fail.
; Opaque external call: gives the optimizer an i32 it cannot constant-fold,
; so the control flow below survives into the atomic-optimizer pass.
declare i32 @quux()
; Reproducer: a uniform atomicrmw in a conditional block (bb7) inside an
; infinite loop (bb4 <-> bb8). The atomic optimizer rewrites bb7 into a
; ballot/mbcnt + ComputeLoop/ComputeEnd lane reduction; the test verifies
; (via verify<domtree> in the RUN line) that the pass adds the edge from
; ComputeEnd to the "End" block created by SplitBlockAndInsertIfThen.
define amdgpu_kernel void @ham(ptr addrspace(4) %arg) {
; CHECK-LABEL: define amdgpu_kernel void @ham(
; CHECK-SAME: ptr addrspace(4) [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @quux()
; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i32 [[CALL]], 0
; CHECK-NEXT: br i1 [[ICMP]], label [[BB1:%.*]], label [[BB3:%.*]]
; CHECK: bb1:
; CHECK-NEXT: [[CALL2:%.*]] = tail call i32 @quux()
; CHECK-NEXT: br label [[BB3]]
; CHECK: bb3:
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[CALL2]], [[BB1]] ], [ [[CALL]], [[BB:%.*]] ]
; CHECK-NEXT: br label [[BB4:%.*]]
; CHECK: bb4:
; CHECK-NEXT: [[CALL5:%.*]] = tail call i32 @quux()
; CHECK-NEXT: [[ICMP6:%.*]] = icmp eq i32 [[CALL5]], 0
; CHECK-NEXT: br i1 [[ICMP6]], label [[BB8:%.*]], label [[BB7:%.*]]
; CHECK: bb7:
; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[ARG]], align 8
; CHECK-NEXT: [[ADDRSPACECAST:%.*]] = addrspacecast ptr [[LOAD]] to ptr addrspace(1)
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP0]], 32
; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP2]] to i32
; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP1]], i32 0)
; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP3]], i32 [[TMP4]])
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
; CHECK-NEXT: br label [[COMPUTELOOP:%.*]]
; CHECK: 7:
; CHECK-NEXT: [[TMP8:%.*]] = atomicrmw add ptr addrspace(1) [[ADDRSPACECAST]], i32 [[TMP13:%.*]] syncscope("agent-one-as") monotonic, align 4
; CHECK-NEXT: br label [[TMP9:%.*]]
; CHECK: 9:
; CHECK-NEXT: br label [[BB8]]
; CHECK: bb8:
; CHECK-NEXT: br label [[BB4]]
; CHECK: ComputeLoop:
; CHECK-NEXT: [[ACCUMULATOR:%.*]] = phi i32 [ 0, [[BB7]] ], [ [[TMP13]], [[COMPUTELOOP]] ]
; CHECK-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP6]], [[BB7]] ], [ [[TMP16:%.*]], [[COMPUTELOOP]] ]
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP10]] to i32
; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[PHI]], i32 [[TMP11]])
; CHECK-NEXT: [[TMP13]] = add i32 [[ACCUMULATOR]], [[TMP12]]
; CHECK-NEXT: [[TMP14:%.*]] = shl i64 1, [[TMP10]]
; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP14]], -1
; CHECK-NEXT: [[TMP16]] = and i64 [[ACTIVEBITS]], [[TMP15]]
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[TMP16]], 0
; CHECK-NEXT: br i1 [[TMP17]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]]
; CHECK: ComputeEnd:
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[TMP5]], 0
; CHECK-NEXT: br i1 [[TMP18]], label [[TMP7:%.*]], label [[TMP9]]
;
; Entry: obtain an opaque i32 and pick one of two producers for %phi.
bb:
%call = tail call i32 @quux()
%icmp = icmp eq i32 %call, 0
br i1 %icmp, label %bb1, label %bb3
bb1: ; preds = %bb
%call2 = tail call i32 @quux()
br label %bb3
; %phi is the value later fed to the atomicrmw (and, after the pass runs,
; to the readlane-based ComputeLoop reduction).
bb3: ; preds = %bb1, %bb
%phi = phi i32 [ %call2, %bb1 ], [ %call, %bb ]
br label %bb4
; Loop header with no exit (bb4 -> {bb7,bb8} -> bb4): keeps the conditional
; atomic inside a cycle, which is what stresses the domtree update.
bb4: ; preds = %bb8, %bb3
%call5 = tail call i32 @quux()
%icmp6 = icmp eq i32 %call5, 0
br i1 %icmp6, label %bb8, label %bb7
; The atomicrmw the pass optimizes: it is replaced by the ballot/mbcnt
; sequence plus ComputeLoop/ComputeEnd blocks seen in the CHECK lines above.
bb7: ; preds = %bb4
%load = load ptr, ptr addrspace(4) %arg, align 8
%addrspacecast = addrspacecast ptr %load to ptr addrspace(1)
%atomicrmw = atomicrmw add ptr addrspace(1) %addrspacecast, i32 %phi syncscope("agent-one-as") monotonic, align 4
br label %bb8
bb8: ; preds = %bb7, %bb4
br label %bb4
}