llvm-project/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-control-flow.ll
Krzysztof Drewniak 96ce9f9d64
[AMDGPU] Prevent re-visits in LowerBufferFatPointers (#159168)
Fixes https://github.com/iree-org/iree/issues/22001

The visitor in SplitPtrStructs would re-visit instructions if an
instruction earlier in program order caused a recursive visit() call via
getPtrParts(). This would cause instructions to be processed multiple
times.

As a consequence of this, PHI nodes could be added to the Conditionals
array multiple times, which would lead to a conditional that was already
simplified being processed multiple times. After the code moved to
InstSimplifyFolder, this re-processing, combined with more aggressive
simplifications, would lead to an attempt to replace an instruction with
itself, causing an assertion failure and crash.

This commit resolves the issue and adds the reduced form of the crashing
input as a test.
2025-09-16 18:02:18 -07:00

484 lines
22 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt -S -mcpu=gfx900 -amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s
; RUN: opt -S -mcpu=gfx900 -passes=amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s
target triple = "amdgcn--"
;; This should optimize to just the offset part.
;; The loop-carried fat pointer %ptr.prev starts as an addrspacecast of %buf
;; and is only advanced with a getelementptr, so the expected output keeps a
;; single i32 offset phi and passes %buf itself to the raw buffer load.
define float @sum(ptr addrspace(8) %buf, i32 %len) {
; CHECK-LABEL: define float @sum
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
; CHECK-NEXT: [[PTR]] = add i32 [[PTR_PREV_OFF]], 4
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret float [[SUM]]
;
entry:
;; Wrap the buffer resource %buf as an addrspace(7) pointer.
%start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
br label %loop
loop:
%sum.prev = phi float [ %sum, %loop ], [ 0.0, %entry ]
%ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ]
%i = phi i32 [ %i.next, %loop ], [ 0, %entry ]
%val = load float, ptr addrspace(7) %ptr.prev
%sum = fadd float %sum.prev, %val
;; Step to the next float; expected to lower to an i32 add of 4 on the offset.
%ptr = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
%i.next = add i32 %i, 1
%test = icmp ult i32 %i.next, %len
br i1 %test, label %loop, label %exit
exit:
ret float %sum
}
;; But this should not.
;; Here the pointer is advanced through ptrtoint / add / inttoptr on an i160
;; instead of a getelementptr. The expected output therefore carries both a
;; resource phi and an offset phi, and rebuilds the two parts from the i160
;; value with shift and trunc instructions.
define float @sum_integer_ops(ptr addrspace(8) %buf, i32 %len) {
; CHECK-LABEL: define float @sum_integer_ops
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[PTR_PREV_RSRC:%.*]] = phi ptr addrspace(8) [ [[PTR_RSRC:%.*]], [[LOOP]] ], [ [[BUF]], [[ENTRY]] ]
; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[PTR_PREV_RSRC]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
; CHECK-NEXT: [[PTR_PREV_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[PTR_PREV_RSRC]] to i160
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i160 [[PTR_PREV_INT_RSRC]], 32
; CHECK-NEXT: [[PTR_PREV_INT_OFF:%.*]] = zext i32 [[PTR_PREV_OFF]] to i160
; CHECK-NEXT: [[PTR_PREV_INT:%.*]] = or i160 [[TMP0]], [[PTR_PREV_INT_OFF]]
; CHECK-NEXT: [[PTR_INT:%.*]] = add i160 [[PTR_PREV_INT]], 4
; CHECK-NEXT: [[TMP1:%.*]] = lshr i160 [[PTR_INT]], 32
; CHECK-NEXT: [[TMP2:%.*]] = trunc i160 [[TMP1]] to i128
; CHECK-NEXT: [[PTR_RSRC]] = inttoptr i128 [[TMP2]] to ptr addrspace(8)
; CHECK-NEXT: [[PTR_OFF]] = trunc i160 [[PTR_INT]] to i32
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret float [[SUM]]
;
entry:
%start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
br label %loop
loop:
%sum.prev = phi float [ %sum, %loop ], [ 0.0, %entry ]
%ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ]
%i = phi i32 [ %i.next, %loop ], [ 0, %entry ]
%val = load float, ptr addrspace(7) %ptr.prev
%sum = fadd float %sum.prev, %val
;; Advance the pointer with integer arithmetic on the whole i160 value rather
;; than with a getelementptr.
%ptr.prev.int = ptrtoint ptr addrspace(7) %ptr.prev to i160
%ptr.int = add i160 %ptr.prev.int, 4
%ptr = inttoptr i160 %ptr.int to ptr addrspace(7)
%i.next = add i32 %i, 1
%test = icmp ult i32 %i.next, %len
br i1 %test, label %loop, label %exit
exit:
ret float %sum
}
;; Should go to offsets only.
;; Two nested loops each carry a fat pointer phi (%ptr1.prev in the outer loop,
;; %ptr2.prev in the inner loop), and every update is a getelementptr. The
;; expected output has only i32 offset phis in both loops and loads through
;; %buf directly.
define float @sum_2d(ptr addrspace(8) %buf, i32 %ii, i32 %jj) {
; CHECK-LABEL: define float @sum_2d
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[II:%.*]], i32 [[JJ:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP1_ENTRY:%.*]]
; CHECK: loop1.entry:
; CHECK-NEXT: [[SUM1_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP1_EXIT:%.*]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP1_EXIT]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[PTR1_PREV_OFF:%.*]] = phi i32 [ [[PTR1:%.*]], [[LOOP1_EXIT]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: br label [[LOOP2:%.*]]
; CHECK: loop2:
; CHECK-NEXT: [[SUM2_PREV:%.*]] = phi float [ [[SUM]], [[LOOP2]] ], [ [[SUM1_PREV]], [[LOOP1_ENTRY]] ]
; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[J_NEXT:%.*]], [[LOOP2]] ], [ 0, [[LOOP1_ENTRY]] ]
; CHECK-NEXT: [[PTR2_PREV_OFF:%.*]] = phi i32 [ [[PTR2:%.*]], [[LOOP2]] ], [ [[PTR1_PREV_OFF]], [[LOOP1_ENTRY]] ]
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR2_PREV_OFF]], i32 0, i32 0)
; CHECK-NEXT: [[SUM]] = fadd float [[SUM2_PREV]], [[VAL]]
; CHECK-NEXT: [[PTR2]] = add i32 [[PTR2_PREV_OFF]], 4
; CHECK-NEXT: [[J_NEXT]] = add i32 [[J]], 1
; CHECK-NEXT: [[TEST2:%.*]] = icmp ult i32 [[J_NEXT]], [[JJ]]
; CHECK-NEXT: br i1 [[TEST2]], label [[LOOP2]], label [[LOOP1_EXIT]]
; CHECK: loop1.exit:
; CHECK-NEXT: [[PTR1]] = add i32 [[PTR2]], 4
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
; CHECK-NEXT: [[TEST1:%.*]] = icmp ult i32 [[I_NEXT]], [[II]]
; CHECK-NEXT: br i1 [[TEST1]], label [[LOOP1_ENTRY]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret float [[SUM]]
;
entry:
%start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
br label %loop1.entry
loop1.entry:
%sum1.prev = phi float [ %sum, %loop1.exit ], [ 0.0, %entry ]
%ptr1.prev = phi ptr addrspace(7) [ %ptr1, %loop1.exit ], [ %start, %entry ]
%i = phi i32 [ %i.next, %loop1.exit ], [ 0, %entry ]
br label %loop2
loop2:
%sum2.prev = phi float [ %sum, %loop2 ], [ %sum1.prev, %loop1.entry ]
;; The inner pointer phi is seeded from the outer pointer phi.
%ptr2.prev = phi ptr addrspace(7) [ %ptr2, %loop2 ], [ %ptr1.prev, %loop1.entry ]
%j = phi i32 [ %j.next, %loop2 ], [ 0, %loop1.entry ]
%val = load float, ptr addrspace(7) %ptr2.prev
%sum = fadd float %sum2.prev, %val
%ptr2 = getelementptr float, ptr addrspace(7) %ptr2.prev, i32 1
%j.next = add i32 %j, 1
%test2 = icmp ult i32 %j.next, %jj
br i1 %test2, label %loop2, label %loop1.exit
loop1.exit:
;; The outer pointer continues from where the inner loop stopped.
%ptr1 = getelementptr float, ptr addrspace(7) %ptr2, i32 1
%i.next = add i32 %i, 1
%test1 = icmp ult i32 %i.next, %ii
br i1 %test1, label %loop1.entry, label %exit
exit:
ret float %sum
}
;; This should optimize to just the offset parts since all the arguments to the
;; select point to the same buffer.
;; Both select operands are getelementptrs of %ptr.prev, so the expected output
;; selects between two i32 offsets and has no resource phi or resource select.
define float @sum_jump_on_negative(ptr addrspace(8) %buf, i32 %len) {
; CHECK-LABEL: define float @sum_jump_on_negative
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
; CHECK-NEXT: [[SKIP_NEXT:%.*]] = fcmp olt float [[VAL]], 0.000000e+00
; CHECK-NEXT: [[SMALL_JUMP:%.*]] = add i32 [[PTR_PREV_OFF]], 4
; CHECK-NEXT: [[LARGE_JUMP:%.*]] = add i32 [[PTR_PREV_OFF]], 8
; CHECK-NEXT: [[PTR_OFF]] = select i1 [[SKIP_NEXT]], i32 [[LARGE_JUMP]], i32 [[SMALL_JUMP]]
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret float [[SUM]]
;
entry:
%start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
br label %loop
loop:
%sum.prev = phi float [ %sum, %loop ], [ 0.0, %entry ]
%ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ]
%i = phi i32 [ %i.next, %loop ], [ 0, %entry ]
%val = load float, ptr addrspace(7) %ptr.prev
%sum = fadd float %sum.prev, %val
;; Advance by two floats after a negative value, otherwise by one.
%skip.next = fcmp olt float %val, 0.0
%small.jump = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
%large.jump = getelementptr float, ptr addrspace(7) %ptr.prev, i32 2
%ptr = select i1 %skip.next, ptr addrspace(7) %large.jump, ptr addrspace(7) %small.jump
%i.next = add i32 %i, 1
%test = icmp ult i32 %i.next, %len
br i1 %test, label %loop, label %exit
exit:
ret float %sum
}
;; Same computation as @sum_jump_on_negative, but the next pointer is chosen by
;; a phi over the %then / %else blocks instead of a select. Both incoming
;; values are getelementptrs of %ptr.prev, and the expected output again
;; contains only i32 offset phis with loads going through %buf.
define float @sum_jump_on_negative_with_phi(ptr addrspace(8) %buf, i32 %len) {
; CHECK-LABEL: define float @sum_jump_on_negative_with_phi
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP_EXIT:%.*]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP_EXIT]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP_EXIT]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
; CHECK-NEXT: [[SKIP_NEXT:%.*]] = fcmp olt float [[VAL]], 0.000000e+00
; CHECK-NEXT: br i1 [[SKIP_NEXT]], label [[THEN:%.*]], label [[ELSE:%.*]]
; CHECK: then:
; CHECK-NEXT: [[LARGE_JUMP:%.*]] = add i32 [[PTR_PREV_OFF]], 8
; CHECK-NEXT: br label [[LOOP_EXIT]]
; CHECK: else:
; CHECK-NEXT: [[SMALL_JUMP:%.*]] = add i32 [[PTR_PREV_OFF]], 4
; CHECK-NEXT: br label [[LOOP_EXIT]]
; CHECK: loop.exit:
; CHECK-NEXT: [[PTR_OFF]] = phi i32 [ [[LARGE_JUMP]], [[THEN]] ], [ [[SMALL_JUMP]], [[ELSE]] ]
; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret float [[SUM]]
;
entry:
%start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
br label %loop
loop:
%sum.prev = phi float [ %sum, %loop.exit ], [ 0.0, %entry ]
%ptr.prev = phi ptr addrspace(7) [ %ptr, %loop.exit ], [ %start, %entry ]
%i = phi i32 [ %i.next, %loop.exit ], [ 0, %entry ]
%val = load float, ptr addrspace(7) %ptr.prev
%sum = fadd float %sum.prev, %val
%i.next = add i32 %i, 1
%test = icmp ult i32 %i.next, %len
%skip.next = fcmp olt float %val, 0.0
br i1 %skip.next, label %then, label %else
then:
%large.jump = getelementptr float, ptr addrspace(7) %ptr.prev, i32 2
br label %loop.exit
else:
%small.jump = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
br label %loop.exit
loop.exit:
;; Merge the two candidate next pointers; this phi feeds the loop-header phi.
%ptr = phi ptr addrspace(7) [ %large.jump, %then ], [ %small.jump, %else ]
br i1 %test, label %loop, label %exit
exit:
ret float %sum
}
;; But this has a shifting resource part.
;; The select can pick %start2, which is derived from the second buffer %buf2,
;; so the resource is no longer the same on every path. The expected output
;; keeps a resource phi and an offset phi and lowers the select into one select
;; per part.
define float @sum_new_buffer_on_negative(ptr addrspace(8) %buf1, ptr addrspace(8) %buf2, i32 %len) {
; CHECK-LABEL: define float @sum_new_buffer_on_negative
; CHECK-SAME: (ptr addrspace(8) [[BUF1:%.*]], ptr addrspace(8) [[BUF2:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[PTR_PREV_RSRC:%.*]] = phi ptr addrspace(8) [ [[PTR_RSRC:%.*]], [[LOOP]] ], [ [[BUF1]], [[ENTRY]] ]
; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[PTR_PREV_RSRC]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
; CHECK-NEXT: [[HOP:%.*]] = fcmp olt float [[VAL]], 0.000000e+00
; CHECK-NEXT: [[THIS_NEXT:%.*]] = add i32 [[PTR_PREV_OFF]], 4
; CHECK-NEXT: [[PTR_RSRC]] = select i1 [[HOP]], ptr addrspace(8) [[PTR_PREV_RSRC]], ptr addrspace(8) [[BUF2]]
; CHECK-NEXT: [[PTR_OFF]] = select i1 [[HOP]], i32 [[THIS_NEXT]], i32 0
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret float [[SUM]]
;
entry:
%start = addrspacecast ptr addrspace(8) %buf1 to ptr addrspace(7)
%start2 = addrspacecast ptr addrspace(8) %buf2 to ptr addrspace(7)
br label %loop
loop:
%sum.prev = phi float [ %sum, %loop ], [ 0.0, %entry ]
%ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ]
%i = phi i32 [ %i.next, %loop ], [ 0, %entry ]
%val = load float, ptr addrspace(7) %ptr.prev
%sum = fadd float %sum.prev, %val
%hop = fcmp olt float %val, 0.0
%this.next = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
;; Either keep walking the current buffer or restart at the beginning of %buf2.
%ptr = select i1 %hop, ptr addrspace(7) %this.next, ptr addrspace(7) %start2
%i.next = add i32 %i, 1
%test = icmp ult i32 %i.next, %len
br i1 %test, label %loop, label %exit
exit:
ret float %sum
}
;; As does this.
;; Phi-based variant of @sum_new_buffer_on_negative: the %loop.exit phi merges
;; a pointer into the current buffer with %start2 (derived from %buf2), so the
;; expected output has a resource phi and an offset phi in both %loop and
;; %loop.exit.
define float @sum_new_buffer_on_negative_with_phi(ptr addrspace(8) %buf1, ptr addrspace(8) %buf2, i32 %len) {
; CHECK-LABEL: define float @sum_new_buffer_on_negative_with_phi
; CHECK-SAME: (ptr addrspace(8) [[BUF1:%.*]], ptr addrspace(8) [[BUF2:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP_EXIT:%.*]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP_EXIT]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[PTR_PREV_RSRC:%.*]] = phi ptr addrspace(8) [ [[PTR_RSRC:%.*]], [[LOOP_EXIT]] ], [ [[BUF1]], [[ENTRY]] ]
; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP_EXIT]] ], [ 0, [[ENTRY]] ]
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[PTR_PREV_RSRC]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
; CHECK-NEXT: [[HOP:%.*]] = fcmp olt float [[VAL]], 0.000000e+00
; CHECK-NEXT: br i1 [[HOP]], label [[THEN:%.*]], label [[LOOP_EXIT]]
; CHECK: then:
; CHECK-NEXT: [[THIS_NEXT:%.*]] = add i32 [[PTR_PREV_OFF]], 4
; CHECK-NEXT: br label [[LOOP_EXIT]]
; CHECK: loop.exit:
; CHECK-NEXT: [[PTR_RSRC]] = phi ptr addrspace(8) [ [[PTR_PREV_RSRC]], [[THEN]] ], [ [[BUF2]], [[LOOP]] ]
; CHECK-NEXT: [[PTR_OFF]] = phi i32 [ [[THIS_NEXT]], [[THEN]] ], [ 0, [[LOOP]] ]
; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: ret float [[SUM]]
;
entry:
%start = addrspacecast ptr addrspace(8) %buf1 to ptr addrspace(7)
%start2 = addrspacecast ptr addrspace(8) %buf2 to ptr addrspace(7)
br label %loop
loop:
%sum.prev = phi float [ %sum, %loop.exit ], [ 0.0, %entry ]
%ptr.prev = phi ptr addrspace(7) [ %ptr, %loop.exit ], [ %start, %entry ]
%i = phi i32 [ %i.next, %loop.exit ], [ 0, %entry ]
%val = load float, ptr addrspace(7) %ptr.prev
%sum = fadd float %sum.prev, %val
%i.next = add i32 %i, 1
%test = icmp ult i32 %i.next, %len
%hop = fcmp olt float %val, 0.0
br i1 %hop, label %then, label %loop.exit
then:
%this.next = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
br label %loop.exit
loop.exit:
;; The edge straight from %loop supplies the start of the second buffer.
%ptr = phi ptr addrspace(7) [ %this.next, %then ], [ %start2, %loop ]
br i1 %test, label %loop, label %exit
exit:
ret float %sum
}
;; Test that the uniform buffer descriptor optimization works correctly for phi
;; nodes that repeat the same predecessor multiple times.
;; The switch at the end of %loop targets %loop both as its default destination
;; and for case 1, so every phi in %loop lists %loop twice. The expected output
;; still reduces the pointer phi to a single i32 offset phi.
define float @sum_duplicate_preds(ptr addrspace(8) %buf, i32 %len) {
; CHECK-LABEL: define float @sum_duplicate_preds
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ [[SUM]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ], [ [[I_NEXT]], [[LOOP]] ]
; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ], [ [[PTR]], [[LOOP]] ]
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
; CHECK-NEXT: [[PTR]] = add i32 [[PTR_PREV_OFF]], 4
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
; CHECK-NEXT: [[TEST_EXT:%.*]] = zext i1 [[TEST]] to i32
; CHECK-NEXT: switch i32 [[TEST_EXT]], label [[LOOP]] [
; CHECK-NEXT: i32 1, label [[LOOP]]
; CHECK-NEXT: i32 0, label [[EXIT:%.*]]
; CHECK-NEXT: ]
; CHECK: exit:
; CHECK-NEXT: ret float [[SUM]]
;
entry:
%start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
br label %loop
loop:
%sum.prev = phi float [ %sum, %loop ], [ %sum, %loop ], [ 0.0, %entry ]
;; The duplicated %loop entries are deliberately separated by the %entry one.
%ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ], [ %ptr, %loop ]
%i = phi i32 [ %i.next, %loop ], [ 0, %entry ], [ %i.next, %loop ]
%val = load float, ptr addrspace(7) %ptr.prev
%sum = fadd float %sum.prev, %val
%ptr = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
%i.next = add i32 %i, 1
%test = icmp ult i32 %i.next, %len
%test.ext = zext i1 %test to i32
;; Two switch edges lead back to %loop, producing the duplicate predecessor.
switch i32 %test.ext, label %loop [
i32 1, label %loop
i32 0, label %exit
]
exit:
ret float %sum
}
;; And similarly check the "might not be uniform" case.
;; Combines the duplicated %loop predecessor of @sum_duplicate_preds with the
;; ptrtoint / add / inttoptr pointer update of @sum_integer_ops. The expected
;; output keeps both a resource phi and an offset phi, each listing %loop twice.
define float @sum_integer_ops_duplicate_preds(ptr addrspace(8) %buf, i32 %len) {
; CHECK-LABEL: define float @sum_integer_ops_duplicate_preds
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ [[SUM]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ], [ [[I_NEXT]], [[LOOP]] ]
; CHECK-NEXT: [[PTR_PREV_RSRC:%.*]] = phi ptr addrspace(8) [ [[PTR_RSRC:%.*]], [[LOOP]] ], [ [[BUF]], [[ENTRY]] ], [ [[PTR_RSRC]], [[LOOP]] ]
; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ], [ [[PTR_OFF]], [[LOOP]] ]
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[PTR_PREV_RSRC]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
; CHECK-NEXT: [[PTR_PREV_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[PTR_PREV_RSRC]] to i160
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i160 [[PTR_PREV_INT_RSRC]], 32
; CHECK-NEXT: [[PTR_PREV_INT_OFF:%.*]] = zext i32 [[PTR_PREV_OFF]] to i160
; CHECK-NEXT: [[PTR_PREV_INT:%.*]] = or i160 [[TMP0]], [[PTR_PREV_INT_OFF]]
; CHECK-NEXT: [[PTR_INT:%.*]] = add i160 [[PTR_PREV_INT]], 4
; CHECK-NEXT: [[TMP1:%.*]] = lshr i160 [[PTR_INT]], 32
; CHECK-NEXT: [[TMP2:%.*]] = trunc i160 [[TMP1]] to i128
; CHECK-NEXT: [[PTR_RSRC]] = inttoptr i128 [[TMP2]] to ptr addrspace(8)
; CHECK-NEXT: [[PTR_OFF]] = trunc i160 [[PTR_INT]] to i32
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
; CHECK-NEXT: [[TEST_EXT:%.*]] = zext i1 [[TEST]] to i32
; CHECK-NEXT: switch i32 [[TEST_EXT]], label [[LOOP]] [
; CHECK-NEXT: i32 1, label [[LOOP]]
; CHECK-NEXT: i32 0, label [[EXIT:%.*]]
; CHECK-NEXT: ]
; CHECK: exit:
; CHECK-NEXT: ret float [[SUM]]
;
entry:
%start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
br label %loop
loop:
%sum.prev = phi float [ %sum, %loop ], [ %sum, %loop ], [ 0.0, %entry ]
%ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ], [ %ptr, %loop ]
%i = phi i32 [ %i.next, %loop ], [ 0, %entry ], [ %i.next, %loop ]
%val = load float, ptr addrspace(7) %ptr.prev
%sum = fadd float %sum.prev, %val
;; Advance the pointer with integer arithmetic on the whole i160 value rather
;; than with a getelementptr.
%ptr.prev.int = ptrtoint ptr addrspace(7) %ptr.prev to i160
%ptr.int = add i160 %ptr.prev.int, 4
%ptr = inttoptr i160 %ptr.int to ptr addrspace(7)
%i.next = add i32 %i, 1
%test = icmp ult i32 %i.next, %len
%test.ext = zext i1 %test to i32
;; Two switch edges lead back to %loop, producing the duplicate predecessor.
switch i32 %test.ext, label %loop [
i32 1, label %loop
i32 0, label %exit
]
exit:
ret float %sum
}
;; Block layout where a use textually precedes its definition: %.loopexit is
;; listed before %.lr.ph18, yet its getelementptr uses %lsr.iv11, the phi that
;; is defined in %.lr.ph18. The fat pointer is passed directly as an inreg
;; argument, and the phi takes %arg on both incoming edges; the expected output
;; keeps only an i32 offset phi.
;; NOTE(review): presumably the reduced crash reproducer for the instruction
;; re-visit fix in the fat pointer lowering pass -- confirm against the commit.
define void @dominance_not_in_program_order(ptr addrspace(7) inreg %arg) {
; CHECK-LABEL: define void @dominance_not_in_program_order
; CHECK-SAME: ({ ptr addrspace(8), i32 } inreg [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: .preheader15:
; CHECK-NEXT: [[ARG_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[ARG]], 0
; CHECK-NEXT: [[ARG_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[ARG]], 1
; CHECK-NEXT: br label [[DOTLR_PH18:%.*]]
; CHECK: .loopexit:
; CHECK-NEXT: [[SCEVGEP12:%.*]] = add i32 [[LSR_IV11_OFF:%.*]], 16
; CHECK-NEXT: br label [[DOTLR_PH18]]
; CHECK: .lr.ph18:
; CHECK-NEXT: [[LSR_IV11_OFF]] = phi i32 [ [[ARG_OFF]], [[DOTLOOPEXIT:%.*]] ], [ [[ARG_OFF]], [[DOTPREHEADER15:%.*]] ]
; CHECK-NEXT: br label [[DOTLOOPEXIT]]
;
.preheader15:
br label %.lr.ph18
.loopexit: ; preds = %.lr.ph18
;; Uses the phi from the block below; the result itself has no users.
%scevgep12 = getelementptr i8, ptr addrspace(7) %lsr.iv11, i32 16
br label %.lr.ph18
.lr.ph18: ; preds = %.loopexit, %.preheader15
%lsr.iv11 = phi ptr addrspace(7) [ %arg, %.loopexit ], [ %arg, %.preheader15 ]
br label %.loopexit
}