Fixes https://github.com/iree-org/iree/issues/22001 The visitor in SplitPtrStructs would re-visit instructions if an instruction earlier in program order caused a recursive visit() call via getPtrParts(). This would cause instructions to be processed multiple times. As a consequence of this, PHI nodes could be added to the Conditionals array multiple times, which would lead to a conditional that was already simplified being processed multiple times. After the code moved to InstSimplifyFolder, this re-processing, combined with more aggressive simplifications, would lead to an attempt to replace an instruction with itself, causing an assertion failure and crash. This commit resolves the issue and adds the reduced form of the crashing input as a test.
484 lines
22 KiB
LLVM
484 lines
22 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
|
|
; RUN: opt -S -mcpu=gfx900 -amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s
|
|
; RUN: opt -S -mcpu=gfx900 -passes=amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s
|
|
|
|
;; All functions below are compiled for the amdgcn architecture; the vendor and
;; OS fields of the triple are left empty.
target triple = "amdgcn--"
|
|
|
|
;; This should optimize to just the offset part
|
|
;; Accumulates floats loaded from %buf in a loop bounded by %len. The
;; loop-carried ptr addrspace(7) phi starts at the addrspacecast of %buf and is
;; only advanced with a getelementptr, so the expected output keeps a single
;; i32 offset phi and loads directly from %buf.
define float @sum(ptr addrspace(8) %buf, i32 %len) {
|
|
; CHECK-LABEL: define float @sum
|
|
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
|
; CHECK: loop:
|
|
; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
|
|
; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
|
|
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
|
|
; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
|
|
; CHECK-NEXT: [[PTR]] = add i32 [[PTR_PREV_OFF]], 4
|
|
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
|
|
; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
|
|
; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret float [[SUM]]
|
|
;
|
|
entry:
|
|
;; Form the fat pointer (address space 7) from the buffer resource.
%start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
|
br label %loop
|
|
loop:
|
|
%sum.prev = phi float [ %sum, %loop ], [ 0.0, %entry ]
|
|
%ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ]
|
|
%i = phi i32 [ %i.next, %loop ], [ 0, %entry ]
|
|
|
|
%val = load float, ptr addrspace(7) %ptr.prev
|
|
%sum = fadd float %sum.prev, %val
|
|
|
|
;; Step to the next float; the expected output shows this as an i32 add of 4.
%ptr = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
|
|
%i.next = add i32 %i, 1
|
|
%test = icmp ult i32 %i.next, %len
|
|
br i1 %test, label %loop, label %exit
|
|
exit:
|
|
ret float %sum
|
|
}
|
|
|
|
;; But this should not
|
|
;; Same loop shape as @sum, but the pointer is advanced with
;; ptrtoint / add i160 / inttoptr instead of a getelementptr. The expected
;; output keeps both a resource (ptr addrspace(8)) phi and an offset (i32) phi
;; and rebuilds the two parts from the i160 integer on every iteration.
define float @sum_integer_ops(ptr addrspace(8) %buf, i32 %len) {
|
|
; CHECK-LABEL: define float @sum_integer_ops
|
|
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
|
; CHECK: loop:
|
|
; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
|
|
; CHECK-NEXT: [[PTR_PREV_RSRC:%.*]] = phi ptr addrspace(8) [ [[PTR_RSRC:%.*]], [[LOOP]] ], [ [[BUF]], [[ENTRY]] ]
|
|
; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
|
|
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[PTR_PREV_RSRC]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
|
|
; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
|
|
; CHECK-NEXT: [[PTR_PREV_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[PTR_PREV_RSRC]] to i160
|
|
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i160 [[PTR_PREV_INT_RSRC]], 32
|
|
; CHECK-NEXT: [[PTR_PREV_INT_OFF:%.*]] = zext i32 [[PTR_PREV_OFF]] to i160
|
|
; CHECK-NEXT: [[PTR_PREV_INT:%.*]] = or i160 [[TMP0]], [[PTR_PREV_INT_OFF]]
|
|
; CHECK-NEXT: [[PTR_INT:%.*]] = add i160 [[PTR_PREV_INT]], 4
|
|
; CHECK-NEXT: [[TMP1:%.*]] = lshr i160 [[PTR_INT]], 32
|
|
; CHECK-NEXT: [[TMP2:%.*]] = trunc i160 [[TMP1]] to i128
|
|
; CHECK-NEXT: [[PTR_RSRC]] = inttoptr i128 [[TMP2]] to ptr addrspace(8)
|
|
; CHECK-NEXT: [[PTR_OFF]] = trunc i160 [[PTR_INT]] to i32
|
|
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
|
|
; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
|
|
; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret float [[SUM]]
|
|
;
|
|
entry:
|
|
%start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
|
br label %loop
|
|
loop:
|
|
%sum.prev = phi float [ %sum, %loop ], [ 0.0, %entry ]
|
|
%ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ]
|
|
%i = phi i32 [ %i.next, %loop ], [ 0, %entry ]
|
|
|
|
%val = load float, ptr addrspace(7) %ptr.prev
|
|
%sum = fadd float %sum.prev, %val
|
|
|
|
;; Advance the pointer through a 160-bit integer round trip rather than a GEP.
%ptr.prev.int = ptrtoint ptr addrspace(7) %ptr.prev to i160
|
|
%ptr.int = add i160 %ptr.prev.int, 4
|
|
%ptr = inttoptr i160 %ptr.int to ptr addrspace(7)
|
|
%i.next = add i32 %i, 1
|
|
%test = icmp ult i32 %i.next, %len
|
|
br i1 %test, label %loop, label %exit
|
|
exit:
|
|
ret float %sum
|
|
}
|
|
|
|
;; Should go to offsets only
|
|
;; Two nested loops (%loop1.entry ... %loop1.exit around %loop2), each with its
;; own ptr addrspace(7) phi that is advanced only by getelementptr. The
;; expected output has one i32 offset phi per loop, no resource phi, and a load
;; that uses %buf directly.
define float @sum_2d(ptr addrspace(8) %buf, i32 %ii, i32 %jj) {
|
|
; CHECK-LABEL: define float @sum_2d
|
|
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[II:%.*]], i32 [[JJ:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[LOOP1_ENTRY:%.*]]
|
|
; CHECK: loop1.entry:
|
|
; CHECK-NEXT: [[SUM1_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP1_EXIT:%.*]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP1_EXIT]] ], [ 0, [[ENTRY]] ]
|
|
; CHECK-NEXT: [[PTR1_PREV_OFF:%.*]] = phi i32 [ [[PTR1:%.*]], [[LOOP1_EXIT]] ], [ 0, [[ENTRY]] ]
|
|
; CHECK-NEXT: br label [[LOOP2:%.*]]
|
|
; CHECK: loop2:
|
|
; CHECK-NEXT: [[SUM2_PREV:%.*]] = phi float [ [[SUM]], [[LOOP2]] ], [ [[SUM1_PREV]], [[LOOP1_ENTRY]] ]
|
|
; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[J_NEXT:%.*]], [[LOOP2]] ], [ 0, [[LOOP1_ENTRY]] ]
|
|
; CHECK-NEXT: [[PTR2_PREV_OFF:%.*]] = phi i32 [ [[PTR2:%.*]], [[LOOP2]] ], [ [[PTR1_PREV_OFF]], [[LOOP1_ENTRY]] ]
|
|
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR2_PREV_OFF]], i32 0, i32 0)
|
|
; CHECK-NEXT: [[SUM]] = fadd float [[SUM2_PREV]], [[VAL]]
|
|
; CHECK-NEXT: [[PTR2]] = add i32 [[PTR2_PREV_OFF]], 4
|
|
; CHECK-NEXT: [[J_NEXT]] = add i32 [[J]], 1
|
|
; CHECK-NEXT: [[TEST2:%.*]] = icmp ult i32 [[J_NEXT]], [[JJ]]
|
|
; CHECK-NEXT: br i1 [[TEST2]], label [[LOOP2]], label [[LOOP1_EXIT]]
|
|
; CHECK: loop1.exit:
|
|
; CHECK-NEXT: [[PTR1]] = add i32 [[PTR2]], 4
|
|
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
|
|
; CHECK-NEXT: [[TEST1:%.*]] = icmp ult i32 [[I_NEXT]], [[II]]
|
|
; CHECK-NEXT: br i1 [[TEST1]], label [[LOOP1_ENTRY]], label [[EXIT:%.*]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret float [[SUM]]
|
|
;
|
|
entry:
|
|
%start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
|
br label %loop1.entry
|
|
loop1.entry:
|
|
%sum1.prev = phi float [ %sum, %loop1.exit ], [ 0.0, %entry ]
|
|
%ptr1.prev = phi ptr addrspace(7) [ %ptr1, %loop1.exit ], [ %start, %entry ]
|
|
%i = phi i32 [ %i.next, %loop1.exit ], [ 0, %entry ]
|
|
|
|
br label %loop2
|
|
loop2:
|
|
%sum2.prev = phi float [ %sum, %loop2 ], [ %sum1.prev, %loop1.entry ]
|
|
;; The inner pointer phi is seeded from the outer loop's pointer phi.
%ptr2.prev = phi ptr addrspace(7) [ %ptr2, %loop2 ], [ %ptr1.prev, %loop1.entry ]
|
|
%j = phi i32 [ %j.next, %loop2 ], [ 0, %loop1.entry ]
|
|
|
|
%val = load float, ptr addrspace(7) %ptr2.prev
|
|
%sum = fadd float %sum2.prev, %val
|
|
|
|
%ptr2 = getelementptr float, ptr addrspace(7) %ptr2.prev, i32 1
|
|
%j.next = add i32 %j, 1
|
|
%test2 = icmp ult i32 %j.next, %jj
|
|
|
|
br i1 %test2, label %loop2, label %loop1.exit
|
|
loop1.exit:
|
|
;; The outer pointer continues from where the inner loop stopped, plus one float.
%ptr1 = getelementptr float, ptr addrspace(7) %ptr2, i32 1
|
|
%i.next = add i32 %i, 1
|
|
%test1 = icmp ult i32 %i.next, %ii
|
|
br i1 %test1, label %loop1.entry, label %exit
|
|
exit:
|
|
ret float %sum
|
|
}
|
|
|
|
;; This should optimize to just the offset parts since all the arguments to the
|
|
;; select point to the same buffer.
|
|
;; The next pointer is a select between two getelementptrs of the same
;; loop-carried pointer (one float ahead or two floats ahead, chosen by whether
;; the loaded value is negative). The expected output selects between i32
;; offsets only and loads directly from %buf.
define float @sum_jump_on_negative(ptr addrspace(8) %buf, i32 %len) {
|
|
; CHECK-LABEL: define float @sum_jump_on_negative
|
|
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
|
; CHECK: loop:
|
|
; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
|
|
; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
|
|
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
|
|
; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
|
|
; CHECK-NEXT: [[SKIP_NEXT:%.*]] = fcmp olt float [[VAL]], 0.000000e+00
|
|
; CHECK-NEXT: [[SMALL_JUMP:%.*]] = add i32 [[PTR_PREV_OFF]], 4
|
|
; CHECK-NEXT: [[LARGE_JUMP:%.*]] = add i32 [[PTR_PREV_OFF]], 8
|
|
; CHECK-NEXT: [[PTR_OFF]] = select i1 [[SKIP_NEXT]], i32 [[LARGE_JUMP]], i32 [[SMALL_JUMP]]
|
|
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
|
|
; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
|
|
; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret float [[SUM]]
|
|
;
|
|
entry:
|
|
%start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
|
br label %loop
|
|
loop:
|
|
%sum.prev = phi float [ %sum, %loop ], [ 0.0, %entry ]
|
|
%ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ]
|
|
%i = phi i32 [ %i.next, %loop ], [ 0, %entry ]
|
|
|
|
%val = load float, ptr addrspace(7) %ptr.prev
|
|
%sum = fadd float %sum.prev, %val
|
|
|
|
%skip.next = fcmp olt float %val, 0.0
|
|
%small.jump = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
|
|
%large.jump = getelementptr float, ptr addrspace(7) %ptr.prev, i32 2
|
|
;; Both select operands are derived from %ptr.prev, i.e. from the same buffer.
%ptr = select i1 %skip.next, ptr addrspace(7) %large.jump, ptr addrspace(7) %small.jump
|
|
|
|
%i.next = add i32 %i, 1
|
|
%test = icmp ult i32 %i.next, %len
|
|
br i1 %test, label %loop, label %exit
|
|
exit:
|
|
ret float %sum
|
|
}
|
|
|
|
;; Variant of @sum_jump_on_negative where the two getelementptrs live in
;; separate blocks (%then / %else) and are merged by a phi in %loop.exit rather
;; than by a select. The expected output merges i32 offsets only and loads
;; directly from %buf.
define float @sum_jump_on_negative_with_phi(ptr addrspace(8) %buf, i32 %len) {
|
|
; CHECK-LABEL: define float @sum_jump_on_negative_with_phi
|
|
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
|
; CHECK: loop:
|
|
; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP_EXIT:%.*]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP_EXIT]] ], [ 0, [[ENTRY]] ]
|
|
; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP_EXIT]] ], [ 0, [[ENTRY]] ]
|
|
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
|
|
; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
|
|
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
|
|
; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
|
|
; CHECK-NEXT: [[SKIP_NEXT:%.*]] = fcmp olt float [[VAL]], 0.000000e+00
|
|
; CHECK-NEXT: br i1 [[SKIP_NEXT]], label [[THEN:%.*]], label [[ELSE:%.*]]
|
|
; CHECK: then:
|
|
; CHECK-NEXT: [[LARGE_JUMP:%.*]] = add i32 [[PTR_PREV_OFF]], 8
|
|
; CHECK-NEXT: br label [[LOOP_EXIT]]
|
|
; CHECK: else:
|
|
; CHECK-NEXT: [[SMALL_JUMP:%.*]] = add i32 [[PTR_PREV_OFF]], 4
|
|
; CHECK-NEXT: br label [[LOOP_EXIT]]
|
|
; CHECK: loop.exit:
|
|
; CHECK-NEXT: [[PTR_OFF]] = phi i32 [ [[LARGE_JUMP]], [[THEN]] ], [ [[SMALL_JUMP]], [[ELSE]] ]
|
|
; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret float [[SUM]]
|
|
;
|
|
entry:
|
|
%start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
|
br label %loop
|
|
loop:
|
|
%sum.prev = phi float [ %sum, %loop.exit ], [ 0.0, %entry ]
|
|
%ptr.prev = phi ptr addrspace(7) [ %ptr, %loop.exit ], [ %start, %entry ]
|
|
%i = phi i32 [ %i.next, %loop.exit ], [ 0, %entry ]
|
|
|
|
%val = load float, ptr addrspace(7) %ptr.prev
|
|
%sum = fadd float %sum.prev, %val
|
|
|
|
%i.next = add i32 %i, 1
|
|
%test = icmp ult i32 %i.next, %len
|
|
|
|
%skip.next = fcmp olt float %val, 0.0
|
|
br i1 %skip.next, label %then, label %else
|
|
then:
|
|
|
%large.jump = getelementptr float, ptr addrspace(7) %ptr.prev, i32 2
|
|
br label %loop.exit
|
|
else:
|
|
%small.jump = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
|
|
br label %loop.exit
|
|
loop.exit:
|
|
;; Both incoming pointers are derived from %ptr.prev, i.e. from the same buffer.
%ptr = phi ptr addrspace(7) [ %large.jump, %then ], [ %small.jump, %else ]
|
|
br i1 %test, label %loop, label %exit
|
|
exit:
|
|
ret float %sum
|
|
}
|
|
|
|
;; But this has a shifting resource part.
|
|
;; The select chooses between advancing within the current buffer
;; (%this.next) and restarting at the beginning of %buf2 (%start2). Because the
;; buffer can change between iterations, the expected output carries both a
;; resource phi/select and an offset phi/select.
define float @sum_new_buffer_on_negative(ptr addrspace(8) %buf1, ptr addrspace(8) %buf2, i32 %len) {
|
|
; CHECK-LABEL: define float @sum_new_buffer_on_negative
|
|
; CHECK-SAME: (ptr addrspace(8) [[BUF1:%.*]], ptr addrspace(8) [[BUF2:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
|
; CHECK: loop:
|
|
; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
|
|
; CHECK-NEXT: [[PTR_PREV_RSRC:%.*]] = phi ptr addrspace(8) [ [[PTR_RSRC:%.*]], [[LOOP]] ], [ [[BUF1]], [[ENTRY]] ]
|
|
; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ]
|
|
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[PTR_PREV_RSRC]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
|
|
; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
|
|
; CHECK-NEXT: [[HOP:%.*]] = fcmp olt float [[VAL]], 0.000000e+00
|
|
; CHECK-NEXT: [[THIS_NEXT:%.*]] = add i32 [[PTR_PREV_OFF]], 4
|
|
; CHECK-NEXT: [[PTR_RSRC]] = select i1 [[HOP]], ptr addrspace(8) [[PTR_PREV_RSRC]], ptr addrspace(8) [[BUF2]]
|
|
; CHECK-NEXT: [[PTR_OFF]] = select i1 [[HOP]], i32 [[THIS_NEXT]], i32 0
|
|
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
|
|
; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
|
|
; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret float [[SUM]]
|
|
;
|
|
entry:
|
|
%start = addrspacecast ptr addrspace(8) %buf1 to ptr addrspace(7)
|
|
%start2 = addrspacecast ptr addrspace(8) %buf2 to ptr addrspace(7)
|
|
br label %loop
|
|
loop:
|
|
%sum.prev = phi float [ %sum, %loop ], [ 0.0, %entry ]
|
|
%ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ]
|
|
%i = phi i32 [ %i.next, %loop ], [ 0, %entry ]
|
|
|
|
%val = load float, ptr addrspace(7) %ptr.prev
|
|
%sum = fadd float %sum.prev, %val
|
|
|
|
%hop = fcmp olt float %val, 0.0
|
|
%this.next = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
|
|
;; The false operand points into a different buffer than %ptr.prev.
%ptr = select i1 %hop, ptr addrspace(7) %this.next, ptr addrspace(7) %start2
|
|
|
|
%i.next = add i32 %i, 1
|
|
%test = icmp ult i32 %i.next, %len
|
|
br i1 %test, label %loop, label %exit
|
|
exit:
|
|
ret float %sum
|
|
}
|
|
|
|
;; As does this.
|
|
;; Phi-based variant of @sum_new_buffer_on_negative: %loop.exit merges
;; %this.next (from %then) with %start2 (arriving directly from %loop). The
;; expected output has resource and offset phis in both %loop and %loop.exit.
define float @sum_new_buffer_on_negative_with_phi(ptr addrspace(8) %buf1, ptr addrspace(8) %buf2, i32 %len) {
|
|
; CHECK-LABEL: define float @sum_new_buffer_on_negative_with_phi
|
|
; CHECK-SAME: (ptr addrspace(8) [[BUF1:%.*]], ptr addrspace(8) [[BUF2:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
|
; CHECK: loop:
|
|
; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP_EXIT:%.*]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP_EXIT]] ], [ 0, [[ENTRY]] ]
|
|
; CHECK-NEXT: [[PTR_PREV_RSRC:%.*]] = phi ptr addrspace(8) [ [[PTR_RSRC:%.*]], [[LOOP_EXIT]] ], [ [[BUF1]], [[ENTRY]] ]
|
|
; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP_EXIT]] ], [ 0, [[ENTRY]] ]
|
|
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[PTR_PREV_RSRC]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
|
|
; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
|
|
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
|
|
; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
|
|
; CHECK-NEXT: [[HOP:%.*]] = fcmp olt float [[VAL]], 0.000000e+00
|
|
; CHECK-NEXT: br i1 [[HOP]], label [[THEN:%.*]], label [[LOOP_EXIT]]
|
|
; CHECK: then:
|
|
; CHECK-NEXT: [[THIS_NEXT:%.*]] = add i32 [[PTR_PREV_OFF]], 4
|
|
; CHECK-NEXT: br label [[LOOP_EXIT]]
|
|
; CHECK: loop.exit:
|
|
; CHECK-NEXT: [[PTR_RSRC]] = phi ptr addrspace(8) [ [[PTR_PREV_RSRC]], [[THEN]] ], [ [[BUF2]], [[LOOP]] ]
|
|
; CHECK-NEXT: [[PTR_OFF]] = phi i32 [ [[THIS_NEXT]], [[THEN]] ], [ 0, [[LOOP]] ]
|
|
; CHECK-NEXT: br i1 [[TEST]], label [[LOOP]], label [[EXIT:%.*]]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret float [[SUM]]
|
|
;
|
|
entry:
|
|
%start = addrspacecast ptr addrspace(8) %buf1 to ptr addrspace(7)
|
|
%start2 = addrspacecast ptr addrspace(8) %buf2 to ptr addrspace(7)
|
|
br label %loop
|
|
loop:
|
|
%sum.prev = phi float [ %sum, %loop.exit ], [ 0.0, %entry ]
|
|
%ptr.prev = phi ptr addrspace(7) [ %ptr, %loop.exit ], [ %start, %entry ]
|
|
%i = phi i32 [ %i.next, %loop.exit ], [ 0, %entry ]
|
|
|
|
%val = load float, ptr addrspace(7) %ptr.prev
|
|
%sum = fadd float %sum.prev, %val
|
|
|
|
%i.next = add i32 %i, 1
|
|
%test = icmp ult i32 %i.next, %len
|
|
%hop = fcmp olt float %val, 0.0
|
|
br i1 %hop, label %then, label %loop.exit
|
|
then:
|
|
%this.next = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
|
|
br label %loop.exit
|
|
loop.exit:
|
|
;; The edge from %loop brings in a pointer into a different buffer (%buf2).
%ptr = phi ptr addrspace(7) [ %this.next, %then ], [ %start2, %loop ]
|
|
br i1 %test, label %loop, label %exit
|
|
exit:
|
|
ret float %sum
|
|
}
|
|
|
|
;; Test that the uniform buffer descriptor optimization works correctly for phi
|
|
;; nodes that repeat the same predecessor multiple times.
|
|
;; Like @sum, but the back edge is a switch whose default destination and
;; `i32 1` case both target %loop, so every phi in %loop lists %loop twice.
;; The expected output still uses only an i32 offset phi (with the duplicated
;; incoming entries) and loads directly from %buf.
define float @sum_duplicate_preds(ptr addrspace(8) %buf, i32 %len) {
|
|
; CHECK-LABEL: define float @sum_duplicate_preds
|
|
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
|
; CHECK: loop:
|
|
; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ [[SUM]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ], [ [[I_NEXT]], [[LOOP]] ]
|
|
; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ], [ [[PTR]], [[LOOP]] ]
|
|
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
|
|
; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
|
|
; CHECK-NEXT: [[PTR]] = add i32 [[PTR_PREV_OFF]], 4
|
|
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
|
|
; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
|
|
; CHECK-NEXT: [[TEST_EXT:%.*]] = zext i1 [[TEST]] to i32
|
|
; CHECK-NEXT: switch i32 [[TEST_EXT]], label [[LOOP]] [
|
|
; CHECK-NEXT: i32 1, label [[LOOP]]
|
|
; CHECK-NEXT: i32 0, label [[EXIT:%.*]]
|
|
; CHECK-NEXT: ]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret float [[SUM]]
|
|
;
|
|
entry:
|
|
%start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
|
br label %loop
|
|
loop:
|
|
%sum.prev = phi float [ %sum, %loop ], [ %sum, %loop ], [ 0.0, %entry ]
|
|
;; %loop appears twice as an incoming block, once per switch edge.
%ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ], [ %ptr, %loop ]
|
|
%i = phi i32 [ %i.next, %loop ], [ 0, %entry ], [ %i.next, %loop ]
|
|
|
|
%val = load float, ptr addrspace(7) %ptr.prev
|
|
%sum = fadd float %sum.prev, %val
|
|
|
|
%ptr = getelementptr float, ptr addrspace(7) %ptr.prev, i32 1
|
|
%i.next = add i32 %i, 1
|
|
%test = icmp ult i32 %i.next, %len
|
|
%test.ext = zext i1 %test to i32
|
|
;; Two distinct edges (default and case 1) lead back to %loop.
switch i32 %test.ext, label %loop [
|
|
i32 1, label %loop
|
|
i32 0, label %exit
|
|
]
|
|
exit:
|
|
ret float %sum
|
|
}
|
|
|
|
;; And similarly check the "might not be uniform" case.
|
|
;; Like @sum_integer_ops (pointer advanced via ptrtoint / add i160 / inttoptr),
;; but with a switch back edge that reaches %loop through both its default
;; destination and its `i32 1` case, so phis in %loop list %loop twice. The
;; expected output keeps separate resource and offset phis, each with the
;; duplicated incoming entries.
define float @sum_integer_ops_duplicate_preds(ptr addrspace(8) %buf, i32 %len) {
|
|
; CHECK-LABEL: define float @sum_integer_ops_duplicate_preds
|
|
; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[LEN:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: entry:
|
|
; CHECK-NEXT: br label [[LOOP:%.*]]
|
|
; CHECK: loop:
|
|
; CHECK-NEXT: [[SUM_PREV:%.*]] = phi float [ [[SUM:%.*]], [[LOOP]] ], [ [[SUM]], [[LOOP]] ], [ 0.000000e+00, [[ENTRY:%.*]] ]
|
|
; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ], [ [[I_NEXT]], [[LOOP]] ]
|
|
; CHECK-NEXT: [[PTR_PREV_RSRC:%.*]] = phi ptr addrspace(8) [ [[PTR_RSRC:%.*]], [[LOOP]] ], [ [[BUF]], [[ENTRY]] ], [ [[PTR_RSRC]], [[LOOP]] ]
|
|
; CHECK-NEXT: [[PTR_PREV_OFF:%.*]] = phi i32 [ [[PTR_OFF:%.*]], [[LOOP]] ], [ 0, [[ENTRY]] ], [ [[PTR_OFF]], [[LOOP]] ]
|
|
; CHECK-NEXT: [[VAL:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[PTR_PREV_RSRC]], i32 [[PTR_PREV_OFF]], i32 0, i32 0)
|
|
; CHECK-NEXT: [[SUM]] = fadd float [[SUM_PREV]], [[VAL]]
|
|
; CHECK-NEXT: [[PTR_PREV_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[PTR_PREV_RSRC]] to i160
|
|
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw i160 [[PTR_PREV_INT_RSRC]], 32
|
|
; CHECK-NEXT: [[PTR_PREV_INT_OFF:%.*]] = zext i32 [[PTR_PREV_OFF]] to i160
|
|
; CHECK-NEXT: [[PTR_PREV_INT:%.*]] = or i160 [[TMP0]], [[PTR_PREV_INT_OFF]]
|
|
; CHECK-NEXT: [[PTR_INT:%.*]] = add i160 [[PTR_PREV_INT]], 4
|
|
; CHECK-NEXT: [[TMP1:%.*]] = lshr i160 [[PTR_INT]], 32
|
|
; CHECK-NEXT: [[TMP2:%.*]] = trunc i160 [[TMP1]] to i128
|
|
; CHECK-NEXT: [[PTR_RSRC]] = inttoptr i128 [[TMP2]] to ptr addrspace(8)
|
|
; CHECK-NEXT: [[PTR_OFF]] = trunc i160 [[PTR_INT]] to i32
|
|
; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1
|
|
; CHECK-NEXT: [[TEST:%.*]] = icmp ult i32 [[I_NEXT]], [[LEN]]
|
|
; CHECK-NEXT: [[TEST_EXT:%.*]] = zext i1 [[TEST]] to i32
|
|
; CHECK-NEXT: switch i32 [[TEST_EXT]], label [[LOOP]] [
|
|
; CHECK-NEXT: i32 1, label [[LOOP]]
|
|
; CHECK-NEXT: i32 0, label [[EXIT:%.*]]
|
|
; CHECK-NEXT: ]
|
|
; CHECK: exit:
|
|
; CHECK-NEXT: ret float [[SUM]]
|
|
;
|
|
entry:
|
|
%start = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7)
|
|
br label %loop
|
|
loop:
|
|
%sum.prev = phi float [ %sum, %loop ], [ %sum, %loop ], [ 0.0, %entry ]
|
|
;; %loop appears twice as an incoming block, once per switch edge.
%ptr.prev = phi ptr addrspace(7) [ %ptr, %loop ], [ %start, %entry ], [ %ptr, %loop ]
|
|
%i = phi i32 [ %i.next, %loop ], [ 0, %entry ], [ %i.next, %loop ]
|
|
|
|
%val = load float, ptr addrspace(7) %ptr.prev
|
|
%sum = fadd float %sum.prev, %val
|
|
|
|
;; Advance the pointer through a 160-bit integer round trip rather than a GEP.
%ptr.prev.int = ptrtoint ptr addrspace(7) %ptr.prev to i160
|
|
%ptr.int = add i160 %ptr.prev.int, 4
|
|
%ptr = inttoptr i160 %ptr.int to ptr addrspace(7)
|
|
%i.next = add i32 %i, 1
|
|
%test = icmp ult i32 %i.next, %len
|
|
%test.ext = zext i1 %test to i32
|
|
switch i32 %test.ext, label %loop [
|
|
i32 1, label %loop
|
|
i32 0, label %exit
|
|
]
|
|
exit:
|
|
ret float %sum
|
|
}
|
|
|
|
;; Regression test reduced from a crashing input (iree-org/iree issue 22001).
;; Block %.loopexit uses %lsr.iv11 but is listed before %.lr.ph18, the block
;; that defines it, so the use is encountered before the phi in program order.
;; The fat pointer here is a function argument rather than an addrspacecast;
;; the expected output takes it as a { ptr addrspace(8), i32 } struct and keeps
;; only an i32 offset phi.
define void @dominance_not_in_program_order(ptr addrspace(7) inreg %arg) {
|
|
; CHECK-LABEL: define void @dominance_not_in_program_order
|
|
; CHECK-SAME: ({ ptr addrspace(8), i32 } inreg [[ARG:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: .preheader15:
|
|
; CHECK-NEXT: [[ARG_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[ARG]], 0
|
|
; CHECK-NEXT: [[ARG_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[ARG]], 1
|
|
; CHECK-NEXT: br label [[DOTLR_PH18:%.*]]
|
|
; CHECK: .loopexit:
|
|
; CHECK-NEXT: [[SCEVGEP12:%.*]] = add i32 [[LSR_IV11_OFF:%.*]], 16
|
|
; CHECK-NEXT: br label [[DOTLR_PH18]]
|
|
; CHECK: .lr.ph18:
|
|
; CHECK-NEXT: [[LSR_IV11_OFF]] = phi i32 [ [[ARG_OFF]], [[DOTLOOPEXIT:%.*]] ], [ [[ARG_OFF]], [[DOTPREHEADER15:%.*]] ]
|
|
; CHECK-NEXT: br label [[DOTLOOPEXIT]]
|
|
;
|
|
.preheader15:
|
|
br label %.lr.ph18
|
|
|
|
.loopexit: ; preds = %.lr.ph18
|
|
;; Uses the phi that is defined in the block that follows this one.
%scevgep12 = getelementptr i8, ptr addrspace(7) %lsr.iv11, i32 16
|
|
br label %.lr.ph18
|
|
|
|
.lr.ph18: ; preds = %.loopexit, %.preheader15
|
|
;; Both incoming values are the same argument %arg.
%lsr.iv11 = phi ptr addrspace(7) [ %arg, %.loopexit ], [ %arg, %.preheader15 ]
|
|
br label %.loopexit
|
|
}
|