
This PR updates AMDGPULowerBufferFatPointers to use the InstSimplifyFolder when creating IR during buffer fat pointer lowering. This shouldn't cause any large functional changes and might improve the quality of the generated code.
118 lines
6.6 KiB
LLVM
118 lines
6.6 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
|
|
; The two invocations below run the same lowering at -mcpu=gfx900 and share one
; set of assertions. The first names the pass as a standalone opt flag, the
; second requests it through -passes=.
; RUN: opt -S -mcpu=gfx900 -amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s
|
|
; RUN: opt -S -mcpu=gfx900 -passes=amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s
|
|
|
|
; Data layout entries that matter for this test:
;   p7:160:256:256:32 - addrspace(7) pointers are 160 bits wide, 256-bit
;                       aligned, with a 32-bit index width.
;   p8:128:128        - addrspace(8) pointers are 128 bits wide.
;   ni:7:8            - both address spaces are non-integral.
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
|
|
target triple = "amdgcn--"
|
|
|
|
; First half of a mutually recursive pair. While %v > 0 it calls
; @recur.inner.2 with %v - 1 and %x (note the swapped argument order) and
; returns that result; otherwise it returns %x unchanged.
;
; The assertions expect the addrspace(7) parameter and return value to be
; rewritten as { ptr addrspace(8), i32 } structs, and the pointer phi in %end
; to be split into one phi for the resource part and one for the offset part
; before the two are recombined for the return.
define ptr addrspace(7) @recur.inner.1(ptr addrspace(7) %x, i32 %v) {
|
|
; CHECK-LABEL: define { ptr addrspace(8), i32 } @recur.inner.1
|
|
; CHECK-SAME: ({ ptr addrspace(8), i32 } [[X:%.*]], i32 [[V:%.*]]) #[[ATTR0:[0-9]+]] {
|
|
; CHECK-NEXT: bb:
|
|
; CHECK-NEXT: [[X_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[X]], 0
|
|
; CHECK-NEXT: [[X_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[X]], 1
|
|
; CHECK-NEXT: [[ISBASE:%.*]] = icmp sgt i32 [[V]], 0
|
|
; CHECK-NEXT: br i1 [[ISBASE]], label [[RECUR:%.*]], label [[ELSE:%.*]]
|
|
; CHECK: recur:
|
|
; CHECK-NEXT: [[DEC:%.*]] = sub i32 [[V]], 1
|
|
; CHECK-NEXT: [[INC:%.*]] = call { ptr addrspace(8), i32 } @recur.inner.2(i32 [[DEC]], { ptr addrspace(8), i32 } [[X]])
|
|
; CHECK-NEXT: [[INC_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[INC]], 0
|
|
; CHECK-NEXT: [[INC_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[INC]], 1
|
|
; CHECK-NEXT: br label [[END:%.*]]
|
|
; CHECK: else:
|
|
; CHECK-NEXT: br label [[END]]
|
|
; CHECK: end:
|
|
; CHECK-NEXT: [[RET_RSRC:%.*]] = phi ptr addrspace(8) [ [[INC_RSRC]], [[RECUR]] ], [ [[X_RSRC]], [[ELSE]] ]
|
|
; CHECK-NEXT: [[RET_OFF:%.*]] = phi i32 [ [[INC_OFF]], [[RECUR]] ], [ [[X_OFF]], [[ELSE]] ]
|
|
; CHECK-NEXT: [[TMP0:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[RET_RSRC]], 0
|
|
; CHECK-NEXT: [[RET:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP0]], i32 [[RET_OFF]], 1
|
|
; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[RET]]
|
|
;
|
|
bb:
|
|
; Despite its name, %isBase is true on the path that recurses (%v > 0).
%isBase = icmp sgt i32 %v, 0
|
|
br i1 %isBase, label %recur, label %else
|
|
recur:
|
|
%dec = sub i32 %v, 1
|
|
%inc = call ptr addrspace(7) @recur.inner.2(i32 %dec, ptr addrspace(7) %x)
|
|
br label %end
|
|
else:
|
|
br label %end
|
|
end:
|
|
; Single fat-pointer phi; the assertions above expect it to be split in two.
%ret = phi ptr addrspace(7) [%inc, %recur], [%x, %else]
|
|
ret ptr addrspace(7) %ret
|
|
}
|
|
|
|
; Second half of the mutually recursive pair. It advances %x by one i32
; element and passes the result, together with %v, back to @recur.inner.1.
;
; The assertions expect the getelementptr to become a plain `add` of 4 on the
; offset half of the struct. They also expect the updated struct to be formed
; by a single insertvalue of the new offset into the incoming argument, with
; no intermediate insertvalue into poison.
define ptr addrspace(7) @recur.inner.2(i32 %v, ptr addrspace(7) %x) {
|
|
; CHECK-LABEL: define { ptr addrspace(8), i32 } @recur.inner.2
|
|
; CHECK-SAME: (i32 [[V:%.*]], { ptr addrspace(8), i32 } [[X:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[X_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[X]], 0
|
|
; CHECK-NEXT: [[X_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[X]], 1
|
|
; CHECK-NEXT: [[INC:%.*]] = add i32 [[X_OFF]], 4
|
|
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } [[X]], i32 [[INC]], 1
|
|
; CHECK-NEXT: [[RET:%.*]] = call { ptr addrspace(8), i32 } @recur.inner.1({ ptr addrspace(8), i32 } [[TMP1]], i32 [[V]])
|
|
; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[RET]]
|
|
;
|
|
; Not `inbounds`, so the expected add above carries no wrap flags.
%inc = getelementptr i32, ptr addrspace(7) %x, i32 1
|
|
%ret = call ptr addrspace(7) @recur.inner.1(ptr addrspace(7) %inc, i32 %v)
|
|
ret ptr addrspace(7) %ret
|
|
}
|
|
|
|
; Non-recursive entry into the recursive pair. It loads an i32 bound from
; %arg, calls @recur.inner.1 with %x and that bound, and stores the returned
; fat pointer back through %arg (an ordinary `ptr`).
;
; The assertions expect the stored fat pointer to be packed into one i160:
; the resource pointer converted to an integer and shifted left by 32, OR'd
; with the zero-extended 32-bit offset, then stored with 32-byte alignment.
define void @recur.outer(ptr addrspace(7) %x, ptr %arg) {
|
|
; CHECK-LABEL: define void @recur.outer
|
|
; CHECK-SAME: ({ ptr addrspace(8), i32 } [[X:%.*]], ptr [[ARG:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[BOUND:%.*]] = load i32, ptr [[ARG]], align 4
|
|
; CHECK-NEXT: [[RET:%.*]] = call { ptr addrspace(8), i32 } @recur.inner.1({ ptr addrspace(8), i32 } [[X]], i32 [[BOUND]])
|
|
; CHECK-NEXT: [[RET_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[RET]], 0
|
|
; CHECK-NEXT: [[RET_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[RET]], 1
|
|
; CHECK-NEXT: [[RET_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[RET_RSRC]] to i160
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i160 [[RET_INT_RSRC]], 32
|
|
; CHECK-NEXT: [[RET_INT_OFF:%.*]] = zext i32 [[RET_OFF]] to i160
|
|
; CHECK-NEXT: [[RET_INT:%.*]] = or i160 [[TMP1]], [[RET_INT_OFF]]
|
|
; CHECK-NEXT: store i160 [[RET_INT]], ptr [[ARG]], align 32
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%bound = load i32, ptr %arg
|
|
%ret = call ptr addrspace(7) @recur.inner.1(ptr addrspace(7) %x, i32 %bound)
|
|
; The same memory that supplied the bound is reused as the store destination.
store ptr addrspace(7) %ret, ptr %arg
|
|
ret void
|
|
}
|
|
|
|
; Body-less declaration that takes and returns an addrspace(7) pointer.
declare ptr addrspace(7) @extern(ptr addrspace(7) %arg)
|
|
; Passes %arg to @extern and stores the returned fat pointer through %arg
; itself, so both the stored value and the store address are addrspace(7)
; pointers.
;
; The assertions expect:
;   - `noundef` to stay on the rewritten struct parameter while `nonnull`
;     does not appear;
;   - the stored pointer to be packed into an i160 and bitcast to <5 x i32>;
;   - the store to be emitted as two buffer-store intrinsic calls: a v4i32
;     store of elements 0-3 at the original offset, then an i32 store of
;     element 4 at offset + 16.
define void @caller(ptr addrspace(7) noundef nonnull %arg) {
|
|
; CHECK-LABEL: define void @caller
|
|
; CHECK-SAME: ({ ptr addrspace(8), i32 } noundef [[ARG:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[ARG_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[ARG]], 0
|
|
; CHECK-NEXT: [[ARG_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[ARG]], 1
|
|
; CHECK-NEXT: [[V:%.*]] = call { ptr addrspace(8), i32 } @extern({ ptr addrspace(8), i32 } [[ARG]])
|
|
; CHECK-NEXT: [[V_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[V]], 0
|
|
; CHECK-NEXT: [[V_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[V]], 1
|
|
; CHECK-NEXT: [[V_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[V_RSRC]] to i160
|
|
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i160 [[V_INT_RSRC]], 32
|
|
; CHECK-NEXT: [[V_INT_OFF:%.*]] = zext i32 [[V_OFF]] to i160
|
|
; CHECK-NEXT: [[V_INT:%.*]] = or i160 [[TMP1]], [[V_INT_OFF]]
|
|
; CHECK-NEXT: [[V_INT_LEGAL:%.*]] = bitcast i160 [[V_INT]] to <5 x i32>
|
|
; CHECK-NEXT: [[V_INT_SLICE_0:%.*]] = shufflevector <5 x i32> [[V_INT_LEGAL]], <5 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
|
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[V_INT_SLICE_0]], ptr addrspace(8) align 32 [[ARG_RSRC]], i32 [[ARG_OFF]], i32 0, i32 0)
|
|
; CHECK-NEXT: [[ARG_PART_4:%.*]] = add nuw i32 [[ARG_OFF]], 16
|
|
; CHECK-NEXT: [[V_INT_SLICE_4:%.*]] = extractelement <5 x i32> [[V_INT_LEGAL]], i64 4
|
|
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[V_INT_SLICE_4]], ptr addrspace(8) align 16 [[ARG_RSRC]], i32 [[ARG_PART_4]], i32 0, i32 0)
|
|
; CHECK-NEXT: ret void
|
|
;
|
|
%v = call ptr addrspace(7) @extern(ptr addrspace(7) %arg)
|
|
; Storing a fat pointer *through* a fat pointer: this is the 160-bit store
; that the assertions expect to be split into the two buffer stores.
store ptr addrspace(7) %v, ptr addrspace(7) %arg
|
|
ret void
|
|
}
|
|
|
|
; Attribute-handling case: an internal function whose return value and
; parameter both carry `noalias noundef nonnull`. It returns %arg advanced by
; one i32 element.
;
; The assertions expect only `noundef` to remain on the rewritten struct
; return and parameter; `noalias` and `nonnull` do not appear. The `inbounds`
; GEP is expected to become an `add nuw` of 4 on the offset, inserted into the
; incoming struct.
define internal noalias noundef nonnull ptr addrspace(7) @foo(ptr addrspace(7) noalias noundef nonnull %arg) {
|
|
; CHECK-LABEL: define internal noundef { ptr addrspace(8), i32 } @foo
|
|
; CHECK-SAME: ({ ptr addrspace(8), i32 } noundef [[ARG:%.*]]) #[[ATTR0]] {
|
|
; CHECK-NEXT: [[ARG_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[ARG]], 0
|
|
; CHECK-NEXT: [[ARG_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[ARG]], 1
|
|
; CHECK-NEXT: [[RET:%.*]] = add nuw i32 [[ARG_OFF]], 4
|
|
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } [[ARG]], i32 [[RET]], 1
|
|
; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP1]]
|
|
;
|
|
%ret = getelementptr inbounds i32, ptr addrspace(7) %arg, i32 1
|
|
ret ptr addrspace(7) %ret
|
|
}
|