
This (mostly) removes one of the largest remaining limitations of `hipstdpar`-based algorithm acceleration by adding support for global variable usage in offloaded algorithms. It is meant to compose with a run-time component that will live in the support library, and it fires iff a special variable is provided by the latter. In short, things work as follows:

- we replace uses of some global `G` with an indirect access via an implicitly created anonymous global `F`, which is of pointer type and is expected to hold the program-wide address of `G`;
- we append `F`, alongside `G`'s name, to a table structure;
- at run time, the support library uses the table to look up the program-wide address of a contained symbol based on its name, and then stores that address via the paired pointer (a sketch of this fix-up follows below).

This doesn't handle internal linkage symbols (`static foo` or `namespace { foo }`) if they are not unique, i.e. if there's a name clash that is resolved by the linker, since the resolution would not be visible to us. Also, initially we will only support "true" globals in RDC mode. Things would be much simpler if we had direct access to the accelerator loader, but since the expectation is to compose at the HIP RT level, we have to jump through additional hoops.
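To make the run-time side concrete, here is a minimal sketch of the fix-up loop the support library is expected to perform, assuming the table layout visible in the test below (`%class.anon` / `%struct.anon.1`); `Resolver`, `fixupIndirectedGlobals`, and the field names are hypothetical illustrations, not an existing API:

```cpp
#include <cstddef>

struct SymbolPair {          // mirrors %struct.anon.1: { name, indirection }
  const char *Name;          // NUL-terminated name of the original global G
  void **Indirection;        // address of the implicit pointer F paired with G
};

struct IndirectionTable {    // mirrors %class.anon: { count, pairs, <pair> }
  std::size_t Count;         // number of entries in Pairs
  SymbolPair *Pairs;         // array of (name, F) pairs emitted by the pass
  SymbolPair Unused;         // trailing member, left poison by the pass
};

// How the support library maps a name to its program-wide address (e.g. by
// querying the HIP RT) is outside the scope of this sketch.
using Resolver = void *(*)(const char *Name);

// Store each global's program-wide address through its paired pointer F, so
// that the indirect device-side accesses rewritten by the pass reach G.
inline void fixupIndirectedGlobals(IndirectionTable *Table, Resolver Resolve) {
  for (std::size_t I = 0; I != Table->Count; ++I)
    *Table->Pairs[I].Indirection = Resolve(Table->Pairs[I].Name);
}
```

In the test below, `@__hipstdpar_symbol_indirection_table` plays the role of the special variable: when it is present, the pass populates it with the entry count and a pointer to the pair array, which is exactly what a loop like the one above would consume.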
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --check-globals all --version 5
; REQUIRES: amdgpu-registered-target
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=hipstdpar-select-accelerator-code \
; RUN: %s | FileCheck %s

%class.anon = type { i64, ptr, %struct.anon.1 }
%struct.anon.1 = type { ptr, ptr }
%struct.A = type { i32, i32, i32, i32, i32, double, [205 x double], [2000 x i32], [52000 x i32], [156000 x double], [14823 x double] }

@do_not_indirect = protected addrspace(4) externally_initialized constant [4 x double] [double 1.000000e+00, double 1.000000e+00, double 2.000000e+00, double 6.000000e+00], align 16
@a = external hidden local_unnamed_addr addrspace(1) global %struct.A, align 8
@b = external hidden local_unnamed_addr addrspace(1) global ptr, align 8
@c = internal addrspace(1) global { i32 } zeroinitializer, align 4
@d = external hidden local_unnamed_addr addrspace(1) global ptr addrspace(1), align 8
@__hipstdpar_symbol_indirection_table = weak_odr protected addrspace(4) externally_initialized constant %class.anon zeroinitializer, align 8

declare i64 @fn(i64 %x, i32 %y, i64 %z, i64 %w)

;.
; CHECK: @do_not_indirect = protected addrspace(4) externally_initialized constant [4 x double] [double 1.000000e+00, double 1.000000e+00, double 2.000000e+00, double 6.000000e+00], align 16
; CHECK: @[[GLOB0:[0-9]+]] = private addrspace(1) constant [2 x i8] c"a\00"
; CHECK: @[[GLOB1:[0-9]+]] = private addrspace(1) externally_initialized constant ptr addrspace(1) poison
; CHECK: @[[GLOB2:[0-9]+]] = private addrspace(1) constant [2 x i8] c"b\00"
; CHECK: @[[GLOB3:[0-9]+]] = private addrspace(1) externally_initialized constant ptr addrspace(1) poison
; CHECK: @[[GLOB4:[0-9]+]] = private addrspace(1) constant [2 x i8] c"c\00"
; CHECK: @[[GLOB5:[0-9]+]] = private addrspace(1) externally_initialized constant ptr addrspace(1) poison
; CHECK: @[[GLOB6:[0-9]+]] = private addrspace(1) constant [2 x i8] c"d\00"
; CHECK: @[[GLOB7:[0-9]+]] = private addrspace(1) externally_initialized constant ptr addrspace(1) poison
; CHECK: @[[GLOB8:[0-9]+]] = private addrspace(1) constant [4 x %struct.anon.1] [%struct.anon.1 { ptr addrspacecast (ptr addrspace(1) @[[GLOB0]] to ptr), ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr) }, %struct.anon.1 { ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), ptr addrspacecast (ptr addrspace(1) @[[GLOB3]] to ptr) }, %struct.anon.1 { ptr addrspacecast (ptr addrspace(1) @[[GLOB4]] to ptr), ptr addrspacecast (ptr addrspace(1) @[[GLOB5]] to ptr) }, %struct.anon.1 { ptr addrspacecast (ptr addrspace(1) @[[GLOB6]] to ptr), ptr addrspacecast (ptr addrspace(1) @[[GLOB7]] to ptr) }]
; CHECK: @__hipstdpar_symbol_indirection_table = weak_odr protected addrspace(4) externally_initialized constant %class.anon { i64 4, ptr addrspacecast (ptr addrspace(1) @[[GLOB8]] to ptr), %struct.anon.1 poison }, align 8
;.
define double @gep(i64 %idx) {
; CHECK-LABEL: define double @gep(
; CHECK-SAME: i64 [[IDX:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(1) @[[GLOB1]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(1) [[TMP0]], i64 217672
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [156000 x double], ptr addrspace(1) [[TMP1]], i64 0, i64 [[IDX]]
; CHECK-NEXT: [[R:%.*]] = load double, ptr addrspace(1) [[ARRAYIDX]], align 8
; CHECK-NEXT: ret double [[R]]
;
entry:
  %arrayidx = getelementptr inbounds [156000 x double], ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @a, i64 217672), i64 0, i64 %idx
  %r = load double, ptr addrspace(1) %arrayidx, align 8
  ret double %r
}

define void @store(ptr %p) {
; CHECK-LABEL: define void @store(
; CHECK-SAME: ptr [[P:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(1) @[[GLOB3]], align 8
; CHECK-NEXT: store ptr [[P]], ptr addrspace(1) [[TMP0]], align 8
; CHECK-NEXT: ret void
;
entry:
  store ptr %p, ptr addrspace(1) @b, align 8
  ret void
}

define i64 @chain(i64 %x, i32 %y, i64 %z) {
; CHECK-LABEL: define i64 @chain(
; CHECK-SAME: i64 [[X:%.*]], i32 [[Y:%.*]], i64 [[Z:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(1) @[[GLOB5]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @fn(i64 [[X]], i32 [[Y]], i64 [[TMP2]], i64 [[Z]])
; CHECK-NEXT: ret i64 [[TMP3]]
;
entry:
  %0 = call i64 @fn(i64 %x, i32 %y, i64 ptrtoint (ptr addrspacecast (ptr addrspace(1) @c to ptr) to i64), i64 %z)
  ret i64 %0
}

define void @direct(ptr %p, i64 %n) {
; CHECK-LABEL: define void @direct(
; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(1) @[[GLOB7]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP0]], align 8
; CHECK-NEXT: tail call void @llvm.memcpy.p0.p1.i64(ptr align 4 [[P]], ptr addrspace(1) align 4 [[TMP1]], i64 [[N]], i1 false)
; CHECK-NEXT: ret void
;
entry:
  %0 = load ptr addrspace(1), ptr addrspace(1) @d, align 8
  tail call void @llvm.memcpy.p0.p1.i64(ptr align 4 %p, ptr addrspace(1) align 4 %0, i64 %n, i1 false)
  ret void
}

define amdgpu_kernel void @ensure_reachable(ptr %p, i64 %idx, i64 %x, i32 %y, i64 %z) {
; CHECK-LABEL: define amdgpu_kernel void @ensure_reachable(
; CHECK-SAME: ptr [[P:%.*]], i64 [[IDX:%.*]], i64 [[X:%.*]], i32 [[Y:%.*]], i64 [[Z:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: call void @store(ptr [[P]])
; CHECK-NEXT: [[TMP0:%.*]] = call double @gep(i64 [[IDX]])
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @chain(i64 [[X]], i32 [[Y]], i64 [[Z]])
; CHECK-NEXT: call void @direct(ptr [[P]], i64 [[X]])
; CHECK-NEXT: ret void
;
entry:
  call void @store(ptr %p)
  %0 = call double @gep(i64 %idx)
  %1 = call i64 @chain(i64 %x, i32 %y, i64 %z)
  call void @direct(ptr %p, i64 %x)
  ret void
}
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
;.