Add device information printing for AMD devices to the
`llvm-omp-device-info` command line tool. The output format is inspired by
the `rocminfo` command line tool.
This commit adds missing HSA functions, enums and structs
needed to query additional information from the HSA agents.
A generic message for the `generic-elf-64bit` plugin is also added.
Example output:
```
llvm-omp-device-info
Device (0):
This is a generic-elf-64bit device
Device (1):
This is a generic-elf-64bit device
Device (2):
This is a generic-elf-64bit device
Device (3):
This is a generic-elf-64bit device
Device (4):
HSA Runtime Version: 1.1
HSA OpenMP Device Number: 0
Device Name: gfx906
Vendor Name: AMD
Device Type: GPU
Max Queues: 128
Queue Min Size: 64
Queue Max Size: 131072
Cache:
L0: 16384 bytes
L1: 8388608 bytes
Cacheline Size: 64
Max Clock Freq(MHz): 1725
Compute Units: 60
SIMD per CU: 4
Fast F16 Operation: TRUE
Wavefront Size: 64
Workgroup Max Size: 1024
Workgroup Max Size per Dimension:
x: 1024
y: 1024
z: 1024
Max Waves Per CU: 40
Max Work-item Per CU: 2560
Grid Max Size: 4294967295
Grid Max Size per Dimension:
x: 4294967295
y: 4294967295
z: 4294967295
Max fbarriers/Workgrp: 32
Memory Pools:
Pool GLOBAL; FLAGS: COARSE GRAINED, :
Size: 34342961152 bytes
Allocatable: TRUE
Runtime Alloc Granule: 4096 bytes
Runtime Alloc alignment: 4096 bytes
Accessable by all: FALSE
Pool GLOBAL; FLAGS: FINE GRAINED, :
Size: 34342961152 bytes
Allocatable: TRUE
Runtime Alloc Granule: 4096 bytes
Runtime Alloc alignment: 4096 bytes
Accessable by all: FALSE
Pool GROUP:
Size: 65536 bytes
Allocatable: FALSE
Runtime Alloc Granule: 0 bytes
Runtime Alloc alignment: 0 bytes
Accessable by all: FALSE
Device (5):
HSA Runtime Version: 1.1
HSA OpenMP Device Number: 1
Device Name: gfx906
Vendor Name: AMD
Device Type: GPU
Max Queues: 128
Queue Min Size: 64
Queue Max Size: 131072
Cache:
L0: 16384 bytes
L1: 8388608 bytes
Cacheline Size: 64
Max Clock Freq(MHz): 1725
Compute Units: 60
SIMD per CU: 4
Fast F16 Operation: TRUE
Wavefront Size: 64
Workgroup Max Size: 1024
Workgroup Max Size per Dimension:
x: 1024
y: 1024
z: 1024
Max Waves Per CU: 40
Max Work-item Per CU: 2560
Grid Max Size: 4294967295
Grid Max Size per Dimension:
x: 4294967295
y: 4294967295
z: 4294967295
Max fbarriers/Workgrp: 32
Memory Pools:
Pool GLOBAL; FLAGS: COARSE GRAINED, :
Size: 34342961152 bytes
Allocatable: TRUE
Runtime Alloc Granule: 4096 bytes
Runtime Alloc alignment: 4096 bytes
Accessable by all: FALSE
Pool GLOBAL; FLAGS: FINE GRAINED, :
Size: 34342961152 bytes
Allocatable: TRUE
Runtime Alloc Granule: 4096 bytes
Runtime Alloc alignment: 4096 bytes
Accessable by all: FALSE
Pool GROUP:
Size: 65536 bytes
Allocatable: FALSE
Runtime Alloc Granule: 0 bytes
Runtime Alloc alignment: 0 bytes
Accessable by all: FALSE
Device (6):
HSA Runtime Version: 1.1
HSA OpenMP Device Number: 2
Device Name: gfx906
Vendor Name: AMD
Device Type: GPU
Max Queues: 128
Queue Min Size: 64
Queue Max Size: 131072
Cache:
L0: 16384 bytes
L1: 8388608 bytes
Cacheline Size: 64
Max Clock Freq(MHz): 1725
Compute Units: 60
SIMD per CU: 4
Fast F16 Operation: TRUE
Wavefront Size: 64
Workgroup Max Size: 1024
Workgroup Max Size per Dimension:
x: 1024
y: 1024
z: 1024
Max Waves Per CU: 40
Max Work-item Per CU: 2560
Grid Max Size: 4294967295
Grid Max Size per Dimension:
x: 4294967295
y: 4294967295
z: 4294967295
Max fbarriers/Workgrp: 32
Memory Pools:
Pool GLOBAL; FLAGS: COARSE GRAINED, :
Size: 34342961152 bytes
Allocatable: TRUE
Runtime Alloc Granule: 4096 bytes
Runtime Alloc alignment: 4096 bytes
Accessable by all: FALSE
Pool GLOBAL; FLAGS: FINE GRAINED, :
Size: 34342961152 bytes
Allocatable: TRUE
Runtime Alloc Granule: 4096 bytes
Runtime Alloc alignment: 4096 bytes
Accessable by all: FALSE
Pool GROUP:
Size: 65536 bytes
Allocatable: FALSE
Runtime Alloc Granule: 0 bytes
Runtime Alloc alignment: 0 bytes
Accessable by all: FALSE
Device (7):
HSA Runtime Version: 1.1
HSA OpenMP Device Number: 3
Device Name: gfx906
Vendor Name: AMD
Device Type: GPU
Max Queues: 128
Queue Min Size: 64
Queue Max Size: 131072
Cache:
L0: 16384 bytes
L1: 8388608 bytes
Cacheline Size: 64
Max Clock Freq(MHz): 1725
Compute Units: 60
SIMD per CU: 4
Fast F16 Operation: TRUE
Wavefront Size: 64
Workgroup Max Size: 1024
Workgroup Max Size per Dimension:
x: 1024
y: 1024
z: 1024
Max Waves Per CU: 40
Max Work-item Per CU: 2560
Grid Max Size: 4294967295
Grid Max Size per Dimension:
x: 4294967295
y: 4294967295
z: 4294967295
Max fbarriers/Workgrp: 32
Memory Pools:
Pool GLOBAL; FLAGS: COARSE GRAINED, :
Size: 34342961152 bytes
Allocatable: TRUE
Runtime Alloc Granule: 4096 bytes
Runtime Alloc alignment: 4096 bytes
Accessable by all: FALSE
Pool GLOBAL; FLAGS: FINE GRAINED, :
Size: 34342961152 bytes
Allocatable: TRUE
Runtime Alloc Granule: 4096 bytes
Runtime Alloc alignment: 4096 bytes
Accessable by all: FALSE
Pool GROUP:
Size: 65536 bytes
Allocatable: FALSE
Runtime Alloc Granule: 0 bytes
Runtime Alloc alignment: 0 bytes
Accessable by all: FALSE
```
Differential Revision: https://reviews.llvm.org/D126836
103 lines
2.9 KiB
C++
103 lines
2.9 KiB
C++
//===--- amdgpu/dynamic_hsa/hsa.cpp ------------------------------- C++ -*-===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Implement subset of hsa api by calling into hsa library via dlopen
|
|
// Does the dlopen/dlsym calls as part of the call to hsa_init
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
#include "hsa.h"
|
|
#include "Debug.h"
|
|
#include "dlwrap.h"
|
|
#include "hsa_ext_amd.h"
|
|
|
|
#include <dlfcn.h>
|
|
|
|
DLWRAP_INITIALIZE();
|
|
|
|
DLWRAP_INTERNAL(hsa_init, 0);
|
|
|
|
DLWRAP(hsa_status_string, 2);
|
|
DLWRAP(hsa_shut_down, 0);
|
|
DLWRAP(hsa_system_get_info, 2);
|
|
DLWRAP(hsa_agent_get_info, 3);
|
|
DLWRAP(hsa_isa_get_info_alt, 3);
|
|
DLWRAP(hsa_iterate_agents, 2);
|
|
DLWRAP(hsa_agent_iterate_isas, 3);
|
|
DLWRAP(hsa_signal_create, 4);
|
|
DLWRAP(hsa_signal_destroy, 1);
|
|
DLWRAP(hsa_signal_store_relaxed, 2);
|
|
DLWRAP(hsa_signal_store_screlease, 2);
|
|
DLWRAP(hsa_signal_wait_scacquire, 5);
|
|
DLWRAP(hsa_queue_create, 8);
|
|
DLWRAP(hsa_queue_destroy, 1);
|
|
DLWRAP(hsa_queue_load_read_index_scacquire, 1);
|
|
DLWRAP(hsa_queue_add_write_index_relaxed, 2);
|
|
DLWRAP(hsa_memory_copy, 3);
|
|
DLWRAP(hsa_executable_create, 4);
|
|
DLWRAP(hsa_executable_destroy, 1);
|
|
DLWRAP(hsa_executable_freeze, 2);
|
|
DLWRAP(hsa_executable_symbol_get_info, 3);
|
|
DLWRAP(hsa_executable_iterate_symbols, 3);
|
|
DLWRAP(hsa_code_object_deserialize, 4);
|
|
DLWRAP(hsa_executable_load_code_object, 4);
|
|
DLWRAP(hsa_amd_agent_memory_pool_get_info, 4);
|
|
DLWRAP(hsa_amd_agent_iterate_memory_pools, 3);
|
|
DLWRAP(hsa_amd_memory_pool_allocate, 4);
|
|
DLWRAP(hsa_amd_memory_pool_free, 1);
|
|
DLWRAP(hsa_amd_memory_async_copy, 8);
|
|
DLWRAP(hsa_amd_memory_pool_get_info, 3);
|
|
DLWRAP(hsa_amd_agents_allow_access, 4);
|
|
DLWRAP(hsa_amd_memory_lock, 5);
|
|
DLWRAP(hsa_amd_memory_unlock, 1);
|
|
DLWRAP(hsa_amd_memory_fill, 3);
|
|
DLWRAP(hsa_amd_register_system_event_handler, 2);
|
|
|
|
DLWRAP_FINALIZE();
|
|
|
|
// Name of the HSA runtime shared library handed to dlopen. The build may
// predefine DYNAMIC_HSA_PATH to override this default.
#if !defined(DYNAMIC_HSA_PATH)
#define DYNAMIC_HSA_PATH "libhsa-runtime64.so"
#endif

// TARGET_NAME must be supplied by the build; it is used to form the prefix
// attached to debug output from this file.
#if !defined(TARGET_NAME)
#error "Missing TARGET_NAME macro"
#endif
#define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
|
|
|
|
static bool checkForHSA() {
|
|
// return true if dlopen succeeded and all functions found
|
|
|
|
const char *HsaLib = DYNAMIC_HSA_PATH;
|
|
void *DynlibHandle = dlopen(HsaLib, RTLD_NOW);
|
|
if (!DynlibHandle) {
|
|
DP("Unable to load library '%s': %s!\n", HsaLib, dlerror());
|
|
return false;
|
|
}
|
|
|
|
for (size_t I = 0; I < dlwrap::size(); I++) {
|
|
const char *Sym = dlwrap::symbol(I);
|
|
|
|
void *P = dlsym(DynlibHandle, Sym);
|
|
if (P == nullptr) {
|
|
DP("Unable to find '%s' in '%s'!\n", Sym, HsaLib);
|
|
return false;
|
|
}
|
|
DP("Implementing %s with dlsym(%s) -> %p\n", Sym, Sym, P);
|
|
|
|
*dlwrap::pointer(I) = P;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/// Replacement for hsa_init that binds the HSA library before forwarding.
///
/// \returns the result of dlwrap_hsa_init() when checkForHSA() succeeds,
/// or HSA_STATUS_ERROR when the library or one of its symbols could not be
/// loaded.
hsa_status_t hsa_init() {
  // Forward only once the library and all of its symbols are bound.
  if (checkForHSA())
    return dlwrap_hsa_init();

  return HSA_STATUS_ERROR;
}
|