[libc] Efficiently implement 'realloc' for AMDGPU devices (#145960)
Summary: Now that we have `malloc`, we can implement `realloc` efficiently. This uses the known chunk sizes to avoid unnecessary allocations. For NVPTX we simply return nullptr. I would remove the `realloc` entrypoint from the NVPTX list, but then the libc++ code would stop working. Once someone writes the NVPTX support, this will be trivial.
This commit is contained in:
parent
d7e23bef6a
commit
10445acfa6
@ -22,6 +22,7 @@
|
||||
#include "src/__support/GPU/utils.h"
|
||||
#include "src/__support/RPC/rpc_client.h"
|
||||
#include "src/__support/threads/sleep.h"
|
||||
#include "src/string/memory_utils/inline_memcpy.h"
|
||||
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
|
||||
@ -550,5 +551,26 @@ void deallocate(void *ptr) {
|
||||
release_slab(slab);
|
||||
}
|
||||
|
||||
// Resizes the allocation at 'ptr' so that it can hold at least 'size' bytes.
//
// - A null 'ptr' forwards to gpu::allocate(size).
// - A pointer that is not inside a slab is treated as foreign and the call
//   fails with nullptr.
// - If the chunk that already backs 'ptr' is large enough, 'ptr' is returned
//   unchanged and nothing is allocated or copied.
// - Otherwise a new allocation is made, the old chunk's contents are copied
//   over, and the old pointer is released.
//
// Returns the (possibly unchanged) pointer, or nullptr on failure. When the
// new allocation fails the original allocation is left untouched so the
// caller can keep using or free it.
void *reallocate(void *ptr, uint64_t size) {
  if (ptr == nullptr)
    return gpu::allocate(size);

  // Non-slab allocations are considered foreign pointers so we fail.
  if ((reinterpret_cast<uintptr_t>(ptr) & SLAB_ALIGNMENT) == 0)
    return nullptr;

  // The original slab pointer is the 2MiB boundary using the given pointer.
  Slab *slab = cpp::launder(reinterpret_cast<Slab *>(
      (reinterpret_cast<uintptr_t>(ptr) & ~SLAB_ALIGNMENT)));

  // Read the chunk size once; it is both the "fits already" threshold and the
  // number of bytes that have to be carried over to a new chunk.
  const auto chunk_size = slab->get_chunk_size();
  if (chunk_size >= size)
    return ptr;

  // If we need a new chunk we reallocate and copy it over.
  void *new_ptr = gpu::allocate(size);

  // Allocation can fail; do not copy into a null destination and do not
  // release the caller's still-valid original allocation.
  if (new_ptr == nullptr)
    return nullptr;

  inline_memcpy(new_ptr, ptr, chunk_size);
  gpu::deallocate(ptr);
  return new_ptr;
}
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
|
@ -17,6 +17,7 @@ namespace gpu {
|
||||
|
||||
// Allocates memory for a request of 'size' bytes. Callers in this change
// compare the result against nullptr, so a null return signals failure.
void *allocate(uint64_t size);
|
||||
// Releases a pointer previously handed out by this allocator.
void deallocate(void *ptr);
|
||||
// Resizes the allocation at 'ptr' to hold 'size' bytes. A null 'ptr' behaves
// like allocate(size); the same pointer is returned when the existing chunk is
// already large enough, otherwise the data is moved to a new allocation.
void *reallocate(void *ptr, uint64_t size);
|
||||
|
||||
} // namespace gpu
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
|
@ -16,17 +16,15 @@
|
||||
namespace LIBC_NAMESPACE_DECL {
|
||||
|
||||
// GPU entrypoint for 'realloc'.
//
// On every target except NVPTX this forwards to gpu::reallocate(), which knows
// the size of the chunk backing 'ptr' and can therefore avoid both needless
// allocations and copying past the end of the old allocation. On NVPTX the
// arguments are ignored and nullptr is returned.
LLVM_LIBC_FUNCTION(void *, realloc, (void *ptr, size_t size)) {
  // FIXME: NVIDIA targets currently use the built-in 'malloc' which we cannot
  // reason with. But we still need to provide this function for compatibility.
#ifndef LIBC_TARGET_ARCH_IS_NVPTX
  return gpu::reallocate(ptr, size);
#else
  (void)ptr;
  (void)size;
  return nullptr;
#endif
}
|
||||
|
||||
} // namespace LIBC_NAMESPACE_DECL
|
||||
|
@ -17,6 +17,21 @@ if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
|
||||
--blocks 1024
|
||||
)
|
||||
|
||||
# Integration test for the GPU 'realloc' entrypoint. Builds realloc.cpp against
# the malloc, free and realloc entrypoints and passes 256 threads and 1024
# blocks to the loader.
add_integration_test(
|
||||
realloc
|
||||
SUITE
|
||||
stdlib-gpu-integration-tests
|
||||
SRCS
|
||||
realloc.cpp
|
||||
DEPENDS
|
||||
libc.src.stdlib.malloc
|
||||
libc.src.stdlib.free
|
||||
libc.src.stdlib.realloc
|
||||
LOADER_ARGS
|
||||
--threads 256
|
||||
--blocks 1024
|
||||
)
|
||||
|
||||
add_integration_test(
|
||||
malloc_stress
|
||||
SUITE
|
||||
|
44
libc/test/integration/src/stdlib/gpu/realloc.cpp
Normal file
44
libc/test/integration/src/stdlib/gpu/realloc.cpp
Normal file
@ -0,0 +1,44 @@
|
||||
#include "test/IntegrationTest/test.h"
|
||||
|
||||
#include "src/__support/GPU/utils.h"
|
||||
#include "src/stdlib/free.h"
|
||||
#include "src/stdlib/malloc.h"
|
||||
#include "src/stdlib/realloc.h"
|
||||
|
||||
using namespace LIBC_NAMESPACE;
|
||||
|
||||
TEST_MAIN(int, char **, char **) {
|
||||
// realloc(nullptr, size) is equivalent to malloc.
|
||||
int *alloc = reinterpret_cast<int *>(LIBC_NAMESPACE::realloc(nullptr, 32));
|
||||
EXPECT_NE(alloc, nullptr);
|
||||
*alloc = 42;
|
||||
EXPECT_EQ(*alloc, 42);
|
||||
|
||||
// realloc to same size returns the same pointer.
|
||||
void *same = LIBC_NAMESPACE::realloc(alloc, 32);
|
||||
EXPECT_EQ(same, alloc);
|
||||
EXPECT_EQ(reinterpret_cast<int *>(same)[0], 42);
|
||||
|
||||
// realloc to smaller size returns same pointer.
|
||||
void *smaller = LIBC_NAMESPACE::realloc(same, 16);
|
||||
EXPECT_EQ(smaller, alloc);
|
||||
EXPECT_EQ(reinterpret_cast<int *>(smaller)[0], 42);
|
||||
|
||||
// realloc to larger size returns new pointer and preserves contents.
|
||||
int *larger = reinterpret_cast<int *>(LIBC_NAMESPACE::realloc(smaller, 128));
|
||||
EXPECT_NE(larger, nullptr);
|
||||
EXPECT_EQ(larger[0], 42);
|
||||
|
||||
// realloc works when called with a divergent size.
|
||||
int *div = reinterpret_cast<int *>(
|
||||
LIBC_NAMESPACE::malloc((gpu::get_thread_id() + 1) * 16));
|
||||
EXPECT_NE(div, nullptr);
|
||||
div[0] = static_cast<int>(gpu::get_thread_id());
|
||||
int *div_realloc = reinterpret_cast<int *>(
|
||||
LIBC_NAMESPACE::realloc(div, ((gpu::get_thread_id() + 1) * 32)));
|
||||
EXPECT_NE(div_realloc, nullptr);
|
||||
EXPECT_EQ(div_realloc[0], static_cast<int>(gpu::get_thread_id()));
|
||||
LIBC_NAMESPACE::free(div_realloc);
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user