[libc] Efficiently implement 'realloc' for AMDGPU devices (#145960)

Summary:
Now that we have `malloc` we can implement `realloc` efficiently. This
uses the known chunk sizes to avoid unnecessary allocations. For NVPTX we
just return nullptr. I'd remove the entrypoint from the NVPTX list, but
then the libc++ code would stop working. Once someone writes the NVPTX
support, enabling this there will be trivial.
This commit is contained in:
Joseph Huber 2025-06-30 08:39:40 -05:00 committed by GitHub
parent d7e23bef6a
commit 10445acfa6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 91 additions and 11 deletions

View File

@ -22,6 +22,7 @@
#include "src/__support/GPU/utils.h"
#include "src/__support/RPC/rpc_client.h"
#include "src/__support/threads/sleep.h"
#include "src/string/memory_utils/inline_memcpy.h"
namespace LIBC_NAMESPACE_DECL {
@ -550,5 +551,26 @@ void deallocate(void *ptr) {
release_slab(slab);
}
/// Resizes the allocation at \p ptr so it can hold at least \p size bytes.
///
/// \param ptr  Pointer to an existing allocation, or nullptr to request a
///             fresh allocation.
/// \param size Requested size in bytes.
/// \returns \p ptr unchanged when the chunk size of its slab already covers
///          \p size; otherwise a pointer to a new allocation holding the old
///          chunk's contents. Returns nullptr when \p ptr is not a slab
///          allocation or when the new allocation fails; in both cases the
///          original allocation is left untouched.
void *reallocate(void *ptr, uint64_t size) {
  if (ptr == nullptr)
    return gpu::allocate(size);

  // Non-slab allocations are considered foreign pointers so we fail.
  if ((reinterpret_cast<uintptr_t>(ptr) & SLAB_ALIGNMENT) == 0)
    return nullptr;

  // The original slab pointer is the 2MiB boundary using the given pointer.
  Slab *slab = cpp::launder(reinterpret_cast<Slab *>(
      (reinterpret_cast<uintptr_t>(ptr) & ~SLAB_ALIGNMENT)));

  // The existing chunk is already big enough, so there is nothing to do.
  const auto chunk_size = slab->get_chunk_size();
  if (chunk_size >= size)
    return ptr;

  // If we need a new chunk we reallocate and copy it over. On failure the
  // caller keeps ownership of the still-valid original pointer.
  void *new_ptr = gpu::allocate(size);
  if (new_ptr == nullptr)
    return nullptr;

  // Only the old chunk's bytes are meaningful; the new chunk is larger.
  inline_memcpy(new_ptr, ptr, chunk_size);
  gpu::deallocate(ptr);
  return new_ptr;
}
} // namespace gpu
} // namespace LIBC_NAMESPACE_DECL

View File

@ -17,6 +17,7 @@ namespace gpu {
// Requests a block of at least `size` bytes from the GPU allocator.
void *allocate(uint64_t size);
// Releases a pointer that was handed out by this allocator.
void deallocate(void *ptr);
// Resizes the allocation at `ptr` to hold at least `size` bytes. A null `ptr`
// behaves like `allocate(size)`. The same pointer is returned when its current
// chunk is already large enough; otherwise the contents are copied into a new
// allocation and the old one is released. Pointers that are not slab
// allocations are rejected with nullptr.
void *reallocate(void *ptr, uint64_t size);
} // namespace gpu
} // namespace LIBC_NAMESPACE_DECL

View File

@ -16,17 +16,15 @@
namespace LIBC_NAMESPACE_DECL {
/// GPU entrypoint for the C `realloc` function.
///
/// On targets other than NVPTX this forwards to `gpu::reallocate`. On NVPTX
/// the request is not serviced and nullptr is returned.
LLVM_LIBC_FUNCTION(void *, realloc, (void *ptr, size_t size)) {
  // FIXME: NVIDIA targets currently use the built-in 'malloc' which we cannot
  // reason about. But we still need to provide this function for compatibility.
#ifndef LIBC_TARGET_ARCH_IS_NVPTX
  return gpu::reallocate(ptr, size);
#else
  // Silence unused-parameter warnings on the unsupported path.
  (void)ptr;
  (void)size;
  return nullptr;
#endif
}
} // namespace LIBC_NAMESPACE_DECL

View File

@ -17,6 +17,21 @@ if(NOT LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
--blocks 1024
)
# Integration test for the GPU `realloc` entrypoint. It is built from
# realloc.cpp, registered in the stdlib-gpu-integration-tests suite, and
# launched with 256 threads in each of 1024 blocks.
add_integration_test(
realloc
SUITE
stdlib-gpu-integration-tests
SRCS
realloc.cpp
DEPENDS
libc.src.stdlib.malloc
libc.src.stdlib.free
libc.src.stdlib.realloc
LOADER_ARGS
--threads 256
--blocks 1024
)
add_integration_test(
malloc_stress
SUITE

View File

@ -0,0 +1,44 @@
#include "test/IntegrationTest/test.h"
#include "src/__support/GPU/utils.h"
#include "src/stdlib/free.h"
#include "src/stdlib/malloc.h"
#include "src/stdlib/realloc.h"
using namespace LIBC_NAMESPACE;
// Exercises `realloc` on the GPU: the nullptr case, same-size and shrinking
// requests (which keep the pointer), a growing request (which preserves the
// contents), and per-thread divergent sizes. Every allocation made here is
// released before returning.
TEST_MAIN(int, char **, char **) {
  // realloc(nullptr, size) is equivalent to malloc.
  int *alloc = reinterpret_cast<int *>(LIBC_NAMESPACE::realloc(nullptr, 32));
  EXPECT_NE(alloc, nullptr);
  *alloc = 42;
  EXPECT_EQ(*alloc, 42);

  // realloc to same size returns the same pointer.
  void *same = LIBC_NAMESPACE::realloc(alloc, 32);
  EXPECT_EQ(same, alloc);
  EXPECT_EQ(reinterpret_cast<int *>(same)[0], 42);

  // realloc to smaller size returns same pointer.
  void *smaller = LIBC_NAMESPACE::realloc(same, 16);
  EXPECT_EQ(smaller, alloc);
  EXPECT_EQ(reinterpret_cast<int *>(smaller)[0], 42);

  // realloc to larger size returns new pointer and preserves contents.
  int *larger = reinterpret_cast<int *>(LIBC_NAMESPACE::realloc(smaller, 128));
  EXPECT_NE(larger, nullptr);
  EXPECT_EQ(larger[0], 42);
  // The grown allocation is the only live handle to this memory; release it
  // so the test does not leak a chunk per thread.
  LIBC_NAMESPACE::free(larger);

  // realloc works when called with a divergent size.
  int *div = reinterpret_cast<int *>(
      LIBC_NAMESPACE::malloc((gpu::get_thread_id() + 1) * 16));
  EXPECT_NE(div, nullptr);
  div[0] = static_cast<int>(gpu::get_thread_id());
  int *div_realloc = reinterpret_cast<int *>(
      LIBC_NAMESPACE::realloc(div, ((gpu::get_thread_id() + 1) * 32)));
  EXPECT_NE(div_realloc, nullptr);
  EXPECT_EQ(div_realloc[0], static_cast<int>(gpu::get_thread_id()));
  LIBC_NAMESPACE::free(div_realloc);

  return 0;
}