[libc] Enable wide-read memory operations by default on Linux (#154602)

Summary:
This patch changes the linux build to use the wide reads on the memory
operations by default. These memory functions will now potentially read
outside of the bounds explicitly allowed by the current function. While
technically undefined behavior in the standard, plenty of C library
implementations do this. it will not cause a segmentation fault on linux
as long as you do not cross a page boundary, and because we are only
*reading* memory it should not have atomic effects.
This commit is contained in:
Joseph Huber 2025-08-20 17:17:12 -05:00 committed by GitHub
parent ac8f0bb070
commit c80d1483c6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 25 additions and 14 deletions

View File

@ -0,0 +1,7 @@
{
"string": {
"LIBC_CONF_STRING_UNSAFE_WIDE_READ": {
"value": true
}
}
}

View File

@ -17,14 +17,15 @@
namespace LIBC_NAMESPACE_DECL {
namespace neon {
[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
[[maybe_unused]] LIBC_INLINE static size_t string_length(const char *src) {
using Vector __attribute__((may_alias)) = uint8x8_t;
uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
Vector *block_ptr = reinterpret_cast<Vector *>(src - misalign_bytes);
const Vector *block_ptr =
reinterpret_cast<const Vector *>(src - misalign_bytes);
Vector v = *block_ptr;
Vector vcmp = vceqz_u8(v);
uint64x1_t cmp_mask = vreinterpret_u64_s8(vcmp);
uint64x1_t cmp_mask = vreinterpret_u64_u8(vcmp);
uint64_t cmp = vget_lane_u64(cmp_mask, 0);
cmp = cmp >> (misalign_bytes << 3);
if (cmp)
@ -34,7 +35,7 @@ namespace neon {
++block_ptr;
v = *block_ptr;
vcmp = vceqz_u8(v);
cmp_mask = vreinterpret_u64_s8(vcmp);
cmp_mask = vreinterpret_u64_u8(vcmp);
cmp = vget_lane_u64(cmp_mask, 0);
if (cmp)
return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -

View File

@ -18,22 +18,22 @@ namespace LIBC_NAMESPACE_DECL {
namespace string_length_internal {
// Return a bit-mask with the nth bit set if the nth-byte in block_ptr is zero.
template <typename Vector, typename Mask>
Mask CompareAndMask(const Vector *block_ptr);
LIBC_INLINE static Mask compare_and_mask(const Vector *block_ptr);
template <typename Vector, typename Mask,
decltype(CompareAndMask<Vector, Mask>)>
decltype(compare_and_mask<Vector, Mask>)>
size_t string_length_vector(const char *src) {
uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
const Vector *block_ptr =
reinterpret_cast<const Vector *>(src - misalign_bytes);
auto cmp = CompareAndMask<Vector, Mask>(block_ptr) >> misalign_bytes;
auto cmp = compare_and_mask<Vector, Mask>(block_ptr) >> misalign_bytes;
if (cmp)
return cpp::countr_zero(cmp);
while (true) {
block_ptr++;
cmp = CompareAndMask<Vector, Mask>(block_ptr);
cmp = compare_and_mask<Vector, Mask>(block_ptr);
if (cmp)
return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -
reinterpret_cast<uintptr_t>(src) +
@ -42,7 +42,8 @@ size_t string_length_vector(const char *src) {
}
template <>
uint32_t CompareAndMask<__m128i, uint32_t>(const __m128i *block_ptr) {
LIBC_INLINE uint32_t
compare_and_mask<__m128i, uint32_t>(const __m128i *block_ptr) {
__m128i v = _mm_load_si128(block_ptr);
__m128i z = _mm_setzero_si128();
__m128i c = _mm_cmpeq_epi8(z, v);
@ -52,13 +53,14 @@ uint32_t CompareAndMask<__m128i, uint32_t>(const __m128i *block_ptr) {
namespace sse2 {
[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
return string_length_vector<__m128i, uint32_t,
CompareAndMask<__m128i, uint32_t>>(src);
compare_and_mask<__m128i, uint32_t>>(src);
}
} // namespace sse2
#if defined(__AVX2__)
template <>
uint32_t CompareAndMask<__m256i, uint32_t>(const __m256i *block_ptr) {
LIBC_INLINE uint32_t
compare_and_mask<__m256i, uint32_t>(const __m256i *block_ptr) {
__m256i v = _mm256_load_si256(block_ptr);
__m256i z = _mm256_setzero_si256();
__m256i c = _mm256_cmpeq_epi8(z, v);
@ -68,14 +70,15 @@ uint32_t CompareAndMask<__m256i, uint32_t>(const __m256i *block_ptr) {
namespace avx2 {
[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
return string_length_vector<__m256i, uint32_t,
CompareAndMask<__m256i, uint32_t>>(src);
compare_and_mask<__m256i, uint32_t>>(src);
}
} // namespace avx2
#endif
#if defined(__AVX512F__)
template <>
__mmask64 CompareAndMask<__m512i, __mmask64>(const __m512i *block_ptr) {
LIBC_INLINE __mmask64
compare_and_mask<__m512i, __mmask64>(const __m512i *block_ptr) {
__m512i v = _mm512_load_si512(block_ptr);
__m512i z = _mm512_setzero_si512();
return _mm512_cmp_epu8_mask(z, v, _MM_CMPINT_EQ);
@ -83,7 +86,7 @@ __mmask64 CompareAndMask<__m512i, __mmask64>(const __m512i *block_ptr) {
namespace avx512 {
[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
return string_length_vector<__m512i, __mmask64,
CompareAndMask<__m512i, __mmask64>>(src);
compare_and_mask<__m512i, __mmask64>>(src);
}
} // namespace avx512
#endif