[libc] Enable wide-read memory operations by default on Linux (#154602)

Summary: This patch changes the linux build to use the wide reads on the memory operations by default. These memory functions will now potentially read outside of the bounds explicitly allowed by the current function. While technically undefined behavior in the standard, plenty of C library implementations do this. it will not cause a segmentation fault on linux as long as you do not cross a page boundary, and because we are only *reading* memory it should not have atomic effects.
2025-08-20 17:17:12 -05:00 · 2025-08-20 17:17:12 -05:00 · c80d1483c6
commit c80d1483c6
parent ac8f0bb070
3 changed files with 25 additions and 14 deletions
--- a/libc/config/linux/config.json
+++ b/libc/config/linux/config.json
@ -0,0 +1,7 @@
+{
+  "string": {
+    "LIBC_CONF_STRING_UNSAFE_WIDE_READ": {
+      "value": true
+    }
+  }
+}
--- a/libc/src/string/memory_utils/aarch64/inline_strlen.h
+++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h
@ -17,14 +17,15 @@
 namespace LIBC_NAMESPACE_DECL {

 namespace neon {
-[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
+[[maybe_unused]] LIBC_INLINE static size_t string_length(const char *src) {
  using Vector __attribute__((may_alias)) = uint8x8_t;

  uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
-  Vector *block_ptr = reinterpret_cast<Vector *>(src - misalign_bytes);
+  const Vector *block_ptr =
+      reinterpret_cast<const Vector *>(src - misalign_bytes);
  Vector v = *block_ptr;
  Vector vcmp = vceqz_u8(v);
-  uint64x1_t cmp_mask = vreinterpret_u64_s8(vcmp);
+  uint64x1_t cmp_mask = vreinterpret_u64_u8(vcmp);
  uint64_t cmp = vget_lane_u64(cmp_mask, 0);
  cmp = cmp >> (misalign_bytes << 3);
  if (cmp)
@ -34,7 +35,7 @@ namespace neon {
    ++block_ptr;
    v = *block_ptr;
    vcmp = vceqz_u8(v);
-    cmp_mask = vreinterpret_u64_s8(vcmp);
+    cmp_mask = vreinterpret_u64_u8(vcmp);
    cmp = vget_lane_u64(cmp_mask, 0);
    if (cmp)
      return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -
--- a/libc/src/string/memory_utils/x86_64/inline_strlen.h
+++ b/libc/src/string/memory_utils/x86_64/inline_strlen.h
@ -18,22 +18,22 @@ namespace LIBC_NAMESPACE_DECL {
 namespace string_length_internal {
 // Return a bit-mask with the nth bit set if the nth-byte in block_ptr is zero.
 template <typename Vector, typename Mask>
-Mask CompareAndMask(const Vector *block_ptr);
+LIBC_INLINE static Mask compare_and_mask(const Vector *block_ptr);

 template <typename Vector, typename Mask,
-          decltype(CompareAndMask<Vector, Mask>)>
+          decltype(compare_and_mask<Vector, Mask>)>
 size_t string_length_vector(const char *src) {
  uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);

  const Vector *block_ptr =
      reinterpret_cast<const Vector *>(src - misalign_bytes);
-  auto cmp = CompareAndMask<Vector, Mask>(block_ptr) >> misalign_bytes;
+  auto cmp = compare_and_mask<Vector, Mask>(block_ptr) >> misalign_bytes;
  if (cmp)
    return cpp::countr_zero(cmp);

  while (true) {
    block_ptr++;
-    cmp = CompareAndMask<Vector, Mask>(block_ptr);
+    cmp = compare_and_mask<Vector, Mask>(block_ptr);
    if (cmp)
      return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -
                                 reinterpret_cast<uintptr_t>(src) +
@ -42,7 +42,8 @@ size_t string_length_vector(const char *src) {
 }

 template <>
-uint32_t CompareAndMask<__m128i, uint32_t>(const __m128i *block_ptr) {
+LIBC_INLINE uint32_t
+compare_and_mask<__m128i, uint32_t>(const __m128i *block_ptr) {
  __m128i v = _mm_load_si128(block_ptr);
  __m128i z = _mm_setzero_si128();
  __m128i c = _mm_cmpeq_epi8(z, v);
@ -52,13 +53,14 @@ uint32_t CompareAndMask<__m128i, uint32_t>(const __m128i *block_ptr) {
 namespace sse2 {
 [[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
  return string_length_vector<__m128i, uint32_t,
-                              CompareAndMask<__m128i, uint32_t>>(src);
+                              compare_and_mask<__m128i, uint32_t>>(src);
 }
 } // namespace sse2

 #if defined(__AVX2__)
 template <>
-uint32_t CompareAndMask<__m256i, uint32_t>(const __m256i *block_ptr) {
+LIBC_INLINE uint32_t
+compare_and_mask<__m256i, uint32_t>(const __m256i *block_ptr) {
  __m256i v = _mm256_load_si256(block_ptr);
  __m256i z = _mm256_setzero_si256();
  __m256i c = _mm256_cmpeq_epi8(z, v);
@ -68,14 +70,15 @@ uint32_t CompareAndMask<__m256i, uint32_t>(const __m256i *block_ptr) {
 namespace avx2 {
 [[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
  return string_length_vector<__m256i, uint32_t,
-                              CompareAndMask<__m256i, uint32_t>>(src);
+                              compare_and_mask<__m256i, uint32_t>>(src);
 }
 } // namespace avx2
 #endif

 #if defined(__AVX512F__)
 template <>
-__mmask64 CompareAndMask<__m512i, __mmask64>(const __m512i *block_ptr) {
+LIBC_INLINE __mmask64
+compare_and_mask<__m512i, __mmask64>(const __m512i *block_ptr) {
  __m512i v = _mm512_load_si512(block_ptr);
  __m512i z = _mm512_setzero_si512();
  return _mm512_cmp_epu8_mask(z, v, _MM_CMPINT_EQ);
@ -83,7 +86,7 @@ __mmask64 CompareAndMask<__m512i, __mmask64>(const __m512i *block_ptr) {
 namespace avx512 {
 [[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
  return string_length_vector<__m512i, __mmask64,
-                              CompareAndMask<__m512i, __mmask64>>(src);
+                              compare_and_mask<__m512i, __mmask64>>(src);
 }
 } // namespace avx512
 #endif