From 838b1ccdd90ba4fe5735fdfcefdd26e87ff53e48 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Tue, 17 Mar 2026 15:36:41 -0700 Subject: [PATCH] [libc] Add a smaller b36_char_to_int (#180841) For ASCII systems, b36_char_to_int gets compiled into a jump table. That jump table ends up being pretty large because it covers the range from '0' (48) to 'z' (122). On size-constrained systems that can assume ASCII, this patch provides a new flag: LIBC_CONF_CTYPE_SMALLER_ASCII that forces a smaller implementation that doesn't compile into a jump table. --- .../cmake/modules/LLVMLibCCompileOptionRules.cmake | 4 ++++ libc/config/baremetal/config.json | 5 +++++ libc/config/config.json | 6 ++++++ libc/docs/configure.rst | 2 ++ libc/src/__support/ctype_utils.h | 14 ++++++++++++++ 5 files changed, 31 insertions(+) diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake index b15a9fab09c3..1ce027114f73 100644 --- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake +++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake @@ -147,6 +147,10 @@ function(_get_compile_options_from_config output_var) endif() endif() + if(LIBC_CONF_CTYPE_SMALLER_ASCII) + list(APPEND config_options "-DLIBC_COPT_CTYPE_SMALLER_ASCII") + endif() + if(LIBC_CONF_PRINTF_DISABLE_WIDE) list(APPEND config_options "-DLIBC_COPT_PRINTF_DISABLE_WIDE") endif() diff --git a/libc/config/baremetal/config.json b/libc/config/baremetal/config.json index 5edc045b5782..1c52cd0093e1 100644 --- a/libc/config/baremetal/config.json +++ b/libc/config/baremetal/config.json @@ -70,5 +70,10 @@ "LIBC_CONF_ENABLE_STRONG_STACK_PROTECTOR": { "value": false } + }, + "ctype": { + "LIBC_CONF_CTYPE_SMALLER_ASCII": { + "value": true + } } } diff --git a/libc/config/config.json b/libc/config/config.json index 088e94fb2c22..603fa005fcc5 100644 --- a/libc/config/config.json +++ b/libc/config/config.json @@ -99,6 +99,12 @@ "doc": "Inserts prefetch for write instructions (PREFETCHW) 
for memset on x86 to recover performance when hardware prefetcher is disabled." } }, + "ctype": { + "LIBC_CONF_CTYPE_SMALLER_ASCII": { + "value": false, + "doc": "Shrinks b36_char_to_int by assuming the character encoding is ASCII." + } + }, "codegen": { "LIBC_CONF_KEEP_FRAME_POINTER": { "value": true, diff --git a/libc/docs/configure.rst b/libc/docs/configure.rst index 1e91a1f14da5..06d20df261bc 100644 --- a/libc/docs/configure.rst +++ b/libc/docs/configure.rst @@ -28,6 +28,8 @@ to learn about the defaults for your platform and target. * **"codegen" options** - ``LIBC_CONF_ENABLE_STRONG_STACK_PROTECTOR``: Enable -fstack-protector-strong to defend against stack smashing attack. - ``LIBC_CONF_KEEP_FRAME_POINTER``: Keep frame pointer in functions for better debugging experience. +* **"ctype" options** + - ``LIBC_CONF_CTYPE_SMALLER_ASCII``: Shrinks b36_char_to_int by assuming the character encoding is ASCII. * **"errno" options** - ``LIBC_CONF_ERRNO_MODE``: The implementation used for errno, acceptable values are LIBC_ERRNO_MODE_DEFAULT, LIBC_ERRNO_MODE_UNDEFINED, LIBC_ERRNO_MODE_THREAD_LOCAL, LIBC_ERRNO_MODE_SHARED, LIBC_ERRNO_MODE_EXTERNAL, and LIBC_ERRNO_MODE_SYSTEM_INLINE. * **"fenv" options** diff --git a/libc/src/__support/ctype_utils.h b/libc/src/__support/ctype_utils.h index 515eca18f9e4..e52f6ec425e2 100644 --- a/libc/src/__support/ctype_utils.h +++ b/libc/src/__support/ctype_utils.h @@ -371,6 +371,7 @@ LIBC_INLINE constexpr bool isalnum(char ch) { } } +#ifndef LIBC_COPT_CTYPE_SMALLER_ASCII LIBC_INLINE constexpr int b36_char_to_int(char ch) { switch (ch) { case '0': @@ -475,6 +476,19 @@ LIBC_INLINE constexpr int b36_char_to_int(char ch) { return 0; } } +#else // LIBC_COPT_CTYPE_SMALLER_ASCII +// This version assumes ASCII for the tolower, but generates smaller code since +// the switch version of this function ends up with a table. This should only be +// used when the target is known to be ASCII. 
+LIBC_INLINE constexpr int b36_char_to_int(char ch) { + if (ch >= '0' && ch <= '9') + return ch - '0'; + char ch_unsafe_lower = ch | 32; + if (ch_unsafe_lower >= 'a' && ch_unsafe_lower <= 'z') + return ch_unsafe_lower - 'a' + 10; + return 0; +} +#endif // LIBC_COPT_CTYPE_SMALLER_ASCII LIBC_INLINE constexpr char int_to_b36_char(int num) { // Can't actually use LIBC_ASSERT here because it depends on integer_to_string