[libc] Add a smaller b36_char_to_int (#180841)

For ASCII systems, b36_char_to_int gets compiled into a jump table. That
jump table ends up being pretty large because it covers the range from
'0' (48) to 'z' (122). On size-constrained systems that can assume
ASCII, this patch provides a new flag: LIBC_CONF_CTYPE_SMALLER_ASCII
that forces a smaller implementation that doesn't compile into a jump
table.
This commit is contained in:
Michael Jones 2026-03-17 15:36:41 -07:00 committed by GitHub
parent d17ce9a6fb
commit 838b1ccdd9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 31 additions and 0 deletions

View File

@ -147,6 +147,10 @@ function(_get_compile_options_from_config output_var)
endif()
endif()
if(LIBC_CONF_CTYPE_SMALLER_ASCII)
list(APPEND config_options "-DLIBC_COPT_CTYPE_SMALLER_ASCII")
endif()
if(LIBC_CONF_PRINTF_DISABLE_WIDE)
list(APPEND config_options "-DLIBC_COPT_PRINTF_DISABLE_WIDE")
endif()

View File

@ -70,5 +70,10 @@
"LIBC_CONF_ENABLE_STRONG_STACK_PROTECTOR": {
"value": false
}
},
"ctype": {
"LIBC_CONF_CTYPE_SMALLER_ASCII": {
"value": true
}
}
}

View File

@ -99,6 +99,12 @@
"doc": "Inserts prefetch for write instructions (PREFETCHW) for memset on x86 to recover performance when hardware prefetcher is disabled."
}
},
"ctype": {
"LIBC_CONF_CTYPE_SMALLER_ASCII": {
"value": false,
"doc": "Shrinks b36_char_to_int by assuming the character encoding is ASCII."
}
},
"codegen": {
"LIBC_CONF_KEEP_FRAME_POINTER": {
"value": true,

View File

@ -28,6 +28,8 @@ to learn about the defaults for your platform and target.
* **"codegen" options**
- ``LIBC_CONF_ENABLE_STRONG_STACK_PROTECTOR``: Enable -fstack-protector-strong to defend against stack smashing attack.
- ``LIBC_CONF_KEEP_FRAME_POINTER``: Keep frame pointer in functions for better debugging experience.
* **"ctype" options**
- ``LIBC_CONF_CTYPE_SMALLER_ASCII``: Shrinks b36_char_to_int by assuming the character encoding is ASCII.
* **"errno" options**
- ``LIBC_CONF_ERRNO_MODE``: The implementation used for errno, acceptable values are LIBC_ERRNO_MODE_DEFAULT, LIBC_ERRNO_MODE_UNDEFINED, LIBC_ERRNO_MODE_THREAD_LOCAL, LIBC_ERRNO_MODE_SHARED, LIBC_ERRNO_MODE_EXTERNAL, and LIBC_ERRNO_MODE_SYSTEM_INLINE.
* **"fenv" options**

View File

@ -371,6 +371,7 @@ LIBC_INLINE constexpr bool isalnum(char ch) {
}
}
#ifndef LIBC_COPT_CTYPE_SMALLER_ASCII
LIBC_INLINE constexpr int b36_char_to_int(char ch) {
switch (ch) {
case '0':
@ -475,6 +476,19 @@ LIBC_INLINE constexpr int b36_char_to_int(char ch) {
return 0;
}
}
#else // LIBC_COPT_CTYPE_SMALLER_ASCII
// Compact variant of b36_char_to_int, selected when
// LIBC_COPT_CTYPE_SMALLER_ASCII is defined. Instead of switching over every
// character (which ends up compiled into a table), it folds letter case by
// setting bit 5 (0x20). That fold is only valid for ASCII, so the assumption is
// verified at compile time rather than left to the build configuration.
//
// Returns the base-36 digit value of ch: 0-9 for '0'-'9', 10-35 for 'a'-'z' and
// 'A'-'Z', and 0 for every other character.
LIBC_INLINE constexpr int b36_char_to_int(char ch) {
  static_assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61,
                "LIBC_COPT_CTYPE_SMALLER_ASCII requires an ASCII character set");
  if (ch >= '0' && ch <= '9')
    return ch - '0';
  // Setting bit 5 maps 'A'-'Z' onto 'a'-'z'. Non-letters may be altered as
  // well, but in ASCII only letters can land inside 'a'-'z' after the fold, so
  // the range check below still rejects everything else.
  const char ch_unsafe_lower = static_cast<char>(ch | 0x20);
  if (ch_unsafe_lower >= 'a' && ch_unsafe_lower <= 'z')
    return ch_unsafe_lower - 'a' + 10;
  return 0;
}
#endif // LIBC_COPT_CTYPE_SMALLER_ASCII
LIBC_INLINE constexpr char int_to_b36_char(int num) {
// Can't actually use LIBC_ASSERT here because it depends on integer_to_string