
This commit introduces basic annotations for `std::basic_string`,
mirroring the approach used in `std::vector` and `std::deque`.
Initially, only long strings with the default allocator will be
annotated. Short strings (_SSO - short string optimization_) and strings
with non-default allocators will be annotated in the near future, with
separate commits dedicated to enabling them. The process will be similar
to the workflow employed for enabling annotations in `std::deque`.
**Please note**: these annotations function effectively only when libc++
and libc++abi dylibs are instrumented (with ASan). This aligns with the
prevailing behavior of Memory Sanitizer.
To avoid breaking everything, this commit also appends
`_LIBCPP_INSTRUMENTED_WITH_ASAN` to `__config_site` whenever libc++ is
compiled with ASan. If this macro is not defined, string annotations are
not enabled. However, linking a binary that does **not** annotate
strings with a dynamic library that annotates strings is not permitted.
Originally proposed here: https://reviews.llvm.org/D132769
Related patches on Phabricator:
- Turning on annotations for short strings:
https://reviews.llvm.org/D147680
- Turning on annotations for all allocators:
https://reviews.llvm.org/D146214
This PR is a part of a series of patches extending AddressSanitizer C++
container overflow detection capabilities by adding annotations, similar
to those existing in `std::vector` and `std::deque` collections. These
enhancements empower ASan to effectively detect instances where the
instrumented program attempts to access memory within a collection's
internal allocation that remains unused. This includes cases where
access occurs before or after the stored elements in `std::deque`, or
between the `std::basic_string`'s size (including the null terminator)
and capacity bounds.
The introduction of these annotations was spurred by a real-world
software bug discovered by Trail of Bits, involving an out-of-bounds
memory access during the comparison of two strings using the
`std::equal` function. This function was taking iterators
(`iter1_begin`, `iter1_end`, `iter2_begin`) to perform the comparison,
using a custom comparison function. When the `iter1` object exceeded the
length of `iter2`, an out-of-bounds read could occur on the `iter2`
object. Container sanitization, upon enabling these annotations, would
effectively identify and flag this potential vulnerability.
This Pull Request introduces basic annotations for `std::basic_string`.
Long strings exhibit structural similarities to `std::vector` and will
be annotated accordingly. Annotations for short strings are already
implemented, but will be turned on separately in a forthcoming commit. See [a
comment](https://github.com/llvm/llvm-project/pull/72677#issuecomment-1850554465)
below for the SSO issues known at the moment.
Due to the functionality introduced in
[D132522](https://reviews.llvm.org/D132522) (commit dd1b7b797a),
the `__sanitizer_annotate_contiguous_container` function now offers
compatibility with all allocators. However, enabling this support will
be done in a subsequent commit. For the time being, only strings with
the default allocator will be annotated.
If you have any questions, please email:
- advenam.tacet@trailofbits.com
- disconnect3d@trailofbits.com
130 lines
4.9 KiB
C++
130 lines
4.9 KiB
C++
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// <string>
|
|
// UNSUPPORTED: c++03, c++11, c++14
|
|
|
|
// template<class InputIterator,
|
|
// class Allocator = allocator<typename iterator_traits<InputIterator>::value_type>>
|
|
// basic_string(InputIterator, InputIterator, Allocator = Allocator())
|
|
// -> basic_string<typename iterator_traits<InputIterator>::value_type,
|
|
// char_traits<typename iterator_traits<InputIterator>::value_type>,
|
|
// Allocator>; // constexpr since C++20
|
|
//
|
|
// The deduction guide shall not participate in overload resolution if InputIterator
|
|
// is a type that does not qualify as an input iterator, or if Allocator is a type
|
|
// that does not qualify as an allocator.
|
|
|
|
#include <cassert>
|
|
#include <cstddef>
|
|
#include <iterator>
|
|
#include <string>
|
|
#include <type_traits>
|
|
|
|
#include "test_macros.h"
|
|
#include "test_allocator.h"
|
|
#include "min_allocator.h"
|
|
#include "asan_testing.h"
|
|
|
|
// An empty class with none of the members an iterator needs; used as a
// negative case for the iterator-pair deduction guide.
class NotAnIterator {};

// An iterator type chosen as a "not an input iterator" negative case for the
// deduction guide.
using NotAnInputIterator = std::back_insert_iterator<std::basic_string<char16_t>>;
|
|
|
|
// A type that exposes only `value_type` and no other allocator member
// (no allocate/deallocate); used as a "not an allocator" negative case for
// the deduction guide.
template <typename T>
struct NotAnAllocator {
  using value_type = T;
};
|
|
|
|
// Detection trait: CanDeduce<Iter, Alloc>::value is true when the expression
// `std::basic_string{Iter, Iter, Alloc}` is well-formed, i.e. when class
// template argument deduction succeeds for an iterator pair plus allocator.
//
// Primary template: selected when the expression below is ill-formed.
template <class Iter, class Alloc, class = void>
struct CanDeduce : std::false_type {};

// Partial specialization: the decltype is cast to void so that it matches the
// defaulted third parameter of the primary template; substitution failure
// inside it silently removes this specialization from consideration.
template <class Iter, class Alloc>
struct CanDeduce<Iter,
                 Alloc,
                 decltype((void)std::basic_string{std::declval<Iter>(), std::declval<Iter>(), std::declval<Alloc>()})>
    : std::true_type {};
|
|
|
|
static_assert(CanDeduce<char*, std::allocator<char>>::value);
|
|
static_assert(!CanDeduce<NotAnIterator, std::allocator<char>>::value);
|
|
static_assert(!CanDeduce<NotAnInputIterator, std::allocator<char16_t>>::value);
|
|
static_assert(!CanDeduce<char*, NotAnAllocator<char>>::value);
|
|
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
|
|
static_assert(CanDeduce<wchar_t*, std::allocator<wchar_t>>::value);
|
|
static_assert(!CanDeduce<wchar_t const*, NotAnAllocator<wchar_t>>::value);
|
|
#endif
|
|
|
|
// Exercises the iterator-pair deduction guide of std::basic_string.
//
// Each scope builds a string from the first 10 characters of a 14-character
// literal, letting CTAD pick the specialization, and then checks:
//   * the deduced value_type, traits_type and allocator_type,
//   * the resulting size and contents,
//   * is_string_asan_correct(s1) (from asan_testing.h; presumably validates
//     the ASan container annotations of the string - confirm in that header).
//
// Returns true so the function can also be evaluated inside a static_assert.
TEST_CONSTEXPR_CXX20 bool test() {
  // No allocator argument: the allocator must default to std::allocator<char>.
  {
    const char* s = "12345678901234";
    std::basic_string s1(s, s + 10); // Can't use {} here
    using S = decltype(s1);          // what type did we get?
    static_assert(std::is_same_v<S::value_type, char>);
    static_assert(std::is_same_v<S::traits_type, std::char_traits<char>>);
    static_assert(std::is_same_v<S::allocator_type, std::allocator<char>>);
    assert(s1.size() == 10);
    assert(s1.compare(0, s1.size(), s, s1.size()) == 0);
    LIBCPP_ASSERT(is_string_asan_correct(s1));
  }
  // Explicit std::allocator<char>.
  {
    const char* s = "12345678901234";
    std::basic_string s1{s, s + 10, std::allocator<char>{}};
    using S = decltype(s1); // what type did we get?
    static_assert(std::is_same_v<S::value_type, char>);
    static_assert(std::is_same_v<S::traits_type, std::char_traits<char>>);
    static_assert(std::is_same_v<S::allocator_type, std::allocator<char>>);
    assert(s1.size() == 10);
    assert(s1.compare(0, s1.size(), s, s1.size()) == 0);
    LIBCPP_ASSERT(is_string_asan_correct(s1));
  }
#ifndef TEST_HAS_NO_WIDE_CHARACTERS
  // wchar_t with test_allocator.
  {
    const wchar_t* s = L"12345678901234";
    std::basic_string s1{s, s + 10, test_allocator<wchar_t>{}};
    using S = decltype(s1); // what type did we get?
    static_assert(std::is_same_v<S::value_type, wchar_t>);
    static_assert(std::is_same_v<S::traits_type, std::char_traits<wchar_t>>);
    static_assert(std::is_same_v<S::allocator_type, test_allocator<wchar_t>>);
    assert(s1.size() == 10);
    assert(s1.compare(0, s1.size(), s, s1.size()) == 0);
    LIBCPP_ASSERT(is_string_asan_correct(s1));
  }
#endif
  // char16_t with min_allocator.
  {
    const char16_t* s = u"12345678901234";
    std::basic_string s1{s, s + 10, min_allocator<char16_t>{}};
    using S = decltype(s1); // what type did we get?
    static_assert(std::is_same_v<S::value_type, char16_t>);
    static_assert(std::is_same_v<S::traits_type, std::char_traits<char16_t>>);
    static_assert(std::is_same_v<S::allocator_type, min_allocator<char16_t>>);
    assert(s1.size() == 10);
    assert(s1.compare(0, s1.size(), s, s1.size()) == 0);
    LIBCPP_ASSERT(is_string_asan_correct(s1));
  }
  // char32_t with explicit_allocator.
  {
    const char32_t* s = U"12345678901234";
    std::basic_string s1{s, s + 10, explicit_allocator<char32_t>{}};
    using S = decltype(s1); // what type did we get?
    static_assert(std::is_same_v<S::value_type, char32_t>);
    static_assert(std::is_same_v<S::traits_type, std::char_traits<char32_t>>);
    static_assert(std::is_same_v<S::allocator_type, explicit_allocator<char32_t>>);
    assert(s1.size() == 10);
    assert(s1.compare(0, s1.size(), s, s1.size()) == 0);
    LIBCPP_ASSERT(is_string_asan_correct(s1));
  }

  return true;
}
|
|
|
|
// Entry point: runs test() at run time and, when TEST_STD_VER > 17,
// additionally evaluates it in a constant expression.
int main(int, char**) {
  test();
#if TEST_STD_VER > 17
  static_assert(test());
#endif

  return 0;
}
|