Vitaly Buka e76739eeb9
[libclang] Always Dup in createRef(StringRef) (#125020)
We can't guarantee that the underlying string is
0-terminated, or that String[String.size()] is even in the
same allocation.


https://lab.llvm.org/buildbot/#/builders/94/builds/4152/steps/17/logs/stdio
```
==c-index-test==1846256==WARNING: MemorySanitizer: use-of-uninitialized-value
    #0  in clang::cxstring::createRef(llvm::StringRef) llvm-project/clang/tools/libclang/CXString.cpp:96:36
    #1  in DumpCXCommentInternal llvm-project/clang/tools/c-index-test/c-index-test.c:521:39
    #2  in DumpCXCommentInternal llvm-project/clang/tools/c-index-test/c-index-test.c:674:7
    #3  in DumpCXCommentInternal llvm-project/clang/tools/c-index-test/c-index-test.c:674:7
    #4  in DumpCXComment llvm-project/clang/tools/c-index-test/c-index-test.c:685:3
    #5  in PrintCursorComments llvm-project/clang/tools/c-index-test/c-index-test.c:768:7

  Memory was marked as uninitialized
    #0  in __msan_allocated_memory llvm-project/compiler-rt/lib/msan/msan_interceptors.cpp:1023:5
    #1  in Allocate llvm-project/llvm/include/llvm/Support/Allocator.h:172:7
    #2  in Allocate llvm-project/llvm/include/llvm/Support/Allocator.h:216:12
    #3  in Allocate llvm-project/llvm/include/llvm/Support/AllocatorBase.h:53:43
    #4  in Allocate<char> llvm-project/llvm/include/llvm/Support/AllocatorBase.h:76:29
    #5  in convertCodePointToUTF8 llvm-project/clang/lib/AST/CommentLexer.cpp:42:30
    #6  in clang::comments::Lexer::resolveHTMLDecimalCharacterReference(llvm::StringRef) const llvm-project/clang/lib/AST/CommentLexer.cpp:76:10
    #7  in clang::comments::Lexer::lexHTMLCharacterReference(clang::comments::Token&) llvm-project/clang/lib/AST/CommentLexer.cpp:615:16
    #8  in consumeToken llvm-project/clang/include/clang/AST/CommentParser.h:62:9
    #9  in clang::comments::Parser::parseParagraphOrBlockCommand() llvm-project/clang/lib/AST/CommentParser.cpp
    #10 in clang::comments::Parser::parseFullComment() llvm-project/clang/lib/AST/CommentParser.cpp:925:22
    #11 in clang::RawComment::parse(clang::ASTContext const&, clang::Preprocessor const*, clang::Decl const*) const llvm-project/clang/lib/AST/RawCommentList.cpp:221:12
    #12 in clang::ASTContext::getCommentForDecl(clang::Decl const*, clang::Preprocessor const*) const llvm-project/clang/lib/AST/ASTContext.cpp:714:35
    #13 in clang_Cursor_getParsedComment llvm-project/clang/tools/libclang/CXComment.cpp:36:35
    #14 in PrintCursorComments llvm-project/clang/tools/c-index-test/c-index-test.c:756:25
 ```
2025-02-12 22:05:19 -08:00

189 lines
4.9 KiB
C++

//===- CXString.cpp - Routines for manipulating CXStrings -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines routines for manipulating CXStrings. It should be the
// only file that has internal knowledge of the encoding of the data in
// CXStrings.
//
//===----------------------------------------------------------------------===//
#include "CXString.h"
#include "CXTranslationUnit.h"
#include "clang-c/Index.h"
#include "clang/Frontend/ASTUnit.h"
#include "llvm/Support/ErrorHandling.h"
using namespace clang;
/// Tag stored in CXString::private_flags that records what CXString::data
/// points at and, therefore, how clang_disposeString() must release it.
enum CXStringFlag {
  /// The data is a 'const char *' that the CXString does not own; disposing
  /// it is a no-op.
  CXS_Unmanaged = 0,

  /// The data is a 'const char *' obtained from malloc(); disposing it calls
  /// free().
  CXS_Malloc = 1,

  /// The data is a CXStringBuf that has to be handed back to its
  /// CXStringPool when the string is disposed.
  CXS_StringBuf = 2
};
namespace clang {
namespace cxstring {
//===----------------------------------------------------------------------===//
// Basic generation of CXStrings.
//===----------------------------------------------------------------------===//
/// Build a CXString that refers to the static empty string literal "".
///
/// The result is flagged CXS_Unmanaged, so disposing it releases nothing.
CXString createEmpty() {
  CXString Empty;
  Empty.private_flags = CXS_Unmanaged;
  Empty.data = "";
  return Empty;
}
/// Build a CXString whose data pointer is null.
///
/// The result is flagged CXS_Unmanaged, so disposing it releases nothing.
CXString createNull() {
  CXString Null;
  Null.private_flags = CXS_Unmanaged;
  Null.data = nullptr;
  return Null;
}
/// Wrap a C string in a CXString without copying it.
///
/// \param String the pointer to reference; it is stored as-is (a null pointer
/// is stored as null) and is not owned by the result.
/// \returns the canonical empty string when \p String is non-null and empty,
/// otherwise an unmanaged CXString pointing at \p String.
CXString createRef(const char *String) {
  const bool IsEmptyString = String && *String == '\0';
  if (IsEmptyString)
    return createEmpty();

  CXString Ref;
  Ref.private_flags = CXS_Unmanaged;
  Ref.data = String;
  return Ref;
}
/// Create a CXString that owns a private copy of a C string.
///
/// \param String the NUL-terminated string to copy; may be null.
/// \returns a null CXString for a null pointer, the canonical empty string
/// for "", and otherwise a CXS_Malloc string holding a heap copy that
/// clang_disposeString() frees.
CXString createDup(const char *String) {
  if (!String)
    return createNull();

  if (String[0] == '\0')
    return createEmpty();

  // Share the StringRef overload's allocation path instead of calling
  // strdup() directly: strdup() returns null on allocation failure and that
  // result was stored unchecked, yielding a CXS_Malloc string with null data.
  return createDup(StringRef(String));
}
/// Create a CXString for the characters denoted by a StringRef.
///
/// Despite the name, a non-empty \p String is always copied (via createDup).
///
/// \returns a null CXString when \p String has a null data pointer, the
/// canonical empty string when it is empty, and a duplicated string otherwise.
CXString createRef(StringRef String) {
  if (String.data() == nullptr)
    return createNull();

  // A zero-length StringRef may still point into the middle of some other,
  // larger string. Hand back the canonical empty string rather than a
  // reference into that larger string.
  return String.empty() ? createEmpty() : createDup(String);
}
/// Create a CXString that owns a NUL-terminated heap copy of a StringRef.
///
/// \param String the characters to copy; it does not need to be
/// NUL-terminated, and it may be empty or have a null data pointer.
/// \returns a CXS_Malloc string of String.size() characters followed by a
/// terminating NUL; clang_disposeString() frees it.
CXString createDup(StringRef String) {
  const auto Length = String.size();
  char *Spelling = static_cast<char *>(llvm::safe_malloc(Length + 1));

  // String.data() may be null for an empty StringRef, and handing a null
  // pointer to memmove is undefined behaviour even when the length is zero,
  // so only copy when there is something to copy.
  if (Length != 0)
    memmove(Spelling, String.data(), Length);
  Spelling[Length] = '\0';

  CXString Result;
  Result.data = Spelling;
  Result.private_flags = (unsigned) CXS_Malloc;
  return Result;
}
/// Wrap a CXStringBuf in a CXString.
///
/// The result is flagged CXS_StringBuf, so disposing it calls dispose() on
/// \p buf instead of freeing memory directly.
CXString createCXString(CXStringBuf *buf) {
  CXString Wrapped;
  Wrapped.private_flags = static_cast<unsigned>(CXS_StringBuf);
  Wrapped.data = buf;
  return Wrapped;
}
/// Build a heap-allocated CXStringSet holding a duplicate of every string in
/// \p Strings, in the same order.
///
/// Both the set and its string array are allocated with new; the matching
/// release is clang_disposeStringSet().
CXStringSet *createSet(const std::vector<std::string> &Strings) {
  CXStringSet *Result = new CXStringSet;
  Result->Count = Strings.size();
  Result->Strings = new CXString[Result->Count];

  const unsigned NumStrings = Result->Count;
  unsigned Idx = 0;
  while (Idx < NumStrings) {
    Result->Strings[Idx] = createDup(Strings[Idx]);
    ++Idx;
  }
  return Result;
}
//===----------------------------------------------------------------------===//
// String pools.
//===----------------------------------------------------------------------===//
/// Delete every buffer that is currently parked in the pool.
CXStringPool::~CXStringPool() {
  for (CXStringBuf *Buf : Pool)
    delete Buf;
}
/// Hand out a string buffer, recycling a pooled one when possible.
///
/// \param TU passed to the CXStringBuf constructor when a new buffer has to
/// be created; a recycled buffer is returned without consulting it.
/// \returns the most recently pooled buffer with its Data cleared, or a newly
/// allocated buffer if the pool is empty.
CXStringBuf *CXStringPool::getCXStringBuf(CXTranslationUnit TU) {
  if (!Pool.empty()) {
    CXStringBuf *Recycled = Pool.back();
    Pool.pop_back();
    // Drop whatever the previous user left in the buffer.
    Recycled->Data.clear();
    return Recycled;
  }

  return new CXStringBuf(TU);
}
/// Fetch a string buffer from the string pool attached to \p TU.
CXStringBuf *getCXStringBuf(CXTranslationUnit TU) {
  auto &StringPool = *TU->StringPool;
  return StringPool.getCXStringBuf(TU);
}
/// Return this buffer to the string pool of its translation unit so that a
/// later getCXStringBuf() call can reuse it. The buffer is not deleted here.
void CXStringBuf::dispose() {
  auto &OwningPool = *TU->StringPool;
  OwningPool.Pool.push_back(this);
}
/// \returns true if \p str is flagged CXS_StringBuf, i.e. its data is a
/// CXStringBuf rather than a plain character pointer.
bool isManagedByPool(CXString str) {
  const CXStringFlag Kind = static_cast<CXStringFlag>(str.private_flags);
  return Kind == CXS_StringBuf;
}
} // end namespace cxstring
} // end namespace clang
//===----------------------------------------------------------------------===//
// libClang public APIs.
//===----------------------------------------------------------------------===//
/// Retrieve the character data of a CXString.
///
/// For a CXS_StringBuf string this is the buffer's Data.data(); for every
/// other kind it is the stored data pointer itself, which may be null.
const char *clang_getCString(CXString string) {
  if (string.private_flags != (unsigned) CXS_StringBuf)
    return static_cast<const char *>(string.data);

  const auto *Buf = static_cast<const cxstring::CXStringBuf *>(string.data);
  return Buf->Data.data();
}
/// Release whatever storage a CXString owns, according to its flag:
/// unmanaged strings own nothing, malloc'd strings are freed, and
/// buffer-backed strings hand their CXStringBuf back via dispose().
void clang_disposeString(CXString string) {
  const CXStringFlag Kind = static_cast<CXStringFlag>(string.private_flags);

  if (Kind == CXS_Malloc) {
    if (string.data)
      free(const_cast<void *>(string.data));
    return;
  }

  if (Kind == CXS_StringBuf) {
    auto *Buf =
        static_cast<cxstring::CXStringBuf *>(const_cast<void *>(string.data));
    Buf->dispose();
    return;
  }

  // CXS_Unmanaged: the string does not own its data, so there is nothing to
  // release.
}
/// Dispose every string in a CXStringSet, then free the string array and the
/// set itself.
///
/// \param set the set to release; a null pointer is accepted and ignored, in
/// the same way that free() and delete accept null.
void clang_disposeStringSet(CXStringSet *set) {
  // Without this guard a null set would be dereferenced below.
  if (!set)
    return;

  for (unsigned SI = 0, SE = set->Count; SI < SE; ++SI)
    clang_disposeString(set->Strings[SI]);
  delete[] set->Strings;
  delete set;
}