From 174bb4d2f16dc0ef2b497846521c1ec7f3371eca Mon Sep 17 00:00:00 2001 From: Bartosz Taudul Date: Fri, 26 Mar 2021 01:53:49 +0100 Subject: [PATCH] Extract source code tokenizer to a separate file. --- profiler/build/win32/Tracy.vcxproj | 2 + profiler/build/win32/Tracy.vcxproj.filters | 6 + server/TracySourceTokenizer.cpp | 315 +++++++++++++++++++++ server/TracySourceTokenizer.hpp | 53 ++++ server/TracySourceView.cpp | 304 +------------------- server/TracySourceView.hpp | 51 +--- 6 files changed, 382 insertions(+), 349 deletions(-) create mode 100644 server/TracySourceTokenizer.cpp create mode 100644 server/TracySourceTokenizer.hpp diff --git a/profiler/build/win32/Tracy.vcxproj b/profiler/build/win32/Tracy.vcxproj index dfd6b505..e946c2f4 100644 --- a/profiler/build/win32/Tracy.vcxproj +++ b/profiler/build/win32/Tracy.vcxproj @@ -123,6 +123,7 @@ + @@ -213,6 +214,7 @@ + diff --git a/profiler/build/win32/Tracy.vcxproj.filters b/profiler/build/win32/Tracy.vcxproj.filters index 936a5338..889c26ee 100644 --- a/profiler/build/win32/Tracy.vcxproj.filters +++ b/profiler/build/win32/Tracy.vcxproj.filters @@ -219,6 +219,9 @@ imgui\freetype + + server + @@ -524,6 +527,9 @@ imgui\freetype + + server + diff --git a/server/TracySourceTokenizer.cpp b/server/TracySourceTokenizer.cpp new file mode 100644 index 00000000..03e64582 --- /dev/null +++ b/server/TracySourceTokenizer.cpp @@ -0,0 +1,315 @@ +#include "tracy_robin_hood.h" +#include "TracyCharUtil.hpp" +#include "TracySourceTokenizer.hpp" + +namespace tracy +{ + +namespace { +static unordered_flat_set GetKeywords() +{ + unordered_flat_set ret; + for( auto& v : { + "alignas", "alignof", "and", "and_eq", "asm", "atomic_cancel", "atomic_commit", "atomic_noexcept", + "bitand", "bitor", "break", "case", "catch", "class", "compl", "concept", "const", "consteval", + "constexpr", "constinit", "const_cast", "continue", "co_await", "co_return", "co_yield", "decltype", + "default", "delete", "do", "dynamic_cast", "else", "enum", "explicit", "export", "extern", "for", + "friend", "if", "inline", "mutable", "namespace", "new", "noexcept", "not", "not_eq", "operator", + "or", "or_eq", "private", "protected", "public", "reflexpr", "register", "reinterpret_cast", + "return", "requires", "sizeof", "static", "static_assert", "static_cast", "struct", "switch", + "synchronized", "template", "thread_local", "throw", "try", "typedef", "typeid", "typename", + "union", "using", "virtual", "volatile", "while", "xor", "xor_eq", "override", "final", "import", + "module", "transaction_safe", "transaction_safe_dynamic" } ) + { + ret.insert( v ); + } + return ret; +} +static unordered_flat_set GetTypes() +{ + unordered_flat_set ret; + for( auto& v : { + "bool", "char", "char8_t", "char16_t", "char32_t", "double", "float", "int", "long", "short", "signed", + "unsigned", "void", "wchar_t", "size_t", "int8_t", "int16_t", "int32_t", "int64_t", "int_fast8_t", + "int_fast16_t", "int_fast32_t", "int_fast64_t", "int_least8_t", "int_least16_t", "int_least32_t", + "int_least64_t", "intmax_t", "intptr_t", "uint8_t", "uint16_t", "uint32_t", "uint64_t", "uint_fast8_t", + "uint_fast16_t", "uint_fast32_t", "uint_fast64_t", "uint_least8_t", "uint_least16_t", "uint_least32_t", + "uint_least64_t", "uintmax_t", "uintptr_t", "type_info", "bad_typeid", "bad_cast", "type_index", + "clock_t", "time_t", "tm", "timespec", "ptrdiff_t", "nullptr_t", "max_align_t", "auto", + + "__m64", "__m128", "__m128i", "__m128d", "__m256", "__m256i", "__m256d", "__m512", "__m512i", + "__m512d", "__mmask8", "__mmask16", "__mmask32", "__mmask64", + + "int8x8_t", "int16x4_t", "int32x2_t", "int64x1_t", "uint8x8_t", "uint16x4_t", "uint32x2_t", + "uint64x1_t", "float32x2_t", "poly8x8_t", "poly16x4_t", "int8x16_t", "int16x8_t", "int32x4_t", + "int64x2_t", "uint8x16_t", "uint16x8_t", "uint32x4_t", "uint64x2_t", "float32x4_t", "poly8x16_t", + "poly16x8_t", + + "int8x8x2_t", "int16x4x2_t", "int32x2x2_t", "int64x1x2_t", "uint8x8x2_t", "uint16x4x2_t", + "uint32x2x2_t", "uint64x1x2_t", "float32x2x2_t", "poly8x8x2_t", "poly16x4x2_t", "int8x16x2_t", + "int16x8x2_t", "int32x4x2_t", "int64x2x2_t", "uint8x16x2_t", "uint16x8x2_t", "uint32x4x2_t", + "uint64x2x2_t", "float32x4x2_t", "poly8x16x2_t", "poly16x8x2_t", + + "int8x8x3_t", "int16x4x3_t", "int32x2x3_t", "int64x1x3_t", "uint8x8x3_t", "uint16x4x3_t", + "uint32x2x3_t", "uint64x1x3_t", "float32x2x3_t", "poly8x8x3_t", "poly16x4x3_t", "int8x16x3_t", + "int16x8x3_t", "int32x4x3_t", "int64x2x3_t", "uint8x16x3_t", "uint16x8x3_t", "uint32x4x3_t", + "uint64x2x3_t", "float32x4x3_t", "poly8x16x3_t", "poly16x8x3_t", + + "int8x8x4_t", "int16x4x4_t", "int32x2x4_t", "int64x1x4_t", "uint8x8x4_t", "uint16x4x4_t", + "uint32x2x4_t", "uint64x1x4_t", "float32x2x4_t", "poly8x8x4_t", "poly16x4x4_t", "int8x16x4_t", + "int16x8x4_t", "int32x4x4_t", "int64x2x4_t", "uint8x16x4_t", "uint16x8x4_t", "uint32x4x4_t", + "uint64x2x4_t", "float32x4x4_t", "poly8x16x4_t", "poly16x8x4_t" } ) + { + ret.insert( v ); + } + return ret; +} +static unordered_flat_set GetSpecial() +{ + unordered_flat_set ret; + for( auto& v : { "this", "nullptr", "true", "false", "goto", "NULL" } ) + { + ret.insert( v ); + } + return ret; +} +} + +void Tokenizer::Reset() +{ + m_isInComment = false; + m_isInPreprocessor = false; +} + +std::vector Tokenizer::Tokenize( const char* begin, const char* end ) +{ + std::vector ret; + if( m_isInPreprocessor ) + { + if( begin == end ) + { + m_isInPreprocessor = false; + return ret; + } + if( *(end-1) != '\\' ) m_isInPreprocessor = false; + ret.emplace_back( Token { begin, end, TokenColor::Preprocessor } ); + return ret; + } + const bool first = !m_isInComment; + while( begin != end ) + { + if( m_isInComment ) + { + const auto pos = begin; + for(;;) + { + while( begin != end && *begin != '*' ) begin++; + begin++; + if( begin < end ) + { + if( *begin == '/' ) + { + begin++; + ret.emplace_back( Token { pos, begin, TokenColor::Comment } ); + m_isInComment = false; + break; + } + } + else + { + ret.emplace_back( Token { pos, end, TokenColor::Comment } ); + return ret; + } + } + } + else + { + while( begin != end && isspace( *begin ) ) begin++; + if( first && begin < end && *begin == '#' ) + { + if( *(end-1) == '\\' ) m_isInPreprocessor = true; + ret.emplace_back( Token { begin, end, TokenColor::Preprocessor } ); + return ret; + } + const auto pos = begin; + const auto col = IdentifyToken( begin, end ); + ret.emplace_back( Token { pos, begin, col } ); + } + } + return ret; +} + +static bool TokenizeNumber( const char*& begin, const char* end ) +{ + const bool startNum = *begin >= '0' && *begin <= '9'; + if( *begin != '+' && *begin != '-' && !startNum ) return false; + begin++; + bool hasNum = startNum; + while( begin < end && ( ( *begin >= '0' && *begin <= '9' ) || *begin == '\'' ) ) + { + hasNum = true; + begin++; + } + if( !hasNum ) return false; + bool isFloat = false, isBinary = false; + if( begin < end ) + { + if( *begin == '.' ) + { + isFloat = true; + begin++; + while( begin < end && ( ( *begin >= '0' && *begin <= '9' ) || *begin == '\'' ) ) begin++; + } + else if( *begin == 'x' || *begin == 'X' ) + { + // hexadecimal + begin++; + while( begin < end && ( ( *begin >= '0' && *begin <= '9' ) || ( *begin >= 'a' && *begin <= 'f' ) || ( *begin >= 'A' && *begin <= 'F' ) || *begin == '\'' ) ) begin++; + } + else if( *begin == 'b' || *begin == 'B' ) + { + isBinary = true; + begin++; + while( begin < end && ( ( *begin == '0' || *begin == '1' ) || *begin == '\'' ) ) begin++; + } + } + if( !isBinary ) + { + if( begin < end && ( *begin == 'e' || *begin == 'E' || *begin == 'p' || *begin == 'P' ) ) + { + isFloat = true; + begin++; + if( begin < end && ( *begin == '+' || *begin == '-' ) ) begin++; + bool hasDigits = false; + while( begin < end && ( ( *begin >= '0' && *begin <= '9' ) || ( *begin >= 'a' && *begin <= 'f' ) || ( *begin >= 'A' && *begin <= 'F' ) || *begin == '\'' ) ) + { + hasDigits = true; + begin++; + } + if( !hasDigits ) return false; + } + if( begin < end && ( *begin == 'f' || *begin == 'F' || *begin == 'l' || *begin == 'L' ) ) begin++; + } + if( !isFloat ) + { + while( begin < end && ( *begin == 'u' || *begin == 'U' || *begin == 'l' || *begin == 'L' ) ) begin++; + } + return true; +} + +Tokenizer::TokenColor Tokenizer::IdentifyToken( const char*& begin, const char* end ) +{ + static const auto s_keywords = GetKeywords(); + static const auto s_types = GetTypes(); + static const auto s_special = GetSpecial(); + + if( *begin == '"' ) + { + begin++; + while( begin < end ) + { + if( *begin == '"' ) + { + begin++; + break; + } + begin += 1 + ( *begin == '\\' && end - begin > 1 && *(begin+1) == '"' ); + } + return TokenColor::String; + } + if( *begin == '\'' ) + { + begin++; + if( begin < end && *begin == '\\' ) begin++; + if( begin < end ) begin++; + if( begin < end && *begin == '\'' ) begin++; + return TokenColor::CharacterLiteral; + } + if( ( *begin >= 'a' && *begin <= 'z' ) || ( *begin >= 'A' && *begin <= 'Z' ) || *begin == '_' ) + { + const char* tmp = begin; + begin++; + while( begin < end && ( ( *begin >= 'a' && *begin <= 'z' ) || ( *begin >= 'A' && *begin <= 'Z' ) || ( *begin >= '0' && *begin <= '9' ) || *begin == '_' ) ) begin++; + if( begin - tmp <= 24 ) + { + char buf[25]; + memcpy( buf, tmp, begin-tmp ); + buf[begin-tmp] = '\0'; + if( s_keywords.find( buf ) != s_keywords.end() ) return TokenColor::Keyword; + if( s_types.find( buf ) != s_types.end() ) return TokenColor::Type; + if( s_special.find( buf ) != s_special.end() ) return TokenColor::Special; + } + return TokenColor::Default; + } + const char* tmp = begin; + if( TokenizeNumber( begin, end ) ) return TokenColor::Number; + begin = tmp; + if( *begin == '/' && end - begin > 1 ) + { + if( *(begin+1) == '/' ) + { + begin = end; + return TokenColor::Comment; + } + if( *(begin+1) == '*' ) + { + begin += 2; + for(;;) + { + while( begin < end && *begin != '*' ) begin++; + if( begin == end ) + { + m_isInComment = true; + return TokenColor::Comment; + } + begin++; + if( begin < end && *begin == '/' ) + { + begin++; + return TokenColor::Comment; + } + } + } + } + while( begin < end ) + { + switch( *begin ) + { + case '[': + case ']': + case '{': + case '}': + case '!': + case '%': + case '^': + case '&': + case '*': + case '(': + case ')': + case '-': + case '+': + case '=': + case '~': + case '|': + case '<': + case '>': + case '?': + case ':': + case '/': + case ';': + case ',': + case '.': + begin++; + break; + default: + goto out; + } + } +out: + if( begin != tmp ) return TokenColor::Punctuation; + begin = end; + return TokenColor::Default; +} + + +} diff --git a/server/TracySourceTokenizer.hpp b/server/TracySourceTokenizer.hpp new file mode 100644 index 00000000..68818f19 --- /dev/null +++ b/server/TracySourceTokenizer.hpp @@ -0,0 +1,53 @@ +#ifndef __TRACYSOURCETOKENIZER_HPP__ +#define __TRACYSOURCETOKENIZER_HPP__ + +#include +#include + +namespace tracy +{ + +class Tokenizer +{ +public: + enum class TokenColor : uint8_t + { + Default, + Comment, + Preprocessor, + String, + CharacterLiteral, + Keyword, + Number, + Punctuation, + Type, + Special + }; + + struct Token + { + const char* begin; + const char* end; + TokenColor color; + }; + + struct Line + { + const char* begin; + const char* end; + std::vector tokens; + }; + + void Reset(); + std::vector Tokenize( const char* begin, const char* end ); + +private: + TokenColor IdentifyToken( const char*& begin, const char* end ); + + bool m_isInComment; + bool m_isInPreprocessor; +}; + +} + +#endif diff --git a/server/TracySourceView.cpp b/server/TracySourceView.cpp index 71286426..96e3312a 100644 --- a/server/TracySourceView.cpp +++ b/server/TracySourceView.cpp @@ -496,7 +496,7 @@ void SourceView::ParseSource( const char* fileName, const Worker& worker, const { auto end = txt; while( *end != '\n' && *end != '\r' && end - m_data < sz ) end++; - m_lines.emplace_back( Line { txt, end, Tokenize( txt, end ) } ); + m_lines.emplace_back( Tokenizer::Line { txt, end, m_tokenizer.Tokenize( txt, end ) } ); if( end - m_data == sz ) break; if( *end == '\n' ) { @@ -2252,7 +2252,7 @@ static const ImVec4 SyntaxColors[] = { { 0.21f, 0.69f, 0.89f, 1 }, // special }; -void SourceView::RenderLine( const Line& line, int lineNum, uint32_t ipcnt, uint32_t iptotal, uint32_t ipmax, const Worker* worker ) +void SourceView::RenderLine( const Tokenizer::Line& line, int lineNum, uint32_t ipcnt, uint32_t iptotal, uint32_t ipmax, const Worker* worker ) { const auto ty = ImGui::GetFontSize(); auto draw = ImGui::GetWindowDrawList(); @@ -3294,306 +3294,6 @@ uint32_t SourceView::CountAsmIpStats( uint64_t addr, const Worker& worker, bool } } -namespace { -static unordered_flat_set GetKeywords() -{ - unordered_flat_set ret; - for( auto& v : { - "alignas", "alignof", "and", "and_eq", "asm", "atomic_cancel", "atomic_commit", "atomic_noexcept", - "bitand", "bitor", "break", "case", "catch", "class", "compl", "concept", "const", "consteval", - "constexpr", "constinit", "const_cast", "continue", "co_await", "co_return", "co_yield", "decltype", - "default", "delete", "do", "dynamic_cast", "else", "enum", "explicit", "export", "extern", "for", - "friend", "if", "inline", "mutable", "namespace", "new", "noexcept", "not", "not_eq", "operator", - "or", "or_eq", "private", "protected", "public", "reflexpr", "register", "reinterpret_cast", - "return", "requires", "sizeof", "static", "static_assert", "static_cast", "struct", "switch", - "synchronized", "template", "thread_local", "throw", "try", "typedef", "typeid", "typename", - "union", "using", "virtual", "volatile", "while", "xor", "xor_eq", "override", "final", "import", - "module", "transaction_safe", "transaction_safe_dynamic" } ) - { - ret.insert( v ); - } - return ret; -} -static unordered_flat_set GetTypes() -{ - unordered_flat_set ret; - for( auto& v : { - "bool", "char", "char8_t", "char16_t", "char32_t", "double", "float", "int", "long", "short", "signed", - "unsigned", "void", "wchar_t", "size_t", "int8_t", "int16_t", "int32_t", "int64_t", "int_fast8_t", - "int_fast16_t", "int_fast32_t", "int_fast64_t", "int_least8_t", "int_least16_t", "int_least32_t", - "int_least64_t", "intmax_t", "intptr_t", "uint8_t", "uint16_t", "uint32_t", "uint64_t", "uint_fast8_t", - "uint_fast16_t", "uint_fast32_t", "uint_fast64_t", "uint_least8_t", "uint_least16_t", "uint_least32_t", - "uint_least64_t", "uintmax_t", "uintptr_t", "type_info", "bad_typeid", "bad_cast", "type_index", - "clock_t", "time_t", "tm", "timespec", "ptrdiff_t", "nullptr_t", "max_align_t", "auto", - - "__m64", "__m128", "__m128i", "__m128d", "__m256", "__m256i", "__m256d", "__m512", "__m512i", - "__m512d", "__mmask8", "__mmask16", "__mmask32", "__mmask64", - - "int8x8_t", "int16x4_t", "int32x2_t", "int64x1_t", "uint8x8_t", "uint16x4_t", "uint32x2_t", - "uint64x1_t", "float32x2_t", "poly8x8_t", "poly16x4_t", "int8x16_t", "int16x8_t", "int32x4_t", - "int64x2_t", "uint8x16_t", "uint16x8_t", "uint32x4_t", "uint64x2_t", "float32x4_t", "poly8x16_t", - "poly16x8_t", - - "int8x8x2_t", "int16x4x2_t", "int32x2x2_t", "int64x1x2_t", "uint8x8x2_t", "uint16x4x2_t", - "uint32x2x2_t", "uint64x1x2_t", "float32x2x2_t", "poly8x8x2_t", "poly16x4x2_t", "int8x16x2_t", - "int16x8x2_t", "int32x4x2_t", "int64x2x2_t", "uint8x16x2_t", "uint16x8x2_t", "uint32x4x2_t", - "uint64x2x2_t", "float32x4x2_t", "poly8x16x2_t", "poly16x8x2_t", - - "int8x8x3_t", "int16x4x3_t", "int32x2x3_t", "int64x1x3_t", "uint8x8x3_t", "uint16x4x3_t", - "uint32x2x3_t", "uint64x1x3_t", "float32x2x3_t", "poly8x8x3_t", "poly16x4x3_t", "int8x16x3_t", - "int16x8x3_t", "int32x4x3_t", "int64x2x3_t", "uint8x16x3_t", "uint16x8x3_t", "uint32x4x3_t", - "uint64x2x3_t", "float32x4x3_t", "poly8x16x3_t", "poly16x8x3_t", - - "int8x8x4_t", "int16x4x4_t", "int32x2x4_t", "int64x1x4_t", "uint8x8x4_t", "uint16x4x4_t", - "uint32x2x4_t", "uint64x1x4_t", "float32x2x4_t", "poly8x8x4_t", "poly16x4x4_t", "int8x16x4_t", - "int16x8x4_t", "int32x4x4_t", "int64x2x4_t", "uint8x16x4_t", "uint16x8x4_t", "uint32x4x4_t", - "uint64x2x4_t", "float32x4x4_t", "poly8x16x4_t", "poly16x8x4_t" } ) - { - ret.insert( v ); - } - return ret; -} -static unordered_flat_set GetSpecial() -{ - unordered_flat_set ret; - for( auto& v : { "this", "nullptr", "true", "false", "goto", "NULL" } ) - { - ret.insert( v ); - } - return ret; -} -} - -static bool TokenizeNumber( const char*& begin, const char* end ) -{ - const bool startNum = *begin >= '0' && *begin <= '9'; - if( *begin != '+' && *begin != '-' && !startNum ) return false; - begin++; - bool hasNum = startNum; - while( begin < end && ( ( *begin >= '0' && *begin <= '9' ) || *begin == '\'' ) ) - { - hasNum = true; - begin++; - } - if( !hasNum ) return false; - bool isFloat = false, isBinary = false; - if( begin < end ) - { - if( *begin == '.' ) - { - isFloat = true; - begin++; - while( begin < end && ( ( *begin >= '0' && *begin <= '9' ) || *begin == '\'' ) ) begin++; - } - else if( *begin == 'x' || *begin == 'X' ) - { - // hexadecimal - begin++; - while( begin < end && ( ( *begin >= '0' && *begin <= '9' ) || ( *begin >= 'a' && *begin <= 'f' ) || ( *begin >= 'A' && *begin <= 'F' ) || *begin == '\'' ) ) begin++; - } - else if( *begin == 'b' || *begin == 'B' ) - { - isBinary = true; - begin++; - while( begin < end && ( ( *begin == '0' || *begin == '1' ) || *begin == '\'' ) ) begin++; - } - } - if( !isBinary ) - { - if( begin < end && ( *begin == 'e' || *begin == 'E' || *begin == 'p' || *begin == 'P' ) ) - { - isFloat = true; - begin++; - if( begin < end && ( *begin == '+' || *begin == '-' ) ) begin++; - bool hasDigits = false; - while( begin < end && ( ( *begin >= '0' && *begin <= '9' ) || ( *begin >= 'a' && *begin <= 'f' ) || ( *begin >= 'A' && *begin <= 'F' ) || *begin == '\'' ) ) - { - hasDigits = true; - begin++; - } - if( !hasDigits ) return false; - } - if( begin < end && ( *begin == 'f' || *begin == 'F' || *begin == 'l' || *begin == 'L' ) ) begin++; - } - if( !isFloat ) - { - while( begin < end && ( *begin == 'u' || *begin == 'U' || *begin == 'l' || *begin == 'L' ) ) begin++; - } - return true; -} - -SourceView::TokenColor SourceView::IdentifyToken( const char*& begin, const char* end ) -{ - static const auto s_keywords = GetKeywords(); - static const auto s_types = GetTypes(); - static const auto s_special = GetSpecial(); - - if( *begin == '"' ) - { - begin++; - while( begin < end ) - { - if( *begin == '"' ) - { - begin++; - break; - } - begin += 1 + ( *begin == '\\' && end - begin > 1 && *(begin+1) == '"' ); - } - return TokenColor::String; - } - if( *begin == '\'' ) - { - begin++; - if( begin < end && *begin == '\\' ) begin++; - if( begin < end ) begin++; - if( begin < end && *begin == '\'' ) begin++; - return TokenColor::CharacterLiteral; - } - if( ( *begin >= 'a' && *begin <= 'z' ) || ( *begin >= 'A' && *begin <= 'Z' ) || *begin == '_' ) - { - const char* tmp = begin; - begin++; - while( begin < end && ( ( *begin >= 'a' && *begin <= 'z' ) || ( *begin >= 'A' && *begin <= 'Z' ) || ( *begin >= '0' && *begin <= '9' ) || *begin == '_' ) ) begin++; - if( begin - tmp <= 24 ) - { - char buf[25]; - memcpy( buf, tmp, begin-tmp ); - buf[begin-tmp] = '\0'; - if( s_keywords.find( buf ) != s_keywords.end() ) return TokenColor::Keyword; - if( s_types.find( buf ) != s_types.end() ) return TokenColor::Type; - if( s_special.find( buf ) != s_special.end() ) return TokenColor::Special; - } - return TokenColor::Default; - } - const char* tmp = begin; - if( TokenizeNumber( begin, end ) ) return TokenColor::Number; - begin = tmp; - if( *begin == '/' && end - begin > 1 ) - { - if( *(begin+1) == '/' ) - { - begin = end; - return TokenColor::Comment; - } - if( *(begin+1) == '*' ) - { - begin += 2; - for(;;) - { - while( begin < end && *begin != '*' ) begin++; - if( begin == end ) - { - m_tokenizer.isInComment = true; - return TokenColor::Comment; - } - begin++; - if( begin < end && *begin == '/' ) - { - begin++; - return TokenColor::Comment; - } - } - } - } - while( begin < end ) - { - switch( *begin ) - { - case '[': - case ']': - case '{': - case '}': - case '!': - case '%': - case '^': - case '&': - case '*': - case '(': - case ')': - case '-': - case '+': - case '=': - case '~': - case '|': - case '<': - case '>': - case '?': - case ':': - case '/': - case ';': - case ',': - case '.': - begin++; - break; - default: - goto out; - } - } -out: - if( begin != tmp ) return TokenColor::Punctuation; - begin = end; - return TokenColor::Default; -} - -std::vector SourceView::Tokenize( const char* begin, const char* end ) -{ - std::vector ret; - if( m_tokenizer.isInPreprocessor ) - { - if( begin == end ) - { - m_tokenizer.isInPreprocessor = false; - return ret; - } - if( *(end-1) != '\\' ) m_tokenizer.isInPreprocessor = false; - ret.emplace_back( Token { begin, end, TokenColor::Preprocessor } ); - return ret; - } - const bool first = !m_tokenizer.isInComment; - while( begin != end ) - { - if( m_tokenizer.isInComment ) - { - const auto pos = begin; - for(;;) - { - while( begin != end && *begin != '*' ) begin++; - begin++; - if( begin < end ) - { - if( *begin == '/' ) - { - begin++; - ret.emplace_back( Token { pos, begin, TokenColor::Comment } ); - m_tokenizer.isInComment = false; - break; - } - } - else - { - ret.emplace_back( Token { pos, end, TokenColor::Comment } ); - return ret; - } - } - } - else - { - while( begin != end && isspace( *begin ) ) begin++; - if( first && begin < end && *begin == '#' ) - { - if( *(end-1) == '\\' ) m_tokenizer.isInPreprocessor = true; - ret.emplace_back( Token { begin, end, TokenColor::Preprocessor } ); - return ret; - } - const auto pos = begin; - const auto col = IdentifyToken( begin, end ); - ret.emplace_back( Token { pos, begin, col } ); - } - } - return ret; -} - void SourceView::SelectMicroArchitecture( const char* moniker ) { int idx = 0; diff --git a/server/TracySourceView.hpp b/server/TracySourceView.hpp index 8870118b..3d666810 100644 --- a/server/TracySourceView.hpp +++ b/server/TracySourceView.hpp @@ -8,6 +8,7 @@ #include "tracy_robin_hood.h" #include "TracyCharUtil.hpp" #include "TracyDecayValue.hpp" +#include "TracySourceTokenizer.hpp" #include "../common/TracyProtocol.hpp" struct ImFont; @@ -34,34 +35,6 @@ public: }; private: - enum class TokenColor : uint8_t - { - Default, - Comment, - Preprocessor, - String, - CharacterLiteral, - Keyword, - Number, - Punctuation, - Type, - Special - }; - - struct Token - { - const char* begin; - const char* end; - TokenColor color; - }; - - struct Line - { - const char* begin; - const char* end; - std::vector tokens; - }; - struct AsmOpParams { uint8_t type; @@ -153,7 +126,7 @@ private: void RenderSymbolSourceView( uint32_t iptotal, unordered_flat_map ipcount, unordered_flat_map ipcountAsm, uint32_t ipmax, const Worker& worker, const View& view ); uint64_t RenderSymbolAsmView( uint32_t iptotal, unordered_flat_map ipcount, uint32_t ipmax, const Worker& worker, View& view ); - void RenderLine( const Line& line, int lineNum, uint32_t ipcnt, uint32_t iptotal, uint32_t ipmax, const Worker* worker ); + void RenderLine( const Tokenizer::Line& line, int lineNum, uint32_t ipcnt, uint32_t iptotal, uint32_t ipmax, const Worker* worker ); void RenderAsmLine( AsmLine& line, uint32_t ipcnt, uint32_t iptotal, uint32_t ipmax, const Worker& worker, uint64_t& jumpOut, int maxAddrLen, View& view ); void SelectLine( uint32_t line, const Worker* worker, bool changeAsmLine = true, uint64_t targetAddr = 0 ); @@ -165,9 +138,6 @@ private: void SelectMicroArchitecture( const char* moniker ); - TokenColor IdentifyToken( const char*& begin, const char* end ); - std::vector Tokenize( const char* begin, const char* end ); - void ResetAsm(); void FollowRead( size_t line, RegsX86 reg, size_t limit ); void FollowWrite( size_t line, RegsX86 reg, size_t limit ); @@ -178,18 +148,6 @@ private: void Save( const Worker& worker, size_t start = 0, size_t stop = std::numeric_limits::max() ); #endif - struct TokenizerState - { - void Reset() - { - isInComment = false; - isInPreprocessor = false; - } - - bool isInComment; - bool isInPreprocessor; - }; - ImFont* m_font; const char* m_file; uint32_t m_fileStringIdx; @@ -217,7 +175,7 @@ private: bool m_atnt; uint64_t m_jumpPopupAddr; - std::vector m_lines; + std::vector m_lines; std::vector m_asm; unordered_flat_map m_locMap; @@ -233,8 +191,6 @@ private: uint32_t m_maxLine; int m_maxMnemonicLen; - TokenizerState m_tokenizer; - unordered_flat_map m_microArchOpMap; CpuArchitecture m_cpuArch; int m_selMicroArch; @@ -250,6 +206,7 @@ private: float m_asmWidth; GetWindowCallback m_gwcb; + Tokenizer m_tokenizer; }; }