llvm-project/clang/lib/Lex/LexHLSLRootSignature.cpp
Finn Plummer 3dec46d9bf
[HLSL][RootSignature] Correct RootSignatureParser to use correct SourceLocation in diagnostics (#147084)
The `SourceLocation` of a `RootSignatureToken` is incorrectly set to be
the "offset" into the concatenated string that denotes the
root signature. This causes an issue when the `StringLiteral` is a
multi-line expansion macro, since the offset will not account for the
characters between `StringLiteral` tokens.

This PR resolves this by retaining the `SourceLocation` information that
is kept in `StringLiteral` and then converting the offset in the
concatenated string into the proper `SourceLocation` using the
`StringLiteral::getLocationOfByte` interface. To do so, we need to
adjust the `RootSignatureToken` to only hold its offset into the root
signature string. Then, when the parser uses the token, it needs
to compute the token's actual `SourceLocation`.

See linked issue for more context.

For example:

```
#define DemoRootSignature \
 "CBV(b0)," \
 "RootConstants(num32BitConstants = 3, b0, invalid)"
  expected caret location ---------------^
  actual caret location ------------^
```

The caret points 5 characters early because the current offset did not
account for the characters:
```
'"' ' ' '\' ' ' '"'
 1   2   3   4   5
```

- Updates `RootSignatureParser` to retain `SourceLocation` information
by retaining the `StringLiteral` and passing the underlying `StringRef`
to the `Lexer`
- Updates `RootSignatureLexer` so that the constructed tokens only
reflect an offset into the `StringRef`
- Updates `RootSignatureParser` to directly construct its used `Lexer`
so that the `StringLiteral` is directly tied with the string used in the
`RootSignatureLexer`
- Updates `RootSignatureParser` to use
`StringLiteral::getLocationOfByte` to get the actual token location for
diagnostics
- Updates `ParseHLSLRootSignatureTest` to construct a phony
`AST`/`StringLiteral` for the test cases
- Adds a test to `RootSignature-err.hlsl` showing that the
`SourceLocation` is correctly set for diagnostics in a multi-line macro
expansion

Resolves: https://github.com/llvm/llvm-project/issues/146967
2025-07-08 09:55:51 -07:00

138 lines
4.2 KiB
C++

//=== LexHLSLRootSignature.cpp - Lex Root Signature -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Lex/LexHLSLRootSignature.h"
namespace clang {
namespace hlsl {
// Shorthand for the token-kind enumeration; used by every lexer routine below.
using TokenKind = RootSignatureToken::Kind;
// Lexer Definitions
/// Returns true if \p C may appear inside the spelling of a numeric literal.
///
/// Accepted characters are the decimal digits, '.', the exponent markers
/// 'e'/'E' together with the signs '-'/'+', and the float suffix 'f'/'F'.
/// This is a per-character filter only: it does not check that a run of such
/// characters forms a well-formed number.
static bool isNumberChar(char C) {
  // The <cctype> classifiers have undefined behavior for negative arguments
  // other than EOF. Plain char may be signed, so convert through unsigned
  // char before classifying.
  return isdigit(static_cast<unsigned char>(C))             // integer support
         || C == '.'                                        // float support
         || C == 'e' || C == 'E' || C == '-' || C == '+'    // exponent support
         || C == 'f' || C == 'F';                           // explicit float support
}
/// Lexes one token from the front of Buffer and advances past the characters
/// that were consumed.
///
/// Leading whitespace is skipped first. The token is then classified, in
/// order, as:
///  - end_of_stream, when nothing remains after the whitespace;
///  - a punctuator, as listed in HLSLRootSignatureTokenKinds.def;
///  - an int or float literal, when it starts with a digit or '.';
///  - a register (b/t/u/s immediately followed by a digit), whose trailing
///    number is stored in NumSpelling;
///  - a keyword or enum, matched through StringSwitch::CaseLower;
///  - invalid otherwise.
///
/// Every returned token carries the LocOffset at which it started so the
/// parser can use it for diagnostics.
RootSignatureToken RootSignatureLexer::lexToken() {
  // The <cctype> classifiers take an int and have undefined behavior for
  // negative values other than EOF. The buffer holds plain char, which may be
  // signed and contain bytes >= 0x80, so always classify through unsigned
  // char.
  auto IsSpace = [](char Ch) {
    return isspace(static_cast<unsigned char>(Ch)) != 0;
  };
  auto IsDigit = [](char Ch) {
    return isdigit(static_cast<unsigned char>(Ch)) != 0;
  };
  auto IsIdentifierChar = [](char Ch) {
    return isalnum(static_cast<unsigned char>(Ch)) != 0 || Ch == '_';
  };

  // Discard any leading whitespace
  advanceBuffer(Buffer.take_while(IsSpace).size());

  if (isEndOfBuffer())
    return RootSignatureToken(TokenKind::end_of_stream, LocOffset);

  // Record where this token is in the text for usage in parser diagnostics
  RootSignatureToken Result(LocOffset);

  char C = Buffer.front();

  // Punctuators
  switch (C) {
#define PUNCTUATOR(X, Y)                                                       \
  case Y: {                                                                    \
    Result.TokKind = TokenKind::pu_##X;                                        \
    advanceBuffer();                                                           \
    return Result;                                                             \
  }
#include "clang/Lex/HLSLRootSignatureTokenKinds.def"
  default:
    break;
  }

  // Number literal
  if (IsDigit(C) || C == '.') {
    Result.NumSpelling = Buffer.take_while(isNumberChar);

    // If all values are digits then we have an int literal
    bool IsInteger = Result.NumSpelling.find_if_not(IsDigit) == StringRef::npos;

    Result.TokKind =
        IsInteger ? TokenKind::int_literal : TokenKind::float_literal;
    advanceBuffer(Result.NumSpelling.size());
    return Result;
  }

  // All following tokens require at least one additional character. Note that
  // the buffer is not advanced on this path.
  if (Buffer.size() <= 1) {
    Result = RootSignatureToken(TokenKind::invalid, LocOffset);
    return Result;
  }

  // Peek at the next character to determine token type
  char NextC = Buffer[1];

  // Registers: [tsub][0-9+]
  if ((C == 't' || C == 's' || C == 'u' || C == 'b') && IsDigit(NextC)) {
    // Convert character to the register type.
    switch (C) {
    case 'b':
      Result.TokKind = TokenKind::bReg;
      break;
    case 't':
      Result.TokKind = TokenKind::tReg;
      break;
    case 'u':
      Result.TokKind = TokenKind::uReg;
      break;
    case 's':
      Result.TokKind = TokenKind::sReg;
      break;
    default:
      llvm_unreachable("Switch for an expected token was not provided");
    }

    // Step over the register letter so only the number remains at the front
    advanceBuffer();

    // Lex the number that follows the register letter
    Result.NumSpelling = Buffer.take_while(isNumberChar);
    advanceBuffer(Result.NumSpelling.size());
    return Result;
  }

  // Keywords and Enums:
  StringRef TokSpelling = Buffer.take_while(IsIdentifierChar);

  // Define a large string switch statement for all the keywords and enums
  auto Switch = llvm::StringSwitch<TokenKind>(TokSpelling);
#define KEYWORD(NAME) Switch.CaseLower(#NAME, TokenKind::kw_##NAME);
#define ENUM(NAME, LIT) Switch.CaseLower(LIT, TokenKind::en_##NAME);
#include "clang/Lex/HLSLRootSignatureTokenKinds.def"

  // Then attempt to retrieve the matching kind; unmatched spellings are invalid
  Result.TokKind = Switch.Default(TokenKind::invalid);
  advanceBuffer(TokSpelling.size());
  return Result;
}
/// Returns the next token and moves the lexer past it.
///
/// A token cached by an earlier peek is handed back and the cache is cleared.
/// The exception is a cached end_of_stream token: it stays cached, and the
/// result comes from lexing again.
RootSignatureToken RootSignatureLexer::consumeToken() {
  const bool HasUsableLookahead =
      NextToken.has_value() && NextToken->TokKind != TokenKind::end_of_stream;

  // Nothing usable was peeked, so lex directly from the buffer
  if (!HasUsableLookahead)
    return lexToken();

  // Hand over the cached lookahead and empty the cache
  RootSignatureToken Cached = *NextToken;
  NextToken.reset();
  return Cached;
}
/// Returns the upcoming token without consuming it.
///
/// The token is lexed on the first peek and cached in NextToken; later peeks
/// return that cached copy until it is cleared.
RootSignatureToken RootSignatureLexer::peekNextToken() {
  // Only lex when no lookahead has been cached yet
  if (!NextToken)
    NextToken = lexToken();

  return *NextToken;
}
} // namespace hlsl
} // namespace clang