[mlir] Report line number from file rather than split (#150982)

Add convention for lexer if the last file is contained in the first,
then the first is used for error reporting. This requires that these two
overlap to make it easy to find the corresponding spots. Enables going
from

```
within split at mlir/test/IR/invalid.mlir::10 offset :6:9: error: reference to an undefined block
```

to

```
mlir/test/IR/invalid.mlir:15:9: error: reference to an undefined block
```

This does change the split to not produce always null terminated buffers
and tools that need it, need to do so themselves (which is mostly by copying -
this may have little actual impact as previously this was a copy too).
This commit is contained in:
Jacques Pienaar 2025-07-29 13:29:00 +02:00 committed by GitHub
parent b30034da0f
commit b6a98b934f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 127 additions and 37 deletions

View File

@ -639,6 +639,10 @@ public:
/// verified correctly, failure otherwise.
LogicalResult verify();
/// Register this handler with the given context. This is intended for use
/// with the splitAndProcessBuffer function.
void registerInContext(MLIRContext *ctx);
private:
/// Process a single diagnostic.
void process(Diagnostic &diag);

View File

@ -21,10 +21,16 @@
namespace llvm {
class MemoryBuffer;
class MemoryBufferRef;
} // namespace llvm
namespace mlir {
// A function that processes a chunk of a buffer and writes the result to an
// output stream.
using ChunkBufferHandler = function_ref<LogicalResult(
std::unique_ptr<llvm::MemoryBuffer> chunkBuffer,
const llvm::MemoryBufferRef &sourceBuffer, raw_ostream &os)>;
using NoSourceChunkBufferHandler = function_ref<LogicalResult(
std::unique_ptr<llvm::MemoryBuffer> chunkBuffer, raw_ostream &os)>;
extern inline const char *const kDefaultSplitMarker = "// -----";
@ -45,6 +51,15 @@ splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
ChunkBufferHandler processChunkBuffer, raw_ostream &os,
llvm::StringRef inputSplitMarker = kDefaultSplitMarker,
llvm::StringRef outputSplitMarker = "");
/// Same as above, but for case where the original buffer is not used while
/// processing the chunk.
LogicalResult
splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
NoSourceChunkBufferHandler processChunkBuffer,
raw_ostream &os,
llvm::StringRef inputSplitMarker = kDefaultSplitMarker,
llvm::StringRef outputSplitMarker = "");
} // namespace mlir
#endif // MLIR_SUPPORT_TOOLUTILITIES_H

View File

@ -821,15 +821,7 @@ SourceMgrDiagnosticVerifierHandler::SourceMgrDiagnosticVerifierHandler(
for (unsigned i = 0, e = mgr.getNumBuffers(); i != e; ++i)
(void)impl->computeExpectedDiags(out, mgr, mgr.getMemoryBuffer(i + 1));
// Register a handler to verify the diagnostics.
setHandler([&](Diagnostic &diag) {
// Process the main diagnostics.
process(diag);
// Process each of the notes.
for (auto &note : diag.getNotes())
process(note);
});
registerInContext(ctx);
}
SourceMgrDiagnosticVerifierHandler::SourceMgrDiagnosticVerifierHandler(
@ -862,6 +854,17 @@ LogicalResult SourceMgrDiagnosticVerifierHandler::verify() {
return impl->status;
}
void SourceMgrDiagnosticVerifierHandler::registerInContext(MLIRContext *ctx) {
ctx->getDiagEngine().registerHandler([&](Diagnostic &diag) {
// Process the main diagnostics.
process(diag);
// Process each of the notes.
for (auto &note : diag.getNotes())
process(note);
});
}
/// Process a single diagnostic.
void SourceMgrDiagnosticVerifierHandler::process(Diagnostic &diag) {
return process(diag.getLocation(), diag.str(), diag.getSeverity());

View File

@ -14,6 +14,8 @@
#include "mlir/Support/LLVM.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
#include <utility>
using namespace mlir;
@ -22,17 +24,17 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
ChunkBufferHandler processChunkBuffer,
raw_ostream &os, llvm::StringRef inputSplitMarker,
llvm::StringRef outputSplitMarker) {
llvm::MemoryBufferRef originalBufferRef = originalBuffer->getMemBufferRef();
// If splitting is disabled, we process the full input buffer.
if (inputSplitMarker.empty())
return processChunkBuffer(std::move(originalBuffer), os);
return processChunkBuffer(std::move(originalBuffer), originalBufferRef, os);
const int inputSplitMarkerLen = inputSplitMarker.size();
auto *origMemBuffer = originalBuffer.get();
SmallVector<StringRef, 8> rawSourceBuffers;
const int checkLen = 2;
// Split dropping the last checkLen chars to enable flagging near misses.
origMemBuffer->getBuffer().split(rawSourceBuffers,
originalBufferRef.getBuffer().split(rawSourceBuffers,
inputSplitMarker.drop_back(checkLen));
if (rawSourceBuffers.empty())
return success();
@ -79,11 +81,17 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
auto interleaveFn = [&](StringRef subBuffer) {
auto splitLoc = SMLoc::getFromPointer(subBuffer.data());
unsigned splitLine = fileSourceMgr.getLineAndColumn(splitLoc).first;
auto subMemBuffer = llvm::MemoryBuffer::getMemBufferCopy(
subBuffer, Twine("within split at ") +
origMemBuffer->getBufferIdentifier() + ":" +
Twine(splitLine) + " offset ");
if (failed(processChunkBuffer(std::move(subMemBuffer), os)))
std::string name((Twine("within split at ") +
originalBufferRef.getBufferIdentifier() + ":" +
Twine(splitLine) + " offset ")
.str());
// Use MemoryBufferRef to avoid copying the buffer & keep at same location
// relative to the original buffer.
auto subMemBuffer =
llvm::MemoryBuffer::getMemBuffer(llvm::MemoryBufferRef(subBuffer, name),
/*RequiresNullTerminator=*/false);
if (failed(
processChunkBuffer(std::move(subMemBuffer), originalBufferRef, os)))
hadFailure = true;
};
llvm::interleave(sourceBuffers, os, interleaveFn,
@ -92,3 +100,16 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
// If any fails, then return a failure of the tool.
return failure(hadFailure);
}
LogicalResult
mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
NoSourceChunkBufferHandler processChunkBuffer,
raw_ostream &os, llvm::StringRef inputSplitMarker,
llvm::StringRef outputSplitMarker) {
auto process = [&](std::unique_ptr<llvm::MemoryBuffer> chunkBuffer,
const llvm::MemoryBufferRef &, raw_ostream &os) {
return processChunkBuffer(std::move(chunkBuffer), os);
};
return splitAndProcessBuffer(std::move(originalBuffer), process, os,
inputSplitMarker, outputSplitMarker);
}

View File

@ -508,13 +508,20 @@ performActions(raw_ostream &os,
/// Parses the memory buffer. If successfully, run a series of passes against
/// it and print the result.
static LogicalResult processBuffer(raw_ostream &os,
std::unique_ptr<MemoryBuffer> ownedBuffer,
const MlirOptMainConfig &config,
DialectRegistry &registry,
static LogicalResult
processBuffer(raw_ostream &os, std::unique_ptr<MemoryBuffer> ownedBuffer,
llvm::MemoryBufferRef sourceBuffer,
const MlirOptMainConfig &config, DialectRegistry &registry,
SourceMgrDiagnosticVerifierHandler *verifyHandler,
llvm::ThreadPoolInterface *threadPool) {
// Tell sourceMgr about this buffer, which is what the parser will pick up.
auto sourceMgr = std::make_shared<SourceMgr>();
// Add the original buffer to the source manager to use for determining
// locations.
sourceMgr->AddNewSourceBuffer(
llvm::MemoryBuffer::getMemBuffer(sourceBuffer,
/*RequiresNullTerminator=*/false),
SMLoc());
sourceMgr->AddNewSourceBuffer(std::move(ownedBuffer), SMLoc());
// Create a context just for the current buffer. Disable threading on creation
@ -522,6 +529,8 @@ static LogicalResult processBuffer(raw_ostream &os,
MLIRContext context(registry, MLIRContext::Threading::DISABLED);
if (threadPool)
context.setThreadPool(*threadPool);
if (verifyHandler)
verifyHandler->registerInContext(&context);
StringRef irdlFile = config.getIrdlFile();
if (!irdlFile.empty() && failed(loadIRDLDialects(irdlFile, context)))
@ -545,17 +554,12 @@ static LogicalResult processBuffer(raw_ostream &os,
return performActions(os, sourceMgr, &context, config);
}
SourceMgrDiagnosticVerifierHandler sourceMgrHandler(
*sourceMgr, &context, config.verifyDiagnosticsLevel());
// Do any processing requested by command line flags. We don't care whether
// these actions succeed or fail, we only care what diagnostics they produce
// and whether they match our expectations.
(void)performActions(os, sourceMgr, &context, config);
// Verify the diagnostic handler to make sure that each of the diagnostics
// matched.
return sourceMgrHandler.verify();
return success();
}
std::pair<std::string, std::string>
@ -624,14 +628,31 @@ LogicalResult mlir::MlirOptMain(llvm::raw_ostream &outputStream,
if (threadPoolCtx.isMultithreadingEnabled())
threadPool = &threadPoolCtx.getThreadPool();
SourceMgr sourceMgr;
sourceMgr.AddNewSourceBuffer(
llvm::MemoryBuffer::getMemBuffer(buffer->getMemBufferRef(),
/*RequiresNullTerminator=*/false),
SMLoc());
// Note: this creates a verifier handler independent of the the flag set, as
// internally if the flag is not set, a new scoped diagnostic handler is
// created which would intercept the diagnostics and verify them.
SourceMgrDiagnosticVerifierHandler sourceMgrHandler(
sourceMgr, &threadPoolCtx, config.verifyDiagnosticsLevel());
auto chunkFn = [&](std::unique_ptr<MemoryBuffer> chunkBuffer,
raw_ostream &os) {
return processBuffer(os, std::move(chunkBuffer), config, registry,
llvm::MemoryBufferRef sourceBuffer, raw_ostream &os) {
return processBuffer(
os, std::move(chunkBuffer), sourceBuffer, config, registry,
config.shouldVerifyDiagnostics() ? &sourceMgrHandler : nullptr,
threadPool);
};
return splitAndProcessBuffer(std::move(buffer), chunkFn, outputStream,
config.inputSplitMarker(),
LogicalResult status = splitAndProcessBuffer(
llvm::MemoryBuffer::getMemBuffer(buffer->getMemBufferRef(),
/*RequiresNullTerminator=*/false),
chunkFn, outputStream, config.inputSplitMarker(),
config.outputSplitMarker());
if (config.shouldVerifyDiagnostics() && failed(sourceMgrHandler.verify()))
status = failure();
return status;
}
LogicalResult mlir::MlirOptMain(int argc, char **argv,

View File

@ -135,6 +135,13 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
// Processes the memory buffer with a new MLIRContext.
auto processBuffer = [&](std::unique_ptr<llvm::MemoryBuffer> ownedBuffer,
raw_ostream &os) {
// Many of the translations expect a null-terminated buffer while splitting
// the buffer does not guarantee null-termination. Make a copy of the buffer
// to ensure null-termination.
if (!ownedBuffer->getBuffer().ends_with('\0')) {
ownedBuffer = llvm::MemoryBuffer::getMemBufferCopy(
ownedBuffer->getBuffer(), ownedBuffer->getBufferIdentifier());
}
// Temporary buffers for chained translation processing.
std::string dataIn;
std::string dataOut;

View File

@ -0,0 +1,13 @@
// RUN: not mlir-opt %s -o - --split-input-file 2>&1 | FileCheck %s
// This test verifies that diagnostic handler doesn't emit splits.
// -----
func.func @constant_out_of_range() {
// CHECK: mlir:11:8: error: 'arith.constant'
%x = "arith.constant"() {value = 100} : () -> i1
return
}

View File

@ -6,10 +6,10 @@ func.func private @foo()
// -----
// expected-error@-3 {{source must contain a single top-level operation, found: 2}}
// expected-error@-9 {{source must contain a single top-level operation, found: 2}}
func.func private @bar()
func.func private @baz()
// -----
// expected-error@-3 {{source must contain a single top-level operation, found: 0}}
// expected-error@-15 {{source must contain a single top-level operation, found: 0}}

View File

@ -201,6 +201,12 @@ int main(int argc, char **argv) {
llvm::raw_string_ostream outputStrOS(outputStr);
auto processFn = [&](std::unique_ptr<llvm::MemoryBuffer> chunkBuffer,
raw_ostream &os) {
// Split does not guarantee null-termination. Make a copy of the buffer to
// ensure null-termination.
if (!chunkBuffer->getBuffer().ends_with('\0')) {
chunkBuffer = llvm::MemoryBuffer::getMemBufferCopy(
chunkBuffer->getBuffer(), chunkBuffer->getBufferIdentifier());
}
return processBuffer(os, std::move(chunkBuffer), outputType, includeDirs,
dumpODS, includedFiles);
};