From b6a98b934f63431243ba062aa9e07a52aae05f70 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Tue, 29 Jul 2025 13:29:00 +0200 Subject: [PATCH] [mlir] Report line number from file rather than split (#150982) Add convention for lexer if the last file is contained in the first, then the first is used for error reporting. This requires that these two overlap to make it easy to find the corresponding spots. Enables going from ``` within split at mlir/test/IR/invalid.mlir::10 offset :6:9: error: reference to an undefined block ``` to ``` mlir/test/IR/invalid.mlir:15:9: error: reference to an undefined block ``` This does change the split to not produce always null terminated buffers and tools that need it, need to do so themselves (which is mostly by copying - this may have little actual impact as previously this was a copy too). --- mlir/include/mlir/IR/Diagnostics.h | 4 ++ mlir/include/mlir/Support/ToolUtilities.h | 15 +++++ mlir/lib/IR/Diagnostics.cpp | 21 ++++--- mlir/lib/Support/ToolUtilities.cpp | 39 ++++++++++--- mlir/lib/Tools/mlir-opt/MlirOptMain.cpp | 55 +++++++++++++------ .../mlir-translate/MlirTranslateMain.cpp | 7 +++ mlir/test/IR/diagnostic-nosplit.mlir | 13 +++++ mlir/test/IR/top-level.mlir | 4 +- mlir/tools/mlir-pdll/mlir-pdll.cpp | 6 ++ 9 files changed, 127 insertions(+), 37 deletions(-) create mode 100644 mlir/test/IR/diagnostic-nosplit.mlir diff --git a/mlir/include/mlir/IR/Diagnostics.h b/mlir/include/mlir/IR/Diagnostics.h index 4ed042390213..7ff718ad7f24 100644 --- a/mlir/include/mlir/IR/Diagnostics.h +++ b/mlir/include/mlir/IR/Diagnostics.h @@ -639,6 +639,10 @@ public: /// verified correctly, failure otherwise. LogicalResult verify(); + /// Register this handler with the given context. This is intended for use + /// with the splitAndProcessBuffer function. + void registerInContext(MLIRContext *ctx); + private: /// Process a single diagnostic. void process(Diagnostic &diag); diff --git a/mlir/include/mlir/Support/ToolUtilities.h b/mlir/include/mlir/Support/ToolUtilities.h index cb6ba299d28f..657f117d8d8b 100644 --- a/mlir/include/mlir/Support/ToolUtilities.h +++ b/mlir/include/mlir/Support/ToolUtilities.h @@ -21,10 +21,16 @@ namespace llvm { class MemoryBuffer; +class MemoryBufferRef; } // namespace llvm namespace mlir { +// A function that processes a chunk of a buffer and writes the result to an +// output stream. using ChunkBufferHandler = function_ref chunkBuffer, + const llvm::MemoryBufferRef &sourceBuffer, raw_ostream &os)>; +using NoSourceChunkBufferHandler = function_ref chunkBuffer, raw_ostream &os)>; extern inline const char *const kDefaultSplitMarker = "// -----"; @@ -45,6 +51,15 @@ splitAndProcessBuffer(std::unique_ptr originalBuffer, ChunkBufferHandler processChunkBuffer, raw_ostream &os, llvm::StringRef inputSplitMarker = kDefaultSplitMarker, llvm::StringRef outputSplitMarker = ""); + +/// Same as above, but for case where the original buffer is not used while +/// processing the chunk. +LogicalResult +splitAndProcessBuffer(std::unique_ptr originalBuffer, + NoSourceChunkBufferHandler processChunkBuffer, + raw_ostream &os, + llvm::StringRef inputSplitMarker = kDefaultSplitMarker, + llvm::StringRef outputSplitMarker = ""); } // namespace mlir #endif // MLIR_SUPPORT_TOOLUTILITIES_H diff --git a/mlir/lib/IR/Diagnostics.cpp b/mlir/lib/IR/Diagnostics.cpp index 3e337951bcd3..776b5c6588c7 100644 --- a/mlir/lib/IR/Diagnostics.cpp +++ b/mlir/lib/IR/Diagnostics.cpp @@ -821,15 +821,7 @@ SourceMgrDiagnosticVerifierHandler::SourceMgrDiagnosticVerifierHandler( for (unsigned i = 0, e = mgr.getNumBuffers(); i != e; ++i) (void)impl->computeExpectedDiags(out, mgr, mgr.getMemoryBuffer(i + 1)); - // Register a handler to verify the diagnostics. - setHandler([&](Diagnostic &diag) { - // Process the main diagnostics. - process(diag); - - // Process each of the notes. - for (auto ¬e : diag.getNotes()) - process(note); - }); + registerInContext(ctx); } SourceMgrDiagnosticVerifierHandler::SourceMgrDiagnosticVerifierHandler( @@ -862,6 +854,17 @@ LogicalResult SourceMgrDiagnosticVerifierHandler::verify() { return impl->status; } +void SourceMgrDiagnosticVerifierHandler::registerInContext(MLIRContext *ctx) { + ctx->getDiagEngine().registerHandler([&](Diagnostic &diag) { + // Process the main diagnostics. + process(diag); + + // Process each of the notes. + for (auto ¬e : diag.getNotes()) + process(note); + }); +} + /// Process a single diagnostic. void SourceMgrDiagnosticVerifierHandler::process(Diagnostic &diag) { return process(diag.getLocation(), diag.str(), diag.getSeverity()); diff --git a/mlir/lib/Support/ToolUtilities.cpp b/mlir/lib/Support/ToolUtilities.cpp index 748f92847ac5..2cf33eb30d06 100644 --- a/mlir/lib/Support/ToolUtilities.cpp +++ b/mlir/lib/Support/ToolUtilities.cpp @@ -14,6 +14,8 @@ #include "mlir/Support/LLVM.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" +#include +#include using namespace mlir; @@ -22,18 +24,18 @@ mlir::splitAndProcessBuffer(std::unique_ptr originalBuffer, ChunkBufferHandler processChunkBuffer, raw_ostream &os, llvm::StringRef inputSplitMarker, llvm::StringRef outputSplitMarker) { + llvm::MemoryBufferRef originalBufferRef = originalBuffer->getMemBufferRef(); // If splitting is disabled, we process the full input buffer. if (inputSplitMarker.empty()) - return processChunkBuffer(std::move(originalBuffer), os); + return processChunkBuffer(std::move(originalBuffer), originalBufferRef, os); const int inputSplitMarkerLen = inputSplitMarker.size(); - auto *origMemBuffer = originalBuffer.get(); SmallVector rawSourceBuffers; const int checkLen = 2; // Split dropping the last checkLen chars to enable flagging near misses. - origMemBuffer->getBuffer().split(rawSourceBuffers, - inputSplitMarker.drop_back(checkLen)); + originalBufferRef.getBuffer().split(rawSourceBuffers, + inputSplitMarker.drop_back(checkLen)); if (rawSourceBuffers.empty()) return success(); @@ -79,11 +81,17 @@ mlir::splitAndProcessBuffer(std::unique_ptr originalBuffer, auto interleaveFn = [&](StringRef subBuffer) { auto splitLoc = SMLoc::getFromPointer(subBuffer.data()); unsigned splitLine = fileSourceMgr.getLineAndColumn(splitLoc).first; - auto subMemBuffer = llvm::MemoryBuffer::getMemBufferCopy( - subBuffer, Twine("within split at ") + - origMemBuffer->getBufferIdentifier() + ":" + - Twine(splitLine) + " offset "); - if (failed(processChunkBuffer(std::move(subMemBuffer), os))) + std::string name((Twine("within split at ") + + originalBufferRef.getBufferIdentifier() + ":" + + Twine(splitLine) + " offset ") + .str()); + // Use MemoryBufferRef to avoid copying the buffer & keep at same location + // relative to the original buffer. + auto subMemBuffer = + llvm::MemoryBuffer::getMemBuffer(llvm::MemoryBufferRef(subBuffer, name), + /*RequiresNullTerminator=*/false); + if (failed( + processChunkBuffer(std::move(subMemBuffer), originalBufferRef, os))) hadFailure = true; }; llvm::interleave(sourceBuffers, os, interleaveFn, @@ -92,3 +100,16 @@ mlir::splitAndProcessBuffer(std::unique_ptr originalBuffer, // If any fails, then return a failure of the tool. return failure(hadFailure); } + +LogicalResult +mlir::splitAndProcessBuffer(std::unique_ptr originalBuffer, + NoSourceChunkBufferHandler processChunkBuffer, + raw_ostream &os, llvm::StringRef inputSplitMarker, + llvm::StringRef outputSplitMarker) { + auto process = [&](std::unique_ptr chunkBuffer, + const llvm::MemoryBufferRef &, raw_ostream &os) { + return processChunkBuffer(std::move(chunkBuffer), os); + }; + return splitAndProcessBuffer(std::move(originalBuffer), process, os, + inputSplitMarker, outputSplitMarker); +} diff --git a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp index 8f785900780f..bdcdaa407e61 100644 --- a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp +++ b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp @@ -508,13 +508,20 @@ performActions(raw_ostream &os, /// Parses the memory buffer. If successfully, run a series of passes against /// it and print the result. -static LogicalResult processBuffer(raw_ostream &os, - std::unique_ptr ownedBuffer, - const MlirOptMainConfig &config, - DialectRegistry ®istry, - llvm::ThreadPoolInterface *threadPool) { +static LogicalResult +processBuffer(raw_ostream &os, std::unique_ptr ownedBuffer, + llvm::MemoryBufferRef sourceBuffer, + const MlirOptMainConfig &config, DialectRegistry ®istry, + SourceMgrDiagnosticVerifierHandler *verifyHandler, + llvm::ThreadPoolInterface *threadPool) { // Tell sourceMgr about this buffer, which is what the parser will pick up. auto sourceMgr = std::make_shared(); + // Add the original buffer to the source manager to use for determining + // locations. + sourceMgr->AddNewSourceBuffer( + llvm::MemoryBuffer::getMemBuffer(sourceBuffer, + /*RequiresNullTerminator=*/false), + SMLoc()); sourceMgr->AddNewSourceBuffer(std::move(ownedBuffer), SMLoc()); // Create a context just for the current buffer. Disable threading on creation @@ -522,6 +529,8 @@ static LogicalResult processBuffer(raw_ostream &os, MLIRContext context(registry, MLIRContext::Threading::DISABLED); if (threadPool) context.setThreadPool(*threadPool); + if (verifyHandler) + verifyHandler->registerInContext(&context); StringRef irdlFile = config.getIrdlFile(); if (!irdlFile.empty() && failed(loadIRDLDialects(irdlFile, context))) @@ -545,17 +554,12 @@ static LogicalResult processBuffer(raw_ostream &os, return performActions(os, sourceMgr, &context, config); } - SourceMgrDiagnosticVerifierHandler sourceMgrHandler( - *sourceMgr, &context, config.verifyDiagnosticsLevel()); - // Do any processing requested by command line flags. We don't care whether // these actions succeed or fail, we only care what diagnostics they produce // and whether they match our expectations. (void)performActions(os, sourceMgr, &context, config); - // Verify the diagnostic handler to make sure that each of the diagnostics - // matched. - return sourceMgrHandler.verify(); + return success(); } std::pair @@ -624,14 +628,31 @@ LogicalResult mlir::MlirOptMain(llvm::raw_ostream &outputStream, if (threadPoolCtx.isMultithreadingEnabled()) threadPool = &threadPoolCtx.getThreadPool(); + SourceMgr sourceMgr; + sourceMgr.AddNewSourceBuffer( + llvm::MemoryBuffer::getMemBuffer(buffer->getMemBufferRef(), + /*RequiresNullTerminator=*/false), + SMLoc()); + // Note: this creates a verifier handler independent of the the flag set, as + // internally if the flag is not set, a new scoped diagnostic handler is + // created which would intercept the diagnostics and verify them. + SourceMgrDiagnosticVerifierHandler sourceMgrHandler( + sourceMgr, &threadPoolCtx, config.verifyDiagnosticsLevel()); auto chunkFn = [&](std::unique_ptr chunkBuffer, - raw_ostream &os) { - return processBuffer(os, std::move(chunkBuffer), config, registry, - threadPool); + llvm::MemoryBufferRef sourceBuffer, raw_ostream &os) { + return processBuffer( + os, std::move(chunkBuffer), sourceBuffer, config, registry, + config.shouldVerifyDiagnostics() ? &sourceMgrHandler : nullptr, + threadPool); }; - return splitAndProcessBuffer(std::move(buffer), chunkFn, outputStream, - config.inputSplitMarker(), - config.outputSplitMarker()); + LogicalResult status = splitAndProcessBuffer( + llvm::MemoryBuffer::getMemBuffer(buffer->getMemBufferRef(), + /*RequiresNullTerminator=*/false), + chunkFn, outputStream, config.inputSplitMarker(), + config.outputSplitMarker()); + if (config.shouldVerifyDiagnostics() && failed(sourceMgrHandler.verify())) + status = failure(); + return status; } LogicalResult mlir::MlirOptMain(int argc, char **argv, diff --git a/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp b/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp index c11cb8d2104f..e1c8afb7c3f2 100644 --- a/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp +++ b/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp @@ -135,6 +135,13 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv, // Processes the memory buffer with a new MLIRContext. auto processBuffer = [&](std::unique_ptr ownedBuffer, raw_ostream &os) { + // Many of the translations expect a null-terminated buffer while splitting + // the buffer does not guarantee null-termination. Make a copy of the buffer + // to ensure null-termination. + if (!ownedBuffer->getBuffer().ends_with('\0')) { + ownedBuffer = llvm::MemoryBuffer::getMemBufferCopy( + ownedBuffer->getBuffer(), ownedBuffer->getBufferIdentifier()); + } // Temporary buffers for chained translation processing. std::string dataIn; std::string dataOut; diff --git a/mlir/test/IR/diagnostic-nosplit.mlir b/mlir/test/IR/diagnostic-nosplit.mlir new file mode 100644 index 000000000000..ecfb9c62469b --- /dev/null +++ b/mlir/test/IR/diagnostic-nosplit.mlir @@ -0,0 +1,13 @@ +// RUN: not mlir-opt %s -o - --split-input-file 2>&1 | FileCheck %s +// This test verifies that diagnostic handler doesn't emit splits. + + +// ----- + + + +func.func @constant_out_of_range() { + // CHECK: mlir:11:8: error: 'arith.constant' + %x = "arith.constant"() {value = 100} : () -> i1 + return +} diff --git a/mlir/test/IR/top-level.mlir b/mlir/test/IR/top-level.mlir index b571d944928c..e0adb4d82334 100644 --- a/mlir/test/IR/top-level.mlir +++ b/mlir/test/IR/top-level.mlir @@ -6,10 +6,10 @@ func.func private @foo() // ----- -// expected-error@-3 {{source must contain a single top-level operation, found: 2}} +// expected-error@-9 {{source must contain a single top-level operation, found: 2}} func.func private @bar() func.func private @baz() // ----- -// expected-error@-3 {{source must contain a single top-level operation, found: 0}} +// expected-error@-15 {{source must contain a single top-level operation, found: 0}} diff --git a/mlir/tools/mlir-pdll/mlir-pdll.cpp b/mlir/tools/mlir-pdll/mlir-pdll.cpp index 88a5f3639c96..f99dcdb53fe9 100644 --- a/mlir/tools/mlir-pdll/mlir-pdll.cpp +++ b/mlir/tools/mlir-pdll/mlir-pdll.cpp @@ -201,6 +201,12 @@ int main(int argc, char **argv) { llvm::raw_string_ostream outputStrOS(outputStr); auto processFn = [&](std::unique_ptr chunkBuffer, raw_ostream &os) { + // Split does not guarantee null-termination. Make a copy of the buffer to + // ensure null-termination. + if (!chunkBuffer->getBuffer().ends_with('\0')) { + chunkBuffer = llvm::MemoryBuffer::getMemBufferCopy( + chunkBuffer->getBuffer(), chunkBuffer->getBufferIdentifier()); + } return processBuffer(os, std::move(chunkBuffer), outputType, includeDirs, dumpODS, includedFiles); };