[llvm-remarkutil] filter: Add --sort and --dedupe flags (#187338)

Add `--sort` to emit remarks in sorted order and `--dedupe` to
deduplicate identical remarks. Remarks are buffered into a sorted map
before emission only when at least one of these options is requested.

Pull Request: https://github.com/llvm/llvm-project/pull/187338
This commit is contained in:
Tobias Stadler 2026-03-18 15:30:03 -07:00 committed by GitHub
parent a67c3b7468
commit 3cf80812f0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 122 additions and 1 deletions

View File

@ -0,0 +1,28 @@
--- !Analysis
Pass: pass3
Name: Remark3
DebugLoc: { File: 'path/to/func3.c', Line: 1, Column: 2 }
Function: func3
Args:
- String: ' text'
- arg3: argval3
DebugLoc: { File: 'path/to/func3.c', Line: 2, Column: 2 }
...
--- !Passed
Pass: pass1
Name: Remark1
DebugLoc: { File: 'path/to/func1.c', Line: 1, Column: 2 }
Function: func1
Args:
- String: ' text'
- arg1: argval1
...
--- !Missed
Pass: pass2
Name: Remark2
DebugLoc: { File: 'path/to/func2.c', Line: 1, Column: 2 }
Function: func2
Args:
- String: ' text'
- arg2: argval2
...

View File

@ -0,0 +1,46 @@
RUN: llvm-remarkutil filter --sort %p/Inputs/filter-unsorted.yaml | FileCheck %s --strict-whitespace --check-prefix=SORTED
RUN: llvm-remarkutil filter --dedupe %p/Inputs/filter2.yaml %p/Inputs/filter2.yaml | FileCheck %s --strict-whitespace --check-prefix=DEDUPED
RUN: llvm-remarkutil filter --sort %p/Inputs/filter.yaml %p/Inputs/filter.yaml | FileCheck %s --strict-whitespace --check-prefix=SORTED-DUP
RUN: llvm-remarkutil filter --sort --dedupe %p/Inputs/filter-unsorted.yaml %p/Inputs/filter-unsorted.yaml | FileCheck %s --strict-whitespace --check-prefix=SORTED
; SORTED: --- !Passed
; SORTED: Name: Remark1
; SORTED: ...
; SORTED-NEXT: --- !Missed
; SORTED: Name: Remark2
; SORTED: ...
; SORTED-NEXT: --- !Analysis
; SORTED: Name: Remark3
; SORTED: ...
; SORTED-NOT: {{.}}
; SORTED-DUP: --- !Passed
; SORTED-DUP: Name: Remark1
; SORTED-DUP: ...
; SORTED-DUP-NEXT: --- !Passed
; SORTED-DUP: Name: Remark1
; SORTED-DUP: ...
; SORTED-DUP-NEXT: --- !Missed
; SORTED-DUP: Name: Remark2
; SORTED-DUP: ...
; SORTED-DUP-NEXT: --- !Missed
; SORTED-DUP: Name: Remark2
; SORTED-DUP: ...
; SORTED-DUP-NEXT: --- !Analysis
; SORTED-DUP: Name: Remark3
; SORTED-DUP: ...
; SORTED-DUP-NEXT: --- !Analysis
; SORTED-DUP: Name: Remark3
; SORTED-DUP: ...
; SORTED-DUP-NOT: {{.}}
; DEDUPED: --- !Passed
; DEDUPED-NEXT: Pass: pass4
; DEDUPED-NEXT: Name: Remark4
; DEDUPED-NEXT: DebugLoc: { File: 'path/to/func4.c', Line: 1, Column: 2 }
; DEDUPED-NEXT: Function: func4
; DEDUPED-NEXT: Args:
; DEDUPED-NEXT: - String: ' text'
; DEDUPED-NEXT: - arg4: argval4
; DEDUPED-NEXT: ...
; DEDUPED-NOT: {{.}}

View File

@ -15,6 +15,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/Regex.h"
#include <map>
using namespace llvm;
using namespace remarks;
@ -44,6 +45,11 @@ static cl::opt<bool>
ExcludeOpt("exclude",
cl::desc("Keep all remarks except those matching the filter"),
cl::init(false), cl::sub(FilterSub));
// `--sort` flag of the filter subcommand; off by default. Its value is copied
// into FilterTool::Sort by tryFilter().
static cl::opt<bool> SortOpt("sort", cl::desc("Sort remarks (expensive!)"),
cl::init(false), cl::sub(FilterSub));
// `--dedupe` flag of the filter subcommand; off by default. Its value is
// copied into FilterTool::Dedupe by tryFilter().
static cl::opt<bool> DedupeOpt("dedupe",
cl::desc("Deduplicate remarks (expensive!)"),
cl::init(false), cl::sub(FilterSub));
REMARK_FILTER_SETUP_FUNC()
@ -52,6 +58,9 @@ namespace {
class FilterTool {
public:
Filters Filter;
bool Sort = false;
bool Dedupe = false;
bool Exclude = false;
FilterTool(Filters Filter) : Filter(std::move(Filter)) {}
@ -75,7 +84,7 @@ public:
Remark &Remark = **MaybeRemark;
if (Filter.filterRemark(Remark) == Exclude)
continue;
Serializer->emit(Remark);
emit(std::move(*MaybeRemark));
}
auto E = MaybeRemark.takeError();
if (!E.isA<EndOfFileError>())
@ -87,6 +96,7 @@ public:
/// Flush any buffered remarks and release the serializer.
///
/// Does nothing when no serializer is set. Otherwise the buffered remarks are
/// emitted first, then keep() is called on the output file and the serializer
/// is reset.
void finalize() {
  if (Serializer) {
    // Buffered remarks must go out while the serializer is still alive.
    emitBuffered();
    OF->keep();
    Serializer = nullptr;
  }
}
@ -95,6 +105,21 @@ private:
std::unique_ptr<ToolOutputFile> OF;
std::unique_ptr<RemarkSerializer> Serializer;
/// Compare Remarks through unique_ptr
struct RemarkPtrCompare {
bool operator()(const std::unique_ptr<Remark> &LHS,
const std::unique_ptr<Remark> &RHS) const {
assert(LHS && RHS && "Invalid pointers to compare.");
return *LHS < *RHS;
}
};
// Buffer for all remarks when sorting/deduplication is requested. Each unique
// remark maps to the number of times it is emitted by emitBuffered().
// For now, use std::map (like the RemarkLinker) for easy sorting. We
// should be capitalizing on the fact that the strings are interned.
std::map<std::unique_ptr<Remark>, size_t, RemarkPtrCompare> Remarks;
StringTable StrTab;
/// Set up the RemarkSerializer lazily, so automatic output format detection
/// can default to the automatically detected input format from the first file
/// we process.
@ -113,6 +138,26 @@ private:
Serializer = std::move(*MaybeSerializer);
return Error::success();
}
/// Emit a remark immediately, or buffer it for later emission.
///
/// When neither Sort nor Dedupe is set, the remark is passed straight to the
/// serializer. Otherwise it is internalized into StrTab and stored in Remarks
/// with a count of 1. A remark that compares equal to one already stored is
/// not inserted again: its count is bumped unless Dedupe is set, in which case
/// the repeat is dropped.
///
/// \param RPtr the remark to emit; ownership is taken.
void emit(std::unique_ptr<Remark> RPtr) {
  const bool NeedsBuffering = Sort || Dedupe;
  if (!NeedsBuffering) {
    Serializer->emit(*RPtr);
    return;
  }

  // NOTE(review): presumably internalizing keeps the remark's strings valid
  // for as long as StrTab lives -- confirm against StringTable::internalize.
  StrTab.internalize(*RPtr);

  auto Result = Remarks.try_emplace(std::move(RPtr), 1);
  const bool IsRepeat = !Result.second;
  if (IsRepeat && !Dedupe)
    ++Result.first->second;
}
/// Emit every buffered remark through the serializer, then empty the buffer.
///
/// Remarks are visited in the map's iteration order (i.e. ordered by
/// RemarkPtrCompare), and each one is emitted as many times as its stored
/// count.
void emitBuffered() {
  for (auto &Entry : Remarks) {
    for (size_t Left = Entry.second; Left != 0; --Left)
      Serializer->emit(*Entry.first);
  }
  Remarks.clear();
}
};
} // namespace
@ -122,6 +167,8 @@ static Error tryFilter() {
if (!MaybeFilter)
return MaybeFilter.takeError();
FilterTool Tool(std::move(*MaybeFilter));
Tool.Sort = SortOpt;
Tool.Dedupe = DedupeOpt;
Tool.Exclude = ExcludeOpt;
for (auto &InputFileName : InputFileNames) {