From 376e3b62cd36cabf4235e085cd13df05c2bd754b Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Thu, 13 Mar 2025 12:13:09 +0800 Subject: [PATCH] [TableGen] Add `!match` operator to do regex matching (#130759) The grammar is `!match(str, regex)` and this operator produces 1 if the `str` matches the regular expression `regex`. The format of `regex` is ERE (Extended POSIX Regular Expressions). --- llvm/docs/TableGen/ProgRef.rst | 14 +++++++---- llvm/include/llvm/TableGen/Record.h | 1 + llvm/lib/TableGen/Record.cpp | 21 +++++++++++++++++ llvm/lib/TableGen/TGLexer.cpp | 1 + llvm/lib/TableGen/TGLexer.h | 1 + llvm/lib/TableGen/TGParser.cpp | 8 +++++++ llvm/test/TableGen/match.td | 36 +++++++++++++++++++++++++++++ 7 files changed, 77 insertions(+), 5 deletions(-) create mode 100644 llvm/test/TableGen/match.td diff --git a/llvm/docs/TableGen/ProgRef.rst b/llvm/docs/TableGen/ProgRef.rst index edb97109c928..0983c6283f7e 100644 --- a/llvm/docs/TableGen/ProgRef.rst +++ b/llvm/docs/TableGen/ProgRef.rst @@ -225,11 +225,11 @@ TableGen provides "bang operators" that have a wide variety of uses: : !getdagname !getdagop !gt !head !if : !initialized !interleave !isa !le !listconcat : !listflatten !listremove !listsplat !logtwo !lt - : !mul !ne !not !or !range - : !repr !setdagarg !setdagname !setdagop !shl - : !size !sra !srl !strconcat !sub - : !subst !substr !tail !tolower !toupper - : !xor + : !match !mul !ne !not !or + : !range !repr !setdagarg !setdagname !setdagop + : !shl !size !sra !srl !strconcat + : !sub !subst !substr !tail !tolower + : !toupper !xor The ``!cond`` operator has a slightly different syntax compared to other bang operators, so it is defined separately: @@ -1878,6 +1878,10 @@ and non-0 as true. This operator produces 1 if *a* is less than *b*; 0 otherwise. The arguments must be ``bit``, ``bits``, ``int``, or ``string`` values. +``!match(``\ *str*\ ``,`` *regex*\ ``)`` + This operator produces 1 if the *str* matches the regular expression + *regex*. 
The format of *regex* is ERE (Extended POSIX Regular Expressions). + ``!mul(``\ *a*\ ``,`` *b*\ ``, ...)`` This operator multiplies *a*, *b*, etc., and produces the product. diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h index 334007524c95..ae505631b543 100644 --- a/llvm/include/llvm/TableGen/Record.h +++ b/llvm/include/llvm/TableGen/Record.h @@ -910,6 +910,7 @@ public: STRCONCAT, INTERLEAVE, CONCAT, + MATCH, EQ, NE, LE, diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp index 590656786bc6..75160e61f3f9 100644 --- a/llvm/lib/TableGen/Record.cpp +++ b/llvm/lib/TableGen/Record.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/Regex.h" #include "llvm/Support/SMLoc.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Error.h" @@ -1318,6 +1319,23 @@ const Init *BinOpInit::Fold(const Record *CurRec) const { } break; } + case MATCH: { + const auto *StrInit = dyn_cast(LHS); + if (!StrInit) + return this; + + const auto *RegexInit = dyn_cast(RHS); + if (!RegexInit) + return this; + + StringRef RegexStr = RegexInit->getValue(); + llvm::Regex Matcher(RegexStr); + if (!Matcher.isValid()) + PrintFatalError(Twine("invalid regex '") + RegexStr + Twine("'")); + + return BitInit::get(LHS->getRecordKeeper(), + Matcher.match(StrInit->getValue())); + } case LISTCONCAT: { const auto *LHSs = dyn_cast(LHS); const auto *RHSs = dyn_cast(RHS); @@ -1586,6 +1604,9 @@ std::string BinOpInit::getAsString() const { case RANGEC: return LHS->getAsString() + "..." 
+ RHS->getAsString(); case CONCAT: Result = "!con"; break; + case MATCH: + Result = "!match"; + break; case ADD: Result = "!add"; break; case SUB: Result = "!sub"; break; case MUL: Result = "!mul"; break; diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp index 983242ade0fe..0b2f927446b1 100644 --- a/llvm/lib/TableGen/TGLexer.cpp +++ b/llvm/lib/TableGen/TGLexer.cpp @@ -644,6 +644,7 @@ tgtok::TokKind TGLexer::LexExclaim() { .Case("tolower", tgtok::XToLower) .Case("toupper", tgtok::XToUpper) .Case("repr", tgtok::XRepr) + .Case("match", tgtok::XMatch) .Default(tgtok::Error); return Kind != tgtok::Error ? Kind diff --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h index 668091521120..ef9205197dec 100644 --- a/llvm/lib/TableGen/TGLexer.h +++ b/llvm/lib/TableGen/TGLexer.h @@ -126,6 +126,7 @@ enum TokKind { XInterleave, XSubstr, XFind, + XMatch, XCast, XSubst, XForEach, diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp index 9a8301cffb93..787c3e64beac 100644 --- a/llvm/lib/TableGen/TGParser.cpp +++ b/llvm/lib/TableGen/TGParser.cpp @@ -1456,6 +1456,7 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) { } case tgtok::XConcat: + case tgtok::XMatch: case tgtok::XADD: case tgtok::XSUB: case tgtok::XMUL: @@ -1488,6 +1489,9 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) { switch (OpTok) { default: llvm_unreachable("Unhandled code!"); case tgtok::XConcat: Code = BinOpInit::CONCAT; break; + case tgtok::XMatch: + Code = BinOpInit::MATCH; + break; case tgtok::XADD: Code = BinOpInit::ADD; break; case tgtok::XSUB: Code = BinOpInit::SUB; break; case tgtok::XMUL: Code = BinOpInit::MUL; break; @@ -1525,6 +1529,10 @@ const Init *TGParser::ParseOperation(Record *CurRec, const RecTy *ItemType) { switch (OpTok) { default: llvm_unreachable("Unhandled code!"); + case tgtok::XMatch: + Type = BitRecTy::get(Records); + ArgType = StringRecTy::get(Records); + break; case 
tgtok::XConcat: case tgtok::XSetDagOp: Type = DagRecTy::get(Records); diff --git a/llvm/test/TableGen/match.td b/llvm/test/TableGen/match.td new file mode 100644 index 000000000000..743aebd130b0 --- /dev/null +++ b/llvm/test/TableGen/match.td @@ -0,0 +1,36 @@ +// RUN: llvm-tblgen %s | FileCheck %s +// RUN: not llvm-tblgen -DERROR1 %s 2>&1 | FileCheck --check-prefix=ERROR1 %s +// RUN: not llvm-tblgen -DERROR2 %s 2>&1 | FileCheck --check-prefix=ERROR2 %s +// RUN: not llvm-tblgen -DERROR3 %s 2>&1 | FileCheck --check-prefix=ERROR3 %s +// RUN: not llvm-tblgen -DERROR4 %s 2>&1 | FileCheck --check-prefix=ERROR4 %s +// XFAIL: vg_leak + +def test { + bit test0 = !match("123 abc ABC", "[0-9 a-z A-Z]+"); + bit test1 = !match("abc", "[0-9]+"); +} + +// CHECK-LABEL: def test { +// CHECK-NEXT: bit test0 = 1; +// CHECK-NEXT: bit test1 = 0; +// CHECK-NEXT: } + +#ifdef ERROR1 +defvar error1 = !match(123, ".*"); +// ERROR1: error: expected value of type 'string', got 'int' +#endif + +#ifdef ERROR2 +defvar error2 = !match("abc", 123); +// ERROR2: error: expected value of type 'string', got 'int' +#endif + +#ifdef ERROR3 +defvar error3 = !match("abc", "abc", "abc"); +// ERROR3: error: expected two operands to operator +#endif + +#ifdef ERROR4 +defvar error4 = !match("abc", "([)]"); +// ERROR4: error: invalid regex '([)]' +#endif