
For regex patterns that produce zero-length matches, there is one (imaginary) match in-between every character in the sequence being searched (as well as before the first character and after the last character). It's easiest to demonstrate using replacement: `std::regex_replace("abc"s, std::regex(""), "!")` should produce `!a!b!c!`, where each exclamation mark makes a zero-length match visible. Currently our implementation doesn't correctly set the prefix of each zero-length match, "swallowing" the characters separating the imaginary matches -- e.g. when going through zero-length matches within `abc`, the corresponding prefixes should be `{'', 'a', 'b', 'c'}`, but before this patch they will all be empty (`{'', '', '', ''}`). This happens in the implementation of `regex_iterator::operator++`. Note that the Standard spells out quite explicitly that the prefix might need to be adjusted when dealing with zero-length matches in [`re.regiter.incr`](http://eel.is/c++draft/re.regiter.incr):

> In all cases in which the call to `regex_search` returns `true`, `match.prefix().first` shall be equal to the previous value of `match[0].second`... It is unspecified how the implementation makes these adjustments.

[Reproduction example](https://godbolt.org/z/8ve6G3dav)

```cpp
#include <iostream>
#include <regex>
#include <string>

int main() {
  std::string str = "abc";
  std::regex empty_matching_pattern("");

  { // The underlying problem is that `regex_iterator::operator++` doesn't update
    // the prefix correctly.
    std::sregex_iterator i(str.begin(), str.end(), empty_matching_pattern), e;
    std::cout << "\"";
    for (; i != e; ++i) {
      const std::ssub_match& prefix = i->prefix();
      std::cout << prefix.str();
    }
    std::cout << "\"\n";
    // Before the patch: ""
    // After the patch: "abc"
  }

  { // `regex_replace` makes the problem very visible.
    std::string replaced = std::regex_replace(str, empty_matching_pattern, "!");
    std::cout << "\"" << replaced << "\"\n";
    // Before the patch: "!!!!"
    // After the patch: "!a!b!c!"
  }
}
```

Fixes #64451

rdar://119912002
38 lines
1.2 KiB
C++
38 lines
1.2 KiB
C++
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// <regex>
|
|
|
|
// Test that replacing zero-length matches works correctly.
|
|
|
|
#include <cassert>
|
|
#include <regex>
|
|
#include <string>
|
|
#include "test_macros.h"
|
|
|
|
// Replaces every match of `pattern` in `input` with `replacement` and returns
// the resulting string. All arguments are passed as C strings so that the
// `const charT*` overload of `std::regex_replace` is the one being exercised.
static std::string replace_matches(const char* input, const char* pattern, const char* replacement) {
  return std::regex_replace(input, std::regex(pattern), replacement);
}

// Checks that `std::regex_replace` keeps the characters located between
// consecutive zero-length matches instead of dropping them.
int main(int, char**) {
  // Several different patterns that can all match the empty string.
  assert(replace_matches("abc", "", "!") == "!a!b!c!");
  assert(replace_matches("abc", "X*", "!") == "!a!b!c!");
  assert(replace_matches("abc", "X{0,3}", "!") == "!a!b!c!");

  // The replacement is longer than a single character.
  assert(replace_matches("abc", "", "[!]") == "[!]a[!]b[!]c[!]");

  // The replacement is empty, so the input must come back unchanged.
  assert(replace_matches("abc", "", "") == "abc");

  // The input is empty: there is still exactly one zero-length match.
  assert(replace_matches("", "", "!") == "!");

  // A mix of zero-length and non-empty matches.
  assert(replace_matches("abCabCa", "C*", "!") == "!a!b!!a!b!!a!");

  return 0;
}
|