C++正则表达式
本文最后更新于:2022年3月19日 凌晨
此文章来源于👉正则表达式库 - cppreference.com,并整理
正则表达式库提供表示正则表达式的类,正则表达式是一种用于在字符串中匹配模式的微型语言。
定义于头文件 `<regex>`
#include <iostream>
#include <iterator>
#include <string>
#include <regex>
// Overview example: runs a case-insensitive search, counts and filters the
// words of a sentence with std::sregex_iterator, and finally wraps every
// long word in brackets with std::regex_replace. All results go to stdout.
int main()
{
    std::string s = "Some people, when confronted with a problem, think "
                    "\"I know, I'll use regular expressions.\" "
                    "Now they have two problems.";

    // The icase flag lets the upper-case pattern match the lower-case text.
    std::regex self_regex("REGULAR EXPRESSIONS",
                          std::regex_constants::ECMAScript | std::regex_constants::icase);
    if (std::regex_search(s, self_regex)) {
        std::cout << "Text contains the phrase 'regular expressions'\n";
    }

    // Walk every run of word characters in s.
    std::regex word_regex("(\\w+)");
    auto words_begin =
        std::sregex_iterator(s.begin(), s.end(), word_regex);
    auto words_end = std::sregex_iterator();  // default-constructed iterator marks the end

    std::cout << "Found "
              << std::distance(words_begin, words_end)
              << " words\n";

    // Unsigned on purpose: it is compared against std::string::size(), and a
    // plain int here would produce a signed/unsigned comparison.
    const std::size_t N = 6;
    std::cout << "Words longer than " << N << " characters:\n";
    for (std::sregex_iterator i = words_begin; i != words_end; ++i) {
        std::smatch match = *i;
        std::string match_str = match.str();
        if (match_str.size() > N) {
            std::cout << " " << match_str << '\n';
        }
    }

    // "$&" in the replacement format stands for the whole matched text.
    std::regex long_word_regex("(\\w{7,})");
    std::string new_s = std::regex_replace(s, long_word_regex, "[$&]");
    std::cout << new_s << '\n';
}
算法#
regex_match (C++11)#
尝试匹配一个正则表达式到整个字符序列 (函数模板)
#include <iostream>
#include <string>
#include <regex>
// std::regex_match example: whole-string matching against a list of file
// names, first as a yes/no test, then extracting one and several sub-matches.
int main()
{
    std::string fnames[] = {"foo.txt", "bar.txt", "baz.dat", "zoidberg"};

    // 1) Plain match: print 1/0 for every name.
    std::regex txt_regex("[a-z]+\\.txt");
    for (const auto &name : fnames) {
        const bool is_txt = std::regex_match(name, txt_regex);
        std::cout << name << ": " << is_txt << '\n';
    }

    // 2) Extract a single sub-match (the base name before ".txt").
    std::regex base_regex("([a-z]+)\\.txt");
    std::smatch base_match;
    for (const auto &name : fnames) {
        if (!std::regex_match(name, base_match, base_regex)) {
            continue;
        }
        // Entry 0 is the whole string; entry 1 is the first parenthesised
        // expression, so exactly two entries are expected here.
        if (base_match.size() != 2) {
            continue;
        }
        std::cout << name << " has a base of " << base_match[1].str() << '\n';
    }

    // 3) Extract several sub-matches (base name and extension).
    std::regex pieces_regex("([a-z]+)\\.([a-z]+)");
    std::smatch pieces_match;
    for (const auto &name : fnames) {
        if (!std::regex_match(name, pieces_match, pieces_regex)) {
            continue;
        }
        std::cout << name << '\n';
        const size_t count = pieces_match.size();
        for (size_t idx = 0; idx != count; ++idx) {
            std::cout << " submatch " << idx << ": " << pieces_match[idx].str() << '\n';
        }
    }
}
regex_search (C++11)#
尝试匹配一个正则表达式到字符序列的任何部分 (函数模板)
#include <iostream>
#include <string>
#include <regex>
// std::regex_search example: finds a pattern anywhere inside a string,
// prints prefix / sub-matches / suffix of each hit, repeats a search by
// consuming the suffix, and finally searches a C-style string.
int main()
{
    std::string lines[] = {"Roses are #ff0000",
                           "violets are #0000ff",
                           "all of my base are belong to you"};

    // Three capture groups, one per two-digit hex colour component.
    std::regex color_regex("#([a-f0-9]{2})"
                           "([a-f0-9]{2})"
                           "([a-f0-9]{2})");

    // Simple match: only report whether the line contains a colour.
    for (const auto &line : lines) {
        std::cout << line << ": " << std::boolalpha
                  << std::regex_search(line, color_regex) << '\n';
    }
    std::cout << '\n';

    // Show the contents of the marked sub-expressions of each match.
    std::smatch color_match;
    for (const auto& line : lines) {
        if(std::regex_search(line, color_match, color_regex)) {
            std::cout << "matches for '" << line << "'\n";
            std::cout << "Prefix: '" << color_match.prefix() << "'\n";
            for (size_t i = 0; i < color_match.size(); ++i)
                std::cout << i << ": " << color_match[i] << '\n';
            std::cout << "Suffix: '" << color_match.suffix() << "\'\n\n";
        }
    }

    // Repeated search (see also std::regex_iterator).
    std::string log(R"(
Speed: 366
Mass: 35
Speed: 378
Mass: 32
Speed: 400
Mass: 30)");
    // Accept a space or a tab between the label and the value: the previous
    // pattern required a literal tab and therefore never matched a log whose
    // fields are separated by spaces. At least one digit is required so a
    // bare "Speed:" label is not reported as a hit.
    std::regex r(R"(Speed:[ \t]+\d+)");
    std::smatch sm;
    while(std::regex_search(log, sm, r))
    {
        std::cout << sm.str() << '\n';
        // Continue after the current hit; sm is refilled by the next search.
        log = sm.suffix();
    }

    // C-style string demonstration (std::cmatch holds const char* sub-matches).
    std::cmatch cm;
    if(std::regex_search("this is a test", cm, std::regex("test")))
        std::cout << "\nFound " << cm[0] << " at position " << cm.prefix().length();
}
regex_replace (C++11)#
以格式化的替换文本来替换正则表达式匹配的出现位置 (函数模板)
#include <iostream>
#include <iterator>
#include <regex>
#include <string>
// std::regex_replace example: replaces every vowel of a sentence, once
// streaming the result through an output iterator and once building a string.
int main()
{
    const std::string text = "Quick brown fox";
    const std::regex vowel_re("a|e|i|o|u");

    // Variant 1: write the replaced text straight to std::cout's buffer.
    std::ostreambuf_iterator<char> sink(std::cout);
    std::regex_replace(sink, text.begin(), text.end(), vowel_re, "*");

    // Variant 2: build a new string; "$&" stands for the matched text.
    const std::string bracketed = std::regex_replace(text, vowel_re, "[$&]");
    std::cout << '\n' << bracketed << '\n';
}
迭代器#
regex_iterator 用于遍历在序列中找到的匹配正则表达式的整个集合。
regex_iterator (C++11)#
迭代一个字符序列中的所有正则表达式匹配 (类模板)
#include <regex>
#include <iterator>
#include <iostream>
#include <string>
// std::regex_iterator example: counts and prints every whitespace-delimited
// token of a sentence.
int main()
{
    const std::string s = "Quick brown fox.";
    std::regex words_regex("[^\\s]+");

    // A default-constructed iterator serves as the end-of-sequence marker.
    const std::sregex_iterator first(s.begin(), s.end(), words_regex);
    const std::sregex_iterator last{};

    std::cout << "Found " << std::distance(first, last) << " words:\n";

    for (std::sregex_iterator it = first; it != last; ++it) {
        std::cout << it->str() << '\n';
    }
}
regex_token_iterator (C++11)#
迭代给定字符串中的所有正则表达式匹配中的指定子表达式,或迭代未匹配的子字符串 (类模板)
#include <fstream>
#include <iostream>
#include <algorithm>
#include <iterator>
#include <regex>
// std::regex_token_iterator example: splits a sentence on whitespace
// (sub-match index -1 selects the non-matched pieces) and pulls the first
// capture group (the URL) out of every anchor tag in an HTML fragment.
int main()
{
    using TokenIt = std::sregex_token_iterator;
    using LineOut = std::ostream_iterator<std::string>;

    std::string text = "Quick brown fox.";

    // Tokenization (non-matched fragments).
    // Note that the regex matches only twice: when the third value is
    // obtained, the iterator is a suffix iterator.
    std::regex ws_re("\\s+"); // whitespace
    TokenIt word_tokens(text.begin(), text.end(), ws_re, -1);
    std::copy(word_tokens, TokenIt(), LineOut(std::cout, "\n"));

    // Iterate over the first sub-match of every hit.
    std::string html = "<p><a href=\"http://google.com\">google</a> "
                       "< a HREF =\"http://cppreference.com\">cppreference</a>\n</p>";
    std::regex url_re("<\\s*A\\s+[^>]*href\\s*=\\s*\"([^\"]*)\"", std::regex::icase);
    TokenIt url_tokens(html.begin(), html.end(), url_re, 1);
    std::copy(url_tokens, TokenIt(), LineOut(std::cout, "\n"));
}
本博客所有文章除特别声明外,均采用 CC BY-SA 4.0 协议 ,转载请注明出处!