Boost.Xpressive で HTML タグの中身とタグ名を取得する
こんな感じで簡単に取得出来るみたい。
[ソース]
#define _SCL_SECURE_NO_WARNINGS #include <iostream> #include <boost/xpressive/xpressive.hpp> #include <boost/xpressive/xpressive_static.hpp> int main(){ namespace x = boost::xpressive; using x::s1; using x::_w; using x::_; // タグで囲まれた文字列を取得 x::mark_tag in_tag(1); x::mark_tag tag_name(2); x::sregex regex = "<" >> (tag_name = +_w) >> ">" >> (in_tag = -*_) >> "</" >> tag_name >> ">"; std::string source = "<del>homu</del>"; x::smatch what; if( x::regex_match(source, what, regex) ){ std::cout << what[tag_name].str() << std::endl; std::cout << what[in_tag].str() << std::endl; } std::string source2 = "<del>homu</dev>"; if(!x::regex_match(source2, what, regex) ){ std::cout << "unmatch" << std::endl; } return 0; }
[出力]
del
homu
unmatch
[boost]
- ver 1.49.0