Boost.Xpressive で HTML タグの中身とタグ名を取得する

後方参照を使うと、開始タグと終了タグの名前が一致する場合だけマッチさせつつ、次のようにタグ名と中身を簡単に取得できるようです。

[ソース]

#define _SCL_SECURE_NO_WARNINGS
#include <iostream>
#include <boost/xpressive/xpressive.hpp>
#include <boost/xpressive/xpressive_static.hpp>


int
main(){
    namespace x = boost::xpressive;
    using x::s1;
    using x::_w;
    using x::_;

    // タグで囲まれた文字列を取得
    x::mark_tag in_tag(1);
    x::mark_tag tag_name(2);
    x::sregex regex =
            "<"  >> (tag_name = +_w) >> ">"
                 >> (in_tag = -*_) >>
            "</" >> tag_name >> ">";

    std::string source = "<del>homu</del>";
    x::smatch what;
    if( x::regex_match(source, what, regex) ){
        std::cout << what[tag_name].str() << std::endl;
        std::cout << what[in_tag].str() << std::endl;
    }

    std::string source2 = "<del>homu</dev>";
    if(!x::regex_match(source2, what, regex) ){
        std::cout << "unmatch" << std::endl;
    }

    
    return 0;
}

[出力]

del
homu
unmatch

[boost]

  • ver 1.49.0