我正在尝试使用 boost::spirit::x3 将 URL 解析和分解为多个部分,如下所示:
#include <iostream>
#include <boost/fusion/adapted/std_tuple.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/home/x3.hpp>
// The three textual pieces a URL is split into, in the order they occur in the input.
struct UrlParts {
    std::string prefix; // text in front of the host (scheme and user-info)
    std::string host;   // host text
    std::string suffix; // text after the host (port and path/query/fragment)
};
// Adapts UrlParts as a Boost.Fusion sequence with the members prefix, host, suffix (in that order).
// NOTE(review): parseSpirit passes a std::tie tuple of the members as the attribute, so this
// adaptation looks unused unless UrlParts itself is handed to x3::parse - confirm before keeping it.
BOOST_FUSION_ADAPT_STRUCT(UrlParts, prefix, host, suffix)
// Splits `input` into UrlParts {prefix, host, suffix} using a Boost.Spirit X3 grammar.
// Prints "Parsing failed" to std::cout when the grammar followed by end-of-input does not
// match; the UrlParts object is returned in either case.
// NOTE(review): this version is reported to trip the X3 static assertion
// "Size of the passed attribute is less than expected" on Boost 1.76.
UrlParts parseSpirit(std::string_view input) {
namespace x3 = boost::spirit::x3;
// Scheme: one or more characters from [a-zA-Z0-9+.-] followed by "://", exposed as the matched text.
static const auto scheme_ = (x3::raw[+x3::char_("a-zA-Z0-9+.-") >> "://"]);
// User-info: one or more non-'@' characters followed by '@', exposed as the matched text.
static const auto userinfo_ = (x3::raw[+~x3::char_("@") >> "@"]);
// Prefix: optional scheme followed by optional user-info (two separate raw[] pieces).
static const auto prefix_ = (-scheme_ >> -userinfo_);
// Port: ':' and optionally 1 to 5 digits; the &(...) look-ahead requires '/', '?', '#'
// or end of input to follow without consuming it.
static const auto port_ = (x3::raw[':' >> -x3::repeat(1, 5)[x3::digit] >> &(x3::char_("/?#") | x3::eoi)]);
// Host: one or more of hex digits, 'x', 'X', ':' and '.', stopping where port_ would match.
static const auto host_ = (+(x3::char_("a-fxXA-F0-9:.") - port_));
static const auto path_ = (x3::char_("/?#") >> *x3::char_); // to store path+query+fragment
// Suffix: optional port followed by optional path.
static const auto suffix_ = (-port_ >> -path_);
//static const auto url = x3::rule<class url, UrlParts>() = -prefix_ >> ('[' >> host_ >> ']' | host_) >> -suffix_;
// The host may be written either inside '[' ... ']' or bare.
static const auto url = -prefix_ >> ('[' >> host_ >> ']' | host_) >> -suffix_; // prefix & suffix are optional but host is required
// BOOST_SPIRIT_DEBUG_NODES((scheme_)(userinfo_)(host_)(port_)(path_)(url));
// Parse the input
auto iter = input.begin();
auto end = input.end();
UrlParts parts;
// Tuple of references, so the parser writes directly into the members of `parts`.
auto attr = std::tie(parts.prefix, parts.host, parts.suffix);
//parse(input.begin(), input.end(), x3::eps >> url >> x3::eoi, parts);
// Trailing x3::eoi makes the parse succeed only when the whole input is consumed.
bool ret = x3::parse(iter, end, url >> x3::eoi, attr);
if (!ret) {
std::cout << "Parsing failed" << std::endl;
}
return parts;
}
// Runs parseSpirit over a handful of sample URLs and prints the parts found for each one.
int main()
{
    for (auto input : {"http://usr:pwd@192.168.1.1:8080/file.php?abc=1#23",
                       "http://[::ffff:192.168.1.1]:8080/file.php?abc=1#23",
                       "http://::ffff:192.168.1.1/file.php?abc=1#23",
                       "::ffff:192.168.1.1"}) {
        std::cout << "Input: " << input << '\n';
        // Parse the sample that was just echoed so "Input" and "Output" describe the same URL.
        const auto parts = parseSpirit(input);
        std::cout << "Output: Prefix: " << parts.prefix << ", Host: " << parts.host << ", Suffix: " << parts.suffix << '\n';
        std::cout << "================" << '\n';
    }
    return 0;
}
但是上面的代码无法编译并出现错误:
/usr/include/boost/spirit/home/x3/operator/detail/sequence.hpp:140:25: error: static assertion failed: Size of the passed attribute is less than expected.
140 | actual_size >= expected_size
| ~~~~~~~~~~~~^~~~~~~~~~~~~~~~
/usr/include/boost/spirit/home/x3/operator/detail/sequence.hpp:140:25: note: ‘(((int)boost::spirit::x3::detail::partition_attribute >, boost::spirit::x3::literal_string > > >, boost::spirit::x3::optional > >, boost::spirit::x3::literal_char > > > > >, boost::spirit::x3::alternative, boost::spirit::x3::plus, boost::spirit::x3::raw_directive, boost::spirit::x3::optional, boost::spirit::x3::detail::finite_count > > >, boost::spirit::x3::and_predicate, boost::spirit::x3::eoi_parser> > > > > > >, boost::spirit::x3::literal_char >, boost::spirit::x3::plus, boost::spirit::x3::raw_directive, boost::spirit::x3::optional, boost::spirit::x3::detail::finite_count > > >, boost::spirit::x3::and_predicate, boost::spirit::x3::eoi_parser> > > > > > > >, boost::spirit::x3::optional, boost::spirit::x3::optional, boost::spirit::x3::detail::finite_count > > >, boost::spirit::x3::and_predicate, boost::spirit::x3::eoi_parser> > > > >, boost::spirit::x3::optional, boost::spirit::x3::kleene > > > > >, std::tuple, std::allocator >&, std::__cxx11::basic_string, std::allocator >&, std::__cxx11::basic_string, std::allocator >&>, boost::spirit::x3::unused_type, void>::actual_size) >= ((int)boost::spirit::x3::detail::partition_attribute >, boost::spirit::x3::literal_string > > >, boost::spirit::x3::optional > >, boost::spirit::x3::literal_char > > > > >, boost::spirit::x3::alternative, boost::spirit::x3::plus, boost::spirit::x3::raw_directive, boost::spirit::x3::optional, boost::spirit::x3::detail::finite_count > > >, boost::spirit::x3::and_predicate, boost::spirit::x3::eoi_parser> > > > > > >, boost::spirit::x3::literal_char >, boost::spirit::x3::plus, boost::spirit::x3::raw_directive, boost::spirit::x3::optional, boost::spirit::x3::detail::finite_count > > >, boost::spirit::x3::and_predicate, boost::spirit::x3::eoi_parser> > > > > > > >, boost::spirit::x3::optional, boost::spirit::x3::optional, boost::spirit::x3::detail::finite_count > > >, boost::spirit::x3::and_predicate, 
boost::spirit::x3::eoi_parser> > > > >, boost::spirit::x3::optional, boost::spirit::x3::kleene > > > > >, std::tuple, std::allocator >&, std::__cxx11::basic_string, std::allocator >&, std::__cxx11::basic_string, std::allocator >&>, boost::spirit::x3::unused_type, void>::expected_size))’ evaluates to false
请问问题出在哪里?使用 boost::spirit::qi 编写的类似代码可以正常工作。
这个错误意味着所传入的属性没有被识别为与解析器表达式兼容。可以看到它在 Boost 1.76 中确实无法编译:
我看到的第一个问题是:试图用两个
raw[]
指令合成单个字符串(prefix
)。这……是行不通的。
好消息是,从 1.77 开始它(又?)可以正常工作了。但总的来说,建议更明确地指定属性兼容性,例如使用下面这个回答中的
as_type
(《理解 Boost.Spirit 中的列表运算符 (%)》),或者参考许多其他回答:https://stackoverflow.com/search?tab=newest&q=user%3a85371%20x3%20as_type&searchOn=3 ;如果按我通常更喜欢的名字 as
来搜索,还能找到更多:https://stackoverflow.com/search?q=user%3A85371+x3+as+x3%3A%3Arule
话虽如此,我查看你的代码时发现:你传入的是用 std::tie 手动绑定的元组,同时却又用 BOOST_FUSION_ADAPT_STRUCT 适配了结构体?这是多余的。我假设你只想手动绑定,所以去掉了结构体适配。
接下来,请千万不要把解析器组合子库当作分词器来用。也就是说,不要把互不相关的产生式随意组合在一起(后缀确实不应该包含端口说明)。
此外,请使用整数解析器把端口解析为真正的端口号——既然你本来就要“较真”地严格限制允许的位数,就更应如此!请看:
auto portnum_ = x3::uint_parser<uint16_t, 10, 1, 5>{};
auto portspec_ = ':' >> portnum_ >> &(x3::char_("/?#") | x3::eoi);
小心双重可选。例如,由于
suffix_ = -port_ >> -path_
完全由可选元素组成,因此表达式
-suffix_
充其量与 suffix_
含义相同。然而在很多情况下(对可选项进行重复的结构),你会因为零长度匹配而陷入无限循环。我认为 /
或
#
也应该终止 userinfo 产生式。并不是所有地方都需要 raw
。例如,我更倾向于写成
auto userinfo_ = +~x3::char_("@/#") >> x3::char_("@");
来处理 userinfo。事实上,如果你这样写
auto authority_ = ('[' >> host_ >> ']' | host_) >> -portspec_;
就能得到正确解析出的主机,并且其中不包含
[]
方括号——这对括号只在 URI 语法内部才有意义。
你把输入结束验证注释掉了,我们重新启用它。另外,我们还返回更丰富的信息(包括 valid
标志),并增加更多测试用例:
在 Compiler Explorer 上实时运行
#include <boost/fusion/adapted/std_tuple.hpp>
#include <boost/spirit/home/x3.hpp>
#include <iostream>
// The three textual pieces a URL is split into, in the order they occur in the input.
struct UrlParts {
    std::string prefix; // text in front of the host (scheme and user-info)
    std::string host;   // host text
    std::string suffix; // text after the host (port and path/query/fragment)
};
// Reduced variant of the URL parser: only the optional prefix (scheme + user-info) is parsed
// and stored; the host and suffix parts of the grammar are commented out, so `host` and
// `suffix` of the returned UrlParts stay empty.
// Prints "Parsing failed" to std::cout when the grammar followed by end-of-input does not
// match; the UrlParts object is returned in either case.
UrlParts parseSpirit(std::string_view input) {
namespace x3 = boost::spirit::x3;
// Scheme: one or more characters from [a-zA-Z0-9+.-] followed by "://", exposed as the matched text.
auto scheme_ = x3::raw[+x3::char_("a-zA-Z0-9+.-") >> "://"];
// User-info: one or more non-'@' characters followed by '@', exposed as the matched text.
auto userinfo_ = x3::raw[+~x3::char_("@") >> "@"];
// Prefix: scheme followed by optional user-info (two separate raw[] pieces).
auto prefix_ = scheme_ >> -userinfo_;
// Port: ':' and optionally 1 to 5 digits; the &(...) look-ahead requires '/', '?', '#'
// or end of input to follow without consuming it.
auto port_ = x3::raw[':' >> -x3::repeat(1, 5)[x3::digit] >> &(x3::char_("/?#") | x3::eoi)];
// Host: one or more of hex digits, 'x', 'X', ':' and '.', stopping where port_ would match.
auto host_ = +(x3::char_("a-fxXA-F0-9:.") - port_);
auto path_ = x3::char_("/?#") >> *x3::char_; // to store path+query+fragment
// Suffix: optional port followed by optional path.
auto suffix_ = -port_ >> -path_;
// host_, path_ and suffix_ are not referenced by the reduced `url` expression below.
// static const auto url = = -prefix_ >> ('[' >> host_ >> ']' | host_) >>
// -suffix_;
auto url //
//= x3::rule<class url, UrlParts>() //
= -prefix_ //
//>> ('[' >> host_ >> ']' | host_) >>
// -suffix_ // prefix & suffix are optional but host is required
;
// Parse the input
auto iter = input.begin();
auto end = input.end();
UrlParts p;
// One-element tuple of references: only p.prefix receives parsed text.
auto attr = std::tie(p.prefix/*, p.host, p.suffix*/);
// Trailing x3::eoi makes the parse succeed only when the whole input is consumed.
bool ret = x3::parse(iter, end, url >> x3::eoi, attr);
if (!ret) {
std::cout << "Parsing failed" << std::endl;
}
return p;
}
// Runs parseSpirit over a handful of sample URLs and prints the parts found for each one.
int main() {
    for (auto input : {"http://usr:pwd@192.168.1.1:8080/file.php?abc=1#23",
                       "http://[::ffff:192.168.1.1]:8080/file.php?abc=1#23",
                       "http://::ffff:192.168.1.1/file.php?abc=1#23", "::ffff:192.168.1.1"}) {
        std::cout << "Input: " << input << '\n';
        // Parse the sample that was just echoed so "Input" and "Output" describe the same URL.
        const auto parts = parseSpirit(input);
        std::cout << "Output: Prefix: " << parts.prefix << ", Host: " << parts.host << ", Suffix: " << parts.suffix << '\n';
        std::cout << "================" << '\n';
    }
}
输出:
192.168.1.1 {true, "", "", "192.168.1.1", "", unspecified}
192.168.1.1/ {true, "", "", "192.168.1.1", "/", unspecified}
192.168.1.1/file.php {true, "", "", "192.168.1.1", "/file.php", unspecified}
192.168.1.1/file.php?abc=1 {true, "", "", "192.168.1.1", "/file.php?abc=1", unspecified}
192.168.1.1:8888 {true, "", "", "192.168.1.1", "", 8888}
192.168.1.1:8888/ {true, "", "", "192.168.1.1", "/", 8888}
192.168.1.1:8888/file.php {true, "", "", "192.168.1.1", "/file.php", 8888}
192.168.1.1:8888/file.php?abc=1 {true, "", "", "192.168.1.1", "/file.php?abc=1", 8888}
::ffffff::192.168.1.1:9999/file.php?abc=1 {true, "", "", "::ffffff::192.168.1.1", "/file.php?abc=1", 9999}
http://192.168.1.1 {true, "http://", "", "192.168.1.1", "", unspecified}
http://192.168.1.1/ {true, "http://", "", "192.168.1.1", "/", unspecified}
http://192.168.1.1/file.php {true, "http://", "", "192.168.1.1", "/file.php", unspecified}
http://192.168.1.1/file.php?abc=1 {true, "http://", "", "192.168.1.1", "/file.php?abc=1", unspecified}
http://192.168.1.1:8888 {true, "http://", "", "192.168.1.1", "", 8888}
http://192.168.1.1:8888/ {true, "http://", "", "192.168.1.1", "/", 8888}
http://192.168.1.1:8888/file.php {true, "http://", "", "192.168.1.1", "/file.php", 8888}
http://192.168.1.1:8888/file.php?abc=1 {true, "http://", "", "192.168.1.1", "/file.php?abc=1", 8888}
http://::ffffff::192.168.1.1:9999/file.php?abc=1 {true, "http://", "", "::ffffff::192.168.1.1", "/file.php?abc=1", 9999}
http://sehe@192.168.1.1 {true, "http://", "sehe@", "192.168.1.1", "", unspecified}
http://sehe@192.168.1.1/ {true, "http://", "sehe@", "192.168.1.1", "/", unspecified}
http://sehe@192.168.1.1/file.php {true, "http://", "sehe@", "192.168.1.1", "/file.php", unspecified}
http://sehe@192.168.1.1:8888 {true, "http://", "sehe@", "192.168.1.1", "", 8888}
http://sehe@192.168.1.1:8888/ {true, "http://", "sehe@", "192.168.1.1", "/", 8888}
http://sehe@192.168.1.1:8888/file.php {true, "http://", "sehe@", "192.168.1.1", "/file.php", 8888}
http://usr:pwd@192.168.1.1/file.php?abc=1 {true, "http://", "usr:pwd@", "192.168.1.1", "/file.php?abc=1", unspecified}
sehe@192.168.1.1 {true, "", "sehe@", "192.168.1.1", "", unspecified}
sehe@192.168.1.1/ {true, "", "sehe@", "192.168.1.1", "/", unspecified}
sehe@192.168.1.1/file.php {true, "", "sehe@", "192.168.1.1", "/file.php", unspecified}
sehe@192.168.1.1:8888 {true, "", "sehe@", "192.168.1.1", "", 8888}
sehe@192.168.1.1:8888/ {true, "", "sehe@", "192.168.1.1", "/", 8888}
sehe@192.168.1.1:8888/file.php {true, "", "sehe@", "192.168.1.1", "/file.php", 8888}
usr:pwd@192.168.1.1/file.php?abc=1 {true, "", "usr:pwd@", "192.168.1.1", "/file.php?abc=1", unspecified}
但是等等——最初的问题到底是什么?
可以用 raw[]
作为提示来绕过这个问题。不过显然我更倾向于升级 Boost:
auto userinfo_ = x3::raw[+~x3::char_("@/#") >> x3::char_("@")];
在 Boost 1.76 上查看它:https://godbolt.org/z/6bc5Ycrcr
c69881691195579e3184ef6024136356 1.76
c69881691195579e3184ef6024136356 1.84