解析这些字符串:
int main(){ for (const std::string input: std::vector<std::string> { "module simple_in_n_out();endmodule;", "module simple_in_n_out(in_1);endmodule;", "module simple_in_n_out(in_1,in_2,in_3);endmodule;", }) { parse_verilog_file(input); } return 0; }
在前两个输入和第一个字符串的push_back上都成功,但是在向向量添加更多字符串时失败:
std::string module_name; stringvec module_inputs; module_input_list %= tok.identifier[push_back(phoenix::ref(module_inputs), _1)] % qi::lit(','); module_input_list.name("module_input_list"); BOOST_SPIRIT_DEBUG_NODE(module_input_list); module_stmt %= tok.module_ >> tok.identifier[phoenix::ref(module_name) = _1] >> '(' >> -(module_input_list) >> ')' >> ';'; module_stmt.name("module"); BOOST_SPIRIT_DEBUG_NODE(module_stmt);
输出看起来像:
<module_stmt> <try>[module]</try> <module_input_list> <try>[)][;][endmodule][;]</try> <fail/> </module_input_list> <success>[endmodule][;]</success> <attributes>[]</attributes> </module_stmt> <module_stmt> <try>[endmodule][;]</try> <fail/> </module_stmt> TODO: put the module together now <module_stmt> <try></try> <fail/> </module_stmt> ------------------------- Parsing succeeded ------------------------- module name: simple_in_n_out <module_stmt> <try>[module]</try> <module_input_list> <try>[in_1][)][;][endmodule][;]</try> <success>[)][;][endmodule][;]</success> <attributes>[]</attributes> </module_input_list> <success>[endmodule][;]</success> <attributes>[]</attributes> </module_stmt> <module_stmt> <try>[endmodule][;]</try> <fail/> </module_stmt> TODO: put the module together now <module_stmt> <try></try> <fail/> </module_stmt> ------------------------- Parsing succeeded ------------------------- module name: simple_in_n_out module input: in_1 <module_stmt> <try>[module]</try> <module_input_list> <try>[in_1]</try> <success></success> <attributes>[]</attributes> </module_input_list> <fail/> </module_stmt> ------------------------- Parsing failed -------------------------
完整代码:
#define BOOST_SPIRIT_DEBUG #include "netlist/netlistlexer.h" namespace verilog { using namespace boost::spirit; using boost::phoenix::val; using boost::spirit::ascii::char_; using boost::spirit::ascii::string; /////////////////////////////////////////////////////////////////////////////// // Grammar definition /////////////////////////////////////////////////////////////////////////////// template <typename Iterator, typename Lexer> struct verilog_grammar : qi::grammar<Iterator, qi::in_state_skipper<Lexer> > { template <typename TokenDef> verilog_grammar(TokenDef const& tok) : verilog_grammar::base_type(program) { using boost::spirit::_val; using phoenix::push_back; using qi::on_error; using qi::fail; using phoenix::construct; program = +statement ; statement = module_stmt | end_module_stmt ; module_input_list %= tok.identifier[push_back(phoenix::ref(module_inputs), _1)] % qi::lit(','); module_input_list.name("module_input_list"); BOOST_SPIRIT_DEBUG_NODE(module_input_list); module_stmt %= tok.module_ >> tok.identifier[phoenix::ref(module_name) = _1] >> '(' >> -(module_input_list) >> ')' >> ';'; module_stmt.name("module"); BOOST_SPIRIT_DEBUG_NODE(module_stmt); end_module_stmt = (tok.endmodule_ >> ';' | tok.endmodule_)[ std::cout << val("TODO: put the module together now") << "\n" ]; end_module_stmt.name("end_module_stmt"); on_error<fail> ( program , std::cout << val("Error! Expecting ") << _4 // what failed? << val(" here: \"") << construct<std::string>(_3, _2) // iterators to error-pos, end << val("\"") << std::endl ); } std::string module_name; stringvec module_inputs; typedef boost::variant<unsigned int, std::string> expression_type; typedef boost::fusion::vector<std::string,std::vector<std::string>> fustring; qi::rule<Iterator, qi::in_state_skipper<Lexer> > program, statement; qi::rule<Iterator, qi::in_state_skipper<Lexer> > module_stmt; qi::rule<Iterator, qi::in_state_skipper<Lexer> > module_input_list; qi::rule<Iterator, qi::in_state_skipper<Lexer> > end_module_stmt; }; } // end verilog namespace void parse_verilog_file(std::string str){ typedef std::string::iterator base_iterator_type; using namespace boost::spirit; typedef lex::lexertl::token< base_iterator_type, boost::mpl::vector<unsigned int, std::string> > token_type; typedef lex::lexertl::lexer<token_type> lexer_type; typedef verilog::verilog_tokens<lexer_type> verilog_tokens; typedef verilog_tokens::iterator_type iterator_type; typedef verilog::verilog_grammar<iterator_type, verilog_tokens::lexer_def> verilog_grammar; verilog_tokens tokens; // Our lexer verilog_grammar calc(tokens); // Our parser std::string::iterator it = str.begin(); iterator_type iter = tokens.begin(it, str.end()); iterator_type end = tokens.end(); bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[tokens.self]); if (r && iter == end) { std::cout << "-------------------------\n"; std::cout << "Parsing succeeded\n"; std::cout << "-------------------------\n"; std::cout << "module name: " << calc.module_name << "\n"; for (const std::string i: calc.module_inputs){ std::cout << " module input: " << i << "\n"; } } else { std::cout << "-------------------------\n"; std::cout << "Parsing failed\n"; std::cout << "-------------------------\n"; } } int main(){ for (const std::string input: std::vector<std::string> { "module simple_in_n_out();endmodule;", "module simple_in_n_out(in_1);endmodule;", "module simple_in_n_out(in_1,in_2,in_3);endmodule;", }) { parse_verilog_file(input); } return 0; }
netlist / netlistlexer.h:
#ifndef NETLISTLEXER_H
#define NETLISTLEXER_H
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/spirit/include/phoenix_object.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/variant/recursive_variant.hpp>
#include <boost/foreach.hpp>
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
namespace fusion = boost::fusion;
namespace phoenix = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
typedef std::vector<std::string> stringvec;
namespace verilog {
using namespace boost::spirit;
using boost::phoenix::val;
using boost::spirit::ascii::char_;
using boost::spirit::ascii::string;
///////////////////////////////////////////////////////////////////////////////
// Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct verilog_tokens : lex::lexer<Lexer>
{
verilog_tokens()
{
// define the tokens to match
identifier = "[a-zA-Z_][a-zA-Z0-9_]*";
logic_op = "[\\&\\|]";
constant = "[0-9]+";
module_ = "module";
assign_ = "assign";
endmodule_ = "endmodule";
wire_ = "wire";
input_ = "input";
output_ = "output";
inout_ = "inout";
reg_ = "reg";
begin_ = "begin";
end_ = "end";
always_ = "always";
if_ = "if";
else_ = "else";
parameter_ = "parameter";
// associate the tokens and the token set with the lexer
this->self = lex::token_def<>('(') | ')' | '{' | '}' | '=' | '[' | ']' | ';' | constant | logic_op;
this->self += if_ | else_ | begin_ | end_ | always_ | reg_;
this->self += module_ | endmodule_ | assign_ | wire_ | input_ | output_ | inout_;
this->self += parameter_;
this->self += identifier;
// define the whitespace to ignore (spaces, tabs, newlines and C-style
// comments)
this->self("WS")
= lex::token_def<>("[ \\t\\n]+")
| "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"
| "\\/\\/[^\\r\\n\\f]*"
| "\\(\\*[^*]*\\*\\)"
;
}
// these tokens have no attribute
lex::token_def<lex::omit> if_, else_, begin_, end_, endmodule_;
// these tokens expose the iterator_range of the matched input sequence
lex::token_def<> always_, reg_;
lex::token_def<> module_, assign_, wire_, input_, output_, inout_;
lex::token_def<> parameter_;
// The following two tokens have an associated attribute type, 'identifier'
// carries a string (the identifier name) and 'constant' carries the
// matched integer value.
//
// Note: any token attribute type explicitly specified in a token_def<>
// declaration needs to be listed during token type definition as
// well (see the typedef for the token_type below).
//
// The conversion of the matched input to an instance of this type occurs
// once (on first access), which makes token attributes as efficient as
// possible. Moreover, token instances are constructed once by the lexer
// library. From this point on tokens are passed by reference only,
// avoiding them being copied around.
lex::token_def<std::string> identifier;
lex::token_def<unsigned int> constant;
lex::token_def<std::string> logic_op;
};
} // end verilog namespace
#endif // NETLISTLEXER_H
解析这些字符串:int main(){for(const std :: string input:std :: vector <:string>{“ module simple_in_n_out(); endmodule;”,“ module simple_in_n_out(in_1); ...
[好,我不得不克服SpiritLex¹的迷雾和一些怪癖,表明您可能没有在使用符合标准的编译器²。
When I did,我注意到实际的语法没有使用属性传播,而是使用了临时语义动作来提取一些信息³。