I'm trying to write a program to parse and evaluate mathematical, litteral and boolean expressions, for example :
- "(9/3) == 3+3*2" would be parsed as "(9/3) == (3+(3*2))" and evaluted as "false"
- "1+2/3" would be parsed as "1+(2/3)" and evaluated as "6".
- "this + is + a + test" would be parsed as "this+is+a+test" and evaluated as "thisisatest"
The program correctly parses and solves what I'm giving it, but as soon as I put parenthesis in the expressions, the parsing takes a ridiculous amount of time.
I've based my work on Sehe's impressively exhaustive answer on how to write a boolean grammar parser. I've added new operators (+, -, /, *, ==, !=) following that example.
Here is the parser
// DEFINING TYPES struct op_not {}; struct op_or {}; struct op_and {}; struct op_xor {}; struct op_equal {}; struct op_unequal {}; struct op_sum {}; struct op_difference {}; struct op_factor {}; struct op_division {}; typedef ustring var; template <typename tag> struct binop; template <typename tag> struct unop; typedef boost::variant<var, boost::recursive_wrapper<unop <op_not> >, boost::recursive_wrapper<binop<op_equal> >, boost::recursive_wrapper<binop<op_unequal> >, boost::recursive_wrapper<binop<op_and> >, boost::recursive_wrapper<binop<op_xor> >, boost::recursive_wrapper<binop<op_or> >, boost::recursive_wrapper<binop<op_difference> >, boost::recursive_wrapper<binop<op_sum> >, boost::recursive_wrapper<binop<op_factor> >, boost::recursive_wrapper<binop<op_division> > > expressionContainer; template <typename tag> struct binop { explicit binop(const expressionContainer& l , const expressionContainer& r) : oper1(l), oper2(r) { } expressionContainer oper1, oper2; }; template <typename tag> struct unop { explicit unop(const expressionContainer& o) : oper1(o) { } expressionContainer oper1; }; // EXPRESSION PARSER template <typename It, typename Skipper = boost::spirit::standard_wide::space_type> struct parserExpression : qi::grammar<It, expressionContainer(), Skipper> { parserExpression() : parserExpression::base_type(expr_) { using namespace qi; expr_ = or_.alias(); // Logical Operators or_ = (xor_ >> orOperator_ >> or_) [_val = boost::phoenix::construct<Expression::binop<op_or >>(_1, _3)] | xor_[_val = _1]; xor_ = (and_ >> xorOperator_ >> xor_) [_val = boost::phoenix::construct<Expression::binop<op_xor>>(_1, _3)] | and_[_val = _1]; and_ = (equal_ >> andOperator_ >> and_) [_val = boost::phoenix::construct<Expression::binop<op_and>>(_1, _3)] | equal_[_val = _1]; equal_ = (unequal_ >> equalOperator_ >> equal_) [_val = boost::phoenix::construct<Expression::binop<op_equal>>(_1, _3)] | unequal_[_val = _1]; unequal_ = (factor_ >> unequalOperator_ >> unequal_) [_val = boost::phoenix::construct<Expression::binop<op_unequal>>(_1, _3)] | factor_[_val = _1]; // Numerical Operators factor_ = (division_ >> factorOperator_ >> factor_) [_val = boost::phoenix::construct<Expression::binop<op_factor>>(_1, _3)] | division_[_val = _1]; division_ = (sum_ >> divisionOperator_ >> division_) [_val = boost::phoenix::construct<Expression::binop<op_division>>(_1, _3)] | sum_[_val = _1]; sum_ = (difference_ >> sumOperator_ >> sum_) [_val = boost::phoenix::construct<Expression::binop<op_sum>>(_1, _3)] | difference_[_val = _1]; difference_ = (not_ >> differenceOperator_ >> difference_) [_val = boost::phoenix::construct<Expression::binop<op_difference>>(_1, _3)] | not_[_val = _1]; // UNARY OPERATIONS not_ = (notOperator_ > simple) [_val = boost::phoenix::construct<Expression::unop <op_not>>(_2)] | simple[_val = _1]; simple = (('(' > expr_ > ')') | var_); var_ = qi::lexeme[+alnum]; notOperator_ = qi::char_('!'); andOperator_ = qi::string("&&"); orOperator_ = qi::string("||"); xorOperator_ = qi::char_("^"); equalOperator_ = qi::string("=="); unequalOperator_ = qi::string("!="); sumOperator_ = qi::char_("+"); differenceOperator_ = qi::char_("-"); factorOperator_ = qi::char_("*"); divisionOperator_ = qi::char_("/"); /*BOOST_SPIRIT_DEBUG_NODE(expr_); BOOST_SPIRIT_DEBUG_NODE(or_); BOOST_SPIRIT_DEBUG_NODE(xor_); BOOST_SPIRIT_DEBUG_NODE(and_); BOOST_SPIRIT_DEBUG_NODE(not_); BOOST_SPIRIT_DEBUG_NODE(simple); BOOST_SPIRIT_DEBUG_NODE(var_); BOOST_SPIRIT_DEBUG_NODE(notOperator_); BOOST_SPIRIT_DEBUG_NODE(andOperator_); BOOST_SPIRIT_DEBUG_NODE(orOperator_); BOOST_SPIRIT_DEBUG_NODE(xorOperator_); BOOST_SPIRIT_DEBUG_NODE(sumOperator_); BOOST_SPIRIT_DEBUG_NODE(differenceOperator_); BOOST_SPIRIT_DEBUG_NODE(factorOperator_); BOOST_SPIRIT_DEBUG_NODE(divisionOperator_);*/ } private: qi::rule<It, var(), Skipper> var_; qi::rule<It, expressionContainer(), Skipper> not_ , and_ , xor_ , or_ , equal_ , unequal_ , sum_ , difference_ , factor_ , division_ , simple , expr_; qi::rule<It, ustring(), Skipper> notOperator_ , andOperator_ , orOperator_ , xorOperator_ , equalOperator_ , unequalOperator_ , sumOperator_ , differenceOperator_ , factorOperator_ , divisionOperator_; }; With the above code, on my computer (running an Intel I5 CPU) :
- parsing "1 + 2 - 3 * 4 / 5 == 6 != 7 && 8 || 9 ^ 8 * 7 / 6 ^ 5 && 4 || 3 != 2 == 1" is instantaneous.
- parsing "(1)" takes approximately 200ms
Spirit's performance having been proved before, I'm left with the obvious question : what could I improve?