構文木を使う
ここまでは解析と評価を一度に行ってきましたが、普通は解析処理と評価/実行の処理を分けるようにします
解析処理は入力文字列が文法に合っているかどうかをチェックしながら出現した演算子や数値などの文法要素を評価の順番に並べたリストを作ることを仕事にします
この文法要素を繋げたリストは自然にツリー構造となることから、これを構文木と言います
解析処理は文字列を入力とし、構文木を出力とします
評価/実行処理は構文木を入力とします
評価/実行処理の仕事は構文木を辿って結果を得ることです
構文木はその構造自体によって、演算子の優先順位や演算子の右結合性・左結合性といった問題は自明に解決されているので、単純に先頭から順番に辿って行くだけで結果を得られるような仕組みになっています
仕組み
構文木はツリー構造です
ツリー構造の各ノードは枝と葉に分かれます
葉のノードでは「値」が必要です
枝のノードでは「演算子」と「オペランド」が必要です。「オペランド」は別の枝か葉のノードになります
ノードの実装
class node { std::string m_operator; std::list<node> m_operands; any_value_t m_value; bool eval( size_t n, any_value_t* presult ){ bool success = false; if( n < m_operands.size()){ std::list<node>::iterator p = m_operands.begin(); std::advance( p, n ); success = p->eval( presult ); } return success; } void runtime_error_devide_by_zero(){ // 0 除算 OutputDebugString( "runtime error: devide by zero.\n" ); } bool operator_comma( any_value_t* presult ){ bool success = true; for( std::list<node>::iterator p = m_operands.begin(); p != m_operands.end(); ++p ){ success = p->eval( presult ); if( !success ){ break; } } return true; } bool operator_add( any_value_t* presult ){ bool success = eval( 0, presult ); if( success ){ any_value_t rhs; success = eval( 1, &rhs ); if( success ){ success = presult->operator_assign_add( rhs ); } } return success; } bool operator_sub( any_value_t* presult ){ bool success = eval( 0, presult ); if( success ){ any_value_t rhs; success = eval( 1, &rhs ); if( success ){ success = presult->operator_assign_sub( rhs ); } } return success; } bool operator_mul( any_value_t* presult ){ bool success = eval( 0, presult ); if( success ){ any_value_t rhs; success = eval( 1, &rhs ); if( success ){ success = presult->operator_assign_mul( rhs ); } } return success; } bool operator_div( any_value_t* presult ){ bool success = eval( 0, presult ); if( success ){ any_value_t rhs; success = eval( 1, &rhs ); if( success ){ if( rhs ){ success = presult->operator_assign_div( rhs ); } else{ runtime_error_devide_by_zero(); success = false; } } } return success; } bool operator_mod( any_value_t* presult ){ bool success = eval( 0, presult ); if( success ){ any_value_t rhs; success = eval( 1, &rhs ); if( success ){ if( rhs ){ success = presult->operator_assign_mod( rhs ); } else{ runtime_error_devide_by_zero(); success = false; } } } return success; } bool operator_unary_plus( any_value_t* presult ){ bool success = eval( 0, presult ); if( success ){ *presult = presult->operator_unary_plus(); } return success; } bool operator_unary_minus( any_value_t* presult ){ bool success = eval( 0, presult ); if( success ){ *presult = presult->operator_unary_minus(); } return success; } bool operator_unary_logical_not( any_value_t* presult ){ bool success = eval( 0, presult ); if( success ){ *presult = presult->operator_unary_logical_not(); } return success; } bool operator_unary_conditional_not( any_value_t* presult ){ bool success = eval( 0, presult ); if( success ){ *presult = presult->operator_unary_conditional_not(); } return success; } public: bool eval( any_value_t* presult ){ bool success = true; if( presult ){ if( m_operator.empty()){ *presult = m_value; } else if(( m_operator == "," )&& !m_operands.empty()){ success = operator_comma( presult ); } else if(( m_operator == "+" )&&( m_operands.size() == 2 )){ success = operator_add( presult ); } else if(( m_operator == "-" )&&( m_operands.size() == 2 )){ success = operator_sub( presult ); } else if(( m_operator == "*" )&&( m_operands.size() == 2 )){ success = operator_mul( presult ); } else if(( m_operator == "/" )&&( m_operands.size() == 2 )){ success = operator_div( presult ); } else if(( m_operator == "%" )&&( m_operands.size() == 2 )){ success = operator_mod( presult ); } else if(( m_operator == "+" )&&( m_operands.size() == 1 )){ success = operator_unary_plus( presult ); } else if(( m_operator == "-" )&&( m_operands.size() == 1 )){ success = operator_unary_minus( presult ); } else if(( m_operator == "~" )&&( m_operands.size() == 1 )){ success = operator_unary_logical_not( presult ); } else if(( m_operator == "!" )&&( m_operands.size() == 1 )){ success = operator_unary_conditional_not( presult ); } else{ success = false; } } return success; } void clear(){ m_operator.clear(); m_operands.clear(); m_value.clear(); } void assign( const std::string& operator_new, const node& operand1 ){ clear(); m_operator = operator_new; m_operands.push_back( operand1 ); } void assign( const std::string& operator_new, const node& operand1, const node& operand2 ){ clear(); m_operator = operator_new; m_operands.push_back( operand1 ); m_operands.push_back( operand2 ); } void assign( const std::string& operator_new, const std::list<node>& operands ){ clear(); m_operator = operator_new; m_operands = operands; } void assign( const std::string& name, const std::string& value ){ clear(); m_value.assign( name, value ); } node& operator=( const node& rhs ){ if( this != &rhs ){ m_operator = rhs.m_operator; m_operands = rhs.m_operands; m_value = rhs.m_value; } return *this; } node(): m_operator(), m_operands(), m_value() { } node( const node& rhs ): m_operator(rhs.m_operator), m_operands(rhs.m_operands), m_value(rhs.m_value) { } node( const std::string& operator_new, const node& operand1 ): m_operator(), m_operands(), m_value() { assign( operator_new, operand1 ); } node( const std::string& operator_new, const node& operand1, const node& operand2 ): m_operator(), m_operands(), m_value() { assign( operator_new, operand1, operand2 ); } node( const std::string& operator_new, std::list<node> operands ): m_operator(), m_operands(), m_value() { assign( operator_new, operands ); } node( const std::string& name, const std::string& value ): m_operator(), m_operands(), m_value() { assign( name, value ); } };
解析処理の実装
class string_calc : protected istringstream<char> { bool skipspace(); bool operator_new_number( node* pnode ); bool constant( node* pnode ); bool operator_unary_prefix( node* pnode ); bool operator_mul_div_mod_( node* pnode ); bool operator_mul_div_mod( node* pnode ); bool operator_add_sub_( node* pnode ); bool operator_add_sub( node* pnode ); bool operator_comma( node* pnode ); bool full_expression( node* pnode ); bool compile( node* pnode ); void error_illegal_character( char c ); void error_unexpected_eof(); void error_unbalanced_parenthesis(); // とりあえず代入は無効 string_calc& operator=( const string_calc& rhs ); string_calc( const string_calc& rhs ); public: bool eval( const std::string& s, any_value_t* presult ); string_calc(); ~string_calc(); }; string_calc::string_calc(): istringstream<char>() { } string_calc::~string_calc(){ } void string_calc::error_illegal_character( char c ){ // 変な文字だからお終い OutputDebugString( stringprintf( "error: illegal caracter '%c'.\n", c )); } void string_calc::error_unexpected_eof(){ // 予期せぬ EOF OutputDebugString( "error: unexpected EOF.\n" ); } void string_calc::error_unbalanced_parenthesis(){ // 対応する () が見つからないから終わり OutputDebugString( "error: unbalanced parenthesis." ); } bool string_calc::skipspace(){ char comment = 0; while( !eof()){ char c1 = at( 0, SEEK_CUR ); char c2 = at( 1, SEEK_CUR ); char c = 0; if( comment == '/' ){ c = getch(); if( c == '\n' ){ comment = 0; } else if( _ismbblead(( unsigned int )( unsigned char )c )){ c = getch(); } } else if( comment == '*' ){ if(( c1 == '*' )&&( c2 == '/' )){ seek( 2, SEEK_CUR ); comment = 0; } else{ c = getch(); if( _ismbblead(( unsigned int )( unsigned char )c )){ c = getch(); } } } else{ if((( c1 == '/' )&&( c2 == '/' )) ||(( c1 == '/' )&&( c2 == '*' ))){ seek( 2, SEEK_CUR ); comment = c2; } else{ c = getch(); if( _ismbblead(( unsigned int )( unsigned char )c ) || !isspace(( unsigned char )c )){ ungetch( c ); break; } } } } return !eof(); } bool string_calc::operator_new_number( node* pnode ){ bool success = true; if( !skipspace()){ error_unexpected_eof(); success = false; } else{ const char* s = getp(); char* endp = const_cast<char*>(s); strtol( s, &endp, 0 ); if(( *endp == '.' )||( *endp == 'e' )||( *endp == 'E' )){ strtod( s, &endp ); size_type cch = endp - s; if( !cch ){ error_illegal_character( *s ); success = false; } else{ std::string value = gets( cch ); if( pnode ){ pnode->assign( TYPEOF_FLOAT, value ); } } } else{ size_type cch = endp - s; std::string value = gets( cch ); if( pnode ){ pnode->assign( TYPEOF_INTEGER, value ); } } } return success; } bool string_calc::constant( node* pnode ){ bool success = true; if( !skipspace()){ error_unexpected_eof(); success = false; } else{ char c = at( 0, SEEK_CUR ); if( c == '(' ){ size_type offset_prev = tell(); seek( 1, SEEK_CUR ); success = operator_comma( pnode ); if( success ){ skipspace(); c = getch(); if( c != ')' ){ error_illegal_character( c ); seek( offset_prev, SEEK_SET ); error_unbalanced_parenthesis(); success = false; } } } else if( isdigit( c ) ||( c == '.' )){ success = operator_new_number( pnode ); } else{ error_illegal_character( c ); success = false; } } return success; } bool string_calc::operator_unary_prefix( node* pnode ){ bool success = true; std::vector<std::string> prefix_unary_operators; while( skipspace()){ size_type offset = tell(); char c = at( 0, SEEK_CUR ); if(( c == '+' )||( c == '-' )||( c == '!' )||( c == '~' )){ prefix_unary_operators.push_back( gets( 1 )); } else{ break; } } if( !skipspace()){ error_unexpected_eof(); success = false; } else{ success = constant( pnode ); if( success && pnode ){ // 単項演算子は評価順が右→左なので reverse_iterator であることに注意 for( std::vector<char>::reverse_iterator p = prefix_unary_operators.rbegin(); p != prefix_unary_operators.rend(); ++p ){ *pnode = node( *p, *pnode ); } } } return success; } bool string_calc::operator_mul_div_mod_( node* pnode ){ bool success = true; if( skipspace()){ char c = at( 0, SEEK_CUR ); if( c == '*' ){ seek( 1, SEEK_CUR ); node rhs; success = operator_unary_prefix( &rhs ); if( success && pnode ){ *pnode = node( "*", *pnode, rhs ); } if( success ){ success = operator_mul_div_mod_( pnode ); } } else if( c == '/' ){ seek( 1, SEEK_CUR ); node rhs; success = operator_unary_prefix( &rhs ); if( success && pnode ){ *pnode = node( "/", *pnode, rhs ); } if( success ){ success = operator_mul_div_mod_( pnode ); } } else if( c == '%' ){ seek( 1, SEEK_CUR ); node rhs; success = operator_unary_prefix( &rhs ); if( success && pnode ){ *pnode = node( "%", *pnode, rhs ); } if( success ){ success = operator_mul_div_mod_( pnode ); } } } return success; } bool string_calc::operator_mul_div_mod( node* pnode ){ bool success = operator_unary_prefix( pnode ); if( success ){ success = operator_mul_div_mod_( pnode ); } return success; } bool string_calc::operator_add_sub_( node* pnode ){ bool success = true; if( skipspace()){ char c = at( 0, SEEK_CUR ); if( c == '+' ){ seek( 1, SEEK_CUR ); node rhs; success = operator_mul_div_mod( &rhs ); if( success && pnode ){ *pnode = node( "+", *pnode, rhs ); } if( success ){ success = operator_add_sub_( pnode ); } } else if( c == '-' ){ seek( 1, SEEK_CUR ); node rhs; success = operator_mul_div_mod( &rhs ); if( success && pnode ){ *pnode = node( "-", *pnode, rhs ); } if( success ){ success = operator_add_sub_( pnode ); } } } return success; } bool string_calc::operator_add_sub( node* pnode ){ bool success = operator_mul_div_mod( pnode ); if( success ){ success = operator_add_sub_( pnode ); } return success; } bool string_calc::operator_comma( node* pnode ){ std::list<node> operands; bool success = true; while( skipspace()){ node operand; success = operator_add_sub( &operand ); if( !success ){ break; } operands.push_back( operand ); skipspace(); char c = getch(); if( c != ',' ){ ungetch( c ); break; } } if( success ){ *pnode = node( ",", operands ); } return success; } bool string_calc::full_expression( node* pnode ){ return operator_comma( pnode ); } bool string_calc::compile( node* pnode ){ return full_expression( pnode ); } bool string_calc::eval( const std::string& s, any_value_t* presult ){ assign( s ); node syntax_tree; bool success = compile( &syntax_tree ); if( success ){ success = syntax_tree.eval( presult ); } return success; }
ここでは今までどおり解析器に評価機能を持たせていますが、完全に分けてしまってもいいでしょう