I am making a lexer & parser for a 8 bit cpu, my lexer is working fine but when I added AST class for parse, this problem started. Whats the problem and how to solve it The code takes a string input, lexer class then split it into tokens and pass the vector of tokens to parser.
#include <iostream> #include <vector> #include <string> #include <cctype> #include <memory> using namespace std; // Token Types enum TokenType { KEYWORD, OPERATOR, BRACE, SEMICOLON, IDENTIFIER, LITERAL, END_OF_FILE }; // Token Structure struct Token { TokenType type; string value; Token(TokenType type, const string &value) : type(type), value(value) {} }; // Lexer Class class Lexer { string input; size_t position; // Helper to check if a string is a keyword bool isKeyword(const string &str) { return str == "int" || str == "if"; } // Read the next character char peek() { if (position >= input.length()) return '\0'; return input[position]; } // Advance the current position char advance() { return input[position++]; } // Skip over whitespace void skipWhitespace() { while (isspace(peek())) advance(); } // Main function to tokenize input vector<Token> tokenize() { vector<Token> tokens; while (position < input.length()) { skipWhitespace(); char current = peek(); if (isalpha(current)) { // Identifiers and Keywords string identifier = ""; while (isalnum(peek())) { identifier += advance(); } if (isKeyword(identifier)) { tokens.emplace_back(KEYWORD, identifier); } else { tokens.emplace_back(IDENTIFIER, identifier); } } else if (isdigit(current)) { // Literals (Numbers) string literal = ""; while (isdigit(peek())) { literal += advance(); } tokens.emplace_back(LITERAL, literal); } else if (current == ';') { // Semicolon tokens.emplace_back(SEMICOLON, ";"); advance(); } else if (current == '{' || current == '}') { // Braces tokens.emplace_back(BRACE, string(1, current)); advance(); } else if (current == '(' || current == ')') { // Parentheses tokens.emplace_back(BRACE, string(1, current)); // You might want to introduce a new TokenType like PAREN to distinguish between braces and parentheses advance(); } else if (current == '=' || current == '+' || current == '-') { // Operators string op = ""; op += advance(); if (peek() == '=') { // Handle '==' op += advance(); } tokens.emplace_back(OPERATOR, op); } else if (current == '\0') { // End of input tokens.emplace_back(END_OF_FILE, ""); break; } else { cerr << "Unexpected character: " << current << endl; exit(1); } } return tokens; } public: Lexer(const string &input) : input(input), position(0) {} vector<Token> getTokens() { return tokenize(); } }; //Lexer class ends // AST Node Base Class struct ASTNode { virtual ~ASTNode() = default; }; // Variable Declaration Node struct VarDeclNode : public ASTNode { string varName; VarDeclNode(const string &varName) : varName(varName) {} }; // Assignment Node struct AssignmentNode : public ASTNode { string varName; shared_ptr<ASTNode> expression; AssignmentNode(const string &varName, shared_ptr<ASTNode> expression) : varName(varName), expression(expression) {} }; // Binary Operation Node struct BinOpNode : public ASTNode { string op; shared_ptr<ASTNode> left, right; BinOpNode(const string &op, shared_ptr<ASTNode> left, shared_ptr<ASTNode> right) : op(op), left(left), right(right) {} }; // Literal Node struct LiteralNode : public ASTNode { int value; LiteralNode(int value) : value(value) {} }; // Identifier (Variable) Node struct IdentifierNode : public ASTNode { string name; IdentifierNode(const string &name) : name(name) {} }; // Conditional (If) Node struct IfNode : public ASTNode { shared_ptr<ASTNode> condition; vector<shared_ptr<ASTNode>> body; IfNode(shared_ptr<ASTNode> condition, vector<shared_ptr<ASTNode>> body) : condition(condition), body(body) {} }; // Parser Class class Parser { vector<Token> tokens; size_t position; // Utility functions Token peek() { if (position >= tokens.size()) return Token(END_OF_FILE, ""); return tokens[position]; } Token advance() { return tokens[position++]; } bool match(TokenType type) { if (peek().type == type) { advance(); return true; } return false; } // Parse a program (sequence of statements) vector<shared_ptr<ASTNode>> parseProgram() { vector<shared_ptr<ASTNode>> statements; while (peek().type != END_OF_FILE) { statements.push_back(parseStatement()); } return statements; } // Parse a statement shared_ptr<ASTNode> parseStatement() { if (match(KEYWORD)) { string keyword = tokens[position - 1].value; if (keyword == "int") { return parseVarDecl(); } else if (keyword == "if") { return parseIf(); } } else if (peek().type == IDENTIFIER) { return parseAssignment(); } return nullptr; } // Parse variable declaration shared_ptr<ASTNode> parseVarDecl() { string varName = advance().value; // Identifier after "int" match(SEMICOLON); // Expect semicolon return make_shared<VarDeclNode>(varName); } // Parse assignment shared_ptr<ASTNode> parseAssignment() { string varName = advance().value; // Identifier match(OPERATOR); // Expect '=' operator auto expr = parseExpression(); match(SEMICOLON); // Expect semicolon return make_shared<AssignmentNode>(varName, expr); } // Parse an expression shared_ptr<ASTNode> parseExpression() { auto left = parsePrimary(); if (peek().type == OPERATOR && (peek().value == "+" || peek().value == "-")) { string op = advance().value; auto right = parsePrimary(); return make_shared<BinOpNode>(op, left, right); } return left; // If no binary operator, return the primary expression } // Parse primary expression (identifier or literal) shared_ptr<ASTNode> parsePrimary() { if (match(LITERAL)) { return make_shared<LiteralNode>(stoi(tokens[position - 1].value)); } else if (match(IDENTIFIER)) { return make_shared<IdentifierNode>(tokens[position - 1].value); } return nullptr; } // Parse conditional (if) statement shared_ptr<ASTNode> parseIf() { match(BRACE); // Expect '(' auto condition = parseExpression(); match(BRACE); // Expect ')' match(BRACE); // Expect '{' vector<shared_ptr<ASTNode>> body; while (!match(BRACE)) { body.push_back(parseStatement()); } return make_shared<IfNode>(condition, body); } public: Parser(const vector<Token> &tokens) : tokens(tokens), position(0) {} vector<shared_ptr<ASTNode>> parse() { return parseProgram(); } }; // Test the lexer int main() { string code = "int a; a = 10; if (a == 10) { a = a + 1; }"; cout<<"Lexer not Completed"; Lexer lexer(code); vector<Token> tokens = lexer.getTokens(); cout<<"Lexer Completed"; Parser parser(tokens); // Pass tokens to the parser auto ast = parser.parse(); // Parse the tokens into an AST cout << "Parsing completed successfully!" << endl; return 0; } I thought it had something to do with shared_ptr or library, but its not that I guess.
parseExpressionbelieves that only+and-are valid operators, but the program also uses==. Once the stream of tokens deviates from what the parser expects,parseStatement()returns null without advancing, andparseProgramspins in a loop, pushing nulls into the vector.