I'm working on a simple Pascal-like interpreter using PLY, but I'm encountering a Syntax error at ',' during parsing. The issue arises when trying to parse a source file that includes commas. Below is the code for the PLY code, as well as a sample of the source code I'm using that's causing the error.
Pascal.py
import ply.lex as lex import ply.yacc as yacc # Lexical Analysis tokens = ( 'PROGRAM', 'VAR', 'BEGIN', 'END', 'INTEGER', 'REAL', 'ASSIGN', 'SEMICOLON', 'COLON', 'COMMA', 'PLUS', 'DIVIDE', 'LPAREN', 'RPAREN', 'WRITELN', 'STRING', 'ID', 'NUMBER', 'DOT' ) # Reserved words reserved = { 'PROGRAM': 'PROGRAM', 'VAR': 'VAR', 'BEGIN': 'BEGIN', 'END': 'END', 'integer': 'INTEGER', 'real': 'REAL', 'WRITELN': 'WRITELN' } # Token definitions t_ASSIGN = r':=' t_SEMICOLON = r';' t_COLON = r':' t_COMMA = r',' t_PLUS = r'\+' t_DIVIDE = r'/' t_LPAREN = r'\(' t_RPAREN = r'\)' t_DOT = r'\.' t_ignore = ' \t' # Ignore spaces and tabs def t_STRING(t): r"'[^']*'" t.value = t.value[1:-1] # Remove surrounding quotes return t def t_ID(t): r'[a-zA-Z_][a-zA-Z_0-9]*' t.type = reserved.get(t.value, 'ID') # Check if it's a reserved word return t def t_NUMBER(t): r'\d+(\.\d+)?' t.value = float(t.value) if '.' in t.value else int(t.value) return t def t_newline(t): r'\n+' t.lexer.lineno += len(t.value) def t_error(t): print(f"Illegal character '{t.value[0]}'") t.lexer.skip(1) lexer = lex.lex() # Syntax Analysis precedence = ( ('left', 'PLUS', 'DIVIDE'), ) def p_program(p): 'program : PROGRAM ID SEMICOLON declarations BEGIN statements END DOT' interpret(p[5]) # Pass the parsed statements to the interpreter def p_declarations(p): '''declarations : VAR var_declaration | empty''' def p_var_declaration(p): '''var_declaration : ID COLON type SEMICOLON var_declaration | ID COLON type SEMICOLON''' def p_type(p): '''type : INTEGER | REAL''' def p_statements(p): '''statements : statements statement SEMICOLON | statement SEMICOLON''' if len(p) == 4: p[0] = p[1] + [p[2]] else: p[0] = [p[1]] def p_statement(p): '''statement : ID ASSIGN expression | WRITELN LPAREN writeln_args RPAREN''' if p[1] == 'WRITELN': p[0] = ('WRITELN', p[3]) else: p[0] = ('ASSIGN', p[1], p[3]) def p_writeln_args(p): '''writeln_args : writeln_args COMMA writeln_arg | writeln_arg''' if len(p) == 4: p[0] = p[1] + [p[3]] else: p[0] = [p[1]] def p_writeln_arg(p): '''writeln_arg : STRING | expression''' p[0] = p[1] def p_expression(p): '''expression : expression PLUS term | expression DIVIDE term | term''' if len(p) == 4: p[0] = (p[2], p[1], p[3]) else: p[0] = p[1] def p_term(p): '''term : ID | NUMBER''' p[0] = p[1] def p_empty(p): 'empty :' p[0] = [] def p_error(p): print(f"Syntax error at '{p.value}'" if p else "Syntax error at EOF") parser = yacc.yacc() # Interpreter variables = {} def evaluate(tree): if isinstance(tree, (int, float)): return tree if isinstance(tree, str): return variables.get(tree, 0) if tree[0] == '+': return evaluate(tree[1]) + evaluate(tree[2]) if tree[0] == '/': return evaluate(tree[1]) / evaluate(tree[2]) return 0 def interpret(statements): for stmt in statements: if stmt[0] == 'ASSIGN': variables[stmt[1]] = evaluate(stmt[2]) elif stmt[0] == 'WRITELN': print(" ".join(str(evaluate(arg)) if not isinstance(arg, str) else arg for arg in stmt[1])) # Run the Program if __name__ == '__main__': import sys if len(sys.argv) < 2: print("Usage: python Pascal.py <file>") sys.exit(1) with open(sys.argv[1], 'r') as file: source = file.read() lexer.input(source) parser.parse(source) Sample Source Code (SumAndAverage.pas)
PROGRAM SUMANDAVERAGE; VAR num1,num2,num3: integer; sum:integer; avg:real; BEGIN num1:=10; num2:=20; num3:=30; sum:=num1+num2+num3; avg:=sum/3; WRITELN('Num1 is ',num1); WRITELN('Num2 is ',num2); WRITELN('Num3 is ',num3); WRITELN('Sum 3 numbers is ',sum); WRITELN('Average is ',avg) END. Input and the expected Output
python Pascal.py SumAndAverage.pas Num1 is 10 Num2 is 20 Num3 is 30 Sum 3 numbers is 60 Average is 2.0000000000000000E+001 I've reviewed my token definitions and grammar, but I can't figure out why the parser is having trouble with the comma in the WRITELN statement. Could someone help me figure out what's going wrong?
VARstatement. Your grammar doesn't allow a comma-separated list of IDs.p.__dict__-->{'value': ',', 'lineno': 2, 'lexpos': 31, 'type': 'COMMA', 'lexer': <ply.lex.Lexer object at 0x102a5c2d0>}There error is not in theWRITELNstatement.