parsing - How do I re-write/modify my current Python interpreter's grammar structure, not too good with Python? -
My current Python interpreter uses parsing rules to take input and print out the value of each expression. The interpreter works fine, but I want to change some of the current grammar rules and add new grammar rules. Below are the grammar changes I want so far.
These are the changes I want to make to the current grammar:
# <stmt-list> ::= empty | <stmt> <stmt-list>
# <stmt_list> ::= <stmt> | <stmt> <stmt_list>
# <factor> ::= id | intnum | ( <expr> )
# <base> ::= (<expr>) | id | number
<stmt> ::= id = <expr> ; | print <expr>;
<stmt> ::= id = <expr> ; | iprint <expr> ; | rprint <expr> ;
Also, I'm not sure how to implement the new grammar rules below in my interpreter; I think I might already have some of them?
<prog> ::= <decl_list> <stmt_list>
<decl_list> ::= <decl> | <decl> <decl_list>
<decl> ::= <type> <id_list> ;
<type> ::= int | real
<id_list> ::= id | id {, <id_list>}
This is my current code for the current grammar:
import sys global vartable vartable = {} def main(): global itprogram, nexttoken, nextchar, nextlex, flageof, strstmt nexttoken = "" nextchar = "" flageof = false strstmt = "" try: fileprogram = open(sys.argv[1], "rt") except indexerror: print "missing input file!" return except ioerror: print "could not open \'" + sys.argv[1] + "\'!" return strprogram = fileprogram.read() itprogram = iter(strprogram) if strprogram == "": nextchar = "" else: nextchar = itprogram.next() #while not flageof: funclex() stmtlist() def funclex(): global itprogram, nexttoken, nextlex, nextchar, flageof, strstmt nexttoken = "" nextlex = "" isfloat = false try: while nextchar.isspace(): nextchar = itprogram.next() except stopiteration: nextchar = "" funclex() return try: if nextchar == "(": nexttoken = "lpara" nextlex = nextchar nextchar = itprogram.next() elif nextchar == ")": nexttoken = "rpara" nextlex = nextchar nextchar = itprogram.next() elif nextchar == "+": nexttoken = "add" nextlex = nextchar nextchar = itprogram.next() elif nextchar == "-": nexttoken = "sub" nextlex = nextchar nextchar = itprogram.next() elif nextchar == "*": nexttoken = "mult" nextlex = nextchar nextchar = itprogram.next() elif nextchar == "/": nexttoken = "div" nextlex = nextchar nextchar = itprogram.next() elif nextchar == "=": nexttoken = "assign" nextlex = nextchar nextchar = itprogram.next() elif nextchar == ";": nexttoken = "semi" nextlex = nextchar nextchar = itprogram.next() elif nextchar.isalpha(): nextlex = nextchar nextchar = itprogram.next() while nextchar.isalnum(): nextlex += nextchar nextchar = itprogram.next() if nextlex == "print": nexttoken = "print" else: nexttoken = "id" elif nextchar.isalnum(): nextlex = nextchar nextchar = itprogram.next() while nextchar.isalnum() or nextchar == ".": if nextchar == ".": isfloat = true nextlex += nextchar nextchar = itprogram.next() if isfloat: nexttoken = "float" else: nexttoken = "int" elif nextchar == "": nextlex = nextchar nexttoken = "empty" flageof = 
true else: nexttoken = "unknown" #print "syntax error!" except stopiteration: nextchar = "" strstmt = strstmt + nextlex + " " if nexttoken == "semi": print strstmt strstmt = "" # <stmt-list> ::= empty | <stmt> <stmt-list> def stmtlist(): global nexttoken if nexttoken == "empty": print ">>> empty .tiny file." else: while nexttoken != "empty": stmt() # <stmt> ::= id = <expr> ; | # print <expr> ; def stmt(): global nexttoken, nextlex if nexttoken == "id": varname = nextlex funclex() if nexttoken == "assign": funclex() result = expr() if result[1] != "unknown": lookupvartable(varname, result[0], result[1]) else: printerror("undefined variable.") elif nexttoken == "print": funclex() result = expr() if result[1] != "unknown" , nexttoken == "semi": print ">>> " + str(result[0]) elif result[1] == "unknown": printerror("undefined variable.") else: printerror("<stmt> syntax error.") return if nexttoken == "semi": funclex() else: printerror("<stmt> missing ';'") # <expr> ::= <term> { + <term> | - <term> } def expr(): global nexttoken, nextlex lresult = term() while nexttoken == "add" or nexttoken == "sub": operator = nexttoken funclex() rresult = term() #variable not defined if lresult[1] == "unknown" or rresult[1] == "unknown": printerror("undefined variable!") if lresult[1] != rresult[1]: #type mismatch printerror("type mismatch!") elif operator == "add": lresult = (lresult[0]+rresult[0], lresult[1]) else: lresult = (lresult[0]-rresult[0], lresult[1]) return lresult # <term> ::= <factor> { * <factor> | / <factor> } def term(): global nexttoken, nextlex lresult = factor() while nexttoken == "mult" or nexttoken == "div": operator = nexttoken funclex() rresult = factor() #variable not defined if lresult[1] == "unknown" or rresult[1] == "unknown": printerror("undefined variable!") if lresult[1] != rresult[1]: #type mismatch printerror("type mismatch!") elif operator == "mult": lresult = (lresult[0]*rresult[0], lresult[1]) else: lresult = (lresult[0]/rresult[0], lresult[1]) 
return lresult # <factor> ::= id | intnum | ( <expr> ) def factor(): global nexttoken, nextlex if nexttoken == "id": result = lookupvartable(nextlex, 0, "unknown") funclex() elif nexttoken == "int": result = (int(nextlex), "int") funclex() elif nexttoken == "float": result = (float(nextlex), "float") funclex() elif nexttoken == "lpara": funclex() result = expr() if nexttoken == "rpara": funclex() else: printerror("<factor>") return result def printerror(strmessage): global strstmt if strstmt != "": print strstmt print ">>> error: " + strmessage exit() def lookupvartable(varname, varvalue, vartype): #if varname not in vartable: # varvalue == "unknown" if vartype != "unknown": vartable[varname] = (varvalue, vartype) return vartable[varname] elif varname in vartable: return vartable[varname] else: return (varvalue, vartype) if __name__ == "__main__": main()
You should consider using ANTLR; there is a Python port.
In the meantime, here is how you can design the lexer:
def parser_file(file_obj):
    """Yield the characters of file_obj one at a time.

    file_obj is any iterable of text lines, such as an open text file.
    """
    for line in file_obj:
        for char in line:
            yield char


# Single-character lexemes and the token name reported for each.
mapping = {'(': 'lpara',
           ')': 'rpara',
           '+': 'add',
           '-': 'sub',
           '*': 'mul',
           '/': 'div',
           '=': 'assign',
           ';': 'semi'}


def lexer(chars):
    """Yield (token_name, lexeme) pairs scanned from an iterable of characters.

    Token names are the values of ``mapping`` plus "print", "id", "int" and
    "float". The generator finishes normally at end of input, including for
    empty input and for a token that ends exactly at end of input.

    Raises:
        SyntaxError: on a character that cannot start any token.
    """
    it_char = iter(chars)
    # next() with a "" default marks end of input. A bare next() raising
    # StopIteration inside a generator becomes RuntimeError (PEP 479).
    char = next(it_char, "")
    while True:
        # skip spaces
        while char.isspace():
            char = next(it_char, "")
        if char == "":
            return

        # find simple tokens
        if char in mapping:
            yield mapping[char], char
            char = next(it_char, "")
            continue

        # find complex tokens
        if char.isalpha():
            lex = char
            char = next(it_char, "")
            while char.isalnum():
                lex += char
                char = next(it_char, "")
            if lex == "print":
                yield "print", lex
            else:
                yield "id", lex
        elif char.isdigit():
            lex = char
            char = next(it_char, "")
            while char.isdigit():
                lex += char
                char = next(it_char, "")
            if char == ".":
                lex += char
                char = next(it_char, "")
                while char.isdigit():
                    lex += char
                    char = next(it_char, "")
            if "." in lex:
                yield "float", lex
            else:
                yield "int", lex
        else:
            raise SyntaxError(char)
To use it, you can proceed as follows:
import io

# Sample input: an arithmetic expression spread over two lines.
content = """\
10 + 12.5 / 18
(8 + 3.14)
"""

# A text stream is used so that iteration yields characters: on Python 3,
# io.BytesIO rejects a str argument and would yield integers, not characters.
file_obj = io.StringIO(content)
for token in lexer(parser_file(file_obj)):
    print(token)
You get:
('int', '10') ('add', '+') ('float', '12.5') ('div', '/') ('int', '18') ('lpara', '(') ('int', '8') ('add', '+') ('float', '3.14') ('rpara', ')')
You can use a real file, of course.
For the parser: use a stack to build an abstract syntax tree, then evaluate it.
I'm sorry, it's too long to explain here, and it's off-topic on SO; consider posting on Code Review.
Comments
Post a Comment