From 9d4406907ac02b6f730259c989907c4d26275f09 Mon Sep 17 00:00:00 2001 From: hellerve Date: Wed, 8 Jul 2015 16:38:49 +0200 Subject: [PATCH] Implemented interpreter, almost fully documented --- improved/ast.py | 12 ++-- improved/interpreter.py | 27 ++++++++- improved/lex.py | 3 +- improved/parser.py | 109 ++++++++++++++++++++++++++++++---- improved/parser_combinator.py | 4 +- improved/tokenize.py | 4 +- 6 files changed, 137 insertions(+), 22 deletions(-) diff --git a/improved/ast.py b/improved/ast.py index 428fd7c..cbbe675 100644 --- a/improved/ast.py +++ b/improved/ast.py @@ -142,7 +142,7 @@ class BinArithmeticExp(ArithmeticExp): left_value = self.left.eval(env) right_value = self.right.eval(env) try: - return operators[self.op](left_value, right_value) + return self.operators[self.op](left_value, right_value) except KeyError: raise RuntimeError('unknown operator: ' + self.op) @@ -150,10 +150,10 @@ class RelationExp(BooleanExp): """The AST node for relational boolean expressions.""" operators = { - '<': operator.gt, - '<=': operator.ge, - '>': operator.lt, - '>=': operator.le, + '<': operator.lt, + '<=': operator.le, + '>': operator.gt, + '>=': operator.ge, '=': operator.eq, '!=': operator.ne, } @@ -191,7 +191,7 @@ class RelationExp(BooleanExp): left_value = self.left.eval(env) right_value = self.right.eval(env) try: - return operators[self.op](left_value, right_value) + return self.operators[self.op](left_value, right_value) except KeyError: raise RuntimeError('unknown operator: ' + self.op) diff --git a/improved/interpreter.py b/improved/interpreter.py index 8fc8061..75ad5e0 100644 --- a/improved/interpreter.py +++ b/improved/interpreter.py @@ -1,2 +1,27 @@ -def callIMP(chars): +"""The interpreter""" +from .parser import parse +from .tokenize import tokenize + +class ParseError(Exception): pass + +def callIMP(chars): + """ + The entry point for everyone who wants to call + IMP from within Python. 
The function either + returns the resulting environment or None, if the input is + empty. A ParseError is raised if tokenizing or parsing fails. + + chars -- the string to evaluate + + returns -- the IMP environment | None + """ + if not chars: return None + tokens = tokenize(chars) + if not tokens: raise ParseError("tokens could not be generated") + parsed = parse(tokens) + if not parsed or not parsed.value: raise ParseError("tokens could not be parsed") + ast = parsed.value + env = {} + ast.eval(env) + return env diff --git a/improved/lex.py b/improved/lex.py index 2f2c284..56996eb 100644 --- a/improved/lex.py +++ b/improved/lex.py @@ -26,7 +26,8 @@ def lex(characters, token_exprs): tokens.append(token) break if not match: - sys.stderr.write('[Parser] Illegal character: %s\\n' % characters[pos]) + sys.stderr.write('[Lexer] Illegal character at %d: %s(%d)\n' + % (pos, characters[pos], ord(characters[pos]))) raise ValueError(characters[pos]) else: pos = match.end(0) diff --git a/improved/parser.py b/improved/parser.py index b4f29c1..673d41c 100644 --- a/improved/parser.py +++ b/improved/parser.py @@ -17,22 +17,52 @@ BOOLEAN_PRECEDENCE = [ ] #Helper -def precedence(value_parser, precedence_levels, combine): - def op_parser(precedence_level): - return any_op_in_list(precedence_level) ^ combine - parser = value_parser * op_parser(precedence_levels[0]) - for precedence_level in precedence_levels[1:]: - parser = parser * op_parser(precedence_level) +def precedence(value_parser, precedences, combine): + def op_parser(precedence): + return any_op_in_list(precedence) ^ combine + parser = value_parser * op_parser(precedences[0]) + for precedence in precedences[1:]: + parser = parser * op_parser(precedence) return parser def process_binop(op): + """ + Takes a binary operation as input and + returns a function that builds AST nodes + given the operands.
+ + op -- the operation + + returns -- a function that takes two operands + and returns a Binary Arithmetic AST + node + """ return lambda l, r: BinArithmeticExp(op, l, r) def process_relop(parsed): + """ + Takes a parsed relational operation and returns + an AST node. + + parsed -- a tuple of the form ((left, operation), right) + + returns -- an AST node representing the expression + """ ((left, op), right) = parsed return RelationExp(op, left, right) def process_logic(op): + """ + Takes a logical operation and returns a function + consuming two operands, building an AST node from them. + + op -- the operation + + returns -- a function that builds Logical Expression + AST nodes + + throws -- RuntimeError on unknown operator + """ if op == 'and': return lambda l, r: AndExp(l, r) elif op == 'or': @@ -45,46 +75,105 @@ def process_group(parsed): return p def any_op_in_list(ops): + """ + Builds a keyword parser from all given operations. + + ops -- a list of operators + + returns -- a parser matching all operators + """ op_parsers = [keyword(op) for op in ops] - parser = reduce(lambda l, r: l | r, op_parsers) - return parser + return reduce(lambda l, r: l | r, op_parsers) #Parser num = Tag(INT) ^ (lambda i: int(i)) imp_id = Tag(ID) def keyword(kw): + """ + The keyword parser. Takes a keyword + and returns a Reserved parser. + + kw -- the operation as string + + returns -- a corresponding Reserved parser + """ return Reserved(kw, RESERVED) def arithmetic_group(): + """ + Matches groups of arithmetic operations. + + returns -- a parser matching grouped arithmetic operations + """ return keyword('(') + Lazy(arithmetic_exp) + keyword(')') ^ process_group def arithmetic_value(): + """ + Matches numbers and variables. + + returns -- a parser matching all kinds of values + """ return ((num ^ (lambda i: IntArithmeticExp(i))) | - (id ^ (lambda v: VarArithmeticExp(v)))) + (imp_id ^ (lambda v: VarArithmeticExp(v)))) def arithmetic_term(): + """ + Matches an arithmetic term.
+ + returns -- a parser matching an arithmetic value or group + """ return arithmetic_value() | arithmetic_group() def arithmetic_exp(): + """ + Matches arithmetic expressions, respecting precedence. + + returns -- a parser correctly matching arithmetic expressions + """ return precedence(arithmetic_term(), ARITHMETIC_PRECEDENCE, process_binop) def boolean_not(): + """ + Matches the boolean not with its arguments. + + returns -- a parser matching the boolean not + """ return keyword('not') + Lazy(boolean_term) ^ (lambda parsed: NotExp(parsed[1])) def boolean_relop(): + """ + Matches boolean relational operations. + + returns -- a parser matching all relations + """ relops = ['<', '<=', '>', '>=', '=', '!='] return arithmetic_exp() + any_op_in_list(relops) + arithmetic_exp() ^ process_relop def boolean_group(): + """ + Matches grouped boolean expressions. + + returns -- a parser matching groups of boolean expressions + """ return keyword('(') + Lazy(boolean_exp) + keyword(')') ^ process_group def boolean_term(): + """ + Matches boolean terms. + + returns -- a parser matching boolean terms + """ return boolean_not() | boolean_relop() | boolean_group() def boolean_exp(): + """ + Matches boolean expressions, respecting precedence.
+ + returns -- a parser correctly matching boolean expressions + """ return precedence(boolean_term(), BOOLEAN_PRECEDENCE, process_logic) @@ -126,5 +215,5 @@ def statements(): def parser(): return Phrase(statements()) -def imp_parser(tokens): +def parse(tokens): return parser()(tokens, 0) diff --git a/improved/parser_combinator.py b/improved/parser_combinator.py index 65899ad..c0cb55a 100644 --- a/improved/parser_combinator.py +++ b/improved/parser_combinator.py @@ -91,7 +91,7 @@ class Reserved(Parser): returns -- a Result object | None """ - if pos < len(tokens) and tokens[pos] == [self.value, self.tag]: + if pos < len(tokens) and tokens[pos][0] == self.value and tokens[pos][1] is self.tag: return Result(tokens[pos][0], pos + 1) else: return None @@ -170,7 +170,7 @@ class Alternate(Parser): def __call__(self, tokens, pos): """ Calls the parser. Returns a result if either - parser matches, otherwisu returns None. + parser matches, otherwise returns None. tokens -- the token list pos -- the position to check diff --git a/improved/tokenize.py b/improved/tokenize.py index d1a27af..fe60c99 100644 --- a/improved/tokenize.py +++ b/improved/tokenize.py @@ -6,8 +6,8 @@ INT = 'INT' ID = 'ID' TOKENS = [ - (r'[ \\n\\t]+', None), - (r'#[^\\n]*', None), + (r'[ \n\t]+', None), + (r'#[^\n]*', None), (r'\:=', RESERVED), (r'\(', RESERVED), (r'\)', RESERVED),