"""A minimal parser combinator library.""" class Result(object): """The result class. Returned by every parser.""" def __init__(self, value, pos): """ The result initializer. value -- the value of the result node pos -- the position of the result node """ self.value = value self.pos = pos def __repr__(self): """ A representation to make debugging easier. returns -- A string of the form 'Result(value, position)' """ return 'Result(%s, %d)' % (self.value, self.pos) class Parser(object): """ The parser superclass. All parsers have to inherit from it. """ def __call__(self, tokens, pos): """ Makes the parser callable. All subclasses need to override this method. tokens -- the tokens with which the Parser is called pos -- the token position returns -- None """ return None def __add__(self, other): """ Concatenates a parser to another parser. returns -- a Concat object of both parsers """ return Concat(self, other) def __mul__(self, other): """ "Multiplies" the parser to another parser. returns -- an Exp object of both parsers """ return Exp(self, other) def __or__(self, other): """ Alternates between this parser and another. returns -- an Alternate object of both parsers """ return Alternate(self, other) def __xor__(self, function): """ Applies a function to the parser's results. returns -- a Process object of the parser and the function """ return Process(self, function) class Reserved(Parser): """The parser for reserved words""" def __init__(self, value, tag): """ The initialization method. value -- the reserved word tag -- the RESERVED tag """ self.value = value self.tag = tag def __call__(self, tokens, pos): """ Call parser. Returns a result if the token at the position matches the reserved word. Otherwise returns None. tokens -- the token list pos -- the position to check returns -- a Result object | None """ if pos < len(tokens) and tokens[pos][0] == self.value and tokens[pos][1] is self.tag: return Result(tokens[pos][0], pos + 1) else: return None class Tag(Parser): """ The parser for tags. Matches anything if the tag matches, regardless of its value. """ def __init__(self, tag): """ The initialization method. tag -- the tag that should match """ self.tag = tag def __call__(self, tokens, pos): """ Call parser. Returns a result if the tag of the token at the specified position matches, regardless of its value. Otherwise returns None. tokens -- the token list pos -- the position to check returns -- a Result object | None """ if pos < len(tokens) and tokens[pos][1] is self.tag: return Result(tokens[pos][0], pos + 1) else: return None class Concat(Parser): """The concat combinator. Parses sequences of two tokens.""" def __init__(self, left, right): """ The initialization method. left -- the first parser right -- the second parser """ self.left = left self.right = right def __call__(self, tokens, pos): """ Calls the parser. Returns a result tuple if both parser match, otherwise returns None. tokens -- the token list pos -- the position to check returns -- a Tuple of the Result objects of both parsers | None """ left_result = self.left(tokens, pos) if left_result: right_result = self.right(tokens, left_result.pos) if right_result: combined_value = (left_result.value, right_result.value) return Result(combined_value, right_result.pos) return None class Alternate(Parser): """The alternate combinator. Parses using either of two parsers.""" def __init__(self, left, right): """ The initialization method. left -- the first parser right -- the second parser """ self.left = left self.right = right def __call__(self, tokens, pos): """ Calls the parser. Returns a result if either parser matches, otherwise returns None. tokens -- the token list pos -- the position to check returns -- the Result object of either parser | None """ left_result = self.left(tokens, pos) if left_result: return left_result else: right_result = self.right(tokens, pos) return right_result return None class Opt(Parser): """The optional combinator. Always returns a result.""" def __init__(self, parser): """ The initialization method. parser -- the parser to wrap """ self.parser = parser def __call__(self, tokens, pos): """ Calls the parser. Returns either the parser's result or an empty result. tokens -- the token list pos -- the position to check returns -- a Result object """ result = self.parser(tokens, pos) if result: return result else: return Result(None, pos) class Rep(Parser): """The repetition combinator. Applies a parser until it fails.""" def __init__(self, parser): """ The initialization method. parser -- the parser to wrap """ self.parser = parser def __call__(self, tokens, pos): """ Calls the parser. Returns a Result that contains a list of Results (one for each successful application). tokens -- the token list pos -- the position to check returns -- a Result object """ results = [] result = self.parser(tokens, pos) while result: results.append(result.value) pos = result.pos result = self.parser(tokens, pos) return Result(results, pos) class Process(Parser): """ The process combinator. Applies a function that manipulates the parser's result. """ def __init__(self, parser, function): """ The initialization method. parser -- the parser to wrap function -- the manipulation function """ self.parser = parser self.function = function def __call__(self, tokens, pos): """ Calls the parser. Returns the Result object whose value was manipulated by the given function. tokens -- the token list pos -- the position to check returns -- a Result object | None """ result = self.parser(tokens, pos) if result: result.value = self.function(result.value) return result class Lazy(Parser): """ The lazy combinator. Builds a parser only if needed. This makes recursive parsers possible. """ def __init__(self, parser_func): """ The initialization method. parser_func -- a function building a parser """ self.parser = None self.parser_func = parser_func def __call__(self, tokens, pos): """ Builds the parser and returns its' result. tokens -- the token list pos -- the position to check returns -- a Result object | None """ if not self.parser: self.parser = self.parser_func() return self.parser(tokens, pos) class Phrase(Parser): """ The phrase combinator. Applies a parser and only succeeds if it consumed all remaining input. """ def __init__(self, parser): """ The initialization method. parser -- the parser to wrap """ self.parser = parser def __call__(self, tokens, pos): """ Calls the parser. If there is a result and nothing is left to parse, it returns the result. Otherwise it returns None. tokens -- the token list pos -- the position to check returns -- a Result object | None """ result = self.parser(tokens, pos) if result and result.pos == len(tokens): return result else: return None class Exp(Parser): """ The compound statement parser. Workaround for problems with left recursion. """ def __init__(self, parser, separator): """ The initialization method. parser -- the parser to wrap separator -- the parser that parses the compound separators """ self.parser = parser self.separator = separator def __call__(self, tokens, pos): """ Calls the parser. Like Rep, it applies a parser multiple times, but it also keeps track of the separators. tokens -- the token list pos -- the position to check returns -- a Result object | None """ result = self.parser(tokens, pos) def process_next(parsed): (sepfunc, right) = parsed return sepfunc(result.value, right) next_parser = self.separator + self.parser ^ process_next next_result = result while next_result: next_result = next_parser(tokens, result.pos) if next_result: result = next_result return result