"""A minimal parser combinator library."""


class Result(object):
    """A parse result: a value paired with a position."""

    def __init__(self, value, pos):
        """
        Store the value and position of this result.

        value -- the value carried by the result
        pos -- the position associated with the result
        """
        self.pos = pos
        self.value = value

    def __repr__(self):
        """
        Build a representation that is convenient while debugging.

        returns -- a string of the form 'Result(value, position)'
        """
        fields = (self.value, self.pos)
        return 'Result(%s, %d)' % fields


class Parser(object):
    """
    Base class for every parser.

    Subclasses override __call__. The operator overloads defined
    here wrap parsers in the combinator classes of this module.
    """

    def __call__(self, tokens, pos):
        """
        Run the parser. The base implementation never matches;
        subclasses are expected to override it.

        tokens -- the tokens the parser is applied to
        pos -- the token position to start at

        returns -- None
        """
        return None

    def __add__(self, other):
        """
        parser + other: sequence this parser with another one.

        other -- the parser to run after this one

        returns -- a Concat object of both parsers
        """
        sequence = Concat(self, other)
        return sequence

    def __mul__(self, other):
        """
        parser * other: build an Exp from this parser and another.

        other -- the parser handed to Exp as its second argument

        returns -- an Exp object of both parsers
        """
        compound = Exp(self, other)
        return compound

    def __or__(self, other):
        """
        parser | other: choose between this parser and another.

        other -- the alternative parser

        returns -- an Alternate object of both parsers
        """
        choice = Alternate(self, other)
        return choice

    def __xor__(self, function):
        """
        parser ^ function: post-process this parser's result values.

        function -- the function to apply to the result value

        returns -- a Process object of the parser and the function
        """
        processed = Process(self, function)
        return processed


class Reserved(Parser):
    """The parser for reserved words."""

    def __init__(self, value, tag):
        """
        The initialization method.

        value -- the reserved word to match
        tag -- the tag a token must carry to count as reserved
        """
        self.value = value
        self.tag = tag

    def __call__(self, tokens, pos):
        """
        Call the parser. Succeeds if the token at the position has
        both the expected value and the expected tag.

        Tokens are indexed as token[0] (value) and token[1] (tag).

        tokens -- the token list
        pos -- the position to check

        returns -- a Result object | None
        """
        if pos >= len(tokens):
            return None
        token = tokens[pos]
        # Tags are compared by equality rather than identity so that an
        # equal tag that is not the very same object still matches.
        if token[0] == self.value and token[1] == self.tag:
            return Result(token[0], pos + 1)
        return None


class Tag(Parser):
    """
    The parser for tags. Matches any token whose tag matches,
    regardless of the token's value.
    """

    def __init__(self, tag):
        """
        The initialization method.

        tag -- the tag that should match
        """
        self.tag = tag

    def __call__(self, tokens, pos):
        """
        Call the parser. Succeeds if the tag of the token at the
        given position matches, regardless of the token's value.

        Tokens are indexed as token[0] (value) and token[1] (tag).

        tokens -- the token list
        pos -- the position to check

        returns -- a Result object | None
        """
        if pos >= len(tokens):
            return None
        token = tokens[pos]
        # Tags are compared by equality rather than identity so that an
        # equal tag that is not the very same object still matches.
        if token[1] == self.tag:
            return Result(token[0], pos + 1)
        return None


class Concat(Parser):
    """The concat combinator. Runs two parsers one after the other."""

    def __init__(self, left, right):
        """
        The initialization method.

        left -- the parser that runs first
        right -- the parser that runs second
        """
        self.left = left
        self.right = right

    def __call__(self, tokens, pos):
        """
        Call the parser. The right parser starts where the left one
        stopped; the combinator only succeeds if both succeed.

        tokens -- the token list
        pos -- the position to start at

        returns -- a Result whose value is the tuple
                   (left value, right value) | None
        """
        first = self.left(tokens, pos)
        if not first:
            return None

        second = self.right(tokens, first.pos)
        if not second:
            return None

        return Result((first.value, second.value), second.pos)


class Alternate(Parser):
    """The alternate combinator. Parses using either of two parsers."""

    def __init__(self, left, right):
        """
        The initialization method.

        left -- the parser that is tried first
        right -- the parser that is tried if the first one fails
        """
        self.left = left
        self.right = right

    def __call__(self, tokens, pos):
        """
        Call the parser. The left parser takes precedence; the right
        parser is only tried, at the same position, if the left fails.

        tokens -- the token list
        pos -- the position to check

        returns -- the Result object of either parser | None
        """
        left_result = self.left(tokens, pos)
        if left_result:
            return left_result
        # Whatever the right parser yields (a result or None) is the answer.
        return self.right(tokens, pos)


class Opt(Parser):
    """The optional combinator. Always returns a result."""

    def __init__(self, parser):
        """
        The initialization method.

        parser -- the parser whose match is optional
        """
        self.parser = parser

    def __call__(self, tokens, pos):
        """
        Call the parser. If the wrapped parser fails, an empty result
        (value None, position unchanged) is returned instead.

        tokens -- the token list
        pos -- the position to check

        returns -- a Result object
        """
        attempt = self.parser(tokens, pos)
        return attempt if attempt else Result(None, pos)


class Rep(Parser):
    """The repetition combinator. Applies a parser until it fails."""

    def __init__(self, parser):
        """
        The initialization method.

        parser -- the parser to apply repeatedly
        """
        self.parser = parser

    def __call__(self, tokens, pos):
        """
        Call the parser. Applies the wrapped parser over and over,
        each time starting where the previous application stopped,
        and collects the values of all successful applications.

        Repetition also stops when an application succeeds without
        moving the position forward; that application's value is
        discarded. Without this check a parser that can match nothing
        (for example an Opt) would repeat forever.

        tokens -- the token list
        pos -- the position to start at

        returns -- a Result object whose value is the list of
                   collected values (possibly empty)
        """
        values = []
        while True:
            result = self.parser(tokens, pos)
            # Stop on failure, or on a match that made no progress.
            if not result or result.pos <= pos:
                break
            values.append(result.value)
            pos = result.pos
        return Result(values, pos)


class Process(Parser):
    """
    The process combinator. Runs a parser and passes the value of
    its result through a function.
    """

    def __init__(self, parser, function):
        """
        The initialization method.

        parser -- the parser to wrap
        function -- the function applied to the result value
        """
        self.parser = parser
        self.function = function

    def __call__(self, tokens, pos):
        """
        Call the parser. On success the value of the wrapped
        parser's Result is replaced, in place, by the function's
        return value and that same Result is handed back.

        tokens -- the token list
        pos -- the position to check

        returns -- a Result object | None
        """
        outcome = self.parser(tokens, pos)
        if not outcome:
            # Pass the failure through untouched.
            return outcome
        outcome.value = self.function(outcome.value)
        return outcome


class Lazy(Parser):
    """
    The lazy combinator. Builds a parser only if needed.
    This makes recursive parsers possible.
    """

    def __init__(self, parser_func):
        """
        The initialization method.

        parser_func -- a zero-argument function building a parser
        """
        self.parser = None
        self.parser_func = parser_func

    def __call__(self, tokens, pos):
        """
        Build the parser on first use and return its result.
        The built parser is kept and reused on later calls.

        tokens -- the token list
        pos -- the position to check

        returns -- a Result object | None
        """
        # Compare against None explicitly: a parser object that happens
        # to be falsy must not be rebuilt on every call.
        if self.parser is None:
            self.parser = self.parser_func()
        return self.parser(tokens, pos)


class Phrase(Parser):
    """
    The phrase combinator. Runs a parser and only accepts its
    result if the whole token list has been consumed.
    """

    def __init__(self, parser):
        """
        The initialization method.

        parser -- the parser to wrap
        """
        self.parser = parser

    def __call__(self, tokens, pos):
        """
        Call the parser. The result is returned only if the wrapped
        parser succeeded and stopped exactly at the end of the
        token list.

        tokens -- the token list
        pos -- the position to start at

        returns -- a Result object | None
        """
        outcome = self.parser(tokens, pos)
        if not outcome:
            return None
        if outcome.pos != len(tokens):
            # Unparsed tokens remain.
            return None
        return outcome


class Exp(Parser):
    """
    The compound statement parser.
    Workaround for problems with left recursion.
    """

    def __init__(self, parser, separator):
        """
        The initialization method.

        parser -- the parser for the individual elements
        separator -- the parser for the separators between elements;
                     its result value is called with the value
                     accumulated so far and the next element's value
        """
        self.parser = parser
        self.separator = separator

    def __call__(self, tokens, pos):
        """
        Call the parser. Like Rep, it applies a parser multiple
        times, but each further element must be preceded by a
        separator, and the separator's value (a function) folds
        the new element into the value accumulated so far.

        tokens -- the token list
        pos -- the position to start at

        returns -- a Result object | None
        """
        accumulated = self.parser(tokens, pos)

        def fold(pair):
            # Reads `accumulated` at call time, so it always sees the
            # most recently accepted result.
            combine, rhs = pair
            return combine(accumulated.value, rhs)

        # A separator followed by an element, folded into the running value.
        step = (self.separator + self.parser) ^ fold

        if not accumulated:
            return accumulated

        while True:
            candidate = step(tokens, accumulated.pos)
            if not candidate:
                return accumulated
            accumulated = candidate