parsecomb.py->parser_combinator.py

This commit is contained in:
hellerve
2015-07-07 14:29:00 +02:00
parent f9c1f2a45d
commit ed5ea34114

View File

@@ -0,0 +1,370 @@
"""A minimal parser combinator library."""
class Result(object):
    """
    Container for the outcome of a parse step.
    Pairs a value with a token position.
    """

    def __init__(self, value, pos):
        """
        Stores both fields unchanged on the instance.
        value -- the value carried by this result
        pos -- the token position carried by this result
        """
        self.pos = pos
        self.value = value

    def __repr__(self):
        """
        Debugging aid.
        returns -- A string of the form 'Result(value, position)'
        """
        fields = (self.value, self.pos)
        return 'Result(%s, %d)' % fields
class Parser(object):
    """
    Base class of all parsers.
    Supplies the operator shortcuts (+, *, |, ^) that wrap
    parsers into combinators; subclasses provide __call__.
    """

    def __call__(self, tokens, pos):
        """
        Default implementation that never matches.
        Subclasses are expected to override it.
        tokens -- the tokens with which the Parser is called
        pos -- the token position
        returns -- None
        """
        return None

    def __add__(self, other):
        """
        parser + other: sequence two parsers.
        returns -- Concat(self, other)
        """
        sequence = Concat(self, other)
        return sequence

    def __mul__(self, other):
        """
        parser * other: build a separated expression parser.
        returns -- Exp(self, other)
        """
        expression = Exp(self, other)
        return expression

    def __or__(self, other):
        """
        parser | other: choose between two parsers.
        returns -- Alternate(self, other)
        """
        choice = Alternate(self, other)
        return choice

    def __xor__(self, function):
        """
        parser ^ function: post-process the parser's result value.
        returns -- Process(self, function)
        """
        processed = Process(self, function)
        return processed
class Reserved(Parser):
    """The parser for reserved words"""

    def __init__(self, value, tag):
        """
        The initialization method.
        value -- the reserved word
        tag -- the RESERVED tag
        """
        self.value = value
        self.tag = tag

    def __call__(self, tokens, pos):
        """
        Call parser. Returns a result if the token at the position
        is the pair (value, tag) this parser was built with.
        Otherwise returns None.
        tokens -- the token list
        pos -- the position to check
        returns -- a Result object | None
        """
        if pos >= len(tokens):
            return None
        token = tokens[pos]
        # A tuple never compares equal to a list, so a (value, tag)
        # tuple token would silently never match. Normalise tuples so
        # that both token shapes are accepted; list tokens behave
        # exactly as before.
        if isinstance(token, tuple):
            token = list(token)
        if token == [self.value, self.tag]:
            return Result(token[0], pos + 1)
        return None
class Tag(Parser):
    """
    The parser for tags. Matches anything if the tag matches,
    regardless of its value.
    """

    def __init__(self, tag):
        """
        The initialization method.
        tag -- the tag that should match
        """
        self.tag = tag

    def __call__(self, tokens, pos):
        """
        Call parser. Returns a result if the tag of the
        token at the specified position equals this parser's
        tag, regardless of the token's value. Otherwise returns None.
        tokens -- the token list
        pos -- the position to check
        returns -- a Result object | None
        """
        if pos >= len(tokens):
            return None
        token = tokens[pos]
        # Compare by equality, not identity: two equal tag values
        # (e.g. strings built at runtime) are not guaranteed to be the
        # same object, so `is` could reject a valid token.
        if token[1] == self.tag:
            return Result(token[0], pos + 1)
        return None
class Concat(Parser):
    """The concat combinator. Runs two parsers one after the other."""

    def __init__(self, left, right):
        """
        Remembers the two parsers to run in sequence.
        left -- the parser applied first
        right -- the parser applied after left succeeded
        """
        self.left = left
        self.right = right

    def __call__(self, tokens, pos):
        """
        Applies left at pos, then right at the position left
        ended on. Fails as soon as one of them fails.
        tokens -- the token list
        pos -- the position to check
        returns -- a Result holding the tuple (left value, right value),
                   positioned where right ended | None
        """
        first = self.left(tokens, pos)
        if not first:
            return None
        second = self.right(tokens, first.pos)
        if not second:
            return None
        return Result((first.value, second.value), second.pos)
class Alternate(Parser):
    """The alternate combinator. Parses using either of two parsers."""

    def __init__(self, left, right):
        """
        The initialization method.
        left -- the first parser
        right -- the second parser
        """
        self.left = left
        self.right = right

    def __call__(self, tokens, pos):
        """
        Calls the parser. Returns the left parser's result if it
        matches; otherwise returns whatever the right parser
        returns at the same position (which may be None).
        tokens -- the token list
        pos -- the position to check
        returns -- the Result object of either parser | None
        """
        left_result = self.left(tokens, pos)
        if left_result:
            return left_result
        # Left failed: the right parser's outcome is the final answer.
        return self.right(tokens, pos)
class Opt(Parser):
    """The optional combinator. Never fails."""

    def __init__(self, parser):
        """
        Remembers the parser that may or may not match.
        parser -- the parser to wrap
        """
        self.parser = parser

    def __call__(self, tokens, pos):
        """
        Tries the wrapped parser. If it does not match, an empty
        result (value None, position unchanged) is returned instead.
        tokens -- the token list
        pos -- the position to check
        returns -- a Result object
        """
        attempt = self.parser(tokens, pos)
        return attempt if attempt else Result(None, pos)
class Rep(Parser):
    """The repetition combinator. Applies a parser until it fails."""

    def __init__(self, parser):
        """
        The initialization method.
        parser -- the parser to wrap
        """
        self.parser = parser

    def __call__(self, tokens, pos):
        """
        Calls the parser repeatedly. Returns a Result whose value
        is the list of values (one for each successful application)
        and whose position is where the last application ended.
        Zero applications yield an empty list at the start position.
        tokens -- the token list
        pos -- the position to check
        returns -- a Result object
        """
        results = []
        while True:
            result = self.parser(tokens, pos)
            if not result:
                break
            # A parser that succeeds without advancing (e.g. Opt) would
            # succeed at the same position forever; treat a
            # non-advancing match as the end of the repetition.
            if result.pos <= pos:
                break
            results.append(result.value)
            pos = result.pos
        return Result(results, pos)
class Process(Parser):
    """
    The process combinator. Runs a parser and passes the
    value of its result through a function.
    """

    def __init__(self, parser, function):
        """
        Remembers the parser and the function to apply.
        parser -- the parser to wrap
        function -- called with the result value; its return
                    value replaces the result value
        """
        self.parser = parser
        self.function = function

    def __call__(self, tokens, pos):
        """
        Applies the wrapped parser. On a match, the value of the
        returned Result is replaced in place by function(value).
        A failed match is passed through untouched.
        tokens -- the token list
        pos -- the position to check
        returns -- a Result object | None
        """
        outcome = self.parser(tokens, pos)
        if not outcome:
            return outcome
        outcome.value = self.function(outcome.value)
        return outcome
class Lazy(Parser):
    """
    The lazy combinator. Defers building a parser until the
    first call, which makes recursive parsers possible.
    """

    def __init__(self, parser_func):
        """
        Remembers the factory; no parser is built yet.
        parser_func -- a zero-argument function building a parser
        """
        self.parser_func = parser_func
        self.parser = None

    def __call__(self, tokens, pos):
        """
        Builds the parser if it has not been built yet, keeps it
        for later calls and returns its result.
        tokens -- the token list
        pos -- the position to check
        returns -- a Result object | None
        """
        parser = self.parser
        if not parser:
            parser = self.parser = self.parser_func()
        return parser(tokens, pos)
class Phrase(Parser):
    """
    The phrase combinator. Succeeds only if the wrapped
    parser matches and ends exactly at the end of the tokens.
    """

    def __init__(self, parser):
        """
        Remembers the parser that has to consume everything.
        parser -- the parser to wrap
        """
        self.parser = parser

    def __call__(self, tokens, pos):
        """
        Applies the wrapped parser. Its result is returned only
        when its position equals the number of tokens; in every
        other case None is returned.
        tokens -- the token list
        pos -- the position to check
        returns -- a Result object | None
        """
        outcome = self.parser(tokens, pos)
        if not outcome:
            return None
        if outcome.pos == len(tokens):
            return outcome
        return None
class Exp(Parser):
    """
    The compound statement parser.
    Parses 'item (separator item)*' iteratively, as a
    workaround for problems with left recursion.
    """

    def __init__(self, parser, separator):
        """
        Remembers the item parser and the separator parser.
        parser -- the parser to wrap
        separator -- the parser that parses the compound
                     separators; its result value is called with
                     (value so far, next item value)
        """
        self.parser = parser
        self.separator = separator

    def __call__(self, tokens, pos):
        """
        Parses a first item, then keeps parsing separator/item
        pairs for as long as they match. Each pair is folded into
        the running value by calling the separator's value with
        the running value and the new item's value.
        tokens -- the token list
        pos -- the position to check
        returns -- a Result object | None
        """
        acc = self.parser(tokens, pos)

        def fold(pair):
            # Reads `acc` at call time, i.e. the running result
            # as it stands when the pair has just been parsed.
            combine, operand = pair
            return combine(acc.value, operand)

        # `+` binds tighter than `^`; the parentheses only spell
        # that grouping out.
        step = (self.separator + self.parser) ^ fold

        if not acc:
            return acc
        while True:
            candidate = step(tokens, acc.pos)
            if not candidate:
                return acc
            acc = candidate