parsecomb.py->parser_combinator.py
This commit is contained in:
370
improved/parser_combinator.py
Normal file
370
improved/parser_combinator.py
Normal file
@@ -0,0 +1,370 @@
|
||||
"""A minimal parser combinator library."""
|
||||
|
||||
class Result(object):
    """The result class. Returned by every parser on success."""

    def __init__(self, value, pos):
        """
        The result initializer.

        value -- the value of the result node
        pos -- the position of the result node
        """
        self.value = value
        self.pos = pos

    def __repr__(self):
        """
        A representation to make debugging easier.

        The value is rendered with %s and the position with %d, so
        the position has to be a number.

        returns -- A string of the form 'Result(value, position)'
        """
        # The arguments are wrapped in a tuple, so a tuple-valued
        # `self.value` is formatted as a single item.
        return 'Result(%s, %d)' % (self.value, self.pos)
|
||||
|
||||
class Parser(object):
    """
    The parser superclass.
    All parsers have to inherit from it.

    Besides the call protocol it provides operator shortcuts for
    building combinators: `+` (Concat), `*` (Exp), `|` (Alternate)
    and `^` (Process).
    """

    def __call__(self, tokens, pos):
        """
        Makes the parser callable.
        All subclasses need to override this method.

        tokens -- the tokens with which the Parser is called
        pos -- the token position

        returns -- None
        """
        return None

    def __add__(self, other):
        """
        Concatenates a parser to another parser.

        other -- the parser to apply after this one

        returns -- a Concat object of both parsers
        """
        return Concat(self, other)

    def __mul__(self, other):
        """
        "Multiplies" the parser to another parser.

        other -- the parser that is passed to Exp as the separator

        returns -- an Exp object of both parsers
        """
        return Exp(self, other)

    def __or__(self, other):
        """
        Alternates between this parser and another.

        other -- the parser that is passed to Alternate as the
                 second choice

        returns -- an Alternate object of both parsers
        """
        return Alternate(self, other)

    def __xor__(self, function):
        """
        Applies a function to the parser's results.

        function -- the function that is passed to Process

        returns -- a Process object of the parser and the function
        """
        return Process(self, function)
|
||||
|
||||
class Reserved(Parser):
    """The parser for reserved words"""

    def __init__(self, value, tag):
        """
        The initialization method.

        value -- the reserved word
        tag -- the RESERVED tag
        """
        self.value = value
        self.tag = tag

    def __call__(self, tokens, pos):
        """
        Call parser. Returns a result if the token at the position
        matches the reserved word. Otherwise returns None.

        A token matches when it is a two-element list or tuple
        whose items equal (value, tag).

        tokens -- the token list
        pos -- the position to check

        returns -- a Result object | None
        """
        if pos >= len(tokens):
            return None
        token = tokens[pos]
        # Comparing against a list literal alone would reject tokens
        # that are tuples, so both sequence kinds are normalised to
        # a tuple before the comparison.
        if (isinstance(token, (list, tuple))
                and tuple(token) == (self.value, self.tag)):
            return Result(token[0], pos + 1)
        return None
|
||||
|
||||
class Tag(Parser):
    """
    The parser for tags. Matches anything if the tag matches,
    regardless of its value.
    """

    def __init__(self, tag):
        """
        The initialization method.

        tag -- the tag that should match
        """
        self.tag = tag

    def __call__(self, tokens, pos):
        """
        Call parser. Returns a result if the tag of the
        token at the specified position matches, regardless
        of its value. Otherwise returns None.

        tokens -- the token list
        pos -- the position to check

        returns -- a Result object | None
        """
        # Tags are compared by equality, not identity: two equal
        # strings are not guaranteed to be the same object.
        if pos < len(tokens) and tokens[pos][1] == self.tag:
            return Result(tokens[pos][0], pos + 1)
        return None
|
||||
|
||||
class Concat(Parser):
    """
    The concat combinator. Applies two parsers one after the
    other and succeeds only if both succeed.
    """

    def __init__(self, left, right):
        """
        The initialization method.

        left -- the first parser
        right -- the second parser
        """
        self.left = left
        self.right = right

    def __call__(self, tokens, pos):
        """
        Calls the parser. Returns a result whose value is a tuple
        if both parsers match, otherwise returns None.

        tokens -- the token list
        pos -- the position to check

        returns -- a Result object holding the tuple
                   (left value, right value) | None
        """
        left_result = self.left(tokens, pos)
        if not left_result:
            return None
        # The right parser continues where the left one stopped.
        right_result = self.right(tokens, left_result.pos)
        if not right_result:
            return None
        combined_value = (left_result.value, right_result.value)
        return Result(combined_value, right_result.pos)
|
||||
|
||||
class Alternate(Parser):
    """The alternate combinator. Parses using either of two parsers."""

    def __init__(self, left, right):
        """
        The initialization method.

        left -- the first parser
        right -- the second parser
        """
        self.left = left
        self.right = right

    def __call__(self, tokens, pos):
        """
        Calls the parser. Returns a result if either
        parser matches, otherwise returns None.

        The left parser is tried first; the right parser is only
        applied (at the same position) when the left one fails.

        tokens -- the token list
        pos -- the position to check

        returns -- the Result object of either parser | None
        """
        left_result = self.left(tokens, pos)
        if left_result:
            return left_result
        # Whatever the right parser yields (a Result or None) is the
        # final answer, so no further fallback is needed.
        return self.right(tokens, pos)
|
||||
|
||||
class Opt(Parser):
    """The optional combinator. Always returns a result."""

    def __init__(self, parser):
        """
        The initialization method.

        parser -- the parser to wrap
        """
        self.parser = parser

    def __call__(self, tokens, pos):
        """
        Calls the parser. Returns either the
        parser's result or an empty result.

        The empty result has the value None and keeps the position
        that was passed in, i.e. nothing is consumed.

        tokens -- the token list
        pos -- the position to check

        returns -- a Result object
        """
        result = self.parser(tokens, pos)
        if result:
            return result
        return Result(None, pos)
|
||||
|
||||
class Rep(Parser):
    """The repetition combinator. Applies a parser until it fails."""

    def __init__(self, parser):
        """
        The initialization method.

        parser -- the parser to wrap
        """
        self.parser = parser

    def __call__(self, tokens, pos):
        """
        Calls the parser. Returns a Result whose value is a list
        of the values of all successful applications (possibly
        empty) and whose position is where the repetition stopped.

        Repetition also stops when the wrapped parser succeeds
        without advancing the position; such a match is not
        recorded.

        tokens -- the token list
        pos -- the position to check

        returns -- a Result object
        """
        results = []
        result = self.parser(tokens, pos)
        while result:
            # A parser that succeeds without consuming input (for
            # example an Opt) would succeed at the same position
            # forever, so stop as soon as no progress is made.
            if result.pos <= pos:
                break
            results.append(result.value)
            pos = result.pos
            result = self.parser(tokens, pos)
        return Result(results, pos)
|
||||
|
||||
class Process(Parser):
    """
    The process combinator. Applies a function that
    manipulates the parser's result.
    """

    def __init__(self, parser, function):
        """
        The initialization method.

        parser -- the parser to wrap
        function -- the manipulation function
        """
        self.parser = parser
        self.function = function

    def __call__(self, tokens, pos):
        """
        Calls the parser. Returns the Result
        object whose value was manipulated by the
        given function.

        The Result returned by the wrapped parser is updated in
        place; the function is not called when the parser fails.

        tokens -- the token list
        pos -- the position to check

        returns -- a Result object | None
        """
        result = self.parser(tokens, pos)
        if result:
            result.value = self.function(result.value)
        # On failure this hands back the wrapped parser's falsy
        # result unchanged.
        return result
|
||||
|
||||
class Lazy(Parser):
    """
    The lazy combinator. Builds a parser only if needed.
    This makes recursive parsers possible.
    """

    def __init__(self, parser_func):
        """
        The initialization method.

        parser_func -- a function building a parser; it is called
                       without arguments
        """
        self.parser = None
        self.parser_func = parser_func

    def __call__(self, tokens, pos):
        """
        Builds the parser on first use and returns its result.

        tokens -- the token list
        pos -- the position to check

        returns -- a Result object | None
        """
        # Test for None explicitly so that a built parser that
        # happens to be falsy is not rebuilt on every call.
        if self.parser is None:
            self.parser = self.parser_func()
        return self.parser(tokens, pos)
|
||||
|
||||
class Phrase(Parser):
    """
    The phrase combinator. Applies a parser and
    only succeeds if it consumed all remaining input.
    """

    def __init__(self, parser):
        """
        The initialization method.

        parser -- the parser to wrap
        """
        self.parser = parser

    def __call__(self, tokens, pos):
        """
        Calls the parser. If there is a result
        and nothing is left to parse, it returns
        the result. Otherwise it returns None.

        tokens -- the token list
        pos -- the position to check

        returns -- a Result object | None
        """
        result = self.parser(tokens, pos)
        # "Nothing left" means the result position equals the
        # length of the token list.
        if result and result.pos == len(tokens):
            return result
        return None
|
||||
|
||||
class Exp(Parser):
    """
    The compound statement parser.
    Workaround for problems with left recursion.
    """

    def __init__(self, parser, separator):
        """
        The initialization method.

        parser -- the parser to wrap
        separator -- the parser that parses the compound
                     separators; its result value is called with
                     the value parsed so far and the value of the
                     next element
        """
        self.parser = parser
        self.separator = separator

    def __call__(self, tokens, pos):
        """
        Calls the parser. Like Rep, it applies
        a parser multiple times, but it also
        keeps track of the separators.

        After the first element, every following
        "separator element" pair is folded into the running result
        by calling the separator's value with (value so far,
        element value).

        tokens -- the token list
        pos -- the position to check

        returns -- a Result object | None
        """
        result = self.parser(tokens, pos)

        def process_next(parsed):
            # `result` is looked up when this function runs, so it
            # always sees the most recent accumulated result.
            (sepfunc, right) = parsed
            return sepfunc(result.value, right)

        # `+` binds tighter than `^`: the concatenation of separator
        # and parser is what gets wrapped with process_next.
        next_parser = self.separator + self.parser ^ process_next

        next_result = result
        while next_result:
            next_result = next_parser(tokens, result.pos)
            if next_result:
                result = next_result
        return result
|
Reference in New Issue
Block a user