"""gll -- a tiny parser-combinator library with example grammars.

This module is the readable form of a flattened git patch (subject
"initial (hacky af)") that introduced the ``gll`` package.  The patch
created ``gll/result.py``, ``gll/parser.py``, ``gll/__init__.py``, three
example grammars under ``examples/``, an empty ``examples/__init__.py``
and a ``.gitignore`` containing ``__pycache__/``.  The Python payload is
merged here into one importable module; banner comments mark which
original file each section came from.

A *parser* is a callable taking the remaining input string and returning
either a ``Success`` (carrying the parsed value, the unconsumed rest and
an optional tag) or a ``Failure`` (carrying the input it was given, or
for ``seq`` the position at which matching stopped).
"""

import re

# Public names.  The first eleven are exactly what ``gll/__init__.py``
# re-exported; the rest are the result types and the example builders.
__all__ = [
    "string", "digit", "many", "many1", "whitespace", "skip", "skipmany",
    "opt", "regex", "seq", "all",
    "Parser", "Token", "Result", "Success", "Failure",
    "bf_parser", "calc_parser", "golike_parser",
]


# ---------------------------------------------------------------------------
# gll/result.py
# ---------------------------------------------------------------------------

class Token:
    """A tagged value produced by ``Success.tokens``."""

    def __init__(self, value, tag):
        self.value = value
        self.tag = tag

    def __str__(self):
        return "{}({})".format(self.tag or '', self.value)

    def __repr__(self):
        return str(self)

    def __bool__(self):
        # A token is falsy when its payload is empty, which is what lets
        # ``tokens(purge=True)`` drop skipped input.
        return bool(self.value)


class Result:
    """Abstract base of ``Success`` and ``Failure``; not instantiable."""

    def __init__(self):
        # The original raised ``NotImplemented(...)``, which is a constant
        # and not an exception class, so it produced a TypeError instead.
        raise NotImplementedError("This is an abstract base class")


class Success(Result):
    """A successful parse.

    Attributes:
        value: the parsed value (a string, or a list of nested results).
        rest: the input that was not consumed.
        tag: optional label attached by the parser that produced it.
        valid: always ``True``.
    """

    def __init__(self, value, rest, tag=None):
        self.value = value
        self.rest = rest
        self.tag = tag
        self.valid = True

    def __str__(self):
        return "Result({}, {})".format(self.tag, self.value)

    def __repr__(self):
        return str(self)

    def __bool__(self):
        return bool(self.value)

    def tokens(self, purge=False, inner=False):
        """Yield the contents of this result as ``Token`` objects.

        Args:
            purge: when true, drop falsy scalar values (for example the
                empty string left behind by ``skip``).
            inner: when true, yield raw values instead of wrapping them in
                ``Token``; used while flattening a nested result.
        """
        if not isinstance(self.value, list):
            if not purge or self.value:
                yield self.value if inner else Token(self.value, self.tag)
            return

        for val in self.value:
            if isinstance(val, Success):
                if inner:
                    yield from val.tokens(purge)
                else:
                    toks = list(val.tokens(purge, inner=True))
                    # A single child is unwrapped so that simple matches
                    # read as ``tag(value)`` rather than ``tag([value])``.
                    if len(toks) == 1:
                        toks = toks[0]
                    yield Token(toks, tag=val.tag)
            elif not purge or val:
                yield val if inner else Token(val, self.tag)


class Failure(Result):
    """A failed parse.

    Attributes:
        rest: the input associated with the failure.
        valid: always ``False``.
    """

    def __init__(self, rest):
        self.rest = rest
        self.valid = False

    def __str__(self):
        return "Failure(rest={})".format(self.rest)

    def __repr__(self):
        return str(self)


# ---------------------------------------------------------------------------
# gll/parser.py
# ---------------------------------------------------------------------------

class Parser:
    """Wrap a parsing function and provide the ``|`` and ``>>`` operators."""

    def __init__(self, fun, tag=None):
        self.fun = fun
        self.tag = tag

    def __call__(self, string):
        return self.fun(string)

    def __or__(self, other):
        """Ordered choice: try ``self``; on failure try ``other``.

        ``other`` may be any callable taking the input string, which is how
        recursive grammars pass a plain function (see ``bf_parser``).
        """
        def internal(text):
            res = self(text)
            if res.valid:
                return res
            return other(text)
        return Parser(internal)

    def __rshift__(self, into):
        """Map ``into`` over the value of a successful parse.

        A failure is passed through untouched; the original dereferenced
        ``.value`` on it and raised ``AttributeError``.
        """
        def internal(text):
            res = self(text)
            if not res.valid:
                return res
            return Success(into(res.value), res.rest, self.tag)
        # Carry the tag on the derived parser so chained ``>>`` keeps it.
        return Parser(internal, self.tag)


def string(match, tag=None):
    """Return a parser matching the literal ``match`` at the input start."""
    def internal(text):
        if text.startswith(match):
            return Success(match, text[len(match):], tag)
        return Failure(text)
    return Parser(internal, tag)


def regex(match, tag=None, reopts=None):
    """Return a parser matching the regular expression ``match``.

    Args:
        match: the pattern; it is anchored at the start of the input
            because ``re.match`` is used.
        tag: optional tag for the result.
        reopts: ``re`` flags, either a single flag value or an iterable of
            flags that are OR-ed together.
    """
    flags = 0
    if reopts:
        if isinstance(reopts, int):
            flags = reopts
        else:
            for flag in reopts:
                flags |= flag
    rx = re.compile(match, flags)

    def internal(text):
        found = rx.match(text)
        if found:
            return Success(found.group(0), text[found.end():], tag)
        return Failure(text)
    return Parser(internal, tag)


def digit(tag=None):
    """Return a parser matching a single digit character."""
    def internal(text):
        if text and text[0].isdigit():
            return Success(text[0], text[1:], tag)
        return Failure(text)
    return Parser(internal, tag)


def _repeat(parser, text):
    """Apply ``parser`` repeatedly; return ``(results, rest, last)``.

    ``last`` is the final result obtained: the failure that ended the
    repetition, or the non-consuming success that triggered the stop.
    """
    results = []
    rest = text
    while True:
        res = parser(rest)
        if not res.valid:
            return results, rest, res
        results.append(res)
        # Stop once a match consumes nothing, otherwise a parser such as
        # ``opt(...)`` would be re-applied to the same input forever.
        if len(res.rest) >= len(rest):
            return results, rest, res
        rest = res.rest


def many(parser, tag=None):
    """Return a parser matching ``parser`` zero or more times.

    With at least one match the value is the list of individual results.
    With none, the value is the empty-string marker also used by ``skip``
    and ``opt``, and no input is consumed.
    """
    def internal(text):
        results, rest, _ = _repeat(parser, text)
        if not results:
            return Success("", text, tag)
        return Success(results, rest, tag)
    return Parser(internal, tag)


def many1(parser, tag=None):
    """Return a parser matching ``parser`` one or more times.

    If the first application fails, that failure is returned unchanged.
    """
    def internal(text):
        results, rest, last = _repeat(parser, text)
        if not results:
            return last
        return Success(results, rest, tag)
    return Parser(internal, tag)


def whitespace(tag=None):
    """Return a parser matching a single whitespace character."""
    def internal(text):
        # The original compared the bound method ``head.strip`` to "" and
        # therefore never matched; it also indexed into empty input.
        if text and text[0].isspace():
            return Success(text[0], text[1:], tag)
        return Failure(text)
    return Parser(internal, tag)


def skip(parser, tag=None):
    """Return a parser that matches ``parser`` but discards its value."""
    def internal(text):
        res = parser(text)
        if not res.valid:
            return res
        return Success("", res.rest, tag)
    return Parser(internal, tag)


def skipmany(parser, tag=None):
    """Return a parser skipping zero or more occurrences of ``parser``."""
    return many(skip(parser), tag)


def opt(parser, tag=None):
    """Return a parser that matches ``parser`` or nothing at all.

    On failure the *original* input is handed back.  Using the failure's
    ``rest`` would silently consume input when ``parser`` is a ``seq``
    that failed part-way through.
    """
    def internal(text):
        res = parser(text)
        if res.valid:
            return res
        return Success("", text, tag)
    return Parser(internal, tag)


def seq(*parsers, tag=None):
    """Return a parser matching all of ``parsers`` one after another.

    The value is the list of individual results.  On failure the returned
    ``Failure`` carries the input position at which matching stopped.
    """
    def internal(text):
        results = []
        rest = text
        for parser in parsers:
            res = parser(rest)
            if not res.valid:
                return Failure(rest)
            rest = res.rest
            results.append(res)
        return Success(results, rest, tag)
    return Parser(internal, tag)


def all(parser, tag=None):
    """Return a parser that succeeds only if ``parser`` consumes everything.

    The name shadows the builtin ``all`` on purpose: it is part of the
    package's public interface.
    """
    def internal(text):
        res = parser(text)
        if res.valid and not res.rest:
            return res
        return Failure(res.rest)
    return Parser(internal, tag)


# ---------------------------------------------------------------------------
# examples/bf.py
# ---------------------------------------------------------------------------

def bf_parser():
    """Build the Brainfuck example grammar and return its top-level parser."""
    def loop(text):
        # Built lazily on every call so that ``op`` may refer to ``loop``
        # and ``loop`` to ``op`` without a forward-declaration problem.
        body = seq(skip(string("[")),
                   many(op),
                   skip(string("]")),
                   tag="loop")
        return body(text)

    op = (string("+", tag="add") |
          string("-", tag="sub") |
          string(".", tag="out") |
          string(",", tag="in") |
          string(">", tag="fwd") |
          string("<", tag="bck") |
          loop)

    return all(many(op), tag="program")


# ---------------------------------------------------------------------------
# examples/calc.py
# ---------------------------------------------------------------------------

def calc_parser():
    """Build the calculator example grammar and return its parser."""
    ws = skipmany(whitespace(), tag="spaces")
    num = many1(digit(), tag="number")
    op = string("+", tag="op") | string("-", tag="op")

    expr = seq(num, ws, op, ws, num, tag="expr")

    return many(expr)


# ---------------------------------------------------------------------------
# examples/golike.py
# ---------------------------------------------------------------------------

def golike_parser(function=None):
    """Build the (unfinished) Go-like example grammar.

    Args:
        function: parser for a single function definition.  The original
            example never defined one (it read ``function = function``,
            a ``NameError`` at import time), so by default a placeholder
            that never matches is used.  TODO: supply a real grammar.
    """
    if function is None:
        # ``Failure`` takes the input string, so it doubles as a parsing
        # function that rejects everything.
        function = Parser(Failure)

    ws = many(whitespace())

    package_name = seq(skip(string("package")), skip(ws),
                       regex(".*$"), tag="package_name")

    package = seq(package_name, many(function, tag="package_body"),
                  tag="package")

    return all(package)