"""rx -- a toy regex engine.

`rx` is a toy regex engine, written by following along a tutorial
(http://dpk.io/dregs/toydregs) and translating it into Python along the way.

Provenance: contents of ``rx.py`` as added by commit
b01b814dc9db0a223dfafd183958da5da894e4f1 ("make it so", author hellerve,
Fri Apr 20 22:21:22 2018 +0000).  The same commit also adds a ``.gitignore``
containing ``__pycache__/`` and a ``README.md`` carrying the description above.

A pattern is described with plain Python values (``str``, ``list``, ``Or``,
``ZeroOrMore``, ``Any``), compiled into a graph of ``Node`` objects, and
matched by repeatedly taking the derivative of the current state with respect
to the next input character.
"""


class Node(object):
    """Base class for every state in the compiled graph.

    A bare ``Node`` has no outgoing transitions: deriving it by any character
    yields ``NeverMatches``.  The two module-level sentinels ``EmptyString``
    and ``NeverMatches`` are bare ``Node`` instances told apart by identity.
    """

    def derive(self, char):
        """Return the state reached after consuming ``char``."""
        return NeverMatches

    def matches_empty(self):
        """Return True if this state accepts with no further input.

        Only the ``EmptyString`` sentinel does so by default; subclasses that
        can reach it without consuming a character override this method.
        """
        return self is EmptyString


# Accepting state: the whole pattern has been consumed.
EmptyString = Node()
# Dead state: no continuation of the input can match.
NeverMatches = Node()


class CharacterNode(Node):
    """Matches exactly one given character, then continues with ``nxt``."""

    def __init__(self, char, nxt):
        self.char = char
        self.nxt = nxt

    def derive(self, char):
        if char == self.char:
            return self.nxt
        return NeverMatches


def new_alternation(alts):
    """Build the simplest state equivalent to "any of ``alts``".

    Dead branches are dropped.  If nothing is left the result is
    ``NeverMatches``; if one branch is left it is returned as-is; otherwise
    an ``AlternationNode`` wraps the survivors.
    """
    # Sentinels are singletons, so compare by identity.
    live = [alt for alt in alts if alt is not NeverMatches]

    if not live:
        return NeverMatches
    if len(live) == 1:
        return live[0]

    return AlternationNode(live)


class AlternationNode(Node):
    """State that follows several alternative states in parallel."""

    def __init__(self, alts):
        self.alts = alts

    def derive(self, char):
        return new_alternation([alt.derive(char) for alt in self.alts])

    def matches_empty(self):
        # Accepting as soon as one live branch is accepting.
        return any(alt.matches_empty() for alt in self.alts)


class AnyNode(Node):
    """Matches any single character, then continues with ``nxt``."""

    def __init__(self, nxt):
        self.nxt = nxt

    def derive(self, char):
        return self.nxt


class RepetitionNode(Node):
    """Loop state for ``ZeroOrMore``.

    ``head`` is the compiled loop body (whose tail points back at this node)
    and ``nxt`` is what follows the loop.  ``head`` starts as
    ``NeverMatches`` and is patched in by ``compile_zero_or_more`` once the
    body has been compiled against this node.
    """

    def __init__(self, nxt):
        self.head = NeverMatches
        self.nxt = nxt

    def derive(self, char):
        # Either run the body once more or leave the loop.
        return new_alternation([self.head.derive(char), self.nxt.derive(char)])

    def matches_empty(self):
        # Zero iterations are allowed, so this state accepts exactly when the
        # continuation does.  Only ``nxt`` is followed; ``head`` loops back
        # to this node and would not terminate.
        return self.nxt.matches_empty()


class Or:
    """Pattern element: match any one of the expressions in ``alts``."""

    def __init__(self, alts):
        self.alts = alts


class ZeroOrMore:
    """Pattern element: match ``rep`` zero or more times.

    NOTE: if ``rep`` can itself match the empty string (for example a nested
    ``ZeroOrMore``), ``RepetitionNode.derive`` recurses into itself without
    bound.
    """

    def __init__(self, rep):
        self.rep = rep


class Any:
    """Pattern element: match any single character."""
    pass


def compile_str(s, tail=None):
    """Compile the literal string ``s`` into a chain of ``CharacterNode``s.

    The chain ends in ``tail`` (``EmptyString`` when omitted).  An empty
    ``s`` returns ``tail`` unchanged.
    """
    if tail is None:
        tail = EmptyString

    # Build back to front so each node can point at its successor.
    for char in reversed(s):
        tail = CharacterNode(char, tail)
    return tail


def compile_list(s, tail=None):
    """Compile a sequence of sub-expressions to be matched one after another.

    Each element of ``s`` is compiled with ``compile``; the sequence ends in
    ``tail`` (``EmptyString`` when omitted).
    """
    if tail is None:
        tail = EmptyString

    for item in reversed(s):
        tail = compile(item, tail)
    return tail


def compile_or(or_, tail=None):
    """Compile an ``Or``: every alternative continues with the same ``tail``."""
    if tail is None:
        tail = EmptyString

    return new_alternation([compile(alt, tail) for alt in or_.alts])


def compile_zero_or_more(zero_or_more, tail=None):
    """Compile a ``ZeroOrMore`` into a ``RepetitionNode`` followed by ``tail``."""
    if tail is None:
        tail = EmptyString

    repetition = RepetitionNode(tail)
    # The body's tail is the repetition itself, which closes the loop.
    repetition.head = compile(zero_or_more.rep, repetition)
    return repetition


def compile_any(tail=None):
    """Compile an ``Any`` into an ``AnyNode`` followed by ``tail``."""
    if tail is None:
        tail = EmptyString

    return AnyNode(tail)


def compile(expr, tail=None):
    """Compile a pattern expression into its start ``Node``.

    ``expr`` may be an ``Or``, ``ZeroOrMore``, ``Any``, ``str`` or ``list``
    (or an instance of a subclass of one of these).  ``tail`` is the state to
    continue with after ``expr`` has matched; it defaults to ``EmptyString``.

    Raises:
        TypeError: if ``expr`` is of none of the supported types.
    """
    if tail is None:
        tail = EmptyString

    if isinstance(expr, Or):
        return compile_or(expr, tail)
    if isinstance(expr, ZeroOrMore):
        return compile_zero_or_more(expr, tail)
    if isinstance(expr, Any):
        return compile_any(tail)
    if isinstance(expr, str):
        return compile_str(expr, tail)
    if isinstance(expr, list):
        return compile_list(expr, tail)
    raise TypeError("{} is not a compilable type.".format(type(expr)))


class RE:
    """A compiled pattern.

    ``reg`` is any expression accepted by ``compile``; the resulting start
    state is stored in ``self.start``.
    """

    def __init__(self, reg):
        self.start = compile(reg)

    def match(self, s):
        """Return True if the pattern matches the whole of ``s``."""
        state = self.start

        for char in s:
            state = state.derive(char)
            # Dead state: nothing that follows can rescue the match.
            if state is NeverMatches:
                return False

        # All input consumed.  The match succeeds when the final state can
        # accept without reading more -- not only when it *is* EmptyString,
        # which would reject patterns ending in a repetition or in an
        # alternation with a shorter accepting branch.
        return state.matches_empty()