make it so

This commit is contained in:
2018-04-20 22:21:22 +00:00
commit b01b814dc9
3 changed files with 147 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
__pycache__/

6
README.md Normal file
View File

@@ -0,0 +1,6 @@
# rx
`rx` is a toy regex engine.
I followed [a tutorial](http://dpk.io/dregs/toydregs) and translated its code
into Python as I went.

140
rx.py Normal file
View File

@@ -0,0 +1,140 @@
class Node:
    """Base class for matcher states.

    Subclasses override ``derive`` to say which state follows a given input
    character.  Plain ``Node`` instances are used as sentinel states.
    """

    def derive(self, char):
        """Return the state reached after reading ``char``.

        The base implementation ignores ``char`` and always answers with the
        ``NeverMatches`` sentinel.
        """
        return NeverMatches
# Sentinel state: the match has failed.  Node.derive returns it for every
# character, so no further input can leave this state.
NeverMatches = Node()
# Sentinel state: nothing is left to match.  The compile_* functions use it
# as the default tail.
EmptyString = Node()
class CharacterNode(Node):
    """State that expects exactly one specific character."""

    def __init__(self, char, nxt):
        # ``nxt`` is the state to continue with once ``char`` has been read.
        self.nxt = nxt
        self.char = char

    def derive(self, char):
        """Return ``self.nxt`` if ``char`` is the expected one, else ``NeverMatches``."""
        return self.nxt if char == self.char else NeverMatches
def new_alternation(alts):
    """Build the simplest state equivalent to "any one of ``alts``".

    Branches that are the ``NeverMatches`` sentinel are dropped first.  What
    remains decides the result:

    * nothing left -> ``NeverMatches``
    * one branch   -> that branch itself, unwrapped
    * several      -> an ``AlternationNode`` over the surviving branches

    ``alts`` is iterated once and is not modified.
    """
    # NeverMatches is a singleton sentinel, so compare by identity rather
    # than equality (RE.match tests for it the same way).
    live = [alt for alt in alts if alt is not NeverMatches]
    if not live:
        return NeverMatches
    if len(live) == 1:
        return live[0]
    return AlternationNode(live)
class AlternationNode(Node):
    """State that follows several alternative states at once."""

    def __init__(self, alts):
        # The alternative states, kept exactly as passed in.
        self.alts = alts

    def derive(self, char):
        """Derive every alternative by ``char`` and recombine the results.

        The recombination is done by ``new_alternation``, so the return value
        is not necessarily another ``AlternationNode``.
        """
        derived = [branch.derive(char) for branch in self.alts]
        return new_alternation(derived)
class AnyNode(Node):
    """State that accepts any single character."""

    def __init__(self, nxt):
        # State to continue with after one character has been read.
        self.nxt = nxt

    def derive(self, char):
        """Return ``self.nxt`` regardless of which character was read."""
        return self.nxt
class RepetitionNode(Node):
    """State at the entry of a loop: run the body again or move on.

    ``head`` is the first state of the loop body and ``nxt`` is the state
    that follows the loop.  ``head`` starts out as ``NeverMatches`` and is
    assigned after construction by ``compile_zero_or_more``.
    """

    def __init__(self, nxt):
        self.nxt = nxt
        self.head = NeverMatches

    def derive(self, char):
        """Try ``char`` against both the loop body and what follows the loop."""
        via_body = self.head.derive(char)
        via_exit = self.nxt.derive(char)
        return new_alternation([via_body, via_exit])
class Or:
    """Regex description: a choice between several sub-expressions.

    ``alts`` holds the alternatives; ``compile_or`` iterates over it and
    compiles each entry with ``compile``.
    """

    def __init__(self, alts):
        self.alts = alts
class ZeroOrMore:
    """Regex description: a sub-expression wrapped for repetition.

    ``rep`` is the wrapped sub-expression; ``compile_zero_or_more`` compiles
    it with ``compile`` as the body of a ``RepetitionNode``.
    """

    def __init__(self, rep):
        self.rep = rep
class Any:
    """Regex description: marker that ``compile`` turns into an ``AnyNode``."""
def compile_str(s, tail=None):
    """Compile the literal string ``s`` into a chain of ``CharacterNode`` states.

    The chain is built back to front so each node can point at its
    successor.  The last character links to ``tail``, which defaults to
    ``EmptyString``.  An empty ``s`` returns the tail unchanged.
    """
    node = EmptyString if tail is None else tail
    for letter in reversed(s):
        node = CharacterNode(letter, node)
    return node
def compile_list(s, tail=None):
    """Compile a sequence of sub-expressions that must match one after another.

    The items of ``s`` are compiled from last to first with ``compile``, each
    one receiving the already-built remainder as its tail.  The final item
    links to ``tail``, which defaults to ``EmptyString``.  An empty ``s``
    returns the tail unchanged.
    """
    node = EmptyString if tail is None else tail
    for part in reversed(s):
        node = compile(part, node)
    return node
def compile_or(or_, tail=None):
    """Compile an ``Or`` description into a state covering all its branches.

    Every entry of ``or_.alts`` is compiled with the same tail (default
    ``EmptyString``).  The branches are then combined by ``new_alternation``.
    """
    target = EmptyString if tail is None else tail
    branches = [compile(option, target) for option in or_.alts]
    return new_alternation(branches)
def compile_zero_or_more(zero_or_more, tail=None):
    """Compile a ``ZeroOrMore`` description into a ``RepetitionNode`` loop.

    The loop node is created first, with ``tail`` (default ``EmptyString``)
    as the state that follows the loop.  The body is then compiled with the
    loop node as its own tail, so finishing the body leads back to the loop.
    """
    loop = RepetitionNode(EmptyString if tail is None else tail)
    # The body needs the loop node as its tail, so the cycle can only be
    # closed after the node exists.
    loop.head = compile(zero_or_more.rep, loop)
    return loop
def compile_any(tail=None):
    """Return an ``AnyNode`` that continues with ``tail`` (default ``EmptyString``)."""
    return AnyNode(EmptyString if tail is None else tail)
def compile(expr, tail=None):
    """Compile a regex description into a graph of matcher states.

    Dispatches on the type of ``expr``:

    * ``Or``         -> ``compile_or``
    * ``ZeroOrMore`` -> ``compile_zero_or_more``
    * ``Any``        -> ``compile_any``
    * ``str``        -> ``compile_str``
    * ``list``       -> ``compile_list``

    ``tail`` is the state to continue with after ``expr`` has matched; it
    defaults to ``EmptyString``.

    Raises:
        TypeError: if ``expr`` is none of the supported types.

    NOTE: within this module the name shadows the built-in ``compile``.
    """
    if tail is None:
        tail = EmptyString
    # isinstance (rather than an exact type() comparison) lets subclasses of
    # the supported types compile as well.
    if isinstance(expr, Or):
        return compile_or(expr, tail)
    if isinstance(expr, ZeroOrMore):
        return compile_zero_or_more(expr, tail)
    if isinstance(expr, Any):
        return compile_any(tail)
    if isinstance(expr, str):
        return compile_str(expr, tail)
    if isinstance(expr, list):
        return compile_list(expr, tail)
    raise TypeError("{} is not a compilable type.".format(type(expr)))
class RE:
    """A compiled regular expression that matches whole strings.

    ``reg`` is any description accepted by ``compile``.  The compiled start
    state is stored in ``self.start``.
    """

    def __init__(self, reg):
        self.start = compile(reg)

    @staticmethod
    def _accepts_empty(state):
        """Return True if ``state`` can complete a match with no more input.

        That is the case for ``EmptyString`` itself, for an
        ``AlternationNode`` with at least one such branch, and for a
        ``RepetitionNode`` whose follow-up state (``nxt``) qualifies, because
        the loop body may run zero times.
        """
        pending = [state]
        seen = set()
        while pending:
            node = pending.pop()
            if node is EmptyString:
                return True
            if id(node) in seen:
                # Already examined; guards against revisiting shared states.
                continue
            seen.add(id(node))
            if isinstance(node, AlternationNode):
                pending.extend(node.alts)
            elif isinstance(node, RepetitionNode):
                # Only the exit of the loop matters here: entering the body
                # would require consuming more input.
                pending.append(node.nxt)
        return False

    def match(self, s):
        """Return True if the whole of ``s`` matches the expression.

        The state is advanced once per character with ``derive``.  Reaching
        ``NeverMatches`` ends the search early.  After the last character the
        match succeeds if the final state can accept the empty string, not
        only if it is literally ``EmptyString``.  A pattern ending in a
        repetition or an alternation may end in such a state.
        """
        state = self.start
        for char in s:
            state = state.derive(char)
            if state is NeverMatches:
                return False
        return self._accepts_empty(state)