make it so
This commit is contained in:
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
||||
__pycache__/
|
6
README.md
Normal file
6
README.md
Normal file
@@ -0,0 +1,6 @@
|
||||
# rx
|
||||
|
||||
`rx` is a toy regex engine.
|
||||
|
||||
I followed along [a tutorial](http://dpk.io/dregs/toydregs) and translated it
|
||||
into Python along the way.
|
140
rx.py
Normal file
140
rx.py
Normal file
@@ -0,0 +1,140 @@
|
||||
class Node(object):
|
||||
def derive(self, char):
|
||||
return NeverMatches
|
||||
|
||||
|
||||
EmptyString = Node()
|
||||
NeverMatches = Node()
|
||||
|
||||
|
||||
class CharacterNode(Node):
|
||||
def __init__(self, char, nxt):
|
||||
self.char = char
|
||||
self.nxt = nxt
|
||||
|
||||
def derive(self, char):
|
||||
if char == self.char:
|
||||
return self.nxt
|
||||
return NeverMatches
|
||||
|
||||
|
||||
def new_alternation(alts):
|
||||
_alts = [alt for alt in alts if alt != NeverMatches]
|
||||
altsl = len(_alts)
|
||||
|
||||
if altsl == 0: return NeverMatches
|
||||
if altsl == 1: return _alts[0]
|
||||
|
||||
return AlternationNode(_alts)
|
||||
|
||||
|
||||
class AlternationNode(Node):
|
||||
def __init__(self, alts):
|
||||
self.alts = alts
|
||||
|
||||
def derive(self, char):
|
||||
return new_alternation([alt.derive(char) for alt in self.alts])
|
||||
|
||||
|
||||
class AnyNode(Node):
|
||||
def __init__(self, nxt):
|
||||
self.nxt = nxt
|
||||
|
||||
def derive(self, char):
|
||||
return self.nxt
|
||||
|
||||
|
||||
class RepetitionNode(Node):
|
||||
def __init__(self, nxt):
|
||||
self.head = NeverMatches
|
||||
self.nxt = nxt
|
||||
|
||||
def derive(self, char):
|
||||
return new_alternation([self.head.derive(char), self.nxt.derive(char)])
|
||||
|
||||
|
||||
class Or:
|
||||
def __init__(self, alts):
|
||||
self.alts = alts
|
||||
|
||||
|
||||
class ZeroOrMore:
|
||||
def __init__(self, rep):
|
||||
self.rep = rep
|
||||
|
||||
|
||||
class Any:
|
||||
pass
|
||||
|
||||
|
||||
def compile_str(s, tail=None):
|
||||
if tail is None:
|
||||
tail = EmptyString
|
||||
|
||||
for char in reversed(s):
|
||||
tail = CharacterNode(char, tail)
|
||||
return tail
|
||||
|
||||
|
||||
def compile_list(s, tail=None):
|
||||
if tail is None:
|
||||
tail = EmptyString
|
||||
|
||||
for char in reversed(s):
|
||||
tail = compile(char, tail)
|
||||
return tail
|
||||
|
||||
|
||||
def compile_or(or_, tail=None):
|
||||
if tail is None:
|
||||
tail = EmptyString
|
||||
|
||||
return new_alternation([compile(alt, tail) for alt in or_.alts])
|
||||
|
||||
|
||||
def compile_zero_or_more(zero_or_more, tail=None):
|
||||
if tail is None:
|
||||
tail = EmptyString
|
||||
|
||||
repetition = RepetitionNode(tail)
|
||||
contents = compile(zero_or_more.rep, repetition)
|
||||
repetition.head = contents
|
||||
return repetition
|
||||
|
||||
|
||||
def compile_any(tail=None):
|
||||
if tail is None:
|
||||
tail = EmptyString
|
||||
|
||||
return AnyNode(tail)
|
||||
|
||||
|
||||
def compile(expr, tail=None):
|
||||
if tail is None:
|
||||
tail = EmptyString
|
||||
|
||||
if type(expr) is Or: return compile_or(expr, tail)
|
||||
if type(expr) is ZeroOrMore: return compile_zero_or_more(expr, tail)
|
||||
if type(expr) is Any: return compile_any(tail)
|
||||
if type(expr) is str: return compile_str(expr, tail)
|
||||
if type(expr) is list: return compile_list(expr, tail)
|
||||
raise TypeError("{} is not a compilable type.".format(type(expr)))
|
||||
|
||||
|
||||
class RE:
|
||||
def __init__(self, reg):
|
||||
self.start = compile(reg)
|
||||
|
||||
def match(self, s):
|
||||
state = self.start
|
||||
ls = len(s)
|
||||
|
||||
if (not ls) and state is EmptyString: return True
|
||||
|
||||
for i, char in enumerate(s):
|
||||
state = state.derive(char)
|
||||
|
||||
if state is EmptyString: return i == ls-1
|
||||
if state is NeverMatches: return False
|
||||
|
||||
return False
|
Reference in New Issue
Block a user