initial; lexer and parser

This commit is contained in:
hellerve
2015-07-07 13:51:15 +02:00
parent 0bc4bc1616
commit f9c1f2a45d
7 changed files with 490 additions and 0 deletions

33
improved/lex.py Normal file
View File

@@ -0,0 +1,33 @@
"""The lexer"""
import sys
import re
def lex(characters, token_exprs):
    """
    A somewhat generic lexer.

    characters -- the string to be lexed
    token_exprs -- the tokens that constitute our grammar, given as an
                   iterable of (pattern, tag) pairs. Patterns are tried in
                   order at the current position and the first one that
                   consumes at least one character wins. A falsy tag
                   (e.g. None) means the matched text is skipped without
                   emitting a token.
    returns -- a list of tokens of the form (contents, tag)
    raises -- ValueError carrying the offending character when no pattern
              matches at the current position; a diagnostic line is also
              written to stderr before raising
    """
    # Compile each pattern once up front instead of once per input position.
    # Materializing the list also lets token_exprs be a one-shot iterable.
    compiled = [(re.compile(pattern), tag) for pattern, tag in token_exprs]

    pos = 0
    end = len(characters)
    tokens = []
    while pos < end:
        for regex, tag in compiled:
            match = regex.match(characters, pos)
            # An empty match consumes nothing and would leave pos unchanged
            # forever, so it is treated the same as no match at all.
            if match and match.end(0) > pos:
                break
        else:
            # No pattern made progress here: report and abort.
            sys.stderr.write('[Parser] Illegal character: %s\n' % characters[pos])
            raise ValueError(characters[pos])

        if tag:
            tokens.append((match.group(0), tag))
        pos = match.end(0)
    return tokens