initial; lexer and parser
This commit is contained in:
33
improved/lex.py
Normal file
33
improved/lex.py
Normal file
@@ -0,0 +1,33 @@
|
||||
"""The lexer"""
|
||||
import sys
|
||||
import re
|
||||
|
||||
def lex(characters, token_exprs):
    """
    A somewhat generic lexer.

    characters -- the string to be lexed
    token_exprs -- the tokens that constitute our grammar, given as
        (pattern, tag) pairs; patterns are tried in order at each
        position, and a falsy tag means "match but discard" (useful
        for whitespace and comments)

    returns -- a list of tokens of the form (contents, tag)

    raises ValueError carrying the first character that no pattern
    matches (a diagnostic is also written to stderr).
    """
    # Compile every pattern once up front; the original recompiled each
    # pattern at every input position, which is wasted work in the scan loop.
    compiled_exprs = [(re.compile(pattern), tag) for pattern, tag in token_exprs]
    pos = 0
    tokens = []
    while pos < len(characters):
        match = None
        for regex, tag in compiled_exprs:
            # Anchored match starting exactly at pos (not a search).
            match = regex.match(characters, pos)
            if match:
                text = match.group(0)
                if tag:
                    tokens.append((text, tag))
                # First pattern wins; earlier entries take priority.
                break
        if not match:
            # Bug fix: original wrote '\\n' (a literal backslash + n)
            # instead of a real newline terminator.
            sys.stderr.write('[Parser] Illegal character: %s\n' % characters[pos])
            raise ValueError(characters[pos])
        # `raise` above ends the failure branch, so no `else` is needed.
        pos = match.end(0)
    return tokens
|
Reference in New Issue
Block a user