initial; lexer and parser
This commit is contained in:
47
improved/tokenize.py
Normal file
47
improved/tokenize.py
Normal file
@@ -0,0 +1,47 @@
|
||||
"""The tokenizer"""
|
||||
from .lex import lex
|
||||
|
||||
# Token tags: every token produced by the lexer carries one of these.
RESERVED = 'RESERVED'  # keywords and operators with fixed spellings
INT = 'INT'            # integer literals
ID = 'ID'              # identifiers (variable names)

# Lexer table: (regex, tag) pairs, tried in order at each position.
# A tag of None means "match and discard" (whitespace, comments).
# Order matters: longer operators precede their prefixes (** before *,
# <= before <), and keywords precede the ID pattern.
#
# BUG FIX: the whitespace/comment patterns previously used r'[ \\n\\t]+'
# and r'#[^\\n]*' — in a raw string, '\\n' is a literal backslash plus
# 'n', so real newlines and tabs were never skipped and comments were
# terminated by a backslash instead of end-of-line.
#
# FIX: word-like keywords now end in \b so that an identifier such as
# 'nothing' is not split into the keyword 'not' plus 'hing' by a
# first-match lexer.
TOKENS = [
    (r'[ \n\t]+',              None),      # whitespace: discarded
    (r'#[^\n]*',               None),      # comment to end of line: discarded
    (r':=',                    RESERVED),  # assignment
    (r'\(',                    RESERVED),
    (r'\)',                    RESERVED),
    (r';',                     RESERVED),
    (r'\+',                    RESERVED),
    (r'-',                     RESERVED),
    (r'\*\*',                  RESERVED),  # power, before '*'
    (r'\*',                    RESERVED),
    (r'/',                     RESERVED),
    (r'<=',                    RESERVED),  # before '<'
    (r'<',                     RESERVED),
    (r'>=',                    RESERVED),  # before '>'
    (r'>',                     RESERVED),
    (r'=',                     RESERVED),
    (r'!=',                    RESERVED),
    (r'and\b',                 RESERVED),
    (r'or\b',                  RESERVED),
    (r'not\b',                 RESERVED),
    (r'if\b',                  RESERVED),
    (r'then\b',                RESERVED),
    (r'else\b',                RESERVED),
    (r'while\b',               RESERVED),
    (r'do\b',                  RESERVED),
    (r'end\b',                 RESERVED),
    (r'[0-9]+',                INT),       # integer literal
    (r'[A-Za-z][A-Za-z0-9_]*', ID),        # identifier, after all keywords
]
|
||||
|
||||
def tokenize(characters):
    """
    Split the input string into tokens using the module's TOKENS table.

    characters -- the string to be tokenized

    returns -- a list of tuples of the form (contents, tag)
    """
    token_list = lex(characters, TOKENS)
    return token_list
|
Reference in New Issue
Block a user