"""The tokenizer.

Defines the token table for the language and a thin ``tokenize`` wrapper
that feeds it to the generic lexer in ``.lex``.
"""

from .lex import lex

# Tags attached to the tokens produced by the lexer.
RESERVED = 'RESERVED'
INT = 'INT'
ID = 'ID'


def _keyword(word):
    """Return a regex matching *word* only when it is a whole word.

    The negative lookahead uses the same character class as the identifier
    rule, so an identifier that merely starts with a keyword (``done``,
    ``order``, ``iffy``) is not split into a keyword plus a leftover
    identifier.  The lookahead is zero-width, so the matched text is still
    exactly *word*.
    """
    return word + r'(?![A-Za-z0-9_])'


# Ordered list of (pattern, tag) pairs handed to ``lex``.  Entries tagged
# ``None`` cover whitespace and comments.  Order matters: a longer operator
# must precede any shorter operator that is its prefix (``**`` before ``*``,
# ``<=`` before ``<``), and keywords must precede the identifier rule.
# NOTE(review): assumes ``lex`` tries the patterns in list order with the
# ``re`` module and drops ``None``-tagged matches -- confirm against .lex.
TOKENS = [
    (r'[ \n\t]+', None),
    (r'#[^\n]*', None),
    (r'\:=', RESERVED),
    (r'\(', RESERVED),
    (r'\)', RESERVED),
    (r';', RESERVED),
    (r'\+', RESERVED),
    (r'-', RESERVED),
    (r'\*\*', RESERVED),
    (r'\*', RESERVED),
    (r'/', RESERVED),
    (r'<=', RESERVED),
    (r'<', RESERVED),
    (r'>=', RESERVED),
    (r'>', RESERVED),
    (r'=', RESERVED),
    (r'!=', RESERVED),
    (_keyword(r'read'), RESERVED),
    (_keyword(r'write'), RESERVED),
    (_keyword(r'and'), RESERVED),
    (_keyword(r'or'), RESERVED),
    (_keyword(r'not'), RESERVED),
    (_keyword(r'if'), RESERVED),
    (_keyword(r'then'), RESERVED),
    (_keyword(r'else'), RESERVED),
    (_keyword(r'while'), RESERVED),
    (_keyword(r'for'), RESERVED),
    (_keyword(r'do'), RESERVED),
    (_keyword(r'end'), RESERVED),
    (r'[0-9]+', INT),
    (r'[A-Za-z][A-Za-z0-9_]*', ID),
]


def tokenize(characters):
    """
    Tokenizes the input.

    characters -- the string to be tokenized

    returns -- the result of ``lex(characters, TOKENS)``: a list of tuples
               of the form (contents, tag)
    """
    return lex(characters, TOKENS)