#!/usr/bin/env python
"""Tiny transpiler: tokenizes and parses a toy language and emits C source.

The toy language consists of ``import NAME`` statements and function
definitions of the form ``def NAME(ARGS) EXPR end`` where ``EXPR`` is an
integer literal, a string literal, a variable reference or a call.

Usage: ``python <this file> SOURCE_FILE`` -- the generated C is printed to
standard output.
"""
import re
import sys
from collections import OrderedDict


class Token:
    """A lexical token: ``name`` is the token type, ``value`` the matched text."""

    def __init__(self, name, value):
        self.name = name
        self.value = value

    def __str__(self):
        return "Token(type='{}', value='{}')".format(self.name, self.value)

    def __unicode__(self):
        return self.__str__()


class DefNode:
    """AST node for a function definition (name, argument names, body expr)."""

    def __init__(self, name, args, body):
        self.name = name
        self.args = args
        self.body = body

    def __str__(self):
        return "Def(name='{}', args='{}', body='{}')".format(
            self.name, self.args, self.body)

    def __unicode__(self):
        return self.__str__()


class CallNode:
    """AST node for a call: callee name plus a list of argument expressions."""

    def __init__(self, name, exprs):
        self.name = name
        self.exprs = exprs

    def __str__(self):
        return "Call(name='{}', exprs=[{}])".format(
            self.name, ", ".join(str(e) for e in self.exprs))

    def __unicode__(self):
        return self.__str__()


class IntegerNode:
    """AST node for an integer literal; ``value`` is an ``int``."""

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return "Integer(value={})".format(self.value)

    def __unicode__(self):
        return self.__str__()


class StringNode:
    """AST node for a string literal; ``value`` keeps the surrounding quotes."""

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return "String(value={})".format(self.value)

    def __unicode__(self):
        return self.__str__()


class VarNode:
    """AST node for a variable reference."""

    def __init__(self, name):
        self.name = name

    def __str__(self):
        return "Var(name={})".format(self.name)

    def __unicode__(self):
        return self.__str__()


class ImportNode:
    """AST node for an ``import NAME`` statement."""

    def __init__(self, name):
        self.name = name

    def __str__(self):
        return "Import(name={})".format(self.name)

    def __unicode__(self):
        return self.__str__()


class Tokenizer:
    """Splits source text into a list of :class:`Token` objects."""

    # Order matters: patterns are tried top to bottom, so the keywords must
    # come before the generic "identifier" pattern.  An OrderedDict built
    # from a list guarantees that order on every Python version (a plain
    # dict literal does not on Python 2 / < 3.7).
    token_types = OrderedDict([
        ("def", r"\bdef\b"),
        ("end", r"\bend\b"),
        ("import", r"\bimport\b"),
        ("identifier", r"\b[a-zA-Z]+\b"),
        ("string", r'"[^"]*"'),
        ("integer", r"\b[0-9]+\b"),
        ("open_paren", r"\("),
        ("comma", r","),
        ("close_paren", r"\)"),
    ])

    def __init__(self, code):
        """Store ``code`` and compile the token patterns, preserving order."""
        self.code = code
        self.token_types = OrderedDict(
            (token_name, re.compile(token_re))
            for token_name, token_re in Tokenizer.token_types.items()
        )

    def tokenize(self):
        """Consume all of ``self.code`` and return the list of tokens.

        Raises:
            RuntimeError: if the remaining input matches no token pattern.
        """
        res = []
        # Strip up front as well, so leading whitespace/newlines in the
        # source do not make the very first match fail.
        self.code = self.code.strip()
        while self.code:
            token = self.tokenize_once()
            if not token:
                raise RuntimeError(
                    "Could not tokenize input starting at: {!r}".format(
                        self.code[:20]))
            res.append(token)
            self.code = self.code.strip()
        return res

    def tokenize_once(self):
        """Match one token at the start of ``self.code`` and consume it.

        Returns the :class:`Token`, or ``None`` when no pattern matches.
        """
        for token_name, token_re in self.token_types.items():
            match = token_re.match(self.code)
            if match:
                value = match.group()
                self.code = self.code[len(value):]
                return Token(token_name, value)
        return None


class Parser:
    """Recursive-descent parser turning a token list into AST nodes.

    The token list passed in is consumed (mutated) while parsing.
    """

    def __init__(self, tokens):
        self.tokens = tokens

    def parse(self):
        """Parse every top-level statement and return the list of nodes."""
        res = []
        while self.tokens:
            if self.peek("import"):
                res.append(self.parse_import())
            else:
                res.append(self.parse_def())
        return res

    def parse_def(self):
        """Parse ``def NAME(ARGS) EXPR end`` into a :class:`DefNode`."""
        self.consume("def")
        name = self.consume("identifier").value
        args = self.parse_arg_names()
        body = self.parse_expr()
        self.consume("end")
        return DefNode(name, args, body)

    def parse_expr(self):
        """Parse an integer, string, call or variable expression."""
        if self.peek("integer"):
            return self.parse_integer()
        if self.peek("string"):
            return self.parse_string()
        # An identifier directly followed by "(" is a call; look one ahead.
        if self.peek("open_paren", 1):
            return self.parse_call()
        return self.parse_variable()

    def parse_import(self):
        """Parse ``import NAME`` into an :class:`ImportNode`."""
        self.consume("import")
        return ImportNode(self.consume("identifier").value)

    def parse_variable(self):
        """Parse a bare identifier into a :class:`VarNode`."""
        return VarNode(self.consume("identifier").value)

    def parse_call(self):
        """Parse ``NAME(EXPR, ...)`` into a :class:`CallNode`."""
        name = self.consume("identifier").value
        exprs = self.parse_arg_exprs()
        return CallNode(name, exprs)

    def parse_arg_exprs(self):
        """Parse a parenthesized, comma-separated list of expressions."""
        res = []
        self.consume("open_paren")
        if not self.peek("close_paren"):
            res.append(self.parse_expr())
            while self.peek("comma"):
                self.consume("comma")
                res.append(self.parse_expr())
        self.consume("close_paren")
        return res

    def parse_integer(self):
        """Parse an integer literal into an :class:`IntegerNode`."""
        return IntegerNode(int(self.consume("integer").value))

    def parse_string(self):
        """Parse a string literal into a :class:`StringNode`."""
        return StringNode(self.consume("string").value)

    def parse_arg_names(self):
        """Parse a parenthesized, comma-separated list of identifiers."""
        res = []
        self.consume("open_paren")
        if self.peek("identifier"):
            res.append(self.consume("identifier").value)
            while self.peek("comma"):
                self.consume("comma")
                res.append(self.consume("identifier").value)
        self.consume("close_paren")
        return res

    def consume(self, expected):
        """Remove and return the next token, which must be of type ``expected``.

        Raises:
            RuntimeError: if the input is exhausted or the next token has a
                different type.
        """
        if not self.tokens:
            # Report truncated input clearly instead of leaking the
            # IndexError that list.pop() would raise.
            raise RuntimeError(
                "Expected token type {} but got end of input".format(expected))
        token = self.tokens.pop(0)
        if token.name == expected:
            return token
        raise RuntimeError(
            "Expected token type {} but got {}".format(expected, token.name))

    def peek(self, expected, idx=0):
        """Return True if the token ``idx`` places ahead has type ``expected``."""
        return len(self.tokens) > idx and self.tokens[idx].name == expected


class CodeGen:
    """Emits C source text from the AST produced by :class:`Parser`."""

    def generate_all(self, nodes):
        """Return prototypes for all nodes followed by their full code.

        Every piece is preceded by a newline; nodes without a prototype
        (anything other than a definition) contribute an empty line.
        """
        pieces = [self.generate_proto(node) for node in nodes]
        pieces.extend(self.generate(node) for node in nodes)
        return "".join("\n{}".format(piece) for piece in pieces)

    def generate_head(self, node):
        """Return the C function signature (without ``;`` or body)."""
        args = ", ".join("int {}".format(n) for n in node.args)
        return "int {}({})".format(node.name, args)

    def generate_proto(self, node):
        """Return a C prototype for a definition, or ``""`` for other nodes."""
        if isinstance(node, DefNode):
            return "{};".format(self.generate_head(node))
        return ""

    def generate(self, node):
        """Return the C code for ``node``.

        Raises:
            RuntimeError: if ``node`` is not one of the known AST node types.
        """
        if isinstance(node, DefNode):
            body = self.generate(node.body)
            return "{} {{ return {}; }}".format(self.generate_head(node), body)
        if isinstance(node, IntegerNode):
            return str(node.value)
        if isinstance(node, CallNode):
            return "{}({})".format(
                node.name, ", ".join(self.generate(e) for e in node.exprs))
        if isinstance(node, VarNode):
            return node.name
        if isinstance(node, ImportNode):
            return "#include <{}.h>".format(node.name)
        if isinstance(node, StringNode):
            # The token value still carries its quotes, so it is already a
            # valid C string literal.
            return node.value
        raise RuntimeError(
            "Unexpected node type: {}".format(type(node).__name__))


def main(argv=None):
    """Compile the file named by ``argv[1]`` and print the generated C.

    Returns a process exit status: 0 on success, 1 when no path was given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.stderr.write("usage: {} SOURCE_FILE\n".format(argv[0]))
        return 1
    with open(argv[1]) as f:
        contents = f.read()
    tokens = Tokenizer(contents).tokenize()
    ast = Parser(tokens).parse()
    generated = CodeGen().generate_all(ast)
    print(generated)
    return 0


if __name__ == "__main__":
    sys.exit(main())