Files
c/c.py
2018-05-05 00:50:40 +02:00

248 lines
6.6 KiB
Python
Executable File

#!/usr/bin/env python
import re
import sys
class Token:
    """A single lexeme: the token type name and the text that matched it."""

    def __init__(self, name, value):
        self.name, self.value = name, value

    def __str__(self):
        template = "Token(type='{}', value='{}')"
        return template.format(self.name, self.value)

    def __unicode__(self):
        # Same text as __str__.
        return str(self)
class DefNode:
    """AST node for a function definition: name, argument list and body."""

    def __init__(self, name, args, body):
        self.name, self.args, self.body = name, args, body

    def __str__(self):
        fields = (self.name, self.args, self.body)
        return "Def(name='{}', args='{}', body='{}')".format(*fields)

    def __unicode__(self):
        # Same text as __str__.
        return str(self)
class CallNode:
    """AST node for a call: the callee name and its argument expressions."""

    def __init__(self, name, exprs):
        self.name, self.exprs = name, exprs

    def __str__(self):
        # Each argument is rendered with str() and the results comma-joined.
        rendered = ", ".join(map(str, self.exprs))
        return "Call(name='{}', exprs=[{}])".format(self.name, rendered)

    def __unicode__(self):
        # Same text as __str__.
        return str(self)
class IntegerNode:
    """AST node for an integer literal."""

    def __init__(self, value):
        self.value = value

    def __str__(self):
        template = "Integer(value={})"
        return template.format(self.value)

    def __unicode__(self):
        # Same text as __str__.
        return str(self)
class StringNode:
    """AST node for a string literal; ``value`` is kept exactly as given."""

    def __init__(self, value):
        self.value = value

    def __str__(self):
        template = "String(value={})"
        return template.format(self.value)

    def __unicode__(self):
        # Same text as __str__.
        return str(self)
class VarNode:
    """AST node for a reference to a named variable."""

    def __init__(self, name):
        self.name = name

    def __str__(self):
        template = "Var(name={})"
        return template.format(self.name)

    def __unicode__(self):
        # Same text as __str__.
        return str(self)
class ImportNode:
    """AST node for an ``import`` statement naming a single module."""

    def __init__(self, name):
        self.name = name

    def __str__(self):
        template = "Import(name={})"
        return template.format(self.name)

    def __unicode__(self):
        # Same text as __str__.
        return str(self)
class Tokenizer:
    """Regex-based lexer that turns source text into ``Token`` objects.

    Patterns are tried, in ``token_order``, against the start of the
    remaining input; the first one that matches produces the next token.
    Whitespace between tokens is stripped and never produces a token.
    """

    # Token name -> regular expression source (compiled per instance).
    token_types = {
        "def": r"\bdef\b",
        "end": r"\bend\b",
        "import": r"\bimport\b",
        "identifier": r"\b[a-zA-Z]+\b",
        "string": r'"[^"]*"',
        "integer": r"\b[0-9]+\b",
        "open_paren": r"\(",
        "comma": r",",
        "close_paren": r"\)",
    }

    # Matching priority. The keyword patterns must be tried before
    # "identifier", which also matches "def", "end" and "import"; plain dict
    # iteration does not guarantee that order on every Python version.
    token_order = (
        "def",
        "end",
        "import",
        "identifier",
        "string",
        "integer",
        "open_paren",
        "comma",
        "close_paren",
    )

    def __init__(self, code):
        """Store ``code`` and compile every pattern in ``token_types``."""
        self.code = code
        # The instance attribute shadows the class-level table and holds the
        # compiled patterns.
        self.token_types = {}
        # items() rather than iteritems(): available on Python 2 and 3.
        for token_name, token_re in Tokenizer.token_types.items():
            self.token_types[token_name] = re.compile(token_re)
        # Names listed in token_order first; anything else in the table is
        # appended in sorted order so matching stays deterministic.
        ordered = [n for n in Tokenizer.token_order if n in self.token_types]
        ordered.extend(
            sorted(n for n in self.token_types if n not in Tokenizer.token_order)
        )
        self._match_order = ordered

    def tokenize(self):
        """Consume ``self.code`` and return the list of tokens found.

        Returns an empty list when the input is empty or when some part of
        it matches no pattern; in the latter case the unmatched remainder is
        left in ``self.code``.
        """
        res = []
        # Strip up front as well, so leading whitespace or newlines do not
        # make the very first match fail.
        self.code = self.code.strip()
        while self.code:
            token = self.tokenize_once()
            if token is None:
                # NOTE(review): unrecognised input is reported only as an
                # empty result; callers cannot tell it from empty source.
                return []
            res.append(token)
            self.code = self.code.strip()
        return res

    def tokenize_once(self):
        """Match one token at the start of ``self.code`` and remove it.

        Returns the ``Token``, or ``None`` when no pattern matches.
        """
        for token_name in self._match_order:
            match = self.token_types[token_name].match(self.code)
            if match:
                value = match.group()
                self.code = self.code[len(value):]
                return Token(token_name, value)
        return None
class Parser:
    """Recursive-descent parser that turns a token list into AST nodes.

    The list passed to the constructor is consumed in place: every
    successful or failed ``consume`` call pops its first element.
    """

    def __init__(self, tokens):
        self.tokens = tokens

    def parse(self):
        """Parse top-level items until no tokens remain.

        An item starting with an ``import`` token becomes an ``ImportNode``;
        anything else is parsed as a function definition.

        Raises:
            RuntimeError: on an unexpected token or premature end of input.
        """
        res = []
        while self.tokens:
            if self.peek("import"):
                res.append(self.parse_import())
            else:
                res.append(self.parse_def())
        return res

    def parse_def(self):
        """Parse ``def NAME ( args ) expr end`` into a ``DefNode``."""
        self.consume("def")
        name = self.consume("identifier").value
        args = self.parse_arg_names()
        body = self.parse_expr()
        self.consume("end")
        return DefNode(name, args, body)

    def parse_expr(self):
        """Parse one expression: integer, string, call or variable."""
        if self.peek("integer"):
            return self.parse_integer()
        if self.peek("string"):
            return self.parse_string()
        # One token of lookahead: a "(" in second position means a call.
        if self.peek("open_paren", 1):
            return self.parse_call()
        return self.parse_variable()

    def parse_import(self):
        """Parse ``import NAME`` into an ``ImportNode``."""
        self.consume("import")
        return ImportNode(self.consume("identifier").value)

    def parse_variable(self):
        """Parse a bare identifier into a ``VarNode``."""
        return VarNode(self.consume("identifier").value)

    def parse_call(self):
        """Parse ``NAME ( exprs )`` into a ``CallNode``."""
        name = self.consume("identifier").value
        exprs = self.parse_arg_exprs()
        return CallNode(name, exprs)

    def parse_arg_exprs(self):
        """Parse a parenthesised, comma-separated list of expressions."""
        res = []
        self.consume("open_paren")
        if not self.peek("close_paren"):
            res.append(self.parse_expr())
            while self.peek("comma"):
                self.consume("comma")
                res.append(self.parse_expr())
        self.consume("close_paren")
        return res

    def parse_integer(self):
        """Parse an integer token into an ``IntegerNode`` holding an int."""
        return IntegerNode(int(self.consume("integer").value))

    def parse_string(self):
        """Parse a string token into a ``StringNode`` holding its text."""
        return StringNode(self.consume("string").value)

    def parse_arg_names(self):
        """Parse a parenthesised, comma-separated list of identifiers.

        Returns the identifier texts as a list of strings.
        """
        res = []
        self.consume("open_paren")
        if self.peek("identifier"):
            res.append(self.consume("identifier").value)
            while self.peek("comma"):
                self.consume("comma")
                res.append(self.consume("identifier").value)
        self.consume("close_paren")
        return res

    def consume(self, expected):
        """Pop the next token and return it if its type is ``expected``.

        Raises:
            RuntimeError: if the token has a different type, or if there are
                no tokens left.
        """
        if not self.tokens:
            # Report truncated input like any other syntax error instead of
            # letting list.pop() raise a bare IndexError.
            raise RuntimeError(
                "Expected token type {} but got end of input".format(expected)
            )
        token = self.tokens.pop(0)
        if token.name == expected:
            return token
        raise RuntimeError("Expected token type {} but got {}".format(expected, token.name))

    def peek(self, expected, idx=0):
        """Tell whether the token at position ``idx`` exists and has type ``expected``."""
        return len(self.tokens) > idx and self.tokens[idx].name == expected
class CodeGen:
    """Render AST nodes as C source text."""

    def generate_all(self, nodes):
        """Return prototypes for every node followed by every node's code.

        Each rendered piece is preceded by a newline, so nodes that produce
        no prototype still contribute an empty line.
        """
        pieces = [self.generate_proto(item) for item in nodes]
        pieces.extend(self.generate(item) for item in nodes)
        return "".join("\n{}".format(piece) for piece in pieces)

    def generate_proto(self, node):
        """Return a forward declaration for a ``DefNode``, else ``""``."""
        if node.__class__ != DefNode:
            return ""
        return "int {}({});".format(node.name, self._param_list(node))

    def generate(self, node):
        """Return the C text for ``node``.

        Raises:
            RuntimeError: if the node's class is not one of the known kinds.
        """
        # node.__class__ is compared for exact equality, so subclasses of the
        # node types are not accepted.
        kind = node.__class__
        if kind == IntegerNode:
            return str(node.value)
        if kind == VarNode:
            return node.name
        if kind == StringNode:
            return node.value
        if kind == ImportNode:
            return "#include <{}.h>".format(node.name)
        if kind == CallNode:
            call_args = ", ".join(self.generate(expr) for expr in node.exprs)
            return "{}({})".format(node.name, call_args)
        if kind == DefNode:
            params = self._param_list(node)
            expression = self.generate(node.body)
            return "int {}({}) {{ return {}; }}".format(node.name, params, expression)
        raise RuntimeError("Unexpected node type: {}".format(kind.__name__))

    def _param_list(self, node):
        """Return the comma-separated ``int NAME`` parameters of a definition."""
        return ", ".join("int {}".format(arg) for arg in node.args)
def main(argv):
    """Compile the source file named by ``argv[1]`` and print the result.

    Returns the process exit status: 0 on success, 2 when no input file was
    given on the command line.
    """
    if len(argv) < 2:
        # Explain the problem instead of failing with an IndexError.
        program = argv[0] if argv else "c.py"
        sys.stderr.write("usage: {} <source-file>\n".format(program))
        return 2
    with open(argv[1]) as f:
        contents = f.read()
    tokens = Tokenizer(contents).tokenize()
    ast = Parser(tokens).parse()
    generated = CodeGen().generate_all(ast)
    print(generated)
    return 0


# Run the compiler only when executed as a script, so the classes above can
# be imported without reading sys.argv.
if __name__ == "__main__":
    sys.exit(main(sys.argv))