initial
This commit is contained in:
7
README.md
Normal file
7
README.md
Normal file
@@ -0,0 +1,7 @@
|
||||
# c
|
||||
|
||||
An incredibly simple compiler from a Kaleidoscope-like language to C.
|
||||
|
||||
Inspired by Gary Bernhardt’s [screencast on building a compiler](https://www.destroyallsoftware.com/screencasts/catalog/a-compiler-from-scratch),
|
||||
but compiles to C and (kind of) supports strings and the inclusion of other source
|
||||
files. Refer to the [example file](/examples/thing.src) to see how it works.
|
247
c.py
Executable file
247
c.py
Executable file
@@ -0,0 +1,247 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import re
|
||||
import sys
|
||||
|
||||
class Token:
    """A lexical token: a token-type name paired with the matched source text."""

    def __init__(self, name, value):
        # name:  token type, e.g. "identifier" or "open_paren".
        # value: the exact text that was matched in the source.
        self.name = name
        self.value = value

    def __str__(self):
        return "Token(type='{}', value='{}')".format(self.name, self.value)

    def __repr__(self):
        # Without this, tokens held in a list print as opaque object addresses.
        return self.__str__()

    def __unicode__(self):
        # Python 2 hook for unicode(token).
        return self.__str__()
|
||||
|
||||
|
||||
class DefNode:
    """AST node for a function definition (``def name(args) body end``)."""

    def __init__(self, name, args, body):
        # name: the function's identifier.
        # args: list of parameter names.
        # body: the single expression node that forms the function body.
        self.name = name
        self.args = args
        self.body = body

    def __str__(self):
        return "Def(name='{}', args='{}', body='{}')".format(self.name, self.args, self.body)

    def __repr__(self):
        # Keeps nodes readable when they are printed inside containers.
        return self.__str__()

    def __unicode__(self):
        # Python 2 hook for unicode(node).
        return self.__str__()
|
||||
|
||||
|
||||
class CallNode:
    """AST node for a function call: a callee name plus argument expressions."""

    def __init__(self, name, exprs):
        # name:  identifier of the function being called.
        # exprs: list of argument expression nodes, in call order.
        self.name = name
        self.exprs = exprs

    def __str__(self):
        rendered = ", ".join(str(e) for e in self.exprs)
        return "Call(name='{}', exprs=[{}])".format(self.name, rendered)

    def __repr__(self):
        # Keeps nodes readable when they are printed inside containers.
        return self.__str__()

    def __unicode__(self):
        # Python 2 hook for unicode(node).
        return self.__str__()
|
||||
|
||||
|
||||
class IntegerNode:
    """AST node for an integer literal."""

    def __init__(self, value):
        # value: the literal's numeric value.
        self.value = value

    def __str__(self):
        return "Integer(value={})".format(self.value)

    def __repr__(self):
        # Keeps nodes readable when they are printed inside containers.
        return self.__str__()

    def __unicode__(self):
        # Python 2 hook for unicode(node).
        return self.__str__()
|
||||
|
||||
class StringNode:
    """AST node for a string literal."""

    def __init__(self, value):
        # value: the literal's text; the parser passes the string token's
        # text through unchanged.
        self.value = value

    def __str__(self):
        return "String(value={})".format(self.value)

    def __repr__(self):
        # Keeps nodes readable when they are printed inside containers.
        return self.__str__()

    def __unicode__(self):
        # Python 2 hook for unicode(node).
        return self.__str__()
|
||||
|
||||
|
||||
class VarNode:
    """AST node for a reference to a variable by name."""

    def __init__(self, name):
        # name: the identifier being referenced.
        self.name = name

    def __str__(self):
        return "Var(name={})".format(self.name)

    def __repr__(self):
        # Keeps nodes readable when they are printed inside containers.
        return self.__str__()

    def __unicode__(self):
        # Python 2 hook for unicode(node).
        return self.__str__()
|
||||
|
||||
|
||||
class ImportNode:
    """AST node for an ``import <name>`` statement."""

    def __init__(self, name):
        # name: the identifier that followed the ``import`` keyword.
        self.name = name

    def __str__(self):
        return "Import(name={})".format(self.name)

    def __repr__(self):
        # Keeps nodes readable when they are printed inside containers.
        return self.__str__()

    def __unicode__(self):
        # Python 2 hook for unicode(node).
        return self.__str__()
|
||||
|
||||
|
||||
class Tokenizer:
    """Split source text into a flat list of Token objects.

    The not-yet-consumed input lives in ``self.code`` and shrinks as tokens
    are matched, so a Tokenizer instance is single-use.
    """

    # Regular-expression source for every token type, keyed by token name.
    token_types = {
        "def": r"\bdef\b",
        "end": r"\bend\b",
        "import": r"\bimport\b",
        "identifier": r"\b[a-zA-Z]+\b",
        "string": r'"[^"]*"',
        "integer": r"\b[0-9]+\b",
        "open_paren": r"\(",
        "comma": r",",
        "close_paren": r"\)",
    }

    # Order in which patterns are tried. The keywords also match the
    # "identifier" pattern, so they must be attempted first; relying on dict
    # iteration order is not safe on interpreters without ordered dicts.
    token_order = (
        "def",
        "end",
        "import",
        "identifier",
        "string",
        "integer",
        "open_paren",
        "comma",
        "close_paren",
    )

    def __init__(self, code):
        """Store the source text and compile every token pattern."""
        self.code = code
        # The instance attribute shadows the class-level table and holds the
        # compiled patterns under the same keys.
        self.token_types = {}

        for token_name, token_re in Tokenizer.token_types.items():
            self.token_types[token_name] = re.compile(token_re)

    def tokenize(self):
        """Consume all of ``self.code`` and return the list of tokens.

        Raises:
            RuntimeError: if the remaining input matches no token pattern.
        """
        res = []
        # Strip before the first match as well, so input that starts with
        # whitespace is not rejected.
        self.code = self.code.strip()
        while self.code:
            token = self.tokenize_once()
            if token is None:
                raise RuntimeError(
                    "Unexpected input: {!r}".format(self.code[:20])
                )
            res.append(token)
            self.code = self.code.strip()
        return res

    def tokenize_once(self):
        """Match one token at the start of ``self.code`` and consume it.

        Returns the Token, or None when no pattern matches.
        """
        matched = self._next_match()
        if matched is None:
            return None
        token_name, value = matched
        self.code = self.code[len(value):]
        return Token(token_name, value)

    def _next_match(self):
        """Return ``(token_name, text)`` for the first matching pattern, or None."""
        for token_name in self._ordered_names():
            match = self.token_types[token_name].match(self.code)
            if match:
                return token_name, match.group()
        return None

    def _ordered_names(self):
        """Return the token names in matching priority order."""
        known = [n for n in self.token_order if n in self.token_types]
        # Names missing from token_order are still tried, after the known ones.
        extra = sorted(n for n in self.token_types if n not in self.token_order)
        return known + extra
|
||||
|
||||
|
||||
class Parser:
    """Recursive-descent parser that turns a token list into AST nodes.

    ``self.tokens`` is consumed from the front as parsing proceeds, so the
    list passed to the constructor is emptied by a successful parse.
    """

    def __init__(self, tokens):
        # tokens: list of objects exposing ``name`` and ``value`` attributes.
        self.tokens = tokens

    def parse(self):
        """Parse the whole token list into a list of top-level nodes.

        Each top-level item is either an import or a function definition.
        """
        res = []
        while self.tokens:
            if self.peek("import"):
                res.append(self.parse_import())
            else:
                res.append(self.parse_def())
        return res

    def parse_def(self):
        """Parse ``def <name>(<arg names>) <expr> end`` into a DefNode."""
        self.consume("def")
        name = self.consume("identifier").value
        args = self.parse_arg_names()
        body = self.parse_expr()
        self.consume("end")
        return DefNode(name, args, body)

    def parse_expr(self):
        """Parse one expression: integer, string, call, or variable."""
        if self.peek("integer"):
            return self.parse_integer()
        if self.peek("string"):
            return self.parse_string()
        # An open paren as the second token marks the first one as a callee.
        if self.peek("open_paren", 1):
            return self.parse_call()
        return self.parse_variable()

    def parse_import(self):
        """Parse ``import <identifier>`` into an ImportNode."""
        self.consume("import")
        return ImportNode(self.consume("identifier").value)

    def parse_variable(self):
        """Parse a bare identifier into a VarNode."""
        return VarNode(self.consume("identifier").value)

    def parse_call(self):
        """Parse ``<identifier>(<exprs>)`` into a CallNode."""
        name = self.consume("identifier").value
        exprs = self.parse_arg_exprs()
        return CallNode(name, exprs)

    def parse_arg_exprs(self):
        """Parse a parenthesised, comma-separated list of expressions."""
        res = []
        self.consume("open_paren")
        if not self.peek("close_paren"):
            res.append(self.parse_expr())
            while self.peek("comma"):
                self.consume("comma")
                res.append(self.parse_expr())
        self.consume("close_paren")
        return res

    def parse_integer(self):
        """Parse an integer token into an IntegerNode holding an int."""
        return IntegerNode(int(self.consume("integer").value))

    def parse_string(self):
        """Parse a string token into a StringNode; the text is kept as-is."""
        return StringNode(self.consume("string").value)

    def parse_arg_names(self):
        """Parse a parenthesised, comma-separated list of identifiers.

        Returns the list of names as plain strings.
        """
        res = []
        self.consume("open_paren")
        if self.peek("identifier"):
            res.append(self.consume("identifier").value)
            while self.peek("comma"):
                self.consume("comma")
                res.append(self.consume("identifier").value)
        self.consume("close_paren")
        return res

    def consume(self, expected):
        """Remove and return the next token, which must be of type *expected*.

        Raises:
            RuntimeError: if the input is exhausted or the next token has a
                different type.
        """
        # Report truncated input explicitly instead of leaking an IndexError
        # from pop() on an empty list.
        if not self.tokens:
            raise RuntimeError(
                "Expected token type {} but reached end of input".format(expected)
            )

        token = self.tokens.pop(0)

        if token.name == expected:
            return token
        raise RuntimeError("Expected token type {} but got {}".format(expected, token.name))

    def peek(self, expected, idx=0):
        """Return True if the token at offset *idx* exists and has type *expected*."""
        return len(self.tokens) > idx and self.tokens[idx].name == expected
|
||||
|
||||
|
||||
class CodeGen:
    """Emit C source text from AST nodes.

    Dispatch is by node class name: a node whose class is called ``XNode`` is
    rendered by the ``_gen_XNode`` method.
    """

    def generate_all(self, nodes):
        """Return the C text for a list of top-level nodes.

        All prototypes are emitted first, then every node's full output, so
        functions may call each other regardless of definition order. Each
        piece is preceded by a newline; nodes without a prototype contribute
        an empty line.
        """
        # The leading empty string reproduces the newline emitted before the
        # first piece; joining once avoids rebuilding the whole output string
        # for every node.
        parts = [""]
        parts.extend(self.generate_proto(node) for node in nodes)
        parts.extend(self.generate(node) for node in nodes)
        return "\n".join(parts)

    def generate_proto(self, node):
        """Return the C prototype for a function definition, else ``""``."""
        # __class__ rather than type(): the latter is not the node's class
        # for Python 2 old-style instances.
        if node.__class__.__name__ == "DefNode":
            return "int {}({});".format(node.name, self._c_params(node))
        return ""

    def generate(self, node):
        """Return the C text for a single node.

        Raises:
            RuntimeError: if there is no handler for the node's class.
        """
        case = node.__class__
        handler = getattr(self, "_gen_" + case.__name__, None)
        if handler is None:
            raise RuntimeError("Unexpected node type: {}".format(case.__name__))
        return handler(node)

    def _c_params(self, node):
        """Render a definition's parameter names as ``int``-typed C parameters."""
        return ", ".join("int {}".format(n) for n in node.args)

    def _gen_DefNode(self, node):
        """Render a function whose body is a single returned expression."""
        body = self.generate(node.body)
        return "int {}({}) {{ return {}; }}".format(node.name, self._c_params(node), body)

    def _gen_IntegerNode(self, node):
        """Render an integer literal."""
        return str(node.value)

    def _gen_CallNode(self, node):
        """Render a call with its arguments rendered recursively."""
        return "{}({})".format(node.name, ", ".join(self.generate(e) for e in node.exprs))

    def _gen_VarNode(self, node):
        """Render a variable reference as its bare name."""
        return node.name

    def _gen_ImportNode(self, node):
        """Render an import as an ``#include`` of ``<name>.h``."""
        return "#include <{}.h>".format(node.name)

    def _gen_StringNode(self, node):
        """Render a string literal by emitting its stored text unchanged."""
        return node.value
|
||||
|
||||
|
||||
def main(argv=None):
    """Compile the source file named on the command line and print the C output.

    Args:
        argv: argument list in ``sys.argv`` layout (program name first);
            defaults to ``sys.argv``.

    Returns:
        0 on success, 2 when no source file was given.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        prog = args[0] if args else "c.py"
        sys.stderr.write("usage: {} <source-file>\n".format(prog))
        return 2

    # Only the read needs the file handle; close it before compiling.
    with open(args[1]) as f:
        contents = f.read()

    tokens = Tokenizer(contents).tokenize()
    ast = Parser(tokens).parse()
    generated = CodeGen().generate_all(ast)
    print(generated)
    return 0


# Guarded so that importing this module does not run the compiler.
if __name__ == "__main__":
    sys.exit(main())
|
13
examples/thing.src
Normal file
13
examples/thing.src
Normal file
@@ -0,0 +1,13 @@
|
||||
import stdio
|
||||
|
||||
def f(x, y, z)
|
||||
g(x, 2)
|
||||
end
|
||||
|
||||
def g(x, y)
|
||||
x
|
||||
end
|
||||
|
||||
def main()
|
||||
printf("hi %d!\n", f(1, 2, 3))
|
||||
end
|
Reference in New Issue
Block a user