Review notes. Everything above the first "diff --git" line is free text that
git apply and patch(1) skip.

The patch is restored to one diff line per text line. Hunk counts were
re-derived and match the original hunk headers for every file except
src/compiler.c and src/compiler.h, which are edited as listed below.

Changes to the patch content:
  - assets/prelude.h, src/compiler.h: the #include targets were missing.
    Restored <stdio.h> and <stdlib.h> (printf, malloc, NULL), plus
    <string.h> in compiler.h (strlen, strcmp, memcpy).
  - src/compiler.h: added #pragma once, as src/ast.h already has.
  - src/compiler.c:
      * strings are built with an append() helper instead of
        snprintf(buf, n, "%s...", buf, ...), whose overlapping source and
        destination is undefined behaviour;
      * compile_list no longer reads children[0]/children[1] when they do
        not exist ("()" and zero-argument calls such as "(f)"), and
        compile_define checks its arity;
      * get_prelude checks fseek/ftell/malloc and uses fread's byte count;
      * the PSEUDO case recurses through compile_expr, not compile(), which
        re-emitted the prelude and a main() for every child;
      * string literals are emitted as MakeString("...") because the prelude
        functions take Value arguments;
      * errors go to stderr so they do not mix with the generated C.
  - src/parser.c: the children array is sized with sizeof(sc_ast*), its
    element type, rather than sizeof(sc_ast).

To confirm before applying:
  - Leading whitespace and the position of blank context lines in main.c,
    src/parser.c and src/parser.h were not recoverable and are
    reconstructed; "git apply --ignore-whitespace" may be needed.
  - The two #include context lines in src/parser.h are an assumption.
  - The post-image ids on the "index" lines of src/compiler.c,
    src/compiler.h and src/parser.c predate these edits.

diff --git a/README.md b/README.md
index 45f92cb..1cc93e7 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,5 @@
 # sc
 
 A simple Scheme to C compiler, in C.
+
+For now it can only compile very simple programs like [this](/examples/test.lisp).
diff --git a/assets/prelude.h b/assets/prelude.h
new file mode 100644
index 0000000..cfe03be
--- /dev/null
+++ b/assets/prelude.h
@@ -0,0 +1,116 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+struct Int;
+struct String;
+struct Closure;
+union Value;
+
+enum Tag { VOID, INT, STRING, CLOSURE, CELL, ENV };
+
+typedef union Value (*Lambda)() ;
+
+struct Int {
+    enum Tag t;
+    int value;
+};
+
+struct String {
+    enum Tag t;
+    char* value;
+};
+
+struct Closure {
+    enum Tag t;
+    Lambda lam;
+    void* env;
+} ;
+
+struct Env {
+    enum Tag t;
+    void* env;
+};
+
+struct Cell {
+    enum Tag t;
+    union Value* addr;
+} ;
+
+union Value {
+    enum Tag t;
+    struct Int z;
+    struct String s;
+    struct Closure clo;
+    struct Env env;
+    struct Cell cell;
+} ;
+
+typedef union Value Value;
+
+static Value MakeClosure(Lambda lam, Value env) {
+    Value v;
+    v.clo.t = CLOSURE;
+    v.clo.lam = lam;
+    v.clo.env = env.env.env;
+    return v;
+}
+
+static Value MakeInt(int n) {
+    Value v;
+    v.z.t = INT;
+    v.z.value = n;
+    return v;
+}
+
+static Value MakeString(char* s) {
+    Value v;
+    v.s.t = STRING;
+    v.s.value = s;
+    return v;
+}
+
+static Value MakePrimitive(Lambda prim) {
+    Value v;
+    v.clo.t = CLOSURE;
+    v.clo.lam = prim;
+    v.clo.env = NULL;
+    return v;
+}
+
+static Value MakeEnv(void* env) {
+    Value v;
+    v.env.t = ENV;
+    v.env.env = env;
+    return v;
+}
+
+
+static Value NewCell(Value initialValue) {
+    Value v;
+    v.cell.t = CELL;
+    v.cell.addr = malloc(sizeof(Value));
+    *v.cell.addr = initialValue;
+    return v;
+}
+
+
+Value sum(Value a, Value b) {
+    return MakeInt(a.z.value + b.z.value);
+}
+
+Value product(Value a, Value b) {
+    return MakeInt(a.z.value * b.z.value);
+}
+
+Value difference(Value a, Value b) {
+    return MakeInt(a.z.value - b.z.value);
+}
+
+Value display(Value v) {
+    printf("%i\n",v.z.value);
+    return v;
+}
+
+Value numEqual(Value a, Value b) {
+    return MakeInt(a.z.value == b.z.value);
+}
diff --git a/examples/test.lisp b/examples/test.lisp
new file mode 100644
index 0000000..1b9f966
--- /dev/null
+++ b/examples/test.lisp
@@ -0,0 +1,3 @@
+(define x 1)
+
+(display (sum x 1))
diff --git a/main.c b/main.c
index 6794099..ac0972a 100644
--- a/main.c
+++ b/main.c
@@ -1,3 +1,4 @@
+#include "src/compiler.h"
 #include "src/parser.h"
 
 int main(int argc, char** argv) {
@@ -26,7 +27,9 @@ int main(int argc, char** argv) {
 
     inp[size] = '\0';
 
-    sc_ast_print(sc_parse(inp));
+    sc_ast* ast = sc_parse(inp);
+
+    printf("%s\n", compile(ast));
 
     return 0;
 }
diff --git a/src/ast.h b/src/ast.h
new file mode 100644
index 0000000..ca9afb4
--- /dev/null
+++ b/src/ast.h
@@ -0,0 +1,21 @@
+#pragma once
+enum tag {
+    PSEUDO,
+    ATOM,
+    LIST,
+    INT,
+    FLOAT,
+    STRING,
+    QUOTED,
+};
+
+
+typedef struct sc_ast {
+    short tag;
+    char* value;
+
+    int n_children;
+    struct sc_ast** children;
+} sc_ast;
+
+
diff --git a/src/compiler.c b/src/compiler.c
new file mode 100644
index 0000000..a64f1e0
--- /dev/null
+++ b/src/compiler.c
@@ -0,0 +1,155 @@
+#include "compiler.h"
+
+/* Report an unrecoverable error on stderr and stop the compiler. */
+static void die(const char* msg) {
+    fprintf(stderr, "%s\n", msg);
+    exit(1);
+}
+
+/*
+ * Append src to the heap string *dst (NULL means empty), growing it to fit.
+ * Replaces snprintf(buf, n, "%s...", buf, ...): passing the destination as
+ * a source argument makes the copy overlap, which is undefined behaviour.
+ */
+static void append(char** dst, const char* src) {
+    if (!src) return;
+
+    size_t old = *dst ? strlen(*dst) : 0;
+    size_t add = strlen(src);
+    char* grown = realloc(*dst, old + add + 1);
+
+    if (!grown) die("Out of memory.");
+    memcpy(grown + old, src, add + 1);
+    *dst = grown;
+}
+
+/* Read ./assets/prelude.h into a NUL-terminated heap string. */
+char* get_prelude() {
+    char* str;
+    long size = 0;
+    FILE* p = fopen("./assets/prelude.h", "r");
+
+    if (!p) die("Error while opening the prelude file.");
+
+    if (fseek(p, 0, SEEK_END) != 0 || (size = ftell(p)) < 0 ||
+        fseek(p, 0, SEEK_SET) != 0) {
+        die("Error while reading the prelude file.");
+    }
+
+    str = malloc((size_t)size + 1);
+    if (!str) die("Out of memory.");
+
+    /* fread may deliver fewer bytes than ftell reported; terminate there. */
+    size = (long)fread(str, 1, (size_t)size, p);
+    fclose(p);
+
+    str[size] = '\0';
+
+    return str;
+}
+
+char* compile_expr(sc_ast* ast);
+
+/* (define name expr) -> "Value name = <expr>" */
+char* compile_define(sc_ast* ast) {
+    if (ast->n_children != 3 || ast->children[1]->tag != ATOM) {
+        die("define expects a name and a value.");
+    }
+
+    char* res = NULL;
+
+    append(&res, "Value ");
+    append(&res, ast->children[1]->value);
+    append(&res, " = ");
+    append(&res, compile_expr(ast->children[2]));
+
+    return res;
+}
+
+/* (f a b ...) -> "f(a, b, ...)"; (define ...) goes to compile_define. */
+char* compile_list(sc_ast* ast) {
+    /* children[0] and children[1] used to be read unconditionally, which
+     * ran past the array for "()" and for zero-argument calls like "(f)". */
+    if (ast->n_children < 1 || ast->children[0]->tag != ATOM) {
+        die("Expected a function name at the head of a list.");
+    }
+
+    if (!strcmp(ast->children[0]->value, "define")) return compile_define(ast);
+
+    int i;
+    char* res = NULL;
+
+    append(&res, ast->children[0]->value);
+    append(&res, "(");
+
+    for (i = 1; i < ast->n_children; i++) {
+        if (i > 1) append(&res, ", ");
+        append(&res, compile_expr(ast->children[i]));
+    }
+
+    append(&res, ")");
+
+    return res;
+}
+
+/*
+ * Compile a whole program: the prelude, then a main() holding one statement
+ * per top-level form. The returned heap string belongs to the caller.
+ */
+char* compile(sc_ast* ast) {
+    int i;
+    char* tmp;
+    char* body = NULL;
+    char* res = get_prelude();
+
+    append(&body, "int main(int argc, char** argv) {");
+
+    for (i = 0; i < ast->n_children; i++) {
+        tmp = compile_expr(ast->children[i]);
+        if (!tmp) continue;
+        append(&body, ";");
+        append(&body, tmp);
+    }
+
+    append(&res, ";");
+    append(&res, body);
+    append(&res, ";}");
+    free(body);
+
+    return res;
+}
+
+/* Compile one node to C source; an empty PSEUDO node yields NULL. */
+char* compile_expr(sc_ast* ast) {
+    char* ret = NULL;
+    int i;
+
+    switch (ast->tag) {
+        case INT:
+        case FLOAT:
+            /* The prelude has no float constructor, so floats use MakeInt. */
+            append(&ret, "MakeInt(");
+            append(&ret, ast->value);
+            append(&ret, ")");
+            return ret;
+        case ATOM:
+            return ast->value;
+        case STRING:
+            /* Prelude functions take Value arguments, so wrap the literal. */
+            append(&ret, "MakeString(\"");
+            append(&ret, ast->value);
+            append(&ret, "\")");
+            return ret;
+        case LIST:
+            return compile_list(ast);
+        case PSEUDO:
+            /* Used to call compile(), re-emitting the prelude and a main(). */
+            for (i = 0; i < ast->n_children; i++) {
+                append(&ret, compile_expr(ast->children[i]));
+            }
+            return ret;
+        default:
+            fprintf(stderr, "Unknown AST tag: %d\n", ast->tag);
+            exit(1);
+    }
+}
diff --git a/src/compiler.h b/src/compiler.h
new file mode 100644
index 0000000..58c13cd
--- /dev/null
+++ b/src/compiler.h
@@ -0,0 +1,8 @@
+#pragma once
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "ast.h"
+
+char* compile(sc_ast*);
diff --git a/src/parser.c b/src/parser.c
index d1fc7cf..96ea2c1 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -23,7 +23,7 @@ char** tokenize(char* str) {
             str++;
         }else {
             t = str;
-            while (*t && *t != ' ' && *t != '(' && *t != ')') ++t;
+            while (*t && *t != ' ' && *t != '\n' && *t != '\t' && *t != '(' && *t != ')') ++t;
             tokens[len-1] = malloc(t-str+1);
             strncpy(tokens[len-1], str, t-str);
             tokens[len-1][t-str] = '\0';
@@ -91,7 +91,7 @@ parse_state* read_tokens(parse_state* state) {
         state->tokens++;
 
         int n = state->node->n_children++;
-        state->node->children = realloc(state->node->children, n+1);
+        state->node->children = realloc(state->node->children, (n+1)*sizeof(sc_ast*));
 
         if (!strncmp(token, "(", 2)) {
             sc_ast* list = make_list();
@@ -104,7 +104,7 @@ parse_state* read_tokens(parse_state* state) {
             }
 
             state->node->children[n] = nstate->node;
-            state->tokens = nstate->tokens;
+            state->tokens = ++nstate->tokens;
             free(nstate);
         } else {
             state->node->children[n] = read_token(token);
@@ -119,9 +119,9 @@ sc_ast* sc_parse(char* input) {
     state->tokens = tokens;
     state->node = make_pseudo();
 
-    state = read_tokens(state);
+    while (state->tokens[0]) state = read_tokens(state);
 
-    return state->node->children[0];
+    return state->node;
 }
 
 const char* tag_to_string(int x) {
diff --git a/src/parser.h b/src/parser.h
index d592c7f..ac43b46 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -3,24 +3,7 @@
 #include <stdlib.h>
 #include <string.h>
 
-enum tag {
-    PSEUDO,
-    ATOM,
-    LIST,
-    INT,
-    FLOAT,
-    STRING,
-};
-
-
-typedef struct sc_ast {
-    short tag;
-    char* value;
-
-    int n_children;
-    struct sc_ast** children;
-} sc_ast;
-
+#include "ast.h"
 
 sc_ast* sc_parse(char*);
 void sc_ast_print(sc_ast*);