This commit is contained in:
2017-05-30 07:00:27 -04:00
commit fdeeb209b1
7 changed files with 193 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
_build/
*.byte
*.native

5
README.md Normal file
View File

@@ -0,0 +1,5 @@
# micro
A minimal compiler in ML, as seen in [this tutorial](http://troydm.github.io/blog/2014/03/29/writing-micro-compiler-in-ocaml/).
Work in progress.

47
src/codegen.ml Normal file
View File

@@ -0,0 +1,47 @@
(** State of the assembly code generator. *)
type generator = {
  vars : (string, int) Hashtbl.t;
      (** Variable name -> integer offset; [var_addr] renders it as
          ["[esp+<offset>]"]. *)
  file : string;  (** Path of the assembly file being written. *)
  chan : out_channel;  (** Output channel opened on [file]. *)
}
(** [new_generator file] creates a generator that writes to [file] with its
    extension replaced by [".s"]. The output file is opened immediately.
    @raise Invalid_argument if [file] has no extension
    (from [Filename.chop_extension]).
    @raise Sys_error if the output file cannot be opened. *)
let new_generator file =
  let asm_path = Filename.chop_extension file ^ ".s" in
  let out = open_out asm_path in
  { vars = Hashtbl.create 100; file = asm_path; chan = out }
(** [close_generator g] closes the output channel of [g]. *)
let close_generator { chan; _ } = close_out chan
(** [gen g v] writes [v] followed by a newline to the generator's output. *)
let gen g v =
  let out = g.chan in
  output_string out v;
  output_char out '\n'
(** [bottom_var _ g] folds over every offset stored in [g.vars], starting
    from 0: whenever an offset is at least the running value, the running
    value becomes that offset plus 4. Returns 0 when no variable is
    registered. The first argument is ignored. *)
let bottom_var _ g =
  let bump _name offset lowest =
    if offset < lowest then lowest else offset + 4
  in
  Hashtbl.fold bump g.vars 0
(** [empty_var s g i] is the offset [bottom_var s g] advanced by [i - 1]
    slots of 4 bytes each, so [i = 1] yields [bottom_var s g] itself. *)
let empty_var s g i =
  let base = bottom_var s g in
  base + ((i - 1) * 4)
(** [var_addr s g v] returns the operand ["[esp+<offset>]"] for variable [v].

    Names of the form ["__temp<suffix>"] (as built by [temp_var]) carry their
    offset in the name itself: the text after the prefix is used verbatim.
    Any other name is looked up in [g.vars].

    @raise Token.Syntax_error (via [Token.syntax_error]) if [v] is neither a
    temporary nor a registered variable. *)
let var_addr s g v =
  let prefix = "__temp" in
  let prefix_len = String.length prefix in
  let len = String.length v in
  if len > prefix_len && String.sub v 0 prefix_len = prefix then
    "[esp+" ^ String.sub v prefix_len (len - prefix_len) ^ "]"
  else
    try "[esp+" ^ string_of_int (Hashtbl.find g.vars v) ^ "]"
    with Not_found ->
      (* [syntax_error] lives in the Token module, so it must be qualified
         here. *)
      Token.syntax_error s ("identifier " ^ v ^ " not defined")
(** [var s g v] is the address operand of [v] (see [var_addr]) prefixed with
    the ["dword "] size specifier. *)
let var s g v = Printf.sprintf "dword %s" (var_addr s g v)
(** [temp_var s g i] builds an identifier token named ["__temp<offset>"],
    where the offset is [empty_var s g i]. [var_addr] recognises this prefix
    and reads the offset back out of the name. *)
let temp_var s g i =
  let offset = empty_var s g i in
  Token.Identifier (Printf.sprintf "__temp%d" offset)
(** [is_alloc_var _ g v] is [true] when [v] already has an entry in the
    generator's variable table. The first argument is ignored. *)
let is_alloc_var _ { vars; _ } v = Hashtbl.mem vars v
(** [alloc_var s g v] registers [v] at offset [empty_var s g 1] unless it is
    already in the variable table, then returns its operand as [var s g v]. *)
let alloc_var s g v =
  if not (is_alloc_var s g v) then Hashtbl.replace g.vars v (empty_var s g 1);
  var s g v
(** [token_var s g v] returns the operand (see [var]) for the identifier
    token [v].
    @raise Token.Syntax_error (via [Token.syntax_error]) if [v] is not an
    [Identifier] token. *)
let token_var s g v =
  match v with
  | Token.Identifier name -> var s g name
  (* [syntax_error] is defined in the Token module, so it is qualified. *)
  | _ -> Token.syntax_error s "identifier expected"
(** [unop g opcode a] emits one instruction line with a single operand:
    a leading space, [opcode], a space, then [a]. *)
let unop g opcode a = gen g (Printf.sprintf " %s %s" opcode a)
(** [binop g opcode a b] emits one instruction line with two operands:
    a leading space, [opcode], a space, then [a] and [b] separated by
    [", "]. *)
let binop g opcode a b = gen g (Printf.sprintf " %s %s, %s" opcode a b)
(** [push g a] emits a [push] instruction with operand [a]. *)
(* The one-operand emitter in this module is [unop]; no [op] is defined. *)
let push g a = unop g "push" a

26
src/compile.ml Normal file
View File

@@ -0,0 +1,26 @@
(** [compile file] translates the source program [file] to assembly, then
    runs [nasm] and [gcc] on the result to build an executable named after
    [file] without its extension.

    Syntax errors and [Sys_error]s are reported on standard output rather
    than propagated. The exit codes of the external commands are ignored. *)
let compile file =
  try
    let g = Codegen.new_generator file in
    let s =
      (* Do not leak the already-opened output channel if the input file
         cannot be opened. *)
      try Stream.open_stream file
      with e ->
        Codegen.close_generator g;
        raise e
    in
    let close_all () =
      Stream.close_stream s;
      Codegen.close_generator g
    in
    (* Close both channels even when parsing fails. *)
    (try Parse.parse s g
     with e ->
       close_all ();
       raise e);
    close_all ();
    let out = Filename.chop_extension file in
    (* File names come from the command line: quote them so that spaces or
       shell metacharacters cannot break the commands. *)
    let _ =
      Sys.command ("nasm -f macho " ^ Filename.quote g.Codegen.file)
    in
    let _ =
      Sys.command
        ("gcc -o " ^ Filename.quote out ^ " " ^ Filename.quote (out ^ ".o"))
    in
    ()
  with
  | Token.Syntax_error e -> Printf.printf "syntax error: %s\n" e
  | Sys_error _ -> print_string "no file found\n"
(** [help name] prints the usage line for the program called [name]. *)
let help name = Printf.printf "%s <file>\n" name
(* Entry point: with no argument print the usage line, otherwise compile the
   file named by the first argument. *)
let () =
  if Array.length Sys.argv = 1 then help Sys.argv.(0)
  else begin
    let file = Sys.argv.(1) in
    (* The sequencing [;] is required: without it [compile file] would be
       parsed as two extra arguments to [printf]. *)
    Printf.printf "compiling %s\n" file;
    compile file
  end

29
src/parse.ml Normal file
View File

@@ -0,0 +1,29 @@
(* Definitions are ordered bottom-up, since OCaml requires a value to be
   defined before it is used: statement, statements, program, parse.

   NOTE(review): [generate_begin], [generate_end], [read], [write] and
   [assignment] are not defined in the sources visible here, nor are
   [Token.new_scanner], [Token.next_token] and [Token.match_token]; they are
   assumed to be provided elsewhere -- confirm before building. *)

(** [statement s g] tries to parse one statement, dispatching on the token
    returned by [Token.next_token]. Returns [true] when a statement followed
    by a semicolon was parsed, and [false] when the token does not start a
    statement or the statement parser itself returned [false].
    @raise Token.Syntax_error if a parsed statement is not followed by a
    semicolon. *)
let statement s g =
  let parsed =
    match Token.next_token s with
    | Token.Read -> read s g
    | Token.Write -> write s g
    | Token.Identifier _ -> assignment s g
    | _ -> false
  in
  if parsed then
    if Token.match_token s Token.Semicolon then true
    else Token.syntax_error s "statement must end with semicolon"
  else false

(** [statements s g] parses statements until [statement] returns [false]. *)
let rec statements s g = if statement s g then statements s g else ()

(** [program s g] parses a whole program: the begin keyword, a sequence of
    statements, then the end keyword.
    @raise Token.Syntax_error if either keyword is missing. *)
let program s g =
  if Token.match_token s Token.Begin then
    let _ = generate_begin s g in
    let _ = statements s g in
    if Token.match_token s Token.End then
      let _ = generate_end s g in
      ()
    else Token.syntax_error s "program should end with end keyword"
  else Token.syntax_error s "program should start with begin keyword"

(** [parse stm g] wraps the character stream [stm] in a scanner and parses a
    program from it, using [g] as the code generator.
    @raise Token.Syntax_error on a syntax error, including when the input
    ends before the end keyword. *)
let parse stm g =
  let s = Token.new_scanner stm in
  try program s g
  with End_of_file ->
    Token.syntax_error s "program reached end of file before end keyword"

24
src/stream.ml Normal file
View File

@@ -0,0 +1,24 @@
(** A character input stream with one character of push-back. *)
type stream = {
  mutable chr : char option;
      (** Pushed-back character, returned by the next [read_char]. *)
  mutable line_num : int;
      (** Incremented by [read_char] for each newline read from [chan]. *)
  chan : in_channel;  (** Underlying input channel. *)
}
(** [open_stream file] opens [file] for reading, with no pushed-back
    character and the line counter at 1.
    @raise Sys_error if [file] cannot be opened. *)
let open_stream file =
  let ic = open_in file in
  { chr = None; line_num = 1; chan = ic }
(** [close_stream stm] closes the input channel underlying [stm]. *)
let close_stream { chan; _ } = close_in chan
(** [read_char stm] returns the next character of [stm]. A pushed-back
    character is returned first (and cleared) without touching the line
    counter; otherwise a character is read from the channel, and the line
    counter is incremented if it is a newline.
    @raise End_of_file when the channel is exhausted. *)
let read_char stm =
  match stm.chr with
  | Some pending ->
    stm.chr <- None;
    pending
  | None ->
    let fresh = input_char stm.chan in
    if fresh = '\n' then stm.line_num <- stm.line_num + 1;
    fresh
(** [unread_char stm c] pushes [c] back so that the next [read_char] returns
    it. Only one character is kept: a second push-back overwrites the
    first. *)
let unread_char stm c =
  let pending = Some c in
  stm.chr <- pending
(** [is_digit c] is [true] exactly when [c] is one of ['0'] .. ['9']. *)
let is_digit c =
  match c with
  | '0' .. '9' -> true
  | _ -> false
(** [is_alpha c] is [true] exactly when [c] is an ASCII letter,
    ['A'] .. ['Z'] or ['a'] .. ['z']. *)
let is_alpha c =
  match c with
  | 'A' .. 'Z' | 'a' .. 'z' -> true
  | _ -> false

59
src/token.ml Normal file
View File

@@ -0,0 +1,59 @@
(** Tokens produced by the scanner. *)
type token =
  | Begin
  | End
  | Identifier of string  (** Variable name. *)
  | Read
  | Write
  | Literal of int  (** Integer literal. *)
  | Assign
  | LeftParen
  | RightParen
  | Add
  | Sub
  | Comma
  | Semicolon
(** Scanner state: a character stream plus a slot for one token. *)
type scanner = {
  mutable last_token : token option;
      (** NOTE(review): presumably a one-token look-ahead buffer; nothing in
          this file reads or writes it yet -- confirm. *)
  stm : Stream.stream;  (** Character stream the tokens are read from. *)
}
(** Raised by [syntax_error]; the payload is the error message with the
    line number appended. *)
exception Syntax_error of string
(** [syntax_error s msg] never returns: it raises [Syntax_error] with [msg]
    followed by [" on line <n>"], where [n] is the current line counter of
    the scanner's stream.
    @raise Syntax_error always. *)
let syntax_error s msg =
  let line = s.stm.Stream.line_num in
  raise (Syntax_error (Printf.sprintf "%s on line %d" msg line))
(** [skip_blank_chars stm] consumes spaces, tabs, carriage returns and
    newlines from [stm], pushing back the first other character so it is
    read next.
    @raise End_of_file if the input ends before another character is
    found. *)
let rec skip_blank_chars stm =
  match Stream.read_char stm with
  | ' ' | '\t' | '\r' | '\n' -> skip_blank_chars stm
  | other -> Stream.unread_char stm other
(** [scan s] reads one token from the scanner's stream. It does not skip
    leading whitespace itself.

    - A letter starts an identifier made of letters, digits and ['_'].
      The keywords [begin], [end], [read] and [write] are recognised
      case-insensitively; anything else becomes [Identifier].
    - A digit starts an integer [Literal].
    - [+ - , ; ( )] and [:=] map to their punctuation tokens.

    @raise Syntax_error on an unrecognised character, on [':'] not followed
    by ['='], or on an integer literal that does not fit in [int].
    @raise End_of_file if the input ends while a token is being read. *)
let scan s =
  let stm = s.stm in
  (* Collects [first] plus every following character accepted by [accept];
     the first rejected character is pushed back onto the stream. *)
  let take_while accept first =
    let buf = Buffer.create 16 in
    Buffer.add_char buf first;
    let rec loop () =
      let nc = Stream.read_char stm in
      if accept nc then begin
        Buffer.add_char buf nc;
        loop ()
      end
      else Stream.unread_char stm nc
    in
    loop ();
    Buffer.contents buf
  in
  let is_iden_char ch =
    Stream.is_alpha ch || Stream.is_digit ch || ch = '_'
  in
  let scan_iden first =
    let name = take_while is_iden_char first in
    (* [String.lowercase] is deprecated and removed in OCaml 5.0; the ASCII
       variant is all the keyword comparison needs. *)
    match String.lowercase_ascii name with
    | "begin" -> Begin
    | "end" -> End
    | "read" -> Read
    | "write" -> Write
    | _ -> Identifier name
  in
  let scan_lit first =
    let digits = take_while Stream.is_digit first in
    (* [int_of_string] fails on literals too large for [int]; report that
       as a syntax error with a line number instead of a bare [Failure]. *)
    try Literal (int_of_string digits)
    with Failure _ ->
      syntax_error s ("integer literal " ^ digits ^ " is out of range")
  in
  let c = Stream.read_char stm in
  if Stream.is_alpha c then scan_iden c
  else if Stream.is_digit c then scan_lit c
  else
    match c with
    | '+' -> Add
    | '-' -> Sub
    | ',' -> Comma
    | ';' -> Semicolon
    | '(' -> LeftParen
    | ')' -> RightParen
    (* The character after ':' is consumed whether or not it is '='. *)
    | ':' when Stream.read_char stm = '=' -> Assign
    | _ -> syntax_error s "Could not identify token"