initial
This commit is contained in:
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
_build/
|
||||
*.byte
|
||||
*.native
|
5
README.md
Normal file
5
README.md
Normal file
@@ -0,0 +1,5 @@
|
||||
# micro
|
||||
|
||||
A minimal compiler in ML, as seen in [this tutorial](http://troydm.github.io/blog/2014/03/29/writing-micro-compiler-in-ocaml/).
|
||||
|
||||
Work in progess.
|
47
src/codegen.ml
Normal file
47
src/codegen.ml
Normal file
@@ -0,0 +1,47 @@
|
||||
type generator = { vars: (string, int) Hashtbl.t; file: string; chan: out_channel }
|
||||
|
||||
let new_generator file =
|
||||
let fs = (Filename.chop_extension file) ^ ".s" in
|
||||
{ vars=Hashtbl.create 100; file=fs; chan=open_out fs }
|
||||
|
||||
let close_generator g = close_out g.chan
|
||||
|
||||
let gen g v = output_string g.chan v; output_string g.chan "\n"
|
||||
|
||||
let bottom_var _ g =
|
||||
Hashtbl.fold (fun _ v c -> if v >= c then (v+4) else c) g.vars 0
|
||||
|
||||
let empty_var s g i = (bottom_var s g) + 4 * (i - 1)
|
||||
|
||||
let var_addr s g v =
|
||||
if String.length v > 6 && String.sub v 0 6 = "__temp"
|
||||
then
|
||||
let i = String.sub v 6 ((String.length v) - 6) in "[esp+" ^ i ^ "]"
|
||||
else
|
||||
try "[esp+" ^ string_of_int (Hashtbl.find g.vars v) ^ "]"
|
||||
with Not_found -> syntax_error s ("identifier " ^ v ^ " not defined")
|
||||
|
||||
let var s g v = "dword " ^ (var_addr s g v)
|
||||
|
||||
let temp_var s g i =
|
||||
Token.Identifier ("__temp" ^ (string_of_int (empty_var s g i)))
|
||||
|
||||
let is_alloc_var _ g v = Hashtbl.mem g.vars v
|
||||
|
||||
let alloc_var s g v =
|
||||
if is_alloc_var s g v
|
||||
then var s g v
|
||||
else let _ = Hashtbl.replace g.vars v (empty_var s g 1) in var s g v
|
||||
|
||||
let token_var s g v =
|
||||
match v with
|
||||
| Token.Identifier i -> var s g i
|
||||
| _ -> syntax_error s "identifier expected"
|
||||
|
||||
let unop g opcode a = gen g (" " ^ opcode ^ " " ^ a)
|
||||
|
||||
let binop g opcode a b = gen g (" " ^ opcode ^ " " ^ a ^ ", " ^ b)
|
||||
|
||||
let push g a = op g "push" a
|
||||
|
||||
|
26
src/compile.ml
Normal file
26
src/compile.ml
Normal file
@@ -0,0 +1,26 @@
|
||||
let compile file =
|
||||
try
|
||||
let g = Codegen.new_generator file in
|
||||
let s = Stream.open_stream file in
|
||||
let o = Filename.chop_extension file in
|
||||
parse s g;
|
||||
Stream.close_stream s;
|
||||
Codegen.close_generator g;
|
||||
let _ = Sys.command ("nasm -f macho " ^ g.file) in
|
||||
let _ = Sys.command ("gcc -o " ^out ^ " " ^ out ^ ".o") in
|
||||
()
|
||||
with
|
||||
| Syntax_error e ->
|
||||
printf "syntax error: %s\n" e;
|
||||
| Sys_error _ ->
|
||||
print_string "no file found\n"
|
||||
|
||||
let help name = printf "%s <file>\n" name
|
||||
|
||||
let () =
|
||||
if Array.length Sys.argv = 1
|
||||
then help (Array.get Sys.argv 0)
|
||||
else
|
||||
let file = Array.get Sys.argv 1 in
|
||||
printf "compiling %s\n" file
|
||||
compile file
|
29
src/parse.ml
Normal file
29
src/parse.ml
Normal file
@@ -0,0 +1,29 @@
|
||||
let parse stm g =
|
||||
let s = (Token.new_scanner stm) in
|
||||
try
|
||||
Token.program s g
|
||||
with End_of_file ->
|
||||
Token.syntax_error s "program reached end of file before end keyword"
|
||||
|
||||
let program s g =
|
||||
if Token.match_token s Token.Begin then
|
||||
let _ = generate_begin s g in
|
||||
let _ = statements s g in
|
||||
if Token.match_token s Token.End then
|
||||
let _ = generate_end s g in ()
|
||||
else Token.syntax_error s "program should end with end keyword"
|
||||
else Token.syntax_error s "program should start with begin keyword"
|
||||
|
||||
let rec statements s g = if statement s g then statements s g else ()
|
||||
|
||||
let statement s g =
|
||||
let t = next_token s in
|
||||
if match t with
|
||||
| Token.Read -> read s g
|
||||
| Token.Write -> write s g
|
||||
| Token.Identifier i -> assignment s g
|
||||
| _ -> false
|
||||
then
|
||||
if Token.match_token s Token.Semicolon then true
|
||||
else Token.syntax_error s "statement must end with semicolon"
|
||||
else false
|
24
src/stream.ml
Normal file
24
src/stream.ml
Normal file
@@ -0,0 +1,24 @@
|
||||
type stream = { mutable chr: char option; mutable line_num: int; chan: in_channel }
|
||||
|
||||
let open_stream file = { chr=None; line_num=1; chan=open_in file }
|
||||
|
||||
let close_stream stm = close_in stm.chan
|
||||
|
||||
let read_char stm =
|
||||
match stm.chr with
|
||||
| None ->
|
||||
let c = input_char stm.chan in
|
||||
if c = '\n'
|
||||
then let _ = stm.line_num <- stm.line_num + 1 in c
|
||||
else c
|
||||
| Some c -> stm.chr <- None; c
|
||||
|
||||
let unread_char stm c = stm.chr <- Some c
|
||||
|
||||
let is_digit c =
|
||||
let code = Char.code c in code >= Char.code('0') && code <= Char.code('9')
|
||||
|
||||
let is_alpha c =
|
||||
let code = Char.code c in
|
||||
(code >= Char.code('A') && code <= Char.code('Z')) ||
|
||||
(code >= Char.code('a') && code <= Char.code('z'))
|
59
src/token.ml
Normal file
59
src/token.ml
Normal file
@@ -0,0 +1,59 @@
|
||||
type token = Begin
|
||||
| End
|
||||
| Identifier of string
|
||||
| Read
|
||||
| Write
|
||||
| Literal of int
|
||||
| Assign
|
||||
| LeftParen
|
||||
| RightParen
|
||||
| Add
|
||||
| Sub
|
||||
| Comma
|
||||
| Semicolon
|
||||
|
||||
type scanner = { mutable last_token: token option; stm: Stream.stream }
|
||||
|
||||
exception Syntax_error of string
|
||||
|
||||
let syntax_error s msg =
|
||||
raise (Syntax_error (msg ^" on line " ^ (string_of_int s.stm.line_num)))
|
||||
|
||||
let rec skip_blank_chars stm =
|
||||
let c = Stream.read_char stm in
|
||||
match c with
|
||||
| ' ' | '\t' | '\r' | '\n' -> skip_blank_chars stm
|
||||
| _ -> Stream.unread_char stm c; ()
|
||||
|
||||
let scan s =
|
||||
let stm = s.stm in
|
||||
let c = Stream.read_char stm in
|
||||
let rec scan_iden acc =
|
||||
let nc = Stream.read_char stm in
|
||||
if Stream.is_alpha nc || Stream.is_digit nc || nc='_'
|
||||
then scan_iden (acc ^ (Char.escaped nc))
|
||||
else let _ = Stream.unread_char stm nc in
|
||||
let lc = String.lowercase acc in
|
||||
if lc = "begin" then Begin
|
||||
else if lc = "end" then End
|
||||
else if lc = "read" then Read
|
||||
else if lc = "write" then Write
|
||||
else Identifier acc
|
||||
in
|
||||
let rec scan_lit acc =
|
||||
let nc = Stream.read_char stm in
|
||||
if Stream.is_digit nc
|
||||
then scan_lit (acc ^ (Char.escaped nc))
|
||||
else let _ = Stream.unread_char stm nc in
|
||||
Literal (int_of_string acc)
|
||||
in
|
||||
if Stream.is_alpha c then scan_iden (Char.escaped c)
|
||||
else if Stream.is_digit c then scan_lit (Char.escaped c)
|
||||
else if c='+' then Add
|
||||
else if c='-' then Sub
|
||||
else if c=',' then Comma
|
||||
else if c=';' then Semicolon
|
||||
else if c='(' then LeftParen
|
||||
else if c=')' then RightParen
|
||||
else if c=':' && Stream.read_char stm = '=' then Assign
|
||||
else syntax_error s "Could not identify token"
|
Reference in New Issue
Block a user