initial
This commit is contained in:
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
_build/
|
||||||
|
*.byte
|
||||||
|
*.native
|
5
README.md
Normal file
5
README.md
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
# micro
|
||||||
|
|
||||||
|
A minimal compiler in ML, as seen in [this tutorial](http://troydm.github.io/blog/2014/03/29/writing-micro-compiler-in-ocaml/).
|
||||||
|
|
||||||
|
Work in progess.
|
47
src/codegen.ml
Normal file
47
src/codegen.ml
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
type generator = { vars: (string, int) Hashtbl.t; file: string; chan: out_channel }
|
||||||
|
|
||||||
|
let new_generator file =
|
||||||
|
let fs = (Filename.chop_extension file) ^ ".s" in
|
||||||
|
{ vars=Hashtbl.create 100; file=fs; chan=open_out fs }
|
||||||
|
|
||||||
|
let close_generator g = close_out g.chan
|
||||||
|
|
||||||
|
let gen g v = output_string g.chan v; output_string g.chan "\n"
|
||||||
|
|
||||||
|
let bottom_var _ g =
|
||||||
|
Hashtbl.fold (fun _ v c -> if v >= c then (v+4) else c) g.vars 0
|
||||||
|
|
||||||
|
let empty_var s g i = (bottom_var s g) + 4 * (i - 1)
|
||||||
|
|
||||||
|
let var_addr s g v =
|
||||||
|
if String.length v > 6 && String.sub v 0 6 = "__temp"
|
||||||
|
then
|
||||||
|
let i = String.sub v 6 ((String.length v) - 6) in "[esp+" ^ i ^ "]"
|
||||||
|
else
|
||||||
|
try "[esp+" ^ string_of_int (Hashtbl.find g.vars v) ^ "]"
|
||||||
|
with Not_found -> syntax_error s ("identifier " ^ v ^ " not defined")
|
||||||
|
|
||||||
|
let var s g v = "dword " ^ (var_addr s g v)
|
||||||
|
|
||||||
|
let temp_var s g i =
|
||||||
|
Token.Identifier ("__temp" ^ (string_of_int (empty_var s g i)))
|
||||||
|
|
||||||
|
let is_alloc_var _ g v = Hashtbl.mem g.vars v
|
||||||
|
|
||||||
|
let alloc_var s g v =
|
||||||
|
if is_alloc_var s g v
|
||||||
|
then var s g v
|
||||||
|
else let _ = Hashtbl.replace g.vars v (empty_var s g 1) in var s g v
|
||||||
|
|
||||||
|
let token_var s g v =
|
||||||
|
match v with
|
||||||
|
| Token.Identifier i -> var s g i
|
||||||
|
| _ -> syntax_error s "identifier expected"
|
||||||
|
|
||||||
|
let unop g opcode a = gen g (" " ^ opcode ^ " " ^ a)
|
||||||
|
|
||||||
|
let binop g opcode a b = gen g (" " ^ opcode ^ " " ^ a ^ ", " ^ b)
|
||||||
|
|
||||||
|
let push g a = op g "push" a
|
||||||
|
|
||||||
|
|
26
src/compile.ml
Normal file
26
src/compile.ml
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
let compile file =
|
||||||
|
try
|
||||||
|
let g = Codegen.new_generator file in
|
||||||
|
let s = Stream.open_stream file in
|
||||||
|
let o = Filename.chop_extension file in
|
||||||
|
parse s g;
|
||||||
|
Stream.close_stream s;
|
||||||
|
Codegen.close_generator g;
|
||||||
|
let _ = Sys.command ("nasm -f macho " ^ g.file) in
|
||||||
|
let _ = Sys.command ("gcc -o " ^out ^ " " ^ out ^ ".o") in
|
||||||
|
()
|
||||||
|
with
|
||||||
|
| Syntax_error e ->
|
||||||
|
printf "syntax error: %s\n" e;
|
||||||
|
| Sys_error _ ->
|
||||||
|
print_string "no file found\n"
|
||||||
|
|
||||||
|
let help name = printf "%s <file>\n" name
|
||||||
|
|
||||||
|
let () =
|
||||||
|
if Array.length Sys.argv = 1
|
||||||
|
then help (Array.get Sys.argv 0)
|
||||||
|
else
|
||||||
|
let file = Array.get Sys.argv 1 in
|
||||||
|
printf "compiling %s\n" file
|
||||||
|
compile file
|
29
src/parse.ml
Normal file
29
src/parse.ml
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
let parse stm g =
|
||||||
|
let s = (Token.new_scanner stm) in
|
||||||
|
try
|
||||||
|
Token.program s g
|
||||||
|
with End_of_file ->
|
||||||
|
Token.syntax_error s "program reached end of file before end keyword"
|
||||||
|
|
||||||
|
let program s g =
|
||||||
|
if Token.match_token s Token.Begin then
|
||||||
|
let _ = generate_begin s g in
|
||||||
|
let _ = statements s g in
|
||||||
|
if Token.match_token s Token.End then
|
||||||
|
let _ = generate_end s g in ()
|
||||||
|
else Token.syntax_error s "program should end with end keyword"
|
||||||
|
else Token.syntax_error s "program should start with begin keyword"
|
||||||
|
|
||||||
|
let rec statements s g = if statement s g then statements s g else ()
|
||||||
|
|
||||||
|
let statement s g =
|
||||||
|
let t = next_token s in
|
||||||
|
if match t with
|
||||||
|
| Token.Read -> read s g
|
||||||
|
| Token.Write -> write s g
|
||||||
|
| Token.Identifier i -> assignment s g
|
||||||
|
| _ -> false
|
||||||
|
then
|
||||||
|
if Token.match_token s Token.Semicolon then true
|
||||||
|
else Token.syntax_error s "statement must end with semicolon"
|
||||||
|
else false
|
24
src/stream.ml
Normal file
24
src/stream.ml
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
type stream = { mutable chr: char option; mutable line_num: int; chan: in_channel }
|
||||||
|
|
||||||
|
let open_stream file = { chr=None; line_num=1; chan=open_in file }
|
||||||
|
|
||||||
|
let close_stream stm = close_in stm.chan
|
||||||
|
|
||||||
|
let read_char stm =
|
||||||
|
match stm.chr with
|
||||||
|
| None ->
|
||||||
|
let c = input_char stm.chan in
|
||||||
|
if c = '\n'
|
||||||
|
then let _ = stm.line_num <- stm.line_num + 1 in c
|
||||||
|
else c
|
||||||
|
| Some c -> stm.chr <- None; c
|
||||||
|
|
||||||
|
let unread_char stm c = stm.chr <- Some c
|
||||||
|
|
||||||
|
let is_digit c =
|
||||||
|
let code = Char.code c in code >= Char.code('0') && code <= Char.code('9')
|
||||||
|
|
||||||
|
let is_alpha c =
|
||||||
|
let code = Char.code c in
|
||||||
|
(code >= Char.code('A') && code <= Char.code('Z')) ||
|
||||||
|
(code >= Char.code('a') && code <= Char.code('z'))
|
59
src/token.ml
Normal file
59
src/token.ml
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
type token = Begin
|
||||||
|
| End
|
||||||
|
| Identifier of string
|
||||||
|
| Read
|
||||||
|
| Write
|
||||||
|
| Literal of int
|
||||||
|
| Assign
|
||||||
|
| LeftParen
|
||||||
|
| RightParen
|
||||||
|
| Add
|
||||||
|
| Sub
|
||||||
|
| Comma
|
||||||
|
| Semicolon
|
||||||
|
|
||||||
|
type scanner = { mutable last_token: token option; stm: Stream.stream }
|
||||||
|
|
||||||
|
exception Syntax_error of string
|
||||||
|
|
||||||
|
let syntax_error s msg =
|
||||||
|
raise (Syntax_error (msg ^" on line " ^ (string_of_int s.stm.line_num)))
|
||||||
|
|
||||||
|
let rec skip_blank_chars stm =
|
||||||
|
let c = Stream.read_char stm in
|
||||||
|
match c with
|
||||||
|
| ' ' | '\t' | '\r' | '\n' -> skip_blank_chars stm
|
||||||
|
| _ -> Stream.unread_char stm c; ()
|
||||||
|
|
||||||
|
let scan s =
|
||||||
|
let stm = s.stm in
|
||||||
|
let c = Stream.read_char stm in
|
||||||
|
let rec scan_iden acc =
|
||||||
|
let nc = Stream.read_char stm in
|
||||||
|
if Stream.is_alpha nc || Stream.is_digit nc || nc='_'
|
||||||
|
then scan_iden (acc ^ (Char.escaped nc))
|
||||||
|
else let _ = Stream.unread_char stm nc in
|
||||||
|
let lc = String.lowercase acc in
|
||||||
|
if lc = "begin" then Begin
|
||||||
|
else if lc = "end" then End
|
||||||
|
else if lc = "read" then Read
|
||||||
|
else if lc = "write" then Write
|
||||||
|
else Identifier acc
|
||||||
|
in
|
||||||
|
let rec scan_lit acc =
|
||||||
|
let nc = Stream.read_char stm in
|
||||||
|
if Stream.is_digit nc
|
||||||
|
then scan_lit (acc ^ (Char.escaped nc))
|
||||||
|
else let _ = Stream.unread_char stm nc in
|
||||||
|
Literal (int_of_string acc)
|
||||||
|
in
|
||||||
|
if Stream.is_alpha c then scan_iden (Char.escaped c)
|
||||||
|
else if Stream.is_digit c then scan_lit (Char.escaped c)
|
||||||
|
else if c='+' then Add
|
||||||
|
else if c='-' then Sub
|
||||||
|
else if c=',' then Comma
|
||||||
|
else if c=';' then Semicolon
|
||||||
|
else if c='(' then LeftParen
|
||||||
|
else if c=')' then RightParen
|
||||||
|
else if c=':' && Stream.read_char stm = '=' then Assign
|
||||||
|
else syntax_error s "Could not identify token"
|
Reference in New Issue
Block a user