added compilation to assembly (binary is still weird)
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,3 +1,5 @@
|
||||
_build/
|
||||
*.byte
|
||||
*.native
|
||||
*.o
|
||||
*.s
|
||||
|
8
examples/basic.mc
Normal file
8
examples/basic.mc
Normal file
@@ -0,0 +1,8 @@
|
||||
begin
|
||||
a := 1;
|
||||
b := a + 1;
|
||||
b := b + 1;
|
||||
write (a,b);
|
||||
read(a,b);
|
||||
write (a+10, b+10);
|
||||
end
|
@@ -19,7 +19,7 @@ let var_addr s g v =
|
||||
let i = String.sub v 6 ((String.length v) - 6) in "[esp+" ^ i ^ "]"
|
||||
else
|
||||
try "[esp+" ^ string_of_int (Hashtbl.find g.vars v) ^ "]"
|
||||
with Not_found -> syntax_error s ("identifier " ^ v ^ " not defined")
|
||||
with Not_found -> Token.syntax_error s ("identifier " ^ v ^ " not defined")
|
||||
|
||||
let var s g v = "dword " ^ (var_addr s g v)
|
||||
|
||||
@@ -36,12 +36,102 @@ let alloc_var s g v =
|
||||
let token_var s g v =
|
||||
match v with
|
||||
| Token.Identifier i -> var s g i
|
||||
| _ -> syntax_error s "identifier expected"
|
||||
| _ -> Token.syntax_error s "identifier expected"
|
||||
|
||||
let unop g opcode a = gen g (" " ^ opcode ^ " " ^ a)
|
||||
|
||||
let binop g opcode a b = gen g (" " ^ opcode ^ " " ^ a ^ ", " ^ b)
|
||||
|
||||
let push g a = op g "push" a
|
||||
let push g a = unop g "push" a
|
||||
|
||||
|
||||
let generate_begin _ g = gen g
|
||||
"extern printf\n\
|
||||
extern scanf\n\
|
||||
\n\
|
||||
section .data\n\
|
||||
inf: db '%d', 0\n\
|
||||
ouf: db '%d', 10, 0\n\
|
||||
\n\
|
||||
section .text\n\
|
||||
global main\n\
|
||||
\n\
|
||||
main:\n\
|
||||
sub esp, 1000"
|
||||
|
||||
let generate_end _ g = gen g
|
||||
" add esp, 1000\n\
|
||||
exit:\n\
|
||||
mov eax, 1 ; sys_exit\n\
|
||||
mov ebx, 0\n\
|
||||
int 80h"
|
||||
|
||||
let generate_read s g id =
|
||||
match id with
|
||||
| Token.Identifier i ->
|
||||
binop g "lea" "eax" (var_addr s g i);
|
||||
push g "eax";
|
||||
push g "inf";
|
||||
unop g "call" "scanf";
|
||||
binop g "add " "esp" "8"
|
||||
| _ -> Token.syntax_error s "generate read called with invalid argument"
|
||||
|
||||
let generate_reads s g = List.iter (generate_read s g)
|
||||
|
||||
let generate_write s g id =
|
||||
match id with
|
||||
| Token.Identifier i ->
|
||||
push g (var s g i);
|
||||
push g "ouf";
|
||||
unop g "call" "printf";
|
||||
binop g "add " "esp" "8"
|
||||
| _ -> Token.syntax_error s "generate write called with invalid argument"
|
||||
|
||||
let generate_copy s g a b =
|
||||
match a with
|
||||
| Token.Identifier i ->
|
||||
(match b with
|
||||
| Token.Identifier i2 ->
|
||||
binop g "mov " "eax" (var s g i2);
|
||||
binop g "mov " (var s g i) "eax"
|
||||
| Token.Literal l -> binop g "mov " (var s g i) (string_of_int l)
|
||||
| _ -> Token.syntax_error s "generate copy called with invalid argument")
|
||||
| _ -> Token.syntax_error s "generate copy called with invalid argument"
|
||||
|
||||
let generate_assign s g a b =
|
||||
match a with
|
||||
| Token.Identifier i -> let _ = alloc_var s g i in generate_copy s g a b
|
||||
| _ -> Token.syntax_error s "generate assign called with invalid argument"
|
||||
|
||||
let generate_add s g d id1 id2 =
|
||||
match (id1, id2) with
|
||||
| (Token.Identifier _, Token.Identifier i2) ->
|
||||
let v = temp_var s g d in
|
||||
let vi = token_var s g v in
|
||||
let _ = generate_copy s g v id1 in
|
||||
let _ = binop g "add " vi (var s g i2) in v
|
||||
| (Token.Identifier _, Token.Literal l2) ->
|
||||
let v = temp_var s g d in
|
||||
let vi = token_var s g v in
|
||||
let _ = generate_copy s g v id1 in
|
||||
let _ = binop g "add " vi (string_of_int l2) in v
|
||||
| _ -> Token.syntax_error s "generate exp called with invalid argument"
|
||||
|
||||
let generate_sub s g d id1 id2 =
|
||||
match (id1, id2) with
|
||||
| (Token.Identifier _, Token.Identifier i2) ->
|
||||
let v = temp_var s g d in
|
||||
let vi = token_var s g v in
|
||||
let _ = generate_copy s g v id1 in
|
||||
let _ = binop g "sub " vi (var s g i2) in v
|
||||
| (Token.Identifier _, Token.Literal l2) ->
|
||||
let v = temp_var s g d in
|
||||
let vi = token_var s g v in
|
||||
let _ = generate_copy s g v id1 in
|
||||
let _ = binop g "sub " vi (string_of_int l2) in v
|
||||
| (Token.Literal _, Token.Identifier i2) ->
|
||||
let v = temp_var s g d in
|
||||
let vi = token_var s g v in
|
||||
let _ = generate_copy s g v id1 in
|
||||
let _ = binop g "sub " vi (var s g i2) in v
|
||||
| _ -> Token.syntax_error s "generate exp called with invalid argument"
|
||||
|
@@ -1,26 +1,26 @@
|
||||
let compile file =
|
||||
try
|
||||
let g = Codegen.new_generator file in
|
||||
let s = Stream.open_stream file in
|
||||
let s = Charstream.open_stream file in
|
||||
let o = Filename.chop_extension file in
|
||||
parse s g;
|
||||
Stream.close_stream s;
|
||||
Parse.parse s g;
|
||||
Charstream.close_stream s;
|
||||
Codegen.close_generator g;
|
||||
let _ = Sys.command ("nasm -f macho " ^ g.file) in
|
||||
let _ = Sys.command ("gcc -o " ^out ^ " " ^ out ^ ".o") in
|
||||
let _ = Sys.command ("gcc -o " ^ o ^ " " ^ o ^ ".o") in
|
||||
()
|
||||
with
|
||||
| Syntax_error e ->
|
||||
printf "syntax error: %s\n" e;
|
||||
| Token.Syntax_error e ->
|
||||
Format.printf "syntax error: %s\n" e;
|
||||
| Sys_error _ ->
|
||||
print_string "no file found\n"
|
||||
|
||||
let help name = printf "%s <file>\n" name
|
||||
let help name = Format.printf "%s <file>\n" name
|
||||
|
||||
let () =
|
||||
if Array.length Sys.argv = 1
|
||||
then help (Array.get Sys.argv 0)
|
||||
else
|
||||
let file = Array.get Sys.argv 1 in
|
||||
printf "compiling %s\n" file
|
||||
Format.printf "compiling %s\n" file;
|
||||
compile file
|
||||
|
139
src/parse.ml
139
src/parse.ml
@@ -1,29 +1,134 @@
|
||||
let parse stm g =
|
||||
let s = (Token.new_scanner stm) in
|
||||
try
|
||||
Token.program s g
|
||||
with End_of_file ->
|
||||
Token.syntax_error s "program reached end of file before end keyword"
|
||||
let add s g d l r =
|
||||
match (l, r) with
|
||||
| (Token.Literal l1,Token.Literal l2) ->Token.Literal (l1+l2)
|
||||
| (Token.Identifier _, Token.Literal _) -> Codegen.generate_add s g d l r
|
||||
| (Token.Literal _, Token.Identifier _) -> Codegen.generate_add s g d r l
|
||||
| _ -> Token.syntax_error s "expected literal or identifier for add operation"
|
||||
|
||||
let program s g =
|
||||
if Token.match_token s Token.Begin then
|
||||
let _ = generate_begin s g in
|
||||
let _ = statements s g in
|
||||
if Token.match_token s Token.End then
|
||||
let _ = generate_end s g in ()
|
||||
else Token.syntax_error s "program should end with end keyword"
|
||||
else Token.syntax_error s "program should start with begin keyword"
|
||||
let sub s g d l r =
|
||||
match (l, r) with
|
||||
| (Token.Literal l1,Token.Literal l2) ->Token.Literal (l1-l2)
|
||||
| (Token.Identifier _, Token.Literal _) -> Codegen.generate_sub s g d l r
|
||||
| (Token.Literal _, Token.Identifier _) -> Codegen.generate_sub s g d l r
|
||||
| _ -> Token.syntax_error s "expected literal or identifier for sub operation"
|
||||
|
||||
let rec statements s g = if statement s g then statements s g else ()
|
||||
let rec expression s g d =
|
||||
let primary s =
|
||||
match Token.next_token s with
|
||||
| Token.LeftParen ->
|
||||
let _ = Token.match_token s Token.LeftParen in
|
||||
let e = expression s g (d+1) in
|
||||
if Token.match_token s Token.RightParen
|
||||
then Some e
|
||||
else Token.syntax_error s "right paren expected in expression"
|
||||
| Token.Identifier i ->
|
||||
let _ = Token.match_token s (Token.Identifier i) in
|
||||
Some (Token.Identifier i)
|
||||
| Token.Literal l ->
|
||||
let _ = Token.match_token s (Token.Literal l) in
|
||||
Some (Token.Literal l)
|
||||
| _ -> None
|
||||
in
|
||||
let lp = primary s in
|
||||
match lp with
|
||||
| Some l ->
|
||||
(match Token.next_token s with
|
||||
| Token.Add ->
|
||||
let _ = Token.match_token s Add in
|
||||
add s g d l (expression s g (d+1))
|
||||
| Token.Sub ->
|
||||
let _ = Token.match_token s Sub in
|
||||
sub s g d l (expression s g (d+1))
|
||||
| _ -> l)
|
||||
| None -> Token.syntax_error s "literal or identifier expected"
|
||||
|
||||
let write s g =
|
||||
let rec expressions c =
|
||||
let e = expression s g 1 in
|
||||
if match e with
|
||||
| Token.Identifier _ -> let _ = Codegen.generate_write s g e in true
|
||||
| Token.Literal _ -> let _ = Codegen.generate_write s g e in true
|
||||
| _ -> false
|
||||
then if (Token.next_token s) = Token.Comma
|
||||
then let _ = Token.match_token s Token.Comma in expressions (c+1)
|
||||
else (c+1)
|
||||
else c
|
||||
in
|
||||
if Token.match_token s Token.Write then
|
||||
if Token.match_token s Token.LeftParen then
|
||||
if expressions 0 > 0 then
|
||||
if Token.match_token s Token.RightParen then true
|
||||
else Token.syntax_error s "right paren expected in write statement"
|
||||
else Token.syntax_error s "write statement expects at least one expression"
|
||||
else Token.syntax_error s "left paren expected in write statement"
|
||||
else Token.syntax_error s "write statement expected"
|
||||
|
||||
let identifiers s =
|
||||
let rec idens ids =
|
||||
match Token.next_token s with
|
||||
| Token.Identifier i ->
|
||||
let _ = Token.match_next s in
|
||||
let n = Token.next_token s in
|
||||
if n = Token.Comma
|
||||
then let _ = Token.match_token s Token.Comma in idens (Token.Identifier i :: ids)
|
||||
else idens (Token.Identifier i :: ids)
|
||||
| _ -> ids
|
||||
in idens []
|
||||
|
||||
let read s g =
|
||||
if Token.match_token s Token.Read then
|
||||
if Token.match_token s Token.LeftParen then
|
||||
let ids = identifiers s in
|
||||
if ids = []
|
||||
then Token.syntax_error s "read statement expects comma seperated identifier(s)"
|
||||
else if Token.match_token s Token.RightParen
|
||||
then let _ = Codegen.generate_reads s g (List.rev ids) in true
|
||||
else Token.syntax_error s "right paren expected in read statement"
|
||||
else Token.syntax_error s "left paren expected in read statement"
|
||||
else Token.syntax_error s "read statement expected"
|
||||
|
||||
let assignment s g =
|
||||
let id = Token.match_next s in
|
||||
match id with
|
||||
Token.Identifier i ->
|
||||
if Token.match_token s Token.Assign
|
||||
then
|
||||
let new_var = if Codegen.is_alloc_var s g i then 0 else 1 in
|
||||
let id2 = expression s g (1+new_var) in
|
||||
match id2 with
|
||||
| Token.Literal _ ->
|
||||
let _ = Codegen.generate_assign s g id id2 in true
|
||||
| Token.Identifier _ ->
|
||||
let _ = Codegen.generate_assign s g id id2 in true
|
||||
| _ -> Token.syntax_error s "literal or identifier expected"
|
||||
else Token.syntax_error s "assign symbol expected"
|
||||
| _ -> Token.syntax_error s "identifier expected"
|
||||
|
||||
let statement s g =
|
||||
let t = next_token s in
|
||||
let t = Token.next_token s in
|
||||
if match t with
|
||||
| Token.Read -> read s g
|
||||
| Token.Write -> write s g
|
||||
| Token.Identifier i -> assignment s g
|
||||
| Token.Identifier _ -> assignment s g
|
||||
| _ -> false
|
||||
then
|
||||
if Token.match_token s Token.Semicolon then true
|
||||
else Token.syntax_error s "statement must end with semicolon"
|
||||
else false
|
||||
|
||||
let rec statements s g = if statement s g then statements s g else ()
|
||||
|
||||
let program s g =
|
||||
if Token.match_token s Token.Begin then
|
||||
let _ = Codegen.generate_begin s g in
|
||||
let _ = statements s g in
|
||||
if Token.match_token s Token.End then
|
||||
let _ = Codegen.generate_end s g in ()
|
||||
else Token.syntax_error s "program should end with end keyword"
|
||||
else Token.syntax_error s "program should start with begin keyword"
|
||||
|
||||
let parse stm g =
|
||||
let s = (Token.new_scanner stm) in
|
||||
try program s g
|
||||
with End_of_file ->
|
||||
Token.syntax_error s "program reached end of file before end keyword"
|
||||
|
46
src/token.ml
46
src/token.ml
@@ -12,7 +12,7 @@ type token = Begin
|
||||
| Comma
|
||||
| Semicolon
|
||||
|
||||
type scanner = { mutable last_token: token option; stm: Stream.stream }
|
||||
type scanner = { mutable last_token: token option; stm: Charstream.stream }
|
||||
|
||||
exception Syntax_error of string
|
||||
|
||||
@@ -20,20 +20,20 @@ let syntax_error s msg =
|
||||
raise (Syntax_error (msg ^" on line " ^ (string_of_int s.stm.line_num)))
|
||||
|
||||
let rec skip_blank_chars stm =
|
||||
let c = Stream.read_char stm in
|
||||
let c = Charstream.read_char stm in
|
||||
match c with
|
||||
| ' ' | '\t' | '\r' | '\n' -> skip_blank_chars stm
|
||||
| _ -> Stream.unread_char stm c; ()
|
||||
| _ -> Charstream.unread_char stm c; ()
|
||||
|
||||
let scan s =
|
||||
let stm = s.stm in
|
||||
let c = Stream.read_char stm in
|
||||
let c = Charstream.read_char stm in
|
||||
let rec scan_iden acc =
|
||||
let nc = Stream.read_char stm in
|
||||
if Stream.is_alpha nc || Stream.is_digit nc || nc='_'
|
||||
let nc = Charstream.read_char stm in
|
||||
if Charstream.is_alpha nc || Charstream.is_digit nc || nc='_'
|
||||
then scan_iden (acc ^ (Char.escaped nc))
|
||||
else let _ = Stream.unread_char stm nc in
|
||||
let lc = String.lowercase acc in
|
||||
else let _ = Charstream.unread_char stm nc in
|
||||
let lc = String.lowercase_ascii acc in
|
||||
if lc = "begin" then Begin
|
||||
else if lc = "end" then End
|
||||
else if lc = "read" then Read
|
||||
@@ -41,19 +41,37 @@ let scan s =
|
||||
else Identifier acc
|
||||
in
|
||||
let rec scan_lit acc =
|
||||
let nc = Stream.read_char stm in
|
||||
if Stream.is_digit nc
|
||||
let nc = Charstream.read_char stm in
|
||||
if Charstream.is_digit nc
|
||||
then scan_lit (acc ^ (Char.escaped nc))
|
||||
else let _ = Stream.unread_char stm nc in
|
||||
else let _ = Charstream.unread_char stm nc in
|
||||
Literal (int_of_string acc)
|
||||
in
|
||||
if Stream.is_alpha c then scan_iden (Char.escaped c)
|
||||
else if Stream.is_digit c then scan_lit (Char.escaped c)
|
||||
if Charstream.is_alpha c then scan_iden (Char.escaped c)
|
||||
else if Charstream.is_digit c then scan_lit (Char.escaped c)
|
||||
else if c='+' then Add
|
||||
else if c='-' then Sub
|
||||
else if c=',' then Comma
|
||||
else if c=';' then Semicolon
|
||||
else if c='(' then LeftParen
|
||||
else if c=')' then RightParen
|
||||
else if c=':' && Stream.read_char stm = '=' then Assign
|
||||
else if c=':' && Charstream.read_char stm = '=' then Assign
|
||||
else syntax_error s "Could not identify token"
|
||||
|
||||
|
||||
let new_scanner stm = { last_token=None; stm=stm }
|
||||
|
||||
let match_next s =
|
||||
match s.last_token with
|
||||
| None -> let _ = skip_blank_chars s.stm in scan s
|
||||
| Some tn -> s.last_token <- None; tn
|
||||
|
||||
let match_token s t = match_next s = t
|
||||
|
||||
let next_token s =
|
||||
match s.last_token with
|
||||
| None ->
|
||||
(skip_blank_chars s.stm;
|
||||
let t = scan s in
|
||||
s.last_token <- Some t; t)
|
||||
| Some t -> t
|
||||
|
Reference in New Issue
Block a user