added compilation to assembly (binary is still weird)

This commit is contained in:
2017-05-30 08:58:16 -04:00
parent 721b090971
commit 53f66f6edc
7 changed files with 265 additions and 42 deletions

2
.gitignore vendored
View File

@@ -1,3 +1,5 @@
_build/
*.byte
*.native
*.o
*.s

8
examples/basic.mc Normal file
View File

@@ -0,0 +1,8 @@
begin
a := 1;
b := a + 1;
b := b + 1;
write (a,b);
read(a,b);
write (a+10, b+10);
end

View File

@@ -19,7 +19,7 @@ let var_addr s g v =
let i = String.sub v 6 ((String.length v) - 6) in "[esp+" ^ i ^ "]"
else
try "[esp+" ^ string_of_int (Hashtbl.find g.vars v) ^ "]"
with Not_found -> syntax_error s ("identifier " ^ v ^ " not defined")
with Not_found -> Token.syntax_error s ("identifier " ^ v ^ " not defined")
let var s g v = "dword " ^ (var_addr s g v)
@@ -36,12 +36,102 @@ let alloc_var s g v =
let token_var s g v =
match v with
| Token.Identifier i -> var s g i
| _ -> syntax_error s "identifier expected"
| _ -> Token.syntax_error s "identifier expected"
let unop g opcode a = gen g (" " ^ opcode ^ " " ^ a)
let binop g opcode a b = gen g (" " ^ opcode ^ " " ^ a ^ ", " ^ b)
let push g a = op g "push" a
let push g a = unop g "push" a
let generate_begin _ g = gen g
"extern printf\n\
extern scanf\n\
\n\
section .data\n\
inf: db '%d', 0\n\
ouf: db '%d', 10, 0\n\
\n\
section .text\n\
global main\n\
\n\
main:\n\
sub esp, 1000"
let generate_end _ g = gen g
" add esp, 1000\n\
exit:\n\
mov eax, 1 ; sys_exit\n\
mov ebx, 0\n\
int 80h"
let generate_read s g id =
match id with
| Token.Identifier i ->
binop g "lea" "eax" (var_addr s g i);
push g "eax";
push g "inf";
unop g "call" "scanf";
binop g "add " "esp" "8"
| _ -> Token.syntax_error s "generate read called with invalid argument"
let generate_reads s g = List.iter (generate_read s g)
let generate_write s g id =
match id with
| Token.Identifier i ->
push g (var s g i);
push g "ouf";
unop g "call" "printf";
binop g "add " "esp" "8"
| _ -> Token.syntax_error s "generate write called with invalid argument"
let generate_copy s g a b =
match a with
| Token.Identifier i ->
(match b with
| Token.Identifier i2 ->
binop g "mov " "eax" (var s g i2);
binop g "mov " (var s g i) "eax"
| Token.Literal l -> binop g "mov " (var s g i) (string_of_int l)
| _ -> Token.syntax_error s "generate copy called with invalid argument")
| _ -> Token.syntax_error s "generate copy called with invalid argument"
let generate_assign s g a b =
match a with
| Token.Identifier i -> let _ = alloc_var s g i in generate_copy s g a b
| _ -> Token.syntax_error s "generate assign called with invalid argument"
let generate_add s g d id1 id2 =
match (id1, id2) with
| (Token.Identifier _, Token.Identifier i2) ->
let v = temp_var s g d in
let vi = token_var s g v in
let _ = generate_copy s g v id1 in
let _ = binop g "add " vi (var s g i2) in v
| (Token.Identifier _, Token.Literal l2) ->
let v = temp_var s g d in
let vi = token_var s g v in
let _ = generate_copy s g v id1 in
let _ = binop g "add " vi (string_of_int l2) in v
| _ -> Token.syntax_error s "generate exp called with invalid argument"
let generate_sub s g d id1 id2 =
match (id1, id2) with
| (Token.Identifier _, Token.Identifier i2) ->
let v = temp_var s g d in
let vi = token_var s g v in
let _ = generate_copy s g v id1 in
let _ = binop g "sub " vi (var s g i2) in v
| (Token.Identifier _, Token.Literal l2) ->
let v = temp_var s g d in
let vi = token_var s g v in
let _ = generate_copy s g v id1 in
let _ = binop g "sub " vi (string_of_int l2) in v
| (Token.Literal _, Token.Identifier i2) ->
let v = temp_var s g d in
let vi = token_var s g v in
let _ = generate_copy s g v id1 in
let _ = binop g "sub " vi (var s g i2) in v
| _ -> Token.syntax_error s "generate exp called with invalid argument"

View File

@@ -1,26 +1,26 @@
let compile file =
try
let g = Codegen.new_generator file in
let s = Stream.open_stream file in
let s = Charstream.open_stream file in
let o = Filename.chop_extension file in
parse s g;
Stream.close_stream s;
Parse.parse s g;
Charstream.close_stream s;
Codegen.close_generator g;
let _ = Sys.command ("nasm -f macho " ^ g.file) in
let _ = Sys.command ("gcc -o " ^out ^ " " ^ out ^ ".o") in
let _ = Sys.command ("gcc -o " ^ o ^ " " ^ o ^ ".o") in
()
with
| Syntax_error e ->
printf "syntax error: %s\n" e;
| Token.Syntax_error e ->
Format.printf "syntax error: %s\n" e;
| Sys_error _ ->
print_string "no file found\n"
let help name = printf "%s <file>\n" name
let help name = Format.printf "%s <file>\n" name
let () =
if Array.length Sys.argv = 1
then help (Array.get Sys.argv 0)
else
let file = Array.get Sys.argv 1 in
printf "compiling %s\n" file
Format.printf "compiling %s\n" file;
compile file

View File

@@ -1,29 +1,134 @@
let parse stm g =
let s = (Token.new_scanner stm) in
try
Token.program s g
with End_of_file ->
Token.syntax_error s "program reached end of file before end keyword"
let add s g d l r =
match (l, r) with
| (Token.Literal l1,Token.Literal l2) ->Token.Literal (l1+l2)
| (Token.Identifier _, Token.Literal _) -> Codegen.generate_add s g d l r
| (Token.Literal _, Token.Identifier _) -> Codegen.generate_add s g d r l
| _ -> Token.syntax_error s "expected literal or identifier for add operation"
let program s g =
if Token.match_token s Token.Begin then
let _ = generate_begin s g in
let _ = statements s g in
if Token.match_token s Token.End then
let _ = generate_end s g in ()
else Token.syntax_error s "program should end with end keyword"
else Token.syntax_error s "program should start with begin keyword"
let sub s g d l r =
match (l, r) with
| (Token.Literal l1,Token.Literal l2) ->Token.Literal (l1-l2)
| (Token.Identifier _, Token.Literal _) -> Codegen.generate_sub s g d l r
| (Token.Literal _, Token.Identifier _) -> Codegen.generate_sub s g d l r
| _ -> Token.syntax_error s "expected literal or identifier for sub operation"
let rec statements s g = if statement s g then statements s g else ()
let rec expression s g d =
let primary s =
match Token.next_token s with
| Token.LeftParen ->
let _ = Token.match_token s Token.LeftParen in
let e = expression s g (d+1) in
if Token.match_token s Token.RightParen
then Some e
else Token.syntax_error s "right paren expected in expression"
| Token.Identifier i ->
let _ = Token.match_token s (Token.Identifier i) in
Some (Token.Identifier i)
| Token.Literal l ->
let _ = Token.match_token s (Token.Literal l) in
Some (Token.Literal l)
| _ -> None
in
let lp = primary s in
match lp with
| Some l ->
(match Token.next_token s with
| Token.Add ->
let _ = Token.match_token s Add in
add s g d l (expression s g (d+1))
| Token.Sub ->
let _ = Token.match_token s Sub in
sub s g d l (expression s g (d+1))
| _ -> l)
| None -> Token.syntax_error s "literal or identifier expected"
let write s g =
let rec expressions c =
let e = expression s g 1 in
if match e with
| Token.Identifier _ -> let _ = Codegen.generate_write s g e in true
| Token.Literal _ -> let _ = Codegen.generate_write s g e in true
| _ -> false
then if (Token.next_token s) = Token.Comma
then let _ = Token.match_token s Token.Comma in expressions (c+1)
else (c+1)
else c
in
if Token.match_token s Token.Write then
if Token.match_token s Token.LeftParen then
if expressions 0 > 0 then
if Token.match_token s Token.RightParen then true
else Token.syntax_error s "right paren expected in write statement"
else Token.syntax_error s "write statement expects at least one expression"
else Token.syntax_error s "left paren expected in write statement"
else Token.syntax_error s "write statement expected"
let identifiers s =
let rec idens ids =
match Token.next_token s with
| Token.Identifier i ->
let _ = Token.match_next s in
let n = Token.next_token s in
if n = Token.Comma
then let _ = Token.match_token s Token.Comma in idens (Token.Identifier i :: ids)
else idens (Token.Identifier i :: ids)
| _ -> ids
in idens []
let read s g =
if Token.match_token s Token.Read then
if Token.match_token s Token.LeftParen then
let ids = identifiers s in
if ids = []
then Token.syntax_error s "read statement expects comma seperated identifier(s)"
else if Token.match_token s Token.RightParen
then let _ = Codegen.generate_reads s g (List.rev ids) in true
else Token.syntax_error s "right paren expected in read statement"
else Token.syntax_error s "left paren expected in read statement"
else Token.syntax_error s "read statement expected"
let assignment s g =
let id = Token.match_next s in
match id with
Token.Identifier i ->
if Token.match_token s Token.Assign
then
let new_var = if Codegen.is_alloc_var s g i then 0 else 1 in
let id2 = expression s g (1+new_var) in
match id2 with
| Token.Literal _ ->
let _ = Codegen.generate_assign s g id id2 in true
| Token.Identifier _ ->
let _ = Codegen.generate_assign s g id id2 in true
| _ -> Token.syntax_error s "literal or identifier expected"
else Token.syntax_error s "assign symbol expected"
| _ -> Token.syntax_error s "identifier expected"
let statement s g =
let t = next_token s in
let t = Token.next_token s in
if match t with
| Token.Read -> read s g
| Token.Write -> write s g
| Token.Identifier i -> assignment s g
| Token.Identifier _ -> assignment s g
| _ -> false
then
if Token.match_token s Token.Semicolon then true
else Token.syntax_error s "statement must end with semicolon"
else false
let rec statements s g = if statement s g then statements s g else ()
let program s g =
if Token.match_token s Token.Begin then
let _ = Codegen.generate_begin s g in
let _ = statements s g in
if Token.match_token s Token.End then
let _ = Codegen.generate_end s g in ()
else Token.syntax_error s "program should end with end keyword"
else Token.syntax_error s "program should start with begin keyword"
let parse stm g =
let s = (Token.new_scanner stm) in
try program s g
with End_of_file ->
Token.syntax_error s "program reached end of file before end keyword"

View File

@@ -12,7 +12,7 @@ type token = Begin
| Comma
| Semicolon
type scanner = { mutable last_token: token option; stm: Stream.stream }
type scanner = { mutable last_token: token option; stm: Charstream.stream }
exception Syntax_error of string
@@ -20,20 +20,20 @@ let syntax_error s msg =
raise (Syntax_error (msg ^" on line " ^ (string_of_int s.stm.line_num)))
let rec skip_blank_chars stm =
let c = Stream.read_char stm in
let c = Charstream.read_char stm in
match c with
| ' ' | '\t' | '\r' | '\n' -> skip_blank_chars stm
| _ -> Stream.unread_char stm c; ()
| _ -> Charstream.unread_char stm c; ()
let scan s =
let stm = s.stm in
let c = Stream.read_char stm in
let c = Charstream.read_char stm in
let rec scan_iden acc =
let nc = Stream.read_char stm in
if Stream.is_alpha nc || Stream.is_digit nc || nc='_'
let nc = Charstream.read_char stm in
if Charstream.is_alpha nc || Charstream.is_digit nc || nc='_'
then scan_iden (acc ^ (Char.escaped nc))
else let _ = Stream.unread_char stm nc in
let lc = String.lowercase acc in
else let _ = Charstream.unread_char stm nc in
let lc = String.lowercase_ascii acc in
if lc = "begin" then Begin
else if lc = "end" then End
else if lc = "read" then Read
@@ -41,19 +41,37 @@ let scan s =
else Identifier acc
in
let rec scan_lit acc =
let nc = Stream.read_char stm in
if Stream.is_digit nc
let nc = Charstream.read_char stm in
if Charstream.is_digit nc
then scan_lit (acc ^ (Char.escaped nc))
else let _ = Stream.unread_char stm nc in
else let _ = Charstream.unread_char stm nc in
Literal (int_of_string acc)
in
if Stream.is_alpha c then scan_iden (Char.escaped c)
else if Stream.is_digit c then scan_lit (Char.escaped c)
if Charstream.is_alpha c then scan_iden (Char.escaped c)
else if Charstream.is_digit c then scan_lit (Char.escaped c)
else if c='+' then Add
else if c='-' then Sub
else if c=',' then Comma
else if c=';' then Semicolon
else if c='(' then LeftParen
else if c=')' then RightParen
else if c=':' && Stream.read_char stm = '=' then Assign
else if c=':' && Charstream.read_char stm = '=' then Assign
else syntax_error s "Could not identify token"
let new_scanner stm = { last_token=None; stm=stm }
let match_next s =
match s.last_token with
| None -> let _ = skip_blank_chars s.stm in scan s
| Some tn -> s.last_token <- None; tn
let match_token s t = match_next s = t
let next_token s =
match s.last_token with
| None ->
(skip_blank_chars s.stm;
let t = scan s in
s.last_token <- Some t; t)
| Some t -> t