diff --git a/.gitignore b/.gitignore index 41226d5..dcbff01 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ _build/ *.byte *.native +*.o +*.s diff --git a/examples/basic.mc b/examples/basic.mc new file mode 100644 index 0000000..bc2e45f --- /dev/null +++ b/examples/basic.mc @@ -0,0 +1,8 @@ +begin + a := 1; + b := a + 1; + b := b + 1; + write (a,b); + read(a,b); + write (a+10, b+10); +end diff --git a/src/stream.ml b/src/charstream.ml similarity index 100% rename from src/stream.ml rename to src/charstream.ml diff --git a/src/codegen.ml b/src/codegen.ml index 6f6cd23..881a332 100644 --- a/src/codegen.ml +++ b/src/codegen.ml @@ -19,7 +19,7 @@ let var_addr s g v = let i = String.sub v 6 ((String.length v) - 6) in "[esp+" ^ i ^ "]" else try "[esp+" ^ string_of_int (Hashtbl.find g.vars v) ^ "]" - with Not_found -> syntax_error s ("identifier " ^ v ^ " not defined") + with Not_found -> Token.syntax_error s ("identifier " ^ v ^ " not defined") let var s g v = "dword " ^ (var_addr s g v) @@ -36,12 +36,102 @@ let alloc_var s g v = let token_var s g v = match v with | Token.Identifier i -> var s g i - | _ -> syntax_error s "identifier expected" + | _ -> Token.syntax_error s "identifier expected" let unop g opcode a = gen g (" " ^ opcode ^ " " ^ a) let binop g opcode a b = gen g (" " ^ opcode ^ " " ^ a ^ ", " ^ b) -let push g a = op g "push" a +let push g a = unop g "push" a +let generate_begin _ g = gen g +"extern printf\n\ +extern scanf\n\ +\n\ +section .data\n\ + inf: db '%d', 0\n\ + ouf: db '%d', 10, 0\n\ +\n\ +section .text\n\ + global main\n\ +\n\ +main:\n\ + sub esp, 1000" + +let generate_end _ g = gen g +" add esp, 1000\n\ +exit:\n\ + mov eax, 1 ; sys_exit\n\ + mov ebx, 0\n\ + int 80h" + +let generate_read s g id = + match id with + | Token.Identifier i -> + binop g "lea" "eax" (var_addr s g i); + push g "eax"; + push g "inf"; + unop g "call" "scanf"; + binop g "add " "esp" "8" + | _ -> Token.syntax_error s "generate read called with invalid argument" + +let generate_reads s g = List.iter (generate_read s g) + +let generate_write s g id = + match id with + | Token.Identifier i -> + push g (var s g i); + push g "ouf"; + unop g "call" "printf"; + binop g "add " "esp" "8" + | _ -> Token.syntax_error s "generate write called with invalid argument" + +let generate_copy s g a b = + match a with + | Token.Identifier i -> + (match b with + | Token.Identifier i2 -> + binop g "mov " "eax" (var s g i2); + binop g "mov " (var s g i) "eax" + | Token.Literal l -> binop g "mov " (var s g i) (string_of_int l) + | _ -> Token.syntax_error s "generate copy called with invalid argument") + | _ -> Token.syntax_error s "generate copy called with invalid argument" + +let generate_assign s g a b = + match a with + | Token.Identifier i -> let _ = alloc_var s g i in generate_copy s g a b + | _ -> Token.syntax_error s "generate assign called with invalid argument" + +let generate_add s g d id1 id2 = + match (id1, id2) with + | (Token.Identifier _, Token.Identifier i2) -> + let v = temp_var s g d in + let vi = token_var s g v in + let _ = generate_copy s g v id1 in + let _ = binop g "add " vi (var s g i2) in v + | (Token.Identifier _, Token.Literal l2) -> + let v = temp_var s g d in + let vi = token_var s g v in + let _ = generate_copy s g v id1 in + let _ = binop g "add " vi (string_of_int l2) in v + | _ -> Token.syntax_error s "generate exp called with invalid argument" + +let generate_sub s g d id1 id2 = + match (id1, id2) with + | (Token.Identifier _, Token.Identifier i2) -> + let v = temp_var s g d in + let vi = token_var s g v in + let _ = generate_copy s g v id1 in + let _ = binop g "sub " vi (var s g i2) in v + | (Token.Identifier _, Token.Literal l2) -> + let v = temp_var s g d in + let vi = token_var s g v in + let _ = generate_copy s g v id1 in + let _ = binop g "sub " vi (string_of_int l2) in v + | (Token.Literal _, Token.Identifier i2) -> + let v = temp_var s g d in + let vi = token_var s g v in + let _ = generate_copy s g v id1 in + let _ = binop g "sub " vi (var s g i2) in v + | _ -> Token.syntax_error s "generate exp called with invalid argument" diff --git a/src/compile.ml b/src/compile.ml index 43f4d99..ec6a15d 100644 --- a/src/compile.ml +++ b/src/compile.ml @@ -1,26 +1,26 @@ let compile file = try let g = Codegen.new_generator file in - let s = Stream.open_stream file in + let s = Charstream.open_stream file in let o = Filename.chop_extension file in - parse s g; - Stream.close_stream s; + Parse.parse s g; + Charstream.close_stream s; Codegen.close_generator g; let _ = Sys.command ("nasm -f macho " ^ g.file) in - let _ = Sys.command ("gcc -o " ^out ^ " " ^ out ^ ".o") in + let _ = Sys.command ("gcc -o " ^ o ^ " " ^ o ^ ".o") in () with - | Syntax_error e -> - printf "syntax error: %s\n" e; + | Token.Syntax_error e -> + Format.printf "syntax error: %s\n" e; | Sys_error _ -> print_string "no file found\n" -let help name = printf "%s \n" name +let help name = Format.printf "%s \n" name let () = if Array.length Sys.argv = 1 then help (Array.get Sys.argv 0) else let file = Array.get Sys.argv 1 in - printf "compiling %s\n" file + Format.printf "compiling %s\n" file; compile file diff --git a/src/parse.ml b/src/parse.ml index 8be0fa4..ecffb97 100644 --- a/src/parse.ml +++ b/src/parse.ml @@ -1,29 +1,134 @@ -let parse stm g = - let s = (Token.new_scanner stm) in - try - Token.program s g - with End_of_file -> - Token.syntax_error s "program reached end of file before end keyword" +let add s g d l r = + match (l, r) with + | (Token.Literal l1,Token.Literal l2) ->Token.Literal (l1+l2) + | (Token.Identifier _, Token.Literal _) -> Codegen.generate_add s g d l r + | (Token.Literal _, Token.Identifier _) -> Codegen.generate_add s g d r l + | _ -> Token.syntax_error s "expected literal or identifier for add operation" -let program s g = - if Token.match_token s Token.Begin then - let _ = generate_begin s g in - let _ = statements s g in - if Token.match_token s Token.End then - let _ = generate_end s g in () - else Token.syntax_error s "program should end with end keyword" - else Token.syntax_error s "program should start with begin keyword" +let sub s g d l r = + match (l, r) with + | (Token.Literal l1,Token.Literal l2) ->Token.Literal (l1-l2) + | (Token.Identifier _, Token.Literal _) -> Codegen.generate_sub s g d l r + | (Token.Literal _, Token.Identifier _) -> Codegen.generate_sub s g d l r + | _ -> Token.syntax_error s "expected literal or identifier for sub operation" -let rec statements s g = if statement s g then statements s g else () +let rec expression s g d = + let primary s = + match Token.next_token s with + | Token.LeftParen -> + let _ = Token.match_token s Token.LeftParen in + let e = expression s g (d+1) in + if Token.match_token s Token.RightParen + then Some e + else Token.syntax_error s "right paren expected in expression" + | Token.Identifier i -> + let _ = Token.match_token s (Token.Identifier i) in + Some (Token.Identifier i) + | Token.Literal l -> + let _ = Token.match_token s (Token.Literal l) in + Some (Token.Literal l) + | _ -> None + in + let lp = primary s in + match lp with + | Some l -> + (match Token.next_token s with + | Token.Add -> + let _ = Token.match_token s Add in + add s g d l (expression s g (d+1)) + | Token.Sub -> + let _ = Token.match_token s Sub in + sub s g d l (expression s g (d+1)) + | _ -> l) + | None -> Token.syntax_error s "literal or identifier expected" + +let write s g = + let rec expressions c = + let e = expression s g 1 in + if match e with + | Token.Identifier _ -> let _ = Codegen.generate_write s g e in true + | Token.Literal _ -> let _ = Codegen.generate_write s g e in true + | _ -> false + then if (Token.next_token s) = Token.Comma + then let _ = Token.match_token s Token.Comma in expressions (c+1) + else (c+1) + else c + in + if Token.match_token s Token.Write then + if Token.match_token s Token.LeftParen then + if expressions 0 > 0 then + if Token.match_token s Token.RightParen then true + else Token.syntax_error s "right paren expected in write statement" + else Token.syntax_error s "write statement expects at least one expression" + else Token.syntax_error s "left paren expected in write statement" + else Token.syntax_error s "write statement expected" + +let identifiers s = + let rec idens ids = + match Token.next_token s with + | Token.Identifier i -> + let _ = Token.match_next s in + let n = Token.next_token s in + if n = Token.Comma + then let _ = Token.match_token s Token.Comma in idens (Token.Identifier i :: ids) + else idens (Token.Identifier i :: ids) + | _ -> ids + in idens [] + +let read s g = + if Token.match_token s Token.Read then + if Token.match_token s Token.LeftParen then + let ids = identifiers s in + if ids = [] + then Token.syntax_error s "read statement expects comma seperated identifier(s)" + else if Token.match_token s Token.RightParen + then let _ = Codegen.generate_reads s g (List.rev ids) in true + else Token.syntax_error s "right paren expected in read statement" + else Token.syntax_error s "left paren expected in read statement" + else Token.syntax_error s "read statement expected" + +let assignment s g = + let id = Token.match_next s in + match id with + Token.Identifier i -> + if Token.match_token s Token.Assign + then + let new_var = if Codegen.is_alloc_var s g i then 0 else 1 in + let id2 = expression s g (1+new_var) in + match id2 with + | Token.Literal _ -> + let _ = Codegen.generate_assign s g id id2 in true + | Token.Identifier _ -> + let _ = Codegen.generate_assign s g id id2 in true + | _ -> Token.syntax_error s "literal or identifier expected" + else Token.syntax_error s "assign symbol expected" + | _ -> Token.syntax_error s "identifier expected" let statement s g = - let t = next_token s in + let t = Token.next_token s in if match t with | Token.Read -> read s g | Token.Write -> write s g - | Token.Identifier i -> assignment s g + | Token.Identifier _ -> assignment s g | _ -> false then if Token.match_token s Token.Semicolon then true else Token.syntax_error s "statement must end with semicolon" else false + +let rec statements s g = if statement s g then statements s g else () + +let program s g = + if Token.match_token s Token.Begin then + let _ = Codegen.generate_begin s g in + let _ = statements s g in + if Token.match_token s Token.End then + let _ = Codegen.generate_end s g in () + else Token.syntax_error s "program should end with end keyword" + else Token.syntax_error s "program should start with begin keyword" + +let parse stm g = + let s = (Token.new_scanner stm) in + try program s g + with End_of_file -> + Token.syntax_error s "program reached end of file before end keyword" diff --git a/src/token.ml b/src/token.ml index 16cfec3..63b57a8 100644 --- a/src/token.ml +++ b/src/token.ml @@ -12,7 +12,7 @@ type token = Begin | Comma | Semicolon -type scanner = { mutable last_token: token option; stm: Stream.stream } +type scanner = { mutable last_token: token option; stm: Charstream.stream } exception Syntax_error of string @@ -20,20 +20,20 @@ let syntax_error s msg = raise (Syntax_error (msg ^" on line " ^ (string_of_int s.stm.line_num))) let rec skip_blank_chars stm = - let c = Stream.read_char stm in + let c = Charstream.read_char stm in match c with | ' ' | '\t' | '\r' | '\n' -> skip_blank_chars stm - | _ -> Stream.unread_char stm c; () + | _ -> Charstream.unread_char stm c; () let scan s = let stm = s.stm in - let c = Stream.read_char stm in + let c = Charstream.read_char stm in let rec scan_iden acc = - let nc = Stream.read_char stm in - if Stream.is_alpha nc || Stream.is_digit nc || nc='_' + let nc = Charstream.read_char stm in + if Charstream.is_alpha nc || Charstream.is_digit nc || nc='_' then scan_iden (acc ^ (Char.escaped nc)) - else let _ = Stream.unread_char stm nc in - let lc = String.lowercase acc in + else let _ = Charstream.unread_char stm nc in + let lc = String.lowercase_ascii acc in if lc = "begin" then Begin else if lc = "end" then End else if lc = "read" then Read @@ -41,19 +41,37 @@ let scan s = else Identifier acc in let rec scan_lit acc = - let nc = Stream.read_char stm in - if Stream.is_digit nc + let nc = Charstream.read_char stm in + if Charstream.is_digit nc then scan_lit (acc ^ (Char.escaped nc)) - else let _ = Stream.unread_char stm nc in + else let _ = Charstream.unread_char stm nc in Literal (int_of_string acc) in - if Stream.is_alpha c then scan_iden (Char.escaped c) - else if Stream.is_digit c then scan_lit (Char.escaped c) + if Charstream.is_alpha c then scan_iden (Char.escaped c) + else if Charstream.is_digit c then scan_lit (Char.escaped c) else if c='+' then Add else if c='-' then Sub else if c=',' then Comma else if c=';' then Semicolon else if c='(' then LeftParen else if c=')' then RightParen - else if c=':' && Stream.read_char stm = '=' then Assign + else if c=':' && Charstream.read_char stm = '=' then Assign else syntax_error s "Could not identify token" + + +let new_scanner stm = { last_token=None; stm=stm } + +let match_next s = + match s.last_token with + | None -> let _ = skip_blank_chars s.stm in scan s + | Some tn -> s.last_token <- None; tn + +let match_token s t = match_next s = t + +let next_token s = + match s.last_token with + | None -> + (skip_blank_chars s.stm; + let t = scan s in + s.last_token <- Some t; t) + | Some t -> t