diff --git a/src/passes/1-parser/ligodity.ml b/src/passes/1-parser/ligodity.ml index b56d63ea2..a520ca0e5 100644 --- a/src/passes/1-parser/ligodity.ml +++ b/src/passes/1-parser/ligodity.ml @@ -1,26 +1,27 @@ open Trace -open Parser_ligodity + module Parser = Parser_ligodity.Parser module AST = Parser_ligodity.AST +module ParserLog = Parser_ligodity.ParserLog +module LexToken = Parser_ligodity.LexToken +module Lexer = Lexer.Make(LexToken) let parse_file (source: string) : AST.t result = - (* let pp_input = - * let prefix = Filename.(source |> basename |> remove_extension) - * and suffix = ".pp.ligo" - * in prefix ^ suffix in *) - - (* let cpp_cmd = Printf.sprintf "cpp -traditional-cpp %s > %s" - * source pp_input in - * let%bind () = sys_command cpp_cmd in *) - let pp_input = - source - in + let prefix = Filename.(source |> basename |> remove_extension) + and suffix = ".pp.mligo" + in prefix ^ suffix in + + let cpp_cmd = Printf.sprintf "cpp -traditional-cpp %s > %s" + source pp_input in + let%bind () = sys_command cpp_cmd in + let%bind channel = generic_try (simple_error "error opening file") @@ (fun () -> open_in pp_input) in let lexbuf = Lexing.from_channel channel in - let read = Lexer.get_token in + let Lexer.{read ; close ; _} = + Lexer.open_token_stream None in specific_try (function | Parser.Error -> ( let start = Lexing.lexeme_start_p lexbuf in @@ -34,19 +35,6 @@ let parse_file (source: string) : AST.t result = in simple_error str ) - | Lexer.Error err -> ( - let start = Lexing.lexeme_start_p lexbuf in - let end_ = Lexing.lexeme_end_p lexbuf in - let str = Format.sprintf - "Lexer error (%s) at \"%s\" from (%d, %d) to (%d, %d). In file \"%s|%s\"\n" - (err.value) - (Lexing.lexeme lexbuf) - start.pos_lnum (start.pos_cnum - start.pos_bol) - end_.pos_lnum (end_.pos_cnum - end_.pos_bol) - start.pos_fname source - in - simple_error str - ) | exn -> let start = Lexing.lexeme_start_p lexbuf in let end_ = Lexing.lexeme_end_p lexbuf in @@ -59,13 +47,17 @@ let parse_file (source: string) : AST.t result = start.pos_fname source in simple_error str - ) @@ (fun () -> Parser.program read lexbuf) >>? fun raw -> + ) @@ (fun () -> + let raw = Parser.contract read lexbuf in + close () ; + raw + ) >>? fun raw -> ok raw let parse_string (s:string) : AST.t result = - let lexbuf = Lexing.from_string s in - let read = Lexer.get_token in + let Lexer.{read ; close ; _} = + Lexer.open_token_stream None in specific_try (function | Parser.Error -> ( let start = Lexing.lexeme_start_p lexbuf in @@ -78,12 +70,17 @@ let parse_string (s:string) : AST.t result = simple_error str ) | _ -> simple_error "unrecognized parse_ error" - ) @@ (fun () -> Parser.program read lexbuf) >>? fun raw -> + ) @@ (fun () -> + let raw = Parser.contract read lexbuf in + close () ; + raw + ) >>? fun raw -> ok raw let parse_expression (s:string) : AST.expr result = let lexbuf = Lexing.from_string s in - let read = Lexer.get_token in + let Lexer.{read ; close; _} = + Lexer.open_token_stream None in specific_try (function | Parser.Error -> ( let start = Lexing.lexeme_start_p lexbuf in @@ -107,5 +104,9 @@ let parse_expression (s:string) : AST.expr result = start.pos_fname s in simple_error str - ) @@ (fun () -> Parser.interactive_expr read lexbuf) >>? fun raw -> + ) @@ (fun () -> + let raw = Parser.interactive_expr read lexbuf in + close () ; + raw + ) >>? fun raw -> ok raw diff --git a/src/passes/1-parser/ligodity/.links b/src/passes/1-parser/ligodity/.links index 8af33d655..1f30004d4 100644 --- a/src/passes/1-parser/ligodity/.links +++ b/src/passes/1-parser/ligodity/.links @@ -4,4 +4,18 @@ $HOME/git/ligo/vendors/ligo-utils/simple-utils/pos.mli $HOME/git/ligo/vendors/ligo-utils/simple-utils/pos.ml $HOME/git/ligo/vendors/ligo-utils/simple-utils/region.mli $HOME/git/ligo/vendors/ligo-utils/simple-utils/region.ml +$HOME/git/ligo/src/parser/shared/Lexer.mli +$HOME/git/ligo/src/parser/shared/Lexer.mll +$HOME/git/ligo/src/parser/shared/Error.mli +$HOME/git/ligo/src/parser/shared/EvalOpt.ml +$HOME/git/ligo/src/parser/shared/EvalOpt.mli +$HOME/git/ligo/src/parser/shared/FQueue.ml +$HOME/git/ligo/src/parser/shared/FQueue.mli +$HOME/git/ligo/src/parser/shared/LexerLog.mli +$HOME/git/ligo/src/parser/shared/LexerLog.ml +$HOME/git/ligo/src/parser/shared/Markup.ml +$HOME/git/ligo/src/parser/shared/Markup.mli +$HOME/git/ligo/src/parser/shared/Utils.mli +$HOME/git/ligo/src/parser/shared/Utils.ml +$HOME/git/ligo/src/parser/shared/Version.ml Stubs/Simple_utils.ml diff --git a/src/passes/1-parser/ligodity/AST.ml b/src/passes/1-parser/ligodity/AST.ml index aecb8c277..ae4729117 100644 --- a/src/passes/1-parser/ligodity/AST.ml +++ b/src/passes/1-parser/ligodity/AST.ml @@ -4,6 +4,15 @@ type 'a reg = 'a Region.reg +let rec last to_region = function + [] -> Region.ghost +| [x] -> to_region x +| _::t -> last to_region t + +let nsepseq_to_region to_region (hd,tl) = + let reg (_, item) = to_region item in + Region.cover (to_region hd) (last reg tl) + (* Keywords of OCaml *) type keyword = Region.t @@ -136,7 +145,7 @@ and type_expr = | TSum of (variant reg, vbar) Utils.nsepseq reg | TRecord of record_type | TApp of (type_constr * type_tuple) reg -| TFun of (type_expr * arrow * type_expr) reg +| TFun of (type_expr * arrow * type_expr) reg | TPar of type_expr par reg | TAlias of variable @@ -344,8 +353,6 @@ and conditional = { (* Projecting regions of the input source code *) -let sprintf = Printf.sprintf - let type_expr_to_region = function TProd {region; _} | TSum {region; _} @@ -406,358 +413,6 @@ let expr_to_region = function | ESeq {region; _} | ERecord {region; _} | EConstr {region; _} -> region -(* Printing the tokens with their source locations *) - -let print_nsepseq sep print (head,tail) = - let print_aux ((sep_reg:Region.t), item) = - Printf.printf "%s: %s\n" (sep_reg#compact `Byte) sep; - print item - in print head; List.iter print_aux tail - -let print_sepseq sep print = function - None -> () -| Some seq -> print_nsepseq sep print seq - -let print_csv print = print_nsepseq "," print - -let print_token (reg: Region.t) conc = - Printf.printf "%s: %s\n" (reg#compact `Byte) conc - -let print_var Region.{region; value} = - Printf.printf "%s: Ident %s\n" (region#compact `Byte) value - -let print_uident Region.{region; value} = - Printf.printf "%s: Uident %s\n" (region#compact `Byte) value - -let print_str Region.{region; value} = - Printf.printf "%s: Str \"%s\"\n" (region#compact `Byte) value - -let print_bytes Region.{region; value=lexeme, abstract} = - Printf.printf "%s: Bytes (\"%s\", \"0x%s\")\n" - (region#compact `Byte) lexeme (Hex.to_string abstract) - -let rec print_tokens {decl;eof} = - Utils.nseq_iter print_statement decl; print_token eof "EOF" - -and print_statement = function - Let {value=kwd_let, let_binding; _} -> - print_token kwd_let "let"; - print_let_binding let_binding -| LetEntry {value=kwd_let_entry, let_binding; _} -> - print_token kwd_let_entry "let%entry"; - print_let_binding let_binding -| TypeDecl {value={kwd_type; name; eq; type_expr}; _} -> - print_token kwd_type "type"; - print_var name; - print_token eq "="; - print_type_expr type_expr - -and print_type_expr = function - TProd prod -> print_cartesian prod -| TSum {value; _} -> print_nsepseq "|" print_variant value -| TRecord t -> print_record_type t -| TApp app -> print_type_app app -| TPar par -> print_type_par par -| TAlias var -> print_var var -| TFun t -> print_fun_type t - -and print_fun_type {value; _} = - let domain, arrow, range = value in - print_type_expr domain; - print_token arrow "->"; - print_type_expr range - -and print_type_app {value; _} = - let type_constr, type_tuple = value in - print_type_tuple type_tuple; - print_var type_constr - -and print_type_tuple {value; _} = - let {lpar; inside; rpar} = value in - print_token lpar "("; - print_nsepseq "," print_type_expr inside; - print_token rpar ")" - -and print_type_par {value={lpar;inside=t;rpar}; _} = - print_token lpar "("; - print_type_expr t; - print_token rpar ")" - -and print_projection node = - let {struct_name; selector; field_path} = node in - print_var struct_name; - print_token selector "."; - print_nsepseq "." print_selection field_path - -and print_selection = function - FieldName id -> print_var id -| Component {value; _} -> - let {lpar; inside; rpar} = value in - let Region.{value=lexeme,z; region} = inside in - print_token lpar "("; - print_token region - (sprintf "Int %s (%s)" lexeme (Z.to_string z)); - print_token rpar ")" - -and print_cartesian Region.{value;_} = - print_nsepseq "*" print_type_expr value - -and print_variant {value = {constr; args}; _} = - print_uident constr; - match args with - None -> () - | Some (kwd_of, cartesian) -> - print_token kwd_of "of"; - print_cartesian cartesian - -and print_record_type record_type = - print_injection print_field_decl record_type - -and print_field_decl {value; _} = - let {field_name; colon; field_type} = value - in print_var field_name; - print_token colon ":"; - print_type_expr field_type - -and print_injection : - 'a.('a -> unit) -> 'a injection reg -> unit = - fun print {value; _} -> - let {opening; elements; terminator; closing} = value in - print_opening opening; - print_sepseq ";" print elements; - print_terminator terminator; - print_closing closing - -and print_opening = function - Begin region -> print_token region "begin" -| With region -> print_token region "with" -| LBrace region -> print_token region "{" -| LBracket region -> print_token region "[" - -and print_closing = function - End region -> print_token region "end" -| RBrace region -> print_token region "}" -| RBracket region -> print_token region "]" - -and print_terminator = function - Some semi -> print_token semi ";" -| None -> () - -and print_let_binding {bindings; lhs_type; eq; let_rhs} = - List.iter print_pattern bindings; - (match lhs_type with - None -> () - | Some (colon, type_expr) -> - print_token colon ":"; - print_type_expr type_expr); - (print_token eq "="; print_expr let_rhs) - -and print_pattern = function - PTuple {value=patterns;_} -> print_csv print_pattern patterns -| PList p -> print_list_pattern p -| PVar {region; value} -> - Printf.printf "%s: PVar %s\n" (region#compact `Byte) value -| PUnit {value=lpar,rpar; _} -> - print_token lpar "("; print_token rpar ")" -| PInt {region; value=lex,z} -> - print_token region (sprintf "PInt %s (%s)" lex (Z.to_string z)) -| PTrue kwd_true -> print_token kwd_true "true" -| PFalse kwd_false -> print_token kwd_false "false" -| PString s -> print_str s -| PWild wild -> print_token wild "_" -| PPar {value={lpar;inside=p;rpar}; _} -> - print_token lpar "("; print_pattern p; print_token rpar ")" -| PConstr p -> print_constr_pattern p -| PRecord r -> print_record_pattern r -| PTyped t -> print_typed_pattern t - -and print_list_pattern = function - Sugar p -> print_injection print_pattern p -| PCons p -> print_raw p - -and print_raw {value=p1,c,p2; _} = - print_pattern p1; print_token c "::"; print_pattern p2 - -and print_typed_pattern {value; _} = - let {pattern; colon; type_expr} = value in - print_pattern pattern; - print_token colon ":"; - print_type_expr type_expr - -and print_record_pattern record_pattern = - print_injection print_field_pattern record_pattern - -and print_field_pattern {value; _} = - let {field_name; eq; pattern} = value in - print_var field_name; - print_token eq "="; - print_pattern pattern - -and print_constr_pattern {value=constr, p_opt; _} = - print_uident constr; - match p_opt with - None -> () - | Some pattern -> print_pattern pattern - -and print_expr = function - ELetIn {value;_} -> print_let_in value -| ECond cond -> print_conditional cond -| ETuple {value;_} -> print_csv print_expr value -| ECase {value;_} -> print_match_expr value -| EFun e -> print_fun_expr e - -| EAnnot e -> print_annot_expr e -| ELogic e -> print_logic_expr e -| EArith e -> print_arith_expr e -| EString e -> print_string_expr e - -| ECall {value=f,l; _} -> - print_expr f; Utils.nseq_iter print_expr l -| EVar v -> print_var v -| EProj p -> print_projection p.value -| EUnit {value=lpar,rpar; _} -> - print_token lpar "("; print_token rpar ")" -| EBytes b -> print_bytes b -| EPar {value={lpar;inside=e;rpar}; _} -> - print_token lpar "("; print_expr e; print_token rpar ")" -| EList e -> print_list_expr e -| ESeq seq -> print_sequence seq -| ERecord e -> print_record_expr e -| EConstr {value=constr,None; _} -> print_uident constr -| EConstr {value=(constr, Some arg); _} -> - print_uident constr; print_expr arg - -and print_annot_expr {value=e,t; _} = - print_expr e; - print_token Region.ghost ":"; - print_type_expr t - -and print_list_expr = function - Cons {value={arg1;op;arg2}; _} -> - print_expr arg1; - print_token op "::"; - print_expr arg2 -| List e -> print_injection print_expr e -(*| Append {value=e1,append,e2; _} -> - print_expr e1; - print_token append "@"; - print_expr e2 *) - -and print_arith_expr = function - Add {value={arg1;op;arg2}; _} -> - print_expr arg1; print_token op "+"; print_expr arg2 -| Sub {value={arg1;op;arg2}; _} -> - print_expr arg1; print_token op "-"; print_expr arg2 -| Mult {value={arg1;op;arg2}; _} -> - print_expr arg1; print_token op "*"; print_expr arg2 -| Div {value={arg1;op;arg2}; _} -> - print_expr arg1; print_token op "/"; print_expr arg2 -| Mod {value={arg1;op;arg2}; _} -> - print_expr arg1; print_token op "mod"; print_expr arg2 -| Neg {value={op;arg}; _} -> print_token op "-"; print_expr arg -| Int {region; value=lex,z} -> - print_token region (sprintf "Int %s (%s)" lex (Z.to_string z)) -| Mtz {region; value=lex,z} -> - print_token region (sprintf "Mtz %s (%s)" lex (Z.to_string z)) -| Nat {region; value=lex,z} -> - print_token region (sprintf "Nat %s (%s)" lex (Z.to_string z)) - -and print_string_expr = function - Cat {value={arg1;op;arg2}; _} -> - print_expr arg1; print_token op "^"; print_expr arg2 -| String s -> print_str s - -and print_logic_expr = function - BoolExpr e -> print_bool_expr e -| CompExpr e -> print_comp_expr e - -and print_bool_expr = function - Or {value={arg1;op;arg2}; _} -> - print_expr arg1; print_token op "||"; print_expr arg2 -| And {value={arg1;op;arg2}; _} -> - print_expr arg1; print_token op "&&"; print_expr arg2 -| Not {value={op;arg}; _} -> print_token op "not"; print_expr arg -| True kwd_true -> print_token kwd_true "true" -| False kwd_false -> print_token kwd_false "false" - -and print_comp_expr = function - Lt {value={arg1;op;arg2}; _} -> - print_expr arg1; print_token op "<"; print_expr arg2 -| Leq {value={arg1;op;arg2}; _} -> - print_expr arg1; print_token op "<="; print_expr arg2 -| Gt {value={arg1;op;arg2}; _} -> - print_expr arg1; print_token op ">"; print_expr arg2 -| Geq {value={arg1;op;arg2}; _} -> - print_expr arg1; print_token op ">="; print_expr arg2 -| Neq {value={arg1;op;arg2}; _} -> - print_expr arg1; print_token op "<>"; print_expr arg2 -| Equal {value={arg1;op;arg2}; _} -> - print_expr arg1; print_token op "="; print_expr arg2 - -and print_record_expr e = - print_injection print_field_assign e - -and print_field_assign {value; _} = - let {field_name; assignment; field_expr} = value in - print_var field_name; - print_token assignment "="; - print_expr field_expr - -and print_sequence seq = print_injection print_expr seq - -and print_match_expr expr = - let {kwd_match; expr; opening; - lead_vbar; cases; closing} = expr in - print_token kwd_match "match"; - print_expr expr; - print_opening opening; - print_token_opt lead_vbar "|"; - print_cases cases; - print_closing closing - -and print_token_opt = function - None -> fun _ -> () -| Some region -> print_token region - -and print_cases {value; _} = - print_nsepseq "|" print_case_clause value - -and print_case_clause {value; _} = - let {pattern; arrow; rhs} = value in - print_pattern pattern; - print_token arrow "->"; - print_expr rhs - -and print_let_in (bind: let_in) = - let {kwd_let; binding; kwd_in; body} = bind in - print_token kwd_let "let"; - print_let_binding binding; - print_token kwd_in "in"; - print_expr body - -and print_fun_expr {value; _} = - let {kwd_fun; params; p_annot; arrow; body} = value in - print_token kwd_fun "fun"; - (match p_annot with - None -> List.iter print_pattern params - | Some (colon, type_expr) -> - print_token colon ":"; - print_type_expr type_expr); - print_token arrow "->"; - print_expr body - -and print_conditional {value; _} = - let open Region in - let {kwd_if; test; kwd_then; ifso; kwd_else; ifnot} = value - in print_token ghost "("; - print_token kwd_if "if"; - print_expr test; - print_token kwd_then "then"; - print_expr ifso; - print_token kwd_else "else"; - print_expr ifnot; - print_token ghost ")" - let rec unpar = function EPar {value={inside=expr;_}; _} -> unpar expr | e -> e diff --git a/src/passes/1-parser/ligodity/AST.mli b/src/passes/1-parser/ligodity/AST.mli index f782ebd10..3e4001536 100644 --- a/src/passes/1-parser/ligodity/AST.mli +++ b/src/passes/1-parser/ligodity/AST.mli @@ -1,6 +1,8 @@ +(* Abstract Syntax Tree (AST) for Ligodity *) + [@@@warning "-30"] -(* Abstract Syntax Tree (AST) for Mini-ML *) +open Utils (* Regions @@ -15,6 +17,9 @@ type 'a reg = 'a Region.reg +val last : ('a -> Region.t) -> 'a list -> Region.t +val nsepseq_to_region : ('a -> Region.t) -> ('a,'sep) nsepseq -> Region.t + (* Some keywords of OCaml *) type keyword = Region.t @@ -457,16 +462,6 @@ val norm : ?reg:(Region.t * kwd_fun) -> pattern Utils.nseq -> sep -> expr -> fun let f l = let n = l in n *) -(* Printing the tokens reconstructed from the AST. This is very useful - for debugging, as the output of [print_token ast] can be textually - compared to that of [Lexer.trace] (see module [LexerMain]). The - optional parameter [undo] is bound to [true] if the caller wants - the AST to be unparsed before printing (those nodes that have been - normalised with function [norm_let] and [norm_fun]). *) - -val print_tokens : (*?undo:bool ->*) ast -> unit - - (* Projecting regions from sundry nodes of the AST. See the first comment at the beginning of this file. *) @@ -481,9 +476,3 @@ val type_expr_to_region : type_expr -> Region.t contains. *) val unpar : expr -> expr - -(* TODO *) - -val print_projection : projection -> unit -val print_pattern : pattern -> unit -val print_expr : expr -> unit diff --git a/src/passes/1-parser/ligodity/EvalOpt.ml b/src/passes/1-parser/ligodity/EvalOpt.ml deleted file mode 100644 index f2d6d280c..000000000 --- a/src/passes/1-parser/ligodity/EvalOpt.ml +++ /dev/null @@ -1,123 +0,0 @@ -(* Parsing the command-line option for CameLIGO *) - -type options = { - input : string option; - libs : string list; - verbose : Utils.String.Set.t -} - -let abort msg = - Utils.highlight (Printf.sprintf "Command-line error: %s" msg); exit 1 - -let printf = Printf.printf -let sprintf = Printf.sprintf -let print = print_endline - -(* Help *) - -let help () = - let file = Filename.basename Sys.argv.(0) in - printf "Usage: %s [