From fddce3257a9fc8894bdd4f1308dc33b7412c44ab Mon Sep 17 00:00:00 2001 From: Christian Rinderknecht Date: Sun, 4 Aug 2019 18:03:54 +0200 Subject: [PATCH] First draft of the PascaLIGO parser using streams. --- src/parser/pascaligo/.SParser.ml.tag | 2 + src/parser/pascaligo/.SParserMain.tag | 0 src/parser/pascaligo/SParser.ml | 377 ++++++++++++++++++++++++++ src/parser/pascaligo/SParserMain.ml | 124 +++++++++ 4 files changed, 503 insertions(+) create mode 100644 src/parser/pascaligo/.SParser.ml.tag create mode 100644 src/parser/pascaligo/.SParserMain.tag create mode 100644 src/parser/pascaligo/SParser.ml create mode 100644 src/parser/pascaligo/SParserMain.ml diff --git a/src/parser/pascaligo/.SParser.ml.tag b/src/parser/pascaligo/.SParser.ml.tag new file mode 100644 index 000000000..ea961256f --- /dev/null +++ b/src/parser/pascaligo/.SParser.ml.tag @@ -0,0 +1,2 @@ +ocamldep: -pp camlp5o +ocamlc: -pp camlp5o \ No newline at end of file diff --git a/src/parser/pascaligo/.SParserMain.tag b/src/parser/pascaligo/.SParserMain.tag new file mode 100644 index 000000000..e69de29bb diff --git a/src/parser/pascaligo/SParser.ml b/src/parser/pascaligo/SParser.ml new file mode 100644 index 000000000..94fb6ca81 --- /dev/null +++ b/src/parser/pascaligo/SParser.ml @@ -0,0 +1,377 @@ +open LexToken + +let semi = parser [< 'SEMI _ >] -> () +let vbar = parser [< 'VBAR _ >] -> () +let end_ = parser [< 'End _ >] -> () +let rbracket = parser [< 'RBRACKET _ >] -> () +let times = parser [< 'TIMES _ >] -> () +let arrow = parser [< 'ARROW _ >] -> fun _ _ -> () +let comma = parser [< 'COMMA _ >] -> () +let rbrace = parser [< 'RBRACE _ >] -> () +let equal = parser [< 'EQUAL _ >] -> () +let ass = parser [< 'ASS _ >] -> () +let dot = parser [< 'DOT _ >] -> () +let down = parser [< 'Down _ >] -> () +let or_ = parser [< 'Or _ >] -> fun _ _ -> () +let and_ = parser [< 'And _ >] -> fun _ _ -> () +let cat = parser [< 'CAT _ >] -> fun _ _ -> () +let cons = parser [< 'CONS _ >] -> () +let cons' = parser [< 'CONS _ >] -> fun _ _ -> () +let list = parser [< 'List _ >] -> () +let map = parser [< 'Map _ >] -> () +let set = parser [< 'Set _ >] -> () + +let left_assoc item op = + let rec op_elem i = parser + [< f=op; j=item; r = op_elem (f i j) ?! >] -> r + | [< >] -> i + in parser [< i=item; r = op_elem i >] -> r + +let rec right_assoc item op = parser + [< i=item >] -> i +| [< i=item; f=op; j = right_assoc item op ?! >] -> f i j + +let opt item = parser + [< i=item >] -> Some i +| [< >] -> None + +let rec series item sep term = parser + [< i=item; l = after_item item sep term >] -> i,l + +and after_item item sep term = parser + [< t=term >] -> + [], None, t +| [< s=sep; io = item_or_closing item sep term >] -> + match io with + `Some (item, items, term, closing) -> + (s, item)::items, term, closing + | `Closing closing -> + [], Some s, closing + +and item_or_closing item sep term = parser + [< t=term >] -> + `Closing t +| [< s = series item sep term >] -> + let item, (items, term, closing) = s + in `Some (item, items, term, closing) + +(* Compound constructs *) + +let par item = parser + [< 'LPAR _; _=item; 'RPAR _ >] -> () + +let brackets item = parser + [< 'LBRACKET _; _=item; 'RBRACKET _ >] -> () + +(* Sequences *) + +(* Possibly empty sequence of items *) + +let rec seq item = parser + [< h,t = nseq item >] -> h::t + +(* Non-empty sequence of items *) + +and nseq item = parser + [< i=item; l = seq item >] -> i,l + +(* Non-empty separated sequence of items *) + +and nsepseq item sep = parser + [< i=item >] -> i, [] +| [< i=item; s=sep; h,t = nsepseq item sep ?! >] -> i, (s,h)::t + +(* Possibly empty separated sequence of items *) + +and sepseq item sep = opt (nsepseq item sep) + +(* Main *) + +let rec contract = parser + [< _ = nseq declaration; 'EOF _ >] -> () + +and declaration = parser + [< _ = type_decl; _ = opt semi >] -> () +| [< _ = const_decl; _ = opt semi >] -> () +| [< _ = lambda_decl; _ = opt semi >] -> () + +and type_decl = parser + [< 'Type _; 'Ident _; 'Is _; _=type_expr >] -> () + +and type_expr = parser + [< _=cartesian >] -> () +| [< _=nsepseq variant vbar >] -> () +| [< 'VBAR _; _ = nsepseq variant vbar >] -> () +| [< 'Record _; _ = series field_decl semi end_ >] -> () +| [< 'Record _; 'LBRACKET _; _ = series field_decl semi rbracket >] -> () + +and variant = parser + [< 'Constr _; 'Of _; _=cartesian >] -> () +| [< 'Constr _ >] -> () + +and cartesian = parser + [< _ = nsepseq function_type times >] -> () + +and function_type strm = right_assoc core_type arrow strm + +and core_type = parser + [< 'Ident _ >] -> () +| [< 'Ident _; _=type_tuple >] -> () +| [< 'Map _; _=type_tuple >] -> () +| [< 'Set _; _ = par type_expr >] -> () +| [< 'List _; _ = par type_expr >] -> () +| [< _ = par type_expr >] -> () + +and type_tuple = parser + [< _ = par (nsepseq type_expr comma) >] -> () + +and field_decl = parser + [< 'Ident _; 'COLON _; _=type_expr >] -> () + +(* Function and procedure declarations *) + +and lambda_decl = parser + [< _=fun_decl >] -> () +| [< _=proc_decl >] -> () +| [< _=entry_decl >] -> () + +and fun_decl = parser + [< 'Function _; 'Ident _; _=parameters; 'COLON _; + _=type_expr; 'Is _; _ = seq local_decl; _=block; + 'With _; _=expr >] -> () + +and entry_decl = parser + [< 'Entrypoint _; 'Ident _; _=entry_params; + 'COLON _; _ = type_expr; 'Is _; _ = seq local_decl; + _=block; 'With _; _=expr >] -> () + +and entry_params = parser + [< p = par (nsepseq entry_param_decl semi) >] -> p + +and proc_decl = parser + [< 'Procedure _; 'Ident _; _parameters; 'Is _; + _ = seq local_decl; _=block >] -> () + +and parameters = parser + [< p = par (nsepseq param_decl semi) >] -> p + +and param_decl = parser + [< 'Var _; 'Ident _; 'COLON _; _=param_type >] -> () +| [< 'Const _; 'Ident _; 'COLON _; _=param_type >] -> () + +and entry_param_decl = parser + [< _ = param_decl >] -> () +| [< 'Storage _; 'Ident _; 'COLON _; _=param_type >] -> () + +and param_type = parser [< c = cartesian >] -> c + +and block = parser + [< 'Begin _; _ = series statement semi end_ >] -> () +| [< 'Block _; 'LBRACE _; _ = series statement semi rbrace >] -> () + +and statement = parser + [< _=instruction >] -> () +| [< _=data_decl >] -> () + +and data_decl = parser + [< _=const_decl >] -> () +| [< _=var_decl >] -> () + +and const_decl = parser + [< 'Const _; _ = unqualified_decl equal >] -> () + +and var_decl = parser + [< 'Var _; _ = unqualified_decl ass >] -> () + +and local_decl = parser + [< _=fun_decl; _ = opt semi >] -> () +| [< _=proc_decl; _ = opt semi >] -> () +| [< _=data_decl; _ = opt semi >] -> () + +and unqualified_decl op = parser + [< 'Ident _; 'COLON _; _=type_expr; _=op; _=expr >] -> () + +and instruction = parser + [< _=block >] -> () +| [< 'If _; _=expr; 'Then _; _=if_clause; _ = opt semi; + 'Else _; _=if_clause >] -> () +| [< _ = case instruction >] -> () +| [< 'Ident _; _=arguments >] -> () +| [< 'Ident _; 'ASS _; _=expr >] -> () +| [< 'Ident _; _ = brackets expr; 'ASS _; _=expr >] -> () +| [< 'Ident _; 'DOT _; _ = nsepseq selection dot; + _ = opt (brackets expr); 'ASS _; _=expr >] -> () +| [< _=loop >] -> () +| [< 'Fail _; _=expr >] -> () +| [< 'Skip _ >] -> () +| [< 'Patch _; _=expr; 'From _; 'Map _; _=path >] -> () +| [< 'Patch _; _=expr; 'From _; 'Set _; _=path >] -> () + +and path = parser + [< 'Ident _ >] -> () +| [< 'Ident _; 'DOT _; _ = nsepseq selection dot >] -> () + +and injection kind element = parser + [< _=kind; _ = series element semi end_ >] -> () +| [< _=kind; 'End _ >] -> () +| [< _=kind; 'LBRACKET _; _ = bracketed element >] -> () + +and bracketed element = parser + [< _ = series element semi rbracket >] -> () +| [< 'RBRACKET _ >] -> () + +and binding = parser + [< _=expr; 'ARROW _; _=expr >] -> () + +and if_clause = parser + [< _=instruction >] -> () +| [< 'LBRACE _; _ = series statement comma rbrace >] -> () + +and case rhs = parser + [< 'Case _; _=expr; 'Of _; _ = cases rhs; 'End _ >] -> () +| [< 'Case _; _=expr; 'Of _; 'LBRACKET _; _ = cases rhs; + 'RBRACKET _ >] -> () + +and cases rhs = parser + [< _ = nsepseq (case_clause rhs) vbar >] -> () +| [< 'VBAR _; _ = nsepseq (case_clause rhs) vbar >] -> () + +and case_clause rhs = parser + [< _=pattern; 'ARROW _; _=rhs >] -> () + +and loop = parser + [< 'While _; _=expr; _=block >] -> () +| [< 'For _; 'Ident _; 'ASS _; _=expr; _ = opt down; + 'To _; _=expr; _ = opt step_clause >] -> () +| [< 'For _; 'Ident _; 'In _; _=expr; _=block >] -> () +| [< 'For _; 'Ident _; 'ARROW _; 'Ident _; 'In _; + _=expr; _=block >] -> () + +and step_clause = parser + [< 'Step _; _=expr >] -> () + +(* Expressions *) + +and interactive = parser + [< _=expr; 'EOF _ >] -> () + +and expr = parser + [< _ = case expr >] -> () +| [< _=disj_expr >] -> () + +and disj_expr strm = left_assoc conj_expr or_ strm + +and conj_expr strm = left_assoc set_membership and_ strm + +and set_membership = parser + [< _ = core_expr; 'Contains _; _=set_membership >] -> () +| [< _ = comp_expr >] -> () + +and comp_expr strm = left_assoc cat_expr op_comp strm + +and op_comp = parser + [< 'LT _ >] -> fun _ _ -> () +| [< 'LEQ _ >] -> fun _ _ -> () +| [< 'GT _ >] -> fun _ _ -> () +| [< 'GEQ _ >] -> fun _ _ -> () +| [< 'EQUAL _ >] -> fun _ _ -> () +| [< 'NEQ _ >] -> fun _ _ -> () + +and cat_expr strm = right_assoc cons_expr cat strm + +and cons_expr strm = left_assoc add_expr cons' strm + +and add_expr strm = left_assoc mult_expr add_op strm + +and add_op = parser + [< 'PLUS _ >] -> fun _ _ -> () +| [< 'MINUS _ >] -> fun _ _ -> () + +and mult_expr strm = left_assoc unary_expr mult_op strm + +and mult_op = parser + [< 'TIMES _ >] -> fun _ _ -> () +| [< 'SLASH _ >] -> fun _ _ -> () +| [< 'Mod _ >] -> fun _ _ -> () + +and unary_expr = parser + [< 'MINUS _; _=core_expr >] -> () +| [< 'Not _; _=core_expr >] -> () +| [< _=core_expr >] -> () + +and core_expr = parser + [< 'Int _ >] -> () +| [< 'Nat _ >] -> () +| [< 'Mtz _ >] -> () +| [< 'Ident _ >] -> () +| [< 'Ident _; _ = brackets expr >] -> () +| [< 'Ident _; 'DOT _; _ = nsepseq selection dot; + _ = opt (brackets expr) >] -> () +| [< 'Ident _; _=arguments >] -> () +| [< 'String _ >] -> () +| [< 'Bytes _ >] -> () +| [< 'C_False _ >] -> () +| [< 'C_True _ >] -> () +| [< 'C_Unit _ >] -> () +| [< 'C_None _ >] -> () +| [< 'C_Some _; _=arguments >] -> () +| [< 'Constr _; _ = opt arguments >] -> () +| [< _ = par paren_expr >] -> () +| [< _ = injection list expr >] -> () +| [< 'Nil _ >] -> () +| [< _=structure >] -> () + +and paren_expr = parser + [< _=disj_expr; 'COLON _; _=type_expr >] -> () +| [< _=disj_expr >] -> () +| [< _=disj_expr; 'COMMA _; _ = nsepseq expr comma >] -> () +| [< _ = case expr >] -> () +| [< _ = case expr; 'COMMA _; _ = nsepseq expr comma >] -> () + +and structure = parser + [< _ = injection map binding >] -> () +| [< _ = injection set expr >] -> () +| [< _=record_expr >] -> () + +and selection = parser + [< 'Ident _ >] -> () +| [< 'Int _ >] -> () + +and record_expr = parser + [< 'Record _; _ = series field_assignment semi end_ >] -> () +| [< 'Record _; 'LBRACKET _; + _ = series field_assignment semi rbracket >] -> () + +and field_assignment = parser + [< 'Ident _; 'EQUAL _; _=expr >] -> () + +and arguments = parser + [< _ = par (nsepseq expr comma) >] -> () + +(* Patterns *) + +and pattern = parser + [< _ = nsepseq core_pattern cons >] -> () + +and core_pattern = parser + [< 'Ident _ >] -> () +| [< 'WILD _ >] -> () +| [< 'Int _ >] -> () +| [< 'String _ >] -> () +| [< 'C_Unit _ >] -> () +| [< 'C_False _ >] -> () +| [< 'C_True _ >] -> () +| [< 'C_None _ >] -> () +| [< 'C_Some _; _ = par core_pattern >] -> () +| [< 'Constr _; _ = opt tuple_pattern >] -> () +| [< _ = injection list core_pattern >] -> () +| [< 'Nil _ >] -> () +| [< _ = par paren_pattern >] -> () + +and paren_pattern = parser + [< _=core_pattern; 'CONS _; _=pattern >] -> () +| [< _=core_pattern >] -> () +| [< _=core_pattern; 'COMMA _; _ = nsepseq core_pattern comma >] -> () + +and tuple_pattern = parser + [< _ = par (nsepseq core_pattern comma) >] -> () diff --git a/src/parser/pascaligo/SParserMain.ml b/src/parser/pascaligo/SParserMain.ml new file mode 100644 index 000000000..d6b91d0f1 --- /dev/null +++ b/src/parser/pascaligo/SParserMain.ml @@ -0,0 +1,124 @@ +(* Driver for the parser of PascaLIGO *) + +open SParser (* TEMPORARY *) + +(* Error printing and exception tracing *) + +let () = Printexc.record_backtrace true + +(* Reading the command-line options *) + +let options = EvalOpt.read () + +open EvalOpt + +(* Auxiliary functions *) + +let sprintf = Printf.sprintf + +(* Extracting the input file *) + +let file = + match options.input with + None | Some "-" -> false + | Some _ -> true + +(* Error printing and exception tracing *) + +let () = Printexc.record_backtrace true + +let external_ text = + Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;; + +type Error.t += ParseError + +let error_to_string = function + ParseError -> "Syntax error.\n" +| _ -> assert false + +let print_error ?(offsets=true) mode Region.{region; value} ~file = + let msg = error_to_string value in + let reg = region#to_string ~file ~offsets mode in + Utils.highlight (sprintf "Parse error %s:\n%s%!" reg msg) + +(* Path for CPP inclusions (#include) *) + +let lib_path = + match options.libs with + [] -> "" + | libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path + in List.fold_right mk_I libs "" + +(* Preprocessing the input source and opening the input channels *) + +let prefix = + match options.input with + None | Some "-" -> "temp" + | Some file -> Filename.(file |> basename |> remove_extension) + +let suffix = ".pp.ligo" + +let pp_input = + if Utils.String.Set.mem "cpp" options.verbose + then prefix ^ suffix + else let pp_input, pp_out = Filename.open_temp_file prefix suffix + in close_out pp_out; pp_input + +let cpp_cmd = + match options.input with + None | Some "-" -> + Printf.sprintf "cpp -traditional-cpp%s - > %s" + lib_path pp_input + | Some file -> + Printf.sprintf "cpp -traditional-cpp%s %s > %s" + lib_path file pp_input + +let () = + if Utils.String.Set.mem "cpp" options.verbose + then Printf.eprintf "%s\n%!" cpp_cmd; + if Sys.command cpp_cmd <> 0 then + external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd) + +(* Instanciating the lexer *) + +module Lexer = Lexer.Make (LexToken) + +module Log = LexerLog.Make (Lexer) + +let Lexer.{read; buffer; get_pos; get_last; close} = + Lexer.open_token_stream (Some pp_input) + +and cout = stdout + +let log = Log.output_token ~offsets:options.offsets + options.mode options.cmd cout + +and close_all () = close (); close_out cout + +(* Tokeniser *) + +let tokeniser = read ~log + +(* Main *) + +let () = + try + let ast = Parser.contract tokeniser buffer in + if Utils.String.Set.mem "ast" options.verbose + then begin + ParserLog.offsets := options.offsets; + ParserLog.mode := options.mode; + ParserLog.print_tokens ast + end + with + Lexer.Error err -> + close_all (); + Lexer.print_error ~offsets:options.offsets + options.mode err ~file + | Parser.Error -> + let region = get_last () in + let error = Region.{region; value=ParseError} in + let () = close_all () in + print_error ~offsets:options.offsets + options.mode error ~file + | Sys_error msg -> Utils.highlight msg