From 94295fa281721222ee843b25ac54e0cb0543f0ad Mon Sep 17 00:00:00 2001 From: Benjamin Canou Date: Sat, 4 Nov 2017 00:16:05 +0100 Subject: [PATCH] Micheline: resilient parser for better error reporting --- .../embedded/alpha/client_proto_context.ml | 12 +- .../embedded/alpha/client_proto_programs.ml | 104 ++++--- .../embedded/alpha/client_proto_programs.mli | 3 +- .../embedded/alpha/michelson_v1_emacs.ml | 36 ++- .../alpha/michelson_v1_error_reporter.ml | 9 +- .../embedded/alpha/michelson_v1_parser.ml | 26 +- .../embedded/alpha/michelson_v1_parser.mli | 4 +- .../embedded/alpha/michelson_v1_printer.ml | 4 +- src/micheline/micheline_parser.ml | 265 +++++++++++------- src/micheline/micheline_parser.mli | 10 +- src/proto/alpha/michelson_v1_primitives.ml | 32 ++- src/proto/alpha/michelson_v1_primitives.mli | 3 +- 12 files changed, 311 insertions(+), 197 deletions(-) diff --git a/src/client/embedded/alpha/client_proto_context.ml b/src/client/embedded/alpha/client_proto_context.ml index c3a26a76f..9179544ed 100644 --- a/src/client/embedded/alpha/client_proto_context.ml +++ b/src/client/embedded/alpha/client_proto_context.ml @@ -40,13 +40,18 @@ let get_branch rpc_config block branch = Client_node_rpcs.Blocks.info rpc_config block >>=? fun { net_id ; hash } -> return (net_id, hash) +let parse_expression arg = + Lwt.return + (Micheline_parser.no_parsing_error + (Michelson_v1_parser.parse_expression arg)) + let transfer rpc_config block ?force ?branch ~source ~src_pk ~src_sk ~destination ?arg ~amount ~fee () = get_branch rpc_config block branch >>=? fun (net_id, branch) -> begin match arg with | Some arg -> - Lwt.return (Michelson_v1_parser.parse_expression arg) >>=? fun { expanded = arg } -> + parse_expression arg >>=? fun { expanded = arg } -> return (Some arg) | None -> return None end >>=? fun parameters -> @@ -106,7 +111,7 @@ let originate_contract rpc_config block ?force ?branch ~source ~src_pk ~src_sk ~manager_pkh ~balance ?delegatable ?delegatePubKey ~code ~init ~fee ~spendable () = - Lwt.return (Michelson_v1_parser.parse_expression init) >>=? fun { expanded = storage } -> + parse_expression init >>=? fun { expanded = storage } -> Client_proto_rpcs.Context.Contract.counter rpc_config block source >>=? fun pcounter -> let counter = Int32.succ pcounter in @@ -383,7 +388,8 @@ let commands () = combine with -init if the storage type is not unit" @@ stop) begin fun (fee, delegate, force, delegatable, spendable, init, no_print_source) - neu (_, manager) balance (_, source) { expanded = code } cctxt -> + neu (_, manager) balance (_, source) program cctxt -> + Lwt.return (Micheline_parser.no_parsing_error program) >>=? fun { expanded = code } -> check_contract cctxt neu >>=? fun () -> get_delegate_pkh cctxt delegate >>=? fun delegate -> get_manager cctxt source >>=? fun (_src_name, _src_pkh, src_pk, src_sk) -> diff --git a/src/client/embedded/alpha/client_proto_programs.ml b/src/client/embedded/alpha/client_proto_programs.ml index 1c8e99a86..ff5db0241 100644 --- a/src/client/embedded/alpha/client_proto_programs.ml +++ b/src/client/embedded/alpha/client_proto_programs.ml @@ -13,18 +13,15 @@ open Client_proto_args open Michelson_v1_printer module Program = Client_aliases.Alias (struct - type t = Michelson_v1_parser.parsed + type t = Michelson_v1_parser.parsed Micheline_parser.parsing_result let encoding = Data_encoding.conv - (fun { Michelson_v1_parser.source } -> source) - (fun source -> - match Michelson_v1_parser.parse_toplevel source with - | Ok parsed -> parsed - | Error _ -> Pervasives.failwith "could not decode Michelson program alias") + (fun ({ Michelson_v1_parser.source }, _) -> source) + (fun source -> Michelson_v1_parser.parse_toplevel source) Data_encoding.string let of_source _cctxt source = - Lwt.return (Michelson_v1_parser.parse_toplevel source) - let to_source _ { Michelson_v1_parser.source } = return source + return (Michelson_v1_parser.parse_toplevel source) + let to_source _ ({ Michelson_v1_parser.source }, _) = return source let name = "program" end) @@ -33,7 +30,7 @@ let group = title = "Commands for managing the record of known programs" } let data_parameter = - Cli_entries.parameter (fun _ data -> Lwt.return (Michelson_v1_parser.parse_expression data)) + Cli_entries.parameter (fun _ data -> return (Michelson_v1_parser.parse_expression data)) let commands () = let open Cli_entries in @@ -70,7 +67,9 @@ let commands () = @@ Program.fresh_alias_param @@ Program.source_param @@ stop) - (fun () name hash cctxt -> Program.add cctxt name hash) ; + (fun () name program cctxt -> + Lwt.return (Micheline_parser.no_parsing_error program) >>=? fun program -> + Program.add cctxt name (program, [])) ; command ~group ~desc: "forget a remembered program" no_options @@ -85,8 +84,8 @@ let commands () = @@ Program.alias_param @@ stop) (fun () (_, program) cctxt -> - Program.to_source cctxt program >>=? fun source -> - cctxt.message "%s\n" source >>= fun () -> + Lwt.return (Micheline_parser.no_parsing_error program) >>=? fun program -> + cctxt.message "%s\n" program.source >>= fun () -> return ()) ; command ~group ~desc: "ask the node to run a program" @@ -101,53 +100,62 @@ let commands () = data_parameter @@ stop) (fun (trace_stack, amount, no_print_source) program storage input cctxt -> + Lwt.return (Micheline_parser.no_parsing_error program) >>=? fun program -> + Lwt.return (Micheline_parser.no_parsing_error storage) >>=? fun storage -> + Lwt.return (Micheline_parser.no_parsing_error input) >>=? fun input -> let print_errors errs = cctxt.warning "%a" (Michelson_v1_error_reporter.report_errors ~details:false ~show_source: (not no_print_source) - ~parsed:program) errs >>= fun () -> + ~parsed: program) errs >>= fun () -> cctxt.error "error running program" >>= fun () -> return () in - if trace_stack then - Client_proto_rpcs.Helpers.trace_code cctxt.rpc_config - cctxt.config.block program.expanded (storage.expanded, input.expanded, amount) >>= function - | Ok (storage, output, trace) -> - cctxt.message - "@[@[storage@,%a@]@,\ - @[output@,%a@]@,@[trace@,%a@]@]@." - print_expr storage - print_expr output - (Format.pp_print_list - (fun ppf (loc, gas, stack) -> - Format.fprintf ppf - "- @[location: %d (remaining gas: %d)@,\ - [ @[%a ]@]@]" - loc gas - (Format.pp_print_list print_expr) - stack)) - trace >>= fun () -> - return () - | Error errs -> print_errors errs - else - Client_proto_rpcs.Helpers.run_code cctxt.rpc_config - cctxt.config.block program.expanded (storage.expanded, input.expanded, amount) >>= function - | Ok (storage, output) -> - cctxt.message "@[@[storage@,%a@]@,@[output@,%a@]@]@." - print_expr storage - print_expr output >>= fun () -> - return () - | Error errs -> - print_errors errs); + begin + if trace_stack then + Client_proto_rpcs.Helpers.trace_code cctxt.rpc_config + cctxt.config.block program.expanded + (storage.expanded, input.expanded, amount) >>=? fun (storage, output, trace) -> + cctxt.message + "@[@[storage@,%a@]@,\ + @[output@,%a@]@,@[trace@,%a@]@]@." + print_expr storage + print_expr output + (Format.pp_print_list + (fun ppf (loc, gas, stack) -> + Format.fprintf ppf + "- @[location: %d (remaining gas: %d)@,\ + [ @[%a ]@]@]" + loc gas + (Format.pp_print_list print_expr) + stack)) + trace >>= fun () -> + return () + else + Client_proto_rpcs.Helpers.run_code cctxt.rpc_config + cctxt.config.block program.expanded + (storage.expanded, input.expanded, amount) >>=? fun (storage, output) -> + cctxt.message "@[@[storage@,%a@]@,@[output@,%a@]@]@." + print_expr storage + print_expr output >>= fun () -> + return () + end >>= function + | Ok () -> return () + | Error errs -> + print_errors errs); command ~group ~desc: "ask the node to typecheck a program" (args3 show_types_switch emacs_mode_switch no_print_source_flag) (prefixes [ "typecheck" ; "program" ] @@ Program.source_param @@ stop) - (fun (show_types, emacs_mode, no_print_source) program cctxt -> - Client_proto_rpcs.Helpers.typecheck_code - cctxt.rpc_config cctxt.config.block program.expanded >>= fun res -> + (fun (show_types, emacs_mode, no_print_source) (program, errors) cctxt -> + begin match errors with + | [] -> + Client_proto_rpcs.Helpers.typecheck_code + cctxt.rpc_config cctxt.config.block program.expanded + | errors -> Lwt.return (Error errors) + end >>= fun res -> if emacs_mode then let type_map, errs = match res with | Ok type_map -> type_map, [] @@ -189,6 +197,8 @@ let commands () = data_parameter @@ stop) (fun no_print_source data exp_ty cctxt -> + Lwt.return (Micheline_parser.no_parsing_error data) >>=? fun data -> + Lwt.return (Micheline_parser.no_parsing_error exp_ty) >>=? fun exp_ty -> Client_proto_rpcs.Helpers.typecheck_data cctxt.Client_commands.rpc_config cctxt.config.block (data.expanded, exp_ty.expanded) >>= function | Ok () -> @@ -211,6 +221,7 @@ let commands () = data_parameter @@ stop) (fun () data cctxt -> + Lwt.return (Micheline_parser.no_parsing_error data) >>=? fun data -> Client_proto_rpcs.Helpers.hash_data cctxt.Client_commands.rpc_config cctxt.config.block (data.expanded) >>= function | Ok hash -> @@ -233,6 +244,7 @@ let commands () = @@ Client_keys.Secret_key.alias_param @@ stop) (fun () data (_, key) cctxt -> + Lwt.return (Micheline_parser.no_parsing_error data) >>=? fun data -> Client_proto_rpcs.Helpers.hash_data cctxt.rpc_config cctxt.config.block (data.expanded) >>= function | Ok hash -> diff --git a/src/client/embedded/alpha/client_proto_programs.mli b/src/client/embedded/alpha/client_proto_programs.mli index 2a1bf9c71..ce9edba89 100644 --- a/src/client/embedded/alpha/client_proto_programs.mli +++ b/src/client/embedded/alpha/client_proto_programs.mli @@ -7,6 +7,7 @@ (* *) (**************************************************************************) -module Program : Client_aliases.Alias with type t = Michelson_v1_parser.parsed +module Program : Client_aliases.Alias + with type t = Michelson_v1_parser.parsed Micheline_parser.parsing_result val commands: unit -> Client_commands.command list diff --git a/src/client/embedded/alpha/michelson_v1_emacs.ml b/src/client/embedded/alpha/michelson_v1_emacs.ml index af1e9820c..84dd2fd1b 100644 --- a/src/client/embedded/alpha/michelson_v1_emacs.ml +++ b/src/client/embedded/alpha/michelson_v1_emacs.ml @@ -83,7 +83,8 @@ let first_error_location errs = | Transfer_in_dip loc :: _ | Invalid_constant (loc, _, _) :: _ | Invalid_contract (loc, _) :: _ - | Comparable_type_expected (loc, _) :: _ -> loc + | Comparable_type_expected (loc, _) :: _ + | Michelson_v1_primitives.Invalid_primitive_name loc :: _ -> loc | _ :: rest -> find rest in find errs @@ -91,6 +92,9 @@ let report_errors ppf (parsed, errs) = Format.fprintf ppf "(@[%a@])" (Format.pp_print_list (fun ppf err -> + let find_location loc = + let oloc = List.assoc loc parsed.Michelson_v1_parser.unexpansion_table in + fst (List.assoc oloc parsed.expansion_table) in let errs, loc = match err with | Environment.Ecoproto_error (top :: errs) -> @@ -98,19 +102,33 @@ let report_errors ppf (parsed, errs) = begin match top with | Ill_typed_contract (expr, _) | Ill_typed_data (_, expr, _) -> - if expr = parsed.Michelson_v1_parser.expanded then - first_error_location (top :: errs) - else 0 - | _ -> 0 + if expr = parsed.expanded then + find_location (first_error_location (top :: errs)) + else find_location 0 + | Michelson_v1_primitives.Invalid_primitive_name loc -> + find_location loc + | _ -> find_location 0 end - | err -> [ err ], 0 in + | Invalid_utf8_sequence (point, _) + | Unexpected_character (point, _) + | Undefined_escape_sequence (point, _) + | Missing_break_after_number point as err -> + [ err ], { start = point ; stop = point } + | Unterminated_string loc + | Unterminated_integer loc + | Unterminated_comment loc + | Unclosed { loc } + | Unexpected { loc } + | Extra { loc } as err -> + [ err ], loc + | Misaligned node as err -> + [ err ], location node + | err -> [ err ], find_location 0 in let message = Format.asprintf "%a" (Michelson_v1_error_reporter.report_errors ~details:false ~show_source:false ~parsed) errs in - let { start = { point = s } ; stop = { point = e } } = - let oloc = List.assoc loc parsed.unexpansion_table in - fst (List.assoc oloc parsed.expansion_table) in + let { start = { point = s } ; stop = { point = e } } = loc in Format.fprintf ppf "(%d %d %S)" (s + 1) (e + 1) message)) errs diff --git a/src/client/embedded/alpha/michelson_v1_error_reporter.ml b/src/client/embedded/alpha/michelson_v1_error_reporter.ml index 5ba52b922..5a2c87fff 100644 --- a/src/client/embedded/alpha/michelson_v1_error_reporter.ml +++ b/src/client/embedded/alpha/michelson_v1_error_reporter.ml @@ -72,7 +72,8 @@ let collect_error_locations errs = | Invalid_contract (loc, _) | Comparable_type_expected (loc, _) | Overflow loc - | Reject loc) :: rest -> + | Reject loc + | Michelson_v1_primitives.Invalid_primitive_name loc) :: rest -> collect (loc :: acc) rest | _ :: rest -> collect acc rest in collect [] errs @@ -341,9 +342,7 @@ let report_errors ~details ~show_source ?parsed ppf errs = print_ty (None, tyb) | Reject _ -> Format.fprintf ppf "Script reached FAIL instruction" | Overflow _ -> Format.fprintf ppf "Unexpected arithmetic overflow" - | err -> - Format.fprintf ppf "%a" - Environment.Error_monad.pp_print_error [ err ] + | err -> Format.fprintf ppf "%a" Environment.Error_monad.pp err end ; if rest <> [] then Format.fprintf ppf "@," ; print_trace locations rest in @@ -351,5 +350,5 @@ let report_errors ~details ~show_source ?parsed ppf errs = (Format.pp_print_list (fun ppf -> function | Environment.Ecoproto_error errs -> print_trace (fun _ -> None) errs - | err -> pp_print_error ppf [ err ])) + | err -> pp ppf err)) errs diff --git a/src/client/embedded/alpha/michelson_v1_parser.ml b/src/client/embedded/alpha/michelson_v1_parser.ml index 77db678f1..95da56c8d 100644 --- a/src/client/embedded/alpha/michelson_v1_parser.ml +++ b/src/client/embedded/alpha/michelson_v1_parser.ml @@ -17,7 +17,7 @@ type parsed = expansion_table : (int * (Micheline_parser.location * int list)) list ; unexpansion_table : (int * int) list } -let expand_all source ast = +let expand_all source ast errors = let unexpanded, loc_table = extract_locations ast in let rec expand expr = @@ -49,20 +49,28 @@ let expand_all source ast = (l, (ploc, elocs))) (List.sort compare loc_table) (List.sort compare grouped) in - Environment.wrap_error (Michelson_v1_primitives.prims_of_strings expanded) >>? fun expanded -> - ok { source ; unexpanded ; expanded ; expansion_table ; unexpansion_table } + match Environment.wrap_error (Michelson_v1_primitives.prims_of_strings expanded) with + | Ok expanded -> + { source ; unexpanded ; expanded ; + expansion_table ; unexpansion_table }, + errors + | Error errs -> + { source ; unexpanded ; + expanded = Micheline.strip_locations (Seq ((), [], None)) ; + expansion_table ; unexpansion_table }, + errs @ errors let parse_toplevel ?check source = - Micheline_parser.tokenize source >>? fun tokens -> - Micheline_parser.parse_toplevel ?check tokens >>? fun asts -> + let tokens, lexing_errors = Micheline_parser.tokenize source in + let asts, parsing_errors = Micheline_parser.parse_toplevel ?check tokens in let ast = match asts with | [ ast ] -> ast | asts -> let start = min_point asts and stop = max_point asts in Seq ({ start ; stop }, asts, None) in - expand_all source ast + expand_all source ast (lexing_errors @ parsing_errors) let parse_expression ?check source = - Micheline_parser.tokenize source >>? fun tokens -> - Micheline_parser.parse_expression ?check tokens >>? fun ast -> - expand_all source ast + let tokens, lexing_errors = Micheline_parser.tokenize source in + let ast, parsing_errors = Micheline_parser.parse_expression ?check tokens in + expand_all source ast (lexing_errors @ parsing_errors) diff --git a/src/client/embedded/alpha/michelson_v1_parser.mli b/src/client/embedded/alpha/michelson_v1_parser.mli index 8ea5ed18f..417d293bc 100644 --- a/src/client/embedded/alpha/michelson_v1_parser.mli +++ b/src/client/embedded/alpha/michelson_v1_parser.mli @@ -25,5 +25,5 @@ type parsed = expression. *) } -val parse_toplevel : ?check:bool -> string -> parsed tzresult -val parse_expression : ?check:bool -> string -> parsed tzresult +val parse_toplevel : ?check:bool -> string -> parsed Micheline_parser.parsing_result +val parse_expression : ?check:bool -> string -> parsed Micheline_parser.parsing_result diff --git a/src/client/embedded/alpha/michelson_v1_printer.ml b/src/client/embedded/alpha/michelson_v1_printer.ml index beecc5654..dd1afb24b 100644 --- a/src/client/embedded/alpha/michelson_v1_printer.ml +++ b/src/client/embedded/alpha/michelson_v1_printer.ml @@ -99,8 +99,8 @@ let unparse ?type_map parse expanded = |> Micheline_printer.printable (fun n -> n) |> Format.asprintf "%a" Micheline_printer.print_expr in match parse source with - | Ok res -> res - | Error _ -> Pervasives.failwith "Michelson_v1_printer.unexpand" + | res, [] -> res + | _, _ :: _ -> Pervasives.failwith "Michelson_v1_printer.unexpand" let unparse_toplevel ?type_map = unparse ?type_map Michelson_v1_parser.parse_toplevel let unparse_expression = unparse Michelson_v1_parser.parse_expression diff --git a/src/micheline/micheline_parser.ml b/src/micheline/micheline_parser.ml index 3970e2e94..cffba0271 100644 --- a/src/micheline/micheline_parser.ml +++ b/src/micheline/micheline_parser.ml @@ -10,6 +10,8 @@ open Error_monad open Micheline +type 'a parsing_result = 'a * error list + type point = { point : int ; byte : int ; @@ -111,17 +113,20 @@ let tokenize source = let tok start stop token = { loc = { start ; stop } ; token } in let stack = ref [] in - let next () = + let errors = ref [] in + let rec next () = match !stack with | charloc :: charlocs -> stack := charlocs ; - ok charloc + charloc | [] -> let loc = here () in match Uutf.decode decoder with | `Await -> assert false - | `Malformed s -> error (Invalid_utf8_sequence (loc, s)) - | `Uchar _ | `End as other -> ok (other, loc) in + | `Malformed s -> + errors := Invalid_utf8_sequence (loc, s) :: !errors ; + next () + | `Uchar _ | `End as other -> other, loc in let back charloc = stack := charloc :: !stack in let uchar_to_char c = @@ -130,22 +135,25 @@ let tokenize source = else None in let rec skip acc = - next () >>? function - | `End, _ -> ok (List.rev acc) + match next () with + | `End, _ -> List.rev acc | `Uchar c, start -> begin match uchar_to_char c with | Some ('a'..'z' | 'A'..'Z') -> ident acc start (fun s -> Ident s) | Some '@' -> ident acc start (fun s -> Annot s) | Some '-' -> - begin next () >>? function + begin match next () with | `End, stop -> - error (Unterminated_integer { start ; stop }) - | `Uchar c, stop -> + errors := Unterminated_integer { start ; stop } :: !errors ; + List.rev acc + | `Uchar c, stop as first -> begin match uchar_to_char c with | Some '0' -> base acc start | Some ('1'..'9') -> integer `dec acc start false | Some _ | None -> - error (Unterminated_integer { start ; stop }) + errors := Unterminated_integer { start ; stop } :: !errors ; + back first ; + skip acc end end | Some '0' -> base acc start @@ -159,26 +167,31 @@ let tokenize source = | Some '"' -> string acc [] start | Some '#' -> eol_comment acc start | Some '/' -> - begin next () >>? function + begin match next () with | `Uchar c, _ when Uchar.equal c (Uchar.of_char '*') -> comment acc start 0 - | (`Uchar _ | `End), _ -> - error (Unexpected_character (start, "/")) + | (`Uchar _ | `End), _ as charloc -> + errors := Unexpected_character (start, "/") :: !errors ; + back charloc ; + skip acc end | Some _ | None -> let byte = Uutf.decoder_byte_count decoder in let s = String.sub source start.byte (byte - start.byte) in - error (Unexpected_character (start, s)) + errors := Unexpected_character (start, s) :: !errors ; + skip acc end and base acc start = - next () >>? function + match next () with | (`Uchar c, stop) as charloc -> begin match uchar_to_char c with | Some ('0'.. '9') -> integer `dec acc start false | Some 'x' -> integer `hex acc start true | Some 'b' -> integer `bin acc start true | Some ('a' | 'c'..'w' | 'y' | 'z' | 'A'..'Z') -> - error (Missing_break_after_number stop) + errors := Missing_break_after_number stop :: !errors ; + back charloc ; + skip (tok start stop (Int "0") :: acc) | Some _ | None -> back charloc ; skip (tok start stop (Int "0") :: acc) @@ -191,46 +204,57 @@ let tokenize source = let value = String.sub source start.byte (stop.byte - start.byte) in tok start stop (Int value) in - next () >>? function + match next () with | (`Uchar c, stop) as charloc -> + let missing_break () = + errors := Missing_break_after_number stop :: !errors ; + back charloc ; + skip (tok stop :: acc) in begin match base, Uchar.to_char c with | `dec, ('0'.. '9') -> integer `dec acc start false | `dec, ('a'..'z' | 'A'..'Z') -> - error (Missing_break_after_number stop) + missing_break () | `hex, ('0'..'9' | 'a'..'f' | 'A'..'F') -> integer `hex acc start false | `hex, ('g'..'z' | 'G'..'Z') -> - error (Missing_break_after_number stop) + missing_break () | `bin, ('0' | '1') -> integer `bin acc start false | `bin, ('2'..'9' | 'a'..'z' | 'A'..'Z') -> - error (Missing_break_after_number stop) + missing_break () | (`bin | `hex), _ when first -> - error (Unterminated_integer { start ; stop }) + errors := Unterminated_integer { start ; stop } :: !errors ; + back charloc ; + skip (tok stop :: acc) | _ -> back charloc ; skip (tok stop :: acc) end | (`End, stop) as other -> - if first && base = `bin || base = `hex then - error (Unterminated_integer { start ; stop }) - else begin - back other ; - skip (tok stop :: acc) - end + if first && base = `bin || base = `hex then begin + errors := Unterminated_integer { start ; stop } :: !errors + end ; + back other ; + skip (tok stop :: acc) and string acc sacc start = let tok () = tok start (here ()) (String (String.concat "" (List.rev sacc))) in - next () >>? function - | `End, stop -> error (Unterminated_string { start ; stop }) + match next () with + | `End, stop -> + errors := Unterminated_string { start ; stop } :: !errors ; + skip (tok () :: acc) | `Uchar c, stop -> match uchar_to_char c with | Some '"' -> skip (tok () :: acc) - | Some '\n' -> error (Unterminated_string { start ; stop }) + | Some '\n' -> + errors := Unterminated_string { start ; stop } :: !errors ; + skip (tok () :: acc) | Some '\\' -> - begin next () >>? function - | `End, stop -> error (Unterminated_string { start ; stop }) + begin match next () with + | `End, stop -> + errors := Unterminated_string { start ; stop } :: !errors ; + skip (tok () :: acc) | `Uchar c, loc -> match uchar_to_char c with | Some '"' -> string acc ("\"" :: sacc) start @@ -242,7 +266,8 @@ let tokenize source = | Some _ | None -> let byte = Uutf.decoder_byte_count decoder in let s = String.sub source loc.byte (byte - loc.byte) in - error (Undefined_escape_sequence (loc, s)) + errors := Undefined_escape_sequence (loc, s) :: !errors ; + string acc sacc start end | Some _ | None -> let byte = Uutf.decoder_byte_count decoder in @@ -253,7 +278,7 @@ let tokenize source = let name = String.sub source start.byte (stop.byte - start.byte) in tok start stop (ret name) in - next () >>? function + match next () with | (`Uchar c, stop) as charloc -> begin match uchar_to_char c with | Some ('a'..'z' | 'A'..'Z' | '_' | '0'..'9') -> @@ -266,12 +291,15 @@ let tokenize source = back other ; skip (tok stop :: acc) and comment acc start lvl = - next () >>? function - | `End, stop -> error (Unterminated_comment { start ; stop }) + match next () with + | `End, stop -> + errors := Unterminated_comment { start ; stop } :: !errors ; + let text = String.sub source start.byte (stop.byte - start.byte) in + skip (tok start stop (Comment text) :: acc) | `Uchar c, _ -> begin match uchar_to_char c with | Some '*' -> - begin next () >>? function + begin match next () with | `Uchar c, _ when Uchar.equal c (Uchar.of_char '/') -> if lvl = 0 then let stop = here () in @@ -285,7 +313,7 @@ let tokenize source = comment acc start lvl end | Some '/' -> - begin next () >>? function + begin match next () with | `Uchar c, _ when Uchar.equal c (Uchar.of_char '*') -> comment acc start (lvl + 1) | other -> @@ -298,7 +326,7 @@ let tokenize source = let tok stop = let text = String.sub source start.byte (stop.byte - start.byte) in tok start stop (Eol_comment text) in - next () >>? function + match next () with | `Uchar c, stop -> begin match uchar_to_char c with | Some '\n' -> skip (tok stop :: acc) @@ -307,7 +335,8 @@ let tokenize source = | (_, stop) as other -> back other ; skip (tok stop :: acc) in - skip [] + let tokens = skip [] in + tokens, List.rev !errors type node = (location, string) Micheline.node @@ -379,7 +408,7 @@ type error += Extra of token type error += Misaligned of node type error += Empty -let rec parse ?(check = true) tokens stack = +let rec parse ?(check = true) errors tokens stack = (* Two steps: - 1. parse without checking indentation [parse] - 2. check indentation [check] (inlined in 1) *) @@ -397,60 +426,66 @@ let rec parse ?(check = true) tokens stack = assert false (* Return *) | Expression (Some result) :: _, [] -> - ok [ result ] - | Expression (Some _) :: _, token :: _ -> - error (Unexpected token) + [ result ], List.rev errors + | Expression (Some _) :: _, token :: rem -> + let errors = Unexpected token :: errors in + parse ~check errors rem (* skip *) stack | Expression None :: _, [] -> - error Empty + let errors = Empty :: errors in + let ghost = { start = point_zero ; stop = point_zero} in + [ Seq (ghost, [], None) ], List.rev errors | Toplevel [ Seq (_, exprs, _) as expr ] :: [], [] -> - (if check then do_check ~toplevel: true expr else ok ()) >>? fun () -> - ok exprs + let errors = if check then do_check ~toplevel: false errors expr else errors in + exprs, List.rev errors | Toplevel exprs :: [], [] -> let exprs = List.rev exprs in let loc = { start = min_point exprs ; stop = max_point exprs } in - let expr = Micheline.Seq (loc, exprs, None) in - (if check then do_check ~toplevel: true expr else ok ()) >>? fun () -> - ok exprs + let expr = Seq (loc, exprs, None) in + let errors = if check then do_check ~toplevel: true errors expr else errors in + exprs, List.rev errors (* Ignore comments *) | _, { token = Eol_comment _ | Comment _ } :: rest -> - parse ~check rest stack + parse ~check errors rest stack | (Expression None | Sequence _ | Toplevel _) :: _, ({ token = Int _ | String _ } as token):: { token = Eol_comment _ | Comment _ } :: rest | (Wrapped _ | Unwrapped _) :: _, ({ token = Open_paren } as token) :: { token = Eol_comment _ | Comment _ } :: rest -> - parse ~check (token :: rest) stack + parse ~check errors (token :: rest) stack (* Erroneous states *) | (Wrapped _ | Unwrapped _) :: _ , ({ token = Open_paren } as token) - :: { token = Open_paren | Open_brace } :: _ + :: { token = Open_paren | Open_brace } :: rem | Unwrapped _ :: Expression _ :: _ , - ({ token = Semi | Close_brace | Close_paren } as token) :: _ + ({ token = Semi | Close_brace | Close_paren } as token) :: rem | Expression None :: _ , - ({ token = Semi | Close_brace | Close_paren | Open_paren } as token) :: _ -> - error (Unexpected token) + ({ token = Semi | Close_brace | Close_paren | Open_paren } as token) :: rem -> + let errors = Unexpected token :: errors in + parse ~check errors rem (* skip *) stack | (Sequence _ | Toplevel _) :: _ , - { token = Semi } :: ({ token = Semi } as token) :: _ -> - error (Extra token) + ({ token = Semi } as valid) :: ({ token = Semi } as token) :: rem -> + let errors = Unexpected token :: errors in + parse ~check errors (valid (* skip *) :: rem) stack | (Wrapped _ | Unwrapped _) :: _ , { token = Open_paren } - :: ({ token = Int _ | String _ | Annot _ | Close_paren } as token) :: _ + :: ({ token = Int _ | String _ | Annot _ | Close_paren } as token) :: rem | (Expression None | Sequence _ | Toplevel _) :: _, - { token = Int _ | String _ } :: ({ token = Ident _ | Int _ | String _ | Annot _ | Close_paren | Open_paren | Open_brace } as token) :: _ + { token = Int _ | String _ } :: ({ token = Ident _ | Int _ | String _ | Annot _ | Close_paren | Open_paren | Open_brace } as token) :: rem | Unwrapped (_, _, _, _) :: Toplevel _ :: _, - ({ token = Close_brace } as token) :: _ + ({ token = Close_brace } as token) :: rem | Unwrapped (_, _, _, _) :: _, - ({ token = Close_paren } as token) :: _ + ({ token = Close_paren } as token) :: rem | Toplevel _ :: [], - ({ token = Close_paren } as token) :: _ + ({ token = Close_paren } as token) :: rem | Toplevel _ :: [], - ({ token = Close_brace } as token) :: _ + ({ token = Close_brace } as token) :: rem | _, - ({ token = Annot _ } as token) :: _ -> - error (Unexpected token) + ({ token = Annot _ } as token) :: rem -> + let errors = Unexpected token :: errors in + parse ~check errors rem (* skip *) stack | Wrapped (token, _, _, _) :: _, ({ token = Close_brace | Semi } :: _ | []) | (Sequence _ | Toplevel _) :: _, @@ -459,39 +494,45 @@ let rec parse ?(check = true) tokens stack = ({ token = Open_paren } as token) :: ({ token = Close_brace | Semi } :: _ | []) | (Sequence (token, _, _) :: _ | Unwrapped _ :: Sequence (token, _, _) :: _), ({ token = Close_paren } :: _ | [])-> - error (Unclosed token) + let errors = Unclosed token :: errors in + let fake = + { token with token = match token.token with + | Open_paren -> Close_paren + | Open_brace -> Close_brace + | _ -> assert false } in + parse ~check errors (fake :: (* insert *) tokens) stack (* Valid states *) | (Toplevel _ | Sequence (_, _, _)) :: _ , { token = Ident name ; loc } :: { token = Annot annot } :: rest -> let mode = Unwrapped (loc, name, [], Some annot) in - parse ~check rest (push_mode mode stack) + parse ~check errors rest (push_mode mode stack) | (Expression None | Toplevel _ | Sequence (_, _, _)) :: _ , { token = Ident name ; loc } :: rest -> let mode = Unwrapped (loc, name, [], None) in - parse ~check rest (push_mode mode stack) + parse ~check errors rest (push_mode mode stack) | (Unwrapped _ | Wrapped _) :: _, { token = Int value ; loc } :: rest | (Expression None | Sequence _ | Toplevel _) :: _, { token = Int value ; loc } :: ([] | { token = Semi | Close_brace} :: _ as rest) -> let expr : node = Int (loc, value) in - (if check then do_check ~toplevel: false expr else ok ()) >>? fun () -> - parse ~check rest (fill_mode expr stack) + let errors = if check then do_check ~toplevel: false errors expr else errors in + parse ~check errors rest (fill_mode expr stack) | (Unwrapped _ | Wrapped _) :: _, { token = String contents ; loc } :: rest | (Expression None | Sequence _ | Toplevel _) :: _, { token = String contents ; loc } :: ([] | { token = Semi | Close_brace} :: _ as rest) -> let expr : node = String (loc, contents) in - (if check then do_check ~toplevel: false expr else ok ()) >>? fun () -> - parse ~check rest (fill_mode expr stack) + let errors = if check then do_check ~toplevel: false errors expr else errors in + parse ~check errors rest (fill_mode expr stack) | Sequence ({ loc = { start } }, exprs, annot) :: _ , { token = Close_brace ; loc = { stop } } :: rest -> let exprs = List.rev exprs in let expr = Micheline.Seq ({ start ; stop }, exprs, annot) in - (if check then do_check ~toplevel: false expr else ok ()) >>? fun () -> - parse ~check rest (fill_mode expr (pop_mode stack)) + let errors = if check then do_check ~toplevel: false errors expr else errors in + parse ~check errors rest (fill_mode expr (pop_mode stack)) | (Sequence _ | Toplevel _) :: _ , { token = Semi } :: rest -> - parse ~check rest stack + parse ~check errors rest stack | Unwrapped ({ start ; stop }, name, exprs, annot) :: Expression _ :: _, ([] as rest) | Unwrapped ({ start ; stop }, name, exprs, annot) :: Toplevel _ :: _, @@ -503,81 +544,82 @@ let rec parse ?(check = true) tokens stack = let exprs = List.rev exprs in let stop = if exprs = [] then stop else max_point exprs in let expr = Micheline.Prim ({ start ; stop }, name, exprs, annot) in - (if check then do_check ~toplevel: false expr else ok ()) >>? fun () -> - parse ~check rest (fill_mode expr (pop_mode stack)) + let errors = if check then do_check ~toplevel: false errors expr else errors in + parse ~check errors rest (fill_mode expr (pop_mode stack)) | (Wrapped _ | Unwrapped _) :: _ , ({ token = Open_paren } as token) :: { token = Ident name } :: { token = Annot annot } :: rest -> let mode = Wrapped (token, name, [], Some annot) in - parse ~check rest (push_mode mode stack) + parse ~check errors rest (push_mode mode stack) | (Wrapped _ | Unwrapped _) :: _ , ({ token = Open_paren } as token) :: { token = Ident name } :: rest -> let mode = Wrapped (token, name, [], None) in - parse ~check rest (push_mode mode stack) + parse ~check errors rest (push_mode mode stack) | (Wrapped _ | Unwrapped _) :: _ , { token = Ident name ; loc } :: rest -> let expr = Micheline.Prim (loc, name, [], None) in - (if check then do_check ~toplevel: false expr else ok ()) >>? fun () -> - parse ~check rest (fill_mode expr stack) + let errors = if check then do_check ~toplevel: false errors expr else errors in + parse ~check errors rest (fill_mode expr stack) | (Wrapped _ | Unwrapped _ | Toplevel _ | Sequence _ | Expression None) :: _ , ({ token = Open_brace } as token) :: { token = Annot annot } :: rest -> let mode = Sequence (token, [], Some annot) in - parse ~check rest (push_mode mode stack) + parse ~check errors rest (push_mode mode stack) | (Wrapped _ | Unwrapped _ | Toplevel _ | Sequence _ | Expression None) :: _ , ({ token = Open_brace } as token) :: rest -> let mode = Sequence (token, [], None) in - parse ~check rest (push_mode mode stack) + parse ~check errors rest (push_mode mode stack) (* indentation checker *) -and do_check ?(toplevel = false) = function +and do_check ?(toplevel = false) errors = function | Seq ({ start ; stop }, [], _) as expr -> if start.column >= stop.column then - error (Misaligned expr) - else ok () + Misaligned expr :: errors + else errors | Prim ({ start ; stop }, _, first :: rest, _) | Seq ({ start ; stop }, first :: rest, _) as expr -> let { column = first_column ; line = first_line } = min_point [ first ] in if start.column >= stop.column then - error (Misaligned expr) + Misaligned expr :: errors else if not toplevel && start.column >= first_column then - error (Misaligned expr) + Misaligned expr :: errors else (* In a sequence or in the arguments of a primitive, we require all items to be aligned, but we relax the rule to allow consecutive items to be writtem on the same line. *) - let rec in_line_or_aligned prev_start_line = function - | [] -> ok () + let rec in_line_or_aligned prev_start_line errors = function + | [] -> errors | expr :: rest -> let { column ; line = start_line } = min_point [ expr ] in let { line = stop_line } = max_point [ expr ] in - if stop_line <> prev_start_line - && column <> first_column then - error (Misaligned expr) - else - in_line_or_aligned start_line rest in - in_line_or_aligned first_line rest - | Prim (_, _, [], _) | String _ | Int _ -> ok () + let errors = + if stop_line <> prev_start_line + && column <> first_column then + Misaligned expr :: errors + else + errors in + in_line_or_aligned start_line errors rest in + in_line_or_aligned first_line errors rest + | Prim (_, _, [], _) | String _ | Int _ -> errors let parse_expression ?check tokens = let result = match tokens with | ({ token = Open_paren } as token) :: { token = Ident name } :: { token = Annot annot } :: rest -> let mode = Wrapped (token, name, [], Some annot) in - parse ?check rest [ mode ; Expression None ] + parse ?check [] rest [ mode ; Expression None ] | ({ token = Open_paren } as token) :: { token = Ident name } :: rest -> let mode = Wrapped (token, name, [], None) in - parse ?check rest [ mode ; Expression None ] + parse ?check [] rest [ mode ; Expression None ] | _ -> - parse ?check tokens [ Expression None ] in + parse ?check [] tokens [ Expression None ] in match result with - | Ok [ single ] -> Ok single - | Ok _ -> assert false - | Error errs -> Error errs + | [ single ], errors -> single, errors + | _ -> assert false let parse_toplevel ?check tokens = - parse ?check tokens [ Toplevel [] ] + parse ?check [] tokens [ Toplevel [] ] let print_point ppf { line ; column } = Format.fprintf ppf - "at line %d character %d" + "At line %d character %d" line column let print_token_kind ppf = function @@ -594,17 +636,22 @@ let print_location ppf loc = if loc.start.line = loc.stop.line then if loc.start.column = loc.stop.column then Format.fprintf ppf - "at line %d character %d" + "At line %d character %d" loc.start.line loc.start.column else Format.fprintf ppf - "at line %d characters %d to %d" + "At line %d characters %d to %d" loc.start.line loc.start.column loc.stop.column else Format.fprintf ppf - "from line %d character %d to line %d character %d" + "From line %d character %d to line %d character %d" loc.start.line loc.start.column loc.stop.line loc.stop.column +let no_parsing_error (ast, errors) = + match errors with + | [] -> ok ast + | errors -> Error errors + let () = register_error_kind `Permanent ~id: "micheline.parse_error.invalid_utf8_sequence" diff --git a/src/micheline/micheline_parser.mli b/src/micheline/micheline_parser.mli index 02b09f057..bd565ad16 100644 --- a/src/micheline/micheline_parser.mli +++ b/src/micheline/micheline_parser.mli @@ -9,6 +9,10 @@ open Error_monad +type 'a parsing_result = 'a * error list + +val no_parsing_error : 'a parsing_result -> 'a tzresult + type point = { point : int ; byte : int ; @@ -42,7 +46,7 @@ type token = { token : token_value ; loc : location } -val tokenize : string -> token list tzresult +val tokenize : string -> token list parsing_result type node = (location, string) Micheline.node @@ -67,9 +71,9 @@ type error += Extra of token type error += Misaligned of node type error += Empty -val parse_toplevel : ?check:bool -> token list -> node list tzresult +val parse_toplevel : ?check:bool -> token list -> node list parsing_result -val parse_expression : ?check:bool -> token list -> node tzresult +val parse_expression : ?check:bool -> token list -> node parsing_result val print_location : Format.formatter -> location -> unit diff --git a/src/proto/alpha/michelson_v1_primitives.ml b/src/proto/alpha/michelson_v1_primitives.ml index 2c005460e..f86b3e4a0 100644 --- a/src/proto/alpha/michelson_v1_primitives.ml +++ b/src/proto/alpha/michelson_v1_primitives.ml @@ -9,8 +9,9 @@ open Micheline -type error += Unknown_primitive of string +type error += Unknown_primitive_name of string type error += Invalid_case of string +type error += Invalid_primitive_name of Micheline.canonical_location type prim = | K_parameter @@ -341,15 +342,17 @@ let prim_of_string = function | "unit" -> ok T_unit | n -> if valid_case n then - error (Unknown_primitive n) + error (Unknown_primitive_name n) else error (Invalid_case n) let prims_of_strings expr = let rec convert = function | Int _ | String _ as expr -> ok expr - | Prim (_, prim, args, annot) -> - prim_of_string prim >>? fun prim -> + | Prim (loc, prim, args, annot) -> + Error_monad.record_trace + (Invalid_primitive_name loc) + (prim_of_string prim) >>? fun prim -> List.fold_left (fun acc arg -> acc >>? fun args -> @@ -609,10 +612,10 @@ let () = ~pp:(fun ppf n -> Format.fprintf ppf "Unknown primitive %s." n) Data_encoding.(obj1 (req "wrongPrimitiveName" string)) (function - | Unknown_primitive got -> Some got + | Unknown_primitive_name got -> Some got | _ -> None) (fun got -> - Unknown_primitive got) ; + Unknown_primitive_name got) ; register_error_kind `Permanent ~id:"invalidPrimitiveNameCaseTypeError" @@ -620,9 +623,24 @@ let () = ~description: "In a script or data expression, a primitive name is \ neither uppercase, lowercase or capitalized." + ~pp:(fun ppf n -> Format.fprintf ppf "Primitive %s has invalid case." n) Data_encoding.(obj1 (req "wrongPrimitiveName" string)) (function | Invalid_case name -> Some name | _ -> None) (fun name -> - Invalid_case name) + Invalid_case name) ; + register_error_kind + `Permanent + ~id:"invalidPrimitiveNameTypeErro" + ~title: "Invalid primitive name (typechecking error)" + ~description: + "In a script or data expression, a primitive name is \ + unknown or has a wrong case." + ~pp:(fun ppf _ -> Format.fprintf ppf "Invalid primitive.") + Data_encoding.(obj1 (req "location" Micheline.canonical_location_encoding)) + (function + | Invalid_primitive_name loc -> Some loc + | _ -> None) + (fun loc -> + Invalid_primitive_name loc) diff --git a/src/proto/alpha/michelson_v1_primitives.mli b/src/proto/alpha/michelson_v1_primitives.mli index 299d76a43..6acc902f3 100644 --- a/src/proto/alpha/michelson_v1_primitives.mli +++ b/src/proto/alpha/michelson_v1_primitives.mli @@ -7,8 +7,9 @@ (* *) (**************************************************************************) -type error += Unknown_primitive of string (* `Permanent *) +type error += Unknown_primitive_name of string (* `Permanent *) type error += Invalid_case of string (* `Permanent *) +type error += Invalid_primitive_name of Micheline.canonical_location (* `Permanent *) type prim = | K_parameter