diff --git a/src/passes/1-parser/cameligo/.gitignore b/src/passes/1-parser/cameligo/.gitignore new file mode 100644 index 000000000..cca52dc59 --- /dev/null +++ b/src/passes/1-parser/cameligo/.gitignore @@ -0,0 +1,13 @@ +_build/* +*/_build +*~ +.merlin +*/.merlin +*.install +/Version.ml +/dune-project +/Parser.mli +/Parser.ml +/Lexer.ml +/LexToken.ml +/Tests \ No newline at end of file diff --git a/src/passes/1-parser/cameligo/.unlexer.tag b/src/passes/1-parser/cameligo/.unlexer.tag new file mode 100644 index 000000000..e69de29bb diff --git a/src/passes/1-parser/cameligo/LexToken.mll b/src/passes/1-parser/cameligo/LexToken.mll index 172b97eec..0871c0d32 100644 --- a/src/passes/1-parser/cameligo/LexToken.mll +++ b/src/passes/1-parser/cameligo/LexToken.mll @@ -201,7 +201,7 @@ let to_lexeme = function | Int i | Nat i | Mutez i -> fst i.Region.value -| String s -> s.Region.value +| String s -> String.escaped s.Region.value | Bytes b -> fst b.Region.value | Begin _ -> "begin" diff --git a/src/passes/1-parser/cameligo/ParErr.ml b/src/passes/1-parser/cameligo/ParErr.ml new file mode 100644 index 000000000..7debe48ef --- /dev/null +++ b/src/passes/1-parser/cameligo/ParErr.ml @@ -0,0 +1,416 @@ + +(* This file was auto-generated based on "Parser.msg". *) + +(* Please note that the function [message] can raise [Not_found]. *) + +let message = + fun s -> + match s with + | 0 -> + "\n" + | 1 -> + "\n" + | 2 -> + "\n" + | 3 -> + "\n" + | 4 -> + "\n" + | 5 -> + "\n" + | 7 -> + "\n" + | 49 -> + "\n" + | 51 -> + "\n" + | 52 -> + "\n" + | 53 -> + "\n" + | 18 -> + "\n" + | 8 -> + "\n" + | 9 -> + "\n" + | 10 -> + "\n" + | 42 -> + "\n" + | 43 -> + "\n" + | 46 -> + "\n" + | 47 -> + "\n" + | 33 -> + "\n" + | 460 -> + "\n" + | 27 -> + "\n" + | 31 -> + "\n" + | 28 -> + "\n" + | 35 -> + "\n" + | 12 -> + "\n" + | 16 -> + "\n" + | 6 -> + "\n" + | 13 -> + "\n" + | 61 -> + "\n" + | 133 -> + "\n" + | 373 -> + "\n" + | 375 -> + "\n" + | 134 -> + "\n" + | 136 -> + "\n" + | 137 -> + "\n" + | 153 -> + "\n" + | 374 -> + "\n" + | 63 -> + "\n" + | 142 -> + "\n" + | 143 -> + "\n" + | 128 -> + "\n" + | 145 -> + "\n" + | 72 -> + "\n" + | 94 -> + "\n" + | 106 -> + "\n" + | 95 -> + "\n" + | 108 -> + "\n" + | 109 -> + "\n" + | 110 -> + "\n" + | 73 -> + "\n" + | 91 -> + "\n" + | 93 -> + "\n" + | 92 -> + "\n" + | 90 -> + "\n" + | 77 -> + "\n" + | 78 -> + "\n" + | 65 -> + "\n" + | 66 -> + "\n" + | 67 -> + "\n" + | 120 -> + "\n" + | 121 -> + "\n" + | 124 -> + "\n" + | 125 -> + "\n" + | 147 -> + "\n" + | 148 -> + "\n" + | 149 -> + "\n" + | 157 -> + "\n" + | 156 -> + "\n" + | 463 -> + "\n" + | 465 -> + "\n" + | 217 -> + "\n" + | 242 -> + "\n" + | 219 -> + "\n" + | 221 -> + "\n" + | 215 -> + "\n" + | 226 -> + "\n" + | 255 -> + "\n" + | 256 -> + "\n" + | 243 -> + "\n" + | 264 -> + "\n" + | 228 -> + "\n" + | 257 -> + "\n" + | 258 -> + "\n" + | 266 -> + "\n" + | 268 -> + "\n" + | 270 -> + "\n" + | 272 -> + "\n" + | 274 -> + "\n" + | 192 -> + "\n" + | 259 -> + "\n" + | 285 -> + "\n" + | 288 -> + "\n" + | 245 -> + "\n" + | 293 -> + "\n" + | 262 -> + "\n" + | 160 -> + "\n" + | 164 -> + "\n" + | 429 -> + "\n" + | 332 -> + "\n" + | 313 -> + "\n" + | 431 -> + "\n" + | 315 -> + "\n" + | 316 -> + "\n" + | 317 -> + "\n" + | 432 -> + "\n" + | 445 -> + "\n" + | 446 -> + "\n" + | 433 -> + "\n" + | 434 -> + "\n" + | 435 -> + "\n" + | 436 -> + "\n" + | 437 -> + "\n" + | 438 -> + "\n" + | 440 -> + "\n" + | 328 -> + "\n" + | 330 -> + "\n" + | 334 -> + "\n" + | 331 -> + "\n" + | 329 -> + "\n" + | 340 -> + "\n" + | 341 -> + "\n" + | 342 -> + "\n" + | 343 -> + "\n" + | 344 -> + "\n" + | 345 -> + "\n" + | 367 -> + "\n" + | 346 -> + "\n" + | 348 -> + "\n" + | 441 -> + "\n" + | 443 -> + "\n" + | 447 -> + "\n" + | 430 -> + "\n" + | 312 -> + "\n" + | 428 -> + "\n" + | 165 -> + "\n" + | 167 -> + "\n" + | 168 -> + "\n" + | 169 -> + "\n" + | 163 -> + "\n" + | 448 -> + "\n" + | 450 -> + "\n" + | 451 -> + "\n" + | 166 -> + "\n" + | 235 -> + "\n" + | 236 -> + "\n" + | 239 -> + "\n" + | 240 -> + "\n" + | 425 -> + "\n" + | 170 -> + "\n" + | 171 -> + "\n" + | 172 -> + "\n" + | 418 -> + "\n" + | 419 -> + "\n" + | 422 -> + "\n" + | 423 -> + "\n" + | 174 -> + "\n" + | 304 -> + "\n" + | 305 -> + "\n" + | 405 -> + "\n" + | 412 -> + "\n" + | 404 -> + "\n" + | 306 -> + "\n" + | 308 -> + "\n" + | 320 -> + "\n" + | 321 -> + "\n" + | 322 -> + "\n" + | 323 -> + "\n" + | 324 -> + "\n" + | 325 -> + "\n" + | 326 -> + "\n" + | 327 -> + "\n" + | 378 -> + "\n" + | 379 -> + "\n" + | 381 -> + "\n" + | 335 -> + "\n" + | 310 -> + "\n" + | 307 -> + "\n" + | 395 -> + "\n" + | 396 -> + "\n" + | 397 -> + "\n" + | 398 -> + "\n" + | 399 -> + "\n" + | 400 -> + "\n" + | 408 -> + "\n" + | 401 -> + "\n" + | 403 -> + "\n" + | 175 -> + "\n" + | 176 -> + "\n" + | 179 -> + "\n" + | 180 -> + "\n" + | 183 -> + "\n" + | 302 -> + "\n" + | 300 -> + "\n" + | 185 -> + "\n" + | 187 -> + "\n" + | 188 -> + "\n" + | 189 -> + "\n" + | 190 -> + "\n" + | 195 -> + "\n" + | 214 -> + "\n" + | 194 -> + "\n" + | 210 -> + "\n" + | _ -> + raise Not_found diff --git a/src/passes/1-parser/cameligo/Parser.mly b/src/passes/1-parser/cameligo/Parser.mly index 8863da3f1..11f858752 100644 --- a/src/passes/1-parser/cameligo/Parser.mly +++ b/src/passes/1-parser/cameligo/Parser.mly @@ -566,6 +566,7 @@ core_expr: "" { EArith (Int $1) } | "" { EArith (Mutez $1) } | "" { EArith (Nat $1) } +| "" { EBytes $1 } | "" | module_field { EVar $1 } | projection { EProj $1 } | "" { EString (String $1) } diff --git a/src/passes/1-parser/cameligo/ParserAPI.ml b/src/passes/1-parser/cameligo/ParserAPI.ml index 7ae5c5ad4..df82173a9 100644 --- a/src/passes/1-parser/cameligo/ParserAPI.ml +++ b/src/passes/1-parser/cameligo/ParserAPI.ml @@ -1,57 +1,82 @@ -(** Generic parser for LIGO *) - -module type PARSER = - sig - (* The type of tokens *) - - type token - - (* This exception is raised by the monolithic API functions *) - - exception Error - - (* The monolithic API *) - - val contract : (Lexing.lexbuf -> token) -> Lexing.lexbuf -> AST.t - - (* The incremental API *) - - module MenhirInterpreter : - sig - include MenhirLib.IncrementalEngine.INCREMENTAL_ENGINE - with type token = token - end - - module Incremental : - sig - val contract : Lexing.position -> AST.t MenhirInterpreter.checkpoint - end - end - -(* Main functor *) - -module Make (Lexer: Lexer.S) - (Parser: PARSER with type token = Lexer.Token.token) = - struct - - module I = Parser.MenhirInterpreter - - (* The parser has successfully produced a semantic value. *) - - let success v = v - - (* The parser has suspended itself because of a syntax error. Stop. *) - - let fail _checkpoint = raise Parser.Error - - (* The generic parsing function *) - - let incr_contract Lexer.{read; buffer; close; _} : AST.t = - let supplier = I.lexer_lexbuf_to_supplier read buffer in - let parser = Parser.Incremental.contract buffer.Lexing.lex_curr_p in - let ast = I.loop_handle success fail supplier parser - in close (); ast - - let mono_contract = Parser.contract - - end +(* Generic parser for LIGO *) + +(* Main functor *) + +module Make (Lexer: Lexer.S with module Token := LexToken) + (Parser: module type of Parser) + (ParErr: sig val message : int -> string end) = + struct + module I = Parser.MenhirInterpreter + module S = MenhirLib.General (* Streams *) + + (* The call [stack checkpoint] extracts the parser's stack out of + a checkpoint. *) + + let stack = function + I.HandlingError env -> I.stack env + | _ -> assert false + + (* The call [state checkpoint] extracts the number of the current + state out of a parser checkpoint. *) + + let state checkpoint : int = + match Lazy.force (stack checkpoint) with + S.Nil -> 0 (* WARNING: Hack. The first state should be 0. *) + | S.Cons (I.Element (s,_,_,_),_) -> I.number s + + (* The parser has successfully produced a semantic value. *) + + let success v = v + + (* The parser has suspended itself because of a syntax error. Stop. *) + + type message = string + type valid = Lexer.token + type invalid = Lexer.token + type error = message * valid option * invalid + + exception Point of error + + let failure get_win checkpoint = + let message = ParErr.message (state checkpoint) in + match get_win () with + Lexer.Nil -> assert false + | Lexer.One invalid -> + raise (Point (message, None, invalid)) + | Lexer.Two (invalid, valid) -> + raise (Point (message, Some valid, invalid)) + + (* The two Menhir APIs are called from the following two functions. *) + + let incr_contract Lexer.{read; buffer; get_win; close; _} : AST.t = + let supplier = I.lexer_lexbuf_to_supplier read buffer + and failure = failure get_win in + let parser = Parser.Incremental.contract buffer.Lexing.lex_curr_p in + let ast = I.loop_handle success failure supplier parser + in close (); ast + + let mono_contract = Parser.contract + + (* Errors *) + + let format_error ?(offsets=true) mode (msg, valid_opt, invalid) = + let invalid_region = LexToken.to_region invalid in + let header = + "Parse error " ^ invalid_region#to_string ~offsets mode in + let trailer = + match valid_opt with + None -> + if LexToken.is_eof invalid then "" + else let invalid_lexeme = LexToken.to_lexeme invalid in + Printf.sprintf ", before \"%s\"" invalid_lexeme + | Some valid -> + let valid_lexeme = LexToken.to_lexeme valid in + let s = Printf.sprintf ", after \"%s\"" valid_lexeme in + if LexToken.is_eof invalid then s + else + let invalid_lexeme = LexToken.to_lexeme invalid in + Printf.sprintf "%s and before \"%s\"" s invalid_lexeme in + let header = header ^ trailer in + header ^ (if msg = "" then ".\n" else ":\n" ^ msg) + + end diff --git a/src/passes/1-parser/cameligo/ParserAPI.mli b/src/passes/1-parser/cameligo/ParserAPI.mli index ff3fe4854..7d969a33c 100644 --- a/src/passes/1-parser/cameligo/ParserAPI.mli +++ b/src/passes/1-parser/cameligo/ParserAPI.mli @@ -1,39 +1,22 @@ (** Generic parser API for LIGO *) -module type PARSER = +module Make (Lexer: Lexer.S with module Token := LexToken) + (Parser: module type of Parser) + (ParErr: sig val message: int -> string end) : sig - (* The type of tokens *) + (* Monolithic and incremental APIs of Menhir for parsing *) - type token - - (* This exception is raised by the monolithic API functions *) - - exception Error - - (* The monolithic API *) - - val contract : (Lexing.lexbuf -> token) -> Lexing.lexbuf -> AST.t - - (* The incremental API *) - - module MenhirInterpreter : - sig - include MenhirLib.IncrementalEngine.INCREMENTAL_ENGINE - with type token = token - end - - module Incremental : - sig - val contract : Lexing.position -> AST.t MenhirInterpreter.checkpoint - end - - end - -(* Main functor *) - -module Make (Lexer: Lexer.S) - (Parser: PARSER with type token = Lexer.Token.token) : - sig val mono_contract : (Lexing.lexbuf -> Lexer.token) -> Lexing.lexbuf -> AST.t val incr_contract : Lexer.instance -> AST.t + + (* Error handling *) + + type message = string + type valid = Lexer.token + type invalid = Lexer.token + type error = message * valid option * invalid + + exception Point of error + + val format_error : ?offsets:bool -> [`Byte | `Point] -> error -> string end diff --git a/src/passes/1-parser/cameligo/ParserMain.ml b/src/passes/1-parser/cameligo/ParserMain.ml index faa7ce70a..8ed546f50 100644 --- a/src/passes/1-parser/cameligo/ParserMain.ml +++ b/src/passes/1-parser/cameligo/ParserMain.ml @@ -1,16 +1,14 @@ -(** Driver for the parser of CameLIGO *) +(** Driver for the CameLIGO parser *) let extension = ".mligo" let options = EvalOpt.read "CameLIGO" extension +open Printf + (** Error printing and exception tracing *) let () = Printexc.record_backtrace true -(** Auxiliary functions -*) -let sprintf = Printf.sprintf - (** Extracting the input file *) let file = @@ -23,17 +21,7 @@ let file = let () = Printexc.record_backtrace true let external_ text = - Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;; - -type error = SyntaxError - -let error_to_string = function - SyntaxError -> "Syntax error.\n" - -let print_error ?(offsets=true) mode Region.{region; value} ~file = - let msg = error_to_string value - and reg = region#to_string ~file ~offsets mode in - Utils.highlight (sprintf "Parse error %s:\n%s%!" reg msg) + Utils.highlight (sprintf "External error: %s" text); exit 1;; (** {1 Preprocessing the input source and opening the input channels} *) @@ -42,7 +30,7 @@ let print_error ?(offsets=true) mode Region.{region; value} ~file = let lib_path = match options#libs with [] -> "" - | libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path + | libs -> let mk_I dir path = sprintf " -I %s%s" dir path in List.fold_right mk_I libs "" let prefix = @@ -61,26 +49,26 @@ let pp_input = let cpp_cmd = match options#input with None | Some "-" -> - Printf.sprintf "cpp -traditional-cpp%s - > %s" + sprintf "cpp -traditional-cpp%s - > %s" lib_path pp_input | Some file -> - Printf.sprintf "cpp -traditional-cpp%s %s > %s" + sprintf "cpp -traditional-cpp%s %s > %s" lib_path file pp_input let () = if Utils.String.Set.mem "cpp" options#verbose - then Printf.eprintf "%s\n%!" cpp_cmd; + then eprintf "%s\n%!" cpp_cmd; if Sys.command cpp_cmd <> 0 then - external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd) + external_ (sprintf "the command \"%s\" failed." cpp_cmd) (** {1 Instanciating the lexer} *) module Lexer = Lexer.Make (LexToken) module Log = LexerLog.Make (Lexer) -module ParserFront = ParserAPI.Make (Lexer) (Parser) +module ParserFront = ParserAPI.Make (Lexer) (Parser) (ParErr) let lexer_inst = Lexer.open_token_stream (Some pp_input) -let Lexer.{read; buffer; get_pos; get_last; close} = lexer_inst +let Lexer.{read; buffer; get_win; get_pos; get_last; close} = lexer_inst and cout = stdout @@ -97,10 +85,10 @@ let tokeniser = read ~log let () = try - (* The incremental API *) - let ast = ParserFront.incr_contract lexer_inst in - (* The monolithic API *) - (* let ast = ParserFront.mono_contract tokeniser buffer in *) + let ast = + if options#mono + then ParserFront.mono_contract tokeniser buffer + else ParserFront.incr_contract lexer_inst in if Utils.String.Set.mem "ast" options#verbose then let buffer = Buffer.create 131 in let state = ParserLog.mk_state @@ -122,14 +110,36 @@ let () = Buffer.output_buffer stdout buffer end with + (* Lexing errors *) Lexer.Error err -> close_all (); - Lexer.print_error ~offsets:options#offsets - options#mode err ~file - | Parser.Error -> - let region = get_last () in - let error = Region.{region; value=SyntaxError} in + let msg = + Lexer.format_error ~offsets:options#offsets + options#mode err ~file + in prerr_string msg + + (* Incremental API of Menhir *) + | ParserFront.Point point -> let () = close_all () in - print_error ~offsets:options#offsets - options#mode error ~file + let error = + ParserFront.format_error ~offsets:options#offsets + options#mode point + in eprintf "\027[31m%s\027[0m%!" error + + (* Monolithic API of Menhir *) + | Parser.Error -> + let () = close_all () in + let invalid, valid_opt = + match get_win () with + Lexer.Nil -> + assert false (* Safe: There is always at least EOF. *) + | Lexer.One invalid -> invalid, None + | Lexer.Two (invalid, valid) -> invalid, Some valid in + let point = "", valid_opt, invalid in + let error = + ParserFront.format_error ~offsets:options#offsets + options#mode point + in eprintf "\027[31m%s\027[0m%!" error + + (* I/O errors *) | Sys_error msg -> Utils.highlight msg diff --git a/src/passes/1-parser/cameligo/dune b/src/passes/1-parser/cameligo/dune index 5604f5065..786b20f31 100644 --- a/src/passes/1-parser/cameligo/dune +++ b/src/passes/1-parser/cameligo/dune @@ -1,43 +1,43 @@ (ocamllex LexToken) (menhir - (merge_into Parser) - (modules ParToken Parser) - (flags -la 1 --table --strict --explain --external-tokens LexToken)) + (merge_into Parser) + (modules ParToken Parser) + (flags -la 1 --table --strict --explain --external-tokens LexToken)) (library - (name parser_cameligo) - (public_name ligo.parser.cameligo) - (modules AST cameligo Parser ParserLog LexToken) - (libraries - menhirLib - parser_shared - str - simple-utils - tezos-utils - getopt ) - (preprocess - (pps bisect_ppx --conditional) ) - (flags (:standard -open Simple_utils -open Parser_shared )) -) + (name parser_cameligo) + (public_name ligo.parser.cameligo) + (modules AST cameligo Parser ParserLog LexToken) + (libraries + menhirLib + parser_shared + str + simple-utils + tezos-utils + getopt ) + (preprocess + (pps bisect_ppx --conditional) ) + (flags (:standard -open Simple_utils -open Parser_shared))) (executable (name LexerMain) - (libraries - parser_cameligo) - (modules - LexerMain) + (libraries parser_cameligo) + (modules LexerMain) (preprocess (pps bisect_ppx --conditional)) (flags (:standard -open Parser_shared -open Parser_cameligo))) (executable (name ParserMain) - (libraries - parser_cameligo) + (libraries parser_cameligo) (modules - ParserAPI - ParserMain) + ParErr ParserAPI ParserMain) (preprocess (pps bisect_ppx --conditional)) (flags (:standard -open Simple_utils -open Parser_shared -open Parser_cameligo))) + +(executable + (name Unlexer) + (libraries str) + (modules Unlexer)) diff --git a/src/passes/1-parser/cameligo/unlexer.ml b/src/passes/1-parser/cameligo/unlexer.ml new file mode 100644 index 000000000..1d4ac5fef --- /dev/null +++ b/src/passes/1-parser/cameligo/unlexer.ml @@ -0,0 +1,109 @@ +(** Converting the textual representation of tokens produced by Menhir + into concrete syntax *) + +(* See [ParToken.mly] *) + +let gen_sym prefix = + let count = ref 0 in + fun () -> incr count; + prefix ^ string_of_int !count + +let id_sym = gen_sym "id" +and ctor_sym = gen_sym "C" + +let concrete = function + (* Keywords *) + +| "Begin" -> "begin" +| "Else" -> "else" +| "End" -> "end" +| "False" -> "false" +| "Fun" -> "fun" +| "If" -> "if" +| "In" -> "in" +| "Let" -> "let" +| "Match" -> "match" +| "Mod" -> "mod" +| "Not" -> "not" +| "Of" -> "of" +| "Or" -> "or" +| "Then" -> "then" +| "True" -> "true" +| "Type" -> "type" +| "With" -> "with" + + (* Data constructors *) + +| "C_None" -> "None" +| "C_Some" -> "Some" + + (* Symbols *) + +| "MINUS" -> "-" +| "PLUS" -> "+" +| "SLASH" -> "/" +| "TIMES" -> "*" + +| "LPAR" -> "(" +| "RPAR" -> ")" +| "LBRACKET" -> "[" +| "RBRACKET" -> "]" +| "LBRACE" -> "{" +| "RBRACE" -> "}" + +| "ARROW" -> "->" +| "CONS" -> "::" +| "CAT" -> "^" +| "DOT" -> "." + +| "COMMA" -> "," +| "SEMI" -> ";" +| "COLON" -> ":" +| "VBAR" -> "|" + +| "WILD" -> "_" + +| "EQ" -> "=" +| "NE" -> "<>" +| "LT" -> "<" +| "GT" -> ">" +| "LE" -> "<=" +| "GE" -> ">=" + +| "BOOL_OR" -> "||" +| "BOOL_AND" -> "&&" + + (* Literals *) + +| "String" -> "\"a string\"" +| "Bytes" -> "0xAA" +| "Int" -> "1" +| "Nat" -> "1n" +| "Mutez" -> "1mutez" +| "Ident" -> id_sym () +| "Constr" -> ctor_sym () + + (* Virtual tokens *) + +| "EOF" -> "" + + (* For completeness of open sum types *) + +| _ -> "" + +(* Unlexing a sentence *) + +let unlex (sentence: string) : Buffer.t = + let tokens = Str.split (Str.regexp " ") sentence in + let lexemes = List.map concrete tokens in + let buffer = Buffer.create 31 in + let rec trans = function + [] -> () + | [s] -> Buffer.add_string buffer s + | s::l -> Buffer.add_string buffer (s ^ " "); trans l + in trans lexemes; buffer + +(* Reading one line from the standard input channel and unlex it. *) + +let out = unlex (input_line stdin) |> Buffer.contents +let () = Printf.printf "%s\n" out diff --git a/src/passes/1-parser/pascaligo/.gitignore b/src/passes/1-parser/pascaligo/.gitignore index 5bb749771..cca52dc59 100644 --- a/src/passes/1-parser/pascaligo/.gitignore +++ b/src/passes/1-parser/pascaligo/.gitignore @@ -10,3 +10,4 @@ _build/* /Parser.ml /Lexer.ml /LexToken.ml +/Tests \ No newline at end of file diff --git a/src/passes/1-parser/pascaligo/LexToken.mll b/src/passes/1-parser/pascaligo/LexToken.mll index 16f4dd96a..090a25825 100644 --- a/src/passes/1-parser/pascaligo/LexToken.mll +++ b/src/passes/1-parser/pascaligo/LexToken.mll @@ -224,7 +224,7 @@ let proj_token = function let to_lexeme = function (* Literals *) - String s -> s.Region.value + String s -> String.escaped s.Region.value | Bytes b -> fst b.Region.value | Int i | Nat i diff --git a/src/passes/1-parser/pascaligo/ParErr.ml b/src/passes/1-parser/pascaligo/ParErr.ml new file mode 100644 index 000000000..1e07f3913 --- /dev/null +++ b/src/passes/1-parser/pascaligo/ParErr.ml @@ -0,0 +1,514 @@ + +(* This file was auto-generated based on "Parser.msg". *) + +(* Please note that the function [message] can raise [Not_found]. *) + +let message = + fun s -> + match s with + | 0 -> + "\n" + | 1 -> + "\n" + | 2 -> + "\n" + | 3 -> + "\n" + | 25 -> + "\n" + | 5 -> + "\n" + | 7 -> + "\n" + | 8 -> + "\n" + | 48 -> + "\n" + | 9 -> + "\n" + | 10 -> + "\n" + | 52 -> + "\n" + | 53 -> + "\n" + | 56 -> + "\n" + | 57 -> + "\n" + | 59 -> + "\n" + | 11 -> + "\n" + | 12 -> + "\n" + | 20 -> + "\n" + | 21 -> + "\n" + | 13 -> + "\n" + | 6 -> + "\n" + | 61 -> + "\n" + | 34 -> + "\n" + | 15 -> + "\n" + | 64 -> + "\n" + | 517 -> + "\n" + | 29 -> + "\n" + | 32 -> + "\n" + | 515 -> + "\n" + | 35 -> + "\n" + | 26 -> + "\n" + | 39 -> + "\n" + | 27 -> + "\n" + | 18 -> + "\n" + | 67 -> + "\n" + | 70 -> + "\n" + | 71 -> + "\n" + | 72 -> + "\n" + | 73 -> + "\n" + | 80 -> + "\n" + | 81 -> + "\n" + | 76 -> + "\n" + | 77 -> + "\n" + | 78 -> + "\n" + | 85 -> + "\n" + | 86 -> + "\n" + | 87 -> + "\n" + | 88 -> + "\n" + | 512 -> + "\n" + | 358 -> + "\n" + | 359 -> + "\n" + | 499 -> + "\n" + | 362 -> + "\n" + | 360 -> + "\n" + | 361 -> + "\n" + | 363 -> + "\n" + | 364 -> + "\n" + | 365 -> + "\n" + | 366 -> + "\n" + | 367 -> + "\n" + | 475 -> + "\n" + | 476 -> + "\n" + | 477 -> + "\n" + | 478 -> + "\n" + | 496 -> + "\n" + | 503 -> + "\n" + | 502 -> + "\n" + | 371 -> + "\n" + | 372 -> + "\n" + | 373 -> + "\n" + | 374 -> + "\n" + | 378 -> + "\n" + | 380 -> + "\n" + | 382 -> + "\n" + | 383 -> + "\n" + | 387 -> + "\n" + | 384 -> + "\n" + | 385 -> + "\n" + | 389 -> + "\n" + | 390 -> + "\n" + | 391 -> + "\n" + | 393 -> + "\n" + | 395 -> + "\n" + | 399 -> + "\n" + | 396 -> + "\n" + | 397 -> + "\n" + | 375 -> + "\n" + | 381 -> + "\n" + | 404 -> + "\n" + | 405 -> + "\n" + | 406 -> + "\n" + | 492 -> + "\n" + | 493 -> + "\n" + | 494 -> + "\n" + | 407 -> + "\n" + | 488 -> + "\n" + | 408 -> + "\n" + | 452 -> + "\n" + | 447 -> + "\n" + | 453 -> + "\n" + | 409 -> + "\n" + | 410 -> + "\n" + | 416 -> + "\n" + | 420 -> + "\n" + | 421 -> + "\n" + | 411 -> + "\n" + | 424 -> + "\n" + | 425 -> + "\n" + | 426 -> + "\n" + | 413 -> + "\n" + | 415 -> + "\n" + | 435 -> + "\n" + | 436 -> + "\n" + | 437 -> + "\n" + | 440 -> + "\n" + | 441 -> + "\n" + | 469 -> + "\n" + | 470 -> + "\n" + | 473 -> + "\n" + | 472 -> + "\n" + | 438 -> + "\n" + | 467 -> + "\n" + | 439 -> + "\n" + | 69 -> + "\n" + | 428 -> + "\n" + | 429 -> + "\n" + | 430 -> + "\n" + | 431 -> + "\n" + | 432 -> + "\n" + | 508 -> + "\n" + | 521 -> + "\n" + | 159 -> + "\n" + | 523 -> + "\n" + | 137 -> + "\n" + | 150 -> + "\n" + | 166 -> + "\n" + | 167 -> + "\n" + | 158 -> + "\n" + | 173 -> + "\n" + | 152 -> + "\n" + | 168 -> + "\n" + | 169 -> + "\n" + | 175 -> + "\n" + | 177 -> + "\n" + | 179 -> + "\n" + | 181 -> + "\n" + | 183 -> + "\n" + | 160 -> + "\n" + | 170 -> + "\n" + | 157 -> + "\n" + | 163 -> + "\n" + | 187 -> + "\n" + | 92 -> + "\n" + | 318 -> + "\n" + | 319 -> + "\n" + | 322 -> + "\n" + | 323 -> + "\n" + | 356 -> + "\n" + | 351 -> + "\n" + | 353 -> + "\n" + | 93 -> + "\n" + | 94 -> + "\n" + | 338 -> + "\n" + | 95 -> + "\n" + | 96 -> + "\n" + | 342 -> + "\n" + | 343 -> + "\n" + | 346 -> + "\n" + | 347 -> + "\n" + | 349 -> + "\n" + | 97 -> + "\n" + | 136 -> + "\n" + | 101 -> + "\n" + | 195 -> + "\n" + | 196 -> + "\n" + | 198 -> + "\n" + | 199 -> + "\n" + | 202 -> + "\n" + | 203 -> + "\n" + | 334 -> + "\n" + | 329 -> + "\n" + | 331 -> + "\n" + | 102 -> + "\n" + | 103 -> + "\n" + | 326 -> + "\n" + | 312 -> + "\n" + | 314 -> + "\n" + | 104 -> + "\n" + | 308 -> + "\n" + | 306 -> + "\n" + | 309 -> + "\n" + | 310 -> + "\n" + | 304 -> + "\n" + | 134 -> + "\n" + | 106 -> + "\n" + | 296 -> + "\n" + | 297 -> + "\n" + | 298 -> + "\n" + | 299 -> + "\n" + | 300 -> + "\n" + | 107 -> + "\n" + | 108 -> + "\n" + | 285 -> + "\n" + | 286 -> + "\n" + | 132 -> + "\n" + | 155 -> + "\n" + | 288 -> + "\n" + | 291 -> + "\n" + | 292 -> + "\n" + | 128 -> + "\n" + | 110 -> + "\n" + | 113 -> + "\n" + | 208 -> + "\n" + | 209 -> + "\n" + | 247 -> + "\n" + | 271 -> + "\n" + | 248 -> + "\n" + | 250 -> + "\n" + | 251 -> + "\n" + | 272 -> + "\n" + | 278 -> + "\n" + | 277 -> + "\n" + | 281 -> + "\n" + | 280 -> + "\n" + | 218 -> + "\n" + | 261 -> + "\n" + | 262 -> + "\n" + | 265 -> + "\n" + | 266 -> + "\n" + | 269 -> + "\n" + | 255 -> + "\n" + | 257 -> + "\n" + | 219 -> + "\n" + | 244 -> + "\n" + | 245 -> + "\n" + | 253 -> + "\n" + | 241 -> + "\n" + | 210 -> + "\n" + | 275 -> + "\n" + | 211 -> + "\n" + | 223 -> + "\n" + | 224 -> + "\n" + | 240 -> + "\n" + | 225 -> + "\n" + | 226 -> + "\n" + | 234 -> + "\n" + | 114 -> + "\n" + | 118 -> + "\n" + | 206 -> + "\n" + | 119 -> + "\n" + | 125 -> + "\n" + | _ -> + raise Not_found diff --git a/src/passes/1-parser/pascaligo/ParserAPI.ml b/src/passes/1-parser/pascaligo/ParserAPI.ml index 7ae5c5ad4..df82173a9 100644 --- a/src/passes/1-parser/pascaligo/ParserAPI.ml +++ b/src/passes/1-parser/pascaligo/ParserAPI.ml @@ -1,57 +1,82 @@ -(** Generic parser for LIGO *) - -module type PARSER = - sig - (* The type of tokens *) - - type token - - (* This exception is raised by the monolithic API functions *) - - exception Error - - (* The monolithic API *) - - val contract : (Lexing.lexbuf -> token) -> Lexing.lexbuf -> AST.t - - (* The incremental API *) - - module MenhirInterpreter : - sig - include MenhirLib.IncrementalEngine.INCREMENTAL_ENGINE - with type token = token - end - - module Incremental : - sig - val contract : Lexing.position -> AST.t MenhirInterpreter.checkpoint - end - end - -(* Main functor *) - -module Make (Lexer: Lexer.S) - (Parser: PARSER with type token = Lexer.Token.token) = - struct - - module I = Parser.MenhirInterpreter - - (* The parser has successfully produced a semantic value. *) - - let success v = v - - (* The parser has suspended itself because of a syntax error. Stop. *) - - let fail _checkpoint = raise Parser.Error - - (* The generic parsing function *) - - let incr_contract Lexer.{read; buffer; close; _} : AST.t = - let supplier = I.lexer_lexbuf_to_supplier read buffer in - let parser = Parser.Incremental.contract buffer.Lexing.lex_curr_p in - let ast = I.loop_handle success fail supplier parser - in close (); ast - - let mono_contract = Parser.contract - - end +(* Generic parser for LIGO *) + +(* Main functor *) + +module Make (Lexer: Lexer.S with module Token := LexToken) + (Parser: module type of Parser) + (ParErr: sig val message : int -> string end) = + struct + module I = Parser.MenhirInterpreter + module S = MenhirLib.General (* Streams *) + + (* The call [stack checkpoint] extracts the parser's stack out of + a checkpoint. *) + + let stack = function + I.HandlingError env -> I.stack env + | _ -> assert false + + (* The call [state checkpoint] extracts the number of the current + state out of a parser checkpoint. *) + + let state checkpoint : int = + match Lazy.force (stack checkpoint) with + S.Nil -> 0 (* WARNING: Hack. The first state should be 0. *) + | S.Cons (I.Element (s,_,_,_),_) -> I.number s + + (* The parser has successfully produced a semantic value. *) + + let success v = v + + (* The parser has suspended itself because of a syntax error. Stop. *) + + type message = string + type valid = Lexer.token + type invalid = Lexer.token + type error = message * valid option * invalid + + exception Point of error + + let failure get_win checkpoint = + let message = ParErr.message (state checkpoint) in + match get_win () with + Lexer.Nil -> assert false + | Lexer.One invalid -> + raise (Point (message, None, invalid)) + | Lexer.Two (invalid, valid) -> + raise (Point (message, Some valid, invalid)) + + (* The two Menhir APIs are called from the following two functions. *) + + let incr_contract Lexer.{read; buffer; get_win; close; _} : AST.t = + let supplier = I.lexer_lexbuf_to_supplier read buffer + and failure = failure get_win in + let parser = Parser.Incremental.contract buffer.Lexing.lex_curr_p in + let ast = I.loop_handle success failure supplier parser + in close (); ast + + let mono_contract = Parser.contract + + (* Errors *) + + let format_error ?(offsets=true) mode (msg, valid_opt, invalid) = + let invalid_region = LexToken.to_region invalid in + let header = + "Parse error " ^ invalid_region#to_string ~offsets mode in + let trailer = + match valid_opt with + None -> + if LexToken.is_eof invalid then "" + else let invalid_lexeme = LexToken.to_lexeme invalid in + Printf.sprintf ", before \"%s\"" invalid_lexeme + | Some valid -> + let valid_lexeme = LexToken.to_lexeme valid in + let s = Printf.sprintf ", after \"%s\"" valid_lexeme in + if LexToken.is_eof invalid then s + else + let invalid_lexeme = LexToken.to_lexeme invalid in + Printf.sprintf "%s and before \"%s\"" s invalid_lexeme in + let header = header ^ trailer in + header ^ (if msg = "" then ".\n" else ":\n" ^ msg) + + end diff --git a/src/passes/1-parser/pascaligo/ParserAPI.mli b/src/passes/1-parser/pascaligo/ParserAPI.mli index ff3fe4854..afc0fb8ba 100644 --- a/src/passes/1-parser/pascaligo/ParserAPI.mli +++ b/src/passes/1-parser/pascaligo/ParserAPI.mli @@ -1,39 +1,22 @@ (** Generic parser API for LIGO *) -module type PARSER = +module Make (Lexer: Lexer.S with module Token := LexToken) + (Parser: module type of Parser) + (ParErr: module type of ParErr) : sig - (* The type of tokens *) + (* Monolithic and incremental APIs of Menhir for parsing *) - type token - - (* This exception is raised by the monolithic API functions *) - - exception Error - - (* The monolithic API *) - - val contract : (Lexing.lexbuf -> token) -> Lexing.lexbuf -> AST.t - - (* The incremental API *) - - module MenhirInterpreter : - sig - include MenhirLib.IncrementalEngine.INCREMENTAL_ENGINE - with type token = token - end - - module Incremental : - sig - val contract : Lexing.position -> AST.t MenhirInterpreter.checkpoint - end - - end - -(* Main functor *) - -module Make (Lexer: Lexer.S) - (Parser: PARSER with type token = Lexer.Token.token) : - sig val mono_contract : (Lexing.lexbuf -> Lexer.token) -> Lexing.lexbuf -> AST.t val incr_contract : Lexer.instance -> AST.t + + (* Error handling *) + + type message = string + type valid = Lexer.token + type invalid = Lexer.token + type error = message * valid option * invalid + + exception Point of error + + val format_error : ?offsets:bool -> [`Byte | `Point] -> error -> string end diff --git a/src/passes/1-parser/pascaligo/ParserMain.ml b/src/passes/1-parser/pascaligo/ParserMain.ml index 8e64c56eb..489008453 100644 --- a/src/passes/1-parser/pascaligo/ParserMain.ml +++ b/src/passes/1-parser/pascaligo/ParserMain.ml @@ -1,16 +1,14 @@ -(** Driver for the parser of PascaLIGO *) +(** Driver for the PascaLIGO parser *) let extension = ".ligo" let options = EvalOpt.read "PascaLIGO" extension +open Printf + (** Error printing and exception tracing *) let () = Printexc.record_backtrace true -(** Auxiliary functions -*) -let sprintf = Printf.sprintf - (** Extracting the input file *) let file = @@ -23,17 +21,7 @@ let file = let () = Printexc.record_backtrace true let external_ text = - Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;; - -type error = SyntaxError - -let error_to_string = function - SyntaxError -> "Syntax error.\n" - -let print_error ?(offsets=true) mode Region.{region; value} ~file = - let msg = error_to_string value - and reg = region#to_string ~file ~offsets mode in - Utils.highlight (sprintf "Parse error %s:\n%s%!" reg msg) + Utils.highlight (sprintf "External error: %s" text); exit 1;; (** {1 Preprocessing the input source and opening the input channels} *) @@ -42,7 +30,7 @@ let print_error ?(offsets=true) mode Region.{region; value} ~file = let lib_path = match options#libs with [] -> "" - | libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path + | libs -> let mk_I dir path = sprintf " -I %s%s" dir path in List.fold_right mk_I libs "" let prefix = @@ -61,26 +49,26 @@ let pp_input = let cpp_cmd = match options#input with None | Some "-" -> - Printf.sprintf "cpp -traditional-cpp%s - > %s" + sprintf "cpp -traditional-cpp%s - > %s" lib_path pp_input | Some file -> - Printf.sprintf "cpp -traditional-cpp%s %s > %s" + sprintf "cpp -traditional-cpp%s %s > %s" lib_path file pp_input let () = if Utils.String.Set.mem "cpp" options#verbose - then Printf.eprintf "%s\n%!" cpp_cmd; + then eprintf "%s\n%!" cpp_cmd; if Sys.command cpp_cmd <> 0 then - external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd) + external_ (sprintf "the command \"%s\" failed." cpp_cmd) (** {1 Instanciating the lexer} *) module Lexer = Lexer.Make (LexToken) module Log = LexerLog.Make (Lexer) -module ParserFront = ParserAPI.Make (Lexer) (Parser) +module ParserFront = ParserAPI.Make (Lexer) (Parser) (ParErr) let lexer_inst = Lexer.open_token_stream (Some pp_input) -let Lexer.{read; buffer; get_pos; get_last; close} = lexer_inst +let Lexer.{read; buffer; get_win; get_pos; get_last; close} = lexer_inst and cout = stdout @@ -97,10 +85,10 @@ let tokeniser = read ~log let () = try - (* The incremental API *) - let ast = ParserFront.incr_contract lexer_inst in - (* The monolithic API *) - (* let ast = ParserFront.mono_contract tokeniser buffer in *) + let ast = + if options#mono + then ParserFront.mono_contract tokeniser buffer + else ParserFront.incr_contract lexer_inst in if Utils.String.Set.mem "ast" options#verbose then let buffer = Buffer.create 131 in let state = ParserLog.mk_state @@ -122,14 +110,36 @@ let () = Buffer.output_buffer stdout buffer end with + (* Lexing errors *) Lexer.Error err -> close_all (); - Lexer.print_error ~offsets:options#offsets - options#mode err ~file - | Parser.Error -> - let region = get_last () in - let error = Region.{region; value=SyntaxError} in + let msg = + Lexer.format_error ~offsets:options#offsets + options#mode err ~file + in prerr_string msg + + (* Incremental API of Menhir *) + | ParserFront.Point point -> let () = close_all () in - print_error ~offsets:options#offsets - options#mode error ~file + let error = + ParserFront.format_error ~offsets:options#offsets + options#mode point + in eprintf "\027[31m%s\027[0m%!" error + + (* Monolithic API of Menhir *) + | Parser.Error -> + let () = close_all () in + let invalid, valid_opt = + match get_win () with + Lexer.Nil -> + assert false (* Safe: There is always at least EOF. *) + | Lexer.One invalid -> invalid, None + | Lexer.Two (invalid, valid) -> invalid, Some valid in + let point = "", valid_opt, invalid in + let error = + ParserFront.format_error ~offsets:options#offsets + options#mode point + in eprintf "\027[31m%s\027[0m%!" error + + (* I/O errors *) | Sys_error msg -> Utils.highlight msg diff --git a/src/passes/1-parser/pascaligo/dune b/src/passes/1-parser/pascaligo/dune index 28fee7d04..908455acb 100644 --- a/src/passes/1-parser/pascaligo/dune +++ b/src/passes/1-parser/pascaligo/dune @@ -18,34 +18,29 @@ ) (preprocess (pps bisect_ppx --conditional)) - (flags (:standard -open Parser_shared -open Simple_utils)) -) + (flags (:standard -open Parser_shared -open Simple_utils))) (executable (name LexerMain) (libraries - hex - simple-utils - tezos-utils - parser_pascaligo) - (modules - LexerMain) + hex simple-utils tezos-utils parser_pascaligo) + (modules LexerMain) (preprocess (pps bisect_ppx --conditional)) - (flags (:standard -open Parser_shared -open Parser_pascaligo)) -) + (flags (:standard -open Parser_shared -open Parser_pascaligo))) (executable (name ParserMain) - (libraries - parser_pascaligo) - (modules ParserMain) + (libraries parser_pascaligo) + (modules + ParErr ParserAPI ParserMain) (preprocess (pps bisect_ppx --conditional)) (flags (:standard -open Simple_utils -open Parser_shared -open Parser_pascaligo))) (executable (name Unlexer) + (libraries str) (preprocess (pps bisect_ppx --conditional)) (modules Unlexer)) diff --git a/src/passes/1-parser/reasonligo/.gitignore b/src/passes/1-parser/reasonligo/.gitignore new file mode 100644 index 000000000..cca52dc59 --- /dev/null +++ b/src/passes/1-parser/reasonligo/.gitignore @@ -0,0 +1,13 @@ +_build/* +*/_build +*~ +.merlin +*/.merlin +*.install +/Version.ml +/dune-project +/Parser.mli +/Parser.ml +/Lexer.ml +/LexToken.ml +/Tests \ No newline at end of file diff --git a/src/passes/1-parser/reasonligo/.unlexer.tag b/src/passes/1-parser/reasonligo/.unlexer.tag new file mode 100644 index 000000000..e69de29bb diff --git a/src/passes/1-parser/reasonligo/LexToken.mli b/src/passes/1-parser/reasonligo/LexToken.mli index 7c438ba02..b5fc9e74d 100644 --- a/src/passes/1-parser/reasonligo/LexToken.mli +++ b/src/passes/1-parser/reasonligo/LexToken.mli @@ -65,7 +65,7 @@ type t = (* Comparisons *) | EQ of Region.t (* "=" *) -| EQEQ of Region.t (* "=" *) +| EQEQ of Region.t (* "==" *) | NE of Region.t (* "!=" *) | LT of Region.t (* "<" *) | GT of Region.t (* ">" *) diff --git a/src/passes/1-parser/reasonligo/ParErr.ml b/src/passes/1-parser/reasonligo/ParErr.ml new file mode 100644 index 000000000..594f9ecd4 --- /dev/null +++ b/src/passes/1-parser/reasonligo/ParErr.ml @@ -0,0 +1,510 @@ + +(* This file was auto-generated based on "Parser.msg". *) + +(* Please note that the function [message] can raise [Not_found]. *) + +let message = + fun s -> + match s with + | 0 -> + "\n" + | 1 -> + "\n" + | 2 -> + "\n" + | 3 -> + "\n" + | 4 -> + "\n" + | 5 -> + "\n" + | 55 -> + "\n" + | 6 -> + "\n" + | 52 -> + "\n" + | 54 -> + "\n" + | 7 -> + "\n" + | 38 -> + "\n" + | 8 -> + "\n" + | 9 -> + "\n" + | 32 -> + "\n" + | 36 -> + "\n" + | 35 -> + "\n" + | 10 -> + "\n" + | 31 -> + "\n" + | 11 -> + "\n" + | 509 -> + "\n" + | 503 -> + "\n" + | 48 -> + "\n" + | 12 -> + "\n" + | 19 -> + "\n" + | 20 -> + "\n" + | 43 -> + "\n" + | 46 -> + "\n" + | 49 -> + "\n" + | 13 -> + "\n" + | 14 -> + "\n" + | 60 -> + "\n" + | 65 -> + "\n" + | 505 -> + "\n" + | 145 -> + "\n" + | 146 -> + "\n" + | 144 -> + "\n" + | 329 -> + "\n" + | 331 -> + "\n" + | 330 -> + "\n" + | 61 -> + "\n" + | 64 -> + "\n" + | 59 -> + "\n" + | 143 -> + "\n" + | 338 -> + "\n" + | 340 -> + "\n" + | 339 -> + "\n" + | 151 -> + "\n" + | 152 -> + "\n" + | 78 -> + "\n" + | 325 -> + "\n" + | 327 -> + "\n" + | 326 -> + "\n" + | 92 -> + "\n" + | 155 -> + "\n" + | 118 -> + "\n" + | 125 -> + "\n" + | 87 -> + "\n" + | 105 -> + "\n" + | 107 -> + "\n" + | 108 -> + "\n" + | 106 -> + "\n" + | 88 -> + "\n" + | 93 -> + "\n" + | 80 -> + "\n" + | 81 -> + "\n" + | 82 -> + "\n" + | 132 -> + "\n" + | 334 -> + "\n" + | 336 -> + "\n" + | 335 -> + "\n" + | 133 -> + "\n" + | 136 -> + "\n" + | 137 -> + "\n" + | 157 -> + "\n" + | 159 -> + "\n" + | 158 -> + "\n" + | 512 -> + "\n" + | 218 -> + "\n" + | 514 -> + "\n" + | 216 -> + "\n" + | 250 -> + "\n" + | 248 -> + "\n" + | 249 -> + "\n" + | 230 -> + "\n" + | 235 -> + "\n" + | 252 -> + "\n" + | 254 -> + "\n" + | 255 -> + "\n" + | 258 -> + "\n" + | 219 -> + "\n" + | 226 -> + "\n" + | 227 -> + "\n" + | 260 -> + "\n" + | 262 -> + "\n" + | 264 -> + "\n" + | 266 -> + "\n" + | 194 -> + "\n" + | 195 -> + "\n" + | 206 -> + "\n" + | 215 -> + "\n" + | 199 -> + "\n" + | 207 -> + "\n" + | 208 -> + "\n" + | 196 -> + "\n" + | 197 -> + "\n" + | 198 -> + "\n" + | 256 -> + "\n" + | 257 -> + "\n" + | 277 -> + "\n" + | 233 -> + "\n" + | 279 -> + "\n" + | 67 -> + "\n" + | 463 -> + "\n" + | 464 -> + "\n" + | 387 -> + "\n" + | 121 -> + "\n" + | 122 -> + "\n" + | 120 -> + "\n" + | 466 -> + "\n" + | 467 -> + "\n" + | 483 -> + "\n" + | 492 -> + "\n" + | 469 -> + "\n" + | 470 -> + "\n" + | 468 -> + "\n" + | 471 -> + "\n" + | 472 -> + "\n" + | 473 -> + "\n" + | 475 -> + "\n" + | 476 -> + "\n" + | 477 -> + "\n" + | 478 -> + "\n" + | 487 -> + "\n" + | 488 -> + "\n" + | 474 -> + "\n" + | 499 -> + "\n" + | 497 -> + "\n" + | 465 -> + "\n" + | 321 -> + "\n" + | 315 -> + "\n" + | 316 -> + "\n" + | 318 -> + "\n" + | 317 -> + "\n" + | 314 -> + "\n" + | 71 -> + "\n" + | 410 -> + "\n" + | 298 -> + "\n" + | 304 -> + "\n" + | 305 -> + "\n" + | 308 -> + "\n" + | 309 -> + "\n" + | 300 -> + "\n" + | 178 -> + "\n" + | 73 -> + "\n" + | 75 -> + "\n" + | 419 -> + "\n" + | 420 -> + "\n" + | 77 -> + "\n" + | 160 -> + "\n" + | 412 -> + "\n" + | 413 -> + "\n" + | 415 -> + "\n" + | 416 -> + "\n" + | 193 -> + "\n" + | 229 -> + "\n" + | 74 -> + "\n" + | 447 -> + "\n" + | 448 -> + "\n" + | 456 -> + "\n" + | 457 -> + "\n" + | 459 -> + "\n" + | 460 -> + "\n" + | 449 -> + "\n" + | 450 -> + "\n" + | 76 -> + "\n" + | 440 -> + "\n" + | 441 -> + "\n" + | 425 -> + "\n" + | 422 -> + "\n" + | 428 -> + "\n" + | 429 -> + "\n" + | 434 -> + "\n" + | 438 -> + "\n" + | 437 -> + "\n" + | 433 -> + "\n" + | 423 -> + "\n" + | 427 -> + "\n" + | 162 -> + "\n" + | 163 -> + "\n" + | 290 -> + "\n" + | 295 -> + "\n" + | 296 -> + "\n" + | 357 -> + "\n" + | 400 -> + "\n" + | 401 -> + "\n" + | 402 -> + "\n" + | 403 -> + "\n" + | 404 -> + "\n" + | 405 -> + "\n" + | 399 -> + "\n" + | 297 -> + "\n" + | 311 -> + "\n" + | 312 -> + "\n" + | 322 -> + "\n" + | 323 -> + "\n" + | 377 -> + "\n" + | 384 -> + "\n" + | 342 -> + "\n" + | 343 -> + "\n" + | 324 -> + "\n" + | 344 -> + "\n" + | 345 -> + "\n" + | 346 -> + "\n" + | 370 -> + "\n" + | 371 -> + "\n" + | 372 -> + "\n" + | 373 -> + "\n" + | 379 -> + "\n" + | 380 -> + "\n" + | 369 -> + "\n" + | 393 -> + "\n" + | 391 -> + "\n" + | 313 -> + "\n" + | 348 -> + "\n" + | 349 -> + "\n" + | 347 -> + "\n" + | 350 -> + "\n" + | 351 -> + "\n" + | 352 -> + "\n" + | 359 -> + "\n" + | 360 -> + "\n" + | 361 -> + "\n" + | 362 -> + "\n" + | 364 -> + "\n" + | 363 -> + "\n" + | 358 -> + "\n" + | 292 -> + "\n" + | 293 -> + "\n" + | 164 -> + "\n" + | 165 -> + "\n" + | 166 -> + "\n" + | 167 -> + "\n" + | 168 -> + "\n" + | 169 -> + "\n" + | 174 -> + "\n" + | 175 -> + "\n" + | 176 -> + "\n" + | 188 -> + "\n" + | 237 -> + "\n" + | _ -> + raise Not_found diff --git a/src/passes/1-parser/reasonligo/ParserAPI.ml b/src/passes/1-parser/reasonligo/ParserAPI.ml index 7ae5c5ad4..df82173a9 100644 --- a/src/passes/1-parser/reasonligo/ParserAPI.ml +++ b/src/passes/1-parser/reasonligo/ParserAPI.ml @@ -1,57 +1,82 @@ -(** Generic parser for LIGO *) - -module type PARSER = - sig - (* The type of tokens *) - - type token - - (* This exception is raised by the monolithic API functions *) - - exception Error - - (* The monolithic API *) - - val contract : (Lexing.lexbuf -> token) -> Lexing.lexbuf -> AST.t - - (* The incremental API *) - - module MenhirInterpreter : - sig - include MenhirLib.IncrementalEngine.INCREMENTAL_ENGINE - with type token = token - end - - module Incremental : - sig - val contract : Lexing.position -> AST.t MenhirInterpreter.checkpoint - end - end - -(* Main functor *) - -module Make (Lexer: Lexer.S) - (Parser: PARSER with type token = Lexer.Token.token) = - struct - - module I = Parser.MenhirInterpreter - - (* The parser has successfully produced a semantic value. *) - - let success v = v - - (* The parser has suspended itself because of a syntax error. Stop. *) - - let fail _checkpoint = raise Parser.Error - - (* The generic parsing function *) - - let incr_contract Lexer.{read; buffer; close; _} : AST.t = - let supplier = I.lexer_lexbuf_to_supplier read buffer in - let parser = Parser.Incremental.contract buffer.Lexing.lex_curr_p in - let ast = I.loop_handle success fail supplier parser - in close (); ast - - let mono_contract = Parser.contract - - end +(* Generic parser for LIGO *) + +(* Main functor *) + +module Make (Lexer: Lexer.S with module Token := LexToken) + (Parser: module type of Parser) + (ParErr: sig val message : int -> string end) = + struct + module I = Parser.MenhirInterpreter + module S = MenhirLib.General (* Streams *) + + (* The call [stack checkpoint] extracts the parser's stack out of + a checkpoint. *) + + let stack = function + I.HandlingError env -> I.stack env + | _ -> assert false + + (* The call [state checkpoint] extracts the number of the current + state out of a parser checkpoint. *) + + let state checkpoint : int = + match Lazy.force (stack checkpoint) with + S.Nil -> 0 (* WARNING: Hack. The first state should be 0. *) + | S.Cons (I.Element (s,_,_,_),_) -> I.number s + + (* The parser has successfully produced a semantic value. *) + + let success v = v + + (* The parser has suspended itself because of a syntax error. Stop. *) + + type message = string + type valid = Lexer.token + type invalid = Lexer.token + type error = message * valid option * invalid + + exception Point of error + + let failure get_win checkpoint = + let message = ParErr.message (state checkpoint) in + match get_win () with + Lexer.Nil -> assert false + | Lexer.One invalid -> + raise (Point (message, None, invalid)) + | Lexer.Two (invalid, valid) -> + raise (Point (message, Some valid, invalid)) + + (* The two Menhir APIs are called from the following two functions. *) + + let incr_contract Lexer.{read; buffer; get_win; close; _} : AST.t = + let supplier = I.lexer_lexbuf_to_supplier read buffer + and failure = failure get_win in + let parser = Parser.Incremental.contract buffer.Lexing.lex_curr_p in + let ast = I.loop_handle success failure supplier parser + in close (); ast + + let mono_contract = Parser.contract + + (* Errors *) + + let format_error ?(offsets=true) mode (msg, valid_opt, invalid) = + let invalid_region = LexToken.to_region invalid in + let header = + "Parse error " ^ invalid_region#to_string ~offsets mode in + let trailer = + match valid_opt with + None -> + if LexToken.is_eof invalid then "" + else let invalid_lexeme = LexToken.to_lexeme invalid in + Printf.sprintf ", before \"%s\"" invalid_lexeme + | Some valid -> + let valid_lexeme = LexToken.to_lexeme valid in + let s = Printf.sprintf ", after \"%s\"" valid_lexeme in + if LexToken.is_eof invalid then s + else + let invalid_lexeme = LexToken.to_lexeme invalid in + Printf.sprintf "%s and before \"%s\"" s invalid_lexeme in + let header = header ^ trailer in + header ^ (if msg = "" then ".\n" else ":\n" ^ msg) + + end diff --git a/src/passes/1-parser/reasonligo/ParserAPI.mli b/src/passes/1-parser/reasonligo/ParserAPI.mli index ff3fe4854..7d969a33c 100644 --- a/src/passes/1-parser/reasonligo/ParserAPI.mli +++ b/src/passes/1-parser/reasonligo/ParserAPI.mli @@ -1,39 +1,22 @@ (** Generic parser API for LIGO *) -module type PARSER = +module Make (Lexer: Lexer.S with module Token := LexToken) + (Parser: module type of Parser) + (ParErr: sig val message: int -> string end) : sig - (* The type of tokens *) + (* Monolithic and incremental APIs of Menhir for parsing *) - type token - - (* This exception is raised by the monolithic API functions *) - - exception Error - - (* The monolithic API *) - - val contract : (Lexing.lexbuf -> token) -> Lexing.lexbuf -> AST.t - - (* The incremental API *) - - module MenhirInterpreter : - sig - include MenhirLib.IncrementalEngine.INCREMENTAL_ENGINE - with type token = token - end - - module Incremental : - sig - val contract : Lexing.position -> AST.t MenhirInterpreter.checkpoint - end - - end - -(* Main functor *) - -module Make (Lexer: Lexer.S) - (Parser: PARSER with type token = Lexer.Token.token) : - sig val mono_contract : (Lexing.lexbuf -> Lexer.token) -> Lexing.lexbuf -> AST.t val incr_contract : Lexer.instance -> AST.t + + (* Error handling *) + + type message = string + type valid = Lexer.token + type invalid = Lexer.token + type error = message * valid option * invalid + + exception Point of error + + val format_error : ?offsets:bool -> [`Byte | `Point] -> error -> string end diff --git a/src/passes/1-parser/reasonligo/ParserMain.ml b/src/passes/1-parser/reasonligo/ParserMain.ml index f855beb52..0af4c4a76 100644 --- a/src/passes/1-parser/reasonligo/ParserMain.ml +++ b/src/passes/1-parser/reasonligo/ParserMain.ml @@ -1,16 +1,14 @@ -(** Driver for the LIGO parser *) +(** Driver for the Reason LIGO parser *) let extension = ".religo" let options = EvalOpt.read "ReasonLIGO" extension +open Printf + (** Error printing and exception tracing *) let () = Printexc.record_backtrace true -(** Auxiliary functions -*) -let sprintf = Printf.sprintf - (** Extracting the input file *) let file = @@ -23,17 +21,7 @@ let file = let () = Printexc.record_backtrace true let external_ text = - Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;; - -type error = SyntaxError - -let error_to_string = function - SyntaxError -> "Syntax error.\n" - -let print_error ?(offsets=true) mode Region.{region; value} ~file = - let msg = error_to_string value - and reg = region#to_string ~file ~offsets mode in - Utils.highlight (sprintf "Parse error %s:\n%s%!" reg msg) + Utils.highlight (sprintf "External error: %s" text); exit 1;; (** {1 Preprocessing the input source and opening the input channels} *) @@ -42,7 +30,7 @@ let print_error ?(offsets=true) mode Region.{region; value} ~file = let lib_path = match options#libs with [] -> "" - | libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path + | libs -> let mk_I dir path = sprintf " -I %s%s" dir path in List.fold_right mk_I libs "" let prefix = @@ -61,26 +49,26 @@ let pp_input = let cpp_cmd = match options#input with None | Some "-" -> - Printf.sprintf "cpp -traditional-cpp%s - > %s" + sprintf "cpp -traditional-cpp%s - > %s" lib_path pp_input | Some file -> - Printf.sprintf "cpp -traditional-cpp%s %s > %s" + sprintf "cpp -traditional-cpp%s %s > %s" lib_path file pp_input let () = if Utils.String.Set.mem "cpp" options#verbose - then Printf.eprintf "%s\n%!" cpp_cmd; + then eprintf "%s\n%!" cpp_cmd; if Sys.command cpp_cmd <> 0 then - external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd) + external_ (sprintf "the command \"%s\" failed." cpp_cmd) (** {1 Instanciating the lexer} *) module Lexer = Lexer.Make (LexToken) module Log = LexerLog.Make (Lexer) -module ParserFront = ParserAPI.Make (Lexer) (Parser) +module ParserFront = ParserAPI.Make (Lexer) (Parser) (ParErr) let lexer_inst = Lexer.open_token_stream (Some pp_input) -let Lexer.{read; buffer; get_pos; get_last; close} = lexer_inst +let Lexer.{read; buffer; get_win; get_pos; get_last; close} = lexer_inst and cout = stdout @@ -97,10 +85,10 @@ let tokeniser = read ~log let () = try - (* The incremental API *) - let ast = ParserFront.incr_contract lexer_inst in - (* The monolithic API *) - (* let ast = ParserFront.mono_contract tokeniser buffer in *) + let ast = + if options#mono + then ParserFront.mono_contract tokeniser buffer + else ParserFront.incr_contract lexer_inst in if Utils.String.Set.mem "ast" options#verbose then let buffer = Buffer.create 131 in let state = ParserLog.mk_state @@ -122,14 +110,36 @@ let () = Buffer.output_buffer stdout buffer end with + (* Lexing errors *) Lexer.Error err -> close_all (); - Lexer.print_error ~offsets:options#offsets - options#mode err ~file - | Parser.Error -> - let region = get_last () in - let error = Region.{region; value=SyntaxError} in + let msg = + Lexer.format_error ~offsets:options#offsets + options#mode err ~file + in prerr_string msg + + (* Incremental API of Menhir *) + | ParserFront.Point point -> let () = close_all () in - print_error ~offsets:options#offsets - options#mode error ~file + let error = + ParserFront.format_error ~offsets:options#offsets + options#mode point + in eprintf "\027[31m%s\027[0m%!" error + + (* Monolithic API of Menhir *) + | Parser.Error -> + let () = close_all () in + let invalid, valid_opt = + match get_win () with + Lexer.Nil -> + assert false (* Safe: There is always at least EOF. *) + | Lexer.One invalid -> invalid, None + | Lexer.Two (invalid, valid) -> invalid, Some valid in + let point = "", valid_opt, invalid in + let error = + ParserFront.format_error ~offsets:options#offsets + options#mode point + in eprintf "\027[31m%s\027[0m%!" error + + (* I/O errors *) | Sys_error msg -> Utils.highlight msg diff --git a/src/passes/1-parser/reasonligo/dune b/src/passes/1-parser/reasonligo/dune index fc7b1d15e..12b0c6d27 100644 --- a/src/passes/1-parser/reasonligo/dune +++ b/src/passes/1-parser/reasonligo/dune @@ -8,43 +8,40 @@ (library (name parser_reasonligo) (public_name ligo.parser.reasonligo) - (modules SyntaxError reasonligo LexToken Parser) + (modules + SyntaxError reasonligo LexToken Parser) (libraries - menhirLib - parser_shared - parser_cameligo - str - simple-utils - tezos-utils - getopt - ) + menhirLib + parser_shared + parser_cameligo + str + simple-utils + tezos-utils + getopt) (preprocess - (pps bisect_ppx --conditional) - ) - (flags (:standard -open Simple_utils -open Parser_shared -open Parser_cameligo )) -) + (pps bisect_ppx --conditional)) + (flags (:standard -open Simple_utils -open Parser_shared -open Parser_cameligo))) (executable (name LexerMain) - (libraries - parser_reasonligo) - (modules - LexerMain) + (libraries parser_reasonligo) + (modules LexerMain) (preprocess - (pps bisect_ppx --conditional) - ) - (flags (:standard -open Parser_shared -open Parser_reasonligo)) -) + (pps bisect_ppx --conditional)) + (flags (:standard -open Parser_shared -open Parser_reasonligo))) (executable (name ParserMain) (libraries - parser_reasonligo - parser_cameligo) + parser_reasonligo + parser_cameligo) (modules - ParserAPI - ParserMain) + ParErr ParserAPI ParserMain) (preprocess - (pps bisect_ppx --conditional) - ) + (pps bisect_ppx --conditional)) (flags (:standard -open Simple_utils -open Parser_cameligo -open Parser_shared -open Parser_reasonligo))) + +(executable + (name Unlexer) + (libraries str) + (modules Unlexer)) diff --git a/src/passes/1-parser/reasonligo/unlexer.ml b/src/passes/1-parser/reasonligo/unlexer.ml new file mode 100644 index 000000000..6628024d9 --- /dev/null +++ b/src/passes/1-parser/reasonligo/unlexer.ml @@ -0,0 +1,103 @@ +(** Converting the textual representation of tokens produced by Menhir + into concrete syntax *) + +(* See [ParToken.mly] *) + +let gen_sym prefix = + let count = ref 0 in + fun () -> incr count; + prefix ^ string_of_int !count + +let id_sym = gen_sym "id" +and ctor_sym = gen_sym "C" + +let concrete = function + (* Keywords *) + +| "Else" -> "else" +| "False" -> "false" +| "If" -> "if" +| "Let" -> "let" +| "Switch" -> "switch" +| "Mod" -> "mod" +| "Or" -> "or" +| "True" -> "true" +| "Type" -> "type" + + (* Data constructors *) + +| "C_None" -> "None" +| "C_Some" -> "Some" + + (* Symbols *) + +| "MINUS" -> "-" +| "PLUS" -> "+" +| "SLASH" -> "/" +| "TIMES" -> "*" + +| "LPAR" -> "(" +| "RPAR" -> ")" +| "LBRACKET" -> "[" +| "RBRACKET" -> "]" +| "LBRACE" -> "{" +| "RBRACE" -> "}" + +| "CAT" -> "++" +| "DOT" -> "." +| "ELLIPSIS" -> "..." + +| "COMMA" -> "," +| "SEMI" -> ";" +| "COLON" -> ":" +| "VBAR" -> "|" + +| "WILD" -> "_" + +| "EQ" -> "=" +| "EQEQ" -> "==" +| "NE" -> "!=" +| "LT" -> "<" +| "GT" -> ">" +| "LE" -> "<=" +| "GE" -> ">=" +| "ARROW" -> "=>" + +| "NOT" -> "!" +| "BOOL_OR" -> "||" +| "BOOL_AND" -> "&&" + + (* Literals *) + +| "String" -> "\"a string\"" +| "Bytes" -> "0xAA" +| "Int" -> "1" +| "Nat" -> "1n" +| "Mutez" -> "1mutez" +| "Ident" -> id_sym () +| "Constr" -> ctor_sym () + + (* Virtual tokens *) + +| "EOF" -> "" + + (* For completeness of open sum types *) + +| _ -> "" + +(* Unlexing a sentence *) + +let unlex (sentence: string) : Buffer.t = + let tokens = Str.split (Str.regexp " ") sentence in + let lexemes = List.map concrete tokens in + let buffer = Buffer.create 31 in + let rec trans = function + [] -> () + | [s] -> Buffer.add_string buffer s + | s::l -> Buffer.add_string buffer (s ^ " "); trans l + in trans lexemes; buffer + +(* Reading one line from the standard input channel and unlex it. *) + +let out = unlex (input_line stdin) |> Buffer.contents +let () = Printf.printf "%s\n" out diff --git a/src/passes/1-parser/shared/EvalOpt.ml b/src/passes/1-parser/shared/EvalOpt.ml index 68e2b1f94..7889c9c18 100644 --- a/src/passes/1-parser/shared/EvalOpt.ml +++ b/src/passes/1-parser/shared/EvalOpt.ml @@ -13,10 +13,11 @@ type options = < verbose : Utils.String.Set.t; offsets : bool; mode : [`Byte | `Point]; - cmd : command + cmd : command; + mono : bool > -let make ~input ~libs ~verbose ~offsets ~mode ~cmd = +let make ~input ~libs ~verbose ~offsets ~mode ~cmd ~mono = object method input = input method libs = libs @@ -24,6 +25,7 @@ let make ~input ~libs ~verbose ~offsets ~mode ~cmd = method offsets = offsets method mode = mode method cmd = cmd + method mono = mono end (** {1 Auxiliary functions} *) @@ -49,6 +51,7 @@ let help language extension () = print " -q, --quiet No output, except errors (default)"; print " --columns Columns for source locations"; print " --bytes Bytes for source locations"; + print " --mono Use Menhir monolithic API"; print " --verbose= cmdline, cpp, ast-tokens, ast (colon-separated)"; print " --version Commit hash on stdout"; print " -h, --help This help"; @@ -70,6 +73,7 @@ and verbose = ref Utils.String.Set.empty and input = ref None and libs = ref [] and verb_str = ref "" +and mono = ref false let split_at_colon = Str.(split (regexp ":")) @@ -89,6 +93,7 @@ let specs language extension = 'q', "quiet", set quiet true, None; noshort, "columns", set columns true, None; noshort, "bytes", set bytes true, None; + noshort, "mono", set mono true, None; noshort, "verbose", None, Some add_verbose; 'h', "help", Some (help language extension), None; noshort, "version", Some version, None @@ -124,6 +129,7 @@ let print_opt () = printf "quiet = %b\n" !quiet; printf "columns = %b\n" !columns; printf "bytes = %b\n" !bytes; + printf "mono = %b\b" !mono; printf "verbose = %s\n" !verb_str; printf "input = %s\n" (string_of quote !input); printf "libs = %s\n" (string_of_path !libs) @@ -151,6 +157,7 @@ let check extension = and quiet = !quiet and offsets = not !columns and mode = if !bytes then `Byte else `Point + and mono = !mono and verbose = !verbose and libs = !libs in @@ -164,6 +171,7 @@ let check extension = printf "quiet = %b\n" quiet; printf "offsets = %b\n" offsets; printf "mode = %s\n" (if mode = `Byte then "`Byte" else "`Point"); + printf "mono = %b\n" mono; printf "verbose = %s\n" !verb_str; printf "input = %s\n" (string_of quote input); printf "libs = %s\n" (string_of_path libs) @@ -178,7 +186,7 @@ let check extension = | false, false, false, true -> Tokens | _ -> abort "Choose one of -q, -c, -u, -t." - in make ~input ~libs ~verbose ~offsets ~mode ~cmd + in make ~input ~libs ~verbose ~offsets ~mode ~cmd ~mono (** {1 Parsing the command-line options} *) diff --git a/src/passes/1-parser/shared/EvalOpt.mli b/src/passes/1-parser/shared/EvalOpt.mli index e3b006e38..3882ccf7a 100644 --- a/src/passes/1-parser/shared/EvalOpt.mli +++ b/src/passes/1-parser/shared/EvalOpt.mli @@ -49,7 +49,8 @@ type options = < verbose : Utils.String.Set.t; offsets : bool; mode : [`Byte | `Point]; - cmd : command + cmd : command; + mono : bool > val make : @@ -58,7 +59,9 @@ val make : verbose:Utils.String.Set.t -> offsets:bool -> mode:[`Byte | `Point] -> - cmd:command -> options + cmd:command -> + mono:bool -> + options (** Parsing the command-line options on stdin. The first parameter is the name of the concrete syntax, e.g., "pascaligo", and the second diff --git a/src/passes/1-parser/shared/Lexer.mli b/src/passes/1-parser/shared/Lexer.mli index e52d1d09f..c8d291f46 100644 --- a/src/passes/1-parser/shared/Lexer.mli +++ b/src/passes/1-parser/shared/Lexer.mli @@ -124,9 +124,17 @@ module type S = type file_path = string type logger = Markup.t list -> token -> unit + type window = + Nil + | One of token + | Two of token * token + + val slide : token -> window -> window + type instance = { read : ?log:logger -> Lexing.lexbuf -> token; buffer : Lexing.lexbuf; + get_win : unit -> window; get_pos : unit -> Pos.t; get_last : unit -> Region.t; close : unit -> unit @@ -142,9 +150,9 @@ module type S = exception Error of error Region.reg - val print_error : + val format_error : ?offsets:bool -> [`Byte | `Point] -> - error Region.reg -> file:bool -> unit + error Region.reg -> file:bool -> string end diff --git a/src/passes/1-parser/shared/Lexer.mll b/src/passes/1-parser/shared/Lexer.mll index b9c41b895..23e6a92a5 100644 --- a/src/passes/1-parser/shared/Lexer.mll +++ b/src/passes/1-parser/shared/Lexer.mll @@ -140,34 +140,43 @@ module type TOKEN = (* The module type for lexers is [S]. *) -module type S = sig - module Token : TOKEN - type token = Token.token +module type S = + sig + module Token : TOKEN + type token = Token.token - type file_path = string - type logger = Markup.t list -> token -> unit + type file_path = string + type logger = Markup.t list -> token -> unit - type instance = { - read : ?log:logger -> Lexing.lexbuf -> token; - buffer : Lexing.lexbuf; - get_pos : unit -> Pos.t; - get_last : unit -> Region.t; - close : unit -> unit - } + type window = + Nil + | One of token + | Two of token * token - val open_token_stream : file_path option -> instance + val slide : token -> window -> window - (* Error reporting *) + type instance = { + read : ?log:logger -> Lexing.lexbuf -> token; + buffer : Lexing.lexbuf; + get_win : unit -> window; + get_pos : unit -> Pos.t; + get_last : unit -> Region.t; + close : unit -> unit + } - type error - - val error_to_string : error -> string + val open_token_stream : file_path option -> instance - exception Error of error Region.reg + (* Error reporting *) - val print_error : ?offsets:bool -> [`Byte | `Point] -> - error Region.reg -> file:bool -> unit -end + type error + + val error_to_string : error -> string + + exception Error of error Region.reg + + val format_error : ?offsets:bool -> [`Byte | `Point] -> + error Region.reg -> file:bool -> string + end (* The functorised interface @@ -212,7 +221,27 @@ module Make (Token: TOKEN) : (S with module Token = Token) = (* STATE *) - (* Beyond tokens, the result of lexing is a state. The type + (** The type [buffer] models a two-slot buffer of tokens for + reporting after a parse error. + + In [Two(t1,t2)], the token [t2] is the next to be sent to the + parser. + + The call [slide token buffer] pushes the token [token] in the + buffer [buffer]. If the buffer is full, that is, it is + [Two(t1,t2)], then the token [t2] is discarded to make room for + [token]. + *) + type window = + Nil + | One of token + | Two of token * token + + let slide token = function + Nil -> One token + | One t | Two (t,_) -> Two (token,t) + + (** Beyond tokens, the result of lexing is a state. The type [state] represents the logical state of the lexing engine, that is, a value which is threaded during scanning and which denotes useful, high-level information beyond what the type @@ -238,6 +267,10 @@ module Make (Token: TOKEN) : (S with module Token = Token) = updated after a single character has been matched: that depends on the regular expression that matched the lexing buffer. + The field [win] is a two-token window, that is, a buffer that + contains the last recognised token, and the penultimate (if + any). + The fields [decoder] and [supply] offer the support needed for the lexing of UTF-8 encoded characters in comments (the only place where they are allowed in LIGO). The former is the @@ -246,10 +279,10 @@ module Make (Token: TOKEN) : (S with module Token = Token) = it to [decoder]. See the documentation of the third-party library Uutf. *) - type state = { units : (Markup.t list * token) FQueue.t; markup : Markup.t list; + window : window; last : Region.t; pos : Pos.t; decoder : Uutf.decoder; @@ -401,10 +434,10 @@ module Make (Token: TOKEN) : (S with module Token = Token) = exception Error of error Region.reg - let print_error ?(offsets=true) mode Region.{region; value} ~file = + let format_error ?(offsets=true) mode Region.{region; value} ~file = let msg = error_to_string value in let reg = region#to_string ~file ~offsets mode in - Utils.highlight (sprintf "Lexical error %s:\n%s%!" reg msg) + sprintf "\027[31mLexical error %s:\n%s\027[0m%!" reg msg let fail region value = raise (Error Region.{region; value}) @@ -804,6 +837,7 @@ type logger = Markup.t list -> token -> unit type instance = { read : ?log:logger -> Lexing.lexbuf -> token; buffer : Lexing.lexbuf; + get_win : unit -> window; get_pos : unit -> Pos.t; get_last : unit -> Region.t; close : unit -> unit @@ -820,13 +854,15 @@ let open_token_stream file_path_opt = let supply = Uutf.Manual.src decoder in let state = ref {units = FQueue.empty; last = Region.ghost; + window = Nil; pos; markup = []; decoder; supply} in let get_pos () = !state.pos - and get_last () = !state.last in + and get_last () = !state.last + and get_win () = !state.window in let patch_buffer (start, stop) buffer = let open Lexing in @@ -883,7 +919,9 @@ let open_token_stream file_path_opt = read_token ~log buffer | Some (units, (left_mark, token)) -> log left_mark token; - state := {!state with units; last = Token.to_region token}; + state := {!state with units; + last = Token.to_region token; + window = slide token !state.window}; check_right_context token buffer; patch_buffer (Token.to_region token)#byte_pos buffer; token in @@ -896,7 +934,7 @@ let open_token_stream file_path_opt = None | Some "-" -> () | Some file_path -> reset ~file:file_path buffer and close () = close_in cin in - {read = read_token; buffer; get_pos; get_last; close} + {read = read_token; buffer; get_win; get_pos; get_last; close} end (* of functor [Make] in HEADER *) (* END TRAILER *) diff --git a/src/passes/1-parser/shared/LexerLog.ml b/src/passes/1-parser/shared/LexerLog.ml index 65655a720..ce5172045 100644 --- a/src/passes/1-parser/shared/LexerLog.ml +++ b/src/passes/1-parser/shared/LexerLog.ml @@ -65,9 +65,11 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) = let file = match file_path_opt with None | Some "-" -> false - | Some _ -> true in - Lexer.print_error ~offsets mode e ~file; - close_all () + | Some _ -> true in + let msg = + Lexer.format_error ~offsets mode e ~file + in prerr_string msg; + close_all () in iter () with Sys_error msg -> Utils.highlight (sprintf "%s\n" msg) diff --git a/src/passes/1-parser/shared/dune b/src/passes/1-parser/shared/dune index 7777459d0..ca41804a8 100644 --- a/src/passes/1-parser/shared/dune +++ b/src/passes/1-parser/shared/dune @@ -19,9 +19,7 @@ Markup FQueue EvalOpt - Version - )) - + Version)) (rule (targets Version.ml) diff --git a/vendors/ligo-utils/simple-utils/cover.sh b/vendors/ligo-utils/simple-utils/cover.sh index e4717b5ca..b7331dd37 100755 --- a/vendors/ligo-utils/simple-utils/cover.sh +++ b/vendors/ligo-utils/simple-utils/cover.sh @@ -4,7 +4,7 @@ # Menhir and generates minimal inputs that cover all of them and only # them. -set -x +# set -x # ==================================================================== # General Settings and wrappers @@ -111,8 +111,8 @@ done usage () { cat <.mly - --lex-tokens=.mli + --par-tokens=.mly + --lex-tokens=.mli --unlexer= --ext= --dir= @@ -121,7 +121,7 @@ Usage: $(basename $0) [-h|--help] Generates in directory a set of LIGO source files with extension covering all erroneous states of the LR automaton produced by Menhir from .mly, .mly, -.mli and .msg (see script `messages.sh` for +.mli and .msg (see script messages.sh for generating the latter). The LIGO files will be numbered with their corresponding state number in the automaton. The executable reads a line on stdin of tokens and produces a line of corresponding diff --git a/vendors/ligo-utils/simple-utils/par_err.sh b/vendors/ligo-utils/simple-utils/par_err.sh new file mode 100755 index 000000000..181ab987b --- /dev/null +++ b/vendors/ligo-utils/simple-utils/par_err.sh @@ -0,0 +1,199 @@ +#!/bin/sh + +# This script calls Menhir with a message file, which generates the +# corresponding OCaml file. + +# set -x + +# ==================================================================== +# General Settings and wrappers + +script=$(basename $0) + +print_nl () { test "$quiet" != "yes" && echo "$1"; } + +print () { test "$quiet" != "yes" && printf "$1"; } + +fatal_error () { + echo "$script: fatal error:" + echo "$1" 1>&2 + exit 1 +} + +warn () { + print_nl "$script: warning:" + print_nl "$1" +} + +failed () { + printf "\033[31mFAILED$1\033[0m\n" +} + +emphasise () { + printf "\033[31m$1\033[0m\n" +} + +display () { + printf "\033[31m"; cat $1; printf "\033[0m" +} + +# ==================================================================== +# Parsing loop +# +while : ; do + case "$1" in + "") break;; + --par-tokens=*) + if test -n "$par_tokens"; then + fatal_error "Repeated option --par-tokens."; fi + par_tokens=$(expr "$1" : "[^=]*=\(.*\)") + ;; + --par-tokens) + no_eq=$1 + break + ;; + --lex-tokens=*) + if test -n "$lex_tokens"; then + fatal_error "Repeated option --lex-tokens."; fi + lex_tokens=$(expr "$1" : "[^=]*=\(.*\)") + ;; + --lex-tokens) + no_eq=$1 + break + ;; + --out=*) + if test -n "$out"; then + fatal_error "Repeated option --out."; fi + out=$(expr "$1" : "[^=]*=\(.*\)") + ;; + --out) + no_eq=$1 + break + ;; + -h | --help | -help) + help=yes + ;; + # Invalid option + # + -*) + fatal_error "Invalid option \"$1\"." + ;; + # Invalid argument + # + *) + if test -n "$parser_arg"; then + fatal_error "Only one Menhir specification allowed."; fi + parser=$1 + esac + shift +done + +# ==================================================================== +# Help +# +usage () { + cat <.mly + --lex-tokens=.mli + --out=.ml + .mly + +Generates .ml from .msg and the parser specification +(see messages.sh) in the current directory. + +The following options, if given, must be given only once. + +Display control: + -h, --help display this help and exit + +Mandatory options: + --lex-tokens=.mli the lexical tokens + --par-tokens=.mly the syntactical tokens + --out=.ml +EOF + exit 1 +} + +if test "$help" = "yes"; then usage; fi + +# ==================================================================== +# Checking the command-line options and arguments and applying some of +# them. + +# It is a common mistake to forget the "=" in GNU long-option style. + +if test -n "$no_eq" +then + fatal_error "Long option style $no_eq must be followed by \"=\"." +fi + +# Checking options + +if test -z "$parser"; then + fatal_error "No parser specification."; fi + +if test -z "$par_tokens"; then + fatal_error "No syntactical tokens specification (use --par-tokens)."; fi + +if test -z "$lex_tokens"; then + fatal_error "No lexical tokens specification (use --lex-tokens)."; fi + +if test ! -e "$parser"; then + fatal_error "Parser specification \"$parser\" not found."; fi + +if test ! -e "$lex_tokens"; then + fatal_error "Lexical tokens specification \"$lex_tokens\" not found."; fi + +if test ! -e "$par_tokens"; then + fatal_error "Syntactical tokens specification \"$par_tokens\" not found."; fi + +parser_ext=$(expr "$parser" : ".*\.mly$") +if test "$parser_ext" = "0"; then + fatal_error "Parser specification must have extension \".mly\"."; fi + +par_tokens_ext=$(expr "$par_tokens" : ".*\.mly$") +if test "$par_tokens_ext" = "0"; then + fatal_error "Syntactical tokens specification must have extension \".mly\"." +fi + +lex_tokens_ext=$(expr "$lex_tokens" : ".*\.mli$") +if test "$lex_tokens_ext" = "0"; then + fatal_error "Lexical tokens specification must have extension \".mli\"." +fi + +mly=$parser +parser_base=$(basename $mly .mly) +par_tokens_base=$(basename $par_tokens .mly) +lex_tokens_base=$(basename $lex_tokens .mli) + +# Checking the presence of the messages + +msg=$parser_base.msg +if test ! -e $msg; then + fatal_error "File $msg not found."; fi + +# Checking the output file + +if test -z "$out"; then + fatal_error "Output file missing (use --out)."; fi + +# ==================================================================== +# Menhir's flags + +flags="--table --strict --external-tokens $lex_tokens_base \ + --base $parser_base $par_tokens" + +# =================================================================== +# Generating source code from error messages + +err=.$msg.err + +printf "Making $out from $msg... " +menhir --compile-errors $msg $flags $mly > $out 2> $err +if test "$?" = "0" +then printf "done.\n" + rm -f $err +else failed ":" + display "$err" +fi