diff --git a/src/passes/1-parser/cameligo/LexToken.mll b/src/passes/1-parser/cameligo/LexToken.mll index 172b97eec..0871c0d32 100644 --- a/src/passes/1-parser/cameligo/LexToken.mll +++ b/src/passes/1-parser/cameligo/LexToken.mll @@ -201,7 +201,7 @@ let to_lexeme = function | Int i | Nat i | Mutez i -> fst i.Region.value -| String s -> s.Region.value +| String s -> String.escaped s.Region.value | Bytes b -> fst b.Region.value | Begin _ -> "begin" diff --git a/src/passes/1-parser/cameligo/ParErr.ml b/src/passes/1-parser/cameligo/ParErr.ml new file mode 100644 index 000000000..98dd3439d --- /dev/null +++ b/src/passes/1-parser/cameligo/ParErr.ml @@ -0,0 +1,416 @@ + +(* This file was auto-generated based on "Parser.msg". *) + +(* Please note that the function [message] can raise [Not_found]. *) + +let message = + fun s -> + match s with + | 0 -> + "\n" + | 1 -> + "\n" + | 2 -> + "\n" + | 3 -> + "\n" + | 4 -> + "\n" + | 5 -> + "\n" + | 7 -> + "\n" + | 49 -> + "\n" + | 51 -> + "\n" + | 52 -> + "\n" + | 53 -> + "\n" + | 18 -> + "\n" + | 8 -> + "\n" + | 9 -> + "\n" + | 10 -> + "\n" + | 42 -> + "\n" + | 43 -> + "\n" + | 46 -> + "\n" + | 47 -> + "\n" + | 33 -> + "\n" + | 459 -> + "\n" + | 27 -> + "\n" + | 31 -> + "\n" + | 28 -> + "\n" + | 35 -> + "\n" + | 12 -> + "\n" + | 16 -> + "\n" + | 6 -> + "\n" + | 13 -> + "\n" + | 61 -> + "\n" + | 133 -> + "\n" + | 372 -> + "\n" + | 374 -> + "\n" + | 134 -> + "\n" + | 136 -> + "\n" + | 137 -> + "\n" + | 153 -> + "\n" + | 373 -> + "\n" + | 63 -> + "\n" + | 142 -> + "\n" + | 143 -> + "\n" + | 128 -> + "\n" + | 145 -> + "\n" + | 72 -> + "\n" + | 94 -> + "\n" + | 106 -> + "\n" + | 95 -> + "\n" + | 108 -> + "\n" + | 109 -> + "\n" + | 110 -> + "\n" + | 73 -> + "\n" + | 91 -> + "\n" + | 93 -> + "\n" + | 92 -> + "\n" + | 90 -> + "\n" + | 77 -> + "\n" + | 78 -> + "\n" + | 65 -> + "\n" + | 66 -> + "\n" + | 67 -> + "\n" + | 120 -> + "\n" + | 121 -> + "\n" + | 124 -> + "\n" + | 125 -> + "\n" + | 147 -> + "\n" + | 148 -> + "\n" + | 149 -> + "\n" + 
| 157 -> + "\n" + | 156 -> + "\n" + | 462 -> + "\n" + | 464 -> + "\n" + | 216 -> + "\n" + | 241 -> + "\n" + | 218 -> + "\n" + | 220 -> + "\n" + | 214 -> + "\n" + | 225 -> + "\n" + | 254 -> + "\n" + | 255 -> + "\n" + | 242 -> + "\n" + | 263 -> + "\n" + | 227 -> + "\n" + | 256 -> + "\n" + | 257 -> + "\n" + | 265 -> + "\n" + | 267 -> + "\n" + | 269 -> + "\n" + | 271 -> + "\n" + | 273 -> + "\n" + | 192 -> + "\n" + | 258 -> + "\n" + | 284 -> + "\n" + | 287 -> + "\n" + | 244 -> + "\n" + | 292 -> + "\n" + | 261 -> + "\n" + | 160 -> + "\n" + | 164 -> + "\n" + | 428 -> + "\n" + | 331 -> + "\n" + | 312 -> + "\n" + | 430 -> + "\n" + | 314 -> + "\n" + | 315 -> + "\n" + | 316 -> + "\n" + | 431 -> + "\n" + | 444 -> + "\n" + | 445 -> + "\n" + | 432 -> + "\n" + | 433 -> + "\n" + | 434 -> + "\n" + | 435 -> + "\n" + | 436 -> + "\n" + | 437 -> + "\n" + | 439 -> + "\n" + | 327 -> + "\n" + | 329 -> + "\n" + | 333 -> + "\n" + | 330 -> + "\n" + | 328 -> + "\n" + | 339 -> + "\n" + | 340 -> + "\n" + | 341 -> + "\n" + | 342 -> + "\n" + | 343 -> + "\n" + | 344 -> + "\n" + | 366 -> + "\n" + | 345 -> + "\n" + | 347 -> + "\n" + | 440 -> + "\n" + | 442 -> + "\n" + | 446 -> + "\n" + | 429 -> + "\n" + | 311 -> + "\n" + | 427 -> + "\n" + | 165 -> + "\n" + | 167 -> + "\n" + | 168 -> + "\n" + | 169 -> + "\n" + | 163 -> + "\n" + | 447 -> + "\n" + | 449 -> + "\n" + | 450 -> + "\n" + | 166 -> + "\n" + | 234 -> + "\n" + | 235 -> + "\n" + | 238 -> + "\n" + | 239 -> + "\n" + | 424 -> + "\n" + | 170 -> + "\n" + | 171 -> + "\n" + | 172 -> + "\n" + | 417 -> + "\n" + | 418 -> + "\n" + | 421 -> + "\n" + | 422 -> + "\n" + | 174 -> + "\n" + | 303 -> + "\n" + | 304 -> + "\n" + | 404 -> + "\n" + | 411 -> + "\n" + | 403 -> + "\n" + | 305 -> + "\n" + | 307 -> + "\n" + | 319 -> + "\n" + | 320 -> + "\n" + | 321 -> + "\n" + | 322 -> + "\n" + | 323 -> + "\n" + | 324 -> + "\n" + | 325 -> + "\n" + | 326 -> + "\n" + | 377 -> + "\n" + | 378 -> + "\n" + | 380 -> + "\n" + | 334 -> + "\n" + | 309 -> + "\n" + | 306 -> + "\n" + | 
394 -> + "\n" + | 395 -> + "\n" + | 396 -> + "\n" + | 397 -> + "\n" + | 398 -> + "\n" + | 399 -> + "\n" + | 407 -> + "\n" + | 400 -> + "\n" + | 402 -> + "\n" + | 175 -> + "\n" + | 176 -> + "\n" + | 179 -> + "\n" + | 180 -> + "\n" + | 183 -> + "\n" + | 301 -> + "\n" + | 299 -> + "\n" + | 185 -> + "\n" + | 187 -> + "\n" + | 188 -> + "\n" + | 189 -> + "\n" + | 190 -> + "\n" + | 194 -> + "\n" + | 213 -> + "\n" + | 193 -> + "\n" + | 209 -> + "\n" + | _ -> + raise Not_found diff --git a/src/passes/1-parser/cameligo/ParserAPI.ml b/src/passes/1-parser/cameligo/ParserAPI.ml index 7ae5c5ad4..412ae199f 100644 --- a/src/passes/1-parser/cameligo/ParserAPI.ml +++ b/src/passes/1-parser/cameligo/ParserAPI.ml @@ -28,13 +28,46 @@ module type PARSER = end end +(* Errors *) + +module type PAR_ERR = + sig + val message : int -> string (* From error states to messages *) + end + +let format_error ?(offsets=true) mode Region.{region; value} ~file = + let reg = region#to_string ~file ~offsets mode in + Printf.sprintf "\027[31mParse error %s:\n%s\027[0m%!" reg value + (* Main functor *) module Make (Lexer: Lexer.S) - (Parser: PARSER with type token = Lexer.Token.token) = + (Parser: PARSER with type token = Lexer.Token.token) + (ParErr: PAR_ERR) = struct + type message = string + type valid = Lexer.token + type invalid = Lexer.token + + exception Point of message * valid option * invalid module I = Parser.MenhirInterpreter + module S = MenhirLib.General (* Streams *) + + (* The call [stack checkpoint] extracts the parser's stack out of + a checkpoint. *) + + let stack = function + I.HandlingError env -> I.stack env + | _ -> assert false + + (* The call [state checkpoint] extracts the number of the current + state out of a parser checkpoint. *) + + let state checkpoint : int = + match Lazy.force (stack checkpoint) with + S.Nil -> 0 (* WARNING: Hack. The first state should be 0. *) + | S.Cons (I.Element (s,_,_,_),_) -> I.number s (* The parser has successfully produced a semantic value. 
*) @@ -42,14 +75,24 @@ module Make (Lexer: Lexer.S) (* The parser has suspended itself because of a syntax error. Stop. *) - let fail _checkpoint = raise Parser.Error + (* let fail _checkpoint = raise Parser.Error *) + + let failure get_win checkpoint = + let message = ParErr.message (state checkpoint) in + match get_win () with + Lexer.Nil -> assert false + | Lexer.One invalid -> + raise (Point (message, None, invalid)) + | Lexer.Two (invalid, valid) -> + raise (Point (message, Some valid, invalid)) (* The generic parsing function *) - let incr_contract Lexer.{read; buffer; close; _} : AST.t = - let supplier = I.lexer_lexbuf_to_supplier read buffer in - let parser = Parser.Incremental.contract buffer.Lexing.lex_curr_p in - let ast = I.loop_handle success fail supplier parser + let incr_contract Lexer.{read; buffer; get_win; close; _} : AST.t = + let supplier = I.lexer_lexbuf_to_supplier read buffer + and failure = failure get_win in + let parser = Parser.Incremental.contract buffer.Lexing.lex_curr_p in + let ast = I.loop_handle success failure supplier parser in close (); ast let mono_contract = Parser.contract diff --git a/src/passes/1-parser/cameligo/ParserAPI.mli b/src/passes/1-parser/cameligo/ParserAPI.mli index ff3fe4854..f3eeaaba8 100644 --- a/src/passes/1-parser/cameligo/ParserAPI.mli +++ b/src/passes/1-parser/cameligo/ParserAPI.mli @@ -29,11 +29,29 @@ module type PARSER = end +(* Errors *) + +module type PAR_ERR = + sig + val message : int -> string (* From error states to messages *) + end + +val format_error : + ?offsets:bool -> [`Byte | `Point] -> + string Region.reg -> file:bool -> string + (* Main functor *) module Make (Lexer: Lexer.S) - (Parser: PARSER with type token = Lexer.Token.token) : + (Parser: PARSER with type token = Lexer.Token.token) + (ParErr: PAR_ERR) : sig + type message = string + type valid = Lexer.token + type invalid = Lexer.token + + exception Point of message * valid option * invalid + val mono_contract : (Lexing.lexbuf -> 
Lexer.token) -> Lexing.lexbuf -> AST.t val incr_contract : Lexer.instance -> AST.t end diff --git a/src/passes/1-parser/cameligo/ParserMain.ml b/src/passes/1-parser/cameligo/ParserMain.ml index faa7ce70a..32cbd604b 100644 --- a/src/passes/1-parser/cameligo/ParserMain.ml +++ b/src/passes/1-parser/cameligo/ParserMain.ml @@ -1,16 +1,14 @@ -(** Driver for the parser of CameLIGO *) +(** Driver for the CameLIGO parser *) let extension = ".mligo" let options = EvalOpt.read "CameLIGO" extension +open Printf + (** Error printing and exception tracing *) let () = Printexc.record_backtrace true -(** Auxiliary functions -*) -let sprintf = Printf.sprintf - (** Extracting the input file *) let file = @@ -23,17 +21,7 @@ let file = let () = Printexc.record_backtrace true let external_ text = - Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;; - -type error = SyntaxError - -let error_to_string = function - SyntaxError -> "Syntax error.\n" - -let print_error ?(offsets=true) mode Region.{region; value} ~file = - let msg = error_to_string value - and reg = region#to_string ~file ~offsets mode in - Utils.highlight (sprintf "Parse error %s:\n%s%!" 
reg msg) + Utils.highlight (sprintf "External error: %s" text); exit 1;; (** {1 Preprocessing the input source and opening the input channels} *) @@ -42,7 +30,7 @@ let print_error ?(offsets=true) mode Region.{region; value} ~file = let lib_path = match options#libs with [] -> "" - | libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path + | libs -> let mk_I dir path = sprintf " -I %s%s" dir path in List.fold_right mk_I libs "" let prefix = @@ -61,26 +49,26 @@ let pp_input = let cpp_cmd = match options#input with None | Some "-" -> - Printf.sprintf "cpp -traditional-cpp%s - > %s" + sprintf "cpp -traditional-cpp%s - > %s" lib_path pp_input | Some file -> - Printf.sprintf "cpp -traditional-cpp%s %s > %s" + sprintf "cpp -traditional-cpp%s %s > %s" lib_path file pp_input let () = if Utils.String.Set.mem "cpp" options#verbose - then Printf.eprintf "%s\n%!" cpp_cmd; + then eprintf "%s\n%!" cpp_cmd; if Sys.command cpp_cmd <> 0 then - external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd) + external_ (sprintf "the command \"%s\" failed." 
cpp_cmd) (** {1 Instanciating the lexer} *) module Lexer = Lexer.Make (LexToken) module Log = LexerLog.Make (Lexer) -module ParserFront = ParserAPI.Make (Lexer) (Parser) +module ParserFront = ParserAPI.Make (Lexer) (Parser) (ParErr) let lexer_inst = Lexer.open_token_stream (Some pp_input) -let Lexer.{read; buffer; get_pos; get_last; close} = lexer_inst +let Lexer.{read; buffer; get_win; get_pos; get_last; close} = lexer_inst and cout = stdout @@ -97,10 +85,10 @@ let tokeniser = read ~log let () = try - (* The incremental API *) - let ast = ParserFront.incr_contract lexer_inst in - (* The monolithic API *) - (* let ast = ParserFront.mono_contract tokeniser buffer in *) + let ast = + if options#mono + then ParserFront.mono_contract tokeniser buffer + else ParserFront.incr_contract lexer_inst in if Utils.String.Set.mem "ast" options#verbose then let buffer = Buffer.create 131 in let state = ParserLog.mk_state @@ -122,14 +110,52 @@ let () = Buffer.output_buffer stdout buffer end with + (* Lexing errors *) Lexer.Error err -> close_all (); - Lexer.print_error ~offsets:options#offsets - options#mode err ~file + let msg = + Lexer.format_error ~offsets:options#offsets + options#mode err ~file + in prerr_string msg + + (* Incremental API of Menhir *) + | ParserFront.Point (message, valid_opt, invalid) -> + let () = close_all () in + let invalid_lexeme = Lexer.Token.to_lexeme invalid in + let invalid_region = Lexer.Token.to_region invalid in + let header = + "Parse error " ^ + invalid_region#to_string ~offsets:options#offsets + options#mode in + let after = + match valid_opt with + None -> "," + | Some valid -> + let valid_lexeme = Lexer.Token.to_lexeme valid + in sprintf ", after \"%s\" and" valid_lexeme in + let header = header ^ after in + let before = sprintf " before \"%s\"" invalid_lexeme in + let header = header ^ before in + eprintf "\027[31m%s:\n%s\027[0m%!" 
header message + + (* Monolithic API of Menhir *) | Parser.Error -> - let region = get_last () in - let error = Region.{region; value=SyntaxError} in let () = close_all () in - print_error ~offsets:options#offsets - options#mode error ~file + let token = + match get_win () with + Lexer.Nil -> + assert false (* Safe: There is always at least EOF. *) + | Lexer.One token + | Lexer.Two (token, _) -> token in + let lexeme = Lexer.Token.to_lexeme token + and region = Lexer.Token.to_region token in + let msg = sprintf "Syntax error on \"%s\".\n" lexeme in + let error = Region.{region; value=msg} in + let () = close_all () in + let msg = + ParserAPI.format_error ~offsets:options#offsets + options#mode error ~file + in prerr_string msg + + (* I/O errors *) | Sys_error msg -> Utils.highlight msg diff --git a/src/passes/1-parser/pascaligo/LexToken.mll b/src/passes/1-parser/pascaligo/LexToken.mll index 16f4dd96a..090a25825 100644 --- a/src/passes/1-parser/pascaligo/LexToken.mll +++ b/src/passes/1-parser/pascaligo/LexToken.mll @@ -224,7 +224,7 @@ let proj_token = function let to_lexeme = function (* Literals *) - String s -> s.Region.value + String s -> String.escaped s.Region.value | Bytes b -> fst b.Region.value | Int i | Nat i diff --git a/src/passes/1-parser/pascaligo/ParErr.ml b/src/passes/1-parser/pascaligo/ParErr.ml new file mode 100644 index 000000000..1e07f3913 --- /dev/null +++ b/src/passes/1-parser/pascaligo/ParErr.ml @@ -0,0 +1,514 @@ + +(* This file was auto-generated based on "Parser.msg". *) + +(* Please note that the function [message] can raise [Not_found]. 
*) + +let message = + fun s -> + match s with + | 0 -> + "\n" + | 1 -> + "\n" + | 2 -> + "\n" + | 3 -> + "\n" + | 25 -> + "\n" + | 5 -> + "\n" + | 7 -> + "\n" + | 8 -> + "\n" + | 48 -> + "\n" + | 9 -> + "\n" + | 10 -> + "\n" + | 52 -> + "\n" + | 53 -> + "\n" + | 56 -> + "\n" + | 57 -> + "\n" + | 59 -> + "\n" + | 11 -> + "\n" + | 12 -> + "\n" + | 20 -> + "\n" + | 21 -> + "\n" + | 13 -> + "\n" + | 6 -> + "\n" + | 61 -> + "\n" + | 34 -> + "\n" + | 15 -> + "\n" + | 64 -> + "\n" + | 517 -> + "\n" + | 29 -> + "\n" + | 32 -> + "\n" + | 515 -> + "\n" + | 35 -> + "\n" + | 26 -> + "\n" + | 39 -> + "\n" + | 27 -> + "\n" + | 18 -> + "\n" + | 67 -> + "\n" + | 70 -> + "\n" + | 71 -> + "\n" + | 72 -> + "\n" + | 73 -> + "\n" + | 80 -> + "\n" + | 81 -> + "\n" + | 76 -> + "\n" + | 77 -> + "\n" + | 78 -> + "\n" + | 85 -> + "\n" + | 86 -> + "\n" + | 87 -> + "\n" + | 88 -> + "\n" + | 512 -> + "\n" + | 358 -> + "\n" + | 359 -> + "\n" + | 499 -> + "\n" + | 362 -> + "\n" + | 360 -> + "\n" + | 361 -> + "\n" + | 363 -> + "\n" + | 364 -> + "\n" + | 365 -> + "\n" + | 366 -> + "\n" + | 367 -> + "\n" + | 475 -> + "\n" + | 476 -> + "\n" + | 477 -> + "\n" + | 478 -> + "\n" + | 496 -> + "\n" + | 503 -> + "\n" + | 502 -> + "\n" + | 371 -> + "\n" + | 372 -> + "\n" + | 373 -> + "\n" + | 374 -> + "\n" + | 378 -> + "\n" + | 380 -> + "\n" + | 382 -> + "\n" + | 383 -> + "\n" + | 387 -> + "\n" + | 384 -> + "\n" + | 385 -> + "\n" + | 389 -> + "\n" + | 390 -> + "\n" + | 391 -> + "\n" + | 393 -> + "\n" + | 395 -> + "\n" + | 399 -> + "\n" + | 396 -> + "\n" + | 397 -> + "\n" + | 375 -> + "\n" + | 381 -> + "\n" + | 404 -> + "\n" + | 405 -> + "\n" + | 406 -> + "\n" + | 492 -> + "\n" + | 493 -> + "\n" + | 494 -> + "\n" + | 407 -> + "\n" + | 488 -> + "\n" + | 408 -> + "\n" + | 452 -> + "\n" + | 447 -> + "\n" + | 453 -> + "\n" + | 409 -> + "\n" + | 410 -> + "\n" + | 416 -> + "\n" + | 420 -> + "\n" + | 421 -> + "\n" + | 411 -> + "\n" + | 424 -> + "\n" + | 425 -> + "\n" + | 426 -> + "\n" + | 413 -> + "\n" + | 415 -> 
+ "\n" + | 435 -> + "\n" + | 436 -> + "\n" + | 437 -> + "\n" + | 440 -> + "\n" + | 441 -> + "\n" + | 469 -> + "\n" + | 470 -> + "\n" + | 473 -> + "\n" + | 472 -> + "\n" + | 438 -> + "\n" + | 467 -> + "\n" + | 439 -> + "\n" + | 69 -> + "\n" + | 428 -> + "\n" + | 429 -> + "\n" + | 430 -> + "\n" + | 431 -> + "\n" + | 432 -> + "\n" + | 508 -> + "\n" + | 521 -> + "\n" + | 159 -> + "\n" + | 523 -> + "\n" + | 137 -> + "\n" + | 150 -> + "\n" + | 166 -> + "\n" + | 167 -> + "\n" + | 158 -> + "\n" + | 173 -> + "\n" + | 152 -> + "\n" + | 168 -> + "\n" + | 169 -> + "\n" + | 175 -> + "\n" + | 177 -> + "\n" + | 179 -> + "\n" + | 181 -> + "\n" + | 183 -> + "\n" + | 160 -> + "\n" + | 170 -> + "\n" + | 157 -> + "\n" + | 163 -> + "\n" + | 187 -> + "\n" + | 92 -> + "\n" + | 318 -> + "\n" + | 319 -> + "\n" + | 322 -> + "\n" + | 323 -> + "\n" + | 356 -> + "\n" + | 351 -> + "\n" + | 353 -> + "\n" + | 93 -> + "\n" + | 94 -> + "\n" + | 338 -> + "\n" + | 95 -> + "\n" + | 96 -> + "\n" + | 342 -> + "\n" + | 343 -> + "\n" + | 346 -> + "\n" + | 347 -> + "\n" + | 349 -> + "\n" + | 97 -> + "\n" + | 136 -> + "\n" + | 101 -> + "\n" + | 195 -> + "\n" + | 196 -> + "\n" + | 198 -> + "\n" + | 199 -> + "\n" + | 202 -> + "\n" + | 203 -> + "\n" + | 334 -> + "\n" + | 329 -> + "\n" + | 331 -> + "\n" + | 102 -> + "\n" + | 103 -> + "\n" + | 326 -> + "\n" + | 312 -> + "\n" + | 314 -> + "\n" + | 104 -> + "\n" + | 308 -> + "\n" + | 306 -> + "\n" + | 309 -> + "\n" + | 310 -> + "\n" + | 304 -> + "\n" + | 134 -> + "\n" + | 106 -> + "\n" + | 296 -> + "\n" + | 297 -> + "\n" + | 298 -> + "\n" + | 299 -> + "\n" + | 300 -> + "\n" + | 107 -> + "\n" + | 108 -> + "\n" + | 285 -> + "\n" + | 286 -> + "\n" + | 132 -> + "\n" + | 155 -> + "\n" + | 288 -> + "\n" + | 291 -> + "\n" + | 292 -> + "\n" + | 128 -> + "\n" + | 110 -> + "\n" + | 113 -> + "\n" + | 208 -> + "\n" + | 209 -> + "\n" + | 247 -> + "\n" + | 271 -> + "\n" + | 248 -> + "\n" + | 250 -> + "\n" + | 251 -> + "\n" + | 272 -> + "\n" + | 278 -> + "\n" + | 277 -> + "\n" + 
| 281 -> + "\n" + | 280 -> + "\n" + | 218 -> + "\n" + | 261 -> + "\n" + | 262 -> + "\n" + | 265 -> + "\n" + | 266 -> + "\n" + | 269 -> + "\n" + | 255 -> + "\n" + | 257 -> + "\n" + | 219 -> + "\n" + | 244 -> + "\n" + | 245 -> + "\n" + | 253 -> + "\n" + | 241 -> + "\n" + | 210 -> + "\n" + | 275 -> + "\n" + | 211 -> + "\n" + | 223 -> + "\n" + | 224 -> + "\n" + | 240 -> + "\n" + | 225 -> + "\n" + | 226 -> + "\n" + | 234 -> + "\n" + | 114 -> + "\n" + | 118 -> + "\n" + | 206 -> + "\n" + | 119 -> + "\n" + | 125 -> + "\n" + | _ -> + raise Not_found diff --git a/src/passes/1-parser/pascaligo/ParserAPI.ml b/src/passes/1-parser/pascaligo/ParserAPI.ml index 7ae5c5ad4..0c78cdeec 100644 --- a/src/passes/1-parser/pascaligo/ParserAPI.ml +++ b/src/passes/1-parser/pascaligo/ParserAPI.ml @@ -28,13 +28,46 @@ module type PARSER = end end +(* Errors *) + +module type PAR_ERR = + sig + val message : int -> string (* From error states to messages *) + end + +let format_error ?(offsets=true) mode Region.{region; value} ~file = + let reg = region#to_string ~file ~offsets mode in + Printf.sprintf "\027[31mParse error %s:\n%s\027[0m%!" reg value + (* Main functor *) module Make (Lexer: Lexer.S) - (Parser: PARSER with type token = Lexer.Token.token) = + (Parser: PARSER with type token = Lexer.Token.token) + (ParErr: PAR_ERR) = struct + type message = string + type valid = Lexer.token + type invalid = Lexer.token + + exception Point of message * valid option * invalid module I = Parser.MenhirInterpreter + module S = MenhirLib.General (* Streams *) + + (* The call [stack checkpoint] extracts the parser's stack out of + a checkpoint. *) + + let stack = function + I.HandlingError env -> I.stack env + | _ -> assert false + + (* The call [state checkpoint] extracts the number of the current + state out of a parser checkpoint. *) + + let state checkpoint : int = + match Lazy.force (stack checkpoint) with + S.Nil -> 0 (* WARNING: Hack. The first state should be 0. 
*) + | S.Cons (I.Element (s,_,_,_),_) -> I.number s (* The parser has successfully produced a semantic value. *) @@ -42,14 +75,22 @@ module Make (Lexer: Lexer.S) (* The parser has suspended itself because of a syntax error. Stop. *) - let fail _checkpoint = raise Parser.Error + let failure get_win checkpoint = + let message = ParErr.message (state checkpoint) in + match get_win () with + Lexer.Nil -> assert false + | Lexer.One invalid -> + raise (Point (message, None, invalid)) + | Lexer.Two (invalid, valid) -> + raise (Point (message, Some valid, invalid)) (* The generic parsing function *) - let incr_contract Lexer.{read; buffer; close; _} : AST.t = - let supplier = I.lexer_lexbuf_to_supplier read buffer in - let parser = Parser.Incremental.contract buffer.Lexing.lex_curr_p in - let ast = I.loop_handle success fail supplier parser + let incr_contract Lexer.{read; buffer; get_win; close; _} : AST.t = + let supplier = I.lexer_lexbuf_to_supplier read buffer + and failure = failure get_win in + let parser = Parser.Incremental.contract buffer.Lexing.lex_curr_p in + let ast = I.loop_handle success failure supplier parser in close (); ast let mono_contract = Parser.contract diff --git a/src/passes/1-parser/pascaligo/ParserAPI.mli b/src/passes/1-parser/pascaligo/ParserAPI.mli index ff3fe4854..f3eeaaba8 100644 --- a/src/passes/1-parser/pascaligo/ParserAPI.mli +++ b/src/passes/1-parser/pascaligo/ParserAPI.mli @@ -29,11 +29,29 @@ module type PARSER = end +(* Errors *) + +module type PAR_ERR = + sig + val message : int -> string (* From error states to messages *) + end + +val format_error : + ?offsets:bool -> [`Byte | `Point] -> + string Region.reg -> file:bool -> string + (* Main functor *) module Make (Lexer: Lexer.S) - (Parser: PARSER with type token = Lexer.Token.token) : + (Parser: PARSER with type token = Lexer.Token.token) + (ParErr: PAR_ERR) : sig + type message = string + type valid = Lexer.token + type invalid = Lexer.token + + exception Point of message * valid 
option * invalid + val mono_contract : (Lexing.lexbuf -> Lexer.token) -> Lexing.lexbuf -> AST.t val incr_contract : Lexer.instance -> AST.t end diff --git a/src/passes/1-parser/pascaligo/ParserMain.ml b/src/passes/1-parser/pascaligo/ParserMain.ml index 8e64c56eb..46564b994 100644 --- a/src/passes/1-parser/pascaligo/ParserMain.ml +++ b/src/passes/1-parser/pascaligo/ParserMain.ml @@ -1,16 +1,14 @@ -(** Driver for the parser of PascaLIGO *) +(** Driver for the PascaLIGO parser *) let extension = ".ligo" let options = EvalOpt.read "PascaLIGO" extension +open Printf + (** Error printing and exception tracing *) let () = Printexc.record_backtrace true -(** Auxiliary functions -*) -let sprintf = Printf.sprintf - (** Extracting the input file *) let file = @@ -23,17 +21,7 @@ let file = let () = Printexc.record_backtrace true let external_ text = - Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;; - -type error = SyntaxError - -let error_to_string = function - SyntaxError -> "Syntax error.\n" - -let print_error ?(offsets=true) mode Region.{region; value} ~file = - let msg = error_to_string value - and reg = region#to_string ~file ~offsets mode in - Utils.highlight (sprintf "Parse error %s:\n%s%!" 
reg msg) + Utils.highlight (sprintf "External error: %s" text); exit 1;; (** {1 Preprocessing the input source and opening the input channels} *) @@ -42,7 +30,7 @@ let print_error ?(offsets=true) mode Region.{region; value} ~file = let lib_path = match options#libs with [] -> "" - | libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path + | libs -> let mk_I dir path = sprintf " -I %s%s" dir path in List.fold_right mk_I libs "" let prefix = @@ -61,26 +49,26 @@ let pp_input = let cpp_cmd = match options#input with None | Some "-" -> - Printf.sprintf "cpp -traditional-cpp%s - > %s" + sprintf "cpp -traditional-cpp%s - > %s" lib_path pp_input | Some file -> - Printf.sprintf "cpp -traditional-cpp%s %s > %s" + sprintf "cpp -traditional-cpp%s %s > %s" lib_path file pp_input let () = if Utils.String.Set.mem "cpp" options#verbose - then Printf.eprintf "%s\n%!" cpp_cmd; + then eprintf "%s\n%!" cpp_cmd; if Sys.command cpp_cmd <> 0 then - external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd) + external_ (sprintf "the command \"%s\" failed." 
cpp_cmd) (** {1 Instanciating the lexer} *) module Lexer = Lexer.Make (LexToken) module Log = LexerLog.Make (Lexer) -module ParserFront = ParserAPI.Make (Lexer) (Parser) +module ParserFront = ParserAPI.Make (Lexer) (Parser) (ParErr) let lexer_inst = Lexer.open_token_stream (Some pp_input) -let Lexer.{read; buffer; get_pos; get_last; close} = lexer_inst +let Lexer.{read; buffer; get_win; get_pos; get_last; close} = lexer_inst and cout = stdout @@ -97,10 +85,10 @@ let tokeniser = read ~log let () = try - (* The incremental API *) - let ast = ParserFront.incr_contract lexer_inst in - (* The monolithic API *) - (* let ast = ParserFront.mono_contract tokeniser buffer in *) + let ast = + if options#mono + then ParserFront.mono_contract tokeniser buffer + else ParserFront.incr_contract lexer_inst in if Utils.String.Set.mem "ast" options#verbose then let buffer = Buffer.create 131 in let state = ParserLog.mk_state @@ -122,14 +110,52 @@ let () = Buffer.output_buffer stdout buffer end with + (* Lexing errors *) Lexer.Error err -> close_all (); - Lexer.print_error ~offsets:options#offsets - options#mode err ~file + let msg = + Lexer.format_error ~offsets:options#offsets + options#mode err ~file + in prerr_string msg + + (* Incremental API of Menhir *) + | ParserFront.Point (message, valid_opt, invalid) -> + let () = close_all () in + let invalid_lexeme = Lexer.Token.to_lexeme invalid in + let invalid_region = Lexer.Token.to_region invalid in + let header = + "Parse error " ^ + invalid_region#to_string ~offsets:options#offsets + options#mode in + let after = + match valid_opt with + None -> "," + | Some valid -> + let valid_lexeme = Lexer.Token.to_lexeme valid + in sprintf ", after \"%s\" and" valid_lexeme in + let header = header ^ after in + let before = sprintf " before \"%s\"" invalid_lexeme in + let header = header ^ before in + eprintf "\027[31m%s:\n%s\027[0m%!" 
header message + + (* Monolithic API of Menhir *) | Parser.Error -> - let region = get_last () in - let error = Region.{region; value=SyntaxError} in let () = close_all () in - print_error ~offsets:options#offsets - options#mode error ~file + let token = + match get_win () with + Lexer.Nil -> + assert false (* Safe: There is always at least EOF. *) + | Lexer.One token + | Lexer.Two (token, _) -> token in + let lexeme = Lexer.Token.to_lexeme token + and region = Lexer.Token.to_region token in + let msg = sprintf "Syntax error on \"%s\".\n" lexeme in + let error = Region.{region; value=msg} in + let () = close_all () in + let msg = + ParserAPI.format_error ~offsets:options#offsets + options#mode error ~file + in prerr_string msg + + (* I/O errors *) | Sys_error msg -> Utils.highlight msg diff --git a/src/passes/1-parser/reasonligo/ParserMain.ml b/src/passes/1-parser/reasonligo/ParserMain.ml index f855beb52..b8965a110 100644 --- a/src/passes/1-parser/reasonligo/ParserMain.ml +++ b/src/passes/1-parser/reasonligo/ParserMain.ml @@ -1,16 +1,14 @@ -(** Driver for the LIGO parser *) +(** Driver for the Reason LIGO parser *) let extension = ".religo" let options = EvalOpt.read "ReasonLIGO" extension +open Printf + (** Error printing and exception tracing *) let () = Printexc.record_backtrace true -(** Auxiliary functions -*) -let sprintf = Printf.sprintf - (** Extracting the input file *) let file = @@ -23,17 +21,7 @@ let file = let () = Printexc.record_backtrace true let external_ text = - Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;; - -type error = SyntaxError - -let error_to_string = function - SyntaxError -> "Syntax error.\n" - -let print_error ?(offsets=true) mode Region.{region; value} ~file = - let msg = error_to_string value - and reg = region#to_string ~file ~offsets mode in - Utils.highlight (sprintf "Parse error %s:\n%s%!" 
reg msg) + Utils.highlight (sprintf "External error: %s" text); exit 1;; (** {1 Preprocessing the input source and opening the input channels} *) @@ -42,7 +30,7 @@ let print_error ?(offsets=true) mode Region.{region; value} ~file = let lib_path = match options#libs with [] -> "" - | libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path + | libs -> let mk_I dir path = sprintf " -I %s%s" dir path in List.fold_right mk_I libs "" let prefix = @@ -61,26 +49,26 @@ let pp_input = let cpp_cmd = match options#input with None | Some "-" -> - Printf.sprintf "cpp -traditional-cpp%s - > %s" + sprintf "cpp -traditional-cpp%s - > %s" lib_path pp_input | Some file -> - Printf.sprintf "cpp -traditional-cpp%s %s > %s" + sprintf "cpp -traditional-cpp%s %s > %s" lib_path file pp_input let () = if Utils.String.Set.mem "cpp" options#verbose - then Printf.eprintf "%s\n%!" cpp_cmd; + then eprintf "%s\n%!" cpp_cmd; if Sys.command cpp_cmd <> 0 then - external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd) + external_ (sprintf "the command \"%s\" failed." 
cpp_cmd) (** {1 Instanciating the lexer} *) module Lexer = Lexer.Make (LexToken) module Log = LexerLog.Make (Lexer) -module ParserFront = ParserAPI.Make (Lexer) (Parser) +module ParserFront = ParserAPI.Make (Lexer) (Parser) (ParErr) let lexer_inst = Lexer.open_token_stream (Some pp_input) -let Lexer.{read; buffer; get_pos; get_last; close} = lexer_inst +let Lexer.{read; buffer; get_win; get_pos; get_last; close} = lexer_inst and cout = stdout @@ -97,10 +85,10 @@ let tokeniser = read ~log let () = try - (* The incremental API *) - let ast = ParserFront.incr_contract lexer_inst in - (* The monolithic API *) - (* let ast = ParserFront.mono_contract tokeniser buffer in *) + let ast = + if options#mono + then ParserFront.mono_contract tokeniser buffer + else ParserFront.incr_contract lexer_inst in if Utils.String.Set.mem "ast" options#verbose then let buffer = Buffer.create 131 in let state = ParserLog.mk_state @@ -122,14 +110,52 @@ let () = Buffer.output_buffer stdout buffer end with + (* Lexing errors *) Lexer.Error err -> close_all (); - Lexer.print_error ~offsets:options#offsets - options#mode err ~file + let msg = + Lexer.format_error ~offsets:options#offsets + options#mode err ~file + in prerr_string msg + + (* Incremental API of Menhir *) + | ParserFront.Point (message, valid_opt, invalid) -> + let () = close_all () in + let invalid_lexeme = Lexer.Token.to_lexeme invalid in + let invalid_region = Lexer.Token.to_region invalid in + let header = + "Parse error " ^ + invalid_region#to_string ~offsets:options#offsets + options#mode in + let after = + match valid_opt with + None -> "," + | Some valid -> + let valid_lexeme = Lexer.Token.to_lexeme valid + in sprintf ", after \"%s\" and" valid_lexeme in + let header = header ^ after in + let before = sprintf " before \"%s\"" invalid_lexeme in + let header = header ^ before in + eprintf "\027[31m%s:\n%s\027[0m%!" 
header message + + (* Monolithic API of Menhir *) | Parser.Error -> - let region = get_last () in - let error = Region.{region; value=SyntaxError} in let () = close_all () in - print_error ~offsets:options#offsets - options#mode error ~file + let token = + match get_win () with + Lexer.Nil -> + assert false (* Safe: There is always at least EOF. *) + | Lexer.One token + | Lexer.Two (token, _) -> token in + let lexeme = Lexer.Token.to_lexeme token + and region = Lexer.Token.to_region token in + let msg = sprintf "Syntax error on \"%s\".\n" lexeme in + let error = Region.{region; value=msg} in + let () = close_all () in + let msg = + ParserAPI.format_error ~offsets:options#offsets + options#mode error ~file + in prerr_string msg + + (* I/O errors *) | Sys_error msg -> Utils.highlight msg diff --git a/src/passes/1-parser/shared/EvalOpt.ml b/src/passes/1-parser/shared/EvalOpt.ml index 68e2b1f94..7889c9c18 100644 --- a/src/passes/1-parser/shared/EvalOpt.ml +++ b/src/passes/1-parser/shared/EvalOpt.ml @@ -13,10 +13,11 @@ type options = < verbose : Utils.String.Set.t; offsets : bool; mode : [`Byte | `Point]; - cmd : command + cmd : command; + mono : bool > -let make ~input ~libs ~verbose ~offsets ~mode ~cmd = +let make ~input ~libs ~verbose ~offsets ~mode ~cmd ~mono = object method input = input method libs = libs @@ -24,6 +25,7 @@ let make ~input ~libs ~verbose ~offsets ~mode ~cmd = method offsets = offsets method mode = mode method cmd = cmd + method mono = mono end (** {1 Auxiliary functions} *) @@ -49,6 +51,7 @@ let help language extension () = print " -q, --quiet No output, except errors (default)"; print " --columns Columns for source locations"; print " --bytes Bytes for source locations"; + print " --mono Use Menhir monolithic API"; print " --verbose= cmdline, cpp, ast-tokens, ast (colon-separated)"; print " --version Commit hash on stdout"; print " -h, --help This help"; @@ -70,6 +73,7 @@ and verbose = ref Utils.String.Set.empty and input = ref None and libs = ref 
[] and verb_str = ref "" +and mono = ref false let split_at_colon = Str.(split (regexp ":")) @@ -89,6 +93,7 @@ let specs language extension = 'q', "quiet", set quiet true, None; noshort, "columns", set columns true, None; noshort, "bytes", set bytes true, None; + noshort, "mono", set mono true, None; noshort, "verbose", None, Some add_verbose; 'h', "help", Some (help language extension), None; noshort, "version", Some version, None @@ -124,6 +129,7 @@ let print_opt () = printf "quiet = %b\n" !quiet; printf "columns = %b\n" !columns; printf "bytes = %b\n" !bytes; + printf "mono = %b\n" !mono; printf "verbose = %s\n" !verb_str; printf "input = %s\n" (string_of quote !input); printf "libs = %s\n" (string_of_path !libs) @@ -151,6 +157,7 @@ let check extension = and quiet = !quiet and offsets = not !columns and mode = if !bytes then `Byte else `Point + and mono = !mono and verbose = !verbose and libs = !libs in @@ -164,6 +171,7 @@ let check extension = printf "quiet = %b\n" quiet; printf "offsets = %b\n" offsets; printf "mode = %s\n" (if mode = `Byte then "`Byte" else "`Point"); + printf "mono = %b\n" mono; printf "verbose = %s\n" !verb_str; printf "input = %s\n" (string_of quote input); printf "libs = %s\n" (string_of_path libs) @@ -178,7 +186,7 @@ let check extension = | false, false, false, true -> Tokens | _ -> abort "Choose one of -q, -c, -u, -t."
- in make ~input ~libs ~verbose ~offsets ~mode ~cmd + in make ~input ~libs ~verbose ~offsets ~mode ~cmd ~mono (** {1 Parsing the command-line options} *) diff --git a/src/passes/1-parser/shared/EvalOpt.mli b/src/passes/1-parser/shared/EvalOpt.mli index e3b006e38..3882ccf7a 100644 --- a/src/passes/1-parser/shared/EvalOpt.mli +++ b/src/passes/1-parser/shared/EvalOpt.mli @@ -49,7 +49,8 @@ type options = < verbose : Utils.String.Set.t; offsets : bool; mode : [`Byte | `Point]; - cmd : command + cmd : command; + mono : bool > val make : @@ -58,7 +59,9 @@ val make : verbose:Utils.String.Set.t -> offsets:bool -> mode:[`Byte | `Point] -> - cmd:command -> options + cmd:command -> + mono:bool -> + options (** Parsing the command-line options on stdin. The first parameter is the name of the concrete syntax, e.g., "pascaligo", and the second diff --git a/src/passes/1-parser/shared/Lexer.mli b/src/passes/1-parser/shared/Lexer.mli index 50754e45f..8cb219fc9 100644 --- a/src/passes/1-parser/shared/Lexer.mli +++ b/src/passes/1-parser/shared/Lexer.mli @@ -124,9 +124,17 @@ module type S = type file_path = string type logger = Markup.t list -> token -> unit + type window = + Nil + | One of token + | Two of token * token + + val slide : token -> window -> window + type instance = { read : ?log:logger -> Lexing.lexbuf -> token; buffer : Lexing.lexbuf; + get_win : unit -> window; get_pos : unit -> Pos.t; get_last : unit -> Region.t; close : unit -> unit @@ -140,9 +148,9 @@ module type S = exception Error of error Region.reg - val print_error : + val format_error : ?offsets:bool -> [`Byte | `Point] -> - error Region.reg -> file:bool -> unit + error Region.reg -> file:bool -> string end diff --git a/src/passes/1-parser/shared/Lexer.mll b/src/passes/1-parser/shared/Lexer.mll index 1e8e382fa..f4acdda1a 100644 --- a/src/passes/1-parser/shared/Lexer.mll +++ b/src/passes/1-parser/shared/Lexer.mll @@ -140,31 +140,41 @@ module type TOKEN = (* The module type for lexers is [S]. 
*) -module type S = sig - module Token : TOKEN - type token = Token.token +module type S = + sig + module Token : TOKEN + type token = Token.token - type file_path = string - type logger = Markup.t list -> token -> unit + type file_path = string + type logger = Markup.t list -> token -> unit - type instance = { - read : ?log:logger -> Lexing.lexbuf -> token; - buffer : Lexing.lexbuf; - get_pos : unit -> Pos.t; - get_last : unit -> Region.t; - close : unit -> unit - } + type window = + Nil + | One of token + | Two of token * token - val open_token_stream : file_path option -> instance + val slide : token -> window -> window - (* Error reporting *) + type instance = { + read : ?log:logger -> Lexing.lexbuf -> token; + buffer : Lexing.lexbuf; + get_win : unit -> window; + get_pos : unit -> Pos.t; + get_last : unit -> Region.t; + close : unit -> unit + } - type error - exception Error of error Region.reg + val open_token_stream : file_path option -> instance - val print_error : ?offsets:bool -> [`Byte | `Point] -> - error Region.reg -> file:bool -> unit -end + (* Error reporting *) + + type error + + exception Error of error Region.reg + + val format_error : ?offsets:bool -> [`Byte | `Point] -> + error Region.reg -> file:bool -> string + end (* The functorised interface @@ -209,7 +219,27 @@ module Make (Token: TOKEN) : (S with module Token = Token) = (* STATE *) - (* Beyond tokens, the result of lexing is a state. The type + (** The type [buffer] models a two-slot buffer of tokens for + reporting after a parse error. + + In [Two(t1,t2)], the token [t2] is the next to be sent to the + parser. + + The call [slide token buffer] pushes the token [token] in the + buffer [buffer]. If the buffer is full, that is, it is + [Two(t1,t2)], then the token [t2] is discarded to make room for + [token]. 
+ *) + type window = + Nil + | One of token + | Two of token * token + + let slide token = function + Nil -> One token + | One t | Two (t,_) -> Two (token,t) + + (** Beyond tokens, the result of lexing is a state. The type [state] represents the logical state of the lexing engine, that is, a value which is threaded during scanning and which denotes useful, high-level information beyond what the type @@ -235,6 +265,10 @@ module Make (Token: TOKEN) : (S with module Token = Token) = updated after a single character has been matched: that depends on the regular expression that matched the lexing buffer. + The field [win] is a two-token window, that is, a buffer that + contains the last recognised token, and the penultimate (if + any). + The fields [decoder] and [supply] offer the support needed for the lexing of UTF-8 encoded characters in comments (the only place where they are allowed in LIGO). The former is the @@ -243,10 +277,10 @@ module Make (Token: TOKEN) : (S with module Token = Token) = it to [decoder]. See the documentation of the third-party library Uutf. *) - type state = { units : (Markup.t list * token) FQueue.t; markup : Markup.t list; + window : window; last : Region.t; pos : Pos.t; decoder : Uutf.decoder; @@ -398,10 +432,10 @@ module Make (Token: TOKEN) : (S with module Token = Token) = exception Error of error Region.reg - let print_error ?(offsets=true) mode Region.{region; value} ~file = + let format_error ?(offsets=true) mode Region.{region; value} ~file = let msg = error_to_string value in let reg = region#to_string ~file ~offsets mode in - Utils.highlight (sprintf "Lexical error %s:\n%s%!" reg msg) + sprintf "\027[31mLexical error %s:\n%s\027[0m%!" 
reg msg let fail region value = raise (Error Region.{region; value}) @@ -801,6 +835,7 @@ type logger = Markup.t list -> token -> unit type instance = { read : ?log:logger -> Lexing.lexbuf -> token; buffer : Lexing.lexbuf; + get_win : unit -> window; get_pos : unit -> Pos.t; get_last : unit -> Region.t; close : unit -> unit @@ -817,13 +852,15 @@ let open_token_stream file_path_opt = let supply = Uutf.Manual.src decoder in let state = ref {units = FQueue.empty; last = Region.ghost; + window = Nil; pos; markup = []; decoder; supply} in let get_pos () = !state.pos - and get_last () = !state.last in + and get_last () = !state.last + and get_win () = !state.window in let patch_buffer (start, stop) buffer = let open Lexing in @@ -880,7 +917,9 @@ let open_token_stream file_path_opt = read_token ~log buffer | Some (units, (left_mark, token)) -> log left_mark token; - state := {!state with units; last = Token.to_region token}; + state := {!state with units; + last = Token.to_region token; + window = slide token !state.window}; check_right_context token buffer; patch_buffer (Token.to_region token)#byte_pos buffer; token in @@ -893,7 +932,7 @@ let open_token_stream file_path_opt = None | Some "-" -> () | Some file_path -> reset ~file:file_path buffer and close () = close_in cin in - {read = read_token; buffer; get_pos; get_last; close} + {read = read_token; buffer; get_win; get_pos; get_last; close} end (* of functor [Make] in HEADER *) (* END TRAILER *) diff --git a/src/passes/1-parser/shared/LexerLog.ml b/src/passes/1-parser/shared/LexerLog.ml index 65655a720..ce5172045 100644 --- a/src/passes/1-parser/shared/LexerLog.ml +++ b/src/passes/1-parser/shared/LexerLog.ml @@ -65,9 +65,11 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) = let file = match file_path_opt with None | Some "-" -> false - | Some _ -> true in - Lexer.print_error ~offsets mode e ~file; - close_all () + | Some _ -> true in + let msg = + Lexer.format_error ~offsets mode e ~file + in 
prerr_string msg; + close_all () in iter () with Sys_error msg -> Utils.highlight (sprintf "%s\n" msg) diff --git a/vendors/ligo-utils/simple-utils/cover.sh b/vendors/ligo-utils/simple-utils/cover.sh index e4717b5ca..01281ef6d 100755 --- a/vendors/ligo-utils/simple-utils/cover.sh +++ b/vendors/ligo-utils/simple-utils/cover.sh @@ -4,7 +4,7 @@ # Menhir and generates minimal inputs that cover all of them and only # them. -set -x +# set -x # ==================================================================== # General Settings and wrappers diff --git a/vendors/ligo-utils/simple-utils/par_err.sh b/vendors/ligo-utils/simple-utils/par_err.sh new file mode 100755 index 000000000..4ca5cb5d1 --- /dev/null +++ b/vendors/ligo-utils/simple-utils/par_err.sh @@ -0,0 +1,199 @@ +#!/bin/sh + +# This script calls Menhir with a message file, which generates the +# corresponding OCaml file. + +# set -x + +# ==================================================================== +# General Settings and wrappers + +script=$(basename $0) + +print_nl () { test "$quiet" != "yes" && echo "$1"; } + +print () { test "$quiet" != "yes" && printf "$1"; } + +fatal_error () { + echo "$script: fatal error:" + echo "$1" 1>&2 + exit 1 +} + +warn () { + print_nl "$script: warning:" + print_nl "$1" +} + +failed () { + printf "\033[31mFAILED$1\033[0m\n" +} + +emphasise () { + printf "\033[31m$1\033[0m\n" +} + +display () { + printf "\033[31m"; cat $1; printf "\033[0m" +} + +# ==================================================================== +# Parsing loop +# +while : ; do + case "$1" in + "") break;; + --par-tokens=*) + if test -n "$par_tokens"; then + fatal_error "Repeated option --par-tokens."; fi + par_tokens=$(expr "$1" : "[^=]*=\(.*\)") + ;; + --par-tokens) + no_eq=$1 + break + ;; + --lex-tokens=*) + if test -n "$lex_tokens"; then + fatal_error "Repeated option --lex-tokens."; fi + lex_tokens=$(expr "$1" : "[^=]*=\(.*\)") + ;; + --lex-tokens) + no_eq=$1 + break + ;; + --out=*) + if test -n "$out"; 
then + fatal_error "Repeated option --out."; fi + out=$(expr "$1" : "[^=]*=\(.*\)") + ;; + --out) + no_eq=$1 + break + ;; + -h | --help | -help) + help=yes + ;; + # Invalid option + # + -*) + fatal_error "Invalid option \"$1\"." + ;; + # Invalid argument + # + *) + if test -n "$parser_arg"; then + fatal_error "Only one Menhir specification allowed."; fi + parser=$1 + esac + shift +done + +# ==================================================================== +# Help +# +usage () { + cat <<EOF +Usage: $script [-h|--help] + --par-tokens=<par_tokens>.mly + --lex-tokens=<lex_tokens>.mli + --out=<par_err>.ml + <parser>.mly + +Generates <par_err>.ml from <parser>.msg and the parser specification +(see messages.sh) in the current directory. + +The following options, if given, must be given only once. + +Display control: + -h, --help display this help and exit + +Mandatory options: + --lex-tokens=<lex_tokens>.mli the lexical tokens + --par-tokens=<par_tokens>.mly the syntactical tokens + --out=<par_err>.ml +EOF + exit 1 +} + +if test "$help" = "yes"; then usage; fi + +# ==================================================================== +# Checking the command-line options and arguments and applying some of +# them. + +# It is a common mistake to forget the "=" in GNU long-option style. + +if test -n "$no_eq" +then + fatal_error "Long option style $no_eq must be followed by \"=\"." +fi + +# Checking options + +if test -z "$parser"; then + fatal_error "No parser specification."; fi + +if test -z "$par_tokens"; then + fatal_error "No syntactical tokens specification (use --par-tokens)."; fi + +if test -z "$lex_tokens"; then + fatal_error "No lexical tokens specification (use --lex-tokens)."; fi + +if test ! -e "$parser"; then + fatal_error "Parser specification \"$parser\" not found."; fi + +if test ! -e "$lex_tokens"; then + fatal_error "Lexical tokens specification \"$lex_tokens\" not found."; fi + +if test !
-e "$par_tokens"; then + fatal_error "Syntactical tokens specification \"$par_tokens\" not found."; fi + +parser_ext=$(expr "$parser" : ".*\.mly$") +if test "$parser_ext" = "0"; then + fatal_error "Parser specification must have extension \".mly\"."; fi + +par_tokens_ext=$(expr "$par_tokens" : ".*\.mly$") +if test "$par_tokens_ext" = "0"; then + fatal_error "Syntactical tokens specification must have extension \".mly\"." +fi + +lex_tokens_ext=$(expr "$lex_tokens" : ".*\.mli$") +if test "$lex_tokens_ext" = "0"; then + fatal_error "Lexical tokens specification must have extension \".mli\"." +fi + +mly=$parser +parser_base=$(basename $mly .mly) +par_tokens_base=$(basename $par_tokens .mly) +lex_tokens_base=$(basename $lex_tokens .mli) + +# Checking the presence of the messages + +msg=$parser_base.msg +if test ! -e $msg; then + fatal_error "File $msg not found."; fi + +# Checking the output file + +if test -z "$out"; then + fatal_error "Output file missing (use --out)."; fi + +# ==================================================================== +# Menhir's flags + +flags="--table --strict --external-tokens $lex_tokens_base \ + --base $parser_base $par_tokens" + +# =================================================================== +# Generating source code from error messages + +err=.$msg.err + +printf "Making $out from $msg... " +menhir --compile-errors $msg $flags $mly > $out 2> $err +if test "$?" = "0" +then printf "done.\n" + rm -f $err +else failed ":" + display "$err" +fi