diff --git a/src/passes/1-parser/pascaligo/ParserMain.ml b/src/passes/1-parser/pascaligo/ParserMain.ml index 809673f86..795a16c4b 100644 --- a/src/passes/1-parser/pascaligo/ParserMain.ml +++ b/src/passes/1-parser/pascaligo/ParserMain.ml @@ -37,13 +37,13 @@ let parse parser : ('a, string Region.reg) Stdlib.result = | Scoping.Error (Scoping.Duplicate_parameter name) -> let token = Lexer.Token.mk_ident name.Region.value name.Region.region in - (match token with - (* Cannot fail because [name] is a not a - reserved name for the lexer. *) - Stdlib.Error _ -> assert false - | Ok invalid -> - issue_error ("Duplicate parameter.\nHint: Change the name.\n", - None, invalid)) + (match token with + (* Cannot fail because [name] is a not a + reserved name for the lexer. *) + Stdlib.Error _ -> assert false + | Ok invalid -> + issue_error ("Duplicate parameter.\nHint: Change the name.\n", + None, invalid)) | Scoping.Error (Scoping.Reserved_name name) -> let token = @@ -91,8 +91,9 @@ let parse parser : ('a, string Region.reg) Stdlib.result = None, invalid in issue_error point) -(* Preprocessing the input source with CPP *) +(* Preprocessing the input source *) +(* module SSet = Utils.String.Set let sprintf = Printf.sprintf @@ -130,22 +131,51 @@ let cpp_cmd = let () = if Sys.command cpp_cmd <> 0 then Printf.eprintf "External error: \"%s\" failed." cpp_cmd + *) -(* Instantiating the lexer and calling the parser *) -let lexer_inst = - match Lexer.open_token_stream (Lexer.File pp_input) with - Ok instance -> - if IO.options#expr - then - match parse (fun () -> Unit.apply instance Unit.parse_expr) with - Stdlib.Ok _ -> () - | Error Region.{value; _} -> - Printf.eprintf "\027[31m%s\027[0m%!" value - else - (match parse (fun () -> Unit.apply instance Unit.parse_contract) with - Stdlib.Ok _ -> () - | Error Region.{value; _} -> - Printf.eprintf "\027[31m%s\027[0m%!" value) - | Stdlib.Error (Lexer.File_opening msg) -> - Printf.eprintf "\027[31m%s\027[0m%!" msg +(* Preprocessing the input source *) + +let preproc cin : unit = + let close () = flush_all (); close_in cin in + let buffer = Lexing.from_channel cin in + let open Lexing in + let () = + match IO.options#input with + None | Some "-" -> () + | Some pos_fname -> + buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname} in + match Preproc.lex IO.options buffer with + Stdlib.Error (pp_buffer, err) -> + if Utils.String.Set.mem "preproc" IO.options#verbose then + Printf.printf "%s\n%!" (Buffer.contents pp_buffer); + let Region.{value; _} = + Preproc.format ~offsets:IO.options#offsets ~file:true err + in close (); Printf.eprintf "\027[31m%s\027[0m%!" value + | Stdlib.Ok pp_buffer -> + (* Running the lexer and the parser on the preprocessed input *) + + let source = Lexer.String (Buffer.contents pp_buffer) in + match Lexer.open_token_stream source with + Stdlib.Ok instance -> + if IO.options#expr + then + match parse (fun () -> Unit.apply instance Unit.parse_expr) with + Stdlib.Ok _ -> () + | Error Region.{value; _} -> + close (); Printf.eprintf "\027[31m%s\027[0m%!" value + else + (match parse (fun () -> Unit.apply instance Unit.parse_contract) with + Stdlib.Ok _ -> () + | Error Region.{value; _} -> + close (); Printf.eprintf "\027[31m%s\027[0m%!" value) + | Stdlib.Error (Lexer.File_opening msg) -> + flush_all (); Printf.eprintf "\027[31m%s\027[0m%!" msg + +let () = + match IO.options#input with + Some "-" | None -> preproc stdin + | Some file_path -> + try open_in file_path |> preproc with + Sys_error msg -> + (flush_all (); Printf.eprintf "\027[31m%s\027[0m%!" msg) diff --git a/src/passes/1-parser/shared/Lexer.mll b/src/passes/1-parser/shared/Lexer.mll index 45d924769..90ba832d5 100644 --- a/src/passes/1-parser/shared/Lexer.mll +++ b/src/passes/1-parser/shared/Lexer.mll @@ -402,51 +402,51 @@ module Make (Token: TOKEN) : (S with module Token = Token) = let error_to_string = function Invalid_utf8_sequence -> - "Invalid UTF-8 sequence.\n" + "Invalid UTF-8 sequence." | Unexpected_character c -> - sprintf "Unexpected character '%s'.\n" (Char.escaped c) + sprintf "Unexpected character '%s'." (Char.escaped c) | Undefined_escape_sequence -> "Undefined escape sequence.\n\ - Hint: Remove or replace the sequence.\n" + Hint: Remove or replace the sequence." | Missing_break -> "Missing break.\n\ - Hint: Insert some space.\n" + Hint: Insert some space." | Unterminated_string -> "Unterminated string.\n\ - Hint: Close with double quotes.\n" + Hint: Close with double quotes." | Unterminated_integer -> "Unterminated integer.\n\ - Hint: Remove the sign or proceed with a natural number.\n" + Hint: Remove the sign or proceed with a natural number." | Odd_lengthed_bytes -> "The length of the byte sequence is an odd number.\n\ - Hint: Add or remove a digit.\n" + Hint: Add or remove a digit." | Unterminated_comment -> "Unterminated comment.\n\ - Hint: Close with \"*)\".\n" + Hint: Close with \"*)\"." | Orphan_minus -> "Orphan minus sign.\n\ - Hint: Remove the trailing space.\n" + Hint: Remove the trailing space." | Non_canonical_zero -> "Non-canonical zero.\n\ - Hint: Use 0.\n" + Hint: Use 0." | Negative_byte_sequence -> "Negative byte sequence.\n\ - Hint: Remove the leading minus sign.\n" + Hint: Remove the leading minus sign." | Broken_string -> "The string starting here is interrupted by a line break.\n\ Hint: Remove the break, close the string before or insert a \ - backslash.\n" + backslash." | Invalid_character_in_string -> "Invalid character in string.\n\ - Hint: Remove or replace the character.\n" + Hint: Remove or replace the character." | Reserved_name s -> sprintf "Reserved name: \"%s\".\n\ - Hint: Change the name.\n" s + Hint: Change the name." s | Invalid_symbol -> "Invalid symbol.\n\ - Hint: Check the LIGO syntax you use.\n" + Hint: Check the LIGO syntax you use." | Invalid_natural -> - "Invalid natural." + "Invalid natural number." | Invalid_attribute -> "Invalid attribute." @@ -455,7 +455,7 @@ module Make (Token: TOKEN) : (S with module Token = Token) = let format_error ?(offsets=true) mode Region.{region; value} ~file = let msg = error_to_string value and reg = region#to_string ~file ~offsets mode in - let value = sprintf "Lexical error %s:\n%s" reg msg + let value = sprintf "Lexical error %s:\n%s\n" reg msg in Region.{value; region} let fail region value = raise (Error Region.{region; value}) diff --git a/src/passes/1-parser/shared/LexerUnit.ml b/src/passes/1-parser/shared/LexerUnit.ml index cb67f4038..0016450b8 100644 --- a/src/passes/1-parser/shared/LexerUnit.ml +++ b/src/passes/1-parser/shared/LexerUnit.ml @@ -37,29 +37,29 @@ module Make (IO: IO) (Lexer: Lexer.S) = | Stdlib.Ok pp_buffer -> (* Running the lexer on the preprocessed input *) - let preproc_str = Buffer.contents pp_buffer in - match Lexer.open_token_stream (Lexer.String preproc_str) with - Ok Lexer.{read; buffer; close; _} -> - let close_all () = flush_all (); close () in - let rec read_tokens tokens = - match read ~log:(fun _ _ -> ()) buffer with - token -> - if Lexer.Token.is_eof token - then Stdlib.Ok (List.rev tokens) - else read_tokens (token::tokens) - | exception Lexer.Error error -> - let file = - match IO.options#input with - None | Some "-" -> false - | Some _ -> true in - let msg = - Lexer.format_error ~offsets:IO.options#offsets - IO.options#mode ~file error - in Stdlib.Error msg in - let result = read_tokens [] - in close_all (); result - | Stdlib.Error (Lexer.File_opening msg) -> - flush_all (); Stdlib.Error (Region.wrap_ghost msg) in + let source = Lexer.String (Buffer.contents pp_buffer) in + match Lexer.open_token_stream source with + Ok Lexer.{read; buffer; close; _} -> + let close_all () = flush_all (); close () in + let rec read_tokens tokens = + match read ~log:(fun _ _ -> ()) buffer with + token -> + if Lexer.Token.is_eof token + then Stdlib.Ok (List.rev tokens) + else read_tokens (token::tokens) + | exception Lexer.Error error -> + let file = + match IO.options#input with + None | Some "-" -> false + | Some _ -> true in + let msg = + Lexer.format_error ~offsets:IO.options#offsets + IO.options#mode ~file error + in Stdlib.Error msg in + let result = read_tokens [] + in close_all (); result + | Stdlib.Error (Lexer.File_opening msg) -> + flush_all (); Stdlib.Error (Region.wrap_ghost msg) in match IO.options#input with Some "-" | None -> preproc stdin | Some file_path -> @@ -78,7 +78,7 @@ module Make (IO: IO) (Lexer: Lexer.S) = let () = match IO.options#input with None | Some "-" -> () - | Some pos_fname -> + | Some pos_fname -> buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname} in match Preproc.lex IO.options buffer with Stdlib.Error (pp_buffer, err) -> diff --git a/vendors/Preprocessor/E_Lexer.mll b/vendors/Preprocessor/E_Lexer.mll index 3da329603..79b9307f2 100644 --- a/vendors/Preprocessor/E_Lexer.mll +++ b/vendors/Preprocessor/E_Lexer.mll @@ -36,7 +36,7 @@ let error_to_string = function let format ?(offsets=true) Region.{region; value} ~file = let msg = error_to_string value and reg = region#to_string ~file ~offsets `Byte in - let value = sprintf "Preprocessing error %s:\n%s" reg msg + let value = sprintf "Preprocessing error %s:\n%s\n" reg msg in Region.{value; region} exception Error of error Region.reg diff --git a/vendors/Preprocessor/Preproc.mll b/vendors/Preprocessor/Preproc.mll index e01a67b9a..c453d821d 100644 --- a/vendors/Preprocessor/Preproc.mll +++ b/vendors/Preprocessor/Preproc.mll @@ -8,6 +8,15 @@ module Pos = Simple_utils.Pos let sprintf = Printf.sprintf +(* Rolling back one lexeme _within the current semantic action_ *) + +let rollback buffer = + let open Lexing in + let len = String.length (lexeme buffer) in + let pos_cnum = buffer.lex_curr_p.pos_cnum - len in + buffer.lex_curr_pos <- buffer.lex_curr_pos - len; + buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum} + (* STRING PROCESSING *) (* The value of [mk_str len p] ("make string") is a string of length @@ -196,7 +205,7 @@ let error_to_string = function let format ?(offsets=true) Region.{region; value} ~file = let msg = error_to_string value and reg = region#to_string ~file ~offsets `Byte in - let value = sprintf "Preprocessing error %s:\n%s" reg msg + let value = sprintf "Preprocessing error %s:\n%s\n" reg msg in Region.{value; region} exception Error of (Buffer.t * error Region.reg) @@ -722,6 +731,12 @@ and in_string opening state = parse | eof { stop Open_string state opening } | _ { copy state lexbuf; in_string opening state lexbuf } +and preproc state = parse + eof { state } +| _ { rollback lexbuf; + print state (sprintf "# 1 \"%s\"\n" + Lexing.(lexbuf.lex_start_p.pos_fname)); + scan state lexbuf } { (* START OF TRAILER *) @@ -741,7 +756,7 @@ let lex opt buffer = opt; dir = [] } in - match scan state buffer with + match preproc state buffer with state -> List.iter close_in state.incl; Stdlib.Ok state.out | exception Error e -> Stdlib.Error e