Enabled CPP preprocessing of #include directives in Ligo.

Now it is possible to write #include directives anywhere in a Ligo
source file and have error messages refer to the correct file and
line, as illustrated below. This is done by extending the lexer to
recognise the #line directives generated by CPP.
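
For instance (an illustration; the included file name is
hypothetical):

   #include "types.li"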

I added the stage "cpp" to the --verbose command-line option: if
given, the preprocessed source file is created in the current
directory under the name <input>.pp.li, where <input>.li is the
original input file. The CPP command that is actually run is also
printed.

I added setters to the modules [Pos] and [Region].
Christian Rinderknecht 2019-03-12 18:33:29 +01:00
parent 29df2ff9aa
commit 269fd475ad
8 changed files with 173 additions and 24 deletions


@ -20,7 +20,7 @@ let help () =
print_endline " -q, --quiet No output, except errors (default)";
print_endline " --columns Columns for source locations";
print_endline " --bytes Bytes for source locations";
print_endline " --verbose=<stages> cmdline, ast";
print_endline " --verbose=<stages> cmdline, cpp, ast";
print_endline " --version Commit hash on stdout";
print_endline " -h, --help This help";
exit 0


@ -46,13 +46,30 @@ let reset_file ~file buffer =
let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname = file}
let reset_line line_num buffer =
let reset_line ~line buffer =
assert (line >= 0);
let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = line_num}
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = line}
let reset ~file ?(line=1) buffer =
(* Default value per the [Lexing] standard module convention *)
reset_file ~file buffer; reset_line line buffer
let reset_offset ~offset buffer =
assert (offset >= 0);
let open Lexing in
let bol = buffer.lex_curr_p.pos_bol in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum = bol + offset}
let reset ?file ?line ?offset buffer =
let () =
match file with
Some file -> reset_file ~file buffer
| None -> () in
let () =
match line with
Some line -> reset_line ~line buffer
| None -> () in
match offset with
Some offset -> reset_offset ~offset buffer
| None -> ()
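(* A minimal sketch of a call site for the new [reset] (assumed, not
   part of this commit; the file name is illustrative): after reading
   a linemarker, one may write

     reset ~file:"included.li" ~line:1 lexbuf

   Only the fields whose optional argument is given are updated. *)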
(* Rolling back one lexeme _within the current semantic action_ *)
@ -192,12 +209,12 @@ module Make (Token: TOKEN) : (S with module Token = Token) =
(* STATE *)
(* Beyond tokens, the result of lexing is a state (a so-called
_state monad_). The type [state] represents the logical state
of the lexing engine, that is, a value which is threaded during
scanning and which denotes useful, high-level information
beyond what the type [Lexing.lexbuf] in the standard library
already provides for all generic lexers.
(* Beyond tokens, the result of lexing is a state. The type
[state] represents the logical state of the lexing engine, that
is, a value which is threaded during scanning and which denotes
useful, high-level information beyond what the type
[Lexing.lexbuf] in the standard library already provides for
all generic lexers.
Tokens are the smallest units used by the parser to build the
abstract syntax tree. The state includes a queue of recognised
@ -427,6 +444,7 @@ module Make (Token: TOKEN) : (S with module Token = Token) =
let utf8_bom = "\xEF\xBB\xBF" (* Byte Order Mark for UTF-8 *)
let nl = ['\n' '\r'] | "\r\n"
let blank = ' ' | '\t'
let digit = ['0'-'9']
let natural = digit | digit (digit | '_')* digit
let integer = '-'? natural
@ -446,6 +464,7 @@ let symbol = ';' | ','
| '#' | '|' | "->" | ":=" | '=' | ':'
| "||" | "&&" | '<' | "<=" | '>' | ">=" | "=/="
| '+' | '-' | '*' | '.' | '_' | '^'
let string = [^'"' '\\' '\n']* (* For strings of #include *)
(* RULES *)
@ -487,6 +506,43 @@ and scan state = parse
let state = scan_line thread state lexbuf |> push_line
in scan state lexbuf }
(* Management of #include CPP directives
An input Ligo program may contain GNU CPP (C preprocessor)
directives, and the entry modules (named *Main.ml) run CPP on the
input in traditional mode:
https://gcc.gnu.org/onlinedocs/cpp/Traditional-Mode.html
The main interest in using CPP is that it can stand for a poor
man's (flat) module system for Ligo thanks to #include
directives, and the traditional mode leaves the markup mostly
undisturbed.
Some of the #line markers resulting from processing #include directives
deal with system file headers and thus have to be ignored for our
purpose. Moreover, these #line directives may also carry some
additional flags:
https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
of which 1 and 2 indicate, respectively, the start of a new file
and the return from a file (after its inclusion has been
processed).
*)
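(* For illustration (an assumed example, not taken from this commit),
   CPP in traditional mode may emit linemarkers such as

     # 1 "included.li" 1
     # 7 "main.li" 2

   where the trailing flags 1 and 2 respectively mark entering the
   included file and returning from it. *)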
| '#' blank* ("line" blank+)? (integer as line) blank+
'"' (string as file) '"' {
let _, _, state = sync state lexbuf in
let flags, state = scan_flags state [] lexbuf in
let () = ignore flags in
let line = int_of_string line
and file = Filename.basename file in
let pos = state.pos#set ~file ~line ~offset:0 in
let state = {state with pos} in
scan state lexbuf
}
(* Some special errors
Some special errors are recognised in the semantic actions of the
@ -517,6 +573,18 @@ and scan state = parse
| _ as c { let region, _, _ = sync state lexbuf
in fail region (Unexpected_character c) }
(* Scanning CPP #include flags *)
and scan_flags state acc = parse
blank+ { let _, _, state = sync state lexbuf
in scan_flags state acc lexbuf }
| integer as code { let _, _, state = sync state lexbuf in
let acc = int_of_string code :: acc
in scan_flags state acc lexbuf }
| nl { List.rev acc, push_newline state lexbuf }
| eof { let _, _, state = sync state lexbuf
in List.rev acc, state (* TODO *) }
(* Finishing a string *)
and scan_string thread state = parse


@ -9,9 +9,38 @@ let () = Printexc.record_backtrace true
let external_ text =
Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;;
(* Preprocessing the input source and opening the input channels *)
let prefix =
match EvalOpt.input with
None | Some "-" -> "temp"
| Some file -> Filename.(file |> basename |> remove_extension)
let suffix = ".pp.li"
let pp_input =
if Utils.String.Set.mem "cpp" EvalOpt.verbose
then prefix ^ suffix
else let pp_input, pp_out = Filename.open_temp_file prefix suffix
in close_out pp_out; pp_input
let cpp_cmd =
match EvalOpt.input with
None | Some "-" ->
Printf.sprintf "cpp -traditional-cpp - -o %s" pp_input
| Some file ->
Printf.sprintf "cpp -traditional-cpp %s -o %s" file pp_input
let () =
if Utils.String.Set.mem "cpp" EvalOpt.verbose
then Printf.eprintf "%s\n%!" cpp_cmd;
if Sys.command cpp_cmd <> 0 then
external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd)
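(* Sketch of the behaviour above, under the assumption that the input
   file is named "input.li": the command built is

     cpp -traditional-cpp input.li -o input.pp.li

   When --verbose=cpp is given, this command is printed on stderr and
   the preprocessed file input.pp.li is kept in the current directory;
   otherwise a temporary file is used instead. *)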
(* Running the lexer on the input file *)
module Lexer = Lexer.Make (LexToken)
let () = Lexer.trace ~offsets:EvalOpt.offsets
EvalOpt.mode EvalOpt.input EvalOpt.cmd
EvalOpt.mode (Some pp_input) EvalOpt.cmd


@ -34,12 +34,40 @@ let lib_path =
in List.fold_right mk_I libs ""
*)
(* Preprocessing the input source and opening the input channels *)
let prefix =
match EvalOpt.input with
None | Some "-" -> "temp"
| Some file -> Filename.(file |> basename |> remove_extension)
let suffix = ".pp.li"
let pp_input =
if Utils.String.Set.mem "cpp" EvalOpt.verbose
then prefix ^ suffix
else let pp_input, pp_out = Filename.open_temp_file prefix suffix
in close_out pp_out; pp_input
let cpp_cmd =
match EvalOpt.input with
None | Some "-" ->
Printf.sprintf "cpp -traditional-cpp - -o %s" pp_input
| Some file ->
Printf.sprintf "cpp -traditional-cpp %s -o %s" file pp_input
let () =
if Utils.String.Set.mem "cpp" EvalOpt.verbose
then Printf.eprintf "%s\n%!" cpp_cmd;
if Sys.command cpp_cmd <> 0 then
external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd)
(* Instantiating the lexer *)
module Lexer = Lexer.Make (LexToken)
let Lexer.{read; buffer; get_pos; get_last; close} =
Lexer.open_token_stream EvalOpt.input
Lexer.open_token_stream (Some pp_input)
and cout = stdout

Pos.ml

@ -7,6 +7,8 @@ type t = <
set_file : string -> t;
set_line : int -> t;
set_offset : int -> t;
set : file:string -> line:int -> offset:int -> t;
new_line : string -> t;
add_nl : t;
@ -44,8 +46,20 @@ let make ~byte ~point_num ~point_bol =
val point_bol = point_bol
method point_bol = point_bol
method set_file file = {< byte = Lexing.{byte with pos_fname = file} >}
method set_line line = {< byte = Lexing.{byte with pos_lnum = line} >}
method set_file file =
{< byte = Lexing.{byte with pos_fname = file} >}
method set_line line =
{< byte = Lexing.{byte with pos_lnum = line} >}
method set_offset offset =
{< byte = Lexing.{byte with pos_cnum = byte.pos_bol + offset} >}
method set ~file ~line ~offset =
let pos = self#set_file file in
let pos = pos#set_line line in
let pos = pos#set_offset offset
in pos
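(* Example (an assumed sketch): after scanning a linemarker like
   [# 1 "included.li"], a lexer can update its position with

     let pos = pos#set ~file:"included.li" ~line:1 ~offset:0

   which chains the three setters above. *)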
(* The string must not contain '\n'. See [new_line]. *)


@ -36,6 +36,8 @@ type t = <
set_file : string -> t;
set_line : int -> t;
set_offset : int -> t;
set : file:string -> line:int -> offset:int -> t;
(* The call [pos#new_line s], where the string [s] is either "\n" or
"\c\r", updates the position [pos] with a new line. *)


@ -10,6 +10,7 @@ type t = <
shift_bytes : int -> t;
shift_one_uchar : int -> t;
set_file : string -> t;
(* Getters *)
@ -55,6 +56,11 @@ let make ~(start: Pos.t) ~(stop: Pos.t) =
and stop = stop#shift_one_uchar len
in {< start = start; stop = stop >}
method set_file name =
let start = start#set_file name
and stop = stop#set_file name
in {< start = start; stop = stop >}
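(* Example (an assumed sketch): renaming the source file recorded in
   a region, e.g. after a linemarker is seen:

     let region = region#set_file "included.li"

   Both the start and stop positions are updated consistently. *)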
(* Getters *)
method file = start#file


@ -24,10 +24,12 @@ type t = <
translation of region [region] of [n] bytes forward in the
file. The call [region#shift_one_uchar n] is similar, except that
it assumes that [n] is the number of bytes making up one unicode
point. *)
point. The call [region#set_file f] sets the file name to be
[f]. *)
shift_bytes : int -> t;
shift_one_uchar : int -> t;
set_file : string -> t;
(* Getters *)