Enabled the preprocessing by CPP of #include directives in Ligo.

Now it is possible to write anywhere in a Ligo source file

and have error messages refer to the correct file and line. This
is done by extending the lexer to recognise #line
directives (generated by CPP).

I added the stage "cpp" for the --verbose command-line option: if
given, the preprocessed source file is created in the current
directory with the name <input>.pp.li, where <input>.li is the
original input file. Also the CPP command actually run is
printed.

I added setter to modules [Pos] and [Region].
This commit is contained in:
Christian Rinderknecht 2019-03-12 18:33:29 +01:00
parent 29df2ff9aa
commit 269fd475ad
No known key found for this signature in database
GPG Key ID: 9446816CFD267040
8 changed files with 173 additions and 24 deletions

View File

@ -20,7 +20,7 @@ let help () =
print_endline " -q, --quiet No output, except errors (default)"; print_endline " -q, --quiet No output, except errors (default)";
print_endline " --columns Columns for source locations"; print_endline " --columns Columns for source locations";
print_endline " --bytes Bytes for source locations"; print_endline " --bytes Bytes for source locations";
print_endline " --verbose=<stages> cmdline, ast"; print_endline " --verbose=<stages> cmdline, cpp, ast";
print_endline " --version Commit hash on stdout"; print_endline " --version Commit hash on stdout";
print_endline " -h, --help This help"; print_endline " -h, --help This help";
exit 0 exit 0

View File

@ -46,13 +46,30 @@ let reset_file ~file buffer =
let open Lexing in let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname = file} buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname = file}
let reset_line line_num buffer = let reset_line ~line buffer =
assert (line >= 0);
let open Lexing in let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = line_num} buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = line}
let reset ~file ?(line=1) buffer = let reset_offset ~offset buffer =
(* Default value per the [Lexing] standard module convention *) assert (offset >= 0);
reset_file ~file buffer; reset_line line buffer Printf.printf "[reset] offset=%i\n" offset;
let open Lexing in
let bol = buffer.lex_curr_p.pos_bol in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum = bol (*+ offset*)}
let reset ?file ?line ?offset buffer =
let () =
match file with
Some file -> reset_file ~file buffer
| None -> () in
let () =
match line with
Some line -> reset_line ~line buffer
| None -> () in
match offset with
Some offset -> reset_offset ~offset buffer
| None -> ()
(* Rolling back one lexeme _within the current semantic action_ *) (* Rolling back one lexeme _within the current semantic action_ *)
@ -192,12 +209,12 @@ module Make (Token: TOKEN) : (S with module Token = Token) =
(* STATE *) (* STATE *)
(* Beyond tokens, the result of lexing is a state (a so-called (* Beyond tokens, the result of lexing is a state. The type
_state monad_). The type [state] represents the logical state [state] represents the logical state of the lexing engine, that
of the lexing engine, that is, a value which is threaded during is, a value which is threaded during scanning and which denotes
scanning and which denotes useful, high-level information useful, high-level information beyond what the type
beyond what the type [Lexing.lexbuf] in the standard library [Lexing.lexbuf] in the standard library already provides for
already provides for all generic lexers. all generic lexers.
Tokens are the smallest units used by the parser to build the Tokens are the smallest units used by the parser to build the
abstract syntax tree. The state includes a queue of recognised abstract syntax tree. The state includes a queue of recognised
@ -427,6 +444,7 @@ module Make (Token: TOKEN) : (S with module Token = Token) =
let utf8_bom = "\xEF\xBB\xBF" (* Byte Order Mark for UTF-8 *) let utf8_bom = "\xEF\xBB\xBF" (* Byte Order Mark for UTF-8 *)
let nl = ['\n' '\r'] | "\r\n" let nl = ['\n' '\r'] | "\r\n"
let blank = ' ' | '\t'
let digit = ['0'-'9'] let digit = ['0'-'9']
let natural = digit | digit (digit | '_')* digit let natural = digit | digit (digit | '_')* digit
let integer = '-'? natural let integer = '-'? natural
@ -446,6 +464,7 @@ let symbol = ';' | ','
| '#' | '|' | "->" | ":=" | '=' | ':' | '#' | '|' | "->" | ":=" | '=' | ':'
| "||" | "&&" | '<' | "<=" | '>' | ">=" | "=/=" | "||" | "&&" | '<' | "<=" | '>' | ">=" | "=/="
| '+' | '-' | '*' | '.' | '_' | '^' | '+' | '-' | '*' | '.' | '_' | '^'
let string = [^'"' '\\' '\n']* (* For strings of #include *)
(* RULES *) (* RULES *)
@ -487,6 +506,43 @@ and scan state = parse
let state = scan_line thread state lexbuf |> push_line let state = scan_line thread state lexbuf |> push_line
in scan state lexbuf } in scan state lexbuf }
(* Management of #include CPP directives
An input Ligo program may contain GNU CPP (C preprocessor)
directives, and the entry modules (named *Main.ml) run CPP on them
in traditional mode:
https://gcc.gnu.org/onlinedocs/cpp/Traditional-Mode.html
The main interest in using CPP is that it can stand for a poor
man's (flat) module system for Ligo thanks to #include
directives, and the traditional mode leaves the markup mostly
undisturbed.
Some of the #line resulting from processing #include directives
deal with system file headers and thus have to be ignored for our
purpose. Moreover, these #line directives may also carry some
additional flags:
https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
of which 1 and 2 indicate, respectively, the start of a new file
and the return from a file (after its inclusion has been
processed).
*)
| '#' blank* ("line" blank+)? (integer as line) blank+
'"' (string as file) '"' {
let _, _, state = sync state lexbuf in
let flags, state = scan_flags state [] lexbuf in
let () = ignore flags in
let line = int_of_string line
and file = Filename.basename file in
let pos = state.pos#set ~file ~line ~offset:0 in
let state = {state with pos} in
scan state lexbuf
}
(* Some special errors (* Some special errors
Some special errors are recognised in the semantic actions of the Some special errors are recognised in the semantic actions of the
@ -517,6 +573,18 @@ and scan state = parse
| _ as c { let region, _, _ = sync state lexbuf | _ as c { let region, _, _ = sync state lexbuf
in fail region (Unexpected_character c) } in fail region (Unexpected_character c) }
(* Scanning CPP #include flags *)
and scan_flags state acc = parse
blank+ { let _, _, state = sync state lexbuf
in scan_flags state acc lexbuf }
| integer as code { let _, _, state = sync state lexbuf in
let acc = int_of_string code :: acc
in scan_flags state acc lexbuf }
| nl { List.rev acc, push_newline state lexbuf }
| eof { let _, _, state = sync state lexbuf
in List.rev acc, state (* TODO *) }
(* Finishing a string *) (* Finishing a string *)
and scan_string thread state = parse and scan_string thread state = parse

View File

@ -9,9 +9,38 @@ let () = Printexc.record_backtrace true
let external_ text = let external_ text =
Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;; Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;;
(* Preprocessing the input source and opening the input channels *)
let prefix =
match EvalOpt.input with
None | Some "-" -> "temp"
| Some file -> Filename.(file |> basename |> remove_extension)
let suffix = ".pp.li"
let pp_input =
if Utils.String.Set.mem "cpp" EvalOpt.verbose
then prefix ^ suffix
else let pp_input, pp_out = Filename.open_temp_file prefix suffix
in close_out pp_out; pp_input
let cpp_cmd =
match EvalOpt.input with
None | Some "-" ->
Printf.sprintf "cpp -traditional-cpp - -o %s" pp_input
| Some file ->
Printf.sprintf "cpp -traditional-cpp %s -o %s" file pp_input
let () =
if Utils.String.Set.mem "cpp" EvalOpt.verbose
then Printf.eprintf "%s\n%!" cpp_cmd;
if Sys.command cpp_cmd <> 0 then
external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd)
(* Running the lexer on the input file *) (* Running the lexer on the input file *)
module Lexer = Lexer.Make (LexToken) module Lexer = Lexer.Make (LexToken)
let () = Lexer.trace ~offsets:EvalOpt.offsets let () = Lexer.trace ~offsets:EvalOpt.offsets
EvalOpt.mode EvalOpt.input EvalOpt.cmd EvalOpt.mode (Some pp_input) EvalOpt.cmd

View File

@ -34,12 +34,40 @@ let lib_path =
in List.fold_right mk_I libs "" in List.fold_right mk_I libs ""
*) *)
(* Preprocessing the input source and opening the input channels *)
let prefix =
match EvalOpt.input with
None | Some "-" -> "temp"
| Some file -> Filename.(file |> basename |> remove_extension)
let suffix = ".pp.li"
let pp_input =
if Utils.String.Set.mem "cpp" EvalOpt.verbose
then prefix ^ suffix
else let pp_input, pp_out = Filename.open_temp_file prefix suffix
in close_out pp_out; pp_input
let cpp_cmd =
match EvalOpt.input with
None | Some "-" ->
Printf.sprintf "cpp -traditional-cpp - -o %s" pp_input
| Some file ->
Printf.sprintf "cpp -traditional-cpp %s -o %s" file pp_input
let () =
if Utils.String.Set.mem "cpp" EvalOpt.verbose
then Printf.eprintf "%s\n%!" cpp_cmd;
if Sys.command cpp_cmd <> 0 then
external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd)
(* Instanciating the lexer *) (* Instanciating the lexer *)
module Lexer = Lexer.Make (LexToken) module Lexer = Lexer.Make (LexToken)
let Lexer.{read; buffer; get_pos; get_last; close} = let Lexer.{read; buffer; get_pos; get_last; close} =
Lexer.open_token_stream EvalOpt.input Lexer.open_token_stream (Some pp_input)
and cout = stdout and cout = stdout

18
Pos.ml
View File

@ -7,6 +7,8 @@ type t = <
set_file : string -> t; set_file : string -> t;
set_line : int -> t; set_line : int -> t;
set_offset : int -> t;
set : file:string -> line:int -> offset:int -> t;
new_line : string -> t; new_line : string -> t;
add_nl : t; add_nl : t;
@ -44,8 +46,20 @@ let make ~byte ~point_num ~point_bol =
val point_bol = point_bol val point_bol = point_bol
method point_bol = point_bol method point_bol = point_bol
method set_file file = {< byte = Lexing.{byte with pos_fname = file} >} method set_file file =
method set_line line = {< byte = Lexing.{byte with pos_lnum = line} >} {< byte = Lexing.{byte with pos_fname = file} >}
method set_line line =
{< byte = Lexing.{byte with pos_lnum = line} >}
method set_offset offset =
{< byte = Lexing.{byte with pos_cnum = byte.pos_bol + offset} >}
method set ~file ~line ~offset =
let pos = self#set_file file in
let pos = pos#set_line line in
let pos = pos#set_offset offset
in pos
(* The string must not contain '\n'. See [new_line]. *) (* The string must not contain '\n'. See [new_line]. *)

View File

@ -36,6 +36,8 @@ type t = <
set_file : string -> t; set_file : string -> t;
set_line : int -> t; set_line : int -> t;
set_offset : int -> t;
set : file:string -> line:int -> offset:int -> t;
(* The call [pos#new_line s], where the string [s] is either "\n" or (* The call [pos#new_line s], where the string [s] is either "\n" or
"\c\r", updates the position [pos] with a new line. *) "\c\r", updates the position [pos] with a new line. *)

View File

@ -10,6 +10,7 @@ type t = <
shift_bytes : int -> t; shift_bytes : int -> t;
shift_one_uchar : int -> t; shift_one_uchar : int -> t;
set_file : string -> t;
(* Getters *) (* Getters *)
@ -55,6 +56,11 @@ let make ~(start: Pos.t) ~(stop: Pos.t) =
and stop = stop#shift_one_uchar len and stop = stop#shift_one_uchar len
in {< start = start; stop = stop >} in {< start = start; stop = stop >}
method set_file name =
let start = start#set_file name
and stop = stop#set_file name
in {< start = start; stop = stop >}
(* Getters *) (* Getters *)
method file = start#file method file = start#file

View File

@ -24,10 +24,12 @@ type t = <
translation of region [region] of [n] bytes forward in the translation of region [region] of [n] bytes forward in the
file. The call [region#shift_one_uchar n] is similar, except that file. The call [region#shift_one_uchar n] is similar, except that
it assumes that [n] is the number of bytes making up one unicode it assumes that [n] is the number of bytes making up one unicode
point. *) point. The call [region#set_file f] sets the file name to be
[f]. *)
shift_bytes : int -> t; shift_bytes : int -> t;
shift_one_uchar : int -> t; shift_one_uchar : int -> t;
set_file : string -> t;
(* Getters *) (* Getters *)