Enabled the preprocessing by CPP of #include directives in Ligo.
Now it is possible to write anywhere in a Ligo source file and have error messages refer to the correct file and line. This is done by extending the lexer to recognise #line directives (generated by CPP). I added the stage "cpp" for the --verbose command-line option: if given, the preprocessed source file is created in the current directory with the name <input>.pp.li, where <input>.li is the original input file. Also the CPP command actually run is printed. I added setter to modules [Pos] and [Region].
This commit is contained in:
parent
29df2ff9aa
commit
269fd475ad
@ -20,7 +20,7 @@ let help () =
|
||||
print_endline " -q, --quiet No output, except errors (default)";
|
||||
print_endline " --columns Columns for source locations";
|
||||
print_endline " --bytes Bytes for source locations";
|
||||
print_endline " --verbose=<stages> cmdline, ast";
|
||||
print_endline " --verbose=<stages> cmdline, cpp, ast";
|
||||
print_endline " --version Commit hash on stdout";
|
||||
print_endline " -h, --help This help";
|
||||
exit 0
|
||||
|
100
Lexer.mll
100
Lexer.mll
@ -46,13 +46,30 @@ let reset_file ~file buffer =
|
||||
let open Lexing in
|
||||
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname = file}
|
||||
|
||||
let reset_line line_num buffer =
|
||||
let reset_line ~line buffer =
|
||||
assert (line >= 0);
|
||||
let open Lexing in
|
||||
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = line_num}
|
||||
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = line}
|
||||
|
||||
let reset ~file ?(line=1) buffer =
|
||||
(* Default value per the [Lexing] standard module convention *)
|
||||
reset_file ~file buffer; reset_line line buffer
|
||||
let reset_offset ~offset buffer =
|
||||
assert (offset >= 0);
|
||||
Printf.printf "[reset] offset=%i\n" offset;
|
||||
let open Lexing in
|
||||
let bol = buffer.lex_curr_p.pos_bol in
|
||||
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum = bol (*+ offset*)}
|
||||
|
||||
let reset ?file ?line ?offset buffer =
|
||||
let () =
|
||||
match file with
|
||||
Some file -> reset_file ~file buffer
|
||||
| None -> () in
|
||||
let () =
|
||||
match line with
|
||||
Some line -> reset_line ~line buffer
|
||||
| None -> () in
|
||||
match offset with
|
||||
Some offset -> reset_offset ~offset buffer
|
||||
| None -> ()
|
||||
|
||||
(* Rolling back one lexeme _within the current semantic action_ *)
|
||||
|
||||
@ -192,14 +209,14 @@ module Make (Token: TOKEN) : (S with module Token = Token) =
|
||||
|
||||
(* STATE *)
|
||||
|
||||
(* Beyond tokens, the result of lexing is a state (a so-called
|
||||
_state monad_). The type [state] represents the logical state
|
||||
of the lexing engine, that is, a value which is threaded during
|
||||
scanning and which denotes useful, high-level information
|
||||
beyond what the type [Lexing.lexbuf] in the standard library
|
||||
already provides for all generic lexers.
|
||||
(* Beyond tokens, the result of lexing is a state. The type
|
||||
[state] represents the logical state of the lexing engine, that
|
||||
is, a value which is threaded during scanning and which denotes
|
||||
useful, high-level information beyond what the type
|
||||
[Lexing.lexbuf] in the standard library already provides for
|
||||
all generic lexers.
|
||||
|
||||
Tokens are the smallest units used by the parser to build the
|
||||
Tokens are the smallest units used by the parser to build the
|
||||
abstract syntax tree. The state includes a queue of recognised
|
||||
tokens, with the markup at the left of its lexeme until either
|
||||
the start of the file or the end of the previously recognised
|
||||
@ -225,7 +242,7 @@ module Make (Token: TOKEN) : (S with module Token = Token) =
|
||||
[supply] that takes a byte, a start index and a length and feed
|
||||
it to [decoder]. See the documentation of the third-party
|
||||
library Uutf.
|
||||
*)
|
||||
*)
|
||||
|
||||
type state = {
|
||||
units : (Markup.t list * token) FQueue.t;
|
||||
@ -427,6 +444,7 @@ module Make (Token: TOKEN) : (S with module Token = Token) =
|
||||
|
||||
let utf8_bom = "\xEF\xBB\xBF" (* Byte Order Mark for UTF-8 *)
|
||||
let nl = ['\n' '\r'] | "\r\n"
|
||||
let blank = ' ' | '\t'
|
||||
let digit = ['0'-'9']
|
||||
let natural = digit | digit (digit | '_')* digit
|
||||
let integer = '-'? natural
|
||||
@ -446,6 +464,7 @@ let symbol = ';' | ','
|
||||
| '#' | '|' | "->" | ":=" | '=' | ':'
|
||||
| "||" | "&&" | '<' | "<=" | '>' | ">=" | "=/="
|
||||
| '+' | '-' | '*' | '.' | '_' | '^'
|
||||
let string = [^'"' '\\' '\n']* (* For strings of #include *)
|
||||
|
||||
(* RULES *)
|
||||
|
||||
@ -479,14 +498,51 @@ and scan state = parse
|
||||
|
||||
| "(*" { let opening, _, state = sync state lexbuf in
|
||||
let thread = {opening; len=2; acc=['*';'(']} in
|
||||
let state = scan_block thread state lexbuf |> push_block
|
||||
let state = scan_block thread state lexbuf |> push_block
|
||||
in scan state lexbuf }
|
||||
|
||||
| "//" { let opening, _, state = sync state lexbuf in
|
||||
| "//" { let opening, _, state = sync state lexbuf in
|
||||
let thread = {opening; len=2; acc=['/';'/']} in
|
||||
let state = scan_line thread state lexbuf |> push_line
|
||||
let state = scan_line thread state lexbuf |> push_line
|
||||
in scan state lexbuf }
|
||||
|
||||
(* Management of #include CPP directives
|
||||
|
||||
An input Ligo program may contain GNU CPP (C preprocessor)
|
||||
directives, and the entry modules (named *Main.ml) run CPP on them
|
||||
in traditional mode:
|
||||
|
||||
https://gcc.gnu.org/onlinedocs/cpp/Traditional-Mode.html
|
||||
|
||||
The main interest in using CPP is that it can stand for a poor
|
||||
man's (flat) module system for Ligo thanks to #include
|
||||
directives, and the traditional mode leaves the markup mostly
|
||||
undisturbed.
|
||||
|
||||
Some of the #line resulting from processing #include directives
|
||||
deal with system file headers and thus have to be ignored for our
|
||||
purpose. Moreover, these #line directives may also carry some
|
||||
additional flags:
|
||||
|
||||
https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
|
||||
|
||||
of which 1 and 2 indicate, respectively, the start of a new file
|
||||
and the return from a file (after its inclusion has been
|
||||
processed).
|
||||
*)
|
||||
|
||||
| '#' blank* ("line" blank+)? (integer as line) blank+
|
||||
'"' (string as file) '"' {
|
||||
let _, _, state = sync state lexbuf in
|
||||
let flags, state = scan_flags state [] lexbuf in
|
||||
let () = ignore flags in
|
||||
let line = int_of_string line
|
||||
and file = Filename.basename file in
|
||||
let pos = state.pos#set ~file ~line ~offset:0 in
|
||||
let state = {state with pos} in
|
||||
scan state lexbuf
|
||||
}
|
||||
|
||||
(* Some special errors
|
||||
|
||||
Some special errors are recognised in the semantic actions of the
|
||||
@ -517,6 +573,18 @@ and scan state = parse
|
||||
| _ as c { let region, _, _ = sync state lexbuf
|
||||
in fail region (Unexpected_character c) }
|
||||
|
||||
(* Scanning CPP #include flags *)
|
||||
|
||||
and scan_flags state acc = parse
|
||||
blank+ { let _, _, state = sync state lexbuf
|
||||
in scan_flags state acc lexbuf }
|
||||
| integer as code { let _, _, state = sync state lexbuf in
|
||||
let acc = int_of_string code :: acc
|
||||
in scan_flags state acc lexbuf }
|
||||
| nl { List.rev acc, push_newline state lexbuf }
|
||||
| eof { let _, _, state = sync state lexbuf
|
||||
in List.rev acc, state (* TODO *) }
|
||||
|
||||
(* Finishing a string *)
|
||||
|
||||
and scan_string thread state = parse
|
||||
|
31
LexerMain.ml
31
LexerMain.ml
@ -9,9 +9,38 @@ let () = Printexc.record_backtrace true
|
||||
let external_ text =
|
||||
Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;;
|
||||
|
||||
(* Preprocessing the input source and opening the input channels *)
|
||||
|
||||
let prefix =
|
||||
match EvalOpt.input with
|
||||
None | Some "-" -> "temp"
|
||||
| Some file -> Filename.(file |> basename |> remove_extension)
|
||||
|
||||
let suffix = ".pp.li"
|
||||
|
||||
let pp_input =
|
||||
if Utils.String.Set.mem "cpp" EvalOpt.verbose
|
||||
then prefix ^ suffix
|
||||
else let pp_input, pp_out = Filename.open_temp_file prefix suffix
|
||||
in close_out pp_out; pp_input
|
||||
|
||||
let cpp_cmd =
|
||||
match EvalOpt.input with
|
||||
None | Some "-" ->
|
||||
Printf.sprintf "cpp -traditional-cpp - -o %s" pp_input
|
||||
| Some file ->
|
||||
Printf.sprintf "cpp -traditional-cpp %s -o %s" file pp_input
|
||||
|
||||
let () =
|
||||
if Utils.String.Set.mem "cpp" EvalOpt.verbose
|
||||
then Printf.eprintf "%s\n%!" cpp_cmd;
|
||||
if Sys.command cpp_cmd <> 0 then
|
||||
external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd)
|
||||
|
||||
(* Running the lexer on the input file *)
|
||||
|
||||
module Lexer = Lexer.Make (LexToken)
|
||||
|
||||
|
||||
let () = Lexer.trace ~offsets:EvalOpt.offsets
|
||||
EvalOpt.mode EvalOpt.input EvalOpt.cmd
|
||||
EvalOpt.mode (Some pp_input) EvalOpt.cmd
|
||||
|
@ -34,12 +34,40 @@ let lib_path =
|
||||
in List.fold_right mk_I libs ""
|
||||
*)
|
||||
|
||||
(* Preprocessing the input source and opening the input channels *)
|
||||
|
||||
let prefix =
|
||||
match EvalOpt.input with
|
||||
None | Some "-" -> "temp"
|
||||
| Some file -> Filename.(file |> basename |> remove_extension)
|
||||
|
||||
let suffix = ".pp.li"
|
||||
|
||||
let pp_input =
|
||||
if Utils.String.Set.mem "cpp" EvalOpt.verbose
|
||||
then prefix ^ suffix
|
||||
else let pp_input, pp_out = Filename.open_temp_file prefix suffix
|
||||
in close_out pp_out; pp_input
|
||||
|
||||
let cpp_cmd =
|
||||
match EvalOpt.input with
|
||||
None | Some "-" ->
|
||||
Printf.sprintf "cpp -traditional-cpp - -o %s" pp_input
|
||||
| Some file ->
|
||||
Printf.sprintf "cpp -traditional-cpp %s -o %s" file pp_input
|
||||
|
||||
let () =
|
||||
if Utils.String.Set.mem "cpp" EvalOpt.verbose
|
||||
then Printf.eprintf "%s\n%!" cpp_cmd;
|
||||
if Sys.command cpp_cmd <> 0 then
|
||||
external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd)
|
||||
|
||||
(* Instanciating the lexer *)
|
||||
|
||||
module Lexer = Lexer.Make (LexToken)
|
||||
|
||||
let Lexer.{read; buffer; get_pos; get_last; close} =
|
||||
Lexer.open_token_stream EvalOpt.input
|
||||
Lexer.open_token_stream (Some pp_input)
|
||||
|
||||
and cout = stdout
|
||||
|
||||
|
18
Pos.ml
18
Pos.ml
@ -7,6 +7,8 @@ type t = <
|
||||
|
||||
set_file : string -> t;
|
||||
set_line : int -> t;
|
||||
set_offset : int -> t;
|
||||
set : file:string -> line:int -> offset:int -> t;
|
||||
new_line : string -> t;
|
||||
add_nl : t;
|
||||
|
||||
@ -44,8 +46,20 @@ let make ~byte ~point_num ~point_bol =
|
||||
val point_bol = point_bol
|
||||
method point_bol = point_bol
|
||||
|
||||
method set_file file = {< byte = Lexing.{byte with pos_fname = file} >}
|
||||
method set_line line = {< byte = Lexing.{byte with pos_lnum = line} >}
|
||||
method set_file file =
|
||||
{< byte = Lexing.{byte with pos_fname = file} >}
|
||||
|
||||
method set_line line =
|
||||
{< byte = Lexing.{byte with pos_lnum = line} >}
|
||||
|
||||
method set_offset offset =
|
||||
{< byte = Lexing.{byte with pos_cnum = byte.pos_bol + offset} >}
|
||||
|
||||
method set ~file ~line ~offset =
|
||||
let pos = self#set_file file in
|
||||
let pos = pos#set_line line in
|
||||
let pos = pos#set_offset offset
|
||||
in pos
|
||||
|
||||
(* The string must not contain '\n'. See [new_line]. *)
|
||||
|
||||
|
6
Pos.mli
6
Pos.mli
@ -34,8 +34,10 @@ type t = <
|
||||
|
||||
(* Setters *)
|
||||
|
||||
set_file : string -> t;
|
||||
set_line : int -> t;
|
||||
set_file : string -> t;
|
||||
set_line : int -> t;
|
||||
set_offset : int -> t;
|
||||
set : file:string -> line:int -> offset:int -> t;
|
||||
|
||||
(* The call [pos#new_line s], where the string [s] is either "\n" or
|
||||
"\c\r", updates the position [pos] with a new line. *)
|
||||
|
@ -10,6 +10,7 @@ type t = <
|
||||
|
||||
shift_bytes : int -> t;
|
||||
shift_one_uchar : int -> t;
|
||||
set_file : string -> t;
|
||||
|
||||
(* Getters *)
|
||||
|
||||
@ -55,6 +56,11 @@ let make ~(start: Pos.t) ~(stop: Pos.t) =
|
||||
and stop = stop#shift_one_uchar len
|
||||
in {< start = start; stop = stop >}
|
||||
|
||||
method set_file name =
|
||||
let start = start#set_file name
|
||||
and stop = stop#set_file name
|
||||
in {< start = start; stop = stop >}
|
||||
|
||||
(* Getters *)
|
||||
|
||||
method file = start#file
|
||||
|
@ -24,10 +24,12 @@ type t = <
|
||||
translation of region [region] of [n] bytes forward in the
|
||||
file. The call [region#shift_one_uchar n] is similar, except that
|
||||
it assumes that [n] is the number of bytes making up one unicode
|
||||
point. *)
|
||||
point. The call [region#set_file f] sets the file name to be
|
||||
[f]. *)
|
||||
|
||||
shift_bytes : int -> t;
|
||||
shift_one_uchar : int -> t;
|
||||
set_file : string -> t;
|
||||
|
||||
(* Getters *)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user