Enabled the preprocessing of #include directives in Ligo source files by CPP.
It is now possible to use #include anywhere in a Ligo source file and still have error messages refer to the correct file and line. This is done by extending the lexer to recognise the #line directives generated by CPP. I added the stage "cpp" to the --verbose command-line option: if it is given, the preprocessed source file is created in the current directory under the name <input>.pp.li, where <input>.li is the original input file, and the CPP command actually run is printed. I also added setters to the modules [Pos] and [Region].
parent 29df2ff9aa
commit 269fd475ad
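
Below is a rough, self-contained OCaml sketch (not part of the commit) of the preprocessing step described above. The file name "input.li" is made up, and the command string mirrors the one built in the modified *Main.ml drivers:

  let () =
    let input    = "input.li" in                        (* hypothetical input *)
    let pp_input =
      Filename.(input |> basename |> remove_extension) ^ ".pp.li" in
    let cpp_cmd  =
      Printf.sprintf "cpp -traditional-cpp %s -o %s" input pp_input in
    print_endline cpp_cmd;       (* with --verbose=cpp the driver prints this *)
    if Sys.command cpp_cmd <> 0
    then prerr_endline "External error: the CPP command failed."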
@@ -20,7 +20,7 @@ let help () =
   print_endline "  -q, --quiet         No output, except errors (default)";
   print_endline "  --columns           Columns for source locations";
   print_endline "  --bytes             Bytes for source locations";
-  print_endline "  --verbose=<stages>  cmdline, ast";
+  print_endline "  --verbose=<stages>  cmdline, cpp, ast";
   print_endline "  --version           Commit hash on stdout";
   print_endline "  -h, --help          This help";
   exit 0
Lexer.mll (90 changed lines)
@@ -46,13 +46,30 @@ let reset_file ~file buffer =
   let open Lexing in
   buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname = file}
 
-let reset_line line_num buffer =
+let reset_line ~line buffer =
+  assert (line >= 0);
   let open Lexing in
-  buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = line_num}
+  buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = line}
 
-let reset ~file ?(line=1) buffer =
-  (* Default value per the [Lexing] standard module convention *)
-  reset_file ~file buffer; reset_line line buffer
+let reset_offset ~offset buffer =
+  assert (offset >= 0);
+  Printf.printf "[reset] offset=%i\n" offset;
+  let open Lexing in
+  let bol = buffer.lex_curr_p.pos_bol in
+  buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum = bol (*+ offset*)}
 
+let reset ?file ?line ?offset buffer =
+  let () =
+    match file with
+      Some file -> reset_file ~file buffer
+    | None -> () in
+  let () =
+    match line with
+      Some line -> reset_line ~line buffer
+    | None -> () in
+  match offset with
+    Some offset -> reset_offset ~offset buffer
+  | None -> ()
+
 (* Rolling back one lexeme _within the current semantic action_ *)
 
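A self-contained sketch of the calling convention introduced by the new [reset]: only the components passed as optional arguments are overwritten in the [Lexing] buffer. This variant folds the helpers into one function purely to keep the example standalone:

  let reset ?file ?line buffer =
    let open Lexing in
    let p = buffer.lex_curr_p in
    let p = match file with Some f -> {p with pos_fname = f} | None -> p in
    let p = match line with Some l -> {p with pos_lnum  = l} | None -> p in
    buffer.lex_curr_p <- p

  let () =
    let buffer = Lexing.from_string "let x = 1" in
    reset ~file:"included.li" ~line:12 buffer;    (* as if a line marker said so *)
    let p = buffer.Lexing.lex_curr_p in
    assert (p.Lexing.pos_fname = "included.li" && p.Lexing.pos_lnum = 12)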
@@ -192,12 +209,12 @@ module Make (Token: TOKEN) : (S with module Token = Token) =
 
     (* STATE *)
 
-    (* Beyond tokens, the result of lexing is a state (a so-called
-       _state monad_). The type [state] represents the logical state
-       of the lexing engine, that is, a value which is threaded during
-       scanning and which denotes useful, high-level information
-       beyond what the type [Lexing.lexbuf] in the standard library
-       already provides for all generic lexers.
+    (* Beyond tokens, the result of lexing is a state. The type
+       [state] represents the logical state of the lexing engine, that
+       is, a value which is threaded during scanning and which denotes
+       useful, high-level information beyond what the type
+       [Lexing.lexbuf] in the standard library already provides for
+       all generic lexers.
 
       Tokens are the smallest units used by the parser to build the
       abstract syntax tree. The state includes a queue of recognised
@@ -427,6 +444,7 @@ module Make (Token: TOKEN) : (S with module Token = Token) =
 
 let utf8_bom = "\xEF\xBB\xBF" (* Byte Order Mark for UTF-8 *)
 let nl       = ['\n' '\r'] | "\r\n"
+let blank    = ' ' | '\t'
 let digit    = ['0'-'9']
 let natural  = digit | digit (digit | '_')* digit
 let integer  = '-'? natural
@@ -446,6 +464,7 @@ let symbol = ';' | ','
            | '#' | '|' | "->" | ":=" | '=' | ':'
            | "||" | "&&" | '<' | "<=" | '>' | ">=" | "=/="
            | '+' | '-' | '*' | '.' | '_' | '^'
+let string = [^'"' '\\' '\n']*  (* For strings of #include *)
 
 (* RULES *)
 
@@ -487,6 +506,43 @@ and scan state = parse
   let state = scan_line thread state lexbuf |> push_line
   in scan state lexbuf }
 
+(* Management of #include CPP directives
+
+   An input Ligo program may contain GNU CPP (C preprocessor)
+   directives, and the entry modules (named *Main.ml) run CPP on them
+   in traditional mode:
+
+     https://gcc.gnu.org/onlinedocs/cpp/Traditional-Mode.html
+
+   The main interest in using CPP is that it can stand for a poor
+   man's (flat) module system for Ligo thanks to #include
+   directives, and the traditional mode leaves the markup mostly
+   undisturbed.
+
+   Some of the #line resulting from processing #include directives
+   deal with system file headers and thus have to be ignored for our
+   purpose. Moreover, these #line directives may also carry some
+   additional flags:
+
+     https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
+
+   of which 1 and 2 indicate, respectively, the start of a new file
+   and the return from a file (after its inclusion has been
+   processed).
+*)
+
+| '#' blank* ("line" blank+)? (integer as line) blank+
+  '"' (string as file) '"' {
+    let _, _, state = sync state lexbuf in
+    let flags, state = scan_flags state [] lexbuf in
+    let () = ignore flags in
+    let line = int_of_string line
+    and file = Filename.basename file in
+    let pos = state.pos#set ~file ~line ~offset:0 in
+    let state = {state with pos} in
+    scan state lexbuf
+  }
+
 (* Some special errors
 
   Some special errors are recognised in the semantic actions of the
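For illustration, here is a self-contained approximation (plain [Scanf] instead of an ocamllex rule) of what the new clause extracts from a CPP line marker; the marker text is a made-up example:

  let parse_marker s =
    Scanf.sscanf s " # %d %S" (fun line file -> line, Filename.basename file)

  let () =
    let line, file = parse_marker "# 2 \"sub/included.li\" 1" in
    assert (line = 2 && file = "included.li")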
@@ -517,6 +573,18 @@ and scan state = parse
 | _ as c { let region, _, _ = sync state lexbuf
            in fail region (Unexpected_character c) }
 
+(* Scanning CPP #include flags *)
+
+and scan_flags state acc = parse
+  blank+          { let _, _, state = sync state lexbuf
+                    in scan_flags state acc lexbuf }
+| integer as code { let _, _, state = sync state lexbuf in
+                    let acc = int_of_string code :: acc
+                    in scan_flags state acc lexbuf }
+| nl              { List.rev acc, push_newline state lexbuf }
+| eof             { let _, _, state = sync state lexbuf
+                    in List.rev acc, state (* TODO *) }
+
 (* Finishing a string *)
 
 and scan_string thread state = parse
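A small sketch of how the flag list returned by scan_flags could be interpreted, following the GNU CPP documentation cited in the comment above (flag 1 marks entry into an included file, flag 2 the return from it); the commit itself currently ignores the flags:

  type marker_event = Enter_file | Return_to_file | Other_flag of int

  let classify = function
    1 -> Enter_file
  | 2 -> Return_to_file
  | n -> Other_flag n

  let () =
    assert (List.map classify [1; 4] = [Enter_file; Other_flag 4])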
LexerMain.ml (31 changed lines)
@@ -9,9 +9,38 @@ let () = Printexc.record_backtrace true
 let external_ text =
   Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;;
 
+(* Preprocessing the input source and opening the input channels *)
+
+let prefix =
+  match EvalOpt.input with
+    None | Some "-" -> "temp"
+  | Some file -> Filename.(file |> basename |> remove_extension)
+
+let suffix = ".pp.li"
+
+let pp_input =
+  if Utils.String.Set.mem "cpp" EvalOpt.verbose
+  then prefix ^ suffix
+  else let pp_input, pp_out = Filename.open_temp_file prefix suffix
+       in close_out pp_out; pp_input
+
+let cpp_cmd =
+  match EvalOpt.input with
+    None | Some "-" ->
+      Printf.sprintf "cpp -traditional-cpp - -o %s" pp_input
+  | Some file ->
+      Printf.sprintf "cpp -traditional-cpp %s -o %s" file pp_input
+
+let () =
+  if Utils.String.Set.mem "cpp" EvalOpt.verbose
+  then Printf.eprintf "%s\n%!" cpp_cmd;
+  if Sys.command cpp_cmd <> 0 then
+    external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd)
+
 (* Running the lexer on the input file *)
 
 module Lexer = Lexer.Make (LexToken)
 
 let () = Lexer.trace ~offsets:EvalOpt.offsets
-           EvalOpt.mode EvalOpt.input EvalOpt.cmd
+           EvalOpt.mode (Some pp_input) EvalOpt.cmd
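A self-contained sketch of the two output policies used here for the preprocessed file: with --verbose=cpp it gets the predictable name <input>.pp.li in the current directory, otherwise a throw-away temporary file is used. The [verbose_cpp] flag and the prefix are stand-ins for the EvalOpt values:

  let pp_output ~verbose_cpp prefix =
    if verbose_cpp then prefix ^ ".pp.li"
    else
      let name, out = Filename.open_temp_file prefix ".pp.li"
      in close_out out; name

  let () =
    print_endline (pp_output ~verbose_cpp:true  "input");  (* input.pp.li *)
    print_endline (pp_output ~verbose_cpp:false "input")   (* a temp file name *)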
@@ -34,12 +34,40 @@ let lib_path =
   in List.fold_right mk_I libs ""
 *)
 
+(* Preprocessing the input source and opening the input channels *)
+
+let prefix =
+  match EvalOpt.input with
+    None | Some "-" -> "temp"
+  | Some file -> Filename.(file |> basename |> remove_extension)
+
+let suffix = ".pp.li"
+
+let pp_input =
+  if Utils.String.Set.mem "cpp" EvalOpt.verbose
+  then prefix ^ suffix
+  else let pp_input, pp_out = Filename.open_temp_file prefix suffix
+       in close_out pp_out; pp_input
+
+let cpp_cmd =
+  match EvalOpt.input with
+    None | Some "-" ->
+      Printf.sprintf "cpp -traditional-cpp - -o %s" pp_input
+  | Some file ->
+      Printf.sprintf "cpp -traditional-cpp %s -o %s" file pp_input
+
+let () =
+  if Utils.String.Set.mem "cpp" EvalOpt.verbose
+  then Printf.eprintf "%s\n%!" cpp_cmd;
+  if Sys.command cpp_cmd <> 0 then
+    external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd)
+
 (* Instanciating the lexer *)
 
 module Lexer = Lexer.Make (LexToken)
 
 let Lexer.{read; buffer; get_pos; get_last; close} =
-  Lexer.open_token_stream EvalOpt.input
+  Lexer.open_token_stream (Some pp_input)
 
 and cout = stdout
 
Pos.ml (18 changed lines)
@@ -7,6 +7,8 @@ type t = <
 
   set_file   : string -> t;
   set_line   : int -> t;
+  set_offset : int -> t;
+  set        : file:string -> line:int -> offset:int -> t;
   new_line   : string -> t;
   add_nl     : t;
 
@@ -44,8 +46,20 @@ let make ~byte ~point_num ~point_bol =
     val point_bol = point_bol
     method point_bol = point_bol
 
-    method set_file file = {< byte = Lexing.{byte with pos_fname = file} >}
-    method set_line line = {< byte = Lexing.{byte with pos_lnum = line} >}
+    method set_file file =
+      {< byte = Lexing.{byte with pos_fname = file} >}
+
+    method set_line line =
+      {< byte = Lexing.{byte with pos_lnum = line} >}
+
+    method set_offset offset =
+      {< byte = Lexing.{byte with pos_cnum = byte.pos_bol + offset} >}
+
+    method set ~file ~line ~offset =
+      let pos = self#set_file file in
+      let pos = pos#set_line line in
+      let pos = pos#set_offset offset
+      in pos
 
     (* The string must not contain '\n'. See [new_line]. *)
 
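A self-contained miniature of the functional-setter style used here: each setter returns an updated copy via the {< ... >} override, and [set] simply chains them. A toy object stands in for the real [Pos.t]:

  let toy_pos = object (self)
    val file = ""
    val line = 1
    method file = file
    method line = line
    method set_file f = {< file = f >}
    method set_line l = {< line = l >}
    method set ~file ~line = (self#set_file file)#set_line line
  end

  let () =
    let p = toy_pos#set ~file:"a.li" ~line:3 in
    (* The original object is left untouched. *)
    assert (p#file = "a.li" && p#line = 3 && toy_pos#line = 1)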
Pos.mli (2 changed lines)
@@ -36,6 +36,8 @@ type t = <
 
   set_file   : string -> t;
   set_line   : int -> t;
+  set_offset : int -> t;
+  set        : file:string -> line:int -> offset:int -> t;
 
   (* The call [pos#new_line s], where the string [s] is either "\n" or
      "\c\r", updates the position [pos] with a new line. *)
@@ -10,6 +10,7 @@ type t = <
 
   shift_bytes     : int -> t;
   shift_one_uchar : int -> t;
+  set_file        : string -> t;
 
   (* Getters *)
 
@@ -55,6 +56,11 @@ let make ~(start: Pos.t) ~(stop: Pos.t) =
     and stop  = stop#shift_one_uchar len
     in {< start = start; stop = stop >}
 
+    method set_file name =
+      let start = start#set_file name
+      and stop  = stop#set_file name
+      in {< start = start; stop = stop >}
+
     (* Getters *)
 
     method file = start#file
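A quick sketch of the delegation pattern used here, with plain records standing in for the [Pos.t] objects: setting the file name on a region updates both of its endpoints:

  type pos    = {file : string; line : int}
  type region = {start : pos; stop : pos}

  let set_file name r =
    {start = {r.start with file = name};
     stop  = {r.stop  with file = name}}

  let () =
    let r = {start = {file=""; line=1}; stop = {file=""; line=2}} in
    let r = set_file "a.li" r in
    assert (r.start.file = "a.li" && r.stop.file = "a.li")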
@@ -24,10 +24,12 @@ type t = <
    translation of region [region] of [n] bytes forward in the
    file. The call [region#shift_one_uchar n] is similar, except that
    it assumes that [n] is the number of bytes making up one unicode
-   point. *)
+   point. The call [region#set_file f] sets the file name to be
+   [f]. *)
 
   shift_bytes     : int -> t;
   shift_one_uchar : int -> t;
+  set_file        : string -> t;
 
   (* Getters *)
 