2020-03-23 22:43:06 +04:00
|
|
|
(* Simple preprocessor based on C#, to be processed by [ocamllex]. *)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
{
|
2020-03-24 20:47:24 +04:00
|
|
|
(* START OF HEADER *)
|
2020-03-23 22:43:06 +04:00
|
|
|
|
|
|
|
module Region = Simple_utils.Region
|
2020-03-24 21:52:00 +04:00
|
|
|
module Pos = Simple_utils.Pos
|
2020-03-23 22:43:06 +04:00
|
|
|
|
|
|
|
let sprintf = Printf.sprintf
|
|
|
|
|
2020-01-27 19:05:47 +04:00
|
|
|
(* STRING PROCESSING *)
|
|
|
|
|
|
|
|
(* The value of [mk_str len p] ("make string") is a string of length
|
|
|
|
[len] containing the [len] characters in the list [p], in reverse
|
|
|
|
order. For instance, [mk_str 3 ['c';'b';'a'] = "abc"]. *)
|
|
|
|
|
|
|
|
let mk_str (len: int) (p: char list) : string =
|
|
|
|
let () = assert (len = List.length p) in
|
|
|
|
let bytes = Bytes.make len ' ' in
|
|
|
|
let rec fill i = function
|
|
|
|
[] -> bytes
|
|
|
|
| char::l -> Bytes.set bytes i char; fill (i-1) l
|
|
|
|
in fill (len-1) p |> Bytes.to_string
|
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
(* The type [mode] defines the two scanning modes of the preprocessor:
|
|
|
|
either we copy the current characters or we skip them. *)
|
|
|
|
|
|
|
|
type mode = Copy | Skip
|
|
|
|
|
|
|
|
(* Trace of directives
|
|
|
|
|
|
|
|
We keep track of directives #if, #elif, #else, #region and #endregion.
|
|
|
|
*)
|
|
|
|
|
|
|
|
type cond = If of mode | Elif of mode | Else | Region
|
|
|
|
type trace = cond list
|
|
|
|
|
|
|
|
(* Line offsets
|
|
|
|
|
|
|
|
The value [Inline] of type [offset] means that the current location
|
|
|
|
cannot be reached from the start of the line with only white
|
|
|
|
space. The same holds for the special value [Prefix 0]. Values of
|
|
|
|
the form [Prefix n] mean that the current location can be reached
|
|
|
|
from the start of the line with [n] white spaces (padding). These
|
|
|
|
distinctions are needed because preprocessor directives cannot
|
|
|
|
occur inside lines.
|
|
|
|
*)
|
|
|
|
|
|
|
|
type offset = Prefix of int | Inline
|
|
|
|
|
|
|
|
(* Environments *)
|
|
|
|
|
|
|
|
module Env = Set.Make (String)
|
|
|
|
|
|
|
|
let rec eval env =
|
|
|
|
let open E_AST
|
|
|
|
in function
|
|
|
|
Or (e1,e2) -> eval env e1 || eval env e2
|
|
|
|
| And (e1,e2) -> eval env e1 && eval env e2
|
|
|
|
| Eq (e1,e2) -> eval env e1 = eval env e2
|
|
|
|
| Neq (e1,e2) -> eval env e1 != eval env e2
|
|
|
|
| Not e -> not (eval env e)
|
|
|
|
| True -> true
|
|
|
|
| False -> false
|
|
|
|
| Ident id -> Env.mem id env
|
|
|
|
|
|
|
|
(* The type [state] groups the information that needs to be threaded
|
2020-03-27 22:30:39 +04:00
|
|
|
along the scanning functions:
|
|
|
|
* the field [env] records the symbols defined;
|
|
|
|
* the field [mode] informs whether the preprocessor is in copying or
|
|
|
|
skipping mode;
|
|
|
|
* the field [offset] tells whether the current location can be
|
|
|
|
reached from the start of the line with only white space;
|
|
|
|
* the field [trace] is a stack of previous, still active conditional
|
|
|
|
directives;
|
|
|
|
* the field [out] keeps the output buffer;
|
|
|
|
* the field [incl] is a list of opened input channels (#include);
|
|
|
|
* the field [opt] holds the CLI options;
|
|
|
|
* the field [dir] is the file system's path to the the current input
|
|
|
|
file.
|
|
|
|
*)
|
2020-03-24 20:47:24 +04:00
|
|
|
|
|
|
|
type state = {
|
|
|
|
env : Env.t;
|
|
|
|
mode : mode;
|
|
|
|
offset : offset;
|
|
|
|
trace : trace;
|
|
|
|
out : Buffer.t;
|
2020-03-25 21:52:23 +04:00
|
|
|
incl : in_channel list;
|
2020-03-27 22:30:39 +04:00
|
|
|
opt : EvalOpt.options;
|
|
|
|
dir : string list
|
2020-03-24 20:47:24 +04:00
|
|
|
}
|
|
|
|
|
2020-03-27 22:30:39 +04:00
|
|
|
(* Directories *)
|
|
|
|
|
|
|
|
let push_dir dir state =
|
|
|
|
if dir = "." then state else {state with dir = dir :: state.dir}
|
|
|
|
|
|
|
|
let mk_path state =
|
|
|
|
String.concat Filename.dir_sep (List.rev state.dir)
|
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
(* ERRORS *)
|
|
|
|
|
2020-03-25 00:05:39 +04:00
|
|
|
type error =
|
|
|
|
Invalid_directive of string
|
|
|
|
| Directive_inside_line
|
|
|
|
| Missing_endif
|
|
|
|
| Invalid_line_indicator of string
|
|
|
|
| No_line_indicator
|
|
|
|
| End_line_indicator
|
|
|
|
| Newline_in_string
|
|
|
|
| Open_comment
|
|
|
|
| Open_string
|
|
|
|
| Dangling_endif
|
|
|
|
| Open_region_in_conditional
|
|
|
|
| Dangling_endregion
|
|
|
|
| Conditional_in_region
|
|
|
|
| If_follows_elif
|
|
|
|
| Else_follows_else
|
|
|
|
| Dangling_else
|
|
|
|
| Elif_follows_else
|
|
|
|
| Dangling_elif
|
|
|
|
| Reserved_symbol of string
|
|
|
|
| Multiply_defined_symbol of string
|
|
|
|
| Error_directive of string
|
|
|
|
| Parse_error
|
|
|
|
| No_line_comment_or_blank
|
|
|
|
| Invalid_symbol
|
2020-03-25 21:52:23 +04:00
|
|
|
| File_not_found of string
|
2020-03-26 19:51:08 +04:00
|
|
|
| Invalid_character of char
|
2020-03-25 00:05:39 +04:00
|
|
|
|
|
|
|
let error_to_string = function
|
|
|
|
Invalid_directive name ->
|
|
|
|
sprintf "Invalid directive \"%s\"." name
|
|
|
|
| Directive_inside_line ->
|
|
|
|
sprintf "Directive inside a line."
|
|
|
|
| Missing_endif ->
|
|
|
|
sprintf "Missing #endif directive."
|
|
|
|
| Invalid_line_indicator id ->
|
|
|
|
sprintf "Invalid line indicator \"%s\".\n\
|
|
|
|
Hint: Try \"default\" or \"hidden\"." id
|
|
|
|
| No_line_indicator ->
|
|
|
|
sprintf "Missing line indicator."
|
|
|
|
| End_line_indicator ->
|
|
|
|
sprintf "Invalid ending of numerical line indicator.\n\
|
|
|
|
Hint: Try a string, end of line, or a line comment."
|
|
|
|
| Newline_in_string ->
|
|
|
|
sprintf "Invalid newline character in string."
|
|
|
|
| Open_comment ->
|
|
|
|
sprintf "Unterminated comment."
|
|
|
|
| Open_string ->
|
|
|
|
sprintf "Unterminated string.\n\
|
|
|
|
Hint: Close with double quotes."
|
|
|
|
| Dangling_endif ->
|
|
|
|
sprintf "Dangling #endif directive.\n\
|
|
|
|
Hint: Remove it or add a #if before."
|
|
|
|
| Open_region_in_conditional ->
|
|
|
|
sprintf "Unterminated of #region in conditional.\n\
|
|
|
|
Hint: Close with #endregion before #endif."
|
|
|
|
| Dangling_endregion ->
|
|
|
|
sprintf "Dangling #endregion directive.\n\
|
|
|
|
Hint: Remove it or use #region before."
|
|
|
|
| Conditional_in_region ->
|
|
|
|
sprintf "Conditional in region.\n\
|
|
|
|
Hint: Remove the conditional or the region."
|
|
|
|
| If_follows_elif ->
|
|
|
|
sprintf "Directive #if found in a clause #elif."
|
|
|
|
| Else_follows_else ->
|
|
|
|
sprintf "Directive #else found in a clause #else."
|
|
|
|
| Dangling_else ->
|
|
|
|
sprintf "Directive #else without #if."
|
|
|
|
| Elif_follows_else ->
|
|
|
|
sprintf "Directive #elif found in a clause #else."
|
|
|
|
| Dangling_elif ->
|
|
|
|
sprintf "Dangling #elif directive.\n\
|
|
|
|
Hint: Remove it or add a #if before."
|
|
|
|
| Reserved_symbol sym ->
|
|
|
|
sprintf "Reserved symbol \"%s\".\n\
|
|
|
|
Hint: Use another symbol." sym
|
|
|
|
| Multiply_defined_symbol sym ->
|
|
|
|
sprintf "Multiply-defined symbol \"%s\".\n\
|
|
|
|
Hint: Change the name or remove one definition." sym
|
|
|
|
| Error_directive msg ->
|
|
|
|
msg
|
|
|
|
| Parse_error ->
|
|
|
|
"Parse error in expression."
|
|
|
|
| No_line_comment_or_blank ->
|
|
|
|
"Line comment or whitespace expected."
|
|
|
|
| Invalid_symbol ->
|
|
|
|
"Expected a symbol (identifier)."
|
2020-03-25 21:52:23 +04:00
|
|
|
| File_not_found name ->
|
|
|
|
sprintf "File \"%s\" to include not found." name
|
2020-03-26 19:51:08 +04:00
|
|
|
| Invalid_character c ->
|
|
|
|
E_Lexer.error_to_string (E_Lexer.Invalid_character c)
|
2020-03-25 00:05:39 +04:00
|
|
|
|
|
|
|
let format ?(offsets=true) Region.{region; value} ~file =
|
|
|
|
let msg = error_to_string value
|
|
|
|
and reg = region#to_string ~file ~offsets `Byte in
|
|
|
|
let value = sprintf "Preprocessing error %s:\n%s" reg msg
|
|
|
|
in Region.{value; region}
|
|
|
|
|
2020-03-26 19:51:08 +04:00
|
|
|
exception Error of (Buffer.t * error Region.reg)
|
2020-03-23 22:43:06 +04:00
|
|
|
|
|
|
|
let mk_reg buffer =
|
|
|
|
let start = Lexing.lexeme_start_p buffer |> Pos.from_byte
|
|
|
|
and stop = Lexing.lexeme_end_p buffer |> Pos.from_byte
|
|
|
|
in Region.make ~start ~stop
|
|
|
|
|
2020-03-26 19:51:08 +04:00
|
|
|
(* IMPORTANT : Make sure the function [stop] remains the only one
|
|
|
|
raising [Error]. *)
|
|
|
|
|
|
|
|
let stop value state region =
|
|
|
|
List.iter close_in state.incl;
|
|
|
|
raise (Error (state.out, Region.{region; value}))
|
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
let fail error state buffer = stop error state (mk_reg buffer)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
(* The function [reduce_cond] is called when a #endif directive is
|
|
|
|
found, and the trace (see type [trace] above) needs updating. *)
|
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
let reduce_cond state region =
|
|
|
|
let rec reduce = function
|
2020-03-25 00:05:39 +04:00
|
|
|
[] -> stop Dangling_endif state region
|
2020-03-25 21:52:23 +04:00
|
|
|
| If mode::trace -> {state with mode; trace; offset = Prefix 0}
|
2020-03-25 00:05:39 +04:00
|
|
|
| Region::_ -> stop Open_region_in_conditional state region
|
2020-03-24 20:47:24 +04:00
|
|
|
| _::trace -> reduce trace
|
|
|
|
in reduce state.trace
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-25 21:52:23 +04:00
|
|
|
(* The function [reduce_region] is called when a #endregion directive is
|
2020-01-27 19:05:47 +04:00
|
|
|
read, and the trace needs updating. *)
|
|
|
|
|
2020-03-25 21:52:23 +04:00
|
|
|
let reduce_region state region =
|
2020-03-24 20:47:24 +04:00
|
|
|
match state.trace with
|
2020-03-25 00:05:39 +04:00
|
|
|
[] -> stop Dangling_endregion state region
|
2020-03-25 21:52:23 +04:00
|
|
|
| Region::trace -> {state with trace; offset = Prefix 0}
|
2020-03-25 00:05:39 +04:00
|
|
|
| _ -> stop Conditional_in_region state region
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
(* The function [extend] is called when encountering conditional
|
|
|
|
directives #if, #else and #elif. As its name suggests, it extends
|
|
|
|
the current trace with the current conditional directive, whilst
|
|
|
|
performing some validity checks. *)
|
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
let extend cond state region =
|
|
|
|
match cond, state.trace with
|
2020-03-25 00:05:39 +04:00
|
|
|
If _, Elif _::_ -> stop If_follows_elif state region
|
|
|
|
| Else, Else::_ -> stop Else_follows_else state region
|
|
|
|
| Else, [] -> stop Dangling_else state region
|
|
|
|
| Elif _, Else::_ -> stop Elif_follows_else state region
|
|
|
|
| Elif _, [] -> stop Dangling_elif state region
|
2020-03-24 20:47:24 +04:00
|
|
|
| hd, tl -> hd::tl
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
(* The function [last_mode] seeks the last mode as recorded in the
|
|
|
|
trace (see type [trace] above). *)
|
|
|
|
|
|
|
|
let rec last_mode = function
|
|
|
|
[] -> assert false
|
|
|
|
| (If mode | Elif mode)::_ -> mode
|
|
|
|
| _::trace -> last_mode trace
|
|
|
|
|
2020-03-25 21:52:23 +04:00
|
|
|
(* Finding a file to #include *)
|
|
|
|
|
|
|
|
let rec find base = function
|
|
|
|
[] -> None
|
|
|
|
| dir::dirs ->
|
2020-03-27 22:30:39 +04:00
|
|
|
let path =
|
|
|
|
if dir = "." || dir = "" then base
|
|
|
|
else dir ^ Filename.dir_sep ^ base in
|
|
|
|
try Some (path, open_in path) with
|
2020-03-25 21:52:23 +04:00
|
|
|
Sys_error _ -> find base dirs
|
|
|
|
|
2020-03-27 22:30:39 +04:00
|
|
|
let find dir file libs =
|
|
|
|
let path =
|
|
|
|
if dir = "." || dir = "" then file
|
|
|
|
else dir ^ Filename.dir_sep ^ file in
|
|
|
|
try Some (path, open_in path) with
|
2020-03-25 21:52:23 +04:00
|
|
|
Sys_error _ ->
|
|
|
|
let base = Filename.basename file in
|
|
|
|
if base = file then find file libs else None
|
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
(* PRINTING *)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
(* Copying the current lexeme to [stdout] *)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
let copy state buffer = Buffer.add_string state.out (Lexing.lexeme buffer)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
(* End of lines *)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
let proc_nl state buffer = Lexing.new_line buffer; copy state buffer
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
(* Copying a string *)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
let print state string = Buffer.add_string state.out string
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
(* Expanding the offset into whitespace *)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
let expand_offset state =
|
|
|
|
match state.offset with
|
|
|
|
Prefix 0 | Inline -> ()
|
|
|
|
| Prefix n -> print state (String.make n ' ')
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
(* Evaluating a preprocessor expression
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
The evaluation of conditional directives may involve symbols whose
|
|
|
|
value may be defined using #define directives, or undefined by
|
|
|
|
means of #undef. Therefore, we need to evaluate conditional
|
|
|
|
expressions in an environment made of a set of defined symbols.
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
Note that we rely on an external lexer and parser for the
|
|
|
|
conditional expressions. See modules [E_Lexer] and [E_Parser].
|
|
|
|
*)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
let expr state buffer : mode =
|
2020-03-23 22:43:06 +04:00
|
|
|
let ast =
|
|
|
|
try E_Parser.expr E_Lexer.scan buffer with
|
2020-03-26 19:51:08 +04:00
|
|
|
E_Lexer.Error Region.{value; region} ->
|
|
|
|
(match value with
|
|
|
|
E_Lexer.Invalid_character c ->
|
|
|
|
stop (Invalid_character c) state region)
|
|
|
|
| E_Parser.Error ->
|
|
|
|
fail Parse_error state buffer in
|
2020-03-24 20:47:24 +04:00
|
|
|
let () = print state "\n" in
|
|
|
|
if eval state.env ast then Copy else Skip
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
(* DIRECTIVES *)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
let directives = [
|
2020-03-25 21:52:23 +04:00
|
|
|
"define"; "elif"; "else"; "endif"; "endregion"; "error";
|
|
|
|
"if"; "include"; "line"; "region"; "undef" (* "warning" *)
|
|
|
|
]
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
(* END OF HEADER *)
|
|
|
|
}
|
|
|
|
|
|
|
|
(* REGULAR EXPRESSIONS *)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
let nl = '\n' | '\r' | "\r\n"
|
|
|
|
let blank = ' ' | '\t'
|
|
|
|
let digit = ['0'-'9']
|
|
|
|
let natural = digit | digit (digit | '_')* digit
|
|
|
|
let small = ['a'-'z']
|
|
|
|
let capital = ['A'-'Z']
|
|
|
|
let letter = small | capital
|
2020-03-24 20:47:24 +04:00
|
|
|
let ident = letter (letter | '_' | digit)*
|
|
|
|
let directive = '#' (blank* as space) (small+ as id)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
(* Rules *)
|
|
|
|
|
|
|
|
(* The rule [scan] scans the input buffer for directives, strings,
|
|
|
|
comments, blanks, new lines and end of file characters. As a
|
|
|
|
result, either the matched input is copied to [stdout] or not,
|
|
|
|
depending on the compilation directives. If not copied, new line
|
|
|
|
characters are output.
|
|
|
|
|
|
|
|
Scanning is triggered by the function call [scan env mode offset
|
|
|
|
trace lexbuf], where [env] is the set of defined symbols
|
|
|
|
(introduced by `#define'), [mode] specifies whether we are copying
|
|
|
|
or skipping the input, [offset] informs about the location in the
|
|
|
|
line (either there is a prefix of blanks, or at least a non-blank
|
|
|
|
character has been read), and [trace] is the stack of conditional
|
|
|
|
directives read so far.
|
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
The first call is [scan {env=Env.empty; mode=Copy; offset = Prefix
|
2020-03-25 21:52:23 +04:00
|
|
|
0; trace=[]; incl=[]; opt}], meaning that we start with an empty
|
|
|
|
environment, that copying the input is enabled by default, and that
|
|
|
|
we are at the start of a line and no previous conditional
|
|
|
|
directives have been read yet. The field [opt] is the CLI options.
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
When an "#if" is matched, the trace is extended by the call [extend
|
|
|
|
lexbuf (If mode) trace], during the evaluation of which the
|
|
|
|
syntactic validity of having encountered an "#if" is checked (for
|
|
|
|
example, it would be invalid had an "#elif" been last read). Note
|
|
|
|
that the current mode is stored in the trace with the current
|
|
|
|
directive -- that mode may be later restored (see below for some
|
|
|
|
examples). Moreover, the directive would be deemed invalid if its
|
|
|
|
current position in the line (that is, its offset) were not
|
|
|
|
preceeded by blanks or nothing, otherwise the rule [expr] is called
|
|
|
|
to scan the boolean expression associated with the "#if": if it
|
|
|
|
evaluates to [true], the result is [Copy], meaning that we may copy
|
|
|
|
what follows, otherwise skip it -- the actual decision depending on
|
|
|
|
the current mode. That new mode is used if we were in copy mode,
|
|
|
|
and the offset is reset to the start of a new line (as we read a
|
|
|
|
new line in [expr]); otherwise we were in skipping mode and the
|
|
|
|
value of the conditional expression must be ignored (but not its
|
|
|
|
syntax), and we continue skipping the input.
|
|
|
|
|
|
|
|
When an "#else" is matched, the trace is extended with [Else],
|
|
|
|
then, if the directive is not at a wrong offset, the rest of the
|
2020-03-23 22:43:06 +04:00
|
|
|
line is scanned with [skip_line]. If we were in copy mode, the new
|
2020-01-27 19:05:47 +04:00
|
|
|
mode toggles to skipping mode; otherwise, the trace is searched for
|
|
|
|
the last encountered "#if" of "#elif" and the associated mode is
|
|
|
|
restored.
|
|
|
|
|
|
|
|
The case "#elif" is the result of the fusion (in the technical
|
|
|
|
sense) of the code for dealing with an "#else" followed by an
|
|
|
|
"#if".
|
|
|
|
|
|
|
|
When an "#endif" is matched, the trace is reduced, that is, all
|
|
|
|
conditional directives are popped until an [If mode'] is found and
|
|
|
|
[mode'] is restored as the current mode.
|
|
|
|
|
|
|
|
Consider the following four cases, where the modes (Copy/Skip) are
|
|
|
|
located between the lines:
|
|
|
|
|
|
|
|
Copy ----+ Copy ----+
|
|
|
|
#if true | #if true |
|
|
|
|
Copy | Copy |
|
|
|
|
#else | #else |
|
|
|
|
+-- Skip --+ | +-- Skip --+ |
|
|
|
|
#if true | | | #if false | | |
|
|
|
|
| Skip | | | Skip | |
|
|
|
|
#else | | | #else | | |
|
|
|
|
+-> Skip | | +-> Skip | |
|
|
|
|
#endif | | #endif | |
|
|
|
|
Skip <-+ | Skip <-+ |
|
|
|
|
#endif | #endif |
|
|
|
|
Copy <---+ Copy <---+
|
|
|
|
|
|
|
|
|
|
|
|
+-- Copy ----+ Copy --+-+
|
|
|
|
#if false | | #if false | |
|
|
|
|
| Skip | Skip | |
|
|
|
|
#else | | #else | |
|
|
|
|
+-> Copy --+ | +-+-- Copy <-+ |
|
|
|
|
#if true | | #if false | | |
|
|
|
|
Copy | | | | Skip |
|
|
|
|
#else | | #else | | |
|
|
|
|
Skip | | | +-> Copy |
|
|
|
|
#endif | | #endif | |
|
|
|
|
Copy <-+ | +---> Copy |
|
|
|
|
#endif | #endif |
|
|
|
|
Copy <---+ Copy <---+
|
|
|
|
|
|
|
|
The following four cases feature #elif. Note that we put between
|
|
|
|
brackets the mode saved for the #elif, which is sometimes restored
|
|
|
|
later.
|
|
|
|
|
|
|
|
Copy --+ Copy --+
|
|
|
|
#if true | #if true |
|
|
|
|
Copy | Copy |
|
|
|
|
#elif true +--[Skip] | #elif false +--[Skip] |
|
|
|
|
| Skip | | Skip |
|
|
|
|
#else | | #else | |
|
|
|
|
+-> Skip | +-> Skip |
|
|
|
|
#endif | #endif |
|
|
|
|
Copy <-+ Copy <-+
|
|
|
|
|
|
|
|
|
|
|
|
+-- Copy --+-+ +-- Copy ----+
|
|
|
|
#if false | | | #if false | |
|
|
|
|
| Skip | | | Skip |
|
|
|
|
#elif true +->[Copy] | | #elif false +->[Copy]--+ |
|
|
|
|
Copy <-+ | Skip | |
|
|
|
|
#else | #else | |
|
|
|
|
Skip | Copy <-+ |
|
|
|
|
#endif | #endif |
|
|
|
|
Copy <---+ Copy <---+
|
|
|
|
|
|
|
|
Note how "#elif" indeed behaves like an "#else" followed by an
|
|
|
|
"#if", and the mode stored with the data constructor [Elif]
|
|
|
|
corresponds to the mode before the virtual "#if".
|
|
|
|
|
|
|
|
Important note: Comments and strings are recognised as such only in
|
|
|
|
copy mode, which is a different behaviour from the preprocessor of
|
|
|
|
GNU GCC, which always does.
|
2020-03-25 21:52:23 +04:00
|
|
|
*)
|
2020-03-23 22:43:06 +04:00
|
|
|
|
|
|
|
rule scan state = parse
|
2020-03-27 22:30:39 +04:00
|
|
|
nl { expand_offset state; proc_nl state lexbuf;
|
2020-03-23 22:43:06 +04:00
|
|
|
scan {state with offset = Prefix 0} lexbuf }
|
|
|
|
| blank { match state.offset with
|
2020-03-26 19:51:08 +04:00
|
|
|
Prefix n ->
|
|
|
|
scan {state with offset = Prefix (n+1)} lexbuf
|
|
|
|
| Inline ->
|
|
|
|
if state.mode = Copy then copy state lexbuf;
|
|
|
|
scan state lexbuf }
|
2020-01-27 19:05:47 +04:00
|
|
|
| directive {
|
|
|
|
if not (List.mem id directives)
|
2020-03-25 00:05:39 +04:00
|
|
|
then fail (Invalid_directive id) state lexbuf;
|
2020-03-23 22:43:06 +04:00
|
|
|
if state.offset = Inline
|
2020-03-25 00:05:39 +04:00
|
|
|
then fail Directive_inside_line state lexbuf;
|
2020-03-24 20:47:24 +04:00
|
|
|
let region = mk_reg lexbuf in
|
2020-01-27 19:05:47 +04:00
|
|
|
match id with
|
|
|
|
"include" ->
|
2020-03-23 22:43:06 +04:00
|
|
|
let line = Lexing.(lexbuf.lex_curr_p.pos_lnum)
|
2020-03-25 21:52:23 +04:00
|
|
|
and file = Lexing.(lexbuf.lex_curr_p.pos_fname) in
|
|
|
|
let base = Filename.basename file
|
|
|
|
and reg, incl_file = scan_inclusion state lexbuf in
|
2020-03-27 22:30:39 +04:00
|
|
|
let incl_dir = Filename.dirname incl_file in
|
|
|
|
let path = mk_path state in
|
|
|
|
let incl_path, incl_chan =
|
|
|
|
match find path incl_file state.opt#libs with
|
|
|
|
Some p -> p
|
2020-03-25 21:52:23 +04:00
|
|
|
| None -> stop (File_not_found incl_file) state reg in
|
2020-03-26 22:32:48 +04:00
|
|
|
let incl_buf = Lexing.from_channel incl_chan in
|
|
|
|
let () =
|
|
|
|
let open Lexing in
|
|
|
|
incl_buf.lex_curr_p <-
|
|
|
|
{incl_buf.lex_curr_p with pos_fname = incl_file} in
|
2020-03-27 22:30:39 +04:00
|
|
|
let state = {state with incl = incl_chan::state.incl} in
|
2020-03-26 22:32:48 +04:00
|
|
|
let state' =
|
|
|
|
{state with env=Env.empty; mode=Copy; trace=[]} in
|
2020-03-27 22:30:39 +04:00
|
|
|
let state' = push_dir incl_dir state' in
|
|
|
|
print state (sprintf "\n# 1 \"%s\" 1\n" incl_path);
|
2020-03-26 22:32:48 +04:00
|
|
|
let state' = scan state' incl_buf in
|
2020-03-27 22:30:39 +04:00
|
|
|
let state = {state with incl = state'.incl} in
|
|
|
|
let path =
|
|
|
|
if path = "" then base
|
|
|
|
else path ^ Filename.dir_sep ^ base in
|
|
|
|
print state (sprintf "\n# %i \"%s\" 2" (line+1) path);
|
2020-03-23 22:43:06 +04:00
|
|
|
scan state lexbuf
|
2020-01-27 19:05:47 +04:00
|
|
|
| "if" ->
|
2020-03-23 22:43:06 +04:00
|
|
|
let mode = expr state lexbuf in
|
|
|
|
let mode = if state.mode = Copy then mode else Skip in
|
2020-03-24 20:47:24 +04:00
|
|
|
let trace = extend (If state.mode) state region in
|
2020-03-23 22:43:06 +04:00
|
|
|
let state = {state with mode; offset = Prefix 0; trace}
|
|
|
|
in scan state lexbuf
|
2020-01-27 19:05:47 +04:00
|
|
|
| "else" ->
|
2020-03-23 22:43:06 +04:00
|
|
|
let () = skip_line state lexbuf in
|
|
|
|
let mode = match state.mode with
|
|
|
|
Copy -> Skip
|
|
|
|
| Skip -> last_mode state.trace in
|
2020-03-24 20:47:24 +04:00
|
|
|
let trace = extend Else state region
|
2020-03-23 22:43:06 +04:00
|
|
|
in scan {state with mode; offset = Prefix 0; trace} lexbuf
|
2020-01-27 19:05:47 +04:00
|
|
|
| "elif" ->
|
2020-03-23 22:43:06 +04:00
|
|
|
let mode = expr state lexbuf in
|
|
|
|
let trace, mode =
|
|
|
|
match state.mode with
|
2020-03-24 20:47:24 +04:00
|
|
|
Copy -> extend (Elif Skip) state region, Skip
|
2020-03-23 22:43:06 +04:00
|
|
|
| Skip -> let old_mode = last_mode state.trace
|
2020-03-24 20:47:24 +04:00
|
|
|
in extend (Elif old_mode) state region,
|
2020-03-23 22:43:06 +04:00
|
|
|
if old_mode = Copy then mode else Skip
|
|
|
|
in scan {state with mode; offset = Prefix 0; trace} lexbuf
|
2020-01-27 19:05:47 +04:00
|
|
|
| "endif" ->
|
2020-03-25 21:52:23 +04:00
|
|
|
skip_line state lexbuf;
|
|
|
|
scan (reduce_cond state region) lexbuf
|
2020-01-27 19:05:47 +04:00
|
|
|
| "define" ->
|
2020-03-24 20:47:24 +04:00
|
|
|
let id, region = variable state lexbuf in
|
2020-03-23 22:43:06 +04:00
|
|
|
if id="true" || id="false"
|
2020-03-25 00:05:39 +04:00
|
|
|
then stop (Reserved_symbol id) state region;
|
2020-03-23 22:43:06 +04:00
|
|
|
if Env.mem id state.env
|
2020-03-25 00:05:39 +04:00
|
|
|
then stop (Multiply_defined_symbol id) state region;
|
2020-03-23 22:43:06 +04:00
|
|
|
let state = {state with env = Env.add id state.env;
|
|
|
|
offset = Prefix 0}
|
|
|
|
in scan state lexbuf
|
2020-01-27 19:05:47 +04:00
|
|
|
| "undef" ->
|
2020-03-23 22:43:06 +04:00
|
|
|
let id, _ = variable state lexbuf in
|
|
|
|
let state = {state with env = Env.remove id state.env;
|
|
|
|
offset = Prefix 0}
|
|
|
|
in scan state lexbuf
|
2020-01-27 19:05:47 +04:00
|
|
|
| "error" ->
|
2020-03-25 00:05:39 +04:00
|
|
|
stop (Error_directive (message [] lexbuf)) state region
|
2020-01-27 19:05:47 +04:00
|
|
|
| "region" ->
|
|
|
|
let msg = message [] lexbuf
|
2020-03-23 22:43:06 +04:00
|
|
|
in expand_offset state;
|
|
|
|
print state ("#" ^ space ^ "region" ^ msg ^ "\n");
|
|
|
|
let state =
|
|
|
|
{state with offset = Prefix 0; trace=Region::state.trace}
|
|
|
|
in scan state lexbuf
|
2020-01-27 19:05:47 +04:00
|
|
|
| "endregion" ->
|
|
|
|
let msg = message [] lexbuf
|
2020-03-23 22:43:06 +04:00
|
|
|
in expand_offset state;
|
|
|
|
print state ("#" ^ space ^ "endregion" ^ msg ^ "\n");
|
2020-03-25 21:52:23 +04:00
|
|
|
scan (reduce_region state region) lexbuf
|
2020-01-27 19:05:47 +04:00
|
|
|
| "line" ->
|
2020-03-23 22:43:06 +04:00
|
|
|
expand_offset state;
|
|
|
|
print state ("#" ^ space ^ "line");
|
|
|
|
line_ind state lexbuf;
|
|
|
|
scan {state with offset = Prefix 0} lexbuf
|
2020-03-26 19:51:08 +04:00
|
|
|
(*
|
|
|
|
| "warning" ->
|
|
|
|
let start_p, end_p = region in
|
|
|
|
let msg = message [] lexbuf in
|
|
|
|
let open Lexing
|
|
|
|
in prerr_endline
|
|
|
|
("Warning at line " ^ string_of_int start_p.pos_lnum
|
|
|
|
^ ", char "
|
|
|
|
^ string_of_int (start_p.pos_cnum - start_p.pos_bol)
|
|
|
|
^ "--" ^ string_of_int (end_p.pos_cnum - end_p.pos_bol)
|
|
|
|
^ ":\n" ^ msg);
|
|
|
|
scan env mode (Prefix 0) trace lexbuf
|
|
|
|
*)
|
2020-01-27 19:05:47 +04:00
|
|
|
| _ -> assert false
|
|
|
|
}
|
2020-03-23 22:43:06 +04:00
|
|
|
| eof { match state.trace with
|
|
|
|
[] -> expand_offset state; state
|
2020-03-25 00:05:39 +04:00
|
|
|
| _ -> fail Missing_endif state lexbuf }
|
2020-03-23 22:43:06 +04:00
|
|
|
| '"' { if state.mode = Copy then
|
|
|
|
begin
|
|
|
|
expand_offset state;
|
|
|
|
copy state lexbuf;
|
|
|
|
in_string (mk_reg lexbuf) state lexbuf
|
|
|
|
end;
|
|
|
|
scan {state with offset=Inline} lexbuf }
|
|
|
|
| "//" { if state.mode = Copy then
|
|
|
|
begin
|
|
|
|
expand_offset state;
|
|
|
|
copy state lexbuf;
|
|
|
|
in_line_com state lexbuf
|
|
|
|
end;
|
|
|
|
scan {state with offset=Inline} lexbuf }
|
|
|
|
| "/*" { if state.mode = Copy then
|
|
|
|
begin
|
|
|
|
expand_offset state;
|
|
|
|
copy state lexbuf;
|
2020-03-25 21:52:23 +04:00
|
|
|
if state.opt#lang = EvalOpt.ReasonLIGO then
|
|
|
|
reasonLIGO_com (mk_reg lexbuf) state lexbuf
|
|
|
|
end;
|
|
|
|
scan {state with offset=Inline} lexbuf }
|
|
|
|
| "(*" { if state.mode = Copy then
|
|
|
|
begin
|
|
|
|
expand_offset state;
|
|
|
|
copy state lexbuf;
|
|
|
|
if state.opt#lang = EvalOpt.CameLIGO
|
|
|
|
|| state.opt#lang = EvalOpt.PascaLIGO then
|
|
|
|
cameLIGO_com (mk_reg lexbuf) state lexbuf
|
2020-03-23 22:43:06 +04:00
|
|
|
end;
|
|
|
|
scan {state with offset=Inline} lexbuf }
|
|
|
|
| _ { if state.mode = Copy then
|
|
|
|
begin
|
|
|
|
expand_offset state;
|
|
|
|
copy state lexbuf
|
|
|
|
end;
|
|
|
|
scan {state with offset=Inline} lexbuf }
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
(* Support for #define and #undef *)
|
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
and variable state = parse
|
2020-03-24 20:47:24 +04:00
|
|
|
blank+ { let id = symbol state lexbuf
|
2020-03-23 22:43:06 +04:00
|
|
|
in skip_line state lexbuf; id }
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
and symbol state = parse
|
2020-03-25 21:52:23 +04:00
|
|
|
ident as id { id, mk_reg lexbuf }
|
2020-03-25 00:05:39 +04:00
|
|
|
| _ { fail Invalid_symbol state lexbuf }
|
2020-03-24 20:47:24 +04:00
|
|
|
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
(* Line indicator (#line) *)
|
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
and line_ind state = parse
|
|
|
|
blank* { copy state lexbuf; line_indicator state lexbuf }
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
and line_indicator state = parse
|
2020-03-24 20:47:24 +04:00
|
|
|
natural { copy state lexbuf; end_indicator state lexbuf }
|
2020-01-27 19:05:47 +04:00
|
|
|
| ident as id {
|
|
|
|
match id with
|
|
|
|
"default" | "hidden" ->
|
2020-03-23 22:43:06 +04:00
|
|
|
print state (id ^ message [] lexbuf)
|
2020-03-25 21:52:23 +04:00
|
|
|
| _ -> fail (Invalid_line_indicator id) state lexbuf }
|
|
|
|
| _ { fail No_line_indicator state lexbuf }
|
2020-03-23 22:43:06 +04:00
|
|
|
|
|
|
|
and end_indicator state = parse
|
2020-03-25 21:52:23 +04:00
|
|
|
blank+ { copy state lexbuf; end_indicator state lexbuf }
|
|
|
|
| nl { proc_nl state lexbuf }
|
|
|
|
| eof { copy state lexbuf }
|
2020-03-23 22:43:06 +04:00
|
|
|
| "//" { copy state lexbuf;
|
2020-03-25 21:52:23 +04:00
|
|
|
print state (message [] lexbuf ^ "\n") }
|
2020-03-23 22:43:06 +04:00
|
|
|
| '"' { copy state lexbuf;
|
|
|
|
in_string (mk_reg lexbuf) state lexbuf;
|
2020-03-25 21:52:23 +04:00
|
|
|
opt_line_com state lexbuf }
|
|
|
|
| _ { fail End_line_indicator state lexbuf }
|
2020-03-23 22:43:06 +04:00
|
|
|
|
|
|
|
and opt_line_com state = parse
|
|
|
|
nl { proc_nl state lexbuf }
|
|
|
|
| eof { copy state lexbuf }
|
|
|
|
| blank+ { copy state lexbuf; opt_line_com state lexbuf }
|
|
|
|
| "//" { print state ("//" ^ message [] lexbuf) }
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
(* New lines and verbatim sequence of characters *)
|
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
and skip_line state = parse
|
2020-03-25 21:52:23 +04:00
|
|
|
nl { proc_nl state lexbuf }
|
|
|
|
| blank+ { skip_line state lexbuf }
|
|
|
|
| "//" { in_line_com {state with mode=Skip} lexbuf }
|
2020-03-25 00:05:39 +04:00
|
|
|
| _ { fail No_line_comment_or_blank state lexbuf }
|
2020-03-25 21:52:23 +04:00
|
|
|
| eof { () }
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
and message acc = parse
|
|
|
|
nl { Lexing.new_line lexbuf;
|
|
|
|
mk_str (List.length acc) acc }
|
|
|
|
| eof { mk_str (List.length acc) acc }
|
2020-03-23 22:43:06 +04:00
|
|
|
| _ as c { message (c::acc) lexbuf }
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
(* Comments *)
|
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
and in_line_com state = parse
|
|
|
|
nl { proc_nl state lexbuf }
|
|
|
|
| eof { () }
|
|
|
|
| _ { if state.mode = Copy then copy state lexbuf;
|
|
|
|
in_line_com state lexbuf }
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-25 21:52:23 +04:00
|
|
|
and reasonLIGO_com opening state = parse
|
|
|
|
nl { proc_nl state lexbuf; reasonLIGO_com opening state lexbuf }
|
2020-03-26 19:51:08 +04:00
|
|
|
| "*/" { copy state lexbuf }
|
|
|
|
| eof { stop Open_comment state opening }
|
2020-03-25 21:52:23 +04:00
|
|
|
| _ { copy state lexbuf; reasonLIGO_com opening state lexbuf }
|
|
|
|
|
|
|
|
and cameLIGO_com opening state = parse
|
|
|
|
nl { proc_nl state lexbuf; cameLIGO_com opening state lexbuf }
|
|
|
|
| "*)" { copy state lexbuf }
|
|
|
|
| eof { stop Open_comment state opening }
|
|
|
|
| _ { copy state lexbuf; cameLIGO_com opening state lexbuf }
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
(* Included filename *)
|
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
and scan_inclusion state = parse
|
|
|
|
blank+ { scan_inclusion state lexbuf }
|
|
|
|
| '"' { in_inclusion (mk_reg lexbuf) [] 0 state lexbuf }
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
and in_inclusion opening acc len state = parse
|
2020-03-25 21:52:23 +04:00
|
|
|
'"' { let closing = mk_reg lexbuf
|
|
|
|
in Region.cover opening closing,
|
|
|
|
mk_str len acc }
|
|
|
|
| nl { fail Newline_in_string state lexbuf }
|
|
|
|
| eof { stop Open_string state opening }
|
2020-03-24 20:47:24 +04:00
|
|
|
| _ as c { in_inclusion opening (c::acc) (len+1) state lexbuf }
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
(* Strings *)
|
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
and in_string opening state = parse
|
2020-03-25 21:52:23 +04:00
|
|
|
"\\\"" { copy state lexbuf; in_string opening state lexbuf }
|
|
|
|
| '"' { copy state lexbuf }
|
|
|
|
| nl { fail Newline_in_string state lexbuf }
|
|
|
|
| eof { stop Open_string state opening }
|
|
|
|
| _ { copy state lexbuf; in_string opening state lexbuf }
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
|
|
|
|
{
|
2020-03-24 20:47:24 +04:00
|
|
|
(* START OF TRAILER *)
|
|
|
|
|
2020-01-27 19:05:47 +04:00
|
|
|
(* The function [lex] is a wrapper of [scan], which also checks that
|
2020-03-24 20:47:24 +04:00
|
|
|
the trace is empty at the end. Note that we discard the state at
|
|
|
|
the end. *)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-26 19:51:08 +04:00
|
|
|
let lex opt buffer =
|
2020-03-23 22:43:06 +04:00
|
|
|
let state = {
|
|
|
|
env = Env.empty;
|
|
|
|
mode = Copy;
|
|
|
|
offset = Prefix 0;
|
|
|
|
trace = [];
|
|
|
|
out = Buffer.create 80;
|
2020-03-25 21:52:23 +04:00
|
|
|
incl = [];
|
2020-03-27 22:30:39 +04:00
|
|
|
opt;
|
|
|
|
dir = []
|
2020-03-23 22:43:06 +04:00
|
|
|
} in
|
2020-03-26 19:51:08 +04:00
|
|
|
match scan state buffer with
|
|
|
|
state -> List.iter close_in state.incl;
|
|
|
|
Stdlib.Ok state.out
|
|
|
|
| exception Error e -> Stdlib.Error e
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
(* END OF TRAILER *)
|
2020-01-27 19:05:47 +04:00
|
|
|
}
|