2020-03-23 22:43:06 +04:00
|
|
|
(* Simple preprocessor based on C#, to be processed by [ocamllex]. *)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
{
|
2020-03-24 20:47:24 +04:00
|
|
|
(* START OF HEADER *)
|
2020-03-23 22:43:06 +04:00
|
|
|
|
|
|
|
module Region = Simple_utils.Region
|
2020-03-24 21:52:00 +04:00
|
|
|
module Pos = Simple_utils.Pos
|
2020-03-23 22:43:06 +04:00
|
|
|
|
|
|
|
let sprintf = Printf.sprintf
|
|
|
|
|
2020-04-01 21:22:46 +04:00
|
|
|
(* Rolling back one lexeme _within the current semantic action_ *)
|
|
|
|
|
|
|
|
let rollback buffer =
|
|
|
|
let open Lexing in
|
|
|
|
let len = String.length (lexeme buffer) in
|
|
|
|
let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
|
|
|
|
buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
|
|
|
|
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
|
|
|
|
|
2020-01-27 19:05:47 +04:00
|
|
|
(* STRING PROCESSING *)
|
|
|
|
|
|
|
|
(* The value of [mk_str len p] ("make string") is a string of length
|
|
|
|
[len] containing the [len] characters in the list [p], in reverse
|
|
|
|
order. For instance, [mk_str 3 ['c';'b';'a'] = "abc"]. *)
|
|
|
|
|
|
|
|
let mk_str (len: int) (p: char list) : string =
|
|
|
|
let () = assert (len = List.length p) in
|
|
|
|
let bytes = Bytes.make len ' ' in
|
|
|
|
let rec fill i = function
|
|
|
|
[] -> bytes
|
|
|
|
| char::l -> Bytes.set bytes i char; fill (i-1) l
|
|
|
|
in fill (len-1) p |> Bytes.to_string
|
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
(* The type [mode] defines the two scanning modes of the preprocessor:
|
|
|
|
either we copy the current characters or we skip them. *)
|
|
|
|
|
|
|
|
type mode = Copy | Skip
|
|
|
|
|
|
|
|
(* Trace of directives
|
|
|
|
|
|
|
|
We keep track of directives #if, #elif, #else, #region and #endregion.
|
|
|
|
*)
|
|
|
|
|
|
|
|
type cond = If of mode | Elif of mode | Else | Region
|
|
|
|
type trace = cond list
|
|
|
|
|
|
|
|
(* Environments *)
|
|
|
|
|
|
|
|
module Env = Set.Make (String)
|
|
|
|
|
|
|
|
let rec eval env =
|
|
|
|
let open E_AST
|
|
|
|
in function
|
|
|
|
Or (e1,e2) -> eval env e1 || eval env e2
|
|
|
|
| And (e1,e2) -> eval env e1 && eval env e2
|
|
|
|
| Eq (e1,e2) -> eval env e1 = eval env e2
|
|
|
|
| Neq (e1,e2) -> eval env e1 != eval env e2
|
|
|
|
| Not e -> not (eval env e)
|
|
|
|
| True -> true
|
|
|
|
| False -> false
|
|
|
|
| Ident id -> Env.mem id env
|
|
|
|
|
|
|
|
(* The type [state] groups the information that needs to be threaded
|
2020-03-27 22:30:39 +04:00
|
|
|
along the scanning functions:
|
|
|
|
* the field [env] records the symbols defined;
|
|
|
|
* the field [mode] informs whether the preprocessor is in copying or
|
|
|
|
skipping mode;
|
|
|
|
* the field [trace] is a stack of previous, still active conditional
|
|
|
|
directives;
|
|
|
|
* the field [out] keeps the output buffer;
|
|
|
|
* the field [incl] is a list of opened input channels (#include);
|
|
|
|
* the field [opt] holds the CLI options;
|
|
|
|
* the field [dir] is the file system's path to the the current input
|
|
|
|
file.
|
|
|
|
*)
|
2020-03-24 20:47:24 +04:00
|
|
|
|
|
|
|
type state = {
|
2020-04-08 22:24:34 +04:00
|
|
|
env : Env.t;
|
|
|
|
mode : mode;
|
|
|
|
trace : trace;
|
|
|
|
out : Buffer.t;
|
|
|
|
incl : in_channel list;
|
|
|
|
opt : EvalOpt.options;
|
2020-04-09 18:18:26 +04:00
|
|
|
dir : string list
|
2020-03-24 20:47:24 +04:00
|
|
|
}
|
|
|
|
|
2020-03-27 22:30:39 +04:00
|
|
|
(* Directories *)
|
|
|
|
|
|
|
|
let push_dir dir state =
|
|
|
|
if dir = "." then state else {state with dir = dir :: state.dir}
|
|
|
|
|
|
|
|
let mk_path state =
|
|
|
|
String.concat Filename.dir_sep (List.rev state.dir)
|
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
(* ERRORS *)
|
|
|
|
|
2020-03-25 00:05:39 +04:00
|
|
|
type error =
|
2020-04-03 21:06:35 +04:00
|
|
|
Directive_inside_line
|
2020-03-25 00:05:39 +04:00
|
|
|
| Missing_endif
|
|
|
|
| Invalid_line_indicator of string
|
|
|
|
| No_line_indicator
|
|
|
|
| End_line_indicator
|
|
|
|
| Newline_in_string
|
2020-04-24 22:54:13 +04:00
|
|
|
| Unterminated_string
|
2020-03-25 00:05:39 +04:00
|
|
|
| Dangling_endif
|
|
|
|
| Open_region_in_conditional
|
|
|
|
| Dangling_endregion
|
|
|
|
| Conditional_in_region
|
|
|
|
| If_follows_elif
|
|
|
|
| Else_follows_else
|
|
|
|
| Dangling_else
|
|
|
|
| Elif_follows_else
|
|
|
|
| Dangling_elif
|
|
|
|
| Reserved_symbol of string
|
|
|
|
| Multiply_defined_symbol of string
|
|
|
|
| Error_directive of string
|
|
|
|
| Parse_error
|
|
|
|
| Invalid_symbol
|
2020-03-25 21:52:23 +04:00
|
|
|
| File_not_found of string
|
2020-03-26 19:51:08 +04:00
|
|
|
| Invalid_character of char
|
2020-04-24 22:54:13 +04:00
|
|
|
| Unterminated_comment of string
|
2020-05-15 23:23:49 +04:00
|
|
|
| Unterminated_inclusion
|
2020-03-25 00:05:39 +04:00
|
|
|
|
|
|
|
let error_to_string = function
|
2020-04-03 21:06:35 +04:00
|
|
|
Directive_inside_line ->
|
2020-03-25 00:05:39 +04:00
|
|
|
sprintf "Directive inside a line."
|
|
|
|
| Missing_endif ->
|
|
|
|
sprintf "Missing #endif directive."
|
|
|
|
| Invalid_line_indicator id ->
|
|
|
|
sprintf "Invalid line indicator \"%s\".\n\
|
|
|
|
Hint: Try \"default\" or \"hidden\"." id
|
|
|
|
| No_line_indicator ->
|
|
|
|
sprintf "Missing line indicator."
|
|
|
|
| End_line_indicator ->
|
|
|
|
sprintf "Invalid ending of numerical line indicator.\n\
|
|
|
|
Hint: Try a string, end of line, or a line comment."
|
|
|
|
| Newline_in_string ->
|
|
|
|
sprintf "Invalid newline character in string."
|
2020-04-24 22:54:13 +04:00
|
|
|
| Unterminated_string ->
|
2020-03-25 00:05:39 +04:00
|
|
|
sprintf "Unterminated string.\n\
|
|
|
|
Hint: Close with double quotes."
|
|
|
|
| Dangling_endif ->
|
|
|
|
sprintf "Dangling #endif directive.\n\
|
|
|
|
Hint: Remove it or add a #if before."
|
|
|
|
| Open_region_in_conditional ->
|
|
|
|
sprintf "Unterminated of #region in conditional.\n\
|
|
|
|
Hint: Close with #endregion before #endif."
|
|
|
|
| Dangling_endregion ->
|
|
|
|
sprintf "Dangling #endregion directive.\n\
|
|
|
|
Hint: Remove it or use #region before."
|
|
|
|
| Conditional_in_region ->
|
|
|
|
sprintf "Conditional in region.\n\
|
|
|
|
Hint: Remove the conditional or the region."
|
|
|
|
| If_follows_elif ->
|
|
|
|
sprintf "Directive #if found in a clause #elif."
|
|
|
|
| Else_follows_else ->
|
|
|
|
sprintf "Directive #else found in a clause #else."
|
|
|
|
| Dangling_else ->
|
|
|
|
sprintf "Directive #else without #if."
|
|
|
|
| Elif_follows_else ->
|
|
|
|
sprintf "Directive #elif found in a clause #else."
|
|
|
|
| Dangling_elif ->
|
|
|
|
sprintf "Dangling #elif directive.\n\
|
|
|
|
Hint: Remove it or add a #if before."
|
|
|
|
| Reserved_symbol sym ->
|
|
|
|
sprintf "Reserved symbol \"%s\".\n\
|
|
|
|
Hint: Use another symbol." sym
|
|
|
|
| Multiply_defined_symbol sym ->
|
|
|
|
sprintf "Multiply-defined symbol \"%s\".\n\
|
|
|
|
Hint: Change the name or remove one definition." sym
|
|
|
|
| Error_directive msg ->
|
|
|
|
msg
|
|
|
|
| Parse_error ->
|
|
|
|
"Parse error in expression."
|
|
|
|
| Invalid_symbol ->
|
|
|
|
"Expected a symbol (identifier)."
|
2020-03-25 21:52:23 +04:00
|
|
|
| File_not_found name ->
|
|
|
|
sprintf "File \"%s\" to include not found." name
|
2020-03-26 19:51:08 +04:00
|
|
|
| Invalid_character c ->
|
|
|
|
E_Lexer.error_to_string (E_Lexer.Invalid_character c)
|
2020-04-24 22:54:13 +04:00
|
|
|
| Unterminated_comment ending ->
|
2020-05-15 23:23:49 +04:00
|
|
|
sprintf "Unterminated comment.\n\
|
|
|
|
Hint: Close with \"%s\"." ending
|
|
|
|
| Unterminated_inclusion ->
|
|
|
|
sprintf "Unterminated #include directive.\n\
|
|
|
|
Hint: Add as a string the name of the file to be included."
|
2020-03-25 00:05:39 +04:00
|
|
|
|
|
|
|
let format ?(offsets=true) Region.{region; value} ~file =
|
|
|
|
let msg = error_to_string value
|
|
|
|
and reg = region#to_string ~file ~offsets `Byte in
|
2020-04-09 20:41:55 +04:00
|
|
|
let value = sprintf "Preprocessing error %s:\n%s" reg msg
|
2020-03-25 00:05:39 +04:00
|
|
|
in Region.{value; region}
|
|
|
|
|
2020-03-26 19:51:08 +04:00
|
|
|
exception Error of (Buffer.t * error Region.reg)
|
2020-03-23 22:43:06 +04:00
|
|
|
|
|
|
|
let mk_reg buffer =
|
|
|
|
let start = Lexing.lexeme_start_p buffer |> Pos.from_byte
|
|
|
|
and stop = Lexing.lexeme_end_p buffer |> Pos.from_byte
|
|
|
|
in Region.make ~start ~stop
|
|
|
|
|
2020-03-26 19:51:08 +04:00
|
|
|
(* IMPORTANT : Make sure the function [stop] remains the only one
|
|
|
|
raising [Error]. *)
|
|
|
|
|
|
|
|
let stop value state region =
|
|
|
|
List.iter close_in state.incl;
|
|
|
|
raise (Error (state.out, Region.{region; value}))
|
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
let fail error state buffer = stop error state (mk_reg buffer)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
(* The function [reduce_cond] is called when a #endif directive is
|
|
|
|
found, and the trace (see type [trace] above) needs updating. *)
|
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
let reduce_cond state region =
|
|
|
|
let rec reduce = function
|
2020-03-25 00:05:39 +04:00
|
|
|
[] -> stop Dangling_endif state region
|
2020-04-24 22:54:13 +04:00
|
|
|
| If mode::trace -> {state with mode; trace}
|
2020-03-25 00:05:39 +04:00
|
|
|
| Region::_ -> stop Open_region_in_conditional state region
|
2020-03-24 20:47:24 +04:00
|
|
|
| _::trace -> reduce trace
|
|
|
|
in reduce state.trace
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-25 21:52:23 +04:00
|
|
|
(* The function [reduce_region] is called when a #endregion directive is
|
2020-01-27 19:05:47 +04:00
|
|
|
read, and the trace needs updating. *)
|
|
|
|
|
2020-03-25 21:52:23 +04:00
|
|
|
let reduce_region state region =
|
2020-03-24 20:47:24 +04:00
|
|
|
match state.trace with
|
2020-03-25 00:05:39 +04:00
|
|
|
[] -> stop Dangling_endregion state region
|
2020-04-24 22:54:13 +04:00
|
|
|
| Region::trace -> {state with trace}
|
2020-03-25 00:05:39 +04:00
|
|
|
| _ -> stop Conditional_in_region state region
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
(* The function [extend] is called when encountering conditional
|
|
|
|
directives #if, #else and #elif. As its name suggests, it extends
|
|
|
|
the current trace with the current conditional directive, whilst
|
|
|
|
performing some validity checks. *)
|
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
let extend cond state region =
|
|
|
|
match cond, state.trace with
|
2020-03-25 00:05:39 +04:00
|
|
|
If _, Elif _::_ -> stop If_follows_elif state region
|
|
|
|
| Else, Else::_ -> stop Else_follows_else state region
|
|
|
|
| Else, [] -> stop Dangling_else state region
|
|
|
|
| Elif _, Else::_ -> stop Elif_follows_else state region
|
|
|
|
| Elif _, [] -> stop Dangling_elif state region
|
2020-03-24 20:47:24 +04:00
|
|
|
| hd, tl -> hd::tl
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
(* The function [last_mode] seeks the last mode as recorded in the
|
|
|
|
trace (see type [trace] above). *)
|
|
|
|
|
|
|
|
let rec last_mode = function
|
|
|
|
[] -> assert false
|
|
|
|
| (If mode | Elif mode)::_ -> mode
|
|
|
|
| _::trace -> last_mode trace
|
|
|
|
|
2020-03-25 21:52:23 +04:00
|
|
|
(* Finding a file to #include *)
|
|
|
|
|
|
|
|
let rec find base = function
|
|
|
|
[] -> None
|
|
|
|
| dir::dirs ->
|
2020-03-27 22:30:39 +04:00
|
|
|
let path =
|
|
|
|
if dir = "." || dir = "" then base
|
|
|
|
else dir ^ Filename.dir_sep ^ base in
|
|
|
|
try Some (path, open_in path) with
|
2020-03-25 21:52:23 +04:00
|
|
|
Sys_error _ -> find base dirs
|
|
|
|
|
2020-03-27 22:30:39 +04:00
|
|
|
let find dir file libs =
|
|
|
|
let path =
|
|
|
|
if dir = "." || dir = "" then file
|
|
|
|
else dir ^ Filename.dir_sep ^ file in
|
|
|
|
try Some (path, open_in path) with
|
2020-03-25 21:52:23 +04:00
|
|
|
Sys_error _ ->
|
|
|
|
let base = Filename.basename file in
|
|
|
|
if base = file then find file libs else None
|
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
(* PRINTING *)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
(* Copying the current lexeme to [stdout] *)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
let copy state buffer = Buffer.add_string state.out (Lexing.lexeme buffer)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-04-24 22:54:13 +04:00
|
|
|
(* End of lines are always copied *)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
let proc_nl state buffer = Lexing.new_line buffer; copy state buffer
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
(* Copying a string *)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
let print state string = Buffer.add_string state.out string
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
(* Evaluating a preprocessor expression
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
The evaluation of conditional directives may involve symbols whose
|
|
|
|
value may be defined using #define directives, or undefined by
|
|
|
|
means of #undef. Therefore, we need to evaluate conditional
|
|
|
|
expressions in an environment made of a set of defined symbols.
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
Note that we rely on an external lexer and parser for the
|
|
|
|
conditional expressions. See modules [E_Lexer] and [E_Parser].
|
|
|
|
*)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
let expr state buffer : mode =
|
2020-03-23 22:43:06 +04:00
|
|
|
let ast =
|
|
|
|
try E_Parser.expr E_Lexer.scan buffer with
|
2020-03-26 19:51:08 +04:00
|
|
|
E_Lexer.Error Region.{value; region} ->
|
|
|
|
(match value with
|
|
|
|
E_Lexer.Invalid_character c ->
|
|
|
|
stop (Invalid_character c) state region)
|
|
|
|
| E_Parser.Error ->
|
|
|
|
fail Parse_error state buffer in
|
2020-03-24 20:47:24 +04:00
|
|
|
let () = print state "\n" in
|
|
|
|
if eval state.env ast then Copy else Skip
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
(* DIRECTIVES *)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
let directives = [
|
2020-03-25 21:52:23 +04:00
|
|
|
"define"; "elif"; "else"; "endif"; "endregion"; "error";
|
2020-04-07 20:33:46 +04:00
|
|
|
"if"; "include"; (*"line";*) "region"; "undef" (* "; warning" *)
|
2020-03-25 21:52:23 +04:00
|
|
|
]
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
(* END OF HEADER *)
|
|
|
|
}
|
|
|
|
|
|
|
|
(* REGULAR EXPRESSIONS *)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
let nl = '\n' | '\r' | "\r\n"
|
|
|
|
let blank = ' ' | '\t'
|
|
|
|
let digit = ['0'-'9']
|
|
|
|
let natural = digit | digit (digit | '_')* digit
|
|
|
|
let small = ['a'-'z']
|
|
|
|
let capital = ['A'-'Z']
|
|
|
|
let letter = small | capital
|
2020-03-24 20:47:24 +04:00
|
|
|
let ident = letter (letter | '_' | digit)*
|
|
|
|
let directive = '#' (blank* as space) (small+ as id)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-04-24 22:54:13 +04:00
|
|
|
(* Comments *)
|
|
|
|
|
|
|
|
let pascaligo_block_comment_opening = "(*"
|
|
|
|
let pascaligo_block_comment_closing = "*)"
|
|
|
|
let pascaligo_line_comment = "//"
|
|
|
|
|
|
|
|
let cameligo_block_comment_opening = "(*"
|
|
|
|
let cameligo_block_comment_closing = "*)"
|
|
|
|
let cameligo_line_comment = "//"
|
|
|
|
|
|
|
|
let reasonligo_block_comment_opening = "/*"
|
|
|
|
let reasonligo_block_comment_closing = "*/"
|
|
|
|
let reasonligo_line_comment = "//"
|
|
|
|
|
|
|
|
let block_comment_openings =
|
|
|
|
pascaligo_block_comment_opening
|
|
|
|
| cameligo_block_comment_opening
|
|
|
|
| reasonligo_block_comment_opening
|
|
|
|
|
|
|
|
let block_comment_closings =
|
|
|
|
pascaligo_block_comment_closing
|
|
|
|
| cameligo_block_comment_closing
|
|
|
|
| reasonligo_block_comment_closing
|
|
|
|
|
|
|
|
let line_comments =
|
|
|
|
pascaligo_line_comment
|
|
|
|
| cameligo_line_comment
|
|
|
|
| reasonligo_line_comment
|
|
|
|
|
2020-01-27 19:05:47 +04:00
|
|
|
(* Rules *)
|
|
|
|
|
|
|
|
(* The rule [scan] scans the input buffer for directives, strings,
|
|
|
|
comments, blanks, new lines and end of file characters. As a
|
|
|
|
result, either the matched input is copied to [stdout] or not,
|
|
|
|
depending on the compilation directives. If not copied, new line
|
|
|
|
characters are output.
|
|
|
|
|
2020-04-24 22:54:13 +04:00
|
|
|
Scanning is triggered by the function call [scan env mode trace
|
|
|
|
lexbuf], where [env] is the set of defined symbols (introduced by
|
|
|
|
`#define'), [mode] specifies whether we are copying or skipping the
|
|
|
|
input, and [trace] is the stack of conditional directives read so
|
|
|
|
far.
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-04-24 22:54:13 +04:00
|
|
|
The first call is [scan {env=Env.empty; mode=Copy; trace=[];
|
|
|
|
incl=[]; opt}], meaning that we start with an empty environment,
|
|
|
|
that copying the input is enabled by default, and that we are at
|
|
|
|
the start of a line and no previous conditional directives have
|
|
|
|
been read yet. The field [opt] is the CLI options.
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
When an "#if" is matched, the trace is extended by the call [extend
|
|
|
|
lexbuf (If mode) trace], during the evaluation of which the
|
|
|
|
syntactic validity of having encountered an "#if" is checked (for
|
|
|
|
example, it would be invalid had an "#elif" been last read). Note
|
|
|
|
that the current mode is stored in the trace with the current
|
|
|
|
directive -- that mode may be later restored (see below for some
|
|
|
|
examples). Moreover, the directive would be deemed invalid if its
|
|
|
|
current position in the line (that is, its offset) were not
|
|
|
|
preceeded by blanks or nothing, otherwise the rule [expr] is called
|
|
|
|
to scan the boolean expression associated with the "#if": if it
|
|
|
|
evaluates to [true], the result is [Copy], meaning that we may copy
|
|
|
|
what follows, otherwise skip it -- the actual decision depending on
|
|
|
|
the current mode. That new mode is used if we were in copy mode,
|
|
|
|
and the offset is reset to the start of a new line (as we read a
|
|
|
|
new line in [expr]); otherwise we were in skipping mode and the
|
|
|
|
value of the conditional expression must be ignored (but not its
|
|
|
|
syntax), and we continue skipping the input.
|
|
|
|
|
2020-04-24 22:54:13 +04:00
|
|
|
When an "#else" is matched, the trace is extended with [Else], then
|
|
|
|
the rest of the line is scanned with [skip_line]. If we were in
|
|
|
|
copy mode, the new mode toggles to skipping mode; otherwise, the
|
|
|
|
trace is searched for the last encountered "#if" of "#elif" and the
|
|
|
|
associated mode is restored.
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
The case "#elif" is the result of the fusion (in the technical
|
|
|
|
sense) of the code for dealing with an "#else" followed by an
|
|
|
|
"#if".
|
|
|
|
|
|
|
|
When an "#endif" is matched, the trace is reduced, that is, all
|
|
|
|
conditional directives are popped until an [If mode'] is found and
|
|
|
|
[mode'] is restored as the current mode.
|
|
|
|
|
|
|
|
Consider the following four cases, where the modes (Copy/Skip) are
|
|
|
|
located between the lines:
|
|
|
|
|
|
|
|
Copy ----+ Copy ----+
|
|
|
|
#if true | #if true |
|
|
|
|
Copy | Copy |
|
|
|
|
#else | #else |
|
|
|
|
+-- Skip --+ | +-- Skip --+ |
|
|
|
|
#if true | | | #if false | | |
|
|
|
|
| Skip | | | Skip | |
|
|
|
|
#else | | | #else | | |
|
|
|
|
+-> Skip | | +-> Skip | |
|
|
|
|
#endif | | #endif | |
|
|
|
|
Skip <-+ | Skip <-+ |
|
|
|
|
#endif | #endif |
|
|
|
|
Copy <---+ Copy <---+
|
|
|
|
|
|
|
|
|
|
|
|
+-- Copy ----+ Copy --+-+
|
|
|
|
#if false | | #if false | |
|
|
|
|
| Skip | Skip | |
|
|
|
|
#else | | #else | |
|
|
|
|
+-> Copy --+ | +-+-- Copy <-+ |
|
|
|
|
#if true | | #if false | | |
|
|
|
|
Copy | | | | Skip |
|
|
|
|
#else | | #else | | |
|
|
|
|
Skip | | | +-> Copy |
|
|
|
|
#endif | | #endif | |
|
|
|
|
Copy <-+ | +---> Copy |
|
|
|
|
#endif | #endif |
|
|
|
|
Copy <---+ Copy <---+
|
|
|
|
|
|
|
|
The following four cases feature #elif. Note that we put between
|
|
|
|
brackets the mode saved for the #elif, which is sometimes restored
|
|
|
|
later.
|
|
|
|
|
|
|
|
Copy --+ Copy --+
|
|
|
|
#if true | #if true |
|
|
|
|
Copy | Copy |
|
|
|
|
#elif true +--[Skip] | #elif false +--[Skip] |
|
|
|
|
| Skip | | Skip |
|
|
|
|
#else | | #else | |
|
|
|
|
+-> Skip | +-> Skip |
|
|
|
|
#endif | #endif |
|
|
|
|
Copy <-+ Copy <-+
|
|
|
|
|
|
|
|
|
|
|
|
+-- Copy --+-+ +-- Copy ----+
|
|
|
|
#if false | | | #if false | |
|
|
|
|
| Skip | | | Skip |
|
|
|
|
#elif true +->[Copy] | | #elif false +->[Copy]--+ |
|
|
|
|
Copy <-+ | Skip | |
|
|
|
|
#else | #else | |
|
|
|
|
Skip | Copy <-+ |
|
|
|
|
#endif | #endif |
|
|
|
|
Copy <---+ Copy <---+
|
|
|
|
|
|
|
|
Note how "#elif" indeed behaves like an "#else" followed by an
|
|
|
|
"#if", and the mode stored with the data constructor [Elif]
|
|
|
|
corresponds to the mode before the virtual "#if".
|
|
|
|
|
|
|
|
Important note: Comments and strings are recognised as such only in
|
|
|
|
copy mode, which is a different behaviour from the preprocessor of
|
|
|
|
GNU GCC, which always does.
|
2020-04-24 22:54:13 +04:00
|
|
|
*)
|
2020-03-23 22:43:06 +04:00
|
|
|
|
|
|
|
rule scan state = parse
|
2020-04-24 22:54:13 +04:00
|
|
|
nl { proc_nl state lexbuf; scan state lexbuf }
|
|
|
|
| blank { if state.mode = Copy then copy state lexbuf;
|
|
|
|
scan state lexbuf }
|
2020-01-27 19:05:47 +04:00
|
|
|
| directive {
|
2020-04-24 22:54:13 +04:00
|
|
|
let region = mk_reg lexbuf in
|
2020-01-27 19:05:47 +04:00
|
|
|
if not (List.mem id directives)
|
2020-04-03 21:06:35 +04:00
|
|
|
then begin
|
|
|
|
if state.mode = Copy then copy state lexbuf;
|
|
|
|
scan state lexbuf
|
|
|
|
end
|
|
|
|
else
|
2020-04-24 22:54:13 +04:00
|
|
|
if region#start#offset `Byte > 0
|
2020-04-03 21:06:35 +04:00
|
|
|
then fail Directive_inside_line state lexbuf
|
|
|
|
else
|
2020-01-27 19:05:47 +04:00
|
|
|
match id with
|
|
|
|
"include" ->
|
2020-03-23 22:43:06 +04:00
|
|
|
let line = Lexing.(lexbuf.lex_curr_p.pos_lnum)
|
2020-03-25 21:52:23 +04:00
|
|
|
and file = Lexing.(lexbuf.lex_curr_p.pos_fname) in
|
|
|
|
let base = Filename.basename file
|
|
|
|
and reg, incl_file = scan_inclusion state lexbuf in
|
2020-03-27 22:30:39 +04:00
|
|
|
let incl_dir = Filename.dirname incl_file in
|
|
|
|
let path = mk_path state in
|
|
|
|
let incl_path, incl_chan =
|
|
|
|
match find path incl_file state.opt#libs with
|
|
|
|
Some p -> p
|
2020-03-25 21:52:23 +04:00
|
|
|
| None -> stop (File_not_found incl_file) state reg in
|
2020-04-09 20:41:55 +04:00
|
|
|
let () = print state (sprintf "\n# 1 \"%s\" 1\n" incl_path) in
|
2020-03-26 22:32:48 +04:00
|
|
|
let incl_buf = Lexing.from_channel incl_chan in
|
|
|
|
let () =
|
|
|
|
let open Lexing in
|
|
|
|
incl_buf.lex_curr_p <-
|
|
|
|
{incl_buf.lex_curr_p with pos_fname = incl_file} in
|
2020-03-27 22:30:39 +04:00
|
|
|
let state = {state with incl = incl_chan::state.incl} in
|
2020-04-16 18:56:11 +04:00
|
|
|
let state' = {state with mode=Copy; trace=[]} in
|
2020-04-09 20:41:55 +04:00
|
|
|
let state' = scan (push_dir incl_dir state') incl_buf in
|
2020-04-16 18:56:11 +04:00
|
|
|
let state = {state with env=state'.env; incl=state'.incl} in
|
|
|
|
let path = if path = "" then base
|
|
|
|
else path ^ Filename.dir_sep ^ base in
|
2020-03-27 22:30:39 +04:00
|
|
|
print state (sprintf "\n# %i \"%s\" 2" (line+1) path);
|
2020-03-23 22:43:06 +04:00
|
|
|
scan state lexbuf
|
2020-01-27 19:05:47 +04:00
|
|
|
| "if" ->
|
2020-03-23 22:43:06 +04:00
|
|
|
let mode = expr state lexbuf in
|
|
|
|
let mode = if state.mode = Copy then mode else Skip in
|
2020-03-24 20:47:24 +04:00
|
|
|
let trace = extend (If state.mode) state region in
|
2020-04-24 22:54:13 +04:00
|
|
|
let state = {state with mode; trace}
|
2020-03-23 22:43:06 +04:00
|
|
|
in scan state lexbuf
|
2020-01-27 19:05:47 +04:00
|
|
|
| "else" ->
|
2020-04-24 22:54:13 +04:00
|
|
|
let () = skip_line state lexbuf in
|
|
|
|
let mode = match state.mode with
|
|
|
|
Copy -> Skip
|
|
|
|
| Skip -> last_mode state.trace in
|
2020-03-24 20:47:24 +04:00
|
|
|
let trace = extend Else state region
|
2020-04-24 22:54:13 +04:00
|
|
|
in scan {state with mode; trace} lexbuf
|
2020-01-27 19:05:47 +04:00
|
|
|
| "elif" ->
|
2020-03-23 22:43:06 +04:00
|
|
|
let mode = expr state lexbuf in
|
|
|
|
let trace, mode =
|
|
|
|
match state.mode with
|
2020-03-24 20:47:24 +04:00
|
|
|
Copy -> extend (Elif Skip) state region, Skip
|
2020-03-23 22:43:06 +04:00
|
|
|
| Skip -> let old_mode = last_mode state.trace
|
2020-03-24 20:47:24 +04:00
|
|
|
in extend (Elif old_mode) state region,
|
2020-03-23 22:43:06 +04:00
|
|
|
if old_mode = Copy then mode else Skip
|
2020-04-24 22:54:13 +04:00
|
|
|
in scan {state with mode; trace} lexbuf
|
2020-01-27 19:05:47 +04:00
|
|
|
| "endif" ->
|
2020-03-25 21:52:23 +04:00
|
|
|
skip_line state lexbuf;
|
|
|
|
scan (reduce_cond state region) lexbuf
|
2020-01-27 19:05:47 +04:00
|
|
|
| "define" ->
|
2020-03-24 20:47:24 +04:00
|
|
|
let id, region = variable state lexbuf in
|
2020-05-15 23:23:49 +04:00
|
|
|
if state.mode = Copy then
|
|
|
|
if id="true" || id="false"
|
|
|
|
then stop (Reserved_symbol id) state region
|
|
|
|
else
|
|
|
|
if Env.mem id state.env
|
|
|
|
then stop (Multiply_defined_symbol id) state region
|
|
|
|
else
|
|
|
|
let state = {state with env = Env.add id state.env}
|
|
|
|
in scan state lexbuf
|
|
|
|
else scan state lexbuf
|
2020-01-27 19:05:47 +04:00
|
|
|
| "undef" ->
|
2020-03-23 22:43:06 +04:00
|
|
|
let id, _ = variable state lexbuf in
|
2020-05-15 23:23:49 +04:00
|
|
|
if state.mode = Copy then
|
|
|
|
let state = {state with env = Env.remove id state.env}
|
|
|
|
in scan state lexbuf
|
|
|
|
else scan state lexbuf
|
2020-01-27 19:05:47 +04:00
|
|
|
| "error" ->
|
2020-03-25 00:05:39 +04:00
|
|
|
stop (Error_directive (message [] lexbuf)) state region
|
2020-01-27 19:05:47 +04:00
|
|
|
| "region" ->
|
|
|
|
let msg = message [] lexbuf
|
2020-04-24 22:54:13 +04:00
|
|
|
in print state ("#" ^ space ^ "region" ^ msg ^ "\n");
|
|
|
|
let state = {state with trace=Region::state.trace}
|
2020-03-23 22:43:06 +04:00
|
|
|
in scan state lexbuf
|
2020-01-27 19:05:47 +04:00
|
|
|
| "endregion" ->
|
|
|
|
let msg = message [] lexbuf
|
2020-04-24 22:54:13 +04:00
|
|
|
in print state ("#" ^ space ^ "endregion" ^ msg ^ "\n");
|
2020-03-25 21:52:23 +04:00
|
|
|
scan (reduce_region state region) lexbuf
|
2020-01-27 19:05:47 +04:00
|
|
|
| _ -> assert false
|
|
|
|
}
|
2020-04-24 22:54:13 +04:00
|
|
|
|
2020-05-15 23:23:49 +04:00
|
|
|
| eof { if state.trace = [] then state
|
|
|
|
else fail Missing_endif state lexbuf }
|
2020-04-24 22:54:13 +04:00
|
|
|
|
2020-05-15 23:23:49 +04:00
|
|
|
| '"' { if state.mode = Copy then
|
|
|
|
begin
|
|
|
|
copy state lexbuf;
|
|
|
|
scan (in_string (mk_reg lexbuf) state lexbuf) lexbuf
|
|
|
|
end
|
|
|
|
else scan state lexbuf }
|
2020-04-24 22:54:13 +04:00
|
|
|
|
|
|
|
| block_comment_openings {
|
|
|
|
let lexeme = Lexing.lexeme lexbuf in
|
|
|
|
match state.opt#block with
|
|
|
|
Some block when block#opening = lexeme ->
|
|
|
|
if state.mode = Copy then
|
|
|
|
begin
|
|
|
|
copy state lexbuf;
|
|
|
|
let state = in_block block (mk_reg lexbuf) state lexbuf
|
|
|
|
in scan state lexbuf
|
|
|
|
end
|
|
|
|
else scan state lexbuf
|
|
|
|
| Some _ | None ->
|
|
|
|
let n = String.length lexeme in
|
|
|
|
begin
|
|
|
|
rollback lexbuf;
|
|
|
|
assert (n > 0);
|
|
|
|
scan (scan_n_char n state lexbuf) lexbuf
|
|
|
|
end }
|
|
|
|
|
|
|
|
| line_comments {
|
|
|
|
let lexeme = Lexing.lexeme lexbuf in
|
|
|
|
match state.opt#line with
|
|
|
|
Some line when line = lexeme ->
|
|
|
|
if state.mode = Copy then
|
|
|
|
begin
|
|
|
|
copy state lexbuf;
|
|
|
|
scan (in_line_com state lexbuf) lexbuf
|
|
|
|
end
|
|
|
|
else scan state lexbuf
|
|
|
|
| Some _ | None ->
|
|
|
|
let n = String.length lexeme in
|
|
|
|
begin
|
|
|
|
rollback lexbuf;
|
|
|
|
assert (n > 0);
|
|
|
|
scan (scan_n_char n state lexbuf) lexbuf
|
|
|
|
end }
|
|
|
|
|
|
|
|
| _ { if state.mode = Copy then copy state lexbuf;
|
|
|
|
scan state lexbuf }
|
|
|
|
|
|
|
|
(* Scanning a series of characters *)
|
|
|
|
|
|
|
|
and scan_n_char n state = parse
|
|
|
|
_ { if state.mode = Copy then copy state lexbuf;
|
|
|
|
if n = 1 then state else scan_n_char (n-1) state lexbuf }
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
(* Support for #define and #undef *)
|
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
and variable state = parse
|
2020-03-24 20:47:24 +04:00
|
|
|
blank+ { let id = symbol state lexbuf
|
2020-03-23 22:43:06 +04:00
|
|
|
in skip_line state lexbuf; id }
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
and symbol state = parse
|
2020-03-25 21:52:23 +04:00
|
|
|
ident as id { id, mk_reg lexbuf }
|
2020-03-25 00:05:39 +04:00
|
|
|
| _ { fail Invalid_symbol state lexbuf }
|
2020-03-24 20:47:24 +04:00
|
|
|
|
2020-01-27 19:05:47 +04:00
|
|
|
(* New lines and verbatim sequence of characters *)
|
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
and skip_line state = parse
|
2020-04-24 22:54:13 +04:00
|
|
|
nl { proc_nl state lexbuf }
|
2020-05-15 23:23:49 +04:00
|
|
|
| eof { rollback lexbuf }
|
|
|
|
| blank+
|
|
|
|
| _ { skip_line state lexbuf }
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
and message acc = parse
|
|
|
|
nl { Lexing.new_line lexbuf;
|
|
|
|
mk_str (List.length acc) acc }
|
2020-05-15 23:23:49 +04:00
|
|
|
| eof { rollback lexbuf;
|
|
|
|
mk_str (List.length acc) acc }
|
2020-03-23 22:43:06 +04:00
|
|
|
| _ as c { message (c::acc) lexbuf }
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
(* Comments *)
|
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
and in_line_com state = parse
|
2020-04-24 22:54:13 +04:00
|
|
|
nl { proc_nl state lexbuf; state }
|
2020-05-15 23:23:49 +04:00
|
|
|
| eof { rollback lexbuf; state }
|
2020-03-23 22:43:06 +04:00
|
|
|
| _ { if state.mode = Copy then copy state lexbuf;
|
|
|
|
in_line_com state lexbuf }
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-04-24 22:54:13 +04:00
|
|
|
and in_block block opening state = parse
|
|
|
|
'"' | block_comment_openings {
|
|
|
|
let lexeme = Lexing.lexeme lexbuf in
|
|
|
|
if block#opening = lexeme || lexeme = "\""
|
|
|
|
then let () = copy state lexbuf in
|
|
|
|
let opening' = mk_reg lexbuf in
|
|
|
|
let next = if lexeme = "\"" then in_string
|
|
|
|
else in_block block in
|
|
|
|
let state = next opening' state lexbuf
|
|
|
|
in in_block block opening state lexbuf
|
|
|
|
else let () = rollback lexbuf in
|
|
|
|
let n = String.length lexeme in
|
|
|
|
let () = assert (n > 0) in
|
|
|
|
let state = scan_n_char n state lexbuf
|
|
|
|
in in_block block opening state lexbuf }
|
|
|
|
|
|
|
|
| block_comment_closings {
|
|
|
|
let lexeme = Lexing.lexeme lexbuf in
|
|
|
|
if block#closing = lexeme
|
|
|
|
then (copy state lexbuf; state)
|
|
|
|
else let () = rollback lexbuf in
|
|
|
|
let n = String.length lexeme in
|
|
|
|
let () = assert (n > 0) in
|
|
|
|
let state = scan_n_char n state lexbuf
|
|
|
|
in in_block block opening state lexbuf }
|
|
|
|
|
|
|
|
| nl { proc_nl state lexbuf; in_block block opening state lexbuf }
|
|
|
|
| eof { let err = Unterminated_comment (block#closing)
|
|
|
|
in stop err state opening }
|
|
|
|
| _ { copy state lexbuf; in_block block opening state lexbuf }
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
(* Included filename *)
|
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
and scan_inclusion state = parse
|
2020-05-15 23:23:49 +04:00
|
|
|
blank+ { scan_inclusion state lexbuf }
|
|
|
|
| '"' { in_inclusion (mk_reg lexbuf) [] 0 state lexbuf }
|
|
|
|
| nl | eof { fail Unterminated_inclusion state lexbuf }
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
and in_inclusion opening acc len state = parse
|
2020-03-25 21:52:23 +04:00
|
|
|
'"' { let closing = mk_reg lexbuf
|
2020-05-15 23:23:49 +04:00
|
|
|
in Region.cover opening closing, mk_str len acc }
|
2020-03-25 21:52:23 +04:00
|
|
|
| nl { fail Newline_in_string state lexbuf }
|
2020-04-24 22:54:13 +04:00
|
|
|
| eof { stop Unterminated_string state opening }
|
2020-03-24 20:47:24 +04:00
|
|
|
| _ as c { in_inclusion opening (c::acc) (len+1) state lexbuf }
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
(* Strings *)
|
|
|
|
|
2020-03-23 22:43:06 +04:00
|
|
|
and in_string opening state = parse
|
2020-03-25 21:52:23 +04:00
|
|
|
"\\\"" { copy state lexbuf; in_string opening state lexbuf }
|
2020-04-24 22:54:13 +04:00
|
|
|
| '"' { copy state lexbuf; state }
|
2020-05-15 23:23:49 +04:00
|
|
|
| eof { rollback lexbuf; state }
|
2020-03-25 21:52:23 +04:00
|
|
|
| _ { copy state lexbuf; in_string opening state lexbuf }
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-04-01 21:22:46 +04:00
|
|
|
and preproc state = parse
|
|
|
|
eof { state }
|
2020-04-09 18:18:26 +04:00
|
|
|
| _ { let open Lexing in
|
|
|
|
let () = rollback lexbuf in
|
|
|
|
let name = lexbuf.lex_start_p.pos_fname in
|
|
|
|
let () = if name <> "" then
|
|
|
|
print state (sprintf "# 1 \"%s\"\n" name)
|
|
|
|
in scan state lexbuf }
|
2020-01-27 19:05:47 +04:00
|
|
|
|
|
|
|
{
|
2020-03-24 20:47:24 +04:00
|
|
|
(* START OF TRAILER *)
|
|
|
|
|
2020-01-27 19:05:47 +04:00
|
|
|
(* The function [lex] is a wrapper of [scan], which also checks that
|
2020-03-24 20:47:24 +04:00
|
|
|
the trace is empty at the end. Note that we discard the state at
|
|
|
|
the end. *)
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-04-09 18:18:26 +04:00
|
|
|
let lex opt buffer =
|
2020-04-09 20:41:55 +04:00
|
|
|
let path = buffer.Lexing.lex_curr_p.Lexing.pos_fname in
|
|
|
|
let dir = [Filename.dirname path] in
|
2020-03-23 22:43:06 +04:00
|
|
|
let state = {
|
|
|
|
env = Env.empty;
|
|
|
|
mode = Copy;
|
|
|
|
trace = [];
|
|
|
|
out = Buffer.create 80;
|
2020-03-25 21:52:23 +04:00
|
|
|
incl = [];
|
2020-03-27 22:30:39 +04:00
|
|
|
opt;
|
2020-04-09 20:41:55 +04:00
|
|
|
dir
|
2020-03-23 22:43:06 +04:00
|
|
|
} in
|
2020-04-01 21:22:46 +04:00
|
|
|
match preproc state buffer with
|
2020-03-26 19:51:08 +04:00
|
|
|
state -> List.iter close_in state.incl;
|
|
|
|
Stdlib.Ok state.out
|
|
|
|
| exception Error e -> Stdlib.Error e
|
2020-01-27 19:05:47 +04:00
|
|
|
|
2020-03-24 20:47:24 +04:00
|
|
|
(* END OF TRAILER *)
|
2020-01-27 19:05:47 +04:00
|
|
|
}
|