ligo/vendors/Preproc/Preproc.mll

676 lines
25 KiB
OCaml
Raw Normal View History

2020-03-23 22:43:06 +04:00
(* Simple preprocessor based on C#, to be processed by [ocamllex]. *)
2020-01-27 19:05:47 +04:00
{
2020-03-24 20:47:24 +04:00
(* START OF HEADER *)
2020-03-23 22:43:06 +04:00
module Region = Simple_utils.Region
2020-03-24 21:52:00 +04:00
module Pos = Simple_utils.Pos
2020-03-23 22:43:06 +04:00
let sprintf = Printf.sprintf
2020-01-27 19:05:47 +04:00
(* STRING PROCESSING *)
(* The value of [mk_str len p] ("make string") is a string of length
[len] containing the [len] characters in the list [p], in reverse
order. For instance, [mk_str 3 ['c';'b';'a'] = "abc"]. *)
let mk_str (len: int) (p: char list) : string =
let () = assert (len = List.length p) in
let bytes = Bytes.make len ' ' in
let rec fill i = function
[] -> bytes
| char::l -> Bytes.set bytes i char; fill (i-1) l
in fill (len-1) p |> Bytes.to_string
(* The call [explode s a] is the list made by pushing the characters
in the string [s] on top of [a], in reverse order. For example,
[explode "ba" ['c';'d'] = ['a'; 'b'; 'c'; 'd']]. *)
let explode s acc =
let rec push = function
0 -> acc
| i -> s.[i-1] :: push (i-1)
in push (String.length s)
2020-03-24 20:47:24 +04:00
(* The type [mode] defines the two scanning modes of the preprocessor:
either we copy the current characters or we skip them. *)
type mode = Copy | Skip
(* Trace of directives
We keep track of directives #if, #elif, #else, #region and #endregion.
*)
type cond = If of mode | Elif of mode | Else | Region
type trace = cond list
(* Line offsets
The value [Inline] of type [offset] means that the current location
cannot be reached from the start of the line with only white
space. The same holds for the special value [Prefix 0]. Values of
the form [Prefix n] mean that the current location can be reached
from the start of the line with [n] white spaces (padding). These
distinctions are needed because preprocessor directives cannot
occur inside lines.
*)
type offset = Prefix of int | Inline
(* Environments *)
module Env = Set.Make (String)
let rec eval env =
let open E_AST
in function
Or (e1,e2) -> eval env e1 || eval env e2
| And (e1,e2) -> eval env e1 && eval env e2
| Eq (e1,e2) -> eval env e1 = eval env e2
| Neq (e1,e2) -> eval env e1 != eval env e2
| Not e -> not (eval env e)
| True -> true
| False -> false
| Ident id -> Env.mem id env
(* The type [state] groups the information that needs to be threaded
along the scanning functions. *)
type state = {
env : Env.t;
mode : mode;
offset : offset;
trace : trace;
out : Buffer.t;
incl : in_channel list
}
2020-03-23 22:43:06 +04:00
(* ERRORS *)
module Error =
struct
type t =
Invalid_directive of string
| Directive_inside_line
| Missing_endif
| Invalid_line_indicator of string
| No_line_indicator
| End_line_indicator
| Newline_in_string
2020-03-24 20:47:24 +04:00
| Open_comment
| Open_string
2020-03-23 22:43:06 +04:00
| Dangling_endif
2020-03-24 20:47:24 +04:00
| Open_region_in_conditional
2020-03-23 22:43:06 +04:00
| Dangling_endregion
| Conditional_in_region
| If_follows_elif
| Else_follows_else
| Dangling_else
| Elif_follows_else
| Dangling_elif
| Reserved_symbol of string
| Multiply_defined_symbol of string
| Error_directive of string
| Parse_error
| No_line_comment_or_blank
2020-03-24 20:47:24 +04:00
| Invalid_symbol
2020-03-23 22:43:06 +04:00
let to_string = function
Invalid_directive name ->
2020-03-24 21:43:15 +04:00
sprintf "Invalid directive \"%s\"." name
2020-03-23 22:43:06 +04:00
| Directive_inside_line ->
2020-03-24 21:43:15 +04:00
sprintf "Directive inside a line."
2020-03-23 22:43:06 +04:00
| Missing_endif ->
2020-03-24 21:43:15 +04:00
sprintf "Missing #endif directive."
2020-03-23 22:43:06 +04:00
| Invalid_line_indicator id ->
sprintf "Invalid line indicator \"%s\".\n\
2020-03-24 21:43:15 +04:00
Hint: Try \"default\" or \"hidden\"." id
2020-03-23 22:43:06 +04:00
| No_line_indicator ->
2020-03-24 21:43:15 +04:00
sprintf "Missing line indicator."
2020-03-23 22:43:06 +04:00
| End_line_indicator ->
sprintf "Invalid ending of numerical line indicator.\n\
2020-03-24 21:43:15 +04:00
Hint: Try a string, end of line, or a line comment."
2020-03-23 22:43:06 +04:00
| Newline_in_string ->
2020-03-24 21:43:15 +04:00
sprintf "Invalid newline character in string."
2020-03-24 20:47:24 +04:00
| Open_comment ->
2020-03-24 21:43:15 +04:00
sprintf "Unterminated comment."
2020-03-24 20:47:24 +04:00
| Open_string ->
2020-03-23 22:43:06 +04:00
sprintf "Unterminated string.\n\
2020-03-24 21:43:15 +04:00
Hint: Close with double quotes."
2020-03-23 22:43:06 +04:00
| Dangling_endif ->
sprintf "Dangling #endif directive.\n\
2020-03-24 21:43:15 +04:00
Hint: Remove it or add a #if before."
2020-03-24 20:47:24 +04:00
| Open_region_in_conditional ->
2020-03-23 22:43:06 +04:00
sprintf "Unterminated of #region in conditional.\n\
2020-03-24 21:43:15 +04:00
Hint: Close with #endregion before #endif."
2020-03-23 22:43:06 +04:00
| Dangling_endregion ->
sprintf "Dangling #endregion directive.\n\
2020-03-24 21:43:15 +04:00
Hint: Remove it or use #region before."
2020-03-23 22:43:06 +04:00
| Conditional_in_region ->
sprintf "Conditional in region.\n\
2020-03-24 21:43:15 +04:00
Hint: Remove the conditional or the region."
2020-03-23 22:43:06 +04:00
| If_follows_elif ->
2020-03-24 21:43:15 +04:00
sprintf "Directive #if found in a clause #elif."
2020-03-23 22:43:06 +04:00
| Else_follows_else ->
2020-03-24 21:43:15 +04:00
sprintf "Directive #else found in a clause #else."
2020-03-23 22:43:06 +04:00
| Dangling_else ->
2020-03-24 21:43:15 +04:00
sprintf "Directive #else without #if."
2020-03-23 22:43:06 +04:00
| Elif_follows_else ->
2020-03-24 21:43:15 +04:00
sprintf "Directive #elif found in a clause #else."
2020-03-23 22:43:06 +04:00
| Dangling_elif ->
sprintf "Dangling #elif directive.\n\
2020-03-24 21:43:15 +04:00
Hint: Remove it or add a #if before."
2020-03-23 22:43:06 +04:00
| Reserved_symbol sym ->
sprintf "Reserved symbol \"%s\".\n\
2020-03-24 21:43:15 +04:00
Hint: Use another symbol." sym
2020-03-23 22:43:06 +04:00
| Multiply_defined_symbol sym ->
sprintf "Multiply-defined symbol \"%s\".\n\
2020-03-24 21:43:15 +04:00
Hint: Change the name or remove one definition." sym
2020-03-23 22:43:06 +04:00
| Error_directive msg ->
2020-03-24 21:43:15 +04:00
msg
2020-03-23 22:43:06 +04:00
| Parse_error ->
2020-03-24 21:43:15 +04:00
"Parse error in expression."
2020-03-23 22:43:06 +04:00
| No_line_comment_or_blank ->
2020-03-24 21:43:15 +04:00
"Line comment or whitespace expected."
2020-03-24 20:47:24 +04:00
| Invalid_symbol ->
2020-03-24 21:43:15 +04:00
"Expected a symbol (identifier)."
2020-03-23 22:43:06 +04:00
let format ?(offsets=true) Region.{region; value} ~file =
let msg = to_string value
and reg = region#to_string ~file ~offsets `Byte in
let value = sprintf "Preprocessing error %s:\n%s" reg msg
in Region.{value; region}
end
2020-03-24 20:47:24 +04:00
exception Error of state * Error.t Region.reg
2020-03-23 22:43:06 +04:00
let mk_reg buffer =
let start = Lexing.lexeme_start_p buffer |> Pos.from_byte
and stop = Lexing.lexeme_end_p buffer |> Pos.from_byte
in Region.make ~start ~stop
2020-03-24 20:47:24 +04:00
let stop value state region = raise (Error (state, Region.{region; value}))
let fail error state buffer = stop error state (mk_reg buffer)
2020-01-27 19:05:47 +04:00
(* The function [reduce_cond] is called when a #endif directive is
found, and the trace (see type [trace] above) needs updating. *)
2020-03-24 20:47:24 +04:00
let reduce_cond state region =
let rec reduce = function
[] -> stop Error.Dangling_endif state region
| If mode::trace -> trace, mode
| Region::_ -> stop Error.Open_region_in_conditional state region
| _::trace -> reduce trace
in reduce state.trace
2020-01-27 19:05:47 +04:00
(* The function [reduce_reg] is called when a #endregion directive is
read, and the trace needs updating. *)
2020-03-24 20:47:24 +04:00
let reduce_reg state region =
match state.trace with
[] -> stop Error.Dangling_endregion state region
| Region::trace -> trace
| _ -> stop Error.Conditional_in_region state region
2020-01-27 19:05:47 +04:00
(* The function [extend] is called when encountering conditional
directives #if, #else and #elif. As its name suggests, it extends
the current trace with the current conditional directive, whilst
performing some validity checks. *)
2020-03-24 20:47:24 +04:00
let extend cond state region =
match cond, state.trace with
If _, Elif _::_ -> stop Error.If_follows_elif state region
| Else, Else::_ -> stop Error.Else_follows_else state region
| Else, [] -> stop Error.Dangling_else state region
| Elif _, Else::_ -> stop Error.Elif_follows_else state region
| Elif _, [] -> stop Error.Dangling_elif state region
| hd, tl -> hd::tl
2020-01-27 19:05:47 +04:00
(* The function [last_mode] seeks the last mode as recorded in the
trace (see type [trace] above). *)
let rec last_mode = function
[] -> assert false
| (If mode | Elif mode)::_ -> mode
| _::trace -> last_mode trace
2020-03-24 20:47:24 +04:00
(* PRINTING *)
2020-01-27 19:05:47 +04:00
2020-03-24 20:47:24 +04:00
(* Copying the current lexeme to [stdout] *)
2020-01-27 19:05:47 +04:00
2020-03-24 20:47:24 +04:00
let copy state buffer = Buffer.add_string state.out (Lexing.lexeme buffer)
2020-01-27 19:05:47 +04:00
2020-03-24 20:47:24 +04:00
(* End of lines *)
2020-01-27 19:05:47 +04:00
2020-03-24 20:47:24 +04:00
let proc_nl state buffer = Lexing.new_line buffer; copy state buffer
2020-01-27 19:05:47 +04:00
2020-03-24 20:47:24 +04:00
(* Copying a string *)
2020-01-27 19:05:47 +04:00
2020-03-24 20:47:24 +04:00
let print state string = Buffer.add_string state.out string
2020-01-27 19:05:47 +04:00
2020-03-24 20:47:24 +04:00
(* Expanding the offset into whitespace *)
2020-01-27 19:05:47 +04:00
2020-03-24 20:47:24 +04:00
let expand_offset state =
match state.offset with
Prefix 0 | Inline -> ()
| Prefix n -> print state (String.make n ' ')
2020-01-27 19:05:47 +04:00
2020-03-23 22:43:06 +04:00
(* Evaluating a preprocessor expression
2020-01-27 19:05:47 +04:00
2020-03-23 22:43:06 +04:00
The evaluation of conditional directives may involve symbols whose
value may be defined using #define directives, or undefined by
means of #undef. Therefore, we need to evaluate conditional
expressions in an environment made of a set of defined symbols.
2020-01-27 19:05:47 +04:00
2020-03-23 22:43:06 +04:00
Note that we rely on an external lexer and parser for the
conditional expressions. See modules [E_Lexer] and [E_Parser].
*)
2020-01-27 19:05:47 +04:00
2020-03-24 20:47:24 +04:00
let expr state buffer : mode =
2020-03-23 22:43:06 +04:00
let ast =
try E_Parser.expr E_Lexer.scan buffer with
E_Parser.Error ->
let region = mk_reg buffer in
let value = Error.Parse_error
2020-03-24 20:47:24 +04:00
in raise (Error (state, Region.{value; region})) in
let () = print state "\n" in
if eval state.env ast then Copy else Skip
2020-01-27 19:05:47 +04:00
2020-03-24 20:47:24 +04:00
(* DIRECTIVES *)
2020-01-27 19:05:47 +04:00
2020-03-24 20:47:24 +04:00
let directives = [
"if"; "else"; "elif"; "endif"; "define"; "undef";
"error"; (*"warning";*) "line"; "region"; "endregion";
"include"]
2020-01-27 19:05:47 +04:00
2020-03-23 22:43:06 +04:00
(* END OF HEADER *)
}
(* REGULAR EXPRESSIONS *)
2020-01-27 19:05:47 +04:00
2020-03-23 22:43:06 +04:00
let nl = '\n' | '\r' | "\r\n"
let blank = ' ' | '\t'
let digit = ['0'-'9']
let natural = digit | digit (digit | '_')* digit
let small = ['a'-'z']
let capital = ['A'-'Z']
let letter = small | capital
2020-03-24 20:47:24 +04:00
let ident = letter (letter | '_' | digit)*
let directive = '#' (blank* as space) (small+ as id)
2020-01-27 19:05:47 +04:00
(* Rules *)
(* The rule [scan] scans the input buffer for directives, strings,
comments, blanks, new lines and end of file characters. As a
result, either the matched input is copied to [stdout] or not,
depending on the compilation directives. If not copied, new line
characters are output.
Scanning is triggered by the function call [scan env mode offset
trace lexbuf], where [env] is the set of defined symbols
(introduced by `#define'), [mode] specifies whether we are copying
or skipping the input, [offset] informs about the location in the
line (either there is a prefix of blanks, or at least a non-blank
character has been read), and [trace] is the stack of conditional
directives read so far.
2020-03-23 22:43:06 +04:00
The first call is [scan {env=Env.empty; mode=Copy; offset = Prefix
0; trace=[]}], meaning that we start with an empty environment,
that copying the input is enabled by default, and that we are at
the start of a line and no previous conditional directives have
been read yet.
2020-01-27 19:05:47 +04:00
When an "#if" is matched, the trace is extended by the call [extend
lexbuf (If mode) trace], during the evaluation of which the
syntactic validity of having encountered an "#if" is checked (for
example, it would be invalid had an "#elif" been last read). Note
that the current mode is stored in the trace with the current
directive -- that mode may be later restored (see below for some
examples). Moreover, the directive would be deemed invalid if its
current position in the line (that is, its offset) were not
preceeded by blanks or nothing, otherwise the rule [expr] is called
to scan the boolean expression associated with the "#if": if it
evaluates to [true], the result is [Copy], meaning that we may copy
what follows, otherwise skip it -- the actual decision depending on
the current mode. That new mode is used if we were in copy mode,
and the offset is reset to the start of a new line (as we read a
new line in [expr]); otherwise we were in skipping mode and the
value of the conditional expression must be ignored (but not its
syntax), and we continue skipping the input.
When an "#else" is matched, the trace is extended with [Else],
then, if the directive is not at a wrong offset, the rest of the
2020-03-23 22:43:06 +04:00
line is scanned with [skip_line]. If we were in copy mode, the new
2020-01-27 19:05:47 +04:00
mode toggles to skipping mode; otherwise, the trace is searched for
the last encountered "#if" of "#elif" and the associated mode is
restored.
The case "#elif" is the result of the fusion (in the technical
sense) of the code for dealing with an "#else" followed by an
"#if".
When an "#endif" is matched, the trace is reduced, that is, all
conditional directives are popped until an [If mode'] is found and
[mode'] is restored as the current mode.
Consider the following four cases, where the modes (Copy/Skip) are
located between the lines:
Copy ----+ Copy ----+
#if true | #if true |
Copy | Copy |
#else | #else |
+-- Skip --+ | +-- Skip --+ |
#if true | | | #if false | | |
| Skip | | | Skip | |
#else | | | #else | | |
+-> Skip | | +-> Skip | |
#endif | | #endif | |
Skip <-+ | Skip <-+ |
#endif | #endif |
Copy <---+ Copy <---+
+-- Copy ----+ Copy --+-+
#if false | | #if false | |
| Skip | Skip | |
#else | | #else | |
+-> Copy --+ | +-+-- Copy <-+ |
#if true | | #if false | | |
Copy | | | | Skip |
#else | | #else | | |
Skip | | | +-> Copy |
#endif | | #endif | |
Copy <-+ | +---> Copy |
#endif | #endif |
Copy <---+ Copy <---+
The following four cases feature #elif. Note that we put between
brackets the mode saved for the #elif, which is sometimes restored
later.
Copy --+ Copy --+
#if true | #if true |
Copy | Copy |
#elif true +--[Skip] | #elif false +--[Skip] |
| Skip | | Skip |
#else | | #else | |
+-> Skip | +-> Skip |
#endif | #endif |
Copy <-+ Copy <-+
+-- Copy --+-+ +-- Copy ----+
#if false | | | #if false | |
| Skip | | | Skip |
#elif true +->[Copy] | | #elif false +->[Copy]--+ |
Copy <-+ | Skip | |
#else | #else | |
Skip | Copy <-+ |
#endif | #endif |
Copy <---+ Copy <---+
Note how "#elif" indeed behaves like an "#else" followed by an
"#if", and the mode stored with the data constructor [Elif]
corresponds to the mode before the virtual "#if".
Important note: Comments and strings are recognised as such only in
copy mode, which is a different behaviour from the preprocessor of
GNU GCC, which always does.
2020-03-23 22:43:06 +04:00
*)
rule scan state = parse
nl { proc_nl state lexbuf;
scan {state with offset = Prefix 0} lexbuf }
| blank { match state.offset with
Prefix n -> scan {state with offset = Prefix (n+1)} lexbuf
| Inline -> copy state lexbuf; scan state lexbuf }
2020-01-27 19:05:47 +04:00
| directive {
if not (List.mem id directives)
2020-03-24 20:47:24 +04:00
then fail (Error.Invalid_directive id) state lexbuf;
2020-03-23 22:43:06 +04:00
if state.offset = Inline
2020-03-24 20:47:24 +04:00
then fail Error.Directive_inside_line state lexbuf;
let region = mk_reg lexbuf in
2020-01-27 19:05:47 +04:00
match id with
"include" ->
2020-03-23 22:43:06 +04:00
let line = Lexing.(lexbuf.lex_curr_p.pos_lnum)
and file = Lexing.(lexbuf.lex_curr_p.pos_fname)
|> Filename.basename
2020-03-24 20:47:24 +04:00
and incl_file = scan_inclusion state lexbuf in
2020-03-23 22:43:06 +04:00
print state (sprintf "# 1 \"%s\" 1\n" incl_file);
let incl_chan = open_in incl_file in
let state = {state with incl = incl_chan::state.incl} in
cat state (Lexing.from_channel incl_chan);
print state (sprintf "# %i \"%s\" 2\n" (line+1) file);
scan state lexbuf
2020-01-27 19:05:47 +04:00
| "if" ->
2020-03-23 22:43:06 +04:00
let mode = expr state lexbuf in
let mode = if state.mode = Copy then mode else Skip in
2020-03-24 20:47:24 +04:00
let trace = extend (If state.mode) state region in
2020-03-23 22:43:06 +04:00
let state = {state with mode; offset = Prefix 0; trace}
in scan state lexbuf
2020-01-27 19:05:47 +04:00
| "else" ->
2020-03-23 22:43:06 +04:00
let () = skip_line state lexbuf in
let mode = match state.mode with
Copy -> Skip
| Skip -> last_mode state.trace in
2020-03-24 20:47:24 +04:00
let trace = extend Else state region
2020-03-23 22:43:06 +04:00
in scan {state with mode; offset = Prefix 0; trace} lexbuf
2020-01-27 19:05:47 +04:00
| "elif" ->
2020-03-23 22:43:06 +04:00
let mode = expr state lexbuf in
let trace, mode =
match state.mode with
2020-03-24 20:47:24 +04:00
Copy -> extend (Elif Skip) state region, Skip
2020-03-23 22:43:06 +04:00
| Skip -> let old_mode = last_mode state.trace
2020-03-24 20:47:24 +04:00
in extend (Elif old_mode) state region,
2020-03-23 22:43:06 +04:00
if old_mode = Copy then mode else Skip
in scan {state with mode; offset = Prefix 0; trace} lexbuf
2020-01-27 19:05:47 +04:00
| "endif" ->
2020-03-23 22:43:06 +04:00
let () = skip_line state lexbuf in
2020-03-24 20:47:24 +04:00
let trace, mode = reduce_cond state region
2020-03-23 22:43:06 +04:00
in scan {state with mode; offset = Prefix 0; trace} lexbuf
2020-01-27 19:05:47 +04:00
| "define" ->
2020-03-24 20:47:24 +04:00
let id, region = variable state lexbuf in
2020-03-23 22:43:06 +04:00
if id="true" || id="false"
2020-03-24 20:47:24 +04:00
then stop (Error.Reserved_symbol id) state region;
2020-03-23 22:43:06 +04:00
if Env.mem id state.env
2020-03-24 20:47:24 +04:00
then stop (Error.Multiply_defined_symbol id) state region;
2020-03-23 22:43:06 +04:00
let state = {state with env = Env.add id state.env;
offset = Prefix 0}
in scan state lexbuf
2020-01-27 19:05:47 +04:00
| "undef" ->
2020-03-23 22:43:06 +04:00
let id, _ = variable state lexbuf in
let state = {state with env = Env.remove id state.env;
offset = Prefix 0}
in scan state lexbuf
2020-01-27 19:05:47 +04:00
| "error" ->
2020-03-24 20:47:24 +04:00
stop (Error.Error_directive (message [] lexbuf)) state region
2020-03-23 22:43:06 +04:00
(*
2020-01-27 19:05:47 +04:00
| "warning" ->
2020-03-24 20:47:24 +04:00
let start_p, end_p = region in
2020-01-27 19:05:47 +04:00
let msg = message [] lexbuf in
let open Lexing
in prerr_endline
("Warning at line " ^ string_of_int start_p.pos_lnum
^ ", char "
^ string_of_int (start_p.pos_cnum - start_p.pos_bol)
^ "--" ^ string_of_int (end_p.pos_cnum - end_p.pos_bol)
^ ":\n" ^ msg);
scan env mode (Prefix 0) trace lexbuf
2020-03-23 22:43:06 +04:00
*)
2020-01-27 19:05:47 +04:00
| "region" ->
let msg = message [] lexbuf
2020-03-23 22:43:06 +04:00
in expand_offset state;
print state ("#" ^ space ^ "region" ^ msg ^ "\n");
let state =
{state with offset = Prefix 0; trace=Region::state.trace}
in scan state lexbuf
2020-01-27 19:05:47 +04:00
| "endregion" ->
let msg = message [] lexbuf
2020-03-23 22:43:06 +04:00
in expand_offset state;
print state ("#" ^ space ^ "endregion" ^ msg ^ "\n");
let state =
{state with offset = Prefix 0;
2020-03-24 20:47:24 +04:00
trace = reduce_reg state region}
2020-03-23 22:43:06 +04:00
in scan state lexbuf
2020-01-27 19:05:47 +04:00
| "line" ->
2020-03-23 22:43:06 +04:00
expand_offset state;
print state ("#" ^ space ^ "line");
line_ind state lexbuf;
scan {state with offset = Prefix 0} lexbuf
2020-01-27 19:05:47 +04:00
| _ -> assert false
}
2020-03-23 22:43:06 +04:00
| eof { match state.trace with
[] -> expand_offset state; state
2020-03-24 20:47:24 +04:00
| _ -> fail Error.Missing_endif state lexbuf }
2020-03-23 22:43:06 +04:00
| '"' { if state.mode = Copy then
begin
expand_offset state;
copy state lexbuf;
in_string (mk_reg lexbuf) state lexbuf
end;
scan {state with offset=Inline} lexbuf }
| "//" { if state.mode = Copy then
begin
expand_offset state;
copy state lexbuf;
in_line_com state lexbuf
end;
scan {state with offset=Inline} lexbuf }
| "/*" { if state.mode = Copy then
begin
expand_offset state;
copy state lexbuf;
in_block_com (mk_reg lexbuf) state lexbuf
end;
scan {state with offset=Inline} lexbuf }
| _ { if state.mode = Copy then
begin
expand_offset state;
copy state lexbuf
end;
scan {state with offset=Inline} lexbuf }
2020-01-27 19:05:47 +04:00
(* Support for #define and #undef *)
2020-03-23 22:43:06 +04:00
and variable state = parse
2020-03-24 20:47:24 +04:00
blank+ { let id = symbol state lexbuf
2020-03-23 22:43:06 +04:00
in skip_line state lexbuf; id }
2020-01-27 19:05:47 +04:00
2020-03-24 20:47:24 +04:00
and symbol state = parse
ident as id { id, mk_reg lexbuf }
| _ { fail Error.Invalid_symbol state lexbuf }
2020-01-27 19:05:47 +04:00
(* Line indicator (#line) *)
2020-03-23 22:43:06 +04:00
and line_ind state = parse
blank* { copy state lexbuf; line_indicator state lexbuf }
2020-01-27 19:05:47 +04:00
2020-03-23 22:43:06 +04:00
and line_indicator state = parse
2020-03-24 20:47:24 +04:00
natural { copy state lexbuf; end_indicator state lexbuf }
2020-01-27 19:05:47 +04:00
| ident as id {
match id with
"default" | "hidden" ->
2020-03-23 22:43:06 +04:00
print state (id ^ message [] lexbuf)
2020-03-24 20:47:24 +04:00
| _ -> fail (Error.Invalid_line_indicator id) state lexbuf }
| _ { fail Error.No_line_indicator state lexbuf }
2020-03-23 22:43:06 +04:00
and end_indicator state = parse
blank+ { copy state lexbuf; end_indicator state lexbuf }
2020-03-24 20:47:24 +04:00
| nl { proc_nl state lexbuf }
2020-03-23 22:43:06 +04:00
| eof { copy state lexbuf }
| "//" { copy state lexbuf;
print state (message [] lexbuf ^ "\n") }
| '"' { copy state lexbuf;
in_string (mk_reg lexbuf) state lexbuf;
opt_line_com state lexbuf }
2020-03-24 20:47:24 +04:00
| _ { fail Error.End_line_indicator state lexbuf }
2020-03-23 22:43:06 +04:00
and opt_line_com state = parse
nl { proc_nl state lexbuf }
| eof { copy state lexbuf }
| blank+ { copy state lexbuf; opt_line_com state lexbuf }
| "//" { print state ("//" ^ message [] lexbuf) }
2020-01-27 19:05:47 +04:00
(* New lines and verbatim sequence of characters *)
2020-03-23 22:43:06 +04:00
and skip_line state = parse
2020-03-24 20:47:24 +04:00
nl { proc_nl state lexbuf }
| blank+ { skip_line state lexbuf }
| "//" { in_line_com {state with mode=Skip} lexbuf }
| _ { fail Error.No_line_comment_or_blank state lexbuf }
| eof { () }
2020-01-27 19:05:47 +04:00
and message acc = parse
nl { Lexing.new_line lexbuf;
mk_str (List.length acc) acc }
| eof { mk_str (List.length acc) acc }
2020-03-23 22:43:06 +04:00
| _ as c { message (c::acc) lexbuf }
2020-01-27 19:05:47 +04:00
(* Comments *)
2020-03-23 22:43:06 +04:00
and in_line_com state = parse
nl { proc_nl state lexbuf }
| eof { () }
| _ { if state.mode = Copy then copy state lexbuf;
in_line_com state lexbuf }
2020-01-27 19:05:47 +04:00
2020-03-23 22:43:06 +04:00
and in_block_com opening state = parse
nl { proc_nl state lexbuf; in_block_com opening state lexbuf }
| "*/" { copy state lexbuf }
2020-03-24 20:47:24 +04:00
| eof { stop Error.Open_comment state opening }
2020-03-23 22:43:06 +04:00
| _ { copy state lexbuf; in_block_com opening state lexbuf }
2020-01-27 19:05:47 +04:00
(* Include a file *)
2020-03-23 22:43:06 +04:00
and cat state = parse
eof { () }
| _ { copy state lexbuf; cat state lexbuf }
2020-01-27 19:05:47 +04:00
(* Included filename *)
2020-03-24 20:47:24 +04:00
and scan_inclusion state = parse
blank+ { scan_inclusion state lexbuf }
| '"' { in_inclusion (mk_reg lexbuf) [] 0 state lexbuf }
2020-01-27 19:05:47 +04:00
2020-03-24 20:47:24 +04:00
and in_inclusion opening acc len state = parse
'"' { mk_str len acc }
| nl { fail Error.Newline_in_string state lexbuf }
| eof { stop Error.Open_string state opening }
| _ as c { in_inclusion opening (c::acc) (len+1) state lexbuf }
2020-01-27 19:05:47 +04:00
(* Strings *)
2020-03-23 22:43:06 +04:00
and in_string opening state = parse
"\\\"" { copy state lexbuf; in_string opening state lexbuf }
| '"' { copy state lexbuf }
2020-03-24 20:47:24 +04:00
| nl { fail Error.Newline_in_string state lexbuf }
| eof { stop Error.Open_string state opening }
2020-03-23 22:43:06 +04:00
| _ { copy state lexbuf; in_string opening state lexbuf }
2020-01-27 19:05:47 +04:00
{
2020-03-24 20:47:24 +04:00
(* START OF TRAILER *)
2020-01-27 19:05:47 +04:00
(* The function [lex] is a wrapper of [scan], which also checks that
2020-03-24 20:47:24 +04:00
the trace is empty at the end. Note that we discard the state at
the end. *)
2020-01-27 19:05:47 +04:00
2020-03-24 20:47:24 +04:00
let lex buffer : Buffer.t =
2020-03-23 22:43:06 +04:00
let state = {
env = Env.empty;
mode = Copy;
offset = Prefix 0;
trace = [];
out = Buffer.create 80;
incl = []
} in
2020-03-24 20:47:24 +04:00
let state = scan state buffer in
let () = List.iter close_in state.incl
in state.out
2020-01-27 19:05:47 +04:00
2020-03-24 20:47:24 +04:00
(* END OF TRAILER *)
2020-01-27 19:05:47 +04:00
}