[WIP] Does not compile yet.

Christian Rinderknecht 2020-03-23 19:43:06 +01:00
parent 926a83f7df
commit 37faf9022e
17 changed files with 745 additions and 500 deletions


@@ -1 +1,5 @@
$HOME/git/OCaml-build/Makefile
$HOME/git/ligo/vendors/ligo-utils/simple-utils/pos.mli
$HOME/git/ligo/vendors/ligo-utils/simple-utils/pos.ml
$HOME/git/ligo/vendors/ligo-utils/simple-utils/region.mli
$HOME/git/ligo/vendors/ligo-utils/simple-utils/region.ml


@@ -1,33 +0,0 @@
(* This module is only used for testing modules [Escan] and [Eparser]
as units *)
module Lexer = struct
let run () =
match Array.length Sys.argv with
2 -> Escan.trace Sys.argv.(1)
| _ -> prerr_endline ("Usage: " ^ Sys.argv.(0) ^ " [file]")
end
module Parser = struct
let run () =
if Array.length Sys.argv = 2
then
match open_in Sys.argv.(1) with
exception Sys_error msg -> prerr_endline msg
| cin ->
let buffer = Lexing.from_channel cin in
let open Error in
let () =
try
let tree = Eparser.pp_expression Escan.token buffer in
let value = Preproc.(eval Env.empty tree)
in (print_string (string_of_bool value);
print_newline ())
with Lexer diag -> print "Lexical" diag
| Parser diag -> print "Syntactical" diag
| Eparser.Error -> print "" ("Parse", mk_seg buffer, 1)
in close_in cin
else prerr_endline ("Usage: " ^ Sys.argv.(0) ^ " [file]")
end
let _ = Parser.run()

vendors/Preproc/E_Lexer.mll vendored Normal file

@@ -0,0 +1,108 @@
(* Auxiliary scanner for boolean expressions of the C# preprocessor *)
{
(* START HEADER *)
module Region = Simple_utils.Region
module Pos = Simple_utils.Pos
let sprintf = Printf.sprintf
open E_Parser
(* Concrete syntax of tokens. See module [E_Parser]. *)
let string_of_token = function
True -> "true"
| False -> "false"
| Ident id -> id
| OR -> "||"
| AND -> "&&"
| EQ -> "=="
| NEQ -> "!="
| NOT -> "!"
| LPAR -> "("
| RPAR -> ")"
| EOL -> "EOL"
(* Errors *)
module Error =
struct
type t = Invalid_character of char
let to_string = function
Invalid_character c ->
sprintf "Invalid character '%c' (%d).\n" c (Char.code c)
let format ?(offsets=true) Region.{region; value} ~file =
let msg = to_string value
and reg = region#to_string ~file ~offsets `Byte in
let value = sprintf "Preprocessing error %s:\n%s" reg msg
in Region.{value; region}
end
exception Error of Error.t Region.reg
let mk_reg buffer =
let start = Lexing.lexeme_start_p buffer |> Pos.from_byte
and stop = Lexing.lexeme_end_p buffer |> Pos.from_byte
in Region.make ~start ~stop
let stop value region = raise (Error Region.{region; value})
let fail error buffer = stop error (mk_reg buffer)
(* END HEADER *)
}
(* Regular expressions for literals *)
(* White space *)
let newline = '\n' | '\r' | "\r\n"
let blank = ' ' | '\t'
(* Unicode escape sequences *)
let digit = ['0'-'9']
let hexdigit = digit | ['A'-'F' 'a'-'f']
let four_hex = hexdigit hexdigit hexdigit hexdigit
let uni_esc = "\\u" four_hex | "\\U" four_hex four_hex
(* Identifiers *)
let lowercase = ['a'-'z']
let uppercase = ['A'-'Z']
let letter = lowercase | uppercase | uni_esc
let start = '_' | letter
let alphanum = letter | digit | '_'
let ident = start alphanum*
(* Rules *)
rule scan = parse
blank+ { scan lexbuf }
| newline { Lexing.new_line lexbuf; EOL }
| eof { EOL }
| "true" { True }
| "false" { False }
| ident as id { Ident id }
| '(' { LPAR }
| ')' { RPAR }
| "||" { OR }
| "&&" { AND }
| "==" { EQ }
| "!=" { NEQ }
| "!" { NOT }
| "//" { inline_com lexbuf }
| _ as c { fail (Error.Invalid_character c) lexbuf }
and inline_com = parse
newline { Lexing.new_line lexbuf; EOL }
| eof { EOL }
| _ { inline_com lexbuf }
{
(* START TRAILER *)
(* END TRAILER *)
}
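
As a minimal usage sketch (not part of this commit), the scanner above can be exercised on an in-memory string, assuming the generated module is [E_Lexer] and the token type comes from [E_Parser]:

let scan_string (src : string) =
  let buffer = Lexing.from_string src in
  let rec loop () =
    match E_Lexer.scan buffer with
      E_Parser.EOL -> ()
    | token -> print_endline (E_Lexer.string_of_token token); loop ()
  in loop ()

(* Prints FLAG, &&, !, (, DEBUG, ||, false, ) -- one per line. *)
let () = scan_string "FLAG && !(DEBUG || false)"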

vendors/Preproc/E_Main.ml vendored Normal file

@@ -0,0 +1,53 @@
(* This module is only used for testing modules [E_Lexer] and [E_Parser]
as units *)
module Lexer = struct
open E_Lexer
let run () =
let options = EvalOpt.read () in
match open_in options#input with
cin ->
let buffer = Lexing.from_channel cin in
let rec iter () =
match E_Lexer.scan buffer with
E_Parser.EOL -> close_in cin; close_out stdout
| t -> begin
output_string stdout (string_of_token t);
output_string stdout "\n";
flush stdout;
iter ()
end
| exception E_Lexer.Error err ->
let form = Error.format ~offsets:options#offsets
err
~file:options#input
in output_string stdout (form ^ "\n")
in iter ()
| exception Sys_error msg -> prerr_endline msg
end
module Parser = struct
let run () =
if Array.length Sys.argv = 2
then
match open_in Sys.argv.(1) with
exception Sys_error msg -> prerr_endline msg
| cin ->
let buffer = Lexing.from_channel cin in
let open Error in
let () =
try
let tree = E_Parser.pp_expression E_Lexer.token buffer in
let value = Preproc.(eval Env.empty tree)
in (print_string (string_of_bool value);
print_newline ())
with Lexer diag -> print "Lexical" diag
| Parser diag -> print "Syntactical" diag
| E_Parser.Error -> print "" ("Parse", mk_seg buffer, 1)
in close_in cin
else prerr_endline ("Usage: " ^ Sys.argv.(0) ^ " [file]")
end
let _ = Parser.run()
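
Once an expression has been parsed, its evaluation only needs the set of defined symbols, as [Parser.run] above shows with [Preproc.eval]. A minimal sketch, assuming [E_AST] provides the constructors used by the grammar:

let defined = Preproc.Env.add "DEBUG" Preproc.Env.empty

(* DEBUG is defined and RELEASE is not, so this prints "true". *)
let () =
  let ast = E_AST.(And (Ident "DEBUG", Not (Ident "RELEASE"))) in
  print_endline (string_of_bool (Preproc.eval defined ast))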

vendors/Preproc/E_Parser.mly vendored Normal file

@@ -0,0 +1,50 @@
%{
(* Grammar for boolean expressions in preprocessing directives of C# *)
%}
%token <string> Ident "<ident>"
%token True "true"
%token False "false"
%token OR "||"
%token AND "&&"
%token EQ "=="
%token NEQ "!="
%token NOT "!"
%token LPAR "("
%token RPAR ")"
%token EOL
(* Entries *)
%start expr
%type <E_AST.t> expr
%%
(* Grammar *)
expr:
or_expr EOL { $1 }
or_expr:
or_expr "||" and_expr { E_AST.Or ($1,$3) }
| and_expr { $1 }
and_expr:
and_expr "&&" unary_expr { E_AST.And ($1,$3) }
| equality_expr { $1 }
equality_expr:
equality_expr "==" unary_expr { E_AST.Eq ($1,$3) }
| equality_expr "!=" unary_expr { E_AST.Neq ($1,$3) }
| unary_expr { $1 }
unary_expr:
primary_expr { $1 }
| "!" unary_expr { E_AST.Not $2 }
primary_expr:
"true" { E_AST.True }
| "false" { E_AST.False }
| "<ident>" { E_AST.Ident $1 }
| "(" or_expr ")" { $2 }


@@ -1,50 +0,0 @@
%{
(* Grammar for boolean expressions in preprocessing directives of C# *)
%}
%token True False
%token <string> Ident
%token OR AND EQ NEQ NOT EOL LPAR RPAR
(* Entries *)
%start pp_expression
%type <Etree.t> pp_expression
%%
(* Grammar *)
pp_expression:
e=pp_or_expression EOL { e }
pp_or_expression:
e=pp_and_expression { e }
| e1=pp_or_expression OR e2=pp_and_expression {
Etree.Or (e1,e2)
}
pp_and_expression:
e=pp_equality_expression { e }
| e1=pp_and_expression AND e2=pp_unary_expression {
Etree.And (e1,e2)
}
pp_equality_expression:
e=pp_unary_expression { e }
| e1=pp_equality_expression EQ e2=pp_unary_expression {
Etree.Eq (e1,e2)
}
| e1=pp_equality_expression NEQ e2=pp_unary_expression {
Etree.Neq (e1,e2)
}
pp_unary_expression:
e=pp_primary_expression { e }
| NOT e=pp_unary_expression { Etree.Not e }
pp_primary_expression:
True { Etree.True }
| False { Etree.False }
| id=Ident { Etree.Ident id }
| LPAR e=pp_or_expression RPAR { e }


@@ -1,31 +0,0 @@
(* This module provides support for managing and printing errors when
preprocessing C# source files. *)
type message = string
type start = Lexing.position
type stop = Lexing.position
type seg = start * stop
let mk_seg buffer =
Lexing.(lexeme_start_p buffer, lexeme_end_p buffer)
type vline = int
exception Lexer of (message * seg * vline)
exception Parser of (message * seg * vline)
let print (kind: string) (msg, (start, stop), vend) =
let open Lexing in
let delta = vend - stop.pos_lnum in
let vstart = start.pos_lnum + delta
in assert (msg <> "");
prerr_endline
((if kind = "" then msg else kind) ^ " error at line "
^ string_of_int vstart ^ ", char "
^ string_of_int (start.pos_cnum - start.pos_bol)
^ (if stop.pos_lnum = start.pos_lnum
then "--" ^ string_of_int (stop.pos_cnum - stop.pos_bol)
else " to line " ^ string_of_int vend
^ ", char "
^ string_of_int (stop.pos_cnum - stop.pos_bol))
^ (if kind = "" then "." else ":\n" ^ msg))


@@ -1,95 +0,0 @@
{
(* Auxiliary scanner for boolean expressions of the C# preprocessor *)
(* Concrete syntax of tokens. See module [Eparser]. *)
let string_of_token =
let open Eparser
in function True -> "true"
| False -> "false"
| Ident id -> id
| OR -> "||"
| AND -> "&&"
| EQ -> "=="
| NEQ -> "!="
| NOT -> "!"
| LPAR -> "("
| RPAR -> ")"
| EOL -> "EOL"
}
(* Regular expressions for literals *)
(* White space *)
let newline = '\n' | '\r' | "\r\n"
let blank = ' ' | '\t'
(* Unicode escape sequences *)
let digit = ['0'-'9']
let hexdigit = digit | ['A'-'F' 'a'-'f']
let four_hex = hexdigit hexdigit hexdigit hexdigit
let uni_esc = "\\u" four_hex | "\\U" four_hex four_hex
(* Identifiers *)
let lowercase = ['a'-'z']
let uppercase = ['A'-'Z']
let letter = lowercase | uppercase | uni_esc
let start = '_' | letter
let alphanum = letter | digit | '_'
let ident = start alphanum*
(* Rules *)
rule token = parse
blank+ { token lexbuf }
| newline { Lexing.new_line lexbuf; Eparser.EOL }
| eof { Eparser.EOL }
| "true" { Eparser.True }
| "false" { Eparser.False }
| ident as id { Eparser.Ident id }
| '(' { Eparser.LPAR }
| ')' { Eparser.RPAR }
| "||" { Eparser.OR }
| "&&" { Eparser.AND }
| "==" { Eparser.EQ }
| "!=" { Eparser.NEQ }
| "!" { Eparser.NOT }
| "//" { inline_com lexbuf }
| _ as c { let code = Char.code c in
let msg = "Invalid character " ^ String.make 1 c
^ " (" ^ string_of_int code ^ ")."
in raise Error.(Lexer (msg, mk_seg lexbuf, 1))
}
and inline_com = parse
newline { Lexing.new_line lexbuf; Eparser.EOL }
| eof { Eparser.EOL }
| _ { inline_com lexbuf }
{
(* Standalone lexer for debugging purposes. See module [Topexp]. *)
type filename = string
let trace (name: filename) =
match open_in name with
cin ->
let buffer = Lexing.from_channel cin
and cout = stdout in
let rec iter () =
match token buffer with
Eparser.EOL -> close_in cin; close_out cout
| t -> begin
output_string cout (string_of_token t);
output_string cout "\n";
flush cout;
iter ()
end
| exception Error.Lexer diag -> Error.print "Lexical" diag
in iter ()
| exception Sys_error msg -> prerr_endline msg
}

vendors/Preproc/EvalOpt.ml vendored Normal file

@@ -0,0 +1,105 @@
(* Parsing command-line options *)
(* The type [options] gathers the command-line options. *)
type language = PascaLIGO | CameLIGO | ReasonLIGO
type options = <
input : string;
libs : string list;
lang : language;
offsets : bool
>
let make ~input ~libs ~lang ~offsets =
object
method input = input
method libs = libs
method lang = lang
method offsets = offsets
end
(* Auxiliary functions and modules *)
let printf = Printf.printf
let sprintf = Printf.sprintf
let print = print_endline
(* Printing a string in red to standard error *)
let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg
(* Failure *)
let abort msg =
highlight (sprintf "Command-line error: %s\n" msg); exit 1
(* Help *)
let help () =
let file = Filename.basename Sys.argv.(0) in
printf "Usage: %s [<option> ...] <input>\n" file;
printf "where <input> is the source file,\n";
print "and each <option> (if any) is one of the following:";
print " -I <paths> Library paths (colon-separated)";
print " --columns Columns for source locations";
print " -h, --help This help";
exit 0
(* Specifying the command-line options a la GNU *)
let input = ref None
and libs = ref []
and lang = ref None
and columns = ref false
let split_at_colon = Str.(split (regexp ":"))
let add_path p = libs := !libs @ split_at_colon p
let specs =
let open!Getopt in [
'I', nolong, None, Some add_path;
'h', "help", Some help, None;
noshort, "columns", set columns true, None
]
(* Handler of anonymous arguments *)
let anonymous arg =
match !input with
None ->
(match Filename.extension arg with
".ligo" -> lang := Some PascaLIGO
| ".mligo" -> lang := Some CameLIGO
| ".religo" -> lang := Some ReasonLIGO
| _ -> abort (sprintf "Wrong file extension."));
input := Some arg
| Some _ -> abort (sprintf "Multiple inputs.")
(* Checking options and exporting them as non-mutable values *)
let check () =
let libs = !libs
and offsets = not !columns
and lang =
match !lang with
Some lang -> lang
| None -> assert false
and input =
match !input with
Some file -> file
| None -> abort "Missing input file."
in make ~input ~libs ~lang ~offsets
(* Parsing the command-line options *)
let read () =
try
Getopt.parse_cmdline specs anonymous;
check ()
with Getopt.Error msg -> abort msg
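
A minimal client sketch, assuming this module is linked as [EvalOpt]; it expects a source file with a recognised extension on the command line:

let () =
  let options = EvalOpt.read () in
  Printf.printf "Input: %s\nLibraries: %s\nOffsets: %b\n"
    options#input
    (String.concat ":" options#libs)
    options#offsets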

vendors/Preproc/EvalOpt.mli vendored Normal file

@@ -0,0 +1,25 @@
(* Parsing the command-line options of the LIGO preprocessor *)
(* The type [options] gathers the command-line options. *)
type language = PascaLIGO | CameLIGO | ReasonLIGO
type options = <
input : string;
libs : string list;
lang : language;
offsets : bool
>
val make :
input:string ->
libs:string list ->
lang:language ->
offsets:bool ->
options
(* Reading the command-line options from [Sys.argv] and checking them. *)
val read : unit -> options
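
For instance, a hypothetical invocation of a driver built on this module, such as

  ./E_Main.opt -I lib:vendor --columns test.mligo

would make [read ()] return an object with [input = "test.mligo"], [libs = ["lib"; "vendor"]], [lang = CameLIGO] and [offsets = false] (because --columns was given).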


@@ -1,6 +1,13 @@
(* Preprocessor for C#, to be processed by [ocamllex]. *) (* Simple preprocessor based on C#, to be processed by [ocamllex]. *)
{ {
(* START HEADER *)
module Region = Simple_utils.Region
(*module Pos = Simple_utils.Pos*)
let sprintf = Printf.sprintf
(* STRING PROCESSING *) (* STRING PROCESSING *)
(* The value of [mk_str len p] ("make string") is a string of length (* The value of [mk_str len p] ("make string") is a string of length
@ -25,27 +32,112 @@ let explode s acc =
| i -> s.[i-1] :: push (i-1) | i -> s.[i-1] :: push (i-1)
in push (String.length s) in push (String.length s)
(* ERROR HANDLING *) (* ERRORS *)
let stop msg seg = raise (Error.Lexer (msg, seg,1)) module Error =
let fail msg buffer = stop msg (Error.mk_seg buffer) struct
type t =
Invalid_directive of string
| Directive_inside_line
| Missing_endif
| Invalid_line_indicator of string
| No_line_indicator
| End_line_indicator
| Newline_in_string
| Unterminated_comment
| Unterminated_string
| Dangling_endif
| Unterminated_region_in_conditional
| Dangling_endregion
| Conditional_in_region
| If_follows_elif
| Else_follows_else
| Dangling_else
| Elif_follows_else
| Dangling_elif
| Reserved_symbol of string
| Multiply_defined_symbol of string
| Error_directive of string
| Parse_error
| No_line_comment_or_blank
exception Local_err of Error.message let to_string = function
Invalid_directive name ->
sprintf "Invalid directive \"%s\".\n" name
| Directive_inside_line ->
sprintf "Directive inside a line.\n"
| Missing_endif ->
sprintf "Missing #endif directive.\n"
| Invalid_line_indicator id ->
sprintf "Invalid line indicator \"%s\".\n\
Hint: Try \"default\" or \"hidden\".\n" id
| No_line_indicator ->
sprintf "Missing line indicator.\n"
| End_line_indicator ->
sprintf "Invalid ending of numerical line indicator.\n\
Hint: Try a string, end of line, or a line comment.\n"
| Newline_in_string ->
sprintf "Invalid newline character in string.\n"
| Unterminated_comment ->
sprintf "Unterminated comment.\n"
| Unterminated_string ->
sprintf "Unterminated string.\n\
Hint: Close with double quotes.\n"
| Dangling_endif ->
sprintf "Dangling #endif directive.\n\
Hint: Remove it or add a #if before.\n"
| Unterminated_region_in_conditional ->
sprintf "Unterminated of #region in conditional.\n\
Hint: Close with #endregion before #endif.\n"
| Dangling_endregion ->
sprintf "Dangling #endregion directive.\n\
Hint: Remove it or use #region before.\n"
| Conditional_in_region ->
sprintf "Conditional in region.\n\
Hint: Remove the conditional or the region.\n"
| If_follows_elif ->
sprintf "Directive #if found in a clause #elif.\n"
| Else_follows_else ->
sprintf "Directive #else found in a clause #else.\n"
| Dangling_else ->
sprintf "Directive #else without #if.\n"
| Elif_follows_else ->
sprintf "Directive #elif found in a clause #else.\n"
| Dangling_elif ->
sprintf "Dangling #elif directive.\n\
Hint: Remove it or add a #if before.\n"
| Reserved_symbol sym ->
sprintf "Reserved symbol \"%s\".\n\
Hint: Use another symbol.\n" sym
| Multiply_defined_symbol sym ->
sprintf "Multiply-defined symbol \"%s\".\n\
Hint: Change the name or remove one definition.\n" sym
| Error_directive msg ->
msg ^ "\n"
| Parse_error ->
"Parse error in expression.\n"
| No_line_comment_or_blank ->
"Line comment or whitespace expected.\n"
let handle_err scan buffer = let format ?(offsets=true) Region.{region; value} ~file =
try scan buffer with Local_err msg -> fail msg buffer let msg = to_string value
and reg = region#to_string ~file ~offsets `Byte in
let value = sprintf "Preprocessing error %s:\n%s" reg msg
in Region.{value; region}
end
exception Error of Error.t Region.reg
let mk_reg buffer =
let start = Lexing.lexeme_start_p buffer |> Pos.from_byte
and stop = Lexing.lexeme_end_p buffer |> Pos.from_byte
in Region.make ~start ~stop
let stop value region = raise (Error Region.{region; value})
let fail error buffer = stop error (mk_reg buffer)
(* LEXING ENGINE *) (* LEXING ENGINE *)
(* Copying the current lexeme to [stdout] *)
let copy buffer = print_string (Lexing.lexeme buffer)
(* End of lines *)
let handle_nl buffer = Lexing.new_line buffer; copy buffer
(* C# PREPROCESSOR DIRECTIVES *) (* C# PREPROCESSOR DIRECTIVES *)
(* The type [mode] defines the two scanning modes of the preprocessor: (* The type [mode] defines the two scanning modes of the preprocessor:
@ -64,37 +156,32 @@ type trace = cond list
(* The function [reduce_cond] is called when a #endif directive is (* The function [reduce_cond] is called when a #endif directive is
found, and the trace (see type [trace] above) needs updating. *) found, and the trace (see type [trace] above) needs updating. *)
let rec reduce_cond seg = function let rec reduce_cond reg = function
[] -> stop "Dangling #endif." seg [] -> stop Error.Dangling_endif reg
| If mode::trace -> trace, mode | If mode::trace -> trace, mode
| Region::_ -> stop "Invalid scoping of #region" seg | Region::_ -> stop Error.Unterminated_region_in_conditional reg
| _::trace -> reduce_cond seg trace | _::trace -> reduce_cond reg trace
(* The function [reduce_reg] is called when a #endregion directive is (* The function [reduce_reg] is called when a #endregion directive is
read, and the trace needs updating. *) read, and the trace needs updating. *)
let reduce_reg seg = function let reduce_reg reg = function
[] -> stop "Dangling #endregion." seg [] -> stop Error.Dangling_endregion reg
| Region::trace -> trace | Region::trace -> trace
| _ -> stop "Invalid scoping of #endregion" seg | _ -> stop Error.Conditional_in_region reg
(* The function [extend] is called when encountering conditional (* The function [extend] is called when encountering conditional
directives #if, #else and #elif. As its name suggests, it extends directives #if, #else and #elif. As its name suggests, it extends
the current trace with the current conditional directive, whilst the current trace with the current conditional directive, whilst
performing some validity checks. *) performing some validity checks. *)
let extend seg cond trace = let extend reg cond trace =
match cond, trace with match cond, trace with
If _, Elif _::_ -> If _, Elif _::_ -> stop Error.If_follows_elif reg
stop "Directive #if cannot follow #elif." seg | Else, Else::_ -> stop Error.Else_follows_else reg
| Else, Else::_ -> | Else, [] -> stop Error.Dangling_else reg
stop "Directive #else cannot follow #else." seg | Elif _, Else::_ -> stop Error.Elif_follows_else reg
| Else, [] -> | Elif _, [] -> stop Error.Dangling_elif reg
stop "Dangling #else." seg
| Elif _, Else::_ ->
stop "Directive #elif cannot follow #else." seg
| Elif _, [] ->
stop "Dangling #elif." seg
| _ -> cond::trace | _ -> cond::trace
(* The function [last_mode] seeks the last mode as recorded in the (* The function [last_mode] seeks the last mode as recorded in the
@ -118,32 +205,19 @@ let rec last_mode = function
type offset = Prefix of int | Inline type offset = Prefix of int | Inline
let expand = function
Prefix 0 | Inline -> ()
| Prefix n -> print_string (String.make n ' ')
(* Directives *) (* Directives *)
let directives = [ let directives = [
"if"; "else"; "elif"; "endif"; "define"; "undef"; "if"; "else"; "elif"; "endif"; "define"; "undef";
"error"; "warning"; "line"; "region"; "endregion"; "error"; (*"warning";*) "line"; "region"; "endregion";
"include"] "include"]
(* Environments and preprocessor expressions (* Environments *)
The evaluation of conditional directives may involve symbols whose
value may be defined using #define directives, or undefined by
means of #undef. Therefore, we need to evaluate conditional
expressions in an environment made of a set of defined symbols.
Note that we rely on an external lexer and parser for the
conditional expressions. See modules [Escan] and [Eparser].
*)
module Env = Set.Make (String) module Env = Set.Make (String)
let rec eval env = let rec eval env =
let open Etree let open E_AST
in function in function
Or (e1,e2) -> eval env e1 || eval env e2 Or (e1,e2) -> eval env e1 || eval env e2
| And (e1,e2) -> eval env e1 && eval env e2 | And (e1,e2) -> eval env e1 && eval env e2
@ -154,63 +228,73 @@ in function
| False -> false | False -> false
| Ident id -> Env.mem id env | Ident id -> Env.mem id env
let expr env buffer = (* The type [state] groups the information that needs to be threaded
let tree = Eparser.pp_expression Escan.token buffer along the scanning functions. *)
in if eval env tree then Copy else Skip
type state = {
env : Env.t;
mode : mode;
offset : offset;
trace : trace;
out : Buffer.t;
incl : in_channel list
}
(* Evaluating a preprocessor expression
The evaluation of conditional directives may involve symbols whose
value may be defined using #define directives, or undefined by
means of #undef. Therefore, we need to evaluate conditional
expressions in an environment made of a set of defined symbols.
Note that we rely on an external lexer and parser for the
conditional expressions. See modules [E_Lexer] and [E_Parser].
*)
let expr state buffer =
let ast =
try E_Parser.expr E_Lexer.scan buffer with
E_Parser.Error ->
let region = mk_reg buffer in
let value = Error.Parse_error
in raise (Error Region.{value; region})
in if eval state.env ast then Copy else Skip
(* PRINTING *)
(* Copying the current lexeme to [stdout] *)
let copy state buffer = Buffer.add_string state.out (Lexing.lexeme buffer)
(* End of lines *)
let proc_nl state buffer = Lexing.new_line buffer; copy state buffer
(* Copying a string *)
let print state string = Buffer.add_string state.out string
(* Expanding the offset into whitespace *)
let expand_offset state =
match state.offset with
Prefix 0 | Inline -> ()
| Prefix n -> print state (String.make n ' ')
(* END OF HEADER *) (* END OF HEADER *)
} }
(* REGULAR EXPRESSIONS *) (* REGULAR EXPRESSIONS *)
(* White space *)
let nl = '\n' | '\r' | "\r\n" let nl = '\n' | '\r' | "\r\n"
let blank = ' ' | '\t' let blank = ' ' | '\t'
(* Integers *)
let int_suf = 'U' | 'u' | 'L' | 'l' | "UL" | "Ul" | "uL"
| "ul" | "LU" | "Lu" | "lU" | "lu"
let digit = ['0'-'9'] let digit = ['0'-'9']
let dec = digit+ int_suf? let natural = digit | digit (digit | '_')* digit
let hexdigit = digit | ['A'-'F' 'a'-'f'] let decimal = digit+ '.' digit+
let hex_pre = "0x" | "0X" let small = ['a'-'z']
let hexa = hex_pre hexdigit+ int_suf? let capital = ['A'-'Z']
let integer = dec | hexa let letter = small | capital
let ident = small (letter | '_' | digit)*
(* Unicode escape sequences *)
let four_hex = hexdigit hexdigit hexdigit hexdigit
let uni_esc = "\\u" four_hex | "\\U" four_hex four_hex
(* Identifiers *)
let lowercase = ['a'-'z']
let uppercase = ['A'-'Z']
let letter = lowercase | uppercase | uni_esc
let start = '_' | letter
let alphanum = letter | digit | '_'
let ident = start alphanum*
(* Real *)
let decimal = digit+
let exponent = ['e' 'E'] ['+' '-']? decimal
let real_suf = ['F' 'f' 'D' 'd' 'M' 'm']
let real = (decimal? '.')? decimal exponent? real_suf?
(* Characters *)
let single = [^ '\n' '\r']
let esc = "\\'" | "\\\"" | "\\\\" | "\\0" | "\\a" | "\\b" | "\\f"
| "\\n" | "\\r" | "\\t" | "\\v"
let hex_esc = "\\x" hexdigit hexdigit? hexdigit? hexdigit?
let character = single | esc | hex_esc | uni_esc
let char = "'" character "'"
(* Directives *)
let directive = '#' (blank* as space) (ident as id) let directive = '#' (blank* as space) (ident as id)
(* Rules *) (* Rules *)
@ -229,10 +313,11 @@ let directive = '#' (blank* as space) (ident as id)
character has been read), and [trace] is the stack of conditional character has been read), and [trace] is the stack of conditional
directives read so far. directives read so far.
The first call is [scan Env.empty Copy (Prefix 0) []], meaning that The first call is [scan {env=Env.empty; mode=Copy; offset = Prefix
we start with an empty environment, that copying the input is 0; trace=[]}], meaning that we start with an empty environment,
enabled by default, and that we are at the start of a line and no that copying the input is enabled by default, and that we are at
previous conditional directives have been read yet. the start of a line and no previous conditional directives have
been read yet.
When an "#if" is matched, the trace is extended by the call [extend When an "#if" is matched, the trace is extended by the call [extend
lexbuf (If mode) trace], during the evaluation of which the lexbuf (If mode) trace], during the evaluation of which the
@ -254,7 +339,7 @@ let directive = '#' (blank* as space) (ident as id)
When an "#else" is matched, the trace is extended with [Else], When an "#else" is matched, the trace is extended with [Else],
then, if the directive is not at a wrong offset, the rest of the then, if the directive is not at a wrong offset, the rest of the
line is scanned with [pp_newline]. If we were in copy mode, the new line is scanned with [skip_line]. If we were in copy mode, the new
mode toggles to skipping mode; otherwise, the trace is searched for mode toggles to skipping mode; otherwise, the trace is searched for
the last encountered "#if" of "#elif" and the associated mode is the last encountered "#if" of "#elif" and the associated mode is
restored. restored.
@ -333,72 +418,75 @@ let directive = '#' (blank* as space) (ident as id)
GNU GCC, which always does. GNU GCC, which always does.
*) *)
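(* A worked example of the trace discipline just described: with the
   symbol A defined and B undefined, the input

     #if A
     x
     #elif B
     y
     #endif

   is handled as follows. The #if evaluates to copy mode and pushes
   [If Copy] (the enclosing mode) on the trace, so "x" is copied. The
   #elif, reached in copy mode, pushes [Elif Skip] and switches to
   skipping mode, so "y" is dropped. The #endif pops the trace past the
   [Elif] entry down to the [If] and restores the mode recorded there,
   namely [Copy]. *)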
rule scan env mode offset trace = parse rule scan state = parse
nl { handle_nl lexbuf; nl { proc_nl state lexbuf;
scan env mode (Prefix 0) trace lexbuf } scan {state with offset = Prefix 0} lexbuf }
| blank { match offset with | blank { match state.offset with
Prefix n -> scan env mode (Prefix (n+1)) trace lexbuf Prefix n -> scan {state with offset = Prefix (n+1)} lexbuf
| Inline -> copy lexbuf; | Inline -> copy state lexbuf; scan state lexbuf }
scan env mode Inline trace lexbuf }
| directive { | directive {
if not (List.mem id directives) if not (List.mem id directives)
then fail "Invalid preprocessing directive." lexbuf then fail (Error.Invalid_directive id) lexbuf;
else if offset = Inline if state.offset = Inline
then fail "Directive invalid inside line." lexbuf then fail Error.Directive_inside_line lexbuf;
else let seg = Error.mk_seg lexbuf in let reg = mk_reg lexbuf in
match id with match id with
"include" -> "include" ->
let curr_line = Lexing.(lexbuf.lex_curr_p.pos_lnum) let line = Lexing.(lexbuf.lex_curr_p.pos_lnum)
and curr_file = Lexing.(lexbuf.lex_curr_p.pos_fname) and file = Lexing.(lexbuf.lex_curr_p.pos_fname)
|> Filename.basename |> Filename.basename
and incl_file = scan_inclusion lexbuf in and incl_file = scan_inclusion lexbuf in
let incl_buffer = print state (sprintf "# 1 \"%s\" 1\n" incl_file);
open_in incl_file |> Lexing.from_channel in let incl_chan = open_in incl_file in
Printf.printf "# 1 \"%s\" 1\n" incl_file; let state = {state with incl = incl_chan::state.incl} in
cat incl_buffer; cat state (Lexing.from_channel incl_chan);
Printf.printf "# %i \"%s\" 2\n" (curr_line+1) curr_file; print state (sprintf "# %i \"%s\" 2\n" (line+1) file);
scan env mode offset trace lexbuf scan state lexbuf
| "if" -> | "if" ->
let mode' = expr env lexbuf in let mode = expr state lexbuf in
let new_mode = if mode = Copy then mode' else Skip in let mode = if state.mode = Copy then mode else Skip in
let trace' = extend seg (If mode) trace let trace = extend reg (If state.mode) state.trace in
in scan env new_mode (Prefix 0) trace' lexbuf let state = {state with mode; offset = Prefix 0; trace}
in scan state lexbuf
| "else" -> | "else" ->
let () = pp_newline lexbuf in let () = skip_line state lexbuf in
let new_mode = let mode = match state.mode with
if mode = Copy then Skip else last_mode trace in Copy -> Skip
let trace' = extend seg Else trace | Skip -> last_mode state.trace in
in scan env new_mode (Prefix 0) trace' lexbuf let trace = extend reg Else state.trace
in scan {state with mode; offset = Prefix 0; trace} lexbuf
| "elif" -> | "elif" ->
let mode' = expr env lexbuf in let mode = expr state lexbuf in
let trace', new_mode = let trace, mode =
match mode with match state.mode with
Copy -> extend seg (Elif Skip) trace, Skip Copy -> extend reg (Elif Skip) state.trace, Skip
| Skip -> let old_mode = last_mode trace | Skip -> let old_mode = last_mode state.trace
in extend seg (Elif old_mode) trace, in extend reg (Elif old_mode) state.trace,
if old_mode = Copy then mode' else Skip if old_mode = Copy then mode else Skip
in scan env new_mode (Prefix 0) trace' lexbuf in scan {state with mode; offset = Prefix 0; trace} lexbuf
| "endif" -> | "endif" ->
let () = pp_newline lexbuf in let () = skip_line state lexbuf in
let trace', new_mode = reduce_cond seg trace let trace, mode = reduce_cond reg state.trace
in scan env new_mode (Prefix 0) trace' lexbuf in scan {state with mode; offset = Prefix 0; trace} lexbuf
| "define" -> | "define" ->
let id, seg = ident env lexbuf let id, reg = variable state lexbuf in
in if id="true" || id="false" if id="true" || id="false"
then let msg = "Symbol \"" ^ id ^ "\" cannot be defined." then stop (Error.Reserved_symbol id) reg;
in stop msg seg if Env.mem id state.env
else if Env.mem id env then stop (Error.Multiply_defined_symbol id) reg;
then let msg = "Symbol \"" ^ id let state = {state with env = Env.add id state.env;
^ "\" was already defined." offset = Prefix 0}
in stop msg seg in scan state lexbuf
else scan (Env.add id env) mode (Prefix 0) trace lexbuf
| "undef" -> | "undef" ->
let id, _ = ident env lexbuf let id, _ = variable state lexbuf in
in scan (Env.remove id env) mode (Prefix 0) trace lexbuf let state = {state with env = Env.remove id state.env;
offset = Prefix 0}
in scan state lexbuf
| "error" -> | "error" ->
stop (message [] lexbuf) seg stop (Error.Error_directive (message [] lexbuf)) reg
(*
| "warning" -> | "warning" ->
let start_p, end_p = seg in let start_p, end_p = reg in
let msg = message [] lexbuf in let msg = message [] lexbuf in
let open Lexing let open Lexing
in prerr_endline in prerr_endline
@ -408,98 +496,107 @@ rule scan env mode offset trace = parse
^ "--" ^ string_of_int (end_p.pos_cnum - end_p.pos_bol) ^ "--" ^ string_of_int (end_p.pos_cnum - end_p.pos_bol)
^ ":\n" ^ msg); ^ ":\n" ^ msg);
scan env mode (Prefix 0) trace lexbuf scan env mode (Prefix 0) trace lexbuf
*)
| "region" -> | "region" ->
let msg = message [] lexbuf let msg = message [] lexbuf
in expand offset; in expand_offset state;
print_endline ("#" ^ space ^ "region" ^ msg); print state ("#" ^ space ^ "region" ^ msg ^ "\n");
scan env mode (Prefix 0) (Region::trace) lexbuf let state =
{state with offset = Prefix 0; trace=Region::state.trace}
in scan state lexbuf
| "endregion" -> | "endregion" ->
let msg = message [] lexbuf let msg = message [] lexbuf
in expand offset; in expand_offset state;
print_endline ("#" ^ space ^ "endregion" ^ msg); print state ("#" ^ space ^ "endregion" ^ msg ^ "\n");
scan env mode (Prefix 0) (reduce_reg seg trace) lexbuf let state =
{state with offset = Prefix 0;
trace = reduce_reg reg state.trace}
in scan state lexbuf
| "line" -> | "line" ->
expand offset; expand_offset state;
print_string ("#" ^ space ^ "line"); print state ("#" ^ space ^ "line");
line_ind lexbuf; line_ind state lexbuf;
scan env mode (Prefix 0) trace lexbuf scan {state with offset = Prefix 0} lexbuf
| _ -> assert false | _ -> assert false
} }
| eof { match trace with | eof { match state.trace with
[] -> expand offset; flush stdout; (env, trace) [] -> expand_offset state; state
| _ -> fail "Missing #endif." lexbuf } | _ -> fail Error.Missing_endif lexbuf }
| '"' { if mode = Copy then begin | '"' { if state.mode = Copy then
expand offset; copy lexbuf; begin
handle_err in_norm_str lexbuf expand_offset state;
copy state lexbuf;
in_string (mk_reg lexbuf) state lexbuf
end; end;
scan env mode Inline trace lexbuf } scan {state with offset=Inline} lexbuf }
| "@\"" { if mode = Copy then begin | "//" { if state.mode = Copy then
expand offset; copy lexbuf; begin
handle_err in_verb_str lexbuf expand_offset state;
copy state lexbuf;
in_line_com state lexbuf
end; end;
scan env mode Inline trace lexbuf } scan {state with offset=Inline} lexbuf }
| "//" { if mode = Copy then begin | "/*" { if state.mode = Copy then
expand offset; copy lexbuf; begin
in_line_com mode lexbuf expand_offset state;
copy state lexbuf;
in_block_com (mk_reg lexbuf) state lexbuf
end; end;
scan env mode Inline trace lexbuf } scan {state with offset=Inline} lexbuf }
| "/*" { if mode = Copy then begin | _ { if state.mode = Copy then
expand offset; copy lexbuf; begin
handle_err in_block_com lexbuf expand_offset state;
copy state lexbuf
end; end;
scan env mode Inline trace lexbuf } scan {state with offset=Inline} lexbuf }
| _ { if mode = Copy then (expand offset; copy lexbuf);
scan env mode Inline trace lexbuf }
(* Support for #define and #undef *) (* Support for #define and #undef *)
and ident env = parse and variable state = parse
blank* { let r = __ident env lexbuf blank* { let id = __ident lexbuf
in pp_newline lexbuf; r } in skip_line state lexbuf; id }
and __ident env = parse and __ident = parse
ident as id { id, Error.mk_seg lexbuf } ident as id { id, mk_reg lexbuf }
(* Line indicator (#line) *) (* Line indicator (#line) *)
and line_ind = parse and line_ind state = parse
blank* as space { print_string space; line_indicator lexbuf } blank* { copy state lexbuf; line_indicator state lexbuf }
and line_indicator = parse and line_indicator state = parse
decimal as ind { decimal { copy state lexbuf; end_indicator state lexbuf }
print_string ind; | nl | eof { fail Error.No_line_indicator lexbuf }
end_indicator lexbuf
}
| ident as id { | ident as id {
match id with match id with
"default" | "hidden" -> "default" | "hidden" ->
print_endline (id ^ message [] lexbuf) print state (id ^ message [] lexbuf)
| _ -> fail "Invalid line indicator." lexbuf | _ -> fail (Error.Invalid_line_indicator id) lexbuf }
}
| nl | eof { fail "Line indicator expected." lexbuf }
and end_indicator = parse and end_indicator state = parse
blank* nl { copy lexbuf; handle_nl lexbuf } blank+ { copy state lexbuf; end_indicator state lexbuf }
| blank* eof { copy lexbuf } | nl { copy state lexbuf; proc_nl state lexbuf }
| blank* "//" { copy lexbuf; print_endline (message [] lexbuf) } | eof { copy state lexbuf }
| blank+ '"' { copy lexbuf; | "//" { copy state lexbuf;
handle_err in_norm_str lexbuf; print state (message [] lexbuf ^ "\n") }
opt_line_com lexbuf } | '"' { copy state lexbuf;
| _ { fail "Line comment or blank expected." lexbuf } in_string (mk_reg lexbuf) state lexbuf;
opt_line_com state lexbuf }
| _ { fail Error.End_line_indicator lexbuf }
and opt_line_com = parse and opt_line_com state = parse
nl { handle_nl lexbuf } nl { proc_nl state lexbuf }
| eof { copy lexbuf } | eof { copy state lexbuf }
| blank+ { copy lexbuf; opt_line_com lexbuf } | blank+ { copy state lexbuf; opt_line_com state lexbuf }
| "//" { print_endline ("//" ^ message [] lexbuf) } | "//" { print state ("//" ^ message [] lexbuf) }
(* New lines and verbatim sequence of characters *) (* New lines and verbatim sequence of characters *)
and pp_newline = parse and skip_line state = parse
nl { handle_nl lexbuf } nl { proc_nl state lexbuf }
| blank+ { pp_newline lexbuf } | blank+ { skip_line state lexbuf }
| "//" { in_line_com Skip lexbuf } | "//" { in_line_com {state with mode=Skip} lexbuf }
| _ { fail "Only a single-line comment allowed." lexbuf } | _ { fail Error.No_line_comment_or_blank lexbuf }
and message acc = parse and message acc = parse
nl { Lexing.new_line lexbuf; nl { Lexing.new_line lexbuf;
@ -509,50 +606,45 @@ and message acc = parse
(* Comments *) (* Comments *)
and in_line_com mode = parse and in_line_com state = parse
nl { handle_nl lexbuf } nl { proc_nl state lexbuf }
| eof { flush stdout } | eof { () }
| _ { if mode = Copy then copy lexbuf; in_line_com mode lexbuf } | _ { if state.mode = Copy then copy state lexbuf;
in_line_com state lexbuf }
and in_block_com = parse and in_block_com opening state = parse
nl { handle_nl lexbuf; in_block_com lexbuf } nl { proc_nl state lexbuf; in_block_com opening state lexbuf }
| "*/" { copy lexbuf } | "*/" { copy state lexbuf }
| eof { raise (Local_err "Unterminated comment.") } | eof { stop Error.Unterminated_comment opening }
| _ { copy lexbuf; in_block_com lexbuf } | _ { copy state lexbuf; in_block_com opening state lexbuf }
(* Include a file *) (* Include a file *)
and cat = parse and cat state = parse
eof { () } eof { () }
| _ { copy lexbuf; cat lexbuf } | _ { copy state lexbuf; cat state lexbuf }
(* Included filename *) (* Included filename *)
and scan_inclusion = parse and scan_inclusion = parse
blank+ { scan_inclusion lexbuf } blank+ { scan_inclusion lexbuf }
| '"' { handle_err (in_inclusion [] 0) lexbuf } | '"' { in_inclusion (mk_reg lexbuf) [] 0 lexbuf }
and in_inclusion acc len = parse and in_inclusion opening acc len = parse
'"' { mk_str len acc } '"' { mk_str len acc }
| nl { fail "Newline invalid in string." lexbuf } | nl { fail Error.Newline_in_string lexbuf }
| eof { raise (Local_err "Unterminated string.") } | eof { stop Error.Unterminated_string opening }
| _ as c { in_inclusion (c::acc) (len+1) lexbuf } | _ as c { in_inclusion opening (c::acc) (len+1) lexbuf }
(* Strings *) (* Strings *)
and in_norm_str = parse and in_string opening state = parse
"\\\"" { copy lexbuf; in_norm_str lexbuf } "\\\"" { copy state lexbuf; in_string opening state lexbuf }
| '"' { copy lexbuf } | '"' { copy state lexbuf }
| nl { fail "Newline invalid in string." lexbuf } | nl { fail Error.Newline_in_string lexbuf }
| eof { raise (Local_err "Unterminated string.") } | eof { stop Error.Unterminated_string opening }
| _ { copy lexbuf; in_norm_str lexbuf } | _ { copy state lexbuf; in_string opening state lexbuf }
and in_verb_str = parse
"\"\"" { copy lexbuf; in_verb_str lexbuf }
| '"' { copy lexbuf }
| nl { handle_nl lexbuf; in_verb_str lexbuf }
| eof { raise (Local_err "Unterminated string.") }
| _ { copy lexbuf; in_verb_str lexbuf }
{ {
(* The function [lex] is a wrapper of [scan], which also checks that (* The function [lex] is a wrapper of [scan], which also checks that
@ -560,26 +652,38 @@ and in_verb_str = parse
environment at the end. *) environment at the end. *)
let lex buffer = let lex buffer =
let _env, trace = scan Env.empty Copy (Prefix 0) [] buffer let state = {
in assert (trace = []) env = Env.empty;
mode = Copy;
offset = Prefix 0;
trace = [];
out = Buffer.create 80;
incl = []
} in
let state = scan state buffer
in ()
(* Exported definitions *) (* Exported definitions *)
(*
type filename = string let trace options : unit =
match open_in options#input with
let trace (name: filename) : unit =
match open_in name with
cin -> cin ->
let open Lexing in let open Lexing in
let buffer = from_channel cin in let buffer = from_channel cin in
let pos_fname = Filename.basename name in let pos_fname = Filename.basename options#input in
let () = buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname} in let () = buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname}
let open Error
in (try lex buffer with in (try lex buffer with
Lexer diag -> print "Lexical" diag Error err ->
| Parser diag -> print "Syntactical" diag let msg =
| Eparser.Error -> print "" ("Parse", mk_seg buffer, 1)); Error.format ~offsets:options#offsets err ~file:options#input
close_in cin; flush stdout in prerr_endline msg
| E_Lexer.Error err ->
let msg =
E_Lexer.Error.format ~offsets:options#offsets
err ~file:options#input
in prerr_endline msg
| Sys_error msg -> prerr_endline msg);
close_in cin
| exception Sys_error msg -> prerr_endline msg | exception Sys_error msg -> prerr_endline msg
*)
} }
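
A minimal driver sketch for this WIP version, assuming [Preproc.lex] remains the entry point (note that the output accumulated in [state.out] is not yet returned by [lex], and that preprocessing errors, raised as [Preproc.Error], would escape uncaught here):

let () =
  let options = EvalOpt.read () in
  match open_in options#input with
    cin -> Preproc.lex (Lexing.from_channel cin); close_in cin
  | exception Sys_error msg -> prerr_endline msg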


@@ -1,5 +1,5 @@
(* This is the entry point of the C# preprocessor. See [Makefile.cfg]. *) (* This is the entry point of the C# preprocessor. See [Makefile.cfg]. *)
match Array.length Sys.argv with let options = EvalOpt.read ()
2 -> Preproc.trace Sys.argv.(1)
| _ -> prerr_endline ("Usage: " ^ Sys.argv.(0) ^ " [file]") let () = Preproc.trace options


@@ -3,6 +3,8 @@ set -x
ocamllex.opt Escan.mll ocamllex.opt Escan.mll
ocamllex.opt Preproc.mll ocamllex.opt Preproc.mll
menhir -la 1 Eparser.mly menhir -la 1 Eparser.mly
ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c EvalOpt.mli
ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c EvalOpt.ml
ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Etree.ml ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Etree.ml
ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Error.ml ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Error.ml
ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Etree.ml ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Etree.ml
@ -17,7 +19,7 @@ ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Escan.ml
ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Preproc.ml ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c Preproc.ml
ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c EMain.ml ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c EMain.ml
ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c EMain.ml ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c EMain.ml
ocamlfind ocamlopt -o EMain.opt Etree.cmx Eparser.cmx Error.cmx Escan.cmx Preproc.cmx EMain.cmx ocamlfind ocamlopt -o EMain.opt EvalOpt.cmx Etree.cmx Eparser.cmx Error.cmx Escan.cmx Preproc.cmx EMain.cmx
ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c ProcMain.ml ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c ProcMain.ml
ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c ProcMain.ml ocamlfind ocamlopt -strict-sequence -w +A-48-4 -c ProcMain.ml
ocamlfind ocamlopt -o ProcMain.opt Etree.cmx Eparser.cmx Error.cmx Escan.cmx Preproc.cmx ProcMain.cmx ocamlfind ocamlopt -o ProcMain.opt EvalOpt.cmx Etree.cmx Eparser.cmx Error.cmx Escan.cmx Preproc.cmx ProcMain.cmx

vendors/Preproc/dune vendored

@@ -1,13 +1,16 @@
(ocamllex Escan Preproc) (ocamllex E_Lexer Preproc)
(menhir (menhir
(modules Eparser)) (modules E_Parser))
(library (library
(name PreProc) (name PreProc)
; (public_name ligo.preproc) (public_name ligo.preproc)
(libraries
getopt
simple-utils)
(wrapped false) (wrapped false)
(modules Eparser Error Escan Etree Preproc)) (modules E_Parser E_Lexer E_AST Preproc))
(test (test
(modules ProcMain) (modules ProcMain)
@@ -15,6 +18,6 @@
(name ProcMain)) (name ProcMain))
(test (test
(modules EMain) (modules E_Main)
(libraries PreProc) (libraries PreProc)
(name EMain)) (name E_Main))