Parsing the command line by calling [EvalOpt.read], not a side-effect.

This commit is contained in:
Christian Rinderknecht 2019-07-24 15:41:52 +02:00
parent d2f4d00011
commit dc4f14d469
6 changed files with 214 additions and 167 deletions

View File

@ -1,8 +1,26 @@
(* Parsing the command-line option for testing the LIGO lexer and (* Parsing the command-line options of PascaLIGO *)
parser *)
(* The type [command] denotes some possible behaviours of the
compiler. *)
type command = Quiet | Copy | Units | Tokens
(* The type [options] gathers the command-line options. *)
type options = {
input : string option;
libs : string list;
verbose : Utils.String.Set.t;
offsets : bool;
mode : [`Byte | `Point];
cmd : command
}
(* Auxiliary functions *)
let printf = Printf.printf let printf = Printf.printf
let sprintf = Printf.sprintf let sprintf = Printf.sprintf
let print = print_endline
let abort msg = let abort msg =
Utils.highlight (sprintf "Command-line error: %s\n" msg); exit 1 Utils.highlight (sprintf "Command-line error: %s\n" msg); exit 1
@ -12,18 +30,18 @@ let abort msg =
let help () = let help () =
let file = Filename.basename Sys.argv.(0) in let file = Filename.basename Sys.argv.(0) in
printf "Usage: %s [<option> ...] [<input>.ligo | \"-\"]\n" file; printf "Usage: %s [<option> ...] [<input>.ligo | \"-\"]\n" file;
print_endline "where <input>.ligo is the LIGO source file (default: stdin),"; print "where <input>.ligo is the PascaLIGO source file (default: stdin),";
print_endline "and each <option> (if any) is one of the following:"; print "and each <option> (if any) is one of the following:";
print_endline " -I <paths> Library paths (colon-separated)"; print " -I <paths> Library paths (colon-separated)";
print_endline " -c, --copy Print lexemes of tokens and markup (lexer)"; print " -c, --copy Print lexemes of tokens and markup (lexer)";
print_endline " -t, --tokens Print tokens (lexer)"; print " -t, --tokens Print tokens (lexer)";
print_endline " -u, --units Print tokens and markup (lexer)"; print " -u, --units Print tokens and markup (lexer)";
print_endline " -q, --quiet No output, except errors (default)"; print " -q, --quiet No output, except errors (default)";
print_endline " --columns Columns for source locations"; print " --columns Columns for source locations";
print_endline " --bytes Bytes for source locations"; print " --bytes Bytes for source locations";
print_endline " --verbose=<stages> cmdline, cpp, ast (colon-separated)"; print " --verbose=<stages> cmdline, cpp, ast (colon-separated)";
print_endline " --version Commit hash on stdout"; print " --version Commit hash on stdout";
print_endline " -h, --help This help"; print " -h, --help This help";
exit 0 exit 0
(* Version *) (* Version *)
@ -32,15 +50,16 @@ let version () = printf "%s\n" Version.version; exit 0
(* Specifying the command-line options a la GNU *) (* Specifying the command-line options a la GNU *)
let copy = ref false let copy = ref false
and tokens = ref false and tokens = ref false
and units = ref false and units = ref false
and quiet = ref false and quiet = ref false
and columns = ref false and columns = ref false
and bytes = ref false and bytes = ref false
and verbose = ref Utils.String.Set.empty and verbose = ref Utils.String.Set.empty
and input = ref None and input = ref None
and libs = ref [] and libs = ref []
and verb_str = ref ""
let split_at_colon = Str.(split (regexp ":")) let split_at_colon = Str.(split (regexp ":"))
@ -71,28 +90,12 @@ let specs =
let anonymous arg = let anonymous arg =
match !input with match !input with
None -> input := Some arg None -> input := Some arg
| Some _ -> abort (sprintf "Multiple inputs") | Some s -> Printf.printf "s=%s\n" s;
;; abort (sprintf "Multiple inputs")
(* Parsing the command-line options *)
try Getopt.parse_cmdline specs anonymous with
Getopt.Error msg -> abort msg
;; ;;
(* Checking options and exporting them as non-mutable values *) (* Checking options and exporting them as non-mutable values *)
type command = Quiet | Copy | Units | Tokens
let cmd =
match !quiet, !copy, !units, !tokens with
false, false, false, false
| true, false, false, false -> Quiet
| false, true, false, false -> Copy
| false, false, true, false -> Units
| false, false, false, true -> Tokens
| _ -> abort "Choose one of -q, -c, -u, -t."
let string_of convert = function let string_of convert = function
None -> "None" None -> "None"
| Some s -> sprintf "Some %s" (convert s) | Some s -> sprintf "Some %s" (convert s)
@ -103,11 +106,6 @@ let string_of_path p =
let quote s = sprintf "\"%s\"" s let quote s = sprintf "\"%s\"" s
let verbose_str =
let apply e a =
if a <> "" then sprintf "%s, %s" e a else e
in Utils.String.Set.fold apply !verbose ""
let print_opt () = let print_opt () =
printf "COMMAND LINE\n"; printf "COMMAND LINE\n";
printf "copy = %b\n" !copy; printf "copy = %b\n" !copy;
@ -116,46 +114,70 @@ let print_opt () =
printf "quiet = %b\n" !quiet; printf "quiet = %b\n" !quiet;
printf "columns = %b\n" !columns; printf "columns = %b\n" !columns;
printf "bytes = %b\n" !bytes; printf "bytes = %b\n" !bytes;
printf "verbose = \"%s\"\n" verbose_str; printf "verbose = %s\n" !verb_str;
printf "input = %s\n" (string_of quote !input); printf "input = %s\n" (string_of quote !input);
printf "libs = %s\n" (string_of_path !libs) printf "libs = %s\n" (string_of_path !libs)
;; ;;
if Utils.String.Set.mem "cmdline" !verbose then print_opt ();; let check () =
let () =
if Utils.String.Set.mem "cmdline" !verbose then print_opt () in
let input = let input =
match !input with match !input with
None | Some "-" -> !input None | Some "-" -> !input
| Some file_path -> | Some file_path ->
if Filename.check_suffix file_path ".ligo" if Filename.check_suffix file_path ".ligo"
then if Sys.file_exists file_path then if Sys.file_exists file_path
then Some file_path then Some file_path
else abort "Source file not found." else abort "Source file not found."
else abort "Source file lacks the extension .ligo." else abort "Source file lacks the extension .ligo." in
(* Exporting remaining options as non-mutable values *) (* Exporting remaining options as non-mutable values *)
let copy = !copy let copy = !copy
and tokens = !tokens and tokens = !tokens
and units = !units and units = !units
and quiet = !quiet and quiet = !quiet
and offsets = not !columns and offsets = not !columns
and mode = if !bytes then `Byte else `Point and mode = if !bytes then `Byte else `Point
and verbose = !verbose and verbose = !verbose
and libs = !libs and libs = !libs in
;;
if Utils.String.Set.mem "cmdline" verbose then let () =
begin if Utils.String.Set.mem "cmdline" verbose then
printf "\nEXPORTED COMMAND LINE\n"; begin
printf "copy = %b\n" copy; printf "\nEXPORTED COMMAND LINE\n";
printf "tokens = %b\n" tokens; printf "copy = %b\n" copy;
printf "units = %b\n" units; printf "tokens = %b\n" tokens;
printf "quiet = %b\n" quiet; printf "units = %b\n" units;
printf "offsets = %b\n" offsets; printf "quiet = %b\n" quiet;
printf "mode = %s\n" (if mode = `Byte then "`Byte" else "`Point"); printf "offsets = %b\n" offsets;
printf "verbose = \"%s\"\n" verbose_str; printf "mode = %s\n" (if mode = `Byte then "`Byte" else "`Point");
printf "input = %s\n" (string_of quote input); printf "verbose = %s\n" !verb_str;
printf "I = %s\n" (string_of_path libs) printf "input = %s\n" (string_of quote input);
end printf "libs = %s\n" (string_of_path libs)
;; end in
let cmd =
match quiet, copy, units, tokens with
false, false, false, false
| true, false, false, false -> Quiet
| false, true, false, false -> Copy
| false, false, true, false -> Units
| false, false, false, true -> Tokens
| _ -> abort "Choose one of -q, -c, -u, -t."
in {input; libs; verbose; offsets; mode; cmd}
(* Parsing the command-line options *)
let read () =
try
Getopt.parse_cmdline specs anonymous;
(verb_str :=
let apply e a =
if a <> "" then Printf.sprintf "%s, %s" e a else e
in Utils.String.Set.fold apply !verbose "");
check ()
with Getopt.Error msg -> abort msg

View File

@ -1,35 +1,8 @@
(* Parsing the command-line option for testing the LIGO lexer and (* Parsing the command-line options of PascaLIGO *)
parser *)
(* If the value [offsets] is [true], then the user requested that (* The type [command] denotes some possible behaviours of the
messages about source positions and regions be expressed in terms compiler. The constructors are
of horizontal offsets. *)
val offsets : bool
(* If the value [mode] is [`Byte], then the unit in which source
positions and regions are expressed in messages is the byte. If
[`Point], the unit is unicode points. *)
val mode : [`Byte | `Point]
(* If the option [verbose] is set to a list of predefined stages of
the compiler chain, then more information may be displayed about
those stages. *)
val verbose : Utils.String.Set.t
(* If the value [input] is [None] or [Some "-"], the input is standard
input. If [Some f], then the input is the file whose name (file
path) is [f]. *)
val input : string option
(* Paths where to find LIGO files for inclusion *)
val libs : string list
(* If the value [cmd] is
* [Quiet], then no output from the lexer and parser should be * [Quiet], then no output from the lexer and parser should be
expected, safe error messages: this is the default value; expected, safe error messages: this is the default value;
* [Copy], then lexemes of tokens and markup will be printed to * [Copy], then lexemes of tokens and markup will be printed to
@ -43,4 +16,37 @@ val libs : string list
type command = Quiet | Copy | Units | Tokens type command = Quiet | Copy | Units | Tokens
val cmd : command (* The type [options] gathers the command-line options.
If the field [input] is [Some src], the name of the PascaLIGO
source file, with the extension ".ligo", is [src]. If [input] is
[Some "-"] or [None], the source file is read from standard input.
The field [libs] is the paths where to find PascaLIGO files for
inclusion (#include).
The field [verbose] is a set of stages of the compiler chain,
about which more information may be displayed.
If the field [offsets] is [true], then the user requested that
messages about source positions and regions be expressed in terms
of horizontal offsets.
If the value [mode] is [`Byte], then the unit in which source
positions and regions are expressed in messages is the byte. If
[`Point], the unit is unicode points.
*)
type options = {
input : string option;
libs : string list;
verbose : Utils.String.Set.t;
offsets : bool;
mode : [`Byte | `Point];
cmd : command
}
(* Parsing the command-line options on stdin *)
val read : unit -> options

View File

@ -6,7 +6,7 @@
early reject potentially misleading or poorly written early reject potentially misleading or poorly written
LIGO contracts; LIGO contracts;
(2) provide precise error messages with hint as how to fix the (2) provide precise error messages with hints as how to fix the
issue, which is achieved by consulting the lexical issue, which is achieved by consulting the lexical
right-context of lexemes; right-context of lexemes;
@ -15,23 +15,24 @@
specification: this is achieved by using the most general specification: this is achieved by using the most general
regular expressions to match the lexing buffer and broadly regular expressions to match the lexing buffer and broadly
distinguish the syntactic categories, and then delegating a distinguish the syntactic categories, and then delegating a
finer, protocol-dependent, second analysis to an external finer, second analysis to an external module making the
module making the tokens (hence a functor below); tokens (hence a functor below);
(4) support unit testing (lexing of the whole input with debug (4) support unit testing (lexing of the whole input with debug
traces); traces).
The limitation to the protocol independence lies in the errors that A limitation to the independence with respect to the LIGO version
the external module building the tokens (which is lies in the errors that the external module building the tokens
protocol-dependent) may have to report. Indeed these errors have to (which may be version-dependent) may have to report. Indeed these
be contextualised by the lexer in terms of input source regions, so errors have to be contextualised by the lexer in terms of input
useful error messages can be printed, therefore they are part of source regions, so useful error messages can be printed, therefore
the signature [TOKEN] that parameterise the functor generated they are part of the signature [TOKEN] that parameterises the
here. For instance, if, in a future release of LIGO, new tokens may functor generated here. For instance, if, in a future release of
be added, and the recognition of their lexemes may entail new LIGO, new tokens may be added, and the recognition of their lexemes
errors, the signature [TOKEN] will have to be augmented and the may entail new errors, the signature [TOKEN] will have to be
lexer specification changed. However, it is more likely that augmented and the lexer specification changed. However, in
instructions or types are added, instead of new kinds of tokens. practice, it is more likely that instructions or types are added,
instead of new kinds of tokens.
*) *)
module Region = Simple_utils.Region module Region = Simple_utils.Region
@ -96,7 +97,7 @@ module type TOKEN =
[open_token_stream], which returns [open_token_stream], which returns
* a function [read] that extracts tokens from a lexing buffer, * a function [read] that extracts tokens from a lexing buffer,
* together with a lexing buffer [buffer] to read from, together with a lexing buffer [buffer] to read from,
* a function [close] that closes that buffer, * a function [close] that closes that buffer,
* a function [get_pos] that returns the current position, and * a function [get_pos] that returns the current position, and
* a function [get_last] that returns the region of the last * a function [get_last] that returns the region of the last
@ -105,12 +106,11 @@ module type TOKEN =
Note that a module [Token] is exported too, because the signature Note that a module [Token] is exported too, because the signature
of the exported functions depend on it. of the exported functions depend on it.
The call [read ~log] evaluates in a lexer (a.k.a tokeniser or The call [read ~log] evaluates in a lexer (also known as a
scanner) whose type is [Lexing.lexbuf -> token], and suitable for a tokeniser or scanner) whose type is [Lexing.lexbuf -> token], and
parser generated by Menhir. suitable for a parser generated by Menhir. The argument labelled
[log] is a logger, that is, it may print a token and its left
The argument labelled [log] is a logger. It may print a token and markup to a given channel, at the caller's discretion.
its left markup to a given channel, at the caller's discretion.
*) *)
module type S = module type S =
@ -135,7 +135,8 @@ module type S =
exception Error of Error.t Region.reg exception Error of Error.t Region.reg
val print_error : ?offsets:bool -> [`Byte | `Point] -> val print_error :
?offsets:bool -> [`Byte | `Point] ->
Error.t Region.reg -> file:bool -> unit Error.t Region.reg -> file:bool -> unit
end end

View File

@ -1,12 +1,7 @@
(* Standalone lexer for debugging purposes *) (* Embedding the lexer of PascaLIGO in a debug module *)
let sprintf = Printf.sprintf let sprintf = Printf.sprintf
let file =
match EvalOpt.input with
None | Some "-" -> false
| Some _ -> true
module type S = module type S =
sig sig
module Lexer : Lexer.S module Lexer : Lexer.S
@ -39,7 +34,8 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
let output_nl str = output (str ^ "\n") in let output_nl str = output (str ^ "\n") in
match command with match command with
EvalOpt.Quiet -> () EvalOpt.Quiet -> ()
| EvalOpt.Tokens -> Token.to_string token ~offsets mode |> output_nl | EvalOpt.Tokens ->
Token.to_string token ~offsets mode |> output_nl
| EvalOpt.Copy -> | EvalOpt.Copy ->
let lexeme = Token.to_lexeme token let lexeme = Token.to_lexeme token
and apply acc markup = Markup.to_lexeme markup :: acc and apply acc markup = Markup.to_lexeme markup :: acc
@ -67,6 +63,10 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
if Token.is_eof token then close_all () if Token.is_eof token then close_all ()
else iter () else iter ()
| exception Lexer.Error e -> | exception Lexer.Error e ->
let file =
match file_path_opt with
None | Some "-" -> false
| Some _ -> true in
Lexer.print_error ~offsets mode e ~file; Lexer.print_error ~offsets mode e ~file;
close_all () close_all ()
in iter () in iter ()

View File

@ -1,18 +1,22 @@
(* Driver for the lexer of LIGO *) (* Driver for the lexer of PascaLIGO *)
open! EvalOpt (* Reads the command-line options: Effectful! *)
(* Error printing and exception tracing *) (* Error printing and exception tracing *)
let () = Printexc.record_backtrace true let () = Printexc.record_backtrace true
(* Running the lexer on the source *)
let options = EvalOpt.read ()
open EvalOpt
let external_ text = let external_ text =
Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;; Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;;
(* Path for CPP inclusions (#include) *) (* Path for CPP inclusions (#include) *)
let lib_path = let lib_path =
match EvalOpt.libs with match options.libs with
[] -> "" [] -> ""
| libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path | libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path
in List.fold_right mk_I libs "" in List.fold_right mk_I libs ""
@ -20,20 +24,20 @@ let lib_path =
(* Preprocessing the input source and opening the input channels *) (* Preprocessing the input source and opening the input channels *)
let prefix = let prefix =
match EvalOpt.input with match options.input with
None | Some "-" -> "temp" None | Some "-" -> "temp"
| Some file -> Filename.(file |> basename |> remove_extension) | Some file -> Filename.(file |> basename |> remove_extension)
let suffix = ".pp.ligo" let suffix = ".pp.ligo"
let pp_input = let pp_input =
if Utils.String.Set.mem "cpp" EvalOpt.verbose if Utils.String.Set.mem "cpp" options.verbose
then prefix ^ suffix then prefix ^ suffix
else let pp_input, pp_out = Filename.open_temp_file prefix suffix else let pp_input, pp_out = Filename.open_temp_file prefix suffix
in close_out pp_out; pp_input in close_out pp_out; pp_input
let cpp_cmd = let cpp_cmd =
match EvalOpt.input with match options.input with
None | Some "-" -> None | Some "-" ->
Printf.sprintf "cpp -traditional-cpp%s - > %s" Printf.sprintf "cpp -traditional-cpp%s - > %s"
lib_path pp_input lib_path pp_input
@ -42,7 +46,7 @@ let cpp_cmd =
lib_path file pp_input lib_path file pp_input
let () = let () =
if Utils.String.Set.mem "cpp" EvalOpt.verbose if Utils.String.Set.mem "cpp" options.verbose
then Printf.eprintf "%s\n%!" cpp_cmd; then Printf.eprintf "%s\n%!" cpp_cmd;
if Sys.command cpp_cmd <> 0 then if Sys.command cpp_cmd <> 0 then
external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd) external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd)
@ -53,5 +57,5 @@ module Lexer = Lexer.Make (LexToken)
module Log = LexerLog.Make (Lexer) module Log = LexerLog.Make (Lexer)
let () = Log.trace ~offsets:EvalOpt.offsets let () = Log.trace ~offsets:options.offsets
EvalOpt.mode (Some pp_input) EvalOpt.cmd options.mode (Some pp_input) options.cmd

View File

@ -1,11 +1,23 @@
(* Driver for the parser of LIGO *) (* Driver for the parser of PascaLIGO *)
open! EvalOpt (* Reads the command-line options: Effectful! *) (* Error printing and exception tracing *)
let () = Printexc.record_backtrace true
(* Reading the command-line options *)
let options = EvalOpt.read ()
open EvalOpt
(* Auxiliary functions *)
let sprintf = Printf.sprintf let sprintf = Printf.sprintf
(* Extracting the input file *)
let file = let file =
match EvalOpt.input with match options.input with
None | Some "-" -> false None | Some "-" -> false
| Some _ -> true | Some _ -> true
@ -30,7 +42,7 @@ let print_error ?(offsets=true) mode Region.{region; value} ~file =
(* Path for CPP inclusions (#include) *) (* Path for CPP inclusions (#include) *)
let lib_path = let lib_path =
match EvalOpt.libs with match options.libs with
[] -> "" [] -> ""
| libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path | libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path
in List.fold_right mk_I libs "" in List.fold_right mk_I libs ""
@ -38,20 +50,20 @@ let lib_path =
(* Preprocessing the input source and opening the input channels *) (* Preprocessing the input source and opening the input channels *)
let prefix = let prefix =
match EvalOpt.input with match options.input with
None | Some "-" -> "temp" None | Some "-" -> "temp"
| Some file -> Filename.(file |> basename |> remove_extension) | Some file -> Filename.(file |> basename |> remove_extension)
let suffix = ".pp.ligo" let suffix = ".pp.ligo"
let pp_input = let pp_input =
if Utils.String.Set.mem "cpp" EvalOpt.verbose if Utils.String.Set.mem "cpp" options.verbose
then prefix ^ suffix then prefix ^ suffix
else let pp_input, pp_out = Filename.open_temp_file prefix suffix else let pp_input, pp_out = Filename.open_temp_file prefix suffix
in close_out pp_out; pp_input in close_out pp_out; pp_input
let cpp_cmd = let cpp_cmd =
match EvalOpt.input with match options.input with
None | Some "-" -> None | Some "-" ->
Printf.sprintf "cpp -traditional-cpp%s - > %s" Printf.sprintf "cpp -traditional-cpp%s - > %s"
lib_path pp_input lib_path pp_input
@ -60,7 +72,7 @@ let cpp_cmd =
lib_path file pp_input lib_path file pp_input
let () = let () =
if Utils.String.Set.mem "cpp" EvalOpt.verbose if Utils.String.Set.mem "cpp" options.verbose
then Printf.eprintf "%s\n%!" cpp_cmd; then Printf.eprintf "%s\n%!" cpp_cmd;
if Sys.command cpp_cmd <> 0 then if Sys.command cpp_cmd <> 0 then
external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd) external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd)
@ -76,8 +88,8 @@ let Lexer.{read; buffer; get_pos; get_last; close} =
and cout = stdout and cout = stdout
let log = Log.output_token ~offsets:EvalOpt.offsets let log = Log.output_token ~offsets:options.offsets
EvalOpt.mode EvalOpt.cmd cout options.mode options.cmd cout
and close_all () = close (); close_out cout and close_all () = close (); close_out cout
@ -90,19 +102,21 @@ let tokeniser = read ~log
let () = let () =
try try
let ast = Parser.contract tokeniser buffer in let ast = Parser.contract tokeniser buffer in
if Utils.String.Set.mem "ast" EvalOpt.verbose if Utils.String.Set.mem "ast" options.verbose
then begin then begin
ParserLog.offsets := EvalOpt.offsets; ParserLog.offsets := options.offsets;
ParserLog.mode := EvalOpt.mode; ParserLog.mode := options.mode;
ParserLog.print_tokens ast ParserLog.print_tokens ast
end end
with with
Lexer.Error err -> Lexer.Error err ->
close_all (); close_all ();
Lexer.print_error ~offsets EvalOpt.mode err ~file Lexer.print_error ~offsets:options.offsets
options.mode err ~file
| Parser.Error -> | Parser.Error ->
let region = get_last () in let region = get_last () in
let error = Region.{region; value=ParseError} in let error = Region.{region; value=ParseError} in
let () = close_all () in let () = close_all () in
print_error ~offsets EvalOpt.mode error ~file print_error ~offsets:options.offsets
options.mode error ~file
| Sys_error msg -> Utils.highlight msg | Sys_error msg -> Utils.highlight msg