Parsing the command line by calling [EvalOpt.read], not a side-effect.

This commit is contained in:
Christian Rinderknecht 2019-07-24 15:41:52 +02:00
parent d2f4d00011
commit dc4f14d469
6 changed files with 214 additions and 167 deletions

View File

@ -1,8 +1,26 @@
(* Parsing the command-line option for testing the LIGO lexer and
parser *)
(* Parsing the command-line options of PascaLIGO *)
(* The type [command] denotes some possible behaviours of the
compiler. *)
type command = Quiet | Copy | Units | Tokens
(* The type [options] gathers the command-line options. *)
type options = {
input : string option;
libs : string list;
verbose : Utils.String.Set.t;
offsets : bool;
mode : [`Byte | `Point];
cmd : command
}
(* Auxiliary functions *)
let printf = Printf.printf
let sprintf = Printf.sprintf
let print = print_endline
let abort msg =
Utils.highlight (sprintf "Command-line error: %s\n" msg); exit 1
@ -12,18 +30,18 @@ let abort msg =
let help () =
let file = Filename.basename Sys.argv.(0) in
printf "Usage: %s [<option> ...] [<input>.ligo | \"-\"]\n" file;
print_endline "where <input>.ligo is the LIGO source file (default: stdin),";
print_endline "and each <option> (if any) is one of the following:";
print_endline " -I <paths> Library paths (colon-separated)";
print_endline " -c, --copy Print lexemes of tokens and markup (lexer)";
print_endline " -t, --tokens Print tokens (lexer)";
print_endline " -u, --units Print tokens and markup (lexer)";
print_endline " -q, --quiet No output, except errors (default)";
print_endline " --columns Columns for source locations";
print_endline " --bytes Bytes for source locations";
print_endline " --verbose=<stages> cmdline, cpp, ast (colon-separated)";
print_endline " --version Commit hash on stdout";
print_endline " -h, --help This help";
print "where <input>.ligo is the PascaLIGO source file (default: stdin),";
print "and each <option> (if any) is one of the following:";
print " -I <paths> Library paths (colon-separated)";
print " -c, --copy Print lexemes of tokens and markup (lexer)";
print " -t, --tokens Print tokens (lexer)";
print " -u, --units Print tokens and markup (lexer)";
print " -q, --quiet No output, except errors (default)";
print " --columns Columns for source locations";
print " --bytes Bytes for source locations";
print " --verbose=<stages> cmdline, cpp, ast (colon-separated)";
print " --version Commit hash on stdout";
print " -h, --help This help";
exit 0
(* Version *)
@ -41,6 +59,7 @@ and bytes = ref false
and verbose = ref Utils.String.Set.empty
and input = ref None
and libs = ref []
and verb_str = ref ""
let split_at_colon = Str.(split (regexp ":"))
@ -71,28 +90,12 @@ let specs =
let anonymous arg =
match !input with
None -> input := Some arg
| Some _ -> abort (sprintf "Multiple inputs")
;;
(* Parsing the command-line options *)
try Getopt.parse_cmdline specs anonymous with
Getopt.Error msg -> abort msg
| Some s -> Printf.printf "s=%s\n" s;
abort (sprintf "Multiple inputs")
;;
(* Checking options and exporting them as non-mutable values *)
type command = Quiet | Copy | Units | Tokens
let cmd =
match !quiet, !copy, !units, !tokens with
false, false, false, false
| true, false, false, false -> Quiet
| false, true, false, false -> Copy
| false, false, true, false -> Units
| false, false, false, true -> Tokens
| _ -> abort "Choose one of -q, -c, -u, -t."
let string_of convert = function
None -> "None"
| Some s -> sprintf "Some %s" (convert s)
@ -103,11 +106,6 @@ let string_of_path p =
let quote s = sprintf "\"%s\"" s
let verbose_str =
let apply e a =
if a <> "" then sprintf "%s, %s" e a else e
in Utils.String.Set.fold apply !verbose ""
let print_opt () =
printf "COMMAND LINE\n";
printf "copy = %b\n" !copy;
@ -116,14 +114,16 @@ let print_opt () =
printf "quiet = %b\n" !quiet;
printf "columns = %b\n" !columns;
printf "bytes = %b\n" !bytes;
printf "verbose = \"%s\"\n" verbose_str;
printf "verbose = %s\n" !verb_str;
printf "input = %s\n" (string_of quote !input);
printf "libs = %s\n" (string_of_path !libs)
;;
if Utils.String.Set.mem "cmdline" !verbose then print_opt ();;
let check () =
let () =
if Utils.String.Set.mem "cmdline" !verbose then print_opt () in
let input =
let input =
match !input with
None | Some "-" -> !input
| Some file_path ->
@ -131,21 +131,21 @@ let input =
then if Sys.file_exists file_path
then Some file_path
else abort "Source file not found."
else abort "Source file lacks the extension .ligo."
else abort "Source file lacks the extension .ligo." in
(* Exporting remaining options as non-mutable values *)
(* Exporting remaining options as non-mutable values *)
let copy = !copy
and tokens = !tokens
and units = !units
and quiet = !quiet
and offsets = not !columns
and mode = if !bytes then `Byte else `Point
and verbose = !verbose
and libs = !libs
;;
let copy = !copy
and tokens = !tokens
and units = !units
and quiet = !quiet
and offsets = not !columns
and mode = if !bytes then `Byte else `Point
and verbose = !verbose
and libs = !libs in
if Utils.String.Set.mem "cmdline" verbose then
let () =
if Utils.String.Set.mem "cmdline" verbose then
begin
printf "\nEXPORTED COMMAND LINE\n";
printf "copy = %b\n" copy;
@ -154,8 +154,30 @@ if Utils.String.Set.mem "cmdline" verbose then
printf "quiet = %b\n" quiet;
printf "offsets = %b\n" offsets;
printf "mode = %s\n" (if mode = `Byte then "`Byte" else "`Point");
printf "verbose = \"%s\"\n" verbose_str;
printf "verbose = %s\n" !verb_str;
printf "input = %s\n" (string_of quote input);
printf "I = %s\n" (string_of_path libs)
end
;;
printf "libs = %s\n" (string_of_path libs)
end in
let cmd =
match quiet, copy, units, tokens with
false, false, false, false
| true, false, false, false -> Quiet
| false, true, false, false -> Copy
| false, false, true, false -> Units
| false, false, false, true -> Tokens
| _ -> abort "Choose one of -q, -c, -u, -t."
in {input; libs; verbose; offsets; mode; cmd}
(* Parsing the command-line options *)
let read () =
try
Getopt.parse_cmdline specs anonymous;
(verb_str :=
let apply e a =
if a <> "" then Printf.sprintf "%s, %s" e a else e
in Utils.String.Set.fold apply !verbose "");
check ()
with Getopt.Error msg -> abort msg

View File

@ -1,35 +1,8 @@
(* Parsing the command-line option for testing the LIGO lexer and
parser *)
(* Parsing the command-line options of PascaLIGO *)
(* If the value [offsets] is [true], then the user requested that
messages about source positions and regions be expressed in terms
of horizontal offsets. *)
(* The type [command] denotes some possible behaviours of the
compiler. The constructors are
val offsets : bool
(* If the value [mode] is [`Byte], then the unit in which source
positions and regions are expressed in messages is the byte. If
[`Point], the unit is unicode points. *)
val mode : [`Byte | `Point]
(* If the option [verbose] is set to a list of predefined stages of
the compiler chain, then more information may be displayed about
those stages. *)
val verbose : Utils.String.Set.t
(* If the value [input] is [None] or [Some "-"], the input is standard
input. If [Some f], then the input is the file whose name (file
path) is [f]. *)
val input : string option
(* Paths where to find LIGO files for inclusion *)
val libs : string list
(* If the value [cmd] is
* [Quiet], then no output from the lexer and parser should be
expected, safe error messages: this is the default value;
* [Copy], then lexemes of tokens and markup will be printed to
@ -43,4 +16,37 @@ val libs : string list
type command = Quiet | Copy | Units | Tokens
val cmd : command
(* The type [options] gathers the command-line options.
If the field [input] is [Some src], the name of the PascaLIGO
source file, with the extension ".ligo", is [src]. If [input] is
[Some "-"] or [None], the source file is read from standard input.
The field [libs] is the paths where to find PascaLIGO files for
inclusion (#include).
The field [verbose] is a set of stages of the compiler chain,
about which more information may be displayed.
If the field [offsets] is [true], then the user requested that
messages about source positions and regions be expressed in terms
of horizontal offsets.
If the value [mode] is [`Byte], then the unit in which source
positions and regions are expressed in messages is the byte. If
[`Point], the unit is unicode points.
*)
type options = {
input : string option;
libs : string list;
verbose : Utils.String.Set.t;
offsets : bool;
mode : [`Byte | `Point];
cmd : command
}
(* Parsing the command-line options on stdin *)
val read : unit -> options

View File

@ -6,7 +6,7 @@
early reject potentially misleading or poorly written
LIGO contracts;
(2) provide precise error messages with hint as how to fix the
(2) provide precise error messages with hints as how to fix the
issue, which is achieved by consulting the lexical
right-context of lexemes;
@ -15,23 +15,24 @@
specification: this is achieved by using the most general
regular expressions to match the lexing buffer and broadly
distinguish the syntactic categories, and then delegating a
finer, protocol-dependent, second analysis to an external
module making the tokens (hence a functor below);
finer, second analysis to an external module making the
tokens (hence a functor below);
(4) support unit testing (lexing of the whole input with debug
traces);
traces).
The limitation to the protocol independence lies in the errors that
the external module building the tokens (which is
protocol-dependent) may have to report. Indeed these errors have to
be contextualised by the lexer in terms of input source regions, so
useful error messages can be printed, therefore they are part of
the signature [TOKEN] that parameterise the functor generated
here. For instance, if, in a future release of LIGO, new tokens may
be added, and the recognition of their lexemes may entail new
errors, the signature [TOKEN] will have to be augmented and the
lexer specification changed. However, it is more likely that
instructions or types are added, instead of new kinds of tokens.
A limitation to the independence with respect to the LIGO version
lies in the errors that the external module building the tokens
(which may be version-dependent) may have to report. Indeed these
errors have to be contextualised by the lexer in terms of input
source regions, so useful error messages can be printed, therefore
they are part of the signature [TOKEN] that parameterises the
functor generated here. For instance, if, in a future release of
LIGO, new tokens may be added, and the recognition of their lexemes
may entail new errors, the signature [TOKEN] will have to be
augmented and the lexer specification changed. However, in
practice, it is more likely that instructions or types are added,
instead of new kinds of tokens.
*)
module Region = Simple_utils.Region
@ -96,7 +97,7 @@ module type TOKEN =
[open_token_stream], which returns
* a function [read] that extracts tokens from a lexing buffer,
* together with a lexing buffer [buffer] to read from,
together with a lexing buffer [buffer] to read from,
* a function [close] that closes that buffer,
* a function [get_pos] that returns the current position, and
* a function [get_last] that returns the region of the last
@ -105,12 +106,11 @@ module type TOKEN =
Note that a module [Token] is exported too, because the signature
of the exported functions depend on it.
The call [read ~log] evaluates in a lexer (a.k.a tokeniser or
scanner) whose type is [Lexing.lexbuf -> token], and suitable for a
parser generated by Menhir.
The argument labelled [log] is a logger. It may print a token and
its left markup to a given channel, at the caller's discretion.
The call [read ~log] evaluates in a lexer (also known as a
tokeniser or scanner) whose type is [Lexing.lexbuf -> token], and
suitable for a parser generated by Menhir. The argument labelled
[log] is a logger, that is, it may print a token and its left
markup to a given channel, at the caller's discretion.
*)
module type S =
@ -135,7 +135,8 @@ module type S =
exception Error of Error.t Region.reg
val print_error : ?offsets:bool -> [`Byte | `Point] ->
val print_error :
?offsets:bool -> [`Byte | `Point] ->
Error.t Region.reg -> file:bool -> unit
end

View File

@ -1,12 +1,7 @@
(* Standalone lexer for debugging purposes *)
(* Embedding the lexer of PascaLIGO in a debug module *)
let sprintf = Printf.sprintf
let file =
match EvalOpt.input with
None | Some "-" -> false
| Some _ -> true
module type S =
sig
module Lexer : Lexer.S
@ -39,7 +34,8 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
let output_nl str = output (str ^ "\n") in
match command with
EvalOpt.Quiet -> ()
| EvalOpt.Tokens -> Token.to_string token ~offsets mode |> output_nl
| EvalOpt.Tokens ->
Token.to_string token ~offsets mode |> output_nl
| EvalOpt.Copy ->
let lexeme = Token.to_lexeme token
and apply acc markup = Markup.to_lexeme markup :: acc
@ -67,6 +63,10 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
if Token.is_eof token then close_all ()
else iter ()
| exception Lexer.Error e ->
let file =
match file_path_opt with
None | Some "-" -> false
| Some _ -> true in
Lexer.print_error ~offsets mode e ~file;
close_all ()
in iter ()

View File

@ -1,18 +1,22 @@
(* Driver for the lexer of LIGO *)
open! EvalOpt (* Reads the command-line options: Effectful! *)
(* Driver for the lexer of PascaLIGO *)
(* Error printing and exception tracing *)
let () = Printexc.record_backtrace true
(* Running the lexer on the source *)
let options = EvalOpt.read ()
open EvalOpt
let external_ text =
Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;;
(* Path for CPP inclusions (#include) *)
let lib_path =
match EvalOpt.libs with
match options.libs with
[] -> ""
| libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path
in List.fold_right mk_I libs ""
@ -20,20 +24,20 @@ let lib_path =
(* Preprocessing the input source and opening the input channels *)
let prefix =
match EvalOpt.input with
match options.input with
None | Some "-" -> "temp"
| Some file -> Filename.(file |> basename |> remove_extension)
let suffix = ".pp.ligo"
let pp_input =
if Utils.String.Set.mem "cpp" EvalOpt.verbose
if Utils.String.Set.mem "cpp" options.verbose
then prefix ^ suffix
else let pp_input, pp_out = Filename.open_temp_file prefix suffix
in close_out pp_out; pp_input
let cpp_cmd =
match EvalOpt.input with
match options.input with
None | Some "-" ->
Printf.sprintf "cpp -traditional-cpp%s - > %s"
lib_path pp_input
@ -42,7 +46,7 @@ let cpp_cmd =
lib_path file pp_input
let () =
if Utils.String.Set.mem "cpp" EvalOpt.verbose
if Utils.String.Set.mem "cpp" options.verbose
then Printf.eprintf "%s\n%!" cpp_cmd;
if Sys.command cpp_cmd <> 0 then
external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd)
@ -53,5 +57,5 @@ module Lexer = Lexer.Make (LexToken)
module Log = LexerLog.Make (Lexer)
let () = Log.trace ~offsets:EvalOpt.offsets
EvalOpt.mode (Some pp_input) EvalOpt.cmd
let () = Log.trace ~offsets:options.offsets
options.mode (Some pp_input) options.cmd

View File

@ -1,11 +1,23 @@
(* Driver for the parser of LIGO *)
(* Driver for the parser of PascaLIGO *)
open! EvalOpt (* Reads the command-line options: Effectful! *)
(* Error printing and exception tracing *)
let () = Printexc.record_backtrace true
(* Reading the command-line options *)
let options = EvalOpt.read ()
open EvalOpt
(* Auxiliary functions *)
let sprintf = Printf.sprintf
(* Extracting the input file *)
let file =
match EvalOpt.input with
match options.input with
None | Some "-" -> false
| Some _ -> true
@ -30,7 +42,7 @@ let print_error ?(offsets=true) mode Region.{region; value} ~file =
(* Path for CPP inclusions (#include) *)
let lib_path =
match EvalOpt.libs with
match options.libs with
[] -> ""
| libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path
in List.fold_right mk_I libs ""
@ -38,20 +50,20 @@ let lib_path =
(* Preprocessing the input source and opening the input channels *)
let prefix =
match EvalOpt.input with
match options.input with
None | Some "-" -> "temp"
| Some file -> Filename.(file |> basename |> remove_extension)
let suffix = ".pp.ligo"
let pp_input =
if Utils.String.Set.mem "cpp" EvalOpt.verbose
if Utils.String.Set.mem "cpp" options.verbose
then prefix ^ suffix
else let pp_input, pp_out = Filename.open_temp_file prefix suffix
in close_out pp_out; pp_input
let cpp_cmd =
match EvalOpt.input with
match options.input with
None | Some "-" ->
Printf.sprintf "cpp -traditional-cpp%s - > %s"
lib_path pp_input
@ -60,7 +72,7 @@ let cpp_cmd =
lib_path file pp_input
let () =
if Utils.String.Set.mem "cpp" EvalOpt.verbose
if Utils.String.Set.mem "cpp" options.verbose
then Printf.eprintf "%s\n%!" cpp_cmd;
if Sys.command cpp_cmd <> 0 then
external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd)
@ -76,8 +88,8 @@ let Lexer.{read; buffer; get_pos; get_last; close} =
and cout = stdout
let log = Log.output_token ~offsets:EvalOpt.offsets
EvalOpt.mode EvalOpt.cmd cout
let log = Log.output_token ~offsets:options.offsets
options.mode options.cmd cout
and close_all () = close (); close_out cout
@ -90,19 +102,21 @@ let tokeniser = read ~log
let () =
try
let ast = Parser.contract tokeniser buffer in
if Utils.String.Set.mem "ast" EvalOpt.verbose
if Utils.String.Set.mem "ast" options.verbose
then begin
ParserLog.offsets := EvalOpt.offsets;
ParserLog.mode := EvalOpt.mode;
ParserLog.offsets := options.offsets;
ParserLog.mode := options.mode;
ParserLog.print_tokens ast
end
with
Lexer.Error err ->
close_all ();
Lexer.print_error ~offsets EvalOpt.mode err ~file
Lexer.print_error ~offsets:options.offsets
options.mode err ~file
| Parser.Error ->
let region = get_last () in
let error = Region.{region; value=ParseError} in
let () = close_all () in
print_error ~offsets EvalOpt.mode error ~file
print_error ~offsets:options.offsets
options.mode error ~file
| Sys_error msg -> Utils.highlight msg