Eased the translation from the Ligodity AST to the Liquidity AST.

More precisely,

  * I commented out the "@" operator on lists in Ligodity (as a
    workaround, it can be implemented as a function; see the first
    sketch after this list).

  * I removed the parallel "let" construct (hence the "and"
    keyword); see the second sketch after this list.

  * I renamed the type "field_assignment" to "field_assign", to
    match the Pascaligo AST.

  * Command-line options are now read by calling the function
    [EvalOpt.read], instead of through an ugly side effect when the
    module is loaded. The options are gathered in a record of type
    [EvalOpt.options].

  * I added support for CPP #include directives in the Ligodity
    lexer.
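
As a sketch of the workaround for "@" (OCaml-style syntax; the name
[append] and the polymorphic signature are illustrative, not part of
this commit):

  let rec append (l1 : 'a list) (l2 : 'a list) : 'a list =
    match l1 with
      [] -> l2
    | x::xs -> x :: append xs l2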
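
And for the removed parallel "let", a minimal before/after sketch
(the binding names are made up):

  (* No longer accepted: *)
  let x = 1 and y = 2

  (* Written instead as two declarations: *)
  let x = 1
  let y = 2
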
Christian Rinderknecht 2019-05-15 15:03:15 +02:00 committed by Georges Dupéron
parent fdf7704a7c
commit af8d1083b7
12 changed files with 228 additions and 134 deletions

View File

@ -109,14 +109,12 @@ type t = {
and ast = t
and declaration =
Let of (kwd_let * let_bindings) reg
Let of (kwd_let * let_binding) reg
| LetEntry of (kwd_let_entry * let_binding) reg
| TypeDecl of type_decl reg
(* Non-recursive values *)
and let_bindings = (let_binding, kwd_and) Utils.nsepseq
and let_binding = {
pattern : pattern;
lhs_type : (colon * type_expr) option;
@ -238,7 +236,7 @@ and closing =
and list_expr =
Cons of cons bin_op reg
| List of expr injection reg
| Append of (expr * append * expr) reg
(*| Append of (expr * append * expr) reg*)
and string_expr =
Cat of cat bin_op reg
@ -295,9 +293,9 @@ and selection =
FieldName of variable
| Component of (string * Z.t) reg par reg
and record_expr = field_assignment reg injection reg
and record_expr = field_assign reg injection reg
and field_assignment = {
and field_assign = {
field_name : field_name;
assignment : equal;
field_expr : expr
@ -320,7 +318,7 @@ and 'a case_clause = {
rhs : 'a
}
and let_in = kwd_let * let_bindings * kwd_in * expr
and let_in = kwd_let * let_binding * kwd_in * expr
and fun_expr = (kwd_fun * variable * arrow * expr) reg
@ -372,7 +370,8 @@ let region_of_string_expr = function
String {region;_} | Cat {region;_} -> region
let region_of_list_expr = function
Cons {region; _} | List {region; _} | Append {region; _} -> region
Cons {region; _} | List {region; _}
(* | Append {region; _}*) -> region
let region_of_expr = function
ELogic e -> region_of_logic_expr e
@ -399,9 +398,9 @@ let norm_fun region kwd_fun pattern eq expr =
PVar v -> kwd_fun, v, eq, expr
| _ -> let value = Utils.gen_sym () in
let fresh = Region.{region=Region.ghost; value} in
let bindings = {pattern; eq;
lhs_type=None; let_rhs = EVar fresh}, [] in
let let_in = ghost_let, bindings, ghost_in, expr in
let binding = {pattern; eq;
lhs_type=None; let_rhs = EVar fresh} in
let let_in = ghost_let, binding, ghost_in, expr in
let expr = ELetIn {value=let_in; region=Region.ghost}
in kwd_fun, fresh, ghost_arrow, expr
in Region.{region; value}
@ -433,7 +432,7 @@ let rec unparse' = function
EFun {value=_,var,arrow,expr; _} ->
if var.region#is_ghost then
match expr with
ELetIn {value = _,({pattern;eq;_},[]),_,expr; _} ->
ELetIn {value = _,{pattern;eq;_},_,expr; _} ->
if eq#is_ghost then
let patterns, sep, e = unparse' expr
in Utils.nseq_cons pattern patterns, sep, e
@ -485,9 +484,9 @@ let rec print_tokens ?(undo=false) {decl;eof} =
Utils.nseq_iter (print_statement undo) decl; print_token eof "EOF"
and print_statement undo = function
Let {value=kwd_let, let_bindings; _} ->
Let {value=kwd_let, let_binding; _} ->
print_token kwd_let "let";
print_let_bindings undo let_bindings
print_let_binding undo let_binding
| LetEntry {value=kwd_let_entry, let_binding; _} ->
print_token kwd_let_entry "let%entry";
print_let_binding undo let_binding
@ -588,8 +587,6 @@ and print_terminator = function
Some semi -> print_token semi ";"
| None -> ()
and print_let_bindings undo = print_nsepseq "and" (print_let_binding undo)
and print_let_binding undo {pattern; lhs_type; eq; let_rhs} =
print_pattern pattern;
(match lhs_type with
@ -706,10 +703,10 @@ and print_list_expr undo = function
print_token op "::";
print_expr undo arg2
| List e -> print_injection (print_expr undo) e
| Append {value=e1,append,e2; _} ->
(*| Append {value=e1,append,e2; _} ->
print_expr undo e1;
print_token append "@";
print_expr undo e2
print_expr undo e2 *)
and print_arith_expr undo = function
Add {value={arg1;op;arg2}; _} ->
@ -763,9 +760,9 @@ and print_comp_expr undo = function
print_expr undo arg1; print_token op "="; print_expr undo arg2
and print_record_expr undo e =
print_injection (print_field_assignment undo) e
print_injection (print_field_assign undo) e
and print_field_assignment undo {value; _} =
and print_field_assign undo {value; _} =
let {field_name; assignment; field_expr} = value in
print_var field_name;
print_token assignment "=";
@ -796,9 +793,9 @@ and print_case_clause undo {value; _} =
print_token arrow "->";
print_expr undo rhs
and print_let_in undo (kwd_let, let_bindings, kwd_in, expr) =
and print_let_in undo (kwd_let, let_binding, kwd_in, expr) =
print_token kwd_let "let";
print_let_bindings undo let_bindings;
print_let_binding undo let_binding;
print_token kwd_in "in";
print_expr undo expr
@ -819,3 +816,7 @@ and print_conditional undo {value; _} =
print_token kwd_else "else";
print_expr undo ifnot;
print_token ghost ")"
let rec unpar = function
EPar {value={inside=expr;_}; _} -> unpar expr
| e -> e

View File

@ -118,15 +118,12 @@ and ast = t
and eof = Region.t
and declaration =
Let of (kwd_let * let_bindings) reg (* let p = e and ... *)
| LetEntry of (kwd_let_entry * let_binding) reg (* let%entry p = e and ... *)
Let of (kwd_let * let_binding) reg (* let p = e *)
| LetEntry of (kwd_let_entry * let_binding) reg (* let%entry p = e *)
| TypeDecl of type_decl reg (* type ... *)
(* Non-recursive values *)
and let_bindings =
(let_binding, kwd_and) Utils.nsepseq (* p1 = e1 and p2 = e2 ... *)
and let_binding = { (* p = e p : t = e *)
pattern : pattern;
lhs_type : (colon * type_expr) option;
@ -248,7 +245,7 @@ and closing =
and list_expr =
Cons of cat bin_op reg (* e1 :: e3 *)
| List of expr injection reg (* [e1; e2; ...] *)
| Append of (expr * append * expr) reg (* e1 @ e2 *)
(*| Append of (expr * append * expr) reg *) (* e1 @ e2 *)
and string_expr =
Cat of cat bin_op reg (* e1 ^ e2 *)
@ -305,9 +302,9 @@ and selection =
FieldName of variable
| Component of (string * Z.t) reg par reg
and record_expr = field_assignment reg injection reg
and record_expr = field_assign reg injection reg
and field_assignment = {
and field_assign = {
field_name : field_name;
assignment : equal;
field_expr : expr
@ -330,7 +327,7 @@ and 'a case_clause = {
rhs : 'a
}
and let_in = kwd_let * let_bindings * kwd_in * expr
and let_in = kwd_let * let_binding * kwd_in * expr
and fun_expr = (kwd_fun * variable * arrow * expr) reg
@ -479,3 +476,11 @@ val print_tokens : ?undo:bool -> ast -> unit
val region_of_pattern : pattern -> Region.t
val region_of_expr : expr -> Region.t
(* Simplifications *)
(* The call [unpar e] is the expression [e] if [e] is not
parenthesised, otherwise it is the non-parenthesised expression it
contains. *)
val unpar : expr -> expr

View File

@ -1,5 +1,14 @@
(* Parsing the command-line option for the Mini-ML compiler/interpreter *)
type options = {
input : string option;
eval : bool;
compile : string option;
libs : string list;
verbose : Utils.String.Set.t;
raw_edits : bool
}
let abort msg =
Utils.highlight (Printf.sprintf "Command-line error: %s" msg); exit 1
@ -37,6 +46,8 @@ and verbose = ref Utils.String.Set.empty
and libs = ref []
and raw_edits = ref false
let verb_str = ref ""
let set_opt var err =
Some (fun x -> if !var = None then var := Some x else raise (Getopt.Error err))
@ -69,11 +80,6 @@ let anonymous arg =
None -> input := Some arg
| Some _ -> abort (sprintf "Multiple inputs")
(* Parsing the command-line options *)
let () = try Getopt.parse_cmdline specs anonymous with
Getopt.Error msg -> abort msg
(* Checking options *)
let string_of convert = function
@ -86,21 +92,18 @@ let string_of_path p =
let quote s = Printf.sprintf "\"%s\"" s
let verb_str =
let apply e a =
if a <> "" then Printf.sprintf "%s, %s" e a else e
in Utils.String.Set.fold apply !verbose ""
let print_opt () =
printf "COMMAND LINE\n";
printf "input = %s\n" (string_of quote !input);
printf "compile = %s\n" (string_of quote !compile);
printf "eval = %B\n" !eval;
printf "raw_edits = %b\n" !raw_edits;
printf "verbose = %s\n" verb_str;
printf "verbose = %s\n" !verb_str;
printf "libs = %s\n" (string_of_path !libs)
let () = if Utils.String.Set.mem "cmdline" !verbose then print_opt ()
let check () =
let () =
if Utils.String.Set.mem "cmdline" !verbose then print_opt () in
let input =
match !input with
@ -113,7 +116,7 @@ let input =
then if Sys.file_exists file_path
then Some file_path
else abort "Source file not found."
else abort "Source file lacks the extension .mml."
else abort "Source file lacks the extension .mml." in
let compile =
match !compile with
@ -126,14 +129,14 @@ let compile =
| Some compile' ->
if Filename.check_suffix compile' ".ml"
then !compile
else abort "The extension of the target OCaml file is not .ml"
else abort "The extension of the target OCaml file is not .ml" in
(* Exporting remaining options as non-mutable values *)
let eval = !eval
and verbose = !verbose
and libs = !libs
and raw_edits = !raw_edits
and raw_edits = !raw_edits in
let () =
if Utils.String.Set.mem "cmdline" verbose then
@ -143,6 +146,20 @@ let () =
printf "compile = %s\n" (string_of quote compile);
printf "eval = %B\n" eval;
printf "raw_edits = %B\n" raw_edits;
printf "verbose = %s\n" verb_str;
printf "verbose = %s\n" !verb_str;
printf "I = %s\n" (string_of_path libs)
end
in {input; eval; compile; libs; verbose; raw_edits}
(* Parsing the command-line options *)
let read () =
try
Getopt.parse_cmdline specs anonymous;
(verb_str :=
let apply e a =
if a <> "" then Printf.sprintf "%s, %s" e a else e
in Utils.String.Set.fold apply !verbose "");
check ()
with Getopt.Error msg -> abort msg

View File

@ -5,8 +5,6 @@
[Some "-"] or [None], the source file is read from standard
input. *)
val input : string option
(* The Mini-ML source file can be processed in two mutually exclusive
manners: if the value [eval] is set to [true], the source is
interpreted; if the value [compile] is not [None], the source is
@ -14,11 +12,13 @@ val input : string option
nothing is done with the source. Note: if [compile] is [Some "-"],
the compiled code is sent to standard output. *)
val eval : bool
val compile : string option
type options = {
input : string option;
eval : bool;
compile : string option;
libs : string list;
verbose : Utils.String.Set.t;
raw_edits : bool
}
(* TODO *)
val libs : string list
val verbose : Utils.String.Set.t
val raw_edits : bool
val read : unit -> options
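
A minimal usage sketch of this interface (mirroring what LexerMain
and ParserMain do below; the printing is only illustrative):

  let () =
    let options = EvalOpt.read () in
    match options.EvalOpt.input with
      Some file -> Printf.printf "Input file: %s\n" file
    | None -> Printf.printf "Reading from standard input.\n"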

View File

@ -17,20 +17,22 @@ exception Error of message Region.reg
tokens to the given channel. If no logger is given to [get_token],
no printing takes place while the lexer runs.
The call [reset ~file ~line buffer] modifies in-place the lexing
buffer [buffer] so the lexing engine records that the file
associated with [buffer] is named [file], and the current line is
[line]. This function is useful when lexing a file that has been
previously preprocessed by the C preprocessor, in which case the
argument [file] is the name of the file that was preprocessed,
_not_ the preprocessed file (of which the user is not normally
aware). By default, the [line] argument is [1].
The call [reset ~file ~line ~offset buffer] modifies in-place the
lexing buffer [buffer] so the lexing engine records that the file
associated with [buffer] is named [file], the current line is
[line] and the offset on that line is [offset]. This function is
useful when lexing a file that has been previously preprocessed by
the C preprocessor, in which case the argument [file] is the name
of the file that was preprocessed, _not_ the preprocessed file (of
which the user is not normally aware). By default, the [line]
argument is [1].
*)
type logger = out_channel * (out_channel -> Token.t -> unit)
val get_token : ?log:logger -> Lexing.lexbuf -> Token.t
val reset : file:string -> ?line:int -> Lexing.lexbuf -> unit
val reset : ?file:string -> ?line:int -> ?offset:int -> Lexing.lexbuf -> unit
val reset_file : file:string -> Lexing.lexbuf -> unit
(* Debugging *)
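
As a sketch of how the new optional arguments are meant to be used
(the file name is hypothetical), re-synchronising a lexing buffer
after a CPP linemarker amounts to:

  let () =
    let buffer = Lexing.from_channel stdin in
    (* Only the labelled arguments actually given are applied. *)
    Lexer.reset ~file:"foo.mligo" ~line:1 ~offset:0 buffer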

View File

@ -78,7 +78,6 @@ let fail region value = raise (Error Region.{region; value})
(* KEYWORDS *)
let keywords = Token.[
"and", Some And;
"begin", Some Begin;
"else", Some Else;
"false", Some False;
@ -99,6 +98,7 @@ let keywords = Token.[
(* Reserved *)
"and", None;
"as", None;
"asr", None;
"assert", None;
@ -152,12 +152,28 @@ let reset_file ~file buffer =
let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname = file}
let reset_line lnum buffer =
let reset_line ~line buffer =
let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = lnum}
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = line}
let reset ~file ?(line=1) buffer =
reset_file ~file buffer; reset_line line buffer
let reset_offset ~offset buffer =
assert (offset >= 0);
let open Lexing in
let bol = buffer.lex_curr_p.pos_bol in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum = bol + offset }
let reset ?file ?line ?offset buffer =
let () =
match file with
Some file -> reset_file ~file buffer
| None -> () in
let () =
match line with
Some line -> reset_line ~line buffer
| None -> () in
match offset with
Some offset -> reset_offset ~offset buffer
| None -> ()
(* Hack to roll back one lexeme in the current semantic action *)
(*
@ -222,7 +238,7 @@ rule scan = parse
| "->" { Token.ARROW }
| "::" { Token.CONS }
| "^" { Token.CAT }
| "@" { Token.APPEND }
(*| "@" { Token.APPEND }*)
| "=" { Token.EQ }
| "<>" { Token.NE }
@ -294,10 +310,54 @@ rule scan = parse
let () = ignore thread
in scan lexbuf }
(* Management of #include CPP directives
An input LIGO program may contain GNU CPP (C preprocessor)
directives, and the entry modules (named *Main.ml) run CPP on them
in traditional mode:
https://gcc.gnu.org/onlinedocs/cpp/Traditional-Mode.html
The main interest in using CPP is that it can stand for a poor
man's (flat) module system for LIGO thanks to #include
directives, and the traditional mode leaves the markup mostly
undisturbed.
Some of the #line resulting from processing #include directives
deal with system file headers and thus have to be ignored for our
purpose. Moreover, these #line directives may also carry some
additional flags:
https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
of which 1 and 2 indicate, respectively, the start of a new file
and the return from a file (after its inclusion has been
processed).
*)
| '#' blank* ("line" blank+)? (integer as line) blank+
'"' (string as file) '"' {
let flags = scan_flags [] lexbuf in
let () = ignore flags in
let line = int_of_string line
and file = Filename.basename file in
let () = reset ~file ~line ~offset:0 lexbuf
in scan lexbuf
}
| _ as c { let msg = sprintf "Invalid character '%s'."
(Char.escaped c)
in error lexbuf msg }
(* Scanning CPP #include flags *)
and scan_flags acc = parse
blank+ { scan_flags acc lexbuf }
| integer as code { let acc = int_of_string code :: acc
in scan_flags acc lexbuf }
| nl { Lexing.new_line lexbuf; List.rev acc }
| eof { List.rev acc }
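(* For reference, a hypothetical fragment of CPP output handled by
   the rule above (the file name is made up):

     # 1 "imported.mligo" 1
     let imported_value = 42

   On the linemarker, [line] is bound to 1, [file] to
   "imported.mligo" and [flags] to [1] (start of a new file);
   [reset] is then called so that subsequent token regions point
   into imported.mligo rather than into the preprocessed stream. *)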
(* Finishing a string *)
and scan_string thread = parse
@ -371,8 +431,8 @@ let iter action file_opt =
try
let cin, reset =
match file_opt with
None | Some "-" -> stdin, fun ?(line=1) _ -> ignore line
| Some file -> open_in file, reset ~file in
None | Some "-" -> stdin, ignore
| Some file -> open_in file, reset_file ~file in
let buffer = Lexing.from_channel cin in
let rec iter () =
try

View File

@ -6,7 +6,11 @@ Printexc.record_backtrace true;;
(* Running the lexer on the source *)
if Utils.String.Set.mem "lexer" EvalOpt.verbose then
Lexer.trace EvalOpt.input
else Lexer.iter (fun _lexbuf _out _token -> ()) EvalOpt.input
let options = EvalOpt.read ();;
open EvalOpt;;
if Utils.String.Set.mem "lexer" options.verbose then
Lexer.trace options.input
else Lexer.iter (fun _lexbuf _out _token -> ()) options.input
;;

View File

@ -18,7 +18,7 @@
%token ARROW
%token CONS
%token CAT
%token APPEND
(*%token APPEND*)
%token DOT
%token COMMA
@ -46,7 +46,7 @@
%token <string * Z.t> Mtz
%token <string * Z.t> Nat
%token And
(*%token And*)
%token Begin
%token Else
%token End

View File

@ -149,7 +149,7 @@ program:
nseq(declaration) eof { {decl=$1; eof=$2} }
declaration:
reg(kwd(Let) let_bindings {$1,$2}) { Let $1 }
reg(kwd(Let) let_binding {$1,$2}) { Let $1 }
| reg(kwd(LetEntry) let_binding {$1,$2}) { LetEntry $1 }
| reg(type_decl) { TypeDecl $1 }
@ -234,9 +234,6 @@ field_decl:
(* Non-recursive definitions *)
let_bindings:
nsepseq(let_binding, kwd(And)) { $1 }
let_binding:
ident nseq(sub_irrefutable) type_annotation? eq expr {
let let_rhs = EFun (norm $2 $4 $5) in
@ -389,7 +386,7 @@ case_clause(right_expr):
pattern arrow right_expr { {pattern=$1; arrow=$2; rhs=$3} }
let_expr(right_expr):
reg(kwd(Let) let_bindings kwd(In) right_expr {$1,$2,$3,$4}) {
reg(kwd(Let) let_binding kwd(In) right_expr {$1,$2,$3,$4}) {
ELetIn $1 }
fun_expr(right_expr):
@ -447,14 +444,16 @@ ne_expr:
cat_expr_level:
reg(cat_expr) { EString (Cat $1) }
| reg(append_expr) { EList (Append $1) }
(*| reg(append_expr) { EList (Append $1) } *)
| cons_expr_level { $1 }
cat_expr:
bin_op(cons_expr_level, sym(CAT), cat_expr_level) { $1 }
(*
append_expr:
cons_expr_level sym(APPEND) cat_expr_level { $1,$2,$3 }
*)
cons_expr_level:
reg(cons_expr) { EList (Cons $1) }

View File

@ -4,10 +4,16 @@
Printexc.record_backtrace true;;
(* Reading the command-line options *)
let options = EvalOpt.read ()
open EvalOpt
(* Path to the Mini-ML standard library *)
let lib_path =
match EvalOpt.libs with
match options.libs with
[] -> ""
| libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path
in List.fold_right mk_I libs ""
@ -15,9 +21,9 @@ let lib_path =
(* Opening the input channel and setting the lexing engine *)
let cin, reset =
match EvalOpt.input with
None | Some "-" -> stdin, fun ?(line=1) _buffer -> ignore line
| Some file -> open_in file, Lexer.reset ~file
match options.input with
None | Some "-" -> stdin, ignore
| Some file -> open_in file, Lexer.reset_file ~file
let buffer = Lexing.from_channel cin
let () = reset buffer
@ -25,14 +31,14 @@ let () = reset buffer
(* Tokeniser *)
let tokeniser =
if Utils.String.Set.mem "lexer" EvalOpt.verbose then
if Utils.String.Set.mem "lexer" options.verbose then
Lexer.get_token ~log:(stdout, Lexer.output_token buffer)
else Lexer.get_token ?log:None
let () =
try
let ast = Parser.program tokeniser buffer in
if Utils.String.Set.mem "unparsing" EvalOpt.verbose then
if Utils.String.Set.mem "unparsing" options.verbose then
AST.print_tokens ~undo:true ast
else () (* AST.print_tokens ast *)
with

View File

@ -6,7 +6,7 @@ type t =
ARROW
| CONS
| CAT
| APPEND
(*| APPEND*)
| MINUS
| PLUS
| SLASH
@ -44,7 +44,7 @@ type t =
(* Keywords *)
| And
(*| And*)
| Begin
| Else
| End
@ -87,7 +87,7 @@ let to_string = function
ARROW -> "->"
| CONS -> "::"
| CAT -> "^"
| APPEND -> "@"
(*| APPEND -> "@"*)
| MINUS -> "-"
| PLUS -> "+"
| SLASH -> "/"
@ -119,7 +119,7 @@ let to_string = function
| Mtz (lex,z) -> sprintf "Mtz %s (%s)" lex (Z.to_string z)
| Str n -> sprintf "Str \"%s\"" n
| Bytes (lex,h) -> sprintf "Bytes %s (0x%s)" lex (Hex.to_string h)
| And -> "and"
(*| And -> "and"*)
| Begin -> "begin"
| Else -> "else"
| End -> "end"

View File

@ -6,7 +6,7 @@ type t =
ARROW (* "->" *)
| CONS (* "::" *)
| CAT (* "^" *)
| APPEND (* "@" *)
(*| APPEND (* "@" *)*)
(* Arithmetics *)
@ -60,7 +60,7 @@ type t =
(* Keywords *)
| And
(*| And*)
| Begin
| Else
| End