ligo/src/parser/ligodity/AST.mli

526 lines
16 KiB
OCaml
Raw Normal View History

[@@@warning "-30"]
(* Abstract Syntax Tree (AST) for Mini-ML *)
(* Regions
The AST carries all the regions where tokens have been found by the
lexer, plus additional regions corresponding to whole subtrees
(like entire expressions, patterns etc.). These regions are needed
for error reporting and source-to-source transformations. To make
these pervasive regions more legible, we define singleton types for
the symbols, keywords etc. with suggestive names like "kwd_and"
denoting the _region_ of the occurrence of the keyword "and".
*)
(* Shorthand for the parametric type [Region.reg] of module [Region],
so that it can be written ['a reg] throughout this interface. *)
type 'a reg = 'a Region.reg
(* Some keywords of OCaml *)
(* Every alias below is a plain [Region.t]: a value of type [kwd_xxx]
stands for the region of one occurrence of that keyword, as
explained in the "Regions" comment at the top of this file.
[keyword] is a generic alias that no other declaration visible in
this interface refers to. *)
type keyword = Region.t
type kwd_and = Region.t
type kwd_begin = Region.t
type kwd_else = Region.t
type kwd_end = Region.t
type kwd_false = Region.t
type kwd_fun = Region.t
type kwd_if = Region.t
type kwd_in = Region.t
type kwd_let = Region.t
type kwd_let_entry = Region.t (* "let%entry" *)
type kwd_match = Region.t
type kwd_mod = Region.t
type kwd_not = Region.t
type kwd_of = Region.t
type kwd_or = Region.t
type kwd_then = Region.t
type kwd_true = Region.t
type kwd_type = Region.t
type kwd_with = Region.t
(* Symbols *)
(* Regions of miscellaneous symbols; the quoted lexeme after each
alias is the symbol whose occurrence the region records. *)
type arrow = Region.t (* "->" *)
type cons = Region.t (* "::" *)
type cat = Region.t (* "^" *)
type append = Region.t (* "@" *)
type dot = Region.t (* "." *)
(* Arithmetic operators *)
(* Regions of the arithmetic symbols. [minus] tags both the binary
[Sub] and the unary [Neg] constructors of [arith_expr], and [times]
doubles as the separator of product types in [cartesian]. *)
type minus = Region.t (* "-" *)
type plus = Region.t (* "+" *)
type slash = Region.t (* "/" *)
type times = Region.t (* "*" *)
(* Boolean operators *)
(* Regions of the symbolic boolean operators. No other declaration
visible in this interface refers to these two aliases: [bool_expr]
is tagged with [kwd_or] and [kwd_and] instead. *)
type bool_or = Region.t (* "||" *)
type bool_and = Region.t (* "&&" *)
(* Comparisons *)
(* Regions of the comparison symbols. [equal] is also the "=" of
let-bindings, type declarations and record fields. *)
type equal = Region.t (* "=" *)
type neq = Region.t (* "<>" *)
type lt = Region.t (* "<" *)
type gt = Region.t (* ">" *)
type leq = Region.t (* "=<" *)
(* NOTE(review): OCaml itself spells this operator "<="; confirm
against the lexer which lexeme is really meant for [leq]. *)
type geq = Region.t (* ">=" *)
(* Compounds *)
(* Regions of the delimiters. Parentheses are used by ['a par] and
[the_unit], square brackets by ['a brackets], and braces by the
[LBrace] and [RBrace] constructors of [opening] and [closing]. *)
type lpar = Region.t (* "(" *)
type rpar = Region.t (* ")" *)
type lbracket = Region.t (* "[" *)
type rbracket = Region.t (* "]" *)
type lbrace = Region.t (* "{" *)
type rbrace = Region.t (* "}" *)
(* Separators *)
(* Regions of the separators used by tuples, sequences, sum types and
type annotations. *)
type comma = Region.t (* "," *)
type semi = Region.t (* ";" *)
type vbar = Region.t (* "|" *)
type colon = Region.t (* ":" *)
(* Wildcard *)
(* Region of a wildcard; it is the payload of the [PWild] pattern. *)
type wild = Region.t (* "_" *)
(* Literals *)
(* All six aliases are the same type, [string reg]; the distinct names
only document the syntactic role of the identifier. Presumably the
string is the lexeme and the [reg] wrapper adds its region -- TODO
confirm in module [Region]. *)
type variable = string reg
type fun_name = string reg
type type_name = string reg
type field_name = string reg
type type_constr = string reg
type constr = string reg
(* Parentheses *)
(* A node of type ['a] enclosed in parentheses: [inside] is the
enclosed node, [lpar] and [rpar] are the regions of the opening and
closing parentheses. *)
type 'a par = {
lpar : lpar;
inside : 'a;
rpar : rpar
}
(* The unit value, kept as the regions of its opening and closing
parentheses. Used by the [PUnit] pattern and the [Unit] expression. *)
type the_unit = lpar * rpar
(* Brackets compounds *)
(* A node of type ['a] enclosed in square brackets: [inside] is the
enclosed node, [lbracket] and [rbracket] are the regions of the two
brackets. The label [inside] is shared with ['a par]. *)
type 'a brackets = {
lbracket : lbracket;
inside : 'a;
rbracket : rbracket
}
(* The Abstract Syntax Tree (finally) *)
(* Root of the tree. [decl] is the sequence of top-level declarations
(a [Utils.nseq], presumably non-empty -- TODO confirm in module
[Utils]) and [eof] is the region of the end-of-file marker.
This type opens one long chain of mutually recursive definitions
in which several records reuse the same labels ([eq], [pattern],
[colon], [type_expr], [field_name], [op]); that reuse is what
warning 30, disabled at the top of this file, would report. *)
type t = {
decl : declaration Utils.nseq;
eof : eof
}
(* [ast] is a synonym of [t]. *)
and ast = t
and eof = Region.t
(* Top-level declarations. [Let] carries a whole [let_bindings]
sequence, whereas [LetEntry] carries exactly one [let_binding]. *)
and declaration =
Let of (kwd_let * let_bindings) reg (* let p = e and ... *)
| LetEntry of (kwd_let_entry * let_binding) reg (* let%entry p = e *)
| TypeDecl of type_decl reg (* type ... *)
(* Non-recursive values *)
(* Bindings of one "let", separated by the regions of the "and"
keywords. *)
and let_bindings =
(let_binding, kwd_and) Utils.nsepseq (* p1 = e1 and p2 = e2 ... *)
(* One binding, with or without a type annotation on the left-hand
side. *)
and let_binding = { (* p = e   or   p : t = e *)
pattern : pattern; (* the bound pattern p *)
lhs_type : (colon * type_expr) option; (* the optional ": t" *)
eq : equal; (* region of "=" *)
let_rhs : expr (* the bound expression e *)
}
(* Recursive types *)
(* A type declaration: keyword "type", declared name, "=" sign and
defining type expression, in that order. *)
and type_decl = {
kwd_type : kwd_type; (* region of "type" *)
name : type_name; (* name being declared *)
eq : equal; (* region of "=" *)
type_expr : type_expr (* right-hand side *)
}
(* Type expressions. The example syntax given for [TApp] is an
assumption: this interface only fixes that the constructor is
stored before its arguments. *)
and type_expr =
TProd of cartesian (* t1 * t2 * ... *)
| TSum of (variant reg, vbar) Utils.nsepseq reg (* A | B of t | ... *)
| TRecord of record_type (* fields between braces or begin/end *)
| TApp of (type_constr * type_tuple) reg (* constructor applied to (t1, t2, ...) *)
| TFun of (type_expr * arrow * type_expr) reg (* t1 -> t2 *)
| TPar of type_expr par reg (* (t) *)
| TAlias of variable (* a type referred to by name *)
(* Components of a product type, separated by the regions of the "*"
symbols. *)
and cartesian = (type_expr, times) Utils.nsepseq reg
(* One alternative of a sum type: a data constructor, optionally
followed by the keyword "of" and the product of its argument
types. *)
and variant = {
constr : constr;
args : (kwd_of * cartesian) option
}
(* A record type is an [injection] of field declarations. *)
and record_type = field_decl reg injection reg
(* One field of a record type: name, ":" and type of the field. *)
and field_decl = {
field_name : field_name;
colon : colon;
field_type : type_expr
}
(* Arguments of a type constructor: comma-separated type expressions
between parentheses. *)
and type_tuple = (type_expr, comma) Utils.nsepseq par
(* Generic shape shared by record types, record patterns, record
expressions and sequences: elements separated by semicolons,
between an opening and a closing delimiter. [terminator] is the
region of an optional semicolon recorded apart from the
separators. [elements] is a [Utils.sepseq], presumably allowing
the empty sequence -- TODO confirm in module [Utils]. Nothing in
this type forces [opening] and [closing] to be of the same kind. *)
and 'a injection = {
opening : opening;
elements : ('a, semi) Utils.sepseq;
terminator : semi option;
closing : closing
}
(* Opening delimiter: the keyword "begin" or a left brace. *)
and opening =
Begin of kwd_begin
| LBrace of lbrace
(* Closing delimiter: the keyword "end" or a right brace. *)
and closing =
End of kwd_end
| RBrace of rbrace
(* Patterns. Each constructor either carries a region directly (a
keyword or symbol alias) or wraps its payload in [reg]. The pair
carried by [PInt] is presumably the lexeme and its numerical value
-- TODO confirm against the lexer. *)
and pattern =
PTuple of (pattern, comma) Utils.nsepseq reg (* p1, p2, ... *)
| PList of (pattern, semi) Utils.sepseq brackets reg (* [p1; p2; ...] *)
| PVar of variable (* x *)
| PUnit of the_unit reg (* () *)
| PInt of (string * Z.t) reg (* 7 *)
| PTrue of kwd_true (* true *)
| PFalse of kwd_false (* false *)
| PString of string reg (* "foo" *)
| PWild of wild (* _ *)
| PCons of (pattern * cons * pattern) reg (* p1 :: p2 *)
| PPar of pattern par reg (* (p) *)
| PConstr of (constr * pattern option) reg (* A B(3,"") *)
| PRecord of record_pattern (* {a=...; ...} *)
| PTyped of typed_pattern reg (* (x : int) *)
(* A pattern with a type annotation: pattern, ":" and type. *)
and typed_pattern = {
pattern : pattern;
colon : colon;
type_expr : type_expr
}
(* A record pattern is an [injection] of field patterns. *)
and record_pattern = field_pattern reg injection reg
(* One field of a record pattern: field name, "=" and the pattern the
field is matched against. *)
and field_pattern = {
field_name : field_name;
eq : equal;
pattern : pattern
}
(* Expressions. Logical, arithmetic and string expressions are
factored out into [logic_expr], [arith_expr] and [string_expr]. *)
and expr =
LetIn of let_in reg (* let p1 = e1 and p2 = e2 and ... in e *)
| Fun of fun_expr (* fun x -> e *)
| If of conditional (* if e1 then e2 else e3 *)
| ETuple of (expr, comma) Utils.nsepseq reg (* e1, e2, ... *)
| Match of match_expr reg (* match e with p1 -> e1 | p2 -> e2 | ... *)
| Seq of sequence (* begin e1; e2; ... ; en end *)
| ERecord of record_expr (* {f1=e1; ... } *)
| Append of (expr * append * expr) reg (* e1 @ e2 *)
| Cons of (expr * cons * expr) reg (* e1 :: e2 *)
| ELogic of logic_expr (* boolean and comparison expressions *)
| EArith of arith_expr (* arithmetic expressions and numerals *)
| EString of string_expr (* string literals and catenations *)
| Call of (expr * expr) reg (* f e *)
| Path of path reg (* x x.y.z *)
| Unit of the_unit reg (* () *)
| Par of expr par reg (* (e) *)
| EList of (expr, semi) Utils.sepseq brackets reg (* [e1; e2; ...] *)
| EConstr of constr (* a data constructor on its own *)
(*| Extern of extern*)
(* String expressions: catenation and literals. *)
and string_expr =
Cat of cat bin_op reg (* e1 ^ e2 *)
| String of string reg (* "foo" *)
(* Arithmetic expressions. The three numeral constructors carry a
pair [string * Z.t], presumably the lexeme as written and its
numerical value -- TODO confirm against the lexer. *)
and arith_expr =
Add of plus bin_op reg (* e1 + e2 *)
| Sub of minus bin_op reg (* e1 - e2 *)
| Mult of times bin_op reg (* e1 * e2 *)
| Div of slash bin_op reg (* e1 / e2 *)
| Mod of kwd_mod bin_op reg (* e1 mod e2 *)
| Neg of minus un_op reg (* -e *)
| Int of (string * Z.t) reg (* 12345 *)
| Nat of (string * Z.t) reg (* 3p *)
| Mtz of (string * Z.t) reg (* 1.00tz 3tz *)
(* Logical expressions are either boolean expressions or
comparisons. *)
and logic_expr =
BoolExpr of bool_expr
| CompExpr of comp_expr
(* Boolean connectives and constants.
NOTE(review): [Or] and [And] are tagged with the keyword aliases
[kwd_or] and [kwd_and], although the symbol aliases [bool_or] and
[bool_and] are declared earlier in this file. All of them are
[Region.t], so this only affects readability -- confirm which
lexemes the parser really accepts. *)
and bool_expr =
Or of kwd_or bin_op reg
| And of kwd_and bin_op reg
| Not of kwd_not un_op reg
| True of kwd_true
| False of kwd_false
(* A binary operation: ['a] is the alias naming the operator, [op] the
region of its occurrence, [arg1] and [arg2] the two operands. *)
and 'a bin_op = {
op : 'a;
arg1 : expr;
arg2 : expr
}
(* A unary operation: [op] is the region of the operator and [arg] its
single operand. *)
and 'a un_op = {
op : 'a;
arg : expr
}
(* Comparisons. Each one is a [bin_op] whose [op] is the region of the
comparison symbol named by the type parameter. *)
and comp_expr =
Lt of lt bin_op reg
| Leq of leq bin_op reg
| Gt of gt bin_op reg
| Geq of geq bin_op reg
| Equal of equal bin_op reg
| Neq of neq bin_op reg
(* Disabled alternative encoding of the same constructors, with plain
triples instead of [bin_op] records: *)
(*
| Lt of (expr * lt * expr) reg
| LEq of (expr * le * expr) reg
| Gt of (expr * gt * expr) reg
| GEq of (expr * ge * expr) reg
| NEq of (expr * ne * expr) reg
| Eq of (expr * eq * expr) reg
*)
(* A possibly qualified name. [module_proj] is an optional leading
constructor followed by a dot; [value_proj] is a sequence of
selections separated by dots (a [Utils.nsepseq], presumably
non-empty -- TODO confirm in module [Utils]). *)
and path = {
module_proj : (constr * dot) option;
value_proj : (selection, dot) Utils.nsepseq
}
(* One step of a path: either a name, or an integer between
parentheses (the pair is presumably the lexeme and its value). *)
and selection =
Name of variable
| Component of (string * Z.t) reg par reg
(* A record expression is an [injection] of field assignments. *)
and record_expr = field_assignment reg injection reg
(* One field of a record expression: field name, "=" (kept in the
label [assignment]) and the expression assigned to the field. *)
and field_assignment = {
field_name : field_name;
assignment : equal;
field_expr : expr
}
(* A sequence of expressions is an [injection] of expressions. *)
and sequence = expr injection reg
(* match e with cases: the two keywords are kept as regions. *)
and match_expr = kwd_match * expr * kwd_with * cases
(* The clauses of a match: an optional leading "|", then clauses of
the form "pattern -> expression" separated by "|". *)
and cases =
vbar option * (pattern * arrow * expr, vbar) Utils.nsepseq
(* let bindings in e *)
and let_in = kwd_let * let_bindings * kwd_in * expr
(* A lambda takes exactly one parameter, and that parameter is a
variable, not a pattern: fun x -> e *)
and fun_expr = (kwd_fun * variable * arrow * expr) reg
(* Conditionals, with or without an "else" branch. *)
and conditional =
IfThen of (kwd_if * expr * kwd_then * expr) reg
| IfThenElse of (kwd_if * expr * kwd_then * expr * kwd_else * expr) reg
(*
and extern =
Cast of cast_expr
| Print of print_expr
| Scanf of scanf_expr
| PolyEq of (variable * variable) (* polymorphic equality *)
and cast_expr =
StringOfInt of variable (* string_of_int x *)
| StringOfBool of variable (* string_of_bool x *)
and print_expr =
PrintString of variable (* print_string x *)
| PrintInt of variable (* print_int x *)
and scanf_expr =
ScanfString of variable (* scanf_string x *)
| ScanfInt of variable (* scanf_int x *)
| ScanfBool of variable (* scanf_bool x *)
*)
(* Normalising nodes of the AST so the interpreter is more uniform and
no source regions are lost in order to enable all manner of
source-to-source transformations from the rewritten AST and the
initial source.
The first kind of expressions to be normalised is lambdas, like:
fun a -> fun b -> a
fun a b -> a
fun a (b,c) -> a
to become
fun a -> fun b -> a
fun a -> fun b -> a
fun a -> fun x -> let (b,c) = x in a
The second kind is let-bindings introducing functions without the
"fun" keyword, like
let g a b = a
let h a (b,c) = a
which become
let g = fun a -> fun b -> a
let h = fun a -> fun x -> let (b,c) = x in a
The former is actually a subcase of the latter. Indeed, the general
shape of the former is
fun <patterns> -> <expr>
and the latter is
let <ident> <patterns> = <expr>
The isomorphic parts are "<patterns> -> <expr>" and "<patterns> =
<expr>".
The call [norm patterns sep expr], where [sep] is a region either
of an "->" or a "=", evaluates to a function expression (lambda),
as expected. In order to get the regions right in the case of
lambdas, additional regions are passed: [norm ~reg:(total,kwd_fun)
patterns sep expr], where [total] is the region for the whole
lambda (even if the resulting lambda is actually longer: we want to
keep the region of the original), and the region of the original
"fun" keyword.
*)
(* [sep] is the region of the token separating the parameters from the
body: an "->" in a lambda, a "=" in a let-binding. *)
type sep = Region.t
(* [norm ?reg patterns sep expr] builds the function expression
described in the comment above. [reg], when given, is the pair
made of the region of the whole original lambda and the region of
its "fun" keyword. [patterns] is the sequence of parameters (a
[Utils.nseq], presumably non-empty -- TODO confirm in module
[Utils]) and [expr] is the body. *)
val norm : ?reg:(Region.t * kwd_fun) -> pattern Utils.nseq -> sep -> expr -> fun_expr
(* Undoing the above rewritings (for debugging by comparison with the
lexer, and to feed the source-to-source transformations with only
tokens that originated from the original input).
Unparsing is performed on an expression which is expected to be a
series "fun ... -> fun ... -> ...". Either this expression is the
right-hand side of a let, or it is not. These two cases are
distinguished by the function [unparse], depending on the first
keyword "fun" being concrete or ghostly (virtual). In the former
case, we are unparsing an expression which was originally starting
with "fun"; in the latter, we are unparsing an expression that was
parsed on the right-hand side of a let construct. In other words,
in the former case, we expect to reconstruct
fun p_1 ... p_n -> e
whereas, in the latter case, we want to obtain
let f p_1 ... p_n = e
In any case, the heart of the unparsing is the same, and this is
why the data constructors [`Fun] and [`Let] of the type [unparsed]
share a common shape: [pattern Utils.nseq * Region.t * expr], where the
region actually denotes either the alias type [arrow] or [equal]. Let us
assume a value of this triple [patterns, separator_region,
expression]. Then the context (handled by [unparse]) decides if
[separator_region] is the region of a "=" sign or "->".
There are two forms to be unparsed:
fun x_1 -> let p_1 = x_1 in ... fun x_n -> let p_n = x_n in e
or
fun p_1 -> ... fun p_n -> e
in the first case, the rightmost "=" becomes [separator_region]
above, whereas, in the second case, it is the rightmost "->".
Here are some examples covering all cases:
let rec f = fun a -> fun b -> a
let rec g = fun a b -> a
let rec h = fun a (b,c) -> a
let rec fst = fun (x,_) -> x
let rec g a b = a
let rec h (b,c) a (d,e) = a
let len = (fun n _ -> n)
let f l = let n = l in n
*)
(* Result of [unparse]:
- [`Fun (kwd_fun, (patterns, arrow, body))] is a lambda, given with
the region of its "fun" keyword;
- [`Let (patterns, equal, body)] holds the parameters, the "=" sign
and the body of a function introduced by a let-binding;
- [`Idem e] carries an expression as is, presumably the input
itself when there is nothing to undo -- TODO confirm in the
implementation. *)
type unparsed = [
`Fun of (kwd_fun * (pattern Utils.nseq * arrow * expr))
| `Let of (pattern Utils.nseq * equal * expr)
| `Idem of expr
]
(* [unparse e] classifies [e] into one of the three cases of
[unparsed]. *)
val unparse : expr -> unparsed
(* Conversions to type [string] *)
(*
val to_string : t -> string
val pattern_to_string : pattern -> string
*)
(* Printing the tokens reconstructed from the AST. This is very useful
for debugging, as the output of [print_tokens ast] can be textually
compared to that of [Lexer.trace] (see module [LexerMain]). The
optional parameter [undo] is bound to [true] if the caller wants
the AST to be unparsed before printing (those nodes that have been
normalised with function [norm]).
NOTE(review): this comment used to name [norm_let] and [norm_fun],
which this interface does not export; [norm] is the only
normalising function declared here. The value [undo] takes when
omitted is not visible from this interface. *)
val print_tokens : ?undo:bool -> ast -> unit
(* Projecting regions from sundry nodes of the AST. See the first
comment at the beginning of this file.
[region_of_pattern p] and [region_of_expr e] each return one
[Region.t] for the given node, presumably the region covering the
whole subtree -- TODO confirm in the implementation. *)
val region_of_pattern : pattern -> Region.t
val region_of_expr : expr -> Region.t
(* Removing all outermost parentheses from a given expression.
Presumably this unwraps [Par] nodes at the root of the expression
until a node of another kind is reached, and leaves parentheses
nested deeper in the tree alone -- TODO confirm in the
implementation. *)
val rm_par : expr -> expr
(* Predicates on expressions.
Each one takes an expression and answers with a boolean. Going by
their names they test for a variable, a function call and a
function expression respectively; which constructors of [expr]
count for each, and whether enclosing parentheses are looked
through, is decided by the implementation -- TODO confirm. *)
val is_var : expr -> bool
val is_call : expr -> bool
val is_fun : expr -> bool
(* Variables *)
(*
module Vars : Set.S with type elt = string
module FreeVars : Set.S with type elt = variable
(* The value of the call [vars t] is a pair of sets: the first is the
set of variables whose definitions are in the scope at the end of
the program corresponding to the AST [t], the second is the set of
free variables in that same AST.
Computing free variables is useful because we do not want to
escape a variable that is a predefined variable in OCaml, when we
translate the program to OCaml: this way, we make sure that an
unbound variable is caught before the translation (where it would
be wrongly captured by the OCaml compiler).
Dually, computing bound variables is useful when compiling to
OCaml.
*)
val vars : t -> Vars.t * FreeVars.t
*)