(* Source: ligo/src/passes/1-parser/pascaligo/AST.ml (OCaml, 804 lines, 17 KiB) *)
(* Abstract Syntax Tree (AST) for LIGO *)
(* Disable the warnings triggered by multiply-defined record labels. *)
[@@@warning "-30-40-42"]
2019-05-13 00:56:22 +04:00
(* Utilities *)
open Utils
(* Regions
The AST carries all the regions where tokens have been found by the
lexer, plus additional regions corresponding to whole subtrees
(like entire expressions, patterns etc.). These regions are needed
for error reporting and source-to-source transformations. To make
these pervasive regions more legible, we define singleton types for
the symbols, keywords etc. with suggestive names like "kwd_and"
denoting the _region_ of the occurrence of the keyword "and".
*)
module Region = Simple_utils.Region
2019-05-13 00:56:22 +04:00
(* Shorthand for [Region.reg], so that node types can be written
   [foo reg] instead of [foo Region.reg]. *)
type 'a reg = 'a Region.reg
(* Keywords of LIGO

   Every keyword has its own alias of [Region.t]. By the naming
   convention of this file, [kwd_xyz] denotes the region of an
   occurrence of the keyword named after the prefix. The aliases are
   listed in alphabetical order. *)

type keyword        = Region.t
type kwd_and        = Region.t
type kwd_attributes = Region.t
type kwd_begin      = Region.t
type kwd_block      = Region.t
type kwd_case       = Region.t
type kwd_const      = Region.t
type kwd_contains   = Region.t
type kwd_down       = Region.t
type kwd_else       = Region.t
type kwd_end        = Region.t
type kwd_for        = Region.t
type kwd_from       = Region.t
type kwd_function   = Region.t
type kwd_if         = Region.t
type kwd_in         = Region.t
type kwd_is         = Region.t
type kwd_list       = Region.t
type kwd_map        = Region.t
type kwd_mod        = Region.t
type kwd_nil        = Region.t
type kwd_not        = Region.t
type kwd_of         = Region.t
type kwd_or         = Region.t
type kwd_patch      = Region.t
type kwd_record     = Region.t
type kwd_recursive  = Region.t
type kwd_remove     = Region.t
type kwd_set        = Region.t
type kwd_skip       = Region.t
type kwd_step       = Region.t
type kwd_then       = Region.t
type kwd_to         = Region.t
type kwd_type       = Region.t
type kwd_var        = Region.t
type kwd_while      = Region.t
type kwd_with       = Region.t
(* Data constructors

   Same convention as for keywords: [c_Xyz] is an alias of [Region.t]
   standing for an occurrence of the predefined constructor [Xyz]. *)

type c_False = Region.t
type c_None  = Region.t
type c_Some  = Region.t
type c_True  = Region.t
type c_Unit  = Region.t
(* Symbols

   One alias of [Region.t] per symbol; the lexeme is recalled in the
   trailing comment. *)

type semi     = Region.t  (* ";"   *)
type comma    = Region.t  (* ","   *)
type lpar     = Region.t  (* "("   *)
type rpar     = Region.t  (* ")"   *)
type lbrace   = Region.t  (* "{"   *)
type rbrace   = Region.t  (* "}"   *)
type lbracket = Region.t  (* "["   *)
type rbracket = Region.t  (* "]"   *)
type cons     = Region.t  (* "#"   *)
type vbar     = Region.t  (* "|"   *)
type arrow    = Region.t  (* "->"  *)
type assign   = Region.t  (* ":="  *)
type equal    = Region.t  (* "="   *)
type colon    = Region.t  (* ":"   *)
type lt       = Region.t  (* "<"   *)
type leq      = Region.t  (* "<="  *)
type gt       = Region.t  (* ">"   *)
type geq      = Region.t  (* ">="  *)
type neq      = Region.t  (* "=/=" *)
type plus     = Region.t  (* "+"   *)
type minus    = Region.t  (* "-"   *)
type slash    = Region.t  (* "/"   *)
type times    = Region.t  (* "*"   *)
type dot      = Region.t  (* "."   *)
type wild     = Region.t  (* "_"   *)
type cat      = Region.t  (* "^"   *)
2019-05-13 00:56:22 +04:00
(* Virtual tokens *)
(* [eof] is an alias of [Region.t], like the keyword and symbol
   types; it is the type of the [eof] field that closes the AST
   root [t]. *)
type eof = Region.t
(* Literals

   Each alias below is a [string reg], i.e. a string together with
   its region; the distinct names only document the role played by
   the string. *)

type variable   = string reg
type fun_name   = string reg
type type_name  = string reg
type field_name = string reg
type map_name   = string reg
type set_name   = string reg
type constr     = string reg
type attribute  = string reg
2019-05-13 00:56:22 +04:00
(* Parentheses: a value of type ['a] enclosed between the regions of
   "(" and ")". *)

type 'a par = {
  lpar   : lpar;
  inside : 'a;
  rpar   : rpar
}
(* Bracketed compounds: a value of type ['a] enclosed between the
   regions of "[" and "]". *)

type 'a brackets = {
  lbracket : lbracket;
  inside   : 'a;
  rbracket : rbracket
}
(* Braced compounds: a value of type ['a] enclosed between the
   regions of "{" and "}". *)

type 'a braces = {
  lbrace : lbrace;
  inside : 'a;
  rbrace : rbrace
}
(* The Abstract Syntax Tree *)
(* Root of the tree: a non-empty sequence of declarations followed by
   the region of the end-of-file virtual token. *)
type t = {
  decl : declaration nseq;
  eof  : eof
}

and ast = t

(* Top-level declarations. *)
and declaration =
  | TypeDecl  of type_decl reg
  | ConstDecl of const_decl reg
  | FunDecl   of fun_decl reg
  | AttrDecl  of attr_decl

(* An attribute declaration is a non-empty injection of strings. *)
and attr_decl = string reg ne_injection reg
2019-05-13 00:56:22 +04:00
(* Constant declaration: keyword, name, ":" and type, "=" and
   initial value, then an optional ";" and optional attributes. *)
and const_decl = {
  kwd_const  : kwd_const;
  name       : variable;
  colon      : colon;
  const_type : type_expr;
  equal      : equal;
  init       : expr;
  terminator : semi option;
  attributes : attr_decl option
}
(* Type declarations *)
(* A type declaration binds [name] to [type_expr]; the terminating
   ";" is optional. *)
and type_decl = {
  kwd_type   : kwd_type;
  name       : type_name;
  kwd_is     : kwd_is;
  type_expr  : type_expr;
  terminator : semi option
}
(* Type expressions. *)
and type_expr =
  | TProd          of cartesian
  | TSum           of (variant reg, vbar) nsepseq reg
  | TRecord        of field_decl reg ne_injection reg
  | TApp           of (type_name * type_tuple) reg
  | TFun           of (type_expr * arrow * type_expr) reg
  | TPar           of type_expr par reg
  | TVar           of variable
  | TStringLiteral of Lexer.lexeme reg

(* Product type: type expressions separated by "*". *)
and cartesian = (type_expr, times) nsepseq reg

(* One variant of a sum type: a constructor with an optional
   argument type introduced by the keyword of type [kwd_of]. *)
and variant = {
  constr : constr;
  arg    : (kwd_of * type_expr) option
}

(* One field of a record type. *)
and field_decl = {
  field_name : field_name;
  colon      : colon;
  field_type : type_expr
}

(* Parenthesised, comma-separated arguments of a type application. *)
and type_tuple = (type_expr, comma) nsepseq par reg
(* Function and procedure declarations *)
(* Function expression: unlike [fun_decl] it has no name, no block,
   no terminator and no attributes. *)
and fun_expr = {
  kwd_recursive : kwd_recursive option;
  kwd_function  : kwd_function;
  param         : parameters;
  colon         : colon;
  ret_type      : type_expr;
  kwd_is        : kwd_is;
  return        : expr
}
(* Function declaration. The field [block_with] pairs the optional
   block with the keyword of type [kwd_with], so that one cannot be
   present without the other. *)
and fun_decl = {
  kwd_recursive : kwd_recursive option;
  kwd_function  : kwd_function;
  fun_name      : variable;
  param         : parameters;
  colon         : colon;
  ret_type      : type_expr;
  kwd_is        : kwd_is;
  block_with    : (block reg * kwd_with) option;
  return        : expr;
  terminator    : semi option;
  attributes    : attr_decl option
}
2019-05-13 00:56:22 +04:00
(* Parenthesised parameter declarations separated by ";". *)
and parameters = (param_decl, semi) nsepseq par reg

(* A parameter is introduced either by the keyword of type
   [kwd_const] or by the keyword of type [kwd_var]. *)
and param_decl =
  | ParamConst of param_const reg
  | ParamVar   of param_var reg

and param_const = {
  kwd_const  : kwd_const;
  var        : variable;
  colon      : colon;
  param_type : type_expr
}

and param_var = {
  kwd_var    : kwd_var;
  var        : variable;
  colon      : colon;
  param_type : type_expr
}
(* A block: statements between an opening and a closing delimiter,
   with an optional ";" after the last statement. *)
and block = {
  opening    : block_opening;
  statements : statements;
  terminator : semi option;
  closing    : block_closing
}

and block_opening =
  | Block of kwd_block * lbrace
  | Begin of kwd_begin

and block_closing =
  | Block of rbrace
  | End   of kwd_end

(* Non-empty sequence of statements separated by ";". *)
and statements = (statement, semi) nsepseq
(* A statement is an instruction, a local declaration or an
   attribute declaration. *)
and statement =
  | Instr of instruction
  | Data  of data_decl
  | Attr  of attr_decl

(* Local declarations. *)
and data_decl =
  | LocalConst of const_decl reg
  | LocalVar   of var_decl reg
  | LocalFun   of fun_decl reg

(* Variable declaration: shaped like [const_decl], except that the
   initial value follows ":=" and there are no attributes. *)
and var_decl = {
  kwd_var    : kwd_var;
  name       : variable;
  colon      : colon;
  var_type   : type_expr;
  assign     : assign;
  init       : expr;
  terminator : semi option
}
(* Instructions. *)
and instruction =
  | Cond        of conditional reg
  | CaseInstr   of if_clause case reg
  | Assign      of assignment reg
  | Loop        of loop
  | ProcCall    of fun_call
  | Skip        of kwd_skip
  | RecordPatch of record_patch reg
  | MapPatch    of map_patch reg
  | SetPatch    of set_patch reg
  | MapRemove   of map_remove reg
  | SetRemove   of set_remove reg
(* Removal of [element] from the set designated by the path [set]. *)
and set_remove = {
  kwd_remove : kwd_remove;
  element    : expr;
  kwd_from   : kwd_from;
  kwd_set    : kwd_set;
  set        : path
}

(* Removal of [key] from the map designated by the path [map]. *)
and map_remove = {
  kwd_remove : kwd_remove;
  key        : expr;
  kwd_from   : kwd_from;
  kwd_map    : kwd_map;
  map        : path
}

(* Patch of the set at [path] with a non-empty injection of
   expressions. *)
and set_patch = {
  kwd_patch : kwd_patch;
  path      : path;
  kwd_with  : kwd_with;
  set_inj   : expr ne_injection reg
}

(* Patch of the map at [path] with a non-empty injection of
   bindings. *)
and map_patch = {
  kwd_patch : kwd_patch;
  path      : path;
  kwd_with  : kwd_with;
  map_inj   : binding reg ne_injection reg
}

(* A map binding: [source], "->", [image]. *)
and binding = {
  source : expr;
  arrow  : arrow;
  image  : expr
}

(* Patch of the record at [path] with a record injection. *)
and record_patch = {
  kwd_patch  : kwd_patch;
  path       : path;
  kwd_with   : kwd_with;
  record_inj : record reg
}
(* Conditional expression: both branches are expressions. *)
and cond_expr = {
  kwd_if     : kwd_if;
  test       : expr;
  kwd_then   : kwd_then;
  ifso       : expr;
  terminator : semi option;
  kwd_else   : kwd_else;
  ifnot      : expr
}

(* Conditional instruction: same layout as [cond_expr], but both
   branches are of type [if_clause]. *)
and conditional = {
  kwd_if     : kwd_if;
  test       : expr;
  kwd_then   : kwd_then;
  ifso       : if_clause;
  terminator : semi option;
  kwd_else   : kwd_else;
  ifnot      : if_clause
}

and if_clause =
  | ClauseInstr of instruction
  | ClauseBlock of clause_block

(* A clause block is either a regular block or braced statements
   with an optional trailing ";". *)
and clause_block =
  | LongBlock  of block reg
  | ShortBlock of (statements * semi option) braces reg
(* Set membership test. *)
and set_membership = {
  set          : expr;
  kwd_contains : kwd_contains;
  element      : expr
}

(* Case construct, parameterised by the type ['a] of the right-hand
   side of its clauses (it is instantiated with [if_clause] for
   instructions and with [expr] for expressions). *)
and 'a case = {
  kwd_case  : kwd_case;
  expr      : expr;
  opening   : opening;
  lead_vbar : vbar option;
  cases     : ('a case_clause reg, vbar) nsepseq reg;
  closing   : closing
}

and 'a case_clause = {
  pattern : pattern;
  arrow   : arrow;
  rhs     : 'a
}
(* Assignment: left-hand side, ":=", right-hand side. *)
and assignment = {
  lhs    : lhs;
  assign : assign;
  rhs    : rhs
}

(* The left-hand side is a path or a map lookup. *)
and lhs =
  | Path    of path
  | MapPath of map_lookup reg

(* The right-hand side is any expression. *)
and rhs = expr
2019-05-13 00:56:22 +04:00
(* Loops. *)
and loop =
  | While of while_loop reg
  | For   of for_loop

and while_loop = {
  kwd_while : kwd_while;
  cond      : expr;
  block     : block reg
}

and for_loop =
  | ForInt     of for_int reg
  | ForCollect of for_collect reg

(* Loop over integers. The step keyword and the step expression are
   two independent optional fields. *)
and for_int = {
  kwd_for  : kwd_for;
  assign   : var_assign reg;
  kwd_to   : kwd_to;
  bound    : expr;
  kwd_step : kwd_step option;
  step     : expr option;
  block    : block reg
}

and var_assign = {
  name   : variable;
  assign : assign;
  expr   : expr
}

(* Loop over a collection. [bind_to] optionally adds a second
   variable after "->". *)
and for_collect = {
  kwd_for    : kwd_for;
  var        : variable;
  bind_to    : (arrow * variable) option;
  kwd_in     : kwd_in;
  collection : collection;
  expr       : expr;
  block      : block reg
}

(* Kind of collection iterated over by [for_collect]. *)
and collection =
  | Map  of kwd_map
  | Set  of kwd_set
  | List of kwd_list
2019-05-13 00:56:22 +04:00
(* Expressions *)
and expr =
  | ECase   of expr case reg
  | ECond   of cond_expr reg
  | EAnnot  of annot_expr reg
  | ELogic  of logic_expr
  | EArith  of arith_expr
  | EString of string_expr
  | EList   of list_expr
  | ESet    of set_expr
  | EConstr of constr_expr
  | ERecord of record reg
  | EProj   of projection reg
  | EUpdate of update reg
  | EMap    of map_expr
  | EVar    of Lexer.lexeme reg
  | ECall   of fun_call
  | EBytes  of (Lexer.lexeme * Hex.t) reg
  | EUnit   of c_Unit
  | ETuple  of tuple_expr
  | EPar    of expr par reg
  | EFun    of fun_expr reg
2019-05-13 00:56:22 +04:00
(* An expression paired with a type expression. *)
and annot_expr = expr * type_expr

and set_expr =
  | SetInj of expr injection reg
  | SetMem of set_membership reg

(* Injection: possibly empty elements separated by ";", between an
   opening and a closing, with an optional trailing ";". *)
and 'a injection = {
  opening    : opening;
  elements   : ('a, semi) sepseq;
  terminator : semi option;
  closing    : closing
}

(* Same as ['a injection], except that the elements are held in an
   [nsepseq] instead of a [sepseq]. *)
and 'a ne_injection = {
  opening     : opening;
  ne_elements : ('a, semi) nsepseq;
  terminator  : semi option;
  closing     : closing
}

(* Opening of an injection or of a case: a keyword, optionally
   followed by "[". *)
and opening =
  | Kwd        of keyword
  | KwdBracket of keyword * lbracket

(* Closing of an injection or of a case. *)
and closing =
  | End      of kwd_end
  | RBracket of rbracket
and map_expr =
  | MapLookUp of map_lookup reg
  | MapInj    of binding reg injection reg
  | BigMapInj of binding reg injection reg

(* Lookup in the map at [path] of the bracketed expression [index]. *)
and map_lookup = {
  path  : path;
  index : expr brackets reg
}

(* A path is a plain variable or a projection. *)
and path =
  | Name of variable
  | Path of projection reg
and logic_expr =
  | BoolExpr of bool_expr
  | CompExpr of comp_expr

and bool_expr =
  | Or    of kwd_or bin_op reg
  | And   of kwd_and bin_op reg
  | Not   of kwd_not un_op reg
  | False of c_False
  | True  of c_True

(* Binary operation; ['a] is the type of the operator token. *)
and 'a bin_op = {
  op   : 'a;
  arg1 : expr;
  arg2 : expr
}

(* Unary operation; ['a] is the type of the operator token. *)
and 'a un_op = {
  op  : 'a;
  arg : expr
}

and comp_expr =
  | Lt    of lt bin_op reg
  | Leq   of leq bin_op reg
  | Gt    of gt bin_op reg
  | Geq   of geq bin_op reg
  | Equal of equal bin_op reg
  | Neq   of neq bin_op reg

(* Arithmetic expressions. Numeric literals keep their lexeme next
   to a [Z.t] value. *)
and arith_expr =
  | Add   of plus bin_op reg
  | Sub   of minus bin_op reg
  | Mult  of times bin_op reg
  | Div   of slash bin_op reg
  | Mod   of kwd_mod bin_op reg
  | Neg   of minus un_op reg
  | Int   of (Lexer.lexeme * Z.t) reg
  | Nat   of (Lexer.lexeme * Z.t) reg
  | Mutez of (Lexer.lexeme * Z.t) reg
2019-05-13 00:56:22 +04:00
and string_expr =
  | Cat    of cat bin_op reg
  | String of Lexer.lexeme reg

and list_expr =
  | ECons     of cons bin_op reg
  | EListComp of expr injection reg
  | ENil      of kwd_nil

(* Constructor applications. The arguments are mandatory in
   [SomeApp] and optional in [ConstrApp]. *)
and constr_expr =
  | SomeApp   of (c_Some * arguments) reg
  | NoneExpr  of c_None
  | ConstrApp of (constr * arguments option) reg
2019-05-13 00:56:22 +04:00
(* Assignment of an expression to a record field. *)
and field_assign = {
  field_name : field_name;
  equal      : equal;
  field_expr : expr
}

(* A record is a non-empty injection of field assignments. *)
and record = field_assign reg ne_injection

(* Projection: a variable, ".", then selections separated by ".". *)
and projection = {
  struct_name : variable;
  selector    : dot;
  field_path  : (selection, dot) nsepseq
}

(* Update of the record at the path [record] with a non-empty
   injection of field-path assignments. *)
and update = {
  record   : path;
  kwd_with : kwd_with;
  updates  : field_path_assign reg ne_injection reg
}

(* Like [field_assign], but the target is a path of field names
   separated by ".". *)
and field_path_assign = {
  field_path : (field_name, dot) nsepseq;
  equal      : equal;
  field_expr : expr
}

(* A selection is a field name or a component given as a lexeme
   with its [Z.t] value. *)
and selection =
  | FieldName of field_name
  | Component of (Lexer.lexeme * Z.t) reg

(* Parenthesised expressions separated by ",". *)
and tuple_expr = (expr, comma) nsepseq par reg

(* Function call: an expression applied to arguments. *)
and fun_call = (expr * arguments) reg

and arguments = tuple_expr
2019-05-13 00:56:22 +04:00
(* Patterns *)
and pattern =
  | PConstr of constr_pattern
  | PVar    of Lexer.lexeme reg
  | PWild   of wild
  | PInt    of (Lexer.lexeme * Z.t) reg
  | PNat    of (Lexer.lexeme * Z.t) reg
  | PBytes  of (Lexer.lexeme * Hex.t) reg
  | PString of Lexer.lexeme reg
  | PList   of list_pattern
  | PTuple  of tuple_pattern

(* Patterns built from data constructors. *)
and constr_pattern =
  | PUnit      of c_Unit
  | PFalse     of c_False
  | PTrue      of c_True
  | PNone      of c_None
  | PSomeApp   of (c_Some * pattern par reg) reg
  | PConstrApp of (constr * tuple_pattern option) reg

(* Parenthesised patterns separated by ",". *)
and tuple_pattern = (pattern, comma) nsepseq par reg

(* Patterns over lists. *)
and list_pattern =
  | PListComp of pattern injection reg
  | PNil      of kwd_nil
  | PParCons  of (pattern * cons * pattern) par reg
  | PCons     of (pattern, cons) nsepseq reg
2019-05-13 00:56:22 +04:00
2019-05-13 00:56:22 +04:00
(* Projecting regions *)
(* [last to_region items] applies [to_region] to the last element of
   [items]; on the empty list it returns [Region.ghost]. *)
let rec last to_region items =
  match items with
  | [] -> Region.ghost
  | [only] -> to_region only
  | _ :: rest -> last to_region rest
(* [nseq_to_region to_region seq] passes to [Region.cover] the region
   of the head of [seq] and that of the last element of its tail, as
   computed by [last]. *)
let nseq_to_region to_region (first, others) =
  Region.cover (to_region first) (last to_region others)
(* Same as [nseq_to_region], for a head followed by a list of
   (separator, item) pairs; the separators are ignored. *)
let nsepseq_to_region to_region (first, others) =
  let item_region (_separator, item) = to_region item in
  Region.cover (to_region first) (last item_region others)
(* [sepseq_to_region to_region seq] is [Region.ghost] when [seq] is
   [None]; otherwise it defers to [nsepseq_to_region]. *)
let sepseq_to_region to_region seq =
  match seq with
  | Some items -> nsepseq_to_region to_region items
  | None -> Region.ghost
2019-05-13 00:56:22 +04:00
(* [type_expr_to_region t] is the region of the type expression [t].
   Every constructor carries a [_ reg] value, so the [region] field
   is read directly. *)
let type_expr_to_region type_expr =
  match type_expr with
  | TVar           {region; _}
  | TStringLiteral {region; _}
  | TPar           {region; _}
  | TFun           {region; _}
  | TApp           {region; _}
  | TRecord        {region; _}
  | TSum           {region; _}
  | TProd          {region; _} -> region
2019-05-13 00:56:22 +04:00
(* [expr_to_region e] is the region of the expression [e].

   Constructors wrapping another variant delegate to the projection
   of that variant; the remaining ones hold either a [_ reg] value or
   a bare region. [expr_to_region] is kept first in the group: its
   body fixes the argument types of the helpers below before their
   record patterns are type-checked. *)
let rec expr_to_region expr =
  match expr with
  (* Delegation to the wrapped node *)
  | ELogic  logic  -> logic_expr_to_region logic
  | EArith  arith  -> arith_expr_to_region arith
  | EString str    -> string_expr_to_region str
  | EAnnot  annot  -> annot_expr_to_region annot
  | EList   lst    -> list_expr_to_region lst
  | ESet    set    -> set_expr_to_region set
  | EConstr constr -> constr_expr_to_region constr
  | ERecord record -> record_expr_to_region record
  | EMap    map    -> map_expr_to_region map
  | ETuple  tuple  -> tuple_expr_to_region tuple
  (* The payload is the region itself *)
  | EUnit region -> region
  (* The payload is a [_ reg] value *)
  | EUpdate {region; _}
  | EProj   {region; _}
  | EVar    {region; _}
  | ECall   {region; _}
  | EBytes  {region; _}
  | ECase   {region; _}
  | ECond   {region; _}
  | EPar    {region; _}
  | EFun    {region; _} -> region

and tuple_expr_to_region {region; _} = region

and map_expr_to_region map_expr =
  match map_expr with
  | MapLookUp {region; _}
  | MapInj    {region; _}
  | BigMapInj {region; _} -> region

and set_expr_to_region set_expr =
  match set_expr with
  | SetInj {region; _}
  | SetMem {region; _} -> region

and logic_expr_to_region logic_expr =
  match logic_expr with
  | BoolExpr bool_expr -> bool_expr_to_region bool_expr
  | CompExpr comp_expr -> comp_expr_to_region comp_expr

and bool_expr_to_region bool_expr =
  match bool_expr with
  | False region
  | True  region -> region
  | Or  {region; _}
  | And {region; _}
  | Not {region; _} -> region

and comp_expr_to_region comp_expr =
  match comp_expr with
  | Lt    {region; _}
  | Leq   {region; _}
  | Gt    {region; _}
  | Geq   {region; _}
  | Equal {region; _}
  | Neq   {region; _} -> region

and arith_expr_to_region arith_expr =
  match arith_expr with
  | Add   {region; _}
  | Sub   {region; _}
  | Mult  {region; _}
  | Div   {region; _}
  | Mod   {region; _}
  | Neg   {region; _}
  | Int   {region; _}
  | Nat   {region; _}
  | Mutez {region; _} -> region

and string_expr_to_region string_expr =
  match string_expr with
  | Cat    {region; _}
  | String {region; _} -> region

and annot_expr_to_region {region; _} = region

and list_expr_to_region list_expr =
  match list_expr with
  | ENil region -> region
  | ECons     {region; _}
  | EListComp {region; _} -> region

and constr_expr_to_region constr_expr =
  match constr_expr with
  | NoneExpr region -> region
  | ConstrApp {region; _}
  | SomeApp   {region; _} -> region

and record_expr_to_region {region; _} = region
(* [path_to_region p] is the region of the path [p], be it a plain
   variable or a projection. *)
let path_to_region path =
  match path with
  | Name variable -> variable.region
  | Path {region; _} -> region
(* [instr_to_region i] is the region of the instruction [i]. Loops
   are matched down to their [_ reg] payload. *)
let instr_to_region instr =
  match instr with
  | Skip region -> region
  | Loop (While {region; _})
  | Loop (For (ForInt {region; _}))
  | Loop (For (ForCollect {region; _}))
  | Cond        {region; _}
  | CaseInstr   {region; _}
  | Assign      {region; _}
  | ProcCall    {region; _}
  | RecordPatch {region; _}
  | MapPatch    {region; _}
  | SetPatch    {region; _}
  | MapRemove   {region; _}
  | SetRemove   {region; _} -> region
(* [clause_block_to_region b] is the region of the clause block [b]. *)
let clause_block_to_region clause_block =
  match clause_block with
  | ShortBlock {region; _}
  | LongBlock  {region; _} -> region
2019-05-13 00:56:22 +04:00
(* [if_clause_to_region c] is the region of the clause [c], obtained
   from [instr_to_region] or [clause_block_to_region]. *)
let if_clause_to_region if_clause =
  match if_clause with
  | ClauseBlock block -> clause_block_to_region block
  | ClauseInstr instr -> instr_to_region instr
2019-05-13 00:56:22 +04:00
(* [pattern_to_region p] is the region of the pattern [p].
   Constructor patterns and list patterns are matched down to the
   payload holding the region. *)
let pattern_to_region pattern =
  match pattern with
  (* The payload is the region itself *)
  | PWild region
  | PConstr (PUnit  region)
  | PConstr (PFalse region)
  | PConstr (PTrue  region)
  | PConstr (PNone  region)
  | PList   (PNil   region) -> region
  (* The payload is a [_ reg] value *)
  | PVar    {region; _}
  | PInt    {region; _}
  | PNat    {region; _}
  | PBytes  {region; _}
  | PString {region; _}
  | PConstr (PSomeApp   {region; _})
  | PConstr (PConstrApp {region; _})
  | PList   (PListComp  {region; _})
  | PList   (PParCons   {region; _})
  | PList   (PCons      {region; _})
  | PTuple  {region; _} -> region
(* [lhs_to_region l] is the region of the left-hand side [l] of an
   assignment. The annotation on [lhs] is required: the constructor
   [Path] is also declared by the type [path]. *)
let lhs_to_region (lhs : lhs) : Region.t =
  match lhs with
  | MapPath {region; _} -> region
  | Path path -> path_to_region path
2019-10-07 18:24:56 +04:00
(* The right-hand side of an assignment is an expression, hence its
   region is that given by [expr_to_region]. *)
let rhs_to_region rhs = expr_to_region rhs
2019-05-13 00:56:22 +04:00
(* [selection_to_region s] is the region of the selection [s]. *)
let selection_to_region selection =
  match selection with
  | Component {region; _}
  | FieldName {region; _} -> region
2019-12-04 21:30:52 +04:00
(* [map_ne_injection f inj] is [inj] whose [ne_elements] field has
   been replaced by the result of [nsepseq_map f]; the other fields
   are copied unchanged. *)
let map_ne_injection f ne_injection =
  let mapped = nsepseq_map f ne_injection.ne_elements in
  { ne_injection with ne_elements = mapped }