diff --git a/src/ligo/parser/.Lexer.ml.tag b/src/ligo/parser/.Lexer.ml.tag new file mode 100644 index 000000000..051eeceb0 --- /dev/null +++ b/src/ligo/parser/.Lexer.ml.tag @@ -0,0 +1 @@ +ocamlc: -w -42 diff --git a/src/ligo/parser/.LexerMain.tag b/src/ligo/parser/.LexerMain.tag new file mode 100644 index 000000000..e69de29bb diff --git a/src/ligo/parser/.Parser.mly.tag b/src/ligo/parser/.Parser.mly.tag new file mode 100644 index 000000000..100f7bb69 --- /dev/null +++ b/src/ligo/parser/.Parser.mly.tag @@ -0,0 +1 @@ +--explain --external-tokens LexToken --base Parser ParToken.mly diff --git a/src/ligo/parser/.ParserMain.tag b/src/ligo/parser/.ParserMain.tag new file mode 100644 index 000000000..e69de29bb diff --git a/src/ligo/parser/.links b/src/ligo/parser/.links new file mode 100644 index 000000000..b29b57639 --- /dev/null +++ b/src/ligo/parser/.links @@ -0,0 +1,2 @@ +$HOME/git/OCaml-build/Makefile +$HOME/git/OCaml-build/Makefile.cfg diff --git a/src/ligo/parser/AST.ml b/src/ligo/parser/AST.ml new file mode 100644 index 000000000..914950944 --- /dev/null +++ b/src/ligo/parser/AST.ml @@ -0,0 +1,1059 @@ +(* Abstract Syntax Tree (AST) for Ligo *) + +(* To disable warning about multiply-defined record labels. *) + +[@@@warning "-30-42"] + +(* Utilities *) + +open Utils + +(* Regions + + The AST carries all the regions where tokens have been found by the + lexer, plus additional regions corresponding to whole subtrees + (like entire expressions, patterns etc.). These regions are needed + for error reporting and source-to-source transformations. To make + these pervasive regions more legible, we define singleton types for + the symbols, keywords etc. with suggestive names like "kwd_and" + denoting the _region_ of the occurrence of the keyword "and". +*) + +type 'a reg = 'a Region.reg + +let rec last to_region = function + [] -> Region.ghost +| [x] -> to_region x +| _::t -> last to_region t + +let nseq_to_region to_region (hd,tl) = + Region.cover (to_region hd) (last to_region tl) + +let nsepseq_to_region to_region (hd,tl) = + let reg (_, item) = to_region item in + Region.cover (to_region hd) (last reg tl) + +let sepseq_to_region to_region = function + None -> Region.ghost +| Some seq -> nsepseq_to_region to_region seq + +(* Keywords of Ligo *) + +type kwd_begin = Region.t +type kwd_const = Region.t +type kwd_down = Region.t +type kwd_else = Region.t +type kwd_end = Region.t +type kwd_entrypoint = Region.t +type kwd_fail = Region.t +type kwd_for = Region.t +type kwd_function = Region.t +type kwd_if = Region.t +type kwd_in = Region.t +type kwd_is = Region.t +type kwd_match = Region.t +type kwd_mod = Region.t +type kwd_not = Region.t +type kwd_null = Region.t +type kwd_of = Region.t +type kwd_operations = Region.t +type kwd_procedure = Region.t +type kwd_record = Region.t +type kwd_step = Region.t +type kwd_storage = Region.t +type kwd_then = Region.t +type kwd_to = Region.t +type kwd_type = Region.t +type kwd_var = Region.t +type kwd_while = Region.t +type kwd_with = Region.t + +(* Data constructors *) + +type c_False = Region.t +type c_None = Region.t +type c_Some = Region.t +type c_True = Region.t +type c_Unit = Region.t + +(* Symbols *) + +type semi = Region.t +type comma = Region.t +type lpar = Region.t +type rpar = Region.t +type lbrace = Region.t +type rbrace = Region.t +type lbracket = Region.t +type rbracket = Region.t +type cons = Region.t +type vbar = Region.t +type arrow = Region.t +type ass = Region.t +type equal = Region.t +type colon = Region.t +type bool_or = Region.t +type bool_and = Region.t +type lt = Region.t +type leq = Region.t +type gt = Region.t +type geq = Region.t +type neq = Region.t +type plus = Region.t +type minus = Region.t +type slash = Region.t +type times = Region.t +type dot = Region.t +type wild = Region.t +type cat = Region.t + +(* Virtual tokens *) + +type eof = Region.t + +(* Literals *) + +type variable = string reg +type fun_name = string reg +type type_name = string reg +type field_name = string reg +type map_name = string reg +type constr = string reg + +(* Comma-separated non-empty lists *) + +type 'a csv = ('a, comma) nsepseq + +(* Bar-separated non-empty lists *) + +type 'a bsv = ('a, vbar) nsepseq + +(* Parentheses *) + +type 'a par = (lpar * 'a * rpar) reg + +(* Brackets compounds *) + +type 'a brackets = (lbracket * 'a * rbracket) reg + +(* Braced compounds *) + +type 'a braces = (lbrace * 'a * rbrace) reg + +(* The Abstract Syntax Tree *) + +type t = { + decl : declaration nseq; + eof : eof +} + +and ast = t + +and declaration = + TypeDecl of type_decl reg +| ConstDecl of const_decl reg +| StorageDecl of storage_decl reg +| OpDecl of operations_decl reg +| LambdaDecl of lambda_decl + +and const_decl = { + kwd_const : kwd_const; + name : variable; + colon : colon; + const_type : type_expr; + equal : equal; + init : expr; + terminator : semi option +} + +and storage_decl = { + kwd_storage : kwd_storage; + name : variable; + colon : colon; + store_type : type_expr; + terminator : semi option +} + +and operations_decl = { + kwd_operations : kwd_operations; + name : variable; + colon : colon; + op_type : type_expr; + terminator : semi option +} + +(* Type declarations *) + +and type_decl = { + kwd_type : kwd_type; + name : type_name; + kwd_is : kwd_is; + type_expr : type_expr; + terminator : semi option +} + +and type_expr = + Prod of cartesian +| Sum of (variant, vbar) nsepseq reg +| Record of record_type +| TypeApp of (type_name * type_tuple) reg +| ParType of type_expr par +| TAlias of variable + +and cartesian = (type_expr, times) nsepseq reg + +and variant = (constr * kwd_of * cartesian) reg + +and record_type = (kwd_record * field_decls * kwd_end) reg + +and field_decls = (field_decl, semi) nsepseq + +and field_decl = (variable * colon * type_expr) reg + +and type_tuple = (type_name, comma) nsepseq par + +(* Function and procedure declarations *) + +and lambda_decl = + FunDecl of fun_decl reg +| ProcDecl of proc_decl reg +| EntryDecl of entry_decl reg + +and fun_decl = { + kwd_function : kwd_function; + name : variable; + param : parameters; + colon : colon; + ret_type : type_expr; + kwd_is : kwd_is; + local_decls : local_decl list; + block : block reg; + kwd_with : kwd_with; + return : expr; + terminator : semi option +} + +and proc_decl = { + kwd_procedure : kwd_procedure; + name : variable; + param : parameters; + kwd_is : kwd_is; + local_decls : local_decl list; + block : block reg; + terminator : semi option +} + +and entry_decl = { + kwd_entrypoint : kwd_entrypoint; + name : variable; + param : parameters; + kwd_is : kwd_is; + local_decls : local_decl list; + block : block reg; + terminator : semi option +} + +and parameters = (param_decl, semi) nsepseq par + +and param_decl = + ParamConst of param_const +| ParamVar of param_var + +and param_const = (kwd_const * variable * colon * type_expr) reg + +and param_var = (kwd_var * variable * colon * type_expr) reg + +and block = { + opening : kwd_begin; + instr : instructions; + terminator : semi option; + close : kwd_end +} + +and local_decl = + LocalLam of lambda_decl +| LocalConst of const_decl reg +| LocalVar of var_decl reg + +and var_decl = { + kwd_var : kwd_var; + name : variable; + colon : colon; + var_type : type_expr; + ass : ass; + init : expr; + terminator : semi option +} + +and instructions = (instruction, semi) nsepseq reg + +and instruction = + Single of single_instr +| Block of block reg + +and single_instr = + Cond of conditional reg +| Match of match_instr reg +| Ass of ass_instr +| Loop of loop +| ProcCall of fun_call +| Null of kwd_null +| Fail of (kwd_fail * expr) reg + +and conditional = { + kwd_if : kwd_if; + test : expr; + kwd_then : kwd_then; + ifso : instruction; + kwd_else : kwd_else; + ifnot : instruction +} + +and match_instr = { + kwd_match : kwd_match; + expr : expr; + kwd_with : kwd_with; + lead_vbar : vbar option; + cases : cases; + kwd_end : kwd_end +} + +and cases = (case, vbar) nsepseq reg + +and case = (pattern * arrow * instruction) reg + +and ass_instr = (variable * ass * expr) reg + +and loop = + While of while_loop +| For of for_loop + +and while_loop = (kwd_while * expr * block reg) reg + +and for_loop = + ForInt of for_int reg +| ForCollect of for_collect reg + +and for_int = { + kwd_for : kwd_for; + ass : ass_instr; + down : kwd_down option; + kwd_to : kwd_to; + bound : expr; + step : (kwd_step * expr) option; + block : block reg +} + +and for_collect = { + kwd_for : kwd_for; + var : variable; + bind_to : (arrow * variable) option; + kwd_in : kwd_in; + expr : expr; + block : block reg +} + +(* Expressions *) + +and expr = + Or of (expr * bool_or * expr) reg +| And of (expr * bool_and * expr) reg +| Lt of (expr * lt * expr) reg +| Leq of (expr * leq * expr) reg +| Gt of (expr * gt * expr) reg +| Geq of (expr * geq * expr) reg +| Equal of (expr * equal * expr) reg +| Neq of (expr * neq * expr) reg +| Cat of (expr * cat * expr) reg +| Cons of (expr * cons * expr) reg +| Add of (expr * plus * expr) reg +| Sub of (expr * minus * expr) reg +| Mult of (expr * times * expr) reg +| Div of (expr * slash * expr) reg +| Mod of (expr * kwd_mod * expr) reg +| Neg of (minus * expr) reg +| Not of (kwd_not * expr) reg +| Int of (Lexer.lexeme * Z.t) reg +| Var of Lexer.lexeme reg +| String of Lexer.lexeme reg +| Bytes of (Lexer.lexeme * MBytes.t) reg +| False of c_False +| True of c_True +| Unit of c_Unit +| Tuple of tuple +| List of (expr, comma) nsepseq brackets +| EmptyList of empty_list +| Set of (expr, comma) nsepseq braces +| EmptySet of empty_set +| NoneExpr of none_expr +| FunCall of fun_call +| ConstrApp of constr_app +| SomeApp of (c_Some * arguments) reg +| MapLookUp of map_lookup reg +| ParExpr of expr par + +and tuple = (expr, comma) nsepseq par + +and empty_list = + (lbracket * rbracket * colon * type_expr) par + +and empty_set = + (lbrace * rbrace * colon * type_expr) par + +and none_expr = + (c_None * colon * type_expr) par + +and fun_call = (fun_name * arguments) reg + +and arguments = tuple + +and constr_app = (constr * arguments) reg + +and map_lookup = { + map_name : variable; + selector : dot; + index : expr brackets +} + +(* Patterns *) + +and pattern = (core_pattern, cons) nsepseq reg + +and core_pattern = + PVar of Lexer.lexeme reg +| PWild of wild +| PInt of (Lexer.lexeme * Z.t) reg +| PBytes of (Lexer.lexeme * MBytes.t) reg +| PString of Lexer.lexeme reg +| PUnit of c_Unit +| PFalse of c_False +| PTrue of c_True +| PNone of c_None +| PSome of (c_Some * core_pattern par) reg +| PList of list_pattern +| PTuple of (core_pattern, comma) nsepseq par + +and list_pattern = + Sugar of (core_pattern, comma) sepseq brackets +| Raw of (core_pattern * cons * pattern) par + +(* Projecting regions *) + +open! Region + +let type_expr_to_region = function + Prod {region; _} +| Sum {region; _} +| Record {region; _} +| TypeApp {region; _} +| ParType {region; _} +| TAlias {region; _} -> region + +let expr_to_region = function + Or {region; _} +| And {region; _} +| Lt {region; _} +| Leq {region; _} +| Gt {region; _} +| Geq {region; _} +| Equal {region; _} +| Neq {region; _} +| Cat {region; _} +| Cons {region; _} +| Add {region; _} +| Sub {region; _} +| Mult {region; _} +| Div {region; _} +| Mod {region; _} +| Neg {region; _} +| Not {region; _} +| Int {region; _} +| Var {region; _} +| String {region; _} +| Bytes {region; _} +| False region +| True region +| Unit region +| Tuple {region; _} +| List {region; _} +| EmptyList {region; _} +| Set {region; _} +| EmptySet {region; _} +| NoneExpr {region; _} +| FunCall {region; _} +| ConstrApp {region; _} +| SomeApp {region; _} +| MapLookUp {region; _} +| ParExpr {region; _} -> region + +let instr_to_region = function + Single Cond {region;_} +| Single Match {region; _} +| Single Ass {region; _} +| Single Loop While {region; _} +| Single Loop For ForInt {region; _} +| Single Loop For ForCollect {region; _} +| Single ProcCall {region; _} +| Single Null region +| Single Fail {region; _} +| Block {region; _} -> region + +let core_pattern_to_region = function + PVar {region; _} +| PWild region +| PInt {region; _} +| PBytes {region; _} +| PString {region; _} +| PUnit region +| PFalse region +| PTrue region +| PNone region +| PSome {region; _} +| PList Sugar {region; _} +| PList Raw {region; _} +| PTuple {region; _} -> region + +let local_decl_to_region = function + LocalLam FunDecl {region; _} +| LocalLam ProcDecl {region; _} +| LocalLam EntryDecl {region; _} +| LocalConst {region; _} +| LocalVar {region; _} -> region + +(* Printing the tokens with their source regions *) + +let printf = Printf.printf + +let compact (region: Region.t) = + region#compact ~offsets:EvalOpt.offsets EvalOpt.mode + +let print_nsepseq : + string -> ('a -> unit) -> ('a, Region.t) nsepseq -> unit = + fun sep visit (head, tail) -> + let print_aux (sep_reg, item) = + printf "%s: %s\n" (compact sep_reg) sep; + visit item + in visit head; List.iter print_aux tail + +let print_sepseq : + string -> ('a -> unit) -> ('a, Region.t) sepseq -> unit = + fun sep visit -> function + None -> () + | Some seq -> print_nsepseq sep visit seq + +let print_token region lexeme = + printf "%s: %s\n"(compact region) lexeme + +let print_var {region; value=lexeme} = + printf "%s: Ident \"%s\"\n" (compact region) lexeme + +let print_constr {region; value=lexeme} = + printf "%s: Constr \"%s\"\n" + (compact region) lexeme + +let print_string {region; value=lexeme} = + printf "%s: String %s\n" + (compact region) lexeme + +let print_bytes {region; value = lexeme, abstract} = + printf "%s: Bytes (\"%s\", \"0x%s\")\n" + (compact region) lexeme + (MBytes.to_hex abstract |> Hex.to_string) + +let print_int {region; value = lexeme, abstract} = + printf "%s: Int (\"%s\", %s)\n" + (compact region) lexeme + (Z.to_string abstract) + +(* Main printing function *) + +let rec print_tokens ast = + let {decl; eof} = ast in + Utils.nseq_iter print_decl decl; + print_token eof "EOF" + +and print_decl = function + TypeDecl decl -> print_type_decl decl +| ConstDecl decl -> print_const_decl decl +| StorageDecl decl -> print_storage_decl decl +| OpDecl decl -> print_operations_decl decl +| LambdaDecl decl -> print_lambda_decl decl + +and print_const_decl {value; _} = + let {kwd_const; name; colon; const_type; + equal; init; terminator} = value in + print_token kwd_const "const"; + print_var name; + print_token colon ":"; + print_type_expr const_type; + print_token equal "="; + print_expr init; + print_terminator terminator + +and print_storage_decl {value; _} = + let {kwd_storage; name; colon; + store_type; terminator} = value in + print_token kwd_storage "storage"; + print_var name; + print_token colon ":"; + print_type_expr store_type; + print_terminator terminator + +and print_operations_decl {value; _} = + let {kwd_operations; name; colon; + op_type; terminator} = value in + print_token kwd_operations "operations"; + print_var name; + print_token colon ":"; + print_type_expr op_type; + print_terminator terminator + +and print_type_decl {value; _} = + let {kwd_type; name; kwd_is; + type_expr; terminator} = value in + print_token kwd_type "type"; + print_var name; + print_token kwd_is "is"; + print_type_expr type_expr; + print_terminator terminator + +and print_type_expr = function + Prod cartesian -> print_cartesian cartesian +| Sum sum_type -> print_sum_type sum_type +| Record record_type -> print_record_type record_type +| TypeApp type_app -> print_type_app type_app +| ParType par_type -> print_par_type par_type +| TAlias type_alias -> print_var type_alias + +and print_cartesian {value; _} = + print_nsepseq "*" print_type_expr value + +and print_variant {value; _} = + let constr, kwd_of, cartesian = value in + print_constr constr; + print_token kwd_of "of"; + print_cartesian cartesian + +and print_sum_type {value; _} = + print_nsepseq "|" print_variant value + +and print_record_type {value; _} = + let kwd_record, field_decls, kwd_end = value in + print_token kwd_record "record"; + print_field_decls field_decls; + print_token kwd_end "end" + +and print_type_app {value; _} = + let type_name, type_tuple = value in + print_var type_name; + print_type_tuple type_tuple + +and print_par_type {value; _} = + let lpar, type_expr, rpar = value in + print_token lpar "("; + print_type_expr type_expr; + print_token rpar ")" + +and print_field_decls sequence = + print_nsepseq ";" print_field_decl sequence + +and print_field_decl {value; _} = + let var, colon, type_expr = value in + print_var var; + print_token colon ":"; + print_type_expr type_expr + +and print_type_tuple {value; _} = + let lpar, sequence, rpar = value in + print_token lpar "("; + print_nsepseq "," print_var sequence; + print_token rpar ")" + +and print_lambda_decl = function + FunDecl fun_decl -> print_fun_decl fun_decl +| ProcDecl proc_decl -> print_proc_decl proc_decl +| EntryDecl entry_decl -> print_entry_decl entry_decl + +and print_fun_decl {value; _} = + let {kwd_function; name; param; colon; + ret_type; kwd_is; local_decls; + block; kwd_with; return; terminator} = value in + print_token kwd_function "function"; + print_var name; + print_parameters param; + print_token colon ":"; + print_type_expr ret_type; + print_token kwd_is "is"; + print_local_decls local_decls; + print_block block; + print_token kwd_with "with"; + print_expr return; + print_terminator terminator + +and print_proc_decl {value; _} = + let {kwd_procedure; name; param; kwd_is; + local_decls; block; terminator} = value in + print_token kwd_procedure "procedure"; + print_var name; + print_parameters param; + print_token kwd_is "is"; + print_local_decls local_decls; + print_block block; + print_terminator terminator + +and print_entry_decl {value; _} = + let {kwd_entrypoint; name; param; kwd_is; + local_decls; block; terminator} = value in + print_token kwd_entrypoint "entrypoint"; + print_var name; + print_parameters param; + print_token kwd_is "is"; + print_local_decls local_decls; + print_block block; + print_terminator terminator + +and print_parameters {value; _} = + let lpar, sequence, rpar = value in + print_token lpar "("; + print_nsepseq ";" print_param_decl sequence; + print_token rpar ")" + +and print_param_decl = function + ParamConst param_const -> print_param_const param_const +| ParamVar param_var -> print_param_var param_var + +and print_param_const {value; _} = + let kwd_const, variable, colon, type_expr = value in + print_token kwd_const "const"; + print_var variable; + print_token colon ":"; + print_type_expr type_expr + +and print_param_var {value; _} = + let kwd_var, variable, colon, type_expr = value in + print_token kwd_var "var"; + print_var variable; + print_token colon ":"; + print_type_expr type_expr + +and print_block {value; _} = + let {opening; instr; terminator; close} = value in + print_token opening "begin"; + print_instructions instr; + print_terminator terminator; + print_token close "end" + +and print_local_decls sequence = + List.iter print_local_decl sequence + +and print_local_decl = function + LocalLam decl -> print_lambda_decl decl +| LocalConst decl -> print_const_decl decl +| LocalVar decl -> print_var_decl decl + +and print_var_decl {value; _} = + let {kwd_var; name; colon; var_type; + ass; init; terminator} = value in + print_token kwd_var "var"; + print_var name; + print_token colon ":"; + print_type_expr var_type; + print_token ass ":="; + print_expr init; + print_terminator terminator + +and print_instructions {value; _} = + print_nsepseq ";" print_instruction value + +and print_instruction = function + Single instr -> print_single_instr instr +| Block block -> print_block block + +and print_single_instr = function + Cond {value; _} -> print_conditional value +| Match {value; _} -> print_match_instr value +| Ass instr -> print_ass_instr instr +| Loop loop -> print_loop loop +| ProcCall fun_call -> print_fun_call fun_call +| Null kwd_null -> print_token kwd_null "null" +| Fail {value; _} -> print_fail value + +and print_fail (kwd_fail, expr) = + print_token kwd_fail "fail"; + print_expr expr + +and print_conditional node = + let {kwd_if; test; kwd_then; ifso; + kwd_else; ifnot} = node in + print_token kwd_if "if"; + print_expr test; + print_token kwd_then "then"; + print_instruction ifso; + print_token kwd_else "else"; + print_instruction ifnot + +and print_match_instr node = + let {kwd_match; expr; kwd_with; + lead_vbar; cases; kwd_end} = node in + print_token kwd_match "match"; + print_expr expr; + print_token kwd_with "with"; + print_token_opt lead_vbar "|"; + print_cases cases; + print_token kwd_end "end" + +and print_token_opt = function + None -> fun _ -> () +| Some region -> print_token region + +and print_cases {value; _} = + print_nsepseq "|" print_case value + +and print_case {value; _} = + let pattern, arrow, instruction = value in + print_pattern pattern; + print_token arrow "->"; + print_instruction instruction + +and print_ass_instr {value; _} = + let variable, ass, expr = value in + print_var variable; + print_token ass ":="; + print_expr expr + +and print_loop = function + While while_loop -> print_while_loop while_loop +| For for_loop -> print_for_loop for_loop + +and print_while_loop {value; _} = + let kwd_while, expr, block = value in + print_token kwd_while "while"; + print_expr expr; + print_block block + +and print_for_loop = function + ForInt for_int -> print_for_int for_int +| ForCollect for_collect -> print_for_collect for_collect + +and print_for_int ({value; _} : for_int reg) = + let {kwd_for; ass; down; kwd_to; + bound; step; block} = value in + print_token kwd_for "for"; + print_ass_instr ass; + print_down down; + print_token kwd_to "to"; + print_expr bound; + print_step step; + print_block block + +and print_down = function + Some kwd_down -> print_token kwd_down "down" +| None -> () + +and print_step = function + Some (kwd_step, expr) -> + print_token kwd_step "step"; + print_expr expr +| None -> () + +and print_for_collect ({value; _} : for_collect reg) = + let {kwd_for; var; bind_to; + kwd_in; expr; block} = value in + print_token kwd_for "for"; + print_var var; + print_bind_to bind_to; + print_token kwd_in "in"; + print_expr expr; + print_block block + +and print_bind_to = function + Some (arrow, variable) -> + print_token arrow "->"; + print_var variable +| None -> () + +and print_expr = function + Or {value = expr1, bool_or, expr2; _} -> + print_expr expr1; print_token bool_or "||"; print_expr expr2 +| And {value = expr1, bool_and, expr2; _} -> + print_expr expr1; print_token bool_and "&&"; print_expr expr2 +| Lt {value = expr1, lt, expr2; _} -> + print_expr expr1; print_token lt "<"; print_expr expr2 +| Leq {value = expr1, leq, expr2; _} -> + print_expr expr1; print_token leq "<="; print_expr expr2 +| Gt {value = expr1, gt, expr2; _} -> + print_expr expr1; print_token gt ">"; print_expr expr2 +| Geq {value = expr1, geq, expr2; _} -> + print_expr expr1; print_token geq ">="; print_expr expr2 +| Equal {value = expr1, equal, expr2; _} -> + print_expr expr1; print_token equal "="; print_expr expr2 +| Neq {value = expr1, neq, expr2; _} -> + print_expr expr1; print_token neq "=/="; print_expr expr2 +| Cat {value = expr1, cat, expr2; _} -> + print_expr expr1; print_token cat "^"; print_expr expr2 +| Cons {value = expr1, cons, expr2; _} -> + print_expr expr1; print_token cons "#"; print_expr expr2 +| Add {value = expr1, add, expr2; _} -> + print_expr expr1; print_token add "+"; print_expr expr2 +| Sub {value = expr1, sub, expr2; _} -> + print_expr expr1; print_token sub "-"; print_expr expr2 +| Mult {value = expr1, mult, expr2; _} -> + print_expr expr1; print_token mult "*"; print_expr expr2 +| Div {value = expr1, div, expr2; _} -> + print_expr expr1; print_token div "/"; print_expr expr2 +| Mod {value = expr1, kwd_mod, expr2; _} -> + print_expr expr1; print_token kwd_mod "mod"; print_expr expr2 +| Neg {value = minus, expr; _} -> + print_token minus "-"; print_expr expr +| Not {value = kwd_not, expr; _} -> + print_token kwd_not "not"; print_expr expr +| Int i -> print_int i +| Var var -> print_var var +| String s -> print_string s +| Bytes b -> print_bytes b +| False region -> print_token region "False" +| True region -> print_token region "True" +| Unit region -> print_token region "Unit" +| Tuple tuple -> print_tuple tuple +| List list -> print_list list +| EmptyList elist -> print_empty_list elist +| Set set -> print_set set +| EmptySet eset -> print_empty_set eset +| NoneExpr nexpr -> print_none_expr nexpr +| FunCall fun_call -> print_fun_call fun_call +| ConstrApp capp -> print_constr_app capp +| SomeApp sapp -> print_some_app sapp +| MapLookUp lookup -> print_map_lookup lookup +| ParExpr pexpr -> print_par_expr pexpr + +and print_tuple {value; _} = + let lpar, sequence, rpar = value in + print_token lpar "("; + print_nsepseq "," print_expr sequence; + print_token rpar ")" + +and print_list {value; _} = + let lbra, sequence, rbra = value in + print_token lbra "["; + print_nsepseq "," print_expr sequence; + print_token rbra "]" + +and print_empty_list {value; _} = + let lpar, (lbracket, rbracket, colon, type_expr), + rpar = value in + print_token lpar "("; + print_token lbracket "["; + print_token rbracket "]"; + print_token colon ":"; + print_type_expr type_expr; + print_token rpar ")" + +and print_set {value; _} = + let lbrace, sequence, rbrace = value in + print_token lbrace "{"; + print_nsepseq "," print_expr sequence; + print_token rbrace "}" + +and print_empty_set {value; _} = + let lpar, (lbrace, rbrace, colon, type_expr), + rpar = value in + print_token lpar "("; + print_token lbrace "{"; + print_token rbrace "}"; + print_token colon ":"; + print_type_expr type_expr; + print_token rpar ")" + +and print_none_expr {value; _} = + let lpar, (c_None, colon, type_expr), rpar = value in + print_token lpar "("; + print_token c_None "None"; + print_token colon ":"; + print_type_expr type_expr; + print_token rpar ")" + +and print_fun_call {value; _} = + let fun_name, arguments = value in + print_var fun_name; + print_tuple arguments + +and print_constr_app {value; _} = + let constr, arguments = value in + print_constr constr; + print_tuple arguments + +and print_some_app {value; _} = + let c_Some, arguments = value in + print_token c_Some "Some"; + print_tuple arguments + +and print_map_lookup {value; _} = + let {map_name; selector; index} = value in + let {value = lbracket, expr, rbracket; _} = index in + print_var map_name; + print_token selector "."; + print_token lbracket "["; + print_expr expr; + print_token rbracket "]" + +and print_par_expr {value; _} = + let lpar, expr, rpar = value in + print_token lpar "("; + print_expr expr; + print_token rpar ")" + +and print_pattern {value; _} = + print_nsepseq "#" print_core_pattern value + +and print_core_pattern = function + PVar var -> print_var var +| PWild wild -> print_token wild "_" +| PInt i -> print_int i +| PBytes b -> print_bytes b +| PString s -> print_string s +| PUnit region -> print_token region "Unit" +| PFalse region -> print_token region "False" +| PTrue region -> print_token region "True" +| PNone region -> print_token region "None" +| PSome psome -> print_psome psome +| PList pattern -> print_list_pattern pattern +| PTuple ptuple -> print_ptuple ptuple + +and print_psome {value; _} = + let c_Some, patterns = value in + print_token c_Some "Some"; + print_patterns patterns + +and print_patterns {value; _} = + let lpar, core_pattern, rpar = value in + print_token lpar "("; + print_core_pattern core_pattern; + print_token rpar ")" + +and print_list_pattern = function + Sugar sugar -> print_sugar sugar +| Raw raw -> print_raw raw + +and print_sugar {value; _} = + let lbracket, sequence, rbracket = value in + print_token lbracket "["; + print_sepseq "," print_core_pattern sequence; + print_token rbracket "]" + +and print_raw {value; _} = + let lpar, (core_pattern, cons, pattern), rpar = value in + print_token lpar "("; + print_core_pattern core_pattern; + print_token cons "#"; + print_pattern pattern; + print_token rpar ")" + +and print_ptuple {value; _} = + let lpar, sequence, rpar = value in + print_token lpar "("; + print_nsepseq "," print_core_pattern sequence; + print_token rpar ")" + +and print_terminator = function + Some semi -> print_token semi ";" +| None -> () diff --git a/src/ligo/parser/AST.mli b/src/ligo/parser/AST.mli new file mode 100644 index 000000000..1b2611d93 --- /dev/null +++ b/src/ligo/parser/AST.mli @@ -0,0 +1,438 @@ +(* Abstract Syntax Tree (AST) for Ligo *) + +[@@@warning "-30"] + +open Utils + +(* Regions + + The AST carries all the regions where tokens have been found by the + lexer, plus additional regions corresponding to whole subtrees + (like entire expressions, patterns etc.). These regions are needed + for error reporting and source-to-source transformations. To make + these pervasive regions more legible, we define singleton types for + the symbols, keywords etc. with suggestive names like "kwd_and" + denoting the _region_ of the occurrence of the keyword "and". +*) + +type 'a reg = 'a Region.reg + +val nseq_to_region : ('a -> Region.t) -> 'a nseq -> Region.t +val nsepseq_to_region : ('a -> Region.t) -> ('a,'sep) nsepseq -> Region.t +val sepseq_to_region : ('a -> Region.t) -> ('a,'sep) sepseq -> Region.t + +(* Keywords of Ligo *) + +type kwd_begin = Region.t +type kwd_const = Region.t +type kwd_down = Region.t +type kwd_else = Region.t +type kwd_end = Region.t +type kwd_entrypoint = Region.t +type kwd_fail = Region.t +type kwd_for = Region.t +type kwd_function = Region.t +type kwd_if = Region.t +type kwd_in = Region.t +type kwd_is = Region.t +type kwd_match = Region.t +type kwd_mod = Region.t +type kwd_not = Region.t +type kwd_null = Region.t +type kwd_of = Region.t +type kwd_operations = Region.t +type kwd_procedure = Region.t +type kwd_record = Region.t +type kwd_step = Region.t +type kwd_storage = Region.t +type kwd_then = Region.t +type kwd_to = Region.t +type kwd_type = Region.t +type kwd_var = Region.t +type kwd_while = Region.t +type kwd_with = Region.t + +(* Data constructors *) + +type c_False = Region.t +type c_None = Region.t +type c_Some = Region.t +type c_True = Region.t +type c_Unit = Region.t + +(* Symbols *) + +type semi = Region.t +type comma = Region.t +type lpar = Region.t +type rpar = Region.t +type lbrace = Region.t +type rbrace = Region.t +type lbracket = Region.t +type rbracket = Region.t +type cons = Region.t +type vbar = Region.t +type arrow = Region.t +type ass = Region.t +type equal = Region.t +type colon = Region.t +type bool_or = Region.t +type bool_and = Region.t +type lt = Region.t +type leq = Region.t +type gt = Region.t +type geq = Region.t +type neq = Region.t +type plus = Region.t +type minus = Region.t +type slash = Region.t +type times = Region.t +type dot = Region.t +type wild = Region.t +type cat = Region.t + +(* Virtual tokens *) + +type eof = Region.t + +(* Literals *) + +type variable = string reg +type fun_name = string reg +type type_name = string reg +type field_name = string reg +type map_name = string reg +type constr = string reg + +(* Comma-separated non-empty lists *) + +type 'a csv = ('a, comma) nsepseq + +(* Bar-separated non-empty lists *) + +type 'a bsv = ('a, vbar) nsepseq + +(* Parentheses *) + +type 'a par = (lpar * 'a * rpar) reg + +(* Brackets compounds *) + +type 'a brackets = (lbracket * 'a * rbracket) reg + +(* Braced compounds *) + +type 'a braces = (lbrace * 'a * rbrace) reg + +(* The Abstract Syntax Tree *) + +type t = { + decl : declaration nseq; + eof : eof +} + +and ast = t + +and declaration = + TypeDecl of type_decl reg +| ConstDecl of const_decl reg +| StorageDecl of storage_decl reg +| OpDecl of operations_decl reg +| LambdaDecl of lambda_decl + +and const_decl = { + kwd_const : kwd_const; + name : variable; + colon : colon; + const_type : type_expr; + equal : equal; + init : expr; + terminator : semi option +} + +and storage_decl = { + kwd_storage : kwd_storage; + name : variable; + colon : colon; + store_type : type_expr; + terminator : semi option +} + +and operations_decl = { + kwd_operations : kwd_operations; + name : variable; + colon : colon; + op_type : type_expr; + terminator : semi option +} + +(* Type declarations *) + +and type_decl = { + kwd_type : kwd_type; + name : type_name; + kwd_is : kwd_is; + type_expr : type_expr; + terminator : semi option +} + +and type_expr = + Prod of cartesian +| Sum of (variant, vbar) nsepseq reg +| Record of record_type +| TypeApp of (type_name * type_tuple) reg +| ParType of type_expr par +| TAlias of variable + +and cartesian = (type_expr, times) nsepseq reg + +and variant = (constr * kwd_of * cartesian) reg + +and record_type = (kwd_record * field_decls * kwd_end) reg + +and field_decls = (field_decl, semi) nsepseq + +and field_decl = (variable * colon * type_expr) reg + +and type_tuple = (type_name, comma) nsepseq par + +(* Function and procedure declarations *) + +and lambda_decl = + FunDecl of fun_decl reg +| ProcDecl of proc_decl reg +| EntryDecl of entry_decl reg + +and fun_decl = { + kwd_function : kwd_function; + name : variable; + param : parameters; + colon : colon; + ret_type : type_expr; + kwd_is : kwd_is; + local_decls : local_decl list; + block : block reg; + kwd_with : kwd_with; + return : expr; + terminator : semi option +} + +and proc_decl = { + kwd_procedure : kwd_procedure; + name : variable; + param : parameters; + kwd_is : kwd_is; + local_decls : local_decl list; + block : block reg; + terminator : semi option +} + +and entry_decl = { + kwd_entrypoint : kwd_entrypoint; + name : variable; + param : parameters; + kwd_is : kwd_is; + local_decls : local_decl list; + block : block reg; + terminator : semi option +} + +and parameters = (param_decl, semi) nsepseq par + +and param_decl = + ParamConst of param_const +| ParamVar of param_var + +and param_const = (kwd_const * variable * colon * type_expr) reg + +and param_var = (kwd_var * variable * colon * type_expr) reg + +and block = { + opening : kwd_begin; + instr : instructions; + terminator : semi option; + close : kwd_end +} + +and local_decl = + LocalLam of lambda_decl +| LocalConst of const_decl reg +| LocalVar of var_decl reg + +and var_decl = { + kwd_var : kwd_var; + name : variable; + colon : colon; + var_type : type_expr; + ass : ass; + init : expr; + terminator : semi option +} + +and instructions = (instruction, semi) nsepseq reg + +and instruction = + Single of single_instr +| Block of block reg + +and single_instr = + Cond of conditional reg +| Match of match_instr reg +| Ass of ass_instr +| Loop of loop +| ProcCall of fun_call +| Null of kwd_null +| Fail of (kwd_fail * expr) reg + +and conditional = { + kwd_if : kwd_if; + test : expr; + kwd_then : kwd_then; + ifso : instruction; + kwd_else : kwd_else; + ifnot : instruction +} + +and match_instr = { + kwd_match : kwd_match; + expr : expr; + kwd_with : kwd_with; + lead_vbar : vbar option; + cases : cases; + kwd_end : kwd_end +} + +and cases = (case, vbar) nsepseq reg + +and case = (pattern * arrow * instruction) reg + +and ass_instr = (variable * ass * expr) reg + +and loop = + While of while_loop +| For of for_loop + +and while_loop = (kwd_while * expr * block reg) reg + +and for_loop = + ForInt of for_int reg +| ForCollect of for_collect reg + +and for_int = { + kwd_for : kwd_for; + ass : ass_instr; + down : kwd_down option; + kwd_to : kwd_to; + bound : expr; + step : (kwd_step * expr) option; + block : block reg +} + +and for_collect = { + kwd_for : kwd_for; + var : variable; + bind_to : (arrow * variable) option; + kwd_in : kwd_in; + expr : expr; + block : block reg +} + +(* Expressions *) + +and expr = + Or of (expr * bool_or * expr) reg +| And of (expr * bool_and * expr) reg +| Lt of (expr * lt * expr) reg +| Leq of (expr * leq * expr) reg +| Gt of (expr * gt * expr) reg +| Geq of (expr * geq * expr) reg +| Equal of (expr * equal * expr) reg +| Neq of (expr * neq * expr) reg +| Cat of (expr * cat * expr) reg +| Cons of (expr * cons * expr) reg +| Add of (expr * plus * expr) reg +| Sub of (expr * minus * expr) reg +| Mult of (expr * times * expr) reg +| Div of (expr * slash * expr) reg +| Mod of (expr * kwd_mod * expr) reg +| Neg of (minus * expr) reg +| Not of (kwd_not * expr) reg +| Int of (Lexer.lexeme * Z.t) reg +| Var of Lexer.lexeme reg +| String of Lexer.lexeme reg +| Bytes of (Lexer.lexeme * MBytes.t) reg +| False of c_False +| True of c_True +| Unit of c_Unit +| Tuple of tuple +| List of (expr, comma) nsepseq brackets +| EmptyList of empty_list +| Set of (expr, comma) nsepseq braces +| EmptySet of empty_set +| NoneExpr of none_expr +| FunCall of fun_call +| ConstrApp of constr_app +| SomeApp of (c_Some * arguments) reg +| MapLookUp of map_lookup reg +| ParExpr of expr par + +and tuple = (expr, comma) nsepseq par + +and empty_list = + (lbracket * rbracket * colon * type_expr) par + +and empty_set = + (lbrace * rbrace * colon * type_expr) par + +and none_expr = + (c_None * colon * type_expr) par + +and fun_call = (fun_name * arguments) reg + +and arguments = tuple + +and constr_app = (constr * arguments) reg + +and map_lookup = { + map_name : variable; + selector : dot; + index : expr brackets +} + +(* Patterns *) + +and pattern = (core_pattern, cons) nsepseq reg + +and core_pattern = + PVar of Lexer.lexeme reg +| PWild of wild +| PInt of (Lexer.lexeme * Z.t) reg +| PBytes of (Lexer.lexeme * MBytes.t) reg +| PString of Lexer.lexeme reg +| PUnit of c_Unit +| PFalse of c_False +| PTrue of c_True +| PNone of c_None +| PSome of (c_Some * core_pattern par) reg +| PList of list_pattern +| PTuple of (core_pattern, comma) nsepseq par + +and list_pattern = + Sugar of (core_pattern, comma) sepseq brackets +| Raw of (core_pattern * cons * pattern) par + +(* Projecting regions *) + +val type_expr_to_region : type_expr -> Region.t + +val expr_to_region : expr -> Region.t + +val instr_to_region : instruction -> Region.t + +val core_pattern_to_region : core_pattern -> Region.t + +val local_decl_to_region : local_decl -> Region.t + +(* Printing *) + +val print_tokens : t -> unit diff --git a/src/ligo/parser/AST2.ml b/src/ligo/parser/AST2.ml new file mode 100644 index 000000000..019c3df9f --- /dev/null +++ b/src/ligo/parser/AST2.ml @@ -0,0 +1,669 @@ +[@@@warning "-30"] + +module I = AST + +open Region + +module SMap = Map.Make(String) + +module O = struct + type type_name = string + type var_name = string + + type pattern = + PVar of var_name + | PWild + | PInt of Z.t + | PBytes of MBytes.t + | PString of string + | PUnit + | PFalse + | PTrue + | PNone + | PSome of pattern + | PCons of pattern * pattern + | PNull + | PTuple of pattern list + + type type_expr = + Prod of type_expr list + | Sum of (type_name * type_expr) list + | Record of (type_name * type_expr) list + | TypeApp of type_name * (type_expr list) + | Function of { args: type_expr list; ret: type_expr } + | Ref of type_expr + | Unit + | Int + + type typed_var = { name:var_name; ty:type_expr } + + type type_decl = { name:string; ty:type_expr } + + type expr = + App of { operator: operator; arguments: expr list } + | Var of var_name + | Constant of constant + | Lambda of lambda + + and decl = { name:var_name; ty:type_expr; value: expr } + + and lambda = { + parameters: type_expr SMap.t; + declarations: decl list; + instructions: instr list; + result: expr; + } + + and operator = + Or | And | Lt | Leq | Gt | Geq | Equal | Neq | Cat | Cons | Add | Sub | Mult | Div | Mod + | Neg | Not + | Tuple | Set | List + | MapLookup + | Function of string + + and constant = + Unit | Int of Z.t | String of string | Bytes of MBytes.t | False | True + | Null of type_expr | EmptySet of type_expr | CNone of type_expr + + and instr = + Assignment of { name: var_name; value: expr } + | While of { condition: expr; body: instr list } + | ForCollection of { list: expr; key: var_name; value: var_name option; body: instr list } + | If of { condition: expr; ifso: instr list; ifnot: instr list } + | Match of { expr: expr; cases: (pattern * instr list) list } + | DropUnit of expr (* expr returns unit, drop the result. *) + | Fail of { expr: expr } + + type ast = { + types : type_decl list; + storage_decl : typed_var; + operations_decl : typed_var; + declarations : decl list; + } +end + +(* open Sanity: *) +let (|>) v f = f v (* pipe f to v *) +let (@@) f v = f v (* apply f on v *) +let (@.) f g x = f (g x) (* compose *) +let map f l = List.rev (List.rev_map f l) +(* TODO: check that List.to_seq, List.append and SMap.of_seq are not broken + (i.e. check that they are tail-recursive) *) +let append_map f l = map f l |> List.flatten +let append l1 l2 = List.append l1 l2 +let list_to_map l = List.fold_left (fun m (k,v) -> SMap.add k v m) SMap.empty l +let fold_map f a l = + let f (acc, l) elem = + let acc', elem' = f acc elem + in acc', (elem' :: l) in + let last_acc, last_l = List.fold_left f (a, []) l + in last_acc, List.rev last_l + +(* Simplify the AST *) + +let s_nsepseq : ('a,'sep) Utils.nsepseq -> 'a list = + fun (first, rest) -> first :: (map snd rest) + +let s_sepseq : ('a,'sep) Utils.sepseq -> 'a list = + function + None -> [] + | Some nsepseq -> s_nsepseq nsepseq + +let s_name {value=name; region} : O.var_name = + let () = ignore (region) in + name + +let rec s_cartesian {value=sequence; region} : O.type_expr = + let () = ignore (region) in + Prod (map s_type_expr (s_nsepseq sequence)) + +and s_sum_type {value=sequence; region} : O.type_expr = + let () = ignore (region) in + Sum (map s_variant (s_nsepseq sequence)) + +and s_variant {value=(constr, kwd_of, cartesian); region} = + let () = ignore (kwd_of,region) in + (s_name constr, s_cartesian cartesian) + +and s_record_type {value=(kwd_record, field_decls, kwd_end); region} : O.type_expr = + let () = ignore (kwd_record,region,kwd_end) in + Record (map s_field_decl (s_nsepseq field_decls)) + +and s_field_decl {value=(var, colon, type_expr); region} = + let () = ignore (colon,region) in + (s_name var, s_type_expr type_expr) + +and s_type_app {value=(type_name,type_tuple); region} : O.type_expr = + let () = ignore (region) in + TypeApp (s_name type_name, s_type_tuple type_tuple) + +and s_type_tuple ({value=(lpar, sequence, rpar); region} : (I.type_name, I.comma) Utils.nsepseq I.par) : O.type_expr list = + let () = ignore (lpar,rpar,region) in + (* TODO: the grammar should allow any type expr, not just type_name in the tuple elements *) + map s_type_expr (map (fun a -> I.TAlias a) (s_nsepseq sequence)) + +and s_par_type {value=(lpar, type_expr, rpar); region} : O.type_expr = + let () = ignore (lpar,rpar,region) in + s_type_expr type_expr + +and s_type_alias name : O.type_expr = + let () = ignore () in + TypeApp (s_name name, []) + +and s_type_expr : I.type_expr -> O.type_expr = function + Prod cartesian -> s_cartesian cartesian +| Sum sum_type -> s_sum_type sum_type +| Record record_type -> s_record_type record_type +| TypeApp type_app -> s_type_app type_app +| ParType par_type -> s_par_type par_type +| TAlias type_alias -> s_type_alias type_alias + + +let s_type_decl I.{value={kwd_type;name;kwd_is;type_expr;terminator}; region} : O.type_decl = + let () = ignore (kwd_type,kwd_is,terminator,region) in + O.{ name = s_name name; ty = s_type_expr type_expr } + +let s_storage_decl I.{value={kwd_storage; name; colon; store_type; terminator}; region} : O.typed_var = + let () = ignore (kwd_storage,colon,terminator,region) in + O.{ name = s_name name; ty = s_type_expr store_type } + +let s_operations_decl I.{value={kwd_operations;name;colon;op_type;terminator}; region} : O.typed_var = + let () = ignore (kwd_operations,colon,terminator,region) in + O.{ name = s_name name; ty = s_type_expr op_type } + +let s_empty_list {value=(l, (lbracket, rbracket, colon, type_expr), r); region} : O.expr = + let () = ignore (l, lbracket, rbracket, colon, r, region) in + Constant (Null (s_type_expr type_expr)) + +let s_empty_set {value=(l, (lbrace, rbrace, colon, type_expr), r); region} : O.expr = + let () = ignore (l, lbrace, rbrace, colon, r, region) in + Constant (EmptySet (s_type_expr type_expr)) + +let s_none {value=(l, (c_None, colon, type_expr), r); region} : O.expr = + let () = ignore (l, c_None, colon, r, region) in + Constant (CNone (s_type_expr type_expr)) + +let rec bin l operator r = O.App { operator; arguments = [s_expr l; s_expr r] } +and una operator v = O.App { operator; arguments = [s_expr v] } +and s_expr : I.expr -> O.expr = + function + Or {value=(l, bool_or, r); region} -> let () = ignore (region, bool_or) in bin l Or r + | And {value=(l, bool_and, r); region} -> let () = ignore (region,bool_and) in bin l And r + | Lt {value=(l, lt, r); region} -> let () = ignore (region, lt) in bin l Lt r + | Leq {value=(l, leq, r); region} -> let () = ignore (region, leq) in bin l Leq r + | Gt {value=(l, gt, r); region} -> let () = ignore (region, gt) in bin l Gt r + | Geq {value=(l, geq, r); region} -> let () = ignore (region, geq) in bin l Geq r + | Equal {value=(l, equal, r); region} -> let () = ignore (region, equal) in bin l Equal r + | Neq {value=(l, neq, r); region} -> let () = ignore (region, neq) in bin l Neq r + | Cat {value=(l, cat, r); region} -> let () = ignore (region, cat) in bin l Cat r + | Cons {value=(l, cons, r); region} -> let () = ignore (region, cons) in bin l Cons r + | Add {value=(l, plus, r); region} -> let () = ignore (region, plus) in bin l Add r + | Sub {value=(l, minus, r); region} -> let () = ignore (region, minus) in bin l Sub r + | Mult {value=(l, times, r); region} -> let () = ignore (region, times) in bin l Mult r + | Div {value=(l, slash, r); region} -> let () = ignore (region, slash) in bin l Div r + | Mod {value=(l, kwd_mod, r); region} -> let () = ignore (region, kwd_mod) in bin l Mod r + | Neg {value=(minus, expr); region} -> let () = ignore (region, minus) in una Neg expr + | Not {value=(kwd_not, expr); region} -> let () = ignore (region, kwd_not) in una Not expr + | Int {value=(lexeme, z); region} -> let () = ignore (region, lexeme) in Constant (Int z) + | Var {value=lexeme; region} -> let () = ignore (region) in Var lexeme + | String {value=s; region} -> let () = ignore (region) in Constant (String s) + | Bytes {value=(lexeme, mbytes); region} -> let () = ignore (region, lexeme) in Constant (Bytes mbytes) + | False c_False -> let () = ignore (c_False) in Constant (False) + | True c_True -> let () = ignore (c_True) in Constant (True) + | Unit c_Unit -> let () = ignore (c_Unit) in Constant (Unit) + | Tuple {value=(l,tuple,r); region} -> let () = ignore (l,r,region) in App { operator = Tuple; arguments = map s_expr (s_nsepseq tuple)} + | List list -> s_list list + | EmptyList empty_list -> s_empty_list empty_list + | Set set -> s_set set + | EmptySet empty_set -> s_empty_set empty_set + | NoneExpr none_expr -> s_none none_expr + | FunCall fun_call -> s_fun_call fun_call + | ConstrApp constr_app -> s_constr_app constr_app + | SomeApp some_app -> s_some_app some_app + | MapLookUp map_lookup -> s_map_lookup map_lookup + | ParExpr {value=(lpar,expr,rpar); region} -> let () = ignore (lpar,rpar,region) in s_expr expr + +and s_map_lookup I.{value = {map_name; selector; index}; region} : O.expr = + let {value = lbracket, index_expr, rbracket; region=region2} = index in + let () = ignore (selector, lbracket, rbracket, region2, region) in + App { operator = MapLookup; arguments = [Var (s_name map_name); s_expr index_expr] } + +and s_some_app {value=(c_Some, {value=(l,arguments,r); region=region2}); region} : O.expr = + let () = ignore (c_Some,l,r,region2,region) in + match s_nsepseq arguments with + [] -> failwith "tuple cannot be empty" + | [a] -> s_expr a + | l -> App { operator = Tuple; arguments = map s_expr l } + +and s_list {value=(l, list, r); region} : O.expr = + let () = ignore (l, r, region) in + App { operator = List; arguments = map s_expr (s_nsepseq list) } + +and s_set {value=(l, set, r); region} : O.expr = + let () = ignore (l, r, region) in + App { operator = Set; arguments = map s_expr (s_nsepseq set) } + +and s_pattern {value=sequence; region} : O.pattern = + let () = ignore (region) in + s_pattern_conses (s_nsepseq sequence) + +and s_pattern_conses : I.core_pattern list -> O.pattern = function + [] -> assert false + | [p] -> s_core_pattern p + | hd :: tl -> PCons (s_core_pattern hd, s_pattern_conses tl) + +and s_case ({value=(pattern, arrow, instruction); region} : I.case) : O.pattern * O.instr list = + let () = ignore (arrow,region) in + s_pattern pattern, s_instruction instruction + +and s_core_pattern : I.core_pattern -> O.pattern = function + PVar var -> PVar (s_name var) +| PWild wild -> let () = ignore (wild) in PWild +| PInt {value=(si,i);region} -> let () = ignore (si,region) in PInt i +| PBytes {value=(sb,b);region} -> let () = ignore (sb,region) in PBytes b +| PString {value=s;region} -> let () = ignore (region) in PString s +| PUnit region -> let () = ignore (region) in PUnit +| PFalse region -> let () = ignore (region) in PFalse +| PTrue region -> let () = ignore (region) in PTrue +| PNone region -> let () = ignore (region) in PNone +| PSome psome -> s_psome psome +| PList pattern -> s_list_pattern pattern +| PTuple ptuple -> s_ptuple ptuple + +and s_list_pattern = function + Sugar sugar -> s_sugar sugar +| Raw raw -> s_raw raw + +and s_sugar {value=(lbracket, sequence, rbracket); region} : O.pattern = + let () = ignore (lbracket, rbracket, region) in + List.fold_left (fun acc p -> O.PCons (s_core_pattern p, acc)) + O.PNull + (s_sepseq sequence); + +and s_raw {value=(lpar, (core_pattern, cons, pattern), rpar); region} = + let () = ignore (lpar, cons, rpar, region) in + O.PCons (s_core_pattern core_pattern, s_pattern pattern) + +and s_ptuple {value=(lpar, sequence, rpar); region} = + let () = ignore (lpar, rpar, region) in + PTuple (map s_core_pattern (s_nsepseq sequence)) + +and s_psome {value=(c_Some,{value=(l,psome,r);region=region2});region} : O.pattern = + let () = ignore (c_Some,l,r,region2,region) in + PSome (s_core_pattern psome) + +and s_const_decl I.{value={kwd_const;name;colon;const_type;equal;init;terminator}; region} : O.decl = + let () = ignore (kwd_const,colon,equal,terminator,region) in + O.{ name = s_name name; ty = s_type_expr const_type; value = s_expr init } + +and s_param_const {value=(kwd_const,variable,colon,type_expr); region} : string * O.type_expr = + let () = ignore (kwd_const,colon,region) in + s_name variable, s_type_expr type_expr + +and s_param_var {value=(kwd_var,variable,colon,type_expr); region} : string * O.type_expr = + let () = ignore (kwd_var,colon,region) in + s_name variable, s_type_expr type_expr + +and s_param_decl : I.param_decl -> string * O.type_expr = function + ParamConst p -> s_param_const p + | ParamVar p -> s_param_var p + +and s_parameters ({value=(lpar,param_decl,rpar);region} : I.parameters) : (string * O.type_expr) list = + let () = ignore (lpar,rpar,region) in + let l = (s_nsepseq param_decl) in + map s_param_decl l + +and s_var_decl I.{value={kwd_var;name;colon;var_type;ass;init;terminator}; region} : O.decl = + let () = ignore (kwd_var,colon,ass,terminator,region) in + O.{ + name = s_name name; + ty = s_type_expr var_type; + value = s_expr init + } + +and s_local_decl : I.local_decl -> O.decl = function + LocalLam decl -> s_lambda_decl decl +| LocalConst decl -> s_const_decl decl +| LocalVar decl -> s_var_decl decl + +and s_instructions ({value=sequence; region} : I.instructions) : O.instr list = + let () = ignore (region) in + append_map s_instruction (s_nsepseq sequence) + +and s_instruction : I.instruction -> O.instr list = function + Single instr -> s_single_instr instr +| Block block -> (s_block block) + +and s_conditional I.{kwd_if;test;kwd_then;ifso;kwd_else;ifnot} : O.instr = + let () = ignore (kwd_if,kwd_then,kwd_else) in + If { condition = s_expr test; ifso = s_instruction ifso; ifnot = s_instruction ifnot } + +and s_match_instr I.{kwd_match;expr;kwd_with;lead_vbar;cases;kwd_end} : O.instr = + let {value=cases;region} = cases in + let () = ignore (kwd_match,kwd_with,lead_vbar,kwd_end,region) in + Match { expr = s_expr expr; cases = map s_case (s_nsepseq cases) } + +and s_ass_instr {value=(variable,ass,expr); region} : O.instr = + let () = ignore (ass,region) in + Assignment { name = s_name variable; value = s_expr expr } + +and s_while_loop {value=(kwd_while, expr, block); region} : O.instr list = + let () = ignore (kwd_while,region) in + [While {condition = s_expr expr; body = s_block block}] + +and s_for_loop : I.for_loop -> O.instr list = function + ForInt for_int -> s_for_int for_int +| ForCollect for_collect -> s_for_collect for_collect + +and s_for_int ({value={kwd_for;ass;down;kwd_to;bound;step;block}; region} : I.for_int reg) : O.instr list = + let {value=(variable,ass_kwd,expr);region = ass_region} = ass in + let () = ignore (kwd_for,ass_region,ass_kwd,kwd_to,region) in + let name = s_name variable in + let condition, operator = match down with Some kwd_down -> ignore kwd_down; O.Gt, O.Sub + | None -> O.Lt, O.Add in + let step = s_step step + in [ + Assignment { name; value = s_expr expr }; + (* TODO: lift the declaration of the variable, to avoid creating a nested scope here. *) + While { + condition = App { operator = condition; + arguments = [Var name; s_expr bound] }; + body = append (s_block block) + [O.Assignment { name; + value = App { operator; + arguments = [Var name; step]}}] + } + ] + +and s_for_collect ({value={kwd_for;var;bind_to;kwd_in;expr;block}; _} : I.for_collect reg) : O.instr list = + let () = ignore (kwd_for,kwd_in) in + [ + O.ForCollection { + list = s_expr expr; + key = s_name var; + value = s_bind_to bind_to; + body = s_block block + } + ] + +and s_step : (I.kwd_step * I.expr) option -> O.expr = function + Some (kwd_step, expr) -> let () = ignore (kwd_step) in s_expr expr +| None -> Constant (Int (Z.of_int 1)) + +and s_bind_to : (I.arrow * I.variable) option -> O.var_name option = function + Some (arrow, variable) -> let () = ignore (arrow) in Some (s_name variable) + | None -> None + +and s_loop : I.loop -> O.instr list = function + While while_loop -> s_while_loop while_loop + | For for_loop -> s_for_loop for_loop + +and s_fun_call {value=(fun_name, arguments); region} : O.expr = + let () = ignore (region) in + App { operator = Function (s_name fun_name); arguments = s_arguments arguments } + +and s_constr_app {value=(constr, arguments); region} : O.expr = + let () = ignore (region) in + App { operator = Function (s_name constr); arguments = s_arguments arguments } + +and s_arguments {value=(lpar, sequence, rpar); region} = + let () = ignore (lpar,rpar,region) in + map s_expr (s_nsepseq sequence); + +and s_fail ((kwd_fail, expr) : (I.kwd_fail * I.expr)) : O.instr = + let () = ignore (kwd_fail) in + Fail { expr = s_expr expr } + + + + +and s_single_instr : I.single_instr -> O.instr list = function + Cond {value; _} -> [s_conditional value] +| Match {value; _} -> [s_match_instr value] +| Ass instr -> [s_ass_instr instr] +| Loop loop -> s_loop loop +| ProcCall fun_call -> [DropUnit (s_fun_call fun_call)] +| Null kwd_null -> let () = ignore (kwd_null) in + [] +| Fail {value; _} -> [s_fail value] + +and s_block I.{value={opening;instr;terminator;close}; _} : O.instr list = + let () = ignore (opening,terminator,close) in + s_instructions instr + +and s_fun_decl I.{value={kwd_function;name;param;colon;ret_type;kwd_is;local_decls;block;kwd_with;return;terminator}; region} : O.decl = + let () = ignore (kwd_function,colon,kwd_is,kwd_with,terminator,region) in + O.{ + name = s_name name; + ty = Function { args = map snd (s_parameters param); ret = s_type_expr ret_type }; + value = Lambda { + parameters = s_parameters param |> list_to_map; + declarations = map s_local_decl local_decls; + instructions = s_block block; + result = s_expr return + } + } + +and s_proc_decl I.{value={kwd_procedure;name;param;kwd_is;local_decls;block;terminator}; region} = + let () = ignore (kwd_procedure,kwd_is,terminator,region) in + O.{ + name = s_name name; + ty = Function { args = map snd (s_parameters param); ret = Unit }; + value = Lambda { + parameters = s_parameters param |> list_to_map; + declarations = map s_local_decl local_decls; + instructions = s_block block; + result = O.Constant O.Unit + } + } + +and s_entry_decl I.{value={kwd_entrypoint;name;param;kwd_is;local_decls;block;terminator}; region} = + let () = ignore (kwd_entrypoint,kwd_is,terminator,region) in + O.{ + name = s_name name; + ty = Function { args = map snd (s_parameters param); ret = Unit }; + value = Lambda { + parameters = s_parameters param |> list_to_map; + declarations = map s_local_decl local_decls; + instructions = s_block block; + result = O.Constant O.Unit + } + } + +and s_lambda_decl : I.lambda_decl -> O.decl = function + FunDecl fun_decl -> s_fun_decl fun_decl +| EntryDecl entry_decl -> s_entry_decl entry_decl +| ProcDecl proc_decl -> s_proc_decl proc_decl + +type tmp_ast = { + types : O.type_decl list; + storage_decl : O.typed_var option; + operations_decl : O.typed_var option; + declarations : O.decl list; + } + + +let s_declaration (ast : tmp_ast) : I.declaration -> tmp_ast = function + TypeDecl t -> { ast with types = (s_type_decl t) :: ast.types } + | ConstDecl c -> { ast with declarations = (s_const_decl c) :: ast.declarations } + | StorageDecl s -> { ast with storage_decl = Some (s_storage_decl s) } + | OpDecl o -> { ast with operations_decl = Some (s_operations_decl o) } + | LambdaDecl l -> { ast with declarations = (s_lambda_decl l) :: ast.declarations } + +let s_ast (ast : I.ast) : O.ast = + let I.{decl=(decl1,decls);eof} = ast in + let () = ignore (eof) in + let {types; storage_decl; operations_decl; declarations} = + List.fold_left s_declaration + { types = []; + storage_decl = None; + operations_decl = None; + declarations = [] } + ( decl1 :: decls ) in + let storage_decl = match storage_decl with + Some x -> x + | None -> failwith "Missing storage declaration" in + let operations_decl = match operations_decl with + Some x -> x + | None -> failwith "Missing storage declaration" + in {types; storage_decl; operations_decl; declarations} + + + + +(* let s_token region lexeme = *) +(* printf "%s: %s\n"(compact region) lexeme *) + +(* and s_var {region; value=lexeme} = *) +(* printf "%s: Ident \"%s\"\n" (compact region) lexeme *) + +(* and s_constr {region; value=lexeme} = *) +(* printf "%s: Constr \"%s\"\n" *) +(* (compact region) lexeme *) + +(* and s_string {region; value=lexeme} = *) +(* printf "%s: String \"%s\"\n" *) +(* (compact region) lexeme *) + +(* and s_bytes {region; value = lexeme, abstract} = *) +(* printf "%s: Bytes (\"%s\", \"0x%s\")\n" *) +(* (compact region) lexeme *) +(* (MBytes.to_hex abstract |> Hex.to_string) *) + +(* and s_int {region; value = lexeme, abstract} = *) +(* printf "%s: Int (\"%s\", %s)\n" *) +(* (compact region) lexeme *) +(* (Z.to_string abstract) *) + + +(* and s_parameters {value=node; _} = *) +(* let lpar, sequence, rpar = node in *) +(* s_token lpar "("; *) +(* s_nsepseq ";" s_param_decl sequence; *) +(* s_token rpar ")" *) + +(* and s_param_decl = function *) +(* ParamConst param_const -> s_param_const param_const *) +(* | ParamVar param_var -> s_param_var param_var *) + +(* and s_region_cases {value=sequence; _} = *) +(* s_nsepseq "|" s_case sequence *) + +(* and s_expr = function *) +(* Or {value = expr1, bool_or, expr2; _} -> *) +(* s_expr expr1; s_token bool_or "||"; s_expr expr2 *) +(* | And {value = expr1, bool_and, expr2; _} -> *) +(* s_expr expr1; s_token bool_and "&&"; s_expr expr2 *) +(* | Lt {value = expr1, lt, expr2; _} -> *) +(* s_expr expr1; s_token lt "<"; s_expr expr2 *) +(* | Leq {value = expr1, leq, expr2; _} -> *) +(* s_expr expr1; s_token leq "<="; s_expr expr2 *) +(* | Gt {value = expr1, gt, expr2; _} -> *) +(* s_expr expr1; s_token gt ">"; s_expr expr2 *) +(* | Geq {value = expr1, geq, expr2; _} -> *) +(* s_expr expr1; s_token geq ">="; s_expr expr2 *) +(* | Equal {value = expr1, equal, expr2; _} -> *) +(* s_expr expr1; s_token equal "="; s_expr expr2 *) +(* | Neq {value = expr1, neq, expr2; _} -> *) +(* s_expr expr1; s_token neq "=/="; s_expr expr2 *) +(* | Cat {value = expr1, cat, expr2; _} -> *) +(* s_expr expr1; s_token cat "^"; s_expr expr2 *) +(* | Cons {value = expr1, cons, expr2; _} -> *) +(* s_expr expr1; s_token cons "<:"; s_expr expr2 *) +(* | Add {value = expr1, add, expr2; _} -> *) +(* s_expr expr1; s_token add "+"; s_expr expr2 *) +(* | Sub {value = expr1, sub, expr2; _} -> *) +(* s_expr expr1; s_token sub "-"; s_expr expr2 *) +(* | Mult {value = expr1, mult, expr2; _} -> *) +(* s_expr expr1; s_token mult "*"; s_expr expr2 *) +(* | Div {value = expr1, div, expr2; _} -> *) +(* s_expr expr1; s_token div "/"; s_expr expr2 *) +(* | Mod {value = expr1, kwd_mod, expr2; _} -> *) +(* s_expr expr1; s_token kwd_mod "mod"; s_expr expr2 *) +(* | Neg {value = minus, expr; _} -> *) +(* s_token minus "-"; s_expr expr *) +(* | Not {value = kwd_not, expr; _} -> *) +(* s_token kwd_not "not"; s_expr expr *) +(* | Int i -> s_int i *) +(* | Var var -> s_var var *) +(* | String s -> s_string s *) +(* | Bytes b -> s_bytes b *) +(* | False region -> s_token region "False" *) +(* | True region -> s_token region "True" *) +(* | Unit region -> s_token region "Unit" *) +(* | Tuple tuple -> s_tuple tuple *) +(* | List list -> s_list list *) +(* | EmptyList elist -> s_empty_list elist *) +(* | Set set -> s_set set *) +(* | EmptySet eset -> s_empty_set eset *) +(* | NoneExpr nexpr -> s_none_expr nexpr *) +(* | FunCall fun_call -> s_fun_call fun_call *) +(* | ConstrApp capp -> s_constr_app capp *) +(* | SomeApp sapp -> s_some_app sapp *) +(* | MapLookUp lookup -> s_map_lookup lookup *) +(* | ParExpr pexpr -> s_par_expr pexpr *) + +(* and s_list {value=node; _} = *) +(* let lbra, sequence, rbra = node in *) +(* s_token lbra "["; *) +(* s_nsepseq "," s_expr sequence; *) +(* s_token rbra "]" *) + +(* and s_empty_list {value=node; _} = *) +(* let lpar, (lbracket, rbracket, colon, type_expr), rpar = node in *) +(* s_token lpar "("; *) +(* s_token lbracket "["; *) +(* s_token rbracket "]"; *) +(* s_token colon ":"; *) +(* s_type_expr type_expr; *) +(* s_token rpar ")" *) + +(* and s_set {value=node; _} = *) +(* let lbrace, sequence, rbrace = node in *) +(* s_token lbrace "{"; *) +(* s_nsepseq "," s_expr sequence; *) +(* s_token rbrace "}" *) + +(* and s_empty_set {value=node; _} = *) +(* let lpar, (lbrace, rbrace, colon, type_expr), rpar = node in *) +(* s_token lpar "("; *) +(* s_token lbrace "{"; *) +(* s_token rbrace "}"; *) +(* s_token colon ":"; *) +(* s_type_expr type_expr; *) +(* s_token rpar ")" *) + +(* and s_none_expr {value=node; _} = *) +(* let lpar, (c_None, colon, type_expr), rpar = node in *) +(* s_token lpar "("; *) +(* s_token c_None "None"; *) +(* s_token colon ":"; *) +(* s_type_expr type_expr; *) +(* s_token rpar ")" *) + +(* and s_constr_app {value=node; _} = *) +(* let constr, arguments = node in *) +(* s_constr constr; *) +(* s_tuple arguments *) + +(* and s_some_app {value=node; _} = *) +(* let c_Some, arguments = node in *) +(* s_token c_Some "Some"; *) +(* s_tuple arguments *) + + +(* and s_par_expr {value=node; _} = *) +(* let lpar, expr, rpar = node in *) +(* s_token lpar "("; *) +(* s_expr expr; *) +(* s_token rpar ")" *) + +(* and s_psome {value=node; _} = *) +(* let c_Some, patterns = node in *) +(* s_token c_Some "Some"; *) +(* s_patterns patterns *) + + +(* and s_terminator = function *) +(* Some semi -> s_token semi ";" *) +(* | None -> () *) diff --git a/src/ligo/parser/Error.mli b/src/ligo/parser/Error.mli new file mode 100644 index 000000000..19c1ce4c9 --- /dev/null +++ b/src/ligo/parser/Error.mli @@ -0,0 +1,3 @@ +type t = .. + +type error = t diff --git a/src/ligo/parser/EvalOpt.ml b/src/ligo/parser/EvalOpt.ml new file mode 100644 index 000000000..13c9f51ad --- /dev/null +++ b/src/ligo/parser/EvalOpt.ml @@ -0,0 +1,149 @@ +(* Parsing the command-line option for testing the Ligo lexer and + parser *) + +let printf = Printf.printf +let sprintf = Printf.sprintf + +let abort msg = + Utils.highlight (sprintf "Command-line error: %s\n" msg); exit 1 + +(* Help *) + +let help () = + let file = Filename.basename Sys.argv.(0) in + printf "Usage: %s [