diff --git a/src/ligo/ligo-parser/.Lexer.ml.tag b/src/ligo/ligo-parser/.Lexer.ml.tag new file mode 100644 index 000000000..051eeceb0 --- /dev/null +++ b/src/ligo/ligo-parser/.Lexer.ml.tag @@ -0,0 +1 @@ +ocamlc: -w -42 diff --git a/src/ligo/ligo-parser/.LexerMain.tag b/src/ligo/ligo-parser/.LexerMain.tag new file mode 100644 index 000000000..e69de29bb diff --git a/src/ligo/ligo-parser/.Parser.mly.tag b/src/ligo/ligo-parser/.Parser.mly.tag new file mode 100644 index 000000000..100f7bb69 --- /dev/null +++ b/src/ligo/ligo-parser/.Parser.mly.tag @@ -0,0 +1 @@ +--explain --external-tokens LexToken --base Parser ParToken.mly diff --git a/src/ligo/ligo-parser/.ParserMain.tag b/src/ligo/ligo-parser/.ParserMain.tag new file mode 100644 index 000000000..e69de29bb diff --git a/src/ligo/ligo-parser/.gitignore b/src/ligo/ligo-parser/.gitignore new file mode 100644 index 000000000..14e1f3c03 --- /dev/null +++ b/src/ligo/ligo-parser/.gitignore @@ -0,0 +1,7 @@ +_build/* +*/_build +*~ +.merlin +*/.merlin +*.install +/Version.ml diff --git a/src/ligo/ligo-parser/.gitlab-ci.yml b/src/ligo/ligo-parser/.gitlab-ci.yml new file mode 100644 index 000000000..5c8b0d9af --- /dev/null +++ b/src/ligo/ligo-parser/.gitlab-ci.yml @@ -0,0 +1,21 @@ +before_script: + - apt-get update -qq + - apt-get -y -qq install libhidapi-dev libcap-dev bubblewrap + - wget https://github.com/ocaml/opam/releases/download/2.0.1/opam-2.0.1-x86_64-linux -O opam-2.0.1-x86_64-linux + - cp opam-2.0.1-x86_64-linux /usr/local/bin/opam + - chmod +x /usr/local/bin/opam + - export PATH="/usr/local/bin${PATH:+:}${PATH:-}" + - echo "$PATH" + - printf '' | opam init + - eval $(opam config env) + - opam repository add tezos-opam-repository https://gitlab.com/gabriel.alfour/tezos-opam-repository.git + - eval $(opam config env) + - opam --version + - printf '' | ocaml + +default-job: + script: + - opam install -y --working-dir . + artifacts: + paths: + - Parser.exe diff --git a/src/ligo/ligo-parser/.links b/src/ligo/ligo-parser/.links new file mode 100644 index 000000000..b29b57639 --- /dev/null +++ b/src/ligo/ligo-parser/.links @@ -0,0 +1,2 @@ +$HOME/git/OCaml-build/Makefile +$HOME/git/OCaml-build/Makefile.cfg diff --git a/src/ligo/ligo-parser/AST.ml b/src/ligo/ligo-parser/AST.ml new file mode 100644 index 000000000..e82ef7d3c --- /dev/null +++ b/src/ligo/ligo-parser/AST.ml @@ -0,0 +1,1497 @@ +(* Abstract Syntax Tree (AST) for LIGO *) + +(* To disable warning about multiply-defined record labels. *) + +[@@@warning "-30-42"] + +(* Utilities *) + +open Utils + +(* Regions + + The AST carries all the regions where tokens have been found by the + lexer, plus additional regions corresponding to whole subtrees + (like entire expressions, patterns etc.). These regions are needed + for error reporting and source-to-source transformations. To make + these pervasive regions more legible, we define singleton types for + the symbols, keywords etc. with suggestive names like "kwd_and" + denoting the _region_ of the occurrence of the keyword "and". +*) + +type 'a reg = 'a Region.reg + +let rec last to_region = function + [] -> Region.ghost +| [x] -> to_region x +| _::t -> last to_region t + +let nseq_to_region to_region (hd,tl) = + Region.cover (to_region hd) (last to_region tl) + +let nsepseq_to_region to_region (hd,tl) = + let reg (_, item) = to_region item in + Region.cover (to_region hd) (last reg tl) + +let sepseq_to_region to_region = function + None -> Region.ghost +| Some seq -> nsepseq_to_region to_region seq + +(* Keywords of LIGO *) + +type keyword = Region.t +type kwd_and = Region.t +type kwd_begin = Region.t +type kwd_block = Region.t +type kwd_case = Region.t +type kwd_const = Region.t +type kwd_contains = Region.t +type kwd_down = Region.t +type kwd_else = Region.t +type kwd_end = Region.t +type kwd_entrypoint = Region.t +type kwd_fail = Region.t +type kwd_for = Region.t +type kwd_from = Region.t +type kwd_function = Region.t +type kwd_if = Region.t +type kwd_in = Region.t +type kwd_is = Region.t +type kwd_list = Region.t +type kwd_map = Region.t +type kwd_mod = Region.t +type kwd_nil = Region.t +type kwd_not = Region.t +type kwd_of = Region.t +type kwd_or = Region.t +type kwd_patch = Region.t +type kwd_procedure = Region.t +type kwd_record = Region.t +type kwd_remove = Region.t +type kwd_set = Region.t +type kwd_skip = Region.t +type kwd_step = Region.t +type kwd_storage = Region.t +type kwd_then = Region.t +type kwd_to = Region.t +type kwd_type = Region.t +type kwd_var = Region.t +type kwd_while = Region.t +type kwd_with = Region.t + +(* Data constructors *) + +type c_False = Region.t +type c_None = Region.t +type c_Some = Region.t +type c_True = Region.t +type c_Unit = Region.t + +(* Symbols *) + +type semi = Region.t +type comma = Region.t +type lpar = Region.t +type rpar = Region.t +type lbrace = Region.t +type rbrace = Region.t +type lbracket = Region.t +type rbracket = Region.t +type cons = Region.t +type vbar = Region.t +type arrow = Region.t +type assign = Region.t +type equal = Region.t +type colon = Region.t +type lt = Region.t +type leq = Region.t +type gt = Region.t +type geq = Region.t +type neq = Region.t +type plus = Region.t +type minus = Region.t +type slash = Region.t +type times = Region.t +type dot = Region.t +type wild = Region.t +type cat = Region.t + +(* Virtual tokens *) + +type eof = Region.t + +(* Literals *) + +type variable = string reg +type fun_name = string reg +type type_name = string reg +type field_name = string reg +type map_name = string reg +type set_name = string reg +type constr = string reg + +(* Parentheses *) + +type 'a par = { + lpar : lpar; + inside : 'a; + rpar : rpar +} + +(* Brackets compounds *) + +type 'a brackets = { + lbracket : lbracket; + inside : 'a; + rbracket : rbracket +} + +(* Braced compounds *) + +type 'a braces = { + lbrace : lbrace; + inside : 'a; + rbrace : rbrace +} + +(* The Abstract Syntax Tree *) + +type t = { + decl : declaration nseq; + eof : eof +} + +and ast = t + +and declaration = + TypeDecl of type_decl reg +| ConstDecl of const_decl reg +| LambdaDecl of lambda_decl + +and const_decl = { + kwd_const : kwd_const; + name : variable; + colon : colon; + const_type : type_expr; + equal : equal; + init : expr; + terminator : semi option +} + +(* Type declarations *) + +and type_decl = { + kwd_type : kwd_type; + name : type_name; + kwd_is : kwd_is; + type_expr : type_expr; + terminator : semi option +} + +and type_expr = + TProd of cartesian +| TSum of (variant reg, vbar) nsepseq reg +| TRecord of record_type reg +| TApp of (type_name * type_tuple) reg +| TPar of type_expr par reg +| TAlias of variable + +and cartesian = (type_expr, times) nsepseq reg + +and variant = { + constr : constr; + kwd_of : kwd_of; + product : cartesian +} + +and record_type = { + opening : kwd_record; + field_decls : field_decls; + terminator : semi option; + closing : kwd_end +} + +and field_decls = (field_decl reg, semi) nsepseq + +and field_decl = { + field_name : field_name; + colon : colon; + field_type : type_expr +} + +and type_tuple = (type_expr, comma) nsepseq par reg + +(* Function and procedure declarations *) + +and lambda_decl = + FunDecl of fun_decl reg +| ProcDecl of proc_decl reg +| EntryDecl of entry_decl reg + +and fun_decl = { + kwd_function : kwd_function; + name : variable; + param : parameters; + colon : colon; + ret_type : type_expr; + kwd_is : kwd_is; + local_decls : local_decl list; + block : block reg; + kwd_with : kwd_with; + return : expr; + terminator : semi option +} + +and proc_decl = { + kwd_procedure : kwd_procedure; + name : variable; + param : parameters; + kwd_is : kwd_is; + local_decls : local_decl list; + block : block reg; + terminator : semi option +} + +and entry_decl = { + kwd_entrypoint : kwd_entrypoint; + name : variable; + param : entry_params; + colon : colon; + ret_type : type_expr; + kwd_is : kwd_is; + local_decls : local_decl list; + block : block reg; + kwd_with : kwd_with; + return : expr; + terminator : semi option +} + +and parameters = (param_decl, semi) nsepseq par reg + +and entry_params = (entry_param_decl, semi) nsepseq par reg + +and entry_param_decl = + EntryConst of param_const reg +| EntryVar of param_var reg +| EntryStore of storage reg + +and storage = { + kwd_storage : kwd_storage; + var : variable; + colon : colon; + storage_type : type_expr +} + +and param_decl = + ParamConst of param_const reg +| ParamVar of param_var reg + +and param_const = { + kwd_const : kwd_const; + var : variable; + colon : colon; + param_type : type_expr +} + +and param_var = { + kwd_var : kwd_var; + var : variable; + colon : colon; + param_type : type_expr +} + +and block = { + opening : block_opening; + instr : instructions; + terminator : semi option; + closing : block_closing +} + +and block_opening = + Block of kwd_block * lbrace +| Begin of kwd_begin + +and block_closing = + Block of rbrace +| End of kwd_end + +and local_decl = + LocalLam of lambda_decl +| LocalConst of const_decl reg +| LocalVar of var_decl reg + +and var_decl = { + kwd_var : kwd_var; + name : variable; + colon : colon; + var_type : type_expr; + assign : assign; + init : expr; + terminator : semi option +} + +and instructions = (instruction, semi) nsepseq + +and instruction = + Single of single_instr +| Block of block reg + +and single_instr = + Cond of conditional reg +| Case of case_instr reg +| Assign of assignment reg +| Loop of loop +| ProcCall of fun_call +| Fail of fail_instr reg +| Skip of kwd_skip +| RecordPatch of record_patch reg +| MapPatch of map_patch reg +| SetPatch of set_patch reg +| MapRemove of map_remove reg +| SetRemove of set_remove reg + +and set_remove = { + kwd_remove : kwd_remove; + element : expr; + kwd_from : kwd_from; + kwd_set : kwd_set; + set : path +} + +and map_remove = { + kwd_remove : kwd_remove; + key : expr; + kwd_from : kwd_from; + kwd_map : kwd_map; + map : path +} + +and set_patch = { + kwd_patch : kwd_patch; + path : path; + kwd_with : kwd_with; + set_inj : expr injection reg +} + +and map_patch = { + kwd_patch : kwd_patch; + path : path; + kwd_with : kwd_with; + map_inj : binding reg injection reg +} + +and binding = { + source : expr; + arrow : arrow; + image : expr +} + +and record_patch = { + kwd_patch : kwd_patch; + path : path; + kwd_with : kwd_with; + record_inj : record_injection reg +} + +and fail_instr = { + kwd_fail : kwd_fail; + fail_expr : expr +} + +and conditional = { + kwd_if : kwd_if; + test : expr; + kwd_then : kwd_then; + ifso : if_clause; + terminator : semi option; + kwd_else : kwd_else; + ifnot : if_clause +} + +and if_clause = + ClauseInstr of instruction +| ClauseBlock of (instructions * semi option) braces reg + +and set_membership = { + set : expr; + kwd_contains : kwd_contains; + element : expr +} + +and case_instr = { + kwd_case : kwd_case; + expr : expr; + kwd_of : kwd_of; + lead_vbar : vbar option; + cases : cases; + kwd_end : kwd_end +} + +and cases = (case reg, vbar) nsepseq reg + +and case = { + pattern : pattern; + arrow : arrow; + instr : instruction +} + +and assignment = { + lhs : lhs; + assign : assign; + rhs : rhs +} + +and lhs = + Path of path +| MapPath of map_lookup reg + +and rhs = + Expr of expr +| NoneExpr of c_None + +and loop = + While of while_loop reg +| For of for_loop + +and while_loop = { + kwd_while : kwd_while; + cond : expr; + block : block reg +} + +and for_loop = + ForInt of for_int reg +| ForCollect of for_collect reg + +and for_int = { + kwd_for : kwd_for; + assign : var_assign reg; + down : kwd_down option; + kwd_to : kwd_to; + bound : expr; + step : (kwd_step * expr) option; + block : block reg +} + +and var_assign = { + name : variable; + assign : assign; + expr : expr +} + +and for_collect = { + kwd_for : kwd_for; + var : variable; + bind_to : (arrow * variable) option; + kwd_in : kwd_in; + expr : expr; + block : block reg +} + +(* Expressions *) + +and expr = + ELogic of logic_expr +| EArith of arith_expr +| EString of string_expr +| EList of list_expr +| ESet of set_expr +| EConstr of constr_expr +| ERecord of record_expr +| EProj of projection reg +| EMap of map_expr +| EVar of Lexer.lexeme reg +| ECall of fun_call +| EBytes of (Lexer.lexeme * Hex.t) reg +| EUnit of c_Unit +| ETuple of tuple_expr +| EPar of expr par reg + +and set_expr = + SetInj of expr injection reg +| SetMem of set_membership reg + +and 'a injection = { + opening : opening; + elements : ('a, semi) sepseq; + terminator : semi option; + closing : closing +} + +and opening = + Kwd of keyword +| KwdBracket of keyword * lbracket + +and closing = + End of kwd_end +| RBracket of rbracket + +and map_expr = + MapLookUp of map_lookup reg +| MapInj of binding reg injection reg + +and map_lookup = { + path : path; + index : expr brackets reg +} + +and path = + Name of variable +| Path of projection reg + +and logic_expr = + BoolExpr of bool_expr +| CompExpr of comp_expr + +and bool_expr = + Or of kwd_or bin_op reg +| And of kwd_and bin_op reg +| Not of kwd_not un_op reg +| False of c_False +| True of c_True + +and 'a bin_op = { + op : 'a; + arg1 : expr; + arg2 : expr +} + +and 'a un_op = { + op : 'a; + arg : expr +} + +and comp_expr = + Lt of lt bin_op reg +| Leq of leq bin_op reg +| Gt of gt bin_op reg +| Geq of geq bin_op reg +| Equal of equal bin_op reg +| Neq of neq bin_op reg + +and arith_expr = + Add of plus bin_op reg +| Sub of minus bin_op reg +| Mult of times bin_op reg +| Div of slash bin_op reg +| Mod of kwd_mod bin_op reg +| Neg of minus un_op reg +| Int of (Lexer.lexeme * Z.t) reg + +and string_expr = + Cat of cat bin_op reg +| String of Lexer.lexeme reg + +and list_expr = + Cons of cons bin_op reg +| List of expr injection reg +| Nil of nil par reg + +and nil = { + nil : kwd_nil; + colon : colon; + list_type : type_expr +} + +and constr_expr = + SomeApp of (c_Some * arguments) reg +| NoneExpr of none_expr reg +| ConstrApp of (constr * arguments) reg + +and record_expr = + RecordInj of record_injection reg + +and record_injection = { + opening : kwd_record; + fields : (field_assign reg, semi) nsepseq; + terminator : semi option; + closing : kwd_end +} + +and field_assign = { + field_name : field_name; + equal : equal; + field_expr : expr +} + +and projection = { + record_name : variable; + selector : dot; + field_path : (selection, dot) nsepseq +} + +and selection = + FieldName of field_name +| Component of (Lexer.lexeme * Z.t) reg + +and tuple_expr = + TupleInj of tuple_injection + +and tuple_injection = (expr, comma) nsepseq par reg + +and none_expr = typed_none_expr par + +and typed_none_expr = { + c_None : c_None; + colon : colon; + opt_type : type_expr +} + +and fun_call = (fun_name * arguments) reg + +and arguments = tuple_injection + +(* Patterns *) + +and pattern = + PCons of (pattern, cons) nsepseq reg +| PVar of Lexer.lexeme reg +| PWild of wild +| PInt of (Lexer.lexeme * Z.t) reg +| PBytes of (Lexer.lexeme * Hex.t) reg +| PString of Lexer.lexeme reg +| PUnit of c_Unit +| PFalse of c_False +| PTrue of c_True +| PNone of c_None +| PSome of (c_Some * pattern par reg) reg +| PList of list_pattern +| PTuple of (pattern, comma) nsepseq par reg + +and list_pattern = + Sugar of (pattern, semi) sepseq brackets reg +| Raw of (pattern * cons * pattern) par reg + +(* Projecting regions *) + +open! Region + +let type_expr_to_region = function + TProd {region; _} +| TSum {region; _} +| TRecord {region; _} +| TApp {region; _} +| TPar {region; _} +| TAlias {region; _} -> region + +let rec expr_to_region = function + ELogic e -> logic_expr_to_region e +| EArith e -> arith_expr_to_region e +| EString e -> string_expr_to_region e +| EList e -> list_expr_to_region e +| ESet e -> set_expr_to_region e +| EConstr e -> constr_expr_to_region e +| ERecord e -> record_expr_to_region e +| EMap e -> map_expr_to_region e +| ETuple e -> tuple_expr_to_region e +| EProj {region; _} +| EVar {region; _} +| ECall {region; _} +| EBytes {region; _} +| EUnit region +| EPar {region; _} -> region + +and tuple_expr_to_region = function + TupleInj {region; _} -> region + +and map_expr_to_region = function + MapLookUp {region; _} +| MapInj {region; _} -> region + +and set_expr_to_region = function + SetInj {region; _} +| SetMem {region; _} -> region + +and logic_expr_to_region = function + BoolExpr e -> bool_expr_to_region e +| CompExpr e -> comp_expr_to_region e + +and bool_expr_to_region = function + Or {region; _} +| And {region; _} +| Not {region; _} +| False region +| True region -> region + +and comp_expr_to_region = function + Lt {region; _} +| Leq {region; _} +| Gt {region; _} +| Geq {region; _} +| Equal {region; _} +| Neq {region; _} -> region + +and arith_expr_to_region = function +| Add {region; _} +| Sub {region; _} +| Mult {region; _} +| Div {region; _} +| Mod {region; _} +| Neg {region; _} +| Int {region; _} -> region + +and string_expr_to_region = function + Cat {region; _} +| String {region; _} -> region + +and list_expr_to_region = function + Cons {region; _} +| List {region; _} +| Nil {region; _} -> region + +and constr_expr_to_region = function + NoneExpr {region; _} +| ConstrApp {region; _} +| SomeApp {region; _} -> region + +and record_expr_to_region = function + RecordInj {region; _} -> region + +let path_to_region = function + Name var -> var.region +| Path {region; _} -> region + +let instr_to_region = function + Single Cond {region; _} +| Single Case {region; _} +| Single Assign {region; _} +| Single Loop While {region; _} +| Single Loop For ForInt {region; _} +| Single Loop For ForCollect {region; _} +| Single ProcCall {region; _} +| Single Skip region +| Single Fail {region; _} +| Single RecordPatch {region; _} +| Single MapPatch {region; _} +| Single SetPatch {region; _} +| Single MapRemove {region; _} +| Single SetRemove {region; _} +| Block {region; _} -> region + +let if_clause_to_region = function + ClauseInstr instr -> instr_to_region instr +| ClauseBlock {region; _} -> region + +let pattern_to_region = function + PCons {region; _} +| PVar {region; _} +| PWild region +| PInt {region; _} +| PBytes {region; _} +| PString {region; _} +| PUnit region +| PFalse region +| PTrue region +| PNone region +| PSome {region; _} +| PList Sugar {region; _} +| PList Raw {region; _} +| PTuple {region; _} -> region + +let local_decl_to_region = function + LocalLam FunDecl {region; _} +| LocalLam ProcDecl {region; _} +| LocalLam EntryDecl {region; _} +| LocalConst {region; _} +| LocalVar {region; _} -> region + +let lhs_to_region : lhs -> Region.t = function + Path path -> path_to_region path +| MapPath {region; _} -> region + +let rhs_to_region = function + Expr e -> expr_to_region e +| NoneExpr r -> r + +let selection_to_region = function + FieldName {region; _} +| Component {region; _} -> region + +(* Printing the tokens with their source regions *) + +let printf = Printf.printf + +let compact (region: Region.t) = + region#compact ~offsets:EvalOpt.offsets EvalOpt.mode + +let print_nsepseq : + string -> ('a -> unit) -> ('a, Region.t) nsepseq -> unit = + fun sep visit (head, tail) -> + let print_aux (sep_reg, item) = + printf "%s: %s\n" (compact sep_reg) sep; + visit item + in visit head; List.iter print_aux tail + +let print_sepseq : + string -> ('a -> unit) -> ('a, Region.t) sepseq -> unit = + fun sep visit -> function + None -> () + | Some seq -> print_nsepseq sep visit seq + +let print_token region lexeme = + printf "%s: %s\n"(compact region) lexeme + +let print_var {region; value=lexeme} = + printf "%s: Ident \"%s\"\n" (compact region) lexeme + +let print_constr {region; value=lexeme} = + printf "%s: Constr \"%s\"\n" + (compact region) lexeme + +let print_string {region; value=lexeme} = + printf "%s: String %s\n" + (compact region) lexeme + +let print_bytes {region; value = lexeme, abstract} = + printf "%s: Bytes (\"%s\", \"0x%s\")\n" + (compact region) lexeme + (Hex.to_string abstract) + +let print_int {region; value = lexeme, abstract} = + printf "%s: Int (\"%s\", %s)\n" + (compact region) lexeme + (Z.to_string abstract) + +(* Main printing function *) + +let rec print_tokens ast = + let {decl; eof} = ast in + Utils.nseq_iter print_decl decl; + print_token eof "EOF" + +and print_decl = function + TypeDecl decl -> print_type_decl decl +| ConstDecl decl -> print_const_decl decl +| LambdaDecl decl -> print_lambda_decl decl + +and print_const_decl {value; _} = + let {kwd_const; name; colon; const_type; + equal; init; terminator} = value in + print_token kwd_const "const"; + print_var name; + print_token colon ":"; + print_type_expr const_type; + print_token equal "="; + print_expr init; + print_terminator terminator + +and print_type_decl {value; _} = + let {kwd_type; name; kwd_is; + type_expr; terminator} = value in + print_token kwd_type "type"; + print_var name; + print_token kwd_is "is"; + print_type_expr type_expr; + print_terminator terminator + +and print_type_expr = function + TProd cartesian -> print_cartesian cartesian +| TSum sum_type -> print_sum_type sum_type +| TRecord record_type -> print_record_type record_type +| TApp type_app -> print_type_app type_app +| TPar par_type -> print_par_type par_type +| TAlias type_alias -> print_var type_alias + +and print_cartesian {value; _} = + print_nsepseq "*" print_type_expr value + +and print_variant {value; _} = + let {constr; kwd_of; product} = value in + print_constr constr; + print_token kwd_of "of"; + print_cartesian product + +and print_sum_type {value; _} = + print_nsepseq "|" print_variant value + +and print_record_type {value; _} = + let {opening; field_decls; terminator; closing} = value in + print_token opening "record"; + print_field_decls field_decls; + print_terminator terminator; + print_token closing "end" + +and print_type_app {value; _} = + let type_name, type_tuple = value in + print_var type_name; + print_type_tuple type_tuple + +and print_par_type {value; _} = + let {lpar; inside; rpar} = value in + print_token lpar "("; + print_type_expr inside; + print_token rpar ")" + +and print_field_decls sequence = + print_nsepseq ";" print_field_decl sequence + +and print_field_decl {value; _} = + let {field_name; colon; field_type} = value in + print_var field_name; + print_token colon ":"; + print_type_expr field_type + +and print_type_tuple {value; _} = + let {lpar; inside; rpar} = value in + print_token lpar "("; + print_nsepseq "," print_type_expr inside; + print_token rpar ")" + +and print_lambda_decl = function + FunDecl fun_decl -> print_fun_decl fun_decl +| ProcDecl proc_decl -> print_proc_decl proc_decl +| EntryDecl entry_decl -> print_entry_decl entry_decl + +and print_fun_decl {value; _} = + let {kwd_function; name; param; colon; + ret_type; kwd_is; local_decls; + block; kwd_with; return; terminator} = value in + print_token kwd_function "function"; + print_var name; + print_parameters param; + print_token colon ":"; + print_type_expr ret_type; + print_token kwd_is "is"; + print_local_decls local_decls; + print_block block; + print_token kwd_with "with"; + print_expr return; + print_terminator terminator + +and print_proc_decl {value; _} = + let {kwd_procedure; name; param; kwd_is; + local_decls; block; terminator} = value in + print_token kwd_procedure "procedure"; + print_var name; + print_parameters param; + print_token kwd_is "is"; + print_local_decls local_decls; + print_block block; + print_terminator terminator + +and print_entry_decl {value; _} = + let {kwd_entrypoint; name; param; colon; + ret_type; kwd_is; local_decls; + block; kwd_with; return; terminator} = value in + print_token kwd_entrypoint "entrypoint"; + print_var name; + print_entry_params param; + print_token colon ":"; + print_type_expr ret_type; + print_token kwd_is "is"; + print_local_decls local_decls; + print_block block; + print_token kwd_with "with"; + print_expr return; + print_terminator terminator + +and print_entry_params {value; _} = + let {lpar; inside; rpar} = value in + print_token lpar "("; + print_nsepseq ";" print_entry_param_decl inside; + print_token rpar ")" + +and print_entry_param_decl = function + EntryConst param_const -> print_param_const param_const +| EntryVar param_var -> print_param_var param_var +| EntryStore param_store -> print_storage param_store + +and print_storage {value; _} = + let {kwd_storage; var; colon; storage_type} = value in + print_token kwd_storage "storage"; + print_var var; + print_token colon ":"; + print_type_expr storage_type + +and print_parameters {value; _} = + let {lpar; inside; rpar} = value in + print_token lpar "("; + print_nsepseq ";" print_param_decl inside; + print_token rpar ")" + +and print_param_decl = function + ParamConst param_const -> print_param_const param_const +| ParamVar param_var -> print_param_var param_var + +and print_param_const {value; _} = + let {kwd_const; var; colon; param_type} = value in + print_token kwd_const "const"; + print_var var; + print_token colon ":"; + print_type_expr param_type + +and print_param_var {value; _} = + let {kwd_var; var; colon; param_type} = value in + print_token kwd_var "var"; + print_var var; + print_token colon ":"; + print_type_expr param_type + +and print_block {value; _} = + let {opening; instr; terminator; closing} = value in + print_block_opening opening; + print_instructions instr; + print_terminator terminator; + print_block_closing closing + +and print_block_opening = function + Block (kwd_block, lbrace) -> print_token kwd_block "block"; + print_token lbrace "{" +| Begin kwd_begin -> print_token kwd_begin "begin" + +and print_block_closing = function + Block rbrace -> print_token rbrace "}" +| End kwd_end -> print_token kwd_end "end" + +and print_local_decls sequence = + List.iter print_local_decl sequence + +and print_local_decl = function + LocalLam decl -> print_lambda_decl decl +| LocalConst decl -> print_const_decl decl +| LocalVar decl -> print_var_decl decl + +and print_var_decl {value; _} = + let {kwd_var; name; colon; var_type; + assign; init; terminator} = value in + print_token kwd_var "var"; + print_var name; + print_token colon ":"; + print_type_expr var_type; + print_token assign ":="; + print_expr init; + print_terminator terminator + +and print_instructions sequence = + print_nsepseq ";" print_instruction sequence + +and print_instruction = function + Single instr -> print_single_instr instr +| Block block -> print_block block + +and print_single_instr = function + Cond {value; _} -> print_conditional value +| Case {value; _} -> print_case_instr value +| Assign assign -> print_assignment assign +| Loop loop -> print_loop loop +| ProcCall fun_call -> print_fun_call fun_call +| Fail {value; _} -> print_fail value +| Skip kwd_skip -> print_token kwd_skip "skip" +| RecordPatch {value; _} -> print_record_patch value +| MapPatch {value; _} -> print_map_patch value +| SetPatch {value; _} -> print_set_patch value +| MapRemove {value; _} -> print_map_remove value +| SetRemove {value; _} -> print_set_remove value + +and print_fail {kwd_fail; fail_expr} = + print_token kwd_fail "fail"; + print_expr fail_expr + +and print_conditional node = + let {kwd_if; test; kwd_then; ifso; terminator; + kwd_else; ifnot} = node in + print_token kwd_if "if"; + print_expr test; + print_token kwd_then "then"; + print_if_clause ifso; + print_terminator terminator; + print_token kwd_else "else"; + print_if_clause ifnot + +and print_if_clause = function + ClauseInstr instr -> print_instruction instr +| ClauseBlock {value; _} -> + let {lbrace; inside; rbrace} = value in + let instr, terminator = inside in + print_token lbrace "{"; + print_instructions instr; + print_terminator terminator; + print_token rbrace "}" + +and print_case_instr (node : case_instr) = + let {kwd_case; expr; kwd_of; + lead_vbar; cases; kwd_end} = node in + print_token kwd_case "case"; + print_expr expr; + print_token kwd_of "of"; + print_token_opt lead_vbar "|"; + print_cases cases; + print_token kwd_end "end" + +and print_token_opt = function + None -> fun _ -> () +| Some region -> print_token region + +and print_cases {value; _} = + print_nsepseq "|" print_case value + +and print_case {value; _} = + let {pattern; arrow; instr} = value in + print_pattern pattern; + print_token arrow "->"; + print_instruction instr + +and print_assignment {value; _} = + let {lhs; assign; rhs} = value in + print_lhs lhs; + print_token assign ":="; + print_rhs rhs + +and print_rhs = function + Expr e -> print_expr e +| NoneExpr r -> print_token r "None" + +and print_lhs = function + Path path -> print_path path +| MapPath {value; _} -> print_map_lookup value + +and print_loop = function + While {value; _} -> print_while_loop value +| For for_loop -> print_for_loop for_loop + +and print_while_loop value = + let {kwd_while; cond; block} = value in + print_token kwd_while "while"; + print_expr cond; + print_block block + +and print_for_loop = function + ForInt for_int -> print_for_int for_int +| ForCollect for_collect -> print_for_collect for_collect + +and print_for_int ({value; _} : for_int reg) = + let {kwd_for; assign; down; kwd_to; + bound; step; block} = value in + print_token kwd_for "for"; + print_var_assign assign; + print_down down; + print_token kwd_to "to"; + print_expr bound; + print_step step; + print_block block + +and print_var_assign {value; _} = + let {name; assign; expr} = value in + print_var name; + print_token assign ":="; + print_expr expr + +and print_down = function + Some kwd_down -> print_token kwd_down "down" +| None -> () + +and print_step = function + Some (kwd_step, expr) -> + print_token kwd_step "step"; + print_expr expr +| None -> () + +and print_for_collect ({value; _} : for_collect reg) = + let {kwd_for; var; bind_to; kwd_in; expr; block} = value in + print_token kwd_for "for"; + print_var var; + print_bind_to bind_to; + print_token kwd_in "in"; + print_expr expr; + print_block block + +and print_bind_to = function + Some (arrow, variable) -> + print_token arrow "->"; + print_var variable +| None -> () + +and print_expr = function + ELogic e -> print_logic_expr e +| EArith e -> print_arith_expr e +| EString e -> print_string_expr e +| EList e -> print_list_expr e +| ESet e -> print_set_expr e +| EConstr e -> print_constr_expr e +| ERecord e -> print_record_expr e +| EProj e -> print_projection e +| EMap e -> print_map_expr e +| EVar v -> print_var v +| ECall e -> print_fun_call e +| EBytes b -> print_bytes b +| EUnit r -> print_token r "Unit" +| ETuple e -> print_tuple_expr e +| EPar e -> print_par_expr e + +and print_map_expr = function + MapLookUp {value; _} -> print_map_lookup value +| MapInj inj -> print_injection "map" print_binding inj + +and print_set_expr = function + SetInj inj -> print_injection "set" print_expr inj +| SetMem mem -> print_set_membership mem + +and print_set_membership {value; _} = + let {set; kwd_contains; element} = value in + print_expr set; + print_token kwd_contains "contains"; + print_expr element + +and print_map_lookup {path; index} = + let {lbracket; inside; rbracket} = index.value in + print_path path; + print_token lbracket "["; + print_expr inside; + print_token rbracket "]" + +and print_path = function + Name var -> print_var var +| Path path -> print_projection path + +and print_logic_expr = function + BoolExpr e -> print_bool_expr e +| CompExpr e -> print_comp_expr e + +and print_bool_expr = function + Or {value = {arg1; op; arg2}; _} -> + print_expr arg1; print_token op "||"; print_expr arg2 +| And {value = {arg1; op; arg2}; _} -> + print_expr arg1; print_token op "&&"; print_expr arg2 +| Not {value = {op; arg}; _} -> + print_token op "not"; print_expr arg +| False region -> print_token region "False" +| True region -> print_token region "True" + +and print_comp_expr = function + Lt {value = {arg1; op; arg2}; _} -> + print_expr arg1; print_token op "<"; print_expr arg2 +| Leq {value = {arg1; op; arg2}; _} -> + print_expr arg1; print_token op "<="; print_expr arg2 +| Gt {value = {arg1; op; arg2}; _} -> + print_expr arg1; print_token op ">"; print_expr arg2 +| Geq {value = {arg1; op; arg2}; _} -> + print_expr arg1; print_token op ">="; print_expr arg2 +| Equal {value = {arg1; op; arg2}; _} -> + print_expr arg1; print_token op "="; print_expr arg2 +| Neq {value = {arg1; op; arg2}; _} -> + print_expr arg1; print_token op "=/="; print_expr arg2 + +and print_arith_expr = function + Add {value = {arg1; op; arg2}; _} -> + print_expr arg1; print_token op "+"; print_expr arg2 +| Sub {value = {arg1; op; arg2}; _} -> + print_expr arg1; print_token op "-"; print_expr arg2 +| Mult {value = {arg1; op; arg2}; _} -> + print_expr arg1; print_token op "*"; print_expr arg2 +| Div {value = {arg1; op; arg2}; _} -> + print_expr arg1; print_token op "/"; print_expr arg2 +| Mod {value = {arg1; op; arg2}; _} -> + print_expr arg1; print_token op "mod"; print_expr arg2 +| Neg {value = {op; arg}; _} -> + print_token op "-"; print_expr arg +| Int i -> print_int i + +and print_string_expr = function + Cat {value = {arg1; op; arg2}; _} -> + print_expr arg1; print_token op "^"; print_expr arg2 +| String s -> print_string s + +and print_list_expr = function + Cons {value = {arg1; op; arg2}; _} -> + print_expr arg1; print_token op "#"; print_expr arg2 +| List e -> print_injection "list" print_expr e +| Nil e -> print_nil e + +and print_constr_expr = function + SomeApp e -> print_some_app e +| NoneExpr e -> print_none_expr e +| ConstrApp e -> print_constr_app e + +and print_record_expr = function + RecordInj e -> print_record_injection e + +and print_record_injection {value; _} = + let {opening; fields; terminator; closing} = value in + print_token opening "record"; + print_nsepseq ";" print_field_assign fields; + print_terminator terminator; + print_token closing "end" + +and print_field_assign {value; _} = + let {field_name; equal; field_expr} = value in + print_var field_name; + print_token equal "="; + print_expr field_expr + +and print_projection {value; _} = + let {record_name; selector; field_path} = value in + print_var record_name; + print_token selector "."; + print_field_path field_path + +and print_field_path sequence = + print_nsepseq "." print_selection sequence + +and print_selection = function + FieldName name -> print_var name +| Component int -> print_int int + +and print_record_patch node = + let {kwd_patch; path; kwd_with; record_inj} = node in + print_token kwd_patch "patch"; + print_path path; + print_token kwd_with "with"; + print_record_injection record_inj + +and print_set_patch node = + let {kwd_patch; path; kwd_with; set_inj} = node in + print_token kwd_patch "patch"; + print_path path; + print_token kwd_with "with"; + print_injection "set" print_expr set_inj + +and print_map_patch node = + let {kwd_patch; path; kwd_with; map_inj} = node in + print_token kwd_patch "patch"; + print_path path; + print_token kwd_with "with"; + print_injection "map" print_binding map_inj + +and print_map_remove node = + let {kwd_remove; key; kwd_from; kwd_map; map} = node in + print_token kwd_remove "remove"; + print_expr key; + print_token kwd_from "from"; + print_token kwd_map "map"; + print_path map + +and print_set_remove node = + let {kwd_remove; element; kwd_from; kwd_set; set} = node in + print_token kwd_remove "remove"; + print_expr element; + print_token kwd_from "from"; + print_token kwd_set "set"; + print_path set + +and print_injection : + 'a.string -> ('a -> unit) -> 'a injection reg -> unit = + fun kwd print {value; _} -> + let {opening; elements; terminator; closing} = value in + print_opening kwd opening; + print_sepseq ";" print elements; + print_terminator terminator; + print_closing closing + +and print_opening lexeme = function + Kwd kwd -> print_token kwd lexeme +| KwdBracket (kwd, lbracket) -> + print_token kwd lexeme; + print_token lbracket "{" + +and print_closing = function + RBracket rbracket -> print_token rbracket "}" +| End kwd_end -> print_token kwd_end "end" + +and print_binding {value; _} = + let {source; arrow; image} = value in + print_expr source; + print_token arrow "->"; + print_expr image + +and print_tuple_expr = function + TupleInj inj -> print_tuple_inj inj + +and print_tuple_inj {value; _} = + let {lpar; inside; rpar} = value in + print_token lpar "("; + print_nsepseq "," print_expr inside; + print_token rpar ")" + +and print_nil {value; _} = + let {lpar; inside; rpar} = value in + let {nil; colon; list_type} = inside in + print_token lpar "("; + print_token nil "nil"; + print_token colon ":"; + print_type_expr list_type; + print_token rpar ")" + +and print_none_expr {value; _} = + let {lpar; inside; rpar} = value in + let {c_None; colon; opt_type} = inside in + print_token lpar "("; + print_token c_None "None"; + print_token colon ":"; + print_type_expr opt_type; + print_token rpar ")" + +and print_fun_call {value; _} = + let fun_name, arguments = value in + print_var fun_name; + print_tuple_inj arguments + +and print_constr_app {value; _} = + let constr, arguments = value in + print_constr constr; + print_tuple_inj arguments + +and print_some_app {value; _} = + let c_Some, arguments = value in + print_token c_Some "Some"; + print_tuple_inj arguments + +and print_par_expr {value; _} = + let {lpar; inside; rpar} = value in + print_token lpar "("; + print_expr inside; + print_token rpar ")" + +and print_pattern = function + PCons {value; _} -> print_nsepseq "#" print_pattern value +| PVar var -> print_var var +| PWild wild -> print_token wild "_" +| PInt i -> print_int i +| PBytes b -> print_bytes b +| PString s -> print_string s +| PUnit region -> print_token region "Unit" +| PFalse region -> print_token region "False" +| PTrue region -> print_token region "True" +| PNone region -> print_token region "None" +| PSome psome -> print_psome psome +| PList pattern -> print_list_pattern pattern +| PTuple ptuple -> print_ptuple ptuple + +and print_psome {value; _} = + let c_Some, patterns = value in + print_token c_Some "Some"; + print_patterns patterns + +and print_patterns {value; _} = + let {lpar; inside; rpar} = value in + print_token lpar "("; + print_pattern inside; + print_token rpar ")" + +and print_list_pattern = function + Sugar sugar -> print_sugar sugar +| Raw raw -> print_raw raw + +and print_sugar {value; _} = + let {lbracket; inside; rbracket} = value in + print_token lbracket "["; + print_sepseq "," print_pattern inside; + print_token rbracket "]" + +and print_raw {value; _} = + let {lpar; inside; rpar} = value in + let head, cons, tail = inside in + print_token lpar "("; + print_pattern head; + print_token cons "#"; + print_pattern tail; + print_token rpar ")" + +and print_ptuple {value; _} = + let {lpar; inside; rpar} = value in + print_token lpar "("; + print_nsepseq "," print_pattern inside; + print_token rpar ")" + +and print_terminator = function + Some semi -> print_token semi ";" +| None -> () diff --git a/src/ligo/ligo-parser/AST.mli b/src/ligo/ligo-parser/AST.mli new file mode 100644 index 000000000..fddc5ce1b --- /dev/null +++ b/src/ligo/ligo-parser/AST.mli @@ -0,0 +1,664 @@ +(* Abstract Syntax Tree (AST) for LIGO *) + +[@@@warning "-30"] + +open Utils + +(* Regions + + The AST carries all the regions where tokens have been found by the + lexer, plus additional regions corresponding to whole subtrees + (like entire expressions, patterns etc.). These regions are needed + for error reporting and source-to-source transformations. To make + these pervasive regions more legible, we define singleton types for + the symbols, keywords etc. with suggestive names like "kwd_and" + denoting the _region_ of the occurrence of the keyword "and". +*) + +type 'a reg = 'a Region.reg + +val nseq_to_region : ('a -> Region.t) -> 'a nseq -> Region.t +val nsepseq_to_region : ('a -> Region.t) -> ('a,'sep) nsepseq -> Region.t +val sepseq_to_region : ('a -> Region.t) -> ('a,'sep) sepseq -> Region.t + +(* Keywords of LIGO *) + +type keyword = Region.t +type kwd_and = Region.t +type kwd_begin = Region.t +type kwd_block = Region.t +type kwd_case = Region.t +type kwd_const = Region.t +type kwd_contains = Region.t +type kwd_down = Region.t +type kwd_else = Region.t +type kwd_end = Region.t +type kwd_entrypoint = Region.t +type kwd_fail = Region.t +type kwd_for = Region.t +type kwd_from = Region.t +type kwd_function = Region.t +type kwd_if = Region.t +type kwd_in = Region.t +type kwd_is = Region.t +type kwd_list = Region.t +type kwd_map = Region.t +type kwd_mod = Region.t +type kwd_nil = Region.t +type kwd_not = Region.t +type kwd_of = Region.t +type kwd_or = Region.t +type kwd_patch = Region.t +type kwd_procedure = Region.t +type kwd_record = Region.t +type kwd_remove = Region.t +type kwd_set = Region.t +type kwd_skip = Region.t +type kwd_step = Region.t +type kwd_storage = Region.t +type kwd_then = Region.t +type kwd_to = Region.t +type kwd_type = Region.t +type kwd_var = Region.t +type kwd_while = Region.t +type kwd_with = Region.t + +(* Data constructors *) + +type c_False = Region.t +type c_None = Region.t +type c_Some = Region.t +type c_True = Region.t +type c_Unit = Region.t + +(* Symbols *) + +type semi = Region.t (* ";" *) +type comma = Region.t (* "," *) +type lpar = Region.t (* "(" *) +type rpar = Region.t (* ")" *) +type lbrace = Region.t (* "{" *) +type rbrace = Region.t (* "}" *) +type lbracket = Region.t (* "[" *) +type rbracket = Region.t (* "]" *) +type cons = Region.t (* "#" *) +type vbar = Region.t (* "|" *) +type arrow = Region.t (* "->" *) +type assign = Region.t (* ":=" *) +type equal = Region.t (* "=" *) +type colon = Region.t (* ":" *) +type lt = Region.t (* "<" *) +type leq = Region.t (* "<=" *) +type gt = Region.t (* ">" *) +type geq = Region.t (* ">=" *) +type neq = Region.t (* "=/=" *) +type plus = Region.t (* "+" *) +type minus = Region.t (* "-" *) +type slash = Region.t (* "/" *) +type times = Region.t (* "*" *) +type dot = Region.t (* "." *) +type wild = Region.t (* "_" *) +type cat = Region.t (* "^" *) + +(* Virtual tokens *) + +type eof = Region.t + +(* Literals *) + +type variable = string reg +type fun_name = string reg +type type_name = string reg +type field_name = string reg +type map_name = string reg +type set_name = string reg +type constr = string reg + +(* Parentheses *) + +type 'a par = { + lpar : lpar; + inside : 'a; + rpar : rpar +} + +(* Brackets compounds *) + +type 'a brackets = { + lbracket : lbracket; + inside : 'a; + rbracket : rbracket +} + +(* Braced compounds *) + +type 'a braces = { + lbrace : lbrace; + inside : 'a; + rbrace : rbrace +} + +(* The Abstract Syntax Tree *) + +type t = { + decl : declaration nseq; + eof : eof +} + +and ast = t + +and declaration = + TypeDecl of type_decl reg +| ConstDecl of const_decl reg +| LambdaDecl of lambda_decl + +and const_decl = { + kwd_const : kwd_const; + name : variable; + colon : colon; + const_type : type_expr; + equal : equal; + init : expr; + terminator : semi option +} + +(* Type declarations *) + +and type_decl = { + kwd_type : kwd_type; + name : type_name; + kwd_is : kwd_is; + type_expr : type_expr; + terminator : semi option +} + +and type_expr = + TProd of cartesian +| TSum of (variant reg, vbar) nsepseq reg +| TRecord of record_type reg +| TApp of (type_name * type_tuple) reg +| TPar of type_expr par reg +| TAlias of variable + +and cartesian = (type_expr, times) nsepseq reg + +and variant = { + constr : constr; + kwd_of : kwd_of; + product : cartesian +} + +and record_type = { + opening : kwd_record; + field_decls : field_decls; + terminator : semi option; + closing : kwd_end +} + +and field_decls = (field_decl reg, semi) nsepseq + +and field_decl = { + field_name : field_name; + colon : colon; + field_type : type_expr +} + +and type_tuple = (type_expr, comma) nsepseq par reg + +(* Function and procedure declarations *) + +and lambda_decl = + FunDecl of fun_decl reg +| ProcDecl of proc_decl reg +| EntryDecl of entry_decl reg + +and fun_decl = { + kwd_function : kwd_function; + name : variable; + param : parameters; + colon : colon; + ret_type : type_expr; + kwd_is : kwd_is; + local_decls : local_decl list; + block : block reg; + kwd_with : kwd_with; + return : expr; + terminator : semi option +} + +and proc_decl = { + kwd_procedure : kwd_procedure; + name : variable; + param : parameters; + kwd_is : kwd_is; + local_decls : local_decl list; + block : block reg; + terminator : semi option +} + +and entry_decl = { + kwd_entrypoint : kwd_entrypoint; + name : variable; + param : entry_params; + colon : colon; + ret_type : type_expr; + kwd_is : kwd_is; + local_decls : local_decl list; + block : block reg; + kwd_with : kwd_with; + return : expr; + terminator : semi option +} + +and parameters = (param_decl, semi) nsepseq par reg + +and entry_params = (entry_param_decl, semi) nsepseq par reg + +and entry_param_decl = + EntryConst of param_const reg +| EntryVar of param_var reg +| EntryStore of storage reg + +and storage = { + kwd_storage : kwd_storage; + var : variable; + colon : colon; + storage_type : type_expr +} + +and param_decl = + ParamConst of param_const reg +| ParamVar of param_var reg + +and param_const = { + kwd_const : kwd_const; + var : variable; + colon : colon; + param_type : type_expr +} + +and param_var = { + kwd_var : kwd_var; + var : variable; + colon : colon; + param_type : type_expr +} + +and block = { + opening : block_opening; + instr : instructions; + terminator : semi option; + closing : block_closing +} + +and block_opening = + Block of kwd_block * lbrace +| Begin of kwd_begin + +and block_closing = + Block of rbrace +| End of kwd_end + +and local_decl = + LocalLam of lambda_decl +| LocalConst of const_decl reg +| LocalVar of var_decl reg + +and var_decl = { + kwd_var : kwd_var; + name : variable; + colon : colon; + var_type : type_expr; + assign : assign; + init : expr; + terminator : semi option +} + +and instructions = (instruction, semi) nsepseq + +and instruction = + Single of single_instr +| Block of block reg + +and single_instr = + Cond of conditional reg +| Case of case_instr reg +| Assign of assignment reg +| Loop of loop +| ProcCall of fun_call +| Fail of fail_instr reg +| Skip of kwd_skip +| RecordPatch of record_patch reg +| MapPatch of map_patch reg +| SetPatch of set_patch reg +| MapRemove of map_remove reg +| SetRemove of set_remove reg + +and set_remove = { + kwd_remove : kwd_remove; + element : expr; + kwd_from : kwd_from; + kwd_set : kwd_set; + set : path +} + +and map_remove = { + kwd_remove : kwd_remove; + key : expr; + kwd_from : kwd_from; + kwd_map : kwd_map; + map : path +} + +and set_patch = { + kwd_patch : kwd_patch; + path : path; + kwd_with : kwd_with; + set_inj : expr injection reg +} + +and map_patch = { + kwd_patch : kwd_patch; + path : path; + kwd_with : kwd_with; + map_inj : binding reg injection reg +} + +and binding = { + source : expr; + arrow : arrow; + image : expr +} + +and record_patch = { + kwd_patch : kwd_patch; + path : path; + kwd_with : kwd_with; + record_inj : record_injection reg +} + +and fail_instr = { + kwd_fail : kwd_fail; + fail_expr : expr +} + +and conditional = { + kwd_if : kwd_if; + test : expr; + kwd_then : kwd_then; + ifso : if_clause; + terminator : semi option; + kwd_else : kwd_else; + ifnot : if_clause +} + +and if_clause = + ClauseInstr of instruction +| ClauseBlock of (instructions * semi option) braces reg + +and set_membership = { + set : expr; + kwd_contains : kwd_contains; + element : expr +} + +and case_instr = { + kwd_case : kwd_case; + expr : expr; + kwd_of : kwd_of; + lead_vbar : vbar option; + cases : cases; + kwd_end : kwd_end +} + +and cases = (case reg, vbar) nsepseq reg + +and case = { + pattern : pattern; + arrow : arrow; + instr : instruction +} + +and assignment = { + lhs : lhs; + assign : assign; + rhs : rhs; +} + +and lhs = + Path of path +| MapPath of map_lookup reg + +and rhs = + Expr of expr +| NoneExpr of c_None + +and loop = + While of while_loop reg +| For of for_loop + +and while_loop = { + kwd_while : kwd_while; + cond : expr; + block : block reg +} + +and for_loop = + ForInt of for_int reg +| ForCollect of for_collect reg + +and for_int = { + kwd_for : kwd_for; + assign : var_assign reg; + down : kwd_down option; + kwd_to : kwd_to; + bound : expr; + step : (kwd_step * expr) option; + block : block reg +} + +and var_assign = { + name : variable; + assign : assign; + expr : expr +} + +and for_collect = { + kwd_for : kwd_for; + var : variable; + bind_to : (arrow * variable) option; + kwd_in : kwd_in; + expr : expr; + block : block reg +} + +(* Expressions *) + +and expr = + ELogic of logic_expr +| EArith of arith_expr +| EString of string_expr +| EList of list_expr +| ESet of set_expr +| EConstr of constr_expr +| ERecord of record_expr +| EProj of projection reg +| EMap of map_expr +| EVar of Lexer.lexeme reg +| ECall of fun_call +| EBytes of (Lexer.lexeme * Hex.t) reg +| EUnit of c_Unit +| ETuple of tuple_expr +| EPar of expr par reg + +and set_expr = + SetInj of expr injection reg +| SetMem of set_membership reg + +and 'a injection = { + opening : opening; + elements : ('a, semi) sepseq; + terminator : semi option; + closing : closing +} + +and opening = + Kwd of keyword +| KwdBracket of keyword * lbracket + +and closing = + End of kwd_end +| RBracket of rbracket + +and map_expr = + MapLookUp of map_lookup reg +| MapInj of binding reg injection reg + +and map_lookup = { + path : path; + index : expr brackets reg +} + +and path = + Name of variable +| Path of projection reg + +and logic_expr = + BoolExpr of bool_expr +| CompExpr of comp_expr + +and bool_expr = + Or of kwd_or bin_op reg +| And of kwd_and bin_op reg +| Not of kwd_not un_op reg +| False of c_False +| True of c_True + +and 'a bin_op = { + op : 'a; + arg1 : expr; + arg2 : expr +} + +and 'a un_op = { + op : 'a; + arg : expr +} + +and comp_expr = + Lt of lt bin_op reg +| Leq of leq bin_op reg +| Gt of gt bin_op reg +| Geq of geq bin_op reg +| Equal of equal bin_op reg +| Neq of neq bin_op reg + +and arith_expr = + Add of plus bin_op reg +| Sub of minus bin_op reg +| Mult of times bin_op reg +| Div of slash bin_op reg +| Mod of kwd_mod bin_op reg +| Neg of minus un_op reg +| Int of (Lexer.lexeme * Z.t) reg + +and string_expr = + Cat of cat bin_op reg +| String of Lexer.lexeme reg + +and list_expr = + Cons of cons bin_op reg +| List of expr injection reg +| Nil of nil par reg + +and nil = { + nil : kwd_nil; + colon : colon; + list_type : type_expr +} + +and constr_expr = + SomeApp of (c_Some * arguments) reg +| NoneExpr of none_expr reg +| ConstrApp of (constr * arguments) reg + +and record_expr = + RecordInj of record_injection reg + +and record_injection = { + opening : kwd_record; + fields : (field_assign reg, semi) nsepseq; + terminator : semi option; + closing : kwd_end +} + +and field_assign = { + field_name : field_name; + equal : equal; + field_expr : expr +} + +and projection = { + record_name : variable; + selector : dot; + field_path : (selection, dot) nsepseq +} + +and selection = + FieldName of field_name +| Component of (Lexer.lexeme * Z.t) reg + +and tuple_expr = + TupleInj of tuple_injection + +and tuple_injection = (expr, comma) nsepseq par reg + +and none_expr = typed_none_expr par + +and typed_none_expr = { + c_None : c_None; + colon : colon; + opt_type : type_expr +} + +and fun_call = (fun_name * arguments) reg + +and arguments = tuple_injection + +(* Patterns *) + +and pattern = + PCons of (pattern, cons) nsepseq reg +| PVar of Lexer.lexeme reg +| PWild of wild +| PInt of (Lexer.lexeme * Z.t) reg +| PBytes of (Lexer.lexeme * Hex.t) reg +| PString of Lexer.lexeme reg +| PUnit of c_Unit +| PFalse of c_False +| PTrue of c_True +| PNone of c_None +| PSome of (c_Some * pattern par reg) reg +| PList of list_pattern +| PTuple of (pattern, comma) nsepseq par reg + +and list_pattern = + Sugar of (pattern, semi) sepseq brackets reg +| Raw of (pattern * cons * pattern) par reg + +(* Projecting regions *) + +val type_expr_to_region : type_expr -> Region.t +val expr_to_region : expr -> Region.t +val instr_to_region : instruction -> Region.t +val pattern_to_region : pattern -> Region.t +val local_decl_to_region : local_decl -> Region.t +val path_to_region : path -> Region.t +val lhs_to_region : lhs -> Region.t +val rhs_to_region : rhs -> Region.t +val if_clause_to_region : if_clause -> Region.t +val selection_to_region : selection -> Region.t + +(* Printing *) + +val print_tokens : t -> unit diff --git a/src/ligo/ligo-parser/AST2.ml b/src/ligo/ligo-parser/AST2.ml new file mode 100644 index 000000000..78a181f79 --- /dev/null +++ b/src/ligo/ligo-parser/AST2.ml @@ -0,0 +1,795 @@ +[@@@warning "-30"] + +module I = AST + +open Region + +module SMap = Map.Make(String) + +module O = struct + type asttodo = [`TODO] (* occurrences of asttodo will point to some part of the original parser AST *) + + type name_and_region = {name: string; orig: Region.t} + type type_name = name_and_region + type var_name = name_and_region + type field_name = name_and_region + + type pattern = + PVar of var_name + | PWild + | PInt of Z.t + | PBytes of MBytes.t + | PString of string + | PUnit + | PFalse + | PTrue + | PNone + | PSome of pattern + | PCons of pattern * pattern + | PNull + | PRecord of (field_name * pattern) SMap.t + + type type_constructor = + Option + | List + | Set + | Map + + type type_expr_case = + Sum of (type_name * type_expr) SMap.t + | Record of (field_name * type_expr) SMap.t + | TypeApp of type_constructor * (type_expr list) + | Function of { arg: type_expr; ret: type_expr } + | Ref of type_expr + | String + | Bytes + | Int + | Unit + | Bool + + and type_expr = { type_expr: type_expr_case; name: type_name option; orig: Region.t } + + type typed_var = { name:var_name; ty:type_expr; orig: asttodo } + + type type_decl = { name:type_name; ty:type_expr; orig: asttodo } + + type expr = + App of { operator: operator; arguments: expr list } + | Var of var_name + | Constant of constant + | Record of (field_name * expr) list + | Lambda of lambda + + and decl = { name:var_name; ty:type_expr; value: expr } + + and lambda = { + parameter: typed_var; + declarations: decl list; + instructions: instr list; + result: expr; + } + + and operator = + Function of var_name + | Constructor of var_name + | UpdateField of field_name + | GetField of field_name + | Or | And | Lt | Leq | Gt | Geq | Equal | Neq | Cat | Cons | Add | Sub | Mult | Div | Mod + | Neg | Not + | Set | List + | MapLookup + + and constant = + Unit + | Int of Z.t | String of string | Bytes of MBytes.t + | False | True + | Null of type_expr + | EmptySet of type_expr + | CNone of type_expr + + and instr = + Assignment of { name: var_name; value: expr; orig: asttodo } + | While of { condition: expr; body: instr list; orig: asttodo } + | ForCollection of { list: expr; var: var_name; body: instr list; orig: asttodo } + | Match of { expr: expr; cases: (pattern * instr list) list; orig: asttodo } + | ProcedureCall of { expr: expr; orig: asttodo } (* expr returns unit, drop the result. Similar to OCaml's ";". *) + | Fail of { expr: expr; orig: asttodo } + + type ast = { + types : type_decl list; + storage_decl : typed_var; + declarations : decl list; + orig : AST.t + } +end + +(* open Sanity: *) +let (|>) v f = f v (* pipe f to v *) +let (@@) f v = f v (* apply f on v *) +let (@.) f g x = f (g x) (* compose *) +let map f l = List.rev (List.rev_map f l) +let mapi f l = + let f (i, l) elem = + (i + 1, (f i elem) :: l) + in snd (List.fold_left f (0,[]) l) +(* TODO: check that List.append is not broken + (i.e. check that it is tail-recursive) *) +let append_map f l = map f l |> List.flatten +let append l1 l2 = List.append l1 l2 +let list_to_map l = List.fold_left (fun m (k,v) -> SMap.add k v m) SMap.empty l +let fold_map f a l = + let f (acc, l) elem = + let acc', elem' = f acc elem + in acc', (elem' :: l) in + let last_acc, last_l = List.fold_left f (a, []) l + in last_acc, List.rev last_l + +(* Simplify the AST *) + +let name_and_region_of_int i = O.{name = string_of_int i; orig = Region.ghost} + +let s_nsepseq : ('a,'sep) Utils.nsepseq -> 'a list = + fun (first, rest) -> first :: (map snd rest) + +let s_sepseq : ('a,'sep) Utils.sepseq -> 'a list = + function + None -> [] + | Some nsepseq -> s_nsepseq nsepseq + +let s_name {value=name; region} : O.var_name = + let () = ignore (region) in + {name;orig = region} + +let name_to_string {value=name; region} : string = + let () = ignore (region) in + name + +let type_expr (orig : Region.t) (e : O.type_expr_case) : O.type_expr = + { type_expr = e; name = None; orig } + +let s_type_constructor {value=name;region} : O.type_constructor = + let () = ignore (region) in + match name with + "Option" -> Option + | "List" -> List + | "Map" -> Map + | "Set" -> Set + (* TODO: escape the name, prevent any \x1b and other weird characters from appearing in the output *) + | _ -> failwith ("Unknown type constructor: " ^ name) + +let named_list_to_map (l : (O.name_and_region * 'a) list) : (O.name_and_region * 'a) SMap.t = + List.fold_left + (fun m ((x,_) as p) -> + let {name;_} : O.name_and_region = x in + SMap.add name p m) + SMap.empty + l + +let rec s_cartesian {value=sequence; region} : O.type_expr = + let () = ignore (region) in + s_nsepseq sequence + |>map s_type_expr + |> mapi (fun i p -> name_and_region_of_int i, p) + |> named_list_to_map + |> (fun x -> (Record x : O.type_expr_case)) + |> type_expr region + +and s_sum_type {value=sequence; region} : O.type_expr = + let () = ignore (region) in + type_expr region (Sum (map s_variant (s_nsepseq sequence) |> named_list_to_map)) + +and s_variant {value=(constr, kwd_of, cartesian); region} = + let () = ignore (kwd_of,region) in + (s_name constr, s_cartesian cartesian) + +and s_record_type {value=(kwd_record, field_decls, kwd_end); region} : O.type_expr = + let () = ignore (kwd_record,region,kwd_end) in + type_expr region (Record (map s_field_decl (s_nsepseq field_decls) |> named_list_to_map) : O.type_expr_case) + +and s_field_decl {value=(var, colon, type_expr); region} : O.type_name * O.type_expr = + let () = ignore (colon,region) in + ((s_name var), (s_type_expr type_expr)) + +and s_type_app {value=(type_name,type_tuple); region} : O.type_expr = + let () = ignore (region) in + type_expr region (TypeApp (s_type_constructor type_name, s_type_tuple type_tuple)) + +and s_type_tuple ({value=(lpar, sequence, rpar); region} : (I.type_name, I.comma) Utils.nsepseq I.par) : O.type_expr list = + let () = ignore (lpar,rpar,region) in + (* TODO: the grammar should allow any type expr, not just type_name in the tuple elements *) + map s_type_expr (map (fun a -> I.TAlias a) (s_nsepseq sequence)) + +and s_par_type {value=(lpar, type_expr, rpar); region} : O.type_expr = + let () = ignore (lpar,rpar,region) in + s_type_expr type_expr + +and s_type_alias name : O.type_expr = + let () = ignore () in + type_expr name.region (TypeApp (s_type_constructor name, [])) + +and s_type_expr (orig : I.type_expr) : O.type_expr = match orig with + Prod cartesian -> s_cartesian cartesian +| Sum sum_type -> s_sum_type sum_type +| Record record_type -> s_record_type record_type +| TypeApp type_app -> s_type_app type_app +| ParType par_type -> s_par_type par_type +| TAlias type_alias -> s_type_alias type_alias + + +let s_type_decl I.{value={kwd_type;name;kwd_is;type_expr;terminator}; region} : O.type_decl = + let () = ignore (kwd_type,kwd_is,terminator,region) in + let ty = s_type_expr type_expr in + O.{ name = s_name name; ty = { ty with name = Some (s_name name) }; orig = `TODO } + +let s_storage_decl I.{value={kwd_storage; name; colon; store_type; terminator}; region} : O.typed_var = + let () = ignore (kwd_storage,colon,terminator,region) in + O.{ name = s_name name; ty = s_type_expr store_type; orig = `TODO } + +let s_operations_decl I.{value={kwd_operations;name;colon;op_type;terminator}; region} : O.typed_var = + let () = ignore (kwd_operations,colon,terminator,region) in + O.{ name = s_name name; ty = s_type_expr op_type; orig = `TODO } + +let s_empty_list {value=(l, (lbracket, rbracket, colon, type_expr), r); region} : O.expr = + let () = ignore (l, lbracket, rbracket, colon, r, region) in + Constant (Null (s_type_expr type_expr)) + +let s_empty_set {value=(l, (lbrace, rbrace, colon, type_expr), r); region} : O.expr = + let () = ignore (l, lbrace, rbrace, colon, r, region) in + Constant (EmptySet (s_type_expr type_expr)) + +let s_none {value=(l, (c_None, colon, type_expr), r); region} : O.expr = + let () = ignore (l, c_None, colon, r, region) in + Constant (CNone (s_type_expr type_expr)) + +let parameters_to_tuple (parameters : (string * O.type_expr) list) : O.type_expr = + (* TODO: use records with named fields to have named arguments. *) + let parameter_tuple : O.type_expr_case = + Record (mapi (fun i (_name,ty) -> name_and_region_of_int i, ty) parameters |> named_list_to_map) in + O.{ type_expr = parameter_tuple; name = None; orig = Region.ghost } + +and parameters_to_decls singleparam (parameters : (string * O.type_expr) list) : O.decl list = + let f i (name,ty) = + O.{ name = {name; orig=Region.ghost}; + ty = ty; + value = App { operator = O.GetField (name_and_region_of_int i); + arguments = [Var singleparam] } } + in mapi f parameters + +let rec bin l operator r = O.App { operator; arguments = [s_expr l; s_expr r] } +and una operator v = O.App { operator; arguments = [s_expr v] } +and s_expr : I.expr -> O.expr = + function + Or {value=(l, bool_or, r); region} -> let () = ignore (region, bool_or) in bin l Or r + | And {value=(l, bool_and, r); region} -> let () = ignore (region,bool_and) in bin l And r + | Lt {value=(l, lt, r); region} -> let () = ignore (region, lt) in bin l Lt r + | Leq {value=(l, leq, r); region} -> let () = ignore (region, leq) in bin l Leq r + | Gt {value=(l, gt, r); region} -> let () = ignore (region, gt) in bin l Gt r + | Geq {value=(l, geq, r); region} -> let () = ignore (region, geq) in bin l Geq r + | Equal {value=(l, equal, r); region} -> let () = ignore (region, equal) in bin l Equal r + | Neq {value=(l, neq, r); region} -> let () = ignore (region, neq) in bin l Neq r + | Cat {value=(l, cat, r); region} -> let () = ignore (region, cat) in bin l Cat r + | Cons {value=(l, cons, r); region} -> let () = ignore (region, cons) in bin l Cons r + | Add {value=(l, plus, r); region} -> let () = ignore (region, plus) in bin l Add r + | Sub {value=(l, minus, r); region} -> let () = ignore (region, minus) in bin l Sub r + | Mult {value=(l, times, r); region} -> let () = ignore (region, times) in bin l Mult r + | Div {value=(l, slash, r); region} -> let () = ignore (region, slash) in bin l Div r + | Mod {value=(l, kwd_mod, r); region} -> let () = ignore (region, kwd_mod) in bin l Mod r + | Neg {value=(minus, expr); region} -> let () = ignore (region, minus) in una Neg expr + | Not {value=(kwd_not, expr); region} -> let () = ignore (region, kwd_not) in una Not expr + | Int {value=(lexeme, z); region} -> let () = ignore (region, lexeme) in Constant (Int z) + | Var lexeme -> Var (s_name lexeme) + | String {value=s; region} -> let () = ignore (region) in Constant (String s) + | Bytes {value=(lexeme, mbytes); region} -> let () = ignore (region, lexeme) in Constant (Bytes mbytes) + | False c_False -> let () = ignore (c_False) in Constant (False) + | True c_True -> let () = ignore (c_True) in Constant (True) + | Unit c_Unit -> let () = ignore (c_Unit) in Constant (Unit) + | Tuple {value=(l,tuple,r); region} -> let () = ignore (l,r,region) in s_tuple_expr (tuple |> s_nsepseq |> map s_expr) + | List list -> s_list list + | EmptyList empty_list -> s_empty_list empty_list + | Set set -> s_set set + | EmptySet empty_set -> s_empty_set empty_set + | NoneExpr none_expr -> s_none none_expr + | FunCall fun_call -> s_fun_call fun_call + | ConstrApp constr_app -> s_constr_app constr_app + | SomeApp some_app -> s_some_app some_app + | MapLookUp map_lookup -> s_map_lookup map_lookup + | ParExpr {value=(lpar,expr,rpar); region} -> let () = ignore (lpar,rpar,region) in s_expr expr + +and s_tuple_expr tuple : O.expr = + Record (mapi (fun i e -> name_and_region_of_int i, e) tuple) + +and s_map_lookup I.{value = {map_name; selector; index}; region} : O.expr = + let {value = lbracket, index_expr, rbracket; region=region2} = index in + let () = ignore (selector, lbracket, rbracket, region2, region) in + App { operator = MapLookup; arguments = [Var (s_name map_name); s_expr index_expr] } + +and s_some_app {value=(c_Some, {value=(l,arguments,r); region=region2}); region} : O.expr = + let () = ignore (c_Some,l,r,region2,region) in + match s_nsepseq arguments with + [] -> failwith "tuple cannot be empty" + | [a] -> s_expr a + | l -> s_tuple_expr (map s_expr l) + +and s_list {value=(l, list, r); region} : O.expr = + let () = ignore (l, r, region) in + App { operator = List; arguments = map s_expr (s_nsepseq list) } + +and s_set {value=(l, set, r); region} : O.expr = + let () = ignore (l, r, region) in + App { operator = Set; arguments = map s_expr (s_nsepseq set) } + +and s_pattern {value=sequence; region} : O.pattern = + let () = ignore (region) in + s_pattern_conses (s_nsepseq sequence) + +and s_pattern_conses : I.core_pattern list -> O.pattern = function + [] -> assert false + | [p] -> s_core_pattern p + | hd :: tl -> PCons (s_core_pattern hd, s_pattern_conses tl) + +and s_case ({value=(pattern, arrow, instruction); region} : I.case) : O.pattern * O.instr list = + let () = ignore (arrow,region) in + s_pattern pattern, s_instruction instruction + +and s_core_pattern : I.core_pattern -> O.pattern = function + PVar var -> PVar (s_name var) +| PWild wild -> let () = ignore (wild) in PWild +| PInt {value=(si,i);region} -> let () = ignore (si,region) in PInt i +| PBytes {value=(sb,b);region} -> let () = ignore (sb,region) in PBytes b +| PString {value=s;region} -> let () = ignore (region) in PString s +| PUnit region -> let () = ignore (region) in PUnit +| PFalse region -> let () = ignore (region) in PFalse +| PTrue region -> let () = ignore (region) in PTrue +| PNone region -> let () = ignore (region) in PNone +| PSome psome -> s_psome psome +| PList pattern -> s_list_pattern pattern +| PTuple ptuple -> s_ptuple ptuple + +and s_list_pattern = function + Sugar sugar -> s_sugar sugar +| Raw raw -> s_raw raw + +and s_sugar {value=(lbracket, sequence, rbracket); region} : O.pattern = + let () = ignore (lbracket, rbracket, region) in + List.fold_left (fun acc p -> O.PCons (s_core_pattern p, acc)) + O.PNull + (s_sepseq sequence); + +and s_raw {value=(lpar, (core_pattern, cons, pattern), rpar); region} = + let () = ignore (lpar, cons, rpar, region) in + O.PCons (s_core_pattern core_pattern, s_pattern pattern) + +and s_ptuple {value=(lpar, sequence, rpar); region} = + let () = ignore (lpar, rpar, region) in + s_nsepseq sequence + |> map s_core_pattern + |> mapi (fun i p -> name_and_region_of_int i, p) + |> fun x -> O.PRecord (x |> named_list_to_map) + +and s_psome {value=(c_Some,{value=(l,psome,r);region=region2});region} : O.pattern = + let () = ignore (c_Some,l,r,region2,region) in + PSome (s_core_pattern psome) + +and s_const_decl I.{value={kwd_const;name;colon;const_type;equal;init;terminator}; region} : O.decl = + let () = ignore (kwd_const,colon,equal,terminator,region) in + O.{ name = s_name name; ty = s_type_expr const_type; value = s_expr init } + +and s_param_const {value=(kwd_const,variable,colon,type_expr); region} : string * O.type_expr = + let () = ignore (kwd_const,colon,region) in + name_to_string variable, s_type_expr type_expr + +and s_param_var {value=(kwd_var,variable,colon,type_expr); region} : string * O.type_expr = + let () = ignore (kwd_var,colon,region) in + name_to_string variable, s_type_expr type_expr + +and s_param_decl : I.param_decl -> string * O.type_expr = function + ParamConst p -> s_param_const p + | ParamVar p -> s_param_var p + +and s_parameters ({value=(lpar,param_decl,rpar);region} : I.parameters) : (string * O.type_expr) list = + let () = ignore (lpar,rpar,region) in + let l = (s_nsepseq param_decl) in + map s_param_decl l + +and s_var_decl I.{value={kwd_var;name;colon;var_type;ass;init;terminator}; region} : O.decl = + let () = ignore (kwd_var,colon,ass,terminator,region) in + O.{ + name = s_name name; + ty = s_type_expr var_type; + value = s_expr init + } + +and s_local_decl : I.local_decl -> O.decl = function + LocalLam decl -> s_lambda_decl decl +| LocalConst decl -> s_const_decl decl +| LocalVar decl -> s_var_decl decl + +and s_instructions ({value=sequence; region} : I.instructions) : O.instr list = + let () = ignore (region) in + append_map s_instruction (s_nsepseq sequence) + +and s_instruction : I.instruction -> O.instr list = function + Single instr -> s_single_instr instr +| Block block -> (s_block block) + +and s_conditional I.{kwd_if;test;kwd_then;ifso;kwd_else;ifnot} : O.instr = + let () = ignore (kwd_if,kwd_then,kwd_else) in + let test = s_expr test in + let ifso = O.PTrue, s_instruction ifso in + let ifnot = O.PFalse, s_instruction ifnot in + Match { + expr = test; + cases = [ifso; ifnot]; + orig = `TODO + } + +and s_match_instr I.{kwd_match;expr;kwd_with;lead_vbar;cases;kwd_end} : O.instr = + let {value=cases;region} = cases in + let () = ignore (kwd_match,kwd_with,lead_vbar,kwd_end,region) in + Match { expr = s_expr expr; cases = map s_case (s_nsepseq cases); orig = `TODO } + +and s_ass_instr {value=(variable,ass,expr); region} : O.instr = + let () = ignore (ass,region) in + Assignment { name = s_name variable; value = s_expr expr; orig = `TODO } + +and s_while_loop {value=(kwd_while, expr, block); region} : O.instr list = + let () = ignore (kwd_while,region) in + [While {condition = s_expr expr; body = s_block block; orig = `TODO}] + +and s_for_loop : I.for_loop -> O.instr list = function + ForInt for_int -> s_for_int for_int +| ForCollect for_collect -> s_for_collect for_collect + +and s_for_int ({value={kwd_for;ass;down;kwd_to;bound;step;block}; region} : I.for_int reg) : O.instr list = + let {value=(variable,ass_kwd,expr);region = ass_region} = ass in + let () = ignore (kwd_for,ass_region,ass_kwd,kwd_to,region) in + let name = s_name variable in + let condition, operator = match down with Some kwd_down -> ignore kwd_down; O.Gt, O.Sub + | None -> O.Lt, O.Add in + let step = s_step step + in [ + Assignment { name; value = s_expr expr; orig = `TODO }; + (* TODO: lift the declaration of the variable, to avoid creating a nested scope here. *) + While { + condition = App { operator = condition; + arguments = [Var name; s_expr bound]}; + body = append (s_block block) + [O.Assignment { name; + value = App { operator; + arguments = [Var name; step]}; + orig = `TODO }]; + orig = `TODO + } + ] + +and s_for_collect ({value={kwd_for;var;bind_to;kwd_in;expr;block}; _} : I.for_collect reg) : O.instr list = + let () = ignore (kwd_for,kwd_in) in + let for_instr = + match s_bind_to bind_to with + Some _ -> + failwith "TODO: For on maps is not supported yet!" + | None -> + O.ForCollection { + list = s_expr expr; + var = s_name var; + body = s_block block; + orig = `TODO + } + in [for_instr] + +and s_step : (I.kwd_step * I.expr) option -> O.expr = function + Some (kwd_step, expr) -> let () = ignore (kwd_step) in s_expr expr +| None -> Constant (Int (Z.of_int 1)) + +and s_bind_to : (I.arrow * I.variable) option -> O.var_name option = function + Some (arrow, variable) -> let () = ignore (arrow) in Some (s_name variable) + | None -> None + +and s_loop : I.loop -> O.instr list = function + While while_loop -> s_while_loop while_loop + | For for_loop -> s_for_loop for_loop + +and s_fun_call {value=(fun_name, arguments); region} : O.expr = + let () = ignore (region) in + let {value=fun_name_string;_} = fun_name in + let firstchar = String.sub fun_name_string 0 1 in + (* If it starts with a capital letter, then it is a constructor *) + if String.equal firstchar (String.uppercase_ascii firstchar) then + App { operator = Constructor (s_name fun_name); arguments = s_arguments arguments } + else + App { operator = Function (s_name fun_name); arguments = s_arguments arguments } + +and s_constr_app {value=(constr, arguments); region} : O.expr = + let () = ignore (region) in + App { operator = Function (s_name constr); arguments = s_arguments arguments } + +and s_arguments {value=(lpar, sequence, rpar); region} : O.expr list = + (* TODO: should return a tuple *) + let () = ignore (lpar,rpar,region) in + match map s_expr (s_nsepseq sequence) with + [] -> [Constant Unit] + | [single_argument] -> [single_argument] + | args -> [s_tuple_expr args] ; + +and s_fail ((kwd_fail, expr) : (I.kwd_fail * I.expr)) : O.instr = + let () = ignore (kwd_fail) in + Fail { expr = s_expr expr; orig = `TODO } + + + + +and s_single_instr : I.single_instr -> O.instr list = function + Cond {value; _} -> [s_conditional value] +| Match {value; _} -> [s_match_instr value] +| Ass instr -> [s_ass_instr instr] +| Loop loop -> s_loop loop +| ProcCall fun_call -> [ProcedureCall { expr = s_fun_call fun_call; orig = `TODO }] +| Null kwd_null -> let () = ignore (kwd_null) in + [] +| Fail {value; _} -> [s_fail value] + +and s_block I.{value={opening;instr;terminator;close}; _} : O.instr list = + let () = ignore (opening,terminator,close) in + s_instructions instr + +and gensym = + let i = ref 0 in + fun ty -> + i := !i + 1; + (* TODO: Region.ghost *) + ({name = {name=(string_of_int !i) ^ "gensym"; orig = Region.ghost}; ty; orig = `TODO} : O.typed_var) + +and s_fun_decl I.{value={kwd_function;name;param;colon;ret_type;kwd_is;local_decls;block;kwd_with;return;terminator}; region} : O.decl = + let () = ignore (kwd_function,colon,kwd_is,kwd_with,terminator,region) in + let tuple_type = s_parameters param |> parameters_to_tuple in + let single_argument = gensym tuple_type in + let ({name = single_argument_xxx; ty = _; orig = `TODO} : O.typed_var) = single_argument in + O.{ + name = s_name name; + ty = type_expr region (Function { arg = tuple_type; + ret = s_type_expr ret_type }); + value = Lambda { + parameter = single_argument; + declarations = append + (s_parameters param |> parameters_to_decls single_argument_xxx) + (map s_local_decl local_decls); + instructions = s_block block; + result = s_expr return + } + } + +and s_proc_decl I.{value={kwd_procedure;name;param;kwd_is;local_decls;block;terminator}; region} = + let () = ignore (kwd_procedure,kwd_is,terminator,region) in + let tuple_type = s_parameters param |> parameters_to_tuple in + let single_argument = gensym tuple_type in + let ({name = single_argument_xxx; ty = _; orig = `TODO} : O.typed_var) = single_argument in + O.{ + name = s_name name; + ty = type_expr region (Function { arg = tuple_type; + ret = type_expr region Unit }); + value = Lambda { + parameter = single_argument; + declarations = append + (s_parameters param |> parameters_to_decls single_argument_xxx) + (map s_local_decl local_decls); + instructions = s_block block; + result = O.Constant O.Unit + } + } + +and s_entry_decl I.{value={kwd_entrypoint;name;param;kwd_is;local_decls;block;terminator}; region} = + let () = ignore (kwd_entrypoint,kwd_is,terminator,region) in + let tuple_type = s_parameters param |> parameters_to_tuple in + let single_argument = gensym tuple_type in + let ({name = single_argument_xxx; ty = _; orig = `TODO} : O.typed_var) = single_argument in + O.{ + name = s_name name; + ty = type_expr region (Function { arg = tuple_type; + ret = type_expr region Unit }); + value = Lambda { + parameter = single_argument; + declarations = append + (s_parameters param |> parameters_to_decls single_argument_xxx) + (map s_local_decl local_decls); + instructions = s_block block; + result = O.Constant O.Unit + } + } + +and s_lambda_decl : I.lambda_decl -> O.decl = function + FunDecl fun_decl -> s_fun_decl fun_decl +| EntryDecl entry_decl -> s_entry_decl entry_decl +| ProcDecl proc_decl -> s_proc_decl proc_decl + +type tmp_ast = { + types : O.type_decl list; + storage_decl : O.typed_var option; + operations_decl : O.typed_var option; + declarations : O.decl list; + } + + +let s_declaration (ast : tmp_ast) : I.declaration -> tmp_ast = function + TypeDecl t -> { ast with types = (s_type_decl t) :: ast.types } + | ConstDecl c -> { ast with declarations = (s_const_decl c) :: ast.declarations } + | StorageDecl s -> { ast with storage_decl = Some (s_storage_decl s) } + | OpDecl o -> { ast with operations_decl = Some (s_operations_decl o) } + | LambdaDecl l -> { ast with declarations = (s_lambda_decl l) :: ast.declarations } + +let s_ast (ast : I.ast) : O.ast = + let I.{decl=(decl1,decls);eof} = ast in + let () = ignore (eof) in + let {types; storage_decl; operations_decl; declarations} = + List.fold_left s_declaration + { types = []; + storage_decl = None; + operations_decl = None; + declarations = [] } + ( decl1 :: decls ) in + let storage_decl = match storage_decl with + Some x -> x + | None -> failwith "Missing storage declaration" in + let () = match operations_decl with + Some _ -> failwith "Operations declaration is not allowed anymore TODO" + | None -> () + in {types; storage_decl; declarations; orig = ast} + + + + +(* let s_token region lexeme = *) +(* printf "%s: %s\n"(compact region) lexeme *) + +(* and s_var {region; value=lexeme} = *) +(* printf "%s: Ident \"%s\"\n" (compact region) lexeme *) + +(* and s_constr {region; value=lexeme} = *) +(* printf "%s: Constr \"%s\"\n" *) +(* (compact region) lexeme *) + +(* and s_string {region; value=lexeme} = *) +(* printf "%s: String \"%s\"\n" *) +(* (compact region) lexeme *) + +(* and s_bytes {region; value = lexeme, abstract} = *) +(* printf "%s: Bytes (\"%s\", \"0x%s\")\n" *) +(* (compact region) lexeme *) +(* (MBytes.to_hex abstract |> Hex.to_string) *) + +(* and s_int {region; value = lexeme, abstract} = *) +(* printf "%s: Int (\"%s\", %s)\n" *) +(* (compact region) lexeme *) +(* (Z.to_string abstract) *) + + +(* and s_parameters {value=node; _} = *) +(* let lpar, sequence, rpar = node in *) +(* s_token lpar "("; *) +(* s_nsepseq ";" s_param_decl sequence; *) +(* s_token rpar ")" *) + +(* and s_param_decl = function *) +(* ParamConst param_const -> s_param_const param_const *) +(* | ParamVar param_var -> s_param_var param_var *) + +(* and s_region_cases {value=sequence; _} = *) +(* s_nsepseq "|" s_case sequence *) + +(* and s_expr = function *) +(* Or {value = expr1, bool_or, expr2; _} -> *) +(* s_expr expr1; s_token bool_or "||"; s_expr expr2 *) +(* | And {value = expr1, bool_and, expr2; _} -> *) +(* s_expr expr1; s_token bool_and "&&"; s_expr expr2 *) +(* | Lt {value = expr1, lt, expr2; _} -> *) +(* s_expr expr1; s_token lt "<"; s_expr expr2 *) +(* | Leq {value = expr1, leq, expr2; _} -> *) +(* s_expr expr1; s_token leq "<="; s_expr expr2 *) +(* | Gt {value = expr1, gt, expr2; _} -> *) +(* s_expr expr1; s_token gt ">"; s_expr expr2 *) +(* | Geq {value = expr1, geq, expr2; _} -> *) +(* s_expr expr1; s_token geq ">="; s_expr expr2 *) +(* | Equal {value = expr1, equal, expr2; _} -> *) +(* s_expr expr1; s_token equal "="; s_expr expr2 *) +(* | Neq {value = expr1, neq, expr2; _} -> *) +(* s_expr expr1; s_token neq "=/="; s_expr expr2 *) +(* | Cat {value = expr1, cat, expr2; _} -> *) +(* s_expr expr1; s_token cat "^"; s_expr expr2 *) +(* | Cons {value = expr1, cons, expr2; _} -> *) +(* s_expr expr1; s_token cons "<:"; s_expr expr2 *) +(* | Add {value = expr1, add, expr2; _} -> *) +(* s_expr expr1; s_token add "+"; s_expr expr2 *) +(* | Sub {value = expr1, sub, expr2; _} -> *) +(* s_expr expr1; s_token sub "-"; s_expr expr2 *) +(* | Mult {value = expr1, mult, expr2; _} -> *) +(* s_expr expr1; s_token mult "*"; s_expr expr2 *) +(* | Div {value = expr1, div, expr2; _} -> *) +(* s_expr expr1; s_token div "/"; s_expr expr2 *) +(* | Mod {value = expr1, kwd_mod, expr2; _} -> *) +(* s_expr expr1; s_token kwd_mod "mod"; s_expr expr2 *) +(* | Neg {value = minus, expr; _} -> *) +(* s_token minus "-"; s_expr expr *) +(* | Not {value = kwd_not, expr; _} -> *) +(* s_token kwd_not "not"; s_expr expr *) +(* | Int i -> s_int i *) +(* | Var var -> s_var var *) +(* | String s -> s_string s *) +(* | Bytes b -> s_bytes b *) +(* | False region -> s_token region "False" *) +(* | True region -> s_token region "True" *) +(* | Unit region -> s_token region "Unit" *) +(* | Tuple tuple -> s_tuple tuple *) +(* | List list -> s_list list *) +(* | EmptyList elist -> s_empty_list elist *) +(* | Set set -> s_set set *) +(* | EmptySet eset -> s_empty_set eset *) +(* | NoneExpr nexpr -> s_none_expr nexpr *) +(* | FunCall fun_call -> s_fun_call fun_call *) +(* | ConstrApp capp -> s_constr_app capp *) +(* | SomeApp sapp -> s_some_app sapp *) +(* | MapLookUp lookup -> s_map_lookup lookup *) +(* | ParExpr pexpr -> s_par_expr pexpr *) + +(* and s_list {value=node; _} = *) +(* let lbra, sequence, rbra = node in *) +(* s_token lbra "["; *) +(* s_nsepseq "," s_expr sequence; *) +(* s_token rbra "]" *) + +(* and s_empty_list {value=node; _} = *) +(* let lpar, (lbracket, rbracket, colon, type_expr), rpar = node in *) +(* s_token lpar "("; *) +(* s_token lbracket "["; *) +(* s_token rbracket "]"; *) +(* s_token colon ":"; *) +(* s_type_expr type_expr; *) +(* s_token rpar ")" *) + +(* and s_set {value=node; _} = *) +(* let lbrace, sequence, rbrace = node in *) +(* s_token lbrace "{"; *) +(* s_nsepseq "," s_expr sequence; *) +(* s_token rbrace "}" *) + +(* and s_empty_set {value=node; _} = *) +(* let lpar, (lbrace, rbrace, colon, type_expr), rpar = node in *) +(* s_token lpar "("; *) +(* s_token lbrace "{"; *) +(* s_token rbrace "}"; *) +(* s_token colon ":"; *) +(* s_type_expr type_expr; *) +(* s_token rpar ")" *) + +(* and s_none_expr {value=node; _} = *) +(* let lpar, (c_None, colon, type_expr), rpar = node in *) +(* s_token lpar "("; *) +(* s_token c_None "None"; *) +(* s_token colon ":"; *) +(* s_type_expr type_expr; *) +(* s_token rpar ")" *) + +(* and s_constr_app {value=node; _} = *) +(* let constr, arguments = node in *) +(* s_constr constr; *) +(* s_tuple arguments *) + +(* and s_some_app {value=node; _} = *) +(* let c_Some, arguments = node in *) +(* s_token c_Some "Some"; *) +(* s_tuple arguments *) + + +(* and s_par_expr {value=node; _} = *) +(* let lpar, expr, rpar = node in *) +(* s_token lpar "("; *) +(* s_expr expr; *) +(* s_token rpar ")" *) + +(* and s_psome {value=node; _} = *) +(* let c_Some, patterns = node in *) +(* s_token c_Some "Some"; *) +(* s_patterns patterns *) + + +(* and s_terminator = function *) +(* Some semi -> s_token semi ";" *) +(* | None -> () *) diff --git a/src/ligo/ligo-parser/Error.mli b/src/ligo/ligo-parser/Error.mli new file mode 100644 index 000000000..19c1ce4c9 --- /dev/null +++ b/src/ligo/ligo-parser/Error.mli @@ -0,0 +1,3 @@ +type t = .. + +type error = t diff --git a/src/ligo/ligo-parser/EvalOpt.ml b/src/ligo/ligo-parser/EvalOpt.ml new file mode 100644 index 000000000..20d039603 --- /dev/null +++ b/src/ligo/ligo-parser/EvalOpt.ml @@ -0,0 +1,161 @@ +(* Parsing the command-line option for testing the LIGO lexer and + parser *) + +let printf = Printf.printf +let sprintf = Printf.sprintf + +let abort msg = + Utils.highlight (sprintf "Command-line error: %s\n" msg); exit 1 + +(* Help *) + +let help () = + let file = Filename.basename Sys.argv.(0) in + printf "Usage: %s [