diff --git a/AST2.ml b/AST2.ml new file mode 100644 index 000000000..78a181f79 --- /dev/null +++ b/AST2.ml @@ -0,0 +1,795 @@ +[@@@warning "-30"] + +module I = AST + +open Region + +module SMap = Map.Make(String) + +module O = struct + type asttodo = [`TODO] (* occurrences of asttodo will point to some part of the original parser AST *) + + type name_and_region = {name: string; orig: Region.t} + type type_name = name_and_region + type var_name = name_and_region + type field_name = name_and_region + + type pattern = + PVar of var_name + | PWild + | PInt of Z.t + | PBytes of MBytes.t + | PString of string + | PUnit + | PFalse + | PTrue + | PNone + | PSome of pattern + | PCons of pattern * pattern + | PNull + | PRecord of (field_name * pattern) SMap.t + + type type_constructor = + Option + | List + | Set + | Map + + type type_expr_case = + Sum of (type_name * type_expr) SMap.t + | Record of (field_name * type_expr) SMap.t + | TypeApp of type_constructor * (type_expr list) + | Function of { arg: type_expr; ret: type_expr } + | Ref of type_expr + | String + | Bytes + | Int + | Unit + | Bool + + and type_expr = { type_expr: type_expr_case; name: type_name option; orig: Region.t } + + type typed_var = { name:var_name; ty:type_expr; orig: asttodo } + + type type_decl = { name:type_name; ty:type_expr; orig: asttodo } + + type expr = + App of { operator: operator; arguments: expr list } + | Var of var_name + | Constant of constant + | Record of (field_name * expr) list + | Lambda of lambda + + and decl = { name:var_name; ty:type_expr; value: expr } + + and lambda = { + parameter: typed_var; + declarations: decl list; + instructions: instr list; + result: expr; + } + + and operator = + Function of var_name + | Constructor of var_name + | UpdateField of field_name + | GetField of field_name + | Or | And | Lt | Leq | Gt | Geq | Equal | Neq | Cat | Cons | Add | Sub | Mult | Div | Mod + | Neg | Not + | Set | List + | MapLookup + + and constant = + Unit + | Int of Z.t | String of string | Bytes of MBytes.t + | False | True + | Null of type_expr + | EmptySet of type_expr + | CNone of type_expr + + and instr = + Assignment of { name: var_name; value: expr; orig: asttodo } + | While of { condition: expr; body: instr list; orig: asttodo } + | ForCollection of { list: expr; var: var_name; body: instr list; orig: asttodo } + | Match of { expr: expr; cases: (pattern * instr list) list; orig: asttodo } + | ProcedureCall of { expr: expr; orig: asttodo } (* expr returns unit, drop the result. Similar to OCaml's ";". *) + | Fail of { expr: expr; orig: asttodo } + + type ast = { + types : type_decl list; + storage_decl : typed_var; + declarations : decl list; + orig : AST.t + } +end + +(* open Sanity: *) +let (|>) v f = f v (* pipe f to v *) +let (@@) f v = f v (* apply f on v *) +let (@.) f g x = f (g x) (* compose *) +let map f l = List.rev (List.rev_map f l) +let mapi f l = + let f (i, l) elem = + (i + 1, (f i elem) :: l) + in snd (List.fold_left f (0,[]) l) +(* TODO: check that List.append is not broken + (i.e. check that it is tail-recursive) *) +let append_map f l = map f l |> List.flatten +let append l1 l2 = List.append l1 l2 +let list_to_map l = List.fold_left (fun m (k,v) -> SMap.add k v m) SMap.empty l +let fold_map f a l = + let f (acc, l) elem = + let acc', elem' = f acc elem + in acc', (elem' :: l) in + let last_acc, last_l = List.fold_left f (a, []) l + in last_acc, List.rev last_l + +(* Simplify the AST *) + +let name_and_region_of_int i = O.{name = string_of_int i; orig = Region.ghost} + +let s_nsepseq : ('a,'sep) Utils.nsepseq -> 'a list = + fun (first, rest) -> first :: (map snd rest) + +let s_sepseq : ('a,'sep) Utils.sepseq -> 'a list = + function + None -> [] + | Some nsepseq -> s_nsepseq nsepseq + +let s_name {value=name; region} : O.var_name = + let () = ignore (region) in + {name;orig = region} + +let name_to_string {value=name; region} : string = + let () = ignore (region) in + name + +let type_expr (orig : Region.t) (e : O.type_expr_case) : O.type_expr = + { type_expr = e; name = None; orig } + +let s_type_constructor {value=name;region} : O.type_constructor = + let () = ignore (region) in + match name with + "Option" -> Option + | "List" -> List + | "Map" -> Map + | "Set" -> Set + (* TODO: escape the name, prevent any \x1b and other weird characters from appearing in the output *) + | _ -> failwith ("Unknown type constructor: " ^ name) + +let named_list_to_map (l : (O.name_and_region * 'a) list) : (O.name_and_region * 'a) SMap.t = + List.fold_left + (fun m ((x,_) as p) -> + let {name;_} : O.name_and_region = x in + SMap.add name p m) + SMap.empty + l + +let rec s_cartesian {value=sequence; region} : O.type_expr = + let () = ignore (region) in + s_nsepseq sequence + |>map s_type_expr + |> mapi (fun i p -> name_and_region_of_int i, p) + |> named_list_to_map + |> (fun x -> (Record x : O.type_expr_case)) + |> type_expr region + +and s_sum_type {value=sequence; region} : O.type_expr = + let () = ignore (region) in + type_expr region (Sum (map s_variant (s_nsepseq sequence) |> named_list_to_map)) + +and s_variant {value=(constr, kwd_of, cartesian); region} = + let () = ignore (kwd_of,region) in + (s_name constr, s_cartesian cartesian) + +and s_record_type {value=(kwd_record, field_decls, kwd_end); region} : O.type_expr = + let () = ignore (kwd_record,region,kwd_end) in + type_expr region (Record (map s_field_decl (s_nsepseq field_decls) |> named_list_to_map) : O.type_expr_case) + +and s_field_decl {value=(var, colon, type_expr); region} : O.type_name * O.type_expr = + let () = ignore (colon,region) in + ((s_name var), (s_type_expr type_expr)) + +and s_type_app {value=(type_name,type_tuple); region} : O.type_expr = + let () = ignore (region) in + type_expr region (TypeApp (s_type_constructor type_name, s_type_tuple type_tuple)) + +and s_type_tuple ({value=(lpar, sequence, rpar); region} : (I.type_name, I.comma) Utils.nsepseq I.par) : O.type_expr list = + let () = ignore (lpar,rpar,region) in + (* TODO: the grammar should allow any type expr, not just type_name in the tuple elements *) + map s_type_expr (map (fun a -> I.TAlias a) (s_nsepseq sequence)) + +and s_par_type {value=(lpar, type_expr, rpar); region} : O.type_expr = + let () = ignore (lpar,rpar,region) in + s_type_expr type_expr + +and s_type_alias name : O.type_expr = + let () = ignore () in + type_expr name.region (TypeApp (s_type_constructor name, [])) + +and s_type_expr (orig : I.type_expr) : O.type_expr = match orig with + Prod cartesian -> s_cartesian cartesian +| Sum sum_type -> s_sum_type sum_type +| Record record_type -> s_record_type record_type +| TypeApp type_app -> s_type_app type_app +| ParType par_type -> s_par_type par_type +| TAlias type_alias -> s_type_alias type_alias + + +let s_type_decl I.{value={kwd_type;name;kwd_is;type_expr;terminator}; region} : O.type_decl = + let () = ignore (kwd_type,kwd_is,terminator,region) in + let ty = s_type_expr type_expr in + O.{ name = s_name name; ty = { ty with name = Some (s_name name) }; orig = `TODO } + +let s_storage_decl I.{value={kwd_storage; name; colon; store_type; terminator}; region} : O.typed_var = + let () = ignore (kwd_storage,colon,terminator,region) in + O.{ name = s_name name; ty = s_type_expr store_type; orig = `TODO } + +let s_operations_decl I.{value={kwd_operations;name;colon;op_type;terminator}; region} : O.typed_var = + let () = ignore (kwd_operations,colon,terminator,region) in + O.{ name = s_name name; ty = s_type_expr op_type; orig = `TODO } + +let s_empty_list {value=(l, (lbracket, rbracket, colon, type_expr), r); region} : O.expr = + let () = ignore (l, lbracket, rbracket, colon, r, region) in + Constant (Null (s_type_expr type_expr)) + +let s_empty_set {value=(l, (lbrace, rbrace, colon, type_expr), r); region} : O.expr = + let () = ignore (l, lbrace, rbrace, colon, r, region) in + Constant (EmptySet (s_type_expr type_expr)) + +let s_none {value=(l, (c_None, colon, type_expr), r); region} : O.expr = + let () = ignore (l, c_None, colon, r, region) in + Constant (CNone (s_type_expr type_expr)) + +let parameters_to_tuple (parameters : (string * O.type_expr) list) : O.type_expr = + (* TODO: use records with named fields to have named arguments. *) + let parameter_tuple : O.type_expr_case = + Record (mapi (fun i (_name,ty) -> name_and_region_of_int i, ty) parameters |> named_list_to_map) in + O.{ type_expr = parameter_tuple; name = None; orig = Region.ghost } + +and parameters_to_decls singleparam (parameters : (string * O.type_expr) list) : O.decl list = + let f i (name,ty) = + O.{ name = {name; orig=Region.ghost}; + ty = ty; + value = App { operator = O.GetField (name_and_region_of_int i); + arguments = [Var singleparam] } } + in mapi f parameters + +let rec bin l operator r = O.App { operator; arguments = [s_expr l; s_expr r] } +and una operator v = O.App { operator; arguments = [s_expr v] } +and s_expr : I.expr -> O.expr = + function + Or {value=(l, bool_or, r); region} -> let () = ignore (region, bool_or) in bin l Or r + | And {value=(l, bool_and, r); region} -> let () = ignore (region,bool_and) in bin l And r + | Lt {value=(l, lt, r); region} -> let () = ignore (region, lt) in bin l Lt r + | Leq {value=(l, leq, r); region} -> let () = ignore (region, leq) in bin l Leq r + | Gt {value=(l, gt, r); region} -> let () = ignore (region, gt) in bin l Gt r + | Geq {value=(l, geq, r); region} -> let () = ignore (region, geq) in bin l Geq r + | Equal {value=(l, equal, r); region} -> let () = ignore (region, equal) in bin l Equal r + | Neq {value=(l, neq, r); region} -> let () = ignore (region, neq) in bin l Neq r + | Cat {value=(l, cat, r); region} -> let () = ignore (region, cat) in bin l Cat r + | Cons {value=(l, cons, r); region} -> let () = ignore (region, cons) in bin l Cons r + | Add {value=(l, plus, r); region} -> let () = ignore (region, plus) in bin l Add r + | Sub {value=(l, minus, r); region} -> let () = ignore (region, minus) in bin l Sub r + | Mult {value=(l, times, r); region} -> let () = ignore (region, times) in bin l Mult r + | Div {value=(l, slash, r); region} -> let () = ignore (region, slash) in bin l Div r + | Mod {value=(l, kwd_mod, r); region} -> let () = ignore (region, kwd_mod) in bin l Mod r + | Neg {value=(minus, expr); region} -> let () = ignore (region, minus) in una Neg expr + | Not {value=(kwd_not, expr); region} -> let () = ignore (region, kwd_not) in una Not expr + | Int {value=(lexeme, z); region} -> let () = ignore (region, lexeme) in Constant (Int z) + | Var lexeme -> Var (s_name lexeme) + | String {value=s; region} -> let () = ignore (region) in Constant (String s) + | Bytes {value=(lexeme, mbytes); region} -> let () = ignore (region, lexeme) in Constant (Bytes mbytes) + | False c_False -> let () = ignore (c_False) in Constant (False) + | True c_True -> let () = ignore (c_True) in Constant (True) + | Unit c_Unit -> let () = ignore (c_Unit) in Constant (Unit) + | Tuple {value=(l,tuple,r); region} -> let () = ignore (l,r,region) in s_tuple_expr (tuple |> s_nsepseq |> map s_expr) + | List list -> s_list list + | EmptyList empty_list -> s_empty_list empty_list + | Set set -> s_set set + | EmptySet empty_set -> s_empty_set empty_set + | NoneExpr none_expr -> s_none none_expr + | FunCall fun_call -> s_fun_call fun_call + | ConstrApp constr_app -> s_constr_app constr_app + | SomeApp some_app -> s_some_app some_app + | MapLookUp map_lookup -> s_map_lookup map_lookup + | ParExpr {value=(lpar,expr,rpar); region} -> let () = ignore (lpar,rpar,region) in s_expr expr + +and s_tuple_expr tuple : O.expr = + Record (mapi (fun i e -> name_and_region_of_int i, e) tuple) + +and s_map_lookup I.{value = {map_name; selector; index}; region} : O.expr = + let {value = lbracket, index_expr, rbracket; region=region2} = index in + let () = ignore (selector, lbracket, rbracket, region2, region) in + App { operator = MapLookup; arguments = [Var (s_name map_name); s_expr index_expr] } + +and s_some_app {value=(c_Some, {value=(l,arguments,r); region=region2}); region} : O.expr = + let () = ignore (c_Some,l,r,region2,region) in + match s_nsepseq arguments with + [] -> failwith "tuple cannot be empty" + | [a] -> s_expr a + | l -> s_tuple_expr (map s_expr l) + +and s_list {value=(l, list, r); region} : O.expr = + let () = ignore (l, r, region) in + App { operator = List; arguments = map s_expr (s_nsepseq list) } + +and s_set {value=(l, set, r); region} : O.expr = + let () = ignore (l, r, region) in + App { operator = Set; arguments = map s_expr (s_nsepseq set) } + +and s_pattern {value=sequence; region} : O.pattern = + let () = ignore (region) in + s_pattern_conses (s_nsepseq sequence) + +and s_pattern_conses : I.core_pattern list -> O.pattern = function + [] -> assert false + | [p] -> s_core_pattern p + | hd :: tl -> PCons (s_core_pattern hd, s_pattern_conses tl) + +and s_case ({value=(pattern, arrow, instruction); region} : I.case) : O.pattern * O.instr list = + let () = ignore (arrow,region) in + s_pattern pattern, s_instruction instruction + +and s_core_pattern : I.core_pattern -> O.pattern = function + PVar var -> PVar (s_name var) +| PWild wild -> let () = ignore (wild) in PWild +| PInt {value=(si,i);region} -> let () = ignore (si,region) in PInt i +| PBytes {value=(sb,b);region} -> let () = ignore (sb,region) in PBytes b +| PString {value=s;region} -> let () = ignore (region) in PString s +| PUnit region -> let () = ignore (region) in PUnit +| PFalse region -> let () = ignore (region) in PFalse +| PTrue region -> let () = ignore (region) in PTrue +| PNone region -> let () = ignore (region) in PNone +| PSome psome -> s_psome psome +| PList pattern -> s_list_pattern pattern +| PTuple ptuple -> s_ptuple ptuple + +and s_list_pattern = function + Sugar sugar -> s_sugar sugar +| Raw raw -> s_raw raw + +and s_sugar {value=(lbracket, sequence, rbracket); region} : O.pattern = + let () = ignore (lbracket, rbracket, region) in + List.fold_left (fun acc p -> O.PCons (s_core_pattern p, acc)) + O.PNull + (s_sepseq sequence); + +and s_raw {value=(lpar, (core_pattern, cons, pattern), rpar); region} = + let () = ignore (lpar, cons, rpar, region) in + O.PCons (s_core_pattern core_pattern, s_pattern pattern) + +and s_ptuple {value=(lpar, sequence, rpar); region} = + let () = ignore (lpar, rpar, region) in + s_nsepseq sequence + |> map s_core_pattern + |> mapi (fun i p -> name_and_region_of_int i, p) + |> fun x -> O.PRecord (x |> named_list_to_map) + +and s_psome {value=(c_Some,{value=(l,psome,r);region=region2});region} : O.pattern = + let () = ignore (c_Some,l,r,region2,region) in + PSome (s_core_pattern psome) + +and s_const_decl I.{value={kwd_const;name;colon;const_type;equal;init;terminator}; region} : O.decl = + let () = ignore (kwd_const,colon,equal,terminator,region) in + O.{ name = s_name name; ty = s_type_expr const_type; value = s_expr init } + +and s_param_const {value=(kwd_const,variable,colon,type_expr); region} : string * O.type_expr = + let () = ignore (kwd_const,colon,region) in + name_to_string variable, s_type_expr type_expr + +and s_param_var {value=(kwd_var,variable,colon,type_expr); region} : string * O.type_expr = + let () = ignore (kwd_var,colon,region) in + name_to_string variable, s_type_expr type_expr + +and s_param_decl : I.param_decl -> string * O.type_expr = function + ParamConst p -> s_param_const p + | ParamVar p -> s_param_var p + +and s_parameters ({value=(lpar,param_decl,rpar);region} : I.parameters) : (string * O.type_expr) list = + let () = ignore (lpar,rpar,region) in + let l = (s_nsepseq param_decl) in + map s_param_decl l + +and s_var_decl I.{value={kwd_var;name;colon;var_type;ass;init;terminator}; region} : O.decl = + let () = ignore (kwd_var,colon,ass,terminator,region) in + O.{ + name = s_name name; + ty = s_type_expr var_type; + value = s_expr init + } + +and s_local_decl : I.local_decl -> O.decl = function + LocalLam decl -> s_lambda_decl decl +| LocalConst decl -> s_const_decl decl +| LocalVar decl -> s_var_decl decl + +and s_instructions ({value=sequence; region} : I.instructions) : O.instr list = + let () = ignore (region) in + append_map s_instruction (s_nsepseq sequence) + +and s_instruction : I.instruction -> O.instr list = function + Single instr -> s_single_instr instr +| Block block -> (s_block block) + +and s_conditional I.{kwd_if;test;kwd_then;ifso;kwd_else;ifnot} : O.instr = + let () = ignore (kwd_if,kwd_then,kwd_else) in + let test = s_expr test in + let ifso = O.PTrue, s_instruction ifso in + let ifnot = O.PFalse, s_instruction ifnot in + Match { + expr = test; + cases = [ifso; ifnot]; + orig = `TODO + } + +and s_match_instr I.{kwd_match;expr;kwd_with;lead_vbar;cases;kwd_end} : O.instr = + let {value=cases;region} = cases in + let () = ignore (kwd_match,kwd_with,lead_vbar,kwd_end,region) in + Match { expr = s_expr expr; cases = map s_case (s_nsepseq cases); orig = `TODO } + +and s_ass_instr {value=(variable,ass,expr); region} : O.instr = + let () = ignore (ass,region) in + Assignment { name = s_name variable; value = s_expr expr; orig = `TODO } + +and s_while_loop {value=(kwd_while, expr, block); region} : O.instr list = + let () = ignore (kwd_while,region) in + [While {condition = s_expr expr; body = s_block block; orig = `TODO}] + +and s_for_loop : I.for_loop -> O.instr list = function + ForInt for_int -> s_for_int for_int +| ForCollect for_collect -> s_for_collect for_collect + +and s_for_int ({value={kwd_for;ass;down;kwd_to;bound;step;block}; region} : I.for_int reg) : O.instr list = + let {value=(variable,ass_kwd,expr);region = ass_region} = ass in + let () = ignore (kwd_for,ass_region,ass_kwd,kwd_to,region) in + let name = s_name variable in + let condition, operator = match down with Some kwd_down -> ignore kwd_down; O.Gt, O.Sub + | None -> O.Lt, O.Add in + let step = s_step step + in [ + Assignment { name; value = s_expr expr; orig = `TODO }; + (* TODO: lift the declaration of the variable, to avoid creating a nested scope here. *) + While { + condition = App { operator = condition; + arguments = [Var name; s_expr bound]}; + body = append (s_block block) + [O.Assignment { name; + value = App { operator; + arguments = [Var name; step]}; + orig = `TODO }]; + orig = `TODO + } + ] + +and s_for_collect ({value={kwd_for;var;bind_to;kwd_in;expr;block}; _} : I.for_collect reg) : O.instr list = + let () = ignore (kwd_for,kwd_in) in + let for_instr = + match s_bind_to bind_to with + Some _ -> + failwith "TODO: For on maps is not supported yet!" + | None -> + O.ForCollection { + list = s_expr expr; + var = s_name var; + body = s_block block; + orig = `TODO + } + in [for_instr] + +and s_step : (I.kwd_step * I.expr) option -> O.expr = function + Some (kwd_step, expr) -> let () = ignore (kwd_step) in s_expr expr +| None -> Constant (Int (Z.of_int 1)) + +and s_bind_to : (I.arrow * I.variable) option -> O.var_name option = function + Some (arrow, variable) -> let () = ignore (arrow) in Some (s_name variable) + | None -> None + +and s_loop : I.loop -> O.instr list = function + While while_loop -> s_while_loop while_loop + | For for_loop -> s_for_loop for_loop + +and s_fun_call {value=(fun_name, arguments); region} : O.expr = + let () = ignore (region) in + let {value=fun_name_string;_} = fun_name in + let firstchar = String.sub fun_name_string 0 1 in + (* If it starts with a capital letter, then it is a constructor *) + if String.equal firstchar (String.uppercase_ascii firstchar) then + App { operator = Constructor (s_name fun_name); arguments = s_arguments arguments } + else + App { operator = Function (s_name fun_name); arguments = s_arguments arguments } + +and s_constr_app {value=(constr, arguments); region} : O.expr = + let () = ignore (region) in + App { operator = Function (s_name constr); arguments = s_arguments arguments } + +and s_arguments {value=(lpar, sequence, rpar); region} : O.expr list = + (* TODO: should return a tuple *) + let () = ignore (lpar,rpar,region) in + match map s_expr (s_nsepseq sequence) with + [] -> [Constant Unit] + | [single_argument] -> [single_argument] + | args -> [s_tuple_expr args] ; + +and s_fail ((kwd_fail, expr) : (I.kwd_fail * I.expr)) : O.instr = + let () = ignore (kwd_fail) in + Fail { expr = s_expr expr; orig = `TODO } + + + + +and s_single_instr : I.single_instr -> O.instr list = function + Cond {value; _} -> [s_conditional value] +| Match {value; _} -> [s_match_instr value] +| Ass instr -> [s_ass_instr instr] +| Loop loop -> s_loop loop +| ProcCall fun_call -> [ProcedureCall { expr = s_fun_call fun_call; orig = `TODO }] +| Null kwd_null -> let () = ignore (kwd_null) in + [] +| Fail {value; _} -> [s_fail value] + +and s_block I.{value={opening;instr;terminator;close}; _} : O.instr list = + let () = ignore (opening,terminator,close) in + s_instructions instr + +and gensym = + let i = ref 0 in + fun ty -> + i := !i + 1; + (* TODO: Region.ghost *) + ({name = {name=(string_of_int !i) ^ "gensym"; orig = Region.ghost}; ty; orig = `TODO} : O.typed_var) + +and s_fun_decl I.{value={kwd_function;name;param;colon;ret_type;kwd_is;local_decls;block;kwd_with;return;terminator}; region} : O.decl = + let () = ignore (kwd_function,colon,kwd_is,kwd_with,terminator,region) in + let tuple_type = s_parameters param |> parameters_to_tuple in + let single_argument = gensym tuple_type in + let ({name = single_argument_xxx; ty = _; orig = `TODO} : O.typed_var) = single_argument in + O.{ + name = s_name name; + ty = type_expr region (Function { arg = tuple_type; + ret = s_type_expr ret_type }); + value = Lambda { + parameter = single_argument; + declarations = append + (s_parameters param |> parameters_to_decls single_argument_xxx) + (map s_local_decl local_decls); + instructions = s_block block; + result = s_expr return + } + } + +and s_proc_decl I.{value={kwd_procedure;name;param;kwd_is;local_decls;block;terminator}; region} = + let () = ignore (kwd_procedure,kwd_is,terminator,region) in + let tuple_type = s_parameters param |> parameters_to_tuple in + let single_argument = gensym tuple_type in + let ({name = single_argument_xxx; ty = _; orig = `TODO} : O.typed_var) = single_argument in + O.{ + name = s_name name; + ty = type_expr region (Function { arg = tuple_type; + ret = type_expr region Unit }); + value = Lambda { + parameter = single_argument; + declarations = append + (s_parameters param |> parameters_to_decls single_argument_xxx) + (map s_local_decl local_decls); + instructions = s_block block; + result = O.Constant O.Unit + } + } + +and s_entry_decl I.{value={kwd_entrypoint;name;param;kwd_is;local_decls;block;terminator}; region} = + let () = ignore (kwd_entrypoint,kwd_is,terminator,region) in + let tuple_type = s_parameters param |> parameters_to_tuple in + let single_argument = gensym tuple_type in + let ({name = single_argument_xxx; ty = _; orig = `TODO} : O.typed_var) = single_argument in + O.{ + name = s_name name; + ty = type_expr region (Function { arg = tuple_type; + ret = type_expr region Unit }); + value = Lambda { + parameter = single_argument; + declarations = append + (s_parameters param |> parameters_to_decls single_argument_xxx) + (map s_local_decl local_decls); + instructions = s_block block; + result = O.Constant O.Unit + } + } + +and s_lambda_decl : I.lambda_decl -> O.decl = function + FunDecl fun_decl -> s_fun_decl fun_decl +| EntryDecl entry_decl -> s_entry_decl entry_decl +| ProcDecl proc_decl -> s_proc_decl proc_decl + +type tmp_ast = { + types : O.type_decl list; + storage_decl : O.typed_var option; + operations_decl : O.typed_var option; + declarations : O.decl list; + } + + +let s_declaration (ast : tmp_ast) : I.declaration -> tmp_ast = function + TypeDecl t -> { ast with types = (s_type_decl t) :: ast.types } + | ConstDecl c -> { ast with declarations = (s_const_decl c) :: ast.declarations } + | StorageDecl s -> { ast with storage_decl = Some (s_storage_decl s) } + | OpDecl o -> { ast with operations_decl = Some (s_operations_decl o) } + | LambdaDecl l -> { ast with declarations = (s_lambda_decl l) :: ast.declarations } + +let s_ast (ast : I.ast) : O.ast = + let I.{decl=(decl1,decls);eof} = ast in + let () = ignore (eof) in + let {types; storage_decl; operations_decl; declarations} = + List.fold_left s_declaration + { types = []; + storage_decl = None; + operations_decl = None; + declarations = [] } + ( decl1 :: decls ) in + let storage_decl = match storage_decl with + Some x -> x + | None -> failwith "Missing storage declaration" in + let () = match operations_decl with + Some _ -> failwith "Operations declaration is not allowed anymore TODO" + | None -> () + in {types; storage_decl; declarations; orig = ast} + + + + +(* let s_token region lexeme = *) +(* printf "%s: %s\n"(compact region) lexeme *) + +(* and s_var {region; value=lexeme} = *) +(* printf "%s: Ident \"%s\"\n" (compact region) lexeme *) + +(* and s_constr {region; value=lexeme} = *) +(* printf "%s: Constr \"%s\"\n" *) +(* (compact region) lexeme *) + +(* and s_string {region; value=lexeme} = *) +(* printf "%s: String \"%s\"\n" *) +(* (compact region) lexeme *) + +(* and s_bytes {region; value = lexeme, abstract} = *) +(* printf "%s: Bytes (\"%s\", \"0x%s\")\n" *) +(* (compact region) lexeme *) +(* (MBytes.to_hex abstract |> Hex.to_string) *) + +(* and s_int {region; value = lexeme, abstract} = *) +(* printf "%s: Int (\"%s\", %s)\n" *) +(* (compact region) lexeme *) +(* (Z.to_string abstract) *) + + +(* and s_parameters {value=node; _} = *) +(* let lpar, sequence, rpar = node in *) +(* s_token lpar "("; *) +(* s_nsepseq ";" s_param_decl sequence; *) +(* s_token rpar ")" *) + +(* and s_param_decl = function *) +(* ParamConst param_const -> s_param_const param_const *) +(* | ParamVar param_var -> s_param_var param_var *) + +(* and s_region_cases {value=sequence; _} = *) +(* s_nsepseq "|" s_case sequence *) + +(* and s_expr = function *) +(* Or {value = expr1, bool_or, expr2; _} -> *) +(* s_expr expr1; s_token bool_or "||"; s_expr expr2 *) +(* | And {value = expr1, bool_and, expr2; _} -> *) +(* s_expr expr1; s_token bool_and "&&"; s_expr expr2 *) +(* | Lt {value = expr1, lt, expr2; _} -> *) +(* s_expr expr1; s_token lt "<"; s_expr expr2 *) +(* | Leq {value = expr1, leq, expr2; _} -> *) +(* s_expr expr1; s_token leq "<="; s_expr expr2 *) +(* | Gt {value = expr1, gt, expr2; _} -> *) +(* s_expr expr1; s_token gt ">"; s_expr expr2 *) +(* | Geq {value = expr1, geq, expr2; _} -> *) +(* s_expr expr1; s_token geq ">="; s_expr expr2 *) +(* | Equal {value = expr1, equal, expr2; _} -> *) +(* s_expr expr1; s_token equal "="; s_expr expr2 *) +(* | Neq {value = expr1, neq, expr2; _} -> *) +(* s_expr expr1; s_token neq "=/="; s_expr expr2 *) +(* | Cat {value = expr1, cat, expr2; _} -> *) +(* s_expr expr1; s_token cat "^"; s_expr expr2 *) +(* | Cons {value = expr1, cons, expr2; _} -> *) +(* s_expr expr1; s_token cons "<:"; s_expr expr2 *) +(* | Add {value = expr1, add, expr2; _} -> *) +(* s_expr expr1; s_token add "+"; s_expr expr2 *) +(* | Sub {value = expr1, sub, expr2; _} -> *) +(* s_expr expr1; s_token sub "-"; s_expr expr2 *) +(* | Mult {value = expr1, mult, expr2; _} -> *) +(* s_expr expr1; s_token mult "*"; s_expr expr2 *) +(* | Div {value = expr1, div, expr2; _} -> *) +(* s_expr expr1; s_token div "/"; s_expr expr2 *) +(* | Mod {value = expr1, kwd_mod, expr2; _} -> *) +(* s_expr expr1; s_token kwd_mod "mod"; s_expr expr2 *) +(* | Neg {value = minus, expr; _} -> *) +(* s_token minus "-"; s_expr expr *) +(* | Not {value = kwd_not, expr; _} -> *) +(* s_token kwd_not "not"; s_expr expr *) +(* | Int i -> s_int i *) +(* | Var var -> s_var var *) +(* | String s -> s_string s *) +(* | Bytes b -> s_bytes b *) +(* | False region -> s_token region "False" *) +(* | True region -> s_token region "True" *) +(* | Unit region -> s_token region "Unit" *) +(* | Tuple tuple -> s_tuple tuple *) +(* | List list -> s_list list *) +(* | EmptyList elist -> s_empty_list elist *) +(* | Set set -> s_set set *) +(* | EmptySet eset -> s_empty_set eset *) +(* | NoneExpr nexpr -> s_none_expr nexpr *) +(* | FunCall fun_call -> s_fun_call fun_call *) +(* | ConstrApp capp -> s_constr_app capp *) +(* | SomeApp sapp -> s_some_app sapp *) +(* | MapLookUp lookup -> s_map_lookup lookup *) +(* | ParExpr pexpr -> s_par_expr pexpr *) + +(* and s_list {value=node; _} = *) +(* let lbra, sequence, rbra = node in *) +(* s_token lbra "["; *) +(* s_nsepseq "," s_expr sequence; *) +(* s_token rbra "]" *) + +(* and s_empty_list {value=node; _} = *) +(* let lpar, (lbracket, rbracket, colon, type_expr), rpar = node in *) +(* s_token lpar "("; *) +(* s_token lbracket "["; *) +(* s_token rbracket "]"; *) +(* s_token colon ":"; *) +(* s_type_expr type_expr; *) +(* s_token rpar ")" *) + +(* and s_set {value=node; _} = *) +(* let lbrace, sequence, rbrace = node in *) +(* s_token lbrace "{"; *) +(* s_nsepseq "," s_expr sequence; *) +(* s_token rbrace "}" *) + +(* and s_empty_set {value=node; _} = *) +(* let lpar, (lbrace, rbrace, colon, type_expr), rpar = node in *) +(* s_token lpar "("; *) +(* s_token lbrace "{"; *) +(* s_token rbrace "}"; *) +(* s_token colon ":"; *) +(* s_type_expr type_expr; *) +(* s_token rpar ")" *) + +(* and s_none_expr {value=node; _} = *) +(* let lpar, (c_None, colon, type_expr), rpar = node in *) +(* s_token lpar "("; *) +(* s_token c_None "None"; *) +(* s_token colon ":"; *) +(* s_type_expr type_expr; *) +(* s_token rpar ")" *) + +(* and s_constr_app {value=node; _} = *) +(* let constr, arguments = node in *) +(* s_constr constr; *) +(* s_tuple arguments *) + +(* and s_some_app {value=node; _} = *) +(* let c_Some, arguments = node in *) +(* s_token c_Some "Some"; *) +(* s_tuple arguments *) + + +(* and s_par_expr {value=node; _} = *) +(* let lpar, expr, rpar = node in *) +(* s_token lpar "("; *) +(* s_expr expr; *) +(* s_token rpar ")" *) + +(* and s_psome {value=node; _} = *) +(* let c_Some, patterns = node in *) +(* s_token c_Some "Some"; *) +(* s_patterns patterns *) + + +(* and s_terminator = function *) +(* Some semi -> s_token semi ";" *) +(* | None -> () *) diff --git a/ParserMain.ml b/ParserMain.ml index f6282c3d4..a2a54ec6b 100644 --- a/ParserMain.ml +++ b/ParserMain.ml @@ -98,3 +98,17 @@ let () = let () = close_all () in print_error ~offsets EvalOpt.mode error | Sys_error msg -> Utils.highlight msg + +(* Temporary: force dune to build AST2.ml *) +let () = + let open AST2 in + let _ = s_ast in + () + +(* Temporary: force dune to build AST2.ml *) +let () = + if false then + let _ = Typecheck2.annotate in + () + else + () diff --git a/Tests/Crowdfunding.li b/Tests/Crowdfunding.li new file mode 100644 index 000000000..c07ad8c8b --- /dev/null +++ b/Tests/Crowdfunding.li @@ -0,0 +1,68 @@ +type state = + record + goal : nat; + deadline : timestamp; + backers : map (address, nat); + funded : bool + end + +entrypoint donate (storage store : state; + const sender : address; + const amount : mutez) + : storage * list (operation) is + var operations : list (operation) := [] + begin + if now > store.deadline then + fail "Deadline passed" + else + if store.backers.[sender] = None then + store := + copy store with + record + backers = map_add store.backers (sender, amount) + end + else null + end with (store, operations) + +entrypoint get_funds (storage store : state; const sender : address) + : storage * list (operation) is + var operations : list (operation) := [] + begin + if sender = owner then + if now >= store.deadline then + if balance >= store.goal then + begin + store := copy store with record funded = true end; + operations := [Transfer (owner, balance)] + end + else fail "Below target" + else fail "Too soon" + else null + end with (store, operations) + +entrypoint claim (storage store : state; const sender : address) + : storage * list (operation) is + var operations : list (operation) := []; + var amount : mutez := 0 + begin + if now <= store.deadline then + fail "Too soon" + else + match store.backers.[sender] with + None -> + fail "Not a backer" + | Some amount -> + if balance >= store.goal || store.funded then + fail "Cannot refund" + else + begin + amount := store.backers.[sender]; + store := + copy store with + record + backers = map_remove store.backers sender + end; + operations := [Transfer (sender, amount)] + end + end + end with (store, operations) diff --git a/Typecheck2.ml b/Typecheck2.ml new file mode 100644 index 000000000..1bd6fee69 --- /dev/null +++ b/Typecheck2.ml @@ -0,0 +1,275 @@ +[@@@warning "-27"] (* TODO *) +[@@@warning "-32"] (* TODO *) +[@@@warning "-30"] + +module SMap = Map.Make(String) + +module I = AST2.O + +module O = struct + type asttodo = [`TODO] (* occurrences of asttodo will point to some part of the original parser AST *) + + type name_and_region = {name: string; orig: Region.t} + type type_name = name_and_region + type var_name = name_and_region + type field_name = name_and_region + + type pattern = + PVar of var_name + | PWild + | PInt of Z.t + | PBytes of MBytes.t + | PString of string + | PUnit + | PFalse + | PTrue + | PNone + | PSome of pattern + | PCons of pattern * pattern + | PNull + | PRecord of (field_name * pattern) SMap.t + + type type_constructor = + Option + | List + | Set + | Map + + type type_expr_case = + Sum of (type_name * type_expr) SMap.t + | Record of (field_name * type_expr) SMap.t + | TypeApp of type_constructor * (type_expr list) + | Function of { arg: type_expr; ret: type_expr } + | Ref of type_expr + | String + | Bytes + | Int + | Unit + | Bool + + and type_expr = { type_expr: type_expr_case; name: type_name option; orig: Region.t } + + type typed_var = { name:var_name; ty:type_expr; orig: asttodo } + + type type_decl = { name: type_name; ty:type_expr; orig: asttodo } + + type expr_case = + App of { operator: operator; arguments: expr list } + | Var of typed_var + | Constant of constant + | Record of (field_name * expr) list + | Lambda of lambda + + and expr = { expr: expr_case; ty:type_expr; orig: asttodo } + + and decl = { var: typed_var; value: expr; orig: asttodo } + + and lambda = { + parameter: typed_var; + declarations: decl list; + instructions: instr list; + result: expr; + } + + and operator_case = + Function of var_name + | Constructor of var_name + | UpdateField of field_name + | GetField of field_name + | Or | And | Lt | Leq | Gt | Geq | Equal | Neq | Cat | Cons | Add | Sub | Mult | Div | Mod + | Neg | Not + | Set + | MapLookup + + and operator = { operator: operator_case; ty:type_expr; orig: asttodo } + + and constant = + Unit + | Int of Z.t | String of string | Bytes of MBytes.t + | False | True + | Null + | EmptySet + | CNone + + and instr = + Assignment of { name: var_name; value: expr; orig: asttodo } + | While of { condition: expr; body: instr list; orig: asttodo } + | ForCollection of { list: expr; var: var_name; body: instr list; orig: asttodo } + | Match of { expr: expr; cases: (pattern * instr list) list; orig: asttodo } + | ProcedureCall of { expr: expr; orig: asttodo } (* expr returns unit, drop the result. Similar to OCaml's ";". *) + | Fail of { expr: expr; orig: asttodo } + + type ast = { + types : type_decl list; + storage_decl : typed_var; + declarations : decl list; + orig : AST.t + } +end + +type te = O.type_expr list SMap.t +type ve = O.type_expr list SMap.t +type tve = te * ve + +let fold_map f a l = + let f (acc, l) elem = + let acc', elem' = f acc elem + in acc', (elem' :: l) in + let last_acc, last_l = List.fold_left f (a, []) l + in last_acc, List.rev last_l + +let map f l = List.rev (List.rev_map f l) + +let shadow (name : string) (typ : O.type_expr) (env : O.type_expr list SMap.t) + : O.type_expr list SMap.t = + SMap.update name (function None -> Some [typ] | Some tl -> Some (typ :: tl)) env + +let lookup (name : string) (env : O.type_expr list SMap.t) : O.type_expr = + match SMap.find name env with + latest :: shadowed -> latest + | [] -> failwith "Unbound variable" + +let string_of_name ({name;_} : I.name_and_region) = name + +let a_name_and_region ({name; orig} : I.name_and_region) : O.name_and_region = + {name; orig} + +let a_type_constructor (tve : tve) : I.type_constructor -> O.type_constructor = function + Option -> Option +| List -> List +| Set -> Set +| Map -> Map + +let a_type_expr_case (tve : tve) : I.type_expr_case -> O.type_expr_case = function + Sum lt -> failwith "TODO" + | Record lt -> failwith "TODO" + | TypeApp (tc, args) -> failwith "TODO" + | Function {arg;ret} -> failwith "TODO" + | Ref t -> failwith "TODO" + | String -> String + | Int -> Int + | Unit -> Unit + | Bool -> Bool + +let a_type_expr (tve : tve) ({type_expr;name;orig} : I.type_expr) : O.type_expr = + let type_expr = a_type_expr_case tve type_expr in + let name = match name with + None -> None + |Some name -> Some (a_name_and_region name) + in {type_expr;name;orig} + +let a_type (te,ve : tve) ({name;ty;orig} : I.type_decl) : tve * O.type_decl = + let ty = a_type_expr (te,ve) ty in + let tve = shadow (string_of_name name) ty te, ve in + let name = (a_name_and_region name) in + tve, {name; ty; orig} + +let a_types (tve : tve) (l : I.type_decl list) : tve * O.type_decl list = + fold_map a_type tve l + +let a_storage_decl : tve -> I.typed_var -> tve * O.typed_var = + failwith "TODO" + +let type_expr_case_equal (t1 : O.type_expr_case) (t2 : O.type_expr_case) : bool = match t1,t2 with + Sum m1, Sum m2 -> failwith "TODO" (* of (O.name_and_region * O.type_expr) SMap.t *) + | Record m1, Record m2 -> failwith "TODO" (* of (O.name_and_region * O.type_expr) SMap.t *) + | TypeApp (tc1, args1), TypeApp (tc2, args2) -> failwith "TODO" (* of O.type_constructor * O.type_expr list *) + | Function {arg=arg1;ret=ret1}, Function {arg=arg2;ret=ret2} -> failwith "TODO" (* of { arg : O.type_expr; ret : O.type_expr; } *) + | Ref t1, Ref t2 -> failwith "TODO" (* of O.type_expr *) + | String, String -> true + | Int, Int -> true + | Unit, Unit -> true + | Bool, Bool -> true + | _ -> false + +let type_expr_equal (t1 : O.type_expr) (t2 : O.type_expr) : bool = + type_expr_case_equal t1.type_expr t2.type_expr + +let check_type_expr_equal (expected : O.type_expr) (actual : O.type_expr) : unit = + if type_expr_equal expected actual then + () + else + failwith "got [actual] but expected [expected]" + +let a_var_expr (te,ve : tve) (expected : O.type_expr) (var_name : I.name_and_region) : O.expr_case = + check_type_expr_equal expected (lookup (string_of_name var_name) ve); + Var { name = a_name_and_region var_name; + ty = expected; + orig = `TODO } + +let a_constant_expr (tve : tve) (expected : O.type_expr) (constant : I.constant) : O.expr_case = + let to_type_expr type_expr_case : O.type_expr = + { type_expr = type_expr_case; name = None; orig = Region.ghost } in + let actual : O.type_expr = match constant with + Unit -> to_type_expr Unit + | Int _ -> to_type_expr Int + | String _ -> to_type_expr String + | Bytes _ -> to_type_expr Bytes + | False -> to_type_expr Bool + | True -> to_type_expr Bool + | Null t -> a_type_expr tve t + | EmptySet t -> a_type_expr tve t + | CNone t -> a_type_expr tve t + in + check_type_expr_equal expected actual; + let c : O.constant = match constant with + Unit -> Unit + | Int i -> Int i + | String s -> String s + | Bytes b -> Bytes b + | False -> False + | True -> True + | Null _ -> Null + | EmptySet _ -> EmptySet + | CNone _ -> CNone + in Constant c + +let map_to_list m = + List.rev (SMap.fold (fun field_name_string p l -> p :: l) m []) + +let a_field tve (expected,expr) = + failwith "TODO" + +let a_record (tve : tve) (expected : O.type_expr) (record : (I.field_name * I.expr) list) + : O.expr_case = + let {type_expr = expected; _} : O.type_expr = expected in + let expected = match expected with + Record fields -> fields + | _ -> failwith "expected some_type but got record" in + let expected_and_field = + List.combine + (map_to_list expected) + record (* TODO SHOULD BE (map_to_list record) *) in + Record (map (a_field tve) expected_and_field) + +let a_expr_case (te,ve : tve) (expected : O.type_expr) : I.expr -> O.expr_case = function + App {operator;arguments} -> failwith "TODO" + | Var var_name -> a_var_expr (te,ve) expected var_name + | Constant constant -> a_constant_expr (te,ve) expected constant + | Record record -> a_record (te,ve) expected record + | Lambda lambda -> failwith "TODO" + +let a_expr (te,ve : tve) (expected : O.type_expr) (e : I.expr) : O.expr = + let expr_case = a_expr_case (te,ve) expected e in + { expr = expr_case; ty = expected; orig = `TODO } + +let a_declaration (te,ve : tve) ({name;ty;value} : I.decl) : tve * O.decl = + let ty = a_type_expr (te,ve) ty in + let value = a_expr (te,ve) ty value in + let ve = shadow (string_of_name name) ty ve in + let name = a_name_and_region name in + (te,ve), {var={name;ty;orig=`TODO};value;orig = `TODO} + +let a_declarations (tve : tve) (l : I.decl list) : tve * O.decl list = + fold_map a_declaration tve l + +let a_ast I.{types; storage_decl; declarations; orig} = + let tve = SMap.empty, SMap.empty in + let tve, types = a_types tve types in + let tve, storage_decl = a_storage_decl tve storage_decl in + let tve, declarations = a_declarations tve declarations in + let _ = tve in + O.{types; storage_decl; declarations; orig} + +let annotate : I.ast -> O.ast = a_ast + diff --git a/Typecheck2.mli b/Typecheck2.mli new file mode 100644 index 000000000..e73f6a875 --- /dev/null +++ b/Typecheck2.mli @@ -0,0 +1,108 @@ +[@@@warning "-30"] + +module SMap : Map.S with type key = string + +module I = AST2.O + +module O : sig + type asttodo = [`TODO] (* occurrences of asttodo will point to some part of the original parser AST *) + + type name_and_region = {name: string; orig: Region.t} + type type_name = name_and_region + type var_name = name_and_region + type field_name = name_and_region + + type pattern = + PVar of var_name + | PWild + | PInt of Z.t + | PBytes of MBytes.t + | PString of string + | PUnit + | PFalse + | PTrue + | PNone + | PSome of pattern + | PCons of pattern * pattern + | PNull + | PRecord of (field_name * pattern) SMap.t + + type type_constructor = + Option + | List + | Set + | Map + + type type_expr_case = + Sum of (type_name * type_expr) SMap.t + | Record of (field_name * type_expr) SMap.t + | TypeApp of type_constructor * (type_expr list) + | Function of { arg: type_expr; ret: type_expr } + | Ref of type_expr + | String + | Bytes + | Int + | Unit + | Bool + + and type_expr = { type_expr: type_expr_case; name: type_name option; orig: Region.t } + + type typed_var = { name:var_name; ty:type_expr; orig: asttodo } + + type type_decl = { name:type_name; ty:type_expr; orig: asttodo } + + type expr_case = + App of { operator: operator; arguments: expr list } + | Var of typed_var + | Constant of constant + | Record of (field_name * expr) list + | Lambda of lambda + + and expr = { expr: expr_case; ty:type_expr; orig: asttodo } + + and decl = { var: typed_var; value: expr; orig: asttodo } + + and lambda = { + parameter: typed_var; + declarations: decl list; + instructions: instr list; + result: expr; + } + + and operator_case = + Function of var_name + | Constructor of var_name + | UpdateField of field_name + | GetField of field_name + | Or | And | Lt | Leq | Gt | Geq | Equal | Neq | Cat | Cons | Add | Sub | Mult | Div | Mod + | Neg | Not + | Set + | MapLookup + + and operator = { operator: operator_case; ty:type_expr; orig: asttodo } + + and constant = + Unit + | Int of Z.t | String of string | Bytes of MBytes.t + | False | True + | Null + | EmptySet + | CNone + + and instr = + Assignment of { name: var_name; value: expr; orig: asttodo } + | While of { condition: expr; body: instr list; orig: asttodo } + | ForCollection of { list: expr; var: var_name; body: instr list; orig: asttodo } + | Match of { expr: expr; cases: (pattern * instr list) list; orig: asttodo } + | ProcedureCall of { expr: expr; orig: asttodo } (* expr returns unit, drop the result. Similar to OCaml's ";". *) + | Fail of { expr: expr; orig: asttodo } + + type ast = { + types : type_decl list; + storage_decl : typed_var; + declarations : decl list; + orig : AST.t + } +end + +val annotate : I.ast -> O.ast