ligo/AST2.ml
Christian Rinderknecht 8746802571
Storage and operations are now explicitly named.
Refactoring of AST to enable the detection of incomplete pattern
matchings by the OCaml compiler. Some record fields renamed for
better readability.
2019-03-10 19:41:27 +01:00

617 lines
21 KiB
OCaml

[@@@warning "-30"]
exception TODO of string
open Region
module In = AST
module SMap = Utils.String.Map
module Out =
struct
type type_name = string
type variable = string
type ast = {
types : type_decl list;
storage : typed_var;
operations : typed_var;
declarations : decl list;
prev : In.t;
}
and typed_var = {name: variable; ty: type_expr}
and type_decl = {name: variable; ty: type_expr}
and decl = {name: variable; ty: type_expr; value: expr}
and type_expr =
Prod of type_expr list
| Sum of (type_name * type_expr) list
| Record of (type_name * type_expr) list
| TypeApp of type_name * type_expr list
| Function of {args: type_expr list; ret: type_expr}
| Ref of type_expr
| Unit
| Int
| TODO
and expr =
App of {operator: operator; arguments: expr list}
| Variable of variable
| Constant of constant
| Lambda of lambda
and lambda = {
parameters : type_expr SMap.t;
declarations : decl list;
instructions : instr list;
result : expr
}
and operator = Add | Sub | Lt | Gt | Function of string
and constant =
Unit
| Int of Z.t
and instr =
Assignment of { name: variable; value: expr }
| While of { condition: expr; body: instr list }
| ForCollection of { list: expr; key: variable;
value: variable option;
body: instr list }
| If of { condition: expr; ifso: instr list; ifnot: instr list }
| Match of { expr: expr; cases: (pattern * instr list) list }
| DropUnit of expr (* expr returns unit, drop the result. *)
| Fail of { expr: expr }
| Null
and pattern =
PVar of variable
| PWild
| PInt of Z.t
| PBytes of MBytes.t
| PString of string
| PUnit
| PFalse
| PTrue
| PNone
| PSome of pattern
| Cons of pattern * pattern
| PTuple of pattern list
end
let map f l = List.(rev_map f l |> rev)
(* TODO: check that List.to_seq, SMap.of_seq are not broken
(i.e. check that they are tail-recursive) *)
let append_map f l = map f l |> List.flatten
let append l = List.(rev l |> rev_append)
let list_to_map l = l |> List.to_seq |> SMap.of_seq (* Why lazy ? *)
let fold_map f a l =
let f (acc, l) elem =
let acc', elem' = f acc elem
in acc', (elem' :: l) in
let last_acc, last_l = List.fold_left f (a, []) l
in last_acc, List.rev last_l
(* Simplify the AST *)
let s_nsepseq : ('a,'sep) Utils.nsepseq -> 'a list =
fun (first, rest) -> first :: (map snd rest)
let s_sepseq : ('a,'sep) Utils.sepseq -> 'a list =
function
None -> []
| Some nsepseq -> s_nsepseq nsepseq
let s_name ({value=name; region}: string reg) =
ignore region; name
let rec s_cartesian {value=sequence; region} : Out.type_expr =
let () = ignore region in
Prod (map s_type_expr (s_nsepseq sequence))
and s_sum_type {value=sequence; region} : Out.type_expr =
let () = ignore region in
let _todo = sequence in
(* Sum (map s_type_expr (s_nsepseq sequence)) *)
TODO
and s_record_type {value=(kwd_record, field_decls, kwd_end); region} : Out.type_expr =
let () = ignore (kwd_record,region,kwd_end) in
let _todo = (* s_field_decls *) field_decls in
TODO
and s_type_app {value=node; region} : Out.type_expr =
let () = ignore region in
let _todo = node in
TODO
(* let type_name, type_tuple = node in *)
(* s_var type_name; *)
(* s_type_tuple type_tuple *)
and s_par_type {value=node; region} : Out.type_expr =
let () = ignore region in
let _todo = node in
TODO
and s_var {region; value=lexeme} : Out.type_expr =
let () = ignore region in
let _todo = lexeme in
TODO
(* let lpar, type_expr, rpar = node in
s_token lpar "(";
s_type_expr type_expr;
s_token rpar ")"*)
and s_type_expr : In.type_expr -> Out.type_expr = function
Prod cartesian -> s_cartesian cartesian
| Sum sum_type -> s_sum_type sum_type
| Record record_type -> s_record_type record_type
| TypeApp type_app -> s_type_app type_app
| ParType par_type -> s_par_type par_type
| TAlias type_alias -> s_var type_alias
let s_type_decl In.{value={kwd_type;name;kwd_is;type_expr;terminator}; region} : Out.type_decl =
let () = ignore (kwd_type,kwd_is,terminator,region) in
Out.{ name = s_name name; ty = s_type_expr type_expr }
let s_storage_decl In.{value={kwd_storage; store_type; terminator}; region} : Out.typed_var =
let () = ignore (kwd_storage,terminator,region) in
Out.{ name = "storage"; ty = s_type_expr store_type }
let s_operations_decl In.{value={kwd_operations;op_type;terminator}; region} : Out.typed_var =
let () = ignore (kwd_operations,terminator,region) in
Out.{ name = "operations"; ty = s_type_expr op_type }
let s_expr : In.expr -> Out.expr = function
| _ -> raise (TODO "simplify expressions")
let s_case : In.case -> Out.pattern * (Out.instr list) = function
| _ -> raise (TODO "simplify pattern matching cases")
let s_const_decl In.{value; region} : Out.decl =
let In.{kwd_const; name; colon;
const_type; equal; init; terminator} = value in
let () = ignore (kwd_const,colon,equal,terminator,region) in
Out.{name = s_name name;
ty = s_type_expr const_type;
value = s_expr init}
let s_param_const {value=(kwd_const,variable,colon,type_expr); region} : string * Out.type_expr =
let () = ignore (kwd_const,colon,region) in
s_name variable, s_type_expr type_expr
let s_param_var {value=(kwd_var,variable,colon,type_expr); region} : string * Out.type_expr =
let () = ignore (kwd_var,colon,region) in
s_name variable, s_type_expr type_expr
let s_param_decl : In.param_decl -> string * Out.type_expr = function
ParamConst p -> s_param_const p
| ParamVar p -> s_param_var p
let s_parameters ({value=(lpar,param_decl,rpar);region} : In.parameters) : (string * Out.type_expr) list =
let () = ignore (lpar,rpar,region) in
let l = (s_nsepseq param_decl) in
map s_param_decl l
let rec s_var_decl {value; region} : Out.decl =
let In.{kwd_var; name; colon;
var_type; ass; init; terminator} = value in
let () = ignore (kwd_var, colon, ass, terminator, region) in
Out.{name = s_name name;
ty = s_type_expr var_type;
value = s_expr init}
and s_local_decl : In.local_decl -> Out.decl = function
LocalLam decl -> s_lambda_decl decl
| LocalConst decl -> s_const_decl decl
| LocalVar decl -> s_var_decl decl
and s_instructions ({value=sequence; region} : In.instructions) : Out.instr list =
let () = ignore region in
append_map s_instruction (s_nsepseq sequence)
and s_instruction : In.instruction -> Out.instr list = function
Single instr -> s_single_instr instr
| Block block -> (s_block block)
and s_conditional In.{kwd_if;test;kwd_then;ifso;kwd_else;ifnot} : Out.instr =
let () = ignore (kwd_if,kwd_then,kwd_else) in
If { condition = s_expr test; ifso = s_instruction ifso; ifnot = s_instruction ifnot }
and s_match_instr In.{kwd_match;expr;kwd_with;lead_vbar;cases;kwd_end} : Out.instr =
let {value=cases;region} = cases in
let () = ignore (kwd_match,kwd_with,lead_vbar,kwd_end,region) in
Match { expr = s_expr expr; cases = map s_case (s_nsepseq cases) }
and s_ass_instr {value=(variable,ass,expr); region} : Out.instr =
let () = ignore (ass,region) in
Assignment { name = s_name variable; value = s_expr expr }
and s_while_loop {value=(kwd_while, expr, block); region} : Out.instr list =
let () = ignore (kwd_while,region) in
[While {condition = s_expr expr; body = s_block block}]
and s_for_loop : In.for_loop -> Out.instr list = function
ForInt for_int -> s_for_int for_int
| ForCollect for_collect -> s_for_collect for_collect
and s_for_int ({value={kwd_for;ass;down;kwd_to;bound;step;block}; region} : In.for_int reg) : Out.instr list =
let {value=(variable,ass_kwd,expr);region = ass_region} = ass in
let () = ignore (kwd_for,ass_region,ass_kwd,kwd_to,region) in
let name = s_name variable in
let condition, operator = match down with Some kwd_down -> ignore kwd_down; Out.Gt, Out.Sub
| None -> Out.Lt, Out.Add in
let step = s_step step
in [
Assignment { name; value = s_expr expr };
(* TODO: lift the declaration of the variable *)
While {
condition = App { operator = condition;
arguments = [Variable name; s_expr bound] };
body = List.append (s_block block)
[Out.Assignment { name;
value = App { operator;
arguments = [Variable name; step]}}]
}
]
and s_for_collect ({value={kwd_for;var;bind_to;kwd_in;expr;block}; _} : In.for_collect reg) : Out.instr list =
let () = ignore (kwd_for,kwd_in) in
[
Out.ForCollection {
list = s_expr expr;
key = s_name var;
value = s_bind_to bind_to;
body = s_block block
}
]
and s_step : (In.kwd_step * In.expr) option -> Out.expr = function
Some (kwd_step, expr) -> let () = ignore (kwd_step) in s_expr expr
| None -> Constant (Int Z.one)
and s_bind_to : (In.arrow * In.variable) option -> Out.variable option = function
Some (arrow, variable) ->
let () = ignore arrow in Some (s_name variable)
| None -> None
and s_loop : In.loop -> Out.instr list = function
While while_loop -> s_while_loop while_loop
| For for_loop -> s_for_loop for_loop
and s_fun_call {value=(fun_name, arguments); region} : Out.expr =
let () = ignore region in
App { operator = Function (s_name fun_name); arguments = s_arguments arguments }
and s_arguments {value=(lpar, sequence, rpar); region} =
let () = ignore (lpar, rpar, region) in
map s_expr (s_nsepseq sequence);
and s_fail ((kwd_fail, expr) : (In.kwd_fail * In.expr)) : Out.instr =
ignore kwd_fail; Fail {expr = s_expr expr}
and s_single_instr : In.single_instr -> Out.instr list = function
Cond {value; _} -> [s_conditional value]
| Match {value; _} -> [s_match_instr value]
| Ass instr -> [s_ass_instr instr]
| Loop loop -> s_loop loop
| ProcCall fun_call -> [DropUnit (s_fun_call fun_call)]
| Null kwd_null -> let () = ignore (kwd_null) in
[]
| Fail {value; _} -> [s_fail value]
and s_block In.{value={opening;instr;terminator;close}; _} : Out.instr list =
let () = ignore (opening,terminator,close) in
s_instructions instr
and s_fun_decl In.{value={kwd_function;name;param;colon;ret_type;kwd_is;local_decls;block;kwd_with;return;terminator}; region} : Out.decl =
let () = ignore (kwd_function,colon,kwd_is,kwd_with,terminator,region) in
Out.{
name = s_name name;
ty = Function { args = map snd (s_parameters param); ret = s_type_expr ret_type };
value = Lambda {
parameters = s_parameters param |> list_to_map;
declarations = map s_local_decl local_decls;
instructions = s_block block;
result = s_expr return
}
}
and s_proc_decl In.{value={kwd_procedure;name;param;kwd_is;local_decls;block;terminator}; region} =
let () = ignore (kwd_procedure,kwd_is,terminator,region) in
Out.{
name = s_name name;
ty = Function { args = map snd (s_parameters param); ret = Unit };
value = Lambda {
parameters = s_parameters param |> list_to_map;
declarations = map s_local_decl local_decls;
instructions = s_block block;
result = Out.Constant Out.Unit
}
}
and s_lambda_decl : In.lambda_decl -> Out.decl = function
FunDecl fun_decl -> s_fun_decl fun_decl
| ProcDecl proc_decl -> s_proc_decl proc_decl
| EntryDecl entry_decl -> failwith "TODO"
let s_main_block (block: In.block reg) : Out.decl =
Out.{
name = "main";
ty = Function { args = []; ret = Unit };
value = Lambda {
parameters = SMap.empty;
declarations = [];
instructions = s_block block;
result = Out.Constant Out.Unit
}
}
let s_ast (ast : In.ast) : Out.ast =
let In.{types;constants;storage;operations;lambdas;block;eof} = ast in
let () = ignore (eof) in
Out.{
types = map s_type_decl types;
storage = s_storage_decl storage;
operations = s_operations_decl operations;
declarations = List.flatten [(map s_const_decl constants);
(map s_lambda_decl lambdas);
[s_main_block block]];
prev = ast
}
(* let s_token region lexeme = *)
(* printf "%s: %s\n"(compact region) lexeme *)
(* and s_var {region; value=lexeme} = *)
(* printf "%s: Ident \"%s\"\n" (compact region) lexeme *)
(* and s_constr {region; value=lexeme} = *)
(* printf "%s: Constr \"%s\"\n" *)
(* (compact region) lexeme *)
(* and s_string {region; value=lexeme} = *)
(* printf "%s: String \"%s\"\n" *)
(* (compact region) lexeme *)
(* and s_bytes {region; value = lexeme, abstract} = *)
(* printf "%s: Bytes (\"%s\", \"0x%s\")\n" *)
(* (compact region) lexeme *)
(* (MBytes.to_hex abstract |> Hex.to_string) *)
(* and s_int {region; value = lexeme, abstract} = *)
(* printf "%s: Int (\"%s\", %s)\n" *)
(* (compact region) lexeme *)
(* (Z.to_string abstract) *)
(* and s_cartesian {value=sequence; _} = *)
(* s_nsepseq "*" s_type_expr sequence *)
(* and s_variant {value=node; _} = *)
(* let constr, kwd_of, cartesian = node in *)
(* s_constr constr; *)
(* s_token kwd_of "of"; *)
(* s_cartesian cartesian *)
(* and s_field_decls sequence = *)
(* s_nsepseq ";" s_field_decl sequence *)
(* and s_field_decl {value=node; _} = *)
(* let var, colon, type_expr = node in *)
(* s_var var; *)
(* s_token colon ":"; *)
(* s_type_expr type_expr *)
(* and s_type_tuple {value=node; _} = *)
(* let lpar, sequence, rpar = node in *)
(* s_token lpar "("; *)
(* s_nsepseq "," s_var sequence; *)
(* s_token rpar ")" *)
(* and s_parameters {value=node; _} = *)
(* let lpar, sequence, rpar = node in *)
(* s_token lpar "("; *)
(* s_nsepseq ";" s_param_decl sequence; *)
(* s_token rpar ")" *)
(* and s_param_decl = function *)
(* ParamConst param_const -> s_param_const param_const *)
(* | ParamVar param_var -> s_param_var param_var *)
(* and s_region_cases {value=sequence; _} = *)
(* s_nsepseq "|" s_case sequence *)
(* and s_case {value=node; _} = *)
(* let pattern, arrow, instruction = node in *)
(* s_pattern pattern; *)
(* s_token arrow "->"; *)
(* s_instruction instruction *)
(* and s_expr = function *)
(* Or {value = expr1, bool_or, expr2; _} -> *)
(* s_expr expr1; s_token bool_or "||"; s_expr expr2 *)
(* | And {value = expr1, bool_and, expr2; _} -> *)
(* s_expr expr1; s_token bool_and "&&"; s_expr expr2 *)
(* | Lt {value = expr1, lt, expr2; _} -> *)
(* s_expr expr1; s_token lt "<"; s_expr expr2 *)
(* | Leq {value = expr1, leq, expr2; _} -> *)
(* s_expr expr1; s_token leq "<="; s_expr expr2 *)
(* | Gt {value = expr1, gt, expr2; _} -> *)
(* s_expr expr1; s_token gt ">"; s_expr expr2 *)
(* | Geq {value = expr1, geq, expr2; _} -> *)
(* s_expr expr1; s_token geq ">="; s_expr expr2 *)
(* | Equal {value = expr1, equal, expr2; _} -> *)
(* s_expr expr1; s_token equal "="; s_expr expr2 *)
(* | Neq {value = expr1, neq, expr2; _} -> *)
(* s_expr expr1; s_token neq "=/="; s_expr expr2 *)
(* | Cat {value = expr1, cat, expr2; _} -> *)
(* s_expr expr1; s_token cat "^"; s_expr expr2 *)
(* | Cons {value = expr1, cons, expr2; _} -> *)
(* s_expr expr1; s_token cons "<:"; s_expr expr2 *)
(* | Add {value = expr1, add, expr2; _} -> *)
(* s_expr expr1; s_token add "+"; s_expr expr2 *)
(* | Sub {value = expr1, sub, expr2; _} -> *)
(* s_expr expr1; s_token sub "-"; s_expr expr2 *)
(* | Mult {value = expr1, mult, expr2; _} -> *)
(* s_expr expr1; s_token mult "*"; s_expr expr2 *)
(* | Div {value = expr1, div, expr2; _} -> *)
(* s_expr expr1; s_token div "/"; s_expr expr2 *)
(* | Mod {value = expr1, kwd_mod, expr2; _} -> *)
(* s_expr expr1; s_token kwd_mod "mod"; s_expr expr2 *)
(* | Neg {value = minus, expr; _} -> *)
(* s_token minus "-"; s_expr expr *)
(* | Not {value = kwd_not, expr; _} -> *)
(* s_token kwd_not "not"; s_expr expr *)
(* | Int i -> s_int i *)
(* | Var var -> s_var var *)
(* | String s -> s_string s *)
(* | Bytes b -> s_bytes b *)
(* | False region -> s_token region "False" *)
(* | True region -> s_token region "True" *)
(* | Unit region -> s_token region "Unit" *)
(* | Tuple tuple -> s_tuple tuple *)
(* | List list -> s_list list *)
(* | EmptyList elist -> s_empty_list elist *)
(* | Set set -> s_set set *)
(* | EmptySet eset -> s_empty_set eset *)
(* | NoneExpr nexpr -> s_none_expr nexpr *)
(* | FunCall fun_call -> s_fun_call fun_call *)
(* | ConstrApp capp -> s_constr_app capp *)
(* | SomeApp sapp -> s_some_app sapp *)
(* | MapLookUp lookup -> s_map_lookup lookup *)
(* | ParExpr pexpr -> s_par_expr pexpr *)
(* and s_list {value=node; _} = *)
(* let lbra, sequence, rbra = node in *)
(* s_token lbra "["; *)
(* s_nsepseq "," s_expr sequence; *)
(* s_token rbra "]" *)
(* and s_empty_list {value=node; _} = *)
(* let lpar, (lbracket, rbracket, colon, type_expr), rpar = node in *)
(* s_token lpar "("; *)
(* s_token lbracket "["; *)
(* s_token rbracket "]"; *)
(* s_token colon ":"; *)
(* s_type_expr type_expr; *)
(* s_token rpar ")" *)
(* and s_set {value=node; _} = *)
(* let lbrace, sequence, rbrace = node in *)
(* s_token lbrace "{"; *)
(* s_nsepseq "," s_expr sequence; *)
(* s_token rbrace "}" *)
(* and s_empty_set {value=node; _} = *)
(* let lpar, (lbrace, rbrace, colon, type_expr), rpar = node in *)
(* s_token lpar "("; *)
(* s_token lbrace "{"; *)
(* s_token rbrace "}"; *)
(* s_token colon ":"; *)
(* s_type_expr type_expr; *)
(* s_token rpar ")" *)
(* and s_none_expr {value=node; _} = *)
(* let lpar, (c_None, colon, type_expr), rpar = node in *)
(* s_token lpar "("; *)
(* s_token c_None "None"; *)
(* s_token colon ":"; *)
(* s_type_expr type_expr; *)
(* s_token rpar ")" *)
(* and s_constr_app {value=node; _} = *)
(* let constr, arguments = node in *)
(* s_constr constr; *)
(* s_tuple arguments *)
(* and s_some_app {value=node; _} = *)
(* let c_Some, arguments = node in *)
(* s_token c_Some "Some"; *)
(* s_tuple arguments *)
(* and s_map_lookup {value=node; _} = *)
(* let {value = lbracket, expr, rbracket; _} = node.index in *)
(* s_var node.map_name; *)
(* s_token node.selector "."; *)
(* s_token lbracket "["; *)
(* s_expr expr; *)
(* s_token rbracket "]" *)
(* and s_par_expr {value=node; _} = *)
(* let lpar, expr, rpar = node in *)
(* s_token lpar "("; *)
(* s_expr expr; *)
(* s_token rpar ")" *)
(* and s_pattern {value=sequence; _} = *)
(* s_nsepseq "<:" s_core_pattern sequence *)
(* and s_core_pattern = function *)
(* PVar var -> s_var var *)
(* | PWild wild -> s_token wild "_" *)
(* | PInt i -> s_int i *)
(* | PBytes b -> s_bytes b *)
(* | PString s -> s_string s *)
(* | PUnit region -> s_token region "Unit" *)
(* | PFalse region -> s_token region "False" *)
(* | PTrue region -> s_token region "True" *)
(* | PNone region -> s_token region "None" *)
(* | PSome psome -> s_psome psome *)
(* | PList pattern -> s_list_pattern pattern *)
(* | PTuple ptuple -> s_ptuple ptuple *)
(* and s_psome {value=node; _} = *)
(* let c_Some, patterns = node in *)
(* s_token c_Some "Some"; *)
(* s_patterns patterns *)
(* and s_patterns {value=node; _} = *)
(* let lpar, core_pattern, rpar = node in *)
(* s_token lpar "("; *)
(* s_core_pattern core_pattern; *)
(* s_token rpar ")" *)
(* and s_list_pattern = function *)
(* Sugar sugar -> s_sugar sugar *)
(* | Raw raw -> s_raw raw *)
(* and s_sugar {value=node; _} = *)
(* let lbracket, sequence, rbracket = node in *)
(* s_token lbracket "["; *)
(* s_sepseq "," s_core_pattern sequence; *)
(* s_token rbracket "]" *)
(* and s_raw {value=node; _} = *)
(* let lpar, (core_pattern, cons, pattern), rpar = node in *)
(* s_token lpar "("; *)
(* s_core_pattern core_pattern; *)
(* s_token cons "<:"; *)
(* s_pattern pattern; *)
(* s_token rpar ")" *)
(* and s_ptuple {value=node; _} = *)
(* let lpar, sequence, rpar = node in *)
(* s_token lpar "("; *)
(* s_nsepseq "," s_core_pattern sequence; *)
(* s_token rpar ")" *)
(* and s_terminator = function *)
(* Some semi -> s_token semi ";" *)
(* | None -> () *)