[@@@warning "-30-40-42"] (* Abstract Syntax Tree (AST) for Mini-ML *) type 'a reg = 'a Region.reg (* Keywords of OCaml *) type keyword = Region.t type kwd_and = Region.t type kwd_begin = Region.t type kwd_else = Region.t type kwd_end = Region.t type kwd_false = Region.t type kwd_fun = Region.t type kwd_if = Region.t type kwd_in = Region.t type kwd_let = Region.t type kwd_match = Region.t type kwd_mod = Region.t type kwd_not = Region.t type kwd_of = Region.t type kwd_or = Region.t type kwd_then = Region.t type kwd_true = Region.t type kwd_type = Region.t type kwd_with = Region.t type kwd_let_entry = Region.t (* Symbols *) type arrow = Region.t (* "->" *) type cons = Region.t (* "::" *) type cat = Region.t (* "^" *) type append = Region.t (* "@" *) type dot = Region.t (* "." *) (* Arithmetic operators *) type minus = Region.t (* "-" *) type plus = Region.t (* "+" *) type slash = Region.t (* "/" *) type times = Region.t (* "*" *) (* Boolean operators *) type bool_or = Region.t (* "||" *) type bool_and = Region.t (* "&&" *) (* Comparisons *) type equal = Region.t (* "=" *) type neq = Region.t (* "<>" *) type lt = Region.t (* "<" *) type gt = Region.t (* ">" *) type leq = Region.t (* "=<" *) type geq = Region.t (* ">=" *) (* Compounds *) type lpar = Region.t (* "(" *) type rpar = Region.t (* ")" *) type lbracket = Region.t (* "[" *) type rbracket = Region.t (* "]" *) type lbrace = Region.t (* "{" *) type rbrace = Region.t (* "}" *) (* Separators *) type comma = Region.t (* "," *) type semi = Region.t (* ";" *) type vbar = Region.t (* "|" *) type colon = Region.t (* ":" *) (* Wildcard *) type wild = Region.t (* "_" *) (* Virtual tokens *) type eof = Region.t (* Literals *) type variable = string reg type fun_name = string reg type type_name = string reg type field_name = string reg type type_constr = string reg type constr = string reg (* Parentheses *) type 'a par = { lpar : lpar; inside : 'a; rpar : rpar } type the_unit = lpar * rpar (* The Abstract Syntax Tree *) type t = { decl : declaration Utils.nseq; eof : eof } and ast = t and declaration = Let of (kwd_let * let_bindings) reg | LetEntry of (kwd_let_entry * let_binding) reg | TypeDecl of type_decl reg (* Non-recursive values *) and let_bindings = (let_binding, kwd_and) Utils.nsepseq and let_binding = { pattern : pattern; lhs_type : (colon * type_expr) option; eq : equal; let_rhs : expr } (* Type declarations *) and type_decl = { kwd_type : kwd_type; name : type_name; eq : equal; type_expr : type_expr } and type_expr = TProd of cartesian | TSum of (variant reg, vbar) Utils.nsepseq reg | TRecord of record_type | TApp of (type_constr * type_tuple) reg | TFun of (type_expr * arrow * type_expr) reg | TPar of type_expr par reg | TAlias of variable and cartesian = (type_expr, times) Utils.nsepseq reg and variant = { constr : constr; args : (kwd_of * cartesian) option } and record_type = field_decl reg injection reg and field_decl = { field_name : field_name; colon : colon; field_type : type_expr } and type_tuple = (type_expr, comma) Utils.nsepseq par reg and pattern = PTuple of (pattern, comma) Utils.nsepseq reg | PList of list_pattern | PVar of variable | PUnit of the_unit reg | PInt of (string * Z.t) reg | PTrue of kwd_true | PFalse of kwd_false | PString of string reg | PWild of wild | PPar of pattern par reg | PConstr of (constr * pattern option) reg | PRecord of record_pattern | PTyped of typed_pattern reg and list_pattern = Sugar of pattern injection reg | PCons of (pattern * cons * pattern) reg and typed_pattern = { pattern : pattern; colon : colon; type_expr : type_expr } and record_pattern = field_pattern reg injection reg and field_pattern = { field_name : field_name; eq : equal; pattern : pattern } and expr = ECase of expr case reg | EAnnot of annot_expr reg | ELogic of logic_expr | EArith of arith_expr | EString of string_expr | EList of list_expr | EConstr of constr_expr reg | ERecord of record_expr | EProj of projection reg | EVar of variable | ECall of (expr * expr Utils.nseq) reg | EBytes of (string * Hex.t) reg | EUnit of the_unit reg | ETuple of (expr, comma) Utils.nsepseq reg | EPar of expr par reg | ELetIn of let_in reg | EFun of fun_expr | ECond of conditional reg | ESeq of sequence and constr_expr = constr * expr option and annot_expr = expr * type_expr and 'a injection = { opening : opening; elements : ('a, semi) Utils.sepseq; terminator : semi option; closing : closing } and opening = Begin of kwd_begin | With of kwd_with | LBrace of lbrace | LBracket of lbracket and closing = End of kwd_end | RBrace of rbrace | RBracket of rbracket and list_expr = Cons of cons bin_op reg | List of expr injection reg | Append of (expr * append * expr) reg and string_expr = Cat of cat bin_op reg | String of string reg and arith_expr = Add of plus bin_op reg | Sub of minus bin_op reg | Mult of times bin_op reg | Div of slash bin_op reg | Mod of kwd_mod bin_op reg | Neg of minus un_op reg | Int of (string * Z.t) reg | Nat of (string * Z.t) reg | Mtz of (string * Z.t) reg and logic_expr = BoolExpr of bool_expr | CompExpr of comp_expr and bool_expr = Or of kwd_or bin_op reg | And of kwd_and bin_op reg | Not of kwd_not un_op reg | True of kwd_true | False of kwd_false and 'a bin_op = { op : 'a; arg1 : expr; arg2 : expr } and 'a un_op = { op : 'a; arg : expr } and comp_expr = Lt of lt bin_op reg | Leq of leq bin_op reg | Gt of gt bin_op reg | Geq of geq bin_op reg | Equal of equal bin_op reg | Neq of neq bin_op reg and projection = { struct_name : variable; selector : dot; field_path : (selection, dot) Utils.nsepseq } and selection = FieldName of variable | Component of (string * Z.t) reg par reg and record_expr = field_assignment reg injection reg and field_assignment = { field_name : field_name; assignment : equal; field_expr : expr } and sequence = expr injection reg and 'a case = { kwd_match : kwd_match; expr : expr; opening : opening; lead_vbar : vbar option; cases : ('a case_clause reg, vbar) Utils.nsepseq reg; closing : closing } and 'a case_clause = { pattern : pattern; arrow : arrow; rhs : 'a } and let_in = kwd_let * let_bindings * kwd_in * expr and fun_expr = (kwd_fun * variable * arrow * expr) reg and conditional = { kwd_if : kwd_if; test : expr; kwd_then : kwd_then; ifso : expr; kwd_else : kwd_else; ifnot : expr } (* Projecting regions of the input source code *) let sprintf = Printf.sprintf let region_of_list_pattern = function Sugar {region; _} | PCons {region; _} -> region let region_of_pattern = function PList p -> region_of_list_pattern p | PTuple {region;_} | PVar {region;_} | PUnit {region;_} | PInt {region;_} | PTrue region | PFalse region | PString {region;_} | PWild region | PConstr {region; _} | PPar {region;_} | PRecord {region; _} | PTyped {region; _} -> region let region_of_bool_expr = function Or {region;_} | And {region;_} | True region | False region | Not {region;_} -> region let region_of_comp_expr = function Lt {region;_} | Leq {region;_} | Gt {region;_} | Geq {region;_} | Neq {region;_} | Equal {region;_} -> region let region_of_logic_expr = function BoolExpr e -> region_of_bool_expr e | CompExpr e -> region_of_comp_expr e let region_of_arith_expr = function Add {region;_} | Sub {region;_} | Mult {region;_} | Div {region;_} | Mod {region;_} | Neg {region;_} | Int {region;_} | Mtz {region; _} | Nat {region; _} -> region let region_of_string_expr = function String {region;_} | Cat {region;_} -> region let region_of_list_expr = function Cons {region; _} | List {region; _} | Append {region; _} -> region let region_of_expr = function ELogic e -> region_of_logic_expr e | EArith e -> region_of_arith_expr e | EString e -> region_of_string_expr e | EList e -> region_of_list_expr e | EAnnot {region;_ } | ELetIn {region;_} | EFun {region;_} | ECond {region;_} | ETuple {region;_} | ECase {region;_} | ECall {region;_} | EVar {region; _} | EProj {region; _} | EUnit {region;_} | EPar {region;_} | EBytes {region; _} | ESeq {region; _} | ERecord {region; _} | EConstr {region; _} -> region (* Rewriting let-expressions and fun-expressions, with some optimisations *) type sep = Region.t let ghost_fun, ghost_arrow, ghost_let, ghost_eq, ghost_in = let ghost = Region.ghost in ghost, ghost, ghost, ghost, ghost let norm_fun region kwd_fun pattern eq expr = let value = match pattern with PVar v -> kwd_fun, v, eq, expr | _ -> let value = Utils.gen_sym () in let fresh = Region.{region=Region.ghost; value} in let bindings = {pattern; eq; lhs_type=None; let_rhs = EVar fresh}, [] in let let_in = ghost_let, bindings, ghost_in, expr in let expr = ELetIn {value=let_in; region=Region.ghost} in kwd_fun, fresh, ghost_arrow, expr in Region.{region; value} let norm ?reg (pattern, patterns) sep expr = let reg, fun_reg = match reg with None -> Region.ghost, ghost_fun | Some p -> p in let apply pattern (sep, expr) = ghost_eq, EFun (norm_fun Region.ghost ghost_fun pattern sep expr) in let sep, expr = List.fold_right apply patterns (sep, expr) in norm_fun reg fun_reg pattern sep expr (* Unparsing expressions *) type unparsed = [ `Fun of (kwd_fun * (pattern Utils.nseq * arrow * expr)) | `Let of (pattern Utils.nseq * equal * expr) | `Idem of expr ] (* The function [unparse'] returns a triple [patterns, separator_region, expression], and the context (handled by [unparse]) decides if [separator_region] is the region of a "=" sign or "->". *) let rec unparse' = function EFun {value=_,var,arrow,expr; _} -> if var.region#is_ghost then match expr with ELetIn {value = _,({pattern;eq;_},[]),_,expr; _} -> if eq#is_ghost then let patterns, sep, e = unparse' expr in Utils.nseq_cons pattern patterns, sep, e else (pattern,[]), eq, expr | _ -> assert false else if arrow#is_ghost then let patterns, sep, e = unparse' expr in Utils.nseq_cons (PVar var) patterns, sep, e else (PVar var, []), arrow, expr | _ -> assert false let unparse = function EFun {value=kwd_fun,_,_,_; _} as e -> let binding = unparse' e in if kwd_fun#is_ghost then `Let binding else `Fun (kwd_fun, binding) | e -> `Idem e (* Printing the tokens with their source locations *) let print_nsepseq sep print (head,tail) = let print_aux ((sep_reg:Region.t), item) = Printf.printf "%s: %s\n" (sep_reg#compact `Byte) sep; print item in print head; List.iter print_aux tail let print_sepseq sep print = function None -> () | Some seq -> print_nsepseq sep print seq let print_csv print = print_nsepseq "," print let print_token (reg: Region.t) conc = Printf.printf "%s: %s\n" (reg#compact `Byte) conc let print_var Region.{region; value} = Printf.printf "%s: Ident %s\n" (region#compact `Byte) value let print_uident Region.{region; value} = Printf.printf "%s: Uident %s\n" (region#compact `Byte) value let print_str Region.{region; value} = Printf.printf "%s: Str \"%s\"\n" (region#compact `Byte) value let print_bytes Region.{region; value=lexeme, abstract} = Printf.printf "%s: Bytes (\"%s\", \"0x%s\")\n" (region#compact `Byte) lexeme (Hex.to_string abstract) let rec print_tokens ?(undo=false) {decl;eof} = Utils.nseq_iter (print_statement undo) decl; print_token eof "EOF" and print_statement undo = function Let {value=kwd_let, let_bindings; _} -> print_token kwd_let "let"; print_let_bindings undo let_bindings | LetEntry {value=kwd_let_entry, let_binding; _} -> print_token kwd_let_entry "let%entry"; print_let_binding undo let_binding | TypeDecl {value={kwd_type; name; eq; type_expr}; _} -> print_token kwd_type "type"; print_var name; print_token eq "="; print_type_expr type_expr and print_type_expr = function TProd prod -> print_cartesian prod | TSum {value; _} -> print_nsepseq "|" print_variant value | TRecord t -> print_record_type t | TApp app -> print_type_app app | TPar par -> print_type_par par | TAlias var -> print_var var | TFun t -> print_fun_type t and print_fun_type {value; _} = let domain, arrow, range = value in print_type_expr domain; print_token arrow "->"; print_type_expr range and print_type_app {value; _} = let type_constr, type_tuple = value in print_type_tuple type_tuple; print_var type_constr and print_type_tuple {value; _} = let {lpar; inside; rpar} = value in print_token lpar "("; print_nsepseq "," print_type_expr inside; print_token rpar ")" and print_type_par {value={lpar;inside=t;rpar}; _} = print_token lpar "("; print_type_expr t; print_token rpar ")" and print_projection Region.{value; _} = let {struct_name; selector; field_path} = value in print_uident struct_name; print_token selector "."; print_nsepseq "." print_selection field_path and print_selection = function FieldName id -> print_var id | Component {value; _} -> let {lpar; inside; rpar} = value in let Region.{value=lexeme,z; region} = inside in print_token lpar "("; print_token region (sprintf "Int %s (%s)" lexeme (Z.to_string z)); print_token rpar ")" and print_cartesian Region.{value;_} = print_nsepseq "*" print_type_expr value and print_variant {value = {constr; args}; _} = print_uident constr; match args with None -> () | Some (kwd_of, cartesian) -> print_token kwd_of "of"; print_cartesian cartesian and print_record_type record_type = print_injection print_field_decl record_type and print_field_decl {value; _} = let {field_name; colon; field_type} = value in print_var field_name; print_token colon ":"; print_type_expr field_type and print_injection : 'a.('a -> unit) -> 'a injection reg -> unit = fun print {value; _} -> let {opening; elements; terminator; closing} = value in print_opening opening; print_sepseq ";" print elements; print_terminator terminator; print_closing closing and print_opening = function Begin region -> print_token region "begin" | With region -> print_token region "with" | LBrace region -> print_token region "{" | LBracket region -> print_token region "[" and print_closing = function End region -> print_token region "end" | RBrace region -> print_token region "}" | RBracket region -> print_token region "]" and print_terminator = function Some semi -> print_token semi ";" | None -> () and print_let_bindings undo = print_nsepseq "and" (print_let_binding undo) and print_let_binding undo {pattern; lhs_type; eq; let_rhs} = print_pattern pattern; (match lhs_type with None -> () | Some (colon, type_expr) -> print_token colon ":"; print_type_expr type_expr); if undo then match unparse let_rhs with `Let (patterns, eq, e) -> Utils.nseq_iter print_pattern patterns; print_token eq "="; print_expr undo e | `Fun (kwd_fun, (patterns, arrow, e)) -> print_token eq "="; print_token kwd_fun "fun"; Utils.nseq_iter print_pattern patterns; print_token arrow "->"; print_expr undo e | `Idem _ -> print_token eq "="; print_expr undo let_rhs else (print_token eq "="; print_expr undo let_rhs) and print_pattern = function PTuple {value=patterns;_} -> print_csv print_pattern patterns | PList p -> print_list_pattern p | PVar {region; value} -> Printf.printf "%s: PVar %s\n" (region#compact `Byte) value | PUnit {value=lpar,rpar; _} -> print_token lpar "("; print_token rpar ")" | PInt {region; value=lex,z} -> print_token region (sprintf "PInt %s (%s)" lex (Z.to_string z)) | PTrue kwd_true -> print_token kwd_true "true" | PFalse kwd_false -> print_token kwd_false "false" | PString s -> print_str s | PWild wild -> print_token wild "_" | PPar {value={lpar;inside=p;rpar}; _} -> print_token lpar "("; print_pattern p; print_token rpar ")" | PConstr p -> print_constr_pattern p | PRecord r -> print_record_pattern r | PTyped t -> print_typed_pattern t and print_list_pattern = function Sugar p -> print_injection print_pattern p | PCons p -> print_raw p and print_raw {value=p1,c,p2; _} = print_pattern p1; print_token c "::"; print_pattern p2 and print_typed_pattern {value; _} = let {pattern; colon; type_expr} = value in print_pattern pattern; print_token colon ":"; print_type_expr type_expr and print_record_pattern record_pattern = print_injection print_field_pattern record_pattern and print_field_pattern {value; _} = let {field_name; eq; pattern} = value in print_var field_name; print_token eq "="; print_pattern pattern and print_constr_pattern {value=constr, p_opt; _} = print_uident constr; match p_opt with None -> () | Some pattern -> print_pattern pattern and print_expr undo = function ELetIn {value;_} -> print_let_in undo value | ECond cond -> print_conditional undo cond | ETuple {value;_} -> print_csv (print_expr undo) value | ECase {value;_} -> print_match_expr undo value | EFun {value=(kwd_fun,_,_,_) as f; _} as e -> if undo then let patterns, arrow, expr = unparse' e in print_token kwd_fun "fun"; Utils.nseq_iter print_pattern patterns; print_token arrow "->"; print_expr undo expr else print_fun_expr undo f | EAnnot e -> print_annot_expr undo e | ELogic e -> print_logic_expr undo e | EArith e -> print_arith_expr undo e | EString e -> print_string_expr undo e | ECall {value=f,l; _} -> print_expr undo f; Utils.nseq_iter (print_expr undo) l | EVar v -> print_var v | EProj p -> print_projection p | EUnit {value=lpar,rpar; _} -> print_token lpar "("; print_token rpar ")" | EBytes b -> print_bytes b | EPar {value={lpar;inside=e;rpar}; _} -> print_token lpar "("; print_expr undo e; print_token rpar ")" | EList e -> print_list_expr undo e | ESeq seq -> print_sequence undo seq | ERecord e -> print_record_expr undo e | EConstr {value=constr,None; _} -> print_uident constr | EConstr {value=(constr, Some arg); _} -> print_uident constr; print_expr undo arg and print_annot_expr undo {value=e,t; _} = print_expr undo e; print_token Region.ghost ":"; print_type_expr t and print_list_expr undo = function Cons {value={arg1;op;arg2}; _} -> print_expr undo arg1; print_token op "::"; print_expr undo arg2 | List e -> print_injection (print_expr undo) e | Append {value=e1,append,e2; _} -> print_expr undo e1; print_token append "@"; print_expr undo e2 and print_arith_expr undo = function Add {value={arg1;op;arg2}; _} -> print_expr undo arg1; print_token op "+"; print_expr undo arg2 | Sub {value={arg1;op;arg2}; _} -> print_expr undo arg1; print_token op "-"; print_expr undo arg2 | Mult {value={arg1;op;arg2}; _} -> print_expr undo arg1; print_token op "*"; print_expr undo arg2 | Div {value={arg1;op;arg2}; _} -> print_expr undo arg1; print_token op "/"; print_expr undo arg2 | Mod {value={arg1;op;arg2}; _} -> print_expr undo arg1; print_token op "mod"; print_expr undo arg2 | Neg {value={op;arg}; _} -> print_token op "-"; print_expr undo arg | Int {region; value=lex,z} -> print_token region (sprintf "Int %s (%s)" lex (Z.to_string z)) | Mtz {region; value=lex,z} -> print_token region (sprintf "Mtz %s (%s)" lex (Z.to_string z)) | Nat {region; value=lex,z} -> print_token region (sprintf "Nat %s (%s)" lex (Z.to_string z)) and print_string_expr undo = function Cat {value={arg1;op;arg2}; _} -> print_expr undo arg1; print_token op "^"; print_expr undo arg2 | String s -> print_str s and print_logic_expr undo = function BoolExpr e -> print_bool_expr undo e | CompExpr e -> print_comp_expr undo e and print_bool_expr undo = function Or {value={arg1;op;arg2}; _} -> print_expr undo arg1; print_token op "||"; print_expr undo arg2 | And {value={arg1;op;arg2}; _} -> print_expr undo arg1; print_token op "&&"; print_expr undo arg2 | Not {value={op;arg}; _} -> print_token op "not"; print_expr undo arg | True kwd_true -> print_token kwd_true "true" | False kwd_false -> print_token kwd_false "false" and print_comp_expr undo = function Lt {value={arg1;op;arg2}; _} -> print_expr undo arg1; print_token op "<"; print_expr undo arg2 | Leq {value={arg1;op;arg2}; _} -> print_expr undo arg1; print_token op "<="; print_expr undo arg2 | Gt {value={arg1;op;arg2}; _} -> print_expr undo arg1; print_token op ">"; print_expr undo arg2 | Geq {value={arg1;op;arg2}; _} -> print_expr undo arg1; print_token op ">="; print_expr undo arg2 | Neq {value={arg1;op;arg2}; _} -> print_expr undo arg1; print_token op "<>"; print_expr undo arg2 | Equal {value={arg1;op;arg2}; _} -> print_expr undo arg1; print_token op "="; print_expr undo arg2 and print_record_expr undo e = print_injection (print_field_assignment undo) e and print_field_assignment undo {value; _} = let {field_name; assignment; field_expr} = value in print_var field_name; print_token assignment "="; print_expr undo field_expr and print_sequence undo seq = print_injection (print_expr undo) seq and print_match_expr undo expr = let {kwd_match; expr; opening; lead_vbar; cases; closing} = expr in print_token kwd_match "match"; print_expr undo expr; print_opening opening; print_token_opt lead_vbar "|"; print_cases undo cases; print_closing closing and print_token_opt = function None -> fun _ -> () | Some region -> print_token region and print_cases undo {value; _} = print_nsepseq "|" (print_case_clause undo) value and print_case_clause undo {value; _} = let {pattern; arrow; rhs} = value in print_pattern pattern; print_token arrow "->"; print_expr undo rhs and print_let_in undo (kwd_let, let_bindings, kwd_in, expr) = print_token kwd_let "let"; print_let_bindings undo let_bindings; print_token kwd_in "in"; print_expr undo expr and print_fun_expr undo (kwd_fun, rvar, arrow, expr) = print_token kwd_fun "fun"; print_var rvar; print_token arrow "->"; print_expr undo expr and print_conditional undo {value; _} = let open Region in let {kwd_if; test; kwd_then; ifso; kwd_else; ifnot} = value in print_token ghost "("; print_token kwd_if "if"; print_expr undo test; print_token kwd_then "then"; print_expr undo ifso; print_token kwd_else "else"; print_expr undo ifnot; print_token ghost ")"