[@@@warning "-30"]

(* Abstract Syntax Tree (AST) for Mini-ML *)

(* Regions

   The AST carries all the regions where tokens have been found by
   the lexer, plus additional regions corresponding to whole subtrees
   (like entire expressions, patterns etc.). These regions are needed
   for error reporting and source-to-source transformations. To make
   these pervasive regions more legible, we define singleton types
   for the symbols, keywords etc. with suggestive names like
   "kwd_and" denoting the _region_ of the occurrence of the keyword
   "and".
*)

type 'a reg = 'a Region.reg

(* Some keywords of OCaml *)

type keyword       = Region.t
type kwd_and       = Region.t
type kwd_begin     = Region.t
type kwd_else      = Region.t
type kwd_end       = Region.t
type kwd_false     = Region.t
type kwd_fun       = Region.t
type kwd_if        = Region.t
type kwd_in        = Region.t
type kwd_let       = Region.t
type kwd_let_entry = Region.t
type kwd_match     = Region.t
type kwd_mod       = Region.t
type kwd_not       = Region.t
type kwd_of        = Region.t
type kwd_or        = Region.t
type kwd_then      = Region.t
type kwd_true      = Region.t
type kwd_type      = Region.t
type kwd_with      = Region.t

(* Symbols *)

type arrow  = Region.t  (* "->" *)
type cons   = Region.t  (* "::" *)
type cat    = Region.t  (* "^"  *)
type append = Region.t  (* "@"  *)
type dot    = Region.t  (* "."  *)

(* Arithmetic operators *)

type minus = Region.t  (* "-" *)
type plus  = Region.t  (* "+" *)
type slash = Region.t  (* "/" *)
type times = Region.t  (* "*" *)

(* Boolean operators *)

type bool_or  = Region.t  (* "||" *)
type bool_and = Region.t  (* "&&" *)

(* Comparisons *)

type equal = Region.t  (* "="  *)
type neq   = Region.t  (* "<>" *)
type lt    = Region.t  (* "<"  *)
type gt    = Region.t  (* ">"  *)
type leq   = Region.t  (* "<=" *)
type geq   = Region.t  (* ">=" *)

(* Compounds *)

type lpar     = Region.t  (* "(" *)
type rpar     = Region.t  (* ")" *)
type lbracket = Region.t  (* "[" *)
type rbracket = Region.t  (* "]" *)
type lbrace   = Region.t  (* "{" *)
type rbrace   = Region.t  (* "}" *)

(* Separators *)

type comma = Region.t  (* "," *)
type semi  = Region.t  (* ";" *)
type vbar  = Region.t  (* "|" *)
type colon = Region.t  (* ":" *)

(* Wildcard *)

type wild = Region.t  (* "_" *)

(* Literals *)

type variable    = string reg
type fun_name    = string reg
type type_name   = string reg
type field_name  = string reg
type type_constr = string reg
type constr      = string reg

(* Parentheses *)

type 'a par = {
  lpar   : lpar;
  inside : 'a;
  rpar   : rpar
}

type the_unit = lpar * rpar

(* Brackets compounds *)

type 'a brackets = {
  lbracket : lbracket;
  inside   : 'a;
  rbracket : rbracket
}
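(* As an illustration (a sketch only: it assumes that [Region.reg]
   pairs a value with its region as a record [{region; value}] and
   that the [Region] module provides a ghost region [Region.ghost],
   neither of which is defined in this file), the [variable] node for
   an identifier "x" and a unit literal "()" would be built along the
   lines of

     let x : variable = {region = Region.ghost; value = "x"}
     let unit_lit : the_unit = (Region.ghost, Region.ghost)

   Keywords and symbols are bare regions: only their location is
   recorded, since their text is fixed. *)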
(* The Abstract Syntax Tree (finally) *)

type t = {
  decl : declaration Utils.nseq;
  eof  : eof
}

and ast = t

and eof = Region.t

and declaration =
  Let      of (kwd_let * let_bindings) reg       (* let p = e and ...        *)
| LetEntry of (kwd_let_entry * let_binding) reg  (* let%entry p = e and ...  *)
| TypeDecl of type_decl reg                      (* type ...                 *)

(* Non-recursive values *)

and let_bindings =
  (let_binding, kwd_and) Utils.nsepseq           (* p1 = e1 and p2 = e2 ... *)

and let_binding = {                              (* p = e   p : t = e *)
  pattern  : pattern;
  lhs_type : (colon * type_expr) option;
  eq       : equal;
  let_rhs  : expr
}

(* Recursive types *)

and type_decl = {
  kwd_type  : kwd_type;
  name      : type_name;
  eq        : equal;
  type_expr : type_expr
}

and type_expr =
  TProd   of cartesian
| TSum    of (variant reg, vbar) Utils.nsepseq reg
| TRecord of record_type
| TApp    of (type_constr * type_tuple) reg
| TFun    of (type_expr * arrow * type_expr) reg
| TPar    of type_expr par reg
| TAlias  of variable

and cartesian = (type_expr, times) Utils.nsepseq reg

and variant = {
  constr : constr;
  args   : (kwd_of * cartesian) option
}

and record_type = field_decl reg injection reg

and field_decl = {
  field_name : field_name;
  colon      : colon;
  field_type : type_expr
}

and type_tuple = (type_expr, comma) Utils.nsepseq par

and 'a injection = {
  opening    : opening;
  elements   : ('a, semi) Utils.sepseq;
  terminator : semi option;
  closing    : closing
}

and opening =
  Begin  of kwd_begin
| LBrace of lbrace

and closing =
  End    of kwd_end
| RBrace of rbrace

and pattern =
  PTuple  of (pattern, comma) Utils.nsepseq reg          (* p1, p2, ...     *)
| PList   of (pattern, semi) Utils.sepseq brackets reg   (* [p1; p2; ...]   *)
| PVar    of variable                                    (* x               *)
| PUnit   of the_unit reg                                (* ()              *)
| PInt    of (string * Z.t) reg                          (* 7               *)
| PTrue   of kwd_true                                    (* true            *)
| PFalse  of kwd_false                                   (* false           *)
| PString of string reg                                  (* "foo"           *)
| PWild   of wild                                        (* _               *)
| PCons   of (pattern * cons * pattern) reg              (* p1 :: p2        *)
| PPar    of pattern par reg                             (* (p)             *)
| PConstr of (constr * pattern option) reg               (* A    B (3,"")   *)
| PRecord of record_pattern                              (* {a=...; ...}    *)
| PTyped  of typed_pattern reg                           (* (x : int)       *)

and typed_pattern = {
  pattern   : pattern;
  colon     : colon;
  type_expr : type_expr
}

and record_pattern = field_pattern reg injection reg

and field_pattern = {
  field_name : field_name;
  eq         : equal;
  pattern    : pattern
}

and expr =
  LetIn   of let_in reg                       (* let p1 = e1 and p2 = e2 and ... in e *)
| Fun     of fun_expr                         (* fun x -> e                 *)
| If      of conditional                      (* if e1 then e2 else e3      *)
| ETuple  of (expr, comma) Utils.nsepseq reg  (* e1, e2, ...                *)
| Match   of match_expr reg                   (* p1 -> e1 | p2 -> e2 | ...  *)
| Seq     of sequence                         (* begin e1; e2; ...; en end  *)
| ERecord of record_expr                      (* {f1=e1; ...}               *)
| Append  of (expr * append * expr) reg       (* e1 @ e2                    *)
| Cons    of (expr * cons * expr) reg         (* e1 :: e2                   *)
| ELogic  of logic_expr
| EArith  of arith_expr
| EString of string_expr
| Call    of (expr * expr) reg                (* f e                        *)
| Path    of path reg                         (* x    x.y.z                 *)
| Unit    of the_unit reg                     (* ()                         *)
| Par     of expr par reg                     (* (e)                        *)
| EList   of (expr, semi) Utils.sepseq brackets reg  (* [e1; e2; ...]       *)
| EConstr of constr
(*| Extern  of extern*)
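(* For instance (a sketch of the intended encoding, not something
   this interface guarantees), since [Call] carries exactly one
   function and one argument, a curried application such as "f x y"
   is presumably built by the parser as a left-nested pair of calls,
   roughly

     Call (Call (f, x), y)

   where [f], [x] and [y] stand for the subtrees of the corresponding
   subexpressions and the [reg] wrappers have been omitted. *)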
and string_expr =
  Cat    of cat bin_op reg           (* e1 ^ e2 *)
| String of string reg               (* "foo"   *)

and arith_expr =
  Add  of plus bin_op reg            (* e1 + e2      *)
| Sub  of minus bin_op reg           (* e1 - e2      *)
| Mult of times bin_op reg           (* e1 * e2      *)
| Div  of slash bin_op reg           (* e1 / e2      *)
| Mod  of kwd_mod bin_op reg         (* e1 mod e2    *)
| Neg  of minus un_op reg            (* -e           *)
| Int  of (string * Z.t) reg         (* 12345        *)
| Nat  of (string * Z.t) reg         (* 3p           *)
| Mtz  of (string * Z.t) reg         (* 1.00tz  3tz  *)

and logic_expr =
  BoolExpr of bool_expr
| CompExpr of comp_expr

and bool_expr =
  Or    of kwd_or bin_op reg
| And   of kwd_and bin_op reg
| Not   of kwd_not un_op reg
| True  of kwd_true
| False of kwd_false

and 'a bin_op = {
  op   : 'a;
  arg1 : expr;
  arg2 : expr
}

and 'a un_op = {
  op  : 'a;
  arg : expr
}

and comp_expr =
  Lt    of lt bin_op reg
| Leq   of leq bin_op reg
| Gt    of gt bin_op reg
| Geq   of geq bin_op reg
| Equal of equal bin_op reg
| Neq   of neq bin_op reg

(*
| Lt  of (expr * lt * expr) reg
| LEq of (expr * le * expr) reg
| Gt  of (expr * gt * expr) reg
| GEq of (expr * ge * expr) reg
| NEq of (expr * ne * expr) reg
| Eq  of (expr * eq * expr) reg
*)

and path = {
  module_proj : (constr * dot) option;
  value_proj  : (selection, dot) Utils.nsepseq
}

and selection =
  Name      of variable
| Component of (string * Z.t) reg par reg

and record_expr = field_assignment reg injection reg

and field_assignment = {
  field_name : field_name;
  assignment : equal;
  field_expr : expr
}

and sequence = expr injection reg

and match_expr = kwd_match * expr * kwd_with * cases

and cases = vbar option * (pattern * arrow * expr, vbar) Utils.nsepseq

and let_in = kwd_let * let_bindings * kwd_in * expr

and fun_expr = (kwd_fun * variable * arrow * expr) reg

and conditional =
  IfThen     of (kwd_if * expr * kwd_then * expr) reg
| IfThenElse of (kwd_if * expr * kwd_then * expr * kwd_else * expr) reg

(*
and extern =
  Cast   of cast_expr
| Print  of print_expr
| Scanf  of scanf_expr
| PolyEq of (variable * variable)    (* polymorphic equality *)

and cast_expr =
  StringOfInt  of variable           (* string_of_int x  *)
| StringOfBool of variable           (* string_of_bool x *)

and print_expr =
  PrintString of variable            (* print_string x   *)
| PrintInt    of variable            (* print_int x      *)

and scanf_expr =
  ScanfString of variable            (* scanf_string x   *)
| ScanfInt    of variable            (* scanf_int x      *)
| ScanfBool   of variable            (* scanf_bool x     *)
*)
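(* As a concrete illustration (a sketch: it assumes, as above, that
   [Region.reg] is a record [{region; value}] and that ghost regions
   are available), the source expression "1 + 2" would be encoded
   along the lines of

     EArith (Add {region = ...;
                  value  = {op   = ...;   (* region of "+" *)
                            arg1 = EArith (Int {region = ...; value = ("1", Z.of_int 1)});
                            arg2 = EArith (Int {region = ...; value = ("2", Z.of_int 2)})}})

   that is, the operator node keeps the region of the "+" token, and
   the literals keep both their lexeme and their value. *)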
(* Normalising nodes of the AST so that the interpreter is more
   uniform and no source regions are lost, in order to enable all
   manner of source-to-source transformations between the rewritten
   AST and the initial source.

   The first kind of expressions to be normalised is lambdas, like

     fun a -> fun b -> a
     fun a b -> a
     fun a (b,c) -> a

   to become

     fun a -> fun b -> a
     fun a -> fun b -> a
     fun a -> fun x -> let (b,c) = x in a

   The second kind is let-bindings introducing functions without the
   "fun" keyword, like

     let g a b = a
     let h a (b,c) = a

   which become

     let g = fun a -> fun b -> a
     let h = fun a -> fun x -> let (b,c) = x in a

   The former is actually a subcase of the latter. Indeed, the
   general shape of the former is

     fun <patterns> -> <expr>

   and that of the latter is

     let <name> <patterns> = <expr>

   The isomorphic parts are "<patterns> -> <expr>" and
   "<patterns> = <expr>". The call [norm patterns sep expr], where
   [sep] is the region either of an "->" or of a "=", evaluates to a
   function expression (lambda), as expected.

   In order to get the regions right in the case of lambdas,
   additional regions are passed: [norm ~reg:(total, kwd_fun)
   patterns sep expr], where [total] is the region of the whole
   original lambda (even if the resulting lambda is actually longer:
   we want to keep the region of the original) and [kwd_fun] is the
   region of the original "fun" keyword.
*)

type sep = Region.t

val norm :
  ?reg:(Region.t * kwd_fun) -> pattern Utils.nseq -> sep -> expr -> fun_expr

(* Undoing the above rewritings (for debugging by comparison with the
   lexer, and to feed the source-to-source transformations with only
   tokens that originated from the original input).

   Unparsing is performed on an expression which is expected to be a
   series "fun ... -> fun ... -> ...". Either this expression is the
   right-hand side of a let, or it is not. These two cases are
   distinguished by the function [unparse], depending on the first
   keyword "fun" being concrete or ghostly (virtual). In the former
   case, we are unparsing an expression which originally started with
   "fun"; in the latter, we are unparsing an expression that was
   parsed on the right-hand side of a let construct. In other words,
   in the former case, we expect to reconstruct

     fun p_1 ... p_n -> e

   whereas, in the second case, we want to obtain

     let f p_1 ... p_n = e

   In any case, the heart of the unparsing is the same, and this is
   why the data constructors [`Fun] and [`Let] of the type [unparsed]
   share a common type, [pattern Utils.nseq * Region.t * expr], where
   the region may denote either the alias type [arrow] or [equal].

   Let us assume a value of this triple [patterns, separator_region,
   expression]. Then the context (handled by [unparse]) decides
   whether [separator_region] is the region of a "=" sign or of an
   "->". There are two forms to be unparsed:

     fun x_1 -> let p_1 = x_1 in ... fun x_n -> let p_n = x_n in e

   or

     fun p_1 -> ... fun p_n -> e

   In the first case, the rightmost "=" becomes [separator_region]
   above, whereas, in the second case, it is the rightmost "->".

   Here are some examples covering all cases:

     let rec f = fun a -> fun b -> a
     let rec g = fun a b -> a
     let rec h = fun a (b,c) -> a
     let rec fst = fun (x,_) -> x
     let rec g a b = a
     let rec h (b,c) a (d,e) = a
     let len = (fun n _ -> n)
     let f l = let n = l in n
*)

type unparsed = [
  `Fun  of (kwd_fun * (pattern Utils.nseq * arrow * expr))
| `Let  of (pattern Utils.nseq * equal * expr)
| `Idem of expr
]

val unparse : expr -> unparsed
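(* For example (a sketch; [strip] is a hypothetical helper written by
   a client of this interface), a pass that only cares about the body
   of a normalised lambda could dispatch on the result of [unparse]:

     let strip (e : expr) : expr =
       match unparse e with
         `Fun (_kwd_fun, (_patterns, _arrow, body)) -> body
       | `Let (_patterns, _eq, body)                -> body
       | `Idem e                                    -> e
*)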
(* Conversions to type [string] *)

(*
val to_string         : t -> string
val pattern_to_string : pattern -> string
*)

(* Printing the tokens reconstructed from the AST.

   This is very useful for debugging, as the output of [print_tokens
   ast] can be textually compared to that of [Lexer.trace] (see
   module [LexerMain]). The optional parameter [undo] is bound to
   [true] if the caller wants the AST to be unparsed before printing
   (those nodes that have been normalised with the functions
   [norm_let] and [norm_fun]).
*)

val print_tokens : ?undo:bool -> ast -> unit

(* Projecting regions from sundry nodes of the AST. See the first
   comment at the beginning of this file. *)

val region_of_pattern : pattern -> Region.t
val region_of_expr    : expr -> Region.t

(* Removing all outermost parentheses from a given expression *)

val rm_par : expr -> expr

(* Predicates on expressions *)

val is_var  : expr -> bool
val is_call : expr -> bool
val is_fun  : expr -> bool

(* Variables *)

(*
module Vars     : Set.S with type elt = string
module FreeVars : Set.S with type elt = variable

(* The value of the call [vars t] is a pair of sets: the first is the
   set of variables whose definitions are in scope at the end of the
   program corresponding to the AST [t]; the second is the set of
   free variables in that same AST.

   Computing free variables is useful because, when we translate the
   program to OCaml, we do not want an unbound variable that happens
   to be predefined in OCaml to slip through: this way, we make sure
   that an unbound variable is caught before the translation (where
   it would be wrongly captured by the OCaml compiler). Dually,
   computing bound variables is useful when compiling to OCaml.
*)

val vars : t -> Vars.t * FreeVars.t
*)
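(* As a final illustration (a sketch only; nothing here is part of
   the interface), the helpers above compose naturally when reporting
   on an expression: one may strip redundant parentheses first and
   then recover a source region for a diagnostic.

     let diagnose (e : expr) : Region.t option =
       let e = rm_par e in
       if is_call e || is_fun e
       then Some (region_of_expr e)
       else None
*)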