Extended lib_utils/pos.ml{i}. First import of Ligodity. (No "simplify" yet.)

This commit is contained in:
Christian Rinderknecht 2019-05-12 19:31:22 +02:00 committed by Georges Dupéron
parent 22b331dbed
commit dcbfbf781d
28 changed files with 3159 additions and 1 deletions

View File

@ -0,0 +1 @@
ocamlc: -w -4

View File

@ -0,0 +1 @@
ocamlc: -w -42

View File

@ -0,0 +1 @@
ocamlc: -w -58

View File

@ -0,0 +1 @@
ocamlc: -w -42

View File

View File

@ -0,0 +1 @@
ocamlc: -w -42-40

View File

@ -0,0 +1 @@
--explain --external-tokens Token --base Parser ParToken.mly

View File

View File

@ -0,0 +1,7 @@
$HOME/git/OCaml-build/Makefile
$HOME/git/OCaml-build/Makefile.cfg
$HOME/git/tezos/src/lib_utils/pos.mli
$HOME/git/tezos/src/lib_utils/pos.ml
$HOME/git/tezos/src/lib_utils/region.mli
$HOME/git/tezos/src/lib_utils/region.ml
Stubs/Tezos_utils.ml

790
src/parser/ligodity/AST.ml Normal file
View File

@ -0,0 +1,790 @@
[@@@warning "-30-40-42"]
(* Abstract Syntax Tree (AST) for Mini-ML *)
type 'a reg = 'a Region.reg
(* Keywords of OCaml *)
type keyword = Region.t
type kwd_and = Region.t
type kwd_begin = Region.t
type kwd_else = Region.t
type kwd_end = Region.t
type kwd_false = Region.t
type kwd_fun = Region.t
type kwd_if = Region.t
type kwd_in = Region.t
type kwd_let = Region.t
type kwd_match = Region.t
type kwd_mod = Region.t
type kwd_not = Region.t
type kwd_of = Region.t
type kwd_or = Region.t
type kwd_then = Region.t
type kwd_true = Region.t
type kwd_type = Region.t
type kwd_with = Region.t
type kwd_let_entry = Region.t
(* Symbols *)
type arrow = Region.t (* "->" *)
type cons = Region.t (* "::" *)
type cat = Region.t (* "^" *)
type append = Region.t (* "@" *)
type dot = Region.t (* "." *)
(* Arithmetic operators *)
type minus = Region.t (* "-" *)
type plus = Region.t (* "+" *)
type slash = Region.t (* "/" *)
type times = Region.t (* "*" *)
(* Boolean operators *)
type bool_or = Region.t (* "||" *)
type bool_and = Region.t (* "&&" *)
(* Comparisons *)
type equal = Region.t (* "=" *)
type neq = Region.t (* "<>" *)
type lt = Region.t (* "<" *)
type gt = Region.t (* ">" *)
type leq = Region.t (* "=<" *)
type geq = Region.t (* ">=" *)
(* Compounds *)
type lpar = Region.t (* "(" *)
type rpar = Region.t (* ")" *)
type lbracket = Region.t (* "[" *)
type rbracket = Region.t (* "]" *)
type lbrace = Region.t (* "{" *)
type rbrace = Region.t (* "}" *)
(* Separators *)
type comma = Region.t (* "," *)
type semi = Region.t (* ";" *)
type vbar = Region.t (* "|" *)
type colon = Region.t (* ":" *)
(* Wildcard *)
type wild = Region.t (* "_" *)
(* Literals *)
type variable = string reg
type fun_name = string reg
type type_name = string reg
type field_name = string reg
type type_constr = string reg
type constr = string reg
(* Parentheses *)
type 'a par = {
lpar : lpar;
inside : 'a;
rpar : rpar
}
type the_unit = lpar * rpar
(* Brackets compounds *)
type 'a brackets = {
lbracket : lbracket;
inside : 'a;
rbracket : rbracket
}
(* The Abstract Syntax Tree *)
type t = {
decl : declaration Utils.nseq;
eof : eof
}
and ast = t
and eof = Region.t
and declaration =
Let of (kwd_let * let_bindings) reg
| LetEntry of (kwd_let_entry * let_binding) reg
| TypeDecl of type_decl reg
(* Non-recursive values *)
and let_bindings = (let_binding, kwd_and) Utils.nsepseq
and let_binding = {
pattern : pattern;
lhs_type : (colon * type_expr) option;
eq : equal;
let_rhs : expr
}
(* Recursive types *)
and type_decl = {
kwd_type : kwd_type;
name : type_name;
eq : equal;
type_expr : type_expr
}
and type_expr =
TProd of cartesian
| TSum of (variant reg, vbar) Utils.nsepseq reg
| TRecord of record_type
| TApp of (type_constr * type_tuple) reg
| TFun of (type_expr * arrow * type_expr) reg
| TPar of type_expr par reg
| TAlias of variable
and cartesian = (type_expr, times) Utils.nsepseq reg
and variant = {
constr : constr;
args : (kwd_of * cartesian) option
}
and record_type = field_decl reg injection reg
and field_decl = {
field_name : field_name;
colon : colon;
field_type : type_expr
}
and type_tuple = (type_expr, comma) Utils.nsepseq par
and 'a injection = {
opening : opening;
elements : ('a, semi) Utils.sepseq;
terminator : semi option;
closing : closing
}
and opening =
Begin of kwd_begin
| LBrace of lbrace
and closing =
End of kwd_end
| RBrace of rbrace
and pattern =
PTuple of (pattern, comma) Utils.nsepseq reg
| PList of (pattern, semi) Utils.sepseq brackets reg
| PVar of variable
| PUnit of the_unit reg
| PInt of (string * Z.t) reg
| PTrue of kwd_true
| PFalse of kwd_false
| PString of string reg
| PWild of wild
| PCons of (pattern * cons * pattern) reg
| PPar of pattern par reg
| PConstr of (constr * pattern option) reg
| PRecord of record_pattern
| PTyped of typed_pattern reg
and typed_pattern = {
pattern : pattern;
colon : colon;
type_expr : type_expr
}
and record_pattern = field_pattern reg injection reg
and field_pattern = {
field_name : field_name;
eq : equal;
pattern : pattern
}
and expr =
LetIn of let_in reg
| Fun of fun_expr
| If of conditional
| ETuple of (expr, comma) Utils.nsepseq reg
| Match of match_expr reg
| Seq of sequence
| ERecord of record_expr
| Append of (expr * append * expr) reg
| Cons of (expr * cons * expr) reg
| ELogic of logic_expr
| EArith of arith_expr
| EString of string_expr
| Call of (expr * expr) reg
| Path of path reg
| Unit of the_unit reg
| Par of expr par reg
| EList of (expr, semi) Utils.sepseq brackets reg
| EConstr of constr
and string_expr =
Cat of cat bin_op reg
| String of string reg
and arith_expr =
Add of plus bin_op reg
| Sub of minus bin_op reg
| Mult of times bin_op reg
| Div of slash bin_op reg
| Mod of kwd_mod bin_op reg
| Neg of minus un_op reg
| Int of (string * Z.t) reg
| Nat of (string * Z.t) reg
| Mtz of (string * Z.t) reg
and logic_expr =
BoolExpr of bool_expr
| CompExpr of comp_expr
and bool_expr =
Or of kwd_or bin_op reg
| And of kwd_and bin_op reg
| Not of kwd_not un_op reg
| True of kwd_true
| False of kwd_false
and 'a bin_op = {
op : 'a;
arg1 : expr;
arg2 : expr
}
and 'a un_op = {
op : 'a;
arg : expr
}
and comp_expr =
Lt of lt bin_op reg
| Leq of leq bin_op reg
| Gt of gt bin_op reg
| Geq of geq bin_op reg
| Equal of equal bin_op reg
| Neq of neq bin_op reg
and path = {
module_proj : (constr * dot) option;
value_proj : (selection, dot) Utils.nsepseq
}
and selection =
Name of variable
| Component of (string * Z.t) reg par reg
and record_expr = field_assignment reg injection reg
and field_assignment = {
field_name : field_name;
assignment : equal;
field_expr : expr
}
and sequence = expr injection reg
and match_expr = kwd_match * expr * kwd_with * cases
and cases =
vbar option * (pattern * arrow * expr, vbar) Utils.nsepseq
and let_in = kwd_let * let_bindings * kwd_in * expr
and fun_expr = (kwd_fun * variable * arrow * expr) reg
and conditional =
IfThen of (kwd_if * expr * kwd_then * expr) reg
| IfThenElse of (kwd_if * expr * kwd_then * expr * kwd_else * expr) reg
(* Projecting regions of the input source code *)
let sprintf = Printf.sprintf
let region_of_pattern = function
PList {region;_} | PTuple {region;_} | PVar {region;_}
| PUnit {region;_} | PInt {region;_} | PTrue region | PFalse region
| PString {region;_} | PWild region | PCons {region;_}
| PConstr {region; _} | PPar {region;_} | PRecord {region; _}
| PTyped {region; _} -> region
let region_of_bool_expr = function
Or {region;_} | And {region;_}
| True region | False region
| Not {region;_} -> region
let region_of_comp_expr = function
Lt {region;_} | Leq {region;_}
| Gt {region;_} | Geq {region;_}
| Neq {region;_} | Equal {region;_} -> region
let region_of_logic_expr = function
BoolExpr e -> region_of_bool_expr e
| CompExpr e -> region_of_comp_expr e
let region_of_arith_expr = function
Add {region;_} | Sub {region;_} | Mult {region;_}
| Div {region;_} | Mod {region;_} | Neg {region;_}
| Int {region;_} | Mtz {region; _}
| Nat {region; _} -> region
let region_of_string_expr = function
String {region;_} | Cat {region;_} -> region
let region_of_expr = function
ELogic e -> region_of_logic_expr e
| EArith e -> region_of_arith_expr e
| EString e -> region_of_string_expr e
| LetIn {region;_} | Fun {region;_}
| If IfThen {region;_} | If IfThenElse {region; _}
| ETuple {region;_} | Match {region;_} | Cons {region;_}
| Call {region;_} | Path {region;_}
| Unit {region;_} | Par {region;_} | EList {region;_}
| Seq {region; _} | ERecord {region; _}
| Append {region; _} | EConstr {region; _} -> region
(* Predicates *)
let rec is_var = function
Par {value={inside=e;_};_} -> is_var e
| Path _ -> true
| _ -> false
let rec is_call = function
Par {value={inside=e;_};_} -> is_call e
| Call _ -> true
| _ -> false
let rec is_fun = function
Par {value={inside=e;_};_} -> is_fun e
| Fun _ -> true
| _ -> false
let rec rm_par = function
Par {value={inside=e;_};_} -> rm_par e
| e -> e
(* Rewriting let-expressions and fun-expressions, with some optimisations *)
type sep = Region.t
let ghost_fun, ghost_arrow, ghost_let, ghost_eq, ghost_in =
let ghost = Region.ghost in ghost, ghost, ghost, ghost, ghost
let norm_fun region kwd_fun pattern eq expr =
let value =
match pattern with
PVar v -> kwd_fun, v, eq, expr
| _ -> let value = Utils.gen_sym () in
let fresh = Region.{region=Region.ghost; value} in
let proj = Name fresh, [] in
let path = {module_proj=None; value_proj=proj} in
let path = Region.{region=Region.ghost; value=path} in
let bindings = {pattern; eq;
lhs_type=None; let_rhs = Path path}, [] in
let let_in = ghost_let, bindings, ghost_in, expr in
let expr = LetIn {value=let_in; region=Region.ghost}
in kwd_fun, fresh, ghost_arrow, expr
in Region.{region; value}
let norm ?reg (pattern, patterns) sep expr =
let reg, fun_reg =
match reg with
None -> Region.ghost, ghost_fun
| Some p -> p in
let apply pattern (sep, expr) =
ghost_eq, Fun (norm_fun Region.ghost ghost_fun pattern sep expr) in
let sep, expr = List.fold_right apply patterns (sep, expr)
in norm_fun reg fun_reg pattern sep expr
(* Unparsing expressions *)
type unparsed = [
`Fun of (kwd_fun * (pattern Utils.nseq * arrow * expr))
| `Let of (pattern Utils.nseq * equal * expr)
| `Idem of expr
]
(* The function [unparse'] returns a triple [patterns,
separator_region, expression], and the context (handled by
[unparse]) decides if [separator_region] is the region of a "="
sign or "->". *)
let rec unparse' = function
Fun {value=_,var,arrow,expr; _} ->
if var.region#is_ghost then
match expr with
LetIn {value = _,({pattern;eq;_},[]),_,expr; _} ->
if eq#is_ghost then
let patterns, sep, e = unparse' expr
in Utils.nseq_cons pattern patterns, sep, e
else (pattern,[]), eq, expr
| _ -> assert false
else if arrow#is_ghost then
let patterns, sep, e = unparse' expr
in Utils.nseq_cons (PVar var) patterns, sep, e
else (PVar var, []), arrow, expr
| _ -> assert false
let unparse = function
Fun {value=kwd_fun,_,_,_; _} as e ->
let binding = unparse' e in
if kwd_fun#is_ghost then `Let binding else `Fun (kwd_fun, binding)
| e -> `Idem e
(* Printing the tokens with their source locations *)
let print_nsepseq sep print (head,tail) =
let print_aux ((sep_reg:Region.t), item) =
Printf.printf "%s: %s\n" (sep_reg#compact `Byte) sep;
print item
in print head; List.iter print_aux tail
let print_sepseq sep print = function
None -> ()
| Some seq -> print_nsepseq sep print seq
let print_csv print = print_nsepseq "," print
let print_bsv print = print_nsepseq "|" print
let print_ssv print = print_sepseq ";" print
let print_token (reg: Region.t) conc =
Printf.printf "%s: %s\n" (reg#compact `Byte) conc
let print_var Region.{region; value} =
Printf.printf "%s: Ident %s\n" (region#compact `Byte) value
let print_uident Region.{region; value} =
Printf.printf "%s: Uident %s\n" (region#compact `Byte) value
let print_str Region.{region; value} =
Printf.printf "%s: Str \"%s\"\n" (region#compact `Byte) value
let rec print_tokens ?(undo=false) {decl;eof} =
Utils.nseq_iter (print_statement undo) decl; print_token eof "EOF"
and print_statement undo = function
Let {value=kwd_let, let_bindings; _} ->
print_token kwd_let "let";
print_let_bindings undo let_bindings
| LetEntry {value=kwd_let_entry, let_binding; _} ->
print_token kwd_let_entry "let%entry";
print_let_binding undo let_binding
| TypeDecl {value={kwd_type; name; eq; type_expr}; _} ->
print_token kwd_type "type";
print_var name;
print_token eq "=";
print_type_expr type_expr
and print_type_expr = function
TProd prod -> print_cartesian prod
| TSum {value; _} -> print_nsepseq "|" print_variant value
| TRecord t -> print_record_type t
| TApp app -> print_type_app app
| TPar par -> print_type_par par
| TAlias var -> print_var var
| TFun t -> print_fun_type t
and print_fun_type {value; _} =
let domain, arrow, range = value in
print_type_expr domain;
print_token arrow "->";
print_type_expr range
and print_type_app {value; _} =
let type_constr, type_tuple = value in
print_type_tuple type_tuple;
print_var type_constr
and print_type_tuple {lpar; inside; rpar} =
print_token lpar "(";
print_nsepseq "," print_type_expr inside;
print_token rpar ")"
and print_type_par {value={lpar;inside=t;rpar}; _} =
print_token lpar "(";
print_type_expr t;
print_token rpar ")"
and print_path Region.{value; _} =
let {module_proj; value_proj} = value in
let () =
match module_proj with
None -> ()
| Some (name, dot) ->
print_uident name;
print_token dot "."
in print_nsepseq "." print_selection value_proj
and print_selection = function
Name id -> print_var id
| Component {value; _} ->
let {lpar; inside; rpar} = value in
let Region.{value=lexeme,z; region} = inside in
print_token lpar "(";
print_token region
(sprintf "Int %s (%s)" lexeme (Z.to_string z));
print_token rpar ")"
and print_cartesian Region.{value;_} =
print_nsepseq "*" print_type_expr value
and print_variant {value = {constr; args}; _} =
print_uident constr;
match args with
None -> ()
| Some (kwd_of, cartesian) ->
print_token kwd_of "of";
print_cartesian cartesian
and print_record_type record_type =
print_injection print_field_decl record_type
and print_field_decl {value; _} =
let {field_name; colon; field_type} = value
in print_var field_name;
print_token colon ":";
print_type_expr field_type
and print_injection :
'a.('a -> unit) -> 'a injection reg -> unit =
fun print {value; _} ->
let {opening; elements; terminator; closing} = value in
print_opening opening;
print_sepseq ";" print elements;
print_terminator terminator;
print_closing closing
and print_opening = function
Begin region -> print_token region "begin"
| LBrace region -> print_token region "{"
and print_closing = function
End region -> print_token region "end"
| RBrace region -> print_token region "}"
and print_terminator = function
Some semi -> print_token semi ";"
| None -> ()
and print_let_bindings undo = print_nsepseq "and" (print_let_binding undo)
and print_let_binding undo {pattern; lhs_type; eq; let_rhs} =
print_pattern pattern;
(match lhs_type with
None -> ()
| Some (colon, type_expr) ->
print_token colon ":";
print_type_expr type_expr);
if undo then
match unparse let_rhs with
`Let (patterns, eq, e) ->
Utils.nseq_iter print_pattern patterns;
print_token eq "=";
print_expr undo e
| `Fun (kwd_fun, (patterns, arrow, e)) ->
print_token eq "=";
print_token kwd_fun "fun";
Utils.nseq_iter print_pattern patterns;
print_token arrow "->";
print_expr undo e
| `Idem _ ->
print_token eq "="; print_expr undo let_rhs
else (print_token eq "="; print_expr undo let_rhs)
and print_pattern = function
PTuple {value=patterns;_} -> print_csv print_pattern patterns
| PList {value={lbracket; inside=patterns; rbracket}; _} ->
print_token lbracket "[";
print_ssv print_pattern patterns;
print_token rbracket "]"
| PVar {region; value} ->
Printf.printf "%s: PVar %s\n" (region#compact `Byte) value
| PUnit {value=lpar,rpar; _} ->
print_token lpar "("; print_token rpar ")"
| PInt {region; value=lex,z} ->
print_token region (sprintf "PInt %s (%s)" lex (Z.to_string z))
| PTrue kwd_true -> print_token kwd_true "true"
| PFalse kwd_false -> print_token kwd_false "false"
| PString s -> print_str s
| PWild wild -> print_token wild "_"
| PCons {value=p1,c,p2; _} ->
print_pattern p1; print_token c "::"; print_pattern p2
| PPar {value={lpar;inside=p;rpar}; _} ->
print_token lpar "("; print_pattern p; print_token rpar ")"
| PConstr p -> print_constr_pattern p
| PRecord r -> print_record_pattern r
| PTyped t -> print_typed_pattern t
and print_typed_pattern {value; _} =
let {pattern; colon; type_expr} = value in
print_pattern pattern;
print_token colon ":";
print_type_expr type_expr
and print_record_pattern record_pattern =
print_injection print_field_pattern record_pattern
and print_field_pattern {value; _} =
let {field_name; eq; pattern} = value in
print_var field_name;
print_token eq "=";
print_pattern pattern
and print_constr_pattern {value=constr, p_opt; _} =
print_uident constr;
match p_opt with
None -> ()
| Some pattern -> print_pattern pattern
and print_expr undo = function
LetIn {value;_} -> print_let_in undo value
| If cond -> print_conditional undo cond
| ETuple {value;_} -> print_csv (print_expr undo) value
| Match {value;_} -> print_match_expr undo value
| Fun {value=(kwd_fun,_,_,_) as f; _} as e ->
if undo then
let patterns, arrow, expr = unparse' e in
print_token kwd_fun "fun";
Utils.nseq_iter print_pattern patterns;
print_token arrow "->";
print_expr undo expr
else print_fun_expr undo f
| Cons {value=e1,cons,e2; _} ->
print_expr undo e1; print_token cons "::"; print_expr undo e2
| ELogic e -> print_logic_expr undo e
| EArith e -> print_arith_expr undo e
| EString e -> print_string_expr undo e
| Call {value=e1,e2; _} -> print_expr undo e1; print_expr undo e2
| Path p -> print_path p
| Unit {value=lpar,rpar; _} ->
print_token lpar "("; print_token rpar ")"
| Par {value={lpar;inside=e;rpar}; _} ->
print_token lpar "("; print_expr undo e; print_token rpar ")"
| EList {value={lbracket; inside=ssv; rbracket}; _} ->
print_token lbracket "["; print_ssv (print_expr undo) ssv; print_token rbracket "]"
| Seq seq -> print_sequence undo seq
| ERecord e -> print_record_expr undo e
| Append {value=e1,append,e2; _} ->
print_expr undo e1; print_token append "@"; print_expr undo e2
| EConstr constr -> print_uident constr
and print_arith_expr undo = function
Add {value={arg1;op;arg2}; _} ->
print_expr undo arg1; print_token op "+"; print_expr undo arg2
| Sub {value={arg1;op;arg2}; _} ->
print_expr undo arg1; print_token op "-"; print_expr undo arg2
| Mult {value={arg1;op;arg2}; _} ->
print_expr undo arg1; print_token op "*"; print_expr undo arg2
| Div {value={arg1;op;arg2}; _} ->
print_expr undo arg1; print_token op "/"; print_expr undo arg2
| Mod {value={arg1;op;arg2}; _} ->
print_expr undo arg1; print_token op "mod"; print_expr undo arg2
| Neg {value={op;arg}; _} -> print_token op "-"; print_expr undo arg
| Int {region; value=lex,z} ->
print_token region (sprintf "Int %s (%s)" lex (Z.to_string z))
| Mtz {region; value=lex,z} ->
print_token region (sprintf "Mtz %s (%s)" lex (Z.to_string z))
| Nat {region; value=lex,z} ->
print_token region (sprintf "Nat %s (%s)" lex (Z.to_string z))
and print_string_expr undo = function
Cat {value={arg1;op;arg2}; _} ->
print_expr undo arg1; print_token op "^"; print_expr undo arg2
| String s -> print_str s
and print_logic_expr undo = function
BoolExpr e -> print_bool_expr undo e
| CompExpr e -> print_comp_expr undo e
and print_bool_expr undo = function
Or {value={arg1;op;arg2}; _} ->
print_expr undo arg1; print_token op "||"; print_expr undo arg2
| And {value={arg1;op;arg2}; _} ->
print_expr undo arg1; print_token op "&&"; print_expr undo arg2
| Not {value={op;arg}; _} -> print_token op "not"; print_expr undo arg
| True kwd_true -> print_token kwd_true "true"
| False kwd_false -> print_token kwd_false "false"
and print_comp_expr undo = function
Lt {value={arg1;op;arg2}; _} ->
print_expr undo arg1; print_token op "<"; print_expr undo arg2
| Leq {value={arg1;op;arg2}; _} ->
print_expr undo arg1; print_token op "<="; print_expr undo arg2
| Gt {value={arg1;op;arg2}; _} ->
print_expr undo arg1; print_token op ">"; print_expr undo arg2
| Geq {value={arg1;op;arg2}; _} ->
print_expr undo arg1; print_token op ">="; print_expr undo arg2
| Neq {value={arg1;op;arg2}; _} ->
print_expr undo arg1; print_token op "<>"; print_expr undo arg2
| Equal {value={arg1;op;arg2}; _} ->
print_expr undo arg1; print_token op "="; print_expr undo arg2
and print_record_expr undo e =
print_injection (print_field_assignment undo) e
and print_field_assignment undo {value; _} =
let {field_name; assignment; field_expr} = value in
print_var field_name;
print_token assignment "=";
print_expr undo field_expr
and print_sequence undo seq = print_injection (print_expr undo) seq
and print_match_expr undo (kwd_match, expr, kwd_with, (_,cases)) =
print_token kwd_match "match";
print_expr undo expr;
print_token kwd_with "with";
print_bsv (print_case undo) cases;
print_token Region.ghost "end"
and print_case undo (pattern, arrow, expr) =
print_pattern pattern;
print_token arrow "->";
print_expr undo expr
and print_let_in undo (kwd_let, let_bindings, kwd_in, expr) =
print_token kwd_let "let";
print_let_bindings undo let_bindings;
print_token kwd_in "in";
print_expr undo expr
and print_fun_expr undo (kwd_fun, rvar, arrow, expr) =
print_token kwd_fun "fun";
print_var rvar;
print_token arrow "->";
print_expr undo expr
and print_conditional undo = function
IfThenElse Region.{value=kwd_if, e1, kwd_then, e2, kwd_else, e3; _} ->
print_token Region.ghost "(";
print_token kwd_if "if";
print_expr undo e1;
print_token kwd_then "then";
print_expr undo e2;
print_token kwd_else "else";
print_expr undo e3;
print_token Region.ghost ")"
| IfThen Region.{value=kwd_if, e1, kwd_then, e2; _} ->
print_token Region.ghost "(";
print_token kwd_if "if";
print_expr undo e1;
print_token kwd_then "then";
print_expr undo e2;
print_token Region.ghost ")"

525
src/parser/ligodity/AST.mli Normal file
View File

@ -0,0 +1,525 @@
[@@@warning "-30"]
(* Abstract Syntax Tree (AST) for Mini-ML *)
(* Regions
The AST carries all the regions where tokens have been found by the
lexer, plus additional regions corresponding to whole subtrees
(like entire expressions, patterns etc.). These regions are needed
for error reporting and source-to-source transformations. To make
these pervasive regions more legible, we define singleton types for
the symbols, keywords etc. with suggestive names like "kwd_and"
denoting the _region_ of the occurrence of the keyword "and".
*)
type 'a reg = 'a Region.reg
(* Some keywords of OCaml *)
type keyword = Region.t
type kwd_and = Region.t
type kwd_begin = Region.t
type kwd_else = Region.t
type kwd_end = Region.t
type kwd_false = Region.t
type kwd_fun = Region.t
type kwd_if = Region.t
type kwd_in = Region.t
type kwd_let = Region.t
type kwd_let_entry = Region.t
type kwd_match = Region.t
type kwd_mod = Region.t
type kwd_not = Region.t
type kwd_of = Region.t
type kwd_or = Region.t
type kwd_then = Region.t
type kwd_true = Region.t
type kwd_type = Region.t
type kwd_with = Region.t
(* Symbols *)
type arrow = Region.t (* "->" *)
type cons = Region.t (* "::" *)
type cat = Region.t (* "^" *)
type append = Region.t (* "@" *)
type dot = Region.t (* "." *)
(* Arithmetic operators *)
type minus = Region.t (* "-" *)
type plus = Region.t (* "+" *)
type slash = Region.t (* "/" *)
type times = Region.t (* "*" *)
(* Boolean operators *)
type bool_or = Region.t (* "||" *)
type bool_and = Region.t (* "&&" *)
(* Comparisons *)
type equal = Region.t (* "=" *)
type neq = Region.t (* "<>" *)
type lt = Region.t (* "<" *)
type gt = Region.t (* ">" *)
type leq = Region.t (* "=<" *)
type geq = Region.t (* ">=" *)
(* Compounds *)
type lpar = Region.t (* "(" *)
type rpar = Region.t (* ")" *)
type lbracket = Region.t (* "[" *)
type rbracket = Region.t (* "]" *)
type lbrace = Region.t (* "{" *)
type rbrace = Region.t (* "}" *)
(* Separators *)
type comma = Region.t (* "," *)
type semi = Region.t (* ";" *)
type vbar = Region.t (* "|" *)
type colon = Region.t
(* Wildcard *)
type wild = Region.t (* "_" *)
(* Literals *)
type variable = string reg
type fun_name = string reg
type type_name = string reg
type field_name = string reg
type type_constr = string reg
type constr = string reg
(* Parentheses *)
type 'a par = {
lpar : lpar;
inside : 'a;
rpar : rpar
}
type the_unit = lpar * rpar
(* Brackets compounds *)
type 'a brackets = {
lbracket : lbracket;
inside : 'a;
rbracket : rbracket
}
(* The Abstract Syntax Tree (finally) *)
type t = {
decl : declaration Utils.nseq;
eof : eof
}
and ast = t
and eof = Region.t
and declaration =
Let of (kwd_let * let_bindings) reg (* let p = e and ... *)
| LetEntry of (kwd_let_entry * let_binding) reg (* let%entry p = e and ... *)
| TypeDecl of type_decl reg (* type ... *)
(* Non-recursive values *)
and let_bindings =
(let_binding, kwd_and) Utils.nsepseq (* p1 = e1 and p2 = e2 ... *)
and let_binding = { (* p = e p : t = e *)
pattern : pattern;
lhs_type : (colon * type_expr) option;
eq : equal;
let_rhs : expr
}
(* Recursive types *)
and type_decl = {
kwd_type : kwd_type;
name : type_name;
eq : equal;
type_expr : type_expr
}
and type_expr =
TProd of cartesian
| TSum of (variant reg, vbar) Utils.nsepseq reg
| TRecord of record_type
| TApp of (type_constr * type_tuple) reg
| TFun of (type_expr * arrow * type_expr) reg
| TPar of type_expr par reg
| TAlias of variable
and cartesian = (type_expr, times) Utils.nsepseq reg
and variant = {
constr : constr;
args : (kwd_of * cartesian) option
}
and record_type = field_decl reg injection reg
and field_decl = {
field_name : field_name;
colon : colon;
field_type : type_expr
}
and type_tuple = (type_expr, comma) Utils.nsepseq par
and 'a injection = {
opening : opening;
elements : ('a, semi) Utils.sepseq;
terminator : semi option;
closing : closing
}
and opening =
Begin of kwd_begin
| LBrace of lbrace
and closing =
End of kwd_end
| RBrace of rbrace
and pattern =
PTuple of (pattern, comma) Utils.nsepseq reg (* p1, p2, ... *)
| PList of (pattern, semi) Utils.sepseq brackets reg (* [p1; p2; ...] *)
| PVar of variable (* x *)
| PUnit of the_unit reg (* () *)
| PInt of (string * Z.t) reg (* 7 *)
| PTrue of kwd_true (* true *)
| PFalse of kwd_false (* false *)
| PString of string reg (* "foo" *)
| PWild of wild (* _ *)
| PCons of (pattern * cons * pattern) reg (* p1 :: p2 *)
| PPar of pattern par reg (* (p) *)
| PConstr of (constr * pattern option) reg (* A B(3,"") *)
| PRecord of record_pattern (* {a=...; ...} *)
| PTyped of typed_pattern reg (* (x : int) *)
and typed_pattern = {
pattern : pattern;
colon : colon;
type_expr : type_expr
}
and record_pattern = field_pattern reg injection reg
and field_pattern = {
field_name : field_name;
eq : equal;
pattern : pattern
}
and expr =
LetIn of let_in reg (* let p1 = e1 and p2 = e2 and ... in e *)
| Fun of fun_expr (* fun x -> e *)
| If of conditional (* if e1 then e2 else e3 *)
| ETuple of (expr, comma) Utils.nsepseq reg (* e1, e2, ... *)
| Match of match_expr reg (* p1 -> e1 | p2 -> e2 | ... *)
| Seq of sequence (* begin e1; e2; ... ; en end *)
| ERecord of record_expr (* {f1=e1; ... } *)
| Append of (expr * append * expr) reg (* e1 @ e2 *)
| Cons of (expr * cons * expr) reg (* e1 :: e2 *)
| ELogic of logic_expr
| EArith of arith_expr
| EString of string_expr
| Call of (expr * expr) reg (* f e *)
| Path of path reg (* x x.y.z *)
| Unit of the_unit reg (* () *)
| Par of expr par reg (* (e) *)
| EList of (expr, semi) Utils.sepseq brackets reg (* [e1; e2; ...] *)
| EConstr of constr
(*| Extern of extern*)
and string_expr =
Cat of cat bin_op reg (* e1 ^ e2 *)
| String of string reg (* "foo" *)
and arith_expr =
Add of plus bin_op reg (* e1 + e2 *)
| Sub of minus bin_op reg (* e1 - e2 *)
| Mult of times bin_op reg (* e1 * e2 *)
| Div of slash bin_op reg (* e1 / e2 *)
| Mod of kwd_mod bin_op reg (* e1 mod e2 *)
| Neg of minus un_op reg (* -e *)
| Int of (string * Z.t) reg (* 12345 *)
| Nat of (string * Z.t) reg (* 3p *)
| Mtz of (string * Z.t) reg (* 1.00tz 3tz *)
and logic_expr =
BoolExpr of bool_expr
| CompExpr of comp_expr
and bool_expr =
Or of kwd_or bin_op reg
| And of kwd_and bin_op reg
| Not of kwd_not un_op reg
| True of kwd_true
| False of kwd_false
and 'a bin_op = {
op : 'a;
arg1 : expr;
arg2 : expr
}
and 'a un_op = {
op : 'a;
arg : expr
}
and comp_expr =
Lt of lt bin_op reg
| Leq of leq bin_op reg
| Gt of gt bin_op reg
| Geq of geq bin_op reg
| Equal of equal bin_op reg
| Neq of neq bin_op reg
(*
| Lt of (expr * lt * expr) reg
| LEq of (expr * le * expr) reg
| Gt of (expr * gt * expr) reg
| GEq of (expr * ge * expr) reg
| NEq of (expr * ne * expr) reg
| Eq of (expr * eq * expr) reg
*)
and path = {
module_proj : (constr * dot) option;
value_proj : (selection, dot) Utils.nsepseq
}
and selection =
Name of variable
| Component of (string * Z.t) reg par reg
and record_expr = field_assignment reg injection reg
and field_assignment = {
field_name : field_name;
assignment : equal;
field_expr : expr
}
and sequence = expr injection reg
and match_expr = kwd_match * expr * kwd_with * cases
and cases =
vbar option * (pattern * arrow * expr, vbar) Utils.nsepseq
and let_in = kwd_let * let_bindings * kwd_in * expr
and fun_expr = (kwd_fun * variable * arrow * expr) reg
and conditional =
IfThen of (kwd_if * expr * kwd_then * expr) reg
| IfThenElse of (kwd_if * expr * kwd_then * expr * kwd_else * expr) reg
(*
and extern =
Cast of cast_expr
| Print of print_expr
| Scanf of scanf_expr
| PolyEq of (variable * variable) (* polymorphic equality *)
and cast_expr =
StringOfInt of variable (* string_of_int x *)
| StringOfBool of variable (* string_of_bool x *)
and print_expr =
PrintString of variable (* print_string x *)
| PrintInt of variable (* print_int x *)
and scanf_expr =
ScanfString of variable (* scanf_string x *)
| ScanfInt of variable (* scanf_int x *)
| ScanfBool of variable (* scanf_bool x *)
*)
(* Normalising nodes of the AST so the interpreter is more uniform and
no source regions are lost in order to enable all manner of
source-to-source transformations from the rewritten AST and the
initial source.
The first kind of expressions to be normalised is lambdas, like:
fun a -> fun b -> a
fun a b -> a
fun a (b,c) -> a
to become
fun a -> fun b -> a
fun a -> fun b -> a
fun a -> fun x -> let (b,c) = x in a
The second kind is let-bindings introducing functions without the
"fun" keyword, like
let g a b = a
let h a (b,c) = a
which become
let g = fun a -> fun b -> a
let h = fun a -> fun x -> let (b,c) = x in a
The former is actually a subcase of the latter. Indeed, the general
shape of the former is
fun <patterns> -> <expr>
and the latter is
let <ident> <patterns> = <expr>
The isomorphic parts are "<patterns> -> <expr>" and "<patterns> =
<expr>".
The call [norm patterns sep expr], where [sep] is a region either
of an "->" or a "=", evaluates in a function expression (lambda),
as expected. In order to get the regions right in the case of
lambdas, additional regions are passed: [norm ~reg:(total,kwd_fun)
patterns sep expr], where [total] is the region for the whole
lambda (even if the resulting lambda is actually longer: we want to
keep the region of the original), and the region of the original
"fun" keyword.
*)
type sep = Region.t
val norm : ?reg:(Region.t * kwd_fun) -> pattern Utils.nseq -> sep -> expr -> fun_expr
(* Undoing the above rewritings (for debugging by comparison with the
lexer, and to feed the source-to-source transformations with only
tokens that originated from the original input.
Unparsing is performed on an expression which is expected to be a
series "fun ... -> fun ... -> ...". Either this expression is the
right-hand side of a let, or it is not. These two cases are
distinguished by the function [unparse], depending on the first
keyword "fun" being concrete or ghostly (virtual). In the former
case, we are unparsing an expression which was originally starting
with "fun"; in the latter, we are unparsing an expression that was
parsed on the right-hand side of a let construct. In other words,
in the former case, we expect to reconstruct
let f p_1 ... p_n = e
whereas, in the second case, we want to obtain
fun p_1 ... p_n -> e
In any case, the heart of the unparsing is the same, and this is
why the data constructors [`Fun] and [`Let] of the type [unparsed]
share a common type: [pattern * Region.t * expr], the region can
either actually denote the alias type [arrow] or [eq]. Let us
assume a value of this triple [patterns, separator_region,
expression]. Then the context (handled by [unparse]) decides if
[separator_region] is the region of a "=" sign or "->".
There are two forms to be unparsed:
fun x_1 -> let p_1 = x_1 in ... fun x_n -> let p_n = x_n in e
or
fun p_1 -> ... fun p_n -> e
in the first case, the rightmost "=" becomes [separator_region]
above, whereas, in the second case, it is the rightmost "->".
Here are some example covering all cases:
let rec f = fun a -> fun b -> a
let rec g = fun a b -> a
let rec h = fun a (b,c) -> a
let rec fst = fun (x,_) -> x
let rec g a b = a
let rec h (b,c) a (d,e) = a
let len = (fun n _ -> n)
let f l = let n = l in n
*)
type unparsed = [
`Fun of (kwd_fun * (pattern Utils.nseq * arrow * expr))
| `Let of (pattern Utils.nseq * equal * expr)
| `Idem of expr
]
val unparse : expr -> unparsed
(* Conversions to type [string] *)
(*
val to_string : t -> string
val pattern_to_string : pattern -> string
*)
(* Printing the tokens reconstructed from the AST. This is very useful
for debugging, as the output of [print_token ast] can be textually
compared to that of [Lexer.trace] (see module [LexerMain]). The
optional parameter [undo] is bound to [true] if the caller wants
the AST to be unparsed before printing (those nodes that have been
normalised with function [norm_let] and [norm_fun]). *)
val print_tokens : ?undo:bool -> ast -> unit
(* Projecting regions from sundry nodes of the AST. See the first
comment at the beginning of this file. *)
val region_of_pattern : pattern -> Region.t
val region_of_expr : expr -> Region.t
(* Removing all outermost parentheses from a given expression *)
val rm_par : expr -> expr
(* Predicates on expressions *)
val is_var : expr -> bool
val is_call : expr -> bool
val is_fun : expr -> bool
(* Variables *)
(*
module Vars : Set.S with type elt = string
module FreeVars : Set.S with type elt = variable
(* The value of the call [vars t] is a pair of sets: the first is the
set of variables whose definitions are in the scope at the end of
the program corresponding to the AST [t], the second is the set of
free variables in that same AST.
Computing free variables is useful because we do not want to
escape a variable that is a predefined variable in OCaml, when we
translate the program to OCaml: this way, we make sure that an
unbound variable is caught before the translation (where it would
be wrongly captured by the OCaml compiler).
Dually, computing bound variables is useful when compiling to
OCaml.
*)
val vars : t -> Vars.t * FreeVars.t
*)

View File

@ -0,0 +1,148 @@
(* Parsing the command-line option for the Mini-ML compiler/interpreter *)
let abort msg =
Utils.highlight (Printf.sprintf "Command-line error: %s" msg); exit 1
let printf = Printf.printf
let sprintf = Printf.sprintf
(* Help *)
let help () =
let file = Filename.basename Sys.argv.(0) in
printf "Usage: %s [<option> ...] [<input>.ml | \"-\"]\n" file;
print_endline "where <input>.mml is the Mini-ML source file (default: stdin),";
print_endline "and each <option> (if any) is one of the following:";
print_endline " -I <paths> Library paths (colon-separated)";
print_endline " -c [<file>.ml] Translate to OCaml in <file>.ml";
print_endline " (default: <input>.ml)";
print_endline " -e, --eval Interpret <input>.mml or stdin";
print_endline " --raw-edits Do not optimise translation edits";
print_endline " --verbose=<phases> Colon-separated phases: cmdline, lexer,";
print_endline " parser, unparsing, norm, eval";
print_endline " --version Short commit hash on stdout";
print_endline " -h, --help This help";
exit 0
(* Version *)
let version () = printf "%s\n" Version.version; exit 0
(* Specifying the command-line options a la GNU *)
let input = ref None
and eval = ref false
and compile = ref None
and verbose = ref Utils.String.Set.empty
and libs = ref []
and raw_edits = ref false
let set_opt var err =
Some (fun x -> if !var = None then var := Some x else raise (Getopt.Error err))
let split_at_colon = Str.(split (regexp ":"))
let add_path p = libs := !libs @ split_at_colon p
let add_verbose d =
verbose := List.fold_left (Utils.swap Utils.String.Set.add)
!verbose
(split_at_colon d)
let specs =
let open! Getopt in [
'I', nolong, None, Some add_path;
'c', nolong, set compile (Some ""),
set_opt compile "Multiple OCaml outputs";
'e', "eval", set eval true, None;
noshort, "raw-edits", set raw_edits true, None;
noshort, "verbose", None, Some add_verbose;
'h', "help", Some help, None;
noshort, "version", Some version, None
]
;;
(* Handler of anonymous arguments *)
let anonymous arg =
match !input with
None -> input := Some arg
| Some _ -> abort (sprintf "Multiple inputs")
(* Parsing the command-line options *)
let () = try Getopt.parse_cmdline specs anonymous with
Getopt.Error msg -> abort msg
(* Checking options *)
let string_of convert = function
None -> "None"
| Some s -> sprintf "Some %s" (convert s)
let string_of_path p =
let apply s a = if a = "" then s else s ^ ":" ^ a
in List.fold_right apply p ""
let quote s = Printf.sprintf "\"%s\"" s
let verb_str =
let apply e a =
if a <> "" then Printf.sprintf "%s, %s" e a else e
in Utils.String.Set.fold apply !verbose ""
let print_opt () =
printf "COMMAND LINE\n";
printf "input = %s\n" (string_of quote !input);
printf "compile = %s\n" (string_of quote !compile);
printf "eval = %B\n" !eval;
printf "raw_edits = %b\n" !raw_edits;
printf "verbose = %s\n" verb_str;
printf "libs = %s\n" (string_of_path !libs)
let () = if Utils.String.Set.mem "cmdline" !verbose then print_opt ()
let input =
match !input with
None | Some "-" ->
if !compile <> None then
abort "An input file is missing (for compilation)."
else !input
| Some file_path ->
if Filename.check_suffix file_path ".mml"
then if Sys.file_exists file_path
then Some file_path
else abort "Source file not found."
else abort "Source file lacks the extension .mml."
let compile =
match !compile with
Some _ when !eval -> abort "Options -e and -c are mutually exclusive."
| None | Some "-" -> !compile
| Some "" ->
(match input with
None | Some "-" -> abort "The target OCaml filename is missing."
| Some file -> Some (Filename.remove_extension file ^ ".ml"))
| Some compile' ->
if Filename.check_suffix compile' ".ml"
then !compile
else abort "The extension of the target OCaml file is not .ml"
(* Exporting remaining options as non-mutable values *)
let eval = !eval
and verbose = !verbose
and libs = !libs
and raw_edits = !raw_edits
let () =
if Utils.String.Set.mem "cmdline" verbose then
begin
printf "\nEXPORTED COMMAND LINE\n";
printf "input = %s\n" (string_of quote input);
printf "compile = %s\n" (string_of quote compile);
printf "eval = %B\n" eval;
printf "raw_edits = %B\n" raw_edits;
printf "verbose = %s\n" verb_str;
printf "I = %s\n" (string_of_path libs)
end

View File

@ -0,0 +1,24 @@
(* Command-line options for the Mini-ML compiler/interpreter *)
(* If the value of [input] is [Some src], the name of the Mini-ML
source file, with the extension ".mml", is [src]. If [input] is
[Some "-"] or [None], the source file is read from standard
input. *)
val input : string option
(* The Mini-ML source file can be processed in two mutually exclusive
manners: if the value [eval] is set to [true], the source is
interpreted; if the value [compile] is not [None], the source is
compiled to OCaml; if [eval] is [false] and [compile] is [None],
nothing is done with the source. Note: if [compile] is [Some "-"],
the compiled code is sent to standard output. *)
val eval : bool
val compile : string option
(* TODO *)
val libs : string list
val verbose : Utils.String.Set.t
val raw_edits : bool

View File

@ -0,0 +1,45 @@
(* Simple lexer for the Mini-ML language *)
(* Error reporting *)
type message = string
exception Error of message Region.reg
(* Tokeniser *)
(* The call [get_token ~log] evaluates in a lexer (a.k.a
tokeniser or scanner) whose type is [Lexing.lexbuf -> Token.t].
The argument [log] is a logger. As its type shows and suggests,
it is a pair made of an output channel and a printer for
tokens. The lexer would use any logger to print the recognised
tokens to the given channel. If no logger is given to [get_token],
no printing takes place while the lexer runs.
The call [reset ~file ~line buffer] modifies in-place the lexing
buffer [buffer] so the lexing engine records that the file
associated with [buffer] is named [file], and the current line is
[line]. This function is useful when lexing a file that has been
previously preprocessed by the C preprocessor, in which case the
argument [file] is the name of the file that was preprocessed,
_not_ the preprocessed file (of which the user is not normally
aware). By default, the [line] argument is [1].
*)
type logger = out_channel * (out_channel -> Token.t -> unit)
val get_token : ?log:logger -> Lexing.lexbuf -> Token.t
val reset : file:string -> ?line:int -> Lexing.lexbuf -> unit
(* Debugging *)
type file_path = string
val iter :
(Lexing.lexbuf -> out_channel -> Token.t -> unit) -> file_path option -> unit
val trace : file_path option -> unit
val prerr : kind:string -> message Region.reg -> unit
val format_error : kind:string -> message Region.reg -> string
val output_token : Lexing.lexbuf -> out_channel -> Token.t -> unit

View File

@ -0,0 +1,385 @@
(* Lexer specification for Mini-ML, to be processed by [ocamllex]. *)
{
(* START HEADER *)
(* UTILITIES *)
let sprintf = Printf.sprintf
module SMap = Utils.String.Map
(* Making a natural from its decimal notation (for Tez) *)
let format_tz s =
match String.index s '.' with
index ->
let len = String.length s in
let integral = Str.first_chars s index
and fractional = Str.last_chars s (len-index-1) in
let num = Z.of_string (integral ^ fractional)
and den = Z.of_string ("1" ^ String.make (len-index-1) '0')
and million = Q.of_string "1000000" in
let mtz = Q.make num den |> Q.mul million in
let should_be_1 = Q.den mtz in
if Z.equal Z.one should_be_1 then Some (Q.num mtz) else None
| exception Not_found -> assert false
(* STRING PROCESSING *)
(* The value of [mk_str len p] ("make string") is a string of length
[len] containing the [len] characters in the list [p], in reverse
order. For instance, [mk_str 3 ['c';'b';'a'] = "abc"]. *)
let mk_str (len: int) (p: char list) : string =
let bytes = Bytes.make len ' ' in
let rec fill i = function
[] -> bytes
| char::l -> Bytes.set bytes i char; fill (i-1) l
in fill (len-1) p |> Bytes.to_string
(* The call [explode s a] is the list made by pushing the characters
in the string [s] on top of [a], in reverse order. For example,
[explode "ba" ['c';'d'] = ['a'; 'b'; 'c'; 'd']]. *)
let explode s acc =
let rec push = function
0 -> acc
| i -> s.[i-1] :: push (i-1)
in push (String.length s)
type thread = {
opening : Region.t;
len : int;
acc : char list
}
let push_char char {opening; len; acc} =
{opening; len=len+1; acc=char::acc}
let push_string str {opening; len; acc} =
{opening;
len = len + String.length str;
acc = explode str acc}
(* LEXICAL ERRORS *)
type message = string
exception Error of message Region.reg
let error lexbuf msg =
let start = Pos.from_byte (Lexing.lexeme_start_p lexbuf)
and stop = Pos.from_byte (Lexing.lexeme_end_p lexbuf) in
let region = Region.make ~start ~stop
in raise (Error Region.{region; value=msg})
let fail region value = raise (Error Region.{region; value})
(* KEYWORDS *)
let keywords = Token.[
"and", Some And;
"begin", Some Begin;
"else", Some Else;
"false", Some False;
"fun", Some Fun;
"if", Some If;
"in", Some In;
"end", Some End;
"let", Some Let;
"match", Some Match;
"mod", Some Mod;
"not", Some Not;
"of", Some Of;
"or", Some Or;
"then", Some Then;
"true", Some True;
"type", Some Type;
"with", Some With;
(* Reserved *)
"as", None;
"asr", None;
"assert", None;
"class", None;
"constraint", None;
"do", None;
"done", None;
"downto", None;
"exception", None;
"external", None;
"for", None;
"function", None;
"functor", None;
"include", None;
"inherit", None;
"initializer", None;
"land", None;
"lazy", None;
"lor", None;
"lsl", None;
"lsr", None;
"lxor", None;
"method", None;
"module", None;
"mutable", None;
"new", None;
"nonrec", None;
"object", None;
"open", None;
"private", None;
"rec", None;
"sig", None;
"struct", None;
"to", None;
"try", None;
"val", None;
"virtual", None;
"when", None;
"while", None
]
let add map (key,value) = SMap.add key value map
let kwd_map = List.fold_left add SMap.empty keywords
(* LEXER ENGINE *)
(* Resetting file name and line number (according to #line directives) *)
let reset_file ~file buffer =
let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname = file}
let reset_line lnum buffer =
let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = lnum}
let reset ~file ?(line=1) buffer =
reset_file ~file buffer; reset_line line buffer
(* Hack to roll back one lexeme in the current semantic action *)
(*
let rollback lexbuf =
let open Lexing in
let len = String.length (lexeme lexbuf) in
lexbuf.lex_curr_pos <- lexbuf.lex_curr_pos - len;
lexbuf.lex_curr_p <-
{lexbuf.lex_curr_p with pos_cnum = lexbuf.lex_curr_p.pos_cnum - len}
*)
(* REGIONS *)
let mk_region start stop =
let start = Pos.from_byte start
and stop = Pos.from_byte stop
in Region.make ~start ~stop
(* END HEADER *)
}
(* START LEXER DEFINITION *)
(* Auxiliary regular expressions *)
let nl = ['\n' '\r']
let blank = [' ' '\t']
let digit = ['0'-'9']
let natural = digit | digit (digit | '_')* digit
let integer = '-'? natural
let decimal = digit+ '.' digit+
let small = ['a'-'z']
let capital = ['A'-'Z']
let letter = small | capital
let ichar = letter | digit | ['_' '\'']
let ident = small ichar* | '_' ichar+
let uident = capital ichar*
let tparam = "'" ident (* Type parameters. Unused yet *)
let hexa = digit | ['A'-'F']
let byte = hexa hexa
let esc = "\\n" | "\\\\" | "\\b" | "\\r" | "\\t"
let schar = [^'"' '\\'] # nl (* TODO: Test *)
| "\\\"" | esc | "\\x" byte | "\\0" digit digit
let string = '"' schar* '"'
let char_set = [^'\'' '\\'] # nl (* TODO: Test *)
| "\\'" | esc | "\\x" byte | "\\0" digit digit
let char = "'" char_set "'"
(* Rules *)
rule scan = parse
nl { Lexing.new_line lexbuf; scan lexbuf }
| blank+ { scan lexbuf }
| "->" { Token.ARROW }
| "::" { Token.CONS }
| "^" { Token.CAT }
| "@" { Token.APPEND }
| "=" { Token.EQ }
| "<>" { Token.NE }
| "<" { Token.LT }
| ">" { Token.GT }
| "<=" { Token.LE }
| ">=" { Token.GE }
| "&&" { Token.BOOL_AND }
| "||" { Token.BOOL_OR }
| "-" { Token.MINUS }
| "+" { Token.PLUS }
| "/" { Token.SLASH }
| "*" { Token.TIMES }
| "(" { Token.LPAR }
| ")" { Token.RPAR }
| "[" { Token.LBRACK }
| "]" { Token.RBRACK }
| "{" { Token.LBRACE }
| "}" { Token.RBRACE }
| "," { Token.COMMA }
| ";" { Token.SEMI }
| ":" { Token.COLON }
| "|" { Token.VBAR }
| "." { Token.DOT }
| "_" { Token.WILD }
| eof { Token.EOF }
| integer as n { Token.Int (n, Z.of_string n) }
| integer as n "p" { Token.Nat (n ^ "p", Z.of_string n) }
| integer as tz "tz" { Token.Mtz (tz ^ "tz", Z.of_string tz) }
| decimal as tz "tz" {
match format_tz tz with
Some z -> Token.Mtz (tz ^ "tz", z)
| None -> sprintf "Invalid tez amount." |> error lexbuf }
| uident as id { Token.Constr id }
| "let%init" { Token.Let }
| "let%entry" { Token.LetEntry }
| "match%nat" { Token.MatchNat }
| ident as id {
match SMap.find id kwd_map with
None -> sprintf "Reserved name \"%s\"." id |> error lexbuf
| Some kwd -> kwd
| exception Not_found -> Token.Ident id }
| '"' { let start = Lexing.lexeme_start_p lexbuf
and stop = Lexing.lexeme_end_p lexbuf in
let opening = mk_region start stop in
let thread = {opening; len=1; acc=['"']} in
let thread = scan_string thread lexbuf in
let lexeme = mk_str thread.len thread.acc in
let () = lexbuf.Lexing.lex_start_p <- start
in Token.Str lexeme }
| "(*" { let start = Lexing.lexeme_start_p lexbuf
and stop = Lexing.lexeme_end_p lexbuf in
let opening = mk_region start stop in
let thread = {opening; len=2; acc=['*';'(']} in
let thread = scan_block thread lexbuf in
let () = ignore thread
in scan lexbuf }
| _ as c { let msg = sprintf "Invalid character '%s'."
(Char.escaped c)
in error lexbuf msg }
(* Finishing a string *)
and scan_string thread = parse
nl { fail thread.opening "Broken string." }
| eof { fail thread.opening "Unterminated string." }
| '"' { push_char '"' thread }
| esc as s { scan_string (push_string s thread) lexbuf }
| '\\' _ { let start = Lexing.lexeme_start_p lexbuf
and stop = Lexing.lexeme_end_p lexbuf in
let region = mk_region start stop
in fail region "Undefined escape sequence." }
| _ as c { scan_string (push_char c thread) lexbuf }
(* Comments *)
and scan_block thread = parse
'"' | "(*" {
let opening = thread.opening in
let start = Lexing.lexeme_start_p lexbuf
and stop = Lexing.lexeme_end_p lexbuf in
let opening' = mk_region start stop in
let lexeme = Lexing.lexeme lexbuf in
let thread = push_string lexeme thread in
let thread = {thread with opening=opening'} in
let next = if lexeme = "\"" then scan_string
else scan_block in
let thread = next thread lexbuf in
let thread = {thread with opening}
in scan_block thread lexbuf }
| "*)" { push_string (Lexing.lexeme lexbuf) thread }
| nl { Lexing.new_line lexbuf; scan_block thread lexbuf }
| eof { fail thread.opening "Open comment." }
| _ as c { scan_block (push_char c thread) lexbuf }
(* END LEXER DEFINITION *)
{
(* START TRAILER *)
type logger = out_channel * (out_channel -> Token.t -> unit)
let get_token ?log =
match log with
None -> scan
| Some (out_chan, print) ->
let print = print out_chan in
fun buffer -> let t = scan buffer in print t; flush out_chan; t
(* Standalone lexer for debugging purposes *)
(* TODO: Move out (functor). See LIGO. *)
let format_error ~(kind: string) Region.{region; value=msg} =
sprintf "%s error in %s:\n%s%!"
kind (region#to_string `Byte) msg
let prerr ~(kind: string) msg =
Utils.highlight (format_error ~kind msg)
type file_path = string
let output_token buffer chan token =
let open Lexing in
let conc = Token.to_string token in
let start = Pos.from_byte buffer.lex_start_p
and stop = Pos.from_byte buffer.lex_curr_p in
Printf.fprintf chan "%s-%s: %s\n%!"
(start#compact `Byte) (stop#compact `Byte) conc
let iter action file_opt =
try
let cin, reset =
match file_opt with
None | Some "-" -> stdin, fun ?(line=1) _ -> ignore line
| Some file -> open_in file, reset ~file in
let buffer = Lexing.from_channel cin in
let rec iter () =
try
let t = scan buffer in
action buffer stdout t;
if t = Token.EOF then (close_in cin; close_out stdout)
else iter ()
with Error diag ->
close_in cin; close_out stdout;
prerr ~kind:"Lexical" diag
in reset buffer; iter ()
with Sys_error msg -> Utils.highlight msg
let trace = iter output_token
(* END TRAILER *)
}

View File

@ -0,0 +1,12 @@
(* Driver for the lexer of Mini-ML *)
(* Error printing and exception tracing *)
Printexc.record_backtrace true;;
(* Running the lexer on the source *)
if Utils.String.Set.mem "lexer" EvalOpt.verbose then
Lexer.trace EvalOpt.input
else Lexer.iter (fun _lexbuf _out _token -> ()) EvalOpt.input
;;

View File

@ -0,0 +1,75 @@
%{
%}
(* Tokens (mirroring those defined in module Token) *)
%token MINUS
%token PLUS
%token SLASH
%token TIMES
%token LPAR
%token RPAR
%token LBRACK
%token RBRACK
%token LBRACE
%token RBRACE
%token ARROW
%token CONS
%token CAT
%token APPEND
%token DOT
%token COMMA
%token SEMI
%token COLON
%token VBAR
%token WILD
%token EQ
%token NE
%token LT
%token GT
%token LE
%token GE
%token BOOL_OR
%token BOOL_AND
%token <string> Ident
%token <string> Constr
%token <string> Str
%token <string * Z.t> Int
%token <string * Z.t> Mtz
%token <string * Z.t> Nat
%token And
%token Begin
%token Else
%token End
%token False
%token Fun
%token If
%token In
%token Let
%token List
%token Map
%token Match
%token Mod
%token Not
%token Of
%token Or
%token Set
%token Then
%token True
%token Type
%token With
%token LetEntry
%token MatchNat
%token EOF
%%

View File

@ -0,0 +1,546 @@
%{
(* START HEADER *)
open AST
(* END HEADER *)
%}
(* Entry points *)
%start program
%type <AST.t> program
%%
(* RULES *)
(* This parser leverages Menhir-specific features, in particular
parametric rules, rule inlining and primitives to get the source
locations of tokens from the lexer engine generated by ocamllex.
We define below two rules, [reg] and [oreg]. The former parses
its argument and returns its synthesised value together with its
region in the source code (that is, start and end positions --- see
module [Region]). The latter discards the value and only returns
the region: this is mostly useful for parsing keywords, because
those can be easily deduced from the AST node and only their source
region has to be recorded there.
*)
%inline reg(X):
X { let start = Pos.from_byte $symbolstartpos
and stop = Pos.from_byte $endpos in
let region = Region.make ~start ~stop
in Region.{region; value=$1} }
%inline oreg(X):
reg(X) { $1.Region.region }
(* Keywords, symbols, literals and virtual tokens *)
kwd(X) : oreg(X) { $1 }
sym(X) : oreg(X) { $1 }
ident : reg(Ident) { $1 }
constr : reg(Constr) { $1 }
string : reg(Str) { $1 }
eof : oreg(EOF) { $1 }
(* The rule [sep_or_term(item,sep)] ("separated or terminated list")
parses a non-empty list of items separated by [sep], and optionally
terminated by [sep]. *)
sep_or_term_list(item,sep):
nsepseq(item,sep) {
$1, None
}
| nseq(item sep {$1,$2}) {
let (first,sep), tail = $1 in
let rec trans (seq, prev_sep as acc) = function
[] -> acc
| (item,next_sep)::others ->
trans ((prev_sep,item)::seq, next_sep) others in
let list, term = trans ([],sep) tail
in (first, List.rev list), Some term }
(* Compound constructs *)
par(X): sym(LPAR) X sym(RPAR) { {lpar=$1; inside=$2; rpar=$3} }
brackets(X): sym(LBRACK) X sym(RBRACK) {
{lbracket=$1; inside=$2; rbracket=$3} }
(* Sequences
Series of instances of the same syntactical category have often to
be parsed, like lists of expressions, patterns etc. The simplest of
all is the possibly empty sequence (series), parsed below by
[seq]. The non-empty sequence is parsed by [nseq]. Note that the
latter returns a pair made of the first parsed item (the parameter
[X]) and the rest of the sequence (possibly empty). This way, the
OCaml typechecker can keep track of this information along the
static control-flow graph. The rule [sepseq] parses possibly empty
sequences of items separated by some token (e.g., a comma), and
rule [nsepseq] is for non-empty such sequences. See module [Utils]
for the types corresponding to the semantic actions of those
rules.
*)
(* Possibly empty sequence of items *)
seq(item):
(**) { [] }
| item seq(item) { $1::$2 }
(* Non-empty sequence of items *)
nseq(item):
item seq(item) { $1,$2 }
(* Non-empty separated sequence of items *)
nsepseq(item,sep):
item { $1, [] }
| item sep nsepseq(item,sep) { let h,t = $3 in $1, ($2,h)::t }
(* Possibly empy separated sequence of items *)
sepseq(item,sep):
(**) { None }
| nsepseq(item,sep) { Some $1 }
(* Helpers *)
type_name : ident { $1 }
field_name : ident { $1 }
module_name : constr { $1 }
struct_name : Ident { $1 }
(* Non-empty comma-separated values (at least two values) *)
tuple(item):
item sym(COMMA) nsepseq(item,sym(COMMA)) { let h,t = $3 in $1,($2,h)::t }
(* Possibly empty semicolon-separated values between brackets *)
list_of(item):
reg(brackets(sepseq(item,sym(SEMI)))) { $1 }
(* Main *)
program:
nseq(declaration) eof { {decl=$1; eof=$2} }
declaration:
reg(kwd(Let) let_bindings {$1,$2}) { Let $1 }
| reg(kwd(LetEntry) let_binding {$1,$2}) { LetEntry $1 }
| reg(type_decl) { TypeDecl $1 }
(* Type declarations *)
type_decl:
kwd(Type) type_name sym(EQ) type_expr {
{kwd_type=$1; name=$2; eq=$3; type_expr=$4} }
type_expr:
cartesian { TProd $1 }
| reg(sum_type) { TSum $1 }
| reg(record_type) { TRecord $1 }
cartesian:
reg(nsepseq(fun_type, sym(TIMES))) { $1 }
fun_type:
core_type { $1 }
| reg(arrow_type) { TFun $1 }
arrow_type:
core_type sym(ARROW) fun_type { $1,$2,$3 }
core_type:
reg(path) {
let {module_proj; value_proj} = $1.value in
let selection_to_string = function
Name ident -> ident.value
| Component {value={inside;_}; _} ->
fst inside.value in
let module_str =
match module_proj with
None -> ""
| Some (constr,_) -> constr.value ^ "." in
let value_str =
Utils.nsepseq_to_list value_proj
|> List.map selection_to_string
|> String.concat "." in
let alias = module_str ^ value_str
in TAlias {$1 with value=alias}
}
| reg(core_type type_constr {$1,$2}) {
let arg, constr = $1.value in
let lpar, rpar = Region.ghost, Region.ghost in
let arg = {lpar; inside=arg,[]; rpar} in
TApp Region.{$1 with value = constr, arg}
}
| reg(type_tuple type_constr {$1,$2}) {
let arg, constr = $1.value in
TApp Region.{$1 with value = constr, arg}
}
| reg(par(cartesian)) {
let Region.{region; value={lpar; inside=prod; rpar}} = $1 in
TPar Region.{region; value={lpar; inside = TProd prod; rpar}} }
type_constr:
type_name { $1 }
| kwd(Set) { Region.{value="set"; region=$1} }
| kwd(Map) { Region.{value="map"; region=$1} }
| kwd(List) { Region.{value="list"; region=$1} }
type_tuple:
par(tuple(type_expr)) { $1 }
sum_type:
ioption(sym(VBAR)) nsepseq(reg(variant), sym(VBAR)) { $2 }
variant:
constr kwd(Of) cartesian { {constr=$1; args = Some ($2,$3)} }
| constr { {constr=$1; args=None} }
record_type:
sym(LBRACE) sep_or_term_list(reg(field_decl),sym(SEMI))
sym(RBRACE) {
let elements, terminator = $2 in {
opening = LBrace $1;
elements = Some elements;
terminator;
closing = RBrace $3} }
field_decl:
field_name sym(COLON) type_expr {
{field_name=$1; colon=$2; field_type=$3} }
(* Non-recursive definitions *)
let_bindings:
nsepseq(let_binding, kwd(And)) { $1 }
let_binding:
ident nseq(sub_irrefutable) option(type_annotation) sym(EQ) expr {
let let_rhs = Fun (norm $2 $4 $5) in
{pattern = PVar $1; lhs_type=$3; eq = Region.ghost; let_rhs}
}
| irrefutable option(type_annotation) sym(EQ) expr {
{pattern=$1; lhs_type=$2; eq=$3; let_rhs=$4} }
type_annotation:
sym(COLON) type_expr { $1,$2 }
(* Patterns *)
irrefutable:
reg(tuple(sub_irrefutable)) { PTuple $1 }
| sub_irrefutable { $1 }
sub_irrefutable:
ident { PVar $1 }
| sym(WILD) { PWild $1 }
| unit { PUnit $1 }
| reg(par(closed_irrefutable)) { PPar $1 }
closed_irrefutable:
reg(tuple(sub_irrefutable)) { PTuple $1 }
| sub_irrefutable { $1 }
| reg(constr_pattern) { PConstr $1 }
| reg(typed_pattern) { PTyped $1 }
typed_pattern:
irrefutable sym(COLON) type_expr {
{pattern=$1; colon=$2; type_expr=$3} }
pattern:
reg(sub_pattern sym(CONS) tail {$1,$2,$3}) { PCons $1 }
| reg(tuple(sub_pattern)) { PTuple $1 }
| core_pattern { $1 }
sub_pattern:
reg(par(tail)) { PPar $1 }
| core_pattern { $1 }
core_pattern:
ident { PVar $1 }
| sym(WILD) { PWild $1 }
| unit { PUnit $1 }
| reg(Int) { PInt $1 }
| kwd(True) { PTrue $1 }
| kwd(False) { PFalse $1 }
| string { PString $1 }
| reg(par(ptuple)) { PPar $1 }
| list_of(tail) { PList $1 }
| reg(constr_pattern) { PConstr $1 }
| reg(record_pattern) { PRecord $1 }
record_pattern:
sym(LBRACE)
sep_or_term_list(reg(field_pattern),sym(SEMI))
sym(RBRACE) {
let elements, terminator = $2 in
{opening = LBrace $1;
elements = Some elements;
terminator;
closing = RBrace $3} }
field_pattern:
field_name sym(EQ) sub_pattern {
{field_name=$1; eq=$2; pattern=$3} }
constr_pattern:
constr sub_pattern { $1, Some $2 }
| constr { $1, None }
ptuple:
reg(tuple(tail)) { PTuple $1 }
unit:
reg(sym(LPAR) sym(RPAR) {$1,$2}) { $1 }
tail:
reg(sub_pattern sym(CONS) tail {$1,$2,$3}) { PCons $1 }
| sub_pattern { $1 }
(* Expressions *)
expr:
base_cond__open(expr) { $1 }
| match_expr(base_cond) { Match $1 }
base_cond__open(x):
base_expr(x)
| conditional(x) { $1 }
base_cond:
base_cond__open(base_cond) { $1 }
base_expr(right_expr):
let_expr(right_expr)
| fun_expr(right_expr)
| disj_expr_level { $1 }
| reg(tuple(disj_expr_level)) { ETuple $1 }
conditional(right_expr):
if_then_else(right_expr)
| if_then(right_expr) { If $1 }
if_then(right_expr):
reg(kwd(If) expr kwd(Then) right_expr {$1,$2,$3,$4}) { IfThen $1 }
if_then_else(right_expr):
reg(kwd(If) expr kwd(Then) closed_if kwd(Else) right_expr {
$1,$2,$3,$4,$5,$6 }) { IfThenElse $1 }
base_if_then_else__open(x):
base_expr(x) { $1 }
| if_then_else(x) { If $1 }
base_if_then_else:
base_if_then_else__open(base_if_then_else) { $1 }
closed_if:
base_if_then_else__open(closed_if) { $1 }
| match_expr(base_if_then_else) { Match $1 }
match_expr(right_expr):
reg(kwd(Match) expr kwd(With)
option(sym(VBAR)) cases(right_expr) {
$1,$2,$3, ($4, Utils.nsepseq_rev $5) })
| reg(match_nat(right_expr)) { $1 }
match_nat(right_expr):
kwd(MatchNat) expr kwd(With)
option(sym(VBAR)) cases(right_expr) {
let open Region in
let cast_name = Name {region=ghost; value="assert_pos"} in
let cast_path = {module_proj=None; value_proj=cast_name,[]} in
let cast_fun = Path {region=ghost; value=cast_path} in
let cast = Call {region=ghost; value=cast_fun,$2}
in $1, cast, $3, ($4, Utils.nsepseq_rev $5) }
cases(right_expr):
case(right_expr) { $1, [] }
| cases(base_cond) sym(VBAR) case(right_expr) {
let h,t = $1 in $3, ($2,h)::t }
case(right_expr):
pattern sym(ARROW) right_expr { $1,$2,$3 }
let_expr(right_expr):
reg(kwd(Let) let_bindings kwd(In) right_expr {$1,$2,$3,$4}) {
LetIn $1
}
fun_expr(right_expr):
reg(kwd(Fun) nseq(irrefutable) sym(ARROW) right_expr {$1,$2,$3,$4}) {
let Region.{region; value = kwd_fun, patterns, arrow, expr} = $1
in Fun (norm ~reg:(region, kwd_fun) patterns arrow expr) }
disj_expr_level:
reg(disj_expr) { ELogic (BoolExpr (Or $1)) }
| conj_expr_level { $1 }
bin_op(arg1,op,arg2):
arg1 op arg2 { {arg1=$1; op=$2; arg2=$3} }
un_op(op,arg):
op arg { {op=$1; arg=$2} }
disj_expr:
bin_op(disj_expr_level, sym(BOOL_OR), conj_expr_level)
| bin_op(disj_expr_level, kwd(Or), conj_expr_level) { $1 }
conj_expr_level:
reg(conj_expr) { ELogic (BoolExpr (And $1)) }
| comp_expr_level { $1 }
conj_expr:
bin_op(conj_expr_level, sym(BOOL_AND), comp_expr_level) { $1 }
comp_expr_level:
reg(lt_expr) { ELogic (CompExpr (Lt $1)) }
| reg(le_expr) { ELogic (CompExpr (Leq $1)) }
| reg(gt_expr) { ELogic (CompExpr (Gt $1)) }
| reg(ge_expr) { ELogic (CompExpr (Geq $1)) }
| reg(eq_expr) { ELogic (CompExpr (Equal $1)) }
| reg(ne_expr) { ELogic (CompExpr (Neq $1)) }
| cat_expr_level { $1 }
lt_expr:
bin_op(comp_expr_level, sym(LT), cat_expr_level) { $1 }
le_expr:
bin_op(comp_expr_level, sym(LE), cat_expr_level) { $1 }
gt_expr:
bin_op(comp_expr_level, sym(GT), cat_expr_level) { $1 }
ge_expr:
bin_op(comp_expr_level, sym(GE), cat_expr_level) { $1 }
eq_expr:
bin_op(comp_expr_level, sym(EQ), cat_expr_level) { $1 }
ne_expr:
bin_op(comp_expr_level, sym(NE), cat_expr_level) { $1 }
cat_expr_level:
reg(cat_expr) { EString (Cat $1) }
| reg(append_expr) { Append $1 }
| cons_expr_level { $1 }
cat_expr:
bin_op(cons_expr_level, sym(CAT), cat_expr_level) { $1 }
append_expr:
cons_expr_level sym(APPEND) cat_expr_level { $1,$2,$3 }
cons_expr_level:
reg(cons_expr) { Cons $1 }
| add_expr_level { $1 }
cons_expr:
add_expr_level sym(CONS) cons_expr_level { $1,$2,$3 }
add_expr_level:
reg(plus_expr) { EArith (Add $1) }
| reg(minus_expr) { EArith (Sub $1) }
| mult_expr_level { $1 }
plus_expr:
bin_op(add_expr_level, sym(PLUS), mult_expr_level) { $1 }
minus_expr:
bin_op(add_expr_level, sym(MINUS), mult_expr_level) { $1 }
mult_expr_level:
reg(times_expr) { EArith (Mult $1) }
| reg(div_expr) { EArith (Div $1) }
| reg(mod_expr) { EArith (Mod $1) }
| unary_expr_level { $1 }
times_expr:
bin_op(mult_expr_level, sym(TIMES), unary_expr_level) { $1 }
div_expr:
bin_op(mult_expr_level, sym(SLASH), unary_expr_level) { $1 }
mod_expr:
bin_op(mult_expr_level, kwd(Mod), unary_expr_level) { $1 }
unary_expr_level:
reg(uminus_expr) { EArith (Neg $1) }
| reg(not_expr) { ELogic (BoolExpr (Not $1)) }
| call_expr_level { $1 }
uminus_expr:
un_op(sym(MINUS), core_expr) { $1 }
not_expr:
un_op(kwd(Not), core_expr) { $1 }
call_expr_level:
reg(call_expr) { Call $1 }
| core_expr { $1 }
call_expr:
call_expr_level core_expr { $1,$2 }
core_expr:
reg(Int) { EArith (Int $1) }
| reg(Mtz) { EArith (Mtz $1) }
| reg(Nat) { EArith (Nat $1) }
| reg(path) { Path $1 }
| string { EString (String $1) }
| unit { Unit $1 }
| kwd(False) { ELogic (BoolExpr (False $1)) }
| kwd(True) { ELogic (BoolExpr ( True $1)) }
| list_of(expr) { EList $1 }
| reg(par(expr)) { Par $1 }
| constr { EConstr $1 }
| reg(sequence) { Seq $1 }
| reg(record_expr) { ERecord $1 }
path:
reg(struct_name) sym(DOT) nsepseq(selection,sym(DOT)) {
let head, tail = $3 in
let seq = Name $1, ($2,head)::tail
in {module_proj=None; value_proj=seq}
}
| module_name sym(DOT) nsepseq(selection,sym(DOT)) {
{module_proj = Some ($1,$2); value_proj=$3}
}
| ident {
{module_proj = None; value_proj = Name $1, []} }
selection:
ident { Name $1 }
| reg(par(reg(Int))) { Component $1 }
record_expr:
sym(LBRACE)
sep_or_term_list(reg(field_assignment),sym(SEMI))
sym(RBRACE) {
let elements, terminator = $2 in
{opening = LBrace $1;
elements = Some elements;
terminator;
closing = RBrace $3} }
field_assignment:
field_name sym(EQ) expr {
{field_name=$1; assignment=$2; field_expr=$3} }
sequence:
kwd(Begin) sep_or_term_list(expr,sym(SEMI)) kwd(End) {
let elements, terminator = $2 in
{opening = Begin $1;
elements = Some elements;
terminator;
closing = End $3} }

View File

@ -0,0 +1,48 @@
(* Driver for the parser of Mini-ML *)
(* Error printing and exception tracing *)
Printexc.record_backtrace true;;
(* Path to the Mini-ML standard library *)
let lib_path =
match EvalOpt.libs with
[] -> ""
| libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path
in List.fold_right mk_I libs ""
(* Opening the input channel and setting the lexing engine *)
let cin, reset =
match EvalOpt.input with
None | Some "-" -> stdin, fun ?(line=1) _buffer -> ignore line
| Some file -> open_in file, Lexer.reset ~file
let buffer = Lexing.from_channel cin
let () = reset buffer
(* Tokeniser *)
let tokeniser =
if Utils.String.Set.mem "lexer" EvalOpt.verbose then
Lexer.get_token ~log:(stdout, Lexer.output_token buffer)
else Lexer.get_token ?log:None
let () =
try
let ast = Parser.program tokeniser buffer in
if Utils.String.Set.mem "unparsing" EvalOpt.verbose then
AST.print_tokens ~undo:true ast
else () (* AST.print_tokens ast *)
with
Lexer.Error diag ->
close_in cin; Lexer.prerr ~kind:"Lexical" diag
| Parser.Error ->
let start = Pos.from_byte (Lexing.lexeme_start_p buffer)
and stop = Pos.from_byte (Lexing.lexeme_end_p buffer) in
let region = Region.make ~start ~stop in
close_in cin;
Lexer.prerr ~kind:"Syntactical"
Region.{value="Parse error."; region}
| Sys_error msg -> Utils.highlight msg

View File

@ -0,0 +1,2 @@
module Region = Region
module Pos = Pos

View File

@ -0,0 +1,141 @@
(* Abstract lexical tokens for Mini-ML *)
type t =
(* Symbols *)
ARROW
| CONS
| CAT
| APPEND
| MINUS
| PLUS
| SLASH
| TIMES
| LPAR
| RPAR
| LBRACK
| RBRACK
| LBRACE
| RBRACE
| COMMA
| SEMI
| VBAR
| COLON
| DOT
| WILD
| EQ
| NE
| LT
| GT
| LE
| GE
| BOOL_OR
| BOOL_AND
(* Identifiers, numbers and strings *)
| Ident of string
| Constr of string
| Int of (string * Z.t)
| Nat of (string * Z.t)
| Mtz of (string * Z.t)
| Str of string
(* Keywords *)
| And
| Begin
| Else
| End
| False
| Fun
| If
| In
| Let
| List
| Map
| Match
| Mod
| Not
| Of
| Or
| Set
| Then
| True
| Type
| With
| LetEntry
| MatchNat
(*
| Contract
| Sig
| Struct
*)
(* Virtual tokens *)
| EOF (* End of file *)
type token = t
let to_string = function
ARROW -> "->"
| CONS -> "::"
| CAT -> "^"
| APPEND -> "@"
| MINUS -> "-"
| PLUS -> "+"
| SLASH -> "/"
| TIMES -> "*"
| LPAR -> "("
| RPAR -> ")"
| LBRACK -> "["
| RBRACK -> "]"
| LBRACE -> "{"
| RBRACE -> "}"
| COMMA -> ","
| SEMI -> ";"
| VBAR -> "|"
| COLON -> ":"
| DOT -> "."
| WILD -> "_"
| EQ -> "="
| NE -> "<>"
| LT -> "<"
| GT -> ">"
| LE -> "<="
| GE -> ">="
| BOOL_OR -> "||"
| BOOL_AND -> "&&"
| Ident id -> Printf.sprintf "Ident %s" id
| Constr id -> Printf.sprintf "Constr %s" id
| Int (lex,z) -> Printf.sprintf "Int %s (%s)" lex (Z.to_string z)
| Nat (lex,z) -> Printf.sprintf "Nat %s (%s)" lex (Z.to_string z)
| Mtz (lex,z) -> Printf.sprintf "Mtz %s (%s)" lex (Z.to_string z)
| Str n -> Printf.sprintf "Str \"%s\"" n
| And -> "and"
| Begin -> "begin"
| Else -> "else"
| End -> "end"
| False -> "false"
| Fun -> "fun"
| If -> "if"
| In -> "in"
| Let -> "let"
| List -> "list"
| Map -> "map"
| Match -> "match"
| Mod -> "mod"
| Not -> "not"
| Of -> "of"
| Or -> "or"
| Set -> "set"
| Then -> "then"
| True -> "true"
| Type -> "type"
| With -> "with"
| LetEntry -> "let%entry"
| MatchNat -> "match%nat"
| EOF -> "EOF"

View File

@ -0,0 +1,100 @@
(* Lexical tokens for Mini-ML *)
type t =
(* Symbols *)
ARROW (* "->" *)
| CONS (* "::" *)
| CAT (* "^" *)
| APPEND (* "@" *)
(* Arithmetics *)
| MINUS (* "-" *)
| PLUS (* "+" *)
| SLASH (* "/" *)
| TIMES (* "*" *)
(* Compounds *)
| LPAR (* "(" *)
| RPAR (* ")" *)
| LBRACK (* "[" *)
| RBRACK (* "]" *)
| LBRACE (* "{" *)
| RBRACE (* "}" *)
(* Separators *)
| COMMA (* "," *)
| SEMI (* ";" *)
| VBAR (* "|" *)
| COLON (* ":" *)
| DOT (* "." *)
(* Wildcard *)
| WILD (* "_" *)
(* Comparisons *)
| EQ (* "=" *)
| NE (* "<>" *)
| LT (* "<" *)
| GT (* ">" *)
| LE (* "=<" *)
| GE (* ">=" *)
| BOOL_OR (* "||" *)
| BOOL_AND (* "&&" *)
(* Identifiers, labels, numbers and strings *)
| Ident of string
| Constr of string
| Int of (string * Z.t)
| Nat of (string * Z.t)
| Mtz of (string * Z.t)
| Str of string
(* Keywords *)
| And
| Begin
| Else
| End
| False
| Fun
| If
| In
| Let
| List
| Map
| Match
| Mod
| Not
| Of
| Or
| Set
| Then
| True
| Type
| With
(* Liquidity specific *)
| LetEntry
| MatchNat
(*
| Contract
| Sig
| Struct
*)
(* Virtual tokens *)
| EOF (* End of file *)
type token = t
val to_string: t -> string

View File

@ -0,0 +1,154 @@
(* Utility types and functions *)
(* Identity *)
let id x = x
(* Combinators *)
let (<@) f g x = f (g x)
let swap f x y = f y x
let lambda = fun x _ -> x
let curry f x y = f (x,y)
let uncurry f (x,y) = f x y
(* Parametric rules for sequences *)
type 'a nseq = 'a * 'a list
type ('a,'sep) nsepseq = 'a * ('sep * 'a) list
type ('a,'sep) sepseq = ('a,'sep) nsepseq option
(* Consing *)
let nseq_cons x (hd,tl) = x, hd::tl
let nsepseq_cons x sep (hd,tl) = x, (sep,hd)::tl
let sepseq_cons x sep = function
None -> x, []
| Some (hd,tl) -> x, (sep,hd)::tl
(* Rightwards iterators *)
let nseq_foldl f a (hd,tl) = List.fold_left f a (hd::tl)
let nsepseq_foldl f a (hd,tl) =
List.fold_left (fun a (_,e) -> f a e) (f a hd) tl
let sepseq_foldl f a = function
None -> a
| Some s -> nsepseq_foldl f a s
let nseq_iter f (hd,tl) = List.iter f (hd::tl)
let nsepseq_iter f (hd,tl) = f hd; List.iter (f <@ snd) tl
let sepseq_iter f = function
None -> ()
| Some s -> nsepseq_iter f s
(* Reversing *)
let nseq_rev (hd,tl) =
let rec aux acc = function
[] -> acc
| x::l -> aux (nseq_cons x acc) l
in aux (hd,[]) tl
let nsepseq_rev =
let rec aux acc = function
hd, (sep,snd)::tl -> aux ((sep,hd)::acc) (snd,tl)
| hd, [] -> hd, acc in
function
hd, (sep,snd)::tl -> aux [sep,hd] (snd,tl)
| s -> s
let sepseq_rev = function
None -> None
| Some seq -> Some (nsepseq_rev seq)
(* Leftwards iterators *)
let nseq_foldr f (hd,tl) = List.fold_right f (hd::tl)
let nsepseq_foldr f (hd,tl) a = f hd (List.fold_right (f <@ snd) tl a)
let sepseq_foldr f = function
None -> fun a -> a
| Some s -> nsepseq_foldr f s
(* Conversions to lists *)
let nseq_to_list (x,y) = x::y
let nsepseq_to_list (x,y) = x :: List.map snd y
let sepseq_to_list = function
None -> []
| Some s -> nsepseq_to_list s
(* Optional values *)
module Option = struct
let apply f x =
match x with
Some y -> Some (f y)
| None -> None
let rev_apply x y =
match x with
Some f -> f y
| None -> y
let to_string = function
None -> ""
| Some x -> x
end
(* Modules based on [String], like sets and maps. *)
module String = struct
include String
module Ord =
struct
type nonrec t = t
let compare = compare
end
module Map = Map.Make (Ord)
module Set = Set.Make (Ord)
end
(* Integers *)
module Int = struct
type t = int
module Ord =
struct
type nonrec t = t
let compare = compare
end
module Map = Map.Make (Ord)
module Set = Set.Make (Ord)
end
(* Effectful symbol generator *)
let gen_sym =
let counter = ref 0 in
fun () -> incr counter; "v" ^ string_of_int !counter
(* General tracing function *)
let trace text = function
None -> ()
| Some chan -> output_string chan text; flush chan
(* Printing a string in red to standard error *)
let highlight msg = Printf.eprintf "\027[31m%s\027[0m\n%!" msg

View File

@ -0,0 +1,97 @@
(* Utility types and functions *)
(* Identity *)
val id : 'a -> 'a
(* Combinators *)
val ( <@ ) : ('a -> 'b) -> ('c -> 'a) -> 'c -> 'b
val swap : ('a -> 'b -> 'c) -> 'b -> 'a -> 'c
val lambda : 'a -> 'b -> 'a
val curry : ('a * 'b -> 'c) -> 'a -> 'b -> 'c
val uncurry : ('a -> 'b -> 'c) -> 'a * 'b -> 'c
(* Parametric rules for sequences
nseq: non-empty sequence;
sepseq: (possibly empty) sequence of separated items;
nsepseq: non-empty sequence of separated items.
*)
type 'a nseq = 'a * 'a list
type ('a,'sep) nsepseq = 'a * ('sep * 'a) list
type ('a,'sep) sepseq = ('a,'sep) nsepseq option
(* Consing *)
val nseq_cons: 'a -> 'a nseq -> 'a nseq
val nsepseq_cons: 'a -> 'sep -> ('a,'sep) nsepseq -> ('a,'sep) nsepseq
val sepseq_cons: 'a -> 'sep -> ('a,'sep) sepseq -> ('a,'sep) nsepseq
(* Reversing *)
val nseq_rev: 'a nseq -> 'a nseq
val nsepseq_rev: ('a,'sep) nsepseq -> ('a,'sep) nsepseq
val sepseq_rev: ('a,'sep) sepseq -> ('a,'sep) sepseq
(* Rightwards iterators *)
val nseq_foldl: ('a -> 'b -> 'a) -> 'a -> 'b nseq -> 'a
val nsepseq_foldl: ('a -> 'b -> 'a) -> 'a -> ('b,'c) nsepseq -> 'a
val sepseq_foldl: ('a -> 'b -> 'a) -> 'a -> ('b,'c) sepseq -> 'a
val nseq_iter: ('a -> unit) -> 'a nseq -> unit
val nsepseq_iter: ('a -> unit) -> ('a,'b) nsepseq -> unit
val sepseq_iter: ('a -> unit) -> ('a,'b) sepseq -> unit
(* Leftwards iterators *)
val nseq_foldr: ('a -> 'b -> 'b) -> 'a nseq -> 'b -> 'b
val nsepseq_foldr: ('a -> 'b -> 'b) -> ('a,'c) nsepseq -> 'b -> 'b
val sepseq_foldr: ('a -> 'b -> 'b) -> ('a,'c) sepseq -> 'b -> 'b
(* Conversions to lists *)
val nseq_to_list: 'a nseq -> 'a list
val nsepseq_to_list: ('a,'b) nsepseq -> 'a list
val sepseq_to_list: ('a,'b) sepseq -> 'a list
(* Effectful symbol generator *)
val gen_sym: unit -> string
(* General tracing function *)
val trace: string -> out_channel option -> unit
(* Printing a string in red to standard error *)
val highlight: string -> unit
(* Working with optional values *)
module Option:
sig
val apply: ('a -> 'b) -> 'a option -> 'b option
val rev_apply: ('a -> 'a) option -> 'a -> 'a
val to_string: string option -> string
end
(* An extension to the standard module [String] *)
module String:
sig
include module type of String
module Map: Map.S with type key = t
module Set: Set.S with type elt = t
end
(* Integer maps *)
module Int:
sig
type t = int
module Map: Map.S with type key = t
module Set: Set.S with type elt = t
end

View File

@ -0,0 +1,10 @@
#!/bin/sh
set -e
if test -d ../../.git; then
echo true > dot_git_is_dir
else
echo false > dot_git_is_dir
cat .git >> dot_git_is_dir
fi

39
src/parser/ligodity/dune Normal file
View File

@ -0,0 +1,39 @@
(ocamllex Lexer)
(menhir
(merge_into Parser)
(modules ParToken Parser)
(flags -la 1 --explain --external-tokens Token))
(library
(name parser_ligodity)
(public_name ligo.parser.ligodity)
(modules AST ligodity Utils Version Lexer Parser Token)
;; (modules_without_implementation Error)
(libraries
str
zarith
tezos-utils
)
)
;; Les deux directives (rule) qui suivent sont pour le dev local.
;; Il suffit de faire "dune build Parser.exe" pour avoir un Parser.exe dans le dossier.
;; Pour le purger, il faut faire "dune clean".
;(rule
; (targets Parser.exe)
; (deps ParserMain.exe)
; (action (copy ParserMain.exe Parser.exe))
; (mode promote-until-clean))
;(rule
; (targets Lexer.exe)
; (deps LexerMain.exe)
; (action (copy LexerMain.exe Lexer.exe))
; (mode promote-until-clean))
(rule
(targets Version.ml)
(action
(progn (run "sh" "-c" "printf 'let version = \"%s\"'\\\\n \"$(echo UNKNOWN)\" > Version.ml")))
(mode promote-until-clean))

View File

@ -0,0 +1,4 @@
module Token = Token
module Lexer = Lexer
module AST = AST
module Parser = Parser

2
vendors/ligo-utils vendored

@ -1 +1 @@
Subproject commit 3a7d2a85f1792105a375e35aa03afa137b29a9af
Subproject commit b69e838bec0d89df643bbbdd6451760770c659e2