initial commit

Galfour 2019-02-25 21:29:29 +00:00
commit 9df0f6ad3a
36 changed files with 4958 additions and 0 deletions

.Lexer.ml.tag Normal file

@@ -0,0 +1 @@
ocamlc: -w -42

.LexerMain.tag Normal file (empty)

.Parser.mly.tag Normal file

@@ -0,0 +1 @@
--explain --external-tokens LexToken --base Parser ParToken.mly

.ParserMain.tag Normal file (empty)

.gitignore vendored Normal file

@@ -0,0 +1,6 @@
_build/*
*/_build
*~
.merlin
*/.merlin
*.install

.links Normal file

@@ -0,0 +1,2 @@
$HOME/git/OCaml-build/Makefile
$HOME/git/OCaml-build/Makefile.cfg

AST.ml Normal file

@@ -0,0 +1,916 @@
(* Abstract Syntax Tree (AST) for Ligo *)
open Utils
(* Regions
The AST carries all the regions where tokens have been found by the
lexer, plus additional regions corresponding to whole subtrees
(like entire expressions, patterns etc.). These regions are needed
for error reporting and source-to-source transformations. To make
these pervasive regions more legible, we define singleton types for
the symbols, keywords etc. with suggestive names like "kwd_and"
denoting the _region_ of the occurrence of the keyword "and".
*)
type 'a reg = 'a Region.reg
let rec last to_region = function
[] -> Region.ghost
| [x] -> to_region x
| _::t -> last to_region t
let nseq_to_region to_region (hd,tl) =
Region.cover (to_region hd) (last to_region tl)
let nsepseq_to_region to_region (hd,tl) =
let reg (_,item) = to_region item in
Region.cover (to_region hd) (last reg tl)
let sepseq_to_region to_region = function
None -> Region.ghost
| Some seq -> nsepseq_to_region to_region seq
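(* Example (a sketch): for a separated sequence standing for the
   source text "a, b, c", the covering region runs from the first
   item to the last:
     nsepseq_to_region to_region (a, [comma1, b; comma2, c])
     = Region.cover (to_region a) (to_region c)
*)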
(* Keywords of Ligo *)
type kwd_begin = Region.t
type kwd_const = Region.t
type kwd_down = Region.t
type kwd_if = Region.t
type kwd_in = Region.t
type kwd_is = Region.t
type kwd_for = Region.t
type kwd_function = Region.t
type kwd_parameter = Region.t
type kwd_storage = Region.t
type kwd_type = Region.t
type kwd_of = Region.t
type kwd_operations = Region.t
type kwd_var = Region.t
type kwd_end = Region.t
type kwd_then = Region.t
type kwd_else = Region.t
type kwd_match = Region.t
type kwd_procedure = Region.t
type kwd_null = Region.t
type kwd_record = Region.t
type kwd_step = Region.t
type kwd_to = Region.t
type kwd_mod = Region.t
type kwd_not = Region.t
type kwd_while = Region.t
type kwd_with = Region.t
(* Data constructors *)
type c_False = Region.t
type c_None = Region.t
type c_Some = Region.t
type c_True = Region.t
type c_Unit = Region.t
(* Symbols *)
type semi = Region.t
type comma = Region.t
type lpar = Region.t
type rpar = Region.t
type lbrace = Region.t
type rbrace = Region.t
type lbracket = Region.t
type rbracket = Region.t
type cons = Region.t
type vbar = Region.t
type arrow = Region.t
type asgnmnt = Region.t
type equal = Region.t
type colon = Region.t
type bool_or = Region.t
type bool_and = Region.t
type lt = Region.t
type leq = Region.t
type gt = Region.t
type geq = Region.t
type neq = Region.t
type plus = Region.t
type minus = Region.t
type slash = Region.t
type times = Region.t
type dot = Region.t
type wild = Region.t
type cat = Region.t
(* Virtual tokens *)
type eof = Region.t
(* Literals *)
type variable = string reg
type fun_name = string reg
type type_name = string reg
type field_name = string reg
type map_name = string reg
type constr = string reg
(* Comma-separated non-empty lists *)
type 'a csv = ('a, comma) nsepseq
(* Bar-separated non-empty lists *)
type 'a bsv = ('a, vbar) nsepseq
(* Parentheses *)
type 'a par = (lpar * 'a * rpar) reg
(* Brackets compounds *)
type 'a brackets = (lbracket * 'a * rbracket) reg
(* Braced compounds *)
type 'a braces = (lbrace * 'a * rbrace) reg
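(* For instance (a sketch), a value of type [expr par] denotes a
   parenthesised expression "(e)" and carries the regions of "(" and
   ")" as well as, through [reg], the region of the whole. *)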
(* The Abstract Syntax Tree *)
type t = <
types : type_decl list;
parameter : parameter_decl;
storage : storage_decl;
operations : operations_decl;
lambdas : lambda_decl list;
block : block reg;
eof : eof
>
and ast = t
and parameter_decl = (kwd_parameter * variable * colon * type_expr) reg
and storage_decl = (kwd_storage * type_expr) reg
and operations_decl = (kwd_operations * type_expr) reg
(* Type declarations *)
and type_decl = (kwd_type * type_name * kwd_is * type_expr) reg
and type_expr =
Prod of cartesian
| Sum of (variant, vbar) nsepseq reg
| Record of record_type
| TypeApp of (type_name * type_tuple) reg
| ParType of type_expr par
| TAlias of variable
and cartesian = (type_expr, times) nsepseq reg
and variant = (constr * kwd_of * cartesian) reg
and record_type = (kwd_record * field_decls * kwd_end) reg
and field_decls = (field_decl, semi) nsepseq
and field_decl = (variable * colon * type_expr) reg
and type_tuple = (type_name, comma) nsepseq par
(* Function and procedure declarations *)
and lambda_decl =
FunDecl of fun_decl reg
| ProcDecl of proc_decl reg
and fun_decl = <
kwd_function : kwd_function;
var : variable;
param : parameters;
colon : colon;
ret_type : type_expr;
kwd_is : kwd_is;
body : block reg;
kwd_with : kwd_with;
return : expr
>
and proc_decl = <
kwd_procedure : kwd_procedure;
var : variable;
param : parameters;
kwd_is : kwd_is;
body : block reg
>
and parameters = (param_decl, semi) nsepseq par
and param_decl = (var_kind * variable * colon * type_expr) reg
and var_kind =
Mutable of kwd_var
| Const of kwd_const
and block = <
decls : value_decls;
opening : kwd_begin;
instr : instructions;
close : kwd_end
>
and value_decls = (var_decl reg, semi) sepseq reg
and var_decl = <
kind : var_kind;
var : variable;
colon : colon;
vtype : type_expr;
setter : Region.t; (* "=" or ":=" *)
init : expr
>
and instructions = (instruction, semi) nsepseq reg
and instruction =
Single of single_instr
| Block of block reg
and single_instr =
Cond of conditional reg
| Match of match_instr reg
| Asgnmnt of asgnmnt_instr
| Loop of loop
| ProcCall of fun_call
| Null of kwd_null
and conditional = <
kwd_if : kwd_if;
test : expr;
kwd_then : kwd_then;
ifso : instruction;
kwd_else : kwd_else;
ifnot : instruction
>
and match_instr = <
kwd_match : kwd_match;
expr : expr;
kwd_with : kwd_with;
cases : cases;
kwd_end : kwd_end
>
and cases = (case, vbar) nsepseq reg
and case = (pattern * arrow * instruction) reg
and asgnmnt_instr = (variable * asgnmnt * expr) reg
and loop =
While of while_loop
| For of for_loop
and while_loop = (kwd_while * expr * block reg) reg
and for_loop =
ForInt of for_int reg
| ForCollect of for_collect reg
and for_int = <
kwd_for : kwd_for;
asgnmnt : asgnmnt_instr;
down : kwd_down option;
kwd_to : kwd_to;
bound : expr;
step : (kwd_step * expr) option;
block : block reg
>
and for_collect = <
kwd_for : kwd_for;
var : variable;
bind_to : (arrow * variable) option;
kwd_in : kwd_in;
expr : expr;
block : block reg
>
(* Expressions *)
and expr =
Or of (expr * bool_or * expr) reg
| And of (expr * bool_and * expr) reg
| Lt of (expr * lt * expr) reg
| Leq of (expr * leq * expr) reg
| Gt of (expr * gt * expr) reg
| Geq of (expr * geq * expr) reg
| Equal of (expr * equal * expr) reg
| Neq of (expr * neq * expr) reg
| Cat of (expr * cat * expr) reg
| Cons of (expr * cons * expr) reg
| Add of (expr * plus * expr) reg
| Sub of (expr * minus * expr) reg
| Mult of (expr * times * expr) reg
| Div of (expr * slash * expr) reg
| Mod of (expr * kwd_mod * expr) reg
| Neg of (minus * expr) reg
| Not of (kwd_not * expr) reg
| Int of (Lexer.lexeme * Z.t) reg
| Var of Lexer.lexeme reg
| String of Lexer.lexeme reg
| Bytes of (Lexer.lexeme * MBytes.t) reg
| False of c_False
| True of c_True
| Unit of c_Unit
| Tuple of tuple
| List of (expr, comma) nsepseq brackets
| EmptyList of empty_list
| Set of (expr, comma) nsepseq braces
| EmptySet of empty_set
| NoneExpr of none_expr
| FunCall of fun_call
| ConstrApp of constr_app
| SomeApp of (c_Some * arguments) reg
| MapLookUp of map_lookup reg
| ParExpr of expr par
and tuple = (expr, comma) nsepseq par
and empty_list =
(lbracket * rbracket * colon * type_expr) par
and empty_set =
(lbrace * rbrace * colon * type_expr) par
and none_expr =
(c_None * colon * type_expr) par
and fun_call = (fun_name * arguments) reg
and arguments = tuple
and constr_app = (constr * arguments) reg
and map_lookup = <
map_name : variable;
selector : dot;
index : expr brackets
>
(* Patterns *)
and pattern = (core_pattern, cons) nsepseq reg
and core_pattern =
PVar of Lexer.lexeme reg
| PWild of wild
| PInt of (Lexer.lexeme * Z.t) reg
| PBytes of (Lexer.lexeme * MBytes.t) reg
| PString of Lexer.lexeme reg
| PUnit of c_Unit
| PFalse of c_False
| PTrue of c_True
| PNone of c_None
| PSome of (c_Some * core_pattern par) reg
| PList of list_pattern
| PTuple of (core_pattern, comma) nsepseq par
and list_pattern =
Sugar of (core_pattern, comma) sepseq brackets
| Raw of (core_pattern * cons * pattern) par
(* Projecting regions *)
open Region
let type_expr_to_region = function
Prod node -> node.region
| Sum node -> node.region
| Record node -> node.region
| TypeApp node -> node.region
| ParType node -> node.region
| TAlias node -> node.region
let expr_to_region = function
Or {region; _}
| And {region; _}
| Lt {region; _}
| Leq {region; _}
| Gt {region; _}
| Geq {region; _}
| Equal {region; _}
| Neq {region; _}
| Cat {region; _}
| Cons {region; _}
| Add {region; _}
| Sub {region; _}
| Mult {region; _}
| Div {region; _}
| Mod {region; _}
| Neg {region; _}
| Not {region; _}
| Int {region; _}
| Var {region; _}
| String {region; _}
| Bytes {region; _}
| False region
| True region
| Unit region
| Tuple {region; _}
| List {region; _}
| EmptyList {region; _}
| Set {region; _}
| EmptySet {region; _}
| NoneExpr {region; _}
| FunCall {region; _}
| ConstrApp {region; _}
| SomeApp {region; _}
| MapLookUp {region; _}
| ParExpr {region; _} -> region
let var_kind_to_region = function
Mutable region
| Const region -> region
let instr_to_region = function
Single (Cond {region; _})
| Single (Match {region; _})
| Single (Asgnmnt {region; _})
| Single (Loop (While {region; _}))
| Single (Loop (For (ForInt {region; _})))
| Single (Loop (For (ForCollect {region; _})))
| Single (ProcCall {region; _})
| Single (Null region)
| Block {region; _} -> region
let core_pattern_to_region = function
PVar {region; _}
| PWild region
| PInt {region; _}
| PBytes {region; _}
| PString {region; _}
| PUnit region
| PFalse region
| PTrue region
| PNone region
| PSome {region; _}
| PList (Sugar {region; _})
| PList (Raw {region; _})
| PTuple {region; _} -> region
(* Printing the tokens with their source regions *)
let printf = Printf.printf
let compact (region: Region.t) =
region#compact ~offsets:EvalOpt.offsets EvalOpt.mode
let print_nsepseq sep print (head,tail) =
let print_aux (sep_reg, item) =
printf "%s: %s\n" (compact sep_reg) sep;
print item
in print head; List.iter print_aux tail
let print_sepseq sep print = function
None -> ()
| Some seq -> print_nsepseq sep print seq
let print_token region lexeme =
printf "%s: %s\n"(compact region) lexeme
let print_var {region; value=lexeme} =
printf "%s: Ident \"%s\"\n" (compact region) lexeme
let print_constr {region; value=lexeme} =
printf "%s: Constr \"%s\"\n"
(compact region) lexeme
let print_string {region; value=lexeme} =
printf "%s: String \"%s\"\n"
(compact region) lexeme
let print_bytes {region; value = lexeme, abstract} =
printf "%s: Bytes (\"%s\", \"0x%s\")\n"
(compact region) lexeme
(MBytes.to_hex abstract |> Hex.to_string)
let print_int {region; value = lexeme, abstract} =
printf "%s: Int (\"%s\", %s)\n"
(compact region) lexeme
(Z.to_string abstract)
let rec print_tokens ast =
List.iter print_type_decl ast#types;
print_parameter_decl ast#parameter;
print_storage_decl ast#storage;
print_operations_decl ast#operations;
List.iter print_lambda_decl ast#lambdas;
print_block ast#block;
print_token ast#eof "EOF"
and print_parameter_decl {value=node; _} =
let kwd_parameter, variable, colon, type_expr = node in
print_token kwd_parameter "parameter";
print_var variable;
print_token colon ":";
print_type_expr type_expr
and print_storage_decl {value=node; _} =
let kwd_storage, type_expr = node in
print_token kwd_storage "storage";
print_type_expr type_expr
and print_operations_decl {value=node; _} =
let kwd_operations, type_expr = node in
print_token kwd_operations "operations";
print_type_expr type_expr
and print_type_decl {value=node; _} =
let kwd_type, type_name, kwd_is, type_expr = node in
print_token kwd_type "type";
print_var type_name;
print_token kwd_is "is";
print_type_expr type_expr
and print_type_expr = function
Prod cartesian -> print_cartesian cartesian
| Sum sum_type -> print_sum_type sum_type
| Record record_type -> print_record_type record_type
| TypeApp type_app -> print_type_app type_app
| ParType par_type -> print_par_type par_type
| TAlias type_alias -> print_var type_alias
and print_cartesian {value=sequence; _} =
print_nsepseq "*" print_type_expr sequence
and print_variant {value=node; _} =
let constr, kwd_of, cartesian = node in
print_constr constr;
print_token kwd_of "of";
print_cartesian cartesian
and print_sum_type {value=sequence; _} =
print_nsepseq "|" print_variant sequence
and print_record_type {value=node; _} =
let kwd_record, field_decls, kwd_end = node in
print_token kwd_record "record";
print_field_decls field_decls;
print_token kwd_end "end"
and print_type_app {value=node; _} =
let type_name, type_tuple = node in
print_var type_name;
print_type_tuple type_tuple
and print_par_type {value=node; _} =
let lpar, type_expr, rpar = node in
print_token lpar "(";
print_type_expr type_expr;
print_token rpar ")"
and print_field_decls sequence =
print_nsepseq ";" print_field_decl sequence
and print_field_decl {value=node; _} =
let var, colon, type_expr = node in
print_var var;
print_token colon ":";
print_type_expr type_expr
and print_type_tuple {value=node; _} =
let lpar, sequence, rpar = node in
print_token lpar "(";
print_nsepseq "," print_var sequence;
print_token rpar ")"
and print_lambda_decl = function
FunDecl fun_decl -> print_fun_decl fun_decl
| ProcDecl proc_decl -> print_proc_decl proc_decl
and print_fun_decl {value=node; _} =
print_token node#kwd_function "function";
print_var node#var;
print_parameters node#param;
print_token node#colon ":";
print_type_expr node#ret_type;
print_token node#kwd_is "is";
print_block node#body;
print_token node#kwd_with "with";
print_expr node#return
and print_proc_decl {value=node; _} =
print_token node#kwd_procedure "procedure";
print_var node#var;
print_parameters node#param;
print_token node#kwd_is "is";
print_block node#body
and print_parameters {value=node; _} =
let lpar, sequence, rpar = node in
print_token lpar "(";
print_nsepseq ";" print_param_decl sequence;
print_token rpar ")"
and print_param_decl {value=node; _} =
let var_kind, variable, colon, type_expr = node in
print_var_kind var_kind;
print_var variable;
print_token colon ":";
print_type_expr type_expr
and print_var_kind = function
Mutable kwd_var -> print_token kwd_var "var"
| Const kwd_const -> print_token kwd_const "const"
and print_block {value=node; _} =
print_value_decls node#decls;
print_token node#opening "begin";
print_instructions node#instr;
print_token node#close "end"
and print_value_decls {value=sequence; _} =
print_sepseq ";" print_var_decl sequence
and print_var_decl {value=node; _} =
let setter =
match node#kind with
Mutable _ -> ":="
| Const _ -> "=" in
print_var_kind node#kind;
print_var node#var;
print_token node#colon ":";
print_type_expr node#vtype;
print_token node#setter setter;
print_expr node#init
and print_instructions {value=sequence; _} =
print_nsepseq ";" print_instruction sequence
and print_instruction = function
Single instr -> print_single_instr instr
| Block block -> print_block block
and print_single_instr = function
Cond {value; _} -> print_conditional value
| Match {value; _} -> print_match_instr value
| Asgnmnt instr -> print_asgnmnt_instr instr
| Loop loop -> print_loop loop
| ProcCall fun_call -> print_fun_call fun_call
| Null kwd_null -> print_token kwd_null "null"
and print_conditional node =
print_token node#kwd_if "if";
print_expr node#test;
print_token node#kwd_then "then";
print_instruction node#ifso;
print_token node#kwd_else "else";
print_instruction node#ifnot
and print_match_instr node =
print_token node#kwd_match "match";
print_expr node#expr;
print_token node#kwd_with "with";
print_cases node#cases;
print_token node#kwd_end "end"
and print_cases {value=sequence; _} =
print_nsepseq "|" print_case sequence
and print_case {value=node; _} =
let pattern, arrow, instruction = node in
print_pattern pattern;
print_token arrow "->";
print_instruction instruction
and print_asgnmnt_instr {value=node; _} =
let variable, asgnmnt, expr = node in
print_var variable;
print_token asgnmnt ":=";
print_expr expr
and print_loop = function
While while_loop -> print_while_loop while_loop
| For for_loop -> print_for_loop for_loop
and print_while_loop {value=node; _} =
let kwd_while, expr, block = node in
print_token kwd_while "while";
print_expr expr;
print_block block
and print_for_loop = function
ForInt for_int -> print_for_int for_int
| ForCollect for_collect -> print_for_collect for_collect
and print_for_int {value=node; _} =
print_token node#kwd_for "for";
print_asgnmnt_instr node#asgnmnt;
print_down node#down;
print_token node#kwd_to "to";
print_expr node#bound;
print_step node#step;
print_block node#block
and print_down = function
Some kwd_down -> print_token kwd_down "down"
| None -> ()
and print_step = function
Some (kwd_step, expr) ->
print_token kwd_step "step";
print_expr expr
| None -> ()
and print_for_collect {value=node; _} =
print_token node#kwd_for "for";
print_var node#var;
print_bind_to node#bind_to;
print_token node#kwd_in "in";
print_expr node#expr;
print_block node#block
and print_bind_to = function
Some (arrow, variable) ->
print_token arrow "->";
print_var variable
| None -> ()
and print_expr = function
Or {value = expr1, bool_or, expr2; _} ->
print_expr expr1; print_token bool_or "||"; print_expr expr2
| And {value = expr1, bool_and, expr2; _} ->
print_expr expr1; print_token bool_and "&&"; print_expr expr2
| Lt {value = expr1, lt, expr2; _} ->
print_expr expr1; print_token lt "<"; print_expr expr2
| Leq {value = expr1, leq, expr2; _} ->
print_expr expr1; print_token leq "<="; print_expr expr2
| Gt {value = expr1, gt, expr2; _} ->
print_expr expr1; print_token gt ">"; print_expr expr2
| Geq {value = expr1, geq, expr2; _} ->
print_expr expr1; print_token geq ">="; print_expr expr2
| Equal {value = expr1, equal, expr2; _} ->
print_expr expr1; print_token equal "="; print_expr expr2
| Neq {value = expr1, neq, expr2; _} ->
print_expr expr1; print_token neq "=/="; print_expr expr2
| Cat {value = expr1, cat, expr2; _} ->
print_expr expr1; print_token cat "^"; print_expr expr2
| Cons {value = expr1, cons, expr2; _} ->
print_expr expr1; print_token cons "<:"; print_expr expr2
| Add {value = expr1, add, expr2; _} ->
print_expr expr1; print_token add "+"; print_expr expr2
| Sub {value = expr1, sub, expr2; _} ->
print_expr expr1; print_token sub "-"; print_expr expr2
| Mult {value = expr1, mult, expr2; _} ->
print_expr expr1; print_token mult "*"; print_expr expr2
| Div {value = expr1, div, expr2; _} ->
print_expr expr1; print_token div "/"; print_expr expr2
| Mod {value = expr1, kwd_mod, expr2; _} ->
print_expr expr1; print_token kwd_mod "mod"; print_expr expr2
| Neg {value = minus, expr; _} ->
print_token minus "-"; print_expr expr
| Not {value = kwd_not, expr; _} ->
print_token kwd_not "not"; print_expr expr
| Int i -> print_int i
| Var v -> print_var v
| String s -> print_string s
| Bytes b -> print_bytes b
| False region -> print_token region "False"
| True region -> print_token region "True"
| Unit region -> print_token region "Unit"
| Tuple tuple -> print_tuple tuple
| List list -> print_list list
| EmptyList elist -> print_empty_list elist
| Set set -> print_set set
| EmptySet eset -> print_empty_set eset
| NoneExpr nexpr -> print_none_expr nexpr
| FunCall fun_call -> print_fun_call fun_call
| ConstrApp capp -> print_constr_app capp
| SomeApp sapp -> print_some_app sapp
| MapLookUp lookup -> print_map_lookup lookup
| ParExpr pexpr -> print_par_expr pexpr
and print_tuple {value=node; _} =
let lpar, sequence, rpar = node in
print_token lpar "(";
print_nsepseq "," print_expr sequence;
print_token rpar ")"
and print_list {value=node; _} =
let lbra, sequence, rbra = node in
print_token lbra "[";
print_nsepseq "," print_expr sequence;
print_token rbra "]"
and print_empty_list {value=node; _} =
let lpar, (lbracket, rbracket, colon, type_expr), rpar = node in
print_token lpar "(";
print_token lbracket "[";
print_token rbracket "]";
print_token colon ":";
print_type_expr type_expr;
print_token rpar ")"
and print_set {value=node; _} =
let lbrace, sequence, rbrace = node in
print_token lbrace "{";
print_nsepseq "," print_expr sequence;
print_token rbrace "}"
and print_empty_set {value=node; _} =
let lpar, (lbrace, rbrace, colon, type_expr), rpar = node in
print_token lpar "(";
print_token lbrace "{";
print_token rbrace "}";
print_token colon ":";
print_type_expr type_expr;
print_token rpar ")"
and print_none_expr {value=node; _} =
let lpar, (c_None, colon, type_expr), rpar = node in
print_token lpar "(";
print_token c_None "None";
print_token colon ":";
print_type_expr type_expr;
print_token rpar ")"
and print_fun_call {value=node; _} =
let fun_name, arguments = node in
print_var fun_name;
print_tuple arguments
and print_constr_app {value=node; _} =
let constr, arguments = node in
print_constr constr;
print_tuple arguments
and print_some_app {value=node; _} =
let c_Some, arguments = node in
print_token c_Some "Some";
print_tuple arguments
and print_map_lookup {value=node; _} =
let {value = lbracket, expr, rbracket; _} = node#index in
print_var node#map_name;
print_token node#selector ".";
print_token lbracket "[";
print_expr expr;
print_token rbracket "]"
and print_par_expr {value=node; _} =
let lpar, expr, rpar = node in
print_token lpar "(";
print_expr expr;
print_token rpar ")"
and print_pattern {value=sequence; _} =
print_nsepseq "<:" print_core_pattern sequence
and print_core_pattern = function
PVar var -> print_var var
| PWild wild -> print_token wild "_"
| PInt i -> print_int i
| PBytes b -> print_bytes b
| PString s -> print_string s
| PUnit region -> print_token region "Unit"
| PFalse region -> print_token region "False"
| PTrue region -> print_token region "True"
| PNone region -> print_token region "None"
| PSome psome -> print_psome psome
| PList pattern -> print_list_pattern pattern
| PTuple ptuple -> print_ptuple ptuple
and print_psome {value=node; _} =
let c_Some, patterns = node in
print_token c_Some "Some";
print_patterns patterns
and print_patterns {value=node; _} =
let lpar, core_pattern, rpar = node in
print_token lpar "(";
print_core_pattern core_pattern;
print_token rpar ")"
and print_list_pattern = function
Sugar sugar -> print_sugar sugar
| Raw raw -> print_raw raw
and print_sugar {value=node; _} =
let lbracket, sequence, rbracket = node in
print_token lbracket "[";
print_sepseq "," print_core_pattern sequence;
print_token rbracket "]"
and print_raw {value=node; _} =
let lpar, (core_pattern, cons, pattern), rpar = node in
print_token lpar "(";
print_core_pattern core_pattern;
print_token cons "<:";
print_pattern pattern;
print_token rpar ")"
and print_ptuple {value=node; _} =
let lpar, sequence, rpar = node in
print_token lpar "(";
print_nsepseq "," print_core_pattern sequence;
print_token rpar ")"

AST.mli Normal file

@@ -0,0 +1,384 @@
(* Abstract Syntax Tree (AST) for Ligo *)
open Utils
(* Regions
The AST carries all the regions where tokens have been found by the
lexer, plus additional regions corresponding to whole subtrees
(like entire expressions, patterns etc.). These regions are needed
for error reporting and source-to-source transformations. To make
these pervasive regions more legible, we define singleton types for
the symbols, keywords etc. with suggestive names like "kwd_and"
denoting the _region_ of the occurrence of the keyword "and".
*)
type 'a reg = 'a Region.reg
val nseq_to_region : ('a -> Region.t) -> 'a nseq -> Region.t
val nsepseq_to_region : ('a -> Region.t) -> ('a,'sep) nsepseq -> Region.t
val sepseq_to_region : ('a -> Region.t) -> ('a,'sep) sepseq -> Region.t
(* Keywords of Ligo *)
type kwd_begin = Region.t
type kwd_const = Region.t
type kwd_down = Region.t
type kwd_if = Region.t
type kwd_in = Region.t
type kwd_is = Region.t
type kwd_for = Region.t
type kwd_function = Region.t
type kwd_parameter = Region.t
type kwd_storage = Region.t
type kwd_type = Region.t
type kwd_of = Region.t
type kwd_operations = Region.t
type kwd_var = Region.t
type kwd_end = Region.t
type kwd_then = Region.t
type kwd_else = Region.t
type kwd_match = Region.t
type kwd_procedure = Region.t
type kwd_null = Region.t
type kwd_record = Region.t
type kwd_step = Region.t
type kwd_to = Region.t
type kwd_mod = Region.t
type kwd_not = Region.t
type kwd_while = Region.t
type kwd_with = Region.t
(* Data constructors *)
type c_False = Region.t
type c_None = Region.t
type c_Some = Region.t
type c_True = Region.t
type c_Unit = Region.t
(* Symbols *)
type semi = Region.t
type comma = Region.t
type lpar = Region.t
type rpar = Region.t
type lbrace = Region.t
type rbrace = Region.t
type lbracket = Region.t
type rbracket = Region.t
type cons = Region.t
type vbar = Region.t
type arrow = Region.t
type asgnmnt = Region.t
type equal = Region.t
type colon = Region.t
type bool_or = Region.t
type bool_and = Region.t
type lt = Region.t
type leq = Region.t
type gt = Region.t
type geq = Region.t
type neq = Region.t
type plus = Region.t
type minus = Region.t
type slash = Region.t
type times = Region.t
type dot = Region.t
type wild = Region.t
type cat = Region.t
(* Virtual tokens *)
type eof = Region.t
(* Literals *)
type variable = string reg
type fun_name = string reg
type type_name = string reg
type field_name = string reg
type map_name = string reg
type constr = string reg
(* Comma-separated non-empty lists *)
type 'a csv = ('a, comma) nsepseq
(* Bar-separated non-empty lists *)
type 'a bsv = ('a, vbar) nsepseq
(* Parentheses *)
type 'a par = (lpar * 'a * rpar) reg
(* Brackets compounds *)
type 'a brackets = (lbracket * 'a * rbracket) reg
(* Braced compounds *)
type 'a braces = (lbrace * 'a * rbrace) reg
(* The Abstract Syntax Tree *)
type t = <
types : type_decl list;
parameter : parameter_decl;
storage : storage_decl;
operations : operations_decl;
lambdas : lambda_decl list;
block : block reg;
eof : eof
>
and ast = t
and parameter_decl = (kwd_parameter * variable * colon * type_expr) reg
and storage_decl = (kwd_storage * type_expr) reg
and operations_decl = (kwd_operations * type_expr) reg
(* Type declarations *)
and type_decl = (kwd_type * type_name * kwd_is * type_expr) reg
and type_expr =
Prod of cartesian
| Sum of (variant, vbar) nsepseq reg
| Record of record_type
| TypeApp of (type_name * type_tuple) reg
| ParType of type_expr par
| TAlias of variable
and cartesian = (type_expr, times) nsepseq reg
and variant = (constr * kwd_of * cartesian) reg
and record_type = (kwd_record * field_decls * kwd_end) reg
and field_decls = (field_decl, semi) nsepseq
and field_decl = (variable * colon * type_expr) reg
and type_tuple = (type_name, comma) nsepseq par
(* Function and procedure declarations *)
and lambda_decl =
FunDecl of fun_decl reg
| ProcDecl of proc_decl reg
and fun_decl = <
kwd_function : kwd_function;
var : variable;
param : parameters;
colon : colon;
ret_type : type_expr;
kwd_is : kwd_is;
body : block reg;
kwd_with : kwd_with;
return : expr
>
and proc_decl = <
kwd_procedure : kwd_procedure;
var : variable;
param : parameters;
kwd_is : kwd_is;
body : block reg
>
and parameters = (param_decl, semi) nsepseq par
and param_decl = (var_kind * variable * colon * type_expr) reg
and var_kind =
Mutable of kwd_var
| Const of kwd_const
and block = <
decls : value_decls;
opening : kwd_begin;
instr : instructions;
close : kwd_end
>
and value_decls = (var_decl reg, semi) sepseq reg
and var_decl = <
kind : var_kind;
var : variable;
colon : colon;
vtype : type_expr;
setter : Region.t; (* "=" or ":=" *)
init : expr
>
and instructions = (instruction, semi) nsepseq reg
and instruction =
Single of single_instr
| Block of block reg
and single_instr =
Cond of conditional reg
| Match of match_instr reg
| Asgnmnt of asgnmnt_instr
| Loop of loop
| ProcCall of fun_call
| Null of kwd_null
and conditional = <
kwd_if : kwd_if;
test : expr;
kwd_then : kwd_then;
ifso : instruction;
kwd_else : kwd_else;
ifnot : instruction
>
and match_instr = <
kwd_match : kwd_match;
expr : expr;
kwd_with : kwd_with;
cases : cases;
kwd_end : kwd_end
>
and cases = (case, vbar) nsepseq reg
and case = (pattern * arrow * instruction) reg
and asgnmnt_instr = (variable * asgnmnt * expr) reg
and loop =
While of while_loop
| For of for_loop
and while_loop = (kwd_while * expr * block reg) reg
and for_loop =
ForInt of for_int reg
| ForCollect of for_collect reg
and for_int = <
kwd_for : kwd_for;
asgnmnt : asgnmnt_instr;
down : kwd_down option;
kwd_to : kwd_to;
bound : expr;
step : (kwd_step * expr) option;
block : block reg
>
and for_collect = <
kwd_for : kwd_for;
var : variable;
bind_to : (arrow * variable) option;
kwd_in : kwd_in;
expr : expr;
block : block reg
>
(* Expressions *)
and expr =
Or of (expr * bool_or * expr) reg
| And of (expr * bool_and * expr) reg
| Lt of (expr * lt * expr) reg
| Leq of (expr * leq * expr) reg
| Gt of (expr * gt * expr) reg
| Geq of (expr * geq * expr) reg
| Equal of (expr * equal * expr) reg
| Neq of (expr * neq * expr) reg
| Cat of (expr * cat * expr) reg
| Cons of (expr * cons * expr) reg
| Add of (expr * plus * expr) reg
| Sub of (expr * minus * expr) reg
| Mult of (expr * times * expr) reg
| Div of (expr * slash * expr) reg
| Mod of (expr * kwd_mod * expr) reg
| Neg of (minus * expr) reg
| Not of (kwd_not * expr) reg
| Int of (Lexer.lexeme * Z.t) reg
| Var of Lexer.lexeme reg
| String of Lexer.lexeme reg
| Bytes of (Lexer.lexeme * MBytes.t) reg
| False of c_False
| True of c_True
| Unit of c_Unit
| Tuple of tuple
| List of (expr, comma) nsepseq brackets
| EmptyList of empty_list
| Set of (expr, comma) nsepseq braces
| EmptySet of empty_set
| NoneExpr of none_expr
| FunCall of fun_call
| ConstrApp of constr_app
| SomeApp of (c_Some * arguments) reg
| MapLookUp of map_lookup reg
| ParExpr of expr par
and tuple = (expr, comma) nsepseq par
and empty_list =
(lbracket * rbracket * colon * type_expr) par
and empty_set =
(lbrace * rbrace * colon * type_expr) par
and none_expr =
(c_None * colon * type_expr) par
and fun_call = (fun_name * arguments) reg
and arguments = tuple
and constr_app = (constr * arguments) reg
and map_lookup = <
map_name : variable;
selector : dot;
index : expr brackets
>
(* Patterns *)
and pattern = (core_pattern, cons) nsepseq reg
and core_pattern =
PVar of Lexer.lexeme reg
| PWild of wild
| PInt of (Lexer.lexeme * Z.t) reg
| PBytes of (Lexer.lexeme * MBytes.t) reg
| PString of Lexer.lexeme reg
| PUnit of c_Unit
| PFalse of c_False
| PTrue of c_True
| PNone of c_None
| PSome of (c_Some * core_pattern par) reg
| PList of list_pattern
| PTuple of (core_pattern, comma) nsepseq par
and list_pattern =
Sugar of (core_pattern, comma) sepseq brackets
| Raw of (core_pattern * cons * pattern) par
(* Projecting regions *)
val type_expr_to_region : type_expr -> Region.t
val expr_to_region : expr -> Region.t
val var_kind_to_region : var_kind -> Region.t
val instr_to_region : instruction -> Region.t
val core_pattern_to_region : core_pattern -> Region.t
(* Printing *)
val print_tokens : t -> unit

Error.mli Normal file

@@ -0,0 +1,3 @@
type t = ..
type error = t

EvalOpt.ml Normal file

@@ -0,0 +1,143 @@
(* Parsing the command-line option for testing the Ligo lexer and
parser *)
let printf = Printf.printf
let sprintf = Printf.sprintf
let abort msg =
Utils.highlight (sprintf "Command-line error: %s\n" msg); exit 1
(* Help *)
let help () =
let file = Filename.basename Sys.argv.(0) in
printf "Usage: %s [<option> ...] [<input>.li | \"-\"]\n" file;
print_endline "where <input>.li is the Ligo source file (default: stdin),";
print_endline "and each <option> (if any) is one of the following:";
print_endline " -c, --copy Print lexemes of tokens and markup (lexer)";
print_endline " -t, --tokens Print tokens (lexer)";
print_endline " -u, --units Print tokens and markup (lexer)";
print_endline " -q, --quiet No output, except errors (default)";
print_endline " --columns Columns for source locations";
print_endline " --bytes Bytes for source locations";
print_endline " -v, --verbose=<stage> cmdline, parser";
print_endline " -h, --help This help";
exit 0
(* Specifying the command-line options a la GNU *)
let copy = ref false
and tokens = ref false
and units = ref false
and quiet = ref false
and columns = ref false
and bytes = ref false
and verbose = ref Utils.String.Set.empty
and input = ref None
let split_at_colon = Str.(split (regexp ":"))
let add_verbose d =
verbose := List.fold_left (Utils.swap Utils.String.Set.add)
!verbose
(split_at_colon d)
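(* For example, "--verbose=cmdline:parser" is split at the colon
   into ["cmdline"; "parser"], and both stages are added to the set
   [verbose]. *)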
let specs =
let open! Getopt in [
'c', "copy", set copy true, None;
't', "tokens", set tokens true, None;
'u', "units", set units true, None;
'q', "quiet", set quiet true, None;
noshort, "columns", set columns true, None;
noshort, "bytes", set bytes true, None;
'v', "verbose", None, Some add_verbose;
'h', "help", Some help, None
]
;;
(* Handler of anonymous arguments *)
let anonymous arg =
match !input with
None -> input := Some arg
| Some _ -> abort (sprintf "Multiple inputs")
;;
(* Parsing the command-line options *)
try Getopt.parse_cmdline specs anonymous with
Getopt.Error msg -> abort msg
;;
(* Checking options and exporting them as non-mutable values *)
type command = Quiet | Copy | Units | Tokens
let cmd =
match !quiet, !copy, !units, !tokens with
false, false, false, false
| true, false, false, false -> Quiet
| false, true, false, false -> Copy
| false, false, true, false -> Units
| false, false, false, true -> Tokens
| _ -> abort "Choose one of -q, -c, -u, -t."
let string_of convert = function
None -> "None"
| Some s -> sprintf "Some %s" (convert s)
let quote s = sprintf "\"%s\"" s
let verbose_str =
let apply e a =
if a <> "" then sprintf "%s, %s" e a else e
in Utils.String.Set.fold apply !verbose ""
let print_opt () =
printf "COMMAND LINE\n";
printf "copy = %b\n" !copy;
printf "tokens = %b\n" !tokens;
printf "units = %b\n" !units;
printf "quiet = %b\n" !quiet;
printf "columns = %b\n" !columns;
printf "bytes = %b\n" !bytes;
printf "verbose = \"%s\"\n" verbose_str;
printf "input = %s\n" (string_of quote !input)
;;
if Utils.String.Set.mem "cmdline" !verbose then print_opt ();;
let input =
match !input with
None | Some "-" -> !input
| Some file_path ->
if Filename.check_suffix file_path ".li"
then if Sys.file_exists file_path
then Some file_path
else abort "Source file not found."
else abort "Source file lacks the extension .ti."
(* Exporting remaining options as non-mutable values *)
let copy = !copy
and tokens = !tokens
and units = !units
and quiet = !quiet
and offsets = not !columns
and mode = if !bytes then `Byte else `Point
and verbose = !verbose
;;
if Utils.String.Set.mem "cmdline" verbose then
begin
printf "\nEXPORTED COMMAND LINE\n";
printf "copy = %b\n" copy;
printf "tokens = %b\n" tokens;
printf "units = %b\n" units;
printf "quiet = %b\n" quiet;
printf "offsets = %b\n" offsets;
printf "mode = %s\n" (if mode = `Byte then "`Byte" else "`Point");
printf "verbose = \"%s\"\n" verbose_str;
printf "input = %s\n" (string_of quote input)
end
;;

EvalOpt.mli Normal file

@@ -0,0 +1,42 @@
(* Parsing the command-line option for testing the Ligo lexer and
parser *)
(* If the value [offsets] is [true], then the user requested that
messages about source positions and regions be expressed in terms
of horizontal offsets. *)
val offsets : bool
(* If the value [mode] is [`Byte], then the unit in which source
positions and regions are expressed in messages is the byte. If
[`Point], the unit is unicode points. *)
val mode : [`Byte | `Point]
(* If the option [verbose] is set to a list of predefined stages of
the compiler chain, then more information may be displayed about
those stages. *)
val verbose : Utils.String.Set.t
(* If the value [input] is [None] or [Some "-"], the input is standard
input. If [Some f], then the input is the file whose name (file
path) is [f]. *)
val input : string option
(* If the value [cmd] is
* [Quiet], then no output from the lexer and parser should be
expected, save for error messages: this is the default value;
* [Copy], then lexemes of tokens and markup will be printed to
standard output, with the expectation of a perfect match with
the input file;
* [Units], then the tokens and markup will be printed to standard
output, that is, the abstract representation of the concrete
lexical syntax;
* [Tokens], then the tokens only will be printed.
*)
type command = Quiet | Copy | Units | Tokens
val cmd : command
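(* Dispatch sketch (hypothetical caller of this interface):
     match cmd with
       Quiet  -> ()  (* only errors are reported *)
     | Copy   -> ()  (* echo lexemes and markup *)
     | Units  -> ()  (* print tokens and markup *)
     | Tokens -> ()  (* print tokens only *)
*)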

FQueue.ml Normal file

@@ -0,0 +1,19 @@
(* Purely functional queues based on a pair of lists *)
type 'a t = {rear: 'a list; front: 'a list}
let empty = {rear=[]; front=[]}
let enq x q = {q with rear = x::q.rear}
let rec deq = function
{rear=[]; front= []} -> None
| {rear; front= []} -> deq {rear=[]; front = List.rev rear}
| {rear; front=x::f} -> Some ({rear; front=f}, x)
let rec peek = function
{rear=[]; front= []} -> None
| {rear; front= []} -> peek {rear=[]; front = List.rev rear}
| {rear=_; front=x::_} as q -> Some (q,x)
let is_empty q = (q = empty)
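(* Usage sketch: items leave the queue in the order they entered it.
     let q = empty |> enq 1 |> enq 2 |> enq 3 in
     match deq q with
       Some (_, x) -> x = 1  (* true: 1 was enqueued first *)
     | None -> false
*)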

FQueue.mli Normal file

@@ -0,0 +1,17 @@
(* Purely functional queues *)
type 'a t
val empty : 'a t
val enq : 'a -> 'a t -> 'a t
val deq : 'a t -> ('a t * 'a) option
val is_empty : 'a t -> bool
(* The call [peek q] is [None] if the queue [q] is empty, and,
otherwise, is a pair made of a queue and the next item in it to be
dequeued. The returned queue contains the same items as [q], in the
same order, but is, in general, more efficient to use in further
calls. *)
val peek : 'a t -> ('a t * 'a) option

LexToken.mli Normal file

@@ -0,0 +1,149 @@
(* This signature defines the lexical tokens for Ligo
_Tokens_ are the abstract units which are used by the parser to
build the abstract syntax tree (AST), in other words, the stream of
tokens is the minimal model of the input program, carrying
implicitly all its structure in a linear encoding, and nothing
else, in particular, comments and whitespace are absent.
A _lexeme_ is a specific character string (concrete
representation) denoting a token (abstract representation). Tokens
can be thought of as sets, and lexemes as elements of those sets --
there is often an infinite number of lexemes, but a small number of
tokens. (Think of identifiers as lexemes and one token.)
The tokens are qualified here as being "lexical" because the
parser generator Menhir expects to define them, in which context
they are called "parsing tokens", and they are made to match each
other. (This is an idiosyncratic terminology.)
The type of the lexical tokens is the variant [t], also
aliased to [token].
*)
type lexeme = string
(* TOKENS *)
type t =
(* Literals *)
String of lexeme Region.reg
| Bytes of (lexeme * MBytes.t) Region.reg
| Int of (lexeme * Z.t) Region.reg
| Ident of lexeme Region.reg
| Constr of lexeme Region.reg
(* Symbols *)
| SEMI of Region.t (* ";" *)
| COMMA of Region.t (* "," *)
| LPAR of Region.t (* "(" *)
| RPAR of Region.t (* ")" *)
| LBRACE of Region.t (* "{" *)
| RBRACE of Region.t (* "}" *)
| LBRACKET of Region.t (* "[" *)
| RBRACKET of Region.t (* "]" *)
| CONS of Region.t (* "<:" *)
| VBAR of Region.t (* "|" *)
| ARROW of Region.t (* "->" *)
| ASGNMNT of Region.t (* ":=" *)
| EQUAL of Region.t (* "=" *)
| COLON of Region.t (* ":" *)
| OR of Region.t (* "||" *)
| AND of Region.t (* "&&" *)
| LT of Region.t (* "<" *)
| LEQ of Region.t (* "<=" *)
| GT of Region.t (* ">" *)
| GEQ of Region.t (* ">=" *)
| NEQ of Region.t (* "=/=" *)
| PLUS of Region.t (* "+" *)
| MINUS of Region.t (* "-" *)
| SLASH of Region.t (* "/" *)
| TIMES of Region.t (* "*" *)
| DOT of Region.t (* "." *)
| WILD of Region.t (* "_" *)
| CAT of Region.t (* "^" *)
(* Keywords *)
| Begin of Region.t
| Const of Region.t
| Down of Region.t
| If of Region.t
| In of Region.t
| Is of Region.t
| For of Region.t
| Function of Region.t
| Parameter of Region.t
| Storage of Region.t
| Type of Region.t
| Of of Region.t
| Operations of Region.t
| Var of Region.t
| End of Region.t
| Then of Region.t
| Else of Region.t
| Match of Region.t
| Null of Region.t
| Procedure of Region.t
| Record of Region.t
| Step of Region.t
| To of Region.t
| Mod of Region.t
| Not of Region.t
| While of Region.t
| With of Region.t
(* Data constructors *)
| C_False of Region.t (* "False" *)
| C_None of Region.t (* "None" *)
| C_Some of Region.t (* "Some" *)
| C_True of Region.t (* "True" *)
| C_Unit of Region.t (* "Unit" *)
(* Virtual tokens *)
| EOF of Region.t
type token = t
(* Projections
The difference between extracting the lexeme and a string from a
token is that the latter is the textual representation of the OCaml
value denoting the token (its abstract syntax), rather than its
lexeme (concrete syntax).
*)
val to_lexeme : token -> lexeme
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
val to_region : token -> Region.t
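(* For instance (a sketch), if [t] is the token for the identifier
   "foo" at some region [r], then [to_lexeme t] is "foo" (the
   concrete syntax), whereas [to_string t ~offsets:true `Point]
   renders [r] followed by the abstract syntax: Ident "foo". *)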
(* Injections *)
type int_err =
Non_canonical_zero
type ident_err = Reserved_name
val mk_string : lexeme -> Region.t -> token
val mk_bytes : lexeme -> Region.t -> token
val mk_int : lexeme -> Region.t -> (token, int_err) result
val mk_ident : lexeme -> Region.t -> (token, ident_err) result
val mk_constr : lexeme -> Region.t -> token
val mk_sym : lexeme -> Region.t -> token
val eof : Region.t -> token
(* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool

LexToken.mll Normal file

@@ -0,0 +1,616 @@
(* Lexer specification for Ligo, to be processed by [ocamllex] *)
{
(* START HEADER *)
(* Shorthands *)
type lexeme = string
let sprintf = Printf.sprintf
module SMap = Utils.String.Map
module SSet = Utils.String.Set
(* Hack to roll back one lexeme in the current semantic action *)
(*
let rollback buffer =
let open Lexing in
let len = String.length (lexeme buffer) in
let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
*)
(* TOKENS *)
type t =
(* Literals *)
String of lexeme Region.reg
| Bytes of (lexeme * MBytes.t) Region.reg
| Int of (lexeme * Z.t) Region.reg
| Ident of lexeme Region.reg
| Constr of lexeme Region.reg
(* Symbols *)
| SEMI of Region.t
| COMMA of Region.t
| LPAR of Region.t
| RPAR of Region.t
| LBRACE of Region.t
| RBRACE of Region.t
| LBRACKET of Region.t
| RBRACKET of Region.t
| CONS of Region.t
| VBAR of Region.t
| ARROW of Region.t
| ASGNMNT of Region.t
| EQUAL of Region.t
| COLON of Region.t
| OR of Region.t
| AND of Region.t
| LT of Region.t
| LEQ of Region.t
| GT of Region.t
| GEQ of Region.t
| NEQ of Region.t
| PLUS of Region.t
| MINUS of Region.t
| SLASH of Region.t
| TIMES of Region.t
| DOT of Region.t
| WILD of Region.t
| CAT of Region.t
(* Keywords *)
| Begin of Region.t
| Const of Region.t
| Down of Region.t
| If of Region.t
| In of Region.t
| Is of Region.t
| For of Region.t
| Function of Region.t
| Parameter of Region.t
| Storage of Region.t
| Type of Region.t
| Of of Region.t
| Operations of Region.t
| Var of Region.t
| End of Region.t
| Then of Region.t
| Else of Region.t
| Match of Region.t
| Null of Region.t
| Procedure of Region.t
| Record of Region.t
| Step of Region.t
| To of Region.t
| Mod of Region.t
| Not of Region.t
| While of Region.t
| With of Region.t
(* Types *)
(*
| T_address of Region.t (* "address" *)
| T_big_map of Region.t (* "big_map" *)
| T_bool of Region.t (* "bool" *)
| T_bytes of Region.t (* "bytes" *)
| T_contract of Region.t (* "contract" *)
| T_int of Region.t (* "int" *)
| T_key of Region.t (* "key" *)
| T_key_hash of Region.t (* "key_hash" *)
| T_list of Region.t (* "list" *)
| T_map of Region.t (* "map" *)
| T_mutez of Region.t (* "mutez" *)
| T_nat of Region.t (* "nat" *)
| T_operation of Region.t (* "operation" *)
| T_option of Region.t (* "option" *)
| T_set of Region.t (* "set" *)
| T_signature of Region.t (* "signature" *)
| T_string of Region.t (* "string" *)
| T_timestamp of Region.t (* "timestamp" *)
| T_unit of Region.t (* "unit" *)
*)
(* Data constructors *)
| C_False of Region.t (* "False" *)
| C_None of Region.t (* "None" *)
| C_Some of Region.t (* "Some" *)
| C_True of Region.t (* "True" *)
| C_Unit of Region.t (* "Unit" *)
(* Virtual tokens *)
| EOF of Region.t
type token = t
let proj_token = function
(* Literals *)
String Region.{region; value} ->
region, sprintf "String %s" value
| Bytes Region.{region; value = s,b} ->
region,
sprintf "Bytes (\"%s\", \"0x%s\")"
s (MBytes.to_hex b |> Hex.to_string)
| Int Region.{region; value = s,n} ->
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
| Ident Region.{region; value} ->
region, sprintf "Ident \"%s\"" value
| Constr Region.{region; value} ->
region, sprintf "Constr \"%s\"" value
(* Symbols *)
| SEMI region -> region, "SEMI"
| COMMA region -> region, "COMMA"
| LPAR region -> region, "LPAR"
| RPAR region -> region, "RPAR"
| LBRACE region -> region, "LBRACE"
| RBRACE region -> region, "RBRACE"
| LBRACKET region -> region, "LBRACKET"
| RBRACKET region -> region, "RBRACKET"
| CONS region -> region, "CONS"
| VBAR region -> region, "VBAR"
| ARROW region -> region, "ARROW"
| ASGNMNT region -> region, "ASGNMNT"
| EQUAL region -> region, "EQUAL"
| COLON region -> region, "COLON"
| OR region -> region, "OR"
| AND region -> region, "AND"
| LT region -> region, "LT"
| LEQ region -> region, "LEQ"
| GT region -> region, "GT"
| GEQ region -> region, "GEQ"
| NEQ region -> region, "NEQ"
| PLUS region -> region, "PLUS"
| MINUS region -> region, "MINUS"
| SLASH region -> region, "SLASH"
| TIMES region -> region, "TIMES"
| DOT region -> region, "DOT"
| WILD region -> region, "WILD"
| CAT region -> region, "CAT"
(* Keywords *)
| Begin region -> region, "Begin"
| Const region -> region, "Const"
| Down region -> region, "Down"
| If region -> region, "If"
| In region -> region, "In"
| Is region -> region, "Is"
| For region -> region, "For"
| Function region -> region, "Function"
| Parameter region -> region, "Parameter"
| Storage region -> region, "Storage"
| Type region -> region, "Type"
| Of region -> region, "Of"
| Operations region -> region, "Operations"
| Var region -> region, "Var"
| End region -> region, "End"
| Then region -> region, "Then"
| Else region -> region, "Else"
| Match region -> region, "Match"
| Null region -> region, "Null"
| Procedure region -> region, "Procedure"
| Record region -> region, "Record"
| Step region -> region, "Step"
| To region -> region, "To"
| Mod region -> region, "Mod"
| Not region -> region, "Not"
| While region -> region, "While"
| With region -> region, "With"
(* Data *)
| C_False region -> region, "C_False"
| C_None region -> region, "C_None"
| C_Some region -> region, "C_Some"
| C_True region -> region, "C_True"
| C_Unit region -> region, "C_Unit"
(* Virtual tokens *)
| EOF region -> region, "EOF"
let to_lexeme = function
(* Literals *)
String s -> s.Region.value
| Bytes b -> fst b.Region.value
| Int i -> fst i.Region.value
| Ident id
| Constr id -> id.Region.value
(* Symbols *)
| SEMI _ -> ";"
| COMMA _ -> ","
| LPAR _ -> "("
| RPAR _ -> ")"
| LBRACE _ -> "{"
| RBRACE _ -> "}"
| LBRACKET _ -> "["
| RBRACKET _ -> "]"
| CONS _ -> "<:"
| VBAR _ -> "|"
| ARROW _ -> "->"
| ASGNMNT _ -> ":="
| EQUAL _ -> "="
| COLON _ -> ":"
| OR _ -> "||"
| AND _ -> "&&"
| LT _ -> "<"
| LEQ _ -> "<="
| GT _ -> ">"
| GEQ _ -> ">="
| NEQ _ -> "=/="
| PLUS _ -> "+"
| MINUS _ -> "-"
| SLASH _ -> "/"
| TIMES _ -> "*"
| DOT _ -> "."
| WILD _ -> "_"
| CAT _ -> "^"
(* Keywords *)
| Begin _ -> "begin"
| Const _ -> "const"
| Down _ -> "down"
| If _ -> "if"
| In _ -> "in"
| Is _ -> "is"
| For _ -> "for"
| Function _ -> "function"
| Parameter _ -> "parameter"
| Storage _ -> "storage"
| Type _ -> "type"
| Of _ -> "of"
| Operations _ -> "operations"
| Var _ -> "var"
| End _ -> "end"
| Then _ -> "then"
| Else _ -> "else"
| Match _ -> "match"
| Null _ -> "null"
| Procedure _ -> "procedure"
| Record _ -> "record"
| Step _ -> "step"
| To _ -> "to"
| Mod _ -> "mod"
| Not _ -> "not"
| While _ -> "while"
| With _ -> "with"
(* Data constructors *)
| C_False _ -> "False"
| C_None _ -> "None"
| C_Some _ -> "Some"
| C_True _ -> "True"
| C_Unit _ -> "Unit"
(* Virtual tokens *)
| EOF _ -> ""
let to_string token ?(offsets=true) mode =
let region, val_str = proj_token token in
let reg_str = region#compact ~offsets mode
in sprintf "%s: %s" reg_str val_str
let to_region token = proj_token token |> fst
(* LEXIS *)
let keywords = [
(fun reg -> Begin reg);
(fun reg -> Const reg);
(fun reg -> Down reg);
(fun reg -> If reg);
(fun reg -> In reg);
(fun reg -> Is reg);
(fun reg -> For reg);
(fun reg -> Function reg);
(fun reg -> Parameter reg);
(fun reg -> Storage reg);
(fun reg -> Type reg);
(fun reg -> Of reg);
(fun reg -> Operations reg);
(fun reg -> Var reg);
(fun reg -> End reg);
(fun reg -> Then reg);
(fun reg -> Else reg);
(fun reg -> Match reg);
(fun reg -> Null reg);
(fun reg -> Procedure reg);
(fun reg -> Record reg);
(fun reg -> Step reg);
(fun reg -> To reg);
(fun reg -> Mod reg);
(fun reg -> Not reg);
(fun reg -> While reg);
(fun reg -> With reg)
]
let reserved =
let open SSet in
empty |> add "and"
|> add "as"
|> add "asr"
|> add "assert"
|> add "class"
|> add "constraint"
|> add "do"
|> add "done"
|> add "downto"
|> add "exception"
|> add "external"
|> add "false"
|> add "fun"
|> add "functor"
|> add "include"
|> add "inherit"
|> add "initializer"
|> add "land"
|> add "lazy"
|> add "let"
|> add "lor"
|> add "lsl"
|> add "lsr"
|> add "lxor"
|> add "method"
|> add "module"
|> add "mutable"
|> add "new"
|> add "nonrec"
|> add "object"
|> add "open"
|> add "or"
|> add "private"
|> add "rec"
|> add "sig"
|> add "struct"
|> add "true"
|> add "try"
|> add "val"
|> add "virtual"
|> add "when"
let constructors = [
(fun reg -> C_False reg);
(fun reg -> C_None reg);
(fun reg -> C_Some reg);
(fun reg -> C_True reg);
(fun reg -> C_Unit reg)
]
let add map (key, value) = SMap.add key value map
let mk_map mk_key list =
let apply map value = add map (mk_key value, value)
in List.fold_left apply SMap.empty list
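(* For instance, [mk_map (fun f -> to_lexeme (f Region.ghost)) keywords]
   maps the lexeme "begin" to [fun reg -> Begin reg], "const" to
   [fun reg -> Const reg], and so on: this is how [lexicon] below is
   built. *)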
type lexis = {
kwd : (Region.t -> token) SMap.t;
cstr : (Region.t -> token) SMap.t;
res : SSet.t
}
let lexicon : lexis =
let build list = mk_map (fun f -> to_lexeme (f Region.ghost)) list
in {kwd = build keywords;
cstr = build constructors;
res = reserved}
(* Identifiers *)
type ident_err = Reserved_name
(* END HEADER *)
}
(* START LEXER DEFINITION *)
(* Named regular expressions *)
let small = ['a'-'z']
let capital = ['A'-'Z']
let letter = small | capital
let digit = ['0'-'9']
let ident = small (letter | '_' | digit)*
let constr = capital (letter | '_' | digit)*
(* Rules *)
rule scan_ident region lexicon = parse
(ident as value) eof {
if SSet.mem value lexicon.res
then Error Reserved_name
else Ok (match SMap.find_opt value lexicon.kwd with
Some mk_kwd -> mk_kwd region
| None -> Ident Region.{region; value}) }
and scan_constr region lexicon = parse
(constr as value) eof {
match SMap.find_opt value lexicon.cstr with
Some mk_cstr -> mk_cstr region
| None -> Constr Region.{region; value} }
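(* Examples (a sketch): scanning "begin" with [scan_ident] yields
   [Ok (Begin region)]; "let", which is reserved because of OCaml,
   yields [Error Reserved_name]; "foo" yields
   [Ok (Ident Region.{region; value="foo"})]. *)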
(* END LEXER DEFINITION *)
{
(* START TRAILER *)
(* Smart constructors (injections) *)
let mk_string lexeme region = String Region.{region; value=lexeme}
let mk_bytes lexeme region =
let norm = Str.(global_replace (regexp "_") "" lexeme) in
let value = lexeme, MBytes.of_hex (Hex.of_string norm)
in Bytes Region.{region; value}
type int_err = Non_canonical_zero
let mk_int lexeme region =
let z = Str.(global_replace (regexp "_") "" lexeme)
|> Z.of_string in
if Z.equal z Z.zero && lexeme <> "0"
then Error Non_canonical_zero
else Ok (Int Region.{region; value = lexeme, z})
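(* For example, "0" is accepted, whereas "00" and "0_0" also denote
   zero but with a lexeme other than "0", so they are rejected as
   non-canonical. *)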
let eof region = EOF region
let mk_sym lexeme region =
match lexeme with
";" -> SEMI region
| "," -> COMMA region
| "(" -> LPAR region
| ")" -> RPAR region
| "{" -> LBRACE region
| "}" -> RBRACE region
| "[" -> LBRACKET region
| "]" -> RBRACKET region
| "<:" -> CONS region
| "|" -> VBAR region
| "->" -> ARROW region
| ":=" -> ASGNMNT region
| "=" -> EQUAL region
| ":" -> COLON region
| "||" -> OR region
| "&&" -> AND region
| "<" -> LT region
| "<=" -> LEQ region
| ">" -> GT region
| ">=" -> GEQ region
| "=/=" -> NEQ region
| "+" -> PLUS region
| "-" -> MINUS region
| "/" -> SLASH region
| "*" -> TIMES region
| "." -> DOT region
| "_" -> WILD region
| "^" -> CAT region
| _ -> assert false
(* Identifiers *)
let mk_ident' lexeme region lexicon =
Lexing.from_string lexeme |> scan_ident region lexicon
let mk_ident lexeme region = mk_ident' lexeme region lexicon
(* Constructors *)
let mk_constr' lexeme region lexicon =
Lexing.from_string lexeme |> scan_constr region lexicon
let mk_constr lexeme region = mk_constr' lexeme region lexicon
(* Predicates *)
let is_string = function
String _ -> true
| _ -> false
let is_bytes = function
Bytes _ -> true
| _ -> false
let is_int = function
Int _ -> true
| _ -> false
let is_ident = function
Ident _ -> true
| _ -> false
let is_kwd = function
| Begin _
| Const _
| Down _
| If _
| In _
| Is _
| For _
| Function _
| Parameter _
| Storage _
| Type _
| Of _
| Operations _
| Var _
| End _
| Then _
| Else _
| Match _
| Null _
| Procedure _
| Record _
| Step _
| To _
| Mod _
| Not _
| While _
| With _ -> true
| _ -> false
let is_constr = function
Constr _
| C_False _
| C_None _
| C_Some _
| C_True _
| C_Unit _ -> true
| _ -> false
let is_sym = function
SEMI _
| COMMA _
| LPAR _
| RPAR _
| LBRACE _
| RBRACE _
| LBRACKET _
| RBRACKET _
| CONS _
| VBAR _
| ARROW _
| ASGNMNT _
| EQUAL _
| COLON _
| OR _
| AND _
| LT _
| LEQ _
| GT _
| GEQ _
| NEQ _
| PLUS _
| MINUS _
| SLASH _
| TIMES _
| DOT _
| WILD _
| CAT _ -> true
| _ -> false
let is_eof = function EOF _ -> true | _ -> false
(* END TRAILER *)
}

Lexer.mli Normal file

@@ -0,0 +1,153 @@
(* Lexer specification for Ligo, to be processed by [ocamllex].
The underlying design principles are:
(1) enforce stylistic constraints at a lexical level, in order to
early reject potentially misleading or poorly written
Ligo contracts;
(2) provide precise error messages with hints as to how to fix the
issue, which is achieved by consulting the lexical
right-context of lexemes;
(3) be as independent as possible from the Ligo version, so
upgrades have as little impact as possible on this
specification: this is achieved by using the most general
regular expressions to match the lexing buffer and broadly
distinguish the syntactic categories, and then delegating a
finer, protocol-dependent, second analysis to an external
module making the tokens (hence a functor below);
(4) support unit testing (lexing of the whole input with debug
traces);
The limitation to the protocol independence lies in the errors that
the external module building the tokens (which is
protocol-dependent) may have to report. Indeed these errors have to
be contextualised by the lexer in terms of input source regions, so
useful error messages can be printed, therefore they are part of
the signature [TOKEN] that parameterises the functor generated
here. For instance, if, in a future release of Ligo, new tokens
are added, and the recognition of their lexemes entails new
errors, the signature [TOKEN] will have to be augmented and the
lexer specification changed. However, it is more likely that
instructions or types are added, instead of new kinds of tokens.
*)
type lexeme = string
(* TOKENS *)
(* The signature [TOKEN] exports an abstract type [token], so a lexer
can be a functor over tokens. This makes it possible to externalise
version-dependent constraints in any module whose signature matches
[TOKEN]. Generic functions to construct tokens are required.
Note the predicate [is_eof], which characterises the virtual token
for end-of-file, because it requires special handling. Some of
those functions may yield errors, which are defined as values of
the type [int_err] etc. These errors can be better understood by
reading the ocamllex specification for the lexer ([Lexer.mll]).
*)
module type TOKEN =
sig
type token
(* Errors *)
type int_err = Non_canonical_zero
type ident_err = Reserved_name
(* Injections *)
val mk_string : lexeme -> Region.t -> token
val mk_bytes : lexeme -> Region.t -> token
val mk_int : lexeme -> Region.t -> (token, int_err) result
val mk_ident : lexeme -> Region.t -> (token, ident_err) result
val mk_constr : lexeme -> Region.t -> token
val mk_sym : lexeme -> Region.t -> token
val eof : Region.t -> token
(* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool
(* Projections *)
val to_lexeme : token -> lexeme
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
val to_region : token -> Region.t
end
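(* As an illustration (not part of this interface), a token module
   satisfying [TOKEN] could implement [mk_int] along the following
   lines. This is a sketch only, assuming a constructor [Int]
   carrying the lexeme and its value, as in [ParToken.mly]:

     let mk_int lexeme region =
       (* Underscores are stripped before conversion. *)
       let z = String.split_on_char '_' lexeme
               |> String.concat "" |> Z.of_string in
       if Z.equal z Z.zero && lexeme <> "0"
       then Error Non_canonical_zero
       else Ok (Int Region.{region; value = (lexeme, z)})
*)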
(* The module type for lexers is [S]. It mainly exports the function
[open_token_stream], which returns
* a function [read] that extracts tokens from a lexing buffer,
* together with a lexing buffer [buffer] to read from,
* a function [close] that closes that buffer,
* a function [get_pos] that returns the current position, and
* a function [get_last] that returns the region of the last
recognised token.
Note that a module [Token] is exported too, because the signature
of the exported functions depend on it.
The call [read ~log] evaluates in a lexer (a.k.a. tokeniser or
scanner) whose type is [Lexing.lexbuf -> token], suitable for a
parser generated by Menhir.
The argument labelled [log] is a logger. It may print a token and
its left markup to a given channel, at the caller's discretion.
*)
module type S =
sig
module Token : TOKEN
type token = Token.token
type file_path = string
type logger = Markup.t list -> token -> unit
val output_token :
?offsets:bool -> [`Byte | `Point] ->
EvalOpt.command -> out_channel -> logger
type instance = {
read : ?log:logger -> Lexing.lexbuf -> token;
buffer : Lexing.lexbuf;
get_pos : unit -> Pos.t;
get_last : unit -> Region.t;
close : unit -> unit
}
val open_token_stream : file_path option -> instance
(* Error reporting *)
exception Error of Error.t Region.reg
val print_error : ?offsets:bool -> [`Byte | `Point] ->
Error.t Region.reg -> unit
(* Standalone tracer *)
val trace :
?offsets:bool -> [`Byte | `Point] ->
file_path option -> EvalOpt.command -> unit
end
(* The functorised interface
Note that the module parameter [Token] is re-exported as a
submodule in [S].
*)
module Make (Token: TOKEN) : S with module Token = Token
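(* A minimal usage sketch (illustrative): assuming a module
   [LexToken] matching [TOKEN], the following drains the tokens
   from the standard input:

     module MyLexer = Lexer.Make (LexToken)

     let () =
       let MyLexer.{read; buffer; close; _} =
         MyLexer.open_token_stream None in
       let rec drain () =
         if not (MyLexer.Token.is_eof (read buffer))
         then drain ()
       in drain (); close ()
*)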

803
Lexer.mll Normal file
View File

@ -0,0 +1,803 @@
(* Lexer specification for Ligo, to be processed by [ocamllex]. *)
{
(* START HEADER *)
type lexeme = string
(* STRING PROCESSING *)
(* The value of [mk_str len p] ("make string") is a string of length
[len] containing the [len] characters in the list [p], in reverse
order. For instance, [mk_str 3 ['c';'b';'a'] = "abc"]. *)
let mk_str (len: int) (p: char list) : string =
let bytes = Bytes.make len ' ' in
let rec fill i = function
[] -> bytes
| char::l -> Bytes.set bytes i char; fill (i-1) l
in fill (len-1) p |> Bytes.to_string
(* The call [explode s a] is the list made by pushing the characters
in the string [s] on top of [a], in reverse order. For example,
[explode "ba" ['c';'d'] = ['a'; 'b'; 'c'; 'd']]. *)
let explode s acc =
let rec push = function
0 -> acc
| i -> s.[i-1] :: push (i-1)
in push (String.length s)
(* LEXER ENGINE *)
(* Resetting file name and line number in the lexing buffer
The call [reset ~file ~line buffer] modifies in-place the lexing
buffer [buffer] so the lexing engine records that the file
associated with [buffer] is named [file], and the current line is
[line]. This function is useful when lexing a file that has been
previously preprocessed by the C preprocessor, in which case the
argument [file] is the name of the file that was preprocessed,
_not_ the preprocessed file (of which the user is not normally
aware). By default, the [line] argument is [1].
*)
let reset_file ~file buffer =
let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname = file}
let reset_line line_num buffer =
let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = line_num}
let reset ~file ?(line=1) buffer =
(* Default value per the [Lexing] standard module convention *)
reset_file ~file buffer; reset_line line buffer
(* Rolling back one lexeme _within the current semantic action_ *)
let rollback buffer =
let open Lexing in
let len = String.length (lexeme buffer) in
let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
(* ALIASES *)
let sprintf = Printf.sprintf
(* TOKENS *)
(* The signature [TOKEN] exports an abstract type [token], so a lexer
can be a functor over tokens. Consequently, generic functions to
construct tokens are provided. Note the predicate [is_eof], which
characterises the virtual token for end-of-file, because it requires
special handling. *)
module type TOKEN =
sig
type token
(* Errors *)
type int_err = Non_canonical_zero
type ident_err = Reserved_name
(* Injections *)
val mk_string : lexeme -> Region.t -> token
val mk_bytes : lexeme -> Region.t -> token
val mk_int : lexeme -> Region.t -> (token, int_err) result
val mk_ident : lexeme -> Region.t -> (token, ident_err) result
val mk_constr : lexeme -> Region.t -> token
val mk_sym : lexeme -> Region.t -> token
val eof : Region.t -> token
(* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool
(* Projections *)
val to_lexeme : token -> lexeme
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
val to_region : token -> Region.t
end
(* The module type for lexers is [S]. *)
module type S = sig
module Token : TOKEN
type token = Token.token
type file_path = string
type logger = Markup.t list -> token -> unit
val output_token :
?offsets:bool -> [`Byte | `Point] ->
EvalOpt.command -> out_channel -> logger
type instance = {
read : ?log:logger -> Lexing.lexbuf -> token;
buffer : Lexing.lexbuf;
get_pos : unit -> Pos.t;
get_last : unit -> Region.t;
close : unit -> unit
}
val open_token_stream : file_path option -> instance
(* Error reporting *)
exception Error of Error.t Region.reg
val print_error :
?offsets:bool -> [`Byte | `Point] -> Error.t Region.reg -> unit
(* Standalone tracer *)
val trace :
?offsets:bool -> [`Byte | `Point] ->
file_path option -> EvalOpt.command -> unit
end
(* The functorised interface
Note that the module parameter [Token] is re-exported as a
submodule in [S].
*)
module Make (Token: TOKEN) : (S with module Token = Token) =
struct
module Token = Token
type token = Token.token
type file_path = string
(* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)
(* When scanning structured constructs, like strings and comments,
we need to keep the region of the opening symbol (like double
quote, "#" or "/*") in order to report any error more
precisely. Since ocamllex is byte-oriented, we need to store
the parsed bytes as characters in an accumulator [acc], along
with its length [len], so that, when we are done, it is easy to
build the string making up the structured construct with
[mk_str] (see above).
The resulting data structure is called a _thread_.
*)
type thread = {
opening : Region.t;
len : int;
acc : char list
}
let push_char char {opening; len; acc} = {opening; len=len+1; acc=char::acc}
let push_string str {opening; len; acc} =
{opening;
len = len + String.length str;
acc = explode str acc}
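(* For instance (illustrative): starting from a thread with [len=1]
   and [acc=['"']], as when scanning a string, the call
   [push_string "ab" thread] yields a thread with [len=3] and
   [acc=['b';'a';'"']], from which [mk_str] above rebuilds the
   lexeme "\"ab". *)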
(* STATE *)
(* Beyond tokens, the result of lexing is a state (a so-called
_state monad_). The type [state] represents the logical state
of the lexing engine, that is, a value which is threaded during
scanning and which denotes useful, high-level information
beyond what the type [Lexing.lexbuf] in the standard library
already provides for all generic lexers.
Tokens are the smallest units used by the parser to build the
abstract syntax tree. The state includes a queue of recognised
tokens, with the markup at the left of its lexeme until either
the start of the file or the end of the previously recognised
token.
The markup from the last recognised token or, if the first
token has not been recognised yet, from the beginning of the
file, is stored in the field [markup] of the state, because it
is a side-effect with respect to the output token list; the
recognised units themselves are kept in the queue [units], so
the record [state] may easily be extended during the future
maintenance of this lexer.
The state also includes a field [pos] which holds the current
position in the Ligo source file. The position is not always
updated after a single character has been matched: that depends
on the regular expression that matched the lexing buffer.
The fields [decoder] and [supply] offer the support needed
for the lexing of UTF-8 encoded characters in comments (the
only place where they are allowed in Ligo). The former is the
decoder proper and the latter is the effectful function
[supply] that takes a byte buffer, a start index and a length,
and feeds them to [decoder]. See the documentation of the third-party
library Uutf.
*)
type state = {
units : (Markup.t list * token) FQueue.t;
markup : Markup.t list;
last : Region.t;
pos : Pos.t;
decoder : Uutf.decoder;
supply : Bytes.t -> int -> int -> unit
}
(* The call [enqueue (token, state)] updates functionally the
state [state] by associating the token [token] with the stored
markup and enqueuing the pair into the units queue. The field
[markup] is then reset to the empty list. *)
let enqueue (token, state) = {
state with
units = FQueue.enq (state.markup, token) state.units;
markup = []
}
(* The call [sync state buffer] updates the current position in
accordance with the contents of the lexing buffer, more
precisely, depending on the length of the string which has just
been recognised by the scanner: that length is used as a
positive offset to the current column. *)
let sync state buffer =
let lex = Lexing.lexeme buffer in
let len = String.length lex in
let start = state.pos in
let stop = start#shift_bytes len in
let state = {state with pos = stop}
in Region.make ~start ~stop, lex, state
(* MARKUP *)
(* Committing markup to the current logical state *)
let push_newline state buffer =
let value = Lexing.lexeme buffer
and () = Lexing.new_line buffer
and start = state.pos in
let stop = start#new_line value in
let state = {state with pos = stop}
and region = Region.make ~start ~stop in
let unit = Markup.Newline Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
let push_line (thread, state) =
let start = thread.opening#start in
let region = Region.make ~start ~stop:state.pos
and value = mk_str thread.len thread.acc in
let unit = Markup.LineCom Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
let push_block (thread, state) =
let start = thread.opening#start in
let region = Region.make ~start ~stop:state.pos
and value = mk_str thread.len thread.acc in
let unit = Markup.BlockCom Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
let push_space state buffer =
let region, lex, state = sync state buffer in
let value = String.length lex in
let unit = Markup.Space Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
let push_tabs state buffer =
let region, lex, state = sync state buffer in
let value = String.length lex in
let unit = Markup.Tabs Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
let push_bom state buffer =
let region, value, state = sync state buffer in
let unit = Markup.BOM Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
(* ERRORS *)
type Error.t += Invalid_utf8_sequence
type Error.t += Unexpected_character of char
type Error.t += Undefined_escape_sequence
type Error.t += Missing_break
type Error.t += Unterminated_string
type Error.t += Unterminated_integer
type Error.t += Odd_lengthed_bytes
type Error.t += Unterminated_comment
type Error.t += Orphan_minus
type Error.t += Non_canonical_zero
type Error.t += Negative_byte_sequence
type Error.t += Broken_string
type Error.t += Invalid_character_in_string
type Error.t += Reserved_name
let error_to_string = function
Invalid_utf8_sequence ->
"Invalid UTF-8 sequence.\n"
| Unexpected_character c ->
sprintf "Unexpected character '%c'.\n" c
| Undefined_escape_sequence ->
"Undefined escape sequence.\n\
Hint: Remove or replace the sequence.\n"
| Missing_break ->
"Missing break.\n\
Hint: Insert some space.\n"
| Unterminated_string ->
"Unterminated string.\n\
Hint: Close with double quotes.\n"
| Unterminated_integer ->
"Unterminated integer.\n\
Hint: Remove the sign or proceed with a natural number.\n"
| Odd_lengthed_bytes ->
"The length of the byte sequence is an odd number.\n\
Hint: Add or remove a digit.\n"
| Unterminated_comment ->
"Unterminated comment.\n\
Hint: Close with \"*/\".\n"
| Orphan_minus ->
"Orphan minus sign.\n\
Hint: Remove the trailing space.\n"
| Non_canonical_zero ->
"Non-canonical zero.\n\
Hint: Use 0.\n"
| Negative_byte_sequence ->
"Negative byte sequence.\n\
Hint: Remove the leading minus sign.\n"
| Broken_string ->
"The string starting here is interrupted by a line break.\n\
Hint: Remove the break or close the string before.\n"
| Invalid_character_in_string ->
"Invalid character in string.\n\
Hint: Remove or replace the character.\n"
| Reserved_name ->
"Reserved named.\n\
Hint: Change the name.\n"
| _ -> assert false
exception Error of Error.t Region.reg
let fail region value = raise (Error Region.{region; value})
(* TOKENS *)
(* Making tokens *)
let mk_string (thread, state) =
let start = thread.opening#start in
let stop = state.pos in
let region = Region.make ~start ~stop in
let lexeme = mk_str thread.len thread.acc in
let token = Token.mk_string lexeme region
in token, state
let mk_bytes bytes state buffer =
let region, _, state = sync state buffer in
let token = Token.mk_bytes bytes region
in token, state
let mk_int state buffer =
let region, lexeme, state = sync state buffer in
match Token.mk_int lexeme region with
Ok token -> token, state
| Error Token.Non_canonical_zero ->
fail region Non_canonical_zero
let mk_ident state buffer =
let region, lexeme, state = sync state buffer in
match Token.mk_ident lexeme region with
Ok token -> token, state
| Error Token.Reserved_name -> fail region Reserved_name
let mk_constr state buffer =
let region, lexeme, state = sync state buffer
in Token.mk_constr lexeme region, state
let mk_sym state buffer =
let region, lexeme, state = sync state buffer
in Token.mk_sym lexeme region, state
let mk_eof state buffer =
let region, _, state = sync state buffer
in Token.eof region, state
(* END HEADER *)
}
(* START LEXER DEFINITION *)
(* Named regular expressions *)
let utf8_bom = "\xEF\xBB\xBF" (* Byte Order Mark for UTF-8 *)
let nl = ['\n' '\r'] | "\r\n"
let digit = ['0'-'9']
let natural = digit | digit (digit | '_')* digit
let integer = '-'? natural
let small = ['a'-'z']
let capital = ['A'-'Z']
let letter = small | capital
let ident = small (letter | '_' | digit)*
let constr = capital (letter | '_' | digit)*
let hexa_digit = digit | ['A'-'F']
let byte = hexa_digit hexa_digit
let byte_seq = byte | byte (byte | '_')* byte
let bytes = "0x" (byte_seq? as seq)
let esc = "\\n" | "\\\"" | "\\\\" | "\\b"
| "\\r" | "\\t" | "\\x" byte
let symbol = ';' | ','
| '(' | ')' | '{' | '}' | '[' | ']'
| "<:" | '|' | "->" | ":=" | '=' | ':'
| "||" | "&&" | '<' | "<=" | '>' | ">=" | "=/="
| '+' | '-' | '*' | '.' | '_' | '^'
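(* For example (illustrative): [natural] matches "0", "7" and
   "1_000", but neither "_1" nor "1_"; [bytes] matches "0xFF_00";
   [nl] matches "\n", "\r" and "\r\n". *)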
(* RULES *)
(* Except for the first rule [init], all rules bear a name starting
with "scan".
All have a parameter [state] that they thread through their
recursive calls. The rules for the structured constructs (strings
and comments) have an extra parameter of type [thread] (see above).
*)
rule init state = parse
utf8_bom { scan (push_bom state lexbuf) lexbuf }
| _ { rollback lexbuf; scan state lexbuf }
and scan state = parse
nl { scan (push_newline state lexbuf) lexbuf }
| ' '+ { scan (push_space state lexbuf) lexbuf }
| '\t'+ { scan (push_tabs state lexbuf) lexbuf }
| ident { mk_ident state lexbuf |> enqueue }
| constr { mk_constr state lexbuf |> enqueue }
| bytes { (mk_bytes seq) state lexbuf |> enqueue }
| integer { mk_int state lexbuf |> enqueue }
| symbol { mk_sym state lexbuf |> enqueue }
| eof { mk_eof state lexbuf |> enqueue }
| '"' { let opening, _, state = sync state lexbuf in
let thread = {opening; len=1; acc=['"']} in
scan_string thread state lexbuf |> mk_string |> enqueue }
| "/*" { let opening, _, state = sync state lexbuf in
let thread = {opening; len=2; acc=['*';'/']} in
let state = scan_block thread state lexbuf |> push_block
in scan state lexbuf }
| '#' { let opening, _, state = sync state lexbuf in
let thread = {opening; len=1; acc=['#']} in
let state = scan_line thread state lexbuf |> push_line
in scan state lexbuf }
(* Some special errors
Some special errors are recognised in the semantic actions of the
following regular expressions. The first error is a minus sign
separated from the integer it modifies by some markup (space or
tabs). The second is a minus sign immediately followed by
anything other than a natural number (matched above) or markup and
a number (previous error). The third is the strange occurrence of
an attempt at defining a negative byte sequence. Finally, the
catch-all rule reports unexpected characters in the buffer (and
is not so special, after all).
*)
| '-' { let region, _, state = sync state lexbuf in
let state = scan state lexbuf in
let open Markup in
match FQueue.peek state.units with
None -> assert false
| Some (_, ((Space _ | Tabs _)::_, token))
when Token.is_int token ->
fail region Orphan_minus
| _ -> fail region Unterminated_integer }
| '-' "0x" byte_seq?
{ let region, _, _ = sync state lexbuf
in fail region Negative_byte_sequence }
| _ as c { let region, _, _ = sync state lexbuf
in fail region (Unexpected_character c) }
(* Finishing a string *)
and scan_string thread state = parse
nl { fail thread.opening Broken_string }
| eof { fail thread.opening Unterminated_string }
| ['\t' '\r' '\b']
{ let region, _, _ = sync state lexbuf
in fail region Invalid_character_in_string }
| '"' { let _, _, state = sync state lexbuf
in push_char '"' thread, state }
| esc { let _, lexeme, state = sync state lexbuf
in scan_string (push_string lexeme thread) state lexbuf }
| '\\' _ { let region, _, _ = sync state lexbuf
in fail region Undefined_escape_sequence }
| _ as c { let _, _, state = sync state lexbuf in
scan_string (push_char c thread) state lexbuf }
(* Finishing a block comment
The lexing of block comments must take care of embedded block
comments that may occur within, as well as strings, so no substring
"*/" may inadvertantly close the block. This is the purpose of the
first case of the scanner [scan_block].
*)
and scan_block thread state = parse
'"' | "/*" { let opening = thread.opening in
let opening', lexeme, state = sync state lexbuf in
let thread = push_string lexeme thread in
let thread = {thread with opening=opening'} in
let next = if lexeme = "\"" then scan_string
else scan_block in
let thread, state = next thread state lexbuf in
let thread = {thread with opening}
in scan_block thread state lexbuf }
| "*/" { let _, lexeme, state = sync state lexbuf
in push_string lexeme thread, state }
| nl as nl { let () = Lexing.new_line lexbuf
and state = {state with pos = state.pos#new_line nl}
and thread = push_string nl thread
in scan_block thread state lexbuf }
| eof { fail thread.opening Unterminated_comment }
| _ { let () = rollback lexbuf in
let len = thread.len in
let thread,
status = scan_utf8 thread state lexbuf in
let delta = thread.len - len in
let pos = state.pos#shift_one_uchar delta in
match status with
None -> scan_block thread {state with pos} lexbuf
| Some error ->
let region = Region.make ~start:state.pos ~stop:pos
in fail region error }
(* Finishing a line comment *)
and scan_line thread state = parse
nl as nl { let () = Lexing.new_line lexbuf
and thread = push_string nl thread
and state = {state with pos = state.pos#new_line nl}
in thread, state }
| eof { fail thread.opening Unterminated_comment }
| _ { let () = rollback lexbuf in
let len = thread.len in
let thread,
status = scan_utf8 thread state lexbuf in
let delta = thread.len - len in
let pos = state.pos#shift_one_uchar delta in
match status with
None -> scan_line thread {state with pos} lexbuf
| Some error ->
let region = Region.make ~start:state.pos ~stop:pos
in fail region error }
and scan_utf8 thread state = parse
eof { fail thread.opening Unterminated_comment }
| _ as c { let thread = push_char c thread in
let lexeme = Lexing.lexeme lexbuf in
let () = state.supply (Bytes.of_string lexeme) 0 1 in
match Uutf.decode state.decoder with
`Uchar _ -> thread, None
| `Malformed _ -> thread, Some Invalid_utf8_sequence
| `Await -> scan_utf8 thread state lexbuf
| `End -> assert false }
(* END LEXER DEFINITION *)
{
(* START TRAILER *)
(* Scanning the lexing buffer for tokens (and markup, as a
side-effect).
Because we want the lexer to have access to the right lexical
context of a recognised lexeme (to enforce stylistic constraints or
report special error patterns), we need to keep a hidden reference
to a queue of recognised lexical units (that is, tokens and markup)
that acts as a mutable state between the calls to
[read_token]. When [read_token] is called, that queue is consulted
first and, if it contains at least one token, that token is
returned; otherwise, the lexing buffer is scanned for at least one
more new token. That is the general principle: we put a high-level
buffer (our queue) on top of the low-level lexing buffer.
One tricky and important detail is that we must make any parser
generated by Menhir (and calling [read_token]) believe that the
last region of the input source that was matched indeed corresponds
to the returned token, despite the fact that many tokens and
markup may have been matched since it was actually read from the
input. In other words, the parser requests a token that is taken
from the high-level buffer, but obtains the source regions from
the _low-level_ lexing buffer, and they may disagree if more than
one token has actually been recognised.
Consequently, in order to maintain a consistent view for the
parser, we have to patch some fields of the lexing buffer, namely
[lex_start_p] and [lex_curr_p], as these fields are read by parsers
generated by Menhir when querying source positions (regions). This
is the purpose of the function [patch_buffer]. After reading one
or more tokens and markup by the scanning rule [scan], we have to
save in the hidden reference [buf_reg] the region of the source
that was matched by [scan]. This atomic sequence of patching,
scanning and saving is implemented by the _function_ [scan]
(beware: it shadows the scanning rule [scan]). The function
[patch_buffer] is, of course, also called just before returning the
token, so the parser has a view of the lexing buffer consistent
with the token.
Note that an additional reference [first_call] is needed to
distinguish the first call to the function [scan], as the first
scanning rule is actually [init] (which can handle the BOM), not
[scan].
*)
type logger = Markup.t list -> token -> unit
type instance = {
read : ?log:logger -> Lexing.lexbuf -> token;
buffer : Lexing.lexbuf;
get_pos : unit -> Pos.t;
get_last : unit -> Region.t;
close : unit -> unit
}
let file_path = match EvalOpt.input with
None | Some "-" -> ""
| Some file_path -> file_path
let pos = Pos.min#set_file file_path
let buf_reg = ref (pos#byte, pos#byte)
and first_call = ref true
and decoder = Uutf.decoder ~encoding:`UTF_8 `Manual
let supply = Uutf.Manual.src decoder
let state = ref {units = FQueue.empty;
last = Region.ghost;
pos;
markup = [];
decoder;
supply}
let get_pos () = !state.pos
let get_last () = !state.last
let patch_buffer (start, stop) buffer =
let open Lexing in
let file_path = buffer.lex_curr_p.pos_fname in
buffer.lex_start_p <- {start with pos_fname = file_path};
buffer.lex_curr_p <- {stop with pos_fname = file_path}
and save_region buffer =
buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p)
let scan buffer =
patch_buffer !buf_reg buffer;
(if !first_call
then (state := init !state buffer; first_call := false)
else state := scan !state buffer);
save_region buffer
let next_token buffer =
scan buffer;
match FQueue.peek !state.units with
None -> assert false
| Some (units, ext_token) ->
state := {!state with units}; Some ext_token
let check_right_context token buffer =
let open Token in
if is_int token || is_bytes token then
match next_token buffer with
Some ([], next) ->
let pos = (Token.to_region token)#stop in
let region = Region.make ~start:pos ~stop:pos in
if is_bytes token && is_int next then
fail region Odd_lengthed_bytes
else
if is_ident next || is_string next
|| is_bytes next || is_int next then
fail region Missing_break
| _ -> ()
else
if Token.is_ident token || Token.is_string token then
match next_token buffer with
Some ([], next) ->
if Token.is_ident next || Token.is_string next
|| Token.is_bytes next || Token.is_int next
then
let pos = (Token.to_region token)#stop in
let region = Region.make ~start:pos ~stop:pos
in fail region Missing_break
| _ -> ()
let rec read_token ?(log=fun _ _ -> ()) buffer =
match FQueue.deq !state.units with
None ->
scan buffer;
read_token ~log buffer
| Some (units, (left_mark, token)) ->
log left_mark token;
state := {!state with units; last = Token.to_region token};
check_right_context token buffer;
patch_buffer (Token.to_region token)#byte_pos buffer;
token
let open_token_stream file_path_opt =
let cin = match file_path_opt with
None | Some "-" -> stdin
| Some file_path -> open_in file_path in
let buffer = Lexing.from_channel cin in
let () = match file_path_opt with
None | Some "-" -> ()
| Some file_path -> reset ~file:file_path buffer
and close () = close_in cin in
{read = read_token; buffer; get_pos; get_last; close}
(* Standalone lexer for debugging purposes *)
(* Pretty-printing in a string the lexemes making up the markup
between two tokens, concatenated with the last lexeme itself. *)
let output_token ?(offsets=true) mode command
channel left_mark token : unit =
let output str = Printf.fprintf channel "%s%!" str in
let output_nl str = output (str ^ "\n") in
match command with
EvalOpt.Quiet -> ()
| EvalOpt.Tokens -> Token.to_string token ~offsets mode |> output_nl
| EvalOpt.Copy ->
let lexeme = Token.to_lexeme token
and apply acc markup = Markup.to_lexeme markup :: acc
in List.fold_left apply [lexeme] left_mark
|> String.concat "" |> output
| EvalOpt.Units ->
let abs_token = Token.to_string token ~offsets mode
and apply acc markup =
Markup.to_string markup ~offsets mode :: acc
in List.fold_left apply [abs_token] left_mark
|> String.concat "\n" |> output_nl
let print_error ?(offsets=true) mode Region.{region; value} =
let msg = error_to_string value in
let file = match EvalOpt.input with
None | Some "-" -> false
| Some _ -> true in
let reg = region#to_string ~file ~offsets mode in
Utils.highlight (sprintf "Lexical error %s:\n%s%!" reg msg)
let trace ?(offsets=true) mode file_path_opt command : unit =
try
let {read; buffer; close; _} = open_token_stream file_path_opt
and cout = stdout in
let log = output_token ~offsets mode command cout
and close_all () = close (); close_out cout in
let rec iter () =
match read ~log buffer with
token ->
if Token.is_eof token then close_all ()
else iter ()
| exception Error e -> print_error ~offsets mode e; close_all ()
in iter ()
with Sys_error msg -> Utils.highlight (sprintf "%s\n" msg)
end (* of functor [Make] in HEADER *)
(* END TRAILER *)
}

17
LexerMain.ml Normal file
View File

@ -0,0 +1,17 @@
(* Driver for the lexer of Ligo *)
open! EvalOpt (* Reads the command-line options: Effectful! *)
(* Error printing and exception tracing *)
let () = Printexc.record_backtrace true
let external_ text =
Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;;
(* Running the lexer on the input file *)
module Lexer = Lexer.Make (LexToken)
let () = Lexer.trace ~offsets:EvalOpt.offsets
EvalOpt.mode EvalOpt.input EvalOpt.cmd

6
MBytes.ml Normal file
View File

@ -0,0 +1,6 @@
(* TEMPORARY: SHOULD BE ERASED *)
type t = Hex.t
let of_hex x = x
let to_hex x = x

6
MBytes.mli Normal file
View File

@ -0,0 +1,6 @@
(* TEMPORARY: SHOULD BE ERASED *)
type t
val of_hex : Hex.t -> t
val to_hex : t -> Hex.t

42
Markup.ml Normal file
View File

@ -0,0 +1,42 @@
type lexeme = string
type t =
Tabs of int Region.reg
| Space of int Region.reg
| Newline of lexeme Region.reg
| LineCom of lexeme Region.reg
| BlockCom of lexeme Region.reg
| BOM of lexeme Region.reg
type markup = t
(* Pretty-printing *)
let sprintf = Printf.sprintf
let to_lexeme = function
Tabs Region.{value;_} -> String.make value '\t'
| Space Region.{value;_} -> String.make value ' '
| Newline Region.{value;_}
| LineCom Region.{value;_}
| BlockCom Region.{value;_}
| BOM Region.{value;_} -> value
let to_string markup ?(offsets=true) mode =
let region, val_str =
match markup with
Tabs Region.{value; region} ->
let lex = String.make value '\t' |> String.escaped
in region, sprintf "Tabs \"%s\"" lex
| Space Region.{value; region} ->
region, sprintf "Space \"%s\"" (String.make value ' ')
| Newline Region.{value; region} ->
region, sprintf "Newline \"%s\"" (String.escaped value)
| LineCom Region.{value; region} ->
region, sprintf "LineCom \"%s\"" (String.escaped value)
| BlockCom Region.{value; region} ->
region, sprintf "BlockCom \"%s\"" (String.escaped value)
| BOM Region.{value; region} ->
region, sprintf "BOM \"%s\"" (String.escaped value) in
let reg_str = region#compact ~offsets mode
in sprintf "%s: %s" reg_str val_str

32
Markup.mli Normal file
View File

@ -0,0 +1,32 @@
(* This module defines the sorts of markup recognised by the Ligo
lexer *)
(* A lexeme is a piece of concrete syntax belonging to a token. In
algebraic terms, a token is also a piece of abstract lexical
syntax. Lexical units encompass both markup and lexemes. *)
type lexeme = string
type t =
Tabs of int Region.reg (* Tabulations *)
| Space of int Region.reg (* Space *)
| Newline of lexeme Region.reg (* "\n" or "\r\n" line breaks *)
| LineCom of lexeme Region.reg (* Line comments *)
| BlockCom of lexeme Region.reg (* Block comments *)
| BOM of lexeme Region.reg (* Byte-Order Mark for UTF-8 (optional) *)
type markup = t
(* Pretty-printing of markup
The difference between [to_lexeme] and [to_string] is that the
former builds the corresponding concrete syntax (the lexeme),
whilst the latter makes up a textual representation of the abstract
syntax (the OCaml data constructors).
The result of [to_string] is escaped to avoid capture by the
terminal.
*)
val to_lexeme : t -> lexeme
val to_string : t -> ?offsets:bool -> [`Byte | `Point] -> string
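(* For example (illustrative): for three spaces at the very
   beginning of a file "a.li", [to_lexeme] returns "   ", whereas
   [to_string] with the default settings returns something like
   "a.li:1:0-1:3: Space \"   \"". *)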

87
ParToken.mly Normal file
View File

@ -0,0 +1,87 @@
%{
%}
(* Tokens (mirroring those defined in module LexToken) *)
(* Literals *)
%token <LexToken.lexeme Region.reg> String
%token <(LexToken.lexeme * MBytes.t) Region.reg> Bytes
%token <(LexToken.lexeme * Z.t) Region.reg> Int
%token <LexToken.lexeme Region.reg> Ident
%token <LexToken.lexeme Region.reg> Constr
(* Symbols *)
%token <Region.t> SEMI
%token <Region.t> COMMA
%token <Region.t> LPAR
%token <Region.t> RPAR
%token <Region.t> LBRACE
%token <Region.t> RBRACE
%token <Region.t> LBRACKET
%token <Region.t> RBRACKET
%token <Region.t> CONS
%token <Region.t> VBAR
%token <Region.t> ARROW
%token <Region.t> ASGNMNT
%token <Region.t> EQUAL
%token <Region.t> COLON
%token <Region.t> OR
%token <Region.t> AND
%token <Region.t> LT
%token <Region.t> LEQ
%token <Region.t> GT
%token <Region.t> GEQ
%token <Region.t> NEQ
%token <Region.t> PLUS
%token <Region.t> MINUS
%token <Region.t> SLASH
%token <Region.t> TIMES
%token <Region.t> DOT
%token <Region.t> WILD
%token <Region.t> CAT
(* Keywords *)
%token <Region.t> Begin
%token <Region.t> Const
%token <Region.t> Down
%token <Region.t> If
%token <Region.t> In
%token <Region.t> Is
%token <Region.t> For
%token <Region.t> Function
%token <Region.t> Parameter
%token <Region.t> Storage
%token <Region.t> Type
%token <Region.t> Of
%token <Region.t> Operations
%token <Region.t> Var
%token <Region.t> End
%token <Region.t> Then
%token <Region.t> Else
%token <Region.t> Match
%token <Region.t> Null
%token <Region.t> Procedure
%token <Region.t> Record
%token <Region.t> Step
%token <Region.t> To
%token <Region.t> Mod
%token <Region.t> Not
%token <Region.t> While
%token <Region.t> With
(* Data constructors *)
%token <Region.t> C_False
%token <Region.t> C_None
%token <Region.t> C_Some
%token <Region.t> C_True
%token <Region.t> C_Unit
(* Virtual tokens *)
%token <Region.t> EOF
%%

627
Parser.mly Normal file
View File

@ -0,0 +1,627 @@
%{
(* START HEADER *)
open Region
open AST
(* END HEADER *)
%}
(* Entry points *)
%start program
%type <AST.t> program
%%
(* RULES *)
(* Compound constructs *)
par(X):
LPAR X RPAR {
let region = cover $1 $3
in {region; value = $1,$2,$3}
}
braces(X):
LBRACE X RBRACE {
let region = cover $1 $3
in {region; value = $1,$2,$3}
}
brackets(X):
LBRACKET X RBRACKET {
let region = cover $1 $3
in {region; value = $1,$2,$3}
}
(* Sequences
Series of instances of the same syntactical category often have
to be parsed, like lists of expressions, patterns etc. The simplest of
all is the possibly empty sequence (series), parsed below by
[seq]. The non-empty sequence is parsed by [nseq]. Note that the
latter returns a pair made of the first parsed item (the parameter
[X]) and the rest of the sequence (possibly empty). This way, the
OCaml typechecker can keep track of this information along the
static control-flow graph. The rule [sepseq] parses possibly empty
sequences of items separated by some token (e.g., a comma), and
rule [nsepseq] is for non-empty such sequences. See module [Utils]
for the types corresponding to the semantic actions of those rules.
*)
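(* For instance (illustrative): parsing "x, y, z" with
   [nsepseq(expr,COMMA)] yields a value of the shape
   [x, [(r1,y); (r2,z)]], of type [(expr, comma) Utils.nsepseq],
   where [r1] and [r2] are the regions of the two commas. *)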
(* Possibly empty sequence of items *)
seq(X):
(**) { [] }
| X seq(X) { $1::$2 }
(* Non-empty sequence of items *)
nseq(X):
X seq(X) { $1,$2 }
(* Non-empty separated sequence of items *)
nsepseq(X,Sep):
X { $1, [] }
| X Sep nsepseq(X,Sep) { let h,t = $3 in $1, ($2,h)::t }
(* Possibly empty separated sequence of items *)
sepseq(X,Sep):
(**) { None }
| nsepseq(X,Sep) { Some $1 }
(* Inlines *)
%inline var : Ident { $1 }
%inline type_name : Ident { $1 }
%inline fun_name : Ident { $1 }
%inline field_name : Ident { $1 }
%inline map_name : Ident { $1 }
(* Main *)
program:
seq(type_decl)
parameter_decl
storage_decl
operations_decl
seq(lambda_decl)
block
EOF {
object
method types = $1
method parameter = $2
method storage = $3
method operations = $4
method lambdas = $5
method block = $6
method eof = $7
end
}
parameter_decl:
Parameter var COLON type_expr {
let stop = type_expr_to_region $4
in {region = cover $1 stop;
value = $1,$2,$3,$4}
}
storage_decl:
Storage type_expr {
let stop = type_expr_to_region $2
in {region = cover $1 stop;
value = $1,$2}
}
operations_decl:
Operations type_expr {
let stop = type_expr_to_region $2
in {region = cover $1 stop;
value = $1,$2}
}
(* Type declarations *)
type_decl:
Type type_name Is type_expr {
{region = cover $1 (type_expr_to_region $4);
value = $1,$2,$3,$4}
}
type_expr:
cartesian { Prod $1 }
| sum_type { Sum $1 }
| record_type { Record $1 }
cartesian:
nsepseq(core_type,TIMES) {
let region = nsepseq_to_region type_expr_to_region $1
in {region; value=$1}
}
core_type:
type_name {
TAlias $1
}
| type_name type_tuple {
let region = cover $1.region $2.region
in TypeApp {region; value = $1,$2}
}
| par(type_expr) {
ParType $1
}
type_tuple:
par(nsepseq(type_name,COMMA)) { $1 }
sum_type:
nsepseq(variant,VBAR) {
let region = nsepseq_to_region (fun x -> x.region) $1
in {region; value=$1}
}
variant:
Constr Of cartesian {
let region = cover $1.region $3.region
in {region; value = $1,$2,$3}
}
record_type:
Record
nsepseq(field_decl,SEMI)
End {
let region = cover $1 $3
in {region; value = $1,$2,$3}
}
field_decl:
field_name COLON type_expr {
let stop = type_expr_to_region $3 in
let region = cover $1.region stop
in {region; value = $1,$2,$3}
}
(* Function and procedure declarations *)
lambda_decl:
fun_decl { FunDecl $1 }
| proc_decl { ProcDecl $1 }
fun_decl:
Function fun_name parameters COLON type_expr Is
block
With expr {
let region = cover $1 (expr_to_region $9) in
let value =
object
method kwd_function = $1
method var = $2
method param = $3
method colon = $4
method ret_type = $5
method kwd_is = $6
method body = $7
method kwd_with = $8
method return = $9
end
in {region; value}
}
proc_decl:
Procedure fun_name parameters Is
block {
let region = cover $1 $5.region in
let value =
object
method kwd_procedure = $1
method var = $2
method param = $3
method kwd_is = $4
method body = $5
end
in {region; value}
}
parameters:
par(nsepseq(param_decl,SEMI)) { $1 }
param_decl:
var_kind var COLON type_expr {
let start = var_kind_to_region $1 in
let stop = type_expr_to_region $4 in
let region = cover start stop
in {region; value = $1,$2,$3,$4}
}
var_kind:
Var { Mutable $1 }
| Const { Const $1 }
block:
value_decls
Begin
instructions
End {
let region = cover $1.region $4 in
let value =
object
method decls = $1
method opening = $2
method instr = $3
method close = $4
end
in {region; value}
}
value_decls:
sepseq(var_decl,SEMI) {
let region = sepseq_to_region (fun x -> x.region) $1
in {region; value=$1}
}
var_decl:
Var var COLON type_expr ASGNMNT expr {
let region = cover $1 (expr_to_region $6) in
let value =
object
method kind = Mutable $1
method var = $2
method colon = $3
method vtype = $4
method setter = $5
method init = $6
end
in {region; value}
}
| Const var COLON type_expr EQUAL expr {
let region = cover $1 (expr_to_region $6) in
let value =
object
method kind = Const $1
method var = $2
method colon = $3
method vtype = $4
method setter = $5
method init = $6
end
in {region; value}
}
instructions:
nsepseq(instruction,SEMI) {
let region = nsepseq_to_region instr_to_region $1
in {region; value=$1}
}
instruction:
single_instr { Single $1 }
| block { Block $1 }
single_instr:
conditional { Cond $1 }
| match_instr { Match $1 }
| asgnmnt { Asgnmnt $1 }
| loop { Loop $1 }
| proc_call { ProcCall $1 }
| Null { Null $1 }
proc_call:
fun_call { $1 }
conditional:
If expr Then instruction Else instruction {
let region = cover $1 (instr_to_region $6) in
let value =
object
method kwd_if = $1
method test = $2
method kwd_then = $3
method ifso = $4
method kwd_else = $5
method ifnot = $6
end
in {region; value}
}
match_instr:
Match expr With cases End {
let region = cover $1 $5 in
let value =
object
method kwd_match = $1
method expr = $2
method kwd_with = $3
method cases = $4
method kwd_end = $5
end
in {region; value}
}
cases:
nsepseq(case,VBAR) {
let region = nsepseq_to_region (fun x -> x.region) $1
in {region; value=$1}
}
case:
pattern ARROW instruction {
let region = cover $1.region (instr_to_region $3)
in {region; value = $1,$2,$3}
}
asgnmnt:
var ASGNMNT expr {
let region = cover $1.region (expr_to_region $3)
in {region; value = $1,$2,$3}
}
loop:
while_loop { $1 }
| for_loop { $1 }
while_loop:
While expr block {
let region = cover $1 $3.region
in While {region; value=$1,$2,$3}
}
for_loop:
For asgnmnt Down? To expr option(step_clause) block {
let region = cover $1 $7.region in
let value =
object
method kwd_for = $1
method asgnmnt = $2
method down = $3
method kwd_to = $4
method bound = $5
method step = $6
method block = $7
end
in For (ForInt {region; value})
}
| For var option(arrow_clause) In expr block {
let region = cover $1 $6.region in
let value =
object
method kwd_for = $1
method var = $2
method bind_to = $3
method kwd_in = $4
method expr = $5
method block = $6
end
in For (ForCollect {region; value})
}
step_clause:
Step expr { $1,$2 }
arrow_clause:
ARROW var { $1,$2 }
(* Expressions *)
expr:
expr OR conj_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Or {region; value = $1,$2,$3}
}
| conj_expr { $1 }
conj_expr:
conj_expr AND comp_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
And {region; value = $1,$2,$3}
}
| comp_expr { $1 }
comp_expr:
comp_expr LT cat_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Lt {region; value = $1,$2,$3}
}
| comp_expr LEQ cat_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Leq {region; value = $1,$2,$3}
}
| comp_expr GT cat_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Gt {region; value = $1,$2,$3}
}
| comp_expr GEQ cat_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Geq {region; value = $1,$2,$3}
}
| comp_expr EQUAL cat_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Equal {region; value = $1,$2,$3}
}
| comp_expr NEQ cat_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Neq {region; value = $1,$2,$3}
}
| cat_expr { $1 }
cat_expr:
cons_expr CAT cat_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Cat {region; value = $1,$2,$3}
}
| cons_expr { $1 }
cons_expr:
add_expr CONS cons_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Cons {region; value = $1,$2,$3}
}
| add_expr { $1 }
add_expr:
add_expr PLUS mult_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Add {region; value = $1,$2,$3}
}
| add_expr MINUS mult_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Sub {region; value = $1,$2,$3}
}
| mult_expr { $1 }
mult_expr:
mult_expr TIMES unary_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Mult {region; value = $1,$2,$3}
}
| mult_expr SLASH unary_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Div {region; value = $1,$2,$3}
}
| mult_expr Mod unary_expr {
let start = expr_to_region $1
and stop = expr_to_region $3 in
let region = cover start stop in
Mod {region; value = $1,$2,$3}
}
| unary_expr { $1 }
unary_expr:
MINUS core_expr {
let stop = expr_to_region $2 in
let region = cover $1 stop in
Neg {region; value = $1,$2}
}
| Not core_expr {
let stop = expr_to_region $2 in
let region = cover $1 stop in
Not {region; value = $1,$2}
}
| core_expr { $1 }
core_expr:
Int { Int $1 }
| var { Var $1 }
| String { String $1 }
| Bytes { Bytes $1 }
| C_False { False $1 }
| C_True { True $1 }
| C_Unit { Unit $1 }
| tuple { Tuple $1 }
| list_expr { List $1 }
| empty_list { EmptyList $1 }
| set_expr { Set $1 }
| empty_set { EmptySet $1 }
| none_expr { NoneExpr $1 }
| fun_call { FunCall $1 }
| Constr arguments {
let region = cover $1.region $2.region in
ConstrApp {region; value = $1,$2}
}
| C_Some arguments {
let region = cover $1 $2.region in
SomeApp {region; value = $1,$2}
}
| map_name DOT brackets(expr) {
let region = cover $1.region $3.region in
let value =
object
method map_name = $1
method selector = $2
method index = $3
end
in MapLookUp {region; value}
}
fun_call:
fun_name arguments {
let region = cover $1.region $2.region
in {region; value = $1,$2}
}
tuple:
par(nsepseq(expr,COMMA)) { $1 }
arguments:
tuple { $1 }
list_expr:
brackets(nsepseq(expr,COMMA)) { $1 }
empty_list:
par(LBRACKET RBRACKET COLON type_expr { $1,$2,$3,$4 }) { $1 }
set_expr:
braces(nsepseq(expr,COMMA)) { $1 }
empty_set:
par(LBRACE RBRACE COLON type_expr { $1,$2,$3,$4 }) { $1 }
none_expr:
par(C_None COLON type_expr { $1,$2,$3 }) { $1 }
(* Patterns *)
pattern:
nsepseq(core_pattern,CONS) {
let region = nsepseq_to_region core_pattern_to_region $1
in {region; value=$1}
}
core_pattern:
var { PVar $1 }
| WILD { PWild $1 }
| Int { PInt $1 }
| String { PString $1 }
| C_Unit { PUnit $1 }
| C_False { PFalse $1 }
| C_True { PTrue $1 }
| C_None { PNone $1 }
| list_patt { PList $1 }
| tuple_patt { PTuple $1 }
| C_Some par(core_pattern) {
let region = cover $1 $2.region
in PSome {region; value = $1,$2}
}
list_patt:
brackets(sepseq(core_pattern,COMMA)) { Sugar $1 }
| par(cons_pattern) { Raw $1 }
cons_pattern:
core_pattern CONS pattern { $1,$2,$3 }
tuple_patt:
par(nsepseq(core_pattern,COMMA)) { $1 }

71
ParserMain.ml Normal file
View File

@ -0,0 +1,71 @@
(* Driver for the parser of Ligo *)
open! EvalOpt (* Reads the command-line options: Effectful! *)
let sprintf = Printf.sprintf
(* Error printing and exception tracing *)
let () = Printexc.record_backtrace true
let external_ text =
Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;;
type Error.t += ParseError
let error_to_string = function
ParseError -> "Syntax error.\n"
| _ -> assert false
let print_error ?(offsets=true) mode Region.{region; value} =
let msg = error_to_string value in
let file = match EvalOpt.input with
None | Some "-" -> false
| Some _ -> true in
let reg = region#to_string ~file ~offsets mode in
Utils.highlight (sprintf "Parse error %s:\n%s%!" reg msg)
(* Path to the Ligo standard library *)
(*
let lib_path =
match EvalOpt.libs with
[] -> ""
| libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path
in List.fold_right mk_I libs ""
*)
(* Instantiating the lexer *)
module Lexer = Lexer.Make (LexToken)
let Lexer.{read; buffer; get_pos; get_last; close} =
Lexer.open_token_stream EvalOpt.input
and cout = stdout
let log = Lexer.output_token ~offsets:EvalOpt.offsets
EvalOpt.mode EvalOpt.cmd cout
and close_all () = close (); close_out cout
(* Tokeniser *)
let tokeniser = read ~log
(* Main *)
let () =
try
let ast = Parser.program tokeniser buffer in
if Utils.String.Set.mem "parser" EvalOpt.verbose
then AST.print_tokens ast
with
Lexer.Error err ->
close_all ();
Lexer.print_error ~offsets EvalOpt.mode err
| Parser.Error ->
let region = get_last () in
let error = Region.{region; value=ParseError} in
let () = close_all () in
print_error ~offsets EvalOpt.mode error
| Sys_error msg -> Utils.highlight msg

124
Pos.ml Normal file
View File

@ -0,0 +1,124 @@
type t = <
byte : Lexing.position;
point_num : int;
point_bol : int;
file : string;
line : int;
set_file : string -> t;
set_line : int -> t;
new_line : string -> t;
add_nl : t;
shift_bytes : int -> t;
shift_one_uchar : int -> t;
offset : [`Byte | `Point] -> int;
column : [`Byte | `Point] -> int;
line_offset : [`Byte | `Point] -> int;
byte_offset : int;
is_ghost : bool;
to_string : ?offsets:bool -> [`Byte | `Point] -> string;
compact : ?offsets:bool -> [`Byte | `Point] -> string;
anonymous : ?offsets:bool -> [`Byte | `Point] -> string
>
type pos = t
(* Constructors *)
let sprintf = Printf.sprintf
let make ~byte ~point_num ~point_bol =
let () = assert (point_num >= point_bol) in
object (self)
val byte = byte
method byte = byte
val point_num = point_num
method point_num = point_num
val point_bol = point_bol
method point_bol = point_bol
method set_file file = {< byte = Lexing.{byte with pos_fname = file} >}
method set_line line = {< byte = Lexing.{byte with pos_lnum = line} >}
(* The string must not contain '\n'. See [new_line]. *)
method shift_bytes len =
{< byte = Lexing.{byte with pos_cnum = byte.pos_cnum + len};
point_num = point_num + len >}
method shift_one_uchar len =
{< byte = Lexing.{byte with pos_cnum = byte.pos_cnum + len};
point_num = point_num + 1 >}
method add_nl =
{< byte = Lexing.{byte with
pos_lnum = byte.pos_lnum + 1;
pos_bol = byte.pos_cnum};
point_bol = point_num >}
method new_line string =
let len = String.length string
in (self#shift_bytes len)#add_nl
method is_ghost = byte = Lexing.dummy_pos
method file = byte.Lexing.pos_fname
method line = byte.Lexing.pos_lnum
method offset = function
`Byte -> Lexing.(byte.pos_cnum - byte.pos_bol)
| `Point -> point_num - point_bol
method column mode = 1 + self#offset mode
method line_offset = function
`Byte -> byte.Lexing.pos_bol
| `Point -> point_bol
method byte_offset = byte.Lexing.pos_cnum
method to_string ?(offsets=true) mode =
let offset = self#offset mode in
let horizontal, value =
if offsets then "character", offset else "column", offset + 1
in sprintf "File \"%s\", line %i, %s %i"
self#file self#line horizontal value
method compact ?(offsets=true) mode =
if self#is_ghost then "ghost"
else
let offset = self#offset mode in
sprintf "%s:%i:%i"
self#file self#line (if offsets then offset else offset + 1)
method anonymous ?(offsets=true) mode =
if self#is_ghost then "ghost"
else sprintf "%i:%i" self#line
(if offsets then self#offset mode else self#column mode)
end
let ghost = make ~byte:Lexing.dummy_pos ~point_num:(-1) ~point_bol:(-1)
let min =
let byte = Lexing.{
pos_fname = "";
pos_lnum = 1;
pos_bol = 0;
pos_cnum = 0}
in make ~byte ~point_num:0 ~point_bol:0
(* Comparisons *)
let equal pos1 pos2 =
pos1#file = pos2#file && pos1#byte_offset = pos2#byte_offset
let lt pos1 pos2 =
pos1#file = pos2#file && pos1#byte_offset < pos2#byte_offset

105
Pos.mli Normal file
View File

@ -0,0 +1,105 @@
(* Positions in a file
A position in a file denotes a single unit belonging to it, for
example, in an ASCII text file, it is a particular character within
that file (the unit is the byte in this instance, since in ASCII
one character is encoded with one byte).
Units can be either bytes (as ASCII characters) or, more
generally, unicode points.
The type for positions is the object type [t].
We use here lexing positions to denote byte-oriented positions
(field [byte]), and we manage code points by means of the fields
[point_num] and [point_bol]. These two fields have a meaning
similar to the fields [pos_cnum] and [pos_bol], respectively, from
the standard module [Lexing]. That is to say, [point_num] holds the
number of code points since the beginning of the file, and
[point_bol] the number of code points since the beginning of the
current line.
The name of the file is given by the field [file], and the line
number by the field [line].
*)
type t = <
(* Payload *)
byte : Lexing.position;
point_num : int;
point_bol : int;
file : string;
line : int;
(* Setters *)
set_file : string -> t;
set_line : int -> t;
(* The call [pos#new_line s], where the string [s] is either "\n" or
"\c\r", updates the position [pos] with a new line. *)
new_line : string -> t;
add_nl : t;
(* The call [pos#shift_bytes n] evaluates in a position that is the
translation of position [pos] of [n] bytes forward in the
file. The call [pos#shift_one_uchar n] is similar, except that it
assumes that [n] is the number of bytes making up one unicode
point. *)
shift_bytes : int -> t;
shift_one_uchar : int -> t;
(* Getters *)
(* The call [pos#offset `Byte] provides the horizontal offset of the
position [pos] in bytes. (An offset is the number of units, like
bytes, since the beginning of the current line.) The call
[pos#offset `Point] is the offset counted in number of unicode
points.
The calls to the method [column] are similar to those to
[offset], except that they give the current column number.
The call [pos#line_offset `Byte] is the offset of the line of
position [pos], counted in bytes. Dually, [pos#line_offset
`Point] counts the same offset in code points.
The call [pos#byte_offset] is the offset of the position [pos]
since the beginning of the file, counted in bytes.
*)
offset : [`Byte | `Point] -> int;
column : [`Byte | `Point] -> int;
line_offset : [`Byte | `Point] -> int;
byte_offset : int;
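(* For example (illustrative): on a line of pure ASCII text, a
   position at the fourth character of the line has
   [offset `Byte = 3] and [column `Byte = 4], columns being
   1-based. *)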
(* Predicates *)
is_ghost : bool;
(* Conversions to [string] *)
to_string : ?offsets:bool -> [`Byte | `Point] -> string;
compact : ?offsets:bool -> [`Byte | `Point] -> string;
anonymous : ?offsets:bool -> [`Byte | `Point] -> string
>
type pos = t
(* Constructors *)
val make : byte:Lexing.position -> point_num:int -> point_bol:int -> t
(* Special positions *)
val ghost : t (* Same as [Lexing.dummy_pos] *)
val min : t (* Lexing convention: line 1, offsets to 0 and file to "". *)
(* Comparisons *)
val equal : t -> t -> bool
val lt : t -> t -> bool

122
Region.ml Normal file
View File

@ -0,0 +1,122 @@
(* Regions of a file *)
let sprintf = Printf.sprintf
type t = <
start : Pos.t;
stop : Pos.t;
(* Setters *)
shift_bytes : int -> t;
shift_one_uchar : int -> t;
(* Getters *)
file : string;
pos : Pos.t * Pos.t;
byte_pos : Lexing.position * Lexing.position;
(* Predicates *)
is_ghost : bool;
(* Conversions to [string] *)
to_string : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string;
compact : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string
>
type region = t
type 'a reg = {region: t; value: 'a}
(* Injections *)
exception Invalid
let make ~(start: Pos.t) ~(stop: Pos.t) =
if start#file <> stop#file || start#byte_offset > stop#byte_offset
then raise Invalid
else
object
val start = start
method start = start
val stop = stop
method stop = stop
method shift_bytes len =
let start = start#shift_bytes len
and stop = stop#shift_bytes len
in {< start = start; stop = stop >}
method shift_one_uchar len =
let start = start#shift_one_uchar len
and stop = stop#shift_one_uchar len
in {< start = start; stop = stop >}
(* Getters *)
method file = start#file
method pos = start, stop
method byte_pos = start#byte, stop#byte
(* Predicates *)
method is_ghost = start#is_ghost && stop#is_ghost
(* Conversions to strings *)
method to_string ?(file=true) ?(offsets=true) mode =
let horizontal = if offsets then "character" else "column"
and start_offset =
if offsets then start#offset mode else start#column mode
and stop_offset =
if offsets then stop#offset mode else stop#column mode in
let info =
if file
then sprintf "in file \"%s\", line %i, %s"
(String.escaped start#file) start#line horizontal
else sprintf "at line %i, %s" start#line horizontal
in if stop#line = start#line
then sprintf "%ss %i-%i" info start_offset stop_offset
else sprintf "%s %i to line %i, %s %i"
info start_offset stop#line horizontal stop_offset
method compact ?(file=true) ?(offsets=true) mode =
let start_str = start#anonymous ~offsets mode
and stop_str = stop#anonymous ~offsets mode in
if start#file = stop#file then
if file then sprintf "%s:%s-%s" start#file start_str stop_str
else sprintf "%s-%s" start_str stop_str
else sprintf "%s:%s-%s:%s" start#file start_str stop#file stop_str
end
(* Special regions *)
let ghost = make ~start:Pos.ghost ~stop:Pos.ghost
let min = make ~start:Pos.min ~stop:Pos.min
(* Comparisons *)
let equal r1 r2 =
r1#file = r2#file
&& Pos.equal r1#start r2#start
&& Pos.equal r1#stop r2#stop
let lt r1 r2 =
r1#file = r2#file
&& not r1#is_ghost
&& not r2#is_ghost
&& Pos.lt r1#start r2#start
&& Pos.lt r1#stop r2#stop
let cover r1 r2 =
if r1#is_ghost
then r2
else if r2#is_ghost
then r1
else if lt r1 r2
then make ~start:r1#start ~stop:r2#stop
else make ~start:r2#start ~stop:r1#stop

123
Region.mli Normal file
View File

@ -0,0 +1,123 @@
(* Regions of a file
A _region_ is a contiguous series of bytes, for example, in a text
file. It is here denoted by the object type [t].
The start (included) of the region is given by the field [start],
which is a _position_, and the end (excluded) is the position given
by the field [stop]. The convention of including the start and
excluding the end makes it possible to have empty regions if, and only if,
[start = stop]. See module [Pos] for the definition of positions.
The first byte of a file starts at the offset zero (that is,
column one), and [start] is always lower than or equal to [stop],
and they must refer to the same file.
*)
type t = <
start : Pos.t;
stop : Pos.t;
(* Setters *)
(* The call [region#shift_bytes n] evaluates in a region that is the
translation of region [region] of [n] bytes forward in the
file. The call [region#shift_one_uchar n] is similar, except that
it assumes that [n] is the number of bytes making up one unicode
point. *)
shift_bytes : int -> t;
shift_one_uchar : int -> t;
(* Getters *)
(* The method [file] returns the file name.
The method [pos] returns the values of the fields [start] and [stop].
The method [byte_pos] returns the start and end positions of the
region at hand _interpreting them as lexing positions_, that is,
the unit is the byte. *)
file : string;
pos : Pos.t * Pos.t;
byte_pos : Lexing.position * Lexing.position;
(* Predicates *)
is_ghost : bool;
(* Conversions to [string] *)
(* The call [region#to_string ~file ~offsets mode] evaluates in a
string denoting the region [region].
The name of the file is present if, and only if, [file = true] or
[file] is missing.
The positions in the file are expressed as horizontal offsets if
[offsets = true] or [offsets] is missing (the default), otherwise
as columns.
If [mode = `Byte], those positions will be assumed to have bytes
as their unit, otherwise, if [mode = `Point], they will be
assumed to refer to code points.
The method [compact] has the same signature and calling
convention as [to_string], except that the resulting string is
more compact.
*)
to_string : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string;
compact : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string
>
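(* For example (illustrative): a region spanning characters 4 to 7
   of line 3 in file "a.li" is rendered by [to_string] as
   "in file \"a.li\", line 3, characters 4-7" and by [compact] as
   "a.li:3:4-3:7". *)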
type region = t
type 'a reg = {region: t; value: 'a}
(* Constructors *)
(* The function [make] creates a region from two positions. If the
positions are not properly ordered or refer to different files, the
exception [Invalid] is raised. *)
exception Invalid
val make : start:Pos.t -> stop:Pos.t -> t
(* Special regions *)
(* To deal with ghost expressions, that is, pieces of abstract syntax
that have not been built from excerpts of concrete syntax, we need
_ghost regions_. The module [Pos] provides a [ghost] position, and
we also provide a [ghost] region and, in type [t], the method
[is_ghost] to check it. *)
val ghost : t (* Two [Pos.ghost] positions *)
(* Occasionally, we may need a minimum region. It is here made of two
minimal positions. *)
val min : t (* Two [Pos.min] positions *)
(* Comparisons *)
(* Two regions are equal if, and only if, they refer to the same file
and their start positions are equal and their stop positions are
equal. See [Pos.equal]. Note that [r1] and [r2] can be ghosts. *)
val equal : t -> t -> bool
(* The call [lt r1 r2] ("lower than") has the value [true] if, and
only if, regions [r1] and [r2] refer to the same file, none is a
ghost and the start position of [r1] is lower than that of
[r2]. (See [Pos.lt].) *)
val lt : t -> t -> bool
(* Given two regions [r1] and [r2], we may want the region [cover r1
r2] that covers [r1] and [r2]. We have the property [equal (cover
r1 r2) (cover r2 r1)]. (In a sense, it is the maximum region, but
we avoid that name because of the [min] value above.) If [r1] is a
ghost, the cover is [r2], and if [r2] is a ghost, the cover is [r1]. *)
val cover : t -> t -> t

27
Tests/a.li Normal file

@ -0,0 +1,27 @@
type t is int * string
type u is t
type v is record foo: key; bar: mutez; baz: address end
type w is K of v * u
parameter p : v
storage w
operations u
function f (const x : int) : int is
var y : int := 5 - x;
const z : int = 6
begin
y := x + y
end with y * 2
procedure g (const l : list (int)) is
begin
match l with
[] -> null
| h<:t -> q (h+2)
end
end
begin
g (Unit)
end

157
Utils.ml Normal file

@ -0,0 +1,157 @@
(* Utility types and functions *)
(* Identity *)
let id x = x
(* Combinators *)
let (<@) f g x = f (g x)
let swap f x y = f y x
let lambda = fun x _ -> x
let curry f x y = f (x,y)
let uncurry f (x,y) = f x y
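(* For instance (illustrative comment, not in the original):
   [uncurry (+) (1,2) = 3], and
   [(string_of_int <@ succ) 41 = "42"]. *)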
(* Parametric rules for sequences *)
type 'a nseq = 'a * 'a list
type ('a,'sep) nsepseq = 'a * ('sep * 'a) list
type ('a,'sep) sepseq = ('a,'sep) nsepseq option
(* Consing *)
let nseq_cons x (hd,tl) = x, hd::tl
let nsepseq_cons x sep (hd,tl) = x, (sep,hd)::tl
let sepseq_cons x sep = function
None -> x, []
| Some (hd,tl) -> x, (sep,hd)::tl
(* Rightwards iterators *)
let nseq_foldl f a (hd,tl) = List.fold_left f a (hd::tl)
let nsepseq_foldl f a (hd,tl) =
List.fold_left (fun a (_,e) -> f a e) (f a hd) tl
let sepseq_foldl f a = function
None -> a
| Some s -> nsepseq_foldl f a s
let nseq_iter f (hd,tl) = List.iter f (hd::tl)
let nsepseq_iter f (hd,tl) = f hd; List.iter (f <@ snd) tl
let sepseq_iter f = function
None -> ()
| Some s -> nsepseq_iter f s
(* Reversing *)
let nseq_rev (hd,tl) =
let rec aux acc = function
[] -> acc
| x::l -> aux (nseq_cons x acc) l
in aux (hd,[]) tl
let nsepseq_rev =
let rec aux acc = function
hd, (sep,snd)::tl -> aux ((sep,hd)::acc) (snd,tl)
| hd, [] -> hd, acc in
function
hd, (sep,snd)::tl -> aux [sep,hd] (snd,tl)
| s -> s
let sepseq_rev = function
None -> None
| Some seq -> Some (nsepseq_rev seq)
(* Leftwards iterators *)
let nseq_foldr f (hd,tl) = List.fold_right f (hd::tl)
let nsepseq_foldr f (hd,tl) a = f hd (List.fold_right (f <@ snd) tl a)
let sepseq_foldr f = function
None -> fun a -> a
| Some s -> nsepseq_foldr f s
(* Conversions to lists *)
let nseq_to_list (x,y) = x::y
let nsepseq_to_list (x,y) = x :: List.map snd y
let sepseq_to_list = function
None -> []
| Some s -> nsepseq_to_list s
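(* An illustrative sketch (not in the original): the sequence
   "1; 2; 3" as an [(int, string) nsepseq], exercising consing,
   reversal and conversion to a list. *)

let _seq : (int, string) nsepseq = 1, ["; ", 2; "; ", 3]
let _ = assert (nsepseq_to_list _seq = [1; 2; 3])
let _ = assert (nsepseq_rev _seq = (3, ["; ", 2; "; ", 1]))
let _ = assert (nsepseq_to_list (nsepseq_cons 0 "; " _seq) = [0; 1; 2; 3])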
(* Optional values *)
module Option =
struct
let apply f x =
match x with
Some y -> Some (f y)
| None -> None
let rev_apply x y =
match x with
Some f -> f y
| None -> y
let to_string = function
Some x -> x
| None -> ""
end
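(* Quick illustration of [Option] (added, not in the original): *)

let _ = assert (Option.apply succ (Some 41) = Some 42)
let _ = assert (Option.rev_apply (Some succ) 41 = 42)
let _ = assert (Option.to_string None = "")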
(* Modules based on [String], like sets and maps. *)
module String =
struct
include String
module Ord =
struct
type nonrec t = t
let compare = compare
end
module Map = Map.Make (Ord)
module Set = Set.Make (Ord)
end
(* Integers *)
module Int =
struct
type t = int
module Ord =
struct
type nonrec t = t
let compare = compare
end
module Map = Map.Make (Ord)
module Set = Set.Make (Ord)
end
(* Effectful symbol generator *)
let gen_sym =
let counter = ref 0 in
fun () -> incr counter; "v" ^ string_of_int !counter
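(* For instance (illustrative): starting from a fresh counter, the
   first two calls yield "v1", then "v2". *)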
(* General tracing function *)
let trace text = function
None -> ()
| Some chan -> output_string chan text; flush chan
(* Printing a string in red to standard error *)
let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg

97
Utils.mli Normal file

@ -0,0 +1,97 @@
(* Utility types and functions *)
(* Polymorphic identity function *)
val id : 'a -> 'a
(* Combinators *)
val ( <@ ) : ('a -> 'b) -> ('c -> 'a) -> 'c -> 'b
val swap : ('a -> 'b -> 'c) -> 'b -> 'a -> 'c
val lambda : 'a -> 'b -> 'a
val curry : ('a * 'b -> 'c) -> 'a -> 'b -> 'c
val uncurry : ('a -> 'b -> 'c) -> 'a * 'b -> 'c
(* Parametric rules for sequences
nseq: non-empty sequence;
sepseq: (possibly empty) sequence of separated items;
nsepseq: non-empty sequence of separated items.
*)
type 'a nseq = 'a * 'a list
type ('a,'sep) nsepseq = 'a * ('sep * 'a) list
type ('a,'sep) sepseq = ('a,'sep) nsepseq option
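(* For example (illustrative comment, not in the original
   interface): [(1, ["+", 2; "+", 3])] is an [(int, string) nsepseq]
   denoting "1 + 2 + 3"; [None] is the empty [(int, string) sepseq]. *)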
(* Consing *)
val nseq_cons : 'a -> 'a nseq -> 'a nseq
val nsepseq_cons : 'a -> 'sep -> ('a,'sep) nsepseq -> ('a,'sep) nsepseq
val sepseq_cons : 'a -> 'sep -> ('a,'sep) sepseq -> ('a,'sep) nsepseq
(* Reversing *)
val nseq_rev: 'a nseq -> 'a nseq
val nsepseq_rev: ('a,'sep) nsepseq -> ('a,'sep) nsepseq
val sepseq_rev: ('a,'sep) sepseq -> ('a,'sep) sepseq
(* Rightwards iterators *)
val nseq_foldl : ('a -> 'b -> 'a) -> 'a -> 'b nseq -> 'a
val nsepseq_foldl : ('a -> 'b -> 'a) -> 'a -> ('b,'c) nsepseq -> 'a
val sepseq_foldl : ('a -> 'b -> 'a) -> 'a -> ('b,'c) sepseq -> 'a
val nseq_iter : ('a -> unit) -> 'a nseq -> unit
val nsepseq_iter : ('a -> unit) -> ('a,'b) nsepseq -> unit
val sepseq_iter : ('a -> unit) -> ('a,'b) sepseq -> unit
(* Leftwards iterators *)
val nseq_foldr : ('a -> 'b -> 'b) -> 'a nseq -> 'b -> 'b
val nsepseq_foldr : ('a -> 'b -> 'b) -> ('a,'c) nsepseq -> 'b -> 'b
val sepseq_foldr : ('a -> 'b -> 'b) -> ('a,'c) sepseq -> 'b -> 'b
(* Conversions to lists *)
val nseq_to_list : 'a nseq -> 'a list
val nsepseq_to_list : ('a,'b) nsepseq -> 'a list
val sepseq_to_list : ('a,'b) sepseq -> 'a list
(* Effectful symbol generator *)
val gen_sym : unit -> string
(* General tracing function *)
val trace : string -> out_channel option -> unit
(* Printing a string in red to standard error *)
val highlight : string -> unit
(* Working with optional values *)
module Option :
sig
val apply : ('a -> 'b) -> 'a option -> 'b option
val rev_apply : ('a -> 'a) option -> 'a -> 'a
val to_string : string option -> string
end
(* An extension to the standard module [String] *)
module String :
sig
include module type of String
module Map : Map.S with type key = t
module Set : Set.S with type elt = t
end
(* Integer maps and sets *)
module Int :
sig
type t = int
module Map : Map.S with type key = t
module Set : Set.S with type elt = t
end

1
Version.ml Normal file

@ -0,0 +1 @@
let version = "7445e9c0"

38
dune Normal file

@ -0,0 +1,38 @@
(ocamllex LexToken)
(ocamllex Lexer)
(menhir
(merge_into Parser)
(modules ParToken Parser)
(flags -la 1 --explain --external-tokens LexToken)
)
(executables
(names LexerMain ParserMain)
(public_names ligo-lexer ligo-parser)
(package ligo-parser)
(modules_without_implementation
Error
)
(libraries
hex
zarith
getopt
uutf
str
)
)
(rule
(targets Parser.exe)
(deps ParserMain.exe)
(action (copy ParserMain.exe Parser.exe))
(mode promote-until-clean)
)
(rule
(targets Lexer.exe)
(deps LexerMain.exe)
(action (copy LexerMain.exe Lexer.exe))
(mode promote-until-clean)
)

2
dune-project Normal file

@ -0,0 +1,2 @@
(lang dune 1.7)
(using menhir 2.0)

19
ligo-parser.opam Normal file

@ -0,0 +1,19 @@
opam-version: "2.0"
version: "1.0"
maintainer: "gabriel.alfour@gmail.com"
authors: [ "Galfour" ]
homepage: "https://gitlab.com/gabriel.alfour/ligo-parser"
bug-reports: "https://gitlab.com/gabriel.alfour/ligo-parser/issues"
dev-repo: "git+https://gitlab.com/gabriel.alfour/ligo-parser.git"
license: "MIT"
depends: [
"dune"
"menhir"
"hex" "zarith" "getopt" "uutf"
]
build: [
[ "dune" "build" "-p" name "-j" jobs ]
]
url {
src: "https://gitlab.com/gabriel.alfour/ligo-parser/-/archive/master/ligo-parser.tar.gz"
}