Finished refactoring of lexer.

This commit is contained in:
Christian Rinderknecht 2020-04-28 19:26:31 +02:00
parent 9618a48848
commit 005b7fd69b
27 changed files with 1174 additions and 1079 deletions

View File

@@ -54,7 +54,7 @@ ligo: : Lexical error in file "broken_string.religo", line 1, characters 8-9:
 run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.ligo" ; "main" ] ;
 [%expect {|
-ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters 18-23:
+ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters 18-31:
 Negative byte sequence.
 Hint: Remove the leading minus sign.
 {}
@@ -71,7 +71,7 @@ ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters
 run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.mligo" ; "main" ] ;
 [%expect {|
-ligo: : Lexical error in file "negative_byte_sequence.mligo", line 1, characters 8-13:
+ligo: : Lexical error in file "negative_byte_sequence.mligo", line 1, characters 8-21:
 Negative byte sequence.
 Hint: Remove the leading minus sign.
 {}
@@ -88,7 +88,7 @@ ligo: : Lexical error in file "negative_byte_sequence.mligo", line 1, characters
 run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.religo" ; "main" ] ;
 [%expect {|
-ligo: : Lexical error in file "negative_byte_sequence.religo", line 1, characters 8-13:
+ligo: : Lexical error in file "negative_byte_sequence.religo", line 1, characters 8-21:
 Negative byte sequence.
 Hint: Remove the leading minus sign.
 {}
@@ -103,6 +103,7 @@ ligo: : Lexical error in file "negative_byte_sequence.religo", line 1, character
 * Check the changelog by running 'ligo changelog'
 |} ];
+(*
 run_ligo_bad [ "compile-contract" ; "../../test/lexer/reserved_name.ligo" ; "main" ] ;
 [%expect {|
 ligo: : Lexical error in file "reserved_name.ligo", line 1, characters 4-13:
@@ -119,6 +120,7 @@ ligo: : Lexical error in file "reserved_name.ligo", line 1, characters 4-13:
 * Open a gitlab issue: https://gitlab.com/ligolang/ligo/issues/new
 * Check the changelog by running 'ligo changelog'
 |} ];
+*)
 run_ligo_bad [ "compile-contract" ; "../../test/lexer/reserved_name.religo" ; "main" ] ;
 [%expect {|
@@ -204,10 +206,9 @@ ligo: : Lexical error in file "unexpected_character.religo", line 1, characters
 run_ligo_bad [ "compile-contract" ; "../../test/lexer/unterminated_comment.mligo" ; "main" ] ;
 [%expect {|
-ligo: : Lexical error in file "unterminated_comment.mligo", line 1, characters 0-2:
+ligo: : Preprocessing error in file "../../test/lexer/unterminated_comment.mligo", line 1, characters 0-2:
 Unterminated comment.
 Hint: Close with "*)".
 {}
 If you're not sure how to fix this error, you can
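Note on the widened character spans above (for instance 18-23 becoming 18-31): the lexer now reports a negative byte sequence over the whole "-0x…" lexeme instead of stopping right after the minus sign. A minimal sketch of that computation, assuming only [to_region] and [Region.cover], both used in the LexToken changes further down:

    (* Sketch: the reported region spans from the start of the minus
       sign to the end of the bytes literal. *)
    let negative_byte_sequence_region minus_token bytes_token =
      Region.cover (to_region minus_token) (to_region bytes_token)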

View File

@@ -2,6 +2,7 @@ $HOME/git/OCaml-build/Makefile
 ../shared/Lexer.mli
 ../shared/Lexer.mll
+../shared/LexerLib.ml
 ../shared/EvalOpt.ml
 ../shared/EvalOpt.mli
 ../shared/FQueue.ml

View File

@@ -102,138 +102,167 @@ type t =
 | EOF of Region.t (* End of file *)

+(* Projections *)
+
 type token = t

 let proj_token = function
-  ARROW region -> region, "ARROW"
-| CONS region -> region, "CONS"
-| CAT region -> region, "CAT"
-| MINUS region -> region, "MINUS"
-| PLUS region -> region, "PLUS"
-| SLASH region -> region, "SLASH"
-| TIMES region -> region, "TIMES"
-| LPAR region -> region, "LPAR"
-| RPAR region -> region, "RPAR"
-| LBRACKET region -> region, "LBRACKET"
-| RBRACKET region -> region, "RBRACKET"
-| LBRACE region -> region, "LBRACE"
-| RBRACE region -> region, "RBRACE"
-| COMMA region -> region, "COMMA"
-| SEMI region -> region, "SEMI"
-| VBAR region -> region, "VBAR"
-| COLON region -> region, "COLON"
-| DOT region -> region, "DOT"
-| WILD region -> region, "WILD"
-| EQ region -> region, "EQ"
-| NE region -> region, "NE"
-| LT region -> region, "LT"
-| GT region -> region, "GT"
-| LE region -> region, "LE"
-| GE region -> region, "GE"
-| BOOL_OR region -> region, "BOOL_OR"
-| BOOL_AND region -> region, "BOOL_AND"
-| Ident Region.{region; value} ->
-    region, sprintf "Ident %s" value
-| Constr Region.{region; value} ->
-    region, sprintf "Constr %s" value
+  (* Literals *)
+
+  String Region.{region; value} ->
+    region, sprintf "Str %s" value
+| Bytes Region.{region; value = s,b} ->
+    region,
+    sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b)
 | Int Region.{region; value = s,n} ->
     region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
 | Nat Region.{region; value = s,n} ->
     region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
 | Mutez Region.{region; value = s,n} ->
     region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
-| String Region.{region; value} ->
-    region, sprintf "Str %s" value
-| Bytes Region.{region; value = s,b} ->
-    region,
-    sprintf "Bytes (\"%s\", \"0x%s\")"
-            s (Hex.show b)
+| Ident Region.{region; value} ->
+    region, sprintf "Ident %s" value
+| Constr Region.{region; value} ->
+    region, sprintf "Constr %s" value
 | Attr Region.{region; value} ->
     region, sprintf "Attr \"%s\"" value
+
+  (* Symbols *)
+
+| ARROW region -> region, "ARROW"
+| CONS region -> region, "CONS"
+| CAT region -> region, "CAT"
+| MINUS region -> region, "MINUS"
+| PLUS region -> region, "PLUS"
+| SLASH region -> region, "SLASH"
+| TIMES region -> region, "TIMES"
+| LPAR region -> region, "LPAR"
+| RPAR region -> region, "RPAR"
+| LBRACKET region -> region, "LBRACKET"
+| RBRACKET region -> region, "RBRACKET"
+| LBRACE region -> region, "LBRACE"
+| RBRACE region -> region, "RBRACE"
+| COMMA region -> region, "COMMA"
+| SEMI region -> region, "SEMI"
+| VBAR region -> region, "VBAR"
+| COLON region -> region, "COLON"
+| DOT region -> region, "DOT"
+| WILD region -> region, "WILD"
+| EQ region -> region, "EQ"
+| NE region -> region, "NE"
+| LT region -> region, "LT"
+| GT region -> region, "GT"
+| LE region -> region, "LE"
+| GE region -> region, "GE"
+| BOOL_OR region -> region, "BOOL_OR"
+| BOOL_AND region -> region, "BOOL_AND"
+
+  (* Keywords *)
+
 | Begin region -> region, "Begin"
 | Else region -> region, "Else"
 | End region -> region, "End"
 | False region -> region, "False"
 | Fun region -> region, "Fun"
 | Rec region -> region, "Rec"
 | If region -> region, "If"
 | In region -> region, "In"
 | Let region -> region, "Let"
 | Match region -> region, "Match"
 | Mod region -> region, "Mod"
 | Not region -> region, "Not"
 | Of region -> region, "Of"
 | Or region -> region, "Or"
 | Then region -> region, "Then"
 | True region -> region, "True"
 | Type region -> region, "Type"
 | With region -> region, "With"
-| C_None region -> region, "C_None"
-| C_Some region -> region, "C_Some"
+
+  (* Data *)
+
+| C_None region -> region, "C_None"
+| C_Some region -> region, "C_Some"
+
+  (* Virtual tokens *)
+
 | EOF region -> region, "EOF"

 let to_lexeme = function
-  ARROW _ -> "->"
-| CONS _ -> "::"
-| CAT _ -> "^"
-| MINUS _ -> "-"
-| PLUS _ -> "+"
-| SLASH _ -> "/"
-| TIMES _ -> "*"
-| LPAR _ -> "("
-| RPAR _ -> ")"
-| LBRACKET _ -> "["
-| RBRACKET _ -> "]"
-| LBRACE _ -> "{"
-| RBRACE _ -> "}"
-| COMMA _ -> ","
-| SEMI _ -> ";"
-| VBAR _ -> "|"
-| COLON _ -> ":"
-| DOT _ -> "."
-| WILD _ -> "_"
-| EQ _ -> "="
-| NE _ -> "<>"
-| LT _ -> "<"
-| GT _ -> ">"
-| LE _ -> "<="
-| GE _ -> ">="
-| BOOL_OR _ -> "||"
-| BOOL_AND _ -> "&&"
-| Ident id -> id.Region.value
-| Constr id -> id.Region.value
+  (* Literals *)
+
+  String s -> String.escaped s.Region.value
+| Bytes b -> fst b.Region.value
 | Int i
 | Nat i
 | Mutez i -> fst i.Region.value
-| String s -> String.escaped s.Region.value
-| Bytes b -> fst b.Region.value
+| Ident id -> id.Region.value
+| Constr id -> id.Region.value
 | Attr a -> a.Region.value
+
+(* Symbols *)
+
+| ARROW _ -> "->"
+| CONS _ -> "::"
+| CAT _ -> "^"
+| MINUS _ -> "-"
+| PLUS _ -> "+"
+| SLASH _ -> "/"
+| TIMES _ -> "*"
+| LPAR _ -> "("
+| RPAR _ -> ")"
+| LBRACKET _ -> "["
+| RBRACKET _ -> "]"
+| LBRACE _ -> "{"
+| RBRACE _ -> "}"
+| COMMA _ -> ","
+| SEMI _ -> ";"
+| VBAR _ -> "|"
+| COLON _ -> ":"
+| DOT _ -> "."
+| WILD _ -> "_"
+| EQ _ -> "="
+| NE _ -> "<>"
+| LT _ -> "<"
+| GT _ -> ">"
+| LE _ -> "<="
+| GE _ -> ">="
+| BOOL_OR _ -> "||"
+| BOOL_AND _ -> "&&"
+
+(* Keywords *)
+
 | Begin _ -> "begin"
 | Else _ -> "else"
 | End _ -> "end"
 | False _ -> "false"
 | Fun _ -> "fun"
 | Rec _ -> "rec"
 | If _ -> "if"
 | In _ -> "in"
 | Let _ -> "let"
 | Match _ -> "match"
 | Mod _ -> "mod"
 | Not _ -> "not"
 | Of _ -> "of"
 | Or _ -> "or"
 | True _ -> "true"
 | Type _ -> "type"
 | Then _ -> "then"
 | With _ -> "with"
+
+(* Data constructors *)
+
 | C_None _ -> "None"
 | C_Some _ -> "Some"
+
+(* Virtual tokens *)
+
 | EOF _ -> ""
+
+(* CONVERSIONS *)

 let to_string token ?(offsets=true) mode =
   let region, val_str = proj_token token in
   let reg_str = region#compact ~offsets mode
@@ -241,10 +270,6 @@ let to_string token ?(offsets=true) mode =

 let to_region token = proj_token token |> fst

-(* Injections *)
-
-type int_err = Non_canonical_zero
-
 (* LEXIS *)

 let keywords = [
@@ -385,6 +410,8 @@ let mk_bytes lexeme region =
   let value = lexeme, `Hex norm
   in Bytes Region.{region; value}

+type int_err = Non_canonical_zero
+
 let mk_int lexeme region =
   let z =
     Str.(global_replace (regexp "_") "" lexeme) |> Z.of_string
@@ -398,23 +425,21 @@ type nat_err =
 let mk_nat lexeme region =
   match (String.index_opt lexeme 'n') with
-  | None -> Error Invalid_natural
-  | Some _ -> (
-      let z =
-        Str.(global_replace (regexp "_") "" lexeme) |>
-        Str.(global_replace (regexp "n") "") |>
-        Z.of_string in
-      if Z.equal z Z.zero && lexeme <> "0n"
-      then Error Non_canonical_zero_nat
-      else Ok (Nat Region.{region; value = lexeme,z})
-    )
+    None -> Error Invalid_natural
+  | Some _ -> let z =
+                Str.(global_replace (regexp "_") "" lexeme) |>
+                Str.(global_replace (regexp "n") "") |>
+                Z.of_string in
+              if Z.equal z Z.zero && lexeme <> "0n"
+              then Error Non_canonical_zero_nat
+              else Ok (Nat Region.{region; value = lexeme,z})

 let mk_mutez lexeme region =
   let z =
     Str.(global_replace (regexp "_") "" lexeme) |>
     Str.(global_replace (regexp "mutez") "") |>
     Z.of_string in
   if Z.equal z Z.zero && lexeme <> "0mutez"
   then Error Non_canonical_zero
   else Ok (Mutez Region.{region; value = lexeme, z})
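As a reading aid, here is how [mk_nat] above behaves on a few lexemes (a sketch; [region] stands for any source region):

    (* Hypothetical calls illustrating the code above: *)
    let _ = mk_nat "12n"    region  (* Ok (Nat {value = ("12n", 12); _})  *)
    let _ = mk_nat "1_000n" region  (* Ok: underscores are stripped first *)
    let _ = mk_nat "0n"     region  (* Ok: the canonical zero             *)
    let _ = mk_nat "00n"    region  (* Error Non_canonical_zero_nat       *)
    let _ = mk_nat "42"     region  (* Error Invalid_natural: no 'n'      *)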
@@ -422,8 +447,6 @@ let eof region = EOF region

 type sym_err = Invalid_symbol
-type attr_err = Invalid_attribute

 let mk_sym lexeme region =
   match lexeme with
     (* Lexemes in common with all concrete syntaxes *)
@@ -473,24 +496,27 @@ let mk_constr lexeme region =

 (* Attributes *)

+type attr_err = Invalid_attribute
+
 let mk_attr header lexeme region =
-  if header = "[@" then
-    Error Invalid_attribute
+  if header = "[@" then Error Invalid_attribute
   else Ok (Attr Region.{value=lexeme; region})

 (* Predicates *)

 let is_string = function String _ -> true | _ -> false
 let is_bytes = function Bytes _ -> true | _ -> false
 let is_int = function Int _ -> true | _ -> false
 let is_ident = function Ident _ -> true | _ -> false
 let is_eof = function EOF _ -> true | _ -> false
+let is_minus = function MINUS _ -> true | _ -> false

 (* Errors *)

 type error =
   Odd_lengthed_bytes
 | Missing_break
+| Negative_byte_sequence
@@ -499,6 +525,9 @@ let error_to_string = function
 | Missing_break ->
     "Missing break.\n\
      Hint: Insert some space."
+| Negative_byte_sequence ->
+    "Negative byte sequence.\n\
+     Hint: Remove the leading minus sign."

 exception Error of error Region.reg
@@ -511,29 +540,36 @@ let format_error ?(offsets=true) mode Region.{region; value} ~file =
 let fail region value = raise (Error Region.{region; value})

 let check_right_context token next_token buffer : unit =
-  if not (is_eof token) then
-    if is_int token || is_bytes token then
-      match next_token buffer with
-        Some ([], next) ->
-          let pos    = (to_region token)#stop in
-          let region = Region.make ~start:pos ~stop:pos in
-          if is_int next then
-            fail region Odd_lengthed_bytes
-          else
-            if is_ident next || is_string next || is_bytes next then
-              fail region Missing_break
-      | Some (_::_, _) | None -> ()
-    else
-      if is_ident token || is_string token then
-        match next_token buffer with
-          Some ([], next) ->
-            if is_ident next || is_string next
-               || is_bytes next || is_int next
-            then
-              let pos    = (to_region token)#stop in
-              let region = Region.make ~start:pos ~stop:pos
-              in fail region Missing_break
-        | Some (_::_, _) | None -> ()
+  let pos    = (to_region token)#stop in
+  let region = Region.make ~start:pos ~stop:pos in
+  match next_token buffer with
+    None -> ()
+  | Some (markup, next) ->
+      if is_minus token && is_bytes next
+      then let region =
+             Region.cover (to_region token) (to_region next)
+           in fail region Negative_byte_sequence
+      else
+        match markup with
+          [] ->
+            if is_int token
+            then if is_string next || is_ident next
+                 then fail region Missing_break
+                 else ()
+            else
+            if is_string token
+            then if is_int next || is_bytes next || is_ident next
+                 then fail region Missing_break
+                 else ()
+            else
+            if is_bytes token
+            then if is_string next || is_ident next
+                 then fail region Missing_break
+                 else if is_int next
+                      then fail region Odd_lengthed_bytes
+                      else ()
+            else ()
+        | _::_ -> ()

 (* END TRAILER *)
 }
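The rewritten [check_right_context] replaces the nested matches of the old version with a single one-unit lookahead: it peeks at the next lexical unit (the markup preceding a token, plus the token itself), flags a minus sign immediately followed by a bytes literal as a negative byte sequence, and applies the remaining style checks only when no markup separates the two tokens. A self-contained sketch of the resulting decision table, over a simplified token type (hypothetical; the real code works on LexToken.t values and the lexer's markup queue):

    type tok = Int | Bytes | String | Ident | Minus | Other

    type verdict =
      Pass | Missing_break | Odd_lengthed_bytes | Negative_byte_sequence

    (* [markup_between] is true when spaces, tabs or comments separate
       the two tokens in the source. *)
    let check (token : tok) (markup_between : bool) (next : tok) : verdict =
      if token = Minus && next = Bytes then Negative_byte_sequence
      else if markup_between then Pass
      else match token, next with
             Int,    (String | Ident)      -> Missing_break
           | String, (Int | Bytes | Ident) -> Missing_break
           | Bytes,  (String | Ident)      -> Missing_break
           | Bytes,  Int                   -> Odd_lengthed_bytes
           | _                             -> Pass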

View File

@@ -2,6 +2,7 @@ $HOME/git/OCaml-build/Makefile
 ../shared/Lexer.mli
 ../shared/Lexer.mll
+../shared/LexerLib.mli
 ../shared/LexerLib.ml
 ../shared/EvalOpt.ml
 ../shared/EvalOpt.mli

View File

@@ -1,4 +1,4 @@
-(** This signature defines the lexical tokens for LIGO
+(* This signature defines the lexical tokens for LIGO

    _Tokens_ are the abstract units which are used by the parser to
    build the abstract syntax tree (AST), in other words, the stream of

View File

@@ -5,24 +5,14 @@

 (* Shorthands *)

-type lexeme = string
-let sprintf = Printf.sprintf
-
 module Region = Simple_utils.Region
 module Pos = Simple_utils.Pos
 module SMap = Map.Make (String)
 module SSet = Set.Make (String)

-(* Hack to roll back one lexeme in the current semantic action *)
-(*
-let rollback buffer =
-  let open Lexing in
-  let len = String.length (lexeme buffer) in
-  let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
-  buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
-  buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
-*)
+type lexeme = string
+
+let sprintf = Printf.sprintf

 (* TOKENS *)
@@ -123,6 +113,8 @@ type t =

 | EOF of Region.t

+(* Projections *)
+
 type token = t

 let proj_token = function
@@ -130,32 +122,20 @@ let proj_token = function
   String Region.{region; value} ->
     region, sprintf "String %s" value
 | Bytes Region.{region; value = s,b} ->
     region,
-    sprintf "Bytes (\"%s\", \"0x%s\")"
-            s (Hex.show b)
+    sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b)
 | Int Region.{region; value = s,n} ->
     region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
 | Nat Region.{region; value = s,n} ->
     region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
 | Mutez Region.{region; value = s,n} ->
     region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
 | Ident Region.{region; value} ->
     region, sprintf "Ident \"%s\"" value
 | Constr Region.{region; value} ->
     region, sprintf "Constr \"%s\"" value
-(*
-| Attr {header; string={region; value}} ->
-    region, sprintf "Attr (\"%s\",\"%s\")" header value
-*)

 (* Symbols *)

 | SEMI region -> region, "SEMI"
@@ -244,7 +224,7 @@ let to_lexeme = function
 | Bytes b -> fst b.Region.value
 | Int i
 | Nat i
 | Mutez i -> fst i.Region.value
 | Ident id
 | Constr id -> id.Region.value
@@ -382,9 +362,7 @@ let keywords = [
   (fun reg -> With reg)
 ]

-let reserved =
-  let open SSet in
-  empty |> add "arguments"
+let reserved = SSet.empty

 let constructors = [
   (fun reg -> False reg);
@@ -484,22 +462,20 @@ type nat_err =
 let mk_nat lexeme region =
   match String.index_opt lexeme 'n' with
     None -> Error Invalid_natural
-  | Some _ ->
-      let z =
-        Str.(global_replace (regexp "_") "" lexeme) |>
-        Str.(global_replace (regexp "n") "") |>
-        Z.of_string in
-      if Z.equal z Z.zero && lexeme <> "0n"
-      then Error Non_canonical_zero_nat
-      else Ok (Nat Region.{region; value = lexeme,z})
+  | Some _ -> let z =
+                Str.(global_replace (regexp "_") "" lexeme) |>
+                Str.(global_replace (regexp "n") "") |>
+                Z.of_string in
+              if Z.equal z Z.zero && lexeme <> "0n"
+              then Error Non_canonical_zero_nat
+              else Ok (Nat Region.{region; value = lexeme,z})

 let mk_mutez lexeme region =
-  let z =
-    Str.(global_replace (regexp "_") "" lexeme) |>
-    Str.(global_replace (regexp "mutez") "") |>
-    Z.of_string in
+  let z = Str.(global_replace (regexp "_") "" lexeme) |>
+          Str.(global_replace (regexp "mutez") "") |>
+          Z.of_string in
   if Z.equal z Z.zero && lexeme <> "0mutez"
   then Error Non_canonical_zero
   else Ok (Mutez Region.{region; value = lexeme, z})
@@ -557,22 +533,23 @@ let mk_constr lexeme region =

 type attr_err = Invalid_attribute

-let mk_attr _header _string _region =
-  Error Invalid_attribute
+let mk_attr _ _ _ = Error Invalid_attribute

 (* Predicates *)

 let is_string = function String _ -> true | _ -> false
 let is_bytes = function Bytes _ -> true | _ -> false
 let is_int = function Int _ -> true | _ -> false
 let is_ident = function Ident _ -> true | _ -> false
 let is_eof = function EOF _ -> true | _ -> false
+let is_minus = function MINUS _ -> true | _ -> false

 (* Errors *)

 type error =
   Odd_lengthed_bytes
 | Missing_break
+| Negative_byte_sequence

 let error_to_string = function
   Odd_lengthed_bytes ->
@@ -581,6 +558,9 @@ let error_to_string = function
 | Missing_break ->
     "Missing break.\n\
      Hint: Insert some space."
+| Negative_byte_sequence ->
+    "Negative byte sequence.\n\
+     Hint: Remove the leading minus sign."

 exception Error of error Region.reg
@@ -593,29 +573,36 @@ let format_error ?(offsets=true) mode Region.{region; value} ~file =
 let fail region value = raise (Error Region.{region; value})

 let check_right_context token next_token buffer : unit =
-  if not (is_eof token) then
-    if is_int token || is_bytes token then
-      match next_token buffer with
-        Some ([], next) ->
-          let pos    = (to_region token)#stop in
-          let region = Region.make ~start:pos ~stop:pos in
-          if is_int next then
-            fail region Odd_lengthed_bytes
-          else
-            if is_ident next || is_string next || is_bytes next then
-              fail region Missing_break
-      | Some (_::_, _) | None -> ()
-    else
-      if is_ident token || is_string token then
-        match next_token buffer with
-          Some ([], next) ->
-            if is_ident next || is_string next
-               || is_bytes next || is_int next
-            then
-              let pos    = (to_region token)#stop in
-              let region = Region.make ~start:pos ~stop:pos
-              in fail region Missing_break
-        | Some (_::_, _) | None -> ()
+  let pos    = (to_region token)#stop in
+  let region = Region.make ~start:pos ~stop:pos in
+  match next_token buffer with
+    None -> ()
+  | Some (markup, next) ->
+      if is_minus token && is_bytes next
+      then let region =
+             Region.cover (to_region token) (to_region next)
+           in fail region Negative_byte_sequence
+      else
+        match markup with
+          [] ->
+            if is_int token
+            then if is_string next || is_ident next
+                 then fail region Missing_break
+                 else ()
+            else
+            if is_string token
+            then if is_int next || is_bytes next || is_ident next
+                 then fail region Missing_break
+                 else ()
+            else
+            if is_bytes token
+            then if is_string next || is_ident next
+                 then fail region Missing_break
+                 else if is_int next
+                      then fail region Odd_lengthed_bytes
+                      else ()
+            else ()
+        | _::_ -> ()

 (* END TRAILER *)
 }

View File

@@ -2,6 +2,7 @@ $HOME/git/OCaml-build/Makefile
 ../shared/Lexer.mli
 ../shared/Lexer.mll
+../shared/LexerLib.ml
 ../shared/EvalOpt.ml
 ../shared/EvalOpt.mli
 ../shared/FQueue.ml

View File

@@ -31,48 +31,49 @@ type lexeme = string
 type t =
   (* Symbols *)

   CAT of Region.t (* "++" *)

   (* Arithmetics *)

 | MINUS of Region.t (* "-" *)
 | PLUS of Region.t (* "+" *)
 | SLASH of Region.t (* "/" *)
 | TIMES of Region.t (* "*" *)

   (* Compounds *)

 | LPAR of Region.t (* "(" *)
 | RPAR of Region.t (* ")" *)
 | LBRACKET of Region.t (* "[" *)
 | RBRACKET of Region.t (* "]" *)
 | LBRACE of Region.t (* "{" *)
 | RBRACE of Region.t (* "}" *)

   (* Separators *)

 | COMMA of Region.t (* "," *)
 | SEMI of Region.t (* ";" *)
 | VBAR of Region.t (* "|" *)
 | COLON of Region.t (* ":" *)
 | DOT of Region.t (* "." *)
 | ELLIPSIS of Region.t (* "..." *)
+| ARROW of Region.t (* "=>" *)

   (* Wildcard *)

 | WILD of Region.t (* "_" *)

   (* Comparisons *)

 | EQ of Region.t (* "=" *)
 | EQEQ of Region.t (* "==" *)
 | NE of Region.t (* "!=" *)
 | LT of Region.t (* "<" *)
 | GT of Region.t (* ">" *)
-| LE of Region.t (* "=<" *)
+| LE of Region.t (* "<=" *)
 | GE of Region.t (* ">=" *)
-| ARROW of Region.t (* "=>" *)

+  (* Logic *)
+
 | BOOL_OR of Region.t (* "||" *)
 | BOOL_AND of Region.t (* "&&" *)
@@ -91,18 +92,18 @@ type t =
   (* Keywords *)

 | Else of Region.t
 | False of Region.t
 | If of Region.t
 | Let of Region.t
+| Mod of Region.t
+| Or of Region.t
 | Rec of Region.t
 | Switch of Region.t
-| Mod of Region.t
-| Or of Region.t
 | True of Region.t
 | Type of Region.t

   (* Data constructors *)

 | C_None of Region.t (* "None" *)
 | C_Some of Region.t (* "Some" *)
@@ -111,8 +112,6 @@ type t =

 | EOF of Region.t (* End of file *)

-type token = t
-
 (* Projections

    The difference between extracting the lexeme and a string from a
@@ -121,6 +120,8 @@ type token = t
    lexeme (concrete syntax).
 *)

+type token = t
+
 val to_lexeme : token -> lexeme
 val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
 val to_region : token -> Region.t

View File

@@ -1,66 +1,69 @@
 {
 (* START OF HEADER *)

-type lexeme = string
-let sprintf = Printf.sprintf
+(* Shorthands *)

 module Region = Simple_utils.Region
 module Pos = Simple_utils.Pos
 module SMap = Utils.String.Map
 module SSet = Utils.String.Set

+type lexeme = string
+
+let sprintf = Printf.sprintf
+
 (* TOKENS *)

 type t =
   (* Symbols *)

   CAT of Region.t (* "++" *)

   (* Arithmetics *)

 | MINUS of Region.t (* "-" *)
 | PLUS of Region.t (* "+" *)
 | SLASH of Region.t (* "/" *)
 | TIMES of Region.t (* "*" *)

   (* Compounds *)

 | LPAR of Region.t (* "(" *)
 | RPAR of Region.t (* ")" *)
 | LBRACKET of Region.t (* "[" *)
 | RBRACKET of Region.t (* "]" *)
 | LBRACE of Region.t (* "{" *)
 | RBRACE of Region.t (* "}" *)

   (* Separators *)

 | COMMA of Region.t (* "," *)
 | SEMI of Region.t (* ";" *)
 | VBAR of Region.t (* "|" *)
 | COLON of Region.t (* ":" *)
 | DOT of Region.t (* "." *)
 | ELLIPSIS of Region.t (* "..." *)
+| ARROW of Region.t (* "=>" *)

   (* Wildcard *)

 | WILD of Region.t (* "_" *)

   (* Comparisons *)

 | EQ of Region.t (* "=" *)
 | EQEQ of Region.t (* "==" *)
 | NE of Region.t (* "!=" *)
 | LT of Region.t (* "<" *)
 | GT of Region.t (* ">" *)
 | LE of Region.t (* "<=" *)
 | GE of Region.t (* ">=" *)
-| ARROW of Region.t (* "=>" *)

-| BOOL_OR of Region.t (* "||" *)
-| BOOL_AND of Region.t (* "&&" *)
-| NOT of Region.t (* ! *)
+  (* Logic *)
+
+| BOOL_OR of Region.t (* "||" *)
+| BOOL_AND of Region.t (* "&&" *)
+| NOT of Region.t (* ! *)

 (* Identifiers, labels, numbers and strings *)
@@ -75,17 +78,17 @@ type t =
   (* Keywords *)

-(*| And*)
 | Else of Region.t
 | False of Region.t
 | If of Region.t
 | Let of Region.t
+| Mod of Region.t
+| Or of Region.t
 | Rec of Region.t
 | Switch of Region.t
-| Mod of Region.t
-| Or of Region.t
 | True of Region.t
 | Type of Region.t

   (* Data constructors *)

 | C_None of Region.t (* "None" *)
@@ -96,121 +99,143 @@ type t =

 | EOF of Region.t (* End of file *)

+(* Projections *)
+
 type token = t

 let proj_token = function
-  CAT region -> region, "CAT"
-| MINUS region -> region, "MINUS"
-| PLUS region -> region, "PLUS"
-| SLASH region -> region, "SLASH"
-| TIMES region -> region, "TIMES"
-| LPAR region -> region, "LPAR"
-| RPAR region -> region, "RPAR"
-| LBRACKET region -> region, "LBRACKET"
-| RBRACKET region -> region, "RBRACKET"
-| LBRACE region -> region, "LBRACE"
-| RBRACE region -> region, "RBRACE"
-| COMMA region -> region, "COMMA"
-| SEMI region -> region, "SEMI"
-| VBAR region -> region, "VBAR"
-| COLON region -> region, "COLON"
-| DOT region -> region, "DOT"
-| ELLIPSIS region -> region, "ELLIPSIS"
-| WILD region -> region, "WILD"
-| EQ region -> region, "EQ"
-| EQEQ region -> region, "EQEQ"
-| NE region -> region, "NE"
-| LT region -> region, "LT"
-| GT region -> region, "GT"
-| LE region -> region, "LE"
-| GE region -> region, "GE"
-| ARROW region -> region, "ARROW"
-| BOOL_OR region -> region, "BOOL_OR"
-| BOOL_AND region -> region, "BOOL_AND"
-| Ident Region.{region; value} ->
-    region, sprintf "Ident %s" value
-| Constr Region.{region; value} ->
-    region, sprintf "Constr %s" value
+  (* Literals *)
+
+  String Region.{region; value} ->
+    region, sprintf "String %s" value
+| Bytes Region.{region; value = s,b} ->
+    region,
+    sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b)
 | Int Region.{region; value = s,n} ->
     region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
 | Nat Region.{region; value = s,n} ->
     region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
 | Mutez Region.{region; value = s,n} ->
     region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
-| String Region.{region; value} ->
-    region, sprintf "String %s" value
-| Bytes Region.{region; value = s,b} ->
-    region,
-    sprintf "Bytes (\"%s\", \"0x%s\")"
-            s (Hex.show b)
-| Else region -> region, "Else"
-| False region -> region, "False"
-| If region -> region, "If"
-| Let region -> region, "Let"
-| Rec region -> region, "Rec"
-| Switch region -> region, "Switch"
-| Mod region -> region, "Mod"
-| NOT region -> region, "!"
-| Or region -> region, "Or"
-| True region -> region, "True"
-| Type region -> region, "Type"
-| C_None region -> region, "C_None"
-| C_Some region -> region, "C_Some"
-| Attr Region.{region; value} -> region, sprintf "Attr %s" value
-| EOF region -> region, "EOF"
+| Ident Region.{region; value} ->
+    region, sprintf "Ident %s" value
+| Constr Region.{region; value} ->
+    region, sprintf "Constr %s" value
+
+  (* Symbols *)
+
+| CAT region -> region, "CAT"
+| MINUS region -> region, "MINUS"
+| PLUS region -> region, "PLUS"
+| SLASH region -> region, "SLASH"
+| TIMES region -> region, "TIMES"
+| LPAR region -> region, "LPAR"
+| RPAR region -> region, "RPAR"
+| LBRACKET region -> region, "LBRACKET"
+| RBRACKET region -> region, "RBRACKET"
+| LBRACE region -> region, "LBRACE"
+| RBRACE region -> region, "RBRACE"
+| COMMA region -> region, "COMMA"
+| SEMI region -> region, "SEMI"
+| VBAR region -> region, "VBAR"
+| COLON region -> region, "COLON"
+| DOT region -> region, "DOT"
+| ELLIPSIS region -> region, "ELLIPSIS"
+| WILD region -> region, "WILD"
+| EQ region -> region, "EQ"
+| EQEQ region -> region, "EQEQ"
+| NE region -> region, "NE"
+| LT region -> region, "LT"
+| GT region -> region, "GT"
+| LE region -> region, "LE"
+| GE region -> region, "GE"
+| ARROW region -> region, "ARROW"
+| NOT region -> region, "NOT"
+| BOOL_OR region -> region, "BOOL_OR"
+| BOOL_AND region -> region, "BOOL_AND"
+| Else region -> region, "Else"
+| False region -> region, "False"
+| If region -> region, "If"
+| Let region -> region, "Let"
+| Rec region -> region, "Rec"
+| Switch region -> region, "Switch"
+| Mod region -> region, "Mod"
+| Or region -> region, "Or"
+| True region -> region, "True"
+| Type region -> region, "Type"
+| C_None region -> region, "C_None"
+| C_Some region -> region, "C_Some"
+| Attr Region.{region; value} -> region, sprintf "Attr %s" value
+| EOF region -> region, "EOF"

 let to_lexeme = function
-  CAT _ -> "++"
-| MINUS _ -> "-"
-| PLUS _ -> "+"
-| SLASH _ -> "/"
-| TIMES _ -> "*"
-| LPAR _ -> "("
-| RPAR _ -> ")"
-| LBRACKET _ -> "["
-| RBRACKET _ -> "]"
-| LBRACE _ -> "{"
-| RBRACE _ -> "}"
-| COMMA _ -> ","
-| SEMI _ -> ";"
-| VBAR _ -> "|"
-| COLON _ -> ":"
-| DOT _ -> "."
-| ELLIPSIS _ -> "..."
-| WILD _ -> "_"
-| EQ _ -> "="
-| EQEQ _ -> "=="
-| NE _ -> "!="
-| LT _ -> "<"
-| GT _ -> ">"
-| LE _ -> "<="
-| GE _ -> ">="
-| ARROW _ -> "=>"
-| BOOL_OR _ -> "||"
-| BOOL_AND _ -> "&&"
-| Ident id -> id.Region.value
-| Constr id -> id.Region.value
+  (* Literals *)
+
+  String s -> s.Region.value
+| Bytes b -> fst b.Region.value
 | Int i
 | Nat i
 | Mutez i -> fst i.Region.value
-| String s -> s.Region.value
-| Bytes b -> fst b.Region.value
-| Else _ -> "else"
-| False _ -> "false"
-| If _ -> "if"
-| Let _ -> "let"
-| Rec _ -> "rec"
-| Mod _ -> "mod"
-| NOT _ -> "!"
-| Or _ -> "or"
-| Switch _ -> "switch"
-| True _ -> "true"
-| Type _ -> "type"
+| Ident id -> id.Region.value
+| Constr id -> id.Region.value
+| Attr a -> a.Region.value
+
+(* Symbols *)
+
+| CAT _ -> "++"
+| MINUS _ -> "-"
+| PLUS _ -> "+"
+| SLASH _ -> "/"
+| TIMES _ -> "*"
+| LPAR _ -> "("
+| RPAR _ -> ")"
+| LBRACKET _ -> "["
+| RBRACKET _ -> "]"
+| LBRACE _ -> "{"
+| RBRACE _ -> "}"
+| COMMA _ -> ","
+| SEMI _ -> ";"
+| VBAR _ -> "|"
+| COLON _ -> ":"
+| DOT _ -> "."
+| ELLIPSIS _ -> "..."
+| WILD _ -> "_"
+| EQ _ -> "="
+| EQEQ _ -> "=="
+| NE _ -> "!="
+| LT _ -> "<"
+| GT _ -> ">"
+| LE _ -> "<="
+| GE _ -> ">="
+| ARROW _ -> "=>"
+| BOOL_OR _ -> "||"
+| BOOL_AND _ -> "&&"
+| NOT _ -> "!"
+
+(* Keywords *)
+
+| Else _ -> "else"
+| False _ -> "false"
+| If _ -> "if"
+| Let _ -> "let"
+| Mod _ -> "mod"
+| Or _ -> "or"
+| Rec _ -> "rec"
+| Switch _ -> "switch"
+| True _ -> "true"
+| Type _ -> "type"
+
+(* Data constructors *)
+
 | C_None _ -> "None"
 | C_Some _ -> "Some"
-| Attr a -> a.Region.value
+
+(* Virtual tokens *)
+
 | EOF _ -> ""
+
+(* CONVERSIONS *)

 let to_string token ?(offsets=true) mode =
   let region, val_str = proj_token token in
   let reg_str = region#compact ~offsets mode
@@ -261,12 +286,9 @@ let reserved =
   |> add "functor"
   |> add "inherit"
   |> add "initializer"
-  (* |> add "land" - see https://ligo.atlassian.net/browse/LIGO-263 *)
   |> add "lazy"
-  (* |> add "lor" - see https://ligo.atlassian.net/browse/LIGO-263 *)
   |> add "lsl"
   |> add "lsr"
-  (* |> add "lxor" - see https://ligo.atlassian.net/browse/LIGO-263 *)
   |> add "match"
   |> add "method"
   |> add "module"
@@ -291,7 +313,7 @@ let reserved =

 let constructors = [
   (fun reg -> C_None reg);
-  (fun reg -> C_Some reg);
+  (fun reg -> C_Some reg)
 ]

 let add map (key, value) = SMap.add key value map
@@ -376,20 +398,18 @@ let mk_int lexeme region =
   else Ok (Int Region.{region; value = lexeme, z})

 let mk_nat lexeme region =
-  let z =
-    Str.(global_replace (regexp "_") "" lexeme) |>
-    Str.(global_replace (regexp "n") "") |>
-    Z.of_string in
+  let z = Str.(global_replace (regexp "_") "" lexeme) |>
+          Str.(global_replace (regexp "n") "") |>
+          Z.of_string in
   if Z.equal z Z.zero && lexeme <> "0n"
   then Error Non_canonical_zero_nat
   else Ok (Nat Region.{region; value = lexeme, z})

 let mk_mutez lexeme region =
-  let z =
-    Str.(global_replace (regexp "_") "" lexeme) |>
-    Str.(global_replace (regexp "mutez") "") |>
-    Z.of_string in
+  let z = Str.(global_replace (regexp "_") "" lexeme) |>
+          Str.(global_replace (regexp "mutez") "") |>
+          Z.of_string in
   if Z.equal z Z.zero && lexeme <> "0mutez"
   then Error Non_canonical_zero
   else Ok (Mutez Region.{region; value = lexeme, z})
@@ -426,11 +446,11 @@ let mk_sym lexeme region =

   (* Symbols specific to ReasonLIGO *)

 | "..." -> Ok (ELLIPSIS region)
 | "=>" -> Ok (ARROW region)
 | "==" -> Ok (EQEQ region)
 | "!" -> Ok (NOT region)
 | "++" -> Ok (CAT region)

   (* Invalid symbols *)
@@ -461,16 +481,18 @@ let mk_attr header lexeme region =

 (* Predicates *)

 let is_string = function String _ -> true | _ -> false
 let is_bytes = function Bytes _ -> true | _ -> false
 let is_int = function Int _ -> true | _ -> false
 let is_ident = function Ident _ -> true | _ -> false
 let is_eof = function EOF _ -> true | _ -> false
+let is_minus = function MINUS _ -> true | _ -> false

 (* Errors *)

 type error =
   Odd_lengthed_bytes
 | Missing_break
+| Negative_byte_sequence
@@ -479,6 +501,9 @@ let error_to_string = function
 | Missing_break ->
     "Missing break.\n\
      Hint: Insert some space."
+| Negative_byte_sequence ->
+    "Negative byte sequence.\n\
+     Hint: Remove the leading minus sign."

 exception Error of error Region.reg
@@ -491,29 +516,36 @@ let format_error ?(offsets=true) mode Region.{region; value} ~file =
 let fail region value = raise (Error Region.{region; value})

 let check_right_context token next_token buffer : unit =
-  if not (is_eof token) then
-    if is_int token || is_bytes token then
-      match next_token buffer with
-        Some ([], next) ->
-          let pos    = (to_region token)#stop in
-          let region = Region.make ~start:pos ~stop:pos in
-          if is_int next then
-            fail region Odd_lengthed_bytes
-          else
-            if is_ident next || is_string next || is_bytes next then
-              fail region Missing_break
-      | Some (_::_, _) | None -> ()
-    else
-      if is_ident token || is_string token then
-        match next_token buffer with
-          Some ([], next) ->
-            if is_ident next || is_string next
-               || is_bytes next || is_int next
-            then
-              let pos    = (to_region token)#stop in
-              let region = Region.make ~start:pos ~stop:pos
-              in fail region Missing_break
-        | Some (_::_, _) | None -> ()
+  let pos    = (to_region token)#stop in
+  let region = Region.make ~start:pos ~stop:pos in
+  match next_token buffer with
+    None -> ()
+  | Some (markup, next) ->
+      if is_minus token && is_bytes next
+      then let region =
+             Region.cover (to_region token) (to_region next)
+           in fail region Negative_byte_sequence
+      else
+        match markup with
+          [] ->
+            if is_int token
+            then if is_string next || is_ident next
+                 then fail region Missing_break
+                 else ()
+            else
+            if is_string token
+            then if is_int next || is_bytes next || is_ident next
+                 then fail region Missing_break
+                 else ()
+            else
+            if is_bytes token
+            then if is_string next || is_ident next
+                 then fail region Missing_break
+                 else if is_int next
+                      then fail region Odd_lengthed_bytes
+                      else ()
+            else ()
+        | _::_ -> ()

 (* END TRAILER *)
 }
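For orientation, the projections reorganised above produce the debug strings consumed by [to_string]; a small sketch of one call (using [Region.ghost], the placeholder region of Simple_utils, as an assumption about the test context):

    let _ =
      let forty_two = Z.of_int 42 in
      proj_token (Int Region.{region = Region.ghost; value = ("42", forty_two)})
      (* = (Region.ghost, "Int (\"42\", 42)") *)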

View File

@@ -1,2 +0,0 @@
-$HOME/git/OCaml-build/Makefile
-$HOME/git/OCaml-build/Makefile.cfg

View File

@@ -38,8 +38,6 @@
 module Region = Simple_utils.Region
 module Pos = Simple_utils.Pos

-type lexeme = string
-
 (* TOKENS *)

 (* The signature [TOKEN] exports an abstract type [token], so a lexer
@@ -54,6 +52,8 @@ type lexeme = string
    reading the ocamllex specification for the lexer ([Lexer.mll]).
 *)

+type lexeme = string
+
 module type TOKEN =
   sig
     type token
@@ -112,10 +112,36 @@ module type TOKEN =
       unit
   end

+(* The signature of the lexer *)
+
+module type S =
+  sig
+    module Token : TOKEN
+    type token = Token.token
+
+    (* The scanner [init] is meant to be called first to read the
+       BOM. Then [scan] is called. *)
+
+    val init : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
+    val scan : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
+
+    (* Errors (specific to the generic lexer, not to the tokens) *)
+
+    type error
+
+    val error_to_string : error -> string
+
+    exception Error of error Region.reg
+
+    val format_error :
+      ?offsets:bool -> [`Byte | `Point] ->
+      error Region.reg -> file:bool -> string Region.reg
+  end
+
 (* The functorised interface

    Note that the module parameter [Token] is re-exported as a
    submodule in [S].
 *)

-module Make (Token: TOKEN) : LexerLib.S with module Token = Token
+module Make (Token : TOKEN) : S with module Token = Token
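After this change, the lexer proper is a functor over [TOKEN] exposing only [init] and [scan] over a [LexerLib.state]; buffering, input handling and the sliding window move to LexerLib. A hedged sketch of a client, where everything except [Make], [init], [scan] and the [LexerLib.state] type is an assumption:

    (* Hypothetical client: scan one lexical unit after reading the BOM. *)
    module MyLexer = Lexer.Make (LexToken)

    let scan_one (state : LexToken.t LexerLib.state) lexbuf =
      let state = MyLexer.init state lexbuf in  (* reads a possible BOM *)
      MyLexer.scan state lexbuf                 (* scans the next unit  *)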

View File

@@ -8,12 +8,6 @@ module Pos = Simple_utils.Pos

 (* START HEADER *)

-type lexeme = string
-
-(* ALIASES *)
-
-let sprintf = Printf.sprintf
-
 (* TOKENS *)

 (* The signature [TOKEN] exports an abstract type [token], so a lexer
@@ -22,6 +16,8 @@ let sprintf = Printf.sprintf
    caracterises the virtual token for end-of-file, because it requires
    special handling. *)

+type lexeme = string
+
 module type TOKEN =
   sig
     type token
@@ -84,31 +80,39 @@ module type TOKEN =
    submodule in [S].
 *)

-module Make (Token: TOKEN) : (LexerLib.S with module Token = Token) =
+module type S =
+  sig
+    module Token : TOKEN
+    type token = Token.token
+
+    val init : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
+    val scan : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
+
+    type error
+
+    val error_to_string : error -> string
+
+    exception Error of error Region.reg
+
+    val format_error :
+      ?offsets:bool -> [`Byte | `Point] ->
+      error Region.reg -> file:bool -> string Region.reg
+  end
+
+module Make (Token : TOKEN) : (S with module Token = Token) =
   struct
     module Token = Token
     type token = Token.token

+    type file_path = string
+    type line_comment = LexerLib.line_comment
+    type block_comment = LexerLib.block_comment
+    let mk_block = LexerLib.mk_block
+
     (* ERRORS *)

     type error =
       Invalid_utf8_sequence
     | Unexpected_character of char
     | Undefined_escape_sequence
-    (* | Missing_break*)
     | Unterminated_string
-    (* | Unterminated_integer*)
-    (* | Odd_lengthed_bytes*)
     | Unterminated_comment of string
-    (* | Orphan_minus*)
     | Non_canonical_zero
-    (* | Negative_byte_sequence *)
     | Broken_string
     | Invalid_character_in_string
     | Reserved_name of string
@@ -116,6 +120,8 @@ module Make (Token: TOKEN) : (LexerLib.S with module Token = Token) =
     | Invalid_natural
     | Invalid_attribute

+    let sprintf = Printf.sprintf
+
     let error_to_string = function
       Invalid_utf8_sequence ->
        "Invalid UTF-8 sequence."
@@ -124,30 +130,15 @@ module Make (Token: TOKEN) : (LexerLib.S with module Token = Token) =
     | Undefined_escape_sequence ->
        "Undefined escape sequence.\n\
         Hint: Remove or replace the sequence."
-    (* | Missing_break ->
-       "Missing break.\n\
-        Hint: Insert some space."
-    *)
     | Unterminated_string ->
        "Unterminated string.\n\
         Hint: Close with double quotes."
-    (* | Unterminated_integer ->
-       "Unterminated integer.\n\
-        Hint: Remove the sign or proceed with a natural number." *)
-    (* | Odd_lengthed_bytes ->
-       "The length of the byte sequence is an odd number.\n\
-        Hint: Add or remove a digit."
-    *)
     | Unterminated_comment ending ->
        sprintf "Unterminated comment.\n\
                 Hint: Close with \"%s\"." ending
-    (* | Orphan_minus ->
-       "Orphan minus sign.\n\
-        Hint: Remove the trailing space." *)
     | Non_canonical_zero ->
        "Non-canonical zero.\n\
         Hint: Use 0."
-    (* | Negative_byte_sequence ->
-       "Negative byte sequence.\n\
-        Hint: Remove the leading minus sign." *)
     | Broken_string ->
        "The string starting here is interrupted by a line break.\n\
         Hint: Remove the break, close the string before or insert a \
@@ -319,13 +310,13 @@ let symbol = common_sym | pascaligo_sym | cameligo_sym | reasonligo_sym

 (* Comments *)

 let pascaligo_block_comment_opening = "(*"
 let pascaligo_block_comment_closing = "*)"
 let pascaligo_line_comment = "//"

 let cameligo_block_comment_opening = "(*"
 let cameligo_block_comment_closing = "*)"
 let cameligo_line_comment = "//"

 let reasonligo_block_comment_opening = "/*"
 let reasonligo_block_comment_closing = "*/"
@@ -369,6 +360,7 @@ and scan state = parse
   nl { scan (state#push_newline lexbuf) lexbuf }
 | ' '+ { scan (state#push_space lexbuf) lexbuf }
 | '\t'+ { scan (state#push_tabs lexbuf) lexbuf }

 | ident { mk_ident state lexbuf }
 | constr { mk_constr state lexbuf }
 | bytes { mk_bytes seq state lexbuf }
@@ -420,27 +412,14 @@ and scan state = parse
                  let state = state#set_pos pos in
                  scan state lexbuf }

-(* Some special errors *)
-
-(*
-| '-' { let region, _, state = state#sync lexbuf in
-        let state = scan state lexbuf in
-        let open Markup in
-        match FQueue.peek state#units with
-          None -> assert false
-        | Some (_, ((Space _ | Tabs _)::_, token))
-            when Token.is_int token -> fail region Orphan_minus
-        | _ -> fail region Unterminated_integer }
-
-| "-0x" byte_seq?
-      { let region, _, _ = state#sync lexbuf
-        in fail region Negative_byte_sequence }
-*)
+(* String *)

 | '"' { let opening, lexeme, state = state#sync lexbuf in
         let thread = LexerLib.mk_thread opening lexeme in
         scan_string thread state lexbuf |> mk_string }

-(* Comments *)
-
 | block_comment_openings {
     let lexeme = Lexing.lexeme lexbuf in
     match state#block with
@@ -496,8 +475,6 @@ and scan_flags state acc = parse
 | eof { let _, _, state = state#sync lexbuf
         in List.rev acc, state }

-(* TODO: Move below to [LexerCommon.mll] *)
-
 (* Finishing a string *)

 and scan_string thread state = parse
@ -624,164 +601,6 @@ and scan_utf8_inline thread state = parse
{ {
(* START TRAILER *) (* START TRAILER *)
(* Scanning the lexing buffer for tokens (and markup, as a
side-effect).
Because we want the lexer to have access to the right lexical
context of a recognised lexeme (to enforce stylistic constraints or
report special error patterns), we need to keep a hidden reference
to a queue of recognised lexical units (that is, tokens and markup)
that acts as a mutable state between the calls to [read]. When
[read] is called, that queue is examined first and, if it contains
at least one token, that token is returned; otherwise, the lexing
buffer is scanned for at least one more new token. That is the
general principle: we put a high-level buffer (our queue) on top of
the low-level lexing buffer.
One tricky and important detail is that we must make any parser
generated by Menhir (and calling [read]) believe that the last
region of the input source that was matched indeed corresponds to
the returned token, despite that many tokens and markup may have
been matched since it was actually read from the input. In other
words, the parser requests a token that is taken from the
high-level buffer, but the parser requests the source regions from
the _low-level_ lexing buffer, and they may disagree if more than
one token has actually been recognised.
Consequently, in order to maintain a consistent view for the
parser, we have to patch some fields of the lexing buffer, namely
[lex_start_p] and [lex_curr_p], as these fields are read by parsers
generated by Menhir when querying source positions (regions). This
is the purpose of the function [patch_buffer]. After reading one
or more tokens and markup by the scanning rule [scan], we have to
save in the hidden reference [buf_reg] the region of the source
that was matched by [scan]. This atomic sequence of patching,
scanning and saving is implemented by the _function_ [scan]
(beware: it shadows the scanning rule [scan]). The function
[patch_buffer] is, of course, also called just before returning the
token, so the parser has a view of the lexing buffer consistent
with the token.
Note that an additional reference [first_call] is needed to
distinguish the first call to the function [scan], as the first
scanning rule is actually [init] (which can handle the BOM), not
[scan].
*)
type logger = Markup.t list -> token -> unit
type input =
File of file_path
| String of string
| Channel of in_channel
| Buffer of Lexing.lexbuf
type instance = {
input : input;
read : log:logger -> Lexing.lexbuf -> token;
buffer : Lexing.lexbuf;
get_win : unit -> token LexerLib.window;
get_pos : unit -> Pos.t;
get_last : unit -> Region.t;
get_file : unit -> file_path;
close : unit -> unit
}
type open_err = File_opening of string
let lexbuf_from_input = function
File path ->
(try
let chan = open_in path in
let close () = close_in chan in
let lexbuf = Lexing.from_channel chan in
let () =
let open Lexing in
lexbuf.lex_curr_p <- {lexbuf.lex_curr_p with pos_fname=path}
in Ok (lexbuf, close)
with Sys_error msg -> Stdlib.Error (File_opening msg))
| String s ->
Ok (Lexing.from_string s, fun () -> ())
| Channel chan ->
let close () = close_in chan in
Ok (Lexing.from_channel chan, close)
| Buffer b -> Ok (b, fun () -> ())
let open_token_stream ?line ?block input =
let file_path = match input with
File path -> path
| _ -> "" in
let pos = Pos.min ~file:file_path in
let buf_reg = ref (pos#byte, pos#byte)
and first_call = ref true
and decoder = Uutf.decoder ~encoding:`UTF_8 `Manual in
let supply = Uutf.Manual.src decoder in
let state = ref (LexerLib.mk_state
~units:FQueue.empty
~last:Region.ghost
~window:LexerLib.Nil
~pos
~markup:[]
~decoder
~supply
?block
?line
()) in
let get_pos () = !state#pos
and get_last () = !state#last
and get_win () = !state#window
and get_file () = file_path in
let patch_buffer (start, stop) buffer =
let open Lexing in
let file_path = buffer.lex_curr_p.pos_fname in
buffer.lex_start_p <- {start with pos_fname = file_path};
buffer.lex_curr_p <- {stop with pos_fname = file_path}
and save_region buffer =
buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p) in
let scan buffer =
patch_buffer !buf_reg buffer;
(if !first_call
then (state := init !state buffer; first_call := false)
else state := scan !state buffer);
save_region buffer in
let next_token buffer =
scan buffer;
match FQueue.peek !state#units with
None -> None
| Some (units, ext_token) ->
state := !state#set_units units; Some ext_token in
let rec read ~log buffer =
match FQueue.deq !state#units with
None ->
scan buffer;
read ~log buffer
| Some (units, (left_mark, token)) ->
log left_mark token;
state := ((!state#set_units units)
#set_last (Token.to_region token))
#slide_token token;
Token.check_right_context token next_token buffer;
patch_buffer (Token.to_region token)#byte_pos buffer;
token in
match lexbuf_from_input input with
Ok (buffer, close) ->
let () =
match input with
File path when path <> "" -> LexerLib.reset ~file:path buffer
| _ -> () in
let instance = {
input; read; buffer; get_win; get_pos; get_last; get_file; close}
in Ok instance
| Error _ as e -> e
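An editor's sketch (not part of the commit) of how a client might drain an instance returned by [open_token_stream]; the module name [Lexer] (an instance of this functor) is an assumption, and lexical errors ([Lexer.Error]) are left unhandled:

  let scan_all path =
    match Lexer.open_token_stream (Lexer.File path) with
      Ok instance ->
        let log _markup _token = () in   (* a silent logger *)
        let rec loop acc =
          let token = instance.Lexer.read ~log instance.Lexer.buffer in
          if Lexer.Token.is_eof token
          then List.rev (token :: acc)
          else loop (token :: acc) in
        let tokens = loop [] in
        instance.Lexer.close (); Stdlib.Ok tokens
    | Stdlib.Error (Lexer.File_opening msg) -> Stdlib.Error msg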
end (* of functor [Make] in HEADER *) end (* of functor [Make] in HEADER *)
(* END TRAILER *) (* END TRAILER *)
} }

View File

@ -1,159 +1,6 @@
(* Sliding window *)
(* The type [window] models a two-slot buffer of tokens for reporting
after a parse error. Technically, it is a parametric type, but its
use is meant for tokens, wherever they are defined.
In [Two(t1,t2)], the token [t2] is the next to be sent to the
parser.
The call [slide token buffer] pushes the token [token] into the
buffer [buffer]. If the buffer is full, that is, it is
[Two(t1,t2)], then the token [t2] is discarded to make room for
[token].
*)
type 'a window =
Nil
| One of 'a
| Two of 'a * 'a
let slide token = function
Nil -> One token
| One t | Two (t,_) -> Two (token,t)
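A quick illustration of the sliding behaviour (editor's sketch, using integers as stand-ins for tokens):

  let () =
    assert (slide 1 Nil          = One 1);
    assert (slide 2 (One 1)      = Two (2,1));
    assert (slide 3 (Two (2,1))  = Two (3,2))  (* the oldest token is dropped *)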
module Region = Simple_utils.Region module Region = Simple_utils.Region
module Pos = Simple_utils.Pos module Pos = Simple_utils.Pos
type lexeme = string
(* The signature [TOKEN] exports an abstract type [token], so a lexer
can be a functor over tokens. This makes it possible to externalise
version-dependent constraints in any module whose signature matches
[TOKEN]. Generic functions to construct tokens are required.
Note the predicate [is_eof], which characterises the virtual token
for end-of-file, because it requires special handling.
*)
module type TOKEN =
sig
type token
(* Predicates *)
val is_eof : token -> bool
(* Projections *)
val to_lexeme : token -> lexeme
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
val to_region : token -> Region.t
(* Style *)
type error
val error_to_string : error -> string
exception Error of error Region.reg
val format_error :
?offsets:bool ->
[`Byte | `Point] ->
error Region.reg ->
file:bool ->
string Region.reg
val check_right_context :
token ->
(Lexing.lexbuf -> (Markup.t list * token) option) ->
Lexing.lexbuf ->
unit
end
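For concreteness, here is an editor's sketch of a toy module satisfying [TOKEN]; the token constructors and the trivial style check are invented for illustration only and are not part of the commit:

  module ToyToken : TOKEN =
    struct
      type token = EOF of Region.t | Ident of string Region.reg
      let is_eof = function EOF _ -> true | _ -> false
      let to_lexeme = function EOF _ -> "" | Ident i -> i.Region.value
      let to_string token ?offsets:_ _mode = to_lexeme token
      let to_region = function EOF region -> region | Ident i -> i.Region.region
      type error = string
      let error_to_string msg = msg
      exception Error of error Region.reg
      let format_error ?offsets:_ _mode Region.{value; region} ~file:_ =
        Region.{value; region}
      let check_right_context _token _next _lexbuf = ()  (* no style check *)
    end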
(* The module type for lexers is [S]. It mainly exports the function
[open_token_stream], which returns
* a function [read] that extracts tokens from a lexing buffer,
together with a lexing buffer [buffer] to read from,
* a function [close] that closes that buffer,
* a function [get_pos] that returns the current position,
* a function [get_last] that returns the region of the last
recognised token, and
* a function [get_file] that returns the name of the file being
scanned (empty string if [stdin]).
Note that a module [Token] is exported too, because the signature
of the exported functions depends on it.
The type [window] is a two-token window, that is, a buffer that
contains the last recognised token, and the penultimate (if any).
The call [read ~log] evaluates to a lexer (also known as a
tokeniser or scanner) of type [Lexing.lexbuf -> token], suitable
for a parser generated by Menhir. The argument labelled
[log] is a logger, that is, it may print a token and its left
markup to a given channel, at the caller's discretion.
*)
module type S =
sig
module Token : TOKEN
type token = Token.token
type file_path = string
type logger = Markup.t list -> token -> unit
type input =
File of file_path
| String of string
| Channel of in_channel
| Buffer of Lexing.lexbuf
type instance = {
input : input;
read : log:logger -> Lexing.lexbuf -> token;
buffer : Lexing.lexbuf;
get_win : unit -> token window;
get_pos : unit -> Pos.t;
get_last : unit -> Region.t;
get_file : unit -> file_path;
close : unit -> unit
}
type open_err = File_opening of string
val lexbuf_from_input :
input -> (Lexing.lexbuf * (unit -> unit), open_err) Stdlib.result
type line_comment = string (* Opening of a line comment *)
type block_comment = <opening : string; closing : string>
val mk_block : opening:string -> closing:string -> block_comment
val open_token_stream :
?line:line_comment ->
?block:block_comment ->
input ->
(instance, open_err) Stdlib.result
(* Error reporting *)
type error
val error_to_string : error -> string
exception Error of error Region.reg
val format_error :
?offsets:bool ->
[`Byte | `Point] ->
error Region.reg ->
file:bool ->
string Region.reg
end
(* LEXER ENGINE *) (* LEXER ENGINE *)
(* Resetting file name and line number in the lexing buffer (* Resetting file name and line number in the lexing buffer
@ -205,27 +52,20 @@ let rollback buffer =
buffer.lex_curr_pos <- buffer.lex_curr_pos - len; buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum} buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
(* Utility types *)
type file_path = string
type lexeme = string
(* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *) (* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)
(* When scanning structured constructs, like strings and comments, we
need to keep the region of the opening symbol (like double quote,
"//" or "(*") in order to report any error more precisely. Since
ocamllex is byte-oriented, we need to store the parsed bytes as
characters in an accumulator [acc] and also its length [len] so
that, when we are done, it is easy to build the string making up the
structured construct with [mk_str] (see above).
The resulting data structure is called a _thread_. (Note for
Emacs: "*)".)
*)
type thread = < type thread = <
opening : Region.t; opening : Region.t;
length : int; length : int;
acc : char list; acc : char list;
push_char : char -> thread; to_string : string;
push_char : char -> thread;
push_string : string -> thread; push_string : string -> thread;
to_string : string;
set_opening : Region.t -> thread set_opening : Region.t -> thread
> >
@ -233,12 +73,12 @@ let mk_thread region lexeme : thread =
(* The call [explode s a] is the list made by pushing the characters (* The call [explode s a] is the list made by pushing the characters
in the string [s] on top of [a], in reverse order. For example, in the string [s] on top of [a], in reverse order. For example,
[explode "ba" ['c';'d'] = ['a'; 'b'; 'c'; 'd']]. *) [explode "ba" ['c';'d'] = ['a'; 'b'; 'c'; 'd']]. *)
let explode s acc = let explode s acc =
let rec push = function let rec push = function
0 -> acc 0 -> acc
| i -> s.[i-1] :: push (i-1) | i -> s.[i-1] :: push (i-1)
in push (String.length s) in push (String.length s) in
in
object object
val opening = region val opening = region
method opening = opening method opening = opening
@ -268,184 +108,288 @@ let mk_thread region lexeme : thread =
method to_string = method to_string =
let bytes = Bytes.make length ' ' in let bytes = Bytes.make length ' ' in
let rec fill i = function let rec fill i = function
[] -> bytes [] -> bytes
| char::l -> Bytes.set bytes i char; fill (i-1) l | char::l -> Bytes.set bytes i char; fill (i-1) l
in fill (length-1) acc |> Bytes.to_string in fill (length-1) acc |> Bytes.to_string
end end
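An editor's sketch of the accumulator discipline (assuming that [mk_thread], given the empty lexeme, starts from an empty accumulator):

  let () =
    let thread = mk_thread Region.ghost "" in
    let thread = (thread#push_char 'h')#push_string "i!" in
    assert (thread#to_string = "hi!")  (* pushed in reverse, printed in order *)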
type file_path = string (* STATE *)
(* STATE *) (* Scanning the lexing buffer for tokens (and markup, as a
side-effect).
(* Beyond tokens, the result of lexing is a state. The type Because we want the lexer to have access to the right lexical
[state] represents the logical state of the lexing engine, that context of a recognised lexeme (to enforce stylistic constraints or
is, a value which is threaded during scanning and which denotes report special error patterns), we need to keep a hidden reference
useful, high-level information beyond what the type to a queue of recognised lexical units (that is, tokens and markup)
[Lexing.lexbuf] in the standard library already provides for that acts as a mutable state between the calls to [read]. When
all generic lexers. [read] is called, that queue is examined first and, if it contains
at least one token, that token is returned; otherwise, the lexing
buffer is scanned for at least one more new token. That is the
general principle: we put a high-level buffer (our queue) on top of
the low-level lexing buffer.
Tokens are the smallest units used by the parser to build the One tricky and important detail is that we must make any parser
abstract syntax tree. The state includes a queue of recognised generated by Menhir (and calling [read]) believe that the last
tokens, with the markup at the left of its lexeme until either region of the input source that was matched indeed corresponds to
the start of the file or the end of the previously recognised the returned token, despite that many tokens and markup may have
token. been matched since it was actually read from the input. In other
words, the parser requests a token that is taken from the
high-level buffer, but the parser requests the source regions from
the _low-level_ lexing buffer, and they may disagree if more than
one token has actually been recognised.
The markup from the last recognised token or, if the first Consequently, in order to maintain a consistent view for the
token has not been recognised yet, from the beginning of the parser, we have to patch some fields of the lexing buffer, namely
file is stored in the field [markup] of the state because it is [lex_start_p] and [lex_curr_p], as these fields are read by parsers
a side-effect, with respect to the output token list, and we generated by Menhir when querying source positions (regions). This
use a record with a single field [units] because that record is the purpose of the function [patch_buffer]. After reading one or
may be easily extended during the future maintenance of this more tokens and markup by the scanning rule [scan], we have to save
lexer. in the hidden reference [buf_reg] the region of the source that was
matched by [scan]. This atomic sequence of patching, scanning and
saving is implemented by the _function_ [scan] (beware: it shadows
the scanning rule [scan]). The function [patch_buffer] is, of
course, also called just before returning the token, so the parser
has a view of the lexing buffer consistent with the token.
The state also includes a field [pos] which holds the current Note that an additional reference [first_call] is needed to
position in the LIGO source file. The position is not always distinguish the first call to the function [scan], as the first
updated after a single character has been matched: that depends scanning rule is actually [init] (which can handle the BOM), not
on the regular expression that matched the lexing buffer. [scan].
*)
The field [window] is a two-token window, that is, a buffer type 'token window =
that contains the last recognised token, and the penultimate Nil
(if any). | One of 'token
| Two of 'token * 'token
The fields [decoder] and [supply] offer the support needed type 'token state = <
for the lexing of UTF-8 encoded characters in comments (the units : (Markup.t list * 'token) FQueue.t;
only place where they are allowed in LIGO). The former is the markup : Markup.t list;
decoder proper and the latter is the effectful function window : 'token window;
[supply] that takes a byte, a start index and a length and feed last : Region.t;
it to [decoder]. See the documentation of the third-party pos : Pos.t;
library Uutf. decoder : Uutf.decoder;
*) supply : Bytes.t -> int -> int -> unit;
block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
type line_comment = string (* Opening of a line comment *) enqueue : 'token -> 'token state;
type block_comment = <opening : string; closing : string> set_units : (Markup.t list * 'token) FQueue.t -> 'token state;
set_last : Region.t -> 'token state;
set_pos : Pos.t -> 'token state;
slide_token : 'token -> 'token state;
let mk_block ~opening ~closing : block_comment = sync : Lexing.lexbuf -> Region.t * lexeme * 'token state;
object
method opening = opening
method closing = closing
end
type 'a state = < push_newline : Lexing.lexbuf -> 'token state;
units : (Markup.t list * 'a) FQueue.t; push_line : thread -> 'token state;
markup : Markup.t list; push_block : thread -> 'token state;
window : 'a window; push_space : Lexing.lexbuf -> 'token state;
last : Region.t; push_tabs : Lexing.lexbuf -> 'token state;
pos : Pos.t; push_bom : Lexing.lexbuf -> 'token state;
decoder : Uutf.decoder; push_markup : Markup.t -> 'token state;
supply : Bytes.t -> int -> int -> unit; >
block : block_comment option;
line : line_comment option;
enqueue : 'a -> 'a state; let mk_state ~units ~markup ~window ~last ~pos ~decoder ~supply
set_units : (Markup.t list * 'a) FQueue.t -> 'a state; ?block ?line () : _ state =
set_last : Region.t -> 'a state; object (self)
set_pos : Pos.t -> 'a state; val units = units
slide_token : 'a -> 'a state; method units = units
val markup = markup
method markup = markup
val window = window
method window = window
val last = last
method last = last
val pos = pos
method pos = pos
method decoder = decoder
method supply = supply
method block = block
method line = line
sync : Lexing.lexbuf -> Region.t * lexeme * 'a state; method enqueue token =
{< units = FQueue.enq (markup, token) units;
markup = [] >}
push_newline : Lexing.lexbuf -> 'a state; method set_units units = {< units = units >}
push_line : thread -> 'a state; method set_last region = {< last = region >}
push_block : thread -> 'a state; method set_pos pos = {< pos = pos >}
push_space : Lexing.lexbuf -> 'a state;
push_tabs : Lexing.lexbuf -> 'a state;
push_bom : Lexing.lexbuf -> 'a state;
push_markup : Markup.t -> 'a state;
>
let mk_state ~units ~markup ~window ~last ~pos ~decoder ~supply method slide_token token =
?block ?line () : _ state = match self#window with
object (self) Nil -> {< window = One token >}
val units = units | One t | Two (t,_) -> {< window = Two (token,t) >}
method units = units
val markup = markup
method markup = markup
val window = window
method window = window
val last = last
method last = last
val pos = pos
method pos = pos
method decoder = decoder
method supply = supply
method block = block
method line = line
(* The call [enqueue (token, state)] updates functionally the method sync buffer =
state [state] by associating the token [token] with the let lex = Lexing.lexeme buffer in
stored markup and enqueuing the pair into the units let len = String.length lex in
queue. The field [markup] is then reset to the empty let start = pos in
list. *) let stop = start#shift_bytes len in
let state = {< pos = stop >}
in Region.make ~start ~stop, lex, state
method enqueue token = (* MARKUP *)
{< units = FQueue.enq (markup, token) units;
markup = [] >}
method set_units units = {< units = units >} (* Committing markup to the current logical state *)
method set_last region = {< last = region >}
method set_pos pos = {< pos = pos >}
method slide_token token = method push_markup unit = {< markup = unit :: markup >}
{< window = slide token window >}
(* The call [sync state buffer] updates the current position method push_newline buffer =
in accordance with the contents of the lexing buffer, more let () = Lexing.new_line buffer in
precisely, depending on the length of the string which has let value = Lexing.lexeme buffer in
just been recognised by the scanner: that length is used as let start = self#pos in
a positive offset to the current column. *) let stop = start#new_line value in
let region = Region.make ~start ~stop in
let unit = Markup.Newline Region.{region; value}
in {< pos = stop; markup = unit::markup >}
method sync buffer = method push_line thread =
let lex = Lexing.lexeme buffer in let start = thread#opening#start in
let len = String.length lex in let region = Region.make ~start ~stop:self#pos
let start = pos in and value = thread#to_string in
let stop = start#shift_bytes len in let unit = Markup.LineCom Region.{region; value}
let state = {< pos = stop >} in {< markup = unit::markup >}
in Region.make ~start ~stop, lex, state
(* MARKUP *) method push_block thread =
let start = thread#opening#start in
let region = Region.make ~start ~stop:self#pos
and value = thread#to_string in
let unit = Markup.BlockCom Region.{region; value}
in {< markup = unit::markup >}
(* Committing markup to the current logical state *) method push_space buffer =
let region, lex, state = self#sync buffer in
let value = String.length lex in
let unit = Markup.Space Region.{region; value}
in state#push_markup unit
method push_markup unit = {< markup = unit :: markup >} method push_tabs buffer =
let region, lex, state = self#sync buffer in
let value = String.length lex in
let unit = Markup.Tabs Region.{region; value}
in state#push_markup unit
method push_newline buffer = method push_bom buffer =
let () = Lexing.new_line buffer in let region, value, state = self#sync buffer in
let value = Lexing.lexeme buffer in let unit = Markup.BOM Region.{region; value}
let start = self#pos in in state#push_markup unit
let stop = start#new_line value in end
let region = Region.make ~start ~stop in
let unit = Markup.Newline Region.{region; value}
in {< pos = stop; markup = unit::markup >}
method push_line thread = (* LEXER INSTANCE *)
let start = thread#opening#start in
let region = Region.make ~start ~stop:self#pos
and value = thread#to_string in
let unit = Markup.LineCom Region.{region; value}
in {< markup = unit::markup >}
method push_block thread = type input =
let start = thread#opening#start in File of file_path
let region = Region.make ~start ~stop:self#pos | String of string
and value = thread#to_string in | Channel of in_channel
let unit = Markup.BlockCom Region.{region; value} | Buffer of Lexing.lexbuf
in {< markup = unit::markup >}
method push_space buffer = type 'token logger = Markup.t list -> 'token -> unit
let region, lex, state = self#sync buffer in
let value = String.length lex in
let unit = Markup.Space Region.{region; value}
in state#push_markup unit
method push_tabs buffer = type 'token instance = {
let region, lex, state = self#sync buffer in input : input;
let value = String.length lex in read : log:('token logger) -> Lexing.lexbuf -> 'token;
let unit = Markup.Tabs Region.{region; value} buffer : Lexing.lexbuf;
in state#push_markup unit get_win : unit -> 'token window;
get_pos : unit -> Pos.t;
get_last : unit -> Region.t;
get_file : unit -> file_path;
close : unit -> unit
}
method push_bom buffer = type open_err = File_opening of string
let region, value, state = self#sync buffer in
let unit = Markup.BOM Region.{region; value}
in state#push_markup unit
end let lexbuf_from_input = function
String s ->
Ok (Lexing.from_string s, fun () -> ())
| Channel chan ->
let close () = close_in chan in
Ok (Lexing.from_channel chan, close)
| Buffer b ->
Ok (b, fun () -> ())
| File path ->
try
let chan = open_in path in
let close () = close_in chan in
let lexbuf = Lexing.from_channel chan in
let () =
let open Lexing in
lexbuf.lex_curr_p <- {lexbuf.lex_curr_p with pos_fname=path}
in Ok (lexbuf, close)
with Sys_error msg -> Stdlib.Error (File_opening msg)
let open_token_stream ?line ?block ~init ~scan
~token_to_region ~style input =
let file_path = match input with
File path -> path
| _ -> "" in
let pos = Pos.min ~file:file_path in
let buf_reg = ref (pos#byte, pos#byte)
and first_call = ref true
and decoder = Uutf.decoder ~encoding:`UTF_8 `Manual in
let supply = Uutf.Manual.src decoder in
let state = ref (mk_state
~units:FQueue.empty
~last:Region.ghost
~window:Nil
~pos
~markup:[]
~decoder
~supply
?block
?line
()) in
let get_pos () = !state#pos
and get_last () = !state#last
and get_win () = !state#window
and get_file () = file_path in
let patch_buffer (start, stop) buffer =
let open Lexing in
let file_path = buffer.lex_curr_p.pos_fname in
buffer.lex_start_p <- {start with pos_fname = file_path};
buffer.lex_curr_p <- {stop with pos_fname = file_path}
and save_region buffer =
buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p) in
let scan' init scan buffer =
patch_buffer !buf_reg buffer;
(if !first_call
then (state := init !state buffer; first_call := false)
else state := scan !state buffer);
save_region buffer in
let next_token init scan buffer =
scan' init scan buffer;
match FQueue.peek !state#units with
None -> None
| Some (units, ext_token) ->
state := !state#set_units units; Some ext_token in
let rec read init scan ~token_to_region ~style ~log buffer =
match FQueue.deq !state#units with
None ->
scan' init scan buffer;
read init scan ~token_to_region ~style ~log buffer
| Some (units, (left_mark, token)) ->
log left_mark token;
state := ((!state#set_units units)
#set_last (token_to_region token))
#slide_token token;
style token (next_token init scan) buffer;
patch_buffer (token_to_region token)#byte_pos buffer;
token in
match lexbuf_from_input input with
Ok (buffer, close) ->
let () =
match input with
File path when path <> "" -> reset ~file:path buffer
| _ -> () in
let instance = {
read = read init scan ~token_to_region ~style;
input; buffer; get_win; get_pos; get_last; get_file; close}
in Ok instance
| Error _ as e -> e

View File

@ -0,0 +1,205 @@
(* A library for writing UTF8-aware lexers *)
module Region = Simple_utils.Region
module Pos = Simple_utils.Pos
(* The function [rollback] resets the lexing buffer to the state it
was in when it matched the last regular expression. This function is
safe to use only in the semantic action of the rule which last
matched. *)
val rollback : Lexing.lexbuf -> unit
(* Utility types *)
type file_path = string
type lexeme = string
(* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)
(* When scanning structured constructs, like strings and comments, we
need to keep the region of the opening symbol (like double quote,
"//" or "(*") in order to report any error more precisely. Since
ocamllex is byte-oriented, we need to store the parsed bytes as
characters in an accumulator [acc] and also its length [len] so
that, when we are done, it is easy to build the string making up the
structured construct with [mk_str] (see above).
The resulting data structure is called a _thread_. (Note for
Emacs: "*)".)
*)
type thread = <
opening : Region.t;
length : int;
acc : char list;
to_string : string;
push_char : char -> thread;
push_string : string -> thread;
set_opening : Region.t -> thread
>
val mk_thread : Region.t -> lexeme -> thread
(* STATE *)
(* Beyond producing tokens, the result of lexing is a _state_. The
type [state] represents the abstract logical state of the lexing
engine, that is, a value which is threaded during scanning and
which denotes useful, high-level information beyond what the type
[Lexing.lexbuf] in the standard library already provides for all
generic lexers. We qualify it as "logical state" because the lexing
buffer itself has a "physical state" defined by the type
[Lexing.lexbuf].
Tokens are the smallest units used by the parser to build the
abstract syntax tree. The state includes a queue of recognised
tokens, with the markup at the left of its lexeme until either the
start of the file or the end of the previously recognised token.
The markup from the last recognised token or, if the first token
has not been recognised yet, from the beginning of the file is
stored in the field [markup] of the state because it is a
side-effect, with respect to the output token list, and we use a
record with a single field [units] because that record may be
easily extended during the future maintenance of this lexer.
The state also includes a field [pos] which holds the current
position in the LIGO source file. The position is not always
updated after a single character has been matched: that depends on
the regular expression that matched the lexing buffer.
The field [window] is a two-token window, that is, a buffer that
contains the last recognised token, and the penultimate (if any).
Technically, it is a parametric type, but its use is meant for
tokens, wherever they are defined. In [Two (t1,t2)], and in case
of a syntax error, [t1] is the first invalid token and [t2] is the
last valid one.
The fields [decoder] and [supply] offer the support needed for
the lexing of UTF-8 encoded characters in comments (the only place
where they are allowed in LIGO). The former is the decoder proper
and the latter is the effectful function [supply] that takes a
byte sequence, a start index and a length, and feeds it to [decoder]. See the
documentation of the third-party library Uutf.
Some methods are now documented.
The call [state#enqueue token] updates functionally the state
[state] by associating the token [token] with the stored markup and
enqueuing the pair into the units queue. The field [markup] is then
reset to the empty list.
The call [state#slide_token token] pushes the token [token] into
the two-token window. If the window is full, that is, it is [Two
(t1,t2)], then the token [t2] is discarded to make room for
[token].
The call [state#sync buffer] updates the current position in
accordance with the contents of the lexing buffer, more precisely,
depending on the length of the string which has just been
recognised by the scanner: that length is used as a positive offset
to the current column.
*)
type 'token window =
Nil
| One of 'token
| Two of 'token * 'token
type 'token state = <
units : (Markup.t list * 'token) FQueue.t;
markup : Markup.t list;
window : 'token window;
last : Region.t;
pos : Pos.t;
decoder : Uutf.decoder;
supply : Bytes.t -> int -> int -> unit;
block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
enqueue : 'token -> 'token state;
set_units : (Markup.t list * 'token) FQueue.t -> 'token state;
set_last : Region.t -> 'token state;
set_pos : Pos.t -> 'token state;
slide_token : 'token -> 'token state;
sync : Lexing.lexbuf -> Region.t * lexeme * 'token state;
push_newline : Lexing.lexbuf -> 'token state;
push_line : thread -> 'token state;
push_block : thread -> 'token state;
push_space : Lexing.lexbuf -> 'token state;
push_tabs : Lexing.lexbuf -> 'token state;
push_bom : Lexing.lexbuf -> 'token state;
push_markup : Markup.t -> 'token state;
>
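Because every method returns an updated copy of the state, updates compose by chaining method calls; an editor's sketch mirroring the pattern used by [read] in the implementation:

  let remember state token region =
    ((state#set_last region)#slide_token token)#enqueue token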
(* LEXER INSTANCE *)
(* The function [open_token_stream] returns a lexer instance made of
* the input [input] of type [input];
* a function [read] that extracts tokens from a lexing buffer,
together with a lexing buffer [buffer] to read from,
* a function [close] that closes that buffer,
* a function [get_pos] that returns the current position,
* a function [get_last] that returns the region of the last
recognised token, and
* a function [get_file] that returns the name of the file being
scanned (empty string if [stdin]).
Note that the exported functions are parametric in the type
['token], because their signatures depend on the tokens, but no
concrete token module is fixed at this level.
The type [window] is a two-token window, that is, a buffer that
contains the last recognised token, and the penultimate (if any).
The call [open_token_stream ?line ?block ~init ~scan
~token_to_region ~style input] evaluates to a lexer instance whose
field [read], of type [log:('token logger) -> Lexing.lexbuf ->
'token], is suitable for a parser generated by Menhir. The argument labelled
[log] is a logger, that is, it may print a token and its left
markup to a given channel, at the caller's discretion. The argument
labelled [~init] is the scanner to be called first, usually for
reading the BOM, then [scan] is used for the following calls. The
function labelled [~style] is used to check stylistic constraints
on the tokens and the markup between them.
*)
type input =
File of file_path
| String of string
| Channel of in_channel
| Buffer of Lexing.lexbuf
type 'token logger = Markup.t list -> 'token -> unit
type 'token instance = {
input : input;
read : log:('token logger) -> Lexing.lexbuf -> 'token;
buffer : Lexing.lexbuf;
get_win : unit -> 'token window;
get_pos : unit -> Pos.t;
get_last : unit -> Region.t;
get_file : unit -> file_path;
close : unit -> unit
}
type open_err = File_opening of string
val lexbuf_from_input :
input -> (Lexing.lexbuf * (unit -> unit), open_err) Stdlib.result
val open_token_stream :
?line:EvalOpt.line_comment ->
?block:EvalOpt.block_comment ->
init:('token state -> Lexing.lexbuf -> 'token state) ->
scan:('token state -> Lexing.lexbuf -> 'token state) ->
token_to_region:('token -> Region.t) ->
style:('token ->
(Lexing.lexbuf -> (Markup.t list * 'token) option) ->
Lexing.lexbuf ->
unit) ->
input ->
('token instance, open_err) Stdlib.result
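An editor's sketch of a call site for [open_token_stream], assuming a concrete lexer module [Lexer] exposing [init], [scan] and a module [Token], as elsewhere in this commit:

  let make_instance input =
    LexerLib.open_token_stream
      ~init:Lexer.init
      ~scan:Lexer.scan
      ~token_to_region:Lexer.Token.to_region
      ~style:Lexer.Token.check_right_context
      input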

View File

@ -4,7 +4,8 @@ module Region = Simple_utils.Region
module type S = module type S =
sig sig
module Lexer : LexerLib.S module Lexer : Lexer.S
type token = Lexer.token
val output_token : val output_token :
?offsets:bool -> ?offsets:bool ->
@ -12,7 +13,7 @@ module type S =
EvalOpt.command -> EvalOpt.command ->
out_channel -> out_channel ->
Markup.t list -> Markup.t list ->
Lexer.token -> token ->
unit unit
type file_path = string type file_path = string
@ -22,19 +23,26 @@ module type S =
[`Byte | `Point] -> [`Byte | `Point] ->
?block:EvalOpt.block_comment -> ?block:EvalOpt.block_comment ->
?line:EvalOpt.line_comment -> ?line:EvalOpt.line_comment ->
Lexer.input -> token_to_region:(token -> Region.t) ->
style:(token ->
(Lexing.lexbuf -> (Markup.t list * token) option) ->
Lexing.lexbuf ->
unit) ->
LexerLib.input ->
EvalOpt.command -> EvalOpt.command ->
(unit, string Region.reg) Stdlib.result (unit, string Region.reg) Stdlib.result
end end
module Make (Lexer: LexerLib.S) : (S with module Lexer = Lexer) = module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
struct struct
module Lexer = Lexer module Lexer = Lexer
module Token = Lexer.Token module Token = Lexer.Token
type token = Lexer.token
(** Pretty-printing in a string the lexemes making up the markup (* Pretty-printing in a string the lexemes making up the markup
between two tokens, concatenated with the last lexeme between two tokens, concatenated with the last lexeme
itself. *) itself. *)
let output_token ?(offsets=true) mode command let output_token ?(offsets=true) mode command
channel left_mark token : unit = channel left_mark token : unit =
let output str = Printf.fprintf channel "%s%!" str in let output str = Printf.fprintf channel "%s%!" str in
@ -57,10 +65,17 @@ module Make (Lexer: LexerLib.S) : (S with module Lexer = Lexer) =
type file_path = string type file_path = string
let trace ?(offsets=true) mode ?block ?line input command : let trace ?(offsets=true) mode ?block ?line
~token_to_region ~style input command :
(unit, string Region.reg) Stdlib.result = (unit, string Region.reg) Stdlib.result =
match Lexer.open_token_stream ?line ?block input with match LexerLib.open_token_stream
Ok Lexer.{read; buffer; close; _} -> ~init:Lexer.init
~scan:Lexer.scan
~token_to_region
~style
?line ?block input
with
Ok LexerLib.{read; buffer; close; _} ->
let log = output_token ~offsets mode command stdout let log = output_token ~offsets mode command stdout
and close_all () = flush_all (); close () in and close_all () = flush_all (); close () in
let rec iter () = let rec iter () =
@ -69,12 +84,17 @@ module Make (Lexer: LexerLib.S) : (S with module Lexer = Lexer) =
if Token.is_eof token if Token.is_eof token
then Stdlib.Ok () then Stdlib.Ok ()
else iter () else iter ()
| exception Lexer.Token.Error error ->
let msg =
Lexer.Token.format_error
~offsets mode ~file:true error
in Stdlib.Error msg
| exception Lexer.Error error -> | exception Lexer.Error error ->
let msg = let msg =
Lexer.format_error ~offsets mode ~file:true error Lexer.format_error ~offsets mode ~file:true error
in Stdlib.Error msg in in Stdlib.Error msg in
let result = iter () let result = iter ()
in close_all (); result in close_all (); result
| Stdlib.Error (Lexer.File_opening msg) -> | Stdlib.Error (LexerLib.File_opening msg) ->
flush_all (); Stdlib.Error (Region.wrap_ghost msg) flush_all (); Stdlib.Error (Region.wrap_ghost msg)
end end

View File

@ -2,7 +2,8 @@ module Region = Simple_utils.Region
module type S = module type S =
sig sig
module Lexer : LexerLib.S module Lexer : Lexer.S
type token = Lexer.token
val output_token : val output_token :
?offsets:bool -> ?offsets:bool ->
@ -10,7 +11,7 @@ module type S =
EvalOpt.command -> EvalOpt.command ->
out_channel -> out_channel ->
Markup.t list -> Markup.t list ->
Lexer.token -> token ->
unit unit
type file_path = string type file_path = string
@ -20,9 +21,14 @@ module type S =
[`Byte | `Point] -> [`Byte | `Point] ->
?block:EvalOpt.block_comment -> ?block:EvalOpt.block_comment ->
?line:EvalOpt.line_comment -> ?line:EvalOpt.line_comment ->
Lexer.input -> token_to_region:(token -> Region.t) ->
style:(token ->
(Lexing.lexbuf -> (Markup.t list * token) option) ->
Lexing.lexbuf ->
unit) ->
LexerLib.input ->
EvalOpt.command -> EvalOpt.command ->
(unit, string Region.reg) Stdlib.result (unit, string Region.reg) Stdlib.result
end end
module Make (Lexer: LexerLib.S) : S with module Lexer = Lexer module Make (Lexer: Lexer.S) : S with module Lexer = Lexer

View File

@ -9,7 +9,7 @@ module type IO =
val options : EvalOpt.options (* CLI options *) val options : EvalOpt.options (* CLI options *)
end end
module Make (IO: IO) (Lexer: LexerLib.S) = module Make (IO: IO) (Lexer: Lexer.S) =
struct struct
(* Error printing and exception tracing *) (* Error printing and exception tracing *)
@ -39,11 +39,16 @@ module Make (IO: IO) (Lexer: LexerLib.S) =
| Stdlib.Ok pp_buffer -> | Stdlib.Ok pp_buffer ->
(* Running the lexer on the preprocessed input *) (* Running the lexer on the preprocessed input *)
let source = Lexer.String (Buffer.contents pp_buffer) in let source = LexerLib.String (Buffer.contents pp_buffer) in
match Lexer.open_token_stream ?line:IO.options#line match LexerLib.open_token_stream
?block:IO.options#block ?line:IO.options#line
source with ?block:IO.options#block
Ok Lexer.{read; buffer; close; _} -> ~init:Lexer.init
~scan:Lexer.scan
~token_to_region:Lexer.Token.to_region
~style:Lexer.Token.check_right_context
source with
Ok LexerLib.{read; buffer; close; _} ->
let close_all () = flush_all (); close () in let close_all () = flush_all (); close () in
let rec read_tokens tokens = let rec read_tokens tokens =
match read ~log:(fun _ _ -> ()) buffer with match read ~log:(fun _ _ -> ()) buffer with
@ -51,9 +56,7 @@ module Make (IO: IO) (Lexer: LexerLib.S) =
if Lexer.Token.is_eof token if Lexer.Token.is_eof token
then Stdlib.Ok (List.rev tokens) then Stdlib.Ok (List.rev tokens)
else read_tokens (token::tokens) else read_tokens (token::tokens)
(* | exception _ -> | exception Lexer.Token.Error error ->
Printf.eprintf "Here\n%!"; exit 1
*) | exception Lexer.Token.Error error ->
let file = let file =
match IO.options#input with match IO.options#input with
None | Some "-" -> false None | Some "-" -> false
@ -74,7 +77,7 @@ module Make (IO: IO) (Lexer: LexerLib.S) =
in Stdlib.Error msg in in Stdlib.Error msg in
let result = read_tokens [] let result = read_tokens []
in close_all (); result in close_all (); result
| Stdlib.Error (Lexer.File_opening msg) -> | Stdlib.Error (LexerLib.File_opening msg) ->
flush_all (); Stdlib.Error (Region.wrap_ghost msg) in flush_all (); Stdlib.Error (Region.wrap_ghost msg) in
match IO.options#input with match IO.options#input with
None -> preproc stdin None -> preproc stdin
@ -115,7 +118,9 @@ module Make (IO: IO) (Lexer: LexerLib.S) =
IO.options#mode IO.options#mode
?block:IO.options#block ?block:IO.options#block
?line:IO.options#line ?line:IO.options#line
(Lexer.String preproc_str) ~token_to_region:Lexer.Token.to_region
~style:Lexer.Token.check_right_context
(LexerLib.String preproc_str)
IO.options#cmd IO.options#cmd
in match IO.options#input with in match IO.options#input with
None -> preproc stdin None -> preproc stdin

View File

@ -7,7 +7,7 @@ module type IO =
val options : EvalOpt.options (* CLI options *) val options : EvalOpt.options (* CLI options *)
end end
module Make (IO: IO) (Lexer: LexerLib.S) : module Make (IO: IO) (Lexer: Lexer.S) :
sig sig
val scan : unit -> (Lexer.token list, string Region.reg) Stdlib.result val scan : unit -> (Lexer.token list, string Region.reg) Stdlib.result
val trace : unit -> (unit, string Region.reg) Stdlib.result val trace : unit -> (unit, string Region.reg) Stdlib.result

View File

@ -56,8 +56,8 @@ module type PARSER =
(* Main functor *) (* Main functor *)
module Make (IO: IO) module Make (IO: IO)
(Lexer: LexerLib.S) (Lexer: Lexer.S)
(Parser: PARSER with type token = Lexer.Token.token) (Parser: PARSER with type token = Lexer.token)
(ParErr: sig val message : int -> string end) = (ParErr: sig val message : int -> string end) =
struct struct
module I = Parser.MenhirInterpreter module I = Parser.MenhirInterpreter
@ -143,14 +143,14 @@ module Make (IO: IO)
~offsets:IO.options#offsets ~offsets:IO.options#offsets
IO.options#mode IO.options#cmd stdout IO.options#mode IO.options#cmd stdout
let incr_contract Lexer.{read; buffer; get_win; close; _} = let incr_contract LexerLib.{read; buffer; get_win; close; _} =
let supplier = I.lexer_lexbuf_to_supplier (read ~log) buffer let supplier = I.lexer_lexbuf_to_supplier (read ~log) buffer
and failure = failure get_win in and failure = failure get_win in
let parser = Incr.contract buffer.Lexing.lex_curr_p in let parser = Incr.contract buffer.Lexing.lex_curr_p in
let ast = I.loop_handle success failure supplier parser let ast = I.loop_handle success failure supplier parser
in flush_all (); close (); ast in flush_all (); close (); ast
let incr_expr Lexer.{read; buffer; get_win; close; _} = let incr_expr LexerLib.{read; buffer; get_win; close; _} =
let supplier = I.lexer_lexbuf_to_supplier (read ~log) buffer let supplier = I.lexer_lexbuf_to_supplier (read ~log) buffer
and failure = failure get_win in and failure = failure get_win in
let parser = Incr.interactive_expr buffer.Lexing.lex_curr_p in let parser = Incr.interactive_expr buffer.Lexing.lex_curr_p in

View File

@ -55,8 +55,8 @@ module type PARSER =
end end
module Make (IO: IO) module Make (IO: IO)
(Lexer: LexerLib.S) (Lexer: Lexer.S)
(Parser: PARSER with type token = Lexer.Token.token) (Parser: PARSER with type token = Lexer.token)
(ParErr: sig val message : int -> string end) : (ParErr: sig val message : int -> string end) :
sig sig
(* WARNING: The following parsers may all raise [Lexer.Error] *) (* WARNING: The following parsers may all raise [Lexer.Error] *)
@ -78,8 +78,8 @@ module Make (IO: IO)
exception Point of error exception Point of error
val incr_contract : Lexer.instance -> Parser.ast val incr_contract : Lexer.token LexerLib.instance -> Parser.ast
val incr_expr : Lexer.instance -> Parser.expr val incr_expr : Lexer.token LexerLib.instance -> Parser.expr
val format_error : val format_error :
?offsets:bool -> [`Point | `Byte] -> error -> string Region.reg ?offsets:bool -> [`Point | `Byte] -> error -> string Region.reg

View File

@ -37,7 +37,7 @@ module type Printer =
val print_expr : state -> expr -> unit val print_expr : state -> expr -> unit
end end
module Make (Lexer: LexerLib.S) module Make (Lexer: Lexer.S)
(AST: sig type t type expr end) (AST: sig type t type expr end)
(Parser: ParserAPI.PARSER (Parser: ParserAPI.PARSER
with type ast = AST.t with type ast = AST.t
@ -89,12 +89,12 @@ module Make (Lexer: LexerLib.S)
ParserLog.mk_state ~offsets:SubIO.options#offsets ParserLog.mk_state ~offsets:SubIO.options#offsets
~mode:SubIO.options#mode ~mode:SubIO.options#mode
~buffer:output in ~buffer:output in
let close () = lexer_inst.Lexer.close () in let close () = lexer_inst.LexerLib.close () in
let expr = let expr =
try try
if SubIO.options#mono then if SubIO.options#mono then
let tokeniser = lexer_inst.Lexer.read ~log let tokeniser = lexer_inst.LexerLib.read ~log
and lexbuf = lexer_inst.Lexer.buffer and lexbuf = lexer_inst.LexerLib.buffer
in Front.mono_expr tokeniser lexbuf in Front.mono_expr tokeniser lexbuf
else else
Front.incr_expr lexer_inst Front.incr_expr lexer_inst
@ -124,12 +124,12 @@ module Make (Lexer: LexerLib.S)
ParserLog.mk_state ~offsets:SubIO.options#offsets ParserLog.mk_state ~offsets:SubIO.options#offsets
~mode:SubIO.options#mode ~mode:SubIO.options#mode
~buffer:output in ~buffer:output in
let close () = lexer_inst.Lexer.close () in let close () = lexer_inst.LexerLib.close () in
let ast = let ast =
try try
if SubIO.options#mono then if SubIO.options#mono then
let tokeniser = lexer_inst.Lexer.read ~log let tokeniser = lexer_inst.LexerLib.read ~log
and lexbuf = lexer_inst.Lexer.buffer and lexbuf = lexer_inst.LexerLib.buffer
in Front.mono_contract tokeniser lexbuf in Front.mono_contract tokeniser lexbuf
else else
Front.incr_contract lexer_inst Front.incr_contract lexer_inst
@ -163,10 +163,18 @@ module Make (Lexer: LexerLib.S)
| exception Lexer.Error err -> | exception Lexer.Error err ->
let file = let file =
lexer_inst.Lexer.buffer.Lexing.lex_curr_p.Lexing.pos_fname in lexer_inst.LexerLib.buffer.Lexing.lex_curr_p.Lexing.pos_fname in
let error = let error = Lexer.format_error
Lexer.format_error ~offsets:SubIO.options#offsets ~offsets:SubIO.options#offsets
SubIO.options#mode err ~file:(file <> "") SubIO.options#mode err ~file:(file <> "")
in Stdlib.Error error
| exception Lexer.Token.Error err ->
let file =
lexer_inst.LexerLib.buffer.Lexing.lex_curr_p.Lexing.pos_fname in
let error = Lexer.Token.format_error
~offsets:SubIO.options#offsets
SubIO.options#mode err ~file:(file <> "")
in Stdlib.Error error in Stdlib.Error error
(* Incremental API of Menhir *) (* Incremental API of Menhir *)
@ -181,7 +189,7 @@ module Make (Lexer: LexerLib.S)
| exception Parser.Error -> | exception Parser.Error ->
let invalid, valid_opt = let invalid, valid_opt =
match lexer_inst.Lexer.get_win () with match lexer_inst.LexerLib.get_win () with
LexerLib.Nil -> LexerLib.Nil ->
assert false (* Safe: There is always at least EOF. *) assert false (* Safe: There is always at least EOF. *)
| LexerLib.One invalid -> invalid, None | LexerLib.One invalid -> invalid, None
@ -205,8 +213,8 @@ module Make (Lexer: LexerLib.S)
(* Parsing a contract *) (* Parsing a contract *)
let gen_parser options input parser = let gen_parser options input parser =
match Lexer.lexbuf_from_input input with match LexerLib.lexbuf_from_input input with
Stdlib.Error (Lexer.File_opening msg) -> Stdlib.Error (LexerLib.File_opening msg) ->
Stdlib.Error (Region.wrap_ghost msg) Stdlib.Error (Region.wrap_ghost msg)
| Ok (lexbuf, close) -> | Ok (lexbuf, close) ->
(* Preprocessing the input source *) (* Preprocessing the input source *)
@ -224,48 +232,53 @@ module Make (Lexer: LexerLib.S)
(* Lexing and parsing the preprocessed input source *) (* Lexing and parsing the preprocessed input source *)
let () = close () in let () = close () in
let input' = Lexer.String (Buffer.contents buffer) in let input' = LexerLib.String (Buffer.contents buffer) in
match Lexer.open_token_stream ?line:options#line match LexerLib.open_token_stream
?block:options#block ~init:Lexer.init
input' ~scan:Lexer.scan
~token_to_region:Lexer.Token.to_region
~style:Lexer.Token.check_right_context
?line:options#line
?block:options#block
input'
with with
Ok instance -> Ok instance ->
let open Lexing in let open Lexing in
instance.Lexer.buffer.lex_curr_p <- instance.LexerLib.buffer.lex_curr_p <-
{instance.Lexer.buffer.lex_curr_p with pos_fname=file}; {instance.LexerLib.buffer.lex_curr_p with pos_fname=file};
apply instance parser apply instance parser
| Stdlib.Error (Lexer.File_opening msg) -> | Stdlib.Error (LexerLib.File_opening msg) ->
Stdlib.Error (Region.wrap_ghost msg) Stdlib.Error (Region.wrap_ghost msg)
(* Parsing a contract in a file *) (* Parsing a contract in a file *)
let contract_in_file (source : string) = let contract_in_file (source : string) =
let options = SubIO.make ~input:(Some source) ~expr:false let options = SubIO.make ~input:(Some source) ~expr:false
in gen_parser options (Lexer.File source) parse_contract in gen_parser options (LexerLib.File source) parse_contract
(* Parsing a contract in a string *) (* Parsing a contract in a string *)
let contract_in_string (source : string) = let contract_in_string (source : string) =
let options = SubIO.make ~input:None ~expr:false in let options = SubIO.make ~input:None ~expr:false in
gen_parser options (Lexer.String source) parse_contract gen_parser options (LexerLib.String source) parse_contract
(* Parsing a contract in stdin *) (* Parsing a contract in stdin *)
let contract_in_stdin () = let contract_in_stdin () =
let options = SubIO.make ~input:None ~expr:false in let options = SubIO.make ~input:None ~expr:false in
gen_parser options (Lexer.Channel stdin) parse_contract gen_parser options (LexerLib.Channel stdin) parse_contract
(* Parsing an expression in a string *) (* Parsing an expression in a string *)
let expr_in_string (source : string) = let expr_in_string (source : string) =
let options = SubIO.make ~input:None ~expr:true in let options = SubIO.make ~input:None ~expr:true in
gen_parser options (Lexer.String source) parse_expr gen_parser options (LexerLib.String source) parse_expr
(* Parsing an expression in stdin *) (* Parsing an expression in stdin *)
let expr_in_stdin () = let expr_in_stdin () =
let options = SubIO.make ~input:None ~expr:true in let options = SubIO.make ~input:None ~expr:true in
gen_parser options (Lexer.Channel stdin) parse_expr gen_parser options (LexerLib.Channel stdin) parse_expr
(* Preprocess only *) (* Preprocess only *)

View File

@ -43,7 +43,7 @@ module type Printer =
(* Main functor to make the parser *) (* Main functor to make the parser *)
module Make (Lexer : LexerLib.S) module Make (Lexer : Lexer.S)
(AST : sig type t type expr end) (AST : sig type t type expr end)
(Parser : ParserAPI.PARSER (Parser : ParserAPI.PARSER
with type ast = AST.t with type ast = AST.t

View File

@ -1 +1 @@
const a: string = -0x222; const a: string = - (**) 0x2222

View File

@ -1 +1 @@
let a = -0x222; let a = - (**) 0x2222

View File

@ -1 +1 @@
let a = -0x222; let a = - /**/ 0x2222;

View File

@ -1 +0,0 @@
let arguments = 1;