Finished refactoring of lexer.

2020-04-28 19:26:31 +02:00 · 2020-04-28 19:26:31 +02:00 · 005b7fd69b
commit 005b7fd69b
parent 9618a48848
27 changed files with 1174 additions and 1079 deletions
--- a/src/bin/expect_tests/lexer_tests.ml
+++ b/src/bin/expect_tests/lexer_tests.ml
@ -54,7 +54,7 @@ ligo: : Lexical error in file "broken_string.religo", line 1, characters 8-9:

 run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.ligo" ; "main" ] ;
  [%expect {|
-ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters 18-23:
+ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters 18-31:
      Negative byte sequence.
      Hint: Remove the leading minus sign.
       {}
@ -71,7 +71,7 @@ ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters

 run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.mligo" ; "main" ] ;
  [%expect {|
-ligo: : Lexical error in file "negative_byte_sequence.mligo", line 1, characters 8-13:
+ligo: : Lexical error in file "negative_byte_sequence.mligo", line 1, characters 8-21:
      Negative byte sequence.
      Hint: Remove the leading minus sign.
       {}
@ -88,7 +88,7 @@ ligo: : Lexical error in file "negative_byte_sequence.mligo", line 1, characters

 run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.religo" ; "main" ] ;
  [%expect {|
-ligo: : Lexical error in file "negative_byte_sequence.religo", line 1, characters 8-13:
+ligo: : Lexical error in file "negative_byte_sequence.religo", line 1, characters 8-21:
      Negative byte sequence.
      Hint: Remove the leading minus sign.
       {}
@ -103,6 +103,7 @@ ligo: : Lexical error in file "negative_byte_sequence.religo", line 1, character
 * Check the changelog by running 'ligo changelog'
 |} ];

+(*
 run_ligo_bad [ "compile-contract" ; "../../test/lexer/reserved_name.ligo" ; "main" ] ;
  [%expect {|
 ligo: : Lexical error in file "reserved_name.ligo", line 1, characters 4-13:
@ -119,6 +120,7 @@ ligo: : Lexical error in file "reserved_name.ligo", line 1, characters 4-13:
 * Open a gitlab issue: https://gitlab.com/ligolang/ligo/issues/new
 * Check the changelog by running 'ligo changelog'
 |} ];
+ *)

 run_ligo_bad [ "compile-contract" ; "../../test/lexer/reserved_name.religo" ; "main" ] ;
  [%expect {|
@ -204,10 +206,9 @@ ligo: : Lexical error in file "unexpected_character.religo", line 1, characters

 run_ligo_bad [ "compile-contract" ; "../../test/lexer/unterminated_comment.mligo" ; "main" ] ;
  [%expect {|
-ligo: : Lexical error in file "unterminated_comment.mligo", line 1, characters 0-2:
+ligo: : Preprocessing error in file "../../test/lexer/unterminated_comment.mligo", line 1, characters 0-2:
      Unterminated comment.
-      Hint: Close with "*)".
-       {}
+      Hint: Close with "*)". {}


 If you're not sure how to fix this error, you can
--- a/src/passes/1-parser/cameligo/.links
+++ b/src/passes/1-parser/cameligo/.links
@ -2,6 +2,7 @@ $HOME/git/OCaml-build/Makefile

 ../shared/Lexer.mli
 ../shared/Lexer.mll
+../shared/LexerLib.ml
 ../shared/EvalOpt.ml
 ../shared/EvalOpt.mli
 ../shared/FQueue.ml
--- a/src/passes/1-parser/cameligo/LexToken.mll
+++ b/src/passes/1-parser/cameligo/LexToken.mll
@ -102,10 +102,35 @@ type t =

 | EOF of Region.t (* End of file *)

+
+(* Projections *)
+
 type token = t

 let proj_token = function
-  ARROW region -> region, "ARROW"
+  (* Literals *)
+
+  String Region.{region; value} ->
+    region, sprintf "Str %s" value
+| Bytes Region.{region; value = s,b} ->
+    region,
+    sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b)
+| Int Region.{region; value = s,n} ->
+    region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
+| Nat Region.{region; value = s,n} ->
+    region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
+| Mutez Region.{region; value = s,n} ->
+    region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
+| Ident Region.{region; value} ->
+    region, sprintf "Ident %s" value
+| Constr Region.{region; value} ->
+    region, sprintf "Constr %s" value
+| Attr Region.{region; value} ->
+   region, sprintf "Attr \"%s\"" value
+
+  (* Symbols *)
+
+| ARROW    region -> region, "ARROW"
 | CONS     region -> region, "CONS"
 | CAT      region -> region, "CAT"
 | MINUS    region -> region, "MINUS"
@ -132,24 +157,9 @@ let proj_token = function
 | GE       region -> region, "GE"
 | BOOL_OR  region -> region, "BOOL_OR"
 | BOOL_AND region -> region, "BOOL_AND"
-| Ident Region.{region; value} ->
-    region, sprintf "Ident %s" value
-| Constr Region.{region; value} ->
-    region, sprintf "Constr %s" value
-| Int Region.{region; value = s,n} ->
-    region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
-| Nat Region.{region; value = s,n} ->
-    region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
-| Mutez Region.{region; value = s,n} ->
-    region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
-| String Region.{region; value} ->
-    region, sprintf "Str %s" value
-| Bytes Region.{region; value = s,b} ->
-    region,
-    sprintf "Bytes (\"%s\", \"0x%s\")"
-      s (Hex.show b)
-| Attr Region.{region; value} ->
-   region, sprintf "Attr \"%s\"" value
+
+  (* Keywords *)
+
 | Begin region -> region, "Begin"
 | Else  region -> region, "Else"
 | End   region -> region, "End"
@ -168,12 +178,32 @@ let proj_token = function
 | True  region -> region, "True"
 | Type  region -> region, "Type"
 | With  region -> region, "With"
+
+  (* Data *)
+
 | C_None region -> region, "C_None"
 | C_Some region -> region, "C_Some"
+
+  (* Virtual tokens *)
+
 | EOF region -> region, "EOF"

+
 let to_lexeme = function
-  ARROW _    -> "->"
+  (* Literals *)
+
+  String s   -> String.escaped s.Region.value
+| Bytes b    -> fst b.Region.value
+| Int i
+| Nat i
+| Mutez i    -> fst i.Region.value
+| Ident id   -> id.Region.value
+| Constr id  -> id.Region.value
+| Attr a     -> a.Region.value
+
+  (* Symbols *)
+
+| ARROW    _ -> "->"
 | CONS     _ -> "::"
 | CAT      _ -> "^"
 | MINUS    _ -> "-"
@ -201,14 +231,7 @@ let to_lexeme = function
 | BOOL_OR  _ -> "||"
 | BOOL_AND _ -> "&&"

-| Ident id   -> id.Region.value
-| Constr id  -> id.Region.value
-| Int i
-| Nat i
-| Mutez i   -> fst i.Region.value
-| String s  -> String.escaped s.Region.value
-| Bytes b   -> fst b.Region.value
-| Attr a    -> a.Region.value
+  (* Keywords *)

 | Begin _ -> "begin"
 | Else  _ -> "else"
@ -229,11 +252,17 @@ let to_lexeme = function
 | Then  _ -> "then"
 | With  _ -> "with"

+(* Data constructors *)
+
 | C_None  _ -> "None"
 | C_Some  _ -> "Some"

+(* Virtual tokens *)
+
 | EOF _ -> ""

+(* CONVERSIONS *)
+
 let to_string token ?(offsets=true) mode =
  let region, val_str = proj_token token in
  let reg_str = region#compact ~offsets mode
@ -241,10 +270,6 @@ let to_string token ?(offsets=true) mode =

 let to_region token = proj_token token |> fst

-(* Injections *)
-
-type int_err = Non_canonical_zero
-
 (* LEXIS *)

 let keywords = [
@ -385,6 +410,8 @@ let mk_bytes lexeme region =
  let value = lexeme, `Hex norm
  in Bytes Region.{region; value}

+type int_err = Non_canonical_zero
+
 let mk_int lexeme region =
  let z =
    Str.(global_replace (regexp "_") "" lexeme) |> Z.of_string
@ -398,16 +425,14 @@ type nat_err =

 let mk_nat lexeme region =
  match (String.index_opt lexeme 'n') with
-  | None -> Error Invalid_natural
-  | Some _ -> (
-    let z =
+      None -> Error Invalid_natural
+  | Some _ -> let z =
               Str.(global_replace (regexp "_") "" lexeme) |>
                 Str.(global_replace (regexp "n") "") |>
                 Z.of_string in
             if   Z.equal z Z.zero && lexeme <> "0n"
             then Error Non_canonical_zero_nat
             else Ok (Nat Region.{region; value = lexeme,z})
-  )

 let mk_mutez lexeme region =
  let z =
@ -422,8 +447,6 @@ let eof region = EOF region

 type sym_err = Invalid_symbol

-type attr_err = Invalid_attribute
-
 let mk_sym lexeme region =
  match lexeme with
  (* Lexemes in common with all concrete syntaxes *)
@ -473,9 +496,10 @@ let mk_constr lexeme region =

 (* Attributes *)

+type attr_err = Invalid_attribute
+
 let mk_attr header lexeme region =
-  if header = "[@" then
-    Error Invalid_attribute
+  if header = "[@" then Error Invalid_attribute
  else Ok (Attr Region.{value=lexeme; region})

 (* Predicates *)
@ -485,12 +509,14 @@ let is_bytes  = function Bytes _ -> true | _ -> false
 let is_int    = function Int    _ -> true | _ -> false
 let is_ident  = function Ident  _ -> true | _ -> false
 let is_eof    = function EOF    _ -> true | _ -> false
+let is_minus  = function MINUS  _ -> true | _ -> false

 (* Errors *)

 type error =
  Odd_lengthed_bytes
 | Missing_break
+| Negative_byte_sequence

 let error_to_string = function
  Odd_lengthed_bytes ->
@ -499,6 +525,9 @@ let error_to_string = function
 | Missing_break ->
    "Missing break.\n\
     Hint: Insert some space."
+| Negative_byte_sequence ->
+   "Negative byte sequence.\n\
+    Hint: Remove the leading minus sign."

 exception Error of error Region.reg

@ -511,29 +540,36 @@ let format_error ?(offsets=true) mode Region.{region; value} ~file =
 let fail region value = raise (Error Region.{region; value})

 let check_right_context token next_token buffer : unit =
-  if not (is_eof token) then
-    if is_int token || is_bytes token then
-      match next_token buffer with
-        Some ([], next) ->
  let pos    = (to_region token)#stop in
  let region = Region.make ~start:pos ~stop:pos in
-          if is_int next then
-            fail region Odd_lengthed_bytes
-          else
-            if is_ident next || is_string next || is_bytes next then
-              fail region Missing_break
-      | Some (_::_, _) | None -> ()
-    else
-      if is_ident token || is_string token then
  match next_token buffer with
-          Some ([], next) ->
-          if is_ident next || is_string next
-             || is_bytes next || is_int next
-          then
-            let pos    = (to_region token)#stop in
-            let region = Region.make ~start:pos ~stop:pos
-            in fail region Missing_break
-        | Some (_::_, _) | None -> ()
+    None -> ()
+  | Some (markup, next) ->
+      if   is_minus token && is_bytes next
+      then let region =
+             Region.cover (to_region token) (to_region next)
+           in fail region Negative_byte_sequence
+      else
+        match markup with
+          [] ->
+            if   is_int token
+            then if   is_string next || is_ident next
+                 then fail region Missing_break
+                 else ()
+            else
+              if   is_string token
+              then if   is_int next || is_bytes next || is_ident next
+                   then fail region Missing_break
+                   else ()
+              else
+                if   is_bytes token
+                then if is_string next || is_ident next
+                     then fail region Missing_break
+                     else if   is_int next
+                          then fail region Odd_lengthed_bytes
+                          else ()
+                else ()
+        | _::_ -> ()

 (* END TRAILER *)
 }
--- a/src/passes/1-parser/pascaligo/.links
+++ b/src/passes/1-parser/pascaligo/.links
@ -2,6 +2,7 @@ $HOME/git/OCaml-build/Makefile

 ../shared/Lexer.mli
 ../shared/Lexer.mll
+../shared/LexerLib.mli
 ../shared/LexerLib.ml
 ../shared/EvalOpt.ml
 ../shared/EvalOpt.mli
--- a/src/passes/1-parser/pascaligo/LexToken.mli
+++ b/src/passes/1-parser/pascaligo/LexToken.mli
@ -1,4 +1,4 @@
-(** This signature defines the lexical tokens for LIGO
+(* This signature defines the lexical tokens for LIGO

   _Tokens_ are the abstract units which are used by the parser to
   build the abstract syntax tree (AST), in other words, the stream of
--- a/src/passes/1-parser/pascaligo/LexToken.mll
+++ b/src/passes/1-parser/pascaligo/LexToken.mll
@ -5,24 +5,14 @@

 (* Shorthands *)

-type lexeme = string
-
-let sprintf = Printf.sprintf
-
 module Region = Simple_utils.Region
 module Pos    = Simple_utils.Pos
 module SMap   = Map.Make (String)
 module SSet   = Set.Make (String)

-(* Hack to roll back one lexeme in the current semantic action *)
-(*
-let rollback buffer =
-  let open Lexing in
-  let len = String.length (lexeme buffer) in
-  let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
-  buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
-  buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
-*)
+type lexeme = string
+
+let sprintf = Printf.sprintf

 (* TOKENS *)

@ -123,6 +113,8 @@ type t =
 | EOF of Region.t


+(* Projections *)
+
 type token = t

 let proj_token = function
@ -130,32 +122,20 @@ let proj_token = function

  String Region.{region; value} ->
    region, sprintf "String %s" value
-
 | Bytes Region.{region; value = s,b} ->
    region,
-    sprintf "Bytes (\"%s\", \"0x%s\")"
-      s (Hex.show b)
-
+    sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b)
 | Int Region.{region; value = s,n} ->
    region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
-
 | Nat Region.{region; value = s,n} ->
    region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
-
 | Mutez Region.{region; value = s,n} ->
    region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
-
 | Ident Region.{region; value} ->
    region, sprintf "Ident \"%s\"" value
-
 | Constr Region.{region; value} ->
    region, sprintf "Constr \"%s\"" value

-(*
-| Attr {header; string={region; value}} ->
-    region, sprintf "Attr (\"%s\",\"%s\")" header value
- *)
-
  (* Symbols *)

 | SEMI     region -> region, "SEMI"
@ -382,9 +362,7 @@ let keywords = [
  (fun reg -> With       reg)
 ]

-let reserved =
-  let open SSet in
-  empty |> add "arguments"
+let reserved = SSet.empty

 let constructors = [
  (fun reg -> False  reg);
@ -485,8 +463,7 @@ type nat_err =
 let mk_nat lexeme region =
  match String.index_opt lexeme 'n' with
      None -> Error Invalid_natural
-  | Some _ ->
-      let z =
+  | Some _ -> let z =
                Str.(global_replace (regexp "_") "" lexeme) |>
                  Str.(global_replace (regexp "n") "") |>
                  Z.of_string in
@ -495,8 +472,7 @@ let mk_nat lexeme region =
             else Ok (Nat Region.{region; value = lexeme,z})

 let mk_mutez lexeme region =
-  let z =
-    Str.(global_replace (regexp "_") "" lexeme) |>
+  let z = Str.(global_replace (regexp "_") "" lexeme) |>
            Str.(global_replace (regexp "mutez") "") |>
            Z.of_string in
  if   Z.equal z Z.zero && lexeme <> "0mutez"
@ -557,8 +533,7 @@ let mk_constr lexeme region =

 type attr_err = Invalid_attribute

-let mk_attr _header _string _region =
-  Error Invalid_attribute
+let mk_attr _ _ _ = Error Invalid_attribute

 (* Predicates *)

@ -567,12 +542,14 @@ let is_bytes  = function Bytes _ -> true | _ -> false
 let is_int    = function Int _    -> true | _ -> false
 let is_ident  = function Ident _  -> true | _ -> false
 let is_eof    = function EOF _    -> true | _ -> false
+let is_minus  = function MINUS _  -> true | _ -> false

 (* Errors *)

 type error =
  Odd_lengthed_bytes
 | Missing_break
+| Negative_byte_sequence

 let error_to_string = function
  Odd_lengthed_bytes ->
@ -581,6 +558,9 @@ let error_to_string = function
 | Missing_break ->
    "Missing break.\n\
     Hint: Insert some space."
+| Negative_byte_sequence ->
+   "Negative byte sequence.\n\
+    Hint: Remove the leading minus sign."

 exception Error of error Region.reg

@ -593,29 +573,36 @@ let format_error ?(offsets=true) mode Region.{region; value} ~file =
 let fail region value = raise (Error Region.{region; value})

 let check_right_context token next_token buffer : unit =
-  if not (is_eof token) then
-    if is_int token || is_bytes token then
-      match next_token buffer with
-        Some ([], next) ->
  let pos    = (to_region token)#stop in
  let region = Region.make ~start:pos ~stop:pos in
-          if is_int next then
-            fail region Odd_lengthed_bytes
-          else
-            if is_ident next || is_string next || is_bytes next then
-              fail region Missing_break
-      | Some (_::_, _) | None -> ()
-    else
-      if is_ident token || is_string token then
  match next_token buffer with
-          Some ([], next) ->
-          if is_ident next || is_string next
-             || is_bytes next || is_int next
-          then
-            let pos    = (to_region token)#stop in
-            let region = Region.make ~start:pos ~stop:pos
-            in fail region Missing_break
-        | Some (_::_, _) | None -> ()
+    None -> ()
+  | Some (markup, next) ->
+      if   is_minus token && is_bytes next
+      then let region =
+             Region.cover (to_region token) (to_region next)
+           in fail region Negative_byte_sequence
+      else
+        match markup with
+          [] ->
+            if   is_int token
+            then if   is_string next || is_ident next
+                 then fail region Missing_break
+                 else ()
+            else
+              if   is_string token
+              then if   is_int next || is_bytes next || is_ident next
+                   then fail region Missing_break
+                   else ()
+              else
+                if   is_bytes token
+                then if is_string next || is_ident next
+                     then fail region Missing_break
+                     else if   is_int next
+                          then fail region Odd_lengthed_bytes
+                          else ()
+                else ()
+        | _::_ -> ()

 (* END TRAILER *)
 }
--- a/src/passes/1-parser/reasonligo/.links
+++ b/src/passes/1-parser/reasonligo/.links
@ -2,6 +2,7 @@ $HOME/git/OCaml-build/Makefile

 ../shared/Lexer.mli
 ../shared/Lexer.mll
+../shared/LexerLib.ml
 ../shared/EvalOpt.ml
 ../shared/EvalOpt.mli
 ../shared/FQueue.ml
--- a/src/passes/1-parser/reasonligo/LexToken.mli
+++ b/src/passes/1-parser/reasonligo/LexToken.mli
@ -57,6 +57,7 @@ type t =
 | COLON    of Region.t (* ":"   *)
 | DOT      of Region.t (* "."   *)
 | ELLIPSIS of Region.t (* "..." *)
+| ARROW    of Region.t (* "=>"  *)

  (* Wildcard *)

@ -69,10 +70,10 @@ type t =
 | NE   of Region.t (* "!=" *)
 | LT   of Region.t (* "<"  *)
 | GT   of Region.t (* ">"  *)
-| LE of Region.t      (* "=<" *)
+| LE   of Region.t (* "<=" *)
 | GE   of Region.t (* ">=" *)

-| ARROW of Region.t   (* "=>" *)
+  (* Logic *)

 | BOOL_OR  of Region.t (* "||" *)
 | BOOL_AND of Region.t (* "&&" *)
@ -95,10 +96,10 @@ type t =
 | False  of Region.t
 | If     of Region.t
 | Let    of Region.t
-| Rec of Region.t
-| Switch of Region.t
 | Mod    of Region.t
 | Or     of Region.t
+| Rec    of Region.t
+| Switch of Region.t
 | True   of Region.t
 | Type   of Region.t

@ -111,8 +112,6 @@ type t =

 | EOF of Region.t (* End of file *)

-type token = t
-
 (* Projections

   The difference between extracting the lexeme and a string from a
@ -121,6 +120,8 @@ type token = t
   lexeme (concrete syntax).
 *)

+type token = t
+
 val to_lexeme : token -> lexeme
 val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
 val to_region : token -> Region.t
--- a/src/passes/1-parser/reasonligo/LexToken.mll
+++ b/src/passes/1-parser/reasonligo/LexToken.mll
@ -1,15 +1,17 @@
 {
 (* START OF HEADER *)

-type lexeme = string
-
-let sprintf = Printf.sprintf
+(* Shorthands *)

 module Region = Simple_utils.Region
 module Pos    = Simple_utils.Pos
 module SMap   = Utils.String.Map
 module SSet   = Utils.String.Set

+type lexeme = string
+
+let sprintf = Printf.sprintf
+
 (* TOKENS *)

 type t =
@ -41,6 +43,7 @@ type t =
 | COLON    of Region.t (* ":"   *)
 | DOT      of Region.t (* "."   *)
 | ELLIPSIS of Region.t (* "..." *)
+| ARROW    of Region.t (* "=>"  *)

  (* Wildcard *)

@ -55,11 +58,11 @@ type t =
 | GT   of Region.t (* ">"  *)
 | LE   of Region.t (* "<=" *)
 | GE   of Region.t (* ">=" *)
-| ARROW of Region.t     (* "=>" *)
+
+  (* Logic *)

 | BOOL_OR  of Region.t (* "||" *)
 | BOOL_AND of Region.t (* "&&" *)
-
 | NOT      of Region.t (* ! *)

  (* Identifiers, labels, numbers and strings *)
@ -75,17 +78,17 @@ type t =

  (* Keywords *)

-(*| And*)
 | Else   of Region.t
 | False  of Region.t
 | If     of Region.t
 | Let    of Region.t
-| Rec of Region.t
-| Switch of Region.t
 | Mod    of Region.t
 | Or     of Region.t
+| Rec    of Region.t
+| Switch of Region.t
 | True   of Region.t
 | Type   of Region.t
+
  (* Data constructors *)

 | C_None  of Region.t  (* "None"  *)
@ -96,10 +99,32 @@ type t =
 | EOF of Region.t (* End of file *)


+(* Projections *)
+
 type token = t

 let proj_token = function
-  CAT region -> region, "CAT"
+  (* Literals *)
+
+  String Region.{region; value} ->
+    region, sprintf "String %s" value
+| Bytes Region.{region; value = s,b} ->
+    region,
+    sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b)
+| Int Region.{region; value = s,n} ->
+    region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
+| Nat Region.{region; value = s,n} ->
+    region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
+| Mutez Region.{region; value = s,n} ->
+    region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
+| Ident Region.{region; value} ->
+    region, sprintf "Ident %s" value
+| Constr Region.{region; value} ->
+    region, sprintf "Constr %s" value
+
+  (* Symbols *)
+
+| CAT      region -> region, "CAT"
 | MINUS    region -> region, "MINUS"
 | PLUS     region -> region, "PLUS"
 | SLASH    region -> region, "SLASH"
@ -125,24 +150,9 @@ let proj_token = function
 | LE       region -> region, "LE"
 | GE       region -> region, "GE"
 | ARROW    region -> region, "ARROW"
+| NOT      region -> region, "NOT"
 | BOOL_OR  region -> region, "BOOL_OR"
 | BOOL_AND region -> region, "BOOL_AND"
-| Ident Region.{region; value} ->
-    region, sprintf "Ident %s" value
-| Constr Region.{region; value} ->
-    region, sprintf "Constr %s" value
-| Int Region.{region; value = s,n} ->
-    region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
-| Nat Region.{region; value = s,n} ->
-    region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
-| Mutez Region.{region; value = s,n} ->
-    region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
-| String Region.{region; value} ->
-    region, sprintf "String %s" value
-| Bytes Region.{region; value = s,b} ->
-    region,
-    sprintf "Bytes (\"%s\", \"0x%s\")"
-      s (Hex.show b)
 | Else     region -> region, "Else"
 | False    region -> region, "False"
 | If       region -> region, "If"
@ -150,7 +160,6 @@ let proj_token = function
 | Rec      region -> region, "Rec"
 | Switch   region -> region, "Switch"
 | Mod      region -> region, "Mod"
-| NOT region -> region, "!"
 | Or       region -> region, "Or"
 | True     region -> region, "True"
 | Type     region -> region, "Type"
@ -160,7 +169,20 @@ let proj_token = function
 | EOF      region -> region, "EOF"

 let to_lexeme = function
-  CAT _ -> "++"
+  (* Literals *)
+
+  String s   -> s.Region.value
+| Bytes b    -> fst b.Region.value
+| Int i
+| Nat i
+| Mutez i    -> fst i.Region.value
+| Ident  id  -> id.Region.value
+| Constr id  -> id.Region.value
+| Attr a     -> a.Region.value
+
+  (* Symbols *)
+
+| CAT      _ -> "++"
 | MINUS    _ -> "-"
 | PLUS     _ -> "+"
 | SLASH    _ -> "/"
@ -188,29 +210,32 @@ let to_lexeme = function
 | ARROW    _ -> "=>"
 | BOOL_OR  _ -> "||"
 | BOOL_AND _ -> "&&"
-| Ident id -> id.Region.value
-| Constr id -> id.Region.value
-| Int i
-| Nat i
-| Mutez i -> fst i.Region.value
-| String s -> s.Region.value
-| Bytes b -> fst b.Region.value
+| NOT      _ -> "!"
+
+  (* Keywords *)
+
 | Else    _ -> "else"
 | False   _ -> "false"
 | If      _ -> "if"
 | Let     _ -> "let"
-| Rec _ -> "rec"
 | Mod     _ -> "mod"
-| NOT _ -> "!"
 | Or      _ -> "or"
+| Rec     _ -> "rec"
 | Switch  _ -> "switch"
 | True    _ -> "true"
 | Type    _ -> "type"
+
+(* Data constructors *)
+
 | C_None  _ -> "None"
 | C_Some  _ -> "Some"
-| Attr a -> a.Region.value
+
+(* Virtual tokens *)
+
 | EOF _ -> ""

+(* CONVERSIONS *)
+
 let to_string token ?(offsets=true) mode =
  let region, val_str = proj_token token in
  let reg_str = region#compact ~offsets mode
@ -261,12 +286,9 @@ let reserved =
    |> add "functor"
    |> add "inherit"
    |> add "initializer"
-    (* |> add "land"  - see https://ligo.atlassian.net/browse/LIGO-263 *)
    |> add "lazy"
-    (* |> add "lor"  - see https://ligo.atlassian.net/browse/LIGO-263 *)
    |> add "lsl"
    |> add "lsr"
-    (* |> add "lxor"  - see https://ligo.atlassian.net/browse/LIGO-263 *)
    |> add "match"
    |> add "method"
    |> add "module"
@ -291,7 +313,7 @@ let reserved =

 let constructors = [
  (fun reg -> C_None reg);
-  (fun reg -> C_Some reg);
+  (fun reg -> C_Some reg)
 ]

 let add map (key, value) = SMap.add key value map
@ -376,8 +398,7 @@ let mk_int lexeme region =
  else Ok (Int Region.{region; value = lexeme, z})

 let mk_nat lexeme region =
-  let z =
-    Str.(global_replace (regexp "_") "" lexeme) |>
+  let z = Str.(global_replace (regexp "_") "" lexeme) |>
            Str.(global_replace (regexp "n") "") |>
            Z.of_string in
  if   Z.equal z Z.zero && lexeme <> "0n"
@ -385,8 +406,7 @@ let mk_nat lexeme region =
  else Ok (Nat Region.{region; value = lexeme, z})

 let mk_mutez lexeme region =
-  let z =
-    Str.(global_replace (regexp "_") "" lexeme) |>
+  let z = Str.(global_replace (regexp "_") "" lexeme) |>
            Str.(global_replace (regexp "mutez") "") |>
            Z.of_string in
  if   Z.equal z Z.zero && lexeme <> "0mutez"
@ -465,12 +485,14 @@ let is_bytes  = function Bytes _ -> true | _ -> false
 let is_int    = function Int    _ -> true | _ -> false
 let is_ident  = function Ident  _ -> true | _ -> false
 let is_eof    = function EOF    _ -> true | _ -> false
+let is_minus  = function MINUS  _ -> true | _ -> false

 (* Errors *)

 type error =
  Odd_lengthed_bytes
 | Missing_break
+| Negative_byte_sequence

 let error_to_string = function
  Odd_lengthed_bytes ->
@ -479,6 +501,9 @@ let error_to_string = function
 | Missing_break ->
    "Missing break.\n\
     Hint: Insert some space."
+| Negative_byte_sequence ->
+   "Negative byte sequence.\n\
+    Hint: Remove the leading minus sign."

 exception Error of error Region.reg

@ -491,29 +516,36 @@ let format_error ?(offsets=true) mode Region.{region; value} ~file =
 let fail region value = raise (Error Region.{region; value})

 let check_right_context token next_token buffer : unit =
-  if not (is_eof token) then
-    if is_int token || is_bytes token then
-      match next_token buffer with
-        Some ([], next) ->
  let pos    = (to_region token)#stop in
  let region = Region.make ~start:pos ~stop:pos in
-          if is_int next then
-            fail region Odd_lengthed_bytes
-          else
-            if is_ident next || is_string next || is_bytes next then
-              fail region Missing_break
-      | Some (_::_, _) | None -> ()
-    else
-      if is_ident token || is_string token then
  match next_token buffer with
-          Some ([], next) ->
-          if is_ident next || is_string next
-             || is_bytes next || is_int next
-          then
-            let pos    = (to_region token)#stop in
-            let region = Region.make ~start:pos ~stop:pos
-            in fail region Missing_break
-        | Some (_::_, _) | None -> ()
+    None -> ()
+  | Some (markup, next) ->
+      if   is_minus token && is_bytes next
+      then let region =
+             Region.cover (to_region token) (to_region next)
+           in fail region Negative_byte_sequence
+      else
+        match markup with
+          [] ->
+            if   is_int token
+            then if   is_string next || is_ident next
+                 then fail region Missing_break
+                 else ()
+            else
+              if   is_string token
+              then if   is_int next || is_bytes next || is_ident next
+                   then fail region Missing_break
+                   else ()
+              else
+                if   is_bytes token
+                then if is_string next || is_ident next
+                     then fail region Missing_break
+                     else if   is_int next
+                          then fail region Odd_lengthed_bytes
+                          else ()
+                else ()
+        | _::_ -> ()

 (* END TRAILER *)
 }
--- a/src/passes/1-parser/shared/.links
+++ b/src/passes/1-parser/shared/.links
@ -1,2 +0,0 @@
-$HOME/git/OCaml-build/Makefile
-$HOME/git/OCaml-build/Makefile.cfg
--- a/src/passes/1-parser/shared/Lexer.mli
+++ b/src/passes/1-parser/shared/Lexer.mli
@ -38,8 +38,6 @@
 module Region = Simple_utils.Region
 module Pos = Simple_utils.Pos

-type lexeme = string
-
 (* TOKENS *)

 (* The signature [TOKEN] exports an abstract type [token], so a lexer
@ -54,6 +52,8 @@ type lexeme = string
   reading the ocamllex specification for the lexer ([Lexer.mll]).
 *)

+type lexeme = string
+
 module type TOKEN =
  sig
    type token
@ -112,10 +112,36 @@ module type TOKEN =
      unit
  end

+(* The signature of the lexer *)
+
+module type S =
+  sig
+    module Token : TOKEN
+    type token = Token.token
+
+    (* The scanner [init] is meant to be called first to read the
+       BOM. Then [scan] is called. *)
+
+    val init : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
+    val scan : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
+
+    (* Errors (specific to the generic lexer, not to the tokens) *)
+
+    type error
+
+    val error_to_string : error -> string
+
+    exception Error of error Region.reg
+
+    val format_error :
+      ?offsets:bool -> [`Byte | `Point] ->
+      error Region.reg -> file:bool -> string Region.reg
+  end
+
 (* The functorised interface

   Note that the module parameter [Token] is re-exported as a
   submodule in [S].
 *)

-module Make (Token: TOKEN) : LexerLib.S with module Token = Token
+module Make (Token : TOKEN) : S with module Token = Token
--- a/src/passes/1-parser/shared/Lexer.mll
+++ b/src/passes/1-parser/shared/Lexer.mll
@ -8,12 +8,6 @@ module Pos = Simple_utils.Pos

 (* START HEADER *)

-type lexeme = string
-
-(* ALIASES *)
-
-let sprintf = Printf.sprintf
-
 (* TOKENS *)

 (* The signature [TOKEN] exports an abstract type [token], so a lexer
@ -22,6 +16,8 @@ let sprintf = Printf.sprintf
   caracterises the virtual token for end-of-file, because it requires
   special handling. *)

+type lexeme = string
+
 module type TOKEN =
  sig
    type token
@ -84,31 +80,39 @@ module type TOKEN =
   submodule in [S].
 *)

-module Make (Token: TOKEN) : (LexerLib.S with module Token = Token) =
+module type S =
+  sig
+    module Token : TOKEN
+    type token = Token.token
+
+    val init : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
+    val scan : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
+
+    type error
+
+    val error_to_string : error -> string
+
+    exception Error of error Region.reg
+
+    val format_error :
+      ?offsets:bool -> [`Byte | `Point] ->
+      error Region.reg -> file:bool -> string Region.reg
+  end
+
+module Make (Token : TOKEN) : (S with module Token = Token) =
  struct
    module Token = Token
    type token = Token.token

-    type file_path = string
-
-    type line_comment = LexerLib.line_comment
-    type block_comment = LexerLib.block_comment
-    let mk_block = LexerLib.mk_block
-
    (* ERRORS *)

    type error =
      Invalid_utf8_sequence
    | Unexpected_character of char
    | Undefined_escape_sequence
-    (*    | Missing_break*)
    | Unterminated_string
-    (*    | Unterminated_integer*)
-    (*    | Odd_lengthed_bytes*)
    | Unterminated_comment of string
-    (*    | Orphan_minus*)
    | Non_canonical_zero
-    (*    | Negative_byte_sequence *)
    | Broken_string
    | Invalid_character_in_string
    | Reserved_name of string
@ -116,6 +120,8 @@ module Make (Token: TOKEN) : (LexerLib.S with module Token = Token) =
    | Invalid_natural
    | Invalid_attribute

+    let sprintf = Printf.sprintf
+
    let error_to_string = function
      Invalid_utf8_sequence ->
        "Invalid UTF-8 sequence."
@ -124,30 +130,15 @@ module Make (Token: TOKEN) : (LexerLib.S with module Token = Token) =
    | Undefined_escape_sequence ->
        "Undefined escape sequence.\n\
         Hint: Remove or replace the sequence."
-(*    | Missing_break ->
-        "Missing break.\n\
-         Hint: Insert some space."
- *)    | Unterminated_string ->
+    | Unterminated_string ->
        "Unterminated string.\n\
         Hint: Close with double quotes."
-(*    | Unterminated_integer ->
-        "Unterminated integer.\n\
-         Hint: Remove the sign or proceed with a natural number." *)
-(*    | Odd_lengthed_bytes ->
-        "The length of the byte sequence is an odd number.\n\
-         Hint: Add or remove a digit."
- *)    | Unterminated_comment ending ->
+    | Unterminated_comment ending ->
        sprintf "Unterminated comment.\n\
                 Hint: Close with \"%s\"." ending
-(*    | Orphan_minus ->
-        "Orphan minus sign.\n\
-         Hint: Remove the trailing space." *)
    | Non_canonical_zero ->
        "Non-canonical zero.\n\
         Hint: Use 0."
-(*    | Negative_byte_sequence ->
-        "Negative byte sequence.\n\
-         Hint: Remove the leading minus sign." *)
    | Broken_string ->
        "The string starting here is interrupted by a line break.\n\
         Hint: Remove the break, close the string before or insert a \
@ -369,6 +360,7 @@ and scan state = parse
  nl                     { scan (state#push_newline lexbuf) lexbuf }
 | ' '+                   { scan (state#push_space   lexbuf) lexbuf }
 | '\t'+                  { scan (state#push_tabs    lexbuf) lexbuf }
+
 | ident                  { mk_ident        state lexbuf }
 | constr                 { mk_constr       state lexbuf }
 | bytes                  { mk_bytes seq    state lexbuf }
@ -420,27 +412,14 @@ and scan state = parse
    let state        = state#set_pos pos in
    scan state lexbuf }

-(* Some special errors *)
-
-(*
-| '-' { let region, _, state = state#sync lexbuf in
-        let state = scan state lexbuf in
-        let open Markup in
-        match FQueue.peek state#units with
-          None -> assert false
-        | Some (_, ((Space _ | Tabs _)::_, token))
-            when Token.is_int token -> fail region Orphan_minus
-        | _ -> fail region Unterminated_integer }
-
-| "-0x" byte_seq?
-      { let region, _, _ = state#sync lexbuf
-        in fail region Negative_byte_sequence }
- *)
+(* String *)

 | '"' { let opening, lexeme, state = state#sync lexbuf in
        let thread = LexerLib.mk_thread opening lexeme in
        scan_string thread state lexbuf |> mk_string }

+(* Comments *)
+
 | block_comment_openings {
    let lexeme = Lexing.lexeme lexbuf in
    match state#block with
@ -496,8 +475,6 @@ and scan_flags state acc = parse
 | eof             { let _, _, state = state#sync lexbuf
                    in List.rev acc, state                  }

-(* TODO: Move below to [LexerCommon.mll] *)
-
 (* Finishing a string *)

 and scan_string thread state = parse
@ -624,164 +601,6 @@ and scan_utf8_inline thread state = parse

 {
 (* START TRAILER *)
-
-
-(* Scanning the lexing buffer for tokens (and markup, as a
-   side-effect).
-
-     Because we want the lexer to have access to the right lexical
-   context of a recognised lexeme (to enforce stylistic constraints or
-   report special error patterns), we need to keep a hidden reference
-   to a queue of recognised lexical units (that is, tokens and markup)
-   that acts as a mutable state between the calls to [read]. When
-   [read] is called, that queue is examined first and, if it contains
-   at least one token, that token is returned; otherwise, the lexing
-   buffer is scanned for at least one more new token. That is the
-   general principle: we put a high-level buffer (our queue) on top of
-   the low-level lexing buffer.
-
-     One tricky and important detail is that we must make any parser
-   generated by Menhir (and calling [read]) believe that the last
-   region of the input source that was matched indeed corresponds to
-   the returned token, despite that many tokens and markup may have
-   been matched since it was actually read from the input. In other
-   words, the parser requests a token that is taken from the
-   high-level buffer, but the parser requests the source regions from
-   the _low-level_ lexing buffer, and they may disagree if more than
-   one token has actually been recognised.
-
-     Consequently, in order to maintain a consistent view for the
-   parser, we have to patch some fields of the lexing buffer, namely
-   [lex_start_p] and [lex_curr_p], as these fields are read by parsers
-   generated by Menhir when querying source positions (regions). This
-   is the purpose of the function [patch_buffer]. After reading one
-   or more tokens and markup by the scanning rule [scan], we have to
-   save in the hidden reference [buf_reg] the region of the source
-   that was matched by [scan]. This atomic sequence of patching,
-   scanning and saving is implemented by the _function_ [scan]
-   (beware: it shadows the scanning rule [scan]). The function
-   [patch_buffer] is, of course, also called just before returning the
-   token, so the parser has a view of the lexing buffer consistent
-   with the token.
-
-     Note that an additional reference [first_call] is needed to
-   distinguish the first call to the function [scan], as the first
-   scanning rule is actually [init] (which can handle the BOM), not
-   [scan].
- *)
-
-type logger = Markup.t list -> token -> unit
-
-type input =
-  File    of file_path
-| String  of string
-| Channel of in_channel
-| Buffer  of Lexing.lexbuf
-
-type instance = {
-  input    : input;
-  read     : log:logger -> Lexing.lexbuf -> token;
-  buffer   : Lexing.lexbuf;
-  get_win  : unit -> token LexerLib.window;
-  get_pos  : unit -> Pos.t;
-  get_last : unit -> Region.t;
-  get_file : unit -> file_path;
-  close    : unit -> unit
-}
-
-type open_err = File_opening of string
-
-let lexbuf_from_input = function
-  File path ->
-   (try
-      let chan = open_in path in
-      let close () = close_in chan in
-      let lexbuf = Lexing.from_channel chan in
-      let () =
-        let open Lexing in
-        lexbuf.lex_curr_p <- {lexbuf.lex_curr_p with pos_fname=path}
-      in Ok (lexbuf, close)
-    with Sys_error msg -> Stdlib.Error (File_opening msg))
-| String s ->
-    Ok (Lexing.from_string s, fun () -> ())
-| Channel chan ->
-    let close () = close_in chan in
-    Ok (Lexing.from_channel chan, close)
-| Buffer b -> Ok (b, fun () -> ())
-
-let open_token_stream ?line ?block input =
-  let file_path  = match input with
-                     File path -> path
-                   | _ -> "" in
-  let        pos = Pos.min ~file:file_path in
-  let    buf_reg = ref (pos#byte, pos#byte)
-  and first_call = ref true
-  and    decoder = Uutf.decoder ~encoding:`UTF_8 `Manual in
-  let     supply = Uutf.Manual.src decoder in
-  let      state = ref (LexerLib.mk_state
-                          ~units:FQueue.empty
-                          ~last:Region.ghost
-                          ~window:LexerLib.Nil
-                          ~pos
-                          ~markup:[]
-                          ~decoder
-                          ~supply
-                          ?block
-                          ?line
-                          ()) in
-
-  let get_pos  () = !state#pos
-  and get_last () = !state#last
-  and get_win  () = !state#window
-  and get_file () = file_path in
-
-  let patch_buffer (start, stop) buffer =
-    let open Lexing in
-    let file_path = buffer.lex_curr_p.pos_fname in
-    buffer.lex_start_p <- {start with pos_fname = file_path};
-    buffer.lex_curr_p  <- {stop  with pos_fname = file_path}
-
-  and save_region buffer =
-    buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p) in
-
-  let scan buffer =
-    patch_buffer !buf_reg buffer;
-    (if   !first_call
-     then (state := init !state buffer; first_call := false)
-     else state := scan !state buffer);
-    save_region buffer in
-
-  let next_token buffer =
-    scan buffer;
-    match FQueue.peek !state#units with
-                         None -> None
-    | Some (units, ext_token) ->
-        state := !state#set_units units; Some ext_token in
-
-  let rec read ~log buffer =
-    match FQueue.deq !state#units with
-      None ->
-        scan buffer;
-        read ~log buffer
-    | Some (units, (left_mark, token)) ->
-        log left_mark token;
-        state := ((!state#set_units units)
-                  #set_last (Token.to_region token))
-                  #slide_token token;
-        Token.check_right_context token next_token buffer;
-        patch_buffer (Token.to_region token)#byte_pos buffer;
-        token in
-  match lexbuf_from_input input with
-    Ok (buffer, close) ->
-      let () =
-        match input with
-          File path when path <> "" -> LexerLib.reset ~file:path buffer
-        | _ -> () in
-      let instance = {
-        input; read; buffer; get_win; get_pos; get_last; get_file; close}
-      in Ok instance
-  | Error _ as e -> e
-
 end (* of functor [Make] in HEADER *)
 (* END TRAILER *)
 }
--- a/src/passes/1-parser/shared/LexerLib.ml
+++ b/src/passes/1-parser/shared/LexerLib.ml
@ -1,159 +1,6 @@
-(* Sliding window *)
-
-(* The type [window] models a two-slot buffer of tokens for reporting
-   after a parse error. Technically, it is a parametric type, but its
-   use is meant for tokens, wherever they are defined.
-
-   In [Two(t1,t2)], the token [t2] is the next to be sent to the
-   parser.
-
-   The call [slide token buffer] pushes the token [token] in the
-   buffer [buffer]. If the buffer is full, that is, it is
-   [Two(t1,t2)], then the token [t2] is discarded to make room for
-   [token].
- *)
-
-type 'a window =
-  Nil
-| One of 'a
-| Two of 'a * 'a
-
-let slide token = function
-  Nil -> One token
-| One t | Two (t,_) -> Two (token,t)
-
 module Region = Simple_utils.Region
 module Pos = Simple_utils.Pos

-type lexeme = string
-
-(* The signature [TOKEN] exports an abstract type [token], so a lexer
-   can be a functor over tokens. This enables to externalise
-   version-dependent constraints in any module whose signature matches
-   [TOKEN]. Generic functions to construct tokens are required.
-
-   Note the predicate [is_eof], which caracterises the virtual token
-   for end-of-file, because it requires special handling.
-*)
-
-module type TOKEN =
-  sig
-    type token
-
-    (* Predicates *)
-
-    val is_eof : token -> bool
-
-    (* Projections *)
-
-    val to_lexeme : token -> lexeme
-    val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
-    val to_region : token -> Region.t
-
-    (* Style *)
-
-    type error
-
-    val error_to_string : error -> string
-
-    exception Error of error Region.reg
-
-    val format_error :
-      ?offsets:bool ->
-      [`Byte | `Point] ->
-      error Region.reg ->
-      file:bool ->
-      string Region.reg
-
-    val check_right_context :
-      token ->
-      (Lexing.lexbuf -> (Markup.t list * token) option) ->
-      Lexing.lexbuf ->
-      unit
-  end
-
-(* The module type for lexers is [S]. It mainly exports the function
-   [open_token_stream], which returns
-
-     * a function [read] that extracts tokens from a lexing buffer,
-       together with a lexing buffer [buffer] to read from,
-     * a function [close] that closes that buffer,
-     * a function [get_pos] that returns the current position, and
-     * a function [get_last] that returns the region of the last
-       recognised token.
-     * a function [get_file] that returns the name of the file being
-       scanned (empty string if [stdin]).
-
-   Note that a module [Token] is exported too, because the signature
-   of the exported functions depend on it.
-
-     The type [window] is a two-token window, that is, a buffer that
-   contains the last recognised token, and the penultimate (if any).
-
-     The call [read ~log] evaluates in a lexer (also known as a
-   tokeniser or scanner) whose type is [Lexing.lexbuf -> token], and
-   suitable for a parser generated by Menhir. The argument labelled
-   [log] is a logger, that is, it may print a token and its left
-   markup to a given channel, at the caller's discretion.
-*)
-
-module type S =
-  sig
-    module Token : TOKEN
-    type token = Token.token
-
-    type file_path = string
-    type logger = Markup.t list -> token -> unit
-
-    type input =
-      File    of file_path
-    | String  of string
-    | Channel of in_channel
-    | Buffer  of Lexing.lexbuf
-
-    type instance = {
-      input    : input;
-      read     : log:logger -> Lexing.lexbuf -> token;
-      buffer   : Lexing.lexbuf;
-      get_win  : unit -> token window;
-      get_pos  : unit -> Pos.t;
-      get_last : unit -> Region.t;
-      get_file : unit -> file_path;
-      close    : unit -> unit
-    }
-
-    type open_err = File_opening of string
-
-    val lexbuf_from_input :
-      input -> (Lexing.lexbuf * (unit -> unit), open_err) Stdlib.result
-
-    type line_comment = string (* Opening of a line comment *)
-    type block_comment = <opening : string; closing : string>
-
-    val mk_block : opening:string -> closing:string -> block_comment
-
-    val open_token_stream :
-      ?line:line_comment ->
-      ?block:block_comment ->
-      input ->
-      (instance, open_err) Stdlib.result
-
-    (* Error reporting *)
-
-    type error
-
-    val error_to_string : error -> string
-
-    exception Error of error Region.reg
-
-    val format_error :
-      ?offsets:bool ->
-      [`Byte | `Point] ->
-      error Region.reg ->
-      file:bool ->
-      string Region.reg
-  end
-
 (* LEXER ENGINE *)

 (* Resetting file name and line number in the lexing buffer
@ -205,27 +52,20 @@ let rollback buffer =
  buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
  buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}

+(* Utility types *)
+
+type file_path = string
+type lexeme = string
+
 (* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)

-(* When scanning structured constructs, like strings and comments, we
-   need to keep the region of the opening symbol (like double quote,
-   "//" or "(*") in order to report any error more precisely. Since
-   ocamllex is byte-oriented, we need to store the parsed bytes as
-   characters in an accumulator [acc] and also its length [len], so,
-   we are done, it is easy to build the string making up the
-   structured construct with [mk_str] (see above).
-
-     The resulting data structure is called a _thread_. (Note for
-   Emacs: "*)".)
-*)
-
 type thread = <
  opening     : Region.t;
  length      : int;
  acc         : char list;
+  to_string   : string;
  push_char   : char -> thread;
  push_string : string -> thread;
-  to_string : string;
  set_opening : Region.t -> thread
 >

@ -233,12 +73,12 @@ let mk_thread region lexeme : thread =
  (* The call [explode s a] is the list made by pushing the characters
     in the string [s] on top of [a], in reverse order. For example,
     [explode "ba" ['c';'d'] = ['a'; 'b'; 'c'; 'd']]. *)
+
  let explode s acc =
    let rec push = function
      0 -> acc
    | i -> s.[i-1] :: push (i-1)
-    in push (String.length s)
-  in
+    in push (String.length s) in
  object
    val opening = region
    method opening = opening
@ -273,84 +113,82 @@ let mk_thread region lexeme : thread =
      in fill (length-1) acc |> Bytes.to_string
  end

-type file_path = string
-
 (* STATE *)

-    (* Beyond tokens, the result of lexing is a state. The type
-       [state] represents the logical state of the lexing engine, that
-       is, a value which is threaded during scanning and which denotes
-       useful, high-level information beyond what the type
-       [Lexing.lexbuf] in the standard library already provides for
-       all generic lexers.
+(* Scanning the lexing buffer for tokens (and markup, as a
+   side-effect).

-         Tokens are the smallest units used by the parser to build the
-       abstract syntax tree. The state includes a queue of recognised
-       tokens, with the markup at the left of its lexeme until either
-       the start of the file or the end of the previously recognised
-       token.
+     Because we want the lexer to have access to the right lexical
+   context of a recognised lexeme (to enforce stylistic constraints or
+   report special error patterns), we need to keep a hidden reference
+   to a queue of recognised lexical units (that is, tokens and markup)
+   that acts as a mutable state between the calls to [read]. When
+   [read] is called, that queue is examined first and, if it contains
+   at least one token, that token is returned; otherwise, the lexing
+   buffer is scanned for at least one more new token. That is the
+   general principle: we put a high-level buffer (our queue) on top of
+   the low-level lexing buffer.

-         The markup from the last recognised token or, if the first
-       token has not been recognised yet, from the beginning of the
-       file is stored in the field [markup] of the state because it is
-       a side-effect, with respect to the output token list, and we
-       use a record with a single field [units] because that record
-       may be easily extended during the future maintenance of this
-       lexer.
+     One tricky and important detail is that we must make any parser
+   generated by Menhir (and calling [read]) believe that the last
+   region of the input source that was matched indeed corresponds to
+   the returned token, despite that many tokens and markup may have
+   been matched since it was actually read from the input. In other
+   words, the parser requests a token that is taken from the
+   high-level buffer, but the parser requests the source regions from
+   the _low-level_ lexing buffer, and they may disagree if more than
+   one token has actually been recognised.

-         The state also includes a field [pos] which holds the current
-       position in the LIGO source file. The position is not always
-       updated after a single character has been matched: that depends
-       on the regular expression that matched the lexing buffer.
+     Consequently, in order to maintain a consistent view for the
+   parser, we have to patch some fields of the lexing buffer, namely
+   [lex_start_p] and [lex_curr_p], as these fields are read by parsers
+   generated by Menhir when querying source positions (regions). This
+   is the purpose of the function [patch_buffer]. After reading one or
+   more tokens and markup by the scanning rule [scan], we have to save
+   in the hidden reference [buf_reg] the region of the source that was
+   matched by [scan]. This atomic sequence of patching, scanning and
+   saving is implemented by the _function_ [scan] (beware: it shadows
+   the scanning rule [scan]). The function [patch_buffer] is, of
+   course, also called just before returning the token, so the parser
+   has a view of the lexing buffer consistent with the token.

-         The field [window] is a two-token window, that is, a buffer
-       that contains the last recognised token, and the penultimate
-       (if any).
-
-         The fields [decoder] and [supply] offer the support needed
-       for the lexing of UTF-8 encoded characters in comments (the
-       only place where they are allowed in LIGO). The former is the
-       decoder proper and the latter is the effectful function
-       [supply] that takes a byte, a start index and a length and feed
-       it to [decoder]. See the documentation of the third-party
-       library Uutf.
+     Note that an additional reference [first_call] is needed to
+   distinguish the first call to the function [scan], as the first
+   scanning rule is actually [init] (which can handle the BOM), not
+   [scan].
 *)

-    type line_comment  = string (* Opening of a line comment *)
-    type block_comment = <opening : string; closing : string>
+type 'token window =
+  Nil
+| One of 'token
+| Two of 'token * 'token

-    let mk_block ~opening ~closing : block_comment =
-      object
-        method opening = opening
-        method closing = closing
-      end
-
-    type 'a state = <
-      units   : (Markup.t list * 'a) FQueue.t;
+type 'token state = <
+  units   : (Markup.t list * 'token) FQueue.t;
  markup  : Markup.t list;
-      window  : 'a window;
+  window  : 'token window;
  last    : Region.t;
  pos     : Pos.t;
  decoder : Uutf.decoder;
  supply  : Bytes.t -> int -> int -> unit;
-      block   : block_comment option;
-      line    : line_comment option;
+  block   : EvalOpt.block_comment option;
+  line    : EvalOpt.line_comment option;

-      enqueue      : 'a -> 'a state;
-      set_units    : (Markup.t list * 'a) FQueue.t -> 'a state;
-      set_last     : Region.t -> 'a state;
-      set_pos      : Pos.t -> 'a state;
-      slide_token  : 'a -> 'a state;
+  enqueue      : 'token -> 'token state;
+  set_units    : (Markup.t list * 'token) FQueue.t -> 'token state;
+  set_last     : Region.t -> 'token state;
+  set_pos      : Pos.t -> 'token state;
+  slide_token  : 'token -> 'token state;

-      sync         : Lexing.lexbuf -> Region.t * lexeme * 'a state;
+  sync         : Lexing.lexbuf -> Region.t * lexeme * 'token state;

-      push_newline : Lexing.lexbuf -> 'a state;
-      push_line    : thread -> 'a state;
-      push_block   : thread -> 'a state;
-      push_space   : Lexing.lexbuf -> 'a state;
-      push_tabs    : Lexing.lexbuf -> 'a state;
-      push_bom     : Lexing.lexbuf -> 'a state;
-      push_markup  : Markup.t -> 'a state;
+  push_newline : Lexing.lexbuf -> 'token state;
+  push_line    : thread -> 'token state;
+  push_block   : thread -> 'token state;
+  push_space   : Lexing.lexbuf -> 'token state;
+  push_tabs    : Lexing.lexbuf -> 'token state;
+  push_bom     : Lexing.lexbuf -> 'token state;
+  push_markup  : Markup.t -> 'token state;
 >

 let mk_state ~units ~markup ~window ~last ~pos ~decoder ~supply
@ -371,12 +209,6 @@ type file_path = string
    method block   = block
    method line    = line

-        (* The call [enqueue (token, state)] updates functionally the
-           state [state] by associating the token [token] with the
-           stored markup and enqueuing the pair into the units
-           queue. The field [markup] is then reset to the empty
-           list. *)
-
    method enqueue token =
      {< units  = FQueue.enq (markup, token) units;
         markup = [] >}
@ -386,13 +218,9 @@ type file_path = string
    method set_pos   pos    = {< pos   = pos    >}

    method slide_token token =
-          {< window = slide token window >}
-
-        (* The call [sync state buffer] updates the current position
-           in accordance with the contents of the lexing buffer, more
-           precisely, depending on the length of the string which has
-           just been recognised by the scanner: that length is used as
-           a positive offset to the current column. *)
+      match self#window with
+                      Nil -> {< window = One token     >}
+      | One t | Two (t,_) -> {< window = Two (token,t) >}

    method sync buffer =
      let lex   = Lexing.lexeme buffer in
@ -447,5 +275,121 @@ type file_path = string
      let region, value, state = self#sync buffer in
      let unit = Markup.BOM Region.{region; value}
      in state#push_markup unit
-
  end
+
+(* LEXER INSTANCE *)
+
+type input =
+  File    of file_path
+| String  of string
+| Channel of in_channel
+| Buffer  of Lexing.lexbuf
+
+type 'token logger = Markup.t list -> 'token -> unit
+
+type 'token instance = {
+  input    : input;
+  read     : log:('token logger) -> Lexing.lexbuf -> 'token;
+  buffer   : Lexing.lexbuf;
+  get_win  : unit -> 'token window;
+  get_pos  : unit -> Pos.t;
+  get_last : unit -> Region.t;
+  get_file : unit -> file_path;
+  close    : unit -> unit
+}
+
+type open_err = File_opening of string
+
+let lexbuf_from_input = function
+  String s ->
+    Ok (Lexing.from_string s, fun () -> ())
+| Channel chan ->
+    let close () = close_in chan in
+    Ok (Lexing.from_channel chan, close)
+| Buffer b ->
+    Ok (b, fun () -> ())
+| File path ->
+    try
+      let chan = open_in path in
+      let close () = close_in chan in
+      let lexbuf = Lexing.from_channel chan in
+      let () =
+        let open Lexing in
+        lexbuf.lex_curr_p <- {lexbuf.lex_curr_p with pos_fname=path}
+      in Ok (lexbuf, close)
+    with Sys_error msg -> Stdlib.Error (File_opening msg)
+
+let open_token_stream ?line ?block ~init ~scan
+                      ~token_to_region ~style input =
+  let file_path  = match input with
+                     File path -> path
+                   | _ -> "" in
+  let        pos = Pos.min ~file:file_path in
+  let    buf_reg = ref (pos#byte, pos#byte)
+  and first_call = ref true
+  and    decoder = Uutf.decoder ~encoding:`UTF_8 `Manual in
+  let     supply = Uutf.Manual.src decoder in
+  let      state = ref (mk_state
+                          ~units:FQueue.empty
+                          ~last:Region.ghost
+                          ~window:Nil
+                          ~pos
+                          ~markup:[]
+                          ~decoder
+                          ~supply
+                          ?block
+                          ?line
+                          ()) in
+  let get_pos  () = !state#pos
+  and get_last () = !state#last
+  and get_win  () = !state#window
+  and get_file () = file_path in
+
+  let patch_buffer (start, stop) buffer =
+    let open Lexing in
+    let file_path = buffer.lex_curr_p.pos_fname in
+    buffer.lex_start_p <- {start with pos_fname = file_path};
+    buffer.lex_curr_p  <- {stop  with pos_fname = file_path}
+
+  and save_region buffer =
+    buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p) in
+
+  let scan' init scan buffer =
+    patch_buffer !buf_reg buffer;
+    (if   !first_call
+     then (state := init !state buffer; first_call := false)
+     else state := scan !state buffer);
+    save_region buffer in
+
+  let next_token init scan buffer =
+    scan' init scan buffer;
+    match FQueue.peek !state#units with
+      None -> None
+    | Some (units, ext_token) ->
+        state := !state#set_units units; Some ext_token in
+
+  let rec read init scan ~token_to_region ~style ~log buffer =
+    match FQueue.deq !state#units with
+      None ->
+        scan' init scan buffer;
+        read init scan ~token_to_region ~style ~log buffer
+    | Some (units, (left_mark, token)) ->
+       log left_mark token;
+       state := ((!state#set_units units)
+                   #set_last (token_to_region token))
+                  #slide_token token;
+       style token (next_token init scan) buffer;
+       patch_buffer (token_to_region token)#byte_pos buffer;
+       token in
+
+  match lexbuf_from_input input with
+    Ok (buffer, close) ->
+      let () =
+        match input with
+          File path when path <> "" -> reset ~file:path buffer
+        | _ -> () in
+      let instance = {
+        read = read init scan ~token_to_region ~style;
+        input; buffer; get_win; get_pos; get_last; get_file; close}
+      in Ok instance
+  | Error _ as e -> e
--- a/src/passes/1-parser/shared/LexerLib.mli
+++ b/src/passes/1-parser/shared/LexerLib.mli
@ -0,0 +1,205 @@
+(* A library for writing UTF8-aware lexers *)
+
+module Region = Simple_utils.Region
+module Pos = Simple_utils.Pos
+
+(* The function [rollback] resets the lexing buffer to the state it
+   was when it matched the last regular expression. This function is
+   safe to use only in the semantic action of the rule which last
+   matched. *)
+
+val rollback : Lexing.lexbuf -> unit
+
+(* Utility types *)
+
+type file_path = string
+type lexeme = string
+
+(* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)
+
+(* When scanning structured constructs, like strings and comments, we
+   need to keep the region of the opening symbol (like double quote,
+   "//" or "(*") in order to report any error more precisely. Since
+   ocamllex is byte-oriented, we need to store the parsed bytes as
+   characters in an accumulator [acc] and also its length [len], so,
+   we are done, it is easy to build the string making up the
+   structured construct with [mk_str] (see above).
+
+   The resulting data structure is called a _thread_. (Note for
+   Emacs: "*)".)
+ *)
+
+type thread = <
+  opening     : Region.t;
+  length      : int;
+  acc         : char list;
+  to_string   : string;
+  push_char   : char -> thread;
+  push_string : string -> thread;
+  set_opening : Region.t -> thread
+>
+
+val mk_thread : Region.t -> lexeme -> thread
+
+(* STATE *)
+
+(* Beyond producing tokens, the result of lexing is a _state_. The
+   type [state] represents the abstract logical state of the lexing
+   engine, that is, a value which is threaded during scanning and
+   which denotes useful, high-level information beyond what the type
+   [Lexing.lexbuf] in the standard library already provides for all
+   generic lexers. We qualify it as "logical state" because the lexing
+   buffer itself has a "physical state" defined by the type
+   [Lexing.lexbuf].
+
+     Tokens are the smallest units used by the parser to build the
+   abstract syntax tree. The state includes a queue of recognised
+   tokens, with the markup at the left of its lexeme until either the
+   start of the file or the end of the previously recognised token.
+
+     The markup from the last recognised token or, if the first token
+   has not been recognised yet, from the beginning of the file is
+   stored in the field [markup] of the state because it is a
+   side-effect, with respect to the output token list, and we use a
+   record with a single field [units] because that record may be
+   easily extended during the future maintenance of this lexer.
+
+     The state also includes a field [pos] which holds the current
+   position in the LIGO source file. The position is not always
+   updated after a single character has been matched: that depends on
+   the regular expression that matched the lexing buffer.
+
+     The field [window] is a two-token window, that is, a buffer that
+   contains the last recognised token, and the penultimate (if any).
+   Technically, it is a parametric type, but its use is meant for
+   tokens, wherever they are defined.  In [Two (t1,t2)], and in case
+   of a syntax error, [t1] is the first invalid token and [t2] is the
+   last valid one.
+
+     The fields [decoder] and [supply] offer the support needed for
+   the lexing of UTF-8 encoded characters in comments (the only place
+   where they are allowed in LIGO). The former is the decoder proper
+   and the latter is the effectful function [supply] that takes a
+   byte, a start index and a length and feed it to [decoder]. See the
+   documentation of the third-party library Uutf.
+
+   Some methods are now documented.
+
+     The call [state#enqueue token] updates functionally the state
+   [state] by associating the token [token] with the stored markup and
+   enqueuing the pair into the units queue. The field [markup] is then
+   reset to the empty list.
+
+     The call [state#slide_token token] pushes the token [token] in
+   the buffer [buffer]. If the buffer is full, that is, it is [Two
+   (t1,t2)], then the token [t2] is discarded to make room for
+   [token].
+
+     The call [state#sync buffer] updates the current position in
+   accordance with the contents of the lexing buffer, more precisely,
+   depending on the length of the string which has just been
+   recognised by the scanner: that length is used as a positive offset
+   to the current column.
+ *)
+
+type 'token window =
+  Nil
+| One of 'token
+| Two of 'token * 'token
+
+type 'token state = <
+  units   : (Markup.t list * 'token) FQueue.t;
+  markup  : Markup.t list;
+  window  : 'token window;
+  last    : Region.t;
+  pos     : Pos.t;
+  decoder : Uutf.decoder;
+  supply  : Bytes.t -> int -> int -> unit;
+  block   : EvalOpt.block_comment option;
+  line    : EvalOpt.line_comment option;
+
+  enqueue      : 'token -> 'token state;
+  set_units    : (Markup.t list * 'token) FQueue.t -> 'token state;
+  set_last     : Region.t -> 'token state;
+  set_pos      : Pos.t -> 'token state;
+  slide_token  : 'token -> 'token state;
+
+  sync         : Lexing.lexbuf -> Region.t * lexeme * 'token state;
+
+  push_newline : Lexing.lexbuf -> 'token state;
+  push_line    : thread -> 'token state;
+  push_block   : thread -> 'token state;
+  push_space   : Lexing.lexbuf -> 'token state;
+  push_tabs    : Lexing.lexbuf -> 'token state;
+  push_bom     : Lexing.lexbuf -> 'token state;
+  push_markup  : Markup.t -> 'token state;
+>
+
+(* LEXER INSTANCE *)
+
+(* The function [open_token_stream] returns a lexer instance made of
+
+     * the input [input] of type [input];
+     * a function [read] that extracts tokens from a lexing buffer,
+       together with a lexing buffer [buffer] to read from,
+     * a function [close] that closes that buffer,
+     * a function [get_pos] that returns the current position, and
+     * a function [get_last] that returns the region of the last
+       recognised token.
+     * a function [get_file] that returns the name of the file being
+       scanned (empty string if [stdin]).
+
+   Note that a module [Token] is exported too, because the signature
+   of the exported functions depend on it.
+
+     The type [window] is a two-token window, that is, a buffer that
+   contains the last recognised token, and the penultimate (if any).
+
+     The call [read ?line ?block ~init ~scan ~token_to_region ~style
+   input] evaluates in a lexer (also known as a tokeniser or scanner)
+   whose type is [log:('token logger) -> Lexing.lexbuf -> 'token], and
+   suitable for a parser generated by Menhir. The argument labelled
+   [log] is a logger, that is, it may print a token and its left
+   markup to a given channel, at the caller's discretion. The argument
+   labelled [~init] is the scanner to be called first, usually for
+   reading the BOM, then [scan] is used for the following calls. The
+   function labelled [~style] is used to check stylistic constraints
+   on the tokens and the markup between them.
+*)
+
+type input =
+  File    of file_path
+| String  of string
+| Channel of in_channel
+| Buffer  of Lexing.lexbuf
+
+type 'token logger = Markup.t list -> 'token -> unit
+
+type 'token instance = {
+  input    : input;
+  read     : log:('token logger) -> Lexing.lexbuf -> 'token;
+  buffer   : Lexing.lexbuf;
+  get_win  : unit -> 'token window;
+  get_pos  : unit -> Pos.t;
+  get_last : unit -> Region.t;
+  get_file : unit -> file_path;
+  close    : unit -> unit
+}
+
+type open_err = File_opening of string
+
+val lexbuf_from_input :
+  input -> (Lexing.lexbuf * (unit -> unit), open_err) Stdlib.result
+
+val open_token_stream :
+  ?line:EvalOpt.line_comment ->
+  ?block:EvalOpt.block_comment ->
+  init:('token state -> Lexing.lexbuf -> 'token state) ->
+  scan:('token state -> Lexing.lexbuf -> 'token state) ->
+  token_to_region:('token -> Region.t) ->
+  style:('token ->
+         (Lexing.lexbuf -> (Markup.t list * 'token) option) ->
+         Lexing.lexbuf ->
+         unit) ->
+  input ->
+  ('token instance, open_err) Stdlib.result
--- a/src/passes/1-parser/shared/LexerLog.ml
+++ b/src/passes/1-parser/shared/LexerLog.ml
@ -4,7 +4,8 @@ module Region = Simple_utils.Region

 module type S =
  sig
-    module Lexer : LexerLib.S
+    module Lexer : Lexer.S
+    type token = Lexer.token

    val output_token :
      ?offsets:bool ->
@ -12,7 +13,7 @@ module type S =
      EvalOpt.command ->
      out_channel ->
      Markup.t list ->
-      Lexer.token ->
+      token ->
      unit

    type file_path = string
@ -22,19 +23,26 @@ module type S =
      [`Byte | `Point] ->
      ?block:EvalOpt.block_comment ->
      ?line:EvalOpt.line_comment ->
-      Lexer.input ->
+      token_to_region:(token -> Region.t) ->
+      style:(token ->
+             (Lexing.lexbuf -> (Markup.t list * token) option) ->
+             Lexing.lexbuf ->
+             unit) ->
+      LexerLib.input ->
      EvalOpt.command ->
      (unit, string Region.reg) Stdlib.result
  end

-module Make (Lexer: LexerLib.S) : (S with module Lexer = Lexer) =
+module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
  struct
    module Lexer = Lexer
    module Token = Lexer.Token
+    type token = Lexer.token

-    (** Pretty-printing in a string the lexemes making up the markup
+    (* Pretty-printing in a string the lexemes making up the markup
       between two tokens, concatenated with the last lexeme
       itself. *)
+
    let output_token ?(offsets=true) mode command
                     channel left_mark token : unit =
      let output    str = Printf.fprintf channel "%s%!" str in
@ -57,10 +65,17 @@ module Make (Lexer: LexerLib.S) : (S with module Lexer = Lexer) =

    type file_path = string

-    let trace ?(offsets=true) mode ?block ?line input command :
+    let trace ?(offsets=true) mode ?block ?line
+              ~token_to_region ~style input command :
          (unit, string Region.reg) Stdlib.result =
-      match Lexer.open_token_stream ?line ?block input with
-        Ok Lexer.{read; buffer; close; _} ->
+      match LexerLib.open_token_stream
+              ~init:Lexer.init
+              ~scan:Lexer.scan
+              ~token_to_region
+              ~style
+              ?line ?block input
+      with
+        Ok LexerLib.{read; buffer; close; _} ->
          let log = output_token ~offsets mode command stdout
          and close_all () = flush_all (); close () in
          let rec iter () =
@ -69,12 +84,17 @@ module Make (Lexer: LexerLib.S) : (S with module Lexer = Lexer) =
                if   Token.is_eof token
                then Stdlib.Ok ()
                else iter ()
+            | exception Lexer.Token.Error error ->
+                let msg =
+                  Lexer.Token.format_error
+                    ~offsets mode ~file:true error
+                in Stdlib.Error msg
            | exception Lexer.Error error ->
                let msg =
                  Lexer.format_error ~offsets mode ~file:true error
                in Stdlib.Error msg in
            let result = iter ()
            in close_all (); result
-        | Stdlib.Error (Lexer.File_opening msg) ->
+        | Stdlib.Error (LexerLib.File_opening msg) ->
            flush_all (); Stdlib.Error (Region.wrap_ghost msg)
  end
--- a/src/passes/1-parser/shared/LexerLog.mli
+++ b/src/passes/1-parser/shared/LexerLog.mli
@ -2,7 +2,8 @@ module Region = Simple_utils.Region

 module type S =
  sig
-    module Lexer : LexerLib.S
+    module Lexer : Lexer.S
+    type token = Lexer.token

    val output_token :
      ?offsets:bool ->
@ -10,7 +11,7 @@ module type S =
      EvalOpt.command ->
      out_channel ->
      Markup.t list ->
-      Lexer.token ->
+      token ->
      unit

    type file_path = string
@ -20,9 +21,14 @@ module type S =
      [`Byte | `Point] ->
      ?block:EvalOpt.block_comment ->
      ?line:EvalOpt.line_comment ->
-      Lexer.input ->
+      token_to_region:(token -> Region.t) ->
+      style:(token ->
+             (Lexing.lexbuf -> (Markup.t list * token) option) ->
+             Lexing.lexbuf ->
+             unit) ->
+      LexerLib.input ->
      EvalOpt.command ->
      (unit, string Region.reg) Stdlib.result
  end

-module Make (Lexer: LexerLib.S) : S with module Lexer = Lexer
+module Make (Lexer: Lexer.S) : S with module Lexer = Lexer
--- a/src/passes/1-parser/shared/LexerUnit.ml
+++ b/src/passes/1-parser/shared/LexerUnit.ml
@ -9,7 +9,7 @@ module type IO =
    val options : EvalOpt.options (* CLI options *)
  end

-module Make (IO: IO) (Lexer: LexerLib.S) =
+module Make (IO: IO) (Lexer: Lexer.S) =
  struct
    (* Error printing and exception tracing *)

@ -39,11 +39,16 @@ module Make (IO: IO) (Lexer: LexerLib.S) =
        | Stdlib.Ok pp_buffer ->
           (* Running the lexer on the preprocessed input *)

-            let source = Lexer.String (Buffer.contents pp_buffer) in
-            match Lexer.open_token_stream ?line:IO.options#line
+            let source = LexerLib.String (Buffer.contents pp_buffer) in
+            match LexerLib.open_token_stream
+                    ?line:IO.options#line
                    ?block:IO.options#block
+                    ~init:Lexer.init
+                    ~scan:Lexer.scan
+                    ~token_to_region:Lexer.Token.to_region
+                    ~style:Lexer.Token.check_right_context
                    source with
-              Ok Lexer.{read; buffer; close; _} ->
+              Ok LexerLib.{read; buffer; close; _} ->
                let close_all () = flush_all (); close () in
                let rec read_tokens tokens =
                  match read ~log:(fun _ _ -> ()) buffer with
@ -51,9 +56,7 @@ module Make (IO: IO) (Lexer: LexerLib.S) =
                      if   Lexer.Token.is_eof token
                      then Stdlib.Ok (List.rev tokens)
                      else read_tokens (token::tokens)
-(*                  | exception _ ->
-                              Printf.eprintf "Here\n%!"; exit 1
- *)                  | exception Lexer.Token.Error error ->
+                  | exception Lexer.Token.Error error ->
                      let file =
                        match IO.options#input with
                          None | Some "-" -> false
@ -74,7 +77,7 @@ module Make (IO: IO) (Lexer: LexerLib.S) =
                      in Stdlib.Error msg in
                let result = read_tokens []
                in close_all (); result
-            | Stdlib.Error (Lexer.File_opening msg) ->
+            | Stdlib.Error (LexerLib.File_opening msg) ->
                flush_all (); Stdlib.Error (Region.wrap_ghost msg) in
      match IO.options#input with
        None -> preproc stdin
@ -115,7 +118,9 @@ module Make (IO: IO) (Lexer: LexerLib.S) =
                           IO.options#mode
                           ?block:IO.options#block
                           ?line:IO.options#line
-                           (Lexer.String preproc_str)
+                           ~token_to_region:Lexer.Token.to_region
+                           ~style:Lexer.Token.check_right_context
+                           (LexerLib.String preproc_str)
                           IO.options#cmd
      in match IO.options#input with
           None -> preproc stdin
--- a/src/passes/1-parser/shared/LexerUnit.mli
+++ b/src/passes/1-parser/shared/LexerUnit.mli
@ -7,7 +7,7 @@ module type IO =
    val options : EvalOpt.options (* CLI options *)
  end

-module Make (IO: IO) (Lexer: LexerLib.S) :
+module Make (IO: IO) (Lexer: Lexer.S) :
  sig
    val scan  : unit -> (Lexer.token list, string Region.reg) Stdlib.result
    val trace : unit -> (unit, string Region.reg) Stdlib.result
--- a/src/passes/1-parser/shared/ParserAPI.ml
+++ b/src/passes/1-parser/shared/ParserAPI.ml
@ -56,8 +56,8 @@ module type PARSER =
 (* Main functor *)

 module Make (IO: IO)
-            (Lexer: LexerLib.S)
-            (Parser: PARSER with type token = Lexer.Token.token)
+            (Lexer: Lexer.S)
+            (Parser: PARSER with type token = Lexer.token)
            (ParErr: sig val message : int -> string end) =
  struct
    module I = Parser.MenhirInterpreter
@ -143,14 +143,14 @@ module Make (IO: IO)
                   ~offsets:IO.options#offsets
                   IO.options#mode IO.options#cmd stdout

-    let incr_contract Lexer.{read; buffer; get_win; close; _} =
+    let incr_contract LexerLib.{read; buffer; get_win; close; _} =
      let supplier  = I.lexer_lexbuf_to_supplier (read ~log) buffer
      and failure   = failure get_win in
      let parser    = Incr.contract buffer.Lexing.lex_curr_p in
      let ast       = I.loop_handle success failure supplier parser
      in flush_all (); close (); ast

-    let incr_expr Lexer.{read; buffer; get_win; close; _} =
+    let incr_expr LexerLib.{read; buffer; get_win; close; _} =
      let supplier   = I.lexer_lexbuf_to_supplier (read ~log) buffer
      and failure    = failure get_win in
      let parser     = Incr.interactive_expr buffer.Lexing.lex_curr_p in
--- a/src/passes/1-parser/shared/ParserAPI.mli
+++ b/src/passes/1-parser/shared/ParserAPI.mli
@ -55,8 +55,8 @@ module type PARSER =
  end

 module Make (IO: IO)
-            (Lexer: LexerLib.S)
-            (Parser: PARSER with type token = Lexer.Token.token)
+            (Lexer: Lexer.S)
+            (Parser: PARSER with type token = Lexer.token)
            (ParErr: sig val message : int -> string end) :
  sig
    (* WARNING: The following parsers may all raise [Lexer.Error] *)
@ -78,8 +78,8 @@ module Make (IO: IO)

    exception Point of error

-    val incr_contract : Lexer.instance -> Parser.ast
-    val incr_expr     : Lexer.instance -> Parser.expr
+    val incr_contract : Lexer.token LexerLib.instance -> Parser.ast
+    val incr_expr     : Lexer.token LexerLib.instance -> Parser.expr

    val format_error :
      ?offsets:bool -> [`Point | `Byte] -> error -> string Region.reg
--- a/src/passes/1-parser/shared/ParserUnit.ml
+++ b/src/passes/1-parser/shared/ParserUnit.ml
@ -37,7 +37,7 @@ module type Printer =
    val print_expr   : state -> expr -> unit
  end

-module Make (Lexer: LexerLib.S)
+module Make (Lexer: Lexer.S)
            (AST: sig type t type expr end)
            (Parser: ParserAPI.PARSER
                     with type ast   = AST.t
@ -89,12 +89,12 @@ module Make (Lexer: LexerLib.S)
        ParserLog.mk_state ~offsets:SubIO.options#offsets
                           ~mode:SubIO.options#mode
                           ~buffer:output in
-      let close () = lexer_inst.Lexer.close () in
+      let close () = lexer_inst.LexerLib.close () in
      let expr =
        try
          if SubIO.options#mono then
-            let tokeniser = lexer_inst.Lexer.read ~log
-            and lexbuf = lexer_inst.Lexer.buffer
+            let tokeniser = lexer_inst.LexerLib.read ~log
+            and lexbuf = lexer_inst.LexerLib.buffer
            in Front.mono_expr tokeniser lexbuf
          else
            Front.incr_expr lexer_inst
@ -124,12 +124,12 @@ module Make (Lexer: LexerLib.S)
        ParserLog.mk_state ~offsets:SubIO.options#offsets
                           ~mode:SubIO.options#mode
                           ~buffer:output in
-      let close () = lexer_inst.Lexer.close () in
+      let close () = lexer_inst.LexerLib.close () in
      let ast =
        try
          if SubIO.options#mono then
-            let tokeniser = lexer_inst.Lexer.read ~log
-            and lexbuf = lexer_inst.Lexer.buffer
+            let tokeniser = lexer_inst.LexerLib.read ~log
+            and lexbuf = lexer_inst.LexerLib.buffer
            in Front.mono_contract tokeniser lexbuf
          else
            Front.incr_contract lexer_inst
@ -163,9 +163,17 @@ module Make (Lexer: LexerLib.S)

      | exception Lexer.Error err ->
          let file =
-            lexer_inst.Lexer.buffer.Lexing.lex_curr_p.Lexing.pos_fname in
-          let error =
-            Lexer.format_error ~offsets:SubIO.options#offsets
+            lexer_inst.LexerLib.buffer.Lexing.lex_curr_p.Lexing.pos_fname in
+          let error = Lexer.format_error
+                        ~offsets:SubIO.options#offsets
+                        SubIO.options#mode err ~file:(file <> "")
+          in Stdlib.Error error
+
+      | exception Lexer.Token.Error err ->
+          let file =
+            lexer_inst.LexerLib.buffer.Lexing.lex_curr_p.Lexing.pos_fname in
+          let error = Lexer.Token.format_error
+                        ~offsets:SubIO.options#offsets
                        SubIO.options#mode err ~file:(file <> "")
          in Stdlib.Error error

@ -181,7 +189,7 @@ module Make (Lexer: LexerLib.S)

      | exception Parser.Error ->
          let invalid, valid_opt =
-            match lexer_inst.Lexer.get_win () with
+            match lexer_inst.LexerLib.get_win () with
              LexerLib.Nil ->
                  assert false (* Safe: There is always at least EOF. *)
            | LexerLib.One invalid -> invalid, None
@ -205,8 +213,8 @@ module Make (Lexer: LexerLib.S)
    (* Parsing a contract *)

    let gen_parser options input parser =
-      match Lexer.lexbuf_from_input input with
-        Stdlib.Error (Lexer.File_opening msg) ->
+      match LexerLib.lexbuf_from_input input with
+        Stdlib.Error (LexerLib.File_opening msg) ->
          Stdlib.Error (Region.wrap_ghost msg)
      | Ok (lexbuf, close) ->
         (* Preprocessing the input source *)
@ -224,48 +232,53 @@ module Make (Lexer: LexerLib.S)
             (* Lexing and parsing the preprocessed input source *)

             let () = close () in
-             let input' = Lexer.String (Buffer.contents buffer) in
-             match Lexer.open_token_stream ?line:options#line
+             let input' = LexerLib.String (Buffer.contents buffer) in
+             match LexerLib.open_token_stream
+                     ~init:Lexer.init
+                     ~scan:Lexer.scan
+                     ~token_to_region:Lexer.Token.to_region
+                     ~style:Lexer.Token.check_right_context
+                     ?line:options#line
                     ?block:options#block
                     input'
             with
               Ok instance ->
                 let open Lexing in
-                 instance.Lexer.buffer.lex_curr_p <-
-                   {instance.Lexer.buffer.lex_curr_p with pos_fname=file};
+                 instance.LexerLib.buffer.lex_curr_p <-
+                   {instance.LexerLib.buffer.lex_curr_p with pos_fname=file};
                 apply instance parser
-             | Stdlib.Error (Lexer.File_opening msg) ->
+             | Stdlib.Error (LexerLib.File_opening msg) ->
                 Stdlib.Error (Region.wrap_ghost msg)

    (* Parsing a contract in a file *)

    let contract_in_file (source : string) =
      let options = SubIO.make ~input:(Some source) ~expr:false
-      in gen_parser options (Lexer.File source) parse_contract
+      in gen_parser options (LexerLib.File source) parse_contract

    (* Parsing a contract in a string *)

    let contract_in_string (source : string) =
      let options = SubIO.make ~input:None ~expr:false in
-      gen_parser options (Lexer.String source) parse_contract
+      gen_parser options (LexerLib.String source) parse_contract

    (* Parsing a contract in stdin *)

    let contract_in_stdin () =
      let options = SubIO.make ~input:None ~expr:false in
-      gen_parser options (Lexer.Channel stdin) parse_contract
+      gen_parser options (LexerLib.Channel stdin) parse_contract

    (* Parsing an expression in a string *)

    let expr_in_string (source : string) =
      let options = SubIO.make ~input:None ~expr:true in
-      gen_parser options (Lexer.String source) parse_expr
+      gen_parser options (LexerLib.String source) parse_expr

    (* Parsing an expression in stdin *)

    let expr_in_stdin () =
      let options = SubIO.make ~input:None ~expr:true in
-      gen_parser options (Lexer.Channel stdin) parse_expr
+      gen_parser options (LexerLib.Channel stdin) parse_expr

    (* Preprocess only *)

--- a/src/passes/1-parser/shared/ParserUnit.mli
+++ b/src/passes/1-parser/shared/ParserUnit.mli
@ -43,7 +43,7 @@ module type Printer =

 (* Main functor to make the parser *)

-module Make (Lexer : LexerLib.S)
+module Make (Lexer : Lexer.S)
            (AST : sig type t type expr end)
            (Parser : ParserAPI.PARSER
                      with type ast   = AST.t
--- a/src/test/lexer/negative_byte_sequence.ligo
+++ b/src/test/lexer/negative_byte_sequence.ligo
@ -1 +1 @@
-const a: string = -0x222;
+const a: string = - (**) 0x2222
--- a/src/test/lexer/negative_byte_sequence.mligo
+++ b/src/test/lexer/negative_byte_sequence.mligo
@ -1 +1 @@
-let a = -0x222;
+let a = - (**) 0x2222
--- a/src/test/lexer/negative_byte_sequence.religo
+++ b/src/test/lexer/negative_byte_sequence.religo
@ -1 +1 @@
-let a = -0x222;
+let a = - /**/ 0x2222;
--- a/src/test/lexer/reserved_name.ligo
+++ b/src/test/lexer/reserved_name.ligo
@ -1 +0,0 @@
-let arguments = 1;