Finished refactoring of lexer.

commit 005b7fd69b
parent 9618a48848
@@ -54,7 +54,7 @@ ligo: : Lexical error in file "broken_string.religo", line 1, characters 8-9:
 
 run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.ligo" ; "main" ] ;
 [%expect {|
-ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters 18-23:
+ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters 18-31:
 Negative byte sequence.
 Hint: Remove the leading minus sign.
 {}
@@ -71,7 +71,7 @@ ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters
 
 run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.mligo" ; "main" ] ;
 [%expect {|
-ligo: : Lexical error in file "negative_byte_sequence.mligo", line 1, characters 8-13:
+ligo: : Lexical error in file "negative_byte_sequence.mligo", line 1, characters 8-21:
 Negative byte sequence.
 Hint: Remove the leading minus sign.
 {}
@@ -88,7 +88,7 @@ ligo: : Lexical error in file "negative_byte_sequence.mligo", line 1, characters
 
 run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.religo" ; "main" ] ;
 [%expect {|
-ligo: : Lexical error in file "negative_byte_sequence.religo", line 1, characters 8-13:
+ligo: : Lexical error in file "negative_byte_sequence.religo", line 1, characters 8-21:
 Negative byte sequence.
 Hint: Remove the leading minus sign.
 {}
@@ -103,6 +103,7 @@ ligo: : Lexical error in file "negative_byte_sequence.religo", line 1, character
 * Check the changelog by running 'ligo changelog'
 |} ];
 
+(*
 run_ligo_bad [ "compile-contract" ; "../../test/lexer/reserved_name.ligo" ; "main" ] ;
 [%expect {|
 ligo: : Lexical error in file "reserved_name.ligo", line 1, characters 4-13:
@@ -119,6 +120,7 @@ ligo: : Lexical error in file "reserved_name.ligo", line 1, characters 4-13:
 * Open a gitlab issue: https://gitlab.com/ligolang/ligo/issues/new
 * Check the changelog by running 'ligo changelog'
 |} ];
+*)
 
 run_ligo_bad [ "compile-contract" ; "../../test/lexer/reserved_name.religo" ; "main" ] ;
 [%expect {|
@@ -204,10 +206,9 @@ ligo: : Lexical error in file "unexpected_character.religo", line 1, characters
 
 run_ligo_bad [ "compile-contract" ; "../../test/lexer/unterminated_comment.mligo" ; "main" ] ;
 [%expect {|
-ligo: : Lexical error in file "unterminated_comment.mligo", line 1, characters 0-2:
+ligo: : Preprocessing error in file "../../test/lexer/unterminated_comment.mligo", line 1, characters 0-2:
 Unterminated comment.
-Hint: Close with "*)".
-{}
+Hint: Close with "*)". {}
 
 
 If you're not sure how to fix this error, you can
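(For orientation, not part of the diff: these tests use the ppx_expect style, in which [run_ligo_bad] runs the CLI on a source file that must be rejected and the [%expect] block pins the exact output. A minimal, hypothetical test in that style, reflecting the widened error span recorded above:)

let%expect_test "negative byte sequence (PascaLIGO)" =
  run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.ligo" ; "main" ] ;
  [%expect {|
    ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters 18-31:
    Negative byte sequence.
    Hint: Remove the leading minus sign.
  |}]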
@@ -2,6 +2,7 @@ $HOME/git/OCaml-build/Makefile
 
 ../shared/Lexer.mli
 ../shared/Lexer.mll
+../shared/LexerLib.ml
 ../shared/EvalOpt.ml
 ../shared/EvalOpt.mli
 ../shared/FQueue.ml
@@ -102,10 +102,35 @@ type t =
 
 | EOF of Region.t (* End of file *)
 
+
+(* Projections *)
+
 type token = t
 
 let proj_token = function
-  ARROW region -> region, "ARROW"
+  (* Literals *)
+
+  String Region.{region; value} ->
+    region, sprintf "Str %s" value
+| Bytes Region.{region; value = s,b} ->
+    region,
+    sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b)
+| Int Region.{region; value = s,n} ->
+    region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
+| Nat Region.{region; value = s,n} ->
+    region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
+| Mutez Region.{region; value = s,n} ->
+    region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
+| Ident Region.{region; value} ->
+    region, sprintf "Ident %s" value
+| Constr Region.{region; value} ->
+    region, sprintf "Constr %s" value
+| Attr Region.{region; value} ->
+    region, sprintf "Attr \"%s\"" value
+
+(* Symbols *)
+
+| ARROW region -> region, "ARROW"
 | CONS region -> region, "CONS"
 | CAT region -> region, "CAT"
 | MINUS region -> region, "MINUS"
@@ -132,24 +157,9 @@ let proj_token = function
 | GE region -> region, "GE"
 | BOOL_OR region -> region, "BOOL_OR"
 | BOOL_AND region -> region, "BOOL_AND"
-| Ident Region.{region; value} ->
-    region, sprintf "Ident %s" value
-| Constr Region.{region; value} ->
-    region, sprintf "Constr %s" value
-| Int Region.{region; value = s,n} ->
-    region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
-| Nat Region.{region; value = s,n} ->
-    region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
-| Mutez Region.{region; value = s,n} ->
-    region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
-| String Region.{region; value} ->
-    region, sprintf "Str %s" value
-| Bytes Region.{region; value = s,b} ->
-    region,
-    sprintf "Bytes (\"%s\", \"0x%s\")"
-      s (Hex.show b)
-| Attr Region.{region; value} ->
-    region, sprintf "Attr \"%s\"" value
+
+(* Keywords *)
+
 | Begin region -> region, "Begin"
 | Else region -> region, "Else"
 | End region -> region, "End"
@@ -168,12 +178,32 @@ let proj_token = function
 | True region -> region, "True"
 | Type region -> region, "Type"
 | With region -> region, "With"
+
+(* Data *)
+
 | C_None region -> region, "C_None"
 | C_Some region -> region, "C_Some"
+
+(* Virtual tokens *)
+
 | EOF region -> region, "EOF"
 
 let to_lexeme = function
-  ARROW _ -> "->"
+  (* Literals *)
+
+  String s -> String.escaped s.Region.value
+| Bytes b -> fst b.Region.value
+| Int i
+| Nat i
+| Mutez i -> fst i.Region.value
+| Ident id -> id.Region.value
+| Constr id -> id.Region.value
+| Attr a -> a.Region.value
+
+(* Symbols *)
+
+| ARROW _ -> "->"
 | CONS _ -> "::"
 | CAT _ -> "^"
 | MINUS _ -> "-"
@@ -201,14 +231,7 @@ let to_lexeme = function
 | BOOL_OR _ -> "||"
 | BOOL_AND _ -> "&&"
 
-| Ident id -> id.Region.value
-| Constr id -> id.Region.value
-| Int i
-| Nat i
-| Mutez i -> fst i.Region.value
-| String s -> String.escaped s.Region.value
-| Bytes b -> fst b.Region.value
-| Attr a -> a.Region.value
+(* Keywords *)
 
 | Begin _ -> "begin"
 | Else _ -> "else"
@@ -229,11 +252,17 @@ let to_lexeme = function
 | Then _ -> "then"
 | With _ -> "with"
 
+(* Data constructors *)
+
 | C_None _ -> "None"
 | C_Some _ -> "Some"
 
+(* Virtual tokens *)
+
 | EOF _ -> ""
 
+(* CONVERSIONS *)
+
 let to_string token ?(offsets=true) mode =
   let region, val_str = proj_token token in
   let reg_str = region#compact ~offsets mode
@@ -241,10 +270,6 @@ let to_string token ?(offsets=true) mode =
 
 let to_region token = proj_token token |> fst
 
-(* Injections *)
-
-type int_err = Non_canonical_zero
-
 (* LEXIS *)
 
 let keywords = [
@@ -385,6 +410,8 @@ let mk_bytes lexeme region =
   let value = lexeme, `Hex norm
   in Bytes Region.{region; value}
 
+type int_err = Non_canonical_zero
+
 let mk_int lexeme region =
   let z =
     Str.(global_replace (regexp "_") "" lexeme) |> Z.of_string
@@ -398,16 +425,14 @@ type nat_err =
 
 let mk_nat lexeme region =
   match (String.index_opt lexeme 'n') with
-  | None -> Error Invalid_natural
-  | Some _ -> (
-    let z =
+    None -> Error Invalid_natural
+  | Some _ -> let z =
       Str.(global_replace (regexp "_") "" lexeme) |>
       Str.(global_replace (regexp "n") "") |>
       Z.of_string in
     if Z.equal z Z.zero && lexeme <> "0n"
     then Error Non_canonical_zero_nat
     else Ok (Nat Region.{region; value = lexeme,z})
-  )
 
 let mk_mutez lexeme region =
   let z =
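(A hedged usage sketch of [mk_nat] as redefined above; [Region.ghost] is the dummy region used elsewhere in this code base:)

let _test_mk_nat () =
  match mk_nat "1_000n" Region.ghost with
    Ok (Nat Region.{value = (lexeme, z); _}) ->
      (* Underscores are stripped and the 'n' suffix dropped. *)
      assert (lexeme = "1_000n" && Z.to_string z = "1000")
  | _ -> assert false
  (* "00n" yields Error Non_canonical_zero_nat, and a lexeme without
     the 'n' suffix yields Error Invalid_natural. *)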
@@ -422,8 +447,6 @@ let eof region = EOF region
 
 type sym_err = Invalid_symbol
 
-type attr_err = Invalid_attribute
-
 let mk_sym lexeme region =
   match lexeme with
   (* Lexemes in common with all concrete syntaxes *)
@@ -473,9 +496,10 @@ let mk_constr lexeme region =
 
 (* Attributes *)
 
+type attr_err = Invalid_attribute
+
 let mk_attr header lexeme region =
-  if header = "[@" then
-    Error Invalid_attribute
+  if header = "[@" then Error Invalid_attribute
  else Ok (Attr Region.{value=lexeme; region})
 
 (* Predicates *)
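(Likewise, a hedged illustration of the [mk_attr] branch above: a "[@" header is rejected, while any other header, e.g. "[@@", yields an [Attr] token:)

let _test_mk_attr () =
  assert (mk_attr "[@" "inline" Region.ghost = Error Invalid_attribute);
  match mk_attr "[@@" "inline" Region.ghost with
    Ok (Attr Region.{value; _}) -> assert (value = "inline")
  | _ -> assert false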
@@ -485,12 +509,14 @@ let is_bytes = function Bytes _ -> true | _ -> false
 let is_int = function Int _ -> true | _ -> false
 let is_ident = function Ident _ -> true | _ -> false
 let is_eof = function EOF _ -> true | _ -> false
+let is_minus = function MINUS _ -> true | _ -> false
 
 (* Errors *)
 
 type error =
   Odd_lengthed_bytes
 | Missing_break
+| Negative_byte_sequence
 
 let error_to_string = function
   Odd_lengthed_bytes ->
@@ -499,6 +525,9 @@ let error_to_string = function
 | Missing_break ->
     "Missing break.\n\
      Hint: Insert some space."
+| Negative_byte_sequence ->
+    "Negative byte sequence.\n\
+     Hint: Remove the leading minus sign."
 
 exception Error of error Region.reg
 
@@ -511,29 +540,36 @@ let format_error ?(offsets=true) mode Region.{region; value} ~file =
 let fail region value = raise (Error Region.{region; value})
 
 let check_right_context token next_token buffer : unit =
-  if not (is_eof token) then
-    if is_int token || is_bytes token then
-      match next_token buffer with
-        Some ([], next) ->
   let pos    = (to_region token)#stop in
   let region = Region.make ~start:pos ~stop:pos in
-          if is_int next then
-            fail region Odd_lengthed_bytes
-          else
-            if is_ident next || is_string next || is_bytes next then
-              fail region Missing_break
-      | Some (_::_, _) | None -> ()
-    else
-      if is_ident token || is_string token then
   match next_token buffer with
-    Some ([], next) ->
-      if is_ident next || is_string next
-      || is_bytes next || is_int next
-      then
-        let pos    = (to_region token)#stop in
-        let region = Region.make ~start:pos ~stop:pos
-        in fail region Missing_break
-  | Some (_::_, _) | None -> ()
+    None -> ()
+  | Some (markup, next) ->
+      if is_minus token && is_bytes next
+      then let region =
+             Region.cover (to_region token) (to_region next)
+           in fail region Negative_byte_sequence
+      else
+        match markup with
+          [] ->
+            if is_int token
+            then if is_string next || is_ident next
+                 then fail region Missing_break
+                 else ()
+            else
+              if is_string token
+              then if is_int next || is_bytes next || is_ident next
+                   then fail region Missing_break
+                   else ()
+              else
+                if is_bytes token
+                then if is_string next || is_ident next
+                     then fail region Missing_break
+                     else if is_int next
+                          then fail region Odd_lengthed_bytes
+                          else ()
+                else ()
+        | _::_ -> ()
 
 (* END TRAILER *)
 }
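(To summarise the rewritten [check_right_context]: [next_token] now returns the markup separating the current token from the next one, and an empty list means the two are adjacent. A hedged restatement of the decision table, as a standalone function over the predicates and error type defined above:)

let right_context_error token next markup =
  if is_minus token && is_bytes next
  then Some Negative_byte_sequence        (* "-0x..." even across markup *)
  else if markup <> [] then None          (* separated tokens are accepted *)
  else if is_int token && (is_string next || is_ident next)
  then Some Missing_break
  else if is_string token && (is_int next || is_bytes next || is_ident next)
  then Some Missing_break
  else if is_bytes token && (is_string next || is_ident next)
  then Some Missing_break
  else if is_bytes token && is_int next
  then Some Odd_lengthed_bytes
  else None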
@@ -2,6 +2,7 @@ $HOME/git/OCaml-build/Makefile
 
 ../shared/Lexer.mli
 ../shared/Lexer.mll
+../shared/LexerLib.mli
 ../shared/LexerLib.ml
 ../shared/EvalOpt.ml
 ../shared/EvalOpt.mli
@@ -1,4 +1,4 @@
-(** This signature defines the lexical tokens for LIGO
+(* This signature defines the lexical tokens for LIGO
 
    _Tokens_ are the abstract units which are used by the parser to
   build the abstract syntax tree (AST), in other words, the stream of
@@ -5,24 +5,14 @@
 
 (* Shorthands *)
 
-type lexeme = string
-
-let sprintf = Printf.sprintf
-
 module Region = Simple_utils.Region
 module Pos = Simple_utils.Pos
 module SMap = Map.Make (String)
 module SSet = Set.Make (String)
 
-(* Hack to roll back one lexeme in the current semantic action *)
-(*
-let rollback buffer =
-  let open Lexing in
-  let len = String.length (lexeme buffer) in
-  let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
-  buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
-  buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
-*)
+type lexeme = string
+
+let sprintf = Printf.sprintf
 
 (* TOKENS *)
 
@@ -123,6 +113,8 @@ type t =
 | EOF of Region.t
 
+(* Projections *)
+
 type token = t
 
 let proj_token = function
@@ -130,32 +122,20 @@ let proj_token = function
 
   String Region.{region; value} ->
     region, sprintf "String %s" value
 
 | Bytes Region.{region; value = s,b} ->
     region,
-    sprintf "Bytes (\"%s\", \"0x%s\")"
-      s (Hex.show b)
+    sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b)
 
 | Int Region.{region; value = s,n} ->
     region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
 
 | Nat Region.{region; value = s,n} ->
     region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
 
 | Mutez Region.{region; value = s,n} ->
     region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
 
 | Ident Region.{region; value} ->
     region, sprintf "Ident \"%s\"" value
 
 | Constr Region.{region; value} ->
     region, sprintf "Constr \"%s\"" value
 
-(*
-| Attr {header; string={region; value}} ->
-    region, sprintf "Attr (\"%s\",\"%s\")" header value
-*)
-
 (* Symbols *)
 
 | SEMI region -> region, "SEMI"
@@ -382,9 +362,7 @@ let keywords = [
   (fun reg -> With reg)
 ]
 
-let reserved =
-  let open SSet in
-  empty |> add "arguments"
+let reserved = SSet.empty
 
 let constructors = [
   (fun reg -> False reg);
@@ -485,8 +463,7 @@ type nat_err =
 let mk_nat lexeme region =
   match String.index_opt lexeme 'n' with
     None -> Error Invalid_natural
-  | Some _ ->
-      let z =
+  | Some _ -> let z =
       Str.(global_replace (regexp "_") "" lexeme) |>
       Str.(global_replace (regexp "n") "") |>
      Z.of_string in
@@ -495,8 +472,7 @@ let mk_nat lexeme region =
   else Ok (Nat Region.{region; value = lexeme,z})
 
 let mk_mutez lexeme region =
-  let z =
-    Str.(global_replace (regexp "_") "" lexeme) |>
+  let z = Str.(global_replace (regexp "_") "" lexeme) |>
     Str.(global_replace (regexp "mutez") "") |>
     Z.of_string in
   if Z.equal z Z.zero && lexeme <> "0mutez"
@@ -557,8 +533,7 @@ let mk_constr lexeme region =
 
 type attr_err = Invalid_attribute
 
-let mk_attr _header _string _region =
-  Error Invalid_attribute
+let mk_attr _ _ _ = Error Invalid_attribute
 
 (* Predicates *)
 
@@ -567,12 +542,14 @@ let is_bytes = function Bytes _ -> true | _ -> false
 let is_int = function Int _ -> true | _ -> false
 let is_ident = function Ident _ -> true | _ -> false
 let is_eof = function EOF _ -> true | _ -> false
+let is_minus = function MINUS _ -> true | _ -> false
 
 (* Errors *)
 
 type error =
   Odd_lengthed_bytes
 | Missing_break
+| Negative_byte_sequence
 
 let error_to_string = function
   Odd_lengthed_bytes ->
@@ -581,6 +558,9 @@ let error_to_string = function
 | Missing_break ->
     "Missing break.\n\
      Hint: Insert some space."
+| Negative_byte_sequence ->
+    "Negative byte sequence.\n\
+     Hint: Remove the leading minus sign."
 
 exception Error of error Region.reg
 
@@ -593,29 +573,36 @@ let format_error ?(offsets=true) mode Region.{region; value} ~file =
 let fail region value = raise (Error Region.{region; value})
 
 let check_right_context token next_token buffer : unit =
-  if not (is_eof token) then
-    if is_int token || is_bytes token then
-      match next_token buffer with
-        Some ([], next) ->
   let pos    = (to_region token)#stop in
   let region = Region.make ~start:pos ~stop:pos in
-          if is_int next then
-            fail region Odd_lengthed_bytes
-          else
-            if is_ident next || is_string next || is_bytes next then
-              fail region Missing_break
-      | Some (_::_, _) | None -> ()
-    else
-      if is_ident token || is_string token then
   match next_token buffer with
-    Some ([], next) ->
-      if is_ident next || is_string next
-      || is_bytes next || is_int next
-      then
-        let pos    = (to_region token)#stop in
-        let region = Region.make ~start:pos ~stop:pos
-        in fail region Missing_break
-  | Some (_::_, _) | None -> ()
+    None -> ()
+  | Some (markup, next) ->
+      if is_minus token && is_bytes next
+      then let region =
+             Region.cover (to_region token) (to_region next)
+           in fail region Negative_byte_sequence
+      else
+        match markup with
+          [] ->
+            if is_int token
+            then if is_string next || is_ident next
+                 then fail region Missing_break
+                 else ()
+            else
+              if is_string token
+              then if is_int next || is_bytes next || is_ident next
+                   then fail region Missing_break
+                   else ()
+              else
+                if is_bytes token
+                then if is_string next || is_ident next
+                     then fail region Missing_break
+                     else if is_int next
+                          then fail region Odd_lengthed_bytes
+                          else ()
+                else ()
+        | _::_ -> ()
 
 (* END TRAILER *)
 }
@@ -2,6 +2,7 @@ $HOME/git/OCaml-build/Makefile
 
 ../shared/Lexer.mli
 ../shared/Lexer.mll
+../shared/LexerLib.ml
 ../shared/EvalOpt.ml
 ../shared/EvalOpt.mli
 ../shared/FQueue.ml
@@ -57,6 +57,7 @@ type t =
 | COLON of Region.t (* ":" *)
 | DOT of Region.t (* "." *)
 | ELLIPSIS of Region.t (* "..." *)
+| ARROW of Region.t (* "=>" *)
 
 (* Wildcard *)
 
@@ -69,10 +70,10 @@ type t =
 | NE of Region.t (* "!=" *)
 | LT of Region.t (* "<" *)
 | GT of Region.t (* ">" *)
-| LE of Region.t (* "=<" *)
+| LE of Region.t (* "<=" *)
 | GE of Region.t (* ">=" *)
 
-| ARROW of Region.t (* "=>" *)
+(* Logic *)
 
 | BOOL_OR of Region.t (* "||" *)
 | BOOL_AND of Region.t (* "&&" *)
@@ -95,10 +96,10 @@ type t =
 | False of Region.t
 | If of Region.t
 | Let of Region.t
-| Rec of Region.t
-| Switch of Region.t
 | Mod of Region.t
 | Or of Region.t
+| Rec of Region.t
+| Switch of Region.t
 | True of Region.t
 | Type of Region.t
 
@@ -111,8 +112,6 @@ type t =
 
 | EOF of Region.t (* End of file *)
 
-type token = t
-
 (* Projections
 
    The difference between extracting the lexeme and a string from a
@@ -121,6 +120,8 @@ type token = t
    lexeme (concrete syntax).
 *)
 
+type token = t
+
 val to_lexeme : token -> lexeme
 val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
 val to_region : token -> Region.t
@@ -1,15 +1,17 @@
 {
 (* START OF HEADER *)
 
-type lexeme = string
-
-let sprintf = Printf.sprintf
+(* Shorthands *)
 
 module Region = Simple_utils.Region
 module Pos = Simple_utils.Pos
 module SMap = Utils.String.Map
 module SSet = Utils.String.Set
 
+type lexeme = string
+
+let sprintf = Printf.sprintf
+
 (* TOKENS *)
 
 type t =
@@ -41,6 +43,7 @@ type t =
 | COLON of Region.t (* ":" *)
 | DOT of Region.t (* "." *)
 | ELLIPSIS of Region.t (* "..." *)
+| ARROW of Region.t (* "=>" *)
 
 (* Wildcard *)
 
@@ -55,11 +58,11 @@ type t =
 | GT of Region.t (* ">" *)
 | LE of Region.t (* "<=" *)
 | GE of Region.t (* ">=" *)
 
-| ARROW of Region.t (* "=>" *)
+(* Logic *)
 
 | BOOL_OR of Region.t (* "||" *)
 | BOOL_AND of Region.t (* "&&" *)
 
 | NOT of Region.t (* ! *)
 
 (* Identifiers, labels, numbers and strings *)
@@ -75,17 +78,17 @@ type t =
 
 (* Keywords *)
 
-(*| And*)
 | Else of Region.t
 | False of Region.t
 | If of Region.t
 | Let of Region.t
-| Rec of Region.t
-| Switch of Region.t
 | Mod of Region.t
 | Or of Region.t
+| Rec of Region.t
+| Switch of Region.t
 | True of Region.t
 | Type of Region.t
 
 (* Data constructors *)
 
 | C_None of Region.t (* "None" *)
@@ -96,10 +99,32 @@ type t =
 | EOF of Region.t (* End of file *)
 
+(* Projections *)
+
 type token = t
 
 let proj_token = function
-  CAT region -> region, "CAT"
+  (* Literals *)
+
+  String Region.{region; value} ->
+    region, sprintf "String %s" value
+| Bytes Region.{region; value = s,b} ->
+    region,
+    sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b)
+| Int Region.{region; value = s,n} ->
+    region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
+| Nat Region.{region; value = s,n} ->
+    region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
+| Mutez Region.{region; value = s,n} ->
+    region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
+| Ident Region.{region; value} ->
+    region, sprintf "Ident %s" value
+| Constr Region.{region; value} ->
+    region, sprintf "Constr %s" value
+
+(* Symbols *)
+
+| CAT region -> region, "CAT"
 | MINUS region -> region, "MINUS"
 | PLUS region -> region, "PLUS"
 | SLASH region -> region, "SLASH"
@@ -125,24 +150,9 @@ let proj_token = function
 | LE region -> region, "LE"
 | GE region -> region, "GE"
 | ARROW region -> region, "ARROW"
+| NOT region -> region, "NOT"
 | BOOL_OR region -> region, "BOOL_OR"
 | BOOL_AND region -> region, "BOOL_AND"
-| Ident Region.{region; value} ->
-    region, sprintf "Ident %s" value
-| Constr Region.{region; value} ->
-    region, sprintf "Constr %s" value
-| Int Region.{region; value = s,n} ->
-    region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
-| Nat Region.{region; value = s,n} ->
-    region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
-| Mutez Region.{region; value = s,n} ->
-    region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
-| String Region.{region; value} ->
-    region, sprintf "String %s" value
-| Bytes Region.{region; value = s,b} ->
-    region,
-    sprintf "Bytes (\"%s\", \"0x%s\")"
-      s (Hex.show b)
 | Else region -> region, "Else"
 | False region -> region, "False"
 | If region -> region, "If"
@@ -150,7 +160,6 @@ let proj_token = function
 | Rec region -> region, "Rec"
 | Switch region -> region, "Switch"
 | Mod region -> region, "Mod"
-| NOT region -> region, "!"
 | Or region -> region, "Or"
 | True region -> region, "True"
 | Type region -> region, "Type"
@@ -160,7 +169,20 @@ let proj_token = function
 | EOF region -> region, "EOF"
 
 let to_lexeme = function
-  CAT _ -> "++"
+  (* Literals *)
+
+  String s -> s.Region.value
+| Bytes b -> fst b.Region.value
+| Int i
+| Nat i
+| Mutez i -> fst i.Region.value
+| Ident id -> id.Region.value
+| Constr id -> id.Region.value
+| Attr a -> a.Region.value
+
+(* Symbols *)
+
+| CAT _ -> "++"
 | MINUS _ -> "-"
 | PLUS _ -> "+"
 | SLASH _ -> "/"
@@ -188,29 +210,32 @@ let to_lexeme = function
 | ARROW _ -> "=>"
 | BOOL_OR _ -> "||"
 | BOOL_AND _ -> "&&"
-| Ident id -> id.Region.value
-| Constr id -> id.Region.value
-| Int i
-| Nat i
-| Mutez i -> fst i.Region.value
-| String s -> s.Region.value
-| Bytes b -> fst b.Region.value
+| NOT _ -> "!"
+
+(* Keywords *)
+
 | Else _ -> "else"
 | False _ -> "false"
 | If _ -> "if"
 | Let _ -> "let"
-| Rec _ -> "rec"
 | Mod _ -> "mod"
-| NOT _ -> "!"
 | Or _ -> "or"
+| Rec _ -> "rec"
 | Switch _ -> "switch"
 | True _ -> "true"
 | Type _ -> "type"
 
+(* Data constructors *)
+
 | C_None _ -> "None"
 | C_Some _ -> "Some"
-| Attr a -> a.Region.value
+
+(* Virtual tokens *)
+
 | EOF _ -> ""
 
+(* CONVERSIONS *)
+
 let to_string token ?(offsets=true) mode =
   let region, val_str = proj_token token in
   let reg_str = region#compact ~offsets mode
@@ -261,12 +286,9 @@ let reserved =
   |> add "functor"
   |> add "inherit"
   |> add "initializer"
-  (* |> add "land" - see https://ligo.atlassian.net/browse/LIGO-263 *)
   |> add "lazy"
-  (* |> add "lor" - see https://ligo.atlassian.net/browse/LIGO-263 *)
   |> add "lsl"
   |> add "lsr"
-  (* |> add "lxor" - see https://ligo.atlassian.net/browse/LIGO-263 *)
   |> add "match"
   |> add "method"
   |> add "module"
@@ -291,7 +313,7 @@ let reserved =
 
 let constructors = [
   (fun reg -> C_None reg);
-  (fun reg -> C_Some reg);
+  (fun reg -> C_Some reg)
 ]
 
 let add map (key, value) = SMap.add key value map
@@ -376,8 +398,7 @@ let mk_int lexeme region =
   else Ok (Int Region.{region; value = lexeme, z})
 
 let mk_nat lexeme region =
-  let z =
-    Str.(global_replace (regexp "_") "" lexeme) |>
+  let z = Str.(global_replace (regexp "_") "" lexeme) |>
     Str.(global_replace (regexp "n") "") |>
     Z.of_string in
   if Z.equal z Z.zero && lexeme <> "0n"
@@ -385,8 +406,7 @@ let mk_nat lexeme region =
   else Ok (Nat Region.{region; value = lexeme, z})
 
 let mk_mutez lexeme region =
-  let z =
-    Str.(global_replace (regexp "_") "" lexeme) |>
+  let z = Str.(global_replace (regexp "_") "" lexeme) |>
     Str.(global_replace (regexp "mutez") "") |>
     Z.of_string in
   if Z.equal z Z.zero && lexeme <> "0mutez"
@@ -465,12 +485,14 @@ let is_bytes = function Bytes _ -> true | _ -> false
 let is_int = function Int _ -> true | _ -> false
 let is_ident = function Ident _ -> true | _ -> false
 let is_eof = function EOF _ -> true | _ -> false
+let is_minus = function MINUS _ -> true | _ -> false
 
 (* Errors *)
 
 type error =
   Odd_lengthed_bytes
 | Missing_break
+| Negative_byte_sequence
 
 let error_to_string = function
   Odd_lengthed_bytes ->
@@ -479,6 +501,9 @@ let error_to_string = function
 | Missing_break ->
     "Missing break.\n\
      Hint: Insert some space."
+| Negative_byte_sequence ->
+    "Negative byte sequence.\n\
+     Hint: Remove the leading minus sign."
 
 exception Error of error Region.reg
 
@@ -491,29 +516,36 @@ let format_error ?(offsets=true) mode Region.{region; value} ~file =
 let fail region value = raise (Error Region.{region; value})
 
 let check_right_context token next_token buffer : unit =
-  if not (is_eof token) then
-    if is_int token || is_bytes token then
-      match next_token buffer with
-        Some ([], next) ->
   let pos    = (to_region token)#stop in
   let region = Region.make ~start:pos ~stop:pos in
-          if is_int next then
-            fail region Odd_lengthed_bytes
-          else
-            if is_ident next || is_string next || is_bytes next then
-              fail region Missing_break
-      | Some (_::_, _) | None -> ()
-    else
-      if is_ident token || is_string token then
   match next_token buffer with
-    Some ([], next) ->
-      if is_ident next || is_string next
-      || is_bytes next || is_int next
-      then
-        let pos    = (to_region token)#stop in
-        let region = Region.make ~start:pos ~stop:pos
-        in fail region Missing_break
-  | Some (_::_, _) | None -> ()
+    None -> ()
+  | Some (markup, next) ->
+      if is_minus token && is_bytes next
+      then let region =
+             Region.cover (to_region token) (to_region next)
+           in fail region Negative_byte_sequence
+      else
+        match markup with
+          [] ->
+            if is_int token
+            then if is_string next || is_ident next
+                 then fail region Missing_break
+                 else ()
+            else
+              if is_string token
+              then if is_int next || is_bytes next || is_ident next
+                   then fail region Missing_break
+                   else ()
+              else
+                if is_bytes token
+                then if is_string next || is_ident next
+                     then fail region Missing_break
+                     else if is_int next
+                          then fail region Odd_lengthed_bytes
+                          else ()
+                else ()
+        | _::_ -> ()
 
 (* END TRAILER *)
 }
@@ -1,2 +0,0 @@
-$HOME/git/OCaml-build/Makefile
-$HOME/git/OCaml-build/Makefile.cfg
@@ -38,8 +38,6 @@
 module Region = Simple_utils.Region
 module Pos = Simple_utils.Pos
 
-type lexeme = string
-
 (* TOKENS *)
 
 (* The signature [TOKEN] exports an abstract type [token], so a lexer
@@ -54,6 +52,8 @@ type lexeme = string
    reading the ocamllex specification for the lexer ([Lexer.mll]).
 *)
 
+type lexeme = string
+
 module type TOKEN =
   sig
     type token
@@ -112,10 +112,36 @@ module type TOKEN =
       unit
   end
 
+(* The signature of the lexer *)
+
+module type S =
+  sig
+    module Token : TOKEN
+    type token = Token.token
+
+    (* The scanner [init] is meant to be called first to read the
+       BOM. Then [scan] is called. *)
+
+    val init : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
+    val scan : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
+
+    (* Errors (specific to the generic lexer, not to the tokens) *)
+
+    type error
+
+    val error_to_string : error -> string
+
+    exception Error of error Region.reg
+
+    val format_error :
+      ?offsets:bool -> [`Byte | `Point] ->
+      error Region.reg -> file:bool -> string Region.reg
+  end
+
 (* The functorised interface
 
    Note that the module parameter [Token] is re-exported as a
   submodule in [S].
 *)
 
-module Make (Token: TOKEN) : LexerLib.S with module Token = Token
+module Make (Token : TOKEN) : S with module Token = Token
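(A hedged usage sketch of the reshaped interface above: the lexer signature [S] now lives in Lexer.mli itself rather than in LexerLib, and scanning threads a [LexerLib.state] through [init] and [scan]. [MyToken] is a placeholder for one of the per-syntax LexToken modules:)

module MyLexer = Lexer.Make (MyToken)

let scan_once (state : MyToken.token LexerLib.state) lexbuf =
  (* [init] is called first so that a leading BOM is consumed;
     after that, scanning proceeds through [scan]. *)
  let state = MyLexer.init state lexbuf in
  MyLexer.scan state lexbuf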
@ -8,12 +8,6 @@ module Pos = Simple_utils.Pos
|
|||||||
|
|
||||||
(* START HEADER *)
|
(* START HEADER *)
|
||||||
|
|
||||||
type lexeme = string
|
|
||||||
|
|
||||||
(* ALIASES *)
|
|
||||||
|
|
||||||
let sprintf = Printf.sprintf
|
|
||||||
|
|
||||||
(* TOKENS *)
|
(* TOKENS *)
|
||||||
|
|
||||||
(* The signature [TOKEN] exports an abstract type [token], so a lexer
|
(* The signature [TOKEN] exports an abstract type [token], so a lexer
|
||||||
@ -22,6 +16,8 @@ let sprintf = Printf.sprintf
|
|||||||
caracterises the virtual token for end-of-file, because it requires
|
caracterises the virtual token for end-of-file, because it requires
|
||||||
special handling. *)
|
special handling. *)
|
||||||
|
|
||||||
|
type lexeme = string
|
||||||
|
|
||||||
module type TOKEN =
|
module type TOKEN =
|
||||||
sig
|
sig
|
||||||
type token
|
type token
|
||||||
@ -84,31 +80,39 @@ module type TOKEN =
|
|||||||
submodule in [S].
|
submodule in [S].
|
||||||
*)
|
*)
|
||||||
|
|
||||||
module Make (Token: TOKEN) : (LexerLib.S with module Token = Token) =
|
module type S =
|
||||||
|
sig
|
||||||
|
module Token : TOKEN
|
||||||
|
type token = Token.token
|
||||||
|
|
||||||
|
val init : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
|
||||||
|
val scan : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
|
||||||
|
|
||||||
|
type error
|
||||||
|
|
||||||
|
val error_to_string : error -> string
|
||||||
|
|
||||||
|
exception Error of error Region.reg
|
||||||
|
|
||||||
|
val format_error :
|
||||||
|
?offsets:bool -> [`Byte | `Point] ->
|
||||||
|
error Region.reg -> file:bool -> string Region.reg
|
||||||
|
end
|
||||||
|
|
||||||
|
module Make (Token : TOKEN) : (S with module Token = Token) =
|
||||||
struct
|
struct
|
||||||
module Token = Token
|
module Token = Token
|
||||||
type token = Token.token
|
type token = Token.token
|
||||||
|
|
||||||
type file_path = string
|
|
||||||
|
|
||||||
type line_comment = LexerLib.line_comment
|
|
||||||
type block_comment = LexerLib.block_comment
|
|
||||||
let mk_block = LexerLib.mk_block
|
|
||||||
|
|
||||||
(* ERRORS *)
|
(* ERRORS *)
|
||||||
|
|
||||||
type error =
|
type error =
|
||||||
Invalid_utf8_sequence
|
Invalid_utf8_sequence
|
||||||
| Unexpected_character of char
|
| Unexpected_character of char
|
||||||
| Undefined_escape_sequence
|
| Undefined_escape_sequence
|
||||||
(* | Missing_break*)
|
|
||||||
| Unterminated_string
|
| Unterminated_string
|
||||||
(* | Unterminated_integer*)
|
|
||||||
(* | Odd_lengthed_bytes*)
|
|
||||||
| Unterminated_comment of string
|
| Unterminated_comment of string
|
||||||
(* | Orphan_minus*)
|
|
||||||
| Non_canonical_zero
|
| Non_canonical_zero
|
||||||
(* | Negative_byte_sequence *)
|
|
||||||
| Broken_string
|
| Broken_string
|
||||||
| Invalid_character_in_string
|
| Invalid_character_in_string
|
||||||
| Reserved_name of string
|
| Reserved_name of string
|
||||||
@ -116,6 +120,8 @@ module Make (Token: TOKEN) : (LexerLib.S with module Token = Token) =
|
|||||||
| Invalid_natural
|
| Invalid_natural
|
||||||
| Invalid_attribute
|
| Invalid_attribute
|
||||||
|
|
||||||
|
let sprintf = Printf.sprintf
|
||||||
|
|
||||||
let error_to_string = function
|
let error_to_string = function
|
||||||
Invalid_utf8_sequence ->
|
Invalid_utf8_sequence ->
|
||||||
"Invalid UTF-8 sequence."
|
"Invalid UTF-8 sequence."
|
||||||
@ -124,30 +130,15 @@ module Make (Token: TOKEN) : (LexerLib.S with module Token = Token) =
|
|||||||
| Undefined_escape_sequence ->
|
| Undefined_escape_sequence ->
|
||||||
"Undefined escape sequence.\n\
|
"Undefined escape sequence.\n\
|
||||||
Hint: Remove or replace the sequence."
|
Hint: Remove or replace the sequence."
|
||||||
(* | Missing_break ->
|
| Unterminated_string ->
|
||||||
"Missing break.\n\
|
|
||||||
Hint: Insert some space."
|
|
||||||
*) | Unterminated_string ->
|
|
||||||
"Unterminated string.\n\
|
"Unterminated string.\n\
|
||||||
Hint: Close with double quotes."
|
Hint: Close with double quotes."
|
||||||
(* | Unterminated_integer ->
|
| Unterminated_comment ending ->
|
||||||
"Unterminated integer.\n\
|
|
||||||
Hint: Remove the sign or proceed with a natural number." *)
|
|
||||||
(* | Odd_lengthed_bytes ->
|
|
||||||
"The length of the byte sequence is an odd number.\n\
|
|
||||||
Hint: Add or remove a digit."
|
|
||||||
*) | Unterminated_comment ending ->
|
|
||||||
sprintf "Unterminated comment.\n\
|
sprintf "Unterminated comment.\n\
|
||||||
Hint: Close with \"%s\"." ending
|
Hint: Close with \"%s\"." ending
|
||||||
(* | Orphan_minus ->
|
|
||||||
"Orphan minus sign.\n\
|
|
||||||
Hint: Remove the trailing space." *)
|
|
||||||
| Non_canonical_zero ->
|
| Non_canonical_zero ->
|
||||||
"Non-canonical zero.\n\
|
"Non-canonical zero.\n\
|
||||||
Hint: Use 0."
|
Hint: Use 0."
|
||||||
(* | Negative_byte_sequence ->
|
|
||||||
"Negative byte sequence.\n\
|
|
||||||
Hint: Remove the leading minus sign." *)
|
|
||||||
| Broken_string ->
|
| Broken_string ->
|
||||||
"The string starting here is interrupted by a line break.\n\
|
"The string starting here is interrupted by a line break.\n\
|
||||||
Hint: Remove the break, close the string before or insert a \
|
Hint: Remove the break, close the string before or insert a \
|
||||||
@ -369,6 +360,7 @@ and scan state = parse
|
|||||||
nl { scan (state#push_newline lexbuf) lexbuf }
|
nl { scan (state#push_newline lexbuf) lexbuf }
|
||||||
| ' '+ { scan (state#push_space lexbuf) lexbuf }
|
| ' '+ { scan (state#push_space lexbuf) lexbuf }
|
||||||
| '\t'+ { scan (state#push_tabs lexbuf) lexbuf }
|
| '\t'+ { scan (state#push_tabs lexbuf) lexbuf }
|
||||||
|
|
||||||
| ident { mk_ident state lexbuf }
|
| ident { mk_ident state lexbuf }
|
||||||
| constr { mk_constr state lexbuf }
|
| constr { mk_constr state lexbuf }
|
||||||
| bytes { mk_bytes seq state lexbuf }
|
| bytes { mk_bytes seq state lexbuf }
|
||||||
@ -420,27 +412,14 @@ and scan state = parse
|
|||||||
let state = state#set_pos pos in
|
let state = state#set_pos pos in
|
||||||
scan state lexbuf }
|
scan state lexbuf }
|
||||||
|
|
||||||
(* Some special errors *)
|
(* String *)
|
||||||
|
|
||||||
(*
|
|
||||||
| '-' { let region, _, state = state#sync lexbuf in
|
|
||||||
let state = scan state lexbuf in
|
|
||||||
let open Markup in
|
|
||||||
match FQueue.peek state#units with
|
|
||||||
None -> assert false
|
|
||||||
| Some (_, ((Space _ | Tabs _)::_, token))
|
|
||||||
when Token.is_int token -> fail region Orphan_minus
|
|
||||||
| _ -> fail region Unterminated_integer }
|
|
||||||
|
|
||||||
| "-0x" byte_seq?
|
|
||||||
{ let region, _, _ = state#sync lexbuf
|
|
||||||
in fail region Negative_byte_sequence }
|
|
||||||
*)
|
|
||||||
|
|
||||||
| '"' { let opening, lexeme, state = state#sync lexbuf in
|
| '"' { let opening, lexeme, state = state#sync lexbuf in
|
||||||
let thread = LexerLib.mk_thread opening lexeme in
|
let thread = LexerLib.mk_thread opening lexeme in
|
||||||
scan_string thread state lexbuf |> mk_string }
|
scan_string thread state lexbuf |> mk_string }
|
||||||
|
|
||||||
|
(* Comments *)
|
||||||
|
|
||||||
| block_comment_openings {
|
| block_comment_openings {
|
||||||
let lexeme = Lexing.lexeme lexbuf in
|
let lexeme = Lexing.lexeme lexbuf in
|
||||||
match state#block with
|
match state#block with
|
||||||
@ -496,8 +475,6 @@ and scan_flags state acc = parse
|
|||||||
| eof { let _, _, state = state#sync lexbuf
|
| eof { let _, _, state = state#sync lexbuf
|
||||||
in List.rev acc, state }
|
in List.rev acc, state }
|
||||||
|
|
||||||
(* TODO: Move below to [LexerCommon.mll] *)
|
|
||||||
|
|
||||||
(* Finishing a string *)
|
(* Finishing a string *)
|
||||||
|
|
||||||
and scan_string thread state = parse
|
and scan_string thread state = parse
|
||||||
@ -624,164 +601,6 @@ and scan_utf8_inline thread state = parse
|
|||||||
|
|
||||||
{
|
{
|
||||||
(* START TRAILER *)
|
(* START TRAILER *)
|
||||||
|
|
||||||
|
|
||||||
(* Scanning the lexing buffer for tokens (and markup, as a
|
|
||||||
side-effect).
|
|
||||||
|
|
||||||
Because we want the lexer to have access to the right lexical
|
|
||||||
context of a recognised lexeme (to enforce stylistic constraints or
|
|
||||||
report special error patterns), we need to keep a hidden reference
|
|
-   to a queue of recognised lexical units (that is, tokens and markup)
-   that acts as a mutable state between the calls to [read]. When
-   [read] is called, that queue is examined first and, if it contains
-   at least one token, that token is returned; otherwise, the lexing
-   buffer is scanned for at least one more new token. That is the
-   general principle: we put a high-level buffer (our queue) on top of
-   the low-level lexing buffer.
-
-   One tricky and important detail is that we must make any parser
-   generated by Menhir (and calling [read]) believe that the last
-   region of the input source that was matched indeed corresponds to
-   the returned token, despite that many tokens and markup may have
-   been matched since it was actually read from the input. In other
-   words, the parser requests a token that is taken from the
-   high-level buffer, but the parser requests the source regions from
-   the _low-level_ lexing buffer, and they may disagree if more than
-   one token has actually been recognised.
-
-   Consequently, in order to maintain a consistent view for the
-   parser, we have to patch some fields of the lexing buffer, namely
-   [lex_start_p] and [lex_curr_p], as these fields are read by parsers
-   generated by Menhir when querying source positions (regions). This
-   is the purpose of the function [patch_buffer]. After reading one
-   or more tokens and markup by the scanning rule [scan], we have to
-   save in the hidden reference [buf_reg] the region of the source
-   that was matched by [scan]. This atomic sequence of patching,
-   scanning and saving is implemented by the _function_ [scan]
-   (beware: it shadows the scanning rule [scan]). The function
-   [patch_buffer] is, of course, also called just before returning the
-   token, so the parser has a view of the lexing buffer consistent
-   with the token.
-
-   Note that an additional reference [first_call] is needed to
-   distinguish the first call to the function [scan], as the first
-   scanning rule is actually [init] (which can handle the BOM), not
-   [scan].
-*)
-
-type logger = Markup.t list -> token -> unit
-
-type input =
-  File of file_path
-| String of string
-| Channel of in_channel
-| Buffer of Lexing.lexbuf
-
-type instance = {
-  input    : input;
-  read     : log:logger -> Lexing.lexbuf -> token;
-  buffer   : Lexing.lexbuf;
-  get_win  : unit -> token LexerLib.window;
-  get_pos  : unit -> Pos.t;
-  get_last : unit -> Region.t;
-  get_file : unit -> file_path;
-  close    : unit -> unit
-}
-
-type open_err = File_opening of string
-
-let lexbuf_from_input = function
-  File path ->
-    (try
-       let chan = open_in path in
-       let close () = close_in chan in
-       let lexbuf = Lexing.from_channel chan in
-       let () =
-         let open Lexing in
-         lexbuf.lex_curr_p <- {lexbuf.lex_curr_p with pos_fname=path}
-       in Ok (lexbuf, close)
-     with Sys_error msg -> Stdlib.Error (File_opening msg))
-| String s ->
-    Ok (Lexing.from_string s, fun () -> ())
-| Channel chan ->
-    let close () = close_in chan in
-    Ok (Lexing.from_channel chan, close)
-| Buffer b -> Ok (b, fun () -> ())
-
-let open_token_stream ?line ?block input =
-  let file_path = match input with
-    File path -> path
-  | _ -> "" in
-  let pos = Pos.min ~file:file_path in
-  let buf_reg = ref (pos#byte, pos#byte)
-  and first_call = ref true
-  and decoder = Uutf.decoder ~encoding:`UTF_8 `Manual in
-  let supply = Uutf.Manual.src decoder in
-  let state = ref (LexerLib.mk_state
-                     ~units:FQueue.empty
-                     ~last:Region.ghost
-                     ~window:LexerLib.Nil
-                     ~pos
-                     ~markup:[]
-                     ~decoder
-                     ~supply
-                     ?block
-                     ?line
-                     ()) in
-
-  let get_pos  () = !state#pos
-  and get_last () = !state#last
-  and get_win  () = !state#window
-  and get_file () = file_path in
-
-  let patch_buffer (start, stop) buffer =
-    let open Lexing in
-    let file_path = buffer.lex_curr_p.pos_fname in
-    buffer.lex_start_p <- {start with pos_fname = file_path};
-    buffer.lex_curr_p  <- {stop  with pos_fname = file_path}
-
-  and save_region buffer =
-    buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p) in
-
-  let scan buffer =
-    patch_buffer !buf_reg buffer;
-    (if !first_call
-     then (state := init !state buffer; first_call := false)
-     else state := scan !state buffer);
-    save_region buffer in
-
-  let next_token buffer =
-    scan buffer;
-    match FQueue.peek !state#units with
-      None -> None
-    | Some (units, ext_token) ->
-        state := !state#set_units units; Some ext_token in
-
-  let rec read ~log buffer =
-    match FQueue.deq !state#units with
-      None ->
-        scan buffer;
-        read ~log buffer
-    | Some (units, (left_mark, token)) ->
-        log left_mark token;
-        state := ((!state#set_units units)
-                    #set_last (Token.to_region token))
-                    #slide_token token;
-        Token.check_right_context token next_token buffer;
-        patch_buffer (Token.to_region token)#byte_pos buffer;
-        token in
-
-  match lexbuf_from_input input with
-    Ok (buffer, close) ->
-      let () =
-        match input with
-          File path when path <> "" -> LexerLib.reset ~file:path buffer
-        | _ -> () in
-      let instance = {
-        input; read; buffer; get_win; get_pos; get_last; get_file; close}
-      in Ok instance
-  | Error _ as e -> e
end (* of functor [Make] in HEADER *)

(* END TRAILER *)

}
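None of the buffer-patching machinery deleted above is visible to client code: a Menhir driver only ever sees the [read] function and the lexing buffer. As a rough sketch of such a driver (the incremental parsers in ParserAPI.ml further below in this commit have exactly this shape; [I], [Incr], [success] and [failure] are assumed from that context):

(* Sketch: how a Menhir incremental driver consumes an instance.
   [I] is the generated MenhirInterpreter; [Incr], [success] and
   [failure] are assumed to be in scope, as in ParserAPI.ml. *)
let parse_with (instance : instance) =
  let log _markup _token = () in                  (* discard markup *)
  let supplier =
    I.lexer_lexbuf_to_supplier (instance.read ~log) instance.buffer in
  let checkpoint = Incr.contract instance.buffer.Lexing.lex_curr_p
  in I.loop_handle success failure supplier checkpoint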
@@ -1,159 +1,6 @@
-(* Sliding window *)
-
-(* The type [window] models a two-slot buffer of tokens for reporting
-   after a parse error. Technically, it is a parametric type, but its
-   use is meant for tokens, wherever they are defined.
-
-   In [Two(t1,t2)], the token [t2] is the next to be sent to the
-   parser.
-
-   The call [slide token buffer] pushes the token [token] in the
-   buffer [buffer]. If the buffer is full, that is, it is
-   [Two(t1,t2)], then the token [t2] is discarded to make room for
-   [token].
-*)
-
-type 'a window =
-  Nil
-| One of 'a
-| Two of 'a * 'a
-
-let slide token = function
-  Nil -> One token
-| One t | Two (t,_) -> Two (token,t)
-
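The sliding discipline (now inlined in the [slide_token] method of LexerLib, diffed later in this commit) is easiest to see on concrete calls; note that the oldest token is the one discarded:

(* Behaviour of [slide], assuming the definition deleted above. *)
let () =
  assert (slide 1 Nil         = One 1);
  assert (slide 2 (One 1)     = Two (2,1));
  assert (slide 3 (Two (2,1)) = Two (3,2))  (* token 1 is dropped *)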
module Region = Simple_utils.Region
module Pos = Simple_utils.Pos

-type lexeme = string
-
-(* The signature [TOKEN] exports an abstract type [token], so a lexer
-   can be a functor over tokens. This enables us to externalise
-   version-dependent constraints in any module whose signature matches
-   [TOKEN]. Generic functions to construct tokens are required.
-
-   Note the predicate [is_eof], which characterises the virtual token
-   for end-of-file, because it requires special handling.
-*)
-
-module type TOKEN =
-  sig
-    type token
-
-    (* Predicates *)
-
-    val is_eof : token -> bool
-
-    (* Projections *)
-
-    val to_lexeme : token -> lexeme
-    val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
-    val to_region : token -> Region.t
-
-    (* Style *)
-
-    type error
-
-    val error_to_string : error -> string
-
-    exception Error of error Region.reg
-
-    val format_error :
-      ?offsets:bool ->
-      [`Byte | `Point] ->
-      error Region.reg ->
-      file:bool ->
-      string Region.reg
-
-    val check_right_context :
-      token ->
-      (Lexing.lexbuf -> (Markup.t list * token) option) ->
-      Lexing.lexbuf ->
-      unit
-  end
-
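For intuition, a toy module satisfying [TOKEN] might look as follows. It is purely illustrative (the real instances are LIGO's per-syntax token modules) and performs no style check:

(* Hypothetical, minimal instance of [TOKEN]; illustrative only. *)
module ToyToken : TOKEN = struct
  type token = Int of int Region.reg | EOF of Region.t

  let is_eof = function EOF _ -> true | _ -> false

  let to_lexeme = function
    Int Region.{value; _} -> string_of_int value
  | EOF _ -> ""

  let to_string token ?(offsets=true) _mode =
    ignore offsets; to_lexeme token

  let to_region = function
    Int Region.{region; _} -> region
  | EOF region -> region

  type error = string
  let error_to_string msg = msg
  exception Error of error Region.reg

  let format_error ?(offsets=true) _mode err ~file =
    ignore (offsets, file); err

  (* No stylistic constraint in this toy module. *)
  let check_right_context _token _next_token _lexbuf = ()
end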
-(* The module type for lexers is [S]. It mainly exports the function
-   [open_token_stream], which returns
-
-     * a function [read] that extracts tokens from a lexing buffer,
-       together with a lexing buffer [buffer] to read from,
-     * a function [close] that closes that buffer,
-     * a function [get_pos] that returns the current position, and
-     * a function [get_last] that returns the region of the last
-       recognised token,
-     * a function [get_file] that returns the name of the file being
-       scanned (empty string if [stdin]).
-
-   Note that a module [Token] is exported too, because the signature
-   of the exported functions depends on it.
-
-   The type [window] is a two-token window, that is, a buffer that
-   contains the last recognised token, and the penultimate (if any).
-
-   The call [read ~log] evaluates in a lexer (also known as a
-   tokeniser or scanner) whose type is [Lexing.lexbuf -> token],
-   suitable for a parser generated by Menhir. The argument labelled
-   [log] is a logger, that is, it may print a token and its left
-   markup to a given channel, at the caller's discretion.
-*)
-
-module type S =
-  sig
-    module Token : TOKEN
-    type token = Token.token
-
-    type file_path = string
-    type logger = Markup.t list -> token -> unit
-
-    type input =
-      File of file_path
-    | String of string
-    | Channel of in_channel
-    | Buffer of Lexing.lexbuf
-
-    type instance = {
-      input    : input;
-      read     : log:logger -> Lexing.lexbuf -> token;
-      buffer   : Lexing.lexbuf;
-      get_win  : unit -> token window;
-      get_pos  : unit -> Pos.t;
-      get_last : unit -> Region.t;
-      get_file : unit -> file_path;
-      close    : unit -> unit
-    }
-
-    type open_err = File_opening of string
-
-    val lexbuf_from_input :
-      input -> (Lexing.lexbuf * (unit -> unit), open_err) Stdlib.result
-
-    type line_comment = string (* Opening of a line comment *)
-    type block_comment = <opening : string; closing : string>
-
-    val mk_block : opening:string -> closing:string -> block_comment
-
-    val open_token_stream :
-      ?line:line_comment ->
-      ?block:block_comment ->
-      input ->
-      (instance, open_err) Stdlib.result
-
-    (* Error reporting *)
-
-    type error
-
-    val error_to_string : error -> string
-
-    exception Error of error Region.reg
-
-    val format_error :
-      ?offsets:bool ->
-      [`Byte | `Point] ->
-      error Region.reg ->
-      file:bool ->
-      string Region.reg
-  end

(* LEXER ENGINE *)

(* Resetting file name and line number in the lexing buffer
@@ -205,27 +52,20 @@ let rollback buffer =
  buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
  buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}

+(* Utility types *)
+
+type file_path = string
+type lexeme = string

(* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)

-(* When scanning structured constructs, like strings and comments, we
-   need to keep the region of the opening symbol (like double quote,
-   "//" or "(*") in order to report any error more precisely. Since
-   ocamllex is byte-oriented, we need to store the parsed bytes as
-   characters in an accumulator [acc] and also its length [len], so
-   that, when we are done, it is easy to build the string making up
-   the structured construct with [mk_str] (see above).
-
-   The resulting data structure is called a _thread_. (Note for
-   Emacs: "*)".)
-*)
-
type thread = <
  opening : Region.t;
  length  : int;
  acc     : char list;
+ to_string   : string;
  push_char   : char -> thread;
  push_string : string -> thread;
- to_string   : string;
  set_opening : Region.t -> thread
>

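As a small illustration of the thread interface, and assuming [mk_thread] (whose definition is diffed just below), the lexeme "ab" can be accumulated character by character and rebuilt with [to_string]:

(* Sketch: pushing characters into a thread, then rebuilding the
   lexeme; the accumulator is kept in reverse order internally. *)
let thread = mk_thread Region.ghost ""
let thread = thread#push_char 'a'
let thread = thread#push_string "b"
let () = assert (thread#to_string = "ab")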
@@ -233,12 +73,12 @@ let mk_thread region lexeme : thread =
    (* The call [explode s a] is the list made by pushing the characters
       in the string [s] on top of [a], in reverse order. For example,
       [explode "ba" ['c';'d'] = ['a'; 'b'; 'c'; 'd']]. *)

    let explode s acc =
      let rec push = function
        0 -> acc
      | i -> s.[i-1] :: push (i-1)
-     in push (String.length s)
+     in push (String.length s) in
-   in
    object
      val opening = region
      method opening = opening
@@ -273,84 +113,82 @@ let mk_thread region lexeme : thread =
         in fill (length-1) acc |> Bytes.to_string
    end

-type file_path = string

(* STATE *)

-(* Beyond tokens, the result of lexing is a state. The type
-   [state] represents the logical state of the lexing engine, that
-   is, a value which is threaded during scanning and which denotes
-   useful, high-level information beyond what the type
-   [Lexing.lexbuf] in the standard library already provides for
-   all generic lexers.
-
-   Tokens are the smallest units used by the parser to build the
-   abstract syntax tree. The state includes a queue of recognised
-   tokens, with the markup at the left of its lexeme until either
-   the start of the file or the end of the previously recognised
-   token.
-
-   The markup from the last recognised token or, if the first
-   token has not been recognised yet, from the beginning of the
-   file is stored in the field [markup] of the state because it is
-   a side-effect, with respect to the output token list, and we
-   use a record with a single field [units] because that record
-   may be easily extended during the future maintenance of this
-   lexer.
-
-   The state also includes a field [pos] which holds the current
-   position in the LIGO source file. The position is not always
-   updated after a single character has been matched: that depends
-   on the regular expression that matched the lexing buffer.
-
-   The field [window] is a two-token window, that is, a buffer
-   that contains the last recognised token, and the penultimate
-   (if any).
-
-   The fields [decoder] and [supply] offer the support needed
-   for the lexing of UTF-8 encoded characters in comments (the
-   only place where they are allowed in LIGO). The former is the
-   decoder proper and the latter is the effectful function
-   [supply] that takes a byte, a start index and a length and
-   feeds it to [decoder]. See the documentation of the third-party
-   library Uutf.
+(* Scanning the lexing buffer for tokens (and markup, as a
+   side-effect).
+
+   Because we want the lexer to have access to the right lexical
+   context of a recognised lexeme (to enforce stylistic constraints or
+   report special error patterns), we need to keep a hidden reference
+   to a queue of recognised lexical units (that is, tokens and markup)
+   that acts as a mutable state between the calls to [read]. When
+   [read] is called, that queue is examined first and, if it contains
+   at least one token, that token is returned; otherwise, the lexing
+   buffer is scanned for at least one more new token. That is the
+   general principle: we put a high-level buffer (our queue) on top of
+   the low-level lexing buffer.
+
+   One tricky and important detail is that we must make any parser
+   generated by Menhir (and calling [read]) believe that the last
+   region of the input source that was matched indeed corresponds to
+   the returned token, despite that many tokens and markup may have
+   been matched since it was actually read from the input. In other
+   words, the parser requests a token that is taken from the
+   high-level buffer, but the parser requests the source regions from
+   the _low-level_ lexing buffer, and they may disagree if more than
+   one token has actually been recognised.
+
+   Consequently, in order to maintain a consistent view for the
+   parser, we have to patch some fields of the lexing buffer, namely
+   [lex_start_p] and [lex_curr_p], as these fields are read by parsers
+   generated by Menhir when querying source positions (regions). This
+   is the purpose of the function [patch_buffer]. After reading one or
+   more tokens and markup by the scanning rule [scan], we have to save
+   in the hidden reference [buf_reg] the region of the source that was
+   matched by [scan]. This atomic sequence of patching, scanning and
+   saving is implemented by the _function_ [scan] (beware: it shadows
+   the scanning rule [scan]). The function [patch_buffer] is, of
+   course, also called just before returning the token, so the parser
+   has a view of the lexing buffer consistent with the token.
+
+   Note that an additional reference [first_call] is needed to
+   distinguish the first call to the function [scan], as the first
+   scanning rule is actually [init] (which can handle the BOM), not
+   [scan].
*)

-type line_comment = string (* Opening of a line comment *)
-type block_comment = <opening : string; closing : string>
+type 'token window =
+  Nil
+| One of 'token
+| Two of 'token * 'token

-let mk_block ~opening ~closing : block_comment =
-  object
-    method opening = opening
-    method closing = closing
-  end
-
-type 'a state = <
-  units : (Markup.t list * 'a) FQueue.t;
+type 'token state = <
+  units : (Markup.t list * 'token) FQueue.t;
  markup : Markup.t list;
- window : 'a window;
+ window : 'token window;
  last : Region.t;
  pos : Pos.t;
  decoder : Uutf.decoder;
  supply : Bytes.t -> int -> int -> unit;
- block : block_comment option;
+ block : EvalOpt.block_comment option;
- line : line_comment option;
+ line : EvalOpt.line_comment option;

- enqueue : 'a -> 'a state;
+ enqueue : 'token -> 'token state;
- set_units : (Markup.t list * 'a) FQueue.t -> 'a state;
+ set_units : (Markup.t list * 'token) FQueue.t -> 'token state;
- set_last : Region.t -> 'a state;
+ set_last : Region.t -> 'token state;
- set_pos : Pos.t -> 'a state;
+ set_pos : Pos.t -> 'token state;
- slide_token : 'a -> 'a state;
+ slide_token : 'token -> 'token state;

- sync : Lexing.lexbuf -> Region.t * lexeme * 'a state;
+ sync : Lexing.lexbuf -> Region.t * lexeme * 'token state;

- push_newline : Lexing.lexbuf -> 'a state;
+ push_newline : Lexing.lexbuf -> 'token state;
- push_line : thread -> 'a state;
+ push_line : thread -> 'token state;
- push_block : thread -> 'a state;
+ push_block : thread -> 'token state;
- push_space : Lexing.lexbuf -> 'a state;
+ push_space : Lexing.lexbuf -> 'token state;
- push_tabs : Lexing.lexbuf -> 'a state;
+ push_tabs : Lexing.lexbuf -> 'token state;
- push_bom : Lexing.lexbuf -> 'a state;
+ push_bom : Lexing.lexbuf -> 'token state;
- push_markup : Markup.t -> 'a state;
+ push_markup : Markup.t -> 'token state;
>

let mk_state ~units ~markup ~window ~last ~pos ~decoder ~supply
@@ -371,12 +209,6 @@ type file_path = string
    method block = block
    method line = line

-   (* The call [enqueue (token, state)] updates functionally the
-      state [state] by associating the token [token] with the
-      stored markup and enqueuing the pair into the units
-      queue. The field [markup] is then reset to the empty
-      list. *)
-
    method enqueue token =
      {< units = FQueue.enq (markup, token) units;
         markup = [] >}
@@ -386,13 +218,9 @@ type file_path = string
    method set_pos pos = {< pos = pos >}

    method slide_token token =
-     {< window = slide token window >}
+     match self#window with
+       Nil -> {< window = One token >}
+     | One t | Two (t,_) -> {< window = Two (token,t) >}

-   (* The call [sync state buffer] updates the current position
-      in accordance with the contents of the lexing buffer, more
-      precisely, depending on the length of the string which has
-      just been recognised by the scanner: that length is used as
-      a positive offset to the current column. *)
-
    method sync buffer =
      let lex = Lexing.lexeme buffer in
@@ -447,5 +275,121 @@ type file_path = string
        let region, value, state = self#sync buffer in
        let unit = Markup.BOM Region.{region; value}
        in state#push_markup unit

  end

+(* LEXER INSTANCE *)
+
+type input =
+  File of file_path
+| String of string
+| Channel of in_channel
+| Buffer of Lexing.lexbuf
+
+type 'token logger = Markup.t list -> 'token -> unit
+
+type 'token instance = {
+  input    : input;
+  read     : log:('token logger) -> Lexing.lexbuf -> 'token;
+  buffer   : Lexing.lexbuf;
+  get_win  : unit -> 'token window;
+  get_pos  : unit -> Pos.t;
+  get_last : unit -> Region.t;
+  get_file : unit -> file_path;
+  close    : unit -> unit
+}
+
+type open_err = File_opening of string
+
+let lexbuf_from_input = function
+  String s ->
+    Ok (Lexing.from_string s, fun () -> ())
+| Channel chan ->
+    let close () = close_in chan in
+    Ok (Lexing.from_channel chan, close)
+| Buffer b ->
+    Ok (b, fun () -> ())
+| File path ->
+    try
+      let chan = open_in path in
+      let close () = close_in chan in
+      let lexbuf = Lexing.from_channel chan in
+      let () =
+        let open Lexing in
+        lexbuf.lex_curr_p <- {lexbuf.lex_curr_p with pos_fname=path}
+      in Ok (lexbuf, close)
+    with Sys_error msg -> Stdlib.Error (File_opening msg)
+
+let open_token_stream ?line ?block ~init ~scan
+                      ~token_to_region ~style input =
+  let file_path = match input with
+    File path -> path
+  | _ -> "" in
+  let pos = Pos.min ~file:file_path in
+  let buf_reg = ref (pos#byte, pos#byte)
+  and first_call = ref true
+  and decoder = Uutf.decoder ~encoding:`UTF_8 `Manual in
+  let supply = Uutf.Manual.src decoder in
+  let state = ref (mk_state
+                     ~units:FQueue.empty
+                     ~last:Region.ghost
+                     ~window:Nil
+                     ~pos
+                     ~markup:[]
+                     ~decoder
+                     ~supply
+                     ?block
+                     ?line
+                     ()) in
+  let get_pos  () = !state#pos
+  and get_last () = !state#last
+  and get_win  () = !state#window
+  and get_file () = file_path in
+
+  let patch_buffer (start, stop) buffer =
+    let open Lexing in
+    let file_path = buffer.lex_curr_p.pos_fname in
+    buffer.lex_start_p <- {start with pos_fname = file_path};
+    buffer.lex_curr_p  <- {stop  with pos_fname = file_path}
+
+  and save_region buffer =
+    buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p) in
+
+  let scan' init scan buffer =
+    patch_buffer !buf_reg buffer;
+    (if !first_call
+     then (state := init !state buffer; first_call := false)
+     else state := scan !state buffer);
+    save_region buffer in
+
+  let next_token init scan buffer =
+    scan' init scan buffer;
+    match FQueue.peek !state#units with
+      None -> None
+    | Some (units, ext_token) ->
+        state := !state#set_units units; Some ext_token in
+
+  let rec read init scan ~token_to_region ~style ~log buffer =
+    match FQueue.deq !state#units with
+      None ->
+        scan' init scan buffer;
+        read init scan ~token_to_region ~style ~log buffer
+    | Some (units, (left_mark, token)) ->
+        log left_mark token;
+        state := ((!state#set_units units)
+                    #set_last (token_to_region token))
+                    #slide_token token;
+        style token (next_token init scan) buffer;
+        patch_buffer (token_to_region token)#byte_pos buffer;
+        token in
+
+  match lexbuf_from_input input with
+    Ok (buffer, close) ->
+      let () =
+        match input with
+          File path when path <> "" -> reset ~file:path buffer
+        | _ -> () in
+      let instance = {
+        read = read init scan ~token_to_region ~style;
+        input; buffer; get_win; get_pos; get_last; get_file; close}
+      in Ok instance
+  | Error _ as e -> e
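Note that [open_token_stream] is now fully generic: the scanners and the style check are injected by the caller. The call sites updated later in this commit (LexerUnit and ParserUnit) all follow the same shape, roughly like this sketch (assuming a concrete [Lexer] module and an [options] object with #line and #block, as in those call sites; the lexer's exceptions are left unhandled for brevity):

(* Sketch of the shape of the updated call sites. *)
let scan_to_eof options source =
  match LexerLib.open_token_stream
          ?line:options#line
          ?block:options#block
          ~init:Lexer.init
          ~scan:Lexer.scan
          ~token_to_region:Lexer.Token.to_region
          ~style:Lexer.Token.check_right_context
          (LexerLib.String source)
  with
    Ok LexerLib.{read; buffer; close; _} ->
      let rec drain tokens =
        let token = read ~log:(fun _ _ -> ()) buffer in
        if Lexer.Token.is_eof token
        then List.rev (token :: tokens)
        else drain (token :: tokens) in
      let tokens = drain [] in
      close (); Stdlib.Ok tokens
  | Stdlib.Error (LexerLib.File_opening msg) ->
      Stdlib.Error (Region.wrap_ghost msg)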
src/passes/1-parser/shared/LexerLib.mli (new file, 205 lines)

@@ -0,0 +1,205 @@
(* A library for writing UTF8-aware lexers *)

module Region = Simple_utils.Region
module Pos = Simple_utils.Pos

(* The function [rollback] resets the lexing buffer to the state it
   was in when it matched the last regular expression. This function
   is safe to use only in the semantic action of the rule which last
   matched. *)

val rollback : Lexing.lexbuf -> unit

(* Utility types *)

type file_path = string
type lexeme = string

(* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)

(* When scanning structured constructs, like strings and comments, we
   need to keep the region of the opening symbol (like double quote,
   "//" or "(*") in order to report any error more precisely. Since
   ocamllex is byte-oriented, we need to store the parsed bytes as
   characters in an accumulator [acc] and also its length [len], so
   that, when we are done, it is easy to build the string making up
   the structured construct with [mk_str] (see above).

   The resulting data structure is called a _thread_. (Note for
   Emacs: "*)".)
*)

type thread = <
  opening : Region.t;
  length  : int;
  acc     : char list;
  to_string   : string;
  push_char   : char -> thread;
  push_string : string -> thread;
  set_opening : Region.t -> thread
>

val mk_thread : Region.t -> lexeme -> thread

(* STATE *)

(* Beyond producing tokens, the result of lexing is a _state_. The
   type [state] represents the abstract logical state of the lexing
   engine, that is, a value which is threaded during scanning and
   which denotes useful, high-level information beyond what the type
   [Lexing.lexbuf] in the standard library already provides for all
   generic lexers. We qualify it as "logical state" because the lexing
   buffer itself has a "physical state" defined by the type
   [Lexing.lexbuf].

   Tokens are the smallest units used by the parser to build the
   abstract syntax tree. The state includes a queue of recognised
   tokens, with the markup at the left of its lexeme until either the
   start of the file or the end of the previously recognised token.

   The markup from the last recognised token or, if the first token
   has not been recognised yet, from the beginning of the file is
   stored in the field [markup] of the state because it is a
   side-effect, with respect to the output token list, and we use a
   record with a single field [units] because that record may be
   easily extended during the future maintenance of this lexer.

   The state also includes a field [pos] which holds the current
   position in the LIGO source file. The position is not always
   updated after a single character has been matched: that depends on
   the regular expression that matched the lexing buffer.

   The field [window] is a two-token window, that is, a buffer that
   contains the last recognised token, and the penultimate (if any).
   Technically, it is a parametric type, but its use is meant for
   tokens, wherever they are defined. In [Two (t1,t2)], and in case
   of a syntax error, [t1] is the first invalid token and [t2] is the
   last valid one.

   The fields [decoder] and [supply] offer the support needed for
   the lexing of UTF-8 encoded characters in comments (the only place
   where they are allowed in LIGO). The former is the decoder proper
   and the latter is the effectful function [supply] that takes a
   byte, a start index and a length and feeds it to [decoder]. See
   the documentation of the third-party library Uutf.

   Some methods are now documented.

   The call [state#enqueue token] updates functionally the state
   [state] by associating the token [token] with the stored markup and
   enqueuing the pair into the units queue. The field [markup] is then
   reset to the empty list.

   The call [state#slide_token token] pushes the token [token] in
   the buffer [buffer]. If the buffer is full, that is, it is [Two
   (t1,t2)], then the token [t2] is discarded to make room for
   [token].

   The call [state#sync buffer] updates the current position in
   accordance with the contents of the lexing buffer, more precisely,
   depending on the length of the string which has just been
   recognised by the scanner: that length is used as a positive offset
   to the current column.
*)
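Putting those methods together, one step of a semantic action typically synchronises the position, builds a token and enqueues it; roughly like this sketch, where [mk_token] is a hypothetical smart constructor standing in for the real token builders:

(* Sketch of one scanning step inside a semantic action. *)
let step state buffer =
  let region, lexeme, state = state#sync buffer in
  let token = mk_token lexeme region  (* hypothetical constructor *)
  in state#enqueue token              (* attaches markup, resets it *)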
type 'token window =
  Nil
| One of 'token
| Two of 'token * 'token
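After a parse error, this window is what names the offending tokens; the parser drivers updated later in this commit (see ParserUnit below) do essentially this:

(* Cf. ParserUnit: naming the tokens around a syntax error. *)
let invalid, valid_opt =
  match lexer_inst.LexerLib.get_win () with
    LexerLib.Nil -> assert false  (* there is always at least EOF *)
  | LexerLib.One invalid -> invalid, None
  | LexerLib.Two (invalid, valid) -> invalid, Some valid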
type 'token state = <
  units : (Markup.t list * 'token) FQueue.t;
  markup : Markup.t list;
  window : 'token window;
  last : Region.t;
  pos : Pos.t;
  decoder : Uutf.decoder;
  supply : Bytes.t -> int -> int -> unit;
  block : EvalOpt.block_comment option;
  line : EvalOpt.line_comment option;

  enqueue : 'token -> 'token state;
  set_units : (Markup.t list * 'token) FQueue.t -> 'token state;
  set_last : Region.t -> 'token state;
  set_pos : Pos.t -> 'token state;
  slide_token : 'token -> 'token state;

  sync : Lexing.lexbuf -> Region.t * lexeme * 'token state;

  push_newline : Lexing.lexbuf -> 'token state;
  push_line : thread -> 'token state;
  push_block : thread -> 'token state;
  push_space : Lexing.lexbuf -> 'token state;
  push_tabs : Lexing.lexbuf -> 'token state;
  push_bom : Lexing.lexbuf -> 'token state;
  push_markup : Markup.t -> 'token state;
>

(* LEXER INSTANCE *)

(* The function [open_token_stream] returns a lexer instance made of

     * the input [input] of type [input];
     * a function [read] that extracts tokens from a lexing buffer,
       together with a lexing buffer [buffer] to read from,
     * a function [close] that closes that buffer,
     * a function [get_pos] that returns the current position, and
     * a function [get_last] that returns the region of the last
       recognised token,
     * a function [get_file] that returns the name of the file being
       scanned (empty string if [stdin]).

   Note that a module [Token] is exported too, because the signature
   of the exported functions depends on it.

   The type [window] is a two-token window, that is, a buffer that
   contains the last recognised token, and the penultimate (if any).

   The call [read ?line ?block ~init ~scan ~token_to_region ~style
   input] evaluates in a lexer (also known as a tokeniser or scanner)
   whose type is [log:('token logger) -> Lexing.lexbuf -> 'token],
   suitable for a parser generated by Menhir. The argument labelled
   [log] is a logger, that is, it may print a token and its left
   markup to a given channel, at the caller's discretion. The argument
   labelled [~init] is the scanner to be called first, usually for
   reading the BOM, then [scan] is used for the following calls. The
   function labelled [~style] is used to check stylistic constraints
   on the tokens and the markup between them.
*)

type input =
  File of file_path
| String of string
| Channel of in_channel
| Buffer of Lexing.lexbuf

type 'token logger = Markup.t list -> 'token -> unit

type 'token instance = {
  input    : input;
  read     : log:('token logger) -> Lexing.lexbuf -> 'token;
  buffer   : Lexing.lexbuf;
  get_win  : unit -> 'token window;
  get_pos  : unit -> Pos.t;
  get_last : unit -> Region.t;
  get_file : unit -> file_path;
  close    : unit -> unit
}

type open_err = File_opening of string

val lexbuf_from_input :
  input -> (Lexing.lexbuf * (unit -> unit), open_err) Stdlib.result

val open_token_stream :
  ?line:EvalOpt.line_comment ->
  ?block:EvalOpt.block_comment ->
  init:('token state -> Lexing.lexbuf -> 'token state) ->
  scan:('token state -> Lexing.lexbuf -> 'token state) ->
  token_to_region:('token -> Region.t) ->
  style:('token ->
         (Lexing.lexbuf -> (Markup.t list * 'token) option) ->
         Lexing.lexbuf ->
         unit) ->
  input ->
  ('token instance, open_err) Stdlib.result
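The [~style] callback receives the freshly read token and a way to peek at the next lexical unit. The real check is [Token.check_right_context]; a toy checker conforming to this type could look like the following (hypothetical, for illustration only):

(* Hypothetical style check: flag two tokens glued together with no
   markup in between, whatever the tokens are. *)
let no_glued_tokens _token next_token lexbuf =
  match next_token lexbuf with
    Some ([], _next) -> prerr_endline "Style: tokens are glued."
  | Some (_, _) | None -> ()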
@@ -4,7 +4,8 @@ module Region = Simple_utils.Region

module type S =
  sig
-   module Lexer : LexerLib.S
+   module Lexer : Lexer.S
+   type token = Lexer.token

    val output_token :
      ?offsets:bool ->
@@ -12,7 +13,7 @@ module type S =
      EvalOpt.command ->
      out_channel ->
      Markup.t list ->
-     Lexer.token ->
+     token ->
      unit

    type file_path = string
@@ -22,19 +23,26 @@ module type S =
      [`Byte | `Point] ->
      ?block:EvalOpt.block_comment ->
      ?line:EvalOpt.line_comment ->
-     Lexer.input ->
+     token_to_region:(token -> Region.t) ->
+     style:(token ->
+            (Lexing.lexbuf -> (Markup.t list * token) option) ->
+            Lexing.lexbuf ->
+            unit) ->
+     LexerLib.input ->
      EvalOpt.command ->
      (unit, string Region.reg) Stdlib.result
  end

-module Make (Lexer: LexerLib.S) : (S with module Lexer = Lexer) =
+module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
  struct
    module Lexer = Lexer
    module Token = Lexer.Token
+   type token = Lexer.token

-   (** Pretty-printing in a string the lexemes making up the markup
+   (* Pretty-printing in a string the lexemes making up the markup
       between two tokens, concatenated with the last lexeme
       itself. *)

    let output_token ?(offsets=true) mode command
        channel left_mark token : unit =
      let output str = Printf.fprintf channel "%s%!" str in
@@ -57,10 +65,17 @@ module Make (Lexer: LexerLib.S) : (S with module Lexer = Lexer) =

    type file_path = string

-   let trace ?(offsets=true) mode ?block ?line input command :
+   let trace ?(offsets=true) mode ?block ?line
+             ~token_to_region ~style input command :
      (unit, string Region.reg) Stdlib.result =
-     match Lexer.open_token_stream ?line ?block input with
-       Ok Lexer.{read; buffer; close; _} ->
+     match LexerLib.open_token_stream
+             ~init:Lexer.init
+             ~scan:Lexer.scan
+             ~token_to_region
+             ~style
+             ?line ?block input
+     with
+       Ok LexerLib.{read; buffer; close; _} ->
        let log = output_token ~offsets mode command stdout
        and close_all () = flush_all (); close () in
        let rec iter () =
@@ -69,12 +84,17 @@ module Make (Lexer: LexerLib.S) : (S with module Lexer = Lexer) =
          if Token.is_eof token
          then Stdlib.Ok ()
          else iter ()
+       | exception Lexer.Token.Error error ->
+           let msg =
+             Lexer.Token.format_error
+               ~offsets mode ~file:true error
+           in Stdlib.Error msg
        | exception Lexer.Error error ->
            let msg =
              Lexer.format_error ~offsets mode ~file:true error
            in Stdlib.Error msg in
      let result = iter ()
      in close_all (); result
-   | Stdlib.Error (Lexer.File_opening msg) ->
+   | Stdlib.Error (LexerLib.File_opening msg) ->
        flush_all (); Stdlib.Error (Region.wrap_ghost msg)
  end
@@ -2,7 +2,8 @@ module Region = Simple_utils.Region

module type S =
  sig
-   module Lexer : LexerLib.S
+   module Lexer : Lexer.S
+   type token = Lexer.token

    val output_token :
      ?offsets:bool ->
@@ -10,7 +11,7 @@ module type S =
      EvalOpt.command ->
      out_channel ->
      Markup.t list ->
-     Lexer.token ->
+     token ->
      unit

    type file_path = string
@@ -20,9 +21,14 @@ module type S =
      [`Byte | `Point] ->
      ?block:EvalOpt.block_comment ->
      ?line:EvalOpt.line_comment ->
-     Lexer.input ->
+     token_to_region:(token -> Region.t) ->
+     style:(token ->
+            (Lexing.lexbuf -> (Markup.t list * token) option) ->
+            Lexing.lexbuf ->
+            unit) ->
+     LexerLib.input ->
      EvalOpt.command ->
      (unit, string Region.reg) Stdlib.result
  end

-module Make (Lexer: LexerLib.S) : S with module Lexer = Lexer
+module Make (Lexer: Lexer.S) : S with module Lexer = Lexer
@@ -9,7 +9,7 @@ module type IO =
    val options : EvalOpt.options (* CLI options *)
  end

-module Make (IO: IO) (Lexer: LexerLib.S) =
+module Make (IO: IO) (Lexer: Lexer.S) =
  struct
    (* Error printing and exception tracing *)

@@ -39,11 +39,16 @@ module Make (IO: IO) (Lexer: LexerLib.S) =
    | Stdlib.Ok pp_buffer ->
        (* Running the lexer on the preprocessed input *)

-       let source = Lexer.String (Buffer.contents pp_buffer) in
+       let source = LexerLib.String (Buffer.contents pp_buffer) in
-       match Lexer.open_token_stream ?line:IO.options#line
+       match LexerLib.open_token_stream
+               ?line:IO.options#line
                ?block:IO.options#block
+               ~init:Lexer.init
+               ~scan:Lexer.scan
+               ~token_to_region:Lexer.Token.to_region
+               ~style:Lexer.Token.check_right_context
                source with
-         Ok Lexer.{read; buffer; close; _} ->
+         Ok LexerLib.{read; buffer; close; _} ->
            let close_all () = flush_all (); close () in
            let rec read_tokens tokens =
              match read ~log:(fun _ _ -> ()) buffer with
@@ -51,9 +56,7 @@ module Make (IO: IO) (Lexer: LexerLib.S) =
                if Lexer.Token.is_eof token
                then Stdlib.Ok (List.rev tokens)
                else read_tokens (token::tokens)
-             (* | exception _ ->
-                  Printf.eprintf "Here\n%!"; exit 1
-             *) | exception Lexer.Token.Error error ->
+             | exception Lexer.Token.Error error ->
                  let file =
                    match IO.options#input with
                      None | Some "-" -> false
@@ -74,7 +77,7 @@ module Make (IO: IO) (Lexer: LexerLib.S) =
              in Stdlib.Error msg in
            let result = read_tokens []
            in close_all (); result
-       | Stdlib.Error (Lexer.File_opening msg) ->
+       | Stdlib.Error (LexerLib.File_opening msg) ->
            flush_all (); Stdlib.Error (Region.wrap_ghost msg) in
        match IO.options#input with
          None -> preproc stdin
@@ -115,7 +118,9 @@ module Make (IO: IO) (Lexer: LexerLib.S) =
            IO.options#mode
            ?block:IO.options#block
            ?line:IO.options#line
-           (Lexer.String preproc_str)
+           ~token_to_region:Lexer.Token.to_region
+           ~style:Lexer.Token.check_right_context
+           (LexerLib.String preproc_str)
            IO.options#cmd
        in match IO.options#input with
            None -> preproc stdin
@@ -7,7 +7,7 @@ module type IO =
    val options : EvalOpt.options (* CLI options *)
  end

-module Make (IO: IO) (Lexer: LexerLib.S) :
+module Make (IO: IO) (Lexer: Lexer.S) :
  sig
    val scan : unit -> (Lexer.token list, string Region.reg) Stdlib.result
    val trace : unit -> (unit, string Region.reg) Stdlib.result
@@ -56,8 +56,8 @@ module type PARSER =
  (* Main functor *)

  module Make (IO: IO)
-             (Lexer: LexerLib.S)
+             (Lexer: Lexer.S)
-             (Parser: PARSER with type token = Lexer.Token.token)
+             (Parser: PARSER with type token = Lexer.token)
              (ParErr: sig val message : int -> string end) =
    struct
      module I = Parser.MenhirInterpreter
@@ -143,14 +143,14 @@ module Make (IO: IO)
                     ~offsets:IO.options#offsets
                     IO.options#mode IO.options#cmd stdout

-     let incr_contract Lexer.{read; buffer; get_win; close; _} =
+     let incr_contract LexerLib.{read; buffer; get_win; close; _} =
        let supplier = I.lexer_lexbuf_to_supplier (read ~log) buffer
        and failure = failure get_win in
        let parser = Incr.contract buffer.Lexing.lex_curr_p in
        let ast = I.loop_handle success failure supplier parser
        in flush_all (); close (); ast

-     let incr_expr Lexer.{read; buffer; get_win; close; _} =
+     let incr_expr LexerLib.{read; buffer; get_win; close; _} =
        let supplier = I.lexer_lexbuf_to_supplier (read ~log) buffer
        and failure = failure get_win in
        let parser = Incr.interactive_expr buffer.Lexing.lex_curr_p in
@@ -55,8 +55,8 @@ module type PARSER =
  end

  module Make (IO: IO)
-             (Lexer: LexerLib.S)
+             (Lexer: Lexer.S)
-             (Parser: PARSER with type token = Lexer.Token.token)
+             (Parser: PARSER with type token = Lexer.token)
              (ParErr: sig val message : int -> string end) :
    sig
      (* WARNING: The following parsers may all raise [Lexer.Error] *)
@@ -78,8 +78,8 @@ module Make (IO: IO)

      exception Point of error

-     val incr_contract : Lexer.instance -> Parser.ast
+     val incr_contract : Lexer.token LexerLib.instance -> Parser.ast
-     val incr_expr : Lexer.instance -> Parser.expr
+     val incr_expr : Lexer.token LexerLib.instance -> Parser.expr

      val format_error :
        ?offsets:bool -> [`Point | `Byte] -> error -> string Region.reg
@@ -37,7 +37,7 @@ module type Printer =
    val print_expr : state -> expr -> unit
  end

-module Make (Lexer: LexerLib.S)
+module Make (Lexer: Lexer.S)
            (AST: sig type t type expr end)
            (Parser: ParserAPI.PARSER
                       with type ast = AST.t
@@ -89,12 +89,12 @@ module Make (Lexer: LexerLib.S)
        ParserLog.mk_state ~offsets:SubIO.options#offsets
                           ~mode:SubIO.options#mode
                           ~buffer:output in
-     let close () = lexer_inst.Lexer.close () in
+     let close () = lexer_inst.LexerLib.close () in
      let expr =
        try
          if SubIO.options#mono then
-           let tokeniser = lexer_inst.Lexer.read ~log
-           and lexbuf = lexer_inst.Lexer.buffer
+           let tokeniser = lexer_inst.LexerLib.read ~log
+           and lexbuf = lexer_inst.LexerLib.buffer
            in Front.mono_expr tokeniser lexbuf
          else
            Front.incr_expr lexer_inst
@@ -124,12 +124,12 @@ module Make (Lexer: LexerLib.S)
        ParserLog.mk_state ~offsets:SubIO.options#offsets
                           ~mode:SubIO.options#mode
                           ~buffer:output in
-     let close () = lexer_inst.Lexer.close () in
+     let close () = lexer_inst.LexerLib.close () in
      let ast =
        try
          if SubIO.options#mono then
-           let tokeniser = lexer_inst.Lexer.read ~log
-           and lexbuf = lexer_inst.Lexer.buffer
+           let tokeniser = lexer_inst.LexerLib.read ~log
+           and lexbuf = lexer_inst.LexerLib.buffer
            in Front.mono_contract tokeniser lexbuf
          else
            Front.incr_contract lexer_inst
@@ -163,9 +163,17 @@ module Make (Lexer: LexerLib.S)

      | exception Lexer.Error err ->
          let file =
-           lexer_inst.Lexer.buffer.Lexing.lex_curr_p.Lexing.pos_fname in
+           lexer_inst.LexerLib.buffer.Lexing.lex_curr_p.Lexing.pos_fname in
-         let error =
-           Lexer.format_error ~offsets:SubIO.options#offsets
+         let error = Lexer.format_error
+                       ~offsets:SubIO.options#offsets
+                       SubIO.options#mode err ~file:(file <> "")
+         in Stdlib.Error error
+
+     | exception Lexer.Token.Error err ->
+         let file =
+           lexer_inst.LexerLib.buffer.Lexing.lex_curr_p.Lexing.pos_fname in
+         let error = Lexer.Token.format_error
+                       ~offsets:SubIO.options#offsets
                        SubIO.options#mode err ~file:(file <> "")
          in Stdlib.Error error

@@ -181,7 +189,7 @@ module Make (Lexer: LexerLib.S)

      | exception Parser.Error ->
          let invalid, valid_opt =
-           match lexer_inst.Lexer.get_win () with
+           match lexer_inst.LexerLib.get_win () with
              LexerLib.Nil ->
                assert false (* Safe: There is always at least EOF. *)
            | LexerLib.One invalid -> invalid, None
@@ -205,8 +213,8 @@ module Make (Lexer: LexerLib.S)
    (* Parsing a contract *)

    let gen_parser options input parser =
-     match Lexer.lexbuf_from_input input with
+     match LexerLib.lexbuf_from_input input with
-       Stdlib.Error (Lexer.File_opening msg) ->
+       Stdlib.Error (LexerLib.File_opening msg) ->
          Stdlib.Error (Region.wrap_ghost msg)
      | Ok (lexbuf, close) ->
          (* Preprocessing the input source *)
@@ -224,48 +232,53 @@ module Make (Lexer: LexerLib.S)
          (* Lexing and parsing the preprocessed input source *)

          let () = close () in
-         let input' = Lexer.String (Buffer.contents buffer) in
+         let input' = LexerLib.String (Buffer.contents buffer) in
-         match Lexer.open_token_stream ?line:options#line
+         match LexerLib.open_token_stream
+                 ~init:Lexer.init
+                 ~scan:Lexer.scan
+                 ~token_to_region:Lexer.Token.to_region
+                 ~style:Lexer.Token.check_right_context
+                 ?line:options#line
                  ?block:options#block
                  input'
          with
            Ok instance ->
              let open Lexing in
-             instance.Lexer.buffer.lex_curr_p <-
+             instance.LexerLib.buffer.lex_curr_p <-
-               {instance.Lexer.buffer.lex_curr_p with pos_fname=file};
+               {instance.LexerLib.buffer.lex_curr_p with pos_fname=file};
              apply instance parser
-         | Stdlib.Error (Lexer.File_opening msg) ->
+         | Stdlib.Error (LexerLib.File_opening msg) ->
              Stdlib.Error (Region.wrap_ghost msg)

    (* Parsing a contract in a file *)

    let contract_in_file (source : string) =
      let options = SubIO.make ~input:(Some source) ~expr:false
-     in gen_parser options (Lexer.File source) parse_contract
+     in gen_parser options (LexerLib.File source) parse_contract

    (* Parsing a contract in a string *)

    let contract_in_string (source : string) =
      let options = SubIO.make ~input:None ~expr:false in
-     gen_parser options (Lexer.String source) parse_contract
+     gen_parser options (LexerLib.String source) parse_contract

    (* Parsing a contract in stdin *)

    let contract_in_stdin () =
      let options = SubIO.make ~input:None ~expr:false in
-     gen_parser options (Lexer.Channel stdin) parse_contract
+     gen_parser options (LexerLib.Channel stdin) parse_contract

    (* Parsing an expression in a string *)

    let expr_in_string (source : string) =
      let options = SubIO.make ~input:None ~expr:true in
-     gen_parser options (Lexer.String source) parse_expr
+     gen_parser options (LexerLib.String source) parse_expr

    (* Parsing an expression in stdin *)

    let expr_in_stdin () =
      let options = SubIO.make ~input:None ~expr:true in
-     gen_parser options (Lexer.Channel stdin) parse_expr
+     gen_parser options (LexerLib.Channel stdin) parse_expr

    (* Preprocess only *)
@@ -43,7 +43,7 @@ module type Printer =

  (* Main functor to make the parser *)

-module Make (Lexer : LexerLib.S)
+module Make (Lexer : Lexer.S)
            (AST : sig type t type expr end)
            (Parser : ParserAPI.PARSER
                        with type ast = AST.t
@@ -1 +1 @@
-const a: string = -0x222;
+const a: string = - (**) 0x2222
@@ -1 +1 @@
-let a = -0x222;
+let a = - (**) 0x2222
@@ -1 +1 @@
-let a = -0x222;
+let a = - /**/ 0x2222;
@@ -1 +0,0 @@
-let arguments = 1;