Merge branch 'rinderknecht@michelson' into 'dev'
Refactoring of the lexer and preprocessor See merge request ligolang/ligo!591
This commit is contained in:
commit
d0d495ccce
@ -54,7 +54,7 @@ ligo: : Lexical error in file "broken_string.religo", line 1, characters 8-9:
|
|||||||
|
|
||||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.ligo" ; "main" ] ;
|
run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.ligo" ; "main" ] ;
|
||||||
[%expect {|
|
[%expect {|
|
||||||
ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters 18-23:
|
ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters 18-31:
|
||||||
Negative byte sequence.
|
Negative byte sequence.
|
||||||
Hint: Remove the leading minus sign.
|
Hint: Remove the leading minus sign.
|
||||||
{}
|
{}
|
||||||
@ -71,7 +71,7 @@ ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters
|
|||||||
|
|
||||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.mligo" ; "main" ] ;
|
run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.mligo" ; "main" ] ;
|
||||||
[%expect {|
|
[%expect {|
|
||||||
ligo: : Lexical error in file "negative_byte_sequence.mligo", line 1, characters 8-13:
|
ligo: : Lexical error in file "negative_byte_sequence.mligo", line 1, characters 8-21:
|
||||||
Negative byte sequence.
|
Negative byte sequence.
|
||||||
Hint: Remove the leading minus sign.
|
Hint: Remove the leading minus sign.
|
||||||
{}
|
{}
|
||||||
@ -88,7 +88,7 @@ ligo: : Lexical error in file "negative_byte_sequence.mligo", line 1, characters
|
|||||||
|
|
||||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.religo" ; "main" ] ;
|
run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.religo" ; "main" ] ;
|
||||||
[%expect {|
|
[%expect {|
|
||||||
ligo: : Lexical error in file "negative_byte_sequence.religo", line 1, characters 8-13:
|
ligo: : Lexical error in file "negative_byte_sequence.religo", line 1, characters 8-21:
|
||||||
Negative byte sequence.
|
Negative byte sequence.
|
||||||
Hint: Remove the leading minus sign.
|
Hint: Remove the leading minus sign.
|
||||||
{}
|
{}
|
||||||
@ -103,6 +103,7 @@ ligo: : Lexical error in file "negative_byte_sequence.religo", line 1, character
|
|||||||
* Check the changelog by running 'ligo changelog'
|
* Check the changelog by running 'ligo changelog'
|
||||||
|} ];
|
|} ];
|
||||||
|
|
||||||
|
(*
|
||||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/reserved_name.ligo" ; "main" ] ;
|
run_ligo_bad [ "compile-contract" ; "../../test/lexer/reserved_name.ligo" ; "main" ] ;
|
||||||
[%expect {|
|
[%expect {|
|
||||||
ligo: : Lexical error in file "reserved_name.ligo", line 1, characters 4-13:
|
ligo: : Lexical error in file "reserved_name.ligo", line 1, characters 4-13:
|
||||||
@ -119,6 +120,7 @@ ligo: : Lexical error in file "reserved_name.ligo", line 1, characters 4-13:
|
|||||||
* Open a gitlab issue: https://gitlab.com/ligolang/ligo/issues/new
|
* Open a gitlab issue: https://gitlab.com/ligolang/ligo/issues/new
|
||||||
* Check the changelog by running 'ligo changelog'
|
* Check the changelog by running 'ligo changelog'
|
||||||
|} ];
|
|} ];
|
||||||
|
*)
|
||||||
|
|
||||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/reserved_name.religo" ; "main" ] ;
|
run_ligo_bad [ "compile-contract" ; "../../test/lexer/reserved_name.religo" ; "main" ] ;
|
||||||
[%expect {|
|
[%expect {|
|
||||||
@ -204,10 +206,9 @@ ligo: : Lexical error in file "unexpected_character.religo", line 1, characters
|
|||||||
|
|
||||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/unterminated_comment.mligo" ; "main" ] ;
|
run_ligo_bad [ "compile-contract" ; "../../test/lexer/unterminated_comment.mligo" ; "main" ] ;
|
||||||
[%expect {|
|
[%expect {|
|
||||||
ligo: : Lexical error in file "unterminated_comment.mligo", line 1, characters 0-2:
|
ligo: : Preprocessing error in file "../../test/lexer/unterminated_comment.mligo", line 1, characters 0-2:
|
||||||
Unterminated comment.
|
Unterminated comment.
|
||||||
Hint: Close with "*)".
|
Hint: Close with "*)". {}
|
||||||
{}
|
|
||||||
|
|
||||||
|
|
||||||
If you're not sure how to fix this error, you can
|
If you're not sure how to fix this error, you can
|
||||||
|
@ -8,15 +8,14 @@ module SSet = Set.Make (String)
|
|||||||
|
|
||||||
(* Mock IOs TODO: Fill them with CLI options *)
|
(* Mock IOs TODO: Fill them with CLI options *)
|
||||||
|
|
||||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
|
||||||
|
|
||||||
module SubIO =
|
module SubIO =
|
||||||
struct
|
struct
|
||||||
type options = <
|
type options = <
|
||||||
libs : string list;
|
libs : string list;
|
||||||
verbose : SSet.t;
|
verbose : SSet.t;
|
||||||
offsets : bool;
|
offsets : bool;
|
||||||
lang : language;
|
block : EvalOpt.block_comment option;
|
||||||
|
line : EvalOpt.line_comment option;
|
||||||
ext : string; (* ".mligo" *)
|
ext : string; (* ".mligo" *)
|
||||||
mode : [`Byte | `Point];
|
mode : [`Byte | `Point];
|
||||||
cmd : EvalOpt.command;
|
cmd : EvalOpt.command;
|
||||||
@ -24,11 +23,13 @@ module SubIO =
|
|||||||
>
|
>
|
||||||
|
|
||||||
let options : options =
|
let options : options =
|
||||||
object
|
let block = EvalOpt.mk_block ~opening:"(*" ~closing:"*)"
|
||||||
|
in object
|
||||||
method libs = []
|
method libs = []
|
||||||
method verbose = SSet.empty
|
method verbose = SSet.empty
|
||||||
method offsets = true
|
method offsets = true
|
||||||
method lang = `CameLIGO
|
method block = Some block
|
||||||
|
method line = Some "//"
|
||||||
method ext = ".mligo"
|
method ext = ".mligo"
|
||||||
method mode = `Point
|
method mode = `Point
|
||||||
method cmd = EvalOpt.Quiet
|
method cmd = EvalOpt.Quiet
|
||||||
@ -39,7 +40,8 @@ module SubIO =
|
|||||||
EvalOpt.make ~libs:options#libs
|
EvalOpt.make ~libs:options#libs
|
||||||
~verbose:options#verbose
|
~verbose:options#verbose
|
||||||
~offsets:options#offsets
|
~offsets:options#offsets
|
||||||
~lang:options#lang
|
?block:options#block
|
||||||
|
?line:options#line
|
||||||
~ext:options#ext
|
~ext:options#ext
|
||||||
~mode:options#mode
|
~mode:options#mode
|
||||||
~cmd:options#cmd
|
~cmd:options#cmd
|
||||||
|
@ -2,6 +2,7 @@ $HOME/git/OCaml-build/Makefile
|
|||||||
|
|
||||||
../shared/Lexer.mli
|
../shared/Lexer.mli
|
||||||
../shared/Lexer.mll
|
../shared/Lexer.mll
|
||||||
|
../shared/LexerLib.ml
|
||||||
../shared/EvalOpt.ml
|
../shared/EvalOpt.ml
|
||||||
../shared/EvalOpt.mli
|
../shared/EvalOpt.mli
|
||||||
../shared/FQueue.ml
|
../shared/FQueue.ml
|
||||||
|
@ -156,11 +156,22 @@ val eof : Region.t -> token
|
|||||||
|
|
||||||
(* Predicates *)
|
(* Predicates *)
|
||||||
|
|
||||||
val is_string : token -> bool
|
|
||||||
val is_bytes : token -> bool
|
|
||||||
val is_int : token -> bool
|
|
||||||
val is_ident : token -> bool
|
|
||||||
val is_kwd : token -> bool
|
|
||||||
val is_constr : token -> bool
|
|
||||||
val is_sym : token -> bool
|
|
||||||
val is_eof : token -> bool
|
val is_eof : token -> bool
|
||||||
|
|
||||||
|
(* Style *)
|
||||||
|
|
||||||
|
type error
|
||||||
|
|
||||||
|
val error_to_string : error -> string
|
||||||
|
|
||||||
|
exception Error of error Region.reg
|
||||||
|
|
||||||
|
val format_error :
|
||||||
|
?offsets:bool -> [`Byte | `Point] ->
|
||||||
|
error Region.reg -> file:bool -> string Region.reg
|
||||||
|
|
||||||
|
val check_right_context :
|
||||||
|
token ->
|
||||||
|
(Lexing.lexbuf -> (Markup.t list * token) option) ->
|
||||||
|
Lexing.lexbuf ->
|
||||||
|
unit
|
||||||
|
@ -102,10 +102,35 @@ type t =
|
|||||||
|
|
||||||
| EOF of Region.t (* End of file *)
|
| EOF of Region.t (* End of file *)
|
||||||
|
|
||||||
|
|
||||||
|
(* Projections *)
|
||||||
|
|
||||||
type token = t
|
type token = t
|
||||||
|
|
||||||
let proj_token = function
|
let proj_token = function
|
||||||
ARROW region -> region, "ARROW"
|
(* Literals *)
|
||||||
|
|
||||||
|
String Region.{region; value} ->
|
||||||
|
region, sprintf "Str %s" value
|
||||||
|
| Bytes Region.{region; value = s,b} ->
|
||||||
|
region,
|
||||||
|
sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b)
|
||||||
|
| Int Region.{region; value = s,n} ->
|
||||||
|
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
|
||||||
|
| Nat Region.{region; value = s,n} ->
|
||||||
|
region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
|
||||||
|
| Mutez Region.{region; value = s,n} ->
|
||||||
|
region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
|
||||||
|
| Ident Region.{region; value} ->
|
||||||
|
region, sprintf "Ident %s" value
|
||||||
|
| Constr Region.{region; value} ->
|
||||||
|
region, sprintf "Constr %s" value
|
||||||
|
| Attr Region.{region; value} ->
|
||||||
|
region, sprintf "Attr \"%s\"" value
|
||||||
|
|
||||||
|
(* Symbols *)
|
||||||
|
|
||||||
|
| ARROW region -> region, "ARROW"
|
||||||
| CONS region -> region, "CONS"
|
| CONS region -> region, "CONS"
|
||||||
| CAT region -> region, "CAT"
|
| CAT region -> region, "CAT"
|
||||||
| MINUS region -> region, "MINUS"
|
| MINUS region -> region, "MINUS"
|
||||||
@ -132,24 +157,9 @@ let proj_token = function
|
|||||||
| GE region -> region, "GE"
|
| GE region -> region, "GE"
|
||||||
| BOOL_OR region -> region, "BOOL_OR"
|
| BOOL_OR region -> region, "BOOL_OR"
|
||||||
| BOOL_AND region -> region, "BOOL_AND"
|
| BOOL_AND region -> region, "BOOL_AND"
|
||||||
| Ident Region.{region; value} ->
|
|
||||||
region, sprintf "Ident %s" value
|
(* Keywords *)
|
||||||
| Constr Region.{region; value} ->
|
|
||||||
region, sprintf "Constr %s" value
|
|
||||||
| Int Region.{region; value = s,n} ->
|
|
||||||
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
|
|
||||||
| Nat Region.{region; value = s,n} ->
|
|
||||||
region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
|
|
||||||
| Mutez Region.{region; value = s,n} ->
|
|
||||||
region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
|
|
||||||
| String Region.{region; value} ->
|
|
||||||
region, sprintf "Str %s" value
|
|
||||||
| Bytes Region.{region; value = s,b} ->
|
|
||||||
region,
|
|
||||||
sprintf "Bytes (\"%s\", \"0x%s\")"
|
|
||||||
s (Hex.show b)
|
|
||||||
| Attr Region.{region; value} ->
|
|
||||||
region, sprintf "Attr \"%s\"" value
|
|
||||||
| Begin region -> region, "Begin"
|
| Begin region -> region, "Begin"
|
||||||
| Else region -> region, "Else"
|
| Else region -> region, "Else"
|
||||||
| End region -> region, "End"
|
| End region -> region, "End"
|
||||||
@ -168,12 +178,32 @@ let proj_token = function
|
|||||||
| True region -> region, "True"
|
| True region -> region, "True"
|
||||||
| Type region -> region, "Type"
|
| Type region -> region, "Type"
|
||||||
| With region -> region, "With"
|
| With region -> region, "With"
|
||||||
|
|
||||||
|
(* Data *)
|
||||||
|
|
||||||
| C_None region -> region, "C_None"
|
| C_None region -> region, "C_None"
|
||||||
| C_Some region -> region, "C_Some"
|
| C_Some region -> region, "C_Some"
|
||||||
|
|
||||||
|
(* Virtual tokens *)
|
||||||
|
|
||||||
| EOF region -> region, "EOF"
|
| EOF region -> region, "EOF"
|
||||||
|
|
||||||
|
|
||||||
let to_lexeme = function
|
let to_lexeme = function
|
||||||
ARROW _ -> "->"
|
(* Literals *)
|
||||||
|
|
||||||
|
String s -> String.escaped s.Region.value
|
||||||
|
| Bytes b -> fst b.Region.value
|
||||||
|
| Int i
|
||||||
|
| Nat i
|
||||||
|
| Mutez i -> fst i.Region.value
|
||||||
|
| Ident id -> id.Region.value
|
||||||
|
| Constr id -> id.Region.value
|
||||||
|
| Attr a -> a.Region.value
|
||||||
|
|
||||||
|
(* Symbols *)
|
||||||
|
|
||||||
|
| ARROW _ -> "->"
|
||||||
| CONS _ -> "::"
|
| CONS _ -> "::"
|
||||||
| CAT _ -> "^"
|
| CAT _ -> "^"
|
||||||
| MINUS _ -> "-"
|
| MINUS _ -> "-"
|
||||||
@ -201,14 +231,7 @@ let to_lexeme = function
|
|||||||
| BOOL_OR _ -> "||"
|
| BOOL_OR _ -> "||"
|
||||||
| BOOL_AND _ -> "&&"
|
| BOOL_AND _ -> "&&"
|
||||||
|
|
||||||
| Ident id -> id.Region.value
|
(* Keywords *)
|
||||||
| Constr id -> id.Region.value
|
|
||||||
| Int i
|
|
||||||
| Nat i
|
|
||||||
| Mutez i -> fst i.Region.value
|
|
||||||
| String s -> String.escaped s.Region.value
|
|
||||||
| Bytes b -> fst b.Region.value
|
|
||||||
| Attr a -> a.Region.value
|
|
||||||
|
|
||||||
| Begin _ -> "begin"
|
| Begin _ -> "begin"
|
||||||
| Else _ -> "else"
|
| Else _ -> "else"
|
||||||
@ -229,11 +252,17 @@ let to_lexeme = function
|
|||||||
| Then _ -> "then"
|
| Then _ -> "then"
|
||||||
| With _ -> "with"
|
| With _ -> "with"
|
||||||
|
|
||||||
|
(* Data constructors *)
|
||||||
|
|
||||||
| C_None _ -> "None"
|
| C_None _ -> "None"
|
||||||
| C_Some _ -> "Some"
|
| C_Some _ -> "Some"
|
||||||
|
|
||||||
|
(* Virtual tokens *)
|
||||||
|
|
||||||
| EOF _ -> ""
|
| EOF _ -> ""
|
||||||
|
|
||||||
|
(* CONVERSIONS *)
|
||||||
|
|
||||||
let to_string token ?(offsets=true) mode =
|
let to_string token ?(offsets=true) mode =
|
||||||
let region, val_str = proj_token token in
|
let region, val_str = proj_token token in
|
||||||
let reg_str = region#compact ~offsets mode
|
let reg_str = region#compact ~offsets mode
|
||||||
@ -241,10 +270,6 @@ let to_string token ?(offsets=true) mode =
|
|||||||
|
|
||||||
let to_region token = proj_token token |> fst
|
let to_region token = proj_token token |> fst
|
||||||
|
|
||||||
(* Injections *)
|
|
||||||
|
|
||||||
type int_err = Non_canonical_zero
|
|
||||||
|
|
||||||
(* LEXIS *)
|
(* LEXIS *)
|
||||||
|
|
||||||
let keywords = [
|
let keywords = [
|
||||||
@ -385,6 +410,8 @@ let mk_bytes lexeme region =
|
|||||||
let value = lexeme, `Hex norm
|
let value = lexeme, `Hex norm
|
||||||
in Bytes Region.{region; value}
|
in Bytes Region.{region; value}
|
||||||
|
|
||||||
|
type int_err = Non_canonical_zero
|
||||||
|
|
||||||
let mk_int lexeme region =
|
let mk_int lexeme region =
|
||||||
let z =
|
let z =
|
||||||
Str.(global_replace (regexp "_") "" lexeme) |> Z.of_string
|
Str.(global_replace (regexp "_") "" lexeme) |> Z.of_string
|
||||||
@ -398,16 +425,14 @@ type nat_err =
|
|||||||
|
|
||||||
let mk_nat lexeme region =
|
let mk_nat lexeme region =
|
||||||
match (String.index_opt lexeme 'n') with
|
match (String.index_opt lexeme 'n') with
|
||||||
| None -> Error Invalid_natural
|
None -> Error Invalid_natural
|
||||||
| Some _ -> (
|
| Some _ -> let z =
|
||||||
let z =
|
|
||||||
Str.(global_replace (regexp "_") "" lexeme) |>
|
Str.(global_replace (regexp "_") "" lexeme) |>
|
||||||
Str.(global_replace (regexp "n") "") |>
|
Str.(global_replace (regexp "n") "") |>
|
||||||
Z.of_string in
|
Z.of_string in
|
||||||
if Z.equal z Z.zero && lexeme <> "0n"
|
if Z.equal z Z.zero && lexeme <> "0n"
|
||||||
then Error Non_canonical_zero_nat
|
then Error Non_canonical_zero_nat
|
||||||
else Ok (Nat Region.{region; value = lexeme,z})
|
else Ok (Nat Region.{region; value = lexeme,z})
|
||||||
)
|
|
||||||
|
|
||||||
let mk_mutez lexeme region =
|
let mk_mutez lexeme region =
|
||||||
let z =
|
let z =
|
||||||
@ -422,8 +447,6 @@ let eof region = EOF region
|
|||||||
|
|
||||||
type sym_err = Invalid_symbol
|
type sym_err = Invalid_symbol
|
||||||
|
|
||||||
type attr_err = Invalid_attribute
|
|
||||||
|
|
||||||
let mk_sym lexeme region =
|
let mk_sym lexeme region =
|
||||||
match lexeme with
|
match lexeme with
|
||||||
(* Lexemes in common with all concrete syntaxes *)
|
(* Lexemes in common with all concrete syntaxes *)
|
||||||
@ -473,88 +496,80 @@ let mk_constr lexeme region =
|
|||||||
|
|
||||||
(* Attributes *)
|
(* Attributes *)
|
||||||
|
|
||||||
|
type attr_err = Invalid_attribute
|
||||||
|
|
||||||
let mk_attr header lexeme region =
|
let mk_attr header lexeme region =
|
||||||
if header = "[@" then
|
if header = "[@" then Error Invalid_attribute
|
||||||
Error Invalid_attribute
|
|
||||||
else Ok (Attr Region.{value=lexeme; region})
|
else Ok (Attr Region.{value=lexeme; region})
|
||||||
|
|
||||||
(* Predicates *)
|
(* Predicates *)
|
||||||
|
|
||||||
let is_string = function
|
let is_string = function String _ -> true | _ -> false
|
||||||
String _ -> true
|
let is_bytes = function Bytes _ -> true | _ -> false
|
||||||
| _ -> false
|
let is_int = function Int _ -> true | _ -> false
|
||||||
|
let is_ident = function Ident _ -> true | _ -> false
|
||||||
let is_bytes = function
|
|
||||||
Bytes _ -> true
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let is_int = function
|
|
||||||
Int _ -> true
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let is_ident = function
|
|
||||||
Ident _ -> true
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let is_kwd = function
|
|
||||||
| Begin _
|
|
||||||
| Else _
|
|
||||||
| End _
|
|
||||||
| False _
|
|
||||||
| Fun _
|
|
||||||
| Rec _
|
|
||||||
| If _
|
|
||||||
| In _
|
|
||||||
| Let _
|
|
||||||
| Match _
|
|
||||||
| Mod _
|
|
||||||
| Not _
|
|
||||||
| Of _
|
|
||||||
| Or _
|
|
||||||
| Then _
|
|
||||||
| True _
|
|
||||||
| Type _
|
|
||||||
| With _ -> true
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let is_constr = function
|
|
||||||
| Constr _
|
|
||||||
| Ident _
|
|
||||||
| False _
|
|
||||||
| True _ -> true
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let is_sym = function
|
|
||||||
| ARROW _
|
|
||||||
| CONS _
|
|
||||||
| CAT _
|
|
||||||
| MINUS _
|
|
||||||
| PLUS _
|
|
||||||
| SLASH _
|
|
||||||
| TIMES _
|
|
||||||
| LPAR _
|
|
||||||
| RPAR _
|
|
||||||
| LBRACKET _
|
|
||||||
| RBRACKET _
|
|
||||||
| LBRACE _
|
|
||||||
| RBRACE _
|
|
||||||
| COMMA _
|
|
||||||
| SEMI _
|
|
||||||
| VBAR _
|
|
||||||
| COLON _
|
|
||||||
| DOT _
|
|
||||||
| WILD _
|
|
||||||
| EQ _
|
|
||||||
| NE _
|
|
||||||
| LT _
|
|
||||||
| GT _
|
|
||||||
| LE _
|
|
||||||
| GE _
|
|
||||||
| BOOL_OR _
|
|
||||||
| BOOL_AND _ -> true
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let is_eof = function EOF _ -> true | _ -> false
|
let is_eof = function EOF _ -> true | _ -> false
|
||||||
|
let is_minus = function MINUS _ -> true | _ -> false
|
||||||
|
|
||||||
|
(* Errors *)
|
||||||
|
|
||||||
|
type error =
|
||||||
|
Odd_lengthed_bytes
|
||||||
|
| Missing_break
|
||||||
|
| Negative_byte_sequence
|
||||||
|
|
||||||
|
let error_to_string = function
|
||||||
|
Odd_lengthed_bytes ->
|
||||||
|
"The length of the byte sequence is an odd number.\n\
|
||||||
|
Hint: Add or remove a digit."
|
||||||
|
| Missing_break ->
|
||||||
|
"Missing break.\n\
|
||||||
|
Hint: Insert some space."
|
||||||
|
| Negative_byte_sequence ->
|
||||||
|
"Negative byte sequence.\n\
|
||||||
|
Hint: Remove the leading minus sign."
|
||||||
|
|
||||||
|
exception Error of error Region.reg
|
||||||
|
|
||||||
|
let format_error ?(offsets=true) mode Region.{region; value} ~file =
|
||||||
|
let msg = error_to_string value
|
||||||
|
and reg = region#to_string ~file ~offsets mode in
|
||||||
|
let value = sprintf "Lexical error %s:\n%s\n" reg msg
|
||||||
|
in Region.{value; region}
|
||||||
|
|
||||||
|
let fail region value = raise (Error Region.{region; value})
|
||||||
|
|
||||||
|
let check_right_context token next_token buffer : unit =
|
||||||
|
let pos = (to_region token)#stop in
|
||||||
|
let region = Region.make ~start:pos ~stop:pos in
|
||||||
|
match next_token buffer with
|
||||||
|
None -> ()
|
||||||
|
| Some (markup, next) ->
|
||||||
|
if is_minus token && is_bytes next
|
||||||
|
then let region =
|
||||||
|
Region.cover (to_region token) (to_region next)
|
||||||
|
in fail region Negative_byte_sequence
|
||||||
|
else
|
||||||
|
match markup with
|
||||||
|
[] ->
|
||||||
|
if is_int token
|
||||||
|
then if is_string next || is_ident next
|
||||||
|
then fail region Missing_break
|
||||||
|
else ()
|
||||||
|
else
|
||||||
|
if is_string token
|
||||||
|
then if is_int next || is_bytes next || is_ident next
|
||||||
|
then fail region Missing_break
|
||||||
|
else ()
|
||||||
|
else
|
||||||
|
if is_bytes token
|
||||||
|
then if is_string next || is_ident next
|
||||||
|
then fail region Missing_break
|
||||||
|
else if is_int next
|
||||||
|
then fail region Odd_lengthed_bytes
|
||||||
|
else ()
|
||||||
|
else ()
|
||||||
|
| _::_ -> ()
|
||||||
|
|
||||||
(* END TRAILER *)
|
(* END TRAILER *)
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,10 @@ module Region = Simple_utils.Region
|
|||||||
|
|
||||||
module IO =
|
module IO =
|
||||||
struct
|
struct
|
||||||
let options = EvalOpt.(read ~lang:`CameLIGO ~ext:".mligo")
|
let options =
|
||||||
|
let open EvalOpt in
|
||||||
|
let block = mk_block ~opening:"(*" ~closing:"*)"
|
||||||
|
in read ~block ~line:"//" ".mligo"
|
||||||
end
|
end
|
||||||
|
|
||||||
module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
|
module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
|
||||||
@ -12,4 +15,4 @@ module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
|
|||||||
let () =
|
let () =
|
||||||
match M.trace () with
|
match M.trace () with
|
||||||
Stdlib.Ok () -> ()
|
Stdlib.Ok () -> ()
|
||||||
| Error Region.{value; _} -> Utils.highlight value
|
| Error Region.{value; _} -> Printf.eprintf "\027[31m%s\027[0m%!" value
|
||||||
|
@ -5,7 +5,10 @@ module SSet = Set.Make (String)
|
|||||||
|
|
||||||
module IO =
|
module IO =
|
||||||
struct
|
struct
|
||||||
let options = EvalOpt.(read ~lang:`CameLIGO ~ext:".mligo")
|
let options =
|
||||||
|
let open EvalOpt in
|
||||||
|
let block = mk_block ~opening:"(*" ~closing:"*)"
|
||||||
|
in read ~block ~line:"//" ".mligo"
|
||||||
end
|
end
|
||||||
|
|
||||||
module SubIO =
|
module SubIO =
|
||||||
@ -14,7 +17,8 @@ module SubIO =
|
|||||||
libs : string list;
|
libs : string list;
|
||||||
verbose : SSet.t;
|
verbose : SSet.t;
|
||||||
offsets : bool;
|
offsets : bool;
|
||||||
lang : EvalOpt.language;
|
block : EvalOpt.block_comment option;
|
||||||
|
line : EvalOpt.line_comment option;
|
||||||
ext : string;
|
ext : string;
|
||||||
mode : [`Byte | `Point];
|
mode : [`Byte | `Point];
|
||||||
cmd : EvalOpt.command;
|
cmd : EvalOpt.command;
|
||||||
@ -26,7 +30,8 @@ module SubIO =
|
|||||||
method libs = IO.options#libs
|
method libs = IO.options#libs
|
||||||
method verbose = IO.options#verbose
|
method verbose = IO.options#verbose
|
||||||
method offsets = IO.options#offsets
|
method offsets = IO.options#offsets
|
||||||
method lang = IO.options#lang
|
method block = IO.options#block
|
||||||
|
method line = IO.options#line
|
||||||
method ext = IO.options#ext
|
method ext = IO.options#ext
|
||||||
method mode = IO.options#mode
|
method mode = IO.options#mode
|
||||||
method cmd = IO.options#cmd
|
method cmd = IO.options#cmd
|
||||||
@ -37,7 +42,8 @@ module SubIO =
|
|||||||
EvalOpt.make ~libs:options#libs
|
EvalOpt.make ~libs:options#libs
|
||||||
~verbose:options#verbose
|
~verbose:options#verbose
|
||||||
~offsets:options#offsets
|
~offsets:options#offsets
|
||||||
~lang:options#lang
|
?block:options#block
|
||||||
|
?line:options#line
|
||||||
~ext:options#ext
|
~ext:options#ext
|
||||||
~mode:options#mode
|
~mode:options#mode
|
||||||
~cmd:options#cmd
|
~cmd:options#cmd
|
||||||
|
@ -8,15 +8,14 @@ module SSet = Set.Make (String)
|
|||||||
|
|
||||||
(* Mock IOs TODO: Fill them with CLI options *)
|
(* Mock IOs TODO: Fill them with CLI options *)
|
||||||
|
|
||||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
|
||||||
|
|
||||||
module SubIO =
|
module SubIO =
|
||||||
struct
|
struct
|
||||||
type options = <
|
type options = <
|
||||||
libs : string list;
|
libs : string list;
|
||||||
verbose : SSet.t;
|
verbose : SSet.t;
|
||||||
offsets : bool;
|
offsets : bool;
|
||||||
lang : language;
|
block : EvalOpt.block_comment option;
|
||||||
|
line : EvalOpt.line_comment option;
|
||||||
ext : string; (* ".ligo" *)
|
ext : string; (* ".ligo" *)
|
||||||
mode : [`Byte | `Point];
|
mode : [`Byte | `Point];
|
||||||
cmd : EvalOpt.command;
|
cmd : EvalOpt.command;
|
||||||
@ -24,11 +23,13 @@ module SubIO =
|
|||||||
>
|
>
|
||||||
|
|
||||||
let options : options =
|
let options : options =
|
||||||
object
|
let block = EvalOpt.mk_block ~opening:"(*" ~closing:"*)"
|
||||||
|
in object
|
||||||
method libs = []
|
method libs = []
|
||||||
method verbose = SSet.empty
|
method verbose = SSet.empty
|
||||||
method offsets = true
|
method offsets = true
|
||||||
method lang = `PascaLIGO
|
method block = Some block
|
||||||
|
method line = Some "//"
|
||||||
method ext = ".ligo"
|
method ext = ".ligo"
|
||||||
method mode = `Point
|
method mode = `Point
|
||||||
method cmd = EvalOpt.Quiet
|
method cmd = EvalOpt.Quiet
|
||||||
@ -39,7 +40,8 @@ module SubIO =
|
|||||||
EvalOpt.make ~libs:options#libs
|
EvalOpt.make ~libs:options#libs
|
||||||
~verbose:options#verbose
|
~verbose:options#verbose
|
||||||
~offsets:options#offsets
|
~offsets:options#offsets
|
||||||
~lang:options#lang
|
?block:options#block
|
||||||
|
?line:options#line
|
||||||
~ext:options#ext
|
~ext:options#ext
|
||||||
~mode:options#mode
|
~mode:options#mode
|
||||||
~cmd:options#cmd
|
~cmd:options#cmd
|
||||||
|
@ -2,6 +2,8 @@ $HOME/git/OCaml-build/Makefile
|
|||||||
|
|
||||||
../shared/Lexer.mli
|
../shared/Lexer.mli
|
||||||
../shared/Lexer.mll
|
../shared/Lexer.mll
|
||||||
|
../shared/LexerLib.mli
|
||||||
|
../shared/LexerLib.ml
|
||||||
../shared/EvalOpt.ml
|
../shared/EvalOpt.ml
|
||||||
../shared/EvalOpt.mli
|
../shared/EvalOpt.mli
|
||||||
../shared/FQueue.ml
|
../shared/FQueue.ml
|
||||||
@ -18,5 +20,6 @@ $HOME/git/OCaml-build/Makefile
|
|||||||
../shared/LexerUnit.ml
|
../shared/LexerUnit.ml
|
||||||
../shared/ParserUnit.mli
|
../shared/ParserUnit.mli
|
||||||
../shared/ParserUnit.ml
|
../shared/ParserUnit.ml
|
||||||
|
../shared/LexerLib.ml
|
||||||
|
|
||||||
$HOME/git/ligo/_build/default/src/passes/1-parser/pascaligo/ParErr.ml
|
$HOME/git/ligo/_build/default/src/passes/1-parser/pascaligo/ParErr.ml
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
(** This signature defines the lexical tokens for LIGO
|
(* This signature defines the lexical tokens for LIGO
|
||||||
|
|
||||||
_Tokens_ are the abstract units which are used by the parser to
|
_Tokens_ are the abstract units which are used by the parser to
|
||||||
build the abstract syntax tree (AST), in other words, the stream of
|
build the abstract syntax tree (AST), in other words, the stream of
|
||||||
@ -163,11 +163,22 @@ val eof : Region.t -> token
|
|||||||
|
|
||||||
(* Predicates *)
|
(* Predicates *)
|
||||||
|
|
||||||
val is_string : token -> bool
|
|
||||||
val is_bytes : token -> bool
|
|
||||||
val is_int : token -> bool
|
|
||||||
val is_ident : token -> bool
|
|
||||||
val is_kwd : token -> bool
|
|
||||||
val is_constr : token -> bool
|
|
||||||
val is_sym : token -> bool
|
|
||||||
val is_eof : token -> bool
|
val is_eof : token -> bool
|
||||||
|
|
||||||
|
(* Style *)
|
||||||
|
|
||||||
|
type error
|
||||||
|
|
||||||
|
val error_to_string : error -> string
|
||||||
|
|
||||||
|
exception Error of error Region.reg
|
||||||
|
|
||||||
|
val format_error :
|
||||||
|
?offsets:bool -> [`Byte | `Point] ->
|
||||||
|
error Region.reg -> file:bool -> string Region.reg
|
||||||
|
|
||||||
|
val check_right_context :
|
||||||
|
token ->
|
||||||
|
(Lexing.lexbuf -> (Markup.t list * token) option) ->
|
||||||
|
Lexing.lexbuf ->
|
||||||
|
unit
|
||||||
|
@ -5,24 +5,14 @@
|
|||||||
|
|
||||||
(* Shorthands *)
|
(* Shorthands *)
|
||||||
|
|
||||||
type lexeme = string
|
|
||||||
|
|
||||||
let sprintf = Printf.sprintf
|
|
||||||
|
|
||||||
module Region = Simple_utils.Region
|
module Region = Simple_utils.Region
|
||||||
module Pos = Simple_utils.Pos
|
module Pos = Simple_utils.Pos
|
||||||
module SMap = Map.Make (String)
|
module SMap = Map.Make (String)
|
||||||
module SSet = Set.Make (String)
|
module SSet = Set.Make (String)
|
||||||
|
|
||||||
(* Hack to roll back one lexeme in the current semantic action *)
|
type lexeme = string
|
||||||
(*
|
|
||||||
let rollback buffer =
|
let sprintf = Printf.sprintf
|
||||||
let open Lexing in
|
|
||||||
let len = String.length (lexeme buffer) in
|
|
||||||
let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
|
|
||||||
buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
|
|
||||||
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
|
|
||||||
*)
|
|
||||||
|
|
||||||
(* TOKENS *)
|
(* TOKENS *)
|
||||||
|
|
||||||
@ -123,6 +113,8 @@ type t =
|
|||||||
| EOF of Region.t
|
| EOF of Region.t
|
||||||
|
|
||||||
|
|
||||||
|
(* Projections *)
|
||||||
|
|
||||||
type token = t
|
type token = t
|
||||||
|
|
||||||
let proj_token = function
|
let proj_token = function
|
||||||
@ -130,32 +122,20 @@ let proj_token = function
|
|||||||
|
|
||||||
String Region.{region; value} ->
|
String Region.{region; value} ->
|
||||||
region, sprintf "String %s" value
|
region, sprintf "String %s" value
|
||||||
|
|
||||||
| Bytes Region.{region; value = s,b} ->
|
| Bytes Region.{region; value = s,b} ->
|
||||||
region,
|
region,
|
||||||
sprintf "Bytes (\"%s\", \"0x%s\")"
|
sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b)
|
||||||
s (Hex.show b)
|
|
||||||
|
|
||||||
| Int Region.{region; value = s,n} ->
|
| Int Region.{region; value = s,n} ->
|
||||||
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
|
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
|
||||||
|
|
||||||
| Nat Region.{region; value = s,n} ->
|
| Nat Region.{region; value = s,n} ->
|
||||||
region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
|
region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
|
||||||
|
|
||||||
| Mutez Region.{region; value = s,n} ->
|
| Mutez Region.{region; value = s,n} ->
|
||||||
region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
|
region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
|
||||||
|
|
||||||
| Ident Region.{region; value} ->
|
| Ident Region.{region; value} ->
|
||||||
region, sprintf "Ident \"%s\"" value
|
region, sprintf "Ident \"%s\"" value
|
||||||
|
|
||||||
| Constr Region.{region; value} ->
|
| Constr Region.{region; value} ->
|
||||||
region, sprintf "Constr \"%s\"" value
|
region, sprintf "Constr \"%s\"" value
|
||||||
|
|
||||||
(*
|
|
||||||
| Attr {header; string={region; value}} ->
|
|
||||||
region, sprintf "Attr (\"%s\",\"%s\")" header value
|
|
||||||
*)
|
|
||||||
|
|
||||||
(* Symbols *)
|
(* Symbols *)
|
||||||
|
|
||||||
| SEMI region -> region, "SEMI"
|
| SEMI region -> region, "SEMI"
|
||||||
@ -382,9 +362,7 @@ let keywords = [
|
|||||||
(fun reg -> With reg)
|
(fun reg -> With reg)
|
||||||
]
|
]
|
||||||
|
|
||||||
let reserved =
|
let reserved = SSet.empty
|
||||||
let open SSet in
|
|
||||||
empty |> add "arguments"
|
|
||||||
|
|
||||||
let constructors = [
|
let constructors = [
|
||||||
(fun reg -> False reg);
|
(fun reg -> False reg);
|
||||||
@ -483,21 +461,18 @@ type nat_err =
|
|||||||
| Non_canonical_zero_nat
|
| Non_canonical_zero_nat
|
||||||
|
|
||||||
let mk_nat lexeme region =
|
let mk_nat lexeme region =
|
||||||
match (String.index_opt lexeme 'n') with
|
match String.index_opt lexeme 'n' with
|
||||||
| None -> Error Invalid_natural
|
None -> Error Invalid_natural
|
||||||
| Some _ -> (
|
| Some _ -> let z =
|
||||||
let z =
|
|
||||||
Str.(global_replace (regexp "_") "" lexeme) |>
|
Str.(global_replace (regexp "_") "" lexeme) |>
|
||||||
Str.(global_replace (regexp "n") "") |>
|
Str.(global_replace (regexp "n") "") |>
|
||||||
Z.of_string in
|
Z.of_string in
|
||||||
if Z.equal z Z.zero && lexeme <> "0n"
|
if Z.equal z Z.zero && lexeme <> "0n"
|
||||||
then Error Non_canonical_zero_nat
|
then Error Non_canonical_zero_nat
|
||||||
else Ok (Nat Region.{region; value = lexeme,z})
|
else Ok (Nat Region.{region; value = lexeme,z})
|
||||||
)
|
|
||||||
|
|
||||||
let mk_mutez lexeme region =
|
let mk_mutez lexeme region =
|
||||||
let z =
|
let z = Str.(global_replace (regexp "_") "" lexeme) |>
|
||||||
Str.(global_replace (regexp "_") "" lexeme) |>
|
|
||||||
Str.(global_replace (regexp "mutez") "") |>
|
Str.(global_replace (regexp "mutez") "") |>
|
||||||
Z.of_string in
|
Z.of_string in
|
||||||
if Z.equal z Z.zero && lexeme <> "0mutez"
|
if Z.equal z Z.zero && lexeme <> "0mutez"
|
||||||
@ -558,104 +533,76 @@ let mk_constr lexeme region =
|
|||||||
|
|
||||||
type attr_err = Invalid_attribute
|
type attr_err = Invalid_attribute
|
||||||
|
|
||||||
let mk_attr _header _string _region =
|
let mk_attr _ _ _ = Error Invalid_attribute
|
||||||
Error Invalid_attribute
|
|
||||||
|
|
||||||
(* Predicates *)
|
(* Predicates *)
|
||||||
|
|
||||||
let is_string = function
|
let is_string = function String _ -> true | _ -> false
|
||||||
String _ -> true
|
let is_bytes = function Bytes _ -> true | _ -> false
|
||||||
| _ -> false
|
let is_int = function Int _ -> true | _ -> false
|
||||||
|
let is_ident = function Ident _ -> true | _ -> false
|
||||||
let is_bytes = function
|
|
||||||
Bytes _ -> true
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let is_int = function
|
|
||||||
Int _ -> true
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let is_ident = function
|
|
||||||
Ident _ -> true
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let is_kwd = function
|
|
||||||
And _
|
|
||||||
| Attributes _
|
|
||||||
| Begin _
|
|
||||||
| BigMap _
|
|
||||||
| Block _
|
|
||||||
| Case _
|
|
||||||
| Const _
|
|
||||||
| Contains _
|
|
||||||
| Else _
|
|
||||||
| End _
|
|
||||||
| False _
|
|
||||||
| For _
|
|
||||||
| From _
|
|
||||||
| Function _
|
|
||||||
| If _
|
|
||||||
| In _
|
|
||||||
| Is _
|
|
||||||
| List _
|
|
||||||
| Map _
|
|
||||||
| Mod _
|
|
||||||
| Nil _
|
|
||||||
| Not _
|
|
||||||
| Of _
|
|
||||||
| Or _
|
|
||||||
| Patch _
|
|
||||||
| Record _
|
|
||||||
| Remove _
|
|
||||||
| Set _
|
|
||||||
| Skip _
|
|
||||||
| Step _
|
|
||||||
| Then _
|
|
||||||
| To _
|
|
||||||
| True _
|
|
||||||
| Type _
|
|
||||||
| Unit _
|
|
||||||
| Var _
|
|
||||||
| While _
|
|
||||||
| With _ -> true
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let is_constr = function
|
|
||||||
Constr _
|
|
||||||
| C_None _
|
|
||||||
| C_Some _ -> true
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let is_sym = function
|
|
||||||
SEMI _
|
|
||||||
| COMMA _
|
|
||||||
| LPAR _
|
|
||||||
| RPAR _
|
|
||||||
| LBRACE _
|
|
||||||
| RBRACE _
|
|
||||||
| LBRACKET _
|
|
||||||
| RBRACKET _
|
|
||||||
| CONS _
|
|
||||||
| VBAR _
|
|
||||||
| ARROW _
|
|
||||||
| ASS _
|
|
||||||
| EQ _
|
|
||||||
| COLON _
|
|
||||||
| LT _
|
|
||||||
| LE _
|
|
||||||
| GT _
|
|
||||||
| GE _
|
|
||||||
| NE _
|
|
||||||
| PLUS _
|
|
||||||
| MINUS _
|
|
||||||
| SLASH _
|
|
||||||
| TIMES _
|
|
||||||
| DOT _
|
|
||||||
| WILD _
|
|
||||||
| CAT _ -> true
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let is_eof = function EOF _ -> true | _ -> false
|
let is_eof = function EOF _ -> true | _ -> false
|
||||||
|
let is_minus = function MINUS _ -> true | _ -> false
|
||||||
|
|
||||||
|
(* Errors *)
|
||||||
|
|
||||||
|
type error =
|
||||||
|
Odd_lengthed_bytes
|
||||||
|
| Missing_break
|
||||||
|
| Negative_byte_sequence
|
||||||
|
|
||||||
|
let error_to_string = function
|
||||||
|
Odd_lengthed_bytes ->
|
||||||
|
"The length of the byte sequence is an odd number.\n\
|
||||||
|
Hint: Add or remove a digit."
|
||||||
|
| Missing_break ->
|
||||||
|
"Missing break.\n\
|
||||||
|
Hint: Insert some space."
|
||||||
|
| Negative_byte_sequence ->
|
||||||
|
"Negative byte sequence.\n\
|
||||||
|
Hint: Remove the leading minus sign."
|
||||||
|
|
||||||
|
exception Error of error Region.reg
|
||||||
|
|
||||||
|
let format_error ?(offsets=true) mode Region.{region; value} ~file =
|
||||||
|
let msg = error_to_string value
|
||||||
|
and reg = region#to_string ~file ~offsets mode in
|
||||||
|
let value = sprintf "Lexical error %s:\n%s\n" reg msg
|
||||||
|
in Region.{value; region}
|
||||||
|
|
||||||
|
let fail region value = raise (Error Region.{region; value})
|
||||||
|
|
||||||
|
let check_right_context token next_token buffer : unit =
|
||||||
|
let pos = (to_region token)#stop in
|
||||||
|
let region = Region.make ~start:pos ~stop:pos in
|
||||||
|
match next_token buffer with
|
||||||
|
None -> ()
|
||||||
|
| Some (markup, next) ->
|
||||||
|
if is_minus token && is_bytes next
|
||||||
|
then let region =
|
||||||
|
Region.cover (to_region token) (to_region next)
|
||||||
|
in fail region Negative_byte_sequence
|
||||||
|
else
|
||||||
|
match markup with
|
||||||
|
[] ->
|
||||||
|
if is_int token
|
||||||
|
then if is_string next || is_ident next
|
||||||
|
then fail region Missing_break
|
||||||
|
else ()
|
||||||
|
else
|
||||||
|
if is_string token
|
||||||
|
then if is_int next || is_bytes next || is_ident next
|
||||||
|
then fail region Missing_break
|
||||||
|
else ()
|
||||||
|
else
|
||||||
|
if is_bytes token
|
||||||
|
then if is_string next || is_ident next
|
||||||
|
then fail region Missing_break
|
||||||
|
else if is_int next
|
||||||
|
then fail region Odd_lengthed_bytes
|
||||||
|
else ()
|
||||||
|
else ()
|
||||||
|
| _::_ -> ()
|
||||||
|
|
||||||
(* END TRAILER *)
|
(* END TRAILER *)
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,10 @@ module Region = Simple_utils.Region
|
|||||||
|
|
||||||
module IO =
|
module IO =
|
||||||
struct
|
struct
|
||||||
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo")
|
let options =
|
||||||
|
let open EvalOpt in
|
||||||
|
let block = mk_block ~opening:"(*" ~closing:"*)"
|
||||||
|
in read ~block ~line:"//" ".ligo"
|
||||||
end
|
end
|
||||||
|
|
||||||
module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
|
module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
|
||||||
|
@ -5,7 +5,10 @@ module SSet = Set.Make (String)
|
|||||||
|
|
||||||
module IO =
|
module IO =
|
||||||
struct
|
struct
|
||||||
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo")
|
let options =
|
||||||
|
let open EvalOpt in
|
||||||
|
let block = mk_block ~opening:"(*" ~closing:"*)"
|
||||||
|
in read ~block ~line:"//" ".ligo"
|
||||||
end
|
end
|
||||||
|
|
||||||
module SubIO =
|
module SubIO =
|
||||||
@ -14,7 +17,8 @@ module SubIO =
|
|||||||
libs : string list;
|
libs : string list;
|
||||||
verbose : SSet.t;
|
verbose : SSet.t;
|
||||||
offsets : bool;
|
offsets : bool;
|
||||||
lang : EvalOpt.language;
|
block : EvalOpt.block_comment option;
|
||||||
|
line : EvalOpt.line_comment option;
|
||||||
ext : string;
|
ext : string;
|
||||||
mode : [`Byte | `Point];
|
mode : [`Byte | `Point];
|
||||||
cmd : EvalOpt.command;
|
cmd : EvalOpt.command;
|
||||||
@ -26,7 +30,8 @@ module SubIO =
|
|||||||
method libs = IO.options#libs
|
method libs = IO.options#libs
|
||||||
method verbose = IO.options#verbose
|
method verbose = IO.options#verbose
|
||||||
method offsets = IO.options#offsets
|
method offsets = IO.options#offsets
|
||||||
method lang = IO.options#lang
|
method block = IO.options#block
|
||||||
|
method line = IO.options#line
|
||||||
method ext = IO.options#ext
|
method ext = IO.options#ext
|
||||||
method mode = IO.options#mode
|
method mode = IO.options#mode
|
||||||
method cmd = IO.options#cmd
|
method cmd = IO.options#cmd
|
||||||
@ -37,7 +42,8 @@ module SubIO =
|
|||||||
EvalOpt.make ~libs:options#libs
|
EvalOpt.make ~libs:options#libs
|
||||||
~verbose:options#verbose
|
~verbose:options#verbose
|
||||||
~offsets:options#offsets
|
~offsets:options#offsets
|
||||||
~lang:options#lang
|
?block:options#block
|
||||||
|
?line:options#line
|
||||||
~ext:options#ext
|
~ext:options#ext
|
||||||
~mode:options#mode
|
~mode:options#mode
|
||||||
~cmd:options#cmd
|
~cmd:options#cmd
|
||||||
|
@ -11,15 +11,14 @@ module SSet = Set.Make (String)
|
|||||||
|
|
||||||
(* Mock IOs TODO: Fill them with CLI options *)
|
(* Mock IOs TODO: Fill them with CLI options *)
|
||||||
|
|
||||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
|
||||||
|
|
||||||
module SubIO =
|
module SubIO =
|
||||||
struct
|
struct
|
||||||
type options = <
|
type options = <
|
||||||
libs : string list;
|
libs : string list;
|
||||||
verbose : SSet.t;
|
verbose : SSet.t;
|
||||||
offsets : bool;
|
offsets : bool;
|
||||||
lang : language;
|
block : EvalOpt.block_comment option;
|
||||||
|
line : EvalOpt.line_comment option;
|
||||||
ext : string; (* ".religo" *)
|
ext : string; (* ".religo" *)
|
||||||
mode : [`Byte | `Point];
|
mode : [`Byte | `Point];
|
||||||
cmd : EvalOpt.command;
|
cmd : EvalOpt.command;
|
||||||
@ -27,11 +26,13 @@ module SubIO =
|
|||||||
>
|
>
|
||||||
|
|
||||||
let options : options =
|
let options : options =
|
||||||
object
|
let block = EvalOpt.mk_block ~opening:"/*" ~closing:"*/"
|
||||||
|
in object
|
||||||
method libs = []
|
method libs = []
|
||||||
method verbose = SSet.empty
|
method verbose = SSet.empty
|
||||||
method offsets = true
|
method offsets = true
|
||||||
method lang = `ReasonLIGO
|
method block = Some block
|
||||||
|
method line = Some "//"
|
||||||
method ext = ".religo"
|
method ext = ".religo"
|
||||||
method mode = `Point
|
method mode = `Point
|
||||||
method cmd = EvalOpt.Quiet
|
method cmd = EvalOpt.Quiet
|
||||||
@ -42,7 +43,8 @@ module SubIO =
|
|||||||
EvalOpt.make ~libs:options#libs
|
EvalOpt.make ~libs:options#libs
|
||||||
~verbose:options#verbose
|
~verbose:options#verbose
|
||||||
~offsets:options#offsets
|
~offsets:options#offsets
|
||||||
~lang:options#lang
|
?block:options#block
|
||||||
|
?line:options#line
|
||||||
~ext:options#ext
|
~ext:options#ext
|
||||||
~mode:options#mode
|
~mode:options#mode
|
||||||
~cmd:options#cmd
|
~cmd:options#cmd
|
||||||
|
@ -2,6 +2,7 @@ $HOME/git/OCaml-build/Makefile
|
|||||||
|
|
||||||
../shared/Lexer.mli
|
../shared/Lexer.mli
|
||||||
../shared/Lexer.mll
|
../shared/Lexer.mll
|
||||||
|
../shared/LexerLib.ml
|
||||||
../shared/EvalOpt.ml
|
../shared/EvalOpt.ml
|
||||||
../shared/EvalOpt.mli
|
../shared/EvalOpt.mli
|
||||||
../shared/FQueue.ml
|
../shared/FQueue.ml
|
||||||
|
@ -57,6 +57,7 @@ type t =
|
|||||||
| COLON of Region.t (* ":" *)
|
| COLON of Region.t (* ":" *)
|
||||||
| DOT of Region.t (* "." *)
|
| DOT of Region.t (* "." *)
|
||||||
| ELLIPSIS of Region.t (* "..." *)
|
| ELLIPSIS of Region.t (* "..." *)
|
||||||
|
| ARROW of Region.t (* "=>" *)
|
||||||
|
|
||||||
(* Wildcard *)
|
(* Wildcard *)
|
||||||
|
|
||||||
@ -69,10 +70,10 @@ type t =
|
|||||||
| NE of Region.t (* "!=" *)
|
| NE of Region.t (* "!=" *)
|
||||||
| LT of Region.t (* "<" *)
|
| LT of Region.t (* "<" *)
|
||||||
| GT of Region.t (* ">" *)
|
| GT of Region.t (* ">" *)
|
||||||
| LE of Region.t (* "=<" *)
|
| LE of Region.t (* "<=" *)
|
||||||
| GE of Region.t (* ">=" *)
|
| GE of Region.t (* ">=" *)
|
||||||
|
|
||||||
| ARROW of Region.t (* "=>" *)
|
(* Logic *)
|
||||||
|
|
||||||
| BOOL_OR of Region.t (* "||" *)
|
| BOOL_OR of Region.t (* "||" *)
|
||||||
| BOOL_AND of Region.t (* "&&" *)
|
| BOOL_AND of Region.t (* "&&" *)
|
||||||
@ -95,14 +96,14 @@ type t =
|
|||||||
| False of Region.t
|
| False of Region.t
|
||||||
| If of Region.t
|
| If of Region.t
|
||||||
| Let of Region.t
|
| Let of Region.t
|
||||||
| Rec of Region.t
|
|
||||||
| Switch of Region.t
|
|
||||||
| Mod of Region.t
|
| Mod of Region.t
|
||||||
| Or of Region.t
|
| Or of Region.t
|
||||||
|
| Rec of Region.t
|
||||||
|
| Switch of Region.t
|
||||||
| True of Region.t
|
| True of Region.t
|
||||||
| Type of Region.t
|
| Type of Region.t
|
||||||
|
|
||||||
(* Data constructors *)
|
(* Data constructors *)
|
||||||
|
|
||||||
| C_None of Region.t (* "None" *)
|
| C_None of Region.t (* "None" *)
|
||||||
| C_Some of Region.t (* "Some" *)
|
| C_Some of Region.t (* "Some" *)
|
||||||
@ -111,8 +112,6 @@ type t =
|
|||||||
|
|
||||||
| EOF of Region.t (* End of file *)
|
| EOF of Region.t (* End of file *)
|
||||||
|
|
||||||
type token = t
|
|
||||||
|
|
||||||
(* Projections
|
(* Projections
|
||||||
|
|
||||||
The difference between extracting the lexeme and a string from a
|
The difference between extracting the lexeme and a string from a
|
||||||
@ -121,6 +120,8 @@ type token = t
|
|||||||
lexeme (concrete syntax).
|
lexeme (concrete syntax).
|
||||||
*)
|
*)
|
||||||
|
|
||||||
|
type token = t
|
||||||
|
|
||||||
val to_lexeme : token -> lexeme
|
val to_lexeme : token -> lexeme
|
||||||
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
|
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
|
||||||
val to_region : token -> Region.t
|
val to_region : token -> Region.t
|
||||||
@ -154,11 +155,22 @@ val eof : Region.t -> token
|
|||||||
|
|
||||||
(* Predicates *)
|
(* Predicates *)
|
||||||
|
|
||||||
val is_string : token -> bool
|
|
||||||
val is_bytes : token -> bool
|
|
||||||
val is_int : token -> bool
|
|
||||||
val is_ident : token -> bool
|
|
||||||
val is_kwd : token -> bool
|
|
||||||
val is_constr : token -> bool
|
|
||||||
val is_sym : token -> bool
|
|
||||||
val is_eof : token -> bool
|
val is_eof : token -> bool
|
||||||
|
|
||||||
|
(* Style *)
|
||||||
|
|
||||||
|
type error
|
||||||
|
|
||||||
|
val error_to_string : error -> string
|
||||||
|
|
||||||
|
exception Error of error Region.reg
|
||||||
|
|
||||||
|
val format_error :
|
||||||
|
?offsets:bool -> [`Byte | `Point] ->
|
||||||
|
error Region.reg -> file:bool -> string Region.reg
|
||||||
|
|
||||||
|
val check_right_context :
|
||||||
|
token ->
|
||||||
|
(Lexing.lexbuf -> (Markup.t list * token) option) ->
|
||||||
|
Lexing.lexbuf ->
|
||||||
|
unit
|
||||||
|
@ -1,15 +1,17 @@
|
|||||||
{
|
{
|
||||||
(* START OF HEADER *)
|
(* START OF HEADER *)
|
||||||
|
|
||||||
type lexeme = string
|
(* Shorthands *)
|
||||||
|
|
||||||
let sprintf = Printf.sprintf
|
|
||||||
|
|
||||||
module Region = Simple_utils.Region
|
module Region = Simple_utils.Region
|
||||||
module Pos = Simple_utils.Pos
|
module Pos = Simple_utils.Pos
|
||||||
module SMap = Utils.String.Map
|
module SMap = Utils.String.Map
|
||||||
module SSet = Utils.String.Set
|
module SSet = Utils.String.Set
|
||||||
|
|
||||||
|
type lexeme = string
|
||||||
|
|
||||||
|
let sprintf = Printf.sprintf
|
||||||
|
|
||||||
(* TOKENS *)
|
(* TOKENS *)
|
||||||
|
|
||||||
type t =
|
type t =
|
||||||
@ -41,6 +43,7 @@ type t =
|
|||||||
| COLON of Region.t (* ":" *)
|
| COLON of Region.t (* ":" *)
|
||||||
| DOT of Region.t (* "." *)
|
| DOT of Region.t (* "." *)
|
||||||
| ELLIPSIS of Region.t (* "..." *)
|
| ELLIPSIS of Region.t (* "..." *)
|
||||||
|
| ARROW of Region.t (* "=>" *)
|
||||||
|
|
||||||
(* Wildcard *)
|
(* Wildcard *)
|
||||||
|
|
||||||
@ -55,11 +58,11 @@ type t =
|
|||||||
| GT of Region.t (* ">" *)
|
| GT of Region.t (* ">" *)
|
||||||
| LE of Region.t (* "<=" *)
|
| LE of Region.t (* "<=" *)
|
||||||
| GE of Region.t (* ">=" *)
|
| GE of Region.t (* ">=" *)
|
||||||
| ARROW of Region.t (* "=>" *)
|
|
||||||
|
(* Logic *)
|
||||||
|
|
||||||
| BOOL_OR of Region.t (* "||" *)
|
| BOOL_OR of Region.t (* "||" *)
|
||||||
| BOOL_AND of Region.t (* "&&" *)
|
| BOOL_AND of Region.t (* "&&" *)
|
||||||
|
|
||||||
| NOT of Region.t (* ! *)
|
| NOT of Region.t (* ! *)
|
||||||
|
|
||||||
(* Identifiers, labels, numbers and strings *)
|
(* Identifiers, labels, numbers and strings *)
|
||||||
@ -75,17 +78,17 @@ type t =
|
|||||||
|
|
||||||
(* Keywords *)
|
(* Keywords *)
|
||||||
|
|
||||||
(*| And*)
|
|
||||||
| Else of Region.t
|
| Else of Region.t
|
||||||
| False of Region.t
|
| False of Region.t
|
||||||
| If of Region.t
|
| If of Region.t
|
||||||
| Let of Region.t
|
| Let of Region.t
|
||||||
| Rec of Region.t
|
|
||||||
| Switch of Region.t
|
|
||||||
| Mod of Region.t
|
| Mod of Region.t
|
||||||
| Or of Region.t
|
| Or of Region.t
|
||||||
|
| Rec of Region.t
|
||||||
|
| Switch of Region.t
|
||||||
| True of Region.t
|
| True of Region.t
|
||||||
| Type of Region.t
|
| Type of Region.t
|
||||||
|
|
||||||
(* Data constructors *)
|
(* Data constructors *)
|
||||||
|
|
||||||
| C_None of Region.t (* "None" *)
|
| C_None of Region.t (* "None" *)
|
||||||
@ -96,10 +99,32 @@ type t =
|
|||||||
| EOF of Region.t (* End of file *)
|
| EOF of Region.t (* End of file *)
|
||||||
|
|
||||||
|
|
||||||
|
(* Projections *)
|
||||||
|
|
||||||
type token = t
|
type token = t
|
||||||
|
|
||||||
let proj_token = function
|
let proj_token = function
|
||||||
CAT region -> region, "CAT"
|
(* Literals *)
|
||||||
|
|
||||||
|
String Region.{region; value} ->
|
||||||
|
region, sprintf "String %s" value
|
||||||
|
| Bytes Region.{region; value = s,b} ->
|
||||||
|
region,
|
||||||
|
sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b)
|
||||||
|
| Int Region.{region; value = s,n} ->
|
||||||
|
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
|
||||||
|
| Nat Region.{region; value = s,n} ->
|
||||||
|
region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
|
||||||
|
| Mutez Region.{region; value = s,n} ->
|
||||||
|
region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
|
||||||
|
| Ident Region.{region; value} ->
|
||||||
|
region, sprintf "Ident %s" value
|
||||||
|
| Constr Region.{region; value} ->
|
||||||
|
region, sprintf "Constr %s" value
|
||||||
|
|
||||||
|
(* Symbols *)
|
||||||
|
|
||||||
|
| CAT region -> region, "CAT"
|
||||||
| MINUS region -> region, "MINUS"
|
| MINUS region -> region, "MINUS"
|
||||||
| PLUS region -> region, "PLUS"
|
| PLUS region -> region, "PLUS"
|
||||||
| SLASH region -> region, "SLASH"
|
| SLASH region -> region, "SLASH"
|
||||||
@ -125,24 +150,9 @@ let proj_token = function
|
|||||||
| LE region -> region, "LE"
|
| LE region -> region, "LE"
|
||||||
| GE region -> region, "GE"
|
| GE region -> region, "GE"
|
||||||
| ARROW region -> region, "ARROW"
|
| ARROW region -> region, "ARROW"
|
||||||
|
| NOT region -> region, "NOT"
|
||||||
| BOOL_OR region -> region, "BOOL_OR"
|
| BOOL_OR region -> region, "BOOL_OR"
|
||||||
| BOOL_AND region -> region, "BOOL_AND"
|
| BOOL_AND region -> region, "BOOL_AND"
|
||||||
| Ident Region.{region; value} ->
|
|
||||||
region, sprintf "Ident %s" value
|
|
||||||
| Constr Region.{region; value} ->
|
|
||||||
region, sprintf "Constr %s" value
|
|
||||||
| Int Region.{region; value = s,n} ->
|
|
||||||
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
|
|
||||||
| Nat Region.{region; value = s,n} ->
|
|
||||||
region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
|
|
||||||
| Mutez Region.{region; value = s,n} ->
|
|
||||||
region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
|
|
||||||
| String Region.{region; value} ->
|
|
||||||
region, sprintf "String %s" value
|
|
||||||
| Bytes Region.{region; value = s,b} ->
|
|
||||||
region,
|
|
||||||
sprintf "Bytes (\"%s\", \"0x%s\")"
|
|
||||||
s (Hex.show b)
|
|
||||||
| Else region -> region, "Else"
|
| Else region -> region, "Else"
|
||||||
| False region -> region, "False"
|
| False region -> region, "False"
|
||||||
| If region -> region, "If"
|
| If region -> region, "If"
|
||||||
@ -150,7 +160,6 @@ let proj_token = function
|
|||||||
| Rec region -> region, "Rec"
|
| Rec region -> region, "Rec"
|
||||||
| Switch region -> region, "Switch"
|
| Switch region -> region, "Switch"
|
||||||
| Mod region -> region, "Mod"
|
| Mod region -> region, "Mod"
|
||||||
| NOT region -> region, "!"
|
|
||||||
| Or region -> region, "Or"
|
| Or region -> region, "Or"
|
||||||
| True region -> region, "True"
|
| True region -> region, "True"
|
||||||
| Type region -> region, "Type"
|
| Type region -> region, "Type"
|
||||||
@ -160,7 +169,20 @@ let proj_token = function
|
|||||||
| EOF region -> region, "EOF"
|
| EOF region -> region, "EOF"
|
||||||
|
|
||||||
let to_lexeme = function
|
let to_lexeme = function
|
||||||
CAT _ -> "++"
|
(* Literals *)
|
||||||
|
|
||||||
|
String s -> s.Region.value
|
||||||
|
| Bytes b -> fst b.Region.value
|
||||||
|
| Int i
|
||||||
|
| Nat i
|
||||||
|
| Mutez i -> fst i.Region.value
|
||||||
|
| Ident id -> id.Region.value
|
||||||
|
| Constr id -> id.Region.value
|
||||||
|
| Attr a -> a.Region.value
|
||||||
|
|
||||||
|
(* Symbols *)
|
||||||
|
|
||||||
|
| CAT _ -> "++"
|
||||||
| MINUS _ -> "-"
|
| MINUS _ -> "-"
|
||||||
| PLUS _ -> "+"
|
| PLUS _ -> "+"
|
||||||
| SLASH _ -> "/"
|
| SLASH _ -> "/"
|
||||||
@ -188,29 +210,32 @@ let to_lexeme = function
|
|||||||
| ARROW _ -> "=>"
|
| ARROW _ -> "=>"
|
||||||
| BOOL_OR _ -> "||"
|
| BOOL_OR _ -> "||"
|
||||||
| BOOL_AND _ -> "&&"
|
| BOOL_AND _ -> "&&"
|
||||||
| Ident id -> id.Region.value
|
| NOT _ -> "!"
|
||||||
| Constr id -> id.Region.value
|
|
||||||
| Int i
|
(* Keywords *)
|
||||||
| Nat i
|
|
||||||
| Mutez i -> fst i.Region.value
|
|
||||||
| String s -> s.Region.value
|
|
||||||
| Bytes b -> fst b.Region.value
|
|
||||||
| Else _ -> "else"
|
| Else _ -> "else"
|
||||||
| False _ -> "false"
|
| False _ -> "false"
|
||||||
| If _ -> "if"
|
| If _ -> "if"
|
||||||
| Let _ -> "let"
|
| Let _ -> "let"
|
||||||
| Rec _ -> "rec"
|
|
||||||
| Mod _ -> "mod"
|
| Mod _ -> "mod"
|
||||||
| NOT _ -> "!"
|
|
||||||
| Or _ -> "or"
|
| Or _ -> "or"
|
||||||
|
| Rec _ -> "rec"
|
||||||
| Switch _ -> "switch"
|
| Switch _ -> "switch"
|
||||||
| True _ -> "true"
|
| True _ -> "true"
|
||||||
| Type _ -> "type"
|
| Type _ -> "type"
|
||||||
|
|
||||||
|
(* Data constructors *)
|
||||||
|
|
||||||
| C_None _ -> "None"
|
| C_None _ -> "None"
|
||||||
| C_Some _ -> "Some"
|
| C_Some _ -> "Some"
|
||||||
| Attr a -> a.Region.value
|
|
||||||
|
(* Virtual tokens *)
|
||||||
|
|
||||||
| EOF _ -> ""
|
| EOF _ -> ""
|
||||||
|
|
||||||
|
(* CONVERSIONS *)
|
||||||
|
|
||||||
let to_string token ?(offsets=true) mode =
|
let to_string token ?(offsets=true) mode =
|
||||||
let region, val_str = proj_token token in
|
let region, val_str = proj_token token in
|
||||||
let reg_str = region#compact ~offsets mode
|
let reg_str = region#compact ~offsets mode
|
||||||
@ -261,12 +286,9 @@ let reserved =
|
|||||||
|> add "functor"
|
|> add "functor"
|
||||||
|> add "inherit"
|
|> add "inherit"
|
||||||
|> add "initializer"
|
|> add "initializer"
|
||||||
(* |> add "land" - see https://ligo.atlassian.net/browse/LIGO-263 *)
|
|
||||||
|> add "lazy"
|
|> add "lazy"
|
||||||
(* |> add "lor" - see https://ligo.atlassian.net/browse/LIGO-263 *)
|
|
||||||
|> add "lsl"
|
|> add "lsl"
|
||||||
|> add "lsr"
|
|> add "lsr"
|
||||||
(* |> add "lxor" - see https://ligo.atlassian.net/browse/LIGO-263 *)
|
|
||||||
|> add "match"
|
|> add "match"
|
||||||
|> add "method"
|
|> add "method"
|
||||||
|> add "module"
|
|> add "module"
|
||||||
@ -291,7 +313,7 @@ let reserved =
|
|||||||
|
|
||||||
let constructors = [
|
let constructors = [
|
||||||
(fun reg -> C_None reg);
|
(fun reg -> C_None reg);
|
||||||
(fun reg -> C_Some reg);
|
(fun reg -> C_Some reg)
|
||||||
]
|
]
|
||||||
|
|
||||||
let add map (key, value) = SMap.add key value map
|
let add map (key, value) = SMap.add key value map
|
||||||
@ -376,8 +398,7 @@ let mk_int lexeme region =
|
|||||||
else Ok (Int Region.{region; value = lexeme, z})
|
else Ok (Int Region.{region; value = lexeme, z})
|
||||||
|
|
||||||
let mk_nat lexeme region =
|
let mk_nat lexeme region =
|
||||||
let z =
|
let z = Str.(global_replace (regexp "_") "" lexeme) |>
|
||||||
Str.(global_replace (regexp "_") "" lexeme) |>
|
|
||||||
Str.(global_replace (regexp "n") "") |>
|
Str.(global_replace (regexp "n") "") |>
|
||||||
Z.of_string in
|
Z.of_string in
|
||||||
if Z.equal z Z.zero && lexeme <> "0n"
|
if Z.equal z Z.zero && lexeme <> "0n"
|
||||||
@ -385,8 +406,7 @@ let mk_nat lexeme region =
|
|||||||
else Ok (Nat Region.{region; value = lexeme, z})
|
else Ok (Nat Region.{region; value = lexeme, z})
|
||||||
|
|
||||||
let mk_mutez lexeme region =
|
let mk_mutez lexeme region =
|
||||||
let z =
|
let z = Str.(global_replace (regexp "_") "" lexeme) |>
|
||||||
Str.(global_replace (regexp "_") "" lexeme) |>
|
|
||||||
Str.(global_replace (regexp "mutez") "") |>
|
Str.(global_replace (regexp "mutez") "") |>
|
||||||
Z.of_string in
|
Z.of_string in
|
||||||
if Z.equal z Z.zero && lexeme <> "0mutez"
|
if Z.equal z Z.zero && lexeme <> "0mutez"
|
||||||
@ -460,75 +480,72 @@ let mk_attr header lexeme region =
|
|||||||
|
|
||||||
(* Predicates *)
|
(* Predicates *)
|
||||||
|
|
||||||
let is_string = function
|
let is_string = function String _ -> true | _ -> false
|
||||||
String _ -> true
|
let is_bytes = function Bytes _ -> true | _ -> false
|
||||||
| _ -> false
|
let is_int = function Int _ -> true | _ -> false
|
||||||
|
let is_ident = function Ident _ -> true | _ -> false
|
||||||
let is_bytes = function
|
|
||||||
Bytes _ -> true
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let is_int = function
|
|
||||||
Int _ -> true
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let is_ident = function
|
|
||||||
Ident _ -> true
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let is_kwd = function
|
|
||||||
Else _
|
|
||||||
| False _
|
|
||||||
| If _
|
|
||||||
| Let _
|
|
||||||
| Rec _
|
|
||||||
| Switch _
|
|
||||||
| Mod _
|
|
||||||
| Or _
|
|
||||||
| True _
|
|
||||||
| Type _ -> true
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let is_constr = function
|
|
||||||
Constr _
|
|
||||||
| Ident _
|
|
||||||
| False _
|
|
||||||
| True _ -> true
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let is_sym = function
|
|
||||||
CAT _
|
|
||||||
| MINUS _
|
|
||||||
| PLUS _
|
|
||||||
| SLASH _
|
|
||||||
| TIMES _
|
|
||||||
| LPAR _
|
|
||||||
| RPAR _
|
|
||||||
| LBRACKET _
|
|
||||||
| RBRACKET _
|
|
||||||
| LBRACE _
|
|
||||||
| RBRACE _
|
|
||||||
| COMMA _
|
|
||||||
| SEMI _
|
|
||||||
| VBAR _
|
|
||||||
| COLON _
|
|
||||||
| DOT _
|
|
||||||
| ELLIPSIS _
|
|
||||||
| WILD _
|
|
||||||
| EQ _
|
|
||||||
| EQEQ _
|
|
||||||
| NE _
|
|
||||||
| LT _
|
|
||||||
| GT _
|
|
||||||
| LE _
|
|
||||||
| GE _
|
|
||||||
| ARROW _
|
|
||||||
| BOOL_OR _
|
|
||||||
| NOT _
|
|
||||||
| BOOL_AND _ -> true
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let is_eof = function EOF _ -> true | _ -> false
|
let is_eof = function EOF _ -> true | _ -> false
|
||||||
|
let is_minus = function MINUS _ -> true | _ -> false
|
||||||
|
|
||||||
|
(* Errors *)
|
||||||
|
|
||||||
|
type error =
|
||||||
|
Odd_lengthed_bytes
|
||||||
|
| Missing_break
|
||||||
|
| Negative_byte_sequence
|
||||||
|
|
||||||
|
let error_to_string = function
|
||||||
|
Odd_lengthed_bytes ->
|
||||||
|
"The length of the byte sequence is an odd number.\n\
|
||||||
|
Hint: Add or remove a digit."
|
||||||
|
| Missing_break ->
|
||||||
|
"Missing break.\n\
|
||||||
|
Hint: Insert some space."
|
||||||
|
| Negative_byte_sequence ->
|
||||||
|
"Negative byte sequence.\n\
|
||||||
|
Hint: Remove the leading minus sign."
|
||||||
|
|
||||||
|
exception Error of error Region.reg
|
||||||
|
|
||||||
|
let format_error ?(offsets=true) mode Region.{region; value} ~file =
|
||||||
|
let msg = error_to_string value
|
||||||
|
and reg = region#to_string ~file ~offsets mode in
|
||||||
|
let value = sprintf "Lexical error %s:\n%s\n" reg msg
|
||||||
|
in Region.{value; region}
|
||||||
|
|
||||||
|
let fail region value = raise (Error Region.{region; value})
|
||||||
|
|
||||||
|
let check_right_context token next_token buffer : unit =
|
||||||
|
let pos = (to_region token)#stop in
|
||||||
|
let region = Region.make ~start:pos ~stop:pos in
|
||||||
|
match next_token buffer with
|
||||||
|
None -> ()
|
||||||
|
| Some (markup, next) ->
|
||||||
|
if is_minus token && is_bytes next
|
||||||
|
then let region =
|
||||||
|
Region.cover (to_region token) (to_region next)
|
||||||
|
in fail region Negative_byte_sequence
|
||||||
|
else
|
||||||
|
match markup with
|
||||||
|
[] ->
|
||||||
|
if is_int token
|
||||||
|
then if is_string next || is_ident next
|
||||||
|
then fail region Missing_break
|
||||||
|
else ()
|
||||||
|
else
|
||||||
|
if is_string token
|
||||||
|
then if is_int next || is_bytes next || is_ident next
|
||||||
|
then fail region Missing_break
|
||||||
|
else ()
|
||||||
|
else
|
||||||
|
if is_bytes token
|
||||||
|
then if is_string next || is_ident next
|
||||||
|
then fail region Missing_break
|
||||||
|
else if is_int next
|
||||||
|
then fail region Odd_lengthed_bytes
|
||||||
|
else ()
|
||||||
|
else ()
|
||||||
|
| _::_ -> ()
|
||||||
|
|
||||||
(* END TRAILER *)
|
(* END TRAILER *)
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,10 @@ module Region = Simple_utils.Region
|
|||||||
|
|
||||||
module IO =
|
module IO =
|
||||||
struct
|
struct
|
||||||
let options = EvalOpt.(read ~lang:`ReasonLIGO ~ext:".religo")
|
let options =
|
||||||
|
let open EvalOpt in
|
||||||
|
let block = mk_block ~opening:"/*" ~closing:"*/"
|
||||||
|
in read ~block ~line:"//" ".religo"
|
||||||
end
|
end
|
||||||
|
|
||||||
module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
|
module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
|
||||||
@ -12,4 +15,4 @@ module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
|
|||||||
let () =
|
let () =
|
||||||
match M.trace () with
|
match M.trace () with
|
||||||
Stdlib.Ok () -> ()
|
Stdlib.Ok () -> ()
|
||||||
| Error Region.{value; _} -> Utils.highlight value
|
| Error Region.{value; _} -> Printf.eprintf "\027[31m%s\027[0m%!" value
|
||||||
|
@ -895,7 +895,7 @@ path:
|
|||||||
| projection { Path $1 }
|
| projection { Path $1 }
|
||||||
|
|
||||||
update_record:
|
update_record:
|
||||||
"{""..."path "," sep_or_term_list(field_path_assignment,",") "}" {
|
"{" "..." path "," sep_or_term_list(field_path_assignment,",") "}" {
|
||||||
let region = cover $1 $6 in
|
let region = cover $1 $6 in
|
||||||
let ne_elements, terminator = $5 in
|
let ne_elements, terminator = $5 in
|
||||||
let value = {
|
let value = {
|
||||||
|
@ -5,7 +5,10 @@ module SSet = Set.Make (String)
|
|||||||
|
|
||||||
module IO =
|
module IO =
|
||||||
struct
|
struct
|
||||||
let options = EvalOpt.(read ~lang:`ReasonLIGO ~ext:".religo")
|
let options =
|
||||||
|
let open EvalOpt in
|
||||||
|
let block = mk_block ~opening:"/*" ~closing:"*/"
|
||||||
|
in read ~block ~line:"//" ".religo"
|
||||||
end
|
end
|
||||||
|
|
||||||
module SubIO =
|
module SubIO =
|
||||||
@ -14,7 +17,8 @@ module SubIO =
|
|||||||
libs : string list;
|
libs : string list;
|
||||||
verbose : SSet.t;
|
verbose : SSet.t;
|
||||||
offsets : bool;
|
offsets : bool;
|
||||||
lang : EvalOpt.language;
|
block : EvalOpt.block_comment option;
|
||||||
|
line : EvalOpt.line_comment option;
|
||||||
ext : string;
|
ext : string;
|
||||||
mode : [`Byte | `Point];
|
mode : [`Byte | `Point];
|
||||||
cmd : EvalOpt.command;
|
cmd : EvalOpt.command;
|
||||||
@ -26,7 +30,8 @@ module SubIO =
|
|||||||
method libs = IO.options#libs
|
method libs = IO.options#libs
|
||||||
method verbose = IO.options#verbose
|
method verbose = IO.options#verbose
|
||||||
method offsets = IO.options#offsets
|
method offsets = IO.options#offsets
|
||||||
method lang = IO.options#lang
|
method block = IO.options#block
|
||||||
|
method line = IO.options#line
|
||||||
method ext = IO.options#ext
|
method ext = IO.options#ext
|
||||||
method mode = IO.options#mode
|
method mode = IO.options#mode
|
||||||
method cmd = IO.options#cmd
|
method cmd = IO.options#cmd
|
||||||
@ -37,7 +42,8 @@ module SubIO =
|
|||||||
EvalOpt.make ~libs:options#libs
|
EvalOpt.make ~libs:options#libs
|
||||||
~verbose:options#verbose
|
~verbose:options#verbose
|
||||||
~offsets:options#offsets
|
~offsets:options#offsets
|
||||||
~lang:options#lang
|
?block:options#block
|
||||||
|
?line:options#line
|
||||||
~ext:options#ext
|
~ext:options#ext
|
||||||
~mode:options#mode
|
~mode:options#mode
|
||||||
~cmd:options#cmd
|
~cmd:options#cmd
|
||||||
|
@ -1,7 +0,0 @@
|
|||||||
$HOME/git/OCaml-build/Makefile
|
|
||||||
$HOME/git/OCaml-build/Makefile.cfg
|
|
||||||
|
|
||||||
$HOME/git/ligo/vendors/ligo-utils/simple-utils/pos.mli
|
|
||||||
$HOME/git/ligo/vendors/ligo-utils/simple-utils/pos.ml
|
|
||||||
$HOME/git/ligo/vendors/ligo-utils/simple-utils/region.mli
|
|
||||||
$HOME/git/ligo/vendors/ligo-utils/simple-utils/region.ml
|
|
@ -5,37 +5,42 @@
|
|||||||
|
|
||||||
type command = Quiet | Copy | Units | Tokens
|
type command = Quiet | Copy | Units | Tokens
|
||||||
|
|
||||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
|
||||||
|
|
||||||
let lang_to_string = function
|
|
||||||
`PascaLIGO -> "PascaLIGO"
|
|
||||||
| `CameLIGO -> "CameLIGO"
|
|
||||||
| `ReasonLIGO -> "ReasonLIGO"
|
|
||||||
|
|
||||||
(* The type [options] gathers the command-line options. *)
|
(* The type [options] gathers the command-line options. *)
|
||||||
|
|
||||||
module SSet = Set.Make (String)
|
module SSet = Set.Make (String)
|
||||||
|
|
||||||
|
type line_comment = string (* Opening of a line comment *)
|
||||||
|
type block_comment = <opening : string; closing : string>
|
||||||
|
|
||||||
|
let mk_block ~opening ~closing : block_comment =
|
||||||
|
object
|
||||||
|
method opening = opening
|
||||||
|
method closing = closing
|
||||||
|
end
|
||||||
|
|
||||||
type options = <
|
type options = <
|
||||||
input : string option;
|
input : string option;
|
||||||
libs : string list;
|
libs : string list;
|
||||||
verbose : SSet.t;
|
verbose : SSet.t;
|
||||||
offsets : bool;
|
offsets : bool;
|
||||||
lang : language;
|
block : block_comment option;
|
||||||
ext : string; (* ".ligo", ".mligo", ".religo" *)
|
line : line_comment option;
|
||||||
|
ext : string;
|
||||||
mode : [`Byte | `Point];
|
mode : [`Byte | `Point];
|
||||||
cmd : command;
|
cmd : command;
|
||||||
mono : bool;
|
mono : bool;
|
||||||
expr : bool
|
expr : bool
|
||||||
>
|
>
|
||||||
|
|
||||||
let make ~input ~libs ~verbose ~offsets ~lang ~ext ~mode ~cmd ~mono ~expr : options =
|
let make ~input ~libs ~verbose ~offsets ?block
|
||||||
|
?line ~ext ~mode ~cmd ~mono ~expr : options =
|
||||||
object
|
object
|
||||||
method input = input
|
method input = input
|
||||||
method libs = libs
|
method libs = libs
|
||||||
method verbose = verbose
|
method verbose = verbose
|
||||||
method offsets = offsets
|
method offsets = offsets
|
||||||
method lang = lang
|
method block = block
|
||||||
|
method line = line
|
||||||
method ext = ext
|
method ext = ext
|
||||||
method mode = mode
|
method mode = mode
|
||||||
method cmd = cmd
|
method cmd = cmd
|
||||||
@ -58,10 +63,10 @@ let abort msg =
|
|||||||
|
|
||||||
(* Help *)
|
(* Help *)
|
||||||
|
|
||||||
let help language extension () =
|
let help extension () =
|
||||||
let file = Filename.basename Sys.argv.(0) in
|
let file = Filename.basename Sys.argv.(0) in
|
||||||
printf "Usage: %s [<option> ...] [<input>%s | \"-\"]\n" file extension;
|
printf "Usage: %s [<option> ...] [<input>%s | \"-\"]\n" file extension;
|
||||||
printf "where <input>%s is the %s source file (default: stdin),\n" extension language;
|
printf "where <input>%s is the LIGO source file (default: stdin),\n" extension;
|
||||||
print "and each <option> (if any) is one of the following:";
|
print "and each <option> (if any) is one of the following:";
|
||||||
print " -I <paths> Library paths (colon-separated)";
|
print " -I <paths> Library paths (colon-separated)";
|
||||||
print " -t, --tokens Print tokens";
|
print " -t, --tokens Print tokens";
|
||||||
@ -105,8 +110,7 @@ let add_verbose d =
|
|||||||
!verbose
|
!verbose
|
||||||
(split_at_colon d)
|
(split_at_colon d)
|
||||||
|
|
||||||
let specs language extension =
|
let specs extension =
|
||||||
let language = lang_to_string language in
|
|
||||||
let open! Getopt in [
|
let open! Getopt in [
|
||||||
'I', nolong, None, Some add_path;
|
'I', nolong, None, Some add_path;
|
||||||
'c', "copy", set copy true, None;
|
'c', "copy", set copy true, None;
|
||||||
@ -118,7 +122,7 @@ let specs language extension =
|
|||||||
noshort, "mono", set mono true, None;
|
noshort, "mono", set mono true, None;
|
||||||
noshort, "expr", set expr true, None;
|
noshort, "expr", set expr true, None;
|
||||||
noshort, "verbose", None, Some add_verbose;
|
noshort, "verbose", None, Some add_verbose;
|
||||||
'h', "help", Some (help language extension), None;
|
'h', "help", Some (help extension), None;
|
||||||
noshort, "version", Some version, None
|
noshort, "version", Some version, None
|
||||||
]
|
]
|
||||||
;;
|
;;
|
||||||
@ -156,7 +160,7 @@ let print_opt () =
|
|||||||
printf "input = %s\n" (string_of quote !input);
|
printf "input = %s\n" (string_of quote !input);
|
||||||
printf "libs = %s\n" (string_of_path !libs)
|
printf "libs = %s\n" (string_of_path !libs)
|
||||||
|
|
||||||
let check lang ext =
|
let check ?block ?line ~ext =
|
||||||
let () =
|
let () =
|
||||||
if SSet.mem "cli" !verbose then print_opt () in
|
if SSet.mem "cli" !verbose then print_opt () in
|
||||||
|
|
||||||
@ -209,16 +213,19 @@ let check lang ext =
|
|||||||
| false, false, false, true -> Tokens
|
| false, false, false, true -> Tokens
|
||||||
| _ -> abort "Choose one of -q, -c, -u, -t."
|
| _ -> abort "Choose one of -q, -c, -u, -t."
|
||||||
|
|
||||||
in make ~input ~libs ~verbose ~offsets ~mode ~cmd ~mono ~expr ~lang ~ext
|
in make ~input ~libs ~verbose ~offsets ~mode
|
||||||
|
~cmd ~mono ~expr ?block ?line ~ext
|
||||||
|
|
||||||
(* Parsing the command-line options *)
|
(* Parsing the command-line options *)
|
||||||
|
|
||||||
let read ~lang ~ext =
|
type extension = string
|
||||||
|
|
||||||
|
let read ?block ?line (ext: extension) =
|
||||||
try
|
try
|
||||||
Getopt.parse_cmdline (specs lang ext) anonymous;
|
Getopt.parse_cmdline (specs ext) anonymous;
|
||||||
(verb_str :=
|
(verb_str :=
|
||||||
let apply e a =
|
let apply e a =
|
||||||
if a = "" then e else Printf.sprintf "%s, %s" e a
|
if a = "" then e else Printf.sprintf "%s, %s" e a
|
||||||
in SSet.fold apply !verbose "");
|
in SSet.fold apply !verbose "");
|
||||||
check lang ext
|
check ?block ?line ~ext
|
||||||
with Getopt.Error msg -> abort msg
|
with Getopt.Error msg -> abort msg
|
||||||
|
@ -49,19 +49,21 @@ type command = Quiet | Copy | Units | Tokens
|
|||||||
expected.}
|
expected.}
|
||||||
} *)
|
} *)
|
||||||
|
|
||||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
|
||||||
|
|
||||||
val lang_to_string : language -> string
|
|
||||||
|
|
||||||
module SSet : Set.S with type elt = string and type t = Set.Make(String).t
|
module SSet : Set.S with type elt = string and type t = Set.Make(String).t
|
||||||
|
|
||||||
|
type line_comment = string (* Opening of a line comment *)
|
||||||
|
type block_comment = <opening : string; closing : string>
|
||||||
|
|
||||||
|
val mk_block : opening:string -> closing:string -> block_comment
|
||||||
|
|
||||||
type options = <
|
type options = <
|
||||||
input : string option;
|
input : string option;
|
||||||
libs : string list;
|
libs : string list;
|
||||||
verbose : SSet.t;
|
verbose : SSet.t;
|
||||||
offsets : bool;
|
offsets : bool;
|
||||||
lang : language;
|
block : block_comment option;
|
||||||
ext : string; (* ".ligo", ".mligo", ".religo" *)
|
line : line_comment option;
|
||||||
|
ext : string;
|
||||||
mode : [`Byte | `Point];
|
mode : [`Byte | `Point];
|
||||||
cmd : command;
|
cmd : command;
|
||||||
mono : bool;
|
mono : bool;
|
||||||
@ -73,7 +75,8 @@ val make :
|
|||||||
libs:string list ->
|
libs:string list ->
|
||||||
verbose:SSet.t ->
|
verbose:SSet.t ->
|
||||||
offsets:bool ->
|
offsets:bool ->
|
||||||
lang:language ->
|
?block:block_comment ->
|
||||||
|
?line:line_comment ->
|
||||||
ext:string ->
|
ext:string ->
|
||||||
mode:[`Byte | `Point] ->
|
mode:[`Byte | `Point] ->
|
||||||
cmd:command ->
|
cmd:command ->
|
||||||
@ -81,8 +84,9 @@ val make :
|
|||||||
expr:bool ->
|
expr:bool ->
|
||||||
options
|
options
|
||||||
|
|
||||||
(** Parsing the command-line options on stdin. The first parameter is
|
(** Parsing the command-line options on stdin. *)
|
||||||
the name of the concrete syntax, e.g., [PascaLIGO], and the second
|
|
||||||
is the expected file extension, e.g., ".ligo". *)
|
|
||||||
|
|
||||||
val read : lang:language -> ext:string -> options
|
type extension = string
|
||||||
|
|
||||||
|
val read :
|
||||||
|
?block:block_comment -> ?line:line_comment -> extension -> options
|
||||||
|
@ -38,8 +38,6 @@
|
|||||||
module Region = Simple_utils.Region
|
module Region = Simple_utils.Region
|
||||||
module Pos = Simple_utils.Pos
|
module Pos = Simple_utils.Pos
|
||||||
|
|
||||||
type lexeme = string
|
|
||||||
|
|
||||||
(* TOKENS *)
|
(* TOKENS *)
|
||||||
|
|
||||||
(* The signature [TOKEN] exports an abstract type [token], so a lexer
|
(* The signature [TOKEN] exports an abstract type [token], so a lexer
|
||||||
@ -54,6 +52,8 @@ type lexeme = string
|
|||||||
reading the ocamllex specification for the lexer ([Lexer.mll]).
|
reading the ocamllex specification for the lexer ([Lexer.mll]).
|
||||||
*)
|
*)
|
||||||
|
|
||||||
|
type lexeme = string
|
||||||
|
|
||||||
module type TOKEN =
|
module type TOKEN =
|
||||||
sig
|
sig
|
||||||
type token
|
type token
|
||||||
@ -82,13 +82,6 @@ module type TOKEN =
|
|||||||
|
|
||||||
(* Predicates *)
|
(* Predicates *)
|
||||||
|
|
||||||
val is_string : token -> bool
|
|
||||||
val is_bytes : token -> bool
|
|
||||||
val is_int : token -> bool
|
|
||||||
val is_ident : token -> bool
|
|
||||||
val is_kwd : token -> bool
|
|
||||||
val is_constr : token -> bool
|
|
||||||
val is_sym : token -> bool
|
|
||||||
val is_eof : token -> bool
|
val is_eof : token -> bool
|
||||||
|
|
||||||
(* Projections *)
|
(* Projections *)
|
||||||
@ -96,73 +89,41 @@ module type TOKEN =
|
|||||||
val to_lexeme : token -> lexeme
|
val to_lexeme : token -> lexeme
|
||||||
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
|
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
|
||||||
val to_region : token -> Region.t
|
val to_region : token -> Region.t
|
||||||
|
|
||||||
|
(* Style *)
|
||||||
|
|
||||||
|
type error
|
||||||
|
|
||||||
|
val error_to_string : error -> string
|
||||||
|
|
||||||
|
exception Error of error Region.reg
|
||||||
|
|
||||||
|
val format_error :
|
||||||
|
?offsets:bool ->
|
||||||
|
[`Byte | `Point] ->
|
||||||
|
error Region.reg ->
|
||||||
|
file:bool ->
|
||||||
|
string Region.reg
|
||||||
|
|
||||||
|
val check_right_context :
|
||||||
|
token ->
|
||||||
|
(Lexing.lexbuf -> (Markup.t list * token) option) ->
|
||||||
|
Lexing.lexbuf ->
|
||||||
|
unit
|
||||||
end
|
end
|
||||||
|
|
||||||
(* The module type for lexers is [S]. It mainly exports the function
|
(* The signature of the lexer *)
|
||||||
[open_token_stream], which returns
|
|
||||||
|
|
||||||
* a function [read] that extracts tokens from a lexing buffer,
|
|
||||||
together with a lexing buffer [buffer] to read from,
|
|
||||||
* a function [close] that closes that buffer,
|
|
||||||
* a function [get_pos] that returns the current position, and
|
|
||||||
* a function [get_last] that returns the region of the last
|
|
||||||
recognised token.
|
|
||||||
* a function [get_file] that returns the name of the file being scanned
|
|
||||||
(empty string if [stdin]).
|
|
||||||
|
|
||||||
Note that a module [Token] is exported too, because the signature
|
|
||||||
of the exported functions depend on it.
|
|
||||||
|
|
||||||
The call [read ~log] evaluates in a lexer (also known as a
|
|
||||||
tokeniser or scanner) whose type is [Lexing.lexbuf -> token], and
|
|
||||||
suitable for a parser generated by Menhir. The argument labelled
|
|
||||||
[log] is a logger, that is, it may print a token and its left
|
|
||||||
markup to a given channel, at the caller's discretion.
|
|
||||||
*)
|
|
||||||
|
|
||||||
module type S =
|
module type S =
|
||||||
sig
|
sig
|
||||||
module Token : TOKEN
|
module Token : TOKEN
|
||||||
type token = Token.token
|
type token = Token.token
|
||||||
|
|
||||||
type file_path = string
|
(* The scanner *)
|
||||||
type logger = Markup.t list -> token -> unit
|
|
||||||
|
|
||||||
type window =
|
val scan : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
|
||||||
Nil
|
|
||||||
| One of token
|
|
||||||
| Two of token * token
|
|
||||||
|
|
||||||
val slide : token -> window -> window
|
(* Errors (specific to the generic lexer, not to the tokens) *)
|
||||||
|
|
||||||
type input =
|
|
||||||
File of file_path
|
|
||||||
| String of string
|
|
||||||
| Channel of in_channel
|
|
||||||
| Buffer of Lexing.lexbuf
|
|
||||||
|
|
||||||
type instance = {
|
|
||||||
input : input;
|
|
||||||
read : log:logger -> Lexing.lexbuf -> token;
|
|
||||||
buffer : Lexing.lexbuf;
|
|
||||||
get_win : unit -> window;
|
|
||||||
get_pos : unit -> Pos.t;
|
|
||||||
get_last : unit -> Region.t;
|
|
||||||
get_file : unit -> file_path;
|
|
||||||
close : unit -> unit
|
|
||||||
}
|
|
||||||
|
|
||||||
type open_err = File_opening of string
|
|
||||||
|
|
||||||
val lexbuf_from_input :
|
|
||||||
input -> (Lexing.lexbuf * (unit -> unit), open_err) Stdlib.result
|
|
||||||
|
|
||||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
|
||||||
|
|
||||||
val open_token_stream :
|
|
||||||
language -> input -> (instance, open_err) Stdlib.result
|
|
||||||
|
|
||||||
(* Error reporting *)
|
|
||||||
|
|
||||||
type error
|
type error
|
||||||
|
|
||||||
@ -173,7 +134,6 @@ module type S =
|
|||||||
val format_error :
|
val format_error :
|
||||||
?offsets:bool -> [`Byte | `Point] ->
|
?offsets:bool -> [`Byte | `Point] ->
|
||||||
error Region.reg -> file:bool -> string Region.reg
|
error Region.reg -> file:bool -> string Region.reg
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
(* The functorised interface
|
(* The functorised interface
|
||||||
@ -182,4 +142,4 @@ module type S =
|
|||||||
submodule in [S].
|
submodule in [S].
|
||||||
*)
|
*)
|
||||||
|
|
||||||
module Make (Token: TOKEN) : S with module Token = Token
|
module Make (Token : TOKEN) : S with module Token = Token
|
||||||
|
File diff suppressed because it is too large
Load Diff
387
src/passes/1-parser/shared/LexerLib.ml
Normal file
387
src/passes/1-parser/shared/LexerLib.ml
Normal file
@ -0,0 +1,387 @@
|
|||||||
|
module Region = Simple_utils.Region
|
||||||
|
module Pos = Simple_utils.Pos
|
||||||
|
|
||||||
|
(* LEXER ENGINE *)
|
||||||
|
|
||||||
|
(* Resetting file name and line number in the lexing buffer
|
||||||
|
|
||||||
|
The call [reset ~file ~line buffer] modifies in-place the lexing
|
||||||
|
buffer [buffer] so the lexing engine records that the file
|
||||||
|
associated with [buffer] is named [file], and the current line is
|
||||||
|
[line]. This function is useful when lexing a file that has been
|
||||||
|
previously preprocessed by the C preprocessor, in which case the
|
||||||
|
argument [file] is the name of the file that was preprocessed,
|
||||||
|
_not_ the preprocessed file (of which the user is not normally
|
||||||
|
aware). By default, the [line] argument is [1].
|
||||||
|
*)
|
||||||
|
|
||||||
|
let reset_file ~file buffer =
|
||||||
|
let open Lexing in
|
||||||
|
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname = file}
|
||||||
|
|
||||||
|
let reset_line ~line buffer =
|
||||||
|
assert (line >= 0);
|
||||||
|
let open Lexing in
|
||||||
|
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = line}
|
||||||
|
|
||||||
|
let reset_offset ~offset buffer =
|
||||||
|
assert (offset >= 0);
|
||||||
|
let open Lexing in
|
||||||
|
let bol = buffer.lex_curr_p.pos_bol in
|
||||||
|
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum = bol + offset }
|
||||||
|
|
||||||
|
let reset ?file ?line ?offset buffer =
|
||||||
|
let () =
|
||||||
|
match file with
|
||||||
|
Some file -> reset_file ~file buffer
|
||||||
|
| None -> () in
|
||||||
|
let () =
|
||||||
|
match line with
|
||||||
|
Some line -> reset_line ~line buffer
|
||||||
|
| None -> () in
|
||||||
|
match offset with
|
||||||
|
Some offset -> reset_offset ~offset buffer
|
||||||
|
| None -> ()
|
||||||
|
|
||||||
|
(* Rolling back one lexeme _within the current semantic action_ *)
|
||||||
|
|
||||||
|
let rollback buffer =
|
||||||
|
let open Lexing in
|
||||||
|
let len = String.length (lexeme buffer) in
|
||||||
|
let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
|
||||||
|
buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
|
||||||
|
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
|
||||||
|
|
||||||
|
(* Utility types *)
|
||||||
|
|
||||||
|
type file_path = string
|
||||||
|
type lexeme = string
|
||||||
|
|
||||||
|
(* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)
|
||||||
|
|
||||||
|
type thread = <
|
||||||
|
opening : Region.t;
|
||||||
|
length : int;
|
||||||
|
acc : char list;
|
||||||
|
to_string : string;
|
||||||
|
push_char : char -> thread;
|
||||||
|
push_string : string -> thread;
|
||||||
|
set_opening : Region.t -> thread
|
||||||
|
>
|
||||||
|
|
||||||
|
let mk_thread region lexeme : thread =
|
||||||
|
(* The call [explode s a] is the list made by pushing the characters
|
||||||
|
in the string [s] on top of [a], in reverse order. For example,
|
||||||
|
[explode "ba" ['c';'d'] = ['a'; 'b'; 'c'; 'd']]. *)
|
||||||
|
|
||||||
|
let explode s acc =
|
||||||
|
let rec push = function
|
||||||
|
0 -> acc
|
||||||
|
| i -> s.[i-1] :: push (i-1)
|
||||||
|
in push (String.length s) in
|
||||||
|
object
|
||||||
|
val opening = region
|
||||||
|
method opening = opening
|
||||||
|
|
||||||
|
val length = String.length lexeme
|
||||||
|
method length = length
|
||||||
|
|
||||||
|
val acc = explode lexeme []
|
||||||
|
method acc = acc
|
||||||
|
|
||||||
|
method set_opening opening = {< opening; length; acc >}
|
||||||
|
|
||||||
|
method push_char char =
|
||||||
|
{< opening; length=length+1; acc=char::acc >}
|
||||||
|
|
||||||
|
method push_string str =
|
||||||
|
{< opening;
|
||||||
|
length = length + String.length str;
|
||||||
|
acc = explode str acc >}
|
||||||
|
|
||||||
|
(* The value of [thread#to_string] is a string of length
|
||||||
|
[thread#length] containing the [thread#length] characters in
|
||||||
|
the list [thread#acc], in reverse order. For instance,
|
||||||
|
[thread#to_string = "abc"] if [thread#length = 3] and
|
||||||
|
[thread#acc = ['c';'b';'a']]. *)
|
||||||
|
|
||||||
|
method to_string =
|
||||||
|
let bytes = Bytes.make length ' ' in
|
||||||
|
let rec fill i = function
|
||||||
|
[] -> bytes
|
||||||
|
| char::l -> Bytes.set bytes i char; fill (i-1) l
|
||||||
|
in fill (length-1) acc |> Bytes.to_string
|
||||||
|
end
|
||||||
|
|
||||||
|
(* STATE *)
|
||||||
|
|
||||||
|
(* Scanning the lexing buffer for tokens (and markup, as a
|
||||||
|
side-effect).
|
||||||
|
|
||||||
|
Because we want the lexer to have access to the right lexical
|
||||||
|
context of a recognised lexeme (to enforce stylistic constraints or
|
||||||
|
report special error patterns), we need to keep a hidden reference
|
||||||
|
to a queue of recognised lexical units (that is, tokens and markup)
|
||||||
|
that acts as a mutable state between the calls to [read]. When
|
||||||
|
[read] is called, that queue is examined first and, if it contains
|
||||||
|
at least one token, that token is returned; otherwise, the lexing
|
||||||
|
buffer is scanned for at least one more new token. That is the
|
||||||
|
general principle: we put a high-level buffer (our queue) on top of
|
||||||
|
the low-level lexing buffer.
|
||||||
|
|
||||||
|
One tricky and important detail is that we must make any parser
|
||||||
|
generated by Menhir (and calling [read]) believe that the last
|
||||||
|
region of the input source that was matched indeed corresponds to
|
||||||
|
the returned token, despite that many tokens and markup may have
|
||||||
|
been matched since it was actually read from the input. In other
|
||||||
|
words, the parser requests a token that is taken from the
|
||||||
|
high-level buffer, but the parser requests the source regions from
|
||||||
|
the _low-level_ lexing buffer, and they may disagree if more than
|
||||||
|
one token has actually been recognised.
|
||||||
|
|
||||||
|
Consequently, in order to maintain a consistent view for the
|
||||||
|
parser, we have to patch some fields of the lexing buffer, namely
|
||||||
|
[lex_start_p] and [lex_curr_p], as these fields are read by parsers
|
||||||
|
generated by Menhir when querying source positions (regions). This
|
||||||
|
is the purpose of the function [patch_buffer]. After reading one or
|
||||||
|
more tokens and markup by the scanning rule [scan], we have to save
|
||||||
|
in the hidden reference [buf_reg] the region of the source that was
|
||||||
|
matched by [scan]. This atomic sequence of patching, scanning and
|
||||||
|
saving is implemented by the _function_ [scan] (beware: it shadows
|
||||||
|
the scanning rule [scan]). The function [patch_buffer] is, of
|
||||||
|
course, also called just before returning the token, so the parser
|
||||||
|
has a view of the lexing buffer consistent with the token.
|
||||||
|
*)
|
||||||
|
|
||||||
|
type 'token window =
|
||||||
|
Nil
|
||||||
|
| One of 'token
|
||||||
|
| Two of 'token * 'token
|
||||||
|
|
||||||
|
type 'token state = <
|
||||||
|
units : (Markup.t list * 'token) FQueue.t;
|
||||||
|
markup : Markup.t list;
|
||||||
|
window : 'token window;
|
||||||
|
last : Region.t;
|
||||||
|
pos : Pos.t;
|
||||||
|
decoder : Uutf.decoder;
|
||||||
|
supply : Bytes.t -> int -> int -> unit;
|
||||||
|
block : EvalOpt.block_comment option;
|
||||||
|
line : EvalOpt.line_comment option;
|
||||||
|
|
||||||
|
enqueue : 'token -> 'token state;
|
||||||
|
set_units : (Markup.t list * 'token) FQueue.t -> 'token state;
|
||||||
|
set_last : Region.t -> 'token state;
|
||||||
|
set_pos : Pos.t -> 'token state;
|
||||||
|
slide_token : 'token -> 'token state;
|
||||||
|
|
||||||
|
sync : Lexing.lexbuf -> Region.t * lexeme * 'token state;
|
||||||
|
|
||||||
|
push_newline : Lexing.lexbuf -> 'token state;
|
||||||
|
push_line : thread -> 'token state;
|
||||||
|
push_block : thread -> 'token state;
|
||||||
|
push_space : Lexing.lexbuf -> 'token state;
|
||||||
|
push_tabs : Lexing.lexbuf -> 'token state;
|
||||||
|
push_bom : Lexing.lexbuf -> 'token state;
|
||||||
|
push_markup : Markup.t -> 'token state;
|
||||||
|
>
|
||||||
|
|
||||||
|
let mk_state ~units ~markup ~window ~last ~pos ~decoder ~supply
|
||||||
|
?block ?line () : _ state =
|
||||||
|
object (self)
|
||||||
|
val units = units
|
||||||
|
method units = units
|
||||||
|
val markup = markup
|
||||||
|
method markup = markup
|
||||||
|
val window = window
|
||||||
|
method window = window
|
||||||
|
val last = last
|
||||||
|
method last = last
|
||||||
|
val pos = pos
|
||||||
|
method pos = pos
|
||||||
|
method decoder = decoder
|
||||||
|
method supply = supply
|
||||||
|
method block = block
|
||||||
|
method line = line
|
||||||
|
|
||||||
|
method enqueue token =
|
||||||
|
{< units = FQueue.enq (markup, token) units;
|
||||||
|
markup = [] >}
|
||||||
|
|
||||||
|
method set_units units = {< units = units >}
|
||||||
|
method set_last region = {< last = region >}
|
||||||
|
method set_pos pos = {< pos = pos >}
|
||||||
|
|
||||||
|
method slide_token token =
|
||||||
|
match self#window with
|
||||||
|
Nil -> {< window = One token >}
|
||||||
|
| One t | Two (t,_) -> {< window = Two (token,t) >}
|
||||||
|
|
||||||
|
method sync buffer =
|
||||||
|
let lex = Lexing.lexeme buffer in
|
||||||
|
let len = String.length lex in
|
||||||
|
let start = pos in
|
||||||
|
let stop = start#shift_bytes len in
|
||||||
|
let state = {< pos = stop >}
|
||||||
|
in Region.make ~start ~stop, lex, state
|
||||||
|
|
||||||
|
(* MARKUP *)
|
||||||
|
|
||||||
|
(* Committing markup to the current logical state *)
|
||||||
|
|
||||||
|
method push_markup unit = {< markup = unit :: markup >}
|
||||||
|
|
||||||
|
method push_newline buffer =
|
||||||
|
let () = Lexing.new_line buffer in
|
||||||
|
let value = Lexing.lexeme buffer in
|
||||||
|
let start = self#pos in
|
||||||
|
let stop = start#new_line value in
|
||||||
|
let region = Region.make ~start ~stop in
|
||||||
|
let unit = Markup.Newline Region.{region; value}
|
||||||
|
in {< pos = stop; markup = unit::markup >}
|
||||||
|
|
||||||
|
method push_line thread =
|
||||||
|
let start = thread#opening#start in
|
||||||
|
let region = Region.make ~start ~stop:self#pos
|
||||||
|
and value = thread#to_string in
|
||||||
|
let unit = Markup.LineCom Region.{region; value}
|
||||||
|
in {< markup = unit::markup >}
|
||||||
|
|
||||||
|
method push_block thread =
|
||||||
|
let start = thread#opening#start in
|
||||||
|
let region = Region.make ~start ~stop:self#pos
|
||||||
|
and value = thread#to_string in
|
||||||
|
let unit = Markup.BlockCom Region.{region; value}
|
||||||
|
in {< markup = unit::markup >}
|
||||||
|
|
||||||
|
method push_space buffer =
|
||||||
|
let region, lex, state = self#sync buffer in
|
||||||
|
let value = String.length lex in
|
||||||
|
let unit = Markup.Space Region.{region; value}
|
||||||
|
in state#push_markup unit
|
||||||
|
|
||||||
|
method push_tabs buffer =
|
||||||
|
let region, lex, state = self#sync buffer in
|
||||||
|
let value = String.length lex in
|
||||||
|
let unit = Markup.Tabs Region.{region; value}
|
||||||
|
in state#push_markup unit
|
||||||
|
|
||||||
|
method push_bom buffer =
|
||||||
|
let region, value, state = self#sync buffer in
|
||||||
|
let unit = Markup.BOM Region.{region; value}
|
||||||
|
in state#push_markup unit
|
||||||
|
end
|
||||||
|
|
||||||
|
(* LEXER INSTANCE *)
|
||||||
|
|
||||||
|
type input =
|
||||||
|
File of file_path
|
||||||
|
| String of string
|
||||||
|
| Channel of in_channel
|
||||||
|
| Buffer of Lexing.lexbuf
|
||||||
|
|
||||||
|
type 'token logger = Markup.t list -> 'token -> unit
|
||||||
|
|
||||||
|
type 'token instance = {
|
||||||
|
input : input;
|
||||||
|
read : log:('token logger) -> Lexing.lexbuf -> 'token;
|
||||||
|
buffer : Lexing.lexbuf;
|
||||||
|
get_win : unit -> 'token window;
|
||||||
|
get_pos : unit -> Pos.t;
|
||||||
|
get_last : unit -> Region.t;
|
||||||
|
get_file : unit -> file_path;
|
||||||
|
close : unit -> unit
|
||||||
|
}
|
||||||
|
|
||||||
|
type open_err = File_opening of string
|
||||||
|
|
||||||
|
let lexbuf_from_input = function
|
||||||
|
String s ->
|
||||||
|
Ok (Lexing.from_string s, fun () -> ())
|
||||||
|
| Channel chan ->
|
||||||
|
let close () = close_in chan in
|
||||||
|
Ok (Lexing.from_channel chan, close)
|
||||||
|
| Buffer b ->
|
||||||
|
Ok (b, fun () -> ())
|
||||||
|
| File path ->
|
||||||
|
try
|
||||||
|
let chan = open_in path in
|
||||||
|
let close () = close_in chan in
|
||||||
|
let lexbuf = Lexing.from_channel chan in
|
||||||
|
let () =
|
||||||
|
let open Lexing in
|
||||||
|
lexbuf.lex_curr_p <- {lexbuf.lex_curr_p with pos_fname=path}
|
||||||
|
in Ok (lexbuf, close)
|
||||||
|
with Sys_error msg -> Stdlib.Error (File_opening msg)
|
||||||
|
|
||||||
|
let open_token_stream ?line ?block ~scan
|
||||||
|
~token_to_region ~style input =
|
||||||
|
let file_path = match input with
|
||||||
|
File path -> path
|
||||||
|
| _ -> "" in
|
||||||
|
let pos = Pos.min ~file:file_path in
|
||||||
|
let buf_reg = ref (pos#byte, pos#byte)
|
||||||
|
and decoder = Uutf.decoder ~encoding:`UTF_8 `Manual in
|
||||||
|
let supply = Uutf.Manual.src decoder in
|
||||||
|
let state = ref (mk_state
|
||||||
|
~units:FQueue.empty
|
||||||
|
~last:Region.ghost
|
||||||
|
~window:Nil
|
||||||
|
~pos
|
||||||
|
~markup:[]
|
||||||
|
~decoder
|
||||||
|
~supply
|
||||||
|
?block
|
||||||
|
?line
|
||||||
|
()) in
|
||||||
|
let get_pos () = !state#pos
|
||||||
|
and get_last () = !state#last
|
||||||
|
and get_win () = !state#window
|
||||||
|
and get_file () = file_path in
|
||||||
|
|
||||||
|
let patch_buffer (start, stop) buffer =
|
||||||
|
let open Lexing in
|
||||||
|
let file_path = buffer.lex_curr_p.pos_fname in
|
||||||
|
buffer.lex_start_p <- {start with pos_fname = file_path};
|
||||||
|
buffer.lex_curr_p <- {stop with pos_fname = file_path}
|
||||||
|
|
||||||
|
and save_region buffer =
|
||||||
|
buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p) in
|
||||||
|
|
||||||
|
let scan' scan buffer =
|
||||||
|
patch_buffer !buf_reg buffer;
|
||||||
|
state := scan !state buffer;
|
||||||
|
save_region buffer in
|
||||||
|
|
||||||
|
let next_token scan buffer =
|
||||||
|
scan' scan buffer;
|
||||||
|
match FQueue.peek !state#units with
|
||||||
|
None -> None
|
||||||
|
| Some (units, ext_token) ->
|
||||||
|
state := !state#set_units units; Some ext_token in
|
||||||
|
|
||||||
|
let rec read scan ~token_to_region ~style ~log buffer =
|
||||||
|
match FQueue.deq !state#units with
|
||||||
|
None ->
|
||||||
|
scan' scan buffer;
|
||||||
|
read scan ~token_to_region ~style ~log buffer
|
||||||
|
| Some (units, (left_mark, token)) ->
|
||||||
|
log left_mark token;
|
||||||
|
state := ((!state#set_units units)
|
||||||
|
#set_last (token_to_region token))
|
||||||
|
#slide_token token;
|
||||||
|
style token (next_token scan) buffer;
|
||||||
|
patch_buffer (token_to_region token)#byte_pos buffer;
|
||||||
|
token in
|
||||||
|
|
||||||
|
match lexbuf_from_input input with
|
||||||
|
Ok (buffer, close) ->
|
||||||
|
let () =
|
||||||
|
match input with
|
||||||
|
File path when path <> "" -> reset ~file:path buffer
|
||||||
|
| _ -> () in
|
||||||
|
let instance = {
|
||||||
|
read = read scan ~token_to_region ~style;
|
||||||
|
input; buffer; get_win; get_pos; get_last; get_file; close}
|
||||||
|
in Ok instance
|
||||||
|
| Error _ as e -> e
|
203
src/passes/1-parser/shared/LexerLib.mli
Normal file
203
src/passes/1-parser/shared/LexerLib.mli
Normal file
@ -0,0 +1,203 @@
|
|||||||
|
(* A library for writing UTF8-aware lexers *)
|
||||||
|
|
||||||
|
module Region = Simple_utils.Region
|
||||||
|
module Pos = Simple_utils.Pos
|
||||||
|
|
||||||
|
(* The function [rollback] resets the lexing buffer to the state it
|
||||||
|
was when it matched the last regular expression. This function is
|
||||||
|
safe to use only in the semantic action of the rule which last
|
||||||
|
matched. *)
|
||||||
|
|
||||||
|
val rollback : Lexing.lexbuf -> unit
|
||||||
|
|
||||||
|
(* Utility types *)
|
||||||
|
|
||||||
|
type file_path = string
|
||||||
|
type lexeme = string
|
||||||
|
|
||||||
|
(* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)
|
||||||
|
|
||||||
|
(* When scanning structured constructs, like strings and comments, we
|
||||||
|
need to keep the region of the opening symbol (like double quote,
|
||||||
|
"//" or "(*") in order to report any error more precisely. Since
|
||||||
|
ocamllex is byte-oriented, we need to store the parsed bytes as
|
||||||
|
characters in an accumulator [acc] and also its length [len], so,
|
||||||
|
we are done, it is easy to build the string making up the
|
||||||
|
structured construct with [mk_str] (see above).
|
||||||
|
|
||||||
|
The resulting data structure is called a _thread_. (Note for
|
||||||
|
Emacs: "*)".)
|
||||||
|
*)
|
||||||
|
|
||||||
|
type thread = <
|
||||||
|
opening : Region.t;
|
||||||
|
length : int;
|
||||||
|
acc : char list;
|
||||||
|
to_string : string;
|
||||||
|
push_char : char -> thread;
|
||||||
|
push_string : string -> thread;
|
||||||
|
set_opening : Region.t -> thread
|
||||||
|
>
|
||||||
|
|
||||||
|
val mk_thread : Region.t -> lexeme -> thread
|
||||||
|
|
||||||
|
(* STATE *)
|
||||||
|
|
||||||
|
(* Beyond producing tokens, the result of lexing is a _state_. The
|
||||||
|
type [state] represents the abstract logical state of the lexing
|
||||||
|
engine, that is, a value which is threaded during scanning and
|
||||||
|
which denotes useful, high-level information beyond what the type
|
||||||
|
[Lexing.lexbuf] in the standard library already provides for all
|
||||||
|
generic lexers. We qualify it as "logical state" because the lexing
|
||||||
|
buffer itself has a "physical state" defined by the type
|
||||||
|
[Lexing.lexbuf].
|
||||||
|
|
||||||
|
Tokens are the smallest units used by the parser to build the
|
||||||
|
abstract syntax tree. The state includes a queue of recognised
|
||||||
|
tokens, with the markup at the left of its lexeme until either the
|
||||||
|
start of the file or the end of the previously recognised token.
|
||||||
|
|
||||||
|
The markup from the last recognised token or, if the first token
|
||||||
|
has not been recognised yet, from the beginning of the file is
|
||||||
|
stored in the field [markup] of the state because it is a
|
||||||
|
side-effect, with respect to the output token list, and we use a
|
||||||
|
record with a single field [units] because that record may be
|
||||||
|
easily extended during the future maintenance of this lexer.
|
||||||
|
|
||||||
|
The state also includes a field [pos] which holds the current
|
||||||
|
position in the LIGO source file. The position is not always
|
||||||
|
updated after a single character has been matched: that depends on
|
||||||
|
the regular expression that matched the lexing buffer.
|
||||||
|
|
||||||
|
The field [window] is a two-token window, that is, a buffer that
|
||||||
|
contains the last recognised token, and the penultimate (if any).
|
||||||
|
Technically, it is a parametric type, but its use is meant for
|
||||||
|
tokens, wherever they are defined. In [Two (t1,t2)], and in case
|
||||||
|
of a syntax error, [t1] is the first invalid token and [t2] is the
|
||||||
|
last valid one.
|
||||||
|
|
||||||
|
The fields [decoder] and [supply] offer the support needed for
|
||||||
|
the lexing of UTF-8 encoded characters in comments (the only place
|
||||||
|
where they are allowed in LIGO). The former is the decoder proper
|
||||||
|
and the latter is the effectful function [supply] that takes a
|
||||||
|
byte, a start index and a length and feed it to [decoder]. See the
|
||||||
|
documentation of the third-party library Uutf.
|
||||||
|
|
||||||
|
Some methods are now documented.
|
||||||
|
|
||||||
|
The call [state#enqueue token] updates functionally the state
|
||||||
|
[state] by associating the token [token] with the stored markup and
|
||||||
|
enqueuing the pair into the units queue. The field [markup] is then
|
||||||
|
reset to the empty list.
|
||||||
|
|
||||||
|
The call [state#slide_token token] pushes the token [token] in
|
||||||
|
the buffer [buffer]. If the buffer is full, that is, it is [Two
|
||||||
|
(t1,t2)], then the token [t2] is discarded to make room for
|
||||||
|
[token].
|
||||||
|
|
||||||
|
The call [state#sync buffer] updates the current position in
|
||||||
|
accordance with the contents of the lexing buffer, more precisely,
|
||||||
|
depending on the length of the string which has just been
|
||||||
|
recognised by the scanner: that length is used as a positive offset
|
||||||
|
to the current column.
|
||||||
|
*)
|
||||||
|
|
||||||
|
type 'token window =
|
||||||
|
Nil
|
||||||
|
| One of 'token
|
||||||
|
| Two of 'token * 'token
|
||||||
|
|
||||||
|
type 'token state = <
|
||||||
|
units : (Markup.t list * 'token) FQueue.t;
|
||||||
|
markup : Markup.t list;
|
||||||
|
window : 'token window;
|
||||||
|
last : Region.t;
|
||||||
|
pos : Pos.t;
|
||||||
|
decoder : Uutf.decoder;
|
||||||
|
supply : Bytes.t -> int -> int -> unit;
|
||||||
|
block : EvalOpt.block_comment option;
|
||||||
|
line : EvalOpt.line_comment option;
|
||||||
|
|
||||||
|
enqueue : 'token -> 'token state;
|
||||||
|
set_units : (Markup.t list * 'token) FQueue.t -> 'token state;
|
||||||
|
set_last : Region.t -> 'token state;
|
||||||
|
set_pos : Pos.t -> 'token state;
|
||||||
|
slide_token : 'token -> 'token state;
|
||||||
|
|
||||||
|
sync : Lexing.lexbuf -> Region.t * lexeme * 'token state;
|
||||||
|
|
||||||
|
push_newline : Lexing.lexbuf -> 'token state;
|
||||||
|
push_line : thread -> 'token state;
|
||||||
|
push_block : thread -> 'token state;
|
||||||
|
push_space : Lexing.lexbuf -> 'token state;
|
||||||
|
push_tabs : Lexing.lexbuf -> 'token state;
|
||||||
|
push_bom : Lexing.lexbuf -> 'token state;
|
||||||
|
push_markup : Markup.t -> 'token state;
|
||||||
|
>
|
||||||
|
|
||||||
|
(* LEXER INSTANCE *)
|
||||||
|
|
||||||
|
(* The function [open_token_stream] returns a lexer instance made of
|
||||||
|
|
||||||
|
* the input [input] of type [input];
|
||||||
|
* a function [read] that extracts tokens from a lexing buffer,
|
||||||
|
together with a lexing buffer [buffer] to read from,
|
||||||
|
* a function [close] that closes that buffer,
|
||||||
|
* a function [get_pos] that returns the current position, and
|
||||||
|
* a function [get_last] that returns the region of the last
|
||||||
|
recognised token.
|
||||||
|
* a function [get_file] that returns the name of the file being
|
||||||
|
scanned (empty string if [stdin]).
|
||||||
|
|
||||||
|
Note that a module [Token] is exported too, because the signature
|
||||||
|
of the exported functions depend on it.
|
||||||
|
|
||||||
|
The type [window] is a two-token window, that is, a buffer that
|
||||||
|
contains the last recognised token, and the penultimate (if any).
|
||||||
|
|
||||||
|
The call [read ?line ?block ~scan ~token_to_region ~style
|
||||||
|
input] evaluates in a lexer (also known as a tokeniser or scanner)
|
||||||
|
whose type is [log:('token logger) -> Lexing.lexbuf -> 'token], and
|
||||||
|
suitable for a parser generated by Menhir. The argument labelled
|
||||||
|
[log] is a logger, that is, it may print a token and its left
|
||||||
|
markup to a given channel, at the caller's discretion. The function
|
||||||
|
labelled [~scan] is the main scanner of the lexer. The function
|
||||||
|
labelled [~style] is used to check stylistic constraints on the
|
||||||
|
tokens and the markup between them.
|
||||||
|
*)
|
||||||
|
|
||||||
|
type input =
|
||||||
|
File of file_path
|
||||||
|
| String of string
|
||||||
|
| Channel of in_channel
|
||||||
|
| Buffer of Lexing.lexbuf
|
||||||
|
|
||||||
|
type 'token logger = Markup.t list -> 'token -> unit
|
||||||
|
|
||||||
|
type 'token instance = {
|
||||||
|
input : input;
|
||||||
|
read : log:('token logger) -> Lexing.lexbuf -> 'token;
|
||||||
|
buffer : Lexing.lexbuf;
|
||||||
|
get_win : unit -> 'token window;
|
||||||
|
get_pos : unit -> Pos.t;
|
||||||
|
get_last : unit -> Region.t;
|
||||||
|
get_file : unit -> file_path;
|
||||||
|
close : unit -> unit
|
||||||
|
}
|
||||||
|
|
||||||
|
type open_err = File_opening of string
|
||||||
|
|
||||||
|
val lexbuf_from_input :
|
||||||
|
input -> (Lexing.lexbuf * (unit -> unit), open_err) Stdlib.result
|
||||||
|
|
||||||
|
val open_token_stream :
|
||||||
|
?line:EvalOpt.line_comment ->
|
||||||
|
?block:EvalOpt.block_comment ->
|
||||||
|
scan:('token state -> Lexing.lexbuf -> 'token state) ->
|
||||||
|
token_to_region:('token -> Region.t) ->
|
||||||
|
style:('token ->
|
||||||
|
(Lexing.lexbuf -> (Markup.t list * 'token) option) ->
|
||||||
|
Lexing.lexbuf ->
|
||||||
|
unit) ->
|
||||||
|
input ->
|
||||||
|
('token instance, open_err) Stdlib.result
|
@ -5,6 +5,7 @@ module Region = Simple_utils.Region
|
|||||||
module type S =
|
module type S =
|
||||||
sig
|
sig
|
||||||
module Lexer : Lexer.S
|
module Lexer : Lexer.S
|
||||||
|
type token = Lexer.token
|
||||||
|
|
||||||
val output_token :
|
val output_token :
|
||||||
?offsets:bool ->
|
?offsets:bool ->
|
||||||
@ -12,7 +13,7 @@ module type S =
|
|||||||
EvalOpt.command ->
|
EvalOpt.command ->
|
||||||
out_channel ->
|
out_channel ->
|
||||||
Markup.t list ->
|
Markup.t list ->
|
||||||
Lexer.token ->
|
token ->
|
||||||
unit
|
unit
|
||||||
|
|
||||||
type file_path = string
|
type file_path = string
|
||||||
@ -20,8 +21,14 @@ module type S =
|
|||||||
val trace :
|
val trace :
|
||||||
?offsets:bool ->
|
?offsets:bool ->
|
||||||
[`Byte | `Point] ->
|
[`Byte | `Point] ->
|
||||||
EvalOpt.language ->
|
?block:EvalOpt.block_comment ->
|
||||||
Lexer.input ->
|
?line:EvalOpt.line_comment ->
|
||||||
|
token_to_region:(token -> Region.t) ->
|
||||||
|
style:(token ->
|
||||||
|
(Lexing.lexbuf -> (Markup.t list * token) option) ->
|
||||||
|
Lexing.lexbuf ->
|
||||||
|
unit) ->
|
||||||
|
LexerLib.input ->
|
||||||
EvalOpt.command ->
|
EvalOpt.command ->
|
||||||
(unit, string Region.reg) Stdlib.result
|
(unit, string Region.reg) Stdlib.result
|
||||||
end
|
end
|
||||||
@ -30,10 +37,12 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
|
|||||||
struct
|
struct
|
||||||
module Lexer = Lexer
|
module Lexer = Lexer
|
||||||
module Token = Lexer.Token
|
module Token = Lexer.Token
|
||||||
|
type token = Lexer.token
|
||||||
|
|
||||||
(** Pretty-printing in a string the lexemes making up the markup
|
(* Pretty-printing in a string the lexemes making up the markup
|
||||||
between two tokens, concatenated with the last lexeme
|
between two tokens, concatenated with the last lexeme
|
||||||
itself. *)
|
itself. *)
|
||||||
|
|
||||||
let output_token ?(offsets=true) mode command
|
let output_token ?(offsets=true) mode command
|
||||||
channel left_mark token : unit =
|
channel left_mark token : unit =
|
||||||
let output str = Printf.fprintf channel "%s%!" str in
|
let output str = Printf.fprintf channel "%s%!" str in
|
||||||
@ -56,10 +65,16 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
|
|||||||
|
|
||||||
type file_path = string
|
type file_path = string
|
||||||
|
|
||||||
let trace ?(offsets=true) mode lang input command :
|
let trace ?(offsets=true) mode ?block ?line
|
||||||
|
~token_to_region ~style input command :
|
||||||
(unit, string Region.reg) Stdlib.result =
|
(unit, string Region.reg) Stdlib.result =
|
||||||
match Lexer.open_token_stream lang input with
|
match LexerLib.open_token_stream
|
||||||
Ok Lexer.{read; buffer; close; _} ->
|
~scan:Lexer.scan
|
||||||
|
~token_to_region
|
||||||
|
~style
|
||||||
|
?line ?block input
|
||||||
|
with
|
||||||
|
Ok LexerLib.{read; buffer; close; _} ->
|
||||||
let log = output_token ~offsets mode command stdout
|
let log = output_token ~offsets mode command stdout
|
||||||
and close_all () = flush_all (); close () in
|
and close_all () = flush_all (); close () in
|
||||||
let rec iter () =
|
let rec iter () =
|
||||||
@ -68,12 +83,17 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
|
|||||||
if Token.is_eof token
|
if Token.is_eof token
|
||||||
then Stdlib.Ok ()
|
then Stdlib.Ok ()
|
||||||
else iter ()
|
else iter ()
|
||||||
|
| exception Lexer.Token.Error error ->
|
||||||
|
let msg =
|
||||||
|
Lexer.Token.format_error
|
||||||
|
~offsets mode ~file:true error
|
||||||
|
in Stdlib.Error msg
|
||||||
| exception Lexer.Error error ->
|
| exception Lexer.Error error ->
|
||||||
let msg =
|
let msg =
|
||||||
Lexer.format_error ~offsets mode ~file:true error
|
Lexer.format_error ~offsets mode ~file:true error
|
||||||
in Stdlib.Error msg in
|
in Stdlib.Error msg in
|
||||||
let result = iter ()
|
let result = iter ()
|
||||||
in close_all (); result
|
in close_all (); result
|
||||||
| Stdlib.Error (Lexer.File_opening msg) ->
|
| Stdlib.Error (LexerLib.File_opening msg) ->
|
||||||
flush_all (); Stdlib.Error (Region.wrap_ghost msg)
|
flush_all (); Stdlib.Error (Region.wrap_ghost msg)
|
||||||
end
|
end
|
||||||
|
@ -3,6 +3,7 @@ module Region = Simple_utils.Region
|
|||||||
module type S =
|
module type S =
|
||||||
sig
|
sig
|
||||||
module Lexer : Lexer.S
|
module Lexer : Lexer.S
|
||||||
|
type token = Lexer.token
|
||||||
|
|
||||||
val output_token :
|
val output_token :
|
||||||
?offsets:bool ->
|
?offsets:bool ->
|
||||||
@ -10,7 +11,7 @@ module type S =
|
|||||||
EvalOpt.command ->
|
EvalOpt.command ->
|
||||||
out_channel ->
|
out_channel ->
|
||||||
Markup.t list ->
|
Markup.t list ->
|
||||||
Lexer.token ->
|
token ->
|
||||||
unit
|
unit
|
||||||
|
|
||||||
type file_path = string
|
type file_path = string
|
||||||
@ -18,8 +19,14 @@ module type S =
|
|||||||
val trace :
|
val trace :
|
||||||
?offsets:bool ->
|
?offsets:bool ->
|
||||||
[`Byte | `Point] ->
|
[`Byte | `Point] ->
|
||||||
EvalOpt.language ->
|
?block:EvalOpt.block_comment ->
|
||||||
Lexer.input ->
|
?line:EvalOpt.line_comment ->
|
||||||
|
token_to_region:(token -> Region.t) ->
|
||||||
|
style:(token ->
|
||||||
|
(Lexing.lexbuf -> (Markup.t list * token) option) ->
|
||||||
|
Lexing.lexbuf ->
|
||||||
|
unit) ->
|
||||||
|
LexerLib.input ->
|
||||||
EvalOpt.command ->
|
EvalOpt.command ->
|
||||||
(unit, string Region.reg) Stdlib.result
|
(unit, string Region.reg) Stdlib.result
|
||||||
end
|
end
|
||||||
|
@ -39,9 +39,15 @@ module Make (IO: IO) (Lexer: Lexer.S) =
|
|||||||
| Stdlib.Ok pp_buffer ->
|
| Stdlib.Ok pp_buffer ->
|
||||||
(* Running the lexer on the preprocessed input *)
|
(* Running the lexer on the preprocessed input *)
|
||||||
|
|
||||||
let source = Lexer.String (Buffer.contents pp_buffer) in
|
let source = LexerLib.String (Buffer.contents pp_buffer) in
|
||||||
match Lexer.open_token_stream IO.options#lang source with
|
match LexerLib.open_token_stream
|
||||||
Ok Lexer.{read; buffer; close; _} ->
|
?line:IO.options#line
|
||||||
|
?block:IO.options#block
|
||||||
|
~scan:Lexer.scan
|
||||||
|
~token_to_region:Lexer.Token.to_region
|
||||||
|
~style:Lexer.Token.check_right_context
|
||||||
|
source with
|
||||||
|
Ok LexerLib.{read; buffer; close; _} ->
|
||||||
let close_all () = flush_all (); close () in
|
let close_all () = flush_all (); close () in
|
||||||
let rec read_tokens tokens =
|
let rec read_tokens tokens =
|
||||||
match read ~log:(fun _ _ -> ()) buffer with
|
match read ~log:(fun _ _ -> ()) buffer with
|
||||||
@ -49,20 +55,28 @@ module Make (IO: IO) (Lexer: Lexer.S) =
|
|||||||
if Lexer.Token.is_eof token
|
if Lexer.Token.is_eof token
|
||||||
then Stdlib.Ok (List.rev tokens)
|
then Stdlib.Ok (List.rev tokens)
|
||||||
else read_tokens (token::tokens)
|
else read_tokens (token::tokens)
|
||||||
|
| exception Lexer.Token.Error error ->
|
||||||
|
let file =
|
||||||
|
match IO.options#input with
|
||||||
|
None | Some "-" -> false
|
||||||
|
| Some _ -> true in
|
||||||
|
let msg =
|
||||||
|
Lexer.Token.format_error
|
||||||
|
~offsets:IO.options#offsets
|
||||||
|
IO.options#mode ~file error
|
||||||
|
in Stdlib.Error msg
|
||||||
| exception Lexer.Error error ->
|
| exception Lexer.Error error ->
|
||||||
let file =
|
let file =
|
||||||
match IO.options#input with
|
match IO.options#input with
|
||||||
None | Some "-" -> false
|
None | Some "-" -> false
|
||||||
| Some _ -> true in
|
| Some _ -> true in
|
||||||
let () =
|
|
||||||
Printf.eprintf "[LexerUnit] file = %b\n%!" file in
|
|
||||||
let msg =
|
let msg =
|
||||||
Lexer.format_error ~offsets:IO.options#offsets
|
Lexer.format_error ~offsets:IO.options#offsets
|
||||||
IO.options#mode ~file error
|
IO.options#mode ~file error
|
||||||
in Stdlib.Error msg in
|
in Stdlib.Error msg in
|
||||||
let result = read_tokens []
|
let result = read_tokens []
|
||||||
in close_all (); result
|
in close_all (); result
|
||||||
| Stdlib.Error (Lexer.File_opening msg) ->
|
| Stdlib.Error (LexerLib.File_opening msg) ->
|
||||||
flush_all (); Stdlib.Error (Region.wrap_ghost msg) in
|
flush_all (); Stdlib.Error (Region.wrap_ghost msg) in
|
||||||
match IO.options#input with
|
match IO.options#input with
|
||||||
None -> preproc stdin
|
None -> preproc stdin
|
||||||
@ -101,8 +115,11 @@ module Make (IO: IO) (Lexer: Lexer.S) =
|
|||||||
end
|
end
|
||||||
else Log.trace ~offsets:IO.options#offsets
|
else Log.trace ~offsets:IO.options#offsets
|
||||||
IO.options#mode
|
IO.options#mode
|
||||||
IO.options#lang
|
?block:IO.options#block
|
||||||
(Lexer.String preproc_str)
|
?line:IO.options#line
|
||||||
|
~token_to_region:Lexer.Token.to_region
|
||||||
|
~style:Lexer.Token.check_right_context
|
||||||
|
(LexerLib.String preproc_str)
|
||||||
IO.options#cmd
|
IO.options#cmd
|
||||||
in match IO.options#input with
|
in match IO.options#input with
|
||||||
None -> preproc stdin
|
None -> preproc stdin
|
||||||
|
@ -57,7 +57,7 @@ module type PARSER =
|
|||||||
|
|
||||||
module Make (IO: IO)
|
module Make (IO: IO)
|
||||||
(Lexer: Lexer.S)
|
(Lexer: Lexer.S)
|
||||||
(Parser: PARSER with type token = Lexer.Token.token)
|
(Parser: PARSER with type token = Lexer.token)
|
||||||
(ParErr: sig val message : int -> string end) =
|
(ParErr: sig val message : int -> string end) =
|
||||||
struct
|
struct
|
||||||
module I = Parser.MenhirInterpreter
|
module I = Parser.MenhirInterpreter
|
||||||
@ -122,10 +122,10 @@ module Make (IO: IO)
|
|||||||
message
|
message
|
||||||
in
|
in
|
||||||
match get_win () with
|
match get_win () with
|
||||||
Lexer.Nil -> assert false
|
LexerLib.Nil -> assert false
|
||||||
| Lexer.One invalid ->
|
| LexerLib.One invalid ->
|
||||||
raise (Point (message, None, invalid))
|
raise (Point (message, None, invalid))
|
||||||
| Lexer.Two (invalid, valid) ->
|
| LexerLib.Two (invalid, valid) ->
|
||||||
raise (Point (message, Some valid, invalid))
|
raise (Point (message, Some valid, invalid))
|
||||||
|
|
||||||
(* The monolithic API of Menhir *)
|
(* The monolithic API of Menhir *)
|
||||||
@ -143,14 +143,14 @@ module Make (IO: IO)
|
|||||||
~offsets:IO.options#offsets
|
~offsets:IO.options#offsets
|
||||||
IO.options#mode IO.options#cmd stdout
|
IO.options#mode IO.options#cmd stdout
|
||||||
|
|
||||||
let incr_contract Lexer.{read; buffer; get_win; close; _} =
|
let incr_contract LexerLib.{read; buffer; get_win; close; _} =
|
||||||
let supplier = I.lexer_lexbuf_to_supplier (read ~log) buffer
|
let supplier = I.lexer_lexbuf_to_supplier (read ~log) buffer
|
||||||
and failure = failure get_win in
|
and failure = failure get_win in
|
||||||
let parser = Incr.contract buffer.Lexing.lex_curr_p in
|
let parser = Incr.contract buffer.Lexing.lex_curr_p in
|
||||||
let ast = I.loop_handle success failure supplier parser
|
let ast = I.loop_handle success failure supplier parser
|
||||||
in flush_all (); close (); ast
|
in flush_all (); close (); ast
|
||||||
|
|
||||||
let incr_expr Lexer.{read; buffer; get_win; close; _} =
|
let incr_expr LexerLib.{read; buffer; get_win; close; _} =
|
||||||
let supplier = I.lexer_lexbuf_to_supplier (read ~log) buffer
|
let supplier = I.lexer_lexbuf_to_supplier (read ~log) buffer
|
||||||
and failure = failure get_win in
|
and failure = failure get_win in
|
||||||
let parser = Incr.interactive_expr buffer.Lexing.lex_curr_p in
|
let parser = Incr.interactive_expr buffer.Lexing.lex_curr_p in
|
||||||
|
@ -56,7 +56,7 @@ module type PARSER =
|
|||||||
|
|
||||||
module Make (IO: IO)
|
module Make (IO: IO)
|
||||||
(Lexer: Lexer.S)
|
(Lexer: Lexer.S)
|
||||||
(Parser: PARSER with type token = Lexer.Token.token)
|
(Parser: PARSER with type token = Lexer.token)
|
||||||
(ParErr: sig val message : int -> string end) :
|
(ParErr: sig val message : int -> string end) :
|
||||||
sig
|
sig
|
||||||
(* WARNING: The following parsers may all raise [Lexer.Error] *)
|
(* WARNING: The following parsers may all raise [Lexer.Error] *)
|
||||||
@ -78,8 +78,8 @@ module Make (IO: IO)
|
|||||||
|
|
||||||
exception Point of error
|
exception Point of error
|
||||||
|
|
||||||
val incr_contract : Lexer.instance -> Parser.ast
|
val incr_contract : Lexer.token LexerLib.instance -> Parser.ast
|
||||||
val incr_expr : Lexer.instance -> Parser.expr
|
val incr_expr : Lexer.token LexerLib.instance -> Parser.expr
|
||||||
|
|
||||||
val format_error :
|
val format_error :
|
||||||
?offsets:bool -> [`Point | `Byte] -> error -> string Region.reg
|
?offsets:bool -> [`Point | `Byte] -> error -> string Region.reg
|
||||||
|
@ -4,16 +4,15 @@ module Region = Simple_utils.Region
|
|||||||
module Preproc = Preprocessor.Preproc
|
module Preproc = Preprocessor.Preproc
|
||||||
module SSet = Set.Make (String)
|
module SSet = Set.Make (String)
|
||||||
|
|
||||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
|
||||||
|
|
||||||
module type SubIO =
|
module type SubIO =
|
||||||
sig
|
sig
|
||||||
type options = <
|
type options = <
|
||||||
libs : string list;
|
libs : string list;
|
||||||
verbose : SSet.t;
|
verbose : SSet.t;
|
||||||
offsets : bool;
|
offsets : bool;
|
||||||
lang : language;
|
block : EvalOpt.block_comment option;
|
||||||
ext : string; (* ".ligo", ".mligo", ".religo" *)
|
line : EvalOpt.line_comment option;
|
||||||
|
ext : string;
|
||||||
mode : [`Byte | `Point];
|
mode : [`Byte | `Point];
|
||||||
cmd : EvalOpt.command;
|
cmd : EvalOpt.command;
|
||||||
mono : bool
|
mono : bool
|
||||||
@ -23,7 +22,7 @@ module type SubIO =
|
|||||||
val make : input:string option -> expr:bool -> EvalOpt.options
|
val make : input:string option -> expr:bool -> EvalOpt.options
|
||||||
end
|
end
|
||||||
|
|
||||||
module type Pretty =
|
module type Printer =
|
||||||
sig
|
sig
|
||||||
type state
|
type state
|
||||||
type ast
|
type ast
|
||||||
@ -45,7 +44,7 @@ module Make (Lexer: Lexer.S)
|
|||||||
and type expr = AST.expr
|
and type expr = AST.expr
|
||||||
and type token = Lexer.token)
|
and type token = Lexer.token)
|
||||||
(ParErr: sig val message : int -> string end)
|
(ParErr: sig val message : int -> string end)
|
||||||
(ParserLog: Pretty with type ast = AST.t
|
(ParserLog: Printer with type ast = AST.t
|
||||||
and type expr = AST.expr)
|
and type expr = AST.expr)
|
||||||
(SubIO: SubIO) =
|
(SubIO: SubIO) =
|
||||||
struct
|
struct
|
||||||
@ -90,12 +89,12 @@ module Make (Lexer: Lexer.S)
|
|||||||
ParserLog.mk_state ~offsets:SubIO.options#offsets
|
ParserLog.mk_state ~offsets:SubIO.options#offsets
|
||||||
~mode:SubIO.options#mode
|
~mode:SubIO.options#mode
|
||||||
~buffer:output in
|
~buffer:output in
|
||||||
let close () = lexer_inst.Lexer.close () in
|
let close () = lexer_inst.LexerLib.close () in
|
||||||
let expr =
|
let expr =
|
||||||
try
|
try
|
||||||
if SubIO.options#mono then
|
if SubIO.options#mono then
|
||||||
let tokeniser = lexer_inst.Lexer.read ~log
|
let tokeniser = lexer_inst.LexerLib.read ~log
|
||||||
and lexbuf = lexer_inst.Lexer.buffer
|
and lexbuf = lexer_inst.LexerLib.buffer
|
||||||
in Front.mono_expr tokeniser lexbuf
|
in Front.mono_expr tokeniser lexbuf
|
||||||
else
|
else
|
||||||
Front.incr_expr lexer_inst
|
Front.incr_expr lexer_inst
|
||||||
@ -125,12 +124,12 @@ module Make (Lexer: Lexer.S)
|
|||||||
ParserLog.mk_state ~offsets:SubIO.options#offsets
|
ParserLog.mk_state ~offsets:SubIO.options#offsets
|
||||||
~mode:SubIO.options#mode
|
~mode:SubIO.options#mode
|
||||||
~buffer:output in
|
~buffer:output in
|
||||||
let close () = lexer_inst.Lexer.close () in
|
let close () = lexer_inst.LexerLib.close () in
|
||||||
let ast =
|
let ast =
|
||||||
try
|
try
|
||||||
if SubIO.options#mono then
|
if SubIO.options#mono then
|
||||||
let tokeniser = lexer_inst.Lexer.read ~log
|
let tokeniser = lexer_inst.LexerLib.read ~log
|
||||||
and lexbuf = lexer_inst.Lexer.buffer
|
and lexbuf = lexer_inst.LexerLib.buffer
|
||||||
in Front.mono_contract tokeniser lexbuf
|
in Front.mono_contract tokeniser lexbuf
|
||||||
else
|
else
|
||||||
Front.incr_contract lexer_inst
|
Front.incr_contract lexer_inst
|
||||||
@ -164,9 +163,17 @@ module Make (Lexer: Lexer.S)
|
|||||||
|
|
||||||
| exception Lexer.Error err ->
|
| exception Lexer.Error err ->
|
||||||
let file =
|
let file =
|
||||||
lexer_inst.Lexer.buffer.Lexing.lex_curr_p.Lexing.pos_fname in
|
lexer_inst.LexerLib.buffer.Lexing.lex_curr_p.Lexing.pos_fname in
|
||||||
let error =
|
let error = Lexer.format_error
|
||||||
Lexer.format_error ~offsets:SubIO.options#offsets
|
~offsets:SubIO.options#offsets
|
||||||
|
SubIO.options#mode err ~file:(file <> "")
|
||||||
|
in Stdlib.Error error
|
||||||
|
|
||||||
|
| exception Lexer.Token.Error err ->
|
||||||
|
let file =
|
||||||
|
lexer_inst.LexerLib.buffer.Lexing.lex_curr_p.Lexing.pos_fname in
|
||||||
|
let error = Lexer.Token.format_error
|
||||||
|
~offsets:SubIO.options#offsets
|
||||||
SubIO.options#mode err ~file:(file <> "")
|
SubIO.options#mode err ~file:(file <> "")
|
||||||
in Stdlib.Error error
|
in Stdlib.Error error
|
||||||
|
|
||||||
@ -182,11 +189,11 @@ module Make (Lexer: Lexer.S)
|
|||||||
|
|
||||||
| exception Parser.Error ->
|
| exception Parser.Error ->
|
||||||
let invalid, valid_opt =
|
let invalid, valid_opt =
|
||||||
match lexer_inst.Lexer.get_win () with
|
match lexer_inst.LexerLib.get_win () with
|
||||||
Lexer.Nil ->
|
LexerLib.Nil ->
|
||||||
assert false (* Safe: There is always at least EOF. *)
|
assert false (* Safe: There is always at least EOF. *)
|
||||||
| Lexer.One invalid -> invalid, None
|
| LexerLib.One invalid -> invalid, None
|
||||||
| Lexer.Two (invalid, valid) -> invalid, Some valid in
|
| LexerLib.Two (invalid, valid) -> invalid, Some valid in
|
||||||
let point = "", valid_opt, invalid in
|
let point = "", valid_opt, invalid in
|
||||||
let error =
|
let error =
|
||||||
Front.format_error ~offsets:SubIO.options#offsets
|
Front.format_error ~offsets:SubIO.options#offsets
|
||||||
@ -206,8 +213,8 @@ module Make (Lexer: Lexer.S)
|
|||||||
(* Parsing a contract *)
|
(* Parsing a contract *)
|
||||||
|
|
||||||
let gen_parser options input parser =
|
let gen_parser options input parser =
|
||||||
match Lexer.lexbuf_from_input input with
|
match LexerLib.lexbuf_from_input input with
|
||||||
Stdlib.Error (Lexer.File_opening msg) ->
|
Stdlib.Error (LexerLib.File_opening msg) ->
|
||||||
Stdlib.Error (Region.wrap_ghost msg)
|
Stdlib.Error (Region.wrap_ghost msg)
|
||||||
| Ok (lexbuf, close) ->
|
| Ok (lexbuf, close) ->
|
||||||
(* Preprocessing the input source *)
|
(* Preprocessing the input source *)
|
||||||
@ -225,45 +232,52 @@ module Make (Lexer: Lexer.S)
|
|||||||
(* Lexing and parsing the preprocessed input source *)
|
(* Lexing and parsing the preprocessed input source *)
|
||||||
|
|
||||||
let () = close () in
|
let () = close () in
|
||||||
let input' = Lexer.String (Buffer.contents buffer) in
|
let input' = LexerLib.String (Buffer.contents buffer) in
|
||||||
match Lexer.open_token_stream options#lang input' with
|
match LexerLib.open_token_stream
|
||||||
|
~scan:Lexer.scan
|
||||||
|
~token_to_region:Lexer.Token.to_region
|
||||||
|
~style:Lexer.Token.check_right_context
|
||||||
|
?line:options#line
|
||||||
|
?block:options#block
|
||||||
|
input'
|
||||||
|
with
|
||||||
Ok instance ->
|
Ok instance ->
|
||||||
let open Lexing in
|
let open Lexing in
|
||||||
instance.Lexer.buffer.lex_curr_p <-
|
instance.LexerLib.buffer.lex_curr_p <-
|
||||||
{instance.Lexer.buffer.lex_curr_p with pos_fname = file};
|
{instance.LexerLib.buffer.lex_curr_p with pos_fname=file};
|
||||||
apply instance parser
|
apply instance parser
|
||||||
| Stdlib.Error (Lexer.File_opening msg) ->
|
| Stdlib.Error (LexerLib.File_opening msg) ->
|
||||||
Stdlib.Error (Region.wrap_ghost msg)
|
Stdlib.Error (Region.wrap_ghost msg)
|
||||||
|
|
||||||
(* Parsing a contract in a file *)
|
(* Parsing a contract in a file *)
|
||||||
|
|
||||||
let contract_in_file (source : string) =
|
let contract_in_file (source : string) =
|
||||||
let options = SubIO.make ~input:(Some source) ~expr:false
|
let options = SubIO.make ~input:(Some source) ~expr:false
|
||||||
in gen_parser options (Lexer.File source) parse_contract
|
in gen_parser options (LexerLib.File source) parse_contract
|
||||||
|
|
||||||
(* Parsing a contract in a string *)
|
(* Parsing a contract in a string *)
|
||||||
|
|
||||||
let contract_in_string (source : string) =
|
let contract_in_string (source : string) =
|
||||||
let options = SubIO.make ~input:None ~expr:false in
|
let options = SubIO.make ~input:None ~expr:false in
|
||||||
gen_parser options (Lexer.String source) parse_contract
|
gen_parser options (LexerLib.String source) parse_contract
|
||||||
|
|
||||||
(* Parsing a contract in stdin *)
|
(* Parsing a contract in stdin *)
|
||||||
|
|
||||||
let contract_in_stdin () =
|
let contract_in_stdin () =
|
||||||
let options = SubIO.make ~input:None ~expr:false in
|
let options = SubIO.make ~input:None ~expr:false in
|
||||||
gen_parser options (Lexer.Channel stdin) parse_contract
|
gen_parser options (LexerLib.Channel stdin) parse_contract
|
||||||
|
|
||||||
(* Parsing an expression in a string *)
|
(* Parsing an expression in a string *)
|
||||||
|
|
||||||
let expr_in_string (source : string) =
|
let expr_in_string (source : string) =
|
||||||
let options = SubIO.make ~input:None ~expr:true in
|
let options = SubIO.make ~input:None ~expr:true in
|
||||||
gen_parser options (Lexer.String source) parse_expr
|
gen_parser options (LexerLib.String source) parse_expr
|
||||||
|
|
||||||
(* Parsing an expression in stdin *)
|
(* Parsing an expression in stdin *)
|
||||||
|
|
||||||
let expr_in_stdin () =
|
let expr_in_stdin () =
|
||||||
let options = SubIO.make ~input:None ~expr:true in
|
let options = SubIO.make ~input:None ~expr:true in
|
||||||
gen_parser options (Lexer.Channel stdin) parse_expr
|
gen_parser options (LexerLib.Channel stdin) parse_expr
|
||||||
|
|
||||||
(* Preprocess only *)
|
(* Preprocess only *)
|
||||||
|
|
||||||
|
@ -2,18 +2,19 @@
|
|||||||
|
|
||||||
module Region = Simple_utils.Region
|
module Region = Simple_utils.Region
|
||||||
|
|
||||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
|
||||||
|
|
||||||
module SSet : Set.S with type elt = string and type t = Set.Make(String).t
|
module SSet : Set.S with type elt = string and type t = Set.Make(String).t
|
||||||
|
|
||||||
|
(* A subtype of [EvalOpt.options] *)
|
||||||
|
|
||||||
module type SubIO =
|
module type SubIO =
|
||||||
sig
|
sig
|
||||||
type options = <
|
type options = <
|
||||||
libs : string list;
|
libs : string list;
|
||||||
verbose : SSet.t;
|
verbose : SSet.t;
|
||||||
offsets : bool;
|
offsets : bool;
|
||||||
lang : language;
|
block : EvalOpt.block_comment option;
|
||||||
ext : string; (* ".ligo", ".mligo", ".religo" *)
|
line : EvalOpt.line_comment option;
|
||||||
|
ext : string;
|
||||||
mode : [`Byte | `Point];
|
mode : [`Byte | `Point];
|
||||||
cmd : EvalOpt.command;
|
cmd : EvalOpt.command;
|
||||||
mono : bool
|
mono : bool
|
||||||
@ -23,7 +24,9 @@ module type SubIO =
|
|||||||
val make : input:string option -> expr:bool -> EvalOpt.options
|
val make : input:string option -> expr:bool -> EvalOpt.options
|
||||||
end
|
end
|
||||||
|
|
||||||
module type Pretty =
|
(* Signature for the printers *)
|
||||||
|
|
||||||
|
module type Printer =
|
||||||
sig
|
sig
|
||||||
type state
|
type state
|
||||||
type ast
|
type ast
|
||||||
@ -38,6 +41,8 @@ module type Pretty =
|
|||||||
val print_expr : state -> expr -> unit
|
val print_expr : state -> expr -> unit
|
||||||
end
|
end
|
||||||
|
|
||||||
|
(* Main functor to make the parser *)
|
||||||
|
|
||||||
module Make (Lexer : Lexer.S)
|
module Make (Lexer : Lexer.S)
|
||||||
(AST : sig type t type expr end)
|
(AST : sig type t type expr end)
|
||||||
(Parser : ParserAPI.PARSER
|
(Parser : ParserAPI.PARSER
|
||||||
@ -45,7 +50,7 @@ module Make (Lexer : Lexer.S)
|
|||||||
and type expr = AST.expr
|
and type expr = AST.expr
|
||||||
and type token = Lexer.token)
|
and type token = Lexer.token)
|
||||||
(ParErr : sig val message : int -> string end)
|
(ParErr : sig val message : int -> string end)
|
||||||
(ParserLog : Pretty with type ast = AST.t
|
(ParserLog : Printer with type ast = AST.t
|
||||||
and type expr = AST.expr)
|
and type expr = AST.expr)
|
||||||
(SubIO: SubIO) :
|
(SubIO: SubIO) :
|
||||||
sig
|
sig
|
||||||
@ -82,4 +87,4 @@ module Make (Lexer : Lexer.S)
|
|||||||
|
|
||||||
val preprocess :
|
val preprocess :
|
||||||
string -> (Buffer.t, message Region.reg) Stdlib.result
|
string -> (Buffer.t, message Region.reg) Stdlib.result
|
||||||
end
|
end
|
||||||
|
@ -13,6 +13,7 @@
|
|||||||
(preprocess
|
(preprocess
|
||||||
(pps bisect_ppx --conditional))
|
(pps bisect_ppx --conditional))
|
||||||
(modules
|
(modules
|
||||||
|
LexerLib
|
||||||
LexerUnit
|
LexerUnit
|
||||||
ParserUnit
|
ParserUnit
|
||||||
ParserAPI
|
ParserAPI
|
||||||
|
@ -1 +1 @@
|
|||||||
const a: string = -0x222;
|
const a: string = - (**) 0x2222
|
||||||
|
@ -1 +1 @@
|
|||||||
let a = -0x222;
|
let a = - (**) 0x2222
|
||||||
|
@ -1 +1 @@
|
|||||||
let a = -0x222;
|
let a = - /**/ 0x2222;
|
||||||
|
@ -1 +0,0 @@
|
|||||||
let arguments = 1;
|
|
2
vendors/Preprocessor/E_LexerMain.ml
vendored
2
vendors/Preprocessor/E_LexerMain.ml
vendored
@ -5,7 +5,7 @@ module Region = Simple_utils.Region
|
|||||||
|
|
||||||
let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg
|
let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg
|
||||||
|
|
||||||
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo")
|
let options = EvalOpt.read ".ligo" (* No comments allowed *)
|
||||||
|
|
||||||
let lex in_chan =
|
let lex in_chan =
|
||||||
let buffer = Lexing.from_channel in_chan in
|
let buffer = Lexing.from_channel in_chan in
|
||||||
|
2
vendors/Preprocessor/E_ParserMain.ml
vendored
2
vendors/Preprocessor/E_ParserMain.ml
vendored
@ -5,7 +5,7 @@ module Region = Simple_utils.Region
|
|||||||
|
|
||||||
let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg
|
let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg
|
||||||
|
|
||||||
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo")
|
let options = EvalOpt.read ".ligo" (* No comments allowed *)
|
||||||
|
|
||||||
let parse in_chan =
|
let parse in_chan =
|
||||||
let buffer = Lexing.from_channel in_chan in
|
let buffer = Lexing.from_channel in_chan in
|
||||||
|
49
vendors/Preprocessor/EvalOpt.ml
vendored
49
vendors/Preprocessor/EvalOpt.ml
vendored
@ -2,29 +2,33 @@
|
|||||||
|
|
||||||
(* The type [options] gathers the command-line options. *)
|
(* The type [options] gathers the command-line options. *)
|
||||||
|
|
||||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
|
||||||
|
|
||||||
let lang_to_string = function
|
|
||||||
`PascaLIGO -> "PascaLIGO"
|
|
||||||
| `CameLIGO -> "CameLIGO"
|
|
||||||
| `ReasonLIGO -> "ReasonLIGO"
|
|
||||||
|
|
||||||
module SSet = Set.Make (String)
|
module SSet = Set.Make (String)
|
||||||
|
|
||||||
|
type line_comment = string (* Opening of a line comment *)
|
||||||
|
type block_comment = <opening : string; closing : string>
|
||||||
|
|
||||||
|
let mk_block ~opening ~closing : block_comment =
|
||||||
|
object
|
||||||
|
method opening = opening
|
||||||
|
method closing = closing
|
||||||
|
end
|
||||||
|
|
||||||
type options = <
|
type options = <
|
||||||
input : string option;
|
input : string option;
|
||||||
libs : string list;
|
libs : string list;
|
||||||
verbose : SSet.t;
|
verbose : SSet.t;
|
||||||
offsets : bool;
|
offsets : bool;
|
||||||
lang : language;
|
block : block_comment option;
|
||||||
ext : string (* ".ligo", ".mligo", ".religo" *)
|
line : line_comment option;
|
||||||
|
ext : string
|
||||||
>
|
>
|
||||||
|
|
||||||
let make ~input ~libs ~lang ~offsets ~verbose ~ext : options =
|
let make ~input ~libs ?block ?line ~offsets ~verbose ~ext : options =
|
||||||
object
|
object
|
||||||
method input = input
|
method input = input
|
||||||
method libs = libs
|
method libs = libs
|
||||||
method lang = lang
|
method block = block
|
||||||
|
method line = line
|
||||||
method offsets = offsets
|
method offsets = offsets
|
||||||
method verbose = verbose
|
method verbose = verbose
|
||||||
method ext = ext
|
method ext = ext
|
||||||
@ -47,10 +51,10 @@ let abort msg =
|
|||||||
|
|
||||||
(* Help *)
|
(* Help *)
|
||||||
|
|
||||||
let help lang ext () =
|
let help ext () =
|
||||||
let file = Filename.basename Sys.argv.(0) in
|
let file = Filename.basename Sys.argv.(0) in
|
||||||
printf "Usage: %s [<option> ...] [<input>%s | \"-\"]\n" file ext;
|
printf "Usage: %s [<option> ...] [<input>%s | \"-\"]\n" file ext;
|
||||||
printf "where <input>%s is the %s source file (default: stdin),\n" ext lang;
|
printf "where <input>%s is the LIGO source file (default: stdin),\n" ext;
|
||||||
print "and each <option> (if any) is one of the following:";
|
print "and each <option> (if any) is one of the following:";
|
||||||
print " -I <paths> Inclusion paths (colon-separated)";
|
print " -I <paths> Inclusion paths (colon-separated)";
|
||||||
print " --columns Columns for source locations";
|
print " --columns Columns for source locations";
|
||||||
@ -74,11 +78,10 @@ let add_verbose d =
|
|||||||
verbose := List.fold_left (fun x y -> SSet.add y x)
|
verbose := List.fold_left (fun x y -> SSet.add y x)
|
||||||
!verbose
|
!verbose
|
||||||
(split_at_colon d)
|
(split_at_colon d)
|
||||||
let specs lang ext =
|
let specs ext =
|
||||||
let lang_str = lang_to_string lang in
|
let open! Getopt in [
|
||||||
let open!Getopt in [
|
|
||||||
'I', nolong, None, Some add_path;
|
'I', nolong, None, Some add_path;
|
||||||
'h', "help", Some (help lang_str ext), None;
|
'h', "help", Some (help ext), None;
|
||||||
noshort, "columns", set columns true, None;
|
noshort, "columns", set columns true, None;
|
||||||
noshort, "verbose", None, Some add_verbose
|
noshort, "verbose", None, Some add_verbose
|
||||||
]
|
]
|
||||||
@ -92,7 +95,7 @@ let anonymous arg =
|
|||||||
|
|
||||||
(* Checking options and exporting them as non-mutable values *)
|
(* Checking options and exporting them as non-mutable values *)
|
||||||
|
|
||||||
let check lang ext =
|
let check ?block ?line ~ext =
|
||||||
let libs = !libs
|
let libs = !libs
|
||||||
|
|
||||||
and offsets = not !columns
|
and offsets = not !columns
|
||||||
@ -109,16 +112,18 @@ let check lang ext =
|
|||||||
else abort "Source file not found."
|
else abort "Source file not found."
|
||||||
else abort ("Source file lacks the extension " ^ ext ^ ".")
|
else abort ("Source file lacks the extension " ^ ext ^ ".")
|
||||||
|
|
||||||
in make ~input ~libs ~lang ~offsets ~verbose ~ext
|
in make ~input ~libs ?block ?line ~offsets ~verbose ~ext
|
||||||
|
|
||||||
(* Parsing the command-line options *)
|
(* Parsing the command-line options *)
|
||||||
|
|
||||||
let read ~lang:(lang : language) ~ext:(ext : string) =
|
type extension = string
|
||||||
|
|
||||||
|
let read ?block ?line (ext: extension) =
|
||||||
try
|
try
|
||||||
Getopt.parse_cmdline (specs lang ext) anonymous;
|
Getopt.parse_cmdline (specs ext) anonymous;
|
||||||
(verb_str :=
|
(verb_str :=
|
||||||
let apply e a =
|
let apply e a =
|
||||||
if a = "" then e else sprintf "%s, %s" e a
|
if a = "" then e else sprintf "%s, %s" e a
|
||||||
in SSet.fold apply !verbose "");
|
in SSet.fold apply !verbose "");
|
||||||
check lang ext
|
check ?block ?line ~ext
|
||||||
with Getopt.Error msg -> abort msg
|
with Getopt.Error msg -> abort msg
|
||||||
|
22
vendors/Preprocessor/EvalOpt.mli
vendored
22
vendors/Preprocessor/EvalOpt.mli
vendored
@ -2,25 +2,28 @@
|
|||||||
|
|
||||||
(* The type [options] gathers the command-line options. *)
|
(* The type [options] gathers the command-line options. *)
|
||||||
|
|
||||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
|
||||||
|
|
||||||
val lang_to_string : language -> string
|
|
||||||
|
|
||||||
module SSet : Set.S with type elt = string and type t = Set.Make(String).t
|
module SSet : Set.S with type elt = string and type t = Set.Make(String).t
|
||||||
|
|
||||||
|
type line_comment = string (* Opening of a line comment *)
|
||||||
|
type block_comment = <opening : string; closing : string>
|
||||||
|
|
||||||
|
val mk_block : opening:string -> closing:string -> block_comment
|
||||||
|
|
||||||
type options = <
|
type options = <
|
||||||
input : string option;
|
input : string option;
|
||||||
libs : string list;
|
libs : string list;
|
||||||
verbose : SSet.t;
|
verbose : SSet.t;
|
||||||
offsets : bool;
|
offsets : bool;
|
||||||
lang : language;
|
block : block_comment option;
|
||||||
ext : string (* ".ligo", ".mligo", ".religo" *)
|
line : line_comment option;
|
||||||
|
ext : string
|
||||||
>
|
>
|
||||||
|
|
||||||
val make :
|
val make :
|
||||||
input:string option ->
|
input:string option ->
|
||||||
libs:string list ->
|
libs:string list ->
|
||||||
lang:language ->
|
?block:block_comment ->
|
||||||
|
?line:line_comment ->
|
||||||
offsets:bool ->
|
offsets:bool ->
|
||||||
verbose:SSet.t ->
|
verbose:SSet.t ->
|
||||||
ext:string ->
|
ext:string ->
|
||||||
@ -30,4 +33,7 @@ val make :
|
|||||||
the name of the concrete syntax. This is needed to correctly handle
|
the name of the concrete syntax. This is needed to correctly handle
|
||||||
comments. *)
|
comments. *)
|
||||||
|
|
||||||
val read : lang:language -> ext:string -> options
|
type extension = string
|
||||||
|
|
||||||
|
val read :
|
||||||
|
?block:block_comment -> ?line:line_comment -> extension -> options
|
||||||
|
4
vendors/Preprocessor/Preproc.mli
vendored
4
vendors/Preprocessor/Preproc.mli
vendored
@ -15,7 +15,7 @@ type error =
|
|||||||
| No_line_indicator
|
| No_line_indicator
|
||||||
| End_line_indicator
|
| End_line_indicator
|
||||||
| Newline_in_string (* For #include argument only *)
|
| Newline_in_string (* For #include argument only *)
|
||||||
| Open_string (* For #include argument only *)
|
| Unterminated_string (* For #include argument only *)
|
||||||
| Dangling_endif
|
| Dangling_endif
|
||||||
| Open_region_in_conditional
|
| Open_region_in_conditional
|
||||||
| Dangling_endregion
|
| Dangling_endregion
|
||||||
@ -29,10 +29,10 @@ type error =
|
|||||||
| Multiply_defined_symbol of string
|
| Multiply_defined_symbol of string
|
||||||
| Error_directive of string
|
| Error_directive of string
|
||||||
| Parse_error
|
| Parse_error
|
||||||
| No_line_comment_or_blank
|
|
||||||
| Invalid_symbol
|
| Invalid_symbol
|
||||||
| File_not_found of string
|
| File_not_found of string
|
||||||
| Invalid_character of char
|
| Invalid_character of char
|
||||||
|
| Unterminated_comment of string
|
||||||
|
|
||||||
val format :
|
val format :
|
||||||
?offsets:bool -> error Region.reg -> file:bool -> string Region.reg
|
?offsets:bool -> error Region.reg -> file:bool -> string Region.reg
|
||||||
|
324
vendors/Preprocessor/Preproc.mll
vendored
324
vendors/Preprocessor/Preproc.mll
vendored
@ -44,19 +44,6 @@ type mode = Copy | Skip
|
|||||||
type cond = If of mode | Elif of mode | Else | Region
|
type cond = If of mode | Elif of mode | Else | Region
|
||||||
type trace = cond list
|
type trace = cond list
|
||||||
|
|
||||||
(* Line offsets
|
|
||||||
|
|
||||||
The value [Inline] of type [offset] means that the current location
|
|
||||||
cannot be reached from the start of the line with only white
|
|
||||||
space. The same holds for the special value [Prefix 0]. Values of
|
|
||||||
the form [Prefix n] mean that the current location can be reached
|
|
||||||
from the start of the line with [n] white spaces (padding). These
|
|
||||||
distinctions are needed because preprocessor directives cannot
|
|
||||||
occur inside lines.
|
|
||||||
*)
|
|
||||||
|
|
||||||
type offset = Prefix of int | Inline
|
|
||||||
|
|
||||||
(* Environments *)
|
(* Environments *)
|
||||||
|
|
||||||
module Env = Set.Make (String)
|
module Env = Set.Make (String)
|
||||||
@ -78,8 +65,6 @@ in function
|
|||||||
* the field [env] records the symbols defined;
|
* the field [env] records the symbols defined;
|
||||||
* the field [mode] informs whether the preprocessor is in copying or
|
* the field [mode] informs whether the preprocessor is in copying or
|
||||||
skipping mode;
|
skipping mode;
|
||||||
* the field [offset] tells whether the current location can be
|
|
||||||
reached from the start of the line with only white space;
|
|
||||||
* the field [trace] is a stack of previous, still active conditional
|
* the field [trace] is a stack of previous, still active conditional
|
||||||
directives;
|
directives;
|
||||||
* the field [out] keeps the output buffer;
|
* the field [out] keeps the output buffer;
|
||||||
@ -92,7 +77,6 @@ in function
|
|||||||
type state = {
|
type state = {
|
||||||
env : Env.t;
|
env : Env.t;
|
||||||
mode : mode;
|
mode : mode;
|
||||||
offset : offset;
|
|
||||||
trace : trace;
|
trace : trace;
|
||||||
out : Buffer.t;
|
out : Buffer.t;
|
||||||
incl : in_channel list;
|
incl : in_channel list;
|
||||||
@ -117,7 +101,7 @@ type error =
|
|||||||
| No_line_indicator
|
| No_line_indicator
|
||||||
| End_line_indicator
|
| End_line_indicator
|
||||||
| Newline_in_string
|
| Newline_in_string
|
||||||
| Open_string
|
| Unterminated_string
|
||||||
| Dangling_endif
|
| Dangling_endif
|
||||||
| Open_region_in_conditional
|
| Open_region_in_conditional
|
||||||
| Dangling_endregion
|
| Dangling_endregion
|
||||||
@ -131,10 +115,10 @@ type error =
|
|||||||
| Multiply_defined_symbol of string
|
| Multiply_defined_symbol of string
|
||||||
| Error_directive of string
|
| Error_directive of string
|
||||||
| Parse_error
|
| Parse_error
|
||||||
| No_line_comment_or_blank
|
|
||||||
| Invalid_symbol
|
| Invalid_symbol
|
||||||
| File_not_found of string
|
| File_not_found of string
|
||||||
| Invalid_character of char
|
| Invalid_character of char
|
||||||
|
| Unterminated_comment of string
|
||||||
|
|
||||||
let error_to_string = function
|
let error_to_string = function
|
||||||
Directive_inside_line ->
|
Directive_inside_line ->
|
||||||
@ -151,7 +135,7 @@ let error_to_string = function
|
|||||||
Hint: Try a string, end of line, or a line comment."
|
Hint: Try a string, end of line, or a line comment."
|
||||||
| Newline_in_string ->
|
| Newline_in_string ->
|
||||||
sprintf "Invalid newline character in string."
|
sprintf "Invalid newline character in string."
|
||||||
| Open_string ->
|
| Unterminated_string ->
|
||||||
sprintf "Unterminated string.\n\
|
sprintf "Unterminated string.\n\
|
||||||
Hint: Close with double quotes."
|
Hint: Close with double quotes."
|
||||||
| Dangling_endif ->
|
| Dangling_endif ->
|
||||||
@ -187,14 +171,15 @@ let error_to_string = function
|
|||||||
msg
|
msg
|
||||||
| Parse_error ->
|
| Parse_error ->
|
||||||
"Parse error in expression."
|
"Parse error in expression."
|
||||||
| No_line_comment_or_blank ->
|
|
||||||
"Line comment or whitespace expected."
|
|
||||||
| Invalid_symbol ->
|
| Invalid_symbol ->
|
||||||
"Expected a symbol (identifier)."
|
"Expected a symbol (identifier)."
|
||||||
| File_not_found name ->
|
| File_not_found name ->
|
||||||
sprintf "File \"%s\" to include not found." name
|
sprintf "File \"%s\" to include not found." name
|
||||||
| Invalid_character c ->
|
| Invalid_character c ->
|
||||||
E_Lexer.error_to_string (E_Lexer.Invalid_character c)
|
E_Lexer.error_to_string (E_Lexer.Invalid_character c)
|
||||||
|
| Unterminated_comment ending ->
|
||||||
|
sprintf "Unterminated comment.\n\
|
||||||
|
Hint: Close with \"%s\"." ending
|
||||||
|
|
||||||
let format ?(offsets=true) Region.{region; value} ~file =
|
let format ?(offsets=true) Region.{region; value} ~file =
|
||||||
let msg = error_to_string value
|
let msg = error_to_string value
|
||||||
@ -224,7 +209,7 @@ let fail error state buffer = stop error state (mk_reg buffer)
|
|||||||
let reduce_cond state region =
|
let reduce_cond state region =
|
||||||
let rec reduce = function
|
let rec reduce = function
|
||||||
[] -> stop Dangling_endif state region
|
[] -> stop Dangling_endif state region
|
||||||
| If mode::trace -> {state with mode; trace; offset = Prefix 0}
|
| If mode::trace -> {state with mode; trace}
|
||||||
| Region::_ -> stop Open_region_in_conditional state region
|
| Region::_ -> stop Open_region_in_conditional state region
|
||||||
| _::trace -> reduce trace
|
| _::trace -> reduce trace
|
||||||
in reduce state.trace
|
in reduce state.trace
|
||||||
@ -235,7 +220,7 @@ let reduce_cond state region =
|
|||||||
let reduce_region state region =
|
let reduce_region state region =
|
||||||
match state.trace with
|
match state.trace with
|
||||||
[] -> stop Dangling_endregion state region
|
[] -> stop Dangling_endregion state region
|
||||||
| Region::trace -> {state with trace; offset = Prefix 0}
|
| Region::trace -> {state with trace}
|
||||||
| _ -> stop Conditional_in_region state region
|
| _ -> stop Conditional_in_region state region
|
||||||
|
|
||||||
(* The function [extend] is called when encountering conditional
|
(* The function [extend] is called when encountering conditional
|
||||||
@ -286,7 +271,7 @@ let find dir file libs =
|
|||||||
|
|
||||||
let copy state buffer = Buffer.add_string state.out (Lexing.lexeme buffer)
|
let copy state buffer = Buffer.add_string state.out (Lexing.lexeme buffer)
|
||||||
|
|
||||||
(* End of lines *)
|
(* End of lines are always copied *)
|
||||||
|
|
||||||
let proc_nl state buffer = Lexing.new_line buffer; copy state buffer
|
let proc_nl state buffer = Lexing.new_line buffer; copy state buffer
|
||||||
|
|
||||||
@ -294,13 +279,6 @@ let proc_nl state buffer = Lexing.new_line buffer; copy state buffer
|
|||||||
|
|
||||||
let print state string = Buffer.add_string state.out string
|
let print state string = Buffer.add_string state.out string
|
||||||
|
|
||||||
(* Expanding the offset into whitespace *)
|
|
||||||
|
|
||||||
let expand_offset state =
|
|
||||||
match state.offset with
|
|
||||||
Prefix 0 | Inline -> ()
|
|
||||||
| Prefix n -> print state (String.make n ' ')
|
|
||||||
|
|
||||||
(* Evaluating a preprocessor expression
|
(* Evaluating a preprocessor expression
|
||||||
|
|
||||||
The evaluation of conditional directives may involve symbols whose
|
The evaluation of conditional directives may involve symbols whose
|
||||||
@ -346,6 +324,35 @@ let letter = small | capital
|
|||||||
let ident = letter (letter | '_' | digit)*
|
let ident = letter (letter | '_' | digit)*
|
||||||
let directive = '#' (blank* as space) (small+ as id)
|
let directive = '#' (blank* as space) (small+ as id)
|
||||||
|
|
||||||
|
(* Comments *)
|
||||||
|
|
||||||
|
let pascaligo_block_comment_opening = "(*"
|
||||||
|
let pascaligo_block_comment_closing = "*)"
|
||||||
|
let pascaligo_line_comment = "//"
|
||||||
|
|
||||||
|
let cameligo_block_comment_opening = "(*"
|
||||||
|
let cameligo_block_comment_closing = "*)"
|
||||||
|
let cameligo_line_comment = "//"
|
||||||
|
|
||||||
|
let reasonligo_block_comment_opening = "/*"
|
||||||
|
let reasonligo_block_comment_closing = "*/"
|
||||||
|
let reasonligo_line_comment = "//"
|
||||||
|
|
||||||
|
let block_comment_openings =
|
||||||
|
pascaligo_block_comment_opening
|
||||||
|
| cameligo_block_comment_opening
|
||||||
|
| reasonligo_block_comment_opening
|
||||||
|
|
||||||
|
let block_comment_closings =
|
||||||
|
pascaligo_block_comment_closing
|
||||||
|
| cameligo_block_comment_closing
|
||||||
|
| reasonligo_block_comment_closing
|
||||||
|
|
||||||
|
let line_comments =
|
||||||
|
pascaligo_line_comment
|
||||||
|
| cameligo_line_comment
|
||||||
|
| reasonligo_line_comment
|
||||||
|
|
||||||
(* Rules *)
|
(* Rules *)
|
||||||
|
|
||||||
(* The rule [scan] scans the input buffer for directives, strings,
|
(* The rule [scan] scans the input buffer for directives, strings,
|
||||||
@ -354,19 +361,17 @@ let directive = '#' (blank* as space) (small+ as id)
|
|||||||
depending on the compilation directives. If not copied, new line
|
depending on the compilation directives. If not copied, new line
|
||||||
characters are output.
|
characters are output.
|
||||||
|
|
||||||
Scanning is triggered by the function call [scan env mode offset
|
Scanning is triggered by the function call [scan env mode trace
|
||||||
trace lexbuf], where [env] is the set of defined symbols
|
lexbuf], where [env] is the set of defined symbols (introduced by
|
||||||
(introduced by `#define'), [mode] specifies whether we are copying
|
`#define'), [mode] specifies whether we are copying or skipping the
|
||||||
or skipping the input, [offset] informs about the location in the
|
input, and [trace] is the stack of conditional directives read so
|
||||||
line (either there is a prefix of blanks, or at least a non-blank
|
far.
|
||||||
character has been read), and [trace] is the stack of conditional
|
|
||||||
directives read so far.
|
|
||||||
|
|
||||||
The first call is [scan {env=Env.empty; mode=Copy; offset = Prefix
|
The first call is [scan {env=Env.empty; mode=Copy; trace=[];
|
||||||
0; trace=[]; incl=[]; opt}], meaning that we start with an empty
|
incl=[]; opt}], meaning that we start with an empty environment,
|
||||||
environment, that copying the input is enabled by default, and that
|
that copying the input is enabled by default, and that we are at
|
||||||
we are at the start of a line and no previous conditional
|
the start of a line and no previous conditional directives have
|
||||||
directives have been read yet. The field [opt] is the CLI options.
|
been read yet. The field [opt] is the CLI options.
|
||||||
|
|
||||||
When an "#if" is matched, the trace is extended by the call [extend
|
When an "#if" is matched, the trace is extended by the call [extend
|
||||||
lexbuf (If mode) trace], during the evaluation of which the
|
lexbuf (If mode) trace], during the evaluation of which the
|
||||||
@ -386,12 +391,11 @@ let directive = '#' (blank* as space) (small+ as id)
|
|||||||
value of the conditional expression must be ignored (but not its
|
value of the conditional expression must be ignored (but not its
|
||||||
syntax), and we continue skipping the input.
|
syntax), and we continue skipping the input.
|
||||||
|
|
||||||
When an "#else" is matched, the trace is extended with [Else],
|
When an "#else" is matched, the trace is extended with [Else], then
|
||||||
then, if the directive is not at a wrong offset, the rest of the
|
the rest of the line is scanned with [skip_line]. If we were in
|
||||||
line is scanned with [skip_line]. If we were in copy mode, the new
|
copy mode, the new mode toggles to skipping mode; otherwise, the
|
||||||
mode toggles to skipping mode; otherwise, the trace is searched for
|
trace is searched for the last encountered "#if" of "#elif" and the
|
||||||
the last encountered "#if" of "#elif" and the associated mode is
|
associated mode is restored.
|
||||||
restored.
|
|
||||||
|
|
||||||
The case "#elif" is the result of the fusion (in the technical
|
The case "#elif" is the result of the fusion (in the technical
|
||||||
sense) of the code for dealing with an "#else" followed by an
|
sense) of the code for dealing with an "#else" followed by an
|
||||||
@ -465,28 +469,23 @@ let directive = '#' (blank* as space) (small+ as id)
|
|||||||
Important note: Comments and strings are recognised as such only in
|
Important note: Comments and strings are recognised as such only in
|
||||||
copy mode, which is a different behaviour from the preprocessor of
|
copy mode, which is a different behaviour from the preprocessor of
|
||||||
GNU GCC, which always does.
|
GNU GCC, which always does.
|
||||||
*)
|
*)
|
||||||
|
|
||||||
rule scan state = parse
|
rule scan state = parse
|
||||||
nl { expand_offset state; proc_nl state lexbuf;
|
nl { proc_nl state lexbuf; scan state lexbuf }
|
||||||
scan {state with offset = Prefix 0} lexbuf }
|
| blank { if state.mode = Copy then copy state lexbuf;
|
||||||
| blank { match state.offset with
|
|
||||||
Prefix n ->
|
|
||||||
scan {state with offset = Prefix (n+1)} lexbuf
|
|
||||||
| Inline ->
|
|
||||||
if state.mode = Copy then copy state lexbuf;
|
|
||||||
scan state lexbuf }
|
scan state lexbuf }
|
||||||
| directive {
|
| directive {
|
||||||
|
let region = mk_reg lexbuf in
|
||||||
if not (List.mem id directives)
|
if not (List.mem id directives)
|
||||||
then begin
|
then begin
|
||||||
if state.mode = Copy then copy state lexbuf;
|
if state.mode = Copy then copy state lexbuf;
|
||||||
scan state lexbuf
|
scan state lexbuf
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
if state.offset = Inline
|
if region#start#offset `Byte > 0
|
||||||
then fail Directive_inside_line state lexbuf
|
then fail Directive_inside_line state lexbuf
|
||||||
else
|
else
|
||||||
let region = mk_reg lexbuf in
|
|
||||||
match id with
|
match id with
|
||||||
"include" ->
|
"include" ->
|
||||||
let line = Lexing.(lexbuf.lex_curr_p.pos_lnum)
|
let line = Lexing.(lexbuf.lex_curr_p.pos_lnum)
|
||||||
@ -517,7 +516,7 @@ rule scan state = parse
|
|||||||
let mode = expr state lexbuf in
|
let mode = expr state lexbuf in
|
||||||
let mode = if state.mode = Copy then mode else Skip in
|
let mode = if state.mode = Copy then mode else Skip in
|
||||||
let trace = extend (If state.mode) state region in
|
let trace = extend (If state.mode) state region in
|
||||||
let state = {state with mode; offset = Prefix 0; trace}
|
let state = {state with mode; trace}
|
||||||
in scan state lexbuf
|
in scan state lexbuf
|
||||||
| "else" ->
|
| "else" ->
|
||||||
let () = skip_line state lexbuf in
|
let () = skip_line state lexbuf in
|
||||||
@ -525,7 +524,7 @@ rule scan state = parse
|
|||||||
Copy -> Skip
|
Copy -> Skip
|
||||||
| Skip -> last_mode state.trace in
|
| Skip -> last_mode state.trace in
|
||||||
let trace = extend Else state region
|
let trace = extend Else state region
|
||||||
in scan {state with mode; offset = Prefix 0; trace} lexbuf
|
in scan {state with mode; trace} lexbuf
|
||||||
| "elif" ->
|
| "elif" ->
|
||||||
let mode = expr state lexbuf in
|
let mode = expr state lexbuf in
|
||||||
let trace, mode =
|
let trace, mode =
|
||||||
@ -534,7 +533,7 @@ rule scan state = parse
|
|||||||
| Skip -> let old_mode = last_mode state.trace
|
| Skip -> let old_mode = last_mode state.trace
|
||||||
in extend (Elif old_mode) state region,
|
in extend (Elif old_mode) state region,
|
||||||
if old_mode = Copy then mode else Skip
|
if old_mode = Copy then mode else Skip
|
||||||
in scan {state with mode; offset = Prefix 0; trace} lexbuf
|
in scan {state with mode; trace} lexbuf
|
||||||
| "endif" ->
|
| "endif" ->
|
||||||
skip_line state lexbuf;
|
skip_line state lexbuf;
|
||||||
scan (reduce_cond state region) lexbuf
|
scan (reduce_cond state region) lexbuf
|
||||||
@ -544,89 +543,81 @@ rule scan state = parse
|
|||||||
then stop (Reserved_symbol id) state region;
|
then stop (Reserved_symbol id) state region;
|
||||||
if Env.mem id state.env
|
if Env.mem id state.env
|
||||||
then stop (Multiply_defined_symbol id) state region;
|
then stop (Multiply_defined_symbol id) state region;
|
||||||
let state = {state with env = Env.add id state.env;
|
let state = {state with env = Env.add id state.env}
|
||||||
offset = Prefix 0}
|
|
||||||
in scan state lexbuf
|
in scan state lexbuf
|
||||||
| "undef" ->
|
| "undef" ->
|
||||||
let id, _ = variable state lexbuf in
|
let id, _ = variable state lexbuf in
|
||||||
let state = {state with env = Env.remove id state.env;
|
let state = {state with env = Env.remove id state.env}
|
||||||
offset = Prefix 0}
|
|
||||||
in scan state lexbuf
|
in scan state lexbuf
|
||||||
| "error" ->
|
| "error" ->
|
||||||
stop (Error_directive (message [] lexbuf)) state region
|
stop (Error_directive (message [] lexbuf)) state region
|
||||||
| "region" ->
|
| "region" ->
|
||||||
let msg = message [] lexbuf
|
let msg = message [] lexbuf
|
||||||
in expand_offset state;
|
in print state ("#" ^ space ^ "region" ^ msg ^ "\n");
|
||||||
print state ("#" ^ space ^ "region" ^ msg ^ "\n");
|
let state = {state with trace=Region::state.trace}
|
||||||
let state =
|
|
||||||
{state with offset = Prefix 0; trace=Region::state.trace}
|
|
||||||
in scan state lexbuf
|
in scan state lexbuf
|
||||||
| "endregion" ->
|
| "endregion" ->
|
||||||
let msg = message [] lexbuf
|
let msg = message [] lexbuf
|
||||||
in expand_offset state;
|
in print state ("#" ^ space ^ "endregion" ^ msg ^ "\n");
|
||||||
print state ("#" ^ space ^ "endregion" ^ msg ^ "\n");
|
|
||||||
scan (reduce_region state region) lexbuf
|
scan (reduce_region state region) lexbuf
|
||||||
(*
|
|
||||||
| "line" ->
|
|
||||||
expand_offset state;
|
|
||||||
print state ("#" ^ space ^ "line");
|
|
||||||
line_ind state lexbuf;
|
|
||||||
scan {state with offset = Prefix 0} lexbuf
|
|
||||||
|
|
||||||
| "warning" ->
|
|
||||||
let start_p, end_p = region in
|
|
||||||
let msg = message [] lexbuf in
|
|
||||||
let open Lexing
|
|
||||||
in prerr_endline
|
|
||||||
("Warning at line " ^ string_of_int start_p.pos_lnum
|
|
||||||
^ ", char "
|
|
||||||
^ string_of_int (start_p.pos_cnum - start_p.pos_bol)
|
|
||||||
^ "--" ^ string_of_int (end_p.pos_cnum - end_p.pos_bol)
|
|
||||||
^ ":\n" ^ msg);
|
|
||||||
scan env mode (Prefix 0) trace lexbuf
|
|
||||||
*)
|
|
||||||
| _ -> assert false
|
| _ -> assert false
|
||||||
}
|
}
|
||||||
| eof { match state.trace with
|
|
||||||
[] -> expand_offset state; state
|
| eof { if state.trace = [] then state
|
||||||
| _ -> fail Missing_endif state lexbuf }
|
else fail Missing_endif state lexbuf }
|
||||||
|
|
||||||
| '"' { if state.mode = Copy then
|
| '"' { if state.mode = Copy then
|
||||||
begin
|
begin
|
||||||
expand_offset state;
|
|
||||||
copy state lexbuf;
|
copy state lexbuf;
|
||||||
in_string (mk_reg lexbuf) state lexbuf
|
scan (in_string (mk_reg lexbuf) state lexbuf) lexbuf
|
||||||
end;
|
end
|
||||||
scan {state with offset=Inline} lexbuf }
|
else scan state lexbuf }
|
||||||
| "//" { if state.mode = Copy then
|
|
||||||
|
| block_comment_openings {
|
||||||
|
let lexeme = Lexing.lexeme lexbuf in
|
||||||
|
match state.opt#block with
|
||||||
|
Some block when block#opening = lexeme ->
|
||||||
|
if state.mode = Copy then
|
||||||
begin
|
begin
|
||||||
expand_offset state;
|
|
||||||
copy state lexbuf;
|
copy state lexbuf;
|
||||||
in_line_com state lexbuf
|
let state = in_block block (mk_reg lexbuf) state lexbuf
|
||||||
end;
|
in scan state lexbuf
|
||||||
scan {state with offset=Inline} lexbuf }
|
end
|
||||||
| "/*" { if state.mode = Copy then
|
else scan state lexbuf
|
||||||
|
| Some _ | None ->
|
||||||
|
let n = String.length lexeme in
|
||||||
|
begin
|
||||||
|
rollback lexbuf;
|
||||||
|
assert (n > 0);
|
||||||
|
scan (scan_n_char n state lexbuf) lexbuf
|
||||||
|
end }
|
||||||
|
|
||||||
|
| line_comments {
|
||||||
|
let lexeme = Lexing.lexeme lexbuf in
|
||||||
|
match state.opt#line with
|
||||||
|
Some line when line = lexeme ->
|
||||||
|
if state.mode = Copy then
|
||||||
begin
|
begin
|
||||||
expand_offset state;
|
|
||||||
copy state lexbuf;
|
copy state lexbuf;
|
||||||
if state.opt#lang = `ReasonLIGO then
|
scan (in_line_com state lexbuf) lexbuf
|
||||||
reasonLIGO_com (mk_reg lexbuf) state lexbuf
|
end
|
||||||
end;
|
else scan state lexbuf
|
||||||
scan {state with offset=Inline} lexbuf }
|
| Some _ | None ->
|
||||||
| "(*" { if state.mode = Copy then
|
let n = String.length lexeme in
|
||||||
begin
|
begin
|
||||||
expand_offset state;
|
rollback lexbuf;
|
||||||
copy state lexbuf;
|
assert (n > 0);
|
||||||
if state.opt#lang = `CameLIGO
|
scan (scan_n_char n state lexbuf) lexbuf
|
||||||
|| state.opt#lang = `PascaLIGO then
|
end }
|
||||||
cameLIGO_com (mk_reg lexbuf) state lexbuf
|
|
||||||
end;
|
| _ { if state.mode = Copy then copy state lexbuf;
|
||||||
scan {state with offset=Inline} lexbuf }
|
scan state lexbuf }
|
||||||
| _ { if state.mode = Copy then
|
|
||||||
begin
|
(* Scanning a series of characters *)
|
||||||
expand_offset state;
|
|
||||||
copy state lexbuf
|
and scan_n_char n state = parse
|
||||||
end;
|
_ { if state.mode = Copy then copy state lexbuf;
|
||||||
scan {state with offset=Inline} lexbuf }
|
if n = 1 then state else scan_n_char (n-1) state lexbuf }
|
||||||
|
|
||||||
(* Support for #define and #undef *)
|
(* Support for #define and #undef *)
|
||||||
|
|
||||||
@ -638,47 +629,12 @@ and symbol state = parse
|
|||||||
ident as id { id, mk_reg lexbuf }
|
ident as id { id, mk_reg lexbuf }
|
||||||
| _ { fail Invalid_symbol state lexbuf }
|
| _ { fail Invalid_symbol state lexbuf }
|
||||||
|
|
||||||
(*
|
|
||||||
(* Line indicator (#line) *)
|
|
||||||
|
|
||||||
and line_ind state = parse
|
|
||||||
blank* { copy state lexbuf; line_indicator state lexbuf }
|
|
||||||
|
|
||||||
and line_indicator state = parse
|
|
||||||
natural { copy state lexbuf; end_indicator state lexbuf }
|
|
||||||
| ident as id {
|
|
||||||
match id with
|
|
||||||
"default" | "hidden" ->
|
|
||||||
print state (id ^ message [] lexbuf)
|
|
||||||
| _ -> fail (Invalid_line_indicator id) state lexbuf }
|
|
||||||
| _ { fail No_line_indicator state lexbuf }
|
|
||||||
|
|
||||||
and end_indicator state = parse
|
|
||||||
blank+ { copy state lexbuf; end_indicator state lexbuf }
|
|
||||||
| nl { proc_nl state lexbuf }
|
|
||||||
| eof { copy state lexbuf }
|
|
||||||
| "//" { copy state lexbuf;
|
|
||||||
print state (message [] lexbuf ^ "\n") }
|
|
||||||
| '"' { copy state lexbuf;
|
|
||||||
in_string (mk_reg lexbuf) state lexbuf;
|
|
||||||
opt_line_com state lexbuf }
|
|
||||||
| _ { fail End_line_indicator state lexbuf }
|
|
||||||
|
|
||||||
and opt_line_com state = parse
|
|
||||||
nl { proc_nl state lexbuf }
|
|
||||||
| eof { copy state lexbuf }
|
|
||||||
| blank+ { copy state lexbuf; opt_line_com state lexbuf }
|
|
||||||
| "//" { print state ("//" ^ message [] lexbuf) }
|
|
||||||
*)
|
|
||||||
|
|
||||||
(* New lines and verbatim sequence of characters *)
|
(* New lines and verbatim sequence of characters *)
|
||||||
|
|
||||||
and skip_line state = parse
|
and skip_line state = parse
|
||||||
nl { proc_nl state lexbuf }
|
nl { proc_nl state lexbuf }
|
||||||
| blank+ { skip_line state lexbuf }
|
| blank+ { skip_line state lexbuf }
|
||||||
| "//" { in_line_com {state with mode=Skip} lexbuf }
|
| _ { () }
|
||||||
| _ { fail No_line_comment_or_blank state lexbuf }
|
|
||||||
| eof { () }
|
|
||||||
|
|
||||||
and message acc = parse
|
and message acc = parse
|
||||||
nl { Lexing.new_line lexbuf;
|
nl { Lexing.new_line lexbuf;
|
||||||
@ -689,22 +645,41 @@ and message acc = parse
|
|||||||
(* Comments *)
|
(* Comments *)
|
||||||
|
|
||||||
and in_line_com state = parse
|
and in_line_com state = parse
|
||||||
nl { proc_nl state lexbuf }
|
nl { proc_nl state lexbuf; state }
|
||||||
| eof { () }
|
| eof { state }
|
||||||
| _ { if state.mode = Copy then copy state lexbuf;
|
| _ { if state.mode = Copy then copy state lexbuf;
|
||||||
in_line_com state lexbuf }
|
in_line_com state lexbuf }
|
||||||
|
|
||||||
and reasonLIGO_com opening state = parse
|
and in_block block opening state = parse
|
||||||
nl { proc_nl state lexbuf; reasonLIGO_com opening state lexbuf }
|
'"' | block_comment_openings {
|
||||||
| "*/" { copy state lexbuf }
|
let lexeme = Lexing.lexeme lexbuf in
|
||||||
| eof { () }
|
if block#opening = lexeme || lexeme = "\""
|
||||||
| _ { copy state lexbuf; reasonLIGO_com opening state lexbuf }
|
then let () = copy state lexbuf in
|
||||||
|
let opening' = mk_reg lexbuf in
|
||||||
|
let next = if lexeme = "\"" then in_string
|
||||||
|
else in_block block in
|
||||||
|
let state = next opening' state lexbuf
|
||||||
|
in in_block block opening state lexbuf
|
||||||
|
else let () = rollback lexbuf in
|
||||||
|
let n = String.length lexeme in
|
||||||
|
let () = assert (n > 0) in
|
||||||
|
let state = scan_n_char n state lexbuf
|
||||||
|
in in_block block opening state lexbuf }
|
||||||
|
|
||||||
and cameLIGO_com opening state = parse
|
| block_comment_closings {
|
||||||
nl { proc_nl state lexbuf; cameLIGO_com opening state lexbuf }
|
let lexeme = Lexing.lexeme lexbuf in
|
||||||
| "*)" { copy state lexbuf }
|
if block#closing = lexeme
|
||||||
| eof { () }
|
then (copy state lexbuf; state)
|
||||||
| _ { copy state lexbuf; cameLIGO_com opening state lexbuf }
|
else let () = rollback lexbuf in
|
||||||
|
let n = String.length lexeme in
|
||||||
|
let () = assert (n > 0) in
|
||||||
|
let state = scan_n_char n state lexbuf
|
||||||
|
in in_block block opening state lexbuf }
|
||||||
|
|
||||||
|
| nl { proc_nl state lexbuf; in_block block opening state lexbuf }
|
||||||
|
| eof { let err = Unterminated_comment (block#closing)
|
||||||
|
in stop err state opening }
|
||||||
|
| _ { copy state lexbuf; in_block block opening state lexbuf }
|
||||||
|
|
||||||
(* Included filename *)
|
(* Included filename *)
|
||||||
|
|
||||||
@ -717,15 +692,15 @@ and in_inclusion opening acc len state = parse
|
|||||||
in Region.cover opening closing,
|
in Region.cover opening closing,
|
||||||
mk_str len acc }
|
mk_str len acc }
|
||||||
| nl { fail Newline_in_string state lexbuf }
|
| nl { fail Newline_in_string state lexbuf }
|
||||||
| eof { stop Open_string state opening }
|
| eof { stop Unterminated_string state opening }
|
||||||
| _ as c { in_inclusion opening (c::acc) (len+1) state lexbuf }
|
| _ as c { in_inclusion opening (c::acc) (len+1) state lexbuf }
|
||||||
|
|
||||||
(* Strings *)
|
(* Strings *)
|
||||||
|
|
||||||
and in_string opening state = parse
|
and in_string opening state = parse
|
||||||
"\\\"" { copy state lexbuf; in_string opening state lexbuf }
|
"\\\"" { copy state lexbuf; in_string opening state lexbuf }
|
||||||
| '"' { copy state lexbuf }
|
| '"' { copy state lexbuf; state }
|
||||||
| eof { () }
|
| eof { state }
|
||||||
| _ { copy state lexbuf; in_string opening state lexbuf }
|
| _ { copy state lexbuf; in_string opening state lexbuf }
|
||||||
|
|
||||||
and preproc state = parse
|
and preproc state = parse
|
||||||
@ -750,7 +725,6 @@ let lex opt buffer =
|
|||||||
let state = {
|
let state = {
|
||||||
env = Env.empty;
|
env = Env.empty;
|
||||||
mode = Copy;
|
mode = Copy;
|
||||||
offset = Prefix 0;
|
|
||||||
trace = [];
|
trace = [];
|
||||||
out = Buffer.create 80;
|
out = Buffer.create 80;
|
||||||
incl = [];
|
incl = [];
|
||||||
|
7
vendors/Preprocessor/PreprocMain.ml
vendored
7
vendors/Preprocessor/PreprocMain.ml
vendored
@ -4,9 +4,12 @@ module Region = Simple_utils.Region
|
|||||||
module Preproc = Preprocessor.Preproc
|
module Preproc = Preprocessor.Preproc
|
||||||
module EvalOpt = Preprocessor.EvalOpt
|
module EvalOpt = Preprocessor.EvalOpt
|
||||||
|
|
||||||
let highlight msg = Printf.eprintf "\027[31m%s\027[0m\n%!" msg
|
let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg
|
||||||
|
|
||||||
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo")
|
let options =
|
||||||
|
let open EvalOpt in
|
||||||
|
let block = mk_block ~opening:"(*" ~closing:"*)"
|
||||||
|
in read ~block ~line:"//" ".ligo"
|
||||||
|
|
||||||
let preproc cin =
|
let preproc cin =
|
||||||
let buffer = Lexing.from_channel cin in
|
let buffer = Lexing.from_channel cin in
|
||||||
|
22
vendors/Preprocessor/build.sh
vendored
22
vendors/Preprocessor/build.sh
vendored
@ -1,22 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
set -x
|
|
||||||
ocamllex.opt E_Lexer.mll
|
|
||||||
ocamllex.opt Preproc.mll
|
|
||||||
menhir -la 1 E_Parser.mly
|
|
||||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -c EvalOpt.mli
|
|
||||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -c E_AST.ml
|
|
||||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -c E_Parser.mli
|
|
||||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c E_Lexer.mli
|
|
||||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c E_LexerMain.ml
|
|
||||||
camlcmd="ocamlfind ocamlc -I _x86_64 -strict-sequence -w +A-48-4 "
|
|
||||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package getopt,str -c EvalOpt.ml
|
|
||||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c E_Lexer.ml
|
|
||||||
menhir --infer --ocamlc="$camlcmd" E_Parser.mly
|
|
||||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -c E_Parser.ml
|
|
||||||
ocamlfind ocamlc -package getopt,simple-utils,str -linkpkg -o E_LexerMain.byte E_AST.cmo E_Parser.cmo E_Lexer.cmo EvalOpt.cmo E_LexerMain.cmo
|
|
||||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c Preproc.mli
|
|
||||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c PreprocMain.ml
|
|
||||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c Preproc.ml
|
|
||||||
ocamlfind ocamlc -package getopt,simple-utils,str -linkpkg -o PreprocMain.byte EvalOpt.cmo E_AST.cmo E_Parser.cmo E_Lexer.cmo Preproc.cmo PreprocMain.cmo
|
|
||||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c E_ParserMain.ml
|
|
||||||
ocamlfind ocamlc -package getopt,simple-utils,str -linkpkg -o E_ParserMain.byte E_AST.cmo E_Parser.cmo E_Lexer.cmo EvalOpt.cmo Preproc.cmo E_ParserMain.cmo
|
|
4
vendors/Preprocessor/clean.sh
vendored
4
vendors/Preprocessor/clean.sh
vendored
@ -1,4 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
|
|
||||||
\rm -f *.cm* *.o *.byte *.opt
|
|
||||||
\rm E_Lexer.ml E_Parser.ml E_Parser.mli Preproc.ml
|
|
Loading…
Reference in New Issue
Block a user