Merge branch 'rinderknecht@michelson' into 'dev'
Refactoring of the lexer and preprocessor See merge request ligolang/ligo!591
This commit is contained in:
commit
d0d495ccce
@ -2,7 +2,7 @@ open Cli_expect
|
||||
|
||||
let%expect_test _ =
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/broken_string.ligo" ; "main" ] ;
|
||||
[%expect {|
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "broken_string.ligo", line 1, characters 18-19:
|
||||
The string starting here is interrupted by a line break.
|
||||
Hint: Remove the break, close the string before or insert a backslash.
|
||||
@ -19,7 +19,7 @@ ligo: : Lexical error in file "broken_string.ligo", line 1, characters 18-19:
|
||||
|} ];
|
||||
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/broken_string.mligo" ; "main" ] ;
|
||||
[%expect {|
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "broken_string.mligo", line 1, characters 8-9:
|
||||
The string starting here is interrupted by a line break.
|
||||
Hint: Remove the break, close the string before or insert a backslash.
|
||||
@ -36,7 +36,7 @@ ligo: : Lexical error in file "broken_string.mligo", line 1, characters 8-9:
|
||||
|} ];
|
||||
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/broken_string.religo" ; "main" ] ;
|
||||
[%expect {|
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "broken_string.religo", line 1, characters 8-9:
|
||||
The string starting here is interrupted by a line break.
|
||||
Hint: Remove the break, close the string before or insert a backslash.
|
||||
@ -53,8 +53,8 @@ ligo: : Lexical error in file "broken_string.religo", line 1, characters 8-9:
|
||||
|} ];
|
||||
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.ligo" ; "main" ] ;
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters 18-23:
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters 18-31:
|
||||
Negative byte sequence.
|
||||
Hint: Remove the leading minus sign.
|
||||
{}
|
||||
@ -70,8 +70,8 @@ ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters
|
||||
|} ];
|
||||
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.mligo" ; "main" ] ;
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "negative_byte_sequence.mligo", line 1, characters 8-13:
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "negative_byte_sequence.mligo", line 1, characters 8-21:
|
||||
Negative byte sequence.
|
||||
Hint: Remove the leading minus sign.
|
||||
{}
|
||||
@ -87,8 +87,8 @@ ligo: : Lexical error in file "negative_byte_sequence.mligo", line 1, characters
|
||||
|} ];
|
||||
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.religo" ; "main" ] ;
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "negative_byte_sequence.religo", line 1, characters 8-13:
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "negative_byte_sequence.religo", line 1, characters 8-21:
|
||||
Negative byte sequence.
|
||||
Hint: Remove the leading minus sign.
|
||||
{}
|
||||
@ -103,8 +103,9 @@ ligo: : Lexical error in file "negative_byte_sequence.religo", line 1, character
|
||||
* Check the changelog by running 'ligo changelog'
|
||||
|} ];
|
||||
|
||||
(*
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/reserved_name.ligo" ; "main" ] ;
|
||||
[%expect {|
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "reserved_name.ligo", line 1, characters 4-13:
|
||||
Reserved name: "arguments".
|
||||
Hint: Change the name.
|
||||
@ -119,9 +120,10 @@ ligo: : Lexical error in file "reserved_name.ligo", line 1, characters 4-13:
|
||||
* Open a gitlab issue: https://gitlab.com/ligolang/ligo/issues/new
|
||||
* Check the changelog by running 'ligo changelog'
|
||||
|} ];
|
||||
*)
|
||||
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/reserved_name.religo" ; "main" ] ;
|
||||
[%expect {|
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "reserved_name.religo", line 1, characters 4-7:
|
||||
Reserved name: "end".
|
||||
Hint: Change the name.
|
||||
@ -138,7 +140,7 @@ ligo: : Lexical error in file "reserved_name.religo", line 1, characters 4-7:
|
||||
|} ];
|
||||
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/reserved_name.mligo" ; "main" ] ;
|
||||
[%expect {|
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "reserved_name.mligo", line 1, characters 4-10:
|
||||
Reserved name: "object".
|
||||
Hint: Change the name.
|
||||
@ -155,7 +157,7 @@ ligo: : Lexical error in file "reserved_name.mligo", line 1, characters 4-10:
|
||||
|} ];
|
||||
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/unexpected_character.ligo" ; "main" ] ;
|
||||
[%expect {|
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "unexpected_character.ligo", line 1, characters 18-19:
|
||||
Unexpected character '\239'.
|
||||
{}
|
||||
@ -171,7 +173,7 @@ ligo: : Lexical error in file "unexpected_character.ligo", line 1, characters 18
|
||||
|} ];
|
||||
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/unexpected_character.mligo" ; "main" ] ;
|
||||
[%expect {|
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "unexpected_character.mligo", line 1, characters 8-9:
|
||||
Unexpected character '\239'.
|
||||
{}
|
||||
@ -187,7 +189,7 @@ ligo: : Lexical error in file "unexpected_character.mligo", line 1, characters 8
|
||||
|} ];
|
||||
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/unexpected_character.religo" ; "main" ] ;
|
||||
[%expect {|
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "unexpected_character.religo", line 1, characters 8-9:
|
||||
Unexpected character '\239'.
|
||||
{}
|
||||
@ -203,11 +205,10 @@ ligo: : Lexical error in file "unexpected_character.religo", line 1, characters
|
||||
|} ];
|
||||
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/unterminated_comment.mligo" ; "main" ] ;
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "unterminated_comment.mligo", line 1, characters 0-2:
|
||||
[%expect {|
|
||||
ligo: : Preprocessing error in file "../../test/lexer/unterminated_comment.mligo", line 1, characters 0-2:
|
||||
Unterminated comment.
|
||||
Hint: Close with "*)".
|
||||
{}
|
||||
Hint: Close with "*)". {}
|
||||
|
||||
|
||||
If you're not sure how to fix this error, you can
|
||||
@ -220,7 +221,7 @@ ligo: : Lexical error in file "unterminated_comment.mligo", line 1, characters 0
|
||||
|} ];
|
||||
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/invalid_symbol.ligo" ; "main" ] ;
|
||||
[%expect {|
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "invalid_symbol.ligo", line 1, characters 17-20:
|
||||
Invalid symbol.
|
||||
Hint: Check the LIGO syntax you use.
|
||||
@ -237,7 +238,7 @@ ligo: : Lexical error in file "invalid_symbol.ligo", line 1, characters 17-20:
|
||||
|} ];
|
||||
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/invalid_symbol.mligo" ; "main" ] ;
|
||||
[%expect {|
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "invalid_symbol.mligo", line 1, characters 10-13:
|
||||
Invalid symbol.
|
||||
Hint: Check the LIGO syntax you use.
|
||||
@ -254,7 +255,7 @@ ligo: : Lexical error in file "invalid_symbol.mligo", line 1, characters 10-13:
|
||||
|} ];
|
||||
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/invalid_symbol.religo" ; "main" ] ;
|
||||
[%expect {|
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "invalid_symbol.religo", line 1, characters 10-11:
|
||||
Invalid symbol.
|
||||
Hint: Check the LIGO syntax you use.
|
||||
@ -271,7 +272,7 @@ ligo: : Lexical error in file "invalid_symbol.religo", line 1, characters 10-11:
|
||||
|} ];
|
||||
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/missing_break.ligo" ; "main" ] ;
|
||||
[%expect {|
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "missing_break.ligo", line 1, characters 18-18:
|
||||
Missing break.
|
||||
Hint: Insert some space.
|
||||
@ -288,7 +289,7 @@ ligo: : Lexical error in file "missing_break.ligo", line 1, characters 18-18:
|
||||
|} ];
|
||||
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/missing_break.mligo" ; "main" ] ;
|
||||
[%expect {|
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "missing_break.mligo", line 1, characters 11-11:
|
||||
Missing break.
|
||||
Hint: Insert some space.
|
||||
@ -305,7 +306,7 @@ ligo: : Lexical error in file "missing_break.mligo", line 1, characters 11-11:
|
||||
|} ];
|
||||
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/missing_break.religo" ; "main" ] ;
|
||||
[%expect {|
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "missing_break.religo", line 1, characters 11-11:
|
||||
Missing break.
|
||||
Hint: Insert some space.
|
||||
@ -322,7 +323,7 @@ ligo: : Lexical error in file "missing_break.religo", line 1, characters 11-11:
|
||||
|} ];
|
||||
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/invalid_character_in_string.ligo" ; "main" ] ;
|
||||
[%expect {|
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "invalid_character_in_string.ligo", line 1, characters 19-20:
|
||||
Invalid character in string.
|
||||
Hint: Remove or replace the character.
|
||||
@ -339,7 +340,7 @@ ligo: : Lexical error in file "invalid_character_in_string.ligo", line 1, charac
|
||||
|} ];
|
||||
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/invalid_character_in_string.mligo" ; "main" ] ;
|
||||
[%expect {|
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "invalid_character_in_string.mligo", line 1, characters 9-10:
|
||||
Invalid character in string.
|
||||
Hint: Remove or replace the character.
|
||||
@ -356,7 +357,7 @@ ligo: : Lexical error in file "invalid_character_in_string.mligo", line 1, chara
|
||||
|} ];
|
||||
|
||||
run_ligo_bad [ "compile-contract" ; "../../test/lexer/invalid_character_in_string.religo" ; "main" ] ;
|
||||
[%expect {|
|
||||
[%expect {|
|
||||
ligo: : Lexical error in file "invalid_character_in_string.religo", line 1, characters 9-10:
|
||||
Invalid character in string.
|
||||
Hint: Remove or replace the character.
|
||||
@ -370,4 +371,4 @@ ligo: : Lexical error in file "invalid_character_in_string.religo", line 1, char
|
||||
* Ask a question on our Discord: https://discord.gg/9rhYaEt
|
||||
* Open a gitlab issue: https://gitlab.com/ligolang/ligo/issues/new
|
||||
* Check the changelog by running 'ligo changelog'
|
||||
|} ]
|
||||
|} ]
|
||||
|
@ -8,15 +8,14 @@ module SSet = Set.Make (String)
|
||||
|
||||
(* Mock IOs TODO: Fill them with CLI options *)
|
||||
|
||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
||||
|
||||
module SubIO =
|
||||
struct
|
||||
type options = <
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : language;
|
||||
block : EvalOpt.block_comment option;
|
||||
line : EvalOpt.line_comment option;
|
||||
ext : string; (* ".mligo" *)
|
||||
mode : [`Byte | `Point];
|
||||
cmd : EvalOpt.command;
|
||||
@ -24,22 +23,25 @@ module SubIO =
|
||||
>
|
||||
|
||||
let options : options =
|
||||
object
|
||||
method libs = []
|
||||
method verbose = SSet.empty
|
||||
method offsets = true
|
||||
method lang = `CameLIGO
|
||||
method ext = ".mligo"
|
||||
method mode = `Point
|
||||
method cmd = EvalOpt.Quiet
|
||||
method mono = false
|
||||
end
|
||||
let block = EvalOpt.mk_block ~opening:"(*" ~closing:"*)"
|
||||
in object
|
||||
method libs = []
|
||||
method verbose = SSet.empty
|
||||
method offsets = true
|
||||
method block = Some block
|
||||
method line = Some "//"
|
||||
method ext = ".mligo"
|
||||
method mode = `Point
|
||||
method cmd = EvalOpt.Quiet
|
||||
method mono = false
|
||||
end
|
||||
|
||||
let make =
|
||||
EvalOpt.make ~libs:options#libs
|
||||
~verbose:options#verbose
|
||||
~offsets:options#offsets
|
||||
~lang:options#lang
|
||||
?block:options#block
|
||||
?line:options#line
|
||||
~ext:options#ext
|
||||
~mode:options#mode
|
||||
~cmd:options#cmd
|
||||
|
@ -2,6 +2,7 @@ $HOME/git/OCaml-build/Makefile
|
||||
|
||||
../shared/Lexer.mli
|
||||
../shared/Lexer.mll
|
||||
../shared/LexerLib.ml
|
||||
../shared/EvalOpt.ml
|
||||
../shared/EvalOpt.mli
|
||||
../shared/FQueue.ml
|
||||
|
@ -156,11 +156,22 @@ val eof : Region.t -> token
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
val is_string : token -> bool
|
||||
val is_bytes : token -> bool
|
||||
val is_int : token -> bool
|
||||
val is_ident : token -> bool
|
||||
val is_kwd : token -> bool
|
||||
val is_constr : token -> bool
|
||||
val is_sym : token -> bool
|
||||
val is_eof : token -> bool
|
||||
|
||||
(* Style *)
|
||||
|
||||
type error
|
||||
|
||||
val error_to_string : error -> string
|
||||
|
||||
exception Error of error Region.reg
|
||||
|
||||
val format_error :
|
||||
?offsets:bool -> [`Byte | `Point] ->
|
||||
error Region.reg -> file:bool -> string Region.reg
|
||||
|
||||
val check_right_context :
|
||||
token ->
|
||||
(Lexing.lexbuf -> (Markup.t list * token) option) ->
|
||||
Lexing.lexbuf ->
|
||||
unit
|
||||
|
@ -102,138 +102,167 @@ type t =
|
||||
|
||||
| EOF of Region.t (* End of file *)
|
||||
|
||||
|
||||
(* Projections *)
|
||||
|
||||
type token = t
|
||||
|
||||
let proj_token = function
|
||||
ARROW region -> region, "ARROW"
|
||||
| CONS region -> region, "CONS"
|
||||
| CAT region -> region, "CAT"
|
||||
| MINUS region -> region, "MINUS"
|
||||
| PLUS region -> region, "PLUS"
|
||||
| SLASH region -> region, "SLASH"
|
||||
| TIMES region -> region, "TIMES"
|
||||
| LPAR region -> region, "LPAR"
|
||||
| RPAR region -> region, "RPAR"
|
||||
| LBRACKET region -> region, "LBRACKET"
|
||||
| RBRACKET region -> region, "RBRACKET"
|
||||
| LBRACE region -> region, "LBRACE"
|
||||
| RBRACE region -> region, "RBRACE"
|
||||
| COMMA region -> region, "COMMA"
|
||||
| SEMI region -> region, "SEMI"
|
||||
| VBAR region -> region, "VBAR"
|
||||
| COLON region -> region, "COLON"
|
||||
| DOT region -> region, "DOT"
|
||||
| WILD region -> region, "WILD"
|
||||
| EQ region -> region, "EQ"
|
||||
| NE region -> region, "NE"
|
||||
| LT region -> region, "LT"
|
||||
| GT region -> region, "GT"
|
||||
| LE region -> region, "LE"
|
||||
| GE region -> region, "GE"
|
||||
| BOOL_OR region -> region, "BOOL_OR"
|
||||
| BOOL_AND region -> region, "BOOL_AND"
|
||||
| Ident Region.{region; value} ->
|
||||
region, sprintf "Ident %s" value
|
||||
| Constr Region.{region; value} ->
|
||||
region, sprintf "Constr %s" value
|
||||
(* Literals *)
|
||||
|
||||
String Region.{region; value} ->
|
||||
region, sprintf "Str %s" value
|
||||
| Bytes Region.{region; value = s,b} ->
|
||||
region,
|
||||
sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b)
|
||||
| Int Region.{region; value = s,n} ->
|
||||
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
|
||||
| Nat Region.{region; value = s,n} ->
|
||||
region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
|
||||
| Mutez Region.{region; value = s,n} ->
|
||||
region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
|
||||
| String Region.{region; value} ->
|
||||
region, sprintf "Str %s" value
|
||||
| Bytes Region.{region; value = s,b} ->
|
||||
region,
|
||||
sprintf "Bytes (\"%s\", \"0x%s\")"
|
||||
s (Hex.show b)
|
||||
| Ident Region.{region; value} ->
|
||||
region, sprintf "Ident %s" value
|
||||
| Constr Region.{region; value} ->
|
||||
region, sprintf "Constr %s" value
|
||||
| Attr Region.{region; value} ->
|
||||
region, sprintf "Attr \"%s\"" value
|
||||
|
||||
(* Symbols *)
|
||||
|
||||
| ARROW region -> region, "ARROW"
|
||||
| CONS region -> region, "CONS"
|
||||
| CAT region -> region, "CAT"
|
||||
| MINUS region -> region, "MINUS"
|
||||
| PLUS region -> region, "PLUS"
|
||||
| SLASH region -> region, "SLASH"
|
||||
| TIMES region -> region, "TIMES"
|
||||
| LPAR region -> region, "LPAR"
|
||||
| RPAR region -> region, "RPAR"
|
||||
| LBRACKET region -> region, "LBRACKET"
|
||||
| RBRACKET region -> region, "RBRACKET"
|
||||
| LBRACE region -> region, "LBRACE"
|
||||
| RBRACE region -> region, "RBRACE"
|
||||
| COMMA region -> region, "COMMA"
|
||||
| SEMI region -> region, "SEMI"
|
||||
| VBAR region -> region, "VBAR"
|
||||
| COLON region -> region, "COLON"
|
||||
| DOT region -> region, "DOT"
|
||||
| WILD region -> region, "WILD"
|
||||
| EQ region -> region, "EQ"
|
||||
| NE region -> region, "NE"
|
||||
| LT region -> region, "LT"
|
||||
| GT region -> region, "GT"
|
||||
| LE region -> region, "LE"
|
||||
| GE region -> region, "GE"
|
||||
| BOOL_OR region -> region, "BOOL_OR"
|
||||
| BOOL_AND region -> region, "BOOL_AND"
|
||||
|
||||
(* Keywords *)
|
||||
|
||||
| Begin region -> region, "Begin"
|
||||
| Else region -> region, "Else"
|
||||
| End region -> region, "End"
|
||||
| Else region -> region, "Else"
|
||||
| End region -> region, "End"
|
||||
| False region -> region, "False"
|
||||
| Fun region -> region, "Fun"
|
||||
| Rec region -> region, "Rec"
|
||||
| If region -> region, "If"
|
||||
| In region -> region, "In"
|
||||
| Let region -> region, "Let"
|
||||
| Fun region -> region, "Fun"
|
||||
| Rec region -> region, "Rec"
|
||||
| If region -> region, "If"
|
||||
| In region -> region, "In"
|
||||
| Let region -> region, "Let"
|
||||
| Match region -> region, "Match"
|
||||
| Mod region -> region, "Mod"
|
||||
| Not region -> region, "Not"
|
||||
| Of region -> region, "Of"
|
||||
| Or region -> region, "Or"
|
||||
| Then region -> region, "Then"
|
||||
| True region -> region, "True"
|
||||
| Type region -> region, "Type"
|
||||
| With region -> region, "With"
|
||||
| C_None region -> region, "C_None"
|
||||
| C_Some region -> region, "C_Some"
|
||||
| Mod region -> region, "Mod"
|
||||
| Not region -> region, "Not"
|
||||
| Of region -> region, "Of"
|
||||
| Or region -> region, "Or"
|
||||
| Then region -> region, "Then"
|
||||
| True region -> region, "True"
|
||||
| Type region -> region, "Type"
|
||||
| With region -> region, "With"
|
||||
|
||||
(* Data *)
|
||||
|
||||
| C_None region -> region, "C_None"
|
||||
| C_Some region -> region, "C_Some"
|
||||
|
||||
(* Virtual tokens *)
|
||||
|
||||
| EOF region -> region, "EOF"
|
||||
|
||||
let to_lexeme = function
|
||||
ARROW _ -> "->"
|
||||
| CONS _ -> "::"
|
||||
| CAT _ -> "^"
|
||||
| MINUS _ -> "-"
|
||||
| PLUS _ -> "+"
|
||||
| SLASH _ -> "/"
|
||||
| TIMES _ -> "*"
|
||||
| LPAR _ -> "("
|
||||
| RPAR _ -> ")"
|
||||
| LBRACKET _ -> "["
|
||||
| RBRACKET _ -> "]"
|
||||
| LBRACE _ -> "{"
|
||||
| RBRACE _ -> "}"
|
||||
| COMMA _ -> ","
|
||||
| SEMI _ -> ";"
|
||||
| VBAR _ -> "|"
|
||||
| COLON _ -> ":"
|
||||
| DOT _ -> "."
|
||||
| WILD _ -> "_"
|
||||
| EQ _ -> "="
|
||||
| NE _ -> "<>"
|
||||
| LT _ -> "<"
|
||||
| GT _ -> ">"
|
||||
| LE _ -> "<="
|
||||
| GE _ -> ">="
|
||||
| BOOL_OR _ -> "||"
|
||||
| BOOL_AND _ -> "&&"
|
||||
|
||||
| Ident id -> id.Region.value
|
||||
| Constr id -> id.Region.value
|
||||
let to_lexeme = function
|
||||
(* Literals *)
|
||||
|
||||
String s -> String.escaped s.Region.value
|
||||
| Bytes b -> fst b.Region.value
|
||||
| Int i
|
||||
| Nat i
|
||||
| Mutez i -> fst i.Region.value
|
||||
| String s -> String.escaped s.Region.value
|
||||
| Bytes b -> fst b.Region.value
|
||||
| Attr a -> a.Region.value
|
||||
| Mutez i -> fst i.Region.value
|
||||
| Ident id -> id.Region.value
|
||||
| Constr id -> id.Region.value
|
||||
| Attr a -> a.Region.value
|
||||
|
||||
(* Symbols *)
|
||||
|
||||
| ARROW _ -> "->"
|
||||
| CONS _ -> "::"
|
||||
| CAT _ -> "^"
|
||||
| MINUS _ -> "-"
|
||||
| PLUS _ -> "+"
|
||||
| SLASH _ -> "/"
|
||||
| TIMES _ -> "*"
|
||||
| LPAR _ -> "("
|
||||
| RPAR _ -> ")"
|
||||
| LBRACKET _ -> "["
|
||||
| RBRACKET _ -> "]"
|
||||
| LBRACE _ -> "{"
|
||||
| RBRACE _ -> "}"
|
||||
| COMMA _ -> ","
|
||||
| SEMI _ -> ";"
|
||||
| VBAR _ -> "|"
|
||||
| COLON _ -> ":"
|
||||
| DOT _ -> "."
|
||||
| WILD _ -> "_"
|
||||
| EQ _ -> "="
|
||||
| NE _ -> "<>"
|
||||
| LT _ -> "<"
|
||||
| GT _ -> ">"
|
||||
| LE _ -> "<="
|
||||
| GE _ -> ">="
|
||||
| BOOL_OR _ -> "||"
|
||||
| BOOL_AND _ -> "&&"
|
||||
|
||||
(* Keywords *)
|
||||
|
||||
| Begin _ -> "begin"
|
||||
| Else _ -> "else"
|
||||
| End _ -> "end"
|
||||
| Else _ -> "else"
|
||||
| End _ -> "end"
|
||||
| False _ -> "false"
|
||||
| Fun _ -> "fun"
|
||||
| Rec _ -> "rec"
|
||||
| If _ -> "if"
|
||||
| In _ -> "in"
|
||||
| Let _ -> "let"
|
||||
| Fun _ -> "fun"
|
||||
| Rec _ -> "rec"
|
||||
| If _ -> "if"
|
||||
| In _ -> "in"
|
||||
| Let _ -> "let"
|
||||
| Match _ -> "match"
|
||||
| Mod _ -> "mod"
|
||||
| Not _ -> "not"
|
||||
| Of _ -> "of"
|
||||
| Or _ -> "or"
|
||||
| True _ -> "true"
|
||||
| Type _ -> "type"
|
||||
| Then _ -> "then"
|
||||
| With _ -> "with"
|
||||
| Mod _ -> "mod"
|
||||
| Not _ -> "not"
|
||||
| Of _ -> "of"
|
||||
| Or _ -> "or"
|
||||
| True _ -> "true"
|
||||
| Type _ -> "type"
|
||||
| Then _ -> "then"
|
||||
| With _ -> "with"
|
||||
|
||||
(* Data constructors *)
|
||||
|
||||
| C_None _ -> "None"
|
||||
| C_Some _ -> "Some"
|
||||
|
||||
(* Virtual tokens *)
|
||||
|
||||
| EOF _ -> ""
|
||||
|
||||
(* CONVERSIONS *)
|
||||
|
||||
let to_string token ?(offsets=true) mode =
|
||||
let region, val_str = proj_token token in
|
||||
let reg_str = region#compact ~offsets mode
|
||||
@ -241,10 +270,6 @@ let to_string token ?(offsets=true) mode =
|
||||
|
||||
let to_region token = proj_token token |> fst
|
||||
|
||||
(* Injections *)
|
||||
|
||||
type int_err = Non_canonical_zero
|
||||
|
||||
(* LEXIS *)
|
||||
|
||||
let keywords = [
|
||||
@ -385,6 +410,8 @@ let mk_bytes lexeme region =
|
||||
let value = lexeme, `Hex norm
|
||||
in Bytes Region.{region; value}
|
||||
|
||||
type int_err = Non_canonical_zero
|
||||
|
||||
let mk_int lexeme region =
|
||||
let z =
|
||||
Str.(global_replace (regexp "_") "" lexeme) |> Z.of_string
|
||||
@ -398,23 +425,21 @@ type nat_err =
|
||||
|
||||
let mk_nat lexeme region =
|
||||
match (String.index_opt lexeme 'n') with
|
||||
| None -> Error Invalid_natural
|
||||
| Some _ -> (
|
||||
let z =
|
||||
Str.(global_replace (regexp "_") "" lexeme) |>
|
||||
Str.(global_replace (regexp "n") "") |>
|
||||
Z.of_string in
|
||||
if Z.equal z Z.zero && lexeme <> "0n"
|
||||
then Error Non_canonical_zero_nat
|
||||
else Ok (Nat Region.{region; value = lexeme,z})
|
||||
)
|
||||
None -> Error Invalid_natural
|
||||
| Some _ -> let z =
|
||||
Str.(global_replace (regexp "_") "" lexeme) |>
|
||||
Str.(global_replace (regexp "n") "") |>
|
||||
Z.of_string in
|
||||
if Z.equal z Z.zero && lexeme <> "0n"
|
||||
then Error Non_canonical_zero_nat
|
||||
else Ok (Nat Region.{region; value = lexeme,z})
|
||||
|
||||
let mk_mutez lexeme region =
|
||||
let z =
|
||||
Str.(global_replace (regexp "_") "" lexeme) |>
|
||||
Str.(global_replace (regexp "mutez") "") |>
|
||||
Z.of_string in
|
||||
if Z.equal z Z.zero && lexeme <> "0mutez"
|
||||
if Z.equal z Z.zero && lexeme <> "0mutez"
|
||||
then Error Non_canonical_zero
|
||||
else Ok (Mutez Region.{region; value = lexeme, z})
|
||||
|
||||
@ -422,8 +447,6 @@ let eof region = EOF region
|
||||
|
||||
type sym_err = Invalid_symbol
|
||||
|
||||
type attr_err = Invalid_attribute
|
||||
|
||||
let mk_sym lexeme region =
|
||||
match lexeme with
|
||||
(* Lexemes in common with all concrete syntaxes *)
|
||||
@ -473,88 +496,80 @@ let mk_constr lexeme region =
|
||||
|
||||
(* Attributes *)
|
||||
|
||||
type attr_err = Invalid_attribute
|
||||
|
||||
let mk_attr header lexeme region =
|
||||
if header = "[@" then
|
||||
Error Invalid_attribute
|
||||
if header = "[@" then Error Invalid_attribute
|
||||
else Ok (Attr Region.{value=lexeme; region})
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
let is_string = function
|
||||
String _ -> true
|
||||
| _ -> false
|
||||
let is_string = function String _ -> true | _ -> false
|
||||
let is_bytes = function Bytes _ -> true | _ -> false
|
||||
let is_int = function Int _ -> true | _ -> false
|
||||
let is_ident = function Ident _ -> true | _ -> false
|
||||
let is_eof = function EOF _ -> true | _ -> false
|
||||
let is_minus = function MINUS _ -> true | _ -> false
|
||||
|
||||
let is_bytes = function
|
||||
Bytes _ -> true
|
||||
| _ -> false
|
||||
(* Errors *)
|
||||
|
||||
let is_int = function
|
||||
Int _ -> true
|
||||
| _ -> false
|
||||
type error =
|
||||
Odd_lengthed_bytes
|
||||
| Missing_break
|
||||
| Negative_byte_sequence
|
||||
|
||||
let is_ident = function
|
||||
Ident _ -> true
|
||||
| _ -> false
|
||||
let error_to_string = function
|
||||
Odd_lengthed_bytes ->
|
||||
"The length of the byte sequence is an odd number.\n\
|
||||
Hint: Add or remove a digit."
|
||||
| Missing_break ->
|
||||
"Missing break.\n\
|
||||
Hint: Insert some space."
|
||||
| Negative_byte_sequence ->
|
||||
"Negative byte sequence.\n\
|
||||
Hint: Remove the leading minus sign."
|
||||
|
||||
let is_kwd = function
|
||||
| Begin _
|
||||
| Else _
|
||||
| End _
|
||||
| False _
|
||||
| Fun _
|
||||
| Rec _
|
||||
| If _
|
||||
| In _
|
||||
| Let _
|
||||
| Match _
|
||||
| Mod _
|
||||
| Not _
|
||||
| Of _
|
||||
| Or _
|
||||
| Then _
|
||||
| True _
|
||||
| Type _
|
||||
| With _ -> true
|
||||
| _ -> false
|
||||
exception Error of error Region.reg
|
||||
|
||||
let is_constr = function
|
||||
| Constr _
|
||||
| Ident _
|
||||
| False _
|
||||
| True _ -> true
|
||||
| _ -> false
|
||||
let format_error ?(offsets=true) mode Region.{region; value} ~file =
|
||||
let msg = error_to_string value
|
||||
and reg = region#to_string ~file ~offsets mode in
|
||||
let value = sprintf "Lexical error %s:\n%s\n" reg msg
|
||||
in Region.{value; region}
|
||||
|
||||
let is_sym = function
|
||||
| ARROW _
|
||||
| CONS _
|
||||
| CAT _
|
||||
| MINUS _
|
||||
| PLUS _
|
||||
| SLASH _
|
||||
| TIMES _
|
||||
| LPAR _
|
||||
| RPAR _
|
||||
| LBRACKET _
|
||||
| RBRACKET _
|
||||
| LBRACE _
|
||||
| RBRACE _
|
||||
| COMMA _
|
||||
| SEMI _
|
||||
| VBAR _
|
||||
| COLON _
|
||||
| DOT _
|
||||
| WILD _
|
||||
| EQ _
|
||||
| NE _
|
||||
| LT _
|
||||
| GT _
|
||||
| LE _
|
||||
| GE _
|
||||
| BOOL_OR _
|
||||
| BOOL_AND _ -> true
|
||||
| _ -> false
|
||||
let fail region value = raise (Error Region.{region; value})
|
||||
|
||||
let is_eof = function EOF _ -> true | _ -> false
|
||||
let check_right_context token next_token buffer : unit =
|
||||
let pos = (to_region token)#stop in
|
||||
let region = Region.make ~start:pos ~stop:pos in
|
||||
match next_token buffer with
|
||||
None -> ()
|
||||
| Some (markup, next) ->
|
||||
if is_minus token && is_bytes next
|
||||
then let region =
|
||||
Region.cover (to_region token) (to_region next)
|
||||
in fail region Negative_byte_sequence
|
||||
else
|
||||
match markup with
|
||||
[] ->
|
||||
if is_int token
|
||||
then if is_string next || is_ident next
|
||||
then fail region Missing_break
|
||||
else ()
|
||||
else
|
||||
if is_string token
|
||||
then if is_int next || is_bytes next || is_ident next
|
||||
then fail region Missing_break
|
||||
else ()
|
||||
else
|
||||
if is_bytes token
|
||||
then if is_string next || is_ident next
|
||||
then fail region Missing_break
|
||||
else if is_int next
|
||||
then fail region Odd_lengthed_bytes
|
||||
else ()
|
||||
else ()
|
||||
| _::_ -> ()
|
||||
|
||||
(* END TRAILER *)
|
||||
}
|
||||
|
@ -4,7 +4,10 @@ module Region = Simple_utils.Region
|
||||
|
||||
module IO =
|
||||
struct
|
||||
let options = EvalOpt.(read ~lang:`CameLIGO ~ext:".mligo")
|
||||
let options =
|
||||
let open EvalOpt in
|
||||
let block = mk_block ~opening:"(*" ~closing:"*)"
|
||||
in read ~block ~line:"//" ".mligo"
|
||||
end
|
||||
|
||||
module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
|
||||
@ -12,4 +15,4 @@ module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
|
||||
let () =
|
||||
match M.trace () with
|
||||
Stdlib.Ok () -> ()
|
||||
| Error Region.{value; _} -> Utils.highlight value
|
||||
| Error Region.{value; _} -> Printf.eprintf "\027[31m%s\027[0m%!" value
|
||||
|
@ -5,7 +5,10 @@ module SSet = Set.Make (String)
|
||||
|
||||
module IO =
|
||||
struct
|
||||
let options = EvalOpt.(read ~lang:`CameLIGO ~ext:".mligo")
|
||||
let options =
|
||||
let open EvalOpt in
|
||||
let block = mk_block ~opening:"(*" ~closing:"*)"
|
||||
in read ~block ~line:"//" ".mligo"
|
||||
end
|
||||
|
||||
module SubIO =
|
||||
@ -14,7 +17,8 @@ module SubIO =
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : EvalOpt.language;
|
||||
block : EvalOpt.block_comment option;
|
||||
line : EvalOpt.line_comment option;
|
||||
ext : string;
|
||||
mode : [`Byte | `Point];
|
||||
cmd : EvalOpt.command;
|
||||
@ -26,7 +30,8 @@ module SubIO =
|
||||
method libs = IO.options#libs
|
||||
method verbose = IO.options#verbose
|
||||
method offsets = IO.options#offsets
|
||||
method lang = IO.options#lang
|
||||
method block = IO.options#block
|
||||
method line = IO.options#line
|
||||
method ext = IO.options#ext
|
||||
method mode = IO.options#mode
|
||||
method cmd = IO.options#cmd
|
||||
@ -37,7 +42,8 @@ module SubIO =
|
||||
EvalOpt.make ~libs:options#libs
|
||||
~verbose:options#verbose
|
||||
~offsets:options#offsets
|
||||
~lang:options#lang
|
||||
?block:options#block
|
||||
?line:options#line
|
||||
~ext:options#ext
|
||||
~mode:options#mode
|
||||
~cmd:options#cmd
|
||||
|
@ -8,15 +8,14 @@ module SSet = Set.Make (String)
|
||||
|
||||
(* Mock IOs TODO: Fill them with CLI options *)
|
||||
|
||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
||||
|
||||
module SubIO =
|
||||
struct
|
||||
type options = <
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : language;
|
||||
block : EvalOpt.block_comment option;
|
||||
line : EvalOpt.line_comment option;
|
||||
ext : string; (* ".ligo" *)
|
||||
mode : [`Byte | `Point];
|
||||
cmd : EvalOpt.command;
|
||||
@ -24,22 +23,25 @@ module SubIO =
|
||||
>
|
||||
|
||||
let options : options =
|
||||
object
|
||||
method libs = []
|
||||
method verbose = SSet.empty
|
||||
method offsets = true
|
||||
method lang = `PascaLIGO
|
||||
method ext = ".ligo"
|
||||
method mode = `Point
|
||||
method cmd = EvalOpt.Quiet
|
||||
method mono = false
|
||||
end
|
||||
let block = EvalOpt.mk_block ~opening:"(*" ~closing:"*)"
|
||||
in object
|
||||
method libs = []
|
||||
method verbose = SSet.empty
|
||||
method offsets = true
|
||||
method block = Some block
|
||||
method line = Some "//"
|
||||
method ext = ".ligo"
|
||||
method mode = `Point
|
||||
method cmd = EvalOpt.Quiet
|
||||
method mono = false
|
||||
end
|
||||
|
||||
let make =
|
||||
EvalOpt.make ~libs:options#libs
|
||||
~verbose:options#verbose
|
||||
~offsets:options#offsets
|
||||
~lang:options#lang
|
||||
?block:options#block
|
||||
?line:options#line
|
||||
~ext:options#ext
|
||||
~mode:options#mode
|
||||
~cmd:options#cmd
|
||||
|
@ -2,6 +2,8 @@ $HOME/git/OCaml-build/Makefile
|
||||
|
||||
../shared/Lexer.mli
|
||||
../shared/Lexer.mll
|
||||
../shared/LexerLib.mli
|
||||
../shared/LexerLib.ml
|
||||
../shared/EvalOpt.ml
|
||||
../shared/EvalOpt.mli
|
||||
../shared/FQueue.ml
|
||||
@ -18,5 +20,6 @@ $HOME/git/OCaml-build/Makefile
|
||||
../shared/LexerUnit.ml
|
||||
../shared/ParserUnit.mli
|
||||
../shared/ParserUnit.ml
|
||||
../shared/LexerLib.ml
|
||||
|
||||
$HOME/git/ligo/_build/default/src/passes/1-parser/pascaligo/ParErr.ml
|
||||
|
@ -1,4 +1,4 @@
|
||||
(** This signature defines the lexical tokens for LIGO
|
||||
(* This signature defines the lexical tokens for LIGO
|
||||
|
||||
_Tokens_ are the abstract units which are used by the parser to
|
||||
build the abstract syntax tree (AST), in other words, the stream of
|
||||
@ -163,11 +163,22 @@ val eof : Region.t -> token
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
val is_string : token -> bool
|
||||
val is_bytes : token -> bool
|
||||
val is_int : token -> bool
|
||||
val is_ident : token -> bool
|
||||
val is_kwd : token -> bool
|
||||
val is_constr : token -> bool
|
||||
val is_sym : token -> bool
|
||||
val is_eof : token -> bool
|
||||
|
||||
(* Style *)
|
||||
|
||||
type error
|
||||
|
||||
val error_to_string : error -> string
|
||||
|
||||
exception Error of error Region.reg
|
||||
|
||||
val format_error :
|
||||
?offsets:bool -> [`Byte | `Point] ->
|
||||
error Region.reg -> file:bool -> string Region.reg
|
||||
|
||||
val check_right_context :
|
||||
token ->
|
||||
(Lexing.lexbuf -> (Markup.t list * token) option) ->
|
||||
Lexing.lexbuf ->
|
||||
unit
|
||||
|
@ -5,24 +5,14 @@
|
||||
|
||||
(* Shorthands *)
|
||||
|
||||
type lexeme = string
|
||||
|
||||
let sprintf = Printf.sprintf
|
||||
|
||||
module Region = Simple_utils.Region
|
||||
module Pos = Simple_utils.Pos
|
||||
module SMap = Map.Make (String)
|
||||
module SSet = Set.Make (String)
|
||||
|
||||
(* Hack to roll back one lexeme in the current semantic action *)
|
||||
(*
|
||||
let rollback buffer =
|
||||
let open Lexing in
|
||||
let len = String.length (lexeme buffer) in
|
||||
let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
|
||||
buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
|
||||
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
|
||||
*)
|
||||
type lexeme = string
|
||||
|
||||
let sprintf = Printf.sprintf
|
||||
|
||||
(* TOKENS *)
|
||||
|
||||
@ -123,6 +113,8 @@ type t =
|
||||
| EOF of Region.t
|
||||
|
||||
|
||||
(* Projections *)
|
||||
|
||||
type token = t
|
||||
|
||||
let proj_token = function
|
||||
@ -130,32 +122,20 @@ let proj_token = function
|
||||
|
||||
String Region.{region; value} ->
|
||||
region, sprintf "String %s" value
|
||||
|
||||
| Bytes Region.{region; value = s,b} ->
|
||||
region,
|
||||
sprintf "Bytes (\"%s\", \"0x%s\")"
|
||||
s (Hex.show b)
|
||||
|
||||
sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b)
|
||||
| Int Region.{region; value = s,n} ->
|
||||
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
|
||||
|
||||
| Nat Region.{region; value = s,n} ->
|
||||
region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
|
||||
|
||||
| Mutez Region.{region; value = s,n} ->
|
||||
region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
|
||||
|
||||
| Ident Region.{region; value} ->
|
||||
region, sprintf "Ident \"%s\"" value
|
||||
|
||||
| Constr Region.{region; value} ->
|
||||
region, sprintf "Constr \"%s\"" value
|
||||
|
||||
(*
|
||||
| Attr {header; string={region; value}} ->
|
||||
region, sprintf "Attr (\"%s\",\"%s\")" header value
|
||||
*)
|
||||
|
||||
(* Symbols *)
|
||||
|
||||
| SEMI region -> region, "SEMI"
|
||||
@ -244,7 +224,7 @@ let to_lexeme = function
|
||||
| Bytes b -> fst b.Region.value
|
||||
| Int i
|
||||
| Nat i
|
||||
| Mutez i -> fst i.Region.value
|
||||
| Mutez i -> fst i.Region.value
|
||||
| Ident id
|
||||
| Constr id -> id.Region.value
|
||||
|
||||
@ -382,9 +362,7 @@ let keywords = [
|
||||
(fun reg -> With reg)
|
||||
]
|
||||
|
||||
let reserved =
|
||||
let open SSet in
|
||||
empty |> add "arguments"
|
||||
let reserved = SSet.empty
|
||||
|
||||
let constructors = [
|
||||
(fun reg -> False reg);
|
||||
@ -483,24 +461,21 @@ type nat_err =
|
||||
| Non_canonical_zero_nat
|
||||
|
||||
let mk_nat lexeme region =
|
||||
match (String.index_opt lexeme 'n') with
|
||||
| None -> Error Invalid_natural
|
||||
| Some _ -> (
|
||||
let z =
|
||||
Str.(global_replace (regexp "_") "" lexeme) |>
|
||||
Str.(global_replace (regexp "n") "") |>
|
||||
Z.of_string in
|
||||
if Z.equal z Z.zero && lexeme <> "0n"
|
||||
then Error Non_canonical_zero_nat
|
||||
else Ok (Nat Region.{region; value = lexeme,z})
|
||||
)
|
||||
match String.index_opt lexeme 'n' with
|
||||
None -> Error Invalid_natural
|
||||
| Some _ -> let z =
|
||||
Str.(global_replace (regexp "_") "" lexeme) |>
|
||||
Str.(global_replace (regexp "n") "") |>
|
||||
Z.of_string in
|
||||
if Z.equal z Z.zero && lexeme <> "0n"
|
||||
then Error Non_canonical_zero_nat
|
||||
else Ok (Nat Region.{region; value = lexeme,z})
|
||||
|
||||
let mk_mutez lexeme region =
|
||||
let z =
|
||||
Str.(global_replace (regexp "_") "" lexeme) |>
|
||||
Str.(global_replace (regexp "mutez") "") |>
|
||||
Z.of_string in
|
||||
if Z.equal z Z.zero && lexeme <> "0mutez"
|
||||
let z = Str.(global_replace (regexp "_") "" lexeme) |>
|
||||
Str.(global_replace (regexp "mutez") "") |>
|
||||
Z.of_string in
|
||||
if Z.equal z Z.zero && lexeme <> "0mutez"
|
||||
then Error Non_canonical_zero
|
||||
else Ok (Mutez Region.{region; value = lexeme, z})
|
||||
|
||||
@ -558,104 +533,76 @@ let mk_constr lexeme region =
|
||||
|
||||
type attr_err = Invalid_attribute
|
||||
|
||||
let mk_attr _header _string _region =
|
||||
Error Invalid_attribute
|
||||
let mk_attr _ _ _ = Error Invalid_attribute
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
let is_string = function
|
||||
String _ -> true
|
||||
| _ -> false
|
||||
let is_string = function String _ -> true | _ -> false
|
||||
let is_bytes = function Bytes _ -> true | _ -> false
|
||||
let is_int = function Int _ -> true | _ -> false
|
||||
let is_ident = function Ident _ -> true | _ -> false
|
||||
let is_eof = function EOF _ -> true | _ -> false
|
||||
let is_minus = function MINUS _ -> true | _ -> false
|
||||
|
||||
let is_bytes = function
|
||||
Bytes _ -> true
|
||||
| _ -> false
|
||||
(* Errors *)
|
||||
|
||||
let is_int = function
|
||||
Int _ -> true
|
||||
| _ -> false
|
||||
type error =
|
||||
Odd_lengthed_bytes
|
||||
| Missing_break
|
||||
| Negative_byte_sequence
|
||||
|
||||
let is_ident = function
|
||||
Ident _ -> true
|
||||
| _ -> false
|
||||
let error_to_string = function
|
||||
Odd_lengthed_bytes ->
|
||||
"The length of the byte sequence is an odd number.\n\
|
||||
Hint: Add or remove a digit."
|
||||
| Missing_break ->
|
||||
"Missing break.\n\
|
||||
Hint: Insert some space."
|
||||
| Negative_byte_sequence ->
|
||||
"Negative byte sequence.\n\
|
||||
Hint: Remove the leading minus sign."
|
||||
|
||||
let is_kwd = function
|
||||
And _
|
||||
| Attributes _
|
||||
| Begin _
|
||||
| BigMap _
|
||||
| Block _
|
||||
| Case _
|
||||
| Const _
|
||||
| Contains _
|
||||
| Else _
|
||||
| End _
|
||||
| False _
|
||||
| For _
|
||||
| From _
|
||||
| Function _
|
||||
| If _
|
||||
| In _
|
||||
| Is _
|
||||
| List _
|
||||
| Map _
|
||||
| Mod _
|
||||
| Nil _
|
||||
| Not _
|
||||
| Of _
|
||||
| Or _
|
||||
| Patch _
|
||||
| Record _
|
||||
| Remove _
|
||||
| Set _
|
||||
| Skip _
|
||||
| Step _
|
||||
| Then _
|
||||
| To _
|
||||
| True _
|
||||
| Type _
|
||||
| Unit _
|
||||
| Var _
|
||||
| While _
|
||||
| With _ -> true
|
||||
| _ -> false
|
||||
exception Error of error Region.reg
|
||||
|
||||
let is_constr = function
|
||||
Constr _
|
||||
| C_None _
|
||||
| C_Some _ -> true
|
||||
| _ -> false
|
||||
let format_error ?(offsets=true) mode Region.{region; value} ~file =
|
||||
let msg = error_to_string value
|
||||
and reg = region#to_string ~file ~offsets mode in
|
||||
let value = sprintf "Lexical error %s:\n%s\n" reg msg
|
||||
in Region.{value; region}
|
||||
|
||||
let is_sym = function
|
||||
SEMI _
|
||||
| COMMA _
|
||||
| LPAR _
|
||||
| RPAR _
|
||||
| LBRACE _
|
||||
| RBRACE _
|
||||
| LBRACKET _
|
||||
| RBRACKET _
|
||||
| CONS _
|
||||
| VBAR _
|
||||
| ARROW _
|
||||
| ASS _
|
||||
| EQ _
|
||||
| COLON _
|
||||
| LT _
|
||||
| LE _
|
||||
| GT _
|
||||
| GE _
|
||||
| NE _
|
||||
| PLUS _
|
||||
| MINUS _
|
||||
| SLASH _
|
||||
| TIMES _
|
||||
| DOT _
|
||||
| WILD _
|
||||
| CAT _ -> true
|
||||
| _ -> false
|
||||
let fail region value = raise (Error Region.{region; value})
|
||||
|
||||
let is_eof = function EOF _ -> true | _ -> false
|
||||
let check_right_context token next_token buffer : unit =
|
||||
let pos = (to_region token)#stop in
|
||||
let region = Region.make ~start:pos ~stop:pos in
|
||||
match next_token buffer with
|
||||
None -> ()
|
||||
| Some (markup, next) ->
|
||||
if is_minus token && is_bytes next
|
||||
then let region =
|
||||
Region.cover (to_region token) (to_region next)
|
||||
in fail region Negative_byte_sequence
|
||||
else
|
||||
match markup with
|
||||
[] ->
|
||||
if is_int token
|
||||
then if is_string next || is_ident next
|
||||
then fail region Missing_break
|
||||
else ()
|
||||
else
|
||||
if is_string token
|
||||
then if is_int next || is_bytes next || is_ident next
|
||||
then fail region Missing_break
|
||||
else ()
|
||||
else
|
||||
if is_bytes token
|
||||
then if is_string next || is_ident next
|
||||
then fail region Missing_break
|
||||
else if is_int next
|
||||
then fail region Odd_lengthed_bytes
|
||||
else ()
|
||||
else ()
|
||||
| _::_ -> ()
|
||||
|
||||
(* END TRAILER *)
|
||||
}
|
||||
|
@ -4,7 +4,10 @@ module Region = Simple_utils.Region
|
||||
|
||||
module IO =
|
||||
struct
|
||||
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo")
|
||||
let options =
|
||||
let open EvalOpt in
|
||||
let block = mk_block ~opening:"(*" ~closing:"*)"
|
||||
in read ~block ~line:"//" ".ligo"
|
||||
end
|
||||
|
||||
module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
|
||||
|
@ -5,7 +5,10 @@ module SSet = Set.Make (String)
|
||||
|
||||
module IO =
|
||||
struct
|
||||
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo")
|
||||
let options =
|
||||
let open EvalOpt in
|
||||
let block = mk_block ~opening:"(*" ~closing:"*)"
|
||||
in read ~block ~line:"//" ".ligo"
|
||||
end
|
||||
|
||||
module SubIO =
|
||||
@ -14,7 +17,8 @@ module SubIO =
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : EvalOpt.language;
|
||||
block : EvalOpt.block_comment option;
|
||||
line : EvalOpt.line_comment option;
|
||||
ext : string;
|
||||
mode : [`Byte | `Point];
|
||||
cmd : EvalOpt.command;
|
||||
@ -26,7 +30,8 @@ module SubIO =
|
||||
method libs = IO.options#libs
|
||||
method verbose = IO.options#verbose
|
||||
method offsets = IO.options#offsets
|
||||
method lang = IO.options#lang
|
||||
method block = IO.options#block
|
||||
method line = IO.options#line
|
||||
method ext = IO.options#ext
|
||||
method mode = IO.options#mode
|
||||
method cmd = IO.options#cmd
|
||||
@ -37,7 +42,8 @@ module SubIO =
|
||||
EvalOpt.make ~libs:options#libs
|
||||
~verbose:options#verbose
|
||||
~offsets:options#offsets
|
||||
~lang:options#lang
|
||||
?block:options#block
|
||||
?line:options#line
|
||||
~ext:options#ext
|
||||
~mode:options#mode
|
||||
~cmd:options#cmd
|
||||
|
@ -11,15 +11,14 @@ module SSet = Set.Make (String)
|
||||
|
||||
(* Mock IOs TODO: Fill them with CLI options *)
|
||||
|
||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
||||
|
||||
module SubIO =
|
||||
struct
|
||||
type options = <
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : language;
|
||||
block : EvalOpt.block_comment option;
|
||||
line : EvalOpt.line_comment option;
|
||||
ext : string; (* ".religo" *)
|
||||
mode : [`Byte | `Point];
|
||||
cmd : EvalOpt.command;
|
||||
@ -27,22 +26,25 @@ module SubIO =
|
||||
>
|
||||
|
||||
let options : options =
|
||||
object
|
||||
method libs = []
|
||||
method verbose = SSet.empty
|
||||
method offsets = true
|
||||
method lang = `ReasonLIGO
|
||||
method ext = ".religo"
|
||||
method mode = `Point
|
||||
method cmd = EvalOpt.Quiet
|
||||
method mono = false
|
||||
end
|
||||
let block = EvalOpt.mk_block ~opening:"/*" ~closing:"*/"
|
||||
in object
|
||||
method libs = []
|
||||
method verbose = SSet.empty
|
||||
method offsets = true
|
||||
method block = Some block
|
||||
method line = Some "//"
|
||||
method ext = ".religo"
|
||||
method mode = `Point
|
||||
method cmd = EvalOpt.Quiet
|
||||
method mono = false
|
||||
end
|
||||
|
||||
let make =
|
||||
EvalOpt.make ~libs:options#libs
|
||||
~verbose:options#verbose
|
||||
~offsets:options#offsets
|
||||
~lang:options#lang
|
||||
?block:options#block
|
||||
?line:options#line
|
||||
~ext:options#ext
|
||||
~mode:options#mode
|
||||
~cmd:options#cmd
|
||||
|
@ -2,6 +2,7 @@ $HOME/git/OCaml-build/Makefile
|
||||
|
||||
../shared/Lexer.mli
|
||||
../shared/Lexer.mll
|
||||
../shared/LexerLib.ml
|
||||
../shared/EvalOpt.ml
|
||||
../shared/EvalOpt.mli
|
||||
../shared/FQueue.ml
|
||||
|
@ -31,48 +31,49 @@ type lexeme = string
|
||||
type t =
|
||||
(* Symbols *)
|
||||
|
||||
CAT of Region.t (* "++" *)
|
||||
CAT of Region.t (* "++" *)
|
||||
|
||||
(* Arithmetics *)
|
||||
|
||||
| MINUS of Region.t (* "-" *)
|
||||
| PLUS of Region.t (* "+" *)
|
||||
| SLASH of Region.t (* "/" *)
|
||||
| TIMES of Region.t (* "*" *)
|
||||
| MINUS of Region.t (* "-" *)
|
||||
| PLUS of Region.t (* "+" *)
|
||||
| SLASH of Region.t (* "/" *)
|
||||
| TIMES of Region.t (* "*" *)
|
||||
|
||||
(* Compounds *)
|
||||
|
||||
| LPAR of Region.t (* "(" *)
|
||||
| RPAR of Region.t (* ")" *)
|
||||
| LPAR of Region.t (* "(" *)
|
||||
| RPAR of Region.t (* ")" *)
|
||||
| LBRACKET of Region.t (* "[" *)
|
||||
| RBRACKET of Region.t (* "]" *)
|
||||
| LBRACE of Region.t (* "{" *)
|
||||
| RBRACE of Region.t (* "}" *)
|
||||
| LBRACE of Region.t (* "{" *)
|
||||
| RBRACE of Region.t (* "}" *)
|
||||
|
||||
(* Separators *)
|
||||
|
||||
| COMMA of Region.t (* "," *)
|
||||
| SEMI of Region.t (* ";" *)
|
||||
| VBAR of Region.t (* "|" *)
|
||||
| COLON of Region.t (* ":" *)
|
||||
| DOT of Region.t (* "." *)
|
||||
| COMMA of Region.t (* "," *)
|
||||
| SEMI of Region.t (* ";" *)
|
||||
| VBAR of Region.t (* "|" *)
|
||||
| COLON of Region.t (* ":" *)
|
||||
| DOT of Region.t (* "." *)
|
||||
| ELLIPSIS of Region.t (* "..." *)
|
||||
| ARROW of Region.t (* "=>" *)
|
||||
|
||||
(* Wildcard *)
|
||||
|
||||
| WILD of Region.t (* "_" *)
|
||||
| WILD of Region.t (* "_" *)
|
||||
|
||||
(* Comparisons *)
|
||||
|
||||
| EQ of Region.t (* "=" *)
|
||||
| EQEQ of Region.t (* "==" *)
|
||||
| NE of Region.t (* "!=" *)
|
||||
| LT of Region.t (* "<" *)
|
||||
| GT of Region.t (* ">" *)
|
||||
| LE of Region.t (* "=<" *)
|
||||
| GE of Region.t (* ">=" *)
|
||||
| EQ of Region.t (* "=" *)
|
||||
| EQEQ of Region.t (* "==" *)
|
||||
| NE of Region.t (* "!=" *)
|
||||
| LT of Region.t (* "<" *)
|
||||
| GT of Region.t (* ">" *)
|
||||
| LE of Region.t (* "<=" *)
|
||||
| GE of Region.t (* ">=" *)
|
||||
|
||||
| ARROW of Region.t (* "=>" *)
|
||||
(* Logic *)
|
||||
|
||||
| BOOL_OR of Region.t (* "||" *)
|
||||
| BOOL_AND of Region.t (* "&&" *)
|
||||
@ -91,18 +92,18 @@ type t =
|
||||
|
||||
(* Keywords *)
|
||||
|
||||
| Else of Region.t
|
||||
| False of Region.t
|
||||
| If of Region.t
|
||||
| Let of Region.t
|
||||
| Rec of Region.t
|
||||
| Else of Region.t
|
||||
| False of Region.t
|
||||
| If of Region.t
|
||||
| Let of Region.t
|
||||
| Mod of Region.t
|
||||
| Or of Region.t
|
||||
| Rec of Region.t
|
||||
| Switch of Region.t
|
||||
| Mod of Region.t
|
||||
| Or of Region.t
|
||||
| True of Region.t
|
||||
| Type of Region.t
|
||||
| True of Region.t
|
||||
| Type of Region.t
|
||||
|
||||
(* Data constructors *)
|
||||
(* Data constructors *)
|
||||
|
||||
| C_None of Region.t (* "None" *)
|
||||
| C_Some of Region.t (* "Some" *)
|
||||
@ -111,8 +112,6 @@ type t =
|
||||
|
||||
| EOF of Region.t (* End of file *)
|
||||
|
||||
type token = t
|
||||
|
||||
(* Projections
|
||||
|
||||
The difference between extracting the lexeme and a string from a
|
||||
@ -121,6 +120,8 @@ type token = t
|
||||
lexeme (concrete syntax).
|
||||
*)
|
||||
|
||||
type token = t
|
||||
|
||||
val to_lexeme : token -> lexeme
|
||||
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
|
||||
val to_region : token -> Region.t
|
||||
@ -154,11 +155,22 @@ val eof : Region.t -> token
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
val is_string : token -> bool
|
||||
val is_bytes : token -> bool
|
||||
val is_int : token -> bool
|
||||
val is_ident : token -> bool
|
||||
val is_kwd : token -> bool
|
||||
val is_constr : token -> bool
|
||||
val is_sym : token -> bool
|
||||
val is_eof : token -> bool
|
||||
|
||||
(* Style *)
|
||||
|
||||
type error
|
||||
|
||||
val error_to_string : error -> string
|
||||
|
||||
exception Error of error Region.reg
|
||||
|
||||
val format_error :
|
||||
?offsets:bool -> [`Byte | `Point] ->
|
||||
error Region.reg -> file:bool -> string Region.reg
|
||||
|
||||
val check_right_context :
|
||||
token ->
|
||||
(Lexing.lexbuf -> (Markup.t list * token) option) ->
|
||||
Lexing.lexbuf ->
|
||||
unit
|
||||
|
@ -1,66 +1,69 @@
|
||||
{
|
||||
(* START OF HEADER *)
|
||||
|
||||
type lexeme = string
|
||||
|
||||
let sprintf = Printf.sprintf
|
||||
(* Shorthands *)
|
||||
|
||||
module Region = Simple_utils.Region
|
||||
module Pos = Simple_utils.Pos
|
||||
module SMap = Utils.String.Map
|
||||
module SSet = Utils.String.Set
|
||||
|
||||
type lexeme = string
|
||||
|
||||
let sprintf = Printf.sprintf
|
||||
|
||||
(* TOKENS *)
|
||||
|
||||
type t =
|
||||
(* Symbols *)
|
||||
|
||||
CAT of Region.t (* "++" *)
|
||||
CAT of Region.t (* "++" *)
|
||||
|
||||
(* Arithmetics *)
|
||||
|
||||
| MINUS of Region.t (* "-" *)
|
||||
| PLUS of Region.t (* "+" *)
|
||||
| SLASH of Region.t (* "/" *)
|
||||
| TIMES of Region.t (* "*" *)
|
||||
| MINUS of Region.t (* "-" *)
|
||||
| PLUS of Region.t (* "+" *)
|
||||
| SLASH of Region.t (* "/" *)
|
||||
| TIMES of Region.t (* "*" *)
|
||||
|
||||
(* Compounds *)
|
||||
|
||||
| LPAR of Region.t (* "(" *)
|
||||
| RPAR of Region.t (* ")" *)
|
||||
| LBRACKET of Region.t (* "[" *)
|
||||
| RBRACKET of Region.t (* "]" *)
|
||||
| LBRACE of Region.t (* "{" *)
|
||||
| RBRACE of Region.t (* "}" *)
|
||||
| LPAR of Region.t (* "(" *)
|
||||
| RPAR of Region.t (* ")" *)
|
||||
| LBRACKET of Region.t (* "[" *)
|
||||
| RBRACKET of Region.t (* "]" *)
|
||||
| LBRACE of Region.t (* "{" *)
|
||||
| RBRACE of Region.t (* "}" *)
|
||||
|
||||
(* Separators *)
|
||||
|
||||
| COMMA of Region.t (* "," *)
|
||||
| SEMI of Region.t (* ";" *)
|
||||
| VBAR of Region.t (* "|" *)
|
||||
| COLON of Region.t (* ":" *)
|
||||
| DOT of Region.t (* "." *)
|
||||
| COMMA of Region.t (* "," *)
|
||||
| SEMI of Region.t (* ";" *)
|
||||
| VBAR of Region.t (* "|" *)
|
||||
| COLON of Region.t (* ":" *)
|
||||
| DOT of Region.t (* "." *)
|
||||
| ELLIPSIS of Region.t (* "..." *)
|
||||
| ARROW of Region.t (* "=>" *)
|
||||
|
||||
(* Wildcard *)
|
||||
|
||||
| WILD of Region.t (* "_" *)
|
||||
| WILD of Region.t (* "_" *)
|
||||
|
||||
(* Comparisons *)
|
||||
|
||||
| EQ of Region.t (* "=" *)
|
||||
| EQEQ of Region.t (* "==" *)
|
||||
| NE of Region.t (* "!=" *)
|
||||
| LT of Region.t (* "<" *)
|
||||
| GT of Region.t (* ">" *)
|
||||
| LE of Region.t (* "<=" *)
|
||||
| GE of Region.t (* ">=" *)
|
||||
| ARROW of Region.t (* "=>" *)
|
||||
| EQ of Region.t (* "=" *)
|
||||
| EQEQ of Region.t (* "==" *)
|
||||
| NE of Region.t (* "!=" *)
|
||||
| LT of Region.t (* "<" *)
|
||||
| GT of Region.t (* ">" *)
|
||||
| LE of Region.t (* "<=" *)
|
||||
| GE of Region.t (* ">=" *)
|
||||
|
||||
| BOOL_OR of Region.t (* "||" *)
|
||||
| BOOL_AND of Region.t (* "&&" *)
|
||||
(* Logic *)
|
||||
|
||||
| NOT of Region.t (* ! *)
|
||||
| BOOL_OR of Region.t (* "||" *)
|
||||
| BOOL_AND of Region.t (* "&&" *)
|
||||
| NOT of Region.t (* ! *)
|
||||
|
||||
(* Identifiers, labels, numbers and strings *)
|
||||
|
||||
@ -75,17 +78,17 @@ type t =
|
||||
|
||||
(* Keywords *)
|
||||
|
||||
(*| And*)
|
||||
| Else of Region.t
|
||||
| False of Region.t
|
||||
| If of Region.t
|
||||
| Let of Region.t
|
||||
| Rec of Region.t
|
||||
| Else of Region.t
|
||||
| False of Region.t
|
||||
| If of Region.t
|
||||
| Let of Region.t
|
||||
| Mod of Region.t
|
||||
| Or of Region.t
|
||||
| Rec of Region.t
|
||||
| Switch of Region.t
|
||||
| Mod of Region.t
|
||||
| Or of Region.t
|
||||
| True of Region.t
|
||||
| Type of Region.t
|
||||
| True of Region.t
|
||||
| Type of Region.t
|
||||
|
||||
(* Data constructors *)
|
||||
|
||||
| C_None of Region.t (* "None" *)
|
||||
@ -96,121 +99,143 @@ type t =
|
||||
| EOF of Region.t (* End of file *)
|
||||
|
||||
|
||||
(* Projections *)
|
||||
|
||||
type token = t
|
||||
|
||||
let proj_token = function
|
||||
CAT region -> region, "CAT"
|
||||
| MINUS region -> region, "MINUS"
|
||||
| PLUS region -> region, "PLUS"
|
||||
| SLASH region -> region, "SLASH"
|
||||
| TIMES region -> region, "TIMES"
|
||||
| LPAR region -> region, "LPAR"
|
||||
| RPAR region -> region, "RPAR"
|
||||
| LBRACKET region -> region, "LBRACKET"
|
||||
| RBRACKET region -> region, "RBRACKET"
|
||||
| LBRACE region -> region, "LBRACE"
|
||||
| RBRACE region -> region, "RBRACE"
|
||||
| COMMA region -> region, "COMMA"
|
||||
| SEMI region -> region, "SEMI"
|
||||
| VBAR region -> region, "VBAR"
|
||||
| COLON region -> region, "COLON"
|
||||
| DOT region -> region, "DOT"
|
||||
| ELLIPSIS region -> region, "ELLIPSIS"
|
||||
| WILD region -> region, "WILD"
|
||||
| EQ region -> region, "EQ"
|
||||
| EQEQ region -> region, "EQEQ"
|
||||
| NE region -> region, "NE"
|
||||
| LT region -> region, "LT"
|
||||
| GT region -> region, "GT"
|
||||
| LE region -> region, "LE"
|
||||
| GE region -> region, "GE"
|
||||
| ARROW region -> region, "ARROW"
|
||||
| BOOL_OR region -> region, "BOOL_OR"
|
||||
| BOOL_AND region -> region, "BOOL_AND"
|
||||
| Ident Region.{region; value} ->
|
||||
region, sprintf "Ident %s" value
|
||||
| Constr Region.{region; value} ->
|
||||
region, sprintf "Constr %s" value
|
||||
(* Literals *)
|
||||
|
||||
String Region.{region; value} ->
|
||||
region, sprintf "String %s" value
|
||||
| Bytes Region.{region; value = s,b} ->
|
||||
region,
|
||||
sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b)
|
||||
| Int Region.{region; value = s,n} ->
|
||||
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
|
||||
| Nat Region.{region; value = s,n} ->
|
||||
region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
|
||||
| Mutez Region.{region; value = s,n} ->
|
||||
region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
|
||||
| String Region.{region; value} ->
|
||||
region, sprintf "String %s" value
|
||||
| Bytes Region.{region; value = s,b} ->
|
||||
region,
|
||||
sprintf "Bytes (\"%s\", \"0x%s\")"
|
||||
s (Hex.show b)
|
||||
| Else region -> region, "Else"
|
||||
| False region -> region, "False"
|
||||
| If region -> region, "If"
|
||||
| Let region -> region, "Let"
|
||||
| Rec region -> region, "Rec"
|
||||
| Switch region -> region, "Switch"
|
||||
| Mod region -> region, "Mod"
|
||||
| NOT region -> region, "!"
|
||||
| Or region -> region, "Or"
|
||||
| True region -> region, "True"
|
||||
| Type region -> region, "Type"
|
||||
| C_None region -> region, "C_None"
|
||||
| C_Some region -> region, "C_Some"
|
||||
| Attr Region.{region; value} -> region, sprintf "Attr %s" value
|
||||
| EOF region -> region, "EOF"
|
||||
| Ident Region.{region; value} ->
|
||||
region, sprintf "Ident %s" value
|
||||
| Constr Region.{region; value} ->
|
||||
region, sprintf "Constr %s" value
|
||||
|
||||
(* Symbols *)
|
||||
|
||||
| CAT region -> region, "CAT"
|
||||
| MINUS region -> region, "MINUS"
|
||||
| PLUS region -> region, "PLUS"
|
||||
| SLASH region -> region, "SLASH"
|
||||
| TIMES region -> region, "TIMES"
|
||||
| LPAR region -> region, "LPAR"
|
||||
| RPAR region -> region, "RPAR"
|
||||
| LBRACKET region -> region, "LBRACKET"
|
||||
| RBRACKET region -> region, "RBRACKET"
|
||||
| LBRACE region -> region, "LBRACE"
|
||||
| RBRACE region -> region, "RBRACE"
|
||||
| COMMA region -> region, "COMMA"
|
||||
| SEMI region -> region, "SEMI"
|
||||
| VBAR region -> region, "VBAR"
|
||||
| COLON region -> region, "COLON"
|
||||
| DOT region -> region, "DOT"
|
||||
| ELLIPSIS region -> region, "ELLIPSIS"
|
||||
| WILD region -> region, "WILD"
|
||||
| EQ region -> region, "EQ"
|
||||
| EQEQ region -> region, "EQEQ"
|
||||
| NE region -> region, "NE"
|
||||
| LT region -> region, "LT"
|
||||
| GT region -> region, "GT"
|
||||
| LE region -> region, "LE"
|
||||
| GE region -> region, "GE"
|
||||
| ARROW region -> region, "ARROW"
|
||||
| NOT region -> region, "NOT"
|
||||
| BOOL_OR region -> region, "BOOL_OR"
|
||||
| BOOL_AND region -> region, "BOOL_AND"
|
||||
| Else region -> region, "Else"
|
||||
| False region -> region, "False"
|
||||
| If region -> region, "If"
|
||||
| Let region -> region, "Let"
|
||||
| Rec region -> region, "Rec"
|
||||
| Switch region -> region, "Switch"
|
||||
| Mod region -> region, "Mod"
|
||||
| Or region -> region, "Or"
|
||||
| True region -> region, "True"
|
||||
| Type region -> region, "Type"
|
||||
| C_None region -> region, "C_None"
|
||||
| C_Some region -> region, "C_Some"
|
||||
| Attr Region.{region; value} -> region, sprintf "Attr %s" value
|
||||
| EOF region -> region, "EOF"
|
||||
|
||||
let to_lexeme = function
|
||||
CAT _ -> "++"
|
||||
| MINUS _ -> "-"
|
||||
| PLUS _ -> "+"
|
||||
| SLASH _ -> "/"
|
||||
| TIMES _ -> "*"
|
||||
| LPAR _ -> "("
|
||||
| RPAR _ -> ")"
|
||||
| LBRACKET _ -> "["
|
||||
| RBRACKET _ -> "]"
|
||||
| LBRACE _ -> "{"
|
||||
| RBRACE _ -> "}"
|
||||
| COMMA _ -> ","
|
||||
| SEMI _ -> ";"
|
||||
| VBAR _ -> "|"
|
||||
| COLON _ -> ":"
|
||||
| DOT _ -> "."
|
||||
| ELLIPSIS _ -> "..."
|
||||
| WILD _ -> "_"
|
||||
| EQ _ -> "="
|
||||
| EQEQ _ -> "=="
|
||||
| NE _ -> "!="
|
||||
| LT _ -> "<"
|
||||
| GT _ -> ">"
|
||||
| LE _ -> "<="
|
||||
| GE _ -> ">="
|
||||
| ARROW _ -> "=>"
|
||||
| BOOL_OR _ -> "||"
|
||||
| BOOL_AND _ -> "&&"
|
||||
| Ident id -> id.Region.value
|
||||
| Constr id -> id.Region.value
|
||||
(* Literals *)
|
||||
|
||||
String s -> s.Region.value
|
||||
| Bytes b -> fst b.Region.value
|
||||
| Int i
|
||||
| Nat i
|
||||
| Mutez i -> fst i.Region.value
|
||||
| String s -> s.Region.value
|
||||
| Bytes b -> fst b.Region.value
|
||||
| Else _ -> "else"
|
||||
| False _ -> "false"
|
||||
| If _ -> "if"
|
||||
| Let _ -> "let"
|
||||
| Rec _ -> "rec"
|
||||
| Mod _ -> "mod"
|
||||
| NOT _ -> "!"
|
||||
| Or _ -> "or"
|
||||
| Switch _ -> "switch"
|
||||
| True _ -> "true"
|
||||
| Type _ -> "type"
|
||||
| Mutez i -> fst i.Region.value
|
||||
| Ident id -> id.Region.value
|
||||
| Constr id -> id.Region.value
|
||||
| Attr a -> a.Region.value
|
||||
|
||||
(* Symbols *)
|
||||
|
||||
| CAT _ -> "++"
|
||||
| MINUS _ -> "-"
|
||||
| PLUS _ -> "+"
|
||||
| SLASH _ -> "/"
|
||||
| TIMES _ -> "*"
|
||||
| LPAR _ -> "("
|
||||
| RPAR _ -> ")"
|
||||
| LBRACKET _ -> "["
|
||||
| RBRACKET _ -> "]"
|
||||
| LBRACE _ -> "{"
|
||||
| RBRACE _ -> "}"
|
||||
| COMMA _ -> ","
|
||||
| SEMI _ -> ";"
|
||||
| VBAR _ -> "|"
|
||||
| COLON _ -> ":"
|
||||
| DOT _ -> "."
|
||||
| ELLIPSIS _ -> "..."
|
||||
| WILD _ -> "_"
|
||||
| EQ _ -> "="
|
||||
| EQEQ _ -> "=="
|
||||
| NE _ -> "!="
|
||||
| LT _ -> "<"
|
||||
| GT _ -> ">"
|
||||
| LE _ -> "<="
|
||||
| GE _ -> ">="
|
||||
| ARROW _ -> "=>"
|
||||
| BOOL_OR _ -> "||"
|
||||
| BOOL_AND _ -> "&&"
|
||||
| NOT _ -> "!"
|
||||
|
||||
(* Keywords *)
|
||||
|
||||
| Else _ -> "else"
|
||||
| False _ -> "false"
|
||||
| If _ -> "if"
|
||||
| Let _ -> "let"
|
||||
| Mod _ -> "mod"
|
||||
| Or _ -> "or"
|
||||
| Rec _ -> "rec"
|
||||
| Switch _ -> "switch"
|
||||
| True _ -> "true"
|
||||
| Type _ -> "type"
|
||||
|
||||
(* Data constructors *)
|
||||
|
||||
| C_None _ -> "None"
|
||||
| C_Some _ -> "Some"
|
||||
| Attr a -> a.Region.value
|
||||
|
||||
(* Virtual tokens *)
|
||||
|
||||
| EOF _ -> ""
|
||||
|
||||
(* CONVERSIONS *)
|
||||
|
||||
let to_string token ?(offsets=true) mode =
|
||||
let region, val_str = proj_token token in
|
||||
let reg_str = region#compact ~offsets mode
|
||||
@ -261,12 +286,9 @@ let reserved =
|
||||
|> add "functor"
|
||||
|> add "inherit"
|
||||
|> add "initializer"
|
||||
(* |> add "land" - see https://ligo.atlassian.net/browse/LIGO-263 *)
|
||||
|> add "lazy"
|
||||
(* |> add "lor" - see https://ligo.atlassian.net/browse/LIGO-263 *)
|
||||
|> add "lsl"
|
||||
|> add "lsr"
|
||||
(* |> add "lxor" - see https://ligo.atlassian.net/browse/LIGO-263 *)
|
||||
|> add "match"
|
||||
|> add "method"
|
||||
|> add "module"
|
||||
@ -291,7 +313,7 @@ let reserved =
|
||||
|
||||
let constructors = [
|
||||
(fun reg -> C_None reg);
|
||||
(fun reg -> C_Some reg);
|
||||
(fun reg -> C_Some reg)
|
||||
]
|
||||
|
||||
let add map (key, value) = SMap.add key value map
|
||||
@ -376,20 +398,18 @@ let mk_int lexeme region =
|
||||
else Ok (Int Region.{region; value = lexeme, z})
|
||||
|
||||
let mk_nat lexeme region =
|
||||
let z =
|
||||
Str.(global_replace (regexp "_") "" lexeme) |>
|
||||
Str.(global_replace (regexp "n") "") |>
|
||||
Z.of_string in
|
||||
if Z.equal z Z.zero && lexeme <> "0n"
|
||||
let z = Str.(global_replace (regexp "_") "" lexeme) |>
|
||||
Str.(global_replace (regexp "n") "") |>
|
||||
Z.of_string in
|
||||
if Z.equal z Z.zero && lexeme <> "0n"
|
||||
then Error Non_canonical_zero_nat
|
||||
else Ok (Nat Region.{region; value = lexeme, z})
|
||||
|
||||
let mk_mutez lexeme region =
|
||||
let z =
|
||||
Str.(global_replace (regexp "_") "" lexeme) |>
|
||||
Str.(global_replace (regexp "mutez") "") |>
|
||||
Z.of_string in
|
||||
if Z.equal z Z.zero && lexeme <> "0mutez"
|
||||
let z = Str.(global_replace (regexp "_") "" lexeme) |>
|
||||
Str.(global_replace (regexp "mutez") "") |>
|
||||
Z.of_string in
|
||||
if Z.equal z Z.zero && lexeme <> "0mutez"
|
||||
then Error Non_canonical_zero
|
||||
else Ok (Mutez Region.{region; value = lexeme, z})
|
||||
|
||||
@ -426,11 +446,11 @@ let mk_sym lexeme region =
|
||||
|
||||
(* Symbols specific to ReasonLIGO *)
|
||||
|
||||
| "..." -> Ok (ELLIPSIS region)
|
||||
| "=>" -> Ok (ARROW region)
|
||||
| "==" -> Ok (EQEQ region)
|
||||
| "!" -> Ok (NOT region)
|
||||
| "++" -> Ok (CAT region)
|
||||
| "..." -> Ok (ELLIPSIS region)
|
||||
| "=>" -> Ok (ARROW region)
|
||||
| "==" -> Ok (EQEQ region)
|
||||
| "!" -> Ok (NOT region)
|
||||
| "++" -> Ok (CAT region)
|
||||
|
||||
(* Invalid symbols *)
|
||||
|
||||
@ -460,75 +480,72 @@ let mk_attr header lexeme region =
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
let is_string = function
|
||||
String _ -> true
|
||||
| _ -> false
|
||||
let is_string = function String _ -> true | _ -> false
|
||||
let is_bytes = function Bytes _ -> true | _ -> false
|
||||
let is_int = function Int _ -> true | _ -> false
|
||||
let is_ident = function Ident _ -> true | _ -> false
|
||||
let is_eof = function EOF _ -> true | _ -> false
|
||||
let is_minus = function MINUS _ -> true | _ -> false
|
||||
|
||||
let is_bytes = function
|
||||
Bytes _ -> true
|
||||
| _ -> false
|
||||
(* Errors *)
|
||||
|
||||
let is_int = function
|
||||
Int _ -> true
|
||||
| _ -> false
|
||||
type error =
|
||||
Odd_lengthed_bytes
|
||||
| Missing_break
|
||||
| Negative_byte_sequence
|
||||
|
||||
let is_ident = function
|
||||
Ident _ -> true
|
||||
| _ -> false
|
||||
let error_to_string = function
|
||||
Odd_lengthed_bytes ->
|
||||
"The length of the byte sequence is an odd number.\n\
|
||||
Hint: Add or remove a digit."
|
||||
| Missing_break ->
|
||||
"Missing break.\n\
|
||||
Hint: Insert some space."
|
||||
| Negative_byte_sequence ->
|
||||
"Negative byte sequence.\n\
|
||||
Hint: Remove the leading minus sign."
|
||||
|
||||
let is_kwd = function
|
||||
Else _
|
||||
| False _
|
||||
| If _
|
||||
| Let _
|
||||
| Rec _
|
||||
| Switch _
|
||||
| Mod _
|
||||
| Or _
|
||||
| True _
|
||||
| Type _ -> true
|
||||
| _ -> false
|
||||
exception Error of error Region.reg
|
||||
|
||||
let is_constr = function
|
||||
Constr _
|
||||
| Ident _
|
||||
| False _
|
||||
| True _ -> true
|
||||
| _ -> false
|
||||
let format_error ?(offsets=true) mode Region.{region; value} ~file =
|
||||
let msg = error_to_string value
|
||||
and reg = region#to_string ~file ~offsets mode in
|
||||
let value = sprintf "Lexical error %s:\n%s\n" reg msg
|
||||
in Region.{value; region}
|
||||
|
||||
let is_sym = function
|
||||
CAT _
|
||||
| MINUS _
|
||||
| PLUS _
|
||||
| SLASH _
|
||||
| TIMES _
|
||||
| LPAR _
|
||||
| RPAR _
|
||||
| LBRACKET _
|
||||
| RBRACKET _
|
||||
| LBRACE _
|
||||
| RBRACE _
|
||||
| COMMA _
|
||||
| SEMI _
|
||||
| VBAR _
|
||||
| COLON _
|
||||
| DOT _
|
||||
| ELLIPSIS _
|
||||
| WILD _
|
||||
| EQ _
|
||||
| EQEQ _
|
||||
| NE _
|
||||
| LT _
|
||||
| GT _
|
||||
| LE _
|
||||
| GE _
|
||||
| ARROW _
|
||||
| BOOL_OR _
|
||||
| NOT _
|
||||
| BOOL_AND _ -> true
|
||||
| _ -> false
|
||||
let fail region value = raise (Error Region.{region; value})
|
||||
|
||||
let is_eof = function EOF _ -> true | _ -> false
|
||||
let check_right_context token next_token buffer : unit =
|
||||
let pos = (to_region token)#stop in
|
||||
let region = Region.make ~start:pos ~stop:pos in
|
||||
match next_token buffer with
|
||||
None -> ()
|
||||
| Some (markup, next) ->
|
||||
if is_minus token && is_bytes next
|
||||
then let region =
|
||||
Region.cover (to_region token) (to_region next)
|
||||
in fail region Negative_byte_sequence
|
||||
else
|
||||
match markup with
|
||||
[] ->
|
||||
if is_int token
|
||||
then if is_string next || is_ident next
|
||||
then fail region Missing_break
|
||||
else ()
|
||||
else
|
||||
if is_string token
|
||||
then if is_int next || is_bytes next || is_ident next
|
||||
then fail region Missing_break
|
||||
else ()
|
||||
else
|
||||
if is_bytes token
|
||||
then if is_string next || is_ident next
|
||||
then fail region Missing_break
|
||||
else if is_int next
|
||||
then fail region Odd_lengthed_bytes
|
||||
else ()
|
||||
else ()
|
||||
| _::_ -> ()
|
||||
|
||||
(* END TRAILER *)
|
||||
}
|
||||
|
@ -4,7 +4,10 @@ module Region = Simple_utils.Region
|
||||
|
||||
module IO =
|
||||
struct
|
||||
let options = EvalOpt.(read ~lang:`ReasonLIGO ~ext:".religo")
|
||||
let options =
|
||||
let open EvalOpt in
|
||||
let block = mk_block ~opening:"/*" ~closing:"*/"
|
||||
in read ~block ~line:"//" ".religo"
|
||||
end
|
||||
|
||||
module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
|
||||
@ -12,4 +15,4 @@ module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
|
||||
let () =
|
||||
match M.trace () with
|
||||
Stdlib.Ok () -> ()
|
||||
| Error Region.{value; _} -> Utils.highlight value
|
||||
| Error Region.{value; _} -> Printf.eprintf "\027[31m%s\027[0m%!" value
|
||||
|
@ -40,11 +40,11 @@ let rec curry hd = function
|
||||
in TFun {value; region}
|
||||
| [] -> hd
|
||||
|
||||
let wild_error e =
|
||||
match e with
|
||||
| EVar { value = "_"; _} as e ->
|
||||
let wild_error e =
|
||||
match e with
|
||||
| EVar { value = "_"; _} as e ->
|
||||
let open! SyntaxError in
|
||||
raise (Error (InvalidWild e))
|
||||
raise (Error (InvalidWild e))
|
||||
| _ -> ()
|
||||
|
||||
(* END HEADER *)
|
||||
@ -270,30 +270,30 @@ let_declaration:
|
||||
|
||||
let_binding:
|
||||
"<ident>" type_annotation? "=" expr {
|
||||
wild_error $4;
|
||||
wild_error $4;
|
||||
Scoping.check_reserved_name $1;
|
||||
{binders = PVar $1, []; lhs_type=$2; eq=$3; let_rhs=$4}
|
||||
}
|
||||
| "_" type_annotation? "=" expr {
|
||||
wild_error $4;
|
||||
wild_error $4;
|
||||
{binders = PWild $1, []; lhs_type=$2; eq=$3; let_rhs=$4}
|
||||
}
|
||||
| unit type_annotation? "=" expr {
|
||||
wild_error $4;
|
||||
wild_error $4;
|
||||
{binders = PUnit $1, []; lhs_type=$2; eq=$3; let_rhs=$4}
|
||||
}
|
||||
| record_pattern type_annotation? "=" expr {
|
||||
wild_error $4;
|
||||
wild_error $4;
|
||||
Scoping.check_pattern (PRecord $1);
|
||||
{binders = PRecord $1, []; lhs_type=$2; eq=$3; let_rhs=$4}
|
||||
}
|
||||
| par(closed_irrefutable) type_annotation? "=" expr {
|
||||
wild_error $4;
|
||||
wild_error $4;
|
||||
Scoping.check_pattern $1.value.inside;
|
||||
{binders = PPar $1, []; lhs_type=$2; eq=$3; let_rhs=$4}
|
||||
}
|
||||
| tuple(sub_irrefutable) type_annotation? "=" expr {
|
||||
wild_error $4;
|
||||
wild_error $4;
|
||||
Utils.nsepseq_iter Scoping.check_pattern $1;
|
||||
let hd, tl = $1 in
|
||||
let start = pattern_to_region hd in
|
||||
@ -422,7 +422,7 @@ expr:
|
||||
base_cond__open(expr) | switch_expr(base_cond) { $1 }
|
||||
|
||||
base_cond__open(x):
|
||||
base_expr(x) | conditional(expr_with_let_expr) {
|
||||
base_expr(x) | conditional(expr_with_let_expr) {
|
||||
wild_error $1;
|
||||
$1 }
|
||||
|
||||
@ -460,11 +460,11 @@ fun_expr:
|
||||
let region = cover start stop in
|
||||
|
||||
let rec arg_to_pattern = function
|
||||
EVar v ->
|
||||
EVar v ->
|
||||
if v.value = "_" then
|
||||
PWild v.region
|
||||
else (
|
||||
Scoping.check_reserved_name v;
|
||||
Scoping.check_reserved_name v;
|
||||
PVar v
|
||||
)
|
||||
| EAnnot {region; value = {inside = EVar v, colon, typ; _}} ->
|
||||
@ -895,7 +895,7 @@ path:
|
||||
| projection { Path $1 }
|
||||
|
||||
update_record:
|
||||
"{""..."path "," sep_or_term_list(field_path_assignment,",") "}" {
|
||||
"{" "..." path "," sep_or_term_list(field_path_assignment,",") "}" {
|
||||
let region = cover $1 $6 in
|
||||
let ne_elements, terminator = $5 in
|
||||
let value = {
|
||||
@ -913,7 +913,7 @@ expr_with_let_expr:
|
||||
expr { $1 }
|
||||
| let_expr(expr_with_let_expr) { $1 }
|
||||
|
||||
more_field_assignments:
|
||||
more_field_assignments:
|
||||
"," sep_or_term_list(field_assignment_punning,",") {
|
||||
let elts, _region = $2 in
|
||||
$1, elts
|
||||
@ -926,9 +926,9 @@ sequence_or_record_in:
|
||||
PaSequence {s_elts = elts; s_terminator=None}
|
||||
}
|
||||
| field_assignment more_field_assignments? {
|
||||
match $2 with
|
||||
| Some (comma, elts) ->
|
||||
let r_elts = Utils.nsepseq_cons $1 comma elts in
|
||||
match $2 with
|
||||
| Some (comma, elts) ->
|
||||
let r_elts = Utils.nsepseq_cons $1 comma elts in
|
||||
PaRecord {r_elts; r_terminator = None}
|
||||
| None ->
|
||||
PaRecord {r_elts = ($1, []); r_terminator = None}
|
||||
@ -961,15 +961,15 @@ sequence_or_record:
|
||||
terminator = r.r_terminator}
|
||||
in ERecord {region; value}}
|
||||
|
||||
field_assignment_punning:
|
||||
(* This can only happen with multiple fields -
|
||||
field_assignment_punning:
|
||||
(* This can only happen with multiple fields -
|
||||
one item punning does NOT work in ReasonML *)
|
||||
field_name {
|
||||
field_name {
|
||||
let value = {
|
||||
field_name = $1;
|
||||
assignment = ghost;
|
||||
field_expr = EVar $1 }
|
||||
in
|
||||
in
|
||||
{$1 with value}
|
||||
}
|
||||
| field_assignment {
|
||||
|
@ -5,7 +5,10 @@ module SSet = Set.Make (String)
|
||||
|
||||
module IO =
|
||||
struct
|
||||
let options = EvalOpt.(read ~lang:`ReasonLIGO ~ext:".religo")
|
||||
let options =
|
||||
let open EvalOpt in
|
||||
let block = mk_block ~opening:"/*" ~closing:"*/"
|
||||
in read ~block ~line:"//" ".religo"
|
||||
end
|
||||
|
||||
module SubIO =
|
||||
@ -14,7 +17,8 @@ module SubIO =
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : EvalOpt.language;
|
||||
block : EvalOpt.block_comment option;
|
||||
line : EvalOpt.line_comment option;
|
||||
ext : string;
|
||||
mode : [`Byte | `Point];
|
||||
cmd : EvalOpt.command;
|
||||
@ -26,7 +30,8 @@ module SubIO =
|
||||
method libs = IO.options#libs
|
||||
method verbose = IO.options#verbose
|
||||
method offsets = IO.options#offsets
|
||||
method lang = IO.options#lang
|
||||
method block = IO.options#block
|
||||
method line = IO.options#line
|
||||
method ext = IO.options#ext
|
||||
method mode = IO.options#mode
|
||||
method cmd = IO.options#cmd
|
||||
@ -37,7 +42,8 @@ module SubIO =
|
||||
EvalOpt.make ~libs:options#libs
|
||||
~verbose:options#verbose
|
||||
~offsets:options#offsets
|
||||
~lang:options#lang
|
||||
?block:options#block
|
||||
?line:options#line
|
||||
~ext:options#ext
|
||||
~mode:options#mode
|
||||
~cmd:options#cmd
|
||||
|
@ -1,7 +0,0 @@
|
||||
$HOME/git/OCaml-build/Makefile
|
||||
$HOME/git/OCaml-build/Makefile.cfg
|
||||
|
||||
$HOME/git/ligo/vendors/ligo-utils/simple-utils/pos.mli
|
||||
$HOME/git/ligo/vendors/ligo-utils/simple-utils/pos.ml
|
||||
$HOME/git/ligo/vendors/ligo-utils/simple-utils/region.mli
|
||||
$HOME/git/ligo/vendors/ligo-utils/simple-utils/region.ml
|
@ -5,37 +5,42 @@
|
||||
|
||||
type command = Quiet | Copy | Units | Tokens
|
||||
|
||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
||||
|
||||
let lang_to_string = function
|
||||
`PascaLIGO -> "PascaLIGO"
|
||||
| `CameLIGO -> "CameLIGO"
|
||||
| `ReasonLIGO -> "ReasonLIGO"
|
||||
|
||||
(* The type [options] gathers the command-line options. *)
|
||||
|
||||
module SSet = Set.Make (String)
|
||||
|
||||
type line_comment = string (* Opening of a line comment *)
|
||||
type block_comment = <opening : string; closing : string>
|
||||
|
||||
let mk_block ~opening ~closing : block_comment =
|
||||
object
|
||||
method opening = opening
|
||||
method closing = closing
|
||||
end
|
||||
|
||||
type options = <
|
||||
input : string option;
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : language;
|
||||
ext : string; (* ".ligo", ".mligo", ".religo" *)
|
||||
block : block_comment option;
|
||||
line : line_comment option;
|
||||
ext : string;
|
||||
mode : [`Byte | `Point];
|
||||
cmd : command;
|
||||
mono : bool;
|
||||
expr : bool
|
||||
>
|
||||
|
||||
let make ~input ~libs ~verbose ~offsets ~lang ~ext ~mode ~cmd ~mono ~expr : options =
|
||||
let make ~input ~libs ~verbose ~offsets ?block
|
||||
?line ~ext ~mode ~cmd ~mono ~expr : options =
|
||||
object
|
||||
method input = input
|
||||
method libs = libs
|
||||
method verbose = verbose
|
||||
method offsets = offsets
|
||||
method lang = lang
|
||||
method block = block
|
||||
method line = line
|
||||
method ext = ext
|
||||
method mode = mode
|
||||
method cmd = cmd
|
||||
@ -58,10 +63,10 @@ let abort msg =
|
||||
|
||||
(* Help *)
|
||||
|
||||
let help language extension () =
|
||||
let help extension () =
|
||||
let file = Filename.basename Sys.argv.(0) in
|
||||
printf "Usage: %s [<option> ...] [<input>%s | \"-\"]\n" file extension;
|
||||
printf "where <input>%s is the %s source file (default: stdin),\n" extension language;
|
||||
printf "where <input>%s is the LIGO source file (default: stdin),\n" extension;
|
||||
print "and each <option> (if any) is one of the following:";
|
||||
print " -I <paths> Library paths (colon-separated)";
|
||||
print " -t, --tokens Print tokens";
|
||||
@ -105,8 +110,7 @@ let add_verbose d =
|
||||
!verbose
|
||||
(split_at_colon d)
|
||||
|
||||
let specs language extension =
|
||||
let language = lang_to_string language in
|
||||
let specs extension =
|
||||
let open! Getopt in [
|
||||
'I', nolong, None, Some add_path;
|
||||
'c', "copy", set copy true, None;
|
||||
@ -118,7 +122,7 @@ let specs language extension =
|
||||
noshort, "mono", set mono true, None;
|
||||
noshort, "expr", set expr true, None;
|
||||
noshort, "verbose", None, Some add_verbose;
|
||||
'h', "help", Some (help language extension), None;
|
||||
'h', "help", Some (help extension), None;
|
||||
noshort, "version", Some version, None
|
||||
]
|
||||
;;
|
||||
@ -156,7 +160,7 @@ let print_opt () =
|
||||
printf "input = %s\n" (string_of quote !input);
|
||||
printf "libs = %s\n" (string_of_path !libs)
|
||||
|
||||
let check lang ext =
|
||||
let check ?block ?line ~ext =
|
||||
let () =
|
||||
if SSet.mem "cli" !verbose then print_opt () in
|
||||
|
||||
@ -209,16 +213,19 @@ let check lang ext =
|
||||
| false, false, false, true -> Tokens
|
||||
| _ -> abort "Choose one of -q, -c, -u, -t."
|
||||
|
||||
in make ~input ~libs ~verbose ~offsets ~mode ~cmd ~mono ~expr ~lang ~ext
|
||||
in make ~input ~libs ~verbose ~offsets ~mode
|
||||
~cmd ~mono ~expr ?block ?line ~ext
|
||||
|
||||
(* Parsing the command-line options *)
|
||||
|
||||
let read ~lang ~ext =
|
||||
type extension = string
|
||||
|
||||
let read ?block ?line (ext: extension) =
|
||||
try
|
||||
Getopt.parse_cmdline (specs lang ext) anonymous;
|
||||
Getopt.parse_cmdline (specs ext) anonymous;
|
||||
(verb_str :=
|
||||
let apply e a =
|
||||
if a = "" then e else Printf.sprintf "%s, %s" e a
|
||||
in SSet.fold apply !verbose "");
|
||||
check lang ext
|
||||
check ?block ?line ~ext
|
||||
with Getopt.Error msg -> abort msg
|
||||
|
@ -49,19 +49,21 @@ type command = Quiet | Copy | Units | Tokens
|
||||
expected.}
|
||||
} *)
|
||||
|
||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
||||
|
||||
val lang_to_string : language -> string
|
||||
|
||||
module SSet : Set.S with type elt = string and type t = Set.Make(String).t
|
||||
|
||||
type line_comment = string (* Opening of a line comment *)
|
||||
type block_comment = <opening : string; closing : string>
|
||||
|
||||
val mk_block : opening:string -> closing:string -> block_comment
|
||||
|
||||
type options = <
|
||||
input : string option;
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : language;
|
||||
ext : string; (* ".ligo", ".mligo", ".religo" *)
|
||||
block : block_comment option;
|
||||
line : line_comment option;
|
||||
ext : string;
|
||||
mode : [`Byte | `Point];
|
||||
cmd : command;
|
||||
mono : bool;
|
||||
@ -73,7 +75,8 @@ val make :
|
||||
libs:string list ->
|
||||
verbose:SSet.t ->
|
||||
offsets:bool ->
|
||||
lang:language ->
|
||||
?block:block_comment ->
|
||||
?line:line_comment ->
|
||||
ext:string ->
|
||||
mode:[`Byte | `Point] ->
|
||||
cmd:command ->
|
||||
@ -81,8 +84,9 @@ val make :
|
||||
expr:bool ->
|
||||
options
|
||||
|
||||
(** Parsing the command-line options on stdin. The first parameter is
|
||||
the name of the concrete syntax, e.g., [PascaLIGO], and the second
|
||||
is the expected file extension, e.g., ".ligo". *)
|
||||
(** Parsing the command-line options on stdin. *)
|
||||
|
||||
val read : lang:language -> ext:string -> options
|
||||
type extension = string
|
||||
|
||||
val read :
|
||||
?block:block_comment -> ?line:line_comment -> extension -> options
|
||||
|
@ -38,8 +38,6 @@
|
||||
module Region = Simple_utils.Region
|
||||
module Pos = Simple_utils.Pos
|
||||
|
||||
type lexeme = string
|
||||
|
||||
(* TOKENS *)
|
||||
|
||||
(* The signature [TOKEN] exports an abstract type [token], so a lexer
|
||||
@ -54,6 +52,8 @@ type lexeme = string
|
||||
reading the ocamllex specification for the lexer ([Lexer.mll]).
|
||||
*)
|
||||
|
||||
type lexeme = string
|
||||
|
||||
module type TOKEN =
|
||||
sig
|
||||
type token
|
||||
@ -82,13 +82,6 @@ module type TOKEN =
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
val is_string : token -> bool
|
||||
val is_bytes : token -> bool
|
||||
val is_int : token -> bool
|
||||
val is_ident : token -> bool
|
||||
val is_kwd : token -> bool
|
||||
val is_constr : token -> bool
|
||||
val is_sym : token -> bool
|
||||
val is_eof : token -> bool
|
||||
|
||||
(* Projections *)
|
||||
@ -96,73 +89,41 @@ module type TOKEN =
|
||||
val to_lexeme : token -> lexeme
|
||||
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
|
||||
val to_region : token -> Region.t
|
||||
|
||||
(* Style *)
|
||||
|
||||
type error
|
||||
|
||||
val error_to_string : error -> string
|
||||
|
||||
exception Error of error Region.reg
|
||||
|
||||
val format_error :
|
||||
?offsets:bool ->
|
||||
[`Byte | `Point] ->
|
||||
error Region.reg ->
|
||||
file:bool ->
|
||||
string Region.reg
|
||||
|
||||
val check_right_context :
|
||||
token ->
|
||||
(Lexing.lexbuf -> (Markup.t list * token) option) ->
|
||||
Lexing.lexbuf ->
|
||||
unit
|
||||
end
|
||||
|
||||
(* The module type for lexers is [S]. It mainly exports the function
|
||||
[open_token_stream], which returns
|
||||
|
||||
* a function [read] that extracts tokens from a lexing buffer,
|
||||
together with a lexing buffer [buffer] to read from,
|
||||
* a function [close] that closes that buffer,
|
||||
* a function [get_pos] that returns the current position, and
|
||||
* a function [get_last] that returns the region of the last
|
||||
recognised token.
|
||||
* a function [get_file] that returns the name of the file being scanned
|
||||
(empty string if [stdin]).
|
||||
|
||||
Note that a module [Token] is exported too, because the signature
|
||||
of the exported functions depend on it.
|
||||
|
||||
The call [read ~log] evaluates in a lexer (also known as a
|
||||
tokeniser or scanner) whose type is [Lexing.lexbuf -> token], and
|
||||
suitable for a parser generated by Menhir. The argument labelled
|
||||
[log] is a logger, that is, it may print a token and its left
|
||||
markup to a given channel, at the caller's discretion.
|
||||
*)
|
||||
(* The signature of the lexer *)
|
||||
|
||||
module type S =
|
||||
sig
|
||||
module Token : TOKEN
|
||||
type token = Token.token
|
||||
|
||||
type file_path = string
|
||||
type logger = Markup.t list -> token -> unit
|
||||
(* The scanner *)
|
||||
|
||||
type window =
|
||||
Nil
|
||||
| One of token
|
||||
| Two of token * token
|
||||
val scan : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
|
||||
|
||||
val slide : token -> window -> window
|
||||
|
||||
type input =
|
||||
File of file_path
|
||||
| String of string
|
||||
| Channel of in_channel
|
||||
| Buffer of Lexing.lexbuf
|
||||
|
||||
type instance = {
|
||||
input : input;
|
||||
read : log:logger -> Lexing.lexbuf -> token;
|
||||
buffer : Lexing.lexbuf;
|
||||
get_win : unit -> window;
|
||||
get_pos : unit -> Pos.t;
|
||||
get_last : unit -> Region.t;
|
||||
get_file : unit -> file_path;
|
||||
close : unit -> unit
|
||||
}
|
||||
|
||||
type open_err = File_opening of string
|
||||
|
||||
val lexbuf_from_input :
|
||||
input -> (Lexing.lexbuf * (unit -> unit), open_err) Stdlib.result
|
||||
|
||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
||||
|
||||
val open_token_stream :
|
||||
language -> input -> (instance, open_err) Stdlib.result
|
||||
|
||||
(* Error reporting *)
|
||||
(* Errors (specific to the generic lexer, not to the tokens) *)
|
||||
|
||||
type error
|
||||
|
||||
@ -173,7 +134,6 @@ module type S =
|
||||
val format_error :
|
||||
?offsets:bool -> [`Byte | `Point] ->
|
||||
error Region.reg -> file:bool -> string Region.reg
|
||||
|
||||
end
|
||||
|
||||
(* The functorised interface
|
||||
@ -182,4 +142,4 @@ module type S =
|
||||
submodule in [S].
|
||||
*)
|
||||
|
||||
module Make (Token: TOKEN) : S with module Token = Token
|
||||
module Make (Token : TOKEN) : S with module Token = Token
|
||||
|
File diff suppressed because it is too large
Load Diff
387
src/passes/1-parser/shared/LexerLib.ml
Normal file
387
src/passes/1-parser/shared/LexerLib.ml
Normal file
@ -0,0 +1,387 @@
|
||||
module Region = Simple_utils.Region
|
||||
module Pos = Simple_utils.Pos
|
||||
|
||||
(* LEXER ENGINE *)
|
||||
|
||||
(* Resetting file name and line number in the lexing buffer
|
||||
|
||||
The call [reset ~file ~line buffer] modifies in-place the lexing
|
||||
buffer [buffer] so the lexing engine records that the file
|
||||
associated with [buffer] is named [file], and the current line is
|
||||
[line]. This function is useful when lexing a file that has been
|
||||
previously preprocessed by the C preprocessor, in which case the
|
||||
argument [file] is the name of the file that was preprocessed,
|
||||
_not_ the preprocessed file (of which the user is not normally
|
||||
aware). By default, the [line] argument is [1].
|
||||
*)
|
||||
|
||||
let reset_file ~file buffer =
|
||||
let open Lexing in
|
||||
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname = file}
|
||||
|
||||
let reset_line ~line buffer =
|
||||
assert (line >= 0);
|
||||
let open Lexing in
|
||||
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = line}
|
||||
|
||||
let reset_offset ~offset buffer =
|
||||
assert (offset >= 0);
|
||||
let open Lexing in
|
||||
let bol = buffer.lex_curr_p.pos_bol in
|
||||
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum = bol + offset }
|
||||
|
||||
let reset ?file ?line ?offset buffer =
|
||||
let () =
|
||||
match file with
|
||||
Some file -> reset_file ~file buffer
|
||||
| None -> () in
|
||||
let () =
|
||||
match line with
|
||||
Some line -> reset_line ~line buffer
|
||||
| None -> () in
|
||||
match offset with
|
||||
Some offset -> reset_offset ~offset buffer
|
||||
| None -> ()
|
||||
|
||||
(* Rolling back one lexeme _within the current semantic action_ *)
|
||||
|
||||
let rollback buffer =
|
||||
let open Lexing in
|
||||
let len = String.length (lexeme buffer) in
|
||||
let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
|
||||
buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
|
||||
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
|
||||
|
||||
(* Utility types *)
|
||||
|
||||
type file_path = string
|
||||
type lexeme = string
|
||||
|
||||
(* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)
|
||||
|
||||
type thread = <
|
||||
opening : Region.t;
|
||||
length : int;
|
||||
acc : char list;
|
||||
to_string : string;
|
||||
push_char : char -> thread;
|
||||
push_string : string -> thread;
|
||||
set_opening : Region.t -> thread
|
||||
>
|
||||
|
||||
let mk_thread region lexeme : thread =
|
||||
(* The call [explode s a] is the list made by pushing the characters
|
||||
in the string [s] on top of [a], in reverse order. For example,
|
||||
[explode "ba" ['c';'d'] = ['a'; 'b'; 'c'; 'd']]. *)
|
||||
|
||||
let explode s acc =
|
||||
let rec push = function
|
||||
0 -> acc
|
||||
| i -> s.[i-1] :: push (i-1)
|
||||
in push (String.length s) in
|
||||
object
|
||||
val opening = region
|
||||
method opening = opening
|
||||
|
||||
val length = String.length lexeme
|
||||
method length = length
|
||||
|
||||
val acc = explode lexeme []
|
||||
method acc = acc
|
||||
|
||||
method set_opening opening = {< opening; length; acc >}
|
||||
|
||||
method push_char char =
|
||||
{< opening; length=length+1; acc=char::acc >}
|
||||
|
||||
method push_string str =
|
||||
{< opening;
|
||||
length = length + String.length str;
|
||||
acc = explode str acc >}
|
||||
|
||||
(* The value of [thread#to_string] is a string of length
|
||||
[thread#length] containing the [thread#length] characters in
|
||||
the list [thread#acc], in reverse order. For instance,
|
||||
[thread#to_string = "abc"] if [thread#length = 3] and
|
||||
[thread#acc = ['c';'b';'a']]. *)
|
||||
|
||||
method to_string =
|
||||
let bytes = Bytes.make length ' ' in
|
||||
let rec fill i = function
|
||||
[] -> bytes
|
||||
| char::l -> Bytes.set bytes i char; fill (i-1) l
|
||||
in fill (length-1) acc |> Bytes.to_string
|
||||
end
|
||||
|
||||
(* STATE *)
|
||||
|
||||
(* Scanning the lexing buffer for tokens (and markup, as a
|
||||
side-effect).
|
||||
|
||||
Because we want the lexer to have access to the right lexical
|
||||
context of a recognised lexeme (to enforce stylistic constraints or
|
||||
report special error patterns), we need to keep a hidden reference
|
||||
to a queue of recognised lexical units (that is, tokens and markup)
|
||||
that acts as a mutable state between the calls to [read]. When
|
||||
[read] is called, that queue is examined first and, if it contains
|
||||
at least one token, that token is returned; otherwise, the lexing
|
||||
buffer is scanned for at least one more new token. That is the
|
||||
general principle: we put a high-level buffer (our queue) on top of
|
||||
the low-level lexing buffer.
|
||||
|
||||
One tricky and important detail is that we must make any parser
|
||||
generated by Menhir (and calling [read]) believe that the last
|
||||
region of the input source that was matched indeed corresponds to
|
||||
the returned token, despite that many tokens and markup may have
|
||||
been matched since it was actually read from the input. In other
|
||||
words, the parser requests a token that is taken from the
|
||||
high-level buffer, but the parser requests the source regions from
|
||||
the _low-level_ lexing buffer, and they may disagree if more than
|
||||
one token has actually been recognised.
|
||||
|
||||
Consequently, in order to maintain a consistent view for the
|
||||
parser, we have to patch some fields of the lexing buffer, namely
|
||||
[lex_start_p] and [lex_curr_p], as these fields are read by parsers
|
||||
generated by Menhir when querying source positions (regions). This
|
||||
is the purpose of the function [patch_buffer]. After reading one or
|
||||
more tokens and markup by the scanning rule [scan], we have to save
|
||||
in the hidden reference [buf_reg] the region of the source that was
|
||||
matched by [scan]. This atomic sequence of patching, scanning and
|
||||
saving is implemented by the _function_ [scan] (beware: it shadows
|
||||
the scanning rule [scan]). The function [patch_buffer] is, of
|
||||
course, also called just before returning the token, so the parser
|
||||
has a view of the lexing buffer consistent with the token.
|
||||
*)
|
||||
|
||||
type 'token window =
|
||||
Nil
|
||||
| One of 'token
|
||||
| Two of 'token * 'token
|
||||
|
||||
type 'token state = <
|
||||
units : (Markup.t list * 'token) FQueue.t;
|
||||
markup : Markup.t list;
|
||||
window : 'token window;
|
||||
last : Region.t;
|
||||
pos : Pos.t;
|
||||
decoder : Uutf.decoder;
|
||||
supply : Bytes.t -> int -> int -> unit;
|
||||
block : EvalOpt.block_comment option;
|
||||
line : EvalOpt.line_comment option;
|
||||
|
||||
enqueue : 'token -> 'token state;
|
||||
set_units : (Markup.t list * 'token) FQueue.t -> 'token state;
|
||||
set_last : Region.t -> 'token state;
|
||||
set_pos : Pos.t -> 'token state;
|
||||
slide_token : 'token -> 'token state;
|
||||
|
||||
sync : Lexing.lexbuf -> Region.t * lexeme * 'token state;
|
||||
|
||||
push_newline : Lexing.lexbuf -> 'token state;
|
||||
push_line : thread -> 'token state;
|
||||
push_block : thread -> 'token state;
|
||||
push_space : Lexing.lexbuf -> 'token state;
|
||||
push_tabs : Lexing.lexbuf -> 'token state;
|
||||
push_bom : Lexing.lexbuf -> 'token state;
|
||||
push_markup : Markup.t -> 'token state;
|
||||
>
|
||||
|
||||
let mk_state ~units ~markup ~window ~last ~pos ~decoder ~supply
|
||||
?block ?line () : _ state =
|
||||
object (self)
|
||||
val units = units
|
||||
method units = units
|
||||
val markup = markup
|
||||
method markup = markup
|
||||
val window = window
|
||||
method window = window
|
||||
val last = last
|
||||
method last = last
|
||||
val pos = pos
|
||||
method pos = pos
|
||||
method decoder = decoder
|
||||
method supply = supply
|
||||
method block = block
|
||||
method line = line
|
||||
|
||||
method enqueue token =
|
||||
{< units = FQueue.enq (markup, token) units;
|
||||
markup = [] >}
|
||||
|
||||
method set_units units = {< units = units >}
|
||||
method set_last region = {< last = region >}
|
||||
method set_pos pos = {< pos = pos >}
|
||||
|
||||
method slide_token token =
|
||||
match self#window with
|
||||
Nil -> {< window = One token >}
|
||||
| One t | Two (t,_) -> {< window = Two (token,t) >}
|
||||
|
||||
method sync buffer =
|
||||
let lex = Lexing.lexeme buffer in
|
||||
let len = String.length lex in
|
||||
let start = pos in
|
||||
let stop = start#shift_bytes len in
|
||||
let state = {< pos = stop >}
|
||||
in Region.make ~start ~stop, lex, state
|
||||
|
||||
(* MARKUP *)
|
||||
|
||||
(* Committing markup to the current logical state *)
|
||||
|
||||
method push_markup unit = {< markup = unit :: markup >}
|
||||
|
||||
method push_newline buffer =
|
||||
let () = Lexing.new_line buffer in
|
||||
let value = Lexing.lexeme buffer in
|
||||
let start = self#pos in
|
||||
let stop = start#new_line value in
|
||||
let region = Region.make ~start ~stop in
|
||||
let unit = Markup.Newline Region.{region; value}
|
||||
in {< pos = stop; markup = unit::markup >}
|
||||
|
||||
method push_line thread =
|
||||
let start = thread#opening#start in
|
||||
let region = Region.make ~start ~stop:self#pos
|
||||
and value = thread#to_string in
|
||||
let unit = Markup.LineCom Region.{region; value}
|
||||
in {< markup = unit::markup >}
|
||||
|
||||
method push_block thread =
|
||||
let start = thread#opening#start in
|
||||
let region = Region.make ~start ~stop:self#pos
|
||||
and value = thread#to_string in
|
||||
let unit = Markup.BlockCom Region.{region; value}
|
||||
in {< markup = unit::markup >}
|
||||
|
||||
method push_space buffer =
|
||||
let region, lex, state = self#sync buffer in
|
||||
let value = String.length lex in
|
||||
let unit = Markup.Space Region.{region; value}
|
||||
in state#push_markup unit
|
||||
|
||||
method push_tabs buffer =
|
||||
let region, lex, state = self#sync buffer in
|
||||
let value = String.length lex in
|
||||
let unit = Markup.Tabs Region.{region; value}
|
||||
in state#push_markup unit
|
||||
|
||||
method push_bom buffer =
|
||||
let region, value, state = self#sync buffer in
|
||||
let unit = Markup.BOM Region.{region; value}
|
||||
in state#push_markup unit
|
||||
end
|
||||
|
||||
(* LEXER INSTANCE *)
|
||||
|
||||
type input =
|
||||
File of file_path
|
||||
| String of string
|
||||
| Channel of in_channel
|
||||
| Buffer of Lexing.lexbuf
|
||||
|
||||
type 'token logger = Markup.t list -> 'token -> unit
|
||||
|
||||
type 'token instance = {
|
||||
input : input;
|
||||
read : log:('token logger) -> Lexing.lexbuf -> 'token;
|
||||
buffer : Lexing.lexbuf;
|
||||
get_win : unit -> 'token window;
|
||||
get_pos : unit -> Pos.t;
|
||||
get_last : unit -> Region.t;
|
||||
get_file : unit -> file_path;
|
||||
close : unit -> unit
|
||||
}
|
||||
|
||||
type open_err = File_opening of string
|
||||
|
||||
let lexbuf_from_input = function
|
||||
String s ->
|
||||
Ok (Lexing.from_string s, fun () -> ())
|
||||
| Channel chan ->
|
||||
let close () = close_in chan in
|
||||
Ok (Lexing.from_channel chan, close)
|
||||
| Buffer b ->
|
||||
Ok (b, fun () -> ())
|
||||
| File path ->
|
||||
try
|
||||
let chan = open_in path in
|
||||
let close () = close_in chan in
|
||||
let lexbuf = Lexing.from_channel chan in
|
||||
let () =
|
||||
let open Lexing in
|
||||
lexbuf.lex_curr_p <- {lexbuf.lex_curr_p with pos_fname=path}
|
||||
in Ok (lexbuf, close)
|
||||
with Sys_error msg -> Stdlib.Error (File_opening msg)
|
||||
|
||||
let open_token_stream ?line ?block ~scan
|
||||
~token_to_region ~style input =
|
||||
let file_path = match input with
|
||||
File path -> path
|
||||
| _ -> "" in
|
||||
let pos = Pos.min ~file:file_path in
|
||||
let buf_reg = ref (pos#byte, pos#byte)
|
||||
and decoder = Uutf.decoder ~encoding:`UTF_8 `Manual in
|
||||
let supply = Uutf.Manual.src decoder in
|
||||
let state = ref (mk_state
|
||||
~units:FQueue.empty
|
||||
~last:Region.ghost
|
||||
~window:Nil
|
||||
~pos
|
||||
~markup:[]
|
||||
~decoder
|
||||
~supply
|
||||
?block
|
||||
?line
|
||||
()) in
|
||||
let get_pos () = !state#pos
|
||||
and get_last () = !state#last
|
||||
and get_win () = !state#window
|
||||
and get_file () = file_path in
|
||||
|
||||
let patch_buffer (start, stop) buffer =
|
||||
let open Lexing in
|
||||
let file_path = buffer.lex_curr_p.pos_fname in
|
||||
buffer.lex_start_p <- {start with pos_fname = file_path};
|
||||
buffer.lex_curr_p <- {stop with pos_fname = file_path}
|
||||
|
||||
and save_region buffer =
|
||||
buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p) in
|
||||
|
||||
let scan' scan buffer =
|
||||
patch_buffer !buf_reg buffer;
|
||||
state := scan !state buffer;
|
||||
save_region buffer in
|
||||
|
||||
let next_token scan buffer =
|
||||
scan' scan buffer;
|
||||
match FQueue.peek !state#units with
|
||||
None -> None
|
||||
| Some (units, ext_token) ->
|
||||
state := !state#set_units units; Some ext_token in
|
||||
|
||||
let rec read scan ~token_to_region ~style ~log buffer =
|
||||
match FQueue.deq !state#units with
|
||||
None ->
|
||||
scan' scan buffer;
|
||||
read scan ~token_to_region ~style ~log buffer
|
||||
| Some (units, (left_mark, token)) ->
|
||||
log left_mark token;
|
||||
state := ((!state#set_units units)
|
||||
#set_last (token_to_region token))
|
||||
#slide_token token;
|
||||
style token (next_token scan) buffer;
|
||||
patch_buffer (token_to_region token)#byte_pos buffer;
|
||||
token in
|
||||
|
||||
match lexbuf_from_input input with
|
||||
Ok (buffer, close) ->
|
||||
let () =
|
||||
match input with
|
||||
File path when path <> "" -> reset ~file:path buffer
|
||||
| _ -> () in
|
||||
let instance = {
|
||||
read = read scan ~token_to_region ~style;
|
||||
input; buffer; get_win; get_pos; get_last; get_file; close}
|
||||
in Ok instance
|
||||
| Error _ as e -> e
|
203
src/passes/1-parser/shared/LexerLib.mli
Normal file
203
src/passes/1-parser/shared/LexerLib.mli
Normal file
@ -0,0 +1,203 @@
|
||||
(* A library for writing UTF8-aware lexers *)
|
||||
|
||||
module Region = Simple_utils.Region
|
||||
module Pos = Simple_utils.Pos
|
||||
|
||||
(* The function [rollback] resets the lexing buffer to the state it
|
||||
was when it matched the last regular expression. This function is
|
||||
safe to use only in the semantic action of the rule which last
|
||||
matched. *)
|
||||
|
||||
val rollback : Lexing.lexbuf -> unit
|
||||
|
||||
(* Utility types *)
|
||||
|
||||
type file_path = string
|
||||
type lexeme = string
|
||||
|
||||
(* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)
|
||||
|
||||
(* When scanning structured constructs, like strings and comments, we
|
||||
need to keep the region of the opening symbol (like double quote,
|
||||
"//" or "(*") in order to report any error more precisely. Since
|
||||
ocamllex is byte-oriented, we need to store the parsed bytes as
|
||||
characters in an accumulator [acc] and also its length [len], so,
|
||||
we are done, it is easy to build the string making up the
|
||||
structured construct with [mk_str] (see above).
|
||||
|
||||
The resulting data structure is called a _thread_. (Note for
|
||||
Emacs: "*)".)
|
||||
*)
|
||||
|
||||
type thread = <
|
||||
opening : Region.t;
|
||||
length : int;
|
||||
acc : char list;
|
||||
to_string : string;
|
||||
push_char : char -> thread;
|
||||
push_string : string -> thread;
|
||||
set_opening : Region.t -> thread
|
||||
>
|
||||
|
||||
val mk_thread : Region.t -> lexeme -> thread
|
||||
|
||||
(* STATE *)
|
||||
|
||||
(* Beyond producing tokens, the result of lexing is a _state_. The
|
||||
type [state] represents the abstract logical state of the lexing
|
||||
engine, that is, a value which is threaded during scanning and
|
||||
which denotes useful, high-level information beyond what the type
|
||||
[Lexing.lexbuf] in the standard library already provides for all
|
||||
generic lexers. We qualify it as "logical state" because the lexing
|
||||
buffer itself has a "physical state" defined by the type
|
||||
[Lexing.lexbuf].
|
||||
|
||||
Tokens are the smallest units used by the parser to build the
|
||||
abstract syntax tree. The state includes a queue of recognised
|
||||
tokens, with the markup at the left of its lexeme until either the
|
||||
start of the file or the end of the previously recognised token.
|
||||
|
||||
The markup from the last recognised token or, if the first token
|
||||
has not been recognised yet, from the beginning of the file is
|
||||
stored in the field [markup] of the state because it is a
|
||||
side-effect, with respect to the output token list, and we use a
|
||||
record with a single field [units] because that record may be
|
||||
easily extended during the future maintenance of this lexer.
|
||||
|
||||
The state also includes a field [pos] which holds the current
|
||||
position in the LIGO source file. The position is not always
|
||||
updated after a single character has been matched: that depends on
|
||||
the regular expression that matched the lexing buffer.
|
||||
|
||||
The field [window] is a two-token window, that is, a buffer that
|
||||
contains the last recognised token, and the penultimate (if any).
|
||||
Technically, it is a parametric type, but its use is meant for
|
||||
tokens, wherever they are defined. In [Two (t1,t2)], and in case
|
||||
of a syntax error, [t1] is the first invalid token and [t2] is the
|
||||
last valid one.
|
||||
|
||||
The fields [decoder] and [supply] offer the support needed for
|
||||
the lexing of UTF-8 encoded characters in comments (the only place
|
||||
where they are allowed in LIGO). The former is the decoder proper
|
||||
and the latter is the effectful function [supply] that takes a
|
||||
byte, a start index and a length and feed it to [decoder]. See the
|
||||
documentation of the third-party library Uutf.
|
||||
|
||||
Some methods are now documented.
|
||||
|
||||
The call [state#enqueue token] updates functionally the state
|
||||
[state] by associating the token [token] with the stored markup and
|
||||
enqueuing the pair into the units queue. The field [markup] is then
|
||||
reset to the empty list.
|
||||
|
||||
The call [state#slide_token token] pushes the token [token] in
|
||||
the buffer [buffer]. If the buffer is full, that is, it is [Two
|
||||
(t1,t2)], then the token [t2] is discarded to make room for
|
||||
[token].
|
||||
|
||||
The call [state#sync buffer] updates the current position in
|
||||
accordance with the contents of the lexing buffer, more precisely,
|
||||
depending on the length of the string which has just been
|
||||
recognised by the scanner: that length is used as a positive offset
|
||||
to the current column.
|
||||
*)
|
||||
|
||||
type 'token window =
|
||||
Nil
|
||||
| One of 'token
|
||||
| Two of 'token * 'token
|
||||
|
||||
type 'token state = <
|
||||
units : (Markup.t list * 'token) FQueue.t;
|
||||
markup : Markup.t list;
|
||||
window : 'token window;
|
||||
last : Region.t;
|
||||
pos : Pos.t;
|
||||
decoder : Uutf.decoder;
|
||||
supply : Bytes.t -> int -> int -> unit;
|
||||
block : EvalOpt.block_comment option;
|
||||
line : EvalOpt.line_comment option;
|
||||
|
||||
enqueue : 'token -> 'token state;
|
||||
set_units : (Markup.t list * 'token) FQueue.t -> 'token state;
|
||||
set_last : Region.t -> 'token state;
|
||||
set_pos : Pos.t -> 'token state;
|
||||
slide_token : 'token -> 'token state;
|
||||
|
||||
sync : Lexing.lexbuf -> Region.t * lexeme * 'token state;
|
||||
|
||||
push_newline : Lexing.lexbuf -> 'token state;
|
||||
push_line : thread -> 'token state;
|
||||
push_block : thread -> 'token state;
|
||||
push_space : Lexing.lexbuf -> 'token state;
|
||||
push_tabs : Lexing.lexbuf -> 'token state;
|
||||
push_bom : Lexing.lexbuf -> 'token state;
|
||||
push_markup : Markup.t -> 'token state;
|
||||
>
|
||||
|
||||
(* LEXER INSTANCE *)
|
||||
|
||||
(* The function [open_token_stream] returns a lexer instance made of
|
||||
|
||||
* the input [input] of type [input];
|
||||
* a function [read] that extracts tokens from a lexing buffer,
|
||||
together with a lexing buffer [buffer] to read from,
|
||||
* a function [close] that closes that buffer,
|
||||
* a function [get_pos] that returns the current position, and
|
||||
* a function [get_last] that returns the region of the last
|
||||
recognised token.
|
||||
* a function [get_file] that returns the name of the file being
|
||||
scanned (empty string if [stdin]).
|
||||
|
||||
Note that a module [Token] is exported too, because the signature
|
||||
of the exported functions depend on it.
|
||||
|
||||
The type [window] is a two-token window, that is, a buffer that
|
||||
contains the last recognised token, and the penultimate (if any).
|
||||
|
||||
The call [read ?line ?block ~scan ~token_to_region ~style
|
||||
input] evaluates in a lexer (also known as a tokeniser or scanner)
|
||||
whose type is [log:('token logger) -> Lexing.lexbuf -> 'token], and
|
||||
suitable for a parser generated by Menhir. The argument labelled
|
||||
[log] is a logger, that is, it may print a token and its left
|
||||
markup to a given channel, at the caller's discretion. The function
|
||||
labelled [~scan] is the main scanner of the lexer. The function
|
||||
labelled [~style] is used to check stylistic constraints on the
|
||||
tokens and the markup between them.
|
||||
*)
|
||||
|
||||
type input =
|
||||
File of file_path
|
||||
| String of string
|
||||
| Channel of in_channel
|
||||
| Buffer of Lexing.lexbuf
|
||||
|
||||
type 'token logger = Markup.t list -> 'token -> unit
|
||||
|
||||
type 'token instance = {
|
||||
input : input;
|
||||
read : log:('token logger) -> Lexing.lexbuf -> 'token;
|
||||
buffer : Lexing.lexbuf;
|
||||
get_win : unit -> 'token window;
|
||||
get_pos : unit -> Pos.t;
|
||||
get_last : unit -> Region.t;
|
||||
get_file : unit -> file_path;
|
||||
close : unit -> unit
|
||||
}
|
||||
|
||||
type open_err = File_opening of string
|
||||
|
||||
val lexbuf_from_input :
|
||||
input -> (Lexing.lexbuf * (unit -> unit), open_err) Stdlib.result
|
||||
|
||||
val open_token_stream :
|
||||
?line:EvalOpt.line_comment ->
|
||||
?block:EvalOpt.block_comment ->
|
||||
scan:('token state -> Lexing.lexbuf -> 'token state) ->
|
||||
token_to_region:('token -> Region.t) ->
|
||||
style:('token ->
|
||||
(Lexing.lexbuf -> (Markup.t list * 'token) option) ->
|
||||
Lexing.lexbuf ->
|
||||
unit) ->
|
||||
input ->
|
||||
('token instance, open_err) Stdlib.result
|
@ -5,6 +5,7 @@ module Region = Simple_utils.Region
|
||||
module type S =
|
||||
sig
|
||||
module Lexer : Lexer.S
|
||||
type token = Lexer.token
|
||||
|
||||
val output_token :
|
||||
?offsets:bool ->
|
||||
@ -12,7 +13,7 @@ module type S =
|
||||
EvalOpt.command ->
|
||||
out_channel ->
|
||||
Markup.t list ->
|
||||
Lexer.token ->
|
||||
token ->
|
||||
unit
|
||||
|
||||
type file_path = string
|
||||
@ -20,8 +21,14 @@ module type S =
|
||||
val trace :
|
||||
?offsets:bool ->
|
||||
[`Byte | `Point] ->
|
||||
EvalOpt.language ->
|
||||
Lexer.input ->
|
||||
?block:EvalOpt.block_comment ->
|
||||
?line:EvalOpt.line_comment ->
|
||||
token_to_region:(token -> Region.t) ->
|
||||
style:(token ->
|
||||
(Lexing.lexbuf -> (Markup.t list * token) option) ->
|
||||
Lexing.lexbuf ->
|
||||
unit) ->
|
||||
LexerLib.input ->
|
||||
EvalOpt.command ->
|
||||
(unit, string Region.reg) Stdlib.result
|
||||
end
|
||||
@ -30,10 +37,12 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
|
||||
struct
|
||||
module Lexer = Lexer
|
||||
module Token = Lexer.Token
|
||||
type token = Lexer.token
|
||||
|
||||
(** Pretty-printing in a string the lexemes making up the markup
|
||||
(* Pretty-printing in a string the lexemes making up the markup
|
||||
between two tokens, concatenated with the last lexeme
|
||||
itself. *)
|
||||
|
||||
let output_token ?(offsets=true) mode command
|
||||
channel left_mark token : unit =
|
||||
let output str = Printf.fprintf channel "%s%!" str in
|
||||
@ -56,10 +65,16 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
|
||||
|
||||
type file_path = string
|
||||
|
||||
let trace ?(offsets=true) mode lang input command :
|
||||
let trace ?(offsets=true) mode ?block ?line
|
||||
~token_to_region ~style input command :
|
||||
(unit, string Region.reg) Stdlib.result =
|
||||
match Lexer.open_token_stream lang input with
|
||||
Ok Lexer.{read; buffer; close; _} ->
|
||||
match LexerLib.open_token_stream
|
||||
~scan:Lexer.scan
|
||||
~token_to_region
|
||||
~style
|
||||
?line ?block input
|
||||
with
|
||||
Ok LexerLib.{read; buffer; close; _} ->
|
||||
let log = output_token ~offsets mode command stdout
|
||||
and close_all () = flush_all (); close () in
|
||||
let rec iter () =
|
||||
@ -68,12 +83,17 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
|
||||
if Token.is_eof token
|
||||
then Stdlib.Ok ()
|
||||
else iter ()
|
||||
| exception Lexer.Token.Error error ->
|
||||
let msg =
|
||||
Lexer.Token.format_error
|
||||
~offsets mode ~file:true error
|
||||
in Stdlib.Error msg
|
||||
| exception Lexer.Error error ->
|
||||
let msg =
|
||||
Lexer.format_error ~offsets mode ~file:true error
|
||||
in Stdlib.Error msg in
|
||||
let result = iter ()
|
||||
in close_all (); result
|
||||
| Stdlib.Error (Lexer.File_opening msg) ->
|
||||
| Stdlib.Error (LexerLib.File_opening msg) ->
|
||||
flush_all (); Stdlib.Error (Region.wrap_ghost msg)
|
||||
end
|
||||
|
@ -3,6 +3,7 @@ module Region = Simple_utils.Region
|
||||
module type S =
|
||||
sig
|
||||
module Lexer : Lexer.S
|
||||
type token = Lexer.token
|
||||
|
||||
val output_token :
|
||||
?offsets:bool ->
|
||||
@ -10,7 +11,7 @@ module type S =
|
||||
EvalOpt.command ->
|
||||
out_channel ->
|
||||
Markup.t list ->
|
||||
Lexer.token ->
|
||||
token ->
|
||||
unit
|
||||
|
||||
type file_path = string
|
||||
@ -18,8 +19,14 @@ module type S =
|
||||
val trace :
|
||||
?offsets:bool ->
|
||||
[`Byte | `Point] ->
|
||||
EvalOpt.language ->
|
||||
Lexer.input ->
|
||||
?block:EvalOpt.block_comment ->
|
||||
?line:EvalOpt.line_comment ->
|
||||
token_to_region:(token -> Region.t) ->
|
||||
style:(token ->
|
||||
(Lexing.lexbuf -> (Markup.t list * token) option) ->
|
||||
Lexing.lexbuf ->
|
||||
unit) ->
|
||||
LexerLib.input ->
|
||||
EvalOpt.command ->
|
||||
(unit, string Region.reg) Stdlib.result
|
||||
end
|
||||
|
@ -39,31 +39,45 @@ module Make (IO: IO) (Lexer: Lexer.S) =
|
||||
| Stdlib.Ok pp_buffer ->
|
||||
(* Running the lexer on the preprocessed input *)
|
||||
|
||||
let source = Lexer.String (Buffer.contents pp_buffer) in
|
||||
match Lexer.open_token_stream IO.options#lang source with
|
||||
Ok Lexer.{read; buffer; close; _} ->
|
||||
let close_all () = flush_all (); close () in
|
||||
let rec read_tokens tokens =
|
||||
match read ~log:(fun _ _ -> ()) buffer with
|
||||
token ->
|
||||
if Lexer.Token.is_eof token
|
||||
then Stdlib.Ok (List.rev tokens)
|
||||
else read_tokens (token::tokens)
|
||||
| exception Lexer.Error error ->
|
||||
let file =
|
||||
match IO.options#input with
|
||||
None | Some "-" -> false
|
||||
| Some _ -> true in
|
||||
let () =
|
||||
Printf.eprintf "[LexerUnit] file = %b\n%!" file in
|
||||
let msg =
|
||||
Lexer.format_error ~offsets:IO.options#offsets
|
||||
IO.options#mode ~file error
|
||||
in Stdlib.Error msg in
|
||||
let result = read_tokens []
|
||||
in close_all (); result
|
||||
| Stdlib.Error (Lexer.File_opening msg) ->
|
||||
flush_all (); Stdlib.Error (Region.wrap_ghost msg) in
|
||||
let source = LexerLib.String (Buffer.contents pp_buffer) in
|
||||
match LexerLib.open_token_stream
|
||||
?line:IO.options#line
|
||||
?block:IO.options#block
|
||||
~scan:Lexer.scan
|
||||
~token_to_region:Lexer.Token.to_region
|
||||
~style:Lexer.Token.check_right_context
|
||||
source with
|
||||
Ok LexerLib.{read; buffer; close; _} ->
|
||||
let close_all () = flush_all (); close () in
|
||||
let rec read_tokens tokens =
|
||||
match read ~log:(fun _ _ -> ()) buffer with
|
||||
token ->
|
||||
if Lexer.Token.is_eof token
|
||||
then Stdlib.Ok (List.rev tokens)
|
||||
else read_tokens (token::tokens)
|
||||
| exception Lexer.Token.Error error ->
|
||||
let file =
|
||||
match IO.options#input with
|
||||
None | Some "-" -> false
|
||||
| Some _ -> true in
|
||||
let msg =
|
||||
Lexer.Token.format_error
|
||||
~offsets:IO.options#offsets
|
||||
IO.options#mode ~file error
|
||||
in Stdlib.Error msg
|
||||
| exception Lexer.Error error ->
|
||||
let file =
|
||||
match IO.options#input with
|
||||
None | Some "-" -> false
|
||||
| Some _ -> true in
|
||||
let msg =
|
||||
Lexer.format_error ~offsets:IO.options#offsets
|
||||
IO.options#mode ~file error
|
||||
in Stdlib.Error msg in
|
||||
let result = read_tokens []
|
||||
in close_all (); result
|
||||
| Stdlib.Error (LexerLib.File_opening msg) ->
|
||||
flush_all (); Stdlib.Error (Region.wrap_ghost msg) in
|
||||
match IO.options#input with
|
||||
None -> preproc stdin
|
||||
| Some file_path ->
|
||||
@ -101,8 +115,11 @@ module Make (IO: IO) (Lexer: Lexer.S) =
|
||||
end
|
||||
else Log.trace ~offsets:IO.options#offsets
|
||||
IO.options#mode
|
||||
IO.options#lang
|
||||
(Lexer.String preproc_str)
|
||||
?block:IO.options#block
|
||||
?line:IO.options#line
|
||||
~token_to_region:Lexer.Token.to_region
|
||||
~style:Lexer.Token.check_right_context
|
||||
(LexerLib.String preproc_str)
|
||||
IO.options#cmd
|
||||
in match IO.options#input with
|
||||
None -> preproc stdin
|
||||
|
@ -57,7 +57,7 @@ module type PARSER =
|
||||
|
||||
module Make (IO: IO)
|
||||
(Lexer: Lexer.S)
|
||||
(Parser: PARSER with type token = Lexer.Token.token)
|
||||
(Parser: PARSER with type token = Lexer.token)
|
||||
(ParErr: sig val message : int -> string end) =
|
||||
struct
|
||||
module I = Parser.MenhirInterpreter
|
||||
@ -122,10 +122,10 @@ module Make (IO: IO)
|
||||
message
|
||||
in
|
||||
match get_win () with
|
||||
Lexer.Nil -> assert false
|
||||
| Lexer.One invalid ->
|
||||
LexerLib.Nil -> assert false
|
||||
| LexerLib.One invalid ->
|
||||
raise (Point (message, None, invalid))
|
||||
| Lexer.Two (invalid, valid) ->
|
||||
| LexerLib.Two (invalid, valid) ->
|
||||
raise (Point (message, Some valid, invalid))
|
||||
|
||||
(* The monolithic API of Menhir *)
|
||||
@ -143,14 +143,14 @@ module Make (IO: IO)
|
||||
~offsets:IO.options#offsets
|
||||
IO.options#mode IO.options#cmd stdout
|
||||
|
||||
let incr_contract Lexer.{read; buffer; get_win; close; _} =
|
||||
let incr_contract LexerLib.{read; buffer; get_win; close; _} =
|
||||
let supplier = I.lexer_lexbuf_to_supplier (read ~log) buffer
|
||||
and failure = failure get_win in
|
||||
let parser = Incr.contract buffer.Lexing.lex_curr_p in
|
||||
let ast = I.loop_handle success failure supplier parser
|
||||
in flush_all (); close (); ast
|
||||
|
||||
let incr_expr Lexer.{read; buffer; get_win; close; _} =
|
||||
let incr_expr LexerLib.{read; buffer; get_win; close; _} =
|
||||
let supplier = I.lexer_lexbuf_to_supplier (read ~log) buffer
|
||||
and failure = failure get_win in
|
||||
let parser = Incr.interactive_expr buffer.Lexing.lex_curr_p in
|
||||
|
@ -56,7 +56,7 @@ module type PARSER =
|
||||
|
||||
module Make (IO: IO)
|
||||
(Lexer: Lexer.S)
|
||||
(Parser: PARSER with type token = Lexer.Token.token)
|
||||
(Parser: PARSER with type token = Lexer.token)
|
||||
(ParErr: sig val message : int -> string end) :
|
||||
sig
|
||||
(* WARNING: The following parsers may all raise [Lexer.Error] *)
|
||||
@ -78,8 +78,8 @@ module Make (IO: IO)
|
||||
|
||||
exception Point of error
|
||||
|
||||
val incr_contract : Lexer.instance -> Parser.ast
|
||||
val incr_expr : Lexer.instance -> Parser.expr
|
||||
val incr_contract : Lexer.token LexerLib.instance -> Parser.ast
|
||||
val incr_expr : Lexer.token LexerLib.instance -> Parser.expr
|
||||
|
||||
val format_error :
|
||||
?offsets:bool -> [`Point | `Byte] -> error -> string Region.reg
|
||||
|
@ -4,16 +4,15 @@ module Region = Simple_utils.Region
|
||||
module Preproc = Preprocessor.Preproc
|
||||
module SSet = Set.Make (String)
|
||||
|
||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
||||
|
||||
module type SubIO =
|
||||
sig
|
||||
type options = <
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : language;
|
||||
ext : string; (* ".ligo", ".mligo", ".religo" *)
|
||||
block : EvalOpt.block_comment option;
|
||||
line : EvalOpt.line_comment option;
|
||||
ext : string;
|
||||
mode : [`Byte | `Point];
|
||||
cmd : EvalOpt.command;
|
||||
mono : bool
|
||||
@ -23,7 +22,7 @@ module type SubIO =
|
||||
val make : input:string option -> expr:bool -> EvalOpt.options
|
||||
end
|
||||
|
||||
module type Pretty =
|
||||
module type Printer =
|
||||
sig
|
||||
type state
|
||||
type ast
|
||||
@ -45,7 +44,7 @@ module Make (Lexer: Lexer.S)
|
||||
and type expr = AST.expr
|
||||
and type token = Lexer.token)
|
||||
(ParErr: sig val message : int -> string end)
|
||||
(ParserLog: Pretty with type ast = AST.t
|
||||
(ParserLog: Printer with type ast = AST.t
|
||||
and type expr = AST.expr)
|
||||
(SubIO: SubIO) =
|
||||
struct
|
||||
@ -90,12 +89,12 @@ module Make (Lexer: Lexer.S)
|
||||
ParserLog.mk_state ~offsets:SubIO.options#offsets
|
||||
~mode:SubIO.options#mode
|
||||
~buffer:output in
|
||||
let close () = lexer_inst.Lexer.close () in
|
||||
let close () = lexer_inst.LexerLib.close () in
|
||||
let expr =
|
||||
try
|
||||
if SubIO.options#mono then
|
||||
let tokeniser = lexer_inst.Lexer.read ~log
|
||||
and lexbuf = lexer_inst.Lexer.buffer
|
||||
let tokeniser = lexer_inst.LexerLib.read ~log
|
||||
and lexbuf = lexer_inst.LexerLib.buffer
|
||||
in Front.mono_expr tokeniser lexbuf
|
||||
else
|
||||
Front.incr_expr lexer_inst
|
||||
@ -125,12 +124,12 @@ module Make (Lexer: Lexer.S)
|
||||
ParserLog.mk_state ~offsets:SubIO.options#offsets
|
||||
~mode:SubIO.options#mode
|
||||
~buffer:output in
|
||||
let close () = lexer_inst.Lexer.close () in
|
||||
let close () = lexer_inst.LexerLib.close () in
|
||||
let ast =
|
||||
try
|
||||
if SubIO.options#mono then
|
||||
let tokeniser = lexer_inst.Lexer.read ~log
|
||||
and lexbuf = lexer_inst.Lexer.buffer
|
||||
let tokeniser = lexer_inst.LexerLib.read ~log
|
||||
and lexbuf = lexer_inst.LexerLib.buffer
|
||||
in Front.mono_contract tokeniser lexbuf
|
||||
else
|
||||
Front.incr_contract lexer_inst
|
||||
@ -164,10 +163,18 @@ module Make (Lexer: Lexer.S)
|
||||
|
||||
| exception Lexer.Error err ->
|
||||
let file =
|
||||
lexer_inst.Lexer.buffer.Lexing.lex_curr_p.Lexing.pos_fname in
|
||||
let error =
|
||||
Lexer.format_error ~offsets:SubIO.options#offsets
|
||||
SubIO.options#mode err ~file:(file <> "")
|
||||
lexer_inst.LexerLib.buffer.Lexing.lex_curr_p.Lexing.pos_fname in
|
||||
let error = Lexer.format_error
|
||||
~offsets:SubIO.options#offsets
|
||||
SubIO.options#mode err ~file:(file <> "")
|
||||
in Stdlib.Error error
|
||||
|
||||
| exception Lexer.Token.Error err ->
|
||||
let file =
|
||||
lexer_inst.LexerLib.buffer.Lexing.lex_curr_p.Lexing.pos_fname in
|
||||
let error = Lexer.Token.format_error
|
||||
~offsets:SubIO.options#offsets
|
||||
SubIO.options#mode err ~file:(file <> "")
|
||||
in Stdlib.Error error
|
||||
|
||||
(* Incremental API of Menhir *)
|
||||
@ -182,11 +189,11 @@ module Make (Lexer: Lexer.S)
|
||||
|
||||
| exception Parser.Error ->
|
||||
let invalid, valid_opt =
|
||||
match lexer_inst.Lexer.get_win () with
|
||||
Lexer.Nil ->
|
||||
match lexer_inst.LexerLib.get_win () with
|
||||
LexerLib.Nil ->
|
||||
assert false (* Safe: There is always at least EOF. *)
|
||||
| Lexer.One invalid -> invalid, None
|
||||
| Lexer.Two (invalid, valid) -> invalid, Some valid in
|
||||
| LexerLib.One invalid -> invalid, None
|
||||
| LexerLib.Two (invalid, valid) -> invalid, Some valid in
|
||||
let point = "", valid_opt, invalid in
|
||||
let error =
|
||||
Front.format_error ~offsets:SubIO.options#offsets
|
||||
@ -206,8 +213,8 @@ module Make (Lexer: Lexer.S)
|
||||
(* Parsing a contract *)
|
||||
|
||||
let gen_parser options input parser =
|
||||
match Lexer.lexbuf_from_input input with
|
||||
Stdlib.Error (Lexer.File_opening msg) ->
|
||||
match LexerLib.lexbuf_from_input input with
|
||||
Stdlib.Error (LexerLib.File_opening msg) ->
|
||||
Stdlib.Error (Region.wrap_ghost msg)
|
||||
| Ok (lexbuf, close) ->
|
||||
(* Preprocessing the input source *)
|
||||
@ -225,45 +232,52 @@ module Make (Lexer: Lexer.S)
|
||||
(* Lexing and parsing the preprocessed input source *)
|
||||
|
||||
let () = close () in
|
||||
let input' = Lexer.String (Buffer.contents buffer) in
|
||||
match Lexer.open_token_stream options#lang input' with
|
||||
let input' = LexerLib.String (Buffer.contents buffer) in
|
||||
match LexerLib.open_token_stream
|
||||
~scan:Lexer.scan
|
||||
~token_to_region:Lexer.Token.to_region
|
||||
~style:Lexer.Token.check_right_context
|
||||
?line:options#line
|
||||
?block:options#block
|
||||
input'
|
||||
with
|
||||
Ok instance ->
|
||||
let open Lexing in
|
||||
instance.Lexer.buffer.lex_curr_p <-
|
||||
{instance.Lexer.buffer.lex_curr_p with pos_fname = file};
|
||||
instance.LexerLib.buffer.lex_curr_p <-
|
||||
{instance.LexerLib.buffer.lex_curr_p with pos_fname=file};
|
||||
apply instance parser
|
||||
| Stdlib.Error (Lexer.File_opening msg) ->
|
||||
| Stdlib.Error (LexerLib.File_opening msg) ->
|
||||
Stdlib.Error (Region.wrap_ghost msg)
|
||||
|
||||
(* Parsing a contract in a file *)
|
||||
|
||||
let contract_in_file (source : string) =
|
||||
let options = SubIO.make ~input:(Some source) ~expr:false
|
||||
in gen_parser options (Lexer.File source) parse_contract
|
||||
in gen_parser options (LexerLib.File source) parse_contract
|
||||
|
||||
(* Parsing a contract in a string *)
|
||||
|
||||
let contract_in_string (source : string) =
|
||||
let options = SubIO.make ~input:None ~expr:false in
|
||||
gen_parser options (Lexer.String source) parse_contract
|
||||
gen_parser options (LexerLib.String source) parse_contract
|
||||
|
||||
(* Parsing a contract in stdin *)
|
||||
|
||||
let contract_in_stdin () =
|
||||
let options = SubIO.make ~input:None ~expr:false in
|
||||
gen_parser options (Lexer.Channel stdin) parse_contract
|
||||
gen_parser options (LexerLib.Channel stdin) parse_contract
|
||||
|
||||
(* Parsing an expression in a string *)
|
||||
|
||||
let expr_in_string (source : string) =
|
||||
let options = SubIO.make ~input:None ~expr:true in
|
||||
gen_parser options (Lexer.String source) parse_expr
|
||||
gen_parser options (LexerLib.String source) parse_expr
|
||||
|
||||
(* Parsing an expression in stdin *)
|
||||
|
||||
let expr_in_stdin () =
|
||||
let options = SubIO.make ~input:None ~expr:true in
|
||||
gen_parser options (Lexer.Channel stdin) parse_expr
|
||||
gen_parser options (LexerLib.Channel stdin) parse_expr
|
||||
|
||||
(* Preprocess only *)
|
||||
|
||||
|
@ -2,18 +2,19 @@
|
||||
|
||||
module Region = Simple_utils.Region
|
||||
|
||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
||||
|
||||
module SSet : Set.S with type elt = string and type t = Set.Make(String).t
|
||||
|
||||
(* A subtype of [EvalOpt.options] *)
|
||||
|
||||
module type SubIO =
|
||||
sig
|
||||
type options = <
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : language;
|
||||
ext : string; (* ".ligo", ".mligo", ".religo" *)
|
||||
block : EvalOpt.block_comment option;
|
||||
line : EvalOpt.line_comment option;
|
||||
ext : string;
|
||||
mode : [`Byte | `Point];
|
||||
cmd : EvalOpt.command;
|
||||
mono : bool
|
||||
@ -23,7 +24,9 @@ module type SubIO =
|
||||
val make : input:string option -> expr:bool -> EvalOpt.options
|
||||
end
|
||||
|
||||
module type Pretty =
|
||||
(* Signature for the printers *)
|
||||
|
||||
module type Printer =
|
||||
sig
|
||||
type state
|
||||
type ast
|
||||
@ -38,6 +41,8 @@ module type Pretty =
|
||||
val print_expr : state -> expr -> unit
|
||||
end
|
||||
|
||||
(* Main functor to make the parser *)
|
||||
|
||||
module Make (Lexer : Lexer.S)
|
||||
(AST : sig type t type expr end)
|
||||
(Parser : ParserAPI.PARSER
|
||||
@ -45,7 +50,7 @@ module Make (Lexer : Lexer.S)
|
||||
and type expr = AST.expr
|
||||
and type token = Lexer.token)
|
||||
(ParErr : sig val message : int -> string end)
|
||||
(ParserLog : Pretty with type ast = AST.t
|
||||
(ParserLog : Printer with type ast = AST.t
|
||||
and type expr = AST.expr)
|
||||
(SubIO: SubIO) :
|
||||
sig
|
||||
@ -82,4 +87,4 @@ module Make (Lexer : Lexer.S)
|
||||
|
||||
val preprocess :
|
||||
string -> (Buffer.t, message Region.reg) Stdlib.result
|
||||
end
|
||||
end
|
||||
|
@ -13,6 +13,7 @@
|
||||
(preprocess
|
||||
(pps bisect_ppx --conditional))
|
||||
(modules
|
||||
LexerLib
|
||||
LexerUnit
|
||||
ParserUnit
|
||||
ParserAPI
|
||||
|
@ -1 +1 @@
|
||||
const a: string = -0x222;
|
||||
const a: string = - (**) 0x2222
|
||||
|
@ -1 +1 @@
|
||||
let a = -0x222;
|
||||
let a = - (**) 0x2222
|
||||
|
@ -1 +1 @@
|
||||
let a = -0x222;
|
||||
let a = - /**/ 0x2222;
|
||||
|
@ -1 +0,0 @@
|
||||
let arguments = 1;
|
@ -1 +1 @@
|
||||
let end = 1;
|
||||
let end = 1;
|
||||
|
2
vendors/Preprocessor/E_LexerMain.ml
vendored
2
vendors/Preprocessor/E_LexerMain.ml
vendored
@ -5,7 +5,7 @@ module Region = Simple_utils.Region
|
||||
|
||||
let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg
|
||||
|
||||
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo")
|
||||
let options = EvalOpt.read ".ligo" (* No comments allowed *)
|
||||
|
||||
let lex in_chan =
|
||||
let buffer = Lexing.from_channel in_chan in
|
||||
|
2
vendors/Preprocessor/E_ParserMain.ml
vendored
2
vendors/Preprocessor/E_ParserMain.ml
vendored
@ -5,7 +5,7 @@ module Region = Simple_utils.Region
|
||||
|
||||
let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg
|
||||
|
||||
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo")
|
||||
let options = EvalOpt.read ".ligo" (* No comments allowed *)
|
||||
|
||||
let parse in_chan =
|
||||
let buffer = Lexing.from_channel in_chan in
|
||||
|
49
vendors/Preprocessor/EvalOpt.ml
vendored
49
vendors/Preprocessor/EvalOpt.ml
vendored
@ -2,29 +2,33 @@
|
||||
|
||||
(* The type [options] gathers the command-line options. *)
|
||||
|
||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
||||
|
||||
let lang_to_string = function
|
||||
`PascaLIGO -> "PascaLIGO"
|
||||
| `CameLIGO -> "CameLIGO"
|
||||
| `ReasonLIGO -> "ReasonLIGO"
|
||||
|
||||
module SSet = Set.Make (String)
|
||||
|
||||
type line_comment = string (* Opening of a line comment *)
|
||||
type block_comment = <opening : string; closing : string>
|
||||
|
||||
let mk_block ~opening ~closing : block_comment =
|
||||
object
|
||||
method opening = opening
|
||||
method closing = closing
|
||||
end
|
||||
|
||||
type options = <
|
||||
input : string option;
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : language;
|
||||
ext : string (* ".ligo", ".mligo", ".religo" *)
|
||||
block : block_comment option;
|
||||
line : line_comment option;
|
||||
ext : string
|
||||
>
|
||||
|
||||
let make ~input ~libs ~lang ~offsets ~verbose ~ext : options =
|
||||
let make ~input ~libs ?block ?line ~offsets ~verbose ~ext : options =
|
||||
object
|
||||
method input = input
|
||||
method libs = libs
|
||||
method lang = lang
|
||||
method block = block
|
||||
method line = line
|
||||
method offsets = offsets
|
||||
method verbose = verbose
|
||||
method ext = ext
|
||||
@ -47,10 +51,10 @@ let abort msg =
|
||||
|
||||
(* Help *)
|
||||
|
||||
let help lang ext () =
|
||||
let help ext () =
|
||||
let file = Filename.basename Sys.argv.(0) in
|
||||
printf "Usage: %s [<option> ...] [<input>%s | \"-\"]\n" file ext;
|
||||
printf "where <input>%s is the %s source file (default: stdin),\n" ext lang;
|
||||
printf "where <input>%s is the LIGO source file (default: stdin),\n" ext;
|
||||
print "and each <option> (if any) is one of the following:";
|
||||
print " -I <paths> Inclusion paths (colon-separated)";
|
||||
print " --columns Columns for source locations";
|
||||
@ -74,11 +78,10 @@ let add_verbose d =
|
||||
verbose := List.fold_left (fun x y -> SSet.add y x)
|
||||
!verbose
|
||||
(split_at_colon d)
|
||||
let specs lang ext =
|
||||
let lang_str = lang_to_string lang in
|
||||
let open!Getopt in [
|
||||
let specs ext =
|
||||
let open! Getopt in [
|
||||
'I', nolong, None, Some add_path;
|
||||
'h', "help", Some (help lang_str ext), None;
|
||||
'h', "help", Some (help ext), None;
|
||||
noshort, "columns", set columns true, None;
|
||||
noshort, "verbose", None, Some add_verbose
|
||||
]
|
||||
@ -92,7 +95,7 @@ let anonymous arg =
|
||||
|
||||
(* Checking options and exporting them as non-mutable values *)
|
||||
|
||||
let check lang ext =
|
||||
let check ?block ?line ~ext =
|
||||
let libs = !libs
|
||||
|
||||
and offsets = not !columns
|
||||
@ -109,16 +112,18 @@ let check lang ext =
|
||||
else abort "Source file not found."
|
||||
else abort ("Source file lacks the extension " ^ ext ^ ".")
|
||||
|
||||
in make ~input ~libs ~lang ~offsets ~verbose ~ext
|
||||
in make ~input ~libs ?block ?line ~offsets ~verbose ~ext
|
||||
|
||||
(* Parsing the command-line options *)
|
||||
|
||||
let read ~lang:(lang : language) ~ext:(ext : string) =
|
||||
type extension = string
|
||||
|
||||
let read ?block ?line (ext: extension) =
|
||||
try
|
||||
Getopt.parse_cmdline (specs lang ext) anonymous;
|
||||
Getopt.parse_cmdline (specs ext) anonymous;
|
||||
(verb_str :=
|
||||
let apply e a =
|
||||
if a = "" then e else sprintf "%s, %s" e a
|
||||
in SSet.fold apply !verbose "");
|
||||
check lang ext
|
||||
check ?block ?line ~ext
|
||||
with Getopt.Error msg -> abort msg
|
||||
|
22
vendors/Preprocessor/EvalOpt.mli
vendored
22
vendors/Preprocessor/EvalOpt.mli
vendored
@ -2,25 +2,28 @@
|
||||
|
||||
(* The type [options] gathers the command-line options. *)
|
||||
|
||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
||||
|
||||
val lang_to_string : language -> string
|
||||
|
||||
module SSet : Set.S with type elt = string and type t = Set.Make(String).t
|
||||
|
||||
type line_comment = string (* Opening of a line comment *)
|
||||
type block_comment = <opening : string; closing : string>
|
||||
|
||||
val mk_block : opening:string -> closing:string -> block_comment
|
||||
|
||||
type options = <
|
||||
input : string option;
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : language;
|
||||
ext : string (* ".ligo", ".mligo", ".religo" *)
|
||||
block : block_comment option;
|
||||
line : line_comment option;
|
||||
ext : string
|
||||
>
|
||||
|
||||
val make :
|
||||
input:string option ->
|
||||
libs:string list ->
|
||||
lang:language ->
|
||||
?block:block_comment ->
|
||||
?line:line_comment ->
|
||||
offsets:bool ->
|
||||
verbose:SSet.t ->
|
||||
ext:string ->
|
||||
@ -30,4 +33,7 @@ val make :
|
||||
the name of the concrete syntax. This is needed to correctly handle
|
||||
comments. *)
|
||||
|
||||
val read : lang:language -> ext:string -> options
|
||||
type extension = string
|
||||
|
||||
val read :
|
||||
?block:block_comment -> ?line:line_comment -> extension -> options
|
||||
|
6
vendors/Preprocessor/Preproc.mli
vendored
6
vendors/Preprocessor/Preproc.mli
vendored
@ -14,8 +14,8 @@ type error =
|
||||
| Invalid_line_indicator of string
|
||||
| No_line_indicator
|
||||
| End_line_indicator
|
||||
| Newline_in_string (* For #include argument only *)
|
||||
| Open_string (* For #include argument only *)
|
||||
| Newline_in_string (* For #include argument only *)
|
||||
| Unterminated_string (* For #include argument only *)
|
||||
| Dangling_endif
|
||||
| Open_region_in_conditional
|
||||
| Dangling_endregion
|
||||
@ -29,10 +29,10 @@ type error =
|
||||
| Multiply_defined_symbol of string
|
||||
| Error_directive of string
|
||||
| Parse_error
|
||||
| No_line_comment_or_blank
|
||||
| Invalid_symbol
|
||||
| File_not_found of string
|
||||
| Invalid_character of char
|
||||
| Unterminated_comment of string
|
||||
|
||||
val format :
|
||||
?offsets:bool -> error Region.reg -> file:bool -> string Region.reg
|
||||
|
348
vendors/Preprocessor/Preproc.mll
vendored
348
vendors/Preprocessor/Preproc.mll
vendored
@ -44,19 +44,6 @@ type mode = Copy | Skip
|
||||
type cond = If of mode | Elif of mode | Else | Region
|
||||
type trace = cond list
|
||||
|
||||
(* Line offsets
|
||||
|
||||
The value [Inline] of type [offset] means that the current location
|
||||
cannot be reached from the start of the line with only white
|
||||
space. The same holds for the special value [Prefix 0]. Values of
|
||||
the form [Prefix n] mean that the current location can be reached
|
||||
from the start of the line with [n] white spaces (padding). These
|
||||
distinctions are needed because preprocessor directives cannot
|
||||
occur inside lines.
|
||||
*)
|
||||
|
||||
type offset = Prefix of int | Inline
|
||||
|
||||
(* Environments *)
|
||||
|
||||
module Env = Set.Make (String)
|
||||
@ -78,8 +65,6 @@ in function
|
||||
* the field [env] records the symbols defined;
|
||||
* the field [mode] informs whether the preprocessor is in copying or
|
||||
skipping mode;
|
||||
* the field [offset] tells whether the current location can be
|
||||
reached from the start of the line with only white space;
|
||||
* the field [trace] is a stack of previous, still active conditional
|
||||
directives;
|
||||
* the field [out] keeps the output buffer;
|
||||
@ -92,7 +77,6 @@ in function
|
||||
type state = {
|
||||
env : Env.t;
|
||||
mode : mode;
|
||||
offset : offset;
|
||||
trace : trace;
|
||||
out : Buffer.t;
|
||||
incl : in_channel list;
|
||||
@ -117,7 +101,7 @@ type error =
|
||||
| No_line_indicator
|
||||
| End_line_indicator
|
||||
| Newline_in_string
|
||||
| Open_string
|
||||
| Unterminated_string
|
||||
| Dangling_endif
|
||||
| Open_region_in_conditional
|
||||
| Dangling_endregion
|
||||
@ -131,10 +115,10 @@ type error =
|
||||
| Multiply_defined_symbol of string
|
||||
| Error_directive of string
|
||||
| Parse_error
|
||||
| No_line_comment_or_blank
|
||||
| Invalid_symbol
|
||||
| File_not_found of string
|
||||
| Invalid_character of char
|
||||
| Unterminated_comment of string
|
||||
|
||||
let error_to_string = function
|
||||
Directive_inside_line ->
|
||||
@ -151,7 +135,7 @@ let error_to_string = function
|
||||
Hint: Try a string, end of line, or a line comment."
|
||||
| Newline_in_string ->
|
||||
sprintf "Invalid newline character in string."
|
||||
| Open_string ->
|
||||
| Unterminated_string ->
|
||||
sprintf "Unterminated string.\n\
|
||||
Hint: Close with double quotes."
|
||||
| Dangling_endif ->
|
||||
@ -187,14 +171,15 @@ let error_to_string = function
|
||||
msg
|
||||
| Parse_error ->
|
||||
"Parse error in expression."
|
||||
| No_line_comment_or_blank ->
|
||||
"Line comment or whitespace expected."
|
||||
| Invalid_symbol ->
|
||||
"Expected a symbol (identifier)."
|
||||
| File_not_found name ->
|
||||
sprintf "File \"%s\" to include not found." name
|
||||
| Invalid_character c ->
|
||||
E_Lexer.error_to_string (E_Lexer.Invalid_character c)
|
||||
| Unterminated_comment ending ->
|
||||
sprintf "Unterminated comment.\n\
|
||||
Hint: Close with \"%s\"." ending
|
||||
|
||||
let format ?(offsets=true) Region.{region; value} ~file =
|
||||
let msg = error_to_string value
|
||||
@ -224,7 +209,7 @@ let fail error state buffer = stop error state (mk_reg buffer)
|
||||
let reduce_cond state region =
|
||||
let rec reduce = function
|
||||
[] -> stop Dangling_endif state region
|
||||
| If mode::trace -> {state with mode; trace; offset = Prefix 0}
|
||||
| If mode::trace -> {state with mode; trace}
|
||||
| Region::_ -> stop Open_region_in_conditional state region
|
||||
| _::trace -> reduce trace
|
||||
in reduce state.trace
|
||||
@ -235,7 +220,7 @@ let reduce_cond state region =
|
||||
let reduce_region state region =
|
||||
match state.trace with
|
||||
[] -> stop Dangling_endregion state region
|
||||
| Region::trace -> {state with trace; offset = Prefix 0}
|
||||
| Region::trace -> {state with trace}
|
||||
| _ -> stop Conditional_in_region state region
|
||||
|
||||
(* The function [extend] is called when encountering conditional
|
||||
@ -286,7 +271,7 @@ let find dir file libs =
|
||||
|
||||
let copy state buffer = Buffer.add_string state.out (Lexing.lexeme buffer)
|
||||
|
||||
(* End of lines *)
|
||||
(* End of lines are always copied *)
|
||||
|
||||
let proc_nl state buffer = Lexing.new_line buffer; copy state buffer
|
||||
|
||||
@ -294,13 +279,6 @@ let proc_nl state buffer = Lexing.new_line buffer; copy state buffer
|
||||
|
||||
let print state string = Buffer.add_string state.out string
|
||||
|
||||
(* Expanding the offset into whitespace *)
|
||||
|
||||
let expand_offset state =
|
||||
match state.offset with
|
||||
Prefix 0 | Inline -> ()
|
||||
| Prefix n -> print state (String.make n ' ')
|
||||
|
||||
(* Evaluating a preprocessor expression
|
||||
|
||||
The evaluation of conditional directives may involve symbols whose
|
||||
@ -346,6 +324,35 @@ let letter = small | capital
|
||||
let ident = letter (letter | '_' | digit)*
|
||||
let directive = '#' (blank* as space) (small+ as id)
|
||||
|
||||
(* Comments *)
|
||||
|
||||
let pascaligo_block_comment_opening = "(*"
|
||||
let pascaligo_block_comment_closing = "*)"
|
||||
let pascaligo_line_comment = "//"
|
||||
|
||||
let cameligo_block_comment_opening = "(*"
|
||||
let cameligo_block_comment_closing = "*)"
|
||||
let cameligo_line_comment = "//"
|
||||
|
||||
let reasonligo_block_comment_opening = "/*"
|
||||
let reasonligo_block_comment_closing = "*/"
|
||||
let reasonligo_line_comment = "//"
|
||||
|
||||
let block_comment_openings =
|
||||
pascaligo_block_comment_opening
|
||||
| cameligo_block_comment_opening
|
||||
| reasonligo_block_comment_opening
|
||||
|
||||
let block_comment_closings =
|
||||
pascaligo_block_comment_closing
|
||||
| cameligo_block_comment_closing
|
||||
| reasonligo_block_comment_closing
|
||||
|
||||
let line_comments =
|
||||
pascaligo_line_comment
|
||||
| cameligo_line_comment
|
||||
| reasonligo_line_comment
|
||||
|
||||
(* Rules *)
|
||||
|
||||
(* The rule [scan] scans the input buffer for directives, strings,
|
||||
@ -354,19 +361,17 @@ let directive = '#' (blank* as space) (small+ as id)
|
||||
depending on the compilation directives. If not copied, new line
|
||||
characters are output.
|
||||
|
||||
Scanning is triggered by the function call [scan env mode offset
|
||||
trace lexbuf], where [env] is the set of defined symbols
|
||||
(introduced by `#define'), [mode] specifies whether we are copying
|
||||
or skipping the input, [offset] informs about the location in the
|
||||
line (either there is a prefix of blanks, or at least a non-blank
|
||||
character has been read), and [trace] is the stack of conditional
|
||||
directives read so far.
|
||||
Scanning is triggered by the function call [scan env mode trace
|
||||
lexbuf], where [env] is the set of defined symbols (introduced by
|
||||
`#define'), [mode] specifies whether we are copying or skipping the
|
||||
input, and [trace] is the stack of conditional directives read so
|
||||
far.
|
||||
|
||||
The first call is [scan {env=Env.empty; mode=Copy; offset = Prefix
|
||||
0; trace=[]; incl=[]; opt}], meaning that we start with an empty
|
||||
environment, that copying the input is enabled by default, and that
|
||||
we are at the start of a line and no previous conditional
|
||||
directives have been read yet. The field [opt] is the CLI options.
|
||||
The first call is [scan {env=Env.empty; mode=Copy; trace=[];
|
||||
incl=[]; opt}], meaning that we start with an empty environment,
|
||||
that copying the input is enabled by default, and that we are at
|
||||
the start of a line and no previous conditional directives have
|
||||
been read yet. The field [opt] is the CLI options.
|
||||
|
||||
When an "#if" is matched, the trace is extended by the call [extend
|
||||
lexbuf (If mode) trace], during the evaluation of which the
|
||||
@ -386,12 +391,11 @@ let directive = '#' (blank* as space) (small+ as id)
|
||||
value of the conditional expression must be ignored (but not its
|
||||
syntax), and we continue skipping the input.
|
||||
|
||||
When an "#else" is matched, the trace is extended with [Else],
|
||||
then, if the directive is not at a wrong offset, the rest of the
|
||||
line is scanned with [skip_line]. If we were in copy mode, the new
|
||||
mode toggles to skipping mode; otherwise, the trace is searched for
|
||||
the last encountered "#if" of "#elif" and the associated mode is
|
||||
restored.
|
||||
When an "#else" is matched, the trace is extended with [Else], then
|
||||
the rest of the line is scanned with [skip_line]. If we were in
|
||||
copy mode, the new mode toggles to skipping mode; otherwise, the
|
||||
trace is searched for the last encountered "#if" of "#elif" and the
|
||||
associated mode is restored.
|
||||
|
||||
The case "#elif" is the result of the fusion (in the technical
|
||||
sense) of the code for dealing with an "#else" followed by an
|
||||
@ -465,28 +469,23 @@ let directive = '#' (blank* as space) (small+ as id)
|
||||
Important note: Comments and strings are recognised as such only in
|
||||
copy mode, which is a different behaviour from the preprocessor of
|
||||
GNU GCC, which always does.
|
||||
*)
|
||||
*)
|
||||
|
||||
rule scan state = parse
|
||||
nl { expand_offset state; proc_nl state lexbuf;
|
||||
scan {state with offset = Prefix 0} lexbuf }
|
||||
| blank { match state.offset with
|
||||
Prefix n ->
|
||||
scan {state with offset = Prefix (n+1)} lexbuf
|
||||
| Inline ->
|
||||
if state.mode = Copy then copy state lexbuf;
|
||||
scan state lexbuf }
|
||||
nl { proc_nl state lexbuf; scan state lexbuf }
|
||||
| blank { if state.mode = Copy then copy state lexbuf;
|
||||
scan state lexbuf }
|
||||
| directive {
|
||||
let region = mk_reg lexbuf in
|
||||
if not (List.mem id directives)
|
||||
then begin
|
||||
if state.mode = Copy then copy state lexbuf;
|
||||
scan state lexbuf
|
||||
end
|
||||
else
|
||||
if state.offset = Inline
|
||||
if region#start#offset `Byte > 0
|
||||
then fail Directive_inside_line state lexbuf
|
||||
else
|
||||
let region = mk_reg lexbuf in
|
||||
match id with
|
||||
"include" ->
|
||||
let line = Lexing.(lexbuf.lex_curr_p.pos_lnum)
|
||||
@ -517,15 +516,15 @@ rule scan state = parse
|
||||
let mode = expr state lexbuf in
|
||||
let mode = if state.mode = Copy then mode else Skip in
|
||||
let trace = extend (If state.mode) state region in
|
||||
let state = {state with mode; offset = Prefix 0; trace}
|
||||
let state = {state with mode; trace}
|
||||
in scan state lexbuf
|
||||
| "else" ->
|
||||
let () = skip_line state lexbuf in
|
||||
let mode = match state.mode with
|
||||
Copy -> Skip
|
||||
| Skip -> last_mode state.trace in
|
||||
let () = skip_line state lexbuf in
|
||||
let mode = match state.mode with
|
||||
Copy -> Skip
|
||||
| Skip -> last_mode state.trace in
|
||||
let trace = extend Else state region
|
||||
in scan {state with mode; offset = Prefix 0; trace} lexbuf
|
||||
in scan {state with mode; trace} lexbuf
|
||||
| "elif" ->
|
||||
let mode = expr state lexbuf in
|
||||
let trace, mode =
|
||||
@ -534,7 +533,7 @@ rule scan state = parse
|
||||
| Skip -> let old_mode = last_mode state.trace
|
||||
in extend (Elif old_mode) state region,
|
||||
if old_mode = Copy then mode else Skip
|
||||
in scan {state with mode; offset = Prefix 0; trace} lexbuf
|
||||
in scan {state with mode; trace} lexbuf
|
||||
| "endif" ->
|
||||
skip_line state lexbuf;
|
||||
scan (reduce_cond state region) lexbuf
|
||||
@ -544,89 +543,81 @@ rule scan state = parse
|
||||
then stop (Reserved_symbol id) state region;
|
||||
if Env.mem id state.env
|
||||
then stop (Multiply_defined_symbol id) state region;
|
||||
let state = {state with env = Env.add id state.env;
|
||||
offset = Prefix 0}
|
||||
let state = {state with env = Env.add id state.env}
|
||||
in scan state lexbuf
|
||||
| "undef" ->
|
||||
let id, _ = variable state lexbuf in
|
||||
let state = {state with env = Env.remove id state.env;
|
||||
offset = Prefix 0}
|
||||
let state = {state with env = Env.remove id state.env}
|
||||
in scan state lexbuf
|
||||
| "error" ->
|
||||
stop (Error_directive (message [] lexbuf)) state region
|
||||
| "region" ->
|
||||
let msg = message [] lexbuf
|
||||
in expand_offset state;
|
||||
print state ("#" ^ space ^ "region" ^ msg ^ "\n");
|
||||
let state =
|
||||
{state with offset = Prefix 0; trace=Region::state.trace}
|
||||
in print state ("#" ^ space ^ "region" ^ msg ^ "\n");
|
||||
let state = {state with trace=Region::state.trace}
|
||||
in scan state lexbuf
|
||||
| "endregion" ->
|
||||
let msg = message [] lexbuf
|
||||
in expand_offset state;
|
||||
print state ("#" ^ space ^ "endregion" ^ msg ^ "\n");
|
||||
in print state ("#" ^ space ^ "endregion" ^ msg ^ "\n");
|
||||
scan (reduce_region state region) lexbuf
|
||||
(*
|
||||
| "line" ->
|
||||
expand_offset state;
|
||||
print state ("#" ^ space ^ "line");
|
||||
line_ind state lexbuf;
|
||||
scan {state with offset = Prefix 0} lexbuf
|
||||
|
||||
| "warning" ->
|
||||
let start_p, end_p = region in
|
||||
let msg = message [] lexbuf in
|
||||
let open Lexing
|
||||
in prerr_endline
|
||||
("Warning at line " ^ string_of_int start_p.pos_lnum
|
||||
^ ", char "
|
||||
^ string_of_int (start_p.pos_cnum - start_p.pos_bol)
|
||||
^ "--" ^ string_of_int (end_p.pos_cnum - end_p.pos_bol)
|
||||
^ ":\n" ^ msg);
|
||||
scan env mode (Prefix 0) trace lexbuf
|
||||
*)
|
||||
| _ -> assert false
|
||||
}
|
||||
| eof { match state.trace with
|
||||
[] -> expand_offset state; state
|
||||
| _ -> fail Missing_endif state lexbuf }
|
||||
|
||||
| eof { if state.trace = [] then state
|
||||
else fail Missing_endif state lexbuf }
|
||||
|
||||
| '"' { if state.mode = Copy then
|
||||
begin
|
||||
expand_offset state;
|
||||
copy state lexbuf;
|
||||
in_string (mk_reg lexbuf) state lexbuf
|
||||
end;
|
||||
scan {state with offset=Inline} lexbuf }
|
||||
| "//" { if state.mode = Copy then
|
||||
begin
|
||||
expand_offset state;
|
||||
copy state lexbuf;
|
||||
in_line_com state lexbuf
|
||||
end;
|
||||
scan {state with offset=Inline} lexbuf }
|
||||
| "/*" { if state.mode = Copy then
|
||||
begin
|
||||
expand_offset state;
|
||||
copy state lexbuf;
|
||||
if state.opt#lang = `ReasonLIGO then
|
||||
reasonLIGO_com (mk_reg lexbuf) state lexbuf
|
||||
end;
|
||||
scan {state with offset=Inline} lexbuf }
|
||||
| "(*" { if state.mode = Copy then
|
||||
begin
|
||||
expand_offset state;
|
||||
copy state lexbuf;
|
||||
if state.opt#lang = `CameLIGO
|
||||
|| state.opt#lang = `PascaLIGO then
|
||||
cameLIGO_com (mk_reg lexbuf) state lexbuf
|
||||
end;
|
||||
scan {state with offset=Inline} lexbuf }
|
||||
| _ { if state.mode = Copy then
|
||||
begin
|
||||
expand_offset state;
|
||||
copy state lexbuf
|
||||
end;
|
||||
scan {state with offset=Inline} lexbuf }
|
||||
scan (in_string (mk_reg lexbuf) state lexbuf) lexbuf
|
||||
end
|
||||
else scan state lexbuf }
|
||||
|
||||
| block_comment_openings {
|
||||
let lexeme = Lexing.lexeme lexbuf in
|
||||
match state.opt#block with
|
||||
Some block when block#opening = lexeme ->
|
||||
if state.mode = Copy then
|
||||
begin
|
||||
copy state lexbuf;
|
||||
let state = in_block block (mk_reg lexbuf) state lexbuf
|
||||
in scan state lexbuf
|
||||
end
|
||||
else scan state lexbuf
|
||||
| Some _ | None ->
|
||||
let n = String.length lexeme in
|
||||
begin
|
||||
rollback lexbuf;
|
||||
assert (n > 0);
|
||||
scan (scan_n_char n state lexbuf) lexbuf
|
||||
end }
|
||||
|
||||
| line_comments {
|
||||
let lexeme = Lexing.lexeme lexbuf in
|
||||
match state.opt#line with
|
||||
Some line when line = lexeme ->
|
||||
if state.mode = Copy then
|
||||
begin
|
||||
copy state lexbuf;
|
||||
scan (in_line_com state lexbuf) lexbuf
|
||||
end
|
||||
else scan state lexbuf
|
||||
| Some _ | None ->
|
||||
let n = String.length lexeme in
|
||||
begin
|
||||
rollback lexbuf;
|
||||
assert (n > 0);
|
||||
scan (scan_n_char n state lexbuf) lexbuf
|
||||
end }
|
||||
|
||||
| _ { if state.mode = Copy then copy state lexbuf;
|
||||
scan state lexbuf }
|
||||
|
||||
(* Scanning a series of characters *)
|
||||
|
||||
and scan_n_char n state = parse
|
||||
_ { if state.mode = Copy then copy state lexbuf;
|
||||
if n = 1 then state else scan_n_char (n-1) state lexbuf }
|
||||
|
||||
(* Support for #define and #undef *)
|
||||
|
||||
@ -638,47 +629,12 @@ and symbol state = parse
|
||||
ident as id { id, mk_reg lexbuf }
|
||||
| _ { fail Invalid_symbol state lexbuf }
|
||||
|
||||
(*
|
||||
(* Line indicator (#line) *)
|
||||
|
||||
and line_ind state = parse
|
||||
blank* { copy state lexbuf; line_indicator state lexbuf }
|
||||
|
||||
and line_indicator state = parse
|
||||
natural { copy state lexbuf; end_indicator state lexbuf }
|
||||
| ident as id {
|
||||
match id with
|
||||
"default" | "hidden" ->
|
||||
print state (id ^ message [] lexbuf)
|
||||
| _ -> fail (Invalid_line_indicator id) state lexbuf }
|
||||
| _ { fail No_line_indicator state lexbuf }
|
||||
|
||||
and end_indicator state = parse
|
||||
blank+ { copy state lexbuf; end_indicator state lexbuf }
|
||||
| nl { proc_nl state lexbuf }
|
||||
| eof { copy state lexbuf }
|
||||
| "//" { copy state lexbuf;
|
||||
print state (message [] lexbuf ^ "\n") }
|
||||
| '"' { copy state lexbuf;
|
||||
in_string (mk_reg lexbuf) state lexbuf;
|
||||
opt_line_com state lexbuf }
|
||||
| _ { fail End_line_indicator state lexbuf }
|
||||
|
||||
and opt_line_com state = parse
|
||||
nl { proc_nl state lexbuf }
|
||||
| eof { copy state lexbuf }
|
||||
| blank+ { copy state lexbuf; opt_line_com state lexbuf }
|
||||
| "//" { print state ("//" ^ message [] lexbuf) }
|
||||
*)
|
||||
|
||||
(* New lines and verbatim sequence of characters *)
|
||||
|
||||
and skip_line state = parse
|
||||
nl { proc_nl state lexbuf }
|
||||
| blank+ { skip_line state lexbuf }
|
||||
| "//" { in_line_com {state with mode=Skip} lexbuf }
|
||||
| _ { fail No_line_comment_or_blank state lexbuf }
|
||||
| eof { () }
|
||||
nl { proc_nl state lexbuf }
|
||||
| blank+ { skip_line state lexbuf }
|
||||
| _ { () }
|
||||
|
||||
and message acc = parse
|
||||
nl { Lexing.new_line lexbuf;
|
||||
@ -689,22 +645,41 @@ and message acc = parse
|
||||
(* Comments *)
|
||||
|
||||
and in_line_com state = parse
|
||||
nl { proc_nl state lexbuf }
|
||||
| eof { () }
|
||||
nl { proc_nl state lexbuf; state }
|
||||
| eof { state }
|
||||
| _ { if state.mode = Copy then copy state lexbuf;
|
||||
in_line_com state lexbuf }
|
||||
|
||||
and reasonLIGO_com opening state = parse
|
||||
nl { proc_nl state lexbuf; reasonLIGO_com opening state lexbuf }
|
||||
| "*/" { copy state lexbuf }
|
||||
| eof { () }
|
||||
| _ { copy state lexbuf; reasonLIGO_com opening state lexbuf }
|
||||
and in_block block opening state = parse
|
||||
'"' | block_comment_openings {
|
||||
let lexeme = Lexing.lexeme lexbuf in
|
||||
if block#opening = lexeme || lexeme = "\""
|
||||
then let () = copy state lexbuf in
|
||||
let opening' = mk_reg lexbuf in
|
||||
let next = if lexeme = "\"" then in_string
|
||||
else in_block block in
|
||||
let state = next opening' state lexbuf
|
||||
in in_block block opening state lexbuf
|
||||
else let () = rollback lexbuf in
|
||||
let n = String.length lexeme in
|
||||
let () = assert (n > 0) in
|
||||
let state = scan_n_char n state lexbuf
|
||||
in in_block block opening state lexbuf }
|
||||
|
||||
and cameLIGO_com opening state = parse
|
||||
nl { proc_nl state lexbuf; cameLIGO_com opening state lexbuf }
|
||||
| "*)" { copy state lexbuf }
|
||||
| eof { () }
|
||||
| _ { copy state lexbuf; cameLIGO_com opening state lexbuf }
|
||||
| block_comment_closings {
|
||||
let lexeme = Lexing.lexeme lexbuf in
|
||||
if block#closing = lexeme
|
||||
then (copy state lexbuf; state)
|
||||
else let () = rollback lexbuf in
|
||||
let n = String.length lexeme in
|
||||
let () = assert (n > 0) in
|
||||
let state = scan_n_char n state lexbuf
|
||||
in in_block block opening state lexbuf }
|
||||
|
||||
| nl { proc_nl state lexbuf; in_block block opening state lexbuf }
|
||||
| eof { let err = Unterminated_comment (block#closing)
|
||||
in stop err state opening }
|
||||
| _ { copy state lexbuf; in_block block opening state lexbuf }
|
||||
|
||||
(* Included filename *)
|
||||
|
||||
@ -717,15 +692,15 @@ and in_inclusion opening acc len state = parse
|
||||
in Region.cover opening closing,
|
||||
mk_str len acc }
|
||||
| nl { fail Newline_in_string state lexbuf }
|
||||
| eof { stop Open_string state opening }
|
||||
| eof { stop Unterminated_string state opening }
|
||||
| _ as c { in_inclusion opening (c::acc) (len+1) state lexbuf }
|
||||
|
||||
(* Strings *)
|
||||
|
||||
and in_string opening state = parse
|
||||
"\\\"" { copy state lexbuf; in_string opening state lexbuf }
|
||||
| '"' { copy state lexbuf }
|
||||
| eof { () }
|
||||
| '"' { copy state lexbuf; state }
|
||||
| eof { state }
|
||||
| _ { copy state lexbuf; in_string opening state lexbuf }
|
||||
|
||||
and preproc state = parse
|
||||
@ -750,7 +725,6 @@ let lex opt buffer =
|
||||
let state = {
|
||||
env = Env.empty;
|
||||
mode = Copy;
|
||||
offset = Prefix 0;
|
||||
trace = [];
|
||||
out = Buffer.create 80;
|
||||
incl = [];
|
||||
|
7
vendors/Preprocessor/PreprocMain.ml
vendored
7
vendors/Preprocessor/PreprocMain.ml
vendored
@ -4,9 +4,12 @@ module Region = Simple_utils.Region
|
||||
module Preproc = Preprocessor.Preproc
|
||||
module EvalOpt = Preprocessor.EvalOpt
|
||||
|
||||
let highlight msg = Printf.eprintf "\027[31m%s\027[0m\n%!" msg
|
||||
let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg
|
||||
|
||||
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo")
|
||||
let options =
|
||||
let open EvalOpt in
|
||||
let block = mk_block ~opening:"(*" ~closing:"*)"
|
||||
in read ~block ~line:"//" ".ligo"
|
||||
|
||||
let preproc cin =
|
||||
let buffer = Lexing.from_channel cin in
|
||||
|
22
vendors/Preprocessor/build.sh
vendored
22
vendors/Preprocessor/build.sh
vendored
@ -1,22 +0,0 @@
|
||||
#!/bin/sh
|
||||
set -x
|
||||
ocamllex.opt E_Lexer.mll
|
||||
ocamllex.opt Preproc.mll
|
||||
menhir -la 1 E_Parser.mly
|
||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -c EvalOpt.mli
|
||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -c E_AST.ml
|
||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -c E_Parser.mli
|
||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c E_Lexer.mli
|
||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c E_LexerMain.ml
|
||||
camlcmd="ocamlfind ocamlc -I _x86_64 -strict-sequence -w +A-48-4 "
|
||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package getopt,str -c EvalOpt.ml
|
||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c E_Lexer.ml
|
||||
menhir --infer --ocamlc="$camlcmd" E_Parser.mly
|
||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -c E_Parser.ml
|
||||
ocamlfind ocamlc -package getopt,simple-utils,str -linkpkg -o E_LexerMain.byte E_AST.cmo E_Parser.cmo E_Lexer.cmo EvalOpt.cmo E_LexerMain.cmo
|
||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c Preproc.mli
|
||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c PreprocMain.ml
|
||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c Preproc.ml
|
||||
ocamlfind ocamlc -package getopt,simple-utils,str -linkpkg -o PreprocMain.byte EvalOpt.cmo E_AST.cmo E_Parser.cmo E_Lexer.cmo Preproc.cmo PreprocMain.cmo
|
||||
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c E_ParserMain.ml
|
||||
ocamlfind ocamlc -package getopt,simple-utils,str -linkpkg -o E_ParserMain.byte E_AST.cmo E_Parser.cmo E_Lexer.cmo EvalOpt.cmo Preproc.cmo E_ParserMain.cmo
|
4
vendors/Preprocessor/clean.sh
vendored
4
vendors/Preprocessor/clean.sh
vendored
@ -1,4 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
\rm -f *.cm* *.o *.byte *.opt
|
||||
\rm E_Lexer.ml E_Parser.ml E_Parser.mli Preproc.ml
|
Loading…
Reference in New Issue
Block a user