Merge branch 'rinderknecht@michelson' into 'dev'

Refactoring of the lexer and preprocessor

See merge request ligolang/ligo!591
This commit is contained in:
Sander 2020-04-29 14:19:29 +00:00
commit d0d495ccce
49 changed files with 2048 additions and 1876 deletions

View File

@ -2,7 +2,7 @@ open Cli_expect
let%expect_test _ =
run_ligo_bad [ "compile-contract" ; "../../test/lexer/broken_string.ligo" ; "main" ] ;
[%expect {|
[%expect {|
ligo: : Lexical error in file "broken_string.ligo", line 1, characters 18-19:
The string starting here is interrupted by a line break.
Hint: Remove the break, close the string before or insert a backslash.
@ -19,7 +19,7 @@ ligo: : Lexical error in file "broken_string.ligo", line 1, characters 18-19:
|} ];
run_ligo_bad [ "compile-contract" ; "../../test/lexer/broken_string.mligo" ; "main" ] ;
[%expect {|
[%expect {|
ligo: : Lexical error in file "broken_string.mligo", line 1, characters 8-9:
The string starting here is interrupted by a line break.
Hint: Remove the break, close the string before or insert a backslash.
@ -36,7 +36,7 @@ ligo: : Lexical error in file "broken_string.mligo", line 1, characters 8-9:
|} ];
run_ligo_bad [ "compile-contract" ; "../../test/lexer/broken_string.religo" ; "main" ] ;
[%expect {|
[%expect {|
ligo: : Lexical error in file "broken_string.religo", line 1, characters 8-9:
The string starting here is interrupted by a line break.
Hint: Remove the break, close the string before or insert a backslash.
@ -53,8 +53,8 @@ ligo: : Lexical error in file "broken_string.religo", line 1, characters 8-9:
|} ];
run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.ligo" ; "main" ] ;
[%expect {|
ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters 18-23:
[%expect {|
ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters 18-31:
Negative byte sequence.
Hint: Remove the leading minus sign.
{}
@ -70,8 +70,8 @@ ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters
|} ];
run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.mligo" ; "main" ] ;
[%expect {|
ligo: : Lexical error in file "negative_byte_sequence.mligo", line 1, characters 8-13:
[%expect {|
ligo: : Lexical error in file "negative_byte_sequence.mligo", line 1, characters 8-21:
Negative byte sequence.
Hint: Remove the leading minus sign.
{}
@ -87,8 +87,8 @@ ligo: : Lexical error in file "negative_byte_sequence.mligo", line 1, characters
|} ];
run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.religo" ; "main" ] ;
[%expect {|
ligo: : Lexical error in file "negative_byte_sequence.religo", line 1, characters 8-13:
[%expect {|
ligo: : Lexical error in file "negative_byte_sequence.religo", line 1, characters 8-21:
Negative byte sequence.
Hint: Remove the leading minus sign.
{}
@ -103,8 +103,9 @@ ligo: : Lexical error in file "negative_byte_sequence.religo", line 1, character
* Check the changelog by running 'ligo changelog'
|} ];
(*
run_ligo_bad [ "compile-contract" ; "../../test/lexer/reserved_name.ligo" ; "main" ] ;
[%expect {|
[%expect {|
ligo: : Lexical error in file "reserved_name.ligo", line 1, characters 4-13:
Reserved name: "arguments".
Hint: Change the name.
@ -119,9 +120,10 @@ ligo: : Lexical error in file "reserved_name.ligo", line 1, characters 4-13:
* Open a gitlab issue: https://gitlab.com/ligolang/ligo/issues/new
* Check the changelog by running 'ligo changelog'
|} ];
*)
run_ligo_bad [ "compile-contract" ; "../../test/lexer/reserved_name.religo" ; "main" ] ;
[%expect {|
[%expect {|
ligo: : Lexical error in file "reserved_name.religo", line 1, characters 4-7:
Reserved name: "end".
Hint: Change the name.
@ -138,7 +140,7 @@ ligo: : Lexical error in file "reserved_name.religo", line 1, characters 4-7:
|} ];
run_ligo_bad [ "compile-contract" ; "../../test/lexer/reserved_name.mligo" ; "main" ] ;
[%expect {|
[%expect {|
ligo: : Lexical error in file "reserved_name.mligo", line 1, characters 4-10:
Reserved name: "object".
Hint: Change the name.
@ -155,7 +157,7 @@ ligo: : Lexical error in file "reserved_name.mligo", line 1, characters 4-10:
|} ];
run_ligo_bad [ "compile-contract" ; "../../test/lexer/unexpected_character.ligo" ; "main" ] ;
[%expect {|
[%expect {|
ligo: : Lexical error in file "unexpected_character.ligo", line 1, characters 18-19:
Unexpected character '\239'.
{}
@ -171,7 +173,7 @@ ligo: : Lexical error in file "unexpected_character.ligo", line 1, characters 18
|} ];
run_ligo_bad [ "compile-contract" ; "../../test/lexer/unexpected_character.mligo" ; "main" ] ;
[%expect {|
[%expect {|
ligo: : Lexical error in file "unexpected_character.mligo", line 1, characters 8-9:
Unexpected character '\239'.
{}
@ -187,7 +189,7 @@ ligo: : Lexical error in file "unexpected_character.mligo", line 1, characters 8
|} ];
run_ligo_bad [ "compile-contract" ; "../../test/lexer/unexpected_character.religo" ; "main" ] ;
[%expect {|
[%expect {|
ligo: : Lexical error in file "unexpected_character.religo", line 1, characters 8-9:
Unexpected character '\239'.
{}
@ -203,11 +205,10 @@ ligo: : Lexical error in file "unexpected_character.religo", line 1, characters
|} ];
run_ligo_bad [ "compile-contract" ; "../../test/lexer/unterminated_comment.mligo" ; "main" ] ;
[%expect {|
ligo: : Lexical error in file "unterminated_comment.mligo", line 1, characters 0-2:
[%expect {|
ligo: : Preprocessing error in file "../../test/lexer/unterminated_comment.mligo", line 1, characters 0-2:
Unterminated comment.
Hint: Close with "*)".
{}
Hint: Close with "*)". {}
If you're not sure how to fix this error, you can
@ -220,7 +221,7 @@ ligo: : Lexical error in file "unterminated_comment.mligo", line 1, characters 0
|} ];
run_ligo_bad [ "compile-contract" ; "../../test/lexer/invalid_symbol.ligo" ; "main" ] ;
[%expect {|
[%expect {|
ligo: : Lexical error in file "invalid_symbol.ligo", line 1, characters 17-20:
Invalid symbol.
Hint: Check the LIGO syntax you use.
@ -237,7 +238,7 @@ ligo: : Lexical error in file "invalid_symbol.ligo", line 1, characters 17-20:
|} ];
run_ligo_bad [ "compile-contract" ; "../../test/lexer/invalid_symbol.mligo" ; "main" ] ;
[%expect {|
[%expect {|
ligo: : Lexical error in file "invalid_symbol.mligo", line 1, characters 10-13:
Invalid symbol.
Hint: Check the LIGO syntax you use.
@ -254,7 +255,7 @@ ligo: : Lexical error in file "invalid_symbol.mligo", line 1, characters 10-13:
|} ];
run_ligo_bad [ "compile-contract" ; "../../test/lexer/invalid_symbol.religo" ; "main" ] ;
[%expect {|
[%expect {|
ligo: : Lexical error in file "invalid_symbol.religo", line 1, characters 10-11:
Invalid symbol.
Hint: Check the LIGO syntax you use.
@ -271,7 +272,7 @@ ligo: : Lexical error in file "invalid_symbol.religo", line 1, characters 10-11:
|} ];
run_ligo_bad [ "compile-contract" ; "../../test/lexer/missing_break.ligo" ; "main" ] ;
[%expect {|
[%expect {|
ligo: : Lexical error in file "missing_break.ligo", line 1, characters 18-18:
Missing break.
Hint: Insert some space.
@ -288,7 +289,7 @@ ligo: : Lexical error in file "missing_break.ligo", line 1, characters 18-18:
|} ];
run_ligo_bad [ "compile-contract" ; "../../test/lexer/missing_break.mligo" ; "main" ] ;
[%expect {|
[%expect {|
ligo: : Lexical error in file "missing_break.mligo", line 1, characters 11-11:
Missing break.
Hint: Insert some space.
@ -305,7 +306,7 @@ ligo: : Lexical error in file "missing_break.mligo", line 1, characters 11-11:
|} ];
run_ligo_bad [ "compile-contract" ; "../../test/lexer/missing_break.religo" ; "main" ] ;
[%expect {|
[%expect {|
ligo: : Lexical error in file "missing_break.religo", line 1, characters 11-11:
Missing break.
Hint: Insert some space.
@ -322,7 +323,7 @@ ligo: : Lexical error in file "missing_break.religo", line 1, characters 11-11:
|} ];
run_ligo_bad [ "compile-contract" ; "../../test/lexer/invalid_character_in_string.ligo" ; "main" ] ;
[%expect {|
[%expect {|
ligo: : Lexical error in file "invalid_character_in_string.ligo", line 1, characters 19-20:
Invalid character in string.
Hint: Remove or replace the character.
@ -339,7 +340,7 @@ ligo: : Lexical error in file "invalid_character_in_string.ligo", line 1, charac
|} ];
run_ligo_bad [ "compile-contract" ; "../../test/lexer/invalid_character_in_string.mligo" ; "main" ] ;
[%expect {|
[%expect {|
ligo: : Lexical error in file "invalid_character_in_string.mligo", line 1, characters 9-10:
Invalid character in string.
Hint: Remove or replace the character.
@ -356,7 +357,7 @@ ligo: : Lexical error in file "invalid_character_in_string.mligo", line 1, chara
|} ];
run_ligo_bad [ "compile-contract" ; "../../test/lexer/invalid_character_in_string.religo" ; "main" ] ;
[%expect {|
[%expect {|
ligo: : Lexical error in file "invalid_character_in_string.religo", line 1, characters 9-10:
Invalid character in string.
Hint: Remove or replace the character.
@ -370,4 +371,4 @@ ligo: : Lexical error in file "invalid_character_in_string.religo", line 1, char
* Ask a question on our Discord: https://discord.gg/9rhYaEt
* Open a gitlab issue: https://gitlab.com/ligolang/ligo/issues/new
* Check the changelog by running 'ligo changelog'
|} ]
|} ]

View File

@ -8,15 +8,14 @@ module SSet = Set.Make (String)
(* Mock IOs TODO: Fill them with CLI options *)
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
module SubIO =
struct
type options = <
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : language;
block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string; (* ".mligo" *)
mode : [`Byte | `Point];
cmd : EvalOpt.command;
@ -24,22 +23,25 @@ module SubIO =
>
let options : options =
object
method libs = []
method verbose = SSet.empty
method offsets = true
method lang = `CameLIGO
method ext = ".mligo"
method mode = `Point
method cmd = EvalOpt.Quiet
method mono = false
end
let block = EvalOpt.mk_block ~opening:"(*" ~closing:"*)"
in object
method libs = []
method verbose = SSet.empty
method offsets = true
method block = Some block
method line = Some "//"
method ext = ".mligo"
method mode = `Point
method cmd = EvalOpt.Quiet
method mono = false
end
let make =
EvalOpt.make ~libs:options#libs
~verbose:options#verbose
~offsets:options#offsets
~lang:options#lang
?block:options#block
?line:options#line
~ext:options#ext
~mode:options#mode
~cmd:options#cmd

View File

@ -2,6 +2,7 @@ $HOME/git/OCaml-build/Makefile
../shared/Lexer.mli
../shared/Lexer.mll
../shared/LexerLib.ml
../shared/EvalOpt.ml
../shared/EvalOpt.mli
../shared/FQueue.ml

View File

@ -156,11 +156,22 @@ val eof : Region.t -> token
(* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool
(* Style *)
type error
val error_to_string : error -> string
exception Error of error Region.reg
val format_error :
?offsets:bool -> [`Byte | `Point] ->
error Region.reg -> file:bool -> string Region.reg
val check_right_context :
token ->
(Lexing.lexbuf -> (Markup.t list * token) option) ->
Lexing.lexbuf ->
unit

View File

@ -102,138 +102,167 @@ type t =
| EOF of Region.t (* End of file *)
(* Projections *)
type token = t
let proj_token = function
ARROW region -> region, "ARROW"
| CONS region -> region, "CONS"
| CAT region -> region, "CAT"
| MINUS region -> region, "MINUS"
| PLUS region -> region, "PLUS"
| SLASH region -> region, "SLASH"
| TIMES region -> region, "TIMES"
| LPAR region -> region, "LPAR"
| RPAR region -> region, "RPAR"
| LBRACKET region -> region, "LBRACKET"
| RBRACKET region -> region, "RBRACKET"
| LBRACE region -> region, "LBRACE"
| RBRACE region -> region, "RBRACE"
| COMMA region -> region, "COMMA"
| SEMI region -> region, "SEMI"
| VBAR region -> region, "VBAR"
| COLON region -> region, "COLON"
| DOT region -> region, "DOT"
| WILD region -> region, "WILD"
| EQ region -> region, "EQ"
| NE region -> region, "NE"
| LT region -> region, "LT"
| GT region -> region, "GT"
| LE region -> region, "LE"
| GE region -> region, "GE"
| BOOL_OR region -> region, "BOOL_OR"
| BOOL_AND region -> region, "BOOL_AND"
| Ident Region.{region; value} ->
region, sprintf "Ident %s" value
| Constr Region.{region; value} ->
region, sprintf "Constr %s" value
(* Literals *)
String Region.{region; value} ->
region, sprintf "Str %s" value
| Bytes Region.{region; value = s,b} ->
region,
sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b)
| Int Region.{region; value = s,n} ->
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
| Nat Region.{region; value = s,n} ->
region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
| Mutez Region.{region; value = s,n} ->
region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
| String Region.{region; value} ->
region, sprintf "Str %s" value
| Bytes Region.{region; value = s,b} ->
region,
sprintf "Bytes (\"%s\", \"0x%s\")"
s (Hex.show b)
| Ident Region.{region; value} ->
region, sprintf "Ident %s" value
| Constr Region.{region; value} ->
region, sprintf "Constr %s" value
| Attr Region.{region; value} ->
region, sprintf "Attr \"%s\"" value
(* Symbols *)
| ARROW region -> region, "ARROW"
| CONS region -> region, "CONS"
| CAT region -> region, "CAT"
| MINUS region -> region, "MINUS"
| PLUS region -> region, "PLUS"
| SLASH region -> region, "SLASH"
| TIMES region -> region, "TIMES"
| LPAR region -> region, "LPAR"
| RPAR region -> region, "RPAR"
| LBRACKET region -> region, "LBRACKET"
| RBRACKET region -> region, "RBRACKET"
| LBRACE region -> region, "LBRACE"
| RBRACE region -> region, "RBRACE"
| COMMA region -> region, "COMMA"
| SEMI region -> region, "SEMI"
| VBAR region -> region, "VBAR"
| COLON region -> region, "COLON"
| DOT region -> region, "DOT"
| WILD region -> region, "WILD"
| EQ region -> region, "EQ"
| NE region -> region, "NE"
| LT region -> region, "LT"
| GT region -> region, "GT"
| LE region -> region, "LE"
| GE region -> region, "GE"
| BOOL_OR region -> region, "BOOL_OR"
| BOOL_AND region -> region, "BOOL_AND"
(* Keywords *)
| Begin region -> region, "Begin"
| Else region -> region, "Else"
| End region -> region, "End"
| Else region -> region, "Else"
| End region -> region, "End"
| False region -> region, "False"
| Fun region -> region, "Fun"
| Rec region -> region, "Rec"
| If region -> region, "If"
| In region -> region, "In"
| Let region -> region, "Let"
| Fun region -> region, "Fun"
| Rec region -> region, "Rec"
| If region -> region, "If"
| In region -> region, "In"
| Let region -> region, "Let"
| Match region -> region, "Match"
| Mod region -> region, "Mod"
| Not region -> region, "Not"
| Of region -> region, "Of"
| Or region -> region, "Or"
| Then region -> region, "Then"
| True region -> region, "True"
| Type region -> region, "Type"
| With region -> region, "With"
| C_None region -> region, "C_None"
| C_Some region -> region, "C_Some"
| Mod region -> region, "Mod"
| Not region -> region, "Not"
| Of region -> region, "Of"
| Or region -> region, "Or"
| Then region -> region, "Then"
| True region -> region, "True"
| Type region -> region, "Type"
| With region -> region, "With"
(* Data *)
| C_None region -> region, "C_None"
| C_Some region -> region, "C_Some"
(* Virtual tokens *)
| EOF region -> region, "EOF"
let to_lexeme = function
ARROW _ -> "->"
| CONS _ -> "::"
| CAT _ -> "^"
| MINUS _ -> "-"
| PLUS _ -> "+"
| SLASH _ -> "/"
| TIMES _ -> "*"
| LPAR _ -> "("
| RPAR _ -> ")"
| LBRACKET _ -> "["
| RBRACKET _ -> "]"
| LBRACE _ -> "{"
| RBRACE _ -> "}"
| COMMA _ -> ","
| SEMI _ -> ";"
| VBAR _ -> "|"
| COLON _ -> ":"
| DOT _ -> "."
| WILD _ -> "_"
| EQ _ -> "="
| NE _ -> "<>"
| LT _ -> "<"
| GT _ -> ">"
| LE _ -> "<="
| GE _ -> ">="
| BOOL_OR _ -> "||"
| BOOL_AND _ -> "&&"
| Ident id -> id.Region.value
| Constr id -> id.Region.value
let to_lexeme = function
(* Literals *)
String s -> String.escaped s.Region.value
| Bytes b -> fst b.Region.value
| Int i
| Nat i
| Mutez i -> fst i.Region.value
| String s -> String.escaped s.Region.value
| Bytes b -> fst b.Region.value
| Attr a -> a.Region.value
| Mutez i -> fst i.Region.value
| Ident id -> id.Region.value
| Constr id -> id.Region.value
| Attr a -> a.Region.value
(* Symbols *)
| ARROW _ -> "->"
| CONS _ -> "::"
| CAT _ -> "^"
| MINUS _ -> "-"
| PLUS _ -> "+"
| SLASH _ -> "/"
| TIMES _ -> "*"
| LPAR _ -> "("
| RPAR _ -> ")"
| LBRACKET _ -> "["
| RBRACKET _ -> "]"
| LBRACE _ -> "{"
| RBRACE _ -> "}"
| COMMA _ -> ","
| SEMI _ -> ";"
| VBAR _ -> "|"
| COLON _ -> ":"
| DOT _ -> "."
| WILD _ -> "_"
| EQ _ -> "="
| NE _ -> "<>"
| LT _ -> "<"
| GT _ -> ">"
| LE _ -> "<="
| GE _ -> ">="
| BOOL_OR _ -> "||"
| BOOL_AND _ -> "&&"
(* Keywords *)
| Begin _ -> "begin"
| Else _ -> "else"
| End _ -> "end"
| Else _ -> "else"
| End _ -> "end"
| False _ -> "false"
| Fun _ -> "fun"
| Rec _ -> "rec"
| If _ -> "if"
| In _ -> "in"
| Let _ -> "let"
| Fun _ -> "fun"
| Rec _ -> "rec"
| If _ -> "if"
| In _ -> "in"
| Let _ -> "let"
| Match _ -> "match"
| Mod _ -> "mod"
| Not _ -> "not"
| Of _ -> "of"
| Or _ -> "or"
| True _ -> "true"
| Type _ -> "type"
| Then _ -> "then"
| With _ -> "with"
| Mod _ -> "mod"
| Not _ -> "not"
| Of _ -> "of"
| Or _ -> "or"
| True _ -> "true"
| Type _ -> "type"
| Then _ -> "then"
| With _ -> "with"
(* Data constructors *)
| C_None _ -> "None"
| C_Some _ -> "Some"
(* Virtual tokens *)
| EOF _ -> ""
(* CONVERSIONS *)
let to_string token ?(offsets=true) mode =
let region, val_str = proj_token token in
let reg_str = region#compact ~offsets mode
@ -241,10 +270,6 @@ let to_string token ?(offsets=true) mode =
let to_region token = proj_token token |> fst
(* Injections *)
type int_err = Non_canonical_zero
(* LEXIS *)
let keywords = [
@ -385,6 +410,8 @@ let mk_bytes lexeme region =
let value = lexeme, `Hex norm
in Bytes Region.{region; value}
type int_err = Non_canonical_zero
let mk_int lexeme region =
let z =
Str.(global_replace (regexp "_") "" lexeme) |> Z.of_string
@ -398,23 +425,21 @@ type nat_err =
let mk_nat lexeme region =
match (String.index_opt lexeme 'n') with
| None -> Error Invalid_natural
| Some _ -> (
let z =
Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "n") "") |>
Z.of_string in
if Z.equal z Z.zero && lexeme <> "0n"
then Error Non_canonical_zero_nat
else Ok (Nat Region.{region; value = lexeme,z})
)
None -> Error Invalid_natural
| Some _ -> let z =
Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "n") "") |>
Z.of_string in
if Z.equal z Z.zero && lexeme <> "0n"
then Error Non_canonical_zero_nat
else Ok (Nat Region.{region; value = lexeme,z})
let mk_mutez lexeme region =
let z =
Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "mutez") "") |>
Z.of_string in
if Z.equal z Z.zero && lexeme <> "0mutez"
if Z.equal z Z.zero && lexeme <> "0mutez"
then Error Non_canonical_zero
else Ok (Mutez Region.{region; value = lexeme, z})
@ -422,8 +447,6 @@ let eof region = EOF region
type sym_err = Invalid_symbol
type attr_err = Invalid_attribute
let mk_sym lexeme region =
match lexeme with
(* Lexemes in common with all concrete syntaxes *)
@ -473,88 +496,80 @@ let mk_constr lexeme region =
(* Attributes *)
type attr_err = Invalid_attribute
let mk_attr header lexeme region =
if header = "[@" then
Error Invalid_attribute
if header = "[@" then Error Invalid_attribute
else Ok (Attr Region.{value=lexeme; region})
(* Predicates *)
let is_string = function
String _ -> true
| _ -> false
let is_string = function String _ -> true | _ -> false
let is_bytes = function Bytes _ -> true | _ -> false
let is_int = function Int _ -> true | _ -> false
let is_ident = function Ident _ -> true | _ -> false
let is_eof = function EOF _ -> true | _ -> false
let is_minus = function MINUS _ -> true | _ -> false
let is_bytes = function
Bytes _ -> true
| _ -> false
(* Errors *)
let is_int = function
Int _ -> true
| _ -> false
type error =
Odd_lengthed_bytes
| Missing_break
| Negative_byte_sequence
let is_ident = function
Ident _ -> true
| _ -> false
let error_to_string = function
Odd_lengthed_bytes ->
"The length of the byte sequence is an odd number.\n\
Hint: Add or remove a digit."
| Missing_break ->
"Missing break.\n\
Hint: Insert some space."
| Negative_byte_sequence ->
"Negative byte sequence.\n\
Hint: Remove the leading minus sign."
let is_kwd = function
| Begin _
| Else _
| End _
| False _
| Fun _
| Rec _
| If _
| In _
| Let _
| Match _
| Mod _
| Not _
| Of _
| Or _
| Then _
| True _
| Type _
| With _ -> true
| _ -> false
exception Error of error Region.reg
let is_constr = function
| Constr _
| Ident _
| False _
| True _ -> true
| _ -> false
let format_error ?(offsets=true) mode Region.{region; value} ~file =
let msg = error_to_string value
and reg = region#to_string ~file ~offsets mode in
let value = sprintf "Lexical error %s:\n%s\n" reg msg
in Region.{value; region}
let is_sym = function
| ARROW _
| CONS _
| CAT _
| MINUS _
| PLUS _
| SLASH _
| TIMES _
| LPAR _
| RPAR _
| LBRACKET _
| RBRACKET _
| LBRACE _
| RBRACE _
| COMMA _
| SEMI _
| VBAR _
| COLON _
| DOT _
| WILD _
| EQ _
| NE _
| LT _
| GT _
| LE _
| GE _
| BOOL_OR _
| BOOL_AND _ -> true
| _ -> false
let fail region value = raise (Error Region.{region; value})
let is_eof = function EOF _ -> true | _ -> false
let check_right_context token next_token buffer : unit =
let pos = (to_region token)#stop in
let region = Region.make ~start:pos ~stop:pos in
match next_token buffer with
None -> ()
| Some (markup, next) ->
if is_minus token && is_bytes next
then let region =
Region.cover (to_region token) (to_region next)
in fail region Negative_byte_sequence
else
match markup with
[] ->
if is_int token
then if is_string next || is_ident next
then fail region Missing_break
else ()
else
if is_string token
then if is_int next || is_bytes next || is_ident next
then fail region Missing_break
else ()
else
if is_bytes token
then if is_string next || is_ident next
then fail region Missing_break
else if is_int next
then fail region Odd_lengthed_bytes
else ()
else ()
| _::_ -> ()
(* END TRAILER *)
}

View File

@ -4,7 +4,10 @@ module Region = Simple_utils.Region
module IO =
struct
let options = EvalOpt.(read ~lang:`CameLIGO ~ext:".mligo")
let options =
let open EvalOpt in
let block = mk_block ~opening:"(*" ~closing:"*)"
in read ~block ~line:"//" ".mligo"
end
module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
@ -12,4 +15,4 @@ module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
let () =
match M.trace () with
Stdlib.Ok () -> ()
| Error Region.{value; _} -> Utils.highlight value
| Error Region.{value; _} -> Printf.eprintf "\027[31m%s\027[0m%!" value

View File

@ -5,7 +5,10 @@ module SSet = Set.Make (String)
module IO =
struct
let options = EvalOpt.(read ~lang:`CameLIGO ~ext:".mligo")
let options =
let open EvalOpt in
let block = mk_block ~opening:"(*" ~closing:"*)"
in read ~block ~line:"//" ".mligo"
end
module SubIO =
@ -14,7 +17,8 @@ module SubIO =
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : EvalOpt.language;
block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string;
mode : [`Byte | `Point];
cmd : EvalOpt.command;
@ -26,7 +30,8 @@ module SubIO =
method libs = IO.options#libs
method verbose = IO.options#verbose
method offsets = IO.options#offsets
method lang = IO.options#lang
method block = IO.options#block
method line = IO.options#line
method ext = IO.options#ext
method mode = IO.options#mode
method cmd = IO.options#cmd
@ -37,7 +42,8 @@ module SubIO =
EvalOpt.make ~libs:options#libs
~verbose:options#verbose
~offsets:options#offsets
~lang:options#lang
?block:options#block
?line:options#line
~ext:options#ext
~mode:options#mode
~cmd:options#cmd

View File

@ -8,15 +8,14 @@ module SSet = Set.Make (String)
(* Mock IOs TODO: Fill them with CLI options *)
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
module SubIO =
struct
type options = <
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : language;
block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string; (* ".ligo" *)
mode : [`Byte | `Point];
cmd : EvalOpt.command;
@ -24,22 +23,25 @@ module SubIO =
>
let options : options =
object
method libs = []
method verbose = SSet.empty
method offsets = true
method lang = `PascaLIGO
method ext = ".ligo"
method mode = `Point
method cmd = EvalOpt.Quiet
method mono = false
end
let block = EvalOpt.mk_block ~opening:"(*" ~closing:"*)"
in object
method libs = []
method verbose = SSet.empty
method offsets = true
method block = Some block
method line = Some "//"
method ext = ".ligo"
method mode = `Point
method cmd = EvalOpt.Quiet
method mono = false
end
let make =
EvalOpt.make ~libs:options#libs
~verbose:options#verbose
~offsets:options#offsets
~lang:options#lang
?block:options#block
?line:options#line
~ext:options#ext
~mode:options#mode
~cmd:options#cmd

View File

@ -2,6 +2,8 @@ $HOME/git/OCaml-build/Makefile
../shared/Lexer.mli
../shared/Lexer.mll
../shared/LexerLib.mli
../shared/LexerLib.ml
../shared/EvalOpt.ml
../shared/EvalOpt.mli
../shared/FQueue.ml
@ -18,5 +20,6 @@ $HOME/git/OCaml-build/Makefile
../shared/LexerUnit.ml
../shared/ParserUnit.mli
../shared/ParserUnit.ml
../shared/LexerLib.ml
$HOME/git/ligo/_build/default/src/passes/1-parser/pascaligo/ParErr.ml

View File

@ -1,4 +1,4 @@
(** This signature defines the lexical tokens for LIGO
(* This signature defines the lexical tokens for LIGO
_Tokens_ are the abstract units which are used by the parser to
build the abstract syntax tree (AST), in other words, the stream of
@ -163,11 +163,22 @@ val eof : Region.t -> token
(* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool
(* Style *)
type error
val error_to_string : error -> string
exception Error of error Region.reg
val format_error :
?offsets:bool -> [`Byte | `Point] ->
error Region.reg -> file:bool -> string Region.reg
val check_right_context :
token ->
(Lexing.lexbuf -> (Markup.t list * token) option) ->
Lexing.lexbuf ->
unit

View File

@ -5,24 +5,14 @@
(* Shorthands *)
type lexeme = string
let sprintf = Printf.sprintf
module Region = Simple_utils.Region
module Pos = Simple_utils.Pos
module SMap = Map.Make (String)
module SSet = Set.Make (String)
(* Hack to roll back one lexeme in the current semantic action *)
(*
let rollback buffer =
let open Lexing in
let len = String.length (lexeme buffer) in
let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
*)
type lexeme = string
let sprintf = Printf.sprintf
(* TOKENS *)
@ -123,6 +113,8 @@ type t =
| EOF of Region.t
(* Projections *)
type token = t
let proj_token = function
@ -130,32 +122,20 @@ let proj_token = function
String Region.{region; value} ->
region, sprintf "String %s" value
| Bytes Region.{region; value = s,b} ->
region,
sprintf "Bytes (\"%s\", \"0x%s\")"
s (Hex.show b)
sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b)
| Int Region.{region; value = s,n} ->
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
| Nat Region.{region; value = s,n} ->
region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
| Mutez Region.{region; value = s,n} ->
region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
| Ident Region.{region; value} ->
region, sprintf "Ident \"%s\"" value
| Constr Region.{region; value} ->
region, sprintf "Constr \"%s\"" value
(*
| Attr {header; string={region; value}} ->
region, sprintf "Attr (\"%s\",\"%s\")" header value
*)
(* Symbols *)
| SEMI region -> region, "SEMI"
@ -244,7 +224,7 @@ let to_lexeme = function
| Bytes b -> fst b.Region.value
| Int i
| Nat i
| Mutez i -> fst i.Region.value
| Mutez i -> fst i.Region.value
| Ident id
| Constr id -> id.Region.value
@ -382,9 +362,7 @@ let keywords = [
(fun reg -> With reg)
]
let reserved =
let open SSet in
empty |> add "arguments"
let reserved = SSet.empty
let constructors = [
(fun reg -> False reg);
@ -483,24 +461,21 @@ type nat_err =
| Non_canonical_zero_nat
let mk_nat lexeme region =
match (String.index_opt lexeme 'n') with
| None -> Error Invalid_natural
| Some _ -> (
let z =
Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "n") "") |>
Z.of_string in
if Z.equal z Z.zero && lexeme <> "0n"
then Error Non_canonical_zero_nat
else Ok (Nat Region.{region; value = lexeme,z})
)
match String.index_opt lexeme 'n' with
None -> Error Invalid_natural
| Some _ -> let z =
Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "n") "") |>
Z.of_string in
if Z.equal z Z.zero && lexeme <> "0n"
then Error Non_canonical_zero_nat
else Ok (Nat Region.{region; value = lexeme,z})
let mk_mutez lexeme region =
let z =
Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "mutez") "") |>
Z.of_string in
if Z.equal z Z.zero && lexeme <> "0mutez"
let z = Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "mutez") "") |>
Z.of_string in
if Z.equal z Z.zero && lexeme <> "0mutez"
then Error Non_canonical_zero
else Ok (Mutez Region.{region; value = lexeme, z})
@ -558,104 +533,76 @@ let mk_constr lexeme region =
type attr_err = Invalid_attribute
let mk_attr _header _string _region =
Error Invalid_attribute
let mk_attr _ _ _ = Error Invalid_attribute
(* Predicates *)
let is_string = function
String _ -> true
| _ -> false
let is_string = function String _ -> true | _ -> false
let is_bytes = function Bytes _ -> true | _ -> false
let is_int = function Int _ -> true | _ -> false
let is_ident = function Ident _ -> true | _ -> false
let is_eof = function EOF _ -> true | _ -> false
let is_minus = function MINUS _ -> true | _ -> false
let is_bytes = function
Bytes _ -> true
| _ -> false
(* Errors *)
let is_int = function
Int _ -> true
| _ -> false
type error =
Odd_lengthed_bytes
| Missing_break
| Negative_byte_sequence
let is_ident = function
Ident _ -> true
| _ -> false
let error_to_string = function
Odd_lengthed_bytes ->
"The length of the byte sequence is an odd number.\n\
Hint: Add or remove a digit."
| Missing_break ->
"Missing break.\n\
Hint: Insert some space."
| Negative_byte_sequence ->
"Negative byte sequence.\n\
Hint: Remove the leading minus sign."
let is_kwd = function
And _
| Attributes _
| Begin _
| BigMap _
| Block _
| Case _
| Const _
| Contains _
| Else _
| End _
| False _
| For _
| From _
| Function _
| If _
| In _
| Is _
| List _
| Map _
| Mod _
| Nil _
| Not _
| Of _
| Or _
| Patch _
| Record _
| Remove _
| Set _
| Skip _
| Step _
| Then _
| To _
| True _
| Type _
| Unit _
| Var _
| While _
| With _ -> true
| _ -> false
exception Error of error Region.reg
let is_constr = function
Constr _
| C_None _
| C_Some _ -> true
| _ -> false
let format_error ?(offsets=true) mode Region.{region; value} ~file =
let msg = error_to_string value
and reg = region#to_string ~file ~offsets mode in
let value = sprintf "Lexical error %s:\n%s\n" reg msg
in Region.{value; region}
let is_sym = function
SEMI _
| COMMA _
| LPAR _
| RPAR _
| LBRACE _
| RBRACE _
| LBRACKET _
| RBRACKET _
| CONS _
| VBAR _
| ARROW _
| ASS _
| EQ _
| COLON _
| LT _
| LE _
| GT _
| GE _
| NE _
| PLUS _
| MINUS _
| SLASH _
| TIMES _
| DOT _
| WILD _
| CAT _ -> true
| _ -> false
let fail region value = raise (Error Region.{region; value})
let is_eof = function EOF _ -> true | _ -> false
let check_right_context token next_token buffer : unit =
let pos = (to_region token)#stop in
let region = Region.make ~start:pos ~stop:pos in
match next_token buffer with
None -> ()
| Some (markup, next) ->
if is_minus token && is_bytes next
then let region =
Region.cover (to_region token) (to_region next)
in fail region Negative_byte_sequence
else
match markup with
[] ->
if is_int token
then if is_string next || is_ident next
then fail region Missing_break
else ()
else
if is_string token
then if is_int next || is_bytes next || is_ident next
then fail region Missing_break
else ()
else
if is_bytes token
then if is_string next || is_ident next
then fail region Missing_break
else if is_int next
then fail region Odd_lengthed_bytes
else ()
else ()
| _::_ -> ()
(* END TRAILER *)
}

View File

@ -4,7 +4,10 @@ module Region = Simple_utils.Region
module IO =
struct
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo")
let options =
let open EvalOpt in
let block = mk_block ~opening:"(*" ~closing:"*)"
in read ~block ~line:"//" ".ligo"
end
module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))

View File

@ -5,7 +5,10 @@ module SSet = Set.Make (String)
module IO =
struct
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo")
let options =
let open EvalOpt in
let block = mk_block ~opening:"(*" ~closing:"*)"
in read ~block ~line:"//" ".ligo"
end
module SubIO =
@ -14,7 +17,8 @@ module SubIO =
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : EvalOpt.language;
block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string;
mode : [`Byte | `Point];
cmd : EvalOpt.command;
@ -26,7 +30,8 @@ module SubIO =
method libs = IO.options#libs
method verbose = IO.options#verbose
method offsets = IO.options#offsets
method lang = IO.options#lang
method block = IO.options#block
method line = IO.options#line
method ext = IO.options#ext
method mode = IO.options#mode
method cmd = IO.options#cmd
@ -37,7 +42,8 @@ module SubIO =
EvalOpt.make ~libs:options#libs
~verbose:options#verbose
~offsets:options#offsets
~lang:options#lang
?block:options#block
?line:options#line
~ext:options#ext
~mode:options#mode
~cmd:options#cmd

View File

@ -11,15 +11,14 @@ module SSet = Set.Make (String)
(* Mock IOs TODO: Fill them with CLI options *)
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
module SubIO =
struct
type options = <
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : language;
block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string; (* ".religo" *)
mode : [`Byte | `Point];
cmd : EvalOpt.command;
@ -27,22 +26,25 @@ module SubIO =
>
let options : options =
object
method libs = []
method verbose = SSet.empty
method offsets = true
method lang = `ReasonLIGO
method ext = ".religo"
method mode = `Point
method cmd = EvalOpt.Quiet
method mono = false
end
let block = EvalOpt.mk_block ~opening:"/*" ~closing:"*/"
in object
method libs = []
method verbose = SSet.empty
method offsets = true
method block = Some block
method line = Some "//"
method ext = ".religo"
method mode = `Point
method cmd = EvalOpt.Quiet
method mono = false
end
let make =
EvalOpt.make ~libs:options#libs
~verbose:options#verbose
~offsets:options#offsets
~lang:options#lang
?block:options#block
?line:options#line
~ext:options#ext
~mode:options#mode
~cmd:options#cmd

View File

@ -2,6 +2,7 @@ $HOME/git/OCaml-build/Makefile
../shared/Lexer.mli
../shared/Lexer.mll
../shared/LexerLib.ml
../shared/EvalOpt.ml
../shared/EvalOpt.mli
../shared/FQueue.ml

View File

@ -31,48 +31,49 @@ type lexeme = string
type t =
(* Symbols *)
CAT of Region.t (* "++" *)
CAT of Region.t (* "++" *)
(* Arithmetics *)
| MINUS of Region.t (* "-" *)
| PLUS of Region.t (* "+" *)
| SLASH of Region.t (* "/" *)
| TIMES of Region.t (* "*" *)
| MINUS of Region.t (* "-" *)
| PLUS of Region.t (* "+" *)
| SLASH of Region.t (* "/" *)
| TIMES of Region.t (* "*" *)
(* Compounds *)
| LPAR of Region.t (* "(" *)
| RPAR of Region.t (* ")" *)
| LPAR of Region.t (* "(" *)
| RPAR of Region.t (* ")" *)
| LBRACKET of Region.t (* "[" *)
| RBRACKET of Region.t (* "]" *)
| LBRACE of Region.t (* "{" *)
| RBRACE of Region.t (* "}" *)
| LBRACE of Region.t (* "{" *)
| RBRACE of Region.t (* "}" *)
(* Separators *)
| COMMA of Region.t (* "," *)
| SEMI of Region.t (* ";" *)
| VBAR of Region.t (* "|" *)
| COLON of Region.t (* ":" *)
| DOT of Region.t (* "." *)
| COMMA of Region.t (* "," *)
| SEMI of Region.t (* ";" *)
| VBAR of Region.t (* "|" *)
| COLON of Region.t (* ":" *)
| DOT of Region.t (* "." *)
| ELLIPSIS of Region.t (* "..." *)
| ARROW of Region.t (* "=>" *)
(* Wildcard *)
| WILD of Region.t (* "_" *)
| WILD of Region.t (* "_" *)
(* Comparisons *)
| EQ of Region.t (* "=" *)
| EQEQ of Region.t (* "==" *)
| NE of Region.t (* "!=" *)
| LT of Region.t (* "<" *)
| GT of Region.t (* ">" *)
| LE of Region.t (* "=<" *)
| GE of Region.t (* ">=" *)
| EQ of Region.t (* "=" *)
| EQEQ of Region.t (* "==" *)
| NE of Region.t (* "!=" *)
| LT of Region.t (* "<" *)
| GT of Region.t (* ">" *)
| LE of Region.t (* "<=" *)
| GE of Region.t (* ">=" *)
| ARROW of Region.t (* "=>" *)
(* Logic *)
| BOOL_OR of Region.t (* "||" *)
| BOOL_AND of Region.t (* "&&" *)
@ -91,18 +92,18 @@ type t =
(* Keywords *)
| Else of Region.t
| False of Region.t
| If of Region.t
| Let of Region.t
| Rec of Region.t
| Else of Region.t
| False of Region.t
| If of Region.t
| Let of Region.t
| Mod of Region.t
| Or of Region.t
| Rec of Region.t
| Switch of Region.t
| Mod of Region.t
| Or of Region.t
| True of Region.t
| Type of Region.t
| True of Region.t
| Type of Region.t
(* Data constructors *)
(* Data constructors *)
| C_None of Region.t (* "None" *)
| C_Some of Region.t (* "Some" *)
@ -111,8 +112,6 @@ type t =
| EOF of Region.t (* End of file *)
type token = t
(* Projections
The difference between extracting the lexeme and a string from a
@ -121,6 +120,8 @@ type token = t
lexeme (concrete syntax).
*)
type token = t
val to_lexeme : token -> lexeme
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
val to_region : token -> Region.t
@ -154,11 +155,22 @@ val eof : Region.t -> token
(* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool
(* Style *)
type error
val error_to_string : error -> string
exception Error of error Region.reg
val format_error :
?offsets:bool -> [`Byte | `Point] ->
error Region.reg -> file:bool -> string Region.reg
val check_right_context :
token ->
(Lexing.lexbuf -> (Markup.t list * token) option) ->
Lexing.lexbuf ->
unit

View File

@ -1,66 +1,69 @@
{
(* START OF HEADER *)
type lexeme = string
let sprintf = Printf.sprintf
(* Shorthands *)
module Region = Simple_utils.Region
module Pos = Simple_utils.Pos
module SMap = Utils.String.Map
module SSet = Utils.String.Set
type lexeme = string
let sprintf = Printf.sprintf
(* TOKENS *)
type t =
(* Symbols *)
CAT of Region.t (* "++" *)
CAT of Region.t (* "++" *)
(* Arithmetics *)
| MINUS of Region.t (* "-" *)
| PLUS of Region.t (* "+" *)
| SLASH of Region.t (* "/" *)
| TIMES of Region.t (* "*" *)
| MINUS of Region.t (* "-" *)
| PLUS of Region.t (* "+" *)
| SLASH of Region.t (* "/" *)
| TIMES of Region.t (* "*" *)
(* Compounds *)
| LPAR of Region.t (* "(" *)
| RPAR of Region.t (* ")" *)
| LBRACKET of Region.t (* "[" *)
| RBRACKET of Region.t (* "]" *)
| LBRACE of Region.t (* "{" *)
| RBRACE of Region.t (* "}" *)
| LPAR of Region.t (* "(" *)
| RPAR of Region.t (* ")" *)
| LBRACKET of Region.t (* "[" *)
| RBRACKET of Region.t (* "]" *)
| LBRACE of Region.t (* "{" *)
| RBRACE of Region.t (* "}" *)
(* Separators *)
| COMMA of Region.t (* "," *)
| SEMI of Region.t (* ";" *)
| VBAR of Region.t (* "|" *)
| COLON of Region.t (* ":" *)
| DOT of Region.t (* "." *)
| COMMA of Region.t (* "," *)
| SEMI of Region.t (* ";" *)
| VBAR of Region.t (* "|" *)
| COLON of Region.t (* ":" *)
| DOT of Region.t (* "." *)
| ELLIPSIS of Region.t (* "..." *)
| ARROW of Region.t (* "=>" *)
(* Wildcard *)
| WILD of Region.t (* "_" *)
| WILD of Region.t (* "_" *)
(* Comparisons *)
| EQ of Region.t (* "=" *)
| EQEQ of Region.t (* "==" *)
| NE of Region.t (* "!=" *)
| LT of Region.t (* "<" *)
| GT of Region.t (* ">" *)
| LE of Region.t (* "<=" *)
| GE of Region.t (* ">=" *)
| ARROW of Region.t (* "=>" *)
| EQ of Region.t (* "=" *)
| EQEQ of Region.t (* "==" *)
| NE of Region.t (* "!=" *)
| LT of Region.t (* "<" *)
| GT of Region.t (* ">" *)
| LE of Region.t (* "<=" *)
| GE of Region.t (* ">=" *)
| BOOL_OR of Region.t (* "||" *)
| BOOL_AND of Region.t (* "&&" *)
(* Logic *)
| NOT of Region.t (* ! *)
| BOOL_OR of Region.t (* "||" *)
| BOOL_AND of Region.t (* "&&" *)
| NOT of Region.t (* ! *)
(* Identifiers, labels, numbers and strings *)
@ -75,17 +78,17 @@ type t =
(* Keywords *)
(*| And*)
| Else of Region.t
| False of Region.t
| If of Region.t
| Let of Region.t
| Rec of Region.t
| Else of Region.t
| False of Region.t
| If of Region.t
| Let of Region.t
| Mod of Region.t
| Or of Region.t
| Rec of Region.t
| Switch of Region.t
| Mod of Region.t
| Or of Region.t
| True of Region.t
| Type of Region.t
| True of Region.t
| Type of Region.t
(* Data constructors *)
| C_None of Region.t (* "None" *)
@ -96,121 +99,143 @@ type t =
| EOF of Region.t (* End of file *)
(* Projections *)
type token = t
let proj_token = function
CAT region -> region, "CAT"
| MINUS region -> region, "MINUS"
| PLUS region -> region, "PLUS"
| SLASH region -> region, "SLASH"
| TIMES region -> region, "TIMES"
| LPAR region -> region, "LPAR"
| RPAR region -> region, "RPAR"
| LBRACKET region -> region, "LBRACKET"
| RBRACKET region -> region, "RBRACKET"
| LBRACE region -> region, "LBRACE"
| RBRACE region -> region, "RBRACE"
| COMMA region -> region, "COMMA"
| SEMI region -> region, "SEMI"
| VBAR region -> region, "VBAR"
| COLON region -> region, "COLON"
| DOT region -> region, "DOT"
| ELLIPSIS region -> region, "ELLIPSIS"
| WILD region -> region, "WILD"
| EQ region -> region, "EQ"
| EQEQ region -> region, "EQEQ"
| NE region -> region, "NE"
| LT region -> region, "LT"
| GT region -> region, "GT"
| LE region -> region, "LE"
| GE region -> region, "GE"
| ARROW region -> region, "ARROW"
| BOOL_OR region -> region, "BOOL_OR"
| BOOL_AND region -> region, "BOOL_AND"
| Ident Region.{region; value} ->
region, sprintf "Ident %s" value
| Constr Region.{region; value} ->
region, sprintf "Constr %s" value
(* Literals *)
String Region.{region; value} ->
region, sprintf "String %s" value
| Bytes Region.{region; value = s,b} ->
region,
sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b)
| Int Region.{region; value = s,n} ->
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
| Nat Region.{region; value = s,n} ->
region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
| Mutez Region.{region; value = s,n} ->
region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
| String Region.{region; value} ->
region, sprintf "String %s" value
| Bytes Region.{region; value = s,b} ->
region,
sprintf "Bytes (\"%s\", \"0x%s\")"
s (Hex.show b)
| Else region -> region, "Else"
| False region -> region, "False"
| If region -> region, "If"
| Let region -> region, "Let"
| Rec region -> region, "Rec"
| Switch region -> region, "Switch"
| Mod region -> region, "Mod"
| NOT region -> region, "!"
| Or region -> region, "Or"
| True region -> region, "True"
| Type region -> region, "Type"
| C_None region -> region, "C_None"
| C_Some region -> region, "C_Some"
| Attr Region.{region; value} -> region, sprintf "Attr %s" value
| EOF region -> region, "EOF"
| Ident Region.{region; value} ->
region, sprintf "Ident %s" value
| Constr Region.{region; value} ->
region, sprintf "Constr %s" value
(* Symbols *)
| CAT region -> region, "CAT"
| MINUS region -> region, "MINUS"
| PLUS region -> region, "PLUS"
| SLASH region -> region, "SLASH"
| TIMES region -> region, "TIMES"
| LPAR region -> region, "LPAR"
| RPAR region -> region, "RPAR"
| LBRACKET region -> region, "LBRACKET"
| RBRACKET region -> region, "RBRACKET"
| LBRACE region -> region, "LBRACE"
| RBRACE region -> region, "RBRACE"
| COMMA region -> region, "COMMA"
| SEMI region -> region, "SEMI"
| VBAR region -> region, "VBAR"
| COLON region -> region, "COLON"
| DOT region -> region, "DOT"
| ELLIPSIS region -> region, "ELLIPSIS"
| WILD region -> region, "WILD"
| EQ region -> region, "EQ"
| EQEQ region -> region, "EQEQ"
| NE region -> region, "NE"
| LT region -> region, "LT"
| GT region -> region, "GT"
| LE region -> region, "LE"
| GE region -> region, "GE"
| ARROW region -> region, "ARROW"
| NOT region -> region, "NOT"
| BOOL_OR region -> region, "BOOL_OR"
| BOOL_AND region -> region, "BOOL_AND"
| Else region -> region, "Else"
| False region -> region, "False"
| If region -> region, "If"
| Let region -> region, "Let"
| Rec region -> region, "Rec"
| Switch region -> region, "Switch"
| Mod region -> region, "Mod"
| Or region -> region, "Or"
| True region -> region, "True"
| Type region -> region, "Type"
| C_None region -> region, "C_None"
| C_Some region -> region, "C_Some"
| Attr Region.{region; value} -> region, sprintf "Attr %s" value
| EOF region -> region, "EOF"
let to_lexeme = function
CAT _ -> "++"
| MINUS _ -> "-"
| PLUS _ -> "+"
| SLASH _ -> "/"
| TIMES _ -> "*"
| LPAR _ -> "("
| RPAR _ -> ")"
| LBRACKET _ -> "["
| RBRACKET _ -> "]"
| LBRACE _ -> "{"
| RBRACE _ -> "}"
| COMMA _ -> ","
| SEMI _ -> ";"
| VBAR _ -> "|"
| COLON _ -> ":"
| DOT _ -> "."
| ELLIPSIS _ -> "..."
| WILD _ -> "_"
| EQ _ -> "="
| EQEQ _ -> "=="
| NE _ -> "!="
| LT _ -> "<"
| GT _ -> ">"
| LE _ -> "<="
| GE _ -> ">="
| ARROW _ -> "=>"
| BOOL_OR _ -> "||"
| BOOL_AND _ -> "&&"
| Ident id -> id.Region.value
| Constr id -> id.Region.value
(* Literals *)
String s -> s.Region.value
| Bytes b -> fst b.Region.value
| Int i
| Nat i
| Mutez i -> fst i.Region.value
| String s -> s.Region.value
| Bytes b -> fst b.Region.value
| Else _ -> "else"
| False _ -> "false"
| If _ -> "if"
| Let _ -> "let"
| Rec _ -> "rec"
| Mod _ -> "mod"
| NOT _ -> "!"
| Or _ -> "or"
| Switch _ -> "switch"
| True _ -> "true"
| Type _ -> "type"
| Mutez i -> fst i.Region.value
| Ident id -> id.Region.value
| Constr id -> id.Region.value
| Attr a -> a.Region.value
(* Symbols *)
| CAT _ -> "++"
| MINUS _ -> "-"
| PLUS _ -> "+"
| SLASH _ -> "/"
| TIMES _ -> "*"
| LPAR _ -> "("
| RPAR _ -> ")"
| LBRACKET _ -> "["
| RBRACKET _ -> "]"
| LBRACE _ -> "{"
| RBRACE _ -> "}"
| COMMA _ -> ","
| SEMI _ -> ";"
| VBAR _ -> "|"
| COLON _ -> ":"
| DOT _ -> "."
| ELLIPSIS _ -> "..."
| WILD _ -> "_"
| EQ _ -> "="
| EQEQ _ -> "=="
| NE _ -> "!="
| LT _ -> "<"
| GT _ -> ">"
| LE _ -> "<="
| GE _ -> ">="
| ARROW _ -> "=>"
| BOOL_OR _ -> "||"
| BOOL_AND _ -> "&&"
| NOT _ -> "!"
(* Keywords *)
| Else _ -> "else"
| False _ -> "false"
| If _ -> "if"
| Let _ -> "let"
| Mod _ -> "mod"
| Or _ -> "or"
| Rec _ -> "rec"
| Switch _ -> "switch"
| True _ -> "true"
| Type _ -> "type"
(* Data constructors *)
| C_None _ -> "None"
| C_Some _ -> "Some"
| Attr a -> a.Region.value
(* Virtual tokens *)
| EOF _ -> ""
(* CONVERSIONS *)
let to_string token ?(offsets=true) mode =
let region, val_str = proj_token token in
let reg_str = region#compact ~offsets mode
@ -261,12 +286,9 @@ let reserved =
|> add "functor"
|> add "inherit"
|> add "initializer"
(* |> add "land" - see https://ligo.atlassian.net/browse/LIGO-263 *)
|> add "lazy"
(* |> add "lor" - see https://ligo.atlassian.net/browse/LIGO-263 *)
|> add "lsl"
|> add "lsr"
(* |> add "lxor" - see https://ligo.atlassian.net/browse/LIGO-263 *)
|> add "match"
|> add "method"
|> add "module"
@ -291,7 +313,7 @@ let reserved =
let constructors = [
(fun reg -> C_None reg);
(fun reg -> C_Some reg);
(fun reg -> C_Some reg)
]
let add map (key, value) = SMap.add key value map
@ -376,20 +398,18 @@ let mk_int lexeme region =
else Ok (Int Region.{region; value = lexeme, z})
let mk_nat lexeme region =
let z =
Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "n") "") |>
Z.of_string in
if Z.equal z Z.zero && lexeme <> "0n"
let z = Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "n") "") |>
Z.of_string in
if Z.equal z Z.zero && lexeme <> "0n"
then Error Non_canonical_zero_nat
else Ok (Nat Region.{region; value = lexeme, z})
let mk_mutez lexeme region =
let z =
Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "mutez") "") |>
Z.of_string in
if Z.equal z Z.zero && lexeme <> "0mutez"
let z = Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "mutez") "") |>
Z.of_string in
if Z.equal z Z.zero && lexeme <> "0mutez"
then Error Non_canonical_zero
else Ok (Mutez Region.{region; value = lexeme, z})
@ -426,11 +446,11 @@ let mk_sym lexeme region =
(* Symbols specific to ReasonLIGO *)
| "..." -> Ok (ELLIPSIS region)
| "=>" -> Ok (ARROW region)
| "==" -> Ok (EQEQ region)
| "!" -> Ok (NOT region)
| "++" -> Ok (CAT region)
| "..." -> Ok (ELLIPSIS region)
| "=>" -> Ok (ARROW region)
| "==" -> Ok (EQEQ region)
| "!" -> Ok (NOT region)
| "++" -> Ok (CAT region)
(* Invalid symbols *)
@ -460,75 +480,72 @@ let mk_attr header lexeme region =
(* Predicates *)
let is_string = function
String _ -> true
| _ -> false
let is_string = function String _ -> true | _ -> false
let is_bytes = function Bytes _ -> true | _ -> false
let is_int = function Int _ -> true | _ -> false
let is_ident = function Ident _ -> true | _ -> false
let is_eof = function EOF _ -> true | _ -> false
let is_minus = function MINUS _ -> true | _ -> false
let is_bytes = function
Bytes _ -> true
| _ -> false
(* Errors *)
let is_int = function
Int _ -> true
| _ -> false
type error =
Odd_lengthed_bytes
| Missing_break
| Negative_byte_sequence
let is_ident = function
Ident _ -> true
| _ -> false
let error_to_string = function
Odd_lengthed_bytes ->
"The length of the byte sequence is an odd number.\n\
Hint: Add or remove a digit."
| Missing_break ->
"Missing break.\n\
Hint: Insert some space."
| Negative_byte_sequence ->
"Negative byte sequence.\n\
Hint: Remove the leading minus sign."
let is_kwd = function
Else _
| False _
| If _
| Let _
| Rec _
| Switch _
| Mod _
| Or _
| True _
| Type _ -> true
| _ -> false
exception Error of error Region.reg
let is_constr = function
Constr _
| Ident _
| False _
| True _ -> true
| _ -> false
let format_error ?(offsets=true) mode Region.{region; value} ~file =
let msg = error_to_string value
and reg = region#to_string ~file ~offsets mode in
let value = sprintf "Lexical error %s:\n%s\n" reg msg
in Region.{value; region}
let is_sym = function
CAT _
| MINUS _
| PLUS _
| SLASH _
| TIMES _
| LPAR _
| RPAR _
| LBRACKET _
| RBRACKET _
| LBRACE _
| RBRACE _
| COMMA _
| SEMI _
| VBAR _
| COLON _
| DOT _
| ELLIPSIS _
| WILD _
| EQ _
| EQEQ _
| NE _
| LT _
| GT _
| LE _
| GE _
| ARROW _
| BOOL_OR _
| NOT _
| BOOL_AND _ -> true
| _ -> false
let fail region value = raise (Error Region.{region; value})
let is_eof = function EOF _ -> true | _ -> false
let check_right_context token next_token buffer : unit =
let pos = (to_region token)#stop in
let region = Region.make ~start:pos ~stop:pos in
match next_token buffer with
None -> ()
| Some (markup, next) ->
if is_minus token && is_bytes next
then let region =
Region.cover (to_region token) (to_region next)
in fail region Negative_byte_sequence
else
match markup with
[] ->
if is_int token
then if is_string next || is_ident next
then fail region Missing_break
else ()
else
if is_string token
then if is_int next || is_bytes next || is_ident next
then fail region Missing_break
else ()
else
if is_bytes token
then if is_string next || is_ident next
then fail region Missing_break
else if is_int next
then fail region Odd_lengthed_bytes
else ()
else ()
| _::_ -> ()
(* END TRAILER *)
}

View File

@ -4,7 +4,10 @@ module Region = Simple_utils.Region
module IO =
struct
let options = EvalOpt.(read ~lang:`ReasonLIGO ~ext:".religo")
let options =
let open EvalOpt in
let block = mk_block ~opening:"/*" ~closing:"*/"
in read ~block ~line:"//" ".religo"
end
module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
@ -12,4 +15,4 @@ module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
let () =
match M.trace () with
Stdlib.Ok () -> ()
| Error Region.{value; _} -> Utils.highlight value
| Error Region.{value; _} -> Printf.eprintf "\027[31m%s\027[0m%!" value

View File

@ -40,11 +40,11 @@ let rec curry hd = function
in TFun {value; region}
| [] -> hd
let wild_error e =
match e with
| EVar { value = "_"; _} as e ->
let wild_error e =
match e with
| EVar { value = "_"; _} as e ->
let open! SyntaxError in
raise (Error (InvalidWild e))
raise (Error (InvalidWild e))
| _ -> ()
(* END HEADER *)
@ -270,30 +270,30 @@ let_declaration:
let_binding:
"<ident>" type_annotation? "=" expr {
wild_error $4;
wild_error $4;
Scoping.check_reserved_name $1;
{binders = PVar $1, []; lhs_type=$2; eq=$3; let_rhs=$4}
}
| "_" type_annotation? "=" expr {
wild_error $4;
wild_error $4;
{binders = PWild $1, []; lhs_type=$2; eq=$3; let_rhs=$4}
}
| unit type_annotation? "=" expr {
wild_error $4;
wild_error $4;
{binders = PUnit $1, []; lhs_type=$2; eq=$3; let_rhs=$4}
}
| record_pattern type_annotation? "=" expr {
wild_error $4;
wild_error $4;
Scoping.check_pattern (PRecord $1);
{binders = PRecord $1, []; lhs_type=$2; eq=$3; let_rhs=$4}
}
| par(closed_irrefutable) type_annotation? "=" expr {
wild_error $4;
wild_error $4;
Scoping.check_pattern $1.value.inside;
{binders = PPar $1, []; lhs_type=$2; eq=$3; let_rhs=$4}
}
| tuple(sub_irrefutable) type_annotation? "=" expr {
wild_error $4;
wild_error $4;
Utils.nsepseq_iter Scoping.check_pattern $1;
let hd, tl = $1 in
let start = pattern_to_region hd in
@ -422,7 +422,7 @@ expr:
base_cond__open(expr) | switch_expr(base_cond) { $1 }
base_cond__open(x):
base_expr(x) | conditional(expr_with_let_expr) {
base_expr(x) | conditional(expr_with_let_expr) {
wild_error $1;
$1 }
@ -460,11 +460,11 @@ fun_expr:
let region = cover start stop in
let rec arg_to_pattern = function
EVar v ->
EVar v ->
if v.value = "_" then
PWild v.region
else (
Scoping.check_reserved_name v;
Scoping.check_reserved_name v;
PVar v
)
| EAnnot {region; value = {inside = EVar v, colon, typ; _}} ->
@ -895,7 +895,7 @@ path:
| projection { Path $1 }
update_record:
"{""..."path "," sep_or_term_list(field_path_assignment,",") "}" {
"{" "..." path "," sep_or_term_list(field_path_assignment,",") "}" {
let region = cover $1 $6 in
let ne_elements, terminator = $5 in
let value = {
@ -913,7 +913,7 @@ expr_with_let_expr:
expr { $1 }
| let_expr(expr_with_let_expr) { $1 }
more_field_assignments:
more_field_assignments:
"," sep_or_term_list(field_assignment_punning,",") {
let elts, _region = $2 in
$1, elts
@ -926,9 +926,9 @@ sequence_or_record_in:
PaSequence {s_elts = elts; s_terminator=None}
}
| field_assignment more_field_assignments? {
match $2 with
| Some (comma, elts) ->
let r_elts = Utils.nsepseq_cons $1 comma elts in
match $2 with
| Some (comma, elts) ->
let r_elts = Utils.nsepseq_cons $1 comma elts in
PaRecord {r_elts; r_terminator = None}
| None ->
PaRecord {r_elts = ($1, []); r_terminator = None}
@ -961,15 +961,15 @@ sequence_or_record:
terminator = r.r_terminator}
in ERecord {region; value}}
field_assignment_punning:
(* This can only happen with multiple fields -
field_assignment_punning:
(* This can only happen with multiple fields -
one item punning does NOT work in ReasonML *)
field_name {
field_name {
let value = {
field_name = $1;
assignment = ghost;
field_expr = EVar $1 }
in
in
{$1 with value}
}
| field_assignment {

View File

@ -5,7 +5,10 @@ module SSet = Set.Make (String)
module IO =
struct
let options = EvalOpt.(read ~lang:`ReasonLIGO ~ext:".religo")
let options =
let open EvalOpt in
let block = mk_block ~opening:"/*" ~closing:"*/"
in read ~block ~line:"//" ".religo"
end
module SubIO =
@ -14,7 +17,8 @@ module SubIO =
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : EvalOpt.language;
block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string;
mode : [`Byte | `Point];
cmd : EvalOpt.command;
@ -26,7 +30,8 @@ module SubIO =
method libs = IO.options#libs
method verbose = IO.options#verbose
method offsets = IO.options#offsets
method lang = IO.options#lang
method block = IO.options#block
method line = IO.options#line
method ext = IO.options#ext
method mode = IO.options#mode
method cmd = IO.options#cmd
@ -37,7 +42,8 @@ module SubIO =
EvalOpt.make ~libs:options#libs
~verbose:options#verbose
~offsets:options#offsets
~lang:options#lang
?block:options#block
?line:options#line
~ext:options#ext
~mode:options#mode
~cmd:options#cmd

View File

@ -1,7 +0,0 @@
$HOME/git/OCaml-build/Makefile
$HOME/git/OCaml-build/Makefile.cfg
$HOME/git/ligo/vendors/ligo-utils/simple-utils/pos.mli
$HOME/git/ligo/vendors/ligo-utils/simple-utils/pos.ml
$HOME/git/ligo/vendors/ligo-utils/simple-utils/region.mli
$HOME/git/ligo/vendors/ligo-utils/simple-utils/region.ml

View File

@ -5,37 +5,42 @@
type command = Quiet | Copy | Units | Tokens
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
let lang_to_string = function
`PascaLIGO -> "PascaLIGO"
| `CameLIGO -> "CameLIGO"
| `ReasonLIGO -> "ReasonLIGO"
(* The type [options] gathers the command-line options. *)
module SSet = Set.Make (String)
type line_comment = string (* Opening of a line comment *)
type block_comment = <opening : string; closing : string>
let mk_block ~opening ~closing : block_comment =
object
method opening = opening
method closing = closing
end
type options = <
input : string option;
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : language;
ext : string; (* ".ligo", ".mligo", ".religo" *)
block : block_comment option;
line : line_comment option;
ext : string;
mode : [`Byte | `Point];
cmd : command;
mono : bool;
expr : bool
>
let make ~input ~libs ~verbose ~offsets ~lang ~ext ~mode ~cmd ~mono ~expr : options =
let make ~input ~libs ~verbose ~offsets ?block
?line ~ext ~mode ~cmd ~mono ~expr : options =
object
method input = input
method libs = libs
method verbose = verbose
method offsets = offsets
method lang = lang
method block = block
method line = line
method ext = ext
method mode = mode
method cmd = cmd
@ -58,10 +63,10 @@ let abort msg =
(* Help *)
let help language extension () =
let help extension () =
let file = Filename.basename Sys.argv.(0) in
printf "Usage: %s [<option> ...] [<input>%s | \"-\"]\n" file extension;
printf "where <input>%s is the %s source file (default: stdin),\n" extension language;
printf "where <input>%s is the LIGO source file (default: stdin),\n" extension;
print "and each <option> (if any) is one of the following:";
print " -I <paths> Library paths (colon-separated)";
print " -t, --tokens Print tokens";
@ -105,8 +110,7 @@ let add_verbose d =
!verbose
(split_at_colon d)
let specs language extension =
let language = lang_to_string language in
let specs extension =
let open! Getopt in [
'I', nolong, None, Some add_path;
'c', "copy", set copy true, None;
@ -118,7 +122,7 @@ let specs language extension =
noshort, "mono", set mono true, None;
noshort, "expr", set expr true, None;
noshort, "verbose", None, Some add_verbose;
'h', "help", Some (help language extension), None;
'h', "help", Some (help extension), None;
noshort, "version", Some version, None
]
;;
@ -156,7 +160,7 @@ let print_opt () =
printf "input = %s\n" (string_of quote !input);
printf "libs = %s\n" (string_of_path !libs)
let check lang ext =
let check ?block ?line ~ext =
let () =
if SSet.mem "cli" !verbose then print_opt () in
@ -209,16 +213,19 @@ let check lang ext =
| false, false, false, true -> Tokens
| _ -> abort "Choose one of -q, -c, -u, -t."
in make ~input ~libs ~verbose ~offsets ~mode ~cmd ~mono ~expr ~lang ~ext
in make ~input ~libs ~verbose ~offsets ~mode
~cmd ~mono ~expr ?block ?line ~ext
(* Parsing the command-line options *)
let read ~lang ~ext =
type extension = string
let read ?block ?line (ext: extension) =
try
Getopt.parse_cmdline (specs lang ext) anonymous;
Getopt.parse_cmdline (specs ext) anonymous;
(verb_str :=
let apply e a =
if a = "" then e else Printf.sprintf "%s, %s" e a
in SSet.fold apply !verbose "");
check lang ext
check ?block ?line ~ext
with Getopt.Error msg -> abort msg

View File

@ -49,19 +49,21 @@ type command = Quiet | Copy | Units | Tokens
expected.}
} *)
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
val lang_to_string : language -> string
module SSet : Set.S with type elt = string and type t = Set.Make(String).t
type line_comment = string (* Opening of a line comment *)
type block_comment = <opening : string; closing : string>
val mk_block : opening:string -> closing:string -> block_comment
type options = <
input : string option;
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : language;
ext : string; (* ".ligo", ".mligo", ".religo" *)
block : block_comment option;
line : line_comment option;
ext : string;
mode : [`Byte | `Point];
cmd : command;
mono : bool;
@ -73,7 +75,8 @@ val make :
libs:string list ->
verbose:SSet.t ->
offsets:bool ->
lang:language ->
?block:block_comment ->
?line:line_comment ->
ext:string ->
mode:[`Byte | `Point] ->
cmd:command ->
@ -81,8 +84,9 @@ val make :
expr:bool ->
options
(** Parsing the command-line options on stdin. The first parameter is
the name of the concrete syntax, e.g., [PascaLIGO], and the second
is the expected file extension, e.g., ".ligo". *)
(** Parsing the command-line options on stdin. *)
val read : lang:language -> ext:string -> options
type extension = string
val read :
?block:block_comment -> ?line:line_comment -> extension -> options

View File

@ -38,8 +38,6 @@
module Region = Simple_utils.Region
module Pos = Simple_utils.Pos
type lexeme = string
(* TOKENS *)
(* The signature [TOKEN] exports an abstract type [token], so a lexer
@ -54,6 +52,8 @@ type lexeme = string
reading the ocamllex specification for the lexer ([Lexer.mll]).
*)
type lexeme = string
module type TOKEN =
sig
type token
@ -82,13 +82,6 @@ module type TOKEN =
(* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool
(* Projections *)
@ -96,73 +89,41 @@ module type TOKEN =
val to_lexeme : token -> lexeme
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
val to_region : token -> Region.t
(* Style *)
type error
val error_to_string : error -> string
exception Error of error Region.reg
val format_error :
?offsets:bool ->
[`Byte | `Point] ->
error Region.reg ->
file:bool ->
string Region.reg
val check_right_context :
token ->
(Lexing.lexbuf -> (Markup.t list * token) option) ->
Lexing.lexbuf ->
unit
end
(* The module type for lexers is [S]. It mainly exports the function
[open_token_stream], which returns
* a function [read] that extracts tokens from a lexing buffer,
together with a lexing buffer [buffer] to read from,
* a function [close] that closes that buffer,
* a function [get_pos] that returns the current position, and
* a function [get_last] that returns the region of the last
recognised token.
* a function [get_file] that returns the name of the file being scanned
(empty string if [stdin]).
Note that a module [Token] is exported too, because the signature
of the exported functions depend on it.
The call [read ~log] evaluates in a lexer (also known as a
tokeniser or scanner) whose type is [Lexing.lexbuf -> token], and
suitable for a parser generated by Menhir. The argument labelled
[log] is a logger, that is, it may print a token and its left
markup to a given channel, at the caller's discretion.
*)
(* The signature of the lexer *)
module type S =
sig
module Token : TOKEN
type token = Token.token
type file_path = string
type logger = Markup.t list -> token -> unit
(* The scanner *)
type window =
Nil
| One of token
| Two of token * token
val scan : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
val slide : token -> window -> window
type input =
File of file_path
| String of string
| Channel of in_channel
| Buffer of Lexing.lexbuf
type instance = {
input : input;
read : log:logger -> Lexing.lexbuf -> token;
buffer : Lexing.lexbuf;
get_win : unit -> window;
get_pos : unit -> Pos.t;
get_last : unit -> Region.t;
get_file : unit -> file_path;
close : unit -> unit
}
type open_err = File_opening of string
val lexbuf_from_input :
input -> (Lexing.lexbuf * (unit -> unit), open_err) Stdlib.result
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
val open_token_stream :
language -> input -> (instance, open_err) Stdlib.result
(* Error reporting *)
(* Errors (specific to the generic lexer, not to the tokens) *)
type error
@ -173,7 +134,6 @@ module type S =
val format_error :
?offsets:bool -> [`Byte | `Point] ->
error Region.reg -> file:bool -> string Region.reg
end
(* The functorised interface
@ -182,4 +142,4 @@ module type S =
submodule in [S].
*)
module Make (Token: TOKEN) : S with module Token = Token
module Make (Token : TOKEN) : S with module Token = Token

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,387 @@
module Region = Simple_utils.Region
module Pos = Simple_utils.Pos
(* LEXER ENGINE *)
(* Resetting file name and line number in the lexing buffer
The call [reset ~file ~line buffer] modifies in-place the lexing
buffer [buffer] so the lexing engine records that the file
associated with [buffer] is named [file], and the current line is
[line]. This function is useful when lexing a file that has been
previously preprocessed by the C preprocessor, in which case the
argument [file] is the name of the file that was preprocessed,
_not_ the preprocessed file (of which the user is not normally
aware). By default, the [line] argument is [1].
*)
let reset_file ~file buffer =
let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname = file}
let reset_line ~line buffer =
assert (line >= 0);
let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = line}
let reset_offset ~offset buffer =
assert (offset >= 0);
let open Lexing in
let bol = buffer.lex_curr_p.pos_bol in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum = bol + offset }
let reset ?file ?line ?offset buffer =
let () =
match file with
Some file -> reset_file ~file buffer
| None -> () in
let () =
match line with
Some line -> reset_line ~line buffer
| None -> () in
match offset with
Some offset -> reset_offset ~offset buffer
| None -> ()
(* Rolling back one lexeme _within the current semantic action_ *)
let rollback buffer =
let open Lexing in
let len = String.length (lexeme buffer) in
let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
(* Utility types *)
type file_path = string
type lexeme = string
(* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)
type thread = <
opening : Region.t;
length : int;
acc : char list;
to_string : string;
push_char : char -> thread;
push_string : string -> thread;
set_opening : Region.t -> thread
>
let mk_thread region lexeme : thread =
(* The call [explode s a] is the list made by pushing the characters
in the string [s] on top of [a], in reverse order. For example,
[explode "ba" ['c';'d'] = ['a'; 'b'; 'c'; 'd']]. *)
let explode s acc =
let rec push = function
0 -> acc
| i -> s.[i-1] :: push (i-1)
in push (String.length s) in
object
val opening = region
method opening = opening
val length = String.length lexeme
method length = length
val acc = explode lexeme []
method acc = acc
method set_opening opening = {< opening; length; acc >}
method push_char char =
{< opening; length=length+1; acc=char::acc >}
method push_string str =
{< opening;
length = length + String.length str;
acc = explode str acc >}
(* The value of [thread#to_string] is a string of length
[thread#length] containing the [thread#length] characters in
the list [thread#acc], in reverse order. For instance,
[thread#to_string = "abc"] if [thread#length = 3] and
[thread#acc = ['c';'b';'a']]. *)
method to_string =
let bytes = Bytes.make length ' ' in
let rec fill i = function
[] -> bytes
| char::l -> Bytes.set bytes i char; fill (i-1) l
in fill (length-1) acc |> Bytes.to_string
end
(* STATE *)
(* Scanning the lexing buffer for tokens (and markup, as a
side-effect).
Because we want the lexer to have access to the right lexical
context of a recognised lexeme (to enforce stylistic constraints or
report special error patterns), we need to keep a hidden reference
to a queue of recognised lexical units (that is, tokens and markup)
that acts as a mutable state between the calls to [read]. When
[read] is called, that queue is examined first and, if it contains
at least one token, that token is returned; otherwise, the lexing
buffer is scanned for at least one more new token. That is the
general principle: we put a high-level buffer (our queue) on top of
the low-level lexing buffer.
One tricky and important detail is that we must make any parser
generated by Menhir (and calling [read]) believe that the last
region of the input source that was matched indeed corresponds to
the returned token, despite that many tokens and markup may have
been matched since it was actually read from the input. In other
words, the parser requests a token that is taken from the
high-level buffer, but the parser requests the source regions from
the _low-level_ lexing buffer, and they may disagree if more than
one token has actually been recognised.
Consequently, in order to maintain a consistent view for the
parser, we have to patch some fields of the lexing buffer, namely
[lex_start_p] and [lex_curr_p], as these fields are read by parsers
generated by Menhir when querying source positions (regions). This
is the purpose of the function [patch_buffer]. After reading one or
more tokens and markup by the scanning rule [scan], we have to save
in the hidden reference [buf_reg] the region of the source that was
matched by [scan]. This atomic sequence of patching, scanning and
saving is implemented by the _function_ [scan] (beware: it shadows
the scanning rule [scan]). The function [patch_buffer] is, of
course, also called just before returning the token, so the parser
has a view of the lexing buffer consistent with the token.
*)
type 'token window =
Nil
| One of 'token
| Two of 'token * 'token
type 'token state = <
units : (Markup.t list * 'token) FQueue.t;
markup : Markup.t list;
window : 'token window;
last : Region.t;
pos : Pos.t;
decoder : Uutf.decoder;
supply : Bytes.t -> int -> int -> unit;
block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
enqueue : 'token -> 'token state;
set_units : (Markup.t list * 'token) FQueue.t -> 'token state;
set_last : Region.t -> 'token state;
set_pos : Pos.t -> 'token state;
slide_token : 'token -> 'token state;
sync : Lexing.lexbuf -> Region.t * lexeme * 'token state;
push_newline : Lexing.lexbuf -> 'token state;
push_line : thread -> 'token state;
push_block : thread -> 'token state;
push_space : Lexing.lexbuf -> 'token state;
push_tabs : Lexing.lexbuf -> 'token state;
push_bom : Lexing.lexbuf -> 'token state;
push_markup : Markup.t -> 'token state;
>
let mk_state ~units ~markup ~window ~last ~pos ~decoder ~supply
?block ?line () : _ state =
object (self)
val units = units
method units = units
val markup = markup
method markup = markup
val window = window
method window = window
val last = last
method last = last
val pos = pos
method pos = pos
method decoder = decoder
method supply = supply
method block = block
method line = line
method enqueue token =
{< units = FQueue.enq (markup, token) units;
markup = [] >}
method set_units units = {< units = units >}
method set_last region = {< last = region >}
method set_pos pos = {< pos = pos >}
method slide_token token =
match self#window with
Nil -> {< window = One token >}
| One t | Two (t,_) -> {< window = Two (token,t) >}
method sync buffer =
let lex = Lexing.lexeme buffer in
let len = String.length lex in
let start = pos in
let stop = start#shift_bytes len in
let state = {< pos = stop >}
in Region.make ~start ~stop, lex, state
(* MARKUP *)
(* Committing markup to the current logical state *)
method push_markup unit = {< markup = unit :: markup >}
method push_newline buffer =
let () = Lexing.new_line buffer in
let value = Lexing.lexeme buffer in
let start = self#pos in
let stop = start#new_line value in
let region = Region.make ~start ~stop in
let unit = Markup.Newline Region.{region; value}
in {< pos = stop; markup = unit::markup >}
method push_line thread =
let start = thread#opening#start in
let region = Region.make ~start ~stop:self#pos
and value = thread#to_string in
let unit = Markup.LineCom Region.{region; value}
in {< markup = unit::markup >}
method push_block thread =
let start = thread#opening#start in
let region = Region.make ~start ~stop:self#pos
and value = thread#to_string in
let unit = Markup.BlockCom Region.{region; value}
in {< markup = unit::markup >}
method push_space buffer =
let region, lex, state = self#sync buffer in
let value = String.length lex in
let unit = Markup.Space Region.{region; value}
in state#push_markup unit
method push_tabs buffer =
let region, lex, state = self#sync buffer in
let value = String.length lex in
let unit = Markup.Tabs Region.{region; value}
in state#push_markup unit
method push_bom buffer =
let region, value, state = self#sync buffer in
let unit = Markup.BOM Region.{region; value}
in state#push_markup unit
end
(* LEXER INSTANCE *)
type input =
File of file_path
| String of string
| Channel of in_channel
| Buffer of Lexing.lexbuf
type 'token logger = Markup.t list -> 'token -> unit
type 'token instance = {
input : input;
read : log:('token logger) -> Lexing.lexbuf -> 'token;
buffer : Lexing.lexbuf;
get_win : unit -> 'token window;
get_pos : unit -> Pos.t;
get_last : unit -> Region.t;
get_file : unit -> file_path;
close : unit -> unit
}
type open_err = File_opening of string
let lexbuf_from_input = function
String s ->
Ok (Lexing.from_string s, fun () -> ())
| Channel chan ->
let close () = close_in chan in
Ok (Lexing.from_channel chan, close)
| Buffer b ->
Ok (b, fun () -> ())
| File path ->
try
let chan = open_in path in
let close () = close_in chan in
let lexbuf = Lexing.from_channel chan in
let () =
let open Lexing in
lexbuf.lex_curr_p <- {lexbuf.lex_curr_p with pos_fname=path}
in Ok (lexbuf, close)
with Sys_error msg -> Stdlib.Error (File_opening msg)
let open_token_stream ?line ?block ~scan
~token_to_region ~style input =
let file_path = match input with
File path -> path
| _ -> "" in
let pos = Pos.min ~file:file_path in
let buf_reg = ref (pos#byte, pos#byte)
and decoder = Uutf.decoder ~encoding:`UTF_8 `Manual in
let supply = Uutf.Manual.src decoder in
let state = ref (mk_state
~units:FQueue.empty
~last:Region.ghost
~window:Nil
~pos
~markup:[]
~decoder
~supply
?block
?line
()) in
let get_pos () = !state#pos
and get_last () = !state#last
and get_win () = !state#window
and get_file () = file_path in
let patch_buffer (start, stop) buffer =
let open Lexing in
let file_path = buffer.lex_curr_p.pos_fname in
buffer.lex_start_p <- {start with pos_fname = file_path};
buffer.lex_curr_p <- {stop with pos_fname = file_path}
and save_region buffer =
buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p) in
let scan' scan buffer =
patch_buffer !buf_reg buffer;
state := scan !state buffer;
save_region buffer in
let next_token scan buffer =
scan' scan buffer;
match FQueue.peek !state#units with
None -> None
| Some (units, ext_token) ->
state := !state#set_units units; Some ext_token in
let rec read scan ~token_to_region ~style ~log buffer =
match FQueue.deq !state#units with
None ->
scan' scan buffer;
read scan ~token_to_region ~style ~log buffer
| Some (units, (left_mark, token)) ->
log left_mark token;
state := ((!state#set_units units)
#set_last (token_to_region token))
#slide_token token;
style token (next_token scan) buffer;
patch_buffer (token_to_region token)#byte_pos buffer;
token in
match lexbuf_from_input input with
Ok (buffer, close) ->
let () =
match input with
File path when path <> "" -> reset ~file:path buffer
| _ -> () in
let instance = {
read = read scan ~token_to_region ~style;
input; buffer; get_win; get_pos; get_last; get_file; close}
in Ok instance
| Error _ as e -> e

View File

@ -0,0 +1,203 @@
(* A library for writing UTF8-aware lexers *)
module Region = Simple_utils.Region
module Pos = Simple_utils.Pos
(* The function [rollback] resets the lexing buffer to the state it
was when it matched the last regular expression. This function is
safe to use only in the semantic action of the rule which last
matched. *)
val rollback : Lexing.lexbuf -> unit
(* Utility types *)
type file_path = string
type lexeme = string
(* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)
(* When scanning structured constructs, like strings and comments, we
need to keep the region of the opening symbol (like double quote,
"//" or "(*") in order to report any error more precisely. Since
ocamllex is byte-oriented, we need to store the parsed bytes as
characters in an accumulator [acc] and also its length [len], so,
we are done, it is easy to build the string making up the
structured construct with [mk_str] (see above).
The resulting data structure is called a _thread_. (Note for
Emacs: "*)".)
*)
type thread = <
opening : Region.t;
length : int;
acc : char list;
to_string : string;
push_char : char -> thread;
push_string : string -> thread;
set_opening : Region.t -> thread
>
val mk_thread : Region.t -> lexeme -> thread
(* STATE *)
(* Beyond producing tokens, the result of lexing is a _state_. The
type [state] represents the abstract logical state of the lexing
engine, that is, a value which is threaded during scanning and
which denotes useful, high-level information beyond what the type
[Lexing.lexbuf] in the standard library already provides for all
generic lexers. We qualify it as "logical state" because the lexing
buffer itself has a "physical state" defined by the type
[Lexing.lexbuf].
Tokens are the smallest units used by the parser to build the
abstract syntax tree. The state includes a queue of recognised
tokens, with the markup at the left of its lexeme until either the
start of the file or the end of the previously recognised token.
The markup from the last recognised token or, if the first token
has not been recognised yet, from the beginning of the file is
stored in the field [markup] of the state because it is a
side-effect, with respect to the output token list, and we use a
record with a single field [units] because that record may be
easily extended during the future maintenance of this lexer.
The state also includes a field [pos] which holds the current
position in the LIGO source file. The position is not always
updated after a single character has been matched: that depends on
the regular expression that matched the lexing buffer.
The field [window] is a two-token window, that is, a buffer that
contains the last recognised token, and the penultimate (if any).
Technically, it is a parametric type, but its use is meant for
tokens, wherever they are defined. In [Two (t1,t2)], and in case
of a syntax error, [t1] is the first invalid token and [t2] is the
last valid one.
The fields [decoder] and [supply] offer the support needed for
the lexing of UTF-8 encoded characters in comments (the only place
where they are allowed in LIGO). The former is the decoder proper
and the latter is the effectful function [supply] that takes a
byte, a start index and a length and feed it to [decoder]. See the
documentation of the third-party library Uutf.
Some methods are now documented.
The call [state#enqueue token] updates functionally the state
[state] by associating the token [token] with the stored markup and
enqueuing the pair into the units queue. The field [markup] is then
reset to the empty list.
The call [state#slide_token token] pushes the token [token] in
the buffer [buffer]. If the buffer is full, that is, it is [Two
(t1,t2)], then the token [t2] is discarded to make room for
[token].
The call [state#sync buffer] updates the current position in
accordance with the contents of the lexing buffer, more precisely,
depending on the length of the string which has just been
recognised by the scanner: that length is used as a positive offset
to the current column.
*)
type 'token window =
Nil
| One of 'token
| Two of 'token * 'token
type 'token state = <
units : (Markup.t list * 'token) FQueue.t;
markup : Markup.t list;
window : 'token window;
last : Region.t;
pos : Pos.t;
decoder : Uutf.decoder;
supply : Bytes.t -> int -> int -> unit;
block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
enqueue : 'token -> 'token state;
set_units : (Markup.t list * 'token) FQueue.t -> 'token state;
set_last : Region.t -> 'token state;
set_pos : Pos.t -> 'token state;
slide_token : 'token -> 'token state;
sync : Lexing.lexbuf -> Region.t * lexeme * 'token state;
push_newline : Lexing.lexbuf -> 'token state;
push_line : thread -> 'token state;
push_block : thread -> 'token state;
push_space : Lexing.lexbuf -> 'token state;
push_tabs : Lexing.lexbuf -> 'token state;
push_bom : Lexing.lexbuf -> 'token state;
push_markup : Markup.t -> 'token state;
>
(* LEXER INSTANCE *)
(* The function [open_token_stream] returns a lexer instance made of
* the input [input] of type [input];
* a function [read] that extracts tokens from a lexing buffer,
together with a lexing buffer [buffer] to read from,
* a function [close] that closes that buffer,
* a function [get_pos] that returns the current position, and
* a function [get_last] that returns the region of the last
recognised token.
* a function [get_file] that returns the name of the file being
scanned (empty string if [stdin]).
Note that a module [Token] is exported too, because the signature
of the exported functions depend on it.
The type [window] is a two-token window, that is, a buffer that
contains the last recognised token, and the penultimate (if any).
The call [read ?line ?block ~scan ~token_to_region ~style
input] evaluates in a lexer (also known as a tokeniser or scanner)
whose type is [log:('token logger) -> Lexing.lexbuf -> 'token], and
suitable for a parser generated by Menhir. The argument labelled
[log] is a logger, that is, it may print a token and its left
markup to a given channel, at the caller's discretion. The function
labelled [~scan] is the main scanner of the lexer. The function
labelled [~style] is used to check stylistic constraints on the
tokens and the markup between them.
*)
type input =
File of file_path
| String of string
| Channel of in_channel
| Buffer of Lexing.lexbuf
type 'token logger = Markup.t list -> 'token -> unit
type 'token instance = {
input : input;
read : log:('token logger) -> Lexing.lexbuf -> 'token;
buffer : Lexing.lexbuf;
get_win : unit -> 'token window;
get_pos : unit -> Pos.t;
get_last : unit -> Region.t;
get_file : unit -> file_path;
close : unit -> unit
}
type open_err = File_opening of string
val lexbuf_from_input :
input -> (Lexing.lexbuf * (unit -> unit), open_err) Stdlib.result
val open_token_stream :
?line:EvalOpt.line_comment ->
?block:EvalOpt.block_comment ->
scan:('token state -> Lexing.lexbuf -> 'token state) ->
token_to_region:('token -> Region.t) ->
style:('token ->
(Lexing.lexbuf -> (Markup.t list * 'token) option) ->
Lexing.lexbuf ->
unit) ->
input ->
('token instance, open_err) Stdlib.result

View File

@ -5,6 +5,7 @@ module Region = Simple_utils.Region
module type S =
sig
module Lexer : Lexer.S
type token = Lexer.token
val output_token :
?offsets:bool ->
@ -12,7 +13,7 @@ module type S =
EvalOpt.command ->
out_channel ->
Markup.t list ->
Lexer.token ->
token ->
unit
type file_path = string
@ -20,8 +21,14 @@ module type S =
val trace :
?offsets:bool ->
[`Byte | `Point] ->
EvalOpt.language ->
Lexer.input ->
?block:EvalOpt.block_comment ->
?line:EvalOpt.line_comment ->
token_to_region:(token -> Region.t) ->
style:(token ->
(Lexing.lexbuf -> (Markup.t list * token) option) ->
Lexing.lexbuf ->
unit) ->
LexerLib.input ->
EvalOpt.command ->
(unit, string Region.reg) Stdlib.result
end
@ -30,10 +37,12 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
struct
module Lexer = Lexer
module Token = Lexer.Token
type token = Lexer.token
(** Pretty-printing in a string the lexemes making up the markup
(* Pretty-printing in a string the lexemes making up the markup
between two tokens, concatenated with the last lexeme
itself. *)
let output_token ?(offsets=true) mode command
channel left_mark token : unit =
let output str = Printf.fprintf channel "%s%!" str in
@ -56,10 +65,16 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
type file_path = string
let trace ?(offsets=true) mode lang input command :
let trace ?(offsets=true) mode ?block ?line
~token_to_region ~style input command :
(unit, string Region.reg) Stdlib.result =
match Lexer.open_token_stream lang input with
Ok Lexer.{read; buffer; close; _} ->
match LexerLib.open_token_stream
~scan:Lexer.scan
~token_to_region
~style
?line ?block input
with
Ok LexerLib.{read; buffer; close; _} ->
let log = output_token ~offsets mode command stdout
and close_all () = flush_all (); close () in
let rec iter () =
@ -68,12 +83,17 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
if Token.is_eof token
then Stdlib.Ok ()
else iter ()
| exception Lexer.Token.Error error ->
let msg =
Lexer.Token.format_error
~offsets mode ~file:true error
in Stdlib.Error msg
| exception Lexer.Error error ->
let msg =
Lexer.format_error ~offsets mode ~file:true error
in Stdlib.Error msg in
let result = iter ()
in close_all (); result
| Stdlib.Error (Lexer.File_opening msg) ->
| Stdlib.Error (LexerLib.File_opening msg) ->
flush_all (); Stdlib.Error (Region.wrap_ghost msg)
end

View File

@ -3,6 +3,7 @@ module Region = Simple_utils.Region
module type S =
sig
module Lexer : Lexer.S
type token = Lexer.token
val output_token :
?offsets:bool ->
@ -10,7 +11,7 @@ module type S =
EvalOpt.command ->
out_channel ->
Markup.t list ->
Lexer.token ->
token ->
unit
type file_path = string
@ -18,8 +19,14 @@ module type S =
val trace :
?offsets:bool ->
[`Byte | `Point] ->
EvalOpt.language ->
Lexer.input ->
?block:EvalOpt.block_comment ->
?line:EvalOpt.line_comment ->
token_to_region:(token -> Region.t) ->
style:(token ->
(Lexing.lexbuf -> (Markup.t list * token) option) ->
Lexing.lexbuf ->
unit) ->
LexerLib.input ->
EvalOpt.command ->
(unit, string Region.reg) Stdlib.result
end

View File

@ -39,31 +39,45 @@ module Make (IO: IO) (Lexer: Lexer.S) =
| Stdlib.Ok pp_buffer ->
(* Running the lexer on the preprocessed input *)
let source = Lexer.String (Buffer.contents pp_buffer) in
match Lexer.open_token_stream IO.options#lang source with
Ok Lexer.{read; buffer; close; _} ->
let close_all () = flush_all (); close () in
let rec read_tokens tokens =
match read ~log:(fun _ _ -> ()) buffer with
token ->
if Lexer.Token.is_eof token
then Stdlib.Ok (List.rev tokens)
else read_tokens (token::tokens)
| exception Lexer.Error error ->
let file =
match IO.options#input with
None | Some "-" -> false
| Some _ -> true in
let () =
Printf.eprintf "[LexerUnit] file = %b\n%!" file in
let msg =
Lexer.format_error ~offsets:IO.options#offsets
IO.options#mode ~file error
in Stdlib.Error msg in
let result = read_tokens []
in close_all (); result
| Stdlib.Error (Lexer.File_opening msg) ->
flush_all (); Stdlib.Error (Region.wrap_ghost msg) in
let source = LexerLib.String (Buffer.contents pp_buffer) in
match LexerLib.open_token_stream
?line:IO.options#line
?block:IO.options#block
~scan:Lexer.scan
~token_to_region:Lexer.Token.to_region
~style:Lexer.Token.check_right_context
source with
Ok LexerLib.{read; buffer; close; _} ->
let close_all () = flush_all (); close () in
let rec read_tokens tokens =
match read ~log:(fun _ _ -> ()) buffer with
token ->
if Lexer.Token.is_eof token
then Stdlib.Ok (List.rev tokens)
else read_tokens (token::tokens)
| exception Lexer.Token.Error error ->
let file =
match IO.options#input with
None | Some "-" -> false
| Some _ -> true in
let msg =
Lexer.Token.format_error
~offsets:IO.options#offsets
IO.options#mode ~file error
in Stdlib.Error msg
| exception Lexer.Error error ->
let file =
match IO.options#input with
None | Some "-" -> false
| Some _ -> true in
let msg =
Lexer.format_error ~offsets:IO.options#offsets
IO.options#mode ~file error
in Stdlib.Error msg in
let result = read_tokens []
in close_all (); result
| Stdlib.Error (LexerLib.File_opening msg) ->
flush_all (); Stdlib.Error (Region.wrap_ghost msg) in
match IO.options#input with
None -> preproc stdin
| Some file_path ->
@ -101,8 +115,11 @@ module Make (IO: IO) (Lexer: Lexer.S) =
end
else Log.trace ~offsets:IO.options#offsets
IO.options#mode
IO.options#lang
(Lexer.String preproc_str)
?block:IO.options#block
?line:IO.options#line
~token_to_region:Lexer.Token.to_region
~style:Lexer.Token.check_right_context
(LexerLib.String preproc_str)
IO.options#cmd
in match IO.options#input with
None -> preproc stdin

View File

@ -57,7 +57,7 @@ module type PARSER =
module Make (IO: IO)
(Lexer: Lexer.S)
(Parser: PARSER with type token = Lexer.Token.token)
(Parser: PARSER with type token = Lexer.token)
(ParErr: sig val message : int -> string end) =
struct
module I = Parser.MenhirInterpreter
@ -122,10 +122,10 @@ module Make (IO: IO)
message
in
match get_win () with
Lexer.Nil -> assert false
| Lexer.One invalid ->
LexerLib.Nil -> assert false
| LexerLib.One invalid ->
raise (Point (message, None, invalid))
| Lexer.Two (invalid, valid) ->
| LexerLib.Two (invalid, valid) ->
raise (Point (message, Some valid, invalid))
(* The monolithic API of Menhir *)
@ -143,14 +143,14 @@ module Make (IO: IO)
~offsets:IO.options#offsets
IO.options#mode IO.options#cmd stdout
let incr_contract Lexer.{read; buffer; get_win; close; _} =
let incr_contract LexerLib.{read; buffer; get_win; close; _} =
let supplier = I.lexer_lexbuf_to_supplier (read ~log) buffer
and failure = failure get_win in
let parser = Incr.contract buffer.Lexing.lex_curr_p in
let ast = I.loop_handle success failure supplier parser
in flush_all (); close (); ast
let incr_expr Lexer.{read; buffer; get_win; close; _} =
let incr_expr LexerLib.{read; buffer; get_win; close; _} =
let supplier = I.lexer_lexbuf_to_supplier (read ~log) buffer
and failure = failure get_win in
let parser = Incr.interactive_expr buffer.Lexing.lex_curr_p in

View File

@ -56,7 +56,7 @@ module type PARSER =
module Make (IO: IO)
(Lexer: Lexer.S)
(Parser: PARSER with type token = Lexer.Token.token)
(Parser: PARSER with type token = Lexer.token)
(ParErr: sig val message : int -> string end) :
sig
(* WARNING: The following parsers may all raise [Lexer.Error] *)
@ -78,8 +78,8 @@ module Make (IO: IO)
exception Point of error
val incr_contract : Lexer.instance -> Parser.ast
val incr_expr : Lexer.instance -> Parser.expr
val incr_contract : Lexer.token LexerLib.instance -> Parser.ast
val incr_expr : Lexer.token LexerLib.instance -> Parser.expr
val format_error :
?offsets:bool -> [`Point | `Byte] -> error -> string Region.reg

View File

@ -4,16 +4,15 @@ module Region = Simple_utils.Region
module Preproc = Preprocessor.Preproc
module SSet = Set.Make (String)
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
module type SubIO =
sig
type options = <
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : language;
ext : string; (* ".ligo", ".mligo", ".religo" *)
block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string;
mode : [`Byte | `Point];
cmd : EvalOpt.command;
mono : bool
@ -23,7 +22,7 @@ module type SubIO =
val make : input:string option -> expr:bool -> EvalOpt.options
end
module type Pretty =
module type Printer =
sig
type state
type ast
@ -45,7 +44,7 @@ module Make (Lexer: Lexer.S)
and type expr = AST.expr
and type token = Lexer.token)
(ParErr: sig val message : int -> string end)
(ParserLog: Pretty with type ast = AST.t
(ParserLog: Printer with type ast = AST.t
and type expr = AST.expr)
(SubIO: SubIO) =
struct
@ -90,12 +89,12 @@ module Make (Lexer: Lexer.S)
ParserLog.mk_state ~offsets:SubIO.options#offsets
~mode:SubIO.options#mode
~buffer:output in
let close () = lexer_inst.Lexer.close () in
let close () = lexer_inst.LexerLib.close () in
let expr =
try
if SubIO.options#mono then
let tokeniser = lexer_inst.Lexer.read ~log
and lexbuf = lexer_inst.Lexer.buffer
let tokeniser = lexer_inst.LexerLib.read ~log
and lexbuf = lexer_inst.LexerLib.buffer
in Front.mono_expr tokeniser lexbuf
else
Front.incr_expr lexer_inst
@ -125,12 +124,12 @@ module Make (Lexer: Lexer.S)
ParserLog.mk_state ~offsets:SubIO.options#offsets
~mode:SubIO.options#mode
~buffer:output in
let close () = lexer_inst.Lexer.close () in
let close () = lexer_inst.LexerLib.close () in
let ast =
try
if SubIO.options#mono then
let tokeniser = lexer_inst.Lexer.read ~log
and lexbuf = lexer_inst.Lexer.buffer
let tokeniser = lexer_inst.LexerLib.read ~log
and lexbuf = lexer_inst.LexerLib.buffer
in Front.mono_contract tokeniser lexbuf
else
Front.incr_contract lexer_inst
@ -164,10 +163,18 @@ module Make (Lexer: Lexer.S)
| exception Lexer.Error err ->
let file =
lexer_inst.Lexer.buffer.Lexing.lex_curr_p.Lexing.pos_fname in
let error =
Lexer.format_error ~offsets:SubIO.options#offsets
SubIO.options#mode err ~file:(file <> "")
lexer_inst.LexerLib.buffer.Lexing.lex_curr_p.Lexing.pos_fname in
let error = Lexer.format_error
~offsets:SubIO.options#offsets
SubIO.options#mode err ~file:(file <> "")
in Stdlib.Error error
| exception Lexer.Token.Error err ->
let file =
lexer_inst.LexerLib.buffer.Lexing.lex_curr_p.Lexing.pos_fname in
let error = Lexer.Token.format_error
~offsets:SubIO.options#offsets
SubIO.options#mode err ~file:(file <> "")
in Stdlib.Error error
(* Incremental API of Menhir *)
@ -182,11 +189,11 @@ module Make (Lexer: Lexer.S)
| exception Parser.Error ->
let invalid, valid_opt =
match lexer_inst.Lexer.get_win () with
Lexer.Nil ->
match lexer_inst.LexerLib.get_win () with
LexerLib.Nil ->
assert false (* Safe: There is always at least EOF. *)
| Lexer.One invalid -> invalid, None
| Lexer.Two (invalid, valid) -> invalid, Some valid in
| LexerLib.One invalid -> invalid, None
| LexerLib.Two (invalid, valid) -> invalid, Some valid in
let point = "", valid_opt, invalid in
let error =
Front.format_error ~offsets:SubIO.options#offsets
@ -206,8 +213,8 @@ module Make (Lexer: Lexer.S)
(* Parsing a contract *)
let gen_parser options input parser =
match Lexer.lexbuf_from_input input with
Stdlib.Error (Lexer.File_opening msg) ->
match LexerLib.lexbuf_from_input input with
Stdlib.Error (LexerLib.File_opening msg) ->
Stdlib.Error (Region.wrap_ghost msg)
| Ok (lexbuf, close) ->
(* Preprocessing the input source *)
@ -225,45 +232,52 @@ module Make (Lexer: Lexer.S)
(* Lexing and parsing the preprocessed input source *)
let () = close () in
let input' = Lexer.String (Buffer.contents buffer) in
match Lexer.open_token_stream options#lang input' with
let input' = LexerLib.String (Buffer.contents buffer) in
match LexerLib.open_token_stream
~scan:Lexer.scan
~token_to_region:Lexer.Token.to_region
~style:Lexer.Token.check_right_context
?line:options#line
?block:options#block
input'
with
Ok instance ->
let open Lexing in
instance.Lexer.buffer.lex_curr_p <-
{instance.Lexer.buffer.lex_curr_p with pos_fname = file};
instance.LexerLib.buffer.lex_curr_p <-
{instance.LexerLib.buffer.lex_curr_p with pos_fname=file};
apply instance parser
| Stdlib.Error (Lexer.File_opening msg) ->
| Stdlib.Error (LexerLib.File_opening msg) ->
Stdlib.Error (Region.wrap_ghost msg)
(* Parsing a contract in a file *)
let contract_in_file (source : string) =
let options = SubIO.make ~input:(Some source) ~expr:false
in gen_parser options (Lexer.File source) parse_contract
in gen_parser options (LexerLib.File source) parse_contract
(* Parsing a contract in a string *)
let contract_in_string (source : string) =
let options = SubIO.make ~input:None ~expr:false in
gen_parser options (Lexer.String source) parse_contract
gen_parser options (LexerLib.String source) parse_contract
(* Parsing a contract in stdin *)
let contract_in_stdin () =
let options = SubIO.make ~input:None ~expr:false in
gen_parser options (Lexer.Channel stdin) parse_contract
gen_parser options (LexerLib.Channel stdin) parse_contract
(* Parsing an expression in a string *)
let expr_in_string (source : string) =
let options = SubIO.make ~input:None ~expr:true in
gen_parser options (Lexer.String source) parse_expr
gen_parser options (LexerLib.String source) parse_expr
(* Parsing an expression in stdin *)
let expr_in_stdin () =
let options = SubIO.make ~input:None ~expr:true in
gen_parser options (Lexer.Channel stdin) parse_expr
gen_parser options (LexerLib.Channel stdin) parse_expr
(* Preprocess only *)

View File

@ -2,18 +2,19 @@
module Region = Simple_utils.Region
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
module SSet : Set.S with type elt = string and type t = Set.Make(String).t
(* A subtype of [EvalOpt.options] *)
module type SubIO =
sig
type options = <
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : language;
ext : string; (* ".ligo", ".mligo", ".religo" *)
block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string;
mode : [`Byte | `Point];
cmd : EvalOpt.command;
mono : bool
@ -23,7 +24,9 @@ module type SubIO =
val make : input:string option -> expr:bool -> EvalOpt.options
end
module type Pretty =
(* Signature for the printers *)
module type Printer =
sig
type state
type ast
@ -38,6 +41,8 @@ module type Pretty =
val print_expr : state -> expr -> unit
end
(* Main functor to make the parser *)
module Make (Lexer : Lexer.S)
(AST : sig type t type expr end)
(Parser : ParserAPI.PARSER
@ -45,7 +50,7 @@ module Make (Lexer : Lexer.S)
and type expr = AST.expr
and type token = Lexer.token)
(ParErr : sig val message : int -> string end)
(ParserLog : Pretty with type ast = AST.t
(ParserLog : Printer with type ast = AST.t
and type expr = AST.expr)
(SubIO: SubIO) :
sig
@ -82,4 +87,4 @@ module Make (Lexer : Lexer.S)
val preprocess :
string -> (Buffer.t, message Region.reg) Stdlib.result
end
end

View File

@ -13,6 +13,7 @@
(preprocess
(pps bisect_ppx --conditional))
(modules
LexerLib
LexerUnit
ParserUnit
ParserAPI

View File

@ -1 +1 @@
const a: string = -0x222;
const a: string = - (**) 0x2222

View File

@ -1 +1 @@
let a = -0x222;
let a = - (**) 0x2222

View File

@ -1 +1 @@
let a = -0x222;
let a = - /**/ 0x2222;

View File

@ -1 +0,0 @@
let arguments = 1;

View File

@ -1 +1 @@
let end = 1;
let end = 1;

View File

@ -5,7 +5,7 @@ module Region = Simple_utils.Region
let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo")
let options = EvalOpt.read ".ligo" (* No comments allowed *)
let lex in_chan =
let buffer = Lexing.from_channel in_chan in

View File

@ -5,7 +5,7 @@ module Region = Simple_utils.Region
let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo")
let options = EvalOpt.read ".ligo" (* No comments allowed *)
let parse in_chan =
let buffer = Lexing.from_channel in_chan in

View File

@ -2,29 +2,33 @@
(* The type [options] gathers the command-line options. *)
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
let lang_to_string = function
`PascaLIGO -> "PascaLIGO"
| `CameLIGO -> "CameLIGO"
| `ReasonLIGO -> "ReasonLIGO"
module SSet = Set.Make (String)
type line_comment = string (* Opening of a line comment *)
type block_comment = <opening : string; closing : string>
let mk_block ~opening ~closing : block_comment =
object
method opening = opening
method closing = closing
end
type options = <
input : string option;
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : language;
ext : string (* ".ligo", ".mligo", ".religo" *)
block : block_comment option;
line : line_comment option;
ext : string
>
let make ~input ~libs ~lang ~offsets ~verbose ~ext : options =
let make ~input ~libs ?block ?line ~offsets ~verbose ~ext : options =
object
method input = input
method libs = libs
method lang = lang
method block = block
method line = line
method offsets = offsets
method verbose = verbose
method ext = ext
@ -47,10 +51,10 @@ let abort msg =
(* Help *)
let help lang ext () =
let help ext () =
let file = Filename.basename Sys.argv.(0) in
printf "Usage: %s [<option> ...] [<input>%s | \"-\"]\n" file ext;
printf "where <input>%s is the %s source file (default: stdin),\n" ext lang;
printf "where <input>%s is the LIGO source file (default: stdin),\n" ext;
print "and each <option> (if any) is one of the following:";
print " -I <paths> Inclusion paths (colon-separated)";
print " --columns Columns for source locations";
@ -74,11 +78,10 @@ let add_verbose d =
verbose := List.fold_left (fun x y -> SSet.add y x)
!verbose
(split_at_colon d)
let specs lang ext =
let lang_str = lang_to_string lang in
let open!Getopt in [
let specs ext =
let open! Getopt in [
'I', nolong, None, Some add_path;
'h', "help", Some (help lang_str ext), None;
'h', "help", Some (help ext), None;
noshort, "columns", set columns true, None;
noshort, "verbose", None, Some add_verbose
]
@ -92,7 +95,7 @@ let anonymous arg =
(* Checking options and exporting them as non-mutable values *)
let check lang ext =
let check ?block ?line ~ext =
let libs = !libs
and offsets = not !columns
@ -109,16 +112,18 @@ let check lang ext =
else abort "Source file not found."
else abort ("Source file lacks the extension " ^ ext ^ ".")
in make ~input ~libs ~lang ~offsets ~verbose ~ext
in make ~input ~libs ?block ?line ~offsets ~verbose ~ext
(* Parsing the command-line options *)
let read ~lang:(lang : language) ~ext:(ext : string) =
type extension = string
let read ?block ?line (ext: extension) =
try
Getopt.parse_cmdline (specs lang ext) anonymous;
Getopt.parse_cmdline (specs ext) anonymous;
(verb_str :=
let apply e a =
if a = "" then e else sprintf "%s, %s" e a
in SSet.fold apply !verbose "");
check lang ext
check ?block ?line ~ext
with Getopt.Error msg -> abort msg

View File

@ -2,25 +2,28 @@
(* The type [options] gathers the command-line options. *)
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
val lang_to_string : language -> string
module SSet : Set.S with type elt = string and type t = Set.Make(String).t
type line_comment = string (* Opening of a line comment *)
type block_comment = <opening : string; closing : string>
val mk_block : opening:string -> closing:string -> block_comment
type options = <
input : string option;
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : language;
ext : string (* ".ligo", ".mligo", ".religo" *)
block : block_comment option;
line : line_comment option;
ext : string
>
val make :
input:string option ->
libs:string list ->
lang:language ->
?block:block_comment ->
?line:line_comment ->
offsets:bool ->
verbose:SSet.t ->
ext:string ->
@ -30,4 +33,7 @@ val make :
the name of the concrete syntax. This is needed to correctly handle
comments. *)
val read : lang:language -> ext:string -> options
type extension = string
val read :
?block:block_comment -> ?line:line_comment -> extension -> options

View File

@ -14,8 +14,8 @@ type error =
| Invalid_line_indicator of string
| No_line_indicator
| End_line_indicator
| Newline_in_string (* For #include argument only *)
| Open_string (* For #include argument only *)
| Newline_in_string (* For #include argument only *)
| Unterminated_string (* For #include argument only *)
| Dangling_endif
| Open_region_in_conditional
| Dangling_endregion
@ -29,10 +29,10 @@ type error =
| Multiply_defined_symbol of string
| Error_directive of string
| Parse_error
| No_line_comment_or_blank
| Invalid_symbol
| File_not_found of string
| Invalid_character of char
| Unterminated_comment of string
val format :
?offsets:bool -> error Region.reg -> file:bool -> string Region.reg

View File

@ -44,19 +44,6 @@ type mode = Copy | Skip
type cond = If of mode | Elif of mode | Else | Region
type trace = cond list
(* Line offsets
The value [Inline] of type [offset] means that the current location
cannot be reached from the start of the line with only white
space. The same holds for the special value [Prefix 0]. Values of
the form [Prefix n] mean that the current location can be reached
from the start of the line with [n] white spaces (padding). These
distinctions are needed because preprocessor directives cannot
occur inside lines.
*)
type offset = Prefix of int | Inline
(* Environments *)
module Env = Set.Make (String)
@ -78,8 +65,6 @@ in function
* the field [env] records the symbols defined;
* the field [mode] informs whether the preprocessor is in copying or
skipping mode;
* the field [offset] tells whether the current location can be
reached from the start of the line with only white space;
* the field [trace] is a stack of previous, still active conditional
directives;
* the field [out] keeps the output buffer;
@ -92,7 +77,6 @@ in function
type state = {
env : Env.t;
mode : mode;
offset : offset;
trace : trace;
out : Buffer.t;
incl : in_channel list;
@ -117,7 +101,7 @@ type error =
| No_line_indicator
| End_line_indicator
| Newline_in_string
| Open_string
| Unterminated_string
| Dangling_endif
| Open_region_in_conditional
| Dangling_endregion
@ -131,10 +115,10 @@ type error =
| Multiply_defined_symbol of string
| Error_directive of string
| Parse_error
| No_line_comment_or_blank
| Invalid_symbol
| File_not_found of string
| Invalid_character of char
| Unterminated_comment of string
let error_to_string = function
Directive_inside_line ->
@ -151,7 +135,7 @@ let error_to_string = function
Hint: Try a string, end of line, or a line comment."
| Newline_in_string ->
sprintf "Invalid newline character in string."
| Open_string ->
| Unterminated_string ->
sprintf "Unterminated string.\n\
Hint: Close with double quotes."
| Dangling_endif ->
@ -187,14 +171,15 @@ let error_to_string = function
msg
| Parse_error ->
"Parse error in expression."
| No_line_comment_or_blank ->
"Line comment or whitespace expected."
| Invalid_symbol ->
"Expected a symbol (identifier)."
| File_not_found name ->
sprintf "File \"%s\" to include not found." name
| Invalid_character c ->
E_Lexer.error_to_string (E_Lexer.Invalid_character c)
| Unterminated_comment ending ->
sprintf "Unterminated comment.\n\
Hint: Close with \"%s\"." ending
let format ?(offsets=true) Region.{region; value} ~file =
let msg = error_to_string value
@ -224,7 +209,7 @@ let fail error state buffer = stop error state (mk_reg buffer)
let reduce_cond state region =
let rec reduce = function
[] -> stop Dangling_endif state region
| If mode::trace -> {state with mode; trace; offset = Prefix 0}
| If mode::trace -> {state with mode; trace}
| Region::_ -> stop Open_region_in_conditional state region
| _::trace -> reduce trace
in reduce state.trace
@ -235,7 +220,7 @@ let reduce_cond state region =
let reduce_region state region =
match state.trace with
[] -> stop Dangling_endregion state region
| Region::trace -> {state with trace; offset = Prefix 0}
| Region::trace -> {state with trace}
| _ -> stop Conditional_in_region state region
(* The function [extend] is called when encountering conditional
@ -286,7 +271,7 @@ let find dir file libs =
let copy state buffer = Buffer.add_string state.out (Lexing.lexeme buffer)
(* End of lines *)
(* End of lines are always copied *)
let proc_nl state buffer = Lexing.new_line buffer; copy state buffer
@ -294,13 +279,6 @@ let proc_nl state buffer = Lexing.new_line buffer; copy state buffer
let print state string = Buffer.add_string state.out string
(* Expanding the offset into whitespace *)
let expand_offset state =
match state.offset with
Prefix 0 | Inline -> ()
| Prefix n -> print state (String.make n ' ')
(* Evaluating a preprocessor expression
The evaluation of conditional directives may involve symbols whose
@ -346,6 +324,35 @@ let letter = small | capital
let ident = letter (letter | '_' | digit)*
let directive = '#' (blank* as space) (small+ as id)
(* Comments *)
let pascaligo_block_comment_opening = "(*"
let pascaligo_block_comment_closing = "*)"
let pascaligo_line_comment = "//"
let cameligo_block_comment_opening = "(*"
let cameligo_block_comment_closing = "*)"
let cameligo_line_comment = "//"
let reasonligo_block_comment_opening = "/*"
let reasonligo_block_comment_closing = "*/"
let reasonligo_line_comment = "//"
let block_comment_openings =
pascaligo_block_comment_opening
| cameligo_block_comment_opening
| reasonligo_block_comment_opening
let block_comment_closings =
pascaligo_block_comment_closing
| cameligo_block_comment_closing
| reasonligo_block_comment_closing
let line_comments =
pascaligo_line_comment
| cameligo_line_comment
| reasonligo_line_comment
(* Rules *)
(* The rule [scan] scans the input buffer for directives, strings,
@ -354,19 +361,17 @@ let directive = '#' (blank* as space) (small+ as id)
depending on the compilation directives. If not copied, new line
characters are output.
Scanning is triggered by the function call [scan env mode offset
trace lexbuf], where [env] is the set of defined symbols
(introduced by `#define'), [mode] specifies whether we are copying
or skipping the input, [offset] informs about the location in the
line (either there is a prefix of blanks, or at least a non-blank
character has been read), and [trace] is the stack of conditional
directives read so far.
Scanning is triggered by the function call [scan env mode trace
lexbuf], where [env] is the set of defined symbols (introduced by
`#define'), [mode] specifies whether we are copying or skipping the
input, and [trace] is the stack of conditional directives read so
far.
The first call is [scan {env=Env.empty; mode=Copy; offset = Prefix
0; trace=[]; incl=[]; opt}], meaning that we start with an empty
environment, that copying the input is enabled by default, and that
we are at the start of a line and no previous conditional
directives have been read yet. The field [opt] is the CLI options.
The first call is [scan {env=Env.empty; mode=Copy; trace=[];
incl=[]; opt}], meaning that we start with an empty environment,
that copying the input is enabled by default, and that we are at
the start of a line and no previous conditional directives have
been read yet. The field [opt] is the CLI options.
When an "#if" is matched, the trace is extended by the call [extend
lexbuf (If mode) trace], during the evaluation of which the
@ -386,12 +391,11 @@ let directive = '#' (blank* as space) (small+ as id)
value of the conditional expression must be ignored (but not its
syntax), and we continue skipping the input.
When an "#else" is matched, the trace is extended with [Else],
then, if the directive is not at a wrong offset, the rest of the
line is scanned with [skip_line]. If we were in copy mode, the new
mode toggles to skipping mode; otherwise, the trace is searched for
the last encountered "#if" of "#elif" and the associated mode is
restored.
When an "#else" is matched, the trace is extended with [Else], then
the rest of the line is scanned with [skip_line]. If we were in
copy mode, the new mode toggles to skipping mode; otherwise, the
trace is searched for the last encountered "#if" of "#elif" and the
associated mode is restored.
The case "#elif" is the result of the fusion (in the technical
sense) of the code for dealing with an "#else" followed by an
@ -465,28 +469,23 @@ let directive = '#' (blank* as space) (small+ as id)
Important note: Comments and strings are recognised as such only in
copy mode, which is a different behaviour from the preprocessor of
GNU GCC, which always does.
*)
*)
rule scan state = parse
nl { expand_offset state; proc_nl state lexbuf;
scan {state with offset = Prefix 0} lexbuf }
| blank { match state.offset with
Prefix n ->
scan {state with offset = Prefix (n+1)} lexbuf
| Inline ->
if state.mode = Copy then copy state lexbuf;
scan state lexbuf }
nl { proc_nl state lexbuf; scan state lexbuf }
| blank { if state.mode = Copy then copy state lexbuf;
scan state lexbuf }
| directive {
let region = mk_reg lexbuf in
if not (List.mem id directives)
then begin
if state.mode = Copy then copy state lexbuf;
scan state lexbuf
end
else
if state.offset = Inline
if region#start#offset `Byte > 0
then fail Directive_inside_line state lexbuf
else
let region = mk_reg lexbuf in
match id with
"include" ->
let line = Lexing.(lexbuf.lex_curr_p.pos_lnum)
@ -517,15 +516,15 @@ rule scan state = parse
let mode = expr state lexbuf in
let mode = if state.mode = Copy then mode else Skip in
let trace = extend (If state.mode) state region in
let state = {state with mode; offset = Prefix 0; trace}
let state = {state with mode; trace}
in scan state lexbuf
| "else" ->
let () = skip_line state lexbuf in
let mode = match state.mode with
Copy -> Skip
| Skip -> last_mode state.trace in
let () = skip_line state lexbuf in
let mode = match state.mode with
Copy -> Skip
| Skip -> last_mode state.trace in
let trace = extend Else state region
in scan {state with mode; offset = Prefix 0; trace} lexbuf
in scan {state with mode; trace} lexbuf
| "elif" ->
let mode = expr state lexbuf in
let trace, mode =
@ -534,7 +533,7 @@ rule scan state = parse
| Skip -> let old_mode = last_mode state.trace
in extend (Elif old_mode) state region,
if old_mode = Copy then mode else Skip
in scan {state with mode; offset = Prefix 0; trace} lexbuf
in scan {state with mode; trace} lexbuf
| "endif" ->
skip_line state lexbuf;
scan (reduce_cond state region) lexbuf
@ -544,89 +543,81 @@ rule scan state = parse
then stop (Reserved_symbol id) state region;
if Env.mem id state.env
then stop (Multiply_defined_symbol id) state region;
let state = {state with env = Env.add id state.env;
offset = Prefix 0}
let state = {state with env = Env.add id state.env}
in scan state lexbuf
| "undef" ->
let id, _ = variable state lexbuf in
let state = {state with env = Env.remove id state.env;
offset = Prefix 0}
let state = {state with env = Env.remove id state.env}
in scan state lexbuf
| "error" ->
stop (Error_directive (message [] lexbuf)) state region
| "region" ->
let msg = message [] lexbuf
in expand_offset state;
print state ("#" ^ space ^ "region" ^ msg ^ "\n");
let state =
{state with offset = Prefix 0; trace=Region::state.trace}
in print state ("#" ^ space ^ "region" ^ msg ^ "\n");
let state = {state with trace=Region::state.trace}
in scan state lexbuf
| "endregion" ->
let msg = message [] lexbuf
in expand_offset state;
print state ("#" ^ space ^ "endregion" ^ msg ^ "\n");
in print state ("#" ^ space ^ "endregion" ^ msg ^ "\n");
scan (reduce_region state region) lexbuf
(*
| "line" ->
expand_offset state;
print state ("#" ^ space ^ "line");
line_ind state lexbuf;
scan {state with offset = Prefix 0} lexbuf
| "warning" ->
let start_p, end_p = region in
let msg = message [] lexbuf in
let open Lexing
in prerr_endline
("Warning at line " ^ string_of_int start_p.pos_lnum
^ ", char "
^ string_of_int (start_p.pos_cnum - start_p.pos_bol)
^ "--" ^ string_of_int (end_p.pos_cnum - end_p.pos_bol)
^ ":\n" ^ msg);
scan env mode (Prefix 0) trace lexbuf
*)
| _ -> assert false
}
| eof { match state.trace with
[] -> expand_offset state; state
| _ -> fail Missing_endif state lexbuf }
| eof { if state.trace = [] then state
else fail Missing_endif state lexbuf }
| '"' { if state.mode = Copy then
begin
expand_offset state;
copy state lexbuf;
in_string (mk_reg lexbuf) state lexbuf
end;
scan {state with offset=Inline} lexbuf }
| "//" { if state.mode = Copy then
begin
expand_offset state;
copy state lexbuf;
in_line_com state lexbuf
end;
scan {state with offset=Inline} lexbuf }
| "/*" { if state.mode = Copy then
begin
expand_offset state;
copy state lexbuf;
if state.opt#lang = `ReasonLIGO then
reasonLIGO_com (mk_reg lexbuf) state lexbuf
end;
scan {state with offset=Inline} lexbuf }
| "(*" { if state.mode = Copy then
begin
expand_offset state;
copy state lexbuf;
if state.opt#lang = `CameLIGO
|| state.opt#lang = `PascaLIGO then
cameLIGO_com (mk_reg lexbuf) state lexbuf
end;
scan {state with offset=Inline} lexbuf }
| _ { if state.mode = Copy then
begin
expand_offset state;
copy state lexbuf
end;
scan {state with offset=Inline} lexbuf }
scan (in_string (mk_reg lexbuf) state lexbuf) lexbuf
end
else scan state lexbuf }
| block_comment_openings {
let lexeme = Lexing.lexeme lexbuf in
match state.opt#block with
Some block when block#opening = lexeme ->
if state.mode = Copy then
begin
copy state lexbuf;
let state = in_block block (mk_reg lexbuf) state lexbuf
in scan state lexbuf
end
else scan state lexbuf
| Some _ | None ->
let n = String.length lexeme in
begin
rollback lexbuf;
assert (n > 0);
scan (scan_n_char n state lexbuf) lexbuf
end }
| line_comments {
let lexeme = Lexing.lexeme lexbuf in
match state.opt#line with
Some line when line = lexeme ->
if state.mode = Copy then
begin
copy state lexbuf;
scan (in_line_com state lexbuf) lexbuf
end
else scan state lexbuf
| Some _ | None ->
let n = String.length lexeme in
begin
rollback lexbuf;
assert (n > 0);
scan (scan_n_char n state lexbuf) lexbuf
end }
| _ { if state.mode = Copy then copy state lexbuf;
scan state lexbuf }
(* Scanning a series of characters *)
and scan_n_char n state = parse
_ { if state.mode = Copy then copy state lexbuf;
if n = 1 then state else scan_n_char (n-1) state lexbuf }
(* Support for #define and #undef *)
@ -638,47 +629,12 @@ and symbol state = parse
ident as id { id, mk_reg lexbuf }
| _ { fail Invalid_symbol state lexbuf }
(*
(* Line indicator (#line) *)
and line_ind state = parse
blank* { copy state lexbuf; line_indicator state lexbuf }
and line_indicator state = parse
natural { copy state lexbuf; end_indicator state lexbuf }
| ident as id {
match id with
"default" | "hidden" ->
print state (id ^ message [] lexbuf)
| _ -> fail (Invalid_line_indicator id) state lexbuf }
| _ { fail No_line_indicator state lexbuf }
and end_indicator state = parse
blank+ { copy state lexbuf; end_indicator state lexbuf }
| nl { proc_nl state lexbuf }
| eof { copy state lexbuf }
| "//" { copy state lexbuf;
print state (message [] lexbuf ^ "\n") }
| '"' { copy state lexbuf;
in_string (mk_reg lexbuf) state lexbuf;
opt_line_com state lexbuf }
| _ { fail End_line_indicator state lexbuf }
and opt_line_com state = parse
nl { proc_nl state lexbuf }
| eof { copy state lexbuf }
| blank+ { copy state lexbuf; opt_line_com state lexbuf }
| "//" { print state ("//" ^ message [] lexbuf) }
*)
(* New lines and verbatim sequence of characters *)
and skip_line state = parse
nl { proc_nl state lexbuf }
| blank+ { skip_line state lexbuf }
| "//" { in_line_com {state with mode=Skip} lexbuf }
| _ { fail No_line_comment_or_blank state lexbuf }
| eof { () }
nl { proc_nl state lexbuf }
| blank+ { skip_line state lexbuf }
| _ { () }
and message acc = parse
nl { Lexing.new_line lexbuf;
@ -689,22 +645,41 @@ and message acc = parse
(* Comments *)
and in_line_com state = parse
nl { proc_nl state lexbuf }
| eof { () }
nl { proc_nl state lexbuf; state }
| eof { state }
| _ { if state.mode = Copy then copy state lexbuf;
in_line_com state lexbuf }
and reasonLIGO_com opening state = parse
nl { proc_nl state lexbuf; reasonLIGO_com opening state lexbuf }
| "*/" { copy state lexbuf }
| eof { () }
| _ { copy state lexbuf; reasonLIGO_com opening state lexbuf }
and in_block block opening state = parse
'"' | block_comment_openings {
let lexeme = Lexing.lexeme lexbuf in
if block#opening = lexeme || lexeme = "\""
then let () = copy state lexbuf in
let opening' = mk_reg lexbuf in
let next = if lexeme = "\"" then in_string
else in_block block in
let state = next opening' state lexbuf
in in_block block opening state lexbuf
else let () = rollback lexbuf in
let n = String.length lexeme in
let () = assert (n > 0) in
let state = scan_n_char n state lexbuf
in in_block block opening state lexbuf }
and cameLIGO_com opening state = parse
nl { proc_nl state lexbuf; cameLIGO_com opening state lexbuf }
| "*)" { copy state lexbuf }
| eof { () }
| _ { copy state lexbuf; cameLIGO_com opening state lexbuf }
| block_comment_closings {
let lexeme = Lexing.lexeme lexbuf in
if block#closing = lexeme
then (copy state lexbuf; state)
else let () = rollback lexbuf in
let n = String.length lexeme in
let () = assert (n > 0) in
let state = scan_n_char n state lexbuf
in in_block block opening state lexbuf }
| nl { proc_nl state lexbuf; in_block block opening state lexbuf }
| eof { let err = Unterminated_comment (block#closing)
in stop err state opening }
| _ { copy state lexbuf; in_block block opening state lexbuf }
(* Included filename *)
@ -717,15 +692,15 @@ and in_inclusion opening acc len state = parse
in Region.cover opening closing,
mk_str len acc }
| nl { fail Newline_in_string state lexbuf }
| eof { stop Open_string state opening }
| eof { stop Unterminated_string state opening }
| _ as c { in_inclusion opening (c::acc) (len+1) state lexbuf }
(* Strings *)
and in_string opening state = parse
"\\\"" { copy state lexbuf; in_string opening state lexbuf }
| '"' { copy state lexbuf }
| eof { () }
| '"' { copy state lexbuf; state }
| eof { state }
| _ { copy state lexbuf; in_string opening state lexbuf }
and preproc state = parse
@ -750,7 +725,6 @@ let lex opt buffer =
let state = {
env = Env.empty;
mode = Copy;
offset = Prefix 0;
trace = [];
out = Buffer.create 80;
incl = [];

View File

@ -4,9 +4,12 @@ module Region = Simple_utils.Region
module Preproc = Preprocessor.Preproc
module EvalOpt = Preprocessor.EvalOpt
let highlight msg = Printf.eprintf "\027[31m%s\027[0m\n%!" msg
let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo")
let options =
let open EvalOpt in
let block = mk_block ~opening:"(*" ~closing:"*)"
in read ~block ~line:"//" ".ligo"
let preproc cin =
let buffer = Lexing.from_channel cin in

View File

@ -1,22 +0,0 @@
#!/bin/sh
set -x
ocamllex.opt E_Lexer.mll
ocamllex.opt Preproc.mll
menhir -la 1 E_Parser.mly
ocamlfind ocamlc -strict-sequence -w +A-48-4 -c EvalOpt.mli
ocamlfind ocamlc -strict-sequence -w +A-48-4 -c E_AST.ml
ocamlfind ocamlc -strict-sequence -w +A-48-4 -c E_Parser.mli
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c E_Lexer.mli
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c E_LexerMain.ml
camlcmd="ocamlfind ocamlc -I _x86_64 -strict-sequence -w +A-48-4 "
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package getopt,str -c EvalOpt.ml
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c E_Lexer.ml
menhir --infer --ocamlc="$camlcmd" E_Parser.mly
ocamlfind ocamlc -strict-sequence -w +A-48-4 -c E_Parser.ml
ocamlfind ocamlc -package getopt,simple-utils,str -linkpkg -o E_LexerMain.byte E_AST.cmo E_Parser.cmo E_Lexer.cmo EvalOpt.cmo E_LexerMain.cmo
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c Preproc.mli
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c PreprocMain.ml
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c Preproc.ml
ocamlfind ocamlc -package getopt,simple-utils,str -linkpkg -o PreprocMain.byte EvalOpt.cmo E_AST.cmo E_Parser.cmo E_Lexer.cmo Preproc.cmo PreprocMain.cmo
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c E_ParserMain.ml
ocamlfind ocamlc -package getopt,simple-utils,str -linkpkg -o E_ParserMain.byte E_AST.cmo E_Parser.cmo E_Lexer.cmo EvalOpt.cmo Preproc.cmo E_ParserMain.cmo

View File

@ -1,4 +0,0 @@
#!/bin/sh
\rm -f *.cm* *.o *.byte *.opt
\rm E_Lexer.ml E_Parser.ml E_Parser.mli Preproc.ml