Merge branch 'rinderknecht@michelson' into 'dev'

Refactoring of the lexer and preprocessor

See merge request ligolang/ligo!591
This commit is contained in:
Sander 2020-04-29 14:19:29 +00:00
commit d0d495ccce
49 changed files with 2048 additions and 1876 deletions

View File

@ -54,7 +54,7 @@ ligo: : Lexical error in file "broken_string.religo", line 1, characters 8-9:
run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.ligo" ; "main" ] ; run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.ligo" ; "main" ] ;
[%expect {| [%expect {|
ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters 18-23: ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters 18-31:
Negative byte sequence. Negative byte sequence.
Hint: Remove the leading minus sign. Hint: Remove the leading minus sign.
{} {}
@ -71,7 +71,7 @@ ligo: : Lexical error in file "negative_byte_sequence.ligo", line 1, characters
run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.mligo" ; "main" ] ; run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.mligo" ; "main" ] ;
[%expect {| [%expect {|
ligo: : Lexical error in file "negative_byte_sequence.mligo", line 1, characters 8-13: ligo: : Lexical error in file "negative_byte_sequence.mligo", line 1, characters 8-21:
Negative byte sequence. Negative byte sequence.
Hint: Remove the leading minus sign. Hint: Remove the leading minus sign.
{} {}
@ -88,7 +88,7 @@ ligo: : Lexical error in file "negative_byte_sequence.mligo", line 1, characters
run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.religo" ; "main" ] ; run_ligo_bad [ "compile-contract" ; "../../test/lexer/negative_byte_sequence.religo" ; "main" ] ;
[%expect {| [%expect {|
ligo: : Lexical error in file "negative_byte_sequence.religo", line 1, characters 8-13: ligo: : Lexical error in file "negative_byte_sequence.religo", line 1, characters 8-21:
Negative byte sequence. Negative byte sequence.
Hint: Remove the leading minus sign. Hint: Remove the leading minus sign.
{} {}
@ -103,6 +103,7 @@ ligo: : Lexical error in file "negative_byte_sequence.religo", line 1, character
* Check the changelog by running 'ligo changelog' * Check the changelog by running 'ligo changelog'
|} ]; |} ];
(*
run_ligo_bad [ "compile-contract" ; "../../test/lexer/reserved_name.ligo" ; "main" ] ; run_ligo_bad [ "compile-contract" ; "../../test/lexer/reserved_name.ligo" ; "main" ] ;
[%expect {| [%expect {|
ligo: : Lexical error in file "reserved_name.ligo", line 1, characters 4-13: ligo: : Lexical error in file "reserved_name.ligo", line 1, characters 4-13:
@ -119,6 +120,7 @@ ligo: : Lexical error in file "reserved_name.ligo", line 1, characters 4-13:
* Open a gitlab issue: https://gitlab.com/ligolang/ligo/issues/new * Open a gitlab issue: https://gitlab.com/ligolang/ligo/issues/new
* Check the changelog by running 'ligo changelog' * Check the changelog by running 'ligo changelog'
|} ]; |} ];
*)
run_ligo_bad [ "compile-contract" ; "../../test/lexer/reserved_name.religo" ; "main" ] ; run_ligo_bad [ "compile-contract" ; "../../test/lexer/reserved_name.religo" ; "main" ] ;
[%expect {| [%expect {|
@ -204,10 +206,9 @@ ligo: : Lexical error in file "unexpected_character.religo", line 1, characters
run_ligo_bad [ "compile-contract" ; "../../test/lexer/unterminated_comment.mligo" ; "main" ] ; run_ligo_bad [ "compile-contract" ; "../../test/lexer/unterminated_comment.mligo" ; "main" ] ;
[%expect {| [%expect {|
ligo: : Lexical error in file "unterminated_comment.mligo", line 1, characters 0-2: ligo: : Preprocessing error in file "../../test/lexer/unterminated_comment.mligo", line 1, characters 0-2:
Unterminated comment. Unterminated comment.
Hint: Close with "*)". Hint: Close with "*)". {}
{}
If you're not sure how to fix this error, you can If you're not sure how to fix this error, you can

View File

@ -8,15 +8,14 @@ module SSet = Set.Make (String)
(* Mock IOs TODO: Fill them with CLI options *) (* Mock IOs TODO: Fill them with CLI options *)
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
module SubIO = module SubIO =
struct struct
type options = < type options = <
libs : string list; libs : string list;
verbose : SSet.t; verbose : SSet.t;
offsets : bool; offsets : bool;
lang : language; block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string; (* ".mligo" *) ext : string; (* ".mligo" *)
mode : [`Byte | `Point]; mode : [`Byte | `Point];
cmd : EvalOpt.command; cmd : EvalOpt.command;
@ -24,11 +23,13 @@ module SubIO =
> >
let options : options = let options : options =
object let block = EvalOpt.mk_block ~opening:"(*" ~closing:"*)"
in object
method libs = [] method libs = []
method verbose = SSet.empty method verbose = SSet.empty
method offsets = true method offsets = true
method lang = `CameLIGO method block = Some block
method line = Some "//"
method ext = ".mligo" method ext = ".mligo"
method mode = `Point method mode = `Point
method cmd = EvalOpt.Quiet method cmd = EvalOpt.Quiet
@ -39,7 +40,8 @@ module SubIO =
EvalOpt.make ~libs:options#libs EvalOpt.make ~libs:options#libs
~verbose:options#verbose ~verbose:options#verbose
~offsets:options#offsets ~offsets:options#offsets
~lang:options#lang ?block:options#block
?line:options#line
~ext:options#ext ~ext:options#ext
~mode:options#mode ~mode:options#mode
~cmd:options#cmd ~cmd:options#cmd

View File

@ -2,6 +2,7 @@ $HOME/git/OCaml-build/Makefile
../shared/Lexer.mli ../shared/Lexer.mli
../shared/Lexer.mll ../shared/Lexer.mll
../shared/LexerLib.ml
../shared/EvalOpt.ml ../shared/EvalOpt.ml
../shared/EvalOpt.mli ../shared/EvalOpt.mli
../shared/FQueue.ml ../shared/FQueue.ml

View File

@ -156,11 +156,22 @@ val eof : Region.t -> token
(* Predicates *) (* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool val is_eof : token -> bool
(* Style *)
type error
val error_to_string : error -> string
exception Error of error Region.reg
val format_error :
?offsets:bool -> [`Byte | `Point] ->
error Region.reg -> file:bool -> string Region.reg
val check_right_context :
token ->
(Lexing.lexbuf -> (Markup.t list * token) option) ->
Lexing.lexbuf ->
unit

View File

@ -102,10 +102,35 @@ type t =
| EOF of Region.t (* End of file *) | EOF of Region.t (* End of file *)
(* Projections *)
type token = t type token = t
let proj_token = function let proj_token = function
ARROW region -> region, "ARROW" (* Literals *)
String Region.{region; value} ->
region, sprintf "Str %s" value
| Bytes Region.{region; value = s,b} ->
region,
sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b)
| Int Region.{region; value = s,n} ->
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
| Nat Region.{region; value = s,n} ->
region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
| Mutez Region.{region; value = s,n} ->
region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
| Ident Region.{region; value} ->
region, sprintf "Ident %s" value
| Constr Region.{region; value} ->
region, sprintf "Constr %s" value
| Attr Region.{region; value} ->
region, sprintf "Attr \"%s\"" value
(* Symbols *)
| ARROW region -> region, "ARROW"
| CONS region -> region, "CONS" | CONS region -> region, "CONS"
| CAT region -> region, "CAT" | CAT region -> region, "CAT"
| MINUS region -> region, "MINUS" | MINUS region -> region, "MINUS"
@ -132,24 +157,9 @@ let proj_token = function
| GE region -> region, "GE" | GE region -> region, "GE"
| BOOL_OR region -> region, "BOOL_OR" | BOOL_OR region -> region, "BOOL_OR"
| BOOL_AND region -> region, "BOOL_AND" | BOOL_AND region -> region, "BOOL_AND"
| Ident Region.{region; value} ->
region, sprintf "Ident %s" value (* Keywords *)
| Constr Region.{region; value} ->
region, sprintf "Constr %s" value
| Int Region.{region; value = s,n} ->
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
| Nat Region.{region; value = s,n} ->
region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
| Mutez Region.{region; value = s,n} ->
region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
| String Region.{region; value} ->
region, sprintf "Str %s" value
| Bytes Region.{region; value = s,b} ->
region,
sprintf "Bytes (\"%s\", \"0x%s\")"
s (Hex.show b)
| Attr Region.{region; value} ->
region, sprintf "Attr \"%s\"" value
| Begin region -> region, "Begin" | Begin region -> region, "Begin"
| Else region -> region, "Else" | Else region -> region, "Else"
| End region -> region, "End" | End region -> region, "End"
@ -168,12 +178,32 @@ let proj_token = function
| True region -> region, "True" | True region -> region, "True"
| Type region -> region, "Type" | Type region -> region, "Type"
| With region -> region, "With" | With region -> region, "With"
(* Data *)
| C_None region -> region, "C_None" | C_None region -> region, "C_None"
| C_Some region -> region, "C_Some" | C_Some region -> region, "C_Some"
(* Virtual tokens *)
| EOF region -> region, "EOF" | EOF region -> region, "EOF"
let to_lexeme = function let to_lexeme = function
ARROW _ -> "->" (* Literals *)
String s -> String.escaped s.Region.value
| Bytes b -> fst b.Region.value
| Int i
| Nat i
| Mutez i -> fst i.Region.value
| Ident id -> id.Region.value
| Constr id -> id.Region.value
| Attr a -> a.Region.value
(* Symbols *)
| ARROW _ -> "->"
| CONS _ -> "::" | CONS _ -> "::"
| CAT _ -> "^" | CAT _ -> "^"
| MINUS _ -> "-" | MINUS _ -> "-"
@ -201,14 +231,7 @@ let to_lexeme = function
| BOOL_OR _ -> "||" | BOOL_OR _ -> "||"
| BOOL_AND _ -> "&&" | BOOL_AND _ -> "&&"
| Ident id -> id.Region.value (* Keywords *)
| Constr id -> id.Region.value
| Int i
| Nat i
| Mutez i -> fst i.Region.value
| String s -> String.escaped s.Region.value
| Bytes b -> fst b.Region.value
| Attr a -> a.Region.value
| Begin _ -> "begin" | Begin _ -> "begin"
| Else _ -> "else" | Else _ -> "else"
@ -229,11 +252,17 @@ let to_lexeme = function
| Then _ -> "then" | Then _ -> "then"
| With _ -> "with" | With _ -> "with"
(* Data constructors *)
| C_None _ -> "None" | C_None _ -> "None"
| C_Some _ -> "Some" | C_Some _ -> "Some"
(* Virtual tokens *)
| EOF _ -> "" | EOF _ -> ""
(* CONVERSIONS *)
let to_string token ?(offsets=true) mode = let to_string token ?(offsets=true) mode =
let region, val_str = proj_token token in let region, val_str = proj_token token in
let reg_str = region#compact ~offsets mode let reg_str = region#compact ~offsets mode
@ -241,10 +270,6 @@ let to_string token ?(offsets=true) mode =
let to_region token = proj_token token |> fst let to_region token = proj_token token |> fst
(* Injections *)
type int_err = Non_canonical_zero
(* LEXIS *) (* LEXIS *)
let keywords = [ let keywords = [
@ -385,6 +410,8 @@ let mk_bytes lexeme region =
let value = lexeme, `Hex norm let value = lexeme, `Hex norm
in Bytes Region.{region; value} in Bytes Region.{region; value}
type int_err = Non_canonical_zero
let mk_int lexeme region = let mk_int lexeme region =
let z = let z =
Str.(global_replace (regexp "_") "" lexeme) |> Z.of_string Str.(global_replace (regexp "_") "" lexeme) |> Z.of_string
@ -398,16 +425,14 @@ type nat_err =
let mk_nat lexeme region = let mk_nat lexeme region =
match (String.index_opt lexeme 'n') with match (String.index_opt lexeme 'n') with
| None -> Error Invalid_natural None -> Error Invalid_natural
| Some _ -> ( | Some _ -> let z =
let z =
Str.(global_replace (regexp "_") "" lexeme) |> Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "n") "") |> Str.(global_replace (regexp "n") "") |>
Z.of_string in Z.of_string in
if Z.equal z Z.zero && lexeme <> "0n" if Z.equal z Z.zero && lexeme <> "0n"
then Error Non_canonical_zero_nat then Error Non_canonical_zero_nat
else Ok (Nat Region.{region; value = lexeme,z}) else Ok (Nat Region.{region; value = lexeme,z})
)
let mk_mutez lexeme region = let mk_mutez lexeme region =
let z = let z =
@ -422,8 +447,6 @@ let eof region = EOF region
type sym_err = Invalid_symbol type sym_err = Invalid_symbol
type attr_err = Invalid_attribute
let mk_sym lexeme region = let mk_sym lexeme region =
match lexeme with match lexeme with
(* Lexemes in common with all concrete syntaxes *) (* Lexemes in common with all concrete syntaxes *)
@ -473,88 +496,80 @@ let mk_constr lexeme region =
(* Attributes *) (* Attributes *)
type attr_err = Invalid_attribute
let mk_attr header lexeme region = let mk_attr header lexeme region =
if header = "[@" then if header = "[@" then Error Invalid_attribute
Error Invalid_attribute
else Ok (Attr Region.{value=lexeme; region}) else Ok (Attr Region.{value=lexeme; region})
(* Predicates *) (* Predicates *)
let is_string = function let is_string = function String _ -> true | _ -> false
String _ -> true let is_bytes = function Bytes _ -> true | _ -> false
| _ -> false let is_int = function Int _ -> true | _ -> false
let is_ident = function Ident _ -> true | _ -> false
let is_bytes = function
Bytes _ -> true
| _ -> false
let is_int = function
Int _ -> true
| _ -> false
let is_ident = function
Ident _ -> true
| _ -> false
let is_kwd = function
| Begin _
| Else _
| End _
| False _
| Fun _
| Rec _
| If _
| In _
| Let _
| Match _
| Mod _
| Not _
| Of _
| Or _
| Then _
| True _
| Type _
| With _ -> true
| _ -> false
let is_constr = function
| Constr _
| Ident _
| False _
| True _ -> true
| _ -> false
let is_sym = function
| ARROW _
| CONS _
| CAT _
| MINUS _
| PLUS _
| SLASH _
| TIMES _
| LPAR _
| RPAR _
| LBRACKET _
| RBRACKET _
| LBRACE _
| RBRACE _
| COMMA _
| SEMI _
| VBAR _
| COLON _
| DOT _
| WILD _
| EQ _
| NE _
| LT _
| GT _
| LE _
| GE _
| BOOL_OR _
| BOOL_AND _ -> true
| _ -> false
let is_eof = function EOF _ -> true | _ -> false let is_eof = function EOF _ -> true | _ -> false
let is_minus = function MINUS _ -> true | _ -> false
(* Errors *)
type error =
Odd_lengthed_bytes
| Missing_break
| Negative_byte_sequence
let error_to_string = function
Odd_lengthed_bytes ->
"The length of the byte sequence is an odd number.\n\
Hint: Add or remove a digit."
| Missing_break ->
"Missing break.\n\
Hint: Insert some space."
| Negative_byte_sequence ->
"Negative byte sequence.\n\
Hint: Remove the leading minus sign."
exception Error of error Region.reg
let format_error ?(offsets=true) mode Region.{region; value} ~file =
let msg = error_to_string value
and reg = region#to_string ~file ~offsets mode in
let value = sprintf "Lexical error %s:\n%s\n" reg msg
in Region.{value; region}
let fail region value = raise (Error Region.{region; value})
let check_right_context token next_token buffer : unit =
let pos = (to_region token)#stop in
let region = Region.make ~start:pos ~stop:pos in
match next_token buffer with
None -> ()
| Some (markup, next) ->
if is_minus token && is_bytes next
then let region =
Region.cover (to_region token) (to_region next)
in fail region Negative_byte_sequence
else
match markup with
[] ->
if is_int token
then if is_string next || is_ident next
then fail region Missing_break
else ()
else
if is_string token
then if is_int next || is_bytes next || is_ident next
then fail region Missing_break
else ()
else
if is_bytes token
then if is_string next || is_ident next
then fail region Missing_break
else if is_int next
then fail region Odd_lengthed_bytes
else ()
else ()
| _::_ -> ()
(* END TRAILER *) (* END TRAILER *)
} }

View File

@ -4,7 +4,10 @@ module Region = Simple_utils.Region
module IO = module IO =
struct struct
let options = EvalOpt.(read ~lang:`CameLIGO ~ext:".mligo") let options =
let open EvalOpt in
let block = mk_block ~opening:"(*" ~closing:"*)"
in read ~block ~line:"//" ".mligo"
end end
module M = LexerUnit.Make (IO) (Lexer.Make (LexToken)) module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
@ -12,4 +15,4 @@ module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
let () = let () =
match M.trace () with match M.trace () with
Stdlib.Ok () -> () Stdlib.Ok () -> ()
| Error Region.{value; _} -> Utils.highlight value | Error Region.{value; _} -> Printf.eprintf "\027[31m%s\027[0m%!" value

View File

@ -5,7 +5,10 @@ module SSet = Set.Make (String)
module IO = module IO =
struct struct
let options = EvalOpt.(read ~lang:`CameLIGO ~ext:".mligo") let options =
let open EvalOpt in
let block = mk_block ~opening:"(*" ~closing:"*)"
in read ~block ~line:"//" ".mligo"
end end
module SubIO = module SubIO =
@ -14,7 +17,8 @@ module SubIO =
libs : string list; libs : string list;
verbose : SSet.t; verbose : SSet.t;
offsets : bool; offsets : bool;
lang : EvalOpt.language; block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string; ext : string;
mode : [`Byte | `Point]; mode : [`Byte | `Point];
cmd : EvalOpt.command; cmd : EvalOpt.command;
@ -26,7 +30,8 @@ module SubIO =
method libs = IO.options#libs method libs = IO.options#libs
method verbose = IO.options#verbose method verbose = IO.options#verbose
method offsets = IO.options#offsets method offsets = IO.options#offsets
method lang = IO.options#lang method block = IO.options#block
method line = IO.options#line
method ext = IO.options#ext method ext = IO.options#ext
method mode = IO.options#mode method mode = IO.options#mode
method cmd = IO.options#cmd method cmd = IO.options#cmd
@ -37,7 +42,8 @@ module SubIO =
EvalOpt.make ~libs:options#libs EvalOpt.make ~libs:options#libs
~verbose:options#verbose ~verbose:options#verbose
~offsets:options#offsets ~offsets:options#offsets
~lang:options#lang ?block:options#block
?line:options#line
~ext:options#ext ~ext:options#ext
~mode:options#mode ~mode:options#mode
~cmd:options#cmd ~cmd:options#cmd

View File

@ -8,15 +8,14 @@ module SSet = Set.Make (String)
(* Mock IOs TODO: Fill them with CLI options *) (* Mock IOs TODO: Fill them with CLI options *)
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
module SubIO = module SubIO =
struct struct
type options = < type options = <
libs : string list; libs : string list;
verbose : SSet.t; verbose : SSet.t;
offsets : bool; offsets : bool;
lang : language; block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string; (* ".ligo" *) ext : string; (* ".ligo" *)
mode : [`Byte | `Point]; mode : [`Byte | `Point];
cmd : EvalOpt.command; cmd : EvalOpt.command;
@ -24,11 +23,13 @@ module SubIO =
> >
let options : options = let options : options =
object let block = EvalOpt.mk_block ~opening:"(*" ~closing:"*)"
in object
method libs = [] method libs = []
method verbose = SSet.empty method verbose = SSet.empty
method offsets = true method offsets = true
method lang = `PascaLIGO method block = Some block
method line = Some "//"
method ext = ".ligo" method ext = ".ligo"
method mode = `Point method mode = `Point
method cmd = EvalOpt.Quiet method cmd = EvalOpt.Quiet
@ -39,7 +40,8 @@ module SubIO =
EvalOpt.make ~libs:options#libs EvalOpt.make ~libs:options#libs
~verbose:options#verbose ~verbose:options#verbose
~offsets:options#offsets ~offsets:options#offsets
~lang:options#lang ?block:options#block
?line:options#line
~ext:options#ext ~ext:options#ext
~mode:options#mode ~mode:options#mode
~cmd:options#cmd ~cmd:options#cmd

View File

@ -2,6 +2,8 @@ $HOME/git/OCaml-build/Makefile
../shared/Lexer.mli ../shared/Lexer.mli
../shared/Lexer.mll ../shared/Lexer.mll
../shared/LexerLib.mli
../shared/LexerLib.ml
../shared/EvalOpt.ml ../shared/EvalOpt.ml
../shared/EvalOpt.mli ../shared/EvalOpt.mli
../shared/FQueue.ml ../shared/FQueue.ml
@ -18,5 +20,6 @@ $HOME/git/OCaml-build/Makefile
../shared/LexerUnit.ml ../shared/LexerUnit.ml
../shared/ParserUnit.mli ../shared/ParserUnit.mli
../shared/ParserUnit.ml ../shared/ParserUnit.ml
../shared/LexerLib.ml
$HOME/git/ligo/_build/default/src/passes/1-parser/pascaligo/ParErr.ml $HOME/git/ligo/_build/default/src/passes/1-parser/pascaligo/ParErr.ml

View File

@ -1,4 +1,4 @@
(** This signature defines the lexical tokens for LIGO (* This signature defines the lexical tokens for LIGO
_Tokens_ are the abstract units which are used by the parser to _Tokens_ are the abstract units which are used by the parser to
build the abstract syntax tree (AST), in other words, the stream of build the abstract syntax tree (AST), in other words, the stream of
@ -163,11 +163,22 @@ val eof : Region.t -> token
(* Predicates *) (* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool val is_eof : token -> bool
(* Style *)
type error
val error_to_string : error -> string
exception Error of error Region.reg
val format_error :
?offsets:bool -> [`Byte | `Point] ->
error Region.reg -> file:bool -> string Region.reg
val check_right_context :
token ->
(Lexing.lexbuf -> (Markup.t list * token) option) ->
Lexing.lexbuf ->
unit

View File

@ -5,24 +5,14 @@
(* Shorthands *) (* Shorthands *)
type lexeme = string
let sprintf = Printf.sprintf
module Region = Simple_utils.Region module Region = Simple_utils.Region
module Pos = Simple_utils.Pos module Pos = Simple_utils.Pos
module SMap = Map.Make (String) module SMap = Map.Make (String)
module SSet = Set.Make (String) module SSet = Set.Make (String)
(* Hack to roll back one lexeme in the current semantic action *) type lexeme = string
(*
let rollback buffer = let sprintf = Printf.sprintf
let open Lexing in
let len = String.length (lexeme buffer) in
let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
*)
(* TOKENS *) (* TOKENS *)
@ -123,6 +113,8 @@ type t =
| EOF of Region.t | EOF of Region.t
(* Projections *)
type token = t type token = t
let proj_token = function let proj_token = function
@ -130,32 +122,20 @@ let proj_token = function
String Region.{region; value} -> String Region.{region; value} ->
region, sprintf "String %s" value region, sprintf "String %s" value
| Bytes Region.{region; value = s,b} -> | Bytes Region.{region; value = s,b} ->
region, region,
sprintf "Bytes (\"%s\", \"0x%s\")" sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b)
s (Hex.show b)
| Int Region.{region; value = s,n} -> | Int Region.{region; value = s,n} ->
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n) region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
| Nat Region.{region; value = s,n} -> | Nat Region.{region; value = s,n} ->
region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n) region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
| Mutez Region.{region; value = s,n} -> | Mutez Region.{region; value = s,n} ->
region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n) region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
| Ident Region.{region; value} -> | Ident Region.{region; value} ->
region, sprintf "Ident \"%s\"" value region, sprintf "Ident \"%s\"" value
| Constr Region.{region; value} -> | Constr Region.{region; value} ->
region, sprintf "Constr \"%s\"" value region, sprintf "Constr \"%s\"" value
(*
| Attr {header; string={region; value}} ->
region, sprintf "Attr (\"%s\",\"%s\")" header value
*)
(* Symbols *) (* Symbols *)
| SEMI region -> region, "SEMI" | SEMI region -> region, "SEMI"
@ -382,9 +362,7 @@ let keywords = [
(fun reg -> With reg) (fun reg -> With reg)
] ]
let reserved = let reserved = SSet.empty
let open SSet in
empty |> add "arguments"
let constructors = [ let constructors = [
(fun reg -> False reg); (fun reg -> False reg);
@ -483,21 +461,18 @@ type nat_err =
| Non_canonical_zero_nat | Non_canonical_zero_nat
let mk_nat lexeme region = let mk_nat lexeme region =
match (String.index_opt lexeme 'n') with match String.index_opt lexeme 'n' with
| None -> Error Invalid_natural None -> Error Invalid_natural
| Some _ -> ( | Some _ -> let z =
let z =
Str.(global_replace (regexp "_") "" lexeme) |> Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "n") "") |> Str.(global_replace (regexp "n") "") |>
Z.of_string in Z.of_string in
if Z.equal z Z.zero && lexeme <> "0n" if Z.equal z Z.zero && lexeme <> "0n"
then Error Non_canonical_zero_nat then Error Non_canonical_zero_nat
else Ok (Nat Region.{region; value = lexeme,z}) else Ok (Nat Region.{region; value = lexeme,z})
)
let mk_mutez lexeme region = let mk_mutez lexeme region =
let z = let z = Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "mutez") "") |> Str.(global_replace (regexp "mutez") "") |>
Z.of_string in Z.of_string in
if Z.equal z Z.zero && lexeme <> "0mutez" if Z.equal z Z.zero && lexeme <> "0mutez"
@ -558,104 +533,76 @@ let mk_constr lexeme region =
type attr_err = Invalid_attribute type attr_err = Invalid_attribute
let mk_attr _header _string _region = let mk_attr _ _ _ = Error Invalid_attribute
Error Invalid_attribute
(* Predicates *) (* Predicates *)
let is_string = function let is_string = function String _ -> true | _ -> false
String _ -> true let is_bytes = function Bytes _ -> true | _ -> false
| _ -> false let is_int = function Int _ -> true | _ -> false
let is_ident = function Ident _ -> true | _ -> false
let is_bytes = function
Bytes _ -> true
| _ -> false
let is_int = function
Int _ -> true
| _ -> false
let is_ident = function
Ident _ -> true
| _ -> false
let is_kwd = function
And _
| Attributes _
| Begin _
| BigMap _
| Block _
| Case _
| Const _
| Contains _
| Else _
| End _
| False _
| For _
| From _
| Function _
| If _
| In _
| Is _
| List _
| Map _
| Mod _
| Nil _
| Not _
| Of _
| Or _
| Patch _
| Record _
| Remove _
| Set _
| Skip _
| Step _
| Then _
| To _
| True _
| Type _
| Unit _
| Var _
| While _
| With _ -> true
| _ -> false
let is_constr = function
Constr _
| C_None _
| C_Some _ -> true
| _ -> false
let is_sym = function
SEMI _
| COMMA _
| LPAR _
| RPAR _
| LBRACE _
| RBRACE _
| LBRACKET _
| RBRACKET _
| CONS _
| VBAR _
| ARROW _
| ASS _
| EQ _
| COLON _
| LT _
| LE _
| GT _
| GE _
| NE _
| PLUS _
| MINUS _
| SLASH _
| TIMES _
| DOT _
| WILD _
| CAT _ -> true
| _ -> false
let is_eof = function EOF _ -> true | _ -> false let is_eof = function EOF _ -> true | _ -> false
let is_minus = function MINUS _ -> true | _ -> false
(* Errors *)
type error =
Odd_lengthed_bytes
| Missing_break
| Negative_byte_sequence
let error_to_string = function
Odd_lengthed_bytes ->
"The length of the byte sequence is an odd number.\n\
Hint: Add or remove a digit."
| Missing_break ->
"Missing break.\n\
Hint: Insert some space."
| Negative_byte_sequence ->
"Negative byte sequence.\n\
Hint: Remove the leading minus sign."
exception Error of error Region.reg
let format_error ?(offsets=true) mode Region.{region; value} ~file =
let msg = error_to_string value
and reg = region#to_string ~file ~offsets mode in
let value = sprintf "Lexical error %s:\n%s\n" reg msg
in Region.{value; region}
let fail region value = raise (Error Region.{region; value})
let check_right_context token next_token buffer : unit =
let pos = (to_region token)#stop in
let region = Region.make ~start:pos ~stop:pos in
match next_token buffer with
None -> ()
| Some (markup, next) ->
if is_minus token && is_bytes next
then let region =
Region.cover (to_region token) (to_region next)
in fail region Negative_byte_sequence
else
match markup with
[] ->
if is_int token
then if is_string next || is_ident next
then fail region Missing_break
else ()
else
if is_string token
then if is_int next || is_bytes next || is_ident next
then fail region Missing_break
else ()
else
if is_bytes token
then if is_string next || is_ident next
then fail region Missing_break
else if is_int next
then fail region Odd_lengthed_bytes
else ()
else ()
| _::_ -> ()
(* END TRAILER *) (* END TRAILER *)
} }

View File

@ -4,7 +4,10 @@ module Region = Simple_utils.Region
module IO = module IO =
struct struct
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo") let options =
let open EvalOpt in
let block = mk_block ~opening:"(*" ~closing:"*)"
in read ~block ~line:"//" ".ligo"
end end
module M = LexerUnit.Make (IO) (Lexer.Make (LexToken)) module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))

View File

@ -5,7 +5,10 @@ module SSet = Set.Make (String)
module IO = module IO =
struct struct
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo") let options =
let open EvalOpt in
let block = mk_block ~opening:"(*" ~closing:"*)"
in read ~block ~line:"//" ".ligo"
end end
module SubIO = module SubIO =
@ -14,7 +17,8 @@ module SubIO =
libs : string list; libs : string list;
verbose : SSet.t; verbose : SSet.t;
offsets : bool; offsets : bool;
lang : EvalOpt.language; block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string; ext : string;
mode : [`Byte | `Point]; mode : [`Byte | `Point];
cmd : EvalOpt.command; cmd : EvalOpt.command;
@ -26,7 +30,8 @@ module SubIO =
method libs = IO.options#libs method libs = IO.options#libs
method verbose = IO.options#verbose method verbose = IO.options#verbose
method offsets = IO.options#offsets method offsets = IO.options#offsets
method lang = IO.options#lang method block = IO.options#block
method line = IO.options#line
method ext = IO.options#ext method ext = IO.options#ext
method mode = IO.options#mode method mode = IO.options#mode
method cmd = IO.options#cmd method cmd = IO.options#cmd
@ -37,7 +42,8 @@ module SubIO =
EvalOpt.make ~libs:options#libs EvalOpt.make ~libs:options#libs
~verbose:options#verbose ~verbose:options#verbose
~offsets:options#offsets ~offsets:options#offsets
~lang:options#lang ?block:options#block
?line:options#line
~ext:options#ext ~ext:options#ext
~mode:options#mode ~mode:options#mode
~cmd:options#cmd ~cmd:options#cmd

View File

@ -11,15 +11,14 @@ module SSet = Set.Make (String)
(* Mock IOs TODO: Fill them with CLI options *) (* Mock IOs TODO: Fill them with CLI options *)
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
module SubIO = module SubIO =
struct struct
type options = < type options = <
libs : string list; libs : string list;
verbose : SSet.t; verbose : SSet.t;
offsets : bool; offsets : bool;
lang : language; block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string; (* ".religo" *) ext : string; (* ".religo" *)
mode : [`Byte | `Point]; mode : [`Byte | `Point];
cmd : EvalOpt.command; cmd : EvalOpt.command;
@ -27,11 +26,13 @@ module SubIO =
> >
let options : options = let options : options =
object let block = EvalOpt.mk_block ~opening:"/*" ~closing:"*/"
in object
method libs = [] method libs = []
method verbose = SSet.empty method verbose = SSet.empty
method offsets = true method offsets = true
method lang = `ReasonLIGO method block = Some block
method line = Some "//"
method ext = ".religo" method ext = ".religo"
method mode = `Point method mode = `Point
method cmd = EvalOpt.Quiet method cmd = EvalOpt.Quiet
@ -42,7 +43,8 @@ module SubIO =
EvalOpt.make ~libs:options#libs EvalOpt.make ~libs:options#libs
~verbose:options#verbose ~verbose:options#verbose
~offsets:options#offsets ~offsets:options#offsets
~lang:options#lang ?block:options#block
?line:options#line
~ext:options#ext ~ext:options#ext
~mode:options#mode ~mode:options#mode
~cmd:options#cmd ~cmd:options#cmd

View File

@ -2,6 +2,7 @@ $HOME/git/OCaml-build/Makefile
../shared/Lexer.mli ../shared/Lexer.mli
../shared/Lexer.mll ../shared/Lexer.mll
../shared/LexerLib.ml
../shared/EvalOpt.ml ../shared/EvalOpt.ml
../shared/EvalOpt.mli ../shared/EvalOpt.mli
../shared/FQueue.ml ../shared/FQueue.ml

View File

@ -57,6 +57,7 @@ type t =
| COLON of Region.t (* ":" *) | COLON of Region.t (* ":" *)
| DOT of Region.t (* "." *) | DOT of Region.t (* "." *)
| ELLIPSIS of Region.t (* "..." *) | ELLIPSIS of Region.t (* "..." *)
| ARROW of Region.t (* "=>" *)
(* Wildcard *) (* Wildcard *)
@ -69,10 +70,10 @@ type t =
| NE of Region.t (* "!=" *) | NE of Region.t (* "!=" *)
| LT of Region.t (* "<" *) | LT of Region.t (* "<" *)
| GT of Region.t (* ">" *) | GT of Region.t (* ">" *)
| LE of Region.t (* "=<" *) | LE of Region.t (* "<=" *)
| GE of Region.t (* ">=" *) | GE of Region.t (* ">=" *)
| ARROW of Region.t (* "=>" *) (* Logic *)
| BOOL_OR of Region.t (* "||" *) | BOOL_OR of Region.t (* "||" *)
| BOOL_AND of Region.t (* "&&" *) | BOOL_AND of Region.t (* "&&" *)
@ -95,14 +96,14 @@ type t =
| False of Region.t | False of Region.t
| If of Region.t | If of Region.t
| Let of Region.t | Let of Region.t
| Rec of Region.t
| Switch of Region.t
| Mod of Region.t | Mod of Region.t
| Or of Region.t | Or of Region.t
| Rec of Region.t
| Switch of Region.t
| True of Region.t | True of Region.t
| Type of Region.t | Type of Region.t
(* Data constructors *) (* Data constructors *)
| C_None of Region.t (* "None" *) | C_None of Region.t (* "None" *)
| C_Some of Region.t (* "Some" *) | C_Some of Region.t (* "Some" *)
@ -111,8 +112,6 @@ type t =
| EOF of Region.t (* End of file *) | EOF of Region.t (* End of file *)
type token = t
(* Projections (* Projections
The difference between extracting the lexeme and a string from a The difference between extracting the lexeme and a string from a
@ -121,6 +120,8 @@ type token = t
lexeme (concrete syntax). lexeme (concrete syntax).
*) *)
type token = t
val to_lexeme : token -> lexeme val to_lexeme : token -> lexeme
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
val to_region : token -> Region.t val to_region : token -> Region.t
@ -154,11 +155,22 @@ val eof : Region.t -> token
(* Predicates *) (* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool val is_eof : token -> bool
(* Style *)
type error
val error_to_string : error -> string
exception Error of error Region.reg
val format_error :
?offsets:bool -> [`Byte | `Point] ->
error Region.reg -> file:bool -> string Region.reg
val check_right_context :
token ->
(Lexing.lexbuf -> (Markup.t list * token) option) ->
Lexing.lexbuf ->
unit

View File

@ -1,15 +1,17 @@
{ {
(* START OF HEADER *) (* START OF HEADER *)
type lexeme = string (* Shorthands *)
let sprintf = Printf.sprintf
module Region = Simple_utils.Region module Region = Simple_utils.Region
module Pos = Simple_utils.Pos module Pos = Simple_utils.Pos
module SMap = Utils.String.Map module SMap = Utils.String.Map
module SSet = Utils.String.Set module SSet = Utils.String.Set
type lexeme = string
let sprintf = Printf.sprintf
(* TOKENS *) (* TOKENS *)
type t = type t =
@ -41,6 +43,7 @@ type t =
| COLON of Region.t (* ":" *) | COLON of Region.t (* ":" *)
| DOT of Region.t (* "." *) | DOT of Region.t (* "." *)
| ELLIPSIS of Region.t (* "..." *) | ELLIPSIS of Region.t (* "..." *)
| ARROW of Region.t (* "=>" *)
(* Wildcard *) (* Wildcard *)
@ -55,11 +58,11 @@ type t =
| GT of Region.t (* ">" *) | GT of Region.t (* ">" *)
| LE of Region.t (* "<=" *) | LE of Region.t (* "<=" *)
| GE of Region.t (* ">=" *) | GE of Region.t (* ">=" *)
| ARROW of Region.t (* "=>" *)
(* Logic *)
| BOOL_OR of Region.t (* "||" *) | BOOL_OR of Region.t (* "||" *)
| BOOL_AND of Region.t (* "&&" *) | BOOL_AND of Region.t (* "&&" *)
| NOT of Region.t (* ! *) | NOT of Region.t (* ! *)
(* Identifiers, labels, numbers and strings *) (* Identifiers, labels, numbers and strings *)
@ -75,17 +78,17 @@ type t =
(* Keywords *) (* Keywords *)
(*| And*)
| Else of Region.t | Else of Region.t
| False of Region.t | False of Region.t
| If of Region.t | If of Region.t
| Let of Region.t | Let of Region.t
| Rec of Region.t
| Switch of Region.t
| Mod of Region.t | Mod of Region.t
| Or of Region.t | Or of Region.t
| Rec of Region.t
| Switch of Region.t
| True of Region.t | True of Region.t
| Type of Region.t | Type of Region.t
(* Data constructors *) (* Data constructors *)
| C_None of Region.t (* "None" *) | C_None of Region.t (* "None" *)
@ -96,10 +99,32 @@ type t =
| EOF of Region.t (* End of file *) | EOF of Region.t (* End of file *)
(* Projections *)
type token = t type token = t
let proj_token = function let proj_token = function
CAT region -> region, "CAT" (* Literals *)
String Region.{region; value} ->
region, sprintf "String %s" value
| Bytes Region.{region; value = s,b} ->
region,
sprintf "Bytes (\"%s\", \"0x%s\")" s (Hex.show b)
| Int Region.{region; value = s,n} ->
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
| Nat Region.{region; value = s,n} ->
region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
| Mutez Region.{region; value = s,n} ->
region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
| Ident Region.{region; value} ->
region, sprintf "Ident %s" value
| Constr Region.{region; value} ->
region, sprintf "Constr %s" value
(* Symbols *)
| CAT region -> region, "CAT"
| MINUS region -> region, "MINUS" | MINUS region -> region, "MINUS"
| PLUS region -> region, "PLUS" | PLUS region -> region, "PLUS"
| SLASH region -> region, "SLASH" | SLASH region -> region, "SLASH"
@ -125,24 +150,9 @@ let proj_token = function
| LE region -> region, "LE" | LE region -> region, "LE"
| GE region -> region, "GE" | GE region -> region, "GE"
| ARROW region -> region, "ARROW" | ARROW region -> region, "ARROW"
| NOT region -> region, "NOT"
| BOOL_OR region -> region, "BOOL_OR" | BOOL_OR region -> region, "BOOL_OR"
| BOOL_AND region -> region, "BOOL_AND" | BOOL_AND region -> region, "BOOL_AND"
| Ident Region.{region; value} ->
region, sprintf "Ident %s" value
| Constr Region.{region; value} ->
region, sprintf "Constr %s" value
| Int Region.{region; value = s,n} ->
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
| Nat Region.{region; value = s,n} ->
region, sprintf "Nat (\"%s\", %s)" s (Z.to_string n)
| Mutez Region.{region; value = s,n} ->
region, sprintf "Mutez (\"%s\", %s)" s (Z.to_string n)
| String Region.{region; value} ->
region, sprintf "String %s" value
| Bytes Region.{region; value = s,b} ->
region,
sprintf "Bytes (\"%s\", \"0x%s\")"
s (Hex.show b)
| Else region -> region, "Else" | Else region -> region, "Else"
| False region -> region, "False" | False region -> region, "False"
| If region -> region, "If" | If region -> region, "If"
@ -150,7 +160,6 @@ let proj_token = function
| Rec region -> region, "Rec" | Rec region -> region, "Rec"
| Switch region -> region, "Switch" | Switch region -> region, "Switch"
| Mod region -> region, "Mod" | Mod region -> region, "Mod"
| NOT region -> region, "!"
| Or region -> region, "Or" | Or region -> region, "Or"
| True region -> region, "True" | True region -> region, "True"
| Type region -> region, "Type" | Type region -> region, "Type"
@ -160,7 +169,20 @@ let proj_token = function
| EOF region -> region, "EOF" | EOF region -> region, "EOF"
let to_lexeme = function let to_lexeme = function
CAT _ -> "++" (* Literals *)
String s -> s.Region.value
| Bytes b -> fst b.Region.value
| Int i
| Nat i
| Mutez i -> fst i.Region.value
| Ident id -> id.Region.value
| Constr id -> id.Region.value
| Attr a -> a.Region.value
(* Symbols *)
| CAT _ -> "++"
| MINUS _ -> "-" | MINUS _ -> "-"
| PLUS _ -> "+" | PLUS _ -> "+"
| SLASH _ -> "/" | SLASH _ -> "/"
@ -188,29 +210,32 @@ let to_lexeme = function
| ARROW _ -> "=>" | ARROW _ -> "=>"
| BOOL_OR _ -> "||" | BOOL_OR _ -> "||"
| BOOL_AND _ -> "&&" | BOOL_AND _ -> "&&"
| Ident id -> id.Region.value | NOT _ -> "!"
| Constr id -> id.Region.value
| Int i (* Keywords *)
| Nat i
| Mutez i -> fst i.Region.value
| String s -> s.Region.value
| Bytes b -> fst b.Region.value
| Else _ -> "else" | Else _ -> "else"
| False _ -> "false" | False _ -> "false"
| If _ -> "if" | If _ -> "if"
| Let _ -> "let" | Let _ -> "let"
| Rec _ -> "rec"
| Mod _ -> "mod" | Mod _ -> "mod"
| NOT _ -> "!"
| Or _ -> "or" | Or _ -> "or"
| Rec _ -> "rec"
| Switch _ -> "switch" | Switch _ -> "switch"
| True _ -> "true" | True _ -> "true"
| Type _ -> "type" | Type _ -> "type"
(* Data constructors *)
| C_None _ -> "None" | C_None _ -> "None"
| C_Some _ -> "Some" | C_Some _ -> "Some"
| Attr a -> a.Region.value
(* Virtual tokens *)
| EOF _ -> "" | EOF _ -> ""
(* CONVERSIONS *)
let to_string token ?(offsets=true) mode = let to_string token ?(offsets=true) mode =
let region, val_str = proj_token token in let region, val_str = proj_token token in
let reg_str = region#compact ~offsets mode let reg_str = region#compact ~offsets mode
@ -261,12 +286,9 @@ let reserved =
|> add "functor" |> add "functor"
|> add "inherit" |> add "inherit"
|> add "initializer" |> add "initializer"
(* |> add "land" - see https://ligo.atlassian.net/browse/LIGO-263 *)
|> add "lazy" |> add "lazy"
(* |> add "lor" - see https://ligo.atlassian.net/browse/LIGO-263 *)
|> add "lsl" |> add "lsl"
|> add "lsr" |> add "lsr"
(* |> add "lxor" - see https://ligo.atlassian.net/browse/LIGO-263 *)
|> add "match" |> add "match"
|> add "method" |> add "method"
|> add "module" |> add "module"
@ -291,7 +313,7 @@ let reserved =
let constructors = [ let constructors = [
(fun reg -> C_None reg); (fun reg -> C_None reg);
(fun reg -> C_Some reg); (fun reg -> C_Some reg)
] ]
let add map (key, value) = SMap.add key value map let add map (key, value) = SMap.add key value map
@ -376,8 +398,7 @@ let mk_int lexeme region =
else Ok (Int Region.{region; value = lexeme, z}) else Ok (Int Region.{region; value = lexeme, z})
let mk_nat lexeme region = let mk_nat lexeme region =
let z = let z = Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "n") "") |> Str.(global_replace (regexp "n") "") |>
Z.of_string in Z.of_string in
if Z.equal z Z.zero && lexeme <> "0n" if Z.equal z Z.zero && lexeme <> "0n"
@ -385,8 +406,7 @@ let mk_nat lexeme region =
else Ok (Nat Region.{region; value = lexeme, z}) else Ok (Nat Region.{region; value = lexeme, z})
let mk_mutez lexeme region = let mk_mutez lexeme region =
let z = let z = Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "mutez") "") |> Str.(global_replace (regexp "mutez") "") |>
Z.of_string in Z.of_string in
if Z.equal z Z.zero && lexeme <> "0mutez" if Z.equal z Z.zero && lexeme <> "0mutez"
@ -460,75 +480,72 @@ let mk_attr header lexeme region =
(* Predicates *) (* Predicates *)
let is_string = function let is_string = function String _ -> true | _ -> false
String _ -> true let is_bytes = function Bytes _ -> true | _ -> false
| _ -> false let is_int = function Int _ -> true | _ -> false
let is_ident = function Ident _ -> true | _ -> false
let is_bytes = function
Bytes _ -> true
| _ -> false
let is_int = function
Int _ -> true
| _ -> false
let is_ident = function
Ident _ -> true
| _ -> false
let is_kwd = function
Else _
| False _
| If _
| Let _
| Rec _
| Switch _
| Mod _
| Or _
| True _
| Type _ -> true
| _ -> false
let is_constr = function
Constr _
| Ident _
| False _
| True _ -> true
| _ -> false
let is_sym = function
CAT _
| MINUS _
| PLUS _
| SLASH _
| TIMES _
| LPAR _
| RPAR _
| LBRACKET _
| RBRACKET _
| LBRACE _
| RBRACE _
| COMMA _
| SEMI _
| VBAR _
| COLON _
| DOT _
| ELLIPSIS _
| WILD _
| EQ _
| EQEQ _
| NE _
| LT _
| GT _
| LE _
| GE _
| ARROW _
| BOOL_OR _
| NOT _
| BOOL_AND _ -> true
| _ -> false
let is_eof = function EOF _ -> true | _ -> false let is_eof = function EOF _ -> true | _ -> false
let is_minus = function MINUS _ -> true | _ -> false
(* Errors *)
type error =
Odd_lengthed_bytes
| Missing_break
| Negative_byte_sequence
let error_to_string = function
Odd_lengthed_bytes ->
"The length of the byte sequence is an odd number.\n\
Hint: Add or remove a digit."
| Missing_break ->
"Missing break.\n\
Hint: Insert some space."
| Negative_byte_sequence ->
"Negative byte sequence.\n\
Hint: Remove the leading minus sign."
exception Error of error Region.reg
let format_error ?(offsets=true) mode Region.{region; value} ~file =
let msg = error_to_string value
and reg = region#to_string ~file ~offsets mode in
let value = sprintf "Lexical error %s:\n%s\n" reg msg
in Region.{value; region}
let fail region value = raise (Error Region.{region; value})
let check_right_context token next_token buffer : unit =
let pos = (to_region token)#stop in
let region = Region.make ~start:pos ~stop:pos in
match next_token buffer with
None -> ()
| Some (markup, next) ->
if is_minus token && is_bytes next
then let region =
Region.cover (to_region token) (to_region next)
in fail region Negative_byte_sequence
else
match markup with
[] ->
if is_int token
then if is_string next || is_ident next
then fail region Missing_break
else ()
else
if is_string token
then if is_int next || is_bytes next || is_ident next
then fail region Missing_break
else ()
else
if is_bytes token
then if is_string next || is_ident next
then fail region Missing_break
else if is_int next
then fail region Odd_lengthed_bytes
else ()
else ()
| _::_ -> ()
(* END TRAILER *) (* END TRAILER *)
} }

View File

@ -4,7 +4,10 @@ module Region = Simple_utils.Region
module IO = module IO =
struct struct
let options = EvalOpt.(read ~lang:`ReasonLIGO ~ext:".religo") let options =
let open EvalOpt in
let block = mk_block ~opening:"/*" ~closing:"*/"
in read ~block ~line:"//" ".religo"
end end
module M = LexerUnit.Make (IO) (Lexer.Make (LexToken)) module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
@ -12,4 +15,4 @@ module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
let () = let () =
match M.trace () with match M.trace () with
Stdlib.Ok () -> () Stdlib.Ok () -> ()
| Error Region.{value; _} -> Utils.highlight value | Error Region.{value; _} -> Printf.eprintf "\027[31m%s\027[0m%!" value

View File

@ -895,7 +895,7 @@ path:
| projection { Path $1 } | projection { Path $1 }
update_record: update_record:
"{""..."path "," sep_or_term_list(field_path_assignment,",") "}" { "{" "..." path "," sep_or_term_list(field_path_assignment,",") "}" {
let region = cover $1 $6 in let region = cover $1 $6 in
let ne_elements, terminator = $5 in let ne_elements, terminator = $5 in
let value = { let value = {

View File

@ -5,7 +5,10 @@ module SSet = Set.Make (String)
module IO = module IO =
struct struct
let options = EvalOpt.(read ~lang:`ReasonLIGO ~ext:".religo") let options =
let open EvalOpt in
let block = mk_block ~opening:"/*" ~closing:"*/"
in read ~block ~line:"//" ".religo"
end end
module SubIO = module SubIO =
@ -14,7 +17,8 @@ module SubIO =
libs : string list; libs : string list;
verbose : SSet.t; verbose : SSet.t;
offsets : bool; offsets : bool;
lang : EvalOpt.language; block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string; ext : string;
mode : [`Byte | `Point]; mode : [`Byte | `Point];
cmd : EvalOpt.command; cmd : EvalOpt.command;
@ -26,7 +30,8 @@ module SubIO =
method libs = IO.options#libs method libs = IO.options#libs
method verbose = IO.options#verbose method verbose = IO.options#verbose
method offsets = IO.options#offsets method offsets = IO.options#offsets
method lang = IO.options#lang method block = IO.options#block
method line = IO.options#line
method ext = IO.options#ext method ext = IO.options#ext
method mode = IO.options#mode method mode = IO.options#mode
method cmd = IO.options#cmd method cmd = IO.options#cmd
@ -37,7 +42,8 @@ module SubIO =
EvalOpt.make ~libs:options#libs EvalOpt.make ~libs:options#libs
~verbose:options#verbose ~verbose:options#verbose
~offsets:options#offsets ~offsets:options#offsets
~lang:options#lang ?block:options#block
?line:options#line
~ext:options#ext ~ext:options#ext
~mode:options#mode ~mode:options#mode
~cmd:options#cmd ~cmd:options#cmd

View File

@ -1,7 +0,0 @@
$HOME/git/OCaml-build/Makefile
$HOME/git/OCaml-build/Makefile.cfg
$HOME/git/ligo/vendors/ligo-utils/simple-utils/pos.mli
$HOME/git/ligo/vendors/ligo-utils/simple-utils/pos.ml
$HOME/git/ligo/vendors/ligo-utils/simple-utils/region.mli
$HOME/git/ligo/vendors/ligo-utils/simple-utils/region.ml

View File

@ -5,37 +5,42 @@
type command = Quiet | Copy | Units | Tokens type command = Quiet | Copy | Units | Tokens
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
let lang_to_string = function
`PascaLIGO -> "PascaLIGO"
| `CameLIGO -> "CameLIGO"
| `ReasonLIGO -> "ReasonLIGO"
(* The type [options] gathers the command-line options. *) (* The type [options] gathers the command-line options. *)
module SSet = Set.Make (String) module SSet = Set.Make (String)
type line_comment = string (* Opening of a line comment *)
type block_comment = <opening : string; closing : string>
let mk_block ~opening ~closing : block_comment =
object
method opening = opening
method closing = closing
end
type options = < type options = <
input : string option; input : string option;
libs : string list; libs : string list;
verbose : SSet.t; verbose : SSet.t;
offsets : bool; offsets : bool;
lang : language; block : block_comment option;
ext : string; (* ".ligo", ".mligo", ".religo" *) line : line_comment option;
ext : string;
mode : [`Byte | `Point]; mode : [`Byte | `Point];
cmd : command; cmd : command;
mono : bool; mono : bool;
expr : bool expr : bool
> >
let make ~input ~libs ~verbose ~offsets ~lang ~ext ~mode ~cmd ~mono ~expr : options = let make ~input ~libs ~verbose ~offsets ?block
?line ~ext ~mode ~cmd ~mono ~expr : options =
object object
method input = input method input = input
method libs = libs method libs = libs
method verbose = verbose method verbose = verbose
method offsets = offsets method offsets = offsets
method lang = lang method block = block
method line = line
method ext = ext method ext = ext
method mode = mode method mode = mode
method cmd = cmd method cmd = cmd
@ -58,10 +63,10 @@ let abort msg =
(* Help *) (* Help *)
let help language extension () = let help extension () =
let file = Filename.basename Sys.argv.(0) in let file = Filename.basename Sys.argv.(0) in
printf "Usage: %s [<option> ...] [<input>%s | \"-\"]\n" file extension; printf "Usage: %s [<option> ...] [<input>%s | \"-\"]\n" file extension;
printf "where <input>%s is the %s source file (default: stdin),\n" extension language; printf "where <input>%s is the LIGO source file (default: stdin),\n" extension;
print "and each <option> (if any) is one of the following:"; print "and each <option> (if any) is one of the following:";
print " -I <paths> Library paths (colon-separated)"; print " -I <paths> Library paths (colon-separated)";
print " -t, --tokens Print tokens"; print " -t, --tokens Print tokens";
@ -105,8 +110,7 @@ let add_verbose d =
!verbose !verbose
(split_at_colon d) (split_at_colon d)
let specs language extension = let specs extension =
let language = lang_to_string language in
let open! Getopt in [ let open! Getopt in [
'I', nolong, None, Some add_path; 'I', nolong, None, Some add_path;
'c', "copy", set copy true, None; 'c', "copy", set copy true, None;
@ -118,7 +122,7 @@ let specs language extension =
noshort, "mono", set mono true, None; noshort, "mono", set mono true, None;
noshort, "expr", set expr true, None; noshort, "expr", set expr true, None;
noshort, "verbose", None, Some add_verbose; noshort, "verbose", None, Some add_verbose;
'h', "help", Some (help language extension), None; 'h', "help", Some (help extension), None;
noshort, "version", Some version, None noshort, "version", Some version, None
] ]
;; ;;
@ -156,7 +160,7 @@ let print_opt () =
printf "input = %s\n" (string_of quote !input); printf "input = %s\n" (string_of quote !input);
printf "libs = %s\n" (string_of_path !libs) printf "libs = %s\n" (string_of_path !libs)
let check lang ext = let check ?block ?line ~ext =
let () = let () =
if SSet.mem "cli" !verbose then print_opt () in if SSet.mem "cli" !verbose then print_opt () in
@ -209,16 +213,19 @@ let check lang ext =
| false, false, false, true -> Tokens | false, false, false, true -> Tokens
| _ -> abort "Choose one of -q, -c, -u, -t." | _ -> abort "Choose one of -q, -c, -u, -t."
in make ~input ~libs ~verbose ~offsets ~mode ~cmd ~mono ~expr ~lang ~ext in make ~input ~libs ~verbose ~offsets ~mode
~cmd ~mono ~expr ?block ?line ~ext
(* Parsing the command-line options *) (* Parsing the command-line options *)
let read ~lang ~ext = type extension = string
let read ?block ?line (ext: extension) =
try try
Getopt.parse_cmdline (specs lang ext) anonymous; Getopt.parse_cmdline (specs ext) anonymous;
(verb_str := (verb_str :=
let apply e a = let apply e a =
if a = "" then e else Printf.sprintf "%s, %s" e a if a = "" then e else Printf.sprintf "%s, %s" e a
in SSet.fold apply !verbose ""); in SSet.fold apply !verbose "");
check lang ext check ?block ?line ~ext
with Getopt.Error msg -> abort msg with Getopt.Error msg -> abort msg

View File

@ -49,19 +49,21 @@ type command = Quiet | Copy | Units | Tokens
expected.} expected.}
} *) } *)
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
val lang_to_string : language -> string
module SSet : Set.S with type elt = string and type t = Set.Make(String).t module SSet : Set.S with type elt = string and type t = Set.Make(String).t
type line_comment = string (* Opening of a line comment *)
type block_comment = <opening : string; closing : string>
val mk_block : opening:string -> closing:string -> block_comment
type options = < type options = <
input : string option; input : string option;
libs : string list; libs : string list;
verbose : SSet.t; verbose : SSet.t;
offsets : bool; offsets : bool;
lang : language; block : block_comment option;
ext : string; (* ".ligo", ".mligo", ".religo" *) line : line_comment option;
ext : string;
mode : [`Byte | `Point]; mode : [`Byte | `Point];
cmd : command; cmd : command;
mono : bool; mono : bool;
@ -73,7 +75,8 @@ val make :
libs:string list -> libs:string list ->
verbose:SSet.t -> verbose:SSet.t ->
offsets:bool -> offsets:bool ->
lang:language -> ?block:block_comment ->
?line:line_comment ->
ext:string -> ext:string ->
mode:[`Byte | `Point] -> mode:[`Byte | `Point] ->
cmd:command -> cmd:command ->
@ -81,8 +84,9 @@ val make :
expr:bool -> expr:bool ->
options options
(** Parsing the command-line options on stdin. The first parameter is (** Parsing the command-line options on stdin. *)
the name of the concrete syntax, e.g., [PascaLIGO], and the second
is the expected file extension, e.g., ".ligo". *)
val read : lang:language -> ext:string -> options type extension = string
val read :
?block:block_comment -> ?line:line_comment -> extension -> options

View File

@ -38,8 +38,6 @@
module Region = Simple_utils.Region module Region = Simple_utils.Region
module Pos = Simple_utils.Pos module Pos = Simple_utils.Pos
type lexeme = string
(* TOKENS *) (* TOKENS *)
(* The signature [TOKEN] exports an abstract type [token], so a lexer (* The signature [TOKEN] exports an abstract type [token], so a lexer
@ -54,6 +52,8 @@ type lexeme = string
reading the ocamllex specification for the lexer ([Lexer.mll]). reading the ocamllex specification for the lexer ([Lexer.mll]).
*) *)
type lexeme = string
module type TOKEN = module type TOKEN =
sig sig
type token type token
@ -82,13 +82,6 @@ module type TOKEN =
(* Predicates *) (* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool val is_eof : token -> bool
(* Projections *) (* Projections *)
@ -96,73 +89,41 @@ module type TOKEN =
val to_lexeme : token -> lexeme val to_lexeme : token -> lexeme
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
val to_region : token -> Region.t val to_region : token -> Region.t
(* Style *)
type error
val error_to_string : error -> string
exception Error of error Region.reg
val format_error :
?offsets:bool ->
[`Byte | `Point] ->
error Region.reg ->
file:bool ->
string Region.reg
val check_right_context :
token ->
(Lexing.lexbuf -> (Markup.t list * token) option) ->
Lexing.lexbuf ->
unit
end end
(* The module type for lexers is [S]. It mainly exports the function (* The signature of the lexer *)
[open_token_stream], which returns
* a function [read] that extracts tokens from a lexing buffer,
together with a lexing buffer [buffer] to read from,
* a function [close] that closes that buffer,
* a function [get_pos] that returns the current position, and
* a function [get_last] that returns the region of the last
recognised token.
* a function [get_file] that returns the name of the file being scanned
(empty string if [stdin]).
Note that a module [Token] is exported too, because the signature
of the exported functions depend on it.
The call [read ~log] evaluates in a lexer (also known as a
tokeniser or scanner) whose type is [Lexing.lexbuf -> token], and
suitable for a parser generated by Menhir. The argument labelled
[log] is a logger, that is, it may print a token and its left
markup to a given channel, at the caller's discretion.
*)
module type S = module type S =
sig sig
module Token : TOKEN module Token : TOKEN
type token = Token.token type token = Token.token
type file_path = string (* The scanner *)
type logger = Markup.t list -> token -> unit
type window = val scan : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
Nil
| One of token
| Two of token * token
val slide : token -> window -> window (* Errors (specific to the generic lexer, not to the tokens) *)
type input =
File of file_path
| String of string
| Channel of in_channel
| Buffer of Lexing.lexbuf
type instance = {
input : input;
read : log:logger -> Lexing.lexbuf -> token;
buffer : Lexing.lexbuf;
get_win : unit -> window;
get_pos : unit -> Pos.t;
get_last : unit -> Region.t;
get_file : unit -> file_path;
close : unit -> unit
}
type open_err = File_opening of string
val lexbuf_from_input :
input -> (Lexing.lexbuf * (unit -> unit), open_err) Stdlib.result
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
val open_token_stream :
language -> input -> (instance, open_err) Stdlib.result
(* Error reporting *)
type error type error
@ -173,7 +134,6 @@ module type S =
val format_error : val format_error :
?offsets:bool -> [`Byte | `Point] -> ?offsets:bool -> [`Byte | `Point] ->
error Region.reg -> file:bool -> string Region.reg error Region.reg -> file:bool -> string Region.reg
end end
(* The functorised interface (* The functorised interface
@ -182,4 +142,4 @@ module type S =
submodule in [S]. submodule in [S].
*) *)
module Make (Token: TOKEN) : S with module Token = Token module Make (Token : TOKEN) : S with module Token = Token

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,387 @@
module Region = Simple_utils.Region
module Pos = Simple_utils.Pos
(* LEXER ENGINE *)
(* Resetting file name and line number in the lexing buffer
The call [reset ~file ~line buffer] modifies in-place the lexing
buffer [buffer] so the lexing engine records that the file
associated with [buffer] is named [file], and the current line is
[line]. This function is useful when lexing a file that has been
previously preprocessed by the C preprocessor, in which case the
argument [file] is the name of the file that was preprocessed,
_not_ the preprocessed file (of which the user is not normally
aware). By default, the [line] argument is [1].
*)
let reset_file ~file buffer =
let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname = file}
let reset_line ~line buffer =
assert (line >= 0);
let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = line}
let reset_offset ~offset buffer =
assert (offset >= 0);
let open Lexing in
let bol = buffer.lex_curr_p.pos_bol in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum = bol + offset }
let reset ?file ?line ?offset buffer =
let () =
match file with
Some file -> reset_file ~file buffer
| None -> () in
let () =
match line with
Some line -> reset_line ~line buffer
| None -> () in
match offset with
Some offset -> reset_offset ~offset buffer
| None -> ()
(* Rolling back one lexeme _within the current semantic action_ *)
let rollback buffer =
let open Lexing in
let len = String.length (lexeme buffer) in
let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
(* Utility types *)
type file_path = string
type lexeme = string
(* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)
type thread = <
opening : Region.t;
length : int;
acc : char list;
to_string : string;
push_char : char -> thread;
push_string : string -> thread;
set_opening : Region.t -> thread
>
let mk_thread region lexeme : thread =
(* The call [explode s a] is the list made by pushing the characters
in the string [s] on top of [a], in reverse order. For example,
[explode "ba" ['c';'d'] = ['a'; 'b'; 'c'; 'd']]. *)
let explode s acc =
let rec push = function
0 -> acc
| i -> s.[i-1] :: push (i-1)
in push (String.length s) in
object
val opening = region
method opening = opening
val length = String.length lexeme
method length = length
val acc = explode lexeme []
method acc = acc
method set_opening opening = {< opening; length; acc >}
method push_char char =
{< opening; length=length+1; acc=char::acc >}
method push_string str =
{< opening;
length = length + String.length str;
acc = explode str acc >}
(* The value of [thread#to_string] is a string of length
[thread#length] containing the [thread#length] characters in
the list [thread#acc], in reverse order. For instance,
[thread#to_string = "abc"] if [thread#length = 3] and
[thread#acc = ['c';'b';'a']]. *)
method to_string =
let bytes = Bytes.make length ' ' in
let rec fill i = function
[] -> bytes
| char::l -> Bytes.set bytes i char; fill (i-1) l
in fill (length-1) acc |> Bytes.to_string
end
(* STATE *)
(* Scanning the lexing buffer for tokens (and markup, as a
side-effect).
Because we want the lexer to have access to the right lexical
context of a recognised lexeme (to enforce stylistic constraints or
report special error patterns), we need to keep a hidden reference
to a queue of recognised lexical units (that is, tokens and markup)
that acts as a mutable state between the calls to [read]. When
[read] is called, that queue is examined first and, if it contains
at least one token, that token is returned; otherwise, the lexing
buffer is scanned for at least one more new token. That is the
general principle: we put a high-level buffer (our queue) on top of
the low-level lexing buffer.
One tricky and important detail is that we must make any parser
generated by Menhir (and calling [read]) believe that the last
region of the input source that was matched indeed corresponds to
the returned token, despite that many tokens and markup may have
been matched since it was actually read from the input. In other
words, the parser requests a token that is taken from the
high-level buffer, but the parser requests the source regions from
the _low-level_ lexing buffer, and they may disagree if more than
one token has actually been recognised.
Consequently, in order to maintain a consistent view for the
parser, we have to patch some fields of the lexing buffer, namely
[lex_start_p] and [lex_curr_p], as these fields are read by parsers
generated by Menhir when querying source positions (regions). This
is the purpose of the function [patch_buffer]. After reading one or
more tokens and markup by the scanning rule [scan], we have to save
in the hidden reference [buf_reg] the region of the source that was
matched by [scan]. This atomic sequence of patching, scanning and
saving is implemented by the _function_ [scan] (beware: it shadows
the scanning rule [scan]). The function [patch_buffer] is, of
course, also called just before returning the token, so the parser
has a view of the lexing buffer consistent with the token.
*)
type 'token window =
Nil
| One of 'token
| Two of 'token * 'token
type 'token state = <
units : (Markup.t list * 'token) FQueue.t;
markup : Markup.t list;
window : 'token window;
last : Region.t;
pos : Pos.t;
decoder : Uutf.decoder;
supply : Bytes.t -> int -> int -> unit;
block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
enqueue : 'token -> 'token state;
set_units : (Markup.t list * 'token) FQueue.t -> 'token state;
set_last : Region.t -> 'token state;
set_pos : Pos.t -> 'token state;
slide_token : 'token -> 'token state;
sync : Lexing.lexbuf -> Region.t * lexeme * 'token state;
push_newline : Lexing.lexbuf -> 'token state;
push_line : thread -> 'token state;
push_block : thread -> 'token state;
push_space : Lexing.lexbuf -> 'token state;
push_tabs : Lexing.lexbuf -> 'token state;
push_bom : Lexing.lexbuf -> 'token state;
push_markup : Markup.t -> 'token state;
>
(* [mk_state ~units ~markup ~window ~last ~pos ~decoder ~supply
   ?block ?line ()] builds the initial logical state of the lexer as
   an immutable object. Updates are expressed with the functional
   object-update syntax [{< ... >}], which copies the object with
   the listed fields changed. *)

let mk_state ~units ~markup ~window ~last ~pos ~decoder ~supply
             ?block ?line () : _ state =
  object (self)
    val units = units
    method units = units
    val markup = markup
    method markup = markup
    val window = window
    method window = window
    val last = last
    method last = last
    val pos = pos
    method pos = pos
    method decoder = decoder
    method supply = supply
    method block = block
    method line = line

    (* Associating [token] with the pending markup and enqueuing the
       pair; the pending markup is then reset to the empty list. *)

    method enqueue token =
      {< units = FQueue.enq (markup, token) units;
         markup = [] >}

    method set_units units = {< units = units >}
    method set_last region = {< last = region >}
    method set_pos pos = {< pos = pos >}

    (* Pushing [token] into the two-token window; when the window is
       full, the oldest token is discarded. *)

    method slide_token token =
      match self#window with
        Nil -> {< window = One token >}
      | One t | Two (t,_) -> {< window = Two (token,t) >}

    (* Advancing the current position by the length of the lexeme
       just matched, and returning its region, the lexeme itself and
       the updated state. *)

    method sync buffer =
      let lex = Lexing.lexeme buffer in
      let len = String.length lex in
      let start = pos in
      let stop = start#shift_bytes len in
      let state = {< pos = stop >}
      in Region.make ~start ~stop, lex, state

    (* MARKUP *)

    (* Committing markup to the current logical state *)

    method push_markup unit = {< markup = unit :: markup >}

    (* A newline updates both the physical state (the lexing
       buffer's line count, via [Lexing.new_line]) and the logical
       position. *)

    method push_newline buffer =
      let () = Lexing.new_line buffer in
      let value = Lexing.lexeme buffer in
      let start = self#pos in
      let stop = start#new_line value in
      let region = Region.make ~start ~stop in
      let unit = Markup.Newline Region.{region; value}
      in {< pos = stop; markup = unit::markup >}

    (* A line comment's region spans from the opening symbol of the
       thread to the current position. *)

    method push_line thread =
      let start = thread#opening#start in
      let region = Region.make ~start ~stop:self#pos
      and value = thread#to_string in
      let unit = Markup.LineCom Region.{region; value}
      in {< markup = unit::markup >}

    (* Same as [push_line], for block comments. *)

    method push_block thread =
      let start = thread#opening#start in
      let region = Region.make ~start ~stop:self#pos
      and value = thread#to_string in
      let unit = Markup.BlockCom Region.{region; value}
      in {< markup = unit::markup >}

    method push_space buffer =
      let region, lex, state = self#sync buffer in
      let value = String.length lex in
      let unit = Markup.Space Region.{region; value}
      in state#push_markup unit

    method push_tabs buffer =
      let region, lex, state = self#sync buffer in
      let value = String.length lex in
      let unit = Markup.Tabs Region.{region; value}
      in state#push_markup unit

    method push_bom buffer =
      let region, value, state = self#sync buffer in
      let unit = Markup.BOM Region.{region; value}
      in state#push_markup unit
  end
(* LEXER INSTANCE *)

(* The type [input] names the possible sources of characters for the
   lexer. *)

type input =
  File of file_path
| String of string
| Channel of in_channel
| Buffer of Lexing.lexbuf

(* A ['token logger] may print a token together with the markup at
   its left, at the caller's discretion. *)

type 'token logger = Markup.t list -> 'token -> unit

(* A lexer instance packages the tokeniser [read] (suitable for
   Menhir-generated parsers) with its lexing buffer [buffer],
   accessors over the hidden scanning state ([get_win], [get_pos],
   [get_last], [get_file]) and a [close] function releasing the
   underlying channel, if any. *)

type 'token instance = {
  input : input;
  read : log:('token logger) -> Lexing.lexbuf -> 'token;
  buffer : Lexing.lexbuf;
  get_win : unit -> 'token window;
  get_pos : unit -> Pos.t;
  get_last : unit -> Region.t;
  get_file : unit -> file_path;
  close : unit -> unit
}

(* Failure to open the input file; returned as [Stdlib.Error]. *)

type open_err = File_opening of string
(* [lexbuf_from_input input] makes a lexing buffer out of [input],
   paired with a closing function releasing the underlying channel,
   if any. Opening a file may fail with [Error (File_opening msg)];
   the other sources cannot fail. *)

let lexbuf_from_input input =
  let no_close () = () in
  match input with
    String s ->
      Ok (Lexing.from_string s, no_close)
  | Buffer b ->
      Ok (b, no_close)
  | Channel chan ->
      Ok (Lexing.from_channel chan, fun () -> close_in chan)
  | File path ->
      (try
         let chan = open_in path in
         let lexbuf = Lexing.from_channel chan in
         let open Lexing in
         lexbuf.lex_curr_p <- {lexbuf.lex_curr_p with pos_fname=path};
         Ok (lexbuf, fun () -> close_in chan)
       with Sys_error msg -> Stdlib.Error (File_opening msg))
(* [open_token_stream ?line ?block ~scan ~token_to_region ~style
   input] builds a lexer instance reading from [input]. The logical
   state and the last matched region are kept in hidden references
   ([state] and [buf_reg]) so that the returned [read] closure can
   be called like an ordinary Menhir tokeniser. *)

let open_token_stream ?line ?block ~scan
                      ~token_to_region ~style input =
  let file_path = match input with
    File path -> path
  | _ -> "" in
  let pos = Pos.min ~file:file_path in
  (* [buf_reg] stores the byte positions matched by the last run of
     the scanning rule; it is used to patch the lexing buffer so the
     parser's view of source regions stays consistent with the
     tokens it receives (see the long comment above). *)
  let buf_reg = ref (pos#byte, pos#byte)
  and decoder = Uutf.decoder ~encoding:`UTF_8 `Manual in
  let supply = Uutf.Manual.src decoder in
  (* The logical state, hidden behind a reference so the closures
     below share and update it. *)
  let state = ref (mk_state
                     ~units:FQueue.empty
                     ~last:Region.ghost
                     ~window:Nil
                     ~pos
                     ~markup:[]
                     ~decoder
                     ~supply
                     ?block
                     ?line
                     ()) in
  (* Read-only accessors over the hidden state *)
  let get_pos () = !state#pos
  and get_last () = !state#last
  and get_win () = !state#window
  and get_file () = file_path in
  (* [patch_buffer (start, stop) buffer] overwrites the positions
     Menhir reads from [buffer], preserving the file name. *)
  let patch_buffer (start, stop) buffer =
    let open Lexing in
    let file_path = buffer.lex_curr_p.pos_fname in
    buffer.lex_start_p <- {start with pos_fname = file_path};
    buffer.lex_curr_p <- {stop with pos_fname = file_path}
  and save_region buffer =
    buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p) in
  (* One step of scanning: restore the saved region, run the
     scanning rule, then save the newly matched region. *)
  let scan' scan buffer =
    patch_buffer !buf_reg buffer;
    state := scan !state buffer;
    save_region buffer in
  (* Scanning one more unit and peeking at the next token without
     removing it from the queue. *)
  let next_token scan buffer =
    scan' scan buffer;
    match FQueue.peek !state#units with
      None -> None
    | Some (units, ext_token) ->
        state := !state#set_units units; Some ext_token in
  (* The tokeniser handed to the parser: dequeue the next token
     (scanning more input whenever the queue is empty), log it,
     update [last] and the token window, check the style constraint,
     and finally patch the buffer so the parser sees the token's own
     region. *)
  let rec read scan ~token_to_region ~style ~log buffer =
    match FQueue.deq !state#units with
      None ->
        scan' scan buffer;
        read scan ~token_to_region ~style ~log buffer
    | Some (units, (left_mark, token)) ->
        log left_mark token;
        state := ((!state#set_units units)
                    #set_last (token_to_region token))
                   #slide_token token;
        style token (next_token scan) buffer;
        patch_buffer (token_to_region token)#byte_pos buffer;
        token in
  match lexbuf_from_input input with
    Ok (buffer, close) ->
      let () =
        (* Record the file name in the buffer for error reports *)
        match input with
          File path when path <> "" -> reset ~file:path buffer
        | _ -> () in
      let instance = {
        read = read scan ~token_to_region ~style;
        input; buffer; get_win; get_pos; get_last; get_file; close}
      in Ok instance
  | Error _ as e -> e

View File

@ -0,0 +1,203 @@
(* A library for writing UTF8-aware lexers *)

module Region = Simple_utils.Region
module Pos = Simple_utils.Pos

(* The function [rollback] resets the lexing buffer to the state it
   was when it matched the last regular expression. This function is
   safe to use only in the semantic action of the rule which last
   matched. *)

val rollback : Lexing.lexbuf -> unit

(* Utility types *)

type file_path = string  (* Name of a source file *)
type lexeme = string     (* Concrete text matched by a rule *)
(* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)

(* When scanning structured constructs, like strings and comments,
   we need to keep the region of the opening symbol (like double
   quote, "//" or "(*") in order to report any error more
   precisely. Since ocamllex is byte-oriented, we store the parsed
   bytes as characters in an accumulator [acc], along with its
   length [length], so that, when we are done, it is easy to build
   the string making up the structured construct with [to_string].

   The resulting data structure is called a _thread_. (Note for
   Emacs: "*)".)
*)

type thread = <
  opening : Region.t;
  length : int;
  acc : char list;
  to_string : string;
  push_char : char -> thread;
  push_string : string -> thread;
  set_opening : Region.t -> thread
>

(* [mk_thread region lexeme] makes a fresh thread whose opening
   region is [region]; see the implementation for how [lexeme]
   contributes to the initial contents. *)

val mk_thread : Region.t -> lexeme -> thread
(* STATE *)

(* Beyond producing tokens, the result of lexing is a _state_. The
   type [state] represents the abstract logical state of the lexing
   engine, that is, a value which is threaded during scanning and
   which denotes useful, high-level information beyond what the type
   [Lexing.lexbuf] in the standard library already provides for all
   generic lexers. We qualify it as "logical state" because the
   lexing buffer itself has a "physical state" defined by the type
   [Lexing.lexbuf].

     Tokens are the smallest units used by the parser to build the
   abstract syntax tree. The state includes a queue of recognised
   tokens, with the markup at the left of its lexeme until either
   the start of the file or the end of the previously recognised
   token.

     The markup from the last recognised token or, if the first
   token has not been recognised yet, from the beginning of the file
   is stored in the field [markup] of the state because it is a
   side-effect with respect to the output token list. The state is
   an object type, so it can be easily extended during the future
   maintenance of this lexer.

     The state also includes a field [pos] which holds the current
   position in the LIGO source file. The position is not always
   updated after a single character has been matched: that depends
   on the regular expression that matched the lexing buffer.

     The field [window] is a two-token window, that is, a buffer
   that contains the last recognised token, and the penultimate (if
   any). Technically, it is a parametric type, but its use is meant
   for tokens, wherever they are defined. In [Two (t1,t2)], and in
   case of a syntax error, [t1] is the first invalid token and [t2]
   is the last valid one.

     The fields [decoder] and [supply] offer the support needed for
   the lexing of UTF-8 encoded characters in comments (the only
   place where they are allowed in LIGO). The former is the decoder
   proper and the latter is the effectful function [supply] that
   takes a byte, a start index and a length and feed it to
   [decoder]. See the documentation of the third-party library
   Uutf.

     Some methods are now documented.

     The call [state#enqueue token] updates functionally the state
   [state] by associating the token [token] with the stored markup
   and enqueuing the pair into the units queue. The field [markup]
   is then reset to the empty list.

     The call [state#slide_token token] pushes the token [token] in
   the buffer [buffer]. If the buffer is full, that is, it is [Two
   (t1,t2)], then the token [t2] is discarded to make room for
   [token].

     The call [state#sync buffer] updates the current position in
   accordance with the contents of the lexing buffer, more
   precisely, depending on the length of the string which has just
   been recognised by the scanner: that length is used as a positive
   offset to the current column.
*)

type 'token window =
  Nil
| One of 'token
| Two of 'token * 'token

type 'token state = <
  units : (Markup.t list * 'token) FQueue.t;
  markup : Markup.t list;
  window : 'token window;
  last : Region.t;
  pos : Pos.t;
  decoder : Uutf.decoder;
  supply : Bytes.t -> int -> int -> unit;
  block : EvalOpt.block_comment option;
  line : EvalOpt.line_comment option;

  enqueue : 'token -> 'token state;
  set_units : (Markup.t list * 'token) FQueue.t -> 'token state;
  set_last : Region.t -> 'token state;
  set_pos : Pos.t -> 'token state;
  slide_token : 'token -> 'token state;

  sync : Lexing.lexbuf -> Region.t * lexeme * 'token state;

  push_newline : Lexing.lexbuf -> 'token state;
  push_line : thread -> 'token state;
  push_block : thread -> 'token state;
  push_space : Lexing.lexbuf -> 'token state;
  push_tabs : Lexing.lexbuf -> 'token state;
  push_bom : Lexing.lexbuf -> 'token state;
  push_markup : Markup.t -> 'token state;
>
(* LEXER INSTANCE *)

(* The function [open_token_stream] returns a lexer instance made of

     * the input [input] of type [input];
     * a function [read] that extracts tokens from a lexing buffer,
       together with a lexing buffer [buffer] to read from,
     * a function [close] that closes that buffer,
     * a function [get_pos] that returns the current position, and
     * a function [get_last] that returns the region of the last
       recognised token.
     * a function [get_file] that returns the name of the file being
       scanned (empty string if [stdin]).

   The type [window] is a two-token window, that is, a buffer that
   contains the last recognised token, and the penultimate (if any).

   The call [open_token_stream ?line ?block ~scan ~token_to_region
   ~style input] evaluates in a lexer instance (also known as a
   tokeniser or scanner) whose field [read] has type
   [log:('token logger) -> Lexing.lexbuf -> 'token], suitable for a
   parser generated by Menhir. The argument labelled [log] is a
   logger, that is, it may print a token and its left markup to a
   given channel, at the caller's discretion. The function labelled
   [~scan] is the main scanner of the lexer. The function labelled
   [~style] is used to check stylistic constraints on the tokens and
   the markup between them.
*)

type input =
  File of file_path
| String of string
| Channel of in_channel
| Buffer of Lexing.lexbuf

type 'token logger = Markup.t list -> 'token -> unit

type 'token instance = {
  input : input;
  read : log:('token logger) -> Lexing.lexbuf -> 'token;
  buffer : Lexing.lexbuf;
  get_win : unit -> 'token window;
  get_pos : unit -> Pos.t;
  get_last : unit -> Region.t;
  get_file : unit -> file_path;
  close : unit -> unit
}

(* Failure to open the input file; returned as [Stdlib.Error]. *)

type open_err = File_opening of string

(* [lexbuf_from_input input] makes a lexing buffer from [input],
   paired with a function closing the underlying channel, if any. *)

val lexbuf_from_input :
  input -> (Lexing.lexbuf * (unit -> unit), open_err) Stdlib.result

val open_token_stream :
  ?line:EvalOpt.line_comment ->
  ?block:EvalOpt.block_comment ->
  scan:('token state -> Lexing.lexbuf -> 'token state) ->
  token_to_region:('token -> Region.t) ->
  style:('token ->
         (Lexing.lexbuf -> (Markup.t list * 'token) option) ->
         Lexing.lexbuf ->
         unit) ->
  input ->
  ('token instance, open_err) Stdlib.result

View File

@ -5,6 +5,7 @@ module Region = Simple_utils.Region
module type S = module type S =
sig sig
module Lexer : Lexer.S module Lexer : Lexer.S
type token = Lexer.token
val output_token : val output_token :
?offsets:bool -> ?offsets:bool ->
@ -12,7 +13,7 @@ module type S =
EvalOpt.command -> EvalOpt.command ->
out_channel -> out_channel ->
Markup.t list -> Markup.t list ->
Lexer.token -> token ->
unit unit
type file_path = string type file_path = string
@ -20,8 +21,14 @@ module type S =
val trace : val trace :
?offsets:bool -> ?offsets:bool ->
[`Byte | `Point] -> [`Byte | `Point] ->
EvalOpt.language -> ?block:EvalOpt.block_comment ->
Lexer.input -> ?line:EvalOpt.line_comment ->
token_to_region:(token -> Region.t) ->
style:(token ->
(Lexing.lexbuf -> (Markup.t list * token) option) ->
Lexing.lexbuf ->
unit) ->
LexerLib.input ->
EvalOpt.command -> EvalOpt.command ->
(unit, string Region.reg) Stdlib.result (unit, string Region.reg) Stdlib.result
end end
@ -30,10 +37,12 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
struct struct
module Lexer = Lexer module Lexer = Lexer
module Token = Lexer.Token module Token = Lexer.Token
type token = Lexer.token
(** Pretty-printing in a string the lexemes making up the markup (* Pretty-printing in a string the lexemes making up the markup
between two tokens, concatenated with the last lexeme between two tokens, concatenated with the last lexeme
itself. *) itself. *)
let output_token ?(offsets=true) mode command let output_token ?(offsets=true) mode command
channel left_mark token : unit = channel left_mark token : unit =
let output str = Printf.fprintf channel "%s%!" str in let output str = Printf.fprintf channel "%s%!" str in
@ -56,10 +65,16 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
type file_path = string type file_path = string
let trace ?(offsets=true) mode lang input command : let trace ?(offsets=true) mode ?block ?line
~token_to_region ~style input command :
(unit, string Region.reg) Stdlib.result = (unit, string Region.reg) Stdlib.result =
match Lexer.open_token_stream lang input with match LexerLib.open_token_stream
Ok Lexer.{read; buffer; close; _} -> ~scan:Lexer.scan
~token_to_region
~style
?line ?block input
with
Ok LexerLib.{read; buffer; close; _} ->
let log = output_token ~offsets mode command stdout let log = output_token ~offsets mode command stdout
and close_all () = flush_all (); close () in and close_all () = flush_all (); close () in
let rec iter () = let rec iter () =
@ -68,12 +83,17 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
if Token.is_eof token if Token.is_eof token
then Stdlib.Ok () then Stdlib.Ok ()
else iter () else iter ()
| exception Lexer.Token.Error error ->
let msg =
Lexer.Token.format_error
~offsets mode ~file:true error
in Stdlib.Error msg
| exception Lexer.Error error -> | exception Lexer.Error error ->
let msg = let msg =
Lexer.format_error ~offsets mode ~file:true error Lexer.format_error ~offsets mode ~file:true error
in Stdlib.Error msg in in Stdlib.Error msg in
let result = iter () let result = iter ()
in close_all (); result in close_all (); result
| Stdlib.Error (Lexer.File_opening msg) -> | Stdlib.Error (LexerLib.File_opening msg) ->
flush_all (); Stdlib.Error (Region.wrap_ghost msg) flush_all (); Stdlib.Error (Region.wrap_ghost msg)
end end

View File

@ -3,6 +3,7 @@ module Region = Simple_utils.Region
module type S = module type S =
sig sig
module Lexer : Lexer.S module Lexer : Lexer.S
type token = Lexer.token
val output_token : val output_token :
?offsets:bool -> ?offsets:bool ->
@ -10,7 +11,7 @@ module type S =
EvalOpt.command -> EvalOpt.command ->
out_channel -> out_channel ->
Markup.t list -> Markup.t list ->
Lexer.token -> token ->
unit unit
type file_path = string type file_path = string
@ -18,8 +19,14 @@ module type S =
val trace : val trace :
?offsets:bool -> ?offsets:bool ->
[`Byte | `Point] -> [`Byte | `Point] ->
EvalOpt.language -> ?block:EvalOpt.block_comment ->
Lexer.input -> ?line:EvalOpt.line_comment ->
token_to_region:(token -> Region.t) ->
style:(token ->
(Lexing.lexbuf -> (Markup.t list * token) option) ->
Lexing.lexbuf ->
unit) ->
LexerLib.input ->
EvalOpt.command -> EvalOpt.command ->
(unit, string Region.reg) Stdlib.result (unit, string Region.reg) Stdlib.result
end end

View File

@ -39,9 +39,15 @@ module Make (IO: IO) (Lexer: Lexer.S) =
| Stdlib.Ok pp_buffer -> | Stdlib.Ok pp_buffer ->
(* Running the lexer on the preprocessed input *) (* Running the lexer on the preprocessed input *)
let source = Lexer.String (Buffer.contents pp_buffer) in let source = LexerLib.String (Buffer.contents pp_buffer) in
match Lexer.open_token_stream IO.options#lang source with match LexerLib.open_token_stream
Ok Lexer.{read; buffer; close; _} -> ?line:IO.options#line
?block:IO.options#block
~scan:Lexer.scan
~token_to_region:Lexer.Token.to_region
~style:Lexer.Token.check_right_context
source with
Ok LexerLib.{read; buffer; close; _} ->
let close_all () = flush_all (); close () in let close_all () = flush_all (); close () in
let rec read_tokens tokens = let rec read_tokens tokens =
match read ~log:(fun _ _ -> ()) buffer with match read ~log:(fun _ _ -> ()) buffer with
@ -49,20 +55,28 @@ module Make (IO: IO) (Lexer: Lexer.S) =
if Lexer.Token.is_eof token if Lexer.Token.is_eof token
then Stdlib.Ok (List.rev tokens) then Stdlib.Ok (List.rev tokens)
else read_tokens (token::tokens) else read_tokens (token::tokens)
| exception Lexer.Token.Error error ->
let file =
match IO.options#input with
None | Some "-" -> false
| Some _ -> true in
let msg =
Lexer.Token.format_error
~offsets:IO.options#offsets
IO.options#mode ~file error
in Stdlib.Error msg
| exception Lexer.Error error -> | exception Lexer.Error error ->
let file = let file =
match IO.options#input with match IO.options#input with
None | Some "-" -> false None | Some "-" -> false
| Some _ -> true in | Some _ -> true in
let () =
Printf.eprintf "[LexerUnit] file = %b\n%!" file in
let msg = let msg =
Lexer.format_error ~offsets:IO.options#offsets Lexer.format_error ~offsets:IO.options#offsets
IO.options#mode ~file error IO.options#mode ~file error
in Stdlib.Error msg in in Stdlib.Error msg in
let result = read_tokens [] let result = read_tokens []
in close_all (); result in close_all (); result
| Stdlib.Error (Lexer.File_opening msg) -> | Stdlib.Error (LexerLib.File_opening msg) ->
flush_all (); Stdlib.Error (Region.wrap_ghost msg) in flush_all (); Stdlib.Error (Region.wrap_ghost msg) in
match IO.options#input with match IO.options#input with
None -> preproc stdin None -> preproc stdin
@ -101,8 +115,11 @@ module Make (IO: IO) (Lexer: Lexer.S) =
end end
else Log.trace ~offsets:IO.options#offsets else Log.trace ~offsets:IO.options#offsets
IO.options#mode IO.options#mode
IO.options#lang ?block:IO.options#block
(Lexer.String preproc_str) ?line:IO.options#line
~token_to_region:Lexer.Token.to_region
~style:Lexer.Token.check_right_context
(LexerLib.String preproc_str)
IO.options#cmd IO.options#cmd
in match IO.options#input with in match IO.options#input with
None -> preproc stdin None -> preproc stdin

View File

@ -57,7 +57,7 @@ module type PARSER =
module Make (IO: IO) module Make (IO: IO)
(Lexer: Lexer.S) (Lexer: Lexer.S)
(Parser: PARSER with type token = Lexer.Token.token) (Parser: PARSER with type token = Lexer.token)
(ParErr: sig val message : int -> string end) = (ParErr: sig val message : int -> string end) =
struct struct
module I = Parser.MenhirInterpreter module I = Parser.MenhirInterpreter
@ -122,10 +122,10 @@ module Make (IO: IO)
message message
in in
match get_win () with match get_win () with
Lexer.Nil -> assert false LexerLib.Nil -> assert false
| Lexer.One invalid -> | LexerLib.One invalid ->
raise (Point (message, None, invalid)) raise (Point (message, None, invalid))
| Lexer.Two (invalid, valid) -> | LexerLib.Two (invalid, valid) ->
raise (Point (message, Some valid, invalid)) raise (Point (message, Some valid, invalid))
(* The monolithic API of Menhir *) (* The monolithic API of Menhir *)
@ -143,14 +143,14 @@ module Make (IO: IO)
~offsets:IO.options#offsets ~offsets:IO.options#offsets
IO.options#mode IO.options#cmd stdout IO.options#mode IO.options#cmd stdout
let incr_contract Lexer.{read; buffer; get_win; close; _} = let incr_contract LexerLib.{read; buffer; get_win; close; _} =
let supplier = I.lexer_lexbuf_to_supplier (read ~log) buffer let supplier = I.lexer_lexbuf_to_supplier (read ~log) buffer
and failure = failure get_win in and failure = failure get_win in
let parser = Incr.contract buffer.Lexing.lex_curr_p in let parser = Incr.contract buffer.Lexing.lex_curr_p in
let ast = I.loop_handle success failure supplier parser let ast = I.loop_handle success failure supplier parser
in flush_all (); close (); ast in flush_all (); close (); ast
let incr_expr Lexer.{read; buffer; get_win; close; _} = let incr_expr LexerLib.{read; buffer; get_win; close; _} =
let supplier = I.lexer_lexbuf_to_supplier (read ~log) buffer let supplier = I.lexer_lexbuf_to_supplier (read ~log) buffer
and failure = failure get_win in and failure = failure get_win in
let parser = Incr.interactive_expr buffer.Lexing.lex_curr_p in let parser = Incr.interactive_expr buffer.Lexing.lex_curr_p in

View File

@ -56,7 +56,7 @@ module type PARSER =
module Make (IO: IO) module Make (IO: IO)
(Lexer: Lexer.S) (Lexer: Lexer.S)
(Parser: PARSER with type token = Lexer.Token.token) (Parser: PARSER with type token = Lexer.token)
(ParErr: sig val message : int -> string end) : (ParErr: sig val message : int -> string end) :
sig sig
(* WARNING: The following parsers may all raise [Lexer.Error] *) (* WARNING: The following parsers may all raise [Lexer.Error] *)
@ -78,8 +78,8 @@ module Make (IO: IO)
exception Point of error exception Point of error
val incr_contract : Lexer.instance -> Parser.ast val incr_contract : Lexer.token LexerLib.instance -> Parser.ast
val incr_expr : Lexer.instance -> Parser.expr val incr_expr : Lexer.token LexerLib.instance -> Parser.expr
val format_error : val format_error :
?offsets:bool -> [`Point | `Byte] -> error -> string Region.reg ?offsets:bool -> [`Point | `Byte] -> error -> string Region.reg

View File

@ -4,16 +4,15 @@ module Region = Simple_utils.Region
module Preproc = Preprocessor.Preproc module Preproc = Preprocessor.Preproc
module SSet = Set.Make (String) module SSet = Set.Make (String)
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
module type SubIO = module type SubIO =
sig sig
type options = < type options = <
libs : string list; libs : string list;
verbose : SSet.t; verbose : SSet.t;
offsets : bool; offsets : bool;
lang : language; block : EvalOpt.block_comment option;
ext : string; (* ".ligo", ".mligo", ".religo" *) line : EvalOpt.line_comment option;
ext : string;
mode : [`Byte | `Point]; mode : [`Byte | `Point];
cmd : EvalOpt.command; cmd : EvalOpt.command;
mono : bool mono : bool
@ -23,7 +22,7 @@ module type SubIO =
val make : input:string option -> expr:bool -> EvalOpt.options val make : input:string option -> expr:bool -> EvalOpt.options
end end
module type Pretty = module type Printer =
sig sig
type state type state
type ast type ast
@ -45,7 +44,7 @@ module Make (Lexer: Lexer.S)
and type expr = AST.expr and type expr = AST.expr
and type token = Lexer.token) and type token = Lexer.token)
(ParErr: sig val message : int -> string end) (ParErr: sig val message : int -> string end)
(ParserLog: Pretty with type ast = AST.t (ParserLog: Printer with type ast = AST.t
and type expr = AST.expr) and type expr = AST.expr)
(SubIO: SubIO) = (SubIO: SubIO) =
struct struct
@ -90,12 +89,12 @@ module Make (Lexer: Lexer.S)
ParserLog.mk_state ~offsets:SubIO.options#offsets ParserLog.mk_state ~offsets:SubIO.options#offsets
~mode:SubIO.options#mode ~mode:SubIO.options#mode
~buffer:output in ~buffer:output in
let close () = lexer_inst.Lexer.close () in let close () = lexer_inst.LexerLib.close () in
let expr = let expr =
try try
if SubIO.options#mono then if SubIO.options#mono then
let tokeniser = lexer_inst.Lexer.read ~log let tokeniser = lexer_inst.LexerLib.read ~log
and lexbuf = lexer_inst.Lexer.buffer and lexbuf = lexer_inst.LexerLib.buffer
in Front.mono_expr tokeniser lexbuf in Front.mono_expr tokeniser lexbuf
else else
Front.incr_expr lexer_inst Front.incr_expr lexer_inst
@ -125,12 +124,12 @@ module Make (Lexer: Lexer.S)
ParserLog.mk_state ~offsets:SubIO.options#offsets ParserLog.mk_state ~offsets:SubIO.options#offsets
~mode:SubIO.options#mode ~mode:SubIO.options#mode
~buffer:output in ~buffer:output in
let close () = lexer_inst.Lexer.close () in let close () = lexer_inst.LexerLib.close () in
let ast = let ast =
try try
if SubIO.options#mono then if SubIO.options#mono then
let tokeniser = lexer_inst.Lexer.read ~log let tokeniser = lexer_inst.LexerLib.read ~log
and lexbuf = lexer_inst.Lexer.buffer and lexbuf = lexer_inst.LexerLib.buffer
in Front.mono_contract tokeniser lexbuf in Front.mono_contract tokeniser lexbuf
else else
Front.incr_contract lexer_inst Front.incr_contract lexer_inst
@ -164,9 +163,17 @@ module Make (Lexer: Lexer.S)
| exception Lexer.Error err -> | exception Lexer.Error err ->
let file = let file =
lexer_inst.Lexer.buffer.Lexing.lex_curr_p.Lexing.pos_fname in lexer_inst.LexerLib.buffer.Lexing.lex_curr_p.Lexing.pos_fname in
let error = let error = Lexer.format_error
Lexer.format_error ~offsets:SubIO.options#offsets ~offsets:SubIO.options#offsets
SubIO.options#mode err ~file:(file <> "")
in Stdlib.Error error
| exception Lexer.Token.Error err ->
let file =
lexer_inst.LexerLib.buffer.Lexing.lex_curr_p.Lexing.pos_fname in
let error = Lexer.Token.format_error
~offsets:SubIO.options#offsets
SubIO.options#mode err ~file:(file <> "") SubIO.options#mode err ~file:(file <> "")
in Stdlib.Error error in Stdlib.Error error
@ -182,11 +189,11 @@ module Make (Lexer: Lexer.S)
| exception Parser.Error -> | exception Parser.Error ->
let invalid, valid_opt = let invalid, valid_opt =
match lexer_inst.Lexer.get_win () with match lexer_inst.LexerLib.get_win () with
Lexer.Nil -> LexerLib.Nil ->
assert false (* Safe: There is always at least EOF. *) assert false (* Safe: There is always at least EOF. *)
| Lexer.One invalid -> invalid, None | LexerLib.One invalid -> invalid, None
| Lexer.Two (invalid, valid) -> invalid, Some valid in | LexerLib.Two (invalid, valid) -> invalid, Some valid in
let point = "", valid_opt, invalid in let point = "", valid_opt, invalid in
let error = let error =
Front.format_error ~offsets:SubIO.options#offsets Front.format_error ~offsets:SubIO.options#offsets
@ -206,8 +213,8 @@ module Make (Lexer: Lexer.S)
(* Parsing a contract *) (* Parsing a contract *)
let gen_parser options input parser = let gen_parser options input parser =
match Lexer.lexbuf_from_input input with match LexerLib.lexbuf_from_input input with
Stdlib.Error (Lexer.File_opening msg) -> Stdlib.Error (LexerLib.File_opening msg) ->
Stdlib.Error (Region.wrap_ghost msg) Stdlib.Error (Region.wrap_ghost msg)
| Ok (lexbuf, close) -> | Ok (lexbuf, close) ->
(* Preprocessing the input source *) (* Preprocessing the input source *)
@ -225,45 +232,52 @@ module Make (Lexer: Lexer.S)
(* Lexing and parsing the preprocessed input source *) (* Lexing and parsing the preprocessed input source *)
let () = close () in let () = close () in
let input' = Lexer.String (Buffer.contents buffer) in let input' = LexerLib.String (Buffer.contents buffer) in
match Lexer.open_token_stream options#lang input' with match LexerLib.open_token_stream
~scan:Lexer.scan
~token_to_region:Lexer.Token.to_region
~style:Lexer.Token.check_right_context
?line:options#line
?block:options#block
input'
with
Ok instance -> Ok instance ->
let open Lexing in let open Lexing in
instance.Lexer.buffer.lex_curr_p <- instance.LexerLib.buffer.lex_curr_p <-
{instance.Lexer.buffer.lex_curr_p with pos_fname = file}; {instance.LexerLib.buffer.lex_curr_p with pos_fname=file};
apply instance parser apply instance parser
| Stdlib.Error (Lexer.File_opening msg) -> | Stdlib.Error (LexerLib.File_opening msg) ->
Stdlib.Error (Region.wrap_ghost msg) Stdlib.Error (Region.wrap_ghost msg)
(* Parsing a contract in a file *) (* Parsing a contract in a file *)
let contract_in_file (source : string) = let contract_in_file (source : string) =
let options = SubIO.make ~input:(Some source) ~expr:false let options = SubIO.make ~input:(Some source) ~expr:false
in gen_parser options (Lexer.File source) parse_contract in gen_parser options (LexerLib.File source) parse_contract
(* Parsing a contract in a string *) (* Parsing a contract in a string *)
let contract_in_string (source : string) = let contract_in_string (source : string) =
let options = SubIO.make ~input:None ~expr:false in let options = SubIO.make ~input:None ~expr:false in
gen_parser options (Lexer.String source) parse_contract gen_parser options (LexerLib.String source) parse_contract
(* Parsing a contract in stdin *) (* Parsing a contract in stdin *)
let contract_in_stdin () = let contract_in_stdin () =
let options = SubIO.make ~input:None ~expr:false in let options = SubIO.make ~input:None ~expr:false in
gen_parser options (Lexer.Channel stdin) parse_contract gen_parser options (LexerLib.Channel stdin) parse_contract
(* Parsing an expression in a string *) (* Parsing an expression in a string *)
let expr_in_string (source : string) = let expr_in_string (source : string) =
let options = SubIO.make ~input:None ~expr:true in let options = SubIO.make ~input:None ~expr:true in
gen_parser options (Lexer.String source) parse_expr gen_parser options (LexerLib.String source) parse_expr
(* Parsing an expression in stdin *) (* Parsing an expression in stdin *)
let expr_in_stdin () = let expr_in_stdin () =
let options = SubIO.make ~input:None ~expr:true in let options = SubIO.make ~input:None ~expr:true in
gen_parser options (Lexer.Channel stdin) parse_expr gen_parser options (LexerLib.Channel stdin) parse_expr
(* Preprocess only *) (* Preprocess only *)

View File

@ -2,18 +2,19 @@
module Region = Simple_utils.Region module Region = Simple_utils.Region
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
module SSet : Set.S with type elt = string and type t = Set.Make(String).t module SSet : Set.S with type elt = string and type t = Set.Make(String).t
(* A subtype of [EvalOpt.options] *)
module type SubIO = module type SubIO =
sig sig
type options = < type options = <
libs : string list; libs : string list;
verbose : SSet.t; verbose : SSet.t;
offsets : bool; offsets : bool;
lang : language; block : EvalOpt.block_comment option;
ext : string; (* ".ligo", ".mligo", ".religo" *) line : EvalOpt.line_comment option;
ext : string;
mode : [`Byte | `Point]; mode : [`Byte | `Point];
cmd : EvalOpt.command; cmd : EvalOpt.command;
mono : bool mono : bool
@ -23,7 +24,9 @@ module type SubIO =
val make : input:string option -> expr:bool -> EvalOpt.options val make : input:string option -> expr:bool -> EvalOpt.options
end end
module type Pretty = (* Signature for the printers *)
module type Printer =
sig sig
type state type state
type ast type ast
@ -38,6 +41,8 @@ module type Pretty =
val print_expr : state -> expr -> unit val print_expr : state -> expr -> unit
end end
(* Main functor to make the parser *)
module Make (Lexer : Lexer.S) module Make (Lexer : Lexer.S)
(AST : sig type t type expr end) (AST : sig type t type expr end)
(Parser : ParserAPI.PARSER (Parser : ParserAPI.PARSER
@ -45,7 +50,7 @@ module Make (Lexer : Lexer.S)
and type expr = AST.expr and type expr = AST.expr
and type token = Lexer.token) and type token = Lexer.token)
(ParErr : sig val message : int -> string end) (ParErr : sig val message : int -> string end)
(ParserLog : Pretty with type ast = AST.t (ParserLog : Printer with type ast = AST.t
and type expr = AST.expr) and type expr = AST.expr)
(SubIO: SubIO) : (SubIO: SubIO) :
sig sig
@ -82,4 +87,4 @@ module Make (Lexer : Lexer.S)
val preprocess : val preprocess :
string -> (Buffer.t, message Region.reg) Stdlib.result string -> (Buffer.t, message Region.reg) Stdlib.result
end end

View File

@ -13,6 +13,7 @@
(preprocess (preprocess
(pps bisect_ppx --conditional)) (pps bisect_ppx --conditional))
(modules (modules
LexerLib
LexerUnit LexerUnit
ParserUnit ParserUnit
ParserAPI ParserAPI

View File

@ -1 +1 @@
const a: string = -0x222; const a: string = - (**) 0x2222

View File

@ -1 +1 @@
let a = -0x222; let a = - (**) 0x2222

View File

@ -1 +1 @@
let a = -0x222; let a = - /**/ 0x2222;

View File

@ -1 +0,0 @@
let arguments = 1;

View File

@ -5,7 +5,7 @@ module Region = Simple_utils.Region
let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo") let options = EvalOpt.read ".ligo" (* No comments allowed *)
let lex in_chan = let lex in_chan =
let buffer = Lexing.from_channel in_chan in let buffer = Lexing.from_channel in_chan in

View File

@ -5,7 +5,7 @@ module Region = Simple_utils.Region
let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo") let options = EvalOpt.read ".ligo" (* No comments allowed *)
let parse in_chan = let parse in_chan =
let buffer = Lexing.from_channel in_chan in let buffer = Lexing.from_channel in_chan in

View File

@ -2,29 +2,33 @@
(* The type [options] gathers the command-line options. *) (* The type [options] gathers the command-line options. *)
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
let lang_to_string = function
`PascaLIGO -> "PascaLIGO"
| `CameLIGO -> "CameLIGO"
| `ReasonLIGO -> "ReasonLIGO"
module SSet = Set.Make (String) module SSet = Set.Make (String)
type line_comment = string (* Opening of a line comment *)
type block_comment = <opening : string; closing : string>
let mk_block ~opening ~closing : block_comment =
object
method opening = opening
method closing = closing
end
type options = < type options = <
input : string option; input : string option;
libs : string list; libs : string list;
verbose : SSet.t; verbose : SSet.t;
offsets : bool; offsets : bool;
lang : language; block : block_comment option;
ext : string (* ".ligo", ".mligo", ".religo" *) line : line_comment option;
ext : string
> >
let make ~input ~libs ~lang ~offsets ~verbose ~ext : options = let make ~input ~libs ?block ?line ~offsets ~verbose ~ext : options =
object object
method input = input method input = input
method libs = libs method libs = libs
method lang = lang method block = block
method line = line
method offsets = offsets method offsets = offsets
method verbose = verbose method verbose = verbose
method ext = ext method ext = ext
@ -47,10 +51,10 @@ let abort msg =
(* Help *) (* Help *)
let help lang ext () = let help ext () =
let file = Filename.basename Sys.argv.(0) in let file = Filename.basename Sys.argv.(0) in
printf "Usage: %s [<option> ...] [<input>%s | \"-\"]\n" file ext; printf "Usage: %s [<option> ...] [<input>%s | \"-\"]\n" file ext;
printf "where <input>%s is the %s source file (default: stdin),\n" ext lang; printf "where <input>%s is the LIGO source file (default: stdin),\n" ext;
print "and each <option> (if any) is one of the following:"; print "and each <option> (if any) is one of the following:";
print " -I <paths> Inclusion paths (colon-separated)"; print " -I <paths> Inclusion paths (colon-separated)";
print " --columns Columns for source locations"; print " --columns Columns for source locations";
@ -74,11 +78,10 @@ let add_verbose d =
verbose := List.fold_left (fun x y -> SSet.add y x) verbose := List.fold_left (fun x y -> SSet.add y x)
!verbose !verbose
(split_at_colon d) (split_at_colon d)
let specs lang ext = let specs ext =
let lang_str = lang_to_string lang in let open! Getopt in [
let open!Getopt in [
'I', nolong, None, Some add_path; 'I', nolong, None, Some add_path;
'h', "help", Some (help lang_str ext), None; 'h', "help", Some (help ext), None;
noshort, "columns", set columns true, None; noshort, "columns", set columns true, None;
noshort, "verbose", None, Some add_verbose noshort, "verbose", None, Some add_verbose
] ]
@ -92,7 +95,7 @@ let anonymous arg =
(* Checking options and exporting them as non-mutable values *) (* Checking options and exporting them as non-mutable values *)
let check lang ext = let check ?block ?line ~ext =
let libs = !libs let libs = !libs
and offsets = not !columns and offsets = not !columns
@ -109,16 +112,18 @@ let check lang ext =
else abort "Source file not found." else abort "Source file not found."
else abort ("Source file lacks the extension " ^ ext ^ ".") else abort ("Source file lacks the extension " ^ ext ^ ".")
in make ~input ~libs ~lang ~offsets ~verbose ~ext in make ~input ~libs ?block ?line ~offsets ~verbose ~ext
(* Parsing the command-line options *) (* Parsing the command-line options *)
let read ~lang:(lang : language) ~ext:(ext : string) = type extension = string
let read ?block ?line (ext: extension) =
try try
Getopt.parse_cmdline (specs lang ext) anonymous; Getopt.parse_cmdline (specs ext) anonymous;
(verb_str := (verb_str :=
let apply e a = let apply e a =
if a = "" then e else sprintf "%s, %s" e a if a = "" then e else sprintf "%s, %s" e a
in SSet.fold apply !verbose ""); in SSet.fold apply !verbose "");
check lang ext check ?block ?line ~ext
with Getopt.Error msg -> abort msg with Getopt.Error msg -> abort msg

View File

@ -2,25 +2,28 @@
(* The type [options] gathers the command-line options. *) (* The type [options] gathers the command-line options. *)
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
val lang_to_string : language -> string
module SSet : Set.S with type elt = string and type t = Set.Make(String).t module SSet : Set.S with type elt = string and type t = Set.Make(String).t
type line_comment = string (* Opening of a line comment *)
type block_comment = <opening : string; closing : string>
val mk_block : opening:string -> closing:string -> block_comment
type options = < type options = <
input : string option; input : string option;
libs : string list; libs : string list;
verbose : SSet.t; verbose : SSet.t;
offsets : bool; offsets : bool;
lang : language; block : block_comment option;
ext : string (* ".ligo", ".mligo", ".religo" *) line : line_comment option;
ext : string
> >
val make : val make :
input:string option -> input:string option ->
libs:string list -> libs:string list ->
lang:language -> ?block:block_comment ->
?line:line_comment ->
offsets:bool -> offsets:bool ->
verbose:SSet.t -> verbose:SSet.t ->
ext:string -> ext:string ->
@ -30,4 +33,7 @@ val make :
the name of the concrete syntax. This is needed to correctly handle the name of the concrete syntax. This is needed to correctly handle
comments. *) comments. *)
val read : lang:language -> ext:string -> options type extension = string
val read :
?block:block_comment -> ?line:line_comment -> extension -> options

View File

@ -15,7 +15,7 @@ type error =
| No_line_indicator | No_line_indicator
| End_line_indicator | End_line_indicator
| Newline_in_string (* For #include argument only *) | Newline_in_string (* For #include argument only *)
| Open_string (* For #include argument only *) | Unterminated_string (* For #include argument only *)
| Dangling_endif | Dangling_endif
| Open_region_in_conditional | Open_region_in_conditional
| Dangling_endregion | Dangling_endregion
@ -29,10 +29,10 @@ type error =
| Multiply_defined_symbol of string | Multiply_defined_symbol of string
| Error_directive of string | Error_directive of string
| Parse_error | Parse_error
| No_line_comment_or_blank
| Invalid_symbol | Invalid_symbol
| File_not_found of string | File_not_found of string
| Invalid_character of char | Invalid_character of char
| Unterminated_comment of string
val format : val format :
?offsets:bool -> error Region.reg -> file:bool -> string Region.reg ?offsets:bool -> error Region.reg -> file:bool -> string Region.reg

View File

@ -44,19 +44,6 @@ type mode = Copy | Skip
type cond = If of mode | Elif of mode | Else | Region type cond = If of mode | Elif of mode | Else | Region
type trace = cond list type trace = cond list
(* Line offsets
The value [Inline] of type [offset] means that the current location
cannot be reached from the start of the line with only white
space. The same holds for the special value [Prefix 0]. Values of
the form [Prefix n] mean that the current location can be reached
from the start of the line with [n] white spaces (padding). These
distinctions are needed because preprocessor directives cannot
occur inside lines.
*)
type offset = Prefix of int | Inline
(* Environments *) (* Environments *)
module Env = Set.Make (String) module Env = Set.Make (String)
@ -78,8 +65,6 @@ in function
* the field [env] records the symbols defined; * the field [env] records the symbols defined;
* the field [mode] informs whether the preprocessor is in copying or * the field [mode] informs whether the preprocessor is in copying or
skipping mode; skipping mode;
* the field [offset] tells whether the current location can be
reached from the start of the line with only white space;
* the field [trace] is a stack of previous, still active conditional * the field [trace] is a stack of previous, still active conditional
directives; directives;
* the field [out] keeps the output buffer; * the field [out] keeps the output buffer;
@ -92,7 +77,6 @@ in function
type state = { type state = {
env : Env.t; env : Env.t;
mode : mode; mode : mode;
offset : offset;
trace : trace; trace : trace;
out : Buffer.t; out : Buffer.t;
incl : in_channel list; incl : in_channel list;
@ -117,7 +101,7 @@ type error =
| No_line_indicator | No_line_indicator
| End_line_indicator | End_line_indicator
| Newline_in_string | Newline_in_string
| Open_string | Unterminated_string
| Dangling_endif | Dangling_endif
| Open_region_in_conditional | Open_region_in_conditional
| Dangling_endregion | Dangling_endregion
@ -131,10 +115,10 @@ type error =
| Multiply_defined_symbol of string | Multiply_defined_symbol of string
| Error_directive of string | Error_directive of string
| Parse_error | Parse_error
| No_line_comment_or_blank
| Invalid_symbol | Invalid_symbol
| File_not_found of string | File_not_found of string
| Invalid_character of char | Invalid_character of char
| Unterminated_comment of string
let error_to_string = function let error_to_string = function
Directive_inside_line -> Directive_inside_line ->
@ -151,7 +135,7 @@ let error_to_string = function
Hint: Try a string, end of line, or a line comment." Hint: Try a string, end of line, or a line comment."
| Newline_in_string -> | Newline_in_string ->
sprintf "Invalid newline character in string." sprintf "Invalid newline character in string."
| Open_string -> | Unterminated_string ->
sprintf "Unterminated string.\n\ sprintf "Unterminated string.\n\
Hint: Close with double quotes." Hint: Close with double quotes."
| Dangling_endif -> | Dangling_endif ->
@ -187,14 +171,15 @@ let error_to_string = function
msg msg
| Parse_error -> | Parse_error ->
"Parse error in expression." "Parse error in expression."
| No_line_comment_or_blank ->
"Line comment or whitespace expected."
| Invalid_symbol -> | Invalid_symbol ->
"Expected a symbol (identifier)." "Expected a symbol (identifier)."
| File_not_found name -> | File_not_found name ->
sprintf "File \"%s\" to include not found." name sprintf "File \"%s\" to include not found." name
| Invalid_character c -> | Invalid_character c ->
E_Lexer.error_to_string (E_Lexer.Invalid_character c) E_Lexer.error_to_string (E_Lexer.Invalid_character c)
| Unterminated_comment ending ->
sprintf "Unterminated comment.\n\
Hint: Close with \"%s\"." ending
let format ?(offsets=true) Region.{region; value} ~file = let format ?(offsets=true) Region.{region; value} ~file =
let msg = error_to_string value let msg = error_to_string value
@ -224,7 +209,7 @@ let fail error state buffer = stop error state (mk_reg buffer)
let reduce_cond state region = let reduce_cond state region =
let rec reduce = function let rec reduce = function
[] -> stop Dangling_endif state region [] -> stop Dangling_endif state region
| If mode::trace -> {state with mode; trace; offset = Prefix 0} | If mode::trace -> {state with mode; trace}
| Region::_ -> stop Open_region_in_conditional state region | Region::_ -> stop Open_region_in_conditional state region
| _::trace -> reduce trace | _::trace -> reduce trace
in reduce state.trace in reduce state.trace
@ -235,7 +220,7 @@ let reduce_cond state region =
let reduce_region state region = let reduce_region state region =
match state.trace with match state.trace with
[] -> stop Dangling_endregion state region [] -> stop Dangling_endregion state region
| Region::trace -> {state with trace; offset = Prefix 0} | Region::trace -> {state with trace}
| _ -> stop Conditional_in_region state region | _ -> stop Conditional_in_region state region
(* The function [extend] is called when encountering conditional (* The function [extend] is called when encountering conditional
@ -286,7 +271,7 @@ let find dir file libs =
let copy state buffer = Buffer.add_string state.out (Lexing.lexeme buffer) let copy state buffer = Buffer.add_string state.out (Lexing.lexeme buffer)
(* End of lines *) (* End of lines are always copied *)
let proc_nl state buffer = Lexing.new_line buffer; copy state buffer let proc_nl state buffer = Lexing.new_line buffer; copy state buffer
@ -294,13 +279,6 @@ let proc_nl state buffer = Lexing.new_line buffer; copy state buffer
let print state string = Buffer.add_string state.out string let print state string = Buffer.add_string state.out string
(* Expanding the offset into whitespace *)
let expand_offset state =
match state.offset with
Prefix 0 | Inline -> ()
| Prefix n -> print state (String.make n ' ')
(* Evaluating a preprocessor expression (* Evaluating a preprocessor expression
The evaluation of conditional directives may involve symbols whose The evaluation of conditional directives may involve symbols whose
@ -346,6 +324,35 @@ let letter = small | capital
let ident = letter (letter | '_' | digit)* let ident = letter (letter | '_' | digit)*
let directive = '#' (blank* as space) (small+ as id) let directive = '#' (blank* as space) (small+ as id)
(* Comments *)
let pascaligo_block_comment_opening = "(*"
let pascaligo_block_comment_closing = "*)"
let pascaligo_line_comment = "//"
let cameligo_block_comment_opening = "(*"
let cameligo_block_comment_closing = "*)"
let cameligo_line_comment = "//"
let reasonligo_block_comment_opening = "/*"
let reasonligo_block_comment_closing = "*/"
let reasonligo_line_comment = "//"
let block_comment_openings =
pascaligo_block_comment_opening
| cameligo_block_comment_opening
| reasonligo_block_comment_opening
let block_comment_closings =
pascaligo_block_comment_closing
| cameligo_block_comment_closing
| reasonligo_block_comment_closing
let line_comments =
pascaligo_line_comment
| cameligo_line_comment
| reasonligo_line_comment
(* Rules *) (* Rules *)
(* The rule [scan] scans the input buffer for directives, strings, (* The rule [scan] scans the input buffer for directives, strings,
@ -354,19 +361,17 @@ let directive = '#' (blank* as space) (small+ as id)
depending on the compilation directives. If not copied, new line depending on the compilation directives. If not copied, new line
characters are output. characters are output.
Scanning is triggered by the function call [scan env mode offset Scanning is triggered by the function call [scan env mode trace
trace lexbuf], where [env] is the set of defined symbols lexbuf], where [env] is the set of defined symbols (introduced by
(introduced by `#define'), [mode] specifies whether we are copying `#define'), [mode] specifies whether we are copying or skipping the
or skipping the input, [offset] informs about the location in the input, and [trace] is the stack of conditional directives read so
line (either there is a prefix of blanks, or at least a non-blank far.
character has been read), and [trace] is the stack of conditional
directives read so far.
The first call is [scan {env=Env.empty; mode=Copy; offset = Prefix The first call is [scan {env=Env.empty; mode=Copy; trace=[];
0; trace=[]; incl=[]; opt}], meaning that we start with an empty incl=[]; opt}], meaning that we start with an empty environment,
environment, that copying the input is enabled by default, and that that copying the input is enabled by default, and that we are at
we are at the start of a line and no previous conditional the start of a line and no previous conditional directives have
directives have been read yet. The field [opt] is the CLI options. been read yet. The field [opt] is the CLI options.
When an "#if" is matched, the trace is extended by the call [extend When an "#if" is matched, the trace is extended by the call [extend
lexbuf (If mode) trace], during the evaluation of which the lexbuf (If mode) trace], during the evaluation of which the
@ -386,12 +391,11 @@ let directive = '#' (blank* as space) (small+ as id)
value of the conditional expression must be ignored (but not its value of the conditional expression must be ignored (but not its
syntax), and we continue skipping the input. syntax), and we continue skipping the input.
When an "#else" is matched, the trace is extended with [Else], When an "#else" is matched, the trace is extended with [Else], then
then, if the directive is not at a wrong offset, the rest of the the rest of the line is scanned with [skip_line]. If we were in
line is scanned with [skip_line]. If we were in copy mode, the new copy mode, the new mode toggles to skipping mode; otherwise, the
mode toggles to skipping mode; otherwise, the trace is searched for trace is searched for the last encountered "#if" of "#elif" and the
the last encountered "#if" of "#elif" and the associated mode is associated mode is restored.
restored.
The case "#elif" is the result of the fusion (in the technical The case "#elif" is the result of the fusion (in the technical
sense) of the code for dealing with an "#else" followed by an sense) of the code for dealing with an "#else" followed by an
@ -465,28 +469,23 @@ let directive = '#' (blank* as space) (small+ as id)
Important note: Comments and strings are recognised as such only in Important note: Comments and strings are recognised as such only in
copy mode, which is a different behaviour from the preprocessor of copy mode, which is a different behaviour from the preprocessor of
GNU GCC, which always does. GNU GCC, which always does.
*) *)
rule scan state = parse rule scan state = parse
nl { expand_offset state; proc_nl state lexbuf; nl { proc_nl state lexbuf; scan state lexbuf }
scan {state with offset = Prefix 0} lexbuf } | blank { if state.mode = Copy then copy state lexbuf;
| blank { match state.offset with
Prefix n ->
scan {state with offset = Prefix (n+1)} lexbuf
| Inline ->
if state.mode = Copy then copy state lexbuf;
scan state lexbuf } scan state lexbuf }
| directive { | directive {
let region = mk_reg lexbuf in
if not (List.mem id directives) if not (List.mem id directives)
then begin then begin
if state.mode = Copy then copy state lexbuf; if state.mode = Copy then copy state lexbuf;
scan state lexbuf scan state lexbuf
end end
else else
if state.offset = Inline if region#start#offset `Byte > 0
then fail Directive_inside_line state lexbuf then fail Directive_inside_line state lexbuf
else else
let region = mk_reg lexbuf in
match id with match id with
"include" -> "include" ->
let line = Lexing.(lexbuf.lex_curr_p.pos_lnum) let line = Lexing.(lexbuf.lex_curr_p.pos_lnum)
@ -517,7 +516,7 @@ rule scan state = parse
let mode = expr state lexbuf in let mode = expr state lexbuf in
let mode = if state.mode = Copy then mode else Skip in let mode = if state.mode = Copy then mode else Skip in
let trace = extend (If state.mode) state region in let trace = extend (If state.mode) state region in
let state = {state with mode; offset = Prefix 0; trace} let state = {state with mode; trace}
in scan state lexbuf in scan state lexbuf
| "else" -> | "else" ->
let () = skip_line state lexbuf in let () = skip_line state lexbuf in
@ -525,7 +524,7 @@ rule scan state = parse
Copy -> Skip Copy -> Skip
| Skip -> last_mode state.trace in | Skip -> last_mode state.trace in
let trace = extend Else state region let trace = extend Else state region
in scan {state with mode; offset = Prefix 0; trace} lexbuf in scan {state with mode; trace} lexbuf
| "elif" -> | "elif" ->
let mode = expr state lexbuf in let mode = expr state lexbuf in
let trace, mode = let trace, mode =
@ -534,7 +533,7 @@ rule scan state = parse
| Skip -> let old_mode = last_mode state.trace | Skip -> let old_mode = last_mode state.trace
in extend (Elif old_mode) state region, in extend (Elif old_mode) state region,
if old_mode = Copy then mode else Skip if old_mode = Copy then mode else Skip
in scan {state with mode; offset = Prefix 0; trace} lexbuf in scan {state with mode; trace} lexbuf
| "endif" -> | "endif" ->
skip_line state lexbuf; skip_line state lexbuf;
scan (reduce_cond state region) lexbuf scan (reduce_cond state region) lexbuf
@ -544,89 +543,81 @@ rule scan state = parse
then stop (Reserved_symbol id) state region; then stop (Reserved_symbol id) state region;
if Env.mem id state.env if Env.mem id state.env
then stop (Multiply_defined_symbol id) state region; then stop (Multiply_defined_symbol id) state region;
let state = {state with env = Env.add id state.env; let state = {state with env = Env.add id state.env}
offset = Prefix 0}
in scan state lexbuf in scan state lexbuf
| "undef" -> | "undef" ->
let id, _ = variable state lexbuf in let id, _ = variable state lexbuf in
let state = {state with env = Env.remove id state.env; let state = {state with env = Env.remove id state.env}
offset = Prefix 0}
in scan state lexbuf in scan state lexbuf
| "error" -> | "error" ->
stop (Error_directive (message [] lexbuf)) state region stop (Error_directive (message [] lexbuf)) state region
| "region" -> | "region" ->
let msg = message [] lexbuf let msg = message [] lexbuf
in expand_offset state; in print state ("#" ^ space ^ "region" ^ msg ^ "\n");
print state ("#" ^ space ^ "region" ^ msg ^ "\n"); let state = {state with trace=Region::state.trace}
let state =
{state with offset = Prefix 0; trace=Region::state.trace}
in scan state lexbuf in scan state lexbuf
| "endregion" -> | "endregion" ->
let msg = message [] lexbuf let msg = message [] lexbuf
in expand_offset state; in print state ("#" ^ space ^ "endregion" ^ msg ^ "\n");
print state ("#" ^ space ^ "endregion" ^ msg ^ "\n");
scan (reduce_region state region) lexbuf scan (reduce_region state region) lexbuf
(*
| "line" ->
expand_offset state;
print state ("#" ^ space ^ "line");
line_ind state lexbuf;
scan {state with offset = Prefix 0} lexbuf
| "warning" ->
let start_p, end_p = region in
let msg = message [] lexbuf in
let open Lexing
in prerr_endline
("Warning at line " ^ string_of_int start_p.pos_lnum
^ ", char "
^ string_of_int (start_p.pos_cnum - start_p.pos_bol)
^ "--" ^ string_of_int (end_p.pos_cnum - end_p.pos_bol)
^ ":\n" ^ msg);
scan env mode (Prefix 0) trace lexbuf
*)
| _ -> assert false | _ -> assert false
} }
| eof { match state.trace with
[] -> expand_offset state; state | eof { if state.trace = [] then state
| _ -> fail Missing_endif state lexbuf } else fail Missing_endif state lexbuf }
| '"' { if state.mode = Copy then | '"' { if state.mode = Copy then
begin begin
expand_offset state;
copy state lexbuf; copy state lexbuf;
in_string (mk_reg lexbuf) state lexbuf scan (in_string (mk_reg lexbuf) state lexbuf) lexbuf
end; end
scan {state with offset=Inline} lexbuf } else scan state lexbuf }
| "//" { if state.mode = Copy then
| block_comment_openings {
let lexeme = Lexing.lexeme lexbuf in
match state.opt#block with
Some block when block#opening = lexeme ->
if state.mode = Copy then
begin begin
expand_offset state;
copy state lexbuf; copy state lexbuf;
in_line_com state lexbuf let state = in_block block (mk_reg lexbuf) state lexbuf
end; in scan state lexbuf
scan {state with offset=Inline} lexbuf } end
| "/*" { if state.mode = Copy then else scan state lexbuf
| Some _ | None ->
let n = String.length lexeme in
begin
rollback lexbuf;
assert (n > 0);
scan (scan_n_char n state lexbuf) lexbuf
end }
| line_comments {
let lexeme = Lexing.lexeme lexbuf in
match state.opt#line with
Some line when line = lexeme ->
if state.mode = Copy then
begin begin
expand_offset state;
copy state lexbuf; copy state lexbuf;
if state.opt#lang = `ReasonLIGO then scan (in_line_com state lexbuf) lexbuf
reasonLIGO_com (mk_reg lexbuf) state lexbuf end
end; else scan state lexbuf
scan {state with offset=Inline} lexbuf } | Some _ | None ->
| "(*" { if state.mode = Copy then let n = String.length lexeme in
begin begin
expand_offset state; rollback lexbuf;
copy state lexbuf; assert (n > 0);
if state.opt#lang = `CameLIGO scan (scan_n_char n state lexbuf) lexbuf
|| state.opt#lang = `PascaLIGO then end }
cameLIGO_com (mk_reg lexbuf) state lexbuf
end; | _ { if state.mode = Copy then copy state lexbuf;
scan {state with offset=Inline} lexbuf } scan state lexbuf }
| _ { if state.mode = Copy then
begin (* Scanning a series of characters *)
expand_offset state;
copy state lexbuf and scan_n_char n state = parse
end; _ { if state.mode = Copy then copy state lexbuf;
scan {state with offset=Inline} lexbuf } if n = 1 then state else scan_n_char (n-1) state lexbuf }
(* Support for #define and #undef *) (* Support for #define and #undef *)
@ -638,47 +629,12 @@ and symbol state = parse
ident as id { id, mk_reg lexbuf } ident as id { id, mk_reg lexbuf }
| _ { fail Invalid_symbol state lexbuf } | _ { fail Invalid_symbol state lexbuf }
(*
(* Line indicator (#line) *)
and line_ind state = parse
blank* { copy state lexbuf; line_indicator state lexbuf }
and line_indicator state = parse
natural { copy state lexbuf; end_indicator state lexbuf }
| ident as id {
match id with
"default" | "hidden" ->
print state (id ^ message [] lexbuf)
| _ -> fail (Invalid_line_indicator id) state lexbuf }
| _ { fail No_line_indicator state lexbuf }
and end_indicator state = parse
blank+ { copy state lexbuf; end_indicator state lexbuf }
| nl { proc_nl state lexbuf }
| eof { copy state lexbuf }
| "//" { copy state lexbuf;
print state (message [] lexbuf ^ "\n") }
| '"' { copy state lexbuf;
in_string (mk_reg lexbuf) state lexbuf;
opt_line_com state lexbuf }
| _ { fail End_line_indicator state lexbuf }
and opt_line_com state = parse
nl { proc_nl state lexbuf }
| eof { copy state lexbuf }
| blank+ { copy state lexbuf; opt_line_com state lexbuf }
| "//" { print state ("//" ^ message [] lexbuf) }
*)
(* New lines and verbatim sequence of characters *) (* New lines and verbatim sequence of characters *)
and skip_line state = parse and skip_line state = parse
nl { proc_nl state lexbuf } nl { proc_nl state lexbuf }
| blank+ { skip_line state lexbuf } | blank+ { skip_line state lexbuf }
| "//" { in_line_com {state with mode=Skip} lexbuf } | _ { () }
| _ { fail No_line_comment_or_blank state lexbuf }
| eof { () }
and message acc = parse and message acc = parse
nl { Lexing.new_line lexbuf; nl { Lexing.new_line lexbuf;
@ -689,22 +645,41 @@ and message acc = parse
(* Comments *) (* Comments *)
and in_line_com state = parse and in_line_com state = parse
nl { proc_nl state lexbuf } nl { proc_nl state lexbuf; state }
| eof { () } | eof { state }
| _ { if state.mode = Copy then copy state lexbuf; | _ { if state.mode = Copy then copy state lexbuf;
in_line_com state lexbuf } in_line_com state lexbuf }
and reasonLIGO_com opening state = parse and in_block block opening state = parse
nl { proc_nl state lexbuf; reasonLIGO_com opening state lexbuf } '"' | block_comment_openings {
| "*/" { copy state lexbuf } let lexeme = Lexing.lexeme lexbuf in
| eof { () } if block#opening = lexeme || lexeme = "\""
| _ { copy state lexbuf; reasonLIGO_com opening state lexbuf } then let () = copy state lexbuf in
let opening' = mk_reg lexbuf in
let next = if lexeme = "\"" then in_string
else in_block block in
let state = next opening' state lexbuf
in in_block block opening state lexbuf
else let () = rollback lexbuf in
let n = String.length lexeme in
let () = assert (n > 0) in
let state = scan_n_char n state lexbuf
in in_block block opening state lexbuf }
and cameLIGO_com opening state = parse | block_comment_closings {
nl { proc_nl state lexbuf; cameLIGO_com opening state lexbuf } let lexeme = Lexing.lexeme lexbuf in
| "*)" { copy state lexbuf } if block#closing = lexeme
| eof { () } then (copy state lexbuf; state)
| _ { copy state lexbuf; cameLIGO_com opening state lexbuf } else let () = rollback lexbuf in
let n = String.length lexeme in
let () = assert (n > 0) in
let state = scan_n_char n state lexbuf
in in_block block opening state lexbuf }
| nl { proc_nl state lexbuf; in_block block opening state lexbuf }
| eof { let err = Unterminated_comment (block#closing)
in stop err state opening }
| _ { copy state lexbuf; in_block block opening state lexbuf }
(* Included filename *) (* Included filename *)
@ -717,15 +692,15 @@ and in_inclusion opening acc len state = parse
in Region.cover opening closing, in Region.cover opening closing,
mk_str len acc } mk_str len acc }
| nl { fail Newline_in_string state lexbuf } | nl { fail Newline_in_string state lexbuf }
| eof { stop Open_string state opening } | eof { stop Unterminated_string state opening }
| _ as c { in_inclusion opening (c::acc) (len+1) state lexbuf } | _ as c { in_inclusion opening (c::acc) (len+1) state lexbuf }
(* Strings *) (* Strings *)
and in_string opening state = parse and in_string opening state = parse
"\\\"" { copy state lexbuf; in_string opening state lexbuf } "\\\"" { copy state lexbuf; in_string opening state lexbuf }
| '"' { copy state lexbuf } | '"' { copy state lexbuf; state }
| eof { () } | eof { state }
| _ { copy state lexbuf; in_string opening state lexbuf } | _ { copy state lexbuf; in_string opening state lexbuf }
and preproc state = parse and preproc state = parse
@ -750,7 +725,6 @@ let lex opt buffer =
let state = { let state = {
env = Env.empty; env = Env.empty;
mode = Copy; mode = Copy;
offset = Prefix 0;
trace = []; trace = [];
out = Buffer.create 80; out = Buffer.create 80;
incl = []; incl = [];

View File

@ -4,9 +4,12 @@ module Region = Simple_utils.Region
module Preproc = Preprocessor.Preproc module Preproc = Preprocessor.Preproc
module EvalOpt = Preprocessor.EvalOpt module EvalOpt = Preprocessor.EvalOpt
let highlight msg = Printf.eprintf "\027[31m%s\027[0m\n%!" msg let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo") let options =
let open EvalOpt in
let block = mk_block ~opening:"(*" ~closing:"*)"
in read ~block ~line:"//" ".ligo"
let preproc cin = let preproc cin =
let buffer = Lexing.from_channel cin in let buffer = Lexing.from_channel cin in

View File

@ -1,22 +0,0 @@
#!/bin/sh
set -x
ocamllex.opt E_Lexer.mll
ocamllex.opt Preproc.mll
menhir -la 1 E_Parser.mly
ocamlfind ocamlc -strict-sequence -w +A-48-4 -c EvalOpt.mli
ocamlfind ocamlc -strict-sequence -w +A-48-4 -c E_AST.ml
ocamlfind ocamlc -strict-sequence -w +A-48-4 -c E_Parser.mli
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c E_Lexer.mli
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c E_LexerMain.ml
camlcmd="ocamlfind ocamlc -I _x86_64 -strict-sequence -w +A-48-4 "
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package getopt,str -c EvalOpt.ml
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c E_Lexer.ml
menhir --infer --ocamlc="$camlcmd" E_Parser.mly
ocamlfind ocamlc -strict-sequence -w +A-48-4 -c E_Parser.ml
ocamlfind ocamlc -package getopt,simple-utils,str -linkpkg -o E_LexerMain.byte E_AST.cmo E_Parser.cmo E_Lexer.cmo EvalOpt.cmo E_LexerMain.cmo
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c Preproc.mli
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c PreprocMain.ml
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c Preproc.ml
ocamlfind ocamlc -package getopt,simple-utils,str -linkpkg -o PreprocMain.byte EvalOpt.cmo E_AST.cmo E_Parser.cmo E_Lexer.cmo Preproc.cmo PreprocMain.cmo
ocamlfind ocamlc -strict-sequence -w +A-48-4 -package simple-utils -c E_ParserMain.ml
ocamlfind ocamlc -package getopt,simple-utils,str -linkpkg -o E_ParserMain.byte E_AST.cmo E_Parser.cmo E_Lexer.cmo EvalOpt.cmo Preproc.cmo E_ParserMain.cmo

View File

@ -1,4 +0,0 @@
#!/bin/sh
\rm -f *.cm* *.o *.byte *.opt
\rm E_Lexer.ml E_Parser.ml E_Parser.mli Preproc.ml