The preprocessor library depends now on the kinds of comments
instead of a closed set of languages. I also removed the offsets: I simply use the current region to determine whether the preprocessing directive starts at the beginning of a line. I also removed scanning line indicators, to make the lexer simpler. LexToken.mll: Moved the function [check_right_context] that checks stylistic constraints from Lexer.mll to LexToken.mll. While this triplicates code (as CameLIGO, PascaLIGO and ReasonLIGO share the same constraints), the benefit is that Lexer.mll becomes more generic and the signature for the TOKEN module is simpler (no more exporting predicates, except for EOF). In accordance with the change of the preprocessor, the lexers and parsers for LIGO now depend on the kind of comments, not a fixed set of syntaxes. This gives more versatility when adding a new language: only the kinds of its comments are needed, although Lexer.mll and Preproc.mll may have to be modified if they do not already know the comment delimiters, for example line comments starting with #. **************************************************************** BUG: The exceptions coming from LexToken.mll when a stylistic constraint is broken in [LexToken.check_right_context] are not caught yet. **************************************************************** Lexer.mll: I moved out as much as I could from the header into a new module LexerLib. The aim is to make it easy to reuse as much as possible of the lexer machinery, when it cannot be used as is.
This commit is contained in:
parent
ce5464f9af
commit
f3777b4af8
@ -8,15 +8,14 @@ module SSet = Set.Make (String)
|
||||
|
||||
(* Mock IOs TODO: Fill them with CLI options *)
|
||||
|
||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
||||
|
||||
module SubIO =
|
||||
struct
|
||||
type options = <
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : language;
|
||||
block : EvalOpt.block_comment option;
|
||||
line : EvalOpt.line_comment option;
|
||||
ext : string; (* ".mligo" *)
|
||||
mode : [`Byte | `Point];
|
||||
cmd : EvalOpt.command;
|
||||
@ -24,22 +23,25 @@ module SubIO =
|
||||
>
|
||||
|
||||
let options : options =
|
||||
object
|
||||
method libs = []
|
||||
method verbose = SSet.empty
|
||||
method offsets = true
|
||||
method lang = `CameLIGO
|
||||
method ext = ".mligo"
|
||||
method mode = `Point
|
||||
method cmd = EvalOpt.Quiet
|
||||
method mono = false
|
||||
end
|
||||
let block = EvalOpt.mk_block ~opening:"(*" ~closing:"*)"
|
||||
in object
|
||||
method libs = []
|
||||
method verbose = SSet.empty
|
||||
method offsets = true
|
||||
method block = Some block
|
||||
method line = Some "//"
|
||||
method ext = ".mligo"
|
||||
method mode = `Point
|
||||
method cmd = EvalOpt.Quiet
|
||||
method mono = false
|
||||
end
|
||||
|
||||
let make =
|
||||
EvalOpt.make ~libs:options#libs
|
||||
~verbose:options#verbose
|
||||
~offsets:options#offsets
|
||||
~lang:options#lang
|
||||
?block:options#block
|
||||
?line:options#line
|
||||
~ext:options#ext
|
||||
~mode:options#mode
|
||||
~cmd:options#cmd
|
||||
|
@ -156,11 +156,22 @@ val eof : Region.t -> token
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
val is_string : token -> bool
|
||||
val is_bytes : token -> bool
|
||||
val is_int : token -> bool
|
||||
val is_ident : token -> bool
|
||||
val is_kwd : token -> bool
|
||||
val is_constr : token -> bool
|
||||
val is_sym : token -> bool
|
||||
val is_eof : token -> bool
|
||||
|
||||
(* Style *)
|
||||
|
||||
type error
|
||||
|
||||
val error_to_string : error -> string
|
||||
|
||||
exception Error of error Region.reg
|
||||
|
||||
val format_error :
|
||||
?offsets:bool -> [`Byte | `Point] ->
|
||||
error Region.reg -> file:bool -> string Region.reg
|
||||
|
||||
val check_right_context :
|
||||
token ->
|
||||
(Lexing.lexbuf -> (Markup.t list * token) option) ->
|
||||
Lexing.lexbuf ->
|
||||
unit
|
||||
|
@ -480,81 +480,60 @@ let mk_attr header lexeme region =
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
let is_string = function
|
||||
String _ -> true
|
||||
| _ -> false
|
||||
let is_string = function String _ -> true | _ -> false
|
||||
let is_bytes = function Bytes _ -> true | _ -> false
|
||||
let is_int = function Int _ -> true | _ -> false
|
||||
let is_ident = function Ident _ -> true | _ -> false
|
||||
let is_eof = function EOF _ -> true | _ -> false
|
||||
|
||||
let is_bytes = function
|
||||
Bytes _ -> true
|
||||
| _ -> false
|
||||
(* Errors *)
|
||||
|
||||
let is_int = function
|
||||
Int _ -> true
|
||||
| _ -> false
|
||||
type error =
|
||||
Odd_lengthed_bytes
|
||||
| Missing_break
|
||||
|
||||
let is_ident = function
|
||||
Ident _ -> true
|
||||
| _ -> false
|
||||
let error_to_string = function
|
||||
Odd_lengthed_bytes ->
|
||||
"The length of the byte sequence is an odd number.\n\
|
||||
Hint: Add or remove a digit."
|
||||
| Missing_break ->
|
||||
"Missing break.\n\
|
||||
Hint: Insert some space."
|
||||
|
||||
let is_kwd = function
|
||||
| Begin _
|
||||
| Else _
|
||||
| End _
|
||||
| False _
|
||||
| Fun _
|
||||
| Rec _
|
||||
| If _
|
||||
| In _
|
||||
| Let _
|
||||
| Match _
|
||||
| Mod _
|
||||
| Not _
|
||||
| Of _
|
||||
| Or _
|
||||
| Then _
|
||||
| True _
|
||||
| Type _
|
||||
| With _ -> true
|
||||
| _ -> false
|
||||
exception Error of error Region.reg
|
||||
|
||||
let is_constr = function
|
||||
| Constr _
|
||||
| Ident _
|
||||
| False _
|
||||
| True _ -> true
|
||||
| _ -> false
|
||||
let format_error ?(offsets=true) mode Region.{region; value} ~file =
|
||||
let msg = error_to_string value
|
||||
and reg = region#to_string ~file ~offsets mode in
|
||||
let value = sprintf "Lexical error %s:\n%s\n" reg msg
|
||||
in Region.{value; region}
|
||||
|
||||
let is_sym = function
|
||||
| ARROW _
|
||||
| CONS _
|
||||
| CAT _
|
||||
| MINUS _
|
||||
| PLUS _
|
||||
| SLASH _
|
||||
| TIMES _
|
||||
| LPAR _
|
||||
| RPAR _
|
||||
| LBRACKET _
|
||||
| RBRACKET _
|
||||
| LBRACE _
|
||||
| RBRACE _
|
||||
| COMMA _
|
||||
| SEMI _
|
||||
| VBAR _
|
||||
| COLON _
|
||||
| DOT _
|
||||
| WILD _
|
||||
| EQ _
|
||||
| NE _
|
||||
| LT _
|
||||
| GT _
|
||||
| LE _
|
||||
| GE _
|
||||
| BOOL_OR _
|
||||
| BOOL_AND _ -> true
|
||||
| _ -> false
|
||||
let fail region value = raise (Error Region.{region; value})
|
||||
|
||||
let is_eof = function EOF _ -> true | _ -> false
|
||||
let check_right_context token next_token buffer : unit =
|
||||
if not (is_eof token) then
|
||||
if is_int token || is_bytes token then
|
||||
match next_token buffer with
|
||||
Some ([], next) ->
|
||||
let pos = (to_region token)#stop in
|
||||
let region = Region.make ~start:pos ~stop:pos in
|
||||
if is_int next then
|
||||
fail region Odd_lengthed_bytes
|
||||
else
|
||||
if is_ident next || is_string next || is_bytes next then
|
||||
fail region Missing_break
|
||||
| Some (_::_, _) | None -> ()
|
||||
else
|
||||
if is_ident token || is_string token then
|
||||
match next_token buffer with
|
||||
Some ([], next) ->
|
||||
if is_ident next || is_string next
|
||||
|| is_bytes next || is_int next
|
||||
then
|
||||
let pos = (to_region token)#stop in
|
||||
let region = Region.make ~start:pos ~stop:pos
|
||||
in fail region Missing_break
|
||||
| Some (_::_, _) | None -> ()
|
||||
|
||||
(* END TRAILER *)
|
||||
}
|
||||
|
@ -4,7 +4,10 @@ module Region = Simple_utils.Region
|
||||
|
||||
module IO =
|
||||
struct
|
||||
let options = EvalOpt.(read ~lang:`CameLIGO ~ext:".mligo")
|
||||
let options =
|
||||
let open EvalOpt in
|
||||
let block = mk_block ~opening:"(*" ~closing:"*)"
|
||||
in read ~block ~line:"//" ".mligo"
|
||||
end
|
||||
|
||||
module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
|
||||
@ -12,4 +15,4 @@ module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
|
||||
let () =
|
||||
match M.trace () with
|
||||
Stdlib.Ok () -> ()
|
||||
| Error Region.{value; _} -> Utils.highlight value
|
||||
| Error Region.{value; _} -> Printf.eprintf "\027[31m%s\027[0m%!" value
|
||||
|
@ -5,7 +5,10 @@ module SSet = Set.Make (String)
|
||||
|
||||
module IO =
|
||||
struct
|
||||
let options = EvalOpt.(read ~lang:`CameLIGO ~ext:".mligo")
|
||||
let options =
|
||||
let open EvalOpt in
|
||||
let block = mk_block ~opening:"(*" ~closing:"*)"
|
||||
in read ~block ~line:"//" ".mligo"
|
||||
end
|
||||
|
||||
module SubIO =
|
||||
@ -14,7 +17,8 @@ module SubIO =
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : EvalOpt.language;
|
||||
block : EvalOpt.block_comment option;
|
||||
line : EvalOpt.line_comment option;
|
||||
ext : string;
|
||||
mode : [`Byte | `Point];
|
||||
cmd : EvalOpt.command;
|
||||
@ -26,7 +30,8 @@ module SubIO =
|
||||
method libs = IO.options#libs
|
||||
method verbose = IO.options#verbose
|
||||
method offsets = IO.options#offsets
|
||||
method lang = IO.options#lang
|
||||
method block = IO.options#block
|
||||
method line = IO.options#line
|
||||
method ext = IO.options#ext
|
||||
method mode = IO.options#mode
|
||||
method cmd = IO.options#cmd
|
||||
@ -37,7 +42,8 @@ module SubIO =
|
||||
EvalOpt.make ~libs:options#libs
|
||||
~verbose:options#verbose
|
||||
~offsets:options#offsets
|
||||
~lang:options#lang
|
||||
?block:options#block
|
||||
?line:options#line
|
||||
~ext:options#ext
|
||||
~mode:options#mode
|
||||
~cmd:options#cmd
|
||||
|
@ -8,15 +8,14 @@ module SSet = Set.Make (String)
|
||||
|
||||
(* Mock IOs TODO: Fill them with CLI options *)
|
||||
|
||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
||||
|
||||
module SubIO =
|
||||
struct
|
||||
type options = <
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : language;
|
||||
block : EvalOpt.block_comment option;
|
||||
line : EvalOpt.line_comment option;
|
||||
ext : string; (* ".ligo" *)
|
||||
mode : [`Byte | `Point];
|
||||
cmd : EvalOpt.command;
|
||||
@ -24,22 +23,25 @@ module SubIO =
|
||||
>
|
||||
|
||||
let options : options =
|
||||
object
|
||||
method libs = []
|
||||
method verbose = SSet.empty
|
||||
method offsets = true
|
||||
method lang = `PascaLIGO
|
||||
method ext = ".ligo"
|
||||
method mode = `Point
|
||||
method cmd = EvalOpt.Quiet
|
||||
method mono = false
|
||||
end
|
||||
let block = EvalOpt.mk_block ~opening:"(*" ~closing:"*)"
|
||||
in object
|
||||
method libs = []
|
||||
method verbose = SSet.empty
|
||||
method offsets = true
|
||||
method block = Some block
|
||||
method line = Some "//"
|
||||
method ext = ".ligo"
|
||||
method mode = `Point
|
||||
method cmd = EvalOpt.Quiet
|
||||
method mono = false
|
||||
end
|
||||
|
||||
let make =
|
||||
EvalOpt.make ~libs:options#libs
|
||||
~verbose:options#verbose
|
||||
~offsets:options#offsets
|
||||
~lang:options#lang
|
||||
?block:options#block
|
||||
?line:options#line
|
||||
~ext:options#ext
|
||||
~mode:options#mode
|
||||
~cmd:options#cmd
|
||||
|
@ -2,6 +2,7 @@ $HOME/git/OCaml-build/Makefile
|
||||
|
||||
../shared/Lexer.mli
|
||||
../shared/Lexer.mll
|
||||
../shared/LexerLib.ml
|
||||
../shared/EvalOpt.ml
|
||||
../shared/EvalOpt.mli
|
||||
../shared/FQueue.ml
|
||||
@ -18,5 +19,6 @@ $HOME/git/OCaml-build/Makefile
|
||||
../shared/LexerUnit.ml
|
||||
../shared/ParserUnit.mli
|
||||
../shared/ParserUnit.ml
|
||||
../shared/LexerLib.ml
|
||||
|
||||
$HOME/git/ligo/_build/default/src/passes/1-parser/pascaligo/ParErr.ml
|
||||
|
@ -163,11 +163,22 @@ val eof : Region.t -> token
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
val is_string : token -> bool
|
||||
val is_bytes : token -> bool
|
||||
val is_int : token -> bool
|
||||
val is_ident : token -> bool
|
||||
val is_kwd : token -> bool
|
||||
val is_constr : token -> bool
|
||||
val is_sym : token -> bool
|
||||
val is_eof : token -> bool
|
||||
|
||||
(* Style *)
|
||||
|
||||
type error
|
||||
|
||||
val error_to_string : error -> string
|
||||
|
||||
exception Error of error Region.reg
|
||||
|
||||
val format_error :
|
||||
?offsets:bool -> [`Byte | `Point] ->
|
||||
error Region.reg -> file:bool -> string Region.reg
|
||||
|
||||
val check_right_context :
|
||||
token ->
|
||||
(Lexing.lexbuf -> (Markup.t list * token) option) ->
|
||||
Lexing.lexbuf ->
|
||||
unit
|
||||
|
@ -483,17 +483,16 @@ type nat_err =
|
||||
| Non_canonical_zero_nat
|
||||
|
||||
let mk_nat lexeme region =
|
||||
match (String.index_opt lexeme 'n') with
|
||||
| None -> Error Invalid_natural
|
||||
| Some _ -> (
|
||||
let z =
|
||||
Str.(global_replace (regexp "_") "" lexeme) |>
|
||||
Str.(global_replace (regexp "n") "") |>
|
||||
Z.of_string in
|
||||
if Z.equal z Z.zero && lexeme <> "0n"
|
||||
then Error Non_canonical_zero_nat
|
||||
else Ok (Nat Region.{region; value = lexeme,z})
|
||||
)
|
||||
match String.index_opt lexeme 'n' with
|
||||
None -> Error Invalid_natural
|
||||
| Some _ ->
|
||||
let z =
|
||||
Str.(global_replace (regexp "_") "" lexeme) |>
|
||||
Str.(global_replace (regexp "n") "") |>
|
||||
Z.of_string in
|
||||
if Z.equal z Z.zero && lexeme <> "0n"
|
||||
then Error Non_canonical_zero_nat
|
||||
else Ok (Nat Region.{region; value = lexeme,z})
|
||||
|
||||
let mk_mutez lexeme region =
|
||||
let z =
|
||||
@ -563,99 +562,60 @@ let mk_attr _header _string _region =
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
let is_string = function
|
||||
String _ -> true
|
||||
| _ -> false
|
||||
let is_string = function String _ -> true | _ -> false
|
||||
let is_bytes = function Bytes _ -> true | _ -> false
|
||||
let is_int = function Int _ -> true | _ -> false
|
||||
let is_ident = function Ident _ -> true | _ -> false
|
||||
let is_eof = function EOF _ -> true | _ -> false
|
||||
|
||||
let is_bytes = function
|
||||
Bytes _ -> true
|
||||
| _ -> false
|
||||
(* Errors *)
|
||||
|
||||
let is_int = function
|
||||
Int _ -> true
|
||||
| _ -> false
|
||||
type error =
|
||||
Odd_lengthed_bytes
|
||||
| Missing_break
|
||||
|
||||
let is_ident = function
|
||||
Ident _ -> true
|
||||
| _ -> false
|
||||
let error_to_string = function
|
||||
Odd_lengthed_bytes ->
|
||||
"The length of the byte sequence is an odd number.\n\
|
||||
Hint: Add or remove a digit."
|
||||
| Missing_break ->
|
||||
"Missing break.\n\
|
||||
Hint: Insert some space."
|
||||
|
||||
let is_kwd = function
|
||||
And _
|
||||
| Attributes _
|
||||
| Begin _
|
||||
| BigMap _
|
||||
| Block _
|
||||
| Case _
|
||||
| Const _
|
||||
| Contains _
|
||||
| Else _
|
||||
| End _
|
||||
| False _
|
||||
| For _
|
||||
| From _
|
||||
| Function _
|
||||
| If _
|
||||
| In _
|
||||
| Is _
|
||||
| List _
|
||||
| Map _
|
||||
| Mod _
|
||||
| Nil _
|
||||
| Not _
|
||||
| Of _
|
||||
| Or _
|
||||
| Patch _
|
||||
| Record _
|
||||
| Remove _
|
||||
| Set _
|
||||
| Skip _
|
||||
| Step _
|
||||
| Then _
|
||||
| To _
|
||||
| True _
|
||||
| Type _
|
||||
| Unit _
|
||||
| Var _
|
||||
| While _
|
||||
| With _ -> true
|
||||
| _ -> false
|
||||
exception Error of error Region.reg
|
||||
|
||||
let is_constr = function
|
||||
Constr _
|
||||
| C_None _
|
||||
| C_Some _ -> true
|
||||
| _ -> false
|
||||
let format_error ?(offsets=true) mode Region.{region; value} ~file =
|
||||
let msg = error_to_string value
|
||||
and reg = region#to_string ~file ~offsets mode in
|
||||
let value = sprintf "Lexical error %s:\n%s\n" reg msg
|
||||
in Region.{value; region}
|
||||
|
||||
let is_sym = function
|
||||
SEMI _
|
||||
| COMMA _
|
||||
| LPAR _
|
||||
| RPAR _
|
||||
| LBRACE _
|
||||
| RBRACE _
|
||||
| LBRACKET _
|
||||
| RBRACKET _
|
||||
| CONS _
|
||||
| VBAR _
|
||||
| ARROW _
|
||||
| ASS _
|
||||
| EQ _
|
||||
| COLON _
|
||||
| LT _
|
||||
| LE _
|
||||
| GT _
|
||||
| GE _
|
||||
| NE _
|
||||
| PLUS _
|
||||
| MINUS _
|
||||
| SLASH _
|
||||
| TIMES _
|
||||
| DOT _
|
||||
| WILD _
|
||||
| CAT _ -> true
|
||||
| _ -> false
|
||||
let fail region value = raise (Error Region.{region; value})
|
||||
|
||||
let is_eof = function EOF _ -> true | _ -> false
|
||||
let check_right_context token next_token buffer : unit =
|
||||
if not (is_eof token) then
|
||||
if is_int token || is_bytes token then
|
||||
match next_token buffer with
|
||||
Some ([], next) ->
|
||||
let pos = (to_region token)#stop in
|
||||
let region = Region.make ~start:pos ~stop:pos in
|
||||
if is_int next then
|
||||
fail region Odd_lengthed_bytes
|
||||
else
|
||||
if is_ident next || is_string next || is_bytes next then
|
||||
fail region Missing_break
|
||||
| Some (_::_, _) | None -> ()
|
||||
else
|
||||
if is_ident token || is_string token then
|
||||
match next_token buffer with
|
||||
Some ([], next) ->
|
||||
if is_ident next || is_string next
|
||||
|| is_bytes next || is_int next
|
||||
then
|
||||
let pos = (to_region token)#stop in
|
||||
let region = Region.make ~start:pos ~stop:pos
|
||||
in fail region Missing_break
|
||||
| Some (_::_, _) | None -> ()
|
||||
|
||||
(* END TRAILER *)
|
||||
}
|
||||
|
@ -4,7 +4,10 @@ module Region = Simple_utils.Region
|
||||
|
||||
module IO =
|
||||
struct
|
||||
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo")
|
||||
let options =
|
||||
let open EvalOpt in
|
||||
let block = mk_block ~opening:"(*" ~closing:"*)"
|
||||
in read ~block ~line:"//" ".ligo"
|
||||
end
|
||||
|
||||
module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
|
||||
|
@ -5,7 +5,10 @@ module SSet = Set.Make (String)
|
||||
|
||||
module IO =
|
||||
struct
|
||||
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo")
|
||||
let options =
|
||||
let open EvalOpt in
|
||||
let block = mk_block ~opening:"(*" ~closing:"*)"
|
||||
in read ~block ~line:"//" ".ligo"
|
||||
end
|
||||
|
||||
module SubIO =
|
||||
@ -14,7 +17,8 @@ module SubIO =
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : EvalOpt.language;
|
||||
block : EvalOpt.block_comment option;
|
||||
line : EvalOpt.line_comment option;
|
||||
ext : string;
|
||||
mode : [`Byte | `Point];
|
||||
cmd : EvalOpt.command;
|
||||
@ -26,7 +30,8 @@ module SubIO =
|
||||
method libs = IO.options#libs
|
||||
method verbose = IO.options#verbose
|
||||
method offsets = IO.options#offsets
|
||||
method lang = IO.options#lang
|
||||
method block = IO.options#block
|
||||
method line = IO.options#line
|
||||
method ext = IO.options#ext
|
||||
method mode = IO.options#mode
|
||||
method cmd = IO.options#cmd
|
||||
@ -37,7 +42,8 @@ module SubIO =
|
||||
EvalOpt.make ~libs:options#libs
|
||||
~verbose:options#verbose
|
||||
~offsets:options#offsets
|
||||
~lang:options#lang
|
||||
?block:options#block
|
||||
?line:options#line
|
||||
~ext:options#ext
|
||||
~mode:options#mode
|
||||
~cmd:options#cmd
|
||||
|
@ -11,15 +11,14 @@ module SSet = Set.Make (String)
|
||||
|
||||
(* Mock IOs TODO: Fill them with CLI options *)
|
||||
|
||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
||||
|
||||
module SubIO =
|
||||
struct
|
||||
type options = <
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : language;
|
||||
block : EvalOpt.block_comment option;
|
||||
line : EvalOpt.line_comment option;
|
||||
ext : string; (* ".religo" *)
|
||||
mode : [`Byte | `Point];
|
||||
cmd : EvalOpt.command;
|
||||
@ -27,22 +26,25 @@ module SubIO =
|
||||
>
|
||||
|
||||
let options : options =
|
||||
object
|
||||
method libs = []
|
||||
method verbose = SSet.empty
|
||||
method offsets = true
|
||||
method lang = `ReasonLIGO
|
||||
method ext = ".religo"
|
||||
method mode = `Point
|
||||
method cmd = EvalOpt.Quiet
|
||||
method mono = false
|
||||
end
|
||||
let block = EvalOpt.mk_block ~opening:"/*" ~closing:"*/"
|
||||
in object
|
||||
method libs = []
|
||||
method verbose = SSet.empty
|
||||
method offsets = true
|
||||
method block = Some block
|
||||
method line = Some "//"
|
||||
method ext = ".religo"
|
||||
method mode = `Point
|
||||
method cmd = EvalOpt.Quiet
|
||||
method mono = false
|
||||
end
|
||||
|
||||
let make =
|
||||
EvalOpt.make ~libs:options#libs
|
||||
~verbose:options#verbose
|
||||
~offsets:options#offsets
|
||||
~lang:options#lang
|
||||
?block:options#block
|
||||
?line:options#line
|
||||
~ext:options#ext
|
||||
~mode:options#mode
|
||||
~cmd:options#cmd
|
||||
|
@ -154,11 +154,22 @@ val eof : Region.t -> token
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
val is_string : token -> bool
|
||||
val is_bytes : token -> bool
|
||||
val is_int : token -> bool
|
||||
val is_ident : token -> bool
|
||||
val is_kwd : token -> bool
|
||||
val is_constr : token -> bool
|
||||
val is_sym : token -> bool
|
||||
val is_eof : token -> bool
|
||||
|
||||
(* Style *)
|
||||
|
||||
type error
|
||||
|
||||
val error_to_string : error -> string
|
||||
|
||||
exception Error of error Region.reg
|
||||
|
||||
val format_error :
|
||||
?offsets:bool -> [`Byte | `Point] ->
|
||||
error Region.reg -> file:bool -> string Region.reg
|
||||
|
||||
val check_right_context :
|
||||
token ->
|
||||
(Lexing.lexbuf -> (Markup.t list * token) option) ->
|
||||
Lexing.lexbuf ->
|
||||
unit
|
||||
|
@ -460,75 +460,60 @@ let mk_attr header lexeme region =
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
let is_string = function
|
||||
String _ -> true
|
||||
| _ -> false
|
||||
let is_string = function String _ -> true | _ -> false
|
||||
let is_bytes = function Bytes _ -> true | _ -> false
|
||||
let is_int = function Int _ -> true | _ -> false
|
||||
let is_ident = function Ident _ -> true | _ -> false
|
||||
let is_eof = function EOF _ -> true | _ -> false
|
||||
|
||||
let is_bytes = function
|
||||
Bytes _ -> true
|
||||
| _ -> false
|
||||
(* Errors *)
|
||||
|
||||
let is_int = function
|
||||
Int _ -> true
|
||||
| _ -> false
|
||||
type error =
|
||||
Odd_lengthed_bytes
|
||||
| Missing_break
|
||||
|
||||
let is_ident = function
|
||||
Ident _ -> true
|
||||
| _ -> false
|
||||
let error_to_string = function
|
||||
Odd_lengthed_bytes ->
|
||||
"The length of the byte sequence is an odd number.\n\
|
||||
Hint: Add or remove a digit."
|
||||
| Missing_break ->
|
||||
"Missing break.\n\
|
||||
Hint: Insert some space."
|
||||
|
||||
let is_kwd = function
|
||||
Else _
|
||||
| False _
|
||||
| If _
|
||||
| Let _
|
||||
| Rec _
|
||||
| Switch _
|
||||
| Mod _
|
||||
| Or _
|
||||
| True _
|
||||
| Type _ -> true
|
||||
| _ -> false
|
||||
exception Error of error Region.reg
|
||||
|
||||
let is_constr = function
|
||||
Constr _
|
||||
| Ident _
|
||||
| False _
|
||||
| True _ -> true
|
||||
| _ -> false
|
||||
let format_error ?(offsets=true) mode Region.{region; value} ~file =
|
||||
let msg = error_to_string value
|
||||
and reg = region#to_string ~file ~offsets mode in
|
||||
let value = sprintf "Lexical error %s:\n%s\n" reg msg
|
||||
in Region.{value; region}
|
||||
|
||||
let is_sym = function
|
||||
CAT _
|
||||
| MINUS _
|
||||
| PLUS _
|
||||
| SLASH _
|
||||
| TIMES _
|
||||
| LPAR _
|
||||
| RPAR _
|
||||
| LBRACKET _
|
||||
| RBRACKET _
|
||||
| LBRACE _
|
||||
| RBRACE _
|
||||
| COMMA _
|
||||
| SEMI _
|
||||
| VBAR _
|
||||
| COLON _
|
||||
| DOT _
|
||||
| ELLIPSIS _
|
||||
| WILD _
|
||||
| EQ _
|
||||
| EQEQ _
|
||||
| NE _
|
||||
| LT _
|
||||
| GT _
|
||||
| LE _
|
||||
| GE _
|
||||
| ARROW _
|
||||
| BOOL_OR _
|
||||
| NOT _
|
||||
| BOOL_AND _ -> true
|
||||
| _ -> false
|
||||
let fail region value = raise (Error Region.{region; value})
|
||||
|
||||
let is_eof = function EOF _ -> true | _ -> false
|
||||
let check_right_context token next_token buffer : unit =
|
||||
if not (is_eof token) then
|
||||
if is_int token || is_bytes token then
|
||||
match next_token buffer with
|
||||
Some ([], next) ->
|
||||
let pos = (to_region token)#stop in
|
||||
let region = Region.make ~start:pos ~stop:pos in
|
||||
if is_int next then
|
||||
fail region Odd_lengthed_bytes
|
||||
else
|
||||
if is_ident next || is_string next || is_bytes next then
|
||||
fail region Missing_break
|
||||
| Some (_::_, _) | None -> ()
|
||||
else
|
||||
if is_ident token || is_string token then
|
||||
match next_token buffer with
|
||||
Some ([], next) ->
|
||||
if is_ident next || is_string next
|
||||
|| is_bytes next || is_int next
|
||||
then
|
||||
let pos = (to_region token)#stop in
|
||||
let region = Region.make ~start:pos ~stop:pos
|
||||
in fail region Missing_break
|
||||
| Some (_::_, _) | None -> ()
|
||||
|
||||
(* END TRAILER *)
|
||||
}
|
||||
|
@ -4,7 +4,10 @@ module Region = Simple_utils.Region
|
||||
|
||||
module IO =
|
||||
struct
|
||||
let options = EvalOpt.(read ~lang:`ReasonLIGO ~ext:".religo")
|
||||
let options =
|
||||
let open EvalOpt in
|
||||
let block = mk_block ~opening:"/*" ~closing:"*/"
|
||||
in read ~block ~line:"//" ".religo"
|
||||
end
|
||||
|
||||
module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
|
||||
@ -12,4 +15,4 @@ module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
|
||||
let () =
|
||||
match M.trace () with
|
||||
Stdlib.Ok () -> ()
|
||||
| Error Region.{value; _} -> Utils.highlight value
|
||||
| Error Region.{value; _} -> Printf.eprintf "\027[31m%s\027[0m%!" value
|
||||
|
@ -40,11 +40,11 @@ let rec curry hd = function
|
||||
in TFun {value; region}
|
||||
| [] -> hd
|
||||
|
||||
let wild_error e =
|
||||
match e with
|
||||
| EVar { value = "_"; _} as e ->
|
||||
let wild_error e =
|
||||
match e with
|
||||
| EVar { value = "_"; _} as e ->
|
||||
let open! SyntaxError in
|
||||
raise (Error (InvalidWild e))
|
||||
raise (Error (InvalidWild e))
|
||||
| _ -> ()
|
||||
|
||||
(* END HEADER *)
|
||||
@ -270,30 +270,30 @@ let_declaration:
|
||||
|
||||
let_binding:
|
||||
"<ident>" type_annotation? "=" expr {
|
||||
wild_error $4;
|
||||
wild_error $4;
|
||||
Scoping.check_reserved_name $1;
|
||||
{binders = PVar $1, []; lhs_type=$2; eq=$3; let_rhs=$4}
|
||||
}
|
||||
| "_" type_annotation? "=" expr {
|
||||
wild_error $4;
|
||||
wild_error $4;
|
||||
{binders = PWild $1, []; lhs_type=$2; eq=$3; let_rhs=$4}
|
||||
}
|
||||
| unit type_annotation? "=" expr {
|
||||
wild_error $4;
|
||||
wild_error $4;
|
||||
{binders = PUnit $1, []; lhs_type=$2; eq=$3; let_rhs=$4}
|
||||
}
|
||||
| record_pattern type_annotation? "=" expr {
|
||||
wild_error $4;
|
||||
wild_error $4;
|
||||
Scoping.check_pattern (PRecord $1);
|
||||
{binders = PRecord $1, []; lhs_type=$2; eq=$3; let_rhs=$4}
|
||||
}
|
||||
| par(closed_irrefutable) type_annotation? "=" expr {
|
||||
wild_error $4;
|
||||
wild_error $4;
|
||||
Scoping.check_pattern $1.value.inside;
|
||||
{binders = PPar $1, []; lhs_type=$2; eq=$3; let_rhs=$4}
|
||||
}
|
||||
| tuple(sub_irrefutable) type_annotation? "=" expr {
|
||||
wild_error $4;
|
||||
wild_error $4;
|
||||
Utils.nsepseq_iter Scoping.check_pattern $1;
|
||||
let hd, tl = $1 in
|
||||
let start = pattern_to_region hd in
|
||||
@ -422,7 +422,7 @@ expr:
|
||||
base_cond__open(expr) | switch_expr(base_cond) { $1 }
|
||||
|
||||
base_cond__open(x):
|
||||
base_expr(x) | conditional(expr_with_let_expr) {
|
||||
base_expr(x) | conditional(expr_with_let_expr) {
|
||||
wild_error $1;
|
||||
$1 }
|
||||
|
||||
@ -460,11 +460,11 @@ fun_expr:
|
||||
let region = cover start stop in
|
||||
|
||||
let rec arg_to_pattern = function
|
||||
EVar v ->
|
||||
EVar v ->
|
||||
if v.value = "_" then
|
||||
PWild v.region
|
||||
else (
|
||||
Scoping.check_reserved_name v;
|
||||
Scoping.check_reserved_name v;
|
||||
PVar v
|
||||
)
|
||||
| EAnnot {region; value = {inside = EVar v, colon, typ; _}} ->
|
||||
@ -895,7 +895,7 @@ path:
|
||||
| projection { Path $1 }
|
||||
|
||||
update_record:
|
||||
"{""..."path "," sep_or_term_list(field_path_assignment,",") "}" {
|
||||
"{" "..." path "," sep_or_term_list(field_path_assignment,",") "}" {
|
||||
let region = cover $1 $6 in
|
||||
let ne_elements, terminator = $5 in
|
||||
let value = {
|
||||
@ -913,7 +913,7 @@ expr_with_let_expr:
|
||||
expr { $1 }
|
||||
| let_expr(expr_with_let_expr) { $1 }
|
||||
|
||||
more_field_assignments:
|
||||
more_field_assignments:
|
||||
"," sep_or_term_list(field_assignment_punning,",") {
|
||||
let elts, _region = $2 in
|
||||
$1, elts
|
||||
@ -926,9 +926,9 @@ sequence_or_record_in:
|
||||
PaSequence {s_elts = elts; s_terminator=None}
|
||||
}
|
||||
| field_assignment more_field_assignments? {
|
||||
match $2 with
|
||||
| Some (comma, elts) ->
|
||||
let r_elts = Utils.nsepseq_cons $1 comma elts in
|
||||
match $2 with
|
||||
| Some (comma, elts) ->
|
||||
let r_elts = Utils.nsepseq_cons $1 comma elts in
|
||||
PaRecord {r_elts; r_terminator = None}
|
||||
| None ->
|
||||
PaRecord {r_elts = ($1, []); r_terminator = None}
|
||||
@ -950,15 +950,15 @@ sequence_or_record:
|
||||
terminator = r.r_terminator}
|
||||
in ERecord {region; value}}
|
||||
|
||||
field_assignment_punning:
|
||||
(* This can only happen with multiple fields -
|
||||
field_assignment_punning:
|
||||
(* This can only happen with multiple fields -
|
||||
one item punning does NOT work in ReasonML *)
|
||||
field_name {
|
||||
field_name {
|
||||
let value = {
|
||||
field_name = $1;
|
||||
assignment = ghost;
|
||||
field_expr = EVar $1 }
|
||||
in
|
||||
in
|
||||
{$1 with value}
|
||||
}
|
||||
| field_assignment {
|
||||
|
@ -5,7 +5,10 @@ module SSet = Set.Make (String)
|
||||
|
||||
module IO =
|
||||
struct
|
||||
let options = EvalOpt.(read ~lang:`ReasonLIGO ~ext:".religo")
|
||||
let options =
|
||||
let open EvalOpt in
|
||||
let block = mk_block ~opening:"/*" ~closing:"*/"
|
||||
in read ~block ~line:"//" ".religo"
|
||||
end
|
||||
|
||||
module SubIO =
|
||||
@ -14,7 +17,8 @@ module SubIO =
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : EvalOpt.language;
|
||||
block : EvalOpt.block_comment option;
|
||||
line : EvalOpt.line_comment option;
|
||||
ext : string;
|
||||
mode : [`Byte | `Point];
|
||||
cmd : EvalOpt.command;
|
||||
@ -26,7 +30,8 @@ module SubIO =
|
||||
method libs = IO.options#libs
|
||||
method verbose = IO.options#verbose
|
||||
method offsets = IO.options#offsets
|
||||
method lang = IO.options#lang
|
||||
method block = IO.options#block
|
||||
method line = IO.options#line
|
||||
method ext = IO.options#ext
|
||||
method mode = IO.options#mode
|
||||
method cmd = IO.options#cmd
|
||||
@ -37,7 +42,8 @@ module SubIO =
|
||||
EvalOpt.make ~libs:options#libs
|
||||
~verbose:options#verbose
|
||||
~offsets:options#offsets
|
||||
~lang:options#lang
|
||||
?block:options#block
|
||||
?line:options#line
|
||||
~ext:options#ext
|
||||
~mode:options#mode
|
||||
~cmd:options#cmd
|
||||
|
@ -1,7 +1,2 @@
|
||||
$HOME/git/OCaml-build/Makefile
|
||||
$HOME/git/OCaml-build/Makefile.cfg
|
||||
|
||||
$HOME/git/ligo/vendors/ligo-utils/simple-utils/pos.mli
|
||||
$HOME/git/ligo/vendors/ligo-utils/simple-utils/pos.ml
|
||||
$HOME/git/ligo/vendors/ligo-utils/simple-utils/region.mli
|
||||
$HOME/git/ligo/vendors/ligo-utils/simple-utils/region.ml
|
||||
|
@ -5,37 +5,42 @@
|
||||
|
||||
type command = Quiet | Copy | Units | Tokens
|
||||
|
||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
||||
|
||||
let lang_to_string = function
|
||||
`PascaLIGO -> "PascaLIGO"
|
||||
| `CameLIGO -> "CameLIGO"
|
||||
| `ReasonLIGO -> "ReasonLIGO"
|
||||
|
||||
(* The type [options] gathers the command-line options. *)
|
||||
|
||||
module SSet = Set.Make (String)
|
||||
|
||||
type line_comment = string (* Opening of a line comment *)
|
||||
type block_comment = <opening : string; closing : string>
|
||||
|
||||
let mk_block ~opening ~closing : block_comment =
|
||||
object
|
||||
method opening = opening
|
||||
method closing = closing
|
||||
end
|
||||
|
||||
type options = <
|
||||
input : string option;
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : language;
|
||||
ext : string; (* ".ligo", ".mligo", ".religo" *)
|
||||
block : block_comment option;
|
||||
line : line_comment option;
|
||||
ext : string;
|
||||
mode : [`Byte | `Point];
|
||||
cmd : command;
|
||||
mono : bool;
|
||||
expr : bool
|
||||
>
|
||||
|
||||
let make ~input ~libs ~verbose ~offsets ~lang ~ext ~mode ~cmd ~mono ~expr : options =
|
||||
let make ~input ~libs ~verbose ~offsets ?block
|
||||
?line ~ext ~mode ~cmd ~mono ~expr : options =
|
||||
object
|
||||
method input = input
|
||||
method libs = libs
|
||||
method verbose = verbose
|
||||
method offsets = offsets
|
||||
method lang = lang
|
||||
method block = block
|
||||
method line = line
|
||||
method ext = ext
|
||||
method mode = mode
|
||||
method cmd = cmd
|
||||
@ -58,10 +63,10 @@ let abort msg =
|
||||
|
||||
(* Help *)
|
||||
|
||||
let help language extension () =
|
||||
let help extension () =
|
||||
let file = Filename.basename Sys.argv.(0) in
|
||||
printf "Usage: %s [<option> ...] [<input>%s | \"-\"]\n" file extension;
|
||||
printf "where <input>%s is the %s source file (default: stdin),\n" extension language;
|
||||
printf "where <input>%s is the LIGO source file (default: stdin),\n" extension;
|
||||
print "and each <option> (if any) is one of the following:";
|
||||
print " -I <paths> Library paths (colon-separated)";
|
||||
print " -t, --tokens Print tokens";
|
||||
@ -105,8 +110,7 @@ let add_verbose d =
|
||||
!verbose
|
||||
(split_at_colon d)
|
||||
|
||||
let specs language extension =
|
||||
let language = lang_to_string language in
|
||||
let specs extension =
|
||||
let open! Getopt in [
|
||||
'I', nolong, None, Some add_path;
|
||||
'c', "copy", set copy true, None;
|
||||
@ -118,7 +122,7 @@ let specs language extension =
|
||||
noshort, "mono", set mono true, None;
|
||||
noshort, "expr", set expr true, None;
|
||||
noshort, "verbose", None, Some add_verbose;
|
||||
'h', "help", Some (help language extension), None;
|
||||
'h', "help", Some (help extension), None;
|
||||
noshort, "version", Some version, None
|
||||
]
|
||||
;;
|
||||
@ -156,7 +160,7 @@ let print_opt () =
|
||||
printf "input = %s\n" (string_of quote !input);
|
||||
printf "libs = %s\n" (string_of_path !libs)
|
||||
|
||||
let check lang ext =
|
||||
let check ?block ?line ~ext =
|
||||
let () =
|
||||
if SSet.mem "cli" !verbose then print_opt () in
|
||||
|
||||
@ -209,16 +213,19 @@ let check lang ext =
|
||||
| false, false, false, true -> Tokens
|
||||
| _ -> abort "Choose one of -q, -c, -u, -t."
|
||||
|
||||
in make ~input ~libs ~verbose ~offsets ~mode ~cmd ~mono ~expr ~lang ~ext
|
||||
in make ~input ~libs ~verbose ~offsets ~mode
|
||||
~cmd ~mono ~expr ?block ?line ~ext
|
||||
|
||||
(* Parsing the command-line options *)
|
||||
|
||||
let read ~lang ~ext =
|
||||
type extension = string
|
||||
|
||||
let read ?block ?line (ext: extension) =
|
||||
try
|
||||
Getopt.parse_cmdline (specs lang ext) anonymous;
|
||||
Getopt.parse_cmdline (specs ext) anonymous;
|
||||
(verb_str :=
|
||||
let apply e a =
|
||||
if a = "" then e else Printf.sprintf "%s, %s" e a
|
||||
in SSet.fold apply !verbose "");
|
||||
check lang ext
|
||||
check ?block ?line ~ext
|
||||
with Getopt.Error msg -> abort msg
|
||||
|
@ -49,19 +49,21 @@ type command = Quiet | Copy | Units | Tokens
|
||||
expected.}
|
||||
} *)
|
||||
|
||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
||||
|
||||
val lang_to_string : language -> string
|
||||
|
||||
module SSet : Set.S with type elt = string and type t = Set.Make(String).t
|
||||
|
||||
type line_comment = string (* Opening of a line comment *)
|
||||
type block_comment = <opening : string; closing : string>
|
||||
|
||||
val mk_block : opening:string -> closing:string -> block_comment
|
||||
|
||||
type options = <
|
||||
input : string option;
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : language;
|
||||
ext : string; (* ".ligo", ".mligo", ".religo" *)
|
||||
block : block_comment option;
|
||||
line : line_comment option;
|
||||
ext : string;
|
||||
mode : [`Byte | `Point];
|
||||
cmd : command;
|
||||
mono : bool;
|
||||
@ -73,7 +75,8 @@ val make :
|
||||
libs:string list ->
|
||||
verbose:SSet.t ->
|
||||
offsets:bool ->
|
||||
lang:language ->
|
||||
?block:block_comment ->
|
||||
?line:line_comment ->
|
||||
ext:string ->
|
||||
mode:[`Byte | `Point] ->
|
||||
cmd:command ->
|
||||
@ -81,8 +84,9 @@ val make :
|
||||
expr:bool ->
|
||||
options
|
||||
|
||||
(** Parsing the command-line options on stdin. The first parameter is
|
||||
the name of the concrete syntax, e.g., [PascaLIGO], and the second
|
||||
is the expected file extension, e.g., ".ligo". *)
|
||||
(** Parsing the command-line options on stdin. *)
|
||||
|
||||
val read : lang:language -> ext:string -> options
|
||||
type extension = string
|
||||
|
||||
val read :
|
||||
?block:block_comment -> ?line:line_comment -> extension -> options
|
||||
|
@ -82,13 +82,6 @@ module type TOKEN =
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
val is_string : token -> bool
|
||||
val is_bytes : token -> bool
|
||||
val is_int : token -> bool
|
||||
val is_ident : token -> bool
|
||||
val is_kwd : token -> bool
|
||||
val is_constr : token -> bool
|
||||
val is_sym : token -> bool
|
||||
val is_eof : token -> bool
|
||||
|
||||
(* Projections *)
|
||||
@ -96,73 +89,8 @@ module type TOKEN =
|
||||
val to_lexeme : token -> lexeme
|
||||
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
|
||||
val to_region : token -> Region.t
|
||||
end
|
||||
|
||||
(* The module type for lexers is [S]. It mainly exports the function
|
||||
[open_token_stream], which returns
|
||||
|
||||
* a function [read] that extracts tokens from a lexing buffer,
|
||||
together with a lexing buffer [buffer] to read from,
|
||||
* a function [close] that closes that buffer,
|
||||
* a function [get_pos] that returns the current position, and
|
||||
* a function [get_last] that returns the region of the last
|
||||
recognised token.
|
||||
* a function [get_file] that returns the name of the file being scanned
|
||||
(empty string if [stdin]).
|
||||
|
||||
Note that a module [Token] is exported too, because the signature
|
||||
of the exported functions depend on it.
|
||||
|
||||
The call [read ~log] evaluates in a lexer (also known as a
|
||||
tokeniser or scanner) whose type is [Lexing.lexbuf -> token], and
|
||||
suitable for a parser generated by Menhir. The argument labelled
|
||||
[log] is a logger, that is, it may print a token and its left
|
||||
markup to a given channel, at the caller's discretion.
|
||||
*)
|
||||
|
||||
module type S =
|
||||
sig
|
||||
module Token : TOKEN
|
||||
type token = Token.token
|
||||
|
||||
type file_path = string
|
||||
type logger = Markup.t list -> token -> unit
|
||||
|
||||
type window =
|
||||
Nil
|
||||
| One of token
|
||||
| Two of token * token
|
||||
|
||||
val slide : token -> window -> window
|
||||
|
||||
type input =
|
||||
File of file_path
|
||||
| String of string
|
||||
| Channel of in_channel
|
||||
| Buffer of Lexing.lexbuf
|
||||
|
||||
type instance = {
|
||||
input : input;
|
||||
read : log:logger -> Lexing.lexbuf -> token;
|
||||
buffer : Lexing.lexbuf;
|
||||
get_win : unit -> window;
|
||||
get_pos : unit -> Pos.t;
|
||||
get_last : unit -> Region.t;
|
||||
get_file : unit -> file_path;
|
||||
close : unit -> unit
|
||||
}
|
||||
|
||||
type open_err = File_opening of string
|
||||
|
||||
val lexbuf_from_input :
|
||||
input -> (Lexing.lexbuf * (unit -> unit), open_err) Stdlib.result
|
||||
|
||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
||||
|
||||
val open_token_stream :
|
||||
language -> input -> (instance, open_err) Stdlib.result
|
||||
|
||||
(* Error reporting *)
|
||||
(* Style *)
|
||||
|
||||
type error
|
||||
|
||||
@ -171,9 +99,17 @@ module type S =
|
||||
exception Error of error Region.reg
|
||||
|
||||
val format_error :
|
||||
?offsets:bool -> [`Byte | `Point] ->
|
||||
error Region.reg -> file:bool -> string Region.reg
|
||||
?offsets:bool ->
|
||||
[`Byte | `Point] ->
|
||||
error Region.reg ->
|
||||
file:bool ->
|
||||
string Region.reg
|
||||
|
||||
val check_right_context :
|
||||
token ->
|
||||
(Lexing.lexbuf -> (Markup.t list * token) option) ->
|
||||
Lexing.lexbuf ->
|
||||
unit
|
||||
end
|
||||
|
||||
(* The functorised interface
|
||||
@ -182,4 +118,4 @@ module type S =
|
||||
submodule in [S].
|
||||
*)
|
||||
|
||||
module Make (Token: TOKEN) : S with module Token = Token
|
||||
module Make (Token: TOKEN) : LexerLib.S with module Token = Token
|
||||
|
File diff suppressed because it is too large
Load Diff
451
src/passes/1-parser/shared/LexerLib.ml
Normal file
451
src/passes/1-parser/shared/LexerLib.ml
Normal file
@ -0,0 +1,451 @@
|
||||
(* Sliding window *)
|
||||
|
||||
(* The type [window] models a two-slot buffer of tokens for reporting
|
||||
after a parse error. Technically, it is a parametric type, but its
|
||||
use is meant for tokens, wherever they are defined.
|
||||
|
||||
In [Two(t1,t2)], the token [t2] is the next to be sent to the
|
||||
parser.
|
||||
|
||||
The call [slide token buffer] pushes the token [token] in the
|
||||
buffer [buffer]. If the buffer is full, that is, it is
|
||||
[Two(t1,t2)], then the token [t2] is discarded to make room for
|
||||
[token].
|
||||
*)
|
||||
|
||||
type 'a window =
|
||||
Nil
|
||||
| One of 'a
|
||||
| Two of 'a * 'a
|
||||
|
||||
let slide token = function
|
||||
Nil -> One token
|
||||
| One t | Two (t,_) -> Two (token,t)
|
||||
|
||||
module Region = Simple_utils.Region
|
||||
module Pos = Simple_utils.Pos
|
||||
|
||||
type lexeme = string
|
||||
|
||||
(* The signature [TOKEN] exports an abstract type [token], so a lexer
|
||||
can be a functor over tokens. This enables to externalise
|
||||
version-dependent constraints in any module whose signature matches
|
||||
[TOKEN]. Generic functions to construct tokens are required.
|
||||
|
||||
Note the predicate [is_eof], which caracterises the virtual token
|
||||
for end-of-file, because it requires special handling.
|
||||
*)
|
||||
|
||||
module type TOKEN =
|
||||
sig
|
||||
type token
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
val is_eof : token -> bool
|
||||
|
||||
(* Projections *)
|
||||
|
||||
val to_lexeme : token -> lexeme
|
||||
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
|
||||
val to_region : token -> Region.t
|
||||
|
||||
(* Style *)
|
||||
|
||||
type error
|
||||
|
||||
val error_to_string : error -> string
|
||||
|
||||
exception Error of error Region.reg
|
||||
|
||||
val format_error :
|
||||
?offsets:bool ->
|
||||
[`Byte | `Point] ->
|
||||
error Region.reg ->
|
||||
file:bool ->
|
||||
string Region.reg
|
||||
|
||||
val check_right_context :
|
||||
token ->
|
||||
(Lexing.lexbuf -> (Markup.t list * token) option) ->
|
||||
Lexing.lexbuf ->
|
||||
unit
|
||||
end
|
||||
|
||||
(* The module type for lexers is [S]. It mainly exports the function
|
||||
[open_token_stream], which returns
|
||||
|
||||
* a function [read] that extracts tokens from a lexing buffer,
|
||||
together with a lexing buffer [buffer] to read from,
|
||||
* a function [close] that closes that buffer,
|
||||
* a function [get_pos] that returns the current position, and
|
||||
* a function [get_last] that returns the region of the last
|
||||
recognised token.
|
||||
* a function [get_file] that returns the name of the file being
|
||||
scanned (empty string if [stdin]).
|
||||
|
||||
Note that a module [Token] is exported too, because the signature
|
||||
of the exported functions depend on it.
|
||||
|
||||
The type [window] is a two-token window, that is, a buffer that
|
||||
contains the last recognised token, and the penultimate (if any).
|
||||
|
||||
The call [read ~log] evaluates in a lexer (also known as a
|
||||
tokeniser or scanner) whose type is [Lexing.lexbuf -> token], and
|
||||
suitable for a parser generated by Menhir. The argument labelled
|
||||
[log] is a logger, that is, it may print a token and its left
|
||||
markup to a given channel, at the caller's discretion.
|
||||
*)
|
||||
|
||||
module type S =
|
||||
sig
|
||||
module Token : TOKEN
|
||||
type token = Token.token
|
||||
|
||||
type file_path = string
|
||||
type logger = Markup.t list -> token -> unit
|
||||
|
||||
type input =
|
||||
File of file_path
|
||||
| String of string
|
||||
| Channel of in_channel
|
||||
| Buffer of Lexing.lexbuf
|
||||
|
||||
type instance = {
|
||||
input : input;
|
||||
read : log:logger -> Lexing.lexbuf -> token;
|
||||
buffer : Lexing.lexbuf;
|
||||
get_win : unit -> token window;
|
||||
get_pos : unit -> Pos.t;
|
||||
get_last : unit -> Region.t;
|
||||
get_file : unit -> file_path;
|
||||
close : unit -> unit
|
||||
}
|
||||
|
||||
type open_err = File_opening of string
|
||||
|
||||
val lexbuf_from_input :
|
||||
input -> (Lexing.lexbuf * (unit -> unit), open_err) Stdlib.result
|
||||
|
||||
type line_comment = string (* Opening of a line comment *)
|
||||
type block_comment = <opening : string; closing : string>
|
||||
|
||||
val mk_block : opening:string -> closing:string -> block_comment
|
||||
|
||||
val open_token_stream :
|
||||
?line:line_comment ->
|
||||
?block:block_comment ->
|
||||
input ->
|
||||
(instance, open_err) Stdlib.result
|
||||
|
||||
(* Error reporting *)
|
||||
|
||||
type error
|
||||
|
||||
val error_to_string : error -> string
|
||||
|
||||
exception Error of error Region.reg
|
||||
|
||||
val format_error :
|
||||
?offsets:bool ->
|
||||
[`Byte | `Point] ->
|
||||
error Region.reg ->
|
||||
file:bool ->
|
||||
string Region.reg
|
||||
end
|
||||
|
||||
(* LEXER ENGINE *)
|
||||
|
||||
(* Resetting file name and line number in the lexing buffer
|
||||
|
||||
The call [reset ~file ~line buffer] modifies in-place the lexing
|
||||
buffer [buffer] so the lexing engine records that the file
|
||||
associated with [buffer] is named [file], and the current line is
|
||||
[line]. This function is useful when lexing a file that has been
|
||||
previously preprocessed by the C preprocessor, in which case the
|
||||
argument [file] is the name of the file that was preprocessed,
|
||||
_not_ the preprocessed file (of which the user is not normally
|
||||
aware). By default, the [line] argument is [1].
|
||||
*)
|
||||
|
||||
let reset_file ~file buffer =
|
||||
let open Lexing in
|
||||
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname = file}
|
||||
|
||||
let reset_line ~line buffer =
|
||||
assert (line >= 0);
|
||||
let open Lexing in
|
||||
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = line}
|
||||
|
||||
let reset_offset ~offset buffer =
|
||||
assert (offset >= 0);
|
||||
let open Lexing in
|
||||
let bol = buffer.lex_curr_p.pos_bol in
|
||||
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum = bol + offset }
|
||||
|
||||
let reset ?file ?line ?offset buffer =
|
||||
let () =
|
||||
match file with
|
||||
Some file -> reset_file ~file buffer
|
||||
| None -> () in
|
||||
let () =
|
||||
match line with
|
||||
Some line -> reset_line ~line buffer
|
||||
| None -> () in
|
||||
match offset with
|
||||
Some offset -> reset_offset ~offset buffer
|
||||
| None -> ()
|
||||
|
||||
(* Rolling back one lexeme _within the current semantic action_ *)
|
||||
|
||||
let rollback buffer =
|
||||
let open Lexing in
|
||||
let len = String.length (lexeme buffer) in
|
||||
let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
|
||||
buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
|
||||
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
|
||||
|
||||
(* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)
|
||||
|
||||
(* When scanning structured constructs, like strings and comments, we
|
||||
need to keep the region of the opening symbol (like double quote,
|
||||
"//" or "(*") in order to report any error more precisely. Since
|
||||
ocamllex is byte-oriented, we need to store the parsed bytes as
|
||||
characters in an accumulator [acc] and also its length [len], so,
|
||||
we are done, it is easy to build the string making up the
|
||||
structured construct with [mk_str] (see above).
|
||||
|
||||
The resulting data structure is called a _thread_. (Note for
|
||||
Emacs: "*)".)
|
||||
*)
|
||||
|
||||
type thread = <
|
||||
opening : Region.t;
|
||||
length : int;
|
||||
acc : char list;
|
||||
push_char : char -> thread;
|
||||
push_string : string -> thread;
|
||||
to_string : string;
|
||||
set_opening : Region.t -> thread
|
||||
>
|
||||
|
||||
let mk_thread region lexeme : thread =
|
||||
(* The call [explode s a] is the list made by pushing the characters
|
||||
in the string [s] on top of [a], in reverse order. For example,
|
||||
[explode "ba" ['c';'d'] = ['a'; 'b'; 'c'; 'd']]. *)
|
||||
let explode s acc =
|
||||
let rec push = function
|
||||
0 -> acc
|
||||
| i -> s.[i-1] :: push (i-1)
|
||||
in push (String.length s)
|
||||
in
|
||||
object
|
||||
val opening = region
|
||||
method opening = opening
|
||||
|
||||
val length = String.length lexeme
|
||||
method length = length
|
||||
|
||||
val acc = explode lexeme []
|
||||
method acc = acc
|
||||
|
||||
method set_opening opening = {< opening; length; acc >}
|
||||
|
||||
method push_char char =
|
||||
{< opening; length=length+1; acc=char::acc >}
|
||||
|
||||
method push_string str =
|
||||
{< opening;
|
||||
length = length + String.length str;
|
||||
acc = explode str acc >}
|
||||
|
||||
(* The value of [thread#to_string] is a string of length
|
||||
[thread#length] containing the [thread#length] characters in
|
||||
the list [thread#acc], in reverse order. For instance,
|
||||
[thread#to_string = "abc"] if [thread#length = 3] and
|
||||
[thread#acc = ['c';'b';'a']]. *)
|
||||
|
||||
method to_string =
|
||||
let bytes = Bytes.make length ' ' in
|
||||
let rec fill i = function
|
||||
[] -> bytes
|
||||
| char::l -> Bytes.set bytes i char; fill (i-1) l
|
||||
in fill (length-1) acc |> Bytes.to_string
|
||||
end
|
||||
|
||||
type file_path = string
|
||||
|
||||
(* STATE *)
|
||||
|
||||
(* Beyond tokens, the result of lexing is a state. The type
|
||||
[state] represents the logical state of the lexing engine, that
|
||||
is, a value which is threaded during scanning and which denotes
|
||||
useful, high-level information beyond what the type
|
||||
[Lexing.lexbuf] in the standard library already provides for
|
||||
all generic lexers.
|
||||
|
||||
Tokens are the smallest units used by the parser to build the
|
||||
abstract syntax tree. The state includes a queue of recognised
|
||||
tokens, with the markup at the left of its lexeme until either
|
||||
the start of the file or the end of the previously recognised
|
||||
token.
|
||||
|
||||
The markup from the last recognised token or, if the first
|
||||
token has not been recognised yet, from the beginning of the
|
||||
file is stored in the field [markup] of the state because it is
|
||||
a side-effect, with respect to the output token list, and we
|
||||
use a record with a single field [units] because that record
|
||||
may be easily extended during the future maintenance of this
|
||||
lexer.
|
||||
|
||||
The state also includes a field [pos] which holds the current
|
||||
position in the LIGO source file. The position is not always
|
||||
updated after a single character has been matched: that depends
|
||||
on the regular expression that matched the lexing buffer.
|
||||
|
||||
The field [window] is a two-token window, that is, a buffer
|
||||
that contains the last recognised token, and the penultimate
|
||||
(if any).
|
||||
|
||||
The fields [decoder] and [supply] offer the support needed
|
||||
for the lexing of UTF-8 encoded characters in comments (the
|
||||
only place where they are allowed in LIGO). The former is the
|
||||
decoder proper and the latter is the effectful function
|
||||
[supply] that takes a byte, a start index and a length and feed
|
||||
it to [decoder]. See the documentation of the third-party
|
||||
library Uutf.
|
||||
*)
|
||||
|
||||
type line_comment = string (* Opening of a line comment *)
|
||||
type block_comment = <opening : string; closing : string>
|
||||
|
||||
let mk_block ~opening ~closing : block_comment =
|
||||
object
|
||||
method opening = opening
|
||||
method closing = closing
|
||||
end
|
||||
|
||||
type 'a state = <
|
||||
units : (Markup.t list * 'a) FQueue.t;
|
||||
markup : Markup.t list;
|
||||
window : 'a window;
|
||||
last : Region.t;
|
||||
pos : Pos.t;
|
||||
decoder : Uutf.decoder;
|
||||
supply : Bytes.t -> int -> int -> unit;
|
||||
block : block_comment option;
|
||||
line : line_comment option;
|
||||
|
||||
enqueue : 'a -> 'a state;
|
||||
set_units : (Markup.t list * 'a) FQueue.t -> 'a state;
|
||||
set_last : Region.t -> 'a state;
|
||||
set_pos : Pos.t -> 'a state;
|
||||
slide_token : 'a -> 'a state;
|
||||
|
||||
sync : Lexing.lexbuf -> Region.t * lexeme * 'a state;
|
||||
|
||||
push_newline : Lexing.lexbuf -> 'a state;
|
||||
push_line : thread -> 'a state;
|
||||
push_block : thread -> 'a state;
|
||||
push_space : Lexing.lexbuf -> 'a state;
|
||||
push_tabs : Lexing.lexbuf -> 'a state;
|
||||
push_bom : Lexing.lexbuf -> 'a state;
|
||||
push_markup : Markup.t -> 'a state;
|
||||
>
|
||||
|
||||
let mk_state ~units ~markup ~window ~last ~pos ~decoder ~supply
|
||||
?block ?line () : _ state =
|
||||
object (self)
|
||||
val units = units
|
||||
method units = units
|
||||
val markup = markup
|
||||
method markup = markup
|
||||
val window = window
|
||||
method window = window
|
||||
val last = last
|
||||
method last = last
|
||||
val pos = pos
|
||||
method pos = pos
|
||||
method decoder = decoder
|
||||
method supply = supply
|
||||
method block = block
|
||||
method line = line
|
||||
|
||||
(* The call [enqueue (token, state)] updates functionally the
|
||||
state [state] by associating the token [token] with the
|
||||
stored markup and enqueuing the pair into the units
|
||||
queue. The field [markup] is then reset to the empty
|
||||
list. *)
|
||||
|
||||
method enqueue token =
|
||||
{< units = FQueue.enq (markup, token) units;
|
||||
markup = [] >}
|
||||
|
||||
method set_units units = {< units = units >}
|
||||
method set_last region = {< last = region >}
|
||||
method set_pos pos = {< pos = pos >}
|
||||
|
||||
method slide_token token =
|
||||
{< window = slide token window >}
|
||||
|
||||
(* The call [sync state buffer] updates the current position
|
||||
in accordance with the contents of the lexing buffer, more
|
||||
precisely, depending on the length of the string which has
|
||||
just been recognised by the scanner: that length is used as
|
||||
a positive offset to the current column. *)
|
||||
|
||||
method sync buffer =
|
||||
let lex = Lexing.lexeme buffer in
|
||||
let len = String.length lex in
|
||||
let start = pos in
|
||||
let stop = start#shift_bytes len in
|
||||
let state = {< pos = stop >}
|
||||
in Region.make ~start ~stop, lex, state
|
||||
|
||||
(* MARKUP *)
|
||||
|
||||
(* Committing markup to the current logical state *)
|
||||
|
||||
method push_markup unit = {< markup = unit :: markup >}
|
||||
|
||||
method push_newline buffer =
|
||||
let () = Lexing.new_line buffer in
|
||||
let value = Lexing.lexeme buffer in
|
||||
let start = self#pos in
|
||||
let stop = start#new_line value in
|
||||
let region = Region.make ~start ~stop in
|
||||
let unit = Markup.Newline Region.{region; value}
|
||||
in {< pos = stop; markup = unit::markup >}
|
||||
|
||||
method push_line thread =
|
||||
let start = thread#opening#start in
|
||||
let region = Region.make ~start ~stop:self#pos
|
||||
and value = thread#to_string in
|
||||
let unit = Markup.LineCom Region.{region; value}
|
||||
in {< markup = unit::markup >}
|
||||
|
||||
method push_block thread =
|
||||
let start = thread#opening#start in
|
||||
let region = Region.make ~start ~stop:self#pos
|
||||
and value = thread#to_string in
|
||||
let unit = Markup.BlockCom Region.{region; value}
|
||||
in {< markup = unit::markup >}
|
||||
|
||||
method push_space buffer =
|
||||
let region, lex, state = self#sync buffer in
|
||||
let value = String.length lex in
|
||||
let unit = Markup.Space Region.{region; value}
|
||||
in state#push_markup unit
|
||||
|
||||
method push_tabs buffer =
|
||||
let region, lex, state = self#sync buffer in
|
||||
let value = String.length lex in
|
||||
let unit = Markup.Tabs Region.{region; value}
|
||||
in state#push_markup unit
|
||||
|
||||
method push_bom buffer =
|
||||
let region, value, state = self#sync buffer in
|
||||
let unit = Markup.BOM Region.{region; value}
|
||||
in state#push_markup unit
|
||||
|
||||
end
|
@ -4,7 +4,7 @@ module Region = Simple_utils.Region
|
||||
|
||||
module type S =
|
||||
sig
|
||||
module Lexer : Lexer.S
|
||||
module Lexer : LexerLib.S
|
||||
|
||||
val output_token :
|
||||
?offsets:bool ->
|
||||
@ -20,13 +20,14 @@ module type S =
|
||||
val trace :
|
||||
?offsets:bool ->
|
||||
[`Byte | `Point] ->
|
||||
EvalOpt.language ->
|
||||
?block:EvalOpt.block_comment ->
|
||||
?line:EvalOpt.line_comment ->
|
||||
Lexer.input ->
|
||||
EvalOpt.command ->
|
||||
(unit, string Region.reg) Stdlib.result
|
||||
end
|
||||
|
||||
module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
|
||||
module Make (Lexer: LexerLib.S) : (S with module Lexer = Lexer) =
|
||||
struct
|
||||
module Lexer = Lexer
|
||||
module Token = Lexer.Token
|
||||
@ -56,9 +57,9 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
|
||||
|
||||
type file_path = string
|
||||
|
||||
let trace ?(offsets=true) mode lang input command :
|
||||
let trace ?(offsets=true) mode ?block ?line input command :
|
||||
(unit, string Region.reg) Stdlib.result =
|
||||
match Lexer.open_token_stream lang input with
|
||||
match Lexer.open_token_stream ?line ?block input with
|
||||
Ok Lexer.{read; buffer; close; _} ->
|
||||
let log = output_token ~offsets mode command stdout
|
||||
and close_all () = flush_all (); close () in
|
||||
|
@ -2,7 +2,7 @@ module Region = Simple_utils.Region
|
||||
|
||||
module type S =
|
||||
sig
|
||||
module Lexer : Lexer.S
|
||||
module Lexer : LexerLib.S
|
||||
|
||||
val output_token :
|
||||
?offsets:bool ->
|
||||
@ -18,10 +18,11 @@ module type S =
|
||||
val trace :
|
||||
?offsets:bool ->
|
||||
[`Byte | `Point] ->
|
||||
EvalOpt.language ->
|
||||
?block:EvalOpt.block_comment ->
|
||||
?line:EvalOpt.line_comment ->
|
||||
Lexer.input ->
|
||||
EvalOpt.command ->
|
||||
(unit, string Region.reg) Stdlib.result
|
||||
end
|
||||
|
||||
module Make (Lexer: Lexer.S) : S with module Lexer = Lexer
|
||||
module Make (Lexer: LexerLib.S) : S with module Lexer = Lexer
|
||||
|
@ -9,7 +9,7 @@ module type IO =
|
||||
val options : EvalOpt.options (* CLI options *)
|
||||
end
|
||||
|
||||
module Make (IO: IO) (Lexer: Lexer.S) =
|
||||
module Make (IO: IO) (Lexer: LexerLib.S) =
|
||||
struct
|
||||
(* Error printing and exception tracing *)
|
||||
|
||||
@ -40,30 +40,42 @@ module Make (IO: IO) (Lexer: Lexer.S) =
|
||||
(* Running the lexer on the preprocessed input *)
|
||||
|
||||
let source = Lexer.String (Buffer.contents pp_buffer) in
|
||||
match Lexer.open_token_stream IO.options#lang source with
|
||||
Ok Lexer.{read; buffer; close; _} ->
|
||||
let close_all () = flush_all (); close () in
|
||||
let rec read_tokens tokens =
|
||||
match read ~log:(fun _ _ -> ()) buffer with
|
||||
token ->
|
||||
if Lexer.Token.is_eof token
|
||||
then Stdlib.Ok (List.rev tokens)
|
||||
else read_tokens (token::tokens)
|
||||
| exception Lexer.Error error ->
|
||||
let file =
|
||||
match IO.options#input with
|
||||
None | Some "-" -> false
|
||||
| Some _ -> true in
|
||||
let () =
|
||||
Printf.eprintf "[LexerUnit] file = %b\n%!" file in
|
||||
let msg =
|
||||
Lexer.format_error ~offsets:IO.options#offsets
|
||||
IO.options#mode ~file error
|
||||
in Stdlib.Error msg in
|
||||
let result = read_tokens []
|
||||
in close_all (); result
|
||||
| Stdlib.Error (Lexer.File_opening msg) ->
|
||||
flush_all (); Stdlib.Error (Region.wrap_ghost msg) in
|
||||
match Lexer.open_token_stream ?line:IO.options#line
|
||||
?block:IO.options#block
|
||||
source with
|
||||
Ok Lexer.{read; buffer; close; _} ->
|
||||
let close_all () = flush_all (); close () in
|
||||
let rec read_tokens tokens =
|
||||
match read ~log:(fun _ _ -> ()) buffer with
|
||||
token ->
|
||||
if Lexer.Token.is_eof token
|
||||
then Stdlib.Ok (List.rev tokens)
|
||||
else read_tokens (token::tokens)
|
||||
(* | exception _ ->
|
||||
Printf.eprintf "Here\n%!"; exit 1
|
||||
*) | exception Lexer.Token.Error error ->
|
||||
let file =
|
||||
match IO.options#input with
|
||||
None | Some "-" -> false
|
||||
| Some _ -> true in
|
||||
let msg =
|
||||
Lexer.Token.format_error
|
||||
~offsets:IO.options#offsets
|
||||
IO.options#mode ~file error
|
||||
in Stdlib.Error msg
|
||||
| exception Lexer.Error error ->
|
||||
let file =
|
||||
match IO.options#input with
|
||||
None | Some "-" -> false
|
||||
| Some _ -> true in
|
||||
let msg =
|
||||
Lexer.format_error ~offsets:IO.options#offsets
|
||||
IO.options#mode ~file error
|
||||
in Stdlib.Error msg in
|
||||
let result = read_tokens []
|
||||
in close_all (); result
|
||||
| Stdlib.Error (Lexer.File_opening msg) ->
|
||||
flush_all (); Stdlib.Error (Region.wrap_ghost msg) in
|
||||
match IO.options#input with
|
||||
None -> preproc stdin
|
||||
| Some file_path ->
|
||||
@ -101,7 +113,8 @@ module Make (IO: IO) (Lexer: Lexer.S) =
|
||||
end
|
||||
else Log.trace ~offsets:IO.options#offsets
|
||||
IO.options#mode
|
||||
IO.options#lang
|
||||
?block:IO.options#block
|
||||
?line:IO.options#line
|
||||
(Lexer.String preproc_str)
|
||||
IO.options#cmd
|
||||
in match IO.options#input with
|
||||
|
@ -7,7 +7,7 @@ module type IO =
|
||||
val options : EvalOpt.options (* CLI options *)
|
||||
end
|
||||
|
||||
module Make (IO: IO) (Lexer: Lexer.S) :
|
||||
module Make (IO: IO) (Lexer: LexerLib.S) :
|
||||
sig
|
||||
val scan : unit -> (Lexer.token list, string Region.reg) Stdlib.result
|
||||
val trace : unit -> (unit, string Region.reg) Stdlib.result
|
||||
|
@ -56,7 +56,7 @@ module type PARSER =
|
||||
(* Main functor *)
|
||||
|
||||
module Make (IO: IO)
|
||||
(Lexer: Lexer.S)
|
||||
(Lexer: LexerLib.S)
|
||||
(Parser: PARSER with type token = Lexer.Token.token)
|
||||
(ParErr: sig val message : int -> string end) =
|
||||
struct
|
||||
@ -122,10 +122,10 @@ module Make (IO: IO)
|
||||
message
|
||||
in
|
||||
match get_win () with
|
||||
Lexer.Nil -> assert false
|
||||
| Lexer.One invalid ->
|
||||
LexerLib.Nil -> assert false
|
||||
| LexerLib.One invalid ->
|
||||
raise (Point (message, None, invalid))
|
||||
| Lexer.Two (invalid, valid) ->
|
||||
| LexerLib.Two (invalid, valid) ->
|
||||
raise (Point (message, Some valid, invalid))
|
||||
|
||||
(* The monolithic API of Menhir *)
|
||||
|
@ -55,7 +55,7 @@ module type PARSER =
|
||||
end
|
||||
|
||||
module Make (IO: IO)
|
||||
(Lexer: Lexer.S)
|
||||
(Lexer: LexerLib.S)
|
||||
(Parser: PARSER with type token = Lexer.Token.token)
|
||||
(ParErr: sig val message : int -> string end) :
|
||||
sig
|
||||
|
@ -4,16 +4,15 @@ module Region = Simple_utils.Region
|
||||
module Preproc = Preprocessor.Preproc
|
||||
module SSet = Set.Make (String)
|
||||
|
||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
||||
|
||||
module type SubIO =
|
||||
sig
|
||||
type options = <
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : language;
|
||||
ext : string; (* ".ligo", ".mligo", ".religo" *)
|
||||
block : EvalOpt.block_comment option;
|
||||
line : EvalOpt.line_comment option;
|
||||
ext : string;
|
||||
mode : [`Byte | `Point];
|
||||
cmd : EvalOpt.command;
|
||||
mono : bool
|
||||
@ -23,7 +22,7 @@ module type SubIO =
|
||||
val make : input:string option -> expr:bool -> EvalOpt.options
|
||||
end
|
||||
|
||||
module type Pretty =
|
||||
module type Printer =
|
||||
sig
|
||||
type state
|
||||
type ast
|
||||
@ -38,14 +37,14 @@ module type Pretty =
|
||||
val print_expr : state -> expr -> unit
|
||||
end
|
||||
|
||||
module Make (Lexer: Lexer.S)
|
||||
module Make (Lexer: LexerLib.S)
|
||||
(AST: sig type t type expr end)
|
||||
(Parser: ParserAPI.PARSER
|
||||
with type ast = AST.t
|
||||
and type expr = AST.expr
|
||||
and type token = Lexer.token)
|
||||
(ParErr: sig val message : int -> string end)
|
||||
(ParserLog: Pretty with type ast = AST.t
|
||||
(ParserLog: Printer with type ast = AST.t
|
||||
and type expr = AST.expr)
|
||||
(SubIO: SubIO) =
|
||||
struct
|
||||
@ -183,10 +182,10 @@ module Make (Lexer: Lexer.S)
|
||||
| exception Parser.Error ->
|
||||
let invalid, valid_opt =
|
||||
match lexer_inst.Lexer.get_win () with
|
||||
Lexer.Nil ->
|
||||
LexerLib.Nil ->
|
||||
assert false (* Safe: There is always at least EOF. *)
|
||||
| Lexer.One invalid -> invalid, None
|
||||
| Lexer.Two (invalid, valid) -> invalid, Some valid in
|
||||
| LexerLib.One invalid -> invalid, None
|
||||
| LexerLib.Two (invalid, valid) -> invalid, Some valid in
|
||||
let point = "", valid_opt, invalid in
|
||||
let error =
|
||||
Front.format_error ~offsets:SubIO.options#offsets
|
||||
@ -226,11 +225,14 @@ module Make (Lexer: Lexer.S)
|
||||
|
||||
let () = close () in
|
||||
let input' = Lexer.String (Buffer.contents buffer) in
|
||||
match Lexer.open_token_stream options#lang input' with
|
||||
match Lexer.open_token_stream ?line:options#line
|
||||
?block:options#block
|
||||
input'
|
||||
with
|
||||
Ok instance ->
|
||||
let open Lexing in
|
||||
instance.Lexer.buffer.lex_curr_p <-
|
||||
{instance.Lexer.buffer.lex_curr_p with pos_fname = file};
|
||||
{instance.Lexer.buffer.lex_curr_p with pos_fname=file};
|
||||
apply instance parser
|
||||
| Stdlib.Error (Lexer.File_opening msg) ->
|
||||
Stdlib.Error (Region.wrap_ghost msg)
|
||||
|
@ -2,18 +2,19 @@
|
||||
|
||||
module Region = Simple_utils.Region
|
||||
|
||||
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
|
||||
|
||||
module SSet : Set.S with type elt = string and type t = Set.Make(String).t
|
||||
|
||||
(* A subtype of [EvalOpt.options] *)
|
||||
|
||||
module type SubIO =
|
||||
sig
|
||||
type options = <
|
||||
libs : string list;
|
||||
verbose : SSet.t;
|
||||
offsets : bool;
|
||||
lang : language;
|
||||
ext : string; (* ".ligo", ".mligo", ".religo" *)
|
||||
block : EvalOpt.block_comment option;
|
||||
line : EvalOpt.line_comment option;
|
||||
ext : string;
|
||||
mode : [`Byte | `Point];
|
||||
cmd : EvalOpt.command;
|
||||
mono : bool
|
||||
@ -23,7 +24,9 @@ module type SubIO =
|
||||
val make : input:string option -> expr:bool -> EvalOpt.options
|
||||
end
|
||||
|
||||
module type Pretty =
|
||||
(* Signature for the printers *)
|
||||
|
||||
module type Printer =
|
||||
sig
|
||||
type state
|
||||
type ast
|
||||
@ -38,14 +41,16 @@ module type Pretty =
|
||||
val print_expr : state -> expr -> unit
|
||||
end
|
||||
|
||||
module Make (Lexer : Lexer.S)
|
||||
(* Main functor to make the parser *)
|
||||
|
||||
module Make (Lexer : LexerLib.S)
|
||||
(AST : sig type t type expr end)
|
||||
(Parser : ParserAPI.PARSER
|
||||
with type ast = AST.t
|
||||
and type expr = AST.expr
|
||||
and type token = Lexer.token)
|
||||
(ParErr : sig val message : int -> string end)
|
||||
(ParserLog : Pretty with type ast = AST.t
|
||||
(ParserLog : Printer with type ast = AST.t
|
||||
and type expr = AST.expr)
|
||||
(SubIO: SubIO) :
|
||||
sig
|
||||
@ -82,4 +87,4 @@ module Make (Lexer : Lexer.S)
|
||||
|
||||
val preprocess :
|
||||
string -> (Buffer.t, message Region.reg) Stdlib.result
|
||||
end
|
||||
end
|
||||
|
@ -13,6 +13,7 @@
|
||||
(preprocess
|
||||
(pps bisect_ppx --conditional))
|
||||
(modules
|
||||
LexerLib
|
||||
LexerUnit
|
||||
ParserUnit
|
||||
ParserAPI
|
||||
|
Loading…
Reference in New Issue
Block a user