The preprocessor library now depends on the kinds of comments

instead of a closed set of languages. I also removed the offsets:
I simply use the current region to determine whether the
preprocessing directive starts at the beginning of a line. I also
removed scanning line indicators, to make the lexer simpler.

LexToken.mll: Moved the function [check_right_context] that
checks stylistic constraints from Lexer.mll to
LexToken.mll. While this triplicates code (as CameLIGO, PascaLIGO
and ReasonLIGO share the same constraints), the benefit is that
Lexer.mll becomes more generic and the signature for the TOKEN
module is simpler (no more exporting predicates, except for
EOF). In accordance with the change of the preprocessor, the
lexers and parsers for LIGO now depend on the kind of comments,
not a fixed set of syntaxes. This gives more versatility when
adding a new language: only the kinds of its comments are needed,
although Lexer.mll and Preproc.mll may have to be modified if
they do not already know the comment delimiters, for example line
comments starting with #.

****************************************************************
BUG: The exceptions coming from LexToken.mll when a stylistic
constraint is broken in [LexToken.check_right_context] are not
caught yet.
****************************************************************

Lexer.mll: I moved out as much as I could from the header into a
new module LexerLib. The aim is to make it easy to reuse as much
as possible of the lexer machinery, when it cannot be used as
is.
This commit is contained in:
Christian Rinderknecht 2020-04-24 21:06:18 +02:00
parent ce5464f9af
commit f3777b4af8
32 changed files with 1193 additions and 1071 deletions

View File

@ -8,15 +8,14 @@ module SSet = Set.Make (String)
(* Mock IOs TODO: Fill them with CLI options *)
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
module SubIO =
struct
type options = <
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : language;
block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string; (* ".mligo" *)
mode : [`Byte | `Point];
cmd : EvalOpt.command;
@ -24,22 +23,25 @@ module SubIO =
>
let options : options =
object
method libs = []
method verbose = SSet.empty
method offsets = true
method lang = `CameLIGO
method ext = ".mligo"
method mode = `Point
method cmd = EvalOpt.Quiet
method mono = false
end
let block = EvalOpt.mk_block ~opening:"(*" ~closing:"*)"
in object
method libs = []
method verbose = SSet.empty
method offsets = true
method block = Some block
method line = Some "//"
method ext = ".mligo"
method mode = `Point
method cmd = EvalOpt.Quiet
method mono = false
end
let make =
EvalOpt.make ~libs:options#libs
~verbose:options#verbose
~offsets:options#offsets
~lang:options#lang
?block:options#block
?line:options#line
~ext:options#ext
~mode:options#mode
~cmd:options#cmd

View File

@ -156,11 +156,22 @@ val eof : Region.t -> token
(* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool
(* Style *)
type error
val error_to_string : error -> string
exception Error of error Region.reg
val format_error :
?offsets:bool -> [`Byte | `Point] ->
error Region.reg -> file:bool -> string Region.reg
val check_right_context :
token ->
(Lexing.lexbuf -> (Markup.t list * token) option) ->
Lexing.lexbuf ->
unit

View File

@ -480,81 +480,60 @@ let mk_attr header lexeme region =
(* Predicates *)
let is_string = function
String _ -> true
| _ -> false
let is_string = function String _ -> true | _ -> false
let is_bytes = function Bytes _ -> true | _ -> false
let is_int = function Int _ -> true | _ -> false
let is_ident = function Ident _ -> true | _ -> false
let is_eof = function EOF _ -> true | _ -> false
let is_bytes = function
Bytes _ -> true
| _ -> false
(* Errors *)
let is_int = function
Int _ -> true
| _ -> false
type error =
Odd_lengthed_bytes
| Missing_break
let is_ident = function
Ident _ -> true
| _ -> false
let error_to_string = function
Odd_lengthed_bytes ->
"The length of the byte sequence is an odd number.\n\
Hint: Add or remove a digit."
| Missing_break ->
"Missing break.\n\
Hint: Insert some space."
let is_kwd = function
| Begin _
| Else _
| End _
| False _
| Fun _
| Rec _
| If _
| In _
| Let _
| Match _
| Mod _
| Not _
| Of _
| Or _
| Then _
| True _
| Type _
| With _ -> true
| _ -> false
exception Error of error Region.reg
let is_constr = function
| Constr _
| Ident _
| False _
| True _ -> true
| _ -> false
let format_error ?(offsets=true) mode Region.{region; value} ~file =
let msg = error_to_string value
and reg = region#to_string ~file ~offsets mode in
let value = sprintf "Lexical error %s:\n%s\n" reg msg
in Region.{value; region}
let is_sym = function
| ARROW _
| CONS _
| CAT _
| MINUS _
| PLUS _
| SLASH _
| TIMES _
| LPAR _
| RPAR _
| LBRACKET _
| RBRACKET _
| LBRACE _
| RBRACE _
| COMMA _
| SEMI _
| VBAR _
| COLON _
| DOT _
| WILD _
| EQ _
| NE _
| LT _
| GT _
| LE _
| GE _
| BOOL_OR _
| BOOL_AND _ -> true
| _ -> false
let fail region value = raise (Error Region.{region; value})
let is_eof = function EOF _ -> true | _ -> false
let check_right_context token next_token buffer : unit =
if not (is_eof token) then
if is_int token || is_bytes token then
match next_token buffer with
Some ([], next) ->
let pos = (to_region token)#stop in
let region = Region.make ~start:pos ~stop:pos in
if is_int next then
fail region Odd_lengthed_bytes
else
if is_ident next || is_string next || is_bytes next then
fail region Missing_break
| Some (_::_, _) | None -> ()
else
if is_ident token || is_string token then
match next_token buffer with
Some ([], next) ->
if is_ident next || is_string next
|| is_bytes next || is_int next
then
let pos = (to_region token)#stop in
let region = Region.make ~start:pos ~stop:pos
in fail region Missing_break
| Some (_::_, _) | None -> ()
(* END TRAILER *)
}

View File

@ -4,7 +4,10 @@ module Region = Simple_utils.Region
module IO =
struct
let options = EvalOpt.(read ~lang:`CameLIGO ~ext:".mligo")
let options =
let open EvalOpt in
let block = mk_block ~opening:"(*" ~closing:"*)"
in read ~block ~line:"//" ".mligo"
end
module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
@ -12,4 +15,4 @@ module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
let () =
match M.trace () with
Stdlib.Ok () -> ()
| Error Region.{value; _} -> Utils.highlight value
| Error Region.{value; _} -> Printf.eprintf "\027[31m%s\027[0m%!" value

View File

@ -5,7 +5,10 @@ module SSet = Set.Make (String)
module IO =
struct
let options = EvalOpt.(read ~lang:`CameLIGO ~ext:".mligo")
let options =
let open EvalOpt in
let block = mk_block ~opening:"(*" ~closing:"*)"
in read ~block ~line:"//" ".mligo"
end
module SubIO =
@ -14,7 +17,8 @@ module SubIO =
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : EvalOpt.language;
block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string;
mode : [`Byte | `Point];
cmd : EvalOpt.command;
@ -26,7 +30,8 @@ module SubIO =
method libs = IO.options#libs
method verbose = IO.options#verbose
method offsets = IO.options#offsets
method lang = IO.options#lang
method block = IO.options#block
method line = IO.options#line
method ext = IO.options#ext
method mode = IO.options#mode
method cmd = IO.options#cmd
@ -37,7 +42,8 @@ module SubIO =
EvalOpt.make ~libs:options#libs
~verbose:options#verbose
~offsets:options#offsets
~lang:options#lang
?block:options#block
?line:options#line
~ext:options#ext
~mode:options#mode
~cmd:options#cmd

View File

@ -8,15 +8,14 @@ module SSet = Set.Make (String)
(* Mock IOs TODO: Fill them with CLI options *)
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
module SubIO =
struct
type options = <
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : language;
block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string; (* ".ligo" *)
mode : [`Byte | `Point];
cmd : EvalOpt.command;
@ -24,22 +23,25 @@ module SubIO =
>
let options : options =
object
method libs = []
method verbose = SSet.empty
method offsets = true
method lang = `PascaLIGO
method ext = ".ligo"
method mode = `Point
method cmd = EvalOpt.Quiet
method mono = false
end
let block = EvalOpt.mk_block ~opening:"(*" ~closing:"*)"
in object
method libs = []
method verbose = SSet.empty
method offsets = true
method block = Some block
method line = Some "//"
method ext = ".ligo"
method mode = `Point
method cmd = EvalOpt.Quiet
method mono = false
end
let make =
EvalOpt.make ~libs:options#libs
~verbose:options#verbose
~offsets:options#offsets
~lang:options#lang
?block:options#block
?line:options#line
~ext:options#ext
~mode:options#mode
~cmd:options#cmd

View File

@ -2,6 +2,7 @@ $HOME/git/OCaml-build/Makefile
../shared/Lexer.mli
../shared/Lexer.mll
../shared/LexerLib.ml
../shared/EvalOpt.ml
../shared/EvalOpt.mli
../shared/FQueue.ml
@ -18,5 +19,6 @@ $HOME/git/OCaml-build/Makefile
../shared/LexerUnit.ml
../shared/ParserUnit.mli
../shared/ParserUnit.ml
../shared/LexerLib.ml
$HOME/git/ligo/_build/default/src/passes/1-parser/pascaligo/ParErr.ml

View File

@ -163,11 +163,22 @@ val eof : Region.t -> token
(* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool
(* Style *)
type error
val error_to_string : error -> string
exception Error of error Region.reg
val format_error :
?offsets:bool -> [`Byte | `Point] ->
error Region.reg -> file:bool -> string Region.reg
val check_right_context :
token ->
(Lexing.lexbuf -> (Markup.t list * token) option) ->
Lexing.lexbuf ->
unit

View File

@ -483,17 +483,16 @@ type nat_err =
| Non_canonical_zero_nat
let mk_nat lexeme region =
match (String.index_opt lexeme 'n') with
| None -> Error Invalid_natural
| Some _ -> (
let z =
Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "n") "") |>
Z.of_string in
if Z.equal z Z.zero && lexeme <> "0n"
then Error Non_canonical_zero_nat
else Ok (Nat Region.{region; value = lexeme,z})
)
match String.index_opt lexeme 'n' with
None -> Error Invalid_natural
| Some _ ->
let z =
Str.(global_replace (regexp "_") "" lexeme) |>
Str.(global_replace (regexp "n") "") |>
Z.of_string in
if Z.equal z Z.zero && lexeme <> "0n"
then Error Non_canonical_zero_nat
else Ok (Nat Region.{region; value = lexeme,z})
let mk_mutez lexeme region =
let z =
@ -563,99 +562,60 @@ let mk_attr _header _string _region =
(* Predicates *)
let is_string = function
String _ -> true
| _ -> false
let is_string = function String _ -> true | _ -> false
let is_bytes = function Bytes _ -> true | _ -> false
let is_int = function Int _ -> true | _ -> false
let is_ident = function Ident _ -> true | _ -> false
let is_eof = function EOF _ -> true | _ -> false
let is_bytes = function
Bytes _ -> true
| _ -> false
(* Errors *)
let is_int = function
Int _ -> true
| _ -> false
type error =
Odd_lengthed_bytes
| Missing_break
let is_ident = function
Ident _ -> true
| _ -> false
let error_to_string = function
Odd_lengthed_bytes ->
"The length of the byte sequence is an odd number.\n\
Hint: Add or remove a digit."
| Missing_break ->
"Missing break.\n\
Hint: Insert some space."
let is_kwd = function
And _
| Attributes _
| Begin _
| BigMap _
| Block _
| Case _
| Const _
| Contains _
| Else _
| End _
| False _
| For _
| From _
| Function _
| If _
| In _
| Is _
| List _
| Map _
| Mod _
| Nil _
| Not _
| Of _
| Or _
| Patch _
| Record _
| Remove _
| Set _
| Skip _
| Step _
| Then _
| To _
| True _
| Type _
| Unit _
| Var _
| While _
| With _ -> true
| _ -> false
exception Error of error Region.reg
let is_constr = function
Constr _
| C_None _
| C_Some _ -> true
| _ -> false
let format_error ?(offsets=true) mode Region.{region; value} ~file =
let msg = error_to_string value
and reg = region#to_string ~file ~offsets mode in
let value = sprintf "Lexical error %s:\n%s\n" reg msg
in Region.{value; region}
let is_sym = function
SEMI _
| COMMA _
| LPAR _
| RPAR _
| LBRACE _
| RBRACE _
| LBRACKET _
| RBRACKET _
| CONS _
| VBAR _
| ARROW _
| ASS _
| EQ _
| COLON _
| LT _
| LE _
| GT _
| GE _
| NE _
| PLUS _
| MINUS _
| SLASH _
| TIMES _
| DOT _
| WILD _
| CAT _ -> true
| _ -> false
let fail region value = raise (Error Region.{region; value})
let is_eof = function EOF _ -> true | _ -> false
let check_right_context token next_token buffer : unit =
if not (is_eof token) then
if is_int token || is_bytes token then
match next_token buffer with
Some ([], next) ->
let pos = (to_region token)#stop in
let region = Region.make ~start:pos ~stop:pos in
if is_int next then
fail region Odd_lengthed_bytes
else
if is_ident next || is_string next || is_bytes next then
fail region Missing_break
| Some (_::_, _) | None -> ()
else
if is_ident token || is_string token then
match next_token buffer with
Some ([], next) ->
if is_ident next || is_string next
|| is_bytes next || is_int next
then
let pos = (to_region token)#stop in
let region = Region.make ~start:pos ~stop:pos
in fail region Missing_break
| Some (_::_, _) | None -> ()
(* END TRAILER *)
}

View File

@ -4,7 +4,10 @@ module Region = Simple_utils.Region
module IO =
struct
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo")
let options =
let open EvalOpt in
let block = mk_block ~opening:"(*" ~closing:"*)"
in read ~block ~line:"//" ".ligo"
end
module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))

View File

@ -5,7 +5,10 @@ module SSet = Set.Make (String)
module IO =
struct
let options = EvalOpt.(read ~lang:`PascaLIGO ~ext:".ligo")
let options =
let open EvalOpt in
let block = mk_block ~opening:"(*" ~closing:"*)"
in read ~block ~line:"//" ".ligo"
end
module SubIO =
@ -14,7 +17,8 @@ module SubIO =
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : EvalOpt.language;
block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string;
mode : [`Byte | `Point];
cmd : EvalOpt.command;
@ -26,7 +30,8 @@ module SubIO =
method libs = IO.options#libs
method verbose = IO.options#verbose
method offsets = IO.options#offsets
method lang = IO.options#lang
method block = IO.options#block
method line = IO.options#line
method ext = IO.options#ext
method mode = IO.options#mode
method cmd = IO.options#cmd
@ -37,7 +42,8 @@ module SubIO =
EvalOpt.make ~libs:options#libs
~verbose:options#verbose
~offsets:options#offsets
~lang:options#lang
?block:options#block
?line:options#line
~ext:options#ext
~mode:options#mode
~cmd:options#cmd

View File

@ -11,15 +11,14 @@ module SSet = Set.Make (String)
(* Mock IOs TODO: Fill them with CLI options *)
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
module SubIO =
struct
type options = <
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : language;
block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string; (* ".religo" *)
mode : [`Byte | `Point];
cmd : EvalOpt.command;
@ -27,22 +26,25 @@ module SubIO =
>
let options : options =
object
method libs = []
method verbose = SSet.empty
method offsets = true
method lang = `ReasonLIGO
method ext = ".religo"
method mode = `Point
method cmd = EvalOpt.Quiet
method mono = false
end
let block = EvalOpt.mk_block ~opening:"/*" ~closing:"*/"
in object
method libs = []
method verbose = SSet.empty
method offsets = true
method block = Some block
method line = Some "//"
method ext = ".religo"
method mode = `Point
method cmd = EvalOpt.Quiet
method mono = false
end
let make =
EvalOpt.make ~libs:options#libs
~verbose:options#verbose
~offsets:options#offsets
~lang:options#lang
?block:options#block
?line:options#line
~ext:options#ext
~mode:options#mode
~cmd:options#cmd

View File

@ -154,11 +154,22 @@ val eof : Region.t -> token
(* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool
(* Style *)
type error
val error_to_string : error -> string
exception Error of error Region.reg
val format_error :
?offsets:bool -> [`Byte | `Point] ->
error Region.reg -> file:bool -> string Region.reg
val check_right_context :
token ->
(Lexing.lexbuf -> (Markup.t list * token) option) ->
Lexing.lexbuf ->
unit

View File

@ -460,75 +460,60 @@ let mk_attr header lexeme region =
(* Predicates *)
let is_string = function
String _ -> true
| _ -> false
let is_string = function String _ -> true | _ -> false
let is_bytes = function Bytes _ -> true | _ -> false
let is_int = function Int _ -> true | _ -> false
let is_ident = function Ident _ -> true | _ -> false
let is_eof = function EOF _ -> true | _ -> false
let is_bytes = function
Bytes _ -> true
| _ -> false
(* Errors *)
let is_int = function
Int _ -> true
| _ -> false
type error =
Odd_lengthed_bytes
| Missing_break
let is_ident = function
Ident _ -> true
| _ -> false
let error_to_string = function
Odd_lengthed_bytes ->
"The length of the byte sequence is an odd number.\n\
Hint: Add or remove a digit."
| Missing_break ->
"Missing break.\n\
Hint: Insert some space."
let is_kwd = function
Else _
| False _
| If _
| Let _
| Rec _
| Switch _
| Mod _
| Or _
| True _
| Type _ -> true
| _ -> false
exception Error of error Region.reg
let is_constr = function
Constr _
| Ident _
| False _
| True _ -> true
| _ -> false
let format_error ?(offsets=true) mode Region.{region; value} ~file =
let msg = error_to_string value
and reg = region#to_string ~file ~offsets mode in
let value = sprintf "Lexical error %s:\n%s\n" reg msg
in Region.{value; region}
let is_sym = function
CAT _
| MINUS _
| PLUS _
| SLASH _
| TIMES _
| LPAR _
| RPAR _
| LBRACKET _
| RBRACKET _
| LBRACE _
| RBRACE _
| COMMA _
| SEMI _
| VBAR _
| COLON _
| DOT _
| ELLIPSIS _
| WILD _
| EQ _
| EQEQ _
| NE _
| LT _
| GT _
| LE _
| GE _
| ARROW _
| BOOL_OR _
| NOT _
| BOOL_AND _ -> true
| _ -> false
let fail region value = raise (Error Region.{region; value})
let is_eof = function EOF _ -> true | _ -> false
let check_right_context token next_token buffer : unit =
if not (is_eof token) then
if is_int token || is_bytes token then
match next_token buffer with
Some ([], next) ->
let pos = (to_region token)#stop in
let region = Region.make ~start:pos ~stop:pos in
if is_int next then
fail region Odd_lengthed_bytes
else
if is_ident next || is_string next || is_bytes next then
fail region Missing_break
| Some (_::_, _) | None -> ()
else
if is_ident token || is_string token then
match next_token buffer with
Some ([], next) ->
if is_ident next || is_string next
|| is_bytes next || is_int next
then
let pos = (to_region token)#stop in
let region = Region.make ~start:pos ~stop:pos
in fail region Missing_break
| Some (_::_, _) | None -> ()
(* END TRAILER *)
}

View File

@ -4,7 +4,10 @@ module Region = Simple_utils.Region
module IO =
struct
let options = EvalOpt.(read ~lang:`ReasonLIGO ~ext:".religo")
let options =
let open EvalOpt in
let block = mk_block ~opening:"/*" ~closing:"*/"
in read ~block ~line:"//" ".religo"
end
module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
@ -12,4 +15,4 @@ module M = LexerUnit.Make (IO) (Lexer.Make (LexToken))
let () =
match M.trace () with
Stdlib.Ok () -> ()
| Error Region.{value; _} -> Utils.highlight value
| Error Region.{value; _} -> Printf.eprintf "\027[31m%s\027[0m%!" value

View File

@ -40,11 +40,11 @@ let rec curry hd = function
in TFun {value; region}
| [] -> hd
let wild_error e =
match e with
| EVar { value = "_"; _} as e ->
let wild_error e =
match e with
| EVar { value = "_"; _} as e ->
let open! SyntaxError in
raise (Error (InvalidWild e))
raise (Error (InvalidWild e))
| _ -> ()
(* END HEADER *)
@ -270,30 +270,30 @@ let_declaration:
let_binding:
"<ident>" type_annotation? "=" expr {
wild_error $4;
wild_error $4;
Scoping.check_reserved_name $1;
{binders = PVar $1, []; lhs_type=$2; eq=$3; let_rhs=$4}
}
| "_" type_annotation? "=" expr {
wild_error $4;
wild_error $4;
{binders = PWild $1, []; lhs_type=$2; eq=$3; let_rhs=$4}
}
| unit type_annotation? "=" expr {
wild_error $4;
wild_error $4;
{binders = PUnit $1, []; lhs_type=$2; eq=$3; let_rhs=$4}
}
| record_pattern type_annotation? "=" expr {
wild_error $4;
wild_error $4;
Scoping.check_pattern (PRecord $1);
{binders = PRecord $1, []; lhs_type=$2; eq=$3; let_rhs=$4}
}
| par(closed_irrefutable) type_annotation? "=" expr {
wild_error $4;
wild_error $4;
Scoping.check_pattern $1.value.inside;
{binders = PPar $1, []; lhs_type=$2; eq=$3; let_rhs=$4}
}
| tuple(sub_irrefutable) type_annotation? "=" expr {
wild_error $4;
wild_error $4;
Utils.nsepseq_iter Scoping.check_pattern $1;
let hd, tl = $1 in
let start = pattern_to_region hd in
@ -422,7 +422,7 @@ expr:
base_cond__open(expr) | switch_expr(base_cond) { $1 }
base_cond__open(x):
base_expr(x) | conditional(expr_with_let_expr) {
base_expr(x) | conditional(expr_with_let_expr) {
wild_error $1;
$1 }
@ -460,11 +460,11 @@ fun_expr:
let region = cover start stop in
let rec arg_to_pattern = function
EVar v ->
EVar v ->
if v.value = "_" then
PWild v.region
else (
Scoping.check_reserved_name v;
Scoping.check_reserved_name v;
PVar v
)
| EAnnot {region; value = {inside = EVar v, colon, typ; _}} ->
@ -895,7 +895,7 @@ path:
| projection { Path $1 }
update_record:
"{""..."path "," sep_or_term_list(field_path_assignment,",") "}" {
"{" "..." path "," sep_or_term_list(field_path_assignment,",") "}" {
let region = cover $1 $6 in
let ne_elements, terminator = $5 in
let value = {
@ -913,7 +913,7 @@ expr_with_let_expr:
expr { $1 }
| let_expr(expr_with_let_expr) { $1 }
more_field_assignments:
more_field_assignments:
"," sep_or_term_list(field_assignment_punning,",") {
let elts, _region = $2 in
$1, elts
@ -926,9 +926,9 @@ sequence_or_record_in:
PaSequence {s_elts = elts; s_terminator=None}
}
| field_assignment more_field_assignments? {
match $2 with
| Some (comma, elts) ->
let r_elts = Utils.nsepseq_cons $1 comma elts in
match $2 with
| Some (comma, elts) ->
let r_elts = Utils.nsepseq_cons $1 comma elts in
PaRecord {r_elts; r_terminator = None}
| None ->
PaRecord {r_elts = ($1, []); r_terminator = None}
@ -950,15 +950,15 @@ sequence_or_record:
terminator = r.r_terminator}
in ERecord {region; value}}
field_assignment_punning:
(* This can only happen with multiple fields -
field_assignment_punning:
(* This can only happen with multiple fields -
one item punning does NOT work in ReasonML *)
field_name {
field_name {
let value = {
field_name = $1;
assignment = ghost;
field_expr = EVar $1 }
in
in
{$1 with value}
}
| field_assignment {

View File

@ -5,7 +5,10 @@ module SSet = Set.Make (String)
module IO =
struct
let options = EvalOpt.(read ~lang:`ReasonLIGO ~ext:".religo")
let options =
let open EvalOpt in
let block = mk_block ~opening:"/*" ~closing:"*/"
in read ~block ~line:"//" ".religo"
end
module SubIO =
@ -14,7 +17,8 @@ module SubIO =
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : EvalOpt.language;
block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string;
mode : [`Byte | `Point];
cmd : EvalOpt.command;
@ -26,7 +30,8 @@ module SubIO =
method libs = IO.options#libs
method verbose = IO.options#verbose
method offsets = IO.options#offsets
method lang = IO.options#lang
method block = IO.options#block
method line = IO.options#line
method ext = IO.options#ext
method mode = IO.options#mode
method cmd = IO.options#cmd
@ -37,7 +42,8 @@ module SubIO =
EvalOpt.make ~libs:options#libs
~verbose:options#verbose
~offsets:options#offsets
~lang:options#lang
?block:options#block
?line:options#line
~ext:options#ext
~mode:options#mode
~cmd:options#cmd

View File

@ -1,7 +1,2 @@
$HOME/git/OCaml-build/Makefile
$HOME/git/OCaml-build/Makefile.cfg
$HOME/git/ligo/vendors/ligo-utils/simple-utils/pos.mli
$HOME/git/ligo/vendors/ligo-utils/simple-utils/pos.ml
$HOME/git/ligo/vendors/ligo-utils/simple-utils/region.mli
$HOME/git/ligo/vendors/ligo-utils/simple-utils/region.ml

View File

@ -5,37 +5,42 @@
type command = Quiet | Copy | Units | Tokens
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
let lang_to_string = function
`PascaLIGO -> "PascaLIGO"
| `CameLIGO -> "CameLIGO"
| `ReasonLIGO -> "ReasonLIGO"
(* The type [options] gathers the command-line options. *)
module SSet = Set.Make (String)
type line_comment = string (* Opening of a line comment *)
type block_comment = <opening : string; closing : string>
let mk_block ~opening ~closing : block_comment =
object
method opening = opening
method closing = closing
end
type options = <
input : string option;
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : language;
ext : string; (* ".ligo", ".mligo", ".religo" *)
block : block_comment option;
line : line_comment option;
ext : string;
mode : [`Byte | `Point];
cmd : command;
mono : bool;
expr : bool
>
let make ~input ~libs ~verbose ~offsets ~lang ~ext ~mode ~cmd ~mono ~expr : options =
let make ~input ~libs ~verbose ~offsets ?block
?line ~ext ~mode ~cmd ~mono ~expr : options =
object
method input = input
method libs = libs
method verbose = verbose
method offsets = offsets
method lang = lang
method block = block
method line = line
method ext = ext
method mode = mode
method cmd = cmd
@ -58,10 +63,10 @@ let abort msg =
(* Help *)
let help language extension () =
let help extension () =
let file = Filename.basename Sys.argv.(0) in
printf "Usage: %s [<option> ...] [<input>%s | \"-\"]\n" file extension;
printf "where <input>%s is the %s source file (default: stdin),\n" extension language;
printf "where <input>%s is the LIGO source file (default: stdin),\n" extension;
print "and each <option> (if any) is one of the following:";
print " -I <paths> Library paths (colon-separated)";
print " -t, --tokens Print tokens";
@ -105,8 +110,7 @@ let add_verbose d =
!verbose
(split_at_colon d)
let specs language extension =
let language = lang_to_string language in
let specs extension =
let open! Getopt in [
'I', nolong, None, Some add_path;
'c', "copy", set copy true, None;
@ -118,7 +122,7 @@ let specs language extension =
noshort, "mono", set mono true, None;
noshort, "expr", set expr true, None;
noshort, "verbose", None, Some add_verbose;
'h', "help", Some (help language extension), None;
'h', "help", Some (help extension), None;
noshort, "version", Some version, None
]
;;
@ -156,7 +160,7 @@ let print_opt () =
printf "input = %s\n" (string_of quote !input);
printf "libs = %s\n" (string_of_path !libs)
let check lang ext =
let check ?block ?line ~ext =
let () =
if SSet.mem "cli" !verbose then print_opt () in
@ -209,16 +213,19 @@ let check lang ext =
| false, false, false, true -> Tokens
| _ -> abort "Choose one of -q, -c, -u, -t."
in make ~input ~libs ~verbose ~offsets ~mode ~cmd ~mono ~expr ~lang ~ext
in make ~input ~libs ~verbose ~offsets ~mode
~cmd ~mono ~expr ?block ?line ~ext
(* Parsing the command-line options *)
let read ~lang ~ext =
type extension = string
let read ?block ?line (ext: extension) =
try
Getopt.parse_cmdline (specs lang ext) anonymous;
Getopt.parse_cmdline (specs ext) anonymous;
(verb_str :=
let apply e a =
if a = "" then e else Printf.sprintf "%s, %s" e a
in SSet.fold apply !verbose "");
check lang ext
check ?block ?line ~ext
with Getopt.Error msg -> abort msg

View File

@ -49,19 +49,21 @@ type command = Quiet | Copy | Units | Tokens
expected.}
} *)
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
val lang_to_string : language -> string
module SSet : Set.S with type elt = string and type t = Set.Make(String).t
type line_comment = string (* Opening of a line comment *)
type block_comment = <opening : string; closing : string>
val mk_block : opening:string -> closing:string -> block_comment
type options = <
input : string option;
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : language;
ext : string; (* ".ligo", ".mligo", ".religo" *)
block : block_comment option;
line : line_comment option;
ext : string;
mode : [`Byte | `Point];
cmd : command;
mono : bool;
@ -73,7 +75,8 @@ val make :
libs:string list ->
verbose:SSet.t ->
offsets:bool ->
lang:language ->
?block:block_comment ->
?line:line_comment ->
ext:string ->
mode:[`Byte | `Point] ->
cmd:command ->
@ -81,8 +84,9 @@ val make :
expr:bool ->
options
(** Parsing the command-line options on stdin. The first parameter is
the name of the concrete syntax, e.g., [PascaLIGO], and the second
is the expected file extension, e.g., ".ligo". *)
(** Parsing the command-line options on stdin. *)
val read : lang:language -> ext:string -> options
type extension = string
val read :
?block:block_comment -> ?line:line_comment -> extension -> options

View File

@ -82,13 +82,6 @@ module type TOKEN =
(* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool
(* Projections *)
@ -96,73 +89,8 @@ module type TOKEN =
val to_lexeme : token -> lexeme
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
val to_region : token -> Region.t
end
(* The module type for lexers is [S]. It mainly exports the function
[open_token_stream], which returns
* a function [read] that extracts tokens from a lexing buffer,
together with a lexing buffer [buffer] to read from,
* a function [close] that closes that buffer,
* a function [get_pos] that returns the current position, and
* a function [get_last] that returns the region of the last
recognised token.
* a function [get_file] that returns the name of the file being scanned
(empty string if [stdin]).
Note that a module [Token] is exported too, because the signature
of the exported functions depend on it.
The call [read ~log] evaluates in a lexer (also known as a
tokeniser or scanner) whose type is [Lexing.lexbuf -> token], and
suitable for a parser generated by Menhir. The argument labelled
[log] is a logger, that is, it may print a token and its left
markup to a given channel, at the caller's discretion.
*)
module type S =
sig
module Token : TOKEN
type token = Token.token
type file_path = string
type logger = Markup.t list -> token -> unit
type window =
Nil
| One of token
| Two of token * token
val slide : token -> window -> window
type input =
File of file_path
| String of string
| Channel of in_channel
| Buffer of Lexing.lexbuf
type instance = {
input : input;
read : log:logger -> Lexing.lexbuf -> token;
buffer : Lexing.lexbuf;
get_win : unit -> window;
get_pos : unit -> Pos.t;
get_last : unit -> Region.t;
get_file : unit -> file_path;
close : unit -> unit
}
type open_err = File_opening of string
val lexbuf_from_input :
input -> (Lexing.lexbuf * (unit -> unit), open_err) Stdlib.result
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
val open_token_stream :
language -> input -> (instance, open_err) Stdlib.result
(* Error reporting *)
(* Style *)
type error
@ -171,9 +99,17 @@ module type S =
exception Error of error Region.reg
val format_error :
?offsets:bool -> [`Byte | `Point] ->
error Region.reg -> file:bool -> string Region.reg
?offsets:bool ->
[`Byte | `Point] ->
error Region.reg ->
file:bool ->
string Region.reg
val check_right_context :
token ->
(Lexing.lexbuf -> (Markup.t list * token) option) ->
Lexing.lexbuf ->
unit
end
(* The functorised interface
@ -182,4 +118,4 @@ module type S =
submodule in [S].
*)
module Make (Token: TOKEN) : S with module Token = Token
module Make (Token: TOKEN) : LexerLib.S with module Token = Token

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,451 @@
(* Sliding window *)

(* The type ['a window] models a two-slot buffer of tokens for
   reporting after a parse error. Technically, it is a parametric
   type, but its use is meant for tokens, wherever they are defined.

   In [Two (t1, t2)], the token [t2] is the next to be sent to the
   parser.

   The call [slide token window] pushes the token [token] into the
   buffer [window]. If the buffer is full, that is, of the shape
   [Two (t1, t2)], then the token [t2] is discarded to make room for
   [token]. *)

type 'a window =
  Nil
| One of 'a
| Two of 'a * 'a

let slide token window =
  match window with
    Nil          -> One token
  | One t        -> Two (token, t)
  | Two (t, _)   -> Two (token, t)
(* Aliases for the vendored utility modules handling source regions
   and positions. *)
module Region = Simple_utils.Region
module Pos = Simple_utils.Pos

(* A lexeme is the concrete string matched by the scanner for a
   token. *)
type lexeme = string
(* The signature [TOKEN] exports an abstract type [token], so a lexer
   can be a functor over tokens. This enables to externalise
   version-dependent constraints in any module whose signature matches
   [TOKEN]. Generic functions to construct tokens are required.

   Note the predicate [is_eof], which characterises the virtual token
   for end-of-file, because it requires special handling. *)

module type TOKEN =
  sig
    type token

    (* Predicates *)

    (* [is_eof t] holds iff [t] is the virtual end-of-file token. *)
    val is_eof : token -> bool

    (* Projections *)

    (* [to_lexeme t] is the concrete syntax of the token [t]. *)
    val to_lexeme : token -> lexeme

    (* [to_string t ?offsets mode] is a human-readable rendering of
       [t]; [mode] selects byte-oriented or code-point-oriented
       columns. *)
    val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string

    (* [to_region t] is the source region covered by the token [t]. *)
    val to_region : token -> Region.t

    (* Style *)

    (* Errors about stylistic constraints on the token stream. *)
    type error

    val error_to_string : error -> string

    exception Error of error Region.reg

    val format_error :
      ?offsets:bool ->
      [`Byte | `Point] ->
      error Region.reg ->
      file:bool ->
      string Region.reg

    (* [check_right_context token scan lexbuf] checks stylistic
       constraints on the tokens to the right of [token], using [scan]
       to peek at the following markup and token. Errors are meant to
       be signalled through [Error] — NOTE(review): confirm against
       the implementations in the LexToken modules. *)
    val check_right_context :
      token ->
      (Lexing.lexbuf -> (Markup.t list * token) option) ->
      Lexing.lexbuf ->
      unit
  end
(* The module type for lexers is [S]. It mainly exports the function
   [open_token_stream], which returns

     * a function [read] that extracts tokens from a lexing buffer,
       together with a lexing buffer [buffer] to read from,
     * a function [close] that closes that buffer,
     * a function [get_pos] that returns the current position, and
     * a function [get_last] that returns the region of the last
       recognised token,
     * a function [get_file] that returns the name of the file being
       scanned (empty string if [stdin]).

   Note that a module [Token] is exported too, because the signature
   of the exported functions depend on it.

   The type [window] is a two-token window, that is, a buffer that
   contains the last recognised token, and the penultimate (if any).

   The call [read ~log] evaluates in a lexer (also known as a
   tokeniser or scanner) whose type is [Lexing.lexbuf -> token], and
   suitable for a parser generated by Menhir. The argument labelled
   [log] is a logger, that is, it may print a token and its left
   markup to a given channel, at the caller's discretion. *)

module type S =
  sig
    module Token : TOKEN
    type token = Token.token

    type file_path = string
    type logger = Markup.t list -> token -> unit

    (* The sources a lexer instance can read from *)
    type input =
      File of file_path
    | String of string
    | Channel of in_channel
    | Buffer of Lexing.lexbuf

    (* A lexer instance bundles the input, the token reader and the
       accessors to the lexer's observable state. *)
    type instance = {
      input : input;
      read : log:logger -> Lexing.lexbuf -> token;
      buffer : Lexing.lexbuf;
      get_win : unit -> token window;
      get_pos : unit -> Pos.t;
      get_last : unit -> Region.t;
      get_file : unit -> file_path;
      close : unit -> unit
    }

    type open_err = File_opening of string

    val lexbuf_from_input :
      input -> (Lexing.lexbuf * (unit -> unit), open_err) Stdlib.result

    (* The lexer is configured by the kinds of comments of the
       concrete syntax, not by a closed set of languages. *)
    type line_comment = string (* Opening of a line comment *)
    type block_comment = <opening : string; closing : string>

    val mk_block : opening:string -> closing:string -> block_comment

    (* [open_token_stream ?line ?block input] makes a lexer instance
       reading from [input]; omitting [?line] or [?block] disables
       the corresponding kind of comment. *)
    val open_token_stream :
      ?line:line_comment ->
      ?block:block_comment ->
      input ->
      (instance, open_err) Stdlib.result

    (* Error reporting *)

    type error

    val error_to_string : error -> string

    exception Error of error Region.reg

    val format_error :
      ?offsets:bool ->
      [`Byte | `Point] ->
      error Region.reg ->
      file:bool ->
      string Region.reg
  end
(* LEXER ENGINE *)

(* Resetting the file name, line number and offset in a lexing buffer

   The call [reset ?file ?line ?offset lexbuf] patches the lexing
   buffer [lexbuf] in place so that the lexing engine records that the
   file associated with [lexbuf] is named [file], that the current
   line is [line], and that the horizontal offset on that line is
   [offset]. This is useful when lexing a file that has been
   previously preprocessed, in which case the argument [file] is the
   name of the file that was preprocessed, _not_ the preprocessed file
   (of which the user is not normally aware). *)

let reset_file ~file lexbuf =
  Lexing.(lexbuf.lex_curr_p <-
            {lexbuf.lex_curr_p with pos_fname = file})

let reset_line ~line lexbuf =
  assert (line >= 0);
  Lexing.(lexbuf.lex_curr_p <-
            {lexbuf.lex_curr_p with pos_lnum = line})

let reset_offset ~offset lexbuf =
  assert (offset >= 0);
  Lexing.(
    let bol = lexbuf.lex_curr_p.pos_bol in
    lexbuf.lex_curr_p <-
      {lexbuf.lex_curr_p with pos_cnum = bol + offset})

let reset ?file ?line ?offset lexbuf =
  (match file with
     None -> ()
   | Some file -> reset_file ~file lexbuf);
  (match line with
     None -> ()
   | Some line -> reset_line ~line lexbuf);
  (match offset with
     None -> ()
   | Some offset -> reset_offset ~offset lexbuf)
(* Rolling back one lexeme _within the current semantic action_

   The last matched lexeme is pushed back into the lexing buffer by
   rewinding both the engine's scanning index and the recorded
   character number by the length of that lexeme. *)

let rollback lexbuf =
  let open Lexing in
  let backtrack = String.length (lexeme lexbuf) in
  let cnum = lexbuf.lex_curr_p.pos_cnum - backtrack in
  lexbuf.lex_curr_pos <- lexbuf.lex_curr_pos - backtrack;
  lexbuf.lex_curr_p <- {lexbuf.lex_curr_p with pos_cnum = cnum}
(* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)

(* When scanning structured constructs, like strings and comments, we
   need to keep the region of the opening symbol (like double quote,
   "//" or "(*") in order to report any error more precisely. Since
   ocamllex is byte-oriented, we need to store the parsed bytes as
   characters in an accumulator [acc] and also its length [length],
   so that, when we are done, it is easy to build the string making up
   the structured construct with the method [to_string] below.

   The resulting data structure is called a _thread_. (Note for
   Emacs: "*)".) *)

type thread = <
  opening     : Region.t;          (* Region of the opening symbol *)
  length      : int;               (* Number of characters in [acc] *)
  acc         : char list;         (* Scanned characters, in reverse *)
  push_char   : char -> thread;
  push_string : string -> thread;
  to_string   : string;
  set_opening : Region.t -> thread
>
(* Smart constructor for threads.

   The call [mk_thread region lexeme] is a fresh thread whose opening
   region is [region] and whose initial contents are the characters
   of [lexeme]. Updates are functional: each [push_*] method returns
   a new thread and leaves the receiver unchanged. *)

let mk_thread region lexeme : thread =
  (* The call [explode s a] is the list made by pushing the characters
     in the string [s] on top of [a], in reverse order. For example,
     [explode "ba" ['c';'d'] = ['a'; 'b'; 'c'; 'd']]. *)
  let explode s acc =
    let rec push = function
      0 -> acc
    | i -> s.[i-1] :: push (i-1)
    in push (String.length s)
  in
  object
    val opening = region
    method opening = opening

    (* Invariant: [length] is the number of characters in [acc]. *)
    val length = String.length lexeme
    method length = length

    (* Characters scanned so far, in reverse order *)
    val acc = explode lexeme []
    method acc = acc

    method set_opening opening = {< opening; length; acc >}

    method push_char char =
      {< opening; length=length+1; acc=char::acc >}

    method push_string str =
      {< opening;
         length = length + String.length str;
         acc = explode str acc >}

    (* The value of [thread#to_string] is a string of length
       [thread#length] containing the [thread#length] characters in
       the list [thread#acc], in reverse order. For instance,
       [thread#to_string = "abc"] if [thread#length = 3] and
       [thread#acc = ['c';'b';'a']]. *)
    method to_string =
      let bytes = Bytes.make length ' ' in
      let rec fill i = function
        [] -> bytes
      | char::l -> Bytes.set bytes i char; fill (i-1) l
      in fill (length-1) acc |> Bytes.to_string
  end
type file_path = string

(* STATE *)

(* Beyond tokens, the result of lexing is a state. The type
   [state] represents the logical state of the lexing engine, that
   is, a value which is threaded during scanning and which denotes
   useful, high-level information beyond what the type
   [Lexing.lexbuf] in the standard library already provides for
   all generic lexers.

   Tokens are the smallest units used by the parser to build the
   abstract syntax tree. The state includes a queue of recognised
   tokens, with the markup at the left of its lexeme until either
   the start of the file or the end of the previously recognised
   token.

   The markup from the last recognised token or, if the first
   token has not been recognised yet, from the beginning of the
   file is stored in the field [markup] of the state because it is
   a side-effect, with respect to the output token list, and we
   use a record with a single field [units] because that record
   may be easily extended during the future maintenance of this
   lexer.

   The state also includes a field [pos] which holds the current
   position in the LIGO source file. The position is not always
   updated after a single character has been matched: that depends
   on the regular expression that matched the lexing buffer.

   The field [window] is a two-token window, that is, a buffer
   that contains the last recognised token, and the penultimate
   (if any).

   The fields [decoder] and [supply] offer the support needed
   for the lexing of UTF-8 encoded characters in comments (the
   only place where they are allowed in LIGO). The former is the
   decoder proper and the latter is the effectful function
   [supply] that takes a byte, a start index and a length and feed
   it to [decoder]. See the documentation of the third-party
   library Uutf. *)

(* Comments are described by their delimiters: a line comment by its
   opening string, a block comment by its opening and closing
   strings. *)

type line_comment = string (* Opening of a line comment *)
type block_comment = <opening : string; closing : string>

(* Smart constructor for block comment delimiters *)
let mk_block ~opening ~closing : block_comment =
  object
    method opening = opening
    method closing = closing
  end
(* The lexer state is an object with functional updates: methods
   returning a state build a new one and leave the receiver
   unchanged. The parameter ['a] is the type of tokens. *)

type 'a state = <
  units   : (Markup.t list * 'a) FQueue.t; (* Recognised tokens, with markup *)
  markup  : Markup.t list;   (* Markup since the last token *)
  window  : 'a window;       (* Last two recognised tokens *)
  last    : Region.t;        (* Region of the last token *)
  pos     : Pos.t;           (* Current position in the source *)
  decoder : Uutf.decoder;    (* UTF-8 decoder (for comments) *)
  supply  : Bytes.t -> int -> int -> unit; (* Feeds [decoder] *)
  block   : block_comment option; (* Block comment delimiters, if any *)
  line    : line_comment option;  (* Line comment opening, if any *)

  enqueue      : 'a -> 'a state;  (* Commits markup and a token *)
  set_units    : (Markup.t list * 'a) FQueue.t -> 'a state;
  set_last     : Region.t -> 'a state;
  set_pos      : Pos.t -> 'a state;
  slide_token  : 'a -> 'a state;  (* Pushes into the token window *)
  sync         : Lexing.lexbuf -> Region.t * lexeme * 'a state;
  push_newline : Lexing.lexbuf -> 'a state;
  push_line    : thread -> 'a state;  (* Line comment as markup *)
  push_block   : thread -> 'a state;  (* Block comment as markup *)
  push_space   : Lexing.lexbuf -> 'a state;
  push_tabs    : Lexing.lexbuf -> 'a state;
  push_bom     : Lexing.lexbuf -> 'a state;
  push_markup  : Markup.t -> 'a state;
>
(* Smart constructor for the lexer state. The optional arguments
   [?block] and [?line] default to [None], in which case the
   corresponding kind of comment is not recognised. *)

let mk_state ~units ~markup ~window ~last ~pos ~decoder ~supply
             ?block ?line () : _ state =
  object (self)
    val units = units
    method units = units
    val markup = markup
    method markup = markup
    val window = window
    method window = window
    val last = last
    method last = last
    val pos = pos
    method pos = pos
    method decoder = decoder
    method supply = supply
    method block = block
    method line = line

    (* The call [enqueue token] updates functionally the state by
       associating the token [token] with the stored markup and
       enqueuing the pair into the units queue. The field [markup]
       is then reset to the empty list. *)
    method enqueue token =
      {< units = FQueue.enq (markup, token) units;
         markup = [] >}

    method set_units units = {< units = units >}
    method set_last region = {< last = region >}
    method set_pos pos = {< pos = pos >}

    (* Pushes [token] into the two-token reporting window. *)
    method slide_token token =
      {< window = slide token window >}

    (* The call [sync buffer] updates the current position in
       accordance with the contents of the lexing buffer, more
       precisely, depending on the length of the string which has
       just been recognised by the scanner: that length is used as
       a positive offset to the current column. *)
    method sync buffer =
      let lex = Lexing.lexeme buffer in
      let len = String.length lex in
      let start = pos in
      let stop = start#shift_bytes len in
      let state = {< pos = stop >}
      in Region.make ~start ~stop, lex, state

    (* MARKUP *)

    (* Committing markup to the current logical state *)

    method push_markup unit = {< markup = unit :: markup >}

    (* Notifies the lexing engine of a newline (so that its line
       count stays correct) and records it as markup. *)
    method push_newline buffer =
      let () = Lexing.new_line buffer in
      let value = Lexing.lexeme buffer in
      let start = self#pos in
      let stop = start#new_line value in
      let region = Region.make ~start ~stop in
      let unit = Markup.Newline Region.{region; value}
      in {< pos = stop; markup = unit::markup >}

    (* Records a scanned line comment (held in [thread]) as markup,
       from the thread's opening to the current position. *)
    method push_line thread =
      let start = thread#opening#start in
      let region = Region.make ~start ~stop:self#pos
      and value = thread#to_string in
      let unit = Markup.LineCom Region.{region; value}
      in {< markup = unit::markup >}

    (* Same as [push_line], for a block comment. *)
    method push_block thread =
      let start = thread#opening#start in
      let region = Region.make ~start ~stop:self#pos
      and value = thread#to_string in
      let unit = Markup.BlockCom Region.{region; value}
      in {< markup = unit::markup >}

    method push_space buffer =
      let region, lex, state = self#sync buffer in
      let value = String.length lex in
      let unit = Markup.Space Region.{region; value}
      in state#push_markup unit

    method push_tabs buffer =
      let region, lex, state = self#sync buffer in
      let value = String.length lex in
      let unit = Markup.Tabs Region.{region; value}
      in state#push_markup unit

    (* Records a byte-order mark as markup. *)
    method push_bom buffer =
      let region, value, state = self#sync buffer in
      let unit = Markup.BOM Region.{region; value}
      in state#push_markup unit
  end

View File

@ -4,7 +4,7 @@ module Region = Simple_utils.Region
module type S =
sig
module Lexer : Lexer.S
module Lexer : LexerLib.S
val output_token :
?offsets:bool ->
@ -20,13 +20,14 @@ module type S =
val trace :
?offsets:bool ->
[`Byte | `Point] ->
EvalOpt.language ->
?block:EvalOpt.block_comment ->
?line:EvalOpt.line_comment ->
Lexer.input ->
EvalOpt.command ->
(unit, string Region.reg) Stdlib.result
end
module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
module Make (Lexer: LexerLib.S) : (S with module Lexer = Lexer) =
struct
module Lexer = Lexer
module Token = Lexer.Token
@ -56,9 +57,9 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
type file_path = string
let trace ?(offsets=true) mode lang input command :
let trace ?(offsets=true) mode ?block ?line input command :
(unit, string Region.reg) Stdlib.result =
match Lexer.open_token_stream lang input with
match Lexer.open_token_stream ?line ?block input with
Ok Lexer.{read; buffer; close; _} ->
let log = output_token ~offsets mode command stdout
and close_all () = flush_all (); close () in

View File

@ -2,7 +2,7 @@ module Region = Simple_utils.Region
module type S =
sig
module Lexer : Lexer.S
module Lexer : LexerLib.S
val output_token :
?offsets:bool ->
@ -18,10 +18,11 @@ module type S =
val trace :
?offsets:bool ->
[`Byte | `Point] ->
EvalOpt.language ->
?block:EvalOpt.block_comment ->
?line:EvalOpt.line_comment ->
Lexer.input ->
EvalOpt.command ->
(unit, string Region.reg) Stdlib.result
end
module Make (Lexer: Lexer.S) : S with module Lexer = Lexer
module Make (Lexer: LexerLib.S) : S with module Lexer = Lexer

View File

@ -9,7 +9,7 @@ module type IO =
val options : EvalOpt.options (* CLI options *)
end
module Make (IO: IO) (Lexer: Lexer.S) =
module Make (IO: IO) (Lexer: LexerLib.S) =
struct
(* Error printing and exception tracing *)
@ -40,30 +40,42 @@ module Make (IO: IO) (Lexer: Lexer.S) =
(* Running the lexer on the preprocessed input *)
let source = Lexer.String (Buffer.contents pp_buffer) in
match Lexer.open_token_stream IO.options#lang source with
Ok Lexer.{read; buffer; close; _} ->
let close_all () = flush_all (); close () in
let rec read_tokens tokens =
match read ~log:(fun _ _ -> ()) buffer with
token ->
if Lexer.Token.is_eof token
then Stdlib.Ok (List.rev tokens)
else read_tokens (token::tokens)
| exception Lexer.Error error ->
let file =
match IO.options#input with
None | Some "-" -> false
| Some _ -> true in
let () =
Printf.eprintf "[LexerUnit] file = %b\n%!" file in
let msg =
Lexer.format_error ~offsets:IO.options#offsets
IO.options#mode ~file error
in Stdlib.Error msg in
let result = read_tokens []
in close_all (); result
| Stdlib.Error (Lexer.File_opening msg) ->
flush_all (); Stdlib.Error (Region.wrap_ghost msg) in
match Lexer.open_token_stream ?line:IO.options#line
?block:IO.options#block
source with
Ok Lexer.{read; buffer; close; _} ->
let close_all () = flush_all (); close () in
let rec read_tokens tokens =
match read ~log:(fun _ _ -> ()) buffer with
token ->
if Lexer.Token.is_eof token
then Stdlib.Ok (List.rev tokens)
else read_tokens (token::tokens)
(* | exception _ ->
Printf.eprintf "Here\n%!"; exit 1
*) | exception Lexer.Token.Error error ->
let file =
match IO.options#input with
None | Some "-" -> false
| Some _ -> true in
let msg =
Lexer.Token.format_error
~offsets:IO.options#offsets
IO.options#mode ~file error
in Stdlib.Error msg
| exception Lexer.Error error ->
let file =
match IO.options#input with
None | Some "-" -> false
| Some _ -> true in
let msg =
Lexer.format_error ~offsets:IO.options#offsets
IO.options#mode ~file error
in Stdlib.Error msg in
let result = read_tokens []
in close_all (); result
| Stdlib.Error (Lexer.File_opening msg) ->
flush_all (); Stdlib.Error (Region.wrap_ghost msg) in
match IO.options#input with
None -> preproc stdin
| Some file_path ->
@ -101,7 +113,8 @@ module Make (IO: IO) (Lexer: Lexer.S) =
end
else Log.trace ~offsets:IO.options#offsets
IO.options#mode
IO.options#lang
?block:IO.options#block
?line:IO.options#line
(Lexer.String preproc_str)
IO.options#cmd
in match IO.options#input with

View File

@ -7,7 +7,7 @@ module type IO =
val options : EvalOpt.options (* CLI options *)
end
module Make (IO: IO) (Lexer: Lexer.S) :
module Make (IO: IO) (Lexer: LexerLib.S) :
sig
val scan : unit -> (Lexer.token list, string Region.reg) Stdlib.result
val trace : unit -> (unit, string Region.reg) Stdlib.result

View File

@ -56,7 +56,7 @@ module type PARSER =
(* Main functor *)
module Make (IO: IO)
(Lexer: Lexer.S)
(Lexer: LexerLib.S)
(Parser: PARSER with type token = Lexer.Token.token)
(ParErr: sig val message : int -> string end) =
struct
@ -122,10 +122,10 @@ module Make (IO: IO)
message
in
match get_win () with
Lexer.Nil -> assert false
| Lexer.One invalid ->
LexerLib.Nil -> assert false
| LexerLib.One invalid ->
raise (Point (message, None, invalid))
| Lexer.Two (invalid, valid) ->
| LexerLib.Two (invalid, valid) ->
raise (Point (message, Some valid, invalid))
(* The monolithic API of Menhir *)

View File

@ -55,7 +55,7 @@ module type PARSER =
end
module Make (IO: IO)
(Lexer: Lexer.S)
(Lexer: LexerLib.S)
(Parser: PARSER with type token = Lexer.Token.token)
(ParErr: sig val message : int -> string end) :
sig

View File

@ -4,16 +4,15 @@ module Region = Simple_utils.Region
module Preproc = Preprocessor.Preproc
module SSet = Set.Make (String)
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
module type SubIO =
sig
type options = <
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : language;
ext : string; (* ".ligo", ".mligo", ".religo" *)
block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string;
mode : [`Byte | `Point];
cmd : EvalOpt.command;
mono : bool
@ -23,7 +22,7 @@ module type SubIO =
val make : input:string option -> expr:bool -> EvalOpt.options
end
module type Pretty =
module type Printer =
sig
type state
type ast
@ -38,14 +37,14 @@ module type Pretty =
val print_expr : state -> expr -> unit
end
module Make (Lexer: Lexer.S)
module Make (Lexer: LexerLib.S)
(AST: sig type t type expr end)
(Parser: ParserAPI.PARSER
with type ast = AST.t
and type expr = AST.expr
and type token = Lexer.token)
(ParErr: sig val message : int -> string end)
(ParserLog: Pretty with type ast = AST.t
(ParserLog: Printer with type ast = AST.t
and type expr = AST.expr)
(SubIO: SubIO) =
struct
@ -183,10 +182,10 @@ module Make (Lexer: Lexer.S)
| exception Parser.Error ->
let invalid, valid_opt =
match lexer_inst.Lexer.get_win () with
Lexer.Nil ->
LexerLib.Nil ->
assert false (* Safe: There is always at least EOF. *)
| Lexer.One invalid -> invalid, None
| Lexer.Two (invalid, valid) -> invalid, Some valid in
| LexerLib.One invalid -> invalid, None
| LexerLib.Two (invalid, valid) -> invalid, Some valid in
let point = "", valid_opt, invalid in
let error =
Front.format_error ~offsets:SubIO.options#offsets
@ -226,11 +225,14 @@ module Make (Lexer: Lexer.S)
let () = close () in
let input' = Lexer.String (Buffer.contents buffer) in
match Lexer.open_token_stream options#lang input' with
match Lexer.open_token_stream ?line:options#line
?block:options#block
input'
with
Ok instance ->
let open Lexing in
instance.Lexer.buffer.lex_curr_p <-
{instance.Lexer.buffer.lex_curr_p with pos_fname = file};
{instance.Lexer.buffer.lex_curr_p with pos_fname=file};
apply instance parser
| Stdlib.Error (Lexer.File_opening msg) ->
Stdlib.Error (Region.wrap_ghost msg)

View File

@ -2,18 +2,19 @@
module Region = Simple_utils.Region
type language = [`PascaLIGO | `CameLIGO | `ReasonLIGO]
module SSet : Set.S with type elt = string and type t = Set.Make(String).t
(* A subtype of [EvalOpt.options] *)
module type SubIO =
sig
type options = <
libs : string list;
verbose : SSet.t;
offsets : bool;
lang : language;
ext : string; (* ".ligo", ".mligo", ".religo" *)
block : EvalOpt.block_comment option;
line : EvalOpt.line_comment option;
ext : string;
mode : [`Byte | `Point];
cmd : EvalOpt.command;
mono : bool
@ -23,7 +24,9 @@ module type SubIO =
val make : input:string option -> expr:bool -> EvalOpt.options
end
module type Pretty =
(* Signature for the printers *)
module type Printer =
sig
type state
type ast
@ -38,14 +41,16 @@ module type Pretty =
val print_expr : state -> expr -> unit
end
module Make (Lexer : Lexer.S)
(* Main functor to make the parser *)
module Make (Lexer : LexerLib.S)
(AST : sig type t type expr end)
(Parser : ParserAPI.PARSER
with type ast = AST.t
and type expr = AST.expr
and type token = Lexer.token)
(ParErr : sig val message : int -> string end)
(ParserLog : Pretty with type ast = AST.t
(ParserLog : Printer with type ast = AST.t
and type expr = AST.expr)
(SubIO: SubIO) :
sig
@ -82,4 +87,4 @@ module Make (Lexer : Lexer.S)
val preprocess :
string -> (Buffer.t, message Region.reg) Stdlib.result
end
end

View File

@ -13,6 +13,7 @@
(preprocess
(pps bisect_ppx --conditional))
(modules
LexerLib
LexerUnit
ParserUnit
ParserAPI