Simplified the interface of the lexer by not exporting the scanner [init] (which reads the BOM, if any).
This commit is contained in:
Christian Rinderknecht 2020-04-28 21:17:34 +02:00
parent de7864a500
commit 6ce6ebfec3
7 changed files with 33 additions and 40 deletions

View File

@@ -119,10 +119,8 @@ module type S =
module Token : TOKEN
type token = Token.token
(* The scanner [init] is meant to be called first to read the
BOM. Then [scan] is called. *)
(* The scanner *)
val init : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
val scan : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
(* Errors (specific to the generic lexer, not to the tokens) *)

View File

@@ -85,8 +85,8 @@ module type S =
module Token : TOKEN
type token = Token.token
val init : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
val scan : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
val scan :
token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
type error
@@ -601,6 +601,14 @@ and scan_utf8_inline thread state = parse
{
(* START TRAILER *)
let scan =
let first_call = ref true in
fun state lexbuf ->
if !first_call
then (first_call := false; init state lexbuf)
else scan state lexbuf
end (* of functor [Make] in HEADER *)
(* END TRAILER *)
}

View File

@@ -151,11 +151,6 @@ let mk_thread region lexeme : thread =
the scanning rule [scan]). The function [patch_buffer] is, of
course, also called just before returning the token, so the parser
has a view of the lexing buffer consistent with the token.
Note that an additional reference [first_call] is needed to
distinguish the first call to the function [scan], as the first
scanning rule is actually [init] (which can handle the BOM), not
[scan].
*)
type 'token window =
@@ -319,14 +314,13 @@ let lexbuf_from_input = function
in Ok (lexbuf, close)
with Sys_error msg -> Stdlib.Error (File_opening msg)
let open_token_stream ?line ?block ~init ~scan
let open_token_stream ?line ?block ~scan
~token_to_region ~style input =
let file_path = match input with
File path -> path
| _ -> "" in
let pos = Pos.min ~file:file_path in
let buf_reg = ref (pos#byte, pos#byte)
and first_call = ref true
and decoder = Uutf.decoder ~encoding:`UTF_8 `Manual in
let supply = Uutf.Manual.src decoder in
let state = ref (mk_state
@@ -354,31 +348,29 @@ let open_token_stream ?line ?block ~init ~scan
and save_region buffer =
buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p) in
let scan' init scan buffer =
let scan' scan buffer =
patch_buffer !buf_reg buffer;
(if !first_call
then (state := init !state buffer; first_call := false)
else state := scan !state buffer);
state := scan !state buffer;
save_region buffer in
let next_token init scan buffer =
scan' init scan buffer;
let next_token scan buffer =
scan' scan buffer;
match FQueue.peek !state#units with
None -> None
| Some (units, ext_token) ->
state := !state#set_units units; Some ext_token in
let rec read init scan ~token_to_region ~style ~log buffer =
let rec read scan ~token_to_region ~style ~log buffer =
match FQueue.deq !state#units with
None ->
scan' init scan buffer;
read init scan ~token_to_region ~style ~log buffer
scan' scan buffer;
read scan ~token_to_region ~style ~log buffer
| Some (units, (left_mark, token)) ->
log left_mark token;
state := ((!state#set_units units)
#set_last (token_to_region token))
#slide_token token;
style token (next_token init scan) buffer;
style token (next_token scan) buffer;
patch_buffer (token_to_region token)#byte_pos buffer;
token in
@@ -389,7 +381,7 @@ let open_token_stream ?line ?block ~init ~scan
File path when path <> "" -> reset ~file:path buffer
| _ -> () in
let instance = {
read = read init scan ~token_to_region ~style;
read = read scan ~token_to_region ~style;
input; buffer; get_win; get_pos; get_last; get_file; close}
in Ok instance
| Error _ as e -> e

View File

@@ -155,17 +155,16 @@ type 'token state = <
The type [window] is a two-token window, that is, a buffer that
contains the last recognised token, and the penultimate (if any).
The call [read ?line ?block ~init ~scan ~token_to_region ~style
The call [read ?line ?block ~scan ~token_to_region ~style
input] evaluates in a lexer (also known as a tokeniser or scanner)
whose type is [log:('token logger) -> Lexing.lexbuf -> 'token], and
suitable for a parser generated by Menhir. The argument labelled
[log] is a logger, that is, it may print a token and its left
markup to a given channel, at the caller's discretion. The argument
labelled [~init] is the scanner to be called first, usually for
reading the BOM, then [scan] is used for the following calls. The
function labelled [~style] is used to check stylistic constraints
on the tokens and the markup between them.
*)
markup to a given channel, at the caller's discretion. The function
labelled [~scan] is the main scanner of the lexer. The function
labelled [~style] is used to check stylistic constraints on the
tokens and the markup between them.
*)
type input =
File of file_path
@@ -194,7 +193,6 @@ val lexbuf_from_input :
val open_token_stream :
?line:EvalOpt.line_comment ->
?block:EvalOpt.block_comment ->
init:('token state -> Lexing.lexbuf -> 'token state) ->
scan:('token state -> Lexing.lexbuf -> 'token state) ->
token_to_region:('token -> Region.t) ->
style:('token ->

View File

@@ -69,7 +69,6 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
~token_to_region ~style input command :
(unit, string Region.reg) Stdlib.result =
match LexerLib.open_token_stream
~init:Lexer.init
~scan:Lexer.scan
~token_to_region
~style

View File

@@ -43,7 +43,6 @@ module Make (IO: IO) (Lexer: Lexer.S) =
match LexerLib.open_token_stream
?line:IO.options#line
?block:IO.options#block
~init:Lexer.init
~scan:Lexer.scan
~token_to_region:Lexer.Token.to_region
~style:Lexer.Token.check_right_context

View File

@@ -234,7 +234,6 @@ module Make (Lexer: Lexer.S)
let () = close () in
let input' = LexerLib.String (Buffer.contents buffer) in
match LexerLib.open_token_stream
~init:Lexer.init
~scan:Lexer.scan
~token_to_region:Lexer.Token.to_region
~style:Lexer.Token.check_right_context