Simplified the interface of the lexer by not exporting the scanner [init] (which reads the BOM, if any).
This commit is contained in:
Christian Rinderknecht 2020-04-28 21:17:34 +02:00
parent de7864a500
commit 6ce6ebfec3
7 changed files with 33 additions and 40 deletions

View File

@@ -119,10 +119,8 @@ module type S =
module Token : TOKEN module Token : TOKEN
type token = Token.token type token = Token.token
(* The scanner [init] is meant to be called first to read the (* The scanner *)
BOM. Then [scan] is called. *)
val init : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
val scan : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state val scan : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
(* Errors (specific to the generic lexer, not to the tokens) *) (* Errors (specific to the generic lexer, not to the tokens) *)

View File

@@ -85,8 +85,8 @@ module type S =
module Token : TOKEN module Token : TOKEN
type token = Token.token type token = Token.token
val init : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state val scan :
val scan : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
type error type error
@@ -601,6 +601,14 @@ and scan_utf8_inline thread state = parse
{ {
(* START TRAILER *) (* START TRAILER *)
let scan =
let first_call = ref true in
fun state lexbuf ->
if !first_call
then (first_call := false; init state lexbuf)
else scan state lexbuf
end (* of functor [Make] in HEADER *) end (* of functor [Make] in HEADER *)
(* END TRAILER *) (* END TRAILER *)
} }

View File

@@ -151,11 +151,6 @@ let mk_thread region lexeme : thread =
the scanning rule [scan]). The function [patch_buffer] is, of the scanning rule [scan]). The function [patch_buffer] is, of
course, also called just before returning the token, so the parser course, also called just before returning the token, so the parser
has a view of the lexing buffer consistent with the token. has a view of the lexing buffer consistent with the token.
Note that an additional reference [first_call] is needed to
distinguish the first call to the function [scan], as the first
scanning rule is actually [init] (which can handle the BOM), not
[scan].
*) *)
type 'token window = type 'token window =
@@ -319,14 +314,13 @@ let lexbuf_from_input = function
in Ok (lexbuf, close) in Ok (lexbuf, close)
with Sys_error msg -> Stdlib.Error (File_opening msg) with Sys_error msg -> Stdlib.Error (File_opening msg)
let open_token_stream ?line ?block ~init ~scan let open_token_stream ?line ?block ~scan
~token_to_region ~style input = ~token_to_region ~style input =
let file_path = match input with let file_path = match input with
File path -> path File path -> path
| _ -> "" in | _ -> "" in
let pos = Pos.min ~file:file_path in let pos = Pos.min ~file:file_path in
let buf_reg = ref (pos#byte, pos#byte) let buf_reg = ref (pos#byte, pos#byte)
and first_call = ref true
and decoder = Uutf.decoder ~encoding:`UTF_8 `Manual in and decoder = Uutf.decoder ~encoding:`UTF_8 `Manual in
let supply = Uutf.Manual.src decoder in let supply = Uutf.Manual.src decoder in
let state = ref (mk_state let state = ref (mk_state
@@ -354,33 +348,31 @@ let open_token_stream ?line ?block ~init ~scan
and save_region buffer = and save_region buffer =
buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p) in buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p) in
let scan' init scan buffer = let scan' scan buffer =
patch_buffer !buf_reg buffer; patch_buffer !buf_reg buffer;
(if !first_call state := scan !state buffer;
then (state := init !state buffer; first_call := false)
else state := scan !state buffer);
save_region buffer in save_region buffer in
let next_token init scan buffer = let next_token scan buffer =
scan' init scan buffer; scan' scan buffer;
match FQueue.peek !state#units with match FQueue.peek !state#units with
None -> None None -> None
| Some (units, ext_token) -> | Some (units, ext_token) ->
state := !state#set_units units; Some ext_token in state := !state#set_units units; Some ext_token in
let rec read init scan ~token_to_region ~style ~log buffer = let rec read scan ~token_to_region ~style ~log buffer =
match FQueue.deq !state#units with match FQueue.deq !state#units with
None -> None ->
scan' init scan buffer; scan' scan buffer;
read init scan ~token_to_region ~style ~log buffer read scan ~token_to_region ~style ~log buffer
| Some (units, (left_mark, token)) -> | Some (units, (left_mark, token)) ->
log left_mark token; log left_mark token;
state := ((!state#set_units units) state := ((!state#set_units units)
#set_last (token_to_region token)) #set_last (token_to_region token))
#slide_token token; #slide_token token;
style token (next_token init scan) buffer; style token (next_token scan) buffer;
patch_buffer (token_to_region token)#byte_pos buffer; patch_buffer (token_to_region token)#byte_pos buffer;
token in token in
match lexbuf_from_input input with match lexbuf_from_input input with
Ok (buffer, close) -> Ok (buffer, close) ->
@@ -389,7 +381,7 @@ let open_token_stream ?line ?block ~init ~scan
File path when path <> "" -> reset ~file:path buffer File path when path <> "" -> reset ~file:path buffer
| _ -> () in | _ -> () in
let instance = { let instance = {
read = read init scan ~token_to_region ~style; read = read scan ~token_to_region ~style;
input; buffer; get_win; get_pos; get_last; get_file; close} input; buffer; get_win; get_pos; get_last; get_file; close}
in Ok instance in Ok instance
| Error _ as e -> e | Error _ as e -> e

View File

@@ -155,17 +155,16 @@ type 'token state = <
The type [window] is a two-token window, that is, a buffer that The type [window] is a two-token window, that is, a buffer that
contains the last recognised token, and the penultimate (if any). contains the last recognised token, and the penultimate (if any).
The call [read ?line ?block ~init ~scan ~token_to_region ~style The call [read ?line ?block ~scan ~token_to_region ~style
input] evaluates in a lexer (also known as a tokeniser or scanner) input] evaluates in a lexer (also known as a tokeniser or scanner)
whose type is [log:('token logger) -> Lexing.lexbuf -> 'token], and whose type is [log:('token logger) -> Lexing.lexbuf -> 'token], and
suitable for a parser generated by Menhir. The argument labelled suitable for a parser generated by Menhir. The argument labelled
[log] is a logger, that is, it may print a token and its left [log] is a logger, that is, it may print a token and its left
markup to a given channel, at the caller's discretion. The argument markup to a given channel, at the caller's discretion. The function
labelled [~init] is the scanner to be called first, usually for labelled [~scan] is the main scanner of the lexer. The function
reading the BOM, then [scan] is used for the following calls. The labelled [~style] is used to check stylistic constraints on the
function labelled [~style] is used to check stylistic constraints tokens and the markup between them.
on the tokens and the markup between them. *)
*)
type input = type input =
File of file_path File of file_path
@@ -194,7 +193,6 @@ val lexbuf_from_input :
val open_token_stream : val open_token_stream :
?line:EvalOpt.line_comment -> ?line:EvalOpt.line_comment ->
?block:EvalOpt.block_comment -> ?block:EvalOpt.block_comment ->
init:('token state -> Lexing.lexbuf -> 'token state) ->
scan:('token state -> Lexing.lexbuf -> 'token state) -> scan:('token state -> Lexing.lexbuf -> 'token state) ->
token_to_region:('token -> Region.t) -> token_to_region:('token -> Region.t) ->
style:('token -> style:('token ->

View File

@@ -69,7 +69,6 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
~token_to_region ~style input command : ~token_to_region ~style input command :
(unit, string Region.reg) Stdlib.result = (unit, string Region.reg) Stdlib.result =
match LexerLib.open_token_stream match LexerLib.open_token_stream
~init:Lexer.init
~scan:Lexer.scan ~scan:Lexer.scan
~token_to_region ~token_to_region
~style ~style

View File

@@ -43,7 +43,6 @@ module Make (IO: IO) (Lexer: Lexer.S) =
match LexerLib.open_token_stream match LexerLib.open_token_stream
?line:IO.options#line ?line:IO.options#line
?block:IO.options#block ?block:IO.options#block
~init:Lexer.init
~scan:Lexer.scan ~scan:Lexer.scan
~token_to_region:Lexer.Token.to_region ~token_to_region:Lexer.Token.to_region
~style:Lexer.Token.check_right_context ~style:Lexer.Token.check_right_context

View File

@@ -234,7 +234,6 @@ module Make (Lexer: Lexer.S)
let () = close () in let () = close () in
let input' = LexerLib.String (Buffer.contents buffer) in let input' = LexerLib.String (Buffer.contents buffer) in
match LexerLib.open_token_stream match LexerLib.open_token_stream
~init:Lexer.init
~scan:Lexer.scan ~scan:Lexer.scan
~token_to_region:Lexer.Token.to_region ~token_to_region:Lexer.Token.to_region
~style:Lexer.Token.check_right_context ~style:Lexer.Token.check_right_context