Simplified the interface of the lexer by not exporting the scanner
[init] (which reads the BOM, if any).
This commit is contained in:
parent
de7864a500
commit
6ce6ebfec3
@ -119,10 +119,8 @@ module type S =
|
||||
module Token : TOKEN
|
||||
type token = Token.token
|
||||
|
||||
(* The scanner [init] is meant to be called first to read the
|
||||
BOM. Then [scan] is called. *)
|
||||
(* The scanner *)
|
||||
|
||||
val init : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
|
||||
val scan : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
|
||||
|
||||
(* Errors (specific to the generic lexer, not to the tokens) *)
|
||||
|
@ -85,8 +85,8 @@ module type S =
|
||||
module Token : TOKEN
|
||||
type token = Token.token
|
||||
|
||||
val init : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
|
||||
val scan : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
|
||||
val scan :
|
||||
token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
|
||||
|
||||
type error
|
||||
|
||||
@ -601,6 +601,14 @@ and scan_utf8_inline thread state = parse
|
||||
|
||||
{
|
||||
(* START TRAILER *)
|
||||
|
||||
let scan =
|
||||
let first_call = ref true in
|
||||
fun state lexbuf ->
|
||||
if !first_call
|
||||
then (first_call := false; init state lexbuf)
|
||||
else scan state lexbuf
|
||||
|
||||
end (* of functor [Make] in HEADER *)
|
||||
(* END TRAILER *)
|
||||
}
|
||||
|
@ -151,11 +151,6 @@ let mk_thread region lexeme : thread =
|
||||
the scanning rule [scan]). The function [patch_buffer] is, of
|
||||
course, also called just before returning the token, so the parser
|
||||
has a view of the lexing buffer consistent with the token.
|
||||
|
||||
Note that an additional reference [first_call] is needed to
|
||||
distinguish the first call to the function [scan], as the first
|
||||
scanning rule is actually [init] (which can handle the BOM), not
|
||||
[scan].
|
||||
*)
|
||||
|
||||
type 'token window =
|
||||
@ -319,14 +314,13 @@ let lexbuf_from_input = function
|
||||
in Ok (lexbuf, close)
|
||||
with Sys_error msg -> Stdlib.Error (File_opening msg)
|
||||
|
||||
let open_token_stream ?line ?block ~init ~scan
|
||||
let open_token_stream ?line ?block ~scan
|
||||
~token_to_region ~style input =
|
||||
let file_path = match input with
|
||||
File path -> path
|
||||
| _ -> "" in
|
||||
let pos = Pos.min ~file:file_path in
|
||||
let buf_reg = ref (pos#byte, pos#byte)
|
||||
and first_call = ref true
|
||||
and decoder = Uutf.decoder ~encoding:`UTF_8 `Manual in
|
||||
let supply = Uutf.Manual.src decoder in
|
||||
let state = ref (mk_state
|
||||
@ -354,31 +348,29 @@ let open_token_stream ?line ?block ~init ~scan
|
||||
and save_region buffer =
|
||||
buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p) in
|
||||
|
||||
let scan' init scan buffer =
|
||||
let scan' scan buffer =
|
||||
patch_buffer !buf_reg buffer;
|
||||
(if !first_call
|
||||
then (state := init !state buffer; first_call := false)
|
||||
else state := scan !state buffer);
|
||||
state := scan !state buffer;
|
||||
save_region buffer in
|
||||
|
||||
let next_token init scan buffer =
|
||||
scan' init scan buffer;
|
||||
let next_token scan buffer =
|
||||
scan' scan buffer;
|
||||
match FQueue.peek !state#units with
|
||||
None -> None
|
||||
| Some (units, ext_token) ->
|
||||
state := !state#set_units units; Some ext_token in
|
||||
|
||||
let rec read init scan ~token_to_region ~style ~log buffer =
|
||||
let rec read scan ~token_to_region ~style ~log buffer =
|
||||
match FQueue.deq !state#units with
|
||||
None ->
|
||||
scan' init scan buffer;
|
||||
read init scan ~token_to_region ~style ~log buffer
|
||||
scan' scan buffer;
|
||||
read scan ~token_to_region ~style ~log buffer
|
||||
| Some (units, (left_mark, token)) ->
|
||||
log left_mark token;
|
||||
state := ((!state#set_units units)
|
||||
#set_last (token_to_region token))
|
||||
#slide_token token;
|
||||
style token (next_token init scan) buffer;
|
||||
style token (next_token scan) buffer;
|
||||
patch_buffer (token_to_region token)#byte_pos buffer;
|
||||
token in
|
||||
|
||||
@ -389,7 +381,7 @@ let open_token_stream ?line ?block ~init ~scan
|
||||
File path when path <> "" -> reset ~file:path buffer
|
||||
| _ -> () in
|
||||
let instance = {
|
||||
read = read init scan ~token_to_region ~style;
|
||||
read = read scan ~token_to_region ~style;
|
||||
input; buffer; get_win; get_pos; get_last; get_file; close}
|
||||
in Ok instance
|
||||
| Error _ as e -> e
|
||||
|
@ -155,16 +155,15 @@ type 'token state = <
|
||||
The type [window] is a two-token window, that is, a buffer that
|
||||
contains the last recognised token, and the penultimate (if any).
|
||||
|
||||
The call [read ?line ?block ~init ~scan ~token_to_region ~style
|
||||
The call [read ?line ?block ~scan ~token_to_region ~style
|
||||
input] evaluates to a lexer (also known as a tokeniser or scanner)
|
||||
whose type is [log:('token logger) -> Lexing.lexbuf -> 'token], and
|
||||
which is suitable for a parser generated by Menhir. The argument labelled
|
||||
[log] is a logger, that is, it may print a token and its left
|
||||
markup to a given channel, at the caller's discretion. The argument
|
||||
labelled [~init] is the scanner to be called first, usually for
|
||||
reading the BOM, then [scan] is used for the following calls. The
|
||||
function labelled [~style] is used to check stylistic constraints
|
||||
on the tokens and the markup between them.
|
||||
markup to a given channel, at the caller's discretion. The function
|
||||
labelled [~scan] is the main scanner of the lexer. The function
|
||||
labelled [~style] is used to check stylistic constraints on the
|
||||
tokens and the markup between them.
|
||||
*)
|
||||
|
||||
type input =
|
||||
@ -194,7 +193,6 @@ val lexbuf_from_input :
|
||||
val open_token_stream :
|
||||
?line:EvalOpt.line_comment ->
|
||||
?block:EvalOpt.block_comment ->
|
||||
init:('token state -> Lexing.lexbuf -> 'token state) ->
|
||||
scan:('token state -> Lexing.lexbuf -> 'token state) ->
|
||||
token_to_region:('token -> Region.t) ->
|
||||
style:('token ->
|
||||
|
@ -69,7 +69,6 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
|
||||
~token_to_region ~style input command :
|
||||
(unit, string Region.reg) Stdlib.result =
|
||||
match LexerLib.open_token_stream
|
||||
~init:Lexer.init
|
||||
~scan:Lexer.scan
|
||||
~token_to_region
|
||||
~style
|
||||
|
@ -43,7 +43,6 @@ module Make (IO: IO) (Lexer: Lexer.S) =
|
||||
match LexerLib.open_token_stream
|
||||
?line:IO.options#line
|
||||
?block:IO.options#block
|
||||
~init:Lexer.init
|
||||
~scan:Lexer.scan
|
||||
~token_to_region:Lexer.Token.to_region
|
||||
~style:Lexer.Token.check_right_context
|
||||
|
@ -234,7 +234,6 @@ module Make (Lexer: Lexer.S)
|
||||
let () = close () in
|
||||
let input' = LexerLib.String (Buffer.contents buffer) in
|
||||
match LexerLib.open_token_stream
|
||||
~init:Lexer.init
|
||||
~scan:Lexer.scan
|
||||
~token_to_region:Lexer.Token.to_region
|
||||
~style:Lexer.Token.check_right_context
|
||||
|
Loading…
Reference in New Issue
Block a user