Simplified the interface of the lexer by not exporting the scanner
[init] (which reads the BOM, if any).
This commit is contained in:
parent
de7864a500
commit
6ce6ebfec3
@ -119,10 +119,8 @@ module type S =
|
|||||||
module Token : TOKEN
|
module Token : TOKEN
|
||||||
type token = Token.token
|
type token = Token.token
|
||||||
|
|
||||||
(* The scanner [init] is meant to be called first to read the
|
(* The scanner *)
|
||||||
BOM. Then [scan] is called. *)
|
|
||||||
|
|
||||||
val init : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
|
|
||||||
val scan : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
|
val scan : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
|
||||||
|
|
||||||
(* Errors (specific to the generic lexer, not to the tokens) *)
|
(* Errors (specific to the generic lexer, not to the tokens) *)
|
||||||
|
@ -85,8 +85,8 @@ module type S =
|
|||||||
module Token : TOKEN
|
module Token : TOKEN
|
||||||
type token = Token.token
|
type token = Token.token
|
||||||
|
|
||||||
val init : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
|
val scan :
|
||||||
val scan : token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
|
token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state
|
||||||
|
|
||||||
type error
|
type error
|
||||||
|
|
||||||
@ -601,6 +601,14 @@ and scan_utf8_inline thread state = parse
|
|||||||
|
|
||||||
{
|
{
|
||||||
(* START TRAILER *)
|
(* START TRAILER *)
|
||||||
|
|
||||||
|
let scan =
|
||||||
|
let first_call = ref true in
|
||||||
|
fun state lexbuf ->
|
||||||
|
if !first_call
|
||||||
|
then (first_call := false; init state lexbuf)
|
||||||
|
else scan state lexbuf
|
||||||
|
|
||||||
end (* of functor [Make] in HEADER *)
|
end (* of functor [Make] in HEADER *)
|
||||||
(* END TRAILER *)
|
(* END TRAILER *)
|
||||||
}
|
}
|
||||||
|
@ -151,11 +151,6 @@ let mk_thread region lexeme : thread =
|
|||||||
the scanning rule [scan]). The function [patch_buffer] is, of
|
the scanning rule [scan]). The function [patch_buffer] is, of
|
||||||
course, also called just before returning the token, so the parser
|
course, also called just before returning the token, so the parser
|
||||||
has a view of the lexing buffer consistent with the token.
|
has a view of the lexing buffer consistent with the token.
|
||||||
|
|
||||||
Note that an additional reference [first_call] is needed to
|
|
||||||
distinguish the first call to the function [scan], as the first
|
|
||||||
scanning rule is actually [init] (which can handle the BOM), not
|
|
||||||
[scan].
|
|
||||||
*)
|
*)
|
||||||
|
|
||||||
type 'token window =
|
type 'token window =
|
||||||
@ -319,14 +314,13 @@ let lexbuf_from_input = function
|
|||||||
in Ok (lexbuf, close)
|
in Ok (lexbuf, close)
|
||||||
with Sys_error msg -> Stdlib.Error (File_opening msg)
|
with Sys_error msg -> Stdlib.Error (File_opening msg)
|
||||||
|
|
||||||
let open_token_stream ?line ?block ~init ~scan
|
let open_token_stream ?line ?block ~scan
|
||||||
~token_to_region ~style input =
|
~token_to_region ~style input =
|
||||||
let file_path = match input with
|
let file_path = match input with
|
||||||
File path -> path
|
File path -> path
|
||||||
| _ -> "" in
|
| _ -> "" in
|
||||||
let pos = Pos.min ~file:file_path in
|
let pos = Pos.min ~file:file_path in
|
||||||
let buf_reg = ref (pos#byte, pos#byte)
|
let buf_reg = ref (pos#byte, pos#byte)
|
||||||
and first_call = ref true
|
|
||||||
and decoder = Uutf.decoder ~encoding:`UTF_8 `Manual in
|
and decoder = Uutf.decoder ~encoding:`UTF_8 `Manual in
|
||||||
let supply = Uutf.Manual.src decoder in
|
let supply = Uutf.Manual.src decoder in
|
||||||
let state = ref (mk_state
|
let state = ref (mk_state
|
||||||
@ -354,33 +348,31 @@ let open_token_stream ?line ?block ~init ~scan
|
|||||||
and save_region buffer =
|
and save_region buffer =
|
||||||
buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p) in
|
buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p) in
|
||||||
|
|
||||||
let scan' init scan buffer =
|
let scan' scan buffer =
|
||||||
patch_buffer !buf_reg buffer;
|
patch_buffer !buf_reg buffer;
|
||||||
(if !first_call
|
state := scan !state buffer;
|
||||||
then (state := init !state buffer; first_call := false)
|
|
||||||
else state := scan !state buffer);
|
|
||||||
save_region buffer in
|
save_region buffer in
|
||||||
|
|
||||||
let next_token init scan buffer =
|
let next_token scan buffer =
|
||||||
scan' init scan buffer;
|
scan' scan buffer;
|
||||||
match FQueue.peek !state#units with
|
match FQueue.peek !state#units with
|
||||||
None -> None
|
None -> None
|
||||||
| Some (units, ext_token) ->
|
| Some (units, ext_token) ->
|
||||||
state := !state#set_units units; Some ext_token in
|
state := !state#set_units units; Some ext_token in
|
||||||
|
|
||||||
let rec read init scan ~token_to_region ~style ~log buffer =
|
let rec read scan ~token_to_region ~style ~log buffer =
|
||||||
match FQueue.deq !state#units with
|
match FQueue.deq !state#units with
|
||||||
None ->
|
None ->
|
||||||
scan' init scan buffer;
|
scan' scan buffer;
|
||||||
read init scan ~token_to_region ~style ~log buffer
|
read scan ~token_to_region ~style ~log buffer
|
||||||
| Some (units, (left_mark, token)) ->
|
| Some (units, (left_mark, token)) ->
|
||||||
log left_mark token;
|
log left_mark token;
|
||||||
state := ((!state#set_units units)
|
state := ((!state#set_units units)
|
||||||
#set_last (token_to_region token))
|
#set_last (token_to_region token))
|
||||||
#slide_token token;
|
#slide_token token;
|
||||||
style token (next_token init scan) buffer;
|
style token (next_token scan) buffer;
|
||||||
patch_buffer (token_to_region token)#byte_pos buffer;
|
patch_buffer (token_to_region token)#byte_pos buffer;
|
||||||
token in
|
token in
|
||||||
|
|
||||||
match lexbuf_from_input input with
|
match lexbuf_from_input input with
|
||||||
Ok (buffer, close) ->
|
Ok (buffer, close) ->
|
||||||
@ -389,7 +381,7 @@ let open_token_stream ?line ?block ~init ~scan
|
|||||||
File path when path <> "" -> reset ~file:path buffer
|
File path when path <> "" -> reset ~file:path buffer
|
||||||
| _ -> () in
|
| _ -> () in
|
||||||
let instance = {
|
let instance = {
|
||||||
read = read init scan ~token_to_region ~style;
|
read = read scan ~token_to_region ~style;
|
||||||
input; buffer; get_win; get_pos; get_last; get_file; close}
|
input; buffer; get_win; get_pos; get_last; get_file; close}
|
||||||
in Ok instance
|
in Ok instance
|
||||||
| Error _ as e -> e
|
| Error _ as e -> e
|
||||||
|
@ -155,17 +155,16 @@ type 'token state = <
|
|||||||
The type [window] is a two-token window, that is, a buffer that
|
The type [window] is a two-token window, that is, a buffer that
|
||||||
contains the last recognised token, and the penultimate (if any).
|
contains the last recognised token, and the penultimate (if any).
|
||||||
|
|
||||||
The call [read ?line ?block ~init ~scan ~token_to_region ~style
|
The call [read ?line ?block ~scan ~token_to_region ~style
|
||||||
input] evaluates in a lexer (also known as a tokeniser or scanner)
|
input] evaluates in a lexer (also known as a tokeniser or scanner)
|
||||||
whose type is [log:('token logger) -> Lexing.lexbuf -> 'token], and
|
whose type is [log:('token logger) -> Lexing.lexbuf -> 'token], and
|
||||||
suitable for a parser generated by Menhir. The argument labelled
|
suitable for a parser generated by Menhir. The argument labelled
|
||||||
[log] is a logger, that is, it may print a token and its left
|
[log] is a logger, that is, it may print a token and its left
|
||||||
markup to a given channel, at the caller's discretion. The argument
|
markup to a given channel, at the caller's discretion. The function
|
||||||
labelled [~init] is the scanner to be called first, usually for
|
labelled [~scan] is the main scanner of the lexer. The function
|
||||||
reading the BOM, then [scan] is used for the following calls. The
|
labelled [~style] is used to check stylistic constraints on the
|
||||||
function labelled [~style] is used to check stylistic constraints
|
tokens and the markup between them.
|
||||||
on the tokens and the markup between them.
|
*)
|
||||||
*)
|
|
||||||
|
|
||||||
type input =
|
type input =
|
||||||
File of file_path
|
File of file_path
|
||||||
@ -194,7 +193,6 @@ val lexbuf_from_input :
|
|||||||
val open_token_stream :
|
val open_token_stream :
|
||||||
?line:EvalOpt.line_comment ->
|
?line:EvalOpt.line_comment ->
|
||||||
?block:EvalOpt.block_comment ->
|
?block:EvalOpt.block_comment ->
|
||||||
init:('token state -> Lexing.lexbuf -> 'token state) ->
|
|
||||||
scan:('token state -> Lexing.lexbuf -> 'token state) ->
|
scan:('token state -> Lexing.lexbuf -> 'token state) ->
|
||||||
token_to_region:('token -> Region.t) ->
|
token_to_region:('token -> Region.t) ->
|
||||||
style:('token ->
|
style:('token ->
|
||||||
|
@ -69,7 +69,6 @@ module Make (Lexer: Lexer.S) : (S with module Lexer = Lexer) =
|
|||||||
~token_to_region ~style input command :
|
~token_to_region ~style input command :
|
||||||
(unit, string Region.reg) Stdlib.result =
|
(unit, string Region.reg) Stdlib.result =
|
||||||
match LexerLib.open_token_stream
|
match LexerLib.open_token_stream
|
||||||
~init:Lexer.init
|
|
||||||
~scan:Lexer.scan
|
~scan:Lexer.scan
|
||||||
~token_to_region
|
~token_to_region
|
||||||
~style
|
~style
|
||||||
|
@ -43,7 +43,6 @@ module Make (IO: IO) (Lexer: Lexer.S) =
|
|||||||
match LexerLib.open_token_stream
|
match LexerLib.open_token_stream
|
||||||
?line:IO.options#line
|
?line:IO.options#line
|
||||||
?block:IO.options#block
|
?block:IO.options#block
|
||||||
~init:Lexer.init
|
|
||||||
~scan:Lexer.scan
|
~scan:Lexer.scan
|
||||||
~token_to_region:Lexer.Token.to_region
|
~token_to_region:Lexer.Token.to_region
|
||||||
~style:Lexer.Token.check_right_context
|
~style:Lexer.Token.check_right_context
|
||||||
|
@ -234,7 +234,6 @@ module Make (Lexer: Lexer.S)
|
|||||||
let () = close () in
|
let () = close () in
|
||||||
let input' = LexerLib.String (Buffer.contents buffer) in
|
let input' = LexerLib.String (Buffer.contents buffer) in
|
||||||
match LexerLib.open_token_stream
|
match LexerLib.open_token_stream
|
||||||
~init:Lexer.init
|
|
||||||
~scan:Lexer.scan
|
~scan:Lexer.scan
|
||||||
~token_to_region:Lexer.Token.to_region
|
~token_to_region:Lexer.Token.to_region
|
||||||
~style:Lexer.Token.check_right_context
|
~style:Lexer.Token.check_right_context
|
||||||
|
Loading…
Reference in New Issue
Block a user