ligo/src/passes/1-parser/shared/ParserAPI.ml

160 lines
4.9 KiB
OCaml
Raw Normal View History

(* Generic parser for LIGO *)
module Region = Simple_utils.Region
type options = <
offsets : bool;
mode : [`Byte | `Point];
cmd : EvalOpt.command
>
module type IO =
sig
val options : options
end
module type PARSER =
sig
(* The type of tokens, abstract syntax trees and expressions *)
type token
type ast
type expr
(* This exception is raised by the monolithic API functions. *)
exception Error
(* The monolithic API. *)
val interactive_expr :
(Lexing.lexbuf -> token) -> Lexing.lexbuf -> expr
val contract :
(Lexing.lexbuf -> token) -> Lexing.lexbuf -> ast
module MenhirInterpreter :
sig
(* The incremental API. *)
include MenhirLib.IncrementalEngine.INCREMENTAL_ENGINE
with type token = token
end
(* The entry point(s) to the incremental API. *)
module Incremental :
sig
val interactive_expr :
Lexing.position -> expr MenhirInterpreter.checkpoint
val contract :
Lexing.position -> ast MenhirInterpreter.checkpoint
end
end
(* Main functor *)
module Make (IO: IO)
2020-04-28 19:26:31 +02:00
(Lexer: Lexer.S)
(Parser: PARSER with type token = Lexer.token)
(ParErr: sig val message : int -> string end) =
struct
module I = Parser.MenhirInterpreter
module S = MenhirLib.General (* Streams *)
(* The call [stack checkpoint] extracts the parser's stack out of
a checkpoint. *)
let stack = function
I.HandlingError env -> I.stack env
| _ -> assert false
(* The call [state checkpoint] extracts the number of the current
state out of a parser checkpoint. *)
let state checkpoint : int =
match Lazy.force (stack checkpoint) with
S.Nil -> 0 (* WARNING: Hack. The first state should be 0. *)
| S.Cons (I.Element (s,_,_,_),_) -> I.number s
(* The parser has successfully produced a semantic value. *)
let success v = v
(* The parser has suspended itself because of a syntax error. Stop. *)
type message = string
type valid = Parser.token
type invalid = Parser.token
type error = message * valid option * invalid
exception Point of error
let format_error ?(offsets=true) mode (msg, valid_opt, invalid) =
let invalid_region = Lexer.Token.to_region invalid in
let header =
"Parse error " ^ invalid_region#to_string ~offsets mode in
let trailer =
match valid_opt with
None ->
if Lexer.Token.is_eof invalid then ""
else let invalid_lexeme = Lexer.Token.to_lexeme invalid in
Printf.sprintf ", at \"%s\"" invalid_lexeme
| Some valid ->
let valid_lexeme = Lexer.Token.to_lexeme valid in
2020-04-13 16:31:56 +02:00
if Lexer.Token.is_eof invalid then
Printf.sprintf ", after \"%s\"" valid_lexeme
else
let invalid_lexeme = Lexer.Token.to_lexeme invalid in
2020-04-13 16:31:56 +02:00
Printf.sprintf " at \"%s\", after \"%s\""
invalid_lexeme valid_lexeme in
let header = header ^ trailer in
let msg =
header ^ (if msg = "" then ".\n" else ":\n" ^ msg)
in Region.{value=msg; region=invalid_region}
let failure get_win checkpoint =
let message = ParErr.message (state checkpoint) in
let message = if message = "<YOUR SYNTAX ERROR MESSAGE HERE>\n" then
2020-01-31 16:05:59 +01:00
(string_of_int (state checkpoint)) ^ ": <syntax error>"
else
2020-01-31 16:05:59 +01:00
message
in
match get_win () with
The preprocessor library depends now on the kinds of comments instead of a closed set of languages. I also removed the offsets: I simply use the current region to determine whether the preprocessing directie starts at the beginning of a line. I also removed scanning line indicators, to make the lexer simpler. LexToken.mll: Moved the function [check_right_context] that checks stylistic constraints from Lexer.mll to LexToken.mll. While this triplicates code (as CameLIGO, PascaLIGO and ReasonLIGO share the same constraints), the benefit is that Lexer.mll becomes more generic and the signature for the TOKEN module is simpler (no more exporting predicates, except for EOF). In accordance with the change of the preprocessor, the lexers and parsers for LIGO now depend on the kind of comments, not a fixed set of syntaxes. This gives more versatility when adding a new language: only the kinds of its comments are needed, although Lexer.mll and Preproc.mll may have to be modified if they do not already know the comment delimiters, for example line comments starting with #. **************************************************************** BUG: The exceptions coming from LexToken.mll when a stylistic constraint is broken in [LexToken.check_right_context] are not caught yet. **************************************************************** Lexer.mll: I moved out as much as I could from the header into a new module LexerLib. The aim is to make it easy to reuse as much as possible of the lexer machinerie, when it cannot be used as is.
2020-04-24 21:06:18 +02:00
LexerLib.Nil -> assert false
| LexerLib.One invalid ->
raise (Point (message, None, invalid))
The preprocessor library depends now on the kinds of comments instead of a closed set of languages. I also removed the offsets: I simply use the current region to determine whether the preprocessing directie starts at the beginning of a line. I also removed scanning line indicators, to make the lexer simpler. LexToken.mll: Moved the function [check_right_context] that checks stylistic constraints from Lexer.mll to LexToken.mll. While this triplicates code (as CameLIGO, PascaLIGO and ReasonLIGO share the same constraints), the benefit is that Lexer.mll becomes more generic and the signature for the TOKEN module is simpler (no more exporting predicates, except for EOF). In accordance with the change of the preprocessor, the lexers and parsers for LIGO now depend on the kind of comments, not a fixed set of syntaxes. This gives more versatility when adding a new language: only the kinds of its comments are needed, although Lexer.mll and Preproc.mll may have to be modified if they do not already know the comment delimiters, for example line comments starting with #. **************************************************************** BUG: The exceptions coming from LexToken.mll when a stylistic constraint is broken in [LexToken.check_right_context] are not caught yet. **************************************************************** Lexer.mll: I moved out as much as I could from the header into a new module LexerLib. The aim is to make it easy to reuse as much as possible of the lexer machinerie, when it cannot be used as is.
2020-04-24 21:06:18 +02:00
| LexerLib.Two (invalid, valid) ->
raise (Point (message, Some valid, invalid))
(* The monolithic API of Menhir *)
let mono_contract = Parser.contract
let mono_expr = Parser.interactive_expr
(* Incremental API of Menhir *)
module Incr = Parser.Incremental
module Log = LexerLog.Make (Lexer)
let log = Log.output_token
~offsets:IO.options#offsets
IO.options#mode IO.options#cmd stdout
2020-04-28 19:26:31 +02:00
let incr_contract LexerLib.{read; buffer; get_win; close; _} =
let supplier = I.lexer_lexbuf_to_supplier (read ~log) buffer
and failure = failure get_win in
let parser = Incr.contract buffer.Lexing.lex_curr_p in
let ast = I.loop_handle success failure supplier parser
in flush_all (); close (); ast
2020-04-28 19:26:31 +02:00
let incr_expr LexerLib.{read; buffer; get_win; close; _} =
let supplier = I.lexer_lexbuf_to_supplier (read ~log) buffer
and failure = failure get_win in
let parser = Incr.interactive_expr buffer.Lexing.lex_curr_p in
let expr = I.loop_handle success failure supplier parser
in flush_all (); close (); expr
end