(* Lexer specification for LIGO, to be processed by [ocamllex]. *)

{
[@@@warning "-42"]

module Region = Simple_utils.Region
module Pos = Simple_utils.Pos

(* START HEADER *)

(* TOKENS *)

(* The signature [TOKEN] exports an abstract type [token], so a lexer
   can be a functor over tokens. Consequently, generic functions to
   construct tokens are provided. Note the predicate [is_eof], which
   characterises the virtual token for end-of-file, because it
   requires special handling. *)

type lexeme = string

module type TOKEN =
  sig
    type token

    (* Errors *)

    type int_err   = Non_canonical_zero
    type ident_err = Reserved_name
    type nat_err   = Invalid_natural
                   | Non_canonical_zero_nat
    type sym_err   = Invalid_symbol
    type attr_err  = Invalid_attribute

    (* Injections *)

    val mk_int      : lexeme -> Region.t -> (token, int_err) result
    val mk_nat      : lexeme -> Region.t -> (token, nat_err) result
    val mk_mutez    : lexeme -> Region.t -> (token, int_err) result
    val mk_ident    : lexeme -> Region.t -> (token, ident_err) result
    val mk_sym      : lexeme -> Region.t -> (token, sym_err) result
    val mk_string   : lexeme -> Region.t -> token
    val mk_verbatim : lexeme -> Region.t -> token
    val mk_bytes    : lexeme -> Region.t -> token
    val mk_constr   : lexeme -> Region.t -> token
    val mk_attr     : string -> lexeme -> Region.t -> (token, attr_err) result
    val eof         : Region.t -> token

    (* Predicates *)

    val is_eof : token -> bool

    (* Projections *)

    val to_lexeme : token -> lexeme
    val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
    val to_region : token -> Region.t

    (* Style *)

    type error

    val error_to_string : error -> string

    exception Error of error Region.reg

    val format_error :
      ?offsets:bool -> [`Byte | `Point] ->
      error Region.reg -> file:bool -> string Region.reg

    val check_right_context :
      token ->
      (Lexing.lexbuf -> (Markup.t list * token) option) ->
      Lexing.lexbuf ->
      unit
  end
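
(* As an illustration only (a hypothetical sketch, not the actual
   [LexToken] module of any concrete syntax), an implementation of
   [TOKEN] could begin along these lines:

     type token = Int of (lexeme * Z.t) Region.reg | EOF of Region.t

     let mk_int lexeme region =
       let z = Str.(global_replace (regexp "_") "" lexeme)
               |> Z.of_string in
       if   Z.equal z Z.zero && lexeme <> "0"
       then Error Non_canonical_zero
       else Ok (Int Region.{region; value = (lexeme, z)})

     let eof region = EOF region

     let is_eof = function EOF _ -> true | _ -> false

   with the remaining injections, predicates and projections defined
   likewise. *)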

(* The functorised interface

   Note that the module parameter [Token] is re-exported as a
   submodule in [S]. *)

module type S =
  sig
    module Token : TOKEN
    type token = Token.token

    val scan :
      token LexerLib.state -> Lexing.lexbuf -> token LexerLib.state

    type error

    val error_to_string : error -> string

    exception Error of error Region.reg

    val format_error :
      ?offsets:bool -> [`Byte | `Point] ->
      error Region.reg -> file:bool -> string Region.reg
  end
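
(* A sketch of the intended use, assuming this specification is
   compiled as a module [Lexer] and that a module [LexToken]
   implements [TOKEN] (each concrete syntax provides one in its
   [LexToken.mll]):

     module Lexer = Lexer.Make (LexToken)

   after which [Lexer.scan] threads a [token LexerLib.state] through
   successive calls on a [Lexing.lexbuf]. *)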

module Make (Token : TOKEN) : (S with module Token = Token) =
  struct
    module Token = Token
    type token = Token.token

    (* ERRORS *)

    type error =
      Invalid_utf8_sequence
    | Unexpected_character of char
    | Undefined_escape_sequence
    | Unterminated_string
    | Unterminated_verbatim
    | Unterminated_comment of string
    | Non_canonical_zero
    | Broken_string
    | Invalid_character_in_string
    | Reserved_name of string
    | Invalid_symbol
    | Invalid_natural
    | Invalid_attribute

    let sprintf = Printf.sprintf

    let error_to_string = function
      Invalid_utf8_sequence ->
        "Invalid UTF-8 sequence."
    | Unexpected_character c ->
        sprintf "Unexpected character '%s'." (Char.escaped c)
    | Undefined_escape_sequence ->
        "Undefined escape sequence.\n\
         Hint: Remove or replace the sequence."
    | Unterminated_string ->
        "Unterminated string.\n\
         Hint: Close with double quotes."
    | Unterminated_verbatim ->
        "Unterminated verbatim.\n\
         Hint: Close with \"|}\"."
    | Unterminated_comment ending ->
        sprintf "Unterminated comment.\n\
                 Hint: Close with \"%s\"." ending
    | Non_canonical_zero ->
        "Non-canonical zero.\n\
         Hint: Use 0."
    | Broken_string ->
        "The string starting here is interrupted by a line break.\n\
         Hint: Remove the break, close the string before or insert a \
         backslash."
    | Invalid_character_in_string ->
        "Invalid character in string.\n\
         Hint: Remove or replace the character."
    | Reserved_name s ->
        sprintf "Reserved name: \"%s\".\n\
                 Hint: Change the name." s
    | Invalid_symbol ->
        "Invalid symbol.\n\
         Hint: Check the LIGO syntax you use."
    | Invalid_natural ->
        "Invalid natural number."
    | Invalid_attribute ->
        "Invalid attribute."

    exception Error of error Region.reg

    let format_error ?(offsets=true) mode Region.{region; value} ~file =
      let msg = error_to_string value
      and reg = region#to_string ~file ~offsets mode in
      let value = sprintf "Lexical error %s:\n%s\n" reg msg
      in Region.{value; region}

    let fail region value = raise (Error Region.{region; value})

    (* TOKENS *)

    (* Making tokens *)

    let mk_string (thread, state) =
      let start  = thread#opening#start in
      let stop   = state#pos in
      let region = Region.make ~start ~stop in
      let lexeme = thread#to_string in
      let token  = Token.mk_string lexeme region
      in state#enqueue token

    let mk_verbatim (thread, state) =
      let start  = thread#opening#start in
      let stop   = state#pos in
      let region = Region.make ~start ~stop in
      let lexeme = thread#to_string in
      let token  = Token.mk_verbatim lexeme region
      in state#enqueue token

    let mk_bytes bytes state buffer =
      let region, _, state = state#sync buffer in
      let token = Token.mk_bytes bytes region
      in state#enqueue token

    let mk_int state buffer =
      let region, lexeme, state = state#sync buffer in
      match Token.mk_int lexeme region with
        Ok token -> state#enqueue token
      | Error Token.Non_canonical_zero ->
          fail region Non_canonical_zero

    let mk_nat state buffer =
      let region, lexeme, state = state#sync buffer in
      match Token.mk_nat lexeme region with
        Ok token -> state#enqueue token
      | Error Token.Non_canonical_zero_nat ->
          fail region Non_canonical_zero
      | Error Token.Invalid_natural ->
          fail region Invalid_natural

    let mk_mutez state buffer =
      let region, lexeme, state = state#sync buffer in
      match Token.mk_mutez lexeme region with
        Ok token -> state#enqueue token
      | Error Token.Non_canonical_zero ->
          fail region Non_canonical_zero

    let mk_tez state buffer =
      let region, lexeme, state = state#sync buffer in
      let lexeme = Str.string_before lexeme (String.index lexeme 't') in
      let lexeme = Z.mul (Z.of_int 1_000_000) (Z.of_string lexeme) in
      match Token.mk_mutez (Z.to_string lexeme ^ "mutez") region with
        Ok token -> state#enqueue token
      | Error Token.Non_canonical_zero ->
          fail region Non_canonical_zero

    let format_tez s =
      match String.index s '.' with
        index ->
          let len = String.length s in
          let integral = Str.first_chars s index
          and fractional = Str.last_chars s (len-index-1) in
          let num = Z.of_string (integral ^ fractional)
          and den = Z.of_string ("1" ^ String.make (len-index-1) '0')
          and million = Q.of_string "1000000" in
          let mutez = Q.make num den |> Q.mul million in
          let should_be_1 = Q.den mutez in
          if Z.equal Z.one should_be_1 then Some (Q.num mutez) else None
      | exception Not_found -> assert false
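
    (* For example, on the input "1.5tez", [mk_tez_decimal] below
       strips the unit, leaving "1.5", and [format_tez "1.5"] computes
       num = 15, den = 10 and mutez = 15/10 * 1000000 = 1500000. The
       denominator of [mutez] is then 1, so the result is
       [Some 1500000]; by contrast, "0.0000001" yields 1/10, whose
       denominator is not 1, hence [None]. *)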

    let mk_tez_decimal state buffer =
      let region, lexeme, state = state#sync buffer in
      let lexeme = Str.(global_replace (regexp "_") "" lexeme) in
      let lexeme = Str.string_before lexeme (String.index lexeme 't') in
      match format_tez lexeme with
        None -> assert false
      | Some tz ->
          match Token.mk_mutez (Z.to_string tz ^ "mutez") region with
            Ok token -> state#enqueue token
          | Error Token.Non_canonical_zero ->
              fail region Non_canonical_zero

    let mk_ident state buffer =
      let region, lexeme, state = state#sync buffer in
      match Token.mk_ident lexeme region with
        Ok token -> state#enqueue token
      | Error Token.Reserved_name -> fail region (Reserved_name lexeme)

    let mk_attr header attr state buffer =
      let region, _, state = state#sync buffer in
      match Token.mk_attr header attr region with
        Ok token -> state#enqueue token
      | Error Token.Invalid_attribute ->
          fail region Invalid_attribute

    let mk_constr state buffer =
      let region, lexeme, state = state#sync buffer in
      let token = Token.mk_constr lexeme region
      in state#enqueue token

    let mk_sym state buffer =
      let region, lexeme, state = state#sync buffer in
      match Token.mk_sym lexeme region with
        Ok token -> state#enqueue token
      | Error Token.Invalid_symbol -> fail region Invalid_symbol

    let mk_eof state buffer =
      let region, _, state = state#sync buffer in
      let token = Token.eof region
      in state#enqueue token

(* END HEADER *)
}

(* START LEXER DEFINITION *)

(* Named regular expressions *)

let utf8_bom = "\xEF\xBB\xBF" (* Byte Order Mark for UTF-8 *)
let nl = ['\n' '\r'] | "\r\n"
let blank = ' ' | '\t'
let digit = ['0'-'9']
let natural = digit | digit (digit | '_')* digit
let decimal = natural '.' natural
let small = ['a'-'z']
let capital = ['A'-'Z']
let letter = small | capital
let ident = small (letter | '_' | digit)*
let constr = capital (letter | '_' | digit)*
let attr = ident | constr
let hexa_digit = digit | ['A'-'F' 'a'-'f']
let byte = hexa_digit hexa_digit
let byte_seq = byte | byte (byte | '_')* byte
let bytes = "0x" (byte_seq? as seq)
let esc = "\\n" | "\\\"" | "\\\\" | "\\b"
        | "\\r" | "\\t" | "\\x" byte
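
(* For instance, [natural] matches "0", "42" and "1_000_000" (inner
   underscores only), [bytes] matches "0x00FF_ff", and [esc] matches
   the two-character escapes above as well as "\\x" followed by two
   hexadecimal digits. *)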

(* Symbols *)

let common_sym = ';' | ',' | '(' | ')' | '[' | ']' | '{' | '}'
               | '=' | ':' | '|' | "->" | '.' | '_' | '^'
               | '+' | '-' | '*' | '/' | '%' | '<' | "<=" | '>' | ">="
let pascaligo_sym = "=/=" | '#' | ":="
let cameligo_sym = "<>" | "::" | "||" | "&&"
let reasonligo_sym = '!' | "=>" | "!=" | "==" | "++" | "..." | "||" | "&&"

let symbol = common_sym | pascaligo_sym | cameligo_sym | reasonligo_sym

(* Comments *)

let pascaligo_block_comment_opening = "(*"
let pascaligo_block_comment_closing = "*)"
let pascaligo_line_comment = "//"

let cameligo_block_comment_opening = "(*"
let cameligo_block_comment_closing = "*)"
let cameligo_line_comment = "//"

let reasonligo_block_comment_opening = "/*"
let reasonligo_block_comment_closing = "*/"
let reasonligo_line_comment = "//"

let block_comment_openings =
  pascaligo_block_comment_opening
| cameligo_block_comment_opening
| reasonligo_block_comment_opening

let block_comment_closings =
  pascaligo_block_comment_closing
| cameligo_block_comment_closing
| reasonligo_block_comment_closing

let line_comments =
  pascaligo_line_comment
| cameligo_line_comment
| reasonligo_line_comment

(* #include files *)

let string = [^'"' '\\' '\n']* (* For strings of #include *)

(* RULES *)

(* Except for the first rule [init], all rules bear a name starting
   with "scan".

   All have a parameter [state] that they thread through their
   recursive calls. The rules for the structured constructs (strings
   and comments) have an extra parameter of type [thread] to record
   the location where they start, and their contents (see above). *)

rule init state = parse
  utf8_bom { scan (state#push_bom lexbuf) lexbuf }
| _        { LexerLib.rollback lexbuf; scan state lexbuf }

and scan state = parse
  nl     { scan (state#push_newline lexbuf) lexbuf }
| ' '+   { scan (state#push_space lexbuf) lexbuf }
| '\t'+  { scan (state#push_tabs lexbuf) lexbuf }

| ident                 { mk_ident state lexbuf }
| constr                { mk_constr state lexbuf }
| bytes                 { mk_bytes seq state lexbuf }
| natural 'n'           { mk_nat state lexbuf }
| natural "mutez"       { mk_mutez state lexbuf }
| natural "tz"
| natural "tez"         { mk_tez state lexbuf }
| decimal "tz"
| decimal "tez"         { mk_tez_decimal state lexbuf }
| natural               { mk_int state lexbuf }
| symbol                { mk_sym state lexbuf }
| eof                   { mk_eof state lexbuf }
| "[@" (attr as a) "]"  { mk_attr "[@" a state lexbuf }
| "[@@" (attr as a) "]" { mk_attr "[@@" a state lexbuf }

(* Management of #include preprocessing directives

   An input LIGO program may contain preprocessing directives, and
   the entry modules (named *Main.ml) run the preprocessor on them,
   as if using the GNU C preprocessor in traditional mode:

   https://gcc.gnu.org/onlinedocs/cpp/Traditional-Mode.html

   The main interest in using a preprocessor is that it can stand
   for a poor man's (flat) module system for LIGO thanks to #include
   directives, and the equivalent of the traditional mode leaves the
   markup undisturbed.

   Contrary to the C preprocessor, our preprocessor does not
   generate #line directives that deal with system file headers,
   which would otherwise have to be ignored for our purpose: the
   #line directives seen here only result from processing #include
   directives. Moreover, these #line directives may also carry some
   additional flags:

   https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html

   of which 1 and 2 indicate, respectively, the start of a new file
   and the return from a file (after its inclusion has been
   processed). *)
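
(* For example, after "a.ligo" includes "b.ligo", the preprocessed
   input scanned here may contain directives like

     # 1 "b.ligo" 1
     # 7 "a.ligo" 2

   (an assumed illustration of the GNU cpp output format referenced
   above). The following case recognises them and resets the lexer's
   position to the given file and line. *)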

| '#' blank* (natural as line) blank+ '"' (string as file) '"' {
    let _, _, state = state#sync lexbuf in
    let flags, state = scan_flags state [] lexbuf in
    let () = ignore flags in
    let line = int_of_string line
    and file = Filename.basename file in
    let pos = state#pos#set ~file ~line ~offset:0 in
    let state = state#set_pos pos in
    scan state lexbuf }

(* String *)

| '"' { let opening, _, state = state#sync lexbuf in
        let thread = LexerLib.mk_thread opening "" in
        scan_string thread state lexbuf |> mk_string }

| "{|" { let opening, _, state = state#sync lexbuf in
         let thread = LexerLib.mk_thread opening "" in
         scan_verbatim thread state lexbuf |> mk_verbatim }

(* Comments *)

| block_comment_openings {
    let lexeme = Lexing.lexeme lexbuf in
    match state#block with
      Some block when block#opening = lexeme ->
        let opening, _, state = state#sync lexbuf in
        let thread = LexerLib.mk_thread opening lexeme in
        let thread, state = scan_block block thread state lexbuf in
        let state = state#push_block thread
        in scan state lexbuf
    | Some _ | None ->
        let n = String.length lexeme in
        begin
          LexerLib.rollback lexbuf;
          assert (n > 0);
          scan (scan_n_sym n state lexbuf) lexbuf
        end }

| line_comments {
    let lexeme = Lexing.lexeme lexbuf in
    match state#line with
      Some line when line = lexeme ->
        let opening, _, state = state#sync lexbuf in
        let thread = LexerLib.mk_thread opening lexeme in
        let thread, state = scan_line thread state lexbuf in
        let state = state#push_line thread
        in scan state lexbuf
    | Some _ | None ->
        let n = String.length lexeme in
        begin
          LexerLib.rollback lexbuf;
          scan (scan_n_sym n state lexbuf) lexbuf
        end }

| _ as c { let region, _, _ = state#sync lexbuf
           in fail region (Unexpected_character c) }

(* Scanning a series of symbols *)

and scan_n_sym n state = parse
  symbol { let state = mk_sym state lexbuf in
           if n = 1 then state
           else scan_n_sym (n-1) state lexbuf }
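
(* For example, when the active syntax uses "(*" to open block
   comments, an input "/*" does not start a comment: it is rolled
   back and re-scanned here as the two symbols '/' and '*'. *)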

(* Scanning #include flags *)

and scan_flags state acc = parse
  blank+          { let _, _, state = state#sync lexbuf
                    in scan_flags state acc lexbuf }
| natural as code { let _, _, state = state#sync lexbuf in
                    let acc = int_of_string code :: acc
                    in scan_flags state acc lexbuf }
| nl              { List.rev acc, state#push_newline lexbuf }
| eof             { let _, _, state = state#sync lexbuf
                    in List.rev acc, state }

(* Finishing a string *)

and scan_string thread state = parse
  nl       { fail thread#opening Broken_string }
| eof      { fail thread#opening Unterminated_string }
| ['\t' '\r' '\b']
           { let region, _, _ = state#sync lexbuf
             in fail region Invalid_character_in_string }
| '"'      { let _, _, state = state#sync lexbuf
             in thread, state }
| esc      { let _, lexeme, state = state#sync lexbuf in
             let thread = thread#push_string lexeme
             in scan_string thread state lexbuf }
| '\\' _   { let region, _, _ = state#sync lexbuf
             in fail region Undefined_escape_sequence }
| _ as c   { let _, _, state = state#sync lexbuf in
             scan_string (thread#push_char c) state lexbuf }

and scan_verbatim thread state = parse
  eof      { fail thread#opening Unterminated_verbatim }
| "|}"     { let _, _, state = state#sync lexbuf
             in thread, state }
| _ as c   { let _, _, state = state#sync lexbuf in
             scan_verbatim (thread#push_char c) state lexbuf }

(* Finishing a block comment

   (For Emacs: ("(*") The lexing of block comments must take care of
   embedded block comments that may occur within, as well as strings,
   so no substring "*/" or "*)" may inadvertently close the
   block. This is the purpose of the first case of the scanner
   [scan_block]. *)
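
(* For instance, in PascaLIGO input like

     (* outer (* inner *) "*)" not the end *)

   the embedded opening, its matching closing and the closing-like
   substring inside the string must all be tracked, so that only the
   final "*)" ends the comment (a constructed example, not from the
   LIGO sources). *)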

and scan_block block thread state = parse
  '"' | block_comment_openings {
    let lexeme = Lexing.lexeme lexbuf in
    if   block#opening = lexeme || lexeme = "\""
    then let opening = thread#opening in
         let opening', _, state = state#sync lexbuf in
         let thread = thread#push_string lexeme in
         let thread = thread#set_opening opening' in
         let next = if lexeme = "\"" then scan_string
                    else scan_block block in
         let thread, state = next thread state lexbuf in
         let thread = thread#set_opening opening
         in scan_block block thread state lexbuf
    else let () = LexerLib.rollback lexbuf in
         let n = String.length lexeme in
         let () = assert (n > 0) in
         let state = scan_n_sym n state lexbuf
         in scan_block block thread state lexbuf }

| block_comment_closings {
    let lexeme = Lexing.lexeme lexbuf in
    if   block#closing = lexeme
    then let _, _, state = state#sync lexbuf
         in thread#push_string lexeme, state
    else let () = LexerLib.rollback lexbuf in
         let n = String.length lexeme in
         let () = assert (n > 0) in
         let state = scan_n_sym n state lexbuf
         in scan_block block thread state lexbuf }

| nl as nl {
    let () = Lexing.new_line lexbuf
    and state = state#set_pos (state#pos#new_line nl)
    and thread = thread#push_string nl in
    scan_block block thread state lexbuf }

| eof { let err = Unterminated_comment (block#closing)
        in fail thread#opening err }

| _ { let () = LexerLib.rollback lexbuf in
      let len = thread#length in
      let thread, status = scan_utf8 block thread state lexbuf in
      let delta = thread#length - len in
      let pos = state#pos#shift_one_uchar delta in
      match status with
        Stdlib.Ok () ->
          scan_block block thread (state#set_pos pos) lexbuf
      | Error error ->
          let region = Region.make ~start:state#pos ~stop:pos
          in fail region error }

(* Finishing a line comment *)

and scan_line thread state = parse
  nl as nl { let () = Lexing.new_line lexbuf
             and thread = thread#push_string nl
             and state = state#set_pos (state#pos#new_line nl)
             in thread, state }
| eof      { thread, state }
| _        { let () = LexerLib.rollback lexbuf in
             let len = thread#length in
             let thread,
                 status = scan_utf8_inline thread state lexbuf in
             let delta = thread#length - len in
             let pos = state#pos#shift_one_uchar delta in
             match status with
               Stdlib.Ok () ->
                 scan_line thread (state#set_pos pos) lexbuf
             | Error error ->
                 let region = Region.make ~start:state#pos ~stop:pos
                 in fail region error }

and scan_utf8 block thread state = parse
  eof      { let err = Unterminated_comment block#closing
             in fail thread#opening err }
| _ as c   { let thread = thread#push_char c in
             let lexeme = Lexing.lexeme lexbuf in
             let () = state#supply (Bytes.of_string lexeme) 0 1 in
             match Uutf.decode state#decoder with
               `Uchar _     -> thread, Stdlib.Ok ()
             | `Malformed _ -> thread, Stdlib.Error Invalid_utf8_sequence
             | `Await       -> scan_utf8 block thread state lexbuf
             | `End         -> assert false }

and scan_utf8_inline thread state = parse
  eof      { thread, Stdlib.Ok () }
| _ as c   { let thread = thread#push_char c in
             let lexeme = Lexing.lexeme lexbuf in
             let () = state#supply (Bytes.of_string lexeme) 0 1 in
             match Uutf.decode state#decoder with
               `Uchar _     -> thread, Stdlib.Ok ()
             | `Malformed _ -> thread, Stdlib.Error Invalid_utf8_sequence
             | `Await       -> scan_utf8_inline thread state lexbuf
             | `End         -> assert false }

(* END LEXER DEFINITION *)

{
(* START TRAILER *)
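
(* The exported [scan] delegates its very first call to the rule
   [init], so that a leading UTF-8 BOM can be handled, and all later
   calls to the rule [scan]. *)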
let scan =
  let first_call = ref true in
  fun state lexbuf ->
    if !first_call
    then (first_call := false; init state lexbuf)
    else scan state lexbuf

end (* of functor [Make] in HEADER *)

(* END TRAILER *)
}