2019-09-27 13:33:25 +00:00
|
|
|
(* This signature defines the lexical tokens for LIGO
|
|
|
|
|
|
|
|
_Tokens_ are the abstract units which are used by the parser to
|
|
|
|
build the abstract syntax tree (AST), in other words, the stream of
|
|
|
|
tokens is the minimal model of the input program, carrying
|
|
|
|
implicitly all its structure in a linear encoding, and nothing
|
|
|
|
else, in particular, comments and whitespace are absent.
|
|
|
|
|
|
|
|
A _lexeme_ is a specific character string (concrete
|
|
|
|
representation) denoting a token (abstract representation). Tokens
|
|
|
|
can be thought of as sets, and lexemes as elements of those sets --
|
|
|
|
there is often an infinite number of lexemes, but a small number of
|
|
|
|
tokens. (Think of identifiers as lexemes and one token.)
|
|
|
|
|
|
|
|
The tokens are qualified here as being "lexical" because the
|
|
|
|
parser generator Menhir expects to define them, in which context
|
|
|
|
they are called "parsing tokens", and they are made to match each
|
|
|
|
other. (This is an idiosyncratic terminology.)
|
|
|
|
|
|
|
|
The type of the lexical tokens is the variant [t], also
|
|
|
|
aliased to [token].
|
|
|
|
*)
|
|
|
|
|
|
|
|
module Region = Simple_utils.Region
|
|
|
|
module Pos = Simple_utils.Pos
|
|
|
|
|
|
|
|
type lexeme = string
|
|
|
|
|
|
|
|
(* TOKENS *)
|
|
|
|
|
|
|
|
type t =
|
|
|
|
(* Symbols *)
|
|
|
|
|
2019-10-12 23:42:26 +02:00
|
|
|
ARROW of Region.t (* "->" *)
|
|
|
|
| CONS of Region.t (* "::" *)
|
|
|
|
| CAT of Region.t (* "^" *)
|
|
|
|
(*| APPEND (* "@" *)*)
|
2019-09-27 13:33:25 +00:00
|
|
|
|
|
|
|
(* Arithmetics *)
|
|
|
|
|
2020-04-15 17:15:55 +02:00
|
|
|
| MINUS of Region.t (* "-" *)
|
|
|
|
| PLUS of Region.t (* "+" *)
|
|
|
|
| SLASH of Region.t (* "/" *)
|
|
|
|
| TIMES of Region.t (* "*" *)
|
2020-05-20 11:09:21 +02:00
|
|
|
| PERCENT of Region.t (* "%" *)
|
2019-09-27 13:33:25 +00:00
|
|
|
|
|
|
|
(* Compounds *)
|
|
|
|
|
2019-10-12 23:42:26 +02:00
|
|
|
| LPAR of Region.t (* "(" *)
|
|
|
|
| RPAR of Region.t (* ")" *)
|
|
|
|
| LBRACKET of Region.t (* "[" *)
|
|
|
|
| RBRACKET of Region.t (* "]" *)
|
|
|
|
| LBRACE of Region.t (* "{" *)
|
|
|
|
| RBRACE of Region.t (* "}" *)
|
2019-09-27 13:33:25 +00:00
|
|
|
|
|
|
|
(* Separators *)
|
|
|
|
|
2019-10-12 23:42:26 +02:00
|
|
|
| COMMA of Region.t (* "," *)
|
|
|
|
| SEMI of Region.t (* ";" *)
|
|
|
|
| VBAR of Region.t (* "|" *)
|
|
|
|
| COLON of Region.t (* ":" *)
|
|
|
|
| DOT of Region.t (* "." *)
|
2019-09-27 13:33:25 +00:00
|
|
|
|
|
|
|
(* Wildcard *)
|
|
|
|
|
2019-10-12 23:42:26 +02:00
|
|
|
| WILD of Region.t (* "_" *)
|
2019-09-27 13:33:25 +00:00
|
|
|
|
|
|
|
(* Comparisons *)
|
|
|
|
|
|
|
|
| EQ of Region.t (* "=" *)
|
2019-10-12 23:42:26 +02:00
|
|
|
| NE of Region.t (* "<>" *)
|
|
|
|
| LT of Region.t (* "<" *)
|
|
|
|
| GT of Region.t (* ">" *)
|
2019-09-27 13:33:25 +00:00
|
|
|
| LE of Region.t (* "<=" *)
|
2019-10-12 23:42:26 +02:00
|
|
|
| GE of Region.t (* ">=" *)
|
2019-09-27 13:33:25 +00:00
|
|
|
|
2019-10-12 23:42:26 +02:00
|
|
|
| BOOL_OR of Region.t (* "||" *)
|
|
|
|
| BOOL_AND of Region.t (* "&&" *)
|
2019-09-27 13:33:25 +00:00
|
|
|
|
|
|
|
(* Identifiers, labels, numbers and strings *)
|
|
|
|
|
2020-04-27 11:31:16 +02:00
|
|
|
| Ident of string Region.reg
|
|
|
|
| Constr of string Region.reg
|
|
|
|
| Int of (string * Z.t) Region.reg
|
|
|
|
| Nat of (string * Z.t) Region.reg
|
|
|
|
| Mutez of (string * Z.t) Region.reg
|
|
|
|
| String of string Region.reg
|
|
|
|
| Verbatim of string Region.reg
|
|
|
|
| Bytes of (string * Hex.t) Region.reg
|
|
|
|
| Attr of string Region.reg
|
2019-09-27 13:33:25 +00:00
|
|
|
|
|
|
|
(* Keywords *)
|
|
|
|
|
|
|
|
(*| And*)
|
2020-04-15 17:15:55 +02:00
|
|
|
| Begin of Region.t
|
|
|
|
| Else of Region.t
|
|
|
|
| End of Region.t
|
|
|
|
| False of Region.t
|
|
|
|
| Fun of Region.t
|
|
|
|
| Rec of Region.t
|
|
|
|
| If of Region.t
|
|
|
|
| In of Region.t
|
|
|
|
| Let of Region.t
|
|
|
|
| Match of Region.t
|
|
|
|
| Mod of Region.t
|
|
|
|
| Not of Region.t
|
|
|
|
| Of of Region.t
|
|
|
|
| Or of Region.t
|
|
|
|
| Then of Region.t
|
|
|
|
| True of Region.t
|
|
|
|
| Type of Region.t
|
|
|
|
| With of Region.t
|
2019-09-27 13:33:25 +00:00
|
|
|
|
Refactoring of Ligodity (CameLIGO) and making an AST pretty-printer
- AST.ml/AST.mli:
- The AST now distinguishes the constructors `None` and `Some` as being
predefined, as in PascaLIGO. See type `AST.constr_pattern`.
- I removed the nodes specific to Liquidity,
e.g. `let%entry`, and, in particular, the natural literals
ending with `p`. Now it should be `n`, as in `10n`.
- I renamed the node `TAlias` to `TVar`.
- I have applied the rule of expanding type expressions after `of` when
those were not records.
- The type of the argument to a data constructor is now
`type_expr`, instead of `cartesian`.
- I added the patterns for bytes (`PBytes`) and natural literals (`PNat`).
- I renamed the node `Sugar` into
`PListComp` (meaning "pattern of list comprehension").
- Record types in CameLIGO now must have at least one field declaration.
- Replaced the type `closing` and `opening` with one type `compound`,
which captures only the right combinations of opening and closing.
- Components of tuples in a selection must not be written
between parentheses. For example, `a.b.(1).(0)` is now
`a.b.1.0`, as in PascaLIGO.
- LexToken.mli/LexToken.mll
- I renamed the string literal `Str` into `String`.
- I added the tokens `C_None` and `C_Some` (to distinguish the
constructors `None` and `Some`. See AST.ml)
- Fixed the function `mk_sym` so it does not fail with `failwith`, but
with `Error Invalid_symbol`.
- Lexer.mll (shared)
- I removed the character `%` from the identifiers (used to
support Liquidity, like `entry%point` and `match%nat`).
- I added to the hint on broken strings: "or insert a backslash"
(from a Gitlab issue).
- ParToken.mly
- I added the tokens `C_None` and `C_Some` (to distinguish the
constructors `None` and `Some`. See AST.ml and LexToken.mll)
- Parser.mly
- Fixed the order of declarations in the AST (it was reversed).
- I removed syntax support for Liquidity.
- I added user-defined constructor applications to irrefutable
patterns (the ones after a `let`), even though only the type
checker can decide that they are truly irrefutable because they
are the only constructors of their types.
- I added natural numbers and bytes to patterns.
- Access of tuple components does not require parentheses now, like
`a.b.1.0`.
- I refactored the semantic actions.
- I added the empty sequence `begin end`.
- ParserLog.ml/ParserLog.mli
- I added a pretty-printer for the AST (with source locations).
- ParserMain.ml
- The CLI for the pretty-printer is now `--verbose=ast`.
- The old CLI `--verbose=ast` is now `--verbose=ast-tokens`.
- ligodity.ml (simplifier)
- I removed the constructions of sets, lists and maps with
`Set [...]`, `List [...]` and `Map [...]`, as there are already
better ways (that is, more like the OCaml's way), like
`Set.literal [...]` and `Map.literal [...]`. (The case for lists
was entirely redundant with the rest of the language as it is.)
- Everywhere there is now a non-empty list of elements, I made a
change. In particular, I removed a corner case ("let without
binding"), thanks to more precise OCaml types for non-empty
lists.
- I ported all the changes to the AST above.
- region.ml (vendors)
- I changed the method `compact` so the end-line is not repeated
if it is the same as the start line: this is even more compact. I
use this in the new pretty-printer for the AST (see above)
- I updated all the CameLIGO contracts.
2019-11-04 23:51:47 +01:00
|
|
|
(* Data constructors *)
|
2019-09-27 13:33:25 +00:00
|
|
|
|
Refactoring of Ligodity (CameLIGO) and making an AST pretty-printer
- AST.ml/AST.mli:
- The AST now distinguishes the constructors `None` and `Some` as being
predefined, as in PascaLIGO. See type `AST.constr_pattern`.
- I removed the nodes specific to Liquidity,
e.g. `let%entry`, and, in particular, the natural literals
ending with `p`. Now it should be `n`, as in `10n`.
- I renamed the node `TAlias` to `TVar`.
- I have applied the rule of expanding type expressions after `of` when
those were not records.
- The type of the argument to a data constructor is now
`type_expr`, instead of `cartesian`.
- I added the patterns for bytes (`PBytes`) and natural literals (`PNat`).
- I renamed the node `Sugar` into
`PListComp` (meaning "pattern of list comprehension").
- Record types in CameLIGO now must have at least one field declaration.
- Replaced the type `closing` and `opening` with one type `compound`,
which captures only the right combinations of opening and closing.
- Components of tuples in a selection must not be written
between parentheses. For example, `a.b.(1).(0)` is now
`a.b.1.0`, as in PascaLIGO.
- LexToken.mli/LexToken.mll
- I renamed the string literal `Str` into `String`.
- I added the tokens `C_None` and `C_Some` (to distinguish the
constructors `None` and `Some`. See AST.ml)
- Fixed the function `mk_sym` so it does not fail with `failwith`, but
with `Error Invalid_symbol`.
- Lexer.mll (shared)
- I removed the character `%` from the identifiers (used to
support Liquidity, like `entry%point` and `match%nat`).
- I added to the hint on broken strings: "or insert a backslash"
(from a Gitlab issue).
- ParToken.mly
- I added the tokens `C_None` and `C_Some` (to distinguish the
constructors `None` and `Some`. See AST.ml and LexToken.mll)
- Parser.mly
- Fixed the order of declarations in the AST (it was reversed).
- I removed syntax support for Liquidity.
- I added user-defined constructor applications to irrefutable
patterns (the ones after a `let`), even though only the type
checker can decide that they are truly irrefutable because they
are the only constructors of their types.
- I added natural numbers and bytes to patterns.
- Access of tuple components does not require parentheses now, like
`a.b.1.0`.
- I refactored the semantic actions.
- I added the empty sequence `begin end`.
- ParserLog.ml/ParserLog.mli
- I added a pretty-printer for the AST (with source locations).
- ParserMain.ml
- The CLI for the pretty-printer is now `--verbose=ast`.
- The old CLI `--verbose=ast` is now `--verbose=ast-tokens`.
- ligodity.ml (simplifier)
- I removed the constructions of sets, lists and maps with
`Set [...]`, `List [...]` and `Map [...]`, as there are already
better ways (that is, more like the OCaml's way), like
`Set.literal [...]` and `Map.literal [...]`. (The case for lists
was entirely redundant with the rest of the language as it is.)
- Everywhere there is now a non-empty list of elements, I made a
change. In particular, I removed a corner case ("let without
binding"), thanks to more precise OCaml types for non-empty
lists.
- I ported all the changes to the AST above.
- region.ml (vendors)
- I changed the method `compact` so the end-line is not repeated
if it is the same as the start line: this is even more compact. I
use this in the new pretty-printer for the AST (see above)
- I updated all the CameLIGO contracts.
2019-11-04 23:51:47 +01:00
|
|
|
| C_None of Region.t (* "None" *)
|
|
|
|
| C_Some of Region.t (* "Some" *)
|
2019-09-27 13:33:25 +00:00
|
|
|
|
|
|
|
(* Virtual tokens *)
|
|
|
|
|
|
|
|
| EOF of Region.t (* End of file *)
|
|
|
|
|
|
|
|
type token = t
|
|
|
|
|
|
|
|
(* Projections
|
|
|
|
|
|
|
|
The difference between extracting the lexeme and a string from a
|
|
|
|
token is that the latter is the textual representation of the OCaml
|
|
|
|
value denoting the token (its abstract syntax), rather than its
|
|
|
|
lexeme (concrete syntax).
|
|
|
|
*)
|
|
|
|
|
|
|
|
val to_lexeme : token -> lexeme
|
|
|
|
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
|
|
|
|
val to_region : token -> Region.t
|
|
|
|
|
|
|
|
(* Injections *)
|
|
|
|
|
2019-10-12 23:42:26 +02:00
|
|
|
(* Error carried by the results of [mk_int] and [mk_mutez]. *)
type int_err = Non_canonical_zero
|
2019-09-27 13:33:25 +00:00
|
|
|
(* Error carried by the result of [mk_ident]. *)
type ident_err = Reserved_name
|
2019-10-12 23:42:26 +02:00
|
|
|
(* Errors carried by the result of [mk_nat]. *)
type nat_err = Invalid_natural
|
|
|
|
| Non_canonical_zero_nat
|
|
|
|
(* Error carried by the result of [mk_sym]. *)
type sym_err = Invalid_symbol
|
2020-01-16 19:36:04 +00:00
|
|
|
(* Error carried by the result of [mk_attr]. *)
type attr_err = Invalid_attribute
|
2020-01-08 16:39:52 +01:00
|
|
|
(* Error carried by the result of [mk_kwd]. *)
type kwd_err = Invalid_keyword
|
2019-09-27 13:33:25 +00:00
|
|
|
|
2020-04-27 11:31:16 +02:00
|
|
|
(* [mk_int lexeme region] makes a token from a lexeme and its region.
   The only failure expressible in the result is [Non_canonical_zero].
   Presumably the token built is [Int] -- to be confirmed against the
   implementation. *)
val mk_int : lexeme -> Region.t -> (token, int_err) result
|
|
|
|
(* [mk_nat lexeme region] makes a token from a lexeme and its region.
   The result can report [Invalid_natural] or [Non_canonical_zero_nat].
   Presumably the token built is [Nat] -- to be confirmed against the
   implementation. *)
val mk_nat : lexeme -> Region.t -> (token, nat_err) result
|
|
|
|
(* [mk_mutez lexeme region] makes a token from a lexeme and its region.
   It shares the error type of [mk_int], so the only failure expressible
   in the result is [Non_canonical_zero]. Presumably the token built is
   [Mutez] -- to be confirmed against the implementation. *)
val mk_mutez : lexeme -> Region.t -> (token, int_err) result
|
|
|
|
(* [mk_ident lexeme region] makes a token from a lexeme and its region.
   The only failure expressible in the result is [Reserved_name].
   Presumably the token built is [Ident] -- to be confirmed against the
   implementation. *)
val mk_ident : lexeme -> Region.t -> (token, ident_err) result
|
|
|
|
(* [mk_sym lexeme region] makes a token from a lexeme and its region.
   The only failure expressible in the result is [Invalid_symbol]; the
   failure is returned as a value of type [result]. *)
val mk_sym : lexeme -> Region.t -> (token, sym_err) result
|
|
|
|
(* [mk_kwd lexeme region] makes a token from a lexeme and its region.
   The only failure expressible in the result is [Invalid_keyword]. *)
val mk_kwd : lexeme -> Region.t -> (token, kwd_err) result
|
|
|
|
(* [mk_string lexeme region] makes a token from a lexeme and its region.
   The return type is a plain [token]: no error is reported through a
   [result]. Presumably the token built is [String] -- to be confirmed. *)
val mk_string : lexeme -> Region.t -> token
|
|
|
|
(* [mk_verbatim lexeme region] makes a token from a lexeme and its
   region. The return type is a plain [token]: no error is reported
   through a [result]. Presumably the token built is [Verbatim] -- to be
   confirmed. *)
val mk_verbatim : lexeme -> Region.t -> token
|
|
|
|
(* [mk_bytes lexeme region] makes a token from a lexeme and its region.
   The return type is a plain [token]: no error is reported through a
   [result]. Presumably the token built is [Bytes] -- to be confirmed. *)
val mk_bytes : lexeme -> Region.t -> token
|
|
|
|
(* [mk_constr lexeme region] makes a token from a lexeme and its region.
   The return type is a plain [token]: no error is reported through a
   [result]. Presumably the token built is [Constr], or one of the
   predefined [C_None] / [C_Some] -- to be confirmed. *)
val mk_constr : lexeme -> Region.t -> token
|
|
|
|
(* [mk_attr s lexeme region] makes a token from a string, a lexeme and
   a region. The only failure expressible in the result is
   [Invalid_attribute]. NOTE(review): the role of the first [string]
   argument is not visible in this signature -- to be confirmed against
   the implementation. *)
val mk_attr : string -> lexeme -> Region.t -> (token, attr_err) result
|
|
|
|
(* [eof region] makes a token from a region alone. Presumably it is the
   virtual token [EOF] -- to be confirmed. *)
val eof : Region.t -> token
|
2019-09-27 13:33:25 +00:00
|
|
|
|
|
|
|
(* Predicates *)
|
|
|
|
|
|
|
|
(* [is_eof token] is a predicate on tokens. Presumably it holds exactly
   for the virtual token [EOF] -- to be confirmed. *)
val is_eof : token -> bool
|
2020-04-24 21:06:18 +02:00
|
|
|
|
|
|
|
(* Style *)
|
|
|
|
|
|
|
|
type error
|
|
|
|
|
|
|
|
(* [error_to_string error] maps a value of the abstract type [error] to
   a string. *)
val error_to_string : error -> string
|
|
|
|
|
|
|
|
exception Error of error Region.reg
|
|
|
|
|
|
|
|
val format_error :
|
|
|
|
?offsets:bool -> [`Byte | `Point] ->
|
|
|
|
error Region.reg -> file:bool -> string Region.reg
|
|
|
|
|
|
|
|
val check_right_context :
|
|
|
|
token ->
|
|
|
|
(Lexing.lexbuf -> (Markup.t list * token) option) ->
|
|
|
|
Lexing.lexbuf ->
|
|
|
|
unit
|