816 lines
34 KiB
OCaml
816 lines
34 KiB
OCaml
(**************************************************************************)
|
|
(* *)
|
|
(* Copyright (c) 2014 - 2018. *)
|
|
(* Dynamic Ledger Solutions, Inc. <contact@tezos.com> *)
|
|
(* *)
|
|
(* All rights reserved. No warranty, explicit or implicit, provided. *)
|
|
(* *)
|
|
(**************************************************************************)
|
|
|
|
open Error_monad
|
|
open Micheline
|
|
|
|
type 'a parsing_result = 'a * error list
|
|
|
|
type point =
|
|
{ point : int ;
|
|
byte : int ;
|
|
line : int ;
|
|
column : int }
|
|
|
|
let point_zero =
|
|
{ point = 0 ;
|
|
byte = 0 ;
|
|
line = 0 ;
|
|
column = 0 }
|
|
|
|
let point_encoding =
|
|
let open Data_encoding in
|
|
conv
|
|
(fun { line ; column ; point ; byte } -> (line, column, point, byte))
|
|
(fun (line, column, point, byte) -> { line ; column ; point ; byte })
|
|
(obj4
|
|
(req "line" uint16)
|
|
(req "column" uint16)
|
|
(req "point" uint16)
|
|
(req "byte" uint16))
|
|
|
|
type location =
|
|
{ start : point ;
|
|
stop : point }
|
|
|
|
let location_zero =
|
|
{ start = point_zero ;
|
|
stop = point_zero }
|
|
|
|
let location_encoding =
|
|
let open Data_encoding in
|
|
conv
|
|
(fun { start ; stop } -> (start, stop))
|
|
(fun (start, stop) -> { start ; stop })
|
|
(obj2
|
|
(req "start" point_encoding)
|
|
(req "stop" point_encoding))
|
|
|
|
type token_value =
|
|
| String of string
|
|
| Int of string
|
|
| Ident of string
|
|
| Annot of string
|
|
| Comment of string
|
|
| Eol_comment of string
|
|
| Semi
|
|
| Open_paren | Close_paren
|
|
| Open_brace | Close_brace
|
|
|
|
let token_value_encoding =
|
|
let open Data_encoding in
|
|
union
|
|
[ case (Tag 0)
|
|
~title:"String"
|
|
(obj1 (req "string" string))
|
|
(function String s -> Some s | _ -> None)
|
|
(fun s -> String s) ;
|
|
case (Tag 1)
|
|
~title:"Int"
|
|
(obj1 (req "int" string))
|
|
(function Int s -> Some s | _ -> None)
|
|
(fun s -> Int s) ;
|
|
case (Tag 2)
|
|
~title:"Annot"
|
|
(obj1 (req "annot" string))
|
|
(function Annot s -> Some s | _ -> None)
|
|
(fun s -> Annot s) ;
|
|
case (Tag 3)
|
|
~title:"Comment"
|
|
(obj2 (req "comment" string) (dft "end_of_line" bool false))
|
|
(function
|
|
| Comment s -> Some (s, false)
|
|
| Eol_comment s -> Some (s, true) | _ -> None)
|
|
(function
|
|
| (s, false) -> Comment s
|
|
| (s, true) -> Eol_comment s) ;
|
|
case (Tag 4)
|
|
~title:"Punctuation"
|
|
(obj1 (req "punctuation" (string_enum [
|
|
"(", Open_paren ;
|
|
")", Close_paren ;
|
|
"{", Open_brace ;
|
|
"}", Close_brace ;
|
|
";", Semi ])))
|
|
(fun t -> Some t) (fun t -> t) ]
|
|
|
|
type token =
|
|
{ token : token_value ;
|
|
loc : location }
|
|
|
|
let max_annot_length = 255
|
|
|
|
type error += Invalid_utf8_sequence of point * string
|
|
type error += Unexpected_character of point * string
|
|
type error += Undefined_escape_sequence of point * string
|
|
type error += Missing_break_after_number of point
|
|
type error += Unterminated_string of location
|
|
type error += Unterminated_integer of location
|
|
type error += Unterminated_comment of location
|
|
type error += Annotation_length of location
|
|
|
|
let tokenize source =
|
|
let decoder = Uutf.decoder ~encoding:`UTF_8 (`String source) in
|
|
let here () =
|
|
{ point = Uutf.decoder_count decoder ;
|
|
byte = Uutf.decoder_byte_count decoder ;
|
|
line = Uutf.decoder_line decoder ;
|
|
column = Uutf.decoder_col decoder } in
|
|
let tok start stop token =
|
|
{ loc = { start ; stop } ; token } in
|
|
let stack = ref [] in
|
|
let errors = ref [] in
|
|
let rec next () =
|
|
match !stack with
|
|
| charloc :: charlocs ->
|
|
stack := charlocs ;
|
|
charloc
|
|
| [] ->
|
|
let loc = here () in
|
|
match Uutf.decode decoder with
|
|
| `Await -> assert false
|
|
| `Malformed s ->
|
|
errors := Invalid_utf8_sequence (loc, s) :: !errors ;
|
|
next ()
|
|
| `Uchar _ | `End as other -> other, loc in
|
|
let back charloc =
|
|
stack := charloc :: !stack in
|
|
let uchar_to_char c =
|
|
if Uchar.is_char c then
|
|
Some (Uchar.to_char c)
|
|
else
|
|
None in
|
|
let rec skip acc =
|
|
match next () with
|
|
| `End, _ -> List.rev acc
|
|
| `Uchar c, start ->
|
|
begin match uchar_to_char c with
|
|
| Some ('a'..'z' | 'A'..'Z') -> ident acc start (fun s _ -> Ident s)
|
|
| Some ('@' | ':' | '$' | '&' | '%' | '!' | '?') ->
|
|
ident acc start
|
|
(fun str stop ->
|
|
if String.length str > max_annot_length
|
|
then errors := (Annotation_length { start ; stop }) :: !errors ;
|
|
Annot str)
|
|
| Some '-' ->
|
|
begin match next () with
|
|
| `End, stop ->
|
|
errors := Unterminated_integer { start ; stop } :: !errors ;
|
|
List.rev acc
|
|
| `Uchar c, stop as first ->
|
|
begin match uchar_to_char c with
|
|
| Some '0' -> base acc start
|
|
| Some ('1'..'9') -> integer `dec acc start false
|
|
| Some _ | None ->
|
|
errors := Unterminated_integer { start ; stop } :: !errors ;
|
|
back first ;
|
|
skip acc
|
|
end
|
|
end
|
|
| Some '0' -> base acc start
|
|
| Some ('1'..'9') -> integer `dec acc start false
|
|
| Some (' ' | '\n') -> skip acc
|
|
| Some ';' -> skip (tok start (here ()) Semi :: acc)
|
|
| Some '{' -> skip (tok start (here ()) Open_brace :: acc)
|
|
| Some '}' -> skip (tok start (here ()) Close_brace :: acc)
|
|
| Some '(' -> skip (tok start (here ()) Open_paren :: acc)
|
|
| Some ')' -> skip (tok start (here ()) Close_paren :: acc)
|
|
| Some '"' -> string acc [] start
|
|
| Some '#' -> eol_comment acc start
|
|
| Some '/' ->
|
|
begin match next () with
|
|
| `Uchar c, _ when Uchar.equal c (Uchar.of_char '*') ->
|
|
comment acc start 0
|
|
| (`Uchar _ | `End), _ as charloc ->
|
|
errors := Unexpected_character (start, "/") :: !errors ;
|
|
back charloc ;
|
|
skip acc
|
|
end
|
|
| Some _ | None ->
|
|
let byte = Uutf.decoder_byte_count decoder in
|
|
let s = String.sub source start.byte (byte - start.byte) in
|
|
errors := Unexpected_character (start, s) :: !errors ;
|
|
skip acc
|
|
end
|
|
and base acc start =
|
|
match next () with
|
|
| (`Uchar c, stop) as charloc ->
|
|
begin match uchar_to_char c with
|
|
| Some ('0'.. '9') -> integer `dec acc start false
|
|
| Some 'x' -> integer `hex acc start true
|
|
| Some 'b' -> integer `bin acc start true
|
|
| Some ('a' | 'c'..'w' | 'y' | 'z' | 'A'..'Z') ->
|
|
errors := Missing_break_after_number stop :: !errors ;
|
|
back charloc ;
|
|
skip (tok start stop (Int "0") :: acc)
|
|
| Some _ | None ->
|
|
back charloc ;
|
|
skip (tok start stop (Int "0") :: acc)
|
|
end
|
|
| (_, stop) as other ->
|
|
back other ;
|
|
skip (tok start stop (Int "0") :: acc)
|
|
and integer base acc start first =
|
|
let tok stop =
|
|
let value =
|
|
String.sub source start.byte (stop.byte - start.byte) in
|
|
tok start stop (Int value) in
|
|
match next () with
|
|
| (`Uchar c, stop) as charloc ->
|
|
let missing_break () =
|
|
errors := Missing_break_after_number stop :: !errors ;
|
|
back charloc ;
|
|
skip (tok stop :: acc) in
|
|
begin match base, Uchar.to_char c with
|
|
| `dec, ('0'.. '9') ->
|
|
integer `dec acc start false
|
|
| `dec, ('a'..'z' | 'A'..'Z') ->
|
|
missing_break ()
|
|
| `hex, ('0'..'9' | 'a'..'f' | 'A'..'F') ->
|
|
integer `hex acc start false
|
|
| `hex, ('g'..'z' | 'G'..'Z') ->
|
|
missing_break ()
|
|
| `bin, ('0' | '1') ->
|
|
integer `bin acc start false
|
|
| `bin, ('2'..'9' | 'a'..'z' | 'A'..'Z') ->
|
|
missing_break ()
|
|
| (`bin | `hex), _ when first ->
|
|
errors := Unterminated_integer { start ; stop } :: !errors ;
|
|
back charloc ;
|
|
skip (tok stop :: acc)
|
|
| _ ->
|
|
back charloc ;
|
|
skip (tok stop :: acc)
|
|
end
|
|
| (`End, stop) as other ->
|
|
if first && (base = `bin || base = `hex) then begin
|
|
errors := Unterminated_integer { start ; stop } :: !errors
|
|
end ;
|
|
back other ;
|
|
skip (tok stop :: acc)
|
|
and string acc sacc start =
|
|
let tok () =
|
|
tok start (here ()) (String (String.concat "" (List.rev sacc))) in
|
|
match next () with
|
|
| `End, stop ->
|
|
errors := Unterminated_string { start ; stop } :: !errors ;
|
|
skip (tok () :: acc)
|
|
| `Uchar c, stop ->
|
|
match uchar_to_char c with
|
|
| Some '"' -> skip (tok () :: acc)
|
|
| Some ('\n' | '\r') ->
|
|
errors := Unterminated_string { start ; stop } :: !errors ;
|
|
skip (tok () :: acc)
|
|
| Some '\\' ->
|
|
begin match next () with
|
|
| `End, stop ->
|
|
errors := Unterminated_string { start ; stop } :: !errors ;
|
|
skip (tok () :: acc)
|
|
| `Uchar c, loc ->
|
|
match uchar_to_char c with
|
|
| Some '"' -> string acc ("\"" :: sacc) start
|
|
| Some 'r' -> string acc ("\r" :: sacc) start
|
|
| Some 'n' -> string acc ("\n" :: sacc) start
|
|
| Some 't' -> string acc ("\t" :: sacc) start
|
|
| Some 'b' -> string acc ("\b" :: sacc) start
|
|
| Some '\\' -> string acc ("\\" :: sacc) start
|
|
| Some _ | None ->
|
|
let byte = Uutf.decoder_byte_count decoder in
|
|
let s = String.sub source loc.byte (byte - loc.byte) in
|
|
errors := Undefined_escape_sequence (loc, s) :: !errors ;
|
|
string acc sacc start
|
|
end
|
|
| Some _ | None ->
|
|
let byte = Uutf.decoder_byte_count decoder in
|
|
let s = String.sub source stop.byte (byte - stop.byte) in
|
|
string acc (s :: sacc) start
|
|
and ident acc start (ret : string -> point -> token_value) =
|
|
let tok stop =
|
|
let name =
|
|
String.sub source start.byte (stop.byte - start.byte) in
|
|
tok start stop (ret name stop) in
|
|
match next () with
|
|
| (`Uchar c, stop) as charloc ->
|
|
begin match uchar_to_char c with
|
|
| Some ('a'..'z' | 'A'..'Z' | '_' | '0'..'9') ->
|
|
ident acc start ret
|
|
| Some _ | None ->
|
|
back charloc ;
|
|
skip (tok stop :: acc)
|
|
end
|
|
| (_, stop) as other ->
|
|
back other ;
|
|
skip (tok stop :: acc)
|
|
and comment acc start lvl =
|
|
match next () with
|
|
| `End, stop ->
|
|
errors := Unterminated_comment { start ; stop } :: !errors ;
|
|
let text = String.sub source start.byte (stop.byte - start.byte) in
|
|
skip (tok start stop (Comment text) :: acc)
|
|
| `Uchar c, _ ->
|
|
begin match uchar_to_char c with
|
|
| Some '*' ->
|
|
begin match next () with
|
|
| `Uchar c, _ when Uchar.equal c (Uchar.of_char '/') ->
|
|
if lvl = 0 then
|
|
let stop = here () in
|
|
let text =
|
|
String.sub source start.byte (stop.byte - start.byte) in
|
|
skip (tok start stop (Comment text) :: acc)
|
|
else
|
|
comment acc start (lvl - 1)
|
|
| other ->
|
|
back other ;
|
|
comment acc start lvl
|
|
end
|
|
| Some '/' ->
|
|
begin match next () with
|
|
| `Uchar c, _ when Uchar.equal c (Uchar.of_char '*') ->
|
|
comment acc start (lvl + 1)
|
|
| other ->
|
|
back other ;
|
|
comment acc start lvl
|
|
end
|
|
| Some _ | None -> comment acc start lvl
|
|
end
|
|
and eol_comment acc start =
|
|
let tok stop =
|
|
let text = String.sub source start.byte (stop.byte - start.byte) in
|
|
tok start stop (Eol_comment text) in
|
|
match next () with
|
|
| `Uchar c, stop ->
|
|
begin match uchar_to_char c with
|
|
| Some '\n' -> skip (tok stop :: acc)
|
|
| Some _ | None -> eol_comment acc start
|
|
end
|
|
| (_, stop) as other ->
|
|
back other ;
|
|
skip (tok stop :: acc) in
|
|
let tokens = skip [] in
|
|
tokens, List.rev !errors
|
|
|
|
type node = (location, string) Micheline.node
|
|
|
|
let node_encoding =
|
|
Micheline.table_encoding ~variant:"generic" location_encoding Data_encoding.string
|
|
|
|
(* Beginning of a sequence of consecutive primitives *)
|
|
let min_point : node list -> point = function
|
|
| [] -> point_zero
|
|
| Int ({ start }, _) :: _
|
|
| String ({ start }, _) :: _
|
|
| Prim ({ start }, _, _, _) :: _
|
|
| Seq ({ start }, _) :: _ -> start
|
|
|
|
(* End of a sequence of consecutive primitives *)
|
|
let rec max_point : node list -> point = function
|
|
| [] -> point_zero
|
|
| _ :: (_ :: _ as rest) -> max_point rest
|
|
| Int ({ stop }, _) :: []
|
|
| String ({ stop }, _) :: []
|
|
| Prim ({ stop }, _, _, _) :: []
|
|
| Seq ({ stop }, _) :: [] -> stop
|
|
|
|
(* An item in the parser's state stack.
|
|
Not every value of type [mode list] is a valid parsing context.
|
|
It must respect the following additional invariants.
|
|
- a state stack always ends in [Toplevel _],
|
|
- [Toplevel _] does not appear anywhere else,
|
|
- [Unwrapped _] cannot appear directly on top of [Wrapped _],
|
|
- [Wrapped _] cannot appear directly on top of [Sequence _],
|
|
- [Wrapped _] cannot appear directly on top of [Sequence _]. *)
|
|
type mode =
|
|
| Toplevel of node list
|
|
| Expression of node option
|
|
| Sequence of token * node list
|
|
| Unwrapped of location * string * node list * string list
|
|
| Wrapped of token * string * node list * string list
|
|
|
|
(* Enter a new parsing state. *)
|
|
let push_mode mode stack =
|
|
mode :: stack
|
|
|
|
(* Leave a parsing state. *)
|
|
let pop_mode = function
|
|
| [] -> assert false
|
|
| _ :: rest -> rest
|
|
|
|
(* Usually after a [pop_mode], jump back into the previous parsing
|
|
state, injecting the current reduction (insert the just parsed item
|
|
of a sequence or argument of a primitive application). *)
|
|
let fill_mode result = function
|
|
| [] -> assert false
|
|
| Expression _ :: _ :: _ -> assert false
|
|
| Expression (Some _) :: [] -> assert false
|
|
| Toplevel _ :: _ :: _ -> assert false
|
|
| Expression None :: [] ->
|
|
Expression (Some result) :: []
|
|
| Toplevel exprs :: [] ->
|
|
Toplevel (result :: exprs) :: []
|
|
| Sequence (token, exprs) :: rest ->
|
|
Sequence (token, result :: exprs) :: rest
|
|
| Wrapped (token, name, exprs, annot) :: rest ->
|
|
Wrapped (token, name, result :: exprs, annot) :: rest
|
|
| Unwrapped (start, name, exprs, annot) :: rest ->
|
|
Unwrapped (start, name, result :: exprs, annot) :: rest
|
|
|
|
type error += Unclosed of token
|
|
type error += Unexpected of token
|
|
type error += Extra of token
|
|
type error += Misaligned of node
|
|
type error += Empty
|
|
|
|
let rec annots = function
|
|
| { token = Annot annot } :: rest ->
|
|
let annots, rest = annots rest in
|
|
annot :: annots, rest
|
|
| rest -> [], rest
|
|
|
|
let rec parse ?(check = true) errors tokens stack =
|
|
(* Two steps:
|
|
- 1. parse without checking indentation [parse]
|
|
- 2. check indentation [check] (inlined in 1) *)
|
|
match stack, tokens with
|
|
(* Start by preventing all absurd cases, so now the pattern
|
|
matching exhaustivity can tell us that we treater all
|
|
possible tokens for all possible valid states. *)
|
|
| [], _
|
|
| [ Wrapped _ ], _
|
|
| [ Unwrapped _ ], _
|
|
| Unwrapped _ :: Unwrapped _ :: _, _
|
|
| Unwrapped _ :: Wrapped _ :: _, _
|
|
| Toplevel _ :: _ :: _, _
|
|
| Expression _ :: _ :: _, _ ->
|
|
assert false
|
|
(* Return *)
|
|
| Expression (Some result) :: _, [] ->
|
|
[ result ], List.rev errors
|
|
| Expression (Some _) :: _, token :: rem ->
|
|
let errors = Unexpected token :: errors in
|
|
parse ~check errors rem (* skip *) stack
|
|
| Expression None :: _, [] ->
|
|
let errors = Empty :: errors in
|
|
let ghost = { start = point_zero ; stop = point_zero} in
|
|
[ Seq (ghost, []) ], List.rev errors
|
|
| Toplevel [ Seq (_, exprs) as expr ] :: [],
|
|
[] ->
|
|
let errors = if check then do_check ~toplevel: false errors expr else errors in
|
|
exprs, List.rev errors
|
|
| Toplevel exprs :: [],
|
|
[] ->
|
|
let exprs = List.rev exprs in
|
|
let loc = { start = min_point exprs ; stop = max_point exprs } in
|
|
let expr = Seq (loc, exprs) in
|
|
let errors = if check then do_check ~toplevel: true errors expr else errors in
|
|
exprs, List.rev errors
|
|
(* Ignore comments *)
|
|
| _,
|
|
{ token = Eol_comment _ | Comment _ } :: rest ->
|
|
parse ~check errors rest stack
|
|
| (Expression None | Sequence _ | Toplevel _) :: _,
|
|
({ token = Int _ | String _ } as token):: { token = Eol_comment _ | Comment _ } :: rest
|
|
| (Wrapped _ | Unwrapped _) :: _,
|
|
({ token = Open_paren } as token)
|
|
:: { token = Eol_comment _ | Comment _ } :: rest ->
|
|
parse ~check errors (token :: rest) stack
|
|
(* Erroneous states *)
|
|
| (Wrapped _ | Unwrapped _) :: _ ,
|
|
({ token = Open_paren } as token)
|
|
:: { token = Open_paren | Open_brace } :: rem
|
|
| Unwrapped _ :: Expression _ :: _ ,
|
|
({ token = Semi | Close_brace | Close_paren } as token) :: rem
|
|
| Expression None :: _ ,
|
|
({ token = Semi | Close_brace | Close_paren | Open_paren } as token) :: rem ->
|
|
let errors = Unexpected token :: errors in
|
|
parse ~check errors rem (* skip *) stack
|
|
| (Sequence _ | Toplevel _) :: _ ,
|
|
({ token = Semi } as valid) :: ({ token = Semi } as token) :: rem ->
|
|
let errors = Extra token :: errors in
|
|
parse ~check errors (valid (* skip *) :: rem) stack
|
|
| (Wrapped _ | Unwrapped _) :: _ ,
|
|
{ token = Open_paren }
|
|
:: ({ token = Int _ | String _ | Annot _ | Close_paren } as token) :: rem
|
|
| (Expression None | Sequence _ | Toplevel _) :: _,
|
|
{ token = Int _ | String _ } :: ({ token = Ident _ | Int _ | String _ | Annot _ | Close_paren | Open_paren | Open_brace } as token) :: rem
|
|
| Unwrapped (_, _, _, _) :: Toplevel _ :: _,
|
|
({ token = Close_brace } as token) :: rem
|
|
| Unwrapped (_, _, _, _) :: _,
|
|
({ token = Close_paren } as token) :: rem
|
|
| Toplevel _ :: [],
|
|
({ token = Close_paren } as token) :: rem
|
|
| Toplevel _ :: [],
|
|
({ token = Open_paren } as token) :: rem
|
|
| Toplevel _ :: [],
|
|
({ token = Close_brace } as token) :: rem
|
|
| Sequence _ :: _,
|
|
({ token = Open_paren } as token) :: rem
|
|
| Sequence _ :: _,
|
|
({ token = Close_paren } as token :: rem)
|
|
| (Wrapped _ | Unwrapped _) :: _,
|
|
({ token = Open_paren } as token) :: ({ token = Close_brace | Semi } :: _ | [] as rem)
|
|
| _,
|
|
({ token = Annot _ } as token) :: rem ->
|
|
let errors = Unexpected token :: errors in
|
|
parse ~check errors rem (* skip *) stack
|
|
| Wrapped (token, _, _, _) :: _, ([] | { token = Close_brace | Semi } :: _) ->
|
|
let errors = Unclosed token :: errors in
|
|
let fake = { token with token = Close_paren } in
|
|
let tokens = (* insert *) fake :: tokens in
|
|
parse ~check errors tokens stack
|
|
| (Sequence (token, _) :: _ | Unwrapped _ :: Sequence (token, _) :: _), [] ->
|
|
let errors = Unclosed token :: errors in
|
|
let fake = { token with token = Close_brace } in
|
|
let tokens = (* insert *) fake :: tokens in
|
|
parse ~check errors tokens stack
|
|
(* Valid states *)
|
|
| (Toplevel _ | Sequence (_, _)) :: _ ,
|
|
{ token = Ident name ; loc } :: ({ token = Annot _ } :: _ as rest) ->
|
|
let annots, rest = annots rest in
|
|
let mode = Unwrapped (loc, name, [], annots) in
|
|
parse ~check errors rest (push_mode mode stack)
|
|
| (Expression None | Toplevel _ | Sequence (_, _)) :: _ ,
|
|
{ token = Ident name ; loc } :: rest ->
|
|
let mode = Unwrapped (loc, name, [], []) in
|
|
parse ~check errors rest (push_mode mode stack)
|
|
| (Unwrapped _ | Wrapped _) :: _,
|
|
{ token = Int value ; loc } :: rest
|
|
| (Expression None | Sequence _ | Toplevel _) :: _,
|
|
{ token = Int value ; loc } :: ([] | { token = Semi | Close_brace} :: _ as rest) ->
|
|
let expr : node = Int (loc, Z.of_string value) in
|
|
let errors = if check then do_check ~toplevel: false errors expr else errors in
|
|
parse ~check errors rest (fill_mode expr stack)
|
|
| (Unwrapped _ | Wrapped _) :: _,
|
|
{ token = String contents ; loc } :: rest
|
|
| (Expression None | Sequence _ | Toplevel _) :: _,
|
|
{ token = String contents ; loc } :: ([] | { token = Semi | Close_brace} :: _ as rest) ->
|
|
let expr : node = String (loc, contents) in
|
|
let errors = if check then do_check ~toplevel: false errors expr else errors in
|
|
parse ~check errors rest (fill_mode expr stack)
|
|
| Sequence ({ loc = { start } }, exprs) :: _ ,
|
|
{ token = Close_brace ; loc = { stop } } :: rest ->
|
|
let exprs = List.rev exprs in
|
|
let expr = Micheline.Seq ({ start ; stop }, exprs) in
|
|
let errors = if check then do_check ~toplevel: false errors expr else errors in
|
|
parse ~check errors rest (fill_mode expr (pop_mode stack))
|
|
| (Sequence _ | Toplevel _) :: _ ,
|
|
{ token = Semi } :: rest ->
|
|
parse ~check errors rest stack
|
|
| Unwrapped ({ start ; stop }, name, exprs, annot) :: Expression _ :: _,
|
|
([] as rest)
|
|
| Unwrapped ({ start ; stop }, name, exprs, annot) :: Toplevel _ :: _,
|
|
({ token = Semi } :: _ | [] as rest)
|
|
| Unwrapped ({ start ; stop }, name, exprs, annot) :: Sequence _ :: _ ,
|
|
({ token = Close_brace | Semi } :: _ as rest)
|
|
| Wrapped ({ loc = { start ; stop } }, name, exprs, annot) :: _ ,
|
|
{ token = Close_paren } :: rest ->
|
|
let exprs = List.rev exprs in
|
|
let stop = if exprs = [] then stop else max_point exprs in
|
|
let expr = Micheline.Prim ({ start ; stop }, name, exprs, annot) in
|
|
let errors = if check then do_check ~toplevel: false errors expr else errors in
|
|
parse ~check errors rest (fill_mode expr (pop_mode stack))
|
|
| (Wrapped _ | Unwrapped _) :: _ ,
|
|
({ token = Open_paren } as token) :: { token = Ident name } :: ({ token = Annot _ } :: _ as rest) ->
|
|
let annots, rest = annots rest in
|
|
let mode = Wrapped (token, name, [], annots) in
|
|
parse ~check errors rest (push_mode mode stack)
|
|
| (Wrapped _ | Unwrapped _) :: _ ,
|
|
({ token = Open_paren } as token) :: { token = Ident name } :: rest ->
|
|
let mode = Wrapped (token, name, [], []) in
|
|
parse ~check errors rest (push_mode mode stack)
|
|
| (Wrapped _ | Unwrapped _) :: _ ,
|
|
{ token = Ident name ; loc } :: rest ->
|
|
let expr = Micheline.Prim (loc, name, [], []) in
|
|
let errors = if check then do_check ~toplevel: false errors expr else errors in
|
|
parse ~check errors rest (fill_mode expr stack)
|
|
| (Wrapped _ | Unwrapped _ | Toplevel _ | Sequence _ | Expression None) :: _ ,
|
|
({ token = Open_brace } as token) :: rest ->
|
|
let mode = Sequence (token, []) in
|
|
parse ~check errors rest (push_mode mode stack)
|
|
(* indentation checker *)
|
|
and do_check ?(toplevel = false) errors = function
|
|
| Seq ({ start ; stop }, []) as expr ->
|
|
if start.column >= stop.column then
|
|
Misaligned expr :: errors
|
|
else errors
|
|
| Prim ({ start ; stop }, _, first :: rest, _)
|
|
| Seq ({ start ; stop }, first :: rest) as expr ->
|
|
let { column = first_column ; line = first_line } =
|
|
min_point [ first ] in
|
|
if start.column >= stop.column then
|
|
Misaligned expr :: errors
|
|
else if not toplevel && start.column >= first_column then
|
|
Misaligned expr :: errors
|
|
else
|
|
(* In a sequence or in the arguments of a primitive, we
|
|
require all items to be aligned, but we relax the rule to
|
|
allow consecutive items to be writtem on the same line. *)
|
|
let rec in_line_or_aligned prev_start_line errors = function
|
|
| [] -> errors
|
|
| expr :: rest ->
|
|
let { column ; line = start_line } = min_point [ expr ] in
|
|
let { line = stop_line } = max_point [ expr ] in
|
|
let errors =
|
|
if stop_line <> prev_start_line
|
|
&& column <> first_column then
|
|
Misaligned expr :: errors
|
|
else
|
|
errors in
|
|
in_line_or_aligned start_line errors rest in
|
|
in_line_or_aligned first_line errors rest
|
|
| Prim (_, _, [], _) | String _ | Int _ -> errors
|
|
|
|
let parse_expression ?check tokens =
|
|
let result = match tokens with
|
|
| ({ token = Open_paren } as token) :: { token = Ident name } :: { token = Annot _ } :: rest ->
|
|
let annots, rest = annots rest in
|
|
let mode = Wrapped (token, name, [], annots) in
|
|
parse ?check [] rest [ mode ; Expression None ]
|
|
| ({ token = Open_paren } as token) :: { token = Ident name } :: rest ->
|
|
let mode = Wrapped (token, name, [], []) in
|
|
parse ?check [] rest [ mode ; Expression None ]
|
|
| _ ->
|
|
parse ?check [] tokens [ Expression None ] in
|
|
match result with
|
|
| [ single ], errors -> single, errors
|
|
| _ -> assert false
|
|
|
|
let parse_toplevel ?check tokens =
|
|
parse ?check [] tokens [ Toplevel [] ]
|
|
|
|
let print_point ppf { line ; column } =
|
|
Format.fprintf ppf
|
|
"At line %d character %d"
|
|
line column
|
|
|
|
let print_token_kind ppf = function
|
|
| Open_paren | Close_paren -> Format.fprintf ppf "parenthesis"
|
|
| Open_brace | Close_brace -> Format.fprintf ppf "curly brace"
|
|
| String _ -> Format.fprintf ppf "string constant"
|
|
| Int _ -> Format.fprintf ppf "integer constant"
|
|
| Ident _ -> Format.fprintf ppf "identifier"
|
|
| Annot _ -> Format.fprintf ppf "annotation"
|
|
| Comment _ | Eol_comment _ -> Format.fprintf ppf "comment"
|
|
| Semi -> Format.fprintf ppf "semi colon"
|
|
|
|
let print_location ppf loc =
|
|
if loc.start.line = loc.stop.line then
|
|
if loc.start.column = loc.stop.column then
|
|
Format.fprintf ppf
|
|
"At line %d character %d"
|
|
loc.start.line loc.start.column
|
|
else
|
|
Format.fprintf ppf
|
|
"At line %d characters %d to %d"
|
|
loc.start.line loc.start.column loc.stop.column
|
|
else
|
|
Format.fprintf ppf
|
|
"From line %d character %d to line %d character %d"
|
|
loc.start.line loc.start.column loc.stop.line loc.stop.column
|
|
|
|
let no_parsing_error (ast, errors) =
|
|
match errors with
|
|
| [] -> ok ast
|
|
| errors -> Error errors
|
|
|
|
let () =
|
|
register_error_kind `Permanent
|
|
~id: "micheline.parse_error.invalid_utf8_sequence"
|
|
~title: "Micheline parser error: invalid UTF-8 sequence"
|
|
~description: "While parsing a piece of Micheline source, \
|
|
a sequence of bytes that is not valid UTF-8 \
|
|
was encountered."
|
|
~pp:(fun ppf (point, str) -> Format.fprintf ppf "%a, invalid UTF-8 sequence %S" print_point point str)
|
|
Data_encoding.(obj2 (req "point" point_encoding) (req "sequence" string))
|
|
(function Invalid_utf8_sequence (point, str) -> Some (point, str) | _ -> None)
|
|
(fun (point, str) -> Invalid_utf8_sequence (point, str)) ;
|
|
register_error_kind `Permanent
|
|
~id: "micheline.parse_error.unexpected_character"
|
|
~title: "Micheline parser error: unexpected character"
|
|
~description: "While parsing a piece of Micheline source, \
|
|
an unexpected character was encountered."
|
|
~pp:(fun ppf (point, str) -> Format.fprintf ppf "%a, unexpected character %s" print_point point str)
|
|
Data_encoding.(obj2 (req "point" point_encoding) (req "character" string))
|
|
(function Unexpected_character (point, str) -> Some (point, str) | _ -> None)
|
|
(fun (point, str) -> Unexpected_character (point, str)) ;
|
|
register_error_kind `Permanent
|
|
~id: "micheline.parse_error.undefined_escape_sequence"
|
|
~title: "Micheline parser error: undefined escape sequence"
|
|
~description: "While parsing a piece of Micheline source, \
|
|
an unexpected escape sequence was encountered in a string."
|
|
~pp:(fun ppf (point, str) -> Format.fprintf ppf "%a, undefined escape sequence \"%s\"" print_point point str)
|
|
Data_encoding.(obj2 (req "point" point_encoding) (req "sequence" string))
|
|
(function Undefined_escape_sequence (point, str) -> Some (point, str) | _ -> None)
|
|
(fun (point, str) -> Undefined_escape_sequence (point, str)) ;
|
|
register_error_kind `Permanent
|
|
~id: "micheline.parse_error.missing_break_after_number"
|
|
~title: "Micheline parser error: missing break after number"
|
|
~description: "While parsing a piece of Micheline source, \
|
|
a number was not visually separated from \
|
|
its follower token, leading to misreadability."
|
|
~pp:(fun ppf point -> Format.fprintf ppf "%a, missing break after number" print_point point)
|
|
Data_encoding.(obj1 (req "point" point_encoding))
|
|
(function Missing_break_after_number point -> Some point | _ -> None)
|
|
(fun point -> Missing_break_after_number point) ;
|
|
register_error_kind `Permanent
|
|
~id: "micheline.parse_error.unterminated_string"
|
|
~title: "Micheline parser error: unterminated string"
|
|
~description: "While parsing a piece of Micheline source, \
|
|
a string was not terminated."
|
|
~pp:(fun ppf loc -> Format.fprintf ppf "%a, unterminated string" print_location loc)
|
|
Data_encoding.(obj1 (req "location" location_encoding))
|
|
(function Unterminated_string loc -> Some loc | _ -> None)
|
|
(fun loc -> Unterminated_string loc) ;
|
|
register_error_kind `Permanent
|
|
~id: "micheline.parse_error.unterminated_integer"
|
|
~title: "Micheline parser error: unterminated integer"
|
|
~description: "While parsing a piece of Micheline source, \
|
|
an integer was not terminated."
|
|
~pp:(fun ppf loc -> Format.fprintf ppf "%a, unterminated integer" print_location loc)
|
|
Data_encoding.(obj1 (req "location" location_encoding))
|
|
(function Unterminated_integer loc -> Some loc | _ -> None)
|
|
(fun loc -> Unterminated_integer loc) ;
|
|
register_error_kind `Permanent
|
|
~id: "micheline.parse_error.unterminated_comment"
|
|
~title: "Micheline parser error: unterminated comment"
|
|
~description: "While parsing a piece of Micheline source, \
|
|
a commentX was not terminated."
|
|
~pp:(fun ppf loc -> Format.fprintf ppf "%a, unterminated comment" print_location loc)
|
|
Data_encoding.(obj1 (req "location" location_encoding))
|
|
(function Unterminated_comment loc -> Some loc | _ -> None)
|
|
(fun loc -> Unterminated_comment loc) ;
|
|
register_error_kind `Permanent
|
|
~id: "micheline.parse_error.annotation_exceeds_max_length"
|
|
~title: "Micheline parser error: annotation exceeds max length"
|
|
~description: (Format.sprintf
|
|
"While parsing a piece of Micheline source, \
|
|
an annotation exceeded the maximum length (%d)." max_annot_length)
|
|
~pp:(fun ppf loc -> Format.fprintf ppf "%a, annotation exceeded maximum length (%d chars)"
|
|
print_location
|
|
loc max_annot_length)
|
|
Data_encoding.(obj1 (req "location" location_encoding))
|
|
(function Annotation_length loc -> Some loc | _ -> None)
|
|
(fun loc -> Annotation_length loc) ;
|
|
register_error_kind `Permanent
|
|
~id: "micheline.parse_error.unclosed_token"
|
|
~title: "Micheline parser error: unclosed token"
|
|
~description: "While parsing a piece of Micheline source, \
|
|
a parenthesis or a brace was unclosed."
|
|
~pp:(fun ppf (loc, token) ->
|
|
Format.fprintf ppf "%a, unclosed %a" print_location loc print_token_kind token)
|
|
Data_encoding.(obj2
|
|
(req "location"location_encoding)
|
|
(req "token" token_value_encoding))
|
|
(function Unclosed { loc ; token } -> Some (loc, token) | _ -> None)
|
|
(fun (loc, token) -> Unclosed { loc ; token }) ;
|
|
register_error_kind `Permanent
|
|
~id: "micheline.parse_error.unexpected_token"
|
|
~title: "Micheline parser error: unexpected token"
|
|
~description: "While parsing a piece of Micheline source, \
|
|
an unexpected token was encountered."
|
|
~pp:(fun ppf (loc, token) ->
|
|
Format.fprintf ppf "%a, unexpected %a" print_location loc print_token_kind token)
|
|
Data_encoding.(obj2
|
|
(req "location"location_encoding)
|
|
(req "token" token_value_encoding))
|
|
(function Unexpected { loc ; token } -> Some (loc, token) | _ -> None)
|
|
(fun (loc, token) -> Unexpected { loc ; token }) ;
|
|
register_error_kind `Permanent
|
|
~id: "micheline.parse_error.extra_token"
|
|
~title: "Micheline parser error: extra token"
|
|
~description: "While parsing a piece of Micheline source, \
|
|
an extra semi colon or parenthesis was encountered."
|
|
~pp:(fun ppf (loc, token) ->
|
|
Format.fprintf ppf "%a, extra %a" print_location loc print_token_kind token)
|
|
Data_encoding.(obj2
|
|
(req "location"location_encoding)
|
|
(req "token" token_value_encoding))
|
|
(function Extra { loc ; token } -> Some (loc, token) | _ -> None)
|
|
(fun (loc, token) -> Extra { loc ; token }) ;
|
|
register_error_kind `Permanent
|
|
~id: "micheline.parse_error.misaligned_node"
|
|
~title: "Micheline parser error: misaligned node"
|
|
~description: "While parsing a piece of Micheline source, \
|
|
an expression was not aligned with its \
|
|
siblings of the same mother application \
|
|
or sequence."
|
|
~pp:(fun ppf node ->
|
|
Format.fprintf ppf "%a, misaligned expression" print_location (location node))
|
|
Data_encoding.(obj1 (req "expression" node_encoding))
|
|
(function Misaligned node -> Some node | _ -> None)
|
|
(fun node -> Misaligned node) ;
|
|
register_error_kind `Permanent
|
|
~id: "micheline.parse_error.empty_expression"
|
|
~title: "Micheline parser error: empty_expression"
|
|
~description: "Tried to interpret an empty piece or \
|
|
Micheline source as a single expression."
|
|
~pp:(fun ppf () -> Format.fprintf ppf "empty expression")
|
|
Data_encoding.empty
|
|
(function Empty -> Some () | _ -> None)
|
|
(fun () -> Empty)
|