Michelson: fix inconsistency when reading and printing strings

This commit is contained in:
Benjamin Canou 2018-05-01 23:39:16 +02:00 committed by Grégoire Henry
parent f119a9b2a5
commit c57458ea01
4 changed files with 27 additions and 15 deletions

View File

@@ -1741,6 +1741,9 @@ language can only be one of the four following constructs.
This simple four cases notation is called Micheline. This simple four cases notation is called Micheline.
The encoding of a Micheline source file must be UTF-8, and non-ASCII
characters can only appear in comments and strings.
Constants Constants
~~~~~~~~~ ~~~~~~~~~
@@ -1748,12 +1751,12 @@ There are two kinds of constants:
1. Integers or naturals in decimal (no prefix), hexadecimal (0x prefix), 1. Integers or naturals in decimal (no prefix), hexadecimal (0x prefix),
octal (0o prefix) or binary (0b prefix). octal (0o prefix) or binary (0b prefix).
2. Strings with usual escapes ``\n``, ``\t``, ``\b``, ``\r``, ``\\``, 2. Strings, with usual escape sequences: ``\n``, ``\t``, ``\b``,
``\"``. The encoding of a Michelson source file must be UTF-8, and ``\r``, ``\\``, ``\"``. Unescaped line breaks (both ``\n`` and ``\r``)
non-ASCII characters can only appear in comments. No line break can cannot appear in the middle of a string.
appear in a string. Any non-printable characters must be escaped
using two hexadecimal characters, as in ``\xHH`` or the The current version of Michelson restricts strings to be the printable
predefine escape sequences above.. subset of 7-bit ASCII, plus the line break ``\n``.
Primitive applications Primitive applications
~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~

View File

@@ -255,7 +255,7 @@ let tokenize source =
| `Uchar c, stop -> | `Uchar c, stop ->
match uchar_to_char c with match uchar_to_char c with
| Some '"' -> skip (tok () :: acc) | Some '"' -> skip (tok () :: acc)
| Some '\n' -> | Some ('\n' | '\r') ->
errors := Unterminated_string { start ; stop } :: !errors ; errors := Unterminated_string { start ; stop } :: !errors ;
skip (tok () :: acc) skip (tok () :: acc)
| Some '\\' -> | Some '\\' ->

View File

@@ -26,12 +26,13 @@ let print_comment ppf text =
let print_string ppf text = let print_string ppf text =
Format.fprintf ppf "\"" ; Format.fprintf ppf "\"" ;
String.iter (function String.iter (function
| '"' | 'r' | 'n' | 't' | 'b' | '\\' as c -> | '"' -> Format.fprintf ppf "\\\""
Format.fprintf ppf "%c" c | '\n' -> Format.fprintf ppf "\\n"
| '\x20'..'\x7E' as c -> | '\r' -> Format.fprintf ppf "\\r"
Format.fprintf ppf "%c" c | '\b' -> Format.fprintf ppf "\\b"
| c -> | '\t' -> Format.fprintf ppf "\\t"
Format.fprintf ppf "\\x%02X" (Char.code c)) | '\\' -> Format.fprintf ppf "\\\\"
| c -> Format.fprintf ppf "%c" c)
text ; text ;
Format.fprintf ppf "\"" Format.fprintf ppf "\""

View File

@@ -1109,8 +1109,16 @@ let rec parse_data
traced (fail (unexpected expr [] Constant_namespace [ D_True ; D_False ])) traced (fail (unexpected expr [] Constant_namespace [ D_True ; D_False ]))
(* Strings *) (* Strings *)
| String_t, String (_, v) -> | String_t, String (_, v) ->
Lwt.return (Gas.consume ctxt (Typecheck_costs.string (String.length v))) >>|? fun ctxt -> Lwt.return (Gas.consume ctxt (Typecheck_costs.string (String.length v))) >>=? fun ctxt ->
(v, ctxt) let rec check_printable_ascii i =
if Compare.Int.(i < 0) then true
else match String.get v i with
| '\n' | '\x20'..'\x7E' -> check_printable_ascii (i - 1)
| _ -> false in
if check_printable_ascii (String.length v - 1) then
return (v, ctxt)
else
fail (error ())
| String_t, expr -> | String_t, expr ->
traced (fail (Invalid_kind (location expr, [ String_kind ], kind expr))) traced (fail (Invalid_kind (location expr, [ String_kind ], kind expr)))
(* Integers *) (* Integers *)