diff --git a/docs/whitedoc/michelson.rst b/docs/whitedoc/michelson.rst index b50842202..b83fb973e 100644 --- a/docs/whitedoc/michelson.rst +++ b/docs/whitedoc/michelson.rst @@ -1741,6 +1741,9 @@ language can only be one of the four following constructs. This simple four cases notation is called Micheline. +The encoding of a Micheline source file must be UTF-8, and non-ASCII +characters can only appear in comments and strings. + Constants ~~~~~~~~~ @@ -1748,12 +1751,12 @@ There are two kinds of constants: 1. Integers or naturals in decimal (no prefix), hexadecimal (0x prefix), octal (0o prefix) or binary (0b prefix). -2. Strings with usual escapes ``\n``, ``\t``, ``\b``, ``\r``, ``\\``, - ``\"``. The encoding of a Michelson source file must be UTF-8, and - non-ASCII characters can only appear in comments. No line break can - appear in a string. Any non-printable characters must be escaped - using two hexadecimal characters, as in ``\xHH`` or the - predefine escape sequences above.. +2. Strings, with usual escape sequences: ``\n``, ``\t``, ``\b``, + ``\r``, ``\\``, ``\"``. Unescaped line breaks (both ``\n`` and ``\r``) + cannot appear in the middle of a string. + +The current version of Michelson restricts strings to the printable +subset of 7-bit ASCII, plus the line break character ``\n``. 
Primitive applications ~~~~~~~~~~~~~~~~~~~~~~ diff --git a/src/lib_micheline/micheline_parser.ml b/src/lib_micheline/micheline_parser.ml index bcaf38927..70cf12819 100644 --- a/src/lib_micheline/micheline_parser.ml +++ b/src/lib_micheline/micheline_parser.ml @@ -255,7 +255,7 @@ let tokenize source = | `Uchar c, stop -> match uchar_to_char c with | Some '"' -> skip (tok () :: acc) - | Some '\n' -> + | Some ('\n' | '\r') -> errors := Unterminated_string { start ; stop } :: !errors ; skip (tok () :: acc) | Some '\\' -> diff --git a/src/lib_micheline/micheline_printer.ml b/src/lib_micheline/micheline_printer.ml index 7513e4350..370b6b68e 100644 --- a/src/lib_micheline/micheline_printer.ml +++ b/src/lib_micheline/micheline_printer.ml @@ -26,12 +26,13 @@ let print_comment ppf text = let print_string ppf text = Format.fprintf ppf "\"" ; String.iter (function - | '"' | 'r' | 'n' | 't' | 'b' | '\\' as c -> - Format.fprintf ppf "%c" c - | '\x20'..'\x7E' as c -> - Format.fprintf ppf "%c" c - | c -> - Format.fprintf ppf "\\x%02X" (Char.code c)) + | '"' -> Format.fprintf ppf "\\\"" + | '\n' -> Format.fprintf ppf "\\n" + | '\r' -> Format.fprintf ppf "\\r" + | '\b' -> Format.fprintf ppf "\\b" + | '\t' -> Format.fprintf ppf "\\t" + | '\\' -> Format.fprintf ppf "\\\\" + | c -> Format.fprintf ppf "%c" c) text ; Format.fprintf ppf "\"" diff --git a/src/proto_alpha/lib_protocol/src/script_ir_translator.ml b/src/proto_alpha/lib_protocol/src/script_ir_translator.ml index 0d6e27978..b0393139a 100644 --- a/src/proto_alpha/lib_protocol/src/script_ir_translator.ml +++ b/src/proto_alpha/lib_protocol/src/script_ir_translator.ml @@ -1109,8 +1109,16 @@ let rec parse_data traced (fail (unexpected expr [] Constant_namespace [ D_True ; D_False ])) (* Strings *) | String_t, String (_, v) -> - Lwt.return (Gas.consume ctxt (Typecheck_costs.string (String.length v))) >>|? fun ctxt -> - (v, ctxt) + Lwt.return (Gas.consume ctxt (Typecheck_costs.string (String.length v))) >>=? 
fun ctxt -> + let rec check_printable_ascii i = + if Compare.Int.(i < 0) then true + else match String.get v i with + | '\n' | '\x20'..'\x7E' -> check_printable_ascii (i - 1) + | _ -> false in + if check_printable_ascii (String.length v - 1) then + return (v, ctxt) + else + fail (error ()) | String_t, expr -> traced (fail (Invalid_kind (location expr, [ String_kind ], kind expr))) (* Integers *)