From c3f4aa14541703c92ea18dea9c1a794f1b4d5b54 Mon Sep 17 00:00:00 2001 From: Benjamin Canou Date: Thu, 14 Jun 2018 15:26:20 +0200 Subject: [PATCH] Michelson: new bytes type --- docs/whitedoc/michelson.rst | 17 ++++++------- .../lib_protocol/src/alpha_context.mli | 1 + .../lib_protocol/src/michelson_v1_gas.ml | 3 +++ .../lib_protocol/src/michelson_v1_gas.mli | 1 + .../src/michelson_v1_primitives.ml | 4 ++++ .../src/michelson_v1_primitives.mli | 1 + .../lib_protocol/src/script_ir_translator.ml | 24 +++++++++++++++++-- .../lib_protocol/src/script_typed_ir.ml | 1 + 8 files changed, 42 insertions(+), 10 deletions(-) diff --git a/docs/whitedoc/michelson.rst b/docs/whitedoc/michelson.rst index de941935f..5e3f8fc8f 100644 --- a/docs/whitedoc/michelson.rst +++ b/docs/whitedoc/michelson.rst @@ -153,6 +153,7 @@ Data patterns are of one of the following syntactical forms. - integer/natural number literals, (e.g. ``3``) ; - string literals, (e.g. ``"contents"``) ; +- raw byte sequence literals (e.g. ``0xABCDEF42``) - ``Tag`` (capitalized) is a symbolic constant, (e.g. ``Unit``, ``True``, ``False``) ; - ``(Tag (arg) ...)`` tagged constructed data, (e.g. ``(Pair 3 4)``) ; @@ -338,7 +339,8 @@ polymorphism. III - Core data types and notations ----------------------------------- -- ``string``, ``nat``, ``int``: The core primitive constant types. +- ``string``, ``nat``, ``int`` and ``bytes``: The core primitive + constant types. - ``bool``: The type for booleans whose values are ``True`` and ``False`` @@ -1714,13 +1716,13 @@ characters can only appear in comments and strings. Constants ~~~~~~~~~ -There are two kinds of constants: +There are three kinds of constants: -1. Integers or naturals in decimal (no prefix), hexadecimal (``0x`` prefix), - octal (``0o`` prefix) or binary (``0b`` prefix). +1. Integers or naturals in decimal notation. 2. Strings, with usual escape sequences: ``\n``, ``\t``, ``\b``, ``\r``, ``\\``, ``\"``. 
Unescaped line-breaks (both ``\n`` and ``\r``) cannot appear in the middle of a string. +3. Byte sequences in hexadecimal notation, prefixed with ``0x``. The current version of Michelson restricts strings to be the printable subset of 7-bit ASCII, plus the escaped characters mentioned above. @@ -1796,8 +1798,8 @@ type constructors by lowercase identifiers, and constant constructors are Capitalized. All domain specific constants are Micheline constants with specific -formats. Some have two representations accepted by the data type -checker: a readable one in a string and an optimized one in a natural. +formats. Some have two variants accepted by the data type checker: a +readable one in a string and an optimized one. - ``mutez`` amounts are written as naturals. - ``timestamp``\ s are written either using ``RFC 339`` notation @@ -1805,8 +1807,7 @@ checker: a readable one in a string and an optimized one in a natural. in a natural (optimized). - ``contract``\ s, ``address``\ es, ``key``\ s and ``signature``\ s are written as strings, in their usual Base58 encoded versions - (readable), or as the little indian interpretation of their - bytes in a natural (optimized). + (readable), or as their raw bytes (optimized). 
The optimized versions should not reach the RPCs, the protocol code will convert to optimized by itself when forging operations, storing diff --git a/src/proto_alpha/lib_protocol/src/alpha_context.mli b/src/proto_alpha/lib_protocol/src/alpha_context.mli index 1d9523367..fd827879a 100644 --- a/src/proto_alpha/lib_protocol/src/alpha_context.mli +++ b/src/proto_alpha/lib_protocol/src/alpha_context.mli @@ -266,6 +266,7 @@ module Script : sig | T_set | T_signature | T_string + | T_bytes | T_mutez | T_timestamp | T_unit diff --git a/src/proto_alpha/lib_protocol/src/michelson_v1_gas.ml b/src/proto_alpha/lib_protocol/src/michelson_v1_gas.ml index 42a4c1b0b..1fec742fa 100644 --- a/src/proto_alpha/lib_protocol/src/michelson_v1_gas.ml +++ b/src/proto_alpha/lib_protocol/src/michelson_v1_gas.ml @@ -238,7 +238,10 @@ module Cost_of = struct let cycle = step_cost 1 let bool = prim_cost let unit = prim_cost + (* FIXME: not sure we should count the length of strings and bytes + as they are shared *) let string s = string_cost (String.length s) + let bytes s = alloc_bytes_cost (MBytes.length s) (* Approximates log10(x) *) let int i = let decimal_digits = (Z.numbits (Z.abs (Script_int.to_zint i))) / 4 in diff --git a/src/proto_alpha/lib_protocol/src/michelson_v1_gas.mli b/src/proto_alpha/lib_protocol/src/michelson_v1_gas.mli index a4704bde4..274dde44b 100644 --- a/src/proto_alpha/lib_protocol/src/michelson_v1_gas.mli +++ b/src/proto_alpha/lib_protocol/src/michelson_v1_gas.mli @@ -138,6 +138,7 @@ module Cost_of : sig val int : 'a Script_int.num -> Gas.cost val tez : Gas.cost val string : string -> Gas.cost + val bytes : MBytes.t -> Gas.cost val timestamp : Script_timestamp.t -> Gas.cost val key : Gas.cost val key_hash : Gas.cost diff --git a/src/proto_alpha/lib_protocol/src/michelson_v1_primitives.ml b/src/proto_alpha/lib_protocol/src/michelson_v1_primitives.ml index 56167e520..a6d62e31a 100644 --- a/src/proto_alpha/lib_protocol/src/michelson_v1_primitives.ml +++ 
b/src/proto_alpha/lib_protocol/src/michelson_v1_primitives.ml @@ -115,6 +115,7 @@ type prim = | T_set | T_signature | T_string + | T_bytes | T_mutez | T_timestamp | T_unit @@ -242,6 +243,7 @@ let string_of_prim = function | T_set -> "set" | T_signature -> "signature" | T_string -> "string" + | T_bytes -> "bytes" | T_mutez -> "mutez" | T_timestamp -> "timestamp" | T_unit -> "unit" @@ -350,6 +352,7 @@ let prim_of_string = function | "set" -> ok T_set | "signature" -> ok T_signature | "string" -> ok T_string + | "bytes" -> ok T_bytes | "mutez" -> ok T_mutez | "timestamp" -> ok T_timestamp | "unit" -> ok T_unit @@ -503,6 +506,7 @@ let prim_encoding = ("set", T_set) ; ("signature", T_signature) ; ("string", T_string) ; + ("bytes", T_bytes) ; ("mutez", T_mutez) ; ("timestamp", T_timestamp) ; ("unit", T_unit) ; diff --git a/src/proto_alpha/lib_protocol/src/michelson_v1_primitives.mli b/src/proto_alpha/lib_protocol/src/michelson_v1_primitives.mli index 5aa0c5445..ac41731cf 100644 --- a/src/proto_alpha/lib_protocol/src/michelson_v1_primitives.mli +++ b/src/proto_alpha/lib_protocol/src/michelson_v1_primitives.mli @@ -113,6 +113,7 @@ type prim = | T_set | T_signature | T_string + | T_bytes | T_mutez | T_timestamp | T_unit diff --git a/src/proto_alpha/lib_protocol/src/script_ir_translator.ml b/src/proto_alpha/lib_protocol/src/script_ir_translator.ml index b39301e4c..f5e18c338 100644 --- a/src/proto_alpha/lib_protocol/src/script_ir_translator.ml +++ b/src/proto_alpha/lib_protocol/src/script_ir_translator.ml @@ -55,6 +55,7 @@ let rec type_size : type t. 
t ty -> int = | Int_t _ -> 1 | Nat_t _ -> 1 | Signature_t _ -> 1 + | Bytes_t _ -> 1 | String_t _ -> 1 | Mutez_t _ -> 1 | Key_hash_t _ -> 1 @@ -327,6 +328,7 @@ let namespace = function | T_set | T_signature | T_string + | T_bytes | T_mutez | T_timestamp | T_unit @@ -531,6 +533,7 @@ let rec unparse_ty | Int_t tname -> Prim (-1, T_int, [], unparse_type_annot tname) | Nat_t tname -> Prim (-1, T_nat, [], unparse_type_annot tname) | String_t tname -> Prim (-1, T_string, [], unparse_type_annot tname) + | Bytes_t tname -> Prim (-1, T_bytes, [], unparse_type_annot tname) | Mutez_t tname -> Prim (-1, T_mutez, [], unparse_type_annot tname) | Bool_t tname -> Prim (-1, T_bool, [], unparse_type_annot tname) | Key_hash_t tname -> Prim (-1, T_key_hash, [], unparse_type_annot tname) @@ -582,6 +585,7 @@ let name_of_ty | Int_t tname -> tname | Nat_t tname -> tname | String_t tname -> tname + | Bytes_t tname -> tname | Mutez_t tname -> tname | Bool_t tname -> tname | Key_hash_t tname -> tname @@ -629,6 +633,7 @@ let rec ty_eq | Key_t _, Key_t _ -> Ok Eq | Key_hash_t _, Key_hash_t _ -> Ok Eq | String_t _, String_t _ -> Ok Eq + | Bytes_t _, Bytes_t _ -> Ok Eq | Signature_t _, Signature_t _ -> Ok Eq | Mutez_t _, Mutez_t _ -> Ok Eq | Timestamp_t _, Timestamp_t _ -> Ok Eq @@ -751,6 +756,9 @@ let merge_types : | String_t tn1, String_t tn2 -> merge_type_annot tn1 tn2 >|? fun tname -> String_t tname + | Bytes_t tn1, Bytes_t tn2 -> + merge_type_annot tn1 tn2 >|? fun tname -> + Bytes_t tname | Signature_t tn1, Signature_t tn2 -> merge_type_annot tn1 tn2 >|? fun tname -> Signature_t tname @@ -960,6 +968,9 @@ and parse_ty : | Prim (loc, T_string, [], annot) -> parse_type_annot loc annot >|? fun ty_name -> Ex_ty (String_t ty_name) + | Prim (loc, T_bytes, [], annot) -> + parse_type_annot loc annot >|? fun ty_name -> + Ex_ty (Bytes_t ty_name) | Prim (loc, T_mutez, [], annot) -> parse_type_annot loc annot >|? 
fun ty_name -> Ex_ty (Mutez_t ty_name) @@ -1032,7 +1043,7 @@ and parse_ty : error (Unexpected_big_map loc) | Prim (loc, (T_unit | T_signature | T_int | T_nat - | T_string | T_mutez | T_bool + | T_string | T_bytes | T_mutez | T_bool | T_key | T_key_hash | T_timestamp | T_address as prim), l, _) -> error (Invalid_arity (loc, prim, 0, List.length l)) @@ -1046,7 +1057,7 @@ and parse_ty : T_list ; T_option ; T_lambda ; T_unit ; T_signature ; T_contract ; T_int ; T_nat ; T_operation ; - T_string ; T_mutez ; T_bool ; + T_string ; T_bytes ; T_mutez ; T_bool ; T_key ; T_key_hash ; T_timestamp ] let rec unparse_stack @@ -1144,6 +1155,12 @@ let rec parse_data fail (error ()) | String_t _, expr -> traced (fail (Invalid_kind (location expr, [ String_kind ], kind expr))) + (* Byte sequences *) + | Bytes_t _, Bytes (_, v) -> + Lwt.return (Gas.consume ctxt (Typecheck_costs.string (MBytes.length v))) >>=? fun ctxt -> + return (v, ctxt) + | Bytes_t _, expr -> + traced (fail (Invalid_kind (location expr, [ Bytes_kind ], kind expr))) (* Integers *) | Int_t _, Int (_, v) -> return (Script_int.of_zint v, ctxt) @@ -2620,6 +2637,9 @@ let rec unparse_data | String_t _, s -> Lwt.return (Gas.consume ctxt (Unparse_costs.string s)) >>=? fun ctxt -> return (String (-1, s), ctxt) + | Bytes_t _, s -> + Lwt.return (Gas.consume ctxt (Unparse_costs.bytes s)) >>=? fun ctxt -> + return (Bytes (-1, s), ctxt) | Bool_t _, true -> Lwt.return (Gas.consume ctxt Unparse_costs.bool) >>=? 
fun ctxt -> return (Prim (-1, D_True, [], []), ctxt) diff --git a/src/proto_alpha/lib_protocol/src/script_typed_ir.ml b/src/proto_alpha/lib_protocol/src/script_typed_ir.ml index a48e6aac1..a090dcdc5 100644 --- a/src/proto_alpha/lib_protocol/src/script_typed_ir.ml +++ b/src/proto_alpha/lib_protocol/src/script_typed_ir.ml @@ -72,6 +72,7 @@ and 'ty ty = | Nat_t : type_annot option -> n num ty | Signature_t : type_annot option -> signature ty | String_t : type_annot option -> string ty + | Bytes_t : type_annot option -> MBytes.t ty | Mutez_t : type_annot option -> Tez.t ty | Key_hash_t : type_annot option -> public_key_hash ty | Key_t : type_annot option -> public_key ty