Michelson: new bytes type

2018-06-14 15:26:20 +02:00 · 2018-06-14 15:26:20 +02:00 · c3f4aa1454
commit c3f4aa1454
parent 45d8fd11ae
8 changed files with 42 additions and 10 deletions
--- a/docs/whitedoc/michelson.rst
+++ b/docs/whitedoc/michelson.rst
@ -153,6 +153,7 @@ Data patterns are of one of the following syntactical forms.

 -  integer/natural number literals, (e.g. ``3``) ;
 -  string literals, (e.g. ``"contents"``) ;
+-  raw byte sequence literals, (e.g. ``0xABCDEF42``) ;
 -  ``Tag`` (capitalized) is a symbolic constant, (e.g. ``Unit``,
   ``True``, ``False``) ;
 -  ``(Tag (arg) ...)`` tagged constructed data, (e.g. ``(Pair 3 4)``) ;
@ -338,7 +339,8 @@ polymorphism.
 III - Core data types and notations
 -----------------------------------

-  ``string``, ``nat``, ``int``: The core primitive constant types.
+-  ``string``, ``nat``, ``int`` and ``bytes``: The core primitive
+   constant types.

 -  ``bool``: The type for booleans whose values are ``True`` and
   ``False``
@ -1714,13 +1716,13 @@ characters can only appear in comments and strings.
 Constants
 ~~~~~~~~~

-There are two kinds of constants:
+There are three kinds of constants:

-1. Integers or naturals in decimal (no prefix), hexadecimal (``0x`` prefix),
-   octal (``0o`` prefix) or binary (``0b`` prefix).
+1. Integers or naturals in decimal notation.
 2. Strings, with usual escape sequences: ``\n``, ``\t``, ``\b``,
   ``\r``, ``\\``, ``\"``. Unescaped line-breaks (both ``\n`` and ``\r``)
   cannot appear in the middle of a string.
+3. Byte sequences in hexadecimal notation, prefixed with ``0x``.

 The current version of Michelson restricts strings to be the printable
 subset of 7-bit ASCII, plus the escaped characters mentioned above.
@ -1796,8 +1798,8 @@ type constructors by lowercase identifiers, and constant constructors
 are Capitalized.

 All domain specific constants are Micheline constants with specific
-formats. Some have two representations accepted by the data type
-checker: a readable one in a string and an optimized one in a natural.
+formats. Some have two variants accepted by the data type checker: a
+readable one in a string and an optimized one.

 -  ``mutez`` amounts are written as naturals.
 -  ``timestamp``\ s are written either using ``RFC 339`` notation
@ -1805,8 +1807,7 @@ checker: a readable one in a string and an optimized one in a natural.
   in a natural (optimized).
 -  ``contract``\ s, ``address``\ es, ``key``\ s and ``signature``\ s
   are written as strings, in their usual Base58 encoded versions
-   (readable), or as the little indian interpretation of their
-   bytes in a natural (optimized).
+   (readable), or as their raw bytes (optimized).

 The optimized versions should not reach the RPCs, the protocol code
 will convert to optimized by itself when forging operations, storing
--- a/src/proto_alpha/lib_protocol/src/alpha_context.mli
+++ b/src/proto_alpha/lib_protocol/src/alpha_context.mli
@ -266,6 +266,7 @@ module Script : sig
    | T_set
    | T_signature
    | T_string
+    | T_bytes
    | T_mutez
    | T_timestamp
    | T_unit
--- a/src/proto_alpha/lib_protocol/src/michelson_v1_gas.ml
+++ b/src/proto_alpha/lib_protocol/src/michelson_v1_gas.ml
@ -238,7 +238,10 @@ module Cost_of = struct
    let cycle = step_cost 1
    let bool = prim_cost
    let unit = prim_cost
+    (* FIXME: not sure we should count the length of strings and bytes
+       as they are shared *)
    let string s = string_cost (String.length s)
+    let bytes s = alloc_bytes_cost (MBytes.length s)
    (* Approximates log10(x) *)
    let int i =
      let decimal_digits = (Z.numbits (Z.abs (Script_int.to_zint i))) / 4 in
--- a/src/proto_alpha/lib_protocol/src/michelson_v1_gas.mli
+++ b/src/proto_alpha/lib_protocol/src/michelson_v1_gas.mli
@ -138,6 +138,7 @@ module Cost_of : sig
    val int : 'a Script_int.num -> Gas.cost
    val tez : Gas.cost
    val string : string -> Gas.cost
+    val bytes : MBytes.t -> Gas.cost
    val timestamp : Script_timestamp.t -> Gas.cost
    val key : Gas.cost
    val key_hash : Gas.cost
--- a/src/proto_alpha/lib_protocol/src/michelson_v1_primitives.ml
+++ b/src/proto_alpha/lib_protocol/src/michelson_v1_primitives.ml
@ -115,6 +115,7 @@ type prim =
  | T_set
  | T_signature
  | T_string
+  | T_bytes
  | T_mutez
  | T_timestamp
  | T_unit
@ -242,6 +243,7 @@ let string_of_prim = function
  | T_set -> "set"
  | T_signature -> "signature"
  | T_string -> "string"
+  | T_bytes -> "bytes"
  | T_mutez -> "mutez"
  | T_timestamp -> "timestamp"
  | T_unit -> "unit"
@ -350,6 +352,7 @@ let prim_of_string = function
  | "set" -> ok T_set
  | "signature" -> ok T_signature
  | "string" -> ok T_string
+  | "bytes" -> ok T_bytes
  | "mutez" -> ok T_mutez
  | "timestamp" -> ok T_timestamp
  | "unit" -> ok T_unit
@ -503,6 +506,7 @@ let prim_encoding =
    ("set", T_set) ;
    ("signature", T_signature) ;
    ("string", T_string) ;
+    ("bytes", T_bytes) ;
    ("mutez", T_mutez) ;
    ("timestamp", T_timestamp) ;
    ("unit", T_unit) ;
--- a/src/proto_alpha/lib_protocol/src/michelson_v1_primitives.mli
+++ b/src/proto_alpha/lib_protocol/src/michelson_v1_primitives.mli
@ -113,6 +113,7 @@ type prim =
  | T_set
  | T_signature
  | T_string
+  | T_bytes
  | T_mutez
  | T_timestamp
  | T_unit
--- a/src/proto_alpha/lib_protocol/src/script_ir_translator.ml
+++ b/src/proto_alpha/lib_protocol/src/script_ir_translator.ml
@ -55,6 +55,7 @@ let rec type_size : type t. t ty -> int =
    | Int_t _ -> 1
    | Nat_t _ -> 1
    | Signature_t _ -> 1
+    | Bytes_t _ -> 1
    | String_t _ -> 1
    | Mutez_t _ -> 1
    | Key_hash_t _ -> 1
@ -327,6 +328,7 @@ let namespace = function
  | T_set
  | T_signature
  | T_string
+  | T_bytes
  | T_mutez
  | T_timestamp
  | T_unit
@ -531,6 +533,7 @@ let rec unparse_ty
    | Int_t tname -> Prim (-1, T_int, [], unparse_type_annot tname)
    | Nat_t tname -> Prim (-1, T_nat, [], unparse_type_annot tname)
    | String_t tname -> Prim (-1, T_string, [], unparse_type_annot tname)
+    | Bytes_t tname -> Prim (-1, T_bytes, [], unparse_type_annot tname)
    | Mutez_t tname -> Prim (-1, T_mutez, [], unparse_type_annot tname)
    | Bool_t tname -> Prim (-1, T_bool, [], unparse_type_annot tname)
    | Key_hash_t tname -> Prim (-1, T_key_hash, [], unparse_type_annot tname)
@ -582,6 +585,7 @@ let name_of_ty
    | Int_t tname -> tname
    | Nat_t tname -> tname
    | String_t tname -> tname
+    | Bytes_t tname -> tname
    | Mutez_t tname -> tname
    | Bool_t tname -> tname
    | Key_hash_t tname -> tname
@ -629,6 +633,7 @@ let rec ty_eq
    | Key_t _, Key_t _ -> Ok Eq
    | Key_hash_t _, Key_hash_t _ -> Ok Eq
    | String_t _, String_t _ -> Ok Eq
+    | Bytes_t _, Bytes_t _ -> Ok Eq
    | Signature_t _, Signature_t _ -> Ok Eq
    | Mutez_t _, Mutez_t _ -> Ok Eq
    | Timestamp_t _, Timestamp_t _ -> Ok Eq
@ -751,6 +756,9 @@ let merge_types :
      | String_t tn1, String_t tn2 ->
          merge_type_annot tn1 tn2 >|? fun tname ->
          String_t tname
+      | Bytes_t tn1, Bytes_t tn2 ->
+          merge_type_annot tn1 tn2 >|? fun tname ->
+          Bytes_t tname
      | Signature_t tn1, Signature_t tn2 ->
          merge_type_annot tn1 tn2 >|? fun tname ->
          Signature_t tname
@ -960,6 +968,9 @@ and parse_ty :
    | Prim (loc, T_string, [], annot) ->
        parse_type_annot loc annot >|? fun ty_name ->
        Ex_ty (String_t ty_name)
+    | Prim (loc, T_bytes, [], annot) ->
+        parse_type_annot loc annot >|? fun ty_name ->
+        Ex_ty (Bytes_t ty_name)
    | Prim (loc, T_mutez, [], annot) ->
        parse_type_annot loc annot >|? fun ty_name ->
        Ex_ty (Mutez_t ty_name)
@ -1032,7 +1043,7 @@ and parse_ty :
        error (Unexpected_big_map loc)
    | Prim (loc, (T_unit | T_signature
                 | T_int | T_nat
-                 | T_string | T_mutez | T_bool
+                 | T_string | T_bytes | T_mutez | T_bool
                 | T_key | T_key_hash
                 | T_timestamp | T_address as prim), l, _) ->
        error (Invalid_arity (loc, prim, 0, List.length l))
@ -1046,7 +1057,7 @@ and parse_ty :
            T_list ; T_option  ; T_lambda ;
            T_unit ; T_signature  ; T_contract ;
            T_int ; T_nat ; T_operation ;
-            T_string ; T_mutez ; T_bool ;
+            T_string ; T_bytes ; T_mutez ; T_bool ;
            T_key ; T_key_hash ; T_timestamp ]

 let rec unparse_stack
@ -1144,6 +1155,12 @@ let rec parse_data
          fail (error ())
    | String_t _, expr ->
        traced (fail (Invalid_kind (location expr, [ String_kind ], kind expr)))
+    (* Byte sequences *)
+    | Bytes_t _, Bytes (_, v) ->
+        Lwt.return (Gas.consume ctxt (Typecheck_costs.string (MBytes.length v))) >>=? fun ctxt ->
+        return (v, ctxt)
+    | Bytes_t _, expr ->
+        traced (fail (Invalid_kind (location expr, [ Bytes_kind ], kind expr)))
    (* Integers *)
    | Int_t _, Int (_, v) ->
        return (Script_int.of_zint v, ctxt)
@ -2620,6 +2637,9 @@ let rec unparse_data
    | String_t _, s ->
        Lwt.return (Gas.consume ctxt (Unparse_costs.string s)) >>=? fun ctxt ->
        return (String (-1, s), ctxt)
+    | Bytes_t _, s ->
+        Lwt.return (Gas.consume ctxt (Unparse_costs.bytes s)) >>=? fun ctxt ->
+        return (Bytes (-1, s), ctxt)
    | Bool_t _, true ->
        Lwt.return (Gas.consume ctxt Unparse_costs.bool) >>=? fun ctxt ->
        return (Prim (-1, D_True, [], []), ctxt)
--- a/src/proto_alpha/lib_protocol/src/script_typed_ir.ml
+++ b/src/proto_alpha/lib_protocol/src/script_typed_ir.ml
@ -72,6 +72,7 @@ and 'ty ty =
  | Nat_t : type_annot option -> n num ty
  | Signature_t : type_annot option -> signature ty
  | String_t : type_annot option -> string ty
+  | Bytes_t : type_annot option -> MBytes.t ty
  | Mutez_t : type_annot option -> Tez.t ty
  | Key_hash_t : type_annot option -> public_key_hash ty
  | Key_t : type_annot option -> public_key ty