Michelson: new bytes type

This commit is contained in:
Benjamin Canou 2018-06-14 15:26:20 +02:00
parent 45d8fd11ae
commit c3f4aa1454
8 changed files with 42 additions and 10 deletions

View File

@ -153,6 +153,7 @@ Data patterns are of one of the following syntactical forms.
- integer/natural number literals, (e.g. ``3``) ;
- string literals, (e.g. ``"contents"``) ;
- raw byte sequence literals, (e.g. ``0xABCDEF42``) ;
- ``Tag`` (capitalized) is a symbolic constant, (e.g. ``Unit``,
``True``, ``False``) ;
- ``(Tag (arg) ...)`` tagged constructed data, (e.g. ``(Pair 3 4)``) ;
@ -338,7 +339,8 @@ polymorphism.
III - Core data types and notations
-----------------------------------
- ``string``, ``nat``, ``int``: The core primitive constant types.
- ``string``, ``nat``, ``int`` and ``bytes``: The core primitive
constant types.
- ``bool``: The type for booleans whose values are ``True`` and
``False``
@ -1714,13 +1716,13 @@ characters can only appear in comments and strings.
Constants
~~~~~~~~~
There are two kinds of constants:
There are three kinds of constants:
1. Integers or naturals in decimal (no prefix), hexadecimal (``0x`` prefix),
octal (``0o`` prefix) or binary (``0b`` prefix).
1. Integers or naturals in decimal notation.
2. Strings, with usual escape sequences: ``\n``, ``\t``, ``\b``,
``\r``, ``\\``, ``\"``. Unescaped line-breaks (both ``\n`` and ``\r``)
cannot appear in the middle of a string.
3. Byte sequences in hexadecimal notation, prefixed with ``0x``.
The current version of Michelson restricts strings to be the printable
subset of 7-bit ASCII, plus the escaped characters mentioned above.
@ -1796,8 +1798,8 @@ type constructors by lowercase identifiers, and constant constructors
are Capitalized.
All domain specific constants are Micheline constants with specific
formats. Some have two representations accepted by the data type
checker: a readable one in a string and an optimized one in a natural.
formats. Some have two variants accepted by the data type checker: a
readable one in a string and an optimized one.
- ``mutez`` amounts are written as naturals.
- ``timestamp``\ s are written either using ``RFC 3339`` notation
@ -1805,8 +1807,7 @@ checker: a readable one in a string and an optimized one in a natural.
in a natural (optimized).
- ``contract``\ s, ``address``\ es, ``key``\ s and ``signature``\ s
are written as strings, in their usual Base58 encoded versions
(readable), or as the little indian interpretation of their
bytes in a natural (optimized).
(readable), or as their raw bytes (optimized).
The optimized versions should not reach the RPCs, the protocol code
will convert to optimized by itself when forging operations, storing

View File

@ -266,6 +266,7 @@ module Script : sig
| T_set
| T_signature
| T_string
| T_bytes
| T_mutez
| T_timestamp
| T_unit

View File

@ -238,7 +238,10 @@ module Cost_of = struct
(* [cycle] is the cost of exactly one step ([step_cost 1]). *)
let cycle = step_cost 1
(* Booleans and unit are both charged the same constant, [prim_cost]. *)
let bool = prim_cost
let unit = prim_cost
(* FIXME: not sure we should count the length of strings and bytes
as they are shared *)
(* [string s] is [string_cost] applied to the length of [s]. *)
let string s = string_cost (String.length s)
(* [bytes s] is [alloc_bytes_cost] applied to the length of [s], as
   reported by [MBytes.length].
   NOTE(review): [string] goes through [string_cost] whereas [bytes] goes
   through [alloc_bytes_cost]; confirm this asymmetry is intended. *)
let bytes s = alloc_bytes_cost (MBytes.length s)
(* Approximates log10(x) *)
let int i =
let decimal_digits = (Z.numbits (Z.abs (Script_int.to_zint i))) / 4 in

View File

@ -138,6 +138,7 @@ module Cost_of : sig
(** [int i] is the gas cost associated with the number [i]; it accepts
    any ['a Script_int.num]. *)
val int : 'a Script_int.num -> Gas.cost
(** Gas cost associated with a tez amount; a constant that does not
    depend on the value. *)
val tez : Gas.cost
(** [string s] is the gas cost associated with the string [s]. *)
val string : string -> Gas.cost
(** [bytes b] is the gas cost associated with the byte sequence [b]. *)
val bytes : MBytes.t -> Gas.cost
(** [timestamp t] is the gas cost associated with the timestamp [t]. *)
val timestamp : Script_timestamp.t -> Gas.cost
(** Gas cost associated with a key; a constant that does not depend on
    the value. *)
val key : Gas.cost
(** Gas cost associated with a key hash; a constant that does not depend
    on the value. *)
val key_hash : Gas.cost

View File

@ -115,6 +115,7 @@ type prim =
| T_set
| T_signature
| T_string
| T_bytes
| T_mutez
| T_timestamp
| T_unit
@ -242,6 +243,7 @@ let string_of_prim = function
| T_set -> "set"
| T_signature -> "signature"
| T_string -> "string"
| T_bytes -> "bytes"
| T_mutez -> "mutez"
| T_timestamp -> "timestamp"
| T_unit -> "unit"
@ -350,6 +352,7 @@ let prim_of_string = function
| "set" -> ok T_set
| "signature" -> ok T_signature
| "string" -> ok T_string
| "bytes" -> ok T_bytes
| "mutez" -> ok T_mutez
| "timestamp" -> ok T_timestamp
| "unit" -> ok T_unit
@ -503,6 +506,7 @@ let prim_encoding =
("set", T_set) ;
("signature", T_signature) ;
("string", T_string) ;
("bytes", T_bytes) ;
("mutez", T_mutez) ;
("timestamp", T_timestamp) ;
("unit", T_unit) ;

View File

@ -113,6 +113,7 @@ type prim =
| T_set
| T_signature
| T_string
| T_bytes
| T_mutez
| T_timestamp
| T_unit

View File

@ -55,6 +55,7 @@ let rec type_size : type t. t ty -> int =
| Int_t _ -> 1
| Nat_t _ -> 1
| Signature_t _ -> 1
| Bytes_t _ -> 1
| String_t _ -> 1
| Mutez_t _ -> 1
| Key_hash_t _ -> 1
@ -327,6 +328,7 @@ let namespace = function
| T_set
| T_signature
| T_string
| T_bytes
| T_mutez
| T_timestamp
| T_unit
@ -531,6 +533,7 @@ let rec unparse_ty
| Int_t tname -> Prim (-1, T_int, [], unparse_type_annot tname)
| Nat_t tname -> Prim (-1, T_nat, [], unparse_type_annot tname)
| String_t tname -> Prim (-1, T_string, [], unparse_type_annot tname)
| Bytes_t tname -> Prim (-1, T_bytes, [], unparse_type_annot tname)
| Mutez_t tname -> Prim (-1, T_mutez, [], unparse_type_annot tname)
| Bool_t tname -> Prim (-1, T_bool, [], unparse_type_annot tname)
| Key_hash_t tname -> Prim (-1, T_key_hash, [], unparse_type_annot tname)
@ -582,6 +585,7 @@ let name_of_ty
| Int_t tname -> tname
| Nat_t tname -> tname
| String_t tname -> tname
| Bytes_t tname -> tname
| Mutez_t tname -> tname
| Bool_t tname -> tname
| Key_hash_t tname -> tname
@ -629,6 +633,7 @@ let rec ty_eq
| Key_t _, Key_t _ -> Ok Eq
| Key_hash_t _, Key_hash_t _ -> Ok Eq
| String_t _, String_t _ -> Ok Eq
| Bytes_t _, Bytes_t _ -> Ok Eq
| Signature_t _, Signature_t _ -> Ok Eq
| Mutez_t _, Mutez_t _ -> Ok Eq
| Timestamp_t _, Timestamp_t _ -> Ok Eq
@ -751,6 +756,9 @@ let merge_types :
| String_t tn1, String_t tn2 ->
merge_type_annot tn1 tn2 >|? fun tname ->
String_t tname
| Bytes_t tn1, Bytes_t tn2 ->
merge_type_annot tn1 tn2 >|? fun tname ->
Bytes_t tname
| Signature_t tn1, Signature_t tn2 ->
merge_type_annot tn1 tn2 >|? fun tname ->
Signature_t tname
@ -960,6 +968,9 @@ and parse_ty :
| Prim (loc, T_string, [], annot) ->
parse_type_annot loc annot >|? fun ty_name ->
Ex_ty (String_t ty_name)
| Prim (loc, T_bytes, [], annot) ->
parse_type_annot loc annot >|? fun ty_name ->
Ex_ty (Bytes_t ty_name)
| Prim (loc, T_mutez, [], annot) ->
parse_type_annot loc annot >|? fun ty_name ->
Ex_ty (Mutez_t ty_name)
@ -1032,7 +1043,7 @@ and parse_ty :
error (Unexpected_big_map loc)
| Prim (loc, (T_unit | T_signature
| T_int | T_nat
| T_string | T_mutez | T_bool
| T_string | T_bytes | T_mutez | T_bool
| T_key | T_key_hash
| T_timestamp | T_address as prim), l, _) ->
error (Invalid_arity (loc, prim, 0, List.length l))
@ -1046,7 +1057,7 @@ and parse_ty :
T_list ; T_option ; T_lambda ;
T_unit ; T_signature ; T_contract ;
T_int ; T_nat ; T_operation ;
T_string ; T_mutez ; T_bool ;
T_string ; T_bytes ; T_mutez ; T_bool ;
T_key ; T_key_hash ; T_timestamp ]
let rec unparse_stack
@ -1144,6 +1155,12 @@ let rec parse_data
fail (error ())
| String_t _, expr ->
traced (fail (Invalid_kind (location expr, [ String_kind ], kind expr)))
(* Byte sequences *)
| Bytes_t _, Bytes (_, v) ->
Lwt.return (Gas.consume ctxt (Typecheck_costs.string (MBytes.length v))) >>=? fun ctxt ->
return (v, ctxt)
| Bytes_t _, expr ->
traced (fail (Invalid_kind (location expr, [ Bytes_kind ], kind expr)))
(* Integers *)
| Int_t _, Int (_, v) ->
return (Script_int.of_zint v, ctxt)
@ -2620,6 +2637,9 @@ let rec unparse_data
| String_t _, s ->
Lwt.return (Gas.consume ctxt (Unparse_costs.string s)) >>=? fun ctxt ->
return (String (-1, s), ctxt)
| Bytes_t _, s ->
Lwt.return (Gas.consume ctxt (Unparse_costs.bytes s)) >>=? fun ctxt ->
return (Bytes (-1, s), ctxt)
| Bool_t _, true ->
Lwt.return (Gas.consume ctxt Unparse_costs.bool) >>=? fun ctxt ->
return (Prim (-1, D_True, [], []), ctxt)

View File

@ -72,6 +72,7 @@ and 'ty ty =
| Nat_t : type_annot option -> n num ty
| Signature_t : type_annot option -> signature ty
| String_t : type_annot option -> string ty
| Bytes_t : type_annot option -> MBytes.t ty
| Mutez_t : type_annot option -> Tez.t ty
| Key_hash_t : type_annot option -> public_key_hash ty
| Key_t : type_annot option -> public_key ty