From 9a43902f03beaac1bca7d3f123d1ea06c2a110c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gr=C3=A9goire=20Henry?= Date: Fri, 1 Jun 2018 00:32:47 +0200 Subject: [PATCH] Data_encoding: allow to add padding --- src/lib_data_encoding/binary_description.ml | 11 +++++++- src/lib_data_encoding/binary_length.ml | 1 + src/lib_data_encoding/binary_reader.ml | 4 +++ src/lib_data_encoding/binary_schema.ml | 15 +++++++++-- src/lib_data_encoding/binary_schema.mli | 1 + src/lib_data_encoding/binary_stream_reader.ml | 11 ++++++++ src/lib_data_encoding/binary_writer.ml | 3 +++ src/lib_data_encoding/data_encoding.mli | 1 + src/lib_data_encoding/encoding.ml | 25 ++++++++++++++----- src/lib_data_encoding/encoding.mli | 2 ++ src/lib_data_encoding/json.ml | 1 + .../sigs/v1/data_encoding.mli | 1 + 12 files changed, 67 insertions(+), 9 deletions(-) diff --git a/src/lib_data_encoding/binary_description.ml b/src/lib_data_encoding/binary_description.ml index 07c25693e..b74a0b956 100644 --- a/src/lib_data_encoding/binary_description.ml +++ b/src/lib_data_encoding/binary_description.ml @@ -66,7 +66,8 @@ let fixup_references uf = | RangedFloat (_, _) | Float | Bytes - | String) as enc -> enc in + | String + | Padding) as enc -> enc in let field = function | Named_field (name, kind, layout) -> Named_field (name, kind, fixup_layout layout) @@ -308,6 +309,9 @@ let describe (type x) (encoding : x Encoding.t) = ([ Anonymous_field ((kind :> Kind.t), Bytes) ], references) | String kind -> ([ Anonymous_field ((kind :> Kind.t), String) ], references) + | Padded ({ encoding = e }, n) -> + let fields, references = fields ref_name recursives references e in + (fields @ [ Named_field ("padding", `Fixed n, Padding) ], references) | (String_enum (tbl, encoding_array) as encoding) -> let size, cases = enum tbl encoding_array in let name = may_new_reference ref_name in @@ -422,6 +426,11 @@ let describe (type x) (encoding : x Encoding.t) = (Bytes, references) | String _kind -> (String, references) + | Padded _ as enc -> + let name = may_new_reference ref_name in + let fields, references = fields None recursives references enc in + let references = add_reference name (obj fields) references in + (Ref name, references) | String_enum (tbl, encoding_array) -> let name = may_new_reference ref_name in let size, cases = enum tbl encoding_array in diff --git a/src/lib_data_encoding/binary_length.ml b/src/lib_data_encoding/binary_length.ml index 230977ed8..92ba5bd93 100644 --- a/src/lib_data_encoding/binary_length.ml +++ b/src/lib_data_encoding/binary_length.ml @@ -39,6 +39,7 @@ let rec length : type x. x Encoding.t -> x -> int = | RangedFloat _ -> Binary_size.float | Bytes `Fixed n -> n | String `Fixed n -> n + | Padded (e, n) -> length e value + n | String_enum (_, arr) -> Binary_size.integer_to_size @@ Binary_size.enum_size arr | Objs { kind = `Fixed n } -> n diff --git a/src/lib_data_encoding/binary_reader.ml b/src/lib_data_encoding/binary_reader.ml index e0e916933..e08f05fcf 100644 --- a/src/lib_data_encoding/binary_reader.ml +++ b/src/lib_data_encoding/binary_reader.ml @@ -180,6 +180,10 @@ let rec read_rec : type ret. ret Encoding.t -> state -> ret | String (`Fixed n) -> Atom.fixed_length_string n state | String `Variable -> Atom.fixed_length_string state.remaining_bytes state + | Padded (e, n) -> + let v = read_rec e state in + ignore (Atom.fixed_length_string n state : string) ; + v | RangedInt { minimum ; maximum } -> Atom.ranged_int ~minimum ~maximum state | RangedFloat { minimum ; maximum } -> diff --git a/src/lib_data_encoding/binary_schema.ml b/src/lib_data_encoding/binary_schema.ml index 006fccb17..374a84691 100644 --- a/src/lib_data_encoding/binary_schema.ml +++ b/src/lib_data_encoding/binary_schema.ml @@ -29,6 +29,7 @@ and layout = | Enum of Binary_size.integer * string | Seq of layout (* For arrays and lists *) | Ref of string + | Padding and fields = field_descr list @@ -108,6 +109,8 @@ module Printer_ast = struct Format.fprintf ppf "bytes" | Ref reference -> Format.fprintf ppf "$%s" reference + | Padding -> + Format.fprintf ppf "padding" | Enum (size, reference) -> Format.fprintf ppf "%a encoding an enumeration (see %s)" pp_int (size :> integer_extended) @@ -395,11 +398,19 @@ module Encoding = struct (Tag 10) (obj2 (req "name" string) - (req "kind" (constant "Float"))) + (req "kind" (constant "Ref"))) (function | Ref layout -> Some (layout, ()) | _ -> None) - (fun (name, ()) -> Ref name) + (fun (name, ()) -> Ref name) ; + case ~title:"Padding" + (Tag 11) + (obj1 + (req "kind" (constant "Padding"))) + (function + | Padding -> Some () + | _ -> None) + (fun () -> Padding) ; ]) let kind_enum_cases = diff --git a/src/lib_data_encoding/binary_schema.mli b/src/lib_data_encoding/binary_schema.mli index 7c546d540..9dd904c98 100644 --- a/src/lib_data_encoding/binary_schema.mli +++ b/src/lib_data_encoding/binary_schema.mli @@ -29,6 +29,7 @@ and layout = | Enum of Binary_size.integer * string | Seq of layout (* For arrays and lists *) | Ref of string + | Padding and fields = field_descr list diff --git a/src/lib_data_encoding/binary_stream_reader.ml b/src/lib_data_encoding/binary_stream_reader.ml index a5828f196..222a789df 100644 --- a/src/lib_data_encoding/binary_stream_reader.ml +++ b/src/lib_data_encoding/binary_stream_reader.ml @@ -207,6 +207,14 @@ module Atom = struct end +let rec skip n state k = + let resume buffer = + let stream = Binary_stream.push buffer state.stream in + try skip n { state with stream } k + with Read_error err -> Error err in + Atom.fixed_length_string n resume state @@ fun (_, state : string * _) -> + k state + (** Main recursive reading function, in continuation passing style. *) let rec read_rec : type next ret. @@ -242,6 +250,9 @@ let rec read_rec | String `Variable -> let size = remaining_bytes state in Atom.fixed_length_string size resume state k + | Padded (e, n) -> + read_rec e state @@ fun (v, state) -> + skip n state @@ (fun state -> k (v, state)) | RangedInt { minimum ; maximum } -> Atom.ranged_int ~minimum ~maximum resume state k | RangedFloat { minimum ; maximum } -> diff --git a/src/lib_data_encoding/binary_writer.ml b/src/lib_data_encoding/binary_writer.ml index baf69b51f..7ff235fc2 100644 --- a/src/lib_data_encoding/binary_writer.ml +++ b/src/lib_data_encoding/binary_writer.ml @@ -216,6 +216,9 @@ let rec write_rec : type a. a Encoding.t -> state -> a -> unit = | String `Variable -> let length = String.length value in Atom.fixed_kind_string length state value + | Padded (e, n) -> + write_rec e state value ; + Atom.fixed_kind_string n state (String.make n '\000') | RangedInt { minimum ; maximum } -> Atom.ranged_int ~minimum ~maximum state value | RangedFloat { minimum ; maximum } -> diff --git a/src/lib_data_encoding/data_encoding.mli b/src/lib_data_encoding/data_encoding.mli index 92efdb518..73c800422 100644 --- a/src/lib_data_encoding/data_encoding.mli +++ b/src/lib_data_encoding/data_encoding.mli @@ -402,6 +402,7 @@ module Encoding: sig module Fixed : sig val string : int -> string encoding val bytes : int -> MBytes.t encoding + val add_padding : 'a encoding -> int -> 'a encoding end (** Create encodings that produce data of a variable length when binary encoded. diff --git a/src/lib_data_encoding/encoding.ml b/src/lib_data_encoding/encoding.ml index 065c78302..73060c00c 100644 --- a/src/lib_data_encoding/encoding.ml +++ b/src/lib_data_encoding/encoding.ml @@ -83,6 +83,7 @@ type 'a desc = | Float : float desc | Bytes : Kind.length -> MBytes.t desc | String : Kind.length -> string desc + | Padded : 'a t * int -> 'a desc | String_enum : ('a, string * int) Hashtbl.t * 'a array -> 'a desc | Array : 'a t -> 'a array desc | List : 'a t -> 'a list desc @@ -187,6 +188,11 @@ and classify_desc : type a. a desc -> Kind.t = fun e -> (* Tagged *) | Bytes kind -> (kind :> Kind.t) | String kind -> (kind :> Kind.t) + | Padded ({ encoding }, n) -> begin + match classify_desc encoding with + | `Fixed m -> `Fixed (n+m) + | _ -> assert false (* by construction (see [Fixed.padded]) *) + end | String_enum (_, cases) -> `Fixed Binary_size.(integer_to_size @@ enum_size cases) | Obj (Opt { kind }) -> (kind :> Kind.t) @@ -214,14 +220,19 @@ let make ?json_encoding encoding = { encoding ; json_encoding } module Fixed = struct let string n = if n <= 0 then - invalid_arg "Cannot create a string encoding fo negative or null fixed length." - else - make @@ String (`Fixed n) + invalid_arg "Cannot create a string encoding of negative or null fixed length." ; + make @@ String (`Fixed n) let bytes n = if n <= 0 then - invalid_arg "Cannot create a byte encoding fo negative or null fixed length." - else - make @@ Bytes (`Fixed n) + invalid_arg "Cannot create a byte encoding of negative or null fixed length." ; + make @@ Bytes (`Fixed n) + let add_padding e n = + if n <= 0 then + invalid_arg "Cannot create a padding of negative or null fixed length." ; + match classify e with + | `Fixed _ -> + make @@ Padded (e, n) + | _ -> invalid_arg "Cannot pad non-fixed size encoding" end let rec is_zeroable: type t. t encoding -> bool = fun e -> @@ -250,6 +261,7 @@ let rec is_zeroable: type t. t encoding -> bool = fun e -> | Float -> false | Bytes _ -> false | String _ -> false + | Padded _ -> false | String_enum _ -> false (* true in some cases, but in practice always protected by Dynamic *) | Array _ -> true (* 0-element array *) @@ -584,6 +596,7 @@ let rec is_nullable: type t. t encoding -> bool = fun e -> | Float -> false | Bytes _ -> false | String _ -> false + | Padded (e, _) -> is_nullable e | String_enum _ -> false | Array _ -> false | List _ -> false diff --git a/src/lib_data_encoding/encoding.mli b/src/lib_data_encoding/encoding.mli index fccabd7e0..de74a3ecb 100644 --- a/src/lib_data_encoding/encoding.mli +++ b/src/lib_data_encoding/encoding.mli @@ -41,6 +41,7 @@ type 'a desc = | Float : float desc | Bytes : Kind.length -> MBytes.t desc | String : Kind.length -> string desc + | Padded : 'a t * int -> 'a desc | String_enum : ('a, string * int) Hashtbl.t * 'a array -> 'a desc | Array : 'a t -> 'a array desc | List : 'a t -> 'a list desc @@ -148,6 +149,7 @@ val is_tup : 'a encoding -> bool module Fixed : sig val string : int -> string encoding val bytes : int -> MBytes.t encoding + val add_padding : 'a encoding -> int -> 'a encoding end module Variable : sig val string : string encoding diff --git a/src/lib_data_encoding/json.ml b/src/lib_data_encoding/json.ml index 23e69b622..247c72186 100644 --- a/src/lib_data_encoding/json.ml +++ b/src/lib_data_encoding/json.ml @@ -186,6 +186,7 @@ let rec json : type a. a Encoding.desc -> a Json_encoding.encoding = s in conv check check string | String _ -> string + | Padded (e, _) -> get_json e | Bytes (`Fixed expected) -> let check s = let found = MBytes.length s in diff --git a/src/lib_protocol_environment/sigs/v1/data_encoding.mli b/src/lib_protocol_environment/sigs/v1/data_encoding.mli index 02cdc25f9..6abd1d811 100644 --- a/src/lib_protocol_environment/sigs/v1/data_encoding.mli +++ b/src/lib_protocol_environment/sigs/v1/data_encoding.mli @@ -48,6 +48,7 @@ val string_enum : (string * 'a) list -> 'a encoding module Fixed : sig val string : int -> string encoding val bytes : int -> MBytes.t encoding + val add_padding : 'a encoding -> int -> 'a encoding end module Variable : sig