From c2241c034a5f6625dd7e9d569d0328588bd6dcee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gr=C3=A9goire=20Henry?= Date: Fri, 25 May 2018 14:08:46 +0200 Subject: [PATCH] Data_encoding: allow smaller size field for `Dynamic_size` --- src/lib_data_encoding/binary_length.ml | 5 +- src/lib_data_encoding/binary_reader.ml | 14 ++- src/lib_data_encoding/binary_size.ml | 13 ++ src/lib_data_encoding/binary_size.mli | 11 +- src/lib_data_encoding/binary_stream_reader.ml | 30 +++-- src/lib_data_encoding/binary_writer.ml | 111 ++++++++---------- src/lib_data_encoding/data_encoding.mli | 4 +- src/lib_data_encoding/encoding.ml | 14 ++- src/lib_data_encoding/encoding.mli | 7 +- src/lib_data_encoding/json.ml | 2 +- .../sigs/v1/data_encoding.mli | 4 +- 11 files changed, 119 insertions(+), 96 deletions(-) diff --git a/src/lib_data_encoding/binary_length.ml b/src/lib_data_encoding/binary_length.ml index a6374c6a1..335a7e5f1 100644 --- a/src/lib_data_encoding/binary_length.ml +++ b/src/lib_data_encoding/binary_length.ml @@ -114,8 +114,9 @@ let rec length : type x. x Encoding.t -> x -> int = | Describe { encoding = e } -> length e value | Def { encoding = e } -> length e value | Splitted { encoding = e } -> length e value - | Dynamic_size e -> - Binary_size.int32 + length e value + | Dynamic_size { kind ; encoding = e } -> + let length = length e value in + Binary_size.integer_to_size kind + length | Check_size { limit ; encoding = e } -> let length = length e value in if length > limit then raise (Write_error Size_limit_exceeded) ; diff --git a/src/lib_data_encoding/binary_reader.ml b/src/lib_data_encoding/binary_reader.ml index 1b669befb..caae71eff 100644 --- a/src/lib_data_encoding/binary_reader.ml +++ b/src/lib_data_encoding/binary_reader.ml @@ -62,6 +62,14 @@ module Atom = struct read_atom Binary_size.int31 @@ fun buffer ofs -> Int32.to_int (MBytes.get_int32 buffer ofs) + let int = function + | `Int31 -> int31 + | `Int16 -> int16 + | `Int8 -> int8 + | `Uint30 -> uint30 + | `Uint16 -> uint16 + | `Uint8 -> uint8 + let ranged_int ~minimum ~maximum state = let read_int = match Binary_size.range_to_size ~minimum ~maximum with @@ -233,10 +241,8 @@ let rec read_rec : type ret. ret Encoding.t -> state -> ret cases with Not_found -> raise (Unexpected_tag ctag) in inj (read_rec encoding state) - | Dynamic_size e -> - let sz = Atom.int32 state in - let sz = Int32.to_int sz in - if sz < 0 then raise (Invalid_size sz) ; + | Dynamic_size { kind ; encoding = e } -> + let sz = Atom.int kind state in let remaining = check_remaining_bytes state sz in state.remaining_bytes <- sz ; ignore (check_allowed_bytes state sz : int option) ; diff --git a/src/lib_data_encoding/binary_size.ml b/src/lib_data_encoding/binary_size.ml index 2e8c047a0..3b95516d9 100644 --- a/src/lib_data_encoding/binary_size.ml +++ b/src/lib_data_encoding/binary_size.ml @@ -54,6 +54,19 @@ let integer_to_size = function | `Uint16 -> uint16 | `Uint8 -> uint8 +let max_int = function + | `Uint30 | `Int31 -> (1 lsl 30) - 1 + | `Int16 -> 1 lsl 15 - 1 + | `Int8 -> 1 lsl 7 - 1 + | `Uint16 -> 1 lsl 16 - 1 + | `Uint8 -> 1 lsl 8 - 1 + +let min_int = function + | `Uint8 | `Uint16 | `Uint30 -> 0 + | `Int31 -> - (1 lsl 30) + | `Int16 -> - (1 lsl 15) + | `Int8 -> - (1 lsl 7) + let range_to_size ~minimum ~maximum : integer = if minimum < 0 then signed_range_to_size minimum maximum diff --git a/src/lib_data_encoding/binary_size.mli b/src/lib_data_encoding/binary_size.mli index 9807d2a8d..5e0fe433e 100644 --- a/src/lib_data_encoding/binary_size.mli +++ b/src/lib_data_encoding/binary_size.mli @@ -30,7 +30,12 @@ val tag_size: tag_size -> int type signed_integer = [ `Int31 | `Int16 | `Int8 ] type unsigned_integer = [ `Uint30 | `Uint16 | `Uint8 ] type integer = [ signed_integer | unsigned_integer ] -val integer_to_size: integer -> int -val range_to_size: minimum:int -> maximum:int -> integer -val enum_size: 'a array -> [> unsigned_integer ] +val integer_to_size: [< integer ] -> int + +val min_int: [< integer ] -> int +val max_int: [< integer ] -> int + +val range_to_size: minimum:int -> maximum:int -> integer + +val enum_size: 'a array -> [> unsigned_integer ] diff --git a/src/lib_data_encoding/binary_stream_reader.ml b/src/lib_data_encoding/binary_stream_reader.ml index 07d079cb9..2e3f4bca5 100644 --- a/src/lib_data_encoding/binary_stream_reader.ml +++ b/src/lib_data_encoding/binary_stream_reader.ml @@ -109,6 +109,14 @@ module Atom = struct read_atom r Binary_size.int31 @@ fun buffer ofs -> Int32.to_int (MBytes.get_int32 buffer ofs) + let int = function + | `Int31 -> int31 + | `Int16 -> int16 + | `Int8 -> int8 + | `Uint30 -> uint30 + | `Uint16 -> uint16 + | `Uint8 -> uint8 + let ranged_int ~minimum ~maximum resume state k = let read_int = match Binary_size.range_to_size ~minimum ~maximum with @@ -302,20 +310,16 @@ let rec read_rec read_rec encoding state @@ fun (v, state) -> k (inj v, state) end - | Dynamic_size e -> - Atom.int32 resume state @@ fun (sz, state) -> - let sz = Int32.to_int sz in - if sz < 0 then - Error (Invalid_size sz) + | Dynamic_size { kind ; encoding = e } -> + Atom.int kind resume state @@ fun (sz, state) -> + let remaining = check_remaining_bytes state sz in + let state = { state with remaining_bytes = Some sz } in + ignore (check_allowed_bytes state sz : int option) ; + read_rec e state @@ fun (v, state) -> + if state.remaining_bytes <> Some 0 then + Error Extra_bytes else - let remaining = check_remaining_bytes state sz in - let state = { state with remaining_bytes = Some sz } in - ignore (check_allowed_bytes state sz : int option) ; - read_rec e state @@ fun (v, state) -> - if state.remaining_bytes <> Some 0 then - Error Extra_bytes - else - k (v, { state with remaining_bytes = remaining }) + k (v, { state with remaining_bytes = remaining }) | Check_size { limit ; encoding = e } -> let old_allowed_bytes = state.allowed_bytes in let limit = diff --git a/src/lib_data_encoding/binary_writer.ml b/src/lib_data_encoding/binary_writer.ml index 06c607cce..2bf7a0ef8 100644 --- a/src/lib_data_encoding/binary_writer.ml +++ b/src/lib_data_encoding/binary_writer.ml @@ -64,45 +64,28 @@ module Atom = struct if (v < min || max < v) then raise (Invalid_float { min ; v ; max }) - let int8 state v = - check_int_range (- (1 lsl 7)) v (1 lsl 7 - 1) ; - let ofs = state.offset in - may_resize state Binary_size.int8 ; - MBytes.set_int8 state.buffer ofs v + let set_int kind buffer ofs v = + match kind with + | `Int31 | `Uint30 -> MBytes.set_int32 buffer ofs (Int32.of_int v) + | `Int16 | `Uint16 -> MBytes.set_int16 buffer ofs v + | `Int8 | `Uint8 -> MBytes.set_int8 buffer ofs v - let uint8 state v = - check_int_range 0 v (1 lsl 8 - 1) ; + let int kind state v = + check_int_range (Binary_size.min_int kind) v (Binary_size.max_int kind) ; let ofs = state.offset in - may_resize state Binary_size.uint8 ; - MBytes.set_int8 state.buffer ofs v + may_resize state (Binary_size.integer_to_size kind) ; + set_int kind state.buffer ofs v + + let int8 = int `Int8 + let uint8 = int `Uint8 + let int16 = int `Int16 + let uint16 = int `Uint16 + let uint30 = int `Uint30 + let int31 = int `Int31 let char state v = int8 state (int_of_char v) let bool state v = uint8 state (if v then 255 else 0) - let int16 state v = - check_int_range (- (1 lsl 15)) v (1 lsl 15 - 1) ; - let ofs = state.offset in - may_resize state Binary_size.int16 ; - MBytes.set_int16 state.buffer ofs v - - let uint16 state v = - check_int_range 0 v (1 lsl 16 - 1) ; - let ofs = state.offset in - may_resize state Binary_size.uint16 ; - MBytes.set_int16 state.buffer ofs v - - let uint30 state v = - check_int_range 0 v (1 lsl 30 - 1) ; - let ofs = state.offset in - may_resize state Binary_size.uint30 ; - MBytes.set_int32 state.buffer ofs (Int32.of_int v) - - let int31 state v = - check_int_range (- (1 lsl 30)) v (1 lsl 30 - 1) ; - let ofs = state.offset in - may_resize state Binary_size.int31 ; - MBytes.set_int32 state.buffer ofs (Int32.of_int v) - let int32 state v = let ofs = state.offset in may_resize state Binary_size.int32 ; @@ -277,44 +260,46 @@ let rec write_rec : type a. a Encoding.t -> state -> a -> unit = Atom.tag sz state tag ; write_rec e state value in write_case cases - | Dynamic_size e -> - Atom.int32 state 0l ; (* place holder for [size] *) + | Dynamic_size { kind ; encoding = e } -> let initial_offset = state.offset in - write_rec e state value ; + Atom.int kind state 0 ; (* place holder for [size] *) + write_with_limit (Binary_size.max_int kind) e state value ; (* patch the written [size] *) - let size = state.offset - initial_offset in - (* FIXME overflow *) - MBytes.set_int32 - state.buffer (initial_offset - Binary_size.int32) - (Int32.of_int size) - | Check_size { limit ; encoding = e } -> begin - (* backup the current limit *) - let old_limit = state.allowed_bytes in - (* install the new limit (only if smaller than the current limit) *) - let limit = - match state.allowed_bytes with - | None -> limit - | Some old_limit -> min old_limit limit in - state.allowed_bytes <- Some limit ; - write_rec e state value ; - (* restore the previous limit (minus the read bytes) *) - match old_limit with - | None -> - state.allowed_bytes <- None - | Some old_limit -> - let remaining = - match state.allowed_bytes with - | None -> assert false - | Some len -> len in - let read = limit - remaining in - state.allowed_bytes <- Some (old_limit - read) - end + Atom.set_int kind + state.buffer + initial_offset + (state.offset - initial_offset - Binary_size.integer_to_size kind) + | Check_size { limit ; encoding = e } -> + write_with_limit limit e state value | Describe { encoding = e } -> write_rec e state value | Def { encoding = e } -> write_rec e state value | Splitted { encoding = e } -> write_rec e state value | Mu (_, _, self) -> write_rec (self e) state value | Delayed f -> write_rec (f ()) state value +and write_with_limit : type a. int -> a Encoding.t -> state -> a -> unit = + fun limit e state value -> + (* backup the current limit *) + let old_limit = state.allowed_bytes in + (* install the new limit (only if smaller than the current limit) *) + let limit = + match state.allowed_bytes with + | None -> limit + | Some old_limit -> min old_limit limit in + state.allowed_bytes <- Some limit ; + write_rec e state value ; + (* restore the previous limit (minus the read bytes) *) + match old_limit with + | None -> + state.allowed_bytes <- None + | Some old_limit -> + let remaining = + match state.allowed_bytes with + | None -> assert false + | Some len -> len in + let read = limit - remaining in + state.allowed_bytes <- Some (old_limit - read) + (** ******************** *) (** Various entry points *) diff --git a/src/lib_data_encoding/data_encoding.mli b/src/lib_data_encoding/data_encoding.mli index 6e7634a66..dee1acb33 100644 --- a/src/lib_data_encoding/data_encoding.mli +++ b/src/lib_data_encoding/data_encoding.mli @@ -416,7 +416,9 @@ module Encoding: sig Forces the size to be stored alongside content when needed. Typically used to combine two variable encodings in a same objects or tuple, or to use a variable encoding in an array or a list. *) - val dynamic_size : 'a encoding -> 'a encoding + val dynamic_size : + ?kind: [ `Uint30 | `Uint16 | `Uint8 ] -> + 'a encoding -> 'a encoding (** [check_size size encoding] ensures that the binary encoding of a value will not be allowed to exceed [size] bytes. The reader and diff --git a/src/lib_data_encoding/encoding.ml b/src/lib_data_encoding/encoding.ml index a9e7b2c4a..f7cd8a1d8 100644 --- a/src/lib_data_encoding/encoding.ml +++ b/src/lib_data_encoding/encoding.ml @@ -107,7 +107,9 @@ type 'a desc = { encoding : 'a t ; json_encoding : 'a Json_encoding.encoding ; is_obj : bool ; is_tup : bool } -> 'a desc - | Dynamic_size : 'a t -> 'a desc + | Dynamic_size : + { kind : Binary_size.unsigned_integer ; + encoding : 'a t } -> 'a desc | Check_size : { limit : int ; encoding : 'a t } -> 'a desc | Delayed : (unit -> 'a t) -> 'a desc @@ -269,8 +271,8 @@ module Variable = struct make @@ List e end -let dynamic_size e = - make @@ Dynamic_size e +let dynamic_size ?(kind = `Uint30) e = + make @@ Dynamic_size { kind ; encoding = e } let check_size limit encoding = make @@ Check_size { limit ; encoding } @@ -352,7 +354,7 @@ let rec is_obj : type a. a t -> bool = fun e -> | Obj _ -> true | Objs _ (* by construction *) -> true | Conv { encoding = e } -> is_obj e - | Dynamic_size e -> is_obj e + | Dynamic_size { encoding = e } -> is_obj e | Union (_,_,cases) -> List.for_all (fun (Case { encoding = e }) -> is_obj e) cases | Empty -> true @@ -369,7 +371,7 @@ let rec is_tup : type a. a t -> bool = fun e -> | Tup _ -> true | Tups _ (* by construction *) -> true | Conv { encoding = e } -> is_tup e - | Dynamic_size e -> is_tup e + | Dynamic_size { encoding = e } -> is_tup e | Union (_,_,cases) -> List.for_all (function Case { encoding = e} -> is_tup e) cases | Mu (_,_,self) -> is_tup (self e) @@ -571,7 +573,7 @@ let rec is_nullable: type t. t encoding -> bool = fun e -> | Describe { encoding = e } -> is_nullable e | Def { encoding = e } -> is_nullable e | Splitted { json_encoding } -> Json_encoding.is_nullable json_encoding - | Dynamic_size e -> is_nullable e + | Dynamic_size { encoding = e } -> is_nullable e | Check_size { encoding = e } -> is_nullable e | Delayed _ -> true diff --git a/src/lib_data_encoding/encoding.mli b/src/lib_data_encoding/encoding.mli index 22d317c86..517f83723 100644 --- a/src/lib_data_encoding/encoding.mli +++ b/src/lib_data_encoding/encoding.mli @@ -65,7 +65,9 @@ type 'a desc = { encoding : 'a t ; json_encoding : 'a Json_encoding.encoding ; is_obj : bool ; is_tup : bool } -> 'a desc - | Dynamic_size : 'a t -> 'a desc + | Dynamic_size : + { kind : Binary_size.unsigned_integer ; + encoding : 'a t } -> 'a desc | Check_size : { limit : int ; encoding : 'a t } -> 'a desc | Delayed : (unit -> 'a t) -> 'a desc @@ -123,7 +125,8 @@ module Variable : sig val array : 'a encoding -> 'a array encoding val list : 'a encoding -> 'a list encoding end -val dynamic_size : 'a encoding -> 'a encoding +val dynamic_size : + ?kind:Binary_size.unsigned_integer -> 'a encoding -> 'a encoding val check_size : int -> 'a encoding -> 'a encoding val delayed : (unit -> 'a encoding) -> 'a encoding val req : diff --git a/src/lib_data_encoding/json.ml b/src/lib_data_encoding/json.ml index b68354533..617a7d3c7 100644 --- a/src/lib_data_encoding/json.ml +++ b/src/lib_data_encoding/json.ml @@ -217,7 +217,7 @@ let rec json : type a. a Encoding.desc -> a Json_encoding.encoding = mu name (fun json_encoding -> get_json @@ self (make ~json_encoding ty)) | Union (_tag_size, _, cases) -> union (List.map case_json cases) | Splitted { json_encoding } -> json_encoding - | Dynamic_size e -> get_json e + | Dynamic_size { encoding = e } -> get_json e | Check_size { encoding } -> get_json encoding | Delayed f -> get_json (f ()) diff --git a/src/lib_protocol_environment/sigs/v1/data_encoding.mli b/src/lib_protocol_environment/sigs/v1/data_encoding.mli index b55eed69e..43dd509cd 100644 --- a/src/lib_protocol_environment/sigs/v1/data_encoding.mli +++ b/src/lib_protocol_environment/sigs/v1/data_encoding.mli @@ -57,7 +57,9 @@ module Variable : sig val list : 'a encoding -> 'a list encoding end -val dynamic_size : 'a encoding -> 'a encoding +val dynamic_size : + ?kind: [ `Uint30 | `Uint16 | `Uint8 ] -> + 'a encoding -> 'a encoding val json : json encoding val json_schema : json_schema encoding