Data_encoding: add more compact binary serializer for Z

This commit is contained in:
Benjamin Canou 2018-03-28 20:26:00 +02:00
parent 441149880c
commit deffa28aa4
10 changed files with 181 additions and 7 deletions

View File

@ -39,6 +39,7 @@ let rec length : type x. x Encoding.t -> x -> int = fun e ->
| Int31 -> fun _ -> Size.int31
| Int32 -> fun _ -> Size.int32
| Int64 -> fun _ -> Size.int64
| Z -> fun z -> (Z.numbits z + 1 + 6) / 7
| RangedInt { minimum ; maximum } ->
fun _ -> Size.(integer_to_size @@ range_to_size ~minimum ~maximum)
| Float -> fun _ -> Size.float
@ -188,6 +189,38 @@ module Writer = struct
MBytes.set_int64 buf ofs v;
ofs + Size.int64
let z v res ofs =
let sign = Z.sign v < 0 in
let bits = Z.numbits v in
if Z.equal v Z.zero then begin
MBytes.set_int8 res ofs 0x00 ;
ofs + 1
end else
let raw = Z.to_bits v in
let get_chunk pos len (* < 8 *) =
let byte = pos / 8 in
let bit = pos mod 8 in
if bit + len <= 8 then
let mask = 0xFF lsr (8 - len) in
(Char.code (String.get raw byte) lsr bit) land mask
else
let mask = 0xFF lsr (16 - len - bit) in
(Char.code (String.get raw byte) lsr bit)
lor ((Char.code (String.get raw (byte + 1))) land mask) lsl (8 - bit) in
let length = (bits + 1 + 6) / 7 in
MBytes.set_int8 res ofs
((if sign then 0x40 else 0x00)
lor (if bits > 6 then 0x80 else 0x00)
lor (get_chunk 0 6)) ;
for i = 1 to length - 1 do
let pos = 6 + (i - 1) * 7 in
let chunk_len = if i = length - 1 then bits - pos else 7 in
MBytes.set_int8 res (ofs + i)
((if i = bits / 7 then 0x00 else 0x80)
lor (get_chunk pos chunk_len))
done ;
ofs + length
(** write a float64 (double) **)
let float v buf ofs =
(*Here, float means float64, which is written using MBytes.set_double !!*)
@ -289,6 +322,13 @@ module BufferedWriter = struct
let int64 v buf =
MBytes_buffer.write_int64 buf v
let z v buf =
let bits = Z.numbits v in
let length = (bits + 1 + 6) / 7 in
let res = MBytes.create length in
ignore (Writer.z v res 0) ;
MBytes_buffer.write_mbytes buf res 0 length
(** write a float64 (double) **)
let float v buf =
MBytes_buffer.write_double buf v
@ -343,6 +383,7 @@ let rec write_rec
| Int31 -> int31
| Int32 -> int32
| Int64 -> int64
| Z -> z
| RangedInt { minimum ; maximum } ->
fun v ->
begin
@ -422,6 +463,7 @@ let rec write_rec_buffer
| Int31 -> int31 value buffer
| Int32 -> int32 value buffer
| Int64 -> int64 value buffer
| Z -> z value buffer
| Float -> float value buffer
| Bytes (`Fixed n) -> fixed_kind_bytes n value buffer
| String (`Fixed n) -> fixed_kind_string n value buffer
@ -572,6 +614,34 @@ module Reader = struct
let int64 buf ofs _len =
ofs + Size.int64, MBytes.get_int64 buf ofs
let z buf ofs _len =
let res = Buffer.create 100 in
let rec read prev i value bit =
if prev land 0x80 = 0x00 then begin
if bit > 0 then Buffer.add_char res (Char.unsafe_chr value) ;
if prev = 0x00 then failwith "trailing zeroes in Z encoding" ;
i
end else
let byte = MBytes.get_uint8 buf (ofs + i) in
let value = value lor ((byte land 0x7F) lsl bit) in
let bit = bit + 7 in
let bit, value = if bit >= 8 then begin
Buffer.add_char res (Char.unsafe_chr (value land 0xFF)) ;
bit - 8, value lsr 8
end else bit, value in
read byte (i + 1) value bit in
let first = MBytes.get_uint8 buf ofs in
if first = 0 then
ofs + 1, Z.zero
else
let value = first land 0x3F in
let sign = (first land 0x40) <> 0 in
let length = read first 1 value 6 in
let bits = Buffer.contents res in
let res = Z.of_bits bits in
let res = if sign then Z.neg res else res in
ofs + length, res
(** read a float64 (double) **)
let float buf ofs _len =
(*Here, float means float64, which is read using MBytes.get_double !!*)
@ -671,6 +741,7 @@ let rec read_rec : type a. a Encoding.t-> MBytes.t -> int -> int -> int * a = fu
| Int31 -> int31
| Int32 -> int32
| Int64 -> int64
| Z -> z
| RangedInt { minimum ; maximum } ->
(fun buf ofs alpha ->
let ofs, value =

View File

@ -32,7 +32,7 @@ type mbytes_stream = {
(* exception raised when additional mbytes are needed to continue
decoding *)
exception Need_more_data
exception Need_more_data of mbytes_stream
(* read a data that is stored in may Mbytes *)
let read_from_many_blocks reader buf ofs d_ofs =
@ -63,7 +63,7 @@ let read_from_many_blocks reader buf ofs d_ofs =
let generic_read_data delta_ofs reader buf =
let absolute_ofs = buf.ofs in
if buf.unread < delta_ofs then (*not enough data*)
raise Need_more_data ;
raise (Need_more_data buf) ;
if delta_ofs = 0 then (*we'll read nothing*)
buf, reader (MBytes.create 0) 0 0
else
@ -176,6 +176,17 @@ let rec data_checker
| Int31 -> next_path path (fst (int31 buf))
| Int32 -> next_path path (fst (int32 buf))
| Int64 -> next_path path (fst (int64 buf))
| Z ->
let rec while_not_terminator i buf =
let buf, byte = uint8 buf in
if (byte land 0x80) = 0x00 then
if byte = 0x00 && i <> 0 then
failwith "trailing zeroes in Z encoding"
else
next_path path buf
else
while_not_terminator (i + 1) buf in
while_not_terminator 0 buf
| RangedInt { minimum ; maximum } ->
let (stream, ranged) =
match Size.range_to_size ~minimum ~maximum with
@ -296,7 +307,7 @@ let rec data_checker
| Delayed f -> data_checker path (f ()) buf len
with Need_more_data ->
with Need_more_data buf ->
P_await { path ; encoding = e ; data_len = len }, buf
and next_path : path -> mbytes_stream -> path * mbytes_stream =

View File

@ -122,6 +122,17 @@ module Encoding: sig
*)
val ranged_int : int -> int -> int encoding
(** Big number
In JSON, data is encoded as a decimal string.
In binary, data is encoded as a variable length sequence of
bytes, with a running unary size bit: the most significant bit of
each byte tells is this is the last byte in the sequence (0) or if
there is more to read (1). The second most significant bit of the
first byte is reserved for the sign (positive if zero). Size and
sign bits ignored, data is then the binary representation of the
absolute value of the number in little endian order. *)
val z : Z.t encoding
(** Encoding of floating point number
(encoded as a floating point number in JSON and a double in binary). *)
val float : float encoding

View File

@ -87,6 +87,7 @@ type 'a desc =
| Int31 : int desc
| Int32 : Int32.t desc
| Int64 : Int64.t desc
| Z : Z.t desc
| RangedInt : { minimum : int ; maximum : int } -> int desc
| RangedFloat : { minimum : float ; maximum : float } -> float desc
| Float : float desc
@ -152,6 +153,7 @@ let rec classify : type a. a t -> Kind.t = fun e ->
| Int31 -> `Fixed Size.int31
| Int32 -> `Fixed Size.int32
| Int64 -> `Fixed Size.int64
| Z -> `Dynamic
| RangedInt { minimum ; maximum } ->
`Fixed Size.(integer_to_size @@ range_to_size ~minimum ~maximum)
| Float -> `Fixed Size.float
@ -234,6 +236,7 @@ let ranged_float minimum maximum =
and maximum = max minimum maximum in
make @@ RangedFloat { minimum ; maximum }
let int64 = make @@ Int64
let z = make @@ Z
let float = make @@ Float
let string = dynamic_size Variable.string

View File

@ -34,6 +34,7 @@ type 'a desc =
| Int31 : int desc
| Int32 : Int32.t desc
| Int64 : Int64.t desc
| Z : Z.t desc
| RangedInt : { minimum : int ; maximum : int } -> int desc
| RangedFloat : { minimum : float ; maximum : float } -> float desc
| Float : float desc
@ -109,6 +110,7 @@ val uint16 : int encoding
val int31 : int encoding
val int32 : int32 encoding
val int64 : int64 encoding
val z : Z.t encoding
val ranged_int : int -> int -> int encoding
val ranged_float : float -> float -> float encoding
val bool : bool encoding

View File

@ -6,6 +6,7 @@
(libraries (tezos-stdlib
ocplib-json-typed
ocplib-json-typed-bson
zarith
ezjsonm))
(flags (:standard -w -9+27-30-32-40@8
-safe-string

View File

@ -44,6 +44,14 @@ let int64_encoding =
Int64.of_string
]
let z_encoding =
let open Json_encoding in
def "bignum" @@
describe
~title: "Big number"
~description: "Decimal representation of a big number" @@
conv Z.to_string Z.of_string string
let bytes_jsont =
let open Json_encoding in
let schema =
@ -147,6 +155,7 @@ let rec json : type a. a Encoding.desc -> a Json_encoding.encoding =
| Int31 -> int
| Int32 -> int32
| Int64 -> int64_encoding
| Z -> z_encoding
| Bool -> bool
| Float -> float
| RangedFloat { minimum; maximum } -> ranged_float ~minimum ~maximum "rangedFloat"

View File

@ -15,17 +15,35 @@ let is_invalid_arg = function
| _ -> false
let test_simple_json ?msg ?(equal=Assert.equal) encoding value =
let json = Json.construct encoding value in
let result = Json.destruct encoding json in
let result = try
let json = Json.construct encoding value in
Json.destruct encoding json
with exn ->
let trace = Printexc.get_backtrace () in
Assert.fail_msg "%s %s\n%s"
(match msg with Some msg -> msg | None -> "no message")
(Printexc.to_string exn)
trace in
equal ?msg value result
let test_simple_bin ?msg ?(equal=Assert.equal) encoding value =
let bin = Binary.to_bytes encoding value in
let opt = Binary.of_bytes encoding bin in
let opt = try
let bin = Binary.to_bytes encoding value in
Binary.of_bytes encoding bin
with exn ->
let trace = Printexc.get_backtrace () in
Assert.fail_msg "%s %s\n%s"
(match msg with Some msg -> msg | None -> "no message")
(Printexc.to_string exn)
trace in
Assert.is_some ?msg opt;
let result = match opt with None -> assert false | Some v -> v in
equal ?msg value result
let test_simple_of_bin ?msg ?(equal=Assert.equal) encoding value bin =
let opt = Binary.of_bytes encoding bin in
equal ?msg value opt
let test_json_exn ?msg encoding value fail =
let get_result () =
let bin = Json.construct encoding value in
@ -114,6 +132,31 @@ let test_simple_values _ =
(* test_bin_exn ~msg:__LOC__ (string_enum ["a", 1; "a", 2]) 2 (...duplicatate...); *)
(* test_json_exn ~msg:__LOC__ (string_enum ["a", 1; "a", 2]) 1 (... duplicate...); *)
let test_zarith _ =
let test i = test_simple ~msg:("failed on Z number " ^ Z.to_string i) z i in
let test_of_bin bin exp name = test_simple_of_bin ~msg:("failed on " ^ name) z exp (MBytes.of_string bin) in
for i = -1_00_000 to 1_00_000 do test (Z.of_int i) done ;
for i = 100_000_000 to 100_100_000 do test (Z.of_int i) done ;
for i = -100_000_000 downto -100_100_000 do test (Z.of_int i) done ;
let rec fact n l =
if n > 1 then
let l = Z.mul l (Z.of_int n) in
test l ;
fact (n - 1) l in
fact 35 Z.one ;
test (Z.of_string "123574503164821730218493275982143254986574985328") ;
test (Z.of_string "8493275982143254986574985328") ;
test (Z.of_string "123574503164821730218474985328") ;
test (Z.of_string "10000000000100000000001000003050000000060600000000000777000008") ;
test (Z.of_string "-123574503164821730218493275982143254986574985328") ;
test (Z.of_string "-8493275982143254986574985328") ;
test (Z.of_string "-123574503164821730218474985328") ;
test (Z.of_string "-10000000000100000000001000003050000000060600000000000777000008") ;
test_of_bin "\x03" (Some (Z.of_int 3)) "3 (size OK)" ;
test_of_bin "\x83" None "3 (size + 1, truncated)" ;
test_of_bin "\x83\x00" None "3 (size + 1)" ;
test_of_bin "\x83\x80\x00" None "3 (size + 2)" ;
type t = A of int | B of string | C of int | D of string | E
let prn_t = function
@ -319,6 +362,7 @@ let test_randomized_variant_list _ =
(make_int_list [] 100 ()))
let tests = [
"zarith", `Quick, test_zarith ;
"simple", `Quick, test_simple_values ;
"union", `Quick, test_union ;
"splitted", `Quick, test_splitted ;

View File

@ -432,7 +432,28 @@ let test_splitted _ =
Assert.equal ~msg:__LOC__ "43" (get_result ~msg:__LOC__ binA);
Assert.equal ~msg:__LOC__ "44" (get_result ~msg:__LOC__ binB)
let test_zarith value =
let msg = "failed on Z number " ^ Z.to_string value in
test_check_simple_bin_ok z value;
test_check_simple_bin_ko_await z value;
test_read_simple_bin_ok ~msg ~equal:Assert.equal z value;
test_read_simple_bin_ko_await z value
let test_zarith _ =
for i = -1_00_000 to 1_00_000 do test_zarith (Z.of_int i) done ;
for i = 100_000_000 to 100_100_000 do test_zarith (Z.of_int i) done ;
for i = -100_000_000 downto -100_100_000 do test_zarith (Z.of_int i) done ;
test_zarith (Z.of_string "123574503164821730218493275982143254986574985328") ;
test_zarith (Z.of_string "8493275982143254986574985328") ;
test_zarith (Z.of_string "123574503164821730218474985328") ;
test_zarith (Z.of_string "10000000000100000000001000003050000000060600000000000777000008") ;
test_zarith (Z.of_string "-123574503164821730218493275982143254986574985328") ;
test_zarith (Z.of_string "-8493275982143254986574985328") ;
test_zarith (Z.of_string "-123574503164821730218474985328") ;
test_zarith (Z.of_string "-10000000000100000000001000003050000000060600000000000777000008")
let tests = [
"zarith", `Quick, test_zarith ;
"simple", `Quick, test_simple_values ;
"union", `Quick, test_union ;
"splitted", `Quick, test_splitted ;

View File

@ -42,6 +42,7 @@ val uint16 : int encoding
val int31 : int encoding
val int32 : int32 encoding
val int64 : int64 encoding
val z : Z.t encoding
val bool : bool encoding
val string : string encoding
val bytes : MBytes.t encoding