Data_encoding: add more compact binary serializer for Z
This commit is contained in:
parent
441149880c
commit
deffa28aa4
@ -39,6 +39,7 @@ let rec length : type x. x Encoding.t -> x -> int = fun e ->
|
||||
| Int31 -> fun _ -> Size.int31
|
||||
| Int32 -> fun _ -> Size.int32
|
||||
| Int64 -> fun _ -> Size.int64
|
||||
| Z -> fun z -> (Z.numbits z + 1 + 6) / 7
|
||||
| RangedInt { minimum ; maximum } ->
|
||||
fun _ -> Size.(integer_to_size @@ range_to_size ~minimum ~maximum)
|
||||
| Float -> fun _ -> Size.float
|
||||
@ -188,6 +189,38 @@ module Writer = struct
|
||||
MBytes.set_int64 buf ofs v;
|
||||
ofs + Size.int64
|
||||
|
||||
let z v res ofs =
|
||||
let sign = Z.sign v < 0 in
|
||||
let bits = Z.numbits v in
|
||||
if Z.equal v Z.zero then begin
|
||||
MBytes.set_int8 res ofs 0x00 ;
|
||||
ofs + 1
|
||||
end else
|
||||
let raw = Z.to_bits v in
|
||||
let get_chunk pos len (* < 8 *) =
|
||||
let byte = pos / 8 in
|
||||
let bit = pos mod 8 in
|
||||
if bit + len <= 8 then
|
||||
let mask = 0xFF lsr (8 - len) in
|
||||
(Char.code (String.get raw byte) lsr bit) land mask
|
||||
else
|
||||
let mask = 0xFF lsr (16 - len - bit) in
|
||||
(Char.code (String.get raw byte) lsr bit)
|
||||
lor ((Char.code (String.get raw (byte + 1))) land mask) lsl (8 - bit) in
|
||||
let length = (bits + 1 + 6) / 7 in
|
||||
MBytes.set_int8 res ofs
|
||||
((if sign then 0x40 else 0x00)
|
||||
lor (if bits > 6 then 0x80 else 0x00)
|
||||
lor (get_chunk 0 6)) ;
|
||||
for i = 1 to length - 1 do
|
||||
let pos = 6 + (i - 1) * 7 in
|
||||
let chunk_len = if i = length - 1 then bits - pos else 7 in
|
||||
MBytes.set_int8 res (ofs + i)
|
||||
((if i = bits / 7 then 0x00 else 0x80)
|
||||
lor (get_chunk pos chunk_len))
|
||||
done ;
|
||||
ofs + length
|
||||
|
||||
(** write a float64 (double) **)
|
||||
let float v buf ofs =
|
||||
(*Here, float means float64, which is written using MBytes.set_double !!*)
|
||||
@ -289,6 +322,13 @@ module BufferedWriter = struct
|
||||
let int64 v buf =
|
||||
MBytes_buffer.write_int64 buf v
|
||||
|
||||
let z v buf =
|
||||
let bits = Z.numbits v in
|
||||
let length = (bits + 1 + 6) / 7 in
|
||||
let res = MBytes.create length in
|
||||
ignore (Writer.z v res 0) ;
|
||||
MBytes_buffer.write_mbytes buf res 0 length
|
||||
|
||||
(** write a float64 (double) **)
|
||||
let float v buf =
|
||||
MBytes_buffer.write_double buf v
|
||||
@ -343,6 +383,7 @@ let rec write_rec
|
||||
| Int31 -> int31
|
||||
| Int32 -> int32
|
||||
| Int64 -> int64
|
||||
| Z -> z
|
||||
| RangedInt { minimum ; maximum } ->
|
||||
fun v ->
|
||||
begin
|
||||
@ -422,6 +463,7 @@ let rec write_rec_buffer
|
||||
| Int31 -> int31 value buffer
|
||||
| Int32 -> int32 value buffer
|
||||
| Int64 -> int64 value buffer
|
||||
| Z -> z value buffer
|
||||
| Float -> float value buffer
|
||||
| Bytes (`Fixed n) -> fixed_kind_bytes n value buffer
|
||||
| String (`Fixed n) -> fixed_kind_string n value buffer
|
||||
@ -572,6 +614,34 @@ module Reader = struct
|
||||
let int64 buf ofs _len =
|
||||
ofs + Size.int64, MBytes.get_int64 buf ofs
|
||||
|
||||
let z buf ofs _len =
|
||||
let res = Buffer.create 100 in
|
||||
let rec read prev i value bit =
|
||||
if prev land 0x80 = 0x00 then begin
|
||||
if bit > 0 then Buffer.add_char res (Char.unsafe_chr value) ;
|
||||
if prev = 0x00 then failwith "trailing zeroes in Z encoding" ;
|
||||
i
|
||||
end else
|
||||
let byte = MBytes.get_uint8 buf (ofs + i) in
|
||||
let value = value lor ((byte land 0x7F) lsl bit) in
|
||||
let bit = bit + 7 in
|
||||
let bit, value = if bit >= 8 then begin
|
||||
Buffer.add_char res (Char.unsafe_chr (value land 0xFF)) ;
|
||||
bit - 8, value lsr 8
|
||||
end else bit, value in
|
||||
read byte (i + 1) value bit in
|
||||
let first = MBytes.get_uint8 buf ofs in
|
||||
if first = 0 then
|
||||
ofs + 1, Z.zero
|
||||
else
|
||||
let value = first land 0x3F in
|
||||
let sign = (first land 0x40) <> 0 in
|
||||
let length = read first 1 value 6 in
|
||||
let bits = Buffer.contents res in
|
||||
let res = Z.of_bits bits in
|
||||
let res = if sign then Z.neg res else res in
|
||||
ofs + length, res
|
||||
|
||||
(** read a float64 (double) **)
|
||||
let float buf ofs _len =
|
||||
(*Here, float means float64, which is read using MBytes.get_double !!*)
|
||||
@ -671,6 +741,7 @@ let rec read_rec : type a. a Encoding.t-> MBytes.t -> int -> int -> int * a = fu
|
||||
| Int31 -> int31
|
||||
| Int32 -> int32
|
||||
| Int64 -> int64
|
||||
| Z -> z
|
||||
| RangedInt { minimum ; maximum } ->
|
||||
(fun buf ofs alpha ->
|
||||
let ofs, value =
|
||||
|
@ -32,7 +32,7 @@ type mbytes_stream = {
|
||||
|
||||
(* exception raised when additional mbytes are needed to continue
|
||||
decoding *)
|
||||
exception Need_more_data
|
||||
exception Need_more_data of mbytes_stream
|
||||
|
||||
(* read a data that is stored in may Mbytes *)
|
||||
let read_from_many_blocks reader buf ofs d_ofs =
|
||||
@ -63,7 +63,7 @@ let read_from_many_blocks reader buf ofs d_ofs =
|
||||
let generic_read_data delta_ofs reader buf =
|
||||
let absolute_ofs = buf.ofs in
|
||||
if buf.unread < delta_ofs then (*not enough data*)
|
||||
raise Need_more_data ;
|
||||
raise (Need_more_data buf) ;
|
||||
if delta_ofs = 0 then (*we'll read nothing*)
|
||||
buf, reader (MBytes.create 0) 0 0
|
||||
else
|
||||
@ -176,6 +176,17 @@ let rec data_checker
|
||||
| Int31 -> next_path path (fst (int31 buf))
|
||||
| Int32 -> next_path path (fst (int32 buf))
|
||||
| Int64 -> next_path path (fst (int64 buf))
|
||||
| Z ->
|
||||
let rec while_not_terminator i buf =
|
||||
let buf, byte = uint8 buf in
|
||||
if (byte land 0x80) = 0x00 then
|
||||
if byte = 0x00 && i <> 0 then
|
||||
failwith "trailing zeroes in Z encoding"
|
||||
else
|
||||
next_path path buf
|
||||
else
|
||||
while_not_terminator (i + 1) buf in
|
||||
while_not_terminator 0 buf
|
||||
| RangedInt { minimum ; maximum } ->
|
||||
let (stream, ranged) =
|
||||
match Size.range_to_size ~minimum ~maximum with
|
||||
@ -296,7 +307,7 @@ let rec data_checker
|
||||
|
||||
| Delayed f -> data_checker path (f ()) buf len
|
||||
|
||||
with Need_more_data ->
|
||||
with Need_more_data buf ->
|
||||
P_await { path ; encoding = e ; data_len = len }, buf
|
||||
|
||||
and next_path : path -> mbytes_stream -> path * mbytes_stream =
|
||||
|
@ -122,6 +122,17 @@ module Encoding: sig
|
||||
*)
|
||||
val ranged_int : int -> int -> int encoding
|
||||
|
||||
(** Big number
|
||||
In JSON, data is encoded as a decimal string.
|
||||
In binary, data is encoded as a variable length sequence of
|
||||
bytes, with a running unary size bit: the most significant bit of
|
||||
each byte tells is this is the last byte in the sequence (0) or if
|
||||
there is more to read (1). The second most significant bit of the
|
||||
first byte is reserved for the sign (positive if zero). Size and
|
||||
sign bits ignored, data is then the binary representation of the
|
||||
absolute value of the number in little endian order. *)
|
||||
val z : Z.t encoding
|
||||
|
||||
(** Encoding of floating point number
|
||||
(encoded as a floating point number in JSON and a double in binary). *)
|
||||
val float : float encoding
|
||||
|
@ -87,6 +87,7 @@ type 'a desc =
|
||||
| Int31 : int desc
|
||||
| Int32 : Int32.t desc
|
||||
| Int64 : Int64.t desc
|
||||
| Z : Z.t desc
|
||||
| RangedInt : { minimum : int ; maximum : int } -> int desc
|
||||
| RangedFloat : { minimum : float ; maximum : float } -> float desc
|
||||
| Float : float desc
|
||||
@ -152,6 +153,7 @@ let rec classify : type a. a t -> Kind.t = fun e ->
|
||||
| Int31 -> `Fixed Size.int31
|
||||
| Int32 -> `Fixed Size.int32
|
||||
| Int64 -> `Fixed Size.int64
|
||||
| Z -> `Dynamic
|
||||
| RangedInt { minimum ; maximum } ->
|
||||
`Fixed Size.(integer_to_size @@ range_to_size ~minimum ~maximum)
|
||||
| Float -> `Fixed Size.float
|
||||
@ -234,6 +236,7 @@ let ranged_float minimum maximum =
|
||||
and maximum = max minimum maximum in
|
||||
make @@ RangedFloat { minimum ; maximum }
|
||||
let int64 = make @@ Int64
|
||||
let z = make @@ Z
|
||||
let float = make @@ Float
|
||||
|
||||
let string = dynamic_size Variable.string
|
||||
|
@ -34,6 +34,7 @@ type 'a desc =
|
||||
| Int31 : int desc
|
||||
| Int32 : Int32.t desc
|
||||
| Int64 : Int64.t desc
|
||||
| Z : Z.t desc
|
||||
| RangedInt : { minimum : int ; maximum : int } -> int desc
|
||||
| RangedFloat : { minimum : float ; maximum : float } -> float desc
|
||||
| Float : float desc
|
||||
@ -109,6 +110,7 @@ val uint16 : int encoding
|
||||
val int31 : int encoding
|
||||
val int32 : int32 encoding
|
||||
val int64 : int64 encoding
|
||||
val z : Z.t encoding
|
||||
val ranged_int : int -> int -> int encoding
|
||||
val ranged_float : float -> float -> float encoding
|
||||
val bool : bool encoding
|
||||
|
@ -6,6 +6,7 @@
|
||||
(libraries (tezos-stdlib
|
||||
ocplib-json-typed
|
||||
ocplib-json-typed-bson
|
||||
zarith
|
||||
ezjsonm))
|
||||
(flags (:standard -w -9+27-30-32-40@8
|
||||
-safe-string
|
||||
|
@ -44,6 +44,14 @@ let int64_encoding =
|
||||
Int64.of_string
|
||||
]
|
||||
|
||||
let z_encoding =
|
||||
let open Json_encoding in
|
||||
def "bignum" @@
|
||||
describe
|
||||
~title: "Big number"
|
||||
~description: "Decimal representation of a big number" @@
|
||||
conv Z.to_string Z.of_string string
|
||||
|
||||
let bytes_jsont =
|
||||
let open Json_encoding in
|
||||
let schema =
|
||||
@ -147,6 +155,7 @@ let rec json : type a. a Encoding.desc -> a Json_encoding.encoding =
|
||||
| Int31 -> int
|
||||
| Int32 -> int32
|
||||
| Int64 -> int64_encoding
|
||||
| Z -> z_encoding
|
||||
| Bool -> bool
|
||||
| Float -> float
|
||||
| RangedFloat { minimum; maximum } -> ranged_float ~minimum ~maximum "rangedFloat"
|
||||
|
@ -15,17 +15,35 @@ let is_invalid_arg = function
|
||||
| _ -> false
|
||||
|
||||
let test_simple_json ?msg ?(equal=Assert.equal) encoding value =
|
||||
let json = Json.construct encoding value in
|
||||
let result = Json.destruct encoding json in
|
||||
let result = try
|
||||
let json = Json.construct encoding value in
|
||||
Json.destruct encoding json
|
||||
with exn ->
|
||||
let trace = Printexc.get_backtrace () in
|
||||
Assert.fail_msg "%s %s\n%s"
|
||||
(match msg with Some msg -> msg | None -> "no message")
|
||||
(Printexc.to_string exn)
|
||||
trace in
|
||||
equal ?msg value result
|
||||
|
||||
let test_simple_bin ?msg ?(equal=Assert.equal) encoding value =
|
||||
let bin = Binary.to_bytes encoding value in
|
||||
let opt = Binary.of_bytes encoding bin in
|
||||
let opt = try
|
||||
let bin = Binary.to_bytes encoding value in
|
||||
Binary.of_bytes encoding bin
|
||||
with exn ->
|
||||
let trace = Printexc.get_backtrace () in
|
||||
Assert.fail_msg "%s %s\n%s"
|
||||
(match msg with Some msg -> msg | None -> "no message")
|
||||
(Printexc.to_string exn)
|
||||
trace in
|
||||
Assert.is_some ?msg opt;
|
||||
let result = match opt with None -> assert false | Some v -> v in
|
||||
equal ?msg value result
|
||||
|
||||
let test_simple_of_bin ?msg ?(equal=Assert.equal) encoding value bin =
|
||||
let opt = Binary.of_bytes encoding bin in
|
||||
equal ?msg value opt
|
||||
|
||||
let test_json_exn ?msg encoding value fail =
|
||||
let get_result () =
|
||||
let bin = Json.construct encoding value in
|
||||
@ -114,6 +132,31 @@ let test_simple_values _ =
|
||||
(* test_bin_exn ~msg:__LOC__ (string_enum ["a", 1; "a", 2]) 2 (...duplicatate...); *)
|
||||
(* test_json_exn ~msg:__LOC__ (string_enum ["a", 1; "a", 2]) 1 (... duplicate...); *)
|
||||
|
||||
let test_zarith _ =
|
||||
let test i = test_simple ~msg:("failed on Z number " ^ Z.to_string i) z i in
|
||||
let test_of_bin bin exp name = test_simple_of_bin ~msg:("failed on " ^ name) z exp (MBytes.of_string bin) in
|
||||
for i = -1_00_000 to 1_00_000 do test (Z.of_int i) done ;
|
||||
for i = 100_000_000 to 100_100_000 do test (Z.of_int i) done ;
|
||||
for i = -100_000_000 downto -100_100_000 do test (Z.of_int i) done ;
|
||||
let rec fact n l =
|
||||
if n > 1 then
|
||||
let l = Z.mul l (Z.of_int n) in
|
||||
test l ;
|
||||
fact (n - 1) l in
|
||||
fact 35 Z.one ;
|
||||
test (Z.of_string "123574503164821730218493275982143254986574985328") ;
|
||||
test (Z.of_string "8493275982143254986574985328") ;
|
||||
test (Z.of_string "123574503164821730218474985328") ;
|
||||
test (Z.of_string "10000000000100000000001000003050000000060600000000000777000008") ;
|
||||
test (Z.of_string "-123574503164821730218493275982143254986574985328") ;
|
||||
test (Z.of_string "-8493275982143254986574985328") ;
|
||||
test (Z.of_string "-123574503164821730218474985328") ;
|
||||
test (Z.of_string "-10000000000100000000001000003050000000060600000000000777000008") ;
|
||||
test_of_bin "\x03" (Some (Z.of_int 3)) "3 (size OK)" ;
|
||||
test_of_bin "\x83" None "3 (size + 1, truncated)" ;
|
||||
test_of_bin "\x83\x00" None "3 (size + 1)" ;
|
||||
test_of_bin "\x83\x80\x00" None "3 (size + 2)" ;
|
||||
|
||||
type t = A of int | B of string | C of int | D of string | E
|
||||
|
||||
let prn_t = function
|
||||
@ -319,6 +362,7 @@ let test_randomized_variant_list _ =
|
||||
(make_int_list [] 100 ()))
|
||||
|
||||
let tests = [
|
||||
"zarith", `Quick, test_zarith ;
|
||||
"simple", `Quick, test_simple_values ;
|
||||
"union", `Quick, test_union ;
|
||||
"splitted", `Quick, test_splitted ;
|
||||
|
@ -432,7 +432,28 @@ let test_splitted _ =
|
||||
Assert.equal ~msg:__LOC__ "43" (get_result ~msg:__LOC__ binA);
|
||||
Assert.equal ~msg:__LOC__ "44" (get_result ~msg:__LOC__ binB)
|
||||
|
||||
let test_zarith value =
|
||||
let msg = "failed on Z number " ^ Z.to_string value in
|
||||
test_check_simple_bin_ok z value;
|
||||
test_check_simple_bin_ko_await z value;
|
||||
test_read_simple_bin_ok ~msg ~equal:Assert.equal z value;
|
||||
test_read_simple_bin_ko_await z value
|
||||
|
||||
let test_zarith _ =
|
||||
for i = -1_00_000 to 1_00_000 do test_zarith (Z.of_int i) done ;
|
||||
for i = 100_000_000 to 100_100_000 do test_zarith (Z.of_int i) done ;
|
||||
for i = -100_000_000 downto -100_100_000 do test_zarith (Z.of_int i) done ;
|
||||
test_zarith (Z.of_string "123574503164821730218493275982143254986574985328") ;
|
||||
test_zarith (Z.of_string "8493275982143254986574985328") ;
|
||||
test_zarith (Z.of_string "123574503164821730218474985328") ;
|
||||
test_zarith (Z.of_string "10000000000100000000001000003050000000060600000000000777000008") ;
|
||||
test_zarith (Z.of_string "-123574503164821730218493275982143254986574985328") ;
|
||||
test_zarith (Z.of_string "-8493275982143254986574985328") ;
|
||||
test_zarith (Z.of_string "-123574503164821730218474985328") ;
|
||||
test_zarith (Z.of_string "-10000000000100000000001000003050000000060600000000000777000008")
|
||||
|
||||
let tests = [
|
||||
"zarith", `Quick, test_zarith ;
|
||||
"simple", `Quick, test_simple_values ;
|
||||
"union", `Quick, test_union ;
|
||||
"splitted", `Quick, test_splitted ;
|
||||
|
@ -42,6 +42,7 @@ val uint16 : int encoding
|
||||
val int31 : int encoding
|
||||
val int32 : int32 encoding
|
||||
val int64 : int64 encoding
|
||||
val z : Z.t encoding
|
||||
val bool : bool encoding
|
||||
val string : string encoding
|
||||
val bytes : MBytes.t encoding
|
||||
|
Loading…
Reference in New Issue
Block a user