From 56fbc5267dcf0067b1aef059dc651d132aab17d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Proust?= Date: Thu, 3 May 2018 12:27:17 +0800 Subject: [PATCH] Data_encoding: split implementation In an effort to keep a clean commit history, the interface is unchanged. --- src/lib_data_encoding/binary.ml | 1227 +++++++++++++ src/lib_data_encoding/binary.mli | 52 + src/lib_data_encoding/bson.ml | 14 + src/lib_data_encoding/bson.mli | 18 + src/lib_data_encoding/data_encoding.ml | 2283 +++--------------------- src/lib_data_encoding/encoding.ml | 567 ++++++ src/lib_data_encoding/encoding.mli | 489 +++++ src/lib_data_encoding/json.ml | 277 +++ src/lib_data_encoding/json.mli | 96 + 9 files changed, 2954 insertions(+), 2069 deletions(-) create mode 100644 src/lib_data_encoding/binary.ml create mode 100644 src/lib_data_encoding/binary.mli create mode 100644 src/lib_data_encoding/bson.ml create mode 100644 src/lib_data_encoding/bson.mli create mode 100644 src/lib_data_encoding/encoding.ml create mode 100644 src/lib_data_encoding/encoding.mli create mode 100644 src/lib_data_encoding/json.ml create mode 100644 src/lib_data_encoding/json.mli diff --git a/src/lib_data_encoding/binary.ml b/src/lib_data_encoding/binary.ml new file mode 100644 index 000000000..f937e7445 --- /dev/null +++ b/src/lib_data_encoding/binary.ml @@ -0,0 +1,1227 @@ +(**************************************************************************) +(* *) +(* Copyright (c) 2014 - 2018. *) +(* Dynamic Ledger Solutions, Inc. *) +(* *) +(* All rights reserved. No warranty, explicit or implicit, provided. *) +(* *) +(**************************************************************************) + +open Encoding (* TODO: unoppen *) + +type 'l writer = { + write: 'a. 'a t -> 'a -> MBytes.t -> int -> int ; +} + +type 'l reader = { + read: 'a. 'a t -> MBytes.t -> int -> int -> (int * 'a) ; +} + +let rec length : type x. 
x t -> x -> int = fun e -> + match e.encoding with + (* Fixed *) + | Null -> fun _ -> 0 + | Empty -> fun _ -> 0 + | Constant _ -> fun _ -> 0 + | Bool -> fun _ -> Size.bool + | Int8 -> fun _ -> Size.int8 + | Uint8 -> fun _ -> Size.uint8 + | Int16 -> fun _ -> Size.int16 + | Uint16 -> fun _ -> Size.uint16 + | Int31 -> fun _ -> Size.int31 + | Int32 -> fun _ -> Size.int32 + | Int64 -> fun _ -> Size.int64 + | RangedInt { minimum ; maximum } -> + fun _ -> integer_to_size @@ range_to_size ~minimum ~maximum + | Float -> fun _ -> Size.float + | RangedFloat _ -> fun _ -> Size.float + | Bytes `Fixed n -> fun _ -> n + | String `Fixed n -> fun _ -> n + | String_enum (_, arr) -> + fun _ -> integer_to_size @@ enum_size arr + | Objs (`Fixed n, _, _) -> fun _ -> n + | Tups (`Fixed n, _, _) -> fun _ -> n + | Union (`Fixed n, _, _) -> fun _ -> n + (* Dynamic *) + | Objs (`Dynamic, e1, e2) -> + let length1 = length e1 in + let length2 = length e2 in + fun (v1, v2) -> length1 v1 + length2 v2 + | Tups (`Dynamic, e1, e2) -> + let length1 = length e1 in + let length2 = length e2 in + fun (v1, v2) -> length1 v1 + length2 v2 + | Union (`Dynamic, sz, cases) -> + let tag_size = tag_size sz in + let case_length (Case { encoding = e ; proj }) = + let length v = tag_size + length e v in + fun v -> Option.map ~f:length (proj v) in + apply (List.map case_length cases) + | Mu (`Dynamic, _name, self) -> + fun v -> length (self e) v + | Obj (Opt (`Dynamic, _, e)) -> + let length = length e in + (function None -> 1 | Some x -> 1 + length x) + (* Variable *) + | Ignore -> fun _ -> 0 + | Bytes `Variable -> MBytes.length + | String `Variable -> String.length + | Array e -> + let length = length e in + fun v -> + Array.fold_left + (fun acc v -> length v + acc) + 0 v + | List e -> + let length = length e in + fun v -> + List.fold_left + (fun acc v -> length v + acc) + 0 v + | Objs (`Variable, e1, e2) -> + let length1 = length e1 in + let length2 = length e2 in + fun (v1, v2) -> length1 v1 + length2 v2 + | 
Tups (`Variable, e1, e2) -> + let length1 = length e1 + and length2 = length e2 in + fun (v1, v2) -> length1 v1 + length2 v2 + | Obj (Opt (`Variable, _, e)) -> + let length = length e in + (function None -> 0 | Some x -> length x) + | Union (`Variable, sz, cases) -> + let rec case_lengths json_only_cases acc = function + | [] -> (List.rev acc, json_only_cases) + | Case { tag = Json_only } :: tl -> case_lengths true acc tl + | Case { encoding = e ; proj ; tag = Tag _ } :: tl -> + let length v = tag_size sz + length e v in + case_lengths + json_only_cases + ((fun v -> + match proj v with + | None -> None + | Some v -> Some (length v)) :: acc) + tl in + let cases, json_only = case_lengths false [] cases in + apply + ~error:(if json_only + then Failure "No case matched, but JSON only cases were present in union" + else No_case_matched) + cases + | Mu (`Variable, _name, self) -> + fun v -> length (self e) v + (* Recursive*) + | Obj (Req (_, e)) -> length e + | Obj (Dft (_, e, _)) -> length e + | Tup e -> length e + | Conv { encoding = e ; proj } -> + let length = length e in + fun v -> length (proj v) + | Describe { encoding = e } -> length e + | Def { encoding = e } -> length e + | Splitted { encoding = e } -> length e + | Dynamic_size e -> + let length = length e in + fun v -> Size.int32 + length v + | Delayed f -> length (f ()) + +(** Writer *) + +module Writer = struct + + let int8 v buf ofs = + if (v < - (1 lsl 7) || v >= 1 lsl 7) then + invalid_arg "Data_encoding.Binary.Writer.int8" ; + MBytes.set_int8 buf ofs v; + ofs + Size.int8 + + let uint8 v buf ofs = + if (v < 0 || v >= 1 lsl 8) then + invalid_arg "Data_encoding.Binary.Writer.uint8" ; + MBytes.set_int8 buf ofs v; + ofs + Size.uint8 + + let char v buf ofs = + MBytes.set_char buf ofs v; + ofs + Size.char + + let bool v buf ofs = + uint8 (if v then 255 else 0) buf ofs + + let int16 v buf ofs = + if (v < - (1 lsl 15) || v >= 1 lsl 15) then + invalid_arg "Data_encoding.Binary.Writer.int16" ; + MBytes.set_int16 
buf ofs v; + ofs + Size.int16 + + let uint16 v buf ofs = + if (v < 0 || v >= 1 lsl 16) then + invalid_arg "Data_encoding.Binary.Writer.uint16" ; + MBytes.set_int16 buf ofs v; + ofs + Size.uint16 + + let uint30 v buf ofs = + if v < 0 || (Sys.int_size > 31 && v >= 1 lsl 30) then + invalid_arg "Data_encoding.Binary.Writer.uint30" ; + MBytes.set_int32 buf ofs (Int32.of_int v); + ofs + Size.uint30 + + let int31 v buf ofs = + if Sys.int_size > 31 && (v < ~- (1 lsl 30) || v >= 1 lsl 30) then + invalid_arg "Data_encoding.Binary.Writer.int31" ; + MBytes.set_int32 buf ofs (Int32.of_int v); + ofs + Size.int31 + + let int32 v buf ofs = + MBytes.set_int32 buf ofs v; + ofs + Size.int32 + + let int64 v buf ofs = + MBytes.set_int64 buf ofs v; + ofs + Size.int64 + + (** write a float64 (double) **) + let float v buf ofs = + (*Here, float means float64, which is written using MBytes.set_double !!*) + MBytes.set_double buf ofs v; + ofs + Size.float + + let fixed_kind_bytes length s buf ofs = + MBytes.blit s 0 buf ofs length; + ofs + length + + let variable_length_bytes s buf ofs = + let length = MBytes.length s in + MBytes.blit s 0 buf ofs length ; + ofs + length + + let fixed_kind_string length s buf ofs = + if String.length s <> length then invalid_arg "fixed_kind_string"; + MBytes.blit_from_string s 0 buf ofs length; + ofs + length + + let variable_length_string s buf ofs = + let length = String.length s in + MBytes.blit_from_string s 0 buf ofs length ; + ofs + length + + let objs w1 w2 (v1,v2) buf ofs = + w1 v1 buf ofs |> w2 v2 buf + + let array w a buf ofs = + Array.fold_left (fun ofs v -> w v buf ofs) ofs a + + let list w l buf ofs = + List.fold_left (fun ofs v -> w v buf ofs) ofs l + + let conv proj w v buf ofs = + w (proj v) buf ofs + + let write_tag = function + | `Uint8 -> uint8 + | `Uint16 -> uint16 + + let union w sz cases = + let writes_case = function + | Case { tag = Json_only } -> None + | Case { encoding = e ; proj ; tag = Tag tag } -> + let write = w.write e in + 
let write v buf ofs = + write_tag sz tag buf ofs |> write v buf in + Some (fun v -> + match proj v with + | None -> None + | Some v -> Some (write v)) in + apply (TzList.filter_map writes_case cases) + +end + +module BufferedWriter = struct + + let int8 v buf = + if (v < - (1 lsl 7) || v >= 1 lsl 7) then + invalid_arg "Data_encoding.Binary.Writer.int8" ; + MBytes_buffer.write_int8 buf v + + let uint8 v buf = + if (v < 0 || v >= 1 lsl 8) then + invalid_arg "Data_encoding.Binary.Writer.uint8" ; + MBytes_buffer.write_int8 buf v + + let char v buf = + MBytes_buffer.write_char buf v + + let bool v buf = + uint8 (if v then 255 else 0) buf + + let int16 v buf = + if (v < - (1 lsl 15) || v >= 1 lsl 15) then + invalid_arg "Data_encoding.Binary.Writer.int16" ; + MBytes_buffer.write_int16 buf v + + let uint16 v buf = + if (v < 0 || v >= 1 lsl 16) then + invalid_arg "Data_encoding.Binary.Writer.uint16" ; + MBytes_buffer.write_int16 buf v + + let uint30 v buf = + if v < 0 || (Sys.int_size > 31 && v >= 1 lsl 30) then + invalid_arg "Data_encoding.Binary.Writer.uint30" ; + MBytes_buffer.write_int32 buf (Int32.of_int v) + + let int31 v buf = + if Sys.int_size > 31 && (v < ~- (1 lsl 30) || v >= 1 lsl 30) then + invalid_arg "Data_encoding.Binary.Writer.int31" ; + MBytes_buffer.write_int32 buf (Int32.of_int v) + + let int32 v buf = + MBytes_buffer.write_int32 buf v + + let int64 v buf = + MBytes_buffer.write_int64 buf v + + (** write a float64 (double) **) + let float v buf = + MBytes_buffer.write_double buf v + + let fixed_kind_bytes length s buf = + MBytes_buffer.write_mbytes buf s 0 length + + let variable_length_bytes s buf = + let length = MBytes.length s in + MBytes_buffer.write_mbytes buf s 0 length + + let fixed_kind_string length s buf = + if String.length s <> length then invalid_arg "fixed_kind_string"; + MBytes_buffer.write_string_data buf s + + let variable_length_string s buf = + MBytes_buffer.write_string_data buf s + + let write_tag = function + | `Uint8 -> uint8 + | 
`Uint16 -> uint16 + +end + +let rec assoc_snd target = function + | [] -> raise No_case_matched + | (value, hd) :: tl -> + if hd = target + then value + else assoc_snd target tl + +let get_string_enum_case tbl v = + try + snd (Hashtbl.find tbl v) + with _ -> + raise No_case_matched + +let rec write_rec + : type a. a t -> a -> MBytes.t -> int -> int = fun e -> + let open Writer in + match e.encoding with + | Null -> (fun () _buf ofs -> ofs) + | Empty -> (fun () _buf ofs -> ofs) + | Constant _ -> (fun () _buf ofs -> ofs) + | Ignore -> (fun () _buf ofs -> ofs) + | Bool -> bool + | Int8 -> int8 + | Uint8 -> uint8 + | Int16 -> int16 + | Uint16 -> uint16 + | Int31 -> int31 + | Int32 -> int32 + | Int64 -> int64 + | RangedInt { minimum ; maximum } -> + fun v -> + begin + if v < minimum || v > maximum + then invalid_arg (Printf.sprintf "Integer %d not in range [%d, %d]." v minimum maximum) ; + let v = if minimum >= 0 then v - minimum else v in + match range_to_size ~minimum ~maximum with + | `Uint8 -> uint8 v + | `Uint16 -> uint16 v + | `Uint30 -> uint30 v + | `Int8 -> int8 v + | `Int16 -> int16 v + | `Int31 -> int31 v + end + | Float -> float + | RangedFloat { minimum ; maximum } -> + fun v -> + if v < minimum || v > maximum + then invalid_arg (Printf.sprintf "Float %f not in range [%f, %f]." 
v minimum maximum) ; + float v + | Bytes (`Fixed n) -> fixed_kind_bytes n + | String (`Fixed n) -> fixed_kind_string n + | Bytes `Variable -> variable_length_bytes + | String `Variable -> variable_length_string + | Array t -> array (write_rec t) + | List t -> list (write_rec t) + | String_enum (tbl, arr) -> + (fun v -> + let value = get_string_enum_case tbl v in + match enum_size arr with + | `Uint30 -> uint30 value + | `Uint16 -> uint16 value + | `Uint8 -> uint8 value) + | Obj (Req (_, e)) -> write_rec e + | Obj (Opt (`Dynamic, _, e)) -> + let write = write_rec e in + (function None -> int8 0 + | Some x -> fun buf ofs -> int8 1 buf ofs |> write x buf) + | Obj (Opt (`Variable, _, e)) -> + let write = write_rec e in + (function None -> fun _buf ofs -> ofs + | Some x -> write x) + | Obj (Dft (_, e, _)) -> write_rec e + | Objs (_, e1, e2) -> + objs (write_rec e1) (write_rec e2) + | Tup e -> write_rec e + | Tups (_, e1, e2) -> + objs (write_rec e1) (write_rec e2) + | Conv { encoding = e; proj } -> conv proj (write_rec e) + | Describe { encoding = e } -> write_rec e + | Def { encoding = e } -> write_rec e + | Splitted { encoding = e } -> write_rec e + | Union (_, sz, cases) -> union { write = write_rec } sz cases + | Mu (_, _, self) -> fun v buf ofs -> write_rec (self e) v buf ofs + | Dynamic_size e -> + let length = length e + and write = write_rec e in + fun v buf ofs -> + int32 (Int32.of_int @@ length v) buf ofs |> write v buf + | Delayed f -> write_rec (f ()) + +let rec write_rec_buffer + : type a. 
a encoding -> a -> MBytes_buffer.t -> unit = + fun encoding value buffer -> + let open BufferedWriter in + match encoding.encoding with + | Null -> () + | Empty -> () + | Constant _ -> () + | Ignore -> () + | Bool -> bool value buffer + | Int8 -> int8 value buffer + | Uint8 -> uint8 value buffer + | Int16 -> int16 value buffer + | Uint16 -> uint16 value buffer + | Int31 -> int31 value buffer + | Int32 -> int32 value buffer + | Int64 -> int64 value buffer + | Float -> float value buffer + | Bytes (`Fixed n) -> fixed_kind_bytes n value buffer + | String (`Fixed n) -> fixed_kind_string n value buffer + | Bytes `Variable -> variable_length_bytes value buffer + | String `Variable -> variable_length_string value buffer + | Array t -> Array.iter (fun x -> write_rec_buffer t x buffer) value + | List t -> List.iter (fun x -> write_rec_buffer t x buffer) value + | RangedInt { minimum ; maximum } -> + if value < minimum || value > maximum + then invalid_arg (Printf.sprintf "Integer %d not in range [%d, %d]." + value minimum maximum) ; + let value = if minimum >= 0 then value - minimum else value in + begin + match range_to_size ~minimum ~maximum with + | `Uint30 -> uint30 value buffer + | `Uint16 -> uint16 value buffer + | `Uint8 -> uint8 value buffer + | `Int8 -> int8 value buffer + | `Int16 -> int16 value buffer + | `Int31 -> int31 value buffer + end + | RangedFloat { minimum ; maximum } -> + if value < minimum || value > maximum + then invalid_arg (Printf.sprintf "Float %f not in range [%f, %f]." 
+ value minimum maximum) ; + float value buffer + | String_enum (tbl, arr) -> + (match enum_size arr with + | `Uint30 -> BufferedWriter.uint30 + | `Uint16 -> BufferedWriter.uint16 + | `Uint8 -> BufferedWriter.uint8) + (get_string_enum_case tbl value) + buffer + | Obj (Req (_, e)) -> write_rec_buffer e value buffer + | Obj (Opt (`Dynamic, _, e)) -> + (match value with + | None -> int8 0 buffer + | Some x -> + begin + int8 1 buffer ; + write_rec_buffer e x buffer + end) + | Obj (Opt (`Variable, _, e)) -> + (match value with + | None -> () + | Some x -> write_rec_buffer e x buffer) + | Obj (Dft (_, e, _)) -> write_rec_buffer e value buffer + | Objs (_, e1, e2) -> + let v1, v2 = value in + write_rec_buffer e1 v1 buffer ; + write_rec_buffer e2 v2 buffer + | Tup e -> write_rec_buffer e value buffer + | Tups (_, e1, e2) -> + let v1, v2 = value in + write_rec_buffer e1 v1 buffer ; + write_rec_buffer e2 v2 buffer + | Conv { encoding = e; proj } -> + write_rec_buffer e (proj value) buffer + | Describe { encoding = e } -> write_rec_buffer e value buffer + | Def { encoding = e } -> write_rec_buffer e value buffer + | Splitted { encoding = e } -> write_rec_buffer e value buffer + | Union (_, sz, cases) -> + let rec write_case = function + | [] -> raise No_case_matched + | Case { tag = Json_only } :: tl -> write_case tl + | Case { encoding = e ; proj ; tag = Tag tag } :: tl -> + begin + match proj value with + | None -> write_case tl + | Some data -> + write_tag sz tag buffer ; + write_rec_buffer e data buffer + end in + write_case cases + | Mu (_, _, self) -> + write_rec_buffer (self encoding) value buffer + | Dynamic_size e -> + MBytes_buffer.write_sized buffer (fun () -> write_rec_buffer e value buffer) + | Delayed f -> write_rec_buffer (f ()) value buffer + +let write t v buf ofs = + try Some (write_rec t v buf ofs) + with _ -> None + +let to_bytes t v = + let bytes = MBytes_buffer.create () in + write_rec_buffer t v bytes ; + MBytes_buffer.to_mbytes bytes + +let 
to_bytes_list ?(copy_blocks=false) block_sz t v = + assert (block_sz > 0); + let bytes = to_bytes t v in (* call to generic function to_bytes *) + let length = MBytes.length bytes in + if length <= block_sz then + [bytes] (* if the result fits in the given block_sz *) + else + let may_copy = if copy_blocks then MBytes.copy else fun t -> t in + let nb_full = length / block_sz in (* nb of blocks of size block_sz *) + let sz_full = nb_full * block_sz in (* size of the full part *) + let acc = (* eventually init acc with a non-full block *) + if sz_full = length then [] + else [may_copy (MBytes.sub bytes sz_full (length - sz_full))] + in + let rec split_full_blocks curr_upper_limit acc = + let start = curr_upper_limit - block_sz in + assert (start >= 0); + (* copy the block [ start, curr_upper_limit [ of size block_sz *) + let acc = (may_copy (MBytes.sub bytes start block_sz)) :: acc in + if start = 0 then acc else split_full_blocks start acc + in + split_full_blocks sz_full acc + +(** Reader *) + +module Reader = struct + + let int8 buf ofs _len = + ofs + Size.int8, MBytes.get_int8 buf ofs + + let uint8 buf ofs _len = + ofs + Size.uint8, MBytes.get_uint8 buf ofs + + let char buf ofs _len = + ofs + Size.char, MBytes.get_char buf ofs + + let bool buf ofs len = + let ofs, v = int8 buf ofs len in + ofs, v <> 0 + + let int16 buf ofs _len = + ofs + Size.int16, MBytes.get_int16 buf ofs + + let uint16 buf ofs _len = + ofs + Size.uint16, MBytes.get_uint16 buf ofs + + let uint30 buf ofs _len = + let v = Int32.to_int (MBytes.get_int32 buf ofs) in + if v < 0 then + failwith "Data_encoding.Binary.Reader.uint30: invalid data." 
; + ofs + Size.uint30, v + + let int31 buf ofs _len = + ofs + Size.int31, Int32.to_int (MBytes.get_int32 buf ofs) + + let int32 buf ofs _len = + ofs + Size.int32, MBytes.get_int32 buf ofs + + let int64 buf ofs _len = + ofs + Size.int64, MBytes.get_int64 buf ofs + + (** read a float64 (double) **) + let float buf ofs _len = + (*Here, float means float64, which is read using MBytes.get_double !!*) + ofs + Size.float, MBytes.get_double buf ofs + + let int_of_int32 i = + let i' = Int32.to_int i in + let i'' = Int32.of_int i' in + if i'' = i then + i' + else + invalid_arg "int_of_int32 overflow" + + let fixed_length_bytes length buf ofs _len = + let s = MBytes.sub buf ofs length in + ofs + length, s + + let fixed_length_string length buf ofs _len = + let s = MBytes.substring buf ofs length in + ofs + length, s + + let seq r1 r2 buf ofs len = + let ofs', v1 = r1 buf ofs len in + let ofs'', v2 = r2 buf ofs' (len - (ofs' - ofs)) in + ofs'', (v1, v2) + + let varseq r e1 e2 buf ofs len = + let k1 = classify e1 + and k2 = classify e2 in + match k1, k2 with + | (`Dynamic | `Fixed _), `Variable -> + let ofs', v1 = r.read e1 buf ofs len in + let ofs'', v2 = r.read e2 buf ofs' (len - (ofs' - ofs)) in + ofs'', (v1, v2) + | `Variable, `Fixed n -> + let ofs', v1 = r.read e1 buf ofs (len - n) in + let ofs'', v2 = r.read e2 buf ofs' n in + ofs'', (v1, v2) + | _ -> assert false (* Should be rejected by Kind.combine *) + + let list read buf ofs len = + let rec loop acc ofs len = + assert (len >= 0); + if len <= 0 + then ofs, List.rev acc + else + let ofs', v = read buf ofs len in + assert (ofs' > ofs); + loop (v :: acc) ofs' (len - (ofs' - ofs)) + in + loop [] ofs len + + let array read buf ofs len = + let ofs, l = list read buf ofs len in + ofs, Array.of_list l + + let conv inj r buf ofs len = + let ofs, v = r buf ofs len in + ofs, inj v + + let read_tag = function + | `Uint8 -> uint8 + | `Uint16 -> uint16 + + let union r sz cases = + let read_cases = + TzList.filter_map + (function + 
| (Case { tag = Json_only }) -> None + | (Case { encoding = e ; inj ; tag = Tag tag }) -> + let read = r.read e in + Some (tag, fun len buf ofs -> + let ofs, v = read len buf ofs in + ofs, inj v)) + cases in + fun buf ofs len -> + let ofs, tag = read_tag sz buf ofs len in + try List.assoc tag read_cases buf ofs (len - tag_size sz) + with Not_found -> raise (Unexpected_tag tag) + +end + +let rec read_rec : type a. a t-> MBytes.t -> int -> int -> int * a = fun e -> + let open Reader in + match e.encoding with + | Null -> (fun _buf ofs _len -> ofs, ()) + | Empty -> (fun _buf ofs _len -> ofs, ()) + | Constant _ -> (fun _buf ofs _len -> ofs, ()) + | Ignore -> (fun _buf ofs len -> ofs + len, ()) + | Bool -> bool + | Int8 -> int8 + | Uint8 -> uint8 + | Int16 -> int16 + | Uint16 -> uint16 + | Int31 -> int31 + | Int32 -> int32 + | Int64 -> int64 + | RangedInt { minimum ; maximum } -> + (fun buf ofs alpha -> + let ofs, value = + match range_to_size ~minimum ~maximum with + | `Int8 -> int8 buf ofs alpha + | `Int16 -> int16 buf ofs alpha + | `Int31 -> int31 buf ofs alpha + | `Uint8 -> uint8 buf ofs alpha + | `Uint16 -> uint16 buf ofs alpha + | `Uint30 -> uint30 buf ofs alpha in + let value = if minimum > 0 then value + minimum else value in + if value < minimum || value > maximum + then raise (Int_out_of_range (value, minimum, maximum)) ; + (ofs, value)) + | Float -> float + | RangedFloat { minimum ; maximum } -> + (fun buf ofs len -> + let offset, value = float buf ofs len in + if value < minimum || value > maximum + then raise (Float_out_of_range (value, minimum, maximum)) ; + (offset, value)) + | Bytes (`Fixed n) -> fixed_length_bytes n + | String (`Fixed n) -> fixed_length_string n + | Bytes `Variable -> fun buf ofs len -> fixed_length_bytes len buf ofs len + | String `Variable -> fun buf ofs len -> fixed_length_string len buf ofs len + | String_enum (_, arr) -> begin + fun buf ofs a -> + let ofs, ind = + match enum_size arr with + | `Uint8 -> uint8 buf ofs a + | `Uint16 
-> uint16 buf ofs a + | `Uint30 -> uint30 buf ofs a in + if ind >= Array.length arr + then raise No_case_matched + else (ofs, arr.(ind)) + end + | Array e -> array (read_rec e) + | List e -> list (read_rec e) + | Obj (Req (_, e)) -> read_rec e + | Obj (Opt (`Dynamic, _, t)) -> + let read = read_rec t in + (fun buf ofs len -> + let ofs, v = int8 buf ofs len in + if v = 0 then ofs, None + else let ofs, v = read buf ofs (len - Size.int8) in ofs, Some v) + | Obj (Opt (`Variable, _, t)) -> + let read = read_rec t in + (fun buf ofs len -> + if len = 0 then ofs, None + else + let ofs', v = read buf ofs len in + assert (ofs' = ofs + len) ; + ofs + len, Some v) + | Obj (Dft (_, e, _)) -> read_rec e + | Objs ((`Fixed _ | `Dynamic), e1, e2) -> + seq (read_rec e1) (read_rec e2) + | Objs (`Variable, e1, e2) -> + varseq { read = fun t -> read_rec t } e1 e2 + | Tup e -> read_rec e + | Tups ((`Fixed _ | `Dynamic), e1, e2) -> + seq (read_rec e1) (read_rec e2) + | Tups (`Variable, e1, e2) -> + varseq { read = fun t -> read_rec t } e1 e2 + | Conv { inj ; encoding = e } -> conv inj (read_rec e) + | Describe { encoding = e } -> read_rec e + | Def { encoding = e } -> read_rec e + | Splitted { encoding = e } -> read_rec e + | Union (_, sz, cases) -> + union { read = fun t -> read_rec t } sz cases + | Mu (_, _, self) -> fun buf ofs len -> read_rec (self e) buf ofs len + | Dynamic_size e -> + let read = read_rec e in + fun buf ofs len -> + let ofs, sz = int32 buf ofs len in + let sz = Int32.to_int sz in + if sz < 0 then raise (Invalid_size sz); + read buf ofs sz + | Delayed f -> read_rec (f ()) + +let read t buf ofs len = + try Some (read_rec t buf ofs len) + with _ -> None +let write = write +let of_bytes_exn ty buf = + let len = MBytes.length buf in + let read_len, r = read_rec ty buf 0 len in + if read_len <> len then + failwith "Data_encoding.Binary.of_bytes_exn: remainig data" ; + r +let of_bytes ty buf = + try Some (of_bytes_exn ty buf) + with _ -> None +let to_bytes = to_bytes + 
+let length = length + +let fixed_length e = + match classify e with + | `Fixed n -> Some n + | `Dynamic | `Variable -> None +let fixed_length_exn e = + match fixed_length e with + | Some n -> n + | None -> invalid_arg "Data_encoding.Binary.fixed_length_exn" + + +(* Facilities to decode streams of binary data *) + +type 'a status = + | Success of { res : 'a ; res_len : int ; remaining : MBytes.t list } + | Await of (MBytes.t -> 'a status) + | Error + +module Stream_reader = struct + + (* used as a zipper to code the function read_checker with the + ability to stop and wait for more data. In 'P_seq' case, data + length is parameterized by the current offset. Hence, it's a + function 'fun_data_len'. For the 'P_list' case, we store the + base offset (before starting reading the elements) and the + number of elements that have been read so far. *) + type path = + | P_top : path + | P_await : { path : path ; encoding : 'a t ; data_len : int } -> path + | P_seq : { path : path ; encoding : 'a t ; + fun_data_len : int -> int } -> path + | P_list : { path:path ; encoding:'a t ; data_len : int ; + base_ofs : int ; nb_elts_read : int } -> path + + (* used to accumulate given mbytes when reading a list of blocks, + as well as the current offset and the number of unread bytes *) + type mbytes_stream = { + past : MBytes.t Queue.t ; (* data that have been entirely read *) + future : (MBytes.t * int) Queue.t ; (* data that are not (fully) read *) + mutable past_len : int ; (*length of concatenation of data in 'past'*) + mutable unread : int ; (*number of cells that are unread in 'future'*) + ofs : int (*current absolute offset wrt to concatenation past @ future*) + } + + (* exception raised when additional mbytes are needed to continue + decoding *) + exception Need_more_data + + (* read a data that is stored in may Mbytes *) + let read_from_many_blocks reader buf ofs d_ofs = + let tmp = MBytes.create d_ofs in (*we will merge data in this mbyte*) + let r = ref d_ofs in (*to count 
the cells to be read*) + let rel_ofs = ref ofs in (*= ofs for first mbyte, 0 for others*) + while !r > 0 do + assert (not (Queue.is_empty buf.future)) ; + let b, len_b = Queue.peek buf.future in (*take the next mbyte*) + let len_chunk = len_b - !rel_ofs in (*the number of cells to read*) + if !r >= len_chunk then + begin (*copy b in 'past' if it is read entirely*) + ignore (Queue.pop buf.future) ; + Queue.push b buf.past ; + buf.past_len <- buf.past_len + len_b ; + end ; + (* copy (min !r len_chunk) data from b to tmp *) + MBytes.blit b !rel_ofs tmp (d_ofs - !r) (min !r len_chunk) ; + r := !r - len_chunk ; (* len_chunk data read during this round*) + rel_ofs := 0 ; (*next mbytes will be read starting from zero*) + done ; + reader tmp 0 d_ofs + + + (* generic function that reads data from an mbytes_stream. It is + parameterized by a function "reader" that effectively reads the + data *) + let generic_read_data delta_ofs reader buf = + let absolute_ofs = buf.ofs in + if buf.unread < delta_ofs then (*not enough data*) + raise Need_more_data ; + if delta_ofs = 0 then (*we'll read nothing*) + buf, reader (MBytes.create 0) 0 0 + else + let new_ofs = absolute_ofs + delta_ofs in + let ofs = absolute_ofs - buf.past_len in (*relative ofs wrt 'future'*) + buf.unread <- buf.unread-delta_ofs ; (*'delta_ofs' cells will be read*) + assert (not (Queue.is_empty buf.future)) ; (*we have some data to read*) + let b, len_b = Queue.peek buf.future in + let buf = { buf with ofs = new_ofs } in + if ofs + delta_ofs > len_b then + (*should read data from many mbytes*) + buf, read_from_many_blocks reader buf ofs delta_ofs + else + begin + if ofs + delta_ofs = len_b then + begin (*the rest of b will be entirely read. 
Put it in 'past'*) + ignore (Queue.pop buf.future) ; + Queue.push b buf.past ; + buf.past_len <- buf.past_len + len_b ; + end ; + buf, reader b ofs delta_ofs + end + + + (* functions that try to read data from a given mbytes_stream, + or raise Need_more_data *) + + let int8 buf = + generic_read_data Size.int8 (fun x y _ -> MBytes.get_int8 x y) buf + + let uint8 buf = + generic_read_data Size.uint8 (fun x y _ -> MBytes.get_uint8 x y) buf + + let char buf = + let buf, v = int8 buf in + buf, Char.chr v + + let bool buf = + let buf, v = int8 buf in + buf, v <> 0 + + let int16 buf = + generic_read_data Size.int16 (fun x y _ -> MBytes.get_int16 x y) buf + + let uint16 buf = + generic_read_data Size.uint16 (fun x y _ -> MBytes.get_uint16 x y) buf + + let uint30 buf = + generic_read_data Size.uint30 + (fun x y _ -> + let v = Int32.to_int (MBytes.get_int32 x y) in + if v < 0 then + failwith "Data_encoding.Binary.Reader.uint30: invalid data." ; + v) buf + + let int31 buf = + generic_read_data Size.int31 + (fun x y _ -> Int32.to_int (MBytes.get_int32 x y)) buf + + let int32 buf = + generic_read_data Size.int32 (fun x y _ -> MBytes.get_int32 x y) buf + + let int64 buf = + generic_read_data Size.int64 (fun x y _ -> MBytes.get_int64 x y) buf + + (** read a float64 (double) **) + let float buf = + (*Here, float means float64, which is read using MBytes.get_double !!*) + generic_read_data Size.float (fun x y _ -> MBytes.get_double x y) buf + + let fixed_length_bytes length buf = + generic_read_data length MBytes.sub buf + + let fixed_length_string length buf = + generic_read_data length MBytes.substring buf + + let read_tag = function + | `Uint8 -> uint8 + | `Uint16 -> uint16 + + (* auxiliary function: computing size of data in branches + Objs(`Variable) and Tups(`Variable) *) + let varseq_lengths e1 e2 ofs len = match classify e1, classify e2 with + | (`Dynamic | `Fixed _), `Variable -> len, (fun ofs' -> len - ofs' + ofs) + | `Variable, `Fixed n -> (len - n), (fun _ -> n) + | _ 
-> assert false (* Should be rejected by Kind.combine *) + + + (* adaptation of function read_rec to check binary data + incrementally. The function takes (and returns) a 'path' (for + incrementality), and 'mbytes_stream' *) + let rec data_checker + : type a. + path -> a encoding -> mbytes_stream -> int -> + path * mbytes_stream = + fun path e buf len -> + (*length of data with `Variable kind should be given by the caller*) + assert (classify e != `Variable || len >= 0) ; + try match e.encoding with + | Null -> next_path path buf + | Empty -> next_path path buf + | Constant _ -> next_path path buf + | Ignore -> next_path path { buf with ofs = buf.ofs + len } + | Bool -> next_path path (fst (bool buf)) + | Int8 -> next_path path (fst (int8 buf)) + | Uint8 -> next_path path (fst (uint8 buf)) + | Int16 -> next_path path (fst (int16 buf)) + | Uint16 -> next_path path (fst (uint16 buf)) + | Int31 -> next_path path (fst (int31 buf)) + | Int32 -> next_path path (fst (int32 buf)) + | Int64 -> next_path path (fst (int64 buf)) + | RangedInt { minimum ; maximum } -> + let (stream, ranged) = + match range_to_size ~minimum ~maximum with + | `Int8 -> int8 buf + | `Int16 -> int16 buf + | `Int31 -> int31 buf + | `Uint8 -> uint8 buf + | `Uint16 -> uint16 buf + | `Uint30 -> uint30 buf in + let ranged = if minimum > 0 then ranged + minimum else ranged in + assert (minimum < ranged && ranged < maximum) ; + next_path path stream + | Float -> next_path path (fst (float buf)) + | RangedFloat { minimum ; maximum } -> + let stream, float = float buf in + assert (minimum < float && maximum > float) ; + next_path path stream + | Bytes (`Fixed n) -> + next_path path (fst (fixed_length_bytes n buf)) + + | String (`Fixed n) -> + next_path path (fst (fixed_length_string n buf)) + + | Bytes `Variable -> + next_path path (fst (fixed_length_bytes len buf)) + + | String `Variable -> + next_path path (fst (fixed_length_string len buf)) + + | String_enum (_, arr) -> + next_path path + (match enum_size 
arr with + | `Uint8 -> fst @@ uint8 buf + | `Uint16 -> fst @@ uint16 buf + | `Uint30 -> fst @@ uint30 buf) + + | Array e -> + let p = P_list { path ; encoding = e ; base_ofs = buf.ofs ; + data_len = len ; nb_elts_read = 0 } in + next_path p buf + + | List e -> + let p = P_list { path ; encoding = e ; base_ofs = buf.ofs ; + data_len = len ; nb_elts_read = 0 } in + next_path p buf + + | Obj (Req (_, e)) -> data_checker path e buf len + + | Obj (Opt (`Dynamic, _, e)) -> + let buf, v = int8 buf in + if v = 0 then next_path path buf + else data_checker path e buf (len - Size.int8) + + | Obj (Opt (`Variable, _, e)) -> + if len = 0 then next_path path buf + else data_checker path e buf len + + | Obj (Dft (_, e, _)) -> data_checker path e buf len + + | Objs ((`Fixed _ | `Dynamic), e1, e2) -> + let f_len2 ofs' = len - (ofs' - buf.ofs) in + let path = + P_seq { path ; encoding = e2 ; fun_data_len = f_len2 } in + data_checker path e1 buf len + + | Objs (`Variable, e1, e2) -> + let len1, f_len2 = varseq_lengths e1 e2 buf.ofs len in + let path = + P_seq { path ; encoding = e2 ; fun_data_len = f_len2 } in + data_checker path e1 buf len1 + + | Tup e -> data_checker path e buf len + + | Tups ((`Fixed _ | `Dynamic), e1, e2) -> + let f_len2 ofs' = len - (ofs' - buf.ofs) in + let path = + P_seq { path ; encoding = e2 ; fun_data_len = f_len2 } in + data_checker path e1 buf len + + | Tups (`Variable, e1, e2) -> + let len1, f_len2 = varseq_lengths e1 e2 buf.ofs len in + let path = + P_seq { path ; encoding = e2 ; fun_data_len = f_len2 } in + data_checker path e1 buf len1 + + | Conv { encoding = e } -> data_checker path e buf len + + | Describe { encoding = e } -> data_checker path e buf len + + | Def { encoding = e } -> data_checker path e buf len + + | Splitted { encoding = e } -> data_checker path e buf len + + | Mu (_, _, self) -> data_checker path (self e) buf len + + | Union (_, sz, cases) -> + let buf, ctag = read_tag sz buf in + let opt = + List.fold_left + (fun acc c -> match c 
with + | (Case { encoding ; tag = Tag tag }) + when tag == ctag -> + assert (acc == None) ; + Some (data_checker path encoding buf) + | _ -> acc + )None cases + in + begin match opt with + | None -> raise (Unexpected_tag ctag) + | Some func -> func (len - (tag_size sz)) + end + + | Dynamic_size e -> + let buf, sz = int32 buf in + let sz = Int32.to_int sz in + if sz < 0 then raise (Invalid_size sz) ; + data_checker path e buf sz + + | Delayed f -> data_checker path (f ()) buf len + + with Need_more_data -> + P_await { path ; encoding = e ; data_len = len }, buf + + and next_path : path -> mbytes_stream -> path * mbytes_stream = + fun path buf -> + match path with + | P_top -> + P_top, buf (* success case *) + + | P_seq { path ; encoding ; fun_data_len } -> + (* check the right branch of a sequence. fun_data_len ofs gives + the length of the data to read *) + data_checker path encoding buf (fun_data_len buf.ofs) + + | P_await { path ; encoding ; data_len } -> + (* resume from an await *) + data_checker path encoding buf data_len + + | P_list + ({ path ; encoding ; base_ofs ; data_len ; nb_elts_read } as r) -> + (* read/check an eventual element of a list *) + if data_len = buf.ofs - base_ofs then + (* we've read all the elements of the list *) + next_path path buf + else + begin + (*some more elements to read*) + assert (data_len > buf.ofs - base_ofs) ; + (*check: if we've already read some elements, then current ofs + should be greater than initial ofs *) + let path = + P_list { r with nb_elts_read = nb_elts_read + 1} in + data_checker path encoding buf data_len + end + + let data_checker = next_path + + (* insert a given MBytes.t in a given mbytes_stream *) + let insert_mbytes mb_buf mb = + let len = MBytes.length mb in + if len > 0 then begin + Queue.push (mb, len) mb_buf.future ; + mb_buf.unread <- mb_buf.unread + len ; + end + + (* aux function called when data_checker succeeds: splits a given + 
mbytes_stream into a 'read' and 'unread' queues. This may + modify the content of the given mbytes_stream *) + let split_mbytes_stream { past_len ; past ; future ; unread ; ofs } = + let rel_ofs = ofs - past_len in + assert (rel_ofs >= 0) ; + if rel_ofs = 0 then past, future (* already done *) + else begin + assert (not(Queue.is_empty future)) ; (*because data_checker succeeded*) + let b, len = Queue.pop future in + assert (rel_ofs < len) ; (*inv. maintained by read_from_many_blocks*) + let b1 = MBytes.sub b 0 rel_ofs in (* read part of b *) + let b2 = MBytes.sub b rel_ofs (len-rel_ofs) in (* unread part of b *) + Queue.push b1 past ; + + (* push b2 at the beginning of 'future' using Queue.transfer*) + let tmp = Queue.create() in + Queue.push (b2, unread) tmp ; + Queue.transfer future tmp ; (*tmp === b2 ::: future in constant time*) + past, tmp + end + + (* given a state, this function returns a new status: + - if data are successfully checked, accumulated mbytes are + passed to 'success_result' that computes the final + result. 
Unread mbytes are also returned + - if some more data are needed, a function that waits for some + additional mbytes is returned + - eventual errors are reported/returned *) + let rec bytes_stream_reader_rec (path, mb_buf) success_result = + let success = + match path with + | P_top -> true + | P_await _ -> false + | _ -> assert false + in + assert (mb_buf.ofs >= mb_buf.past_len) ; + if success then + let q_read, q_unread = split_mbytes_stream mb_buf in + match success_result q_read mb_buf.ofs with + | Some a -> + let remaining = + List.rev @@ + Queue.fold + (fun acc (b, len) -> + if len = 0 then acc else b:: acc) [] q_unread + in + Success { res = a ; res_len = mb_buf.ofs ; remaining } + | None -> Error + (* success_result may fail because data_checker is + approximative in some situations *) + else + Await + (fun mb -> + insert_mbytes mb_buf mb ; + try + let state = data_checker path mb_buf in + bytes_stream_reader_rec state success_result + with _ -> Error) + + (* This function checks reading a stream of 'MBytes.t' wrt. a given + encoding: + - the given data encoding should have a 'Fixed' or a 'Dynamic' + size, otherwise an error is returned, + - the function returns an 'Error', a function w + ('Await w') that waits for more data (Mbytes.t), or + 'Success'. The function is parameterized by 'success_result' + that computes the data to return in case of success. 
+ An exception 'Invalid_argument "streaming data with variable + size"' is raised if the encoding has a variable size *) + let bytes_stream_reader : + MBytes.t list -> 'a t -> + (MBytes.t Queue.t -> int -> 'b option) -> 'b status + = fun l e success_result -> + match classify e with + | `Variable -> invalid_arg "streaming data with variable size" + | `Fixed _ | `Dynamic -> + let mb_buf = { + past = Queue.create() ; past_len = 0 ; + future = Queue.create() ; unread = 0; ofs = 0 } + in + List.iter (insert_mbytes mb_buf) l ; + let path = + P_await { path = P_top ; encoding = e ; data_len = - 1 } in + try bytes_stream_reader_rec (data_checker path mb_buf) success_result + with _ -> Error + +end + +(* concats a queue of mbytes into one MByte *) +let concat_mbyte_chunks queue tot_len = + if Queue.length queue = 1 then Queue.pop queue (* no copy *) + else (* copy smaller mbytes into one big mbyte *) + let buf = MBytes.create tot_len in + let cpt = ref 0 in + let tot_len' = ref tot_len in + while not (Queue.is_empty queue) do + let mb = Queue.pop queue in + let len = MBytes.length mb in + tot_len' := !tot_len' - len ; + assert (!tot_len' >= 0) ; + MBytes.blit mb 0 buf !cpt len ; + cpt := !cpt + len ; + done ; + assert (!tot_len' = 0) ; + buf + +(* Decode a stream of MBytes. see + Stream_reader.bytes_stream_traversal for more details *) +let read_stream_of_bytes ?(init=[]) encoding = + Stream_reader.bytes_stream_reader init encoding + (fun read_q ofs -> of_bytes encoding (concat_mbyte_chunks read_q ofs)) + +(* Check reading a stream of MBytes. 
see + Stream_reader.bytes_stream_traversal for more details *) +let check_stream_of_bytes ?(init=[]) encoding = + Stream_reader.bytes_stream_reader init encoding (fun _ _ -> Some ()) diff --git a/src/lib_data_encoding/binary.mli b/src/lib_data_encoding/binary.mli new file mode 100644 index 000000000..e9c64b9f6 --- /dev/null +++ b/src/lib_data_encoding/binary.mli @@ -0,0 +1,52 @@ +(**************************************************************************) +(* *) +(* Copyright (c) 2014 - 2018. *) +(* Dynamic Ledger Solutions, Inc. *) +(* *) +(* All rights reserved. No warranty, explicit or implicit, provided. *) +(* *) +(**************************************************************************) + +val length : 'a Encoding.t -> 'a -> int +val read : 'a Encoding.t -> MBytes.t -> int -> int -> (int * 'a) option +val write : 'a Encoding.t -> 'a -> MBytes.t -> int -> int option +val to_bytes : 'a Encoding.t -> 'a -> MBytes.t +val of_bytes : 'a Encoding.t -> MBytes.t -> 'a option +val of_bytes_exn : 'a Encoding.t -> MBytes.t -> 'a + +(** [to_bytes_list ?copy_blocks blocks_size encod data] encode the + given data as a list of successive blocks of length + 'blocks_size' at most. + + NB. If 'copy_blocks' is false (default), the blocks of the list + can be garbage-collected only when all the blocks are + unreachable (because of the 'optimized' implementation of + MBytes.sub used internally *) +val to_bytes_list : ?copy_blocks:bool -> int -> 'a Encoding.t -> 'a -> MBytes.t list + +(** This type is used when decoding binary data incrementally. 
+ - In case of 'Success', the decoded data, the size of used data + to decode the result, and the remaining data are returned + - In case of error, 'Error' is returned + - 'Await' status embeds a function that waits for additional data + to continue decoding, when given data are not sufficient *) +type 'a status = + | Success of { res : 'a ; res_len : int ; remaining : MBytes.t list } + | Await of (MBytes.t -> 'a status) + | Error + +(** This function allows to decode (or to initialize decoding) a + stream of 'MByte.t'. The given data encoding should have a + 'Fixed' or a 'Dynamic' size, otherwise an exception + 'Invalid_argument "streaming data with variable size"' is + raised *) +val read_stream_of_bytes : ?init:MBytes.t list -> 'a Encoding.t -> 'a status + +(** Like read_stream_of_bytes, but only checks that the stream can + be read. Note that this is an approximation because failures + that may come from conversion functions present in encodings are + not checked *) +val check_stream_of_bytes : ?init:MBytes.t list -> 'a Encoding.t -> unit status + +val fixed_length : 'a Encoding.t -> int option +val fixed_length_exn : 'a Encoding.t -> int diff --git a/src/lib_data_encoding/bson.ml b/src/lib_data_encoding/bson.ml new file mode 100644 index 000000000..5549acb2e --- /dev/null +++ b/src/lib_data_encoding/bson.ml @@ -0,0 +1,14 @@ +(**************************************************************************) +(* *) +(* Copyright (c) 2014 - 2018. *) +(* Dynamic Ledger Solutions, Inc. *) +(* *) +(* All rights reserved. No warranty, explicit or implicit, provided. 
*) +(* *) +(**************************************************************************) + +type bson = Json_repr_bson.bson +type t = bson + +let construct e v = Json_repr_bson.Json_encoding.construct (Json.convert e) v +let destruct e v = Json_repr_bson.Json_encoding.destruct (Json.convert e) v diff --git a/src/lib_data_encoding/bson.mli b/src/lib_data_encoding/bson.mli new file mode 100644 index 000000000..0a670dec4 --- /dev/null +++ b/src/lib_data_encoding/bson.mli @@ -0,0 +1,18 @@ +(**************************************************************************) +(* *) +(* Copyright (c) 2014 - 2018. *) +(* Dynamic Ledger Solutions, Inc. *) +(* *) +(* All rights reserved. No warranty, explicit or implicit, provided. *) +(* *) +(**************************************************************************) + +type bson = Json_repr_bson.bson +type t = bson + +(** Construct a BSON object from an encoding. *) +val construct : 't Encoding.t -> 't -> bson + +(** Destruct a BSON object into a value. + Fail with an exception if the JSON object and encoding do not match. *) +val destruct : 't Encoding.t -> bson -> 't diff --git a/src/lib_data_encoding/data_encoding.ml b/src/lib_data_encoding/data_encoding.ml index 3540920ac..53306cfff 100644 --- a/src/lib_data_encoding/data_encoding.ml +++ b/src/lib_data_encoding/data_encoding.ml @@ -6,2085 +6,230 @@ (* All rights reserved. No warranty, explicit or implicit, provided. 
*) (* *) (**************************************************************************) - -type json = - [ `O of (string * json) list - | `Bool of bool - | `Float of float - | `A of json list - | `Null - | `String of string ] -type bson = Json_repr_bson.bson - -type json_schema = Json_schema.schema - -exception No_case_matched -exception Unexpected_tag of int -exception Duplicated_tag of int -exception Invalid_tag of int * [ `Uint8 | `Uint16 ] -exception Unexpected_enum of string * string list -exception Invalid_size of int -exception Int_out_of_range of int * int * int -exception Float_out_of_range of float * float * float - -let apply ?(error=No_case_matched) fs v = - let rec loop = function - | [] -> raise error - | f :: fs -> - match f v with - | Some l -> l - | None -> loop fs in - loop fs - -let apply_map ?(error=No_case_matched) f fs v = - let rec loop = function - | [] -> raise error - | x :: fs -> - match (f x) v with - | Some l -> l - | None -> loop fs in - loop fs - -module Size = struct - let bool = 1 - let int8 = 1 - let uint8 = 1 - let char = 1 - let int16 = 2 - let uint16 = 2 - let uint30 = 4 - let uint32 = 4 - let uint64 = 8 - let int31 = 4 - let int32 = 4 - let int64 = 8 - let float = 8 -end - -type tag_size = [ `Uint8 | `Uint16 ] - -let tag_size = function - | `Uint8 -> Size.uint8 - | `Uint16 -> Size.uint16 - -module Kind = struct - - type t = - [ `Fixed of int - | `Dynamic - | `Variable ] - - type length = - [ `Fixed of int - | `Variable ] - - type enum = - [ `Dynamic - | `Variable ] - - let combine name : t -> t -> t = fun k1 k2 -> - match k1, k2 with - | `Fixed n1, `Fixed n2 -> `Fixed (n1 + n2) - | `Dynamic, `Dynamic | `Fixed _, `Dynamic - | `Dynamic, `Fixed _ -> `Dynamic - | `Variable, `Fixed _ - | (`Dynamic | `Fixed _), `Variable -> `Variable - | `Variable, `Dynamic -> - Printf.ksprintf invalid_arg - "Cannot merge two %s when the left element is of variable length \ - and the right one of dynamic length. 
\ - You should use the reverse order, or wrap the second one \ - with Data_encoding.dynamic_size." - name - | `Variable, `Variable -> - Printf.ksprintf invalid_arg - "Cannot merge two %s with variable length. \ - You should wrap one of them with Data_encoding.dynamic_size." - name - - let merge : t -> t -> t = fun k1 k2 -> - match k1, k2 with - | `Fixed n1, `Fixed n2 when n1 = n2 -> `Fixed n1 - | `Fixed _, `Fixed _ -> `Dynamic - | `Dynamic, `Dynamic | `Fixed _, `Dynamic - | `Dynamic, `Fixed _ -> `Dynamic - | `Variable, (`Dynamic | `Fixed _) - | (`Dynamic | `Fixed _), `Variable - | `Variable, `Variable -> `Variable - - let merge_list sz : t list -> t = function - | [] -> assert false (* should be rejected by Data_encoding.union *) - | k :: ks -> - match List.fold_left merge k ks with - | `Fixed n -> `Fixed (n + tag_size sz) - | k -> k - -end - -type case_tag = Tag of int | Json_only - -type 'a desc = - | Null : unit desc - | Empty : unit desc - | Ignore : unit desc - | Constant : string -> unit desc - | Bool : bool desc - | Int8 : int desc - | Uint8 : int desc - | Int16 : int desc - | Uint16 : int desc - | Int31 : int desc - | Int32 : Int32.t desc - | Int64 : Int64.t desc - | RangedInt : { minimum : int ; maximum : int } -> int desc - | RangedFloat : { minimum : float ; maximum : float } -> float desc - | Float : float desc - | Bytes : Kind.length -> MBytes.t desc - | String : Kind.length -> string desc - | String_enum : ('a, string * int) Hashtbl.t * 'a array -> 'a desc - | Array : 'a t -> 'a array desc - | List : 'a t -> 'a list desc - | Obj : 'a field -> 'a desc - | Objs : Kind.t * 'a t * 'b t -> ('a * 'b) desc - | Tup : 'a t -> 'a desc - | Tups : Kind.t * 'a t * 'b t -> ('a * 'b) desc - | Union : Kind.t * tag_size * 'a case list -> 'a desc - | Mu : Kind.enum * string * ('a t -> 'a t) -> 'a desc - | Conv : - { proj : ('a -> 'b) ; - inj : ('b -> 'a) ; - encoding : 'b t ; - schema : Json_schema.schema option } -> 'a desc - | Describe : - { title : string option ; - 
description : string option ; - encoding : 'a t } -> 'a desc - | Def : { name : string ; - encoding : 'a t } -> 'a desc - | Splitted : - { encoding : 'a t ; - json_encoding : 'a Json_encoding.encoding ; - is_obj : bool ; is_tup : bool } -> 'a desc - | Dynamic_size : 'a t -> 'a desc - | Delayed : (unit -> 'a t) -> 'a desc - -and _ field = - | Req : string * 'a t -> 'a field - | Opt : Kind.enum * string * 'a t -> 'a option field - | Dft : string * 'a t * 'a -> 'a field - -and 'a case = - | Case : { name : string option ; - encoding : 'a t ; - proj : ('t -> 'a option) ; - inj : ('a -> 't) ; - tag : case_tag } -> 't case - -and 'a t = { - encoding: 'a desc ; - mutable json_encoding: 'a Json_encoding.encoding option ; -} - -type signed_integer = [ `Int31 | `Int16 | `Int8 ] -type unsigned_integer = [ `Uint30 | `Uint16 | `Uint8 ] -type integer = [ signed_integer | unsigned_integer ] - -let signed_range_to_size min max : [> signed_integer ] = - if min >= ~-128 && max <= 127 - then `Int8 - else if min >= ~-32_768 && max <= 32_767 - then `Int16 - else `Int31 - -(* max should be centered at zero *) -let unsigned_range_to_size max : [> unsigned_integer ] = - if max <= 255 - then `Uint8 - else if max <= 65535 - then `Uint16 - else `Uint30 - -let integer_to_size = function - | `Int31 -> Size.int31 - | `Int16 -> Size.int16 - | `Int8 -> Size.int8 - | `Uint30 -> Size.uint30 - | `Uint16 -> Size.uint16 - | `Uint8 -> Size.uint8 - -let range_to_size ~minimum ~maximum : integer = - if minimum < 0 - then signed_range_to_size minimum maximum - else unsigned_range_to_size (maximum - minimum) - -let enum_size arr = - unsigned_range_to_size (Array.length arr) - -type 'a encoding = 'a t - -let rec classify : type a. 
a t -> Kind.t = fun e -> - match e.encoding with - (* Fixed *) - | Null -> `Fixed 0 - | Empty -> `Fixed 0 - | Constant _ -> `Fixed 0 - | Bool -> `Fixed Size.bool - | Int8 -> `Fixed Size.int8 - | Uint8 -> `Fixed Size.uint8 - | Int16 -> `Fixed Size.int16 - | Uint16 -> `Fixed Size.uint16 - | Int31 -> `Fixed Size.int31 - | Int32 -> `Fixed Size.int32 - | Int64 -> `Fixed Size.int64 - | RangedInt { minimum ; maximum } -> - `Fixed (integer_to_size @@ range_to_size ~minimum ~maximum) - | Float -> `Fixed Size.float - | RangedFloat _ -> `Fixed Size.float - (* Tagged *) - | Bytes kind -> (kind :> Kind.t) - | String kind -> (kind :> Kind.t) - | String_enum (_, cases) -> - `Fixed (integer_to_size (enum_size cases)) - | Obj (Opt (kind, _, _)) -> (kind :> Kind.t) - | Objs (kind, _, _) -> kind - | Tups (kind, _, _) -> kind - | Union (kind, _, _) -> (kind :> Kind.t) - | Mu (kind, _, _) -> (kind :> Kind.t) - (* Variable *) - | Ignore -> `Variable - | Array _ -> `Variable - | List _ -> `Variable - (* Recursive *) - | Obj (Req (_, encoding)) -> classify encoding - | Obj (Dft (_, encoding, _)) -> classify encoding - | Tup encoding -> classify encoding - | Conv { encoding } -> classify encoding - | Describe { encoding } -> classify encoding - | Def { encoding } -> classify encoding - | Splitted { encoding } -> classify encoding - | Dynamic_size _ -> `Dynamic - | Delayed f -> classify (f ()) - -let make ?json_encoding encoding = { encoding ; json_encoding } - -module Json = struct - - type pair_builder = { - build: 'a 'b. 
Kind.t -> 'a t -> 'b t -> ('a * 'b) t - } - - exception Parse_error of string - - let wrap_error f = - fun str -> - try f str - with exn -> raise (Json_encoding.Cannot_destruct ([], exn)) - - let int64_encoding = - let open Json_encoding in - union [ - case - int32 - (fun i -> - let j = Int64.to_int32 i in - if Int64.equal (Int64.of_int32 j) i then Some j else None) - Int64.of_int32 ; - case - string - (fun i -> Some (Int64.to_string i)) - Int64.of_string - ] - - let bytes_jsont = - let open Json_encoding in - let schema = - let open Json_schema in - create - { title = None ; - description = None ; - default = None; - enum = None; - kind = String { - pattern = Some "^[a-zA-Z0-9]+$"; - min_length = 0; - max_length = None; - }; - format = None ; - id = None } in - conv ~schema - MBytes.to_hex - (wrap_error MBytes.of_hex) - (conv - (fun (`Hex h) -> h) - (fun h -> `Hex h) - string) - - let rec lift_union : type a. a t -> a t = fun e -> - match e.encoding with - | Conv { proj ; inj ; encoding = e ; schema } -> begin - match lift_union e with - | { encoding = Union (kind, tag, cases) } -> - make @@ - Union (kind, tag, - List.map - (fun (Case { name ; encoding ; proj = proj' ; inj = inj' ; tag }) -> - Case { encoding ; - name ; - proj = (fun x -> proj' (proj x)); - inj = (fun x -> inj (inj' x)) ; - tag }) - cases) - | e -> make @@ Conv { proj ; inj ; encoding = e ; schema } - end - | Objs (p, e1, e2) -> - lift_union_in_pair - { build = fun p e1 e2 -> make @@ Objs (p, e1, e2) } - p e1 e2 - | Tups (p, e1, e2) -> - lift_union_in_pair - { build = fun p e1 e2 -> make @@ Tups (p, e1, e2) } - p e1 e2 - | _ -> e - - and lift_union_in_pair - : type a b. 
pair_builder -> Kind.t -> a t -> b t -> (a * b) t - = fun b p e1 e2 -> - match lift_union e1, lift_union e2 with - | e1, { encoding = Union (_kind, tag, cases) } -> - make @@ - Union (`Dynamic (* ignored *), tag, - List.map - (fun (Case { name ; encoding = e2 ; proj ; inj ; tag }) -> - Case { encoding = lift_union_in_pair b p e1 e2 ; - name ; - proj = (fun (x, y) -> - match proj y with - | None -> None - | Some y -> Some (x, y)) ; - inj = (fun (x, y) -> (x, inj y)) ; - tag }) - cases) - | { encoding = Union (_kind, tag, cases) }, e2 -> - make @@ - Union (`Dynamic (* ignored *), tag, - List.map - (fun (Case { name ; encoding = e1 ; proj ; inj ; tag }) -> - Case { encoding = lift_union_in_pair b p e1 e2 ; - name ; - proj = (fun (x, y) -> - match proj x with - | None -> None - | Some x -> Some (x, y)) ; - inj = (fun (x, y) -> (inj x, y)) ; - tag }) - cases) - | e1, e2 -> b.build p e1 e2 - - let rec json : type a. a desc -> a Json_encoding.encoding = - let open Json_encoding in - function - | Null -> null - | Empty -> empty - | Constant s -> constant s - | Ignore -> unit - | Int8 -> ranged_int ~minimum:~-(1 lsl 7) ~maximum:((1 lsl 7) - 1) "int8" - | Uint8 -> ranged_int ~minimum:0 ~maximum:((1 lsl 8) - 1) "uint8" - | Int16 -> ranged_int ~minimum:~-(1 lsl 15) ~maximum:((1 lsl 15) - 1) "int16" - | Uint16 -> ranged_int ~minimum:0 ~maximum:((1 lsl 16) - 1) "uint16" - | RangedInt { minimum ; maximum } -> ranged_int ~minimum ~maximum "rangedInt" - | Int31 -> int - | Int32 -> int32 - | Int64 -> int64_encoding - | Bool -> bool - | Float -> float - | RangedFloat { minimum; maximum } -> ranged_float ~minimum ~maximum "rangedFloat" - | String _ -> string (* TODO: check length *) - | Bytes _ -> bytes_jsont (* TODO check length *) - | String_enum (tbl, _) -> string_enum (Hashtbl.fold (fun a (str, _) acc -> (str, a) :: acc) tbl []) - | Array e -> array (get_json e) - | List e -> list (get_json e) - | Obj f -> obj1 (field_json f) - | Objs (_, e1, e2) -> - merge_objs (get_json e1) 
(get_json e2) - | Tup e -> tup1 (get_json e) - | Tups (_, e1, e2) -> - merge_tups (get_json e1) (get_json e2) - | Conv { proj ; inj ; encoding = e ; schema } -> conv ?schema proj inj (get_json e) - | Describe { title ; description ; encoding = e } -> - describe ?title ?description (get_json e) - | Def { name ; encoding = e } -> def name (get_json e) - | Mu (_, name, self) as ty -> - mu name (fun json_encoding -> get_json @@ self (make ~json_encoding ty)) - | Union (_tag_size, _, cases) -> union (List.map case_json cases) - | Splitted { json_encoding } -> json_encoding - | Dynamic_size e -> get_json e - | Delayed f -> get_json (f ()) - - and field_json - : type a. a field -> a Json_encoding.field = - let open Json_encoding in - function - | Req (name, e) -> req name (get_json e) - | Opt (_, name, e) -> opt name (get_json e) - | Dft (name, e, d) -> dft name (get_json e) d - - and case_json : type a. a case -> a Json_encoding.case = - let open Json_encoding in - function - | Case { encoding = e ; proj ; inj ; _ } -> case (get_json e) proj inj - - and get_json : type a. a t -> a Json_encoding.encoding = fun e -> - match e.json_encoding with - | None -> - let json_encoding = json (lift_union e).encoding in - e.json_encoding <- Some json_encoding ; - json_encoding - | Some json_encoding -> json_encoding - - let convert = get_json - - type path = path_item list - and path_item = - [ `Field of string - (** A field in an object. *) - | `Index of int - (** An index in an array. *) - | `Star - (** Any / every field or index. *) - | `Next - (** The next element after an array. 
*) ] - - include Json_encoding - - let construct e v = construct (get_json e) v - let destruct e v = destruct (get_json e) v - let schema e = schema (get_json e) - - let cannot_destruct fmt = - Format.kasprintf - (fun msg -> raise (Cannot_destruct ([], Failure msg))) - fmt - - type t = json - - let to_root = function - | `O ctns -> `O ctns - | `A ctns -> `A ctns - | `Null -> `O [] - | oth -> `A [ oth ] - - let to_string ?minify j = Ezjsonm.to_string ?minify (to_root j) - - let pp = Json_repr.(pp (module Ezjsonm)) - - let from_string s = - try Ok (Ezjsonm.from_string s :> json) - with Ezjsonm.Parse_error (_, msg) -> Error msg - - let from_stream (stream: string Lwt_stream.t) = - let buffer = ref "" in - Lwt_stream.filter_map - (fun str -> - buffer := !buffer ^ str ; - try - let json = Ezjsonm.from_string !buffer in - buffer := "" ; - Some (Ok json) - with Ezjsonm.Parse_error _ -> - None) - stream - -end - -module Bson = struct - - type t = Json_repr_bson.bson - - include Json_repr_bson.Json_encoding - - let construct e v = construct (Json.get_json e) v - let destruct e v = destruct (Json.get_json e) v - -end - -module Encoding = struct - - module Fixed = struct - let string n = make @@ String (`Fixed n) - let bytes n = make @@ Bytes (`Fixed n) +module Encoding: sig + type 'a t = 'a Encoding.t + type 'a encoding = 'a t + exception No_case_matched + exception Unexpected_tag of int + exception Duplicated_tag of int + exception Invalid_tag of int * [ `Uint8 | `Uint16 ] + exception Unexpected_enum of string * string list + val null : unit encoding + val empty : unit encoding + val unit : unit encoding + val constant : string -> unit encoding + val int8 : int encoding + val uint8 : int encoding + val int16 : int encoding + val uint16 : int encoding + val int31 : int encoding + val int32 : int32 encoding + val int64 : int64 encoding + val ranged_int : int -> int -> int encoding + val ranged_float : float -> float -> float encoding + val bool : bool encoding + val string : 
string encoding + val bytes : MBytes.t encoding + val float : float encoding + val option : 'a encoding -> 'a option encoding + val result : 'a encoding -> 'b encoding -> ('a, 'b) result encoding + val string_enum : (string * 'a) list -> 'a encoding + val is_obj : 'a encoding -> bool + val is_tup : 'a encoding -> bool + module Fixed : sig + val string : int -> string encoding + val bytes : int -> MBytes.t encoding end - - module Variable = struct - let string = make @@ String `Variable - let bytes = make @@ Bytes `Variable - let check_not_variable name e = - match classify e with - | `Variable -> - Printf.ksprintf invalid_arg - "Cannot insert variable length element in %s. \ - You should wrap the contents using Data_encoding.dynamic_size." name - | `Dynamic | `Fixed _ -> () - let array e = - check_not_variable "an array" e ; - make @@ Array e - let list e = - check_not_variable "a list" e ; - make @@ List e + module Variable : sig + val string : string encoding + val bytes : MBytes.t encoding + val array : 'a encoding -> 'a array encoding + val list : 'a encoding -> 'a list encoding end - - let dynamic_size e = - make @@ Dynamic_size e - - let delayed f = - make @@ Delayed f - - let null = make @@ Null - let empty = make @@ Empty - let unit = make @@ Ignore - let constant s = make @@ Constant s - let bool = make @@ Bool - let int8 = make @@ Int8 - let uint8 = make @@ Uint8 - let int16 = make @@ Int16 - let uint16 = make @@ Uint16 - let int31 = make @@ Int31 - let int32 = make @@ Int32 - let ranged_int minimum maximum = - let minimum = min minimum maximum - and maximum = max minimum maximum in - if minimum < -(1 lsl 30) || (1 lsl 30) - 1 < maximum then - invalid_arg "Data_encoding.ranged_int" ; - make @@ RangedInt { minimum ; maximum } - let ranged_float minimum maximum = - let minimum = min minimum maximum - and maximum = max minimum maximum in - make @@ RangedFloat { minimum ; maximum } - let int64 = make @@ Int64 - let float = make @@ Float - - let string = 
dynamic_size Variable.string - let bytes = dynamic_size Variable.bytes - let array e = dynamic_size (Variable.array e) - let list e = dynamic_size (Variable.list e) - - let string_enum = function - | [] -> invalid_arg "data_encoding.string_enum: cannot have zero cases" - | [ _case ] -> invalid_arg "data_encoding.string_enum: cannot have a single case, use constant instead" - | _ :: _ as cases -> - let arr = Array.of_list (List.map snd cases) in - let tbl = Hashtbl.create (Array.length arr) in - List.iteri (fun ind (str, a) -> Hashtbl.add tbl a (str, ind)) cases ; - make @@ String_enum (tbl, arr) - - let conv proj inj ?schema encoding = - make @@ Conv { proj ; inj ; encoding ; schema } - - let describe ?title ?description encoding = - match title, description with - | None, None -> encoding - | _, _ -> make @@ Describe { title ; description ; encoding } - - let def name encoding = make @@ Def { name ; encoding } - - let req ?title ?description n t = - Req (n, describe ?title ?description t) - let opt ?title ?description n encoding = - let kind = - match classify encoding with - | `Variable -> `Variable - | `Fixed _ | `Dynamic -> `Dynamic in - Opt (kind, n, make @@ Describe { title ; description ; encoding }) - let varopt ?title ?description n encoding = - Opt (`Variable, n, make @@ Describe { title ; description ; encoding }) - let dft ?title ?description n t d = - Dft (n, describe ?title ?description t, d) - - let raw_splitted ~json ~binary = - make @@ Splitted { encoding = binary ; - json_encoding = json ; - is_obj = false ; - is_tup = false } - - let rec is_obj : type a. 
a t -> bool = fun e -> - match e.encoding with - | Obj _ -> true - | Objs _ (* by construction *) -> true - | Conv { encoding = e } -> is_obj e - | Dynamic_size e -> is_obj e - | Union (_,_,cases) -> - List.for_all (fun (Case { encoding = e }) -> is_obj e) cases - | Empty -> true - | Ignore -> true - | Mu (_,_,self) -> is_obj (self e) - | Splitted { is_obj } -> is_obj - | Delayed f -> is_obj (f ()) - | Describe { encoding } -> is_obj encoding - | Def { encoding } -> is_obj encoding - | _ -> false - - let rec is_tup : type a. a t -> bool = fun e -> - match e.encoding with - | Tup _ -> true - | Tups _ (* by construction *) -> true - | Conv { encoding = e } -> is_tup e - | Dynamic_size e -> is_tup e - | Union (_,_,cases) -> - List.for_all (function Case { encoding = e} -> is_tup e) cases - | Mu (_,_,self) -> is_tup (self e) - | Splitted { is_tup } -> is_tup - | Delayed f -> is_tup (f ()) - | Describe { encoding } -> is_tup encoding - | Def { encoding } -> is_tup encoding - | _ -> false - - let splitted ~json ~binary = - make @@ Splitted { encoding = binary ; - json_encoding = Json.convert json ; - is_obj = is_obj json ; - is_tup = is_tup json } - - let json = - let binary = - conv - (fun json -> - Json_repr.convert - (module Json_repr.Ezjsonm) - (module Json_repr_bson.Repr) - json |> - Json_repr_bson.bson_to_bytes |> - Bytes.to_string) - (fun s -> try - Bytes.of_string s |> - Json_repr_bson.bytes_to_bson ~copy:false |> - Json_repr.convert - (module Json_repr_bson.Repr) - (module Json_repr.Ezjsonm) - with - | Json_repr_bson.Bson_decoding_error (msg, _, _) -> - raise (Json.Parse_error msg)) - string in - let json = - Json_encoding.any_ezjson_value in - raw_splitted ~binary ~json - - let json_schema = - conv - Json_schema.to_json - Json_schema.of_json - json - - let raw_merge_objs e1 e2 = - let kind = Kind.combine "objects" (classify e1) (classify e2) in - make @@ Objs (kind, e1, e2) - - let obj1 f1 = make @@ Obj f1 - let obj2 f2 f1 = - raw_merge_objs (obj1 f2) (obj1 f1) 
- let obj3 f3 f2 f1 = - raw_merge_objs (obj1 f3) (obj2 f2 f1) - let obj4 f4 f3 f2 f1 = - raw_merge_objs (obj2 f4 f3) (obj2 f2 f1) - let obj5 f5 f4 f3 f2 f1 = - raw_merge_objs (obj1 f5) (obj4 f4 f3 f2 f1) - let obj6 f6 f5 f4 f3 f2 f1 = - raw_merge_objs (obj2 f6 f5) (obj4 f4 f3 f2 f1) - let obj7 f7 f6 f5 f4 f3 f2 f1 = - raw_merge_objs (obj3 f7 f6 f5) (obj4 f4 f3 f2 f1) - let obj8 f8 f7 f6 f5 f4 f3 f2 f1 = - raw_merge_objs (obj4 f8 f7 f6 f5) (obj4 f4 f3 f2 f1) - let obj9 f9 f8 f7 f6 f5 f4 f3 f2 f1 = - raw_merge_objs (obj1 f9) (obj8 f8 f7 f6 f5 f4 f3 f2 f1) - let obj10 f10 f9 f8 f7 f6 f5 f4 f3 f2 f1 = - raw_merge_objs (obj2 f10 f9) (obj8 f8 f7 f6 f5 f4 f3 f2 f1) - - let merge_objs o1 o2 = - if is_obj o1 && is_obj o2 then - raw_merge_objs o1 o2 - else - invalid_arg "Json_encoding.merge_objs" - - let raw_merge_tups e1 e2 = - let kind = Kind.combine "tuples" (classify e1) (classify e2) in - make @@ Tups (kind, e1, e2) - - let tup1 e1 = make @@ Tup e1 - let tup2 e2 e1 = - raw_merge_tups (tup1 e2) (tup1 e1) - let tup3 e3 e2 e1 = - raw_merge_tups (tup1 e3) (tup2 e2 e1) - let tup4 e4 e3 e2 e1 = - raw_merge_tups (tup2 e4 e3) (tup2 e2 e1) - let tup5 e5 e4 e3 e2 e1 = - raw_merge_tups (tup1 e5) (tup4 e4 e3 e2 e1) - let tup6 e6 e5 e4 e3 e2 e1 = - raw_merge_tups (tup2 e6 e5) (tup4 e4 e3 e2 e1) - let tup7 e7 e6 e5 e4 e3 e2 e1 = - raw_merge_tups (tup3 e7 e6 e5) (tup4 e4 e3 e2 e1) - let tup8 e8 e7 e6 e5 e4 e3 e2 e1 = - raw_merge_tups (tup4 e8 e7 e6 e5) (tup4 e4 e3 e2 e1) - let tup9 e9 e8 e7 e6 e5 e4 e3 e2 e1 = - raw_merge_tups (tup1 e9) (tup8 e8 e7 e6 e5 e4 e3 e2 e1) - let tup10 e10 e9 e8 e7 e6 e5 e4 e3 e2 e1 = - raw_merge_tups (tup2 e10 e9) (tup8 e8 e7 e6 e5 e4 e3 e2 e1) - - let merge_tups t1 t2 = - if is_tup t1 && is_tup t2 then - raw_merge_tups t1 t2 - else - invalid_arg "Tezos_serial.Encoding.merge_tups" - - let conv3 ty = - conv - (fun (c, b, a) -> (c, (b, a))) - (fun (c, (b, a)) -> (c, b, a)) - ty - let obj3 f3 f2 f1 = conv3 (obj3 f3 f2 f1) - let tup3 f3 f2 f1 = conv3 (tup3 f3 
f2 f1) - let conv4 ty = - conv - (fun (d, c, b, a) -> ((d, c), (b, a))) - (fun ((d, c), (b, a)) -> (d, c, b, a)) - ty - let obj4 f4 f3 f2 f1 = conv4 (obj4 f4 f3 f2 f1) - let tup4 f4 f3 f2 f1 = conv4 (tup4 f4 f3 f2 f1) - let conv5 ty = - conv - (fun (e, d, c, b, a) -> (e, ((d, c), (b, a)))) - (fun (e, ((d, c), (b, a))) -> (e, d, c, b, a)) - ty - let obj5 f5 f4 f3 f2 f1 = conv5 (obj5 f5 f4 f3 f2 f1) - let tup5 f5 f4 f3 f2 f1 = conv5 (tup5 f5 f4 f3 f2 f1) - let conv6 ty = - conv - (fun (f, e, d, c, b, a) -> ((f, e), ((d, c), (b, a)))) - (fun ((f, e), ((d, c), (b, a))) -> (f, e, d, c, b, a)) - ty - let obj6 f6 f5 f4 f3 f2 f1 = conv6 (obj6 f6 f5 f4 f3 f2 f1) - let tup6 f6 f5 f4 f3 f2 f1 = conv6 (tup6 f6 f5 f4 f3 f2 f1) - let conv7 ty = - conv - (fun (g, f, e, d, c, b, a) -> ((g, (f, e)), ((d, c), (b, a)))) - (fun ((g, (f, e)), ((d, c), (b, a))) -> (g, f, e, d, c, b, a)) - ty - let obj7 f7 f6 f5 f4 f3 f2 f1 = conv7 (obj7 f7 f6 f5 f4 f3 f2 f1) - let tup7 f7 f6 f5 f4 f3 f2 f1 = conv7 (tup7 f7 f6 f5 f4 f3 f2 f1) - let conv8 ty = - conv (fun (h, g, f, e, d, c, b, a) -> - (((h, g), (f, e)), ((d, c), (b, a)))) - (fun (((h, g), (f, e)), ((d, c), (b, a))) -> - (h, g, f, e, d, c, b, a)) - ty - let obj8 f8 f7 f6 f5 f4 f3 f2 f1 = conv8 (obj8 f8 f7 f6 f5 f4 f3 f2 f1) - let tup8 f8 f7 f6 f5 f4 f3 f2 f1 = conv8 (tup8 f8 f7 f6 f5 f4 f3 f2 f1) - let conv9 ty = - conv - (fun (i, h, g, f, e, d, c, b, a) -> - (i, (((h, g), (f, e)), ((d, c), (b, a))))) - (fun (i, (((h, g), (f, e)), ((d, c), (b, a)))) -> - (i, h, g, f, e, d, c, b, a)) - ty - let obj9 f9 f8 f7 f6 f5 f4 f3 f2 f1 = - conv9 (obj9 f9 f8 f7 f6 f5 f4 f3 f2 f1) - let tup9 f9 f8 f7 f6 f5 f4 f3 f2 f1 = - conv9 (tup9 f9 f8 f7 f6 f5 f4 f3 f2 f1) - let conv10 ty = - conv - (fun (j, i, h, g, f, e, d, c, b, a) -> - ((j, i), (((h, g), (f, e)), ((d, c), (b, a))))) - (fun ((j, i), (((h, g), (f, e)), ((d, c), (b, a)))) -> - (j, i, h, g, f, e, d, c, b, a)) - ty - let obj10 f10 f9 f8 f7 f6 f5 f4 f3 f2 f1 = - conv10 (obj10 f10 f9 f8 f7 f6 f5 f4 
f3 f2 f1) - let tup10 f10 f9 f8 f7 f6 f5 f4 f3 f2 f1 = - conv10 (tup10 f10 f9 f8 f7 f6 f5 f4 f3 f2 f1) - - let check_cases tag_size cases = - if cases = [] then - invalid_arg "Data_encoding.union: empty list of cases." ; - let max_tag = - match tag_size with - | `Uint8 -> 256 - | `Uint16 -> 256 * 256 in - ignore @@ - List.fold_left - (fun others (Case { tag }) -> - match tag with - | Json_only -> others - | Tag tag -> - if List.mem tag others then raise (Duplicated_tag tag) ; - if tag < 0 || max_tag <= tag then - raise (Invalid_tag (tag, tag_size)) ; - tag :: others - ) - [] cases - - let union ?(tag_size = `Uint8) cases = - check_cases tag_size cases ; - let kinds = - List.map (fun (Case { encoding }) -> classify encoding) cases in - let kind = Kind.merge_list tag_size kinds in - make @@ Union (kind, tag_size, cases) - let case ?name tag encoding proj inj = Case { name ; encoding ; proj ; inj ; tag } - let option ty = - union - ~tag_size:`Uint8 - [ case (Tag 1) ty - ~name:"Some" - (fun x -> x) - (fun x -> Some x) ; - case (Tag 0) empty - ~name:"None" - (function None -> Some () | Some _ -> None) - (fun () -> None) ; - ] - let mu name self = - let kind = - try - match classify (self (make @@ Mu (`Dynamic, name, self))) with - | `Fixed _ | `Dynamic -> `Dynamic - | `Variable -> raise Exit - with Exit | _ (* TODO variability error *) -> - ignore @@ classify (self (make @@ Mu (`Variable, name, self))) ; - `Variable in - make @@ Mu (kind, name, self) - - let result ok_enc error_enc = - union - ~tag_size:`Uint8 - [ case (Tag 1) ok_enc - (function Ok x -> Some x | Error _ -> None) - (fun x -> Ok x) ; - case (Tag 0) error_enc - (function Ok _ -> None | Error x -> Some x) - (fun x -> Error x) ; - ] - + val dynamic_size : 'a encoding -> 'a encoding + val delayed : (unit -> 'a encoding) -> 'a encoding + type 'a field + val req : + ?title:string -> ?description:string -> + string -> 't encoding -> 't field + val opt : + ?title:string -> ?description:string -> + string -> 't 
encoding -> 't option field + val varopt : + ?title:string -> ?description:string -> + string -> 't encoding -> 't option field + val dft : + ?title:string -> ?description:string -> + string -> 't encoding -> 't -> 't field + val obj1 : + 'f1 field -> 'f1 encoding + val obj2 : + 'f1 field -> 'f2 field -> ('f1 * 'f2) encoding + val obj3 : + 'f1 field -> 'f2 field -> 'f3 field -> ('f1 * 'f2 * 'f3) encoding + val obj4 : + 'f1 field -> 'f2 field -> 'f3 field -> 'f4 field -> + ('f1 * 'f2 * 'f3 * 'f4) encoding + val obj5 : + 'f1 field -> 'f2 field -> 'f3 field -> 'f4 field -> 'f5 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5) encoding + val obj6 : + 'f1 field -> 'f2 field -> 'f3 field -> 'f4 field -> 'f5 field -> + 'f6 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6) encoding + val obj7 : + 'f1 field -> 'f2 field -> 'f3 field -> 'f4 field -> 'f5 field -> + 'f6 field -> 'f7 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7) encoding + val obj8 : + 'f1 field -> 'f2 field -> 'f3 field -> 'f4 field -> 'f5 field -> + 'f6 field -> 'f7 field -> 'f8 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7 * 'f8) encoding + val obj9 : + 'f1 field -> 'f2 field -> 'f3 field -> 'f4 field -> 'f5 field -> + 'f6 field -> 'f7 field -> 'f8 field -> 'f9 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7 * 'f8 * 'f9) encoding + val obj10 : + 'f1 field -> 'f2 field -> 'f3 field -> 'f4 field -> 'f5 field -> + 'f6 field -> 'f7 field -> 'f8 field -> 'f9 field -> 'f10 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7 * 'f8 * 'f9 * 'f10) encoding + val tup1 : + 'f1 encoding -> + 'f1 encoding + val tup2 : + 'f1 encoding -> 'f2 encoding -> + ('f1 * 'f2) encoding + val tup3 : + 'f1 encoding -> 'f2 encoding -> 'f3 encoding -> + ('f1 * 'f2 * 'f3) encoding + val tup4 : + 'f1 encoding -> 'f2 encoding -> 'f3 encoding -> 'f4 encoding -> + ('f1 * 'f2 * 'f3 * 'f4) encoding + val tup5 : + 'f1 encoding -> 'f2 encoding -> 'f3 encoding -> 'f4 encoding -> + 'f5 encoding -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5) encoding + val 
tup6 : + 'f1 encoding -> 'f2 encoding -> 'f3 encoding -> 'f4 encoding -> + 'f5 encoding -> 'f6 encoding -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6) encoding + val tup7 : + 'f1 encoding -> 'f2 encoding -> 'f3 encoding -> 'f4 encoding -> + 'f5 encoding -> 'f6 encoding -> 'f7 encoding -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7) encoding + val tup8 : + 'f1 encoding -> 'f2 encoding -> 'f3 encoding -> 'f4 encoding -> + 'f5 encoding -> 'f6 encoding -> 'f7 encoding -> 'f8 encoding -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7 * 'f8) encoding + val tup9 : + 'f1 encoding -> 'f2 encoding -> 'f3 encoding -> 'f4 encoding -> + 'f5 encoding -> 'f6 encoding -> 'f7 encoding -> 'f8 encoding -> + 'f9 encoding -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7 * 'f8 * 'f9) encoding + val tup10 : + 'f1 encoding -> 'f2 encoding -> 'f3 encoding -> 'f4 encoding -> + 'f5 encoding -> 'f6 encoding -> 'f7 encoding -> 'f8 encoding -> + 'f9 encoding -> 'f10 encoding -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7 * 'f8 * 'f9 * 'f10) encoding + val merge_objs : 'o1 encoding -> 'o2 encoding -> ('o1 * 'o2) encoding + val merge_tups : 'a1 encoding -> 'a2 encoding -> ('a1 * 'a2) encoding + val array : 'a encoding -> 'a array encoding + val list : 'a encoding -> 'a list encoding + val assoc : 'a encoding -> (string * 'a) list encoding + type 't case + type case_tag = Tag of int | Json_only + val case : + ?name:string -> + case_tag -> + 'a encoding -> ('t -> 'a option) -> ('a -> 't) -> 't case + val union : + ?tag_size:[ `Uint8 | `Uint16 ] -> 't case list -> 't encoding + val describe : + ?title:string -> ?description:string -> + 't encoding ->'t encoding + val def : string -> 'a encoding -> 'a encoding + val conv : + ('a -> 'b) -> ('b -> 'a) -> + ?schema:Json_schema.schema -> + 'b encoding -> 'a encoding + val mu : string -> ('a encoding -> 'a encoding) -> 'a encoding + val classify : 'a encoding -> [ `Fixed of int | `Dynamic | `Variable ] + val splitted : json:'a encoding -> binary:'a encoding -> 'a 
encoding +end = struct + include Encoding + let splitted ~json ~binary = raw_splitted ~json:(Json.convert json) ~binary let assoc enc = - let json = Json_encoding.assoc (Json.get_json enc) in + let json = Json_encoding.assoc (Json.convert enc) in let binary = list (tup2 string enc) in raw_splitted ~json ~binary end include Encoding -module Binary = struct - - type 'l writer = { - write: 'a. 'a t -> 'a -> MBytes.t -> int -> int ; - } - - type 'l reader = { - read: 'a. 'a t -> MBytes.t -> int -> int -> (int * 'a) ; - } - - let rec length : type x. x t -> x -> int = fun e -> - match e.encoding with - (* Fixed *) - | Null -> fun _ -> 0 - | Empty -> fun _ -> 0 - | Constant _ -> fun _ -> 0 - | Bool -> fun _ -> Size.bool - | Int8 -> fun _ -> Size.int8 - | Uint8 -> fun _ -> Size.uint8 - | Int16 -> fun _ -> Size.int16 - | Uint16 -> fun _ -> Size.uint16 - | Int31 -> fun _ -> Size.int31 - | Int32 -> fun _ -> Size.int32 - | Int64 -> fun _ -> Size.int64 - | RangedInt { minimum ; maximum } -> - fun _ -> integer_to_size @@ range_to_size ~minimum ~maximum - | Float -> fun _ -> Size.float - | RangedFloat _ -> fun _ -> Size.float - | Bytes `Fixed n -> fun _ -> n - | String `Fixed n -> fun _ -> n - | String_enum (_, arr) -> - fun _ -> integer_to_size @@ enum_size arr - | Objs (`Fixed n, _, _) -> fun _ -> n - | Tups (`Fixed n, _, _) -> fun _ -> n - | Union (`Fixed n, _, _) -> fun _ -> n - (* Dynamic *) - | Objs (`Dynamic, e1, e2) -> - let length1 = length e1 in - let length2 = length e2 in - fun (v1, v2) -> length1 v1 + length2 v2 - | Tups (`Dynamic, e1, e2) -> - let length1 = length e1 in - let length2 = length e2 in - fun (v1, v2) -> length1 v1 + length2 v2 - | Union (`Dynamic, sz, cases) -> - let tag_size = tag_size sz in - let case_length (Case { encoding = e ; proj }) = - let length v = tag_size + length e v in - fun v -> Option.map ~f:length (proj v) in - apply (List.map case_length cases) - | Mu (`Dynamic, _name, self) -> - fun v -> length (self e) v - | Obj (Opt (`Dynamic, _, 
e)) -> - let length = length e in - (function None -> 1 | Some x -> 1 + length x) - (* Variable *) - | Ignore -> fun _ -> 0 - | Bytes `Variable -> MBytes.length - | String `Variable -> String.length - | Array e -> - let length = length e in - fun v -> - Array.fold_left - (fun acc v -> length v + acc) - 0 v - | List e -> - let length = length e in - fun v -> - List.fold_left - (fun acc v -> length v + acc) - 0 v - | Objs (`Variable, e1, e2) -> - let length1 = length e1 in - let length2 = length e2 in - fun (v1, v2) -> length1 v1 + length2 v2 - | Tups (`Variable, e1, e2) -> - let length1 = length e1 - and length2 = length e2 in - fun (v1, v2) -> length1 v1 + length2 v2 - | Obj (Opt (`Variable, _, e)) -> - let length = length e in - (function None -> 0 | Some x -> length x) - | Union (`Variable, sz, cases) -> - let rec case_lengths json_only_cases acc = function - | [] -> (List.rev acc, json_only_cases) - | Case { tag = Json_only } :: tl -> case_lengths true acc tl - | Case { encoding = e ; proj ; tag = Tag _ } :: tl -> - let length v = tag_size sz + length e v in - case_lengths - json_only_cases - ((fun v -> - match proj v with - | None -> None - | Some v -> Some (length v)) :: acc) - tl in - let cases, json_only = case_lengths false [] cases in - apply - ~error:(if json_only - then Failure "No case matched, but JSON only cases were present in union" - else No_case_matched) - cases - | Mu (`Variable, _name, self) -> - fun v -> length (self e) v - (* Recursive*) - | Obj (Req (_, e)) -> length e - | Obj (Dft (_, e, _)) -> length e - | Tup e -> length e - | Conv { encoding = e ; proj } -> - let length = length e in - fun v -> length (proj v) - | Describe { encoding = e } -> length e - | Def { encoding = e } -> length e - | Splitted { encoding = e } -> length e - | Dynamic_size e -> - let length = length e in - fun v -> Size.int32 + length v - | Delayed f -> length (f ()) - - (** Writer *) - - module Writer = struct - - let int8 v buf ofs = - if (v < - (1 lsl 7) || v >= 
1 lsl 7) then - invalid_arg "Data_encoding.Binary.Writer.int8" ; - MBytes.set_int8 buf ofs v; - ofs + Size.int8 - - let uint8 v buf ofs = - if (v < 0 || v >= 1 lsl 8) then - invalid_arg "Data_encoding.Binary.Writer.uint8" ; - MBytes.set_int8 buf ofs v; - ofs + Size.uint8 - - let char v buf ofs = - MBytes.set_char buf ofs v; - ofs + Size.char - - let bool v buf ofs = - uint8 (if v then 255 else 0) buf ofs - - let int16 v buf ofs = - if (v < - (1 lsl 15) || v >= 1 lsl 15) then - invalid_arg "Data_encoding.Binary.Writer.int16" ; - MBytes.set_int16 buf ofs v; - ofs + Size.int16 - - let uint16 v buf ofs = - if (v < 0 || v >= 1 lsl 16) then - invalid_arg "Data_encoding.Binary.Writer.uint16" ; - MBytes.set_int16 buf ofs v; - ofs + Size.uint16 - - let uint30 v buf ofs = - if v < 0 || (Sys.int_size > 31 && v >= 1 lsl 30) then - invalid_arg "Data_encoding.Binary.Writer.uint30" ; - MBytes.set_int32 buf ofs (Int32.of_int v); - ofs + Size.uint30 - - let int31 v buf ofs = - if Sys.int_size > 31 && (v < ~- (1 lsl 30) || v >= 1 lsl 30) then - invalid_arg "Data_encoding.Binary.Writer.int31" ; - MBytes.set_int32 buf ofs (Int32.of_int v); - ofs + Size.int31 - - let int32 v buf ofs = - MBytes.set_int32 buf ofs v; - ofs + Size.int32 - - let int64 v buf ofs = - MBytes.set_int64 buf ofs v; - ofs + Size.int64 - - (** write a float64 (double) **) - let float v buf ofs = - (*Here, float means float64, which is written using MBytes.set_double !!*) - MBytes.set_double buf ofs v; - ofs + Size.float - - let fixed_kind_bytes length s buf ofs = - MBytes.blit s 0 buf ofs length; - ofs + length - - let variable_length_bytes s buf ofs = - let length = MBytes.length s in - MBytes.blit s 0 buf ofs length ; - ofs + length - - let fixed_kind_string length s buf ofs = - if String.length s <> length then invalid_arg "fixed_kind_string"; - MBytes.blit_from_string s 0 buf ofs length; - ofs + length - - let variable_length_string s buf ofs = - let length = String.length s in - MBytes.blit_from_string s 0 buf 
ofs length ; - ofs + length - - let objs w1 w2 (v1,v2) buf ofs = - w1 v1 buf ofs |> w2 v2 buf - - let array w a buf ofs = - Array.fold_left (fun ofs v -> w v buf ofs) ofs a - - let list w l buf ofs = - List.fold_left (fun ofs v -> w v buf ofs) ofs l - - let conv proj w v buf ofs = - w (proj v) buf ofs - - let write_tag = function - | `Uint8 -> uint8 - | `Uint16 -> uint16 - - let union w sz cases = - let writes_case = function - | Case { tag = Json_only } -> None - | Case { encoding = e ; proj ; tag = Tag tag } -> - let write = w.write e in - let write v buf ofs = - write_tag sz tag buf ofs |> write v buf in - Some (fun v -> - match proj v with - | None -> None - | Some v -> Some (write v)) in - apply (TzList.filter_map writes_case cases) - - end - - module BufferedWriter = struct - - let int8 v buf = - if (v < - (1 lsl 7) || v >= 1 lsl 7) then - invalid_arg "Data_encoding.Binary.Writer.int8" ; - MBytes_buffer.write_int8 buf v - - let uint8 v buf = - if (v < 0 || v >= 1 lsl 8) then - invalid_arg "Data_encoding.Binary.Writer.uint8" ; - MBytes_buffer.write_int8 buf v - - let char v buf = - MBytes_buffer.write_char buf v - - let bool v buf = - uint8 (if v then 255 else 0) buf - - let int16 v buf = - if (v < - (1 lsl 15) || v >= 1 lsl 15) then - invalid_arg "Data_encoding.Binary.Writer.int16" ; - MBytes_buffer.write_int16 buf v - - let uint16 v buf = - if (v < 0 || v >= 1 lsl 16) then - invalid_arg "Data_encoding.Binary.Writer.uint16" ; - MBytes_buffer.write_int16 buf v - - let uint30 v buf = - if v < 0 || (Sys.int_size > 31 && v >= 1 lsl 30) then - invalid_arg "Data_encoding.Binary.Writer.uint30" ; - MBytes_buffer.write_int32 buf (Int32.of_int v) - - let int31 v buf = - if Sys.int_size > 31 && (v < ~- (1 lsl 30) || v >= 1 lsl 30) then - invalid_arg "Data_encoding.Binary.Writer.int31" ; - MBytes_buffer.write_int32 buf (Int32.of_int v) - - let int32 v buf = - MBytes_buffer.write_int32 buf v - - let int64 v buf = - MBytes_buffer.write_int64 buf v - - (** write a float64 
(double) **) - let float v buf = - MBytes_buffer.write_double buf v - - let fixed_kind_bytes length s buf = - MBytes_buffer.write_mbytes buf s 0 length - - let variable_length_bytes s buf = - let length = MBytes.length s in - MBytes_buffer.write_mbytes buf s 0 length - - let fixed_kind_string length s buf = - if String.length s <> length then invalid_arg "fixed_kind_string"; - MBytes_buffer.write_string_data buf s - - let variable_length_string s buf = - MBytes_buffer.write_string_data buf s - - let write_tag = function - | `Uint8 -> uint8 - | `Uint16 -> uint16 - - end - - let rec assoc_snd target = function - | [] -> raise No_case_matched - | (value, hd) :: tl -> - if hd = target - then value - else assoc_snd target tl - - let get_string_enum_case tbl v = - try - snd (Hashtbl.find tbl v) - with _ -> - raise No_case_matched - - let rec write_rec - : type a. a t -> a -> MBytes.t -> int -> int = fun e -> - let open Writer in - match e.encoding with - | Null -> (fun () _buf ofs -> ofs) - | Empty -> (fun () _buf ofs -> ofs) - | Constant _ -> (fun () _buf ofs -> ofs) - | Ignore -> (fun () _buf ofs -> ofs) - | Bool -> bool - | Int8 -> int8 - | Uint8 -> uint8 - | Int16 -> int16 - | Uint16 -> uint16 - | Int31 -> int31 - | Int32 -> int32 - | Int64 -> int64 - | RangedInt { minimum ; maximum } -> - fun v -> - begin - if v < minimum || v > maximum - then invalid_arg (Printf.sprintf "Integer %d not in range [%d, %d]." v minimum maximum) ; - let v = if minimum >= 0 then v - minimum else v in - match range_to_size ~minimum ~maximum with - | `Uint8 -> uint8 v - | `Uint16 -> uint16 v - | `Uint30 -> uint30 v - | `Int8 -> int8 v - | `Int16 -> int16 v - | `Int31 -> int31 v - end - | Float -> float - | RangedFloat { minimum ; maximum } -> - fun v -> - if v < minimum || v > maximum - then invalid_arg (Printf.sprintf "Float %f not in range [%f, %f]." 
v minimum maximum) ; - float v - | Bytes (`Fixed n) -> fixed_kind_bytes n - | String (`Fixed n) -> fixed_kind_string n - | Bytes `Variable -> variable_length_bytes - | String `Variable -> variable_length_string - | Array t -> array (write_rec t) - | List t -> list (write_rec t) - | String_enum (tbl, arr) -> - (fun v -> - let value = get_string_enum_case tbl v in - match enum_size arr with - | `Uint30 -> uint30 value - | `Uint16 -> uint16 value - | `Uint8 -> uint8 value) - | Obj (Req (_, e)) -> write_rec e - | Obj (Opt (`Dynamic, _, e)) -> - let write = write_rec e in - (function None -> int8 0 - | Some x -> fun buf ofs -> int8 1 buf ofs |> write x buf) - | Obj (Opt (`Variable, _, e)) -> - let write = write_rec e in - (function None -> fun _buf ofs -> ofs - | Some x -> write x) - | Obj (Dft (_, e, _)) -> write_rec e - | Objs (_, e1, e2) -> - objs (write_rec e1) (write_rec e2) - | Tup e -> write_rec e - | Tups (_, e1, e2) -> - objs (write_rec e1) (write_rec e2) - | Conv { encoding = e; proj } -> conv proj (write_rec e) - | Describe { encoding = e } -> write_rec e - | Def { encoding = e } -> write_rec e - | Splitted { encoding = e } -> write_rec e - | Union (_, sz, cases) -> union { write = write_rec } sz cases - | Mu (_, _, self) -> fun v buf ofs -> write_rec (self e) v buf ofs - | Dynamic_size e -> - let length = length e - and write = write_rec e in - fun v buf ofs -> - int32 (Int32.of_int @@ length v) buf ofs |> write v buf - | Delayed f -> write_rec (f ()) - - let rec write_rec_buffer - : type a. 
a encoding -> a -> MBytes_buffer.t -> unit = - fun encoding value buffer -> - let open BufferedWriter in - match encoding.encoding with - | Null -> () - | Empty -> () - | Constant _ -> () - | Ignore -> () - | Bool -> bool value buffer - | Int8 -> int8 value buffer - | Uint8 -> uint8 value buffer - | Int16 -> int16 value buffer - | Uint16 -> uint16 value buffer - | Int31 -> int31 value buffer - | Int32 -> int32 value buffer - | Int64 -> int64 value buffer - | Float -> float value buffer - | Bytes (`Fixed n) -> fixed_kind_bytes n value buffer - | String (`Fixed n) -> fixed_kind_string n value buffer - | Bytes `Variable -> variable_length_bytes value buffer - | String `Variable -> variable_length_string value buffer - | Array t -> Array.iter (fun x -> write_rec_buffer t x buffer) value - | List t -> List.iter (fun x -> write_rec_buffer t x buffer) value - | RangedInt { minimum ; maximum } -> - if value < minimum || value > maximum - then invalid_arg (Printf.sprintf "Integer %d not in range [%d, %d]." - value minimum maximum) ; - let value = if minimum >= 0 then value - minimum else value in - begin - match range_to_size ~minimum ~maximum with - | `Uint30 -> uint30 value buffer - | `Uint16 -> uint16 value buffer - | `Uint8 -> uint8 value buffer - | `Int8 -> int8 value buffer - | `Int16 -> int16 value buffer - | `Int31 -> int31 value buffer - end - | RangedFloat { minimum ; maximum } -> - if value < minimum || value > maximum - then invalid_arg (Printf.sprintf "Float %f not in range [%f, %f]." 
- value minimum maximum) ; - float value buffer - | String_enum (tbl, arr) -> - (match enum_size arr with - | `Uint30 -> BufferedWriter.uint30 - | `Uint16 -> BufferedWriter.uint16 - | `Uint8 -> BufferedWriter.uint8) - (get_string_enum_case tbl value) - buffer - | Obj (Req (_, e)) -> write_rec_buffer e value buffer - | Obj (Opt (`Dynamic, _, e)) -> - (match value with - | None -> int8 0 buffer - | Some x -> - begin - int8 1 buffer ; - write_rec_buffer e x buffer - end) - | Obj (Opt (`Variable, _, e)) -> - (match value with - | None -> () - | Some x -> write_rec_buffer e x buffer) - | Obj (Dft (_, e, _)) -> write_rec_buffer e value buffer - | Objs (_, e1, e2) -> - let v1, v2 = value in - write_rec_buffer e1 v1 buffer ; - write_rec_buffer e2 v2 buffer - | Tup e -> write_rec_buffer e value buffer - | Tups (_, e1, e2) -> - let v1, v2 = value in - write_rec_buffer e1 v1 buffer ; - write_rec_buffer e2 v2 buffer - | Conv { encoding = e; proj } -> - write_rec_buffer e (proj value) buffer - | Describe { encoding = e } -> write_rec_buffer e value buffer - | Def { encoding = e } -> write_rec_buffer e value buffer - | Splitted { encoding = e } -> write_rec_buffer e value buffer - | Union (_, sz, cases) -> - let rec write_case = function - | [] -> raise No_case_matched - | Case { tag = Json_only } :: tl -> write_case tl - | Case { encoding = e ; proj ; tag = Tag tag } :: tl -> - begin - match proj value with - | None -> write_case tl - | Some data -> - write_tag sz tag buffer ; - write_rec_buffer e data buffer - end in - write_case cases - | Mu (_, _, self) -> - write_rec_buffer (self encoding) value buffer - | Dynamic_size e -> - MBytes_buffer.write_sized buffer (fun () -> write_rec_buffer e value buffer) - | Delayed f -> write_rec_buffer (f ()) value buffer - - let write t v buf ofs = - try Some (write_rec t v buf ofs) - with _ -> None - - let to_bytes t v = - let bytes = MBytes_buffer.create () in - write_rec_buffer t v bytes ; - MBytes_buffer.to_mbytes bytes - - let 
to_bytes_list ?(copy_blocks=false) block_sz t v = - assert (block_sz > 0); - let bytes = to_bytes t v in (* call to generic function to_bytes *) - let length = MBytes.length bytes in - if length <= block_sz then - [bytes] (* if the result fits in the given block_sz *) - else - let may_copy = if copy_blocks then MBytes.copy else fun t -> t in - let nb_full = length / block_sz in (* nb of blocks of size block_sz *) - let sz_full = nb_full * block_sz in (* size of the full part *) - let acc = (* eventually init acc with a non-full block *) - if sz_full = length then [] - else [may_copy (MBytes.sub bytes sz_full (length - sz_full))] - in - let rec split_full_blocks curr_upper_limit acc = - let start = curr_upper_limit - block_sz in - assert (start >= 0); - (* copy the block [ start, curr_upper_limit [ of size block_sz *) - let acc = (may_copy (MBytes.sub bytes start block_sz)) :: acc in - if start = 0 then acc else split_full_blocks start acc - in - split_full_blocks sz_full acc - - (** Reader *) - - module Reader = struct - - let int8 buf ofs _len = - ofs + Size.int8, MBytes.get_int8 buf ofs - - let uint8 buf ofs _len = - ofs + Size.uint8, MBytes.get_uint8 buf ofs - - let char buf ofs _len = - ofs + Size.char, MBytes.get_char buf ofs - - let bool buf ofs len = - let ofs, v = int8 buf ofs len in - ofs, v <> 0 - - let int16 buf ofs _len = - ofs + Size.int16, MBytes.get_int16 buf ofs - - let uint16 buf ofs _len = - ofs + Size.uint16, MBytes.get_uint16 buf ofs - - let uint30 buf ofs _len = - let v = Int32.to_int (MBytes.get_int32 buf ofs) in - if v < 0 then - failwith "Data_encoding.Binary.Reader.uint30: invalid data." 
; - ofs + Size.uint30, v - - let int31 buf ofs _len = - ofs + Size.int31, Int32.to_int (MBytes.get_int32 buf ofs) - - let int32 buf ofs _len = - ofs + Size.int32, MBytes.get_int32 buf ofs - - let int64 buf ofs _len = - ofs + Size.int64, MBytes.get_int64 buf ofs - - (** read a float64 (double) **) - let float buf ofs _len = - (*Here, float means float64, which is read using MBytes.get_double !!*) - ofs + Size.float, MBytes.get_double buf ofs - - let int_of_int32 i = - let i' = Int32.to_int i in - let i'' = Int32.of_int i' in - if i'' = i then - i' - else - invalid_arg "int_of_int32 overflow" - - let fixed_length_bytes length buf ofs _len = - let s = MBytes.sub buf ofs length in - ofs + length, s - - let fixed_length_string length buf ofs _len = - let s = MBytes.substring buf ofs length in - ofs + length, s - - let seq r1 r2 buf ofs len = - let ofs', v1 = r1 buf ofs len in - let ofs'', v2 = r2 buf ofs' (len - (ofs' - ofs)) in - ofs'', (v1, v2) - - let varseq r e1 e2 buf ofs len = - let k1 = classify e1 - and k2 = classify e2 in - match k1, k2 with - | (`Dynamic | `Fixed _), `Variable -> - let ofs', v1 = r.read e1 buf ofs len in - let ofs'', v2 = r.read e2 buf ofs' (len - (ofs' - ofs)) in - ofs'', (v1, v2) - | `Variable, `Fixed n -> - let ofs', v1 = r.read e1 buf ofs (len - n) in - let ofs'', v2 = r.read e2 buf ofs' n in - ofs'', (v1, v2) - | _ -> assert false (* Should be rejected by Kind.combine *) - - let list read buf ofs len = - let rec loop acc ofs len = - assert (len >= 0); - if len <= 0 - then ofs, List.rev acc - else - let ofs', v = read buf ofs len in - assert (ofs' > ofs); - loop (v :: acc) ofs' (len - (ofs' - ofs)) - in - loop [] ofs len - - let array read buf ofs len = - let ofs, l = list read buf ofs len in - ofs, Array.of_list l - - let conv inj r buf ofs len = - let ofs, v = r buf ofs len in - ofs, inj v - - let read_tag = function - | `Uint8 -> uint8 - | `Uint16 -> uint16 - - let union r sz cases = - let read_cases = - TzList.filter_map - (function - 
| (Case { tag = Json_only }) -> None - | (Case { encoding = e ; inj ; tag = Tag tag }) -> - let read = r.read e in - Some (tag, fun len buf ofs -> - let ofs, v = read len buf ofs in - ofs, inj v)) - cases in - fun buf ofs len -> - let ofs, tag = read_tag sz buf ofs len in - try List.assoc tag read_cases buf ofs (len - tag_size sz) - with Not_found -> raise (Unexpected_tag tag) - - end - - let rec read_rec : type a. a t-> MBytes.t -> int -> int -> int * a = fun e -> - let open Reader in - match e.encoding with - | Null -> (fun _buf ofs _len -> ofs, ()) - | Empty -> (fun _buf ofs _len -> ofs, ()) - | Constant _ -> (fun _buf ofs _len -> ofs, ()) - | Ignore -> (fun _buf ofs len -> ofs + len, ()) - | Bool -> bool - | Int8 -> int8 - | Uint8 -> uint8 - | Int16 -> int16 - | Uint16 -> uint16 - | Int31 -> int31 - | Int32 -> int32 - | Int64 -> int64 - | RangedInt { minimum ; maximum } -> - (fun buf ofs alpha -> - let ofs, value = - match range_to_size ~minimum ~maximum with - | `Int8 -> int8 buf ofs alpha - | `Int16 -> int16 buf ofs alpha - | `Int31 -> int31 buf ofs alpha - | `Uint8 -> uint8 buf ofs alpha - | `Uint16 -> uint16 buf ofs alpha - | `Uint30 -> uint30 buf ofs alpha in - let value = if minimum > 0 then value + minimum else value in - if value < minimum || value > maximum - then raise (Int_out_of_range (value, minimum, maximum)) ; - (ofs, value)) - | Float -> float - | RangedFloat { minimum ; maximum } -> - (fun buf ofs len -> - let offset, value = float buf ofs len in - if value < minimum || value > maximum - then raise (Float_out_of_range (value, minimum, maximum)) ; - (offset, value)) - | Bytes (`Fixed n) -> fixed_length_bytes n - | String (`Fixed n) -> fixed_length_string n - | Bytes `Variable -> fun buf ofs len -> fixed_length_bytes len buf ofs len - | String `Variable -> fun buf ofs len -> fixed_length_string len buf ofs len - | String_enum (_, arr) -> begin - fun buf ofs a -> - let ofs, ind = - match enum_size arr with - | `Uint8 -> uint8 buf ofs a - | `Uint16 
-> uint16 buf ofs a - | `Uint30 -> uint30 buf ofs a in - if ind >= Array.length arr - then raise No_case_matched - else (ofs, arr.(ind)) - end - | Array e -> array (read_rec e) - | List e -> list (read_rec e) - | Obj (Req (_, e)) -> read_rec e - | Obj (Opt (`Dynamic, _, t)) -> - let read = read_rec t in - (fun buf ofs len -> - let ofs, v = int8 buf ofs len in - if v = 0 then ofs, None - else let ofs, v = read buf ofs (len - Size.int8) in ofs, Some v) - | Obj (Opt (`Variable, _, t)) -> - let read = read_rec t in - (fun buf ofs len -> - if len = 0 then ofs, None - else - let ofs', v = read buf ofs len in - assert (ofs' = ofs + len) ; - ofs + len, Some v) - | Obj (Dft (_, e, _)) -> read_rec e - | Objs ((`Fixed _ | `Dynamic), e1, e2) -> - seq (read_rec e1) (read_rec e2) - | Objs (`Variable, e1, e2) -> - varseq { read = fun t -> read_rec t } e1 e2 - | Tup e -> read_rec e - | Tups ((`Fixed _ | `Dynamic), e1, e2) -> - seq (read_rec e1) (read_rec e2) - | Tups (`Variable, e1, e2) -> - varseq { read = fun t -> read_rec t } e1 e2 - | Conv { inj ; encoding = e } -> conv inj (read_rec e) - | Describe { encoding = e } -> read_rec e - | Def { encoding = e } -> read_rec e - | Splitted { encoding = e } -> read_rec e - | Union (_, sz, cases) -> - union { read = fun t -> read_rec t } sz cases - | Mu (_, _, self) -> fun buf ofs len -> read_rec (self e) buf ofs len - | Dynamic_size e -> - let read = read_rec e in - fun buf ofs len -> - let ofs, sz = int32 buf ofs len in - let sz = Int32.to_int sz in - if sz < 0 then raise (Invalid_size sz); - read buf ofs sz - | Delayed f -> read_rec (f ()) - - let read t buf ofs len = - try Some (read_rec t buf ofs len) - with _ -> None - let write = write - let of_bytes_exn ty buf = - let len = MBytes.length buf in - let read_len, r = read_rec ty buf 0 len in - if read_len <> len then - failwith "Data_encoding.Binary.of_bytes_exn: remainig data" ; - r - let of_bytes ty buf = - try Some (of_bytes_exn ty buf) - with _ -> None - let to_bytes = to_bytes 
- - let length = length - - let fixed_length e = - match classify e with - | `Fixed n -> Some n - | `Dynamic | `Variable -> None - let fixed_length_exn e = - match fixed_length e with - | Some n -> n - | None -> invalid_arg "Data_encoding.Binary.fixed_length_exn" - - - (* Facilities to decode streams of binary data *) - +module Json: sig + type json = + [ `O of (string * json) list + | `Bool of bool + | `Float of float + | `A of json list + | `Null + | `String of string ] + type t = json + type schema = Json_schema.schema + val encoding : json Encoding.t + val schema_encoding : schema Encoding.t + val convert : 'a Encoding.t -> 'a Json_encoding.encoding + val schema : 'a Encoding.t -> schema + val construct : 't Encoding.t -> 't -> json + val destruct : 't Encoding.t -> json -> 't + type path = path_item list + and path_item = + [ `Field of string + | `Index of int + | `Star + | `Next ] + exception Cannot_destruct of (path * exn) + exception Unexpected of string * string + exception No_case_matched of exn list + exception Bad_array_size of int * int + exception Missing_field of string + exception Unexpected_field of string + val print_error : + ?print_unknown: (Format.formatter -> exn -> unit) -> + Format.formatter -> exn -> unit + val cannot_destruct : ('a, Format.formatter, unit, 'b) format4 -> 'a + val wrap_error : ('a -> 'b) -> 'a -> 'b + val from_string : string -> (json, string) result + val from_stream : string Lwt_stream.t -> (json, string) result Lwt_stream.t + val to_string : ?minify:bool -> json -> string + val pp : Format.formatter -> json -> unit +end = Json +module Bson: sig + type bson = Json_repr_bson.bson + type t = bson + val construct : 't encoding -> 't -> bson + val destruct : 't encoding -> bson -> 't +end = Bson +module Binary: sig + val length : 'a encoding -> 'a -> int + val read : 'a encoding -> MBytes.t -> int -> int -> (int * 'a) option + val write : 'a encoding -> 'a -> MBytes.t -> int -> int option + val to_bytes : 'a encoding -> 'a -> 
MBytes.t + val of_bytes : 'a encoding -> MBytes.t -> 'a option + val of_bytes_exn : 'a encoding -> MBytes.t -> 'a + val to_bytes_list : ?copy_blocks:bool -> int -> 'a encoding -> 'a -> MBytes.t list type 'a status = | Success of { res : 'a ; res_len : int ; remaining : MBytes.t list } | Await of (MBytes.t -> 'a status) | Error - - module Stream_reader = struct - - (* used as a zipper to code the function read_checker with the - ability to stop and wait for more data. In 'P_seq' case, data - length is parameterized by the current offset. Hence, it's a - function 'fun_data_len'. For the 'P_list' case, we store the - base offset (before starting reading the elements) and the - number of elements that have been read so far. *) - type path = - | P_top : path - | P_await : { path : path ; encoding : 'a t ; data_len : int } -> path - | P_seq : { path : path ; encoding : 'a t ; - fun_data_len : int -> int } -> path - | P_list : { path:path ; encoding:'a t ; data_len : int ; - base_ofs : int ; nb_elts_read : int } -> path - - (* used to accumulate given mbytes when reading a list of blocks, - as well as the current offset and the number of unread bytes *) - type mbytes_stream = { - past : MBytes.t Queue.t ; (* data that have been entirely read *) - future : (MBytes.t * int) Queue.t ; (* data that are not (fully) read *) - mutable past_len : int ; (*length of concatenation of data in 'past'*) - mutable unread : int ; (*number of cells that are unread in 'future'*) - ofs : int (*current absolute offset wrt to concatenation past @ future*) - } - - (* exception raised when additional mbytes are needed to continue - decoding *) - exception Need_more_data - - (* read a data that is stored in may Mbytes *) - let read_from_many_blocks reader buf ofs d_ofs = - let tmp = MBytes.create d_ofs in (*we will merge data in this mbyte*) - let r = ref d_ofs in (*to count the cells to be read*) - let rel_ofs = ref ofs in (*= ofs for first mbyte, 0 for others*) - while !r > 0 do - assert (not 
(Queue.is_empty buf.future)) ; - let b, len_b = Queue.peek buf.future in (*take the next mbyte*) - let len_chunk = len_b - !rel_ofs in (*the number of cells to read*) - if !r >= len_chunk then - begin (*copy b in 'past' if it is read entirely*) - ignore (Queue.pop buf.future) ; - Queue.push b buf.past ; - buf.past_len <- buf.past_len + len_b ; - end ; - (* copy (min !r len_chunk) data from b to tmp *) - MBytes.blit b !rel_ofs tmp (d_ofs - !r) (min !r len_chunk) ; - r := !r - len_chunk ; (* len_chunk data read during this round*) - rel_ofs := 0 ; (*next mbytes will be read starting from zero*) - done ; - reader tmp 0 d_ofs - - - (* generic function that reads data from an mbytes_stream. It is - parameterized by a function "reader" that effectively reads the - data *) - let generic_read_data delta_ofs reader buf = - let absolute_ofs = buf.ofs in - if buf.unread < delta_ofs then (*not enough data*) - raise Need_more_data ; - if delta_ofs = 0 then (*we'll read nothing*) - buf, reader (MBytes.create 0) 0 0 - else - let new_ofs = absolute_ofs + delta_ofs in - let ofs = absolute_ofs - buf.past_len in (*relative ofs wrt 'future'*) - buf.unread <- buf.unread-delta_ofs ; (*'delta_ofs' cells will be read*) - assert (not (Queue.is_empty buf.future)) ; (*we have some data to read*) - let b, len_b = Queue.peek buf.future in - let buf = { buf with ofs = new_ofs } in - if ofs + delta_ofs > len_b then - (*should read data from many mbytes*) - buf, read_from_many_blocks reader buf ofs delta_ofs - else - begin - if ofs + delta_ofs = len_b then - begin (*the rest of b will be entirely read. 
Put it in 'past'*) - ignore (Queue.pop buf.future) ; - Queue.push b buf.past ; - buf.past_len <- buf.past_len + len_b ; - end ; - buf, reader b ofs delta_ofs - end - - - (* functions that try to read data from a given mbytes_stream, - or raise Need_more_data *) - - let int8 buf = - generic_read_data Size.int8 (fun x y _ -> MBytes.get_int8 x y) buf - - let uint8 buf = - generic_read_data Size.uint8 (fun x y _ -> MBytes.get_uint8 x y) buf - - let char buf = - let buf, v = int8 buf in - buf, Char.chr v - - let bool buf = - let buf, v = int8 buf in - buf, v <> 0 - - let int16 buf = - generic_read_data Size.int16 (fun x y _ -> MBytes.get_int16 x y) buf - - let uint16 buf = - generic_read_data Size.uint16 (fun x y _ -> MBytes.get_uint16 x y) buf - - let uint30 buf = - generic_read_data Size.uint30 - (fun x y _ -> - let v = Int32.to_int (MBytes.get_int32 x y) in - if v < 0 then - failwith "Data_encoding.Binary.Reader.uint30: invalid data." ; - v) buf - - let int31 buf = - generic_read_data Size.int31 - (fun x y _ -> Int32.to_int (MBytes.get_int32 x y)) buf - - let int32 buf = - generic_read_data Size.int32 (fun x y _ -> MBytes.get_int32 x y) buf - - let int64 buf = - generic_read_data Size.int64 (fun x y _ -> MBytes.get_int64 x y) buf - - (** read a float64 (double) **) - let float buf = - (*Here, float means float64, which is read using MBytes.get_double !!*) - generic_read_data Size.float (fun x y _ -> MBytes.get_double x y) buf - - let fixed_length_bytes length buf = - generic_read_data length MBytes.sub buf - - let fixed_length_string length buf = - generic_read_data length MBytes.substring buf - - let read_tag = function - | `Uint8 -> uint8 - | `Uint16 -> uint16 - - (* auxiliary function: computing size of data in branches - Objs(`Variable) and Tups(`Variable) *) - let varseq_lengths e1 e2 ofs len = match classify e1, classify e2 with - | (`Dynamic | `Fixed _), `Variable -> len, (fun ofs' -> len - ofs' + ofs) - | `Variable, `Fixed n -> (len - n), (fun _ -> n) - | _ 
-> assert false (* Should be rejected by Kind.combine *) - - - (* adaptation of function read_rec to check binary data - incrementally. The function takes (and returns) a 'path' (for - incrementality), and 'mbytes_stream' *) - let rec data_checker - : type a. - path -> a encoding -> mbytes_stream -> int -> - path * mbytes_stream = - fun path e buf len -> - (*length of data with `Variable kind should be given by the caller*) - assert (classify e != `Variable || len >= 0) ; - try match e.encoding with - | Null -> next_path path buf - | Empty -> next_path path buf - | Constant _ -> next_path path buf - | Ignore -> next_path path { buf with ofs = buf.ofs + len } - | Bool -> next_path path (fst (bool buf)) - | Int8 -> next_path path (fst (int8 buf)) - | Uint8 -> next_path path (fst (uint8 buf)) - | Int16 -> next_path path (fst (int16 buf)) - | Uint16 -> next_path path (fst (uint16 buf)) - | Int31 -> next_path path (fst (int31 buf)) - | Int32 -> next_path path (fst (int32 buf)) - | Int64 -> next_path path (fst (int64 buf)) - | RangedInt { minimum ; maximum } -> - let (stream, ranged) = - match range_to_size ~minimum ~maximum with - | `Int8 -> int8 buf - | `Int16 -> int16 buf - | `Int31 -> int31 buf - | `Uint8 -> uint8 buf - | `Uint16 -> uint16 buf - | `Uint30 -> uint30 buf in - let ranged = if minimum > 0 then ranged + minimum else ranged in - assert (minimum < ranged && ranged < maximum) ; - next_path path stream - | Float -> next_path path (fst (float buf)) - | RangedFloat { minimum ; maximum } -> - let stream, float = float buf in - assert (minimum < float && maximum > float) ; - next_path path stream - | Bytes (`Fixed n) -> - next_path path (fst (fixed_length_bytes n buf)) - - | String (`Fixed n) -> - next_path path (fst (fixed_length_string n buf)) - - | Bytes `Variable -> - next_path path (fst (fixed_length_bytes len buf)) - - | String `Variable -> - next_path path (fst (fixed_length_string len buf)) - - | String_enum (_, arr) -> - next_path path - (match enum_size 
arr with - | `Uint8 -> fst @@ uint8 buf - | `Uint16 -> fst @@ uint16 buf - | `Uint30 -> fst @@ uint30 buf) - - | Array e -> - let p = P_list { path ; encoding = e ; base_ofs = buf.ofs ; - data_len = len ; nb_elts_read = 0 } in - next_path p buf - - | List e -> - let p = P_list { path ; encoding = e ; base_ofs = buf.ofs ; - data_len = len ; nb_elts_read = 0 } in - next_path p buf - - | Obj (Req (_, e)) -> data_checker path e buf len - - | Obj (Opt (`Dynamic, _, e)) -> - let buf, v = int8 buf in - if v = 0 then next_path path buf - else data_checker path e buf (len - Size.int8) - - | Obj (Opt (`Variable, _, e)) -> - if len = 0 then next_path path buf - else data_checker path e buf len - - | Obj (Dft (_, e, _)) -> data_checker path e buf len - - | Objs ((`Fixed _ | `Dynamic), e1, e2) -> - let f_len2 ofs' = len - (ofs' - buf.ofs) in - let path = - P_seq { path ; encoding = e2 ; fun_data_len = f_len2 } in - data_checker path e1 buf len - - | Objs (`Variable, e1, e2) -> - let len1, f_len2 = varseq_lengths e1 e2 buf.ofs len in - let path = - P_seq { path ; encoding = e2 ; fun_data_len = f_len2 } in - data_checker path e1 buf len1 - - | Tup e -> data_checker path e buf len - - | Tups ((`Fixed _ | `Dynamic), e1, e2) -> - let f_len2 ofs' = len - (ofs' - buf.ofs) in - let path = - P_seq { path ; encoding = e2 ; fun_data_len = f_len2 } in - data_checker path e1 buf len - - | Tups (`Variable, e1, e2) -> - let len1, f_len2 = varseq_lengths e1 e2 buf.ofs len in - let path = - P_seq { path ; encoding = e2 ; fun_data_len = f_len2 } in - data_checker path e1 buf len1 - - | Conv { encoding = e } -> data_checker path e buf len - - | Describe { encoding = e } -> data_checker path e buf len - - | Def { encoding = e } -> data_checker path e buf len - - | Splitted { encoding = e } -> data_checker path e buf len - - | Mu (_, _, self) -> data_checker path (self e) buf len - - | Union (_, sz, cases) -> - let buf, ctag = read_tag sz buf in - let opt = - List.fold_left - (fun acc c -> match c 
with - | (Case { encoding ; tag = Tag tag }) - when tag == ctag -> - assert (acc == None) ; - Some (data_checker path encoding buf) - | _ -> acc - )None cases - in - begin match opt with - | None -> raise (Unexpected_tag ctag) - | Some func -> func (len - (tag_size sz)) - end - - | Dynamic_size e -> - let buf, sz = int32 buf in - let sz = Int32.to_int sz in - if sz < 0 then raise (Invalid_size sz) ; - data_checker path e buf sz - - | Delayed f -> data_checker path (f ()) buf len - - with Need_more_data -> - P_await { path ; encoding = e ; data_len = len }, buf - - and next_path : path -> mbytes_stream -> path * mbytes_stream = - fun path buf -> - match path with - | P_top -> - P_top, buf (* success case *) - - | P_seq { path ; encoding ; fun_data_len } -> - (* check the right branch of a sequence. fun_data_len ofs gives - the length of the data to read *) - data_checker path encoding buf (fun_data_len buf.ofs) - - | P_await { path ; encoding ; data_len } -> - (* resume from an await *) - data_checker path encoding buf data_len - - | P_list - ({ path ; encoding ; base_ofs ; data_len ; nb_elts_read } as r) -> - (* read/check an eventual element of a list *) - if data_len = buf.ofs - base_ofs then - (* we've read all the elements of the list *) - next_path path buf - else - begin - (*some more elements to read*) - assert (data_len > buf.ofs - base_ofs) ; - (*check: if we've already read some elements, then currrent ofs - should be greater then initial ofs *) - assert (nb_elts_read <= 0 || buf.ofs - base_ofs > 0) ; - let path = - P_list { r with nb_elts_read = nb_elts_read + 1} in - data_checker path encoding buf data_len - end - - let data_checker = next_path - - (* insert a given MBytes.t in a given mbytes_stream *) - let insert_mbytes mb_buf mb = - let len = MBytes.length mb in - if len > 0 then begin - Queue.push (mb, len) mb_buf.future ; - mb_buf.unread <- mb_buf.unread + len ; - end - - (* aux function called when data_checker succeeds: splits a given - 
mbytes_stream into a 'read' and 'unread' queues. This may - modify the content of the given mbytes_stream *) - let split_mbytes_stream { past_len ; past ; future ; unread ; ofs } = - let rel_ofs = ofs - past_len in - assert (rel_ofs >= 0) ; - if rel_ofs = 0 then past, future (* already done *) - else begin - assert (not(Queue.is_empty future)) ; (*because data_checker succeeded*) - let b, len = Queue.pop future in - assert (rel_ofs < len) ; (*inv. maintained by read_from_many_blocks*) - let b1 = MBytes.sub b 0 rel_ofs in (* read part of b *) - let b2 = MBytes.sub b rel_ofs (len-rel_ofs) in (* unread part of b *) - Queue.push b1 past ; - - (* push b2 at the beginning of 'future' using Queue.transfer*) - let tmp = Queue.create() in - Queue.push (b2, unread) tmp ; - Queue.transfer future tmp ; (*tmp === b2 ::: future in constant time*) - past, tmp - end - - (* given a state, this function returns a new status: - - if data are successfully checked, accumulated mbytes are - passed to 'success_result' that computes the final - result. 
Unread mbytes are also returned - - if some more data are needed, a function that waits for some - additional mbytes is returned - - eventual errors are reported/returned *) - let rec bytes_stream_reader_rec (path, mb_buf) success_result = - let success = - match path with - | P_top -> true - | P_await _ -> false - | _ -> assert false - in - assert (mb_buf.ofs >= mb_buf.past_len) ; - if success then - let q_read, q_unread = split_mbytes_stream mb_buf in - match success_result q_read mb_buf.ofs with - | Some a -> - let remaining = - List.rev @@ - Queue.fold - (fun acc (b, len) -> - if len = 0 then acc else b:: acc) [] q_unread - in - Success { res = a ; res_len = mb_buf.ofs ; remaining } - | None -> Error - (* success_result may fail because data_checker is - approximative in some situations *) - else - Await - (fun mb -> - insert_mbytes mb_buf mb ; - try - let state = data_checker path mb_buf in - bytes_stream_reader_rec state success_result - with _ -> Error) - - (* This function checks reading a stream of 'MBytes.t' wrt. a given - encoding: - - the given data encoding should have a 'Fixed' or a 'Dynamic' - size, otherwise an error is returned, - - the function returns an 'Error', a function w - ('Await w') that waits for more data (Mbytes.t), or - 'Success'. The function is parameterized by 'success_result' - that computes the data to return in case of success. 
- An exception 'Invalid_argument "streaming data with variable - size"' is raised if the encoding has a variable size *) - let bytes_stream_reader : - MBytes.t list -> 'a t -> - (MBytes.t Queue.t -> int -> 'b option) -> 'b status - = fun l e success_result -> - match classify e with - | `Variable -> invalid_arg "streaming data with variable size" - | `Fixed _ | `Dynamic -> - let mb_buf = { - past = Queue.create() ; past_len = 0 ; - future = Queue.create() ; unread = 0; ofs = 0 } - in - List.iter (insert_mbytes mb_buf) l ; - let path = - P_await { path = P_top ; encoding = e ; data_len = - 1 } in - try bytes_stream_reader_rec (data_checker path mb_buf) success_result - with _ -> Error - - end - - (* concats a queue of mbytes into one MByte *) - let concat_mbyte_chunks queue tot_len = - if Queue.length queue = 1 then Queue.pop queue (* no copy *) - else (* copy smaller mbytes into one big mbyte *) - let buf = MBytes.create tot_len in - let cpt = ref 0 in - let tot_len' = ref tot_len in - while not (Queue.is_empty queue) do - let mb = Queue.pop queue in - let len = MBytes.length mb in - tot_len' := !tot_len' - len ; - assert (!tot_len' >= 0) ; - MBytes.blit mb 0 buf !cpt len ; - cpt := !cpt + len ; - done ; - assert (!tot_len' = 0) ; - buf - - (* Decode a stream of MBytes. see - Stream_reader.bytes_stream_traversal for more details *) - let read_stream_of_bytes ?(init=[]) encoding = - Stream_reader.bytes_stream_reader init encoding - (fun read_q ofs -> of_bytes encoding (concat_mbyte_chunks read_q ofs)) - - (* Check reading a stream of MBytes. 
see - Stream_reader.bytes_stream_traversal for more details *) - let check_stream_of_bytes ?(init=[]) encoding = - Stream_reader.bytes_stream_reader init encoding (fun _ _ -> Some ()) - -end + val read_stream_of_bytes : ?init:MBytes.t list -> 'a encoding -> 'a status + val check_stream_of_bytes : ?init:MBytes.t list -> 'a encoding -> unit status + val fixed_length : 'a encoding -> int option + val fixed_length_exn : 'a encoding -> int +end = Binary + +type json = Json.t +let json = Json.encoding +type json_schema = Json.schema +let json_schema = Json.schema_encoding +type bson = Bson.t diff --git a/src/lib_data_encoding/encoding.ml b/src/lib_data_encoding/encoding.ml new file mode 100644 index 000000000..6bfe5747a --- /dev/null +++ b/src/lib_data_encoding/encoding.ml @@ -0,0 +1,567 @@ +(**************************************************************************) +(* *) +(* Copyright (c) 2014 - 2018. *) +(* Dynamic Ledger Solutions, Inc. *) +(* *) +(* All rights reserved. No warranty, explicit or implicit, provided. 
*) +(* *) +(**************************************************************************) + +exception No_case_matched +exception Unexpected_tag of int +exception Duplicated_tag of int +exception Invalid_tag of int * [ `Uint8 | `Uint16 ] +exception Unexpected_enum of string * string list +exception Invalid_size of int +exception Int_out_of_range of int * int * int +exception Float_out_of_range of float * float * float +exception Parse_error of string + +(*TODO: provide a more specialised function that doesn't need as many closures*) +let apply ?(error=No_case_matched) fs v = + let rec loop = function + | [] -> raise error + | f :: fs -> + match f v with + | Some l -> l + | None -> loop fs in + loop fs + + +module Size = struct + let bool = 1 + let int8 = 1 + let uint8 = 1 + let char = 1 + let int16 = 2 + let uint16 = 2 + let uint30 = 4 + let uint32 = 4 + let uint64 = 8 + let int31 = 4 + let int32 = 4 + let int64 = 8 + let float = 8 +end + +type tag_size = [ `Uint8 | `Uint16 ] + +let tag_size = function + | `Uint8 -> Size.uint8 + | `Uint16 -> Size.uint16 + +module Kind = struct + + type t = + [ `Fixed of int + | `Dynamic + | `Variable ] + + type length = + [ `Fixed of int + | `Variable ] + + type enum = + [ `Dynamic + | `Variable ] + + let combine name : t -> t -> t = fun k1 k2 -> + match k1, k2 with + | `Fixed n1, `Fixed n2 -> `Fixed (n1 + n2) + | `Dynamic, `Dynamic | `Fixed _, `Dynamic + | `Dynamic, `Fixed _ -> `Dynamic + | `Variable, `Fixed _ + | (`Dynamic | `Fixed _), `Variable -> `Variable + | `Variable, `Dynamic -> + Printf.ksprintf invalid_arg + "Cannot merge two %s when the left element is of variable length \ + and the right one of dynamic length. \ + You should use the reverse order, or wrap the second one \ + with Data_encoding.dynamic_size." + name + | `Variable, `Variable -> + Printf.ksprintf invalid_arg + "Cannot merge two %s with variable length. \ + You should wrap one of them with Data_encoding.dynamic_size." 
+ name + + let merge : t -> t -> t = fun k1 k2 -> + match k1, k2 with + | `Fixed n1, `Fixed n2 when n1 = n2 -> `Fixed n1 + | `Fixed _, `Fixed _ -> `Dynamic + | `Dynamic, `Dynamic | `Fixed _, `Dynamic + | `Dynamic, `Fixed _ -> `Dynamic + | `Variable, (`Dynamic | `Fixed _) + | (`Dynamic | `Fixed _), `Variable + | `Variable, `Variable -> `Variable + + let merge_list sz : t list -> t = function + | [] -> assert false (* should be rejected by Data_encoding.union *) + | k :: ks -> + match List.fold_left merge k ks with + | `Fixed n -> `Fixed (n + tag_size sz) + | k -> k + +end + +type case_tag = Tag of int | Json_only + +type 'a desc = + | Null : unit desc + | Empty : unit desc + | Ignore : unit desc + | Constant : string -> unit desc + | Bool : bool desc + | Int8 : int desc + | Uint8 : int desc + | Int16 : int desc + | Uint16 : int desc + | Int31 : int desc + | Int32 : Int32.t desc + | Int64 : Int64.t desc + | RangedInt : { minimum : int ; maximum : int } -> int desc + | RangedFloat : { minimum : float ; maximum : float } -> float desc + | Float : float desc + | Bytes : Kind.length -> MBytes.t desc + | String : Kind.length -> string desc + | String_enum : ('a, string * int) Hashtbl.t * 'a array -> 'a desc + | Array : 'a t -> 'a array desc + | List : 'a t -> 'a list desc + | Obj : 'a field -> 'a desc + | Objs : Kind.t * 'a t * 'b t -> ('a * 'b) desc + | Tup : 'a t -> 'a desc + | Tups : Kind.t * 'a t * 'b t -> ('a * 'b) desc + | Union : Kind.t * tag_size * 'a case list -> 'a desc + | Mu : Kind.enum * string * ('a t -> 'a t) -> 'a desc + | Conv : + { proj : ('a -> 'b) ; + inj : ('b -> 'a) ; + encoding : 'b t ; + schema : Json_schema.schema option } -> 'a desc + | Describe : + { title : string option ; + description : string option ; + encoding : 'a t } -> 'a desc + | Def : { name : string ; + encoding : 'a t } -> 'a desc + | Splitted : + { encoding : 'a t ; + json_encoding : 'a Json_encoding.encoding ; + is_obj : bool ; is_tup : bool } -> 'a desc + | Dynamic_size : 'a t -> 
'a desc + | Delayed : (unit -> 'a t) -> 'a desc + +and _ field = + | Req : string * 'a t -> 'a field + | Opt : Kind.enum * string * 'a t -> 'a option field + | Dft : string * 'a t * 'a -> 'a field + +and 'a case = + | Case : { name : string option ; + encoding : 'a t ; + proj : ('t -> 'a option) ; + inj : ('a -> 't) ; + tag : case_tag } -> 't case + +and 'a t = { + encoding: 'a desc ; + mutable json_encoding: 'a Json_encoding.encoding option ; +} + +type signed_integer = [ `Int31 | `Int16 | `Int8 ] +type unsigned_integer = [ `Uint30 | `Uint16 | `Uint8 ] +type integer = [ signed_integer | unsigned_integer ] + +let signed_range_to_size min max : [> signed_integer ] = + if min >= ~-128 && max <= 127 + then `Int8 + else if min >= ~-32_768 && max <= 32_767 + then `Int16 + else `Int31 + +(* max should be centered at zero *) +let unsigned_range_to_size max : [> unsigned_integer ] = + if max <= 255 + then `Uint8 + else if max <= 65535 + then `Uint16 + else `Uint30 + +let integer_to_size = function + | `Int31 -> Size.int31 + | `Int16 -> Size.int16 + | `Int8 -> Size.int8 + | `Uint30 -> Size.uint30 + | `Uint16 -> Size.uint16 + | `Uint8 -> Size.uint8 + +let range_to_size ~minimum ~maximum : integer = + if minimum < 0 + then signed_range_to_size minimum maximum + else unsigned_range_to_size (maximum - minimum) + +let enum_size arr = + unsigned_range_to_size (Array.length arr) + +type 'a encoding = 'a t + +let rec classify : type a. 
a t -> Kind.t = fun e -> + match e.encoding with + (* Fixed *) + | Null -> `Fixed 0 + | Empty -> `Fixed 0 + | Constant _ -> `Fixed 0 + | Bool -> `Fixed Size.bool + | Int8 -> `Fixed Size.int8 + | Uint8 -> `Fixed Size.uint8 + | Int16 -> `Fixed Size.int16 + | Uint16 -> `Fixed Size.uint16 + | Int31 -> `Fixed Size.int31 + | Int32 -> `Fixed Size.int32 + | Int64 -> `Fixed Size.int64 + | RangedInt { minimum ; maximum } -> + `Fixed (integer_to_size @@ range_to_size ~minimum ~maximum) + | Float -> `Fixed Size.float + | RangedFloat _ -> `Fixed Size.float + (* Tagged *) + | Bytes kind -> (kind :> Kind.t) + | String kind -> (kind :> Kind.t) + | String_enum (_, cases) -> + `Fixed (integer_to_size (enum_size cases)) + | Obj (Opt (kind, _, _)) -> (kind :> Kind.t) + | Objs (kind, _, _) -> kind + | Tups (kind, _, _) -> kind + | Union (kind, _, _) -> (kind :> Kind.t) + | Mu (kind, _, _) -> (kind :> Kind.t) + (* Variable *) + | Ignore -> `Variable + | Array _ -> `Variable + | List _ -> `Variable + (* Recursive *) + | Obj (Req (_, encoding)) -> classify encoding + | Obj (Dft (_, encoding, _)) -> classify encoding + | Tup encoding -> classify encoding + | Conv { encoding } -> classify encoding + | Describe { encoding } -> classify encoding + | Def { encoding } -> classify encoding + | Splitted { encoding } -> classify encoding + | Dynamic_size _ -> `Dynamic + | Delayed f -> classify (f ()) + +let make ?json_encoding encoding = { encoding ; json_encoding } + +module Fixed = struct + let string n = make @@ String (`Fixed n) + let bytes n = make @@ Bytes (`Fixed n) +end + +module Variable = struct + let string = make @@ String `Variable + let bytes = make @@ Bytes `Variable + let check_not_variable name e = + match classify e with + | `Variable -> + Printf.ksprintf invalid_arg + "Cannot insert variable length element in %s. \ + You should wrap the contents using Data_encoding.dynamic_size." 
name + | `Dynamic | `Fixed _ -> () + let array e = + check_not_variable "an array" e ; + make @@ Array e + let list e = + check_not_variable "a list" e ; + make @@ List e +end + +let dynamic_size e = + make @@ Dynamic_size e + +let delayed f = + make @@ Delayed f + +let null = make @@ Null +let empty = make @@ Empty +let unit = make @@ Ignore +let constant s = make @@ Constant s +let bool = make @@ Bool +let int8 = make @@ Int8 +let uint8 = make @@ Uint8 +let int16 = make @@ Int16 +let uint16 = make @@ Uint16 +let int31 = make @@ Int31 +let int32 = make @@ Int32 +let ranged_int minimum maximum = + let minimum = min minimum maximum + and maximum = max minimum maximum in + if minimum < -(1 lsl 30) || (1 lsl 30) - 1 < maximum then + invalid_arg "Data_encoding.ranged_int" ; + make @@ RangedInt { minimum ; maximum } +let ranged_float minimum maximum = + let minimum = min minimum maximum + and maximum = max minimum maximum in + make @@ RangedFloat { minimum ; maximum } +let int64 = make @@ Int64 +let float = make @@ Float + +let string = dynamic_size Variable.string +let bytes = dynamic_size Variable.bytes +let array e = dynamic_size (Variable.array e) +let list e = dynamic_size (Variable.list e) + +let string_enum = function + | [] -> invalid_arg "data_encoding.string_enum: cannot have zero cases" + | [ _case ] -> invalid_arg "data_encoding.string_enum: cannot have a single case, use constant instead" + | _ :: _ as cases -> + let arr = Array.of_list (List.map snd cases) in + let tbl = Hashtbl.create (Array.length arr) in + List.iteri (fun ind (str, a) -> Hashtbl.add tbl a (str, ind)) cases ; + make @@ String_enum (tbl, arr) + +let conv proj inj ?schema encoding = + make @@ Conv { proj ; inj ; encoding ; schema } + +let describe ?title ?description encoding = + match title, description with + | None, None -> encoding + | _, _ -> make @@ Describe { title ; description ; encoding } + +let def name encoding = make @@ Def { name ; encoding } + +let req ?title ?description n 
t = + Req (n, describe ?title ?description t) +let opt ?title ?description n encoding = + let kind = + match classify encoding with + | `Variable -> `Variable + | `Fixed _ | `Dynamic -> `Dynamic in + Opt (kind, n, make @@ Describe { title ; description ; encoding }) +let varopt ?title ?description n encoding = + Opt (`Variable, n, make @@ Describe { title ; description ; encoding }) +let dft ?title ?description n t d = + Dft (n, describe ?title ?description t, d) + +let raw_splitted ~json ~binary = + make @@ Splitted { encoding = binary ; + json_encoding = json ; + is_obj = false ; + is_tup = false } + +let rec is_obj : type a. a t -> bool = fun e -> + match e.encoding with + | Obj _ -> true + | Objs _ (* by construction *) -> true + | Conv { encoding = e } -> is_obj e + | Dynamic_size e -> is_obj e + | Union (_,_,cases) -> + List.for_all (fun (Case { encoding = e }) -> is_obj e) cases + | Empty -> true + | Ignore -> true + | Mu (_,_,self) -> is_obj (self e) + | Splitted { is_obj } -> is_obj + | Delayed f -> is_obj (f ()) + | Describe { encoding } -> is_obj encoding + | Def { encoding } -> is_obj encoding + | _ -> false + +let rec is_tup : type a. 
a t -> bool = fun e -> + match e.encoding with + | Tup _ -> true + | Tups _ (* by construction *) -> true + | Conv { encoding = e } -> is_tup e + | Dynamic_size e -> is_tup e + | Union (_,_,cases) -> + List.for_all (function Case { encoding = e} -> is_tup e) cases + | Mu (_,_,self) -> is_tup (self e) + | Splitted { is_tup } -> is_tup + | Delayed f -> is_tup (f ()) + | Describe { encoding } -> is_tup encoding + | Def { encoding } -> is_tup encoding + | _ -> false + +let raw_merge_objs e1 e2 = + let kind = Kind.combine "objects" (classify e1) (classify e2) in + make @@ Objs (kind, e1, e2) + +let obj1 f1 = make @@ Obj f1 +let obj2 f2 f1 = + raw_merge_objs (obj1 f2) (obj1 f1) +let obj3 f3 f2 f1 = + raw_merge_objs (obj1 f3) (obj2 f2 f1) +let obj4 f4 f3 f2 f1 = + raw_merge_objs (obj2 f4 f3) (obj2 f2 f1) +let obj5 f5 f4 f3 f2 f1 = + raw_merge_objs (obj1 f5) (obj4 f4 f3 f2 f1) +let obj6 f6 f5 f4 f3 f2 f1 = + raw_merge_objs (obj2 f6 f5) (obj4 f4 f3 f2 f1) +let obj7 f7 f6 f5 f4 f3 f2 f1 = + raw_merge_objs (obj3 f7 f6 f5) (obj4 f4 f3 f2 f1) +let obj8 f8 f7 f6 f5 f4 f3 f2 f1 = + raw_merge_objs (obj4 f8 f7 f6 f5) (obj4 f4 f3 f2 f1) +let obj9 f9 f8 f7 f6 f5 f4 f3 f2 f1 = + raw_merge_objs (obj1 f9) (obj8 f8 f7 f6 f5 f4 f3 f2 f1) +let obj10 f10 f9 f8 f7 f6 f5 f4 f3 f2 f1 = + raw_merge_objs (obj2 f10 f9) (obj8 f8 f7 f6 f5 f4 f3 f2 f1) + +let merge_objs o1 o2 = + if is_obj o1 && is_obj o2 then + raw_merge_objs o1 o2 + else + invalid_arg "Json_encoding.merge_objs" + +let raw_merge_tups e1 e2 = + let kind = Kind.combine "tuples" (classify e1) (classify e2) in + make @@ Tups (kind, e1, e2) + +let tup1 e1 = make @@ Tup e1 +let tup2 e2 e1 = + raw_merge_tups (tup1 e2) (tup1 e1) +let tup3 e3 e2 e1 = + raw_merge_tups (tup1 e3) (tup2 e2 e1) +let tup4 e4 e3 e2 e1 = + raw_merge_tups (tup2 e4 e3) (tup2 e2 e1) +let tup5 e5 e4 e3 e2 e1 = + raw_merge_tups (tup1 e5) (tup4 e4 e3 e2 e1) +let tup6 e6 e5 e4 e3 e2 e1 = + raw_merge_tups (tup2 e6 e5) (tup4 e4 e3 e2 e1) +let tup7 e7 e6 e5 e4 e3 e2 e1 = + 
raw_merge_tups (tup3 e7 e6 e5) (tup4 e4 e3 e2 e1) +let tup8 e8 e7 e6 e5 e4 e3 e2 e1 = + raw_merge_tups (tup4 e8 e7 e6 e5) (tup4 e4 e3 e2 e1) +let tup9 e9 e8 e7 e6 e5 e4 e3 e2 e1 = + raw_merge_tups (tup1 e9) (tup8 e8 e7 e6 e5 e4 e3 e2 e1) +let tup10 e10 e9 e8 e7 e6 e5 e4 e3 e2 e1 = + raw_merge_tups (tup2 e10 e9) (tup8 e8 e7 e6 e5 e4 e3 e2 e1) + +let merge_tups t1 t2 = + if is_tup t1 && is_tup t2 then + raw_merge_tups t1 t2 + else + invalid_arg "Tezos_serial.Encoding.merge_tups" + +let conv3 ty = + conv + (fun (c, b, a) -> (c, (b, a))) + (fun (c, (b, a)) -> (c, b, a)) + ty +let obj3 f3 f2 f1 = conv3 (obj3 f3 f2 f1) +let tup3 f3 f2 f1 = conv3 (tup3 f3 f2 f1) +let conv4 ty = + conv + (fun (d, c, b, a) -> ((d, c), (b, a))) + (fun ((d, c), (b, a)) -> (d, c, b, a)) + ty +let obj4 f4 f3 f2 f1 = conv4 (obj4 f4 f3 f2 f1) +let tup4 f4 f3 f2 f1 = conv4 (tup4 f4 f3 f2 f1) +let conv5 ty = + conv + (fun (e, d, c, b, a) -> (e, ((d, c), (b, a)))) + (fun (e, ((d, c), (b, a))) -> (e, d, c, b, a)) + ty +let obj5 f5 f4 f3 f2 f1 = conv5 (obj5 f5 f4 f3 f2 f1) +let tup5 f5 f4 f3 f2 f1 = conv5 (tup5 f5 f4 f3 f2 f1) +let conv6 ty = + conv + (fun (f, e, d, c, b, a) -> ((f, e), ((d, c), (b, a)))) + (fun ((f, e), ((d, c), (b, a))) -> (f, e, d, c, b, a)) + ty +let obj6 f6 f5 f4 f3 f2 f1 = conv6 (obj6 f6 f5 f4 f3 f2 f1) +let tup6 f6 f5 f4 f3 f2 f1 = conv6 (tup6 f6 f5 f4 f3 f2 f1) +let conv7 ty = + conv + (fun (g, f, e, d, c, b, a) -> ((g, (f, e)), ((d, c), (b, a)))) + (fun ((g, (f, e)), ((d, c), (b, a))) -> (g, f, e, d, c, b, a)) + ty +let obj7 f7 f6 f5 f4 f3 f2 f1 = conv7 (obj7 f7 f6 f5 f4 f3 f2 f1) +let tup7 f7 f6 f5 f4 f3 f2 f1 = conv7 (tup7 f7 f6 f5 f4 f3 f2 f1) +let conv8 ty = + conv (fun (h, g, f, e, d, c, b, a) -> + (((h, g), (f, e)), ((d, c), (b, a)))) + (fun (((h, g), (f, e)), ((d, c), (b, a))) -> + (h, g, f, e, d, c, b, a)) + ty +let obj8 f8 f7 f6 f5 f4 f3 f2 f1 = conv8 (obj8 f8 f7 f6 f5 f4 f3 f2 f1) +let tup8 f8 f7 f6 f5 f4 f3 f2 f1 = conv8 (tup8 f8 f7 f6 f5 f4 f3 f2 f1) +let conv9 ty 
= + conv + (fun (i, h, g, f, e, d, c, b, a) -> + (i, (((h, g), (f, e)), ((d, c), (b, a))))) + (fun (i, (((h, g), (f, e)), ((d, c), (b, a)))) -> + (i, h, g, f, e, d, c, b, a)) + ty +let obj9 f9 f8 f7 f6 f5 f4 f3 f2 f1 = + conv9 (obj9 f9 f8 f7 f6 f5 f4 f3 f2 f1) +let tup9 f9 f8 f7 f6 f5 f4 f3 f2 f1 = + conv9 (tup9 f9 f8 f7 f6 f5 f4 f3 f2 f1) +let conv10 ty = + conv + (fun (j, i, h, g, f, e, d, c, b, a) -> + ((j, i), (((h, g), (f, e)), ((d, c), (b, a))))) + (fun ((j, i), (((h, g), (f, e)), ((d, c), (b, a)))) -> + (j, i, h, g, f, e, d, c, b, a)) + ty +let obj10 f10 f9 f8 f7 f6 f5 f4 f3 f2 f1 = + conv10 (obj10 f10 f9 f8 f7 f6 f5 f4 f3 f2 f1) +let tup10 f10 f9 f8 f7 f6 f5 f4 f3 f2 f1 = + conv10 (tup10 f10 f9 f8 f7 f6 f5 f4 f3 f2 f1) + +let check_cases tag_size cases = + if cases = [] then + invalid_arg "Data_encoding.union: empty list of cases." ; + let max_tag = + match tag_size with + | `Uint8 -> 256 + | `Uint16 -> 256 * 256 in + ignore @@ + List.fold_left + (fun others (Case { tag }) -> + match tag with + | Json_only -> others + | Tag tag -> + if List.mem tag others then raise (Duplicated_tag tag) ; + if tag < 0 || max_tag <= tag then + raise (Invalid_tag (tag, tag_size)) ; + tag :: others + ) + [] cases + +let union ?(tag_size = `Uint8) cases = + check_cases tag_size cases ; + let kinds = + List.map (fun (Case { encoding }) -> classify encoding) cases in + let kind = Kind.merge_list tag_size kinds in + make @@ Union (kind, tag_size, cases) +let case ?name tag encoding proj inj = Case { name ; encoding ; proj ; inj ; tag } +let option ty = + union + ~tag_size:`Uint8 + [ case (Tag 1) ty + ~name:"Some" + (fun x -> x) + (fun x -> Some x) ; + case (Tag 0) empty + ~name:"None" + (function None -> Some () | Some _ -> None) + (fun () -> None) ; + ] +let mu name self = + let kind = + try + match classify (self (make @@ Mu (`Dynamic, name, self))) with + | `Fixed _ | `Dynamic -> `Dynamic + | `Variable -> raise Exit + with Exit | _ (* TODO variability error *) -> + ignore @@ 
classify (self (make @@ Mu (`Variable, name, self))) ; + `Variable in + make @@ Mu (kind, name, self) + +let result ok_enc error_enc = + union + ~tag_size:`Uint8 + [ case (Tag 1) ok_enc + (function Ok x -> Some x | Error _ -> None) + (fun x -> Ok x) ; + case (Tag 0) error_enc + (function Ok _ -> None | Error x -> Some x) + (fun x -> Error x) ; + ] + diff --git a/src/lib_data_encoding/encoding.mli b/src/lib_data_encoding/encoding.mli new file mode 100644 index 000000000..5cf24fbe6 --- /dev/null +++ b/src/lib_data_encoding/encoding.mli @@ -0,0 +1,489 @@ +(**************************************************************************) +(* *) +(* Copyright (c) 2014 - 2018. *) +(* Dynamic Ledger Solutions, Inc. *) +(* *) +(* All rights reserved. No warranty, explicit or implicit, provided. *) +(* *) +(**************************************************************************) + +(** Type-safe serialization and deserialization of data structures. *) + +(** {1 Data Encoding} *) + +(** This module provides type-safe serialization and deserialization of + data structures. Backends are provided to both binary and JSON. + + This works by writing type descriptors by hand, using the provided + combinators. These combinators can fine-tune the binary + representation to be compact and efficient, but also provide + proper field names and meta information, so the API of Tezos can + be automatically introspected and documented. + + Here is an example encoding for type [(int * string)]. + + [let enc = obj2 (req "code" uint16) (req "message" string)] + + In JSON, this encoding maps values of type [int * string] to JSON + objects with a field [code] whose value is a number and a field + [message] whose value is a string. + + In binary, this encoding maps to two raw bytes for the [int] + followed by the size of the string in bytes, and finally the raw + contents of the string. 
This binary format is mostly tagless, + meaning that serialized data cannot be interpreted without the + encoding that was used for serialization. + + Regarding binary serialization, encodings are classified as either: + - fixed size (booleans, integers, numbers) + data is always the same size for that type ; + - dynamically sized (arbitrary strings and bytes) + data is of unknown size and requires an explicit length field ; + - variable size (special case of strings, bytes, and arrays) + data makes up the remainder of an object of known size, + thus its size is given by the context, and does not + have to be serialized. + + JSON operations are delegated to [ocplib-json-typed]. *) + +(* TODO: reorder all the functions so it makes sense (ground, combinator, + * predicates, etc.) *) +(* TODO: move the doc into the packing module *) + +module Size: sig + val bool: int + val int8: int + val uint8: int + val char: int + val int16: int + val uint16: int + val uint30: int + val uint32: int + val uint64: int + val int31: int + val int32: int + val int64: int + val float: int +end + +type tag_size = [ `Uint8 | `Uint16 ] + +val tag_size: tag_size -> int + +val apply: ?error:exn -> ('a -> 'b option) list -> 'a -> 'b + +module Kind: sig + + type t = [ `Fixed of int | `Dynamic | `Variable ] + + type length = [ `Fixed of int | `Variable ] + + type enum = [ `Dynamic | `Variable ] + + val combine: string -> t -> t -> t + + val merge : t -> t -> t + + val merge_list: tag_size -> t list -> t + +end + +type case_tag = Tag of int | Json_only + +type 'a desc = + | Null : unit desc + | Empty : unit desc + | Ignore : unit desc + | Constant : string -> unit desc + | Bool : bool desc + | Int8 : int desc + | Uint8 : int desc + | Int16 : int desc + | Uint16 : int desc + | Int31 : int desc + | Int32 : Int32.t desc + | Int64 : Int64.t desc + | RangedInt : { minimum : int ; maximum : int } -> int desc + | RangedFloat : { minimum : float ; maximum : float } -> float desc + | Float : float desc + 
| Bytes : Kind.length -> MBytes.t desc + | String : Kind.length -> string desc + | String_enum : ('a, string * int) Hashtbl.t * 'a array -> 'a desc + | Array : 'a t -> 'a array desc + | List : 'a t -> 'a list desc + | Obj : 'a field -> 'a desc + | Objs : Kind.t * 'a t * 'b t -> ('a * 'b) desc + | Tup : 'a t -> 'a desc + | Tups : Kind.t * 'a t * 'b t -> ('a * 'b) desc + | Union : Kind.t * tag_size * 'a case list -> 'a desc + | Mu : Kind.enum * string * ('a t -> 'a t) -> 'a desc + | Conv : + { proj : ('a -> 'b) ; + inj : ('b -> 'a) ; + encoding : 'b t ; + schema : Json_schema.schema option } -> 'a desc + | Describe : + { title : string option ; + description : string option ; + encoding : 'a t } -> 'a desc + | Def : { name : string ; + encoding : 'a t } -> 'a desc + | Splitted : + { encoding : 'a t ; + json_encoding : 'a Json_encoding.encoding ; + is_obj : bool ; is_tup : bool } -> 'a desc + | Dynamic_size : 'a t -> 'a desc + | Delayed : (unit -> 'a t) -> 'a desc + +and _ field = + | Req : string * 'a t -> 'a field + | Opt : Kind.enum * string * 'a t -> 'a option field + | Dft : string * 'a t * 'a -> 'a field + +and 'a case = + | Case : { name : string option ; + encoding : 'a t ; + proj : ('t -> 'a option) ; + inj : ('a -> 't) ; + tag : case_tag } -> 't case + +and 'a t = { + encoding: 'a desc ; + mutable json_encoding: 'a Json_encoding.encoding option ; +} +type 'a encoding = 'a t + +val make: ?json_encoding: 'a Json_encoding.encoding -> 'a desc -> 'a t + +type signed_integer = [ `Int31 | `Int16 | `Int8 ] +type unsigned_integer = [ `Uint30 | `Uint16 | `Uint8 ] +type integer = [ signed_integer | unsigned_integer ] +val integer_to_size: integer -> int +val range_to_size: minimum:int -> maximum:int -> integer +val enum_size: 'a array -> [> unsigned_integer ] + + +exception No_case_matched +exception Unexpected_tag of int +exception Duplicated_tag of int +exception Invalid_tag of int * [ `Uint8 | `Uint16 ] +exception Unexpected_enum of string * string list +exception 
Parse_error of string +exception Float_out_of_range of float * float * float +exception Int_out_of_range of int * int * int +exception Invalid_size of int + + +(** Special value [null] in JSON, nothing in binary. *) +val null : unit encoding + +(** Empty object (not included in binary, encoded as empty object in JSON). *) +val empty : unit encoding + +(** Unit value, omitted in binary. + Serialized as an empty object in JSON, accepts any object when deserializing. *) +val unit : unit encoding + +(** Constant string (data is not included in the binary data). *) +val constant : string -> unit encoding + +(** Signed 8 bit integer + (data is encoded as a byte in binary and an integer in JSON). *) +val int8 : int encoding + +(** Unsigned 8 bit integer + (data is encoded as a byte in binary and an integer in JSON). *) +val uint8 : int encoding + +(** Signed 16 bit integer + (data is encoded as a short in binary and an integer in JSON). *) +val int16 : int encoding + +(** Unsigned 16 bit integer + (data is encoded as a short in binary and an integer in JSON). *) +val uint16 : int encoding + +(** Signed 31 bit integer, which corresponds to type int on 32-bit OCaml systems + (data is encoded as a 32 bit int in binary and an integer in JSON). *) +val int31 : int encoding + +(** Signed 32 bit integer + (data is encoded as a 32-bit int in binary and an integer in JSON). *) +val int32 : int32 encoding + +(** Signed 64 bit integer + (data is encoded as a 64-bit int in binary and a decimal string in JSON). *) +val int64 : int64 encoding + +(** Integer with bounds in a given range. Both bounds are inclusive. + + Raises [Invalid_argument] if the bounds are beyond the interval + [-2^30; 2^30-1]. These bounds are chosen to be compatible with all versions + of OCaml. +*) +val ranged_int : int -> int -> int encoding + +(** Float with bounds in a given range. 
Both bounds are inclusive. *) +val ranged_float : float -> float -> float encoding + +(** Encoding of a boolean + (data is encoded as a byte in binary and a boolean in JSON). *) +val bool : bool encoding + +(** Encoding of a string + - default variable in width + - encoded as a byte sequence in binary + - encoded as a string in JSON. *) +val string : string encoding + +(** Encoding of arbitrary bytes + (encoded via hex in JSON and directly as a byte sequence in binary). *) +val bytes : MBytes.t encoding + +(** Encoding of floating point number + (encoded as a floating point number in JSON and a double in binary). *) +val float : float encoding + +(** Combinator to make an optional value + (represented as a 1-byte tag followed by the data (or nothing) in binary + and either the raw value or an empty object in JSON). *) +val option : 'a encoding -> 'a option encoding + +(** Combinator to make a {!result} value + (represented as a 1-byte tag followed by the data of either type in binary, + and either unwrapped value in JSON (the caller must ensure that both + encodings do not collide)). *) +val result : 'a encoding -> 'b encoding -> ('a, 'b) result encoding + +(** Encode enumeration via association list + (represented as a string in JSON and binary). *) +val string_enum : (string * 'a) list -> 'a encoding + + +(** Is the given encoding serialized as a JSON object? *) +val is_obj : 'a encoding -> bool + +(** Does the given encoding encode a tuple? *) +val is_tup : 'a encoding -> bool + +(** Create encodings that produce data of a fixed length when binary encoded. + See the preamble for an explanation. *) +module Fixed : sig + + (** Encode a fixed length string *) + val string : int -> string encoding + + (** Encode a fixed length byte sequence *) + val bytes : int -> MBytes.t encoding +end + +(** Create encodings that produce data of a variable length when binary encoded. + See the preamble for an explanation. 
*) +module Variable : sig + (** Encode a string *) + val string : string encoding + + (** Encode a byte sequence *) + val bytes : MBytes.t encoding + + (** Array encoding combinator *) + val array : 'a encoding -> 'a array encoding + + (** List encoding combinator *) + val list : 'a encoding -> 'a list encoding +end + +(** Mark an encoding as being of dynamic size. + Forces the size to be stored alongside content when needed. + Usually used to fix errors from combining two encodings. *) +val dynamic_size : 'a encoding -> 'a encoding + +(** Recompute the encoding definition each time it is used. + Useful for dynamically updating the encoding of values of an extensible + type via a global reference (e.g. exceptions). *) +val delayed : (unit -> 'a encoding) -> 'a encoding + +(** Required field. *) +val req : + ?title:string -> ?description:string -> + string -> 't encoding -> 't field + +(** Optional field. Omitted entirely in JSON encoding if None. + Omitted in binary if the only optional field in a [`Variable] + encoding, otherwise a 1-byte prefix (`0` or `1`) tells if the + field is present or not. *) +val opt : + ?title:string -> ?description:string -> + string -> 't encoding -> 't option field + +(** Optional field of variable length. + Only one can be present in a given object. *) +val varopt : + ?title:string -> ?description:string -> + string -> 't encoding -> 't option field + +(** Required field with a default value. + If the default value is passed, the field is omitted in JSON. + The value is always serialized in binary. *) +val dft : + ?title:string -> ?description:string -> + string -> 't encoding -> 't -> 't field + +(** {2 Constructors for objects with N fields} *) + +(** These are serialized to binary by converting each internal object to binary + and placing them in the order of the original object. + These are serialized to JSON as a JSON object with the field names. 
*) + +val obj1 : + 'f1 field -> 'f1 encoding +val obj2 : + 'f1 field -> 'f2 field -> ('f1 * 'f2) encoding +val obj3 : + 'f1 field -> 'f2 field -> 'f3 field -> ('f1 * 'f2 * 'f3) encoding +val obj4 : + 'f1 field -> 'f2 field -> 'f3 field -> 'f4 field -> + ('f1 * 'f2 * 'f3 * 'f4) encoding +val obj5 : + 'f1 field -> 'f2 field -> 'f3 field -> 'f4 field -> 'f5 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5) encoding +val obj6 : + 'f1 field -> 'f2 field -> 'f3 field -> 'f4 field -> 'f5 field -> + 'f6 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6) encoding +val obj7 : + 'f1 field -> 'f2 field -> 'f3 field -> 'f4 field -> 'f5 field -> + 'f6 field -> 'f7 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7) encoding +val obj8 : + 'f1 field -> 'f2 field -> 'f3 field -> 'f4 field -> 'f5 field -> + 'f6 field -> 'f7 field -> 'f8 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7 * 'f8) encoding +val obj9 : + 'f1 field -> 'f2 field -> 'f3 field -> 'f4 field -> 'f5 field -> + 'f6 field -> 'f7 field -> 'f8 field -> 'f9 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7 * 'f8 * 'f9) encoding +val obj10 : + 'f1 field -> 'f2 field -> 'f3 field -> 'f4 field -> 'f5 field -> + 'f6 field -> 'f7 field -> 'f8 field -> 'f9 field -> 'f10 field -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7 * 'f8 * 'f9 * 'f10) encoding + +(** {2 Constructors for tuples with N fields} *) + +(** These are serialized to binary by converting each internal object to binary + and placing them in the order of the original object. + These are serialized to JSON as JSON arrays/lists. 
*) + +val tup1 : + 'f1 encoding -> + 'f1 encoding +val tup2 : + 'f1 encoding -> 'f2 encoding -> + ('f1 * 'f2) encoding +val tup3 : + 'f1 encoding -> 'f2 encoding -> 'f3 encoding -> + ('f1 * 'f2 * 'f3) encoding +val tup4 : + 'f1 encoding -> 'f2 encoding -> 'f3 encoding -> 'f4 encoding -> + ('f1 * 'f2 * 'f3 * 'f4) encoding +val tup5 : + 'f1 encoding -> 'f2 encoding -> 'f3 encoding -> 'f4 encoding -> + 'f5 encoding -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5) encoding +val tup6 : + 'f1 encoding -> 'f2 encoding -> 'f3 encoding -> 'f4 encoding -> + 'f5 encoding -> 'f6 encoding -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6) encoding +val tup7 : + 'f1 encoding -> 'f2 encoding -> 'f3 encoding -> 'f4 encoding -> + 'f5 encoding -> 'f6 encoding -> 'f7 encoding -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7) encoding +val tup8 : + 'f1 encoding -> 'f2 encoding -> 'f3 encoding -> 'f4 encoding -> + 'f5 encoding -> 'f6 encoding -> 'f7 encoding -> 'f8 encoding -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7 * 'f8) encoding +val tup9 : + 'f1 encoding -> 'f2 encoding -> 'f3 encoding -> 'f4 encoding -> + 'f5 encoding -> 'f6 encoding -> 'f7 encoding -> 'f8 encoding -> + 'f9 encoding -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7 * 'f8 * 'f9) encoding +val tup10 : + 'f1 encoding -> 'f2 encoding -> 'f3 encoding -> 'f4 encoding -> + 'f5 encoding -> 'f6 encoding -> 'f7 encoding -> 'f8 encoding -> + 'f9 encoding -> 'f10 encoding -> + ('f1 * 'f2 * 'f3 * 'f4 * 'f5 * 'f6 * 'f7 * 'f8 * 'f9 * 'f10) encoding + +(** {2 Combinators} *) + +(** Create a larger object from the encodings of two smaller ones. + @raise invalid_arg if both arguments are not objects. *) +val merge_objs : 'o1 encoding -> 'o2 encoding -> ('o1 * 'o2) encoding + +(** Create a large tuple encoding from two smaller ones. + @raise invalid_arg if both values are not tuples. *) +val merge_tups : 'a1 encoding -> 'a2 encoding -> ('a1 * 'a2) encoding + +(** Array combinator. *) +val array : 'a encoding -> 'a array encoding + +(** List combinator. 
*) +val list : 'a encoding -> 'a list encoding + +(** Encodes a variant constructor. Takes the encoding for the specific + parameters, a recognizer function that will extract the parameters + in case the expected case of the variant is being serialized, and + a constructor function for deserialization. + + The tag must be less than the tag size of the union in which you use the case. + An optional tag gives a name to a case and should be used to maintain + compatibility. + + An optional name for the case can be provided, + which is used in the binary documentation. *) +val case : + ?name:string -> + case_tag -> + 'a encoding -> ('t -> 'a option) -> ('a -> 't) -> 't case + +(** Create a single encoding from a series of cases. + + In JSON, all cases are tried one after the other. The caller must + check for collisions. + + In binary, a prefix tag is added to discriminate quickly between + cases. The default is `Uint8 and you must use a `Uint16 if you are + going to have more than 256 cases. + + This function will raise an exception if it is given the empty list + or if there are more cases than can fit in the tag size. *) +val union : + ?tag_size:[ `Uint8 | `Uint16 ] -> 't case list -> 't encoding + +(** Add documentation to an encoding. *) +val describe : + ?title:string -> ?description:string -> + 't encoding ->'t encoding + +(** Give a name to an encoding. *) +val def : string -> 'a encoding -> 'a encoding + +(** Provide a transformer from one encoding to a different one. + + Used to simplify nested encodings or to change the generic tuples + built by {obj1}, {tup1} and the like into proper records. + + A schema may optionally be provided as documentation of the new encoding. *) +val conv : + ('a -> 'b) -> ('b -> 'a) -> + ?schema:Json_schema.schema -> + 'b encoding -> 'a encoding + +(** Combinator for recursive encodings. *) +val mu : string -> ('a encoding -> 'a encoding) -> 'a encoding + +(** Classify an encoding wrt. 
its binary serialization as explained in the preamble. *) +val classify : 'a encoding -> [ `Fixed of int | `Dynamic | `Variable ] + +(** Define different encodings for JSON and binary serialization. *) +val raw_splitted : json:'a Json_encoding.encoding -> binary:'a encoding -> 'a encoding + diff --git a/src/lib_data_encoding/json.ml b/src/lib_data_encoding/json.ml new file mode 100644 index 000000000..5fc5972e3 --- /dev/null +++ b/src/lib_data_encoding/json.ml @@ -0,0 +1,277 @@ +(**************************************************************************) +(* *) +(* Copyright (c) 2014 - 2018. *) +(* Dynamic Ledger Solutions, Inc. *) +(* *) +(* All rights reserved. No warranty, explicit or implicit, provided. *) +(* *) +(**************************************************************************) + +open Encoding (* TODO: unopen *) + + +type json = + [ `O of (string * json) list + | `Bool of bool + | `Float of float + | `A of json list + | `Null + | `String of string ] + +type schema = Json_schema.schema + +type pair_builder = { + build: 'a 'b. Kind.t -> 'a t -> 'b t -> ('a * 'b) t +} + +exception Parse_error of string + +let wrap_error f = + fun str -> + try f str + with exn -> raise (Json_encoding.Cannot_destruct ([], exn)) + +let int64_encoding = + let open Json_encoding in + union [ + case + int32 + (fun i -> + let j = Int64.to_int32 i in + if Int64.equal (Int64.of_int32 j) i then Some j else None) + Int64.of_int32 ; + case + string + (fun i -> Some (Int64.to_string i)) + Int64.of_string + ] + +let bytes_jsont = + let open Json_encoding in + let schema = + let open Json_schema in + create + { title = None ; + description = None ; + default = None; + enum = None; + kind = String { + pattern = Some "^[a-zA-Z0-9]+$"; + min_length = 0; + max_length = None; + }; + format = None ; + id = None } in + conv ~schema + MBytes.to_hex + (wrap_error MBytes.of_hex) + (conv + (fun (`Hex h) -> h) + (fun h -> `Hex h) + string) + +let rec lift_union : type a. 
a t -> a t = fun e -> + match e.encoding with + | Conv { proj ; inj ; encoding = e ; schema } -> begin + match lift_union e with + | { encoding = Union (kind, tag, cases) } -> + make @@ + Union (kind, tag, + List.map + (fun (Case { name ; encoding ; proj = proj' ; inj = inj' ; tag }) -> + Case { encoding ; + name ; + proj = (fun x -> proj' (proj x)); + inj = (fun x -> inj (inj' x)) ; + tag }) + cases) + | e -> make @@ Conv { proj ; inj ; encoding = e ; schema } + end + | Objs (p, e1, e2) -> + lift_union_in_pair + { build = fun p e1 e2 -> make @@ Objs (p, e1, e2) } + p e1 e2 + | Tups (p, e1, e2) -> + lift_union_in_pair + { build = fun p e1 e2 -> make @@ Tups (p, e1, e2) } + p e1 e2 + | _ -> e + +and lift_union_in_pair + : type a b. pair_builder -> Kind.t -> a t -> b t -> (a * b) t + = fun b p e1 e2 -> + match lift_union e1, lift_union e2 with + | e1, { encoding = Union (_kind, tag, cases) } -> + make @@ + Union (`Dynamic (* ignored *), tag, + List.map + (fun (Case { name ; encoding = e2 ; proj ; inj ; tag }) -> + Case { encoding = lift_union_in_pair b p e1 e2 ; + name ; + proj = (fun (x, y) -> + match proj y with + | None -> None + | Some y -> Some (x, y)) ; + inj = (fun (x, y) -> (x, inj y)) ; + tag }) + cases) + | { encoding = Union (_kind, tag, cases) }, e2 -> + make @@ + Union (`Dynamic (* ignored *), tag, + List.map + (fun (Case { name ; encoding = e1 ; proj ; inj ; tag }) -> + Case { encoding = lift_union_in_pair b p e1 e2 ; + name ; + proj = (fun (x, y) -> + match proj x with + | None -> None + | Some x -> Some (x, y)) ; + inj = (fun (x, y) -> (inj x, y)) ; + tag }) + cases) + | e1, e2 -> b.build p e1 e2 + +let rec json : type a. 
a desc -> a Json_encoding.encoding = + let open Json_encoding in + function + | Null -> null + | Empty -> empty + | Constant s -> constant s + | Ignore -> unit + | Int8 -> ranged_int ~minimum:~-(1 lsl 7) ~maximum:((1 lsl 7) - 1) "int8" + | Uint8 -> ranged_int ~minimum:0 ~maximum:((1 lsl 8) - 1) "uint8" + | Int16 -> ranged_int ~minimum:~-(1 lsl 15) ~maximum:((1 lsl 15) - 1) "int16" + | Uint16 -> ranged_int ~minimum:0 ~maximum:((1 lsl 16) - 1) "uint16" + | RangedInt { minimum ; maximum } -> ranged_int ~minimum ~maximum "rangedInt" + | Int31 -> int + | Int32 -> int32 + | Int64 -> int64_encoding + | Bool -> bool + | Float -> float + | RangedFloat { minimum; maximum } -> ranged_float ~minimum ~maximum "rangedFloat" + | String _ -> string (* TODO: check length *) + | Bytes _ -> bytes_jsont (* TODO check length *) + | String_enum (tbl, _) -> string_enum (Hashtbl.fold (fun a (str, _) acc -> (str, a) :: acc) tbl []) + | Array e -> array (get_json e) + | List e -> list (get_json e) + | Obj f -> obj1 (field_json f) + | Objs (_, e1, e2) -> + merge_objs (get_json e1) (get_json e2) + | Tup e -> tup1 (get_json e) + | Tups (_, e1, e2) -> + merge_tups (get_json e1) (get_json e2) + | Conv { proj ; inj ; encoding = e ; schema } -> conv ?schema proj inj (get_json e) + | Describe { title ; description ; encoding = e } -> + describe ?title ?description (get_json e) + | Def { name ; encoding = e } -> def name (get_json e) + | Mu (_, name, self) as ty -> + mu name (fun json_encoding -> get_json @@ self (make ~json_encoding ty)) + | Union (_tag_size, _, cases) -> union (List.map case_json cases) + | Splitted { json_encoding } -> json_encoding + | Dynamic_size e -> get_json e + | Delayed f -> get_json (f ()) + +and field_json + : type a. a field -> a Json_encoding.field = + let open Json_encoding in + function + | Req (name, e) -> req name (get_json e) + | Opt (_, name, e) -> opt name (get_json e) + | Dft (name, e, d) -> dft name (get_json e) d + +and case_json : type a. 
a case -> a Json_encoding.case = + let open Json_encoding in + function + | Case { encoding = e ; proj ; inj ; _ } -> case (get_json e) proj inj + +and get_json : type a. a t -> a Json_encoding.encoding = fun e -> + match e.json_encoding with + | None -> + let json_encoding = json (lift_union e).encoding in + e.json_encoding <- Some json_encoding ; + json_encoding + | Some json_encoding -> json_encoding + +let convert = get_json + +type path = path_item list +and path_item = + [ `Field of string + (** A field in an object. *) + | `Index of int + (** An index in an array. *) + | `Star + (** Any / every field or index. *) + | `Next + (** The next element after an array. *) ] + +include Json_encoding + +let construct e v = construct (get_json e) v +let destruct e v = destruct (get_json e) v +let schema e = schema (get_json e) + +let cannot_destruct fmt = + Format.kasprintf + (fun msg -> raise (Cannot_destruct ([], Failure msg))) + fmt + +type t = json + +let to_root = function + | `O ctns -> `O ctns + | `A ctns -> `A ctns + | `Null -> `O [] + | oth -> `A [ oth ] + +let to_string ?minify j = Ezjsonm.to_string ?minify (to_root j) + +let pp = Json_repr.(pp (module Ezjsonm)) + +let from_string s = + try Ok (Ezjsonm.from_string s :> json) + with Ezjsonm.Parse_error (_, msg) -> Error msg + +let from_stream (stream: string Lwt_stream.t) = + let buffer = ref "" in + Lwt_stream.filter_map + (fun str -> + buffer := !buffer ^ str ; + try + let json = Ezjsonm.from_string !buffer in + buffer := "" ; + Some (Ok json) + with Ezjsonm.Parse_error _ -> + None) + stream + +let encoding = + let binary : Json_repr.ezjsonm Encoding.t = + Encoding.conv + (fun json -> + Json_repr.convert + (module Json_repr.Ezjsonm) + (module Json_repr_bson.Repr) + json |> + Json_repr_bson.bson_to_bytes |> + Bytes.to_string) + (fun s -> try + Bytes.of_string s |> + Json_repr_bson.bytes_to_bson ~copy:false |> + Json_repr.convert + (module Json_repr_bson.Repr) + (module Json_repr.Ezjsonm) + with + | 
Json_repr_bson.Bson_decoding_error (msg, _, _) -> + raise (Parse_error msg)) + Encoding.string in + let json = + Json_encoding.any_ezjson_value in + raw_splitted ~binary ~json + +let schema_encoding = + Encoding.conv + Json_schema.to_json + Json_schema.of_json + encoding + diff --git a/src/lib_data_encoding/json.mli b/src/lib_data_encoding/json.mli new file mode 100644 index 000000000..75811afe6 --- /dev/null +++ b/src/lib_data_encoding/json.mli @@ -0,0 +1,96 @@ +(**************************************************************************) +(* *) +(* Copyright (c) 2014 - 2018. *) +(* Dynamic Ledger Solutions, Inc. *) +(* *) +(* All rights reserved. No warranty, explicit or implicit, provided. *) +(* *) +(**************************************************************************) + +(** Type-safe serialization and deserialization of data structures. *) + +(** In memory JSON data, compatible with [Ezjsonm]. *) +type json = + [ `O of (string * json) list + | `Bool of bool + | `Float of float + | `A of json list + | `Null + | `String of string ] +type t = json + +type schema = Json_schema.schema + + +(** Create a {!Json_encoding.encoding} from an {!encoding}. *) +val convert : 'a Encoding.t -> 'a Json_encoding.encoding + +(** Generate a schema from an {!encoding}. *) +val schema : 'a Encoding.t -> schema + +val encoding: json Encoding.t +val schema_encoding: schema Encoding.t + + +(** Construct a JSON object from an encoding. *) +val construct : 't Encoding.t -> 't -> json + +(** Destruct a JSON object into a value. + Fail with an exception if the JSON object and encoding do not match. *) +val destruct : 't Encoding.t -> json -> 't + +(** JSON Error. *) + +type path = path_item list + +(** A set of accessors that point to a location in a JSON object. *) +and path_item = + [ `Field of string + (** A field in an object. *) + | `Index of int + (** An index in an array. *) + | `Star + (** Any / every field or index. *) + | `Next + (** The next element after an array. 
*) ] + +(** Exception raised by destructors, with the location in the original + JSON structure and the specific error. *) +exception Cannot_destruct of (path * exn) + +(** Unexpected kind of data encountered (w/ the expectation). *) +exception Unexpected of string * string + +(** Some {!union} couldn't be destructed, w/ the reasons for each {!case}. *) +exception No_case_matched of exn list + +(** Array of unexpected size encountered (w/ the expectation). *) +exception Bad_array_size of int * int + +(** Missing field in an object. *) +exception Missing_field of string + +(** Supernumerary field in an object. *) +exception Unexpected_field of string + +val print_error : + ?print_unknown: (Format.formatter -> exn -> unit) -> + Format.formatter -> exn -> unit + +(** Helpers for writing encoders. *) +val cannot_destruct : ('a, Format.formatter, unit, 'b) format4 -> 'a +val wrap_error : ('a -> 'b) -> 'a -> 'b + +(** Read a JSON document from a string. *) +val from_string : string -> (json, string) result + +(** Read a stream of JSON documents from a stream of strings. + A single JSON document may be represented in multiple consecutive + strings. But only the first document of a string is considered. *) +val from_stream : string Lwt_stream.t -> (json, string) result Lwt_stream.t + +(** Write a JSON document to a string. This goes via an intermediate + buffer and so may be slow on large documents. *) +val to_string : ?minify:bool -> json -> string + +val pp : Format.formatter -> json -> unit