From a521c01115bf49326245c05f779c98bb28cc35e0 Mon Sep 17 00:00:00 2001 From: Tom Jack Date: Sat, 21 Sep 2019 18:38:45 -0700 Subject: [PATCH] some michelson optimizations --- src/main/compile/dune | 1 - src/main/compile/michelson.ml | 0 src/main/compile/of_mini_c.ml | 12 +- src/passes/9-self_michelson/self_michelson.ml | 402 +++++++++++++++++- vendors/ligo-utils/simple-utils/x_list.ml | 9 +- 5 files changed, 393 insertions(+), 31 deletions(-) delete mode 100644 src/main/compile/michelson.ml diff --git a/src/main/compile/dune b/src/main/compile/dune index a3c992c69..705ed50b9 100644 --- a/src/main/compile/dune +++ b/src/main/compile/dune @@ -14,7 +14,6 @@ mini_c compiler self_michelson - operators ) (preprocess (pps ppx_let) diff --git a/src/main/compile/michelson.ml b/src/main/compile/michelson.ml deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/main/compile/of_mini_c.ml b/src/main/compile/of_mini_c.ml index 1a385040f..296e4d814 100644 --- a/src/main/compile/of_mini_c.ml +++ b/src/main/compile/of_mini_c.ml @@ -2,19 +2,21 @@ open Trace open Mini_c open Tezos_utils -let compile_value : value -> type_value -> Michelson.t result = - Compiler.Program.translate_value +let compile_value : value -> type_value -> Michelson.t result = fun x a -> + let%bind body = Compiler.Program.translate_value x a in + let body = Self_michelson.optimize body in + ok body let compile_expression_as_value : expression -> _ result = fun e -> let%bind value = expression_to_value e in let%bind result = compile_value value e.type_value in - let%bind result = Self_michelson.all_expression result in + let result = Self_michelson.optimize result in ok result let compile_expression_as_function : expression -> _ result = fun e -> let (input , output) = t_unit , e.type_value in let%bind body = Compiler.Program.translate_expression e Compiler.Environment.empty in - let%bind body = Self_michelson.all_expression body in + let body = Self_michelson.optimize body in let body = Michelson.(seq [ i_drop ; body ]) in let%bind (input , output) = bind_map_pair Compiler.Type.Ty.type_ (input , output) in let open! Compiler.Program in @@ -24,7 +26,7 @@ let compile_function = fun e -> let%bind (input , output) = get_t_function e.type_value in let%bind body = get_function e in let%bind body = compile_value body (t_function input output) in - let%bind body = Self_michelson.all_expression body in + let body = Self_michelson.optimize body in let%bind (input , output) = bind_map_pair Compiler.Type.Ty.type_ (input , output) in let open! Compiler.Program in ok { input ; output ; body } diff --git a/src/passes/9-self_michelson/self_michelson.ml b/src/passes/9-self_michelson/self_michelson.ml index 07d8e4f64..3085376e3 100644 --- a/src/passes/9-self_michelson/self_michelson.ml +++ b/src/passes/9-self_michelson/self_michelson.ml @@ -1,31 +1,387 @@ -open Trace +(* This file attempts to optimize Michelson code. The goal is to + reduce the code size (the size of the binary Micheline.) + + I have ignored the 'execution gas' completely, because it seems + that users will encounter code size problems earlier and more + often. +*) + open Tezos_micheline.Micheline -open Memory_proto_alpha.Protocol.Michelson_v1_primitives +open Tezos_utils.Michelson -let strip_annots = fun e -> - match e with - | Prim (l , p , lst , _) -> ok @@ Prim (l , p , lst , []) - | x -> ok x +(* `arity p` should be `Some n` only if p is (always) an instruction + which removes n items from the stack and uses them to push 1 item, + without effects other than gas consumption. It must never fail. *) -let strip_nops = fun e -> - match e with - | Seq(l, [Prim (_, I_UNIT, _, _) ; Prim(_, I_DROP, _, _)]) -> ok @@ Seq (l, []) - | x -> ok x +let arity : prim -> int option = function + | I_PACK -> Some 1 + | I_UNPACK -> Some 1 + | I_BLAKE2B -> Some 1 + | I_SHA256 -> Some 1 + | I_SHA512 -> Some 1 + | I_ABS -> Some 1 + | I_ADD -> None (* can fail for tez *) + | I_AMOUNT -> Some 0 + | I_AND -> Some 2 + | I_BALANCE -> Some 0 + | I_CAR -> Some 1 + | I_CDR -> Some 1 + | I_CHECK_SIGNATURE -> Some 3 + | I_COMPARE -> Some 2 + | I_CONCAT -> None (* sometimes 1, sometimes 2 :( *) + | I_CONS -> Some 2 + | I_CREATE_ACCOUNT -> None (* effects, kind of *) + | I_CREATE_CONTRACT -> None (* effects, kind of *) + | I_IMPLICIT_ACCOUNT -> Some 1 + | I_DIP -> None + | I_DROP -> None + | I_DUP -> None + | I_EDIV -> Some 2 + | I_EMPTY_MAP -> Some 0 + | I_EMPTY_SET -> Some 0 + | I_EQ -> Some 1 + | I_EXEC -> None (* effects *) + | I_FAILWITH -> None + | I_GE -> Some 1 + | I_GET -> Some 2 + | I_GT -> Some 1 + | I_HASH_KEY -> Some 1 + | I_IF -> None + | I_IF_CONS -> None + | I_IF_LEFT -> None + | I_IF_NONE -> None + | I_INT -> Some 1 + | I_LAMBDA -> Some 0 + | I_LE -> Some 1 + | I_LEFT -> Some 1 + | I_LOOP -> None + | I_LSL -> Some 1 + | I_LSR -> Some 1 + | I_LT -> Some 1 + | I_MAP -> None + | I_MEM -> Some 2 + | I_MUL -> None (* can fail for tez *) + | I_NEG -> Some 1 + | I_NEQ -> Some 1 + | I_NIL -> Some 0 + | I_NONE -> Some 0 + | I_NOT -> Some 1 + | I_NOW -> Some 0 + | I_OR -> Some 2 + | I_PAIR -> Some 2 + | I_PUSH -> Some 0 + | I_RIGHT -> Some 1 + | I_SIZE -> Some 1 + | I_SOME -> Some 1 + | I_SOURCE -> Some 0 + | I_SENDER -> Some 0 + | I_SELF -> Some 0 + | I_SLICE -> Some 3 + | I_STEPS_TO_QUOTA -> Some 0 + | I_SUB -> None (* can fail for tez *) + | I_SWAP -> None + | I_TRANSFER_TOKENS -> None (* effects, kind of *) + | I_SET_DELEGATE -> None (* effects, kind of *) + | I_UNIT -> Some 0 + | I_UPDATE -> Some 3 + | I_XOR -> Some 2 + | I_ITER -> None + | I_LOOP_LEFT -> None + | I_ADDRESS -> Some 1 + | I_CONTRACT -> Some 1 + | I_ISNAT -> Some 1 + | I_CAST -> None + | I_RENAME -> None + | K_parameter + | K_storage + | K_code + | D_False + | D_Elt + | D_Left + | D_None + | D_Pair + | D_Right + | D_Some + | D_True + | D_Unit + | T_bool + | T_contract + | T_int + | T_key + | T_key_hash + | T_lambda + | T_list + | T_map + | T_big_map + | T_nat + | T_option + | T_or + | T_pair + | T_set + | T_signature + | T_string + | T_bytes + | T_mutez + | T_timestamp + | T_unit + | T_operation + | T_address -> None -let all = [ - strip_annots ; - strip_nops ; -] +let is_nullary_op (p : prim) : bool = + match arity p with + | Some 0 -> true + | _ -> false -let rec bind_chain : ('a -> 'a result) list -> 'a -> 'a result = fun fs x -> +let is_unary_op (p : prim) : bool = + match arity p with + | Some 1 -> true + | _ -> false + +let is_binary_op (p : prim) : bool = + match arity p with + | Some 2 -> true + | _ -> false + +let is_ternary_op (p : prim) : bool = + match arity p with + | Some 3 -> true + | _ -> false + +let unseq : michelson -> michelson list = function + | Seq (_, args) -> args + | x -> [x] + +(* Replace `PUSH (lambda a b) {}` with `LAMBDA a b {}` *) +let rec use_lambda_instr : michelson -> michelson = + fun x -> + match x with + | Seq (l, args) -> + Seq (l, List.map use_lambda_instr args) + | Prim (_, I_PUSH, [Prim (_, T_lambda, [arg; ret], _); code], _) -> + i_lambda arg ret code + | Prim (_, I_PUSH, _, _) -> + x (* possibly missing some nested lambdas *) + | Prim (l, p, args, annot) -> + Prim (l, p, List.map use_lambda_instr args, annot) + | _ -> x + +(* This flattens nested seqs. {} is erased, { { code1 } ; { code2 } } + becomes { code1 ; code2 }, etc. This is important because each seq + costs 5 bytes, for the "Seq" tag and a 4 byte length. *) +let rec flatten_seqs : michelson -> michelson = + fun x -> + match x with + | Seq (l, args) -> + let args = List.concat @@ List.map (fun x -> unseq (flatten_seqs x)) args in + Seq (l, args) + (* Should not flatten literal seq data in PUSH. Ugh... *) + | Prim (_, I_PUSH, _, _) -> x + | Prim (l, p, args, annot) -> Prim (l, p, List.map flatten_seqs args, annot) + | _ -> x + +type peep1 = michelson -> michelson list option +type peep2 = michelson * michelson -> michelson list option +type peep3 = michelson * michelson * michelson -> michelson list option +type peep4 = michelson * michelson * michelson * michelson -> michelson list option + +let rec peep1 (f : peep1) : michelson list -> bool * michelson list = function + | [] -> (false, []) + | x1 :: xs -> + match f x1 with + | Some xs' -> let (_, xs') = peep1 f (xs' @ xs) in + (true, xs') + | None -> let (changed, xs) = peep1 f xs in + (changed, x1 :: xs) + +let rec peep2 (f : peep2) : michelson list -> bool * michelson list = function + | [] -> (false, []) + | [x] -> (false, [x]) + | x1 :: x2 :: xs -> + match f (x1, x2) with + | Some xs' -> let (_, xs') = peep2 f (xs' @ xs) in + (true, xs') + | None -> let (changed, xs') = peep2 f (x2 :: xs) in + (changed, x1 :: xs') + +let rec peep3 (f : peep3) : michelson list -> bool * michelson list = function + | [] -> (false, []) + | [x] -> (false, [x]) + | [x ; y] -> (false, [x ; y]) + | x1 :: x2 :: x3 :: xs -> + match f (x1, x2, x3) with + | Some xs' -> let (_, xs') = peep3 f (xs' @ xs) in + (true, xs') + | None -> let (changed, xs') = peep3 f (x2 :: x3 :: xs) in + (changed, x1 :: xs') + +let rec peep4 (f : peep4) : michelson list -> bool * michelson list = function + | [] -> (false, []) + | [x] -> (false, [x]) + | [x ; y] -> (false, [x ; y]) + | [x ; y ; z] -> (false, [x ; y ; z]) + | x1 :: x2 :: x3 :: x4 :: xs -> + match f (x1, x2, x3, x4) with + | Some xs' -> let (_, xs') = peep4 f (xs' @ xs) in + (true, xs') + | None -> let (changed, xs') = peep4 f (x2 :: x3 :: x4 :: xs) in + (changed, x1 :: xs') + +(* apply f to all seqs *) +let rec peephole (f : michelson list -> bool * michelson list) : michelson -> bool * michelson = + let peep_args ~seq args = + let (changed, args) = if seq + then f args + else (false, args) in + List.fold_map_acc + (fun changed1 arg -> + let (changed2, arg) = peephole f arg in + (changed1 || changed2, arg)) + changed + args in + function + | Seq (l, args) -> let (changed, args) = peep_args ~seq:true args in + (changed, Seq (l, args)) + | Prim (l, p, args, annot) -> let (changed, args) = peep_args ~seq:false args in + (changed, Prim (l, p, args, annot)) + | x -> (false, x) + +(* apply the optimizers in order *) +let rec sequence_optimizers (fs : (michelson -> bool * michelson) list) : michelson -> bool * michelson = match fs with - | [] -> ok x - | hd :: tl -> ( - let aux : 'a -> 'a result = fun x -> bind (bind_chain tl) (hd x) in - bind aux (ok x) - ) + | [] -> fun x -> (false, x) + | f :: fs -> fun x -> let (changed1, x) = f x in + let (changed2, x) = sequence_optimizers fs x in + (changed1 || changed2, x) -let all_expression = - let all_expr = List.map Helpers.map_expression all in - bind_chain all_expr +(* take the fixed point of an optimizer (!) *) +let rec iterate_optimizer (f : michelson -> bool * michelson) : michelson -> michelson = + fun x -> + let (changed, x) = f x in + if changed + then iterate_optimizer f x + else x + +let opt_drop2 : peep2 = function + (* nullary_op ; DROP ↦ *) + | Prim (_, p, _, _), Prim (_, I_DROP, _, _) when is_nullary_op p -> Some [] + (* DUP ; DROP ↦ *) + | Prim (_, I_DUP, _, _), Prim (_, I_DROP, _, _) -> Some [] + (* unary_op ; DROP ↦ DROP *) + | Prim (_, p, _, _), Prim (_, I_DROP, _, _) when is_unary_op p -> Some [i_drop] + (* binary_op ; DROP ↦ DROP ; DROP *) + | Prim (_, p, _, _), Prim (_, I_DROP, _, _) when is_binary_op p -> Some [i_drop; i_drop] + (* ternary_op ; DROP ↦ DROP ; DROP ; DROP *) + | Prim (_, p, _, _), Prim (_, I_DROP, _, _) when is_ternary_op p -> Some [i_drop; i_drop; i_drop] + | _ -> None + +let opt_drop4 : peep4 = function + (* DUP; unary_op; SWAP; DROP ↦ unary_op *) + | Prim (_, I_DUP, _, _), + (Prim (_, p, _, _) as unary_op), + Prim (_, I_SWAP, _, _), + Prim (_, I_DROP, _, _) + when is_unary_op p -> + Some [unary_op] + | _ -> None + +let opt_dip1 : peep1 = function + (* DIP {} ↦ *) + | Prim (_, I_DIP, [Seq (_, [])], _) -> Some [] + (* DIP { nullary_op } ↦ nullary_op ; SWAP *) + | Prim (_, I_DIP, [Seq (_, [(Prim (_, p, _, _) as push)])], _) when is_nullary_op p -> + Some [push ; i_swap] + (* DIP { unary_op } ↦ SWAP ; unary_op ; SWAP *) + | Prim (_, I_DIP, [Seq (_, [(Prim (_, p, _, _) as unary_op)])], _) when is_unary_op p -> + Some [i_swap ; unary_op ; i_swap] + (* saves 5 bytes *) + (* DIP { DROP } ↦ SWAP ; DROP *) + | Prim (_, I_DIP, [Seq (_, [Prim (_, I_DROP, _, _)])], _) -> + Some [i_swap; i_drop] + (* saves 3 bytes *) + (* DIP { DROP ; DROP } ↦ SWAP ; DROP ; SWAP ; DROP *) + | Prim (_, I_DIP, [Seq (_, [Prim (_, I_DROP, _, _) ; Prim (_, I_DROP, _, _)])], _) -> + Some [i_swap; i_drop; i_swap; i_drop] + (* still saves 1 byte *) + (* DIP { DROP ; DROP ; DROP } ↦ SWAP ; DROP ; SWAP ; DROP ; SWAP ; DROP *) + | Prim (_, I_DIP, [Seq (_, [Prim (_, I_DROP, _, _) ; Prim (_, I_DROP, _, _) ; Prim (_, I_DROP, _, _)])], _) -> + Some [i_swap; i_drop; i_swap; i_drop; i_swap; i_drop] + (* after this, DIP { DROP ; ... } is smaller *) + | _ -> None + +let opt_dip2 : peep2 = function + (* combine adjacent dips, shaving a seq and enabling further + optimization inside the DIP: *) + (* DIP { code1 } ; DIP { code2 } ↦ DIP { code1 ; code2 } *) + | Prim (_, I_DIP, [Seq (_, code1)], _), Prim (_, I_DIP, [Seq (_, code2)], _) -> + Some [Prim (0, I_DIP, [Seq (0, code1 @ code2)], [])] + (* DIP { code } ; DROP ↦ DROP ; code *) + | Prim (_, I_DIP, code, _), (Prim (_, I_DROP, _, _) as drop) -> + Some (drop :: code) + (* nullary_op ; DIP { code } ↦ code ; nullary_op *) + | (Prim (_, p, _, _) as nullary_op), Prim (_, I_DIP, [Seq (_, code)], _) when is_nullary_op p -> + Some (code @ [nullary_op]) + (* DIP { code } ; unary_op ↦ unary_op ; DIP { code } *) + | (Prim (_, I_DIP, _, _) as dip), (Prim (_, p, _, _) as unary_op) when is_unary_op p -> + Some [unary_op; dip] + (* unary_op ; DIP { code } ↦ DIP { code } ; unary_op *) + (* | (Prim (_, p, _, _) as unary_op), (Prim (_, I_DIP, _, _) as dip) when is_unary_op p -> + * Some [dip; unary_op] *) + | _ -> None + +let opt_dip3 : peep3 = function + (* replace UNPAIR/UNPIAR with a smaller version *) + (* TODO probably better to implement optimal UNPAIR in the compiler *) + (* DUP ; CAR ; DIP { CDR } ↦ DUP ; CDR ; SWAP ; CAR *) + | Prim (_, I_DUP, _, _), + (Prim (_, (I_CAR | I_CDR), _, _) as proj1), + Prim (_, I_DIP, [Seq (_, [(Prim (_, (I_CAR | I_CDR), _, _) as proj2)])], _) -> + Some [ i_dup ; proj2 ; i_swap ; proj1 ] + | _ -> None + +let opt_swap2 : peep2 = function + (* SWAP ; SWAP ↦ *) + | Prim (_, I_SWAP, _, _), Prim (_, I_SWAP, _, _) -> + Some [] + (* DUP ; SWAP ↦ DUP *) + | Prim (_, I_DUP, _, _), Prim (_, I_SWAP, _, _) -> + Some [i_dup] + (* SWAP ; ADD ↦ ADD *) + (* etc *) + | Prim (_, I_SWAP, _, _), (Prim (_, (I_ADD | I_OR | I_AND | I_XOR), _, _) as comm_op) -> + Some [comm_op] + | _ -> None + +(* This "optimization" deletes dead code produced by the compiler + after a FAILWITH, which is illegal in Michelson. This means we are + thwarting the intent of the Michelson tail fail restriction -- the + LIGO _user_ might accidentally write dead code immediately after a + failure, and we will simply erase it. *) +let rec opt_tail_fail : michelson -> michelson = + function + | Seq (l, args) -> + let rec aux args = + match args with + | [] -> [] + | Prim (l, I_FAILWITH, args, annot) :: _ -> [ Prim (l, I_FAILWITH, args, annot) ] + | arg :: args -> arg :: aux args in + let args = aux args in + Seq (l, List.map opt_tail_fail args) + | Prim (l, p, args, annot) -> + Prim (l, p, List.map opt_tail_fail args, annot) + | x -> x + +let optimize : michelson -> michelson = + fun x -> + let x = use_lambda_instr x in + let x = flatten_seqs x in + let x = opt_tail_fail x in + let optimizers = [ peephole @@ peep2 opt_drop2 ; + peephole @@ peep4 opt_drop4 ; + peephole @@ peep3 opt_dip3 ; + peephole @@ peep2 opt_dip2 ; + peephole @@ peep1 opt_dip1 ; + peephole @@ peep2 opt_swap2 ; + ] in + let x = iterate_optimizer (sequence_optimizers optimizers) x in + x diff --git a/vendors/ligo-utils/simple-utils/x_list.ml b/vendors/ligo-utils/simple-utils/x_list.ml index 67ee96331..a7d36261b 100644 --- a/vendors/ligo-utils/simple-utils/x_list.ml +++ b/vendors/ligo-utils/simple-utils/x_list.ml @@ -22,7 +22,7 @@ let fold_map_right : type acc ele ret . (acc -> ele -> (acc * ret)) -> acc -> el in snd @@ aux (acc , []) f (List.rev lst) -let fold_map : type acc ele ret . (acc -> ele -> (acc * ret)) -> acc -> ele list -> ret list = +let fold_map_acc : type acc ele ret . (acc -> ele -> (acc * ret)) -> acc -> ele list -> acc * ret list = fun f acc lst -> let rec aux (acc , prev) f = function | [] -> (acc , prev) @@ -30,7 +30,12 @@ let fold_map : type acc ele ret . (acc -> ele -> (acc * ret)) -> acc -> ele list let (acc' , hd') = f acc hd in aux (acc' , hd' :: prev) f tl in - List.rev @@ snd @@ aux (acc , []) f lst + let (acc, lst) = aux (acc , []) f lst in + (acc, List.rev lst) + +let fold_map : type acc ele ret . (acc -> ele -> (acc * ret)) -> acc -> ele list -> ret list = + fun f acc lst -> + snd (fold_map_acc f acc lst) let fold_right' f init lst = List.fold_left f init (List.rev lst)