some michelson optimizations

This commit is contained in:
Tom Jack 2019-09-21 18:38:45 -07:00
parent a2240e0649
commit a521c01115
5 changed files with 393 additions and 31 deletions

View File

@ -14,7 +14,6 @@
mini_c
compiler
self_michelson
operators
)
(preprocess
(pps ppx_let)

View File

@ -2,19 +2,21 @@ open Trace
open Mini_c
open Tezos_utils
let compile_value : value -> type_value -> Michelson.t result =
Compiler.Program.translate_value
let compile_value : value -> type_value -> Michelson.t result = fun x a ->
let%bind body = Compiler.Program.translate_value x a in
let body = Self_michelson.optimize body in
ok body
let compile_expression_as_value : expression -> _ result = fun e ->
let%bind value = expression_to_value e in
let%bind result = compile_value value e.type_value in
let%bind result = Self_michelson.all_expression result in
let result = Self_michelson.optimize result in
ok result
let compile_expression_as_function : expression -> _ result = fun e ->
let (input , output) = t_unit , e.type_value in
let%bind body = Compiler.Program.translate_expression e Compiler.Environment.empty in
let%bind body = Self_michelson.all_expression body in
let body = Self_michelson.optimize body in
let body = Michelson.(seq [ i_drop ; body ]) in
let%bind (input , output) = bind_map_pair Compiler.Type.Ty.type_ (input , output) in
let open! Compiler.Program in
@ -24,7 +26,7 @@ let compile_function = fun e ->
let%bind (input , output) = get_t_function e.type_value in
let%bind body = get_function e in
let%bind body = compile_value body (t_function input output) in
let%bind body = Self_michelson.all_expression body in
let body = Self_michelson.optimize body in
let%bind (input , output) = bind_map_pair Compiler.Type.Ty.type_ (input , output) in
let open! Compiler.Program in
ok { input ; output ; body }

View File

@ -1,31 +1,387 @@
open Trace
(* This file attempts to optimize Michelson code. The goal is to
reduce the code size (the size of the binary Micheline.)
I have ignored the 'execution gas' completely, because it seems
that users will encounter code size problems earlier and more
often.
*)
open Tezos_micheline.Micheline
open Memory_proto_alpha.Protocol.Michelson_v1_primitives
open Tezos_utils.Michelson
let strip_annots = fun e ->
match e with
| Prim (l , p , lst , _) -> ok @@ Prim (l , p , lst , [])
| x -> ok x
(* `arity p` should be `Some n` only if p is (always) an instruction
which removes n items from the stack and uses them to push 1 item,
without effects other than gas consumption. It must never fail. *)
let strip_nops = fun e ->
match e with
| Seq(l, [Prim (_, I_UNIT, _, _) ; Prim(_, I_DROP, _, _)]) -> ok @@ Seq (l, [])
| x -> ok x
let arity : prim -> int option = function
| I_PACK -> Some 1
| I_UNPACK -> Some 1
| I_BLAKE2B -> Some 1
| I_SHA256 -> Some 1
| I_SHA512 -> Some 1
| I_ABS -> Some 1
| I_ADD -> None (* can fail for tez *)
| I_AMOUNT -> Some 0
| I_AND -> Some 2
| I_BALANCE -> Some 0
| I_CAR -> Some 1
| I_CDR -> Some 1
| I_CHECK_SIGNATURE -> Some 3
| I_COMPARE -> Some 2
| I_CONCAT -> None (* sometimes 1, sometimes 2 :( *)
| I_CONS -> Some 2
| I_CREATE_ACCOUNT -> None (* effects, kind of *)
| I_CREATE_CONTRACT -> None (* effects, kind of *)
| I_IMPLICIT_ACCOUNT -> Some 1
| I_DIP -> None
| I_DROP -> None
| I_DUP -> None
| I_EDIV -> Some 2
| I_EMPTY_MAP -> Some 0
| I_EMPTY_SET -> Some 0
| I_EQ -> Some 1
| I_EXEC -> None (* effects *)
| I_FAILWITH -> None
| I_GE -> Some 1
| I_GET -> Some 2
| I_GT -> Some 1
| I_HASH_KEY -> Some 1
| I_IF -> None
| I_IF_CONS -> None
| I_IF_LEFT -> None
| I_IF_NONE -> None
| I_INT -> Some 1
| I_LAMBDA -> Some 0
| I_LE -> Some 1
| I_LEFT -> Some 1
| I_LOOP -> None
| I_LSL -> Some 1
| I_LSR -> Some 1
| I_LT -> Some 1
| I_MAP -> None
| I_MEM -> Some 2
| I_MUL -> None (* can fail for tez *)
| I_NEG -> Some 1
| I_NEQ -> Some 1
| I_NIL -> Some 0
| I_NONE -> Some 0
| I_NOT -> Some 1
| I_NOW -> Some 0
| I_OR -> Some 2
| I_PAIR -> Some 2
| I_PUSH -> Some 0
| I_RIGHT -> Some 1
| I_SIZE -> Some 1
| I_SOME -> Some 1
| I_SOURCE -> Some 0
| I_SENDER -> Some 0
| I_SELF -> Some 0
| I_SLICE -> Some 3
| I_STEPS_TO_QUOTA -> Some 0
| I_SUB -> None (* can fail for tez *)
| I_SWAP -> None
| I_TRANSFER_TOKENS -> None (* effects, kind of *)
| I_SET_DELEGATE -> None (* effects, kind of *)
| I_UNIT -> Some 0
| I_UPDATE -> Some 3
| I_XOR -> Some 2
| I_ITER -> None
| I_LOOP_LEFT -> None
| I_ADDRESS -> Some 1
| I_CONTRACT -> Some 1
| I_ISNAT -> Some 1
| I_CAST -> None
| I_RENAME -> None
| K_parameter
| K_storage
| K_code
| D_False
| D_Elt
| D_Left
| D_None
| D_Pair
| D_Right
| D_Some
| D_True
| D_Unit
| T_bool
| T_contract
| T_int
| T_key
| T_key_hash
| T_lambda
| T_list
| T_map
| T_big_map
| T_nat
| T_option
| T_or
| T_pair
| T_set
| T_signature
| T_string
| T_bytes
| T_mutez
| T_timestamp
| T_unit
| T_operation
| T_address -> None
let all = [
strip_annots ;
strip_nops ;
]
let is_nullary_op (p : prim) : bool =
match arity p with
| Some 0 -> true
| _ -> false
let rec bind_chain : ('a -> 'a result) list -> 'a -> 'a result = fun fs x ->
let is_unary_op (p : prim) : bool =
match arity p with
| Some 1 -> true
| _ -> false
let is_binary_op (p : prim) : bool =
match arity p with
| Some 2 -> true
| _ -> false
let is_ternary_op (p : prim) : bool =
match arity p with
| Some 3 -> true
| _ -> false
let unseq : michelson -> michelson list = function
| Seq (_, args) -> args
| x -> [x]
(* Replace `PUSH (lambda a b) {}` with `LAMBDA a b {}` *)
let rec use_lambda_instr : michelson -> michelson =
fun x ->
match x with
| Seq (l, args) ->
Seq (l, List.map use_lambda_instr args)
| Prim (_, I_PUSH, [Prim (_, T_lambda, [arg; ret], _); code], _) ->
i_lambda arg ret code
| Prim (_, I_PUSH, _, _) ->
x (* possibly missing some nested lambdas *)
| Prim (l, p, args, annot) ->
Prim (l, p, List.map use_lambda_instr args, annot)
| _ -> x
(* This flattens nested seqs. {} is erased, { { code1 } ; { code2 } }
becomes { code1 ; code2 }, etc. This is important because each seq
costs 5 bytes, for the "Seq" tag and a 4 byte length. *)
let rec flatten_seqs : michelson -> michelson =
fun x ->
match x with
| Seq (l, args) ->
let args = List.concat @@ List.map (fun x -> unseq (flatten_seqs x)) args in
Seq (l, args)
(* Should not flatten literal seq data in PUSH. Ugh... *)
| Prim (_, I_PUSH, _, _) -> x
| Prim (l, p, args, annot) -> Prim (l, p, List.map flatten_seqs args, annot)
| _ -> x
type peep1 = michelson -> michelson list option
type peep2 = michelson * michelson -> michelson list option
type peep3 = michelson * michelson * michelson -> michelson list option
type peep4 = michelson * michelson * michelson * michelson -> michelson list option
let rec peep1 (f : peep1) : michelson list -> bool * michelson list = function
| [] -> (false, [])
| x1 :: xs ->
match f x1 with
| Some xs' -> let (_, xs') = peep1 f (xs' @ xs) in
(true, xs')
| None -> let (changed, xs) = peep1 f xs in
(changed, x1 :: xs)
let rec peep2 (f : peep2) : michelson list -> bool * michelson list = function
| [] -> (false, [])
| [x] -> (false, [x])
| x1 :: x2 :: xs ->
match f (x1, x2) with
| Some xs' -> let (_, xs') = peep2 f (xs' @ xs) in
(true, xs')
| None -> let (changed, xs') = peep2 f (x2 :: xs) in
(changed, x1 :: xs')
let rec peep3 (f : peep3) : michelson list -> bool * michelson list = function
| [] -> (false, [])
| [x] -> (false, [x])
| [x ; y] -> (false, [x ; y])
| x1 :: x2 :: x3 :: xs ->
match f (x1, x2, x3) with
| Some xs' -> let (_, xs') = peep3 f (xs' @ xs) in
(true, xs')
| None -> let (changed, xs') = peep3 f (x2 :: x3 :: xs) in
(changed, x1 :: xs')
let rec peep4 (f : peep4) : michelson list -> bool * michelson list = function
| [] -> (false, [])
| [x] -> (false, [x])
| [x ; y] -> (false, [x ; y])
| [x ; y ; z] -> (false, [x ; y ; z])
| x1 :: x2 :: x3 :: x4 :: xs ->
match f (x1, x2, x3, x4) with
| Some xs' -> let (_, xs') = peep4 f (xs' @ xs) in
(true, xs')
| None -> let (changed, xs') = peep4 f (x2 :: x3 :: x4 :: xs) in
(changed, x1 :: xs')
(* apply f to all seqs *)
let rec peephole (f : michelson list -> bool * michelson list) : michelson -> bool * michelson =
let peep_args ~seq args =
let (changed, args) = if seq
then f args
else (false, args) in
List.fold_map_acc
(fun changed1 arg ->
let (changed2, arg) = peephole f arg in
(changed1 || changed2, arg))
changed
args in
function
| Seq (l, args) -> let (changed, args) = peep_args ~seq:true args in
(changed, Seq (l, args))
| Prim (l, p, args, annot) -> let (changed, args) = peep_args ~seq:false args in
(changed, Prim (l, p, args, annot))
| x -> (false, x)
(* apply the optimizers in order *)
let rec sequence_optimizers (fs : (michelson -> bool * michelson) list) : michelson -> bool * michelson =
match fs with
| [] -> ok x
| hd :: tl -> (
let aux : 'a -> 'a result = fun x -> bind (bind_chain tl) (hd x) in
bind aux (ok x)
)
| [] -> fun x -> (false, x)
| f :: fs -> fun x -> let (changed1, x) = f x in
let (changed2, x) = sequence_optimizers fs x in
(changed1 || changed2, x)
let all_expression =
let all_expr = List.map Helpers.map_expression all in
bind_chain all_expr
(* take the fixed point of an optimizer (!) *)
let rec iterate_optimizer (f : michelson -> bool * michelson) : michelson -> michelson =
fun x ->
let (changed, x) = f x in
if changed
then iterate_optimizer f x
else x
let opt_drop2 : peep2 = function
(* nullary_op ; DROP ↦ *)
| Prim (_, p, _, _), Prim (_, I_DROP, _, _) when is_nullary_op p -> Some []
(* DUP ; DROP ↦ *)
| Prim (_, I_DUP, _, _), Prim (_, I_DROP, _, _) -> Some []
(* unary_op ; DROP ↦ DROP *)
| Prim (_, p, _, _), Prim (_, I_DROP, _, _) when is_unary_op p -> Some [i_drop]
(* binary_op ; DROP ↦ DROP ; DROP *)
| Prim (_, p, _, _), Prim (_, I_DROP, _, _) when is_binary_op p -> Some [i_drop; i_drop]
(* ternary_op ; DROP ↦ DROP ; DROP ; DROP *)
| Prim (_, p, _, _), Prim (_, I_DROP, _, _) when is_ternary_op p -> Some [i_drop; i_drop; i_drop]
| _ -> None
let opt_drop4 : peep4 = function
(* DUP; unary_op; SWAP; DROP ↦ unary_op *)
| Prim (_, I_DUP, _, _),
(Prim (_, p, _, _) as unary_op),
Prim (_, I_SWAP, _, _),
Prim (_, I_DROP, _, _)
when is_unary_op p ->
Some [unary_op]
| _ -> None
let opt_dip1 : peep1 = function
(* DIP {} ↦ *)
| Prim (_, I_DIP, [Seq (_, [])], _) -> Some []
(* DIP { nullary_op } ↦ nullary_op ; SWAP *)
| Prim (_, I_DIP, [Seq (_, [(Prim (_, p, _, _) as push)])], _) when is_nullary_op p ->
Some [push ; i_swap]
(* DIP { unary_op } ↦ SWAP ; unary_op ; SWAP *)
| Prim (_, I_DIP, [Seq (_, [(Prim (_, p, _, _) as unary_op)])], _) when is_unary_op p ->
Some [i_swap ; unary_op ; i_swap]
(* saves 5 bytes *)
(* DIP { DROP } ↦ SWAP ; DROP *)
| Prim (_, I_DIP, [Seq (_, [Prim (_, I_DROP, _, _)])], _) ->
Some [i_swap; i_drop]
(* saves 3 bytes *)
(* DIP { DROP ; DROP } ↦ SWAP ; DROP ; SWAP ; DROP *)
| Prim (_, I_DIP, [Seq (_, [Prim (_, I_DROP, _, _) ; Prim (_, I_DROP, _, _)])], _) ->
Some [i_swap; i_drop; i_swap; i_drop]
(* still saves 1 byte *)
(* DIP { DROP ; DROP ; DROP } ↦ SWAP ; DROP ; SWAP ; DROP ; SWAP ; DROP *)
| Prim (_, I_DIP, [Seq (_, [Prim (_, I_DROP, _, _) ; Prim (_, I_DROP, _, _) ; Prim (_, I_DROP, _, _)])], _) ->
Some [i_swap; i_drop; i_swap; i_drop; i_swap; i_drop]
(* after this, DIP { DROP ; ... } is smaller *)
| _ -> None
let opt_dip2 : peep2 = function
(* combine adjacent dips, shaving a seq and enabling further
optimization inside the DIP: *)
(* DIP { code1 } ; DIP { code2 } ↦ DIP { code1 ; code2 } *)
| Prim (_, I_DIP, [Seq (_, code1)], _), Prim (_, I_DIP, [Seq (_, code2)], _) ->
Some [Prim (0, I_DIP, [Seq (0, code1 @ code2)], [])]
(* DIP { code } ; DROP ↦ DROP ; code *)
| Prim (_, I_DIP, code, _), (Prim (_, I_DROP, _, _) as drop) ->
Some (drop :: code)
(* nullary_op ; DIP { code } ↦ code ; nullary_op *)
| (Prim (_, p, _, _) as nullary_op), Prim (_, I_DIP, [Seq (_, code)], _) when is_nullary_op p ->
Some (code @ [nullary_op])
(* DIP { code } ; unary_op ↦ unary_op ; DIP { code } *)
| (Prim (_, I_DIP, _, _) as dip), (Prim (_, p, _, _) as unary_op) when is_unary_op p ->
Some [unary_op; dip]
(* unary_op ; DIP { code } ↦ DIP { code } ; unary_op *)
(* | (Prim (_, p, _, _) as unary_op), (Prim (_, I_DIP, _, _) as dip) when is_unary_op p ->
* Some [dip; unary_op] *)
| _ -> None
let opt_dip3 : peep3 = function
(* replace UNPAIR/UNPIAR with a smaller version *)
(* TODO probably better to implement optimal UNPAIR in the compiler *)
(* DUP ; CAR ; DIP { CDR } ↦ DUP ; CDR ; SWAP ; CAR *)
| Prim (_, I_DUP, _, _),
(Prim (_, (I_CAR | I_CDR), _, _) as proj1),
Prim (_, I_DIP, [Seq (_, [(Prim (_, (I_CAR | I_CDR), _, _) as proj2)])], _) ->
Some [ i_dup ; proj2 ; i_swap ; proj1 ]
| _ -> None
let opt_swap2 : peep2 = function
(* SWAP ; SWAP ↦ *)
| Prim (_, I_SWAP, _, _), Prim (_, I_SWAP, _, _) ->
Some []
(* DUP ; SWAP ↦ DUP *)
| Prim (_, I_DUP, _, _), Prim (_, I_SWAP, _, _) ->
Some [i_dup]
(* SWAP ; ADD ↦ ADD *)
(* etc *)
| Prim (_, I_SWAP, _, _), (Prim (_, (I_ADD | I_OR | I_AND | I_XOR), _, _) as comm_op) ->
Some [comm_op]
| _ -> None
(* This "optimization" deletes dead code produced by the compiler
after a FAILWITH, which is illegal in Michelson. This means we are
thwarting the intent of the Michelson tail fail restriction -- the
LIGO _user_ might accidentally write dead code immediately after a
failure, and we will simply erase it. *)
let rec opt_tail_fail : michelson -> michelson =
function
| Seq (l, args) ->
let rec aux args =
match args with
| [] -> []
| Prim (l, I_FAILWITH, args, annot) :: _ -> [ Prim (l, I_FAILWITH, args, annot) ]
| arg :: args -> arg :: aux args in
let args = aux args in
Seq (l, List.map opt_tail_fail args)
| Prim (l, p, args, annot) ->
Prim (l, p, List.map opt_tail_fail args, annot)
| x -> x
let optimize : michelson -> michelson =
fun x ->
let x = use_lambda_instr x in
let x = flatten_seqs x in
let x = opt_tail_fail x in
let optimizers = [ peephole @@ peep2 opt_drop2 ;
peephole @@ peep4 opt_drop4 ;
peephole @@ peep3 opt_dip3 ;
peephole @@ peep2 opt_dip2 ;
peephole @@ peep1 opt_dip1 ;
peephole @@ peep2 opt_swap2 ;
] in
let x = iterate_optimizer (sequence_optimizers optimizers) x in
x

View File

@ -22,7 +22,7 @@ let fold_map_right : type acc ele ret . (acc -> ele -> (acc * ret)) -> acc -> el
in
snd @@ aux (acc , []) f (List.rev lst)
let fold_map : type acc ele ret . (acc -> ele -> (acc * ret)) -> acc -> ele list -> ret list =
let fold_map_acc : type acc ele ret . (acc -> ele -> (acc * ret)) -> acc -> ele list -> acc * ret list =
fun f acc lst ->
let rec aux (acc , prev) f = function
| [] -> (acc , prev)
@ -30,7 +30,12 @@ let fold_map : type acc ele ret . (acc -> ele -> (acc * ret)) -> acc -> ele list
let (acc' , hd') = f acc hd in
aux (acc' , hd' :: prev) f tl
in
List.rev @@ snd @@ aux (acc , []) f lst
let (acc, lst) = aux (acc , []) f lst in
(acc, List.rev lst)
let fold_map : type acc ele ret . (acc -> ele -> (acc * ret)) -> acc -> ele list -> ret list =
fun f acc lst ->
snd (fold_map_acc f acc lst)
let fold_right' f init lst = List.fold_left f init (List.rev lst)