Michelson: new parser and macro expander.

This commit is contained in:
Benjamin Canou 2017-06-14 23:35:24 +02:00
parent 09f95c6b8f
commit f50a37e6f4
20 changed files with 1897 additions and 1180 deletions

View File

@ -37,7 +37,7 @@ ${OBJS} ${OBJS_DEPS}: TARGET="(client_$(PROTO_VERSION).cmx)"
${OBJS}: EXTRA_OCAMLFLAGS = -for-pack Client_$(PROTO_VERSION)
../client_$(PROTO_VERSION).cmx: EXTRA_OCAMLFLAGS =
${OBJS} ${OBJS_DEPS} ../client_$(PROTO_VERSION).cmx: \
PACKAGES=lwt ocplib-json-typed sodium ocplib-ocamlres
PACKAGES=lwt ocplib-json-typed sodium ocplib-ocamlres uutf
${OBJS} ${OBJS_DEPS} ../client_$(PROTO_VERSION).cmx: \
../../../proto/client_embedded_proto_${PROTO_VERSION}.cmxa
@ -58,14 +58,6 @@ ${OBJS} ${OBJS_DEPS} ../client_$(PROTO_VERSION).cmx: \
@echo OCAMLOPT ${TARGET} $(notdir $@)
@$(OCAMLOPT) ${OCAMLFLAGS} -c $<
%.ml: %.mll
@echo OCAMLLEX ${TARGET} $(notdir $@)
@$(OCAMLLEX) $<
%.ml %.mli: %.mly
@echo MENHIR ${TARGET} $(notdir $@)
@$(MENHIR) --explain $<
.PHONY: clean
clean::
-rm -f ../client_$(PROTO_VERSION).cm* ../client_$(PROTO_VERSION).o

View File

@ -2,7 +2,8 @@
PROTO_VERSION := alpha
CLIENT_INTFS := \
concrete_parser.mli \
michelson_macros.mli \
michelson_parser.mli \
client_proto_rpcs.mli \
client_proto_args.mli \
client_proto_contracts.mli \
@ -13,7 +14,8 @@ CLIENT_INTFS := \
CLIENT_IMPLS := \
script_located_ir.ml \
concrete_parser.ml concrete_lexer.ml \
michelson_macros.ml \
michelson_parser.ml \
client_proto_rpcs.ml \
client_proto_args.ml \
client_proto_contracts.ml \
@ -28,10 +30,7 @@ include ../Makefile.shared
${OBJS}: OPENED_MODULES += Environment Tezos_context
predepend: concrete_parser.ml concrete_lexer.ml
.PHONY: clean
clean::
-rm -f baker/*.cm* baker/*~ baker/*.o baker/*.a
-rm -f baker/*.deps baker/*.deps.byte
-rm -f concrete_lexer.ml concrete_parser.ml concrete_parser.mli

View File

@ -10,30 +10,75 @@
module Ed25519 = Environment.Ed25519
open Client_proto_args
let report_parse_error _prefix exn _lexbuf =
let report_parse_error prefix exn =
let open Lexing in
let open Script_located_ir in
let print_loc ppf (s, e) =
if s.line = e.line then
if s.column = e.column then
let print_point ppf { line ; column } =
Format.fprintf ppf
"at line %d character %d"
s.line s.column
line column in
let print_token ppf = function
| Michelson_parser.Open_paren
| Michelson_parser.Close_paren ->
Format.fprintf ppf "parenthesis"
| Michelson_parser.Open_brace
| Michelson_parser.Close_brace ->
Format.fprintf ppf "curly brace"
| Michelson_parser.String _ ->
Format.fprintf ppf "string constant"
| Michelson_parser.Int _ ->
Format.fprintf ppf "integer constant"
| Michelson_parser.Ident _ ->
Format.fprintf ppf "identifier"
| Michelson_parser.Annot _ ->
Format.fprintf ppf "annotation"
| Michelson_parser.Comment _
| Michelson_parser.Eol_comment _ ->
Format.fprintf ppf "comment"
| Michelson_parser.Semi ->
Format.fprintf ppf "semi colon" in
let print_loc ppf loc =
Format.fprintf ppf "in %s, " prefix ;
if loc.start.line = loc.stop.line then
if loc.start.column = loc.stop.column then
Format.fprintf ppf
"at line %d character %d"
loc.start.line loc.start.column
else
Format.fprintf ppf
"at line %d characters %d to %d"
s.line s.column e.column
loc.start.line loc.start.column loc.stop.column
else
Format.fprintf ppf
"from line %d character %d to line %d character %d"
s.line s.column e.line e.column in
loc.start.line loc.start.column loc.stop.line loc.stop.column in
match exn with
| Missing_program_field n ->
| Script_located_ir.Missing_program_field n ->
failwith "missing script %s" n
| Illegal_character (loc, c) ->
failwith "%a, illegal character %C" print_loc loc c
| Illegal_escape (loc, c) ->
failwith "%a, illegal escape sequence %S" print_loc loc c
| Michelson_parser.Invalid_utf8_sequence (point, str) ->
failwith "%a, invalid UTF-8 sequence %S" print_point point str
| Michelson_parser.Unexpected_character (point, str) ->
failwith "%a, unexpected character %s" print_point point str
| Michelson_parser.Undefined_escape_character (point, str) ->
failwith "%a, undefined escape character \"%s\"" print_point point str
| Michelson_parser.Missing_break_after_number point ->
failwith "%a, missing break" print_point point
| Michelson_parser.Unterminated_string loc ->
failwith "%a, unterminated string" print_loc loc
| Michelson_parser.Unterminated_integer loc ->
failwith "%a, unterminated integer" print_loc loc
| Michelson_parser.Unterminated_comment loc ->
failwith "%a, unterminated comment" print_loc loc
| Michelson_parser.Unclosed { loc ; token } ->
failwith "%a, unclosed %a" print_loc loc print_token token
| Michelson_parser.Unexpected { loc ; token } ->
failwith "%a, unexpected %a" print_loc loc print_token token
| Michelson_parser.Extra { loc ; token } ->
failwith "%a, extra %a" print_loc loc print_token token
| Michelson_parser.Misaligned node ->
failwith "%a, misaligned expression" print_loc (node_location node)
| Michelson_parser.Empty ->
failwith "empty expression"
| Failure s ->
failwith "%s" s
| exn ->
@ -45,41 +90,54 @@ let print_location_mark ppf = function
let no_locations _ = None
let print_annotation ppf = function
| None -> ()
| Some a -> Format.fprintf ppf " %s@," a
let rec print_expr_unwrapped_help emacs locations ppf = function
| Script.Prim (loc, name, []) ->
| Script.Prim (loc, name, [], None) ->
begin match locations loc with
| None -> Format.fprintf ppf "%s" name
| Some _ as l -> Format.fprintf ppf "(%s%a)" name print_location_mark l
| Some _ as l -> Format.fprintf ppf "%s%a" name print_location_mark l
end
| Script.Prim (loc, name, args) ->
Format.fprintf ppf (if emacs then "%s%a %a" else "@[<hov 2>%s%a@ %a@]")
name print_location_mark (locations loc)
| Script.Prim (loc, name, args, (Some _ as annot)) ->
Format.fprintf ppf (if emacs then "%s%a %a" else "@[<hov 2>%s%a@ %a]")
name print_location_mark (locations loc) print_annotation annot
| Script.Prim (loc, name, args, annot) ->
Format.fprintf ppf "@[<hv 2>%s%a%a@ %a@]"
name
print_location_mark (locations loc)
print_annotation annot
(Format.pp_print_list
~pp_sep: Format.pp_print_space
(print_expr_help emacs locations))
args
| Script.Seq (loc, []) ->
| Script.Seq (loc, [], None) ->
begin match locations loc with
| None -> Format.fprintf ppf "{}"
| Some _ as l -> Format.fprintf ppf "{%a }" print_location_mark l
end
| Script.Seq (loc, exprs) ->
| Script.Seq (loc, exprs, annot) ->
begin match locations loc with
| None -> Format.fprintf ppf "@[<hv 2>{ "
| Some _ as l -> Format.fprintf ppf "@[<hv 2>{%a@ " print_location_mark l
end ;
Format.fprintf ppf "%a@] }"
Format.fprintf ppf "%a%a@] }"
(Format.pp_print_list
~pp_sep: (fun ppf () -> Format.fprintf ppf " ;@ ")
(print_expr_unwrapped_help emacs locations))
exprs
print_annotation annot
| Script.Int (loc, n) ->
Format.fprintf ppf "%s%a" n print_location_mark (locations loc)
| Script.String (loc, s) ->
Format.fprintf ppf "%S%a" s print_location_mark (locations loc)
and print_expr_help emacs locations ppf = function
| Script.Prim (_, _, _ :: _) as expr ->
| Script.Prim (_, _, _ :: _, _)
| Script.Prim (_, _, [], Some _) as expr ->
Format.fprintf ppf "(%a)" (print_expr_unwrapped_help emacs locations) expr
| Script.Prim (loc, _, [], None) as expr when locations loc <> None ->
Format.fprintf ppf "(%a)" (print_expr_unwrapped_help emacs locations) expr
| expr -> print_expr_unwrapped_help emacs locations ppf expr
@ -103,19 +161,33 @@ let print_stack = print_stack_help false
let print_emacs_stack = print_stack_help true
let print_typed_code locations ppf (expr, type_map) =
let print_stack ppf = function
| [] -> Format.fprintf ppf "[]"
| more ->
Format.fprintf ppf "@[<hov 2>[ %a ]@]"
(Format.pp_print_list
~pp_sep: (fun ppf () -> Format.fprintf ppf " :@ ")
(print_expr_unwrapped no_locations))
more in
let print_annot ppf = function
| None -> ()
| Some annot -> Format.fprintf ppf " %s@," annot in
let rec print_typed_code_unwrapped ppf expr =
match expr with
| Script.Prim (loc, name, []) ->
| Script.Prim (loc, name, [], None) ->
Format.fprintf ppf "%s%a"
name print_location_mark (locations loc)
| Script.Prim (loc, name, args) ->
Format.fprintf ppf "@[<hov 2>%s%a@ %a@]"
name print_location_mark (locations loc)
| Script.Prim (loc, name, [], Some annot) ->
Format.fprintf ppf "(%s %s%a)"
name annot print_location_mark (locations loc)
| Script.Prim (loc, name, args, annot) ->
Format.fprintf ppf "@[<v 2>%s%a%a@ %a@]"
name print_annot annot print_location_mark (locations loc)
(Format.pp_print_list
~pp_sep: Format.pp_print_space
print_typed_code)
args
| Script.Seq (loc, []) ->
| Script.Seq (loc, [], None) ->
begin match List.assoc loc type_map with
| exception Not_found -> Format.fprintf ppf "{}"
| (first, _) ->
@ -127,17 +199,33 @@ let print_typed_code locations ppf (expr, type_map) =
Format.fprintf ppf "{%a %a }"
print_location_mark l print_stack first
end
| Script.Seq (loc, exprs) ->
begin match locations loc with
| Script.Seq (loc, [], Some annot) ->
begin match List.assoc loc type_map with
| exception Not_found -> Format.fprintf ppf "{ %@%s }" annot
| (first, _) ->
match locations loc with
| None ->
Format.fprintf ppf "@[<v 2>{ "
Format.fprintf ppf "{ %@%s } /* %a */"
annot
print_stack first
| Some _ as l ->
Format.fprintf ppf "@[<v 2>{%a@,"
Format.fprintf ppf "{ %@%s%a %a }"
annot print_location_mark l print_stack first
end
| Script.Seq (loc, exprs, annot) ->
begin match locations loc, annot with
| None, None ->
Format.fprintf ppf "@[<v 2>{ "
| None, Some annot ->
Format.fprintf ppf "@[<v 2>{ %@%s@," annot
| Some _ as l, _ ->
Format.fprintf ppf "@[<v 2>{%a%a@,"
print_annot annot
print_location_mark l
end ;
let rec loop = function
| [] -> assert false
| [ Script.Int (loc, _) | String (loc, _) | Prim (loc, _, _) as expr ] ->
| [ Script.Int (loc, _) | String (loc, _) | Prim (loc, _, _, _) as expr ] ->
begin match List.assoc loc type_map with
| exception Not_found ->
Format.fprintf ppf "%a }@]"
@ -148,7 +236,7 @@ let print_typed_code locations ppf (expr, type_map) =
print_typed_code_unwrapped expr
print_stack after
end ;
| Script.Int (loc, _) | String (loc, _) | Prim (loc, _, _) as expr :: rest ->
| Script.Int (loc, _) | String (loc, _) | Prim (loc, _, _, _) as expr :: rest ->
begin match List.assoc loc type_map with
| exception Not_found ->
Format.fprintf ppf "%a ;@,"
@ -160,10 +248,10 @@ let print_typed_code locations ppf (expr, type_map) =
print_typed_code_unwrapped expr ;
loop rest
end ;
| [ Seq (_, _) as expr ] ->
| [ Seq (_, _, _) as expr ] ->
Format.fprintf ppf "%a }@]"
print_typed_code_unwrapped expr
| Seq (_, _) as expr :: rest ->
| Seq (_, _, _) as expr :: rest ->
Format.fprintf ppf "%a@,"
print_typed_code_unwrapped expr ;
loop rest in
@ -173,20 +261,18 @@ let print_typed_code locations ppf (expr, type_map) =
| Script.String (loc, s) ->
Format.fprintf ppf "%S%a" s print_location_mark (locations loc)
and print_typed_code ppf = function
| Script.Prim (_, _, _ :: _) as expr ->
| Script.Prim (_, _, _ :: _, _) as expr ->
Format.fprintf ppf "(%a)" print_typed_code_unwrapped expr
| expr -> print_typed_code_unwrapped ppf expr in
print_typed_code_unwrapped ppf expr
let print_program locations ppf ((c : Script.code), type_map) =
Format.fprintf ppf
"@[<v 0>@[<hov 2>storage@ %a ;@]@,\
@[<hov 2>parameter@ %a ;@]@,\
@[<hov 2>return@ %a ;@]@,\
"@[<v 0>%a ;@,%a ;@,%a ;@,\
@[<hov 2>code@ %a@]@]"
(print_expr no_locations) c.storage_type
(print_expr no_locations) c.arg_type
(print_expr no_locations) c.ret_type
(print_expr_unwrapped no_locations) (Script.Prim (-1, "storage", [ c.storage_type ], None))
(print_expr_unwrapped no_locations) (Script.Prim (-1, "parameter", [ c.arg_type ], None))
(print_expr_unwrapped no_locations) (Script.Prim (-1, "return", [ c.ret_type ], None))
(print_typed_code locations) (c.code, type_map)
let collect_error_locations errs =
@ -489,91 +575,101 @@ type 'a parsed =
loc_table : (string * (int * Script_located_ir.location) list) list }
let parse_program source =
let lexbuf = Lexing.from_string source in
try
return
(Concrete_parser.tree Concrete_lexer.(token (init_state ())) lexbuf |> fun fields ->
let fields = Michelson_parser.parse_toplevel (Michelson_parser.tokenize source) in
let fields = List.map Script_located_ir.strip_locations fields in
let rec get_field n = function
| Script_located_ir.Prim (_, pn, [ ctns ]) :: _ when n = pn -> ctns
| (Script.Prim (_, pn, [ ctns ], _), locs) :: _ when n = pn -> ctns, locs
| _ :: rest -> get_field n rest
| [] -> raise (Script_located_ir.Missing_program_field n) in
let code, code_loc_table =
Script_located_ir.strip_locations (get_field "code" fields) in
let arg_type, parameter_loc_table =
Script_located_ir.strip_locations (get_field "parameter" fields) in
let ret_type, return_loc_table =
Script_located_ir.strip_locations (get_field "return" fields) in
let storage_type, storage_loc_table =
Script_located_ir.strip_locations (get_field "storage" fields) in
let code, code_loc_table = get_field "code" fields in
let arg_type, parameter_loc_table = get_field "parameter" fields in
let ret_type, return_loc_table = get_field "return" fields in
let storage_type, storage_loc_table = get_field "storage" fields in
let ast = Script.{ code ; arg_type ; ret_type ; storage_type } in
let loc_table =
[ "code", code_loc_table ;
"parameter", parameter_loc_table ;
"return", return_loc_table ;
"storage", storage_loc_table ] in
{ ast ; source ; loc_table })
return { ast ; source ; loc_table }
with
| exn -> report_parse_error "program: " exn lexbuf
| exn -> report_parse_error "program" exn
let parse_data source =
let lexbuf = Lexing.from_string source in
try
match Concrete_parser.tree Concrete_lexer.(token (init_state ())) lexbuf with
| [node] ->
let node = Michelson_parser.parse_expression (Michelson_parser.tokenize source) in
let ast, loc_table = Script_located_ir.strip_locations node in
let loc_table = [ "data", loc_table ] in
return { ast ; source ; loc_table }
| _ -> failwith "single data expression expected"
with
| exn -> report_parse_error "data: " exn lexbuf
| exn -> report_parse_error "data" exn
let parse_data_type source =
let lexbuf = Lexing.from_string source in
try
match Concrete_parser.tree Concrete_lexer.(token (init_state ())) lexbuf with
| [node] ->
let node = Michelson_parser.parse_expression (Michelson_parser.tokenize source) in
let ast, loc_table = Script_located_ir.strip_locations node in
let loc_table = [ "data", loc_table ] in
return { ast ; source ; loc_table }
| _ -> failwith "single type expression expected"
with
| exn -> report_parse_error "data_type: " exn lexbuf
| exn -> report_parse_error "type" exn
let unexpand_macros type_map (program : Script.code) =
let open Script in
let rec caddr type_map acc = function
| [] -> Some (List.rev acc)
| Prim (loc, "CAR" , []) :: rest when List.mem_assoc loc type_map ->
caddr type_map ((loc, "A") :: acc) rest
| Prim (loc, "CDR" , []) :: rest when List.mem_assoc loc type_map ->
caddr type_map ((loc, "D") :: acc) rest
| _ -> None in
let rec unexpand type_map node =
match node with
| Seq (loc, l) ->
begin match caddr type_map [] l with
| None | Some [] ->
let type_map, l =
let rec first_prim_in_sequence = function
| Int _ | String _ -> None
| Prim (loc, _, _, _) -> Some loc
| Seq (_, children, _) ->
let rec loop = function
| [] -> None
| child :: children ->
match first_prim_in_sequence child with
| None -> loop children
| Some loc -> Some loc in
loop children in
let rec last_prim_in_sequence = function
| Int _ | String _ -> None
| Prim (loc, _, _, _) -> Some loc
| Seq (_, children, _) ->
let rec reversed = function
| [] -> None
| child :: children ->
match last_prim_in_sequence child with
| None -> reversed children
| Some loc -> Some loc in
reversed (List.rev children) in
let rec unexpand type_map original =
match Michelson_macros.unexpand original with
| Seq (loc, children, annot) ->
let type_map, children =
List.fold_left
(fun (type_map, acc) e ->
let type_map, e = unexpand type_map e in
type_map, e :: acc)
(type_map, [])
l in
type_map, Seq (loc, List.rev l)
| Some l ->
let locs, steps = List.split l in
let name = "C" ^ String.concat "" steps ^ "R" in
let first, last = List.hd locs, List.hd (List.rev locs) in
let (before, _) = List.assoc first type_map in
let (_, after) = List.assoc last type_map in
(fun (type_map, acc) node ->
let type_map, node = unexpand type_map node in
type_map, node :: acc)
(type_map, []) children in
type_map, Seq (loc, List.rev children, annot)
| Prim (loc, name, children, annot) ->
let type_map =
List.filter
(fun (loc, _) -> not (List.mem loc locs))
type_map in
let type_map = (loc, (before, after)) :: type_map in
type_map, Prim (loc, name, [])
match original with
| Seq _ ->
if List.mem_assoc loc type_map then
type_map
else
begin match first_prim_in_sequence original, last_prim_in_sequence original with
| None, _ | _, None -> type_map
| Some floc, Some lloc ->
let fty, _ = List.assoc floc type_map in
let _, lty = List.assoc lloc type_map in
(loc, (fty, lty)) :: type_map
end
| _ -> type_map in
let type_map, children =
List.fold_left
(fun (type_map, acc) node ->
let type_map, node = unexpand type_map node in
type_map, node :: acc)
(type_map, []) children in
type_map, Prim (loc, name, List.rev children, annot)
| oth -> type_map, oth in
let type_map, code = unexpand type_map program.code in
type_map, { program with code }
@ -745,15 +841,13 @@ let commands () =
cctxt.message
"((types . (%a)) (errors . (%a)))"
(Format.pp_print_list
(fun ppf (({ Script_located_ir.point = s },
{ Script_located_ir.point = e }),
(fun ppf ({ Script_located_ir.start = { point = s } ; stop = { point = e } },
bef, aft) ->
Format.fprintf ppf "(%d %d %a %a)" (s + 1) (e + 1)
print_emacs_stack bef print_emacs_stack aft))
types
(Format.pp_print_list
(fun ppf (({ Script_located_ir.point = s },
{ Script_located_ir.point = e }),
(fun ppf ({ Script_located_ir.start = { point = s } ; stop = { point = e } },
err) ->
Format.fprintf ppf "(%d %d %S)" (s + 1) (e + 1) err))
errors >>= fun () ->

View File

@ -1,368 +0,0 @@
{
open Concrete_parser
open Script_located_ir
let count_nl s =
let c = ref 0 in
for i = 0 to String.length s - 1 do
if Compare.Char.(s.[i] = '\010') then
incr c
done;
!c
let update_loc lexbuf nl indent =
let open Lexing in
let lcp = lexbuf.lex_curr_p in
lexbuf.lex_curr_p <- { lcp with
pos_lnum = lcp.pos_lnum + nl;
pos_bol = lcp.pos_cnum - indent;
}
let may_update_loc lexbuf nl indent =
if Compare.Int.(nl <> 0) then update_loc lexbuf nl indent
let start_offset lexbuf =
let open Lexing in
let lsp = lexbuf.lex_start_p in
lsp.pos_cnum - lsp.pos_bol
let end_offset lexbuf =
let open Lexing in
let lcp = lexbuf.lex_curr_p in
lcp.pos_cnum - lcp.pos_bol
let curr_location lexbuf =
lexbuf.Lexing.lex_start_p, lexbuf.Lexing.lex_curr_p
let pos pos =
{ line = pos.Lexing.pos_lnum ;
column = pos.Lexing.pos_cnum - pos.Lexing.pos_bol ;
point = pos.Lexing.pos_cnum }
let pos2 (start, stop) =
pos start, pos stop
(* To translate escape sequences *)
let char_for_backslash = function
| 'n' -> '\010'
| 'r' -> '\013'
| 'b' -> '\008'
| 't' -> '\009'
| c -> c
let char_for_decimal_code lexbuf i =
let c = 100 * (int_of_char(Lexing.lexeme_char lexbuf i) - 48) +
10 * (int_of_char(Lexing.lexeme_char lexbuf (i+1)) - 48) +
(int_of_char(Lexing.lexeme_char lexbuf (i+2)) - 48) in
if Compare.Int.(c < 0 || c > 255) then
raise @@ Illegal_escape (pos2 (curr_location lexbuf), Lexing.lexeme lexbuf)
else char_of_int c
let char_for_hexadecimal_code lexbuf i =
let d1 = int_of_char (Lexing.lexeme_char lexbuf i) in
let val1 = if Compare.Int.(d1 >= 97) then d1 - 87
else if Compare.Int.(d1 >= 65) then d1 - 55
else d1 - 48
in
let d2 = int_of_char (Lexing.lexeme_char lexbuf (i+1)) in
let val2 = if Compare.Int.(d2 >= 97) then d2 - 87
else if Compare.Int.(d2 >= 65) then d2 - 55
else d2 - 48
in
char_of_int (val1 * 16 + val2)
(** Lexer state *)
type state = {
mutable indent_stack:
(int * [`Indent | `Open of (char * (Lexing.position * Lexing.position)) ]) list;
mutable buffer: Concrete_parser.token list;
mutable string_buff: bytes;
mutable string_index: int;
mutable string_start_loc: Lexing.position * Lexing.position;
mutable comment_start_loc: (Lexing.position * Lexing.position) list;
}
let init_state () = {
indent_stack = [];
buffer = [];
string_index = 0;
string_buff = Bytes.create 256;
string_start_loc = Lexing.dummy_pos, Lexing.dummy_pos;
comment_start_loc = [];
}
(** String helpers *)
let reset_string_buffer st =
st.string_buff <- Bytes.create 256;
st.string_index <- 0
let store_string_char st c =
if st.string_index >= Bytes.length st.string_buff then begin
let new_buff = Bytes.create (Bytes.length (st.string_buff) * 2) in
Bytes.blit st.string_buff 0 new_buff 0 (Bytes.length st.string_buff);
st.string_buff <- new_buff
end;
Bytes.set st.string_buff st.string_index c;
st.string_index <- st.string_index + 1
let store_string st s =
for i = 0 to String.length s - 1 do
store_string_char st s.[i];
done
let store_lexeme st lexbuf =
store_string st (Lexing.lexeme lexbuf)
let get_stored_string st =
let s = Bytes.sub st.string_buff 0 st.string_index in
st.string_buff <- Bytes.create 256;
Bytes.to_string s
(** Indentation helpers *)
let first_token st =
match st.indent_stack with
| [] -> true
| _ :: _ -> false
let starting_offset (start, _) =
let open Lexing in
start.pos_cnum - start.pos_bol
let rec pop_indent st loc xs i =
match xs with
| [] -> assert false
| ((x, _) :: _) as xs when Compare.Int.(x = i) ->
st.indent_stack <- xs;
[NEWLINE]
| (x, `Indent) :: xs ->
if Compare.Int.(x > i) then
DEDENT :: pop_indent st loc xs i
else
raise @@ Invalid_indentation (pos2 loc)
| (_, `Open (c, opener_loc)) :: _ ->
let opener_offset = starting_offset opener_loc in
if Compare.Int.(i > opener_offset) then
raise @@ Invalid_indentation_in_block (pos2 loc, c, pos2 opener_loc)
else
raise @@ Unclosed (pos2 loc, c, pos2 opener_loc)
let indent_token st loc =
let i = starting_offset loc in
match st.indent_stack with
| (x, `Indent) :: xs when Compare.Int.(x > i) ->
DEDENT :: pop_indent st loc xs i;
| (x, `Open (c, opener_loc)) :: _ when Compare.Int.(x > i) ->
let opener_offset = starting_offset opener_loc in
if Compare.Int.(i > opener_offset) then
raise @@ Invalid_indentation_in_block (pos2 loc, c, pos2 opener_loc)
else
raise @@ Unclosed (pos2 loc, c, pos2 opener_loc)
| (x, _) :: _ when Compare.Int.(x = i) ->
[NEWLINE]
| [] | (_, _) :: _ (* when Compare.Int.(x < i) *) ->
st.indent_stack <- (i, `Indent) :: st.indent_stack;
[INDENT]
let open_block st opener opener_loc token_offset =
let opener_offset = starting_offset opener_loc in
if Compare.Int.(token_offset <= opener_offset) then
raise @@ Invalid_indentation_after_opener (pos2 opener_loc, opener) ;
st.indent_stack <-
(token_offset, `Open (opener, opener_loc)) :: st.indent_stack;
match opener with
| '{' -> [LBRACE]
| '(' -> [LPAREN]
| _ -> assert false
let close_block st bol closer closer_loc =
let closer_offset = starting_offset closer_loc in
let rec pop xs =
match xs with
| [] -> raise @@ Unopened (pos2 closer_loc, closer)
| (_, `Indent) :: xs -> DEDENT :: pop xs
| (_, `Open (opener, opener_loc)) :: xs ->
let opener_offset = starting_offset opener_loc in
if bol && Compare.Int.(opener_offset <> closer_offset) then
raise @@
Unaligned_closer (pos2 closer_loc, opener, closer, pos2 opener_loc) ;
st.indent_stack <- xs;
[ match opener, closer with
| '{', '}' -> RBRACE
| '(', ')' -> RPAREN
| _ ->
raise @@ Unclosed (pos2 closer_loc, opener, pos2 opener_loc) ]
in
pop st.indent_stack
}
let eol_comment = '#' [^ '\010'] *
let newline = eol_comment ? ('\010' | "\013\010" )
let space = [' ']
let firstidentchar = ['A'-'Z' 'a'-'z' '_']
let identchar = ['A'-'Z' 'a'-'z' '_' '\'' '0'-'9']
let decimal_literal =
['0'-'9'] ['0'-'9' '_']*
let hex_literal =
'0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']['0'-'9' 'A'-'F' 'a'-'f' '_']*
let oct_literal =
'0' ['o' 'O'] ['0'-'7'] ['0'-'7' '_']*
let bin_literal =
'0' ['b' 'B'] ['0'-'1'] ['0'-'1' '_']*
let int_literal =
'-' ? ( decimal_literal | hex_literal | oct_literal | bin_literal)
rule indent_tokens st nl = parse
| space { indent_tokens st nl lexbuf }
| newline { Lexing.new_line lexbuf; indent_tokens st (nl + 1) lexbuf }
| ""
{ let bol = nl <> 0 || first_token st in
if bol then indent_token st (curr_location lexbuf) else [] }
| "/*"
{ st.comment_start_loc <- [curr_location lexbuf];
comment st nl lexbuf }
| ('{' | '(' as opener)
{ let opener_loc = curr_location lexbuf in
let token_offset = next_token_indent st lexbuf in
let bol = nl <> 0 || first_token st in
let prefix =
if bol then indent_token st opener_loc else [] in
prefix @ open_block st opener opener_loc token_offset }
| ('}' | ')' as closer)
{ let closer_loc = curr_location lexbuf in
let bol = Compare.Int.(nl <> 0) in
close_block st bol closer closer_loc }
| eof
{ List.map
(function
| (_, `Indent) -> DEDENT
| (_, `Open (c, loc)) ->
raise @@ Unclosed (pos2 (curr_location lexbuf), c, pos2 loc))
st.indent_stack
@ [EOF]
}
and comment st nl = parse
| "/*" { st.comment_start_loc <-
curr_location lexbuf :: st.comment_start_loc;
comment st nl lexbuf }
| "*/" { match st.comment_start_loc with
| [] -> assert false
| [_] -> indent_tokens st nl lexbuf
| _ :: xs -> st.comment_start_loc <- xs; comment st nl lexbuf }
| "\"" { st.string_start_loc <- curr_location lexbuf;
let nl =
try string st nl lexbuf
with Unterminated_string str_start ->
match st.comment_start_loc with
| [] -> assert false
| loc :: _ ->
let start = List.hd (List.rev st.comment_start_loc) in
raise @@
Unterminated_string_in_comment (pos2 loc, pos2 start, str_start)
in
comment st nl lexbuf }
| newline { Lexing.new_line lexbuf; comment st (nl+1) lexbuf }
| eof { match st.comment_start_loc with
| [] -> assert false
| loc :: _ ->
let start = List.hd (List.rev st.comment_start_loc) in
raise @@ Unterminated_comment (pos2 loc, pos2 start) }
| _ { comment st nl lexbuf }
(** Eat spacings and return the next token offset. *)
and next_token_indent st = parse
| space { next_token_indent st lexbuf }
| newline { Lexing.new_line lexbuf; next_token_indent st lexbuf }
| "" { end_offset lexbuf }
(** The lexer for non-indentation tokens.
It should not care about 'space', 'newline', '{}()' nor comments. *)
and raw_token st = parse
| ";" { SEMICOLON }
| firstidentchar identchar *
{ PRIM (Lexing.lexeme lexbuf) }
| int_literal
{ INT (Lexing.lexeme lexbuf) }
| "\""
{ reset_string_buffer st;
let string_start = lexbuf.Lexing.lex_start_p in
st.string_start_loc <- curr_location lexbuf;
ignore (string st 0 lexbuf);
lexbuf.Lexing.lex_start_p <- string_start;
STRING (get_stored_string st) }
| _
{ raise (Illegal_character (pos2 (curr_location lexbuf),
Lexing.lexeme_char lexbuf 0))
}
and string st nl = parse
'"'
{ nl }
| '\\' newline ([' ' '\t'] * as space)
{ update_loc lexbuf 1 (String.length space);
string st nl lexbuf
}
| '\\' ['\\' '\'' '"' 'n' 't' 'b' 'r' ' ']
{ store_string_char st (char_for_backslash(Lexing.lexeme_char lexbuf 1));
string st nl lexbuf }
| '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
{ store_string_char st (char_for_decimal_code lexbuf 1);
string st nl lexbuf }
| '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F']
{ store_string_char st (char_for_hexadecimal_code lexbuf 2);
string st nl lexbuf }
| newline
{ match st.comment_start_loc with
| [] -> raise @@ Newline_in_string (pos2 (curr_location lexbuf))
| _ -> Lexing.new_line lexbuf; string st (nl+1) lexbuf }
| eof
{ raise @@ Unterminated_string (pos2 st.string_start_loc) }
| _
{ store_string_char st (Lexing.lexeme_char lexbuf 0);
string st nl lexbuf }
{
let rec token st lexbuf =
match st.buffer with
| tok :: tokens ->
st.buffer <- tokens;
tok
| [] ->
match indent_tokens st 0 lexbuf with
| [] -> raw_token st lexbuf
| _ :: _ as tokens -> st.buffer <- tokens; token st lexbuf
}

View File

@ -1,280 +0,0 @@
%token DEDENT
%token EOF
%token INDENT
%token LBRACE
%token LPAREN
%token NEWLINE
%token RBRACE
%token RPAREN
%token SEMICOLON
%token <string> INT
%token <string> PRIM
%token <string> STRING
%left PRIM INT LPAREN LBRACE STRING
%left apply
%start <Script_located_ir.node list> tree
%{
open Script_located_ir
let expand_caddadr original =
match original with
| Prim (loc, str, []) ->
let len = String.length str in
if len > 3
&& String.get str 0 = 'C'
&& String.get str (len - 1) = 'R' then
let rec parse i acc =
if i = 0 then
Some (Seq (loc, acc))
else
match String.get str i with
| 'A' -> parse (i - 1) (Prim (loc, "CAR", []) :: acc)
| 'D' -> parse (i - 1) (Prim (loc, "CDR", []) :: acc)
| _ -> None in
parse (len - 2) []
else
None
| _ -> None
exception Not_a_roman
let decimal_of_roman roman =
(* http://rosettacode.org/wiki/Roman_numerals/Decode#OCaml *)
let arabic = ref 0 in
let lastval = ref 0 in
for i = (String.length roman) - 1 downto 0 do
let n =
match roman.[i] with
| 'M' -> 1000
| 'D' -> 500
| 'C' -> 100
| 'L' -> 50
| 'X' -> 10
| 'V' -> 5
| 'I' -> 1
| _ -> raise Not_a_roman
in
if Compare.Int.(n < !lastval)
then arabic := !arabic - n
else arabic := !arabic + n;
lastval := n
done;
!arabic
let expand_dxiiivp original =
match original with
| Prim (loc, str, [ arg ]) ->
let len = String.length str in
if len > 3
&& String.get str 0 = 'D'
&& String.get str (len - 1) = 'P' then
try
let depth = decimal_of_roman (String.sub str 1 (len - 2)) in
let rec make i acc =
if i = 0 then
acc
else
make (i - 1)
(Seq (loc, [ Prim (loc, "DIP", [ acc ]) ])) in
Some (make depth arg)
with Not_a_roman -> None
else None
| _ -> None
exception Not_a_pair
let expand_paaiair original =
match original with
| Prim (loc, str, []) ->
let len = String.length str in
if len > 4
&& String.get str 0 = 'P'
&& String.get str (len - 1) = 'R' then
try
let rec parse i acc =
if i = 0 then
acc
else if String.get str i = 'I'
&& String.get str (i - 1) = 'A' then
parse (i - 2) (Prim (loc, "PAIR", []) :: acc)
else if String.get str i = 'A' then
match acc with
| [] ->
raise Not_a_pair
| acc :: accs ->
parse (i - 1) (Prim (loc, "DIP", [ acc ]) :: accs)
else
raise Not_a_pair in
Some (Seq (loc, parse (len - 2) []))
with Not_a_pair -> None
else
None
| _ -> None
exception Not_a_dup
let expand_duuuuup original =
match original with
| Prim (loc, str, []) ->
let len = String.length str in
if len > 3
&& String.get str 0 = 'D'
&& String.get str 1 = 'U'
&& String.get str (len - 1) = 'P' then
try
let rec parse i acc =
if i = 1 then acc
else if String.get str i = 'U' then
parse (i - 1)
(Seq (loc, [ Prim (loc, "DIP", [ acc ]) ;
Prim (loc, "SWAP", []) ]))
else
raise Not_a_dup in
Some (parse (len - 2) (Seq (loc, [ Prim (loc, "DUP", []) ])))
with Not_a_dup -> None
else
None
| _ -> None
let expand_compare original =
match original with
| Prim (loc, "CMPEQ", []) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", []) ;
Prim (loc, "EQ", []) ]))
| Prim (loc, "CMPNEQ", []) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", []) ;
Prim (loc, "NEQ", []) ]))
| Prim (loc, "CMPLT", []) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", []) ;
Prim (loc, "LT", []) ]))
| Prim (loc, "CMPGT", []) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", []) ;
Prim (loc, "GT", []) ]))
| Prim (loc, "CMPLE", []) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", []) ;
Prim (loc, "LE", []) ]))
| Prim (loc, "CMPGE", []) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", []) ;
Prim (loc, "GE", []) ]))
| Prim (loc, "IFCMPEQ", args) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", []) ;
Prim (loc, "EQ", []) ;
Prim (loc, "IF", args) ]))
| Prim (loc, "IFCMPNEQ", args) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", []) ;
Prim (loc, "NEQ", []) ;
Prim (loc, "IF", args) ]))
| Prim (loc, "IFCMPLT", args) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", []) ;
Prim (loc, "LT", []) ;
Prim (loc, "IF", args) ]))
| Prim (loc, "IFCMPGT", args) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", []) ;
Prim (loc, "GT", []) ;
Prim (loc, "IF", args) ]))
| Prim (loc, "IFCMPLE", args) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", []) ;
Prim (loc, "LE", []) ;
Prim (loc, "IF", args) ]))
| Prim (loc, "IFCMPGE", args) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", []) ;
Prim (loc, "GE", []) ;
Prim (loc, "IF", args) ]))
| Prim (loc, "IFEQ", args) ->
Some (Seq (loc, [ Prim (loc, "EQ", []) ;
Prim (loc, "IF", args) ]))
| Prim (loc, "IFNEQ", args) ->
Some (Seq (loc, [ Prim (loc, "NEQ", []) ;
Prim (loc, "IF", args) ]))
| Prim (loc, "IFLT", args) ->
Some (Seq (loc, [ Prim (loc, "LT", []) ;
Prim (loc, "IF", args) ]))
| Prim (loc, "IFGT", args) ->
Some (Seq (loc, [ Prim (loc, "GT", []) ;
Prim (loc, "IF", args) ]))
| Prim (loc, "IFLE", args) ->
Some (Seq (loc, [ Prim (loc, "LE", []) ;
Prim (loc, "IF", args) ]))
| Prim (loc, "IFGE", args) ->
Some (Seq (loc, [ Prim (loc, "GE", []) ;
Prim (loc, "IF", args) ]))
| _ -> None
let expand original =
let try_expansions expanders =
match
List.fold_left
(fun acc f ->
match acc with
| None -> f original
| Some rewritten -> Some rewritten)
None expanders with
| None -> original
| Some rewritten -> rewritten in
try_expansions
[ expand_dxiiivp ;
expand_paaiair ;
expand_caddadr ;
expand_duuuuup ;
expand_compare ]
let loc = function
| Prim (loc, _, _) | Int (loc, _) | String (loc, _) | Seq (loc, _) -> loc
let apply node arg =
match node with
| Prim ((sloc, _), n, args) ->
Prim ((sloc, snd (loc arg)), n, args @ [arg])
| Int _ | String _ | Seq _ as _node ->
raise (Invalid_application (node_location arg))
let rec apply_seq node = function
| [] -> node
| n1 :: n2 -> apply_seq (apply node n1) n2
let pos p1 p2 =
({ line = p1.Lexing.pos_lnum ;
column = p1.Lexing.pos_cnum - p1.Lexing.pos_bol ;
point = p1.Lexing.pos_cnum },
{ line = p2.Lexing.pos_lnum ;
column = p2.Lexing.pos_cnum - p2.Lexing.pos_bol ;
point = p2.Lexing.pos_cnum })
%}
%%
%public tree:
| node = nodes EOF { node }
| INDENT node = nodes DEDENT EOF { node }
nodes:
| { [] }
| n1 = node { [n1] }
| n1 = node SEMICOLON n2 = nodes { n1 :: n2 }
| n1 = node SEMICOLON NEWLINE n2 = nodes { n1 :: n2 }
| n1 = node NEWLINE n2 = nodes { n1 :: n2 }
node:
| node = line_node { expand node }
| line_node error
(* Un seul elt par bloc de '(' ... ')' (pas de NEWLINE ou de ';' *)
{ raise (Sequence_in_parens (pos $startpos $endpos)) }
| node = line_node INDENT nodes = nodes DEDENT { expand (apply_seq node nodes) }
line_node:
| n1 = line_node n2 = line_node %prec apply { apply n1 n2 }
| LPAREN node = node RPAREN { node }
| LBRACE nodes = nodes RBRACE { Seq (pos $startpos $endpos, nodes) }
| prim = PRIM { Prim (pos $startpos $endpos, prim, []) }
| i = INT { Int (pos $startpos $endpos, i) }
| s = STRING { String (pos $startpos $endpos, s) }
%%

View File

@ -0,0 +1,672 @@
(**************************************************************************)
(* *)
(* Copyright (c) 2014 - 2016. *)
(* Dynamic Ledger Solutions, Inc. <contact@tezos.com> *)
(* *)
(* All rights reserved. No warranty, explicit or implicit, provided. *)
(* *)
(**************************************************************************)
open Script_located_ir
let expand_caddadr original =
match original with
| Prim (loc, str, [], annot) ->
let len = String.length str in
if len > 3
&& String.get str 0 = 'C'
&& String.get str (len - 1) = 'R' then
let rec parse i ?annot acc =
if i = 0 then
Some (Seq (loc, acc, None))
else
match String.get str i with
| 'A' -> parse (i - 1) (Prim (loc, "CAR", [], annot) :: acc)
| 'D' -> parse (i - 1) (Prim (loc, "CDR", [], annot) :: acc)
| _ -> None in
parse (len - 2) ?annot []
else
None
| _ -> None
let expand_set_caddadr original =
match original with
| Prim (loc, str, [], None) ->
let len = String.length str in
if len >= 7
&& String.sub str 0 5 = "SET_C"
&& String.get str (len - 1) = 'R' then
let rec parse i acc =
if i = 4 then
Some acc
else
match String.get str i with
| 'A' ->
let acc =
Seq (loc,
[ Prim (loc, "DUP", [], None) ;
Prim (loc, "DIP",
[ Seq (loc,
[ Prim (loc, "CAR", [], None) ;
acc ], None) ], None) ;
Prim (loc, "CDR", [], None) ;
Prim (loc, "SWAP", [], None) ;
Prim (loc, "PAIR", [], None) ], None) in
parse (i - 1) acc
| 'D' ->
let acc =
Seq (loc,
[ Prim (loc, "DUP", [], None) ;
Prim (loc, "DIP",
[ Seq (loc,
[ Prim (loc, "CDR", [], None) ;
acc ], None) ], None) ;
Prim (loc, "CAR", [], None) ;
Prim (loc, "PAIR", [], None) ], None) in
parse (i - 1) acc
| _ -> None in
match String.get str (len - 2) with
| 'A' ->
let init =
Seq (loc,
[ Prim (loc, "CDR", [], None) ;
Prim (loc, "SWAP", [], None) ;
Prim (loc, "PAIR", [], None) ], None) in
parse (len - 3) init
| 'D' ->
let init =
Seq (loc,
[ Prim (loc, "CAR", [], None) ;
Prim (loc, "PAIR", [], None) ], None) in
parse (len - 3) init
| _ -> None
else
None
| _ -> None
let expand_map_caddadr original =
match original with
| Prim (loc, str, [ Seq _ as code ], None) ->
let len = String.length str in
if len >= 7
&& String.sub str 0 5 = "MAP_C"
&& String.get str (len - 1) = 'R' then
let rec parse i acc =
if i = 4 then
Some acc
else
match String.get str i with
| 'A' ->
let acc =
Seq (loc,
[ Prim (loc, "DUP", [], None) ;
Prim (loc, "DIP",
[ Seq (loc,
[ Prim (loc, "CAR", [], None) ;
acc ], None) ], None) ;
Prim (loc, "CDR", [], None) ;
Prim (loc, "SWAP", [], None) ;
Prim (loc, "PAIR", [], None) ], None) in
parse (i - 1) acc
| 'D' ->
let acc =
Seq (loc,
[ Prim (loc, "DUP", [], None) ;
Prim (loc, "DIP",
[ Seq (loc,
[ Prim (loc, "CDR", [], None) ;
acc ], None) ], None) ;
Prim (loc, "CAR", [], None) ;
Prim (loc, "PAIR", [], None) ], None) in
parse (i - 1) acc
| _ -> None in
match String.get str (len - 2) with
| 'A' ->
let init =
Seq (loc,
[ Prim (loc, "DUP", [], None) ;
Prim (loc, "CDR", [], None) ;
Prim (loc, "SWAP", [], None) ;
Prim (loc, "CAR", [], None) ;
code ;
Prim (loc, "PAIR", [], None) ], None) in
parse (len - 3) init
| 'D' ->
let init =
Seq (loc,
[ Prim (loc, "DUP", [], None) ;
Prim (loc, "CDR", [], None) ;
code ;
Prim (loc, "SWAP", [], None) ;
Prim (loc, "CAR", [], None) ;
Prim (loc, "PAIR", [], None) ], None) in
parse (len - 3) init
| _ -> None
else
None
| _ -> None
exception Not_a_roman
let decimal_of_roman roman =
(* http://rosettacode.org/wiki/Roman_numerals/Decode#OCaml *)
let arabic = ref 0 in
let lastval = ref 0 in
for i = (String.length roman) - 1 downto 0 do
let n =
match roman.[i] with
| 'M' -> 1000
| 'D' -> 500
| 'C' -> 100
| 'L' -> 50
| 'X' -> 10
| 'V' -> 5
| 'I' -> 1
| _ -> raise Not_a_roman
in
if Compare.Int.(n < !lastval)
then arabic := !arabic - n
else arabic := !arabic + n;
lastval := n
done;
!arabic
let expand_dxiiivp original =
match original with
| Prim (loc, str, [ arg ], None) ->
let len = String.length str in
if len > 3
&& String.get str 0 = 'D'
&& String.get str (len - 1) = 'P' then
try
let depth = decimal_of_roman (String.sub str 1 (len - 2)) in
let rec make i acc =
if i = 0 then
acc
else
make (i - 1)
(Seq (loc, [ Prim (loc, "DIP", [ acc ], None) ], None)) in
Some (make depth arg)
with Not_a_roman -> None
else None
| _ -> None
exception Not_a_pair
let expand_paaiair original =
match original with
| Prim (loc, str, [], None) ->
let len = String.length str in
if len > 4
&& String.get str 0 = 'P'
&& String.get str (len - 1) = 'R' then
try
let rec parse i acc =
if i = 0 then
acc
else if String.get str i = 'I'
&& String.get str (i - 1) = 'A' then
parse (i - 2) (Prim (loc, "PAIR", [], None) :: acc)
else if String.get str i = 'A' then
match acc with
| [] ->
raise Not_a_pair
| acc :: accs ->
parse (i - 1)
(Prim (loc, "DIP", [ Seq (loc, [ acc ], None) ], None)
:: accs)
else
raise Not_a_pair in
Some (Seq (loc, parse (len - 2) [], None))
with Not_a_pair -> None
else
None
| _ -> None
let expand_unpaaiair original =
match original with
| Prim (loc, str, [], None) ->
let len = String.length str in
if len >= 6
&& String.sub str 0 3 = "UNP"
&& String.get str (len - 1) = 'R' then
try
let rec parse i acc =
if i = 2 then
match acc with
| [ Seq _ as acc ] -> acc
| _ -> Seq (loc, List.rev acc, None)
else if String.get str i = 'I'
&& String.get str (i - 1) = 'A' then
parse (i - 2)
(Seq (loc, [ Prim (loc, "DUP", [], None) ;
Prim (loc, "CAR", [], None) ;
Prim (loc, "DIP",
[ Seq (loc,
[ Prim (loc, "CDR", [], None) ],
None) ], None) ], None)
:: acc)
else if String.get str i = 'A' then
match acc with
| [] ->
raise Not_a_pair
| (Seq _ as acc) :: accs ->
parse (i - 1)
(Prim (loc, "DIP", [ acc ], None) :: accs)
| acc :: accs ->
parse (i - 1)
(Prim (loc, "DIP",
[ Seq (loc, [ acc ], None) ],
None) :: accs)
else
raise Not_a_pair in
Some (parse (len - 2) [])
with Not_a_pair -> None
else
None
| _ -> None
exception Not_a_dup
let expand_duuuuup original =
match original with
| Prim (loc, str, [], None) ->
let len = String.length str in
if len > 3
&& String.get str 0 = 'D'
&& String.get str 1 = 'U'
&& String.get str (len - 1) = 'P' then
try
let rec parse i acc =
if i = 1 then acc
else if String.get str i = 'U' then
parse (i - 1)
(Seq (loc, [ Prim (loc, "DIP", [ acc ], None) ;
Prim (loc, "SWAP", [], None) ], None))
else
raise Not_a_dup in
Some (parse (len - 2) (Seq (loc, [ Prim (loc, "DUP", [], None) ], None)))
with Not_a_dup -> None
else
None
| _ -> None
let expand_compare original =
match original with
| Prim (loc, "CMPEQ", [], None) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", [], None) ;
Prim (loc, "EQ", [], None) ], None))
| Prim (loc, "CMPNEQ", [], None) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", [], None) ;
Prim (loc, "NEQ", [], None) ], None))
| Prim (loc, "CMPLT", [], None) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", [], None) ;
Prim (loc, "LT", [], None) ], None))
| Prim (loc, "CMPGT", [], None) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", [], None) ;
Prim (loc, "GT", [], None) ], None))
| Prim (loc, "CMPLE", [], None) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", [], None) ;
Prim (loc, "LE", [], None) ], None))
| Prim (loc, "CMPGE", [], None) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", [], None) ;
Prim (loc, "GE", [], None) ], None))
| Prim (loc, "IFCMPEQ", args, None) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", [], None) ;
Prim (loc, "EQ", [], None) ;
Prim (loc, "IF", args, None) ], None))
| Prim (loc, "IFCMPNEQ", args, None) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", [], None) ;
Prim (loc, "NEQ", [], None) ;
Prim (loc, "IF", args, None) ], None))
| Prim (loc, "IFCMPLT", args, None) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", [], None) ;
Prim (loc, "LT", [], None) ;
Prim (loc, "IF", args, None) ], None))
| Prim (loc, "IFCMPGT", args, None) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", [], None) ;
Prim (loc, "GT", [], None) ;
Prim (loc, "IF", args, None) ], None))
| Prim (loc, "IFCMPLE", args, None) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", [], None) ;
Prim (loc, "LE", [], None) ;
Prim (loc, "IF", args, None) ], None))
| Prim (loc, "IFCMPGE", args, None) ->
Some (Seq (loc, [ Prim (loc, "COMPARE", [], None) ;
Prim (loc, "GE", [], None) ;
Prim (loc, "IF", args, None) ], None))
| Prim (loc, "IFEQ", args, None) ->
Some (Seq (loc, [ Prim (loc, "EQ", [], None) ;
Prim (loc, "IF", args, None) ], None))
| Prim (loc, "IFNEQ", args, None) ->
Some (Seq (loc, [ Prim (loc, "NEQ", [], None) ;
Prim (loc, "IF", args, None) ], None))
| Prim (loc, "IFLT", args, None) ->
Some (Seq (loc, [ Prim (loc, "LT", [], None) ;
Prim (loc, "IF", args, None) ], None))
| Prim (loc, "IFGT", args, None) ->
Some (Seq (loc, [ Prim (loc, "GT", [], None) ;
Prim (loc, "IF", args, None) ], None))
| Prim (loc, "IFLE", args, None) ->
Some (Seq (loc, [ Prim (loc, "LE", [], None) ;
Prim (loc, "IF", args, None) ], None))
| Prim (loc, "IFGE", args, None) ->
Some (Seq (loc, [ Prim (loc, "GE", [], None) ;
Prim (loc, "IF", args, None) ], None))
| _ -> None
let expand original =
let try_expansions expanders =
match
List.fold_left
(fun acc f ->
match acc with
| None -> f original
| Some rewritten -> Some rewritten)
None expanders with
| None -> original
| Some rewritten -> rewritten in
try_expansions
[ expand_caddadr ;
expand_set_caddadr ;
expand_map_caddadr ;
expand_dxiiivp ;
expand_paaiair ;
expand_unpaaiair ;
expand_duuuuup ;
expand_compare ]
open Script
let unexpand_caddadr expanded =
let rec rsteps acc = function
| [] -> Some acc
| Prim (_, "CAR" , [], None) :: rest ->
rsteps ("A" :: acc) rest
| Prim (_, "CDR" , [], None) :: rest ->
rsteps ("D" :: acc) rest
| _ -> None in
match expanded with
| Seq (loc, (Prim (_, "CAR" , [], None) :: _ as nodes), None)
| Seq (loc, (Prim (_, "CDR" , [], None) :: _ as nodes), None) ->
begin match rsteps [] nodes with
| Some steps ->
let name = String.concat "" ("C" :: List.rev ("R" :: steps)) in
Some (Prim (loc, name, [], None))
| None -> None
end
| _ -> None
let unexpand_set_caddadr expanded =
let rec steps acc = function
| Seq (loc,
[ Prim (_, "CDR", [], None) ;
Prim (_, "SWAP", [], None) ;
Prim (_, "PAIR", [], None) ], None) ->
Some (loc, "A" :: acc)
| Seq (loc,
[ Prim (_, "CAR", [], None) ;
Prim (_, "PAIR", [], None) ], None) ->
Some (loc, "D" :: acc)
| Seq (_,
[ Prim (_, "DUP", [], None) ;
Prim (_, "DIP",
[ Seq (_,
[ Prim (_, "CAR", [], None) ;
sub ], None) ], None) ;
Prim (_, "CDR", [], None) ;
Prim (_, "SWAP", [], None) ;
Prim (_, "PAIR", [], None) ], None) ->
steps ("A" :: acc) sub
| Seq (_,
[ Prim (_, "DUP", [], None) ;
Prim (_, "DIP",
[ Seq (_,
[ Prim (_, "CDR", [], None) ;
sub ], None) ], None) ;
Prim (_, "CAR", [], None) ;
Prim (_, "PAIR", [], None) ], None) ->
steps ("D" :: acc) sub
| _ -> None in
match steps [] expanded with
| Some (loc, steps) ->
let name = String.concat "" ("SET_C" :: List.rev ("R" :: steps)) in
Some (Prim (loc, name, [], None))
| None -> None
let unexpand_map_caddadr expanded =
let rec steps acc = function
| Seq (loc,
[ Prim (_, "DUP", [], None) ;
Prim (_, "CDR", [], None) ;
Prim (_, "SWAP", [], None) ;
Prim (_, "CAR", [], None) ;
code ;
Prim (_, "PAIR", [], None) ], None) ->
Some (loc, "A" :: acc, code)
| Seq (loc,
[ Prim (_, "DUP", [], None) ;
Prim (_, "CDR", [], None) ;
code ;
Prim (_, "SWAP", [], None) ;
Prim (_, "CAR", [], None) ;
Prim (_, "PAIR", [], None) ], None) ->
Some (loc, "D" :: acc, code)
| Seq (_,
[ Prim (_, "DUP", [], None) ;
Prim (_, "DIP",
[ Seq (_,
[ Prim (_, "CAR", [], None) ;
sub ], None) ], None) ;
Prim (_, "CDR", [], None) ;
Prim (_, "SWAP", [], None) ;
Prim (_, "PAIR", [], None) ], None) ->
steps ("A" :: acc) sub
| Seq (_,
[ Prim (_, "DUP", [], None) ;
Prim (_, "DIP",
[ Seq (_,
[ Prim (_, "CDR", [], None) ;
sub ], None) ], None) ;
Prim (_, "CAR", [], None) ;
Prim (_, "PAIR", [], None) ], None) ->
steps ("D" :: acc) sub
| _ -> None in
match steps [] expanded with
| Some (loc, steps, code) ->
let name = String.concat "" ("MAP_C" :: List.rev ("R" :: steps)) in
Some (Prim (loc, name, [ code ], None))
| None -> None
let roman_of_decimal decimal =
(* http://rosettacode.org/wiki/Roman_numerals/Encode#OCaml *)
let digit x y z = function
| 1 -> [ x ]
| 2 -> [ x ; x ]
| 3 -> [ x ; x ; x ]
| 4 -> [ x ; y ]
| 5 -> [ y ]
| 6 -> [ y ; x ]
| 7 -> [ y ; x ; x ]
| 8 -> [ y ; x ; x ; x ]
| 9 -> [ x ; z ]
| _ -> assert false in
let rec to_roman x =
if x = 0 then []
else if x < 0 then
invalid_arg "Negative roman numeral"
else if x >= 1000 then
"M" :: to_roman (x - 1000)
else if x >= 100 then
digit "C" "D" "M" (x / 100) @ to_roman (x mod 100)
else if x >= 10 then
digit "X" "L" "C" (x / 10) @ to_roman (x mod 10)
else
digit "I" "V" "X" x in
String.concat "" (to_roman decimal)
let unexpand_dxiiivp expanded =
match expanded with
| Seq (loc,
[ Prim (_, "DIP",
[ Seq (_, [ Prim (_, "DIP", [ _ ], None) ], None) as sub ],
None) ],
None) ->
let rec count acc = function
| Seq (_, [ Prim (_, "DIP", [ sub ], None) ], None) -> count (acc + 1) sub
| sub -> (acc, sub) in
let depth, sub = count 1 sub in
let name = "D" ^ roman_of_decimal depth ^ "P" in
Some (Prim (loc, name, [ sub ], None))
| _ -> None
let unexpand_duuuuup expanded =
let is_duuuuup_prim_name str =
let len = String.length str in
len >= 3
&& String.get str 0 = 'D'
&& String.get str (len - 1) = 'P'
&& begin
let all_u = ref true in
for i = 1 to len - 2 do
all_u := !all_u && String.get str i = 'U'
done ;
!all_u
end in
match expanded with
| Seq (loc,
[ Prim (_, "DIP",
[ Prim (_, sub, [], None) ], None) ;
Prim (_, "SWAP", [], None) ], None)
when is_duuuuup_prim_name sub ->
let name = "DU" ^ String.sub sub 1 (String.length sub - 1) in
Some (Prim (loc, name, [], None))
| _ -> None
let unexpand_paaiair expanded =
match expanded with
| Seq (_, [ Prim (_, "PAIR", [], None) ], None) -> Some expanded
| Seq (loc, (_ :: _ as nodes), None) ->
let rec destruct acc = function
| [] -> Some acc
| Prim (_, "DIP", [ Seq (_, [ sub ], None) ], None) :: rest ->
destruct ("A" :: acc) (sub :: rest)
| Prim (_, "PAIR", [], None) :: rest ->
destruct ("AI" :: acc) rest
| _ -> None in
begin match destruct [] nodes with
| None -> None
| Some seq ->
let name = String.concat "" ("P" :: List.rev ("R" :: seq)) in
Some (Prim (loc, name, [], None))
end
| _ -> None
let unexpand_unpaaiair expanded =
match expanded with
| Seq (loc, (_ :: _ as nodes), None) ->
let rec destruct sacc acc = function
| [] -> Some acc
| Prim (_, "DIP", [ Seq (_, [ sub ], None) ], None) :: rest
| Prim (_, "DIP", [ Seq (_, _, _) as sub ], None) :: rest ->
destruct ("A" :: sacc) acc (sub :: rest)
| Seq (_, [ Prim (_, "DUP", [], None) ;
Prim (_, "CAR", [], None) ;
Prim (_, "DIP",
[ Seq (_,
[ Prim (_, "CDR", [], None) ], None) ],
None) ], None) :: rest ->
destruct [] (List.rev ("AI" :: sacc) :: acc) rest
| _ -> None in
begin match destruct [] [ [ "R" ] ] nodes with
| None -> None
| Some seq ->
let name = String.concat "" ("UNP" :: List.flatten seq) in
Some (Prim (loc, name, [], None))
end
| _ -> None
let unexpand_compare expanded =
match expanded with
| Seq (loc, [ Prim (_, "COMPARE", [], None) ;
Prim (_, "EQ", [], None) ], None) ->
Some (Prim (loc, "CMPEQ", [], None))
| Seq (loc, [ Prim (_, "COMPARE", [], None) ;
Prim (_, "NEQ", [], None) ], None) ->
Some (Prim (loc, "CMPNEQ", [], None))
| Seq (loc, [ Prim (_, "COMPARE", [], None) ;
Prim (_, "LT", [], None) ], None) ->
Some (Prim (loc, "CMPLT", [], None))
| Seq (loc, [ Prim (_, "COMPARE", [], None) ;
Prim (_, "GT", [], None) ], None) ->
Some (Prim (loc, "CMPGT", [], None))
| Seq (loc, [ Prim (_, "COMPARE", [], None) ;
Prim (_, "LE", [], None) ], None) ->
Some (Prim (loc, "CMPLE", [], None))
| Seq (loc, [ Prim (_, "COMPARE", [], None) ;
Prim (_, "GE", [], None) ], None) ->
Some (Prim (loc, "CMPGE", [], None))
| Seq (loc, [ Prim (_, "COMPARE", [], None) ;
Prim (_, "EQ", [], None) ;
Prim (_, "IF", args, None) ], None) ->
Some (Prim (loc, "IFCMPEQ", args, None))
| Seq (loc, [ Prim (_, "COMPARE", [], None) ;
Prim (_, "NEQ", [], None) ;
Prim (_, "IF", args, None) ], None) ->
Some (Prim (loc, "IFCMPNEQ", args, None))
| Seq (loc, [ Prim (_, "COMPARE", [], None) ;
Prim (_, "LT", [], None) ;
Prim (_, "IF", args, None) ], None) ->
Some (Prim (loc, "IFCMPLT", args, None))
| Seq (loc, [ Prim (_, "COMPARE", [], None) ;
Prim (_, "GT", [], None) ;
Prim (_, "IF", args, None) ], None) ->
Some (Prim (loc, "IFCMPGT", args, None))
| Seq (loc, [ Prim (_, "COMPARE", [], None) ;
Prim (_, "LE", [], None) ;
Prim (_, "IF", args, None) ], None) ->
Some (Prim (loc, "IFCMPLE", args, None))
| Seq (loc, [ Prim (_, "COMPARE", [], None) ;
Prim (_, "GE", [], None) ;
Prim (_, "IF", args, None) ], None) ->
Some (Prim (loc, "IFCMPGE", args, None))
| Seq (loc, [ Prim (_, "EQ", [], None) ;
Prim (_, "IF", args, None) ], None) ->
Some (Prim (loc, "IFEQ", args, None))
| Seq (loc, [ Prim (_, "NEQ", [], None) ;
Prim (_, "IF", args, None) ], None) ->
Some (Prim (loc, "IFNEQ", args, None))
| Seq (loc, [ Prim (_, "LT", [], None) ;
Prim (_, "IF", args, None) ], None) ->
Some (Prim (loc, "IFLT", args, None))
| Seq (loc, [ Prim (_, "GT", [], None) ;
Prim (_, "IF", args, None) ], None) ->
Some (Prim (loc, "IFGT", args, None))
| Seq (loc, [ Prim (_, "LE", [], None) ;
Prim (_, "IF", args, None) ], None) ->
Some (Prim (loc, "IFLE", args, None))
| Seq (loc, [ Prim (_, "GE", [], None) ;
Prim (_, "IF", args, None) ], None) ->
Some (Prim (loc, "IFGE", args, None))
| _ -> None
let unexpand original =
let try_expansions unexpanders =
match
List.fold_left
(fun acc f ->
match acc with
| None -> f original
| Some rewritten -> Some rewritten)
None unexpanders with
| None -> original
| Some rewritten -> rewritten in
try_expansions
[ unexpand_caddadr ;
unexpand_set_caddadr ;
unexpand_map_caddadr ;
unexpand_dxiiivp ;
unexpand_paaiair ;
unexpand_unpaaiair ;
unexpand_duuuuup ;
unexpand_compare ]

View File

@ -0,0 +1,34 @@
(**************************************************************************)
(* *)
(* Copyright (c) 2014 - 2016. *)
(* Dynamic Ledger Solutions, Inc. <contact@tezos.com> *)
(* *)
(* All rights reserved. No warranty, explicit or implicit, provided. *)
(* *)
(**************************************************************************)
open Script_located_ir
val expand : node -> node
val expand_caddadr : node -> node option
val expand_set_caddadr : node -> node option
val expand_map_caddadr : node -> node option
val expand_dxiiivp : node -> node option
val expand_paaiair : node -> node option
val expand_duuuuup : node -> node option
val expand_compare : node -> node option
val expand_unpaaiair : node -> node option
open Script
val unexpand : expr -> expr
val unexpand_caddadr : expr -> expr option
val unexpand_set_caddadr : expr -> expr option
val unexpand_map_caddadr : expr -> expr option
val unexpand_dxiiivp : expr -> expr option
val unexpand_paaiair : expr -> expr option
val unexpand_duuuuup : expr -> expr option
val unexpand_compare : expr -> expr option
val unexpand_unpaaiair : expr -> expr option

View File

@ -0,0 +1,521 @@
(**************************************************************************)
(* *)
(* Copyright (c) 2014 - 2016. *)
(* Dynamic Ledger Solutions, Inc. <contact@tezos.com> *)
(* *)
(* All rights reserved. No warranty, explicit or implicit, provided. *)
(* *)
(**************************************************************************)
open Script_located_ir
exception Invalid_utf8_sequence of point * string
exception Unexpected_character of point * string
exception Undefined_escape_character of point * string
exception Missing_break_after_number of point
exception Unterminated_string of location
exception Unterminated_integer of location
exception Unterminated_comment of location
type token_value =
| String of string
| Int of string
| Ident of string
| Annot of string
| Comment of string
| Eol_comment of string
| Semi
| Open_paren | Close_paren
| Open_brace | Close_brace
type token =
{ token : token_value ;
loc : location }
let tokenize source =
let decoder = Uutf.decoder ~encoding:`UTF_8 (`String source) in
let here () =
{ point = Uutf.decoder_count decoder ;
byte = Uutf.decoder_byte_count decoder ;
line = Uutf.decoder_line decoder ;
column = Uutf.decoder_col decoder } in
let tok start stop token =
{ loc = { start ; stop } ; token } in
let stack = ref [] in
let next () =
match !stack with
| charloc :: charlocs ->
stack := charlocs ;
charloc
| [] ->
let loc = here () in
match Uutf.decode decoder with
| `Await -> assert false
| `Malformed s -> raise (Invalid_utf8_sequence (loc, s))
| `Uchar _ | `End as other -> other, loc in
let back charloc =
stack := charloc :: !stack in
let uchar_to_char c =
if Uchar.is_char c then
Some (Uchar.to_char c)
else
None in
let rec skip acc =
match next () with
| `End, _ -> List.rev acc
| `Uchar c, start ->
begin match uchar_to_char c with
| Some ('a'..'z' | 'A'..'Z') -> ident acc start (fun s -> Ident s)
| Some '@' -> ident acc start (fun s -> Annot s)
| Some '-' ->
begin match next () with
| `End, stop ->
raise (Unterminated_integer { start ; stop })
| `Uchar c, stop ->
begin match uchar_to_char c with
| Some '0' -> base acc start
| Some ('1'..'9') -> integer `dec acc start false
| Some _ | None ->
raise (Unterminated_integer { start ; stop })
end
end
| Some '0' -> base acc start
| Some ('1'..'9') -> integer `dec acc start false
| Some (' ' | '\n') -> skip acc
| Some ';' -> skip (tok start (here ()) Semi :: acc)
| Some '{' -> skip (tok start (here ()) Open_brace :: acc)
| Some '}' -> skip (tok start (here ()) Close_brace :: acc)
| Some '(' -> skip (tok start (here ()) Open_paren :: acc)
| Some ')' -> skip (tok start (here ()) Close_paren :: acc)
| Some '"' -> string acc [] start
| Some '#' -> eol_comment acc start
| Some '/' ->
begin match next () with
| `Uchar c, _ when Uchar.equal c (Uchar.of_char '*') ->
comment acc start 0
| (`Uchar _ | `End), _ ->
raise (Unexpected_character (start, "/"))
end
| Some _ | None ->
let byte = Uutf.decoder_byte_count decoder in
let s = String.sub source start.byte (byte - start.byte) in
raise (Unexpected_character (start, s))
end
and base acc start =
match next () with
| (`Uchar c, stop) as charloc ->
begin match uchar_to_char c with
| Some ('0'.. '9') -> integer `dec acc start false
| Some 'x' -> integer `hex acc start true
| Some 'b' -> integer `bin acc start true
| Some ('a' | 'c'..'w' | 'y' | 'z' | 'A'..'Z') ->
raise (Missing_break_after_number stop)
| Some _ | None ->
back charloc ;
skip (tok start (here ()) (Int "0") :: acc)
end
| (_, stop) as other ->
back other ;
skip (tok start stop (Int "0") :: acc)
and integer base acc start first =
let tok stop =
let value =
String.sub source start.byte (stop.byte - start.byte) in
tok start stop (Int value) in
match next () with
| (`Uchar c, stop) as charloc ->
begin match base, Uchar.to_char c with
| `dec, ('0'.. '9') ->
integer `dec acc start false
| `dec, ('a'..'z' | 'A'..'Z') ->
raise (Missing_break_after_number stop)
| `hex, ('0'..'9' | 'a'..'f' | 'A'..'F') ->
integer `hex acc start false
| `hex, ('g'..'z' | 'G'..'Z') ->
raise (Missing_break_after_number stop)
| `bin, ('0' | '1') ->
integer `bin acc start false
| `bin, ('2'..'9' | 'a'..'z' | 'A'..'Z') ->
raise (Missing_break_after_number stop)
| (`bin | `hex), _ when first ->
raise (Unterminated_integer { start ; stop })
| _ ->
back charloc ;
skip (tok stop :: acc)
end
| (`End, stop) as other ->
if first && base = `bin || base = `hex then
raise (Unterminated_integer { start ; stop }) ;
back other ;
skip (tok stop :: acc)
and string acc sacc start =
let tok () =
tok start (here ()) (String (String.concat "" (List.rev sacc))) in
match next () with
| `End, stop -> raise (Unterminated_string { start ; stop })
| `Uchar c, stop ->
match uchar_to_char c with
| Some '"' -> skip (tok () :: acc)
| Some '\n' -> raise (Unterminated_string { start ; stop })
| Some '\\' ->
begin match next () with
| `End, stop -> raise (Unterminated_string { start ; stop })
| `Uchar c, loc ->
match uchar_to_char c with
| Some '"' -> string acc ("\"" :: sacc) start
| Some 'r' -> string acc ("\r" :: sacc) start
| Some 'n' -> string acc ("\n" :: sacc) start
| Some 't' -> string acc ("\t" :: sacc) start
| Some 'b' -> string acc ("\b" :: sacc) start
| Some '\\' -> string acc ("\\" :: sacc) start
| Some _ | None ->
let byte = Uutf.decoder_byte_count decoder in
let s = String.sub source loc.byte (byte - loc.byte) in
raise (Undefined_escape_character (loc, s))
end
| Some _ | None ->
let byte = Uutf.decoder_byte_count decoder in
let s = String.sub source stop.byte (byte - stop.byte) in
string acc (s :: sacc) start
and ident acc start ret =
let tok stop =
let name =
String.sub source start.byte (stop.byte - start.byte) in
tok start stop (ret name) in
match next () with
| (`Uchar c, stop) as charloc ->
begin match uchar_to_char c with
| Some ('a'..'z' | 'A'..'Z' | '_' | '0'..'9') ->
ident acc start ret
| Some _ | None ->
back charloc ;
skip (tok stop :: acc)
end
| (_, stop) as other ->
back other ;
skip (tok stop :: acc)
and comment acc start lvl =
match next () with
| `End, stop -> raise (Unterminated_comment { start ; stop })
| `Uchar c, _ ->
begin match uchar_to_char c with
| Some '*' ->
begin match next () with
| `Uchar c, _ when Uchar.equal c (Uchar.of_char '/') ->
if lvl = 0 then
let stop = here () in
let text =
String.sub source start.byte (stop.byte - start.byte) in
skip (tok start stop (Comment text) :: acc)
else
comment acc start (lvl - 1)
| other ->
back other ;
comment acc start lvl
end
| Some '/' ->
begin match next () with
| `Uchar c, _ when Uchar.equal c (Uchar.of_char '*') ->
comment acc start (lvl + 1)
| other ->
back other ;
comment acc start lvl
end
| Some _ | None -> comment acc start lvl
end
and eol_comment acc start =
let tok stop =
let text = String.sub source start.byte (stop.byte - start.byte) in
tok start stop (Eol_comment text) in
match next () with
| `Uchar c, stop ->
begin match uchar_to_char c with
| Some '\n' -> skip (tok stop :: acc)
| Some _ | None -> eol_comment acc start
end
| (_, stop) as other ->
back other ;
skip (tok stop :: acc) in
skip []
(* Beginning of a sequence of consecutive primitives *)
let min_point : node list -> point = function
| [] -> point_zero
| Int ({ start }, _) :: _
| String ({ start }, _) :: _
| Prim ({ start }, _, _, _) :: _
| Seq ({ start }, _, _) :: _ -> start
(* End of a sequence of consecutive primitives *)
let rec max_point : node list -> point = function
| [] -> point_zero
| _ :: (_ :: _ as rest) -> max_point rest
| Int ({ stop }, _) :: []
| String ({ stop }, _) :: []
| Prim ({ stop }, _, _, _) :: []
| Seq ({ stop }, _, _) :: [] -> stop
(* An item in the parser's state stack.
Not every value of type [mode list] is a valid parsing context.
It must respect the following additional invariants.
- a state stack always ends in [Toplevel _],
- [Toplevel _] does not appear anywhere else,
- [Unwrapped _] cannot appear directly on top of [Wrapped _],
- [Wrapped _] cannot appear directly on top of [Sequence _],
- [Wrapped _] cannot appear directly on top of [Sequence _]. *)
type mode =
| Toplevel of node list
| Expression of node option
| Sequence of token * node list * string option
| Unwrapped of location * string * node list * string option
| Wrapped of token * string * node list * string option
(* Enter a new parsing state. *)
let push_mode mode stack =
mode :: stack
(* Leave a parsing state. *)
let pop_mode = function
| [] -> assert false
| _ :: rest -> rest
(* Usually after a [pop_mode], jump back into the previous parsing
state, injecting the current reduction (insert the just parsed item
of a sequence or argument of a primitive application). *)
let fill_mode result = function
| [] -> assert false
| Expression _ :: _ :: _ -> assert false
| Expression (Some _) :: [] -> assert false
| Toplevel _ :: _ :: _ -> assert false
| Expression None :: [] ->
Expression (Some result) :: []
| Toplevel exprs :: [] ->
Toplevel (result :: exprs) :: []
| Sequence (token, exprs, annot) :: rest ->
Sequence (token, result :: exprs, annot) :: rest
| Wrapped (token, name, exprs, annot) :: rest ->
Wrapped (token, name, result :: exprs, annot) :: rest
| Unwrapped (start, name, exprs, annot) :: rest ->
Unwrapped (start, name, result :: exprs, annot) :: rest
exception Unclosed of token
exception Unexpected of token
exception Extra of token
exception Misaligned of node
exception Empty
let rec parse
?expand:(do_expand = true)
?check:(do_check = true)
tokens stack =
(* Two steps:
- 1. parse without checking indentation [parse]
- 2. check indentation [check] (inlined in 1)
- 3. expand macros (inlined in 1, after 2) *)
match stack, tokens with
(* Start by preventing all absurd cases, so now the pattern
matching exhaustivity can tell us that we treater all
possible tokens for all possible valid states. *)
| [], _
| [ Wrapped _ ], _
| [ Unwrapped _ ], _
| Unwrapped _ :: Unwrapped _ :: _, _
| Unwrapped _ :: Wrapped _ :: _, _
| Toplevel _ :: _ :: _, _
| Expression _ :: _ :: _, _ ->
assert false
(* Return *)
| Expression (Some result) :: _, [] ->
[ result ]
| Expression (Some _) :: _, token :: _ ->
raise (Unexpected token)
| Expression None :: _, [] ->
raise Empty
| Toplevel [ Seq (_, exprs, _) as expr ] :: [],
[] ->
if do_check then check ~toplevel: false expr ;
let exprs =
if do_expand then
List.map Michelson_macros.expand exprs
else exprs in
exprs
| Toplevel exprs :: [],
[] ->
let exprs = List.rev exprs in
let loc = { start = min_point exprs ; stop = max_point exprs } in
let expr = Seq (loc, exprs, None) in
if do_check then check ~toplevel: true expr ;
let exprs =
if do_expand then
List.map Michelson_macros.expand exprs
else exprs in
exprs
(* Ignore comments *)
| _,
{ token = Eol_comment _ | Comment _ } :: rest ->
parse rest stack
| (Wrapped _ | Unwrapped _) :: _,
({ token = Open_paren } as token)
:: { token = Eol_comment _ | Comment _ } :: rest ->
parse (token :: rest) stack
(* Erroneous states *)
| (Wrapped _ | Unwrapped _) :: _ ,
({ token = Open_paren } as token)
:: { token = Open_paren | Open_brace } :: _
| Unwrapped _ :: Expression _ :: _ ,
({ token = Semi | Close_brace | Close_paren } as token) :: _
| Expression None :: _ ,
({ token = Semi | Close_brace | Close_paren | Open_brace | Open_paren } as token) :: _ ->
raise (Unexpected token)
| (Sequence _ | Toplevel _) :: _ ,
{ token = Semi } :: ({ token = Semi } as token) :: _ ->
raise (Extra token)
| (Wrapped _ | Unwrapped _) :: _ ,
{ token = Open_paren }
:: ({ token = Int _ | String _ | Annot _ | Close_paren } as token) :: _
| Unwrapped (_, _, _, _) :: Toplevel _ :: _,
({ token = Close_brace } as token) :: _
| Unwrapped (_, _, _, _) :: _,
({ token = Close_paren } as token) :: _
| Toplevel _ :: [],
({ token = Close_paren } as token) :: _
| Toplevel _ :: [],
({ token = Close_brace } as token) :: _
| _,
({ token = Annot _ } as token) :: _ ->
raise (Unexpected token)
| Wrapped (token, _, _, _) :: _,
({ token = Close_brace | Semi } :: _ | [])
| (Sequence _ | Toplevel _) :: _,
({ token = Open_paren } as token) :: _
| (Wrapped _ | Unwrapped _) :: _,
({ token = Open_paren } as token) :: ({ token = Close_brace | Semi } :: _ | [])
| (Sequence (token, _, _) :: _ | Unwrapped _ :: Sequence (token, _, _) :: _),
({ token = Close_paren } :: _ | [])->
raise (Unclosed token)
(* Valid states *)
| (Toplevel _ | Sequence (_, _, _)) :: _ ,
{ token = Ident name ; loc } :: { token = Annot annot } :: rest ->
let mode = Unwrapped (loc, name, [], Some annot) in
parse rest (push_mode mode stack)
| (Expression None | Toplevel _ | Sequence (_, _, _)) :: _ ,
{ token = Ident name ; loc } :: rest ->
let mode = Unwrapped (loc, name, [], None) in
parse rest (push_mode mode stack)
| (Expression None | Sequence _ | Toplevel _ | Unwrapped _ | Wrapped _) :: _,
{ token = Int value ; loc } :: rest ->
let expr : node = Int (loc, value) in
if do_check then check ~toplevel: false expr ;
let expr =
if do_expand then
Michelson_macros.expand expr
else expr in
parse rest (fill_mode expr stack)
| (Expression None | Sequence _ | Toplevel _ | Unwrapped _ | Wrapped _) :: _,
{ token = String contents ; loc } :: rest ->
let expr : node = String (loc, contents) in
if do_check then check ~toplevel: false expr ;
let expr =
if do_expand then
Michelson_macros.expand expr
else expr in
parse rest (fill_mode expr stack)
| Sequence ({ loc = { start } }, exprs, annot) :: _ ,
{ token = Close_brace ; loc = { stop } } :: rest ->
let exprs = List.rev exprs in
let expr = Seq ({ start ; stop }, exprs, annot) in
if do_check then check ~toplevel: false expr ;
let expr =
if do_expand then
Michelson_macros.expand expr
else expr in
parse rest (fill_mode expr (pop_mode stack))
| (Sequence _ | Toplevel _) :: _ ,
{ token = Semi } :: rest ->
parse rest stack
| Unwrapped ({ start ; stop }, name, exprs, annot) :: Expression _ :: _,
([] as rest)
| Unwrapped ({ start ; stop }, name, exprs, annot) :: Toplevel _ :: _,
({ token = Semi } :: _ | [] as rest)
| Unwrapped ({ start ; stop }, name, exprs, annot) :: Sequence _ :: _ ,
({ token = Close_brace | Semi } :: _ as rest)
| Wrapped ({ loc = { start ; stop } }, name, exprs, annot) :: _ ,
{ token = Close_paren } :: rest ->
let exprs = List.rev exprs in
let stop = if exprs = [] then stop else max_point exprs in
let expr = Prim ({ start ; stop }, name, exprs, annot) in
if do_check then check ~toplevel: false expr ;
let expr =
if do_expand then
Michelson_macros.expand expr
else expr in
parse rest (fill_mode expr (pop_mode stack))
| (Wrapped _ | Unwrapped _) :: _ ,
({ token = Open_paren } as token) :: { token = Ident name } :: { token = Annot annot } :: rest ->
let mode = Wrapped (token, name, [], Some annot) in
parse rest (push_mode mode stack)
| (Wrapped _ | Unwrapped _) :: _ ,
({ token = Open_paren } as token) :: { token = Ident name } :: rest ->
let mode = Wrapped (token, name, [], None) in
parse rest (push_mode mode stack)
| (Wrapped _ | Unwrapped _) :: _ ,
{ token = Ident name ; loc } :: rest ->
let expr = Prim (loc, name, [], None) in
if do_check then check ~toplevel: false expr ;
let expr =
if do_expand then
Michelson_macros.expand expr
else expr in
parse rest (fill_mode expr stack)
| (Wrapped _ | Unwrapped _ | Toplevel _ | Sequence _) :: _ ,
({ token = Open_brace } as token) :: { token = Annot annot } :: rest ->
let mode = Sequence (token, [], Some annot) in
parse rest (push_mode mode stack)
| (Wrapped _ | Unwrapped _ | Toplevel _ | Sequence _) :: _ ,
({ token = Open_brace } as token) :: rest ->
let mode = Sequence (token, [], None) in
parse rest (push_mode mode stack)
(* indentation checker *)
and check ?(toplevel = false) = function
| Seq ({ start ; stop }, [], _) as expr ->
if start.column >= stop.column then
raise (Misaligned expr)
| Prim ({ start ; stop }, _, first :: rest, _)
| Seq ({ start ; stop }, first :: rest, _) as expr ->
let { column = first_column ; line = first_line } =
min_point [ first ] in
if start.column >= stop.column then
raise (Misaligned expr) ;
if not toplevel && start.column >= first_column then
raise (Misaligned expr) ;
(* In a sequence or in the arguments of a primitive, we
require all items to be aligned, but we relax the rule to
allow consecutive items to be writtem on the same line. *)
let rec in_line_or_aligned prev_start_line = function
| [] -> ()
| expr :: rest ->
let { column ; line = start_line } = min_point [ expr ] in
let { line = stop_line } = max_point [ expr ] in
if stop_line <> prev_start_line
&& column <> first_column then
raise (Misaligned expr) ;
in_line_or_aligned start_line rest in
in_line_or_aligned first_line rest
| Prim (_, _, [], _) | String _ | Int _ -> ()
let parse_expression ?expand ?check tokens =
let result = match tokens with
| ({ token = Open_paren } as token) :: { token = Ident name } :: { token = Annot annot } :: rest ->
let mode = Wrapped (token, name, [], Some annot) in
parse ?expand ?check rest [ mode ; Expression None ]
| ({ token = Open_paren } as token) :: { token = Ident name } :: rest ->
let mode = Wrapped (token, name, [], None) in
parse ?expand ?check rest [ mode ; Expression None ]
| _ ->
parse ?expand ?check tokens [ Expression None ] in
match result with
| [ single ] -> single
| _ -> assert false
let parse_toplevel ?expand ?check tokens =
parse ?expand ?check tokens [ Toplevel [] ]

View File

@ -0,0 +1,44 @@
(**************************************************************************)
(* *)
(* Copyright (c) 2014 - 2016. *)
(* Dynamic Ledger Solutions, Inc. <contact@tezos.com> *)
(* *)
(* All rights reserved. No warranty, explicit or implicit, provided. *)
(* *)
(**************************************************************************)
open Script_located_ir
exception Invalid_utf8_sequence of point * string
exception Unexpected_character of point * string
exception Undefined_escape_character of point * string
exception Missing_break_after_number of point
exception Unterminated_string of location
exception Unterminated_integer of location
exception Unterminated_comment of location
type token_value =
| String of string
| Int of string
| Ident of string
| Annot of string
| Comment of string
| Eol_comment of string
| Semi
| Open_paren | Close_paren
| Open_brace | Close_brace
type token =
{ token : token_value ;
loc : location }
val tokenize : string -> token list
exception Unclosed of token
exception Unexpected of token
exception Extra of token
exception Misaligned of node
exception Empty
val parse_toplevel : ?expand:bool -> ?check:bool -> token list -> node list
val parse_expression : ?expand:bool -> ?check:bool -> token list -> node

View File

@ -8,64 +8,53 @@
(**************************************************************************)
type point =
{ line : int ;
column : int ;
point : int }
{ point : int ;
byte : int ;
line : int ;
column : int }
let point_zero =
{ point = 0 ;
byte = 0 ;
line = 0 ;
column = 0 }
type location =
point * point
{ start : point ;
stop : point }
let location_encoding =
let open Data_encoding in
let point_encoding =
conv
(fun { line ; column ; point } -> (line, column, point))
(fun (line, column, point) -> { line ; column ; point })
(obj3
(fun { line ; column ; point ; byte } -> (line, column, point, byte))
(fun (line, column, point, byte) -> { line ; column ; point ; byte })
(obj4
(req "line" uint16)
(req "column" uint16)
(req "point" uint16)) in
obj2
(req "point" uint16)
(req "byte" uint16)) in
conv
(fun { start ; stop } -> (start, stop))
(fun (start, stop) -> { start ; stop })
(obj2
(req "start" point_encoding)
(req "stop" point_encoding)
(req "stop" point_encoding))
type node =
| Int of location * string
| String of location * string
| Prim of location * string * node list
| Seq of location * node list
| Prim of location * string * node list * string option
| Seq of location * node list * string option
let node_location = function
| Int (loc, _)
| String (loc, _)
| Prim (loc, _, _)
| Seq (loc, _) -> loc
(*-- Located errors ---------------------------------------------------------*)
(* Lexer error *)
exception Illegal_character of location * char
exception Illegal_escape of location * string
exception Invalid_indentation of location
exception Invalid_indentation_after_opener of location * char
exception Invalid_indentation_in_block of location * char * location
exception Newline_in_string of location
exception Unaligned_closer of location * char * char * location
exception Unclosed of location * char * location
exception Unopened of location * char
exception Unterminated_comment of location * location
exception Unterminated_string of location
exception Unterminated_string_in_comment of location * location * location
(* Parser error *)
exception Invalid_application of location
exception Sequence_in_parens of location
exception Missing_program_field of string
(*-- Converters between IR and Located IR -----------------------------------*)
| Prim (loc, _, _, _)
| Seq (loc, _, _) -> loc
let strip_locations root =
let id = let id = ref 0 in fun () -> incr id ; !id in
let id = let id = ref (-1) in fun () -> incr id ; !id in
let loc_table = ref [] in
let rec strip_locations l =
let id = id () in
@ -76,11 +65,13 @@ let strip_locations root =
| String (loc, v) ->
loc_table := (id, loc) :: !loc_table ;
Script.String (id, v)
| Seq (loc, seq) ->
| Seq (loc, seq, annot) ->
loc_table := (id, loc) :: !loc_table ;
Script.Seq (id, List.map strip_locations seq)
| Prim (loc, name, seq) ->
Script.Seq (id, List.map strip_locations seq, annot)
| Prim (loc, name, seq, annot) ->
loc_table := (id, loc) :: !loc_table ;
Script.Prim (id, name, List.map strip_locations seq) in
Script.Prim (id, name, List.map strip_locations seq, annot) in
let stripped = strip_locations root in
stripped, List.rev !loc_table
exception Missing_program_field of string

View File

@ -91,7 +91,7 @@ let apply_manager_operation_content
Contract.get_script ctxt destination >>=? function
| None -> begin
match parameters with
| None | Some (Prim (_, "Unit", [])) ->
| None | Some (Prim (_, "Unit", [], _)) ->
return (ctxt, origination_nonce, None)
| Some _ -> fail (Bad_contract_parameter (destination, None, parameters))
end
@ -112,7 +112,7 @@ let apply_manager_operation_content
| Error err ->
return (ctxt, origination_nonce, Some err) in
match parameters, code.arg_type with
| None, Prim (_, "unit", _) -> call_contract (Prim (0, "Unit", []))
| None, Prim (_, "unit", _, _) -> call_contract (Prim (0, "Unit", [], None))
| Some parameters, arg_type -> begin
Script_ir_translator.typecheck_data ctxt (parameters, arg_type) >>= function
| Ok () -> call_contract parameters

File diff suppressed because it is too large Load Diff

View File

@ -30,8 +30,8 @@ let location_encoding =
type expr = (* TODO: turn the location into an alpha ? *)
| Int of location * string
| String of location * string
| Prim of location * string * expr list
| Seq of location * expr list
| Prim of location * string * expr list * string option
| Seq of location * expr list * string option
let expr_encoding =
let open Data_encoding in
@ -43,15 +43,15 @@ let expr_encoding =
let json =
union
[ case string
(function (v, []) -> Some v | _ -> None)
(fun v -> (v, [])) ;
case (assoc (list expr_encoding))
(fun (v, args) -> Some [ (v, args) ])
(function (v, [], None) -> Some v | _ -> None)
(fun v -> (v, [], None)) ;
case (assoc (tup2 (list expr_encoding) (option string)))
(fun (v, args, annot) -> Some [ (v, (args, annot)) ])
(function
| [ (v, args) ] -> (v, args)
| [ (v, (args, annot)) ] -> (v, args, annot)
| _ -> Json.cannot_destruct "invalid script expression") ] in
let binary =
obj2 (req "prim" string) (req "args" (list expr_encoding)) in
obj3 (req "prim" string) (req "args" (list expr_encoding)) (opt "annot" string) in
splitted ~json ~binary in
let seq_encoding expr_encoding =
list expr_encoding in
@ -67,31 +67,31 @@ let expr_encoding =
(fun v -> String (-1, v)) ;
case ~tag:2 (prim_encoding expr_encoding)
(function
| Prim (_, v, args) -> Some (v, args)
| Prim (_, v, args, annot) -> Some (v, args, annot)
| _ -> None)
(function (prim, args) -> Prim (-1, prim, args)) ;
(function (prim, args, annot) -> Prim (-1, prim, args, annot)) ;
case ~tag:3 (seq_encoding expr_encoding)
(function Seq (_, v) -> Some v | _ -> None)
(fun args -> Seq (-1, args)) ])
(function Seq (_, v, _annot) -> Some v | _ -> None)
(fun args -> Seq (-1, args, None)) ])
let update_locations ir =
let rec update_locations i = function
| Int (_, v) -> (Int (i, v), succ i)
| String (_, v) -> (String (i, v), succ i)
| Prim (_, name, args) ->
| Prim (_, name, args, annot) ->
let (nargs, ni) =
List.fold_left (fun (nargs, ni) arg ->
let narg, ni = update_locations ni arg in
(narg :: nargs, ni))
([], succ i) args in
(Prim (i, name, List.rev nargs), ni)
| Seq (_, args) ->
(Prim (i, name, List.rev nargs, annot), ni)
| Seq (_, args, annot) ->
let (nargs, ni) =
List.fold_left (fun (nargs, ni) arg ->
let narg, ni = update_locations ni arg in
(narg :: nargs, ni))
([], succ i) args in
(Seq (i, List.rev nargs), ni) in
(Seq (i, List.rev nargs, annot), ni) in
fst (update_locations 1 ir)
let expr_encoding =

View File

@ -24,8 +24,8 @@ type location =
type expr =
| Int of location * string
| String of location * string
| Prim of location * string * expr list
| Seq of location * expr list
| Prim of location * string * expr list * string option
| Seq of location * expr list * string option
type code =
{ code : expr ;

View File

@ -312,4 +312,5 @@ and ('bef, 'aft) descr =
{ loc : Script.location ;
bef : 'bef stack_ty ;
aft : 'aft stack_ty ;
instr : ('bef, 'aft) instr }
instr : ('bef, 'aft) instr ;
annot : string option }

View File

@ -116,8 +116,8 @@ module Script : sig
type expr =
| Int of location * string
| String of location * string
| Prim of location * string * expr list
| Seq of location * expr list
| Prim of location * string * expr list * string option
| Seq of location * expr list * string option
type code = {
code: expr ;

View File

@ -5,6 +5,6 @@ code {CAR; DUP; DUP; DUP; DUP; DIIIIIP {NIL bool};
DIIIIP {DUP; CAR; DIP {CDR}; COMPARE; LE; CONS};
DIIIP {DUP; CAR; DIP {CDR}; COMPARE; GE; CONS};
DIIP{DUP; CAR; DIP {CDR}; COMPARE; LT; CONS};
DIP {DUP; CAR; DIP {CDR}; COMPARE; GT; CONS}
DIP {DUP; CAR; DIP {CDR}; COMPARE; GT; CONS};
DUP; CAR; DIP {CDR}; COMPARE; EQ; CONS;
UNIT; SWAP; PAIR};

View File

@ -1,11 +1,13 @@
parameter key;
storage string;
return unit;
code {CAR; DIP{UNIT; LAMBDA (pair string unit)
(pair string unit)
code { CAR;
DIP { UNIT;
LAMBDA
(pair string unit) (pair string unit)
{ CAR; UNIT; SWAP; PAIR };
PUSH tez "100.00"; PUSH bool False;
PUSH bool False; NONE key };
CREATE_CONTRACT; DIP { PUSH string "" }; PUSH tez "0.00";
PUSH string "abcdefg"; TRANSFER_TOKENS;
DIP{DROP}; UNIT; PAIR};
DIP { DROP }; UNIT; PAIR }

View File

@ -1,7 +1,7 @@
parameter unit
parameter unit ;
code
{ # This contract stops accepting transactions after N incoming transactions
CDR ; DUP ; PUSH int 0 ; CMPLT; IF {PUSH int -1 ; ADD} {FAIL};
UNIT; PAIR}
return unit
UNIT; PAIR} ;
return unit ;
storage int

View File

@ -60,8 +60,8 @@ start_sandboxed_node() {
data_dir="$(mktemp -d -t tezos_node.XXXXXXXXXX)"
register_dir "$data_dir"
${TZNODE} identity generate 0 --data-dir "${data_dir}" |& sed 's/^/## /' 1>&2
${TZNODE} config init --data-dir="${data_dir}" --connections=2 --expected-pow=0.0 |& sed 's/^/## /' 1>&2
${TZNODE} identity generate 0 --data-dir "${data_dir}" 2>&1| sed 's/^/## /' 1>&2
${TZNODE} config init --data-dir="${data_dir}" --connections=2 --expected-pow=0.0 2>&1| sed 's/^/## /' 1>&2
${TZNODE} run --data-dir "${data_dir}" ${CUSTOM_PARAM} "$@" $default_args > "$data_dir"/LOG 2>&1 &
node_pid="$!"
CLEANUP_PROCESSES+=($node_pid)