Merge AST_with_records into master

Georges Dupéron 2019-03-27 11:17:20 +01:00
commit b5dc0c9e64
40 changed files with 8019 additions and 0 deletions


@@ -0,0 +1 @@
ocamlc: -w -42


@@ -0,0 +1 @@
--explain --external-tokens LexToken --base Parser ParToken.mly


src/ligo/ligo-parser/.gitignore (vendored, new file, 7 lines)

@@ -0,0 +1,7 @@
_build/*
*/_build
*~
.merlin
*/.merlin
*.install
/Version.ml


@@ -0,0 +1,21 @@
before_script:
- apt-get update -qq
- apt-get -y -qq install libhidapi-dev libcap-dev bubblewrap
- wget https://github.com/ocaml/opam/releases/download/2.0.1/opam-2.0.1-x86_64-linux -O opam-2.0.1-x86_64-linux
- cp opam-2.0.1-x86_64-linux /usr/local/bin/opam
- chmod +x /usr/local/bin/opam
- export PATH="/usr/local/bin${PATH:+:}${PATH:-}"
- echo "$PATH"
- printf '' | opam init
- eval $(opam config env)
- opam repository add tezos-opam-repository https://gitlab.com/gabriel.alfour/tezos-opam-repository.git
- eval $(opam config env)
- opam --version
- printf '' | ocaml
default-job:
script:
- opam install -y --working-dir .
artifacts:
paths:
- Parser.exe


@@ -0,0 +1,2 @@
$HOME/git/OCaml-build/Makefile
$HOME/git/OCaml-build/Makefile.cfg

src/ligo/ligo-parser/AST.ml (new file, 1497 lines)

File diff suppressed because it is too large


@@ -0,0 +1,664 @@
(* Abstract Syntax Tree (AST) for LIGO *)
[@@@warning "-30"]
open Utils
(* Regions
The AST carries all the regions where tokens have been found by the
lexer, plus additional regions corresponding to whole subtrees
(like entire expressions, patterns etc.). These regions are needed
for error reporting and source-to-source transformations. To make
these pervasive regions more legible, we define singleton types for
the symbols, keywords etc. with suggestive names like "kwd_and"
denoting the _region_ of the occurrence of the keyword "and".
*)
type 'a reg = 'a Region.reg
val nseq_to_region : ('a -> Region.t) -> 'a nseq -> Region.t
val nsepseq_to_region : ('a -> Region.t) -> ('a,'sep) nsepseq -> Region.t
val sepseq_to_region : ('a -> Region.t) -> ('a,'sep) sepseq -> Region.t
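(* As a sketch (hypothetical values): a value of type [kwd_and] below
   is nothing but the region of an occurrence of the keyword "and",
   and a [variable] pairs a region with its lexeme, so synthesized
   nodes can be built with [Region.ghost]:
     let and_kwd : kwd_and = Region.ghost
     let x : variable = Region.{region = ghost; value = "x"}
*)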
(* Keywords of LIGO *)
type keyword = Region.t
type kwd_and = Region.t
type kwd_begin = Region.t
type kwd_block = Region.t
type kwd_case = Region.t
type kwd_const = Region.t
type kwd_contains = Region.t
type kwd_down = Region.t
type kwd_else = Region.t
type kwd_end = Region.t
type kwd_entrypoint = Region.t
type kwd_fail = Region.t
type kwd_for = Region.t
type kwd_from = Region.t
type kwd_function = Region.t
type kwd_if = Region.t
type kwd_in = Region.t
type kwd_is = Region.t
type kwd_list = Region.t
type kwd_map = Region.t
type kwd_mod = Region.t
type kwd_nil = Region.t
type kwd_not = Region.t
type kwd_of = Region.t
type kwd_or = Region.t
type kwd_patch = Region.t
type kwd_procedure = Region.t
type kwd_record = Region.t
type kwd_remove = Region.t
type kwd_set = Region.t
type kwd_skip = Region.t
type kwd_step = Region.t
type kwd_storage = Region.t
type kwd_then = Region.t
type kwd_to = Region.t
type kwd_type = Region.t
type kwd_var = Region.t
type kwd_while = Region.t
type kwd_with = Region.t
(* Data constructors *)
type c_False = Region.t
type c_None = Region.t
type c_Some = Region.t
type c_True = Region.t
type c_Unit = Region.t
(* Symbols *)
type semi = Region.t (* ";" *)
type comma = Region.t (* "," *)
type lpar = Region.t (* "(" *)
type rpar = Region.t (* ")" *)
type lbrace = Region.t (* "{" *)
type rbrace = Region.t (* "}" *)
type lbracket = Region.t (* "[" *)
type rbracket = Region.t (* "]" *)
type cons = Region.t (* "#" *)
type vbar = Region.t (* "|" *)
type arrow = Region.t (* "->" *)
type assign = Region.t (* ":=" *)
type equal = Region.t (* "=" *)
type colon = Region.t (* ":" *)
type lt = Region.t (* "<" *)
type leq = Region.t (* "<=" *)
type gt = Region.t (* ">" *)
type geq = Region.t (* ">=" *)
type neq = Region.t (* "=/=" *)
type plus = Region.t (* "+" *)
type minus = Region.t (* "-" *)
type slash = Region.t (* "/" *)
type times = Region.t (* "*" *)
type dot = Region.t (* "." *)
type wild = Region.t (* "_" *)
type cat = Region.t (* "^" *)
(* Virtual tokens *)
type eof = Region.t
(* Literals *)
type variable = string reg
type fun_name = string reg
type type_name = string reg
type field_name = string reg
type map_name = string reg
type set_name = string reg
type constr = string reg
(* Parentheses *)
type 'a par = {
lpar : lpar;
inside : 'a;
rpar : rpar
}
(* Brackets compounds *)
type 'a brackets = {
lbracket : lbracket;
inside : 'a;
rbracket : rbracket
}
(* Braced compounds *)
type 'a braces = {
lbrace : lbrace;
inside : 'a;
rbrace : rbrace
}
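(* For instance, a parenthesized expression "(e)" is represented by a
   value of type [expr par]: {lpar; inside = e; rpar}, where [lpar]
   and [rpar] are the regions of "(" and ")" respectively. *)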
(* The Abstract Syntax Tree *)
type t = {
decl : declaration nseq;
eof : eof
}
and ast = t
and declaration =
TypeDecl of type_decl reg
| ConstDecl of const_decl reg
| LambdaDecl of lambda_decl
and const_decl = {
kwd_const : kwd_const;
name : variable;
colon : colon;
const_type : type_expr;
equal : equal;
init : expr;
terminator : semi option
}
(* Type declarations *)
and type_decl = {
kwd_type : kwd_type;
name : type_name;
kwd_is : kwd_is;
type_expr : type_expr;
terminator : semi option
}
and type_expr =
TProd of cartesian
| TSum of (variant reg, vbar) nsepseq reg
| TRecord of record_type reg
| TApp of (type_name * type_tuple) reg
| TPar of type_expr par reg
| TAlias of variable
and cartesian = (type_expr, times) nsepseq reg
and variant = {
constr : constr;
kwd_of : kwd_of;
product : cartesian
}
and record_type = {
opening : kwd_record;
field_decls : field_decls;
terminator : semi option;
closing : kwd_end
}
and field_decls = (field_decl reg, semi) nsepseq
and field_decl = {
field_name : field_name;
colon : colon;
field_type : type_expr
}
and type_tuple = (type_expr, comma) nsepseq par reg
(* Function and procedure declarations *)
and lambda_decl =
FunDecl of fun_decl reg
| ProcDecl of proc_decl reg
| EntryDecl of entry_decl reg
and fun_decl = {
kwd_function : kwd_function;
name : variable;
param : parameters;
colon : colon;
ret_type : type_expr;
kwd_is : kwd_is;
local_decls : local_decl list;
block : block reg;
kwd_with : kwd_with;
return : expr;
terminator : semi option
}
and proc_decl = {
kwd_procedure : kwd_procedure;
name : variable;
param : parameters;
kwd_is : kwd_is;
local_decls : local_decl list;
block : block reg;
terminator : semi option
}
and entry_decl = {
kwd_entrypoint : kwd_entrypoint;
name : variable;
param : entry_params;
colon : colon;
ret_type : type_expr;
kwd_is : kwd_is;
local_decls : local_decl list;
block : block reg;
kwd_with : kwd_with;
return : expr;
terminator : semi option
}
and parameters = (param_decl, semi) nsepseq par reg
and entry_params = (entry_param_decl, semi) nsepseq par reg
and entry_param_decl =
EntryConst of param_const reg
| EntryVar of param_var reg
| EntryStore of storage reg
and storage = {
kwd_storage : kwd_storage;
var : variable;
colon : colon;
storage_type : type_expr
}
and param_decl =
ParamConst of param_const reg
| ParamVar of param_var reg
and param_const = {
kwd_const : kwd_const;
var : variable;
colon : colon;
param_type : type_expr
}
and param_var = {
kwd_var : kwd_var;
var : variable;
colon : colon;
param_type : type_expr
}
and block = {
opening : block_opening;
instr : instructions;
terminator : semi option;
closing : block_closing
}
and block_opening =
Block of kwd_block * lbrace
| Begin of kwd_begin
and block_closing =
Block of rbrace
| End of kwd_end
and local_decl =
LocalLam of lambda_decl
| LocalConst of const_decl reg
| LocalVar of var_decl reg
and var_decl = {
kwd_var : kwd_var;
name : variable;
colon : colon;
var_type : type_expr;
assign : assign;
init : expr;
terminator : semi option
}
and instructions = (instruction, semi) nsepseq
and instruction =
Single of single_instr
| Block of block reg
and single_instr =
Cond of conditional reg
| Case of case_instr reg
| Assign of assignment reg
| Loop of loop
| ProcCall of fun_call
| Fail of fail_instr reg
| Skip of kwd_skip
| RecordPatch of record_patch reg
| MapPatch of map_patch reg
| SetPatch of set_patch reg
| MapRemove of map_remove reg
| SetRemove of set_remove reg
and set_remove = {
kwd_remove : kwd_remove;
element : expr;
kwd_from : kwd_from;
kwd_set : kwd_set;
set : path
}
and map_remove = {
kwd_remove : kwd_remove;
key : expr;
kwd_from : kwd_from;
kwd_map : kwd_map;
map : path
}
and set_patch = {
kwd_patch : kwd_patch;
path : path;
kwd_with : kwd_with;
set_inj : expr injection reg
}
and map_patch = {
kwd_patch : kwd_patch;
path : path;
kwd_with : kwd_with;
map_inj : binding reg injection reg
}
and binding = {
source : expr;
arrow : arrow;
image : expr
}
and record_patch = {
kwd_patch : kwd_patch;
path : path;
kwd_with : kwd_with;
record_inj : record_injection reg
}
and fail_instr = {
kwd_fail : kwd_fail;
fail_expr : expr
}
and conditional = {
kwd_if : kwd_if;
test : expr;
kwd_then : kwd_then;
ifso : if_clause;
terminator : semi option;
kwd_else : kwd_else;
ifnot : if_clause
}
and if_clause =
ClauseInstr of instruction
| ClauseBlock of (instructions * semi option) braces reg
and set_membership = {
set : expr;
kwd_contains : kwd_contains;
element : expr
}
and case_instr = {
kwd_case : kwd_case;
expr : expr;
kwd_of : kwd_of;
lead_vbar : vbar option;
cases : cases;
kwd_end : kwd_end
}
and cases = (case reg, vbar) nsepseq reg
and case = {
pattern : pattern;
arrow : arrow;
instr : instruction
}
and assignment = {
lhs : lhs;
assign : assign;
rhs : rhs;
}
and lhs =
Path of path
| MapPath of map_lookup reg
and rhs =
Expr of expr
| NoneExpr of c_None
and loop =
While of while_loop reg
| For of for_loop
and while_loop = {
kwd_while : kwd_while;
cond : expr;
block : block reg
}
and for_loop =
ForInt of for_int reg
| ForCollect of for_collect reg
and for_int = {
kwd_for : kwd_for;
assign : var_assign reg;
down : kwd_down option;
kwd_to : kwd_to;
bound : expr;
step : (kwd_step * expr) option;
block : block reg
}
and var_assign = {
name : variable;
assign : assign;
expr : expr
}
and for_collect = {
kwd_for : kwd_for;
var : variable;
bind_to : (arrow * variable) option;
kwd_in : kwd_in;
expr : expr;
block : block reg
}
(* Expressions *)
and expr =
ELogic of logic_expr
| EArith of arith_expr
| EString of string_expr
| EList of list_expr
| ESet of set_expr
| EConstr of constr_expr
| ERecord of record_expr
| EProj of projection reg
| EMap of map_expr
| EVar of Lexer.lexeme reg
| ECall of fun_call
| EBytes of (Lexer.lexeme * Hex.t) reg
| EUnit of c_Unit
| ETuple of tuple_expr
| EPar of expr par reg
and set_expr =
SetInj of expr injection reg
| SetMem of set_membership reg
and 'a injection = {
opening : opening;
elements : ('a, semi) sepseq;
terminator : semi option;
closing : closing
}
and opening =
Kwd of keyword
| KwdBracket of keyword * lbracket
and closing =
End of kwd_end
| RBracket of rbracket
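(* A sketch of the two concrete forms an injection covers: with
   [Kwd], something like "set x; y; z end", and with [KwdBracket],
   something like "set [x; y; z]" -- the [closing] field matching
   with [End] or [RBracket] respectively. *)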
and map_expr =
MapLookUp of map_lookup reg
| MapInj of binding reg injection reg
and map_lookup = {
path : path;
index : expr brackets reg
}
and path =
Name of variable
| Path of projection reg
and logic_expr =
BoolExpr of bool_expr
| CompExpr of comp_expr
and bool_expr =
Or of kwd_or bin_op reg
| And of kwd_and bin_op reg
| Not of kwd_not un_op reg
| False of c_False
| True of c_True
and 'a bin_op = {
op : 'a;
arg1 : expr;
arg2 : expr
}
and 'a un_op = {
op : 'a;
arg : expr
}
and comp_expr =
Lt of lt bin_op reg
| Leq of leq bin_op reg
| Gt of gt bin_op reg
| Geq of geq bin_op reg
| Equal of equal bin_op reg
| Neq of neq bin_op reg
and arith_expr =
Add of plus bin_op reg
| Sub of minus bin_op reg
| Mult of times bin_op reg
| Div of slash bin_op reg
| Mod of kwd_mod bin_op reg
| Neg of minus un_op reg
| Int of (Lexer.lexeme * Z.t) reg
and string_expr =
Cat of cat bin_op reg
| String of Lexer.lexeme reg
and list_expr =
Cons of cons bin_op reg
| List of expr injection reg
| Nil of nil par reg
and nil = {
nil : kwd_nil;
colon : colon;
list_type : type_expr
}
and constr_expr =
SomeApp of (c_Some * arguments) reg
| NoneExpr of none_expr reg
| ConstrApp of (constr * arguments) reg
and record_expr =
RecordInj of record_injection reg
and record_injection = {
opening : kwd_record;
fields : (field_assign reg, semi) nsepseq;
terminator : semi option;
closing : kwd_end
}
and field_assign = {
field_name : field_name;
equal : equal;
field_expr : expr
}
and projection = {
record_name : variable;
selector : dot;
field_path : (selection, dot) nsepseq
}
and selection =
FieldName of field_name
| Component of (Lexer.lexeme * Z.t) reg
and tuple_expr =
TupleInj of tuple_injection
and tuple_injection = (expr, comma) nsepseq par reg
and none_expr = typed_none_expr par
and typed_none_expr = {
c_None : c_None;
colon : colon;
opt_type : type_expr
}
and fun_call = (fun_name * arguments) reg
and arguments = tuple_injection
(* Patterns *)
and pattern =
PCons of (pattern, cons) nsepseq reg
| PVar of Lexer.lexeme reg
| PWild of wild
| PInt of (Lexer.lexeme * Z.t) reg
| PBytes of (Lexer.lexeme * Hex.t) reg
| PString of Lexer.lexeme reg
| PUnit of c_Unit
| PFalse of c_False
| PTrue of c_True
| PNone of c_None
| PSome of (c_Some * pattern par reg) reg
| PList of list_pattern
| PTuple of (pattern, comma) nsepseq par reg
and list_pattern =
Sugar of (pattern, semi) sepseq brackets reg
| Raw of (pattern * cons * pattern) par reg
(* Projecting regions *)
val type_expr_to_region : type_expr -> Region.t
val expr_to_region : expr -> Region.t
val instr_to_region : instruction -> Region.t
val pattern_to_region : pattern -> Region.t
val local_decl_to_region : local_decl -> Region.t
val path_to_region : path -> Region.t
val lhs_to_region : lhs -> Region.t
val rhs_to_region : rhs -> Region.t
val if_clause_to_region : if_clause -> Region.t
val selection_to_region : selection -> Region.t
(* Printing *)
val print_tokens : t -> unit


@@ -0,0 +1,795 @@
[@@@warning "-30"]
module I = AST
open Region
module SMap = Map.Make(String)
module O = struct
type asttodo = [`TODO] (* occurrences of asttodo will point to some part of the original parser AST *)
type name_and_region = {name: string; orig: Region.t}
type type_name = name_and_region
type var_name = name_and_region
type field_name = name_and_region
type pattern =
PVar of var_name
| PWild
| PInt of Z.t
| PBytes of MBytes.t
| PString of string
| PUnit
| PFalse
| PTrue
| PNone
| PSome of pattern
| PCons of pattern * pattern
| PNull
| PRecord of (field_name * pattern) SMap.t
type type_constructor =
Option
| List
| Set
| Map
type type_expr_case =
Sum of (type_name * type_expr) SMap.t
| Record of (field_name * type_expr) SMap.t
| TypeApp of type_constructor * (type_expr list)
| Function of { arg: type_expr; ret: type_expr }
| Ref of type_expr
| String
| Bytes
| Int
| Unit
| Bool
and type_expr = { type_expr: type_expr_case; name: type_name option; orig: Region.t }
type typed_var = { name:var_name; ty:type_expr; orig: asttodo }
type type_decl = { name:type_name; ty:type_expr; orig: asttodo }
type expr =
App of { operator: operator; arguments: expr list }
| Var of var_name
| Constant of constant
| Record of (field_name * expr) list
| Lambda of lambda
and decl = { name:var_name; ty:type_expr; value: expr }
and lambda = {
parameter: typed_var;
declarations: decl list;
instructions: instr list;
result: expr;
}
and operator =
Function of var_name
| Constructor of var_name
| UpdateField of field_name
| GetField of field_name
| Or | And | Lt | Leq | Gt | Geq | Equal | Neq | Cat | Cons | Add | Sub | Mult | Div | Mod
| Neg | Not
| Set | List
| MapLookup
and constant =
Unit
| Int of Z.t | String of string | Bytes of MBytes.t
| False | True
| Null of type_expr
| EmptySet of type_expr
| CNone of type_expr
and instr =
Assignment of { name: var_name; value: expr; orig: asttodo }
| While of { condition: expr; body: instr list; orig: asttodo }
| ForCollection of { list: expr; var: var_name; body: instr list; orig: asttodo }
| Match of { expr: expr; cases: (pattern * instr list) list; orig: asttodo }
| ProcedureCall of { expr: expr; orig: asttodo } (* expr returns unit, drop the result. Similar to OCaml's ";". *)
| Fail of { expr: expr; orig: asttodo }
type ast = {
types : type_decl list;
storage_decl : typed_var;
declarations : decl list;
orig : AST.t
}
end
(* open Sanity: *)
let (|>) v f = f v (* pipe v into f *)
let (@@) f v = f v (* apply f to v *)
let (@.) f g x = f (g x) (* compose *)
let map f l = List.rev (List.rev_map f l)
let mapi f l =
let f (i, l) elem =
(i + 1, (f i elem) :: l)
in List.rev (snd (List.fold_left f (0,[]) l))
(* TODO: check that List.append is not broken
(i.e. check that it is tail-recursive) *)
let append_map f l = map f l |> List.flatten
let append l1 l2 = List.append l1 l2
let list_to_map l = List.fold_left (fun m (k,v) -> SMap.add k v m) SMap.empty l
let fold_map f a l =
let f (acc, l) elem =
let acc', elem' = f acc elem
in acc', (elem' :: l) in
let last_acc, last_l = List.fold_left f (a, []) l
in last_acc, List.rev last_l
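(* e.g. fold_map (fun acc x -> acc + x, 2 * x) 0 [1;2;3]
   evaluates to (6, [2;4;6]) *)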
(* Simplify the AST *)
let name_and_region_of_int i = O.{name = string_of_int i; orig = Region.ghost}
let s_nsepseq : ('a,'sep) Utils.nsepseq -> 'a list =
fun (first, rest) -> first :: (map snd rest)
let s_sepseq : ('a,'sep) Utils.sepseq -> 'a list =
function
None -> []
| Some nsepseq -> s_nsepseq nsepseq
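(* e.g. s_nsepseq ("a", [(sep, "b"); (sep, "c")]) = ["a"; "b"; "c"],
   where [sep] stands for any separator token (hypothetical value) *)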
let s_name {value=name; region} : O.var_name =
let () = ignore (region) in
{name;orig = region}
let name_to_string {value=name; region} : string =
let () = ignore (region) in
name
let type_expr (orig : Region.t) (e : O.type_expr_case) : O.type_expr =
{ type_expr = e; name = None; orig }
let s_type_constructor {value=name;region} : O.type_constructor =
let () = ignore (region) in
match name with
"Option" -> Option
| "List" -> List
| "Map" -> Map
| "Set" -> Set
(* TODO: escape the name, prevent any \x1b and other weird characters from appearing in the output *)
| _ -> failwith ("Unknown type constructor: " ^ name)
let named_list_to_map (l : (O.name_and_region * 'a) list) : (O.name_and_region * 'a) SMap.t =
List.fold_left
(fun m ((x,_) as p) ->
let {name;_} : O.name_and_region = x in
SMap.add name p m)
SMap.empty
l
let rec s_cartesian {value=sequence; region} : O.type_expr =
let () = ignore (region) in
s_nsepseq sequence
|>map s_type_expr
|> mapi (fun i p -> name_and_region_of_int i, p)
|> named_list_to_map
|> (fun x -> (Record x : O.type_expr_case))
|> type_expr region
and s_sum_type {value=sequence; region} : O.type_expr =
let () = ignore (region) in
type_expr region (Sum (map s_variant (s_nsepseq sequence) |> named_list_to_map))
and s_variant {value=(constr, kwd_of, cartesian); region} =
let () = ignore (kwd_of,region) in
(s_name constr, s_cartesian cartesian)
and s_record_type {value=(kwd_record, field_decls, kwd_end); region} : O.type_expr =
let () = ignore (kwd_record,region,kwd_end) in
type_expr region (Record (map s_field_decl (s_nsepseq field_decls) |> named_list_to_map) : O.type_expr_case)
and s_field_decl {value=(var, colon, type_expr); region} : O.type_name * O.type_expr =
let () = ignore (colon,region) in
((s_name var), (s_type_expr type_expr))
and s_type_app {value=(type_name,type_tuple); region} : O.type_expr =
let () = ignore (region) in
type_expr region (TypeApp (s_type_constructor type_name, s_type_tuple type_tuple))
and s_type_tuple ({value=(lpar, sequence, rpar); region} : (I.type_name, I.comma) Utils.nsepseq I.par) : O.type_expr list =
let () = ignore (lpar,rpar,region) in
(* TODO: the grammar should allow any type expr, not just type_name in the tuple elements *)
map s_type_expr (map (fun a -> I.TAlias a) (s_nsepseq sequence))
and s_par_type {value=(lpar, type_expr, rpar); region} : O.type_expr =
let () = ignore (lpar,rpar,region) in
s_type_expr type_expr
and s_type_alias name : O.type_expr =
let () = ignore () in
type_expr name.region (TypeApp (s_type_constructor name, []))
and s_type_expr (orig : I.type_expr) : O.type_expr = match orig with
Prod cartesian -> s_cartesian cartesian
| Sum sum_type -> s_sum_type sum_type
| Record record_type -> s_record_type record_type
| TypeApp type_app -> s_type_app type_app
| ParType par_type -> s_par_type par_type
| TAlias type_alias -> s_type_alias type_alias
let s_type_decl I.{value={kwd_type;name;kwd_is;type_expr;terminator}; region} : O.type_decl =
let () = ignore (kwd_type,kwd_is,terminator,region) in
let ty = s_type_expr type_expr in
O.{ name = s_name name; ty = { ty with name = Some (s_name name) }; orig = `TODO }
let s_storage_decl I.{value={kwd_storage; name; colon; store_type; terminator}; region} : O.typed_var =
let () = ignore (kwd_storage,colon,terminator,region) in
O.{ name = s_name name; ty = s_type_expr store_type; orig = `TODO }
let s_operations_decl I.{value={kwd_operations;name;colon;op_type;terminator}; region} : O.typed_var =
let () = ignore (kwd_operations,colon,terminator,region) in
O.{ name = s_name name; ty = s_type_expr op_type; orig = `TODO }
let s_empty_list {value=(l, (lbracket, rbracket, colon, type_expr), r); region} : O.expr =
let () = ignore (l, lbracket, rbracket, colon, r, region) in
Constant (Null (s_type_expr type_expr))
let s_empty_set {value=(l, (lbrace, rbrace, colon, type_expr), r); region} : O.expr =
let () = ignore (l, lbrace, rbrace, colon, r, region) in
Constant (EmptySet (s_type_expr type_expr))
let s_none {value=(l, (c_None, colon, type_expr), r); region} : O.expr =
let () = ignore (l, c_None, colon, r, region) in
Constant (CNone (s_type_expr type_expr))
let parameters_to_tuple (parameters : (string * O.type_expr) list) : O.type_expr =
(* TODO: use records with named fields to have named arguments. *)
let parameter_tuple : O.type_expr_case =
Record (mapi (fun i (_name,ty) -> name_and_region_of_int i, ty) parameters |> named_list_to_map) in
O.{ type_expr = parameter_tuple; name = None; orig = Region.ghost }
and parameters_to_decls singleparam (parameters : (string * O.type_expr) list) : O.decl list =
let f i (name,ty) =
O.{ name = {name; orig=Region.ghost};
ty = ty;
value = App { operator = O.GetField (name_and_region_of_int i);
arguments = [Var singleparam] } }
in mapi f parameters
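(* Sketch of the parameter-passing scheme above: a declaration such
   as "function f (const a : int; var b : string)" receives a single
   argument of record type {"0": int; "1": string}, and
   [parameters_to_decls] rebinds a and b as local declarations
   a = GetField "0" (Var <singleparam>) and
   b = GetField "1" (Var <singleparam>). *)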
let rec bin l operator r = O.App { operator; arguments = [s_expr l; s_expr r] }
and una operator v = O.App { operator; arguments = [s_expr v] }
and s_expr : I.expr -> O.expr =
function
Or {value=(l, bool_or, r); region} -> let () = ignore (region, bool_or) in bin l Or r
| And {value=(l, bool_and, r); region} -> let () = ignore (region,bool_and) in bin l And r
| Lt {value=(l, lt, r); region} -> let () = ignore (region, lt) in bin l Lt r
| Leq {value=(l, leq, r); region} -> let () = ignore (region, leq) in bin l Leq r
| Gt {value=(l, gt, r); region} -> let () = ignore (region, gt) in bin l Gt r
| Geq {value=(l, geq, r); region} -> let () = ignore (region, geq) in bin l Geq r
| Equal {value=(l, equal, r); region} -> let () = ignore (region, equal) in bin l Equal r
| Neq {value=(l, neq, r); region} -> let () = ignore (region, neq) in bin l Neq r
| Cat {value=(l, cat, r); region} -> let () = ignore (region, cat) in bin l Cat r
| Cons {value=(l, cons, r); region} -> let () = ignore (region, cons) in bin l Cons r
| Add {value=(l, plus, r); region} -> let () = ignore (region, plus) in bin l Add r
| Sub {value=(l, minus, r); region} -> let () = ignore (region, minus) in bin l Sub r
| Mult {value=(l, times, r); region} -> let () = ignore (region, times) in bin l Mult r
| Div {value=(l, slash, r); region} -> let () = ignore (region, slash) in bin l Div r
| Mod {value=(l, kwd_mod, r); region} -> let () = ignore (region, kwd_mod) in bin l Mod r
| Neg {value=(minus, expr); region} -> let () = ignore (region, minus) in una Neg expr
| Not {value=(kwd_not, expr); region} -> let () = ignore (region, kwd_not) in una Not expr
| Int {value=(lexeme, z); region} -> let () = ignore (region, lexeme) in Constant (Int z)
| Var lexeme -> Var (s_name lexeme)
| String {value=s; region} -> let () = ignore (region) in Constant (String s)
| Bytes {value=(lexeme, mbytes); region} -> let () = ignore (region, lexeme) in Constant (Bytes mbytes)
| False c_False -> let () = ignore (c_False) in Constant (False)
| True c_True -> let () = ignore (c_True) in Constant (True)
| Unit c_Unit -> let () = ignore (c_Unit) in Constant (Unit)
| Tuple {value=(l,tuple,r); region} -> let () = ignore (l,r,region) in s_tuple_expr (tuple |> s_nsepseq |> map s_expr)
| List list -> s_list list
| EmptyList empty_list -> s_empty_list empty_list
| Set set -> s_set set
| EmptySet empty_set -> s_empty_set empty_set
| NoneExpr none_expr -> s_none none_expr
| FunCall fun_call -> s_fun_call fun_call
| ConstrApp constr_app -> s_constr_app constr_app
| SomeApp some_app -> s_some_app some_app
| MapLookUp map_lookup -> s_map_lookup map_lookup
| ParExpr {value=(lpar,expr,rpar); region} -> let () = ignore (lpar,rpar,region) in s_expr expr
and s_tuple_expr tuple : O.expr =
Record (mapi (fun i e -> name_and_region_of_int i, e) tuple)
and s_map_lookup I.{value = {map_name; selector; index}; region} : O.expr =
let {value = lbracket, index_expr, rbracket; region=region2} = index in
let () = ignore (selector, lbracket, rbracket, region2, region) in
App { operator = MapLookup; arguments = [Var (s_name map_name); s_expr index_expr] }
and s_some_app {value=(c_Some, {value=(l,arguments,r); region=region2}); region} : O.expr =
let () = ignore (c_Some,l,r,region2,region) in
match s_nsepseq arguments with
[] -> failwith "tuple cannot be empty"
| [a] -> s_expr a
| l -> s_tuple_expr (map s_expr l)
and s_list {value=(l, list, r); region} : O.expr =
let () = ignore (l, r, region) in
App { operator = List; arguments = map s_expr (s_nsepseq list) }
and s_set {value=(l, set, r); region} : O.expr =
let () = ignore (l, r, region) in
App { operator = Set; arguments = map s_expr (s_nsepseq set) }
and s_pattern {value=sequence; region} : O.pattern =
let () = ignore (region) in
s_pattern_conses (s_nsepseq sequence)
and s_pattern_conses : I.core_pattern list -> O.pattern = function
[] -> assert false
| [p] -> s_core_pattern p
| hd :: tl -> PCons (s_core_pattern hd, s_pattern_conses tl)
and s_case ({value=(pattern, arrow, instruction); region} : I.case) : O.pattern * O.instr list =
let () = ignore (arrow,region) in
s_pattern pattern, s_instruction instruction
and s_core_pattern : I.core_pattern -> O.pattern = function
PVar var -> PVar (s_name var)
| PWild wild -> let () = ignore (wild) in PWild
| PInt {value=(si,i);region} -> let () = ignore (si,region) in PInt i
| PBytes {value=(sb,b);region} -> let () = ignore (sb,region) in PBytes b
| PString {value=s;region} -> let () = ignore (region) in PString s
| PUnit region -> let () = ignore (region) in PUnit
| PFalse region -> let () = ignore (region) in PFalse
| PTrue region -> let () = ignore (region) in PTrue
| PNone region -> let () = ignore (region) in PNone
| PSome psome -> s_psome psome
| PList pattern -> s_list_pattern pattern
| PTuple ptuple -> s_ptuple ptuple
and s_list_pattern = function
Sugar sugar -> s_sugar sugar
| Raw raw -> s_raw raw
and s_sugar {value=(lbracket, sequence, rbracket); region} : O.pattern =
let () = ignore (lbracket, rbracket, region) in
List.fold_right (fun p acc -> O.PCons (s_core_pattern p, acc))
(s_sepseq sequence)
O.PNull
and s_raw {value=(lpar, (core_pattern, cons, pattern), rpar); region} =
let () = ignore (lpar, cons, rpar, region) in
O.PCons (s_core_pattern core_pattern, s_pattern pattern)
and s_ptuple {value=(lpar, sequence, rpar); region} =
let () = ignore (lpar, rpar, region) in
s_nsepseq sequence
|> map s_core_pattern
|> mapi (fun i p -> name_and_region_of_int i, p)
|> fun x -> O.PRecord (x |> named_list_to_map)
and s_psome {value=(c_Some,{value=(l,psome,r);region=region2});region} : O.pattern =
let () = ignore (c_Some,l,r,region2,region) in
PSome (s_core_pattern psome)
and s_const_decl I.{value={kwd_const;name;colon;const_type;equal;init;terminator}; region} : O.decl =
let () = ignore (kwd_const,colon,equal,terminator,region) in
O.{ name = s_name name; ty = s_type_expr const_type; value = s_expr init }
and s_param_const {value=(kwd_const,variable,colon,type_expr); region} : string * O.type_expr =
let () = ignore (kwd_const,colon,region) in
name_to_string variable, s_type_expr type_expr
and s_param_var {value=(kwd_var,variable,colon,type_expr); region} : string * O.type_expr =
let () = ignore (kwd_var,colon,region) in
name_to_string variable, s_type_expr type_expr
and s_param_decl : I.param_decl -> string * O.type_expr = function
ParamConst p -> s_param_const p
| ParamVar p -> s_param_var p
and s_parameters ({value=(lpar,param_decl,rpar);region} : I.parameters) : (string * O.type_expr) list =
let () = ignore (lpar,rpar,region) in
let l = (s_nsepseq param_decl) in
map s_param_decl l
and s_var_decl I.{value={kwd_var;name;colon;var_type;ass;init;terminator}; region} : O.decl =
let () = ignore (kwd_var,colon,ass,terminator,region) in
O.{
name = s_name name;
ty = s_type_expr var_type;
value = s_expr init
}
and s_local_decl : I.local_decl -> O.decl = function
LocalLam decl -> s_lambda_decl decl
| LocalConst decl -> s_const_decl decl
| LocalVar decl -> s_var_decl decl
and s_instructions ({value=sequence; region} : I.instructions) : O.instr list =
let () = ignore (region) in
append_map s_instruction (s_nsepseq sequence)
and s_instruction : I.instruction -> O.instr list = function
Single instr -> s_single_instr instr
| Block block -> (s_block block)
and s_conditional I.{kwd_if;test;kwd_then;ifso;kwd_else;ifnot} : O.instr =
let () = ignore (kwd_if,kwd_then,kwd_else) in
let test = s_expr test in
let ifso = O.PTrue, s_instruction ifso in
let ifnot = O.PFalse, s_instruction ifnot in
Match {
expr = test;
cases = [ifso; ifnot];
orig = `TODO
}
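(* i.e. "if b then i1 else i2" is simplified (sketch) into
   match b with True -> i1 | False -> i2 *)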
and s_match_instr I.{kwd_match;expr;kwd_with;lead_vbar;cases;kwd_end} : O.instr =
let {value=cases;region} = cases in
let () = ignore (kwd_match,kwd_with,lead_vbar,kwd_end,region) in
Match { expr = s_expr expr; cases = map s_case (s_nsepseq cases); orig = `TODO }
and s_ass_instr {value=(variable,ass,expr); region} : O.instr =
let () = ignore (ass,region) in
Assignment { name = s_name variable; value = s_expr expr; orig = `TODO }
and s_while_loop {value=(kwd_while, expr, block); region} : O.instr list =
let () = ignore (kwd_while,region) in
[While {condition = s_expr expr; body = s_block block; orig = `TODO}]
and s_for_loop : I.for_loop -> O.instr list = function
ForInt for_int -> s_for_int for_int
| ForCollect for_collect -> s_for_collect for_collect
and s_for_int ({value={kwd_for;ass;down;kwd_to;bound;step;block}; region} : I.for_int reg) : O.instr list =
let {value=(variable,ass_kwd,expr);region = ass_region} = ass in
let () = ignore (kwd_for,ass_region,ass_kwd,kwd_to,region) in
let name = s_name variable in
let condition, operator = match down with Some kwd_down -> ignore kwd_down; O.Gt, O.Sub
| None -> O.Lt, O.Add in
let step = s_step step
in [
Assignment { name; value = s_expr expr; orig = `TODO };
(* TODO: lift the declaration of the variable, to avoid creating a nested scope here. *)
While {
condition = App { operator = condition;
arguments = [Var name; s_expr bound]};
body = append (s_block block)
[O.Assignment { name;
value = App { operator;
arguments = [Var name; step]};
orig = `TODO }];
orig = `TODO
}
]
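(* Sketch of the desugaring above: "for i := 0 to n step 2 ..."
   becomes
     i := 0; while (i < n) { <body>; i := i + 2 }
   and the "down" keyword flips the comparison to > and the step
   update to subtraction. *)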
and s_for_collect ({value={kwd_for;var;bind_to;kwd_in;expr;block}; _} : I.for_collect reg) : O.instr list =
let () = ignore (kwd_for,kwd_in) in
let for_instr =
match s_bind_to bind_to with
Some _ ->
failwith "TODO: For on maps is not supported yet!"
| None ->
O.ForCollection {
list = s_expr expr;
var = s_name var;
body = s_block block;
orig = `TODO
}
in [for_instr]
and s_step : (I.kwd_step * I.expr) option -> O.expr = function
Some (kwd_step, expr) -> let () = ignore (kwd_step) in s_expr expr
| None -> Constant (Int (Z.of_int 1))
and s_bind_to : (I.arrow * I.variable) option -> O.var_name option = function
Some (arrow, variable) -> let () = ignore (arrow) in Some (s_name variable)
| None -> None
and s_loop : I.loop -> O.instr list = function
While while_loop -> s_while_loop while_loop
| For for_loop -> s_for_loop for_loop
and s_fun_call {value=(fun_name, arguments); region} : O.expr =
let () = ignore (region) in
let {value=fun_name_string;_} = fun_name in
let firstchar = String.sub fun_name_string 0 1 in
(* If it starts with a capital letter, then it is a constructor *)
if String.equal firstchar (String.uppercase_ascii firstchar) then
App { operator = Constructor (s_name fun_name); arguments = s_arguments arguments }
else
App { operator = Function (s_name fun_name); arguments = s_arguments arguments }
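(* e.g. "Foo (x)" becomes App {operator = Constructor "Foo"; ...}
   while "foo (x)" becomes App {operator = Function "foo"; ...} *)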
and s_constr_app {value=(constr, arguments); region} : O.expr =
let () = ignore (region) in
App { operator = Function (s_name constr); arguments = s_arguments arguments }
and s_arguments {value=(lpar, sequence, rpar); region} : O.expr list =
(* TODO: should return a tuple *)
let () = ignore (lpar,rpar,region) in
match map s_expr (s_nsepseq sequence) with
[] -> [Constant Unit]
| [single_argument] -> [single_argument]
| args -> [s_tuple_expr args]
and s_fail ((kwd_fail, expr) : (I.kwd_fail * I.expr)) : O.instr =
let () = ignore (kwd_fail) in
Fail { expr = s_expr expr; orig = `TODO }
and s_single_instr : I.single_instr -> O.instr list = function
Cond {value; _} -> [s_conditional value]
| Match {value; _} -> [s_match_instr value]
| Ass instr -> [s_ass_instr instr]
| Loop loop -> s_loop loop
| ProcCall fun_call -> [ProcedureCall { expr = s_fun_call fun_call; orig = `TODO }]
| Null kwd_null -> let () = ignore (kwd_null) in
[]
| Fail {value; _} -> [s_fail value]
and s_block I.{value={opening;instr;terminator;close}; _} : O.instr list =
let () = ignore (opening,terminator,close) in
s_instructions instr
and gensym =
let i = ref 0 in
fun ty ->
i := !i + 1;
(* TODO: Region.ghost *)
({name = {name=(string_of_int !i) ^ "gensym"; orig = Region.ghost}; ty; orig = `TODO} : O.typed_var)
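(* e.g. the first two calls to [gensym ty] produce variables named
   "1gensym" and "2gensym" *)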
and s_fun_decl I.{value={kwd_function;name;param;colon;ret_type;kwd_is;local_decls;block;kwd_with;return;terminator}; region} : O.decl =
let () = ignore (kwd_function,colon,kwd_is,kwd_with,terminator,region) in
let tuple_type = s_parameters param |> parameters_to_tuple in
let single_argument = gensym tuple_type in
let ({name = single_argument_xxx; ty = _; orig = `TODO} : O.typed_var) = single_argument in
O.{
name = s_name name;
ty = type_expr region (Function { arg = tuple_type;
ret = s_type_expr ret_type });
value = Lambda {
parameter = single_argument;
declarations = append
(s_parameters param |> parameters_to_decls single_argument_xxx)
(map s_local_decl local_decls);
instructions = s_block block;
result = s_expr return
}
}
and s_proc_decl I.{value={kwd_procedure;name;param;kwd_is;local_decls;block;terminator}; region} =
let () = ignore (kwd_procedure,kwd_is,terminator,region) in
let tuple_type = s_parameters param |> parameters_to_tuple in
let single_argument = gensym tuple_type in
let ({name = single_argument_xxx; ty = _; orig = `TODO} : O.typed_var) = single_argument in
O.{
name = s_name name;
ty = type_expr region (Function { arg = tuple_type;
ret = type_expr region Unit });
value = Lambda {
parameter = single_argument;
declarations = append
(s_parameters param |> parameters_to_decls single_argument_xxx)
(map s_local_decl local_decls);
instructions = s_block block;
result = O.Constant O.Unit
}
}
and s_entry_decl I.{value={kwd_entrypoint;name;param;kwd_is;local_decls;block;terminator}; region} =
let () = ignore (kwd_entrypoint,kwd_is,terminator,region) in
let tuple_type = s_parameters param |> parameters_to_tuple in
let single_argument = gensym tuple_type in
let ({name = single_argument_xxx; ty = _; orig = `TODO} : O.typed_var) = single_argument in
O.{
name = s_name name;
ty = type_expr region (Function { arg = tuple_type;
ret = type_expr region Unit });
value = Lambda {
parameter = single_argument;
declarations = append
(s_parameters param |> parameters_to_decls single_argument_xxx)
(map s_local_decl local_decls);
instructions = s_block block;
result = O.Constant O.Unit
}
}
and s_lambda_decl : I.lambda_decl -> O.decl = function
FunDecl fun_decl -> s_fun_decl fun_decl
| EntryDecl entry_decl -> s_entry_decl entry_decl
| ProcDecl proc_decl -> s_proc_decl proc_decl
type tmp_ast = {
types : O.type_decl list;
storage_decl : O.typed_var option;
operations_decl : O.typed_var option;
declarations : O.decl list;
}
let s_declaration (ast : tmp_ast) : I.declaration -> tmp_ast = function
TypeDecl t -> { ast with types = (s_type_decl t) :: ast.types }
| ConstDecl c -> { ast with declarations = (s_const_decl c) :: ast.declarations }
| StorageDecl s -> { ast with storage_decl = Some (s_storage_decl s) }
| OpDecl o -> { ast with operations_decl = Some (s_operations_decl o) }
| LambdaDecl l -> { ast with declarations = (s_lambda_decl l) :: ast.declarations }
let s_ast (ast : I.ast) : O.ast =
let I.{decl=(decl1,decls);eof} = ast in
let () = ignore (eof) in
let {types; storage_decl; operations_decl; declarations} =
List.fold_left s_declaration
{ types = [];
storage_decl = None;
operations_decl = None;
declarations = [] }
( decl1 :: decls ) in
let storage_decl = match storage_decl with
Some x -> x
| None -> failwith "Missing storage declaration" in
let () = match operations_decl with
Some _ -> failwith "Operations declaration is not allowed anymore TODO"
| None -> ()
in {types; storage_decl; declarations; orig = ast}
(* let s_token region lexeme = *)
(* printf "%s: %s\n"(compact region) lexeme *)
(* and s_var {region; value=lexeme} = *)
(* printf "%s: Ident \"%s\"\n" (compact region) lexeme *)
(* and s_constr {region; value=lexeme} = *)
(* printf "%s: Constr \"%s\"\n" *)
(* (compact region) lexeme *)
(* and s_string {region; value=lexeme} = *)
(* printf "%s: String \"%s\"\n" *)
(* (compact region) lexeme *)
(* and s_bytes {region; value = lexeme, abstract} = *)
(* printf "%s: Bytes (\"%s\", \"0x%s\")\n" *)
(* (compact region) lexeme *)
(* (MBytes.to_hex abstract |> Hex.to_string) *)
(* and s_int {region; value = lexeme, abstract} = *)
(* printf "%s: Int (\"%s\", %s)\n" *)
(* (compact region) lexeme *)
(* (Z.to_string abstract) *)
(* and s_parameters {value=node; _} = *)
(* let lpar, sequence, rpar = node in *)
(* s_token lpar "("; *)
(* s_nsepseq ";" s_param_decl sequence; *)
(* s_token rpar ")" *)
(* and s_param_decl = function *)
(* ParamConst param_const -> s_param_const param_const *)
(* | ParamVar param_var -> s_param_var param_var *)
(* and s_region_cases {value=sequence; _} = *)
(* s_nsepseq "|" s_case sequence *)
(* and s_expr = function *)
(* Or {value = expr1, bool_or, expr2; _} -> *)
(* s_expr expr1; s_token bool_or "||"; s_expr expr2 *)
(* | And {value = expr1, bool_and, expr2; _} -> *)
(* s_expr expr1; s_token bool_and "&&"; s_expr expr2 *)
(* | Lt {value = expr1, lt, expr2; _} -> *)
(* s_expr expr1; s_token lt "<"; s_expr expr2 *)
(* | Leq {value = expr1, leq, expr2; _} -> *)
(* s_expr expr1; s_token leq "<="; s_expr expr2 *)
(* | Gt {value = expr1, gt, expr2; _} -> *)
(* s_expr expr1; s_token gt ">"; s_expr expr2 *)
(* | Geq {value = expr1, geq, expr2; _} -> *)
(* s_expr expr1; s_token geq ">="; s_expr expr2 *)
(* | Equal {value = expr1, equal, expr2; _} -> *)
(* s_expr expr1; s_token equal "="; s_expr expr2 *)
(* | Neq {value = expr1, neq, expr2; _} -> *)
(* s_expr expr1; s_token neq "=/="; s_expr expr2 *)
(* | Cat {value = expr1, cat, expr2; _} -> *)
(* s_expr expr1; s_token cat "^"; s_expr expr2 *)
(* | Cons {value = expr1, cons, expr2; _} -> *)
(* s_expr expr1; s_token cons "<:"; s_expr expr2 *)
(* | Add {value = expr1, add, expr2; _} -> *)
(* s_expr expr1; s_token add "+"; s_expr expr2 *)
(* | Sub {value = expr1, sub, expr2; _} -> *)
(* s_expr expr1; s_token sub "-"; s_expr expr2 *)
(* | Mult {value = expr1, mult, expr2; _} -> *)
(* s_expr expr1; s_token mult "*"; s_expr expr2 *)
(* | Div {value = expr1, div, expr2; _} -> *)
(* s_expr expr1; s_token div "/"; s_expr expr2 *)
(* | Mod {value = expr1, kwd_mod, expr2; _} -> *)
(* s_expr expr1; s_token kwd_mod "mod"; s_expr expr2 *)
(* | Neg {value = minus, expr; _} -> *)
(* s_token minus "-"; s_expr expr *)
(* | Not {value = kwd_not, expr; _} -> *)
(* s_token kwd_not "not"; s_expr expr *)
(* | Int i -> s_int i *)
(* | Var var -> s_var var *)
(* | String s -> s_string s *)
(* | Bytes b -> s_bytes b *)
(* | False region -> s_token region "False" *)
(* | True region -> s_token region "True" *)
(* | Unit region -> s_token region "Unit" *)
(* | Tuple tuple -> s_tuple tuple *)
(* | List list -> s_list list *)
(* | EmptyList elist -> s_empty_list elist *)
(* | Set set -> s_set set *)
(* | EmptySet eset -> s_empty_set eset *)
(* | NoneExpr nexpr -> s_none_expr nexpr *)
(* | FunCall fun_call -> s_fun_call fun_call *)
(* | ConstrApp capp -> s_constr_app capp *)
(* | SomeApp sapp -> s_some_app sapp *)
(* | MapLookUp lookup -> s_map_lookup lookup *)
(* | ParExpr pexpr -> s_par_expr pexpr *)
(* and s_list {value=node; _} = *)
(* let lbra, sequence, rbra = node in *)
(* s_token lbra "["; *)
(* s_nsepseq "," s_expr sequence; *)
(* s_token rbra "]" *)
(* and s_empty_list {value=node; _} = *)
(* let lpar, (lbracket, rbracket, colon, type_expr), rpar = node in *)
(* s_token lpar "("; *)
(* s_token lbracket "["; *)
(* s_token rbracket "]"; *)
(* s_token colon ":"; *)
(* s_type_expr type_expr; *)
(* s_token rpar ")" *)
(* and s_set {value=node; _} = *)
(* let lbrace, sequence, rbrace = node in *)
(* s_token lbrace "{"; *)
(* s_nsepseq "," s_expr sequence; *)
(* s_token rbrace "}" *)
(* and s_empty_set {value=node; _} = *)
(* let lpar, (lbrace, rbrace, colon, type_expr), rpar = node in *)
(* s_token lpar "("; *)
(* s_token lbrace "{"; *)
(* s_token rbrace "}"; *)
(* s_token colon ":"; *)
(* s_type_expr type_expr; *)
(* s_token rpar ")" *)
(* and s_none_expr {value=node; _} = *)
(* let lpar, (c_None, colon, type_expr), rpar = node in *)
(* s_token lpar "("; *)
(* s_token c_None "None"; *)
(* s_token colon ":"; *)
(* s_type_expr type_expr; *)
(* s_token rpar ")" *)
(* and s_constr_app {value=node; _} = *)
(* let constr, arguments = node in *)
(* s_constr constr; *)
(* s_tuple arguments *)
(* and s_some_app {value=node; _} = *)
(* let c_Some, arguments = node in *)
(* s_token c_Some "Some"; *)
(* s_tuple arguments *)
(* and s_par_expr {value=node; _} = *)
(* let lpar, expr, rpar = node in *)
(* s_token lpar "("; *)
(* s_expr expr; *)
(* s_token rpar ")" *)
(* and s_psome {value=node; _} = *)
(* let c_Some, patterns = node in *)
(* s_token c_Some "Some"; *)
(* s_patterns patterns *)
(* and s_terminator = function *)
(* Some semi -> s_token semi ";" *)
(* | None -> () *)


@@ -0,0 +1,3 @@
type t = ..
type error = t


@@ -0,0 +1,161 @@
(* Parsing the command-line options for testing the LIGO lexer and
parser *)
let printf = Printf.printf
let sprintf = Printf.sprintf
let abort msg =
Utils.highlight (sprintf "Command-line error: %s\n" msg); exit 1
(* Help *)
let help () =
let file = Filename.basename Sys.argv.(0) in
printf "Usage: %s [<option> ...] [<input>.ligo | \"-\"]\n" file;
print_endline "where <input>.ligo is the LIGO source file (default: stdin),";
print_endline "and each <option> (if any) is one of the following:";
print_endline " -I <paths> Library paths (colon-separated)";
print_endline " -c, --copy Print lexemes of tokens and markup (lexer)";
print_endline " -t, --tokens Print tokens (lexer)";
print_endline " -u, --units Print tokens and markup (lexer)";
print_endline " -q, --quiet No output, except errors (default)";
print_endline " --columns Columns for source locations";
print_endline " --bytes Bytes for source locations";
print_endline " --verbose=<stages> cmdline, cpp, ast (colon-separated)";
print_endline " --version Commit hash on stdout";
print_endline " -h, --help This help";
exit 0
(* Version *)
let version () = printf "%s\n" Version.version; exit 0
(* Specifying the command-line options a la GNU *)
let copy = ref false
and tokens = ref false
and units = ref false
and quiet = ref false
and columns = ref false
and bytes = ref false
and verbose = ref Utils.String.Set.empty
and input = ref None
and libs = ref []
let split_at_colon = Str.(split (regexp ":"))
let add_path p = libs := !libs @ split_at_colon p
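(* e.g. add_path "lib1:lib2" appends ["lib1"; "lib2"] to !libs *)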
let add_verbose d =
verbose := List.fold_left (Utils.swap Utils.String.Set.add)
!verbose
(split_at_colon d)
let specs =
let open! Getopt in [
'I', nolong, None, Some add_path;
'c', "copy", set copy true, None;
't', "tokens", set tokens true, None;
'u', "units", set units true, None;
'q', "quiet", set quiet true, None;
noshort, "columns", set columns true, None;
noshort, "bytes", set bytes true, None;
noshort, "verbose", None, Some add_verbose;
'h', "help", Some help, None;
noshort, "version", Some version, None
]
;;
(* Handler of anonymous arguments *)
let anonymous arg =
match !input with
None -> input := Some arg
| Some _ -> abort "Multiple inputs"
;;
(* Parsing the command-line options *)
try Getopt.parse_cmdline specs anonymous with
Getopt.Error msg -> abort msg
;;
(* Checking options and exporting them as non-mutable values *)
type command = Quiet | Copy | Units | Tokens
let cmd =
match !quiet, !copy, !units, !tokens with
false, false, false, false
| true, false, false, false -> Quiet
| false, true, false, false -> Copy
| false, false, true, false -> Units
| false, false, false, true -> Tokens
| _ -> abort "Choose one of -q, -c, -u, -t."
let string_of convert = function
None -> "None"
| Some s -> sprintf "Some %s" (convert s)
let string_of_path p =
let apply s a = if a = "" then s else s ^ ":" ^ a
in List.fold_right apply p ""
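(* e.g. string_of_path ["a"; "b"] = "a:b" *)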
let quote s = sprintf "\"%s\"" s
let verbose_str =
let apply e a =
if a <> "" then sprintf "%s, %s" e a else e
in Utils.String.Set.fold apply !verbose ""
let print_opt () =
printf "COMMAND LINE\n";
printf "copy = %b\n" !copy;
printf "tokens = %b\n" !tokens;
printf "units = %b\n" !units;
printf "quiet = %b\n" !quiet;
printf "columns = %b\n" !columns;
printf "bytes = %b\n" !bytes;
printf "verbose = \"%s\"\n" verbose_str;
printf "input = %s\n" (string_of quote !input);
printf "libs = %s\n" (string_of_path !libs)
;;
if Utils.String.Set.mem "cmdline" !verbose then print_opt ();;
let input =
match !input with
None | Some "-" -> !input
| Some file_path ->
if Filename.check_suffix file_path ".ligo"
then if Sys.file_exists file_path
then Some file_path
else abort "Source file not found."
else abort "Source file lacks the extension .ligo."
(* Exporting remaining options as non-mutable values *)
let copy = !copy
and tokens = !tokens
and units = !units
and quiet = !quiet
and offsets = not !columns
and mode = if !bytes then `Byte else `Point
and verbose = !verbose
and libs = !libs
;;
if Utils.String.Set.mem "cmdline" verbose then
begin
printf "\nEXPORTED COMMAND LINE\n";
printf "copy = %b\n" copy;
printf "tokens = %b\n" tokens;
printf "units = %b\n" units;
printf "quiet = %b\n" quiet;
printf "offsets = %b\n" offsets;
printf "mode = %s\n" (if mode = `Byte then "`Byte" else "`Point");
printf "verbose = \"%s\"\n" verbose_str;
printf "input = %s\n" (string_of quote input);
printf "I = %s\n" (string_of_path libs)
end
;;


@@ -0,0 +1,46 @@
(* Parsing the command-line options for testing the LIGO lexer and
parser *)
(* If the value [offsets] is [true], then the user requested that
messages about source positions and regions be expressed in terms
of horizontal offsets. *)
val offsets : bool
(* If the value [mode] is [`Byte], then the unit in which source
positions and regions are expressed in messages is the byte. If
[`Point], the unit is Unicode code points. *)
val mode : [`Byte | `Point]
(* If the option [verbose] is set to a list of predefined stages of
the compiler chain, then more information may be displayed about
those stages. *)
val verbose : Utils.String.Set.t
(* If the value [input] is [None] or [Some "-"], the input is standard
input. If [Some f], then the input is the file whose name (file
path) is [f]. *)
val input : string option
(* Paths where to find LIGO files for inclusion *)
val libs : string list
(* If the value [cmd] is
* [Quiet], then no output from the lexer and parser should be
expected, save error messages: this is the default value;
* [Copy], then lexemes of tokens and markup will be printed to
standard output, with the expectation of a perfect match with
the input file;
* [Units], then the tokens and markup will be printed to standard
output, that is, the abstract representation of the concrete
lexical syntax;
* [Tokens], then the tokens only will be printed.
*)
type command = Quiet | Copy | Units | Tokens
val cmd : command


@@ -0,0 +1,19 @@
(* Purely functional queues based on a pair of lists *)
type 'a t = {rear: 'a list; front: 'a list}
let empty = {rear=[]; front=[]}
let enq x q = {q with rear = x::q.rear}
let rec deq = function
{rear=[]; front= []} -> None
| {rear; front= []} -> deq {rear=[]; front = List.rev rear}
| {rear; front=x::f} -> Some ({rear; front=f}, x)
let rec peek = function
{rear=[]; front= []} -> None
| {rear; front= []} -> peek {rear=[]; front = List.rev rear}
| {rear=_; front=x::_} as q -> Some (q,x)
let is_empty q = (q = empty)
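(* Usage sketch: items are dequeued in FIFO order, and [deq] returns
   the rest of the queue along with the extracted item.
     let q = enq 2 (enq 1 empty) in
     match deq q with
       Some (q', x) -> ...   (* here x = 1 and q' contains 2 *)
     | None -> ...           (* unreachable: q is not empty *)
*)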


@@ -0,0 +1,17 @@
(* Purely functional queues *)
type 'a t
val empty : 'a t
val enq : 'a -> 'a t -> 'a t
val deq : 'a t -> ('a t * 'a) option
val is_empty : 'a t -> bool
(* The call [peek q] is [None] if the queue [q] is empty, and,
otherwise, is a pair made of a queue and the next item in it to be
dequeued. The returned queue contains the same items as [q], in the
same order, but is, in general, more efficient to use in further
calls. *)
val peek : 'a t -> ('a t * 'a) option


@@ -0,0 +1,158 @@
(* This signature defines the lexical tokens for LIGO
_Tokens_ are the abstract units which are used by the parser to
build the abstract syntax tree (AST), in other words, the stream of
tokens is the minimal model of the input program, carrying
implicitly all its structure in a linear encoding, and nothing
else; in particular, comments and whitespace are absent.
A _lexeme_ is a specific character string (concrete
representation) denoting a token (abstract representation). Tokens
can be thought of as sets, and lexemes as elements of those sets --
there is often an infinite number of lexemes, but a small number of
tokens. (Think of identifiers as lexemes and one token.)
The tokens are qualified here as being "lexical" because the
parser generator Menhir expects to define them, in which context
they are called "parsing tokens", and they are made to match each
other. (This is an idiosyncratic terminology.)
The type of the lexical tokens is the variant [t], also
aliased to [token].
*)
type lexeme = string
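(* For example, the lexemes "x" and "toto" are both concrete
   representations of the single token [Ident], whereas the token
   [ASS] below has exactly one lexeme, ":=". *)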
(* TOKENS *)
type t =
(* Literals *)
String of lexeme Region.reg
| Bytes of (lexeme * Hex.t) Region.reg
| Int of (lexeme * Z.t) Region.reg
| Ident of lexeme Region.reg
| Constr of lexeme Region.reg
(* Symbols *)
| SEMI of Region.t (* ";" *)
| COMMA of Region.t (* "," *)
| LPAR of Region.t (* "(" *)
| RPAR of Region.t (* ")" *)
| LBRACE of Region.t (* "{" *)
| RBRACE of Region.t (* "}" *)
| LBRACKET of Region.t (* "[" *)
| RBRACKET of Region.t (* "]" *)
| CONS of Region.t (* "#" *)
| VBAR of Region.t (* "|" *)
| ARROW of Region.t (* "->" *)
| ASS of Region.t (* ":=" *)
| EQUAL of Region.t (* "=" *)
| COLON of Region.t (* ":" *)
| LT of Region.t (* "<" *)
| LEQ of Region.t (* "<=" *)
| GT of Region.t (* ">" *)
| GEQ of Region.t (* ">=" *)
| NEQ of Region.t (* "=/=" *)
| PLUS of Region.t (* "+" *)
| MINUS of Region.t (* "-" *)
| SLASH of Region.t (* "/" *)
| TIMES of Region.t (* "*" *)
| DOT of Region.t (* "." *)
| WILD of Region.t (* "_" *)
| CAT of Region.t (* "^" *)
(* Keywords *)
| And of Region.t (* "and" *)
| Begin of Region.t (* "begin" *)
| Block of Region.t (* "block" *)
| Case of Region.t (* "case" *)
| Const of Region.t (* "const" *)
| Contains of Region.t (* "contains" *)
| Down of Region.t (* "down" *)
| Else of Region.t (* "else" *)
| End of Region.t (* "end" *)
| Entrypoint of Region.t (* "entrypoint" *)
| Fail of Region.t (* "fail" *)
| For of Region.t (* "for" *)
| From of Region.t (* "from" *)
| Function of Region.t (* "function" *)
| If of Region.t (* "if" *)
| In of Region.t (* "in" *)
| Is of Region.t (* "is" *)
| List of Region.t (* "list" *)
| Map of Region.t (* "map" *)
| Mod of Region.t (* "mod" *)
| Nil of Region.t (* "nil" *)
| Not of Region.t (* "not" *)
| Of of Region.t (* "of" *)
| Or of Region.t (* "or" *)
| Patch of Region.t (* "patch" *)
| Procedure of Region.t (* "procedure" *)
| Record of Region.t (* "record" *)
| Remove of Region.t (* "remove" *)
| Set of Region.t (* "set" *)
| Skip of Region.t (* "skip" *)
| Step of Region.t (* "step" *)
| Storage of Region.t (* "storage" *)
| Then of Region.t (* "then" *)
| To of Region.t (* "to" *)
| Type of Region.t (* "type" *)
| Var of Region.t (* "var" *)
| While of Region.t (* "while" *)
| With of Region.t (* "with" *)
(* Data constructors *)
| C_False of Region.t (* "False" *)
| C_None of Region.t (* "None" *)
| C_Some of Region.t (* "Some" *)
| C_True of Region.t (* "True" *)
| C_Unit of Region.t (* "Unit" *)
(* Virtual tokens *)
| EOF of Region.t
type token = t
(* Projections
The difference between extracting the lexeme and a string from a
token is that the latter is the textual representation of the OCaml
value denoting the token (its abstract syntax), rather than its
lexeme (concrete syntax).
*)
val to_lexeme : token -> lexeme
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
val to_region : token -> Region.t
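(* As a sketch: applied to the token for ":=", [to_lexeme] yields
   the lexeme ":=" itself, whereas [to_string] yields a rendering of
   the abstract syntax, like "ASS", formatted with its region. *)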
(* Injections *)
type int_err =
Non_canonical_zero
type ident_err = Reserved_name
val mk_string : lexeme -> Region.t -> token
val mk_bytes : lexeme -> Region.t -> token
val mk_int : lexeme -> Region.t -> (token, int_err) result
val mk_ident : lexeme -> Region.t -> (token, ident_err) result
val mk_constr : lexeme -> Region.t -> token
val mk_sym : lexeme -> Region.t -> token
val eof : Region.t -> token
(* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool


@@ -0,0 +1,659 @@
(* Lexer specification for LIGO, to be processed by [ocamllex] *)
{
(* START HEADER *)
(* Shorthands *)
type lexeme = string
let sprintf = Printf.sprintf
module SMap = Utils.String.Map
module SSet = Utils.String.Set
(* Hack to roll back one lexeme in the current semantic action *)
(*
let rollback buffer =
let open Lexing in
let len = String.length (lexeme buffer) in
let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
*)
(* TOKENS *)
type t =
(* Literals *)
String of lexeme Region.reg
| Bytes of (lexeme * Hex.t) Region.reg
| Int of (lexeme * Z.t) Region.reg
| Ident of lexeme Region.reg
| Constr of lexeme Region.reg
(* Symbols *)
| SEMI of Region.t
| COMMA of Region.t
| LPAR of Region.t
| RPAR of Region.t
| LBRACE of Region.t
| RBRACE of Region.t
| LBRACKET of Region.t
| RBRACKET of Region.t
| CONS of Region.t
| VBAR of Region.t
| ARROW of Region.t
| ASS of Region.t
| EQUAL of Region.t
| COLON of Region.t
| LT of Region.t
| LEQ of Region.t
| GT of Region.t
| GEQ of Region.t
| NEQ of Region.t
| PLUS of Region.t
| MINUS of Region.t
| SLASH of Region.t
| TIMES of Region.t
| DOT of Region.t
| WILD of Region.t
| CAT of Region.t
(* Keywords *)
| And of Region.t (* "and" *)
| Begin of Region.t (* "begin" *)
| Block of Region.t (* "block" *)
| Case of Region.t (* "case" *)
| Const of Region.t (* "const" *)
| Contains of Region.t (* "contains" *)
| Down of Region.t (* "down" *)
| Else of Region.t (* "else" *)
| End of Region.t (* "end" *)
| Entrypoint of Region.t (* "entrypoint" *)
| Fail of Region.t (* "fail" *)
| For of Region.t (* "for" *)
| From of Region.t (* "from" *)
| Function of Region.t (* "function" *)
| If of Region.t (* "if" *)
| In of Region.t (* "in" *)
| Is of Region.t (* "is" *)
| List of Region.t (* "list" *)
| Map of Region.t (* "map" *)
| Mod of Region.t (* "mod" *)
| Nil of Region.t (* "nil" *)
| Not of Region.t (* "not" *)
| Of of Region.t (* "of" *)
| Or of Region.t (* "or" *)
| Patch of Region.t (* "patch" *)
| Procedure of Region.t (* "procedure" *)
| Record of Region.t (* "record" *)
| Remove of Region.t (* "remove" *)
| Set of Region.t (* "set" *)
| Skip of Region.t (* "skip" *)
| Step of Region.t (* "step" *)
| Storage of Region.t (* "storage" *)
| Then of Region.t (* "then" *)
| To of Region.t (* "to" *)
| Type of Region.t (* "type" *)
| Var of Region.t (* "var" *)
| While of Region.t (* "while" *)
| With of Region.t (* "with" *)
(* Types *)
(*
| T_address of Region.t (* "address" *)
| T_big_map of Region.t (* "big_map" *)
| T_bool of Region.t (* "bool" *)
| T_bytes of Region.t (* "bytes" *)
| T_contract of Region.t (* "contract" *)
| T_int of Region.t (* "int" *)
| T_key of Region.t (* "key" *)
| T_key_hash of Region.t (* "key_hash" *)
| T_list of Region.t (* "list" *)
| T_map of Region.t (* "map" *)
| T_mutez of Region.t (* "mutez" *)
| T_nat of Region.t (* "nat" *)
| T_operation of Region.t (* "operation" *)
| T_option of Region.t (* "option" *)
| T_set of Region.t (* "set" *)
| T_signature of Region.t (* "signature" *)
| T_string of Region.t (* "string" *)
| T_timestamp of Region.t (* "timestamp" *)
| T_unit of Region.t (* "unit" *)
*)
(* Data constructors *)
| C_False of Region.t (* "False" *)
| C_None of Region.t (* "None" *)
| C_Some of Region.t (* "Some" *)
| C_True of Region.t (* "True" *)
| C_Unit of Region.t (* "Unit" *)
(* Virtual tokens *)
| EOF of Region.t
type token = t
let proj_token = function
(* Literals *)
String Region.{region; value} ->
region, sprintf "String %s" value
| Bytes Region.{region; value = s,b} ->
region,
sprintf "Bytes (\"%s\", \"0x%s\")"
s (Hex.to_string b)
| Int Region.{region; value = s,n} ->
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
| Ident Region.{region; value} ->
region, sprintf "Ident \"%s\"" value
| Constr Region.{region; value} ->
region, sprintf "Constr \"%s\"" value
(* Symbols *)
| SEMI region -> region, "SEMI"
| COMMA region -> region, "COMMA"
| LPAR region -> region, "LPAR"
| RPAR region -> region, "RPAR"
| LBRACE region -> region, "LBRACE"
| RBRACE region -> region, "RBRACE"
| LBRACKET region -> region, "LBRACKET"
| RBRACKET region -> region, "RBRACKET"
| CONS region -> region, "CONS"
| VBAR region -> region, "VBAR"
| ARROW region -> region, "ARROW"
| ASS region -> region, "ASS"
| EQUAL region -> region, "EQUAL"
| COLON region -> region, "COLON"
| LT region -> region, "LT"
| LEQ region -> region, "LEQ"
| GT region -> region, "GT"
| GEQ region -> region, "GEQ"
| NEQ region -> region, "NEQ"
| PLUS region -> region, "PLUS"
| MINUS region -> region, "MINUS"
| SLASH region -> region, "SLASH"
| TIMES region -> region, "TIMES"
| DOT region -> region, "DOT"
| WILD region -> region, "WILD"
| CAT region -> region, "CAT"
(* Keywords *)
| And region -> region, "And"
| Begin region -> region, "Begin"
| Block region -> region, "Block"
| Case region -> region, "Case"
| Const region -> region, "Const"
| Contains region -> region, "Contains"
| Down region -> region, "Down"
| Else region -> region, "Else"
| End region -> region, "End"
| Entrypoint region -> region, "Entrypoint"
| Fail region -> region, "Fail"
| For region -> region, "For"
| From region -> region, "From"
| Function region -> region, "Function"
| If region -> region, "If"
| In region -> region, "In"
| Is region -> region, "Is"
| List region -> region, "List"
| Map region -> region, "Map"
| Mod region -> region, "Mod"
| Nil region -> region, "Nil"
| Not region -> region, "Not"
| Of region -> region, "Of"
| Or region -> region, "Or"
| Patch region -> region, "Patch"
| Procedure region -> region, "Procedure"
| Record region -> region, "Record"
| Remove region -> region, "Remove"
| Set region -> region, "Set"
| Skip region -> region, "Skip"
| Step region -> region, "Step"
| Storage region -> region, "Storage"
| Then region -> region, "Then"
| To region -> region, "To"
| Type region -> region, "Type"
| Var region -> region, "Var"
| While region -> region, "While"
| With region -> region, "With"
(* Data *)
| C_False region -> region, "C_False"
| C_None region -> region, "C_None"
| C_Some region -> region, "C_Some"
| C_True region -> region, "C_True"
| C_Unit region -> region, "C_Unit"
(* Virtual tokens *)
| EOF region -> region, "EOF"
let to_lexeme = function
(* Literals *)
String s -> s.Region.value
| Bytes b -> fst b.Region.value
| Int i -> fst i.Region.value
| Ident id
| Constr id -> id.Region.value
(* Symbols *)
| SEMI _ -> ";"
| COMMA _ -> ","
| LPAR _ -> "("
| RPAR _ -> ")"
| LBRACE _ -> "{"
| RBRACE _ -> "}"
| LBRACKET _ -> "["
| RBRACKET _ -> "]"
| CONS _ -> "#"
| VBAR _ -> "|"
| ARROW _ -> "->"
| ASS _ -> ":="
| EQUAL _ -> "="
| COLON _ -> ":"
| LT _ -> "<"
| LEQ _ -> "<="
| GT _ -> ">"
| GEQ _ -> ">="
| NEQ _ -> "=/="
| PLUS _ -> "+"
| MINUS _ -> "-"
| SLASH _ -> "/"
| TIMES _ -> "*"
| DOT _ -> "."
| WILD _ -> "_"
| CAT _ -> "^"
(* Keywords *)
| And _ -> "and"
| Begin _ -> "begin"
| Block _ -> "block"
| Case _ -> "case"
| Const _ -> "const"
| Contains _ -> "contains"
| Down _ -> "down"
| Else _ -> "else"
| End _ -> "end"
| Entrypoint _ -> "entrypoint"
| Fail _ -> "fail"
| For _ -> "for"
| From _ -> "from"
| Function _ -> "function"
| If _ -> "if"
| In _ -> "in"
| Is _ -> "is"
| List _ -> "list"
| Map _ -> "map"
| Mod _ -> "mod"
| Nil _ -> "nil"
| Not _ -> "not"
| Of _ -> "of"
| Or _ -> "or"
| Patch _ -> "patch"
| Procedure _ -> "procedure"
| Record _ -> "record"
| Remove _ -> "remove"
| Set _ -> "set"
| Skip _ -> "skip"
| Step _ -> "step"
| Storage _ -> "storage"
| Then _ -> "then"
| To _ -> "to"
| Type _ -> "type"
| Var _ -> "var"
| While _ -> "while"
| With _ -> "with"
(* Data constructors *)
| C_False _ -> "False"
| C_None _ -> "None"
| C_Some _ -> "Some"
| C_True _ -> "True"
| C_Unit _ -> "Unit"
(* Virtual tokens *)
| EOF _ -> ""
let to_string token ?(offsets=true) mode =
let region, val_str = proj_token token in
let reg_str = region#compact ~offsets mode
in sprintf "%s: %s" reg_str val_str
let to_region token = proj_token token |> fst
(* LEXIS *)
let keywords = [
(fun reg -> And reg);
(fun reg -> Begin reg);
(fun reg -> Block reg);
(fun reg -> Case reg);
(fun reg -> Const reg);
(fun reg -> Contains reg);
(fun reg -> Down reg);
(fun reg -> Else reg);
(fun reg -> End reg);
(fun reg -> Entrypoint reg);
(fun reg -> For reg);
(fun reg -> From reg);
(fun reg -> Function reg);
(fun reg -> Fail reg);
(fun reg -> If reg);
(fun reg -> In reg);
(fun reg -> Is reg);
(fun reg -> List reg);
(fun reg -> Map reg);
(fun reg -> Mod reg);
(fun reg -> Nil reg);
(fun reg -> Not reg);
(fun reg -> Of reg);
(fun reg -> Or reg);
(fun reg -> Patch reg);
(fun reg -> Procedure reg);
(fun reg -> Record reg);
(fun reg -> Remove reg);
(fun reg -> Set reg);
(fun reg -> Skip reg);
(fun reg -> Step reg);
(fun reg -> Storage reg);
(fun reg -> Then reg);
(fun reg -> To reg);
(fun reg -> Type reg);
(fun reg -> Var reg);
(fun reg -> While reg);
(fun reg -> With reg)
]
let reserved =
let open SSet in
empty |> add "as"
|> add "asr"
|> add "assert"
|> add "class"
|> add "constraint"
|> add "do"
|> add "done"
|> add "downto"
|> add "exception"
|> add "external"
|> add "false"
|> add "fun"
|> add "functor"
|> add "include"
|> add "inherit"
|> add "initializer"
|> add "land"
|> add "lazy"
|> add "let"
|> add "lor"
|> add "lsl"
|> add "lsr"
|> add "lxor"
|> add "method"
|> add "module"
|> add "mutable"
|> add "new"
|> add "nonrec"
|> add "object"
|> add "open"
|> add "private"
|> add "rec"
|> add "sig"
|> add "struct"
|> add "true"
|> add "try"
|> add "val"
|> add "virtual"
|> add "when"
let constructors = [
(fun reg -> C_False reg);
(fun reg -> C_None reg);
(fun reg -> C_Some reg);
(fun reg -> C_True reg);
(fun reg -> C_Unit reg)
]
let add map (key, value) = SMap.add key value map
let mk_map mk_key list =
let apply map value = add map (mk_key value, value)
in List.fold_left apply SMap.empty list
type lexis = {
kwd : (Region.t -> token) SMap.t;
cstr : (Region.t -> token) SMap.t;
res : SSet.t
}
let lexicon : lexis =
let build list = mk_map (fun f -> to_lexeme (f Region.ghost)) list
in {kwd = build keywords;
cstr = build constructors;
res = reserved}
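(* For instance (derivable from the definitions above):
   [SMap.find "and" lexicon.kwd] is the function [fun reg -> And reg],
   since the key associated with each smart constructor is the lexeme
   of the token it builds on a ghost region. *)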
(* Identifiers *)
type ident_err = Reserved_name
(* END HEADER *)
}
(* START LEXER DEFINITION *)
(* Named regular expressions *)
let small = ['a'-'z']
let capital = ['A'-'Z']
let letter = small | capital
let digit = ['0'-'9']
let ident = small (letter | '_' | digit)*
let constr = capital (letter | '_' | digit)*
(* Rules *)
rule scan_ident region lexicon = parse
(ident as value) eof {
if SSet.mem value lexicon.res
then Error Reserved_name
else Ok (match SMap.find_opt value lexicon.kwd with
Some mk_kwd -> mk_kwd region
| None -> Ident Region.{region; value}) }
and scan_constr region lexicon = parse
(constr as value) eof {
match SMap.find_opt value lexicon.cstr with
Some mk_cstr -> mk_cstr region
| None -> Constr Region.{region; value} }
(* END LEXER DEFINITION *)
{
(* START TRAILER *)
(* Smart constructors (injections) *)
let mk_string lexeme region = String Region.{region; value=lexeme}
let mk_bytes lexeme region =
let norm = Str.(global_replace (regexp "_") "" lexeme) in
let value = lexeme, Hex.of_string norm
in Bytes Region.{region; value}
type int_err = Non_canonical_zero
let mk_int lexeme region =
let z = Str.(global_replace (regexp "_") "" lexeme)
|> Z.of_string in
if Z.equal z Z.zero && lexeme <> "0"
then Error Non_canonical_zero
else Ok (Int Region.{region; value = lexeme, z})
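(* A sketch of the check above: [mk_int "0" reg] yields [Ok _],
   whereas [mk_int "00" reg] and [mk_int "0_0" reg] yield
   [Error Non_canonical_zero], since their value is zero but their
   lexeme is not exactly "0". *)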
let eof region = EOF region
let mk_sym lexeme region =
match lexeme with
";" -> SEMI region
| "," -> COMMA region
| "(" -> LPAR region
| ")" -> RPAR region
| "{" -> LBRACE region
| "}" -> RBRACE region
| "[" -> LBRACKET region
| "]" -> RBRACKET region
| "#" -> CONS region
| "|" -> VBAR region
| "->" -> ARROW region
| ":=" -> ASS region
| "=" -> EQUAL region
| ":" -> COLON region
| "<" -> LT region
| "<=" -> LEQ region
| ">" -> GT region
| ">=" -> GEQ region
| "=/=" -> NEQ region
| "+" -> PLUS region
| "-" -> MINUS region
| "/" -> SLASH region
| "*" -> TIMES region
| "." -> DOT region
| "_" -> WILD region
| "^" -> CAT region
| _ -> assert false
(* Identifiers *)
let mk_ident' lexeme region lexicon =
Lexing.from_string lexeme |> scan_ident region lexicon
let mk_ident lexeme region = mk_ident' lexeme region lexicon
(* Constructors *)
let mk_constr' lexeme region lexicon =
Lexing.from_string lexeme |> scan_constr region lexicon
let mk_constr lexeme region = mk_constr' lexeme region lexicon
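(* For instance (derivable from [lexicon] above):
   [mk_ident "and" reg] yields [Ok (And reg)];
   [mk_ident "let" reg] yields [Error Reserved_name], because "let"
   is reserved; [mk_ident "foo" reg] yields
   [Ok (Ident Region.{region=reg; value="foo"})]; and
   [mk_constr "Some" reg] yields [C_Some reg]. *)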
(* Predicates *)
let is_string = function
String _ -> true
| _ -> false
let is_bytes = function
Bytes _ -> true
| _ -> false
let is_int = function
Int _ -> true
| _ -> false
let is_ident = function
Ident _ -> true
| _ -> false
let is_kwd = function
And _
| Begin _
| Block _
| Case _
| Const _
| Contains _
| Down _
| Else _
| End _
| Entrypoint _
| Fail _
| For _
| From _
| Function _
| If _
| In _
| Is _
| List _
| Map _
| Mod _
| Nil _
| Not _
| Of _
| Or _
| Patch _
| Procedure _
| Record _
| Remove _
| Set _
| Skip _
| Step _
| Storage _
| Then _
| To _
| Type _
| Var _
| While _
| With _ -> true
| _ -> false
let is_constr = function
Constr _
| C_False _
| C_None _
| C_Some _
| C_True _
| C_Unit _ -> true
| _ -> false
let is_sym = function
SEMI _
| COMMA _
| LPAR _
| RPAR _
| LBRACE _
| RBRACE _
| LBRACKET _
| RBRACKET _
| CONS _
| VBAR _
| ARROW _
| ASS _
| EQUAL _
| COLON _
| LT _
| LEQ _
| GT _
| GEQ _
| NEQ _
| PLUS _
| MINUS _
| SLASH _
| TIMES _
| DOT _
| WILD _
| CAT _ -> true
| _ -> false
let is_eof = function EOF _ -> true | _ -> false
(* END TRAILER *)
}

View File

@ -0,0 +1,153 @@
(* Lexer specification for LIGO, to be processed by [ocamllex].
The underlying design principles are:
(1) enforce stylistic constraints at the lexical level, in order to
reject potentially misleading or poorly written
LIGO contracts early;
(2) provide precise error messages with hints as to how to fix the
issue, which is achieved by consulting the lexical
right-context of lexemes;
(3) be as independent as possible from the LIGO version, so
upgrades have as little impact as possible on this
specification: this is achieved by using the most general
regular expressions to match the lexing buffer and broadly
distinguish the syntactic categories, and then delegating a
finer, protocol-dependent, second analysis to an external
module making the tokens (hence a functor below);
(4) support unit testing (lexing of the whole input with debug
traces);
The limitation to protocol independence lies in the errors that
the external module building the tokens (which is
protocol-dependent) may have to report. Indeed, these errors have to
be contextualised by the lexer in terms of input source regions, so
useful error messages can be printed; therefore they are part of
the signature [TOKEN] that parameterises the functor generated
here. For instance, if, in a future release of LIGO, new tokens
are added, and the recognition of their lexemes entails new
errors, the signature [TOKEN] will have to be augmented and the
lexer specification changed. However, it is more likely that
instructions or types will be added, rather than new kinds of tokens.
*)
type lexeme = string
(* TOKENS *)
(* The signature [TOKEN] exports an abstract type [token], so a lexer
can be a functor over tokens. This makes it possible to externalise
version-dependent constraints in any module whose signature matches
[TOKEN]. Generic functions to construct tokens are required.
Note the predicate [is_eof], which characterises the virtual token
for end-of-file, because it requires special handling. Some of
those functions may yield errors, which are defined as values of
the type [int_err] etc. These errors can be better understood by
reading the ocamllex specification for the lexer ([Lexer.mll]).
*)
module type TOKEN =
sig
type token
(* Errors *)
type int_err = Non_canonical_zero
type ident_err = Reserved_name
(* Injections *)
val mk_string : lexeme -> Region.t -> token
val mk_bytes : lexeme -> Region.t -> token
val mk_int : lexeme -> Region.t -> (token, int_err) result
val mk_ident : lexeme -> Region.t -> (token, ident_err) result
val mk_constr : lexeme -> Region.t -> token
val mk_sym : lexeme -> Region.t -> token
val eof : Region.t -> token
(* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool
(* Projections *)
val to_lexeme : token -> lexeme
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
val to_region : token -> Region.t
end
(* The module type for lexers is [S]. It mainly exports the function
[open_token_stream], which returns
* a function [read] that extracts tokens from a lexing buffer,
* together with a lexing buffer [buffer] to read from,
* a function [close] that closes that buffer,
* a function [get_pos] that returns the current position, and
* a function [get_last] that returns the region of the last
recognised token.
Note that a module [Token] is exported too, because the signature
of the exported functions depend on it.
The call [read ~log] evaluates to a lexer (a.k.a. tokeniser or
scanner) whose type is [Lexing.lexbuf -> token], suitable for a
parser generated by Menhir.
The argument labelled [log] is a logger. It may print a token and
its left markup to a given channel, at the caller's discretion.
*)
module type S =
sig
module Token : TOKEN
type token = Token.token
type file_path = string
type logger = Markup.t list -> token -> unit
val output_token :
?offsets:bool -> [`Byte | `Point] ->
EvalOpt.command -> out_channel -> logger
type instance = {
read : ?log:logger -> Lexing.lexbuf -> token;
buffer : Lexing.lexbuf;
get_pos : unit -> Pos.t;
get_last : unit -> Region.t;
close : unit -> unit
}
val open_token_stream : file_path option -> instance
(* Error reporting *)
exception Error of Error.t Region.reg
val print_error : ?offsets:bool -> [`Byte | `Point] ->
Error.t Region.reg -> unit
(* Standalone tracer *)
val trace :
?offsets:bool -> [`Byte | `Point] ->
file_path option -> EvalOpt.command -> unit
end
(* The functorised interface
Note that the module parameter [Token] is re-exported as a
submodule in [S].
*)
module Make (Token: TOKEN) : S with module Token = Token
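(* A minimal usage sketch (not part of the interface proper): the
   module [LexToken] must match the signature [TOKEN], as in
   [LexerMain.ml], and the file name "input.ligo" is a placeholder.

   module MyLexer = Lexer.Make (LexToken)

   let () =
     let MyLexer.{read; buffer; close; _} =
       MyLexer.open_token_stream (Some "input.ligo") in
     let rec drain () =
       let token = read buffer in
       if not (MyLexer.Token.is_eof token) then drain ()
     in drain (); close ()
*)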

View File

@ -0,0 +1,872 @@
(* Lexer specification for LIGO, to be processed by [ocamllex]. *)
{
(* START HEADER *)
type lexeme = string
(* STRING PROCESSING *)
(* The value of [mk_str len p] ("make string") is a string of length
[len] containing the [len] characters in the list [p], in reverse
order. For instance, [mk_str 3 ['c';'b';'a'] = "abc"]. *)
let mk_str (len: int) (p: char list) : string =
let bytes = Bytes.make len ' ' in
let rec fill i = function
[] -> bytes
| char::l -> Bytes.set bytes i char; fill (i-1) l
in fill (len-1) p |> Bytes.to_string
(* The call [explode s a] is the list made by pushing the characters
in the string [s] on top of [a], in reverse order. For example,
[explode "ba" ['c';'d'] = ['a'; 'b'; 'c'; 'd']]. *)
let explode s acc =
let rec push = function
0 -> acc
| i -> s.[i-1] :: push (i-1)
in push (String.length s)
(* LEXER ENGINE *)
(* Resetting file name and line number in the lexing buffer
The call [reset ~file ~line buffer] modifies in-place the lexing
buffer [buffer] so the lexing engine records that the file
associated with [buffer] is named [file], and the current line is
[line]. This function is useful when lexing a file that has been
previously preprocessed by the C preprocessor, in which case the
argument [file] is the name of the file that was preprocessed,
_not_ the preprocessed file (of which the user is not normally
aware). By default, the [line] argument is [1].
*)
let reset_file ~file buffer =
let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname = file}
let reset_line ~line buffer =
assert (line >= 0);
let open Lexing in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = line}
let reset_offset ~offset buffer =
assert (offset >= 0);
let open Lexing in
let bol = buffer.lex_curr_p.pos_bol in
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum = bol (*+ offset*)}
let reset ?file ?line ?offset buffer =
let () =
match file with
Some file -> reset_file ~file buffer
| None -> () in
let () =
match line with
Some line -> reset_line ~line buffer
| None -> () in
match offset with
Some offset -> reset_offset ~offset buffer
| None -> ()
(* Rolling back one lexeme _within the current semantic action_ *)
let rollback buffer =
let open Lexing in
let len = String.length (lexeme buffer) in
let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
(* ALIASES *)
let sprintf = Printf.sprintf
(* TOKENS *)
(* The signature [TOKEN] exports an abstract type [token], so a lexer
can be a functor over tokens. Consequently, generic functions to
construct tokens are provided. Note the predicate [is_eof], which
characterises the virtual token for end-of-file, because it requires
special handling. *)
module type TOKEN =
sig
type token
(* Errors *)
type int_err = Non_canonical_zero
type ident_err = Reserved_name
(* Injections *)
val mk_string : lexeme -> Region.t -> token
val mk_bytes : lexeme -> Region.t -> token
val mk_int : lexeme -> Region.t -> (token, int_err) result
val mk_ident : lexeme -> Region.t -> (token, ident_err) result
val mk_constr : lexeme -> Region.t -> token
val mk_sym : lexeme -> Region.t -> token
val eof : Region.t -> token
(* Predicates *)
val is_string : token -> bool
val is_bytes : token -> bool
val is_int : token -> bool
val is_ident : token -> bool
val is_kwd : token -> bool
val is_constr : token -> bool
val is_sym : token -> bool
val is_eof : token -> bool
(* Projections *)
val to_lexeme : token -> lexeme
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
val to_region : token -> Region.t
end
(* The module type for lexers is [S]. *)
module type S = sig
module Token : TOKEN
type token = Token.token
type file_path = string
type logger = Markup.t list -> token -> unit
val output_token :
?offsets:bool -> [`Byte | `Point] ->
EvalOpt.command -> out_channel -> logger
type instance = {
read : ?log:logger -> Lexing.lexbuf -> token;
buffer : Lexing.lexbuf;
get_pos : unit -> Pos.t;
get_last : unit -> Region.t;
close : unit -> unit
}
val open_token_stream : file_path option -> instance
(* Error reporting *)
exception Error of Error.t Region.reg
val print_error :
?offsets:bool -> [`Byte | `Point] -> Error.t Region.reg -> unit
(* Standalone tracer *)
val trace :
?offsets:bool -> [`Byte | `Point] ->
file_path option -> EvalOpt.command -> unit
end
(* The functorised interface
Note that the module parameter [Token] is re-exported as a
submodule in [S].
*)
module Make (Token: TOKEN) : (S with module Token = Token) =
struct
module Token = Token
type token = Token.token
type file_path = string
(* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)
(* When scanning structured constructs, like strings and comments,
we need to keep the region of the opening symbol (like double
quote, "//" or "(*") in order to report any error more
precisely. Since ocamllex is byte-oriented, we need to store
the parsed bytes as characters in an accumulator [acc] and
also its length [len], so that, when we are done, it is easy to build the
string making up the structured construct with [mk_str] (see
above).
The resulting data structure is called a _thread_.
(Note for Emacs: "*)".)
*)
type thread = {
opening : Region.t;
len : int;
acc : char list
}
let push_char char {opening; len; acc} = {opening; len=len+1; acc=char::acc}
let push_string str {opening; len; acc} =
{opening;
len = len + String.length str;
acc = explode str acc}
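(* For instance (a sketch): starting from [{opening; len=0; acc=[]}],
   pushing the characters of "ab" one at a time yields [len = 2] and
   [acc = ['b'; 'a']], from which [mk_str 2 ['b'; 'a']] rebuilds the
   string "ab". *)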
(* STATE *)
(* Beyond tokens, the result of lexing is a state. The type
[state] represents the logical state of the lexing engine, that
is, a value which is threaded during scanning and which denotes
useful, high-level information beyond what the type
[Lexing.lexbuf] in the standard library already provides for
all generic lexers.
Tokens are the smallest units used by the parser to build the
abstract syntax tree. The state includes a queue of recognised
tokens, with the markup at the left of its lexeme until either
the start of the file or the end of the previously recognised
token.
The markup from the last recognised token or, if the first
token has not been recognised yet, from the beginning of the
file is stored in the field [markup] of the state because it is
a side-effect, with respect to the output token list, and a
record is used because it may be easily extended during the future
maintenance of this lexer.
The state also includes a field [pos] which holds the current
position in the LIGO source file. The position is not always
updated after a single character has been matched: that depends
on the regular expression that matched the lexing buffer.
The fields [decoder] and [supply] offer the support needed
for the lexing of UTF-8 encoded characters in comments (the
only place where they are allowed in LIGO). The former is the
decoder proper and the latter is the effectful function
[supply] that takes a byte, a start index and a length and feed
it to [decoder]. See the documentation of the third-party
library Uutf.
*)
type state = {
units : (Markup.t list * token) FQueue.t;
markup : Markup.t list;
last : Region.t;
pos : Pos.t;
decoder : Uutf.decoder;
supply : Bytes.t -> int -> int -> unit
}
(* The call [enqueue (token, state)] updates functionally the
state [state] by associating the token [token] with the stored
markup and enqueuing the pair into the units queue. The field
[markup] is then reset to the empty list. *)
let enqueue (token, state) = {
state with
units = FQueue.enq (state.markup, token) state.units;
markup = []
}
(* The call [sync state buffer] updates the current position in
accordance with the contents of the lexing buffer, more
precisely, depending on the length of the string which has just
been recognised by the scanner: that length is used as a
positive offset to the current column. *)
let sync state buffer =
let lex = Lexing.lexeme buffer in
let len = String.length lex in
let start = state.pos in
let stop = start#shift_bytes len in
let state = {state with pos = stop}
in Region.make ~start ~stop, lex, state
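(* For example (a sketch): if the scanner has just matched the lexeme
   "then" while the current position was [p], then [sync state buffer]
   returns the region from [p] to [p#shift_bytes 4], the lexeme "then",
   and a state whose field [pos] is that stop position. *)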
(* MARKUP *)
(* Committing markup to the current logical state *)
let push_newline state buffer =
let value = Lexing.lexeme buffer
and () = Lexing.new_line buffer
and start = state.pos in
let stop = start#new_line value in
let state = {state with pos = stop}
and region = Region.make ~start ~stop in
let unit = Markup.Newline Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
let push_line (thread, state) =
let start = thread.opening#start in
let region = Region.make ~start ~stop:state.pos
and value = mk_str thread.len thread.acc in
let unit = Markup.LineCom Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
let push_block (thread, state) =
let start = thread.opening#start in
let region = Region.make ~start ~stop:state.pos
and value = mk_str thread.len thread.acc in
let unit = Markup.BlockCom Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
let push_space state buffer =
let region, lex, state = sync state buffer in
let value = String.length lex in
let unit = Markup.Space Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
let push_tabs state buffer =
let region, lex, state = sync state buffer in
let value = String.length lex in
let unit = Markup.Tabs Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
let push_bom state buffer =
let region, value, state = sync state buffer in
let unit = Markup.BOM Region.{region; value} in
let markup = unit :: state.markup
in {state with markup}
(* ERRORS *)
type Error.t += Invalid_utf8_sequence
type Error.t += Unexpected_character of char
type Error.t += Undefined_escape_sequence
type Error.t += Missing_break
type Error.t += Unterminated_string
type Error.t += Unterminated_integer
type Error.t += Odd_lengthed_bytes
type Error.t += Unterminated_comment
type Error.t += Orphan_minus
type Error.t += Non_canonical_zero
type Error.t += Negative_byte_sequence
type Error.t += Broken_string
type Error.t += Invalid_character_in_string
type Error.t += Reserved_name
let error_to_string = function
Invalid_utf8_sequence ->
"Invalid UTF-8 sequence.\n"
| Unexpected_character c ->
sprintf "Unexpected character '%s'.\n" (Char.escaped c)
| Undefined_escape_sequence ->
"Undefined escape sequence.\n\
Hint: Remove or replace the sequence.\n"
| Missing_break ->
"Missing break.\n\
Hint: Insert some space.\n"
| Unterminated_string ->
"Unterminated string.\n\
Hint: Close with double quotes.\n"
| Unterminated_integer ->
"Unterminated integer.\n\
Hint: Remove the sign or proceed with a natural number.\n"
| Odd_lengthed_bytes ->
"The length of the byte sequence is an odd number.\n\
Hint: Add or remove a digit.\n"
| Unterminated_comment ->
"Unterminated comment.\n\
Hint: Close with \"*)\".\n"
| Orphan_minus ->
"Orphan minus sign.\n\
Hint: Remove the trailing space.\n"
| Non_canonical_zero ->
"Non-canonical zero.\n\
Hint: Use 0.\n"
| Negative_byte_sequence ->
"Negative byte sequence.\n\
Hint: Remove the leading minus sign.\n"
| Broken_string ->
"The string starting here is interrupted by a line break.\n\
Hint: Remove the break or close the string before.\n"
| Invalid_character_in_string ->
"Invalid character in string.\n\
Hint: Remove or replace the character.\n"
| Reserved_name ->
"Reserved named.\n\
Hint: Change the name.\n"
| _ -> assert false
exception Error of Error.t Region.reg
let fail region value = raise (Error Region.{region; value})
(* TOKENS *)
(* Making tokens *)
let mk_string (thread, state) =
let start = thread.opening#start in
let stop = state.pos in
let region = Region.make ~start ~stop in
let lexeme = mk_str thread.len thread.acc in
let token = Token.mk_string lexeme region
in token, state
let mk_bytes bytes state buffer =
let region, _, state = sync state buffer in
let token = Token.mk_bytes bytes region
in token, state
let mk_int state buffer =
let region, lexeme, state = sync state buffer in
match Token.mk_int lexeme region with
Ok token -> token, state
| Error Token.Non_canonical_zero ->
fail region Non_canonical_zero
let mk_ident state buffer =
let region, lexeme, state = sync state buffer in
match Token.mk_ident lexeme region with
Ok token -> token, state
| Error Token.Reserved_name -> fail region Reserved_name
let mk_constr state buffer =
let region, lexeme, state = sync state buffer
in Token.mk_constr lexeme region, state
let mk_sym state buffer =
let region, lexeme, state = sync state buffer
in Token.mk_sym lexeme region, state
let mk_eof state buffer =
let region, _, state = sync state buffer
in Token.eof region, state
(* END HEADER *)
}
(* START LEXER DEFINITION *)
(* Named regular expressions *)
let utf8_bom = "\xEF\xBB\xBF" (* Byte Order Mark for UTF-8 *)
let nl = ['\n' '\r'] | "\r\n"
let blank = ' ' | '\t'
let digit = ['0'-'9']
let natural = digit | digit (digit | '_')* digit
let integer = '-'? natural
let small = ['a'-'z']
let capital = ['A'-'Z']
let letter = small | capital
let ident = small (letter | '_' | digit)*
let constr = capital (letter | '_' | digit)*
let hexa_digit = digit | ['A'-'F']
let byte = hexa_digit hexa_digit
let byte_seq = byte | byte (byte | '_')* byte
let bytes = "0x" (byte_seq? as seq)
let esc = "\\n" | "\\\"" | "\\\\" | "\\b"
| "\\r" | "\\t" | "\\x" byte
let symbol = ';' | ',' | '(' | ')'| '[' | ']' | '{' | '}'
| '#' | '|' | "->" | ":=" | '=' | ':'
| '<' | "<=" | '>' | ">=" | "=/="
| '+' | '-' | '*' | '.' | '_' | '^'
let string = [^'"' '\\' '\n']* (* For strings of #include *)
(* RULES *)
(* Except for the first rule [init], all rules bear a name starting
with "scan".
All have a parameter [state] that they thread through their
recursive calls. The rules for the structured constructs (strings
and comments) have an extra parameter of type [thread] (see above).
*)
rule init state = parse
utf8_bom { scan (push_bom state lexbuf) lexbuf }
| _ { rollback lexbuf; scan state lexbuf }
and scan state = parse
nl { scan (push_newline state lexbuf) lexbuf }
| ' '+ { scan (push_space state lexbuf) lexbuf }
| '\t'+ { scan (push_tabs state lexbuf) lexbuf }
| ident { mk_ident state lexbuf |> enqueue }
| constr { mk_constr state lexbuf |> enqueue }
| bytes { (mk_bytes seq) state lexbuf |> enqueue }
| integer { mk_int state lexbuf |> enqueue }
| symbol { mk_sym state lexbuf |> enqueue }
| eof { mk_eof state lexbuf |> enqueue }
| '"' { let opening, _, state = sync state lexbuf in
let thread = {opening; len=1; acc=['"']} in
scan_string thread state lexbuf |> mk_string |> enqueue }
| "(*" { let opening, _, state = sync state lexbuf in
let thread = {opening; len=2; acc=['*';'(']} in
let state = scan_block thread state lexbuf |> push_block
in scan state lexbuf }
| "//" { let opening, _, state = sync state lexbuf in
let thread = {opening; len=2; acc=['/';'/']} in
let state = scan_line thread state lexbuf |> push_line
in scan state lexbuf }
(* Management of #include CPP directives
An input LIGO program may contain GNU CPP (C preprocessor)
directives, and the entry modules (named *Main.ml) run CPP on them
in traditional mode:
https://gcc.gnu.org/onlinedocs/cpp/Traditional-Mode.html
The main interest in using CPP is that it can stand for a poor
man's (flat) module system for LIGO thanks to #include
directives, and the traditional mode leaves the markup mostly
undisturbed.
Some of the #line resulting from processing #include directives
deal with system file headers and thus have to be ignored for our
purpose. Moreover, these #line directives may also carry some
additional flags:
https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
of which 1 and 2 indicate, respectively, the start of a new file
and the return from a file (after its inclusion has been
processed).
*)
| '#' blank* ("line" blank+)? (integer as line) blank+
'"' (string as file) '"' {
let _, _, state = sync state lexbuf in
let flags, state = scan_flags state [] lexbuf in
let () = ignore flags in
let line = int_of_string line
and file = Filename.basename file in
let pos = state.pos#set ~file ~line ~offset:0 in
let state = {state with pos} in
scan state lexbuf
}
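(* For example (a sketch): a directive left by CPP such as
       # 12 "dir/included.ligo" 1
   is consumed here: the trailing flag is read and discarded by
   [scan_flags], and the current position becomes line 12, offset 0,
   in file "included.ligo" (the directory is dropped by
   [Filename.basename]). *)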
(* Some special errors
Some special errors are recognised in the semantic actions of the
following regular expressions. The first error is a minus sign
separated from the integer it modifies by some markup (space or
tabs). The second is a minus sign immediately followed by
anything else than a natural number (matched above) or markup and
a number (previous error). The third is the strange occurrence of
an attempt at defining a negative byte sequence. Finally, the
catch-all rule reports unexpected characters in the buffer (and
is not so special, after all).
*)
| '-' { let region, _, state = sync state lexbuf in
let state = scan state lexbuf in
let open Markup in
match FQueue.peek state.units with
None -> assert false
| Some (_, ((Space _ | Tabs _)::_, token))
when Token.is_int token ->
fail region Orphan_minus
| _ -> fail region Unterminated_integer }
| '-' "0x" byte_seq?
{ let region, _, _ = sync state lexbuf
in fail region Negative_byte_sequence }
| _ as c { let region, _, _ = sync state lexbuf
in fail region (Unexpected_character c) }
(* Scanning CPP #include flags *)
and scan_flags state acc = parse
blank+ { let _, _, state = sync state lexbuf
in scan_flags state acc lexbuf }
| integer as code { let _, _, state = sync state lexbuf in
let acc = int_of_string code :: acc
in scan_flags state acc lexbuf }
| nl { List.rev acc, push_newline state lexbuf }
| eof { let _, _, state = sync state lexbuf
in List.rev acc, state (* TODO *) }
(* Finishing a string *)
and scan_string thread state = parse
nl { fail thread.opening Broken_string }
| eof { fail thread.opening Unterminated_string }
| ['\t' '\r' '\b']
{ let region, _, _ = sync state lexbuf
in fail region Invalid_character_in_string }
| '"' { let _, _, state = sync state lexbuf
in push_char '"' thread, state }
| esc { let _, lexeme, state = sync state lexbuf
in scan_string (push_string lexeme thread) state lexbuf }
| '\\' _ { let region, _, _ = sync state lexbuf
in fail region Undefined_escape_sequence }
| _ as c { let _, _, state = sync state lexbuf in
scan_string (push_char c thread) state lexbuf }
(* Finishing a block comment
(Note for Emacs: ("(*")
The lexing of block comments must take care of embedded block
comments that may occur within, as well as strings, so no substring
"*)" may inadvertently close the block. This is the purpose
of the first case of the scanner [scan_block].
*)
and scan_block thread state = parse
'"' | "(*" { let opening = thread.opening in
let opening', lexeme, state = sync state lexbuf in
let thread = push_string lexeme thread in
let thread = {thread with opening=opening'} in
let next = if lexeme = "\"" then scan_string
else scan_block in
let thread, state = next thread state lexbuf in
let thread = {thread with opening}
in scan_block thread state lexbuf }
| "*)" { let _, lexeme, state = sync state lexbuf
in push_string lexeme thread, state }
| nl as nl { let () = Lexing.new_line lexbuf
and state = {state with pos = state.pos#new_line nl}
and thread = push_string nl thread
in scan_block thread state lexbuf }
| eof { fail thread.opening Unterminated_comment }
| _ { let () = rollback lexbuf in
let len = thread.len in
let thread,
status = scan_utf8 thread state lexbuf in
let delta = thread.len - len in
let pos = state.pos#shift_one_uchar delta in
match status with
None -> scan_block thread {state with pos} lexbuf
| Some error ->
let region = Region.make ~start:state.pos ~stop:pos
in fail region error }
(* Finishing a line comment *)
and scan_line thread state = parse
nl as nl { let () = Lexing.new_line lexbuf
and thread = push_string nl thread
and state = {state with pos = state.pos#new_line nl}
in thread, state }
| eof { fail thread.opening Unterminated_comment }
| _ { let () = rollback lexbuf in
let len = thread.len in
let thread,
status = scan_utf8 thread state lexbuf in
let delta = thread.len - len in
let pos = state.pos#shift_one_uchar delta in
match status with
None -> scan_line thread {state with pos} lexbuf
| Some error ->
let region = Region.make ~start:state.pos ~stop:pos
in fail region error }
and scan_utf8 thread state = parse
eof { fail thread.opening Unterminated_comment }
| _ as c { let thread = push_char c thread in
let lexeme = Lexing.lexeme lexbuf in
let () = state.supply (Bytes.of_string lexeme) 0 1 in
match Uutf.decode state.decoder with
`Uchar _ -> thread, None
| `Malformed _ -> thread, Some Invalid_utf8_sequence
| `Await -> scan_utf8 thread state lexbuf
| `End -> assert false }
(* END LEXER DEFINITION *)
{
(* START TRAILER *)
(* Scanning the lexing buffer for tokens (and markup, as a
side-effect).
Because we want the lexer to have access to the right lexical
context of a recognised lexeme (to enforce stylistic constraints or
report special error patterns), we need to keep a hidden reference
to a queue of recognised lexical units (that is, tokens and markup)
that acts as a mutable state between the calls to
[read_token]. When [read_token] is called, that queue is consulted
first and, if it contains at least one token, that token is
returned; otherwise, the lexing buffer is scanned for at least one
more new token. That is the general principle: we put a high-level
buffer (our queue) on top of the low-level lexing buffer.
One tricky and important detail is that we must make any parser
generated by Menhir (and calling [read_token]) believe that the
last region of the input source that was matched indeed corresponds
to the returned token, even though many tokens and markup may have
been matched since it was actually read from the input. In other
words, the parser requests a token that is taken from the
high-level buffer, but the parser requests the source regions from
the _low-level_ lexing buffer, and they may disagree if more than
one token has actually been recognised.
Consequently, in order to maintain a consistent view for the
parser, we have to patch some fields of the lexing buffer, namely
[lex_start_p] and [lex_curr_p], as these fields are read by parsers
generated by Menhir when querying source positions (regions). This
is the purpose of the function [patch_buffer]. After reading one
or more tokens and markup with the scanning rule [scan], we have to
save in the hidden reference [buf_reg] the region of the source
that was matched by [scan]. This atomic sequence of patching,
scanning and saving is implemented by the _function_ [scan]
(beware: it shadows the scanning rule [scan]). The function
[patch_buffer] is, of course, also called just before returning the
token, so the parser has a view of the lexing buffer consistent
with the token.
Note that an additional reference [first_call] is needed to
distinguish the first call to the function [scan], as the first
scanning rule is actually [init] (which can handle the BOM), not
[scan].
*)
type logger = Markup.t list -> token -> unit
type instance = {
read : ?log:logger -> Lexing.lexbuf -> token;
buffer : Lexing.lexbuf;
get_pos : unit -> Pos.t;
get_last : unit -> Region.t;
close : unit -> unit
}
let file_path = match EvalOpt.input with
None | Some "-" -> ""
| Some file_path -> file_path
let pos = Pos.min#set_file file_path
let buf_reg = ref (pos#byte, pos#byte)
and first_call = ref true
and decoder = Uutf.decoder ~encoding:`UTF_8 `Manual
let supply = Uutf.Manual.src decoder
let state = ref {units = FQueue.empty;
last = Region.ghost;
pos;
markup = [];
decoder;
supply}
let get_pos () = !state.pos
let get_last () = !state.last
let patch_buffer (start, stop) buffer =
let open Lexing in
let file_path = buffer.lex_curr_p.pos_fname in
buffer.lex_start_p <- {start with pos_fname = file_path};
buffer.lex_curr_p <- {stop with pos_fname = file_path}
and save_region buffer =
buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p)
let scan buffer =
patch_buffer !buf_reg buffer;
(if !first_call
then (state := init !state buffer; first_call := false)
else state := scan !state buffer);
save_region buffer
let next_token buffer =
scan buffer;
match FQueue.peek !state.units with
None -> assert false
| Some (units, ext_token) ->
state := {!state with units}; Some ext_token
let check_right_context token buffer =
let open Token in
if is_int token || is_bytes token then
match next_token buffer with
Some ([], next) ->
let pos = (Token.to_region token)#stop in
let region = Region.make ~start:pos ~stop:pos in
if is_bytes token && is_int next then
fail region Odd_lengthed_bytes
else
if is_ident next || is_string next
|| is_bytes next || is_int next then
fail region Missing_break
| _ -> ()
else
if Token.is_ident token || Token.is_string token then
match next_token buffer with
Some ([], next) ->
if Token.is_ident next || Token.is_string next
|| Token.is_bytes next || Token.is_int next
then
let pos = (Token.to_region token)#stop in
let region = Region.make ~start:pos ~stop:pos
in fail region Missing_break
| _ -> ()
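(* For instance (a sketch of the checks above): the input "0x123"
   lexes as the bytes "0x12" immediately followed by the integer "3",
   which triggers [Odd_lengthed_bytes]; an identifier immediately
   followed by a string, as in [foo"bar"], triggers [Missing_break]. *)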
let rec read_token ?(log=fun _ _ -> ()) buffer =
match FQueue.deq !state.units with
None ->
scan buffer;
read_token ~log buffer
| Some (units, (left_mark, token)) ->
log left_mark token;
state := {!state with units; last = Token.to_region token};
check_right_context token buffer;
patch_buffer (Token.to_region token)#byte_pos buffer;
token
let open_token_stream file_path_opt =
let cin = match file_path_opt with
None | Some "-" -> stdin
| Some file_path -> open_in file_path in
let buffer = Lexing.from_channel cin in
let () = match file_path_opt with
None | Some "-" -> ()
| Some file_path -> reset ~file:file_path buffer
and close () = close_in cin in
{read = read_token; buffer; get_pos; get_last; close}
(* Standalone lexer for debugging purposes *)
(* Pretty-printing in a string the lexemes making up the markup
between two tokens, concatenated with the last lexeme itself. *)
let output_token ?(offsets=true) mode command
channel left_mark token : unit =
let output str = Printf.fprintf channel "%s%!" str in
let output_nl str = output (str ^ "\n") in
match command with
EvalOpt.Quiet -> ()
| EvalOpt.Tokens -> Token.to_string token ~offsets mode |> output_nl
| EvalOpt.Copy ->
let lexeme = Token.to_lexeme token
and apply acc markup = Markup.to_lexeme markup :: acc
in List.fold_left apply [lexeme] left_mark
|> String.concat "" |> output
| EvalOpt.Units ->
let abs_token = Token.to_string token ~offsets mode
and apply acc markup =
Markup.to_string markup ~offsets mode :: acc
in List.fold_left apply [abs_token] left_mark
|> String.concat "\n" |> output_nl
let print_error ?(offsets=true) mode Region.{region; value} =
let msg = error_to_string value in
let file = match EvalOpt.input with
None | Some "-" -> false
| Some _ -> true in
let reg = region#to_string ~file ~offsets mode in
Utils.highlight (sprintf "Lexical error %s:\n%s%!" reg msg)
let trace ?(offsets=true) mode file_path_opt command : unit =
try
let {read; buffer; close; _} = open_token_stream file_path_opt
and cout = stdout in
let log = output_token ~offsets mode command cout
and close_all () = close (); close_out cout in
let rec iter () =
match read ~log buffer with
token ->
if Token.is_eof token then close_all ()
else iter ()
| exception Error e -> print_error ~offsets mode e; close_all ()
in iter ()
with Sys_error msg -> Utils.highlight (sprintf "%s\n" msg)
end (* of functor [Make] in HEADER *)
(* END TRAILER *)
}

View File

@ -0,0 +1,55 @@
(* Driver for the lexer of LIGO *)
open! EvalOpt (* Reads the command-line options: Effectful! *)
(* Error printing and exception tracing *)
let () = Printexc.record_backtrace true
let external_ text =
Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;;
(* Path for CPP inclusions (#include) *)
let lib_path =
match EvalOpt.libs with
[] -> ""
| libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path
in List.fold_right mk_I libs ""
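(* For instance: with [EvalOpt.libs = ["lib1"; "lib2"]], the fold
   above yields " -I lib1 -I lib2", which is appended to the CPP
   command below. *)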
(* Preprocessing the input source and opening the input channels *)
let prefix =
match EvalOpt.input with
None | Some "-" -> "temp"
| Some file -> Filename.(file |> basename |> remove_extension)
let suffix = ".pp.li"
let pp_input =
if Utils.String.Set.mem "cpp" EvalOpt.verbose
then prefix ^ suffix
else let pp_input, pp_out = Filename.open_temp_file prefix suffix
in close_out pp_out; pp_input
let cpp_cmd =
match EvalOpt.input with
None | Some "-" ->
Printf.sprintf "cpp -traditional-cpp%s - -o %s"
lib_path pp_input
| Some file ->
Printf.sprintf "cpp -traditional-cpp%s %s -o %s"
lib_path file pp_input
let () =
if Utils.String.Set.mem "cpp" EvalOpt.verbose
then Printf.eprintf "%s\n%!" cpp_cmd;
if Sys.command cpp_cmd <> 0 then
external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd)
(* Running the lexer on the input file *)
module Lexer = Lexer.Make (LexToken)
let () = Lexer.trace ~offsets:EvalOpt.offsets
EvalOpt.mode (Some pp_input) EvalOpt.cmd

View File

@ -0,0 +1,42 @@
type lexeme = string
type t =
Tabs of int Region.reg
| Space of int Region.reg
| Newline of lexeme Region.reg
| LineCom of lexeme Region.reg
| BlockCom of lexeme Region.reg
| BOM of lexeme Region.reg
type markup = t
(* Pretty-printing *)
let sprintf = Printf.sprintf
let to_lexeme = function
Tabs Region.{value;_} -> String.make value '\t'
| Space Region.{value;_} -> String.make value ' '
| Newline Region.{value;_}
| LineCom Region.{value;_}
| BlockCom Region.{value;_}
| BOM Region.{value;_} -> value
let to_string markup ?(offsets=true) mode =
let region, val_str =
match markup with
Tabs Region.{value; region} ->
let lex = String.make value '\t' |> String.escaped
in region, sprintf "Tabs \"%s\"" lex
| Space Region.{value; region} ->
region, sprintf "Space \"%s\"" (String.make value ' ')
| Newline Region.{value; region} ->
region, sprintf "Newline \"%s\"" (String.escaped value)
| LineCom Region.{value; region} ->
region, sprintf "LineCom \"%s\"" (String.escaped value)
| BlockCom Region.{value; region} ->
region, sprintf "BlockCom \"%s\"" (String.escaped value)
| BOM Region.{value; region} ->
region, sprintf "BOM \"%s\"" (String.escaped value) in
let reg_str = region#compact ~offsets mode
in sprintf "%s: %s" reg_str val_str

View File

@ -0,0 +1,32 @@
(* This module defines the sorts of markup recognised by the LIGO
lexer *)
(* A lexeme is a piece of concrete syntax belonging to a token. In
algebraic terms, a token is also a piece of abstract lexical
syntax. Lexical units encompass both markup and lexemes. *)
type lexeme = string
type t =
Tabs of int Region.reg (* Tabulations *)
| Space of int Region.reg (* Space *)
| Newline of lexeme Region.reg (* "\n" or "\r\n" escape sequences *)
| LineCom of lexeme Region.reg (* Line comments *)
| BlockCom of lexeme Region.reg (* Block comments *)
| BOM of lexeme Region.reg (* Byte-Order Mark for UTF-8 (optional) *)
type markup = t
(* Pretty-printing of markup
The difference between [to_lexeme] and [to_string] is that the
former builds the corresponding concrete syntax (the lexeme),
whilst the latter makes up a textual representation of the abstract
syntax (the OCaml data constructors).
The result of [to_string] is escaped to avoid capture by the
terminal.
*)
val to_lexeme : t -> lexeme
val to_string : t -> ?offsets:bool -> [`Byte | `Point] -> string
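(* For example (a sketch): if [t = Tabs Region.{region; value=2}],
   then [to_lexeme t] is "\t\t" (the concrete syntax), while
   [to_string t ~offsets:true `Byte] prints the compact region
   followed by the constructor, e.g.
   "file.ligo:1:0-1:2: Tabs \"\\t\\t\"", depending on [region]. *)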

View File

@ -0,0 +1,96 @@
%{
%}
(* Tokens (mirroring those defined in module LexToken) *)
(* Literals *)
%token <LexToken.lexeme Region.reg> String
%token <(LexToken.lexeme * Hex.t) Region.reg> Bytes
%token <(LexToken.lexeme * Z.t) Region.reg> Int
%token <LexToken.lexeme Region.reg> Ident
%token <LexToken.lexeme Region.reg> Constr
(* Symbols *)
%token <Region.t> SEMI (* ";" *)
%token <Region.t> COMMA (* "," *)
%token <Region.t> LPAR (* "(" *)
%token <Region.t> RPAR (* ")" *)
%token <Region.t> LBRACE (* "{" *)
%token <Region.t> RBRACE (* "}" *)
%token <Region.t> LBRACKET (* "[" *)
%token <Region.t> RBRACKET (* "]" *)
%token <Region.t> CONS (* "#" *)
%token <Region.t> VBAR (* "|" *)
%token <Region.t> ARROW (* "->" *)
%token <Region.t> ASS (* ":=" *)
%token <Region.t> EQUAL (* "=" *)
%token <Region.t> COLON (* ":" *)
%token <Region.t> LT (* "<" *)
%token <Region.t> LEQ (* "<=" *)
%token <Region.t> GT (* ">" *)
%token <Region.t> GEQ (* ">=" *)
%token <Region.t> NEQ (* "=/=" *)
%token <Region.t> PLUS (* "+" *)
%token <Region.t> MINUS (* "-" *)
%token <Region.t> SLASH (* "/" *)
%token <Region.t> TIMES (* "*" *)
%token <Region.t> DOT (* "." *)
%token <Region.t> WILD (* "_" *)
%token <Region.t> CAT (* "^" *)
(* Keywords *)
%token <Region.t> And (* "and" *)
%token <Region.t> Begin (* "begin" *)
%token <Region.t> Block (* "block" *)
%token <Region.t> Case (* "case" *)
%token <Region.t> Const (* "const" *)
%token <Region.t> Contains (* "contains" *)
%token <Region.t> Down (* "down" *)
%token <Region.t> Else (* "else" *)
%token <Region.t> End (* "end" *)
%token <Region.t> Entrypoint (* "entrypoint" *)
%token <Region.t> Fail (* "fail" *)
%token <Region.t> For (* "for" *)
%token <Region.t> Function (* "function" *)
%token <Region.t> From (* "from" *)
%token <Region.t> If (* "if" *)
%token <Region.t> In (* "in" *)
%token <Region.t> Is (* "is" *)
%token <Region.t> List (* "list" *)
%token <Region.t> Map (* "map" *)
%token <Region.t> Mod (* "mod" *)
%token <Region.t> Nil (* "nil" *)
%token <Region.t> Not (* "not" *)
%token <Region.t> Of (* "of" *)
%token <Region.t> Or (* "or" *)
%token <Region.t> Patch (* "patch" *)
%token <Region.t> Procedure (* "procedure" *)
%token <Region.t> Record (* "record" *)
%token <Region.t> Remove (* "remove" *)
%token <Region.t> Set (* "set" *)
%token <Region.t> Skip (* "skip" *)
%token <Region.t> Step (* "step" *)
%token <Region.t> Storage (* "storage" *)
%token <Region.t> Then (* "then" *)
%token <Region.t> To (* "to" *)
%token <Region.t> Type (* "type" *)
%token <Region.t> Var (* "var" *)
%token <Region.t> While (* "while" *)
%token <Region.t> With (* "with" *)
(* Data constructors *)
%token <Region.t> C_False (* "False" *)
%token <Region.t> C_None (* "None" *)
%token <Region.t> C_Some (* "Some" *)
%token <Region.t> C_True (* "True" *)
%token <Region.t> C_Unit (* "Unit" *)
(* Virtual tokens *)
%token <Region.t> EOF
%%

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,118 @@
(* Driver for the parser of LIGO *)
open! EvalOpt (* Reads the command-line options: Effectful! *)
let sprintf = Printf.sprintf
(* Error printing and exception tracing *)
let () = Printexc.record_backtrace true
let external_ text =
Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;;
type Error.t += ParseError
let error_to_string = function
ParseError -> "Syntax error.\n"
| _ -> assert false
let print_error ?(offsets=true) mode Region.{region; value} =
let msg = error_to_string value in
let file = match EvalOpt.input with
None | Some "-" -> false
| Some _ -> true in
let reg = region#to_string ~file ~offsets mode in
Utils.highlight (sprintf "Parse error %s:\n%s%!" reg msg)
(* Path for CPP inclusions (#include) *)
let lib_path =
match EvalOpt.libs with
[] -> ""
| libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path
in List.fold_right mk_I libs ""
(* Preprocessing the input source and opening the input channels *)
let prefix =
match EvalOpt.input with
None | Some "-" -> "temp"
| Some file -> Filename.(file |> basename |> remove_extension)
let suffix = ".pp.li"
let pp_input =
if Utils.String.Set.mem "cpp" EvalOpt.verbose
then prefix ^ suffix
else let pp_input, pp_out = Filename.open_temp_file prefix suffix
in close_out pp_out; pp_input
let cpp_cmd =
match EvalOpt.input with
None | Some "-" ->
Printf.sprintf "cpp -traditional-cpp%s - -o %s"
lib_path pp_input
| Some file ->
Printf.sprintf "cpp -traditional-cpp%s %s -o %s"
lib_path file pp_input
let () =
if Utils.String.Set.mem "cpp" EvalOpt.verbose
then Printf.eprintf "%s\n%!" cpp_cmd;
if Sys.command cpp_cmd <> 0 then
external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd)
(* Instantiating the lexer *)
module Lexer = Lexer.Make (LexToken)
let Lexer.{read; buffer; get_pos; get_last; close} =
Lexer.open_token_stream (Some pp_input)
and cout = stdout
let log = Lexer.output_token ~offsets:EvalOpt.offsets
EvalOpt.mode EvalOpt.cmd cout
and close_all () = close (); close_out cout
(* Tokeniser *)
let tokeniser = read ~log
(* Main *)
let () =
try
let ast = Parser.contract tokeniser buffer in
if Utils.String.Set.mem "ast" EvalOpt.verbose
then AST.print_tokens ast
with
Lexer.Error err ->
close_all ();
Lexer.print_error ~offsets EvalOpt.mode err
| Parser.Error ->
let region = get_last () in
let error = Region.{region; value=ParseError} in
let () = close_all () in
print_error ~offsets EvalOpt.mode error
| Sys_error msg -> Utils.highlight msg
(*
(* Temporary: force dune to build AST2.ml *)
let () =
let open AST2 in
let _ = s_ast in
()
(*
(* Temporary: force dune to build AST2.ml *)
let () =
if false then
let _ = Typecheck2.annotate in
()
else
()
*)
*)

138
src/ligo/ligo-parser/Pos.ml Normal file
View File

@ -0,0 +1,138 @@
type t = <
byte : Lexing.position;
point_num : int;
point_bol : int;
file : string;
line : int;
set_file : string -> t;
set_line : int -> t;
set_offset : int -> t;
set : file:string -> line:int -> offset:int -> t;
new_line : string -> t;
add_nl : t;
shift_bytes : int -> t;
shift_one_uchar : int -> t;
offset : [`Byte | `Point] -> int;
column : [`Byte | `Point] -> int;
line_offset : [`Byte | `Point] -> int;
byte_offset : int;
is_ghost : bool;
to_string : ?offsets:bool -> [`Byte | `Point] -> string;
compact : ?offsets:bool -> [`Byte | `Point] -> string;
anonymous : ?offsets:bool -> [`Byte | `Point] -> string
>
type pos = t
(* Constructors *)
let sprintf = Printf.sprintf
let make ~byte ~point_num ~point_bol =
let () = assert (point_num >= point_bol) in
object (self)
val byte = byte
method byte = byte
val point_num = point_num
method point_num = point_num
val point_bol = point_bol
method point_bol = point_bol
method set_file file =
{< byte = Lexing.{byte with pos_fname = file} >}
method set_line line =
{< byte = Lexing.{byte with pos_lnum = line} >}
method set_offset offset =
{< byte = Lexing.{byte with pos_cnum = byte.pos_bol + offset} >}
method set ~file ~line ~offset =
let pos = self#set_file file in
let pos = pos#set_line line in
let pos = pos#set_offset offset
in pos
(* The string must not contain '\n'. See [new_line]. *)
method shift_bytes len =
{< byte = Lexing.{byte with pos_cnum = byte.pos_cnum + len};
point_num = point_num + len >}
method shift_one_uchar len =
{< byte = Lexing.{byte with pos_cnum = byte.pos_cnum + len};
point_num = point_num + 1 >}
method add_nl =
{< byte = Lexing.{byte with
pos_lnum = byte.pos_lnum + 1;
pos_bol = byte.pos_cnum};
point_bol = point_num >}
method new_line string =
let len = String.length string
in (self#shift_bytes len)#add_nl
method is_ghost = byte = Lexing.dummy_pos
method file = byte.Lexing.pos_fname
method line = byte.Lexing.pos_lnum
method offset = function
`Byte -> Lexing.(byte.pos_cnum - byte.pos_bol)
| `Point -> point_num - point_bol
method column mode = 1 + self#offset mode
method line_offset = function
`Byte -> byte.Lexing.pos_bol
| `Point -> point_bol
method byte_offset = byte.Lexing.pos_cnum
method to_string ?(offsets=true) mode =
let offset = self#offset mode in
let horizontal, value =
if offsets then "character", offset else "column", offset + 1
in sprintf "File \"%s\", line %i, %s %i"
self#file self#line horizontal value
method compact ?(offsets=true) mode =
if self#is_ghost then "ghost"
else
let offset = self#offset mode in
sprintf "%s:%i:%i"
self#file self#line (if offsets then offset else offset + 1)
method anonymous ?(offsets=true) mode =
if self#is_ghost then "ghost"
else sprintf "%i:%i" self#line
(if offsets then self#offset mode else self#column mode)
end
let ghost = make ~byte:Lexing.dummy_pos ~point_num:(-1) ~point_bol:(-1)
let min =
let byte = Lexing.{
pos_fname = "";
pos_lnum = 1;
pos_bol = 0;
pos_cnum = 0}
in make ~byte ~point_num:0 ~point_bol:0
(* Comparisons *)
let equal pos1 pos2 =
pos1#file = pos2#file && pos1#byte_offset = pos2#byte_offset
let lt pos1 pos2 =
pos1#file = pos2#file && pos1#byte_offset < pos2#byte_offset

View File

@ -0,0 +1,107 @@
(* Positions in a file
A position in a file denotes a single unit belonging to it, for
example, in an ASCII text file, it is a particular character within
that file (the unit is the byte in this instance, since in ASCII
one character is encoded with one byte).
Units can be either bytes (as ASCII characters) or, more
generally, unicode points.
The type for positions is the object type [t].
We use here lexing positions to denote byte-oriented positions
(field [byte]), and we manage code points by means of the fields
[point_num] and [point_bol]. These two fields have a meaning
similar to the fields [pos_cnum] and [pos_bol], respectively, from
the standard module [Lexing]. That is to say, [point_num] holds the
number of code points since the beginning of the file, and
[point_bol] the number of code points since the beginning of the
current line.
The name of the file is given by the field [file], and the line
number by the field [line].
*)
type t = <
(* Payload *)
byte : Lexing.position;
point_num : int;
point_bol : int;
file : string;
line : int;
(* Setters *)
set_file : string -> t;
set_line : int -> t;
set_offset : int -> t;
set : file:string -> line:int -> offset:int -> t;
(* The call [pos#new_line s], where the string [s] is either "\n" or
"\c\r", updates the position [pos] with a new line. *)
new_line : string -> t;
add_nl : t;
(* The call [pos#shift_bytes n] evaluates in a position that is the
translation of position [pos] of [n] bytes forward in the
file. The call [pos#shift_one_uchar n] is similar, except that it
assumes that [n] is the number of bytes making up one unicode
point. *)
shift_bytes : int -> t;
shift_one_uchar : int -> t;
(* Getters *)
(* The call [pos#offset `Byte] provides the horizontal offset of the
position [pos] in bytes. (An offset is the number of units, like
bytes, since the beginning of the current line.) The call
[pos#offset `Point] is the offset counted in number of unicode
points.
The calls to the method [column] are similar to those to
[offset], except that they give the current column number.
The call [pos#line_offset `Byte] is the offset of the line of
position [pos], counted in bytes. Dually, [pos#line_offset
`Point] counts the same offset in code points.
The call [pos#byte_offset] is the offset of the position [pos]
since the beginning of the file, counted in bytes.
*)
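(* For example, in a file containing "ab\ncd", the position of the
byte 'd' has [offset `Byte = 1], [column `Byte = 2],
[line_offset `Byte = 3] and [byte_offset = 4] (a worked sketch
under the conventions above). *)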
offset : [`Byte | `Point] -> int;
column : [`Byte | `Point] -> int;
line_offset : [`Byte | `Point] -> int;
byte_offset : int;
(* Predicates *)
is_ghost : bool;
(* Conversions to [string] *)
to_string : ?offsets:bool -> [`Byte | `Point] -> string;
compact : ?offsets:bool -> [`Byte | `Point] -> string;
anonymous : ?offsets:bool -> [`Byte | `Point] -> string
>
type pos = t
(* Constructors *)
val make : byte:Lexing.position -> point_num:int -> point_bol:int -> t
(* Special positions *)
val ghost : t (* Same as [Lexing.dummy_pos] *)
val min : t (* Lexing convention: line 1, offsets 0, file name "". *)
(* Comparisons *)
val equal : t -> t -> bool
val lt : t -> t -> bool

View File

@ -0,0 +1,128 @@
(* Regions of a file *)
let sprintf = Printf.sprintf
type t = <
start : Pos.t;
stop : Pos.t;
(* Setters *)
shift_bytes : int -> t;
shift_one_uchar : int -> t;
set_file : string -> t;
(* Getters *)
file : string;
pos : Pos.t * Pos.t;
byte_pos : Lexing.position * Lexing.position;
(* Predicates *)
is_ghost : bool;
(* Conversions to [string] *)
to_string : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string;
compact : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string
>
type region = t
type 'a reg = {region: t; value: 'a}
(* Injections *)
exception Invalid
let make ~(start: Pos.t) ~(stop: Pos.t) =
if start#file <> stop#file || start#byte_offset > stop#byte_offset
then raise Invalid
else
object
val start = start
method start = start
val stop = stop
method stop = stop
method shift_bytes len =
let start = start#shift_bytes len
and stop = stop#shift_bytes len
in {< start = start; stop = stop >}
method shift_one_uchar len =
let start = start#shift_one_uchar len
and stop = stop#shift_one_uchar len
in {< start = start; stop = stop >}
method set_file name =
let start = start#set_file name
and stop = stop#set_file name
in {< start = start; stop = stop >}
(* Getters *)
method file = start#file
method pos = start, stop
method byte_pos = start#byte, stop#byte
(* Predicates *)
method is_ghost = start#is_ghost && stop#is_ghost
(* Conversions to strings *)
method to_string ?(file=true) ?(offsets=true) mode =
let horizontal = if offsets then "character" else "column"
and start_offset =
if offsets then start#offset mode else start#column mode
and stop_offset =
if offsets then stop#offset mode else stop#column mode in
let info =
if file
then sprintf "in file \"%s\", line %i, %s"
(String.escaped start#file) start#line horizontal
else sprintf "at line %i, %s" start#line horizontal
in if stop#line = start#line
then sprintf "%ss %i-%i" info start_offset stop_offset
else sprintf "%s %i to line %i, %s %i"
info start_offset stop#line horizontal stop_offset
method compact ?(file=true) ?(offsets=true) mode =
let start_str = start#anonymous ~offsets mode
and stop_str = stop#anonymous ~offsets mode in
if start#file = stop#file then
if file then sprintf "%s:%s-%s" start#file start_str stop_str
else sprintf "%s-%s" start_str stop_str
else sprintf "%s:%s-%s:%s" start#file start_str stop#file stop_str
end
(* Special regions *)
let ghost = make ~start:Pos.ghost ~stop:Pos.ghost
let min = make ~start:Pos.min ~stop:Pos.min
(* Comparisons *)
let equal r1 r2 =
r1#file = r2#file
&& Pos.equal r1#start r2#start
&& Pos.equal r1#stop r2#stop
let lt r1 r2 =
r1#file = r2#file
&& not r1#is_ghost
&& not r2#is_ghost
&& Pos.lt r1#start r2#start
&& Pos.lt r1#stop r2#stop
(* The cover is computed componentwise, so [cover r1 r2] and
[cover r2 r1] are equal, as stated in the interface, even when one
region is nested in the other. *)
let cover r1 r2 =
if r1#is_ghost
then r2
else if r2#is_ghost
then r1
else
let start = if Pos.lt r1#start r2#start then r1#start else r2#start
and stop = if Pos.lt r2#stop r1#stop then r1#stop else r2#stop
in make ~start ~stop

View File

@ -0,0 +1,125 @@
(* Regions of a file
A _region_ is a contiguous series of bytes, for example, in a text
file. It is here denoted by the object type [t].
The start (included) of the region is given by the field [start],
which is a _position_, and the end (excluded) is the position given
by the field [stop]. The convention of including the start and
excluding the end makes it possible to have empty regions if, and only if,
[start = stop]. See module [Pos] for the definition of positions.
The first byte of a file starts at the offset zero (that is,
column one), and [start] is always lower than or equal to [stop],
and they must refer to the same file.
*)
type t = <
start : Pos.t;
stop : Pos.t;
(* Setters *)
(* The call [region#shift_bytes n] evaluates in a region that is the
translation of region [region] of [n] bytes forward in the
file. The call [region#shift_one_uchar n] is similar, except that
it assumes that [n] is the number of bytes making up one unicode
point. The call [region#set_file f] sets the file name to be
[f]. *)
shift_bytes : int -> t;
shift_one_uchar : int -> t;
set_file : string -> t;
(* Getters *)
(* The method [file] returns the file name.
The method [pos] returns the values of the fields [start] and [stop].
The method [byte_pos] returns the start and end positions of the
region at hand _interpreting them as lexing positions_, that is,
the unit is the byte. *)
file : string;
pos : Pos.t * Pos.t;
byte_pos : Lexing.position * Lexing.position;
(* Predicates *)
is_ghost : bool;
(* Conversions to [string] *)
(* The call [region#to_string ~file ~offsets mode] evaluates in a
string denoting the region [region].
The name of the file is present if, and only if, [file = true] or
[file] is missing.
The positions in the file are expressed as horizontal offsets if
[offsets = true] or [offsets] is missing (the default), otherwise
as columns.
If [mode = `Byte], those positions will be assumed to have bytes
as their unit, otherwise, if [mode = `Point], they will be
assumed to refer to code points.
The method [compact] has the same signature and calling
convention as [to_string], except that the resulting string is
more compact.
*)
to_string : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string;
compact : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string
>
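(* For example, a region covering byte offsets 3 to 5 on line 1 of a
file "a.ligo" would be rendered by [to_string `Byte] as
in file "a.ligo", line 1, characters 3-5
and by [compact `Byte] as
a.ligo:1:3-1:5
(a sketch following the rendering conventions above; "a.ligo" is a
made-up file name). *)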
type region = t
type 'a reg = {region: t; value: 'a}
(* Constructors *)
(* The function [make] creates a region from two positions. If the
positions are not properly ordered or refer to different files, the
exception [Invalid] is raised. *)
exception Invalid
val make : start:Pos.t -> stop:Pos.t -> t
(* Special regions *)
(* To deal with ghost expressions, that is, pieces of abstract syntax
that have not been built from excerpts of concrete syntax, we need
_ghost regions_. The module [Pos] provides a [ghost] position, and
we also provide a [ghost] region and, in type [t], the method
[is_ghost] to check it. *)
val ghost : t (* Two [Pos.ghost] positions *)
(* Occasionally, we may need a minimum region. It is here made of two
minimal positions. *)
val min : t (* Two [Pos.min] positions *)
(* Comparisons *)
(* Two regions are equal if, and only if, they refer to the same file
and their start positions are equal and their stop positions are
equal. See [Pos.equal]. Note that [r1] and [r2] can be ghosts. *)
val equal : t -> t -> bool
(* The call [lt r1 r2] ("lower than") has the value [true] if, and
only if, regions [r1] and [r2] refer to the same file, none is a
ghost and the start position of [r1] is lower than that of
[r2]. (See [Pos.lt].) *)
val lt : t -> t -> bool
(* Given two regions [r1] and [r2], we may want the region [cover r1
r2] that covers [r1] and [r2]. We have the property [equal (cover r1 r2)
(cover r2 r1)]. (In a sense, it is the maximum region, but we avoid
that name because of the [min] function above.) If [r1] is a ghost,
the cover is [r2], and if [r2] is a ghost, the cover is [r1]. *)
val cover : t -> t -> t
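(* For instance, covering the regions of the first and last tokens of
a phrase yields the region of the whole phrase:
cover first.region last.region
spans from [first.region#start] to [last.region#stop] (a sketch;
[first] and [last] stand for any two ['a reg] values of the same
file, the former starting first). *)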

View File

@ -0,0 +1,45 @@
type t is int * string
type u is t
type v is record
foo: key;
bar: mutez;
baz: address
end
type w is K of (U of int) // v * u
type i is int;
const x : v =
record
foo = 4;
bar = 5;
baz = 0x3244
end
(* Block comment *)
entrypoint g (storage s : u; const l : list (int))
: operation (list) is
var m : map (int, string) := empty_map;
var y : v := copy x with record bar = 7 end;
function f (const x : int) : int is
var y : int := 5 - x
const z : int = 6
begin
y := x + y
end with y * 2
begin
y.[4] := "hello";
match l with
[] -> null
| h#t -> q (h+2)
end;
begin
g (Unit);
fail "in extremis"
end
end with (s, ([]: (u * operation (list))))

View File

@ -0,0 +1,64 @@
type store is
record
goal : nat;
deadline : timestamp;
backers : map (address, nat);
funded : bool;
end
entrypoint contribute (storage store : store;
const sender : address;
const amount : mutez)
: store * list (operation) is
var operations : list (operation) := nil
const s : list (int) = list [1; 2; 3]
const t : set (int) = set []
block {
if now > store.deadline then
fail "Deadline passed";
else
case store.backers[sender] of
None -> store.backers[sender] := Some (amount)
// None -> patch store.backers with map sender -> amount end
| _ -> skip
end
} with (store, operations)
entrypoint withdraw (storage store : store; const sender : address)
: store * list (operation) is
var operations : list (operation) := list end
begin
// if set ["a"; "b"] contains x then skip else skip;
if sender = owner then
if now (Unit) >= store.deadline then
if balance >= store.goal then {
store.funded := True;
// patch store with record funded = True end;
operations := list [Transfer (owner, balance)];
};
else fail "Below target"
else { fail "Too soon"; }
else skip
end with (store, operations)
entrypoint claim (storage store : store; const sender : address)
: store * list (operation) is
var operations : list (operation) := list []
var amount : mutez := 0
begin
if now <= store.deadline then
fail "Too soon"
else
case store.backers[sender] of
None ->
fail "Not a backer"
| Some (amount) ->
if balance >= store.goal or store.funded then
fail "Cannot refund"
else
begin
operations := list [Transfer (sender, amount)];
remove sender from map store.backers
end
end
end with (store, operations)

View File

@ -0,0 +1,274 @@
[@@@warning "-27"] (* TODO *)
[@@@warning "-32"] (* TODO *)
[@@@warning "-30"]
module SMap = Map.Make(String)
module I = AST2.O
module O = struct
type asttodo = [`TODO] (* occurrences of asttodo will point to some part of the original parser AST *)
type name_and_region = {name: string; orig: Region.t}
type type_name = name_and_region
type var_name = name_and_region
type field_name = name_and_region
type pattern =
PVar of var_name
| PWild
| PInt of Z.t
| PBytes of Hex.t
| PString of string
| PUnit
| PFalse
| PTrue
| PNone
| PSome of pattern
| PCons of pattern * pattern
| PNull
| PRecord of (field_name * pattern) SMap.t
type type_constructor =
Option
| List
| Set
| Map
type type_expr_case =
Sum of (type_name * type_expr) SMap.t
| Record of (field_name * type_expr) SMap.t
| TypeApp of type_constructor * (type_expr list)
| Function of { arg: type_expr; ret: type_expr }
| Ref of type_expr
| String
| Bytes
| Int
| Unit
| Bool
and type_expr = { type_expr: type_expr_case; name: type_name option; orig: Region.t }
type typed_var = { name:var_name; ty:type_expr; orig: asttodo }
type type_decl = { name: type_name; ty:type_expr; orig: asttodo }
type expr_case =
App of { operator: operator; arguments: expr list }
| Var of typed_var
| Constant of constant
| Record of (field_name * expr) list
| Lambda of lambda
and expr = { expr: expr_case; ty:type_expr; orig: asttodo }
and decl = { var: typed_var; value: expr; orig: asttodo }
and lambda = {
parameter: typed_var;
declarations: decl list;
instructions: instr list;
result: expr;
}
and operator_case =
Function of var_name
| Constructor of var_name
| UpdateField of field_name
| GetField of field_name
| Or | And | Lt | Leq | Gt | Geq | Equal | Neq | Cat | Cons | Add | Sub | Mult | Div | Mod
| Neg | Not
| Set
| MapLookup
and operator = { operator: operator_case; ty:type_expr; orig: asttodo }
and constant =
Unit
| Int of Z.t | String of string | Bytes of Hex.t
| False | True
| Null
| EmptySet
| CNone
and instr =
Assignment of { name: var_name; value: expr; orig: asttodo }
| While of { condition: expr; body: instr list; orig: asttodo }
| ForCollection of { list: expr; var: var_name; body: instr list; orig: asttodo }
| Match of { expr: expr; cases: (pattern * instr list) list; orig: asttodo }
| ProcedureCall of { expr: expr; orig: asttodo } (* expr returns unit, drop the result. Similar to OCaml's ";". *)
| Fail of { expr: expr; orig: asttodo }
type ast = {
types : type_decl list;
storage_decl : typed_var;
declarations : decl list;
orig : AST.t
}
end
type te = O.type_expr list SMap.t
type ve = O.type_expr list SMap.t
type tve = te * ve
let fold_map f a l =
let f (acc, l) elem =
let acc', elem' = f acc elem
in acc', (elem' :: l) in
let last_acc, last_l = List.fold_left f (a, []) l
in last_acc, List.rev last_l
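(* E.g. threading a running sum while keeping the partial sums
(a small sanity sketch):
fold_map (fun acc x -> acc + x, acc + x) 0 [1;2;3] = (6, [1; 3; 6]) *)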
let map f l = List.rev (List.rev_map f l)
let shadow (name : string) (typ : O.type_expr) (env : O.type_expr list SMap.t)
: O.type_expr list SMap.t =
SMap.update name (function None -> Some [typ] | Some tl -> Some (typ :: tl)) env
let lookup (name : string) (env : O.type_expr list SMap.t) : O.type_expr =
match SMap.find_opt name env with
Some (latest :: _) -> latest
| Some [] | None -> failwith "Unbound variable"
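(* Shadowing stacks types instead of overwriting them, so an inner
declaration hides an outer one without destroying it; e.g. for any
two type expressions ty1 and ty2 (a sketch):
lookup "x" (shadow "x" ty2 (shadow "x" ty1 SMap.empty)) = ty2 *)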
let string_of_name ({name;_} : I.name_and_region) = name
let a_name_and_region ({name; orig} : I.name_and_region) : O.name_and_region =
{name; orig}
let a_type_constructor (tve : tve) : I.type_constructor -> O.type_constructor = function
Option -> Option
| List -> List
| Set -> Set
| Map -> Map
let a_type_expr_case (tve : tve) : I.type_expr_case -> O.type_expr_case = function
Sum lt -> failwith "TODO"
| Record lt -> failwith "TODO"
| TypeApp (tc, args) -> failwith "TODO"
| Function {arg;ret} -> failwith "TODO"
| Ref t -> failwith "TODO"
| String -> String
| Bytes -> Bytes
| Int -> Int
| Unit -> Unit
| Bool -> Bool
let a_type_expr (tve : tve) ({type_expr;name;orig} : I.type_expr) : O.type_expr =
let type_expr = a_type_expr_case tve type_expr in
let name = match name with
None -> None
|Some name -> Some (a_name_and_region name)
in {type_expr;name;orig}
let a_type (te,ve : tve) ({name;ty;orig} : I.type_decl) : tve * O.type_decl =
let ty = a_type_expr (te,ve) ty in
let tve = shadow (string_of_name name) ty te, ve in
let name = (a_name_and_region name) in
tve, {name; ty; orig}
let a_types (tve : tve) (l : I.type_decl list) : tve * O.type_decl list =
fold_map a_type tve l
let a_storage_decl (_ : tve) (_ : I.typed_var) : tve * O.typed_var =
failwith "TODO"
let type_expr_case_equal (t1 : O.type_expr_case) (t2 : O.type_expr_case) : bool = match t1,t2 with
Sum m1, Sum m2 -> failwith "TODO" (* of (O.name_and_region * O.type_expr) SMap.t *)
| Record m1, Record m2 -> failwith "TODO" (* of (O.name_and_region * O.type_expr) SMap.t *)
| TypeApp (tc1, args1), TypeApp (tc2, args2) -> failwith "TODO" (* of O.type_constructor * O.type_expr list *)
| Function {arg=arg1;ret=ret1}, Function {arg=arg2;ret=ret2} -> failwith "TODO" (* of { arg : O.type_expr; ret : O.type_expr; } *)
| Ref t1, Ref t2 -> failwith "TODO" (* of O.type_expr *)
| String, String -> true
| Bytes, Bytes -> true
| Int, Int -> true
| Unit, Unit -> true
| Bool, Bool -> true
| _ -> false
let type_expr_equal (t1 : O.type_expr) (t2 : O.type_expr) : bool =
type_expr_case_equal t1.type_expr t2.type_expr
let check_type_expr_equal (expected : O.type_expr) (actual : O.type_expr) : unit =
if type_expr_equal expected actual then
()
else
failwith "got [actual] but expected [expected]"
let a_var_expr (te,ve : tve) (expected : O.type_expr) (var_name : I.name_and_region) : O.expr_case =
check_type_expr_equal expected (lookup (string_of_name var_name) ve);
Var { name = a_name_and_region var_name;
ty = expected;
orig = `TODO }
let a_constant_expr (tve : tve) (expected : O.type_expr) (constant : I.constant) : O.expr_case =
let to_type_expr type_expr_case : O.type_expr =
{ type_expr = type_expr_case; name = None; orig = Region.ghost } in
let actual : O.type_expr = match constant with
Unit -> to_type_expr Unit
| Int _ -> to_type_expr Int
| String _ -> to_type_expr String
| Bytes _ -> to_type_expr Bytes
| False -> to_type_expr Bool
| True -> to_type_expr Bool
| Null t -> a_type_expr tve t
| EmptySet t -> a_type_expr tve t
| CNone t -> a_type_expr tve t
in
check_type_expr_equal expected actual;
let c : O.constant = match constant with
Unit -> Unit
| Int i -> Int i
| String s -> String s
| Bytes b -> Bytes b
| False -> False
| True -> True
| Null _ -> Null
| EmptySet _ -> EmptySet
| CNone _ -> CNone
in Constant c
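(* E.g. checking the constant [Int z] against an expected [Int] type
yields [Constant (Int z)], while checking it against [Bool] raises
the type-mismatch failure above (a sketch of the intended
behaviour). *)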
let map_to_list m =
List.rev (SMap.fold (fun field_name_string p l -> p :: l) m [])
let a_field tve (expected,expr) =
failwith "TODO"
let a_record (tve : tve) (expected : O.type_expr) (record : (I.field_name * I.expr) list)
: O.expr_case =
let {type_expr = expected; _} : O.type_expr = expected in
let expected = match expected with
Record fields -> fields
| _ -> failwith "expected some_type but got record" in
let expected_and_field =
List.combine
(map_to_list expected)
record (* TODO SHOULD BE (map_to_list record) *) in
Record (map (a_field tve) expected_and_field)
let a_expr_case (te,ve : tve) (expected : O.type_expr) : I.expr -> O.expr_case = function
App {operator;arguments} -> failwith "TODO"
| Var var_name -> a_var_expr (te,ve) expected var_name
| Constant constant -> a_constant_expr (te,ve) expected constant
| Record record -> a_record (te,ve) expected record
| Lambda lambda -> failwith "TODO"
let a_expr (te,ve : tve) (expected : O.type_expr) (e : I.expr) : O.expr =
let expr_case = a_expr_case (te,ve) expected e in
{ expr = expr_case; ty = expected; orig = `TODO }
let a_declaration (te,ve : tve) ({name;ty;value} : I.decl) : tve * O.decl =
let ty = a_type_expr (te,ve) ty in
let value = a_expr (te,ve) ty value in
let ve = shadow (string_of_name name) ty ve in
let name = a_name_and_region name in
(te,ve), {var={name;ty;orig=`TODO};value;orig = `TODO}
let a_declarations (tve : tve) (l : I.decl list) : tve * O.decl list =
fold_map a_declaration tve l
let a_ast I.{types; storage_decl; declarations; orig} =
let tve = SMap.empty, SMap.empty in
let tve, types = a_types tve types in
let tve, storage_decl = a_storage_decl tve storage_decl in
let tve, declarations = a_declarations tve declarations in
let _ = tve in
O.{types; storage_decl; declarations; orig}
let annotate : I.ast -> O.ast = a_ast

View File

@ -0,0 +1,108 @@
[@@@warning "-30"]
module SMap : Map.S with type key = string
module I = AST2.O
module O : sig
type asttodo = [`TODO] (* occurrences of asttodo will point to some part of the original parser AST *)
type name_and_region = {name: string; orig: Region.t}
type type_name = name_and_region
type var_name = name_and_region
type field_name = name_and_region
type pattern =
PVar of var_name
| PWild
| PInt of Z.t
| PBytes of Hex.t
| PString of string
| PUnit
| PFalse
| PTrue
| PNone
| PSome of pattern
| PCons of pattern * pattern
| PNull
| PRecord of (field_name * pattern) SMap.t
type type_constructor =
Option
| List
| Set
| Map
type type_expr_case =
Sum of (type_name * type_expr) SMap.t
| Record of (field_name * type_expr) SMap.t
| TypeApp of type_constructor * (type_expr list)
| Function of { arg: type_expr; ret: type_expr }
| Ref of type_expr
| String
| Bytes
| Int
| Unit
| Bool
and type_expr = { type_expr: type_expr_case; name: type_name option; orig: Region.t }
type typed_var = { name:var_name; ty:type_expr; orig: asttodo }
type type_decl = { name:type_name; ty:type_expr; orig: asttodo }
type expr_case =
App of { operator: operator; arguments: expr list }
| Var of typed_var
| Constant of constant
| Record of (field_name * expr) list
| Lambda of lambda
and expr = { expr: expr_case; ty:type_expr; orig: asttodo }
and decl = { var: typed_var; value: expr; orig: asttodo }
and lambda = {
parameter: typed_var;
declarations: decl list;
instructions: instr list;
result: expr;
}
and operator_case =
Function of var_name
| Constructor of var_name
| UpdateField of field_name
| GetField of field_name
| Or | And | Lt | Leq | Gt | Geq | Equal | Neq | Cat | Cons | Add | Sub | Mult | Div | Mod
| Neg | Not
| Set
| MapLookup
and operator = { operator: operator_case; ty:type_expr; orig: asttodo }
and constant =
Unit
| Int of Z.t | String of string | Bytes of Hex.t
| False | True
| Null
| EmptySet
| CNone
and instr =
Assignment of { name: var_name; value: expr; orig: asttodo }
| While of { condition: expr; body: instr list; orig: asttodo }
| ForCollection of { list: expr; var: var_name; body: instr list; orig: asttodo }
| Match of { expr: expr; cases: (pattern * instr list) list; orig: asttodo }
| ProcedureCall of { expr: expr; orig: asttodo } (* expr returns unit, drop the result. Similar to OCaml's ";". *)
| Fail of { expr: expr; orig: asttodo }
type ast = {
types : type_decl list;
storage_decl : typed_var;
declarations : decl list;
orig : AST.t
}
end
val annotate : I.ast -> O.ast

View File

@ -0,0 +1,157 @@
(* Utility types and functions *)
(* Identity *)
let id x = x
(* Combinators *)
let (<@) f g x = f (g x)
let swap f x y = f y x
let lambda = fun x _ -> x
let curry f x y = f (x,y)
let uncurry f (x,y) = f x y
(* Parametric rules for sequences *)
type 'a nseq = 'a * 'a list
type ('a,'sep) nsepseq = 'a * ('sep * 'a) list
type ('a,'sep) sepseq = ('a,'sep) nsepseq option
(* Consing *)
let nseq_cons x (hd,tl) = x, hd::tl
let nsepseq_cons x sep (hd,tl) = x, (sep,hd)::tl
let sepseq_cons x sep = function
None -> x, []
| Some (hd,tl) -> x, (sep,hd)::tl
(* Rightwards iterators *)
let nseq_foldl f a (hd,tl) = List.fold_left f a (hd::tl)
let nsepseq_foldl f a (hd,tl) =
List.fold_left (fun a (_,e) -> f a e) (f a hd) tl
let sepseq_foldl f a = function
None -> a
| Some s -> nsepseq_foldl f a s
let nseq_iter f (hd,tl) = List.iter f (hd::tl)
let nsepseq_iter f (hd,tl) = f hd; List.iter (f <@ snd) tl
let sepseq_iter f = function
None -> ()
| Some s -> nsepseq_iter f s
(* Reversing *)
let nseq_rev (hd,tl) =
let rec aux acc = function
[] -> acc
| x::l -> aux (nseq_cons x acc) l
in aux (hd,[]) tl
let nsepseq_rev =
let rec aux acc = function
hd, (sep,snd)::tl -> aux ((sep,hd)::acc) (snd,tl)
| hd, [] -> hd, acc in
function
hd, (sep,snd)::tl -> aux [sep,hd] (snd,tl)
| s -> s
let sepseq_rev = function
None -> None
| Some seq -> Some (nsepseq_rev seq)
(* Leftwards iterators *)
let nseq_foldr f (hd,tl) = List.fold_right f (hd::tl)
let nsepseq_foldr f (hd,tl) a = f hd (List.fold_right (f <@ snd) tl a)
let sepseq_foldr f = function
None -> fun a -> a
| Some s -> nsepseq_foldr f s
(* Conversions to lists *)
let nseq_to_list (x,y) = x::y
let nsepseq_to_list (x,y) = x :: List.map snd y
let sepseq_to_list = function
None -> []
| Some s -> nsepseq_to_list s
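(* A few sanity sketches of the combinators above:
nseq_to_list (1, [2; 3]) = [1; 2; 3]
nsepseq_to_list (1, [(",", 2); (",", 3)]) = [1; 2; 3]
nseq_rev (1, [2; 3]) = (3, [2; 1]) *)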
(* Optional values *)
module Option =
struct
let apply f x =
match x with
Some y -> Some (f y)
| None -> None
let rev_apply x y =
match x with
Some f -> f y
| None -> y
let to_string = function
Some x -> x
| None -> ""
end
(* Modules based on [String], like sets and maps. *)
module String =
struct
include String
module Ord =
struct
type nonrec t = t
let compare = compare
end
module Map = Map.Make (Ord)
module Set = Set.Make (Ord)
end
(* Integers *)
module Int =
struct
type t = int
module Ord =
struct
type nonrec t = t
let compare = compare
end
module Map = Map.Make (Ord)
module Set = Set.Make (Ord)
end
(* Effectful symbol generator *)
let gen_sym =
let counter = ref 0 in
fun () -> incr counter; "v" ^ string_of_int !counter
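(* Two successive calls yield distinct names: the first call of a
session returns "v1", the next "v2", and so on. *)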
(* General tracing function *)
let trace text = function
None -> ()
| Some chan -> output_string chan text; flush chan
(* Printing a string in red to standard error *)
let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg

View File

@ -0,0 +1,97 @@
(* Utility types and functions *)
(* Polymorphic identity function *)
val id : 'a -> 'a
(* Combinators *)
val ( <@ ) : ('a -> 'b) -> ('c -> 'a) -> 'c -> 'b
val swap : ('a -> 'b -> 'c) -> 'b -> 'a -> 'c
val lambda : 'a -> 'b -> 'a
val curry : ('a * 'b -> 'c) -> 'a -> 'b -> 'c
val uncurry : ('a -> 'b -> 'c) -> 'a * 'b -> 'c
(* Parametric rules for sequences
nseq: non-empty sequence;
sepseq: (possibly empty) sequence of separated items;
nsepseq: non-empty sequence of separated items.
*)
type 'a nseq = 'a * 'a list
type ('a,'sep) nsepseq = 'a * ('sep * 'a) list
type ('a,'sep) sepseq = ('a,'sep) nsepseq option
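(* For instance, a source fragment "1,2,3" may be encoded as the
[(int, string) nsepseq] value [(1, [(",", 2); (",", 3)])], and an
empty separated sequence as the [sepseq] value [None] (a sketch of
the intended encoding). *)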
(* Consing *)
val nseq_cons : 'a -> 'a nseq -> 'a nseq
val nsepseq_cons : 'a -> 'sep -> ('a,'sep) nsepseq -> ('a,'sep) nsepseq
val sepseq_cons : 'a -> 'sep -> ('a,'sep) sepseq -> ('a,'sep) nsepseq
(* Reversing *)
val nseq_rev: 'a nseq -> 'a nseq
val nsepseq_rev: ('a,'sep) nsepseq -> ('a,'sep) nsepseq
val sepseq_rev: ('a,'sep) sepseq -> ('a,'sep) sepseq
(* Rightwards iterators *)
val nseq_foldl : ('a -> 'b -> 'a) -> 'a -> 'b nseq -> 'a
val nsepseq_foldl : ('a -> 'b -> 'a) -> 'a -> ('b,'c) nsepseq -> 'a
val sepseq_foldl : ('a -> 'b -> 'a) -> 'a -> ('b,'c) sepseq -> 'a
val nseq_iter : ('a -> unit) -> 'a nseq -> unit
val nsepseq_iter : ('a -> unit) -> ('a,'b) nsepseq -> unit
val sepseq_iter : ('a -> unit) -> ('a,'b) sepseq -> unit
(* Leftwards iterators *)
val nseq_foldr : ('a -> 'b -> 'b) -> 'a nseq -> 'b -> 'b
val nsepseq_foldr : ('a -> 'b -> 'b) -> ('a,'c) nsepseq -> 'b -> 'b
val sepseq_foldr : ('a -> 'b -> 'b) -> ('a,'c) sepseq -> 'b -> 'b
(* Conversions to lists *)
val nseq_to_list : 'a nseq -> 'a list
val nsepseq_to_list : ('a,'b) nsepseq -> 'a list
val sepseq_to_list : ('a,'b) sepseq -> 'a list
(* Effectful symbol generator *)
val gen_sym : unit -> string
(* General tracing function *)
val trace : string -> out_channel option -> unit
(* Printing a string in red to standard error *)
val highlight : string -> unit
(* Working with optional values *)
module Option :
sig
val apply : ('a -> 'b) -> 'a option -> 'b option
val rev_apply : ('a -> 'a) option -> 'a -> 'a
val to_string : string option -> string
end
(* An extension to the standard module [String] *)
module String :
sig
include module type of String
module Map : Map.S with type key = t
module Set : Set.S with type elt = t
end
(* Integer maps *)
module Int :
sig
type t = int
module Map : Map.S with type key = t
module Set : Set.S with type elt = t
end

View File

@ -0,0 +1,10 @@
#!/bin/sh
set -e
if test -d ../../.git; then
echo true > dot_git_is_dir
else
echo false > dot_git_is_dir
cat ../../.git >> dot_git_is_dir
fi

70
src/ligo/ligo-parser/dune Normal file
View File

@ -0,0 +1,70 @@
(ocamllex LexToken)
(ocamllex Lexer)
(menhir
(merge_into Parser)
(modules ParToken Parser)
(flags -la 1 --explain --external-tokens LexToken))
(executables
(names LexerMain ParserMain)
(public_names ligo-lexer ligo-parser)
(package ligo-parser)
(modules_without_implementation Error)
(libraries getopt hex str uutf zarith))
;; The two (rule) directives below are for local development.
;; Running "dune build Parser.exe" produces a Parser.exe in this directory.
;; To remove it, run "dune clean".
(rule
(targets Parser.exe)
(deps ParserMain.exe)
(action (copy ParserMain.exe Parser.exe))
(mode promote-until-clean))
(rule
(targets Lexer.exe)
(deps LexerMain.exe)
(action (copy LexerMain.exe Lexer.exe))
(mode promote-until-clean))
(rule
(targets dot_git_is_dir)
(deps check_dot_git_is_dir.sh)
(action (run "sh" "check_dot_git_is_dir.sh")))
(rule
(targets .git_main_dir)
(deps dot_git_is_dir check_dot_git_is_dir.sh)
(action
(progn (run "sh" "check_dot_git_is_dir.sh")
(run "sh" "-c" "if \"$(cat dot_git_is_dir)\" = true; then printf %s '../../.git' > .git_main_dir; else cat ../../.git | sed -e 's/^gitdir: //' | sed -e 's|$|/../..|' > .git_main_dir; fi"))))
(rule
(targets .git_worktree_dir)
(deps dot_git_is_dir check_dot_git_is_dir.sh)
(action
(progn (run "sh" "check_dot_git_is_dir.sh")
(run "sh" "-c" "if \"$(cat dot_git_is_dir)\" = true; then printf %s '../../.git' > .git_worktree_dir; else cat ../../.git | sed -e 's/^gitdir: //' > .git_worktree_dir; fi"))))
(rule
(targets .gitHEAD)
(deps .git_main_dir .git_worktree_dir check_dot_git_is_dir.sh)
(action
(progn (run "sh" "check_dot_git_is_dir.sh")
(run "sh" "-c" "ln -s \"$(cat .git_worktree_dir)/HEAD\" .gitHEAD"))))
(rule
(targets Version.gitHEAD)
(deps .gitHEAD check_dot_git_is_dir.sh)
(action
(progn (run "sh" "check_dot_git_is_dir.sh")
(run "sh" "-c" "if git symbolic-ref HEAD >/dev/null 2>&1; then ln -s \"$(cat .git_main_dir)/$(git symbolic-ref HEAD)\" Version.gitHEAD; else ln -s \"$(cat .git_worktree_dir)/HEAD\" Version.gitHEAD; fi"))))
(rule
(targets Version.ml)
(deps Version.gitHEAD check_dot_git_is_dir.sh)
(action
(progn (run "sh" "check_dot_git_is_dir.sh")
(run "sh" "-c" "printf 'let version = \"%s\"'\\\\n \"$(git describe --always --dirty --abbrev=0)\" > Version.ml")))
(mode promote-until-clean))

View File

@ -0,0 +1,2 @@
(lang dune 1.7)
(using menhir 2.0)

View File

@ -0,0 +1,19 @@
opam-version : "2.0"
version : "1.0"
maintainer : "gabriel.alfour@gmail.com"
authors : [ "Galfour" ]
homepage : "https://gitlab.com/gabriel.alfour/ligo-parser"
bug-reports : "https://gitlab.com/gabriel.alfour/ligo-parser/issues"
dev-repo : "git+https://gitlab.com/gabriel.alfour/ligo-parser.git"
license : "MIT"
depends : [ "dune" "menhir" "hex" "zarith" "getopt" "uutf" ]
build : [
[ "sh" "-c" "printf 'let version = \"%s\"' \"$(git describe --always --dirty --abbrev=0)\" > Version.ml" ]
[ "dune" "build" "-p" name "-j" jobs ]
]
url {
src: "https://gitlab.com/gabriel.alfour/ligo-parser/-/archive/master/ligo-parser.tar.gz"
}

View File

@ -0,0 +1,229 @@
(* module I = AST (\* In *\) *)
(* module SMap = Map.Make(String) *)
(* type te = type_expr list SMap.t *)
(* type ve = type_expr list SMap.t *)
(* type tve = te * ve *)
(*
module I = AST (* In *)
module SMap = Map.Make(String)
module O = struct
open AST (* TODO: for now, should disappear *)
type t = ast
and type_expr =
Prod of cartesian
| Sum of (variant, vbar) Utils.nsepseq
| Record of record_type
| TypeApp of (type_name * type_tuple)
| ParType of type_expr par
| TAlias of variable
| Function of (type_expr list) * type_expr
| Mutable of type_expr
| Unit
| TODO of string
and te = type_expr list SMap.t
and ve = type_expr list SMap.t
and vte = ve * te
and ast = {
lambdas : lambda_decl list;
block : block
}
and lambda_decl =
FunDecl of fun_decl
| ProcDecl of proc_decl
and fun_decl = {
kwd_function : kwd_function;
var : variable;
param : parameters;
colon : colon;
ret_type : type_expr;
kwd_is : kwd_is;
body : block;
kwd_with : kwd_with;
return : checked_expr
}
and proc_decl = {
kwd_procedure : kwd_procedure;
var : variable;
param : parameters;
kwd_is : kwd_is;
body : block
}
and block = {
decls : value_decls;
opening : kwd_begin;
instr : instructions;
close : kwd_end
}
and value_decls = var_decl list
and var_decl = {
kind : var_kind;
var : variable;
colon : colon;
vtype : type_expr;
setter : Region.t; (* "=" or ":=" *)
init : checked_expr
}
and checked_expr = {ty:type_expr;expr:expr}
end [@warning "-30"]
open O
open AST
open Region
let mk_checked_expr ~ty ~expr = {ty;expr}
let mk_proc_decl ~kwd_procedure ~var ~param ~kwd_is ~body =
O.{kwd_procedure; var; param; kwd_is; body}
let mk_ast ~lambdas ~block = {lambdas;block}
let mk_fun_decl ~kwd_function ~var ~param ~colon ~ret_type ~kwd_is ~body ~kwd_with ~return =
O.{kwd_function; var; param; colon; ret_type; kwd_is; body; kwd_with; return}
let unreg : 'a reg -> 'a = fun {value; _} -> value
let unpar : 'a par -> 'a = (fun (_left_par, x, _right_par) -> x) @. unreg
let nsepseq_to_list : ('a,'sep) Utils.nsepseq -> 'a list =
fun (first, rest) -> first :: (map snd rest)
let sepseq_to_list : ('a,'sep) Utils.sepseq -> 'a list =
function
None -> []
| Some nsepseq -> nsepseq_to_list nsepseq
let rec xty : I.type_expr -> O.type_expr =
function
I.Prod x -> O.Prod x
| I.Sum x -> O.Sum (unreg x)
| I.Record x -> O.Record x
| I.TypeApp x -> O.TypeApp (unreg x)
| I.ParType {region;value=(l,x,r)} -> O.ParType {region;value=(l, xty x, r)}
| I.TAlias x -> O.TAlias x
let shadow (name : string) (typ : O.type_expr) (env : O.type_expr list SMap.t)
: O.type_expr list SMap.t =
SMap.update name (function None -> Some [typ] | Some tl -> Some (typ :: tl)) env
let shadow_list (name_typ_list : (string * O.type_expr) list) (env : O.type_expr list SMap.t)
: O.type_expr list SMap.t =
List.fold_left (fun acc (name, typ) -> shadow name typ acc) env name_typ_list
let type_decls_to_tenv (td : I.type_decl list) (te : te) : O.te =
td
|> List.map unreg
|> List.map (fun (_, name, _, type_expr) -> (unreg name, xty type_expr))
|> fun up -> shadow_list up te
let var_kind_to_ty : var_kind -> I.type_expr -> O.type_expr =
fun var_kind ty ->
match var_kind with
Mutable _ -> O.Mutable (xty ty)
| Const _ -> xty ty
let params_to_xty params ret_type =
unpar params
|> nsepseq_to_list
|> map (fun {value=(var_kind, _variable, _colon, type_expr);_} -> var_kind_to_ty var_kind type_expr)
|> fun param_types -> O.Function (param_types, ret_type)
let type_equal t1 t2 = match t1,t2 with
| O.Prod _x, O.Prod _y -> true (* TODO *)
| O.Sum _x, O.Sum _y -> true (* TODO *)
| _ -> false
exception TypeError of string
let check_type expr expected_type =
if type_equal expr.ty expected_type then expr
else raise (TypeError "oops")
let tc_expr (_te,_ve) expr = mk_checked_expr ~ty:(TODO "all expressions") ~expr (* TODO *)
let tc_var_decl : vte -> I.var_decl -> vte * O.var_decl =
fun (ve,te) var_decl ->
let vtype = (xty var_decl.vtype) in
let init = check_type (tc_expr (te,ve) var_decl.init) vtype in
let ve = shadow (unreg var_decl.var) vtype ve in
(ve,te), {
kind = var_decl.kind;
var = var_decl.var;
colon = var_decl.colon;
vtype;
setter = var_decl.setter;
init}
let tc_var_decls (ve,te) var_decls = fold_map tc_var_decl (ve,te) var_decls
let tc_block (te, ve : vte) (block : I.block) : vte * O.block =
let decls,opening,instr,close = block.decls, block.opening, block.instr, block.close in
let (ve,te), decls = tc_var_decls (ve,te) (decls |> unreg |> sepseq_to_list |> map unreg) in
(ve,te), O.{decls;opening;instr;close} (* TODO *)
let tc_proc_decl : vte -> I.proc_decl -> O.proc_decl =
fun vte proc_decl ->
let _vte', block' = tc_block vte (unreg proc_decl.body)
in mk_proc_decl
~kwd_procedure: proc_decl.kwd_procedure
~kwd_is: proc_decl.kwd_is
~var: proc_decl.var
~param: proc_decl.param
~body: block'
let tc_fun_decl : vte -> I.fun_decl -> O.fun_decl =
fun vte fun_decl ->
let vte', block' = tc_block vte (unreg fun_decl.body) in
let return' = tc_expr vte' fun_decl.return in
let checked_return' = check_type return' (xty fun_decl.ret_type)
in mk_fun_decl
~kwd_function: fun_decl.kwd_function
~colon: fun_decl.colon
~kwd_is: fun_decl.kwd_is
~kwd_with: fun_decl.kwd_with
~var: fun_decl.var
~param: fun_decl.param
~ret_type: (xty fun_decl.ret_type)
~body: block'
~return: checked_return'
let ve_lambda_decl : vte -> I.lambda_decl -> ve =
fun (ve,_te) ->
function
FunDecl {value;_} -> shadow value.var.value (params_to_xty value.param (xty value.ret_type)) ve
| ProcDecl {value;_} -> shadow value.var.value (params_to_xty value.param Unit) ve
let tc_lambda_decl (ve, te : vte) (whole : I.lambda_decl) : vte * O.lambda_decl =
match whole with
FunDecl {value;_} -> ((ve_lambda_decl (ve, te) whole), te), O.FunDecl (tc_fun_decl (ve, te) value)
| ProcDecl {value;_} -> ((ve_lambda_decl (ve, te) whole), te), O.ProcDecl (tc_proc_decl (ve, te) value)
let tc_ast (ast : I.ast) : O.ast =
(* te is the type environment, ve is the variable environment *)
let te =
SMap.empty
|> type_decls_to_tenv ast.types in
let ve =
SMap.empty
|> (match ast.parameter.value with (_,name,_,ty) -> shadow (unreg name) @@ xty ty)
|> shadow "storage" @@ xty (snd ast.storage.value)
|> shadow "operations" @@ xty (snd ast.operations.value)
in
let (ve',te'), lambdas = fold_map tc_lambda_decl (ve, te) ast.lambdas in
let (ve'', te''), block = tc_block (ve', te') (unreg ast.block) in
let _ve'' = ve'' in (* not needed anymore *)
let _te'' = te'' in (* not needed anymore *)
mk_ast ~lambdas ~block
*)