Merge AST_with_records into master
This commit is contained in:
commit
b5dc0c9e64
1
src/ligo/ligo-parser/.Lexer.ml.tag
Normal file
1
src/ligo/ligo-parser/.Lexer.ml.tag
Normal file
@ -0,0 +1 @@
|
||||
ocamlc: -w -42
|
0
src/ligo/ligo-parser/.LexerMain.tag
Normal file
0
src/ligo/ligo-parser/.LexerMain.tag
Normal file
1
src/ligo/ligo-parser/.Parser.mly.tag
Normal file
1
src/ligo/ligo-parser/.Parser.mly.tag
Normal file
@ -0,0 +1 @@
|
||||
--explain --external-tokens LexToken --base Parser ParToken.mly
|
0
src/ligo/ligo-parser/.ParserMain.tag
Normal file
0
src/ligo/ligo-parser/.ParserMain.tag
Normal file
7
src/ligo/ligo-parser/.gitignore
vendored
Normal file
7
src/ligo/ligo-parser/.gitignore
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
_build/*
|
||||
*/_build
|
||||
*~
|
||||
.merlin
|
||||
*/.merlin
|
||||
*.install
|
||||
/Version.ml
|
21
src/ligo/ligo-parser/.gitlab-ci.yml
Normal file
21
src/ligo/ligo-parser/.gitlab-ci.yml
Normal file
@ -0,0 +1,21 @@
|
||||
before_script:
|
||||
- apt-get update -qq
|
||||
- apt-get -y -qq install libhidapi-dev libcap-dev bubblewrap
|
||||
- wget https://github.com/ocaml/opam/releases/download/2.0.1/opam-2.0.1-x86_64-linux -O opam-2.0.1-x86_64-linux
|
||||
- cp opam-2.0.1-x86_64-linux /usr/local/bin/opam
|
||||
- chmod +x /usr/local/bin/opam
|
||||
- export PATH="/usr/local/bin${PATH:+:}${PATH:-}"
|
||||
- echo "$PATH"
|
||||
- printf '' | opam init
|
||||
- eval $(opam config env)
|
||||
- opam repository add tezos-opam-repository https://gitlab.com/gabriel.alfour/tezos-opam-repository.git
|
||||
- eval $(opam config env)
|
||||
- opam --version
|
||||
- printf '' | ocaml
|
||||
|
||||
default-job:
|
||||
script:
|
||||
- opam install -y --working-dir .
|
||||
artifacts:
|
||||
paths:
|
||||
- Parser.exe
|
2
src/ligo/ligo-parser/.links
Normal file
2
src/ligo/ligo-parser/.links
Normal file
@ -0,0 +1,2 @@
|
||||
$HOME/git/OCaml-build/Makefile
|
||||
$HOME/git/OCaml-build/Makefile.cfg
|
1497
src/ligo/ligo-parser/AST.ml
Normal file
1497
src/ligo/ligo-parser/AST.ml
Normal file
File diff suppressed because it is too large
Load Diff
664
src/ligo/ligo-parser/AST.mli
Normal file
664
src/ligo/ligo-parser/AST.mli
Normal file
@ -0,0 +1,664 @@
|
||||
(* Abstract Syntax Tree (AST) for LIGO *)
|
||||
|
||||
[@@@warning "-30"]
|
||||
|
||||
open Utils
|
||||
|
||||
(* Regions
|
||||
|
||||
The AST carries all the regions where tokens have been found by the
|
||||
lexer, plus additional regions corresponding to whole subtrees
|
||||
(like entire expressions, patterns etc.). These regions are needed
|
||||
for error reporting and source-to-source transformations. To make
|
||||
these pervasive regions more legible, we define singleton types for
|
||||
the symbols, keywords etc. with suggestive names like "kwd_and"
|
||||
denoting the _region_ of the occurrence of the keyword "and".
|
||||
*)
|
||||
|
||||
type 'a reg = 'a Region.reg
|
||||
|
||||
val nseq_to_region : ('a -> Region.t) -> 'a nseq -> Region.t
|
||||
val nsepseq_to_region : ('a -> Region.t) -> ('a,'sep) nsepseq -> Region.t
|
||||
val sepseq_to_region : ('a -> Region.t) -> ('a,'sep) sepseq -> Region.t
|
||||
|
||||
(* Keywords of LIGO *)
|
||||
|
||||
type keyword = Region.t
|
||||
type kwd_and = Region.t
|
||||
type kwd_begin = Region.t
|
||||
type kwd_block = Region.t
|
||||
type kwd_case = Region.t
|
||||
type kwd_const = Region.t
|
||||
type kwd_contains = Region.t
|
||||
type kwd_down = Region.t
|
||||
type kwd_else = Region.t
|
||||
type kwd_end = Region.t
|
||||
type kwd_entrypoint = Region.t
|
||||
type kwd_fail = Region.t
|
||||
type kwd_for = Region.t
|
||||
type kwd_from = Region.t
|
||||
type kwd_function = Region.t
|
||||
type kwd_if = Region.t
|
||||
type kwd_in = Region.t
|
||||
type kwd_is = Region.t
|
||||
type kwd_list = Region.t
|
||||
type kwd_map = Region.t
|
||||
type kwd_mod = Region.t
|
||||
type kwd_nil = Region.t
|
||||
type kwd_not = Region.t
|
||||
type kwd_of = Region.t
|
||||
type kwd_or = Region.t
|
||||
type kwd_patch = Region.t
|
||||
type kwd_procedure = Region.t
|
||||
type kwd_record = Region.t
|
||||
type kwd_remove = Region.t
|
||||
type kwd_set = Region.t
|
||||
type kwd_skip = Region.t
|
||||
type kwd_step = Region.t
|
||||
type kwd_storage = Region.t
|
||||
type kwd_then = Region.t
|
||||
type kwd_to = Region.t
|
||||
type kwd_type = Region.t
|
||||
type kwd_var = Region.t
|
||||
type kwd_while = Region.t
|
||||
type kwd_with = Region.t
|
||||
|
||||
(* Data constructors *)
|
||||
|
||||
type c_False = Region.t
|
||||
type c_None = Region.t
|
||||
type c_Some = Region.t
|
||||
type c_True = Region.t
|
||||
type c_Unit = Region.t
|
||||
|
||||
(* Symbols *)
|
||||
|
||||
type semi = Region.t (* ";" *)
|
||||
type comma = Region.t (* "," *)
|
||||
type lpar = Region.t (* "(" *)
|
||||
type rpar = Region.t (* ")" *)
|
||||
type lbrace = Region.t (* "{" *)
|
||||
type rbrace = Region.t (* "}" *)
|
||||
type lbracket = Region.t (* "[" *)
|
||||
type rbracket = Region.t (* "]" *)
|
||||
type cons = Region.t (* "#" *)
|
||||
type vbar = Region.t (* "|" *)
|
||||
type arrow = Region.t (* "->" *)
|
||||
type assign = Region.t (* ":=" *)
|
||||
type equal = Region.t (* "=" *)
|
||||
type colon = Region.t (* ":" *)
|
||||
type lt = Region.t (* "<" *)
|
||||
type leq = Region.t (* "<=" *)
|
||||
type gt = Region.t (* ">" *)
|
||||
type geq = Region.t (* ">=" *)
|
||||
type neq = Region.t (* "=/=" *)
|
||||
type plus = Region.t (* "+" *)
|
||||
type minus = Region.t (* "-" *)
|
||||
type slash = Region.t (* "/" *)
|
||||
type times = Region.t (* "*" *)
|
||||
type dot = Region.t (* "." *)
|
||||
type wild = Region.t (* "_" *)
|
||||
type cat = Region.t (* "^" *)
|
||||
|
||||
(* Virtual tokens *)
|
||||
|
||||
type eof = Region.t
|
||||
|
||||
(* Literals *)
|
||||
|
||||
type variable = string reg
|
||||
type fun_name = string reg
|
||||
type type_name = string reg
|
||||
type field_name = string reg
|
||||
type map_name = string reg
|
||||
type set_name = string reg
|
||||
type constr = string reg
|
||||
|
||||
(* Parentheses *)
|
||||
|
||||
type 'a par = {
|
||||
lpar : lpar;
|
||||
inside : 'a;
|
||||
rpar : rpar
|
||||
}
|
||||
|
||||
(* Brackets compounds *)
|
||||
|
||||
type 'a brackets = {
|
||||
lbracket : lbracket;
|
||||
inside : 'a;
|
||||
rbracket : rbracket
|
||||
}
|
||||
|
||||
(* Braced compounds *)
|
||||
|
||||
type 'a braces = {
|
||||
lbrace : lbrace;
|
||||
inside : 'a;
|
||||
rbrace : rbrace
|
||||
}
|
||||
|
||||
(* The Abstract Syntax Tree *)
|
||||
|
||||
type t = {
|
||||
decl : declaration nseq;
|
||||
eof : eof
|
||||
}
|
||||
|
||||
and ast = t
|
||||
|
||||
and declaration =
|
||||
TypeDecl of type_decl reg
|
||||
| ConstDecl of const_decl reg
|
||||
| LambdaDecl of lambda_decl
|
||||
|
||||
and const_decl = {
|
||||
kwd_const : kwd_const;
|
||||
name : variable;
|
||||
colon : colon;
|
||||
const_type : type_expr;
|
||||
equal : equal;
|
||||
init : expr;
|
||||
terminator : semi option
|
||||
}
|
||||
|
||||
(* Type declarations *)
|
||||
|
||||
and type_decl = {
|
||||
kwd_type : kwd_type;
|
||||
name : type_name;
|
||||
kwd_is : kwd_is;
|
||||
type_expr : type_expr;
|
||||
terminator : semi option
|
||||
}
|
||||
|
||||
and type_expr =
|
||||
TProd of cartesian
|
||||
| TSum of (variant reg, vbar) nsepseq reg
|
||||
| TRecord of record_type reg
|
||||
| TApp of (type_name * type_tuple) reg
|
||||
| TPar of type_expr par reg
|
||||
| TAlias of variable
|
||||
|
||||
and cartesian = (type_expr, times) nsepseq reg
|
||||
|
||||
and variant = {
|
||||
constr : constr;
|
||||
kwd_of : kwd_of;
|
||||
product : cartesian
|
||||
}
|
||||
|
||||
and record_type = {
|
||||
opening : kwd_record;
|
||||
field_decls : field_decls;
|
||||
terminator : semi option;
|
||||
closing : kwd_end
|
||||
}
|
||||
|
||||
and field_decls = (field_decl reg, semi) nsepseq
|
||||
|
||||
and field_decl = {
|
||||
field_name : field_name;
|
||||
colon : colon;
|
||||
field_type : type_expr
|
||||
}
|
||||
|
||||
and type_tuple = (type_expr, comma) nsepseq par reg
|
||||
|
||||
(* Function and procedure declarations *)
|
||||
|
||||
and lambda_decl =
|
||||
FunDecl of fun_decl reg
|
||||
| ProcDecl of proc_decl reg
|
||||
| EntryDecl of entry_decl reg
|
||||
|
||||
and fun_decl = {
|
||||
kwd_function : kwd_function;
|
||||
name : variable;
|
||||
param : parameters;
|
||||
colon : colon;
|
||||
ret_type : type_expr;
|
||||
kwd_is : kwd_is;
|
||||
local_decls : local_decl list;
|
||||
block : block reg;
|
||||
kwd_with : kwd_with;
|
||||
return : expr;
|
||||
terminator : semi option
|
||||
}
|
||||
|
||||
and proc_decl = {
|
||||
kwd_procedure : kwd_procedure;
|
||||
name : variable;
|
||||
param : parameters;
|
||||
kwd_is : kwd_is;
|
||||
local_decls : local_decl list;
|
||||
block : block reg;
|
||||
terminator : semi option
|
||||
}
|
||||
|
||||
and entry_decl = {
|
||||
kwd_entrypoint : kwd_entrypoint;
|
||||
name : variable;
|
||||
param : entry_params;
|
||||
colon : colon;
|
||||
ret_type : type_expr;
|
||||
kwd_is : kwd_is;
|
||||
local_decls : local_decl list;
|
||||
block : block reg;
|
||||
kwd_with : kwd_with;
|
||||
return : expr;
|
||||
terminator : semi option
|
||||
}
|
||||
|
||||
and parameters = (param_decl, semi) nsepseq par reg
|
||||
|
||||
and entry_params = (entry_param_decl, semi) nsepseq par reg
|
||||
|
||||
and entry_param_decl =
|
||||
EntryConst of param_const reg
|
||||
| EntryVar of param_var reg
|
||||
| EntryStore of storage reg
|
||||
|
||||
and storage = {
|
||||
kwd_storage : kwd_storage;
|
||||
var : variable;
|
||||
colon : colon;
|
||||
storage_type : type_expr
|
||||
}
|
||||
|
||||
and param_decl =
|
||||
ParamConst of param_const reg
|
||||
| ParamVar of param_var reg
|
||||
|
||||
and param_const = {
|
||||
kwd_const : kwd_const;
|
||||
var : variable;
|
||||
colon : colon;
|
||||
param_type : type_expr
|
||||
}
|
||||
|
||||
and param_var = {
|
||||
kwd_var : kwd_var;
|
||||
var : variable;
|
||||
colon : colon;
|
||||
param_type : type_expr
|
||||
}
|
||||
|
||||
and block = {
|
||||
opening : block_opening;
|
||||
instr : instructions;
|
||||
terminator : semi option;
|
||||
closing : block_closing
|
||||
}
|
||||
|
||||
and block_opening =
|
||||
Block of kwd_block * lbrace
|
||||
| Begin of kwd_begin
|
||||
|
||||
and block_closing =
|
||||
Block of rbrace
|
||||
| End of kwd_end
|
||||
|
||||
and local_decl =
|
||||
LocalLam of lambda_decl
|
||||
| LocalConst of const_decl reg
|
||||
| LocalVar of var_decl reg
|
||||
|
||||
and var_decl = {
|
||||
kwd_var : kwd_var;
|
||||
name : variable;
|
||||
colon : colon;
|
||||
var_type : type_expr;
|
||||
assign : assign;
|
||||
init : expr;
|
||||
terminator : semi option
|
||||
}
|
||||
|
||||
and instructions = (instruction, semi) nsepseq
|
||||
|
||||
and instruction =
|
||||
Single of single_instr
|
||||
| Block of block reg
|
||||
|
||||
and single_instr =
|
||||
Cond of conditional reg
|
||||
| Case of case_instr reg
|
||||
| Assign of assignment reg
|
||||
| Loop of loop
|
||||
| ProcCall of fun_call
|
||||
| Fail of fail_instr reg
|
||||
| Skip of kwd_skip
|
||||
| RecordPatch of record_patch reg
|
||||
| MapPatch of map_patch reg
|
||||
| SetPatch of set_patch reg
|
||||
| MapRemove of map_remove reg
|
||||
| SetRemove of set_remove reg
|
||||
|
||||
and set_remove = {
|
||||
kwd_remove : kwd_remove;
|
||||
element : expr;
|
||||
kwd_from : kwd_from;
|
||||
kwd_set : kwd_set;
|
||||
set : path
|
||||
}
|
||||
|
||||
and map_remove = {
|
||||
kwd_remove : kwd_remove;
|
||||
key : expr;
|
||||
kwd_from : kwd_from;
|
||||
kwd_map : kwd_map;
|
||||
map : path
|
||||
}
|
||||
|
||||
and set_patch = {
|
||||
kwd_patch : kwd_patch;
|
||||
path : path;
|
||||
kwd_with : kwd_with;
|
||||
set_inj : expr injection reg
|
||||
}
|
||||
|
||||
and map_patch = {
|
||||
kwd_patch : kwd_patch;
|
||||
path : path;
|
||||
kwd_with : kwd_with;
|
||||
map_inj : binding reg injection reg
|
||||
}
|
||||
|
||||
and binding = {
|
||||
source : expr;
|
||||
arrow : arrow;
|
||||
image : expr
|
||||
}
|
||||
|
||||
and record_patch = {
|
||||
kwd_patch : kwd_patch;
|
||||
path : path;
|
||||
kwd_with : kwd_with;
|
||||
record_inj : record_injection reg
|
||||
}
|
||||
|
||||
and fail_instr = {
|
||||
kwd_fail : kwd_fail;
|
||||
fail_expr : expr
|
||||
}
|
||||
|
||||
and conditional = {
|
||||
kwd_if : kwd_if;
|
||||
test : expr;
|
||||
kwd_then : kwd_then;
|
||||
ifso : if_clause;
|
||||
terminator : semi option;
|
||||
kwd_else : kwd_else;
|
||||
ifnot : if_clause
|
||||
}
|
||||
|
||||
and if_clause =
|
||||
ClauseInstr of instruction
|
||||
| ClauseBlock of (instructions * semi option) braces reg
|
||||
|
||||
and set_membership = {
|
||||
set : expr;
|
||||
kwd_contains : kwd_contains;
|
||||
element : expr
|
||||
}
|
||||
|
||||
and case_instr = {
|
||||
kwd_case : kwd_case;
|
||||
expr : expr;
|
||||
kwd_of : kwd_of;
|
||||
lead_vbar : vbar option;
|
||||
cases : cases;
|
||||
kwd_end : kwd_end
|
||||
}
|
||||
|
||||
and cases = (case reg, vbar) nsepseq reg
|
||||
|
||||
and case = {
|
||||
pattern : pattern;
|
||||
arrow : arrow;
|
||||
instr : instruction
|
||||
}
|
||||
|
||||
and assignment = {
|
||||
lhs : lhs;
|
||||
assign : assign;
|
||||
rhs : rhs;
|
||||
}
|
||||
|
||||
and lhs =
|
||||
Path of path
|
||||
| MapPath of map_lookup reg
|
||||
|
||||
and rhs =
|
||||
Expr of expr
|
||||
| NoneExpr of c_None
|
||||
|
||||
and loop =
|
||||
While of while_loop reg
|
||||
| For of for_loop
|
||||
|
||||
and while_loop = {
|
||||
kwd_while : kwd_while;
|
||||
cond : expr;
|
||||
block : block reg
|
||||
}
|
||||
|
||||
and for_loop =
|
||||
ForInt of for_int reg
|
||||
| ForCollect of for_collect reg
|
||||
|
||||
and for_int = {
|
||||
kwd_for : kwd_for;
|
||||
assign : var_assign reg;
|
||||
down : kwd_down option;
|
||||
kwd_to : kwd_to;
|
||||
bound : expr;
|
||||
step : (kwd_step * expr) option;
|
||||
block : block reg
|
||||
}
|
||||
|
||||
and var_assign = {
|
||||
name : variable;
|
||||
assign : assign;
|
||||
expr : expr
|
||||
}
|
||||
|
||||
and for_collect = {
|
||||
kwd_for : kwd_for;
|
||||
var : variable;
|
||||
bind_to : (arrow * variable) option;
|
||||
kwd_in : kwd_in;
|
||||
expr : expr;
|
||||
block : block reg
|
||||
}
|
||||
|
||||
(* Expressions *)
|
||||
|
||||
and expr =
|
||||
ELogic of logic_expr
|
||||
| EArith of arith_expr
|
||||
| EString of string_expr
|
||||
| EList of list_expr
|
||||
| ESet of set_expr
|
||||
| EConstr of constr_expr
|
||||
| ERecord of record_expr
|
||||
| EProj of projection reg
|
||||
| EMap of map_expr
|
||||
| EVar of Lexer.lexeme reg
|
||||
| ECall of fun_call
|
||||
| EBytes of (Lexer.lexeme * Hex.t) reg
|
||||
| EUnit of c_Unit
|
||||
| ETuple of tuple_expr
|
||||
| EPar of expr par reg
|
||||
|
||||
and set_expr =
|
||||
SetInj of expr injection reg
|
||||
| SetMem of set_membership reg
|
||||
|
||||
and 'a injection = {
|
||||
opening : opening;
|
||||
elements : ('a, semi) sepseq;
|
||||
terminator : semi option;
|
||||
closing : closing
|
||||
}
|
||||
|
||||
and opening =
|
||||
Kwd of keyword
|
||||
| KwdBracket of keyword * lbracket
|
||||
|
||||
and closing =
|
||||
End of kwd_end
|
||||
| RBracket of rbracket
|
||||
|
||||
and map_expr =
|
||||
MapLookUp of map_lookup reg
|
||||
| MapInj of binding reg injection reg
|
||||
|
||||
and map_lookup = {
|
||||
path : path;
|
||||
index : expr brackets reg
|
||||
}
|
||||
|
||||
and path =
|
||||
Name of variable
|
||||
| Path of projection reg
|
||||
|
||||
and logic_expr =
|
||||
BoolExpr of bool_expr
|
||||
| CompExpr of comp_expr
|
||||
|
||||
and bool_expr =
|
||||
Or of kwd_or bin_op reg
|
||||
| And of kwd_and bin_op reg
|
||||
| Not of kwd_not un_op reg
|
||||
| False of c_False
|
||||
| True of c_True
|
||||
|
||||
and 'a bin_op = {
|
||||
op : 'a;
|
||||
arg1 : expr;
|
||||
arg2 : expr
|
||||
}
|
||||
|
||||
and 'a un_op = {
|
||||
op : 'a;
|
||||
arg : expr
|
||||
}
|
||||
|
||||
and comp_expr =
|
||||
Lt of lt bin_op reg
|
||||
| Leq of leq bin_op reg
|
||||
| Gt of gt bin_op reg
|
||||
| Geq of geq bin_op reg
|
||||
| Equal of equal bin_op reg
|
||||
| Neq of neq bin_op reg
|
||||
|
||||
and arith_expr =
|
||||
Add of plus bin_op reg
|
||||
| Sub of minus bin_op reg
|
||||
| Mult of times bin_op reg
|
||||
| Div of slash bin_op reg
|
||||
| Mod of kwd_mod bin_op reg
|
||||
| Neg of minus un_op reg
|
||||
| Int of (Lexer.lexeme * Z.t) reg
|
||||
|
||||
and string_expr =
|
||||
Cat of cat bin_op reg
|
||||
| String of Lexer.lexeme reg
|
||||
|
||||
and list_expr =
|
||||
Cons of cons bin_op reg
|
||||
| List of expr injection reg
|
||||
| Nil of nil par reg
|
||||
|
||||
and nil = {
|
||||
nil : kwd_nil;
|
||||
colon : colon;
|
||||
list_type : type_expr
|
||||
}
|
||||
|
||||
and constr_expr =
|
||||
SomeApp of (c_Some * arguments) reg
|
||||
| NoneExpr of none_expr reg
|
||||
| ConstrApp of (constr * arguments) reg
|
||||
|
||||
and record_expr =
|
||||
RecordInj of record_injection reg
|
||||
|
||||
and record_injection = {
|
||||
opening : kwd_record;
|
||||
fields : (field_assign reg, semi) nsepseq;
|
||||
terminator : semi option;
|
||||
closing : kwd_end
|
||||
}
|
||||
|
||||
and field_assign = {
|
||||
field_name : field_name;
|
||||
equal : equal;
|
||||
field_expr : expr
|
||||
}
|
||||
|
||||
and projection = {
|
||||
record_name : variable;
|
||||
selector : dot;
|
||||
field_path : (selection, dot) nsepseq
|
||||
}
|
||||
|
||||
and selection =
|
||||
FieldName of field_name
|
||||
| Component of (Lexer.lexeme * Z.t) reg
|
||||
|
||||
and tuple_expr =
|
||||
TupleInj of tuple_injection
|
||||
|
||||
and tuple_injection = (expr, comma) nsepseq par reg
|
||||
|
||||
and none_expr = typed_none_expr par
|
||||
|
||||
and typed_none_expr = {
|
||||
c_None : c_None;
|
||||
colon : colon;
|
||||
opt_type : type_expr
|
||||
}
|
||||
|
||||
and fun_call = (fun_name * arguments) reg
|
||||
|
||||
and arguments = tuple_injection
|
||||
|
||||
(* Patterns *)
|
||||
|
||||
and pattern =
|
||||
PCons of (pattern, cons) nsepseq reg
|
||||
| PVar of Lexer.lexeme reg
|
||||
| PWild of wild
|
||||
| PInt of (Lexer.lexeme * Z.t) reg
|
||||
| PBytes of (Lexer.lexeme * Hex.t) reg
|
||||
| PString of Lexer.lexeme reg
|
||||
| PUnit of c_Unit
|
||||
| PFalse of c_False
|
||||
| PTrue of c_True
|
||||
| PNone of c_None
|
||||
| PSome of (c_Some * pattern par reg) reg
|
||||
| PList of list_pattern
|
||||
| PTuple of (pattern, comma) nsepseq par reg
|
||||
|
||||
and list_pattern =
|
||||
Sugar of (pattern, semi) sepseq brackets reg
|
||||
| Raw of (pattern * cons * pattern) par reg
|
||||
|
||||
(* Projecting regions *)
|
||||
|
||||
val type_expr_to_region : type_expr -> Region.t
|
||||
val expr_to_region : expr -> Region.t
|
||||
val instr_to_region : instruction -> Region.t
|
||||
val pattern_to_region : pattern -> Region.t
|
||||
val local_decl_to_region : local_decl -> Region.t
|
||||
val path_to_region : path -> Region.t
|
||||
val lhs_to_region : lhs -> Region.t
|
||||
val rhs_to_region : rhs -> Region.t
|
||||
val if_clause_to_region : if_clause -> Region.t
|
||||
val selection_to_region : selection -> Region.t
|
||||
|
||||
(* Printing *)
|
||||
|
||||
val print_tokens : t -> unit
|
795
src/ligo/ligo-parser/AST2.ml
Normal file
795
src/ligo/ligo-parser/AST2.ml
Normal file
@ -0,0 +1,795 @@
|
||||
[@@@warning "-30"]
|
||||
|
||||
module I = AST
|
||||
|
||||
open Region
|
||||
|
||||
module SMap = Map.Make(String)
|
||||
|
||||
module O = struct
|
||||
type asttodo = [`TODO] (* occurrences of asttodo will point to some part of the original parser AST *)
|
||||
|
||||
type name_and_region = {name: string; orig: Region.t}
|
||||
type type_name = name_and_region
|
||||
type var_name = name_and_region
|
||||
type field_name = name_and_region
|
||||
|
||||
type pattern =
|
||||
PVar of var_name
|
||||
| PWild
|
||||
| PInt of Z.t
|
||||
| PBytes of MBytes.t
|
||||
| PString of string
|
||||
| PUnit
|
||||
| PFalse
|
||||
| PTrue
|
||||
| PNone
|
||||
| PSome of pattern
|
||||
| PCons of pattern * pattern
|
||||
| PNull
|
||||
| PRecord of (field_name * pattern) SMap.t
|
||||
|
||||
type type_constructor =
|
||||
Option
|
||||
| List
|
||||
| Set
|
||||
| Map
|
||||
|
||||
type type_expr_case =
|
||||
Sum of (type_name * type_expr) SMap.t
|
||||
| Record of (field_name * type_expr) SMap.t
|
||||
| TypeApp of type_constructor * (type_expr list)
|
||||
| Function of { arg: type_expr; ret: type_expr }
|
||||
| Ref of type_expr
|
||||
| String
|
||||
| Bytes
|
||||
| Int
|
||||
| Unit
|
||||
| Bool
|
||||
|
||||
and type_expr = { type_expr: type_expr_case; name: type_name option; orig: Region.t }
|
||||
|
||||
type typed_var = { name:var_name; ty:type_expr; orig: asttodo }
|
||||
|
||||
type type_decl = { name:type_name; ty:type_expr; orig: asttodo }
|
||||
|
||||
type expr =
|
||||
App of { operator: operator; arguments: expr list }
|
||||
| Var of var_name
|
||||
| Constant of constant
|
||||
| Record of (field_name * expr) list
|
||||
| Lambda of lambda
|
||||
|
||||
and decl = { name:var_name; ty:type_expr; value: expr }
|
||||
|
||||
and lambda = {
|
||||
parameter: typed_var;
|
||||
declarations: decl list;
|
||||
instructions: instr list;
|
||||
result: expr;
|
||||
}
|
||||
|
||||
and operator =
|
||||
Function of var_name
|
||||
| Constructor of var_name
|
||||
| UpdateField of field_name
|
||||
| GetField of field_name
|
||||
| Or | And | Lt | Leq | Gt | Geq | Equal | Neq | Cat | Cons | Add | Sub | Mult | Div | Mod
|
||||
| Neg | Not
|
||||
| Set | List
|
||||
| MapLookup
|
||||
|
||||
and constant =
|
||||
Unit
|
||||
| Int of Z.t | String of string | Bytes of MBytes.t
|
||||
| False | True
|
||||
| Null of type_expr
|
||||
| EmptySet of type_expr
|
||||
| CNone of type_expr
|
||||
|
||||
and instr =
|
||||
Assignment of { name: var_name; value: expr; orig: asttodo }
|
||||
| While of { condition: expr; body: instr list; orig: asttodo }
|
||||
| ForCollection of { list: expr; var: var_name; body: instr list; orig: asttodo }
|
||||
| Match of { expr: expr; cases: (pattern * instr list) list; orig: asttodo }
|
||||
| ProcedureCall of { expr: expr; orig: asttodo } (* expr returns unit, drop the result. Similar to OCaml's ";". *)
|
||||
| Fail of { expr: expr; orig: asttodo }
|
||||
|
||||
type ast = {
|
||||
types : type_decl list;
|
||||
storage_decl : typed_var;
|
||||
declarations : decl list;
|
||||
orig : AST.t
|
||||
}
|
||||
end
|
||||
|
||||
(* open Sanity: *)
|
||||
let (|>) v f = f v (* pipe f to v *)
|
||||
let (@@) f v = f v (* apply f on v *)
|
||||
let (@.) f g x = f (g x) (* compose *)
|
||||
let map f l = List.rev (List.rev_map f l)
|
||||
let mapi f l =
|
||||
let f (i, l) elem =
|
||||
(i + 1, (f i elem) :: l)
|
||||
in snd (List.fold_left f (0,[]) l)
|
||||
(* TODO: check that List.append is not broken
|
||||
(i.e. check that it is tail-recursive) *)
|
||||
let append_map f l = map f l |> List.flatten
|
||||
let append l1 l2 = List.append l1 l2
|
||||
let list_to_map l = List.fold_left (fun m (k,v) -> SMap.add k v m) SMap.empty l
|
||||
let fold_map f a l =
|
||||
let f (acc, l) elem =
|
||||
let acc', elem' = f acc elem
|
||||
in acc', (elem' :: l) in
|
||||
let last_acc, last_l = List.fold_left f (a, []) l
|
||||
in last_acc, List.rev last_l
|
||||
|
||||
(* Simplify the AST *)
|
||||
|
||||
let name_and_region_of_int i = O.{name = string_of_int i; orig = Region.ghost}
|
||||
|
||||
let s_nsepseq : ('a,'sep) Utils.nsepseq -> 'a list =
|
||||
fun (first, rest) -> first :: (map snd rest)
|
||||
|
||||
let s_sepseq : ('a,'sep) Utils.sepseq -> 'a list =
|
||||
function
|
||||
None -> []
|
||||
| Some nsepseq -> s_nsepseq nsepseq
|
||||
|
||||
let s_name {value=name; region} : O.var_name =
|
||||
let () = ignore (region) in
|
||||
{name;orig = region}
|
||||
|
||||
let name_to_string {value=name; region} : string =
|
||||
let () = ignore (region) in
|
||||
name
|
||||
|
||||
let type_expr (orig : Region.t) (e : O.type_expr_case) : O.type_expr =
|
||||
{ type_expr = e; name = None; orig }
|
||||
|
||||
let s_type_constructor {value=name;region} : O.type_constructor =
|
||||
let () = ignore (region) in
|
||||
match name with
|
||||
"Option" -> Option
|
||||
| "List" -> List
|
||||
| "Map" -> Map
|
||||
| "Set" -> Set
|
||||
(* TODO: escape the name, prevent any \x1b and other weird characters from appearing in the output *)
|
||||
| _ -> failwith ("Unknown type constructor: " ^ name)
|
||||
|
||||
let named_list_to_map (l : (O.name_and_region * 'a) list) : (O.name_and_region * 'a) SMap.t =
|
||||
List.fold_left
|
||||
(fun m ((x,_) as p) ->
|
||||
let {name;_} : O.name_and_region = x in
|
||||
SMap.add name p m)
|
||||
SMap.empty
|
||||
l
|
||||
|
||||
let rec s_cartesian {value=sequence; region} : O.type_expr =
|
||||
let () = ignore (region) in
|
||||
s_nsepseq sequence
|
||||
|>map s_type_expr
|
||||
|> mapi (fun i p -> name_and_region_of_int i, p)
|
||||
|> named_list_to_map
|
||||
|> (fun x -> (Record x : O.type_expr_case))
|
||||
|> type_expr region
|
||||
|
||||
and s_sum_type {value=sequence; region} : O.type_expr =
|
||||
let () = ignore (region) in
|
||||
type_expr region (Sum (map s_variant (s_nsepseq sequence) |> named_list_to_map))
|
||||
|
||||
and s_variant {value=(constr, kwd_of, cartesian); region} =
|
||||
let () = ignore (kwd_of,region) in
|
||||
(s_name constr, s_cartesian cartesian)
|
||||
|
||||
and s_record_type {value=(kwd_record, field_decls, kwd_end); region} : O.type_expr =
|
||||
let () = ignore (kwd_record,region,kwd_end) in
|
||||
type_expr region (Record (map s_field_decl (s_nsepseq field_decls) |> named_list_to_map) : O.type_expr_case)
|
||||
|
||||
and s_field_decl {value=(var, colon, type_expr); region} : O.type_name * O.type_expr =
|
||||
let () = ignore (colon,region) in
|
||||
((s_name var), (s_type_expr type_expr))
|
||||
|
||||
and s_type_app {value=(type_name,type_tuple); region} : O.type_expr =
|
||||
let () = ignore (region) in
|
||||
type_expr region (TypeApp (s_type_constructor type_name, s_type_tuple type_tuple))
|
||||
|
||||
and s_type_tuple ({value=(lpar, sequence, rpar); region} : (I.type_name, I.comma) Utils.nsepseq I.par) : O.type_expr list =
|
||||
let () = ignore (lpar,rpar,region) in
|
||||
(* TODO: the grammar should allow any type expr, not just type_name in the tuple elements *)
|
||||
map s_type_expr (map (fun a -> I.TAlias a) (s_nsepseq sequence))
|
||||
|
||||
and s_par_type {value=(lpar, type_expr, rpar); region} : O.type_expr =
|
||||
let () = ignore (lpar,rpar,region) in
|
||||
s_type_expr type_expr
|
||||
|
||||
and s_type_alias name : O.type_expr =
|
||||
let () = ignore () in
|
||||
type_expr name.region (TypeApp (s_type_constructor name, []))
|
||||
|
||||
and s_type_expr (orig : I.type_expr) : O.type_expr = match orig with
|
||||
Prod cartesian -> s_cartesian cartesian
|
||||
| Sum sum_type -> s_sum_type sum_type
|
||||
| Record record_type -> s_record_type record_type
|
||||
| TypeApp type_app -> s_type_app type_app
|
||||
| ParType par_type -> s_par_type par_type
|
||||
| TAlias type_alias -> s_type_alias type_alias
|
||||
|
||||
|
||||
let s_type_decl I.{value={kwd_type;name;kwd_is;type_expr;terminator}; region} : O.type_decl =
|
||||
let () = ignore (kwd_type,kwd_is,terminator,region) in
|
||||
let ty = s_type_expr type_expr in
|
||||
O.{ name = s_name name; ty = { ty with name = Some (s_name name) }; orig = `TODO }
|
||||
|
||||
let s_storage_decl I.{value={kwd_storage; name; colon; store_type; terminator}; region} : O.typed_var =
|
||||
let () = ignore (kwd_storage,colon,terminator,region) in
|
||||
O.{ name = s_name name; ty = s_type_expr store_type; orig = `TODO }
|
||||
|
||||
let s_operations_decl I.{value={kwd_operations;name;colon;op_type;terminator}; region} : O.typed_var =
|
||||
let () = ignore (kwd_operations,colon,terminator,region) in
|
||||
O.{ name = s_name name; ty = s_type_expr op_type; orig = `TODO }
|
||||
|
||||
let s_empty_list {value=(l, (lbracket, rbracket, colon, type_expr), r); region} : O.expr =
|
||||
let () = ignore (l, lbracket, rbracket, colon, r, region) in
|
||||
Constant (Null (s_type_expr type_expr))
|
||||
|
||||
let s_empty_set {value=(l, (lbrace, rbrace, colon, type_expr), r); region} : O.expr =
|
||||
let () = ignore (l, lbrace, rbrace, colon, r, region) in
|
||||
Constant (EmptySet (s_type_expr type_expr))
|
||||
|
||||
let s_none {value=(l, (c_None, colon, type_expr), r); region} : O.expr =
|
||||
let () = ignore (l, c_None, colon, r, region) in
|
||||
Constant (CNone (s_type_expr type_expr))
|
||||
|
||||
let parameters_to_tuple (parameters : (string * O.type_expr) list) : O.type_expr =
|
||||
(* TODO: use records with named fields to have named arguments. *)
|
||||
let parameter_tuple : O.type_expr_case =
|
||||
Record (mapi (fun i (_name,ty) -> name_and_region_of_int i, ty) parameters |> named_list_to_map) in
|
||||
O.{ type_expr = parameter_tuple; name = None; orig = Region.ghost }
|
||||
|
||||
and parameters_to_decls singleparam (parameters : (string * O.type_expr) list) : O.decl list =
|
||||
let f i (name,ty) =
|
||||
O.{ name = {name; orig=Region.ghost};
|
||||
ty = ty;
|
||||
value = App { operator = O.GetField (name_and_region_of_int i);
|
||||
arguments = [Var singleparam] } }
|
||||
in mapi f parameters
|
||||
|
||||
let rec bin l operator r = O.App { operator; arguments = [s_expr l; s_expr r] }
|
||||
and una operator v = O.App { operator; arguments = [s_expr v] }
|
||||
and s_expr : I.expr -> O.expr =
|
||||
function
|
||||
Or {value=(l, bool_or, r); region} -> let () = ignore (region, bool_or) in bin l Or r
|
||||
| And {value=(l, bool_and, r); region} -> let () = ignore (region,bool_and) in bin l And r
|
||||
| Lt {value=(l, lt, r); region} -> let () = ignore (region, lt) in bin l Lt r
|
||||
| Leq {value=(l, leq, r); region} -> let () = ignore (region, leq) in bin l Leq r
|
||||
| Gt {value=(l, gt, r); region} -> let () = ignore (region, gt) in bin l Gt r
|
||||
| Geq {value=(l, geq, r); region} -> let () = ignore (region, geq) in bin l Geq r
|
||||
| Equal {value=(l, equal, r); region} -> let () = ignore (region, equal) in bin l Equal r
|
||||
| Neq {value=(l, neq, r); region} -> let () = ignore (region, neq) in bin l Neq r
|
||||
| Cat {value=(l, cat, r); region} -> let () = ignore (region, cat) in bin l Cat r
|
||||
| Cons {value=(l, cons, r); region} -> let () = ignore (region, cons) in bin l Cons r
|
||||
| Add {value=(l, plus, r); region} -> let () = ignore (region, plus) in bin l Add r
|
||||
| Sub {value=(l, minus, r); region} -> let () = ignore (region, minus) in bin l Sub r
|
||||
| Mult {value=(l, times, r); region} -> let () = ignore (region, times) in bin l Mult r
|
||||
| Div {value=(l, slash, r); region} -> let () = ignore (region, slash) in bin l Div r
|
||||
| Mod {value=(l, kwd_mod, r); region} -> let () = ignore (region, kwd_mod) in bin l Mod r
|
||||
| Neg {value=(minus, expr); region} -> let () = ignore (region, minus) in una Neg expr
|
||||
| Not {value=(kwd_not, expr); region} -> let () = ignore (region, kwd_not) in una Not expr
|
||||
| Int {value=(lexeme, z); region} -> let () = ignore (region, lexeme) in Constant (Int z)
|
||||
| Var lexeme -> Var (s_name lexeme)
|
||||
| String {value=s; region} -> let () = ignore (region) in Constant (String s)
|
||||
| Bytes {value=(lexeme, mbytes); region} -> let () = ignore (region, lexeme) in Constant (Bytes mbytes)
|
||||
| False c_False -> let () = ignore (c_False) in Constant (False)
|
||||
| True c_True -> let () = ignore (c_True) in Constant (True)
|
||||
| Unit c_Unit -> let () = ignore (c_Unit) in Constant (Unit)
|
||||
| Tuple {value=(l,tuple,r); region} -> let () = ignore (l,r,region) in s_tuple_expr (tuple |> s_nsepseq |> map s_expr)
|
||||
| List list -> s_list list
|
||||
| EmptyList empty_list -> s_empty_list empty_list
|
||||
| Set set -> s_set set
|
||||
| EmptySet empty_set -> s_empty_set empty_set
|
||||
| NoneExpr none_expr -> s_none none_expr
|
||||
| FunCall fun_call -> s_fun_call fun_call
|
||||
| ConstrApp constr_app -> s_constr_app constr_app
|
||||
| SomeApp some_app -> s_some_app some_app
|
||||
| MapLookUp map_lookup -> s_map_lookup map_lookup
|
||||
| ParExpr {value=(lpar,expr,rpar); region} -> let () = ignore (lpar,rpar,region) in s_expr expr
|
||||
|
||||
(* Encodes a tuple expression as an [O.Record] whose field names are
   generated from the component indices (via [name_and_region_of_int]). *)
and s_tuple_expr tuple : O.expr =
  Record (mapi (fun i e -> name_and_region_of_int i, e) tuple)

(* Translates a map lookup [m.[i]] into an application of the built-in
   [MapLookup] operator to the map variable and the index expression. *)
and s_map_lookup I.{value = {map_name; selector; index}; region} : O.expr =
  let {value = lbracket, index_expr, rbracket; region=region2} = index in
  let () = ignore (selector, lbracket, rbracket, region2, region) in
  App { operator = MapLookup; arguments = [Var (s_name map_name); s_expr index_expr] }

(* Translates [Some (e1, ..., en)].
   NOTE(review): the [Some] wrapper itself is dropped here — the result
   is just the (tuple of) translated argument(s); confirm this is the
   intended encoding of options. *)
and s_some_app {value=(c_Some, {value=(l,arguments,r); region=region2}); region} : O.expr =
  let () = ignore (c_Some,l,r,region2,region) in
  match s_nsepseq arguments with
    [] -> failwith "tuple cannot be empty"
  | [a] -> s_expr a
  | l -> s_tuple_expr (map s_expr l)
|
||||
|
||||
(* Translates a list literal [[e1; ...; en]] into an application of the
   [List] operator to the translated elements. *)
and s_list {value=(l, list, r); region} : O.expr =
  let () = ignore (l, r, region) in
  App { operator = List; arguments = map s_expr (s_nsepseq list) }

(* Translates a set literal into an application of the [Set] operator
   to the translated elements. *)
and s_set {value=(l, set, r); region} : O.expr =
  let () = ignore (l, r, region) in
  App { operator = Set; arguments = map s_expr (s_nsepseq set) }

(* Translates a cons-separated pattern [p1 # p2 # ...] by chaining the
   components into nested [PCons] nodes. *)
and s_pattern {value=sequence; region} : O.pattern =
  let () = ignore (region) in
  s_pattern_conses (s_nsepseq sequence)

(* Builds right-nested [PCons] cells from a non-empty list of core
   patterns; the last component becomes the tail pattern. *)
and s_pattern_conses : I.core_pattern list -> O.pattern = function
    [] -> assert false  (* [s_nsepseq] never yields an empty list *)
  | [p] -> s_core_pattern p
  | hd :: tl -> PCons (s_core_pattern hd, s_pattern_conses tl)

(* Translates one match case [pattern -> instruction]. *)
and s_case ({value=(pattern, arrow, instruction); region} : I.case) : O.pattern * O.instr list =
  let () = ignore (arrow,region) in
  s_pattern pattern, s_instruction instruction
|
||||
|
||||
(* Translates an atomic pattern, mapping each concrete constructor to
   its abstract counterpart and discarding source regions/lexemes. *)
and s_core_pattern : I.core_pattern -> O.pattern = function
  PVar var -> PVar (s_name var)
| PWild wild -> let () = ignore (wild) in PWild
| PInt {value=(si,i);region} -> let () = ignore (si,region) in PInt i
| PBytes {value=(sb,b);region} -> let () = ignore (sb,region) in PBytes b
| PString {value=s;region} -> let () = ignore (region) in PString s
| PUnit region -> let () = ignore (region) in PUnit
| PFalse region -> let () = ignore (region) in PFalse
| PTrue region -> let () = ignore (region) in PTrue
| PNone region -> let () = ignore (region) in PNone
| PSome psome -> s_psome psome
| PList pattern -> s_list_pattern pattern
| PTuple ptuple -> s_ptuple ptuple

(* Dispatches on the two concrete syntaxes for list patterns:
   sugared [[p1; ...]] and raw [(head # tail)]. *)
and s_list_pattern = function
  Sugar sugar -> s_sugar sugar
| Raw raw -> s_raw raw
|
||||
|
||||
(* Translates a sugared list pattern [[p1; p2; ...]] into nested
   [PCons] cells terminated by [PNull], preserving element order.
   Fixes: the previous [List.fold_left] built the spine in REVERSE
   element order ([[a; b]] became [b # a # nil]); a stray trailing
   semicolon also broke the following [and] definition. *)
and s_sugar {value=(lbracket, sequence, rbracket); region} : O.pattern =
  let () = ignore (lbracket, rbracket, region) in
  List.fold_right (fun p acc -> O.PCons (s_core_pattern p, acc))
                  (s_sepseq sequence)
                  O.PNull
|
||||
|
||||
(* Translates a raw cons pattern [(head # tail)] into a [PCons]. *)
and s_raw {value=(lpar, (core_pattern, cons, pattern), rpar); region} =
  let () = ignore (lpar, cons, rpar, region) in
  O.PCons (s_core_pattern core_pattern, s_pattern pattern)

(* Translates a tuple pattern [(p1, ..., pn)] into a [PRecord] whose
   fields are named after the component indices, mirroring
   [s_tuple_expr] on the expression side. *)
and s_ptuple {value=(lpar, sequence, rpar); region} =
  let () = ignore (lpar, rpar, region) in
  s_nsepseq sequence
  |> map s_core_pattern
  |> mapi (fun i p -> name_and_region_of_int i, p)
  |> fun x -> O.PRecord (x |> named_list_to_map)

(* Translates a [Some (p)] pattern into a [PSome]. *)
and s_psome {value=(c_Some,{value=(l,psome,r);region=region2});region} : O.pattern =
  let () = ignore (c_Some,l,r,region2,region) in
  PSome (s_core_pattern psome)
|
||||
|
||||
(* Translates [const name : type = init] into a declaration. *)
and s_const_decl I.{value={kwd_const;name;colon;const_type;equal;init;terminator}; region} : O.decl =
  let () = ignore (kwd_const,colon,equal,terminator,region) in
  O.{ name = s_name name; ty = s_type_expr const_type; value = s_expr init }

(* Translates a [const x : t] parameter to its name and type. *)
and s_param_const {value=(kwd_const,variable,colon,type_expr); region} : string * O.type_expr =
  let () = ignore (kwd_const,colon,region) in
  name_to_string variable, s_type_expr type_expr

(* Translates a [var x : t] parameter to its name and type.
   NOTE(review): the const/var distinction is lost here — confirm the
   target AST does not need parameter mutability. *)
and s_param_var {value=(kwd_var,variable,colon,type_expr); region} : string * O.type_expr =
  let () = ignore (kwd_var,colon,region) in
  name_to_string variable, s_type_expr type_expr

(* Dispatches on the two kinds of formal parameters. *)
and s_param_decl : I.param_decl -> string * O.type_expr = function
  ParamConst p -> s_param_const p
| ParamVar p -> s_param_var p

(* Translates a parenthesized, separator-delimited parameter list. *)
and s_parameters ({value=(lpar,param_decl,rpar);region} : I.parameters) : (string * O.type_expr) list =
  let () = ignore (lpar,rpar,region) in
  let l = (s_nsepseq param_decl) in
  map s_param_decl l
|
||||
|
||||
(* Translates [var name : type := init] into a declaration. *)
and s_var_decl I.{value={kwd_var;name;colon;var_type;ass;init;terminator}; region} : O.decl =
  let () = ignore (kwd_var,colon,ass,terminator,region) in
  O.{
    name = s_name name;
    ty = s_type_expr var_type;
    value = s_expr init
  }

(* Dispatches on the three kinds of local declarations. *)
and s_local_decl : I.local_decl -> O.decl = function
  LocalLam decl -> s_lambda_decl decl
| LocalConst decl -> s_const_decl decl
| LocalVar decl -> s_var_decl decl

(* Translates a sequence of instructions, flattening the instruction
   list produced for each one into a single list. *)
and s_instructions ({value=sequence; region} : I.instructions) : O.instr list =
  let () = ignore (region) in
  append_map s_instruction (s_nsepseq sequence)

(* A single instruction and a block both translate to a list of
   abstract instructions. *)
and s_instruction : I.instruction -> O.instr list = function
  Single instr -> s_single_instr instr
| Block block -> (s_block block)
|
||||
|
||||
(* Desugars [if test then ifso else ifnot] into a two-case [Match] on
   the boolean patterns [PTrue]/[PFalse]. *)
and s_conditional I.{kwd_if;test;kwd_then;ifso;kwd_else;ifnot} : O.instr =
  let () = ignore (kwd_if,kwd_then,kwd_else) in
  let test = s_expr test in
  let ifso = O.PTrue, s_instruction ifso in
  let ifnot = O.PFalse, s_instruction ifnot in
  Match {
    expr = test;
    cases = [ifso; ifnot];
    orig = `TODO
  }

(* Translates a [match ... with ... end] instruction. *)
and s_match_instr I.{kwd_match;expr;kwd_with;lead_vbar;cases;kwd_end} : O.instr =
  let {value=cases;region} = cases in
  let () = ignore (kwd_match,kwd_with,lead_vbar,kwd_end,region) in
  Match { expr = s_expr expr; cases = map s_case (s_nsepseq cases); orig = `TODO }

(* Translates an assignment [x := e]. *)
and s_ass_instr {value=(variable,ass,expr); region} : O.instr =
  let () = ignore (ass,region) in
  Assignment { name = s_name variable; value = s_expr expr; orig = `TODO }

(* Translates [while e block] into a [While] instruction. *)
and s_while_loop {value=(kwd_while, expr, block); region} : O.instr list =
  let () = ignore (kwd_while,region) in
  [While {condition = s_expr expr; body = s_block block; orig = `TODO}]

(* Dispatches on the two kinds of [for] loops. *)
and s_for_loop : I.for_loop -> O.instr list = function
  ForInt for_int -> s_for_int for_int
| ForCollect for_collect -> s_for_collect for_collect
|
||||
|
||||
(* Desugars an integer [for] loop into an initial assignment of the
   loop variable followed by a [While] loop.  The direction ([to] vs
   [down to]) selects both the continuation test ([Lt]/[Gt]) and the
   update operator ([Add]/[Sub]); the step defaults to 1 via [s_step].
   NOTE(review): the continuation test is strict, so the bound itself
   is excluded — confirm this matches the source semantics of [to]. *)
and s_for_int ({value={kwd_for;ass;down;kwd_to;bound;step;block}; region} : I.for_int reg) : O.instr list =
  let {value=(variable,ass_kwd,expr);region = ass_region} = ass in
  let () = ignore (kwd_for,ass_region,ass_kwd,kwd_to,region) in
  let name = s_name variable in
  let condition, operator = match down with Some kwd_down -> ignore kwd_down; O.Gt, O.Sub
                                          | None -> O.Lt, O.Add in
  let step = s_step step
  in [
    Assignment { name; value = s_expr expr; orig = `TODO };
    (* TODO: lift the declaration of the variable, to avoid creating a nested scope here. *)
    While {
      condition = App { operator = condition;
                        arguments = [Var name; s_expr bound]};
      body = append (s_block block)
                    [O.Assignment { name;
                                    value = App { operator;
                                                  arguments = [Var name; step]};
                                    orig = `TODO }];
      orig = `TODO
    }
  ]
|
||||
|
||||
(* Translates [for x in collection block]; the map-iteration form
   (with an [x -> y] binding) is not supported yet and fails. *)
and s_for_collect ({value={kwd_for;var;bind_to;kwd_in;expr;block}; _} : I.for_collect reg) : O.instr list =
  let () = ignore (kwd_for,kwd_in) in
  let for_instr =
    match s_bind_to bind_to with
      Some _ ->
        failwith "TODO: For on maps is not supported yet!"
    | None ->
        O.ForCollection {
          list = s_expr expr;
          var = s_name var;
          body = s_block block;
          orig = `TODO
        }
  in [for_instr]

(* Translates an optional [step e] clause; the step defaults to 1. *)
and s_step : (I.kwd_step * I.expr) option -> O.expr = function
  Some (kwd_step, expr) -> let () = ignore (kwd_step) in s_expr expr
| None -> Constant (Int (Z.of_int 1))

(* Translates an optional [-> y] binding of a collection loop. *)
and s_bind_to : (I.arrow * I.variable) option -> O.var_name option = function
  Some (arrow, variable) -> let () = ignore (arrow) in Some (s_name variable)
| None -> None

(* Dispatches on the two kinds of loops. *)
and s_loop : I.loop -> O.instr list = function
  While while_loop -> s_while_loop while_loop
| For for_loop -> s_for_loop for_loop
|
||||
|
||||
(* Translates a call [f (args)].  A name starting with an uppercase
   LETTER denotes a constructor; anything else is a function call.
   Fixes: the previous test compared the first character with its
   uppercase form, which also classified non-letter characters
   (e.g. '_' or digits) as constructors, since those are their own
   uppercase. *)
and s_fun_call {value=(fun_name, arguments); region} : O.expr =
  let () = ignore (region) in
  let {value=fun_name_string;_} = fun_name in
  (* [fun_name_string] comes from the lexer's Ident/Constr lexeme and
     is therefore non-empty, so indexing position 0 is safe. *)
  let is_constr =
    match fun_name_string.[0] with 'A'..'Z' -> true | _ -> false in
  if is_constr then
    App { operator = Constructor (s_name fun_name); arguments = s_arguments arguments }
  else
    App { operator = Function (s_name fun_name); arguments = s_arguments arguments }
|
||||
|
||||
(* Translates an explicit constructor application [C (args)].
   Fixes: uses the [Constructor] operator, consistently with
   [s_fun_call]'s handling of capitalized names — the previous code
   used [Function], losing the constructor nature of the node. *)
and s_constr_app {value=(constr, arguments); region} : O.expr =
  let () = ignore (region) in
  App { operator = Constructor (s_name constr); arguments = s_arguments arguments }
|
||||
|
||||
(* Translates an argument list [(e1, ..., en)]: zero arguments become
   [Unit], a single argument is passed as-is, and several arguments
   are packed into a tuple expression.
   Fixes: a stray trailing semicolon after the match made the
   following [and] definition a syntax error. *)
and s_arguments {value=(lpar, sequence, rpar); region} : O.expr list =
  (* TODO: should return a tuple *)
  let () = ignore (lpar,rpar,region) in
  match map s_expr (s_nsepseq sequence) with
    [] -> [Constant Unit]
  | [single_argument] -> [single_argument]
  | args -> [s_tuple_expr args]
|
||||
|
||||
(* Translates [fail e] into a [Fail] instruction carrying the
   translated expression. *)
and s_fail ((kwd_fail, expr) : (I.kwd_fail * I.expr)) : O.instr =
  ignore kwd_fail;
  Fail { expr = s_expr expr; orig = `TODO }
|
||||
|
||||
|
||||
|
||||
|
||||
(* Translates a single concrete instruction to a LIST of abstract
   instructions: [null] vanishes, and loops may expand to several. *)
and s_single_instr : I.single_instr -> O.instr list = function
  Cond {value; _} -> [s_conditional value]
| Match {value; _} -> [s_match_instr value]
| Ass instr -> [s_ass_instr instr]
| Loop loop -> s_loop loop
| ProcCall fun_call -> [ProcedureCall { expr = s_fun_call fun_call; orig = `TODO }]
| Null kwd_null -> let () = ignore (kwd_null) in
                   []
| Fail {value; _} -> [s_fail value]

(* Translates a block to the list of its instructions, discarding the
   delimiting keywords. *)
and s_block I.{value={opening;instr;terminator;close}; _} : O.instr list =
  let () = ignore (opening,terminator,close) in
  s_instructions instr

(* Generates a fresh typed variable of type [ty].  The counter is
   module-level mutable state shared by all calls, so each call yields
   a new name of the form "<n>gensym" with a ghost region. *)
and gensym =
  let i = ref 0 in
  fun ty ->
    i := !i + 1;
    (* TODO: Region.ghost *)
    ({name = {name=(string_of_int !i) ^ "gensym"; orig = Region.ghost}; ty; orig = `TODO} : O.typed_var)
|
||||
|
||||
(* Translates a function declaration.  All formal parameters are
   packed into a single fresh tuple-typed argument (see [gensym]); the
   original parameters are re-introduced as local declarations that
   destructure that argument (see [parameters_to_decls]). *)
and s_fun_decl I.{value={kwd_function;name;param;colon;ret_type;kwd_is;local_decls;block;kwd_with;return;terminator}; region} : O.decl =
  let () = ignore (kwd_function,colon,kwd_is,kwd_with,terminator,region) in
  let tuple_type = s_parameters param |> parameters_to_tuple in
  let single_argument = gensym tuple_type in
  let ({name = single_argument_xxx; ty = _; orig = `TODO} : O.typed_var) = single_argument in
  O.{
    name = s_name name;
    ty = type_expr region (Function { arg = tuple_type;
                                      ret = s_type_expr ret_type });
    value = Lambda {
      parameter = single_argument;
      declarations = append
                       (s_parameters param |> parameters_to_decls single_argument_xxx)
                       (map s_local_decl local_decls);
      instructions = s_block block;
      result = s_expr return
    }
  }
|
||||
|
||||
(* Translates a procedure declaration.  Same scheme as [s_fun_decl],
   but both the result type and the result value are [Unit]. *)
and s_proc_decl I.{value={kwd_procedure;name;param;kwd_is;local_decls;block;terminator}; region} =
  let () = ignore (kwd_procedure,kwd_is,terminator,region) in
  let tuple_type = s_parameters param |> parameters_to_tuple in
  let single_argument = gensym tuple_type in
  let ({name = single_argument_xxx; ty = _; orig = `TODO} : O.typed_var) = single_argument in
  O.{
    name = s_name name;
    ty = type_expr region (Function { arg = tuple_type;
                                      ret = type_expr region Unit });
    value = Lambda {
      parameter = single_argument;
      declarations = append
                       (s_parameters param |> parameters_to_decls single_argument_xxx)
                       (map s_local_decl local_decls);
      instructions = s_block block;
      result = O.Constant O.Unit
    }
  }
|
||||
|
||||
(* Translates an entrypoint declaration.  Currently identical to
   [s_proc_decl] except for the keyword field; consider factoring the
   two once the target AST stabilizes. *)
and s_entry_decl I.{value={kwd_entrypoint;name;param;kwd_is;local_decls;block;terminator}; region} =
  let () = ignore (kwd_entrypoint,kwd_is,terminator,region) in
  let tuple_type = s_parameters param |> parameters_to_tuple in
  let single_argument = gensym tuple_type in
  let ({name = single_argument_xxx; ty = _; orig = `TODO} : O.typed_var) = single_argument in
  O.{
    name = s_name name;
    ty = type_expr region (Function { arg = tuple_type;
                                      ret = type_expr region Unit });
    value = Lambda {
      parameter = single_argument;
      declarations = append
                       (s_parameters param |> parameters_to_decls single_argument_xxx)
                       (map s_local_decl local_decls);
      instructions = s_block block;
      result = O.Constant O.Unit
    }
  }
|
||||
|
||||
(* Dispatches on the three kinds of lambda declarations. *)
and s_lambda_decl : I.lambda_decl -> O.decl = function
  FunDecl fun_decl -> s_fun_decl fun_decl
| EntryDecl entry_decl -> s_entry_decl entry_decl
| ProcDecl proc_decl -> s_proc_decl proc_decl

(* Intermediate accumulator used while folding over the top-level
   declarations: collects type declarations, the (unique) storage and
   operations declarations, and all other declarations. *)
type tmp_ast = {
  types : O.type_decl list;
  storage_decl : O.typed_var option;
  operations_decl : O.typed_var option;
  declarations : O.decl list;
}
|
||||
|
||||
|
||||
(* Folds one top-level declaration into the accumulator.  The lists
   are built by consing, i.e. in reverse source order, and [s_ast]
   consumes them without re-reversing.
   NOTE(review): confirm downstream code is insensitive to the order
   of type and value declarations. *)
let s_declaration (ast : tmp_ast) : I.declaration -> tmp_ast = function
  TypeDecl t -> { ast with types = (s_type_decl t) :: ast.types }
| ConstDecl c -> { ast with declarations = (s_const_decl c) :: ast.declarations }
| StorageDecl s -> { ast with storage_decl = Some (s_storage_decl s) }
| OpDecl o -> { ast with operations_decl = Some (s_operations_decl o) }
| LambdaDecl l -> { ast with declarations = (s_lambda_decl l) :: ast.declarations }

(* Entry point of the translation: converts a full concrete AST.
   Requires exactly one storage declaration, and rejects the (now
   unsupported) operations declaration. *)
let s_ast (ast : I.ast) : O.ast =
  let I.{decl=(decl1,decls);eof} = ast in
  let () = ignore (eof) in
  let {types; storage_decl; operations_decl; declarations} =
    List.fold_left s_declaration
                   { types = [];
                     storage_decl = None;
                     operations_decl = None;
                     declarations = [] }
                   ( decl1 :: decls ) in
  let storage_decl = match storage_decl with
      Some x -> x
    | None -> failwith "Missing storage declaration" in
  let () = match operations_decl with
      Some _ -> failwith "Operations declaration is not allowed anymore TODO"
    | None -> ()
  in {types; storage_decl; declarations; orig = ast}
|
||||
|
||||
|
||||
|
||||
|
||||
(* let s_token region lexeme = *)
|
||||
(* printf "%s: %s\n"(compact region) lexeme *)
|
||||
|
||||
(* and s_var {region; value=lexeme} = *)
|
||||
(* printf "%s: Ident \"%s\"\n" (compact region) lexeme *)
|
||||
|
||||
(* and s_constr {region; value=lexeme} = *)
|
||||
(* printf "%s: Constr \"%s\"\n" *)
|
||||
(* (compact region) lexeme *)
|
||||
|
||||
(* and s_string {region; value=lexeme} = *)
|
||||
(* printf "%s: String \"%s\"\n" *)
|
||||
(* (compact region) lexeme *)
|
||||
|
||||
(* and s_bytes {region; value = lexeme, abstract} = *)
|
||||
(* printf "%s: Bytes (\"%s\", \"0x%s\")\n" *)
|
||||
(* (compact region) lexeme *)
|
||||
(* (MBytes.to_hex abstract |> Hex.to_string) *)
|
||||
|
||||
(* and s_int {region; value = lexeme, abstract} = *)
|
||||
(* printf "%s: Int (\"%s\", %s)\n" *)
|
||||
(* (compact region) lexeme *)
|
||||
(* (Z.to_string abstract) *)
|
||||
|
||||
|
||||
(* and s_parameters {value=node; _} = *)
|
||||
(* let lpar, sequence, rpar = node in *)
|
||||
(* s_token lpar "("; *)
|
||||
(* s_nsepseq ";" s_param_decl sequence; *)
|
||||
(* s_token rpar ")" *)
|
||||
|
||||
(* and s_param_decl = function *)
|
||||
(* ParamConst param_const -> s_param_const param_const *)
|
||||
(* | ParamVar param_var -> s_param_var param_var *)
|
||||
|
||||
(* and s_region_cases {value=sequence; _} = *)
|
||||
(* s_nsepseq "|" s_case sequence *)
|
||||
|
||||
(* and s_expr = function *)
|
||||
(* Or {value = expr1, bool_or, expr2; _} -> *)
|
||||
(* s_expr expr1; s_token bool_or "||"; s_expr expr2 *)
|
||||
(* | And {value = expr1, bool_and, expr2; _} -> *)
|
||||
(* s_expr expr1; s_token bool_and "&&"; s_expr expr2 *)
|
||||
(* | Lt {value = expr1, lt, expr2; _} -> *)
|
||||
(* s_expr expr1; s_token lt "<"; s_expr expr2 *)
|
||||
(* | Leq {value = expr1, leq, expr2; _} -> *)
|
||||
(* s_expr expr1; s_token leq "<="; s_expr expr2 *)
|
||||
(* | Gt {value = expr1, gt, expr2; _} -> *)
|
||||
(* s_expr expr1; s_token gt ">"; s_expr expr2 *)
|
||||
(* | Geq {value = expr1, geq, expr2; _} -> *)
|
||||
(* s_expr expr1; s_token geq ">="; s_expr expr2 *)
|
||||
(* | Equal {value = expr1, equal, expr2; _} -> *)
|
||||
(* s_expr expr1; s_token equal "="; s_expr expr2 *)
|
||||
(* | Neq {value = expr1, neq, expr2; _} -> *)
|
||||
(* s_expr expr1; s_token neq "=/="; s_expr expr2 *)
|
||||
(* | Cat {value = expr1, cat, expr2; _} -> *)
|
||||
(* s_expr expr1; s_token cat "^"; s_expr expr2 *)
|
||||
(* | Cons {value = expr1, cons, expr2; _} -> *)
|
||||
(* s_expr expr1; s_token cons "<:"; s_expr expr2 *)
|
||||
(* | Add {value = expr1, add, expr2; _} -> *)
|
||||
(* s_expr expr1; s_token add "+"; s_expr expr2 *)
|
||||
(* | Sub {value = expr1, sub, expr2; _} -> *)
|
||||
(* s_expr expr1; s_token sub "-"; s_expr expr2 *)
|
||||
(* | Mult {value = expr1, mult, expr2; _} -> *)
|
||||
(* s_expr expr1; s_token mult "*"; s_expr expr2 *)
|
||||
(* | Div {value = expr1, div, expr2; _} -> *)
|
||||
(* s_expr expr1; s_token div "/"; s_expr expr2 *)
|
||||
(* | Mod {value = expr1, kwd_mod, expr2; _} -> *)
|
||||
(* s_expr expr1; s_token kwd_mod "mod"; s_expr expr2 *)
|
||||
(* | Neg {value = minus, expr; _} -> *)
|
||||
(* s_token minus "-"; s_expr expr *)
|
||||
(* | Not {value = kwd_not, expr; _} -> *)
|
||||
(* s_token kwd_not "not"; s_expr expr *)
|
||||
(* | Int i -> s_int i *)
|
||||
(* | Var var -> s_var var *)
|
||||
(* | String s -> s_string s *)
|
||||
(* | Bytes b -> s_bytes b *)
|
||||
(* | False region -> s_token region "False" *)
|
||||
(* | True region -> s_token region "True" *)
|
||||
(* | Unit region -> s_token region "Unit" *)
|
||||
(* | Tuple tuple -> s_tuple tuple *)
|
||||
(* | List list -> s_list list *)
|
||||
(* | EmptyList elist -> s_empty_list elist *)
|
||||
(* | Set set -> s_set set *)
|
||||
(* | EmptySet eset -> s_empty_set eset *)
|
||||
(* | NoneExpr nexpr -> s_none_expr nexpr *)
|
||||
(* | FunCall fun_call -> s_fun_call fun_call *)
|
||||
(* | ConstrApp capp -> s_constr_app capp *)
|
||||
(* | SomeApp sapp -> s_some_app sapp *)
|
||||
(* | MapLookUp lookup -> s_map_lookup lookup *)
|
||||
(* | ParExpr pexpr -> s_par_expr pexpr *)
|
||||
|
||||
(* and s_list {value=node; _} = *)
|
||||
(* let lbra, sequence, rbra = node in *)
|
||||
(* s_token lbra "["; *)
|
||||
(* s_nsepseq "," s_expr sequence; *)
|
||||
(* s_token rbra "]" *)
|
||||
|
||||
(* and s_empty_list {value=node; _} = *)
|
||||
(* let lpar, (lbracket, rbracket, colon, type_expr), rpar = node in *)
|
||||
(* s_token lpar "("; *)
|
||||
(* s_token lbracket "["; *)
|
||||
(* s_token rbracket "]"; *)
|
||||
(* s_token colon ":"; *)
|
||||
(* s_type_expr type_expr; *)
|
||||
(* s_token rpar ")" *)
|
||||
|
||||
(* and s_set {value=node; _} = *)
|
||||
(* let lbrace, sequence, rbrace = node in *)
|
||||
(* s_token lbrace "{"; *)
|
||||
(* s_nsepseq "," s_expr sequence; *)
|
||||
(* s_token rbrace "}" *)
|
||||
|
||||
(* and s_empty_set {value=node; _} = *)
|
||||
(* let lpar, (lbrace, rbrace, colon, type_expr), rpar = node in *)
|
||||
(* s_token lpar "("; *)
|
||||
(* s_token lbrace "{"; *)
|
||||
(* s_token rbrace "}"; *)
|
||||
(* s_token colon ":"; *)
|
||||
(* s_type_expr type_expr; *)
|
||||
(* s_token rpar ")" *)
|
||||
|
||||
(* and s_none_expr {value=node; _} = *)
|
||||
(* let lpar, (c_None, colon, type_expr), rpar = node in *)
|
||||
(* s_token lpar "("; *)
|
||||
(* s_token c_None "None"; *)
|
||||
(* s_token colon ":"; *)
|
||||
(* s_type_expr type_expr; *)
|
||||
(* s_token rpar ")" *)
|
||||
|
||||
(* and s_constr_app {value=node; _} = *)
|
||||
(* let constr, arguments = node in *)
|
||||
(* s_constr constr; *)
|
||||
(* s_tuple arguments *)
|
||||
|
||||
(* and s_some_app {value=node; _} = *)
|
||||
(* let c_Some, arguments = node in *)
|
||||
(* s_token c_Some "Some"; *)
|
||||
(* s_tuple arguments *)
|
||||
|
||||
|
||||
(* and s_par_expr {value=node; _} = *)
|
||||
(* let lpar, expr, rpar = node in *)
|
||||
(* s_token lpar "("; *)
|
||||
(* s_expr expr; *)
|
||||
(* s_token rpar ")" *)
|
||||
|
||||
(* and s_psome {value=node; _} = *)
|
||||
(* let c_Some, patterns = node in *)
|
||||
(* s_token c_Some "Some"; *)
|
||||
(* s_patterns patterns *)
|
||||
|
||||
|
||||
(* and s_terminator = function *)
|
||||
(* Some semi -> s_token semi ";" *)
|
||||
(* | None -> () *)
|
3
src/ligo/ligo-parser/Error.mli
Normal file
3
src/ligo/ligo-parser/Error.mli
Normal file
@ -0,0 +1,3 @@
|
||||
(** Extensible sum of all compiler errors; each pass extends it with
    its own constructors. *)
type t = ..

(** Alias of {!t}, for readability where [Error.t] would be unclear. *)
type error = t
|
161
src/ligo/ligo-parser/EvalOpt.ml
Normal file
161
src/ligo/ligo-parser/EvalOpt.ml
Normal file
@ -0,0 +1,161 @@
|
||||
(* Parsing the command-line option for testing the LIGO lexer and
|
||||
parser *)
|
||||
|
||||
let printf = Printf.printf
|
||||
let sprintf = Printf.sprintf
|
||||
|
||||
(* Prints a highlighted command-line error message and exits with
   status 1. *)
let abort msg =
  Utils.highlight (sprintf "Command-line error: %s\n" msg); exit 1

(* Help *)

(* Prints the usage/help screen on standard output and exits 0. *)
let help () =
  let file = Filename.basename Sys.argv.(0) in
  printf "Usage: %s [<option> ...] [<input>.ligo | \"-\"]\n" file;
  print_endline "where <input>.ligo is the LIGO source file (default: stdin),";
  print_endline "and each <option> (if any) is one of the following:";
  print_endline "  -I <paths>         Library paths (colon-separated)";
  print_endline "  -c, --copy         Print lexemes of tokens and markup (lexer)";
  print_endline "  -t, --tokens       Print tokens (lexer)";
  print_endline "  -u, --units        Print tokens and markup (lexer)";
  print_endline "  -q, --quiet        No output, except errors (default)";
  print_endline "      --columns      Columns for source locations";
  print_endline "      --bytes        Bytes for source locations";
  print_endline "      --verbose=<stages> cmdline, cpp, ast (colon-separated)";
  print_endline "      --version      Commit hash on stdout";
  print_endline "  -h, --help         This help";
  exit 0

(* Version *)

(* Prints the current commit hash (from the generated [Version]
   module) and exits 0. *)
let version () = printf "%s\n" Version.version; exit 0
|
||||
|
||||
(* Specifying the command-line options a la GNU *)
|
||||
|
||||
(* Mutable state filled in by [Getopt] while scanning the command
   line; re-exported as immutable values at the end of this module. *)
let copy = ref false
and tokens = ref false
and units = ref false
and quiet = ref false
and columns = ref false
and bytes = ref false
and verbose = ref Utils.String.Set.empty
and input = ref None
and libs = ref []

(* Splits a colon-separated command-line argument. *)
let split_at_colon = Str.(split (regexp ":"))

(* Accumulates library paths from (possibly repeated) [-I] options. *)
let add_path p = libs := !libs @ split_at_colon p

(* Accumulates stage names from [--verbose=<stages>]. *)
let add_verbose d =
  verbose := List.fold_left (Utils.swap Utils.String.Set.add)
                            !verbose
                            (split_at_colon d)
|
||||
|
||||
(* GNU-style option specifications for [Getopt]: each entry is
   (short option, long option, action without argument, action with
   argument). *)
let specs =
  let open! Getopt in [
    'I', nolong, None, Some add_path;
    'c', "copy", set copy true, None;
    't', "tokens", set tokens true, None;
    'u', "units", set units true, None;
    'q', "quiet", set quiet true, None;
    noshort, "columns", set columns true, None;
    noshort, "bytes", set bytes true, None;
    noshort, "verbose", None, Some add_verbose;
    'h', "help", Some help, None;
    noshort, "version", Some version, None
  ]
;;

(* Handler of anonymous arguments *)

(* At most one anonymous argument (the input file) is accepted. *)
let anonymous arg =
  match !input with
    None -> input := Some arg
  | Some _ -> abort (sprintf "Multiple inputs")
;;
|
||||
|
||||
(* Parsing the command-line options *)
|
||||
|
||||
(* Parses the command line at module initialization; any [Getopt]
   error is fatal. *)
try Getopt.parse_cmdline specs anonymous with
  Getopt.Error msg -> abort msg
;;

(* Checking options and exporting them as non-mutable values *)

(* The lexer-testing command requested on the command line. *)
type command = Quiet | Copy | Units | Tokens

(* The flags -q, -c, -u and -t are mutually exclusive; giving none of
   them defaults to [Quiet]. *)
let cmd =
  match !quiet, !copy, !units, !tokens with
    false, false, false, false
  | true, false, false, false -> Quiet
  | false, true, false, false -> Copy
  | false, false, true, false -> Units
  | false, false, false, true -> Tokens
  | _ -> abort "Choose one of -q, -c, -u, -t."
|
||||
|
||||
(* Pretty-prints an option, using [convert] for its payload. *)
let string_of convert = function
  None -> "None"
| Some s -> sprintf "Some %s" (convert s)
|
||||
|
||||
(* Joins library paths with ':' separators; a separator is only added
   in front of a non-empty remainder. *)
let string_of_path p =
  let join part acc =
    match acc with
      "" -> part
    | _ -> part ^ ":" ^ acc
  in List.fold_right join p ""
|
||||
|
||||
(* Wraps a string in double quotes for display. *)
let quote s = sprintf "\"%s\"" s

(* Comma-separated rendering of the requested verbose stages. *)
let verbose_str =
  let apply e a =
    if a <> "" then sprintf "%s, %s" e a else e
  in Utils.String.Set.fold apply !verbose ""

(* Dumps the raw (mutable) command-line state, for debugging with
   [--verbose=cmdline]. *)
let print_opt () =
  printf "COMMAND LINE\n";
  printf "copy = %b\n" !copy;
  printf "tokens = %b\n" !tokens;
  printf "units = %b\n" !units;
  printf "quiet = %b\n" !quiet;
  printf "columns = %b\n" !columns;
  printf "bytes = %b\n" !bytes;
  printf "verbose = \"%s\"\n" verbose_str;
  printf "input = %s\n" (string_of quote !input);
  printf "libs = %s\n" (string_of_path !libs)
;;

if Utils.String.Set.mem "cmdline" !verbose then print_opt ();;
|
||||
|
||||
(* Validates the input: [None] or "-" means standard input; otherwise
   the file must exist and bear the ".ligo" extension. *)
let input =
  match !input with
    None | Some "-" -> !input
  | Some file_path ->
      if Filename.check_suffix file_path ".ligo"
      then if Sys.file_exists file_path
           then Some file_path
           else abort "Source file not found."
      else abort "Source file lacks the extension .ligo."
|
||||
|
||||
(* Exporting remaining options as non-mutable values *)
|
||||
|
||||
(* Freezes the option references into the immutable values declared
   in the interface (.mli). *)
let copy = !copy
and tokens = !tokens
and units = !units
and quiet = !quiet
and offsets = not !columns
and mode = if !bytes then `Byte else `Point
and verbose = !verbose
and libs = !libs
;;

(* Optional dump of the exported (frozen) values. *)
if Utils.String.Set.mem "cmdline" verbose then
  begin
    printf "\nEXPORTED COMMAND LINE\n";
    printf "copy = %b\n" copy;
    printf "tokens = %b\n" tokens;
    printf "units = %b\n" units;
    printf "quiet = %b\n" quiet;
    printf "offsets = %b\n" offsets;
    printf "mode = %s\n" (if mode = `Byte then "`Byte" else "`Point");
    printf "verbose = \"%s\"\n" verbose_str;
    printf "input = %s\n" (string_of quote input);
    printf "I = %s\n" (string_of_path libs)
  end
;;
|
46
src/ligo/ligo-parser/EvalOpt.mli
Normal file
46
src/ligo/ligo-parser/EvalOpt.mli
Normal file
@ -0,0 +1,46 @@
|
||||
(* Parsing the command-line option for testing the LIGO lexer and
|
||||
parser *)
|
||||
|
||||
(* If the value [offsets] is [true], then the user requested that
|
||||
messages about source positions and regions be expressed in terms
|
||||
of horizontal offsets. *)
|
||||
|
||||
val offsets : bool
|
||||
|
||||
(* If the value [mode] is [`Byte], then the unit in which source
|
||||
positions and regions are expressed in messages is the byte. If
|
||||
[`Point], the unit is unicode points. *)
|
||||
|
||||
val mode : [`Byte | `Point]
|
||||
|
||||
(* If the option [verbose] is set to a list of predefined stages of
|
||||
the compiler chain, then more information may be displayed about
|
||||
those stages. *)
|
||||
|
||||
val verbose : Utils.String.Set.t
|
||||
|
||||
(* If the value [input] is [None] or [Some "-"], the input is standard
|
||||
input. If [Some f], then the input is the file whose name (file
|
||||
path) is [f]. *)
|
||||
|
||||
val input : string option
|
||||
|
||||
(* Paths where to find LIGO files for inclusion *)
|
||||
|
||||
val libs : string list
|
||||
|
||||
(* If the value [cmd] is
|
||||
* [Quiet], then no output from the lexer and parser should be
|
||||
expected, safe error messages: this is the default value;
|
||||
* [Copy], then lexemes of tokens and markup will be printed to
|
||||
standard output, with the expectation of a perfect match with
|
||||
the input file;
|
||||
* [Units], then the tokens and markup will be printed to standard
|
||||
output, that is, the abstract representation of the concrete
|
||||
lexical syntax;
|
||||
* [Tokens], then the tokens only will be printed.
|
||||
*)
|
||||
|
||||
type command = Quiet | Copy | Units | Tokens
|
||||
|
||||
val cmd : command
|
19
src/ligo/ligo-parser/FQueue.ml
Normal file
19
src/ligo/ligo-parser/FQueue.ml
Normal file
@ -0,0 +1,19 @@
|
||||
(* First-in-first-out queues implemented functionally as a pair of
   lists: [front] holds the next items to dequeue, in order, while
   [rear] holds the enqueued items in reverse order. *)

type 'a t = {rear : 'a list; front : 'a list}

(* The queue with no items. *)
let empty = {rear = []; front = []}

(* [enq x q] adds [x] at the back of [q]. *)
let enq x q = {q with rear = x :: q.rear}

(* [deq q] removes the oldest item of [q], transferring the reversed
   rear list to the front when the latter is exhausted. *)
let rec deq q =
  match q with
    {rear = []; front = []} -> None
  | {rear; front = []} -> deq {rear = []; front = List.rev rear}
  | {rear; front = first :: rest} -> Some ({rear; front = rest}, first)

(* [peek q] is like [deq q], except that the returned (possibly
   normalized) queue still contains the inspected item. *)
let rec peek q =
  match q with
    {rear = []; front = []} -> None
  | {rear; front = []} -> peek {rear = []; front = List.rev rear}
  | {rear = _; front = first :: _} -> Some (q, first)

(* A queue is empty if, and only if, both lists are. *)
let is_empty q = (q = empty)
|
17
src/ligo/ligo-parser/FQueue.mli
Normal file
17
src/ligo/ligo-parser/FQueue.mli
Normal file
@ -0,0 +1,17 @@
|
||||
(** Purely functional (persistent) FIFO queues. *)

(** The type of queues holding items of type ['a]. *)
type 'a t

(** The empty queue. *)
val empty : 'a t

(** [enq x q] is [q] with [x] added at the back. *)
val enq : 'a -> 'a t -> 'a t

(** [deq q] is [None] if [q] is empty, and otherwise a pair of the
    remaining queue and the oldest item. *)
val deq : 'a t -> ('a t * 'a) option

(** [is_empty q] is [true] if, and only if, [q] holds no item. *)
val is_empty : 'a t -> bool

(** The call [peek q] is [None] if the queue [q] is empty, and,
    otherwise, is a pair made of a queue and the next item in it to be
    dequeued. The returned queue contains the same items as [q], in the
    same order, but more efficient, in general, to use in further
    calls. *)

val peek : 'a t -> ('a t * 'a) option
|
158
src/ligo/ligo-parser/LexToken.mli
Normal file
158
src/ligo/ligo-parser/LexToken.mli
Normal file
@ -0,0 +1,158 @@
|
||||
(* This signature defines the lexical tokens for LIGO
|
||||
|
||||
_Tokens_ are the abstract units which are used by the parser to
|
||||
build the abstract syntax tree (AST), in other words, the stream of
|
||||
tokens is the minimal model of the input program, carrying
|
||||
implicitly all its structure in a linear encoding, and nothing
|
||||
else, in particular, comments and whitespace are absent.
|
||||
|
||||
A _lexeme_ is a specific character string (concrete
|
||||
representation) denoting a token (abstract representation). Tokens
|
||||
can be thought of as sets, and lexemes as elements of those sets --
|
||||
there is often an infinite number of lexemes, but a small number of
|
||||
tokens. (Think of identifiers as lexemes and one token.)
|
||||
|
||||
The tokens are qualified here as being "lexical" because the
|
||||
parser generator Menhir expects to define them, in which context
|
||||
they are called "parsing tokens", and they are made to match each
|
||||
other. (This is an idiosyncratic terminology.)
|
||||
|
||||
The type of the lexical tokens is the variant [t], also
|
||||
aliased to [token].
|
||||
*)
|
||||
|
||||
type lexeme = string
|
||||
|
||||
(* TOKENS *)
|
||||
|
||||
type t =
|
||||
(* Literals *)
|
||||
|
||||
String of lexeme Region.reg
|
||||
| Bytes of (lexeme * Hex.t) Region.reg
|
||||
| Int of (lexeme * Z.t) Region.reg
|
||||
| Ident of lexeme Region.reg
|
||||
| Constr of lexeme Region.reg
|
||||
|
||||
(* Symbols *)
|
||||
|
||||
| SEMI of Region.t (* ";" *)
|
||||
| COMMA of Region.t (* "," *)
|
||||
| LPAR of Region.t (* "(" *)
|
||||
| RPAR of Region.t (* ")" *)
|
||||
| LBRACE of Region.t (* "{" *)
|
||||
| RBRACE of Region.t (* "}" *)
|
||||
| LBRACKET of Region.t (* "[" *)
|
||||
| RBRACKET of Region.t (* "]" *)
|
||||
| CONS of Region.t (* "#" *)
|
||||
| VBAR of Region.t (* "|" *)
|
||||
| ARROW of Region.t (* "->" *)
|
||||
| ASS of Region.t (* ":=" *)
|
||||
| EQUAL of Region.t (* "=" *)
|
||||
| COLON of Region.t (* ":" *)
|
||||
| LT of Region.t (* "<" *)
|
||||
| LEQ of Region.t (* "<=" *)
|
||||
| GT of Region.t (* ">" *)
|
||||
| GEQ of Region.t (* ">=" *)
|
||||
| NEQ of Region.t (* "=/=" *)
|
||||
| PLUS of Region.t (* "+" *)
|
||||
| MINUS of Region.t (* "-" *)
|
||||
| SLASH of Region.t (* "/" *)
|
||||
| TIMES of Region.t (* "*" *)
|
||||
| DOT of Region.t (* "." *)
|
||||
| WILD of Region.t (* "_" *)
|
||||
| CAT of Region.t (* "^" *)
|
||||
|
||||
(* Keywords *)
|
||||
|
||||
| And of Region.t (* "and" *)
|
||||
| Begin of Region.t (* "begin" *)
|
||||
| Block of Region.t (* "block" *)
|
||||
| Case of Region.t (* "case" *)
|
||||
| Const of Region.t (* "const" *)
|
||||
| Contains of Region.t (* "contains" *)
|
||||
| Down of Region.t (* "down" *)
|
||||
| Else of Region.t (* "else" *)
|
||||
| End of Region.t (* "end" *)
|
||||
| Entrypoint of Region.t (* "entrypoint" *)
|
||||
| Fail of Region.t (* "fail" *)
|
||||
| For of Region.t (* "for" *)
|
||||
| From of Region.t (* "from" *)
|
||||
| Function of Region.t (* "function" *)
|
||||
| If of Region.t (* "if" *)
|
||||
| In of Region.t (* "in" *)
|
||||
| Is of Region.t (* "is" *)
|
||||
| List of Region.t (* "list" *)
|
||||
| Map of Region.t (* "map" *)
|
||||
| Mod of Region.t (* "mod" *)
|
||||
| Nil of Region.t (* "nil" *)
|
||||
| Not of Region.t (* "not" *)
|
||||
| Of of Region.t (* "of" *)
|
||||
| Or of Region.t (* "or" *)
|
||||
| Patch of Region.t (* "patch" *)
|
||||
| Procedure of Region.t (* "procedure" *)
|
||||
| Record of Region.t (* "record" *)
|
||||
| Remove of Region.t (* "remove" *)
|
||||
| Set of Region.t (* "set" *)
|
||||
| Skip of Region.t (* "skip" *)
|
||||
| Step of Region.t (* "step" *)
|
||||
| Storage of Region.t (* "storage" *)
|
||||
| Then of Region.t (* "then" *)
|
||||
| To of Region.t (* "to" *)
|
||||
| Type of Region.t (* "type" *)
|
||||
| Var of Region.t (* "var" *)
|
||||
| While of Region.t (* "while" *)
|
||||
| With of Region.t (* "with" *)
|
||||
|
||||
(* Data constructors *)
|
||||
|
||||
| C_False of Region.t (* "False" *)
|
||||
| C_None of Region.t (* "None" *)
|
||||
| C_Some of Region.t (* "Some" *)
|
||||
| C_True of Region.t (* "True" *)
|
||||
| C_Unit of Region.t (* "Unit" *)
|
||||
|
||||
(* Virtual tokens *)
|
||||
|
||||
| EOF of Region.t
|
||||
|
||||
|
||||
type token = t
|
||||
|
||||
(* Projections
|
||||
|
||||
The difference between extracting the lexeme and a string from a
|
||||
token is that the latter is the textual representation of the OCaml
|
||||
value denoting the token (its abstract syntax), rather than its
|
||||
lexeme (concrete syntax).
|
||||
*)
|
||||
|
||||
val to_lexeme : token -> lexeme
|
||||
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
|
||||
val to_region : token -> Region.t
|
||||
|
||||
(* Injections *)
|
||||
|
||||
type int_err =
|
||||
Non_canonical_zero
|
||||
|
||||
type ident_err = Reserved_name
|
||||
|
||||
val mk_string : lexeme -> Region.t -> token
|
||||
val mk_bytes : lexeme -> Region.t -> token
|
||||
val mk_int : lexeme -> Region.t -> (token, int_err) result
|
||||
val mk_ident : lexeme -> Region.t -> (token, ident_err) result
|
||||
val mk_constr : lexeme -> Region.t -> token
|
||||
val mk_sym : lexeme -> Region.t -> token
|
||||
val eof : Region.t -> token
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
val is_string : token -> bool
|
||||
val is_bytes : token -> bool
|
||||
val is_int : token -> bool
|
||||
val is_ident : token -> bool
|
||||
val is_kwd : token -> bool
|
||||
val is_constr : token -> bool
|
||||
val is_sym : token -> bool
|
||||
val is_eof : token -> bool
|
659
src/ligo/ligo-parser/LexToken.mll
Normal file
659
src/ligo/ligo-parser/LexToken.mll
Normal file
@ -0,0 +1,659 @@
|
||||
(* Lexer specification for LIGO, to be processed by [ocamllex] *)
|
||||
|
||||
{
|
||||
(* START HEADER *)
|
||||
|
||||
(* Shorthands *)
|
||||
|
||||
type lexeme = string
|
||||
|
||||
let sprintf = Printf.sprintf
|
||||
|
||||
module SMap = Utils.String.Map
|
||||
module SSet = Utils.String.Set
|
||||
|
||||
(* Hack to roll back one lexeme in the current semantic action *)
|
||||
(*
|
||||
let rollback buffer =
|
||||
let open Lexing in
|
||||
let len = String.length (lexeme buffer) in
|
||||
let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
|
||||
buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
|
||||
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
|
||||
*)
|
||||
|
||||
(* TOKENS *)
|
||||
|
||||
type t =
|
||||
(* Literals *)
|
||||
|
||||
String of lexeme Region.reg
|
||||
| Bytes of (lexeme * Hex.t) Region.reg
|
||||
| Int of (lexeme * Z.t) Region.reg
|
||||
| Ident of lexeme Region.reg
|
||||
| Constr of lexeme Region.reg
|
||||
|
||||
(* Symbols *)
|
||||
|
||||
| SEMI of Region.t
|
||||
| COMMA of Region.t
|
||||
| LPAR of Region.t
|
||||
| RPAR of Region.t
|
||||
| LBRACE of Region.t
|
||||
| RBRACE of Region.t
|
||||
| LBRACKET of Region.t
|
||||
| RBRACKET of Region.t
|
||||
| CONS of Region.t
|
||||
| VBAR of Region.t
|
||||
| ARROW of Region.t
|
||||
| ASS of Region.t
|
||||
| EQUAL of Region.t
|
||||
| COLON of Region.t
|
||||
| LT of Region.t
|
||||
| LEQ of Region.t
|
||||
| GT of Region.t
|
||||
| GEQ of Region.t
|
||||
| NEQ of Region.t
|
||||
| PLUS of Region.t
|
||||
| MINUS of Region.t
|
||||
| SLASH of Region.t
|
||||
| TIMES of Region.t
|
||||
| DOT of Region.t
|
||||
| WILD of Region.t
|
||||
| CAT of Region.t
|
||||
|
||||
(* Keywords *)
|
||||
|
||||
| And of Region.t (* "and" *)
|
||||
| Begin of Region.t (* "begin" *)
|
||||
| Block of Region.t (* "block" *)
|
||||
| Case of Region.t (* "case" *)
|
||||
| Const of Region.t (* "const" *)
|
||||
| Contains of Region.t (* "contains" *)
|
||||
| Down of Region.t (* "down" *)
|
||||
| Else of Region.t (* "else" *)
|
||||
| End of Region.t (* "end" *)
|
||||
| Entrypoint of Region.t (* "entrypoint" *)
|
||||
| Fail of Region.t (* "fail" *)
|
||||
| For of Region.t (* "for" *)
|
||||
| From of Region.t (* "from" *)
|
||||
| Function of Region.t (* "function" *)
|
||||
| If of Region.t (* "if" *)
|
||||
| In of Region.t (* "in" *)
|
||||
| Is of Region.t (* "is" *)
|
||||
| List of Region.t (* "list" *)
|
||||
| Map of Region.t (* "map" *)
|
||||
| Mod of Region.t (* "mod" *)
|
||||
| Nil of Region.t (* "nil" *)
|
||||
| Not of Region.t (* "not" *)
|
||||
| Of of Region.t (* "of" *)
|
||||
| Or of Region.t (* "or" *)
|
||||
| Patch of Region.t (* "patch" *)
|
||||
| Procedure of Region.t (* "procedure" *)
|
||||
| Record of Region.t (* "record" *)
|
||||
| Remove of Region.t (* "remove" *)
|
||||
| Set of Region.t (* "set" *)
|
||||
| Skip of Region.t (* "skip" *)
|
||||
| Step of Region.t (* "step" *)
|
||||
| Storage of Region.t (* "storage" *)
|
||||
| Then of Region.t (* "then" *)
|
||||
| To of Region.t (* "to" *)
|
||||
| Type of Region.t (* "type" *)
|
||||
| Var of Region.t (* "var" *)
|
||||
| While of Region.t (* "while" *)
|
||||
| With of Region.t (* "with" *)
|
||||
|
||||
(* Types *)
|
||||
(*
|
||||
| T_address of Region.t (* "address" *)
|
||||
| T_big_map of Region.t (* "big_map" *)
|
||||
| T_bool of Region.t (* "bool" *)
|
||||
| T_bytes of Region.t (* "bytes" *)
|
||||
| T_contract of Region.t (* "contract" *)
|
||||
| T_int of Region.t (* "int" *)
|
||||
| T_key of Region.t (* "key" *)
|
||||
| T_key_hash of Region.t (* "key_hash" *)
|
||||
| T_list of Region.t (* "list" *)
|
||||
| T_map of Region.t (* "map" *)
|
||||
| T_mutez of Region.t (* "mutez" *)
|
||||
| T_nat of Region.t (* "nat" *)
|
||||
| T_operation of Region.t (* "operation" *)
|
||||
| T_option of Region.t (* "option" *)
|
||||
| T_set of Region.t (* "set" *)
|
||||
| T_signature of Region.t (* "signature" *)
|
||||
| T_string of Region.t (* "string" *)
|
||||
| T_timestamp of Region.t (* "timestamp" *)
|
||||
| T_unit of Region.t (* "unit" *)
|
||||
*)
|
||||
(* Data constructors *)
|
||||
|
||||
| C_False of Region.t (* "False" *)
|
||||
| C_None of Region.t (* "None" *)
|
||||
| C_Some of Region.t (* "Some" *)
|
||||
| C_True of Region.t (* "True" *)
|
||||
| C_Unit of Region.t (* "Unit" *)
|
||||
|
||||
(* Virtual tokens *)
|
||||
|
||||
| EOF of Region.t
|
||||
|
||||
|
||||
type token = t
|
||||
|
||||
let proj_token = function
|
||||
(* Literals *)
|
||||
|
||||
String Region.{region; value} ->
|
||||
region, sprintf "String %s" value
|
||||
|
||||
| Bytes Region.{region; value = s,b} ->
|
||||
region,
|
||||
sprintf "Bytes (\"%s\", \"0x%s\")"
|
||||
s (Hex.to_string b)
|
||||
|
||||
| Int Region.{region; value = s,n} ->
|
||||
region, sprintf "Int (\"%s\", %s)" s (Z.to_string n)
|
||||
|
||||
| Ident Region.{region; value} ->
|
||||
region, sprintf "Ident \"%s\"" value
|
||||
|
||||
| Constr Region.{region; value} ->
|
||||
region, sprintf "Constr \"%s\"" value
|
||||
|
||||
(* Symbols *)
|
||||
|
||||
| SEMI region -> region, "SEMI"
|
||||
| COMMA region -> region, "COMMA"
|
||||
| LPAR region -> region, "LPAR"
|
||||
| RPAR region -> region, "RPAR"
|
||||
| LBRACE region -> region, "LBRACE"
|
||||
| RBRACE region -> region, "RBRACE"
|
||||
| LBRACKET region -> region, "LBRACKET"
|
||||
| RBRACKET region -> region, "RBRACKET"
|
||||
| CONS region -> region, "CONS"
|
||||
| VBAR region -> region, "VBAR"
|
||||
| ARROW region -> region, "ARROW"
|
||||
| ASS region -> region, "ASS"
|
||||
| EQUAL region -> region, "EQUAL"
|
||||
| COLON region -> region, "COLON"
|
||||
| LT region -> region, "LT"
|
||||
| LEQ region -> region, "LEQ"
|
||||
| GT region -> region, "GT"
|
||||
| GEQ region -> region, "GEQ"
|
||||
| NEQ region -> region, "NEQ"
|
||||
| PLUS region -> region, "PLUS"
|
||||
| MINUS region -> region, "MINUS"
|
||||
| SLASH region -> region, "SLASH"
|
||||
| TIMES region -> region, "TIMES"
|
||||
| DOT region -> region, "DOT"
|
||||
| WILD region -> region, "WILD"
|
||||
| CAT region -> region, "CAT"
|
||||
|
||||
(* Keywords *)
|
||||
|
||||
| And region -> region, "And"
|
||||
| Begin region -> region, "Begin"
|
||||
| Block region -> region, "Block"
|
||||
| Case region -> region, "Case"
|
||||
| Const region -> region, "Const"
|
||||
| Contains region -> region, "Contains"
|
||||
| Down region -> region, "Down"
|
||||
| Else region -> region, "Else"
|
||||
| End region -> region, "End"
|
||||
| Entrypoint region -> region, "Entrypoint"
|
||||
| Fail region -> region, "Fail"
|
||||
| For region -> region, "For"
|
||||
| From region -> region, "From"
|
||||
| Function region -> region, "Function"
|
||||
| If region -> region, "If"
|
||||
| In region -> region, "In"
|
||||
| Is region -> region, "Is"
|
||||
| List region -> region, "List"
|
||||
| Map region -> region, "Map"
|
||||
| Mod region -> region, "Mod"
|
||||
| Nil region -> region, "Nil"
|
||||
| Not region -> region, "Not"
|
||||
| Of region -> region, "Of"
|
||||
| Or region -> region, "Or"
|
||||
| Patch region -> region, "Patch"
|
||||
| Procedure region -> region, "Procedure"
|
||||
| Record region -> region, "Record"
|
||||
| Remove region -> region, "Remove"
|
||||
| Set region -> region, "Set"
|
||||
| Skip region -> region, "Skip"
|
||||
| Step region -> region, "Step"
|
||||
| Storage region -> region, "Storage"
|
||||
| Then region -> region, "Then"
|
||||
| To region -> region, "To"
|
||||
| Type region -> region, "Type"
|
||||
| Var region -> region, "Var"
|
||||
| While region -> region, "While"
|
||||
| With region -> region, "With"
|
||||
|
||||
(* Data *)
|
||||
|
||||
| C_False region -> region, "C_False"
|
||||
| C_None region -> region, "C_None"
|
||||
| C_Some region -> region, "C_Some"
|
||||
| C_True region -> region, "C_True"
|
||||
| C_Unit region -> region, "C_Unit"
|
||||
|
||||
(* Virtual tokens *)
|
||||
|
||||
| EOF region -> region, "EOF"
|
||||
|
||||
|
||||
let to_lexeme = function
|
||||
(* Literals *)
|
||||
|
||||
String s -> s.Region.value
|
||||
| Bytes b -> fst b.Region.value
|
||||
| Int i -> fst i.Region.value
|
||||
| Ident id
|
||||
| Constr id -> id.Region.value
|
||||
|
||||
(* Symbols *)
|
||||
|
||||
| SEMI _ -> ";"
|
||||
| COMMA _ -> ","
|
||||
| LPAR _ -> "("
|
||||
| RPAR _ -> ")"
|
||||
| LBRACE _ -> "{"
|
||||
| RBRACE _ -> "}"
|
||||
| LBRACKET _ -> "["
|
||||
| RBRACKET _ -> "]"
|
||||
| CONS _ -> "#"
|
||||
| VBAR _ -> "|"
|
||||
| ARROW _ -> "->"
|
||||
| ASS _ -> ":="
|
||||
| EQUAL _ -> "="
|
||||
| COLON _ -> ":"
|
||||
| LT _ -> "<"
|
||||
| LEQ _ -> "<="
|
||||
| GT _ -> ">"
|
||||
| GEQ _ -> ">="
|
||||
| NEQ _ -> "=/="
|
||||
| PLUS _ -> "+"
|
||||
| MINUS _ -> "-"
|
||||
| SLASH _ -> "/"
|
||||
| TIMES _ -> "*"
|
||||
| DOT _ -> "."
|
||||
| WILD _ -> "_"
|
||||
| CAT _ -> "^"
|
||||
|
||||
(* Keywords *)
|
||||
|
||||
| And _ -> "and"
|
||||
| Begin _ -> "begin"
|
||||
| Block _ -> "block"
|
||||
| Case _ -> "case"
|
||||
| Const _ -> "const"
|
||||
| Contains _ -> "contains"
|
||||
| Down _ -> "down"
|
||||
| Else _ -> "else"
|
||||
| End _ -> "end"
|
||||
| Entrypoint _ -> "entrypoint"
|
||||
| Fail _ -> "fail"
|
||||
| For _ -> "for"
|
||||
| From _ -> "from"
|
||||
| Function _ -> "function"
|
||||
| If _ -> "if"
|
||||
| In _ -> "in"
|
||||
| Is _ -> "is"
|
||||
| List _ -> "list"
|
||||
| Map _ -> "map"
|
||||
| Mod _ -> "mod"
|
||||
| Nil _ -> "nil"
|
||||
| Not _ -> "not"
|
||||
| Of _ -> "of"
|
||||
| Or _ -> "or"
|
||||
| Patch _ -> "patch"
|
||||
| Procedure _ -> "procedure"
|
||||
| Record _ -> "record"
|
||||
| Remove _ -> "remove"
|
||||
| Set _ -> "set"
|
||||
| Skip _ -> "skip"
|
||||
| Step _ -> "step"
|
||||
| Storage _ -> "storage"
|
||||
| Then _ -> "then"
|
||||
| To _ -> "to"
|
||||
| Type _ -> "type"
|
||||
| Var _ -> "var"
|
||||
| While _ -> "while"
|
||||
| With _ -> "with"
|
||||
|
||||
(* Data constructors *)
|
||||
|
||||
| C_False _ -> "False"
|
||||
| C_None _ -> "None"
|
||||
| C_Some _ -> "Some"
|
||||
| C_True _ -> "True"
|
||||
| C_Unit _ -> "Unit"
|
||||
|
||||
(* Virtual tokens *)
|
||||
|
||||
| EOF _ -> ""
|
||||
|
||||
|
||||
let to_string token ?(offsets=true) mode =
|
||||
let region, val_str = proj_token token in
|
||||
let reg_str = region#compact ~offsets mode
|
||||
in sprintf "%s: %s" reg_str val_str
|
||||
|
||||
let to_region token = proj_token token |> fst
|
||||
|
||||
(* LEXIS *)
|
||||
|
||||
let keywords = [
|
||||
(fun reg -> And reg);
|
||||
(fun reg -> Begin reg);
|
||||
(fun reg -> Block reg);
|
||||
(fun reg -> Case reg);
|
||||
(fun reg -> Const reg);
|
||||
(fun reg -> Contains reg);
|
||||
(fun reg -> Down reg);
|
||||
(fun reg -> Else reg);
|
||||
(fun reg -> End reg);
|
||||
(fun reg -> Entrypoint reg);
|
||||
(fun reg -> For reg);
|
||||
(fun reg -> From reg);
|
||||
(fun reg -> Function reg);
|
||||
(fun reg -> Fail reg);
|
||||
(fun reg -> If reg);
|
||||
(fun reg -> In reg);
|
||||
(fun reg -> Is reg);
|
||||
(fun reg -> List reg);
|
||||
(fun reg -> Map reg);
|
||||
(fun reg -> Mod reg);
|
||||
(fun reg -> Nil reg);
|
||||
(fun reg -> Not reg);
|
||||
(fun reg -> Of reg);
|
||||
(fun reg -> Or reg);
|
||||
(fun reg -> Patch reg);
|
||||
(fun reg -> Procedure reg);
|
||||
(fun reg -> Record reg);
|
||||
(fun reg -> Remove reg);
|
||||
(fun reg -> Set reg);
|
||||
(fun reg -> Skip reg);
|
||||
(fun reg -> Step reg);
|
||||
(fun reg -> Storage reg);
|
||||
(fun reg -> Then reg);
|
||||
(fun reg -> To reg);
|
||||
(fun reg -> Type reg);
|
||||
(fun reg -> Var reg);
|
||||
(fun reg -> While reg);
|
||||
(fun reg -> With reg)
|
||||
]
|
||||
|
||||
let reserved =
|
||||
let open SSet in
|
||||
empty |> add "as"
|
||||
|> add "asr"
|
||||
|> add "assert"
|
||||
|> add "class"
|
||||
|> add "constraint"
|
||||
|> add "do"
|
||||
|> add "done"
|
||||
|> add "downto"
|
||||
|> add "exception"
|
||||
|> add "external"
|
||||
|> add "false"
|
||||
|> add "fun"
|
||||
|> add "functor"
|
||||
|> add "include"
|
||||
|> add "inherit"
|
||||
|> add "initializer"
|
||||
|> add "land"
|
||||
|> add "lazy"
|
||||
|> add "let"
|
||||
|> add "lor"
|
||||
|> add "lsl"
|
||||
|> add "lsr"
|
||||
|> add "lxor"
|
||||
|> add "method"
|
||||
|> add "module"
|
||||
|> add "mutable"
|
||||
|> add "new"
|
||||
|> add "nonrec"
|
||||
|> add "object"
|
||||
|> add "open"
|
||||
|> add "private"
|
||||
|> add "rec"
|
||||
|> add "sig"
|
||||
|> add "struct"
|
||||
|> add "true"
|
||||
|> add "try"
|
||||
|> add "val"
|
||||
|> add "virtual"
|
||||
|> add "when"
|
||||
|
||||
let constructors = [
|
||||
(fun reg -> C_False reg);
|
||||
(fun reg -> C_None reg);
|
||||
(fun reg -> C_Some reg);
|
||||
(fun reg -> C_True reg);
|
||||
(fun reg -> C_Unit reg)
|
||||
]
|
||||
|
||||
let add map (key, value) = SMap.add key value map
|
||||
|
||||
let mk_map mk_key list =
|
||||
let apply map value = add map (mk_key value, value)
|
||||
in List.fold_left apply SMap.empty list
|
||||
|
||||
type lexis = {
|
||||
kwd : (Region.t -> token) SMap.t;
|
||||
cstr : (Region.t -> token) SMap.t;
|
||||
res : SSet.t
|
||||
}
|
||||
|
||||
let lexicon : lexis =
|
||||
let build list = mk_map (fun f -> to_lexeme (f Region.ghost)) list
|
||||
in {kwd = build keywords;
|
||||
cstr = build constructors;
|
||||
res = reserved}
|
||||
|
||||
(* Identifiers *)
|
||||
|
||||
type ident_err = Reserved_name
|
||||
|
||||
(* END HEADER *)
|
||||
}
|
||||
|
||||
(* START LEXER DEFINITION *)
|
||||
|
||||
(* Named regular expressions *)
|
||||
|
||||
let small = ['a'-'z']
|
||||
let capital = ['A'-'Z']
|
||||
let letter = small | capital
|
||||
let digit = ['0'-'9']
|
||||
let ident = small (letter | '_' | digit)*
|
||||
let constr = capital (letter | '_' | digit)*
|
||||
|
||||
(* Rules *)
|
||||
|
||||
rule scan_ident region lexicon = parse
|
||||
(ident as value) eof {
|
||||
if SSet.mem value lexicon.res
|
||||
then Error Reserved_name
|
||||
else Ok (match SMap.find_opt value lexicon.kwd with
|
||||
Some mk_kwd -> mk_kwd region
|
||||
| None -> Ident Region.{region; value}) }
|
||||
|
||||
and scan_constr region lexicon = parse
|
||||
(constr as value) eof {
|
||||
match SMap.find_opt value lexicon.cstr with
|
||||
Some mk_cstr -> mk_cstr region
|
||||
| None -> Constr Region.{region; value} }
|
||||
|
||||
(* END LEXER DEFINITION *)
|
||||
|
||||
{
|
||||
(* START TRAILER *)
|
||||
|
||||
(* Smart constructors (injections) *)
|
||||
|
||||
let mk_string lexeme region = String Region.{region; value=lexeme}
|
||||
|
||||
let mk_bytes lexeme region =
|
||||
let norm = Str.(global_replace (regexp "_") "" lexeme) in
|
||||
let value = lexeme, Hex.of_string norm
|
||||
in Bytes Region.{region; value}
|
||||
|
||||
type int_err = Non_canonical_zero
|
||||
|
||||
let mk_int lexeme region =
|
||||
let z = Str.(global_replace (regexp "_") "" lexeme)
|
||||
|> Z.of_string in
|
||||
if Z.equal z Z.zero && lexeme <> "0"
|
||||
then Error Non_canonical_zero
|
||||
else Ok (Int Region.{region; value = lexeme, z})
|
||||
|
||||
let eof region = EOF region
|
||||
|
||||
let mk_sym lexeme region =
|
||||
match lexeme with
|
||||
";" -> SEMI region
|
||||
| "," -> COMMA region
|
||||
| "(" -> LPAR region
|
||||
| ")" -> RPAR region
|
||||
| "{" -> LBRACE region
|
||||
| "}" -> RBRACE region
|
||||
| "[" -> LBRACKET region
|
||||
| "]" -> RBRACKET region
|
||||
| "#" -> CONS region
|
||||
| "|" -> VBAR region
|
||||
| "->" -> ARROW region
|
||||
| ":=" -> ASS region
|
||||
| "=" -> EQUAL region
|
||||
| ":" -> COLON region
|
||||
| "<" -> LT region
|
||||
| "<=" -> LEQ region
|
||||
| ">" -> GT region
|
||||
| ">=" -> GEQ region
|
||||
| "=/=" -> NEQ region
|
||||
| "+" -> PLUS region
|
||||
| "-" -> MINUS region
|
||||
| "/" -> SLASH region
|
||||
| "*" -> TIMES region
|
||||
| "." -> DOT region
|
||||
| "_" -> WILD region
|
||||
| "^" -> CAT region
|
||||
| _ -> assert false
|
||||
|
||||
(* Identifiers *)
|
||||
|
||||
let mk_ident' lexeme region lexicon =
|
||||
Lexing.from_string lexeme |> scan_ident region lexicon
|
||||
|
||||
let mk_ident lexeme region = mk_ident' lexeme region lexicon
|
||||
|
||||
(* Constructors *)
|
||||
|
||||
let mk_constr' lexeme region lexicon =
|
||||
Lexing.from_string lexeme |> scan_constr region lexicon
|
||||
|
||||
let mk_constr lexeme region = mk_constr' lexeme region lexicon
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
let is_string = function
|
||||
String _ -> true
|
||||
| _ -> false
|
||||
|
||||
let is_bytes = function
|
||||
Bytes _ -> true
|
||||
| _ -> false
|
||||
|
||||
let is_int = function
|
||||
Int _ -> true
|
||||
| _ -> false
|
||||
|
||||
let is_ident = function
|
||||
Ident _ -> true
|
||||
| _ -> false
|
||||
|
||||
let is_kwd = function
|
||||
And _
|
||||
| Begin _
|
||||
| Block _
|
||||
| Case _
|
||||
| Const _
|
||||
| Contains _
|
||||
| Down _
|
||||
| Else _
|
||||
| End _
|
||||
| Entrypoint _
|
||||
| Fail _
|
||||
| For _
|
||||
| From _
|
||||
| Function _
|
||||
| If _
|
||||
| In _
|
||||
| Is _
|
||||
| List _
|
||||
| Map _
|
||||
| Mod _
|
||||
| Nil _
|
||||
| Not _
|
||||
| Of _
|
||||
| Or _
|
||||
| Patch _
|
||||
| Procedure _
|
||||
| Record _
|
||||
| Remove _
|
||||
| Set _
|
||||
| Skip _
|
||||
| Step _
|
||||
| Storage _
|
||||
| Then _
|
||||
| To _
|
||||
| Type _
|
||||
| Var _
|
||||
| While _
|
||||
| With _ -> true
|
||||
| _ -> false
|
||||
|
||||
let is_constr = function
|
||||
Constr _
|
||||
| C_False _
|
||||
| C_None _
|
||||
| C_Some _
|
||||
| C_True _
|
||||
| C_Unit _ -> true
|
||||
| _ -> false
|
||||
|
||||
let is_sym = function
|
||||
SEMI _
|
||||
| COMMA _
|
||||
| LPAR _
|
||||
| RPAR _
|
||||
| LBRACE _
|
||||
| RBRACE _
|
||||
| LBRACKET _
|
||||
| RBRACKET _
|
||||
| CONS _
|
||||
| VBAR _
|
||||
| ARROW _
|
||||
| ASS _
|
||||
| EQUAL _
|
||||
| COLON _
|
||||
| LT _
|
||||
| LEQ _
|
||||
| GT _
|
||||
| GEQ _
|
||||
| NEQ _
|
||||
| PLUS _
|
||||
| MINUS _
|
||||
| SLASH _
|
||||
| TIMES _
|
||||
| DOT _
|
||||
| WILD _
|
||||
| CAT _ -> true
|
||||
| _ -> false
|
||||
|
||||
let is_eof = function EOF _ -> true | _ -> false
|
||||
|
||||
(* END TRAILER *)
|
||||
}
|
153
src/ligo/ligo-parser/Lexer.mli
Normal file
153
src/ligo/ligo-parser/Lexer.mli
Normal file
@ -0,0 +1,153 @@
|
||||
(* Lexer specification for LIGO, to be processed by [ocamllex].
|
||||
|
||||
The underlying design principles are:
|
||||
|
||||
(1) enforce stylistic constraints at a lexical level, in order to
|
||||
early reject potentially misleading or poorly written
|
||||
LIGO contracts;
|
||||
|
||||
(2) provide precise error messages with hint as how to fix the
|
||||
issue, which is achieved by consulting the lexical
|
||||
right-context of lexemes;
|
||||
|
||||
(3) be as independent as possible from the LIGO version, so
|
||||
upgrades have as little impact as possible on this
|
||||
specification: this is achieved by using the most general
|
||||
regular expressions to match the lexing buffer and broadly
|
||||
distinguish the syntactic categories, and then delegating a
|
||||
finer, protocol-dependent, second analysis to an external
|
||||
module making the tokens (hence a functor below);
|
||||
|
||||
(4) support unit testing (lexing of the whole input with debug
|
||||
traces);
|
||||
|
||||
The limitation to the protocol independence lies in the errors that
|
||||
the external module building the tokens (which is
|
||||
protocol-dependent) may have to report. Indeed these errors have to
|
||||
be contextualised by the lexer in terms of input source regions, so
|
||||
useful error messages can be printed, therefore they are part of
|
||||
the signature [TOKEN] that parameterise the functor generated
|
||||
here. For instance, if, in a future release of LIGO, new tokens may
|
||||
be added, and the recognition of their lexemes may entail new
|
||||
errors, the signature [TOKEN] will have to be augmented and the
|
||||
lexer specification changed. However, it is more likely that
|
||||
instructions or types are added, instead of new kinds of tokens.
|
||||
*)
|
||||
|
||||
type lexeme = string
|
||||
|
||||
(* TOKENS *)
|
||||
|
||||
(* The signature [TOKEN] exports an abstract type [token], so a lexer
|
||||
can be a functor over tokens. This enables to externalise
|
||||
version-dependent constraints in any module whose signature matches
|
||||
[TOKEN]. Generic functions to construct tokens are required.
|
||||
|
||||
Note the predicate [is_eof], which caracterises the virtual token
|
||||
for end-of-file, because it requires special handling. Some of
|
||||
those functions may yield errors, which are defined as values of
|
||||
the type [int_err] etc. These errors can be better understood by
|
||||
reading the ocamllex specification for the lexer ([Lexer.mll]).
|
||||
*)
|
||||
|
||||
module type TOKEN =
|
||||
sig
|
||||
type token
|
||||
|
||||
(* Errors *)
|
||||
|
||||
type int_err = Non_canonical_zero
|
||||
type ident_err = Reserved_name
|
||||
|
||||
(* Injections *)
|
||||
|
||||
val mk_string : lexeme -> Region.t -> token
|
||||
val mk_bytes : lexeme -> Region.t -> token
|
||||
val mk_int : lexeme -> Region.t -> (token, int_err) result
|
||||
val mk_ident : lexeme -> Region.t -> (token, ident_err) result
|
||||
val mk_constr : lexeme -> Region.t -> token
|
||||
val mk_sym : lexeme -> Region.t -> token
|
||||
val eof : Region.t -> token
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
val is_string : token -> bool
|
||||
val is_bytes : token -> bool
|
||||
val is_int : token -> bool
|
||||
val is_ident : token -> bool
|
||||
val is_kwd : token -> bool
|
||||
val is_constr : token -> bool
|
||||
val is_sym : token -> bool
|
||||
val is_eof : token -> bool
|
||||
|
||||
(* Projections *)
|
||||
|
||||
val to_lexeme : token -> lexeme
|
||||
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
|
||||
val to_region : token -> Region.t
|
||||
end
|
||||
|
||||
(* The module type for lexers is [S]. It mainly exports the function
|
||||
[open_token_stream], which returns
|
||||
|
||||
* a function [read] that extracts tokens from a lexing buffer,
|
||||
* together with a lexing buffer [buffer] to read from,
|
||||
* a function [close] that closes that buffer,
|
||||
* a function [get_pos] that returns the current position, and
|
||||
* a function [get_last] that returns the region of the last
|
||||
recognised token.
|
||||
|
||||
Note that a module [Token] is exported too, because the signature
|
||||
of the exported functions depend on it.
|
||||
|
||||
The call [read ~log] evaluates in a lexer (a.k.a tokeniser or
|
||||
scanner) whose type is [Lexing.lexbuf -> token], and suitable for a
|
||||
parser generated by Menhir.
|
||||
|
||||
The argument labelled [log] is a logger. It may print a token and
|
||||
its left markup to a given channel, at the caller's discretion.
|
||||
*)
|
||||
|
||||
module type S =
|
||||
sig
|
||||
module Token : TOKEN
|
||||
type token = Token.token
|
||||
|
||||
type file_path = string
|
||||
type logger = Markup.t list -> token -> unit
|
||||
|
||||
val output_token :
|
||||
?offsets:bool -> [`Byte | `Point] ->
|
||||
EvalOpt.command -> out_channel -> logger
|
||||
|
||||
type instance = {
|
||||
read : ?log:logger -> Lexing.lexbuf -> token;
|
||||
buffer : Lexing.lexbuf;
|
||||
get_pos : unit -> Pos.t;
|
||||
get_last : unit -> Region.t;
|
||||
close : unit -> unit
|
||||
}
|
||||
|
||||
val open_token_stream : file_path option -> instance
|
||||
|
||||
(* Error reporting *)
|
||||
|
||||
exception Error of Error.t Region.reg
|
||||
|
||||
val print_error : ?offsets:bool -> [`Byte | `Point] ->
|
||||
Error.t Region.reg -> unit
|
||||
|
||||
(* Standalone tracer *)
|
||||
|
||||
val trace :
|
||||
?offsets:bool -> [`Byte | `Point] ->
|
||||
file_path option -> EvalOpt.command -> unit
|
||||
end
|
||||
|
||||
(* The functorised interface
|
||||
|
||||
Note that the module parameter [Token] is re-exported as a
|
||||
submodule in [S].
|
||||
*)
|
||||
|
||||
module Make (Token: TOKEN) : S with module Token = Token
|
872
src/ligo/ligo-parser/Lexer.mll
Normal file
872
src/ligo/ligo-parser/Lexer.mll
Normal file
@ -0,0 +1,872 @@
|
||||
(* Lexer specification for LIGO, to be processed by [ocamllex]. *)
|
||||
|
||||
{
|
||||
(* START HEADER *)
|
||||
|
||||
type lexeme = string
|
||||
|
||||
(* STRING PROCESSING *)
|
||||
|
||||
(* The value of [mk_str len p] ("make string") is a string of length
|
||||
[len] containing the [len] characters in the list [p], in reverse
|
||||
order. For instance, [mk_str 3 ['c';'b';'a'] = "abc"]. *)
|
||||
|
||||
let mk_str (len: int) (p: char list) : string =
|
||||
let bytes = Bytes.make len ' ' in
|
||||
let rec fill i = function
|
||||
[] -> bytes
|
||||
| char::l -> Bytes.set bytes i char; fill (i-1) l
|
||||
in fill (len-1) p |> Bytes.to_string
|
||||
|
||||
(* The call [explode s a] is the list made by pushing the characters
|
||||
in the string [s] on top of [a], in reverse order. For example,
|
||||
[explode "ba" ['c';'d'] = ['a'; 'b'; 'c'; 'd']]. *)
|
||||
|
||||
let explode s acc =
|
||||
let rec push = function
|
||||
0 -> acc
|
||||
| i -> s.[i-1] :: push (i-1)
|
||||
in push (String.length s)
|
||||
|
||||
(* LEXER ENGINE *)
|
||||
|
||||
(* Resetting file name and line number in the lexing buffer
|
||||
|
||||
The call [reset ~file ~line buffer] modifies in-place the lexing
|
||||
buffer [buffer] so the lexing engine records that the file
|
||||
associated with [buffer] is named [file], and the current line is
|
||||
[line]. This function is useful when lexing a file that has been
|
||||
previously preprocessed by the C preprocessor, in which case the
|
||||
argument [file] is the name of the file that was preprocessed,
|
||||
_not_ the preprocessed file (of which the user is not normally
|
||||
aware). By default, the [line] argument is [1].
|
||||
*)
|
||||
|
||||
let reset_file ~file buffer =
|
||||
let open Lexing in
|
||||
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_fname = file}
|
||||
|
||||
let reset_line ~line buffer =
|
||||
assert (line >= 0);
|
||||
let open Lexing in
|
||||
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_lnum = line}
|
||||
|
||||
let reset_offset ~offset buffer =
|
||||
assert (offset >= 0);
|
||||
Printf.printf "[reset] offset=%i\n" offset;
|
||||
let open Lexing in
|
||||
let bol = buffer.lex_curr_p.pos_bol in
|
||||
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum = bol (*+ offset*)}
|
||||
|
||||
let reset ?file ?line ?offset buffer =
|
||||
let () =
|
||||
match file with
|
||||
Some file -> reset_file ~file buffer
|
||||
| None -> () in
|
||||
let () =
|
||||
match line with
|
||||
Some line -> reset_line ~line buffer
|
||||
| None -> () in
|
||||
match offset with
|
||||
Some offset -> reset_offset ~offset buffer
|
||||
| None -> ()
|
||||
|
||||
(* Rolling back one lexeme _within the current semantic action_ *)
|
||||
|
||||
let rollback buffer =
|
||||
let open Lexing in
|
||||
let len = String.length (lexeme buffer) in
|
||||
let pos_cnum = buffer.lex_curr_p.pos_cnum - len in
|
||||
buffer.lex_curr_pos <- buffer.lex_curr_pos - len;
|
||||
buffer.lex_curr_p <- {buffer.lex_curr_p with pos_cnum}
|
||||
|
||||
(* ALIASES *)
|
||||
|
||||
let sprintf = Printf.sprintf
|
||||
|
||||
(* TOKENS *)
|
||||
|
||||
(* The signature [TOKEN] exports an abstract type [token], so a lexer
|
||||
can be a functor over tokens. Consequently, generic functions to
|
||||
construct tokens are provided. Note predicate [is_eof], which
|
||||
caracterises the virtual token for end-of-file, because it requires
|
||||
special handling. *)
|
||||
|
||||
module type TOKEN =
|
||||
sig
|
||||
type token
|
||||
|
||||
(* Errors *)
|
||||
|
||||
type int_err = Non_canonical_zero
|
||||
type ident_err = Reserved_name
|
||||
|
||||
(* Injections *)
|
||||
|
||||
val mk_string : lexeme -> Region.t -> token
|
||||
val mk_bytes : lexeme -> Region.t -> token
|
||||
val mk_int : lexeme -> Region.t -> (token, int_err) result
|
||||
val mk_ident : lexeme -> Region.t -> (token, ident_err) result
|
||||
val mk_constr : lexeme -> Region.t -> token
|
||||
val mk_sym : lexeme -> Region.t -> token
|
||||
val eof : Region.t -> token
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
val is_string : token -> bool
|
||||
val is_bytes : token -> bool
|
||||
val is_int : token -> bool
|
||||
val is_ident : token -> bool
|
||||
val is_kwd : token -> bool
|
||||
val is_constr : token -> bool
|
||||
val is_sym : token -> bool
|
||||
val is_eof : token -> bool
|
||||
|
||||
(* Projections *)
|
||||
|
||||
val to_lexeme : token -> lexeme
|
||||
val to_string : token -> ?offsets:bool -> [`Byte | `Point] -> string
|
||||
val to_region : token -> Region.t
|
||||
end
|
||||
|
||||
(* The module type for lexers is [S]. *)
|
||||
|
||||
module type S = sig
|
||||
module Token : TOKEN
|
||||
type token = Token.token
|
||||
|
||||
type file_path = string
|
||||
type logger = Markup.t list -> token -> unit
|
||||
|
||||
val output_token :
|
||||
?offsets:bool -> [`Byte | `Point] ->
|
||||
EvalOpt.command -> out_channel -> logger
|
||||
|
||||
type instance = {
|
||||
read : ?log:logger -> Lexing.lexbuf -> token;
|
||||
buffer : Lexing.lexbuf;
|
||||
get_pos : unit -> Pos.t;
|
||||
get_last : unit -> Region.t;
|
||||
close : unit -> unit
|
||||
}
|
||||
|
||||
val open_token_stream : file_path option -> instance
|
||||
|
||||
(* Error reporting *)
|
||||
|
||||
exception Error of Error.t Region.reg
|
||||
|
||||
val print_error :
|
||||
?offsets:bool -> [`Byte | `Point] -> Error.t Region.reg -> unit
|
||||
|
||||
(* Standalone tracer *)
|
||||
|
||||
val trace :
|
||||
?offsets:bool -> [`Byte | `Point] ->
|
||||
file_path option -> EvalOpt.command -> unit
|
||||
end
|
||||
|
||||
(* The functorised interface
|
||||
|
||||
Note that the module parameter [Token] is re-exported as a
|
||||
submodule in [S].
|
||||
*)
|
||||
|
||||
module Make (Token: TOKEN) : (S with module Token = Token) =
|
||||
struct
|
||||
module Token = Token
|
||||
type token = Token.token
|
||||
|
||||
type file_path = string
|
||||
|
||||
(* THREAD FOR STRUCTURED CONSTRUCTS (STRINGS, COMMENTS) *)
|
||||
|
||||
(* When scanning structured constructs, like strings and comments,
|
||||
we need to keep the region of the opening symbol (like double
|
||||
quote, "//" or "(*") in order to report any error more
|
||||
precisely. Since ocamllex is byte-oriented, we need to store
|
||||
the parsed bytes as characters in an accumulator [acc] and
|
||||
also its length [len], so, we are done, it is easy to build the
|
||||
string making up the structured construct with [mk_str] (see
|
||||
above).
|
||||
|
||||
The resulting data structure is called a _thread_.
|
||||
(Note for Emacs: "*)".)
|
||||
*)
|
||||
|
||||
type thread = {
|
||||
opening : Region.t;
|
||||
len : int;
|
||||
acc : char list
|
||||
}
|
||||
|
||||
let push_char char {opening; len; acc} = {opening; len=len+1; acc=char::acc}
|
||||
|
||||
let push_string str {opening; len; acc} =
|
||||
{opening;
|
||||
len = len + String.length str;
|
||||
acc = explode str acc}
|
||||
|
||||
(* STATE *)
|
||||
|
||||
(* Beyond tokens, the result of lexing is a state. The type
|
||||
[state] represents the logical state of the lexing engine, that
|
||||
is, a value which is threaded during scanning and which denotes
|
||||
useful, high-level information beyond what the type
|
||||
[Lexing.lexbuf] in the standard library already provides for
|
||||
all generic lexers.
|
||||
|
||||
Tokens are the smallest units used by the parser to build the
|
||||
abstract syntax tree. The state includes a queue of recognised
|
||||
tokens, with the markup at the left of its lexeme until either
|
||||
the start of the file or the end of the previously recognised
|
||||
token.
|
||||
|
||||
The markup from the last recognised token or, if the first
|
||||
token has not been recognised yet, from the beginning of the
|
||||
file is stored in the field [markup] of the state because it is
|
||||
a side-effect, with respect to the output token list, and we
|
||||
use a record with a single field [units] because that record
|
||||
may be easily extended during the future maintenance of this
|
||||
lexer.
|
||||
|
||||
The state also includes a field [pos] which holds the current
|
||||
position in the LIGO source file. The position is not always
|
||||
updated after a single character has been matched: that depends
|
||||
on the regular expression that matched the lexing buffer.
|
||||
|
||||
The fields [decoder] and [supply] offer the support needed
|
||||
for the lexing of UTF-8 encoded characters in comments (the
|
||||
only place where they are allowed in LIGO). The former is the
|
||||
decoder proper and the latter is the effectful function
|
||||
[supply] that takes a byte, a start index and a length and feed
|
||||
it to [decoder]. See the documentation of the third-party
|
||||
library Uutf.
|
||||
*)
|
||||
|
||||
type state = {
|
||||
units : (Markup.t list * token) FQueue.t;
|
||||
markup : Markup.t list;
|
||||
last : Region.t;
|
||||
pos : Pos.t;
|
||||
decoder : Uutf.decoder;
|
||||
supply : Bytes.t -> int -> int -> unit
|
||||
}
|
||||
|
||||
(* The call [enqueue (token, state)] updates functionally the
|
||||
state [state] by associating the token [token] with the stored
|
||||
markup and enqueuing the pair into the units queue. The field
|
||||
[markup] is then reset to the empty list. *)
|
||||
|
||||
let enqueue (token, state) = {
|
||||
state with
|
||||
units = FQueue.enq (state.markup, token) state.units;
|
||||
markup = []
|
||||
}
|
||||
|
||||
(* The call [sync state buffer] updates the current position in
|
||||
accordance with the contents of the lexing buffer, more
|
||||
precisely, depending on the length of the string which has just
|
||||
been recognised by the scanner: that length is used as a
|
||||
positive offset to the current column. *)
|
||||
|
||||
let sync state buffer =
|
||||
let lex = Lexing.lexeme buffer in
|
||||
let len = String.length lex in
|
||||
let start = state.pos in
|
||||
let stop = start#shift_bytes len in
|
||||
let state = {state with pos = stop}
|
||||
in Region.make ~start ~stop, lex, state
|
||||
|
||||
(* MARKUP *)
|
||||
|
||||
(* Committing markup to the current logical state *)
|
||||
|
||||
let push_newline state buffer =
|
||||
let value = Lexing.lexeme buffer
|
||||
and () = Lexing.new_line buffer
|
||||
and start = state.pos in
|
||||
let stop = start#new_line value in
|
||||
let state = {state with pos = stop}
|
||||
and region = Region.make ~start ~stop in
|
||||
let unit = Markup.Newline Region.{region; value} in
|
||||
let markup = unit :: state.markup
|
||||
in {state with markup}
|
||||
|
||||
let push_line (thread, state) =
|
||||
let start = thread.opening#start in
|
||||
let region = Region.make ~start ~stop:state.pos
|
||||
and value = mk_str thread.len thread.acc in
|
||||
let unit = Markup.LineCom Region.{region; value} in
|
||||
let markup = unit :: state.markup
|
||||
in {state with markup}
|
||||
|
||||
let push_block (thread, state) =
|
||||
let start = thread.opening#start in
|
||||
let region = Region.make ~start ~stop:state.pos
|
||||
and value = mk_str thread.len thread.acc in
|
||||
let unit = Markup.BlockCom Region.{region; value} in
|
||||
let markup = unit :: state.markup
|
||||
in {state with markup}
|
||||
|
||||
let push_space state buffer =
|
||||
let region, lex, state = sync state buffer in
|
||||
let value = String.length lex in
|
||||
let unit = Markup.Space Region.{region; value} in
|
||||
let markup = unit :: state.markup
|
||||
in {state with markup}
|
||||
|
||||
let push_tabs state buffer =
|
||||
let region, lex, state = sync state buffer in
|
||||
let value = String.length lex in
|
||||
let unit = Markup.Tabs Region.{region; value} in
|
||||
let markup = unit :: state.markup
|
||||
in {state with markup}
|
||||
|
||||
let push_bom state buffer =
|
||||
let region, value, state = sync state buffer in
|
||||
let unit = Markup.BOM Region.{region; value} in
|
||||
let markup = unit :: state.markup
|
||||
in {state with markup}
|
||||
|
||||
(* ERRORS *)
|
||||
|
||||
type Error.t += Invalid_utf8_sequence
|
||||
type Error.t += Unexpected_character of char
|
||||
type Error.t += Undefined_escape_sequence
|
||||
type Error.t += Missing_break
|
||||
type Error.t += Unterminated_string
|
||||
type Error.t += Unterminated_integer
|
||||
type Error.t += Odd_lengthed_bytes
|
||||
type Error.t += Unterminated_comment
|
||||
type Error.t += Orphan_minus
|
||||
type Error.t += Non_canonical_zero
|
||||
type Error.t += Negative_byte_sequence
|
||||
type Error.t += Broken_string
|
||||
type Error.t += Invalid_character_in_string
|
||||
type Error.t += Reserved_name
|
||||
|
||||
let error_to_string = function
|
||||
Invalid_utf8_sequence ->
|
||||
"Invalid UTF-8 sequence.\n"
|
||||
| Unexpected_character c ->
|
||||
sprintf "Unexpected character '%s'.\n" (Char.escaped c)
|
||||
| Undefined_escape_sequence ->
|
||||
"Undefined escape sequence.\n\
|
||||
Hint: Remove or replace the sequence.\n"
|
||||
| Missing_break ->
|
||||
"Missing break.\n\
|
||||
Hint: Insert some space.\n"
|
||||
| Unterminated_string ->
|
||||
"Unterminated string.\n\
|
||||
Hint: Close with double quotes.\n"
|
||||
| Unterminated_integer ->
|
||||
"Unterminated integer.\n\
|
||||
Hint: Remove the sign or proceed with a natural number.\n"
|
||||
| Odd_lengthed_bytes ->
|
||||
"The length of the byte sequence is an odd number.\n\
|
||||
Hint: Add or remove a digit.\n"
|
||||
| Unterminated_comment ->
|
||||
"Unterminated comment.\n\
|
||||
Hint: Close with \"*)\".\n"
|
||||
| Orphan_minus ->
|
||||
"Orphan minus sign.\n\
|
||||
Hint: Remove the trailing space.\n"
|
||||
| Non_canonical_zero ->
|
||||
"Non-canonical zero.\n\
|
||||
Hint: Use 0.\n"
|
||||
| Negative_byte_sequence ->
|
||||
"Negative byte sequence.\n\
|
||||
Hint: Remove the leading minus sign.\n"
|
||||
| Broken_string ->
|
||||
"The string starting here is interrupted by a line break.\n\
|
||||
Hint: Remove the break or close the string before.\n"
|
||||
| Invalid_character_in_string ->
|
||||
"Invalid character in string.\n\
|
||||
Hint: Remove or replace the character.\n"
|
||||
| Reserved_name ->
|
||||
"Reserved named.\n\
|
||||
Hint: Change the name.\n"
|
||||
| _ -> assert false
|
||||
|
||||
exception Error of Error.t Region.reg
|
||||
|
||||
let fail region value = raise (Error Region.{region; value})
|
||||
|
||||
(* TOKENS *)
|
||||
|
||||
(* Making tokens *)
|
||||
|
||||
let mk_string (thread, state) =
|
||||
let start = thread.opening#start in
|
||||
let stop = state.pos in
|
||||
let region = Region.make ~start ~stop in
|
||||
let lexeme = mk_str thread.len thread.acc in
|
||||
let token = Token.mk_string lexeme region
|
||||
in token, state
|
||||
|
||||
let mk_bytes bytes state buffer =
|
||||
let region, _, state = sync state buffer in
|
||||
let token = Token.mk_bytes bytes region
|
||||
in token, state
|
||||
|
||||
let mk_int state buffer =
|
||||
let region, lexeme, state = sync state buffer in
|
||||
match Token.mk_int lexeme region with
|
||||
Ok token -> token, state
|
||||
| Error Token.Non_canonical_zero ->
|
||||
fail region Non_canonical_zero
|
||||
|
||||
let mk_ident state buffer =
|
||||
let region, lexeme, state = sync state buffer in
|
||||
match Token.mk_ident lexeme region with
|
||||
Ok token -> token, state
|
||||
| Error Token.Reserved_name -> fail region Reserved_name
|
||||
|
||||
let mk_constr state buffer =
|
||||
let region, lexeme, state = sync state buffer
|
||||
in Token.mk_constr lexeme region, state
|
||||
|
||||
let mk_sym state buffer =
|
||||
let region, lexeme, state = sync state buffer
|
||||
in Token.mk_sym lexeme region, state
|
||||
|
||||
let mk_eof state buffer =
|
||||
let region, _, state = sync state buffer
|
||||
in Token.eof region, state
|
||||
|
||||
(* END HEADER *)
|
||||
}
|
||||
|
||||
(* START LEXER DEFINITION *)
|
||||
|
||||
(* Named regular expressions *)
|
||||
|
||||
let utf8_bom = "\xEF\xBB\xBF" (* Byte Order Mark for UTF-8 *)
|
||||
let nl = ['\n' '\r'] | "\r\n"
|
||||
let blank = ' ' | '\t'
|
||||
let digit = ['0'-'9']
|
||||
let natural = digit | digit (digit | '_')* digit
|
||||
let integer = '-'? natural
|
||||
let small = ['a'-'z']
|
||||
let capital = ['A'-'Z']
|
||||
let letter = small | capital
|
||||
let ident = small (letter | '_' | digit)*
|
||||
let constr = capital (letter | '_' | digit)*
|
||||
let hexa_digit = digit | ['A'-'F']
|
||||
let byte = hexa_digit hexa_digit
|
||||
let byte_seq = byte | byte (byte | '_')* byte
|
||||
let bytes = "0x" (byte_seq? as seq)
|
||||
let esc = "\\n" | "\\\"" | "\\\\" | "\\b"
|
||||
| "\\r" | "\\t" | "\\x" byte
|
||||
let symbol = ';' | ',' | '(' | ')'| '[' | ']' | '{' | '}'
|
||||
| '#' | '|' | "->" | ":=" | '=' | ':'
|
||||
| '<' | "<=" | '>' | ">=" | "=/="
|
||||
| '+' | '-' | '*' | '.' | '_' | '^'
|
||||
let string = [^'"' '\\' '\n']* (* For strings of #include *)
|
||||
|
||||
(* RULES *)
|
||||
|
||||
(* Except for the first rule [init], all rules bear a name starting
|
||||
with "scan".
|
||||
|
||||
All have a parameter [state] that they thread through their
|
||||
recursive calls. The rules for the structured constructs (strings
|
||||
and comments) have an extra parameter of type [thread] (see above).
|
||||
*)
|
||||
|
||||
rule init state = parse
|
||||
utf8_bom { scan (push_bom state lexbuf) lexbuf }
|
||||
| _ { rollback lexbuf; scan state lexbuf }
|
||||
|
||||
and scan state = parse
|
||||
nl { scan (push_newline state lexbuf) lexbuf }
|
||||
| ' '+ { scan (push_space state lexbuf) lexbuf }
|
||||
| '\t'+ { scan (push_tabs state lexbuf) lexbuf }
|
||||
|
||||
| ident { mk_ident state lexbuf |> enqueue }
|
||||
| constr { mk_constr state lexbuf |> enqueue }
|
||||
| bytes { (mk_bytes seq) state lexbuf |> enqueue }
|
||||
| integer { mk_int state lexbuf |> enqueue }
|
||||
| symbol { mk_sym state lexbuf |> enqueue }
|
||||
| eof { mk_eof state lexbuf |> enqueue }
|
||||
|
||||
| '"' { let opening, _, state = sync state lexbuf in
|
||||
let thread = {opening; len=1; acc=['"']} in
|
||||
scan_string thread state lexbuf |> mk_string |> enqueue }
|
||||
|
||||
| "(*" { let opening, _, state = sync state lexbuf in
|
||||
let thread = {opening; len=2; acc=['*';'(']} in
|
||||
let state = scan_block thread state lexbuf |> push_block
|
||||
in scan state lexbuf }
|
||||
|
||||
| "//" { let opening, _, state = sync state lexbuf in
|
||||
let thread = {opening; len=2; acc=['/';'/']} in
|
||||
let state = scan_line thread state lexbuf |> push_line
|
||||
in scan state lexbuf }
|
||||
|
||||
(* Management of #include CPP directives
|
||||
|
||||
An input LIGO program may contain GNU CPP (C preprocessor)
|
||||
directives, and the entry modules (named *Main.ml) run CPP on them
|
||||
in traditional mode:
|
||||
|
||||
https://gcc.gnu.org/onlinedocs/cpp/Traditional-Mode.html
|
||||
|
||||
The main interest in using CPP is that it can stand for a poor
|
||||
man's (flat) module system for LIGO thanks to #include
|
||||
directives, and the traditional mode leaves the markup mostly
|
||||
undisturbed.
|
||||
|
||||
Some of the #line resulting from processing #include directives
|
||||
deal with system file headers and thus have to be ignored for our
|
||||
purpose. Moreover, these #line directives may also carry some
|
||||
additional flags:
|
||||
|
||||
https://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
|
||||
|
||||
of which 1 and 2 indicate, respectively, the start of a new file
|
||||
and the return from a file (after its inclusion has been
|
||||
processed).
|
||||
*)
|
||||
|
||||
| '#' blank* ("line" blank+)? (integer as line) blank+
|
||||
'"' (string as file) '"' {
|
||||
let _, _, state = sync state lexbuf in
|
||||
let flags, state = scan_flags state [] lexbuf in
|
||||
let () = ignore flags in
|
||||
let line = int_of_string line
|
||||
and file = Filename.basename file in
|
||||
let pos = state.pos#set ~file ~line ~offset:0 in
|
||||
let state = {state with pos} in
|
||||
scan state lexbuf
|
||||
}
|
||||
|
||||
(* Some special errors
|
||||
|
||||
Some special errors are recognised in the semantic actions of the
|
||||
following regular expressions. The first error is a minus sign
|
||||
separated from the integer it modifies by some markup (space or
|
||||
tabs). The second is a minus sign immediately followed by
|
||||
anything else than a natural number (matched above) or markup and
|
||||
a number (previous error). The third is the strange occurrence of
|
||||
an attempt at defining a negative byte sequence. Finally, the
|
||||
catch-all rule reports unexpected characters in the buffer (and
|
||||
is not so special, after all).
|
||||
*)
|
||||
|
||||
| '-' { let region, _, state = sync state lexbuf in
|
||||
let state = scan state lexbuf in
|
||||
let open Markup in
|
||||
match FQueue.peek state.units with
|
||||
None -> assert false
|
||||
| Some (_, ((Space _ | Tabs _)::_, token))
|
||||
when Token.is_int token ->
|
||||
fail region Orphan_minus
|
||||
| _ -> fail region Unterminated_integer }
|
||||
|
||||
| '-' "0x" byte_seq?
|
||||
{ let region, _, _ = sync state lexbuf
|
||||
in fail region Negative_byte_sequence }
|
||||
|
||||
| _ as c { let region, _, _ = sync state lexbuf
|
||||
in fail region (Unexpected_character c) }
|
||||
|
||||
(* Scanning CPP #include flags *)
|
||||
|
||||
and scan_flags state acc = parse
|
||||
blank+ { let _, _, state = sync state lexbuf
|
||||
in scan_flags state acc lexbuf }
|
||||
| integer as code { let _, _, state = sync state lexbuf in
|
||||
let acc = int_of_string code :: acc
|
||||
in scan_flags state acc lexbuf }
|
||||
| nl { List.rev acc, push_newline state lexbuf }
|
||||
| eof { let _, _, state = sync state lexbuf
|
||||
in List.rev acc, state (* TODO *) }
|
||||
|
||||
(* Finishing a string *)
|
||||
|
||||
and scan_string thread state = parse
|
||||
nl { fail thread.opening Broken_string }
|
||||
| eof { fail thread.opening Unterminated_string }
|
||||
| ['\t' '\r' '\b']
|
||||
{ let region, _, _ = sync state lexbuf
|
||||
in fail region Invalid_character_in_string }
|
||||
| '"' { let _, _, state = sync state lexbuf
|
||||
in push_char '"' thread, state }
|
||||
| esc { let _, lexeme, state = sync state lexbuf
|
||||
in scan_string (push_string lexeme thread) state lexbuf }
|
||||
| '\\' _ { let region, _, _ = sync state lexbuf
|
||||
in fail region Undefined_escape_sequence }
|
||||
| _ as c { let _, _, state = sync state lexbuf in
|
||||
scan_string (push_char c thread) state lexbuf }
|
||||
|
||||
(* Finishing a block comment
|
||||
|
||||
(Note for Emacs: ("(*")
|
||||
The lexing of block comments must take care of embedded block
|
||||
comments that may occur within, as well as strings, so no substring
|
||||
"*)" may inadvertently close the block. This is the purpose
|
||||
of the first case of the scanner [scan_block].
|
||||
*)
|
||||
|
||||
and scan_block thread state = parse
|
||||
'"' | "(*" { let opening = thread.opening in
|
||||
let opening', lexeme, state = sync state lexbuf in
|
||||
let thread = push_string lexeme thread in
|
||||
let thread = {thread with opening=opening'} in
|
||||
let next = if lexeme = "\"" then scan_string
|
||||
else scan_block in
|
||||
let thread, state = next thread state lexbuf in
|
||||
let thread = {thread with opening}
|
||||
in scan_block thread state lexbuf }
|
||||
| "*)" { let _, lexeme, state = sync state lexbuf
|
||||
in push_string lexeme thread, state }
|
||||
| nl as nl { let () = Lexing.new_line lexbuf
|
||||
and state = {state with pos = state.pos#new_line nl}
|
||||
and thread = push_string nl thread
|
||||
in scan_block thread state lexbuf }
|
||||
| eof { fail thread.opening Unterminated_comment }
|
||||
| _ { let () = rollback lexbuf in
|
||||
let len = thread.len in
|
||||
let thread,
|
||||
status = scan_utf8 thread state lexbuf in
|
||||
let delta = thread.len - len in
|
||||
let pos = state.pos#shift_one_uchar delta in
|
||||
match status with
|
||||
None -> scan_block thread {state with pos} lexbuf
|
||||
| Some error ->
|
||||
let region = Region.make ~start:state.pos ~stop:pos
|
||||
in fail region error }
|
||||
|
||||
(* Finishing a line comment *)
|
||||
|
||||
and scan_line thread state = parse
|
||||
nl as nl { let () = Lexing.new_line lexbuf
|
||||
and thread = push_string nl thread
|
||||
and state = {state with pos = state.pos#new_line nl}
|
||||
in thread, state }
|
||||
| eof { fail thread.opening Unterminated_comment }
|
||||
| _ { let () = rollback lexbuf in
|
||||
let len = thread.len in
|
||||
let thread,
|
||||
status = scan_utf8 thread state lexbuf in
|
||||
let delta = thread.len - len in
|
||||
let pos = state.pos#shift_one_uchar delta in
|
||||
match status with
|
||||
None -> scan_line thread {state with pos} lexbuf
|
||||
| Some error ->
|
||||
let region = Region.make ~start:state.pos ~stop:pos
|
||||
in fail region error }
|
||||
|
||||
and scan_utf8 thread state = parse
|
||||
eof { fail thread.opening Unterminated_comment }
|
||||
| _ as c { let thread = push_char c thread in
|
||||
let lexeme = Lexing.lexeme lexbuf in
|
||||
let () = state.supply (Bytes.of_string lexeme) 0 1 in
|
||||
match Uutf.decode state.decoder with
|
||||
`Uchar _ -> thread, None
|
||||
| `Malformed _ -> thread, Some Invalid_utf8_sequence
|
||||
| `Await -> scan_utf8 thread state lexbuf
|
||||
| `End -> assert false }
|
||||
|
||||
(* END LEXER DEFINITION *)
|
||||
|
||||
{
|
||||
(* START TRAILER *)
|
||||
|
||||
(* Scanning the lexing buffer for tokens (and markup, as a
|
||||
side-effect).
|
||||
|
||||
Because we want the lexer to have access to the right lexical
|
||||
context of a recognised lexeme (to enforce stylistic constraints or
|
||||
report special error patterns), we need to keep a hidden reference
|
||||
to a queue of recognised lexical units (that is, tokens and markup)
|
||||
that acts as a mutable state between the calls to
|
||||
[read_token]. When [read_token] is called, that queue is consulted
|
||||
first and, if it contains at least one token, that token is
|
||||
returned; otherwise, the lexing buffer is scanned for at least one
|
||||
more new token. That is the general principle: we put a high-level
|
||||
buffer (our queue) on top of the low-level lexing buffer.
|
||||
|
||||
One tricky and important detail is that we must make any parser
|
||||
generated by Menhir (and calling [read_token]) believe that the
|
||||
last region of the input source that was matched indeed corresponds
|
||||
to the returned token, despite that many tokens and markup may have
|
||||
been matched since it was actually read from the input. In other
|
||||
words, the parser requests a token that is taken from the
|
||||
high-level buffer, but the parser requests the source regions from
|
||||
the _low-level_ lexing buffer, and they may disagree if more than
|
||||
one token has actually been recognised.
|
||||
|
||||
Consequently, in order to maintain a consistent view for the
|
||||
parser, we have to patch some fields of the lexing buffer, namely
|
||||
[lex_start_p] and [lex_curr_p], as these fields are read by parsers
|
||||
generated by Menhir when querying source positions (regions). This
|
||||
is the purpose of the function [patch_buffer]. After reading one
|
||||
ore more tokens and markup by the scanning rule [scan], we have to
|
||||
save in the hidden reference [buf_reg] the region of the source
|
||||
that was matched by [scan]. This atomic sequence of patching,
|
||||
scanning and saving is implemented by the _function_ [scan]
|
||||
(beware: it shadows the scanning rule [scan]). The function
|
||||
[patch_buffer] is, of course, also called just before returning the
|
||||
token, so the parser has a view of the lexing buffer consistent
|
||||
with the token.
|
||||
|
||||
Note that an additional reference [first_call] is needed to
|
||||
distinguish the first call to the function [scan], as the first
|
||||
scanning rule is actually [init] (which can handle the BOM), not
|
||||
[scan].
|
||||
*)
|
||||
|
||||
type logger = Markup.t list -> token -> unit
|
||||
|
||||
type instance = {
|
||||
read : ?log:logger -> Lexing.lexbuf -> token;
|
||||
buffer : Lexing.lexbuf;
|
||||
get_pos : unit -> Pos.t;
|
||||
get_last : unit -> Region.t;
|
||||
close : unit -> unit
|
||||
}
|
||||
|
||||
let file_path = match EvalOpt.input with
|
||||
None | Some "-" -> ""
|
||||
| Some file_path -> file_path
|
||||
let pos = Pos.min#set_file file_path
|
||||
let buf_reg = ref (pos#byte, pos#byte)
|
||||
and first_call = ref true
|
||||
and decoder = Uutf.decoder ~encoding:`UTF_8 `Manual
|
||||
let supply = Uutf.Manual.src decoder
|
||||
let state = ref {units = FQueue.empty;
|
||||
last = Region.ghost;
|
||||
pos;
|
||||
markup = [];
|
||||
decoder;
|
||||
supply}
|
||||
|
||||
let get_pos () = !state.pos
|
||||
|
||||
let get_last () = !state.last
|
||||
|
||||
let patch_buffer (start, stop) buffer =
|
||||
let open Lexing in
|
||||
let file_path = buffer.lex_curr_p.pos_fname in
|
||||
buffer.lex_start_p <- {start with pos_fname = file_path};
|
||||
buffer.lex_curr_p <- {stop with pos_fname = file_path}
|
||||
|
||||
and save_region buffer =
|
||||
buf_reg := Lexing.(buffer.lex_start_p, buffer.lex_curr_p)
|
||||
|
||||
let scan buffer =
|
||||
patch_buffer !buf_reg buffer;
|
||||
(if !first_call
|
||||
then (state := init !state buffer; first_call := false)
|
||||
else state := scan !state buffer);
|
||||
save_region buffer
|
||||
|
||||
let next_token buffer =
|
||||
scan buffer;
|
||||
match FQueue.peek !state.units with
|
||||
None -> assert false
|
||||
| Some (units, ext_token) ->
|
||||
state := {!state with units}; Some ext_token
|
||||
|
||||
let check_right_context token buffer =
|
||||
let open Token in
|
||||
if is_int token || is_bytes token then
|
||||
match next_token buffer with
|
||||
Some ([], next) ->
|
||||
let pos = (Token.to_region token)#stop in
|
||||
let region = Region.make ~start:pos ~stop:pos in
|
||||
if is_bytes token && is_int next then
|
||||
fail region Odd_lengthed_bytes
|
||||
else
|
||||
if is_ident next || is_string next
|
||||
|| is_bytes next || is_int next then
|
||||
fail region Missing_break
|
||||
| _ -> ()
|
||||
else
|
||||
if Token.is_ident token || Token.is_string token then
|
||||
match next_token buffer with
|
||||
Some ([], next) ->
|
||||
if Token.is_ident next || Token.is_string next
|
||||
|| Token.is_bytes next || Token.is_int next
|
||||
then
|
||||
let pos = (Token.to_region token)#stop in
|
||||
let region = Region.make ~start:pos ~stop:pos
|
||||
in fail region Missing_break
|
||||
| _ -> ()
|
||||
|
||||
let rec read_token ?(log=fun _ _ -> ()) buffer =
|
||||
match FQueue.deq !state.units with
|
||||
None ->
|
||||
scan buffer;
|
||||
read_token ~log buffer
|
||||
| Some (units, (left_mark, token)) ->
|
||||
log left_mark token;
|
||||
state := {!state with units; last = Token.to_region token};
|
||||
check_right_context token buffer;
|
||||
patch_buffer (Token.to_region token)#byte_pos buffer;
|
||||
token
|
||||
|
||||
let open_token_stream file_path_opt =
|
||||
let cin = match file_path_opt with
|
||||
None | Some "-" -> stdin
|
||||
| Some file_path -> open_in file_path in
|
||||
let buffer = Lexing.from_channel cin in
|
||||
let () = match file_path_opt with
|
||||
None | Some "-" -> ()
|
||||
| Some file_path -> reset ~file:file_path buffer
|
||||
and close () = close_in cin in
|
||||
{read = read_token; buffer; get_pos; get_last; close}
|
||||
|
||||
(* Standalone lexer for debugging purposes *)
|
||||
|
||||
(* Pretty-printing in a string the lexemes making up the markup
|
||||
between two tokens, concatenated with the last lexeme itself. *)
|
||||
|
||||
let output_token ?(offsets=true) mode command
|
||||
channel left_mark token : unit =
|
||||
let output str = Printf.fprintf channel "%s%!" str in
|
||||
let output_nl str = output (str ^ "\n") in
|
||||
match command with
|
||||
EvalOpt.Quiet -> ()
|
||||
| EvalOpt.Tokens -> Token.to_string token ~offsets mode |> output_nl
|
||||
| EvalOpt.Copy ->
|
||||
let lexeme = Token.to_lexeme token
|
||||
and apply acc markup = Markup.to_lexeme markup :: acc
|
||||
in List.fold_left apply [lexeme] left_mark
|
||||
|> String.concat "" |> output
|
||||
| EvalOpt.Units ->
|
||||
let abs_token = Token.to_string token ~offsets mode
|
||||
and apply acc markup =
|
||||
Markup.to_string markup ~offsets mode :: acc
|
||||
in List.fold_left apply [abs_token] left_mark
|
||||
|> String.concat "\n" |> output_nl
|
||||
|
||||
let print_error ?(offsets=true) mode Region.{region; value} =
|
||||
let msg = error_to_string value in
|
||||
let file = match EvalOpt.input with
|
||||
None | Some "-" -> false
|
||||
| Some _ -> true in
|
||||
let reg = region#to_string ~file ~offsets mode in
|
||||
Utils.highlight (sprintf "Lexical error %s:\n%s%!" reg msg)
|
||||
|
||||
let trace ?(offsets=true) mode file_path_opt command : unit =
|
||||
try
|
||||
let {read; buffer; close; _} = open_token_stream file_path_opt
|
||||
and cout = stdout in
|
||||
let log = output_token ~offsets mode command cout
|
||||
and close_all () = close (); close_out cout in
|
||||
let rec iter () =
|
||||
match read ~log buffer with
|
||||
token ->
|
||||
if Token.is_eof token then close_all ()
|
||||
else iter ()
|
||||
| exception Error e -> print_error ~offsets mode e; close_all ()
|
||||
in iter ()
|
||||
with Sys_error msg -> Utils.highlight (sprintf "%s\n" msg)
|
||||
|
||||
end (* of functor [Make] in HEADER *)
|
||||
(* END TRAILER *)
|
||||
}
|
55
src/ligo/ligo-parser/LexerMain.ml
Normal file
55
src/ligo/ligo-parser/LexerMain.ml
Normal file
@ -0,0 +1,55 @@
|
||||
(* Driver for the lexer of LIGO *)
|
||||
|
||||
open! EvalOpt (* Reads the command-line options: Effectful! *)
|
||||
|
||||
(* Error printing and exception tracing *)
|
||||
|
||||
let () = Printexc.record_backtrace true
|
||||
|
||||
let external_ text =
|
||||
Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;;
|
||||
|
||||
(* Path for CPP inclusions (#include) *)
|
||||
|
||||
let lib_path =
|
||||
match EvalOpt.libs with
|
||||
[] -> ""
|
||||
| libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path
|
||||
in List.fold_right mk_I libs ""
|
||||
|
||||
(* Preprocessing the input source and opening the input channels *)
|
||||
|
||||
let prefix =
|
||||
match EvalOpt.input with
|
||||
None | Some "-" -> "temp"
|
||||
| Some file -> Filename.(file |> basename |> remove_extension)
|
||||
|
||||
let suffix = ".pp.li"
|
||||
|
||||
let pp_input =
|
||||
if Utils.String.Set.mem "cpp" EvalOpt.verbose
|
||||
then prefix ^ suffix
|
||||
else let pp_input, pp_out = Filename.open_temp_file prefix suffix
|
||||
in close_out pp_out; pp_input
|
||||
|
||||
let cpp_cmd =
|
||||
match EvalOpt.input with
|
||||
None | Some "-" ->
|
||||
Printf.sprintf "cpp -traditional-cpp%s - -o %s"
|
||||
lib_path pp_input
|
||||
| Some file ->
|
||||
Printf.sprintf "cpp -traditional-cpp%s %s -o %s"
|
||||
lib_path file pp_input
|
||||
|
||||
let () =
|
||||
if Utils.String.Set.mem "cpp" EvalOpt.verbose
|
||||
then Printf.eprintf "%s\n%!" cpp_cmd;
|
||||
if Sys.command cpp_cmd <> 0 then
|
||||
external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd)
|
||||
|
||||
(* Running the lexer on the input file *)
|
||||
|
||||
module Lexer = Lexer.Make (LexToken)
|
||||
|
||||
let () = Lexer.trace ~offsets:EvalOpt.offsets
|
||||
EvalOpt.mode (Some pp_input) EvalOpt.cmd
|
42
src/ligo/ligo-parser/Markup.ml
Normal file
42
src/ligo/ligo-parser/Markup.ml
Normal file
@ -0,0 +1,42 @@
|
||||
type lexeme = string
|
||||
|
||||
type t =
|
||||
Tabs of int Region.reg
|
||||
| Space of int Region.reg
|
||||
| Newline of lexeme Region.reg
|
||||
| LineCom of lexeme Region.reg
|
||||
| BlockCom of lexeme Region.reg
|
||||
| BOM of lexeme Region.reg
|
||||
|
||||
type markup = t
|
||||
|
||||
(* Pretty-printing *)
|
||||
|
||||
let sprintf = Printf.sprintf
|
||||
|
||||
let to_lexeme = function
|
||||
Tabs Region.{value;_} -> String.make value '\t'
|
||||
| Space Region.{value;_} -> String.make value ' '
|
||||
| Newline Region.{value;_}
|
||||
| LineCom Region.{value;_}
|
||||
| BlockCom Region.{value;_}
|
||||
| BOM Region.{value;_} -> value
|
||||
|
||||
let to_string markup ?(offsets=true) mode =
|
||||
let region, val_str =
|
||||
match markup with
|
||||
Tabs Region.{value; region} ->
|
||||
let lex = String.make value '\t' |> String.escaped
|
||||
in region, sprintf "Tabs \"%s\"" lex
|
||||
| Space Region.{value; region} ->
|
||||
region, sprintf "Space \"%s\"" (String.make value ' ')
|
||||
| Newline Region.{value; region} ->
|
||||
region, sprintf "Newline \"%s\"" (String.escaped value)
|
||||
| LineCom Region.{value; region} ->
|
||||
region, sprintf "LineCom \"%s\"" (String.escaped value)
|
||||
| BlockCom Region.{value; region} ->
|
||||
region, sprintf "BlockCom \"%s\"" (String.escaped value)
|
||||
| BOM Region.{value; region} ->
|
||||
region, sprintf "BOM \"%s\"" (String.escaped value) in
|
||||
let reg_str = region#compact ~offsets mode
|
||||
in sprintf "%s: %s" reg_str val_str
|
32
src/ligo/ligo-parser/Markup.mli
Normal file
32
src/ligo/ligo-parser/Markup.mli
Normal file
@ -0,0 +1,32 @@
|
||||
(* This module defines the sorts of markup recognised by the LIGO
|
||||
lexer *)
|
||||
|
||||
(* A lexeme is piece of concrete syntax belonging to a token. In
|
||||
algebraic terms, a token is also a piece of abstract lexical
|
||||
syntax. Lexical units emcompass both markup and lexemes. *)
|
||||
|
||||
type lexeme = string
|
||||
|
||||
type t =
|
||||
Tabs of int Region.reg (* Tabulations *)
|
||||
| Space of int Region.reg (* Space *)
|
||||
| Newline of lexeme Region.reg (* "\n" or "\c\r" escape characters *)
|
||||
| LineCom of lexeme Region.reg (* Line comments *)
|
||||
| BlockCom of lexeme Region.reg (* Block comments *)
|
||||
| BOM of lexeme Region.reg (* Byte-Order Mark for UTF-8 (optional) *)
|
||||
|
||||
type markup = t
|
||||
|
||||
(* Pretty-printing of markup
|
||||
|
||||
The difference between [to_lexeme] and [to_string] is that the
|
||||
former builds the corresponding concrete syntax (the lexeme),
|
||||
whilst the latter makes up a textual representation of the abstract
|
||||
syntax (the OCaml data constructors).
|
||||
|
||||
The result of [to_string] is escaped to avoid capture by the
|
||||
terminal.
|
||||
*)
|
||||
|
||||
val to_lexeme : t -> lexeme
|
||||
val to_string : t -> ?offsets:bool -> [`Byte | `Point] -> string
|
96
src/ligo/ligo-parser/ParToken.mly
Normal file
96
src/ligo/ligo-parser/ParToken.mly
Normal file
@ -0,0 +1,96 @@
|
||||
%{
|
||||
%}
|
||||
|
||||
(* Tokens (mirroring thise defined in module LexToken) *)
|
||||
|
||||
(* Literals *)
|
||||
|
||||
%token <LexToken.lexeme Region.reg> String
|
||||
%token <(LexToken.lexeme * Hex.t) Region.reg> Bytes
|
||||
%token <(LexToken.lexeme * Z.t) Region.reg> Int
|
||||
%token <LexToken.lexeme Region.reg> Ident
|
||||
%token <LexToken.lexeme Region.reg> Constr
|
||||
|
||||
(* Symbols *)
|
||||
|
||||
%token <Region.t> SEMI (* ";" *)
|
||||
%token <Region.t> COMMA (* "," *)
|
||||
%token <Region.t> LPAR (* "(" *)
|
||||
%token <Region.t> RPAR (* ")" *)
|
||||
%token <Region.t> LBRACE (* "{" *)
|
||||
%token <Region.t> RBRACE (* "}" *)
|
||||
%token <Region.t> LBRACKET (* "[" *)
|
||||
%token <Region.t> RBRACKET (* "]" *)
|
||||
%token <Region.t> CONS (* "#" *)
|
||||
%token <Region.t> VBAR (* "|" *)
|
||||
%token <Region.t> ARROW (* "->" *)
|
||||
%token <Region.t> ASS (* ":=" *)
|
||||
%token <Region.t> EQUAL (* "=" *)
|
||||
%token <Region.t> COLON (* ":" *)
|
||||
%token <Region.t> LT (* "<" *)
|
||||
%token <Region.t> LEQ (* "<=" *)
|
||||
%token <Region.t> GT (* ">" *)
|
||||
%token <Region.t> GEQ (* ">=" *)
|
||||
%token <Region.t> NEQ (* "=/=" *)
|
||||
%token <Region.t> PLUS (* "+" *)
|
||||
%token <Region.t> MINUS (* "-" *)
|
||||
%token <Region.t> SLASH (* "/" *)
|
||||
%token <Region.t> TIMES (* "*" *)
|
||||
%token <Region.t> DOT (* "." *)
|
||||
%token <Region.t> WILD (* "_" *)
|
||||
%token <Region.t> CAT (* "^" *)
|
||||
|
||||
(* Keywords *)
|
||||
|
||||
%token <Region.t> And (* "and" *)
|
||||
%token <Region.t> Begin (* "begin" *)
|
||||
%token <Region.t> Block (* "block" *)
|
||||
%token <Region.t> Case (* "case" *)
|
||||
%token <Region.t> Const (* "const" *)
|
||||
%token <Region.t> Contains (* "contains" *)
|
||||
%token <Region.t> Down (* "down" *)
|
||||
%token <Region.t> Else (* "else" *)
|
||||
%token <Region.t> End (* "end" *)
|
||||
%token <Region.t> Entrypoint (* "entrypoint" *)
|
||||
%token <Region.t> Fail (* "fail" *)
|
||||
%token <Region.t> For (* "for" *)
|
||||
%token <Region.t> Function (* "function" *)
|
||||
%token <Region.t> From (* "from" *)
|
||||
%token <Region.t> If (* "if" *)
|
||||
%token <Region.t> In (* "in" *)
|
||||
%token <Region.t> Is (* "is" *)
|
||||
%token <Region.t> List (* "list" *)
|
||||
%token <Region.t> Map (* "map" *)
|
||||
%token <Region.t> Mod (* "mod" *)
|
||||
%token <Region.t> Nil (* "nil" *)
|
||||
%token <Region.t> Not (* "not" *)
|
||||
%token <Region.t> Of (* "of" *)
|
||||
%token <Region.t> Or (* "or" *)
|
||||
%token <Region.t> Patch (* "patch" *)
|
||||
%token <Region.t> Procedure (* "procedure" *)
|
||||
%token <Region.t> Record (* "record" *)
|
||||
%token <Region.t> Remove (* "remove" *)
|
||||
%token <Region.t> Set (* "set" *)
|
||||
%token <Region.t> Skip (* "skip" *)
|
||||
%token <Region.t> Step (* "step" *)
|
||||
%token <Region.t> Storage (* "storage" *)
|
||||
%token <Region.t> Then (* "then" *)
|
||||
%token <Region.t> To (* "to" *)
|
||||
%token <Region.t> Type (* "type" *)
|
||||
%token <Region.t> Var (* "var" *)
|
||||
%token <Region.t> While (* "while" *)
|
||||
%token <Region.t> With (* "with" *)
|
||||
|
||||
(* Data constructors *)
|
||||
|
||||
%token <Region.t> C_False (* "False" *)
|
||||
%token <Region.t> C_None (* "None" *)
|
||||
%token <Region.t> C_Some (* "Some" *)
|
||||
%token <Region.t> C_True (* "True" *)
|
||||
%token <Region.t> C_Unit (* "Unit" *)
|
||||
|
||||
(* Virtual tokens *)
|
||||
|
||||
%token <Region.t> EOF
|
||||
|
||||
%%
|
1027
src/ligo/ligo-parser/Parser.mly
Normal file
1027
src/ligo/ligo-parser/Parser.mly
Normal file
File diff suppressed because it is too large
Load Diff
118
src/ligo/ligo-parser/ParserMain.ml
Normal file
118
src/ligo/ligo-parser/ParserMain.ml
Normal file
@ -0,0 +1,118 @@
|
||||
(* Driver for the parser of LIGO *)
|
||||
|
||||
open! EvalOpt (* Reads the command-line options: Effectful! *)
|
||||
|
||||
let sprintf = Printf.sprintf
|
||||
|
||||
(* Error printing and exception tracing *)
|
||||
|
||||
let () = Printexc.record_backtrace true
|
||||
|
||||
let external_ text =
|
||||
Utils.highlight (Printf.sprintf "External error: %s" text); exit 1;;
|
||||
|
||||
type Error.t += ParseError
|
||||
|
||||
let error_to_string = function
|
||||
ParseError -> "Syntax error.\n"
|
||||
| _ -> assert false
|
||||
|
||||
let print_error ?(offsets=true) mode Region.{region; value} =
|
||||
let msg = error_to_string value in
|
||||
let file = match EvalOpt.input with
|
||||
None | Some "-" -> false
|
||||
| Some _ -> true in
|
||||
let reg = region#to_string ~file ~offsets mode in
|
||||
Utils.highlight (sprintf "Parse error %s:\n%s%!" reg msg)
|
||||
|
||||
(* Path for CPP inclusions (#include) *)
|
||||
|
||||
let lib_path =
|
||||
match EvalOpt.libs with
|
||||
[] -> ""
|
||||
| libs -> let mk_I dir path = Printf.sprintf " -I %s%s" dir path
|
||||
in List.fold_right mk_I libs ""
|
||||
|
||||
(* Preprocessing the input source and opening the input channels *)
|
||||
|
||||
let prefix =
|
||||
match EvalOpt.input with
|
||||
None | Some "-" -> "temp"
|
||||
| Some file -> Filename.(file |> basename |> remove_extension)
|
||||
|
||||
let suffix = ".pp.li"
|
||||
|
||||
let pp_input =
|
||||
if Utils.String.Set.mem "cpp" EvalOpt.verbose
|
||||
then prefix ^ suffix
|
||||
else let pp_input, pp_out = Filename.open_temp_file prefix suffix
|
||||
in close_out pp_out; pp_input
|
||||
|
||||
let cpp_cmd =
|
||||
match EvalOpt.input with
|
||||
None | Some "-" ->
|
||||
Printf.sprintf "cpp -traditional-cpp%s - -o %s"
|
||||
lib_path pp_input
|
||||
| Some file ->
|
||||
Printf.sprintf "cpp -traditional-cpp%s %s -o %s"
|
||||
lib_path file pp_input
|
||||
|
||||
let () =
|
||||
if Utils.String.Set.mem "cpp" EvalOpt.verbose
|
||||
then Printf.eprintf "%s\n%!" cpp_cmd;
|
||||
if Sys.command cpp_cmd <> 0 then
|
||||
external_ (Printf.sprintf "the command \"%s\" failed." cpp_cmd)
|
||||
|
||||
(* Instanciating the lexer *)
|
||||
|
||||
module Lexer = Lexer.Make (LexToken)
|
||||
|
||||
let Lexer.{read; buffer; get_pos; get_last; close} =
|
||||
Lexer.open_token_stream (Some pp_input)
|
||||
|
||||
and cout = stdout
|
||||
|
||||
let log = Lexer.output_token ~offsets:EvalOpt.offsets
|
||||
EvalOpt.mode EvalOpt.cmd cout
|
||||
|
||||
and close_all () = close (); close_out cout
|
||||
|
||||
(* Tokeniser *)
|
||||
|
||||
let tokeniser = read ~log
|
||||
|
||||
(* Main *)
|
||||
|
||||
let () =
|
||||
try
|
||||
let ast = Parser.contract tokeniser buffer in
|
||||
if Utils.String.Set.mem "ast" EvalOpt.verbose
|
||||
then AST.print_tokens ast
|
||||
with
|
||||
Lexer.Error err ->
|
||||
close_all ();
|
||||
Lexer.print_error ~offsets EvalOpt.mode err
|
||||
| Parser.Error ->
|
||||
let region = get_last () in
|
||||
let error = Region.{region; value=ParseError} in
|
||||
let () = close_all () in
|
||||
print_error ~offsets EvalOpt.mode error
|
||||
| Sys_error msg -> Utils.highlight msg
|
||||
|
||||
(*
|
||||
(* Temporary: force dune to build AST2.ml *)
|
||||
let () =
|
||||
let open AST2 in
|
||||
let _ = s_ast in
|
||||
()
|
||||
|
||||
(*
|
||||
(* Temporary: force dune to build AST2.ml *)
|
||||
let () =
|
||||
if false then
|
||||
let _ = Typecheck2.annotate in
|
||||
()
|
||||
else
|
||||
()
|
||||
*)
|
||||
*)
|
138
src/ligo/ligo-parser/Pos.ml
Normal file
138
src/ligo/ligo-parser/Pos.ml
Normal file
@ -0,0 +1,138 @@
|
||||
type t = <
|
||||
byte : Lexing.position;
|
||||
point_num : int;
|
||||
point_bol : int;
|
||||
file : string;
|
||||
line : int;
|
||||
|
||||
set_file : string -> t;
|
||||
set_line : int -> t;
|
||||
set_offset : int -> t;
|
||||
set : file:string -> line:int -> offset:int -> t;
|
||||
new_line : string -> t;
|
||||
add_nl : t;
|
||||
|
||||
shift_bytes : int -> t;
|
||||
shift_one_uchar : int -> t;
|
||||
|
||||
offset : [`Byte | `Point] -> int;
|
||||
column : [`Byte | `Point] -> int;
|
||||
|
||||
line_offset : [`Byte | `Point] -> int;
|
||||
byte_offset : int;
|
||||
|
||||
is_ghost : bool;
|
||||
|
||||
to_string : ?offsets:bool -> [`Byte | `Point] -> string;
|
||||
compact : ?offsets:bool -> [`Byte | `Point] -> string;
|
||||
anonymous : ?offsets:bool -> [`Byte | `Point] -> string
|
||||
>
|
||||
|
||||
type pos = t
|
||||
|
||||
(* Constructors *)
|
||||
|
||||
let sprintf = Printf.sprintf
|
||||
|
||||
let make ~byte ~point_num ~point_bol =
|
||||
let () = assert (point_num >= point_bol) in
|
||||
object (self)
|
||||
val byte = byte
|
||||
method byte = byte
|
||||
|
||||
val point_num = point_num
|
||||
method point_num = point_num
|
||||
|
||||
val point_bol = point_bol
|
||||
method point_bol = point_bol
|
||||
|
||||
method set_file file =
|
||||
{< byte = Lexing.{byte with pos_fname = file} >}
|
||||
|
||||
method set_line line =
|
||||
{< byte = Lexing.{byte with pos_lnum = line} >}
|
||||
|
||||
method set_offset offset =
|
||||
{< byte = Lexing.{byte with pos_cnum = byte.pos_bol + offset} >}
|
||||
|
||||
method set ~file ~line ~offset =
|
||||
let pos = self#set_file file in
|
||||
let pos = pos#set_line line in
|
||||
let pos = pos#set_offset offset
|
||||
in pos
|
||||
|
||||
(* The string must not contain '\n'. See [new_line]. *)
|
||||
|
||||
method shift_bytes len =
|
||||
{< byte = Lexing.{byte with pos_cnum = byte.pos_cnum + len};
|
||||
point_num = point_num + len >}
|
||||
|
||||
method shift_one_uchar len =
|
||||
{< byte = Lexing.{byte with pos_cnum = byte.pos_cnum + len};
|
||||
point_num = point_num + 1 >}
|
||||
|
||||
method add_nl =
|
||||
{< byte = Lexing.{byte with
|
||||
pos_lnum = byte.pos_lnum + 1;
|
||||
pos_bol = byte.pos_cnum};
|
||||
point_bol = point_num >}
|
||||
|
||||
method new_line string =
|
||||
let len = String.length string
|
||||
in (self#shift_bytes len)#add_nl
|
||||
|
||||
method is_ghost = byte = Lexing.dummy_pos
|
||||
|
||||
method file = byte.Lexing.pos_fname
|
||||
|
||||
method line = byte.Lexing.pos_lnum
|
||||
|
||||
method offset = function
|
||||
`Byte -> Lexing.(byte.pos_cnum - byte.pos_bol)
|
||||
| `Point -> point_num - point_bol
|
||||
|
||||
method column mode = 1 + self#offset mode
|
||||
|
||||
method line_offset = function
|
||||
`Byte -> byte.Lexing.pos_bol
|
||||
| `Point -> point_bol
|
||||
|
||||
method byte_offset = byte.Lexing.pos_cnum
|
||||
|
||||
method to_string ?(offsets=true) mode =
|
||||
let offset = self#offset mode in
|
||||
let horizontal, value =
|
||||
if offsets then "character", offset else "column", offset + 1
|
||||
in sprintf "File \"%s\", line %i, %s %i"
|
||||
self#file self#line horizontal value
|
||||
|
||||
method compact ?(offsets=true) mode =
|
||||
if self#is_ghost then "ghost"
|
||||
else
|
||||
let offset = self#offset mode in
|
||||
sprintf "%s:%i:%i"
|
||||
self#file self#line (if offsets then offset else offset + 1)
|
||||
|
||||
method anonymous ?(offsets=true) mode =
|
||||
if self#is_ghost then "ghost"
|
||||
else sprintf "%i:%i" self#line
|
||||
(if offsets then self#offset mode else self#column mode)
|
||||
end
|
||||
|
||||
let ghost = make ~byte:Lexing.dummy_pos ~point_num:(-1) ~point_bol:(-1)
|
||||
|
||||
let min =
|
||||
let byte = Lexing.{
|
||||
pos_fname = "";
|
||||
pos_lnum = 1;
|
||||
pos_bol = 0;
|
||||
pos_cnum = 0}
|
||||
in make ~byte ~point_num:0 ~point_bol:0
|
||||
|
||||
(* Comparisons *)
|
||||
|
||||
let equal pos1 pos2 =
|
||||
pos1#file = pos2#file && pos1#byte_offset = pos2#byte_offset
|
||||
|
||||
let lt pos1 pos2 =
|
||||
pos1#file = pos2#file && pos1#byte_offset < pos2#byte_offset
|
107
src/ligo/ligo-parser/Pos.mli
Normal file
107
src/ligo/ligo-parser/Pos.mli
Normal file
@ -0,0 +1,107 @@
|
||||
(* Positions in a file
|
||||
|
||||
A position in a file denotes a single unit belonging to it, for
|
||||
example, in an ASCII text file, it is a particular character within
|
||||
that file (the unit is the byte in this instance, since in ASCII
|
||||
one character is encoded with one byte).
|
||||
|
||||
Units can be either bytes (as ASCII characters) or, more
|
||||
generally, unicode points.
|
||||
|
||||
The type for positions is the object type [t].
|
||||
|
||||
We use here lexing positions to denote byte-oriented positions
|
||||
(field [byte]), and we manage code points by means of the fields
|
||||
[point_num] and [point_bol]. These two fields have a meaning
|
||||
similar to the fields [pos_cnum] and [pos_bol], respectively, from
|
||||
the standard module [Lexing]. That is to say, [point_num] holds the
|
||||
number of code points since the beginning of the file, and
|
||||
[point_bol] the number of code points since the beginning of the
|
||||
current line.
|
||||
|
||||
The name of the file is given by the field [file], and the line
|
||||
number by the field [line].
|
||||
*)
|
||||
|
||||
type t = <
|
||||
(* Payload *)
|
||||
|
||||
byte : Lexing.position;
|
||||
point_num : int;
|
||||
point_bol : int;
|
||||
file : string;
|
||||
line : int;
|
||||
|
||||
(* Setters *)
|
||||
|
||||
set_file : string -> t;
|
||||
set_line : int -> t;
|
||||
set_offset : int -> t;
|
||||
set : file:string -> line:int -> offset:int -> t;
|
||||
|
||||
(* The call [pos#new_line s], where the string [s] is either "\n" or
|
||||
"\c\r", updates the position [pos] with a new line. *)
|
||||
|
||||
new_line : string -> t;
|
||||
add_nl : t;
|
||||
|
||||
(* The call [pos#shift_bytes n] evaluates in a position that is the
|
||||
translation of position [pos] of [n] bytes forward in the
|
||||
file. The call [pos#shift_one_uchar n] is similar, except that it
|
||||
assumes that [n] is the number of bytes making up one unicode
|
||||
point. *)
|
||||
|
||||
shift_bytes : int -> t;
|
||||
shift_one_uchar : int -> t;
|
||||
|
||||
(* Getters *)
|
||||
|
||||
(* The call [pos#offset `Byte] provides the horizontal offset of the
|
||||
position [pos] in bytes. (An offset is the number of units, like
|
||||
bytes, since the beginning of the current line.) The call
|
||||
[pos#offset `Point] is the offset counted in number of unicode
|
||||
points.
|
||||
|
||||
The calls to the method [column] are similar to those to
|
||||
[offset], except that they give the curren column number.
|
||||
|
||||
The call [pos#line_offset `Byte] is the offset of the line of
|
||||
position [pos], counted in bytes. Dually, [pos#line_offset
|
||||
`Point] counts the same offset in code points.
|
||||
|
||||
The call [pos#byte_offset] is the offset of the position [pos]
|
||||
since the begininng of the file, counted in bytes.
|
||||
*)
|
||||
|
||||
offset : [`Byte | `Point] -> int;
|
||||
column : [`Byte | `Point] -> int;
|
||||
|
||||
line_offset : [`Byte | `Point] -> int;
|
||||
byte_offset : int;
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
is_ghost : bool;
|
||||
|
||||
(* Conversions to [string] *)
|
||||
|
||||
to_string : ?offsets:bool -> [`Byte | `Point] -> string;
|
||||
compact : ?offsets:bool -> [`Byte | `Point] -> string;
|
||||
anonymous : ?offsets:bool -> [`Byte | `Point] -> string
|
||||
>
|
||||
|
||||
type pos = t
|
||||
|
||||
(* Constructors *)
|
||||
|
||||
val make : byte:Lexing.position -> point_num:int -> point_bol:int -> t
|
||||
|
||||
(* Special positions *)
|
||||
|
||||
val ghost : t (* Same as [Lexing.dummy_pos] *)
|
||||
val min : t (* Lexing convention: line 1, offsets to 0 and file to "". *)
|
||||
|
||||
(* Comparisons *)
|
||||
|
||||
val equal : t -> t -> bool
|
||||
val lt : t -> t -> bool
|
128
src/ligo/ligo-parser/Region.ml
Normal file
128
src/ligo/ligo-parser/Region.ml
Normal file
@ -0,0 +1,128 @@
|
||||
(* Regions of a file *)
|
||||
|
||||
let sprintf = Printf.sprintf
|
||||
|
||||
type t = <
|
||||
start : Pos.t;
|
||||
stop : Pos.t;
|
||||
|
||||
(* Setters *)
|
||||
|
||||
shift_bytes : int -> t;
|
||||
shift_one_uchar : int -> t;
|
||||
set_file : string -> t;
|
||||
|
||||
(* Getters *)
|
||||
|
||||
file : string;
|
||||
pos : Pos.t * Pos.t;
|
||||
byte_pos : Lexing.position * Lexing.position;
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
is_ghost : bool;
|
||||
|
||||
(* Conversions to [string] *)
|
||||
|
||||
to_string : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string;
|
||||
compact : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string
|
||||
>
|
||||
|
||||
type region = t
|
||||
|
||||
type 'a reg = {region: t; value: 'a}
|
||||
|
||||
(* Injections *)
|
||||
|
||||
exception Invalid
|
||||
|
||||
let make ~(start: Pos.t) ~(stop: Pos.t) =
|
||||
if start#file <> stop#file || start#byte_offset > stop#byte_offset
|
||||
then raise Invalid
|
||||
else
|
||||
object
|
||||
val start = start
|
||||
method start = start
|
||||
val stop = stop
|
||||
method stop = stop
|
||||
|
||||
method shift_bytes len =
|
||||
let start = start#shift_bytes len
|
||||
and stop = stop#shift_bytes len
|
||||
in {< start = start; stop = stop >}
|
||||
|
||||
method shift_one_uchar len =
|
||||
let start = start#shift_one_uchar len
|
||||
and stop = stop#shift_one_uchar len
|
||||
in {< start = start; stop = stop >}
|
||||
|
||||
method set_file name =
|
||||
let start = start#set_file name
|
||||
and stop = stop#set_file name
|
||||
in {< start = start; stop = stop >}
|
||||
|
||||
(* Getters *)
|
||||
|
||||
method file = start#file
|
||||
method pos = start, stop
|
||||
method byte_pos = start#byte, stop#byte
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
method is_ghost = start#is_ghost && stop#is_ghost
|
||||
|
||||
(* Conversions to strings *)
|
||||
|
||||
method to_string ?(file=true) ?(offsets=true) mode =
|
||||
let horizontal = if offsets then "character" else "column"
|
||||
and start_offset =
|
||||
if offsets then start#offset mode else start#column mode
|
||||
and stop_offset =
|
||||
if offsets then stop#offset mode else stop#column mode in
|
||||
let info =
|
||||
if file
|
||||
then sprintf "in file \"%s\", line %i, %s"
|
||||
(String.escaped start#file) start#line horizontal
|
||||
else sprintf "at line %i, %s" start#line horizontal
|
||||
in if stop#line = start#line
|
||||
then sprintf "%ss %i-%i" info start_offset stop_offset
|
||||
else sprintf "%s %i to line %i, %s %i"
|
||||
info start_offset stop#line horizontal stop_offset
|
||||
|
||||
method compact ?(file=true) ?(offsets=true) mode =
|
||||
let start_str = start#anonymous ~offsets mode
|
||||
and stop_str = stop#anonymous ~offsets mode in
|
||||
if start#file = stop#file then
|
||||
if file then sprintf "%s:%s-%s" start#file start_str stop_str
|
||||
else sprintf "%s-%s" start_str stop_str
|
||||
else sprintf "%s:%s-%s:%s" start#file start_str stop#file stop_str
|
||||
end
|
||||
|
||||
(* Special regions *)
|
||||
|
||||
let ghost = make ~start:Pos.ghost ~stop:Pos.ghost
|
||||
|
||||
let min = make ~start:Pos.min ~stop:Pos.min
|
||||
|
||||
(* Comparisons *)
|
||||
|
||||
let equal r1 r2 =
|
||||
r1#file = r2#file
|
||||
&& Pos.equal r1#start r2#start
|
||||
&& Pos.equal r1#stop r2#stop
|
||||
|
||||
let lt r1 r2 =
|
||||
r1#file = r2#file
|
||||
&& not r1#is_ghost
|
||||
&& not r2#is_ghost
|
||||
&& Pos.lt r1#start r2#start
|
||||
&& Pos.lt r1#stop r2#stop
|
||||
|
||||
let cover r1 r2 =
|
||||
if r1#is_ghost
|
||||
then r2
|
||||
else if r2#is_ghost
|
||||
then r1
|
||||
else if lt r1 r2
|
||||
then make ~start:r1#start ~stop:r2#stop
|
||||
else make ~start:r2#start ~stop:r1#stop
|
125
src/ligo/ligo-parser/Region.mli
Normal file
125
src/ligo/ligo-parser/Region.mli
Normal file
@ -0,0 +1,125 @@
|
||||
(* Regions of a file
|
||||
|
||||
A _region_ is a contiguous series of bytes, for example, in a text
|
||||
file. It is here denoted by the object type [t].
|
||||
|
||||
The start (included) of the region is given by the field [start],
|
||||
which is a _position_, and the end (excluded) is the position given
|
||||
by the field [stop]. The convention of including the start and
|
||||
excluding the end enables to have empty regions if, and only if,
|
||||
[start = stop]. See module [Pos] for the definition of positions.
|
||||
|
||||
The first byte of a file starts at the offset zero (that is,
|
||||
column one), and [start] is always lower than or equal to [stop],
|
||||
and they must refer to the same file.
|
||||
*)
|
||||
|
||||
type t = <
|
||||
start : Pos.t;
|
||||
stop : Pos.t;
|
||||
|
||||
(* Setters *)
|
||||
|
||||
(* The call [region#shift_bytes n] evaluates in a region that is the
|
||||
translation of region [region] of [n] bytes forward in the
|
||||
file. The call [region#shift_one_uchar n] is similar, except that
|
||||
it assumes that [n] is the number of bytes making up one unicode
|
||||
point. The call [region#set_file f] sets the file name to be
|
||||
[f]. *)
|
||||
|
||||
shift_bytes : int -> t;
|
||||
shift_one_uchar : int -> t;
|
||||
set_file : string -> t;
|
||||
|
||||
(* Getters *)
|
||||
|
||||
(* The method [file] returns the file name.
|
||||
The method [pos] returns the values of the fields [start] and [stop].
|
||||
The method [byte_pos] returns the start and end positions of the
|
||||
region at hand _interpreting them as lexing positions_, that is,
|
||||
the unit is the byte. *)
|
||||
|
||||
file : string;
|
||||
pos : Pos.t * Pos.t;
|
||||
byte_pos : Lexing.position * Lexing.position;
|
||||
|
||||
(* Predicates *)
|
||||
|
||||
is_ghost : bool;
|
||||
|
||||
(* Conversions to [string] *)
|
||||
|
||||
(* The call [region#to_string ~file ~offsets mode] evaluates in a
|
||||
string denoting the region [region].
|
||||
|
||||
The name of the file is present if, and only if, [file = true] or
|
||||
[file] is missing.
|
||||
|
||||
The positions in the file are expressed horizontal offsets if
|
||||
[offsets = true] or [offsets] is missing (the default), otherwise
|
||||
as columns.
|
||||
|
||||
If [mode = `Byte], those positions will be assumed to have bytes
|
||||
as their unit, otherwise, if [mode = `Point], they will be
|
||||
assumed to refer to code points.
|
||||
|
||||
The method [compact] has the same signature and calling
|
||||
convention as [to_string], except that the resulting string is
|
||||
more compact.
|
||||
*)
|
||||
|
||||
to_string : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string;
|
||||
compact : ?file:bool -> ?offsets:bool -> [`Byte | `Point] -> string
|
||||
>
|
||||
|
||||
type region = t
|
||||
|
||||
type 'a reg = {region: t; value: 'a}
|
||||
|
||||
(* Constructors *)
|
||||
|
||||
(* The function [make] creates a region from two positions. If the
|
||||
positions are not properly ordered or refer to different files, the
|
||||
exception [Invalid] is raised. *)
|
||||
|
||||
exception Invalid
|
||||
|
||||
val make : start:Pos.t -> stop:Pos.t -> t
|
||||
|
||||
(* Special regions *)
|
||||
|
||||
(* To deal with ghost expressions, that is, pieces of abstract syntax
|
||||
that have not been built from excerpts of concrete syntax, we need
|
||||
_ghost regions_. The module [Pos] provides a [ghost] position, and
|
||||
we also provide a [ghost] region and, in type [t], the method
|
||||
[is_ghost] to check it. *)
|
||||
|
||||
val ghost : t (* Two [Pos.ghost] positions *)
|
||||
|
||||
(* Occasionnally, we may need a minimum region. It is here made of two
|
||||
minimal positions. *)
|
||||
|
||||
val min : t (* Two [Pos.min] positions *)
|
||||
|
||||
(* Comparisons *)
|
||||
|
||||
(* Two regions are equal if, and only if, they refer to the same file
|
||||
and their start positions are equal and their stop positions are
|
||||
equal. See [Pos.equal]. Note that [r1] and [r2] can be ghosts. *)
|
||||
|
||||
val equal : t -> t -> bool
|
||||
|
||||
(* The call [lt r1 r2] ("lower than") has the value [true] if, and
|
||||
only if, regions [r1] and [r2] refer to the same file, none is a
|
||||
ghost and the start position of [r1] is lower than that of
|
||||
[r2]. (See [Pos.lt].) *)
|
||||
|
||||
val lt : t -> t -> bool
|
||||
|
||||
(* Given two regions [r1] and [r2], we may want the region [cover r1
|
||||
r2] that covers [r1] and [r2]. We property [equal (cover r1 r2)
|
||||
(cover r2 r1)]. (In a sense, it is the maximum region, but we avoid
|
||||
that name because of the [min] function above.) If [r1] is a ghost,
|
||||
the cover is [r2], and if [r2] is a ghost, the cover is [r1]. *)
|
||||
|
||||
val cover : t -> t -> t
|
45
src/ligo/ligo-parser/Tests/a.ligo
Normal file
45
src/ligo/ligo-parser/Tests/a.ligo
Normal file
@ -0,0 +1,45 @@
|
||||
type t is int * string
|
||||
type u is t
|
||||
|
||||
type v is record
|
||||
foo: key;
|
||||
bar: mutez;
|
||||
baz: address
|
||||
end
|
||||
|
||||
type w is K of (U of int) // v * u
|
||||
|
||||
type i is int;
|
||||
|
||||
const x : v =
|
||||
record
|
||||
foo = 4;
|
||||
bar = 5;
|
||||
baz = 0x3244
|
||||
end
|
||||
|
||||
(* Block comment *)
|
||||
|
||||
entrypoint g (storage s : u; const l : list (int))
|
||||
: operation (list) is
|
||||
var m : map (int, string) := empty_map;
|
||||
var y : v := copy x with record bar = 7 end;
|
||||
|
||||
function f (const x : int) : int is
|
||||
var y : int := 5 - x
|
||||
const z : int = 6
|
||||
begin
|
||||
y := x + y
|
||||
end with y * 2
|
||||
|
||||
begin
|
||||
y.[4] := "hello";
|
||||
match l with
|
||||
[] -> null
|
||||
| h#t -> q (h+2)
|
||||
end;
|
||||
begin
|
||||
g (Unit);
|
||||
fail "in extremis"
|
||||
end
|
||||
end with (s, ([]: (u * operation (list))))
|
64
src/ligo/ligo-parser/Tests/crowdfunding.ligo
Normal file
64
src/ligo/ligo-parser/Tests/crowdfunding.ligo
Normal file
@ -0,0 +1,64 @@
|
||||
type store is
|
||||
record
|
||||
goal : nat;
|
||||
deadline : timestamp;
|
||||
backers : map (address, nat);
|
||||
funded : bool;
|
||||
end
|
||||
|
||||
entrypoint contribute (storage store : store;
|
||||
const sender : address;
|
||||
const amount : mutez)
|
||||
: store * list (operation) is
|
||||
var operations : list (operation) := nil
|
||||
const s : list (int) = list [1; 2; 3]
|
||||
const t : set (int) = set []
|
||||
block {
|
||||
if now > store.deadline then
|
||||
fail "Deadline passed";
|
||||
else
|
||||
case store.backers[sender] of
|
||||
None -> store.backers[sender] := Some (amount)
|
||||
// None -> patch store.backers with map sender -> amount end
|
||||
| _ -> skip
|
||||
end
|
||||
} with (store, operations)
|
||||
|
||||
entrypoint withdraw (storage store : store; const sender : address)
|
||||
: store * list (operation) is
|
||||
var operations : list (operation) := list end
|
||||
begin
|
||||
// if set ["a"; "b"] contains x then skip else skip;
|
||||
if sender = owner then
|
||||
if now (Unit) >= store.deadline then
|
||||
if balance >= store.goal then {
|
||||
store.funded := True;
|
||||
// patch store with record funded = True end;
|
||||
operations := list [Transfer (owner, balance)];
|
||||
};
|
||||
else fail "Below target"
|
||||
else { fail "Too soon"; }
|
||||
else skip
|
||||
end with (store, operations)
|
||||
|
||||
entrypoint claim (storage store : store; const sender : address)
|
||||
: store * list (operation) is
|
||||
var operations : list (operation) := list []
|
||||
var amount : mutez := 0
|
||||
begin
|
||||
if now <= store.deadline then
|
||||
fail "Too soon"
|
||||
else
|
||||
case store.backers[sender] of
|
||||
None ->
|
||||
fail "Not a backer"
|
||||
| Some (amount) ->
|
||||
if balance >= store.goal or store.funded then
|
||||
fail "Cannot refund"
|
||||
else
|
||||
begin
|
||||
operations := list [Transfer (sender, amount)];
|
||||
remove sender from map store.backers
|
||||
end
|
||||
end
|
||||
end with (store, operations)
|
274
src/ligo/ligo-parser/Typecheck2.ml
Normal file
274
src/ligo/ligo-parser/Typecheck2.ml
Normal file
@ -0,0 +1,274 @@
|
||||
[@@@warning "-27"] (* TODO *)
|
||||
[@@@warning "-32"] (* TODO *)
|
||||
[@@@warning "-30"]
|
||||
|
||||
module SMap = Map.Make(String)
|
||||
|
||||
module I = AST2.O
|
||||
|
||||
module O = struct
|
||||
type asttodo = [`TODO] (* occurrences of asttodo will point to some part of the original parser AST *)
|
||||
|
||||
type name_and_region = {name: string; orig: Region.t}
|
||||
type type_name = name_and_region
|
||||
type var_name = name_and_region
|
||||
type field_name = name_and_region
|
||||
|
||||
type pattern =
|
||||
PVar of var_name
|
||||
| PWild
|
||||
| PInt of Z.t
|
||||
| PBytes of Hex.t
|
||||
| PString of string
|
||||
| PUnit
|
||||
| PFalse
|
||||
| PTrue
|
||||
| PNone
|
||||
| PSome of pattern
|
||||
| PCons of pattern * pattern
|
||||
| PNull
|
||||
| PRecord of (field_name * pattern) SMap.t
|
||||
|
||||
type type_constructor =
|
||||
Option
|
||||
| List
|
||||
| Set
|
||||
| Map
|
||||
|
||||
type type_expr_case =
|
||||
Sum of (type_name * type_expr) SMap.t
|
||||
| Record of (field_name * type_expr) SMap.t
|
||||
| TypeApp of type_constructor * (type_expr list)
|
||||
| Function of { arg: type_expr; ret: type_expr }
|
||||
| Ref of type_expr
|
||||
| String
|
||||
| Bytes
|
||||
| Int
|
||||
| Unit
|
||||
| Bool
|
||||
|
||||
and type_expr = { type_expr: type_expr_case; name: type_name option; orig: Region.t }
|
||||
|
||||
type typed_var = { name:var_name; ty:type_expr; orig: asttodo }
|
||||
|
||||
type type_decl = { name: type_name; ty:type_expr; orig: asttodo }
|
||||
|
||||
type expr_case =
|
||||
App of { operator: operator; arguments: expr list }
|
||||
| Var of typed_var
|
||||
| Constant of constant
|
||||
| Record of (field_name * expr) list
|
||||
| Lambda of lambda
|
||||
|
||||
and expr = { expr: expr_case; ty:type_expr; orig: asttodo }
|
||||
|
||||
and decl = { var: typed_var; value: expr; orig: asttodo }
|
||||
|
||||
and lambda = {
|
||||
parameter: typed_var;
|
||||
declarations: decl list;
|
||||
instructions: instr list;
|
||||
result: expr;
|
||||
}
|
||||
|
||||
and operator_case =
|
||||
Function of var_name
|
||||
| Constructor of var_name
|
||||
| UpdateField of field_name
|
||||
| GetField of field_name
|
||||
| Or | And | Lt | Leq | Gt | Geq | Equal | Neq | Cat | Cons | Add | Sub | Mult | Div | Mod
|
||||
| Neg | Not
|
||||
| Set
|
||||
| MapLookup
|
||||
|
||||
and operator = { operator: operator_case; ty:type_expr; orig: asttodo }
|
||||
|
||||
and constant =
|
||||
Unit
|
||||
| Int of Z.t | String of string | Bytes of Hex.t
|
||||
| False | True
|
||||
| Null
|
||||
| EmptySet
|
||||
| CNone
|
||||
|
||||
and instr =
|
||||
Assignment of { name: var_name; value: expr; orig: asttodo }
|
||||
| While of { condition: expr; body: instr list; orig: asttodo }
|
||||
| ForCollection of { list: expr; var: var_name; body: instr list; orig: asttodo }
|
||||
| Match of { expr: expr; cases: (pattern * instr list) list; orig: asttodo }
|
||||
| ProcedureCall of { expr: expr; orig: asttodo } (* expr returns unit, drop the result. Similar to OCaml's ";". *)
|
||||
| Fail of { expr: expr; orig: asttodo }
|
||||
|
||||
type ast = {
|
||||
types : type_decl list;
|
||||
storage_decl : typed_var;
|
||||
declarations : decl list;
|
||||
orig : AST.t
|
||||
}
|
||||
end
|
||||
|
||||
type te = O.type_expr list SMap.t
|
||||
type ve = O.type_expr list SMap.t
|
||||
type tve = te * ve
|
||||
|
||||
(** [fold_map f a l] threads an accumulator through [l] while mapping:
    it returns the final accumulator together with the list of mapped
    elements, preserved in their original order (tail-recursive). *)
let fold_map f a l =
  let step (acc, rev_mapped) item =
    let acc, item = f acc item in
    acc, item :: rev_mapped in
  let final_acc, rev_result = List.fold_left step (a, []) l
  in final_acc, List.rev rev_result
|
||||
|
||||
(* Tail-recursive [List.map]: applies [f] left-to-right, accumulating in
   reverse and restoring the order at the end. *)
let map f l =
  let rec go acc = function
      [] -> List.rev acc
    | x :: rest -> go (f x :: acc) rest
  in go [] l
|
||||
|
||||
(** [shadow name typ env] pushes [typ] as the innermost binding for
    [name] in [env]; previous bindings are kept underneath (shadowed). *)
let shadow (name : string) (typ : O.type_expr) (env : O.type_expr list SMap.t)
    : O.type_expr list SMap.t =
  let push = function
      None -> Some [typ]
    | Some previous -> Some (typ :: previous)
  in SMap.update name push env
|
||||
|
||||
(** [lookup name env] returns the innermost (most recent) type bound to
    [name] in the shadowing environment [env].
    Fix: the original used [SMap.find], which raises [Not_found] for an
    absent key, so the intended "Unbound variable" failure was never
    reached in that case; [find_opt] makes both paths explicit.
    @raise Failure if [name] is unbound (or bound to an empty list). *)
let lookup (name : string) (env : O.type_expr list SMap.t) : O.type_expr =
  match SMap.find_opt name env with
    Some (latest :: _shadowed) -> latest
  | Some [] | None -> failwith "Unbound variable"
|
||||
|
||||
(* Extracts the bare string from a name/region pair. *)
let string_of_name (nr : I.name_and_region) = nr.name
|
||||
|
||||
(* Copies a name/region pair from the input AST to the output AST. *)
let a_name_and_region (nr : I.name_and_region) : O.name_and_region =
  {name = nr.name; orig = nr.orig}
|
||||
|
||||
(* Translates a built-in type constructor (the two ASTs share the same
   four constructors, so this is a one-to-one mapping). *)
let a_type_constructor (tve : tve) (tc : I.type_constructor) : O.type_constructor =
  match tc with
    Option -> Option
  | List   -> List
  | Set    -> Set
  | Map    -> Map
|
||||
|
||||
(** Translates a type-expression case from the input AST to the output
    AST. Compound cases (sums, records, applications, functions, refs)
    are not implemented yet.
    Fix: the [Bytes] case was missing, which made the match
    non-exhaustive and would raise [Match_failure] on byte types. *)
let a_type_expr_case (tve : tve) : I.type_expr_case -> O.type_expr_case = function
  Sum lt -> failwith "TODO"
| Record lt -> failwith "TODO"
| TypeApp (tc, args) -> failwith "TODO"
| Function {arg;ret} -> failwith "TODO"
| Ref t -> failwith "TODO"
| String -> String
| Bytes -> Bytes
| Int -> Int
| Unit -> Unit
| Bool -> Bool
|
||||
|
||||
(** Annotates a full type expression: translates its case and its
    optional name, keeping the original source region. *)
let a_type_expr (tve : tve) ({type_expr; name; orig} : I.type_expr) : O.type_expr =
  let translated_case = a_type_expr_case tve type_expr in
  let translated_name =
    match name with
      None -> None
    | Some n -> Some (a_name_and_region n)
  in {type_expr = translated_case; name = translated_name; orig}
|
||||
|
||||
(** Annotates one type declaration and records it in the type
    environment (shadowing any previous binding of the same name). *)
let a_type (te,ve : tve) ({name;ty;orig} : I.type_decl) : tve * O.type_decl =
  let ty = a_type_expr (te,ve) ty in
  let te = shadow (string_of_name name) ty te in
  (te,ve), {name = a_name_and_region name; ty; orig}
|
||||
|
||||
(** Annotates a list of type declarations, threading the environments
    left to right. *)
let a_types : tve -> I.type_decl list -> tve * O.type_decl list =
  fun tve l -> fold_map a_type tve l
|
||||
|
||||
(** Checks the storage declaration against the environments.
    Not implemented yet.
    Fix: the original bound [failwith "TODO"] directly (not under a
    [fun]), so the exception was raised eagerly at module
    initialisation, preventing the module from loading at all; the body
    is now eta-expanded and only raises when actually called. *)
let a_storage_decl : tve -> I.typed_var -> tve * O.typed_var =
  fun _tve _storage_decl -> failwith "TODO"
|
||||
|
||||
(** Shallow structural equality on type-expression cases. Compound
    cases (sums, records, applications, functions, refs) are not
    implemented yet.
    Fix: the [Bytes, Bytes] case was missing, so two byte types fell
    through the wildcard and compared as unequal. *)
let type_expr_case_equal (t1 : O.type_expr_case) (t2 : O.type_expr_case) : bool = match t1,t2 with
  Sum m1, Sum m2 -> failwith "TODO" (* of (O.name_and_region * O.type_expr) SMap.t *)
| Record m1, Record m2 -> failwith "TODO" (* of (O.name_and_region * O.type_expr) SMap.t *)
| TypeApp (tc1, args1), TypeApp (tc2, args2) -> failwith "TODO" (* of O.type_constructor * O.type_expr list *)
| Function {arg=arg1;ret=ret1}, Function {arg=arg2;ret=ret2} -> failwith "TODO" (* of { arg : O.type_expr; ret : O.type_expr; } *)
| Ref t1, Ref t2 -> failwith "TODO" (* of O.type_expr *)
| String, String -> true
| Bytes, Bytes -> true (* fix: was missing, fell through to [false] *)
| Int, Int -> true
| Unit, Unit -> true
| Bool, Bool -> true
| _ -> false
|
||||
|
||||
(** Equality of two type expressions, comparing only their cases and
    ignoring the (cosmetic) name and source region. *)
let type_expr_equal (t1 : O.type_expr) (t2 : O.type_expr) : bool =
  type_expr_case_equal t1.type_expr t2.type_expr
|
||||
|
||||
(** Asserts that [actual] has the [expected] type.
    @raise Failure when the two types differ. *)
let check_type_expr_equal (expected : O.type_expr) (actual : O.type_expr) : unit =
  if not (type_expr_equal expected actual) then
    failwith "got [actual] but expected [expected]"
|
||||
|
||||
(** Translates a variable reference, checking that the type recorded
    for it in the value environment matches [expected]. *)
let a_var_expr (te,ve : tve) (expected : O.type_expr) (var_name : I.name_and_region) : O.expr_case =
  let bound = lookup (string_of_name var_name) ve in
  check_type_expr_equal expected bound;
  Var { name = a_name_and_region var_name; ty = expected; orig = `TODO }
|
||||
|
||||
(** Type-checks a constant literal against [expected] and translates it
    to the output AST. Ground literals carry their obvious type;
    [Null]/[EmptySet]/[CNone] carry an explicit type annotation in the
    input AST, which is translated and checked instead. *)
let a_constant_expr (tve : tve) (expected : O.type_expr) (constant : I.constant) : O.expr_case =
  (* Wraps a bare case as an anonymous type expression. *)
  let ground type_expr_case : O.type_expr =
    { type_expr = type_expr_case; name = None; orig = Region.ghost } in
  let actual : O.type_expr =
    match constant with
      Unit       -> ground Unit
    | Int _      -> ground Int
    | String _   -> ground String
    | Bytes _    -> ground Bytes
    | False
    | True       -> ground Bool
    | Null t
    | EmptySet t
    | CNone t    -> a_type_expr tve t in
  check_type_expr_equal expected actual;
  let translated : O.constant =
    match constant with
      Unit       -> Unit
    | Int i      -> Int i
    | String s   -> String s
    | Bytes b    -> Bytes b
    | False      -> False
    | True       -> True
    | Null _     -> Null      (* the type annotation was consumed above *)
    | EmptySet _ -> EmptySet
    | CNone _    -> CNone
  in Constant translated
|
||||
|
||||
(* Returns the payloads of [m]'s bindings, in increasing key order. *)
let map_to_list m =
  List.rev (SMap.fold (fun _key payload acc -> payload :: acc) m [])
|
||||
|
||||
(* Checks one record field against its expected type. Not implemented. *)
let a_field _tve (_expected, _expr) =
  failwith "TODO"
|
||||
|
||||
(** Translates a record literal, pairing each field of the [expected]
    record type with the corresponding given field and checking them
    one by one. [expected] must be a record type. *)
let a_record (tve : tve) (expected : O.type_expr) (record : (I.field_name * I.expr) list)
    : O.expr_case =
  let expected_fields =
    match expected.type_expr with
      Record fields -> fields
    | _ -> failwith "expected some_type but got record" in
  let expected_and_field =
    List.combine
      (map_to_list expected_fields)
      record (* TODO SHOULD BE (map_to_list record) *) in
  Record (map (a_field tve) expected_and_field)
|
||||
|
||||
(** Dispatches on an expression and translates/checks it against
    [expected]. Applications and lambdas are not implemented yet. *)
let a_expr_case (te,ve : tve) (expected : O.type_expr) : I.expr -> O.expr_case = function
  App _        -> failwith "TODO"
| Var v        -> a_var_expr (te,ve) expected v
| Constant c   -> a_constant_expr (te,ve) expected c
| Record r     -> a_record (te,ve) expected r
| Lambda _     -> failwith "TODO"
|
||||
|
||||
(** Annotates an expression: checks it against [expected] and wraps the
    result with its type. *)
let a_expr (te,ve : tve) (expected : O.type_expr) (e : I.expr) : O.expr =
  { expr = a_expr_case (te,ve) expected e;
    ty   = expected;
    orig = `TODO }
|
||||
|
||||
(** Annotates a value declaration: translates the declared type, checks
    the initialiser against it, and shadows the variable's binding in
    the value environment. *)
let a_declaration (te,ve : tve) ({name;ty;value} : I.decl) : tve * O.decl =
  let ty = a_type_expr (te,ve) ty in
  let value = a_expr (te,ve) ty value in
  let ve = shadow (string_of_name name) ty ve in
  let var : O.typed_var = {name = a_name_and_region name; ty; orig = `TODO} in
  (te,ve), {var; value; orig = `TODO}
|
||||
|
||||
(** Annotates a list of value declarations, threading the environments
    left to right. *)
let a_declarations : tve -> I.decl list -> tve * O.decl list =
  fun tve l -> fold_map a_declaration tve l
|
||||
|
||||
(** Annotates a whole AST: types first, then the storage declaration,
    then the value declarations, threading the environments through and
    discarding them at the end. *)
let a_ast I.{types; storage_decl; declarations; orig} =
  let tve0 = SMap.empty, SMap.empty in
  let tve1, types = a_types tve0 types in
  let tve2, storage_decl = a_storage_decl tve1 storage_decl in
  let tve3, declarations = a_declarations tve2 declarations in
  ignore tve3; (* environments are no longer needed past this point *)
  O.{types; storage_decl; declarations; orig}
|
||||
|
||||
(** Entry point: type-annotates an input AST. *)
let annotate (ast : I.ast) : O.ast = a_ast ast
|
108
src/ligo/ligo-parser/Typecheck2.mli
Normal file
108
src/ligo/ligo-parser/Typecheck2.mli
Normal file
@ -0,0 +1,108 @@
|
||||
[@@@warning "-30"]
|
||||
|
||||
module SMap : Map.S with type key = string
|
||||
|
||||
module I = AST2.O
|
||||
|
||||
module O : sig
|
||||
type asttodo = [`TODO] (* occurrences of asttodo will point to some part of the original parser AST *)
|
||||
|
||||
type name_and_region = {name: string; orig: Region.t}
|
||||
type type_name = name_and_region
|
||||
type var_name = name_and_region
|
||||
type field_name = name_and_region
|
||||
|
||||
type pattern =
|
||||
PVar of var_name
|
||||
| PWild
|
||||
| PInt of Z.t
|
||||
| PBytes of Hex.t
|
||||
| PString of string
|
||||
| PUnit
|
||||
| PFalse
|
||||
| PTrue
|
||||
| PNone
|
||||
| PSome of pattern
|
||||
| PCons of pattern * pattern
|
||||
| PNull
|
||||
| PRecord of (field_name * pattern) SMap.t
|
||||
|
||||
type type_constructor =
|
||||
Option
|
||||
| List
|
||||
| Set
|
||||
| Map
|
||||
|
||||
type type_expr_case =
|
||||
Sum of (type_name * type_expr) SMap.t
|
||||
| Record of (field_name * type_expr) SMap.t
|
||||
| TypeApp of type_constructor * (type_expr list)
|
||||
| Function of { arg: type_expr; ret: type_expr }
|
||||
| Ref of type_expr
|
||||
| String
|
||||
| Bytes
|
||||
| Int
|
||||
| Unit
|
||||
| Bool
|
||||
|
||||
and type_expr = { type_expr: type_expr_case; name: type_name option; orig: Region.t }
|
||||
|
||||
type typed_var = { name:var_name; ty:type_expr; orig: asttodo }
|
||||
|
||||
type type_decl = { name:type_name; ty:type_expr; orig: asttodo }
|
||||
|
||||
type expr_case =
|
||||
App of { operator: operator; arguments: expr list }
|
||||
| Var of typed_var
|
||||
| Constant of constant
|
||||
| Record of (field_name * expr) list
|
||||
| Lambda of lambda
|
||||
|
||||
and expr = { expr: expr_case; ty:type_expr; orig: asttodo }
|
||||
|
||||
and decl = { var: typed_var; value: expr; orig: asttodo }
|
||||
|
||||
and lambda = {
|
||||
parameter: typed_var;
|
||||
declarations: decl list;
|
||||
instructions: instr list;
|
||||
result: expr;
|
||||
}
|
||||
|
||||
and operator_case =
|
||||
Function of var_name
|
||||
| Constructor of var_name
|
||||
| UpdateField of field_name
|
||||
| GetField of field_name
|
||||
| Or | And | Lt | Leq | Gt | Geq | Equal | Neq | Cat | Cons | Add | Sub | Mult | Div | Mod
|
||||
| Neg | Not
|
||||
| Set
|
||||
| MapLookup
|
||||
|
||||
and operator = { operator: operator_case; ty:type_expr; orig: asttodo }
|
||||
|
||||
and constant =
|
||||
Unit
|
||||
| Int of Z.t | String of string | Bytes of Hex.t
|
||||
| False | True
|
||||
| Null
|
||||
| EmptySet
|
||||
| CNone
|
||||
|
||||
and instr =
|
||||
Assignment of { name: var_name; value: expr; orig: asttodo }
|
||||
| While of { condition: expr; body: instr list; orig: asttodo }
|
||||
| ForCollection of { list: expr; var: var_name; body: instr list; orig: asttodo }
|
||||
| Match of { expr: expr; cases: (pattern * instr list) list; orig: asttodo }
|
||||
| ProcedureCall of { expr: expr; orig: asttodo } (* expr returns unit, drop the result. Similar to OCaml's ";". *)
|
||||
| Fail of { expr: expr; orig: asttodo }
|
||||
|
||||
type ast = {
|
||||
types : type_decl list;
|
||||
storage_decl : typed_var;
|
||||
declarations : decl list;
|
||||
orig : AST.t
|
||||
}
|
||||
end
|
||||
|
||||
val annotate : I.ast -> O.ast
|
157
src/ligo/ligo-parser/Utils.ml
Normal file
157
src/ligo/ligo-parser/Utils.ml
Normal file
@ -0,0 +1,157 @@
|
||||
(* Utility types and functions *)
|
||||
|
||||
(* Identity *)
|
||||
|
||||
(* Polymorphic identity. *)
let id x = x

(* Right-to-left composition: [(f <@ g) x] is [f (g x)]. *)
let (<@) f g = fun x -> f (g x)

(* Flips the first two arguments of a binary function. *)
let swap f = fun x y -> f y x

(* Constant function: keeps its first argument, drops the second. *)
let lambda x _ = x

(* Currying / uncurrying between pair-taking and two-argument forms. *)
let curry f = fun x y -> f (x, y)
let uncurry f = fun (x, y) -> f x y
|
||||
|
||||
(* Parametric rules for sequences *)
|
||||
|
||||
type 'a nseq = 'a * 'a list
|
||||
type ('a,'sep) nsepseq = 'a * ('sep * 'a) list
|
||||
type ('a,'sep) sepseq = ('a,'sep) nsepseq option
|
||||
|
||||
(* Consing *)
|
||||
|
||||
(* Prepends an element to a non-empty sequence. *)
let nseq_cons x (hd, tl) = x, hd :: tl

(* Prepends an element and its trailing separator to a non-empty
   separated sequence. *)
let nsepseq_cons x sep (hd, tl) = x, (sep, hd) :: tl

(* Prepends to a possibly empty separated sequence, always yielding a
   non-empty one (the separator is dropped when the tail was empty). *)
let sepseq_cons x sep seq =
  match seq with
    None          -> x, []
  | Some (hd, tl) -> x, (sep, hd) :: tl
|
||||
|
||||
(* Rightwards iterators *)
|
||||
|
||||
(* Left fold over a non-empty sequence. *)
let nseq_foldl f a (hd, tl) = List.fold_left f a (hd :: tl)

(* Left fold over a non-empty separated sequence; separators ignored. *)
let nsepseq_foldl f a (hd, tl) =
  List.fold_left (fun acc (_sep, e) -> f acc e) (f a hd) tl

(* Left fold over a possibly empty separated sequence. *)
let sepseq_foldl f a seq =
  match seq with
    None -> a
  | Some s -> nsepseq_foldl f a s

(* Left-to-right iteration over a non-empty sequence. *)
let nseq_iter f (hd, tl) = List.iter f (hd :: tl)

(* Left-to-right iteration over a non-empty separated sequence;
   separators ignored. *)
let nsepseq_iter f (hd, tl) =
  f hd;
  List.iter (fun (_sep, e) -> f e) tl

(* Left-to-right iteration over a possibly empty separated sequence. *)
let sepseq_iter f seq =
  match seq with
    None -> ()
  | Some s -> nsepseq_iter f s
|
||||
|
||||
(* Reversing *)
|
||||
|
||||
(* Reverses a non-empty sequence. *)
let nseq_rev (hd, tl) =
  let rec loop (first, rest) = function
      [] -> first, rest
    | x :: l -> loop (x, first :: rest) l
  in loop (hd, []) tl

(* Reverses a non-empty separated sequence; each separator stays
   attached to the element that preceded it in the original order. *)
let nsepseq_rev seq =
  let rec walk acc (item, pairs) =
    match pairs with
      [] -> item, acc
    | (sep, next) :: rest -> walk ((sep, item) :: acc) (next, rest)
  in
  match seq with
    _, [] -> seq
  | hd, (sep, next) :: rest -> walk [sep, hd] (next, rest)

(* Reverses a possibly empty separated sequence. *)
let sepseq_rev = function
    None -> None
  | Some seq -> Some (nsepseq_rev seq)
|
||||
|
||||
(* Leftwards iterators *)
|
||||
|
||||
(* Right fold over a non-empty sequence. *)
let nseq_foldr f (hd, tl) acc = List.fold_right f (hd :: tl) acc

(* Right fold over a non-empty separated sequence; separators ignored. *)
let nsepseq_foldr f (hd, tl) acc =
  f hd (List.fold_right (fun (_sep, e) b -> f e b) tl acc)

(* Right fold over a possibly empty separated sequence. *)
let sepseq_foldr f seq acc =
  match seq with
    None -> acc
  | Some s -> nsepseq_foldr f s acc
|
||||
|
||||
(* Conversions to lists *)
|
||||
|
||||
(* A non-empty sequence as a plain list. *)
let nseq_to_list (hd, tl) = hd :: tl

(* A non-empty separated sequence as a plain list, separators dropped. *)
let nsepseq_to_list (hd, tl) = hd :: List.map snd tl

(* A possibly empty separated sequence as a plain list. *)
let sepseq_to_list seq =
  match seq with
    None -> []
  | Some s -> nsepseq_to_list s
|
||||
|
||||
(* Optional values *)
|
||||
|
||||
(* Helpers for optional values. *)
module Option =
  struct
    (* [apply f x] maps [f] over the content of [x], if any. *)
    let apply f x =
      match x with
        None -> None
      | Some y -> Some (f y)

    (* [rev_apply x y] applies the optional function [x] to [y],
       returning [y] unchanged when there is no function. *)
    let rev_apply x y =
      match x with
        None -> y
      | Some f -> f y

    (* [to_string x] is the contained string, or "" when absent. *)
    let to_string x =
      match x with
        None -> ""
      | Some s -> s
  end
|
||||
|
||||
(* Modules based on [String], like sets and maps. *)
|
||||
|
||||
(* The standard [String] module extended with string-keyed maps and
   string sets. *)
module String =
  struct
    include String

    (* Ordered-type view of strings for the functorial [Map]/[Set]. *)
    module Ord =
      struct
        type nonrec t = t
        let compare = compare  (* polymorphic compare = lexicographic on strings *)
      end

    module Map = Map.Make (Ord)
    module Set = Set.Make (Ord)
  end
|
||||
|
||||
(* Integers *)
|
||||
|
||||
(* Integer maps and sets. *)
module Int =
  struct
    type t = int

    (* Ordered-type view of ints for the functorial [Map]/[Set]. *)
    module Ord =
      struct
        type nonrec t = t
        let compare = compare  (* polymorphic compare = numeric order on ints *)
      end

    module Map = Map.Make (Ord)
    module Set = Set.Make (Ord)
  end
|
||||
|
||||
(* Effectful symbol generator *)
|
||||
|
||||
(* Generates a fresh symbol "v1", "v2", ... on each call. Stateful: the
   counter lives in the closure and is shared by all calls. *)
let gen_sym =
  let counter = ref 0 in
  fun () ->
    incr counter;
    "v" ^ string_of_int !counter
|
||||
|
||||
(* General tracing function *)
|
||||
|
||||
(* Writes [text] to the optional channel and flushes it immediately;
   a no-op when no channel is given. *)
let trace text chan_opt =
  match chan_opt with
    None -> ()
  | Some chan -> output_string chan text; flush chan
|
||||
|
||||
(* Printing a string in red to standard error *)
|
||||
|
||||
(* Prints [msg] in red (ANSI escape codes) on standard error and
   flushes ([%!]). *)
let highlight msg = Printf.eprintf "\027[31m%s\027[0m%!" msg
|
97
src/ligo/ligo-parser/Utils.mli
Normal file
97
src/ligo/ligo-parser/Utils.mli
Normal file
@ -0,0 +1,97 @@
|
||||
(* Utility types and functions *)
|
||||
|
||||
(* Polymorphic identity function *)
|
||||
|
||||
val id : 'a -> 'a
|
||||
|
||||
(* Combinators *)
|
||||
|
||||
val ( <@ ) : ('a -> 'b) -> ('c -> 'a) -> 'c -> 'b
|
||||
val swap : ('a -> 'b -> 'c) -> 'b -> 'a -> 'c
|
||||
val lambda : 'a -> 'b -> 'a
|
||||
val curry : ('a * 'b -> 'c) -> 'a -> 'b -> 'c
|
||||
val uncurry : ('a -> 'b -> 'c) -> 'a * 'b -> 'c
|
||||
|
||||
(* Parametric rules for sequences
|
||||
|
||||
nseq: non-empty sequence;
|
||||
sepseq: (possibly empty) sequence of separated items;
|
||||
nsepseq: non-empty sequence of separated items.
|
||||
*)
|
||||
|
||||
type 'a nseq = 'a * 'a list
|
||||
type ('a,'sep) nsepseq = 'a * ('sep * 'a) list
|
||||
type ('a,'sep) sepseq = ('a,'sep) nsepseq option
|
||||
|
||||
(* Consing *)
|
||||
|
||||
val nseq_cons : 'a -> 'a nseq -> 'a nseq
|
||||
val nsepseq_cons : 'a -> 'sep -> ('a,'sep) nsepseq -> ('a,'sep) nsepseq
|
||||
val sepseq_cons : 'a -> 'sep -> ('a,'sep) sepseq -> ('a,'sep) nsepseq
|
||||
|
||||
(* Reversing *)
|
||||
|
||||
val nseq_rev: 'a nseq -> 'a nseq
|
||||
val nsepseq_rev: ('a,'sep) nsepseq -> ('a,'sep) nsepseq
|
||||
val sepseq_rev: ('a,'sep) sepseq -> ('a,'sep) sepseq
|
||||
|
||||
(* Rightwards iterators *)
|
||||
|
||||
val nseq_foldl : ('a -> 'b -> 'a) -> 'a -> 'b nseq -> 'a
|
||||
val nsepseq_foldl : ('a -> 'b -> 'a) -> 'a -> ('b,'c) nsepseq -> 'a
|
||||
val sepseq_foldl : ('a -> 'b -> 'a) -> 'a -> ('b,'c) sepseq -> 'a
|
||||
|
||||
val nseq_iter : ('a -> unit) -> 'a nseq -> unit
|
||||
val nsepseq_iter : ('a -> unit) -> ('a,'b) nsepseq -> unit
|
||||
val sepseq_iter : ('a -> unit) -> ('a,'b) sepseq -> unit
|
||||
|
||||
(* Leftwards iterators *)
|
||||
|
||||
val nseq_foldr : ('a -> 'b -> 'b) -> 'a nseq -> 'b -> 'b
|
||||
val nsepseq_foldr : ('a -> 'b -> 'b) -> ('a,'c) nsepseq -> 'b -> 'b
|
||||
val sepseq_foldr : ('a -> 'b -> 'b) -> ('a,'c) sepseq -> 'b -> 'b
|
||||
|
||||
(* Conversions to lists *)
|
||||
|
||||
val nseq_to_list : 'a nseq -> 'a list
|
||||
val nsepseq_to_list : ('a,'b) nsepseq -> 'a list
|
||||
val sepseq_to_list : ('a,'b) sepseq -> 'a list
|
||||
|
||||
(* Effectful symbol generator *)
|
||||
|
||||
val gen_sym : unit -> string
|
||||
|
||||
(* General tracing function *)
|
||||
|
||||
val trace : string -> out_channel option -> unit
|
||||
|
||||
(* Printing a string in red to standard error *)
|
||||
|
||||
val highlight : string -> unit
|
||||
|
||||
(* Working with optional values *)
|
||||
|
||||
module Option :
|
||||
sig
|
||||
val apply : ('a -> 'b) -> 'a option -> 'b option
|
||||
val rev_apply : ('a -> 'a) option -> 'a -> 'a
|
||||
val to_string : string option -> string
|
||||
end
|
||||
|
||||
(* An extension to the standard module [String] *)
|
||||
|
||||
module String :
|
||||
sig
|
||||
include module type of String
|
||||
module Map : Map.S with type key = t
|
||||
module Set : Set.S with type elt = t
|
||||
end
|
||||
|
||||
(* Integer maps *)
|
||||
|
||||
module Int :
|
||||
sig
|
||||
type t = int
|
||||
module Map : Map.S with type key = t
|
||||
module Set : Set.S with type elt = t
|
||||
end
|
10
src/ligo/ligo-parser/check_dot_git_is_dir.sh
Executable file
10
src/ligo/ligo-parser/check_dot_git_is_dir.sh
Executable file
@ -0,0 +1,10 @@
|
||||
#!/bin/sh
# Records into dot_git_is_dir whether ../../.git is a directory (normal
# clone) or a gitlink file (worktree/submodule).

set -e

if [ -d ../../.git ]; then
  echo true > dot_git_is_dir
else
  echo false > dot_git_is_dir
  # For a gitlink, append the "gitdir: ..." pointer after the flag.
  cat .git >> dot_git_is_dir
fi
|
70
src/ligo/ligo-parser/dune
Normal file
70
src/ligo/ligo-parser/dune
Normal file
@ -0,0 +1,70 @@
|
||||
(ocamllex LexToken)
|
||||
(ocamllex Lexer)
|
||||
|
||||
(menhir
|
||||
(merge_into Parser)
|
||||
(modules ParToken Parser)
|
||||
(flags -la 1 --explain --external-tokens LexToken))
|
||||
|
||||
(executables
|
||||
(names LexerMain ParserMain)
|
||||
(public_names ligo-lexer ligo-parser)
|
||||
(package ligo-parser)
|
||||
(modules_without_implementation Error)
|
||||
(libraries getopt hex str uutf zarith))
|
||||
|
||||
;; Les deux directives (rule) qui suivent sont pour le dev local.
|
||||
;; Il suffit de faire "dune build Parser.exe" pour avoir un Parser.exe dans le dossier.
|
||||
;; Pour le purger, il faut faire "dune clean".
|
||||
(rule
|
||||
(targets Parser.exe)
|
||||
(deps ParserMain.exe)
|
||||
(action (copy ParserMain.exe Parser.exe))
|
||||
(mode promote-until-clean))
|
||||
|
||||
(rule
|
||||
(targets Lexer.exe)
|
||||
(deps LexerMain.exe)
|
||||
(action (copy LexerMain.exe Lexer.exe))
|
||||
(mode promote-until-clean))
|
||||
|
||||
(rule
|
||||
(targets dot_git_is_dir)
|
||||
(deps check_dot_git_is_dir.sh)
|
||||
(action (run "sh" "check_dot_git_is_dir.sh")))
|
||||
|
||||
(rule
|
||||
(targets .git_main_dir)
|
||||
(deps dot_git_is_dir check_dot_git_is_dir.sh)
|
||||
(action
|
||||
(progn (run "sh" "check_dot_git_is_dir.sh")
|
||||
(run "sh" "-c" "if \"$(cat dot_git_is_dir)\" = true; then printf %s '../../.git' > .git_main_dir; else cat ../../.git | sed -e 's/^gitdir: //' | sed -e 's|$|/../..|' > .git_main_dir; fi"))))
|
||||
|
||||
(rule
|
||||
(targets .git_worktree_dir)
|
||||
(deps dot_git_is_dir check_dot_git_is_dir.sh)
|
||||
(action
|
||||
(progn (run "sh" "check_dot_git_is_dir.sh")
|
||||
(run "sh" "-c" "if \"$(cat dot_git_is_dir)\" = true; then printf %s '../../.git' > .git_worktree_dir; else cat ../../.git | sed -e 's/^gitdir: //' > .git_worktree_dir; fi"))))
|
||||
|
||||
(rule
|
||||
(targets .gitHEAD)
|
||||
(deps .git_main_dir .git_worktree_dir check_dot_git_is_dir.sh)
|
||||
(action
|
||||
(progn (run "sh" "check_dot_git_is_dir.sh")
|
||||
(run "sh" "-c" "ln -s \"$(cat .git_worktree_dir)/HEAD\" .gitHEAD"))))
|
||||
|
||||
(rule
|
||||
(targets Version.gitHEAD)
|
||||
(deps .gitHEAD check_dot_git_is_dir.sh)
|
||||
(action
|
||||
(progn (run "sh" "check_dot_git_is_dir.sh")
|
||||
(run "sh" "-c" "if git symbolic-ref HEAD >/dev/null 2>&1; then ln -s \"$(cat .git_main_dir)/$(git symbolic-ref HEAD)\" Version.gitHEAD; else ln -s \"$(cat .git_worktree_dir)/HEAD\" Version.gitHEAD; fi"))))
|
||||
|
||||
(rule
|
||||
(targets Version.ml)
|
||||
(deps Version.gitHEAD check_dot_git_is_dir.sh)
|
||||
(action
|
||||
(progn (run "sh" "check_dot_git_is_dir.sh")
|
||||
(run "sh" "-c" "printf 'let version = \"%s\"'\\\\n \"$(git describe --always --dirty --abbrev=0)\" > Version.ml")))
|
||||
(mode promote-until-clean))
|
2
src/ligo/ligo-parser/dune-project
Normal file
2
src/ligo/ligo-parser/dune-project
Normal file
@ -0,0 +1,2 @@
|
||||
(lang dune 1.7)
|
||||
(using menhir 2.0)
|
19
src/ligo/ligo-parser/ligo-parser.opam
Normal file
19
src/ligo/ligo-parser/ligo-parser.opam
Normal file
@ -0,0 +1,19 @@
|
||||
opam-version : "2.0"
|
||||
version : "1.0"
|
||||
maintainer : "gabriel.alfour@gmail.com"
|
||||
authors : [ "Galfour" ]
|
||||
homepage : "https://gitlab.com/gabriel.alfour/ligo-parser"
|
||||
bug-reports : "https://gitlab.com/gabriel.alfour/ligo-parser/issues"
|
||||
dev-repo : "git+https://gitlab.com/gabriel.alfour/ligo-parser.git"
|
||||
license : "MIT"
|
||||
|
||||
depends : [ "dune" "menhir" "hex" "zarith" "getopt" "uutf" ]
|
||||
|
||||
build : [
|
||||
[ "sh" "-c" "printf 'let version = \"%s\"' \"$(git describe --always --dirty --abbrev=0)\" > Version.ml" ]
|
||||
[ "dune" "build" "-p" name "-j" jobs ]
|
||||
]
|
||||
|
||||
url {
|
||||
src: "https://gitlab.com/gabriel.alfour/ligo-parser/-/archive/master/ligo-parser.tar.gz"
|
||||
}
|
229
src/ligo/ligo-parser/typecheck.ml
Normal file
229
src/ligo/ligo-parser/typecheck.ml
Normal file
@ -0,0 +1,229 @@
|
||||
(* module I = AST (\* In *\) *)
|
||||
|
||||
(* module SMap = Map.Make(String) *)
|
||||
|
||||
(* type te = type_expr list SMap.t *)
|
||||
(* type ve = type_expr list SMap.t *)
|
||||
(* type tve = te * ve *)
|
||||
|
||||
(*
|
||||
module I = AST (* In *)
|
||||
|
||||
module SMap = Map.Make(String)
|
||||
|
||||
module O = struct
|
||||
open AST (* TODO: for now, should disappear *)
|
||||
|
||||
type t = ast
|
||||
|
||||
and type_expr =
|
||||
Prod of cartesian
|
||||
| Sum of (variant, vbar) Utils.nsepseq
|
||||
| Record of record_type
|
||||
| TypeApp of (type_name * type_tuple)
|
||||
| ParType of type_expr par
|
||||
| TAlias of variable
|
||||
| Function of (type_expr list) * type_expr
|
||||
| Mutable of type_expr
|
||||
| Unit
|
||||
| TODO of string
|
||||
|
||||
and te = type_expr list SMap.t
|
||||
|
||||
and ve = type_expr list SMap.t
|
||||
|
||||
and vte = ve * te
|
||||
|
||||
and ast = {
|
||||
lambdas : lambda_decl list;
|
||||
block : block
|
||||
}
|
||||
|
||||
and lambda_decl =
|
||||
FunDecl of fun_decl
|
||||
| ProcDecl of proc_decl
|
||||
|
||||
and fun_decl = {
|
||||
kwd_function : kwd_function;
|
||||
var : variable;
|
||||
param : parameters;
|
||||
colon : colon;
|
||||
ret_type : type_expr;
|
||||
kwd_is : kwd_is;
|
||||
body : block;
|
||||
kwd_with : kwd_with;
|
||||
return : checked_expr
|
||||
}
|
||||
|
||||
and proc_decl = {
|
||||
kwd_procedure : kwd_procedure;
|
||||
var : variable;
|
||||
param : parameters;
|
||||
kwd_is : kwd_is;
|
||||
body : block
|
||||
}
|
||||
|
||||
and block = {
|
||||
decls : value_decls;
|
||||
opening : kwd_begin;
|
||||
instr : instructions;
|
||||
close : kwd_end
|
||||
}
|
||||
|
||||
and value_decls = var_decl list
|
||||
|
||||
and var_decl = {
|
||||
kind : var_kind;
|
||||
var : variable;
|
||||
colon : colon;
|
||||
vtype : type_expr;
|
||||
setter : Region.t; (* "=" or ":=" *)
|
||||
init : checked_expr
|
||||
}
|
||||
|
||||
and checked_expr = {ty:type_expr;expr:expr}
|
||||
end [@warning "-30"]
|
||||
|
||||
open O
|
||||
open AST
|
||||
open Region
|
||||
|
||||
let mk_checked_expr ~ty ~expr = {ty;expr}
|
||||
let mk_proc_decl ~kwd_procedure ~var ~param ~kwd_is ~body =
|
||||
O.{kwd_procedure; var; param; kwd_is; body}
|
||||
let mk_ast ~lambdas ~block = {lambdas;block}
|
||||
let mk_fun_decl ~kwd_function ~var ~param ~colon ~ret_type ~kwd_is ~body ~kwd_with ~return =
|
||||
O.{kwd_function; var; param; colon; ret_type; kwd_is; body; kwd_with; return}
|
||||
|
||||
|
||||
let unreg : 'a reg -> 'a = fun {value; _} -> value
|
||||
let unpar : 'a par -> 'a = (fun (_left_par, x, _right_par) -> x) @. unreg
|
||||
let nsepseq_to_list : ('a,'sep) Utils.nsepseq -> 'a list =
|
||||
fun (first, rest) -> first :: (map snd rest)
|
||||
let sepseq_to_list : ('a,'sep) Utils.sepseq -> 'a list =
|
||||
function
|
||||
None -> []
|
||||
| Some nsepseq -> nsepseq_to_list nsepseq
|
||||
|
||||
let rec xty : I.type_expr -> O.type_expr =
|
||||
function
|
||||
I.Prod x -> O.Prod x
|
||||
| I.Sum x -> O.Sum (unreg x)
|
||||
| I.Record x -> O.Record x
|
||||
| I.TypeApp x -> O.TypeApp (unreg x)
|
||||
| I.ParType {region;value=(l,x,r)} -> O.ParType {region;value=(l, xty x, r)}
|
||||
| I.TAlias x -> O.TAlias x
|
||||
|
||||
let shadow (name : string) (typ : O.type_expr) (env : O.type_expr list SMap.t)
|
||||
: O.type_expr list SMap.t =
|
||||
SMap.update name (function None -> Some [typ] | Some tl -> Some (typ :: tl)) env
|
||||
|
||||
let shadow_list (name_typ_list : (string * O.type_expr) list) (env : O.type_expr list SMap.t)
|
||||
: O.type_expr list SMap.t =
|
||||
List.fold_left (fun acc (name, typ) -> shadow name typ acc) env name_typ_list
|
||||
|
||||
let type_decls_to_tenv (td : I.type_decl list) (te : te) : O.te =
|
||||
td
|
||||
|> List.map unreg
|
||||
|> List.map (fun (_, name, _, type_expr) -> (unreg name, xty type_expr))
|
||||
|> fun up -> shadow_list up te
|
||||
|
||||
let var_kind_to_ty : var_kind -> I.type_expr -> O.type_expr =
|
||||
fun var_kind ty ->
|
||||
match var_kind with
|
||||
Mutable _ -> O.Mutable (xty ty)
|
||||
| Const _ -> xty ty
|
||||
|
||||
let params_to_xty params ret_type =
|
||||
unpar params
|
||||
|> nsepseq_to_list
|
||||
|> map (fun {value=(var_kind, _variable, _colon, type_expr);_} -> var_kind_to_ty var_kind type_expr)
|
||||
|> fun param_types -> O.Function (param_types, ret_type)
|
||||
|
||||
let type_equal t1 t2 = match t1,t2 with
|
||||
| O.Prod _x, O.Prod _y -> true (* TODO *)
|
||||
| O.Sum _x, O.Sum _y -> true (* TODO *)
|
||||
| _ -> false
|
||||
|
||||
exception TypeError of string
|
||||
|
||||
let check_type expr expected_type =
|
||||
if type_equal expr.ty expected_type then expr
|
||||
else raise (TypeError "oops")
|
||||
|
||||
let tc_expr (_te,_ve) expr = mk_checked_expr ~ty:(TODO "all expressions") ~expr (* TODO *)
|
||||
|
||||
let tc_var_decl : vte -> I.var_decl -> vte * O.var_decl =
|
||||
fun (ve,te) var_decl ->
|
||||
let vtype = (xty var_decl.vtype) in
|
||||
let init = check_type (tc_expr (te,ve) var_decl.init) vtype in
|
||||
let ve = shadow (unreg var_decl.var) vtype ve in
|
||||
(ve,te), {
|
||||
kind = var_decl.kind;
|
||||
var = var_decl.var;
|
||||
colon = var_decl.colon;
|
||||
vtype;
|
||||
setter = var_decl.setter;
|
||||
init}
|
||||
|
||||
let tc_var_decls (ve,te) var_decls = fold_map tc_var_decl (ve,te) var_decls
|
||||
|
||||
let tc_block (te, ve : vte) (block : I.block) : vte * O.block =
|
||||
let decls,opening,instr,close = block.decls, block.opening, block.instr, block.close in
|
||||
let (ve,te), decls = tc_var_decls (ve,te) (decls |> unreg |> sepseq_to_list |> map unreg) in
|
||||
(ve,te), O.{decls;opening;instr;close} (* TODO *)
|
||||
|
||||
let tc_proc_decl : vte -> I.proc_decl -> O.proc_decl =
|
||||
fun vte proc_decl ->
|
||||
let _vte', block' = tc_block vte (unreg proc_decl.body)
|
||||
in mk_proc_decl
|
||||
~kwd_procedure: proc_decl.kwd_procedure
|
||||
~kwd_is: proc_decl.kwd_is
|
||||
~var: proc_decl.var
|
||||
~param: proc_decl.param
|
||||
~body: block'
|
||||
|
||||
let tc_fun_decl : vte -> I.fun_decl -> O.fun_decl =
|
||||
fun vte fun_decl ->
|
||||
let vte', block' = tc_block vte (unreg fun_decl.body) in
|
||||
let return' = tc_expr vte' fun_decl.return in
|
||||
let checked_return' = check_type return' (xty fun_decl.ret_type)
|
||||
in mk_fun_decl
|
||||
~kwd_function: fun_decl.kwd_function
|
||||
~colon: fun_decl.colon
|
||||
~kwd_is: fun_decl.kwd_is
|
||||
~kwd_with: fun_decl.kwd_with
|
||||
~var: fun_decl.var
|
||||
~param: fun_decl.param
|
||||
~ret_type: (xty fun_decl.ret_type)
|
||||
~body: block'
|
||||
~return: checked_return'
|
||||
|
||||
let ve_lambda_decl : vte -> I.lambda_decl -> ve =
|
||||
fun (ve,_te) ->
|
||||
function
|
||||
FunDecl {value;_} -> shadow value.var.value (params_to_xty value.param (xty value.ret_type)) ve
|
||||
| ProcDecl {value;_} -> shadow value.var.value (params_to_xty value.param Unit) ve
|
||||
|
||||
let tc_lambda_decl (ve, te : vte) (whole : I.lambda_decl) : vte * O.lambda_decl =
|
||||
match whole with
|
||||
FunDecl {value;_} -> ((ve_lambda_decl (ve, te) whole), te), O.FunDecl (tc_fun_decl (ve, te) value)
|
||||
| ProcDecl {value;_} -> ((ve_lambda_decl (ve, te) whole), te), O.ProcDecl (tc_proc_decl (ve, te) value)
|
||||
|
||||
let tc_ast (ast : I.ast) : O.ast =
|
||||
(* te is the type environment, ve is the variable environment *)
|
||||
let te =
|
||||
SMap.empty
|
||||
|> type_decls_to_tenv ast.types in
|
||||
let ve =
|
||||
SMap.empty
|
||||
|> (match ast.parameter.value with (_,name,_,ty) -> shadow (unreg name) @@ xty ty)
|
||||
|> shadow "storage" @@ xty (snd ast.storage.value)
|
||||
|> shadow "operations" @@ xty (snd ast.operations.value)
|
||||
in
|
||||
let (ve',te'), lambdas = fold_map tc_lambda_decl (ve, te) ast.lambdas in
|
||||
let (ve'', te''), block = tc_block (ve', te') (unreg ast.block) in
|
||||
let _ve'' = ve'' in (* not needed anymore *)
|
||||
let _te'' = te'' in (* not needed anymore *)
|
||||
mk_ast ~lambdas ~block
|
||||
*)
|
Loading…
Reference in New Issue
Block a user